libs/script/scripttokeniser.h

   1 /*
   2    Copyright (C) 2001-2006, William Joseph.
   3    All Rights Reserved.
   4
   5    This file is part of GtkRadiant.
   6
   7    GtkRadiant is free software; you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 2 of the License, or
  10    (at your option) any later version.
  11
  12    GtkRadiant is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with GtkRadiant; if not, write to the Free Software
  19    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  20  */
  21
  22 #if !defined( INCLUDED_SCRIPT_SCRIPTTOKENISER_H )
  23 #define INCLUDED_SCRIPT_SCRIPTTOKENISER_H
  24
  25 #include "iscriplib.h"
  26
  27 class ScriptTokeniser : public Tokeniser
  28 {
  29 enum CharType
  30 {
  31         eWhitespace,
  32         eCharToken,
  33         eNewline,
  34         eCharQuote,
  35         eCharSolidus,
  36         eCharStar,
  37         eCharSpecial,
  38 };
  39
  40 typedef bool ( ScriptTokeniser::*Tokenise )( char c );
  41
  42 Tokenise m_stack[3];
  43 Tokenise* m_state;
  44 SingleCharacterInputStream<TextInputStream> m_istream;
  45 std::size_t m_scriptline;
  46 std::size_t m_scriptcolumn;
  47
  48 char m_token[MAXTOKEN];
  49 char* m_write;
  50
  51 char m_current;
  52 bool m_eof;
  53 bool m_crossline;
  54 bool m_unget;
  55 bool m_emit;
  56
  57 bool m_special;
  58
  59 CharType charType( const char c ){
  60         switch ( c )
  61         {
  62         case '\n': return eNewline;
  63         case '"': return eCharQuote;
  64         case '/': return eCharSolidus;
  65         case '*': return eCharStar;
  66         case '{': case '(': case '}': case ')': case '[': case ']': case ',': case ':': return ( m_special ) ? eCharSpecial : eCharToken;
  67         }
  68
  69         if ( c > 32 ) {
  70                 return eCharToken;
  71         }
  72         return eWhitespace;
  73 }
  74
  75 Tokenise state(){
  76         return *m_state;
  77 }
  78 void push( Tokenise state ){
  79         ASSERT_MESSAGE( m_state != m_stack + 2, "token parser: illegal stack push" );
  80         *( ++m_state ) = state;
  81 }
  82 void pop(){
  83         ASSERT_MESSAGE( m_state != m_stack, "token parser: illegal stack pop" );
  84         --m_state;
  85 }
  86 void add( const char c ){
  87         if ( m_write < m_token + MAXTOKEN - 1 ) {
  88                 *m_write++ = c;
  89         }
  90 }
  91 void remove(){
  92         ASSERT_MESSAGE( m_write > m_token, "no char to remove" );
  93         --m_write;
  94 }
  95
  96 bool tokeniseDefault( char c ){
  97         switch ( charType( c ) )
  98         {
  99         case eNewline:
 100                 if ( !m_crossline ) {
 101                         globalErrorStream() << Unsigned( getLine() ) << ":" << Unsigned( getColumn() ) << ": unexpected end-of-line before token\n";
 102                         return false;
 103                 }
 104                 break;
 105         case eCharToken:
 106         case eCharStar:
 107                 push( Tokenise( &ScriptTokeniser::tokeniseToken ) );
 108                 add( c );
 109                 break;
 110         case eCharSpecial:
 111                 push( Tokenise( &ScriptTokeniser::tokeniseSpecial ) );
 112                 add( c );
 113                 break;
 114         case eCharQuote:
 115                 push( Tokenise( &ScriptTokeniser::tokeniseQuotedToken ) );
 116                 break;
 117         case eCharSolidus:
 118                 push( Tokenise( &ScriptTokeniser::tokeniseSolidus ) );
 119                 break;
 120         default:
 121                 break;
 122         }
 123         return true;
 124 }
 125 bool tokeniseToken( char c ){
 126         switch ( charType( c ) )
 127         {
 128         case eNewline:
 129         case eWhitespace:
 130         case eCharQuote:
 131         case eCharSpecial:
 132                 pop();
 133                 m_emit = true; // emit token
 134                 break;
 135         case eCharSolidus:
 136 #if 0 //SPoG: ignore comments in the middle of tokens.
 137                 push( Tokenise( &ScriptTokeniser::tokeniseSolidus ) );
 138                 break;
 139 #endif
 140         case eCharToken:
 141         case eCharStar:
 142                 add( c );
 143                 break;
 144         default:
 145                 break;
 146         }
 147         return true;
 148 }
 149 bool tokeniseQuotedToken( char c ){
 150         switch ( charType( c ) )
 151         {
 152         case eNewline:
 153                 if ( m_crossline ) {
 154                         globalErrorStream() << Unsigned( getLine() ) << ":" << Unsigned( getColumn() ) << ": unexpected end-of-line in quoted token\n";
 155                         return false;
 156                 }
 157                 break;
 158         case eWhitespace:
 159         case eCharToken:
 160         case eCharSolidus:
 161         case eCharStar:
 162         case eCharSpecial:
 163                 add( c );
 164                 break;
 165         case eCharQuote:
 166                 pop();
 167                 push( Tokenise( &ScriptTokeniser::tokeniseEndQuote ) );
 168                 break;
 169         default:
 170                 break;
 171         }
 172         return true;
 173 }
 174 bool tokeniseSolidus( char c ){
 175         switch ( charType( c ) )
 176         {
 177         case eNewline:
 178         case eWhitespace:
 179         case eCharQuote:
 180         case eCharSpecial:
 181                 pop();
 182                 add( '/' );
 183                 m_emit = true; // emit single slash
 184                 break;
 185         case eCharToken:
 186                 pop();
 187                 add( '/' );
 188                 add( c );
 189                 break;
 190         case eCharSolidus:
 191                 pop();
 192                 push( Tokenise( &ScriptTokeniser::tokeniseComment ) );
 193                 break; // dont emit single slash
 194         case eCharStar:
 195                 pop();
 196                 push( Tokenise( &ScriptTokeniser::tokeniseBlockComment ) );
 197                 break; // dont emit single slash
 198         default:
 199                 break;
 200         }
 201         return true;
 202 }
 203 bool tokeniseComment( char c ){
 204         if ( c == '\n' ) {
 205                 pop();
 206                 if ( state() == Tokenise( &ScriptTokeniser::tokeniseToken ) ) {
 207                         pop();
 208                         m_emit = true; // emit token immediatly preceding comment
 209                 }
 210         }
 211         return true;
 212 }
 213 bool tokeniseBlockComment( char c ){
 214         if ( c == '*' ) {
 215                 pop();
 216                 push( Tokenise( &ScriptTokeniser::tokeniseEndBlockComment ) );
 217         }
 218         return true;
 219 }
 220 bool tokeniseEndBlockComment( char c ){
 221         switch ( c )
 222         {
 223         case '/':
 224                 pop();
 225                 if ( state() == Tokenise( &ScriptTokeniser::tokeniseToken ) ) {
 226                         pop();
 227                         m_emit = true; // emit token immediatly preceding comment
 228                 }
 229                 break; // dont emit comment
 230         case '*':
 231                 break; // no state change
 232         default:
 233                 pop();
 234                 push( Tokenise( &ScriptTokeniser::tokeniseBlockComment ) );
 235                 break;
 236         }
 237         return true;
 238 }
 239 bool tokeniseEndQuote( char c ){
 240         pop();
 241         m_emit = true; // emit quoted token
 242         return true;
 243 }
 244 bool tokeniseSpecial( char c ){
 245         pop();
 246         m_emit = true; // emit single-character token
 247         return true;
 248 }
 249
 250 /// Returns true if a token was successfully parsed.
 251 bool tokenise(){
 252         m_write = m_token;
 253         while ( !eof() )
 254         {
 255                 char c = m_current;
 256
 257                 if ( !( ( *this ).*state() )( c ) ) {
 258                         // parse error
 259                         m_eof = true;
 260                         return false;
 261                 }
 262                 if ( m_emit ) {
 263                         m_emit = false;
 264                         return true;
 265                 }
 266
 267                 if ( c == '\n' ) {
 268                         ++m_scriptline;
 269                         m_scriptcolumn = 1;
 270                 }
 271                 else
 272                 {
 273                         ++m_scriptcolumn;
 274                 }
 275
 276                 m_eof = !m_istream.readChar( m_current );
 277         }
 278         return m_write != m_token;
 279 }
 280
 281 const char* fillToken(){
 282         if ( !tokenise() ) {
 283                 return 0;
 284         }
 285
 286         add( '\0' );
 287         return m_token;
 288 }
 289
 290 bool eof(){
 291         return m_eof;
 292 }
 293
 294 public:
 295 ScriptTokeniser( TextInputStream& istream, bool special )
 296         : m_state( m_stack ),
 297         m_istream( istream ),
 298         m_scriptline( 1 ),
 299         m_scriptcolumn( 1 ),
 300         m_crossline( false ),
 301         m_unget( false ),
 302         m_emit( false ),
 303         m_special( special ){
 304         m_stack[0] = Tokenise( &ScriptTokeniser::tokeniseDefault );
 305         m_eof = !m_istream.readChar( m_current );
 306         m_token[MAXTOKEN - 1] = '\0';
 307 }
 308 void release(){
 309         delete this;
 310 }
 311 void nextLine(){
 312         m_crossline = true;
 313 }
 314 const char* getToken(){
 315         if ( m_unget ) {
 316                 m_unget = false;
 317                 return m_token;
 318         }
 319
 320         return fillToken();
 321 }
 322 void ungetToken(){
 323         ASSERT_MESSAGE( !m_unget, "can't unget more than one token" );
 324         m_unget = true;
 325 }
 326 std::size_t getLine() const {
 327         return m_scriptline;
 328 }
 329 std::size_t getColumn() const {
 330         return m_scriptcolumn;
 331 }
 332 };
 333
 334
 335 inline Tokeniser& NewScriptTokeniser( TextInputStream& istream ){
 336         return *( new ScriptTokeniser( istream, true ) );
 337 }
 338
 339 inline Tokeniser& NewSimpleTokeniser( TextInputStream& istream ){
 340         return *( new ScriptTokeniser( istream, false ) );
 341 }
 342
 343 #endif