/* Copyright (C) 2001-2006, William Joseph. All Rights Reserved. This file is part of GtkRadiant. GtkRadiant is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. GtkRadiant is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with GtkRadiant; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #if !defined(INCLUDED_SCRIPT_SCRIPTTOKENISER_H) #define INCLUDED_SCRIPT_SCRIPTTOKENISER_H #include "iscriplib.h" class ScriptTokeniser : public Tokeniser { enum CharType { eWhitespace, eCharToken, eNewline, eCharQuote, eCharSolidus, eCharStar, eCharSpecial, }; typedef bool (ScriptTokeniser::*Tokenise)(char c); Tokenise m_stack[3]; Tokenise* m_state; SingleCharacterInputStream m_istream; std::size_t m_scriptline; std::size_t m_scriptcolumn; char m_token[MAXTOKEN]; char* m_write; char m_current; bool m_eof; bool m_crossline; bool m_unget; bool m_emit; bool m_special; CharType charType(const char c) { switch(c) { case '\n': return eNewline; case '"': return eCharQuote; case '/': return eCharSolidus; case '*': return eCharStar; case '{': case '(': case '}': case ')': case '[': case ']': case ',': case ':': return (m_special) ? eCharSpecial : eCharToken; } if(c > 32) { return eCharToken; } return eWhitespace; } Tokenise state() { return *m_state; } void push(Tokenise state) { ASSERT_MESSAGE(m_state != m_stack + 2, "token parser: illegal stack push"); *(++m_state) = state; } void pop() { ASSERT_MESSAGE(m_state != m_stack, "token parser: illegal stack pop"); --m_state; } void add(const char c) { if(m_write < m_token + MAXTOKEN - 1) { *m_write++ = c; } } void remove() { ASSERT_MESSAGE(m_write > m_token, "no char to remove"); --m_write; } bool tokeniseDefault(char c) { switch(charType(c)) { case eNewline: if(!m_crossline) { globalErrorStream() << Unsigned(getLine()) << ":" << Unsigned(getColumn()) << ": unexpected end-of-line before token\n"; return false; } break; case eCharToken: case eCharStar: push(Tokenise(&ScriptTokeniser::tokeniseToken)); add(c); break; case eCharSpecial: push(Tokenise(&ScriptTokeniser::tokeniseSpecial)); add(c); break; case eCharQuote: push(Tokenise(&ScriptTokeniser::tokeniseQuotedToken)); break; case eCharSolidus: push(Tokenise(&ScriptTokeniser::tokeniseSolidus)); break; default: break; } return true; } bool tokeniseToken(char c) { switch(charType(c)) { case eNewline: case eWhitespace: case eCharQuote: case eCharSpecial: pop(); m_emit = true; // emit token break; case eCharSolidus: #if 0 //SPoG: ignore comments in the middle of tokens. push(Tokenise(&ScriptTokeniser::tokeniseSolidus)); break; #endif case eCharToken: case eCharStar: add(c); break; default: break; } return true; } bool tokeniseQuotedToken(char c) { switch(charType(c)) { case eNewline: if(m_crossline) { globalErrorStream() << Unsigned(getLine()) << ":" << Unsigned(getColumn()) << ": unexpected end-of-line in quoted token\n"; return false; } break; case eWhitespace: case eCharToken: case eCharSolidus: case eCharStar: case eCharSpecial: add(c); break; case eCharQuote: pop(); push(Tokenise(&ScriptTokeniser::tokeniseEndQuote)); break; default: break; } return true; } bool tokeniseSolidus(char c) { switch(charType(c)) { case eNewline: case eWhitespace: case eCharQuote: case eCharSpecial: pop(); add('/'); m_emit = true; // emit single slash break; case eCharToken: pop(); add('/'); add(c); break; case eCharSolidus: pop(); push(Tokenise(&ScriptTokeniser::tokeniseComment)); break; // dont emit single slash case eCharStar: pop(); push(Tokenise(&ScriptTokeniser::tokeniseBlockComment)); break; // dont emit single slash default: break; } return true; } bool tokeniseComment(char c) { if(c == '\n') { pop(); if(state() == Tokenise(&ScriptTokeniser::tokeniseToken)) { pop(); m_emit = true; // emit token immediatly preceding comment } } return true; } bool tokeniseBlockComment(char c) { if(c == '*') { pop(); push(Tokenise(&ScriptTokeniser::tokeniseEndBlockComment)); } return true; } bool tokeniseEndBlockComment(char c) { switch(c) { case '/': pop(); if(state() == Tokenise(&ScriptTokeniser::tokeniseToken)) { pop(); m_emit = true; // emit token immediatly preceding comment } break; // dont emit comment case '*': break; // no state change default: pop(); push(Tokenise(&ScriptTokeniser::tokeniseBlockComment)); break; } return true; } bool tokeniseEndQuote(char c) { pop(); m_emit = true; // emit quoted token return true; } bool tokeniseSpecial(char c) { pop(); m_emit = true; // emit single-character token return true; } /// Returns true if a token was successfully parsed. bool tokenise() { m_write = m_token; while(!eof()) { char c = m_current; if(!((*this).*state())(c)) { // parse error m_eof = true; return false; } if(m_emit) { m_emit = false; return true; } if(c == '\n') { ++m_scriptline; m_scriptcolumn = 1; } else { ++m_scriptcolumn; } m_eof = !m_istream.readChar(m_current); } return m_write != m_token; } const char* fillToken() { if(!tokenise()) { return 0; } add('\0'); return m_token; } bool eof() { return m_eof; } public: ScriptTokeniser(TextInputStream& istream, bool special) : m_state(m_stack), m_istream(istream), m_scriptline(1), m_scriptcolumn(1), m_crossline(false), m_unget(false), m_emit(false), m_special(special) { m_stack[0] = Tokenise(&ScriptTokeniser::tokeniseDefault); m_eof = !m_istream.readChar(m_current); m_token[MAXTOKEN - 1] = '\0'; } void release() { delete this; } void nextLine() { m_crossline = true; } const char* getToken() { if(m_unget) { m_unget = false; return m_token; } return fillToken(); } void ungetToken() { ASSERT_MESSAGE(!m_unget, "can't unget more than one token"); m_unget = true; } std::size_t getLine() const { return m_scriptline; } std::size_t getColumn() const { return m_scriptcolumn; } }; inline Tokeniser& NewScriptTokeniser(TextInputStream& istream) { return *(new ScriptTokeniser(istream, true)); } inline Tokeniser& NewSimpleTokeniser(TextInputStream& istream) { return *(new ScriptTokeniser(istream, false)); } #endif