* are working on.
* They are merely wrapping get/put in order to count line numbers.
*/
+static void lex_ungetch(lex_file *lex, int ch);
+
+/* Try to complete a trigraph after the caller has consumed a '?'.
+ * 'old' is the character the caller already read. When the next two
+ * characters are a second '?' followed by one of = / ' ( ) ! < > - the
+ * single replacement character is returned. Otherwise every character
+ * read here is handed back through lex_ungetch (last read first) and
+ * 'old' is returned unchanged.
+ * NOTE(review): the lookahead uses fgetc directly, so a newline read here
+ * does not pass through lex_getch's line counting, and the failure path
+ * pushes back two characters - confirm lex_ungetch copes with both.
+ */
+static int lex_try_trigraph(lex_file *lex, int old)
+{
+    /* suffixes[i] is the third trigraph character, replacements[i] its meaning */
+    static const char suffixes[]     = "=/'()!<>-";
+    static const char replacements[] = "#\\^[]|{}~";
+    int second, third;
+    int i;
+
+    second = fgetc(lex->file);
+    if (second != '?') {
+        lex_ungetch(lex, second);
+        return old;
+    }
+
+    third = fgetc(lex->file);
+    for (i = 0; suffixes[i]; ++i) {
+        if (third == suffixes[i])
+            return replacements[i];
+    }
+
+    /* not a trigraph: restore the lookahead in reverse order of reading */
+    lex_ungetch(lex, third);
+    lex_ungetch(lex, second);
+    return old;
+}
+
+/* Try to complete a digraph whose first character 'ch' was already read.
+ * Reads one more character from the file; the pairs '<' ':', ':' '>',
+ * '<' '%', '%' '>' and '%' ':' yield '[', ']', '{', '}' and '#'
+ * respectively. For any other pair the second character is handed back
+ * through lex_ungetch and 'ch' is returned unchanged.
+ * NOTE(review): the lookahead uses fgetc directly and therefore bypasses
+ * lex_getch's line counting - confirm lex_ungetch accounts for that.
+ */
+static int lex_try_digraph(lex_file *lex, int ch)
+{
+    int next = fgetc(lex->file);
+
+    switch (ch) {
+        case '<':
+            if (next == ':')
+                return '[';
+            if (next == '%')
+                return '{';
+            break;
+        case ':':
+            if (next == '>')
+                return ']';
+            break;
+        case '%':
+            if (next == '>')
+                return '}';
+            if (next == ':')
+                return '#';
+            break;
+        default:
+            break;
+    }
+
+    /* no digraph formed: give the lookahead character back */
+    lex_ungetch(lex, next);
+    return ch;
+}
+
/* Read the next character from lex->file.
 * A newline increments lex->line. A '?' is passed to lex_try_trigraph,
 * and '<', ':' or '%' are passed to lex_try_digraph unless
 * lex->flags.nodigraphs is set, so the value returned may be the
 * replacement character rather than the one read. Any other value,
 * including whatever fgetc returns at end of input, is returned as is.
 */
static int lex_getch(lex_file *lex)
{
int ch;
ch = fgetc(lex->file);
if (ch == '\n')
lex->line++; /* line counting happens only here */
+ else if (ch == '?')
+ return lex_try_trigraph(lex, ch); /* may consume two more characters */
+ else if (!lex->flags.nodigraphs && (ch == '<' || ch == ':' || ch == '%'))
+ return lex_try_digraph(lex, ch); /* may consume one more character */
return ch;
}
return (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F');
}
+/* Append the character 'ch' to the value buffer of the current token.
+ * Returns true on success. If token_value_add fails, "out of memory" is
+ * reported through lexerror and false is returned.
+ */
+static bool GMQCC_WARN lex_tokench(lex_file *lex, int ch)
+{
+    if (token_value_add(&lex->tok, ch))
+        return true;
+
+    lexerror(lex, "out of memory");
+    return false;
+}
+
+/* Terminate the current token's value with a null byte.
+ * The terminator is appended with token_value_add and value_count is then
+ * decremented again, so the count does not include the terminator.
+ * Returns true on success. If the append fails, "out of memory" is
+ * reported through lexerror and false is returned.
+ */
+static bool GMQCC_WARN lex_endtoken(lex_file *lex)
+{
+    if (token_value_add(&lex->tok, 0)) {
+        lex->tok.value_count--;
+        return true;
+    }
+
+    lexerror(lex, "out of memory");
+    return false;
+}
+
/* Skip whitespace and comments and return the first
* non-white character.
* As this makes use of the above getch() ungetch() functions,
static int lex_skipwhite(lex_file *lex)
{
int ch = 0;
+ bool haswhite = false; /* set once whitespace or comment text is recorded in preprocessing mode */
do
{
ch = lex_getch(lex);
- while (ch != EOF && isspace(ch)) ch = lex_getch(lex);
+ while (ch != EOF && isspace(ch)) {
+ if (lex->flags.preprocessing) {
+ if (ch == '\n') {
+ /* end-of-line */
+ /* see if there was whitespace first */
+ if (haswhite) { /* (lex->tok.value_count) { */
+ lex_ungetch(lex, ch); /* hand the newline back; the recorded whitespace is returned first */
+ if (!lex_endtoken(lex))
+ return TOKEN_FATAL;
+ return TOKEN_WHITE;
+ }
+ /* otherwise return EOL */
+ return TOKEN_EOL;
+ }
+ haswhite = true;
+ if (!lex_tokench(lex, ch))
+ return TOKEN_FATAL;
+ }
+ ch = lex_getch(lex);
+ }
if (ch == '/') {
ch = lex_getch(lex);
/* one line comment */
ch = lex_getch(lex);
- /* check for special: '/', '/', '*', '/' */
- if (ch == '*') {
- ch = lex_getch(lex);
- if (ch == '/') {
- ch = ' ';
- continue;
+ if (lex->flags.preprocessing) {
+ haswhite = true;
+ if (!lex_tokench(lex, '/') ||
+ !lex_tokench(lex, '/'))
+ {
+ return TOKEN_FATAL;
}
}
while (ch != EOF && ch != '\n') {
+ if (lex->flags.preprocessing && !lex_tokench(lex, ch))
+ return TOKEN_FATAL;
ch = lex_getch(lex);
}
+ if (lex->flags.preprocessing) {
+ lex_ungetch(lex, '\n'); /* NOTE(review): pushes '\n' back even if the loop above stopped on EOF - confirm intended */
+ if (!lex_endtoken(lex))
+ return TOKEN_FATAL;
+ return TOKEN_WHITE;
+ }
continue;
}
if (ch == '*')
{
/* multiline comment */
+ if (lex->flags.preprocessing) {
+ haswhite = true;
+ if (!lex_tokench(lex, '/') ||
+ !lex_tokench(lex, '*'))
+ {
+ return TOKEN_FATAL;
+ }
+ }
+
while (ch != EOF)
{
ch = lex_getch(lex);
if (ch == '*') {
ch = lex_getch(lex);
if (ch == '/') {
- ch = lex_getch(lex);
+ if (lex->flags.preprocessing) {
+ if (!lex_tokench(lex, '*') ||
+ !lex_tokench(lex, '/'))
+ {
+ return TOKEN_FATAL;
+ }
+ }
break;
}
}
+ if (lex->flags.preprocessing) {
+ if (!lex_tokench(lex, ch))
+ return TOKEN_FATAL;
+ }
}
- if (ch == '/') /* allow *//* direct following comment */
- {
- lex_ungetch(lex, ch);
- ch = ' '; /* cause TRUE in the isspace check */
- }
+ ch = ' '; /* cause TRUE in the isspace check */
continue;
}
/* Otherwise roll back to the slash and break out of the loop */
}
} while (ch != EOF && isspace(ch));
- return ch;
-}
-
-/* Append a character to the token buffer */
-static bool GMQCC_WARN lex_tokench(lex_file *lex, int ch)
-{
- if (!token_value_add(&lex->tok, ch)) {
- lexerror(lex, "out of memory");
- return false;
- }
- return true;
-}
-
-/* Append a trailing null-byte */
-static bool GMQCC_WARN lex_endtoken(lex_file *lex)
-{
- if (!token_value_add(&lex->tok, 0)) {
- lexerror(lex, "out of memory");
- return false;
+ if (haswhite) {
+ if (!lex_endtoken(lex))
+ return TOKEN_FATAL;
+ lex_ungetch(lex, ch); /* hand back the character that ended the whitespace run */
+ return TOKEN_WHITE;
}
- lex->tok.value_count--;
- return true;
+ return ch;
}
/* Get a token */
if (ch == quote)
return TOKEN_STRINGCONST;
- if (ch == '\\') {
+ if (!lex->flags.preprocessing && ch == '\\') {
ch = lex_getch(lex);
if (ch == EOF) {
lexerror(lex, "unexpected end of file");
lex->tok.ctx.line = lex->sline;
lex->tok.ctx.file = lex->name;
+ if (lex->flags.preprocessing && (ch == TOKEN_WHITE || ch == TOKEN_EOL || ch == TOKEN_FATAL)) {
+ return (lex->tok.ttype = ch);
+ }
+
if (lex->eof)
return (lex->tok.ttype = TOKEN_FATAL);
}
lex->modelname = lex->tok.value;
lex->tok.value = NULL;
+ lex->tok.value_alloc = lex->tok.value_count = 0;
for (frame = 0; frame < lex->frames_count; ++frame) {
if (!strcmp(v, lex->frames[frame].name)) {
lex->framevalue = lex->frames[frame].value;
/* single-character tokens */
switch (ch)
{
+ case '[':
case '(':
if (!lex_tokench(lex, ch) ||
!lex_endtoken(lex))
case ';':
case '{':
case '}':
- case '[':
case ']':
case '#':
!strcmp(v, "local") ||
!strcmp(v, "return") ||
!strcmp(v, "const"))
+ {
lex->tok.ttype = TOKEN_KEYWORD;
+ }
+ else if (opts_standard != COMPILER_QCC)
+ {
+ /* other standards reserve these keywords */
+ if (!strcmp(v, "switch") ||
+ !strcmp(v, "struct") ||
+ !strcmp(v, "union") ||
+ !strcmp(v, "break") ||
+ !strcmp(v, "continue") ||
+ !strcmp(v, "var"))
+ {
+ lex->tok.ttype = TOKEN_KEYWORD;
+ }
+ }
return lex->tok.ttype;
}
if (ch == '"')
{
+ lex->flags.nodigraphs = true;
+ if (lex->flags.preprocessing && !lex_tokench(lex, ch))
+ return TOKEN_FATAL;
lex->tok.ttype = lex_finish_string(lex, '"');
- while (lex->tok.ttype == TOKEN_STRINGCONST)
+ if (lex->flags.preprocessing && !lex_tokench(lex, ch))
+ return TOKEN_FATAL;
+ while (!lex->flags.preprocessing && lex->tok.ttype == TOKEN_STRINGCONST)
{
/* Allow c style "string" "continuation" */
ch = lex_skipwhite(lex);
lex->tok.ttype = lex_finish_string(lex, '"');
}
+ lex->flags.nodigraphs = false;
if (!lex_endtoken(lex))
return (lex->tok.ttype = TOKEN_FATAL);
return lex->tok.ttype;
* Likewise actual unescaping has to be done by the parser.
* The difference is we don't allow 'char' 'continuation'.
*/
- lex->tok.ttype = lex_finish_string(lex, '\'');
- if (!lex_endtoken(lex))
- return (lex->tok.ttype = TOKEN_FATAL);
+ if (lex->flags.preprocessing && !lex_tokench(lex, ch))
+ return TOKEN_FATAL;
+ lex->tok.ttype = lex_finish_string(lex, '\'');
+ if (lex->flags.preprocessing && !lex_tokench(lex, ch))
+ return TOKEN_FATAL;
+ if (!lex_endtoken(lex))
+ return (lex->tok.ttype = TOKEN_FATAL);
/* It's a vector if we can successfully scan 3 floats */
#ifdef WIN32
- if (sscanf_s(lex->tok.value, " %f %f %f ",
- &lex->tok.constval.v.x, &lex->tok.constval.v.y, &lex->tok.constval.v.z) == 3)
+ if (sscanf_s(lex->tok.value, " %f %f %f ",
+ &lex->tok.constval.v.x, &lex->tok.constval.v.y, &lex->tok.constval.v.z) == 3)
#else
- if (sscanf(lex->tok.value, " %f %f %f ",
- &lex->tok.constval.v.x, &lex->tok.constval.v.y, &lex->tok.constval.v.z) == 3)
+ if (sscanf(lex->tok.value, " %f %f %f ",
+ &lex->tok.constval.v.x, &lex->tok.constval.v.y, &lex->tok.constval.v.z) == 3)
#endif
- {
- lex->tok.ttype = TOKEN_VECTORCONST;
- }
- return lex->tok.ttype;
+ {
+ lex->tok.ttype = TOKEN_VECTORCONST;
+ }
+
+ return lex->tok.ttype;
}
if (isdigit(ch))