lexer.c

   1 #include <stdio.h>
   2 #include <stdlib.h>
   3 #include <string.h>
   4 #include <stdarg.h>
   5
   6 #include "gmqcc.h"
   7 #include "lexer.h"
   8
   9 char* *lex_filenames;
  10
  11 void lexerror(lex_file *lex, const char *fmt, ...)
  12 {
  13         va_list ap;
  14
  15         va_start(ap, fmt);
  16     con_vprintmsg(LVL_ERROR, lex->name, lex->sline, "parse error", fmt, ap);
  17         va_end(ap);
  18 }
  19
  20 bool lexwarn(lex_file *lex, int warntype, const char *fmt, ...)
  21 {
  22         va_list ap;
  23         int lvl = LVL_WARNING;
  24
  25     if (!OPTS_WARN(warntype))
  26         return false;
  27
  28     if (opts_werror)
  29             lvl = LVL_ERROR;
  30
  31         va_start(ap, fmt);
  32     con_vprintmsg(lvl, lex->name, lex->sline, "warning", fmt, ap);
  33         va_end(ap);
  34
  35         return opts_werror;
  36 }
  37
  38
  39 #if 0
  40 token* token_new()
  41 {
  42     token *tok = (token*)mem_a(sizeof(token));
  43     if (!tok)
  44         return NULL;
  45     memset(tok, 0, sizeof(*tok));
  46     return tok;
  47 }
  48
  49 void token_delete(token *self)
  50 {
  51     if (self->next && self->next->prev == self)
  52         self->next->prev = self->prev;
  53     if (self->prev && self->prev->next == self)
  54         self->prev->next = self->next;
  55     MEM_VECTOR_CLEAR(self, value);
  56     mem_d(self);
  57 }
  58
  59 token* token_copy(const token *cp)
  60 {
  61     token* self = token_new();
  62     if (!self)
  63         return NULL;
  64     /* copy the value */
  65     self->value_alloc = cp->value_count + 1;
  66     self->value_count = cp->value_count;
  67     self->value = (char*)mem_a(self->value_alloc);
  68     if (!self->value) {
  69         mem_d(self);
  70         return NULL;
  71     }
  72     memcpy(self->value, cp->value, cp->value_count);
  73     self->value[self->value_alloc-1] = 0;
  74
  75     /* rest */
  76     self->ctx = cp->ctx;
  77     self->ttype = cp->ttype;
  78     memcpy(&self->constval, &cp->constval, sizeof(self->constval));
  79     return self;
  80 }
  81
  82 void token_delete_all(token *t)
  83 {
  84     token *n;
  85
  86     do {
  87         n = t->next;
  88         token_delete(t);
  89         t = n;
  90     } while(t);
  91 }
  92
  93 token* token_copy_all(const token *cp)
  94 {
  95     token *cur;
  96     token *out;
  97
  98     out = cur = token_copy(cp);
  99     if (!out)
 100         return NULL;
 101
 102     while (cp->next) {
 103         cp = cp->next;
 104         cur->next = token_copy(cp);
 105         if (!cur->next) {
 106             token_delete_all(out);
 107             return NULL;
 108         }
 109         cur->next->prev = cur;
 110         cur = cur->next;
 111     }
 112
 113     return out;
 114 }
 115 #else
 116 static void lex_token_new(lex_file *lex)
 117 {
 118 #if 0
 119     if (lex->tok)
 120         token_delete(lex->tok);
 121     lex->tok = token_new();
 122 #else
 123     if (lex->tok.value)
 124         vec_shrinkto(lex->tok.value, 0);
 125     lex->tok.constval.t  = 0;
 126     lex->tok.ctx.line = lex->sline;
 127     lex->tok.ctx.file = lex->name;
 128 #endif
 129 }
 130 #endif
 131
 132 lex_file* lex_open(const char *file)
 133 {
 134     lex_file *lex;
 135     FILE *in = util_fopen(file, "rb");
 136
 137     if (!in) {
 138         lexerror(NULL, "open failed: '%s'\n", file);
 139         return NULL;
 140     }
 141
 142     lex = (lex_file*)mem_a(sizeof(*lex));
 143     if (!lex) {
 144         fclose(in);
 145         lexerror(NULL, "out of memory\n");
 146         return NULL;
 147     }
 148
 149     memset(lex, 0, sizeof(*lex));
 150
 151     lex->file = in;
 152     lex->name = util_strdup(file);
 153     lex->line = 1; /* we start counting at 1 */
 154
 155     lex->peekpos = 0;
 156     lex->eof = false;
 157
 158     vec_push(lex_filenames, lex->name);
 159     return lex;
 160 }
 161
 162 lex_file* lex_open_string(const char *str, size_t len, const char *name)
 163 {
 164     lex_file *lex;
 165
 166     lex = (lex_file*)mem_a(sizeof(*lex));
 167     if (!lex) {
 168         lexerror(NULL, "out of memory\n");
 169         return NULL;
 170     }
 171
 172     memset(lex, 0, sizeof(*lex));
 173
 174     lex->file = NULL;
 175     lex->open_string        = str;
 176     lex->open_string_length = len;
 177     lex->open_string_pos    = 0;
 178
 179     lex->name = util_strdup(name ? name : "<string-source>");
 180     lex->line = 1; /* we start counting at 1 */
 181
 182     lex->peekpos = 0;
 183     lex->eof = false;
 184
 185     vec_push(lex_filenames, lex->name);
 186
 187     return lex;
 188 }
 189
 190 void lex_cleanup(void)
 191 {
 192     size_t i;
 193     for (i = 0; i < vec_size(lex_filenames); ++i)
 194         mem_d(lex_filenames[i]);
 195     vec_free(lex_filenames);
 196 }
 197
 198 void lex_close(lex_file *lex)
 199 {
 200     size_t i;
 201     for (i = 0; i < vec_size(lex->frames); ++i)
 202         mem_d(lex->frames[i].name);
 203     vec_free(lex->frames);
 204
 205     if (lex->modelname)
 206         vec_free(lex->modelname);
 207
 208     if (lex->file)
 209         fclose(lex->file);
 210 #if 0
 211     if (lex->tok)
 212         token_delete(lex->tok);
 213 #else
 214     vec_free(lex->tok.value);
 215 #endif
 216     /* mem_d(lex->name); collected in lex_filenames */
 217     mem_d(lex);
 218 }
 219
 220 static int lex_fgetc(lex_file *lex)
 221 {
 222     if (lex->file)
 223         return fgetc(lex->file);
 224     if (lex->open_string) {
 225         if (lex->open_string_pos >= lex->open_string_length)
 226             return EOF;
 227         return lex->open_string[lex->open_string_pos++];
 228     }
 229     return EOF;
 230 }
 231
/* Get or put-back data
 * The following functions do NOT understand what kind of data they
 * are working on.
 * They are merely wrapping get/put in order to count line numbers.
 */
 237 static void lex_ungetch(lex_file *lex, int ch);
 238 static int lex_try_trigraph(lex_file *lex, int old)
 239 {
 240     int c2, c3;
 241     c2 = lex_fgetc(lex);
 242     if (c2 != '?') {
 243         lex_ungetch(lex, c2);
 244         return old;
 245     }
 246
 247     c3 = lex_fgetc(lex);
 248     switch (c3) {
 249         case '=': return '#';
 250         case '/': return '\\';
 251         case '\'': return '^';
 252         case '(': return '[';
 253         case ')': return ']';
 254         case '!': return '|';
 255         case '<': return '{';
 256         case '>': return '}';
 257         case '-': return '~';
 258         default:
 259             lex_ungetch(lex, c3);
 260             lex_ungetch(lex, c2);
 261             return old;
 262     }
 263 }
 264
 265 static int lex_try_digraph(lex_file *lex, int ch)
 266 {
 267     int c2;
 268     c2 = lex_fgetc(lex);
 269     if      (ch == '<' && c2 == ':')
 270         return '[';
 271     else if (ch == ':' && c2 == '>')
 272         return ']';
 273     else if (ch == '<' && c2 == '%')
 274         return '{';
 275     else if (ch == '%' && c2 == '>')
 276         return '}';
 277     else if (ch == '%' && c2 == ':')
 278         return '#';
 279     lex_ungetch(lex, c2);
 280     return ch;
 281 }
 282
 283 static int lex_getch(lex_file *lex)
 284 {
 285     int ch;
 286
 287     if (lex->peekpos) {
 288         lex->peekpos--;
 289         if (lex->peek[lex->peekpos] == '\n')
 290             lex->line++;
 291         return lex->peek[lex->peekpos];
 292     }
 293
 294     ch = lex_fgetc(lex);
 295     if (ch == '\n')
 296         lex->line++;
 297     else if (ch == '?')
 298         return lex_try_trigraph(lex, ch);
 299     else if (!lex->flags.nodigraphs && (ch == '<' || ch == ':' || ch == '%'))
 300         return lex_try_digraph(lex, ch);
 301     return ch;
 302 }
 303
 304 static void lex_ungetch(lex_file *lex, int ch)
 305 {
 306     lex->peek[lex->peekpos++] = ch;
 307     if (ch == '\n')
 308         lex->line--;
 309 }
 310
 311 /* classify characters
 312  * some additions to the is*() functions of ctype.h
 313  */
 314
 315 /* Idents are alphanumberic, but they start with alpha or _ */
 316 static bool isident_start(int ch)
 317 {
 318     return isalpha(ch) || ch == '_';
 319 }
 320
 321 static bool isident(int ch)
 322 {
 323     return isident_start(ch) || isdigit(ch);
 324 }
 325
 326 /* isxdigit_only is used when we already know it's not a digit
 327  * and want to see if it's a hex digit anyway.
 328  */
 329 static bool isxdigit_only(int ch)
 330 {
 331     return (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F');
 332 }
 333
 334 /* Append a character to the token buffer */
 335 static void lex_tokench(lex_file *lex, int ch)
 336 {
 337     vec_push(lex->tok.value, ch);
 338 }
 339
 340 /* Append a trailing null-byte */
 341 static void lex_endtoken(lex_file *lex)
 342 {
 343     vec_push(lex->tok.value, 0);
 344     vec_shrinkby(lex->tok.value, 1);
 345 }
 346
 347 /* Skip whitespace and comments and return the first
 348  * non-white character.
 349  * As this makes use of the above getch() ungetch() functions,
 350  * we don't need to care at all about line numbering anymore.
 351  *
 352  * In theory, this function should only be used at the beginning
 353  * of lexing, or when we *know* the next character is part of the token.
 354  * Otherwise, if the parser throws an error, the linenumber may not be
 355  * the line of the error, but the line of the next token AFTER the error.
 356  *
 357  * This is currently only problematic when using c-like string-continuation,
 358  * since comments and whitespaces are allowed between 2 such strings.
 359  * Example:
 360 printf(   "line one\n"
 361 // A comment
 362           "A continuation of the previous string"
 363 // This line is skipped
 364       , foo);
 365
 366  * In this case, if the parse decides it didn't actually want a string,
 367  * and uses lex->line to print an error, it will show the ', foo);' line's
 368  * linenumber.
 369  *
 370  * On the other hand, the parser is supposed to remember the line of the next
 371  * token's beginning. In this case we would want skipwhite() to be called
 372  * AFTER reading a token, so that the parser, before reading the NEXT token,
 373  * doesn't store teh *comment's* linenumber, but the actual token's linenumber.
 374  *
 375  * THIS SOLUTION
 376  *    here is to store the line of the first character after skipping
 377  *    the initial whitespace in lex->sline, this happens in lex_do.
 378  */
 379 static int lex_skipwhite(lex_file *lex)
 380 {
 381     int ch = 0;
 382     bool haswhite = false;
 383
 384     do
 385     {
 386         ch = lex_getch(lex);
 387         while (ch != EOF && isspace(ch)) {
 388             if (lex->flags.preprocessing) {
 389                 if (ch == '\n') {
 390                     /* end-of-line */
 391                     /* see if there was whitespace first */
 392                     if (haswhite) { /* (vec_size(lex->tok.value)) { */
 393                         lex_ungetch(lex, ch);
 394                         lex_endtoken(lex);
 395                         return TOKEN_WHITE;
 396                     }
 397                     /* otherwise return EOL */
 398                     return TOKEN_EOL;
 399                 }
 400                 haswhite = true;
 401                 lex_tokench(lex, ch);
 402             }
 403             ch = lex_getch(lex);
 404         }
 405
 406         if (ch == '/') {
 407             ch = lex_getch(lex);
 408             if (ch == '/')
 409             {
 410                 /* one line comment */
 411                 ch = lex_getch(lex);
 412
 413                 if (lex->flags.preprocessing) {
 414                     haswhite = true;
 415                     lex_tokench(lex, '/');
 416                     lex_tokench(lex, '/');
 417                 }
 418
 419                 while (ch != EOF && ch != '\n') {
 420                     if (lex->flags.preprocessing)
 421                         lex_tokench(lex, ch);
 422                     ch = lex_getch(lex);
 423                 }
 424                 if (lex->flags.preprocessing) {
 425                     lex_ungetch(lex, '\n');
 426                     lex_endtoken(lex);
 427                     return TOKEN_WHITE;
 428                 }
 429                 continue;
 430             }
 431             if (ch == '*')
 432             {
 433                 /* multiline comment */
 434                 if (lex->flags.preprocessing) {
 435                     haswhite = true;
 436                     lex_tokench(lex, '/');
 437                     lex_tokench(lex, '*');
 438                 }
 439
 440                 while (ch != EOF)
 441                 {
 442                     ch = lex_getch(lex);
 443                     if (ch == '*') {
 444                         ch = lex_getch(lex);
 445                         if (ch == '/') {
 446                             if (lex->flags.preprocessing) {
 447                                 lex_tokench(lex, '*');
 448                                 lex_tokench(lex, '/');
 449                             }
 450                             break;
 451                         }
 452                     }
 453                     if (lex->flags.preprocessing) {
 454                         lex_tokench(lex, ch);
 455                     }
 456                 }
 457                 ch = ' '; /* cause TRUE in the isspace check */
 458                 continue;
 459             }
 460             /* Otherwise roll back to the slash and break out of the loop */
 461             lex_ungetch(lex, ch);
 462             ch = '/';
 463             break;
 464         }
 465     } while (ch != EOF && isspace(ch));
 466
 467     if (haswhite) {
 468         lex_endtoken(lex);
 469         lex_ungetch(lex, ch);
 470         return TOKEN_WHITE;
 471     }
 472     return ch;
 473 }
 474
 475 /* Get a token */
 476 static bool GMQCC_WARN lex_finish_ident(lex_file *lex)
 477 {
 478     int ch;
 479
 480     ch = lex_getch(lex);
 481     while (ch != EOF && isident(ch))
 482     {
 483         lex_tokench(lex, ch);
 484         ch = lex_getch(lex);
 485     }
 486
 487     /* last ch was not an ident ch: */
 488     lex_ungetch(lex, ch);
 489
 490     return true;
 491 }
 492
 493 /* read one ident for the frame list */
 494 static int lex_parse_frame(lex_file *lex)
 495 {
 496     int ch;
 497
 498     lex_token_new(lex);
 499
 500     ch = lex_getch(lex);
 501     while (ch != EOF && ch != '\n' && isspace(ch))
 502         ch = lex_getch(lex);
 503
 504     if (ch == '\n')
 505         return 1;
 506
 507     if (!isident_start(ch)) {
 508         lexerror(lex, "invalid framename, must start with one of a-z or _, got %c", ch);
 509         return -1;
 510     }
 511
 512     lex_tokench(lex, ch);
 513     if (!lex_finish_ident(lex))
 514         return -1;
 515     lex_endtoken(lex);
 516     return 0;
 517 }
 518
 519 /* read a list of $frames */
 520 static bool lex_finish_frames(lex_file *lex)
 521 {
 522     do {
 523         size_t i;
 524         int    rc;
 525         frame_macro m;
 526
 527         rc = lex_parse_frame(lex);
 528         if (rc > 0) /* end of line */
 529             return true;
 530         if (rc < 0) /* error */
 531             return false;
 532
 533         for (i = 0; i < vec_size(lex->frames); ++i) {
 534             if (!strcmp(lex->tok.value, lex->frames[i].name)) {
 535                 lex->frames[i].value = lex->framevalue++;
 536                 if (lexwarn(lex, WARN_FRAME_MACROS, "duplicate frame macro defined: `%s`", lex->tok.value))
 537                     return false;
 538                 break;
 539             }
 540         }
 541         if (i < vec_size(lex->frames))
 542             continue;
 543
 544         m.value = lex->framevalue++;
 545         m.name = util_strdup(lex->tok.value);
 546         vec_shrinkto(lex->tok.value, 0);
 547         vec_push(lex->frames, m);
 548     } while (true);
 549 }
 550
 551 static int GMQCC_WARN lex_finish_string(lex_file *lex, int quote)
 552 {
 553     int ch = 0;
 554
 555     while (ch != EOF)
 556     {
 557         ch = lex_getch(lex);
 558         if (ch == quote)
 559             return TOKEN_STRINGCONST;
 560
 561         if (!lex->flags.preprocessing && ch == '\\') {
 562             ch = lex_getch(lex);
 563             if (ch == EOF) {
 564                 lexerror(lex, "unexpected end of file");
 565                 lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */
 566                 return (lex->tok.ttype = TOKEN_ERROR);
 567             }
 568
 569             switch (ch) {
 570             case '\\': break;
 571             case 'a':  ch = '\a'; break;
 572             case 'b':  ch = '\b'; break;
 573             case 'r':  ch = '\r'; break;
 574             case 'n':  ch = '\n'; break;
 575             case 't':  ch = '\t'; break;
 576             case 'f':  ch = '\f'; break;
 577             case 'v':  ch = '\v'; break;
 578             default:
 579                 lexwarn(lex, WARN_UNKNOWN_CONTROL_SEQUENCE, "unrecognized control sequence: \\%c", ch);
 580                 /* so we just add the character plus backslash no matter what it actually is */
 581                 lex_tokench(lex, '\\');
 582             }
 583             /* add the character finally */
 584             lex_tokench(lex, ch);
 585         }
 586         else
 587             lex_tokench(lex, ch);
 588     }
 589     lexerror(lex, "unexpected end of file within string constant");
 590     lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */
 591     return (lex->tok.ttype = TOKEN_ERROR);
 592 }
 593
 594 static int GMQCC_WARN lex_finish_digit(lex_file *lex, int lastch)
 595 {
 596     bool ishex = false;
 597
 598     int  ch = lastch;
 599
 600     /* parse a number... */
 601     lex->tok.ttype = TOKEN_INTCONST;
 602
 603     lex_tokench(lex, ch);
 604
 605     ch = lex_getch(lex);
 606     if (ch != '.' && !isdigit(ch))
 607     {
 608         if (lastch != '0' || ch != 'x')
 609         {
 610             /* end of the number or EOF */
 611             lex_ungetch(lex, ch);
 612             lex_endtoken(lex);
 613
 614             lex->tok.constval.i = lastch - '0';
 615             return lex->tok.ttype;
 616         }
 617
 618         ishex = true;
 619     }
 620
 621     /* EOF would have been caught above */
 622
 623     if (ch != '.')
 624     {
 625         lex_tokench(lex, ch);
 626         ch = lex_getch(lex);
 627         while (isdigit(ch) || (ishex && isxdigit_only(ch)))
 628         {
 629             lex_tokench(lex, ch);
 630             ch = lex_getch(lex);
 631         }
 632     }
 633     /* NOT else, '.' can come from above as well */
 634     if (ch == '.' && !ishex)
 635     {
 636         /* Allow floating comma in non-hex mode */
 637         lex->tok.ttype = TOKEN_FLOATCONST;
 638         lex_tokench(lex, ch);
 639
 640         /* continue digits-only */
 641         ch = lex_getch(lex);
 642         while (isdigit(ch))
 643         {
 644             lex_tokench(lex, ch);
 645             ch = lex_getch(lex);
 646         }
 647     }
 648     /* put back the last character */
 649     /* but do not put back the trailing 'f' or a float */
 650     if (lex->tok.ttype == TOKEN_FLOATCONST && ch == 'f')
 651         ch = lex_getch(lex);
 652
 653     /* generally we don't want words to follow numbers: */
 654     if (isident(ch)) {
 655         lexerror(lex, "unexpected trailing characters after number");
 656         return (lex->tok.ttype = TOKEN_ERROR);
 657     }
 658     lex_ungetch(lex, ch);
 659
 660     lex_endtoken(lex);
 661     if (lex->tok.ttype == TOKEN_FLOATCONST)
 662         lex->tok.constval.f = strtod(lex->tok.value, NULL);
 663     else
 664         lex->tok.constval.i = strtol(lex->tok.value, NULL, 0);
 665     return lex->tok.ttype;
 666 }
 667
 668 int lex_do(lex_file *lex)
 669 {
 670     int ch, nextch;
 671
 672     lex_token_new(lex);
 673 #if 0
 674     if (!lex->tok)
 675         return TOKEN_FATAL;
 676 #endif
 677
 678     ch = lex_skipwhite(lex);
 679     lex->sline = lex->line;
 680     lex->tok.ctx.line = lex->sline;
 681     lex->tok.ctx.file = lex->name;
 682
 683     if (lex->flags.preprocessing && (ch == TOKEN_WHITE || ch == TOKEN_EOL || ch == TOKEN_FATAL)) {
 684         return (lex->tok.ttype = ch);
 685     }
 686
 687     if (lex->eof)
 688         return (lex->tok.ttype = TOKEN_FATAL);
 689
 690     if (ch == EOF) {
 691         lex->eof = true;
 692         return (lex->tok.ttype = TOKEN_EOF);
 693     }
 694
 695     /* modelgen / spiritgen commands */
 696     if (ch == '$') {
 697         const char *v;
 698         size_t frame;
 699
 700         ch = lex_getch(lex);
 701         if (!isident_start(ch)) {
 702             lexerror(lex, "hanging '$' modelgen/spritegen command line");
 703             return lex_do(lex);
 704         }
 705         lex_tokench(lex, ch);
 706         if (!lex_finish_ident(lex))
 707             return (lex->tok.ttype = TOKEN_ERROR);
 708         lex_endtoken(lex);
 709         /* skip the known commands */
 710         v = lex->tok.value;
 711
 712         if (!strcmp(v, "frame") || !strcmp(v, "framesave"))
 713         {
 714             /* frame/framesave command works like an enum
 715              * similar to fteqcc we handle this in the lexer.
 716              * The reason for this is that it is sensitive to newlines,
 717              * which the parser is unaware of
 718              */
 719             if (!lex_finish_frames(lex))
 720                  return (lex->tok.ttype = TOKEN_ERROR);
 721             return lex_do(lex);
 722         }
 723
 724         if (!strcmp(v, "framevalue"))
 725         {
 726             ch = lex_getch(lex);
 727             while (ch != EOF && isspace(ch) && ch != '\n')
 728                 ch = lex_getch(lex);
 729
 730             if (!isdigit(ch)) {
 731                 lexerror(lex, "$framevalue requires an integer parameter");
 732                 return lex_do(lex);
 733             }
 734
 735             lex_token_new(lex);
 736             lex->tok.ttype = lex_finish_digit(lex, ch);
 737             lex_endtoken(lex);
 738             if (lex->tok.ttype != TOKEN_INTCONST) {
 739                 lexerror(lex, "$framevalue requires an integer parameter");
 740                 return lex_do(lex);
 741             }
 742             lex->framevalue = lex->tok.constval.i;
 743             return lex_do(lex);
 744         }
 745
 746         if (!strcmp(v, "framerestore"))
 747         {
 748             int rc;
 749
 750             lex_token_new(lex);
 751
 752             rc = lex_parse_frame(lex);
 753
 754             if (rc > 0) {
 755                 lexerror(lex, "$framerestore requires a framename parameter");
 756                 return lex_do(lex);
 757             }
 758             if (rc < 0)
 759                 return (lex->tok.ttype = TOKEN_FATAL);
 760
 761             v = lex->tok.value;
 762             for (frame = 0; frame < vec_size(lex->frames); ++frame) {
 763                 if (!strcmp(v, lex->frames[frame].name)) {
 764                     lex->framevalue = lex->frames[frame].value;
 765                     return lex_do(lex);
 766                 }
 767             }
 768             lexerror(lex, "unknown framename `%s`", v);
 769             return lex_do(lex);
 770         }
 771
 772         if (!strcmp(v, "modelname"))
 773         {
 774             int rc;
 775
 776             lex_token_new(lex);
 777
 778             rc = lex_parse_frame(lex);
 779
 780             if (rc > 0) {
 781                 lexerror(lex, "$modelname requires a parameter");
 782                 return lex_do(lex);
 783             }
 784             if (rc < 0)
 785                 return (lex->tok.ttype = TOKEN_FATAL);
 786
 787             v = lex->tok.value;
 788             if (lex->modelname) {
 789                 frame_macro m;
 790                 m.value = lex->framevalue;
 791                 m.name = lex->modelname;
 792                 lex->modelname = NULL;
 793                 vec_push(lex->frames, m);
 794             }
 795             lex->modelname = lex->tok.value;
 796             lex->tok.value = NULL;
 797             return lex_do(lex);
 798         }
 799
 800         if (!strcmp(v, "flush"))
 801         {
 802             size_t frame;
 803             for (frame = 0; frame < vec_size(lex->frames); ++frame)
 804                 mem_d(lex->frames[frame].name);
 805             vec_free(lex->frames);
 806             /* skip line (fteqcc does it too) */
 807             ch = lex_getch(lex);
 808             while (ch != EOF && ch != '\n')
 809                 ch = lex_getch(lex);
 810             return lex_do(lex);
 811         }
 812
 813         if (!strcmp(v, "cd") ||
 814             !strcmp(v, "origin") ||
 815             !strcmp(v, "base") ||
 816             !strcmp(v, "flags") ||
 817             !strcmp(v, "scale") ||
 818             !strcmp(v, "skin"))
 819         {
 820             /* skip line */
 821             ch = lex_getch(lex);
 822             while (ch != EOF && ch != '\n')
 823                 ch = lex_getch(lex);
 824             return lex_do(lex);
 825         }
 826
 827         for (frame = 0; frame < vec_size(lex->frames); ++frame) {
 828             if (!strcmp(v, lex->frames[frame].name)) {
 829                 lex->tok.constval.i = lex->frames[frame].value;
 830                 return (lex->tok.ttype = TOKEN_INTCONST);
 831             }
 832         }
 833
 834         lexerror(lex, "invalid frame macro");
 835         return lex_do(lex);
 836     }
 837
 838     /* single-character tokens */
 839     switch (ch)
 840     {
 841         case '[':
 842         case '(':
 843             lex_tokench(lex, ch);
 844             lex_endtoken(lex);
 845             if (lex->flags.noops)
 846                 return (lex->tok.ttype = ch);
 847             else
 848                 return (lex->tok.ttype = TOKEN_OPERATOR);
 849         case ')':
 850         case ';':
 851         case '{':
 852         case '}':
 853         case ']':
 854
 855         case '#':
 856             lex_tokench(lex, ch);
 857             lex_endtoken(lex);
 858             return (lex->tok.ttype = ch);
 859         default:
 860             break;
 861     }
 862
 863     if (lex->flags.noops)
 864     {
 865         /* Detect characters early which are normally
 866          * operators OR PART of an operator.
 867          */
 868         switch (ch)
 869         {
 870             case '+':
 871             case '-':
 872             case '*':
 873             case '/':
 874             case '<':
 875             case '>':
 876             case '=':
 877             case '&':
 878             case '|':
 879             case '^':
 880             case '~':
 881             case ',':
 882             case '!':
 883                 lex_tokench(lex, ch);
 884                 lex_endtoken(lex);
 885                 return (lex->tok.ttype = ch);
 886             default:
 887                 break;
 888         }
 889
 890         if (ch == '.')
 891         {
 892             lex_tokench(lex, ch);
 893             /* peak ahead once */
 894             nextch = lex_getch(lex);
 895             if (nextch != '.') {
 896                 lex_ungetch(lex, nextch);
 897                 lex_endtoken(lex);
 898                 return (lex->tok.ttype = ch);
 899             }
 900             /* peak ahead again */
 901             nextch = lex_getch(lex);
 902             if (nextch != '.') {
 903                 lex_ungetch(lex, nextch);
 904                 lex_ungetch(lex, nextch);
 905                 lex_endtoken(lex);
 906                 return (lex->tok.ttype = ch);
 907             }
 908             /* fill the token to be "..." */
 909             lex_tokench(lex, ch);
 910             lex_tokench(lex, ch);
 911             lex_endtoken(lex);
 912             return (lex->tok.ttype = TOKEN_DOTS);
 913         }
 914     }
 915
 916     if (ch == ',' || ch == '.') {
 917         lex_tokench(lex, ch);
 918         lex_endtoken(lex);
 919         return (lex->tok.ttype = TOKEN_OPERATOR);
 920     }
 921
 922     if (ch == '+' || ch == '-' || /* ++, --, +=, -=  and -> as well! */
 923         ch == '>' || ch == '<' || /* <<, >>, <=, >= */
 924         ch == '=' || ch == '!' || /* ==, != */
 925         ch == '&' || ch == '|')   /* &&, ||, &=, |= */
 926     {
 927         lex_tokench(lex, ch);
 928
 929         nextch = lex_getch(lex);
 930         if (nextch == ch || nextch == '=') {
 931             lex_tokench(lex, nextch);
 932         } else if (ch == '-' && nextch == '>') {
 933             lex_tokench(lex, nextch);
 934         } else
 935             lex_ungetch(lex, nextch);
 936
 937         lex_endtoken(lex);
 938         return (lex->tok.ttype = TOKEN_OPERATOR);
 939     }
 940
 941     /*
 942     if (ch == '^' || ch == '~' || ch == '!')
 943     {
 944         lex_tokench(lex, ch);
 945         lex_endtoken(lex);
 946         return (lex->tok.ttype = TOKEN_OPERATOR);
 947     }
 948     */
 949
 950     if (ch == '*' || ch == '/') /* *=, /= */
 951     {
 952         lex_tokench(lex, ch);
 953
 954         nextch = lex_getch(lex);
 955         if (nextch == '=') {
 956             lex_tokench(lex, nextch);
 957         } else
 958             lex_ungetch(lex, nextch);
 959
 960         lex_endtoken(lex);
 961         return (lex->tok.ttype = TOKEN_OPERATOR);
 962     }
 963
 964     if (isident_start(ch))
 965     {
 966         const char *v;
 967
 968         lex_tokench(lex, ch);
 969         if (!lex_finish_ident(lex)) {
 970             /* error? */
 971             return (lex->tok.ttype = TOKEN_ERROR);
 972         }
 973         lex_endtoken(lex);
 974         lex->tok.ttype = TOKEN_IDENT;
 975
 976         v = lex->tok.value;
 977         if (!strcmp(v, "void")) {
 978             lex->tok.ttype = TOKEN_TYPENAME;
 979             lex->tok.constval.t = TYPE_VOID;
 980         } else if (!strcmp(v, "int")) {
 981             lex->tok.ttype = TOKEN_TYPENAME;
 982             lex->tok.constval.t = TYPE_INTEGER;
 983         } else if (!strcmp(v, "float")) {
 984             lex->tok.ttype = TOKEN_TYPENAME;
 985             lex->tok.constval.t = TYPE_FLOAT;
 986         } else if (!strcmp(v, "string")) {
 987             lex->tok.ttype = TOKEN_TYPENAME;
 988             lex->tok.constval.t = TYPE_STRING;
 989         } else if (!strcmp(v, "entity")) {
 990             lex->tok.ttype = TOKEN_TYPENAME;
 991             lex->tok.constval.t = TYPE_ENTITY;
 992         } else if (!strcmp(v, "vector")) {
 993             lex->tok.ttype = TOKEN_TYPENAME;
 994             lex->tok.constval.t = TYPE_VECTOR;
 995         } else if (!strcmp(v, "for")  ||
 996                  !strcmp(v, "while")  ||
 997                  !strcmp(v, "do")     ||
 998                  !strcmp(v, "if")     ||
 999                  !strcmp(v, "else")   ||
1000                  !strcmp(v, "local")  ||
1001                  !strcmp(v, "return") ||
1002                  !strcmp(v, "not")    ||
1003                  !strcmp(v, "const"))
1004         {
1005             lex->tok.ttype = TOKEN_KEYWORD;
1006         }
1007         else if (opts_standard != COMPILER_QCC)
1008         {
1009             /* other standards reserve these keywords */
1010             if (!strcmp(v, "switch") ||
1011                 !strcmp(v, "struct") ||
1012                 !strcmp(v, "union")  ||
1013                 !strcmp(v, "break")  ||
1014                 !strcmp(v, "continue") ||
1015                 !strcmp(v, "var"))
1016             {
1017                 lex->tok.ttype = TOKEN_KEYWORD;
1018             }
1019         }
1020
1021         return lex->tok.ttype;
1022     }
1023
1024     if (ch == '"')
1025     {
1026         lex->flags.nodigraphs = true;
1027         if (lex->flags.preprocessing)
1028             lex_tokench(lex, ch);
1029         lex->tok.ttype = lex_finish_string(lex, '"');
1030         if (lex->flags.preprocessing)
1031             lex_tokench(lex, ch);
1032         while (!lex->flags.preprocessing && lex->tok.ttype == TOKEN_STRINGCONST)
1033         {
1034             /* Allow c style "string" "continuation" */
1035             ch = lex_skipwhite(lex);
1036             if (ch != '"') {
1037                 lex_ungetch(lex, ch);
1038                 break;
1039             }
1040
1041             lex->tok.ttype = lex_finish_string(lex, '"');
1042         }
1043         lex->flags.nodigraphs = false;
1044         lex_endtoken(lex);
1045         return lex->tok.ttype;
1046     }
1047
1048     if (ch == '\'')
1049     {
1050         /* we parse character constants like string,
1051          * but return TOKEN_CHARCONST, or a vector type if it fits...
1052          * Likewise actual unescaping has to be done by the parser.
1053          * The difference is we don't allow 'char' 'continuation'.
1054          */
1055         if (lex->flags.preprocessing)
1056             lex_tokench(lex, ch);
1057         lex->tok.ttype = lex_finish_string(lex, '\'');
1058         if (lex->flags.preprocessing)
1059             lex_tokench(lex, ch);
1060         lex_endtoken(lex);
1061
1062          /* It's a vector if we can successfully scan 3 floats */
1063 #ifdef WIN32
1064         if (sscanf_s(lex->tok.value, " %f %f %f ",
1065                    &lex->tok.constval.v.x, &lex->tok.constval.v.y, &lex->tok.constval.v.z) == 3)
1066 #else
1067         if (sscanf(lex->tok.value, " %f %f %f ",
1068                    &lex->tok.constval.v.x, &lex->tok.constval.v.y, &lex->tok.constval.v.z) == 3)
1069 #endif
1070
1071         {
1072              lex->tok.ttype = TOKEN_VECTORCONST;
1073         }
1074
1075         return lex->tok.ttype;
1076     }
1077
1078     if (isdigit(ch))
1079     {
1080         lex->tok.ttype = lex_finish_digit(lex, ch);
1081         lex_endtoken(lex);
1082         return lex->tok.ttype;
1083     }
1084
1085     lexerror(lex, "unknown token");
1086     return (lex->tok.ttype = TOKEN_ERROR);
1087 }