lexer.c

   1 #include <stdio.h>
   2 #include <stdlib.h>
   3 #include <string.h>
   4 #include <stdarg.h>
   5
   6 #include "gmqcc.h"
   7 #include "lexer.h"
   8
   9 char* *lex_filenames;
  10
  11 void lexerror(lex_file *lex, const char *fmt, ...)
  12 {
  13         va_list ap;
  14
  15         va_start(ap, fmt);
  16     con_vprintmsg(LVL_ERROR, lex->name, lex->sline, "parse error", fmt, ap);
  17         va_end(ap);
  18 }
  19
  20 bool lexwarn(lex_file *lex, int warntype, const char *fmt, ...)
  21 {
  22         va_list ap;
  23         int lvl = LVL_WARNING;
  24
  25     if (!OPTS_WARN(warntype))
  26         return false;
  27
  28     if (opts_werror)
  29             lvl = LVL_ERROR;
  30
  31         va_start(ap, fmt);
  32     con_vprintmsg(lvl, lex->name, lex->sline, "warning", fmt, ap);
  33         va_end(ap);
  34
  35         return opts_werror;
  36 }
  37
  38
  39 #if 0
  40 token* token_new()
  41 {
  42     token *tok = (token*)mem_a(sizeof(token));
  43     if (!tok)
  44         return NULL;
  45     memset(tok, 0, sizeof(*tok));
  46     return tok;
  47 }
  48
  49 void token_delete(token *self)
  50 {
  51     if (self->next && self->next->prev == self)
  52         self->next->prev = self->prev;
  53     if (self->prev && self->prev->next == self)
  54         self->prev->next = self->next;
  55     MEM_VECTOR_CLEAR(self, value);
  56     mem_d(self);
  57 }
  58
  59 token* token_copy(const token *cp)
  60 {
  61     token* self = token_new();
  62     if (!self)
  63         return NULL;
  64     /* copy the value */
  65     self->value_alloc = cp->value_count + 1;
  66     self->value_count = cp->value_count;
  67     self->value = (char*)mem_a(self->value_alloc);
  68     if (!self->value) {
  69         mem_d(self);
  70         return NULL;
  71     }
  72     memcpy(self->value, cp->value, cp->value_count);
  73     self->value[self->value_alloc-1] = 0;
  74
  75     /* rest */
  76     self->ctx = cp->ctx;
  77     self->ttype = cp->ttype;
  78     memcpy(&self->constval, &cp->constval, sizeof(self->constval));
  79     return self;
  80 }
  81
  82 void token_delete_all(token *t)
  83 {
  84     token *n;
  85
  86     do {
  87         n = t->next;
  88         token_delete(t);
  89         t = n;
  90     } while(t);
  91 }
  92
  93 token* token_copy_all(const token *cp)
  94 {
  95     token *cur;
  96     token *out;
  97
  98     out = cur = token_copy(cp);
  99     if (!out)
 100         return NULL;
 101
 102     while (cp->next) {
 103         cp = cp->next;
 104         cur->next = token_copy(cp);
 105         if (!cur->next) {
 106             token_delete_all(out);
 107             return NULL;
 108         }
 109         cur->next->prev = cur;
 110         cur = cur->next;
 111     }
 112
 113     return out;
 114 }
 115 #else
 116 static void lex_token_new(lex_file *lex)
 117 {
 118 #if 0
 119     if (lex->tok)
 120         token_delete(lex->tok);
 121     lex->tok = token_new();
 122 #else
 123     if (lex->tok.value)
 124         vec_shrinkto(lex->tok.value, 0);
 125     lex->tok.constval.t  = 0;
 126     lex->tok.ctx.line = lex->sline;
 127     lex->tok.ctx.file = lex->name;
 128 #endif
 129 }
 130 #endif
 131
 132 lex_file* lex_open(const char *file)
 133 {
 134     lex_file *lex;
 135     FILE *in = util_fopen(file, "rb");
 136
 137     if (!in) {
 138         lexerror(NULL, "open failed: '%s'\n", file);
 139         return NULL;
 140     }
 141
 142     lex = (lex_file*)mem_a(sizeof(*lex));
 143     if (!lex) {
 144         fclose(in);
 145         lexerror(NULL, "out of memory\n");
 146         return NULL;
 147     }
 148
 149     memset(lex, 0, sizeof(*lex));
 150
 151     lex->file = in;
 152     lex->name = util_strdup(file);
 153     lex->line = 1; /* we start counting at 1 */
 154
 155     lex->peekpos = 0;
 156     lex->eof = false;
 157
 158     vec_push(lex_filenames, lex->name);
 159     return lex;
 160 }
 161
 162 lex_file* lex_open_string(const char *str, size_t len, const char *name)
 163 {
 164     lex_file *lex;
 165
 166     lex = (lex_file*)mem_a(sizeof(*lex));
 167     if (!lex) {
 168         lexerror(NULL, "out of memory\n");
 169         return NULL;
 170     }
 171
 172     memset(lex, 0, sizeof(*lex));
 173
 174     lex->file = NULL;
 175     lex->open_string        = str;
 176     lex->open_string_length = len;
 177     lex->open_string_pos    = 0;
 178
 179     lex->name = util_strdup(name ? name : "<string-source>");
 180     lex->line = 1; /* we start counting at 1 */
 181
 182     lex->peekpos = 0;
 183     lex->eof = false;
 184
 185     vec_push(lex_filenames, lex->name);
 186
 187     return lex;
 188 }
 189
 190 void lex_cleanup(void)
 191 {
 192     size_t i;
 193     for (i = 0; i < vec_size(lex_filenames); ++i)
 194         mem_d(lex_filenames[i]);
 195     vec_free(lex_filenames);
 196 }
 197
 198 void lex_close(lex_file *lex)
 199 {
 200     size_t i;
 201     for (i = 0; i < vec_size(lex->frames); ++i)
 202         mem_d(lex->frames[i].name);
 203     vec_free(lex->frames);
 204
 205     if (lex->modelname)
 206         vec_free(lex->modelname);
 207
 208     if (lex->file)
 209         fclose(lex->file);
 210 #if 0
 211     if (lex->tok)
 212         token_delete(lex->tok);
 213 #else
 214     vec_free(lex->tok.value);
 215 #endif
 216     /* mem_d(lex->name); collected in lex_filenames */
 217     mem_d(lex);
 218 }
 219
 220 static int lex_fgetc(lex_file *lex)
 221 {
 222     if (lex->file)
 223         return fgetc(lex->file);
 224     if (lex->open_string) {
 225         if (lex->open_string_pos >= lex->open_string_length)
 226             return EOF;
 227         return lex->open_string[lex->open_string_pos++];
 228     }
 229     return EOF;
 230 }
 231
/* Get or put-back data
 * The following two functions do NOT understand what kind of data they
 * are working on.
 * They are merely wrapping get/put in order to count line numbers.
 */
 237 static void lex_ungetch(lex_file *lex, int ch);
 238 static int lex_try_trigraph(lex_file *lex, int old)
 239 {
 240     int c2, c3;
 241     c2 = lex_fgetc(lex);
 242     if (c2 != '?') {
 243         lex_ungetch(lex, c2);
 244         return old;
 245     }
 246
 247     c3 = lex_fgetc(lex);
 248     switch (c3) {
 249         case '=': return '#';
 250         case '/': return '\\';
 251         case '\'': return '^';
 252         case '(': return '[';
 253         case ')': return ']';
 254         case '!': return '|';
 255         case '<': return '{';
 256         case '>': return '}';
 257         case '-': return '~';
 258         default:
 259             lex_ungetch(lex, c3);
 260             lex_ungetch(lex, c2);
 261             return old;
 262     }
 263 }
 264
 265 static int lex_try_digraph(lex_file *lex, int ch)
 266 {
 267     int c2;
 268     c2 = lex_fgetc(lex);
 269     if      (ch == '<' && c2 == ':')
 270         return '[';
 271     else if (ch == ':' && c2 == '>')
 272         return ']';
 273     else if (ch == '<' && c2 == '%')
 274         return '{';
 275     else if (ch == '%' && c2 == '>')
 276         return '}';
 277     else if (ch == '%' && c2 == ':')
 278         return '#';
 279     lex_ungetch(lex, c2);
 280     return ch;
 281 }
 282
 283 static int lex_getch(lex_file *lex)
 284 {
 285     int ch;
 286
 287     if (lex->peekpos) {
 288         lex->peekpos--;
 289         if (lex->peek[lex->peekpos] == '\n')
 290             lex->line++;
 291         return lex->peek[lex->peekpos];
 292     }
 293
 294     ch = lex_fgetc(lex);
 295     if (ch == '\n')
 296         lex->line++;
 297     else if (ch == '?')
 298         return lex_try_trigraph(lex, ch);
 299     else if (!lex->flags.nodigraphs && (ch == '<' || ch == ':' || ch == '%'))
 300         return lex_try_digraph(lex, ch);
 301     return ch;
 302 }
 303
 304 static void lex_ungetch(lex_file *lex, int ch)
 305 {
 306     lex->peek[lex->peekpos++] = ch;
 307     if (ch == '\n')
 308         lex->line--;
 309 }
 310
 311 /* classify characters
 312  * some additions to the is*() functions of ctype.h
 313  */
 314
 315 /* Idents are alphanumberic, but they start with alpha or _ */
 316 static bool isident_start(int ch)
 317 {
 318     return isalpha(ch) || ch == '_';
 319 }
 320
 321 static bool isident(int ch)
 322 {
 323     return isident_start(ch) || isdigit(ch);
 324 }
 325
 326 /* isxdigit_only is used when we already know it's not a digit
 327  * and want to see if it's a hex digit anyway.
 328  */
 329 static bool isxdigit_only(int ch)
 330 {
 331     return (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F');
 332 }
 333
 334 /* Append a character to the token buffer */
 335 static void lex_tokench(lex_file *lex, int ch)
 336 {
 337     vec_push(lex->tok.value, ch);
 338 }
 339
 340 /* Append a trailing null-byte */
 341 static void lex_endtoken(lex_file *lex)
 342 {
 343     vec_push(lex->tok.value, 0);
 344     vec_shrinkby(lex->tok.value, 1);
 345 }
 346
 347 /* Skip whitespace and comments and return the first
 348  * non-white character.
 349  * As this makes use of the above getch() ungetch() functions,
 350  * we don't need to care at all about line numbering anymore.
 351  *
 352  * In theory, this function should only be used at the beginning
 353  * of lexing, or when we *know* the next character is part of the token.
 354  * Otherwise, if the parser throws an error, the linenumber may not be
 355  * the line of the error, but the line of the next token AFTER the error.
 356  *
 357  * This is currently only problematic when using c-like string-continuation,
 358  * since comments and whitespaces are allowed between 2 such strings.
 359  * Example:
 360 printf(   "line one\n"
 361 // A comment
 362           "A continuation of the previous string"
 363 // This line is skipped
 364       , foo);
 365
 * In this case, if the parser decides it didn't actually want a string,
 * and uses lex->line to print an error, it will show the ', foo);' line's
 * linenumber.
 *
 * On the other hand, the parser is supposed to remember the line of the next
 * token's beginning. In this case we would want skipwhite() to be called
 * AFTER reading a token, so that the parser, before reading the NEXT token,
 * doesn't store the *comment's* linenumber, but the actual token's linenumber.
 374  *
 375  * THIS SOLUTION
 376  *    here is to store the line of the first character after skipping
 377  *    the initial whitespace in lex->sline, this happens in lex_do.
 378  */
 379 static int lex_skipwhite(lex_file *lex)
 380 {
 381     int ch = 0;
 382     bool haswhite = false;
 383
 384     do
 385     {
 386         ch = lex_getch(lex);
 387         while (ch != EOF && isspace(ch)) {
 388             if (lex->flags.preprocessing) {
 389                 if (ch == '\n') {
 390                     /* end-of-line */
 391                     /* see if there was whitespace first */
 392                     if (haswhite) { /* (vec_size(lex->tok.value)) { */
 393                         lex_ungetch(lex, ch);
 394                         lex_endtoken(lex);
 395                         return TOKEN_WHITE;
 396                     }
 397                     /* otherwise return EOL */
 398                     return TOKEN_EOL;
 399                 }
 400                 haswhite = true;
 401                 lex_tokench(lex, ch);
 402             }
 403             ch = lex_getch(lex);
 404         }
 405
 406         if (ch == '/') {
 407             ch = lex_getch(lex);
 408             if (ch == '/')
 409             {
 410                 /* one line comment */
 411                 ch = lex_getch(lex);
 412
 413                 if (lex->flags.preprocessing) {
 414                     haswhite = true;
 415                     /*
 416                     lex_tokench(lex, '/');
 417                     lex_tokench(lex, '/');
 418                     */
 419                     lex_tokench(lex, ' ');
 420                     lex_tokench(lex, ' ');
 421                 }
 422
 423                 while (ch != EOF && ch != '\n') {
 424                     if (lex->flags.preprocessing)
 425                         lex_tokench(lex, ' '); /* ch); */
 426                     ch = lex_getch(lex);
 427                 }
 428                 if (lex->flags.preprocessing) {
 429                     lex_ungetch(lex, '\n');
 430                     lex_endtoken(lex);
 431                     return TOKEN_WHITE;
 432                 }
 433                 continue;
 434             }
 435             if (ch == '*')
 436             {
 437                 /* multiline comment */
 438                 if (lex->flags.preprocessing) {
 439                     haswhite = true;
 440                     /*
 441                     lex_tokench(lex, '/');
 442                     lex_tokench(lex, '*');
 443                     */
 444                     lex_tokench(lex, ' ');
 445                     lex_tokench(lex, ' ');
 446                 }
 447
 448                 while (ch != EOF)
 449                 {
 450                     ch = lex_getch(lex);
 451                     if (ch == '*') {
 452                         ch = lex_getch(lex);
 453                         if (ch == '/') {
 454                             if (lex->flags.preprocessing) {
 455                                 /*
 456                                 lex_tokench(lex, '*');
 457                                 lex_tokench(lex, '/');
 458                                 */
 459                                 lex_tokench(lex, ' ');
 460                                 lex_tokench(lex, ' ');
 461                             }
 462                             break;
 463                         }
 464                     }
 465                     if (lex->flags.preprocessing) {
 466                         lex_tokench(lex, ' '); /* ch); */
 467                     }
 468                 }
 469                 ch = ' '; /* cause TRUE in the isspace check */
 470                 continue;
 471             }
 472             /* Otherwise roll back to the slash and break out of the loop */
 473             lex_ungetch(lex, ch);
 474             ch = '/';
 475             break;
 476         }
 477     } while (ch != EOF && isspace(ch));
 478
 479     if (haswhite) {
 480         lex_endtoken(lex);
 481         lex_ungetch(lex, ch);
 482         return TOKEN_WHITE;
 483     }
 484     return ch;
 485 }
 486
 487 /* Get a token */
 488 static bool GMQCC_WARN lex_finish_ident(lex_file *lex)
 489 {
 490     int ch;
 491
 492     ch = lex_getch(lex);
 493     while (ch != EOF && isident(ch))
 494     {
 495         lex_tokench(lex, ch);
 496         ch = lex_getch(lex);
 497     }
 498
 499     /* last ch was not an ident ch: */
 500     lex_ungetch(lex, ch);
 501
 502     return true;
 503 }
 504
 505 /* read one ident for the frame list */
 506 static int lex_parse_frame(lex_file *lex)
 507 {
 508     int ch;
 509
 510     lex_token_new(lex);
 511
 512     ch = lex_getch(lex);
 513     while (ch != EOF && ch != '\n' && isspace(ch))
 514         ch = lex_getch(lex);
 515
 516     if (ch == '\n')
 517         return 1;
 518
 519     if (!isident_start(ch)) {
 520         lexerror(lex, "invalid framename, must start with one of a-z or _, got %c", ch);
 521         return -1;
 522     }
 523
 524     lex_tokench(lex, ch);
 525     if (!lex_finish_ident(lex))
 526         return -1;
 527     lex_endtoken(lex);
 528     return 0;
 529 }
 530
 531 /* read a list of $frames */
 532 static bool lex_finish_frames(lex_file *lex)
 533 {
 534     do {
 535         size_t i;
 536         int    rc;
 537         frame_macro m;
 538
 539         rc = lex_parse_frame(lex);
 540         if (rc > 0) /* end of line */
 541             return true;
 542         if (rc < 0) /* error */
 543             return false;
 544
 545         for (i = 0; i < vec_size(lex->frames); ++i) {
 546             if (!strcmp(lex->tok.value, lex->frames[i].name)) {
 547                 lex->frames[i].value = lex->framevalue++;
 548                 if (lexwarn(lex, WARN_FRAME_MACROS, "duplicate frame macro defined: `%s`", lex->tok.value))
 549                     return false;
 550                 break;
 551             }
 552         }
 553         if (i < vec_size(lex->frames))
 554             continue;
 555
 556         m.value = lex->framevalue++;
 557         m.name = util_strdup(lex->tok.value);
 558         vec_shrinkto(lex->tok.value, 0);
 559         vec_push(lex->frames, m);
 560     } while (true);
 561 }
 562
 563 static int GMQCC_WARN lex_finish_string(lex_file *lex, int quote)
 564 {
 565     int ch = 0;
 566
 567     while (ch != EOF)
 568     {
 569         ch = lex_getch(lex);
 570         if (ch == quote)
 571             return TOKEN_STRINGCONST;
 572
 573         if (!lex->flags.preprocessing && ch == '\\') {
 574             ch = lex_getch(lex);
 575             if (ch == EOF) {
 576                 lexerror(lex, "unexpected end of file");
 577                 lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */
 578                 return (lex->tok.ttype = TOKEN_ERROR);
 579             }
 580
 581             switch (ch) {
 582             case '\\': break;
 583             case 'a':  ch = '\a'; break;
 584             case 'b':  ch = '\b'; break;
 585             case 'r':  ch = '\r'; break;
 586             case 'n':  ch = '\n'; break;
 587             case 't':  ch = '\t'; break;
 588             case 'f':  ch = '\f'; break;
 589             case 'v':  ch = '\v'; break;
 590             default:
 591                 lexwarn(lex, WARN_UNKNOWN_CONTROL_SEQUENCE, "unrecognized control sequence: \\%c", ch);
 592                 /* so we just add the character plus backslash no matter what it actually is */
 593                 lex_tokench(lex, '\\');
 594             }
 595             /* add the character finally */
 596             lex_tokench(lex, ch);
 597         }
 598         else
 599             lex_tokench(lex, ch);
 600     }
 601     lexerror(lex, "unexpected end of file within string constant");
 602     lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */
 603     return (lex->tok.ttype = TOKEN_ERROR);
 604 }
 605
 606 static int GMQCC_WARN lex_finish_digit(lex_file *lex, int lastch)
 607 {
 608     bool ishex = false;
 609
 610     int  ch = lastch;
 611
 612     /* parse a number... */
 613     lex->tok.ttype = TOKEN_INTCONST;
 614
 615     lex_tokench(lex, ch);
 616
 617     ch = lex_getch(lex);
 618     if (ch != '.' && !isdigit(ch))
 619     {
 620         if (lastch != '0' || ch != 'x')
 621         {
 622             /* end of the number or EOF */
 623             lex_ungetch(lex, ch);
 624             lex_endtoken(lex);
 625
 626             lex->tok.constval.i = lastch - '0';
 627             return lex->tok.ttype;
 628         }
 629
 630         ishex = true;
 631     }
 632
 633     /* EOF would have been caught above */
 634
 635     if (ch != '.')
 636     {
 637         lex_tokench(lex, ch);
 638         ch = lex_getch(lex);
 639         while (isdigit(ch) || (ishex && isxdigit_only(ch)))
 640         {
 641             lex_tokench(lex, ch);
 642             ch = lex_getch(lex);
 643         }
 644     }
 645     /* NOT else, '.' can come from above as well */
 646     if (ch == '.' && !ishex)
 647     {
 648         /* Allow floating comma in non-hex mode */
 649         lex->tok.ttype = TOKEN_FLOATCONST;
 650         lex_tokench(lex, ch);
 651
 652         /* continue digits-only */
 653         ch = lex_getch(lex);
 654         while (isdigit(ch))
 655         {
 656             lex_tokench(lex, ch);
 657             ch = lex_getch(lex);
 658         }
 659     }
 660     /* put back the last character */
 661     /* but do not put back the trailing 'f' or a float */
 662     if (lex->tok.ttype == TOKEN_FLOATCONST && ch == 'f')
 663         ch = lex_getch(lex);
 664
 665     /* generally we don't want words to follow numbers: */
 666     if (isident(ch)) {
 667         lexerror(lex, "unexpected trailing characters after number");
 668         return (lex->tok.ttype = TOKEN_ERROR);
 669     }
 670     lex_ungetch(lex, ch);
 671
 672     lex_endtoken(lex);
 673     if (lex->tok.ttype == TOKEN_FLOATCONST)
 674         lex->tok.constval.f = strtod(lex->tok.value, NULL);
 675     else
 676         lex->tok.constval.i = strtol(lex->tok.value, NULL, 0);
 677     return lex->tok.ttype;
 678 }
 679
 680 int lex_do(lex_file *lex)
 681 {
 682     int ch, nextch;
 683
 684     lex_token_new(lex);
 685 #if 0
 686     if (!lex->tok)
 687         return TOKEN_FATAL;
 688 #endif
 689
 690     while (true) {
 691         ch = lex_skipwhite(lex);
 692         if (!lex->flags.mergelines || ch != '\\')
 693             break;
 694         ch = lex_getch(lex);
 695         if (ch != '\n') {
 696             lex_ungetch(lex, ch);
 697             ch = '\\';
 698             break;
 699         }
 700         /* we reached a linemerge */
 701         lex_tokench(lex, '\n');
 702         continue;
 703     }
 704
 705     lex->sline = lex->line;
 706     lex->tok.ctx.line = lex->sline;
 707     lex->tok.ctx.file = lex->name;
 708
 709     if (lex->flags.preprocessing && (ch == TOKEN_WHITE || ch == TOKEN_EOL || ch == TOKEN_FATAL)) {
 710         return (lex->tok.ttype = ch);
 711     }
 712
 713     if (lex->eof)
 714         return (lex->tok.ttype = TOKEN_FATAL);
 715
 716     if (ch == EOF) {
 717         lex->eof = true;
 718         return (lex->tok.ttype = TOKEN_EOF);
 719     }
 720
 721     /* modelgen / spiritgen commands */
 722     if (ch == '$') {
 723         const char *v;
 724         size_t frame;
 725
 726         ch = lex_getch(lex);
 727         if (!isident_start(ch)) {
 728             lexerror(lex, "hanging '$' modelgen/spritegen command line");
 729             return lex_do(lex);
 730         }
 731         lex_tokench(lex, ch);
 732         if (!lex_finish_ident(lex))
 733             return (lex->tok.ttype = TOKEN_ERROR);
 734         lex_endtoken(lex);
 735         /* skip the known commands */
 736         v = lex->tok.value;
 737
 738         if (!strcmp(v, "frame") || !strcmp(v, "framesave"))
 739         {
 740             /* frame/framesave command works like an enum
 741              * similar to fteqcc we handle this in the lexer.
 742              * The reason for this is that it is sensitive to newlines,
 743              * which the parser is unaware of
 744              */
 745             if (!lex_finish_frames(lex))
 746                  return (lex->tok.ttype = TOKEN_ERROR);
 747             return lex_do(lex);
 748         }
 749
 750         if (!strcmp(v, "framevalue"))
 751         {
 752             ch = lex_getch(lex);
 753             while (ch != EOF && isspace(ch) && ch != '\n')
 754                 ch = lex_getch(lex);
 755
 756             if (!isdigit(ch)) {
 757                 lexerror(lex, "$framevalue requires an integer parameter");
 758                 return lex_do(lex);
 759             }
 760
 761             lex_token_new(lex);
 762             lex->tok.ttype = lex_finish_digit(lex, ch);
 763             lex_endtoken(lex);
 764             if (lex->tok.ttype != TOKEN_INTCONST) {
 765                 lexerror(lex, "$framevalue requires an integer parameter");
 766                 return lex_do(lex);
 767             }
 768             lex->framevalue = lex->tok.constval.i;
 769             return lex_do(lex);
 770         }
 771
 772         if (!strcmp(v, "framerestore"))
 773         {
 774             int rc;
 775
 776             lex_token_new(lex);
 777
 778             rc = lex_parse_frame(lex);
 779
 780             if (rc > 0) {
 781                 lexerror(lex, "$framerestore requires a framename parameter");
 782                 return lex_do(lex);
 783             }
 784             if (rc < 0)
 785                 return (lex->tok.ttype = TOKEN_FATAL);
 786
 787             v = lex->tok.value;
 788             for (frame = 0; frame < vec_size(lex->frames); ++frame) {
 789                 if (!strcmp(v, lex->frames[frame].name)) {
 790                     lex->framevalue = lex->frames[frame].value;
 791                     return lex_do(lex);
 792                 }
 793             }
 794             lexerror(lex, "unknown framename `%s`", v);
 795             return lex_do(lex);
 796         }
 797
 798         if (!strcmp(v, "modelname"))
 799         {
 800             int rc;
 801
 802             lex_token_new(lex);
 803
 804             rc = lex_parse_frame(lex);
 805
 806             if (rc > 0) {
 807                 lexerror(lex, "$modelname requires a parameter");
 808                 return lex_do(lex);
 809             }
 810             if (rc < 0)
 811                 return (lex->tok.ttype = TOKEN_FATAL);
 812
 813             v = lex->tok.value;
 814             if (lex->modelname) {
 815                 frame_macro m;
 816                 m.value = lex->framevalue;
 817                 m.name = lex->modelname;
 818                 lex->modelname = NULL;
 819                 vec_push(lex->frames, m);
 820             }
 821             lex->modelname = lex->tok.value;
 822             lex->tok.value = NULL;
 823             return lex_do(lex);
 824         }
 825
 826         if (!strcmp(v, "flush"))
 827         {
 828             size_t frame;
 829             for (frame = 0; frame < vec_size(lex->frames); ++frame)
 830                 mem_d(lex->frames[frame].name);
 831             vec_free(lex->frames);
 832             /* skip line (fteqcc does it too) */
 833             ch = lex_getch(lex);
 834             while (ch != EOF && ch != '\n')
 835                 ch = lex_getch(lex);
 836             return lex_do(lex);
 837         }
 838
 839         if (!strcmp(v, "cd") ||
 840             !strcmp(v, "origin") ||
 841             !strcmp(v, "base") ||
 842             !strcmp(v, "flags") ||
 843             !strcmp(v, "scale") ||
 844             !strcmp(v, "skin"))
 845         {
 846             /* skip line */
 847             ch = lex_getch(lex);
 848             while (ch != EOF && ch != '\n')
 849                 ch = lex_getch(lex);
 850             return lex_do(lex);
 851         }
 852
 853         for (frame = 0; frame < vec_size(lex->frames); ++frame) {
 854             if (!strcmp(v, lex->frames[frame].name)) {
 855                 lex->tok.constval.i = lex->frames[frame].value;
 856                 return (lex->tok.ttype = TOKEN_INTCONST);
 857             }
 858         }
 859
 860         lexerror(lex, "invalid frame macro");
 861         return lex_do(lex);
 862     }
 863
 864     /* single-character tokens */
 865     switch (ch)
 866     {
 867         case '[':
 868         case '(':
 869             lex_tokench(lex, ch);
 870             lex_endtoken(lex);
 871             if (lex->flags.noops)
 872                 return (lex->tok.ttype = ch);
 873             else
 874                 return (lex->tok.ttype = TOKEN_OPERATOR);
 875         case ')':
 876         case ';':
 877         case ':':
 878         case '{':
 879         case '}':
 880         case ']':
 881
 882         case '#':
 883             lex_tokench(lex, ch);
 884             lex_endtoken(lex);
 885             return (lex->tok.ttype = ch);
 886         default:
 887             break;
 888     }
 889
 890     if (lex->flags.noops)
 891     {
 892         /* Detect characters early which are normally
 893          * operators OR PART of an operator.
 894          */
 895         switch (ch)
 896         {
 897             case '+':
 898             case '-':
 899             case '*':
 900             case '/':
 901             case '<':
 902             case '>':
 903             case '=':
 904             case '&':
 905             case '|':
 906             case '^':
 907             case '~':
 908             case ',':
 909             case '!':
 910                 lex_tokench(lex, ch);
 911                 lex_endtoken(lex);
 912                 return (lex->tok.ttype = ch);
 913             default:
 914                 break;
 915         }
 916
 917         if (ch == '.')
 918         {
 919             lex_tokench(lex, ch);
 920             /* peak ahead once */
 921             nextch = lex_getch(lex);
 922             if (nextch != '.') {
 923                 lex_ungetch(lex, nextch);
 924                 lex_endtoken(lex);
 925                 return (lex->tok.ttype = ch);
 926             }
 927             /* peak ahead again */
 928             nextch = lex_getch(lex);
 929             if (nextch != '.') {
 930                 lex_ungetch(lex, nextch);
 931                 lex_ungetch(lex, nextch);
 932                 lex_endtoken(lex);
 933                 return (lex->tok.ttype = ch);
 934             }
 935             /* fill the token to be "..." */
 936             lex_tokench(lex, ch);
 937             lex_tokench(lex, ch);
 938             lex_endtoken(lex);
 939             return (lex->tok.ttype = TOKEN_DOTS);
 940         }
 941     }
 942
 943     if (ch == ',' || ch == '.') {
 944         lex_tokench(lex, ch);
 945         lex_endtoken(lex);
 946         return (lex->tok.ttype = TOKEN_OPERATOR);
 947     }
 948
 949     if (ch == '+' || ch == '-' || /* ++, --, +=, -=  and -> as well! */
 950         ch == '>' || ch == '<' || /* <<, >>, <=, >= */
 951         ch == '=' || ch == '!' || /* ==, != */
 952         ch == '&' || ch == '|')   /* &&, ||, &=, |= */
 953     {
 954         lex_tokench(lex, ch);
 955
 956         nextch = lex_getch(lex);
 957         if (nextch == ch || nextch == '=') {
 958             lex_tokench(lex, nextch);
 959         } else if (ch == '-' && nextch == '>') {
 960             lex_tokench(lex, nextch);
 961         } else
 962             lex_ungetch(lex, nextch);
 963
 964         lex_endtoken(lex);
 965         return (lex->tok.ttype = TOKEN_OPERATOR);
 966     }
 967
 968     /*
 969     if (ch == '^' || ch == '~' || ch == '!')
 970     {
 971         lex_tokench(lex, ch);
 972         lex_endtoken(lex);
 973         return (lex->tok.ttype = TOKEN_OPERATOR);
 974     }
 975     */
 976
 977     if (ch == '*' || ch == '/') /* *=, /= */
 978     {
 979         lex_tokench(lex, ch);
 980
 981         nextch = lex_getch(lex);
 982         if (nextch == '=') {
 983             lex_tokench(lex, nextch);
 984         } else
 985             lex_ungetch(lex, nextch);
 986
 987         lex_endtoken(lex);
 988         return (lex->tok.ttype = TOKEN_OPERATOR);
 989     }
 990
 991     if (isident_start(ch))
 992     {
 993         const char *v;
 994
 995         lex_tokench(lex, ch);
 996         if (!lex_finish_ident(lex)) {
 997             /* error? */
 998             return (lex->tok.ttype = TOKEN_ERROR);
 999         }
1000         lex_endtoken(lex);
1001         lex->tok.ttype = TOKEN_IDENT;
1002
1003         v = lex->tok.value;
1004         if (!strcmp(v, "void")) {
1005             lex->tok.ttype = TOKEN_TYPENAME;
1006             lex->tok.constval.t = TYPE_VOID;
1007         } else if (!strcmp(v, "int")) {
1008             lex->tok.ttype = TOKEN_TYPENAME;
1009             lex->tok.constval.t = TYPE_INTEGER;
1010         } else if (!strcmp(v, "float")) {
1011             lex->tok.ttype = TOKEN_TYPENAME;
1012             lex->tok.constval.t = TYPE_FLOAT;
1013         } else if (!strcmp(v, "string")) {
1014             lex->tok.ttype = TOKEN_TYPENAME;
1015             lex->tok.constval.t = TYPE_STRING;
1016         } else if (!strcmp(v, "entity")) {
1017             lex->tok.ttype = TOKEN_TYPENAME;
1018             lex->tok.constval.t = TYPE_ENTITY;
1019         } else if (!strcmp(v, "vector")) {
1020             lex->tok.ttype = TOKEN_TYPENAME;
1021             lex->tok.constval.t = TYPE_VECTOR;
1022         } else if (!strcmp(v, "for")  ||
1023                  !strcmp(v, "while")  ||
1024                  !strcmp(v, "do")     ||
1025                  !strcmp(v, "if")     ||
1026                  !strcmp(v, "else")   ||
1027                  !strcmp(v, "local")  ||
1028                  !strcmp(v, "return") ||
1029                  !strcmp(v, "not")    ||
1030                  !strcmp(v, "const"))
1031         {
1032             lex->tok.ttype = TOKEN_KEYWORD;
1033         }
1034         else if (opts_standard != COMPILER_QCC)
1035         {
1036             /* other standards reserve these keywords */
1037             if (!strcmp(v, "switch") ||
1038                 !strcmp(v, "struct") ||
1039                 !strcmp(v, "union")  ||
1040                 !strcmp(v, "break")  ||
1041                 !strcmp(v, "continue") ||
1042                 !strcmp(v, "var"))
1043             {
1044                 lex->tok.ttype = TOKEN_KEYWORD;
1045             }
1046         }
1047
1048         return lex->tok.ttype;
1049     }
1050
1051     if (ch == '"')
1052     {
1053         lex->flags.nodigraphs = true;
1054         if (lex->flags.preprocessing)
1055             lex_tokench(lex, ch);
1056         lex->tok.ttype = lex_finish_string(lex, '"');
1057         if (lex->flags.preprocessing)
1058             lex_tokench(lex, ch);
1059         while (!lex->flags.preprocessing && lex->tok.ttype == TOKEN_STRINGCONST)
1060         {
1061             /* Allow c style "string" "continuation" */
1062             ch = lex_skipwhite(lex);
1063             if (ch != '"') {
1064                 lex_ungetch(lex, ch);
1065                 break;
1066             }
1067
1068             lex->tok.ttype = lex_finish_string(lex, '"');
1069         }
1070         lex->flags.nodigraphs = false;
1071         lex_endtoken(lex);
1072         return lex->tok.ttype;
1073     }
1074
1075     if (ch == '\'')
1076     {
1077         /* we parse character constants like string,
1078          * but return TOKEN_CHARCONST, or a vector type if it fits...
1079          * Likewise actual unescaping has to be done by the parser.
1080          * The difference is we don't allow 'char' 'continuation'.
1081          */
1082         if (lex->flags.preprocessing)
1083             lex_tokench(lex, ch);
1084         lex->tok.ttype = lex_finish_string(lex, '\'');
1085         if (lex->flags.preprocessing)
1086             lex_tokench(lex, ch);
1087         lex_endtoken(lex);
1088
1089          /* It's a vector if we can successfully scan 3 floats */
1090 #ifdef WIN32
1091         if (sscanf_s(lex->tok.value, " %f %f %f ",
1092                    &lex->tok.constval.v.x, &lex->tok.constval.v.y, &lex->tok.constval.v.z) == 3)
1093 #else
1094         if (sscanf(lex->tok.value, " %f %f %f ",
1095                    &lex->tok.constval.v.x, &lex->tok.constval.v.y, &lex->tok.constval.v.z) == 3)
1096 #endif
1097
1098         {
1099              lex->tok.ttype = TOKEN_VECTORCONST;
1100         }
1101
1102         return lex->tok.ttype;
1103     }
1104
1105     if (isdigit(ch))
1106     {
1107         lex->tok.ttype = lex_finish_digit(lex, ch);
1108         lex_endtoken(lex);
1109         return lex->tok.ttype;
1110     }
1111
1112     lexerror(lex, "unknown token");
1113     return (lex->tok.ttype = TOKEN_ERROR);
1114 }