lexer.c

   1 #include <stdio.h>
   2 #include <stdlib.h>
   3 #include <string.h>
   4 #include <stdarg.h>
   5
   6 #include "gmqcc.h"
   7 #include "lexer.h"
   8
   /* Instantiate the vector helper functions for token.value and for
    * lex_file.frames. This file calls token_value_add() and
    * lex_file_frames_add(), which are presumably produced here.
    * NOTE(review): the macros are defined outside this file - confirm the
    * exact set of generated functions. */
   9 MEM_VEC_FUNCTIONS(token, char, value)
  10 MEM_VEC_FUNCTIONS(lex_file, frame_macro, frames)
  11
   /* Global list of file names handed out by lex_open(). The strings are
    * released in lex_cleanup(), not in lex_close().
    * NOTE(review): lex_filenames_add / lex_filenames_data /
    * lex_filenames_elements are presumably generated by VECTOR_MAKE. */
  12 VECTOR_MAKE(char*, lex_filenames);
  13
  14 void lexerror(lex_file *lex, const char *fmt, ...)
  15 {
  16         va_list ap;
  17
  18         va_start(ap, fmt);
  19     vprintmsg(LVL_ERROR, lex->name, lex->sline, "parse error", fmt, ap);
  20         va_end(ap);
  21 }
  22
  23 bool lexwarn(lex_file *lex, int warntype, const char *fmt, ...)
  24 {
  25         va_list ap;
  26         int lvl = LVL_WARNING;
  27
  28     if (!OPTS_WARN(warntype))
  29         return false;
  30
  31     if (opts_werror)
  32             lvl = LVL_ERROR;
  33
  34         va_start(ap, fmt);
  35     vprintmsg(lvl, lex->name, lex->sline, "warning", fmt, ap);
  36         va_end(ap);
  37
  38         return opts_werror;
  39 }
  40
  41
  42 #if 0
  43 token* token_new()
  44 {
  45     token *tok = (token*)mem_a(sizeof(token));
  46     if (!tok)
  47         return NULL;
  48     memset(tok, 0, sizeof(*tok));
  49     return tok;
  50 }
  51
  52 void token_delete(token *self)
  53 {
  54     if (self->next && self->next->prev == self)
  55         self->next->prev = self->prev;
  56     if (self->prev && self->prev->next == self)
  57         self->prev->next = self->next;
  58     MEM_VECTOR_CLEAR(self, value);
  59     mem_d(self);
  60 }
  61
  62 token* token_copy(const token *cp)
  63 {
  64     token* self = token_new();
  65     if (!self)
  66         return NULL;
  67     /* copy the value */
  68     self->value_alloc = cp->value_count + 1;
  69     self->value_count = cp->value_count;
  70     self->value = (char*)mem_a(self->value_alloc);
  71     if (!self->value) {
  72         mem_d(self);
  73         return NULL;
  74     }
  75     memcpy(self->value, cp->value, cp->value_count);
  76     self->value[self->value_alloc-1] = 0;
  77
  78     /* rest */
  79     self->ctx = cp->ctx;
  80     self->ttype = cp->ttype;
  81     memcpy(&self->constval, &cp->constval, sizeof(self->constval));
  82     return self;
  83 }
  84
  85 void token_delete_all(token *t)
  86 {
  87     token *n;
  88
  89     do {
  90         n = t->next;
  91         token_delete(t);
  92         t = n;
  93     } while(t);
  94 }
  95
  96 token* token_copy_all(const token *cp)
  97 {
  98     token *cur;
  99     token *out;
 100
 101     out = cur = token_copy(cp);
 102     if (!out)
 103         return NULL;
 104
 105     while (cp->next) {
 106         cp = cp->next;
 107         cur->next = token_copy(cp);
 108         if (!cur->next) {
 109             token_delete_all(out);
 110             return NULL;
 111         }
 112         cur->next->prev = cur;
 113         cur = cur->next;
 114     }
 115
 116     return out;
 117 }
 118 #else
 119 static void lex_token_new(lex_file *lex)
 120 {
 121 #if 0
 122     if (lex->tok)
 123         token_delete(lex->tok);
 124     lex->tok = token_new();
 125 #else
 126     lex->tok.value_count = 0;
 127     lex->tok.constval.t  = 0;
 128     lex->tok.ctx.line = lex->sline;
 129     lex->tok.ctx.file = lex->name;
 130 #endif
 131 }
 132 #endif
 133
 134 lex_file* lex_open(const char *file)
 135 {
 136     lex_file *lex;
 137     FILE *in = util_fopen(file, "rb");
 138
 139     if (!in) {
 140         lexerror(NULL, "open failed: '%s'\n", file);
 141         return NULL;
 142     }
 143
 144     lex = (lex_file*)mem_a(sizeof(*lex));
 145     if (!lex) {
 146         fclose(in);
 147         lexerror(NULL, "out of memory\n");
 148         return NULL;
 149     }
 150
 151     memset(lex, 0, sizeof(*lex));
 152
 153     lex->file = in;
 154     lex->name = util_strdup(file);
 155     lex->line = 1; /* we start counting at 1 */
 156
 157     lex->peekpos = 0;
 158     lex->eof = false;
 159
 160     lex_filenames_add(lex->name);
 161
 162     return lex;
 163 }
 164
 165 void lex_cleanup(void)
 166 {
 167     size_t i;
 168     for (i = 0; i < lex_filenames_elements; ++i)
 169         mem_d(lex_filenames_data[i]);
 170     mem_d(lex_filenames_data);
 171 }
 172
 173 void lex_close(lex_file *lex)
 174 {
 175     size_t i;
 176     for (i = 0; i < lex->frames_count; ++i)
 177         mem_d(lex->frames[i].name);
 178     MEM_VECTOR_CLEAR(lex, frames);
 179
 180     if (lex->modelname)
 181         mem_d(lex->modelname);
 182
 183     if (lex->file)
 184         fclose(lex->file);
 185 #if 0
 186     if (lex->tok)
 187         token_delete(lex->tok);
 188 #else
 189     MEM_VECTOR_CLEAR(&(lex->tok), value);
 190 #endif
 191     /* mem_d(lex->name); collected in lex_filenames */
 192     mem_d(lex);
 193 }
 194
 195 /* Get or put-back data
 196  * The following to functions do NOT understand what kind of data they
 197  * are working on.
 198  * The are merely wrapping get/put in order to count line numbers.
 199  */
 200 static void lex_ungetch(lex_file *lex, int ch);
 201 static int lex_try_trigraph(lex_file *lex, int old)
 202 {
 203     int c2, c3;
 204     c2 = fgetc(lex->file);
 205     if (c2 != '?') {
 206         lex_ungetch(lex, c2);
 207         return old;
 208     }
 209
 210     c3 = fgetc(lex->file);
 211     switch (c3) {
 212         case '=': return '#';
 213         case '/': return '\\';
 214         case '\'': return '^';
 215         case '(': return '[';
 216         case ')': return ']';
 217         case '!': return '|';
 218         case '<': return '{';
 219         case '>': return '}';
 220         case '-': return '~';
 221         default:
 222             lex_ungetch(lex, c3);
 223             lex_ungetch(lex, c2);
 224             return old;
 225     }
 226 }
 227
 228 static int lex_getch(lex_file *lex)
 229 {
 230     int ch;
 231
 232     if (lex->peekpos) {
 233         lex->peekpos--;
 234         if (lex->peek[lex->peekpos] == '\n')
 235             lex->line++;
 236         return lex->peek[lex->peekpos];
 237     }
 238
 239     ch = fgetc(lex->file);
 240     if (ch == '\n')
 241         lex->line++;
 242     else if (ch == '?')
 243         return lex_try_trigraph(lex, ch);
 244     return ch;
 245 }
 246
 247 static void lex_ungetch(lex_file *lex, int ch)
 248 {
 249     lex->peek[lex->peekpos++] = ch;
 250     if (ch == '\n')
 251         lex->line--;
 252 }
 253
 /* classify characters
  * some additions to the is*() functions of ctype.h
  */

 /* Identifiers are alphanumeric, but they must start with a letter or '_'.
  * Returns true when `ch` may begin an identifier. */
 static bool isident_start(int ch)
 {
     if (ch == '_')
         return true;
     return isalpha(ch) != 0;
 }
 263
 /* Returns true when `ch` may appear inside an identifier: anything that
  * may start one, or a decimal digit. */
 static bool isident(int ch)
 {
     if (isident_start(ch))
         return true;
     return isdigit(ch) != 0;
 }
 268
 /* isxdigit_only is used when we already know it's not a decimal digit
  * and want to see if it's a hex digit anyway: true for a-f and A-F only.
  */
 static bool isxdigit_only(int ch)
 {
     switch (ch) {
         case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
         case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
             return true;
         default:
             return false;
     }
 }
 276
 277 /* Skip whitespace and comments and return the first
 278  * non-white character.
 279  * As this makes use of the above getch() ungetch() functions,
 280  * we don't need to care at all about line numbering anymore.
 281  *
 282  * In theory, this function should only be used at the beginning
 283  * of lexing, or when we *know* the next character is part of the token.
 284  * Otherwise, if the parser throws an error, the linenumber may not be
 285  * the line of the error, but the line of the next token AFTER the error.
 286  *
 287  * This is currently only problematic when using c-like string-continuation,
 288  * since comments and whitespaces are allowed between 2 such strings.
 289  * Example:
 290 printf(   "line one\n"
 291 // A comment
 292           "A continuation of the previous string"
 293 // This line is skipped
 294       , foo);
 295
 296  * In this case, if the parse decides it didn't actually want a string,
 297  * and uses lex->line to print an error, it will show the ', foo);' line's
 298  * linenumber.
 299  *
 300  * On the other hand, the parser is supposed to remember the line of the next
 301  * token's beginning. In this case we would want skipwhite() to be called
 302  * AFTER reading a token, so that the parser, before reading the NEXT token,
 303  * doesn't store teh *comment's* linenumber, but the actual token's linenumber.
 304  *
 305  * THIS SOLUTION
 306  *    here is to store the line of the first character after skipping
 307  *    the initial whitespace in lex->sline, this happens in lex_do.
 308  */
 309 static int lex_skipwhite(lex_file *lex)
 310 {
 311     int ch = 0;
 312
 313     do
 314     {
 315         ch = lex_getch(lex);
 316         while (ch != EOF && isspace(ch)) ch = lex_getch(lex);
 317
 318         if (ch == '/') {
 319             ch = lex_getch(lex);
 320             if (ch == '/')
 321             {
 322                 /* one line comment */
 323                 ch = lex_getch(lex);
 324
 325                 /* check for special: '/', '/', '*', '/' */
 326                 if (ch == '*') {
 327                     ch = lex_getch(lex);
 328                     if (ch == '/') {
 329                         ch = ' ';
 330                         continue;
 331                     }
 332                 }
 333
 334                 while (ch != EOF && ch != '\n') {
 335                     ch = lex_getch(lex);
 336                 }
 337                 continue;
 338             }
 339             if (ch == '*')
 340             {
 341                 /* multiline comment */
 342                 while (ch != EOF)
 343                 {
 344                     ch = lex_getch(lex);
 345                     if (ch == '*') {
 346                         ch = lex_getch(lex);
 347                         if (ch == '/') {
 348                             ch = lex_getch(lex);
 349                             break;
 350                         }
 351                     }
 352                 }
 353                 if (ch == '/') /* allow *//* direct following comment */
 354                 {
 355                     lex_ungetch(lex, ch);
 356                     ch = ' '; /* cause TRUE in the isspace check */
 357                 }
 358                 continue;
 359             }
 360             /* Otherwise roll back to the slash and break out of the loop */
 361             lex_ungetch(lex, ch);
 362             ch = '/';
 363             break;
 364         }
 365     } while (ch != EOF && isspace(ch));
 366
 367     return ch;
 368 }
 369
 370 /* Append a character to the token buffer */
 371 static bool GMQCC_WARN lex_tokench(lex_file *lex, int ch)
 372 {
 373     if (!token_value_add(&lex->tok, ch)) {
 374         lexerror(lex, "out of memory");
 375         return false;
 376     }
 377     return true;
 378 }
 379
 380 /* Append a trailing null-byte */
 381 static bool GMQCC_WARN lex_endtoken(lex_file *lex)
 382 {
 383     if (!token_value_add(&lex->tok, 0)) {
 384         lexerror(lex, "out of memory");
 385         return false;
 386     }
 387     lex->tok.value_count--;
 388     return true;
 389 }
 390
 391 /* Get a token */
 392 static bool GMQCC_WARN lex_finish_ident(lex_file *lex)
 393 {
 394     int ch;
 395
 396     ch = lex_getch(lex);
 397     while (ch != EOF && isident(ch))
 398     {
 399         if (!lex_tokench(lex, ch))
 400             return (lex->tok.ttype = TOKEN_FATAL);
 401         ch = lex_getch(lex);
 402     }
 403
 404     /* last ch was not an ident ch: */
 405     lex_ungetch(lex, ch);
 406
 407     return true;
 408 }
 409
 410 /* read one ident for the frame list */
 411 static int lex_parse_frame(lex_file *lex)
 412 {
 413     int ch;
 414
 415     lex_token_new(lex);
 416
 417     ch = lex_getch(lex);
 418     while (ch != EOF && ch != '\n' && isspace(ch))
 419         ch = lex_getch(lex);
 420
 421     if (ch == '\n')
 422         return 1;
 423
 424     if (!isident_start(ch)) {
 425         lexerror(lex, "invalid framename, must start with one of a-z or _, got %c", ch);
 426         return -1;
 427     }
 428
 429     if (!lex_tokench(lex, ch))
 430         return -1;
 431     if (!lex_finish_ident(lex))
 432         return -1;
 433     if (!lex_endtoken(lex))
 434         return -1;
 435     return 0;
 436 }
 437
 438 /* read a list of $frames */
 439 static bool lex_finish_frames(lex_file *lex)
 440 {
 441     do {
 442         size_t i;
 443         int    rc;
 444         frame_macro m;
 445
 446         rc = lex_parse_frame(lex);
 447         if (rc > 0) /* end of line */
 448             return true;
 449         if (rc < 0) /* error */
 450             return false;
 451
 452         for (i = 0; i < lex->frames_count; ++i) {
 453             if (!strcmp(lex->tok.value, lex->frames[i].name)) {
 454                 lex->frames[i].value = lex->framevalue++;
 455                 if (lexwarn(lex, WARN_FRAME_MACROS, "duplicate frame macro defined: `%s`", lex->tok.value))
 456                     return false;
 457                 break;
 458             }
 459         }
 460         if (i < lex->frames_count)
 461             continue;
 462
 463         m.value = lex->framevalue++;
 464         m.name = lex->tok.value;
 465         lex->tok.value = NULL;
 466         lex->tok.value_alloc = lex->tok.value_count = 0;
 467         if (!lex_file_frames_add(lex, m)) {
 468             lexerror(lex, "out of memory");
 469             return false;
 470         }
 471     } while (true);
 472 }
 473
 474 static int GMQCC_WARN lex_finish_string(lex_file *lex, int quote)
 475 {
 476     int ch = 0;
 477
 478     while (ch != EOF)
 479     {
 480         ch = lex_getch(lex);
 481         if (ch == quote)
 482             return TOKEN_STRINGCONST;
 483
 484         if (ch == '\\') {
 485             ch = lex_getch(lex);
 486             if (ch == EOF) {
 487                 lexerror(lex, "unexpected end of file");
 488                 lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */
 489                 return (lex->tok.ttype = TOKEN_ERROR);
 490             }
 491
 492             switch (ch) {
 493             case '\\': break;
 494             case 'a':  ch = '\a'; break;
 495             case 'b':  ch = '\b'; break;
 496             case 'r':  ch = '\r'; break;
 497             case 'n':  ch = '\n'; break;
 498             case 't':  ch = '\t'; break;
 499             case 'f':  ch = '\f'; break;
 500             case 'v':  ch = '\v'; break;
 501             default:
 502                 lexwarn(lex, WARN_UNKNOWN_CONTROL_SEQUENCE, "unrecognized control sequence: \\%c", ch);
 503                 /* so we just add the character plus backslash no matter what it actually is */
 504                 if (!lex_tokench(lex, '\\'))
 505                     return (lex->tok.ttype = TOKEN_FATAL);
 506             }
 507             /* add the character finally */
 508             if (!lex_tokench(lex, ch))
 509                 return (lex->tok.ttype = TOKEN_FATAL);
 510         }
 511         else if (!lex_tokench(lex, ch))
 512             return (lex->tok.ttype = TOKEN_FATAL);
 513     }
 514     lexerror(lex, "unexpected end of file within string constant");
 515     lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */
 516     return (lex->tok.ttype = TOKEN_ERROR);
 517 }
 518
 519 static int GMQCC_WARN lex_finish_digit(lex_file *lex, int lastch)
 520 {
 521     bool ishex = false;
 522
 523     int  ch = lastch;
 524
 525     /* parse a number... */
 526     lex->tok.ttype = TOKEN_INTCONST;
 527
 528     if (!lex_tokench(lex, ch))
 529         return (lex->tok.ttype = TOKEN_FATAL);
 530
 531     ch = lex_getch(lex);
 532     if (ch != '.' && !isdigit(ch))
 533     {
 534         if (lastch != '0' || ch != 'x')
 535         {
 536             /* end of the number or EOF */
 537             lex_ungetch(lex, ch);
 538             if (!lex_endtoken(lex))
 539                 return (lex->tok.ttype = TOKEN_FATAL);
 540
 541             lex->tok.constval.i = lastch - '0';
 542             return lex->tok.ttype;
 543         }
 544
 545         ishex = true;
 546     }
 547
 548     /* EOF would have been caught above */
 549
 550     if (ch != '.')
 551     {
 552         if (!lex_tokench(lex, ch))
 553             return (lex->tok.ttype = TOKEN_FATAL);
 554         ch = lex_getch(lex);
 555         while (isdigit(ch) || (ishex && isxdigit_only(ch)))
 556         {
 557             if (!lex_tokench(lex, ch))
 558                 return (lex->tok.ttype = TOKEN_FATAL);
 559             ch = lex_getch(lex);
 560         }
 561     }
 562     /* NOT else, '.' can come from above as well */
 563     if (ch == '.' && !ishex)
 564     {
 565         /* Allow floating comma in non-hex mode */
 566         lex->tok.ttype = TOKEN_FLOATCONST;
 567         if (!lex_tokench(lex, ch))
 568             return (lex->tok.ttype = TOKEN_FATAL);
 569
 570         /* continue digits-only */
 571         ch = lex_getch(lex);
 572         while (isdigit(ch))
 573         {
 574             if (!lex_tokench(lex, ch))
 575                 return (lex->tok.ttype = TOKEN_FATAL);
 576             ch = lex_getch(lex);
 577         }
 578     }
 579     /* put back the last character */
 580     /* but do not put back the trailing 'f' or a float */
 581     if (lex->tok.ttype == TOKEN_FLOATCONST && ch == 'f')
 582         ch = lex_getch(lex);
 583
 584     /* generally we don't want words to follow numbers: */
 585     if (isident(ch)) {
 586         lexerror(lex, "unexpected trailing characters after number");
 587         return (lex->tok.ttype = TOKEN_ERROR);
 588     }
 589     lex_ungetch(lex, ch);
 590
 591     if (!lex_endtoken(lex))
 592         return (lex->tok.ttype = TOKEN_FATAL);
 593     if (lex->tok.ttype == TOKEN_FLOATCONST)
 594         lex->tok.constval.f = strtod(lex->tok.value, NULL);
 595     else
 596         lex->tok.constval.i = strtol(lex->tok.value, NULL, 0);
 597     return lex->tok.ttype;
 598 }
 599
 600 int lex_do(lex_file *lex)
 601 {
 602     int ch, nextch;
 603
 604     lex_token_new(lex);
 605 #if 0
 606     if (!lex->tok)
 607         return TOKEN_FATAL;
 608 #endif
 609
 610     ch = lex_skipwhite(lex);
 611     lex->sline = lex->line;
 612     lex->tok.ctx.line = lex->sline;
 613     lex->tok.ctx.file = lex->name;
 614
 615     if (lex->eof)
 616         return (lex->tok.ttype = TOKEN_FATAL);
 617
 618     if (ch == EOF) {
 619         lex->eof = true;
 620         return (lex->tok.ttype = TOKEN_EOF);
 621     }
 622
 623     /* modelgen / spiritgen commands */
 624     if (ch == '$') {
 625         const char *v;
 626         size_t frame;
 627
 628         ch = lex_getch(lex);
 629         if (!isident_start(ch)) {
 630             lexerror(lex, "hanging '$' modelgen/spritegen command line");
 631             return lex_do(lex);
 632         }
 633         if (!lex_tokench(lex, ch))
 634             return (lex->tok.ttype = TOKEN_FATAL);
 635         if (!lex_finish_ident(lex))
 636             return (lex->tok.ttype = TOKEN_ERROR);
 637         if (!lex_endtoken(lex))
 638             return (lex->tok.ttype = TOKEN_FATAL);
 639         /* skip the known commands */
 640         v = lex->tok.value;
 641
 642         if (!strcmp(v, "frame") || !strcmp(v, "framesave"))
 643         {
 644             /* frame/framesave command works like an enum
 645              * similar to fteqcc we handle this in the lexer.
 646              * The reason for this is that it is sensitive to newlines,
 647              * which the parser is unaware of
 648              */
 649             if (!lex_finish_frames(lex))
 650                  return (lex->tok.ttype = TOKEN_ERROR);
 651             return lex_do(lex);
 652         }
 653
 654         if (!strcmp(v, "framevalue"))
 655         {
 656             ch = lex_getch(lex);
 657             while (ch != EOF && isspace(ch) && ch != '\n')
 658                 ch = lex_getch(lex);
 659
 660             if (!isdigit(ch)) {
 661                 lexerror(lex, "$framevalue requires an integer parameter");
 662                 return lex_do(lex);
 663             }
 664
 665             lex_token_new(lex);
 666             lex->tok.ttype = lex_finish_digit(lex, ch);
 667             if (!lex_endtoken(lex))
 668                 return (lex->tok.ttype = TOKEN_FATAL);
 669             if (lex->tok.ttype != TOKEN_INTCONST) {
 670                 lexerror(lex, "$framevalue requires an integer parameter");
 671                 return lex_do(lex);
 672             }
 673             lex->framevalue = lex->tok.constval.i;
 674             return lex_do(lex);
 675         }
 676
 677         if (!strcmp(v, "framerestore"))
 678         {
 679             int rc;
 680
 681             lex_token_new(lex);
 682
 683             rc = lex_parse_frame(lex);
 684
 685             if (rc > 0) {
 686                 lexerror(lex, "$framerestore requires a framename parameter");
 687                 return lex_do(lex);
 688             }
 689             if (rc < 0)
 690                 return (lex->tok.ttype = TOKEN_FATAL);
 691
 692             v = lex->tok.value;
 693             for (frame = 0; frame < lex->frames_count; ++frame) {
 694                 if (!strcmp(v, lex->frames[frame].name)) {
 695                     lex->framevalue = lex->frames[frame].value;
 696                     return lex_do(lex);
 697                 }
 698             }
 699             lexerror(lex, "unknown framename `%s`", v);
 700             return lex_do(lex);
 701         }
 702
 703         if (!strcmp(v, "modelname"))
 704         {
 705             int rc;
 706
 707             lex_token_new(lex);
 708
 709             rc = lex_parse_frame(lex);
 710
 711             if (rc > 0) {
 712                 lexerror(lex, "$framerestore requires a framename parameter");
 713                 return lex_do(lex);
 714             }
 715             if (rc < 0)
 716                 return (lex->tok.ttype = TOKEN_FATAL);
 717
 718             v = lex->tok.value;
 719             if (lex->modelname) {
 720                 frame_macro m;
 721                 m.value = lex->framevalue;
 722                 m.name = lex->modelname;
 723                 lex->modelname = NULL;
 724                 if (!lex_file_frames_add(lex, m)) {
 725                     lexerror(lex, "out of memory");
 726                     return (lex->tok.ttype = TOKEN_FATAL);
 727                 }
 728             }
 729             lex->modelname = lex->tok.value;
 730             lex->tok.value = NULL;
 731             lex->tok.value_alloc = lex->tok.value_count = 0;
 732             for (frame = 0; frame < lex->frames_count; ++frame) {
 733                 if (!strcmp(v, lex->frames[frame].name)) {
 734                     lex->framevalue = lex->frames[frame].value;
 735                     break;
 736                 }
 737             }
 738             return lex_do(lex);
 739         }
 740
 741         if (!strcmp(v, "flush"))
 742         {
 743             size_t frame;
 744             for (frame = 0; frame < lex->frames_count; ++frame)
 745                 mem_d(lex->frames[frame].name);
 746             MEM_VECTOR_CLEAR(lex, frames);
 747             /* skip line (fteqcc does it too) */
 748             ch = lex_getch(lex);
 749             while (ch != EOF && ch != '\n')
 750                 ch = lex_getch(lex);
 751             return lex_do(lex);
 752         }
 753
 754         if (!strcmp(v, "cd") ||
 755             !strcmp(v, "origin") ||
 756             !strcmp(v, "base") ||
 757             !strcmp(v, "flags") ||
 758             !strcmp(v, "scale") ||
 759             !strcmp(v, "skin"))
 760         {
 761             /* skip line */
 762             ch = lex_getch(lex);
 763             while (ch != EOF && ch != '\n')
 764                 ch = lex_getch(lex);
 765             return lex_do(lex);
 766         }
 767
 768         for (frame = 0; frame < lex->frames_count; ++frame) {
 769             if (!strcmp(v, lex->frames[frame].name)) {
 770                 lex->tok.constval.i = lex->frames[frame].value;
 771                 return (lex->tok.ttype = TOKEN_INTCONST);
 772             }
 773         }
 774
 775         lexerror(lex, "invalid frame macro");
 776         return lex_do(lex);
 777     }
 778
 779     /* single-character tokens */
 780     switch (ch)
 781     {
 782         case '(':
 783             if (!lex_tokench(lex, ch) ||
 784                 !lex_endtoken(lex))
 785             {
 786                 return (lex->tok.ttype = TOKEN_FATAL);
 787             }
 788             if (lex->flags.noops)
 789                 return (lex->tok.ttype = ch);
 790             else
 791                 return (lex->tok.ttype = TOKEN_OPERATOR);
 792         case ')':
 793         case ';':
 794         case '{':
 795         case '}':
 796         case '[':
 797         case ']':
 798
 799         case '#':
 800             if (!lex_tokench(lex, ch) ||
 801                 !lex_endtoken(lex))
 802             {
 803                 return (lex->tok.ttype = TOKEN_FATAL);
 804             }
 805             return (lex->tok.ttype = ch);
 806         default:
 807             break;
 808     }
 809
 810     if (lex->flags.noops)
 811     {
 812         /* Detect characters early which are normally
 813          * operators OR PART of an operator.
 814          */
 815         switch (ch)
 816         {
 817             case '+':
 818             case '-':
 819             case '*':
 820             case '/':
 821             case '<':
 822             case '>':
 823             case '=':
 824             case '&':
 825             case '|':
 826             case '^':
 827             case '~':
 828             case ',':
 829             case '!':
 830                 if (!lex_tokench(lex, ch) ||
 831                     !lex_endtoken(lex))
 832                 {
 833                     return (lex->tok.ttype = TOKEN_FATAL);
 834                 }
 835                 return (lex->tok.ttype = ch);
 836             default:
 837                 break;
 838         }
 839
 840         if (ch == '.')
 841         {
 842             if (!lex_tokench(lex, ch))
 843                 return (lex->tok.ttype = TOKEN_FATAL);
 844             /* peak ahead once */
 845             nextch = lex_getch(lex);
 846             if (nextch != '.') {
 847                 lex_ungetch(lex, nextch);
 848                 if (!lex_endtoken(lex))
 849                     return (lex->tok.ttype = TOKEN_FATAL);
 850                 return (lex->tok.ttype = ch);
 851             }
 852             /* peak ahead again */
 853             nextch = lex_getch(lex);
 854             if (nextch != '.') {
 855                 lex_ungetch(lex, nextch);
 856                 lex_ungetch(lex, nextch);
 857                 if (!lex_endtoken(lex))
 858                     return (lex->tok.ttype = TOKEN_FATAL);
 859                 return (lex->tok.ttype = ch);
 860             }
 861             /* fill the token to be "..." */
 862             if (!lex_tokench(lex, ch) ||
 863                 !lex_tokench(lex, ch) ||
 864                 !lex_endtoken(lex))
 865             {
 866                 return (lex->tok.ttype = TOKEN_FATAL);
 867             }
 868             return (lex->tok.ttype = TOKEN_DOTS);
 869         }
 870     }
 871
 872     if (ch == ',' || ch == '.') {
 873         if (!lex_tokench(lex, ch) ||
 874             !lex_endtoken(lex))
 875         {
 876             return (lex->tok.ttype = TOKEN_FATAL);
 877         }
 878         return (lex->tok.ttype = TOKEN_OPERATOR);
 879     }
 880
 881     if (ch == '+' || ch == '-' || /* ++, --, +=, -=  and -> as well! */
 882         ch == '>' || ch == '<' || /* <<, >>, <=, >= */
 883         ch == '=' || ch == '!' || /* ==, != */
 884         ch == '&' || ch == '|')   /* &&, ||, &=, |= */
 885     {
 886         if (!lex_tokench(lex, ch))
 887             return (lex->tok.ttype = TOKEN_FATAL);
 888
 889         nextch = lex_getch(lex);
 890         if (nextch == ch || nextch == '=') {
 891             if (!lex_tokench(lex, nextch))
 892                 return (lex->tok.ttype = TOKEN_FATAL);
 893         } else if (ch == '-' && nextch == '>') {
 894             if (!lex_tokench(lex, nextch))
 895                 return (lex->tok.ttype = TOKEN_FATAL);
 896         } else
 897             lex_ungetch(lex, nextch);
 898
 899         if (!lex_endtoken(lex))
 900             return (lex->tok.ttype = TOKEN_FATAL);
 901         return (lex->tok.ttype = TOKEN_OPERATOR);
 902     }
 903
 904     /*
 905     if (ch == '^' || ch == '~' || ch == '!')
 906     {
 907         if (!lex_tokench(lex, ch) ||
 908             !lex_endtoken(lex))
 909         {
 910             return (lex->tok.ttype = TOKEN_FATAL);
 911         }
 912         return (lex->tok.ttype = TOKEN_OPERATOR);
 913     }
 914     */
 915
 916     if (ch == '*' || ch == '/') /* *=, /= */
 917     {
 918         if (!lex_tokench(lex, ch))
 919             return (lex->tok.ttype = TOKEN_FATAL);
 920
 921         nextch = lex_getch(lex);
 922         if (nextch == '=') {
 923             if (!lex_tokench(lex, nextch))
 924                 return (lex->tok.ttype = TOKEN_FATAL);
 925         } else
 926             lex_ungetch(lex, nextch);
 927
 928         if (!lex_endtoken(lex))
 929             return (lex->tok.ttype = TOKEN_FATAL);
 930         return (lex->tok.ttype = TOKEN_OPERATOR);
 931     }
 932
 933     if (isident_start(ch))
 934     {
 935         const char *v;
 936
 937         if (!lex_tokench(lex, ch))
 938             return (lex->tok.ttype = TOKEN_FATAL);
 939         if (!lex_finish_ident(lex)) {
 940             /* error? */
 941             return (lex->tok.ttype = TOKEN_ERROR);
 942         }
 943         if (!lex_endtoken(lex))
 944             return (lex->tok.ttype = TOKEN_FATAL);
 945         lex->tok.ttype = TOKEN_IDENT;
 946
 947         v = lex->tok.value;
 948         if (!strcmp(v, "void")) {
 949             lex->tok.ttype = TOKEN_TYPENAME;
 950             lex->tok.constval.t = TYPE_VOID;
 951         } else if (!strcmp(v, "int")) {
 952             lex->tok.ttype = TOKEN_TYPENAME;
 953             lex->tok.constval.t = TYPE_INTEGER;
 954         } else if (!strcmp(v, "float")) {
 955             lex->tok.ttype = TOKEN_TYPENAME;
 956             lex->tok.constval.t = TYPE_FLOAT;
 957         } else if (!strcmp(v, "string")) {
 958             lex->tok.ttype = TOKEN_TYPENAME;
 959             lex->tok.constval.t = TYPE_STRING;
 960         } else if (!strcmp(v, "entity")) {
 961             lex->tok.ttype = TOKEN_TYPENAME;
 962             lex->tok.constval.t = TYPE_ENTITY;
 963         } else if (!strcmp(v, "vector")) {
 964             lex->tok.ttype = TOKEN_TYPENAME;
 965             lex->tok.constval.t = TYPE_VECTOR;
 966         } else if (!strcmp(v, "for")  ||
 967                  !strcmp(v, "while")  ||
 968                  !strcmp(v, "do")     ||
 969                  !strcmp(v, "if")     ||
 970                  !strcmp(v, "else")   ||
 971                  !strcmp(v, "local")  ||
 972                  !strcmp(v, "return") ||
 973                  !strcmp(v, "const"))
 974             lex->tok.ttype = TOKEN_KEYWORD;
 975
 976         return lex->tok.ttype;
 977     }
 978
 979     if (ch == '"')
 980     {
 981         lex->tok.ttype = lex_finish_string(lex, '"');
 982         while (lex->tok.ttype == TOKEN_STRINGCONST)
 983         {
 984             /* Allow c style "string" "continuation" */
 985             ch = lex_skipwhite(lex);
 986             if (ch != '"') {
 987                 lex_ungetch(lex, ch);
 988                 break;
 989             }
 990
 991             lex->tok.ttype = lex_finish_string(lex, '"');
 992         }
 993         if (!lex_endtoken(lex))
 994             return (lex->tok.ttype = TOKEN_FATAL);
 995         return lex->tok.ttype;
 996     }
 997
 998     if (ch == '\'')
 999     {
1000         /* we parse character constants like string,
1001          * but return TOKEN_CHARCONST, or a vector type if it fits...
1002          * Likewise actual unescaping has to be done by the parser.
1003          * The difference is we don't allow 'char' 'continuation'.
1004          */
1005          lex->tok.ttype = lex_finish_string(lex, '\'');
1006          if (!lex_endtoken(lex))
1007               return (lex->tok.ttype = TOKEN_FATAL);
1008
1009          /* It's a vector if we can successfully scan 3 floats */
1010 #ifdef WIN32
1011          if (sscanf_s(lex->tok.value, " %f %f %f ",
1012                     &lex->tok.constval.v.x, &lex->tok.constval.v.y, &lex->tok.constval.v.z) == 3)
1013 #else
1014          if (sscanf(lex->tok.value, " %f %f %f ",
1015                     &lex->tok.constval.v.x, &lex->tok.constval.v.y, &lex->tok.constval.v.z) == 3)
1016 #endif
1017          {
1018               lex->tok.ttype = TOKEN_VECTORCONST;
1019          }
1020
1021          return lex->tok.ttype;
1022     }
1023
1024     if (isdigit(ch))
1025     {
1026         lex->tok.ttype = lex_finish_digit(lex, ch);
1027         if (!lex_endtoken(lex))
1028             return (lex->tok.ttype = TOKEN_FATAL);
1029         return lex->tok.ttype;
1030     }
1031
1032     lexerror(lex, "unknown token");
1033     return (lex->tok.ttype = TOKEN_ERROR);
1034 }