12 static const char *keywords_qc[] = {
20 static const char *keywords_fg[] = {
21 "switch", "case", "default",
26 "__builtin_debug_printtype"
// Vector of every file-name string that has been assigned to a lex_file's name
// (lex_open, lex_open_string and the "file" pragma all push into it), so the
// names are released from this list rather than by lex_close.
32 static char* *lex_filenames;
// Print a printf-style "parse error" diagnostic at LVL_ERROR through con_vprintmsg.
// Two call forms are visible: one reports lex->name, lex->sline and lex->column,
// the other reports an empty file name with line and column 0.
// NOTE(review): the condition selecting between them is not visible in this view;
// callers pass nullptr for lex before a lexer exists, so presumably a null lex
// selects the second form - confirm.
34 static void lexerror(lex_file *lex, const char *fmt, ...)
40 con_vprintmsg(LVL_ERROR, lex->name, lex->sline, lex->column, "parse error", fmt, ap);
42 con_vprintmsg(LVL_ERROR, "", 0, 0, "parse error", fmt, ap);
// Emit a printf-style lexer warning of category warntype through vcompile_warning,
// using a context whose line and column come from lex->sline and lex->column.
// The result of vcompile_warning is stored in r.
// NOTE(review): presumably r is the return value, and callers treat true as
// "abort with an error" - confirm against the full source.
46 static bool lexwarn(lex_file *lex, int warntype, const char *fmt, ...)
53 ctx.line = lex->sline;
54 ctx.column = lex->column;
57 r = vcompile_warning(ctx, warntype, fmt, ap);
// Reset lex->tok for a fresh token: empty its text buffer, set the constant's
// type tag to TYPE_VOID, and stamp the token context with the lexer's current
// start line (sline), file name and column.
62 static void lex_token_new(lex_file *lex)
64 lex->tok.value.shrinkto(0);
66 lex->tok.constval.t = TYPE_VOID;
67 lex->tok.ctx.line = lex->sline;
68 lex->tok.ctx.file = lex->name;
69 lex->tok.ctx.column = lex->column;
// Forward declarations: lex_open uses both helpers before their definitions.
72 static void lex_ungetch(lex_file *lex, Token ch);
73 static Token lex_getch(lex_file *lex);
// Create a lexer reading from the file at path `file`.
// The file is opened in binary mode; failure to open or to allocate the lex_file
// is reported through lexerror with a null lexer. The structure is zero-filled,
// its name is a util_strdup copy of `file`, and line counting starts at 1.
// The first three characters are read and compared against 0xEFBBBF (the UTF-8
// byte-order mark); when they differ, all three are pushed back in reverse order
// so they are read again as normal input. The name is finally registered in
// lex_filenames.
// NOTE(review): the three lex_getch calls are operands of `|`, whose evaluation
// order is unspecified in C++, so the bytes are not guaranteed to be combined in
// file order. Reading them into three locals first would make this well-defined.
75 lex_file* lex_open(const char *file)
78 FILE *in = fopen(file, "rb");
82 lexerror(nullptr, "open failed: '%s'\n", file);
86 lex = (lex_file*)mem_a(sizeof(*lex));
89 lexerror(nullptr, "out of memory\n");
93 memset(lex, 0, sizeof(*lex));
96 lex->name = util_strdup(file);
97 lex->line = 1; /* we start counting at 1 */
103 if ((read = (lex_getch(lex) << 16) | (lex_getch(lex) << 8) | lex_getch(lex)) != 0xEFBBBF) {
104 lex_ungetch(lex, static_cast<Token>((read & 0x0000FF)));
105 lex_ungetch(lex, static_cast<Token>((read & 0x00FF00) >> 8));
106 lex_ungetch(lex, static_cast<Token>((read & 0xFF0000) >> 16));
109 * otherwise the lexer has advanced 3 bytes for the BOM, we need
110 * to set the column back to 0
115 vec_push(lex_filenames, lex->name);
// Create a lexer reading from the in-memory buffer `str` of `len` bytes.
// The lex_file is allocated with mem_a and zero-filled; an allocation failure is
// reported as "out of memory". The buffer pointer is stored as-is (not copied)
// with the read position at 0. The lexer's name is a util_strdup copy of `name`,
// or of "<string-source>" when `name` is null, and is registered in lex_filenames.
// Line counting starts at 1.
119 lex_file* lex_open_string(const char *str, size_t len, const char *name)
123 lex = (lex_file*)mem_a(sizeof(*lex));
125 lexerror(nullptr, "out of memory\n");
129 memset(lex, 0, sizeof(*lex));
132 lex->open_string = str;
133 lex->open_string_length = len;
134 lex->open_string_pos = 0;
136 lex->name = util_strdup(name ? name : "<string-source>");
137 lex->line = 1; /* we start counting at 1 */
142 vec_push(lex_filenames, lex->name);
149 for (size_t i = 0; i < vec_size(lex_filenames); ++i)
150 mem_d(lex_filenames[i]);
151 vec_free(lex_filenames);
// Release a lexer. The frame-macro vector is freed here; the name string is
// deliberately not freed because it is tracked in lex_filenames.
// NOTE(review): the remaining cleanup steps are not visible in this view.
154 void lex_close(lex_file *lex)
156 vec_free(lex->frames);
161 /* mem_d(lex->name); collected in lex_filenames */
// Fetch the next raw character from the underlying input, without trigraph or
// digraph handling and without line counting.
// For file input the character comes from fgetc, with EOF mapped to Token::END.
// For string input the character at open_string_pos is returned and the position
// advanced; reaching open_string_length is checked first.
// NOTE(review): the result of that end-of-string check is not visible here,
// presumably Token::END - confirm.
167 static Token lex_fgetc(lex_file *lex)
171 auto c = fgetc(lex->file);
172 return c == EOF ? Token::END : static_cast<Token>(c);
174 if (lex->open_string) {
175 if (lex->open_string_pos >= lex->open_string_length)
178 auto c = lex->open_string[lex->open_string_pos++];
179 return static_cast<Token>(c);
184 /* Get or put-back data
185 * The following two functions do NOT understand what kind of data they
187 * They are merely wrapping get/put in order to count line numbers.
// Called after a Token::QUESTION has been read: try to complete a trigraph.
// Reads a second raw character; if it is not another QUESTION it is pushed back.
// Otherwise a third raw character is read and translated:
//   EQ -> HASH, DIV -> BACKSLASH, QUOT_SINGLE -> XOR,
//   PAREN_OPEN -> BRACKET_OPEN, PAREN_CLOSE -> BRACKET_CLOSE, NOT -> OR,
//   LT -> BRACE_OPEN, GT -> BRACE_CLOSE, SUB -> BITNOT.
// For any other third character both look-ahead characters are pushed back.
// A Token::LF read while push_line is off gets special handling for both
// look-ahead characters (the handling itself is not visible in this view).
// NOTE(review): presumably `old` is returned when no trigraph matches - confirm.
189 static Token lex_try_trigraph(lex_file *lex, Token old)
191 auto c2 = lex_fgetc(lex);
192 if (!lex->push_line && c2 == Token::LF) {
197 if (c2 != Token::QUESTION) {
198 lex_ungetch(lex, c2);
202 auto c3 = lex_fgetc(lex);
203 if (!lex->push_line && c3 == Token::LF) {
209 case Token::EQ: return Token::HASH;
210 case Token::DIV: return Token::BACKSLASH;
211 case Token::QUOT_SINGLE: return Token::XOR;
212 case Token::PAREN_OPEN: return Token::BRACKET_OPEN;
213 case Token::PAREN_CLOSE: return Token::BRACKET_CLOSE;
214 case Token::NOT: return Token::OR;
215 case Token::LT: return Token::BRACE_OPEN;
216 case Token::GT: return Token::BRACE_CLOSE;
217 case Token::SUB: return Token::BITNOT;
219 lex_ungetch(lex, c3);
220 lex_ungetch(lex, c2);
// Called after `ch` (LT, COLON or MOD) has been read: try to complete a digraph.
// Reads one more raw character and translates the pair:
//   LT COLON -> BRACKET_OPEN, COLON GT -> BRACKET_CLOSE,
//   LT MOD -> BRACE_OPEN, MOD GT -> BRACE_CLOSE.
// MOD COLON is also recognised.
// NOTE(review): its result is not visible in this view, presumably Token::HASH.
// When no pair matches, the look-ahead character is pushed back.
225 static Token lex_try_digraph(lex_file *lex, Token ch)
227 auto c2 = lex_fgetc(lex);
228 /* we just used fgetc() so count lines
229 * need to offset a \n the ungetch would recognize
231 if (!lex->push_line && c2 == Token::LF)
233 if (ch == Token::LT && c2 == Token::COLON)
234 return Token::BRACKET_OPEN;
235 else if (ch == Token::COLON && c2 == Token::GT)
236 return Token::BRACKET_CLOSE;
237 else if (ch == Token::LT && c2 == Token::MOD)
238 return Token::BRACE_OPEN;
239 else if (ch == Token::MOD && c2 == Token::GT)
240 return Token::BRACE_CLOSE;
241 else if (ch == Token::MOD && c2 == Token::COLON)
243 lex_ungetch(lex, c2);
// Return the next character of input for the lexer.
// Characters previously pushed back with lex_ungetch are served from lex->peek
// first. Otherwise a raw character is read with lex_fgetc; a QUESTION is handed
// to lex_try_trigraph, and LT, COLON or MOD are handed to lex_try_digraph unless
// flags.nodigraphs is set. A Token::LF seen while push_line is off gets special
// handling on both paths.
// NOTE(review): that handling is not visible in this view, presumably
// line/column bookkeeping.
247 static Token lex_getch(lex_file *lex)
251 if (!lex->push_line && lex->peek[lex->peekpos] == Token::LF) {
255 return lex->peek[lex->peekpos];
258 auto ch = lex_fgetc(lex);
259 if (!lex->push_line && ch == Token::LF) {
263 else if (ch == Token::QUESTION)
264 return lex_try_trigraph(lex, ch);
265 else if (!lex->flags.nodigraphs && (ch == Token::LT || ch == Token::COLON || ch == Token::MOD))
266 return lex_try_digraph(lex, ch);
// Push `ch` back so that the next lex_getch returns it: the character is stored
// at lex->peek[peekpos] and peekpos is incremented. A Token::LF pushed back while
// push_line is off gets special handling (not visible in this view).
// NOTE(review): no bounds check on the peek buffer is visible here - confirm the
// buffer is large enough for the deepest push-back sequence.
270 static void lex_ungetch(lex_file *lex, Token ch)
272 lex->peek[lex->peekpos++] = ch;
274 if (!lex->push_line && ch == Token::LF) {
280 /* classify characters
281 * some additions to the is*() functions of ctype.h
284 /* Idents are alphanumeric, but they start with alpha or _ */
// True when ch may begin an identifier: a letter according to util_isalpha, or '_'.
285 static bool isident_start(int ch)
287 return util_isalpha(ch) || ch == '_';
// True when ch may appear inside an identifier: anything accepted by
// isident_start, or a digit according to util_isdigit.
290 static bool isident(int ch)
292 return isident_start(ch) || util_isdigit(ch);
// True only for the letter hex digits a-f and A-F; decimal digits are not accepted.
295 /* isxdigit_only is used when we already know it's not a digit
296 * and want to see if it's a hex digit anyway.
298 static bool isxdigit_only(int ch)
300 return (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F');
303 /* Append a character to the token buffer */
// ch is pushed onto lex->tok.value unchanged.
304 static void lex_tokench(lex_file *lex, int ch)
306 lex->tok.value.push(ch);
309 /* Append a trailing null-byte */
// A 0 is pushed onto the token buffer and the buffer is then shrunk by one.
// NOTE(review): presumably shrinkby(1) only reduces the logical length, leaving
// the terminator in storage so the text reads as a C string without the
// terminator counting as content - confirm.
310 static void lex_endtoken(lex_file *lex)
312 lex->tok.value.push(0);
313 lex->tok.value.shrinkby(1);
// Try to read a directive of the form  #pragma command(param)  at the current
// input position.
//
// Parsing steps visible here:
//   - nothing is attempted the same way when flags.preprocessing is set (that
//     branch's body is not visible in this view);
//   - the next character must be Token::HASH, otherwise it is pushed back;
//   - lowercase letters are collected (while fewer than 8 are stored) and must
//     spell "pragma", followed by Token::WS;
//   - lowercase letters are collected into `command` (while fewer than 32 are
//     stored) and must be followed by Token::PAREN_OPEN;
//   - `param` is collected (while fewer than 1024 characters are stored) until
//     Token::PAREN_CLOSE or Token::LF; a PAREN_CLOSE is required.
//
// Commands recognised: "push" and "pop" with param "line" (both consult
// lex->push_line), "file" (lex->name becomes a util_strdup copy of param, also
// registered in lex_filenames) and "line" (strtol of param with base
// auto-detection, minus 1). The rest of the line is then consumed up to
// Token::LF or Token::END.
//
// The trailing loops push collected characters back onto the input, last
// character first, so the text can be re-read when the directive is not accepted.
// NOTE(review): the control flow connecting the steps above (labels, returns)
// is not visible in this view.
316 static bool lex_try_pragma(lex_file *lex)
318 char *pragma = nullptr;
319 char *command = nullptr;
320 char *param = nullptr;
323 if (lex->flags.preprocessing)
328 auto ch = lex_getch(lex);
329 if (ch != Token::HASH) {
330 lex_ungetch(lex, ch);
334 for (ch = lex_getch(lex); vec_size(pragma) < 8 && ch >= 'a' && ch <= 'z'; ch = lex_getch(lex))
335 vec_push(pragma, ch);
338 if (ch != Token::WS|| strcmp(pragma, "pragma")) {
339 lex_ungetch(lex, ch);
343 for (ch = lex_getch(lex); vec_size(command) < 32 && ch >= 'a' && ch <= 'z'; ch = lex_getch(lex))
344 vec_push(command, ch);
345 vec_push(command, 0);
347 if (ch != Token::PAREN_OPEN) {
348 lex_ungetch(lex, ch);
352 for (ch = lex_getch(lex); vec_size(param) < 1024 && ch != Token::PAREN_CLOSE && ch != Token::LF; ch = lex_getch(lex))
356 if (ch != Token::PAREN_CLOSE) {
357 lex_ungetch(lex, ch);
361 if (!strcmp(command, "push")) {
362 if (!strcmp(param, "line")) {
364 if (lex->push_line == 1)
370 else if (!strcmp(command, "pop")) {
371 if (!strcmp(param, "line")) {
374 if (lex->push_line == 0)
380 else if (!strcmp(command, "file")) {
381 lex->name = util_strdup(param);
382 vec_push(lex_filenames, lex->name);
384 else if (!strcmp(command, "line")) {
385 line = strtol(param, nullptr, 0)-1;
391 while (ch != Token::LF && ch != Token::END)
401 while (vec_size(command)) {
402 lex_ungetch(lex, static_cast<Token>(vec_last(command)));
406 lex_ungetch(lex, Token::WS);
410 while (vec_size(param)) {
411 lex_ungetch(lex, static_cast<Token>(vec_last(param)));
415 lex_ungetch(lex, Token::WS);
419 while (vec_size(pragma)) {
420 lex_ungetch(lex, static_cast<Token>(vec_last(pragma)));
425 lex_ungetch(lex, Token::HASH);
431 /* Skip whitespace and comments and return the first
432 * non-white character.
433 * As this makes use of the above getch() ungetch() functions,
434 * we don't need to care at all about line numbering anymore.
436 * In theory, this function should only be used at the beginning
437 * of lexing, or when we *know* the next character is part of the token.
438 * Otherwise, if the parser throws an error, the linenumber may not be
439 * the line of the error, but the line of the next token AFTER the error.
441 * This is currently only problematic when using c-like string-continuation,
442 * since comments and whitespaces are allowed between 2 such strings.
446 "A continuation of the previous string"
447 // This line is skipped
450 * In this case, if the parser decides it didn't actually want a string,
451 * and uses lex->line to print an error, it will show the ', foo);' line's
454 * On the other hand, the parser is supposed to remember the line of the next
455 * token's beginning. In this case we would want skipwhite() to be called
456 * AFTER reading a token, so that the parser, before reading the NEXT token,
457 * doesn't store the *comment's* linenumber, but the actual token's linenumber.
460 * here is to store the line of the first character after skipping
461 * the initial whitespace in lex->sline, this happens in lex_do.
// Skip whitespace, one-line comments and multi-line comments.
//
// `hadwhite` seeds the local "whitespace was seen" flag. While whitespace is
// being consumed, every Token::LF triggers an attempt to parse a pragma via
// lex_try_pragma.
//
// When flags.preprocessing is set the skipped text is not discarded silently:
//   - at a newline, the character is pushed back if whitespace was seen first,
//     otherwise an end-of-line result is produced (see the inline comments);
//   - other whitespace is appended to the token buffer;
//   - comment characters are recorded in the token buffer as Token::WS, with
//     Token::LF also recorded inside multi-line comments, and a one-line comment
//     pushes a Token::LF back onto the input when it ends.
//
// A DIV that does not start a comment is rolled back and ends the skipping loop.
// NOTE(review): the return statements are not visible in this view.
463 static Token lex_skipwhite(lex_file *lex, bool hadwhite)
466 bool haswhite = hadwhite;
471 while (ch != Token::END && util_isspace(ch)) {
472 if (ch == Token::LF) {
473 if (lex_try_pragma(lex))
476 if (lex->flags.preprocessing) {
477 if (ch == Token::LF) {
479 /* see if there was whitespace first */
480 if (haswhite) { /* (vec_size(lex->tok.value)) { */
481 lex_ungetch(lex, ch);
485 /* otherwise return EOL */
489 lex_tokench(lex, ch);
494 if (ch == Token::DIV) {
496 if (ch == Token::DIV)
498 /* one line comment */
501 if (lex->flags.preprocessing) {
503 lex_tokench(lex, Token::WS);
504 lex_tokench(lex, Token::WS);
507 while (ch != Token::END && ch != Token::LF) {
508 if (lex->flags.preprocessing)
509 lex_tokench(lex, Token::WS); /* ch); */
512 if (lex->flags.preprocessing) {
513 lex_ungetch(lex, Token::LF);
519 if (ch == Token::MUL)
521 /* multiline comment */
522 if (lex->flags.preprocessing) {
524 lex_tokench(lex, Token::WS);
525 lex_tokench(lex, Token::WS);
528 while (ch != Token::END)
531 if (ch == Token::MUL) {
533 if (ch == Token::DIV) {
534 if (lex->flags.preprocessing) {
535 lex_tokench(lex, Token::WS);
536 lex_tokench(lex, Token::WS);
540 lex_ungetch(lex, ch);
542 if (lex->flags.preprocessing) {
544 lex_tokench(lex, Token::LF);
546 lex_tokench(lex, Token::WS);
549 ch = Token::WS; /* cause TRUE in the isspace check */
552 /* Otherwise roll back to the slash and break out of the loop */
553 lex_ungetch(lex, ch);
557 } while (ch != Token::END && util_isspace(ch));
561 lex_ungetch(lex, ch);
// Append the rest of an identifier to the token buffer: characters are consumed
// while they satisfy isident and input has not ended; the first character that
// does not belong to the identifier is pushed back.
568 static bool GMQCC_WARN lex_finish_ident(lex_file *lex)
570 auto ch = lex_getch(lex);
571 while (ch != Token::END && isident(ch))
573 lex_tokench(lex, ch);
577 /* last ch was not an ident ch: */
578 lex_ungetch(lex, ch);
583 /* read one ident for the frame list */
// Whitespace other than Token::LF is skipped first. The next character must
// satisfy isident_start, otherwise an "invalid framename" error is reported.
// The identifier is then collected into the token buffer via lex_finish_ident.
// The caller lex_finish_frames treats a positive result as "end of line" and a
// negative result as an error.
584 static int lex_parse_frame(lex_file *lex)
588 auto ch = lex_getch(lex);
589 while (ch != Token::END && ch != Token::LF && util_isspace(ch))
595 if (!isident_start(ch)) {
596 lexerror(lex, "invalid framename, must start with one of a-z or _, got %c", ch);
600 lex_tokench(lex, ch);
601 if (!lex_finish_ident(lex))
607 /* read a list of $frames */
// Frame names are read one at a time with lex_parse_frame: a positive result
// means the end of the line was reached, a negative result is an error.
// A name that already exists in lex->frames gets the next frame value and a
// WARN_FRAME_MACROS warning; a new name is appended with the next frame value.
//
// Fixes relative to the previous version:
//   - the warning passed the token-value object itself through the variadic
//     argument list for a %s conversion; it now passes the C string, as every
//     other use of the token text does;
//   - the new macro's name was assigned from util_strdup(), whose allocation was
//     never released once the name had been copied; the name is now built
//     directly from the token text.
// NOTE(review): the second fix relies on the macro name being a std::string, as
// the $modelname handling (which move-assigns a std::string into it) indicates.
608 static bool lex_finish_frames(lex_file *lex)
615 rc = lex_parse_frame(lex);
616 if (rc > 0) /* end of line */
618 if (rc < 0) /* error */
621 for (i = 0; i < vec_size(lex->frames); ++i) {
622 if (lex->frames[i].name == lex->tok.value.c_str()) {
623 lex->frames[i].value = lex->framevalue++;
624 if (lexwarn(lex, WARN_FRAME_MACROS, "duplicate frame macro defined: `%s`", lex->tok.value.c_str()))
629 if (i < vec_size(lex->frames))
632 m.value = lex->framevalue++;
633 m.name = std::string(lex->tok.value.c_str());
634 lex->tok.value.shrinkto(0);
635 vec_push(lex->frames, m);
// Read the body of a quoted literal into the token buffer, up to the closing
// quote. Returns Token::STRINGCONST on success and Token::ERROR (also stored in
// lex->tok.ttype) on end of input or on a malformed escape sequence.
//
// In preprocessing mode a backslash and the character after it are copied
// verbatim. Otherwise escape sequences are translated:
//   - \a \r \n \t \f \v map to the usual control characters;
//   - a two-hex-digit form builds a character code from two hex digits;
//   - '<' '-' '>' '[' ']' map to the codes 29, 30, 31, 16 and 17;
//   - a numeric form terminated by Token::BRACE_CLOSE is read as decimal, as hex
//     when it starts with 'x', or as octal when it starts with '0'; the value
//     must not exceed 0x10FFFF, or 255 when the UTF8 flag is off, and is emitted
//     as UTF-8 via utf8_from when the flag is on and the value is >= 128;
//   - anything else produces a WARN_UNKNOWN_CONTROL_SEQUENCE warning and is kept
//     together with its backslash.
//
// Fix relative to the previous version: the octal branch accumulated
// `chr - '0'` instead of the digit just read, so octal codes never produced the
// intended value; it also accepted '8' and '9' as octal digits. It now uses the
// digit read and rejects digits above '7' with "bad character code".
641 static Token GMQCC_WARN lex_finish_string(lex_file *lex, int quote)
644 int ch = 0, texttype = 0;
645 Token nextch = Token::NONE;
648 char u8buf[8]; /* way more than enough */
651 while (ch != Token::END)
655 return Token::STRINGCONST;
657 if (lex->flags.preprocessing && ch == '\\') {
658 lex_tokench(lex, ch);
660 if (ch == Token::END) {
661 lexerror(lex, "unexpected end of file");
662 lex_ungetch(lex, Token::END); /* next token to be Token::END */
663 return (lex->tok.ttype = Token::ERROR);
665 lex_tokench(lex, ch);
667 else if (ch == '\\') {
669 if (ch == Token::END) {
670 lexerror(lex, "unexpected end of file");
671 lex_ungetch(lex, Token::END); /* next token to be Token::END */
672 return (lex->tok.ttype = Token::ERROR);
679 case 'a': ch = '\a'; break;
680 case 'r': ch = '\r'; break;
681 case 'n': ch = '\n'; break;
682 case 't': ch = '\t'; break;
683 case 'f': ch = '\f'; break;
684 case 'v': ch = '\v'; break;
687 /* same procedure as in fteqcc */
689 nextch = lex_getch(lex);
690 if (nextch >= '0' && nextch <= '9')
692 else if (nextch >= 'a' && nextch <= 'f')
693 ch += nextch - 'a' + 10;
694 else if (nextch >= 'A' && nextch <= 'F')
695 ch += nextch - 'A' + 10;
697 lexerror(lex, "bad character code");
698 lex_ungetch(lex, nextch);
699 return (lex->tok.ttype = Token::ERROR);
703 nextch = lex_getch(lex);
704 if (nextch >= '0' && nextch <= '9')
706 else if (nextch >= 'a' && nextch <= 'f')
707 ch += nextch - 'a' + 10;
708 else if (nextch >= 'A' && nextch <= 'F')
709 ch += nextch - 'A' + 10;
711 lexerror(lex, "bad character code");
712 lex_ungetch(lex, nextch);
713 return (lex->tok.ttype = Token::ERROR);
718 case '0': case '1': case '2': case '3':
719 case '4': case '5': case '6': case '7':
723 case '<': ch = 29; break;
724 case '-': ch = 30; break;
725 case '>': ch = 31; break;
726 case '[': ch = 16; break;
727 case ']': ch = 17; break;
730 nextch = lex_getch(lex);
731 hex = (nextch == 'x');
732 oct = (nextch == '0');
734 lex_ungetch(lex, nextch);
735 for (nextch = lex_getch(lex); nextch != Token::BRACE_CLOSE; nextch = lex_getch(lex)) {
737 if (nextch >= '0' && nextch <= '9')
738 chr = chr * 10 + nextch - '0';
740 lexerror(lex, "bad character code");
741 return (lex->tok.ttype = Token::ERROR);
744 if (nextch >= '0' && nextch <= '9')
745 chr = chr * 0x10 + nextch - '0';
746 else if (nextch >= 'a' && nextch <= 'f')
747 chr = chr * 0x10 + nextch - 'a' + 10;
748 else if (nextch >= 'A' && nextch <= 'F')
749 chr = chr * 0x10 + nextch - 'A' + 10;
751 lexerror(lex, "bad character code");
752 return (lex->tok.ttype = Token::ERROR);
// octal: only the digits 0-7 are valid, and the digit read (nextch) is accumulated
755 if (nextch >= '0' && nextch <= '7')
756 chr = chr * 8 + nextch - '0';
758 lexerror(lex, "bad character code");
759 return (lex->tok.ttype = Token::ERROR);
762 if (chr > 0x10FFFF || (!OPTS_FLAG(UTF8) && chr > 255))
764 lexerror(lex, "character code out of range");
765 return (lex->tok.ttype = Token::ERROR);
768 if (OPTS_FLAG(UTF8) && chr >= 128) {
769 u8len = utf8_from(u8buf, chr);
774 lex->column += u8len;
775 for (uc = 0; uc < u8len; ++uc)
776 lex_tokench(lex, u8buf[uc]);
778 * the last character will be inserted with the tokench() call
798 lexwarn(lex, WARN_UNKNOWN_CONTROL_SEQUENCE, "unrecognized control sequence: \\%c", ch);
799 /* so we just add the character plus backslash no matter what it actually is */
800 lex_tokench(lex, '\\');
802 /* add the character finally */
803 lex_tokench(lex, ch | texttype);
806 lex_tokench(lex, ch);
808 lexerror(lex, "unexpected end of file within string constant");
809 lex_ungetch(lex, Token::END); /* next token to be Token::END */
810 return (lex->tok.ttype = Token::ERROR);
// Lex the remainder of a numeric literal whose first character is `lastch`.
//
// The token starts as Token::FLOATCONST when the first character is Token::DOT
// and as Token::INTCONST otherwise. A single-digit literal is returned directly
// with constval.i = lastch - '0'. A leading "0x" enables hex digits for the rest
// of the literal. One decimal point is accepted in non-hex mode and turns the
// token into a FLOATCONST, after which only decimal digits are accepted. A
// trailing 'f' on a float is consumed rather than pushed back; other trailing
// characters give "unexpected trailing characters after number" and Token::ERROR.
//
// The final value is parsed from the token text: strtod for floats, strtol with
// base auto-detection for integers.
// NOTE(review): neither conversion is visibly checked for range errors.
813 static Token GMQCC_WARN lex_finish_digit(lex_file *lex, Token lastch)
819 /* parse a number... */
820 if (ch == Token::DOT)
821 lex->tok.ttype = Token::FLOATCONST;
823 lex->tok.ttype = Token::INTCONST;
825 lex_tokench(lex, ch);
828 if (ch != Token::DOT && !util_isdigit(ch))
830 if (lastch != '0' || ch != 'x')
832 /* end of the number or EOF */
833 lex_ungetch(lex, ch);
836 lex->tok.constval.i = lastch - '0';
837 return lex->tok.ttype;
843 /* EOF would have been caught above */
845 if (ch != Token::DOT)
847 lex_tokench(lex, ch);
849 while (util_isdigit(ch) || (ishex && isxdigit_only(ch)))
851 lex_tokench(lex, ch);
855 /* NOT else, '.' can come from above as well */
856 if (lex->tok.ttype != Token::FLOATCONST && ch == Token::DOT && !ishex)
858 /* Allow a decimal point in non-hex mode */
859 lex->tok.ttype = Token::FLOATCONST;
860 lex_tokench(lex, ch);
862 /* continue digits-only */
864 while (util_isdigit(ch))
866 lex_tokench(lex, ch);
870 /* put back the last character */
871 /* but do not put back the trailing 'f' of a float */
872 if (lex->tok.ttype == Token::FLOATCONST && ch == 'f')
875 /* generally we don't want words to follow numbers: */
877 lexerror(lex, "unexpected trailing characters after number");
878 return (lex->tok.ttype = Token::ERROR);
880 lex_ungetch(lex, ch);
883 if (lex->tok.ttype == Token::FLOATCONST)
884 lex->tok.constval.f = strtod(lex->tok.value.c_str(), nullptr);
886 lex->tok.constval.i = strtol(lex->tok.value.c_str(), nullptr, 0);
887 return lex->tok.ttype;
890 Token lex_do(lex_file *lex)
892 Token ch, nextch, thirdch;
893 bool hadwhite = false;
898 ch = lex_skipwhite(lex, hadwhite);
900 if (!lex->flags.mergelines || ch != Token::BACKSLASH)
905 if (ch != Token::LF) {
906 lex_ungetch(lex, ch);
907 ch = Token::BACKSLASH;
910 /* we reached a linemerge */
911 lex_tokench(lex, '\n');
914 if (lex->flags.preprocessing && (ch == Token::WHITE || ch == Token::EOL || ch == Token::FATAL)) {
915 return (lex->tok.ttype = ch);
918 lex->sline = lex->line;
919 lex->tok.ctx.line = lex->sline;
920 lex->tok.ctx.file = lex->name;
923 return (lex->tok.ttype = Token::FATAL);
925 if (ch == Token::END) {
927 return (lex->tok.ttype = Token::END);
930 /* modelgen / spritegen commands */
931 if (ch == Token::DOLLAR && !lex->flags.preprocessing) {
936 if (!isident_start(ch)) {
937 lexerror(lex, "hanging '$' modelgen/spritegen command line");
940 lex_tokench(lex, ch);
941 if (!lex_finish_ident(lex))
942 return (lex->tok.ttype = Token::ERROR);
944 /* skip the known commands */
945 v = lex->tok.value.c_str();
947 if (!strcmp(v, "frame") || !strcmp(v, "framesave"))
949 /* frame/framesave command works like an enum
950 * similar to fteqcc we handle this in the lexer.
951 * The reason for this is that it is sensitive to newlines,
952 * which the parser is unaware of
954 if (!lex_finish_frames(lex))
955 return (lex->tok.ttype = Token::ERROR);
959 if (!strcmp(v, "framevalue"))
962 while (ch != Token::END && util_isspace(ch) && ch != Token::LF)
965 if (!util_isdigit(ch)) {
966 lexerror(lex, "$framevalue requires an integer parameter");
971 lex->tok.ttype = lex_finish_digit(lex, ch);
973 if (lex->tok.ttype != Token::INTCONST) {
974 lexerror(lex, "$framevalue requires an integer parameter");
977 lex->framevalue = lex->tok.constval.i;
981 if (!strcmp(v, "framerestore"))
987 rc = lex_parse_frame(lex);
990 lexerror(lex, "$framerestore requires a framename parameter");
994 return (lex->tok.ttype = Token::FATAL);
996 v = lex->tok.value.c_str();
997 for (frame = 0; frame < vec_size(lex->frames); ++frame) {
998 if (lex->frames[frame].name == v) {
999 lex->framevalue = lex->frames[frame].value;
1003 lexerror(lex, "unknown framename `%s`", v);
1007 if (!strcmp(v, "modelname"))
1013 rc = lex_parse_frame(lex);
1016 lexerror(lex, "$modelname requires a parameter");
1020 return (lex->tok.ttype = Token::FATAL);
1022 if (lex->modelname.size()) {
1024 m.name = std::move(lex->modelname);
1025 m.value = lex->framevalue;
1026 vec_push(lex->frames, m);
1028 lex->modelname = std::string(lex->tok.value.c_str());
1032 if (!strcmp(v, "flush"))
1034 vec_free(lex->frames);
1035 /* skip line (fteqcc does it too) */
1036 ch = lex_getch(lex);
1037 while (ch != Token::END && ch != Token::LF)
1038 ch = lex_getch(lex);
1042 if (!strcmp(v, "cd") ||
1043 !strcmp(v, "origin") ||
1044 !strcmp(v, "base") ||
1045 !strcmp(v, "flags") ||
1046 !strcmp(v, "scale") ||
1050 ch = lex_getch(lex);
1051 while (ch != Token::END && ch != Token::LF)
1052 ch = lex_getch(lex);
1056 for (frame = 0; frame < vec_size(lex->frames); ++frame) {
1057 if (lex->frames[frame].name == v) {
1058 lex->tok.constval.i = lex->frames[frame].value;
1059 return (lex->tok.ttype = Token::INTCONST);
1063 lexerror(lex, "invalid frame macro");
1067 /* single-character tokens */
1070 case Token::BRACKET_OPEN:
1071 nextch = lex_getch(lex);
1072 if (nextch == Token::BRACKET_OPEN) {
1073 lex_tokench(lex, ch);
1074 lex_tokench(lex, nextch);
1076 return (lex->tok.ttype = Token::ATTRIBUTE_OPEN);
1078 lex_ungetch(lex, nextch);
1080 case Token::PAREN_OPEN:
1082 case Token::QUESTION:
1083 lex_tokench(lex, ch);
1085 if (lex->flags.noops)
1086 return (lex->tok.ttype = ch);
1088 return (lex->tok.ttype = Token::OPERATOR);
1090 case Token::BRACKET_CLOSE:
1091 if (lex->flags.noops) {
1092 nextch = lex_getch(lex);
1093 if (nextch == Token::BRACKET_CLOSE) {
1094 lex_tokench(lex, ch);
1095 lex_tokench(lex, nextch);
1097 return (lex->tok.ttype = Token::ATTRIBUTE_CLOSE);
1099 lex_ungetch(lex, nextch);
1102 case Token::PAREN_CLOSE:
1103 case Token::SEMICOLON:
1104 case Token::BRACE_OPEN:
1105 case Token::BRACE_CLOSE:
1108 lex_tokench(lex, ch);
1110 return (lex->tok.ttype = ch);
1115 if (ch == Token::DOT) {
1116 nextch = lex_getch(lex);
1117 /* digits starting with a dot */
1118 if (util_isdigit(nextch)) {
1119 lex_ungetch(lex, nextch);
1120 lex->tok.ttype = lex_finish_digit(lex, ch);
1122 return lex->tok.ttype;
1124 lex_ungetch(lex, nextch);
1127 if (lex->flags.noops)
1129 /* Detect characters early which are normally
1130 * operators OR PART of an operator.
1148 lex_tokench(lex, ch);
1150 return (lex->tok.ttype = ch);
1156 if (ch == Token::DOT)
1158 lex_tokench(lex, ch);
1160 nextch = lex_getch(lex);
1161 if (nextch != Token::DOT) {
1162 lex_ungetch(lex, nextch);
1164 if (lex->flags.noops)
1165 return (lex->tok.ttype = ch);
1167 return (lex->tok.ttype = Token::OPERATOR);
1170 nextch = lex_getch(lex);
1171 if (nextch != Token::DOT) {
1172 lex_ungetch(lex, nextch);
1173 lex_ungetch(lex, Token::DOT);
1175 if (lex->flags.noops)
1176 return (lex->tok.ttype = ch);
1178 return (lex->tok.ttype = Token::OPERATOR);
1180 // fill the token to be "..."
1181 lex_tokench(lex, ch);
1182 lex_tokench(lex, ch);
1184 return (lex->tok.ttype = Token::DOTS);
1187 if (ch == Token::COMMA || ch == Token::DOT) {
1188 lex_tokench(lex, ch);
1190 return (lex->tok.ttype = Token::OPERATOR);
1193 if (ch == Token::ADD || ch == Token::SUB || /* ++, --, +=, -= */
1194 ch == Token::GT || ch == Token::LT|| /* <<, >>, <=, >= and >< as well! */
1195 ch == Token::EQ || ch == Token::NOT || /* <=>, ==, != */
1196 ch == Token::AND || ch == Token::OR || /* &&, ||, &=, |= */
1197 ch == Token::BITNOT || ch == Token::XOR /* ~=, ~, ^ */
1199 lex_tokench(lex, ch);
1200 nextch = lex_getch(lex);
1202 if ((nextch == Token::EQ && ch != Token::LT) || (nextch == Token::LT && ch == Token::GT))
1203 lex_tokench(lex, nextch);
1204 else if (nextch == ch && ch != Token::NOT) {
1205 lex_tokench(lex, nextch);
1206 if ((thirdch = lex_getch(lex)) == Token::EQ)
1207 lex_tokench(lex, thirdch);
1209 lex_ungetch(lex, thirdch);
1210 } else if (ch == Token::LT && nextch == Token::EQ) {
1211 lex_tokench(lex, nextch);
1212 if ((thirdch = lex_getch(lex)) == Token::GT)
1213 lex_tokench(lex, thirdch);
1215 lex_ungetch(lex, thirdch);
1216 } else if (ch == Token::AND && nextch == Token::BITNOT) {
1217 thirdch = lex_getch(lex);
1218 if (thirdch != Token::EQ) {
1219 lex_ungetch(lex, thirdch);
1220 lex_ungetch(lex, nextch);
1223 lex_tokench(lex, nextch);
1224 lex_tokench(lex, thirdch);
1227 else if (lex->flags.preprocessing &&
1228 ch == Token::SUB && util_isdigit(nextch))
1230 lex->tok.ttype = lex_finish_digit(lex, nextch);
1231 if (lex->tok.ttype == Token::INTCONST)
1232 lex->tok.constval.i = -lex->tok.constval.i;
1234 lex->tok.constval.f = -lex->tok.constval.f;
1236 return lex->tok.ttype;
1238 lex_ungetch(lex, nextch);
1242 return (lex->tok.ttype = Token::OPERATOR);
1245 if (ch == Token::MUL || ch == Token::DIV) /* *=, /= */
1247 lex_tokench(lex, ch);
1249 nextch = lex_getch(lex);
1250 if (nextch == Token::EQ || nextch == Token::MUL) {
1251 lex_tokench(lex, nextch);
1253 lex_ungetch(lex, nextch);
1256 return (lex->tok.ttype = Token::OPERATOR);
1259 if (ch == Token::MOD) {
1260 lex_tokench(lex, ch);
1262 return (lex->tok.ttype = Token::OPERATOR);
1265 if (isident_start(ch))
1269 lex_tokench(lex, ch);
1270 if (!lex_finish_ident(lex)) {
1272 return (lex->tok.ttype = Token::ERROR);
1275 lex->tok.ttype = Token::IDENT;
1277 v = lex->tok.value.c_str();
1278 if (!strcmp(v, "void")) {
1279 lex->tok.ttype = Token::TYPENAME;
1280 lex->tok.constval.t = TYPE_VOID;
1281 } else if (!strcmp(v, "int")) {
1282 lex->tok.ttype = Token::TYPENAME;
1283 lex->tok.constval.t = TYPE_INTEGER;
1284 } else if (!strcmp(v, "float")) {
1285 lex->tok.ttype = Token::TYPENAME;
1286 lex->tok.constval.t = TYPE_FLOAT;
1287 } else if (!strcmp(v, "string")) {
1288 lex->tok.ttype = Token::TYPENAME;
1289 lex->tok.constval.t = TYPE_STRING;
1290 } else if (!strcmp(v, "entity")) {
1291 lex->tok.ttype = Token::TYPENAME;
1292 lex->tok.constval.t = TYPE_ENTITY;
1293 } else if (!strcmp(v, "vector")) {
1294 lex->tok.ttype = Token::TYPENAME;
1295 lex->tok.constval.t = TYPE_VECTOR;
1296 } else if (!strcmp(v, "_length")) {
1297 lex->tok.ttype = Token::OPERATOR;
1300 for (kw = 0; kw < GMQCC_ARRAY_COUNT(keywords_qc); ++kw) {
1301 if (!strcmp(v, keywords_qc[kw]))
1302 return (lex->tok.ttype = Token::KEYWORD);
1304 if (OPTS_OPTION_U32(OPTION_STANDARD) != COMPILER_QCC) {
1305 for (kw = 0; kw < GMQCC_ARRAY_COUNT(keywords_fg); ++kw) {
1306 if (!strcmp(v, keywords_fg[kw]))
1307 return (lex->tok.ttype = Token::KEYWORD);
1312 return lex->tok.ttype;
1315 if (ch == Token::QUOT_DOUBLE)
1317 lex->flags.nodigraphs = true;
1318 if (lex->flags.preprocessing)
1319 lex_tokench(lex, ch);
1320 lex->tok.ttype = lex_finish_string(lex, Token::QUOT_DOUBLE);
1321 if (lex->flags.preprocessing)
1322 lex_tokench(lex, ch);
1323 while (!lex->flags.preprocessing && lex->tok.ttype == Token::STRINGCONST)
1325 /* Allow c style "string" "continuation" */
1326 ch = lex_skipwhite(lex, false);
1327 if (ch != Token::QUOT_DOUBLE) {
1328 lex_ungetch(lex, ch);
1332 lex->tok.ttype = lex_finish_string(lex, Token::QUOT_DOUBLE);
1334 lex->flags.nodigraphs = false;
1336 return lex->tok.ttype;
1339 if (ch == Token::QUOT_SINGLE)
1341 /* we parse character constants like string,
1342 * but return Token::CHARCONST, or a vector type if it fits...
1343 * Likewise actual unescaping has to be done by the parser.
1344 * The difference is we don't allow 'char' 'continuation'.
1346 if (lex->flags.preprocessing)
1347 lex_tokench(lex, ch);
1348 lex->tok.ttype = lex_finish_string(lex, Token::QUOT_SINGLE);
1349 if (lex->flags.preprocessing)
1350 lex_tokench(lex, ch);
1353 lex->tok.ttype = Token::CHARCONST;
1355 /* It's a vector if we can successfully scan 3 floats */
1356 if (util_sscanf(lex->tok.value.c_str(), " %f %f %f ",
1357 &lex->tok.constval.v.x, &lex->tok.constval.v.y, &lex->tok.constval.v.z) == 3)
1360 lex->tok.ttype = Token::VECTORCONST;
1364 if (!lex->flags.preprocessing && strlen(lex->tok.value.c_str()) > 1) {
1366 /* check for a valid utf8 character */
1367 if (!OPTS_FLAG(UTF8) || !utf8_to(&u8char, (const unsigned char *)lex->tok.value.c_str(), 8)) {
1368 if (lexwarn(lex, WARN_MULTIBYTE_CHARACTER,
1369 ( OPTS_FLAG(UTF8) ? "invalid multibyte character sequence `%s`"
1370 : "multibyte character: `%s`" ),
1372 return (lex->tok.ttype = Token::ERROR);
1375 lex->tok.constval.i = u8char;
1378 lex->tok.constval.i = lex->tok.value.c_str()[0];
1381 return lex->tok.ttype;
1384 if (util_isdigit(ch))
1386 lex->tok.ttype = lex_finish_digit(lex, ch);
1388 return lex->tok.ttype;
1391 if (lex->flags.preprocessing) {
1392 lex_tokench(lex, static_cast<int>(ch));
1394 return (lex->tok.ttype = ch);
1397 lexerror(lex, "unknown token: `%c`", ch);
1398 return (lex->tok.ttype = Token::ERROR);