/* Report a parse error through con_vprintmsg at LVL_ERROR.
 * Two reporting calls are visible: one uses lex->name and lex->sline as the
 * source position, the other passes an empty name and line 0.
 * NOTE(review): the second form is presumably the path taken when lex is
 * NULL (this file calls lexerror(NULL, ...)); confirm against the full body.
 */
11 void lexerror(lex_file *lex, const char *fmt, ...)
17 con_vprintmsg(LVL_ERROR, lex->name, lex->sline, "parse error", fmt, ap);
19 con_vprintmsg(LVL_ERROR, "", 0, "parse error", fmt, ap);
/* Emit a warning of category warntype, positioned at lex->name / lex->sline.
 * The message level starts out as LVL_WARNING and OPTS_WARN(warntype) is
 * consulted to see whether the category is enabled.
 * NOTE(review): the meaning of the bool result is not visible in this
 * excerpt; lex_finish_frames treats a true result as a reason to act on it.
 */
23 bool lexwarn(lex_file *lex, int warntype, const char *fmt, ...)
26 int lvl = LVL_WARNING;
28 if (!OPTS_WARN(warntype))
35 con_vprintmsg(lvl, lex->name, lex->sline, "warning", fmt, ap);
/* Allocate one token with mem_a and zero-fill it.
 * NOTE(review): the enclosing signature is not visible here; token_copy
 * calls token_new(), which is presumably this body.
 */
45 token *tok = (token*)mem_a(sizeof(token));
48 memset(tok, 0, sizeof(*tok));
/* Unlink a token from its doubly linked neighbours and clear its value.
 * Each neighbour is only re-pointed when it actually links back to self,
 * so a token whose neighbours were already re-linked is left alone.
 */
52 void token_delete(token *self)
54 if (self->next && self->next->prev == self)
55 self->next->prev = self->prev;
56 if (self->prev && self->prev->next == self)
57 self->prev->next = self->next;
/* release the token's value storage */
58 MEM_VECTOR_CLEAR(self, value);
/* Create a new token carrying the same value, type and constant as cp.
 * The value buffer is sized value_count + 1 so that a terminating zero byte
 * can be stored after the copied characters.
 */
62 token* token_copy(const token *cp)
64 token* self = token_new();
/* one extra byte for the terminator written below */
68 self->value_alloc = cp->value_count + 1;
69 self->value_count = cp->value_count;
70 self->value = (char*)mem_a(self->value_alloc);
75 memcpy(self->value, cp->value, cp->value_count);
76 self->value[self->value_alloc-1] = 0;
80 self->ttype = cp->ttype;
/* constval is copied as raw bytes, whichever member is in use */
81 memcpy(&self->constval, &cp->constval, sizeof(self->constval));
/* NOTE(review): only the signature is visible in this excerpt.
 * token_copy_all calls this on the list it has built so far, presumably to
 * release every token reachable from t; confirm against the full body.
 */
85 void token_delete_all(token *t)
/* Copy a list of tokens starting at cp.
 * The first copy becomes both the head (out) and the running tail (cur);
 * each further copy is attached as cur->next and linked back through prev.
 * token_delete_all(out) is called on the list built so far.
 * NOTE(review): presumably on a failed token_copy; the condition is not
 * visible here.
 */
96 token* token_copy_all(const token *cp)
101 out = cur = token_copy(cp);
107 cur->next = token_copy(cp);
109 token_delete_all(out);
112 cur->next->prev = cur;
/* Reset the lexer's current token before a new one is read.
 * Two forms are visible: one replaces a heap token (token_delete followed by
 * token_new), the other reuses an embedded token by emptying its value
 * buffer, clearing constval.t and stamping the context with the current
 * start line and file name.
 * NOTE(review): the two forms treat lex->tok as a pointer and as a struct
 * respectively, so they are presumably selected by preprocessor
 * conditionals that are not visible here.
 */
119 static void lex_token_new(lex_file *lex)
123 token_delete(lex->tok);
124 lex->tok = token_new();
127 vec_shrinkto(lex->tok.value, 0);
128 lex->tok.constval.t = 0;
129 lex->tok.ctx.line = lex->sline;
130 lex->tok.ctx.file = lex->name;
/* Create a lexer reading from the named file.
 * The file is opened in binary mode through util_fopen, the lex_file is
 * allocated with mem_a and zero-filled, and the file name is duplicated.
 * The duplicated name is also pushed onto lex_filenames; lex_cleanup is the
 * function that frees the entries of that list.
 * Failures are reported through lexerror(NULL, ...); the guarding conditions
 * and return statements are not visible in this excerpt.
 */
135 lex_file* lex_open(const char *file)
138 FILE *in = util_fopen(file, "rb");
141 lexerror(NULL, "open failed: '%s'\n", file);
145 lex = (lex_file*)mem_a(sizeof(*lex));
148 lexerror(NULL, "out of memory\n");
152 memset(lex, 0, sizeof(*lex));
155 lex->name = util_strdup(file);
156 lex->line = 1; /* we start counting at 1 */
/* the name is tracked globally; lex_cleanup frees it */
161 vec_push(lex_filenames, lex->name);
/* Create a lexer reading from an in-memory buffer of len bytes.
 * The str pointer is stored, not copied, and reading starts at offset 0.
 * A NULL name is replaced by "<string-source>". The duplicated name is
 * pushed onto lex_filenames, whose entries lex_cleanup frees.
 * NOTE(review): because str is stored as-is, it presumably has to outlive
 * the lexer; confirm with callers.
 */
165 lex_file* lex_open_string(const char *str, size_t len, const char *name)
169 lex = (lex_file*)mem_a(sizeof(*lex));
171 lexerror(NULL, "out of memory\n");
175 memset(lex, 0, sizeof(*lex));
178 lex->open_string = str;
179 lex->open_string_length = len;
180 lex->open_string_pos = 0;
182 lex->name = util_strdup(name ? name : "<string-source>");
183 lex->line = 1; /* we start counting at 1 */
188 vec_push(lex_filenames, lex->name);
/* Free every name recorded in lex_filenames, then the list itself.
 * lex_open and lex_open_string push their duplicated names onto this list,
 * and lex_close deliberately leaves lex->name alone.
 */
193 void lex_cleanup(void)
196 for (i = 0; i < vec_size(lex_filenames); ++i)
197 mem_d(lex_filenames[i]);
198 vec_free(lex_filenames);
/* Release the resources held by a lexer: each frame macro name, the frames
 * vector, the model name vector and the current token's storage.
 * NOTE(review): two token teardown forms are visible (token_delete on a
 * pointer, vec_free on an embedded token's value); presumably they are
 * alternatives under preprocessor conditionals not shown here.
 */
201 void lex_close(lex_file *lex)
204 for (i = 0; i < vec_size(lex->frames); ++i)
205 mem_d(lex->frames[i].name);
206 vec_free(lex->frames);
209 vec_free(lex->modelname);
215 token_delete(lex->tok);
217 vec_free(lex->tok.value);
219 /* mem_d(lex->name); collected in lex_filenames */
/* Fetch one raw character from the lexer's input: from lex->file via fgetc,
 * or from the in-memory string at open_string_pos, which is then advanced.
 * The string position is checked against open_string_length before reading.
 * NOTE(review): open_string is assigned from a const char* in
 * lex_open_string, so if it is a plain char buffer, bytes above 0x7F may be
 * returned as negative ints where char is signed. A 0xFF byte would then be
 * indistinguishable from EOF, and negative values are outside the domain of
 * the ctype.h classifiers used in this file. Consider casting to unsigned
 * char.
 */
223 static int lex_fgetc(lex_file *lex)
226 return fgetc(lex->file);
227 if (lex->open_string) {
228 if (lex->open_string_pos >= lex->open_string_length)
230 return lex->open_string[lex->open_string_pos++];
235 /* Get or put-back data
236 * The following two functions do NOT understand what kind of data they
238 * They are merely wrapping get/put in order to count line numbers.
240 static void lex_ungetch(lex_file *lex, int ch);
/* Translate a trigraph sequence into the character it stands for.
 * The switch maps the third character of a trigraph to its replacement
 * (for example '=' to '#', '/' to a backslash, '(' to '[').
 * Look-ahead characters that do not complete a trigraph are handed back
 * with lex_ungetch: c3 first and then c2, so c2 is the next one re-read.
 * NOTE(review): old is presumably returned unchanged in that case; the
 * return statements are not visible here.
 */
241 static int lex_try_trigraph(lex_file *lex, int old)
246 lex_ungetch(lex, c2);
252 case '=': return '#';
253 case '/': return '\\';
254 case '\'': return '^';
255 case '(': return '[';
256 case ')': return ']';
257 case '!': return '|';
258 case '<': return '{';
259 case '>': return '}';
260 case '-': return '~';
262 lex_ungetch(lex, c3);
263 lex_ungetch(lex, c2);
/* Recognise a two-character digraph that starts with ch.
 * The pairs tested are "<:", ":>", "<%", "%>" and "%:". A look-ahead
 * character that forms none of these is pushed back with lex_ungetch.
 * NOTE(review): the value returned for each pair is not visible in this
 * excerpt.
 */
268 static int lex_try_digraph(lex_file *lex, int ch)
272 if (ch == '<' && c2 == ':')
274 else if (ch == ':' && c2 == '>')
276 else if (ch == '<' && c2 == '%')
278 else if (ch == '%' && c2 == '>')
280 else if (ch == '%' && c2 == ':')
282 lex_ungetch(lex, c2);
/* Read the next character for the lexer.
 * Characters stored in lex->peek by lex_ungetch can be returned from there,
 * with a check for '\n' on that path (presumably to keep lex->line in step).
 * Other characters may be passed through trigraph translation, or through
 * digraph translation when flags.nodigraphs is clear and the character is
 * one of '<', ':' or '%'.
 */
286 static int lex_getch(lex_file *lex)
292 if (lex->peek[lex->peekpos] == '\n')
294 return lex->peek[lex->peekpos];
301 return lex_try_trigraph(lex, ch);
302 else if (!lex->flags.nodigraphs && (ch == '<' || ch == ':' || ch == '%'))
303 return lex_try_digraph(lex, ch);
/* Push ch back so that it can be read again: the character is stored at
 * lex->peek[peekpos] and peekpos is advanced.
 * NOTE(review): no bound check on peekpos is visible here; confirm that the
 * capacity of peek covers the deepest push-back sequence used by callers.
 */
307 static void lex_ungetch(lex_file *lex, int ch)
309 lex->peek[lex->peekpos++] = ch;
314 /* classify characters
315 * some additions to the is*() functions of ctype.h
318 /* Idents are alphanumeric, but they start with alpha or _ */
/* True when ch may begin an identifier: a letter as classified by isalpha,
 * or an underscore.
 * NOTE(review): isalpha is only defined for EOF and values representable as
 * unsigned char; confirm that callers never pass a negative character value.
 */
319 static bool isident_start(int ch)
321 return isalpha(ch) || ch == '_';
/* True when ch may appear inside an identifier: anything accepted by
 * isident_start, plus the decimal digits.
 */
324 static bool isident(int ch)
326 return isident_start(ch) || isdigit(ch);
329 /* isxdigit_only is used when we already know it's not a digit
330 * and want to see if it's a hex digit anyway.
332 static bool isxdigit_only(int ch)
/* only the letters a-f / A-F match; decimal digits are deliberately excluded */
334 return (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F');
337 /* Append a character to the token buffer (lex->tok.value, grown with vec_push) */
338 static void lex_tokench(lex_file *lex, int ch)
340 vec_push(lex->tok.value, ch);
343 /* Append a trailing null-byte without counting it as token content */
344 static void lex_endtoken(lex_file *lex)
/* The zero byte is pushed and the vector is immediately shrunk by one, so
 * the terminator stays in the buffer while the recorded size excludes it.
 */
346 vec_push(lex->tok.value, 0);
347 vec_shrinkby(lex->tok.value, 1);
350 /* Skip whitespace and comments and return the first
351 * non-white character.
352 * As this makes use of the above getch() ungetch() functions,
353 * we don't need to care at all about line numbering anymore.
355 * In theory, this function should only be used at the beginning
356 * of lexing, or when we *know* the next character is part of the token.
357 * Otherwise, if the parser throws an error, the linenumber may not be
358 * the line of the error, but the line of the next token AFTER the error.
360 * This is currently only problematic when using c-like string-continuation,
361 * since comments and whitespaces are allowed between 2 such strings.
365 "A continuation of the previous string"
366 // This line is skipped
369 * In this case, if the parse decides it didn't actually want a string,
370 * and uses lex->line to print an error, it will show the ', foo);' line's
373 * On the other hand, the parser is supposed to remember the line of the next
374 * token's beginning. In this case we would want skipwhite() to be called
375 * AFTER reading a token, so that the parser, before reading the NEXT token,
376 * doesn't store the *comment's* linenumber, but the actual token's linenumber.
379 * here is to store the line of the first character after skipping
380 * the initial whitespace in lex->sline, this happens in lex_do.
/* Skip whitespace, one-line comments and multiline comments.
 * The outer do/while repeats while the current character is whitespace; a
 * finished multiline comment sets ch to ' ' so that the loop goes round
 * again.
 * When lex->flags.preprocessing is set, the skipped text is kept: whitespace
 * characters, the comment delimiters and the comment contents are appended
 * to the token buffer with lex_tokench. After a one-line comment the '\n'
 * is pushed back with lex_ungetch.
 * A slash that starts no comment is rolled back with lex_ungetch.
 * NOTE(review): the return statements are not visible in this excerpt;
 * lex_do compares the result against TOKEN_WHITE, TOKEN_EOL and TOKEN_FATAL
 * in preprocessing mode.
 */
382 static int lex_skipwhite(lex_file *lex)
385 bool haswhite = false;
390 while (ch != EOF && isspace(ch)) {
391 if (lex->flags.preprocessing) {
394 /* see if there was whitespace first */
395 if (haswhite) { /* (vec_size(lex->tok.value)) { */
396 lex_ungetch(lex, ch);
400 /* otherwise return EOL */
404 lex_tokench(lex, ch);
413 /* one line comment */
416 if (lex->flags.preprocessing) {
418 lex_tokench(lex, '/');
419 lex_tokench(lex, '/');
422 while (ch != EOF && ch != '\n') {
423 if (lex->flags.preprocessing)
424 lex_tokench(lex, ch);
427 if (lex->flags.preprocessing) {
428 lex_ungetch(lex, '\n');
436 /* multiline comment */
437 if (lex->flags.preprocessing) {
439 lex_tokench(lex, '/');
440 lex_tokench(lex, '*');
449 if (lex->flags.preprocessing) {
450 lex_tokench(lex, '*');
451 lex_tokench(lex, '/');
456 if (lex->flags.preprocessing) {
457 lex_tokench(lex, ch);
460 ch = ' '; /* cause TRUE in the isspace check */
463 /* Otherwise roll back to the slash and break out of the loop */
464 lex_ungetch(lex, ch);
468 } while (ch != EOF && isspace(ch));
472 lex_ungetch(lex, ch);
/* Read the rest of an identifier into the token buffer.
 * Characters accepted by isident are appended with lex_tokench; the first
 * character that is not part of the identifier is pushed back with
 * lex_ungetch so the next read sees it again.
 * NOTE(review): the caller is expected to have stored the first character
 * already, as lex_do and lex_parse_frame do.
 */
479 static bool GMQCC_WARN lex_finish_ident(lex_file *lex)
484 while (ch != EOF && isident(ch))
486 lex_tokench(lex, ch);
490 /* last ch was not an ident ch: */
491 lex_ungetch(lex, ch);
496 /* read one ident for the frame list */
/* Read a single frame name from the current line into the token buffer.
 * Whitespace other than '\n' is skipped first. The name must begin with a
 * character accepted by isident_start, otherwise an error is reported; the
 * rest is read with lex_finish_ident.
 * Callers in this file interpret the result as: greater than 0 for end of
 * line, less than 0 for an error.
 */
497 static int lex_parse_frame(lex_file *lex)
504 while (ch != EOF && ch != '\n' && isspace(ch))
510 if (!isident_start(ch)) {
511 lexerror(lex, "invalid framename, must start with one of a-z or _, got %c", ch);
515 lex_tokench(lex, ch);
516 if (!lex_finish_ident(lex))
522 /* read a list of $frames */
/* Read the frame names following a $frame command and register each one.
 * Names are read with lex_parse_frame until it signals end of line
 * (rc > 0) or an error (rc < 0).
 * A name that already exists in lex->frames gets a fresh value from
 * lex->framevalue++ and triggers a WARN_FRAME_MACROS warning. A new name is
 * duplicated with util_strdup and appended with the next framevalue.
 */
523 static bool lex_finish_frames(lex_file *lex)
530 rc = lex_parse_frame(lex);
531 if (rc > 0) /* end of line */
533 if (rc < 0) /* error */
/* look for an existing macro of the same name */
536 for (i = 0; i < vec_size(lex->frames); ++i) {
537 if (!strcmp(lex->tok.value, lex->frames[i].name)) {
538 lex->frames[i].value = lex->framevalue++;
539 if (lexwarn(lex, WARN_FRAME_MACROS, "duplicate frame macro defined: `%s`", lex->tok.value))
/* i stopped short of the end only when a duplicate was found above */
544 if (i < vec_size(lex->frames))
547 m.value = lex->framevalue++;
548 m.name = util_strdup(lex->tok.value);
/* empty the token buffer so the next frame name starts clean */
549 vec_shrinkto(lex->tok.value, 0);
550 vec_push(lex->frames, m);
/* Read the body of a quoted constant into the token buffer.
 * TOKEN_STRINGCONST is returned on the success path.
 * NOTE(review): that is presumably when the closing quote is reached; the
 * test itself is not visible here.
 * Outside preprocessing mode a backslash introduces an escape: a, b, r, n,
 * t, f and v are translated to their control characters. An unrecognised
 * escape raises WARN_UNKNOWN_CONTROL_SEQUENCE and is stored as the backslash
 * followed by the character.
 * Running into end of file reports an error, pushes EOF back so the next
 * token is TOKEN_EOF, and returns TOKEN_ERROR.
 */
554 static int GMQCC_WARN lex_finish_string(lex_file *lex, int quote)
562 return TOKEN_STRINGCONST;
564 if (!lex->flags.preprocessing && ch == '\\') {
567 lexerror(lex, "unexpected end of file");
568 lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */
569 return (lex->tok.ttype = TOKEN_ERROR);
574 case 'a': ch = '\a'; break;
575 case 'b': ch = '\b'; break;
576 case 'r': ch = '\r'; break;
577 case 'n': ch = '\n'; break;
578 case 't': ch = '\t'; break;
579 case 'f': ch = '\f'; break;
580 case 'v': ch = '\v'; break;
582 lexwarn(lex, WARN_UNKNOWN_CONTROL_SEQUENCE, "unrecognized control sequence: \\%c", ch);
583 /* so we just add the character plus backslash no matter what it actually is */
584 lex_tokench(lex, '\\');
586 /* add the character finally */
587 lex_tokench(lex, ch);
590 lex_tokench(lex, ch);
592 lexerror(lex, "unexpected end of file within string constant");
593 lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */
594 return (lex->tok.ttype = TOKEN_ERROR);
/* Read a numeric constant whose first digit, lastch, was already consumed.
 * The token starts out as TOKEN_INTCONST. If the second character ends the
 * number (it is neither '.', nor a digit, nor the 'x' of a leading "0x"),
 * it is pushed back and the value is the single digit lastch - '0'.
 * Otherwise digits are collected, with a-f / A-F also accepted in hex mode.
 * A '.' outside hex mode switches the token to TOKEN_FLOATCONST. A trailing
 * 'f' after a float is consumed rather than pushed back.
 * Other trailing characters are reported as an error.
 * The final value comes from strtod for floats and from strtol with base 0
 * for integers.
 */
597 static int GMQCC_WARN lex_finish_digit(lex_file *lex, int lastch)
603 /* parse a number... */
604 lex->tok.ttype = TOKEN_INTCONST;
606 lex_tokench(lex, ch);
609 if (ch != '.' && !isdigit(ch))
611 if (lastch != '0' || ch != 'x')
613 /* end of the number or EOF */
614 lex_ungetch(lex, ch);
617 lex->tok.constval.i = lastch - '0';
618 return lex->tok.ttype;
624 /* EOF would have been caught above */
628 lex_tokench(lex, ch);
630 while (isdigit(ch) || (ishex && isxdigit_only(ch)))
632 lex_tokench(lex, ch);
636 /* NOT else, '.' can come from above as well */
637 if (ch == '.' && !ishex)
639 /* Allow a decimal point in non-hex mode */
640 lex->tok.ttype = TOKEN_FLOATCONST;
641 lex_tokench(lex, ch);
643 /* continue digits-only */
647 lex_tokench(lex, ch);
651 /* put back the last character */
652 /* but do not put back the trailing 'f' of a float */
653 if (lex->tok.ttype == TOKEN_FLOATCONST && ch == 'f')
656 /* generally we don't want words to follow numbers: */
658 lexerror(lex, "unexpected trailing characters after number");
659 return (lex->tok.ttype = TOKEN_ERROR);
661 lex_ungetch(lex, ch);
664 if (lex->tok.ttype == TOKEN_FLOATCONST)
665 lex->tok.constval.f = strtod(lex->tok.value, NULL);
667 lex->tok.constval.i = strtol(lex->tok.value, NULL, 0);
668 return lex->tok.ttype;
/* Read the next token from lex and return its type; the same value is
 * stored in lex->tok.ttype and the text is collected in lex->tok.value.
 *
 * Leading whitespace and comments are skipped with lex_skipwhite, after
 * which lex->sline and the token context record where the token begins.
 * In preprocessing mode TOKEN_WHITE, TOKEN_EOL and TOKEN_FATAL results of
 * lex_skipwhite are returned as tokens of their own.
 *
 * Token classes handled, in order:
 *   - '$' modelgen/spritegen commands: frame, framesave, framevalue,
 *     framerestore, modelname, flush, a list of commands whose line is
 *     skipped, and finally a lookup of the name among the frame macros,
 *     which yields TOKEN_INTCONST
 *   - single-character tokens and operators (with flags.noops the raw
 *     character is returned instead of TOKEN_OPERATOR)
 *   - '.', "..." (TOKEN_DOTS) and the one/two character operators
 *   - identifiers, promoted to TOKEN_TYPENAME or TOKEN_KEYWORD for the
 *     names listed below
 *   - string constants, including c style "string" "continuation"
 *   - character constants, which become TOKEN_VECTORCONST when three floats
 *     can be scanned from them
 *   - numeric constants via lex_finish_digit
 * Anything else reports "unknown token" and returns TOKEN_ERROR.
 */
671 int lex_do(lex_file *lex)
681 ch = lex_skipwhite(lex);
682 lex->sline = lex->line;
683 lex->tok.ctx.line = lex->sline;
684 lex->tok.ctx.file = lex->name;
686 if (lex->flags.preprocessing && (ch == TOKEN_WHITE || ch == TOKEN_EOL || ch == TOKEN_FATAL)) {
687 return (lex->tok.ttype = ch);
691 return (lex->tok.ttype = TOKEN_FATAL);
695 return (lex->tok.ttype = TOKEN_EOF);
698 /* modelgen / spritegen commands */
704 if (!isident_start(ch)) {
705 lexerror(lex, "hanging '$' modelgen/spritegen command line");
708 lex_tokench(lex, ch);
709 if (!lex_finish_ident(lex))
710 return (lex->tok.ttype = TOKEN_ERROR);
712 /* skip the known commands */
715 if (!strcmp(v, "frame") || !strcmp(v, "framesave"))
717 /* frame/framesave command works like an enum
718 * similar to fteqcc we handle this in the lexer.
719 * The reason for this is that it is sensitive to newlines,
720 * which the parser is unaware of
722 if (!lex_finish_frames(lex))
723 return (lex->tok.ttype = TOKEN_ERROR);
727 if (!strcmp(v, "framevalue"))
730 while (ch != EOF && isspace(ch) && ch != '\n')
734 lexerror(lex, "$framevalue requires an integer parameter");
739 lex->tok.ttype = lex_finish_digit(lex, ch);
741 if (lex->tok.ttype != TOKEN_INTCONST) {
742 lexerror(lex, "$framevalue requires an integer parameter");
745 lex->framevalue = lex->tok.constval.i;
749 if (!strcmp(v, "framerestore"))
755 rc = lex_parse_frame(lex);
758 lexerror(lex, "$framerestore requires a framename parameter");
762 return (lex->tok.ttype = TOKEN_FATAL);
765 for (frame = 0; frame < vec_size(lex->frames); ++frame) {
766 if (!strcmp(v, lex->frames[frame].name)) {
767 lex->framevalue = lex->frames[frame].value;
771 lexerror(lex, "unknown framename `%s`", v);
775 if (!strcmp(v, "modelname"))
781 rc = lex_parse_frame(lex);
784 lexerror(lex, "$modelname requires a parameter");
788 return (lex->tok.ttype = TOKEN_FATAL);
/* NOTE(review): the previous model name is a token value buffer managed
 * by the vec_* helpers, and it is stored here in a frames[].name slot.
 * Other frame names come from util_strdup and are released with mem_d
 * (see "flush" below and lex_close). Confirm that mem_d is valid for a
 * vec-managed buffer, or duplicate the name before storing it.
 */
791 if (lex->modelname) {
793 m.value = lex->framevalue;
794 m.name = lex->modelname;
795 lex->modelname = NULL;
796 vec_push(lex->frames, m);
/* ownership of the token's buffer moves to lex->modelname */
798 lex->modelname = lex->tok.value;
799 lex->tok.value = NULL;
803 if (!strcmp(v, "flush"))
806 for (frame = 0; frame < vec_size(lex->frames); ++frame)
807 mem_d(lex->frames[frame].name);
808 vec_free(lex->frames);
809 /* skip line (fteqcc does it too) */
811 while (ch != EOF && ch != '\n')
816 if (!strcmp(v, "cd") ||
817 !strcmp(v, "origin") ||
818 !strcmp(v, "base") ||
819 !strcmp(v, "flags") ||
820 !strcmp(v, "scale") ||
825 while (ch != EOF && ch != '\n')
/* not a command: treat the name as a frame macro reference */
830 for (frame = 0; frame < vec_size(lex->frames); ++frame) {
831 if (!strcmp(v, lex->frames[frame].name)) {
832 lex->tok.constval.i = lex->frames[frame].value;
833 return (lex->tok.ttype = TOKEN_INTCONST);
837 lexerror(lex, "invalid frame macro");
841 /* single-character tokens */
846 lex_tokench(lex, ch);
848 if (lex->flags.noops)
849 return (lex->tok.ttype = ch);
851 return (lex->tok.ttype = TOKEN_OPERATOR);
859 lex_tokench(lex, ch);
861 return (lex->tok.ttype = ch);
866 if (lex->flags.noops)
868 /* Detect characters early which are normally
869 * operators OR PART of an operator.
886 lex_tokench(lex, ch);
888 return (lex->tok.ttype = ch);
895 lex_tokench(lex, ch);
896 /* peek ahead once */
897 nextch = lex_getch(lex);
899 lex_ungetch(lex, nextch);
901 return (lex->tok.ttype = ch);
903 /* peek ahead again */
904 nextch = lex_getch(lex);
906 lex_ungetch(lex, nextch);
/* Two characters were consumed after the first '.': a second '.' and then
 * nextch. lex_ungetch pushes onto lex->peek, so the character that has to
 * be re-read first goes on last: nextch is restored above and the '.'
 * here. Pushing nextch twice, as before, duplicated it in the input and
 * silently dropped the second '.'.
 */
907 lex_ungetch(lex, '.');
909 return (lex->tok.ttype = ch);
911 /* fill the token to be "..." */
912 lex_tokench(lex, ch);
913 lex_tokench(lex, ch);
915 return (lex->tok.ttype = TOKEN_DOTS);
919 if (ch == ',' || ch == '.') {
920 lex_tokench(lex, ch);
922 return (lex->tok.ttype = TOKEN_OPERATOR);
925 if (ch == '+' || ch == '-' || /* ++, --, +=, -= and -> as well! */
926 ch == '>' || ch == '<' || /* <<, >>, <=, >= */
927 ch == '=' || ch == '!' || /* ==, != */
928 ch == '&' || ch == '|') /* &&, ||, &=, |= */
930 lex_tokench(lex, ch);
932 nextch = lex_getch(lex);
933 if (nextch == ch || nextch == '=') {
934 lex_tokench(lex, nextch);
935 } else if (ch == '-' && nextch == '>') {
936 lex_tokench(lex, nextch);
938 lex_ungetch(lex, nextch);
941 return (lex->tok.ttype = TOKEN_OPERATOR);
945 if (ch == '^' || ch == '~' || ch == '!')
947 lex_tokench(lex, ch);
949 return (lex->tok.ttype = TOKEN_OPERATOR);
953 if (ch == '*' || ch == '/') /* *=, /= */
955 lex_tokench(lex, ch);
957 nextch = lex_getch(lex);
959 lex_tokench(lex, nextch);
961 lex_ungetch(lex, nextch);
964 return (lex->tok.ttype = TOKEN_OPERATOR);
967 if (isident_start(ch))
971 lex_tokench(lex, ch);
972 if (!lex_finish_ident(lex)) {
974 return (lex->tok.ttype = TOKEN_ERROR);
/* plain identifier unless one of the reserved names below matches */
977 lex->tok.ttype = TOKEN_IDENT;
980 if (!strcmp(v, "void")) {
981 lex->tok.ttype = TOKEN_TYPENAME;
982 lex->tok.constval.t = TYPE_VOID;
983 } else if (!strcmp(v, "int")) {
984 lex->tok.ttype = TOKEN_TYPENAME;
985 lex->tok.constval.t = TYPE_INTEGER;
986 } else if (!strcmp(v, "float")) {
987 lex->tok.ttype = TOKEN_TYPENAME;
988 lex->tok.constval.t = TYPE_FLOAT;
989 } else if (!strcmp(v, "string")) {
990 lex->tok.ttype = TOKEN_TYPENAME;
991 lex->tok.constval.t = TYPE_STRING;
992 } else if (!strcmp(v, "entity")) {
993 lex->tok.ttype = TOKEN_TYPENAME;
994 lex->tok.constval.t = TYPE_ENTITY;
995 } else if (!strcmp(v, "vector")) {
996 lex->tok.ttype = TOKEN_TYPENAME;
997 lex->tok.constval.t = TYPE_VECTOR;
998 } else if (!strcmp(v, "for") ||
999 !strcmp(v, "while") ||
1002 !strcmp(v, "else") ||
1003 !strcmp(v, "local") ||
1004 !strcmp(v, "return") ||
1005 !strcmp(v, "not") ||
1006 !strcmp(v, "const"))
1008 lex->tok.ttype = TOKEN_KEYWORD;
1010 else if (opts_standard != COMPILER_QCC)
1012 /* other standards reserve these keywords */
1013 if (!strcmp(v, "switch") ||
1014 !strcmp(v, "struct") ||
1015 !strcmp(v, "union") ||
1016 !strcmp(v, "break") ||
1017 !strcmp(v, "continue") ||
1020 lex->tok.ttype = TOKEN_KEYWORD;
1024 return lex->tok.ttype;
/* digraph translation is switched off while a string is being read */
1029 lex->flags.nodigraphs = true;
1030 if (lex->flags.preprocessing)
1031 lex_tokench(lex, ch);
1032 lex->tok.ttype = lex_finish_string(lex, '"');
1033 if (lex->flags.preprocessing)
1034 lex_tokench(lex, ch);
1035 while (!lex->flags.preprocessing && lex->tok.ttype == TOKEN_STRINGCONST)
1037 /* Allow c style "string" "continuation" */
1038 ch = lex_skipwhite(lex);
1040 lex_ungetch(lex, ch);
1044 lex->tok.ttype = lex_finish_string(lex, '"');
1046 lex->flags.nodigraphs = false;
1048 return lex->tok.ttype;
1053 /* we parse character constants like string,
1054 * but return TOKEN_CHARCONST, or a vector type if it fits...
1055 * Likewise actual unescaping has to be done by the parser.
1056 * The difference is we don't allow 'char' 'continuation'.
1058 if (lex->flags.preprocessing)
1059 lex_tokench(lex, ch);
1060 lex->tok.ttype = lex_finish_string(lex, '\'');
1061 if (lex->flags.preprocessing)
1062 lex_tokench(lex, ch);
1065 /* It's a vector if we can successfully scan 3 floats */
1067 if (sscanf_s(lex->tok.value, " %f %f %f ",
1068 &lex->tok.constval.v.x, &lex->tok.constval.v.y, &lex->tok.constval.v.z) == 3)
1070 if (sscanf(lex->tok.value, " %f %f %f ",
1071 &lex->tok.constval.v.x, &lex->tok.constval.v.y, &lex->tok.constval.v.z) == 3)
1075 lex->tok.ttype = TOKEN_VECTORCONST;
1078 return lex->tok.ttype;
1083 lex->tok.ttype = lex_finish_digit(lex, ch);
1085 return lex->tok.ttype;
1088 lexerror(lex, "unknown token");
1089 return (lex->tok.ttype = TOKEN_ERROR);