X-Git-Url: http://de.git.xonotic.org/?p=xonotic%2Fgmqcc.git;a=blobdiff_plain;f=lexer.c;h=1a8c17a1f3e414928737277fb7a5a59b76e1e8da;hp=5882bdbfc2e76f2100e12006194e001f55da90c1;hb=641136fee3e2f589f93ad6a7b3213b0247107303;hpb=f83cc1b91d45e2daae5851457c47249e00ac01f4 diff --git a/lexer.c b/lexer.c index 5882bdb..1a8c17a 100644 --- a/lexer.c +++ b/lexer.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012, 2013 + * Copyright (C) 2012, 2013, 2014 * Wolfgang Bumiller * * Permission is hereby granted, free of charge, to any person obtaining a copy of @@ -25,6 +25,7 @@ #include "gmqcc.h" #include "lexer.h" + /* * List of Keywords */ @@ -81,91 +82,8 @@ static bool lexwarn(lex_file *lex, int warntype, const char *fmt, ...) return r; } - -#if 0 -token* token_new() -{ - token *tok = (token*)mem_a(sizeof(token)); - if (!tok) - return NULL; - memset(tok, 0, sizeof(*tok)); - return tok; -} - -void token_delete(token *self) -{ - if (self->next && self->next->prev == self) - self->next->prev = self->prev; - if (self->prev && self->prev->next == self) - self->prev->next = self->next; - MEM_VECTOR_CLEAR(self, value); - mem_d(self); -} - -token* token_copy(const token *cp) -{ - token* self = token_new(); - if (!self) - return NULL; - /* copy the value */ - self->value_alloc = cp->value_count + 1; - self->value_count = cp->value_count; - self->value = (char*)mem_a(self->value_alloc); - if (!self->value) { - mem_d(self); - return NULL; - } - memcpy(self->value, cp->value, cp->value_count); - self->value[self->value_alloc-1] = 0; - - /* rest */ - self->ctx = cp->ctx; - self->ttype = cp->ttype; - memcpy(&self->constval, &cp->constval, sizeof(self->constval)); - return self; -} - -void token_delete_all(token *t) -{ - token *n; - - do { - n = t->next; - token_delete(t); - t = n; - } while(t); -} - -token* token_copy_all(const token *cp) -{ - token *cur; - token *out; - - out = cur = token_copy(cp); - if (!out) - return NULL; - - while (cp->next) { - cp = cp->next; - cur->next = token_copy(cp); - if (!cur->next) { - token_delete_all(out); - return NULL; - } - cur->next->prev = cur; - cur = cur->next; - } - - return out; -} -#else static void lex_token_new(lex_file *lex) { -#if 0 - if (lex->tok) - token_delete(lex->tok); - lex->tok = token_new(); -#else if (lex->tok.value) vec_shrinkto(lex->tok.value, 0); @@ -173,14 +91,16 @@ static void lex_token_new(lex_file *lex) lex->tok.ctx.line = lex->sline; lex->tok.ctx.file = lex->name; lex->tok.ctx.column = lex->column; -#endif } -#endif + +static void lex_ungetch(lex_file *lex, int ch); +static int lex_getch(lex_file *lex); lex_file* lex_open(const char *file) { - lex_file *lex; - FILE *in = fs_file_open(file, "rb"); + lex_file *lex; + fs_file_t *in = fs_file_open(file, "rb"); + uint32_t read; if (!in) { lexerror(NULL, "open failed: '%s'\n", file); @@ -203,6 +123,19 @@ lex_file* lex_open(const char *file) lex->peekpos = 0; lex->eof = false; + /* handle BOM */ + if ((read = (lex_getch(lex) << 16) | (lex_getch(lex) << 8) | lex_getch(lex)) != 0xEFBBBF) { + lex_ungetch(lex, (read & 0x0000FF)); + lex_ungetch(lex, (read & 0x00FF00) >> 8); + lex_ungetch(lex, (read & 0xFF0000) >> 16); + } else { + /* + * otherwise the lexer has advanced 3 bytes for the BOM, we need + * to set the column back to 0 + */ + lex->column = 0; + } + vec_push(lex_filenames, lex->name); return lex; } @@ -255,16 +188,15 @@ void lex_close(lex_file *lex) if (lex->file) fs_file_close(lex->file); -#if 0 - if (lex->tok) - token_delete(lex->tok); -#else + vec_free(lex->tok.value); -#endif + /* mem_d(lex->name); collected in lex_filenames */ mem_d(lex); } + + static int lex_fgetc(lex_file *lex) { if (lex->file) { @@ -273,11 +205,11 @@ static int lex_fgetc(lex_file *lex) } if (lex->open_string) { if (lex->open_string_pos >= lex->open_string_length) - return EOF; + return FS_FILE_EOF; lex->column++; return lex->open_string[lex->open_string_pos++]; } - return EOF; + return FS_FILE_EOF; } /* Get or put-back data @@ -285,7 +217,6 @@ static int lex_fgetc(lex_file *lex) * are working on. * The are merely wrapping get/put in order to count line numbers. */ -static void lex_ungetch(lex_file *lex, int ch); static int lex_try_trigraph(lex_file *lex, int old) { int c2, c3; @@ -493,7 +424,7 @@ static bool lex_try_pragma(lex_file *lex) goto unroll; lex->line = line; - while (ch != '\n' && ch != EOF) + while (ch != '\n' && ch != FS_FILE_EOF) ch = lex_getch(lex); vec_free(command); vec_free(param); @@ -573,7 +504,7 @@ static int lex_skipwhite(lex_file *lex, bool hadwhite) do { ch = lex_getch(lex); - while (ch != EOF && util_isspace(ch)) { + while (ch != FS_FILE_EOF && util_isspace(ch)) { if (ch == '\n') { if (lex_try_pragma(lex)) continue; @@ -605,15 +536,11 @@ static int lex_skipwhite(lex_file *lex, bool hadwhite) if (lex->flags.preprocessing) { haswhite = true; - /* - lex_tokench(lex, '/'); - lex_tokench(lex, '/'); - */ lex_tokench(lex, ' '); lex_tokench(lex, ' '); } - while (ch != EOF && ch != '\n') { + while (ch != FS_FILE_EOF && ch != '\n') { if (lex->flags.preprocessing) lex_tokench(lex, ' '); /* ch); */ ch = lex_getch(lex); @@ -630,25 +557,17 @@ static int lex_skipwhite(lex_file *lex, bool hadwhite) /* multiline comment */ if (lex->flags.preprocessing) { haswhite = true; - /* - lex_tokench(lex, '/'); - lex_tokench(lex, '*'); - */ lex_tokench(lex, ' '); lex_tokench(lex, ' '); } - while (ch != EOF) + while (ch != FS_FILE_EOF) { ch = lex_getch(lex); if (ch == '*') { ch = lex_getch(lex); if (ch == '/') { if (lex->flags.preprocessing) { - /* - lex_tokench(lex, '*'); - lex_tokench(lex, '/'); - */ lex_tokench(lex, ' '); lex_tokench(lex, ' '); } @@ -660,7 +579,7 @@ static int lex_skipwhite(lex_file *lex, bool hadwhite) if (ch == '\n') lex_tokench(lex, '\n'); else - lex_tokench(lex, ' '); /* ch); */ + lex_tokench(lex, ' '); } } ch = ' '; /* cause TRUE in the isspace check */ @@ -671,7 +590,7 @@ static int lex_skipwhite(lex_file *lex, bool hadwhite) ch = '/'; break; } - } while (ch != EOF && util_isspace(ch)); + } while (ch != FS_FILE_EOF && util_isspace(ch)); if (haswhite) { lex_endtoken(lex); @@ -687,7 +606,7 @@ static bool GMQCC_WARN lex_finish_ident(lex_file *lex) int ch; ch = lex_getch(lex); - while (ch != EOF && isident(ch)) + while (ch != FS_FILE_EOF && isident(ch)) { lex_tokench(lex, ch); ch = lex_getch(lex); @@ -707,7 +626,7 @@ static int lex_parse_frame(lex_file *lex) lex_token_new(lex); ch = lex_getch(lex); - while (ch != EOF && ch != '\n' && util_isspace(ch)) + while (ch != FS_FILE_EOF && ch != '\n' && util_isspace(ch)) ch = lex_getch(lex); if (ch == '\n') @@ -765,10 +684,11 @@ static int GMQCC_WARN lex_finish_string(lex_file *lex, int quote) int ch = 0; int nextch; bool hex; + bool oct; char u8buf[8]; /* way more than enough */ int u8len, uc; - while (ch != EOF) + while (ch != FS_FILE_EOF) { ch = lex_getch(lex); if (ch == quote) @@ -777,18 +697,18 @@ static int GMQCC_WARN lex_finish_string(lex_file *lex, int quote) if (lex->flags.preprocessing && ch == '\\') { lex_tokench(lex, ch); ch = lex_getch(lex); - if (ch == EOF) { + if (ch == FS_FILE_EOF) { lexerror(lex, "unexpected end of file"); - lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */ + lex_ungetch(lex, FS_FILE_EOF); /* next token to be TOKEN_EOF */ return (lex->tok.ttype = TOKEN_ERROR); } lex_tokench(lex, ch); } else if (ch == '\\') { ch = lex_getch(lex); - if (ch == EOF) { + if (ch == FS_FILE_EOF) { lexerror(lex, "unexpected end of file"); - lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */ + lex_ungetch(lex, FS_FILE_EOF); /* next token to be TOKEN_EOF */ return (lex->tok.ttype = TOKEN_ERROR); } @@ -850,17 +770,18 @@ static int GMQCC_WARN lex_finish_string(lex_file *lex, int quote) chr = 0; nextch = lex_getch(lex); hex = (nextch == 'x'); - if (!hex) + oct = (nextch == '0'); + if (!hex && !oct) lex_ungetch(lex, nextch); for (nextch = lex_getch(lex); nextch != '}'; nextch = lex_getch(lex)) { - if (!hex) { + if (!hex && !oct) { if (nextch >= '0' && nextch <= '9') chr = chr * 10 + nextch - '0'; else { lexerror(lex, "bad character code"); return (lex->tok.ttype = TOKEN_ERROR); } - } else { + } else if (!oct) { if (nextch >= '0' && nextch <= '9') chr = chr * 0x10 + nextch - '0'; else if (nextch >= 'a' && nextch <= 'f') @@ -871,6 +792,13 @@ static int GMQCC_WARN lex_finish_string(lex_file *lex, int quote) lexerror(lex, "bad character code"); return (lex->tok.ttype = TOKEN_ERROR); } + } else { + if (nextch >= '0' && nextch <= '9') + chr = chr * 8 + chr - '0'; + else { + lexerror(lex, "bad character code"); + return (lex->tok.ttype = TOKEN_ERROR); + } } if (chr > 0x10FFFF || (!OPTS_FLAG(UTF8) && chr > 255)) { @@ -911,7 +839,7 @@ static int GMQCC_WARN lex_finish_string(lex_file *lex, int quote) lex_tokench(lex, ch); } lexerror(lex, "unexpected end of file within string constant"); - lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */ + lex_ungetch(lex, FS_FILE_EOF); /* next token to be TOKEN_EOF */ return (lex->tok.ttype = TOKEN_ERROR); } @@ -998,10 +926,6 @@ int lex_do(lex_file *lex) bool hadwhite = false; lex_token_new(lex); -#if 0 - if (!lex->tok) - return TOKEN_FATAL; -#endif while (true) { ch = lex_skipwhite(lex, hadwhite); @@ -1032,7 +956,7 @@ int lex_do(lex_file *lex) if (lex->eof) return (lex->tok.ttype = TOKEN_FATAL); - if (ch == EOF) { + if (ch == FS_FILE_EOF) { lex->eof = true; return (lex->tok.ttype = TOKEN_EOF); } @@ -1069,7 +993,7 @@ int lex_do(lex_file *lex) if (!strcmp(v, "framevalue")) { ch = lex_getch(lex); - while (ch != EOF && util_isspace(ch) && ch != '\n') + while (ch != FS_FILE_EOF && util_isspace(ch) && ch != '\n') ch = lex_getch(lex); if (!util_isdigit(ch)) { @@ -1149,7 +1073,7 @@ int lex_do(lex_file *lex) vec_free(lex->frames); /* skip line (fteqcc does it too) */ ch = lex_getch(lex); - while (ch != EOF && ch != '\n') + while (ch != FS_FILE_EOF && ch != '\n') ch = lex_getch(lex); return lex_do(lex); } @@ -1163,7 +1087,7 @@ int lex_do(lex_file *lex) { /* skip line */ ch = lex_getch(lex); - while (ch != EOF && ch != '\n') + while (ch != FS_FILE_EOF && ch != '\n') ch = lex_getch(lex); return lex_do(lex); } @@ -1246,10 +1170,6 @@ int lex_do(lex_file *lex) */ switch (ch) { - /* - case '+': - case '-': - */ case '*': case '/': case '<': @@ -1313,12 +1233,16 @@ int lex_do(lex_file *lex) ch == '~' || ch == '^' /* ~=, ~, ^ */ ) { lex_tokench(lex, ch); - nextch = lex_getch(lex); - if ((nextch == '=' && ch != '<') || - (nextch == ch && ch != '!') || - (nextch == '<' && ch == '>')) { + + if ((nextch == '=' && ch != '<') || (nextch == '<' && ch == '>')) + lex_tokench(lex, nextch); + else if (nextch == ch && ch != '!') { lex_tokench(lex, nextch); + if ((thirdch = lex_getch(lex)) == '=') + lex_tokench(lex, thirdch); + else + lex_ungetch(lex, thirdch); } else if (ch == '<' && nextch == '=') { lex_tokench(lex, nextch); if ((thirdch = lex_getch(lex)) == '>') @@ -1357,15 +1281,6 @@ int lex_do(lex_file *lex) return (lex->tok.ttype = TOKEN_OPERATOR); } - /* - if (ch == '^' || ch == '~' || ch == '!') - { - lex_tokench(lex, ch); - lex_endtoken(lex); - return (lex->tok.ttype = TOKEN_OPERATOR); - } - */ - if (ch == '*' || ch == '/') /* *=, /= */ { lex_tokench(lex, ch); @@ -1386,6 +1301,28 @@ int lex_do(lex_file *lex) return (lex->tok.ttype = TOKEN_OPERATOR); } + /* length operator */ + if (ch == 'l') { + if ((nextch = lex_getch(lex)) == 'e') { + if ((nextch = lex_getch(lex)) == 'n') { + if ((nextch = lex_getch(lex)) == 'g') { + if ((nextch = lex_getch(lex)) == 't') { + if ((nextch = lex_getch(lex)) == 'h') { + lex_tokench(lex, 'l'); + lex_tokench(lex, 'e'); + lex_tokench(lex, 'n'); + lex_tokench(lex, 'g'); + lex_tokench(lex, 't'); + lex_tokench(lex, 'h'); + lex_endtoken(lex); + return (lex->tok.ttype = TOKEN_OPERATOR); + } else lex_ungetch(lex, nextch); + } else lex_ungetch(lex, nextch); + } else lex_ungetch(lex, nextch); + } else lex_ungetch(lex, nextch); + } else lex_ungetch(lex, nextch); + } + if (isident_start(ch)) { const char *v; @@ -1473,14 +1410,10 @@ int lex_do(lex_file *lex) lex_endtoken(lex); lex->tok.ttype = TOKEN_CHARCONST; - /* It's a vector if we can successfully scan 3 floats */ -#ifdef _MSC_VER - if (sscanf_s(lex->tok.value, " %f %f %f ", - &lex->tok.constval.v.x, &lex->tok.constval.v.y, &lex->tok.constval.v.z) == 3) -#else - if (sscanf(lex->tok.value, " %f %f %f ", + + /* It's a vector if we can successfully scan 3 floats */ + if (util_sscanf(lex->tok.value, " %f %f %f ", &lex->tok.constval.v.x, &lex->tok.constval.v.y, &lex->tok.constval.v.z) == 3) -#endif { lex->tok.ttype = TOKEN_VECTORCONST;