X-Git-Url: http://de.git.xonotic.org/?p=xonotic%2Fgmqcc.git;a=blobdiff_plain;f=lexer.c;h=1a8c17a1f3e414928737277fb7a5a59b76e1e8da;hp=6c8ccd9db127c82c11996ccf82a08037e196c840;hb=641136fee3e2f589f93ad6a7b3213b0247107303;hpb=92c0d6157c3d6b24ffff811a989572e2ae6d92f9

diff --git a/lexer.c b/lexer.c
index 6c8ccd9..1a8c17a 100644
--- a/lexer.c
+++ b/lexer.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2012, 2013
+ * Copyright (C) 2012, 2013, 2014
  *     Wolfgang Bumiller
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy of
@@ -82,91 +82,8 @@ static bool lexwarn(lex_file *lex, int warntype, const char *fmt, ...)
     return r;
 }
 
-
-#if 0
-token* token_new()
-{
-    token *tok = (token*)mem_a(sizeof(token));
-    if (!tok)
-        return NULL;
-    memset(tok, 0, sizeof(*tok));
-    return tok;
-}
-
-void token_delete(token *self)
-{
-    if (self->next && self->next->prev == self)
-        self->next->prev = self->prev;
-    if (self->prev && self->prev->next == self)
-        self->prev->next = self->next;
-    MEM_VECTOR_CLEAR(self, value);
-    mem_d(self);
-}
-
-token* token_copy(const token *cp)
-{
-    token* self = token_new();
-    if (!self)
-        return NULL;
-    /* copy the value */
-    self->value_alloc = cp->value_count + 1;
-    self->value_count = cp->value_count;
-    self->value = (char*)mem_a(self->value_alloc);
-    if (!self->value) {
-        mem_d(self);
-        return NULL;
-    }
-    memcpy(self->value, cp->value, cp->value_count);
-    self->value[self->value_alloc-1] = 0;
-
-    /* rest */
-    self->ctx = cp->ctx;
-    self->ttype = cp->ttype;
-    memcpy(&self->constval, &cp->constval, sizeof(self->constval));
-    return self;
-}
-
-void token_delete_all(token *t)
-{
-    token *n;
-
-    do {
-        n = t->next;
-        token_delete(t);
-        t = n;
-    } while(t);
-}
-
-token* token_copy_all(const token *cp)
-{
-    token *cur;
-    token *out;
-
-    out = cur = token_copy(cp);
-    if (!out)
-        return NULL;
-
-    while (cp->next) {
-        cp = cp->next;
-        cur->next = token_copy(cp);
-        if (!cur->next) {
-            token_delete_all(out);
-            return NULL;
-        }
-        cur->next->prev = cur;
-        cur = cur->next;
-    }
-
-    return out;
-}
-#else
 static void lex_token_new(lex_file *lex)
 {
-#if 0
-    if (lex->tok)
-        token_delete(lex->tok);
-    lex->tok = token_new();
-#else
     if (lex->tok.value)
         vec_shrinkto(lex->tok.value, 0);
 
@@ -174,14 +91,16 @@ static void lex_token_new(lex_file *lex)
     lex->tok.ctx.line = lex->sline;
     lex->tok.ctx.file = lex->name;
     lex->tok.ctx.column = lex->column;
-#endif
 }
-#endif
+
+static void lex_ungetch(lex_file *lex, int ch);
+static int lex_getch(lex_file *lex);
 
 lex_file* lex_open(const char *file)
 {
     lex_file  *lex;
     fs_file_t *in = fs_file_open(file, "rb");
+    uint32_t   read;
 
     if (!in) {
         lexerror(NULL, "open failed: '%s'\n", file);
@@ -204,6 +123,19 @@ lex_file* lex_open(const char *file)
     lex->peekpos = 0;
     lex->eof = false;
 
+    /* handle BOM */
+    if ((read = (lex_getch(lex) << 16) | (lex_getch(lex) << 8) | lex_getch(lex)) != 0xEFBBBF) {
+        lex_ungetch(lex, (read & 0x0000FF));
+        lex_ungetch(lex, (read & 0x00FF00) >> 8);
+        lex_ungetch(lex, (read & 0xFF0000) >> 16);
+    } else {
+        /*
+         * otherwise the lexer has advanced 3 bytes for the BOM, we need
+         * to set the column back to 0
+         */
+        lex->column = 0;
+    }
+
     vec_push(lex_filenames, lex->name);
     return lex;
 }
@@ -256,16 +188,15 @@ void lex_close(lex_file *lex)
     if (lex->file)
         fs_file_close(lex->file);
-#if 0
-    if (lex->tok)
-        token_delete(lex->tok);
-#else
+
     vec_free(lex->tok.value);
-#endif
+
     /* mem_d(lex->name); collected in lex_filenames */
 
     mem_d(lex);
 }
 
+
+
 static int lex_fgetc(lex_file *lex)
 {
     if (lex->file) {
@@ -286,7 +217,6 @@ static int lex_fgetc(lex_file *lex)
  * are working on.
  * The are merely wrapping get/put in order to count line numbers.
  */
-static void lex_ungetch(lex_file *lex, int ch);
 static int lex_try_trigraph(lex_file *lex, int old)
 {
     int c2, c3;
@@ -606,10 +536,6 @@ static int lex_skipwhite(lex_file *lex, bool hadwhite)
                 if (lex->flags.preprocessing) {
                     haswhite = true;
-                    /*
-                    lex_tokench(lex, '/');
-                    lex_tokench(lex, '/');
-                    */
                     lex_tokench(lex, ' ');
                     lex_tokench(lex, ' ');
                 }
 
@@ -631,10 +557,6 @@ static int lex_skipwhite(lex_file *lex, bool hadwhite)
                 /* multiline comment */
                 if (lex->flags.preprocessing) {
                     haswhite = true;
-                    /*
-                    lex_tokench(lex, '/');
-                    lex_tokench(lex, '*');
-                    */
                     lex_tokench(lex, ' ');
                     lex_tokench(lex, ' ');
                 }
@@ -646,10 +568,6 @@ static int lex_skipwhite(lex_file *lex, bool hadwhite)
                     ch = lex_getch(lex);
                     if (ch == '/') {
                         if (lex->flags.preprocessing) {
-                            /*
-                            lex_tokench(lex, '*');
-                            lex_tokench(lex, '/');
-                            */
                             lex_tokench(lex, ' ');
                             lex_tokench(lex, ' ');
                         }
@@ -661,7 +579,7 @@ static int lex_skipwhite(lex_file *lex, bool hadwhite)
                     if (ch == '\n')
                         lex_tokench(lex, '\n');
                     else
-                        lex_tokench(lex, ' '); /* ch); */
+                        lex_tokench(lex, ' ');
                 }
             }
             ch = ' '; /* cause TRUE in the isspace check */
@@ -766,6 +684,7 @@ static int GMQCC_WARN lex_finish_string(lex_file *lex, int quote)
     int ch = 0;
     int nextch;
     bool hex;
+    bool oct;
     char u8buf[8]; /* way more than enough */
     int  u8len, uc;
 
@@ -851,17 +770,18 @@ static int GMQCC_WARN lex_finish_string(lex_file *lex, int quote)
                 chr = 0;
                 nextch = lex_getch(lex);
                 hex = (nextch == 'x');
-                if (!hex)
+                oct = (nextch == '0');
+                if (!hex && !oct)
                     lex_ungetch(lex, nextch);
                 for (nextch = lex_getch(lex); nextch != '}'; nextch = lex_getch(lex)) {
-                    if (!hex) {
+                    if (!hex && !oct) {
                         if (nextch >= '0' && nextch <= '9')
                             chr = chr * 10 + nextch - '0';
                         else {
                             lexerror(lex, "bad character code");
                             return (lex->tok.ttype = TOKEN_ERROR);
                         }
-                    } else {
+                    } else if (!oct) {
                         if (nextch >= '0' && nextch <= '9')
                             chr = chr * 0x10 + nextch - '0';
                         else if (nextch >= 'a' && nextch <= 'f')
@@ -872,6 +792,13 @@ static int GMQCC_WARN lex_finish_string(lex_file *lex, int quote)
                             lexerror(lex, "bad character code");
                             return (lex->tok.ttype = TOKEN_ERROR);
                         }
+                    } else {
+                        if (nextch >= '0' && nextch <= '9')
+                            chr = chr * 8 + chr - '0';
+                        else {
+                            lexerror(lex, "bad character code");
+                            return (lex->tok.ttype = TOKEN_ERROR);
+                        }
                     }
 
                     if (chr > 0x10FFFF || (!OPTS_FLAG(UTF8) && chr > 255)) {
@@ -999,10 +926,6 @@ int lex_do(lex_file *lex)
     bool hadwhite = false;
 
     lex_token_new(lex);
-#if 0
-    if (!lex->tok)
-        return TOKEN_FATAL;
-#endif
 
     while (true) {
         ch = lex_skipwhite(lex, hadwhite);
@@ -1247,10 +1170,6 @@ int lex_do(lex_file *lex)
      */
     switch (ch)
     {
-        /*
-        case '+':
-        case '-':
-        */
         case '*':
         case '/':
         case '<':
@@ -1314,12 +1233,16 @@ int lex_do(lex_file *lex)
         ch == '~' || ch == '^' /* ~=, ~, ^ */
     ) {
         lex_tokench(lex, ch);
-
         nextch = lex_getch(lex);
-        if ((nextch == '=' && ch != '<') ||
-            (nextch == ch  && ch != '!') ||
-            (nextch == '<' && ch == '>')) {
+
+        if ((nextch == '=' && ch != '<') || (nextch == '<' && ch == '>'))
             lex_tokench(lex, nextch);
+        else if (nextch == ch && ch != '!') {
+            lex_tokench(lex, nextch);
+            if ((thirdch = lex_getch(lex)) == '=')
+                lex_tokench(lex, thirdch);
+            else
+                lex_ungetch(lex, thirdch);
         } else if (ch == '<' && nextch == '=') {
             lex_tokench(lex, nextch);
             if ((thirdch = lex_getch(lex)) == '>')
@@ -1358,15 +1281,6 @@ int lex_do(lex_file *lex)
         return (lex->tok.ttype = TOKEN_OPERATOR);
     }
 
-    /*
-    if (ch == '^' || ch == '~' || ch == '!')
-    {
-        lex_tokench(lex, ch);
-        lex_endtoken(lex);
-        return (lex->tok.ttype = TOKEN_OPERATOR);
-    }
-    */
-
     if (ch == '*' || ch == '/') /* *=, /= */
     {
         lex_tokench(lex, ch);
@@ -1387,6 +1301,28 @@ int lex_do(lex_file *lex)
         return (lex->tok.ttype = TOKEN_OPERATOR);
     }
 
+    /* length operator */
+    if (ch == 'l') {
+        if ((nextch = lex_getch(lex)) == 'e') {
+            if ((nextch = lex_getch(lex)) == 'n') {
+                if ((nextch = lex_getch(lex)) == 'g') {
+                    if ((nextch = lex_getch(lex)) == 't') {
+                        if ((nextch = lex_getch(lex)) == 'h') {
+                            lex_tokench(lex, 'l');
+                            lex_tokench(lex, 'e');
+                            lex_tokench(lex, 'n');
+                            lex_tokench(lex, 'g');
+                            lex_tokench(lex, 't');
+                            lex_tokench(lex, 'h');
+                            lex_endtoken(lex);
+                            return (lex->tok.ttype = TOKEN_OPERATOR);
+                        } else lex_ungetch(lex, nextch);
+                    } else lex_ungetch(lex, nextch);
+                } else lex_ungetch(lex, nextch);
+            } else lex_ungetch(lex, nextch);
+        } else lex_ungetch(lex, nextch);
+    }
+
     if (isident_start(ch))
     {
         const char *v;
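
The lex_finish_string hunks above extend the \{...} escape so that, besides decimal character codes, a leading 'x' selects hexadecimal digits and a leading '0' selects octal digits. The standalone sketch below is not gmqcc code: the function name, the string-based input, lowercase-only hex digits, and the strict 0-7 octal digit range are assumptions made for illustration. It folds each digit in as value * base + digit; the corresponding octal line in the hunk reads chr = chr * 8 + chr - '0'.

#include <stdio.h>

/*
 * Illustrative sketch only: accumulate the character code from the body of a
 * "\{...}" escape, mirroring the decimal/hex/octal selection added above.
 * A leading 'x' selects base 16, a leading '0' selects base 8, otherwise the
 * digits are read as base 10.  Returns -1 where the lexer would report
 * "bad character code".
 */
static long escape_code(const char *s)
{
    long chr = 0;
    int  hex = (*s == 'x');
    int  oct = (!hex && *s == '0');

    if (hex || oct)
        ++s; /* the lexer consumes the prefix and only ungetches it for decimal */

    for (; *s && *s != '}'; ++s) {
        if (hex) {
            if (*s >= '0' && *s <= '9')
                chr = chr * 0x10 + (*s - '0');
            else if (*s >= 'a' && *s <= 'f')
                chr = chr * 0x10 + (*s - 'a' + 10);
            else
                return -1;
        } else if (oct) {
            if (*s >= '0' && *s <= '7')
                chr = chr * 8 + (*s - '0');
            else
                return -1;
        } else {
            if (*s >= '0' && *s <= '9')
                chr = chr * 10 + (*s - '0');
            else
                return -1;
        }
        if (chr > 0x10FFFF)
            return -1; /* same upper bound the lexer checks */
    }
    return chr;
}

int main(void)
{
    /* all three spell U+0041 'A': decimal 65, hex 41, octal 101 */
    printf("%ld %ld %ld\n", escape_code("65}"), escape_code("x41}"), escape_code("0101}"));
    return 0;
}

Compiled and run, the example prints "65 65 65"; the error path stands in for the lexer's TOKEN_ERROR return only to keep the sketch self-contained.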