From 9493da4f8b373ba2266d411f6ec50f1920b9255c Mon Sep 17 00:00:00 2001 From: Dale Weiler Date: Thu, 12 Apr 2012 02:22:28 -0400 Subject: [PATCH] I got parsing to work finally. --- code.c | 1 - lex.c | 22 ++- parse.c | 343 ++++++++++++++++++++++------------------------- test/digraph.qc | 5 - test/if.qc | 9 -- test/parth.qc | 26 ++-- test/tree.qc | 22 --- test/trigraph.qc | 5 - test/typedef.qc | 2 +- test/types.qc | 4 +- 10 files changed, 191 insertions(+), 248 deletions(-) delete mode 100644 test/digraph.qc delete mode 100644 test/if.qc delete mode 100644 test/tree.qc delete mode 100644 test/trigraph.qc diff --git a/code.c b/code.c index 00cf702..9f999f5 100644 --- a/code.c +++ b/code.c @@ -146,7 +146,6 @@ VECTOR_MAKE(prog_section_field, code_fields ); VECTOR_MAKE(prog_section_function, code_functions ); VECTOR_MAKE(int, code_globals ); VECTOR_MAKE(char, code_strings ); -static uint16_t code_crc16 = 0; prog_header code_header ={0}; void code_init() { diff --git a/lex.c b/lex.c index 669d13d..556f485 100644 --- a/lex.c +++ b/lex.c @@ -48,7 +48,7 @@ struct lex_file *lex_open(FILE *fp) { lex->size = lex->length; /* copy, this is never changed */ fseek(lex->file, 0, SEEK_SET); lex->last = 0; - lex->line = 1; + lex->line = 0; memset(lex->peek, 0, sizeof(lex->peek)); return lex; @@ -139,15 +139,20 @@ static int lex_digraph(struct lex_file *file, int first) { static int lex_getch(struct lex_file *file) { int ch = lex_inget(file); - + + static int str = 0; switch (ch) { case '?' 
: return lex_trigraph(file); case '<' : case ':' : case '%' : - return lex_digraph (file, ch); - case '\n': file->line ++; + case '"' : str = !str; if (str) { file->line ++; } + return lex_digraph(file, ch); + + case '\n': + if (!str) + file->line++; } return ch; @@ -277,7 +282,14 @@ int lex_token(struct lex_file *file) { /* valid identifier */ if (ch > 0 && (ch == '_' || isalpha(ch))) { lex_clear(file); - while (ch > 0 && ch != ' ' && ch != '(' && ch != '\n' && ch != ';') { + + /* + * Yes this is dirty, but there is no other _sane_ easy + * way to do it, this is what I call defensive programming + * if something breaks, add more defense :-) + */ + while (ch > 0 && ch != ' ' && ch != '(' && + ch != '\n' && ch != ';' && ch != ')') { lex_addch(ch, file); ch = lex_getsource(file); } diff --git a/parse.c b/parse.c index f6de1c6..15ea054 100644 --- a/parse.c +++ b/parse.c @@ -230,30 +230,9 @@ int parse_tree(struct lex_file *file) { token != ERROR_PARSE && \ token != ERROR_PREPRO && file->length >= 0) { switch (token) { - case TOKEN_IF: - TOKEN_SKIPWHITE(); - if (token != '(') - error(ERROR_PARSE, "%s:%d Expected `(` after `if` for if statement\n", file->name, file->line); - PARSE_TREE_ADD(PARSE_TYPE_IF); - PARSE_TREE_ADD(PARSE_TYPE_LPARTH); - break; - case TOKEN_ELSE: - token = lex_token(file); - PARSE_TREE_ADD(PARSE_TYPE_ELSE); - break; - case TOKEN_FOR: - while ((token == ' ' || token == '\n') && file->length >= 0) - token = lex_token(file); - PARSE_TREE_ADD(PARSE_TYPE_FOR); - break; - - /* - * This is a quick and easy way to do typedefs at parse time - * all power is in typedef_add(), in typedef.c. We handle - * the tokens accordingly here. 
- */ case TOKEN_TYPEDEF: { - char *f,*t; + char *f; /* from */ + char *t; /* to */ token = lex_token(file); token = lex_token(file); f = util_strdup(file->lastok); @@ -261,101 +240,192 @@ int parse_tree(struct lex_file *file) { token = lex_token(file); t = util_strdup(file->lastok); typedef_add(f, t); - - printf("TYPEDEF %s as %s\n", f, t); - mem_d(f); mem_d(t); - //while (token != '\n') token = lex_token(file); + if (token == ' ') + token = lex_token(file); + if (token != ';') error(ERROR_PARSE, "%s:%d Expected `;` on typedef\n", file->name, file->line); token = lex_token(file); - printf("TOK: %c\n", token); break; } - /* - * Returns are addable as-is, statement checking is during - * the actual parse tree check. - */ - case TOKEN_RETURN: - token = lex_token(file); - PARSE_TREE_ADD(PARSE_TYPE_RETURN); - break; - case TOKEN_CONTINUE: - PARSE_TREE_ADD(PARSE_TYPE_CONTINUE); - break; - - case TOKEN_DO: PARSE_PERFORM(PARSE_TYPE_DO, {}); - case TOKEN_WHILE: PARSE_PERFORM(PARSE_TYPE_WHILE, {}); - case TOKEN_BREAK: PARSE_PERFORM(PARSE_TYPE_BREAK, {}); - case TOKEN_GOTO: PARSE_PERFORM(PARSE_TYPE_GOTO, {}); - case TOKEN_VOID: PARSE_PERFORM(PARSE_TYPE_VOID, {}); - + case TOKEN_VOID: PARSE_TREE_ADD(PARSE_TYPE_VOID); goto fall; case TOKEN_STRING: PARSE_TREE_ADD(PARSE_TYPE_STRING); goto fall; case TOKEN_VECTOR: PARSE_TREE_ADD(PARSE_TYPE_VECTOR); goto fall; case TOKEN_ENTITY: PARSE_TREE_ADD(PARSE_TYPE_ENTITY); goto fall; case TOKEN_FLOAT: PARSE_TREE_ADD(PARSE_TYPE_FLOAT); goto fall; - /* fall into this for all types */ { fall:; char *name = NULL; - TOKEN_SKIPWHITE(); - name = util_strdup(file->lastok); - token = lex_token (file); + int type = token; /* story copy */ - /* is it NOT a definition? 
*/ - if (token != ';') { - while (token == ' ') - token = lex_token(file); + /* skip over space */ + token = lex_token(file); + if (token == ' ') + token = lex_token(file); + + /* save name */ + name = util_strdup(file->lastok); + + /* skip spaces */ + token = lex_token(file); + if (token == ' ') + token = lex_token(file); - /* it's a function? */ - if (token == '(') { - /* - * Now I essentially have to do a ton of parsing for - * function definition. - */ - PARSE_TREE_ADD(PARSE_TYPE_LPARTH); + if (token == ';') { + printf("definition\n"); + } else if (token == '=') { + token = lex_token(file); + if (token == ' ') token = lex_token(file); - while (token != '\n' && token != ')') { - switch (token) { - case TOKEN_VOID: PARSE_TREE_ADD(PARSE_TYPE_VOID); break; - case TOKEN_STRING: PARSE_TREE_ADD(PARSE_TYPE_STRING); break; - case TOKEN_ENTITY: PARSE_TREE_ADD(PARSE_TYPE_ENTITY); break; - case TOKEN_FLOAT: PARSE_TREE_ADD(PARSE_TYPE_FLOAT); break; - /* - * TODO: Need to parse function pointers: I have no clue how - * I'm actually going to pull that off, it's going to be hard - * since you can have a function pointer-pointer-pointer .... - */ + + /* strings are in file->lastok */ + switch (type) { + case TOKEN_VOID: return error(ERROR_PARSE, "%s:%d Cannot assign value to type void\n", file->name, file->line); + case TOKEN_STRING: + if (*file->lastok != '"') + error(ERROR_PARSE, "%s:%d Expected a '\"' for string constant\n", file->name, file->line); + break; + case TOKEN_VECTOR: { + float compile_calc_x = 0; + float compile_calc_y = 0; + float compile_calc_z = 0; + int compile_calc_d = 0; /* dot? */ + + char compile_data[1024] = {0}; /* zero-filled so the first atof() reads a NUL-terminated string */ + char *compile_eval = compile_data; + + if (token != '{') + error(ERROR_PARSE, "%s:%d Expected initializer list `{`,`}` for vector constant\n", file->name, file->line); + + token = lex_token(file); + if (token == ' ') + token = lex_token(file); + + /* + * we support .7623, unlike any other QuakeC + * compiler. Does that make us better :-).
+ */ + if (token == '.') + compile_calc_d = 1; + if (!isdigit(token) && !compile_calc_d) + error(ERROR_PARSE, "%s:%d Invalid constant initializer element for vector, must be numeric\n", file->name, file->line); + + /* + * Read in constant data, will be in float format + * which means we use atof. + */ + while (isdigit(token) || token == '.') { + *compile_eval++ = token; + token = lex_token(file); + if (token == '.' && compile_calc_d) { + error(ERROR_PARSE, "%s:%d Invalid constant initializer element for vector, must be numeric.\n", file->name, file->line); + } else if (token == '.' && !compile_calc_d) { + compile_calc_d = 1; + } } - } - /* just a definition */ - if (token == ')') { + if (token == ' ') + token = lex_token(file); + + if (token != ',' && token != ' ') + error(ERROR_PARSE, "%s:%d invalid constant initializer element for vector (missing spaces, or comma delimited list?)\n", file->name, file->line); + compile_calc_x = atof(compile_data); + compile_calc_d = 0; + memset(compile_data, 0, sizeof(compile_data)); + compile_eval = &compile_data[0]; + + token = lex_token(file); + if (token == ' ') + token = lex_token(file); + + if (token == '.') + compile_calc_d = 1; + if (!isdigit(token) && !compile_calc_d) + error(ERROR_PARSE, "%s:%d Invalid constant initializer element for vector, must be numeric\n", file->name, file->line); + /* - * I like to put my { on the same line as the ) for - * functions, ifs, elses, so we must support that!. + * Read in constant data, will be in float format + * which means we use atof. */ - PARSE_TREE_ADD(PARSE_TYPE_RPARTH); + while (isdigit(token) || token == '.') { + *compile_eval++ = token; + token = lex_token(file); + if (token == '.' && compile_calc_d) { + error(ERROR_PARSE, "%s:%d Invalid constant initializer element for vector, must be numeric.\n", file->name, file->line); + } else if (token == '.' 
&& !compile_calc_d) { + compile_calc_d = 1; + } + } + if (token == ' ') + token = lex_token(file); + + if (token != ',' && token != ' ') + error(ERROR_PARSE, "%s:%d invalid constant initializer element for vector (missing spaces, or comma delimited list?)\n", file->name, file->line); + compile_calc_y = atof(compile_data); + compile_calc_d = 0; + memset(compile_data, 0, sizeof(compile_data)); + compile_eval = &compile_data[0]; + token = lex_token(file); + if (token == ' ') + token = lex_token(file); + + if (token == '.') + compile_calc_d = 1; + + if (!isdigit(token) && !compile_calc_d) + error(ERROR_PARSE, "%s:%d Invalid constant initializer element for vector, must be numeric\n", file->name, file->line); + + /* + * Read in constant data, will be in float format + * which means we use atof. + */ + while (isdigit(token) || token == '.') { + *compile_eval++ = token; + token = lex_token(file); + if (token == '.' && compile_calc_d) { + error(ERROR_PARSE, "%s:%d Invalid constant initializer element for vector, must be numeric.\n", file->name, file->line); + } else if (token == '.' && !compile_calc_d) { + compile_calc_d = 1; + } + } + if (token == ' ') + token = lex_token(file); + + if (token != '}') + error(ERROR_PARSE, "%s:%d Expected `}` on end of constant initialization for vector\n", file->name, file->line); + + compile_calc_z = atof(compile_data); + + /* + * Check for the semi-colon... This is insane + * the amount of parsing here that is. 
+ */ token = lex_token(file); - if(token == '{') - PARSE_TREE_ADD(PARSE_TYPE_LBS); + if (token == ' ') + token = lex_token(file); + if (token != ';') + error(ERROR_PARSE, "%s:%d Expected `;` on end of constant initialization for vector\n", file->name, file->line); + + //printf("VEC_X: %f\n", compile_calc_x); + //printf("VEC_Y: %f\n", compile_calc_y); + //printf("VEC_X: %f\n", compile_calc_z); + break; } - else if (token == '\n') - error(ERROR_COMPILER, "%s:%d Expecting `;` after function definition %s\n", file->name, file->line, name); - - } else if (token == '=') { - PARSE_TREE_ADD(PARSE_TYPE_EQUAL); - } else { - error(ERROR_COMPILER, "%s:%d Invalid decltype: expected `(` [function], or `=` [constant], or `;` [definition] for %s\n", file->name, file->line, name); - } - } else { - /* definition */ - printf("FOUND DEFINITION\n"); + + case TOKEN_ENTITY: + case TOKEN_FLOAT: + if (!isdigit(token)) + error(ERROR_PARSE, "%s:%d Expected numeric constant for float constant\n", file->name, file->line); + break; + } + } else if (token == '(') { + printf("FUNCTION ??\n"); } mem_d(name); } @@ -391,97 +461,6 @@ int parse_tree(struct lex_file *file) { token = lex_token(file); break; - case '.': - PARSE_TREE_ADD(PARSE_TYPE_DOT); - break; - case '(': - PARSE_TREE_ADD(PARSE_TYPE_LPARTH); - break; - case ')': - PARSE_TREE_ADD(PARSE_TYPE_RPARTH); - break; - - case '&': /* & */ - token = lex_token(file); - if (token == '&') { /* && */ - token = lex_token(file); - PARSE_TREE_ADD(PARSE_TYPE_LAND); - break; - } - PARSE_TREE_ADD(PARSE_TYPE_BAND); - break; - case '|': /* | */ - token = lex_token(file); - if (token == '|') { /* || */ - token = lex_token(file); - PARSE_TREE_ADD(PARSE_TYPE_LOR); - break; - } - PARSE_TREE_ADD(PARSE_TYPE_BOR); - break; - case '!': /* !
*/ - token = lex_token(file); - if (token == '=') { /* != */ - token = lex_token(file); - PARSE_TREE_ADD(PARSE_TYPE_LNEQ); - break; - } - PARSE_TREE_ADD(PARSE_TYPE_LNOT); - break; - case '<': /* < */ - token = lex_token(file); - if (token == '=') { /* <= */ - token = lex_token(file); - PARSE_TREE_ADD(PARSE_TYPE_LTEQ); - break; - } - PARSE_TREE_ADD(PARSE_TYPE_LT); - break; - case '>': /* > */ - token = lex_token(file); - if (token == '=') { /* >= */ - token = lex_token(file); - PARSE_TREE_ADD(PARSE_TYPE_GTEQ); - break; - } - PARSE_TREE_ADD(PARSE_TYPE_GT); - break; - case '=': /* = */ - token = lex_token(file); - if (token == '=') { /* == */ - token = lex_token(file); - PARSE_TREE_ADD(PARSE_TYPE_EQEQ); - break; - } - PARSE_TREE_ADD(PARSE_TYPE_EQUAL); - break; - case ';': - token = lex_token(file); - PARSE_TREE_ADD(PARSE_TYPE_DONE); - break; - case '-': - token = lex_token(file); - PARSE_TREE_ADD(PARSE_TYPE_MINUS); - break; - case '+': - token = lex_token(file); - PARSE_TREE_ADD(PARSE_TYPE_ADD); - break; - case '{': - token = lex_token(file); - PARSE_TREE_ADD(PARSE_TYPE_LBS); - break; - case '}': - token = lex_token(file); - PARSE_TREE_ADD(PARSE_TYPE_RBS); - break; - - /* - * TODO: Fix lexer to spit out ( ) as tokens, it seems the - * using '(' or ')' in parser doesn't work properly unless - * there are spaces before them to allow the lexer to properly - * seperate identifiers. -- otherwise it eats all of it. 
- */ case LEX_IDENT: token = lex_token(file); PARSE_TREE_ADD(PARSE_TYPE_IDENT); diff --git a/test/digraph.qc b/test/digraph.qc deleted file mode 100644 index 6467638..0000000 --- a/test/digraph.qc +++ /dev/null @@ -1,5 +0,0 @@ -<% - <% - <% %> - %> -%> diff --git a/test/if.qc b/test/if.qc deleted file mode 100644 index 9ef7234..0000000 --- a/test/if.qc +++ /dev/null @@ -1,9 +0,0 @@ -float test_1data = 1; -float test_2data = 2; - -float test_if() -{ - if (test_1data == test_2data) { - /* do this code */ - } -} diff --git a/test/parth.qc b/test/parth.qc index d9c1e59..396f25b 100644 --- a/test/parth.qc +++ b/test/parth.qc @@ -1,16 +1,10 @@ -( -( -( -) -) -) -( -( -) -) -( -) -( -) -( -) +void test_parth() { + if (1) { } + if (2) { } + if (3) { } + if (4) { } + if (5) { } + if (6) { } + if (7) { } + if (8) { } +} diff --git a/test/tree.qc b/test/tree.qc deleted file mode 100644 index b8b5568..0000000 --- a/test/tree.qc +++ /dev/null @@ -1,22 +0,0 @@ -if(1) { - if(1) { - return 0; - } else { - return 1; - } -} else { - for { - if(1) { - return 2; - } else { - continue; - } - } - do { - if(1){ - break; - } else { - goto finish; - } - } while ( ) -} diff --git a/test/trigraph.qc b/test/trigraph.qc deleted file mode 100644 index 770272c..0000000 --- a/test/trigraph.qc +++ /dev/null @@ -1,5 +0,0 @@ -??< - ??< - ??< ??> - ??> -??> diff --git a/test/typedef.qc b/test/typedef.qc index 7971ba8..f60bf77 100644 --- a/test/typedef.qc +++ b/test/typedef.qc @@ -4,7 +4,7 @@ typedef string my_string; typedef entity my_entity; typedef void my_void; -my_float type_float +my_float type_float; my_vector type_vector; my_string type_string; my_entity type_entity; diff --git a/test/types.qc b/test/types.qc index 814ce11..4fe3a27 100644 --- a/test/types.qc +++ b/test/types.qc @@ -1,5 +1,5 @@ -float typef; -vector typev; +float typef = 1; +vector typev = {0,1,2}; string types; entity typee; void typev; -- 2.39.2