From 9493da4f8b373ba2266d411f6ec50f1920b9255c Mon Sep 17 00:00:00 2001
From: Dale Weiler <killfieldengine@gmail.com>
Date: Thu, 12 Apr 2012 02:22:28 -0400
Subject: [PATCH] I got parsing to work finally.

---
 code.c           |   1 -
 lex.c            |  22 ++-
 parse.c          | 343 ++++++++++++++++++++++-------------------------
 test/digraph.qc  |   5 -
 test/if.qc       |   9 --
 test/parth.qc    |  26 ++--
 test/tree.qc     |  22 ---
 test/trigraph.qc |   5 -
 test/typedef.qc  |   2 +-
 test/types.qc    |   4 +-
 10 files changed, 191 insertions(+), 248 deletions(-)
 delete mode 100644 test/digraph.qc
 delete mode 100644 test/if.qc
 delete mode 100644 test/tree.qc
 delete mode 100644 test/trigraph.qc

diff --git a/code.c b/code.c
index 00cf702..9f999f5 100644
--- a/code.c
+++ b/code.c
@@ -146,7 +146,6 @@ VECTOR_MAKE(prog_section_field,     code_fields    );
 VECTOR_MAKE(prog_section_function,  code_functions );
 VECTOR_MAKE(int,                    code_globals   );
 VECTOR_MAKE(char,                   code_strings   );
-static uint16_t code_crc16  = 0;
 prog_header     code_header ={0};
 
 void code_init() {
diff --git a/lex.c b/lex.c
index 669d13d..556f485 100644
--- a/lex.c
+++ b/lex.c
@@ -48,7 +48,7 @@ struct lex_file *lex_open(FILE *fp) {
 	lex->size   = lex->length; /* copy, this is never changed */
 	fseek(lex->file, 0, SEEK_SET);
 	lex->last = 0;
-	lex->line = 1;
+	lex->line = 0;
 	
 	memset(lex->peek, 0, sizeof(lex->peek));
 	return lex;
@@ -139,15 +139,20 @@ static int lex_digraph(struct lex_file *file, int first) {
 
 static int lex_getch(struct lex_file *file) {
 	int ch = lex_inget(file);
-	
+
+	static int str = 0;
 	switch (ch) {
 		case '?' :
 			return lex_trigraph(file);
 		case '<' :
 		case ':' :
 		case '%' :
-			return lex_digraph (file, ch);
-		case '\n': file->line ++;
+		case '"' : str = !str; if (str) { file->line ++; }
+			return lex_digraph(file, ch);
+			
+		case '\n':
+			if (!str)
+				file->line++;
 	}
 		
 	return ch;
@@ -277,7 +282,14 @@ int lex_token(struct lex_file *file) {
 	/* valid identifier */
 	if (ch > 0 && (ch == '_' || isalpha(ch))) {
 		lex_clear(file);
-		while (ch > 0 && ch != ' ' && ch != '(' && ch != '\n' && ch != ';') {
+		
+		/*
+		 * Yes, this is dirty, but there is no other _sane_, easy
+		 * way to do it. This is what I call defensive programming:
+		 * if something breaks, add more defense :-)
+		 */
+		while (ch >   0   && ch != ' ' && ch != '(' &&
+		       ch != '\n' && ch != ';' && ch != ')') {
 			lex_addch(ch, file);
 			ch = lex_getsource(file);
 		}
diff --git a/parse.c b/parse.c
index f6de1c6..15ea054 100644
--- a/parse.c
+++ b/parse.c
@@ -230,30 +230,9 @@ int parse_tree(struct lex_file *file) {
 		    token                    != ERROR_PARSE    && \
 		    token                    != ERROR_PREPRO   && file->length >= 0) {
 		switch (token) {
-			case TOKEN_IF:
-				TOKEN_SKIPWHITE();
-				if (token != '(')
-					error(ERROR_PARSE, "%s:%d Expected `(` after `if` for if statement\n", file->name, file->line);
-				PARSE_TREE_ADD(PARSE_TYPE_IF);
-				PARSE_TREE_ADD(PARSE_TYPE_LPARTH);
-				break;
-			case TOKEN_ELSE:
-				token = lex_token(file);
-				PARSE_TREE_ADD(PARSE_TYPE_ELSE);
-				break;
-			case TOKEN_FOR:
-				while ((token == ' ' || token == '\n') && file->length >= 0)
-					token = lex_token(file);
-				PARSE_TREE_ADD(PARSE_TYPE_FOR);
-				break;
-			
-			/*
-			 * This is a quick and easy way to do typedefs at parse time
-			 * all power is in typedef_add(), in typedef.c.  We handle 
-			 * the tokens accordingly here.
-			 */
 			case TOKEN_TYPEDEF: {
-				char *f,*t;
+				char *f; /* from */
+				char *t; /* to   */
 				
 				token = lex_token(file); 
 				token = lex_token(file); f = util_strdup(file->lastok);
@@ -261,101 +240,192 @@ int parse_tree(struct lex_file *file) {
 				token = lex_token(file); t = util_strdup(file->lastok);
 				
 				typedef_add(f, t);
-				
-				printf("TYPEDEF %s as %s\n", f, t);
-				
 				mem_d(f);
 				mem_d(t);
 				
-				//while (token != '\n')
 				token = lex_token(file);
+				if (token == ' ')
+					token = lex_token(file);
+					
 				if (token != ';')
 					error(ERROR_PARSE, "%s:%d Expected `;` on typedef\n", file->name, file->line);
 					
 				token = lex_token(file);
-				printf("TOK: %c\n", token);
 				break;
 			}
 			
-			/*
-			 * Returns are addable as-is, statement checking is during
-			 * the actual parse tree check.
-			 */
-			case TOKEN_RETURN:
-				token = lex_token(file);
-				PARSE_TREE_ADD(PARSE_TYPE_RETURN);
-				break;
-			case TOKEN_CONTINUE:
-				PARSE_TREE_ADD(PARSE_TYPE_CONTINUE);
-				break;
-			
-			case TOKEN_DO:        PARSE_PERFORM(PARSE_TYPE_DO,      {});
-			case TOKEN_WHILE:     PARSE_PERFORM(PARSE_TYPE_WHILE,   {});
-			case TOKEN_BREAK:     PARSE_PERFORM(PARSE_TYPE_BREAK,   {});
-			case TOKEN_GOTO:      PARSE_PERFORM(PARSE_TYPE_GOTO,    {});
-			case TOKEN_VOID:      PARSE_PERFORM(PARSE_TYPE_VOID,    {});
-			
+			case TOKEN_VOID:      PARSE_TREE_ADD(PARSE_TYPE_VOID);   goto fall;
 			case TOKEN_STRING:    PARSE_TREE_ADD(PARSE_TYPE_STRING); goto fall;
 			case TOKEN_VECTOR:    PARSE_TREE_ADD(PARSE_TYPE_VECTOR); goto fall;
 			case TOKEN_ENTITY:    PARSE_TREE_ADD(PARSE_TYPE_ENTITY); goto fall;
 			case TOKEN_FLOAT:     PARSE_TREE_ADD(PARSE_TYPE_FLOAT);  goto fall;
-			/* fall into this for all types */
 			{
 			fall:;
 				char *name = NULL;
-				TOKEN_SKIPWHITE();
-				name  = util_strdup(file->lastok);
-				token = lex_token  (file);
+				int   type = token; /* store copy */
 				
-				/* is it NOT a definition? */
-				if (token != ';') {
-					while (token == ' ')
-						token = lex_token(file);
+				/* skip over space */
+				token = lex_token(file);
+				if (token == ' ')
+					token = lex_token(file);
+				
+				/* save name */
+				name = util_strdup(file->lastok);
+				
+				/* skip spaces */
+				token = lex_token(file);
+				if (token == ' ')
+					token = lex_token(file);
 					
-					/* it's a function? */
-					if (token == '(') {
-						/*
-						 * Now I essentially have to do a ton of parsing for
-						 * function definition.
-						 */
-						PARSE_TREE_ADD(PARSE_TYPE_LPARTH);
+				if (token == ';') {
+					printf("definition\n");
+				} else if (token == '=') {
+					token = lex_token(file);
+					if (token == ' ')
 						token = lex_token(file);
-						while (token != '\n' && token != ')') {
-							switch (token) {
-								case TOKEN_VOID:    PARSE_TREE_ADD(PARSE_TYPE_VOID);   break;
-								case TOKEN_STRING:  PARSE_TREE_ADD(PARSE_TYPE_STRING); break;
-								case TOKEN_ENTITY:  PARSE_TREE_ADD(PARSE_TYPE_ENTITY); break;
-								case TOKEN_FLOAT:   PARSE_TREE_ADD(PARSE_TYPE_FLOAT);  break;
-								/*
-								 * TODO:  Need to parse function pointers:  I have no clue how
-								 * I'm actually going to pull that off, it's going to be hard
-								 * since you can have a function pointer-pointer-pointer ....
-								 */
+					
+					/* strings are in file->lastok */
+					switch (type) {
+						case TOKEN_VOID:   return error(ERROR_PARSE, "%s:%d Cannot assign value to type void\n", file->name, file->line);
+						case TOKEN_STRING:
+							if (*file->lastok != '"')
+								error(ERROR_PARSE, "%s:%d Expected a '\"' for string constant\n", file->name, file->line);
+							break;
+						case TOKEN_VECTOR: {
+							float compile_calc_x = 0;
+							float compile_calc_y = 0;
+							float compile_calc_z = 0;
+							int   compile_calc_d = 0; /* dot? */
+							
+							char  compile_data[1024];
+							char *compile_eval = compile_data;
+							
+							if (token != '{')
+								error(ERROR_PARSE, "%s:%d Expected initializer list `{`,`}` for vector constant\n", file->name, file->line);	
+								
+							token = lex_token(file);
+							if (token == ' ')
+								token = lex_token(file);
+							
+							/*
+							 * we support .7623, unlike any other QuakeC
+							 * compiler.  Does that make us better? :-)
+							 */
+							if (token == '.')
+								compile_calc_d = 1;
+							if (!isdigit(token) && !compile_calc_d)
+								error(ERROR_PARSE, "%s:%d Invalid constant initializer element for vector, must be numeric\n", file->name, file->line);
+							
+							/*
+							 * Read in constant data, will be in float format
+							 * which means we use atof.
+							 */
+							while (isdigit(token) || token == '.') {
+								*compile_eval++ = token;
+								token           = lex_token(file);
+								if (token == '.' && compile_calc_d) {
+									error(ERROR_PARSE, "%s:%d Invalid constant initializer element for vector, must be numeric.\n", file->name, file->line);
+								} else if (token == '.' && !compile_calc_d) {
+									compile_calc_d = 1;
+								}
 							}
-						}
-						/* just a definition */
-						if (token == ')') {
+							if (token == ' ')
+								token = lex_token(file);
+								
+							if (token != ',' && token != ' ')
+								error(ERROR_PARSE, "%s:%d invalid constant initializer element for vector (missing spaces, or comma delimited list?)\n", file->name, file->line);
+							compile_calc_x = atof(compile_data);
+							compile_calc_d = 0;
+							memset(compile_data, 0, sizeof(compile_data));
+							compile_eval   = &compile_data[0];
+							
+							token = lex_token(file);
+							if (token == ' ')
+								token = lex_token(file);
+								
+							if (token == '.')
+								compile_calc_d = 1;
+							if (!isdigit(token) && !compile_calc_d)
+								error(ERROR_PARSE, "%s:%d Invalid constant initializer element for vector, must be numeric\n", file->name, file->line);
+							
 							/*
-							 * I like to put my { on the same line as the ) for
-							 * functions, ifs, elses, so we must support that!.
+							 * Read in constant data, will be in float format
+							 * which means we use atof.
 							 */
-							PARSE_TREE_ADD(PARSE_TYPE_RPARTH);
+							while (isdigit(token) || token == '.') {
+								*compile_eval++ = token;
+								token           = lex_token(file);
+								if (token == '.' && compile_calc_d) {
+									error(ERROR_PARSE, "%s:%d Invalid constant initializer element for vector, must be numeric.\n", file->name, file->line);
+								} else if (token == '.' && !compile_calc_d) {
+									compile_calc_d = 1;
+								}
+							}
+							if (token == ' ')
+								token = lex_token(file);
+								
+							if (token != ',' && token != ' ')
+								error(ERROR_PARSE, "%s:%d invalid constant initializer element for vector (missing spaces, or comma delimited list?)\n", file->name, file->line);
+							compile_calc_y = atof(compile_data);
+							compile_calc_d = 0;
+							memset(compile_data, 0, sizeof(compile_data));
+							compile_eval   = &compile_data[0];
+							
 							token = lex_token(file);
+							if (token == ' ')
+								token = lex_token(file);
+								
+							if (token == '.')
+								compile_calc_d = 1;
+								
+							if (!isdigit(token) && !compile_calc_d)
+								error(ERROR_PARSE, "%s:%d Invalid constant initializer element for vector, must be numeric\n", file->name, file->line);
+							
+							/*
+							 * Read in constant data, will be in float format
+							 * which means we use atof.
+							 */
+							while (isdigit(token) || token == '.') {
+								*compile_eval++ = token;
+								token           = lex_token(file);
+								if (token == '.' && compile_calc_d) {
+									error(ERROR_PARSE, "%s:%d Invalid constant initializer element for vector, must be numeric.\n", file->name, file->line);
+								} else if (token == '.' && !compile_calc_d) {
+									compile_calc_d = 1;
+								}
+							}
+							if (token == ' ')
+								token = lex_token(file);
+								
+							if (token != '}')
+								error(ERROR_PARSE, "%s:%d Expected `}` on end of constant initialization for vector\n", file->name, file->line);
+							
+							compile_calc_z = atof(compile_data);
+							
+							/*
+							 * Check for the semicolon... The amount of
+							 * parsing needed here is insane.
+							 */
 							token = lex_token(file);
-							if(token == '{')
-								PARSE_TREE_ADD(PARSE_TYPE_LBS);
+							if (token == ' ')
+								token = lex_token(file);
+							if (token != ';')
+								error(ERROR_PARSE, "%s:%d Expected `;` on end of constant initialization for vector\n", file->name, file->line);
+							
+							//printf("VEC_X: %f\n", compile_calc_x);
+							//printf("VEC_Y: %f\n", compile_calc_y);
+							//printf("VEC_Z: %f\n", compile_calc_z);
+							break;
 						}
-						else if (token == '\n')
-							error(ERROR_COMPILER, "%s:%d Expecting `;` after function definition %s\n", file->name, file->line, name);
-							 
-					} else if (token == '=') {
-						PARSE_TREE_ADD(PARSE_TYPE_EQUAL);
-					} else {
-						error(ERROR_COMPILER, "%s:%d Invalid decltype: expected `(` [function], or `=` [constant], or `;` [definition] for %s\n", file->name, file->line, name);
-					} 
-				} else {
-					/* definition */
-					printf("FOUND DEFINITION\n");
+							
+						case TOKEN_ENTITY:
+						case TOKEN_FLOAT:
+							if (!isdigit(token))
+								error(ERROR_PARSE, "%s:%d Expected numeric constant for float constant\n");
+							break;
+					}
+				} else if (token == '(') {
+					printf("FUNCTION ??\n");
 				}
 				mem_d(name);
 			}
@@ -391,97 +461,6 @@ int parse_tree(struct lex_file *file) {
 					token = lex_token(file);
 				break;
 				
-			case '.':
-				PARSE_TREE_ADD(PARSE_TYPE_DOT);
-				break;
-			case '(':
-				PARSE_TREE_ADD(PARSE_TYPE_LPARTH);
-				break;
-			case ')':
-				PARSE_TREE_ADD(PARSE_TYPE_RPARTH);
-				break;
-				
-			case '&':				/* &  */
-				token = lex_token(file);
-				if (token == '&') { /* && */
-					token = lex_token(file);
-					PARSE_TREE_ADD(PARSE_TYPE_LAND);
-					break;
-				}
-				PARSE_TREE_ADD(PARSE_TYPE_BAND);
-				break;
-			case '|':				/* |  */
-				token = lex_token(file);
-				if (token == '|') { /* || */
-					token = lex_token(file);
-					PARSE_TREE_ADD(PARSE_TYPE_LOR);
-					break;
-				}
-				PARSE_TREE_ADD(PARSE_TYPE_BOR);
-				break;
-			case '!':				/* !  */
-				token = lex_token(file);
-				if (token == '=') { /* != */
-					token = lex_token(file);
-					PARSE_TREE_ADD(PARSE_TYPE_LNEQ);
-					break;
-				}
-				PARSE_TREE_ADD(PARSE_TYPE_LNOT);
-				break;
-			case '<':				/* <  */
-				token = lex_token(file);
-				if (token == '=') { /* <= */
-					token = lex_token(file);
-					PARSE_TREE_ADD(PARSE_TYPE_LTEQ);
-					break;
-				}
-				PARSE_TREE_ADD(PARSE_TYPE_LT);
-				break;
-			case '>':				/* >  */
-				token = lex_token(file);
-				if (token == '=') { /* >= */
-					token = lex_token(file);
-					PARSE_TREE_ADD(PARSE_TYPE_GTEQ);
-					break;
-				}
-				PARSE_TREE_ADD(PARSE_TYPE_GT);
-				break;
-			case '=':				/* =  */
-				token = lex_token(file);
-				if (token == '=') { /* == */
-					token = lex_token(file);
-					PARSE_TREE_ADD(PARSE_TYPE_EQEQ);
-					break;
-				}
-				PARSE_TREE_ADD(PARSE_TYPE_EQUAL);
-				break;
-			case ';':
-				token = lex_token(file);
-				PARSE_TREE_ADD(PARSE_TYPE_DONE);
-				break;
-			case '-':
-				token = lex_token(file);
-				PARSE_TREE_ADD(PARSE_TYPE_MINUS);
-				break;
-			case '+':
-				token = lex_token(file);
-				PARSE_TREE_ADD(PARSE_TYPE_ADD);
-				break;
-			case '{':
-				token = lex_token(file);
-				PARSE_TREE_ADD(PARSE_TYPE_LBS);
-				break;
-			case '}':
-				token = lex_token(file);
-				PARSE_TREE_ADD(PARSE_TYPE_RBS);
-				break;
-				
-			/*
-			 * TODO: Fix lexer to spit out ( ) as tokens, it seems the
-			 * using '(' or ')' in parser doesn't work properly unless
-			 * there are spaces before them to allow the lexer to properly
-			 * seperate identifiers. -- otherwise it eats all of it.
-			 */
 			case LEX_IDENT:
 				token = lex_token(file);
 				PARSE_TREE_ADD(PARSE_TYPE_IDENT);
diff --git a/test/digraph.qc b/test/digraph.qc
deleted file mode 100644
index 6467638..0000000
--- a/test/digraph.qc
+++ /dev/null
@@ -1,5 +0,0 @@
-<%
-	<%
-		<% %>
-	%>
-%>
diff --git a/test/if.qc b/test/if.qc
deleted file mode 100644
index 9ef7234..0000000
--- a/test/if.qc
+++ /dev/null
@@ -1,9 +0,0 @@
-float test_1data = 1;
-float test_2data = 2;
-
-float test_if()
-{
-	if (test_1data == test_2data) {
-		/* do this code */
-	}
-}
diff --git a/test/parth.qc b/test/parth.qc
index d9c1e59..396f25b 100644
--- a/test/parth.qc
+++ b/test/parth.qc
@@ -1,16 +1,10 @@
-(
-(
-(
-)
-)
-)
-(
-(
-)
-)
-(
-)
-(
-)
-(
-)
+void test_parth() {
+	if (1) { }
+	if (2) { }
+	if (3) { }
+	if (4) { }
+	if (5) { }
+	if (6) { }
+	if (7) { }
+	if (8) { }
+}
diff --git a/test/tree.qc b/test/tree.qc
deleted file mode 100644
index b8b5568..0000000
--- a/test/tree.qc
+++ /dev/null
@@ -1,22 +0,0 @@
-if(1) {
-	if(1) {
-		return 0;
-	} else {
-		return 1;
-	}
-} else {
-	for {
-		if(1) {
-			return 2;
-		} else {
-			continue;
-		}
-	}
-	do {
-		if(1){
-			break;
-		} else {
-			goto finish;
-		}
-	} while ( )
-}
diff --git a/test/trigraph.qc b/test/trigraph.qc
deleted file mode 100644
index 770272c..0000000
--- a/test/trigraph.qc
+++ /dev/null
@@ -1,5 +0,0 @@
-??<
-	??<
-		??< ??>
-	??>
-??>
diff --git a/test/typedef.qc b/test/typedef.qc
index 7971ba8..f60bf77 100644
--- a/test/typedef.qc
+++ b/test/typedef.qc
@@ -4,7 +4,7 @@ typedef string my_string;
 typedef entity my_entity;
 typedef void   my_void;
 
-my_float  type_float
+my_float  type_float;
 my_vector type_vector;
 my_string type_string;
 my_entity type_entity;
diff --git a/test/types.qc b/test/types.qc
index 814ce11..4fe3a27 100644
--- a/test/types.qc
+++ b/test/types.qc
@@ -1,5 +1,5 @@
-float  typef;
-vector typev;
+float  typef = 1;
+vector typev = {0,1,2};
 string types;
 entity typee;
 void   typev;
-- 
2.39.2