]> de.git.xonotic.org Git - xonotic/gmqcc.git/blob - parse.c
I got parsing to work finally.
[xonotic/gmqcc.git] / parse.c
1 /*
2  * Copyright (C) 2012 
3  *      Dale Weiler
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a copy of
6  * this software and associated documentation files (the "Software"), to deal in
7  * the Software without restriction, including without limitation the rights to
8  * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
9  * of the Software, and to permit persons to whom the Software is furnished to do
10  * so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in all
13  * copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 #include <limits.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <ctype.h>
27 #include "gmqcc.h"
28
/*
 * Parse tree node types.  These are NOT lexical tokens: the lexer has
 * already split the input (language punctuation included) into tokens,
 * and the values below only label the nodes the parser builds from
 * them.
 */
enum {
    /* statements and keywords */
    PARSE_TYPE_DO       = 0,
    PARSE_TYPE_ELSE     = 1,
    PARSE_TYPE_IF       = 2,
    PARSE_TYPE_WHILE    = 3,
    PARSE_TYPE_BREAK    = 4,
    PARSE_TYPE_CONTINUE = 5,
    PARSE_TYPE_RETURN   = 6,
    PARSE_TYPE_GOTO     = 7,
    PARSE_TYPE_FOR      = 8,

    /* declaration types */
    PARSE_TYPE_VOID     = 9,
    PARSE_TYPE_STRING   = 10,
    PARSE_TYPE_FLOAT    = 11,
    PARSE_TYPE_VECTOR   = 12,
    PARSE_TYPE_ENTITY   = 13,

    /* punctuation and operators */
    PARSE_TYPE_LAND     = 14,
    PARSE_TYPE_LOR      = 15,
    PARSE_TYPE_LTEQ     = 16,
    PARSE_TYPE_GTEQ     = 17,
    PARSE_TYPE_EQEQ     = 18,
    PARSE_TYPE_LNEQ     = 19,
    PARSE_TYPE_COMMA    = 20,
    PARSE_TYPE_LNOT     = 21,
    PARSE_TYPE_STAR     = 22,
    PARSE_TYPE_DIVIDE   = 23,
    PARSE_TYPE_LPARTH   = 24,
    PARSE_TYPE_RPARTH   = 25,
    PARSE_TYPE_MINUS    = 26,
    PARSE_TYPE_ADD      = 27,
    PARSE_TYPE_EQUAL    = 28,
    PARSE_TYPE_LBS      = 29,
    PARSE_TYPE_RBS      = 30,
    PARSE_TYPE_ELIP     = 31,
    PARSE_TYPE_DOT      = 32,
    PARSE_TYPE_LT       = 33,
    PARSE_TYPE_GT       = 34,
    PARSE_TYPE_BAND     = 35,
    PARSE_TYPE_BOR      = 36,

    /* miscellaneous */
    PARSE_TYPE_DONE     = 37,
    PARSE_TYPE_IDENT    = 38
};
73
/*
 * Appends a new node of type X to the end of the parse list and makes
 * it the current node.  Expects a `struct parsenode *parsetree`
 * variable, pointing at the current tail, to be in scope at the point
 * of use.
 *
 * NOTE(review): the result of mem_a() is not checked before it is
 * written through.
 */
#define PARSE_TREE_ADD(X)                                                          \
    do {                                                                           \
        struct parsenode *parse_tree_add_node_ = mem_a(sizeof(struct parsenode)); \
        parse_tree_add_node_->next = NULL;                                         \
        parse_tree_add_node_->type = (X);                                          \
        parsetree->next            = parse_tree_add_node_;                         \
        parsetree                  = parse_tree_add_node_;                         \
    } while (0)
85
86 /*
87  * This is all the punctuation handled in the parser, these don't
88  * need tokens, they're already tokens.
89  */
90 #if 0
91         "&&", "||", "<=", ">=", "==", "!=", ";", ",", "!", "*",
92         "/" , "(" , ")" , "-" , "+" , "=" , "[" , "]", "{", "}", "...",
93         "." , "<" , ">" , "&" , "|" , 
94 #endif
95
/*
 * Helper for the switch in parse_debug(): prints `fill` spaces of
 * indentation followed by the text X, then applies the adjustment C
 * (e.g. `+=4`) to `fill`, and finally breaks out of the enclosing
 * switch.  Requires a `long fill` variable in scope.
 *
 * The indentation loop is guarded with `> 0` so that a negative `fill`
 * (more block ends than block begins) prints no indentation instead of
 * counting down from a negative value.  X is printed through "%s" so it
 * is never interpreted as a format string.
 */
#define STORE(X,C) {      \
    long f = fill;        \
    while (f-- > 0) {     \
        putchar(' ');     \
    }                     \
    fill C;               \
    printf("%s", X);      \
    break;                \
}
105
106 void parse_debug(struct parsenode *tree) {
107         long fill = 0;
108         while (tree) {  
109                 switch (tree->type) {
110                         case PARSE_TYPE_ADD:       STORE("OPERATOR:  ADD    \n", -=0);
111                         case PARSE_TYPE_BAND:      STORE("OPERATOR:  BITAND \n",-=0);
112                         case PARSE_TYPE_BOR:       STORE("OPERATOR:  BITOR  \n",-=0);
113                         case PARSE_TYPE_COMMA:     STORE("OPERATOR:  SEPERATOR\n",-=0);
114                         case PARSE_TYPE_DOT:       STORE("OPERATOR:  DOT\n",-=0);
115                         case PARSE_TYPE_DIVIDE:    STORE("OPERATOR:  DIVIDE\n",-=0);
116                         case PARSE_TYPE_EQUAL:     STORE("OPERATOR:  ASSIGNMENT\n",-=0);
117                         
118                         case PARSE_TYPE_BREAK:     STORE("STATEMENT: BREAK  \n",-=0);
119                         case PARSE_TYPE_CONTINUE:  STORE("STATEMENT: CONTINUE\n",-=0);
120                         case PARSE_TYPE_GOTO:      STORE("STATEMENT: GOTO\n",-=0);
121                         case PARSE_TYPE_RETURN:    STORE("STATEMENT: RETURN\n",-=0);
122                         case PARSE_TYPE_DONE:      STORE("STATEMENT: DONE\n",-=0);
123
124                         case PARSE_TYPE_VOID:      STORE("DECLTYPE:  VOID\n",-=0);
125                         case PARSE_TYPE_STRING:    STORE("DECLTYPE:  STRING\n",-=0);
126                         case PARSE_TYPE_ELIP:      STORE("DECLTYPE:  VALIST\n",-=0);
127                         case PARSE_TYPE_ENTITY:    STORE("DECLTYPE:  ENTITY\n",-=0);
128                         case PARSE_TYPE_FLOAT:     STORE("DECLTYPE:  FLOAT\n",-=0);
129                         case PARSE_TYPE_VECTOR:    STORE("DECLTYPE:  VECTOR\n",-=0);
130                         
131                         case PARSE_TYPE_GT:        STORE("TEST:      GREATER THAN\n",-=0);
132                         case PARSE_TYPE_LT:        STORE("TEST:      LESS THAN\n",-=0);
133                         case PARSE_TYPE_GTEQ:      STORE("TEST:      GREATER THAN OR EQUAL\n",-=0);
134                         case PARSE_TYPE_LTEQ:      STORE("TEST:      LESS THAN OR EQUAL\n",-=0);
135                         case PARSE_TYPE_LNEQ:      STORE("TEST:      NOT EQUAL\n",-=0);
136                         case PARSE_TYPE_EQEQ:      STORE("TEST:      EQUAL-EQUAL\n",-=0);
137                         
138                         case PARSE_TYPE_LBS:       STORE("BLOCK:     BEG\n",+=4);
139                         case PARSE_TYPE_RBS:       STORE("BLOCK:     END\n",-=4);
140                         case PARSE_TYPE_ELSE:      STORE("BLOCK:     ELSE\n",+=0);
141                         case PARSE_TYPE_IF:        STORE("BLOCK:     IF\n",+=0);
142                         
143                         case PARSE_TYPE_LAND:      STORE("LOGICAL:   AND\n",-=0);
144                         case PARSE_TYPE_LNOT:      STORE("LOGICAL:   NOT\n",-=0);
145                         case PARSE_TYPE_LOR:       STORE("LOGICAL:   OR\n",-=0);
146                         
147                         case PARSE_TYPE_LPARTH:    STORE("PARTH:     BEG\n",-=0);
148                         case PARSE_TYPE_RPARTH:    STORE("PARTH:     END\n",-=0);
149                         
150                         case PARSE_TYPE_WHILE:     STORE("LOOP:      WHILE\n",-=0);
151                         case PARSE_TYPE_FOR:       STORE("LOOP:      FOR\n",-=0);
152                         case PARSE_TYPE_DO:        STORE("LOOP:      DO\n",-=0);
153                 }
154                 tree = tree->next;
155         }
156 }
157
/*
 * Performs one parse operation: fetches the next token, runs the
 * statements C, discards tokens up to the next '\n', appends a node of
 * type X and then breaks out of the enclosing switch.
 *
 * This is a macro so that every user consumes tokens in exactly the
 * same way; if the calls to lex_token aren't exactly enough to reach
 * the end of the construct being parsed, the next iteration of the
 * parse loop starts from the wrong state and everything after it
 * fails.
 *
 * Expects `token`, `file` and `parsetree` to be in scope.
 */
#define PARSE_PERFORM(X,C) {                            \
    token = lex_token(file);                            \
    { C }                                               \
    for (; token != '\n'; token = lex_token(file)) {    \
    }                                                   \
    PARSE_TREE_ADD(X);                                  \
    break;                                              \
}
174
175 void parse_clear(struct parsenode *tree) {
176         if (!tree) return;
177         struct parsenode *temp = NULL;
178         while (tree != NULL) {
179                 temp = tree;
180                 tree = tree->next;
181                 mem_d (temp);
182         }
183         
184         /* free any potential typedefs */
185         typedef_clear();
186 }
187
/*
 * Returns a printable, NUL-terminated string describing `ch`.
 *
 * Space, newline and NUL map to the fixed names "<space>", "<newline>"
 * and "<null>".  Any other character is returned as a one character
 * string held in a static buffer, so that result is only valid until
 * the next call with an ordinary character.
 *
 * (The previous implementation returned the address of the by-value
 * parameter, which is gone once the function returns and was never
 * NUL-terminated.)
 */
const char *STRING_(char ch) {
    static char single[2];

    switch (ch) {
        case ' ':  return "<space>";
        case '\n': return "<newline>";
        case '\0': return "<null>";
        default:   break;
    }

    single[0] = ch;
    single[1] = '\0';
    return single;
}
198
/*
 * Fetches the next token, skipping over any run of ' ' tokens.
 * Expects `token` and `file` to be in scope.
 *
 * Wrapped in do { } while (0) so the macro expands to exactly one
 * statement and can be used as the body of an unbraced if/else; end
 * each use with a semicolon.
 */
#define TOKEN_SKIPWHITE()                \
    do {                                 \
        token = lex_token(file);         \
        while (token == ' ') {           \
            token = lex_token(file);     \
        }                                \
    } while (0)
204
205 /*
206  * Generates a parse tree out of the lexees generated by the lexer.  This
207  * is where the tree is built.  This is where valid check is performed.
208  */
209 int parse_tree(struct lex_file *file) {
210         struct parsenode *parsetree = NULL;
211         struct parsenode *parseroot = NULL;
212         
213         /*
214          * Allocate memory for our parse tree:
215          * the parse tree is just a singly linked list which will contain
216          * all the data for code generation.
217          */
218         if (!parseroot) {
219                 parseroot = mem_a(sizeof(struct parsenode));
220                 if (!parseroot)
221                         return error(ERROR_INTERNAL, "Ran out of memory", " ");
222                 parsetree       = parseroot;
223                 parsetree->type = -1; /* not a valid type -- root element */
224         }
225         
226         int     token = 0;
227         while ((token = lex_token(file)) != ERROR_LEX      && \
228                     token                    != ERROR_COMPILER && \
229                     token                    != ERROR_INTERNAL && \
230                     token                    != ERROR_PARSE    && \
231                     token                    != ERROR_PREPRO   && file->length >= 0) {
232                 switch (token) {
233                         case TOKEN_TYPEDEF: {
234                                 char *f; /* from */
235                                 char *t; /* to   */
236                                 
237                                 token = lex_token(file); 
238                                 token = lex_token(file); f = util_strdup(file->lastok);
239                                 token = lex_token(file); 
240                                 token = lex_token(file); t = util_strdup(file->lastok);
241                                 
242                                 typedef_add(f, t);
243                                 mem_d(f);
244                                 mem_d(t);
245                                 
246                                 token = lex_token(file);
247                                 if (token == ' ')
248                                         token = lex_token(file);
249                                         
250                                 if (token != ';')
251                                         error(ERROR_PARSE, "%s:%d Expected `;` on typedef\n", file->name, file->line);
252                                         
253                                 token = lex_token(file);
254                                 break;
255                         }
256                         
257                         case TOKEN_VOID:      PARSE_TREE_ADD(PARSE_TYPE_VOID);   goto fall;
258                         case TOKEN_STRING:    PARSE_TREE_ADD(PARSE_TYPE_STRING); goto fall;
259                         case TOKEN_VECTOR:    PARSE_TREE_ADD(PARSE_TYPE_VECTOR); goto fall;
260                         case TOKEN_ENTITY:    PARSE_TREE_ADD(PARSE_TYPE_ENTITY); goto fall;
261                         case TOKEN_FLOAT:     PARSE_TREE_ADD(PARSE_TYPE_FLOAT);  goto fall;
262                         {
263                         fall:;
264                                 char *name = NULL;
265                                 int   type = token; /* story copy */
266                                 
267                                 /* skip over space */
268                                 token = lex_token(file);
269                                 if (token == ' ')
270                                         token = lex_token(file);
271                                 
272                                 /* save name */
273                                 name = util_strdup(file->lastok);
274                                 
275                                 /* skip spaces */
276                                 token = lex_token(file);
277                                 if (token == ' ')
278                                         token = lex_token(file);
279                                         
280                                 if (token == ';') {
281                                         printf("definition\n");
282                                 } else if (token == '=') {
283                                         token = lex_token(file);
284                                         if (token == ' ')
285                                                 token = lex_token(file);
286                                         
287                                         /* strings are in file->lastok */
288                                         switch (type) {
289                                                 case TOKEN_VOID:   return error(ERROR_PARSE, "%s:%d Cannot assign value to type void\n", file->name, file->line);
290                                                 case TOKEN_STRING:
291                                                         if (*file->lastok != '"')
292                                                                 error(ERROR_PARSE, "%s:%d Expected a '\"' for string constant\n", file->name, file->line);
293                                                         break;
294                                                 case TOKEN_VECTOR: {
295                                                         float compile_calc_x = 0;
296                                                         float compile_calc_y = 0;
297                                                         float compile_calc_z = 0;
298                                                         int   compile_calc_d = 0; /* dot? */
299                                                         
300                                                         char  compile_data[1024];
301                                                         char *compile_eval = compile_data;
302                                                         
303                                                         if (token != '{')
304                                                                 error(ERROR_PARSE, "%s:%d Expected initializer list `{`,`}` for vector constant\n", file->name, file->line);    
305                                                                 
306                                                         token = lex_token(file);
307                                                         if (token == ' ')
308                                                                 token = lex_token(file);
309                                                         
310                                                         /*
311                                                          * we support .7623, unlike anyother QuakeC
312                                                          * compiler.  Does that make us better :-).
313                                                          */
314                                                         if (token == '.')
315                                                                 compile_calc_d = 1;
316                                                         if (!isdigit(token) && !compile_calc_d)
317                                                                 error(ERROR_PARSE, "%s:%d Invalid constant initializer element for vector, must be numeric\n", file->name, file->line);
318                                                         
319                                                         /*
320                                                          * Read in constant data, will be in float format
321                                                          * which means we use atof.
322                                                          */
323                                                         while (isdigit(token) || token == '.') {
324                                                                 *compile_eval++ = token;
325                                                                 token           = lex_token(file);
326                                                                 if (token == '.' && compile_calc_d) {
327                                                                         error(ERROR_PARSE, "%s:%d Invalid constant initializer element for vector, must be numeric.\n", file->name, file->line);
328                                                                 } else if (token == '.' && !compile_calc_d) {
329                                                                         compile_calc_d = 1;
330                                                                 }
331                                                         }
332                                                         if (token == ' ')
333                                                                 token = lex_token(file);
334                                                                 
335                                                         if (token != ',' && token != ' ')
336                                                                 error(ERROR_PARSE, "%s:%d invalid constant initializer element for vector (missing spaces, or comma delimited list?)\n", file->name, file->line);
337                                                         compile_calc_x = atof(compile_data);
338                                                         compile_calc_d = 0;
339                                                         memset(compile_data, 0, sizeof(compile_data));
340                                                         compile_eval   = &compile_data[0];
341                                                         
342                                                         token = lex_token(file);
343                                                         if (token == ' ')
344                                                                 token = lex_token(file);
345                                                                 
346                                                         if (token == '.')
347                                                                 compile_calc_d = 1;
348                                                         if (!isdigit(token) && !compile_calc_d)
349                                                                 error(ERROR_PARSE, "%s:%d Invalid constant initializer element for vector, must be numeric\n", file->name, file->line);
350                                                         
351                                                         /*
352                                                          * Read in constant data, will be in float format
353                                                          * which means we use atof.
354                                                          */
355                                                         while (isdigit(token) || token == '.') {
356                                                                 *compile_eval++ = token;
357                                                                 token           = lex_token(file);
358                                                                 if (token == '.' && compile_calc_d) {
359                                                                         error(ERROR_PARSE, "%s:%d Invalid constant initializer element for vector, must be numeric.\n", file->name, file->line);
360                                                                 } else if (token == '.' && !compile_calc_d) {
361                                                                         compile_calc_d = 1;
362                                                                 }
363                                                         }
364                                                         if (token == ' ')
365                                                                 token = lex_token(file);
366                                                                 
367                                                         if (token != ',' && token != ' ')
368                                                                 error(ERROR_PARSE, "%s:%d invalid constant initializer element for vector (missing spaces, or comma delimited list?)\n", file->name, file->line);
369                                                         compile_calc_y = atof(compile_data);
370                                                         compile_calc_d = 0;
371                                                         memset(compile_data, 0, sizeof(compile_data));
372                                                         compile_eval   = &compile_data[0];
373                                                         
374                                                         token = lex_token(file);
375                                                         if (token == ' ')
376                                                                 token = lex_token(file);
377                                                                 
378                                                         if (token == '.')
379                                                                 compile_calc_d = 1;
380                                                                 
381                                                         if (!isdigit(token) && !compile_calc_d)
382                                                                 error(ERROR_PARSE, "%s:%d Invalid constant initializer element for vector, must be numeric\n", file->name, file->line);
383                                                         
384                                                         /*
385                                                          * Read in constant data, will be in float format
386                                                          * which means we use atof.
387                                                          */
388                                                         while (isdigit(token) || token == '.') {
389                                                                 *compile_eval++ = token;
390                                                                 token           = lex_token(file);
391                                                                 if (token == '.' && compile_calc_d) {
392                                                                         error(ERROR_PARSE, "%s:%d Invalid constant initializer element for vector, must be numeric.\n", file->name, file->line);
393                                                                 } else if (token == '.' && !compile_calc_d) {
394                                                                         compile_calc_d = 1;
395                                                                 }
396                                                         }
397                                                         if (token == ' ')
398                                                                 token = lex_token(file);
399                                                                 
400                                                         if (token != '}')
401                                                                 error(ERROR_PARSE, "%s:%d Expected `}` on end of constant initialization for vector\n", file->name, file->line);
402                                                         
403                                                         compile_calc_z = atof(compile_data);
404                                                         
405                                                         /*
406                                                          * Check for the semi-colon... This is insane
407                                                          * the amount of parsing here that is.
408                                                          */
409                                                         token = lex_token(file);
410                                                         if (token == ' ')
411                                                                 token = lex_token(file);
412                                                         if (token != ';')
413                                                                 error(ERROR_PARSE, "%s:%d Expected `;` on end of constant initialization for vector\n", file->name, file->line);
414                                                         
415                                                         //printf("VEC_X: %f\n", compile_calc_x);
416                                                         //printf("VEC_Y: %f\n", compile_calc_y);
417                                                         //printf("VEC_X: %f\n", compile_calc_z);
418                                                         break;
419                                                 }
420                                                         
421                                                 case TOKEN_ENTITY:
422                                                 case TOKEN_FLOAT:
423                                                         if (!isdigit(token))
424                                                                 error(ERROR_PARSE, "%s:%d Expected numeric constant for float constant\n");
425                                                         break;
426                                         }
427                                 } else if (token == '(') {
428                                         printf("FUNCTION ??\n");
429                                 }
430                                 mem_d(name);
431                         }
432                                 
433                         /*
434                          * From here down is all language punctuation:  There is no
435                          * need to actual create tokens from these because they're already
436                          * tokenized as these individual tokens (which are in a special area
437                          * of the ascii table which doesn't conflict with our other tokens
438                          * which are higer than the ascii table.)
439                          */
440                         case '#':
441                                 token = lex_token(file); /* skip '#' */
442                                 /*
443                                  * If we make it here we found a directive, the supported
444                                  * directives so far are #include.
445                                  */
446                                 if (strncmp(file->lastok, "include", sizeof("include")) == 0) {
447                                         /*
448                                          * We only suport include " ", not <> like in C (why?)
449                                          * because the latter is silly.
450                                          */
451                                         while (*file->lastok != '"' && token != '\n')
452                                                 token = lex_token(file);
453                                         
454                                         /* we handle lexing at that point now */
455                                         if (token == '\n')
456                                                 return error(ERROR_PARSE, "%d: Invalid use of include preprocessor directive: wanted #include \"file.h\"\n", file->line);
457                                 }
458                         
459                                 /* skip all tokens to end of directive */
460                                 while (token != '\n')
461                                         token = lex_token(file);
462                                 break;
463                                 
464                         case LEX_IDENT:
465                                 token = lex_token(file);
466                                 PARSE_TREE_ADD(PARSE_TYPE_IDENT);
467                                 break;
468                 }
469         }
470         parse_debug(parseroot);
471         lex_reset(file);
472         parse_clear(parseroot);
473         return 1;
474 }