parse.c

   1 /*
   2  * Copyright (C) 2012
   3  *      Dale Weiler
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining a copy of
   6  * this software and associated documentation files (the "Software"), to deal in
   7  * the Software without restriction, including without limitation the rights to
   8  * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
   9  * of the Software, and to permit persons to whom the Software is furnished to do
  10  * so, subject to the following conditions:
  11  *
  12  * The above copyright notice and this permission notice shall be included in all
  13  * copies or substantial portions of the Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21  * SOFTWARE.
  22  */
  23 #include <limits.h>
  24 #include <stdlib.h>
  25 #include <string.h>
  26 #include <ctype.h>
  27 #include "gmqcc.h"
  28
  29 /*
  30  * These are not lexical tokens:  These are parse tree types.  Most people
  31  * perform tokenizing on language punctuation which is wrong.  That stuff
  32  * is technically already tokenized, it just needs to be parsed into a tree
  33  */
  34 #define PARSE_TYPE_DO       0
  35 #define PARSE_TYPE_ELSE     1
  36 #define PARSE_TYPE_IF       2
  37 #define PARSE_TYPE_WHILE    3
  38 #define PARSE_TYPE_BREAK    4
  39 #define PARSE_TYPE_CONTINUE 5
  40 #define PARSE_TYPE_RETURN   6
  41 #define PARSE_TYPE_GOTO     7
  42 #define PARSE_TYPE_FOR      8
  43 #define PARSE_TYPE_VOID     9
  44 #define PARSE_TYPE_STRING   10
  45 #define PARSE_TYPE_FLOAT    11
  46 #define PARSE_TYPE_VECTOR   12
  47 #define PARSE_TYPE_ENTITY   13
  48 #define PARSE_TYPE_LAND     14
  49 #define PARSE_TYPE_LOR      15
  50 #define PARSE_TYPE_LTEQ     16
  51 #define PARSE_TYPE_GTEQ     17
  52 #define PARSE_TYPE_EQEQ     18
  53 #define PARSE_TYPE_LNEQ     19
  54 #define PARSE_TYPE_COMMA    20
  55 #define PARSE_TYPE_LNOT     21
  56 #define PARSE_TYPE_STAR     22
  57 #define PARSE_TYPE_DIVIDE   23
  58 #define PARSE_TYPE_LPARTH   24
  59 #define PARSE_TYPE_RPARTH   25
  60 #define PARSE_TYPE_MINUS    26
  61 #define PARSE_TYPE_ADD      27
  62 #define PARSE_TYPE_EQUAL    28
  63 #define PARSE_TYPE_LBS      29
  64 #define PARSE_TYPE_RBS      30
  65 #define PARSE_TYPE_ELIP     31
  66 #define PARSE_TYPE_DOT      32
  67 #define PARSE_TYPE_LT       33
  68 #define PARSE_TYPE_GT       34
  69 #define PARSE_TYPE_BAND     35
  70 #define PARSE_TYPE_BOR      36
  71 #define PARSE_TYPE_DONE     37
  72 #define PARSE_TYPE_IDENT    38
  73
  74 /*
  75  * Adds a parse type to the parse tree, this is where all the hard
  76  * work actually begins.
  77  */
  78 #define PARSE_TREE_ADD(X)                                        \
  79         do {                                                         \
  80                 parsetree->next       = mem_a(sizeof(struct parsenode)); \
  81                 parsetree->next->next = NULL;                            \
  82                 parsetree->next->type = (X);                             \
  83                 parsetree             = parsetree->next;                 \
  84         } while (0)
  85
  86 /*
  87  * This is all the punctuation handled in the parser, these don't
  88  * need tokens, they're already tokens.
  89  */
  90 #if 0
  91         "&&", "||", "<=", ">=", "==", "!=", ";", ",", "!", "*",
  92         "/" , "(" , ")" , "-" , "+" , "=" , "[" , "]", "{", "}", "...",
  93         "." , "<" , ">" , "&" , "|" ,
  94 #endif
  95
  96 #define STORE(X,C) {  \
  97     long f = fill;    \
  98     while(f--) {      \
  99       putchar(' ');   \
 100     }                 \
 101     fill C;           \
 102         printf(X);        \
 103         break;            \
 104 }
 105
 106 void parse_debug(struct parsenode *tree) {
 107         long fill = 0;
 108         while (tree) {
 109                 switch (tree->type) {
 110                         case PARSE_TYPE_ADD:       STORE("OPERATOR:  ADD    \n", -=0);
 111                         case PARSE_TYPE_BAND:      STORE("OPERATOR:  BITAND \n",-=0);
 112                         case PARSE_TYPE_BOR:       STORE("OPERATOR:  BITOR  \n",-=0);
 113                         case PARSE_TYPE_COMMA:     STORE("OPERATOR:  SEPERATOR\n",-=0);
 114                         case PARSE_TYPE_DOT:       STORE("OPERATOR:  DOT\n",-=0);
 115                         case PARSE_TYPE_DIVIDE:    STORE("OPERATOR:  DIVIDE\n",-=0);
 116                         case PARSE_TYPE_EQUAL:     STORE("OPERATOR:  ASSIGNMENT\n",-=0);
 117
 118                         case PARSE_TYPE_BREAK:     STORE("STATEMENT: BREAK  \n",-=0);
 119                         case PARSE_TYPE_CONTINUE:  STORE("STATEMENT: CONTINUE\n",-=0);
 120                         case PARSE_TYPE_GOTO:      STORE("STATEMENT: GOTO\n",-=0);
 121                         case PARSE_TYPE_RETURN:    STORE("STATEMENT: RETURN\n",-=0);
 122                         case PARSE_TYPE_DONE:      STORE("STATEMENT: DONE\n",-=0);
 123
 124                         case PARSE_TYPE_VOID:      STORE("DECLTYPE:  VOID\n",-=0);
 125                         case PARSE_TYPE_STRING:    STORE("DECLTYPE:  STRING\n",-=0);
 126                         case PARSE_TYPE_ELIP:      STORE("DECLTYPE:  VALIST\n",-=0);
 127                         case PARSE_TYPE_ENTITY:    STORE("DECLTYPE:  ENTITY\n",-=0);
 128                         case PARSE_TYPE_FLOAT:     STORE("DECLTYPE:  FLOAT\n",-=0);
 129                         case PARSE_TYPE_VECTOR:    STORE("DECLTYPE:  VECTOR\n",-=0);
 130
 131                         case PARSE_TYPE_GT:        STORE("TEST:      GREATER THAN\n",-=0);
 132                         case PARSE_TYPE_LT:        STORE("TEST:      LESS THAN\n",-=0);
 133                         case PARSE_TYPE_GTEQ:      STORE("TEST:      GREATER THAN OR EQUAL\n",-=0);
 134                         case PARSE_TYPE_LTEQ:      STORE("TEST:      LESS THAN OR EQUAL\n",-=0);
 135                         case PARSE_TYPE_LNEQ:      STORE("TEST:      NOT EQUAL\n",-=0);
 136                         case PARSE_TYPE_EQEQ:      STORE("TEST:      EQUAL-EQUAL\n",-=0);
 137
 138                         case PARSE_TYPE_LBS:       STORE("BLOCK:     BEG\n",+=4);
 139                         case PARSE_TYPE_RBS:       STORE("BLOCK:     END\n",-=4);
 140                         case PARSE_TYPE_ELSE:      STORE("BLOCK:     ELSE\n",+=0);
 141                         case PARSE_TYPE_IF:        STORE("BLOCK:     IF\n",+=0);
 142
 143                         case PARSE_TYPE_LAND:      STORE("LOGICAL:   AND\n",-=0);
 144                         case PARSE_TYPE_LNOT:      STORE("LOGICAL:   NOT\n",-=0);
 145                         case PARSE_TYPE_LOR:       STORE("LOGICAL:   OR\n",-=0);
 146
 147                         case PARSE_TYPE_LPARTH:    STORE("PARTH:     BEG\n",-=0);
 148                         case PARSE_TYPE_RPARTH:    STORE("PARTH:     END\n",-=0);
 149
 150                         case PARSE_TYPE_WHILE:     STORE("LOOP:      WHILE\n",-=0);
 151                         case PARSE_TYPE_FOR:       STORE("LOOP:      FOR\n",-=0);
 152                         case PARSE_TYPE_DO:        STORE("LOOP:      DO\n",-=0);
 153                 }
 154                 tree = tree->next;
 155         }
 156 }
 157
 158 /*
 159  * Performs a parse operation:  This is a macro to prevent bugs, if the
 160  * calls to lex_token are'nt exactly enough to feed to the end of the
 161  * actual lexees for the current thing that is being parsed, the state
 162  * of the next iteration in the creation of the parse tree will be wrong
 163  * and everything will fail.
 164  */
 165 #define PARSE_PERFORM(X,C) {     \
 166     token = lex_token(file);     \
 167     { C }                        \
 168     while (token != '\n') {      \
 169             token = lex_token(file); \
 170     }                            \
 171     PARSE_TREE_ADD(X);           \
 172     break;                       \
 173 }
 174
 175 void parse_clear(struct parsenode *tree) {
 176         if (!tree) return;
 177         struct parsenode *temp = NULL;
 178         while (tree != NULL) {
 179                 temp = tree;
 180                 tree = tree->next;
 181                 mem_d (temp);
 182         }
 183
 184         /* free any potential typedefs */
 185         typedef_clear();
 186 }
 187
 188 const char *STRING_(char ch) {
 189         if (ch == ' ')
 190                 return "<space>";
 191         if (ch == '\n')
 192                 return "<newline>";
 193         if (ch == '\0')
 194                 return "<null>";
 195
 196         return &ch;
 197 }
 198
 199 #define TOKEN_SKIPWHITE()        \
 200         token = lex_token(file);     \
 201         while (token == ' ') {       \
 202                 token = lex_token(file); \
 203         }
 204
 205 /*
 206  * Generates a parse tree out of the lexees generated by the lexer.  This
 207  * is where the tree is built.  This is where valid check is performed.
 208  */
 209 int parse_tree(struct lex_file *file) {
 210         struct parsenode *parsetree = NULL;
 211         struct parsenode *parseroot = NULL;
 212
 213         /*
 214          * Allocate memory for our parse tree:
 215          * the parse tree is just a singly linked list which will contain
 216          * all the data for code generation.
 217          */
 218         if (!parseroot) {
 219                 parseroot = mem_a(sizeof(struct parsenode));
 220                 if (!parseroot)
 221                         return error(ERROR_INTERNAL, "Ran out of memory", " ");
 222                 parsetree       = parseroot;
 223                 parsetree->type = -1; /* not a valid type -- root element */
 224         }
 225
 226         int     token = 0;
 227         long    line  = 0;
 228         while ((token = lex_token(file)) != ERROR_LEX      && \
 229                     token                    != ERROR_COMPILER && \
 230                     token                    != ERROR_INTERNAL && \
 231                     token                    != ERROR_PARSE    && \
 232                     token                    != ERROR_PREPRO   && file->length >= 0) {
 233                 line = file->line;
 234                 switch (token) {
 235                         case TOKEN_TYPEDEF: {
 236                                 char *f; /* from */
 237                                 char *t; /* to   */
 238
 239                                 token = lex_token(file);
 240                                 token = lex_token(file); f = util_strdup(file->lastok);
 241                                 token = lex_token(file);
 242                                 token = lex_token(file); t = util_strdup(file->lastok);
 243
 244                                 typedef_add(f, t);
 245                                 mem_d(f);
 246                                 mem_d(t);
 247
 248                                 token = lex_token(file);
 249                                 if (token == ' ')
 250                                         token = lex_token(file);
 251
 252                                 if (token != ';')
 253                                         error(ERROR_PARSE, "%s:%d Expected a `;` at end of typedef statement\n", file->name, file->line);
 254
 255                                 token = lex_token(file);
 256                                 break;
 257                         }
 258
 259                         case TOKEN_VOID:      PARSE_TREE_ADD(PARSE_TYPE_VOID);   goto fall;
 260                         case TOKEN_STRING:    PARSE_TREE_ADD(PARSE_TYPE_STRING); goto fall;
 261                         case TOKEN_VECTOR:    PARSE_TREE_ADD(PARSE_TYPE_VECTOR); goto fall;
 262                         case TOKEN_ENTITY:    PARSE_TREE_ADD(PARSE_TYPE_ENTITY); goto fall;
 263                         case TOKEN_FLOAT:     PARSE_TREE_ADD(PARSE_TYPE_FLOAT);  goto fall;
 264                         {
 265                         fall:;
 266                                 char *name = NULL;
 267                                 int   type = token; /* story copy */
 268
 269                                 /* skip over space */
 270                                 token = lex_token(file);
 271                                 if (token == ' ')
 272                                         token = lex_token(file);
 273
 274                                 /* save name */
 275                                 name = util_strdup(file->lastok);
 276
 277                                 /* skip spaces */
 278                                 token = lex_token(file);
 279                                 if (token == ' ')
 280                                         token = lex_token(file);
 281
 282                                 if (token == ';') {
 283                                         /*
 284                                          * Definitions go to the defs table, they don't have
 285                                          * any sort of data with them yet.
 286                                          */
 287                                 } else if (token == '=') {
 288                                         token = lex_token(file);
 289                                         if (token == ' ')
 290                                                 token = lex_token(file);
 291
 292                                         /* strings are in file->lastok */
 293                                         switch (type) {
 294                                                 case TOKEN_VOID:
 295                                                         return error(ERROR_PARSE, "%s:%d Cannot assign value to type void\n", file->name, file->line);
 296                                                 case TOKEN_STRING:
 297                                                         if (*file->lastok != '"')
 298                                                                 error(ERROR_PARSE, "%s:%d Expected a '\"' (quote) for string constant\n", file->name, file->line);
 299                                                         break;
 300                                                 case TOKEN_VECTOR: {
 301                                                         float compile_calc_x = 0;
 302                                                         float compile_calc_y = 0;
 303                                                         float compile_calc_z = 0;
 304                                                         int   compile_calc_d = 0; /* dot?        */
 305                                                         int   compile_calc_s = 0; /* sign (-, +) */
 306
 307                                                         char  compile_data[1024];
 308                                                         char *compile_eval = compile_data;
 309
 310                                                         if (token != '{')
 311                                                                 error(ERROR_PARSE, "%s:%d Expected initializer list `{`,`}` for vector constant\n", file->name, file->line);
 312
 313                                                         /*
 314                                                          * This parses a single vector element: x,y & z.  This will handle all the
 315                                                          * complicated mechanics of a vector, and can be extended as well.  This
 316                                                          * is a rather large macro, and is #undef after it's use below.
 317                                                          */
 318                                                         #define PARSE_VEC_ELEMENT(NAME, BIT)                                                                                                                                   \
 319                                                             token = lex_token(file);                                                                                                                                           \
 320                                                             if (token == ' ') {                                                                                                                                                \
 321                                                                 token = lex_token(file);                                                                                                                                       \
 322                                                             }                                                                                                                                                                  \
 323                                                             if (token == '.') {                                                                                                                                                \
 324                                                                 compile_calc_d = 1;                                                                                                                                            \
 325                                                             }                                                                                                                                                                  \
 326                                                             if (!isdigit(token) && !compile_calc_d && token != '+' && token != '-')  {                                                                                         \
 327                                                                 error(ERROR_PARSE,"%s:%d Invalid constant initializer element %c for vector, must be numeric\n", file->name, file->line, NAME);                                \
 328                                                             }                                                                                                                                                                  \
 329                                                             if (token == '+') {                                                                                                                                                \
 330                                                                 compile_calc_s = '+';                                                                                                                                          \
 331                                                             }                                                                                                                                                                  \
 332                                                             if (token == '-' && !compile_calc_s) {                                                                                                                             \
 333                                                                 compile_calc_s = '-';                                                                                                                                          \
 334                                                             }                                                                                                                                                                  \
 335                                                             while (isdigit(token) || token == '.' || token == '+' || token == '-') {                                                                                           \
 336                                                                 *compile_eval++ = token;                                                                                                                                       \
 337                                                                 token           = lex_token(file);                                                                                                                             \
 338                                                                 if (token == '.' && compile_calc_d) {                                                                                                                          \
 339                                                                     error(ERROR_PARSE, "%s:%d Invalid constant initializer element %c for vector, must be numeric.\n", file->name, file->line, NAME);                          \
 340                                                                     token = lex_token(file);                                                                                                                                   \
 341                                                                 }                                                                                                                                                              \
 342                                                                 if ((token == '-' || token == '+') && compile_calc_s) {                                                                                                        \
 343                                                                     error(ERROR_PARSE, "%s:%d Invalid constant initializer sign for vector element %c\n", file->name, file->line, NAME);                                       \
 344                                                                     token = lex_token(file);                                                                                                                                   \
 345                                                                 } else if (token == '.' && !compile_calc_d) {                                                                                                                  \
 346                                                                     compile_calc_d = 1;                                                                                                                                        \
 347                                                                 } else if (token == '-' && !compile_calc_s) {                                                                                                                  \
 348                                                                     compile_calc_s = '-';                                                                                                                                      \
 349                                                                 } else if (token == '+' && !compile_calc_s) {                                                                                                                  \
 350                                                                     compile_calc_s = '+';                                                                                                                                      \
 351                                                                 }                                                                                                                                                              \
 352                                                             }                                                                                                                                                                  \
 353                                                             if (token == ' ') {                                                                                                                                                \
 354                                                                 token = lex_token(file);                                                                                                                                       \
 355                                                             }                                                                                                                                                                  \
 356                                                             if (NAME != 'z') {                                                                                                                                                 \
 357                                                                 if (token != ',' && token != ' ')  {                                                                                                                           \
 358                                                                     error(ERROR_PARSE, "%s:%d invalid constant initializer element %c for vector (missing spaces, or comma delimited list?)\n", file->name, file->line, NAME); \
 359                                                                 }                                                                                                                                                              \
 360                                                             } else if (token != '}') {                                                                                                                                         \
 361                                                                 error(ERROR_PARSE, "%s:%d Expected `}` on end of constant initialization for vector\n", file->name, file->line);                                               \
 362                                                             }                                                                                                                                                                  \
 363                                                             compile_calc_##BIT = atof(compile_data);                                                                                                                           \
 364                                                             compile_calc_d = 0;                                                                                                                                                \
 365                                                             compile_calc_s = 0;                                                                                                                                                \
 366                                                             compile_eval   = &compile_data[0];                                                                                                                                 \
 367                                                             memset(compile_data, 0, sizeof(compile_data))
 368
 369                                                         /*
 370                                                          * Parse all elements using the macro above.
 371                                                          * We must undef the macro afterwards.
 372                                                          */
 373                                                         PARSE_VEC_ELEMENT('x', x);
 374                                                         PARSE_VEC_ELEMENT('y', y);
 375                                                         PARSE_VEC_ELEMENT('z', z);
 376                                                         #undef PARSE_VEC_ELEMENT
 377
 378                                                         /*
 379                                                          * Check for the semi-colon... This is insane
 380                                                          * the amount of parsing here that is.
 381                                                          */
 382                                                         token = lex_token(file);
 383                                                         if (token == ' ')
 384                                                                 token = lex_token(file);
 385                                                         if (token != ';')
 386                                                                 error(ERROR_PARSE, "%s:%d Expected `;` on end of constant initialization for vector\n", file->name, file->line);
 387
 388                                                         printf("VEC_X: %f\n", compile_calc_x);
 389                                                         printf("VEC_Y: %f\n", compile_calc_y);
 390                                                         printf("VEC_Z: %f\n", compile_calc_z);
 391                                                         break;
 392                                                 }
 393
 394                                                 case TOKEN_ENTITY:
 395                                                 case TOKEN_FLOAT:
 396                                                         if (!isdigit(token))
 397                                                                 error(ERROR_PARSE, "%s:%d Expected numeric constant for float constant\n");
 398                                                         break;
 399                                         }
 400                                 } else if (token == '(') {
 401                                         printf("FUNCTION ??\n");
 402                                 }
 403                                 mem_d(name);
 404                         }
 405
 406                         /*
 407                          * From here down is all language punctuation:  There is no
 408                          * need to actual create tokens from these because they're already
 409                          * tokenized as these individual tokens (which are in a special area
 410                          * of the ascii table which doesn't conflict with our other tokens
 411                          * which are higer than the ascii table.)
 412                          */
 413                         case '#':
 414                                 token = lex_token(file); /* skip '#' */
 415                                 if (token == ' ')
 416                                         token = lex_token(file);
 417                                 /*
 418                                  * If we make it here we found a directive, the supported
 419                                  * directives so far are #include.
 420                                  */
 421                                 if (strncmp(file->lastok, "include", sizeof("include")) == 0) {
 422                                         /*
 423                                          * We only suport include " ", not <> like in C (why?)
 424                                          * because the latter is silly.
 425                                          */
 426                                         while (*file->lastok != '"' && token != '\n')
 427                                                 token = lex_token(file);
 428                                         if (token == '\n')
 429                                                 return error(ERROR_PARSE, "%d: Invalid use of include preprocessor directive: wanted #include \"file.h\"\n", file->line-1);
 430                                 }
 431
 432                                 /* skip all tokens to end of directive */
 433                                 while (token != '\n')
 434                                         token = lex_token(file);
 435                                 break;
 436
 437                         case LEX_IDENT:
 438                                 token = lex_token(file);
 439                                 PARSE_TREE_ADD(PARSE_TYPE_IDENT);
 440                                 break;
 441                 }
 442         }
 443         parse_debug(parseroot);
 444         lex_reset(file);
 445         parse_clear(parseroot);
 446         return 1;
 447 }