]> de.git.xonotic.org Git - voretournament/voretournament.git/blobdiff - misc/source/fteqcc-src/pr_x86.c
Update fteqcc source
[voretournament/voretournament.git] / misc / source / fteqcc-src / pr_x86.c
index 9f217b339ef96f677a873ced7f716f356c1df491..9f1f1acc242708d08f2869b58d55784979b65867 100644 (file)
@@ -42,6 +42,10 @@ optimisations:
 \r
 #ifdef QCJIT\r
 \r
+#ifndef _WIN32\r
+#include <sys/mman.h>\r
+#endif\r
+\r
 static float ta, tb, nullfloat=0;\r
 \r
 struct jitstate\r
@@ -52,6 +56,10 @@ struct jitstate
        unsigned char *code;\r
        unsigned int codesize;\r
        unsigned int jitstatements;\r
+\r
+       float *glob;\r
+       unsigned int cachedglobal;\r
+       unsigned int cachereg;\r
 };\r
 \r
 static void EmitByte(struct jitstate *jit, unsigned char byte)\r
@@ -108,7 +116,11 @@ enum
        REG_ESP,\r
        REG_EBP,\r
        REG_ESI,\r
-       REG_EDI\r
+       REG_EDI,\r
+\r
+       /*I'm not going to list S1 here, as that makes things too awkward*/\r
+       REG_S0,\r
+       REG_NONE\r
 };\r
 #define XOR(sr,dr) EmitByte(0x31);EmitByte(0xc0 | (sr<<3) | dr);\r
 #define CLEARREG(reg) XOR(reg,reg)\r
@@ -118,6 +130,117 @@ enum
 #define STOREI(i, addr) EmitByte(0xc7);EmitByte(0x05); EmitAdr(addr);Emit4Byte(i);\r
 #define SETREGI(val,reg) EmitByte(0xbe);Emit4Byte(val);\r
 \r
+#define ARGREGS(a,b,c) GCache_Load(jit, op[i].a, a, op[i].b, b, op[i].c, c)\r
+#define RESULTREG(r) GCache_Store(jit, op[i].c, r)\r
+\r
+//for the purposes of the cache, 'temp' offsets are only read when they have been written only within the preceding control block.\r
+//if they were read at any other time, then we must write them out in full.\r
+//this logic applies only to locals of a function.\r
+//#define USECACHE\r
+\r
+/*GCache_Load: emits x86 that loads up to three globals into registers.\r
+  ao/bo/co are offsets into jit->glob; ar/br/cr name the register each one is loaded into.\r
+  REG_S0 loads onto the x87 stack (flds), any other register uses LOADREG, REG_NONE skips the operand.\r
+  with USECACHE, a result that GCache_Store left in a register is reused instead of being reloaded.*/\r
+static void GCache_Load(struct jitstate *jit, int ao, int ar, int bo, int br, int co, int cr)\r
+{\r
+#ifdef USECACHE\r
+       if (jit->cachereg != REG_NONE)\r
+       {\r
+               /*something is cached, if its one of the input offsets then can chain the instruction*/\r
+               if (jit->cachedglobal == ao && jit->cachereg == ar)\r
+                       ar = REG_NONE;\r
+               if (jit->cachedglobal == bo && jit->cachereg == br)\r
+                       br = REG_NONE;\r
+               if (jit->cachedglobal == co && jit->cachereg == cr)\r
+                       cr = REG_NONE;\r
+\r
+               if (!istemp(ao))\r
+               {\r
+                       /*purge the old cache*/\r
+                       switch(jit->cachereg)\r
+                       {\r
+                       case REG_NONE:\r
+                               break;\r
+                       case REG_S0:\r
+                               //fstps glob[cachedglobal]\r
+                               EmitByte(0xd9);EmitByte(0x1d);EmitAdr(jit->glob + jit->cachedglobal);\r
+                               break;\r
+                       default:\r
+                               STOREREG(jit->cachereg, jit->glob + jit->cachedglobal);\r
+                               break;\r
+                       }\r
+               }\r
+               jit->cachedglobal = -1;\r
+               jit->cachereg = REG_NONE;\r
+       }\r
+\r
+#endif\r
+       //NOTE(review): EmitByte/EmitAdr are called here without the jit argument, yet the\r
+       //single-argument wrapper macros appear to be #defined further down the file - confirm this builds.\r
+       /*operand a*/\r
+       switch(ar)\r
+       {\r
+       case REG_NONE:\r
+               break;\r
+       case REG_S0:\r
+               //flds glob[A]\r
+               EmitByte(0xd9);EmitByte(0x05);EmitAdr(jit->glob + ao);\r
+               break;\r
+       default:\r
+               LOADREG(jit->glob + ao, ar);\r
+               break;\r
+       }\r
+\r
+       /*operand b*/\r
+       switch(br)\r
+       {\r
+       case REG_NONE:\r
+               break;\r
+       case REG_S0:\r
+               //flds glob[B]\r
+               EmitByte(0xd9);EmitByte(0x05);EmitAdr(jit->glob + bo);\r
+               break;\r
+       default:\r
+               LOADREG(jit->glob + bo, br);\r
+               break;\r
+       }\r
+\r
+       /*operand c*/\r
+       switch(cr)\r
+       {\r
+       case REG_NONE:\r
+               break;\r
+       case REG_S0:\r
+               //flds glob[C]\r
+               EmitByte(0xd9);EmitByte(0x05);EmitAdr(jit->glob + co);\r
+               break;\r
+       default:\r
+               LOADREG(jit->glob + co, cr);\r
+               break;\r
+       }\r
+}\r
+/*GCache_Store: emits the write of register 'reg' to glob[ofs]; with USECACHE it only records the pair for GCache_Load.*/\r
+static void GCache_Store(struct jitstate *jit, int ofs, int reg)\r
+{\r
+#ifdef USECACHE\r
+       jit->cachedglobal = ofs;\r
+       jit->cachereg = reg;\r
+#else\r
+       switch(reg)\r
+       {\r
+       case REG_NONE: break;   //no result register, nothing to emit\r
+       case REG_S0:\r
+               //fstps glob[C]\r
+               EmitByte(0xd9);EmitByte(0x1d);EmitAdr(jit->glob + ofs);\r
+               break;\r
+       default:\r
+               STOREREG(reg, jit->glob + ofs);\r
+               break;\r
+       }\r
+#endif\r
+}\r
+\r
 static void *LocalLoc(struct jitstate *jit)\r
 {\r
        return &jit->code[jit->codesize];\r
@@ -204,9 +327,17 @@ int ASMCALL PR_EnterFunction (progfuncs_t *progfuncs, dfunction_t *f, int progsn
 \r
 void PR_CloseJit(struct jitstate *jit)\r
 {\r
-       free(jit->statementjumps);\r
-       free(jit->statementoffsets);\r
-       free(jit->code);\r
+       if (jit)\r
+       {\r
+               free(jit->statementjumps);\r
+               free(jit->statementoffsets);\r
+#ifndef _WIN32\r
+               munmap(jit->code, jit->jitstatements * 500);\r
+#else\r
+               free(jit->code);\r
+#endif\r
+               free(jit)\r
+       }\r
 }\r
 \r
 #define EmitByte(v) EmitByte(jit, v)\r
@@ -238,7 +369,11 @@ struct jitstate *PR_GenerateJit(progfuncs_t *progfuncs)
 \r
        jit->statementjumps = malloc(numstatements*12);\r
        jit->statementoffsets = malloc(numstatements*4);\r
+#ifndef _WIN32\r
+       jit->code = mmap(NULL, numstatements*500, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);\r
+#else\r
        jit->code = malloc(numstatements*500);\r
+#endif\r
        if (!jit->code)\r
                return NULL;\r
 \r
@@ -343,6 +478,9 @@ struct jitstate *PR_GenerateJit(progfuncs_t *progfuncs)
                case OP_CALL6:\r
                case OP_CALL7:\r
                case OP_CALL8:\r
+                       //FIXME: the size of this instruction is going to hurt cache performance if every single function call is expanded into this HUGE CHUNK of gibberish!\r
+                       //FIXME: consider the feasibility of just calling a C function and just jumping to the address it returns.\r
+\r
                //save the state in place the rest of the engine can cope with\r
                        //movl $i, pr_xstatement\r
                        EmitByte( 0xc7);EmitByte(0x05);EmitAdr(&pr_xstatement);Emit4Byte(i);\r
@@ -705,6 +843,7 @@ struct jitstate *PR_GenerateJit(progfuncs_t *progfuncs)
 \r
                case OP_AND_F:\r
                        //test floats properly, so we don't get confused with -0.0\r
+                       //FIXME: is it feasible to grab the value as an int and test it against 0x7fffffff?\r
 \r
                        //flds  glob[A]\r
                        EmitByte(0xd9); EmitByte(0x05); EmitAdr(glob + op[i].a);\r
@@ -1256,6 +1395,7 @@ LOADREG(glob + op[i].b, REG_EDI);
 \r
        FixupJumps(jit);\r
 \r
+       /* most likely want executable memory calls somewhere else more common */\r
 #ifdef _WIN32\r
        {\r
                DWORD old;\r
@@ -1264,6 +1404,8 @@ LOADREG(glob + op[i].b, REG_EDI);
                //this means that we must maintain read/write protection, or libc will crash us\r
                VirtualProtect(jit->code, jit->codesize, PAGE_EXECUTE_READWRITE, &old);\r
        }\r
+#else\r
+       mprotect(jit->code, jit->codesize, PROT_READ|PROT_EXEC);\r
 #endif\r
 \r
 //     externs->WriteFile("jit.x86", jit->code, jit->codesize);\r