]> de.git.xonotic.org Git - voretournament/voretournament.git/blobdiff - misc/source/fteqcc-src/pr_x86.c
Latest fteqcc and netradiant sources
[voretournament/voretournament.git] / misc / source / fteqcc-src / pr_x86.c
index 4ebfdda2016f94562e59e56bf3577a6d020f3994..9f217b339ef96f677a873ced7f716f356c1df491 100644 (file)
@@ -10,16 +10,31 @@ optimisations:
        instructions need to be chained. stuff that writes to C should be cacheable, etc. maybe we don't even need to do the write to C\r
        it should also be possible to fold in eq+ifnot, so none of this silly storeing of floats in equality tests\r
 \r
+       this means that we need to track which vars are cached and in what form: fpreg, ireg+floatasint, ireg+float.\r
+       certain qccx hacks can use fpu operations on ints, so do what the instruction says, rather than treating every add as interchangeable regardless of operand types.\r
+\r
+       OP_AND_F, OP_OR_F etc will generally result in ints, and we should be able to keep them as ints if they combine with other ints.\r
+\r
+       some instructions are jump sites. any cache must be flushed before the start of the instruction.\r
+       some variables are locals, and will only ever be written by a single instruction, then read by the following instruction. such temps do not need to be written, or are overwritten later in the function anyway.\r
+       such locals need to be calculated PER FUNCTION as (fte)qcc can overlap locals making multiple distinct locals on a single offset.\r
+\r
+       store locals on a proper stack instead of the current absurd mechanism.\r
+\r
        eax - tmp\r
        ebx - prinst->edicttable\r
        ecx     - tmp\r
        edx - tmp\r
-       esi - \r
+       esi - debug opcode number\r
        edi - tmp (because its preserved by subfunctions\r
-       ebp - \r
+       ebp -\r
 \r
   to use gas to provide binary opcodes:\r
   vim -N blob.s && as blob.s && objdump.exe -d a.out\r
+\r
+\r
+  notable mods to test:\r
+  prydon gate, due to fpu mangling to carry values between maps\r
 */\r
 \r
 #define PROGSUSED\r
@@ -29,59 +44,135 @@ optimisations:
 \r
 static float ta, tb, nullfloat=0;\r
 \r
-unsigned int *statementjumps;  //[MAX_STATEMENTS*2]\r
-unsigned char **statementoffsets; //[MAX_STATEMENTS]\r
-unsigned int numjumps;\r
-unsigned char *code;\r
-unsigned int codesize;\r
-unsigned int jitstatements;\r
-\r
-void EmitByte(unsigned char byte)\r
+struct jitstate\r
 {\r
-       code[codesize++] = byte;\r
+       unsigned int *statementjumps;   //[MAX_STATEMENTS*3]\r
+       unsigned char **statementoffsets; //[MAX_STATEMENTS]\r
+       unsigned int numjumps;\r
+       unsigned char *code;\r
+       unsigned int codesize;\r
+       unsigned int jitstatements;\r
+};\r
+\r
+static void EmitByte(struct jitstate *jit, unsigned char byte)\r
+{\r
+       jit->code[jit->codesize++] = byte;\r
 }\r
-void Emit4Byte(unsigned int value)\r
+static void Emit4Byte(struct jitstate *jit, unsigned int value)\r
 {\r
-       code[codesize++] = (value>> 0)&0xff;\r
-       code[codesize++] = (value>> 8)&0xff;\r
-       code[codesize++] = (value>>16)&0xff;\r
-       code[codesize++] = (value>>24)&0xff;\r
+       jit->code[jit->codesize++] = (value>> 0)&0xff;\r
+       jit->code[jit->codesize++] = (value>> 8)&0xff;\r
+       jit->code[jit->codesize++] = (value>>16)&0xff;\r
+       jit->code[jit->codesize++] = (value>>24)&0xff;\r
 }\r
-void EmitAdr(void *value)\r
+static void EmitAdr(struct jitstate *jit, void *value)\r
 {\r
-       Emit4Byte((unsigned int)value);\r
+       Emit4Byte(jit, (unsigned int)value);\r
 }\r
-void EmitFloat(float value)\r
+static void EmitFloat(struct jitstate *jit, float value)\r
 {\r
        union {float f; unsigned int i;} u;\r
        u.f = value;\r
-       Emit4Byte(u.i);\r
+       Emit4Byte(jit, u.i);\r
 }\r
-void Emit2Byte(unsigned short value)\r
+static void Emit2Byte(struct jitstate *jit, unsigned short value)\r
 {\r
-       code[codesize++] = (value>> 0)&0xff;\r
-       code[codesize++] = (value>> 8)&0xff;\r
+       jit->code[jit->codesize++] = (value>> 0)&0xff;\r
+       jit->code[jit->codesize++] = (value>> 8)&0xff;\r
 }\r
 \r
-void EmitFOffset(void *func, int bias)\r
+static void EmitFOffset(struct jitstate *jit, void *func, int bias)\r
 {\r
        union {void *f; unsigned int i;} u;\r
        u.f = func;\r
-       u.i -= (unsigned int)&code[codesize+bias];\r
-       Emit4Byte(u.i);\r
+       u.i -= (unsigned int)&jit->code[jit->codesize+bias];\r
+       Emit4Byte(jit, u.i);\r
 }\r
 \r
-void Emit4ByteJump(int statementnum, int offset)\r
+static void Emit4ByteJump(struct jitstate *jit, int statementnum, int offset)\r
 {\r
-       statementjumps[numjumps++] = codesize;\r
-       statementjumps[numjumps++] = statementnum;\r
-       statementjumps[numjumps++] = offset;\r
+       jit->statementjumps[jit->numjumps++] = jit->codesize;\r
+       jit->statementjumps[jit->numjumps++] = statementnum;\r
+       jit->statementjumps[jit->numjumps++] = offset;\r
 \r
        //the offset is filled in later\r
-       codesize += 4;\r
+       jit->codesize += 4;\r
 }\r
 \r
-void FixupJumps(void)\r
+enum   //register numbers 0..7 in declaration order; the emit macros shift these into instruction bytes\r
+{\r
+       REG_EAX,        //tmp\r
+       REG_ECX,        //tmp\r
+       REG_EDX,        //tmp\r
+       REG_EBX,        //prinst->edicttable\r
+       REG_ESP,\r
+       REG_EBP,\r
+       REG_ESI,        //debug opcode number\r
+       REG_EDI         //tmp\r
+};\r
+#define XOR(sr,dr) EmitByte(0x31);EmitByte(0xc0 | ((sr)<<3) | (dr));   //xor between two registers; like all of these, expands to several statements, so use only inside braces\r
+#define CLEARREG(reg) XOR(reg,reg)     //xor of a register with itself zeroes it\r
+#define LOADREG(addr, reg) if ((reg) == REG_EAX) {EmitByte(0xa1);} else {EmitByte(0x8b); EmitByte(((reg)<<3) | 0x05);} EmitAdr(addr);  //load 32 bits from absolute addr into reg (eax has a shorter encoding)\r
+#define STOREREG(reg, addr) if ((reg) == REG_EAX) {EmitByte(0xa3);} else {EmitByte(0x89); EmitByte(((reg)<<3) | 0x05);} EmitAdr(addr); //store reg to absolute addr (eax has a shorter encoding)\r
+#define STOREF(f, addr) EmitByte(0xc7);EmitByte(0x05); EmitAdr(addr);EmitFloat(f);     //store the float constant f to absolute addr\r
+#define STOREI(i, addr) EmitByte(0xc7);EmitByte(0x05); EmitAdr(addr);Emit4Byte(i);     //store the 32bit constant i to absolute addr\r
+#define SETREGI(val,reg) EmitByte(0xb8 | (reg));Emit4Byte(val);        //mov $val,reg - opcode is 0xb8+reg (was hardcoded to 0xbe, i.e. always esi)\r
+\r
+static void *LocalLoc(struct jitstate *jit)    //address at which the next emitted byte will be written\r
+{\r
+       return jit->code + jit->codesize;\r
+}\r
+static void *LocalJmp(struct jitstate *jit, int cond)  //emits a 2-byte short jump chosen by 'cond' with a zero displacement; returns the address just past it, to be handed to LocalJmpLoc\r
+{\r
+       /*floating point ops don't set the sign flag, thus we use the 'above/below' instructions instead of 'greater/less' instructions*/\r
+       if (cond == OP_GOTO)\r
+               EmitByte(jit, 0xeb);    //jmp\r
+       else if (cond == OP_LE_F)\r
+               EmitByte(jit, 0x76);    //jbe\r
+       else if (cond == OP_GE_F)\r
+               EmitByte(jit, 0x73);    //jae\r
+       else if (cond == OP_LT_F)\r
+               EmitByte(jit, 0x72);    //jb\r
+       else if (cond == OP_GT_F)\r
+               EmitByte(jit, 0x77);    //ja\r
+       else if (cond == OP_LE_I)\r
+               EmitByte(jit, 0x7e);    //jle\r
+       else if (cond == OP_LT_I)\r
+               EmitByte(jit, 0x7c);    //jl\r
+       else if ((cond >= OP_NE_F && cond <= OP_NE_FNC) || cond == OP_NE_I)\r
+               EmitByte(jit, 0x75);    //jne\r
+       else if ((cond >= OP_EQ_F && cond <= OP_EQ_FNC) || cond == OP_EQ_I)\r
+               EmitByte(jit, 0x74);    //je\r
+#if defined(DEBUG) && defined(_WIN32)\r
+       else\r
+       {\r
+               OutputDebugStringA("oh noes!\n");       //explicit A variant for the narrow literal, as LocalJmpLoc already does\r
+               return NULL;\r
+       }\r
+#endif //NOTE(review): in other builds an unrecognised cond emits no opcode, only the displacement byte below\r
+\r
+       EmitByte(jit, 0);       //placeholder displacement, patched later by LocalJmpLoc\r
+\r
+       return LocalLoc(jit);\r
+}\r
+static void LocalJmpLoc(void *jmp, void *loc)  //patches the short jump that ends at 'jmp' (as returned by LocalJmp) so that it lands on 'loc'\r
+{\r
+       int offs;\r
+       unsigned char *a = jmp;\r
+       offs = (char *)loc - (char *)jmp;       //displacement is measured from the end of the jump instruction\r
+#if defined(DEBUG) && defined(_WIN32)\r
+       if (offs > 127 || offs < -128)  //a signed 8bit displacement covers -128..127 inclusive\r
+       {\r
+               OutputDebugStringA("bad jump\n");\r
+               a[-2] = 0xcd;   //replace the unreachable jump with 'int $0xcc'\r
+               a[-1] = 0xcc;\r
+               return;\r
+       }\r
+#endif\r
+       a[-1] = offs;   //the displacement is the last byte of the jump\r
+}\r
+\r
+static void FixupJumps(struct jitstate *jit)\r
 {\r
        unsigned int j;\r
        unsigned char *codesrc;\r
@@ -90,15 +181,15 @@ void FixupJumps(void)
 \r
        unsigned int v;\r
 \r
-       for (j = 0; j < numjumps;)\r
+       for (j = 0; j < jit->numjumps;)\r
        {\r
-               v = statementjumps[j++];\r
-               codesrc = &code[v];\r
+               v = jit->statementjumps[j++];\r
+               codesrc = &jit->code[v];\r
 \r
-               v = statementjumps[j++];\r
-               codedst = statementoffsets[v];\r
+               v = jit->statementjumps[j++];\r
+               codedst = jit->statementoffsets[v];\r
 \r
-               v = statementjumps[j++];\r
+               v = jit->statementjumps[j++];\r
                offset = (int)(codedst - (codesrc-v));  //3rd term because the jump is relative to the instruction start, not the instruction's offset\r
 \r
                codesrc[0] = (offset>> 0)&0xff;\r
@@ -108,53 +199,81 @@ void FixupJumps(void)
        }\r
 }\r
 \r
-int PR_LeaveFunction (progfuncs_t *progfuncs);\r
-int PR_EnterFunction (progfuncs_t *progfuncs, dfunction_t *f, int progsnum);\r
+int ASMCALL PR_LeaveFunction (progfuncs_t *progfuncs);\r
+int ASMCALL PR_EnterFunction (progfuncs_t *progfuncs, dfunction_t *f, int progsnum);\r
+\r
+void PR_CloseJit(struct jitstate *jit) //releases the three buffers held by a jitstate\r
+{\r
+       free(jit->statementjumps);\r
+       free(jit->statementoffsets);\r
+       free(jit->code);        //NOTE(review): the jitstate struct itself is not freed here - confirm the caller does so\r
+}\r
+\r
+#define EmitByte(v) EmitByte(jit, v)   //from here on these wrappers pass the in-scope variable 'jit' implicitly\r
+#define EmitAdr(v) EmitAdr(jit, v)\r
+#define EmitFOffset(a,b) EmitFOffset(jit, a, b)\r
+#define Emit4ByteJump(a,b) Emit4ByteJump(jit, a, b)\r
+#define Emit4Byte(v) Emit4Byte(jit, v)\r
+#define EmitFloat(v) EmitFloat(jit, v)\r
+#define LocalJmp(v) LocalJmp(jit, v)\r
+#define LocalLoc() LocalLoc(jit)\r
+\r
 \r
-pbool PR_GenerateJit(progfuncs_t *progfuncs)\r
+struct jitstate *PR_GenerateJit(progfuncs_t *progfuncs)\r
 {\r
+       struct jitstate *jit;\r
+\r
+       void *j0, *l0;\r
+       void *j1, *l1;\r
+       void *j2, *l2;\r
        unsigned int i;\r
        dstatement16_t *op = (dstatement16_t*)current_progstate->statements;\r
        unsigned int numstatements = current_progstate->progs->numstatements;\r
        int *glob = (int*)current_progstate->globals;\r
 \r
        if (current_progstate->numbuiltins)\r
-               return false;\r
-\r
-       jitstatements = numstatements;\r
+               return NULL;\r
+       jit = malloc(sizeof(*jit));\r
+       jit->jitstatements = numstatements;\r
 \r
-       statementjumps = malloc(numstatements*12);\r
-       statementoffsets = malloc(numstatements*4);\r
-       code = malloc(numstatements*500);\r
+       jit->statementjumps = malloc(numstatements*12);\r
+       jit->statementoffsets = malloc(numstatements*4);\r
+       jit->code = malloc(numstatements*500);\r
+       if (!jit->code)\r
+               return NULL;\r
 \r
-       numjumps = 0;\r
-       codesize = 0;\r
+       jit->numjumps = 0;\r
+       jit->codesize = 0;\r
 \r
 \r
 \r
        for (i = 0; i < numstatements; i++)\r
        {\r
-               statementoffsets[i] = &code[codesize];\r
+               jit->statementoffsets[i] = &jit->code[jit->codesize];\r
+\r
+               /*DEBUG*/\r
+               SETREGI(op[i].op, REG_ESI);\r
+\r
                switch(op[i].op)\r
                {\r
                //jumps\r
-               case OP_IF:\r
+               case OP_IF_I:\r
                        //integer compare\r
                        //if a, goto b\r
 \r
                        //cmpl $0,glob[A]\r
                        EmitByte(0x83);EmitByte(0x3d);EmitAdr(glob + op[i].a);EmitByte(0x0);\r
-                       //jnz B\r
+                       //jne B\r
                        EmitByte(0x0f);EmitByte(0x85);Emit4ByteJump(i + (signed short)op[i].b, -4);\r
                        break;\r
 \r
-               case OP_IFNOT:\r
+               case OP_IFNOT_I:\r
                        //integer compare\r
                        //if !a, goto b\r
 \r
                        //cmpl $0,glob[A]\r
                        EmitByte(0x83);EmitByte(0x3d);EmitAdr(glob + op[i].a);EmitByte(0x0);\r
-                       //jz B\r
+                       //je B\r
                        EmitByte(0x0f);EmitByte(0x84);Emit4ByteJump(i + (signed short)op[i].b, -4);\r
                        break;\r
 \r
@@ -173,25 +292,19 @@ pbool PR_GenerateJit(progfuncs_t *progfuncs)
                        {\r
                                //assumption: anything that returns address 0 is a void or zero return.\r
                                //thus clear eax and copy that to the return vector.\r
-                               EmitByte(0x31);EmitByte(0xc0);\r
-                               EmitByte(0xa3);EmitAdr(glob + OFS_RETURN+0);\r
-                               EmitByte(0xa3);EmitAdr(glob + OFS_RETURN+1);\r
-                               EmitByte(0xa3);EmitAdr(glob + OFS_RETURN+2);\r
+                               CLEARREG(REG_EAX);\r
+                               STOREREG(REG_EAX, glob + OFS_RETURN+0);\r
+                               STOREREG(REG_EAX, glob + OFS_RETURN+1);\r
+                               STOREREG(REG_EAX, glob + OFS_RETURN+2);\r
                        }\r
                        else\r
                        {\r
-                               //movl glob[A+0],eax\r
-                               EmitByte(0xa1);EmitAdr(glob + op[i].a+0);\r
-                               //movl glob[A+0],edx\r
-                               EmitByte(0x8b);EmitByte(0x0d);EmitAdr(glob + op[i].a+1);\r
-                               //movl glob[A+0],ecx\r
-                               EmitByte(0x8b);EmitByte(0x15);EmitAdr(glob + op[i].a+2);\r
-                               //movl eax, glob[OFS_RET+0]\r
-                               EmitByte(0xa3);EmitAdr(glob + OFS_RETURN+0);\r
-                               //movl edx, glob[OFS_RET+0]\r
-                               EmitByte(0x89);EmitByte(0x15);EmitAdr(glob + OFS_RETURN+1);\r
-                               //movl ecx, glob[OFS_RET+0]\r
-                               EmitByte(0x89);EmitByte(0x15);EmitAdr(glob + OFS_RETURN+2);\r
+                               LOADREG(glob + op[i].a+0, REG_EAX);\r
+                               LOADREG(glob + op[i].a+1, REG_EDX);\r
+                               LOADREG(glob + op[i].a+2, REG_ECX);\r
+                               STOREREG(REG_EAX, glob + OFS_RETURN+0);\r
+                               STOREREG(REG_EDX, glob + OFS_RETURN+1);\r
+                               STOREREG(REG_ECX, glob + OFS_RETURN+2);\r
                        }\r
                        \r
                        //call leavefunction to get the return address\r
@@ -207,14 +320,17 @@ pbool PR_GenerateJit(progfuncs_t *progfuncs)
 //                     cmp prinst->exitdepth,%edx\r
                        EmitByte(0x3b);EmitByte(0x15);EmitAdr(&prinst->exitdepth);\r
 //                     je returntoc\r
-                       EmitByte(0x74);EmitByte(0x09);\r
-//                     mov statementoffsets[%eax*4],%eax\r
-                       EmitByte(0x8b);EmitByte(0x04);EmitByte(0x85);EmitAdr(statementoffsets+1);\r
-//                     jmp eax\r
-                       EmitByte(0xff);EmitByte(0xe0);\r
+                       j1 = LocalJmp(OP_EQ_E);\r
+//                             mov statementoffsets[%eax*4],%eax\r
+                               EmitByte(0x8b);EmitByte(0x04);EmitByte(0x85);EmitAdr(jit->statementoffsets+1);\r
+//                             jmp *eax\r
+                               EmitByte(0xff);EmitByte(0xe0);\r
 //                     returntoc:\r
+                       l1 = LocalLoc();\r
 //                     ret\r
                        EmitByte(0xc3);\r
+\r
+                       LocalJmpLoc(j1,l1);\r
                        break;\r
 \r
                //function calls\r
@@ -229,13 +345,13 @@ pbool PR_GenerateJit(progfuncs_t *progfuncs)
                case OP_CALL8:\r
                //save the state in place the rest of the engine can cope with\r
                        //movl $i, pr_xstatement\r
-                       EmitByte(0xc7);EmitByte(0x05);EmitAdr(&pr_xstatement);Emit4Byte(i);\r
+                       EmitByte( 0xc7);EmitByte(0x05);EmitAdr(&pr_xstatement);Emit4Byte(i);\r
                        //movl $(op[i].op-OP_CALL0), pr_argc\r
-                       EmitByte(0xc7);EmitByte(0x05);EmitAdr(&pr_argc);Emit4Byte(op[i].op-OP_CALL0);\r
+                       EmitByte( 0xc7);EmitByte(0x05);EmitAdr(&pr_argc);Emit4Byte(op[i].op-OP_CALL0);\r
 \r
                //figure out who we're calling, and what that involves\r
                        //%eax = glob[A]\r
-                       EmitByte(0xa1); EmitAdr(glob + op[i].a);\r
+                       LOADREG(glob + op[i].a, REG_EAX);\r
                //eax is now the func num\r
 \r
                        //mov %eax,%ecx\r
@@ -247,7 +363,7 @@ pbool PR_GenerateJit(progfuncs_t *progfuncs)
                        //cmp %ecx,pr_typecurrent\r
                        EmitByte(0x39); EmitByte(0x0d); EmitAdr(&pr_typecurrent);\r
                        //je sameprogs\r
-                       EmitByte(0x74); EmitByte(0x3);\r
+                       j1 = LocalJmp(OP_EQ_I);\r
                        {\r
                                //can't handle switching progs\r
 \r
@@ -266,6 +382,8 @@ pbool PR_GenerateJit(progfuncs_t *progfuncs)
                                EmitByte(0xc3);\r
                        }\r
                        //sameprogs:\r
+                       l1 = LocalLoc();\r
+                       LocalJmpLoc(j1,l1);\r
 \r
                        //andl $0x00ffffff, %eax\r
                        EmitByte(0x25);Emit4Byte(0x00ffffff);\r
@@ -286,9 +404,9 @@ pbool PR_GenerateJit(progfuncs_t *progfuncs)
                        //cmp $0,%edx\r
                        EmitByte(0x83);EmitByte(0xfa);EmitByte(0x00);\r
                        //jl isabuiltin\r
-                       EmitByte(0x7c);EmitByte(22);\r
-       \r
+                       j1 = LocalJmp(OP_LT_I);\r
                        {\r
+                               /* call the function*/\r
                                //push %ecx\r
                                EmitByte(0x51);\r
                                //push %eax\r
@@ -302,10 +420,12 @@ pbool PR_GenerateJit(progfuncs_t *progfuncs)
                //eax is now the next statement number (first of the new function, usually equal to ecx, but not always)\r
 \r
                                //jmp statementoffsets[%eax*4]\r
-                               EmitByte(0xff);EmitByte(0x24);EmitByte(0x85);EmitAdr(statementoffsets+1);\r
+                               EmitByte(0xff);EmitByte(0x24);EmitByte(0x85);EmitAdr(jit->statementoffsets+1);\r
                        }\r
+                       /*it's a builtin, figure out which, and call it*/\r
                        //isabuiltin:\r
-\r
+                       l1 = LocalLoc();\r
+                       LocalJmpLoc(j1,l1);\r
 \r
                        //push current_progstate->globals\r
                        EmitByte(0x68);EmitAdr(current_progstate->globals);\r
@@ -321,23 +441,21 @@ pbool PR_GenerateJit(progfuncs_t *progfuncs)
 \r
                //but that builtin might have been Abort()\r
 \r
-                       //mov prinst->continuestatement,%eax\r
-                       EmitByte(0xa1);EmitAdr(&prinst->continuestatement);\r
-               //eax is now prinst->continuestatement\r
-\r
+                       LOADREG(&prinst->continuestatement, REG_EAX);\r
                        //cmp $-1,%eax\r
                        EmitByte(0x83);EmitByte(0xf8);EmitByte(0xff);\r
                        //je donebuiltincall\r
-                       EmitByte(0x74);EmitByte(10+8);\r
+                       j1 = LocalJmp(OP_EQ_I);\r
                        {\r
-EmitByte(0xcc);\r
-                               //jmp statementoffsets[%eax*4]\r
-                               EmitByte(0xff);EmitByte(0x24);EmitByte(0x85);EmitAdr(statementoffsets+1);\r
-\r
                                //mov $-1,prinst->continuestatement\r
-                               EmitByte(0xc7);EmitByte(0x05);EmitAdr(&prinst->continuestatement+1);Emit4Byte((unsigned int)-1);\r
+                               EmitByte(0xc7);EmitByte(0x05);EmitAdr(&prinst->continuestatement);Emit4Byte((unsigned int)-1);\r
+\r
+                               //jmp statementoffsets[%eax*4]\r
+                               EmitByte(0xff);EmitByte(0x24);EmitByte(0x85);EmitAdr(jit->statementoffsets);\r
                        }\r
                        //donebuiltincall:\r
+                       l1 = LocalLoc();\r
+                       LocalJmpLoc(j1,l1);\r
                        break;\r
 \r
                case OP_MUL_F:\r
@@ -374,24 +492,29 @@ EmitByte(0xcc);
                        break;\r
 \r
                case OP_NOT_F:\r
-                       //flds glob[A]\r
-                       EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a);\r
                        //fldz\r
                        EmitByte(0xd9);EmitByte(0xee);\r
+                       //fcomps        glob[A]\r
+                       EmitByte(0xd8); EmitByte(0x1d); EmitAdr(glob + op[i].a);\r
                        //fnstsw %ax\r
                        EmitByte(0xdf);EmitByte(0xe0);\r
                        //testb 0x40,%ah\r
                        EmitByte(0xf6);EmitByte(0xc4);EmitByte(0x40);\r
-                       //je noteq\r
-                       EmitByte(0x74);EmitByte(0x0c);\r
-                       //movl 1.0f,glob[C]\r
-                       EmitByte(0xc7);EmitByte(0x05);EmitAdr(glob + op[i].c);EmitFloat(0.0f);\r
-                       //jmp end\r
-                       EmitByte(0xeb);EmitByte(0x0a);\r
-                       //noteq:\r
-                       //movl 0.0f,glob[C]\r
-                       EmitByte(0xc7);EmitByte(0x05);EmitAdr(glob + op[i].c);EmitFloat(1.0f);\r
+                       \r
+                       j1 = LocalJmp(OP_NE_F);\r
+                       {\r
+                               STOREF(0.0f, glob + op[i].c);\r
+                               j2 = LocalJmp(OP_GOTO);\r
+                       }\r
+                       {\r
+                               //noteq:\r
+                               l1 = LocalLoc();\r
+                               STOREF(1.0f, glob + op[i].c);\r
+                       }\r
                        //end:\r
+                       l2 = LocalLoc();\r
+                       LocalJmpLoc(j1,l1);\r
+                       LocalJmpLoc(j2,l2);\r
                        break;\r
 \r
                case OP_STORE_F:\r
@@ -399,26 +522,17 @@ EmitByte(0xcc);
                case OP_STORE_ENT:\r
                case OP_STORE_FLD:\r
                case OP_STORE_FNC:\r
-                       //movl glob[A],eax\r
-                       EmitByte(0xa1);EmitAdr(glob + op[i].a);\r
-                       //movl eax,glob[B]\r
-                       EmitByte(0xa3);EmitAdr(glob + op[i].b);\r
+                       LOADREG(glob + op[i].a, REG_EAX);\r
+                       STOREREG(REG_EAX, glob + op[i].b);\r
                        break;\r
 \r
                case OP_STORE_V:\r
-                       //movl glob[A+0],eax\r
-                       EmitByte(0xa1);EmitAdr(glob + op[i].a+0);\r
-                       //movl glob[A+1],edx\r
-                       EmitByte(0x8b);EmitByte(0x0d);EmitAdr(glob + op[i].a+1);\r
-                       //movl glob[A+2],ecx\r
-                       EmitByte(0x8b);EmitByte(0x15);EmitAdr(glob + op[i].a+2);\r
-\r
-                       //movl eax, glob[B+0]\r
-                       EmitByte(0xa3);EmitAdr(glob + op[i].b+0);\r
-                       //movl edx, glob[B+1]\r
-                       EmitByte(0x89);EmitByte(0x15);EmitAdr(glob + op[i].b+1);\r
-                       //movl ecx, glob[B+2]\r
-                       EmitByte(0x89);EmitByte(0x15);EmitAdr(glob + op[i].b+2);\r
+                       LOADREG(glob + op[i].a+0, REG_EAX);\r
+                       LOADREG(glob + op[i].a+1, REG_EDX);\r
+                       LOADREG(glob + op[i].a+2, REG_ECX);\r
+                       STOREREG(REG_EAX, glob + op[i].b+0);\r
+                       STOREREG(REG_EDX, glob + op[i].b+1);\r
+                       STOREREG(REG_ECX, glob + op[i].b+2);\r
                        break;\r
 \r
                case OP_LOAD_F:\r
@@ -430,10 +544,9 @@ EmitByte(0xcc);
                //a is the ent number, b is the field\r
                //c is the dest\r
 \r
-                       //movl glob[A+0],eax\r
-                       EmitByte(0xa1);EmitAdr(glob + op[i].a);\r
-                       //mov glob[B],ecx\r
-                       EmitByte(0x8b); EmitByte(0x0d);EmitAdr(glob + op[i].b);\r
+                       LOADREG(glob + op[i].a, REG_EAX);\r
+                       LOADREG(glob + op[i].b, REG_ECX);\r
+\r
                //FIXME: bound eax (ent number)\r
                //FIXME: bound ecx (field index)\r
                        //mov (ebx,eax,4).%eax\r
@@ -443,22 +556,20 @@ EmitByte(0xcc);
                        EmitByte(0x8b);EmitByte(0x50);EmitByte((int)&((edictrun_t*)NULL)->fields);\r
                //edx is now the field array for that ent\r
 \r
-                       //mov fieldajust(%edx,%ecx,4),%eax      //offset = progfuncs->fieldadjust\r
+                       //mov fieldajust(%edx,%ecx,4),%eax\r
                        EmitByte(0x8b); EmitByte(0x84); EmitByte(0x8a); Emit4Byte(progfuncs->fieldadjust*4);\r
-                       //mov edx,glob[C]\r
-                       EmitByte(0xa3);EmitAdr(glob + op[i].c);\r
+\r
+                       STOREREG(REG_EAX, glob + op[i].c)\r
 \r
                        if (op[i].op == OP_LOAD_V)\r
                        {\r
-                               //mov fieldajust+4(%edx,%ecx,4),%eax    //offset = progfuncs->fieldadjust\r
+                               //mov fieldajust+4(%edx,%ecx,4),%eax\r
                                EmitByte(0x8b); EmitByte(0x84); EmitByte(0x8a); Emit4Byte(4+progfuncs->fieldadjust*4);\r
-                               //mov edx,glob[C+1]\r
-                               EmitByte(0xa3);EmitAdr(glob + op[i].c+1);\r
+                               STOREREG(REG_EAX, glob + op[i].c+1)\r
 \r
-                               //mov fieldajust+8(%edx,%ecx,4),%eax    //offset = progfuncs->fieldadjust\r
-                               EmitByte(0x8b); EmitByte(0x84); EmitByte(0x8a); Emit4Byte(4+progfuncs->fieldadjust*4);\r
-                               //mov edx,glob[C+1]\r
-                               EmitByte(0xa3);EmitAdr(glob + op[i].c+2);\r
+                               //mov fieldajust+8(%edx,%ecx,4),%eax\r
+                               EmitByte(0x8b); EmitByte(0x84); EmitByte(0x8a); Emit4Byte(8+progfuncs->fieldadjust*4);\r
+                               STOREREG(REG_EAX, glob + op[i].c+2)\r
                        }\r
                        break;\r
 \r
@@ -466,10 +577,9 @@ EmitByte(0xcc);
                        //a is the ent number, b is the field\r
                //c is the dest\r
 \r
-                       //movl glob[A+0],eax\r
-                       EmitByte(0xa1);EmitAdr(glob + op[i].a);\r
-                       //mov glob[B],ecx\r
-                       EmitByte(0x8b); EmitByte(0x0d);EmitAdr(glob + op[i].b);\r
+                       LOADREG(glob + op[i].a, REG_EAX);\r
+                       LOADREG(glob + op[i].b, REG_ECX);\r
+\r
                //FIXME: bound eax (ent number)\r
                //FIXME: bound ecx (field index)\r
                        //mov (ebx,eax,4).%eax\r
@@ -481,8 +591,7 @@ EmitByte(0xcc);
                        //mov fieldajust(%edx,%ecx,4),%eax      //offset = progfuncs->fieldadjust\r
                        //EmitByte(0x8d); EmitByte(0x84); EmitByte(0x8a); EmitByte(progfuncs->fieldadjust*4);\r
                        EmitByte(0x8d); EmitByte(0x84); EmitByte(0x8a); Emit4Byte(progfuncs->fieldadjust*4);\r
-                       //mov edx,glob[C]\r
-                       EmitByte(0xa3);EmitAdr(glob + op[i].c);\r
+                       STOREREG(REG_EAX, glob + op[i].c);\r
                        break;\r
 \r
                case OP_STOREP_F:\r
@@ -490,118 +599,111 @@ EmitByte(0xcc);
                case OP_STOREP_ENT:\r
                case OP_STOREP_FLD:\r
                case OP_STOREP_FNC:\r
-                       //movl glob[A],eax\r
-                       EmitByte(0xa1);EmitAdr(glob + op[i].a);\r
-                       //mov glob[B],ecx\r
-                       EmitByte(0x8b); EmitByte(0x0d);EmitAdr(glob + op[i].b);\r
+                       LOADREG(glob + op[i].a, REG_EAX);\r
+                       LOADREG(glob + op[i].b, REG_ECX);\r
                        //mov %eax,(%ecx)\r
                        EmitByte(0x89);EmitByte(0x01);\r
                        break;\r
 \r
                case OP_STOREP_V:\r
-                       //mov glob[B],ecx\r
-                       EmitByte(0x8b); EmitByte(0x0d);EmitAdr(glob + op[i].b);\r
-                       //movl glob[A],eax\r
-                       EmitByte(0xa1);EmitAdr(glob + op[i].a+0);\r
+                       LOADREG(glob + op[i].b, REG_ECX);\r
+\r
+                       LOADREG(glob + op[i].a+0, REG_EAX);\r
                        //mov %eax,0(%ecx)\r
                        EmitByte(0x89);EmitByte(0x01);\r
-                       //movl glob[A],eax\r
-                       EmitByte(0xa1);EmitAdr(glob + op[i].a+0);\r
+\r
+                       LOADREG(glob + op[i].a+1, REG_EAX);\r
                        //mov %eax,4(%ecx)\r
                        EmitByte(0x89);EmitByte(0x41);EmitByte(0x04);\r
-                       //movl glob[A],eax\r
-                       EmitByte(0xa1);EmitAdr(glob + op[i].a+0);\r
+\r
+                       LOADREG(glob + op[i].a+2, REG_EAX);\r
                        //mov %eax,8(%ecx)\r
                        EmitByte(0x89);EmitByte(0x41);EmitByte(0x08);\r
                        break;\r
 \r
+               case OP_NE_I:\r
+               case OP_NE_E:\r
+               case OP_NE_FNC:\r
+               case OP_EQ_I:\r
                case OP_EQ_E:\r
                case OP_EQ_FNC:\r
                        //integer equality\r
-                       //movl glob[A],%eax\r
-                       EmitByte(0xa1);EmitAdr(glob + op[i].a);\r
-                       //cmp glob[B],%eax\r
-                       EmitByte(0x3b); EmitByte(0x0f); EmitAdr(glob + op[i].b);\r
-                       //je 12\r
-                       EmitByte(0x74);EmitByte(0x0c);\r
-                       //mov 0.0f,glob[C]\r
-                       EmitByte(0xc7);EmitByte(0x05); EmitAdr(glob + op[i].a);EmitFloat(0.0f);\r
-                       //jmp 10\r
-                       EmitByte(0xeb);EmitByte(0x0a);\r
-                       //mov 1.0f,glob[C]\r
-                       EmitByte(0xc7);EmitByte(0x05); EmitAdr(glob + op[i].a);EmitFloat(1.0f);\r
-                       break;\r
+                       LOADREG(glob + op[i].a, REG_EAX);\r
 \r
-               case OP_NE_E:\r
-               case OP_NE_FNC:\r
-                       //integer equality\r
-                       //movl glob[A],%eax\r
-                       EmitByte(0xa1);EmitAdr(glob + op[i].a);\r
                        //cmp glob[B],%eax\r
-                       EmitByte(0x3b); EmitByte(0x0f); EmitAdr(glob + op[i].b);\r
-                       //je 12\r
-                       EmitByte(0x74);EmitByte(0x0c);\r
-                       //mov 0.0f,glob[C]\r
-                       EmitByte(0xc7);EmitByte(0x05); EmitAdr(glob + op[i].a);EmitFloat(1.0f);\r
-                       //jmp 10\r
-                       EmitByte(0xeb);EmitByte(0x0a);\r
-                       //mov 1.0f,glob[C]\r
-                       EmitByte(0xc7);EmitByte(0x05); EmitAdr(glob + op[i].a);EmitFloat(0.0f);\r
+                       EmitByte(0x3b); EmitByte(0x04); EmitByte(0x25); EmitAdr(glob + op[i].b);\r
+                       j1 = LocalJmp(op[i].op);\r
+                       {\r
+                               STOREF(0.0f, glob + op[i].c);\r
+                               j2 = LocalJmp(OP_GOTO);\r
+                       }\r
+                       {\r
+                               l1 = LocalLoc();\r
+                               STOREF(1.0f, glob + op[i].c);\r
+                       }\r
+                       l2 = LocalLoc();\r
+                       LocalJmpLoc(j1,l1);\r
+                       LocalJmpLoc(j2,l2);\r
                        break;\r
 \r
+               case OP_NOT_I:\r
                case OP_NOT_ENT:\r
                case OP_NOT_FNC:\r
-                       //cmp glob[B],%eax\r
-                       EmitByte(0x8c); EmitByte(0x3d); EmitAdr(glob + op[i].a);EmitByte(0x00);\r
-                       //je 12\r
-                       EmitByte(0x74);EmitByte(0x0c);\r
-                       //mov 0.0f,glob[C]\r
-                       EmitByte(0xc7);EmitByte(0x05); EmitAdr(glob + op[i].a);EmitFloat(0.0f);\r
-                       //jmp 10\r
-                       EmitByte(0xeb);EmitByte(0x0a);\r
-                       //mov 1.0f,glob[C]\r
-                       EmitByte(0xc7);EmitByte(0x05); EmitAdr(glob + op[i].c);EmitFloat(1.0f);\r
+                       //cmpl $0,glob[A]\r
+                       EmitByte(0x83); EmitByte(0x3d); EmitAdr(glob + op[i].a); EmitByte(0x00); \r
+                       j1 = LocalJmp(OP_NE_I);\r
+                       {\r
+                               STOREF(1.0f, glob + op[i].c);\r
+                               j2 = LocalJmp(OP_GOTO);\r
+                       }\r
+                       {\r
+                               l1 = LocalLoc();\r
+                               STOREF(0.0f, glob + op[i].c);\r
+                       }\r
+                       l2 = LocalLoc();\r
+                       LocalJmpLoc(j1,l1);\r
+                       LocalJmpLoc(j2,l2);\r
                        break;\r
 \r
-               case OP_BITOR:  //floats...\r
+               case OP_BITOR_F:        //floats...\r
                        //flds glob[A]\r
                        EmitByte(0xd9); EmitByte(0x05);EmitAdr(glob + op[i].a);\r
                        //flds glob[B]\r
                        EmitByte(0xd9); EmitByte(0x05);EmitAdr(glob + op[i].b);\r
                        //fistp tb\r
-                       EmitByte(0xdf); EmitByte(0x1d);EmitAdr(&tb);\r
+                       EmitByte(0xdb); EmitByte(0x1d);EmitAdr(&tb);\r
                        //fistp ta\r
-                       EmitByte(0xdf); EmitByte(0x1d);EmitAdr(&ta);\r
-                       //mov ta,%eax\r
-                       EmitByte(0xa1); EmitAdr(&ta);\r
-                       //and tb,%eax\r
+                       EmitByte(0xdb); EmitByte(0x1d);EmitAdr(&ta);\r
+                       LOADREG(&ta, REG_EAX)\r
+                       //or %eax,tb\r
                        EmitByte(0x09); EmitByte(0x05);EmitAdr(&tb);\r
                        //fild tb\r
-                       EmitByte(0xdf); EmitByte(0x05);EmitAdr(&tb);\r
+                       EmitByte(0xdb); EmitByte(0x05);EmitAdr(&tb);\r
                        //fstps glob[C]\r
                        EmitByte(0xd9); EmitByte(0x1d);EmitAdr(glob + op[i].c);\r
                        break;\r
 \r
-               case OP_BITAND:\r
+               case OP_BITAND_F:\r
                        //flds glob[A]\r
                        EmitByte(0xd9); EmitByte(0x05);EmitAdr(glob + op[i].a);\r
                        //flds glob[B]\r
                        EmitByte(0xd9); EmitByte(0x05);EmitAdr(glob + op[i].b);\r
                        //fistp tb\r
-                       EmitByte(0xdf); EmitByte(0x1d);EmitAdr(&tb);\r
+                       EmitByte(0xdb); EmitByte(0x1d);EmitAdr(&tb);\r
                        //fistp ta\r
-                       EmitByte(0xdf); EmitByte(0x1d);EmitAdr(&ta);\r
-                       //mov ta,%eax\r
-                       EmitByte(0xa1); EmitAdr(&ta);\r
+                       EmitByte(0xdb); EmitByte(0x1d);EmitAdr(&ta);\r
+                       /*two args are now at ta and tb*/\r
+                       LOADREG(&ta, REG_EAX)\r
                        //and tb,%eax\r
                        EmitByte(0x21); EmitByte(0x05);EmitAdr(&tb);\r
+                       /*we just wrote the int value to tb, convert that to a float and store it at c*/\r
                        //fild tb\r
-                       EmitByte(0xdf); EmitByte(0x05);EmitAdr(&tb);\r
+                       EmitByte(0xdb); EmitByte(0x05);EmitAdr(&tb);\r
                        //fstps glob[C]\r
                        EmitByte(0xd9); EmitByte(0x1d);EmitAdr(glob + op[i].c);\r
                        break;\r
 \r
-               case OP_AND:\r
+               case OP_AND_F:\r
                        //test floats properly, so we don't get confused with -0.0\r
 \r
                        //flds  glob[A]\r
@@ -612,7 +714,7 @@ EmitByte(0xcc);
                        EmitByte(0xdf); EmitByte(0xe0);\r
                        //test  $0x40,%ah\r
                        EmitByte(0xf6); EmitByte(0xc4);EmitByte(0x40);\r
-                       //je onefalse\r
+                       //jnz onefalse\r
                        EmitByte(0x75); EmitByte(0x1f);\r
 \r
                        //flds  glob[B]\r
@@ -623,7 +725,7 @@ EmitByte(0xcc);
                        EmitByte(0xdf); EmitByte(0xe0);\r
                        //test  $0x40,%ah\r
                        EmitByte(0xf6); EmitByte(0xc4);EmitByte(0x40);\r
-                       //jne onefalse\r
+                       //jnz onefalse\r
                        EmitByte(0x75); EmitByte(0x0c);\r
 \r
                        //mov float0,glob[C]\r
@@ -636,7 +738,7 @@ EmitByte(0xcc);
                        EmitByte(0xc7); EmitByte(0x05); EmitAdr(glob + op[i].c); EmitFloat(0.0f);\r
                        //done:\r
                        break;\r
-               case OP_OR:\r
+               case OP_OR_F:\r
                        //test floats properly, so we don't get confused with -0.0\r
 \r
                        //flds  glob[A]\r
@@ -674,18 +776,16 @@ EmitByte(0xcc);
 \r
                case OP_EQ_S:\r
                case OP_NE_S:\r
+                       {\r
                        //put a in ecx\r
+                       LOADREG(glob + op[i].a, REG_ECX);\r
                        //put b in edi\r
-                       //mov a,%ecx\r
-                       EmitByte(0x8b); EmitByte(0x0d); EmitAdr(glob + op[i].a);\r
-                       //mov b,%edi\r
-                       EmitByte(0x8b); EmitByte(0x3d); EmitAdr(glob + op[i].b);\r
-\r
+                       LOADREG(glob + op[i].b, REG_EDI);\r
+/*\r
                        //early out if they're equal\r
                        //cmp %ecx,%edi\r
-                       EmitByte(0x39); EmitByte(0xd1);\r
-                       //je _true\r
-                       EmitByte(0x74); EmitByte(0x68);\r
+                       EmitByte(0x39); EmitByte(0xc0 | (REG_EDI<<3) | REG_ECX);\r
+                       j1c = LocalJmp(OP_EQ_S);\r
 \r
                        //if a is 0, check if b is ""\r
                        //jecxz ais0\r
@@ -707,31 +807,30 @@ EmitByte(0xcc);
                                EmitByte(0x83); EmitByte(0xc4); EmitByte(0x08);\r
                                //cmpb $0,(%eax)\r
                                EmitByte(0x80); EmitByte(0x38); EmitByte(0x00);\r
+                               j1b = LocalJmp(OP_EQ_S);\r
+                               j0b = LocalJmp(OP_GOTO);\r
+                       }\r
+\r
+                       //ais0:\r
+                       {\r
+                               //push edi\r
+                               EmitByte(0x57);\r
+                               //push progfuncs\r
+                               EmitByte(0x68); EmitAdr(progfuncs);\r
+                               //call PR_StringToNative\r
+                               EmitByte(0xe8); EmitFOffset(PR_StringToNative,4);\r
+                               //add $8,%esp\r
+                               EmitByte(0x83); EmitByte(0xc4); EmitByte(0x08);\r
+                               //cmpb $0,(%eax)\r
+                               EmitByte(0x80); EmitByte(0x38); EmitByte(0x00);\r
                                //je _true\r
-                               EmitByte(0x74); EmitByte(0x4b);\r
+                               EmitByte(0x74); EmitByte(0x36);\r
                                //jmp _false\r
-                               EmitByte(0xeb); EmitByte(0x3d);\r
-\r
-                               //ais0:\r
-                               {\r
-                                       //push edi\r
-                                       EmitByte(0x57);\r
-                                       //push progfuncs\r
-                                       EmitByte(0x68); EmitAdr(progfuncs);\r
-                                       //call PR_StringToNative\r
-                                       EmitByte(0xe8); EmitFOffset(PR_StringToNative,4);\r
-                                       //add $8,%esp\r
-                                       EmitByte(0x83); EmitByte(0xc4); EmitByte(0x08);\r
-                                       //cmpb $0,(%eax)\r
-                                       EmitByte(0x80); EmitByte(0x38); EmitByte(0x00);\r
-                                       //je _true\r
-                                       EmitByte(0x74); EmitByte(0x36);\r
-                                       //jmp _false\r
-                                       EmitByte(0xeb); EmitByte(0x28);\r
-                               }\r
+                               EmitByte(0xeb); EmitByte(0x28);\r
                        }\r
                        //bnot0:\r
-\r
+*/\r
+LOADREG(glob + op[i].a, REG_ECX);\r
                        //push ecx\r
                        EmitByte(0x51);\r
                        //push progfuncs\r
@@ -741,6 +840,7 @@ EmitByte(0xcc);
                        //push %eax\r
                        EmitByte(0x50);\r
 \r
+LOADREG(glob + op[i].b, REG_EDI);\r
                        //push %edi\r
                        EmitByte(0x57);\r
                        //push progfuncs\r
@@ -757,28 +857,35 @@ EmitByte(0xcc);
                        EmitByte(0xe8); EmitFOffset(strcmp,4);\r
                        //add $16,%esp\r
                        EmitByte(0x83); EmitByte(0xc4); EmitByte(0x10);\r
+\r
                        //cmp $0,%eax\r
                        EmitByte(0x83); EmitByte(0xf8); EmitByte(0x00);\r
-                       //je _true\r
-                       EmitByte(0x74); EmitByte(0x0c);\r
-//_false:\r
-                       //mov 0.0f,c\r
-                       EmitByte(0xc7); EmitByte(0x05); EmitAdr(glob + op[i].c); EmitFloat((op[i].op == OP_NE_S)?1.0f:0.0f);\r
-                       //jmp done\r
-                       EmitByte(0xeb); EmitByte(0x0a);\r
-//_true:\r
-                       //mov 1.0f,c\r
-                       EmitByte(0xc7); EmitByte(0x05); EmitAdr(glob + op[i].c); EmitFloat((op[i].op == OP_NE_S)?0.0f:1.0f);\r
-//_done:\r
+                       j1 = LocalJmp(OP_EQ_S);\r
+                       {\r
+                               l0 = LocalLoc();\r
+                               STOREF((op[i].op == OP_NE_S)?1.0f:0.0f, glob + op[i].c);\r
+                               j2 = LocalJmp(OP_GOTO);\r
+                       }\r
+                       {\r
+                               l1 = LocalLoc();\r
+                               STOREF((op[i].op == OP_NE_S)?0.0f:1.0f, glob + op[i].c);\r
+                       }\r
+                       l2 = LocalLoc();\r
+\r
+//                     LocalJmpLoc(j0b, l0);\r
+                       LocalJmpLoc(j1, l1);\r
+//                     LocalJmpLoc(j1b, l1);\r
+                       LocalJmpLoc(j2, l2);\r
+                       }\r
                        break;\r
 \r
                case OP_NOT_S:\r
-                       //mov A,%eax\r
-                       EmitByte(0xa1);EmitAdr(glob + op[i].a);\r
+                       LOADREG(glob + op[i].a, REG_EAX)\r
+\r
                        //cmp $0,%eax\r
                        EmitByte(0x83); EmitByte(0xf8); EmitByte(0x00);\r
-                       //je _true\r
-                       EmitByte(0x74); EmitByte(0x1f);\r
+                       j2 = LocalJmp(OP_EQ_S);\r
+\r
                        //push %eax\r
                        EmitByte(0x50);\r
                        //push progfuncs\r
@@ -787,19 +894,22 @@ EmitByte(0xcc);
                        EmitByte(0xe8); EmitFOffset(PR_StringToNative,4);\r
                        //add $8,%esp\r
                        EmitByte(0x83); EmitByte(0xc4); EmitByte(0x08);\r
+\r
                        //cmpb $0,(%eax)\r
                        EmitByte(0x80); EmitByte(0x38); EmitByte(0x00);\r
-                       //je _true\r
-                       EmitByte(0x74); EmitByte(0x0c);\r
-//_false:\r
-                       //mov 0.0f,c\r
-                       EmitByte(0xc7); EmitByte(0x05); EmitAdr(glob + op[i].c); EmitFloat(0.0f);\r
-                       //jmp done\r
-                       EmitByte(0xeb); EmitByte(0x0a);\r
-//_true:\r
-                       //mov 1.0f,c\r
-                       EmitByte(0xc7); EmitByte(0x05); EmitAdr(glob + op[i].c); EmitFloat(1.0f);\r
-//_done:\r
+                       j1 = LocalJmp(OP_EQ_S);\r
+                       {\r
+                               STOREF(0.0f, glob + op[i].c);\r
+                               j0 = LocalJmp(OP_GOTO);\r
+                       }\r
+                       {\r
+                               l1 = LocalLoc();\r
+                               STOREF(1.0f, glob + op[i].c);\r
+                       }\r
+                       l2 = LocalLoc();\r
+                       LocalJmpLoc(j2, l1);\r
+                       LocalJmpLoc(j1, l1);\r
+                       LocalJmpLoc(j0, l2);\r
                        break;\r
 \r
                case OP_ADD_V:\r
@@ -875,42 +985,31 @@ EmitByte(0xcc);
 \r
                case OP_EQ_F:\r
                case OP_NE_F:\r
-               case OP_LE:\r
-               case OP_GE:\r
-               case OP_LT:\r
-               case OP_GT:\r
+               case OP_LE_F:\r
+               case OP_GE_F:\r
+               case OP_LT_F:\r
+               case OP_GT_F:\r
                        //flds glob[A]\r
-                       EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a);\r
-                       //flds glob[B]\r
                        EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].b);\r
+                       //flds glob[B]\r
+                       EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a);\r
                        //fcomip %st(1),%st\r
                        EmitByte(0xdf);EmitByte(0xe9);\r
                        //fstp %st(0)   (aka: pop)\r
                        EmitByte(0xdd);EmitByte(0xd8);\r
 \r
-                       //jcc _true\r
-                       if (op[i].op == OP_LE)\r
-                               EmitByte(0x7e); //jle\r
-                       else if (op[i].op == OP_GE)\r
-                               EmitByte(0x7d); //jge\r
-                       else if (op[i].op == OP_LT)\r
-                               EmitByte(0x7c); //jl\r
-                       else if (op[i].op == OP_GT)\r
-                               EmitByte(0x7f); //jg\r
-                       else if (op[i].op == OP_NE_F)\r
-                               EmitByte(0x75); //jne\r
-                       else\r
-                               EmitByte(0x74); //je\r
-                       EmitByte(0x0c);\r
-//_false:\r
-                       //mov 0.0f,c\r
-                       EmitByte(0xc7); EmitByte(0x05); EmitAdr(glob + op[i].c); EmitFloat(0.0f);\r
-                       //jmp done\r
-                       EmitByte(0xeb); EmitByte(0x0a);\r
-//_true:\r
-                       //mov 1.0f,c\r
-                       EmitByte(0xc7); EmitByte(0x05); EmitAdr(glob + op[i].c); EmitFloat(1.0f);\r
-//_done:\r
+                       j1 = LocalJmp(op[i].op);\r
+                       {\r
+                               STOREF(0.0f, glob + op[i].c);\r
+                               j2 = LocalJmp(OP_GOTO);\r
+                       }\r
+                       {\r
+                               l1 = LocalLoc();\r
+                               STOREF(1.0f, glob + op[i].c);\r
+                       }\r
+                       l2 = LocalLoc();\r
+                       LocalJmpLoc(j1,l1);\r
+                       LocalJmpLoc(j2,l2);\r
                        break;\r
 \r
                case OP_MUL_FV:\r
@@ -934,21 +1033,21 @@ EmitByte(0xcc);
                                EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + f);\r
 \r
                                //flds glob[V0]\r
-                               EmitByte(0xd8);EmitByte(0x0d);EmitAdr(glob + v+0);\r
+                               EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + v+0);\r
                                //fmul st(1)\r
                                EmitByte(0xd8);EmitByte(0xc9);\r
                                //fstps glob[C]\r
                                EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+0);\r
 \r
                                //flds glob[V0]\r
-                               EmitByte(0xd8);EmitByte(0x0d);EmitAdr(glob + v+1);\r
+                               EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + v+1);\r
                                //fmul st(1)\r
                                EmitByte(0xd8);EmitByte(0xc9);\r
                                //fstps glob[C]\r
                                EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+1);\r
 \r
                                //flds glob[V0]\r
-                               EmitByte(0xd8);EmitByte(0x0d);EmitAdr(glob + v+2);\r
+                               EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + v+2);\r
                                //fmul st(1)\r
                                EmitByte(0xd8);EmitByte(0xc9);\r
                                //fstps glob[C]\r
@@ -972,8 +1071,8 @@ EmitByte(0xcc);
                        //add $12,%esp\r
                        EmitByte(0x83); EmitByte(0xc4); EmitByte(0x0c);\r
                        break;\r
-#if 0\r
-               case OP_NOT_V:\r
+#if 1\r
+/*             case OP_NOT_V:\r
                        //flds 0\r
                        //flds glob[A+0]\r
                        //fcomip %st(1),%st\r
@@ -990,8 +1089,18 @@ EmitByte(0xcc);
                        //mov 0,C\r
                        //done:\r
                        break;\r
-\r
+*/\r
+                       \r
+               case OP_NOT_V:\r
+                       EmitByte(0xcd);EmitByte(op[i].op);\r
+                       printf("QCJIT: instruction %i is not implemented\n", op[i].op);\r
+                       break;\r
+#endif\r
+               case OP_NE_V:\r
                case OP_EQ_V:\r
+               {\r
+                       void *f0, *f1, *f2, *floc;\r
+//compare v[0]\r
                        //flds glob[A]\r
                        EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+0);\r
                        //flds glob[B]\r
@@ -1001,54 +1110,151 @@ EmitByte(0xcc);
                        //fstp %st(0)   (aka: pop)\r
                        EmitByte(0xdd);EmitByte(0xd8);\r
 \r
-                       //jncc _true\r
-                       if (op[i].op == OP_NE_V)\r
-                               EmitByte(0x74); //je\r
-                       else\r
-                               EmitByte(0x75); //jne\r
-                       EmitByte(0x0c);\r
-//_false0:\r
-                       //mov 0.0f,c\r
-                       EmitByte(0xc7); EmitByte(0x05); EmitAdr(glob + op[i].c); EmitFloat(1.0f);\r
-                       //jmp done\r
-                       EmitByte(0xeb); EmitByte(0x0a);\r
+                       /*if the condition is true, don't fail*/\r
+                       j1 = LocalJmp(op[i].op);\r
+                       {\r
+                               STOREF(0.0f, glob + op[i].c);\r
+                               f0 = LocalJmp(OP_GOTO);\r
+                       }\r
+                       l1 = LocalLoc();\r
+                       LocalJmpLoc(j1,l1);\r
 \r
+//compare v[1]\r
+                       //flds glob[A]\r
+                       EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+1);\r
+                       //flds glob[B]\r
+                       EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].b+1);\r
+                       //fcomip %st(1),%st\r
+                       EmitByte(0xdf);EmitByte(0xe9);\r
+                       //fstp %st(0)   (aka: pop)\r
+                       EmitByte(0xdd);EmitByte(0xd8);\r
 \r
-//_true:\r
-                       //mov 1.0f,c\r
-                       EmitByte(0xc7); EmitByte(0x05); EmitAdr(glob + op[i].c); EmitFloat(0.0f);\r
-//_done:\r
+                       /*if the condition is true, don't fail*/\r
+                       j1 = LocalJmp(op[i].op);\r
+                       {\r
+                               STOREF(0.0f, glob + op[i].c);\r
+                               f1 = LocalJmp(OP_GOTO);\r
+                       }\r
+                       l1 = LocalLoc();\r
+                       LocalJmpLoc(j1,l1);\r
+\r
+//compare v[2]\r
+                       //flds glob[A]\r
+                       EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+2);\r
+                       //flds glob[B]\r
+                       EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].b+2);\r
+                       //fcomip %st(1),%st\r
+                       EmitByte(0xdf);EmitByte(0xe9);\r
+                       //fstp %st(0)   (aka: pop)\r
+                       EmitByte(0xdd);EmitByte(0xd8);\r
+\r
+                       /*if the condition is true, don't fail*/\r
+                       j1 = LocalJmp(op[i].op);\r
+                       {\r
+                               STOREF(0.0f, glob + op[i].c);\r
+                               f2 = LocalJmp(OP_GOTO);\r
+                       }\r
+                       l1 = LocalLoc();\r
+                       LocalJmpLoc(j1,l1);\r
+\r
+//success!\r
+                       STOREF(1.0f, glob + op[i].c);\r
+\r
+                       floc = LocalLoc();\r
+                       LocalJmpLoc(f0,floc);\r
+                       LocalJmpLoc(f1,floc);\r
+                       LocalJmpLoc(f2,floc);\r
                        break;\r
+               }\r
 \r
+               /*fteqcc generates these from reading 'fast arrays', and are part of hexenc extras*/\r
+               case OP_FETCH_GBL_F:\r
+               case OP_FETCH_GBL_S:\r
+               case OP_FETCH_GBL_E:\r
+               case OP_FETCH_GBL_FNC:\r
+               case OP_FETCH_GBL_V:\r
+               {\r
+                       unsigned int max = ((unsigned int*)glob)[op[i].a-1];\r
+                       unsigned int base = op[i].a;\r
+                       //flds glob[B]\r
+                       EmitByte(0xd9); EmitByte(0x05);EmitAdr(glob + op[i].b);\r
+                       //fistp ta\r
+                       EmitByte(0xdb); EmitByte(0x1d);EmitAdr(&ta);\r
+                       LOADREG(&ta, REG_EAX)\r
+                       //FIXME: if eax >= $max, abort\r
 \r
-               case OP_EQ_V:\r
-                       EmitByte(0xcd);EmitByte(op[i].op);\r
-                       printf("QCJIT: instruction %i is not implemented\n", op[i].op);\r
+                       if (op[i].op == OP_FETCH_GBL_V)\r
+                       {\r
+                               /*scale the index by 3*/\r
+                               SETREGI(3, REG_EDX)\r
+                               //mul %edx\r
+                               EmitByte(0xf7); EmitByte(0xe2);\r
+                       }\r
+\r
+                       //lookup global\r
+                       //mov &glob[base](,%eax,4),%edx\r
+                       EmitByte(0x8b);EmitByte(0x14);EmitByte(0x85);Emit4Byte((unsigned int)(glob + base+0));\r
+                       STOREREG(REG_EDX, glob + op[i].c+0)\r
+                       if (op[i].op == OP_FETCH_GBL_V)\r
+                       {\r
+                               //mov &glob[base+1](,%eax,4),%edx\r
+                               EmitByte(0x8b);EmitByte(0x14);EmitByte(0x85);Emit4Byte((unsigned int)(glob + base+1));\r
+                               STOREREG(REG_EDX, glob + op[i].c+1)\r
+                               //mov &glob[base+2](,%eax,4),%edx\r
+                               EmitByte(0x8b);EmitByte(0x14);EmitByte(0x85);Emit4Byte((unsigned int)(glob + base+2));\r
+                               STOREREG(REG_EDX, glob + op[i].c+2)\r
+                       }\r
                        break;\r
+               }\r
 \r
-               case OP_NE_V:\r
-                       EmitByte(0xcd);EmitByte(op[i].op);\r
-                       printf("QCJIT: instruction %i is not implemented\n", op[i].op);\r
+               /*fteqcc generates these from writing 'fast arrays'*/\r
+               case OP_GLOBALADDRESS:\r
+                       LOADREG(glob + op[i].b, REG_EAX);\r
+                       //lea &glob[A](, %eax, 4),%eax\r
+                       EmitByte(0x8d);EmitByte(0x04);EmitByte(0x85);EmitAdr(glob + op[i].b+2);\r
+                       STOREREG(REG_EAX, glob + op[i].c);\r
+                       break;\r
+//             case OP_BOUNDCHECK:\r
+                       //FIXME: assert b <= a < c\r
+                       break;\r
+               case OP_CONV_FTOI:\r
+                       //flds glob[A]\r
+                       EmitByte(0xd9); EmitByte(0x05);EmitAdr(glob + op[i].a);\r
+                       //fistp glob[C]\r
+                       EmitByte(0xdb); EmitByte(0x1d);EmitAdr(glob + op[i].c);\r
+                       break;\r
+               case OP_MUL_I:\r
+                       LOADREG(glob + op[i].a, REG_EAX);\r
+                       //mull glob[B]       (arg*eax => edx:eax) FIXME: verify opcode, mull m32 should be 0xf7 /4 (0xfc is cld)\r
+                       EmitByte(0xfc); EmitByte(0x25);EmitAdr(glob + op[i].b);\r
+                       STOREREG(REG_EAX, glob + op[i].c);\r
                        break;\r
 \r
-               case OP_NOT_V:\r
-                       EmitByte(0xcd);EmitByte(op[i].op);\r
-                       printf("QCJIT: instruction %i is not implemented\n", op[i].op);\r
+               /*other extended opcodes*/\r
+               case OP_BITOR_I:\r
+                       LOADREG(glob + op[i].a, REG_EAX)\r
+                       //or glob[B],%eax\r
+                       EmitByte(0x0b); EmitByte(0x05);EmitAdr(glob + op[i].b);\r
+                       STOREREG(REG_EAX, glob + op[i].c);\r
                        break;\r
-#endif\r
+\r
+\r
                default:\r
-                       printf("QCJIT: Extended instruction set %i is not supported, not using jit.\n", op[i].op);\r
+                       {\r
+                               enum qcop_e e = op[i].op;\r
+                       printf("QCJIT: Extended instruction set %i is not supported, not using jit.\n", e);\r
+                       }\r
 \r
 \r
-                       free(statementjumps);   //[MAX_STATEMENTS]\r
-                       free(statementoffsets); //[MAX_STATEMENTS]\r
-                       free(code);\r
-                       statementoffsets = NULL;\r
-                       return false;\r
+                       free(jit->statementjumps);      //[MAX_STATEMENTS]\r
+                       free(jit->statementoffsets); //[MAX_STATEMENTS]\r
+                       free(jit->code);\r
+                       free(jit);\r
+                       return NULL;\r
                }\r
        }\r
 \r
-       FixupJumps();\r
+       FixupJumps(jit);\r
 \r
 #ifdef _WIN32\r
        {\r
@@ -1056,22 +1262,32 @@ EmitByte(0xcc);
 \r
                //this memory is on the heap.\r
                //this means that we must maintain read/write protection, or libc will crash us\r
-               VirtualProtect(code, codesize, PAGE_EXECUTE_READWRITE, &old);\r
+               VirtualProtect(jit->code, jit->codesize, PAGE_EXECUTE_READWRITE, &old);\r
        }\r
 #endif\r
 \r
-//     externs->WriteFile("jit.x86", code, codesize);\r
+//     externs->WriteFile("jit.x86", jit->code, jit->codesize);\r
 \r
-       return true;\r
+       return jit;\r
+}\r
+\r
+float foo(float arg) /*returns 1 when arg compares equal to zero, 0 otherwise. NOTE(review): not referenced in the visible code; looks like scratch code - confirm before relying on it*/\r
+{\r
+       float f;\r
+       if (!arg) /*!arg is (arg == 0): true for +0.0 and -0.0, false for NaN*/\r
+               f = 1;\r
+       else\r
+               f = 0;\r
+       return f;\r
 }\r
 \r
-void PR_EnterJIT(progfuncs_t *progfuncs, int statement)\r
+void PR_EnterJIT(progfuncs_t *progfuncs, struct jitstate *jit, int statement)\r
 {\r
 #ifdef __GNUC__\r
        //call, it clobbers pretty much everything.\r
-       asm("call *%0" :: "r"(statementoffsets[statement+1]),"b"(prinst->edicttable):"cc","memory","eax","ecx","edx");\r
+       asm("call *%0" :: "r"(jit->statementoffsets[statement+1]),"b"(prinst->edicttable):"cc","memory","eax","ecx","edx");\r
 #elif defined(_MSC_VER)\r
-       void *entry = statementoffsets[statement+1];\r
+       void *entry = jit->statementoffsets[statement+1];\r
        void *edicttable = prinst->edicttable;\r
        __asm {\r
                pushad\r