]> de.git.xonotic.org Git - voretournament/voretournament.git/blob - misc/source/fteqcc-src/pr_x86.c
Better comment on my last change
[voretournament/voretournament.git] / misc / source / fteqcc-src / pr_x86.c
1 /*\r
2 when I say JIT, I mean load time, not execution time.\r
3 \r
4 notes:\r
5         qc jump offsets are all constants. we have no variable offset jumps (other than function calls/returns)\r
6         field remapping... fields are in place, and cannot be adjusted. if a field is not set to 0, it's assumed to be a constant.\r
7 \r
8 optimisations:\r
9         none at the moment...\r
10         instructions need to be chained. stuff that writes to C should be cacheable, etc. maybe we don't even need to do the write to C\r
11         it should also be possible to fold in eq+ifnot, so none of this silly storing of floats in equality tests\r
12 \r
13         this means that we need to track which vars are cached and in what form: fpreg, ireg+floatasint, ireg+float.\r
14         certain qccx hacks can use fpu operations on ints, so do what the instruction says, rather than considering an add an add regardless of types.\r
15 \r
16         OP_AND_F, OP_OR_F etc will generally result in ints, and we should be able to keep them as ints if they combine with other ints.\r
17 \r
18         some instructions are jump sites. any cache must be flushed before the start of the instruction.\r
19         some variables are locals, and will only ever be written by a single instruction, then read by the following instruction. such temps do not need to be written, or are overwritten later in the function anyway.\r
20         such locals need to be calculated PER FUNCTION as (fte)qcc can overlap locals making multiple distinct locals on a single offset.\r
21 \r
22         store locals on a proper stack instead of the current absurd mechanism.\r
23 \r
24         eax - tmp\r
25         ebx - prinst->edicttable\r
26         ecx     - tmp\r
27         edx - tmp\r
28         esi - debug opcode number\r
29         edi - tmp (because it's preserved by subfunctions)\r
30         ebp -\r
31 \r
32   to use gas to provide binary opcodes:\r
33   vim -N blob.s && as blob.s && objdump.exe -d a.out\r
34 \r
35 \r
36   notable mods to test:\r
37   prydon gate, due to fpu mangling to carry values between maps\r
38 */\r
39 \r
40 #define PROGSUSED\r
41 #include "progsint.h"\r
42 \r
43 #ifdef QCJIT\r
44 \r
/*
 * Fixed memory cells whose addresses get baked into the generated code.
 * ta/tb are scratch slots used by the bitwise float ops (the emitted code
 * stores integer conversions of the operands into them); nullfloat is the
 * 0.0 constant that emitted float compares test against.
 */
static float ta;
static float tb;
static float nullfloat = 0;
46 \r
/*
 * Working state for one JIT translation of a progs' statement list.
 */
struct jitstate
{
	/* pending rel32 fixups, recorded as triples of
	 * (code offset of the operand, target statement, bias); [MAX_STATEMENTS*3] */
	unsigned int *statementjumps;

	/* start address of the native code emitted for each statement; [MAX_STATEMENTS] */
	unsigned char **statementoffsets;

	/* count of unsigned ints in use in statementjumps (three per recorded jump) */
	unsigned int numjumps;

	/* output buffer receiving the generated machine code */
	unsigned char *code;

	/* number of bytes written to code so far */
	unsigned int codesize;

	/* number of statements this state was generated for */
	unsigned int jitstatements;
};
56 \r
57 static void EmitByte(struct jitstate *jit, unsigned char byte)\r
58 {\r
59         jit->code[jit->codesize++] = byte;\r
60 }\r
61 static void Emit4Byte(struct jitstate *jit, unsigned int value)\r
62 {\r
63         jit->code[jit->codesize++] = (value>> 0)&0xff;\r
64         jit->code[jit->codesize++] = (value>> 8)&0xff;\r
65         jit->code[jit->codesize++] = (value>>16)&0xff;\r
66         jit->code[jit->codesize++] = (value>>24)&0xff;\r
67 }\r
/*
 * Appends an absolute address as a 32-bit immediate.
 * The pointer is narrowed to unsigned int, so this is only meaningful
 * where pointers are 32 bits wide.
 */
static void EmitAdr(struct jitstate *jit, void *value)
{
	unsigned int imm = (unsigned int)value;

	Emit4Byte(jit, imm);
}
/*
 * Appends the raw 32-bit representation of a float as an immediate.
 */
static void EmitFloat(struct jitstate *jit, float value)
{
	/* reinterpret the float's bits through a union rather than converting the value */
	union {float asfloat; unsigned int asbits;} pun;

	pun.asfloat = value;
	Emit4Byte(jit, pun.asbits);
}
78 static void Emit2Byte(struct jitstate *jit, unsigned short value)\r
79 {\r
80         jit->code[jit->codesize++] = (value>> 0)&0xff;\r
81         jit->code[jit->codesize++] = (value>> 8)&0xff;\r
82 }\r
83 \r
84 static void EmitFOffset(struct jitstate *jit, void *func, int bias)\r
85 {\r
86         union {void *f; unsigned int i;} u;\r
87         u.f = func;\r
88         u.i -= (unsigned int)&jit->code[jit->codesize+bias];\r
89         Emit4Byte(jit, u.i);\r
90 }\r
91 \r
92 static void Emit4ByteJump(struct jitstate *jit, int statementnum, int offset)\r
93 {\r
94         jit->statementjumps[jit->numjumps++] = jit->codesize;\r
95         jit->statementjumps[jit->numjumps++] = statementnum;\r
96         jit->statementjumps[jit->numjumps++] = offset;\r
97 \r
98         //the offset is filled in later\r
99         jit->codesize += 4;\r
100 }\r
101 \r
/*
 * Register numbers as used in the register fields of the emitted
 * instruction bytes (see the XOR/LOADREG/STOREREG helpers).
 */
enum
{
	REG_EAX = 0,
	REG_ECX = 1,
	REG_EDX = 2,
	REG_EBX = 3,
	REG_ESP = 4,
	REG_EBP = 5,
	REG_ESI = 6,
	REG_EDI = 7
};
/*
 * Instruction-emitting helpers built on EmitByte/Emit4Byte/EmitAdr/EmitFloat.
 *
 * These intentionally expand to plain statement sequences that already end
 * in ';' (not do{}while(0)): some call sites invoke them without a trailing
 * semicolon. LOADREG/STOREREG expand to an if/else followed by a further
 * statement, so none of these may be used as the unbraced body of an
 * if/else/loop.
 *
 * XOR(sr,dr)        xor %sr,%dr
 * CLEARREG(reg)     zero a register
 * LOADREG(addr,reg) mov addr,%reg   (short 0xa1 form when reg is eax)
 * STOREREG(reg,addr) mov %reg,addr  (short 0xa3 form when reg is eax)
 * STOREF(f,addr)    movl $<bits of float f>,addr
 * STOREI(i,addr)    movl $i,addr
 * SETREGI(val,reg)  mov $val,%reg   (opcode 0xb8+reg; previously the reg
 *                                    argument was ignored and esi was
 *                                    always the destination)
 */
#define XOR(sr,dr) EmitByte(0x31);EmitByte(0xc0 | ((sr)<<3) | (dr));
#define CLEARREG(reg) XOR(reg,reg)
#define LOADREG(addr, reg) if ((reg) == REG_EAX) {EmitByte(0xa1);} else {EmitByte(0x8b); EmitByte(((reg)<<3) | 0x05);} EmitAdr(addr);
#define STOREREG(reg, addr) if ((reg) == REG_EAX) {EmitByte(0xa3);} else {EmitByte(0x89); EmitByte(((reg)<<3) | 0x05);} EmitAdr(addr);
#define STOREF(f, addr) EmitByte(0xc7);EmitByte(0x05); EmitAdr(addr);EmitFloat(f);
#define STOREI(i, addr) EmitByte(0xc7);EmitByte(0x05); EmitAdr(addr);Emit4Byte(i);
#define SETREGI(val,reg) EmitByte(0xb8 | (reg));Emit4Byte(val);
120 \r
121 static void *LocalLoc(struct jitstate *jit)\r
122 {\r
123         return &jit->code[jit->codesize];\r
124 }\r
125 static void *LocalJmp(struct jitstate *jit, int cond)\r
126 {\r
127         /*floating point ops don't set the sign flag, thus we use the 'above/below' instructions instead of 'greater/less' instructions*/\r
128         if (cond == OP_GOTO)\r
129                 EmitByte(jit, 0xeb);    //jmp\r
130         else if (cond == OP_LE_F)\r
131                 EmitByte(jit, 0x76);    //jbe\r
132         else if (cond == OP_GE_F)\r
133                 EmitByte(jit, 0x73);    //jae\r
134         else if (cond == OP_LT_F)\r
135                 EmitByte(jit, 0x72);    //jb\r
136         else if (cond == OP_GT_F)\r
137                 EmitByte(jit, 0x77);    //ja\r
138         else if (cond == OP_LE_I)\r
139                 EmitByte(jit, 0x7e);    //jle\r
140         else if (cond == OP_LT_I)\r
141                 EmitByte(jit, 0x7c);    //jl\r
142         else if ((cond >= OP_NE_F && cond <= OP_NE_FNC) || cond == OP_NE_I)\r
143                 EmitByte(jit, 0x75);    //jne\r
144         else if ((cond >= OP_EQ_F && cond <= OP_EQ_FNC) || cond == OP_EQ_I)\r
145                 EmitByte(jit, 0x74);    //je\r
146 #if defined(DEBUG) && defined(_WIN32)\r
147         else\r
148         {\r
149                 OutputDebugString("oh noes!\n");\r
150                 return NULL;\r
151         }\r
152 #endif\r
153 \r
154         EmitByte(jit, 0);\r
155 \r
156         return LocalLoc(jit);\r
157 }\r
/*
 * Patches the 8-bit displacement of a short jump.
 *
 * jmp: the value returned by LocalJmp, i.e. the address just past the
 *      jump's displacement byte (the displacement lives at jmp[-1]).
 * loc: the destination address.
 *
 * A signed 8-bit displacement covers -128..127 inclusive. In DEBUG+_WIN32
 * builds an out-of-range displacement is reported and the two jump bytes
 * are overwritten with 0xcd 0xcc (int $0xcc) so the bad site traps instead
 * of jumping somewhere random. Other builds perform no range check and
 * store the truncated value.
 */
static void LocalJmpLoc(void *jmp, void *loc)
{
	int offs;
	unsigned char *a = jmp;
	offs = (char *)loc - (char *)jmp;
#if defined(DEBUG) && defined(_WIN32)
	/* -128 itself is encodable, so only values below it are rejected */
	if (offs > 127 || offs < -128)
	{
		OutputDebugStringA("bad jump\n");
		a[-2] = 0xcd;
		a[-1] = 0xcc;
		return;
	}
#endif
	a[-1] = offs;
}
174 \r
175 static void FixupJumps(struct jitstate *jit)\r
176 {\r
177         unsigned int j;\r
178         unsigned char *codesrc;\r
179         unsigned char *codedst;\r
180         unsigned int offset;\r
181 \r
182         unsigned int v;\r
183 \r
184         for (j = 0; j < jit->numjumps;)\r
185         {\r
186                 v = jit->statementjumps[j++];\r
187                 codesrc = &jit->code[v];\r
188 \r
189                 v = jit->statementjumps[j++];\r
190                 codedst = jit->statementoffsets[v];\r
191 \r
192                 v = jit->statementjumps[j++];\r
193                 offset = (int)(codedst - (codesrc-v));  //3rd term because the jump is relative to the instruction start, not the instruction's offset\r
194 \r
195                 codesrc[0] = (offset>> 0)&0xff;\r
196                 codesrc[1] = (offset>> 8)&0xff;\r
197                 codesrc[2] = (offset>>16)&0xff;\r
198                 codesrc[3] = (offset>>24)&0xff;\r
199         }\r
200 }\r
201 \r
202 int ASMCALL PR_LeaveFunction (progfuncs_t *progfuncs);\r
203 int ASMCALL PR_EnterFunction (progfuncs_t *progfuncs, dfunction_t *f, int progsnum);\r
204 \r
205 void PR_CloseJit(struct jitstate *jit)\r
206 {\r
207         free(jit->statementjumps);\r
208         free(jit->statementoffsets);\r
209         free(jit->code);\r
210 }\r
211 \r
/*
 * From this point on the emit helpers are shadowed by same-named macros
 * that supply the local variable 'jit' as the first argument, so the code
 * generator can write EmitByte(0x90) instead of EmitByte(jit, 0x90).
 * Any function using these must have a 'struct jitstate *jit' in scope.
 */
#define EmitByte(v)		EmitByte(jit, v)
#define Emit4Byte(v)		Emit4Byte(jit, v)
#define EmitAdr(v)		EmitAdr(jit, v)
#define EmitFloat(v)		EmitFloat(jit, v)
#define EmitFOffset(a,b)	EmitFOffset(jit, a, b)
#define Emit4ByteJump(a,b)	Emit4ByteJump(jit, a, b)
#define LocalJmp(v)		LocalJmp(jit, v)
#define LocalLoc()		LocalLoc(jit)
220 \r
221 \r
222 struct jitstate *PR_GenerateJit(progfuncs_t *progfuncs)\r
223 {\r
224         struct jitstate *jit;\r
225 \r
226         void *j0, *l0;\r
227         void *j1, *l1;\r
228         void *j2, *l2;\r
229         unsigned int i;\r
230         dstatement16_t *op = (dstatement16_t*)current_progstate->statements;\r
231         unsigned int numstatements = current_progstate->progs->numstatements;\r
232         int *glob = (int*)current_progstate->globals;\r
233 \r
234         if (current_progstate->numbuiltins)\r
235                 return NULL;\r
236         jit = malloc(sizeof(*jit));\r
237         jit->jitstatements = numstatements;\r
238 \r
239         jit->statementjumps = malloc(numstatements*12);\r
240         jit->statementoffsets = malloc(numstatements*4);\r
241         jit->code = malloc(numstatements*500);\r
242         if (!jit->code)\r
243                 return NULL;\r
244 \r
245         jit->numjumps = 0;\r
246         jit->codesize = 0;\r
247 \r
248 \r
249 \r
250         for (i = 0; i < numstatements; i++)\r
251         {\r
252                 jit->statementoffsets[i] = &jit->code[jit->codesize];\r
253 \r
254                 /*DEBUG*/\r
255                 SETREGI(op[i].op, REG_ESI);\r
256 \r
257                 switch(op[i].op)\r
258                 {\r
259                 //jumps\r
260                 case OP_IF_I:\r
261                         //integer compare\r
262                         //if a, goto b\r
263 \r
264                         //cmpl $0,glob[A]\r
265                         EmitByte(0x83);EmitByte(0x3d);EmitAdr(glob + op[i].a);EmitByte(0x0);\r
266                         //jne B\r
267                         EmitByte(0x0f);EmitByte(0x85);Emit4ByteJump(i + (signed short)op[i].b, -4);\r
268                         break;\r
269 \r
270                 case OP_IFNOT_I:\r
271                         //integer compare\r
272                         //if !a, goto b\r
273 \r
274                         //cmpl $0,glob[A]\r
275                         EmitByte(0x83);EmitByte(0x3d);EmitAdr(glob + op[i].a);EmitByte(0x0);\r
276                         //je B\r
277                         EmitByte(0x0f);EmitByte(0x84);Emit4ByteJump(i + (signed short)op[i].b, -4);\r
278                         break;\r
279 \r
280                 case OP_GOTO:\r
281                         EmitByte(0xE9);Emit4ByteJump(i + (signed short)op[i].a, -4);\r
282                         break;\r
283                         \r
284                 //function returns\r
285                 case OP_DONE:\r
286                 case OP_RETURN:\r
287                         //done and return are the same\r
288 \r
289                         //part 1: store A into OFS_RETURN\r
290 \r
291                         if (!op[i].a)\r
292                         {\r
293                                 //assumption: anything that returns address 0 is a void or zero return.\r
294                                 //thus clear eax and copy that to the return vector.\r
295                                 CLEARREG(REG_EAX);\r
296                                 STOREREG(REG_EAX, glob + OFS_RETURN+0);\r
297                                 STOREREG(REG_EAX, glob + OFS_RETURN+1);\r
298                                 STOREREG(REG_EAX, glob + OFS_RETURN+2);\r
299                         }\r
300                         else\r
301                         {\r
302                                 LOADREG(glob + op[i].a+0, REG_EAX);\r
303                                 LOADREG(glob + op[i].a+1, REG_EDX);\r
304                                 LOADREG(glob + op[i].a+2, REG_ECX);\r
305                                 STOREREG(REG_EAX, glob + OFS_RETURN+0);\r
306                                 STOREREG(REG_EDX, glob + OFS_RETURN+1);\r
307                                 STOREREG(REG_ECX, glob + OFS_RETURN+2);\r
308                         }\r
309                         \r
310                         //call leavefunction to get the return address\r
311                         \r
312 //                      pushl progfuncs\r
313                         EmitByte(0x68);EmitAdr(progfuncs);\r
314 //                      call PR_LeaveFunction\r
315                         EmitByte(0xe8);EmitFOffset(PR_LeaveFunction, 4);\r
316 //                      add $4,%esp\r
317                         EmitByte(0x83);EmitByte(0xc4);EmitByte(0x04);\r
318 //                      movl pr_depth,%edx\r
319                         EmitByte(0x8b);EmitByte(0x15);EmitAdr(&pr_depth);\r
320 //                      cmp prinst->exitdepth,%edx\r
321                         EmitByte(0x3b);EmitByte(0x15);EmitAdr(&prinst->exitdepth);\r
322 //                      je returntoc\r
323                         j1 = LocalJmp(OP_EQ_E);\r
324 //                              mov statementoffsets[%eax*4],%eax\r
325                                 EmitByte(0x8b);EmitByte(0x04);EmitByte(0x85);EmitAdr(jit->statementoffsets+1);\r
326 //                              jmp *eax\r
327                                 EmitByte(0xff);EmitByte(0xe0);\r
328 //                      returntoc:\r
329                         l1 = LocalLoc();\r
330 //                      ret\r
331                         EmitByte(0xc3);\r
332 \r
333                         LocalJmpLoc(j1,l1);\r
334                         break;\r
335 \r
336                 //function calls\r
337                 case OP_CALL0:\r
338                 case OP_CALL1:\r
339                 case OP_CALL2:\r
340                 case OP_CALL3:\r
341                 case OP_CALL4:\r
342                 case OP_CALL5:\r
343                 case OP_CALL6:\r
344                 case OP_CALL7:\r
345                 case OP_CALL8:\r
346                 //save the state in place the rest of the engine can cope with\r
347                         //movl $i, pr_xstatement\r
348                         EmitByte( 0xc7);EmitByte(0x05);EmitAdr(&pr_xstatement);Emit4Byte(i);\r
349                         //movl $(op[i].op-OP_CALL0), pr_argc\r
350                         EmitByte( 0xc7);EmitByte(0x05);EmitAdr(&pr_argc);Emit4Byte(op[i].op-OP_CALL0);\r
351 \r
352                 //figure out who we're calling, and what that involves\r
353                         //%eax = glob[A]\r
354                         LOADREG(glob + op[i].a, REG_EAX);\r
355                 //eax is now the func num\r
356 \r
357                         //mov %eax,%ecx\r
358                         EmitByte(0x89); EmitByte(0xc1);\r
359                         //shr $24,%ecx\r
360                         EmitByte(0xc1); EmitByte(0xe9); EmitByte(0x18);\r
361                 //ecx is now the progs num for the new func\r
362 \r
363                         //cmp %ecx,pr_typecurrent\r
364                         EmitByte(0x39); EmitByte(0x0d); EmitAdr(&pr_typecurrent);\r
365                         //je sameprogs\r
366                         j1 = LocalJmp(OP_EQ_I);\r
367                         {\r
368                                 //can't handle switching progs\r
369 \r
370                                 //FIXME: recurse though PR_ExecuteProgram\r
371                                 //push eax\r
372                                 //push progfuncs\r
373                                 //call PR_ExecuteProgram\r
374                                 //add $8,%esp\r
375                                 //remember to change the je above\r
376 \r
377                                 //err... exit depth? no idea\r
378                                 EmitByte(0xcd);EmitByte(op[i].op);      //int $X\r
379 \r
380 \r
381                                 //ret\r
382                                 EmitByte(0xc3);\r
383                         }\r
384                         //sameprogs:\r
385                         l1 = LocalLoc();\r
386                         LocalJmpLoc(j1,l1);\r
387 \r
388                         //andl $0x00ffffff, %eax\r
389                         EmitByte(0x25);Emit4Byte(0x00ffffff);\r
390                         \r
391                         //mov $sizeof(dfunction_t),%edx\r
392                         EmitByte(0xba);Emit4Byte(sizeof(dfunction_t));\r
393                         //mul %edx\r
394                         EmitByte(0xf7); EmitByte(0xe2);\r
395                         //add pr_functions,%eax\r
396                         EmitByte(0x05); EmitAdr(pr_functions);\r
397 \r
398                 //eax is now the dfunction_t to be called\r
399                 //edx is clobbered.\r
400 \r
401                         //mov (%eax),%edx\r
402                         EmitByte(0x8b);EmitByte(0x10);\r
403                 //edx is now the first statement number\r
404                         //cmp $0,%edx\r
405                         EmitByte(0x83);EmitByte(0xfa);EmitByte(0x00);\r
406                         //jl isabuiltin\r
407                         j1 = LocalJmp(OP_LT_I);\r
408                         {\r
409                                 /* call the function*/\r
410                                 //push %ecx\r
411                                 EmitByte(0x51);\r
412                                 //push %eax\r
413                                 EmitByte(0x50);\r
414                                 //pushl progfuncs\r
415                                 EmitByte(0x68);EmitAdr(progfuncs);\r
416                                 //call PR_EnterFunction\r
417                                 EmitByte(0xe8);EmitFOffset(PR_EnterFunction, 4);\r
418                                 //sub $12,%esp\r
419                                 EmitByte(0x83);EmitByte(0xc4);EmitByte(0xc);\r
420                 //eax is now the next statement number (first of the new function, usually equal to ecx, but not always)\r
421 \r
422                                 //jmp statementoffsets[%eax*4]\r
423                                 EmitByte(0xff);EmitByte(0x24);EmitByte(0x85);EmitAdr(jit->statementoffsets+1);\r
424                         }\r
425                         /*its a builtin, figure out which, and call it*/\r
426                         //isabuiltin:\r
427                         l1 = LocalLoc();\r
428                         LocalJmpLoc(j1,l1);\r
429 \r
430                         //push current_progstate->globals\r
431                         EmitByte(0x68);EmitAdr(current_progstate->globals);\r
432                         //push progfuncs\r
433                         EmitByte(0x68);EmitAdr(progfuncs);\r
434                         //neg %edx\r
435                         EmitByte(0xf7);EmitByte(0xda);\r
436                         //call externs->globalbuiltins[%edx,4]\r
437 //FIXME: make sure this dereferences\r
438                         EmitByte(0xff);EmitByte(0x14);EmitByte(0x95);EmitAdr(externs->globalbuiltins);\r
439                         //add $8,%esp\r
440                         EmitByte(0x83);EmitByte(0xc4);EmitByte(0x8);\r
441 \r
442                 //but that builtin might have been Abort()\r
443 \r
444                         LOADREG(&prinst->continuestatement, REG_EAX);\r
445                         //cmp $-1,%eax\r
446                         EmitByte(0x83);EmitByte(0xf8);EmitByte(0xff);\r
447                         //je donebuiltincall\r
448                         j1 = LocalJmp(OP_EQ_I);\r
449                         {\r
450                                 //mov $-1,prinst->continuestatement\r
451                                 EmitByte(0xc7);EmitByte(0x05);EmitAdr(&prinst->continuestatement);Emit4Byte((unsigned int)-1);\r
452 \r
453                                 //jmp statementoffsets[%eax*4]\r
454                                 EmitByte(0xff);EmitByte(0x24);EmitByte(0x85);EmitAdr(jit->statementoffsets);\r
455                         }\r
456                         //donebuiltincall:\r
457                         l1 = LocalLoc();\r
458                         LocalJmpLoc(j1,l1);\r
459                         break;\r
460 \r
461                 case OP_MUL_F:\r
462                         //flds glob[A]\r
463                         EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a);\r
464                         //fmuls glob[B]\r
465                         EmitByte(0xd8);EmitByte(0x0d);EmitAdr(glob + op[i].b);\r
466                         //fstps glob[C]\r
467                         EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c);\r
468                         break;\r
469                 case OP_DIV_F:\r
470                         //flds glob[A]\r
471                         EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a);\r
472                         //fdivs glob[B]\r
473                         EmitByte(0xd8);EmitByte(0x35);EmitAdr(glob + op[i].b);\r
474                         //fstps glob[C]\r
475                         EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c);\r
476                         break;\r
477                 case OP_ADD_F:\r
478                         //flds glob[A]\r
479                         EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a);\r
480                         //fadds glob[B]\r
481                         EmitByte(0xd8);EmitByte(0x05);EmitAdr(glob + op[i].b);\r
482                         //fstps glob[C]\r
483                         EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c);\r
484                         break;\r
485                 case OP_SUB_F:\r
486                         //flds glob[A]\r
487                         EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a);\r
488                         //fsubs glob[B]\r
489                         EmitByte(0xd8);EmitByte(0x25);EmitAdr(glob + op[i].b);\r
490                         //fstps glob[C]\r
491                         EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c);\r
492                         break;\r
493 \r
494                 case OP_NOT_F:\r
495                         //fldz\r
496                         EmitByte(0xd9);EmitByte(0xee);\r
497                         //fcomps        glob[A]\r
498                         EmitByte(0xd8); EmitByte(0x1d); EmitAdr(glob + op[i].a);\r
499                         //fnstsw %ax\r
500                         EmitByte(0xdf);EmitByte(0xe0);\r
501                         //testb 0x40,%ah\r
502                         EmitByte(0xf6);EmitByte(0xc4);EmitByte(0x40);\r
503                         \r
504                         j1 = LocalJmp(OP_NE_F);\r
505                         {\r
506                                 STOREF(0.0f, glob + op[i].c);\r
507                                 j2 = LocalJmp(OP_GOTO);\r
508                         }\r
509                         {\r
510                                 //noteq:\r
511                                 l1 = LocalLoc();\r
512                                 STOREF(1.0f, glob + op[i].c);\r
513                         }\r
514                         //end:\r
515                         l2 = LocalLoc();\r
516                         LocalJmpLoc(j1,l1);\r
517                         LocalJmpLoc(j2,l2);\r
518                         break;\r
519 \r
520                 case OP_STORE_F:\r
521                 case OP_STORE_S:\r
522                 case OP_STORE_ENT:\r
523                 case OP_STORE_FLD:\r
524                 case OP_STORE_FNC:\r
525                         LOADREG(glob + op[i].a, REG_EAX);\r
526                         STOREREG(REG_EAX, glob + op[i].b);\r
527                         break;\r
528 \r
529                 case OP_STORE_V:\r
530                         LOADREG(glob + op[i].a+0, REG_EAX);\r
531                         LOADREG(glob + op[i].a+1, REG_EDX);\r
532                         LOADREG(glob + op[i].a+2, REG_ECX);\r
533                         STOREREG(REG_EAX, glob + op[i].b+0);\r
534                         STOREREG(REG_EDX, glob + op[i].b+1);\r
535                         STOREREG(REG_ECX, glob + op[i].b+2);\r
536                         break;\r
537 \r
538                 case OP_LOAD_F:\r
539                 case OP_LOAD_S:\r
540                 case OP_LOAD_ENT:\r
541                 case OP_LOAD_FLD:\r
542                 case OP_LOAD_FNC:\r
543                 case OP_LOAD_V:\r
544                 //a is the ent number, b is the field\r
545                 //c is the dest\r
546 \r
547                         LOADREG(glob + op[i].a, REG_EAX);\r
548                         LOADREG(glob + op[i].b, REG_ECX);\r
549 \r
550                 //FIXME: bound eax (ent number)\r
551                 //FIXME: bound ecx (field index)\r
552                         //mov (ebx,eax,4).%eax\r
553                         EmitByte(0x8b); EmitByte(0x04); EmitByte(0x83);\r
554                 //eax is now an edictrun_t\r
555                         //mov fields(,%eax,4),%edx\r
556                         EmitByte(0x8b);EmitByte(0x50);EmitByte((int)&((edictrun_t*)NULL)->fields);\r
557                 //edx is now the field array for that ent\r
558 \r
559                         //mov fieldajust(%edx,%ecx,4),%eax\r
560                         EmitByte(0x8b); EmitByte(0x84); EmitByte(0x8a); Emit4Byte(progfuncs->fieldadjust*4);\r
561 \r
562                         STOREREG(REG_EAX, glob + op[i].c)\r
563 \r
564                         if (op[i].op == OP_LOAD_V)\r
565                         {\r
566                                 //mov fieldajust+4(%edx,%ecx,4),%eax\r
567                                 EmitByte(0x8b); EmitByte(0x84); EmitByte(0x8a); Emit4Byte(4+progfuncs->fieldadjust*4);\r
568                                 STOREREG(REG_EAX, glob + op[i].c+1)\r
569 \r
570                                 //mov fieldajust+8(%edx,%ecx,4),%eax\r
571                                 EmitByte(0x8b); EmitByte(0x84); EmitByte(0x8a); Emit4Byte(8+progfuncs->fieldadjust*4);\r
572                                 STOREREG(REG_EAX, glob + op[i].c+2)\r
573                         }\r
574                         break;\r
575 \r
576                 case OP_ADDRESS:\r
577                         //a is the ent number, b is the field\r
578                 //c is the dest\r
579 \r
580                         LOADREG(glob + op[i].a, REG_EAX);\r
581                         LOADREG(glob + op[i].b, REG_ECX);\r
582 \r
583                 //FIXME: bound eax (ent number)\r
584                 //FIXME: bound ecx (field index)\r
585                         //mov (ebx,eax,4).%eax\r
586                         EmitByte(0x8b); EmitByte(0x04); EmitByte(0x83);\r
587                 //eax is now an edictrun_t\r
588                         //mov fields(,%eax,4),%edx\r
589                         EmitByte(0x8b);EmitByte(0x50);EmitByte((int)&((edictrun_t*)NULL)->fields);\r
590                 //edx is now the field array for that ent\r
591                         //mov fieldajust(%edx,%ecx,4),%eax      //offset = progfuncs->fieldadjust\r
592                         //EmitByte(0x8d); EmitByte(0x84); EmitByte(0x8a); EmitByte(progfuncs->fieldadjust*4);\r
593                         EmitByte(0x8d); EmitByte(0x84); EmitByte(0x8a); Emit4Byte(progfuncs->fieldadjust*4);\r
594                         STOREREG(REG_EAX, glob + op[i].c);\r
595                         break;\r
596 \r
597                 case OP_STOREP_F:\r
598                 case OP_STOREP_S:\r
599                 case OP_STOREP_ENT:\r
600                 case OP_STOREP_FLD:\r
601                 case OP_STOREP_FNC:\r
602                         LOADREG(glob + op[i].a, REG_EAX);\r
603                         LOADREG(glob + op[i].b, REG_ECX);\r
604                         //mov %eax,(%ecx)\r
605                         EmitByte(0x89);EmitByte(0x01);\r
606                         break;\r
607 \r
608                 case OP_STOREP_V:\r
609                         LOADREG(glob + op[i].b, REG_ECX);\r
610 \r
611                         LOADREG(glob + op[i].a+0, REG_EAX);\r
612                         //mov %eax,0(%ecx)\r
613                         EmitByte(0x89);EmitByte(0x01);\r
614 \r
615                         LOADREG(glob + op[i].a+1, REG_EAX);\r
616                         //mov %eax,4(%ecx)\r
617                         EmitByte(0x89);EmitByte(0x41);EmitByte(0x04);\r
618 \r
619                         LOADREG(glob + op[i].a+2, REG_EAX);\r
620                         //mov %eax,8(%ecx)\r
621                         EmitByte(0x89);EmitByte(0x41);EmitByte(0x08);\r
622                         break;\r
623 \r
624                 case OP_NE_I:\r
625                 case OP_NE_E:\r
626                 case OP_NE_FNC:\r
627                 case OP_EQ_I:\r
628                 case OP_EQ_E:\r
629                 case OP_EQ_FNC:\r
630                         //integer equality\r
631                         LOADREG(glob + op[i].a, REG_EAX);\r
632 \r
633                         //cmp glob[B],%eax\r
634                         EmitByte(0x3b); EmitByte(0x04); EmitByte(0x25); EmitAdr(glob + op[i].b);\r
635                         j1 = LocalJmp(op[i].op);\r
636                         {\r
637                                 STOREF(0.0f, glob + op[i].c);\r
638                                 j2 = LocalJmp(OP_GOTO);\r
639                         }\r
640                         {\r
641                                 l1 = LocalLoc();\r
642                                 STOREF(1.0f, glob + op[i].c);\r
643                         }\r
644                         l2 = LocalLoc();\r
645                         LocalJmpLoc(j1,l1);\r
646                         LocalJmpLoc(j2,l2);\r
647                         break;\r
648 \r
649                 case OP_NOT_I:\r
650                 case OP_NOT_ENT:\r
651                 case OP_NOT_FNC:\r
652                         //cmp glob[B],$0\r
653                         EmitByte(0x83); EmitByte(0x3d); EmitAdr(glob + op[i].a); EmitByte(0x00); \r
654                         j1 = LocalJmp(OP_NE_I);\r
655                         {\r
656                                 STOREF(1.0f, glob + op[i].c);\r
657                                 j2 = LocalJmp(OP_GOTO);\r
658                         }\r
659                         {\r
660                                 l1 = LocalLoc();\r
661                                 STOREF(0.0f, glob + op[i].c);\r
662                         }\r
663                         l2 = LocalLoc();\r
664                         LocalJmpLoc(j1,l1);\r
665                         LocalJmpLoc(j2,l2);\r
666                         break;\r
667 \r
668                 case OP_BITOR_F:        //floats...\r
669                         //flds glob[A]\r
670                         EmitByte(0xd9); EmitByte(0x05);EmitAdr(glob + op[i].a);\r
671                         //flds glob[B]\r
672                         EmitByte(0xd9); EmitByte(0x05);EmitAdr(glob + op[i].b);\r
673                         //fistp tb\r
674                         EmitByte(0xdb); EmitByte(0x1d);EmitAdr(&tb);\r
675                         //fistp ta\r
676                         EmitByte(0xdb); EmitByte(0x1d);EmitAdr(&ta);\r
677                         LOADREG(&ta, REG_EAX)\r
678                         //or %eax,tb\r
679                         EmitByte(0x09); EmitByte(0x05);EmitAdr(&tb);\r
680                         //fild tb\r
681                         EmitByte(0xdb); EmitByte(0x05);EmitAdr(&tb);\r
682                         //fstps glob[C]\r
683                         EmitByte(0xd9); EmitByte(0x1d);EmitAdr(glob + op[i].c);\r
684                         break;\r
685 \r
686                 case OP_BITAND_F:\r
687                         //flds glob[A]\r
688                         EmitByte(0xd9); EmitByte(0x05);EmitAdr(glob + op[i].a);\r
689                         //flds glob[B]\r
690                         EmitByte(0xd9); EmitByte(0x05);EmitAdr(glob + op[i].b);\r
691                         //fistp tb\r
692                         EmitByte(0xdb); EmitByte(0x1d);EmitAdr(&tb);\r
693                         //fistp ta\r
694                         EmitByte(0xdb); EmitByte(0x1d);EmitAdr(&ta);\r
695                         /*two args are now at ta and tb*/\r
696                         LOADREG(&ta, REG_EAX)\r
697                         //and tb,%eax\r
698                         EmitByte(0x21); EmitByte(0x05);EmitAdr(&tb);\r
699                         /*we just wrote the int value to tb, convert that to a float and store it at c*/\r
700                         //fild tb\r
701                         EmitByte(0xdb); EmitByte(0x05);EmitAdr(&tb);\r
702                         //fstps glob[C]\r
703                         EmitByte(0xd9); EmitByte(0x1d);EmitAdr(glob + op[i].c);\r
704                         break;\r
705 \r
706                 case OP_AND_F:\r
707                         //test floats properly, so we don't get confused with -0.0\r
708 \r
709                         //flds  glob[A]\r
710                         EmitByte(0xd9); EmitByte(0x05); EmitAdr(glob + op[i].a);\r
711                         //fcomps        nullfloat\r
712                         EmitByte(0xd8); EmitByte(0x1d); EmitAdr(&nullfloat);\r
713                         //fnstsw        %ax\r
714                         EmitByte(0xdf); EmitByte(0xe0);\r
715                         //test  $0x40,%ah\r
716                         EmitByte(0xf6); EmitByte(0xc4);EmitByte(0x40);\r
717                         //jz onefalse\r
718                         EmitByte(0x75); EmitByte(0x1f);\r
719 \r
720                         //flds  glob[B]\r
721                         EmitByte(0xd9); EmitByte(0x05); EmitAdr(glob + op[i].b);\r
722                         //fcomps        nullfloat\r
723                         EmitByte(0xd8); EmitByte(0x1d); EmitAdr(&nullfloat);\r
724                         //fnstsw        %ax\r
725                         EmitByte(0xdf); EmitByte(0xe0);\r
726                         //test  $0x40,%ah\r
727                         EmitByte(0xf6); EmitByte(0xc4);EmitByte(0x40);\r
728                         //jnz onefalse\r
729                         EmitByte(0x75); EmitByte(0x0c);\r
730 \r
731                         //mov float0,glob[C]\r
732                         EmitByte(0xc7); EmitByte(0x05); EmitAdr(glob + op[i].c); EmitFloat(1.0f);\r
733                         //jmp done\r
734                         EmitByte(0xeb); EmitByte(0x0a);\r
735 \r
736                         //onefalse:\r
737                         //mov float1,glob[C]\r
738                         EmitByte(0xc7); EmitByte(0x05); EmitAdr(glob + op[i].c); EmitFloat(0.0f);\r
739                         //done:\r
740                         break;\r
741                 case OP_OR_F:\r
742                         //test floats properly, so we don't get confused with -0.0\r
743 \r
744                         //flds  glob[A]\r
745                         EmitByte(0xd9); EmitByte(0x05); EmitAdr(glob + op[i].a);\r
746                         //fcomps        nullfloat\r
747                         EmitByte(0xd8); EmitByte(0x1d); EmitAdr(&nullfloat);\r
748                         //fnstsw        %ax\r
749                         EmitByte(0xdf); EmitByte(0xe0);\r
750                         //test  $0x40,%ah\r
751                         EmitByte(0xf6); EmitByte(0xc4);EmitByte(0x40);\r
752                         //je onetrue\r
753                         EmitByte(0x74); EmitByte(0x1f);\r
754 \r
755                         //flds  glob[B]\r
756                         EmitByte(0xd9); EmitByte(0x05); EmitAdr(glob + op[i].b);\r
757                         //fcomps        nullfloat\r
758                         EmitByte(0xd8); EmitByte(0x1d); EmitAdr(&nullfloat);\r
759                         //fnstsw        %ax\r
760                         EmitByte(0xdf); EmitByte(0xe0);\r
761                         //test  $0x40,%ah\r
762                         EmitByte(0xf6); EmitByte(0xc4);EmitByte(0x40);\r
763                         //je onetrue\r
764                         EmitByte(0x74); EmitByte(0x0c);\r
765 \r
766                         //mov float0,glob[C]\r
767                         EmitByte(0xc7); EmitByte(0x05); EmitAdr(glob + op[i].c); EmitFloat(0.0f);\r
768                         //jmp done\r
769                         EmitByte(0xeb); EmitByte(0x0a);\r
770 \r
771                         //onetrue:\r
772                         //mov float1,glob[C]\r
773                         EmitByte(0xc7); EmitByte(0x05); EmitAdr(glob + op[i].c); EmitFloat(1.0f);\r
774                         //done:\r
775                         break;\r
776 \r
777                 case OP_EQ_S:\r
778                 case OP_NE_S:\r
779                         {\r
780                         //put a in ecx\r
781                         LOADREG(glob + op[i].a, REG_ECX);\r
782                         //put b in edi\r
783                         LOADREG(glob + op[i].b, REG_EDI);\r
784 /*\r
785                         //early out if they're equal\r
786                         //cmp %ecx,%edi\r
787                         EmitByte(0x39); EmitByte(0xc0 | (REG_EDI<<3) | REG_ECX);\r
788                         j1c = LocalJmp(OP_EQ_S);\r
789 \r
790                         //if a is 0, check if b is ""\r
791                         //jecxz ais0\r
792                         EmitByte(0xe3); EmitByte(0x1a);\r
793 \r
794                         //if b is 0, check if a is ""\r
795                         //cmp $0,%edi\r
796                         EmitByte(0x83); EmitByte(0xff); EmitByte(0x00);\r
797                         //jne bnot0\r
798                         EmitByte(0x75); EmitByte(0x2a);\r
799                         {\r
800                                 //push a\r
801                                 EmitByte(0x51);\r
802                                 //push progfuncs\r
803                                 EmitByte(0x68); EmitAdr(progfuncs);\r
804                                 //call PR_StringToNative\r
805                                 EmitByte(0xe8); EmitFOffset(PR_StringToNative,4);\r
806                                 //add $8,%esp\r
807                                 EmitByte(0x83); EmitByte(0xc4); EmitByte(0x08);\r
808                                 //cmpb $0,(%eax)\r
809                                 EmitByte(0x80); EmitByte(0x38); EmitByte(0x00);\r
810                                 j1b = LocalJmp(OP_EQ_S);\r
811                                 j0b = LocalJmp(OP_GOTO);\r
812                         }\r
813 \r
814                         //ais0:\r
815                         {\r
816                                 //push edi\r
817                                 EmitByte(0x57);\r
818                                 //push progfuncs\r
819                                 EmitByte(0x68); EmitAdr(progfuncs);\r
820                                 //call PR_StringToNative\r
821                                 EmitByte(0xe8); EmitFOffset(PR_StringToNative,4);\r
822                                 //add $8,%esp\r
823                                 EmitByte(0x83); EmitByte(0xc4); EmitByte(0x08);\r
824                                 //cmpb $0,(%eax)\r
825                                 EmitByte(0x80); EmitByte(0x38); EmitByte(0x00);\r
826                                 //je _true\r
827                                 EmitByte(0x74); EmitByte(0x36);\r
828                                 //jmp _false\r
829                                 EmitByte(0xeb); EmitByte(0x28);\r
830                         }\r
831                         //bnot0:\r
832 */\r
833 LOADREG(glob + op[i].a, REG_ECX);\r
834                         //push ecx\r
835                         EmitByte(0x51);\r
836                         //push progfuncs\r
837                         EmitByte(0x68); EmitAdr(progfuncs);\r
838                         //call PR_StringToNative\r
839                         EmitByte(0xe8); EmitFOffset(PR_StringToNative,4);\r
840                         //push %eax\r
841                         EmitByte(0x50);\r
842 \r
843 LOADREG(glob + op[i].b, REG_EDI);\r
844                         //push %edi\r
845                         EmitByte(0x57);\r
846                         //push progfuncs\r
847                         EmitByte(0x68); EmitAdr(progfuncs);\r
848                         //call PR_StringToNative\r
849                         EmitByte(0xe8); EmitFOffset(PR_StringToNative,4);\r
850                         //add $8,%esp\r
851                         EmitByte(0x83); EmitByte(0xc4); EmitByte(0x08);\r
852 \r
853 \r
854                         //push %eax\r
855                         EmitByte(0x50);\r
856                         //call strcmp\r
857                         EmitByte(0xe8); EmitFOffset(strcmp,4);\r
858                         //add $16,%esp\r
859                         EmitByte(0x83); EmitByte(0xc4); EmitByte(0x10);\r
860 \r
861                         //cmp $0,%eax\r
862                         EmitByte(0x83); EmitByte(0xf8); EmitByte(0x00);\r
863                         j1 = LocalJmp(OP_EQ_S);\r
864                         {\r
865                                 l0 = LocalLoc();\r
866                                 STOREF((op[i].op == OP_NE_S)?1.0f:0.0f, glob + op[i].c);\r
867                                 j2 = LocalJmp(OP_GOTO);\r
868                         }\r
869                         {\r
870                                 l1 = LocalLoc();\r
871                                 STOREF((op[i].op == OP_NE_S)?0.0f:1.0f, glob + op[i].c);\r
872                         }\r
873                         l2 = LocalLoc();\r
874 \r
875 //                      LocalJmpLoc(j0b, l0);\r
876                         LocalJmpLoc(j1, l1);\r
877 //                      LocalJmpLoc(j1b, l1);\r
878                         LocalJmpLoc(j2, l2);\r
879                         }\r
880                         break;\r
881 \r
882                 case OP_NOT_S:\r
883                         LOADREG(glob + op[i].a, REG_EAX)\r
884 \r
885                         //cmp $0,%eax\r
886                         EmitByte(0x83); EmitByte(0xf8); EmitByte(0x00);\r
887                         j2 = LocalJmp(OP_EQ_S);\r
888 \r
889                         //push %eax\r
890                         EmitByte(0x50);\r
891                         //push progfuncs\r
892                         EmitByte(0x68); EmitAdr(progfuncs);\r
893                         //call PR_StringToNative\r
894                         EmitByte(0xe8); EmitFOffset(PR_StringToNative,4);\r
895                         //add $8,%esp\r
896                         EmitByte(0x83); EmitByte(0xc4); EmitByte(0x08);\r
897 \r
898                         //cmpb $0,(%eax)\r
899                         EmitByte(0x80); EmitByte(0x38); EmitByte(0x00);\r
900                         j1 = LocalJmp(OP_EQ_S);\r
901                         {\r
902                                 STOREF(0.0f, glob + op[i].c);\r
903                                 j0 = LocalJmp(OP_GOTO);\r
904                         }\r
905                         {\r
906                                 l1 = LocalLoc();\r
907                                 STOREF(1.0f, glob + op[i].c);\r
908                         }\r
909                         l2 = LocalLoc();\r
910                         LocalJmpLoc(j2, l1);\r
911                         LocalJmpLoc(j1, l1);\r
912                         LocalJmpLoc(j0, l2);\r
913                         break;\r
914 \r
915                 case OP_ADD_V:\r
916                         //flds glob[A]\r
917                         EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+0);\r
918                         //fadds glob[B]\r
919                         EmitByte(0xd8);EmitByte(0x05);EmitAdr(glob + op[i].b+0);\r
920                         //fstps glob[C]\r
921                         EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+0);\r
922 \r
923                         //flds glob[A]\r
924                         EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+1);\r
925                         //fadds glob[B]\r
926                         EmitByte(0xd8);EmitByte(0x05);EmitAdr(glob + op[i].b+1);\r
927                         //fstps glob[C]\r
928                         EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+1);\r
929 \r
930                         //flds glob[A]\r
931                         EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+2);\r
932                         //fadds glob[B]\r
933                         EmitByte(0xd8);EmitByte(0x05);EmitAdr(glob + op[i].b+2);\r
934                         //fstps glob[C]\r
935                         EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+2);\r
936                         break;\r
937                 case OP_SUB_V:\r
938                         //flds glob[A]\r
939                         EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+0);\r
940                         //fsubs glob[B]\r
941                         EmitByte(0xd8);EmitByte(0x25);EmitAdr(glob + op[i].b+0);\r
942                         //fstps glob[C]\r
943                         EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+0);\r
944 \r
945                         //flds glob[A]\r
946                         EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+1);\r
947                         //fsubs glob[B]\r
948                         EmitByte(0xd8);EmitByte(0x25);EmitAdr(glob + op[i].b+1);\r
949                         //fstps glob[C]\r
950                         EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+1);\r
951 \r
952                         //flds glob[A]\r
953                         EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+2);\r
954                         //fsubs glob[B]\r
955                         EmitByte(0xd8);EmitByte(0x25);EmitAdr(glob + op[i].b+2);\r
956                         //fstps glob[C]\r
957                         EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+2);\r
958                         break;\r
959 \r
960                 case OP_MUL_V:\r
961                         //this is actually a dotproduct\r
962                         //flds glob[A]\r
963                         EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+0);\r
964                         //fmuls glob[B]\r
965                         EmitByte(0xd8);EmitByte(0x0d);EmitAdr(glob + op[i].b+0);\r
966 \r
967                         //flds glob[A]\r
968                         EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+1);\r
969                         //fmuls glob[B]\r
970                         EmitByte(0xd8);EmitByte(0x0d);EmitAdr(glob + op[i].b+1);\r
971 \r
972                         //flds glob[A]\r
973                         EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+2);\r
974                         //fmuls glob[B]\r
975                         EmitByte(0xd8);EmitByte(0x0d);EmitAdr(glob + op[i].b+2);\r
976 \r
977                         //faddp\r
978                         EmitByte(0xde);EmitByte(0xc1);\r
979                         //faddp\r
980                         EmitByte(0xde);EmitByte(0xc1);\r
981 \r
982                         //fstps glob[C]\r
983                         EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c);\r
984                         break;\r
985 \r
986                 case OP_EQ_F:\r
987                 case OP_NE_F:\r
988                 case OP_LE_F:\r
989                 case OP_GE_F:\r
990                 case OP_LT_F:\r
991                 case OP_GT_F:\r
992                         //flds glob[A]\r
993                         EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].b);\r
994                         //flds glob[B]\r
995                         EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a);\r
996                         //fcomip %st(1),%st\r
997                         EmitByte(0xdf);EmitByte(0xe9);\r
998                         //fstp %st(0)   (aka: pop)\r
999                         EmitByte(0xdd);EmitByte(0xd8);\r
1000 \r
1001                         j1 = LocalJmp(op[i].op);\r
1002                         {\r
1003                                 STOREF(0.0f, glob + op[i].c);\r
1004                                 j2 = LocalJmp(OP_GOTO);\r
1005                         }\r
1006                         {\r
1007                                 l1 = LocalLoc();\r
1008                                 STOREF(1.0f, glob + op[i].c);\r
1009                         }\r
1010                         l2 = LocalLoc();\r
1011                         LocalJmpLoc(j1,l1);\r
1012                         LocalJmpLoc(j2,l2);\r
1013                         break;\r
1014 \r
1015                 case OP_MUL_FV:\r
1016                 case OP_MUL_VF:\r
1017                         //\r
1018                         {\r
1019                                 int v;\r
1020                                 int f;\r
1021                                 if (op[i].op == OP_MUL_FV)\r
1022                                 {\r
1023                                         f = op[i].a;\r
1024                                         v = op[i].b;\r
1025                                 }\r
1026                                 else\r
1027                                 {\r
1028                                         v = op[i].a;\r
1029                                         f = op[i].b;\r
1030                                 }\r
1031 \r
1032                                 //flds glob[F]\r
1033                                 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + f);\r
1034 \r
1035                                 //flds glob[V0]\r
1036                                 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + v+0);\r
1037                                 //fmul st(1)\r
1038                                 EmitByte(0xd8);EmitByte(0xc9);\r
1039                                 //fstps glob[C]\r
1040                                 EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+0);\r
1041 \r
1042                                 //flds glob[V0]\r
1043                                 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + v+1);\r
1044                                 //fmul st(1)\r
1045                                 EmitByte(0xd8);EmitByte(0xc9);\r
1046                                 //fstps glob[C]\r
1047                                 EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+1);\r
1048 \r
1049                                 //flds glob[V0]\r
1050                                 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + v+2);\r
1051                                 //fmul st(1)\r
1052                                 EmitByte(0xd8);EmitByte(0xc9);\r
1053                                 //fstps glob[C]\r
1054                                 EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+2);\r
1055 \r
1056                                 //fstp %st(0)   (aka: pop)\r
1057                                 EmitByte(0xdd);EmitByte(0xd8);\r
1058                         }\r
1059                         break;\r
1060 \r
1061                 case OP_STATE:\r
1062                         //externs->stateop(progfuncs, OPA->_float, OPB->function);\r
1063                         //push b\r
1064                         EmitByte(0xff);EmitByte(0x35);EmitAdr(glob + op[i].b);\r
1065                         //push a\r
1066                         EmitByte(0xff);EmitByte(0x35);EmitAdr(glob + op[i].a);\r
1067                         //push $progfuncs\r
1068                         EmitByte(0x68); EmitAdr(progfuncs);\r
1069                         //call externs->stateop\r
1070                         EmitByte(0xe8); EmitFOffset(externs->stateop, 4);\r
1071                         //add $12,%esp\r
1072                         EmitByte(0x83); EmitByte(0xc4); EmitByte(0x0c);\r
1073                         break;\r
1074 #if 1\r
1075 /*              case OP_NOT_V:\r
1076                         //flds 0\r
1077                         //flds glob[A+0]\r
1078                         //fcomip %st(1),%st\r
1079                         //jne _true\r
1080                         //flds glob[A+1]\r
1081                         //fcomip %st(1),%st\r
1082                         //jne _true\r
1083                         //flds glob[A+1]\r
1084                         //fcomip %st(1),%st\r
1085                         //jne _true\r
1086                         //mov 1,C\r
1087                         //jmp done\r
1088                         //_true:\r
1089                         //mov 0,C\r
1090                         //done:\r
1091                         break;\r
1092 */\r
1093                         \r
1094                 case OP_NOT_V:\r
1095                         EmitByte(0xcd);EmitByte(op[i].op);\r
1096                         printf("QCJIT: instruction %i is not implemented\n", op[i].op);\r
1097                         break;\r
1098 #endif\r
1099                 case OP_NE_V:\r
1100                 case OP_EQ_V:\r
1101                 {\r
1102                         void *f0, *f1, *f2, *floc;\r
1103 //compare v[0]\r
1104                         //flds glob[A]\r
1105                         EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+0);\r
1106                         //flds glob[B]\r
1107                         EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].b+0);\r
1108                         //fcomip %st(1),%st\r
1109                         EmitByte(0xdf);EmitByte(0xe9);\r
1110                         //fstp %st(0)   (aka: pop)\r
1111                         EmitByte(0xdd);EmitByte(0xd8);\r
1112 \r
1113                         /*if the condition is true, don't fail*/\r
1114                         j1 = LocalJmp(op[i].op);\r
1115                         {\r
1116                                 STOREF(0.0f, glob + op[i].c);\r
1117                                 f0 = LocalJmp(OP_GOTO);\r
1118                         }\r
1119                         l1 = LocalLoc();\r
1120                         LocalJmpLoc(j1,l1);\r
1121 \r
1122 //compare v[1]\r
1123                         //flds glob[A]\r
1124                         EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+1);\r
1125                         //flds glob[B]\r
1126                         EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].b+1);\r
1127                         //fcomip %st(1),%st\r
1128                         EmitByte(0xdf);EmitByte(0xe9);\r
1129                         //fstp %st(0)   (aka: pop)\r
1130                         EmitByte(0xdd);EmitByte(0xd8);\r
1131 \r
1132                         /*if the condition is true, don't fail*/\r
1133                         j1 = LocalJmp(op[i].op);\r
1134                         {\r
1135                                 STOREF(0.0f, glob + op[i].c);\r
1136                                 f1 = LocalJmp(OP_GOTO);\r
1137                         }\r
1138                         l1 = LocalLoc();\r
1139                         LocalJmpLoc(j1,l1);\r
1140 \r
1141 //compare v[2]\r
1142                         //flds glob[A]\r
1143                         EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+2);\r
1144                         //flds glob[B]\r
1145                         EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].b+2);\r
1146                         //fcomip %st(1),%st\r
1147                         EmitByte(0xdf);EmitByte(0xe9);\r
1148                         //fstp %st(0)   (aka: pop)\r
1149                         EmitByte(0xdd);EmitByte(0xd8);\r
1150 \r
1151                         /*if the condition is true, don't fail*/\r
1152                         j1 = LocalJmp(op[i].op);\r
1153                         {\r
1154                                 STOREF(0.0f, glob + op[i].c);\r
1155                                 f2 = LocalJmp(OP_GOTO);\r
1156                         }\r
1157                         l1 = LocalLoc();\r
1158                         LocalJmpLoc(j1,l1);\r
1159 \r
1160 //success!\r
1161                         STOREF(1.0f, glob + op[i].c);\r
1162 \r
1163                         floc = LocalLoc();\r
1164                         LocalJmpLoc(f0,floc);\r
1165                         LocalJmpLoc(f1,floc);\r
1166                         LocalJmpLoc(f2,floc);\r
1167                         break;\r
1168                 }\r
1169 \r
1170                 /*fteqcc generates these from reading 'fast arrays', and are part of hexenc extras*/\r
1171                 case OP_FETCH_GBL_F:\r
1172                 case OP_FETCH_GBL_S:\r
1173                 case OP_FETCH_GBL_E:\r
1174                 case OP_FETCH_GBL_FNC:\r
1175                 case OP_FETCH_GBL_V:\r
1176                 {\r
1177                         unsigned int max = ((unsigned int*)glob)[op[i].a-1];\r
1178                         unsigned int base = op[i].a;\r
1179                         //flds glob[B]\r
1180                         EmitByte(0xd9); EmitByte(0x05);EmitAdr(glob + op[i].b);\r
1181                         //fistp ta\r
1182                         EmitByte(0xdb); EmitByte(0x1d);EmitAdr(&ta);\r
1183                         LOADREG(&ta, REG_EAX)\r
1184                         //FIXME: if eax >= $max, abort\r
1185 \r
1186                         if (op[i].op == OP_FETCH_GBL_V)\r
1187                         {\r
1188                                 /*scale the index by 3*/\r
1189                                 SETREGI(3, REG_EDX)\r
1190                                 //mul %edx\r
1191                                 EmitByte(0xf7); EmitByte(0xe2);\r
1192                         }\r
1193 \r
1194                         //lookup global\r
1195                         //mov &glob[base](,%eax,4),%edx\r
1196                         EmitByte(0x8b);EmitByte(0x14);EmitByte(0x85);Emit4Byte((unsigned int)(glob + base+0));\r
1197                         STOREREG(REG_EDX, glob + op[i].c+0)\r
1198                         if (op[i].op == OP_FETCH_GBL_V)\r
1199                         {\r
1200                                 //mov &glob[base+1](,%eax,4),%edx\r
1201                                 EmitByte(0x8b);EmitByte(0x14);EmitByte(0x85);Emit4Byte((unsigned int)(glob + base+1));\r
1202                                 STOREREG(REG_EDX, glob + op[i].c+1)\r
1203                                 //mov &glob[base+2](,%eax,4),%edx\r
1204                                 EmitByte(0x8b);EmitByte(0x14);EmitByte(0x85);Emit4Byte((unsigned int)(glob + base+2));\r
1205                                 STOREREG(REG_EDX, glob + op[i].c+2)\r
1206                         }\r
1207                         break;\r
1208                 }\r
1209 \r
1210                 /*fteqcc generates these from writing 'fast arrays'*/\r
1211                 case OP_GLOBALADDRESS:\r
1212                         LOADREG(glob + op[i].b, REG_EAX);\r
1213                         //lea &glob[A](, %eax, 4),%eax\r
1214                         EmitByte(0x8d);EmitByte(0x04);EmitByte(0x85);EmitAdr(glob + op[i].b+2);\r
1215                         STOREREG(REG_EAX, glob + op[i].c);\r
1216                         break;\r
1217 //              case OP_BOUNDCHECK:\r
1218                         //FIXME: assert b <= a < c\r
1219                         break;\r
1220                 case OP_CONV_FTOI:\r
1221                         //flds glob[A]\r
1222                         EmitByte(0xd9); EmitByte(0x05);EmitAdr(glob + op[i].a);\r
1223                         //fistp glob[C]\r
1224                         EmitByte(0xdb); EmitByte(0x1d);EmitAdr(glob + op[i].c);\r
1225                         break;\r
1226                 case OP_MUL_I:\r
1227                         LOADREG(glob + op[i].a, REG_EAX);\r
1228                         //mull glob[C]       (arg*eax => edx:eax)\r
1229                         EmitByte(0xfc); EmitByte(0x25);EmitAdr(glob + op[i].b);\r
1230                         STOREREG(REG_EAX, glob + op[i].c);\r
1231                         break;\r
1232 \r
1233                 /*other extended opcodes*/\r
1234                 case OP_BITOR_I:\r
1235                         LOADREG(glob + op[i].a, REG_EAX)\r
1236                         //or %eax,tb\r
1237                         EmitByte(0x0b); EmitByte(0x05);EmitAdr(glob + op[i].b);\r
1238                         STOREREG(REG_EAX, glob + op[i].c);\r
1239                         break;\r
1240 \r
1241 \r
1242                 default:\r
1243                         {\r
1244                                 enum qcop_e e = op[i].op;\r
1245                         printf("QCJIT: Extended instruction set %i is not supported, not using jit.\n", e);\r
1246                         }\r
1247 \r
1248 \r
1249                         free(jit->statementjumps);      //[MAX_STATEMENTS]\r
1250                         free(jit->statementoffsets); //[MAX_STATEMENTS]\r
1251                         free(jit->code);\r
1252                         free(jit);\r
1253                         return NULL;\r
1254                 }\r
1255         }\r
1256 \r
1257         FixupJumps(jit);\r
1258 \r
1259 #ifdef _WIN32\r
1260         {\r
1261                 DWORD old;\r
1262 \r
1263                 //this memory is on the heap.\r
1264                 //this means that we must maintain read/write protection, or libc will crash us\r
1265                 VirtualProtect(jit->code, jit->codesize, PAGE_EXECUTE_READWRITE, &old);\r
1266         }\r
1267 #endif\r
1268 \r
1269 //      externs->WriteFile("jit.x86", jit->code, jit->codesize);\r
1270 \r
1271         return jit;\r
1272 }\r
1273 \r
/*
 * Logical NOT on a float, expressed in plain C.
 * Yields 1 when arg compares equal to zero (this includes -0.0) and 0 for
 * every other value.
 */
float foo(float arg)
{
	return (arg == 0) ? 1.0f : 0.0f;
}
1283 \r
1284 void PR_EnterJIT(progfuncs_t *progfuncs, struct jitstate *jit, int statement)\r
1285 {\r
1286 #ifdef __GNUC__\r
1287         //call, it clobbers pretty much everything.\r
1288         asm("call *%0" :: "r"(jit->statementoffsets[statement+1]),"b"(prinst->edicttable):"cc","memory","eax","ecx","edx");\r
1289 #elif defined(_MSC_VER)\r
1290         void *entry = jit->statementoffsets[statement+1];\r
1291         void *edicttable = prinst->edicttable;\r
1292         __asm {\r
1293                 pushad\r
1294                 mov eax,entry\r
1295                 mov ebx,edicttable\r
1296                 call eax\r
1297                 popad\r
1298         }\r
1299 #else\r
1300         #error "Sorry, no idea how to enter assembler safely for your compiler"\r
1301 #endif\r
1302 }\r
1303 #endif\r