/*
 * dpsoftrast.c -- from xonotic/darkplaces.git (de.git.xonotic.org gitweb blob view)
 * Revision: "only respect vid_soft variable if SSE2_PRESENT is defined"
 */
1
2 #include <stdio.h>
3 #include <string.h>
4 #include <math.h>
5 #include "quakedef.h"
6 #include "dpsoftrast.h"
7
// C builds get a `bool` alias for qboolean; C++ builds already have a built-in bool.
// NOTE(review): qboolean is presumably declared via quakedef.h -- confirm.
8 #ifndef __cplusplus
9 typedef qboolean bool;
10 #endif
11
// Buffer / face selector constants. The names and values mirror the OpenGL
// enumerants of the same name so callers can pass GL-style values without
// including the GL headers here.
12 #define GL_NONE                                 0
13 #define GL_FRONT_LEFT                   0x0400
14 #define GL_FRONT_RIGHT                  0x0401
15 #define GL_BACK_LEFT                    0x0402
16 #define GL_BACK_RIGHT                   0x0403
17 #define GL_FRONT                                0x0404
18 #define GL_BACK                                 0x0405
19 #define GL_LEFT                                 0x0406
20 #define GL_RIGHT                                0x0407
21 #define GL_FRONT_AND_BACK               0x0408
22 #define GL_AUX0                                 0x0409
23 #define GL_AUX1                                 0x040A
24 #define GL_AUX2                                 0x040B
25 #define GL_AUX3                                 0x040C
26
// Comparison function constants (same values as the OpenGL enumerants).
// GL_ALWAYS is what DPSOFTRAST_RecalcDepthFunc selects when depth testing is off.
27 #define GL_NEVER                                0x0200
28 #define GL_LESS                                 0x0201
29 #define GL_EQUAL                                0x0202
30 #define GL_LEQUAL                               0x0203
31 #define GL_GREATER                              0x0204
32 #define GL_NOTEQUAL                             0x0205
33 #define GL_GEQUAL                               0x0206
34 #define GL_ALWAYS                               0x0207
35
// Blend factor constants (same values as the OpenGL enumerants).
// Pairs of these are matched against blendmodetable by DPSOFTRAST_RecalcBlendFunc.
36 #define GL_ZERO                                 0x0
37 #define GL_ONE                                  0x1
38 #define GL_SRC_COLOR                            0x0300
39 #define GL_ONE_MINUS_SRC_COLOR                  0x0301
40 #define GL_DST_COLOR                            0x0306
41 #define GL_ONE_MINUS_DST_COLOR                  0x0307
42 #define GL_SRC_ALPHA                            0x0302
43 #define GL_ONE_MINUS_SRC_ALPHA                  0x0303
44 #define GL_DST_ALPHA                            0x0304
45 #define GL_ONE_MINUS_DST_ALPHA                  0x0305
46 #define GL_SRC_ALPHA_SATURATE                   0x0308
47 #define GL_CONSTANT_COLOR                       0x8001
48 #define GL_ONE_MINUS_CONSTANT_COLOR             0x8002
49 #define GL_CONSTANT_ALPHA                       0x8003
50 #define GL_ONE_MINUS_CONSTANT_ALPHA             0x8004
51
// Indices of the per-vertex attribute arrays (position, color, eight texcoord
// sets). DPSOFTRAST_ARRAY_TOTAL is the array count; it is used to size
// in_array4f/post_array4f and, as index, to address the extra screencoord4f
// row of the span interpolant data.
52 typedef enum DPSOFTRAST_ARRAY_e
53 {
54         DPSOFTRAST_ARRAY_POSITION,
55         DPSOFTRAST_ARRAY_COLOR,
56         DPSOFTRAST_ARRAY_TEXCOORD0,
57         DPSOFTRAST_ARRAY_TEXCOORD1,
58         DPSOFTRAST_ARRAY_TEXCOORD2,
59         DPSOFTRAST_ARRAY_TEXCOORD3,
60         DPSOFTRAST_ARRAY_TEXCOORD4,
61         DPSOFTRAST_ARRAY_TEXCOORD5,
62         DPSOFTRAST_ARRAY_TEXCOORD6,
63         DPSOFTRAST_ARRAY_TEXCOORD7,
64         DPSOFTRAST_ARRAY_TOTAL
65 }
66 DPSOFTRAST_ARRAY;
67
// One slot of the dpsoftrast.texture[] array. A slot whose `bytes` pointer is
// NULL is treated as free by the allocation and lookup functions.
68 typedef struct DPSOFTRAST_Texture_s
69 {
70         int flags; // flags passed to DPSOFTRAST_Texture_New (format bits and DPSOFTRAST_TEXTURE_FLAG_*)
71         int width; // dimensions of mip level 0
72         int height;
73         int depth;
74         int sides; // 6 when DPSOFTRAST_TEXTURE_FLAG_CUBEMAP is set, otherwise 1
75         DPSOFTRAST_TEXTURE_FILTER filter; // set by DPSOFTRAST_Texture_Filter
76         int mipmaps; // number of valid rows in mipmap[]
77         int size; // total size of `bytes` (all mip levels) in bytes
78         unsigned char *bytes; // pixel storage, 4 bytes per pixel, mip levels stored back to back
79         int mipmap[DPSOFTRAST_MAXMIPMAPS][5]; // per level: [0] byte offset into bytes, [1] byte size, [2] width, [3] height, [4] depth
80 }
81 DPSOFTRAST_Texture;
82
// Render state exactly as requested through the public setter functions
// (DPSOFTRAST_ColorMask, DPSOFTRAST_BlendFunc, DPSOFTRAST_Viewport, ...).
// Values derived from it are cached in DPSOFTRAST_State and refreshed by
// DPSOFTRAST_Validate.
83 typedef struct DPSOFTRAST_State_User_s
84 {
85         int colormask[4]; // r, g, b, a write enables, each 0 or 1
86         int blendfunc[2]; // [0] source factor, [1] destination factor (GL_* blend constants)
87         int blendsubtract; // 0 or 1
88         int depthmask;
89         int depthtest;
90         int depthfunc; // GL_* comparison constant
91         int scissortest;
92         int cullface;
93         int alphatest;
94         int alphafunc;
95         float alphavalue;
96         int scissor[4]; // x, y, width, height as passed to DPSOFTRAST_Scissor
97         int viewport[4]; // x, y, width, height as passed to DPSOFTRAST_Viewport
98         float depthrange[2];
99         float polygonoffset[2]; // [0] alongnormal, [1] intoview
100         float color[4]; // r, g, b, a as passed to DPSOFTRAST_Color4f
101 }
102 DPSOFTRAST_State_User;
103
// NOTE(review): DPSOFTRAST_MAXSUBSPAN is not used in the visible part of the
// file; presumably the pixel shading code splits spans into runs of this length.
104 #define DPSOFTRAST_MAXSUBSPAN 16
105
// One horizontal run of pixels queued for shading, with the interpolant
// values at both of its ends.
106 typedef struct DPSOFTRAST_State_Draw_Span_s
107 {
108         int start; // pixel index
109         int length; // pixel count
110         int startx; // usable range (according to pixelmask)
111         int endx; // usable range (according to pixelmask)
112         unsigned char mip[DPSOFTRAST_MAXTEXTUREUNITS]; // texcoord to screen space density values (for picking mipmap of textures)
113         unsigned char *pixelmask; // true for pixels that passed depth test, false for others
114         // [0][n][] is start interpolant values (projected)
115         // [1][n][] is end interpolant values (projected)
116         // [0][DPSOFTRAST_ARRAY_TOTAL][] is start screencoord4f
117         // [1][DPSOFTRAST_ARRAY_TOTAL][] is end screencoord4f
118         // NOTE: screencoord4f[3] is W (basically 1/Z), useful for depthbuffer
119         float data[2][DPSOFTRAST_ARRAY_TOTAL+1][4];
120 }
121 DPSOFTRAST_State_Draw_Span;
122
// Capacity of the span queue embedded in DPSOFTRAST_State_Draw.
123 #define DPSOFTRAST_DRAW_MAXSPANQUEUE 1024
124
// Working storage for a draw call: vertex attribute arrays and the span queue.
125 typedef struct DPSOFTRAST_State_Draw_s
126 {
127         int numvertices;
128         int maxvertices;
129         float *in_array4f[DPSOFTRAST_ARRAY_TOTAL]; // one 4-float-per-vertex array per DPSOFTRAST_ARRAY slot
130         float *post_array4f[DPSOFTRAST_ARRAY_TOTAL]; // NOTE(review): presumably the vertex shader outputs -- confirm
131         float *screencoord4f;
132
133         // spans are queued in this structure for dispatch to the pixel shader,
134         // partly to improve cache locality, partly for batching purposes, spans
135         // are flushed before DrawTriangles returns to caller
136         int numspans;
137         DPSOFTRAST_State_Draw_Span spanqueue[DPSOFTRAST_DRAW_MAXSPANQUEUE];
138 }
139 DPSOFTRAST_State_Draw;
140
// Dirty bits stored in dpsoftrast.validate. A setter raises the bit for the
// derived state it invalidates; DPSOFTRAST_Validate recomputes that state and
// clears the bit. DPSOFTRAST_VALIDATE_DRAW is the union of all three.
141 #define DPSOFTRAST_VALIDATE_FB 1
142 #define DPSOFTRAST_VALIDATE_DEPTHFUNC 2
143 #define DPSOFTRAST_VALIDATE_BLENDFUNC 4
144 #define DPSOFTRAST_VALIDATE_DRAW (DPSOFTRAST_VALIDATE_FB | DPSOFTRAST_VALIDATE_DEPTHFUNC | DPSOFTRAST_VALIDATE_BLENDFUNC)
145
// Blend modes the rasterizer distinguishes. DPSOFTRAST_RecalcBlendFunc picks
// one by looking up the user's blend factors in blendmodetable and falls back
// to DPSOFTRAST_BLENDMODE_OPAQUE when no table row matches.
146 typedef enum DPSOFTRAST_BLENDMODE_e
147 {
148         DPSOFTRAST_BLENDMODE_OPAQUE,
149         DPSOFTRAST_BLENDMODE_ALPHA,
150         DPSOFTRAST_BLENDMODE_ADDALPHA,
151         DPSOFTRAST_BLENDMODE_ADD,
152         DPSOFTRAST_BLENDMODE_INVMOD,
153         DPSOFTRAST_BLENDMODE_MUL,
154         DPSOFTRAST_BLENDMODE_MUL2,
155         DPSOFTRAST_BLENDMODE_SUBALPHA,
156         DPSOFTRAST_BLENDMODE_PSEUDOALPHA,
157         DPSOFTRAST_BLENDMODE_TOTAL
158 }
159 DPSOFTRAST_BLENDMODE;
160
// Complete state of the software rasterizer: render targets, vertex pointers,
// bound textures, shader uniforms, values derived from the user state, the
// texture slot allocator, the last error message, the raw user state and the
// draw-call working storage.
161 typedef struct DPSOFTRAST_State_s
162 {
163         // DPSOFTRAST_VALIDATE_ flags
164         int validate;
165
166         int fb_colormask; // BGRA8 bit mask built by DPSOFTRAST_ColorMask (0xFF per enabled channel)
167         int fb_width;
168         int fb_height;
169         unsigned int *fb_depthpixels;
170         unsigned int *fb_colorpixels[4]; // up to four color targets; NULL entries are skipped when clearing
171
172         const float *pointer_vertex3f;
173         const float *pointer_color4f;
174         const unsigned char *pointer_color4ub;
175         const float *pointer_texcoordf[DPSOFTRAST_MAXTEXCOORDARRAYS];
176         int stride_vertex;
177         int stride_color;
178         int stride_texcoord[DPSOFTRAST_MAXTEXCOORDARRAYS];
179         int components_texcoord[DPSOFTRAST_MAXTEXCOORDARRAYS];
180         DPSOFTRAST_Texture *texbound[DPSOFTRAST_MAXTEXTUREUNITS];
181
182         int shader_mode;
183         int shader_permutation;
184         float uniform4f[DPSOFTRAST_UNIFORM_TOTAL*4];
185         int uniform1i[DPSOFTRAST_UNIFORM_TOTAL];
186
187         // derived values (DPSOFTRAST_VALIDATE_FB)
188         int fb_clearscissor[4]; // x, y, width, height in framebuffer rows (y flipped relative to user state)
189         int fb_viewport[4];
190         int fb_viewportscissor[4]; // intersection of viewport and scissor
191         float fb_viewportcenter[2];
192         float fb_viewportscale[2];
193
194         // derived values (DPSOFTRAST_VALIDATE_DEPTHFUNC)
195         int fb_depthfunc;
196
197         // derived values (DPSOFTRAST_VALIDATE_BLENDFUNC)
198         int fb_blendmode;
199
200         int texture_max; // allocated length of texture[]
201         int texture_end; // one past the highest slot in use
202         int texture_firstfree; // slot index where the search for a free slot starts
203         DPSOFTRAST_Texture *texture;
204
205         int bigendian;
206
207         // error reporting
208         const char *errorstring;
209
210         DPSOFTRAST_State_User user;
211
212         DPSOFTRAST_State_Draw draw;
213 }
214 DPSOFTRAST_State;
215
// The single global rasterizer state that every DPSOFTRAST_* function operates on.
216 DPSOFTRAST_State dpsoftrast;
217
// Defined in another translation unit; not referenced in the visible part of this file.
218 extern int dpsoftrast_test;
219
// Scale applied to depth values before they are stored as integers in the
// depth buffer (1024 * 1048576 == 2^30).
#define DPSOFTRAST_DEPTHSCALE (1024.0f*1048576.0f)
// Packs four float channels (nominally 0..1) into one BGRA8 word: blue in the
// low byte, then green, red, and alpha in the top byte. Each channel is
// rounded to the nearest integer. Arguments are parenthesized so expressions
// can be passed safely, and the shifts are done on unsigned values because
// shifting 255 into the top byte overflows a signed int. The result is cast
// back to int so the macro keeps its original type.
#define DPSOFTRAST_BGRA8_FROM_RGBA32F(r,g,b,a) ((int)(((unsigned int)(int)((r) * 255.0f + 0.5f) << 16) | ((unsigned int)(int)((g) * 255.0f + 0.5f) << 8) | (unsigned int)(int)((b) * 255.0f + 0.5f) | ((unsigned int)(int)((a) * 255.0f + 0.5f) << 24)))
// Converts a float depth to the integer depth buffer format; note the value is
// inverted (d == 1 maps to 0, d == 0 maps to DPSOFTRAST_DEPTHSCALE).
#define DPSOFTRAST_DEPTH32_FROM_DEPTH32F(d) ((int)(DPSOFTRAST_DEPTHSCALE * (1-(d))))
// NOTE(review): not used in the visible part of the file; presumably the
// longest span the span drawing code will emit in one piece.
#define DPSOFTRAST_DRAW_MAXSPANLENGTH 256
224
225 void DPSOFTRAST_RecalcFB(void)
226 {
227         // calculate framebuffer scissor, viewport, viewport clipped by scissor,
228         // and viewport projection values
229         int x1, x2, x3, x4, x5, x6;
230         int y1, y2, y3, y4, y5, y6;
231         x1 = dpsoftrast.user.scissor[0];
232         x2 = dpsoftrast.user.scissor[0] + dpsoftrast.user.scissor[2];
233         x3 = dpsoftrast.user.viewport[0];
234         x4 = dpsoftrast.user.viewport[0] + dpsoftrast.user.viewport[2];
235         y1 = dpsoftrast.fb_height - dpsoftrast.user.scissor[1] - dpsoftrast.user.scissor[3];
236         y2 = dpsoftrast.fb_height - dpsoftrast.user.scissor[1];
237         y3 = dpsoftrast.fb_height - dpsoftrast.user.viewport[1] - dpsoftrast.user.viewport[3];
238         y4 = dpsoftrast.fb_height - dpsoftrast.user.viewport[1];
239         if (!dpsoftrast.user.scissortest) {x1 = 0;y1 = 0;x2 = dpsoftrast.fb_width;y2 = dpsoftrast.fb_height;}
240         if (x1 < 0) x1 = 0;
241         if (x2 > dpsoftrast.fb_width) x2 = dpsoftrast.fb_width;
242         if (x3 < 0) x1 = 0;
243         if (x4 > dpsoftrast.fb_width) x4 = dpsoftrast.fb_width;
244         if (y1 < 0) y1 = 0;
245         if (y2 > dpsoftrast.fb_height) y2 = dpsoftrast.fb_height;
246         if (y3 < 0) y1 = 0;
247         if (y4 > dpsoftrast.fb_height) y4 = dpsoftrast.fb_height;
248         x5 = x1;if (x5 < x3) x5 = x3;
249         x6 = x2;if (x6 > x4) x4 = x4;
250         y5 = y1;if (y5 < y3) y5 = y3;
251         y6 = y2;if (y6 > y4) y6 = y4;
252         dpsoftrast.fb_clearscissor[0] = x1;
253         dpsoftrast.fb_clearscissor[1] = y1;
254         dpsoftrast.fb_clearscissor[2] = x2 - x1;
255         dpsoftrast.fb_clearscissor[3] = y2 - y1;
256         dpsoftrast.fb_viewport[0] = x3;
257         dpsoftrast.fb_viewport[1] = y3;
258         dpsoftrast.fb_viewport[2] = x4 - x3;
259         dpsoftrast.fb_viewport[3] = y4 - y3;
260         dpsoftrast.fb_viewportscissor[0] = x5;
261         dpsoftrast.fb_viewportscissor[1] = y5;
262         dpsoftrast.fb_viewportscissor[2] = x6 - x5;
263         dpsoftrast.fb_viewportscissor[3] = y6 - y5;
264         dpsoftrast.fb_viewportcenter[0] = dpsoftrast.user.viewport[0] + 0.5f * dpsoftrast.user.viewport[2] - 0.5f;
265         dpsoftrast.fb_viewportcenter[1] = dpsoftrast.fb_height - dpsoftrast.user.viewport[1] - 0.5f * dpsoftrast.user.viewport[3] - 0.5f;
266         dpsoftrast.fb_viewportscale[0] = 0.5f * dpsoftrast.user.viewport[2];
267         dpsoftrast.fb_viewportscale[1] = -0.5f * dpsoftrast.user.viewport[3];
268 }
269
270 void DPSOFTRAST_RecalcDepthFunc(void)
271 {
272         dpsoftrast.fb_depthfunc = dpsoftrast.user.depthtest ? dpsoftrast.user.depthfunc : GL_ALWAYS;
273 }
274
// Lookup table used by DPSOFTRAST_RecalcBlendFunc. Each row is
// {blend mode, source factor, destination factor, blendsubtract}; the first
// row whose last three columns equal the user state selects the mode.
// Two different factor pairs map to DPSOFTRAST_BLENDMODE_MUL.
// NOTE(review): the SUBALPHA row uses GL_SRC_COLOR as source factor while the
// ADDALPHA row uses GL_SRC_ALPHA -- confirm this is intended.
275 int blendmodetable[][4] = 
276 {
277         {DPSOFTRAST_BLENDMODE_OPAQUE, GL_ONE, GL_ZERO, false},
278         {DPSOFTRAST_BLENDMODE_ALPHA, GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, false},
279         {DPSOFTRAST_BLENDMODE_ADDALPHA, GL_SRC_ALPHA, GL_ONE, false},
280         {DPSOFTRAST_BLENDMODE_ADD, GL_ONE, GL_ONE, false},
281         {DPSOFTRAST_BLENDMODE_INVMOD, GL_ZERO, GL_ONE_MINUS_SRC_COLOR, false},
282         {DPSOFTRAST_BLENDMODE_MUL, GL_ZERO, GL_SRC_COLOR, false},
283         {DPSOFTRAST_BLENDMODE_MUL, GL_DST_COLOR, GL_ZERO, false},
284         {DPSOFTRAST_BLENDMODE_MUL2, GL_DST_COLOR, GL_SRC_COLOR, false},
285         {DPSOFTRAST_BLENDMODE_PSEUDOALPHA, GL_ONE, GL_ONE_MINUS_SRC_ALPHA, false},
286         {DPSOFTRAST_BLENDMODE_SUBALPHA, GL_SRC_COLOR, GL_ONE, true}
287 };
288
289 void DPSOFTRAST_RecalcBlendFunc(void)
290 {
291         int i;
292         dpsoftrast.fb_blendmode = DPSOFTRAST_BLENDMODE_OPAQUE;
293         for (i = 0;i < (int)(sizeof(blendmodetable) / sizeof(blendmodetable[0]));i++)
294         {
295                 if (dpsoftrast.user.blendfunc[0] == blendmodetable[i][1] && dpsoftrast.user.blendfunc[1] == blendmodetable[i][2] && dpsoftrast.user.blendsubtract == blendmodetable[i][3])
296                 {
297                         dpsoftrast.fb_blendmode = blendmodetable[i][0];
298                         break;
299                 }
300         }
301 }
302
303 #define DPSOFTRAST_ValidateQuick(f) ((dpsoftrast.validate & (f)) ? (DPSOFTRAST_Validate(f), 0) : 0)
304
305 void DPSOFTRAST_Validate(int mask)
306 {
307         mask &= dpsoftrast.validate;
308         if (!mask)
309                 return;
310         if (mask & DPSOFTRAST_VALIDATE_FB)
311         {
312                 dpsoftrast.validate &= ~DPSOFTRAST_VALIDATE_FB;
313                 DPSOFTRAST_RecalcFB();
314         }
315         if (mask & DPSOFTRAST_VALIDATE_DEPTHFUNC)
316         {
317                 dpsoftrast.validate &= ~DPSOFTRAST_VALIDATE_DEPTHFUNC;
318                 DPSOFTRAST_RecalcDepthFunc();
319         }
320         if (mask & DPSOFTRAST_VALIDATE_BLENDFUNC)
321         {
322                 dpsoftrast.validate &= ~DPSOFTRAST_VALIDATE_BLENDFUNC;
323                 DPSOFTRAST_RecalcBlendFunc();
324         }
325 }
326
327 DPSOFTRAST_Texture *DPSOFTRAST_Texture_GetByIndex(int index)
328 {
329         if (index >= 1 && index < dpsoftrast.texture_end && dpsoftrast.texture[index].bytes)
330                 return &dpsoftrast.texture[index];
331         return NULL;
332 }
333
334 int DPSOFTRAST_Texture_New(int flags, int width, int height, int depth)
335 {
336         int w;
337         int h;
338         int d;
339         int size;
340         int s;
341         int texnum;
342         int mipmaps;
343         int sides = (flags & DPSOFTRAST_TEXTURE_FLAG_CUBEMAP) ? 6 : 1;
344         int texformat = flags & DPSOFTRAST_TEXTURE_FORMAT_COMPAREMASK;
345         DPSOFTRAST_Texture *texture;
346         if (width*height*depth < 1)
347         {
348                 dpsoftrast.errorstring = "DPSOFTRAST_Texture_New: width, height or depth is less than 1";
349                 return 0;
350         }
351         if (width > DPSOFTRAST_TEXTURE_MAXSIZE || height > DPSOFTRAST_TEXTURE_MAXSIZE || depth > DPSOFTRAST_TEXTURE_MAXSIZE)
352         {
353                 dpsoftrast.errorstring = "DPSOFTRAST_Texture_New: texture size is too large";
354                 return 0;
355         }
356         switch(texformat)
357         {
358         case DPSOFTRAST_TEXTURE_FORMAT_BGRA8:
359         case DPSOFTRAST_TEXTURE_FORMAT_RGBA8:
360         case DPSOFTRAST_TEXTURE_FORMAT_ALPHA8:
361                 break;
362         case DPSOFTRAST_TEXTURE_FORMAT_DEPTH:
363                 if (flags & DPSOFTRAST_TEXTURE_FLAG_CUBEMAP)
364                 {
365                         dpsoftrast.errorstring = "DPSOFTRAST_Texture_New: DPSOFTRAST_TEXTURE_FORMAT_DEPTH only permitted on 2D textures";
366                         return 0;
367                 }
368                 if (depth != 1)
369                 {
370                         dpsoftrast.errorstring = "DPSOFTRAST_Texture_New: DPSOFTRAST_TEXTURE_FORMAT_DEPTH only permitted on 2D textures";
371                         return 0;
372                 }
373                 if ((flags & DPSOFTRAST_TEXTURE_FLAG_MIPMAP) && (texformat == DPSOFTRAST_TEXTURE_FORMAT_DEPTH))
374                 {
375                         dpsoftrast.errorstring = "DPSOFTRAST_Texture_New: DPSOFTRAST_TEXTURE_FORMAT_DEPTH does not permit mipmaps";
376                         return 0;
377                 }
378                 break;
379         }
380         if (depth != 1 && (flags & DPSOFTRAST_TEXTURE_FLAG_CUBEMAP))
381         {
382                 dpsoftrast.errorstring = "DPSOFTRAST_Texture_New: DPSOFTRAST_TEXTURE_FLAG_CUBEMAP can not be used on 3D textures";
383                 return 0;
384         }
385         if (depth != 1 && (flags & DPSOFTRAST_TEXTURE_FLAG_MIPMAP))
386         {
387                 dpsoftrast.errorstring = "DPSOFTRAST_Texture_New: DPSOFTRAST_TEXTURE_FLAG_MIPMAP can not be used on 3D textures";
388                 return 0;
389         }
390         if (depth != 1 && (flags & DPSOFTRAST_TEXTURE_FLAG_MIPMAP))
391         {
392                 dpsoftrast.errorstring = "DPSOFTRAST_Texture_New: DPSOFTRAST_TEXTURE_FLAG_MIPMAP can not be used on 3D textures";
393                 return 0;
394         }
395         if ((flags & DPSOFTRAST_TEXTURE_FLAG_CUBEMAP) && (flags & DPSOFTRAST_TEXTURE_FLAG_MIPMAP))
396         {
397                 dpsoftrast.errorstring = "DPSOFTRAST_Texture_New: DPSOFTRAST_TEXTURE_FLAG_MIPMAP can not be used on cubemap textures";
398                 return 0;
399         }
400         if ((width & (width-1)) || (height & (height-1)) || (depth & (depth-1)))
401         {
402                 dpsoftrast.errorstring = "DPSOFTRAST_Texture_New: dimensions are not power of two";
403                 return 0;
404         }
405         // find first empty slot in texture array
406         for (texnum = dpsoftrast.texture_firstfree;texnum < dpsoftrast.texture_end;texnum++)
407                 if (!dpsoftrast.texture[texnum].bytes)
408                         break;
409         dpsoftrast.texture_firstfree = texnum + 1;
410         if (dpsoftrast.texture_max <= texnum)
411         {
412                 // expand texture array as needed
413                 if (dpsoftrast.texture_max < 1024)
414                         dpsoftrast.texture_max = 1024;
415                 else
416                         dpsoftrast.texture_max *= 2;
417                 dpsoftrast.texture = (DPSOFTRAST_Texture *)realloc(dpsoftrast.texture, dpsoftrast.texture_max * sizeof(DPSOFTRAST_Texture));
418         }
419         if (dpsoftrast.texture_end <= texnum)
420                 dpsoftrast.texture_end = texnum + 1;
421         texture = &dpsoftrast.texture[texnum];
422         memset(texture, 0, sizeof(*texture));
423         texture->flags = flags;
424         texture->width = width;
425         texture->height = height;
426         texture->depth = depth;
427         texture->sides = sides;
428         w = width;
429         h = height;
430         d = depth;
431         size = 0;
432         mipmaps = 0;
433         w = width;
434         h = height;
435         d = depth;
436         for (;;)
437         {
438                 s = w * h * d * sides * 4;
439                 texture->mipmap[mipmaps][0] = size;
440                 texture->mipmap[mipmaps][1] = s;
441                 texture->mipmap[mipmaps][2] = w;
442                 texture->mipmap[mipmaps][3] = h;
443                 texture->mipmap[mipmaps][4] = d;
444                 size += s;
445                 mipmaps++;
446                 if (w * h * d == 1 || !(flags & DPSOFTRAST_TEXTURE_FLAG_MIPMAP))
447                         break;
448                 if (w > 1) w >>= 1;
449                 if (h > 1) h >>= 1;
450                 if (d > 1) d >>= 1;
451         }
452         texture->mipmaps = mipmaps;
453         texture->size = size;
454
455         // allocate the pixels now
456         texture->bytes = (unsigned char *)calloc(1, size);
457
458         return texnum;
459 }
460 void DPSOFTRAST_Texture_Free(int index)
461 {
462         DPSOFTRAST_Texture *texture;
463         texture = DPSOFTRAST_Texture_GetByIndex(index);if (!texture) return;
464         if (texture->bytes)
465                 free(texture->bytes);
466         texture->bytes = NULL;
467         memset(texture, 0, sizeof(*texture));
468         // adjust the free range and used range
469         if (dpsoftrast.texture_firstfree > index)
470                 dpsoftrast.texture_firstfree = index;
471         while (dpsoftrast.texture_end > 0 && dpsoftrast.texture[dpsoftrast.texture_end-1].bytes == NULL)
472                 dpsoftrast.texture_end--;
473 }
474 void DPSOFTRAST_Texture_CalculateMipmaps(int index)
475 {
476         int i, x, y, z, w, layer0, layer1, row0, row1;
477         unsigned char *o, *i0, *i1, *i2, *i3;
478         DPSOFTRAST_Texture *texture;
479         texture = DPSOFTRAST_Texture_GetByIndex(index);if (!texture) return;
480         if (texture->mipmaps <= 1)
481                 return;
482         for (i = 1;i < texture->mipmaps;i++)
483         {
484                 for (z = 0;z < texture->mipmap[i][4];z++)
485                 {
486                         layer0 = z*2;
487                         layer1 = z*2+1;
488                         if (layer1 >= texture->mipmap[i-1][4])
489                                 layer1 = texture->mipmap[i-1][4]-1;
490                         for (y = 0;y < texture->mipmap[i][3];y++)
491                         {
492                                 row0 = y*2;
493                                 row1 = y*2+1;
494                                 if (row1 >= texture->mipmap[i-1][3])
495                                         row1 = texture->mipmap[i-1][3]-1;
496                                 o =  texture->bytes + texture->mipmap[i  ][0] + 4*((texture->mipmap[i  ][3] * z      + y   ) * texture->mipmap[i  ][2]);
497                                 i0 = texture->bytes + texture->mipmap[i-1][0] + 4*((texture->mipmap[i-1][3] * layer0 + row0) * texture->mipmap[i-1][2]);
498                                 i1 = texture->bytes + texture->mipmap[i-1][0] + 4*((texture->mipmap[i-1][3] * layer0 + row1) * texture->mipmap[i-1][2]);
499                                 i2 = texture->bytes + texture->mipmap[i-1][0] + 4*((texture->mipmap[i-1][3] * layer1 + row0) * texture->mipmap[i-1][2]);
500                                 i3 = texture->bytes + texture->mipmap[i-1][0] + 4*((texture->mipmap[i-1][3] * layer1 + row1) * texture->mipmap[i-1][2]);
501                                 w = texture->mipmap[i][2];
502                                 if (layer1 > layer0)
503                                 {
504                                         if (texture->mipmap[i-1][2] > 1)
505                                         {
506                                                 // average 3D texture
507                                                 for (x = 0;x < w;x++, o += 4, i0 += 8, i1 += 8, i2 += 8, i3 += 8)
508                                                 {
509                                                         o[0] = (i0[0] + i0[4] + i1[0] + i1[4] + i2[0] + i2[4] + i3[0] + i3[4] + 4) >> 3;
510                                                         o[1] = (i0[1] + i0[5] + i1[1] + i1[5] + i2[1] + i2[5] + i3[1] + i3[5] + 4) >> 3;
511                                                         o[2] = (i0[2] + i0[6] + i1[2] + i1[6] + i2[2] + i2[6] + i3[2] + i3[6] + 4) >> 3;
512                                                         o[3] = (i0[3] + i0[7] + i1[3] + i1[7] + i2[3] + i2[7] + i3[3] + i3[7] + 4) >> 3;
513                                                 }
514                                         }
515                                         else
516                                         {
517                                                 // average 3D mipmap with parent width == 1
518                                                 for (x = 0;x < w;x++, o += 4, i0 += 8, i1 += 8)
519                                                 {
520                                                         o[0] = (i0[0] + i1[0] + i2[0] + i3[0] + 2) >> 2;
521                                                         o[1] = (i0[1] + i1[1] + i2[1] + i3[1] + 2) >> 2;
522                                                         o[2] = (i0[2] + i1[2] + i2[2] + i3[2] + 2) >> 2;
523                                                         o[3] = (i0[3] + i1[3] + i2[3] + i3[3] + 2) >> 2;
524                                                 }
525                                         }
526                                 }
527                                 else
528                                 {
529                                         if (texture->mipmap[i-1][2] > 1)
530                                         {
531                                                 // average 2D texture (common case)
532                                                 for (x = 0;x < w;x++, o += 4, i0 += 8, i1 += 8)
533                                                 {
534                                                         o[0] = (i0[0] + i0[4] + i1[0] + i1[4] + 2) >> 2;
535                                                         o[1] = (i0[1] + i0[5] + i1[1] + i1[5] + 2) >> 2;
536                                                         o[2] = (i0[2] + i0[6] + i1[2] + i1[6] + 2) >> 2;
537                                                         o[3] = (i0[3] + i0[7] + i1[3] + i1[7] + 2) >> 2;
538                                                 }
539                                         }
540                                         else
541                                         {
542                                                 // 2D texture with parent width == 1
543                                                 o[0] = (i0[0] + i1[0] + 1) >> 1;
544                                                 o[1] = (i0[1] + i1[1] + 1) >> 1;
545                                                 o[2] = (i0[2] + i1[2] + 1) >> 1;
546                                                 o[3] = (i0[3] + i1[3] + 1) >> 1;
547                                         }
548                                 }
549                         }
550                 }
551         }
552 }
553 void DPSOFTRAST_Texture_UpdatePartial(int index, int mip, const unsigned char *pixels, int blockx, int blocky, int blockwidth, int blockheight)
554 {
555         DPSOFTRAST_Texture *texture;
556         texture = DPSOFTRAST_Texture_GetByIndex(index);if (!texture) return;
557
558         // FIXME IMPLEMENT
559
560         dpsoftrast.errorstring = "DPSOFTRAST_Texture_UpdatePartial: Not implemented.";
561 }
562 void DPSOFTRAST_Texture_UpdateFull(int index, const unsigned char *pixels)
563 {
564         DPSOFTRAST_Texture *texture;
565         texture = DPSOFTRAST_Texture_GetByIndex(index);if (!texture) return;
566
567         memcpy(texture->bytes, pixels, texture->mipmap[0][1]);
568         DPSOFTRAST_Texture_CalculateMipmaps(index);
569 }
570 int DPSOFTRAST_Texture_GetWidth(int index, int mip)
571 {
572         DPSOFTRAST_Texture *texture;
573         texture = DPSOFTRAST_Texture_GetByIndex(index);if (!texture) return 0;
574         return texture->mipmap[mip][2];
575 }
576 int DPSOFTRAST_Texture_GetHeight(int index, int mip)
577 {
578         DPSOFTRAST_Texture *texture;
579         texture = DPSOFTRAST_Texture_GetByIndex(index);if (!texture) return 0;
580         return texture->mipmap[mip][3];
581 }
582 int DPSOFTRAST_Texture_GetDepth(int index, int mip)
583 {
584         DPSOFTRAST_Texture *texture;
585         texture = DPSOFTRAST_Texture_GetByIndex(index);if (!texture) return 0;
586         return texture->mipmap[mip][4];
587 }
588 unsigned char *DPSOFTRAST_Texture_GetPixelPointer(int index, int mip)
589 {
590         DPSOFTRAST_Texture *texture;
591         texture = DPSOFTRAST_Texture_GetByIndex(index);if (!texture) return 0;
592         return texture->bytes + texture->mipmap[mip][0];
593 }
594 void DPSOFTRAST_Texture_Filter(int index, DPSOFTRAST_TEXTURE_FILTER filter)
595 {
596         DPSOFTRAST_Texture *texture;
597         texture = DPSOFTRAST_Texture_GetByIndex(index);if (!texture) return;
598         if (!(texture->flags & DPSOFTRAST_TEXTURE_FLAG_MIPMAP) && filter > DPSOFTRAST_TEXTURE_FILTER_LINEAR)
599         {
600                 dpsoftrast.errorstring = "DPSOFTRAST_Texture_Filter: requested filter mode requires mipmaps";
601                 return;
602         }
603         texture->filter = filter;
604 }
605
606 void DPSOFTRAST_SetRenderTargets(int width, int height, unsigned int *depthpixels, unsigned int *colorpixels0, unsigned int *colorpixels1, unsigned int *colorpixels2, unsigned int *colorpixels3)
607 {
608         dpsoftrast.fb_width = width;
609         dpsoftrast.fb_height = height;
610         dpsoftrast.fb_depthpixels = depthpixels;
611         dpsoftrast.fb_colorpixels[0] = colorpixels0;
612         dpsoftrast.fb_colorpixels[1] = colorpixels1;
613         dpsoftrast.fb_colorpixels[2] = colorpixels2;
614         dpsoftrast.fb_colorpixels[3] = colorpixels3;
615 }
616 void DPSOFTRAST_Viewport(int x, int y, int width, int height)
617 {
618         dpsoftrast.user.viewport[0] = x;
619         dpsoftrast.user.viewport[1] = y;
620         dpsoftrast.user.viewport[2] = width;
621         dpsoftrast.user.viewport[3] = height;
622         dpsoftrast.validate |= DPSOFTRAST_VALIDATE_FB;
623 }
624 void DPSOFTRAST_ClearColor(float r, float g, float b, float a)
625 {
626         int i, x1, y1, x2, y2, w, h, x, y;
627         unsigned int *p;
628         unsigned int c;
629         DPSOFTRAST_Validate(DPSOFTRAST_VALIDATE_FB);
630         x1 = dpsoftrast.fb_clearscissor[0];
631         y1 = dpsoftrast.fb_clearscissor[1];
632         x2 = dpsoftrast.fb_clearscissor[2];
633         y2 = dpsoftrast.fb_clearscissor[1] + dpsoftrast.fb_clearscissor[3];
634         w = x2 - x1;
635         h = y2 - y1;
636         if (w < 1 || h < 1)
637                 return;
638         // FIXME: honor dpsoftrast.fb_colormask?
639         c = DPSOFTRAST_BGRA8_FROM_RGBA32F(r,g,b,a);
640         for (i = 0;i < 4;i++)
641         {
642                 if (!dpsoftrast.fb_colorpixels[i])
643                         continue;
644                 for (y = y1;y < y2;y++)
645                 {
646                         p = dpsoftrast.fb_colorpixels[i] + y * dpsoftrast.fb_width;
647                         for (x = x1;x < x2;x++)
648                                 p[x] = c;
649                 }
650         }
651 }
652 void DPSOFTRAST_ClearDepth(float d)
653 {
654         int x1, y1, x2, y2, w, h, x, y;
655         unsigned int *p;
656         unsigned int c;
657         DPSOFTRAST_Validate(DPSOFTRAST_VALIDATE_FB);
658         x1 = dpsoftrast.fb_clearscissor[0];
659         y1 = dpsoftrast.fb_clearscissor[1];
660         x2 = dpsoftrast.fb_clearscissor[2];
661         y2 = dpsoftrast.fb_clearscissor[1] + dpsoftrast.fb_clearscissor[3];
662         w = x2 - x1;
663         h = y2 - y1;
664         if (w < 1 || h < 1)
665                 return;
666         c = DPSOFTRAST_DEPTH32_FROM_DEPTH32F(d);
667         for (y = y1;y < y2;y++)
668         {
669                 p = dpsoftrast.fb_depthpixels + y * dpsoftrast.fb_width;
670                 for (x = x1;x < x2;x++)
671                         p[x] = c;
672         }
673 }
674 void DPSOFTRAST_ColorMask(int r, int g, int b, int a)
675 {
676         dpsoftrast.user.colormask[0] = r != 0;
677         dpsoftrast.user.colormask[1] = g != 0;
678         dpsoftrast.user.colormask[2] = b != 0;
679         dpsoftrast.user.colormask[3] = a != 0;
680         dpsoftrast.fb_colormask = ((-dpsoftrast.user.colormask[0]) & 0x00FF0000) | ((-dpsoftrast.user.colormask[1]) & 0x0000FF00) | ((-dpsoftrast.user.colormask[2]) & 0x000000FF) | ((-dpsoftrast.user.colormask[3]) & 0xFF000000);
681 }
682 void DPSOFTRAST_DepthTest(int enable)
683 {
684         dpsoftrast.user.depthtest = enable;
685         dpsoftrast.validate |= DPSOFTRAST_VALIDATE_DEPTHFUNC;
686 }
687 void DPSOFTRAST_ScissorTest(int enable)
688 {
689         dpsoftrast.user.scissortest = enable;
690         dpsoftrast.validate |= DPSOFTRAST_VALIDATE_FB;
691 }
692 void DPSOFTRAST_Scissor(float x, float y, float width, float height)
693 {
694         dpsoftrast.user.scissor[0] = x;
695         dpsoftrast.user.scissor[1] = y;
696         dpsoftrast.user.scissor[2] = width;
697         dpsoftrast.user.scissor[3] = height;
698         dpsoftrast.validate |= DPSOFTRAST_VALIDATE_FB;
699 }
700
701 void DPSOFTRAST_BlendFunc(int smodulate, int dmodulate)
702 {
703         // FIXME: validate
704         dpsoftrast.user.blendfunc[0] = smodulate;
705         dpsoftrast.user.blendfunc[1] = dmodulate;
706         dpsoftrast.validate |= DPSOFTRAST_VALIDATE_BLENDFUNC;
707 }
708 void DPSOFTRAST_BlendSubtract(int enable)
709 {
710         dpsoftrast.user.blendsubtract = enable != 0;
711         dpsoftrast.validate |= DPSOFTRAST_VALIDATE_BLENDFUNC;
712 }
713 void DPSOFTRAST_DepthMask(int enable)
714 {
715         dpsoftrast.user.depthmask = enable;
716 }
717 void DPSOFTRAST_DepthFunc(int comparemode)
718 {
719         // FIXME: validate
720         dpsoftrast.user.depthfunc = comparemode;
721 }
722 void DPSOFTRAST_DepthRange(float range0, float range1)
723 {
724         dpsoftrast.user.depthrange[0] = range0;
725         dpsoftrast.user.depthrange[1] = range1;
726 }
727 void DPSOFTRAST_PolygonOffset(float alongnormal, float intoview)
728 {
729         dpsoftrast.user.polygonoffset[0] = alongnormal;
730         dpsoftrast.user.polygonoffset[1] = intoview;
731 }
732 void DPSOFTRAST_CullFace(int mode)
733 {
734         // FIXME: validate
735         dpsoftrast.user.cullface = mode;
736 }
737 void DPSOFTRAST_AlphaTest(float enable)
738 {
739         dpsoftrast.user.alphatest = enable;
740 }
741 void DPSOFTRAST_AlphaFunc(int alphafunc, float alphavalue)
742 {
743         // FIXME: validate
744         dpsoftrast.user.alphafunc = alphafunc;
745         dpsoftrast.user.alphavalue = alphavalue;
746 }
747 void DPSOFTRAST_Color4f(float r, float g, float b, float a)
748 {
749         dpsoftrast.user.color[0] = r;
750         dpsoftrast.user.color[1] = g;
751         dpsoftrast.user.color[2] = b;
752         dpsoftrast.user.color[3] = a;
753 }
754 void DPSOFTRAST_GetPixelsBGRA(int blockx, int blocky, int blockwidth, int blockheight, unsigned char *outpixels)
755 {
756         int outstride = blockwidth * 4;
757         int instride = dpsoftrast.fb_width * 4;
758         int bx1 = blockx;
759         int by1 = blocky;
760         int bx2 = blockx + blockwidth;
761         int by2 = blocky + blockheight;
762         int bw;
763         int bh;
764         int x;
765         int y;
766         unsigned char *inpixels;
767         unsigned char *b;
768         unsigned char *o;
769         if (bx1 < 0) bx1 = 0;
770         if (by1 < 0) by1 = 0;
771         if (bx2 > dpsoftrast.fb_width) bx2 = dpsoftrast.fb_width;
772         if (by2 > dpsoftrast.fb_height) by2 = dpsoftrast.fb_height;
773         bw = bx2 - bx1;
774         bh = by2 - by1;
775         inpixels = (unsigned char *)dpsoftrast.fb_colorpixels[0];
776         if (dpsoftrast.bigendian)
777         {
778                 for (y = by1;y < by2;y++)
779                 {
780                         b = (unsigned char *)inpixels + (dpsoftrast.fb_height - 1 - y) * instride + 4 * bx1;
781                         o = (unsigned char *)outpixels + (y - by1) * outstride;
782                         for (x = bx1;x < bx2;x++)
783                         {
784                                 o[0] = b[3];
785                                 o[1] = b[2];
786                                 o[2] = b[1];
787                                 o[3] = b[0];
788                                 o += 4;
789                                 b += 4;
790                         }
791                 }
792         }
793         else
794         {
795                 for (y = by1;y < by2;y++)
796                 {
797                         b = (unsigned char *)inpixels + (dpsoftrast.fb_height - 1 - y) * instride + 4 * bx1;
798                         o = (unsigned char *)outpixels + (y - by1) * outstride;
799                         memcpy(o, b, bw*4);
800                 }
801         }
802
803 }
804 void DPSOFTRAST_CopyRectangleToTexture(int index, int mip, int tx, int ty, int sx, int sy, int width, int height)
805 {
806         int tx1 = tx;
807         int ty1 = ty;
808         int tx2 = tx + width;
809         int ty2 = ty + height;
810         int sx1 = sx;
811         int sy1 = sy;
812         int sx2 = sx + width;
813         int sy2 = sy + height;
814         int swidth;
815         int sheight;
816         int twidth;
817         int theight;
818         int sw;
819         int sh;
820         int tw;
821         int th;
822         int y;
823         unsigned int *spixels;
824         unsigned int *tpixels;
825         DPSOFTRAST_Texture *texture;
826         texture = DPSOFTRAST_Texture_GetByIndex(index);if (!texture) return;
827         if (mip < 0 || mip >= texture->mipmaps) return;
828         spixels = dpsoftrast.fb_colorpixels[0];
829         swidth = dpsoftrast.fb_width;
830         sheight = dpsoftrast.fb_height;
831         tpixels = (unsigned int *)(texture->bytes + texture->mipmap[mip][0]);
832         twidth = texture->mipmap[mip][2];
833         theight = texture->mipmap[mip][3];
834         if (tx1 < 0) tx1 = 0;
835         if (ty1 < 0) ty1 = 0;
836         if (tx2 > twidth) tx2 = twidth;
837         if (ty2 > theight) ty2 = theight;
838         if (sx1 < 0) sx1 = 0;
839         if (sy1 < 0) sy1 = 0;
840         if (sx2 > swidth) sx2 = swidth;
841         if (sy2 > sheight) sy2 = sheight;
842         tw = tx2 - tx1;
843         th = ty2 - ty1;
844         sw = sx2 - sx1;
845         sh = sy2 - sy1;
846         if (tw > sw) tw = sw;
847         if (th > sh) th = sh;
848         if (tw < 1 || th < 1)
849                 return;
850         for (y = 0;y < th;y++)
851                 memcpy(tpixels + ((ty1 + y) * twidth + tx1), spixels + ((sy1 + y) * swidth + sx1), tw*4);
852         if (texture->mipmaps > 1)
853                 DPSOFTRAST_Texture_CalculateMipmaps(index);
854 }
855 void DPSOFTRAST_SetTexture(int unitnum, int index)
856 {
857         DPSOFTRAST_Texture *texture;
858         if (unitnum < 0 || unitnum >= DPSOFTRAST_MAXTEXTUREUNITS)
859         {
860                 dpsoftrast.errorstring = "DPSOFTRAST_SetTexture: invalid unit number";
861                 return;
862         }
863         texture = DPSOFTRAST_Texture_GetByIndex(index);
864         if (index && !texture)
865         {
866                 dpsoftrast.errorstring = "DPSOFTRAST_SetTexture: invalid texture handle";
867                 return;
868         }
869         dpsoftrast.texbound[unitnum] = texture;
870 }
871
872 void DPSOFTRAST_SetVertexPointer(const float *vertex3f, size_t stride)
873 {
874         dpsoftrast.pointer_vertex3f = vertex3f;
875         dpsoftrast.stride_vertex = stride;
876 }
877 void DPSOFTRAST_SetColorPointer(const float *color4f, size_t stride)
878 {
879         dpsoftrast.pointer_color4f = color4f;
880         dpsoftrast.pointer_color4ub = NULL;
881         dpsoftrast.stride_color = stride;
882 }
883 void DPSOFTRAST_SetColorPointer4ub(const unsigned char *color4ub, size_t stride)
884 {
885         dpsoftrast.pointer_color4f = NULL;
886         dpsoftrast.pointer_color4ub = color4ub;
887         dpsoftrast.stride_color = stride;
888 }
889 void DPSOFTRAST_SetTexCoordPointer(int unitnum, int numcomponents, size_t stride, const float *texcoordf)
890 {
891         dpsoftrast.pointer_texcoordf[unitnum] = texcoordf;
892         dpsoftrast.components_texcoord[unitnum] = numcomponents;
893         dpsoftrast.stride_texcoord[unitnum] = stride;
894 }
895
896 void DPSOFTRAST_SetShader(unsigned int mode, unsigned int permutation)
897 {
898         dpsoftrast.shader_mode = mode;
899         dpsoftrast.shader_permutation = permutation;
900 }
901 void DPSOFTRAST_Uniform4fARB(DPSOFTRAST_UNIFORM index, float v0, float v1, float v2, float v3)
902 {
903         dpsoftrast.uniform4f[index*4+0] = v0;
904         dpsoftrast.uniform4f[index*4+1] = v1;
905         dpsoftrast.uniform4f[index*4+2] = v2;
906         dpsoftrast.uniform4f[index*4+3] = v3;
907 }
908 void DPSOFTRAST_Uniform4fvARB(DPSOFTRAST_UNIFORM index, const float *v)
909 {
910         dpsoftrast.uniform4f[index*4+0] = v[0];
911         dpsoftrast.uniform4f[index*4+1] = v[1];
912         dpsoftrast.uniform4f[index*4+2] = v[2];
913         dpsoftrast.uniform4f[index*4+3] = v[3];
914 }
915 void DPSOFTRAST_UniformMatrix4fvARB(DPSOFTRAST_UNIFORM index, int arraysize, int transpose, const float *v)
916 {
917         int i;
918         for (i = 0;i < arraysize;i++, index += 4, v += 16)
919         {
920                 if (transpose)
921                 {
922                         dpsoftrast.uniform4f[index*4+0] = v[0];
923                         dpsoftrast.uniform4f[index*4+1] = v[4];
924                         dpsoftrast.uniform4f[index*4+2] = v[8];
925                         dpsoftrast.uniform4f[index*4+3] = v[12];
926                         dpsoftrast.uniform4f[index*4+4] = v[1];
927                         dpsoftrast.uniform4f[index*4+5] = v[5];
928                         dpsoftrast.uniform4f[index*4+6] = v[9];
929                         dpsoftrast.uniform4f[index*4+7] = v[13];
930                         dpsoftrast.uniform4f[index*4+8] = v[2];
931                         dpsoftrast.uniform4f[index*4+9] = v[6];
932                         dpsoftrast.uniform4f[index*4+10] = v[10];
933                         dpsoftrast.uniform4f[index*4+11] = v[14];
934                         dpsoftrast.uniform4f[index*4+12] = v[3];
935                         dpsoftrast.uniform4f[index*4+13] = v[7];
936                         dpsoftrast.uniform4f[index*4+14] = v[11];
937                         dpsoftrast.uniform4f[index*4+15] = v[15];
938                 }
939                 else
940                 {
941                         dpsoftrast.uniform4f[index*4+0] = v[0];
942                         dpsoftrast.uniform4f[index*4+1] = v[1];
943                         dpsoftrast.uniform4f[index*4+2] = v[2];
944                         dpsoftrast.uniform4f[index*4+3] = v[3];
945                         dpsoftrast.uniform4f[index*4+4] = v[4];
946                         dpsoftrast.uniform4f[index*4+5] = v[5];
947                         dpsoftrast.uniform4f[index*4+6] = v[6];
948                         dpsoftrast.uniform4f[index*4+7] = v[7];
949                         dpsoftrast.uniform4f[index*4+8] = v[8];
950                         dpsoftrast.uniform4f[index*4+9] = v[9];
951                         dpsoftrast.uniform4f[index*4+10] = v[10];
952                         dpsoftrast.uniform4f[index*4+11] = v[11];
953                         dpsoftrast.uniform4f[index*4+12] = v[12];
954                         dpsoftrast.uniform4f[index*4+13] = v[13];
955                         dpsoftrast.uniform4f[index*4+14] = v[14];
956                         dpsoftrast.uniform4f[index*4+15] = v[15];
957                 }
958         }
959 }
960 void DPSOFTRAST_Uniform1iARB(DPSOFTRAST_UNIFORM index, int i0)
961 {
962         dpsoftrast.uniform1i[index] = i0;
963 }
964
965 void DPSOFTRAST_Draw_LoadVertices(int firstvertex, int numvertices, bool needcolors)
966 {
967         int i;
968         int j;
969         int stride;
970         const float *v;
971         float *p;
972         float *data;
973         const unsigned char *b;
974         dpsoftrast.draw.numvertices = numvertices;
975         if (dpsoftrast.draw.maxvertices < dpsoftrast.draw.numvertices)
976         {
977                 if (dpsoftrast.draw.maxvertices < 4096)
978                         dpsoftrast.draw.maxvertices = 4096;
979                 while (dpsoftrast.draw.maxvertices < dpsoftrast.draw.numvertices)
980                         dpsoftrast.draw.maxvertices *= 2;
981                 if (dpsoftrast.draw.in_array4f[0])
982                         free(dpsoftrast.draw.in_array4f[0]);
983                 data = (float *)calloc(1, dpsoftrast.draw.maxvertices * sizeof(float[4])*(DPSOFTRAST_ARRAY_TOTAL*2 + 1));
984                 for (i = 0;i < DPSOFTRAST_ARRAY_TOTAL;i++, data += dpsoftrast.draw.maxvertices * 4)
985                         dpsoftrast.draw.in_array4f[i] = data;
986                 for (i = 0;i < DPSOFTRAST_ARRAY_TOTAL;i++, data += dpsoftrast.draw.maxvertices * 4)
987                         dpsoftrast.draw.post_array4f[i] = data;
988                 dpsoftrast.draw.screencoord4f = data;
989                 data += dpsoftrast.draw.maxvertices * 4;
990         }
991         stride = dpsoftrast.stride_vertex;
992         v = (const float *)((unsigned char *)dpsoftrast.pointer_vertex3f + firstvertex * stride);
993         p = dpsoftrast.draw.in_array4f[0];
994         for (i = 0;i < numvertices;i++)
995         {
996                 p[0] = v[0];
997                 p[1] = v[1];
998                 p[2] = v[2];
999                 p[3] = 1.0f;
1000                 p += 4;
1001                 v = (const float *)((const unsigned char *)v + stride);
1002         }
1003         if (needcolors)
1004         {
1005                 if (dpsoftrast.pointer_color4f)
1006                 {
1007                         stride = dpsoftrast.stride_color;
1008                         v = (const float *)((const unsigned char *)dpsoftrast.pointer_color4f + firstvertex * stride);
1009                         p = dpsoftrast.draw.in_array4f[1];
1010                         for (i = 0;i < numvertices;i++)
1011                         {
1012                                 p[0] = v[0];
1013                                 p[1] = v[1];
1014                                 p[2] = v[2];
1015                                 p[3] = v[3];
1016                                 p += 4;
1017                                 v = (const float *)((const unsigned char *)v + stride);
1018                         }
1019                 }
1020                 else if (dpsoftrast.pointer_color4ub)
1021                 {
1022                         stride = dpsoftrast.stride_color;
1023                         b = (const unsigned char *)((const unsigned char *)dpsoftrast.pointer_color4ub + firstvertex * stride);
1024                         p = dpsoftrast.draw.in_array4f[1];
1025                         for (i = 0;i < numvertices;i++)
1026                         {
1027                                 p[0] = b[0] * (1.0f / 255.0f);
1028                                 p[1] = b[1] * (1.0f / 255.0f);
1029                                 p[2] = b[2] * (1.0f / 255.0f);
1030                                 p[3] = b[3] * (1.0f / 255.0f);
1031                                 p += 4;
1032                                 b = (const unsigned char *)((const unsigned char *)b + stride);
1033                         }
1034                 }
1035                 else
1036                 {
1037                         v = dpsoftrast.user.color;
1038                         p = dpsoftrast.draw.in_array4f[1];
1039                         for (i = 0;i < numvertices;i++)
1040                         {
1041                                 p[0] = v[0];
1042                                 p[1] = v[1];
1043                                 p[2] = v[2];
1044                                 p[3] = v[3];
1045                                 p += 4;
1046                         }
1047                 }
1048         }
1049         for (j = 0;j < DPSOFTRAST_ARRAY_TOTAL-2;j++)
1050         {
1051                 if (dpsoftrast.pointer_texcoordf[j])
1052                 {
1053                         stride = dpsoftrast.stride_texcoord[j];
1054                         v = (const float *)((const unsigned char *)dpsoftrast.pointer_texcoordf[j] + firstvertex * stride);
1055                         p = dpsoftrast.draw.in_array4f[j+2];
1056                         switch(dpsoftrast.components_texcoord[j])
1057                         {
1058                         case 2:
1059                                 for (i = 0;i < numvertices;i++)
1060                                 {
1061                                         p[0] = v[0];
1062                                         p[1] = v[1];
1063                                         p[2] = 0.0f;
1064                                         p[3] = 1.0f;
1065                                         p += 4;
1066                                         v = (const float *)((const unsigned char *)v + stride);
1067                                 }
1068                                 break;
1069                         case 3:
1070                                 for (i = 0;i < numvertices;i++)
1071                                 {
1072                                         p[0] = v[0];
1073                                         p[1] = v[1];
1074                                         p[2] = v[2];
1075                                         p[3] = 1.0f;
1076                                         p += 4;
1077                                         v = (const float *)((const unsigned char *)v + stride);
1078                                 }
1079                                 break;
1080                         case 4:
1081                                 for (i = 0;i < numvertices;i++)
1082                                 {
1083                                         p[0] = v[0];
1084                                         p[1] = v[1];
1085                                         p[2] = v[2];
1086                                         p[3] = v[3];
1087                                         p += 4;
1088                                         v = (const float *)((const unsigned char *)v + stride);
1089                                 }
1090                                 break;
1091                         }
1092                 }
1093         }
1094 }
1095
// Transforms numitems float[4] vectors by a 4x4 matrix:
// out[c] = in[0]*m[0][c] + in[1]*m[1][c] + in[2]*m[2][c] + in[3]*m[3][c]
// where m[r][c] is inmatrix16f[r*4+c].
// out4f may be the same array as in4f (in-place transform).
// Does nothing when numitems is zero or negative.
void DPSOFTRAST_Array_Transform(float *out4f, const float *in4f, int numitems, const float *inmatrix16f)
{
	static const float identitymatrix[4][4] = {{1,0,0,0},{0,1,0,0},{0,0,1,0},{0,0,0,1}};
	// TODO: SIMD
	float matrix[4][4];
	float x, y, z, w;
	int i;
	// a negative count would turn into a huge size_t in the copy below
	if (numitems <= 0)
		return;
	memcpy(matrix, inmatrix16f, sizeof(float[16]));
	if (!memcmp(identitymatrix, matrix, sizeof(float[16])))
	{
		// fast case for identity matrix; nothing to do when transforming
		// in place, and memmove keeps any other overlap well defined
		if (out4f != in4f)
			memmove(out4f, in4f, numitems * sizeof(float[4]));
		return;
	}
	for (i = 0;i < numitems;i++, out4f += 4, in4f += 4)
	{
		// read the whole input vector before writing anything, otherwise
		// an in-place transform would use already overwritten components
		x = in4f[0];
		y = in4f[1];
		z = in4f[2];
		w = in4f[3];
		out4f[0] = x * matrix[0][0] + y * matrix[1][0] + z * matrix[2][0] + w * matrix[3][0];
		out4f[1] = x * matrix[0][1] + y * matrix[1][1] + z * matrix[2][1] + w * matrix[3][1];
		out4f[2] = x * matrix[0][2] + y * matrix[1][2] + z * matrix[2][2] + w * matrix[3][2];
		out4f[3] = x * matrix[0][3] + y * matrix[1][3] + z * matrix[2][3] + w * matrix[3][3];
	}
}
1117
// Copies numitems float[4] vectors from in4f to out4f.
void DPSOFTRAST_Array_Copy(float *out4f, const float *in4f, int numitems)
{
	size_t bytes = numitems * sizeof(float[4]);
	memcpy(out4f, in4f, bytes);
}
1122
1123 void DPSOFTRAST_Draw_ProjectVertices(float *out4f, const float *in4f, int numitems)
1124 {
1125         // NOTE: this is used both as a whole mesh transform function and a
1126         // per-triangle transform function (for clipped triangles), accordingly
1127         // it should not crash on divide by 0 but the result of divide by 0 is
1128         // unimportant...
1129         // TODO: SIMD
1130         int i;
1131         float w;
1132         float viewportcenter[4];
1133         float viewportscale[4];
1134         viewportscale[0] = dpsoftrast.fb_viewportscale[0];
1135         viewportscale[1] = dpsoftrast.fb_viewportscale[1];
1136         viewportscale[2] = 0.5f;
1137         viewportscale[3] = 0.0f;
1138         viewportcenter[0] = dpsoftrast.fb_viewportcenter[0];
1139         viewportcenter[1] = dpsoftrast.fb_viewportcenter[1];
1140         viewportcenter[2] = 0.5f;
1141         viewportcenter[3] = 0.0f;
1142         for (i = 0;i < numitems;i++)
1143         {
1144                 if (!in4f[3])
1145                 {
1146                         out4f[0] = 0.0f;
1147                         out4f[1] = 0.0f;
1148                         out4f[2] = 0.0f;
1149                         out4f[3] = 0.0f;
1150                         continue;
1151                 }
1152                 w = 1.0f / in4f[3];
1153                 out4f[0] = viewportcenter[0] + viewportscale[0] * in4f[0] * w;
1154                 out4f[1] = viewportcenter[1] + viewportscale[1] * in4f[1] * w;
1155                 out4f[2] = viewportcenter[2] + viewportscale[2] * in4f[2] * w;
1156                 out4f[3] = viewportcenter[3] + viewportscale[3] * in4f[3] * w;
1157                 out4f[3] = w;
1158                 in4f += 4;
1159                 out4f += 4;
1160         }
1161 }
1162
1163 void DPSOFTRAST_Draw_DebugEdgePoints(const float *screen0, const float *screen1)
1164 {
1165         int i;
1166         int x;
1167         int y;
1168         int w = dpsoftrast.fb_width;
1169         int bounds[4];
1170         float v0[2], v1[2];
1171         unsigned int *pixels = dpsoftrast.fb_colorpixels[0];
1172         //const float *c4f;
1173         bounds[0] = dpsoftrast.fb_viewportscissor[0];
1174         bounds[1] = dpsoftrast.fb_viewportscissor[1];
1175         bounds[2] = dpsoftrast.fb_viewportscissor[0] + dpsoftrast.fb_viewportscissor[2];
1176         bounds[3] = dpsoftrast.fb_viewportscissor[1] + dpsoftrast.fb_viewportscissor[3];
1177         v0[0] = screen0[0];
1178         v0[1] = screen0[1];
1179         v1[0] = screen1[0];
1180         v1[1] = screen1[1];
1181         for (i = 0;i <= 128;i++)
1182         {
1183                 // check nearclip
1184                 //if (dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION][i*4+3] != 1.0f)
1185                 //      continue;
1186                 x = (int)(v0[0] + (v1[0] - v0[0]) * (i/128.0f));
1187                 y = (int)(v0[1] + (v1[1] - v0[1]) * (i/128.0f));
1188                 if (x < bounds[0] || y < bounds[1] || x >= bounds[2] || y >= bounds[3])
1189                         continue;
1190                 //c4f = dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_COLOR] + element0*4;
1191                 //pixels[y*w+x] = DPSOFTRAST_BGRA8_FROM_RGBA32F(c4f[0], c4f[1], c4f[2], c4f[3]);
1192                 pixels[y*w+x] = 0xFFFFFFFF;
1193         }
1194 }
1195
1196 void DPSOFTRAST_Draw_Span_Begin(const DPSOFTRAST_State_Draw_Span * RESTRICT span, float *zf)
1197 {
1198         int x;
1199         int startx = span->startx;
1200         int endx = span->endx;
1201         float w = span->data[0][DPSOFTRAST_ARRAY_TOTAL][3];
1202         float wslope = span->data[1][DPSOFTRAST_ARRAY_TOTAL][3];
1203         for (x = startx;x < endx;)
1204         {
1205                 int endsub = x + DPSOFTRAST_MAXSUBSPAN-1;
1206                 float z = 1.0f / (w + wslope * x), dz;
1207                 if (endsub >= endx)
1208                 {
1209                         endsub = endx-1;
1210                         dz = endsub > x ? (1.0f / (w + wslope * endsub) - z) / (endsub - x) : 0.0f;
1211                 }
1212                 else
1213                 {
1214                         dz = (1.0f / (w + wslope * endsub) - z) * (1.0f / (DPSOFTRAST_MAXSUBSPAN-1));
1215                 }
1216                 for (; x <= endsub; x++, z += dz)
1217                         zf[x] = z;
1218         }
1219 }
1220
1221 void DPSOFTRAST_Draw_Span_Finish(const DPSOFTRAST_State_Draw_Span * RESTRICT span, const float * RESTRICT in4f)
1222 {
1223         int x;
1224         int startx = span->startx;
1225         int endx = span->endx;
1226         int d[4];
1227         float a, b;
1228         unsigned char * RESTRICT pixelmask = span->pixelmask;
1229         unsigned char * RESTRICT pixel = (unsigned char *)dpsoftrast.fb_colorpixels[0];
1230         if (!pixel)
1231                 return;
1232         pixel += span->start * 4;
1233         // handle alphatest now (this affects depth writes too)
1234         if (dpsoftrast.user.alphatest)
1235                 for (x = startx;x < endx;x++)
1236                         if (in4f[x*4+3] < 0.5f)
1237                                 pixelmask[x] = false;
1238         // FIXME: this does not handle bigendian
1239         switch(dpsoftrast.fb_blendmode)
1240         {
1241         case DPSOFTRAST_BLENDMODE_OPAQUE:
1242                 for (x = startx;x < endx;x++)
1243                 {
1244                         if (!pixelmask[x])
1245                                 continue;
1246                         d[0] = (int)(in4f[x*4+2]*255.0f);if (d[0] > 255) d[0] = 255;
1247                         d[1] = (int)(in4f[x*4+1]*255.0f);if (d[1] > 255) d[1] = 255;
1248                         d[2] = (int)(in4f[x*4+0]*255.0f);if (d[2] > 255) d[2] = 255;
1249                         d[3] = (int)(in4f[x*4+3]*255.0f);if (d[3] > 255) d[3] = 255;
1250                         pixel[x*4+0] = d[0];
1251                         pixel[x*4+1] = d[1];
1252                         pixel[x*4+2] = d[2];
1253                         pixel[x*4+3] = d[3];
1254                 }
1255                 break;
1256         case DPSOFTRAST_BLENDMODE_ALPHA:
1257                 for (x = startx;x < endx;x++)
1258                 {
1259                         if (!pixelmask[x])
1260                                 continue;
1261                         a = in4f[x*4+3] * 255.0f;
1262                         b = 1.0f - in4f[x*4+3];
1263                         d[0] = (int)(in4f[x*4+2]*a+pixel[x*4+0]*b);if (d[0] > 255) d[0] = 255;
1264                         d[1] = (int)(in4f[x*4+1]*a+pixel[x*4+1]*b);if (d[1] > 255) d[1] = 255;
1265                         d[2] = (int)(in4f[x*4+0]*a+pixel[x*4+2]*b);if (d[2] > 255) d[2] = 255;
1266                         d[3] = (int)(in4f[x*4+3]*a+pixel[x*4+3]*b);if (d[3] > 255) d[3] = 255;
1267                         pixel[x*4+0] = d[0];
1268                         pixel[x*4+1] = d[1];
1269                         pixel[x*4+2] = d[2];
1270                         pixel[x*4+3] = d[3];
1271                 }
1272                 break;
1273         case DPSOFTRAST_BLENDMODE_ADDALPHA:
1274                 for (x = startx;x < endx;x++)
1275                 {
1276                         if (!pixelmask[x])
1277                                 continue;
1278                         a = in4f[x*4+3] * 255.0f;
1279                         d[0] = (int)(in4f[x*4+2]*a+pixel[x*4+0]);if (d[0] > 255) d[0] = 255;
1280                         d[1] = (int)(in4f[x*4+1]*a+pixel[x*4+1]);if (d[1] > 255) d[1] = 255;
1281                         d[2] = (int)(in4f[x*4+0]*a+pixel[x*4+2]);if (d[2] > 255) d[2] = 255;
1282                         d[3] = (int)(in4f[x*4+3]*a+pixel[x*4+3]);if (d[3] > 255) d[3] = 255;
1283                         pixel[x*4+0] = d[0];
1284                         pixel[x*4+1] = d[1];
1285                         pixel[x*4+2] = d[2];
1286                         pixel[x*4+3] = d[3];
1287                 }
1288                 break;
1289         case DPSOFTRAST_BLENDMODE_ADD:
1290                 for (x = startx;x < endx;x++)
1291                 {
1292                         if (!pixelmask[x])
1293                                 continue;
1294                         d[0] = (int)(in4f[x*4+2]*255.0f+pixel[x*4+0]);if (d[0] > 255) d[0] = 255;
1295                         d[1] = (int)(in4f[x*4+1]*255.0f+pixel[x*4+1]);if (d[1] > 255) d[1] = 255;
1296                         d[2] = (int)(in4f[x*4+0]*255.0f+pixel[x*4+2]);if (d[2] > 255) d[2] = 255;
1297                         d[3] = (int)(in4f[x*4+3]*255.0f+pixel[x*4+3]);if (d[3] > 255) d[3] = 255;
1298                         pixel[x*4+0] = d[0];
1299                         pixel[x*4+1] = d[1];
1300                         pixel[x*4+2] = d[2];
1301                         pixel[x*4+3] = d[3];
1302                 }
1303                 break;
1304         case DPSOFTRAST_BLENDMODE_INVMOD:
1305                 for (x = startx;x < endx;x++)
1306                 {
1307                         if (!pixelmask[x])
1308                                 continue;
1309                         d[0] = (int)((1.0f-in4f[x*4+2])*pixel[x*4+0]);if (d[0] > 255) d[0] = 255;
1310                         d[1] = (int)((1.0f-in4f[x*4+1])*pixel[x*4+1]);if (d[1] > 255) d[1] = 255;
1311                         d[2] = (int)((1.0f-in4f[x*4+0])*pixel[x*4+2]);if (d[2] > 255) d[2] = 255;
1312                         d[3] = (int)((1.0f-in4f[x*4+3])*pixel[x*4+3]);if (d[3] > 255) d[3] = 255;
1313                         pixel[x*4+0] = d[0];
1314                         pixel[x*4+1] = d[1];
1315                         pixel[x*4+2] = d[2];
1316                         pixel[x*4+3] = d[3];
1317                 }
1318                 break;
1319         case DPSOFTRAST_BLENDMODE_MUL:
1320                 for (x = startx;x < endx;x++)
1321                 {
1322                         if (!pixelmask[x])
1323                                 continue;
1324                         d[0] = (int)(in4f[x*4+2]*pixel[x*4+0]);if (d[0] > 255) d[0] = 255;
1325                         d[1] = (int)(in4f[x*4+1]*pixel[x*4+1]);if (d[1] > 255) d[1] = 255;
1326                         d[2] = (int)(in4f[x*4+0]*pixel[x*4+2]);if (d[2] > 255) d[2] = 255;
1327                         d[3] = (int)(in4f[x*4+3]*pixel[x*4+3]);if (d[3] > 255) d[3] = 255;
1328                         pixel[x*4+0] = d[0];
1329                         pixel[x*4+1] = d[1];
1330                         pixel[x*4+2] = d[2];
1331                         pixel[x*4+3] = d[3];
1332                 }
1333                 break;
1334         case DPSOFTRAST_BLENDMODE_MUL2:
1335                 for (x = startx;x < endx;x++)
1336                 {
1337                         if (!pixelmask[x])
1338                                 continue;
1339                         d[0] = (int)(in4f[x*4+2]*pixel[x*4+0]*2.0f);if (d[0] > 255) d[0] = 255;
1340                         d[1] = (int)(in4f[x*4+1]*pixel[x*4+1]*2.0f);if (d[1] > 255) d[1] = 255;
1341                         d[2] = (int)(in4f[x*4+0]*pixel[x*4+2]*2.0f);if (d[2] > 255) d[2] = 255;
1342                         d[3] = (int)(in4f[x*4+3]*pixel[x*4+3]*2.0f);if (d[3] > 255) d[3] = 255;
1343                         pixel[x*4+0] = d[0];
1344                         pixel[x*4+1] = d[1];
1345                         pixel[x*4+2] = d[2];
1346                         pixel[x*4+3] = d[3];
1347                 }
1348                 break;
1349         case DPSOFTRAST_BLENDMODE_SUBALPHA:
1350                 for (x = startx;x < endx;x++)
1351                 {
1352                         if (!pixelmask[x])
1353                                 continue;
1354                         a = in4f[x*4+3] * -255.0f;
1355                         d[0] = (int)(in4f[x*4+2]*a+pixel[x*4+0]);if (d[0] > 255) d[0] = 255;if (d[0] < 0) d[0] = 0;
1356                         d[1] = (int)(in4f[x*4+1]*a+pixel[x*4+1]);if (d[1] > 255) d[1] = 255;if (d[1] < 0) d[1] = 0;
1357                         d[2] = (int)(in4f[x*4+0]*a+pixel[x*4+2]);if (d[2] > 255) d[2] = 255;if (d[2] < 0) d[2] = 0;
1358                         d[3] = (int)(in4f[x*4+3]*a+pixel[x*4+3]);if (d[3] > 255) d[3] = 255;if (d[3] < 0) d[3] = 0;
1359                         pixel[x*4+0] = d[0];
1360                         pixel[x*4+1] = d[1];
1361                         pixel[x*4+2] = d[2];
1362                         pixel[x*4+3] = d[3];
1363                 }
1364                 break;
1365         case DPSOFTRAST_BLENDMODE_PSEUDOALPHA:
1366                 for (x = startx;x < endx;x++)
1367                 {
1368                         if (!pixelmask[x])
1369                                 continue;
1370                         a = 255.0f;
1371                         b = 1.0f - in4f[x*4+3];
1372                         d[0] = (int)(in4f[x*4+2]*a+pixel[x*4+0]*b);if (d[0] > 255) d[0] = 255;
1373                         d[1] = (int)(in4f[x*4+1]*a+pixel[x*4+1]*b);if (d[1] > 255) d[1] = 255;
1374                         d[2] = (int)(in4f[x*4+0]*a+pixel[x*4+2]*b);if (d[2] > 255) d[2] = 255;
1375                         d[3] = (int)(in4f[x*4+3]*a+pixel[x*4+3]*b);if (d[3] > 255) d[3] = 255;
1376                         pixel[x*4+0] = d[0];
1377                         pixel[x*4+1] = d[1];
1378                         pixel[x*4+2] = d[2];
1379                         pixel[x*4+3] = d[3];
1380                 }
1381                 break;
1382         }
1383 }
1384
1385 void DPSOFTRAST_Draw_Span_FinishBGRA8(const DPSOFTRAST_State_Draw_Span * RESTRICT span, const unsigned char* RESTRICT in4ub)
1386 {
1387         int x;
1388         int startx = span->startx;
1389         int endx = span->endx;
1390         int d[4];
1391         const unsigned int * RESTRICT ini = (const unsigned int *)in4ub;
1392         int a, b;
1393         unsigned char * RESTRICT pixelmask = span->pixelmask;
1394         unsigned char * RESTRICT pixel = (unsigned char *)dpsoftrast.fb_colorpixels[0];
1395         unsigned int * RESTRICT pixeli = (unsigned int *)dpsoftrast.fb_colorpixels[0];
1396         if (!pixel)
1397                 return;
1398         pixel += span->start * 4;
1399         pixeli += span->start;
1400         // handle alphatest now (this affects depth writes too)
1401         if (dpsoftrast.user.alphatest)
1402                 for (x = startx;x < endx;x++)
1403                         if (in4ub[x*4+3] < 0.5f)
1404                                 pixelmask[x] = false;
1405         // FIXME: this does not handle bigendian
1406         switch(dpsoftrast.fb_blendmode)
1407         {
1408         case DPSOFTRAST_BLENDMODE_OPAQUE:
1409                 for (x = startx;x < endx;x++)
1410                         if (pixelmask[x])
1411                                 pixeli[x] = ini[x];
1412                 break;
1413         case DPSOFTRAST_BLENDMODE_ALPHA:
1414                 for (x = startx;x < endx;x++)
1415                 {
1416                         if (!pixelmask[x])
1417                                 continue;
1418                         a = in4ub[x*4+3];
1419                         b = 256 - in4ub[x*4+3];
1420                         pixel[x*4+0] = (in4ub[x*4+0]*a+pixel[x*4+0]*b) >> 8;
1421                         pixel[x*4+1] = (in4ub[x*4+1]*a+pixel[x*4+1]*b) >> 8;
1422                         pixel[x*4+2] = (in4ub[x*4+2]*a+pixel[x*4+2]*b) >> 8;
1423                         pixel[x*4+3] = (in4ub[x*4+3]*a+pixel[x*4+3]*b) >> 8;
1424                 }
1425                 break;
1426         case DPSOFTRAST_BLENDMODE_ADDALPHA:
1427                 for (x = startx;x < endx;x++)
1428                 {
1429                         if (!pixelmask[x])
1430                                 continue;
1431                         a = in4ub[x*4+3];
1432                         d[0] = (((in4ub[x*4+0]*a)>>8)+pixel[x*4+0]);if (d[0] > 255) d[0] = 255;
1433                         d[1] = (((in4ub[x*4+1]*a)>>8)+pixel[x*4+1]);if (d[1] > 255) d[1] = 255;
1434                         d[2] = (((in4ub[x*4+2]*a)>>8)+pixel[x*4+2]);if (d[2] > 255) d[2] = 255;
1435                         d[3] = (((in4ub[x*4+3]*a)>>8)+pixel[x*4+3]);if (d[3] > 255) d[3] = 255;
1436                         pixel[x*4+0] = d[0];
1437                         pixel[x*4+1] = d[1];
1438                         pixel[x*4+2] = d[2];
1439                         pixel[x*4+3] = d[3];
1440                 }
1441                 break;
1442         case DPSOFTRAST_BLENDMODE_ADD:
1443                 for (x = startx;x < endx;x++)
1444                 {
1445                         if (!pixelmask[x])
1446                                 continue;
1447                         d[0] = (in4ub[x*4+0]+pixel[x*4+0]);if (d[0] > 255) d[0] = 255;
1448                         d[1] = (in4ub[x*4+1]+pixel[x*4+1]);if (d[1] > 255) d[1] = 255;
1449                         d[2] = (in4ub[x*4+2]+pixel[x*4+2]);if (d[2] > 255) d[2] = 255;
1450                         d[3] = (in4ub[x*4+3]+pixel[x*4+3]);if (d[3] > 255) d[3] = 255;
1451                         pixel[x*4+0] = d[0];
1452                         pixel[x*4+1] = d[1];
1453                         pixel[x*4+2] = d[2];
1454                         pixel[x*4+3] = d[3];
1455                 }
1456                 break;
1457         case DPSOFTRAST_BLENDMODE_INVMOD:
1458                 for (x = startx;x < endx;x++)
1459                 {
1460                         if (!pixelmask[x])
1461                                 continue;
1462                         pixel[x*4+0] = ((255-in4ub[x*4+0])*pixel[x*4+0])>>8;
1463                         pixel[x*4+1] = ((255-in4ub[x*4+1])*pixel[x*4+1])>>8;
1464                         pixel[x*4+2] = ((255-in4ub[x*4+2])*pixel[x*4+2])>>8;
1465                         pixel[x*4+3] = ((255-in4ub[x*4+3])*pixel[x*4+3])>>8;
1466                 }
1467                 break;
1468         case DPSOFTRAST_BLENDMODE_MUL:
1469                 for (x = startx;x < endx;x++)
1470                 {
1471                         if (!pixelmask[x])
1472                                 continue;
1473                         pixel[x*4+0] = (in4ub[x*4+0]*pixel[x*4+0])>>8;
1474                         pixel[x*4+1] = (in4ub[x*4+1]*pixel[x*4+1])>>8;
1475                         pixel[x*4+2] = (in4ub[x*4+2]*pixel[x*4+2])>>8;
1476                         pixel[x*4+3] = (in4ub[x*4+3]*pixel[x*4+3])>>8;
1477                 }
1478                 break;
1479         case DPSOFTRAST_BLENDMODE_MUL2:
1480                 for (x = startx;x < endx;x++)
1481                 {
1482                         if (!pixelmask[x])
1483                                 continue;
1484                         d[0] = (in4ub[x*4+0]*pixel[x*4+0])>>7;if (d[0] > 255) d[0] = 255;
1485                         d[1] = (in4ub[x*4+1]*pixel[x*4+1])>>7;if (d[1] > 255) d[1] = 255;
1486                         d[2] = (in4ub[x*4+2]*pixel[x*4+2])>>7;if (d[2] > 255) d[2] = 255;
1487                         d[3] = (in4ub[x*4+3]*pixel[x*4+3])>>7;if (d[3] > 255) d[3] = 255;
1488                         pixel[x*4+0] = d[0];
1489                         pixel[x*4+1] = d[1];
1490                         pixel[x*4+2] = d[2];
1491                         pixel[x*4+3] = d[3];
1492                 }
1493                 break;
1494         case DPSOFTRAST_BLENDMODE_SUBALPHA:
1495                 for (x = startx;x < endx;x++)
1496                 {
1497                         if (!pixelmask[x])
1498                                 continue;
1499                         a = in4ub[x*4+3];
1500                         d[0] = pixel[x*4+0]-((in4ub[x*4+0]*a)>>8);if (d[0] < 0) d[0] = 0;
1501                         d[1] = pixel[x*4+1]-((in4ub[x*4+1]*a)>>8);if (d[1] < 0) d[1] = 0;
1502                         d[2] = pixel[x*4+2]-((in4ub[x*4+2]*a)>>8);if (d[2] < 0) d[2] = 0;
1503                         d[3] = pixel[x*4+3]-((in4ub[x*4+3]*a)>>8);if (d[3] < 0) d[3] = 0;
1504                         pixel[x*4+0] = d[0];
1505                         pixel[x*4+1] = d[1];
1506                         pixel[x*4+2] = d[2];
1507                         pixel[x*4+3] = d[3];
1508                 }
1509                 break;
1510         case DPSOFTRAST_BLENDMODE_PSEUDOALPHA:
1511                 for (x = startx;x < endx;x++)
1512                 {
1513                         if (!pixelmask[x])
1514                                 continue;
1515                         b = 255 - in4ub[x*4+3];
1516                         d[0] = in4ub[x*4+0]+((pixel[x*4+0]*b)>>8);if (d[0] > 255) d[0] = 255;
1517                         d[1] = in4ub[x*4+1]+((pixel[x*4+1]*b)>>8);if (d[1] > 255) d[1] = 255;
1518                         d[2] = in4ub[x*4+2]+((pixel[x*4+2]*b)>>8);if (d[2] > 255) d[2] = 255;
1519                         d[3] = in4ub[x*4+3]+((pixel[x*4+3]*b)>>8);if (d[3] > 255) d[3] = 255;
1520                         pixel[x*4+0] = d[0];
1521                         pixel[x*4+1] = d[1];
1522                         pixel[x*4+2] = d[2];
1523                         pixel[x*4+3] = d[3];
1524                 }
1525                 break;
1526         }
1527 }
1528
1529 void DPSOFTRAST_Draw_Span_Texture2DVarying(const DPSOFTRAST_State_Draw_Span * RESTRICT span, float * RESTRICT out4f, int texunitindex, int arrayindex, const float * RESTRICT zf)
1530 {
1531         int x;
1532         int startx = span->startx;
1533         int endx = span->endx;
1534         int flags;
1535         float c[4];
1536         float data[4];
1537         float slope[4];
1538         float tc[2];
1539         float tcscale[2];
1540         unsigned int tci[2];
1541         unsigned int tci1[2];
1542         unsigned int tcimin[2];
1543         unsigned int tcimax[2];
1544         int tciwrapmask[2];
1545         int tciwidth;
1546         int filter;
1547         int mip;
1548         const unsigned char * RESTRICT pixelbase;
1549         const unsigned char * RESTRICT pixel[4];
1550         DPSOFTRAST_Texture *texture = dpsoftrast.texbound[texunitindex];
1551         // if no texture is bound, just fill it with white
1552         if (!texture)
1553         {
1554                 for (x = startx;x < endx;x++)
1555                 {
1556                         out4f[x*4+0] = 1.0f;
1557                         out4f[x*4+1] = 1.0f;
1558                         out4f[x*4+2] = 1.0f;
1559                         out4f[x*4+3] = 1.0f;
1560                 }
1561                 return;
1562         }
1563         mip = span->mip[texunitindex];
1564         // if this mipmap of the texture is 1 pixel, just fill it with that color
1565         if (texture->mipmap[mip][1] == 4)
1566         {
1567                 c[0] = texture->bytes[2] * (1.0f/255.0f);
1568                 c[1] = texture->bytes[1] * (1.0f/255.0f);
1569                 c[2] = texture->bytes[0] * (1.0f/255.0f);
1570                 c[3] = texture->bytes[3] * (1.0f/255.0f);
1571                 for (x = startx;x < endx;x++)
1572                 {
1573                         out4f[x*4+0] = c[0];
1574                         out4f[x*4+1] = c[1];
1575                         out4f[x*4+2] = c[2];
1576                         out4f[x*4+3] = c[3];
1577                 }
1578                 return;
1579         }
1580         filter = texture->filter & DPSOFTRAST_TEXTURE_FILTER_LINEAR;
1581         data[0] = span->data[0][arrayindex][0];
1582         data[1] = span->data[0][arrayindex][1];
1583         data[2] = span->data[0][arrayindex][2];
1584         data[3] = span->data[0][arrayindex][3];
1585         slope[0] = span->data[1][arrayindex][0];
1586         slope[1] = span->data[1][arrayindex][1];
1587         slope[2] = span->data[1][arrayindex][2];
1588         slope[3] = span->data[1][arrayindex][3];
1589         flags = texture->flags;
1590         pixelbase = (unsigned char *)texture->bytes + texture->mipmap[mip][0];
1591         tcscale[0] = texture->mipmap[mip][2];
1592         tcscale[1] = texture->mipmap[mip][3];
1593         tciwidth = texture->mipmap[mip][2];
1594         tcimin[0] = 0;
1595         tcimin[1] = 0;
1596         tcimax[0] = texture->mipmap[mip][2]-1;
1597         tcimax[1] = texture->mipmap[mip][3]-1;
1598         tciwrapmask[0] = texture->mipmap[mip][2]-1;
1599         tciwrapmask[1] = texture->mipmap[mip][3]-1;
1600         for (x = startx;x < endx;)
1601         {
1602                 float endtc[2];
1603                 unsigned int subtc[2];
1604                 unsigned int substep[2];
1605                 int endsub = x + DPSOFTRAST_MAXSUBSPAN-1;
1606                 float subscale = 4096.0f/(DPSOFTRAST_MAXSUBSPAN-1);
1607                 if (endsub >= endx)
1608                 {
1609                         endsub = endx-1;
1610                         subscale = endsub > x ? 4096.0f / (endsub - x) : 1.0f;
1611                 }
1612                 tc[0] = (data[0] + slope[0]*x) * zf[x] * tcscale[0] - 0.5f;
1613                 tc[1] = (data[1] + slope[1]*x) * zf[x] * tcscale[1] - 0.5f;
1614                 endtc[0] = (data[0] + slope[0]*endsub) * zf[endsub] * tcscale[0] - 0.5f;
1615                 endtc[1] = (data[1] + slope[1]*endsub) * zf[endsub] * tcscale[1] - 0.5f;
1616                 substep[0] = (endtc[0] - tc[0]) * subscale;
1617                 substep[1] = (endtc[1] - tc[1]) * subscale;
1618                 subtc[0] = tc[0] * (1<<12);
1619                 subtc[1] = tc[1] * (1<<12);
1620                 if (!(flags & DPSOFTRAST_TEXTURE_FLAG_CLAMPTOEDGE))
1621                 {
1622                         subtc[0] &= (tciwrapmask[0]<<12)|0xFFF;
1623                         subtc[1] &= (tciwrapmask[1]<<12)|0xFFF;
1624                 }
1625                 if(filter)
1626                 {
1627                         tci[0] = (subtc[0]>>12) - tcimin[0];
1628                         tci[1] = (subtc[1]>>12) - tcimin[1];
1629                         tci1[0] = ((subtc[0] + (endsub - x)*substep[0])>>12);
1630                         tci1[1] = ((subtc[1] + (endsub - x)*substep[1])>>12);
1631                         if (tci[0] <= tcimax[0]-1 && tci[1] <= tcimax[1]-1 && tci1[0] <= tcimax[0]-1 && tci1[1] <= tcimax[1]-1)
1632                         {
1633                                 for (; x <= endsub; x++, subtc[0] += substep[0], subtc[1] += substep[1])
1634                                 {
1635                                         unsigned int frac[2] = { subtc[0]&0xFFF, subtc[1]&0xFFF };
1636                                         unsigned int ifrac[2] = { 0x1000 - frac[0], 0x1000 - frac[1] };
1637                                         unsigned int lerp[4] = { ifrac[0]*ifrac[1], frac[0]*ifrac[1], ifrac[0]*frac[1], frac[0]*frac[1] };
1638                                         tci[0] = subtc[0]>>12;
1639                                         tci[1] = subtc[1]>>12;
1640                                         pixel[0] = pixelbase + 4 * (tci[1]*tciwidth+tci[0]);
1641                                         pixel[1] = pixel[0] + 4 * tciwidth;
1642                                         c[0] = (pixel[0][2]*lerp[0]+pixel[0][4+2]*lerp[1]+pixel[1][2]*lerp[2]+pixel[1][4+2]*lerp[3]) * (1.0f / 0xFF000000);
1643                                         c[1] = (pixel[0][1]*lerp[0]+pixel[0][4+1]*lerp[1]+pixel[1][1]*lerp[2]+pixel[1][4+1]*lerp[3]) * (1.0f / 0xFF000000);
1644                                         c[2] = (pixel[0][0]*lerp[0]+pixel[0][4+0]*lerp[1]+pixel[1][0]*lerp[2]+pixel[1][4+0]*lerp[3]) * (1.0f / 0xFF000000);
1645                                         c[3] = (pixel[0][3]*lerp[0]+pixel[0][4+3]*lerp[1]+pixel[1][3]*lerp[2]+pixel[1][4+3]*lerp[3]) * (1.0f / 0xFF000000);
1646                                         out4f[x*4+0] = c[0];
1647                                         out4f[x*4+1] = c[1];
1648                                         out4f[x*4+2] = c[2];
1649                                         out4f[x*4+3] = c[3];
1650                                 }
1651                         }
1652                         else if (flags & DPSOFTRAST_TEXTURE_FLAG_CLAMPTOEDGE)
1653                         {
1654                                 for (; x <= endsub; x++, subtc[0] += substep[0], subtc[1] += substep[1])
1655                                 {
1656                                         unsigned int frac[2] = { subtc[0]&0xFFF, subtc[1]&0xFFF };
1657                                         unsigned int ifrac[2] = { 0x1000 - frac[0], 0x1000 - frac[1] };
1658                                         unsigned int lerp[4] = { ifrac[0]*ifrac[1], frac[0]*ifrac[1], ifrac[0]*frac[1], frac[0]*frac[1] };
1659                                         tci[0] = subtc[0]>>12;
1660                                         tci[1] = subtc[1]>>12;
1661                                         tci1[0] = tci[0] + 1;
1662                                         tci1[1] = tci[1] + 1;
1663                                         tci[0] = tci[0] >= tcimin[0] ? (tci[0] <= tcimax[0] ? tci[0] : tcimax[0]) : tcimin[0];
1664                                         tci[1] = tci[1] >= tcimin[1] ? (tci[1] <= tcimax[1] ? tci[1] : tcimax[1]) : tcimin[1];
1665                                         tci1[0] = tci1[0] >= tcimin[0] ? (tci1[0] <= tcimax[0] ? tci1[0] : tcimax[0]) : tcimin[0];
1666                                         tci1[1] = tci1[1] >= tcimin[1] ? (tci1[1] <= tcimax[1] ? tci1[1] : tcimax[1]) : tcimin[1];
1667                                         pixel[0] = pixelbase + 4 * (tci[1]*tciwidth+tci[0]);
1668                                         pixel[1] = pixelbase + 4 * (tci[1]*tciwidth+tci1[0]);
1669                                         pixel[2] = pixelbase + 4 * (tci1[1]*tciwidth+tci[0]);
1670                                         pixel[3] = pixelbase + 4 * (tci1[1]*tciwidth+tci1[0]);
1671                                         c[0] = (pixel[0][2]*lerp[0]+pixel[1][2]*lerp[1]+pixel[2][2]*lerp[2]+pixel[3][2]*lerp[3]) * (1.0f / 0xFF000000);
1672                                         c[1] = (pixel[0][1]*lerp[0]+pixel[1][1]*lerp[1]+pixel[2][1]*lerp[2]+pixel[3][1]*lerp[3]) * (1.0f / 0xFF000000);
1673                                         c[2] = (pixel[0][0]*lerp[0]+pixel[1][0]*lerp[1]+pixel[2][0]*lerp[2]+pixel[3][0]*lerp[3]) * (1.0f / 0xFF000000);
1674                                         c[3] = (pixel[0][3]*lerp[0]+pixel[1][3]*lerp[1]+pixel[2][3]*lerp[2]+pixel[3][3]*lerp[3]) * (1.0f / 0xFF000000);
1675                                         out4f[x*4+0] = c[0];
1676                                         out4f[x*4+1] = c[1];
1677                                         out4f[x*4+2] = c[2];
1678                                         out4f[x*4+3] = c[3];
1679                                 }
1680                         }
1681                         else
1682                         {
1683                                 for (; x <= endsub; x++, subtc[0] += substep[0], subtc[1] += substep[1])
1684                                 {
1685                                         unsigned int frac[2] = { subtc[0]&0xFFF, subtc[1]&0xFFF };
1686                                         unsigned int ifrac[2] = { 0x1000 - frac[0], 0x1000 - frac[1] };
1687                                         unsigned int lerp[4] = { ifrac[0]*ifrac[1], frac[0]*ifrac[1], ifrac[0]*frac[1], frac[0]*frac[1] };
1688                                         tci[0] = subtc[0]>>12;
1689                                         tci[1] = subtc[1]>>12;
1690                                         tci1[0] = tci[0] + 1;
1691                                         tci1[1] = tci[1] + 1;
1692                                         tci[0] &= tciwrapmask[0];
1693                                         tci[1] &= tciwrapmask[1];
1694                                         tci1[0] &= tciwrapmask[0];
1695                                         tci1[1] &= tciwrapmask[1];
1696                                         pixel[0] = pixelbase + 4 * (tci[1]*tciwidth+tci[0]);
1697                                         pixel[1] = pixelbase + 4 * (tci[1]*tciwidth+tci1[0]);
1698                                         pixel[2] = pixelbase + 4 * (tci1[1]*tciwidth+tci[0]);
1699                                         pixel[3] = pixelbase + 4 * (tci1[1]*tciwidth+tci1[0]);
1700                                         c[0] = (pixel[0][2]*lerp[0]+pixel[1][2]*lerp[1]+pixel[2][2]*lerp[2]+pixel[3][2]*lerp[3]) * (1.0f / 0xFF000000);
1701                                         c[1] = (pixel[0][1]*lerp[0]+pixel[1][1]*lerp[1]+pixel[2][1]*lerp[2]+pixel[3][1]*lerp[3]) * (1.0f / 0xFF000000);
1702                                         c[2] = (pixel[0][0]*lerp[0]+pixel[1][0]*lerp[1]+pixel[2][0]*lerp[2]+pixel[3][0]*lerp[3]) * (1.0f / 0xFF000000);
1703                                         c[3] = (pixel[0][3]*lerp[0]+pixel[1][3]*lerp[1]+pixel[2][3]*lerp[2]+pixel[3][3]*lerp[3]) * (1.0f / 0xFF000000);
1704                                         out4f[x*4+0] = c[0];
1705                                         out4f[x*4+1] = c[1];
1706                                         out4f[x*4+2] = c[2];
1707                                         out4f[x*4+3] = c[3];
1708                                 }
1709                         }
1710                 }
1711                 else if (flags & DPSOFTRAST_TEXTURE_FLAG_CLAMPTOEDGE)
1712                 {
1713                         for (; x <= endsub; x++, subtc[0] += substep[0], subtc[1] += substep[1])
1714                         {
1715                                 tci[0] = subtc[0]>>12;
1716                                 tci[1] = subtc[1]>>12;
1717                                 tci[0] = tci[0] >= tcimin[0] ? (tci[0] <= tcimax[0] ? tci[0] : tcimax[0]) : tcimin[0];
1718                                 tci[1] = tci[1] >= tcimin[1] ? (tci[1] <= tcimax[1] ? tci[1] : tcimax[1]) : tcimin[1];
1719                                 pixel[0] = pixelbase + 4 * (tci[1]*tciwidth+tci[0]);
1720                                 c[0] = pixel[0][2] * (1.0f / 255.0f);
1721                                 c[1] = pixel[0][1] * (1.0f / 255.0f);
1722                                 c[2] = pixel[0][0] * (1.0f / 255.0f);
1723                                 c[3] = pixel[0][3] * (1.0f / 255.0f);
1724                                 out4f[x*4+0] = c[0];
1725                                 out4f[x*4+1] = c[1];
1726                                 out4f[x*4+2] = c[2];
1727                                 out4f[x*4+3] = c[3];
1728                         }
1729                 }
1730                 else
1731                 {
1732                         for (; x <= endsub; x++, subtc[0] += substep[0], subtc[1] += substep[1])
1733                         {
1734                                 tci[0] = subtc[0]>>12;
1735                                 tci[1] = subtc[1]>>12;
1736                                 tci[0] &= tciwrapmask[0];
1737                                 tci[1] &= tciwrapmask[1];
1738                                 pixel[0] = pixelbase + 4 * (tci[1]*tciwidth+tci[0]);
1739                                 c[0] = pixel[0][2] * (1.0f / 255.0f);
1740                                 c[1] = pixel[0][1] * (1.0f / 255.0f);
1741                                 c[2] = pixel[0][0] * (1.0f / 255.0f);
1742                                 c[3] = pixel[0][3] * (1.0f / 255.0f);
1743                                 out4f[x*4+0] = c[0];
1744                                 out4f[x*4+1] = c[1];
1745                                 out4f[x*4+2] = c[2];
1746                                 out4f[x*4+3] = c[3];
1747                         }
1748                 }
1749         }
1750 }
1751
1752 void DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(const DPSOFTRAST_State_Draw_Span * RESTRICT span, unsigned char * RESTRICT out4ub, int texunitindex, int arrayindex, const float * RESTRICT zf)
1753 {
1754         int x;
1755         int startx = span->startx;
1756         int endx = span->endx;
1757         int flags;
1758         float data[4];
1759         float slope[4];
1760         float tc[2];
1761         float tcscale[2];
1762         unsigned int tci[2];
1763         unsigned int tci1[2];
1764         unsigned int tcimin[2];
1765         unsigned int tcimax[2];
1766         int tciwrapmask[2];
1767         int tciwidth;
1768         int filter;
1769         int mip;
1770         unsigned int k;
1771         unsigned int *outi = (unsigned int *)out4ub;
1772         const unsigned char * RESTRICT pixelbase;
1773         const unsigned int * RESTRICT pixelbasei;
1774         const unsigned char * RESTRICT pixel[4];
1775         DPSOFTRAST_Texture *texture = dpsoftrast.texbound[texunitindex];
1776         // if no texture is bound, just fill it with white
1777         if (!texture)
1778         {
1779                 memset(out4ub, 255, span->length*4);
1780                 return;
1781         }
1782         mip = span->mip[texunitindex];
1783         // if this mipmap of the texture is 1 pixel, just fill it with that color
1784         if (texture->mipmap[mip][1] == 4)
1785         {
1786                 k = *((const unsigned int *)texture->bytes);
1787                 for (x = startx;x < endx;x++)
1788                         outi[x] = k;
1789                 return;
1790         }
1791         filter = texture->filter & DPSOFTRAST_TEXTURE_FILTER_LINEAR;
1792         data[0] = span->data[0][arrayindex][0];
1793         data[1] = span->data[0][arrayindex][1];
1794         data[2] = span->data[0][arrayindex][2];
1795         data[3] = span->data[0][arrayindex][3];
1796         slope[0] = span->data[1][arrayindex][0];
1797         slope[1] = span->data[1][arrayindex][1];
1798         slope[2] = span->data[1][arrayindex][2];
1799         slope[3] = span->data[1][arrayindex][3];
1800         flags = texture->flags;
1801         pixelbase = (const unsigned char *)texture->bytes + texture->mipmap[mip][0];
1802         pixelbasei = (const unsigned int *)pixelbase;
1803         tcscale[0] = texture->mipmap[mip][2];
1804         tcscale[1] = texture->mipmap[mip][3];
1805         tciwidth = texture->mipmap[mip][2];
1806         tcimin[0] = 0;
1807         tcimin[1] = 0;
1808         tcimax[0] = texture->mipmap[mip][2]-1;
1809         tcimax[1] = texture->mipmap[mip][3]-1;
1810         tciwrapmask[0] = texture->mipmap[mip][2]-1;
1811         tciwrapmask[1] = texture->mipmap[mip][3]-1;
1812         for (x = startx;x < endx;)
1813         {
1814                 float endtc[2];
1815                 unsigned int subtc[2];
1816                 unsigned int substep[2];
1817                 int endsub = x + DPSOFTRAST_MAXSUBSPAN-1;
1818                 float subscale = 4096.0f/(DPSOFTRAST_MAXSUBSPAN-1);
1819                 if (endsub >= endx)
1820                 {
1821                         endsub = endx-1;
1822                         subscale = endsub > x ? 4096.0f / (endsub - x) : 1.0f;
1823                 }
1824                 tc[0] = (data[0] + slope[0]*x) * zf[x] * tcscale[0] - 0.5f;
1825                 tc[1] = (data[1] + slope[1]*x) * zf[x] * tcscale[1] - 0.5f;
1826                 endtc[0] = (data[0] + slope[0]*endsub) * zf[endsub] * tcscale[0] - 0.5f;
1827                 endtc[1] = (data[1] + slope[1]*endsub) * zf[endsub] * tcscale[1] - 0.5f;
1828                 substep[0] = (endtc[0] - tc[0]) * subscale;
1829                 substep[1] = (endtc[1] - tc[1]) * subscale;
1830                 subtc[0] = tc[0] * (1<<12);
1831                 subtc[1] = tc[1] * (1<<12);
1832                 if (!(flags & DPSOFTRAST_TEXTURE_FLAG_CLAMPTOEDGE))
1833                 {
1834                         subtc[0] &= (tciwrapmask[0]<<12)|0xFFF;
1835                         subtc[1] &= (tciwrapmask[1]<<12)|0xFFF;
1836                 }
1837 #if 0
1838 // LordHavoc: an attempt at reducing number of integer multiplies, did not show any improvement in benchmarks, abandoned.
1839                 if (filter && dpsoftrast_test)
1840                 {
1841                         const unsigned int * RESTRICT pixeli[4];
1842                         tci[0] = (subtc[0]>>12) - tcimin[0];
1843                         tci[1] = (subtc[1]>>12) - tcimin[1]; 
1844                         tci1[0] = ((subtc[0] + (endsub - x)*substep[0])>>12);
1845                         tci1[1] = ((subtc[1] + (endsub - x)*substep[1])>>12); 
1846                         if (tci[0] <= tcimax[0]-1 && tci[1] <= tcimax[1]-1 && tci1[0] <= tcimax[0]-1 && tci1[1] <= tcimax[1]-1)
1847                         {
1848                                 for (; x <= endsub; x++, subtc[0] += substep[0], subtc[1] += substep[1])
1849                                 {
1850                                         unsigned int frac[2] = { subtc[0]&0xFFF, subtc[1]&0xFFF };
1851                                         unsigned int ifrac[2] = { 0x1000 - frac[0], 0x1000 - frac[1] };
1852                                         unsigned int lerp[4] = { (ifrac[0]*ifrac[1]) >> 16, (frac[0]*ifrac[1]) >> 16, (ifrac[0]*frac[1]) >> 16, (frac[0]*frac[1]) >> 16 };
1853                                         tci[0] = subtc[0]>>12;
1854                                         tci[1] = subtc[1]>>12;
1855                                         pixeli[0] = pixelbasei + (tci[1]*tciwidth+tci[0]);
1856                                         pixeli[1] = pixeli[0] + tciwidth;
1857                                         outi[x] = ((((pixeli[0][0] >> 8) & 0x00FF00FF) * lerp[0] + ((pixeli[0][1] >> 8) & 0x00FF00FF) * lerp[1] + ((pixeli[1][0] >> 8) & 0x00FF00FF) * lerp[2] + ((pixeli[1][1] >> 8) & 0x00FF00FF) * lerp[3])     & 0xFF00FF00)
1858                                                 | ((((pixeli[0][0]       & 0x00FF00FF) * lerp[0] + ( pixeli[0][1]       & 0x00FF00FF) * lerp[1] + ( pixeli[1][0]       & 0x00FF00FF) * lerp[2] + ( pixeli[1][1]       & 0x00FF00FF) * lerp[3])>>8) & 0x00FF00FF);
1859                                 }
1860                         }
1861                         else if (flags & DPSOFTRAST_TEXTURE_FLAG_CLAMPTOEDGE)
1862                         {
1863                                 for (; x <= endsub; x++, subtc[0] += substep[0], subtc[1] += substep[1])
1864                                 {
1865                                         unsigned int frac[2] = { subtc[0]&0xFFF, subtc[1]&0xFFF };
1866                                         unsigned int ifrac[2] = { 0x1000 - frac[0], 0x1000 - frac[1] };
1867                                         unsigned int lerp[4] = { (ifrac[0]*ifrac[1]) >> 16, (frac[0]*ifrac[1]) >> 16, (ifrac[0]*frac[1]) >> 16, (frac[0]*frac[1]) >> 16 };
1868                                         tci[0] = subtc[0]>>12;
1869                                         tci[1] = subtc[1]>>12;
1870                                         tci1[0] = tci[0] + 1;
1871                                         tci1[1] = tci[1] + 1;
1872                                         tci[0] = tci[0] >= tcimin[0] ? (tci[0] <= tcimax[0] ? tci[0] : tcimax[0]) : tcimin[0];
1873                                         tci[1] = tci[1] >= tcimin[1] ? (tci[1] <= tcimax[1] ? tci[1] : tcimax[1]) : tcimin[1];
1874                                         tci1[0] = tci1[0] >= tcimin[0] ? (tci1[0] <= tcimax[0] ? tci1[0] : tcimax[0]) : tcimin[0];
1875                                         tci1[1] = tci1[1] >= tcimin[1] ? (tci1[1] <= tcimax[1] ? tci1[1] : tcimax[1]) : tcimin[1];
1876                                         pixeli[0] = pixelbasei + (tci[1]*tciwidth+tci[0]);
1877                                         pixeli[1] = pixelbasei + (tci[1]*tciwidth+tci1[0]);
1878                                         pixeli[2] = pixelbasei + (tci1[1]*tciwidth+tci[0]);
1879                                         pixeli[3] = pixelbasei + (tci1[1]*tciwidth+tci1[0]);
1880                                         outi[x] = ((((pixeli[0][0] >> 8) & 0x00FF00FF) * lerp[0] + ((pixeli[1][0] >> 8) & 0x00FF00FF) * lerp[1] + ((pixeli[2][0] >> 8) & 0x00FF00FF) * lerp[2] + ((pixeli[3][0] >> 8) & 0x00FF00FF) * lerp[3])     & 0xFF00FF00)
1881                                                 | ((((pixeli[0][0]       & 0x00FF00FF) * lerp[0] + ( pixeli[1][0]       & 0x00FF00FF) * lerp[1] + ( pixeli[2][0]       & 0x00FF00FF) * lerp[2] + ( pixeli[3][0]       & 0x00FF00FF) * lerp[3])>>8) & 0x00FF00FF);
1882                                 }
1883                         }
1884                         else
1885                         {
1886                                 for (; x <= endsub; x++, subtc[0] += substep[0], subtc[1] += substep[1])
1887                                 {
1888                                         unsigned int frac[2] = { subtc[0]&0xFFF, subtc[1]&0xFFF };
1889                                         unsigned int ifrac[2] = { 0x1000 - frac[0], 0x1000 - frac[1] };
1890                                         unsigned int lerp[4] = { (ifrac[0]*ifrac[1]) >> 16, (frac[0]*ifrac[1]) >> 16, (ifrac[0]*frac[1]) >> 16, (frac[0]*frac[1]) >> 16 };
1891                                         tci[0] = subtc[0]>>12;
1892                                         tci[1] = subtc[1]>>12;
1893                                         tci1[0] = tci[0] + 1;
1894                                         tci1[1] = tci[1] + 1;
1895                                         tci[0] &= tciwrapmask[0];
1896                                         tci[1] &= tciwrapmask[1];
1897                                         tci1[0] &= tciwrapmask[0];
1898                                         tci1[1] &= tciwrapmask[1];
1899                                         pixeli[0] = pixelbasei + (tci[1]*tciwidth+tci[0]);
1900                                         pixeli[1] = pixelbasei + (tci[1]*tciwidth+tci1[0]);
1901                                         pixeli[2] = pixelbasei + (tci1[1]*tciwidth+tci[0]);
1902                                         pixeli[3] = pixelbasei + (tci1[1]*tciwidth+tci1[0]);
1903                                         outi[x] = ((((pixeli[0][0] >> 8) & 0x00FF00FF) * lerp[0] + ((pixeli[1][0] >> 8) & 0x00FF00FF) * lerp[1] + ((pixeli[2][0] >> 8) & 0x00FF00FF) * lerp[2] + ((pixeli[3][0] >> 8) & 0x00FF00FF) * lerp[3])     & 0xFF00FF00)
1904                                                 | ((((pixeli[0][0]       & 0x00FF00FF) * lerp[0] + ( pixeli[1][0]       & 0x00FF00FF) * lerp[1] + ( pixeli[2][0]       & 0x00FF00FF) * lerp[2] + ( pixeli[3][0]       & 0x00FF00FF) * lerp[3])>>8) & 0x00FF00FF);
1905                                 }
1906                         }
1907                 }
1908                 else
1909 #endif
1910                 if (filter)
1911                 {
1912                         tci[0] = (subtc[0]>>12) - tcimin[0];
1913                         tci[1] = (subtc[1]>>12) - tcimin[1]; 
1914                         tci1[0] = ((subtc[0] + (endsub - x)*substep[0])>>12);
1915                         tci1[1] = ((subtc[1] + (endsub - x)*substep[1])>>12); 
1916                         if (tci[0] <= tcimax[0]-1 && tci[1] <= tcimax[1]-1 && tci1[0] <= tcimax[0]-1 && tci1[1] <= tcimax[1]-1)
1917                         {
1918                                 for (; x <= endsub; x++, subtc[0] += substep[0], subtc[1] += substep[1])
1919                                 {
1920                                         unsigned int frac[2] = { subtc[0]&0xFFF, subtc[1]&0xFFF };
1921                                         unsigned int ifrac[2] = { 0x1000 - frac[0], 0x1000 - frac[1] };
1922                                         unsigned int lerp[4] = { ifrac[0]*ifrac[1], frac[0]*ifrac[1], ifrac[0]*frac[1], frac[0]*frac[1] };
1923                                         tci[0] = subtc[0]>>12;
1924                                         tci[1] = subtc[1]>>12;
1925                                         pixel[0] = pixelbase + 4 * (tci[1]*tciwidth+tci[0]);
1926                                         pixel[1] = pixel[0] + 4 * tciwidth;
1927                                         out4ub[x*4+0] = (pixel[0][0]*lerp[0]+pixel[0][4+0]*lerp[1]+pixel[1][0]*lerp[2]+pixel[1][4+0]*lerp[3]) >> 24;
1928                                         out4ub[x*4+1] = (pixel[0][1]*lerp[0]+pixel[0][4+1]*lerp[1]+pixel[1][1]*lerp[2]+pixel[1][4+1]*lerp[3]) >> 24;
1929                                         out4ub[x*4+2] = (pixel[0][2]*lerp[0]+pixel[0][4+2]*lerp[1]+pixel[1][2]*lerp[2]+pixel[1][4+2]*lerp[3]) >> 24;
1930                                         out4ub[x*4+3] = (pixel[0][3]*lerp[0]+pixel[0][4+3]*lerp[1]+pixel[1][3]*lerp[2]+pixel[1][4+3]*lerp[3]) >> 24;
1931                                 }
1932                         }
1933                         else if (flags & DPSOFTRAST_TEXTURE_FLAG_CLAMPTOEDGE)
1934                         {
1935                                 for (; x <= endsub; x++, subtc[0] += substep[0], subtc[1] += substep[1])
1936                                 {
1937                                         unsigned int frac[2] = { subtc[0]&0xFFF, subtc[1]&0xFFF };
1938                                         unsigned int ifrac[2] = { 0x1000 - frac[0], 0x1000 - frac[1] };
1939                                         unsigned int lerp[4] = { ifrac[0]*ifrac[1], frac[0]*ifrac[1], ifrac[0]*frac[1], frac[0]*frac[1] };
1940                                         tci[0] = subtc[0]>>12;
1941                                         tci[1] = subtc[1]>>12;
1942                                         tci1[0] = tci[0] + 1;
1943                                         tci1[1] = tci[1] + 1;
1944                                         tci[0] = tci[0] >= tcimin[0] ? (tci[0] <= tcimax[0] ? tci[0] : tcimax[0]) : tcimin[0];
1945                                         tci[1] = tci[1] >= tcimin[1] ? (tci[1] <= tcimax[1] ? tci[1] : tcimax[1]) : tcimin[1];
1946                                         tci1[0] = tci1[0] >= tcimin[0] ? (tci1[0] <= tcimax[0] ? tci1[0] : tcimax[0]) : tcimin[0];
1947                                         tci1[1] = tci1[1] >= tcimin[1] ? (tci1[1] <= tcimax[1] ? tci1[1] : tcimax[1]) : tcimin[1];
1948                                         pixel[0] = pixelbase + 4 * (tci[1]*tciwidth+tci[0]);
1949                                         pixel[1] = pixelbase + 4 * (tci[1]*tciwidth+tci1[0]);
1950                                         pixel[2] = pixelbase + 4 * (tci1[1]*tciwidth+tci[0]);
1951                                         pixel[3] = pixelbase + 4 * (tci1[1]*tciwidth+tci1[0]);
1952                                         out4ub[x*4+0] = (pixel[0][0]*lerp[0]+pixel[1][0]*lerp[1]+pixel[2][0]*lerp[2]+pixel[3][0]*lerp[3]) >> 24;
1953                                         out4ub[x*4+1] = (pixel[0][1]*lerp[0]+pixel[1][1]*lerp[1]+pixel[2][1]*lerp[2]+pixel[3][1]*lerp[3]) >> 24;
1954                                         out4ub[x*4+2] = (pixel[0][2]*lerp[0]+pixel[1][2]*lerp[1]+pixel[2][2]*lerp[2]+pixel[3][2]*lerp[3]) >> 24;
1955                                         out4ub[x*4+3] = (pixel[0][3]*lerp[0]+pixel[1][3]*lerp[1]+pixel[2][3]*lerp[2]+pixel[3][3]*lerp[3]) >> 24;
1956                                 }
1957                         }
1958                         else
1959                         {
1960                                 for (; x <= endsub; x++, subtc[0] += substep[0], subtc[1] += substep[1])
1961                                 {
1962                                         unsigned int frac[2] = { subtc[0]&0xFFF, subtc[1]&0xFFF };
1963                                         unsigned int ifrac[2] = { 0x1000 - frac[0], 0x1000 - frac[1] };
1964                                         unsigned int lerp[4] = { ifrac[0]*ifrac[1], frac[0]*ifrac[1], ifrac[0]*frac[1], frac[0]*frac[1] };
1965                                         tci[0] = subtc[0]>>12;
1966                                         tci[1] = subtc[1]>>12;
1967                                         tci1[0] = tci[0] + 1;
1968                                         tci1[1] = tci[1] + 1;
1969                                         tci[0] &= tciwrapmask[0];
1970                                         tci[1] &= tciwrapmask[1];
1971                                         tci1[0] &= tciwrapmask[0];
1972                                         tci1[1] &= tciwrapmask[1];
1973                                         pixel[0] = pixelbase + 4 * (tci[1]*tciwidth+tci[0]);
1974                                         pixel[1] = pixelbase + 4 * (tci[1]*tciwidth+tci1[0]);
1975                                         pixel[2] = pixelbase + 4 * (tci1[1]*tciwidth+tci[0]);
1976                                         pixel[3] = pixelbase + 4 * (tci1[1]*tciwidth+tci1[0]);
1977                                         out4ub[x*4+0] = (pixel[0][0]*lerp[0]+pixel[1][0]*lerp[1]+pixel[2][0]*lerp[2]+pixel[3][0]*lerp[3]) >> 24;
1978                                         out4ub[x*4+1] = (pixel[0][1]*lerp[0]+pixel[1][1]*lerp[1]+pixel[2][1]*lerp[2]+pixel[3][1]*lerp[3]) >> 24;
1979                                         out4ub[x*4+2] = (pixel[0][2]*lerp[0]+pixel[1][2]*lerp[1]+pixel[2][2]*lerp[2]+pixel[3][2]*lerp[3]) >> 24;
1980                                         out4ub[x*4+3] = (pixel[0][3]*lerp[0]+pixel[1][3]*lerp[1]+pixel[2][3]*lerp[2]+pixel[3][3]*lerp[3]) >> 24;
1981                                 }
1982                         }
1983                 }
1984                 else
1985                 {
1986                         tci[0] = (subtc[0]>>12) - tcimin[0];
1987                         tci[1] = (subtc[1]>>12) - tcimin[1]; 
1988                         tci1[0] = ((subtc[0] + (endsub - x)*substep[0])>>12);
1989                         tci1[1] = ((subtc[1] + (endsub - x)*substep[1])>>12); 
1990                         if (tci[0] <= tcimax[0]-1 && tci[1] <= tcimax[1]-1 && tci1[0] <= tcimax[0]-1 && tci1[1] <= tcimax[1]-1)
1991                         {
1992                                 for (; x <= endsub; x++, subtc[0] += substep[0], subtc[1] += substep[1])
1993                                 {
1994                                         tci[0] = subtc[0]>>12;
1995                                         tci[1] = subtc[1]>>12;
1996                                         outi[x] = pixelbasei[(tci[1]*tciwidth+tci[0])];
1997                                 }
1998                         }
1999                         else if (flags & DPSOFTRAST_TEXTURE_FLAG_CLAMPTOEDGE)
2000                         {
2001                                 for (; x <= endsub; x++, subtc[0] += substep[0], subtc[1] += substep[1])
2002                                 {
2003                                         tci[0] = subtc[0]>>12;
2004                                         tci[1] = subtc[1]>>12;
2005                                         tci[0] = tci[0] >= tcimin[0] ? (tci[0] <= tcimax[0] ? tci[0] : tcimax[0]) : tcimin[0];
2006                                         tci[1] = tci[1] >= tcimin[1] ? (tci[1] <= tcimax[1] ? tci[1] : tcimax[1]) : tcimin[1];
2007                                         outi[x] = pixelbasei[(tci[1]*tciwidth+tci[0])];
2008                                 }
2009                         }
2010                         else
2011                         {
2012                                 for (; x <= endsub; x++, subtc[0] += substep[0], subtc[1] += substep[1])
2013                                 {
2014                                         tci[0] = subtc[0]>>12;
2015                                         tci[1] = subtc[1]>>12;
2016                                         tci[0] &= tciwrapmask[0];
2017                                         tci[1] &= tciwrapmask[1];
2018                                         outi[x] = pixelbasei[(tci[1]*tciwidth+tci[0])];
2019                                 }
2020                         }
2021                 }
2022         }
2023 }
2024
2025 void DPSOFTRAST_Draw_Span_TextureCubeVaryingBGRA8(const DPSOFTRAST_State_Draw_Span * RESTRICT span, unsigned char * RESTRICT out4ub, int texunitindex, int arrayindex, const float * RESTRICT zf)
2026 {
2027         // TODO: IMPLEMENT
2028         memset(out4ub, 255, span->length*4);
2029 }
2030
// shadowmap lookup - not implemented yet.
// the input vector is ignored and 1.0f is always returned.
float DPSOFTRAST_SampleShadowmap(const float *vector)
{
	// TODO: IMPLEMENT
	const float placeholder = 1.0f;
	(void)vector;
	return placeholder;
}
2036
2037 void DPSOFTRAST_Draw_Span_MultiplyVarying(const DPSOFTRAST_State_Draw_Span * RESTRICT span, float *out4f, const float *in4f, int arrayindex, const float *zf)
2038 {
2039         int x;
2040         int startx = span->startx;
2041         int endx = span->endx;
2042         float c[4];
2043         float data[4];
2044         float slope[4];
2045         float z;
2046         data[0] = span->data[0][arrayindex][0];
2047         data[1] = span->data[0][arrayindex][1];
2048         data[2] = span->data[0][arrayindex][2];
2049         data[3] = span->data[0][arrayindex][3];
2050         slope[0] = span->data[1][arrayindex][0];
2051         slope[1] = span->data[1][arrayindex][1];
2052         slope[2] = span->data[1][arrayindex][2];
2053         slope[3] = span->data[1][arrayindex][3];
2054         for (x = startx;x < endx;x++)
2055         {
2056                 z = zf[x];
2057                 c[0] = (data[0] + slope[0]*x) * z;
2058                 c[1] = (data[1] + slope[1]*x) * z;
2059                 c[2] = (data[2] + slope[2]*x) * z;
2060                 c[3] = (data[3] + slope[3]*x) * z;
2061                 out4f[x*4+0] = in4f[x*4+0] * c[0];
2062                 out4f[x*4+1] = in4f[x*4+1] * c[1];
2063                 out4f[x*4+2] = in4f[x*4+2] * c[2];
2064                 out4f[x*4+3] = in4f[x*4+3] * c[3];
2065         }
2066 }
2067
2068 void DPSOFTRAST_Draw_Span_Varying(const DPSOFTRAST_State_Draw_Span * RESTRICT span, float *out4f, int arrayindex, const float *zf)
2069 {
2070         int x;
2071         int startx = span->startx;
2072         int endx = span->endx;
2073         float c[4];
2074         float data[4];
2075         float slope[4];
2076         float z;
2077         data[0] = span->data[0][arrayindex][0];
2078         data[1] = span->data[0][arrayindex][1];
2079         data[2] = span->data[0][arrayindex][2];
2080         data[3] = span->data[0][arrayindex][3];
2081         slope[0] = span->data[1][arrayindex][0];
2082         slope[1] = span->data[1][arrayindex][1];
2083         slope[2] = span->data[1][arrayindex][2];
2084         slope[3] = span->data[1][arrayindex][3];
2085         for (x = startx;x < endx;x++)
2086         {
2087                 z = zf[x];
2088                 c[0] = (data[0] + slope[0]*x) * z;
2089                 c[1] = (data[1] + slope[1]*x) * z;
2090                 c[2] = (data[2] + slope[2]*x) * z;
2091                 c[3] = (data[3] + slope[3]*x) * z;
2092                 out4f[x*4+0] = c[0];
2093                 out4f[x*4+1] = c[1];
2094                 out4f[x*4+2] = c[2];
2095                 out4f[x*4+3] = c[3];
2096         }
2097 }
2098
2099 void DPSOFTRAST_Draw_Span_AddBloom(const DPSOFTRAST_State_Draw_Span * RESTRICT span, float *out4f, const float *ina4f, const float *inb4f, const float *subcolor)
2100 {
2101         int x, startx = span->startx, endx = span->endx;
2102         float c[4], localcolor[4];
2103         localcolor[0] = subcolor[0];
2104         localcolor[1] = subcolor[1];
2105         localcolor[2] = subcolor[2];
2106         localcolor[3] = subcolor[3];
2107         for (x = startx;x < endx;x++)
2108         {
2109                 c[0] = inb4f[x*4+0] - localcolor[0];if (c[0] < 0.0f) c[0] = 0.0f;
2110                 c[1] = inb4f[x*4+1] - localcolor[1];if (c[1] < 0.0f) c[1] = 0.0f;
2111                 c[2] = inb4f[x*4+2] - localcolor[2];if (c[2] < 0.0f) c[2] = 0.0f;
2112                 c[3] = inb4f[x*4+3] - localcolor[3];if (c[3] < 0.0f) c[3] = 0.0f;
2113                 out4f[x*4+0] = ina4f[x*4+0] + c[0];
2114                 out4f[x*4+1] = ina4f[x*4+1] + c[1];
2115                 out4f[x*4+2] = ina4f[x*4+2] + c[2];
2116                 out4f[x*4+3] = ina4f[x*4+3] + c[3];
2117         }
2118 }
2119
2120 void DPSOFTRAST_Draw_Span_MultiplyBuffers(const DPSOFTRAST_State_Draw_Span * RESTRICT span, float *out4f, const float *ina4f, const float *inb4f)
2121 {
2122         int x, startx = span->startx, endx = span->endx;
2123         for (x = startx;x < endx;x++)
2124         {
2125                 out4f[x*4+0] = ina4f[x*4+0] * inb4f[x*4+0];
2126                 out4f[x*4+1] = ina4f[x*4+1] * inb4f[x*4+1];
2127                 out4f[x*4+2] = ina4f[x*4+2] * inb4f[x*4+2];
2128                 out4f[x*4+3] = ina4f[x*4+3] * inb4f[x*4+3];
2129         }
2130 }
2131
2132 void DPSOFTRAST_Draw_Span_AddBuffers(const DPSOFTRAST_State_Draw_Span * RESTRICT span, float *out4f, const float *ina4f, const float *inb4f)
2133 {
2134         int x, startx = span->startx, endx = span->endx;
2135         for (x = startx;x < endx;x++)
2136         {
2137                 out4f[x*4+0] = ina4f[x*4+0] + inb4f[x*4+0];
2138                 out4f[x*4+1] = ina4f[x*4+1] + inb4f[x*4+1];
2139                 out4f[x*4+2] = ina4f[x*4+2] + inb4f[x*4+2];
2140                 out4f[x*4+3] = ina4f[x*4+3] + inb4f[x*4+3];
2141         }
2142 }
2143
2144 void DPSOFTRAST_Draw_Span_MixBuffers(const DPSOFTRAST_State_Draw_Span * RESTRICT span, float *out4f, const float *ina4f, const float *inb4f)
2145 {
2146         int x, startx = span->startx, endx = span->endx;
2147         float a, b;
2148         for (x = startx;x < endx;x++)
2149         {
2150                 a = 1.0f - inb4f[x*4+3];
2151                 b = inb4f[x*4+3];
2152                 out4f[x*4+0] = ina4f[x*4+0] * a + inb4f[x*4+0] * b;
2153                 out4f[x*4+1] = ina4f[x*4+1] * a + inb4f[x*4+1] * b;
2154                 out4f[x*4+2] = ina4f[x*4+2] * a + inb4f[x*4+2] * b;
2155                 out4f[x*4+3] = ina4f[x*4+3] * a + inb4f[x*4+3] * b;
2156         }
2157 }
2158
2159 void DPSOFTRAST_Draw_Span_MixUniformColor(const DPSOFTRAST_State_Draw_Span * RESTRICT span, float *out4f, const float *in4f, const float *color)
2160 {
2161         int x, startx = span->startx, endx = span->endx;
2162         float localcolor[4], ilerp, lerp;
2163         localcolor[0] = color[0];
2164         localcolor[1] = color[1];
2165         localcolor[2] = color[2];
2166         localcolor[3] = color[3];
2167         ilerp = 1.0f - localcolor[3];
2168         lerp = localcolor[3];
2169         for (x = startx;x < endx;x++)
2170         {
2171                 out4f[x*4+0] = in4f[x*4+0] * ilerp + localcolor[0] * lerp;
2172                 out4f[x*4+1] = in4f[x*4+1] * ilerp + localcolor[1] * lerp;
2173                 out4f[x*4+2] = in4f[x*4+2] * ilerp + localcolor[2] * lerp;
2174                 out4f[x*4+3] = in4f[x*4+3] * ilerp + localcolor[3] * lerp;
2175         }
2176 }
2177
2178
2179
2180 void DPSOFTRAST_Draw_Span_MultiplyVaryingBGRA8(const DPSOFTRAST_State_Draw_Span * RESTRICT span, unsigned char *out4ub, const unsigned char *in4ub, int arrayindex, const float *zf)
2181 {
2182         int x;
2183         int startx = span->startx;
2184         int endx = span->endx;
2185         float data[4];
2186         float slope[4];
2187         float z;
2188         data[2] = span->data[0][arrayindex][0];
2189         data[1] = span->data[0][arrayindex][1];
2190         data[0] = span->data[0][arrayindex][2];
2191         data[3] = span->data[0][arrayindex][3];
2192         slope[2] = span->data[1][arrayindex][0];
2193         slope[1] = span->data[1][arrayindex][1];
2194         slope[0] = span->data[1][arrayindex][2];
2195         slope[3] = span->data[1][arrayindex][3];
2196         for (x = startx;x < endx;x++)
2197         {
2198                 z = zf[x];
2199                 out4ub[x*4+0] = (int)(in4ub[x*4+0] * (data[0] + slope[0]*x) * z);
2200                 out4ub[x*4+1] = (int)(in4ub[x*4+1] * (data[1] + slope[1]*x) * z);
2201                 out4ub[x*4+2] = (int)(in4ub[x*4+2] * (data[2] + slope[2]*x) * z);
2202                 out4ub[x*4+3] = (int)(in4ub[x*4+3] * (data[3] + slope[3]*x) * z);
2203         }
2204 }
2205
2206 void DPSOFTRAST_Draw_Span_VaryingBGRA8(const DPSOFTRAST_State_Draw_Span * RESTRICT span, unsigned char *out4ub, int arrayindex, const float *zf)
2207 {
2208         int x;
2209         int startx = span->startx;
2210         int endx = span->endx;
2211         float data[4];
2212         float slope[4];
2213         float z;
2214         data[2] = span->data[0][arrayindex][0]*255.0f;
2215         data[1] = span->data[0][arrayindex][1]*255.0f;
2216         data[0] = span->data[0][arrayindex][2]*255.0f;
2217         data[3] = span->data[0][arrayindex][3]*255.0f;
2218         slope[2] = span->data[1][arrayindex][0]*255.0f;
2219         slope[1] = span->data[1][arrayindex][1]*255.0f;
2220         slope[0] = span->data[1][arrayindex][2]*255.0f;
2221         slope[3] = span->data[1][arrayindex][3]*255.0f;
2222         for (x = startx;x < endx;x++)
2223         {
2224                 z = zf[x];
2225                 out4ub[x*4+0] = (int)((data[0] + slope[0]*x) * z);
2226                 out4ub[x*4+1] = (int)((data[1] + slope[1]*x) * z);
2227                 out4ub[x*4+2] = (int)((data[2] + slope[2]*x) * z);
2228                 out4ub[x*4+3] = (int)((data[3] + slope[3]*x) * z);
2229         }
2230 }
2231
2232 void DPSOFTRAST_Draw_Span_AddBloomBGRA8(const DPSOFTRAST_State_Draw_Span * RESTRICT span, unsigned char *out4ub, const unsigned char *ina4ub, const unsigned char *inb4ub, const float *subcolor)
2233 {
2234         int x, startx = span->startx, endx = span->endx;
2235         int c[4], localcolor[4];
2236         localcolor[2] = (int)(subcolor[0] * 255.0f);
2237         localcolor[1] = (int)(subcolor[1] * 255.0f);
2238         localcolor[0] = (int)(subcolor[2] * 255.0f);
2239         localcolor[3] = (int)(subcolor[3] * 255.0f);
2240         for (x = startx;x < endx;x++)
2241         {
2242                 c[0] = inb4ub[x*4+0] - localcolor[0];if (c[0] < 0) c[0] = 0;
2243                 c[1] = inb4ub[x*4+1] - localcolor[1];if (c[1] < 0) c[1] = 0;
2244                 c[2] = inb4ub[x*4+2] - localcolor[2];if (c[2] < 0) c[2] = 0;
2245                 c[3] = inb4ub[x*4+3] - localcolor[3];if (c[3] < 0) c[3] = 0;
2246                 c[0] += ina4ub[x*4+0];if (c[0] > 255) c[0] = 255;
2247                 c[1] += ina4ub[x*4+1];if (c[1] > 255) c[1] = 255;
2248                 c[2] += ina4ub[x*4+2];if (c[2] > 255) c[2] = 255;
2249                 c[3] += ina4ub[x*4+3];if (c[3] > 255) c[3] = 255;
2250                 out4ub[x*4+0] = c[0];
2251                 out4ub[x*4+1] = c[1];
2252                 out4ub[x*4+2] = c[2];
2253                 out4ub[x*4+3] = c[3];
2254         }
2255 }
2256
2257 void DPSOFTRAST_Draw_Span_MultiplyBuffersBGRA8(const DPSOFTRAST_State_Draw_Span * RESTRICT span, unsigned char *out4ub, const unsigned char *ina4ub, const unsigned char *inb4ub)
2258 {
2259         int x, startx = span->startx, endx = span->endx;
2260         for (x = startx;x < endx;x++)
2261         {
2262                 out4ub[x*4+0] = (ina4ub[x*4+0] * inb4ub[x*4+0])>>8;
2263                 out4ub[x*4+1] = (ina4ub[x*4+1] * inb4ub[x*4+1])>>8;
2264                 out4ub[x*4+2] = (ina4ub[x*4+2] * inb4ub[x*4+2])>>8;
2265                 out4ub[x*4+3] = (ina4ub[x*4+3] * inb4ub[x*4+3])>>8;
2266         }
2267 }
2268
2269 void DPSOFTRAST_Draw_Span_AddBuffersBGRA8(const DPSOFTRAST_State_Draw_Span * RESTRICT span, unsigned char *out4ub, const unsigned char *ina4ub, const unsigned char *inb4ub)
2270 {
2271         int x, startx = span->startx, endx = span->endx;
2272         int d[4];
2273         for (x = startx;x < endx;x++)
2274         {
2275                 d[0] = ina4ub[x*4+0] + inb4ub[x*4+0];if (d[0] > 255) d[0] = 255;
2276                 d[1] = ina4ub[x*4+1] + inb4ub[x*4+1];if (d[1] > 255) d[1] = 255;
2277                 d[2] = ina4ub[x*4+2] + inb4ub[x*4+2];if (d[2] > 255) d[2] = 255;
2278                 d[3] = ina4ub[x*4+3] + inb4ub[x*4+3];if (d[3] > 255) d[3] = 255;
2279                 out4ub[x*4+0] = d[0];
2280                 out4ub[x*4+1] = d[1];
2281                 out4ub[x*4+2] = d[2];
2282                 out4ub[x*4+3] = d[3];
2283         }
2284 }
2285
2286 void DPSOFTRAST_Draw_Span_TintedAddBuffersBGRA8(const DPSOFTRAST_State_Draw_Span * RESTRICT span, unsigned char *out4ub, const unsigned char *ina4ub, const unsigned char *inb4ub, const float *inbtintbgra)
2287 {
2288         int x, startx = span->startx, endx = span->endx;
2289         int d[4];
2290         int b[4];
2291         b[0] = (int)(inbtintbgra[0] * 256.0f);
2292         b[1] = (int)(inbtintbgra[1] * 256.0f);
2293         b[2] = (int)(inbtintbgra[2] * 256.0f);
2294         b[3] = (int)(inbtintbgra[3] * 256.0f);
2295         for (x = startx;x < endx;x++)
2296         {
2297                 d[0] = ina4ub[x*4+0] + ((inb4ub[x*4+0]*b[0])>>8);if (d[0] > 255) d[0] = 255;
2298                 d[1] = ina4ub[x*4+1] + ((inb4ub[x*4+1]*b[1])>>8);if (d[1] > 255) d[1] = 255;
2299                 d[2] = ina4ub[x*4+2] + ((inb4ub[x*4+2]*b[2])>>8);if (d[2] > 255) d[2] = 255;
2300                 d[3] = ina4ub[x*4+3] + ((inb4ub[x*4+3]*b[3])>>8);if (d[3] > 255) d[3] = 255;
2301                 out4ub[x*4+0] = d[0];
2302                 out4ub[x*4+1] = d[1];
2303                 out4ub[x*4+2] = d[2];
2304                 out4ub[x*4+3] = d[3];
2305         }
2306 }
2307
2308 void DPSOFTRAST_Draw_Span_MixBuffersBGRA8(const DPSOFTRAST_State_Draw_Span * RESTRICT span, unsigned char *out4ub, const unsigned char *ina4ub, const unsigned char *inb4ub)
2309 {
2310         int x, startx = span->startx, endx = span->endx;
2311         int a, b;
2312         for (x = startx;x < endx;x++)
2313         {
2314                 a = 256 - inb4ub[x*4+3];
2315                 b = inb4ub[x*4+3];
2316                 out4ub[x*4+0] = (ina4ub[x*4+0] * a + inb4ub[x*4+0] * b)>>8;
2317                 out4ub[x*4+1] = (ina4ub[x*4+1] * a + inb4ub[x*4+1] * b)>>8;
2318                 out4ub[x*4+2] = (ina4ub[x*4+2] * a + inb4ub[x*4+2] * b)>>8;
2319                 out4ub[x*4+3] = (ina4ub[x*4+3] * a + inb4ub[x*4+3] * b)>>8;
2320         }
2321 }
2322
2323 void DPSOFTRAST_Draw_Span_MixUniformColorBGRA8(const DPSOFTRAST_State_Draw_Span * RESTRICT span, unsigned char *out4ub, const unsigned char *in4ub, const float *color)
2324 {
2325         int x, startx = span->startx, endx = span->endx;
2326         int localcolor[4], ilerp, lerp;
2327         localcolor[2] = (int)(color[0]*255.0f);
2328         localcolor[1] = (int)(color[1]*255.0f);
2329         localcolor[0] = (int)(color[2]*255.0f);
2330         localcolor[3] = (int)(color[3]*255.0f);
2331         ilerp = 256 - localcolor[3];
2332         lerp = localcolor[3];
2333         for (x = startx;x < endx;x++)
2334         {
2335                 out4ub[x*4+0] = (in4ub[x*4+0] * ilerp + localcolor[0] * lerp)>>8;
2336                 out4ub[x*4+1] = (in4ub[x*4+1] * ilerp + localcolor[1] * lerp)>>8;
2337                 out4ub[x*4+2] = (in4ub[x*4+2] * ilerp + localcolor[2] * lerp)>>8;
2338                 out4ub[x*4+3] = (in4ub[x*4+3] * ilerp + localcolor[3] * lerp)>>8;
2339         }
2340 }
2341
2342
2343
2344 void DPSOFTRAST_VertexShader_Generic(void)
2345 {
2346         DPSOFTRAST_Array_Transform(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_POSITION], dpsoftrast.draw.numvertices, dpsoftrast.uniform4f + 4*DPSOFTRAST_UNIFORM_ModelViewProjectionMatrixM1);
2347         DPSOFTRAST_Array_Copy(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_COLOR], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_COLOR], dpsoftrast.draw.numvertices);
2348         DPSOFTRAST_Array_Copy(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD0], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD0], dpsoftrast.draw.numvertices);
2349         if (dpsoftrast.shader_permutation & SHADERPERMUTATION_SPECULAR)
2350                 DPSOFTRAST_Array_Copy(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD1], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD1], dpsoftrast.draw.numvertices);
2351 }
2352
2353 void DPSOFTRAST_PixelShader_Generic(const DPSOFTRAST_State_Draw_Span * RESTRICT span)
2354 {
2355         float buffer_z[DPSOFTRAST_DRAW_MAXSPANLENGTH];
2356         unsigned char buffer_texture_colorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
2357         unsigned char buffer_texture_lightmapbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
2358         unsigned char buffer_FragColorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
2359         DPSOFTRAST_Draw_Span_Begin(span, buffer_z);
2360         if (dpsoftrast.shader_permutation & SHADERPERMUTATION_DIFFUSE)
2361         {
2362                 DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(span, buffer_texture_colorbgra8, GL20TU_FIRST, 2, buffer_z);
2363                 DPSOFTRAST_Draw_Span_MultiplyVaryingBGRA8(span, buffer_FragColorbgra8, buffer_texture_colorbgra8, 1, buffer_z);
2364                 if (dpsoftrast.shader_permutation & SHADERPERMUTATION_SPECULAR)
2365                 {
2366                         DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(span, buffer_texture_lightmapbgra8, GL20TU_SECOND, 2, buffer_z);
2367                         if (dpsoftrast.shader_permutation & SHADERPERMUTATION_COLORMAPPING)
2368                         {
2369                                 // multiply
2370                                 DPSOFTRAST_Draw_Span_MultiplyBuffersBGRA8(span, buffer_FragColorbgra8, buffer_FragColorbgra8, buffer_texture_lightmapbgra8);
2371                         }
2372                         else if (dpsoftrast.shader_permutation & SHADERPERMUTATION_COLORMAPPING)
2373                         {
2374                                 // add
2375                                 DPSOFTRAST_Draw_Span_AddBuffersBGRA8(span, buffer_FragColorbgra8, buffer_FragColorbgra8, buffer_texture_lightmapbgra8);
2376                         }
2377                         else if (dpsoftrast.shader_permutation & SHADERPERMUTATION_VERTEXTEXTUREBLEND)
2378                         {
2379                                 // alphablend
2380                                 DPSOFTRAST_Draw_Span_MixBuffersBGRA8(span, buffer_FragColorbgra8, buffer_FragColorbgra8, buffer_texture_lightmapbgra8);
2381                         }
2382                 }
2383         }
2384         else
2385                 DPSOFTRAST_Draw_Span_VaryingBGRA8(span, buffer_FragColorbgra8, 1, buffer_z);
2386         DPSOFTRAST_Draw_Span_FinishBGRA8(span, buffer_FragColorbgra8);
2387 }
2388
2389
2390
2391 void DPSOFTRAST_VertexShader_PostProcess(void)
2392 {
2393         DPSOFTRAST_Array_Transform(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_POSITION], dpsoftrast.draw.numvertices, dpsoftrast.uniform4f + 4*DPSOFTRAST_UNIFORM_ModelViewProjectionMatrixM1);
2394         DPSOFTRAST_Array_Copy(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD0], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD0], dpsoftrast.draw.numvertices);
2395         DPSOFTRAST_Array_Copy(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD1], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD1], dpsoftrast.draw.numvertices);
2396 }
2397
2398 void DPSOFTRAST_PixelShader_PostProcess(const DPSOFTRAST_State_Draw_Span * RESTRICT span)
2399 {
2400         // TODO: optimize!!  at the very least there is no reason to use texture sampling on the frame texture
2401         float buffer_z[DPSOFTRAST_DRAW_MAXSPANLENGTH];
2402         unsigned char buffer_texture_colorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
2403         unsigned char buffer_FragColorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
2404         DPSOFTRAST_Draw_Span_Begin(span, buffer_z);
2405         DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(span, buffer_FragColorbgra8, GL20TU_FIRST, 2, buffer_z);
2406         if (dpsoftrast.shader_permutation & SHADERPERMUTATION_BLOOM)
2407         {
2408                 DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(span, buffer_texture_colorbgra8, GL20TU_SECOND, 3, buffer_z);
2409                 DPSOFTRAST_Draw_Span_AddBloomBGRA8(span, buffer_FragColorbgra8, buffer_FragColorbgra8, buffer_texture_colorbgra8, dpsoftrast.uniform4f + DPSOFTRAST_UNIFORM_BloomColorSubtract * 4);
2410         }
2411         DPSOFTRAST_Draw_Span_MixUniformColorBGRA8(span, buffer_FragColorbgra8, buffer_FragColorbgra8, dpsoftrast.uniform4f + DPSOFTRAST_UNIFORM_ViewTintColor * 4);
2412         if (dpsoftrast.shader_permutation & SHADERPERMUTATION_SATURATION)
2413         {
2414                 // TODO: implement saturation
2415         }
2416         if (dpsoftrast.shader_permutation & SHADERPERMUTATION_GAMMARAMPS)
2417         {
2418                 // TODO: implement gammaramps
2419         }
2420         DPSOFTRAST_Draw_Span_FinishBGRA8(span, buffer_FragColorbgra8);
2421 }
2422
2423
2424
2425 void DPSOFTRAST_VertexShader_Depth_Or_Shadow(void)
2426 {
2427         DPSOFTRAST_Array_Transform(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_POSITION], dpsoftrast.draw.numvertices, dpsoftrast.uniform4f + 4*DPSOFTRAST_UNIFORM_ModelViewProjectionMatrixM1);
2428 }
2429
2430 void DPSOFTRAST_PixelShader_Depth_Or_Shadow(const DPSOFTRAST_State_Draw_Span * RESTRICT span)
2431 {
2432         // this is never called (because colormask is off when this shader is used)
2433         float buffer_z[DPSOFTRAST_DRAW_MAXSPANLENGTH];
2434         unsigned char buffer_FragColorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
2435         DPSOFTRAST_Draw_Span_Begin(span, buffer_z);
2436         memset(buffer_FragColorbgra8, 0, span->length*4);
2437         DPSOFTRAST_Draw_Span_FinishBGRA8(span, buffer_FragColorbgra8);
2438 }
2439
2440
2441
2442 void DPSOFTRAST_VertexShader_FlatColor(void)
2443 {
2444         DPSOFTRAST_Array_Transform(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_POSITION], dpsoftrast.draw.numvertices, dpsoftrast.uniform4f + 4*DPSOFTRAST_UNIFORM_ModelViewProjectionMatrixM1);
2445         DPSOFTRAST_Array_Transform(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD0], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD0], dpsoftrast.draw.numvertices, dpsoftrast.uniform4f + 4*DPSOFTRAST_UNIFORM_TexMatrixM1);
2446 }
2447
2448 void DPSOFTRAST_PixelShader_FlatColor(const DPSOFTRAST_State_Draw_Span * RESTRICT span)
2449 {
2450         int x, startx = span->startx, endx = span->endx;
2451         int Color_Ambienti[4];
2452         float buffer_z[DPSOFTRAST_DRAW_MAXSPANLENGTH];
2453         unsigned char buffer_texture_colorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
2454         unsigned char buffer_FragColorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
2455         Color_Ambienti[2] = (int)(dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Ambient*4+0]*256.0f);
2456         Color_Ambienti[1] = (int)(dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Ambient*4+1]*256.0f);
2457         Color_Ambienti[0] = (int)(dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Ambient*4+2]*256.0f);
2458         Color_Ambienti[3] = (int)(dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Alpha*4+0]        *256.0f);
2459         DPSOFTRAST_Draw_Span_Begin(span, buffer_z);
2460         DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(span, buffer_texture_colorbgra8, GL20TU_COLOR, 2, buffer_z);
2461         for (x = startx;x < endx;x++)
2462         {
2463                 buffer_FragColorbgra8[x*4+0] = (buffer_texture_colorbgra8[x*4+0] * Color_Ambienti[0])>>8;
2464                 buffer_FragColorbgra8[x*4+1] = (buffer_texture_colorbgra8[x*4+1] * Color_Ambienti[1])>>8;
2465                 buffer_FragColorbgra8[x*4+2] = (buffer_texture_colorbgra8[x*4+2] * Color_Ambienti[2])>>8;
2466                 buffer_FragColorbgra8[x*4+3] = (buffer_texture_colorbgra8[x*4+3] * Color_Ambienti[3])>>8;
2467         }
2468         DPSOFTRAST_Draw_Span_FinishBGRA8(span, buffer_FragColorbgra8);
2469 }
2470
2471
2472
2473 void DPSOFTRAST_VertexShader_VertexColor(void)
2474 {
2475         DPSOFTRAST_Array_Transform(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_POSITION], dpsoftrast.draw.numvertices, dpsoftrast.uniform4f + 4*DPSOFTRAST_UNIFORM_ModelViewProjectionMatrixM1);
2476         DPSOFTRAST_Array_Copy(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_COLOR], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_COLOR], dpsoftrast.draw.numvertices);
2477         DPSOFTRAST_Array_Transform(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD0], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD0], dpsoftrast.draw.numvertices, dpsoftrast.uniform4f + 4*DPSOFTRAST_UNIFORM_TexMatrixM1);
2478 }
2479
2480 void DPSOFTRAST_PixelShader_VertexColor(const DPSOFTRAST_State_Draw_Span * RESTRICT span)
2481 {
2482         float buffer_z[DPSOFTRAST_DRAW_MAXSPANLENGTH];
2483         unsigned char buffer_texture_colorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
2484         unsigned char buffer_FragColorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
2485         int x, startx = span->startx, endx = span->endx;
2486         float Color_Ambient[4], Color_Diffuse[4];
2487         float data[4];
2488         float slope[4];
2489         float z;
2490         int arrayindex = DPSOFTRAST_ARRAY_COLOR;
2491         data[2] = span->data[0][arrayindex][0];
2492         data[1] = span->data[0][arrayindex][1];
2493         data[0] = span->data[0][arrayindex][2];
2494         data[3] = span->data[0][arrayindex][3];
2495         slope[2] = span->data[1][arrayindex][0];
2496         slope[1] = span->data[1][arrayindex][1];
2497         slope[0] = span->data[1][arrayindex][2];
2498         slope[3] = span->data[1][arrayindex][3];
2499         Color_Ambient[2] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Ambient*4+0];
2500         Color_Ambient[1] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Ambient*4+1];
2501         Color_Ambient[0] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Ambient*4+2];
2502         Color_Ambient[3] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Alpha*4+0];
2503         Color_Diffuse[2] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Diffuse*4+0];
2504         Color_Diffuse[1] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Diffuse*4+1];
2505         Color_Diffuse[0] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Diffuse*4+2];
2506         Color_Diffuse[3] = 0.0f;
2507         DPSOFTRAST_Draw_Span_Begin(span, buffer_z);
2508         DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(span, buffer_texture_colorbgra8, GL20TU_COLOR, 2, buffer_z);
2509         for (x = startx;x < endx;x++)
2510         {
2511                 z = buffer_z[x];
2512                 buffer_FragColorbgra8[x*4+0] = (int)(buffer_texture_colorbgra8[x*4+0] * (Color_Ambient[0] + ((data[0] + slope[0]*x) * z) * Color_Diffuse[0]));
2513                 buffer_FragColorbgra8[x*4+1] = (int)(buffer_texture_colorbgra8[x*4+1] * (Color_Ambient[1] + ((data[1] + slope[1]*x) * z) * Color_Diffuse[1]));
2514                 buffer_FragColorbgra8[x*4+2] = (int)(buffer_texture_colorbgra8[x*4+2] * (Color_Ambient[2] + ((data[2] + slope[2]*x) * z) * Color_Diffuse[2]));
2515                 buffer_FragColorbgra8[x*4+3] = (int)(buffer_texture_colorbgra8[x*4+3] * (Color_Ambient[3] + ((data[3] + slope[3]*x) * z) * Color_Diffuse[3]));
2516         }
2517         DPSOFTRAST_Draw_Span_FinishBGRA8(span, buffer_FragColorbgra8);
2518 }
2519
2520
2521
2522 void DPSOFTRAST_VertexShader_Lightmap(void)
2523 {
2524         DPSOFTRAST_Array_Transform(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_POSITION], dpsoftrast.draw.numvertices, dpsoftrast.uniform4f + 4*DPSOFTRAST_UNIFORM_ModelViewProjectionMatrixM1);
2525         DPSOFTRAST_Array_Transform(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD0], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD0], dpsoftrast.draw.numvertices, dpsoftrast.uniform4f + 4*DPSOFTRAST_UNIFORM_TexMatrixM1);
2526         DPSOFTRAST_Array_Copy(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD4], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD4], dpsoftrast.draw.numvertices);
2527 }
2528
2529 void DPSOFTRAST_PixelShader_Lightmap(const DPSOFTRAST_State_Draw_Span * RESTRICT span)
2530 {
2531         int x, startx = span->startx, endx = span->endx;
2532         int Color_Ambienti[4], Color_Diffusei[4], Color_Glowi[4];
2533         float buffer_z[DPSOFTRAST_DRAW_MAXSPANLENGTH];
2534         unsigned char buffer_texture_colorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
2535         unsigned char buffer_texture_lightmapbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
2536         unsigned char buffer_texture_glowbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
2537         unsigned char buffer_FragColorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
2538         unsigned int d[4];
2539         //unsigned char * RESTRICT pixelmask = span->pixelmask;
2540         //unsigned char * RESTRICT pixel = (unsigned char *)dpsoftrast.fb_colorpixels[0] + span->start * 4;
2541         DPSOFTRAST_Draw_Span_Begin(span, buffer_z);
2542         Color_Ambienti[2] = (int)(dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Ambient*4+0] * 65536.0f);
2543         Color_Ambienti[1] = (int)(dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Ambient*4+1] * 65536.0f);
2544         Color_Ambienti[0] = (int)(dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Ambient*4+2] * 65536.0f);
2545         Color_Ambienti[3] = (int)(dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Alpha*4+0] * 65536.0f);
2546         Color_Diffusei[2] = (int)(dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Diffuse*4+0] * 256.0f);
2547         Color_Diffusei[1] = (int)(dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Diffuse*4+1] * 256.0f);
2548         Color_Diffusei[0] = (int)(dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Diffuse*4+2] * 256.0f);
2549         Color_Diffusei[3] = 0;
2550         Color_Glowi[2] = (int)(dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Glow*4+0] * 65536.0f);
2551         Color_Glowi[1] = (int)(dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Glow*4+1] * 65536.0f);
2552         Color_Glowi[0] = (int)(dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Glow*4+2] * 65536.0f);
2553         Color_Glowi[3] = 0;
2554         DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(span, buffer_texture_colorbgra8, GL20TU_COLOR, DPSOFTRAST_ARRAY_TEXCOORD0, buffer_z);
2555         DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(span, buffer_texture_lightmapbgra8, GL20TU_LIGHTMAP, DPSOFTRAST_ARRAY_TEXCOORD4, buffer_z);
2556         if (dpsoftrast.shader_permutation & SHADERPERMUTATION_GLOW)
2557         {
2558                 DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(span, buffer_texture_glowbgra8, GL20TU_GLOW, DPSOFTRAST_ARRAY_TEXCOORD0, buffer_z);
2559                 for (x = startx;x < endx;x++)
2560                 {
2561                         d[0] = (buffer_texture_glowbgra8[x*4+0] * Color_Glowi[0] + buffer_texture_colorbgra8[x*4+0] * (Color_Ambienti[0] + buffer_texture_lightmapbgra8[x*4+0] * Color_Diffusei[0])) >> 16;if (d[0] > 255) d[0] = 255;
2562                         d[1] = (buffer_texture_glowbgra8[x*4+1] * Color_Glowi[1] + buffer_texture_colorbgra8[x*4+1] * (Color_Ambienti[1] + buffer_texture_lightmapbgra8[x*4+1] * Color_Diffusei[1])) >> 16;if (d[1] > 255) d[1] = 255;
2563                         d[2] = (buffer_texture_glowbgra8[x*4+2] * Color_Glowi[2] + buffer_texture_colorbgra8[x*4+2] * (Color_Ambienti[2] + buffer_texture_lightmapbgra8[x*4+2] * Color_Diffusei[2])) >> 16;if (d[2] > 255) d[2] = 255;
2564                         d[3] = (buffer_texture_glowbgra8[x*4+3] * Color_Glowi[3] + buffer_texture_colorbgra8[x*4+3] * (Color_Ambienti[3] + buffer_texture_lightmapbgra8[x*4+3] * Color_Diffusei[3])) >> 16;if (d[3] > 255) d[3] = 255;
2565                         buffer_FragColorbgra8[x*4+0] = d[0];
2566                         buffer_FragColorbgra8[x*4+1] = d[1];
2567                         buffer_FragColorbgra8[x*4+2] = d[2];
2568                         buffer_FragColorbgra8[x*4+3] = d[3];
2569                 }
2570         }
2571         else
2572         {
2573                 for (x = startx;x < endx;x++)
2574                 {
2575                         d[0] = (buffer_texture_colorbgra8[x*4+0] * (Color_Ambienti[0] + buffer_texture_lightmapbgra8[x*4+0] * Color_Diffusei[0])) >> 16;if (d[0] > 255) d[0] = 255;
2576                         d[1] = (buffer_texture_colorbgra8[x*4+1] * (Color_Ambienti[1] + buffer_texture_lightmapbgra8[x*4+1] * Color_Diffusei[1])) >> 16;if (d[1] > 255) d[1] = 255;
2577                         d[2] = (buffer_texture_colorbgra8[x*4+2] * (Color_Ambienti[2] + buffer_texture_lightmapbgra8[x*4+2] * Color_Diffusei[2])) >> 16;if (d[2] > 255) d[2] = 255;
2578                         d[3] = (buffer_texture_colorbgra8[x*4+3] * (Color_Ambienti[3] + buffer_texture_lightmapbgra8[x*4+3] * Color_Diffusei[3])) >> 16;if (d[3] > 255) d[3] = 255;
2579                         buffer_FragColorbgra8[x*4+0] = d[0];
2580                         buffer_FragColorbgra8[x*4+1] = d[1];
2581                         buffer_FragColorbgra8[x*4+2] = d[2];
2582                         buffer_FragColorbgra8[x*4+3] = d[3];
2583                 }
2584         }
2585         DPSOFTRAST_Draw_Span_FinishBGRA8(span, buffer_FragColorbgra8);
2586 }
2587
2588
2589
2590 void DPSOFTRAST_VertexShader_FakeLight(void)
2591 {
2592         DPSOFTRAST_Array_Transform(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_POSITION], dpsoftrast.draw.numvertices, dpsoftrast.uniform4f + 4*DPSOFTRAST_UNIFORM_ModelViewProjectionMatrixM1);
2593 }
2594
2595 void DPSOFTRAST_PixelShader_FakeLight(const DPSOFTRAST_State_Draw_Span * RESTRICT span)
2596 {
2597         // TODO: IMPLEMENT
2598         float buffer_z[DPSOFTRAST_DRAW_MAXSPANLENGTH];
2599         unsigned char buffer_FragColorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
2600         DPSOFTRAST_Draw_Span_Begin(span, buffer_z);
2601         memset(buffer_FragColorbgra8, 0, span->length*4);
2602         DPSOFTRAST_Draw_Span_FinishBGRA8(span, buffer_FragColorbgra8);
2603 }
2604
2605
2606
// Vertex shader for the model-space light direction map mode.
// Currently identical to the lightmap vertex shader, which it calls directly.
void DPSOFTRAST_VertexShader_LightDirectionMap_ModelSpace(void)
{
	DPSOFTRAST_VertexShader_Lightmap();
}
2611
2612 void DPSOFTRAST_PixelShader_LightDirectionMap_ModelSpace(const DPSOFTRAST_State_Draw_Span * RESTRICT span)
2613 {
2614         DPSOFTRAST_PixelShader_Lightmap(span);
2615         // TODO: IMPLEMENT
2616 }
2617
2618
2619
// Vertex shader for the tangent-space light direction map mode.
// Currently identical to the lightmap vertex shader, which it calls directly.
void DPSOFTRAST_VertexShader_LightDirectionMap_TangentSpace(void)
{
	DPSOFTRAST_VertexShader_Lightmap();
}
2624
2625 void DPSOFTRAST_PixelShader_LightDirectionMap_TangentSpace(const DPSOFTRAST_State_Draw_Span * RESTRICT span)
2626 {
2627         DPSOFTRAST_PixelShader_Lightmap(span);
2628         // TODO: IMPLEMENT
2629 }
2630
2631
2632
2633 void DPSOFTRAST_VertexShader_LightDirection(void)
2634 {
2635         int i;
2636         int numvertices = dpsoftrast.draw.numvertices;
2637         float LightDir[4];
2638         float LightVector[4];
2639         float EyePosition[4];
2640         float EyeVectorModelSpace[4];
2641         float EyeVector[4];
2642         float position[4];
2643         float svector[4];
2644         float tvector[4];
2645         float normal[4];
2646         LightDir[0] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_LightDir*4+0];
2647         LightDir[1] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_LightDir*4+1];
2648         LightDir[2] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_LightDir*4+2];
2649         LightDir[3] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_LightDir*4+3];
2650         EyePosition[0] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_EyePosition*4+0];
2651         EyePosition[1] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_EyePosition*4+1];
2652         EyePosition[2] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_EyePosition*4+2];
2653         EyePosition[3] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_EyePosition*4+3];
2654         DPSOFTRAST_Array_Transform(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_POSITION], dpsoftrast.draw.numvertices, dpsoftrast.uniform4f + 4*DPSOFTRAST_UNIFORM_ModelViewProjectionMatrixM1);
2655         DPSOFTRAST_Array_Transform(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD0], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD0], dpsoftrast.draw.numvertices, dpsoftrast.uniform4f + 4*DPSOFTRAST_UNIFORM_TexMatrixM1);
2656         for (i = 0;i < numvertices;i++)
2657         {
2658                 position[0] = dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_POSITION][i*4+0];
2659                 position[1] = dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_POSITION][i*4+1];
2660                 position[2] = dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_POSITION][i*4+2];
2661                 svector[0] = dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD1][i*4+0];
2662                 svector[1] = dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD1][i*4+1];
2663                 svector[2] = dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD1][i*4+2];
2664                 tvector[0] = dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD2][i*4+0];
2665                 tvector[1] = dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD2][i*4+1];
2666                 tvector[2] = dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD2][i*4+2];
2667                 normal[0] = dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD3][i*4+0];
2668                 normal[1] = dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD3][i*4+1];
2669                 normal[2] = dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD3][i*4+2];
2670                 LightVector[0] = svector[0] * LightDir[0] + svector[1] * LightDir[1] + svector[2] * LightDir[2];
2671                 LightVector[1] = tvector[0] * LightDir[0] + tvector[1] * LightDir[1] + tvector[2] * LightDir[2];
2672                 LightVector[2] = normal[0] * LightDir[0] + normal[1] * LightDir[1] + normal[2] * LightDir[2];
2673                 dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD1][i*4+0] = LightVector[0];
2674                 dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD1][i*4+1] = LightVector[1];
2675                 dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD1][i*4+2] = LightVector[2];
2676                 dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD1][i*4+3] = 0.0f;
2677                 EyeVectorModelSpace[0] = EyePosition[0] - position[0];
2678                 EyeVectorModelSpace[1] = EyePosition[1] - position[1];
2679                 EyeVectorModelSpace[2] = EyePosition[2] - position[2];
2680                 EyeVector[0] = svector[0] * EyeVectorModelSpace[0] + svector[1] * EyeVectorModelSpace[1] + svector[2] * EyeVectorModelSpace[2];
2681                 EyeVector[1] = tvector[0] * EyeVectorModelSpace[0] + tvector[1] * EyeVectorModelSpace[1] + tvector[2] * EyeVectorModelSpace[2];
2682                 EyeVector[2] = normal[0]  * EyeVectorModelSpace[0] + normal[1]  * EyeVectorModelSpace[1] + normal[2]  * EyeVectorModelSpace[2];
2683                 dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD2][i*4+0] = EyeVector[0];
2684                 dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD2][i*4+1] = EyeVector[1];
2685                 dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD2][i*4+2] = EyeVector[2];
2686                 dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD2][i*4+3] = 0.0f;
2687         }
2688 }
2689
// Small vector helpers used by the pixel shaders below.
// NOTE: these are macros, so every argument may be evaluated more than once;
// do not pass expressions with side effects.

// smaller / larger of two values
#define DPSOFTRAST_Min(a,b) ((a) < (b) ? (a) : (b))
#define DPSOFTRAST_Max(a,b) ((a) > (b) ? (a) : (b))
// dot product of the first three elements of two indexable operands
#define DPSOFTRAST_Vector3Dot(a,b) ((a)[0]*(b)[0]+(a)[1]*(b)[1]+(a)[2]*(b)[2])
// squared length / length of the first three elements of v
#define DPSOFTRAST_Vector3LengthSquared(v) (DPSOFTRAST_Vector3Dot((v),(v)))
#define DPSOFTRAST_Vector3Length(v) (sqrt(DPSOFTRAST_Vector3LengthSquared(v)))
// scales the first three elements of v in place to unit length; a vector of
// length zero is left untouched.  The argument is parenthesized at every use
// so that pointer expressions such as (p + 4) work, and the temporary has a
// prefixed name so it cannot capture a caller variable called "len".
#define DPSOFTRAST_Vector3Normalize(v)\
do\
{\
	float dpsoftrast_veclen = sqrt(DPSOFTRAST_Vector3Dot((v),(v)));\
	if (dpsoftrast_veclen)\
	{\
		dpsoftrast_veclen = 1.0f / dpsoftrast_veclen;\
		(v)[0] *= dpsoftrast_veclen;\
		(v)[1] *= dpsoftrast_veclen;\
		(v)[2] *= dpsoftrast_veclen;\
	}\
}\
while(0)
2708
2709 void DPSOFTRAST_PixelShader_LightDirection(const DPSOFTRAST_State_Draw_Span * RESTRICT span)
2710 {
2711         float buffer_z[DPSOFTRAST_DRAW_MAXSPANLENGTH];
2712         unsigned char buffer_texture_colorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
2713         unsigned char buffer_texture_normalbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
2714         unsigned char buffer_texture_glossbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
2715         unsigned char buffer_texture_glowbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
2716         unsigned char buffer_texture_pantsbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
2717         unsigned char buffer_texture_shirtbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
2718         unsigned char buffer_FragColorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
2719         int x, startx = span->startx, endx = span->endx;
2720         float Color_Ambient[4], Color_Diffuse[4], Color_Specular[4], Color_Glow[4], Color_Pants[4], Color_Shirt[4], LightColor[4];
2721         float LightVectordata[4];
2722         float LightVectorslope[4];
2723         float EyeVectordata[4];
2724         float EyeVectorslope[4];
2725         float z;
2726         float diffusetex[4];
2727         float glosstex[4];
2728         float surfacenormal[4];
2729         float lightnormal[4];
2730         float eyenormal[4];
2731         float specularnormal[4];
2732         float diffuse;
2733         float specular;
2734         float SpecularPower;
2735         int d[4];
2736         Color_Glow[2] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Glow*4+0];
2737         Color_Glow[1] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Glow*4+1];
2738         Color_Glow[0] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Glow*4+2];
2739         Color_Glow[3] = 0.0f;
2740         Color_Ambient[2] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Ambient*4+0];
2741         Color_Ambient[1] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Ambient*4+1];
2742         Color_Ambient[0] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Ambient*4+2];
2743         Color_Ambient[3] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Alpha*4+0];
2744         Color_Pants[2] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Pants*4+0];
2745         Color_Pants[1] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Pants*4+1];
2746         Color_Pants[0] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Pants*4+2];
2747         Color_Pants[3] = 0.0f;
2748         Color_Shirt[2] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Shirt*4+0];
2749         Color_Shirt[1] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Shirt*4+1];
2750         Color_Shirt[0] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Shirt*4+2];
2751         Color_Shirt[3] = 0.0f;
2752         DPSOFTRAST_Draw_Span_Begin(span, buffer_z);
2753         DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(span, buffer_texture_colorbgra8, GL20TU_COLOR, DPSOFTRAST_ARRAY_TEXCOORD0, buffer_z);
2754         if (dpsoftrast.shader_permutation & SHADERPERMUTATION_COLORMAPPING)
2755         {
2756                 DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(span, buffer_texture_pantsbgra8, GL20TU_PANTS, DPSOFTRAST_ARRAY_TEXCOORD0, buffer_z);
2757                 DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(span, buffer_texture_shirtbgra8, GL20TU_SHIRT, DPSOFTRAST_ARRAY_TEXCOORD0, buffer_z);
2758         }
2759         if (dpsoftrast.shader_permutation & SHADERPERMUTATION_GLOW)
2760         {
2761                 DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(span, buffer_texture_glowbgra8, GL20TU_GLOW, DPSOFTRAST_ARRAY_TEXCOORD0, buffer_z);
2762         }
2763         if (dpsoftrast.shader_permutation & SHADERPERMUTATION_SPECULAR)
2764         {
2765                 Color_Diffuse[2] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Diffuse*4+0];
2766                 Color_Diffuse[1] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Diffuse*4+1];
2767                 Color_Diffuse[0] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Diffuse*4+2];
2768                 Color_Diffuse[3] = 0.0f;
2769                 LightColor[2] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_LightColor*4+0];
2770                 LightColor[1] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_LightColor*4+1];
2771                 LightColor[0] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_LightColor*4+2];
2772                 LightColor[3] = 0.0f;
2773                 LightVectordata[0]  = span->data[0][DPSOFTRAST_ARRAY_TEXCOORD1][0];
2774                 LightVectordata[1]  = span->data[0][DPSOFTRAST_ARRAY_TEXCOORD1][1];
2775                 LightVectordata[2]  = span->data[0][DPSOFTRAST_ARRAY_TEXCOORD1][2];
2776                 LightVectordata[3]  = span->data[0][DPSOFTRAST_ARRAY_TEXCOORD1][3];
2777                 LightVectorslope[0] = span->data[1][DPSOFTRAST_ARRAY_TEXCOORD1][0];
2778                 LightVectorslope[1] = span->data[1][DPSOFTRAST_ARRAY_TEXCOORD1][1];
2779                 LightVectorslope[2] = span->data[1][DPSOFTRAST_ARRAY_TEXCOORD1][2];
2780                 LightVectorslope[3] = span->data[1][DPSOFTRAST_ARRAY_TEXCOORD1][3];
2781                 DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(span, buffer_texture_normalbgra8, GL20TU_NORMAL, DPSOFTRAST_ARRAY_TEXCOORD0, buffer_z);
2782                 Color_Specular[2] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Specular*4+0];
2783                 Color_Specular[1] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Specular*4+1];
2784                 Color_Specular[0] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Specular*4+2];
2785                 Color_Specular[3] = 0.0f;
2786                 SpecularPower = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_SpecularPower*4+0] * (1.0f / 255.0f);
2787                 EyeVectordata[0]    = span->data[0][DPSOFTRAST_ARRAY_TEXCOORD2][0];
2788                 EyeVectordata[1]    = span->data[0][DPSOFTRAST_ARRAY_TEXCOORD2][1];
2789                 EyeVectordata[2]    = span->data[0][DPSOFTRAST_ARRAY_TEXCOORD2][2];
2790                 EyeVectordata[3]    = span->data[0][DPSOFTRAST_ARRAY_TEXCOORD2][3];
2791                 EyeVectorslope[0]   = span->data[1][DPSOFTRAST_ARRAY_TEXCOORD2][0];
2792                 EyeVectorslope[1]   = span->data[1][DPSOFTRAST_ARRAY_TEXCOORD2][1];
2793                 EyeVectorslope[2]   = span->data[1][DPSOFTRAST_ARRAY_TEXCOORD2][2];
2794                 EyeVectorslope[3]   = span->data[1][DPSOFTRAST_ARRAY_TEXCOORD2][3];
2795                 DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(span, buffer_texture_glossbgra8, GL20TU_GLOSS, DPSOFTRAST_ARRAY_TEXCOORD0, buffer_z);
2796                 for (x = startx;x < endx;x++)
2797                 {
2798                         z = buffer_z[x];
2799                         diffusetex[0] = buffer_texture_colorbgra8[x*4+0];
2800                         diffusetex[1] = buffer_texture_colorbgra8[x*4+1];
2801                         diffusetex[2] = buffer_texture_colorbgra8[x*4+2];
2802                         diffusetex[3] = buffer_texture_colorbgra8[x*4+3];
2803                         if (dpsoftrast.shader_permutation & SHADERPERMUTATION_COLORMAPPING)
2804                         {
2805                                 diffusetex[0] += buffer_texture_pantsbgra8[x*4+0] * Color_Pants[0] + buffer_texture_shirtbgra8[x*4+0] * Color_Shirt[0];
2806                                 diffusetex[1] += buffer_texture_pantsbgra8[x*4+1] * Color_Pants[1] + buffer_texture_shirtbgra8[x*4+1] * Color_Shirt[1];
2807                                 diffusetex[2] += buffer_texture_pantsbgra8[x*4+2] * Color_Pants[2] + buffer_texture_shirtbgra8[x*4+2] * Color_Shirt[2];
2808                                 diffusetex[3] += buffer_texture_pantsbgra8[x*4+3] * Color_Pants[3] + buffer_texture_shirtbgra8[x*4+3] * Color_Shirt[3];
2809                         }
2810                         glosstex[0] = buffer_texture_glossbgra8[x*4+0];
2811                         glosstex[1] = buffer_texture_glossbgra8[x*4+1];
2812                         glosstex[2] = buffer_texture_glossbgra8[x*4+2];
2813                         glosstex[3] = buffer_texture_glossbgra8[x*4+3];
2814                         surfacenormal[0] = buffer_texture_normalbgra8[x*4+2] * (1.0f / 128.0f) - 1.0f;
2815                         surfacenormal[1] = buffer_texture_normalbgra8[x*4+1] * (1.0f / 128.0f) - 1.0f;
2816                         surfacenormal[2] = buffer_texture_normalbgra8[x*4+0] * (1.0f / 128.0f) - 1.0f;
2817                         DPSOFTRAST_Vector3Normalize(surfacenormal);
2818
2819                         lightnormal[0] = (LightVectordata[0] + LightVectorslope[0]*x) * z;
2820                         lightnormal[1] = (LightVectordata[1] + LightVectorslope[1]*x) * z;
2821                         lightnormal[2] = (LightVectordata[2] + LightVectorslope[2]*x) * z;
2822                         DPSOFTRAST_Vector3Normalize(lightnormal);
2823
2824                         eyenormal[0] = (EyeVectordata[0] + EyeVectorslope[0]*x) * z;
2825                         eyenormal[1] = (EyeVectordata[1] + EyeVectorslope[1]*x) * z;
2826                         eyenormal[2] = (EyeVectordata[2] + EyeVectorslope[2]*x) * z;
2827                         DPSOFTRAST_Vector3Normalize(eyenormal);
2828
2829                         specularnormal[0] = lightnormal[0] + eyenormal[0];
2830                         specularnormal[1] = lightnormal[1] + eyenormal[1];
2831                         specularnormal[2] = lightnormal[2] + eyenormal[2];
2832                         DPSOFTRAST_Vector3Normalize(specularnormal);
2833
2834                         diffuse = DPSOFTRAST_Vector3Dot(surfacenormal, lightnormal);if (diffuse < 0.0f) diffuse = 0.0f;
2835                         specular = DPSOFTRAST_Vector3Dot(surfacenormal, specularnormal);if (specular < 0.0f) specular = 0.0f;
2836                         specular = pow(specular, SpecularPower * glosstex[3]);
2837                         if (dpsoftrast.shader_permutation & SHADERPERMUTATION_GLOW)
2838                         {
2839                                 d[0] = (int)(buffer_texture_glowbgra8[x*4+0] * Color_Glow[0] + diffusetex[0] * Color_Ambient[0] + (diffusetex[0] * Color_Diffuse[0] * diffuse + glosstex[0] * Color_Specular[0] * specular) * LightColor[0]);if (d[0] > 255) d[0] = 255;
2840                                 d[1] = (int)(buffer_texture_glowbgra8[x*4+1] * Color_Glow[1] + diffusetex[1] * Color_Ambient[1] + (diffusetex[1] * Color_Diffuse[1] * diffuse + glosstex[1] * Color_Specular[1] * specular) * LightColor[1]);if (d[1] > 255) d[1] = 255;
2841                                 d[2] = (int)(buffer_texture_glowbgra8[x*4+2] * Color_Glow[2] + diffusetex[2] * Color_Ambient[2] + (diffusetex[2] * Color_Diffuse[2] * diffuse + glosstex[2] * Color_Specular[2] * specular) * LightColor[2]);if (d[2] > 255) d[2] = 255;
2842                                 d[3] = (int)(                                                  diffusetex[3] * Color_Ambient[3]);if (d[3] > 255) d[3] = 255;
2843                         }
2844                         else
2845                         {
2846                                 d[0] = (int)(                                                  diffusetex[0] * Color_Ambient[0] + (diffusetex[0] * Color_Diffuse[0] * diffuse + glosstex[0] * Color_Specular[0] * specular) * LightColor[0]);if (d[0] > 255) d[0] = 255;
2847                                 d[1] = (int)(                                                  diffusetex[1] * Color_Ambient[1] + (diffusetex[1] * Color_Diffuse[1] * diffuse + glosstex[1] * Color_Specular[1] * specular) * LightColor[1]);if (d[1] > 255) d[1] = 255;
2848                                 d[2] = (int)(                                                  diffusetex[2] * Color_Ambient[2] + (diffusetex[2] * Color_Diffuse[2] * diffuse + glosstex[2] * Color_Specular[2] * specular) * LightColor[2]);if (d[2] > 255) d[2] = 255;
2849                                 d[3] = (int)(                                                  diffusetex[3] * Color_Ambient[3]);if (d[3] > 255) d[3] = 255;
2850                         }
2851                         buffer_FragColorbgra8[x*4+0] = d[0];
2852                         buffer_FragColorbgra8[x*4+1] = d[1];
2853                         buffer_FragColorbgra8[x*4+2] = d[2];
2854                         buffer_FragColorbgra8[x*4+3] = d[3];
2855                 }
2856         }
2857         else if (dpsoftrast.shader_permutation & SHADERPERMUTATION_DIFFUSE)
2858         {
2859                 Color_Diffuse[2] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Diffuse*4+0];
2860                 Color_Diffuse[1] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Diffuse*4+1];
2861                 Color_Diffuse[0] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Diffuse*4+2];
2862                 Color_Diffuse[3] = 0.0f;
2863                 LightColor[2] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_LightColor*4+0];
2864                 LightColor[1] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_LightColor*4+1];
2865                 LightColor[0] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_LightColor*4+2];
2866                 LightColor[3] = 0.0f;
2867                 LightVectordata[0]  = span->data[0][DPSOFTRAST_ARRAY_TEXCOORD1][0];
2868                 LightVectordata[1]  = span->data[0][DPSOFTRAST_ARRAY_TEXCOORD1][1];
2869                 LightVectordata[2]  = span->data[0][DPSOFTRAST_ARRAY_TEXCOORD1][2];
2870                 LightVectordata[3]  = span->data[0][DPSOFTRAST_ARRAY_TEXCOORD1][3];
2871                 LightVectorslope[0] = span->data[1][DPSOFTRAST_ARRAY_TEXCOORD1][0];
2872                 LightVectorslope[1] = span->data[1][DPSOFTRAST_ARRAY_TEXCOORD1][1];
2873                 LightVectorslope[2] = span->data[1][DPSOFTRAST_ARRAY_TEXCOORD1][2];
2874                 LightVectorslope[3] = span->data[1][DPSOFTRAST_ARRAY_TEXCOORD1][3];
2875                 DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(span, buffer_texture_normalbgra8, GL20TU_NORMAL, DPSOFTRAST_ARRAY_TEXCOORD0, buffer_z);
2876                 for (x = startx;x < endx;x++)
2877                 {
2878                         z = buffer_z[x];
2879                         diffusetex[0] = buffer_texture_colorbgra8[x*4+0];
2880                         diffusetex[1] = buffer_texture_colorbgra8[x*4+1];
2881                         diffusetex[2] = buffer_texture_colorbgra8[x*4+2];
2882                         diffusetex[3] = buffer_texture_colorbgra8[x*4+3];
2883                         surfacenormal[0] = buffer_texture_normalbgra8[x*4+2] * (1.0f / 128.0f) - 1.0f;
2884                         surfacenormal[1] = buffer_texture_normalbgra8[x*4+1] * (1.0f / 128.0f) - 1.0f;
2885                         surfacenormal[2] = buffer_texture_normalbgra8[x*4+0] * (1.0f / 128.0f) - 1.0f;
2886                         DPSOFTRAST_Vector3Normalize(surfacenormal);
2887
2888                         lightnormal[0] = (LightVectordata[0] + LightVectorslope[0]*x) * z;
2889                         lightnormal[1] = (LightVectordata[1] + LightVectorslope[1]*x) * z;
2890                         lightnormal[2] = (LightVectordata[2] + LightVectorslope[2]*x) * z;
2891                         DPSOFTRAST_Vector3Normalize(lightnormal);
2892
2893                         diffuse = DPSOFTRAST_Vector3Dot(surfacenormal, lightnormal);if (diffuse < 0.0f) diffuse = 0.0f;
2894                         if (dpsoftrast.shader_permutation & SHADERPERMUTATION_GLOW)
2895                         {
2896                                 d[0] = (int)(buffer_texture_glowbgra8[x*4+0] * Color_Glow[0] + diffusetex[0] * (Color_Ambient[0] + Color_Diffuse[0] * diffuse * LightColor[0]));if (d[0] > 255) d[0] = 255;
2897                                 d[1] = (int)(buffer_texture_glowbgra8[x*4+1] * Color_Glow[1] + diffusetex[1] * (Color_Ambient[1] + Color_Diffuse[1] * diffuse * LightColor[1]));if (d[1] > 255) d[1] = 255;
2898                                 d[2] = (int)(buffer_texture_glowbgra8[x*4+2] * Color_Glow[2] + diffusetex[2] * (Color_Ambient[2] + Color_Diffuse[2] * diffuse * LightColor[2]));if (d[2] > 255) d[2] = 255;
2899                                 d[3] = (int)(                                                  diffusetex[3] * (Color_Ambient[3]                                             ));if (d[3] > 255) d[3] = 255;
2900                         }
2901                         else
2902                         {
2903                                 d[0] = (int)(                                                + diffusetex[0] * (Color_Ambient[0] + Color_Diffuse[0] * diffuse * LightColor[0]));if (d[0] > 255) d[0] = 255;
2904                                 d[1] = (int)(                                                + diffusetex[1] * (Color_Ambient[1] + Color_Diffuse[1] * diffuse * LightColor[1]));if (d[1] > 255) d[1] = 255;
2905                                 d[2] = (int)(                                                + diffusetex[2] * (Color_Ambient[2] + Color_Diffuse[2] * diffuse * LightColor[2]));if (d[2] > 255) d[2] = 255;
2906                                 d[3] = (int)(                                                  diffusetex[3] * (Color_Ambient[3]                                             ));if (d[3] > 255) d[3] = 255;
2907                         }
2908                         buffer_FragColorbgra8[x*4+0] = d[0];
2909                         buffer_FragColorbgra8[x*4+1] = d[1];
2910                         buffer_FragColorbgra8[x*4+2] = d[2];
2911                         buffer_FragColorbgra8[x*4+3] = d[3];
2912                 }
2913         }
2914         else
2915         {
2916                 for (x = startx;x < endx;x++)
2917                 {
2918                         z = buffer_z[x];
2919                         diffusetex[0] = buffer_texture_colorbgra8[x*4+0];
2920                         diffusetex[1] = buffer_texture_colorbgra8[x*4+1];
2921                         diffusetex[2] = buffer_texture_colorbgra8[x*4+2];
2922                         diffusetex[3] = buffer_texture_colorbgra8[x*4+3];
2923
2924                         if (dpsoftrast.shader_permutation & SHADERPERMUTATION_GLOW)
2925                         {
2926                                 d[0] = (int)(buffer_texture_glowbgra8[x*4+0] * Color_Glow[0] + diffusetex[0] * Color_Ambient[0]);if (d[0] > 255) d[0] = 255;
2927                                 d[1] = (int)(buffer_texture_glowbgra8[x*4+1] * Color_Glow[1] + diffusetex[1] * Color_Ambient[1]);if (d[1] > 255) d[1] = 255;
2928                                 d[2] = (int)(buffer_texture_glowbgra8[x*4+2] * Color_Glow[2] + diffusetex[2] * Color_Ambient[2]);if (d[2] > 255) d[2] = 255;
2929                                 d[3] = (int)(                                                  diffusetex[3] * Color_Ambient[3]);if (d[3] > 255) d[3] = 255;
2930                         }
2931                         else
2932                         {
2933                                 d[0] = (int)(                                                  diffusetex[0] * Color_Ambient[0]);if (d[0] > 255) d[0] = 255;
2934                                 d[1] = (int)(                                                  diffusetex[1] * Color_Ambient[1]);if (d[1] > 255) d[1] = 255;
2935                                 d[2] = (int)(                                                  diffusetex[2] * Color_Ambient[2]);if (d[2] > 255) d[2] = 255;
2936                                 d[3] = (int)(                                                  diffusetex[3] * Color_Ambient[3]);if (d[3] > 255) d[3] = 255;
2937                         }
2938                         buffer_FragColorbgra8[x*4+0] = d[0];
2939                         buffer_FragColorbgra8[x*4+1] = d[1];
2940                         buffer_FragColorbgra8[x*4+2] = d[2];
2941                         buffer_FragColorbgra8[x*4+3] = d[3];
2942                 }
2943         }
2944         DPSOFTRAST_Draw_Span_FinishBGRA8(span, buffer_FragColorbgra8);
2945 }
2946
2947
2948
2949 void DPSOFTRAST_VertexShader_LightSource(void)
2950 {
2951         int i;
2952         int numvertices = dpsoftrast.draw.numvertices;
2953         float LightPosition[4];
2954         float LightVector[4];
2955         float LightVectorModelSpace[4];
2956         float EyePosition[4];
2957         float EyeVectorModelSpace[4];
2958         float EyeVector[4];
2959         float position[4];
2960         float svector[4];
2961         float tvector[4];
2962         float normal[4];
2963         LightPosition[0] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_LightPosition*4+0];
2964         LightPosition[1] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_LightPosition*4+1];
2965         LightPosition[2] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_LightPosition*4+2];
2966         LightPosition[3] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_LightPosition*4+3];
2967         EyePosition[0] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_EyePosition*4+0];
2968         EyePosition[1] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_EyePosition*4+1];
2969         EyePosition[2] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_EyePosition*4+2];
2970         EyePosition[3] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_EyePosition*4+3];
2971         DPSOFTRAST_Array_Transform(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_POSITION], dpsoftrast.draw.numvertices, dpsoftrast.uniform4f + 4*DPSOFTRAST_UNIFORM_ModelViewProjectionMatrixM1);
2972         DPSOFTRAST_Array_Transform(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD0], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD0], dpsoftrast.draw.numvertices, dpsoftrast.uniform4f + 4*DPSOFTRAST_UNIFORM_TexMatrixM1);
2973         DPSOFTRAST_Array_Transform(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD3], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_POSITION], dpsoftrast.draw.numvertices, dpsoftrast.uniform4f + 4*DPSOFTRAST_UNIFORM_ModelToLightM1);
2974         DPSOFTRAST_Array_Copy(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD4], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD4], dpsoftrast.draw.numvertices);
2975         for (i = 0;i < numvertices;i++)
2976         {
2977                 position[0] = dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_POSITION][i*4+0];
2978                 position[1] = dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_POSITION][i*4+1];
2979                 position[2] = dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_POSITION][i*4+2];
2980                 svector[0] = dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD1][i*4+0];
2981                 svector[1] = dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD1][i*4+1];
2982                 svector[2] = dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD1][i*4+2];
2983                 tvector[0] = dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD2][i*4+0];
2984                 tvector[1] = dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD2][i*4+1];
2985                 tvector[2] = dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD2][i*4+2];
2986                 normal[0] = dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD3][i*4+0];
2987                 normal[1] = dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD3][i*4+1];
2988                 normal[2] = dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD3][i*4+2];
2989                 LightVectorModelSpace[0] = LightPosition[0] - position[0];
2990                 LightVectorModelSpace[1] = LightPosition[1] - position[1];
2991                 LightVectorModelSpace[2] = LightPosition[2] - position[2];
2992                 LightVector[0] = svector[0] * LightVectorModelSpace[0] + svector[1] * LightVectorModelSpace[1] + svector[2] * LightVectorModelSpace[2];
2993                 LightVector[1] = tvector[0] * LightVectorModelSpace[0] + tvector[1] * LightVectorModelSpace[1] + tvector[2] * LightVectorModelSpace[2];
2994                 LightVector[2] = normal[0]  * LightVectorModelSpace[0] + normal[1]  * LightVectorModelSpace[1] + normal[2]  * LightVectorModelSpace[2];
2995                 dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD1][i*4+0] = LightVector[0];
2996                 dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD1][i*4+1] = LightVector[1];
2997                 dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD1][i*4+2] = LightVector[2];
2998                 dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD1][i*4+3] = 0.0f;
2999                 EyeVectorModelSpace[0] = EyePosition[0] - position[0];
3000                 EyeVectorModelSpace[1] = EyePosition[1] - position[1];
3001                 EyeVectorModelSpace[2] = EyePosition[2] - position[2];
3002                 EyeVector[0] = svector[0] * EyeVectorModelSpace[0] + svector[1] * EyeVectorModelSpace[1] + svector[2] * EyeVectorModelSpace[2];
3003                 EyeVector[1] = tvector[0] * EyeVectorModelSpace[0] + tvector[1] * EyeVectorModelSpace[1] + tvector[2] * EyeVectorModelSpace[2];
3004                 EyeVector[2] = normal[0]  * EyeVectorModelSpace[0] + normal[1]  * EyeVectorModelSpace[1] + normal[2]  * EyeVectorModelSpace[2];
3005                 dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD2][i*4+0] = EyeVector[0];
3006                 dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD2][i*4+1] = EyeVector[1];
3007                 dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD2][i*4+2] = EyeVector[2];
3008                 dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD2][i*4+3] = 0.0f;
3009         }
3010 }
3011
3012 void DPSOFTRAST_PixelShader_LightSource(const DPSOFTRAST_State_Draw_Span * RESTRICT span)
3013 {
3014         float buffer_z[DPSOFTRAST_DRAW_MAXSPANLENGTH];
3015         unsigned char buffer_texture_colorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
3016         unsigned char buffer_texture_normalbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
3017         unsigned char buffer_texture_glossbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
3018         unsigned char buffer_texture_cubebgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
3019         unsigned char buffer_texture_pantsbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
3020         unsigned char buffer_texture_shirtbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
3021         unsigned char buffer_FragColorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
3022         int x, startx = span->startx, endx = span->endx;
3023         float Color_Ambient[4], Color_Diffuse[4], Color_Specular[4], Color_Glow[4], Color_Pants[4], Color_Shirt[4], LightColor[4];
3024         float CubeVectordata[4];
3025         float CubeVectorslope[4];
3026         float LightVectordata[4];
3027         float LightVectorslope[4];
3028         float EyeVectordata[4];
3029         float EyeVectorslope[4];
3030         float z;
3031         float diffusetex[4];
3032         float glosstex[4];
3033         float surfacenormal[4];
3034         float lightnormal[4];
3035         float eyenormal[4];
3036         float specularnormal[4];
3037         float diffuse;
3038         float specular;
3039         float SpecularPower;
3040         float CubeVector[4];
3041         float attenuation;
3042         int d[4];
3043         Color_Glow[2] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Glow*4+0];
3044         Color_Glow[1] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Glow*4+1];
3045         Color_Glow[0] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Glow*4+2];
3046         Color_Glow[3] = 0.0f;
3047         Color_Ambient[2] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Ambient*4+0];
3048         Color_Ambient[1] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Ambient*4+1];
3049         Color_Ambient[0] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Ambient*4+2];
3050         Color_Ambient[3] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Alpha*4+0];
3051         Color_Diffuse[2] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Diffuse*4+0];
3052         Color_Diffuse[1] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Diffuse*4+1];
3053         Color_Diffuse[0] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Diffuse*4+2];
3054         Color_Diffuse[3] = 0.0f;
3055         Color_Specular[2] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Specular*4+0];
3056         Color_Specular[1] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Specular*4+1];
3057         Color_Specular[0] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Specular*4+2];
3058         Color_Specular[3] = 0.0f;
3059         Color_Pants[2] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Pants*4+0];
3060         Color_Pants[1] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Pants*4+1];
3061         Color_Pants[0] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Pants*4+2];
3062         Color_Pants[3] = 0.0f;
3063         Color_Shirt[2] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Shirt*4+0];
3064         Color_Shirt[1] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Shirt*4+1];
3065         Color_Shirt[0] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Shirt*4+2];
3066         Color_Shirt[3] = 0.0f;
3067         LightColor[2] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_LightColor*4+0];
3068         LightColor[1] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_LightColor*4+1];
3069         LightColor[0] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_LightColor*4+2];
3070         LightColor[3] = 0.0f;
3071         SpecularPower = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_SpecularPower*4+0] * (1.0f / 255.0f);
3072         EyeVectordata[0]    = span->data[0][DPSOFTRAST_ARRAY_TEXCOORD2][0];
3073         EyeVectordata[1]    = span->data[0][DPSOFTRAST_ARRAY_TEXCOORD2][1];
3074         EyeVectordata[2]    = span->data[0][DPSOFTRAST_ARRAY_TEXCOORD2][2];
3075         EyeVectordata[3]    = span->data[0][DPSOFTRAST_ARRAY_TEXCOORD2][3];
3076         EyeVectorslope[0]   = span->data[1][DPSOFTRAST_ARRAY_TEXCOORD2][0];
3077         EyeVectorslope[1]   = span->data[1][DPSOFTRAST_ARRAY_TEXCOORD2][1];
3078         EyeVectorslope[2]   = span->data[1][DPSOFTRAST_ARRAY_TEXCOORD2][2];
3079         EyeVectorslope[3]   = span->data[1][DPSOFTRAST_ARRAY_TEXCOORD2][3];
3080         LightVectordata[0]  = span->data[0][DPSOFTRAST_ARRAY_TEXCOORD1][0];
3081         LightVectordata[1]  = span->data[0][DPSOFTRAST_ARRAY_TEXCOORD1][1];
3082         LightVectordata[2]  = span->data[0][DPSOFTRAST_ARRAY_TEXCOORD1][2];
3083         LightVectordata[3]  = span->data[0][DPSOFTRAST_ARRAY_TEXCOORD1][3];
3084         LightVectorslope[0] = span->data[1][DPSOFTRAST_ARRAY_TEXCOORD1][0];
3085         LightVectorslope[1] = span->data[1][DPSOFTRAST_ARRAY_TEXCOORD1][1];
3086         LightVectorslope[2] = span->data[1][DPSOFTRAST_ARRAY_TEXCOORD1][2];
3087         LightVectorslope[3] = span->data[1][DPSOFTRAST_ARRAY_TEXCOORD1][3];
3088         CubeVectordata[0]  = span->data[0][DPSOFTRAST_ARRAY_TEXCOORD3][0];
3089         CubeVectordata[1]  = span->data[0][DPSOFTRAST_ARRAY_TEXCOORD3][1];
3090         CubeVectordata[2]  = span->data[0][DPSOFTRAST_ARRAY_TEXCOORD3][2];
3091         CubeVectordata[3]  = span->data[0][DPSOFTRAST_ARRAY_TEXCOORD3][3];
3092         CubeVectorslope[0] = span->data[1][DPSOFTRAST_ARRAY_TEXCOORD3][0];
3093         CubeVectorslope[1] = span->data[1][DPSOFTRAST_ARRAY_TEXCOORD3][1];
3094         CubeVectorslope[2] = span->data[1][DPSOFTRAST_ARRAY_TEXCOORD3][2];
3095         CubeVectorslope[3] = span->data[1][DPSOFTRAST_ARRAY_TEXCOORD3][3];
3096         DPSOFTRAST_Draw_Span_Begin(span, buffer_z);
3097         memset(buffer_FragColorbgra8 + startx*4, 0, (endx-startx)*4); // clear first, because we skip writing black pixels, and there are a LOT of them...
3098         DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(span, buffer_texture_colorbgra8, GL20TU_COLOR, DPSOFTRAST_ARRAY_TEXCOORD0, buffer_z);
3099         if (dpsoftrast.shader_permutation & SHADERPERMUTATION_COLORMAPPING)
3100         {
3101                 DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(span, buffer_texture_pantsbgra8, GL20TU_PANTS, DPSOFTRAST_ARRAY_TEXCOORD0, buffer_z);
3102                 DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(span, buffer_texture_shirtbgra8, GL20TU_SHIRT, DPSOFTRAST_ARRAY_TEXCOORD0, buffer_z);
3103         }
3104         if (dpsoftrast.shader_permutation & SHADERPERMUTATION_CUBEFILTER)
3105                 DPSOFTRAST_Draw_Span_TextureCubeVaryingBGRA8(span, buffer_texture_cubebgra8, GL20TU_CUBE, DPSOFTRAST_ARRAY_TEXCOORD3, buffer_z);
3106         if (dpsoftrast.shader_permutation & SHADERPERMUTATION_SPECULAR)
3107         {
3108                 DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(span, buffer_texture_normalbgra8, GL20TU_NORMAL, DPSOFTRAST_ARRAY_TEXCOORD0, buffer_z);
3109                 DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(span, buffer_texture_glossbgra8, GL20TU_GLOSS, DPSOFTRAST_ARRAY_TEXCOORD0, buffer_z);
3110                 for (x = startx;x < endx;x++)
3111                 {
3112                         z = buffer_z[x];
3113                         CubeVector[0] = (CubeVectordata[0] + CubeVectorslope[0]*x) * z;
3114                         CubeVector[1] = (CubeVectordata[1] + CubeVectorslope[1]*x) * z;
3115                         CubeVector[2] = (CubeVectordata[2] + CubeVectorslope[2]*x) * z;
3116                         attenuation = 1.0f - DPSOFTRAST_Vector3LengthSquared(CubeVector);
3117                         if (attenuation < 0.01f)
3118                                 continue;
3119                         if (dpsoftrast.shader_permutation & SHADERPERMUTATION_SHADOWMAP2D)
3120                         {
3121                                 attenuation *= DPSOFTRAST_SampleShadowmap(CubeVector);
3122                                 if (attenuation < 0.01f)
3123                                         continue;
3124                         }
3125
3126                         diffusetex[0] = buffer_texture_colorbgra8[x*4+0];
3127                         diffusetex[1] = buffer_texture_colorbgra8[x*4+1];
3128                         diffusetex[2] = buffer_texture_colorbgra8[x*4+2];
3129                         diffusetex[3] = buffer_texture_colorbgra8[x*4+3];
3130                         if (dpsoftrast.shader_permutation & SHADERPERMUTATION_COLORMAPPING)
3131                         {
3132                                 diffusetex[0] += buffer_texture_pantsbgra8[x*4+0] * Color_Pants[0] + buffer_texture_shirtbgra8[x*4+0] * Color_Shirt[0];
3133                                 diffusetex[1] += buffer_texture_pantsbgra8[x*4+1] * Color_Pants[1] + buffer_texture_shirtbgra8[x*4+1] * Color_Shirt[1];
3134                                 diffusetex[2] += buffer_texture_pantsbgra8[x*4+2] * Color_Pants[2] + buffer_texture_shirtbgra8[x*4+2] * Color_Shirt[2];
3135                                 diffusetex[3] += buffer_texture_pantsbgra8[x*4+3] * Color_Pants[3] + buffer_texture_shirtbgra8[x*4+3] * Color_Shirt[3];
3136                         }
3137                         glosstex[0] = buffer_texture_glossbgra8[x*4+0];
3138                         glosstex[1] = buffer_texture_glossbgra8[x*4+1];
3139                         glosstex[2] = buffer_texture_glossbgra8[x*4+2];
3140                         glosstex[3] = buffer_texture_glossbgra8[x*4+3];
3141                         surfacenormal[0] = buffer_texture_normalbgra8[x*4+2] * (1.0f / 128.0f) - 1.0f;
3142                         surfacenormal[1] = buffer_texture_normalbgra8[x*4+1] * (1.0f / 128.0f) - 1.0f;
3143                         surfacenormal[2] = buffer_texture_normalbgra8[x*4+0] * (1.0f / 128.0f) - 1.0f;
3144                         DPSOFTRAST_Vector3Normalize(surfacenormal);
3145
3146                         lightnormal[0] = (LightVectordata[0] + LightVectorslope[0]*x) * z;
3147                         lightnormal[1] = (LightVectordata[1] + LightVectorslope[1]*x) * z;
3148                         lightnormal[2] = (LightVectordata[2] + LightVectorslope[2]*x) * z;
3149                         DPSOFTRAST_Vector3Normalize(lightnormal);
3150
3151                         eyenormal[0] = (EyeVectordata[0] + EyeVectorslope[0]*x) * z;
3152                         eyenormal[1] = (EyeVectordata[1] + EyeVectorslope[1]*x) * z;
3153                         eyenormal[2] = (EyeVectordata[2] + EyeVectorslope[2]*x) * z;
3154                         DPSOFTRAST_Vector3Normalize(eyenormal);
3155
3156                         specularnormal[0] = lightnormal[0] + eyenormal[0];
3157                         specularnormal[1] = lightnormal[1] + eyenormal[1];
3158                         specularnormal[2] = lightnormal[2] + eyenormal[2];
3159                         DPSOFTRAST_Vector3Normalize(specularnormal);
3160
3161                         diffuse = DPSOFTRAST_Vector3Dot(surfacenormal, lightnormal);if (diffuse < 0.0f) diffuse = 0.0f;
3162                         specular = DPSOFTRAST_Vector3Dot(surfacenormal, specularnormal);if (specular < 0.0f) specular = 0.0f;
3163                         specular = pow(specular, SpecularPower * glosstex[3]);
3164                         if (dpsoftrast.shader_permutation & SHADERPERMUTATION_CUBEFILTER)
3165                         {
3166                                 // scale down the attenuation to account for the cubefilter multiplying everything by 255
3167                                 attenuation *= (1.0f / 255.0f);
3168                                 d[0] = (int)((diffusetex[0] * (Color_Ambient[0] + Color_Diffuse[0] * diffuse) + glosstex[0] * Color_Specular[0] * specular) * LightColor[0] * buffer_texture_cubebgra8[x*4+0] * attenuation);if (d[0] > 255) d[0] = 255;
3169                                 d[1] = (int)((diffusetex[1] * (Color_Ambient[1] + Color_Diffuse[1] * diffuse) + glosstex[1] * Color_Specular[1] * specular) * LightColor[1] * buffer_texture_cubebgra8[x*4+1] * attenuation);if (d[1] > 255) d[1] = 255;
3170                                 d[2] = (int)((diffusetex[2] * (Color_Ambient[2] + Color_Diffuse[2] * diffuse) + glosstex[2] * Color_Specular[2] * specular) * LightColor[2] * buffer_texture_cubebgra8[x*4+2] * attenuation);if (d[2] > 255) d[2] = 255;
3171                                 d[3] = (int)( diffusetex[3]                                                                                                                                                                );if (d[3] > 255) d[3] = 255;
3172                         }
3173                         else
3174                         {
3175                                 d[0] = (int)((diffusetex[0] * (Color_Ambient[0] + Color_Diffuse[0] * diffuse) + glosstex[0] * Color_Specular[0] * specular) * LightColor[0]                                   * attenuation);if (d[0] > 255) d[0] = 255;
3176                                 d[1] = (int)((diffusetex[1] * (Color_Ambient[1] + Color_Diffuse[1] * diffuse) + glosstex[1] * Color_Specular[1] * specular) * LightColor[1]                                   * attenuation);if (d[1] > 255) d[1] = 255;
3177                                 d[2] = (int)((diffusetex[2] * (Color_Ambient[2] + Color_Diffuse[2] * diffuse) + glosstex[2] * Color_Specular[2] * specular) * LightColor[2]                                   * attenuation);if (d[2] > 255) d[2] = 255;
3178                                 d[3] = (int)( diffusetex[3]                                                                                                                                                                );if (d[3] > 255) d[3] = 255;
3179                         }
3180                         buffer_FragColorbgra8[x*4+0] = d[0];
3181                         buffer_FragColorbgra8[x*4+1] = d[1];
3182                         buffer_FragColorbgra8[x*4+2] = d[2];
3183                         buffer_FragColorbgra8[x*4+3] = d[3];
3184                 }
3185         }
3186         else if (dpsoftrast.shader_permutation & SHADERPERMUTATION_DIFFUSE)
3187         {
3188                 DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(span, buffer_texture_normalbgra8, GL20TU_NORMAL, DPSOFTRAST_ARRAY_TEXCOORD0, buffer_z);
3189                 for (x = startx;x < endx;x++)
3190                 {
3191                         z = buffer_z[x];
3192                         CubeVector[0] = (CubeVectordata[0] + CubeVectorslope[0]*x) * z;
3193                         CubeVector[1] = (CubeVectordata[1] + CubeVectorslope[1]*x) * z;
3194                         CubeVector[2] = (CubeVectordata[2] + CubeVectorslope[2]*x) * z;
3195                         attenuation = 1.0f - DPSOFTRAST_Vector3LengthSquared(CubeVector);
3196                         if (attenuation < 0.01f)
3197                                 continue;
3198                         if (dpsoftrast.shader_permutation & SHADERPERMUTATION_SHADOWMAP2D)
3199                         {
3200                                 attenuation *= DPSOFTRAST_SampleShadowmap(CubeVector);
3201                                 if (attenuation < 0.01f)
3202                                         continue;
3203                         }
3204
3205                         diffusetex[0] = buffer_texture_colorbgra8[x*4+0];
3206                         diffusetex[1] = buffer_texture_colorbgra8[x*4+1];
3207                         diffusetex[2] = buffer_texture_colorbgra8[x*4+2];
3208                         diffusetex[3] = buffer_texture_colorbgra8[x*4+3];
3209                         if (dpsoftrast.shader_permutation & SHADERPERMUTATION_COLORMAPPING)
3210                         {
3211                                 diffusetex[0] += buffer_texture_pantsbgra8[x*4+0] * Color_Pants[0] + buffer_texture_shirtbgra8[x*4+0] * Color_Shirt[0];
3212                                 diffusetex[1] += buffer_texture_pantsbgra8[x*4+1] * Color_Pants[1] + buffer_texture_shirtbgra8[x*4+1] * Color_Shirt[1];
3213                                 diffusetex[2] += buffer_texture_pantsbgra8[x*4+2] * Color_Pants[2] + buffer_texture_shirtbgra8[x*4+2] * Color_Shirt[2];
3214                                 diffusetex[3] += buffer_texture_pantsbgra8[x*4+3] * Color_Pants[3] + buffer_texture_shirtbgra8[x*4+3] * Color_Shirt[3];
3215                         }
3216                         surfacenormal[0] = buffer_texture_normalbgra8[x*4+2] * (1.0f / 128.0f) - 1.0f;
3217                         surfacenormal[1] = buffer_texture_normalbgra8[x*4+1] * (1.0f / 128.0f) - 1.0f;
3218                         surfacenormal[2] = buffer_texture_normalbgra8[x*4+0] * (1.0f / 128.0f) - 1.0f;
3219                         DPSOFTRAST_Vector3Normalize(surfacenormal);
3220
3221                         lightnormal[0] = (LightVectordata[0] + LightVectorslope[0]*x) * z;
3222                         lightnormal[1] = (LightVectordata[1] + LightVectorslope[1]*x) * z;
3223                         lightnormal[2] = (LightVectordata[2] + LightVectorslope[2]*x) * z;
3224                         DPSOFTRAST_Vector3Normalize(lightnormal);
3225
3226                         diffuse = DPSOFTRAST_Vector3Dot(surfacenormal, lightnormal);if (diffuse < 0.0f) diffuse = 0.0f;
3227                         if (dpsoftrast.shader_permutation & SHADERPERMUTATION_CUBEFILTER)
3228                         {
3229                                 // scale down the attenuation to account for the cubefilter multiplying everything by 255
3230                                 attenuation *= (1.0f / 255.0f);
3231                                 d[0] = (int)((diffusetex[0] * (Color_Ambient[0] + Color_Diffuse[0] * diffuse)) * LightColor[0] * buffer_texture_cubebgra8[x*4+0] * attenuation);if (d[0] > 255) d[0] = 255;
3232                                 d[1] = (int)((diffusetex[1] * (Color_Ambient[1] + Color_Diffuse[1] * diffuse)) * LightColor[1] * buffer_texture_cubebgra8[x*4+1] * attenuation);if (d[1] > 255) d[1] = 255;
3233                                 d[2] = (int)((diffusetex[2] * (Color_Ambient[2] + Color_Diffuse[2] * diffuse)) * LightColor[2] * buffer_texture_cubebgra8[x*4+2] * attenuation);if (d[2] > 255) d[2] = 255;
3234                                 d[3] = (int)( diffusetex[3]                                                                                                                   );if (d[3] > 255) d[3] = 255;
3235                         }
3236                         else
3237                         {
3238                                 d[0] = (int)((diffusetex[0] * (Color_Ambient[0] + Color_Diffuse[0] * diffuse)) * LightColor[0]                                   * attenuation);if (d[0] > 255) d[0] = 255;
3239                                 d[1] = (int)((diffusetex[1] * (Color_Ambient[1] + Color_Diffuse[1] * diffuse)) * LightColor[1]                                   * attenuation);if (d[1] > 255) d[1] = 255;
3240                                 d[2] = (int)((diffusetex[2] * (Color_Ambient[2] + Color_Diffuse[2] * diffuse)) * LightColor[2]                                   * attenuation);if (d[2] > 255) d[2] = 255;
3241                                 d[3] = (int)( diffusetex[3]                                                                                                                                                                );if (d[3] > 255) d[3] = 255;
3242                         }
3243                         buffer_FragColorbgra8[x*4+0] = d[0];
3244                         buffer_FragColorbgra8[x*4+1] = d[1];
3245                         buffer_FragColorbgra8[x*4+2] = d[2];
3246                         buffer_FragColorbgra8[x*4+3] = d[3];
3247                 }
3248         }
3249         else
3250         {
3251                 for (x = startx;x < endx;x++)
3252                 {
3253                         z = buffer_z[x];
3254                         CubeVector[0] = (CubeVectordata[0] + CubeVectorslope[0]*x) * z;
3255                         CubeVector[1] = (CubeVectordata[1] + CubeVectorslope[1]*x) * z;
3256                         CubeVector[2] = (CubeVectordata[2] + CubeVectorslope[2]*x) * z;
3257                         attenuation = 1.0f - DPSOFTRAST_Vector3LengthSquared(CubeVector);
3258                         if (attenuation < 0.01f)
3259                                 continue;
3260                         if (dpsoftrast.shader_permutation & SHADERPERMUTATION_SHADOWMAP2D)
3261                         {
3262                                 attenuation *= DPSOFTRAST_SampleShadowmap(CubeVector);
3263                                 if (attenuation < 0.01f)
3264                                         continue;
3265                         }
3266
3267                         diffusetex[0] = buffer_texture_colorbgra8[x*4+0];
3268                         diffusetex[1] = buffer_texture_colorbgra8[x*4+1];
3269                         diffusetex[2] = buffer_texture_colorbgra8[x*4+2];
3270                         diffusetex[3] = buffer_texture_colorbgra8[x*4+3];
3271                         if (dpsoftrast.shader_permutation & SHADERPERMUTATION_COLORMAPPING)
3272                         {
3273                                 diffusetex[0] += buffer_texture_pantsbgra8[x*4+0] * Color_Pants[0] + buffer_texture_shirtbgra8[x*4+0] * Color_Shirt[0];
3274                                 diffusetex[1] += buffer_texture_pantsbgra8[x*4+1] * Color_Pants[1] + buffer_texture_shirtbgra8[x*4+1] * Color_Shirt[1];
3275                                 diffusetex[2] += buffer_texture_pantsbgra8[x*4+2] * Color_Pants[2] + buffer_texture_shirtbgra8[x*4+2] * Color_Shirt[2];
3276                                 diffusetex[3] += buffer_texture_pantsbgra8[x*4+3] * Color_Pants[3] + buffer_texture_shirtbgra8[x*4+3] * Color_Shirt[3];
3277                         }
3278                         if (dpsoftrast.shader_permutation & SHADERPERMUTATION_CUBEFILTER)
3279                         {
3280                                 // scale down the attenuation to account for the cubefilter multiplying everything by 255
3281                                 attenuation *= (1.0f / 255.0f);
3282                                 d[0] = (int)((diffusetex[0] * (Color_Ambient[0])) * LightColor[0] * buffer_texture_cubebgra8[x*4+0] * attenuation);if (d[0] > 255) d[0] = 255;
3283                                 d[1] = (int)((diffusetex[1] * (Color_Ambient[1])) * LightColor[1] * buffer_texture_cubebgra8[x*4+1] * attenuation);if (d[1] > 255) d[1] = 255;
3284                                 d[2] = (int)((diffusetex[2] * (Color_Ambient[2])) * LightColor[2] * buffer_texture_cubebgra8[x*4+2] * attenuation);if (d[2] > 255) d[2] = 255;
3285                                 d[3] = (int)( diffusetex[3]                                                                                      );if (d[3] > 255) d[3] = 255;
3286                         }
3287                         else
3288                         {
3289                                 d[0] = (int)((diffusetex[0] * (Color_Ambient[0])) * LightColor[0]                                   * attenuation);if (d[0] > 255) d[0] = 255;
3290                                 d[1] = (int)((diffusetex[1] * (Color_Ambient[1])) * LightColor[1]                                   * attenuation);if (d[1] > 255) d[1] = 255;
3291                                 d[2] = (int)((diffusetex[2] * (Color_Ambient[2])) * LightColor[2]                                   * attenuation);if (d[2] > 255) d[2] = 255;
3292                                 d[3] = (int)( diffusetex[3]                                                                                                                                                                );if (d[3] > 255) d[3] = 255;
3293                         }
3294                         buffer_FragColorbgra8[x*4+0] = d[0];
3295                         buffer_FragColorbgra8[x*4+1] = d[1];
3296                         buffer_FragColorbgra8[x*4+2] = d[2];
3297                         buffer_FragColorbgra8[x*4+3] = d[3];
3298                 }
3299         }
3300         DPSOFTRAST_Draw_Span_FinishBGRA8(span, buffer_FragColorbgra8);
3301 }
3302
3303
3304
3305 void DPSOFTRAST_VertexShader_Refraction(void)
3306 {
3307         DPSOFTRAST_Array_Transform(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_POSITION], dpsoftrast.draw.numvertices, dpsoftrast.uniform4f + 4*DPSOFTRAST_UNIFORM_ModelViewProjectionMatrixM1);
3308 }
3309
3310 void DPSOFTRAST_PixelShader_Refraction(const DPSOFTRAST_State_Draw_Span * RESTRICT span)
3311 {
3312         // TODO: IMPLEMENT
3313         float buffer_z[DPSOFTRAST_DRAW_MAXSPANLENGTH];
3314         unsigned char buffer_FragColorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
3315         DPSOFTRAST_Draw_Span_Begin(span, buffer_z);
3316         memset(buffer_FragColorbgra8, 0, span->length*4);
3317         DPSOFTRAST_Draw_Span_FinishBGRA8(span, buffer_FragColorbgra8);
3318 }
3319
3320
3321
3322 void DPSOFTRAST_VertexShader_Water(void)
3323 {
3324         DPSOFTRAST_Array_Transform(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_POSITION], dpsoftrast.draw.numvertices, dpsoftrast.uniform4f + 4*DPSOFTRAST_UNIFORM_ModelViewProjectionMatrixM1);
3325 }
3326
3327
3328 void DPSOFTRAST_PixelShader_Water(const DPSOFTRAST_State_Draw_Span * RESTRICT span)
3329 {
3330         // TODO: IMPLEMENT
3331         float buffer_z[DPSOFTRAST_DRAW_MAXSPANLENGTH];
3332         unsigned char buffer_FragColorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
3333         DPSOFTRAST_Draw_Span_Begin(span, buffer_z);
3334         memset(buffer_FragColorbgra8, 0, span->length*4);
3335         DPSOFTRAST_Draw_Span_FinishBGRA8(span, buffer_FragColorbgra8);
3336 }
3337
3338
3339
3340 void DPSOFTRAST_VertexShader_ShowDepth(void)
3341 {
3342         DPSOFTRAST_Array_Transform(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_POSITION], dpsoftrast.draw.numvertices, dpsoftrast.uniform4f + 4*DPSOFTRAST_UNIFORM_ModelViewProjectionMatrixM1);
3343 }
3344
3345 void DPSOFTRAST_PixelShader_ShowDepth(const DPSOFTRAST_State_Draw_Span * RESTRICT span)
3346 {
3347         // TODO: IMPLEMENT
3348         float buffer_z[DPSOFTRAST_DRAW_MAXSPANLENGTH];
3349         unsigned char buffer_FragColorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
3350         DPSOFTRAST_Draw_Span_Begin(span, buffer_z);
3351         memset(buffer_FragColorbgra8, 0, span->length*4);
3352         DPSOFTRAST_Draw_Span_FinishBGRA8(span, buffer_FragColorbgra8);
3353 }
3354
3355
3356
3357 void DPSOFTRAST_VertexShader_DeferredGeometry(void)
3358 {
3359         DPSOFTRAST_Array_Transform(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_POSITION], dpsoftrast.draw.numvertices, dpsoftrast.uniform4f + 4*DPSOFTRAST_UNIFORM_ModelViewProjectionMatrixM1);
3360 }
3361
3362 void DPSOFTRAST_PixelShader_DeferredGeometry(const DPSOFTRAST_State_Draw_Span * RESTRICT span)
3363 {
3364         // TODO: IMPLEMENT
3365         float buffer_z[DPSOFTRAST_DRAW_MAXSPANLENGTH];
3366         unsigned char buffer_FragColorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
3367         DPSOFTRAST_Draw_Span_Begin(span, buffer_z);
3368         memset(buffer_FragColorbgra8, 0, span->length*4);
3369         DPSOFTRAST_Draw_Span_FinishBGRA8(span, buffer_FragColorbgra8);
3370 }
3371
3372
3373
3374 void DPSOFTRAST_VertexShader_DeferredLightSource(void)
3375 {
3376         DPSOFTRAST_Array_Transform(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_POSITION], dpsoftrast.draw.numvertices, dpsoftrast.uniform4f + 4*DPSOFTRAST_UNIFORM_ModelViewProjectionMatrixM1);
3377 }
3378
3379 void DPSOFTRAST_PixelShader_DeferredLightSource(const DPSOFTRAST_State_Draw_Span * RESTRICT span)
3380 {
3381         // TODO: IMPLEMENT
3382         float buffer_z[DPSOFTRAST_DRAW_MAXSPANLENGTH];
3383         unsigned char buffer_FragColorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
3384         DPSOFTRAST_Draw_Span_Begin(span, buffer_z);
3385         memset(buffer_FragColorbgra8, 0, span->length*4);
3386         DPSOFTRAST_Draw_Span_FinishBGRA8(span, buffer_FragColorbgra8);
3387 }
3388
3389
3390
3391 typedef struct DPSOFTRAST_ShaderModeInfo_s
3392 {
3393         int lodarrayindex;
3394         void (*Vertex)(void);
3395         void (*Span)(const DPSOFTRAST_State_Draw_Span * RESTRICT span);
3396 }
3397 DPSOFTRAST_ShaderModeInfo;
3398
3399 DPSOFTRAST_ShaderModeInfo DPSOFTRAST_ShaderModeTable[SHADERMODE_COUNT] =
3400 {
3401         {2, DPSOFTRAST_VertexShader_Generic,                        DPSOFTRAST_PixelShader_Generic,                      },
3402         {2, DPSOFTRAST_VertexShader_PostProcess,                    DPSOFTRAST_PixelShader_PostProcess,                  },
3403         {2, DPSOFTRAST_VertexShader_Depth_Or_Shadow,                DPSOFTRAST_PixelShader_Depth_Or_Shadow,              },
3404         {2, DPSOFTRAST_VertexShader_FlatColor,                      DPSOFTRAST_PixelShader_FlatColor,                    },
3405         {2, DPSOFTRAST_VertexShader_VertexColor,                    DPSOFTRAST_PixelShader_VertexColor,                  },
3406         {2, DPSOFTRAST_VertexShader_Lightmap,                       DPSOFTRAST_PixelShader_Lightmap,                     },
3407         {2, DPSOFTRAST_VertexShader_FakeLight,                      DPSOFTRAST_PixelShader_FakeLight,                    },
3408         {2, DPSOFTRAST_VertexShader_LightDirectionMap_ModelSpace,   DPSOFTRAST_PixelShader_LightDirectionMap_ModelSpace, },
3409         {2, DPSOFTRAST_VertexShader_LightDirectionMap_TangentSpace, DPSOFTRAST_PixelShader_LightDirectionMap_TangentSpace},
3410         {2, DPSOFTRAST_VertexShader_LightDirection,                 DPSOFTRAST_PixelShader_LightDirection,               },
3411         {2, DPSOFTRAST_VertexShader_LightSource,                    DPSOFTRAST_PixelShader_LightSource,                  },
3412         {2, DPSOFTRAST_VertexShader_Refraction,                     DPSOFTRAST_PixelShader_Refraction,                   },
3413         {2, DPSOFTRAST_VertexShader_Water,                          DPSOFTRAST_PixelShader_Water,                        },
3414         {2, DPSOFTRAST_VertexShader_ShowDepth,                      DPSOFTRAST_PixelShader_ShowDepth,                    },
3415         {2, DPSOFTRAST_VertexShader_DeferredGeometry,               DPSOFTRAST_PixelShader_DeferredGeometry,             },
3416         {2, DPSOFTRAST_VertexShader_DeferredLightSource,            DPSOFTRAST_PixelShader_DeferredLightSource,          }
3417 };
3418
3419
3420
3421 void DPSOFTRAST_Draw_ProcessSpans(void)
3422 {
3423         int i;
3424         int x;
3425         int startx;
3426         int endx;
3427         int numspans = dpsoftrast.draw.numspans;
3428 //      unsigned int c;
3429 //      unsigned int *colorpixel;
3430         unsigned int *depthpixel;
3431         float w;
3432         float wslope;
3433         int depth;
3434         int depthslope;
3435         unsigned int d;
3436         DPSOFTRAST_State_Draw_Span *span = dpsoftrast.draw.spanqueue;
3437         unsigned char pixelmask[DPSOFTRAST_DRAW_MAXSPANLENGTH];
3438         for (i = 0;i < numspans;i++, span++)
3439         {
3440                 w = span->data[0][DPSOFTRAST_ARRAY_TOTAL][3];
3441                 wslope = span->data[1][DPSOFTRAST_ARRAY_TOTAL][3];
3442                 if (dpsoftrast.user.depthtest && dpsoftrast.fb_depthpixels)
3443                 {
3444                         depth = (int)(w*DPSOFTRAST_DEPTHSCALE);
3445                         depthslope = (int)(wslope*DPSOFTRAST_DEPTHSCALE);
3446                         depthpixel = dpsoftrast.fb_depthpixels + span->start;
3447                         switch(dpsoftrast.fb_depthfunc)
3448                         {
3449                         default:
3450                         case GL_ALWAYS:  for (x = 0, d = depth;x < span->length;x++, d += depthslope) pixelmask[x] = true; break;
3451                         case GL_LESS:    for (x = 0, d = depth;x < span->length;x++, d += depthslope) pixelmask[x] = depthpixel[x] < d; break;
3452                         case GL_LEQUAL:  for (x = 0, d = depth;x < span->length;x++, d += depthslope) pixelmask[x] = depthpixel[x] <= d; break;
3453                         case GL_EQUAL:   for (x = 0, d = depth;x < span->length;x++, d += depthslope) pixelmask[x] = depthpixel[x] == d; break;
3454                         case GL_GEQUAL:  for (x = 0, d = depth;x < span->length;x++, d += depthslope) pixelmask[x] = depthpixel[x] >= d; break;
3455                         case GL_GREATER: for (x = 0, d = depth;x < span->length;x++, d += depthslope) pixelmask[x] = depthpixel[x] > d; break;
3456                         case GL_NEVER:   for (x = 0, d = depth;x < span->length;x++, d += depthslope) pixelmask[x] = false; break;
3457                         }
3458                         //colorpixel = dpsoftrast.fb_colorpixels[0] + span->start;
3459                         //for (x = 0;x < span->length;x++)
3460                         //      colorpixel[x] = (depthpixel[x] & 0xFF000000) ? (0x00FF0000) : (depthpixel[x] & 0x00FF0000);
3461                         // if there is no color buffer, skip pixel shader
3462                         startx = 0;
3463                         endx = span->length;
3464                         while (startx < endx && !pixelmask[startx])
3465                                 startx++;
3466                         while (endx > startx && !pixelmask[endx-1])
3467                                 endx--;
3468                         if (startx >= endx)
3469                                 continue; // no pixels to fill
3470                         span->pixelmask = pixelmask;
3471                         span->startx = startx;
3472                         span->endx = endx;
3473                         // run pixel shader if appropriate
3474                         // do this before running depthmask code, to allow the pixelshader
3475                         // to clear pixelmask values for alpha testing
3476                         if (dpsoftrast.fb_colorpixels[0] && dpsoftrast.fb_colormask)
3477                                 DPSOFTRAST_ShaderModeTable[dpsoftrast.shader_mode].Span(span);
3478                         if (dpsoftrast.user.depthmask)
3479                                 for (x = 0, d = depth;x < span->length;x++, d += depthslope)
3480                                         if (pixelmask[x])
3481                                                 depthpixel[x] = d;
3482                 }
3483                 else
3484                 {
3485                         // no depth testing means we're just dealing with color...
3486                         // if there is no color buffer, skip pixel shader
3487                         if (dpsoftrast.fb_colorpixels[0] && dpsoftrast.fb_colormask)
3488                         {
3489                                 memset(pixelmask, 1, span->length);
3490                                 span->pixelmask = pixelmask;
3491                                 span->startx = 0;
3492                                 span->endx = span->length;
3493                                 DPSOFTRAST_ShaderModeTable[dpsoftrast.shader_mode].Span(span);
3494                         }
3495                 }
3496         }
3497 }
3498
3499 void DPSOFTRAST_Draw_ProcessTriangles(int firstvertex, int numtriangles, const int *element3i, const unsigned short *element3s, unsigned char *arraymask)
3500 {
3501         int cullface = dpsoftrast.user.cullface;
3502         int width = dpsoftrast.fb_width;
3503         int height = dpsoftrast.fb_height;
3504         int i;
3505         int j;
3506         int k;
3507         int y;
3508         int e[3];
3509         int screenx[4];
3510         int screeny[4];
3511         int screenyless[4];
3512         int numpoints;
3513         int clipflags;
3514         int edge0p;
3515         int edge0n;
3516         int edge1p;
3517         int edge1n;
3518         int extent[6];
3519         int startx;
3520         int endx;
3521         float mip_edge0tc[2];
3522         float mip_edge1tc[2];
3523         float mip_edge0xy[2];
3524         float mip_edge1xy[2];
3525         float mip_edge0xymul;
3526         float mip_edge1xymul;
3527         float mip_edge0mip;
3528         float mip_edge1mip;
3529         float mipdensity;
3530         unsigned char mip[DPSOFTRAST_MAXTEXTUREUNITS];
3531         float startxf;
3532         float endxf;
3533         float edge0ylerp;
3534         float edge0yilerp;
3535         float edge1ylerp;
3536         float edge1yilerp;
3537         float edge0xf;
3538         float edge1xf;
3539         float spanilength;
3540         float startxlerp;
3541         float yc;
3542         float w;
3543         float frac;
3544         float ifrac;
3545         float trianglearea2;
3546         float triangleedge[2][4];
3547         float trianglenormal[4];
3548         float clipdist[4];
3549         float clipped[DPSOFTRAST_ARRAY_TOTAL][4][4];
3550         float screen[4][4];
3551         float proj[DPSOFTRAST_ARRAY_TOTAL][4][4];
3552         DPSOFTRAST_Texture *texture;
3553         DPSOFTRAST_State_Draw_Span *span;
3554         DPSOFTRAST_State_Draw_Span *oldspan;
3555         for (i = 0;i < numtriangles;i++)
3556         {
3557                 // generate the 3 edges of this triangle
3558                 // generate spans for the triangle - switch based on left split or right split classification of triangle
3559                 if (element3i)
3560                 {
3561                         e[0] = element3i[i*3+0] - firstvertex;
3562                         e[1] = element3i[i*3+1] - firstvertex;
3563                         e[2] = element3i[i*3+2] - firstvertex;
3564                 }
3565                 else if (element3s)
3566                 {
3567                         e[0] = element3s[i*3+0] - firstvertex;
3568                         e[1] = element3s[i*3+1] - firstvertex;
3569                         e[2] = element3s[i*3+2] - firstvertex;
3570                 }
3571                 else
3572                 {
3573                         e[0] = i*3+0;
3574                         e[1] = i*3+1;
3575                         e[2] = i*3+2;
3576                 }
3577                 triangleedge[0][0] = dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION][e[0]*4+0] - dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION][e[1]*4+0];
3578                 triangleedge[0][1] = dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION][e[0]*4+1] - dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION][e[1]*4+1];
3579                 triangleedge[0][2] = dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION][e[0]*4+2] - dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION][e[1]*4+2];
3580                 triangleedge[1][0] = dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION][e[2]*4+0] - dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION][e[1]*4+0];
3581                 triangleedge[1][1] = dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION][e[2]*4+1] - dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION][e[1]*4+1];
3582                 triangleedge[1][2] = dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION][e[2]*4+2] - dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION][e[1]*4+2];
3583                 trianglenormal[0] = triangleedge[0][1] * triangleedge[1][2] - triangleedge[0][2] * triangleedge[1][1];
3584                 trianglenormal[1] = triangleedge[0][2] * triangleedge[1][0] - triangleedge[0][0] * triangleedge[1][2];
3585                 trianglenormal[2] = triangleedge[0][0] * triangleedge[1][1] - triangleedge[0][1] * triangleedge[1][0];
3586                 trianglearea2 = trianglenormal[0] * trianglenormal[0] + trianglenormal[1] * trianglenormal[1] + trianglenormal[2] * trianglenormal[2];
3587                 // skip degenerate triangles, nothing good can come from them...
3588                 if (trianglearea2 == 0.0f)
3589                         continue;
3590                 // apply current cullface mode (this culls many triangles)
3591                 switch(cullface)
3592                 {
3593                 case GL_BACK:
3594                         if (trianglenormal[2] < 0)
3595                                 continue;
3596                         break;
3597                 case GL_FRONT:
3598                         if (trianglenormal[2] > 0)
3599                                 continue;
3600                         break;
3601                 }
3602                 // calculate distance from nearplane
3603                 clipdist[0] = dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION][e[0]*4+2] + 1.0f;
3604                 clipdist[1] = dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION][e[1]*4+2] + 1.0f;
3605                 clipdist[2] = dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION][e[2]*4+2] + 1.0f;
3606                 clipflags = 0;
3607                 if (clipdist[0] < 0.0f)
3608                         clipflags |= 1;
3609                 if (clipdist[1] < 0.0f)
3610                         clipflags |= 2;
3611                 if (clipdist[2] < 0.0f)
3612                         clipflags |= 4;
3613                 // clip triangle if necessary
3614                 switch(clipflags)
3615                 {
3616                 case 0: /*000*/
3617                         // triangle is entirely in front of nearplane
3618
3619                         // macros for clipping vertices
3620 #define CLIPPEDVERTEXLERP(k,p1,p2) \
3621                         frac = clipdist[p1] / (clipdist[p1] - clipdist[p2]);\
3622                         ifrac = 1.0f - frac;\
3623                         for (j = 0;j < DPSOFTRAST_ARRAY_TOTAL;j++)\
3624                         {\
3625                                 /*if (arraymask[j])*/\
3626                                 {\
3627                                         clipped[j][k][0] = dpsoftrast.draw.post_array4f[j][e[p1]*4+0]*ifrac+dpsoftrast.draw.post_array4f[j][e[p2]*4+0]*frac;\
3628                                         clipped[j][k][1] = dpsoftrast.draw.post_array4f[j][e[p1]*4+1]*ifrac+dpsoftrast.draw.post_array4f[j][e[p2]*4+1]*frac;\
3629                                         clipped[j][k][2] = dpsoftrast.draw.post_array4f[j][e[p1]*4+2]*ifrac+dpsoftrast.draw.post_array4f[j][e[p2]*4+2]*frac;\
3630                                         clipped[j][k][3] = dpsoftrast.draw.post_array4f[j][e[p1]*4+3]*ifrac+dpsoftrast.draw.post_array4f[j][e[p2]*4+3]*frac;\
3631                                 }\
3632                         }\
3633                         DPSOFTRAST_Draw_ProjectVertices(screen[k], clipped[DPSOFTRAST_ARRAY_POSITION][k], 1)
3634 #define CLIPPEDVERTEXCOPY(k,p1) \
3635                         for (j = 0;j < DPSOFTRAST_ARRAY_TOTAL;j++)\
3636                         {\
3637                                 /*if (arraymask[j])*/\
3638                                 {\
3639                                         clipped[j][k][0] = dpsoftrast.draw.post_array4f[j][e[p1]*4+0];\
3640                                         clipped[j][k][1] = dpsoftrast.draw.post_array4f[j][e[p1]*4+1];\
3641                                         clipped[j][k][2] = dpsoftrast.draw.post_array4f[j][e[p1]*4+2];\
3642                                         clipped[j][k][3] = dpsoftrast.draw.post_array4f[j][e[p1]*4+3];\
3643                                 }\
3644                         }\
3645                         screen[k][0] = dpsoftrast.draw.screencoord4f[e[p1]*4+0];\
3646                         screen[k][1] = dpsoftrast.draw.screencoord4f[e[p1]*4+1];\
3647                         screen[k][2] = dpsoftrast.draw.screencoord4f[e[p1]*4+2];\
3648                         screen[k][3] = dpsoftrast.draw.screencoord4f[e[p1]*4+3];
3649
3650                         CLIPPEDVERTEXCOPY(0,0);
3651                         CLIPPEDVERTEXCOPY(1,1);
3652                         CLIPPEDVERTEXCOPY(2,2);
3653                         numpoints = 3;
3654                         break;
3655                 case 1: /*100*/
3656                         CLIPPEDVERTEXLERP(0,0,1);
3657                         CLIPPEDVERTEXCOPY(1,1);
3658                         CLIPPEDVERTEXCOPY(2,2);
3659                         CLIPPEDVERTEXLERP(3,2,0);
3660                         numpoints = 4;
3661                         break;
3662                 case 2: /*010*/
3663                         CLIPPEDVERTEXCOPY(0,0);
3664                         CLIPPEDVERTEXLERP(1,0,1);
3665                         CLIPPEDVERTEXLERP(2,1,2);
3666                         CLIPPEDVERTEXCOPY(3,2);
3667                         numpoints = 4;
3668                         break;
3669                 case 3: /*110*/
3670                         CLIPPEDVERTEXLERP(0,1,2);
3671                         CLIPPEDVERTEXCOPY(1,2);
3672                         CLIPPEDVERTEXLERP(2,2,0);
3673                         numpoints = 3;
3674                         break;
3675                 case 4: /*001*/
3676                         CLIPPEDVERTEXCOPY(0,0);
3677                         CLIPPEDVERTEXCOPY(1,1);
3678                         CLIPPEDVERTEXLERP(2,1,2);
3679                         CLIPPEDVERTEXLERP(3,2,0);
3680                         numpoints = 4;
3681                         break;
3682                 case 5: /*101*/
3683                         CLIPPEDVERTEXLERP(0,0,1);
3684                         CLIPPEDVERTEXCOPY(1,1);
3685                         CLIPPEDVERTEXLERP(2,1,2);
3686                         numpoints = 3;
3687                         break;
3688                 case 6: /*011*/
3689                         CLIPPEDVERTEXCOPY(0,0);
3690                         CLIPPEDVERTEXLERP(1,0,1);
3691                         CLIPPEDVERTEXLERP(2,2,0);
3692                         numpoints = 3;
3693                         break;
3694                 case 7: /*111*/
3695                         // triangle is entirely behind nearplane
3696                         continue;
3697                 }
3698                 // calculate integer y coords for triangle points
3699                 screenx[0] = (int)(screen[0][0]);
3700                 screeny[0] = (int)(screen[0][1]);
3701                 screenx[1] = (int)(screen[1][0]);
3702                 screeny[1] = (int)(screen[1][1]);
3703                 screenx[2] = (int)(screen[2][0]);
3704                 screeny[2] = (int)(screen[2][1]);
3705                 screenx[3] = (int)(screen[3][0]);
3706                 screeny[3] = (int)(screen[3][1]);
3707                 // figure out the extents (bounding box) of the triangle
3708                 extent[0] = screenx[0];
3709                 extent[1] = screeny[0];
3710                 extent[2] = screenx[0];
3711                 extent[3] = screeny[0];
3712                 for (j = 1;j < numpoints;j++)
3713                 {
3714                         if (extent[0] > screenx[j]) extent[0] = screenx[j];
3715                         if (extent[1] > screeny[j]) extent[1] = screeny[j];
3716                         if (extent[2] < screenx[j]) extent[2] = screenx[j];
3717                         if (extent[3] < screeny[j]) extent[3] = screeny[j];
3718                 }
3719                 //extent[0]--;
3720                 //extent[1]--;
3721                 extent[2]++;
3722                 extent[3]++;
3723                 if (extent[0] < 0)
3724                         extent[0] = 0;
3725                 if (extent[1] < 0)
3726                         extent[1] = 0;
3727                 if (extent[2] > width)
3728                         extent[2] = width;
3729                 if (extent[3] > height)
3730                         extent[3] = height;
3731                 // skip offscreen triangles
3732                 if (extent[2] <= extent[0] || extent[3] <= extent[1])
3733                         continue;
3734                 // okay, this triangle is going to produce spans, we'd better project
3735                 // the interpolants now (this is what gives perspective texturing),
3736                 // this consists of simply multiplying all arrays by the W coord
3737                 // (which is basically 1/Z), which will be undone per-pixel
3738                 // (multiplying by Z again) to get the perspective-correct array
3739                 // values
3740                 for (j = 0;j < DPSOFTRAST_ARRAY_TOTAL;j++)
3741                 {
3742                         //if (arraymask[j])
3743                         {
3744                                 for (k = 0;k < numpoints;k++)
3745                                 {
3746                                         w = screen[k][3];
3747                                         proj[j][k][0] = clipped[j][k][0] * w;
3748                                         proj[j][k][1] = clipped[j][k][1] * w;
3749                                         proj[j][k][2] = clipped[j][k][2] * w;
3750                                         proj[j][k][3] = clipped[j][k][3] * w;
3751                                 }
3752                         }
3753                 }
3754                 // adjust texture LOD by texture density, in the simplest way possible...
3755                 mip_edge0xy[0] = screen[0][0] - screen[1][0];
3756                 mip_edge0xy[1] = screen[0][1] - screen[1][1];
3757                 mip_edge1xy[0] = screen[2][0] - screen[1][0];
3758                 mip_edge1xy[1] = screen[2][1] - screen[1][1];
3759                 mip_edge0xymul = 1.0f / (mip_edge0xy[0]*mip_edge0xy[0]+mip_edge0xy[1]*mip_edge0xy[1]);
3760                 mip_edge1xymul = 1.0f / (mip_edge1xy[0]*mip_edge1xy[0]+mip_edge1xy[1]*mip_edge1xy[1]);
3761                 for (j = 0;j < DPSOFTRAST_MAXTEXTUREUNITS;j++)
3762                 {
3763                         texture = dpsoftrast.texbound[j];
3764                         if (texture)
3765                         {
3766                                 if (texture->filter <= DPSOFTRAST_TEXTURE_FILTER_LINEAR)
3767                                 {
3768                                         mip[j] = 0;
3769                                         continue;
3770                                 }
3771                                 k = DPSOFTRAST_ShaderModeTable[dpsoftrast.shader_mode].lodarrayindex;
3772                                 mip_edge0tc[0] = (clipped[k][0][0] - clipped[k][1][0]) * texture->mipmap[0][2];
3773                                 mip_edge0tc[1] = (clipped[k][0][1] - clipped[k][1][1]) * texture->mipmap[0][3];
3774                                 mip_edge1tc[0] = (clipped[k][2][0] - clipped[k][1][0]) * texture->mipmap[0][2];
3775                                 mip_edge1tc[1] = (clipped[k][2][1] - clipped[k][1][1]) * texture->mipmap[0][3];
3776                                 mip_edge0mip = (mip_edge0tc[0]*mip_edge0tc[0]+mip_edge0tc[1]*mip_edge0tc[1]) * mip_edge0xymul;
3777                                 mip_edge1mip = (mip_edge1tc[0]*mip_edge1tc[0]+mip_edge1tc[1]*mip_edge1tc[1]) * mip_edge1xymul;
3778                                 // this will be multiplied in the texturing routine by the texture resolution
3779                                 mipdensity = mip_edge0mip < mip_edge1mip ? mip_edge0mip : mip_edge1mip;
3780                                 y = (int)(log(mipdensity)/log(2.0f));
3781                                 if (y < 0)
3782                                         y = 0;
3783                                 if (y > texture->mipmaps - 1)
3784                                         y = texture->mipmaps - 1;
3785                                 mip[j] = y;
3786                         }
3787                 }
3788                 // iterate potential spans
3789                 // TODO: optimize?  if we figured out the edge order beforehand, this
3790                 //       could do loops over the edges in the proper order rather than
3791                 //       selecting them for each span
3792                 // TODO: optimize?  the edges could have data slopes calculated
3793                 // TODO: optimize?  the data slopes could be calculated as a plane
3794                 //       (2D slopes) to avoid any interpolation along edges at all
3795                 for (y = extent[1];y < extent[3];y++)
3796                 {
3797                         // get center of pixel y
3798                         yc = y;
3799                         // do the compares all at once
3800                         screenyless[0] = y <= screeny[0];
3801                         screenyless[1] = y <= screeny[1];
3802                         screenyless[2] = y <= screeny[2];
3803                         screenyless[3] = y <= screeny[3];
3804                         if (numpoints == 4)
3805                         {
3806                                 switch(screenyless[0] + screenyless[1] * 2 + screenyless[2] * 4 + screenyless[3] * 8)
3807                                 {
3808                                 case  0: /*0000*/ continue;
3809                                 case  1: /*1000*/ edge0p = 3;edge0n = 0;edge1p = 0;edge1n = 1;break;
3810                                 case  2: /*0100*/ edge0p = 0;edge0n = 1;edge1p = 1;edge1n = 2;break;
3811                                 case  3: /*1100*/ edge0p = 3;edge0n = 0;edge1p = 1;edge1n = 2;break;
3812                                 case  4: /*0010*/ edge0p = 1;edge0n = 2;edge1p = 2;edge1n = 3;break;
3813                                 case  5: /*1010*/ edge0p = 1;edge0n = 2;edge1p = 2;edge1n = 3;break; // concave - nonsense
3814                                 case  6: /*0110*/ edge0p = 0;edge0n = 1;edge1p = 2;edge1n = 3;break;
3815                                 case  7: /*1110*/ edge0p = 3;edge0n = 0;edge1p = 2;edge1n = 3;break;
3816                                 case  8: /*0001*/ edge0p = 2;edge0n = 3;edge1p = 3;edge1n = 0;break;
3817                                 case  9: /*1001*/ edge0p = 2;edge0n = 3;edge1p = 0;edge1n = 1;break;
3818                                 case 10: /*0101*/ edge0p = 2;edge0n = 3;edge1p = 1;edge1n = 2;break; // concave - nonsense
3819                                 case 11: /*1101*/ edge0p = 2;edge0n = 3;edge1p = 1;edge1n = 2;break;
3820                                 case 12: /*0011*/ edge0p = 1;edge0n = 2;edge1p = 3;edge1n = 0;break;
3821                                 case 13: /*1011*/ edge0p = 1;edge0n = 2;edge1p = 0;edge1n = 1;break;
3822                                 case 14: /*0111*/ edge0p = 0;edge0n = 1;edge1p = 3;edge1n = 0;break;
3823                                 case 15: /*1111*/ continue;
3824                                 }
3825                         }
3826                         else
3827                         {
3828                                 switch(screenyless[0] + screenyless[1] * 2 + screenyless[2] * 4)
3829                                 {
3830                                 case 0: /*000*/ continue;
3831                                 case 1: /*100*/ edge0p = 2;edge0n = 0;edge1p = 0;edge1n = 1;break;
3832                                 case 2: /*010*/ edge0p = 0;edge0n = 1;edge1p = 1;edge1n = 2;break;
3833                                 case 3: /*110*/ edge0p = 2;edge0n = 0;edge1p = 1;edge1n = 2;break;
3834                                 case 4: /*001*/ edge0p = 1;edge0n = 2;edge1p = 2;edge1n = 0;break;
3835                                 case 5: /*101*/ edge0p = 1;edge0n = 2;edge1p = 0;edge1n = 1;break;
3836                                 case 6: /*011*/ edge0p = 0;edge0n = 1;edge1p = 2;edge1n = 0;break;
3837                                 case 7: /*111*/ continue;
3838                                 }
3839                         }
3840 #if 0
3841                 {
3842                         int foundedges = 0;
3843                         int cedge0p = 0;
3844                         int cedge0n = 0;
3845                         int cedge1p = 0;
3846                         int cedge1n = 0;
3847                         for (j = 0, k = numpoints-1;j < numpoints;k = j, j++)
3848                         {
3849                                 if (screenyless[k] && !screenyless[j])
3850                                 {
3851                                         cedge1p = k;
3852                                         cedge1n = j;
3853                                         foundedges |= 1;
3854                                 }
3855                                 else if (screenyless[j] && !screenyless[k])
3856                                 {
3857                                         cedge0p = k;
3858                                         cedge0n = j;
3859                                         foundedges |= 2;
3860                                 }
3861                         }
3862                         if (foundedges != 3)
3863                                 continue;
3864                         if (cedge0p != edge0p || cedge0n != edge0n || cedge1p != edge1p || cedge1n != edge1n)
3865                         {
3866                                 if (numpoints == 4)
3867                                         printf("case %i%i%i%i is broken %i %i %i %i != %i %i %i %i\n", screenyless[0], screenyless[1], screenyless[2], screenyless[3], cedge0p, cedge0n, cedge1p, cedge1n, edge0p, edge0n, edge1p, edge1n);
3868                                 else
3869                                         printf("case %i%i%i is broken %i %i %i %i != %i %i %i %i\n", screenyless[0], screenyless[1], screenyless[2], cedge0p, cedge0n, cedge1p, cedge1n, edge0p, edge0n, edge1p, edge1n);
3870                         }
3871                 }
3872 #endif
3873                         edge0ylerp = (yc - screen[edge0p][1]) / (screen[edge0n][1] - screen[edge0p][1]);
3874                         edge1ylerp = (yc - screen[edge1p][1]) / (screen[edge1n][1] - screen[edge1p][1]);
3875                         if (edge0ylerp < 0 || edge0ylerp > 1 || edge1ylerp < 0 || edge1ylerp > 1)
3876                                 continue;
3877                         edge0yilerp = 1.0f - edge0ylerp;
3878                         edge1yilerp = 1.0f - edge1ylerp;
3879                         edge0xf = screen[edge0p][0] * edge0yilerp + screen[edge0n][0] * edge0ylerp;
3880                         edge1xf = screen[edge1p][0] * edge1yilerp + screen[edge1n][0] * edge1ylerp;
3881                         if (edge0xf < edge1xf)
3882                         {
3883                                 startxf = edge0xf;
3884                                 endxf = edge1xf;
3885                         }
3886                         else
3887                         {
3888                                 startxf = edge1xf;
3889                                 endxf = edge0xf;
3890                         }
3891                         startx = (int)ceil(startxf);
3892                         endx = (int)ceil(endxf);
3893                         if (startx < 0)
3894                                 startx = 0;
3895                         if (endx > width)
3896                                 endx = width;
3897                         if (startx >= endx)
3898                                 continue;
3899                         if (startxf > startx || endxf < endx-1) { printf("%s:%i X wrong (%i to %i is outside %f to %f)\n", __FILE__, __LINE__, startx, endx, startxf, endxf); }
3900                         spanilength = 1.0f / (endxf - startxf);
3901                         startxlerp = startx - startxf;
3902                         span = &dpsoftrast.draw.spanqueue[dpsoftrast.draw.numspans++];
3903                         memcpy(span->mip, mip, sizeof(span->mip));
3904                         span->start = y * width + startx;
3905                         span->length = endx - startx;
3906                         j = DPSOFTRAST_ARRAY_TOTAL;
3907                         if (edge0xf < edge1xf)
3908                         {
3909                                 span->data[0][j][0] = screen[edge0p][0] * edge0yilerp + screen[edge0n][0] * edge0ylerp;
3910                                 span->data[0][j][1] = screen[edge0p][1] * edge0yilerp + screen[edge0n][1] * edge0ylerp;
3911                                 span->data[0][j][2] = screen[edge0p][2] * edge0yilerp + screen[edge0n][2] * edge0ylerp;
3912                                 span->data[0][j][3] = screen[edge0p][3] * edge0yilerp + screen[edge0n][3] * edge0ylerp;
3913                                 span->data[1][j][0] = screen[edge1p][0] * edge1yilerp + screen[edge1n][0] * edge1ylerp;
3914                                 span->data[1][j][1] = screen[edge1p][1] * edge1yilerp + screen[edge1n][1] * edge1ylerp;
3915                                 span->data[1][j][2] = screen[edge1p][2] * edge1yilerp + screen[edge1n][2] * edge1ylerp;
3916                                 span->data[1][j][3] = screen[edge1p][3] * edge1yilerp + screen[edge1n][3] * edge1ylerp;
3917                                 for (j = 0;j < DPSOFTRAST_ARRAY_TOTAL;j++)
3918                                 {
3919                                         //if (arraymask[j])
3920                                         {
3921                                                 span->data[0][j][0] = proj[j][edge0p][0] * edge0yilerp + proj[j][edge0n][0] * edge0ylerp;
3922                                                 span->data[0][j][1] = proj[j][edge0p][1] * edge0yilerp + proj[j][edge0n][1] * edge0ylerp;
3923                                                 span->data[0][j][2] = proj[j][edge0p][2] * edge0yilerp + proj[j][edge0n][2] * edge0ylerp;
3924                                                 span->data[0][j][3] = proj[j][edge0p][3] * edge0yilerp + proj[j][edge0n][3] * edge0ylerp;
3925                                                 span->data[1][j][0] = proj[j][edge1p][0] * edge1yilerp + proj[j][edge1n][0] * edge1ylerp;
3926                                                 span->data[1][j][1] = proj[j][edge1p][1] * edge1yilerp + proj[j][edge1n][1] * edge1ylerp;
3927                                                 span->data[1][j][2] = proj[j][edge1p][2] * edge1yilerp + proj[j][edge1n][2] * edge1ylerp;
3928                                                 span->data[1][j][3] = proj[j][edge1p][3] * edge1yilerp + proj[j][edge1n][3] * edge1ylerp;
3929                                         }
3930                                 }
3931                         }
3932                         else
3933                         {
3934                                 span->data[0][j][0] = screen[edge1p][0] * edge1yilerp + screen[edge1n][0] * edge1ylerp;
3935                                 span->data[0][j][1] = screen[edge1p][1] * edge1yilerp + screen[edge1n][1] * edge1ylerp;
3936                                 span->data[0][j][2] = screen[edge1p][2] * edge1yilerp + screen[edge1n][2] * edge1ylerp;
3937                                 span->data[0][j][3] = screen[edge1p][3] * edge1yilerp + screen[edge1n][3] * edge1ylerp;
3938                                 span->data[1][j][0] = screen[edge0p][0] * edge0yilerp + screen[edge0n][0] * edge0ylerp;
3939                                 span->data[1][j][1] = screen[edge0p][1] * edge0yilerp + screen[edge0n][1] * edge0ylerp;
3940                                 span->data[1][j][2] = screen[edge0p][2] * edge0yilerp + screen[edge0n][2] * edge0ylerp;
3941                                 span->data[1][j][3] = screen[edge0p][3] * edge0yilerp + screen[edge0n][3] * edge0ylerp;
3942                                 for (j = 0;j < DPSOFTRAST_ARRAY_TOTAL;j++)
3943                                 {
3944                                         //if (arraymask[j])
3945                                         {
3946                                                 span->data[0][j][0] = proj[j][edge1p][0] * edge1yilerp + proj[j][edge1n][0] * edge1ylerp;
3947                                                 span->data[0][j][1] = proj[j][edge1p][1] * edge1yilerp + proj[j][edge1n][1] * edge1ylerp;
3948                                                 span->data[0][j][2] = proj[j][edge1p][2] * edge1yilerp + proj[j][edge1n][2] * edge1ylerp;
3949                                                 span->data[0][j][3] = proj[j][edge1p][3] * edge1yilerp + proj[j][edge1n][3] * edge1ylerp;
3950                                                 span->data[1][j][0] = proj[j][edge0p][0] * edge0yilerp + proj[j][edge0n][0] * edge0ylerp;
3951                                                 span->data[1][j][1] = proj[j][edge0p][1] * edge0yilerp + proj[j][edge0n][1] * edge0ylerp;
3952                                                 span->data[1][j][2] = proj[j][edge0p][2] * edge0yilerp + proj[j][edge0n][2] * edge0ylerp;
3953                                                 span->data[1][j][3] = proj[j][edge0p][3] * edge0yilerp + proj[j][edge0n][3] * edge0ylerp;
3954                                         }
3955                                 }
3956                         }
3957                         // change data[1][n][] to be a data slope
3958                         j = DPSOFTRAST_ARRAY_TOTAL;
3959                         span->data[1][j][0] = (span->data[1][j][0] - span->data[0][j][0]) * spanilength;
3960                         span->data[1][j][1] = (span->data[1][j][1] - span->data[0][j][1]) * spanilength;
3961                         span->data[1][j][2] = (span->data[1][j][2] - span->data[0][j][2]) * spanilength;
3962                         span->data[1][j][3] = (span->data[1][j][3] - span->data[0][j][3]) * spanilength;
3963                         for (j = 0;j < DPSOFTRAST_ARRAY_TOTAL;j++)
3964                         {
3965                                 //if (arraymask[j])
3966                                 {
3967                                         span->data[1][j][0] = (span->data[1][j][0] - span->data[0][j][0]) * spanilength;
3968                                         span->data[1][j][1] = (span->data[1][j][1] - span->data[0][j][1]) * spanilength;
3969                                         span->data[1][j][2] = (span->data[1][j][2] - span->data[0][j][2]) * spanilength;
3970                                         span->data[1][j][3] = (span->data[1][j][3] - span->data[0][j][3]) * spanilength;
3971                                 }
3972                         }
3973                         // adjust the data[0][n][] to be correct for the pixel centers
3974                         // this also handles horizontal clipping where a major part of the
3975                         // span may be off the left side of the screen
3976                         j = DPSOFTRAST_ARRAY_TOTAL;
3977                         span->data[0][j][0] += span->data[1][j][0] * startxlerp;
3978                         span->data[0][j][1] += span->data[1][j][1] * startxlerp;
3979                         span->data[0][j][2] += span->data[1][j][2] * startxlerp;
3980                         span->data[0][j][3] += span->data[1][j][3] * startxlerp;
3981                         for (j = 0;j < DPSOFTRAST_ARRAY_TOTAL;j++)
3982                         {
3983                                 //if (arraymask[j])
3984                                 {
3985                                         span->data[0][j][0] += span->data[1][j][0] * startxlerp;
3986                                         span->data[0][j][1] += span->data[1][j][1] * startxlerp;
3987                                         span->data[0][j][2] += span->data[1][j][2] * startxlerp;
3988                                         span->data[0][j][3] += span->data[1][j][3] * startxlerp;
3989                                 }
3990                         }
3991                         // to keep the shader routines from needing more than a small
3992                         // buffer for pixel intermediate data, we split long spans...
3993                         while (span->length > DPSOFTRAST_DRAW_MAXSPANLENGTH)
3994                         {
3995                                 span->length = DPSOFTRAST_DRAW_MAXSPANLENGTH;
3996                                 if (dpsoftrast.draw.numspans >= DPSOFTRAST_DRAW_MAXSPANQUEUE)
3997                                 {
3998                                         DPSOFTRAST_Draw_ProcessSpans();
3999                                         dpsoftrast.draw.numspans = 0;
4000                                 }
4001                                 oldspan = span;
4002                                 span = &dpsoftrast.draw.spanqueue[dpsoftrast.draw.numspans++];
4003                                 *span = *oldspan;
4004                                 startx += DPSOFTRAST_DRAW_MAXSPANLENGTH;
4005                                 span->start = y * width + startx;
4006                                 span->length = endx - startx;
4007                                 j = DPSOFTRAST_ARRAY_TOTAL;
4008                                 span->data[0][j][0] += span->data[1][j][0] * DPSOFTRAST_DRAW_MAXSPANLENGTH;
4009                                 span->data[0][j][1] += span->data[1][j][1] * DPSOFTRAST_DRAW_MAXSPANLENGTH;
4010                                 span->data[0][j][2] += span->data[1][j][2] * DPSOFTRAST_DRAW_MAXSPANLENGTH;
4011                                 span->data[0][j][3] += span->data[1][j][3] * DPSOFTRAST_DRAW_MAXSPANLENGTH;
4012                                 for (j = 0;j < DPSOFTRAST_ARRAY_TOTAL;j++)
4013                                 {
4014                                         //if (arraymask[j])
4015                                         {
4016                                                 span->data[0][j][0] += span->data[1][j][0] * DPSOFTRAST_DRAW_MAXSPANLENGTH;
4017                                                 span->data[0][j][1] += span->data[1][j][1] * DPSOFTRAST_DRAW_MAXSPANLENGTH;
4018                                                 span->data[0][j][2] += span->data[1][j][2] * DPSOFTRAST_DRAW_MAXSPANLENGTH;
4019                                                 span->data[0][j][3] += span->data[1][j][3] * DPSOFTRAST_DRAW_MAXSPANLENGTH;
4020                                         }
4021                                 }
4022                         }
4023                         // after all that, we have a span suitable for the pixel shader...
4024                         if (dpsoftrast.draw.numspans >= DPSOFTRAST_DRAW_MAXSPANQUEUE)
4025                         {
4026                                 DPSOFTRAST_Draw_ProcessSpans();
4027                                 dpsoftrast.draw.numspans = 0;
4028                         }
4029                 }
4030                 // draw outlines over triangle for debugging
4031         //      for (j = 0, k = numpoints-1;j < numpoints;k = j, j++)
4032         //              DPSOFTRAST_Draw_DebugEdgePoints(screen[k], screen[j]);
4033         }
4034         if (dpsoftrast.draw.numspans)
4035         {
4036                 DPSOFTRAST_Draw_ProcessSpans();
4037                 dpsoftrast.draw.numspans = 0;
4038         }
4039 }
4040
4041 void DPSOFTRAST_Draw_DebugPoints(void)
4042 {
4043         int i;
4044         int x;
4045         int y;
4046         int numvertices = dpsoftrast.draw.numvertices;
4047         int w = dpsoftrast.fb_width;
4048         int bounds[4];
4049         unsigned int *pixels = dpsoftrast.fb_colorpixels[0];
4050         const float *c4f;
4051         bounds[0] = dpsoftrast.fb_viewportscissor[0];
4052         bounds[1] = dpsoftrast.fb_viewportscissor[1];
4053         bounds[2] = dpsoftrast.fb_viewportscissor[0] + dpsoftrast.fb_viewportscissor[2];
4054         bounds[3] = dpsoftrast.fb_viewportscissor[1] + dpsoftrast.fb_viewportscissor[3];
4055         for (i = 0;i < numvertices;i++)
4056         {
4057                 // check nearclip
4058                 //if (dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION][i*4+3] != 1.0f)
4059                 //      continue;
4060                 x = (int)(dpsoftrast.draw.screencoord4f[i*4+0]);
4061                 y = (int)(dpsoftrast.draw.screencoord4f[i*4+1]);
4062                 //x = (int)(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION][i*4+0] + 0.5f);
4063                 //y = (int)(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION][i*4+1] + 0.5f);
4064                 //x = (int)((dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION][i*4+0] + 1.0f) * dpsoftrast.fb_width * 0.5f + 0.5f);
4065                 //y = (int)((dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION][i*4+1] + 1.0f) * dpsoftrast.fb_height * 0.5f + 0.5f);
4066                 if (x < bounds[0] || y < bounds[1] || x >= bounds[2] || y >= bounds[3])
4067                         continue;
4068                 c4f = dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_COLOR] + i*4;
4069                 pixels[y*w+x] = DPSOFTRAST_BGRA8_FROM_RGBA32F(c4f[0], c4f[1], c4f[2], c4f[3]);
4070         }
4071 }
4072
4073 void DPSOFTRAST_DrawTriangles(int firstvertex, int numvertices, int numtriangles, const int *element3i, const unsigned short *element3s)
4074 {
4075         unsigned char arraymask[DPSOFTRAST_ARRAY_TOTAL];
4076         arraymask[0] = true;
4077         arraymask[1] = dpsoftrast.fb_colorpixels[0] != NULL; // TODO: optimize (decide based on shadermode)
4078         arraymask[2] = dpsoftrast.pointer_texcoordf[0] != NULL;
4079         arraymask[3] = dpsoftrast.pointer_texcoordf[1] != NULL;
4080         arraymask[4] = dpsoftrast.pointer_texcoordf[2] != NULL;
4081         arraymask[5] = dpsoftrast.pointer_texcoordf[3] != NULL;
4082         arraymask[6] = dpsoftrast.pointer_texcoordf[4] != NULL;
4083         arraymask[7] = dpsoftrast.pointer_texcoordf[5] != NULL;
4084         arraymask[8] = dpsoftrast.pointer_texcoordf[6] != NULL;
4085         arraymask[9] = dpsoftrast.pointer_texcoordf[7] != NULL;
4086         DPSOFTRAST_Validate(DPSOFTRAST_VALIDATE_DRAW);
4087         DPSOFTRAST_Draw_LoadVertices(firstvertex, numvertices, true);
4088         DPSOFTRAST_ShaderModeTable[dpsoftrast.shader_mode].Vertex();
4089         DPSOFTRAST_Draw_ProjectVertices(dpsoftrast.draw.screencoord4f, dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION], numvertices);
4090         DPSOFTRAST_Draw_ProcessTriangles(firstvertex, numtriangles, element3i, element3s, arraymask);
4091 }
4092
4093 void DPSOFTRAST_Init(int width, int height, unsigned int *colorpixels, unsigned int *depthpixels)
4094 {
4095         union
4096         {
4097                 int i;
4098                 unsigned char b[4];
4099         }
4100         u;
4101         u.i = 1;
4102         memset(&dpsoftrast, 0, sizeof(dpsoftrast));
4103         dpsoftrast.bigendian = u.b[3];
4104         dpsoftrast.fb_width = width;
4105         dpsoftrast.fb_height = height;
4106         dpsoftrast.fb_depthpixels = depthpixels;
4107         dpsoftrast.fb_colorpixels[0] = colorpixels;
4108         dpsoftrast.fb_colorpixels[1] = NULL;
4109         dpsoftrast.fb_colorpixels[1] = NULL;
4110         dpsoftrast.fb_colorpixels[1] = NULL;
4111         dpsoftrast.texture_firstfree = 1;
4112         dpsoftrast.texture_end = 1;
4113         dpsoftrast.texture_max = 0;
4114         dpsoftrast.user.colormask[0] = 1;
4115         dpsoftrast.user.colormask[1] = 1;
4116         dpsoftrast.user.colormask[2] = 1;
4117         dpsoftrast.user.colormask[3] = 1;
4118         dpsoftrast.user.blendfunc[0] = GL_ONE;
4119         dpsoftrast.user.blendfunc[1] = GL_ZERO;
4120         dpsoftrast.user.depthmask = true;
4121         dpsoftrast.user.depthtest = true;
4122         dpsoftrast.user.depthfunc = GL_LEQUAL;
4123         dpsoftrast.user.scissortest = false;
4124         dpsoftrast.user.cullface = GL_BACK;
4125         dpsoftrast.user.alphatest = false;
4126         dpsoftrast.user.alphafunc = GL_GREATER;
4127         dpsoftrast.user.alphavalue = 0.5f;
4128         dpsoftrast.user.scissor[0] = 0;
4129         dpsoftrast.user.scissor[1] = 0;
4130         dpsoftrast.user.scissor[2] = dpsoftrast.fb_width;
4131         dpsoftrast.user.scissor[3] = dpsoftrast.fb_height;
4132         dpsoftrast.user.viewport[0] = 0;
4133         dpsoftrast.user.viewport[1] = 0;
4134         dpsoftrast.user.viewport[2] = dpsoftrast.fb_width;
4135         dpsoftrast.user.viewport[3] = dpsoftrast.fb_height;
4136         dpsoftrast.user.depthrange[0] = 0;
4137         dpsoftrast.user.depthrange[1] = 1;
4138         dpsoftrast.user.polygonoffset[0] = 0;
4139         dpsoftrast.user.polygonoffset[1] = 0;
4140         dpsoftrast.user.color[0] = 1;
4141         dpsoftrast.user.color[1] = 1;
4142         dpsoftrast.user.color[2] = 1;
4143         dpsoftrast.user.color[3] = 1;
4144         dpsoftrast.validate = -1;
4145         DPSOFTRAST_Validate(-1);
4146         dpsoftrast.validate = 0;
4147 }
4148
4149 void DPSOFTRAST_Shutdown(void)
4150 {
4151         int i;
4152         for (i = 0;i < dpsoftrast.texture_end;i++)
4153                 if (dpsoftrast.texture[i].bytes)
4154                         free(dpsoftrast.texture[i].bytes);
4155         if (dpsoftrast.texture)
4156                 free(dpsoftrast.texture);
4157         memset(&dpsoftrast, 0, sizeof(dpsoftrast));
4158 }
4159