-#include <memory.h>
-#include "dpsoftrast.h"
#include <stdio.h>
+#include <string.h>
#include <math.h>
+#include "quakedef.h"
+#include "dpsoftrast.h"
-#undef true
-#undef false
#ifndef __cplusplus
-typedef enum bool {false, true} bool;
+typedef qboolean bool;
#endif
-#if defined(__GNUC__) || (defined(_MSC_VER) && _MSC_VER >= 1400)
-#define RESTRICT __restrict
+#if defined(__GNUC__)
+#define ALIGN(var) var __attribute__((__aligned__(16)))
+#elif defined(_MSC_VER)
+#define ALIGN(var) __declspec(align(16)) var
#else
-#define RESTRICT
+#define ALIGN(var) var
#endif
-#define GL_NONE 0
-#define GL_FRONT_LEFT 0x0400
-#define GL_FRONT_RIGHT 0x0401
-#define GL_BACK_LEFT 0x0402
-#define GL_BACK_RIGHT 0x0403
-#define GL_FRONT 0x0404
-#define GL_BACK 0x0405
-#define GL_LEFT 0x0406
-#define GL_RIGHT 0x0407
-#define GL_FRONT_AND_BACK 0x0408
-#define GL_AUX0 0x0409
-#define GL_AUX1 0x040A
-#define GL_AUX2 0x040B
-#define GL_AUX3 0x040C
-
-#define GL_NEVER 0x0200
-#define GL_LESS 0x0201
-#define GL_EQUAL 0x0202
-#define GL_LEQUAL 0x0203
-#define GL_GREATER 0x0204
-#define GL_NOTEQUAL 0x0205
-#define GL_GEQUAL 0x0206
-#define GL_ALWAYS 0x0207
-
-#define GL_ZERO 0x0
-#define GL_ONE 0x1
-#define GL_SRC_COLOR 0x0300
-#define GL_ONE_MINUS_SRC_COLOR 0x0301
-#define GL_DST_COLOR 0x0306
-#define GL_ONE_MINUS_DST_COLOR 0x0307
-#define GL_SRC_ALPHA 0x0302
-#define GL_ONE_MINUS_SRC_ALPHA 0x0303
-#define GL_DST_ALPHA 0x0304
-#define GL_ONE_MINUS_DST_ALPHA 0x0305
-#define GL_SRC_ALPHA_SATURATE 0x0308
-#define GL_CONSTANT_COLOR 0x8001
-#define GL_ONE_MINUS_CONSTANT_COLOR 0x8002
-#define GL_CONSTANT_ALPHA 0x8003
-#define GL_ONE_MINUS_CONSTANT_ALPHA 0x8004
+#ifdef SSE2_PRESENT
+#include <emmintrin.h>
+
+#define MM_MALLOC(size) _mm_malloc(size, 16)
+
+// Aligned stand-in for calloc() in SSE2 builds: returns nmemb*size zero-filled
+// bytes on a 16-byte boundary, or NULL when the allocation fails or the byte
+// count does not fit in size_t. The memory comes from _mm_malloc, so it must
+// be released with MM_FREE (_mm_free) and never with free().
+static void *MM_CALLOC(size_t nmemb, size_t size)
+{
+	size_t total;
+	void *ptr;
+	// calloc() refuses products that overflow size_t; keep that guarantee so a
+	// wrapped product can never produce a buffer smaller than the caller expects
+	if (size != 0 && nmemb > (size_t)-1 / size)
+		return NULL;
+	total = nmemb * size;
+	ptr = _mm_malloc(total, 16);
+	if (ptr != NULL)
+		memset(ptr, 0, total);
+	return ptr;
+}
+
+#define MM_FREE _mm_free
+#else
+#define MM_MALLOC(size) malloc(size)
+#define MM_CALLOC(nmemb, size) calloc(nmemb, size)
+#define MM_FREE free
+#endif
typedef enum DPSOFTRAST_ARRAY_e
{
#define DPSOFTRAST_MAXSUBSPAN 16
-typedef struct DPSOFTRAST_State_Draw_Span_s
+typedef ALIGN(struct DPSOFTRAST_State_Draw_Span_s
{
int start; // pixel index
int length; // pixel count
// [0][DPSOFTRAST_ARRAY_TOTAL][] is start screencoord4f
// [1][DPSOFTRAST_ARRAY_TOTAL][] is end screencoord4f
// NOTE: screencoord4f[3] is W (basically 1/Z), useful for depthbuffer
- float data[2][DPSOFTRAST_ARRAY_TOTAL+1][4];
+ ALIGN(float data[2][DPSOFTRAST_ARRAY_TOTAL+1][4]);
}
-DPSOFTRAST_State_Draw_Span;
+DPSOFTRAST_State_Draw_Span);
#define DPSOFTRAST_DRAW_MAXSPANQUEUE 1024
DPSOFTRAST_State dpsoftrast;
+extern int dpsoftrast_test;
+
#define DPSOFTRAST_DEPTHSCALE (1024.0f*1048576.0f)
#define DPSOFTRAST_BGRA8_FROM_RGBA32F(r,g,b,a) (((int)(r * 255.0f + 0.5f) << 16) | ((int)(g * 255.0f + 0.5f) << 8) | (int)(b * 255.0f + 0.5f) | ((int)(a * 255.0f + 0.5f) << 24))
#define DPSOFTRAST_DEPTH32_FROM_DEPTH32F(d) ((int)(DPSOFTRAST_DEPTHSCALE * (1-d)))
texture->size = size;
// allocate the pixels now
- texture->bytes = (unsigned char *)calloc(1, size);
+ texture->bytes = (unsigned char *)MM_CALLOC(1, size);
return texnum;
}
DPSOFTRAST_Texture *texture;
texture = DPSOFTRAST_Texture_GetByIndex(index);if (!texture) return;
if (texture->bytes)
- free(texture->bytes);
+ MM_FREE(texture->bytes);
texture->bytes = NULL;
memset(texture, 0, sizeof(*texture));
// adjust the free range and used range
{
DPSOFTRAST_Texture *texture;
texture = DPSOFTRAST_Texture_GetByIndex(index);if (!texture) return 0;
- return texture->width;
+ return texture->mipmap[mip][2];
}
int DPSOFTRAST_Texture_GetHeight(int index, int mip)
{
DPSOFTRAST_Texture *texture;
texture = DPSOFTRAST_Texture_GetByIndex(index);if (!texture) return 0;
- return texture->height;
+ return texture->mipmap[mip][3]; // per-mip result: entry [3] of the selected mip record replaces the single texture->height; mip is used as an index without a range check
}
int DPSOFTRAST_Texture_GetDepth(int index, int mip)
{
DPSOFTRAST_Texture *texture;
texture = DPSOFTRAST_Texture_GetByIndex(index);if (!texture) return 0;
- return texture->depth;
+ return texture->mipmap[mip][4]; // per-mip result: entry [4] of the selected mip record replaces the single texture->depth; mip is used as an index without a range check
}
unsigned char *DPSOFTRAST_Texture_GetPixelPointer(int index, int mip)
{
DPSOFTRAST_Texture *texture;
texture = DPSOFTRAST_Texture_GetByIndex(index);if (!texture) return 0;
- return texture->bytes;
+ return texture->bytes + texture->mipmap[mip][0]; // entry [0] of the mip record is applied as a byte offset into texture->bytes; mip is used as an index without a range check
}
void DPSOFTRAST_Texture_Filter(int index, DPSOFTRAST_TEXTURE_FILTER filter)
{
}
void DPSOFTRAST_UniformMatrix4fvARB(DPSOFTRAST_UNIFORM index, int arraysize, int transpose, const float *v)
{
- dpsoftrast.uniform4f[index*4+0] = v[0];
- dpsoftrast.uniform4f[index*4+1] = v[1];
- dpsoftrast.uniform4f[index*4+2] = v[2];
- dpsoftrast.uniform4f[index*4+3] = v[3];
- dpsoftrast.uniform4f[index*4+4] = v[4];
- dpsoftrast.uniform4f[index*4+5] = v[5];
- dpsoftrast.uniform4f[index*4+6] = v[6];
- dpsoftrast.uniform4f[index*4+7] = v[7];
- dpsoftrast.uniform4f[index*4+8] = v[8];
- dpsoftrast.uniform4f[index*4+9] = v[9];
- dpsoftrast.uniform4f[index*4+10] = v[10];
- dpsoftrast.uniform4f[index*4+11] = v[11];
- dpsoftrast.uniform4f[index*4+12] = v[12];
- dpsoftrast.uniform4f[index*4+13] = v[13];
- dpsoftrast.uniform4f[index*4+14] = v[14];
- dpsoftrast.uniform4f[index*4+15] = v[15];
+ int i;
+ for (i = 0;i < arraysize;i++, index += 4, v += 16) // one 4x4 matrix per pass: 16 floats are read from v and stored starting at uniform4f[index*4]; index then advances by 4 (four vec4 slots)
+ {
+ if (transpose) // nonzero: source element v[4*c+r] is stored at offset 4*r+c, i.e. rows and columns are swapped
+ {
+ dpsoftrast.uniform4f[index*4+0] = v[0];
+ dpsoftrast.uniform4f[index*4+1] = v[4];
+ dpsoftrast.uniform4f[index*4+2] = v[8];
+ dpsoftrast.uniform4f[index*4+3] = v[12];
+ dpsoftrast.uniform4f[index*4+4] = v[1];
+ dpsoftrast.uniform4f[index*4+5] = v[5];
+ dpsoftrast.uniform4f[index*4+6] = v[9];
+ dpsoftrast.uniform4f[index*4+7] = v[13];
+ dpsoftrast.uniform4f[index*4+8] = v[2];
+ dpsoftrast.uniform4f[index*4+9] = v[6];
+ dpsoftrast.uniform4f[index*4+10] = v[10];
+ dpsoftrast.uniform4f[index*4+11] = v[14];
+ dpsoftrast.uniform4f[index*4+12] = v[3];
+ dpsoftrast.uniform4f[index*4+13] = v[7];
+ dpsoftrast.uniform4f[index*4+14] = v[11];
+ dpsoftrast.uniform4f[index*4+15] = v[15];
+ }
+ else // zero: the 16 floats are copied in their original order
+ {
+ dpsoftrast.uniform4f[index*4+0] = v[0];
+ dpsoftrast.uniform4f[index*4+1] = v[1];
+ dpsoftrast.uniform4f[index*4+2] = v[2];
+ dpsoftrast.uniform4f[index*4+3] = v[3];
+ dpsoftrast.uniform4f[index*4+4] = v[4];
+ dpsoftrast.uniform4f[index*4+5] = v[5];
+ dpsoftrast.uniform4f[index*4+6] = v[6];
+ dpsoftrast.uniform4f[index*4+7] = v[7];
+ dpsoftrast.uniform4f[index*4+8] = v[8];
+ dpsoftrast.uniform4f[index*4+9] = v[9];
+ dpsoftrast.uniform4f[index*4+10] = v[10];
+ dpsoftrast.uniform4f[index*4+11] = v[11];
+ dpsoftrast.uniform4f[index*4+12] = v[12];
+ dpsoftrast.uniform4f[index*4+13] = v[13];
+ dpsoftrast.uniform4f[index*4+14] = v[14];
+ dpsoftrast.uniform4f[index*4+15] = v[15];
+ }
+ }
}
void DPSOFTRAST_Uniform1iARB(DPSOFTRAST_UNIFORM index, int i0)
{
while (dpsoftrast.draw.maxvertices < dpsoftrast.draw.numvertices)
dpsoftrast.draw.maxvertices *= 2;
if (dpsoftrast.draw.in_array4f[0])
- free(dpsoftrast.draw.in_array4f[0]);
- data = (float *)calloc(1, dpsoftrast.draw.maxvertices * sizeof(float[4])*(DPSOFTRAST_ARRAY_TOTAL*2 + 1));
+ MM_FREE(dpsoftrast.draw.in_array4f[0]);
+ data = (float *)MM_CALLOC(1, dpsoftrast.draw.maxvertices * sizeof(float[4])*(DPSOFTRAST_ARRAY_TOTAL*2 + 1));
for (i = 0;i < DPSOFTRAST_ARRAY_TOTAL;i++, data += dpsoftrast.draw.maxvertices * 4)
dpsoftrast.draw.in_array4f[i] = data;
for (i = 0;i < DPSOFTRAST_ARRAY_TOTAL;i++, data += dpsoftrast.draw.maxvertices * 4)
}
}
-void DPSOFTRAST_Draw_VertexShaderLightDirection(void)
-{
-}
-
-void DPSOFTRAST_Draw_Span_Begin(const DPSOFTRAST_State_Draw_Span *span, float *zf)
+void DPSOFTRAST_Draw_Span_Begin(const DPSOFTRAST_State_Draw_Span * RESTRICT span, float *zf)
{
int x;
int startx = span->startx;
}
}
-void DPSOFTRAST_Draw_Span_Finish(const DPSOFTRAST_State_Draw_Span *span, const float * RESTRICT in4f)
+void DPSOFTRAST_Draw_Span_Finish(const DPSOFTRAST_State_Draw_Span * RESTRICT span, const float * RESTRICT in4f)
{
int x;
int startx = span->startx;
}
}
-void DPSOFTRAST_Draw_Span_Texture2DVarying(const DPSOFTRAST_State_Draw_Span *span, float * RESTRICT out4f, int texunitindex, int arrayindex, const float * RESTRICT zf)
+// Final stage for spans shaded in 8-bit form: combines the packed 4-byte
+// colors in in4ub (indexed by x in [startx, endx)) with color buffer 0
+// according to dpsoftrast.fb_blendmode, skipping pixels whose pixelmask entry
+// is false. When alpha testing is enabled, pixels whose alpha is below half
+// (128 of 255) are first removed from pixelmask, which the existing comment
+// notes also affects depth writes. Does nothing if no color buffer is bound.
+void DPSOFTRAST_Draw_Span_FinishBGRA8(const DPSOFTRAST_State_Draw_Span * RESTRICT span, const unsigned char* RESTRICT in4ub)
+{
+	int x;
+	int startx = span->startx;
+	int endx = span->endx;
+	int d[4];
+	const unsigned int * RESTRICT ini = (const unsigned int *)in4ub;
+	int a, b;
+	unsigned char * RESTRICT pixelmask = span->pixelmask;
+	unsigned char * RESTRICT pixel = (unsigned char *)dpsoftrast.fb_colorpixels[0];
+	unsigned int * RESTRICT pixeli = (unsigned int *)dpsoftrast.fb_colorpixels[0];
+	if (!pixel)
+		return;
+	// rebase both views of the framebuffer to the first pixel of this span
+	pixel += span->start * 4;
+	pixeli += span->start;
+	// handle alphatest now (this affects depth writes too)
+	// alpha is a byte in 0..255 here, so the half threshold is 128; comparing
+	// against 0.5f would only reject alpha == 0
+	if (dpsoftrast.user.alphatest)
+		for (x = startx;x < endx;x++)
+			if (in4ub[x*4+3] < 128)
+				pixelmask[x] = false;
+	// FIXME: this does not handle bigendian
+	switch(dpsoftrast.fb_blendmode)
+	{
+	case DPSOFTRAST_BLENDMODE_OPAQUE:
+		// straight copy of the whole 32-bit pixel
+		for (x = startx;x < endx;x++)
+			if (pixelmask[x])
+				pixeli[x] = ini[x];
+		break;
+	case DPSOFTRAST_BLENDMODE_ALPHA:
+		// dst = (src*alpha + dst*(256-alpha)) >> 8
+		for (x = startx;x < endx;x++)
+		{
+			if (!pixelmask[x])
+				continue;
+			a = in4ub[x*4+3];
+			b = 256 - in4ub[x*4+3];
+			pixel[x*4+0] = (in4ub[x*4+0]*a+pixel[x*4+0]*b) >> 8;
+			pixel[x*4+1] = (in4ub[x*4+1]*a+pixel[x*4+1]*b) >> 8;
+			pixel[x*4+2] = (in4ub[x*4+2]*a+pixel[x*4+2]*b) >> 8;
+			pixel[x*4+3] = (in4ub[x*4+3]*a+pixel[x*4+3]*b) >> 8;
+		}
+		break;
+	case DPSOFTRAST_BLENDMODE_ADDALPHA:
+		// dst = min(255, dst + ((src*alpha) >> 8))
+		for (x = startx;x < endx;x++)
+		{
+			if (!pixelmask[x])
+				continue;
+			a = in4ub[x*4+3];
+			d[0] = (((in4ub[x*4+0]*a)>>8)+pixel[x*4+0]);if (d[0] > 255) d[0] = 255;
+			d[1] = (((in4ub[x*4+1]*a)>>8)+pixel[x*4+1]);if (d[1] > 255) d[1] = 255;
+			d[2] = (((in4ub[x*4+2]*a)>>8)+pixel[x*4+2]);if (d[2] > 255) d[2] = 255;
+			d[3] = (((in4ub[x*4+3]*a)>>8)+pixel[x*4+3]);if (d[3] > 255) d[3] = 255;
+			pixel[x*4+0] = d[0];
+			pixel[x*4+1] = d[1];
+			pixel[x*4+2] = d[2];
+			pixel[x*4+3] = d[3];
+		}
+		break;
+	case DPSOFTRAST_BLENDMODE_ADD:
+		// dst = min(255, dst + src)
+		for (x = startx;x < endx;x++)
+		{
+			if (!pixelmask[x])
+				continue;
+			d[0] = (in4ub[x*4+0]+pixel[x*4+0]);if (d[0] > 255) d[0] = 255;
+			d[1] = (in4ub[x*4+1]+pixel[x*4+1]);if (d[1] > 255) d[1] = 255;
+			d[2] = (in4ub[x*4+2]+pixel[x*4+2]);if (d[2] > 255) d[2] = 255;
+			d[3] = (in4ub[x*4+3]+pixel[x*4+3]);if (d[3] > 255) d[3] = 255;
+			pixel[x*4+0] = d[0];
+			pixel[x*4+1] = d[1];
+			pixel[x*4+2] = d[2];
+			pixel[x*4+3] = d[3];
+		}
+		break;
+	case DPSOFTRAST_BLENDMODE_INVMOD:
+		// dst = ((255-src) * dst) >> 8
+		for (x = startx;x < endx;x++)
+		{
+			if (!pixelmask[x])
+				continue;
+			pixel[x*4+0] = ((255-in4ub[x*4+0])*pixel[x*4+0])>>8;
+			pixel[x*4+1] = ((255-in4ub[x*4+1])*pixel[x*4+1])>>8;
+			pixel[x*4+2] = ((255-in4ub[x*4+2])*pixel[x*4+2])>>8;
+			pixel[x*4+3] = ((255-in4ub[x*4+3])*pixel[x*4+3])>>8;
+		}
+		break;
+	case DPSOFTRAST_BLENDMODE_MUL:
+		// dst = (src * dst) >> 8
+		for (x = startx;x < endx;x++)
+		{
+			if (!pixelmask[x])
+				continue;
+			pixel[x*4+0] = (in4ub[x*4+0]*pixel[x*4+0])>>8;
+			pixel[x*4+1] = (in4ub[x*4+1]*pixel[x*4+1])>>8;
+			pixel[x*4+2] = (in4ub[x*4+2]*pixel[x*4+2])>>8;
+			pixel[x*4+3] = (in4ub[x*4+3]*pixel[x*4+3])>>8;
+		}
+		break;
+	case DPSOFTRAST_BLENDMODE_MUL2:
+		// dst = min(255, (src * dst) >> 7)
+		for (x = startx;x < endx;x++)
+		{
+			if (!pixelmask[x])
+				continue;
+			d[0] = (in4ub[x*4+0]*pixel[x*4+0])>>7;if (d[0] > 255) d[0] = 255;
+			d[1] = (in4ub[x*4+1]*pixel[x*4+1])>>7;if (d[1] > 255) d[1] = 255;
+			d[2] = (in4ub[x*4+2]*pixel[x*4+2])>>7;if (d[2] > 255) d[2] = 255;
+			d[3] = (in4ub[x*4+3]*pixel[x*4+3])>>7;if (d[3] > 255) d[3] = 255;
+			pixel[x*4+0] = d[0];
+			pixel[x*4+1] = d[1];
+			pixel[x*4+2] = d[2];
+			pixel[x*4+3] = d[3];
+		}
+		break;
+	case DPSOFTRAST_BLENDMODE_SUBALPHA:
+		// dst = max(0, dst - ((src*alpha) >> 8))
+		for (x = startx;x < endx;x++)
+		{
+			if (!pixelmask[x])
+				continue;
+			a = in4ub[x*4+3];
+			d[0] = pixel[x*4+0]-((in4ub[x*4+0]*a)>>8);if (d[0] < 0) d[0] = 0;
+			d[1] = pixel[x*4+1]-((in4ub[x*4+1]*a)>>8);if (d[1] < 0) d[1] = 0;
+			d[2] = pixel[x*4+2]-((in4ub[x*4+2]*a)>>8);if (d[2] < 0) d[2] = 0;
+			d[3] = pixel[x*4+3]-((in4ub[x*4+3]*a)>>8);if (d[3] < 0) d[3] = 0;
+			pixel[x*4+0] = d[0];
+			pixel[x*4+1] = d[1];
+			pixel[x*4+2] = d[2];
+			pixel[x*4+3] = d[3];
+		}
+		break;
+	case DPSOFTRAST_BLENDMODE_PSEUDOALPHA:
+		// dst = min(255, src + ((dst*(255-alpha)) >> 8))
+		for (x = startx;x < endx;x++)
+		{
+			if (!pixelmask[x])
+				continue;
+			b = 255 - in4ub[x*4+3];
+			d[0] = in4ub[x*4+0]+((pixel[x*4+0]*b)>>8);if (d[0] > 255) d[0] = 255;
+			d[1] = in4ub[x*4+1]+((pixel[x*4+1]*b)>>8);if (d[1] > 255) d[1] = 255;
+			d[2] = in4ub[x*4+2]+((pixel[x*4+2]*b)>>8);if (d[2] > 255) d[2] = 255;
+			d[3] = in4ub[x*4+3]+((pixel[x*4+3]*b)>>8);if (d[3] > 255) d[3] = 255;
+			pixel[x*4+0] = d[0];
+			pixel[x*4+1] = d[1];
+			pixel[x*4+2] = d[2];
+			pixel[x*4+3] = d[3];
+		}
+		break;
+	}
+}
+
+void DPSOFTRAST_Draw_Span_Texture2DVarying(const DPSOFTRAST_State_Draw_Span * RESTRICT span, float * RESTRICT out4f, int texunitindex, int arrayindex, const float * RESTRICT zf)
{
int x;
int startx = span->startx;
endtc[1] = (data[1] + slope[1]*endsub) * zf[endsub] * tcscale[1] - 0.5f;
substep[0] = (endtc[0] - tc[0]) * subscale;
substep[1] = (endtc[1] - tc[1]) * subscale;
- subtc[0] = tc[0] * (1<<12);
- subtc[1] = tc[1] * (1<<12);
+ subtc[0] = tc[0] * (1<<16);
+ subtc[1] = tc[1] * (1<<16);
if (!(flags & DPSOFTRAST_TEXTURE_FLAG_CLAMPTOEDGE))
{
- subtc[0] &= (tciwrapmask[0]<<12)|0xFFF;
- subtc[1] &= (tciwrapmask[1]<<12)|0xFFF;
+ subtc[0] &= (tciwrapmask[0]<<16)|0xFFFF;
+ subtc[1] &= (tciwrapmask[1]<<16)|0xFFFF;
}
if(filter)
{
- tci[0] = (subtc[0]>>12) - tcimin[0];
- tci[1] = (subtc[1]>>12) - tcimin[0];
- tci1[0] = ((subtc[0] + (endsub - x)*substep[0])>>12) + 1;
- tci1[1] = ((subtc[1] + (endsub - x)*substep[1])>>12) + 1;
- if (tci[0] <= tcimax[0] && tci[1] <= tcimax[1] && tci1[0] <= tcimax[0] && tci1[1] <= tcimax[1])
+ tci[0] = (subtc[0]>>16) - tcimin[0];
+ tci[1] = (subtc[1]>>16) - tcimin[1];
+ tci1[0] = ((subtc[0] + (endsub - x)*substep[0])>>16);
+ tci1[1] = ((subtc[1] + (endsub - x)*substep[1])>>16);
+ if (tci[0] <= tcimax[0]-1 && tci[1] <= tcimax[1]-1 && tci1[0] <= tcimax[0]-1 && tci1[1] <= tcimax[1]-1)
{
for (; x <= endsub; x++, subtc[0] += substep[0], subtc[1] += substep[1])
{
unsigned int frac[2] = { subtc[0]&0xFFF, subtc[1]&0xFFF };
unsigned int ifrac[2] = { 0x1000 - frac[0], 0x1000 - frac[1] };
unsigned int lerp[4] = { ifrac[0]*ifrac[1], frac[0]*ifrac[1], ifrac[0]*frac[1], frac[0]*frac[1] };
- tci[0] = subtc[0]>>12;
- tci[1] = subtc[1]>>12;
+ tci[0] = subtc[0]>>16;
+ tci[1] = subtc[1]>>16;
pixel[0] = pixelbase + 4 * (tci[1]*tciwidth+tci[0]);
pixel[1] = pixel[0] + 4 * tciwidth;
c[0] = (pixel[0][2]*lerp[0]+pixel[0][4+2]*lerp[1]+pixel[1][2]*lerp[2]+pixel[1][4+2]*lerp[3]) * (1.0f / 0xFF000000);
unsigned int frac[2] = { subtc[0]&0xFFF, subtc[1]&0xFFF };
unsigned int ifrac[2] = { 0x1000 - frac[0], 0x1000 - frac[1] };
unsigned int lerp[4] = { ifrac[0]*ifrac[1], frac[0]*ifrac[1], ifrac[0]*frac[1], frac[0]*frac[1] };
- tci[0] = subtc[0]>>12;
- tci[1] = subtc[1]>>12;
+ tci[0] = subtc[0]>>16;
+ tci[1] = subtc[1]>>16;
tci1[0] = tci[0] + 1;
tci1[1] = tci[1] + 1;
tci[0] = tci[0] >= tcimin[0] ? (tci[0] <= tcimax[0] ? tci[0] : tcimax[0]) : tcimin[0];
unsigned int frac[2] = { subtc[0]&0xFFF, subtc[1]&0xFFF };
unsigned int ifrac[2] = { 0x1000 - frac[0], 0x1000 - frac[1] };
unsigned int lerp[4] = { ifrac[0]*ifrac[1], frac[0]*ifrac[1], ifrac[0]*frac[1], frac[0]*frac[1] };
- tci[0] = subtc[0]>>12;
- tci[1] = subtc[1]>>12;
+ tci[0] = subtc[0]>>16;
+ tci[1] = subtc[1]>>16;
tci1[0] = tci[0] + 1;
tci1[1] = tci[1] + 1;
tci[0] &= tciwrapmask[0];
{
for (; x <= endsub; x++, subtc[0] += substep[0], subtc[1] += substep[1])
{
- tci[0] = subtc[0]>>12;
- tci[1] = subtc[1]>>12;
+ tci[0] = subtc[0]>>16;
+ tci[1] = subtc[1]>>16;
tci[0] = tci[0] >= tcimin[0] ? (tci[0] <= tcimax[0] ? tci[0] : tcimax[0]) : tcimin[0];
tci[1] = tci[1] >= tcimin[1] ? (tci[1] <= tcimax[1] ? tci[1] : tcimax[1]) : tcimin[1];
pixel[0] = pixelbase + 4 * (tci[1]*tciwidth+tci[0]);
{
for (; x <= endsub; x++, subtc[0] += substep[0], subtc[1] += substep[1])
{
- tci[0] = subtc[0]>>12;
- tci[1] = subtc[1]>>12;
+ tci[0] = subtc[0]>>16;
+ tci[1] = subtc[1]>>16;
tci[0] &= tciwrapmask[0];
tci[1] &= tciwrapmask[1];
pixel[0] = pixelbase + 4 * (tci[1]*tciwidth+tci[0]);
}
}
-void DPSOFTRAST_Draw_Span_MultiplyVarying(const DPSOFTRAST_State_Draw_Span *span, float *out4f, const float *in4f, int arrayindex, const float *zf)
+// Samples the 2D texture bound to texunitindex along a span and writes one
+// packed 4-byte texel per pixel to out4ub for x in [startx, endx).
+// The texture coordinate at pixel x is (data + slope*x) * zf[x], where data is
+// span->data[0][arrayindex] and slope is span->data[1][arrayindex]. It is
+// evaluated exactly at the ends of each sub-span (DPSOFTRAST_MAXSUBSPAN pixels)
+// and stepped linearly in 16.16 fixed point in between.
+// With no texture bound the span is filled with 255 bytes; a 1-pixel mip level
+// fills the span with that single texel. Textures with the LINEAR filter flag
+// blend neighbouring texels by the fractional coordinate bits; others copy the
+// addressed texel. Coordinates either wrap (mask) or clamp to the edge,
+// depending on DPSOFTRAST_TEXTURE_FLAG_CLAMPTOEDGE.
+// NOTE(review): without SSE2_PRESENT the body is empty and out4ub is left
+// untouched - confirm callers only take this path in SSE2 builds.
+void DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(const DPSOFTRAST_State_Draw_Span * RESTRICT span, unsigned char * RESTRICT out4ub, int texunitindex, int arrayindex, const float * RESTRICT zf)
+{
+#ifdef SSE2_PRESENT
+	int x;
+	int startx = span->startx;
+	int endx = span->endx;
+	int flags;
+	float data[4];
+	float slope[4];
+	float tc[2];
+	float tcscale[2];
+	unsigned int tci[2];
+	unsigned int tci1[2];
+	unsigned int tcimin[2];
+	unsigned int tcimax[2];
+	int tciwrapmask[2];
+	int tciwidth;
+	int filter;
+	int mip;
+	unsigned int k;
+	unsigned int *outi = (unsigned int *)out4ub;
+	const unsigned char * RESTRICT pixelbase;
+	const unsigned int * RESTRICT pixelbasei;
+	DPSOFTRAST_Texture *texture = dpsoftrast.texbound[texunitindex];
+	// if no texture is bound, just fill it with white
+	if (!texture)
+	{
+		memset(out4ub + startx*4, 255, span->length*4);
+		return;
+	}
+	mip = span->mip[texunitindex];
+	// if this mipmap of the texture is 1 pixel, just fill it with that color
+	if (texture->mipmap[mip][1] == 4)
+	{
+		// read the texel of the selected mip level (mipmap[mip][0] is its byte
+		// offset), not the first texel of the whole texture allocation
+		k = *((const unsigned int *)(texture->bytes + texture->mipmap[mip][0]));
+		for (x = startx;x < endx;x++)
+			outi[x] = k;
+		return;
+	}
+	filter = texture->filter & DPSOFTRAST_TEXTURE_FILTER_LINEAR;
+	data[0] = span->data[0][arrayindex][0];
+	data[1] = span->data[0][arrayindex][1];
+	data[2] = span->data[0][arrayindex][2];
+	data[3] = span->data[0][arrayindex][3];
+	slope[0] = span->data[1][arrayindex][0];
+	slope[1] = span->data[1][arrayindex][1];
+	slope[2] = span->data[1][arrayindex][2];
+	slope[3] = span->data[1][arrayindex][3];
+	flags = texture->flags;
+	pixelbase = (const unsigned char *)texture->bytes + texture->mipmap[mip][0];
+	pixelbasei = (const unsigned int *)pixelbase;
+	tcscale[0] = texture->mipmap[mip][2];
+	tcscale[1] = texture->mipmap[mip][3];
+	tciwidth = texture->mipmap[mip][2];
+	tcimin[0] = 0;
+	tcimin[1] = 0;
+	tcimax[0] = texture->mipmap[mip][2]-1;
+	tcimax[1] = texture->mipmap[mip][3]-1;
+	tciwrapmask[0] = texture->mipmap[mip][2]-1;
+	tciwrapmask[1] = texture->mipmap[mip][3]-1;
+	for (x = startx;x < endx;)
+	{
+		float endtc[2];
+		unsigned int subtc[2];
+		unsigned int substep[2];
+		int endsub = x + DPSOFTRAST_MAXSUBSPAN-1;
+		float subscale = 65536.0f/(DPSOFTRAST_MAXSUBSPAN-1);
+		// the last sub-span may be shorter; rescale the step accordingly
+		if (endsub >= endx)
+		{
+			endsub = endx-1;
+			subscale = endsub > x ? 65536.0f / (endsub - x) : 1.0f;
+		}
+		tc[0] = (data[0] + slope[0]*x) * zf[x] * tcscale[0] - 0.5f;
+		tc[1] = (data[1] + slope[1]*x) * zf[x] * tcscale[1] - 0.5f;
+		endtc[0] = (data[0] + slope[0]*endsub) * zf[endsub] * tcscale[0] - 0.5f;
+		endtc[1] = (data[1] + slope[1]*endsub) * zf[endsub] * tcscale[1] - 0.5f;
+		substep[0] = (endtc[0] - tc[0]) * subscale;
+		substep[1] = (endtc[1] - tc[1]) * subscale;
+		subtc[0] = tc[0] * (1<<16);
+		subtc[1] = tc[1] * (1<<16);
+		if (!(flags & DPSOFTRAST_TEXTURE_FLAG_CLAMPTOEDGE))
+		{
+			subtc[0] &= (tciwrapmask[0]<<16)|0xFFFF;
+			subtc[1] &= (tciwrapmask[1]<<16)|0xFFFF;
+		}
+		if (filter)
+		{
+			tci[0] = (subtc[0]>>16) - tcimin[0];
+			tci[1] = (subtc[1]>>16) - tcimin[1];
+			tci1[0] = ((subtc[0] + (endsub - x)*substep[0])>>16);
+			tci1[1] = ((subtc[1] + (endsub - x)*substep[1])>>16);
+			// fast path: both ends of the sub-span are at least one texel inside
+			// the texture, so the +1 neighbours need no clamping or wrapping
+			// NOTE(review): tcimax is unsigned, so tcimax-1 wraps when a mip is
+			// one texel wide or tall - confirm such textures cannot reach here
+			if (tci[0] <= tcimax[0]-1 && tci[1] <= tcimax[1]-1 && tci1[0] <= tcimax[0]-1 && tci1[1] <= tcimax[1]-1)
+			{
+				__m128i subtcm = _mm_setr_epi32(subtc[0], subtc[1], subtc[0] + substep[0], subtc[1] + substep[1]);
+				__m128i substepm = _mm_slli_epi32(_mm_setr_epi32(substep[0], substep[1], substep[0], substep[1]), 1);
+				__m128i scalem = _mm_set1_epi32((tciwidth<<18)+4);
+				// two output pixels per iteration
+				for (; x + 1 <= endsub; x += 2, subtcm = _mm_add_epi32(subtcm, substepm))
+				{
+					__m128i tcim = _mm_shufflehi_epi16(_mm_shufflelo_epi16(subtcm, _MM_SHUFFLE(3, 1, 3, 1)), _MM_SHUFFLE(3, 1, 3, 1)), pix1, pix2, pix3, pix4, fracm;
+					ALIGN(int pixeloffset[4]);
+					tcim = _mm_madd_epi16(_mm_add_epi16(tcim, _mm_setr_epi32(0, 0x10000, 0, 0x10000)), scalem);
+					_mm_store_si128((__m128i * RESTRICT)pixeloffset, tcim);
+					pix1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i * RESTRICT)&pixelbase[pixeloffset[0]]), _mm_setzero_si128());
+					pix2 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i * RESTRICT)&pixelbase[pixeloffset[1]]), _mm_setzero_si128());
+					pix3 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i * RESTRICT)&pixelbase[pixeloffset[2]]), _mm_setzero_si128());
+					pix4 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i * RESTRICT)&pixelbase[pixeloffset[3]]), _mm_setzero_si128());
+					fracm = _mm_srli_epi16(subtcm, 1);
+					pix1 = _mm_add_epi16(pix1,
+						_mm_mulhi_epi16(_mm_slli_epi16(_mm_sub_epi16(pix2, pix1), 1),
+							_mm_shuffle_epi32(_mm_shufflelo_epi16(fracm, _MM_SHUFFLE(2, 2, 2, 2)), _MM_SHUFFLE(1, 0, 1, 0))));
+					pix3 = _mm_add_epi16(pix3,
+						_mm_mulhi_epi16(_mm_slli_epi16(_mm_sub_epi16(pix4, pix3), 1),
+							_mm_shuffle_epi32(_mm_shufflehi_epi16(fracm, _MM_SHUFFLE(2, 2, 2, 2)), _MM_SHUFFLE(3, 2, 3, 2))));
+					pix2 = _mm_unpacklo_epi64(pix1, pix3);
+					pix4 = _mm_unpackhi_epi64(pix1, pix3);
+					pix2 = _mm_add_epi16(pix2,
+						_mm_mulhi_epi16(_mm_slli_epi16(_mm_sub_epi16(pix4, pix2), 1),
+							_mm_shufflehi_epi16(_mm_shufflelo_epi16(fracm, _MM_SHUFFLE(0, 0, 0, 0)), _MM_SHUFFLE(0, 0, 0, 0))));
+					_mm_storel_epi64((__m128i * RESTRICT)&outi[x], _mm_packus_epi16(pix2, _mm_shufflelo_epi16(pix2, _MM_SHUFFLE(3, 2, 3, 2))));
+				}
+				// odd pixel left over at the end of the sub-span
+				if (x <= endsub)
+				{
+					__m128i tcim = _mm_shufflelo_epi16(subtcm, _MM_SHUFFLE(3, 1, 3, 1)), pix1, pix2, fracm;
+					ALIGN(int pixeloffset[4]);
+					tcim = _mm_madd_epi16(_mm_add_epi16(tcim, _mm_setr_epi32(0, 0x10000, 0, 0)), scalem);
+					_mm_store_si128((__m128i * RESTRICT)pixeloffset, tcim);
+					pix1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i * RESTRICT)&pixelbase[pixeloffset[0]]), _mm_setzero_si128());
+					pix2 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i * RESTRICT)&pixelbase[pixeloffset[1]]), _mm_setzero_si128());
+					fracm = _mm_srli_epi16(subtcm, 1);
+					pix1 = _mm_add_epi16(pix1,
+						_mm_mulhi_epi16(_mm_slli_epi16(_mm_sub_epi16(pix2, pix1), 1),
+							_mm_shuffle_epi32(_mm_shufflelo_epi16(fracm, _MM_SHUFFLE(2, 2, 2, 2)), _MM_SHUFFLE(1, 0, 1, 0))));
+					pix2 = _mm_shuffle_epi32(pix1, _MM_SHUFFLE(3, 2, 3, 2));
+					pix1 = _mm_add_epi16(pix1,
+						_mm_mulhi_epi16(_mm_slli_epi16(_mm_sub_epi16(pix2, pix1), 1),
+							_mm_shufflelo_epi16(fracm, _MM_SHUFFLE(0, 0, 0, 0))));
+					outi[x] = _mm_cvtsi128_si32(_mm_packus_epi16(pix1, pix1));
+					x++;
+				}
+			}
+			else if (flags & DPSOFTRAST_TEXTURE_FLAG_CLAMPTOEDGE)
+			{
+				// filtered, coordinates clamped to [tcimin, tcimax] per texel
+				__m128i subtcm = _mm_setr_epi32(subtc[0], subtc[1], subtc[0], subtc[1]), substepm = _mm_setr_epi32(substep[0], substep[1], substep[0], substep[1]);
+				__m128i minm = _mm_set1_epi32((tcimin[1]<<16)|tcimin[0]), maxm = _mm_set1_epi32((tcimax[1]<<16)|tcimax[0]), scalem = _mm_set1_epi32((tciwidth<<18)+4);
+				for (; x <= endsub; x++, subtcm = _mm_add_epi32(subtcm, substepm))
+				{
+					__m128i tcim = _mm_shuffle_epi32(_mm_shufflelo_epi16(subtcm, _MM_SHUFFLE(3, 1, 3, 1)), _MM_SHUFFLE(1, 0, 1, 0)), pix1, pix2, fracm;
+					ALIGN(int pixeloffset[4]);
+					tcim = _mm_min_epi16(_mm_max_epi16(_mm_add_epi16(tcim, _mm_setr_epi32(0, 1, 0x10000, 0x10001)), minm), maxm);
+					tcim = _mm_madd_epi16(tcim, scalem);
+					_mm_store_si128((__m128i * RESTRICT)pixeloffset, tcim);
+					pix1 = _mm_unpacklo_epi8(_mm_unpacklo_epi32(_mm_cvtsi32_si128(*(const int * RESTRICT)&pixelbase[pixeloffset[0]]), _mm_cvtsi32_si128(*(const int * RESTRICT)&pixelbase[pixeloffset[1]])), _mm_setzero_si128());
+					pix2 = _mm_unpacklo_epi8(_mm_unpacklo_epi32(_mm_cvtsi32_si128(*(const int * RESTRICT)&pixelbase[pixeloffset[2]]), _mm_cvtsi32_si128(*(const int * RESTRICT)&pixelbase[pixeloffset[3]])), _mm_setzero_si128());
+					fracm = _mm_srli_epi16(subtcm, 1);
+					pix1 = _mm_add_epi16(pix1,
+						_mm_mulhi_epi16(_mm_slli_epi16(_mm_sub_epi16(pix2, pix1), 1),
+							_mm_shuffle_epi32(_mm_shufflelo_epi16(fracm, _MM_SHUFFLE(2, 2, 2, 2)), _MM_SHUFFLE(1, 0, 1, 0))));
+					pix2 = _mm_shuffle_epi32(pix1, _MM_SHUFFLE(3, 2, 3, 2));
+					pix1 = _mm_add_epi16(pix1,
+						_mm_mulhi_epi16(_mm_slli_epi16(_mm_sub_epi16(pix2, pix1), 1),
+							_mm_shufflelo_epi16(fracm, _MM_SHUFFLE(0, 0, 0, 0))));
+					outi[x] = _mm_cvtsi128_si32(_mm_packus_epi16(pix1, pix1));
+				}
+			}
+			else
+			{
+				// filtered, coordinates wrapped with tciwrapmask per texel
+				__m128i subtcm = _mm_setr_epi32(subtc[0], subtc[1], 0, 0), substepm = _mm_setr_epi32(substep[0], substep[1], 0, 0);
+				__m128i wrapm = _mm_set1_epi32((tciwrapmask[1]<<16)|tciwrapmask[0]), scalem = _mm_set1_epi32((tciwidth<<18)+4);
+				for (; x <= endsub; x++, subtcm = _mm_add_epi32(subtcm, substepm))
+				{
+					__m128i tcim = _mm_shuffle_epi32(_mm_shufflelo_epi16(subtcm, _MM_SHUFFLE(3, 1, 3, 1)), _MM_SHUFFLE(1, 0, 1, 0)),
+						pix1, pix2, fracm;
+					ALIGN(int pixeloffset[4]);
+					tcim = _mm_and_si128(_mm_add_epi16(tcim, _mm_setr_epi32(0, 1, 0x10000, 0x10001)), wrapm);
+					tcim = _mm_madd_epi16(tcim, scalem);
+					_mm_store_si128((__m128i * RESTRICT)pixeloffset, tcim);
+					pix1 = _mm_unpacklo_epi8(_mm_unpacklo_epi32(_mm_cvtsi32_si128(*(const int * RESTRICT)&pixelbase[pixeloffset[0]]), _mm_cvtsi32_si128(*(const int * RESTRICT)&pixelbase[pixeloffset[1]])), _mm_setzero_si128());
+					pix2 = _mm_unpacklo_epi8(_mm_unpacklo_epi32(_mm_cvtsi32_si128(*(const int * RESTRICT)&pixelbase[pixeloffset[2]]), _mm_cvtsi32_si128(*(const int * RESTRICT)&pixelbase[pixeloffset[3]])), _mm_setzero_si128());
+					fracm = _mm_srli_epi16(subtcm, 1);
+					pix1 = _mm_add_epi16(pix1,
+						_mm_mulhi_epi16(_mm_slli_epi16(_mm_sub_epi16(pix2, pix1), 1),
+							_mm_shuffle_epi32(_mm_shufflelo_epi16(fracm, _MM_SHUFFLE(2, 2, 2, 2)), _MM_SHUFFLE(1, 0, 1, 0))));
+					pix2 = _mm_shuffle_epi32(pix1, _MM_SHUFFLE(3, 2, 3, 2));
+					pix1 = _mm_add_epi16(pix1,
+						_mm_mulhi_epi16(_mm_slli_epi16(_mm_sub_epi16(pix2, pix1), 1),
+							_mm_shufflelo_epi16(fracm, _MM_SHUFFLE(0, 0, 0, 0))));
+					outi[x] = _mm_cvtsi128_si32(_mm_packus_epi16(pix1, pix1));
+				}
+			}
+		}
+		else
+		{
+			// unfiltered: copy the addressed texel as one 32-bit value
+			tci[0] = (subtc[0]>>16) - tcimin[0];
+			tci[1] = (subtc[1]>>16) - tcimin[1];
+			tci1[0] = ((subtc[0] + (endsub - x)*substep[0])>>16);
+			tci1[1] = ((subtc[1] + (endsub - x)*substep[1])>>16);
+			if (tci[0] <= tcimax[0]-1 && tci[1] <= tcimax[1]-1 && tci1[0] <= tcimax[0]-1 && tci1[1] <= tcimax[1]-1)
+			{
+				for (; x <= endsub; x++, subtc[0] += substep[0], subtc[1] += substep[1])
+				{
+					tci[0] = subtc[0]>>16;
+					tci[1] = subtc[1]>>16;
+					outi[x] = pixelbasei[(tci[1]*tciwidth+tci[0])];
+				}
+			}
+			else if (flags & DPSOFTRAST_TEXTURE_FLAG_CLAMPTOEDGE)
+			{
+				for (; x <= endsub; x++, subtc[0] += substep[0], subtc[1] += substep[1])
+				{
+					tci[0] = subtc[0]>>16;
+					tci[1] = subtc[1]>>16;
+					tci[0] = tci[0] >= tcimin[0] ? (tci[0] <= tcimax[0] ? tci[0] : tcimax[0]) : tcimin[0];
+					tci[1] = tci[1] >= tcimin[1] ? (tci[1] <= tcimax[1] ? tci[1] : tcimax[1]) : tcimin[1];
+					outi[x] = pixelbasei[(tci[1]*tciwidth+tci[0])];
+				}
+			}
+			else
+			{
+				for (; x <= endsub; x++, subtc[0] += substep[0], subtc[1] += substep[1])
+				{
+					tci[0] = subtc[0]>>16;
+					tci[1] = subtc[1]>>16;
+					tci[0] &= tciwrapmask[0];
+					tci[1] &= tciwrapmask[1];
+					outi[x] = pixelbasei[(tci[1]*tciwidth+tci[0])];
+				}
+			}
+		}
+	}
+#endif
+}
+
+// Placeholder for cube-map sampling: no lookup is done yet, the span is filled
+// with 255 bytes. texunitindex, arrayindex and zf are currently unused.
+void DPSOFTRAST_Draw_Span_TextureCubeVaryingBGRA8(const DPSOFTRAST_State_Draw_Span * RESTRICT span, unsigned char * RESTRICT out4ub, int texunitindex, int arrayindex, const float * RESTRICT zf)
+{
+	// TODO: IMPLEMENT
+	// span buffers are indexed by x in [startx, endx), so the fill has to start
+	// at startx - the same way the 2D sampler fills when no texture is bound
+	memset(out4ub + span->startx*4, 255, span->length*4);
+}
+
+// Shadowmap lookup placeholder: sampling is not implemented, so the constant
+// 1.0f is returned for every query and `vector` is never read.
+float DPSOFTRAST_SampleShadowmap(const float *vector)
+{
+	// TODO: IMPLEMENT
+	const float result = 1.0f;
+	(void)vector;
+	return result;
+}
+
+void DPSOFTRAST_Draw_Span_MultiplyVarying(const DPSOFTRAST_State_Draw_Span * RESTRICT span, float *out4f, const float *in4f, int arrayindex, const float *zf)
{
int x;
int startx = span->startx;
}
}
-void DPSOFTRAST_Draw_Span_Varying(const DPSOFTRAST_State_Draw_Span *span, float *out4f, int arrayindex, const float *zf)
+void DPSOFTRAST_Draw_Span_Varying(const DPSOFTRAST_State_Draw_Span * RESTRICT span, float *out4f, int arrayindex, const float *zf)
{
int x;
int startx = span->startx;
}
}
-void DPSOFTRAST_Draw_Span_AddBloom(const DPSOFTRAST_State_Draw_Span *span, float *out4f, const float *ina4f, const float *inb4f, const float *subcolor)
+void DPSOFTRAST_Draw_Span_AddBloom(const DPSOFTRAST_State_Draw_Span * RESTRICT span, float *out4f, const float *ina4f, const float *inb4f, const float *subcolor)
{
int x, startx = span->startx, endx = span->endx;
float c[4], localcolor[4];
}
}
-void DPSOFTRAST_Draw_Span_MultiplyBuffers(const DPSOFTRAST_State_Draw_Span *span, float *out4f, const float *ina4f, const float *inb4f)
+void DPSOFTRAST_Draw_Span_MultiplyBuffers(const DPSOFTRAST_State_Draw_Span * RESTRICT span, float *out4f, const float *ina4f, const float *inb4f)
{
int x, startx = span->startx, endx = span->endx;
for (x = startx;x < endx;x++)
}
}
-void DPSOFTRAST_Draw_Span_AddBuffers(const DPSOFTRAST_State_Draw_Span *span, float *out4f, const float *ina4f, const float *inb4f)
+void DPSOFTRAST_Draw_Span_AddBuffers(const DPSOFTRAST_State_Draw_Span * RESTRICT span, float *out4f, const float *ina4f, const float *inb4f)
{
int x, startx = span->startx, endx = span->endx;
for (x = startx;x < endx;x++)
}
}
-void DPSOFTRAST_Draw_Span_MixBuffers(const DPSOFTRAST_State_Draw_Span *span, float *out4f, const float *ina4f, const float *inb4f)
+void DPSOFTRAST_Draw_Span_MixBuffers(const DPSOFTRAST_State_Draw_Span * RESTRICT span, float *out4f, const float *ina4f, const float *inb4f)
{
int x, startx = span->startx, endx = span->endx;
float a, b;
}
}
-void DPSOFTRAST_Draw_Span_MixUniformColor(const DPSOFTRAST_State_Draw_Span *span, float *out4f, const float *in4f, const float *color)
+void DPSOFTRAST_Draw_Span_MixUniformColor(const DPSOFTRAST_State_Draw_Span * RESTRICT span, float *out4f, const float *in4f, const float *color)
{
int x, startx = span->startx, endx = span->endx;
float localcolor[4], ilerp, lerp;
}
}
-void DPSOFTRAST_Draw_Span_Lightmap(const DPSOFTRAST_State_Draw_Span *span, float * RESTRICT out4f, const float * RESTRICT diffuse, const float * RESTRICT lightmap)
-{
- int x, startx = span->startx, endx = span->endx;
- float Color_Ambient[4], Color_Diffuse[4];
- Color_Ambient[0] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Ambient*4+0];
- Color_Ambient[1] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Ambient*4+1];
- Color_Ambient[2] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Ambient*4+2];
- Color_Ambient[3] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Alpha*4+0];
- Color_Diffuse[0] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Diffuse*4+0];
- Color_Diffuse[1] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Diffuse*4+1];
- Color_Diffuse[2] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Diffuse*4+2];
- Color_Diffuse[3] = 0.0f;
- for (x = startx;x < endx;x++)
- {
- out4f[x*4+0] = diffuse[x*4+0] * (Color_Ambient[0] + lightmap[x*4+0] * Color_Diffuse[0]);
- out4f[x*4+1] = diffuse[x*4+1] * (Color_Ambient[1] + lightmap[x*4+1] * Color_Diffuse[1]);
- out4f[x*4+2] = diffuse[x*4+2] * (Color_Ambient[2] + lightmap[x*4+2] * Color_Diffuse[2]);
- out4f[x*4+3] = diffuse[x*4+3] * (Color_Ambient[3] + lightmap[x*4+3] * Color_Diffuse[3]);
- }
-}
-void DPSOFTRAST_Draw_Span_Lightmap_Finish(const DPSOFTRAST_State_Draw_Span *span, const float * RESTRICT diffuse, const float * RESTRICT lightmap)
+
+// Scales each 4-byte input pixel in [startx, endx) by an interpolated varying:
+// out = in * (data + slope*x) * zf[x], evaluated per channel.
+// Components 0 and 2 of span->data[][arrayindex] are swapped when loaded
+// (presumably RGBA varying order -> BGRA byte order; confirm against callers).
+// NOTE: the result is cast to int and stored into an unsigned char without clamping.
+void DPSOFTRAST_Draw_Span_MultiplyVaryingBGRA8(const DPSOFTRAST_State_Draw_Span * RESTRICT span, unsigned char *out4ub, const unsigned char *in4ub, int arrayindex, const float *zf)
+{
+ int x;
+ int startx = span->startx;
+ int endx = span->endx;
+ float data[4];
+ float slope[4];
+ float z;
+ data[2] = span->data[0][arrayindex][0];
+ data[1] = span->data[0][arrayindex][1];
+ data[0] = span->data[0][arrayindex][2];
+ data[3] = span->data[0][arrayindex][3];
+ slope[2] = span->data[1][arrayindex][0];
+ slope[1] = span->data[1][arrayindex][1];
+ slope[0] = span->data[1][arrayindex][2];
+ slope[3] = span->data[1][arrayindex][3];
+ for (x = startx;x < endx;x++)
+ {
+ z = zf[x];
+ out4ub[x*4+0] = (int)(in4ub[x*4+0] * (data[0] + slope[0]*x) * z);
+ out4ub[x*4+1] = (int)(in4ub[x*4+1] * (data[1] + slope[1]*x) * z);
+ out4ub[x*4+2] = (int)(in4ub[x*4+2] * (data[2] + slope[2]*x) * z);
+ out4ub[x*4+3] = (int)(in4ub[x*4+3] * (data[3] + slope[3]*x) * z);
+ }
+}
+
+// Writes an interpolated varying as 4-byte pixels for x in [startx, endx):
+// out = (data + slope*x) * zf[x], with data and slope pre-scaled by 255.
+// Components 0 and 2 of span->data[][arrayindex] are swapped when loaded
+// (presumably RGBA varying order -> BGRA byte order; confirm against callers).
+// NOTE: the result is cast to int and stored into an unsigned char without clamping.
+void DPSOFTRAST_Draw_Span_VaryingBGRA8(const DPSOFTRAST_State_Draw_Span * RESTRICT span, unsigned char *out4ub, int arrayindex, const float *zf)
+{
+ int x;
+ int startx = span->startx;
+ int endx = span->endx;
+ float data[4];
+ float slope[4];
+ float z;
+ data[2] = span->data[0][arrayindex][0]*255.0f;
+ data[1] = span->data[0][arrayindex][1]*255.0f;
+ data[0] = span->data[0][arrayindex][2]*255.0f;
+ data[3] = span->data[0][arrayindex][3]*255.0f;
+ slope[2] = span->data[1][arrayindex][0]*255.0f;
+ slope[1] = span->data[1][arrayindex][1]*255.0f;
+ slope[0] = span->data[1][arrayindex][2]*255.0f;
+ slope[3] = span->data[1][arrayindex][3]*255.0f;
+ for (x = startx;x < endx;x++)
+ {
+ z = zf[x];
+ out4ub[x*4+0] = (int)((data[0] + slope[0]*x) * z);
+ out4ub[x*4+1] = (int)((data[1] + slope[1]*x) * z);
+ out4ub[x*4+2] = (int)((data[2] + slope[2]*x) * z);
+ out4ub[x*4+3] = (int)((data[3] + slope[3]*x) * z);
+ }
+}
+
+// Bloom combine for x in [startx, endx): out = min(255, ina + max(0, inb - subcolor*255)).
+// subcolor is converted to integers once, with components 0 and 2 swapped on load
+// (presumably RGBA uniform order -> BGRA byte order; confirm against callers).
+void DPSOFTRAST_Draw_Span_AddBloomBGRA8(const DPSOFTRAST_State_Draw_Span * RESTRICT span, unsigned char *out4ub, const unsigned char *ina4ub, const unsigned char *inb4ub, const float *subcolor)
+{
+ int x, startx = span->startx, endx = span->endx;
+ int c[4], localcolor[4];
+ localcolor[2] = (int)(subcolor[0] * 255.0f);
+ localcolor[1] = (int)(subcolor[1] * 255.0f);
+ localcolor[0] = (int)(subcolor[2] * 255.0f);
+ localcolor[3] = (int)(subcolor[3] * 255.0f);
+ for (x = startx;x < endx;x++)
+ {
+ // subtract the threshold color from the second input, clamping at 0
+ c[0] = inb4ub[x*4+0] - localcolor[0];if (c[0] < 0) c[0] = 0;
+ c[1] = inb4ub[x*4+1] - localcolor[1];if (c[1] < 0) c[1] = 0;
+ c[2] = inb4ub[x*4+2] - localcolor[2];if (c[2] < 0) c[2] = 0;
+ c[3] = inb4ub[x*4+3] - localcolor[3];if (c[3] < 0) c[3] = 0;
+ // add the first input, saturating at 255
+ c[0] += ina4ub[x*4+0];if (c[0] > 255) c[0] = 255;
+ c[1] += ina4ub[x*4+1];if (c[1] > 255) c[1] = 255;
+ c[2] += ina4ub[x*4+2];if (c[2] > 255) c[2] = 255;
+ c[3] += ina4ub[x*4+3];if (c[3] > 255) c[3] = 255;
+ out4ub[x*4+0] = c[0];
+ out4ub[x*4+1] = c[1];
+ out4ub[x*4+2] = c[2];
+ out4ub[x*4+3] = c[3];
+ }
+}
+
+// Per-channel modulate of two 4-byte pixel buffers for x in [startx, endx): out = (ina * inb) >> 8.
+// NOTE: the >>8 divides by 256 rather than 255, so 255*255 yields 254.
+void DPSOFTRAST_Draw_Span_MultiplyBuffersBGRA8(const DPSOFTRAST_State_Draw_Span * RESTRICT span, unsigned char *out4ub, const unsigned char *ina4ub, const unsigned char *inb4ub)
+{
+ int x, startx = span->startx, endx = span->endx;
+ for (x = startx;x < endx;x++)
+ {
+ out4ub[x*4+0] = (ina4ub[x*4+0] * inb4ub[x*4+0])>>8;
+ out4ub[x*4+1] = (ina4ub[x*4+1] * inb4ub[x*4+1])>>8;
+ out4ub[x*4+2] = (ina4ub[x*4+2] * inb4ub[x*4+2])>>8;
+ out4ub[x*4+3] = (ina4ub[x*4+3] * inb4ub[x*4+3])>>8;
+ }
+}
+
+// Saturating per-channel add of two 4-byte pixel buffers for x in [startx, endx): out = min(255, ina + inb).
+void DPSOFTRAST_Draw_Span_AddBuffersBGRA8(const DPSOFTRAST_State_Draw_Span * RESTRICT span, unsigned char *out4ub, const unsigned char *ina4ub, const unsigned char *inb4ub)
{
int x, startx = span->startx, endx = span->endx;
int d[4];
- float Color_Ambient[4], Color_Diffuse[4];
- unsigned char * RESTRICT pixelmask = span->pixelmask;
- unsigned char * RESTRICT pixel = (unsigned char *)dpsoftrast.fb_colorpixels[0];
- if (!pixel)
- return;
- pixel += span->start * 4;
- Color_Ambient[0] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Ambient*4+0]*255.0f;
- Color_Ambient[1] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Ambient*4+1]*255.0f;
- Color_Ambient[2] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Ambient*4+2]*255.0f;
- Color_Ambient[3] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Alpha*4+0]*255.0f;
- Color_Diffuse[0] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Diffuse*4+0]*255.0f;
- Color_Diffuse[1] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Diffuse*4+1]*255.0f;
- Color_Diffuse[2] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Diffuse*4+2]*255.0f;
- Color_Diffuse[3] = 0.0f;
for (x = startx;x < endx;x++)
{
- if (!pixelmask[x])
- continue;
- d[0] = diffuse[x*4+0] * (Color_Ambient[0] + lightmap[x*4+0] * Color_Diffuse[0]);if (d[0] > 255) d[0] = 255;
- d[1] = diffuse[x*4+1] * (Color_Ambient[1] + lightmap[x*4+1] * Color_Diffuse[1]);if (d[1] > 255) d[1] = 255;
- d[2] = diffuse[x*4+2] * (Color_Ambient[2] + lightmap[x*4+2] * Color_Diffuse[2]);if (d[2] > 255) d[2] = 255;
- d[3] = diffuse[x*4+3] * (Color_Ambient[3] + lightmap[x*4+3] * Color_Diffuse[3]);if (d[3] > 255) d[3] = 255;
- pixel[x*4+0] = d[2];
- pixel[x*4+1] = d[1];
- pixel[x*4+2] = d[0];
- pixel[x*4+3] = d[3];
+ d[0] = ina4ub[x*4+0] + inb4ub[x*4+0];if (d[0] > 255) d[0] = 255;
+ d[1] = ina4ub[x*4+1] + inb4ub[x*4+1];if (d[1] > 255) d[1] = 255;
+ d[2] = ina4ub[x*4+2] + inb4ub[x*4+2];if (d[2] > 255) d[2] = 255;
+ d[3] = ina4ub[x*4+3] + inb4ub[x*4+3];if (d[3] > 255) d[3] = 255;
+ out4ub[x*4+0] = d[0];
+ out4ub[x*4+1] = d[1];
+ out4ub[x*4+2] = d[2];
+ out4ub[x*4+3] = d[3];
+ }
+}
+
+// Adds a tinted second buffer to the first for x in [startx, endx):
+// out = min(255, ina + ((inb * tint) >> 8)), where tint = (int)(inbtintbgra * 256).
+// The tint is read in index order 0..3 with no component swap (the parameter name suggests it is already BGRA).
+// NOTE: only the upper bound is clamped; a negative tint is not guarded against.
+void DPSOFTRAST_Draw_Span_TintedAddBuffersBGRA8(const DPSOFTRAST_State_Draw_Span * RESTRICT span, unsigned char *out4ub, const unsigned char *ina4ub, const unsigned char *inb4ub, const float *inbtintbgra)
+{
+ int x, startx = span->startx, endx = span->endx;
+ int d[4];
+ int b[4];
+ b[0] = (int)(inbtintbgra[0] * 256.0f);
+ b[1] = (int)(inbtintbgra[1] * 256.0f);
+ b[2] = (int)(inbtintbgra[2] * 256.0f);
+ b[3] = (int)(inbtintbgra[3] * 256.0f);
+ for (x = startx;x < endx;x++)
+ {
+ d[0] = ina4ub[x*4+0] + ((inb4ub[x*4+0]*b[0])>>8);if (d[0] > 255) d[0] = 255;
+ d[1] = ina4ub[x*4+1] + ((inb4ub[x*4+1]*b[1])>>8);if (d[1] > 255) d[1] = 255;
+ d[2] = ina4ub[x*4+2] + ((inb4ub[x*4+2]*b[2])>>8);if (d[2] > 255) d[2] = 255;
+ d[3] = ina4ub[x*4+3] + ((inb4ub[x*4+3]*b[3])>>8);if (d[3] > 255) d[3] = 255;
+ out4ub[x*4+0] = d[0];
+ out4ub[x*4+1] = d[1];
+ out4ub[x*4+2] = d[2];
+ out4ub[x*4+3] = d[3];
+ }
+}
+
+// Blends the second buffer over the first using the second buffer's own alpha byte (offset 3):
+// out = (ina * (256 - alpha) + inb * alpha) >> 8, applied to all four channels including alpha.
+void DPSOFTRAST_Draw_Span_MixBuffersBGRA8(const DPSOFTRAST_State_Draw_Span * RESTRICT span, unsigned char *out4ub, const unsigned char *ina4ub, const unsigned char *inb4ub)
+{
+ int x, startx = span->startx, endx = span->endx;
+ int a, b;
+ for (x = startx;x < endx;x++)
+ {
+ // the two weights always sum to 256 so the >>8 below renormalizes
+ a = 256 - inb4ub[x*4+3];
+ b = inb4ub[x*4+3];
+ out4ub[x*4+0] = (ina4ub[x*4+0] * a + inb4ub[x*4+0] * b)>>8;
+ out4ub[x*4+1] = (ina4ub[x*4+1] * a + inb4ub[x*4+1] * b)>>8;
+ out4ub[x*4+2] = (ina4ub[x*4+2] * a + inb4ub[x*4+2] * b)>>8;
+ out4ub[x*4+3] = (ina4ub[x*4+3] * a + inb4ub[x*4+3] * b)>>8;
+ }
+}
+
+// Blends a constant color over the input buffer using the color's own alpha:
+// out = (in * (256 - A) + color * A) >> 8, where A = (int)(color[3] * 255).
+// color is converted to integers once, with components 0 and 2 swapped on load
+// (presumably RGBA uniform order -> BGRA byte order; confirm against callers).
+void DPSOFTRAST_Draw_Span_MixUniformColorBGRA8(const DPSOFTRAST_State_Draw_Span * RESTRICT span, unsigned char *out4ub, const unsigned char *in4ub, const float *color)
+{
+ int x, startx = span->startx, endx = span->endx;
+ int localcolor[4], ilerp, lerp;
+ localcolor[2] = (int)(color[0]*255.0f);
+ localcolor[1] = (int)(color[1]*255.0f);
+ localcolor[0] = (int)(color[2]*255.0f);
+ localcolor[3] = (int)(color[3]*255.0f);
+ ilerp = 256 - localcolor[3];
+ lerp = localcolor[3];
+ for (x = startx;x < endx;x++)
+ {
+ out4ub[x*4+0] = (in4ub[x*4+0] * ilerp + localcolor[0] * lerp)>>8;
+ out4ub[x*4+1] = (in4ub[x*4+1] * ilerp + localcolor[1] * lerp)>>8;
+ out4ub[x*4+2] = (in4ub[x*4+2] * ilerp + localcolor[2] * lerp)>>8;
+ out4ub[x*4+3] = (in4ub[x*4+3] * ilerp + localcolor[3] * lerp)>>8;
+ }
+}
+
+
+
+// Vertex stage of the generic shader: transforms positions by the matrix stored at
+// DPSOFTRAST_UNIFORM_ModelViewProjectionMatrixM1 and passes COLOR and TEXCOORD0 through unchanged.
+// TEXCOORD1 is copied only when the SPECULAR permutation bit is set.
+void DPSOFTRAST_VertexShader_Generic(void)
+{
+ DPSOFTRAST_Array_Transform(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_POSITION], dpsoftrast.draw.numvertices, dpsoftrast.uniform4f + 4*DPSOFTRAST_UNIFORM_ModelViewProjectionMatrixM1);
+ DPSOFTRAST_Array_Copy(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_COLOR], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_COLOR], dpsoftrast.draw.numvertices);
+ DPSOFTRAST_Array_Copy(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD0], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD0], dpsoftrast.draw.numvertices);
+ if (dpsoftrast.shader_permutation & SHADERPERMUTATION_SPECULAR)
+ DPSOFTRAST_Array_Copy(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD1], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD1], dpsoftrast.draw.numvertices);
+}
+
+// Pixel stage of the generic shader.
+// With DIFFUSE set, the first texture (GL20TU_FIRST, varying 2) is modulated by varying 1;
+// without it, varying 1 alone is written as the fragment color.
+// With DIFFUSE and SPECULAR both set, a second texture (GL20TU_SECOND) is sampled and combined
+// according to the first matching permutation bit:
+//   COLORMAPPING       -> multiply
+//   GLOW               -> add
+//   VERTEXTEXTUREBLEND -> alpha blend using the second texture's alpha
+// If none of those bits is set the second texture is sampled but not applied.
+void DPSOFTRAST_PixelShader_Generic(const DPSOFTRAST_State_Draw_Span * RESTRICT span)
+{
+ float buffer_z[DPSOFTRAST_DRAW_MAXSPANLENGTH];
+ unsigned char buffer_texture_colorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
+ unsigned char buffer_texture_lightmapbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
+ unsigned char buffer_FragColorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
+ DPSOFTRAST_Draw_Span_Begin(span, buffer_z);
+ if (dpsoftrast.shader_permutation & SHADERPERMUTATION_DIFFUSE)
+ {
+ DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(span, buffer_texture_colorbgra8, GL20TU_FIRST, 2, buffer_z);
+ DPSOFTRAST_Draw_Span_MultiplyVaryingBGRA8(span, buffer_FragColorbgra8, buffer_texture_colorbgra8, 1, buffer_z);
+ if (dpsoftrast.shader_permutation & SHADERPERMUTATION_SPECULAR)
+ {
+ DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(span, buffer_texture_lightmapbgra8, GL20TU_SECOND, 2, buffer_z);
+ if (dpsoftrast.shader_permutation & SHADERPERMUTATION_COLORMAPPING)
+ {
+ // multiply
+ DPSOFTRAST_Draw_Span_MultiplyBuffersBGRA8(span, buffer_FragColorbgra8, buffer_FragColorbgra8, buffer_texture_lightmapbgra8);
+ }
+ // the add path is keyed on GLOW; testing COLORMAPPING again here would make it unreachable
+ else if (dpsoftrast.shader_permutation & SHADERPERMUTATION_GLOW)
+ {
+ // add
+ DPSOFTRAST_Draw_Span_AddBuffersBGRA8(span, buffer_FragColorbgra8, buffer_FragColorbgra8, buffer_texture_lightmapbgra8);
+ }
+ else if (dpsoftrast.shader_permutation & SHADERPERMUTATION_VERTEXTEXTUREBLEND)
+ {
+ // alphablend
+ DPSOFTRAST_Draw_Span_MixBuffersBGRA8(span, buffer_FragColorbgra8, buffer_FragColorbgra8, buffer_texture_lightmapbgra8);
+ }
+ }
+ }
+ else
+ DPSOFTRAST_Draw_Span_VaryingBGRA8(span, buffer_FragColorbgra8, 1, buffer_z);
+ DPSOFTRAST_Draw_Span_FinishBGRA8(span, buffer_FragColorbgra8);
+}
+
+
+
+// Vertex stage of the postprocess shader: transforms positions by the matrix stored at
+// DPSOFTRAST_UNIFORM_ModelViewProjectionMatrixM1 and passes TEXCOORD0 and TEXCOORD1 through unchanged.
+void DPSOFTRAST_VertexShader_PostProcess(void)
+{
+ DPSOFTRAST_Array_Transform(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_POSITION], dpsoftrast.draw.numvertices, dpsoftrast.uniform4f + 4*DPSOFTRAST_UNIFORM_ModelViewProjectionMatrixM1);
+ DPSOFTRAST_Array_Copy(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD0], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD0], dpsoftrast.draw.numvertices);
+ DPSOFTRAST_Array_Copy(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD1], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD1], dpsoftrast.draw.numvertices);
+}
+
+// Pixel stage of the postprocess shader: samples GL20TU_FIRST (varying 2) as the base color,
+// optionally adds bloom from GL20TU_SECOND (varying 3) after subtracting the BloomColorSubtract uniform,
+// then blends the ViewTintColor uniform over the result.
+// The SATURATION and GAMMARAMPS permutations are accepted but currently do nothing.
+void DPSOFTRAST_PixelShader_PostProcess(const DPSOFTRAST_State_Draw_Span * RESTRICT span)
+{
+ // TODO: optimize!! at the very least there is no reason to use texture sampling on the frame texture
+ float buffer_z[DPSOFTRAST_DRAW_MAXSPANLENGTH];
+ unsigned char buffer_texture_colorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
+ unsigned char buffer_FragColorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
+ DPSOFTRAST_Draw_Span_Begin(span, buffer_z);
+ DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(span, buffer_FragColorbgra8, GL20TU_FIRST, 2, buffer_z);
+ if (dpsoftrast.shader_permutation & SHADERPERMUTATION_BLOOM)
+ {
+ DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(span, buffer_texture_colorbgra8, GL20TU_SECOND, 3, buffer_z);
+ DPSOFTRAST_Draw_Span_AddBloomBGRA8(span, buffer_FragColorbgra8, buffer_FragColorbgra8, buffer_texture_colorbgra8, dpsoftrast.uniform4f + DPSOFTRAST_UNIFORM_BloomColorSubtract * 4);
+ }
+ DPSOFTRAST_Draw_Span_MixUniformColorBGRA8(span, buffer_FragColorbgra8, buffer_FragColorbgra8, dpsoftrast.uniform4f + DPSOFTRAST_UNIFORM_ViewTintColor * 4);
+ if (dpsoftrast.shader_permutation & SHADERPERMUTATION_SATURATION)
+ {
+ // TODO: implement saturation
+ }
+ if (dpsoftrast.shader_permutation & SHADERPERMUTATION_GAMMARAMPS)
+ {
+ // TODO: implement gammaramps
+ }
+ DPSOFTRAST_Draw_Span_FinishBGRA8(span, buffer_FragColorbgra8);
+}
+
+
+
+// Vertex stage of the depth/shadow shader: only positions are transformed
+// (by the matrix stored at DPSOFTRAST_UNIFORM_ModelViewProjectionMatrixM1); no other arrays are produced.
+void DPSOFTRAST_VertexShader_Depth_Or_Shadow(void)
+{
+ DPSOFTRAST_Array_Transform(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_POSITION], dpsoftrast.draw.numvertices, dpsoftrast.uniform4f + 4*DPSOFTRAST_UNIFORM_ModelViewProjectionMatrixM1);
+}
+
+// Pixel stage of the depth/shadow shader: emits an all-zero fragment color for the span.
+// NOTE(review): the memset clears the first span->length*4 bytes of the buffer, while the other
+// shaders index their buffers by x in [startx, endx) - confirm the two ranges coincide.
+void DPSOFTRAST_PixelShader_Depth_Or_Shadow(const DPSOFTRAST_State_Draw_Span * RESTRICT span)
+{
+ // this is never called (because colormask is off when this shader is used)
+ float buffer_z[DPSOFTRAST_DRAW_MAXSPANLENGTH];
+ unsigned char buffer_FragColorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
+ DPSOFTRAST_Draw_Span_Begin(span, buffer_z);
+ memset(buffer_FragColorbgra8, 0, span->length*4);
+ DPSOFTRAST_Draw_Span_FinishBGRA8(span, buffer_FragColorbgra8);
+}
+
+
+
+// Vertex stage of the flat-color shader: transforms positions by the matrix stored at
+// DPSOFTRAST_UNIFORM_ModelViewProjectionMatrixM1 and TEXCOORD0 by the matrix stored at DPSOFTRAST_UNIFORM_TexMatrixM1.
+void DPSOFTRAST_VertexShader_FlatColor(void)
+{
+ DPSOFTRAST_Array_Transform(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_POSITION], dpsoftrast.draw.numvertices, dpsoftrast.uniform4f + 4*DPSOFTRAST_UNIFORM_ModelViewProjectionMatrixM1);
+ DPSOFTRAST_Array_Transform(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD0], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD0], dpsoftrast.draw.numvertices, dpsoftrast.uniform4f + 4*DPSOFTRAST_UNIFORM_TexMatrixM1);
+}
+
+// Pixel stage of the flat-color shader: out = (texture * color) >> 8 per channel, where the color is
+// the Color_Ambient uniform (components 0 and 2 swapped on load) with the Alpha uniform as its
+// fourth component, all converted to integers scaled by 256.
+// NOTE: the result is stored into an unsigned char without clamping, so a uniform above 1.0 can wrap.
+void DPSOFTRAST_PixelShader_FlatColor(const DPSOFTRAST_State_Draw_Span * RESTRICT span)
+{
+ int x, startx = span->startx, endx = span->endx;
+ int Color_Ambienti[4];
+ float buffer_z[DPSOFTRAST_DRAW_MAXSPANLENGTH];
+ unsigned char buffer_texture_colorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
+ unsigned char buffer_FragColorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
+ Color_Ambienti[2] = (int)(dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Ambient*4+0]*256.0f);
+ Color_Ambienti[1] = (int)(dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Ambient*4+1]*256.0f);
+ Color_Ambienti[0] = (int)(dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Ambient*4+2]*256.0f);
+ Color_Ambienti[3] = (int)(dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Alpha*4+0] *256.0f);
+ DPSOFTRAST_Draw_Span_Begin(span, buffer_z);
+ DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(span, buffer_texture_colorbgra8, GL20TU_COLOR, 2, buffer_z);
+ for (x = startx;x < endx;x++)
+ {
+ buffer_FragColorbgra8[x*4+0] = (buffer_texture_colorbgra8[x*4+0] * Color_Ambienti[0])>>8;
+ buffer_FragColorbgra8[x*4+1] = (buffer_texture_colorbgra8[x*4+1] * Color_Ambienti[1])>>8;
+ buffer_FragColorbgra8[x*4+2] = (buffer_texture_colorbgra8[x*4+2] * Color_Ambienti[2])>>8;
+ buffer_FragColorbgra8[x*4+3] = (buffer_texture_colorbgra8[x*4+3] * Color_Ambienti[3])>>8;
}
+ DPSOFTRAST_Draw_Span_FinishBGRA8(span, buffer_FragColorbgra8);
+}
+
+
+
+// Vertex stage of the vertex-color shader: transforms positions by the matrix stored at
+// DPSOFTRAST_UNIFORM_ModelViewProjectionMatrixM1, copies COLOR unchanged, and transforms TEXCOORD0
+// by the matrix stored at DPSOFTRAST_UNIFORM_TexMatrixM1.
+void DPSOFTRAST_VertexShader_VertexColor(void)
+{
+ DPSOFTRAST_Array_Transform(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_POSITION], dpsoftrast.draw.numvertices, dpsoftrast.uniform4f + 4*DPSOFTRAST_UNIFORM_ModelViewProjectionMatrixM1);
+ DPSOFTRAST_Array_Copy(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_COLOR], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_COLOR], dpsoftrast.draw.numvertices);
+ DPSOFTRAST_Array_Transform(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD0], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD0], dpsoftrast.draw.numvertices, dpsoftrast.uniform4f + 4*DPSOFTRAST_UNIFORM_TexMatrixM1);
}
-void DPSOFTRAST_Draw_Span_VertexColor(const DPSOFTRAST_State_Draw_Span *span, float *out4f, const float *diffuse, const float *zf)
+// Pixel stage of the vertex-color shader:
+// out = texture * (Color_Ambient + vertexcolor * Color_Diffuse) per channel, where vertexcolor is the
+// DPSOFTRAST_ARRAY_COLOR varying evaluated as (data + slope*x) * buffer_z[x].
+// The varying and both uniforms are loaded with components 0 and 2 swapped; the fourth ambient
+// component comes from the Alpha uniform and the fourth diffuse component is 0.
+// NOTE: the result is cast to int and stored into an unsigned char without clamping.
+void DPSOFTRAST_PixelShader_VertexColor(const DPSOFTRAST_State_Draw_Span * RESTRICT span)
{
+ float buffer_z[DPSOFTRAST_DRAW_MAXSPANLENGTH];
+ unsigned char buffer_texture_colorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
+ unsigned char buffer_FragColorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
int x, startx = span->startx, endx = span->endx;
float Color_Ambient[4], Color_Diffuse[4];
- float c[4];
float data[4];
float slope[4];
float z;
int arrayindex = DPSOFTRAST_ARRAY_COLOR;
- data[0] = span->data[0][arrayindex][0];
+ data[2] = span->data[0][arrayindex][0];
data[1] = span->data[0][arrayindex][1];
- data[2] = span->data[0][arrayindex][2];
+ data[0] = span->data[0][arrayindex][2];
data[3] = span->data[0][arrayindex][3];
- slope[0] = span->data[1][arrayindex][0];
+ slope[2] = span->data[1][arrayindex][0];
slope[1] = span->data[1][arrayindex][1];
- slope[2] = span->data[1][arrayindex][2];
+ slope[0] = span->data[1][arrayindex][2];
slope[3] = span->data[1][arrayindex][3];
- Color_Ambient[0] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Ambient*4+0];
+ Color_Ambient[2] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Ambient*4+0];
Color_Ambient[1] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Ambient*4+1];
- Color_Ambient[2] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Ambient*4+2];
+ Color_Ambient[0] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Ambient*4+2];
Color_Ambient[3] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Alpha*4+0];
- Color_Diffuse[0] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Diffuse*4+0];
+ Color_Diffuse[2] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Diffuse*4+0];
Color_Diffuse[1] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Diffuse*4+1];
- Color_Diffuse[2] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Diffuse*4+2];
+ Color_Diffuse[0] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Diffuse*4+2];
Color_Diffuse[3] = 0.0f;
+ DPSOFTRAST_Draw_Span_Begin(span, buffer_z);
+ DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(span, buffer_texture_colorbgra8, GL20TU_COLOR, 2, buffer_z);
for (x = startx;x < endx;x++)
{
- z = zf[x];
- c[0] = (data[0] + slope[0]*x) * z;
- c[1] = (data[1] + slope[1]*x) * z;
- c[2] = (data[2] + slope[2]*x) * z;
- c[3] = (data[3] + slope[3]*x) * z;
- out4f[x*4+0] = diffuse[x*4+0] * (Color_Ambient[0] + c[0] * Color_Diffuse[0]);
- out4f[x*4+1] = diffuse[x*4+1] * (Color_Ambient[1] + c[1] * Color_Diffuse[1]);
- out4f[x*4+2] = diffuse[x*4+2] * (Color_Ambient[2] + c[2] * Color_Diffuse[2]);
- out4f[x*4+3] = diffuse[x*4+3] * (Color_Ambient[3] + c[3] * Color_Diffuse[3]);
+ z = buffer_z[x];
+ buffer_FragColorbgra8[x*4+0] = (int)(buffer_texture_colorbgra8[x*4+0] * (Color_Ambient[0] + ((data[0] + slope[0]*x) * z) * Color_Diffuse[0]));
+ buffer_FragColorbgra8[x*4+1] = (int)(buffer_texture_colorbgra8[x*4+1] * (Color_Ambient[1] + ((data[1] + slope[1]*x) * z) * Color_Diffuse[1]));
+ buffer_FragColorbgra8[x*4+2] = (int)(buffer_texture_colorbgra8[x*4+2] * (Color_Ambient[2] + ((data[2] + slope[2]*x) * z) * Color_Diffuse[2]));
+ buffer_FragColorbgra8[x*4+3] = (int)(buffer_texture_colorbgra8[x*4+3] * (Color_Ambient[3] + ((data[3] + slope[3]*x) * z) * Color_Diffuse[3]));
}
+ DPSOFTRAST_Draw_Span_FinishBGRA8(span, buffer_FragColorbgra8);
+}
+
+
+
+// Vertex stage of the lightmap shader: transforms positions by the matrix stored at
+// DPSOFTRAST_UNIFORM_ModelViewProjectionMatrixM1, transforms TEXCOORD0 by the matrix stored at
+// DPSOFTRAST_UNIFORM_TexMatrixM1, and copies TEXCOORD4 unchanged (the pixel stage samples the lightmap with it).
+void DPSOFTRAST_VertexShader_Lightmap(void)
+{
+ DPSOFTRAST_Array_Transform(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_POSITION], dpsoftrast.draw.numvertices, dpsoftrast.uniform4f + 4*DPSOFTRAST_UNIFORM_ModelViewProjectionMatrixM1);
+ DPSOFTRAST_Array_Transform(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD0], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD0], dpsoftrast.draw.numvertices, dpsoftrast.uniform4f + 4*DPSOFTRAST_UNIFORM_TexMatrixM1);
+ DPSOFTRAST_Array_Copy(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD4], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD4], dpsoftrast.draw.numvertices);
}
-void DPSOFTRAST_Draw_Span_FlatColor(const DPSOFTRAST_State_Draw_Span *span, float *out4f, const float *diffuse)
+// Pixel stage of the lightmap shader, computed in integer fixed point:
+// out = min(255, (glow * Color_Glow + color * (Color_Ambient + lightmap * Color_Diffuse)) >> 16)
+// Ambient and glow uniforms are scaled by 65536 and diffuse by 256, so that after the diffuse term
+// is multiplied by a lightmap byte every term shares the same 16-bit shift.
+// The glow term is only sampled and added when the GLOW permutation bit is set.
+// Uniform colors are loaded with components 0 and 2 swapped; the fourth ambient component comes from
+// the Alpha uniform, and the fourth diffuse and glow components are 0.
+void DPSOFTRAST_PixelShader_Lightmap(const DPSOFTRAST_State_Draw_Span * RESTRICT span)
{
int x, startx = span->startx, endx = span->endx;
- float Color_Ambient[4];
- Color_Ambient[0] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Ambient*4+0];
- Color_Ambient[1] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Ambient*4+1];
- Color_Ambient[2] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Ambient*4+2];
- Color_Ambient[3] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Alpha*4+0];
- for (x = startx;x < endx;x++)
+ int Color_Ambienti[4], Color_Diffusei[4], Color_Glowi[4];
+ float buffer_z[DPSOFTRAST_DRAW_MAXSPANLENGTH];
+ unsigned char buffer_texture_colorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
+ unsigned char buffer_texture_lightmapbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
+ unsigned char buffer_texture_glowbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
+ unsigned char buffer_FragColorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
+ unsigned int d[4];
+ //unsigned char * RESTRICT pixelmask = span->pixelmask;
+ //unsigned char * RESTRICT pixel = (unsigned char *)dpsoftrast.fb_colorpixels[0] + span->start * 4;
+ DPSOFTRAST_Draw_Span_Begin(span, buffer_z);
+ Color_Ambienti[2] = (int)(dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Ambient*4+0] * 65536.0f);
+ Color_Ambienti[1] = (int)(dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Ambient*4+1] * 65536.0f);
+ Color_Ambienti[0] = (int)(dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Ambient*4+2] * 65536.0f);
+ Color_Ambienti[3] = (int)(dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Alpha*4+0] * 65536.0f);
+ Color_Diffusei[2] = (int)(dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Diffuse*4+0] * 256.0f);
+ Color_Diffusei[1] = (int)(dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Diffuse*4+1] * 256.0f);
+ Color_Diffusei[0] = (int)(dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Diffuse*4+2] * 256.0f);
+ Color_Diffusei[3] = 0;
+ Color_Glowi[2] = (int)(dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Glow*4+0] * 65536.0f);
+ Color_Glowi[1] = (int)(dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Glow*4+1] * 65536.0f);
+ Color_Glowi[0] = (int)(dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Glow*4+2] * 65536.0f);
+ Color_Glowi[3] = 0;
+ DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(span, buffer_texture_colorbgra8, GL20TU_COLOR, DPSOFTRAST_ARRAY_TEXCOORD0, buffer_z);
+ DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(span, buffer_texture_lightmapbgra8, GL20TU_LIGHTMAP, DPSOFTRAST_ARRAY_TEXCOORD4, buffer_z);
+ if (dpsoftrast.shader_permutation & SHADERPERMUTATION_GLOW)
+ {
+ DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(span, buffer_texture_glowbgra8, GL20TU_GLOW, DPSOFTRAST_ARRAY_TEXCOORD0, buffer_z);
+ for (x = startx;x < endx;x++)
+ {
+ d[0] = (buffer_texture_glowbgra8[x*4+0] * Color_Glowi[0] + buffer_texture_colorbgra8[x*4+0] * (Color_Ambienti[0] + buffer_texture_lightmapbgra8[x*4+0] * Color_Diffusei[0])) >> 16;if (d[0] > 255) d[0] = 255;
+ d[1] = (buffer_texture_glowbgra8[x*4+1] * Color_Glowi[1] + buffer_texture_colorbgra8[x*4+1] * (Color_Ambienti[1] + buffer_texture_lightmapbgra8[x*4+1] * Color_Diffusei[1])) >> 16;if (d[1] > 255) d[1] = 255;
+ d[2] = (buffer_texture_glowbgra8[x*4+2] * Color_Glowi[2] + buffer_texture_colorbgra8[x*4+2] * (Color_Ambienti[2] + buffer_texture_lightmapbgra8[x*4+2] * Color_Diffusei[2])) >> 16;if (d[2] > 255) d[2] = 255;
+ d[3] = (buffer_texture_glowbgra8[x*4+3] * Color_Glowi[3] + buffer_texture_colorbgra8[x*4+3] * (Color_Ambienti[3] + buffer_texture_lightmapbgra8[x*4+3] * Color_Diffusei[3])) >> 16;if (d[3] > 255) d[3] = 255;
+ buffer_FragColorbgra8[x*4+0] = d[0];
+ buffer_FragColorbgra8[x*4+1] = d[1];
+ buffer_FragColorbgra8[x*4+2] = d[2];
+ buffer_FragColorbgra8[x*4+3] = d[3];
+ }
+ }
+ else
{
- out4f[x*4+0] = diffuse[x*4+0] * Color_Ambient[0];
- out4f[x*4+1] = diffuse[x*4+1] * Color_Ambient[1];
- out4f[x*4+2] = diffuse[x*4+2] * Color_Ambient[2];
- out4f[x*4+3] = diffuse[x*4+3] * Color_Ambient[3];
+ for (x = startx;x < endx;x++)
+ {
+ d[0] = (buffer_texture_colorbgra8[x*4+0] * (Color_Ambienti[0] + buffer_texture_lightmapbgra8[x*4+0] * Color_Diffusei[0])) >> 16;if (d[0] > 255) d[0] = 255;
+ d[1] = (buffer_texture_colorbgra8[x*4+1] * (Color_Ambienti[1] + buffer_texture_lightmapbgra8[x*4+1] * Color_Diffusei[1])) >> 16;if (d[1] > 255) d[1] = 255;
+ d[2] = (buffer_texture_colorbgra8[x*4+2] * (Color_Ambienti[2] + buffer_texture_lightmapbgra8[x*4+2] * Color_Diffusei[2])) >> 16;if (d[2] > 255) d[2] = 255;
+ d[3] = (buffer_texture_colorbgra8[x*4+3] * (Color_Ambienti[3] + buffer_texture_lightmapbgra8[x*4+3] * Color_Diffusei[3])) >> 16;if (d[3] > 255) d[3] = 255;
+ buffer_FragColorbgra8[x*4+0] = d[0];
+ buffer_FragColorbgra8[x*4+1] = d[1];
+ buffer_FragColorbgra8[x*4+2] = d[2];
+ buffer_FragColorbgra8[x*4+3] = d[3];
+ }
}
+ DPSOFTRAST_Draw_Span_FinishBGRA8(span, buffer_FragColorbgra8);
}
-void DPSOFTRAST_Draw_Span_FakeLight(const DPSOFTRAST_State_Draw_Span *span, float *out4f, const float *diffuse, const float *zf)
-{
- memset(out4f, 0, span->length*sizeof(float[4]));
-}
-void DPSOFTRAST_Draw_Span_LightDirectionMap_ModelSpace(const DPSOFTRAST_State_Draw_Span *span, float *out4f, const float *diffuse, const float *zf)
-{
- memset(out4f, 0, span->length*sizeof(float[4]));
-}
-void DPSOFTRAST_Draw_Span_LightDirectionMap_TangentSpace(const DPSOFTRAST_State_Draw_Span *span, float *out4f, const float *diffuse, const float *zf)
+// Vertex stage of the fakelight shader: only positions are transformed
+// (by the matrix stored at DPSOFTRAST_UNIFORM_ModelViewProjectionMatrixM1).
+void DPSOFTRAST_VertexShader_FakeLight(void)
{
- memset(out4f, 0, span->length*sizeof(float[4]));
+ DPSOFTRAST_Array_Transform(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_POSITION], dpsoftrast.draw.numvertices, dpsoftrast.uniform4f + 4*DPSOFTRAST_UNIFORM_ModelViewProjectionMatrixM1);
}
-void DPSOFTRAST_Draw_Span_LightDirection(const DPSOFTRAST_State_Draw_Span *span, float *out4f, const float *diffuse, const float *zf)
+// Pixel stage of the fakelight shader: placeholder that emits an all-zero fragment color for the span.
+void DPSOFTRAST_PixelShader_FakeLight(const DPSOFTRAST_State_Draw_Span * RESTRICT span)
{
- memset(out4f, 0, span->length*sizeof(float[4]));
+ // TODO: IMPLEMENT
+ float buffer_z[DPSOFTRAST_DRAW_MAXSPANLENGTH];
+ unsigned char buffer_FragColorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
+ DPSOFTRAST_Draw_Span_Begin(span, buffer_z);
+ memset(buffer_FragColorbgra8, 0, span->length*4);
+ DPSOFTRAST_Draw_Span_FinishBGRA8(span, buffer_FragColorbgra8);
}
-void DPSOFTRAST_Draw_Span_LightSource(const DPSOFTRAST_State_Draw_Span *span, float *out4f, const float *diffuse, const float *zf)
-{
- memset(out4f, 0, span->length*sizeof(float[4]));
-}
-void DPSOFTRAST_Draw_Span_Refraction(const DPSOFTRAST_State_Draw_Span *span, float *out4f, const float *diffuse, const float *zf)
+
+// Vertex stage of the modelspace light-direction-map shader: currently identical to the lightmap vertex stage.
+void DPSOFTRAST_VertexShader_LightDirectionMap_ModelSpace(void)
{
- memset(out4f, 0, span->length*sizeof(float[4]));
+ DPSOFTRAST_VertexShader_Lightmap();
}
-void DPSOFTRAST_Draw_Span_Water(const DPSOFTRAST_State_Draw_Span *span, float *out4f, const float *diffuse, const float *zf)
+// Pixel stage of the modelspace light-direction-map shader: directional shading is not implemented yet,
+// so this delegates to the plain lightmap pixel stage.
+void DPSOFTRAST_PixelShader_LightDirectionMap_ModelSpace(const DPSOFTRAST_State_Draw_Span * RESTRICT span)
{
- memset(out4f, 0, span->length*sizeof(float[4]));
+ DPSOFTRAST_PixelShader_Lightmap(span);
+ // TODO: IMPLEMENT
}
-void DPSOFTRAST_Draw_Span_DeferredGeometry(const DPSOFTRAST_State_Draw_Span *span, float *out4f, const float *diffuse, const float *zf)
+
+
+// Vertex stage of the tangentspace light-direction-map shader: currently identical to the lightmap vertex stage.
+void DPSOFTRAST_VertexShader_LightDirectionMap_TangentSpace(void)
{
- memset(out4f, 0, span->length*sizeof(float[4]));
+ DPSOFTRAST_VertexShader_Lightmap();
}
-void DPSOFTRAST_Draw_Span_DeferredLightSource(const DPSOFTRAST_State_Draw_Span *span, float *out4f, const float *zf)
+// Pixel stage of the tangentspace light-direction-map shader: directional shading is not implemented yet,
+// so this delegates to the plain lightmap pixel stage.
+void DPSOFTRAST_PixelShader_LightDirectionMap_TangentSpace(const DPSOFTRAST_State_Draw_Span * RESTRICT span)
{
- memset(out4f, 0, span->length*sizeof(float[4]));
+ DPSOFTRAST_PixelShader_Lightmap(span);
+ // TODO: IMPLEMENT
}
-void DPSOFTRAST_Draw_VertexShader(void)
+
+
+void DPSOFTRAST_VertexShader_LightDirection(void)
{
+ int i;
+ int numvertices = dpsoftrast.draw.numvertices;
+ float LightDir[4];
+ float LightVector[4];
+ float EyePosition[4];
+ float EyeVectorModelSpace[4];
+ float EyeVector[4];
+ float position[4];
+ float svector[4];
+ float tvector[4];
+ float normal[4];
+ LightDir[0] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_LightDir*4+0];
+ LightDir[1] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_LightDir*4+1];
+ LightDir[2] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_LightDir*4+2];
+ LightDir[3] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_LightDir*4+3];
+ EyePosition[0] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_EyePosition*4+0];
+ EyePosition[1] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_EyePosition*4+1];
+ EyePosition[2] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_EyePosition*4+2];
+ EyePosition[3] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_EyePosition*4+3];
DPSOFTRAST_Array_Transform(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_POSITION], dpsoftrast.draw.numvertices, dpsoftrast.uniform4f + 4*DPSOFTRAST_UNIFORM_ModelViewProjectionMatrixM1);
- switch(dpsoftrast.shader_mode)
+ DPSOFTRAST_Array_Transform(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD0], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD0], dpsoftrast.draw.numvertices, dpsoftrast.uniform4f + 4*DPSOFTRAST_UNIFORM_TexMatrixM1);
+ for (i = 0;i < numvertices;i++)
{
- case SHADERMODE_GENERIC: ///< (particles/HUD/etc) vertex color: optionally multiplied by one texture
- DPSOFTRAST_Array_Copy(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_COLOR], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_COLOR], dpsoftrast.draw.numvertices);
- DPSOFTRAST_Array_Copy(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD0], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD0], dpsoftrast.draw.numvertices);
- if (dpsoftrast.shader_permutation & SHADERPERMUTATION_SPECULAR)
- DPSOFTRAST_Array_Copy(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD1], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD1], dpsoftrast.draw.numvertices);
- break;
- case SHADERMODE_POSTPROCESS: ///< postprocessing shader (r_glsl_postprocess)
- DPSOFTRAST_Array_Copy(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD0], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD0], dpsoftrast.draw.numvertices);
- DPSOFTRAST_Array_Copy(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD1], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD1], dpsoftrast.draw.numvertices);
- break;
- case SHADERMODE_DEPTH_OR_SHADOW: ///< (depthfirst/shadows) vertex shader only
- break;
- case SHADERMODE_FLATCOLOR: ///< (lightmap) modulate texture by uniform color (q1bsp: q3bsp)
- DPSOFTRAST_Array_Transform(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD0], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD0], dpsoftrast.draw.numvertices, dpsoftrast.uniform4f + 4*DPSOFTRAST_UNIFORM_TexMatrixM1);
- break;
- case SHADERMODE_VERTEXCOLOR: ///< (lightmap) modulate texture by vertex colors (q3bsp)
- DPSOFTRAST_Array_Copy(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_COLOR], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_COLOR], dpsoftrast.draw.numvertices);
- DPSOFTRAST_Array_Transform(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD0], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD0], dpsoftrast.draw.numvertices, dpsoftrast.uniform4f + 4*DPSOFTRAST_UNIFORM_TexMatrixM1);
- break;
- case SHADERMODE_LIGHTMAP: ///< (lightmap) modulate texture by lightmap texture (q1bsp: q3bsp)
- DPSOFTRAST_Array_Transform(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD0], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD0], dpsoftrast.draw.numvertices, dpsoftrast.uniform4f + 4*DPSOFTRAST_UNIFORM_TexMatrixM1);
- DPSOFTRAST_Array_Copy(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD4], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD4], dpsoftrast.draw.numvertices);
- break;
- case SHADERMODE_FAKELIGHT: ///< (fakelight) modulate texture by "fake" lighting (no lightmaps: no nothing)
- break;
- case SHADERMODE_LIGHTDIRECTIONMAP_MODELSPACE: ///< (lightmap) use directional pixel shading from texture containing modelspace light directions (q3bsp deluxemap)
- break;
- case SHADERMODE_LIGHTDIRECTIONMAP_TANGENTSPACE: ///< (lightmap) use directional pixel shading from texture containing tangentspace light directions (q1bsp deluxemap)
- break;
- case SHADERMODE_LIGHTDIRECTION: ///< (lightmap) use directional pixel shading from fixed light direction (q3bsp)
- DPSOFTRAST_Array_Transform(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD0], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD0], dpsoftrast.draw.numvertices, dpsoftrast.uniform4f + 4*DPSOFTRAST_UNIFORM_TexMatrixM1);
- DPSOFTRAST_Draw_VertexShaderLightDirection();
- break;
- case SHADERMODE_LIGHTSOURCE: ///< (lightsource) use directional pixel shading from light source (rtlight)
- break;
- case SHADERMODE_REFRACTION: ///< refract background (the material is rendered normally after this pass)
- break;
- case SHADERMODE_WATER: ///< refract background and reflection (the material is rendered normally after this pass)
- break;
- case SHADERMODE_SHOWDEPTH: ///< (debugging) renders depth as color
- break;
- case SHADERMODE_DEFERREDGEOMETRY: ///< (deferred) render material properties to screenspace geometry buffers
- break;
- case SHADERMODE_DEFERREDLIGHTSOURCE: ///< (deferred) use directional pixel shading from light source (rtlight) on screenspace geometry buffers
- break;
- case SHADERMODE_COUNT:
- break;
+ position[0] = dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_POSITION][i*4+0];
+ position[1] = dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_POSITION][i*4+1];
+ position[2] = dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_POSITION][i*4+2];
+ svector[0] = dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD1][i*4+0];
+ svector[1] = dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD1][i*4+1];
+ svector[2] = dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD1][i*4+2];
+ tvector[0] = dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD2][i*4+0];
+ tvector[1] = dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD2][i*4+1];
+ tvector[2] = dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD2][i*4+2];
+ normal[0] = dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD3][i*4+0];
+ normal[1] = dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD3][i*4+1];
+ normal[2] = dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD3][i*4+2];
+ LightVector[0] = svector[0] * LightDir[0] + svector[1] * LightDir[1] + svector[2] * LightDir[2];
+ LightVector[1] = tvector[0] * LightDir[0] + tvector[1] * LightDir[1] + tvector[2] * LightDir[2];
+ LightVector[2] = normal[0] * LightDir[0] + normal[1] * LightDir[1] + normal[2] * LightDir[2];
+ dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD1][i*4+0] = LightVector[0];
+ dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD1][i*4+1] = LightVector[1];
+ dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD1][i*4+2] = LightVector[2];
+ dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD1][i*4+3] = 0.0f;
+ EyeVectorModelSpace[0] = EyePosition[0] - position[0];
+ EyeVectorModelSpace[1] = EyePosition[1] - position[1];
+ EyeVectorModelSpace[2] = EyePosition[2] - position[2];
+ EyeVector[0] = svector[0] * EyeVectorModelSpace[0] + svector[1] * EyeVectorModelSpace[1] + svector[2] * EyeVectorModelSpace[2];
+ EyeVector[1] = tvector[0] * EyeVectorModelSpace[0] + tvector[1] * EyeVectorModelSpace[1] + tvector[2] * EyeVectorModelSpace[2];
+ EyeVector[2] = normal[0] * EyeVectorModelSpace[0] + normal[1] * EyeVectorModelSpace[1] + normal[2] * EyeVectorModelSpace[2];
+ dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD2][i*4+0] = EyeVector[0];
+ dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD2][i*4+1] = EyeVector[1];
+ dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD2][i*4+2] = EyeVector[2];
+ dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD2][i*4+3] = 0.0f;
}
}
-void DPSOFTRAST_Draw_PixelShaderSpan(const DPSOFTRAST_State_Draw_Span *span)
+/* Scalar helpers. Each argument may be evaluated more than once, so do not pass expressions with side effects. */
+#define DPSOFTRAST_Min(a,b) ((a) < (b) ? (a) : (b))
+#define DPSOFTRAST_Max(a,b) ((a) > (b) ? (a) : (b))
+/* 3-component vector helpers; they work on anything that can be indexed with [0], [1] and [2]. */
+#define DPSOFTRAST_Vector3Dot(a,b) ((a)[0]*(b)[0]+(a)[1]*(b)[1]+(a)[2]*(b)[2])
+#define DPSOFTRAST_Vector3LengthSquared(v) (DPSOFTRAST_Vector3Dot((v),(v)))
+#define DPSOFTRAST_Vector3Length(v) (sqrt(DPSOFTRAST_Vector3LengthSquared(v)))
+/*
+ * Scales v in place to unit length; a vector whose length is exactly zero is left untouched.
+ * The argument is parenthesized at every use so that expressions such as (array + offset) expand
+ * correctly, and the temporary has a long name so it cannot shadow a caller variable used in v.
+ */
+#define DPSOFTRAST_Vector3Normalize(v)\
+do\
+{\
+	float vector3normalize_len = sqrt(DPSOFTRAST_Vector3Dot((v),(v)));\
+	if (vector3normalize_len)\
+	{\
+		vector3normalize_len = 1.0f / vector3normalize_len;\
+		(v)[0] *= vector3normalize_len;\
+		(v)[1] *= vector3normalize_len;\
+		(v)[2] *= vector3normalize_len;\
+	}\
+}\
+while(0)
+/*
+ * Span pixel shader: samples the color texture for the span (plus the pants/shirt, glow, normal
+ * and gloss textures, depending on dpsoftrast.shader_permutation), combines them per pixel with
+ * the ambient color and, when enabled, a diffuse and a specular term computed from vectors
+ * interpolated out of span->data, and passes the resulting bytes to
+ * DPSOFTRAST_Draw_Span_FinishBGRA8.
+ *
+ * span: the span to shade; pixels startx (inclusive) to endx (exclusive) are processed.
+ */
+void DPSOFTRAST_PixelShader_LightDirection(const DPSOFTRAST_State_Draw_Span * RESTRICT span)
{
float buffer_z[DPSOFTRAST_DRAW_MAXSPANLENGTH];
- float buffer_texture_color[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
- float buffer_texture_lightmap[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
- float buffer_FragColor[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
- switch(dpsoftrast.shader_mode)
- {
- case SHADERMODE_GENERIC: ///< (particles/HUD/etc) vertex color: optionally multiplied by one texture
- DPSOFTRAST_Draw_Span_Begin(span, buffer_z);
- if (dpsoftrast.shader_permutation & SHADERPERMUTATION_DIFFUSE)
- {
- DPSOFTRAST_Draw_Span_Texture2DVarying(span, buffer_texture_color, GL20TU_FIRST, 2, buffer_z);
- DPSOFTRAST_Draw_Span_MultiplyVarying(span, buffer_FragColor, buffer_texture_color, 1, buffer_z);
- if (dpsoftrast.shader_permutation & SHADERPERMUTATION_SPECULAR)
+ unsigned char buffer_texture_colorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
+ unsigned char buffer_texture_normalbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
+ unsigned char buffer_texture_glossbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
+ unsigned char buffer_texture_glowbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
+ unsigned char buffer_texture_pantsbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
+ unsigned char buffer_texture_shirtbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
+ unsigned char buffer_FragColorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
+ int x, startx = span->startx, endx = span->endx;
+ float Color_Ambient[4], Color_Diffuse[4], Color_Specular[4], Color_Glow[4], Color_Pants[4], Color_Shirt[4], LightColor[4];
+ float LightVectordata[4];
+ float LightVectorslope[4];
+ float EyeVectordata[4];
+ float EyeVectorslope[4];
+ float z;
+ float diffusetex[4];
+ float glosstex[4];
+ float surfacenormal[4];
+ float lightnormal[4];
+ float eyenormal[4];
+ float specularnormal[4];
+ float diffuse;
+ float specular;
+ float SpecularPower;
+ int d[4]; // per-pixel result kept as ints so values above 255 can be clamped before the byte store
+ Color_Glow[2] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Glow*4+0]; // uniform component 0 goes to slot 2 and component 2 to slot 0, matching the *bgra8 buffers (presumably RGB -> BGR; confirm against the uniform layout)
+ Color_Glow[1] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Glow*4+1];
+ Color_Glow[0] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Glow*4+2];
+ Color_Glow[3] = 0.0f;
+ Color_Ambient[2] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Ambient*4+0];
+ Color_Ambient[1] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Ambient*4+1];
+ Color_Ambient[0] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Ambient*4+2];
+ Color_Ambient[3] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Alpha*4+0]; // slot 3 carries the Alpha uniform; it scales diffusetex[3] in every branch below
+ Color_Pants[2] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Pants*4+0];
+ Color_Pants[1] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Pants*4+1];
+ Color_Pants[0] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Pants*4+2];
+ Color_Pants[3] = 0.0f; // zero (like Color_Shirt[3]) so the colormapping sum leaves diffusetex[3] unchanged
+ Color_Shirt[2] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Shirt*4+0];
+ Color_Shirt[1] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Shirt*4+1];
+ Color_Shirt[0] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Shirt*4+2];
+ Color_Shirt[3] = 0.0f;
+ DPSOFTRAST_Draw_Span_Begin(span, buffer_z); // expected to fill buffer_z, which is read per pixel below; its exact contents are defined by that helper (not shown here)
+ DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(span, buffer_texture_colorbgra8, GL20TU_COLOR, DPSOFTRAST_ARRAY_TEXCOORD0, buffer_z);
+ if (dpsoftrast.shader_permutation & SHADERPERMUTATION_COLORMAPPING)
+ {
+ DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(span, buffer_texture_pantsbgra8, GL20TU_PANTS, DPSOFTRAST_ARRAY_TEXCOORD0, buffer_z);
+ DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(span, buffer_texture_shirtbgra8, GL20TU_SHIRT, DPSOFTRAST_ARRAY_TEXCOORD0, buffer_z);
+ }
+ if (dpsoftrast.shader_permutation & SHADERPERMUTATION_GLOW)
+ {
+ DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(span, buffer_texture_glowbgra8, GL20TU_GLOW, DPSOFTRAST_ARRAY_TEXCOORD0, buffer_z);
+ }
+ if (dpsoftrast.shader_permutation & SHADERPERMUTATION_SPECULAR) // ambient + diffuse + specular
+ {
+ Color_Diffuse[2] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Diffuse*4+0];
+ Color_Diffuse[1] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Diffuse*4+1];
+ Color_Diffuse[0] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Diffuse*4+2];
+ Color_Diffuse[3] = 0.0f;
+ LightColor[2] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_LightColor*4+0];
+ LightColor[1] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_LightColor*4+1];
+ LightColor[0] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_LightColor*4+2];
+ LightColor[3] = 0.0f;
+ LightVectordata[0] = span->data[0][DPSOFTRAST_ARRAY_TEXCOORD1][0]; // data[0] is used as the base value and data[1] as the per-x slope in the loop below
+ LightVectordata[1] = span->data[0][DPSOFTRAST_ARRAY_TEXCOORD1][1];
+ LightVectordata[2] = span->data[0][DPSOFTRAST_ARRAY_TEXCOORD1][2];
+ LightVectordata[3] = span->data[0][DPSOFTRAST_ARRAY_TEXCOORD1][3];
+ LightVectorslope[0] = span->data[1][DPSOFTRAST_ARRAY_TEXCOORD1][0];
+ LightVectorslope[1] = span->data[1][DPSOFTRAST_ARRAY_TEXCOORD1][1];
+ LightVectorslope[2] = span->data[1][DPSOFTRAST_ARRAY_TEXCOORD1][2];
+ LightVectorslope[3] = span->data[1][DPSOFTRAST_ARRAY_TEXCOORD1][3];
+ DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(span, buffer_texture_normalbgra8, GL20TU_NORMAL, DPSOFTRAST_ARRAY_TEXCOORD0, buffer_z);
+ Color_Specular[2] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Specular*4+0];
+ Color_Specular[1] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Specular*4+1];
+ Color_Specular[0] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Specular*4+2];
+ Color_Specular[3] = 0.0f;
+ SpecularPower = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_SpecularPower*4+0] * (1.0f / 255.0f); // pre-divided by 255 because it is later multiplied by the gloss byte glosstex[3]
+ EyeVectordata[0] = span->data[0][DPSOFTRAST_ARRAY_TEXCOORD2][0];
+ EyeVectordata[1] = span->data[0][DPSOFTRAST_ARRAY_TEXCOORD2][1];
+ EyeVectordata[2] = span->data[0][DPSOFTRAST_ARRAY_TEXCOORD2][2];
+ EyeVectordata[3] = span->data[0][DPSOFTRAST_ARRAY_TEXCOORD2][3];
+ EyeVectorslope[0] = span->data[1][DPSOFTRAST_ARRAY_TEXCOORD2][0];
+ EyeVectorslope[1] = span->data[1][DPSOFTRAST_ARRAY_TEXCOORD2][1];
+ EyeVectorslope[2] = span->data[1][DPSOFTRAST_ARRAY_TEXCOORD2][2];
+ EyeVectorslope[3] = span->data[1][DPSOFTRAST_ARRAY_TEXCOORD2][3];
+ DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(span, buffer_texture_glossbgra8, GL20TU_GLOSS, DPSOFTRAST_ARRAY_TEXCOORD0, buffer_z);
+ for (x = startx;x < endx;x++)
+ {
+ z = buffer_z[x];
+ diffusetex[0] = buffer_texture_colorbgra8[x*4+0];
+ diffusetex[1] = buffer_texture_colorbgra8[x*4+1];
+ diffusetex[2] = buffer_texture_colorbgra8[x*4+2];
+ diffusetex[3] = buffer_texture_colorbgra8[x*4+3];
+ if (dpsoftrast.shader_permutation & SHADERPERMUTATION_COLORMAPPING)
{
- DPSOFTRAST_Draw_Span_Texture2DVarying(span, buffer_texture_lightmap, GL20TU_SECOND, 2, buffer_z);
- if (dpsoftrast.shader_permutation & SHADERPERMUTATION_COLORMAPPING)
- {
- // multiply
- DPSOFTRAST_Draw_Span_MultiplyBuffers(span, buffer_FragColor, buffer_FragColor, buffer_texture_lightmap);
- }
- else if (dpsoftrast.shader_permutation & SHADERPERMUTATION_COLORMAPPING)
- {
- // add
- DPSOFTRAST_Draw_Span_AddBuffers(span, buffer_FragColor, buffer_FragColor, buffer_texture_lightmap);
- }
- else if (dpsoftrast.shader_permutation & SHADERPERMUTATION_VERTEXTEXTUREBLEND)
- {
- // alphablend
- DPSOFTRAST_Draw_Span_MixBuffers(span, buffer_FragColor, buffer_FragColor, buffer_texture_lightmap);
- }
+ diffusetex[0] += buffer_texture_pantsbgra8[x*4+0] * Color_Pants[0] + buffer_texture_shirtbgra8[x*4+0] * Color_Shirt[0];
+ diffusetex[1] += buffer_texture_pantsbgra8[x*4+1] * Color_Pants[1] + buffer_texture_shirtbgra8[x*4+1] * Color_Shirt[1];
+ diffusetex[2] += buffer_texture_pantsbgra8[x*4+2] * Color_Pants[2] + buffer_texture_shirtbgra8[x*4+2] * Color_Shirt[2];
+ diffusetex[3] += buffer_texture_pantsbgra8[x*4+3] * Color_Pants[3] + buffer_texture_shirtbgra8[x*4+3] * Color_Shirt[3];
+ }
+ glosstex[0] = buffer_texture_glossbgra8[x*4+0];
+ glosstex[1] = buffer_texture_glossbgra8[x*4+1];
+ glosstex[2] = buffer_texture_glossbgra8[x*4+2];
+ glosstex[3] = buffer_texture_glossbgra8[x*4+3];
+ surfacenormal[0] = buffer_texture_normalbgra8[x*4+2] * (1.0f / 128.0f) - 1.0f; // byte 0..255 -> roughly -1..+1; texture slot 2 feeds component 0 (same 0<->2 swap as the colors)
+ surfacenormal[1] = buffer_texture_normalbgra8[x*4+1] * (1.0f / 128.0f) - 1.0f;
+ surfacenormal[2] = buffer_texture_normalbgra8[x*4+0] * (1.0f / 128.0f) - 1.0f;
+ DPSOFTRAST_Vector3Normalize(surfacenormal);
+
+ lightnormal[0] = (LightVectordata[0] + LightVectorslope[0]*x) * z; // linear in x (data + slope*x), then scaled by buffer_z[x]
+ lightnormal[1] = (LightVectordata[1] + LightVectorslope[1]*x) * z;
+ lightnormal[2] = (LightVectordata[2] + LightVectorslope[2]*x) * z;
+ DPSOFTRAST_Vector3Normalize(lightnormal);
+
+ eyenormal[0] = (EyeVectordata[0] + EyeVectorslope[0]*x) * z;
+ eyenormal[1] = (EyeVectordata[1] + EyeVectorslope[1]*x) * z;
+ eyenormal[2] = (EyeVectordata[2] + EyeVectorslope[2]*x) * z;
+ DPSOFTRAST_Vector3Normalize(eyenormal);
+
+ specularnormal[0] = lightnormal[0] + eyenormal[0]; // sum of the normalized light and eye directions, normalized below (the half vector between them)
+ specularnormal[1] = lightnormal[1] + eyenormal[1];
+ specularnormal[2] = lightnormal[2] + eyenormal[2];
+ DPSOFTRAST_Vector3Normalize(specularnormal);
+
+ diffuse = DPSOFTRAST_Vector3Dot(surfacenormal, lightnormal);if (diffuse < 0.0f) diffuse = 0.0f;
+ specular = DPSOFTRAST_Vector3Dot(surfacenormal, specularnormal);if (specular < 0.0f) specular = 0.0f;
+ specular = pow(specular, SpecularPower * glosstex[3]); // exponent = SpecularPower uniform * gloss byte / 255
+ if (dpsoftrast.shader_permutation & SHADERPERMUTATION_GLOW)
+ {
+ d[0] = (int)(buffer_texture_glowbgra8[x*4+0] * Color_Glow[0] + diffusetex[0] * Color_Ambient[0] + (diffusetex[0] * Color_Diffuse[0] * diffuse + glosstex[0] * Color_Specular[0] * specular) * LightColor[0]);if (d[0] > 255) d[0] = 255; // the cast truncates; only the upper bound is clamped
+ d[1] = (int)(buffer_texture_glowbgra8[x*4+1] * Color_Glow[1] + diffusetex[1] * Color_Ambient[1] + (diffusetex[1] * Color_Diffuse[1] * diffuse + glosstex[1] * Color_Specular[1] * specular) * LightColor[1]);if (d[1] > 255) d[1] = 255;
+ d[2] = (int)(buffer_texture_glowbgra8[x*4+2] * Color_Glow[2] + diffusetex[2] * Color_Ambient[2] + (diffusetex[2] * Color_Diffuse[2] * diffuse + glosstex[2] * Color_Specular[2] * specular) * LightColor[2]);if (d[2] > 255) d[2] = 255;
+ d[3] = (int)( diffusetex[3] * Color_Ambient[3]);if (d[3] > 255) d[3] = 255;
+ }
+ else
+ {
+ d[0] = (int)( diffusetex[0] * Color_Ambient[0] + (diffusetex[0] * Color_Diffuse[0] * diffuse + glosstex[0] * Color_Specular[0] * specular) * LightColor[0]);if (d[0] > 255) d[0] = 255;
+ d[1] = (int)( diffusetex[1] * Color_Ambient[1] + (diffusetex[1] * Color_Diffuse[1] * diffuse + glosstex[1] * Color_Specular[1] * specular) * LightColor[1]);if (d[1] > 255) d[1] = 255;
+ d[2] = (int)( diffusetex[2] * Color_Ambient[2] + (diffusetex[2] * Color_Diffuse[2] * diffuse + glosstex[2] * Color_Specular[2] * specular) * LightColor[2]);if (d[2] > 255) d[2] = 255;
+ d[3] = (int)( diffusetex[3] * Color_Ambient[3]);if (d[3] > 255) d[3] = 255;
}
+ buffer_FragColorbgra8[x*4+0] = d[0];
+ buffer_FragColorbgra8[x*4+1] = d[1];
+ buffer_FragColorbgra8[x*4+2] = d[2];
+ buffer_FragColorbgra8[x*4+3] = d[3];
}
- else
- DPSOFTRAST_Draw_Span_Varying(span, buffer_FragColor, 1, buffer_z);
- DPSOFTRAST_Draw_Span_Finish(span, buffer_FragColor);
- break;
- case SHADERMODE_POSTPROCESS: ///< postprocessing shader (r_glsl_postprocess)
- // TODO: optimize!! at the very least there is no reason to use texture sampling on the frame texture
- DPSOFTRAST_Draw_Span_Begin(span, buffer_z);
- DPSOFTRAST_Draw_Span_Texture2DVarying(span, buffer_FragColor, GL20TU_FIRST, 2, buffer_z);
- if (dpsoftrast.shader_permutation & SHADERPERMUTATION_BLOOM)
+ }
+ else if (dpsoftrast.shader_permutation & SHADERPERMUTATION_DIFFUSE) // ambient + diffuse. NOTE(review): unlike the SPECULAR branch, this branch and the final else never add the pants/shirt samples to diffusetex even when COLORMAPPING is set - confirm whether that is intended
+ {
+ Color_Diffuse[2] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Diffuse*4+0];
+ Color_Diffuse[1] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Diffuse*4+1];
+ Color_Diffuse[0] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Diffuse*4+2];
+ Color_Diffuse[3] = 0.0f;
+ LightColor[2] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_LightColor*4+0];
+ LightColor[1] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_LightColor*4+1];
+ LightColor[0] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_LightColor*4+2];
+ LightColor[3] = 0.0f;
+ LightVectordata[0] = span->data[0][DPSOFTRAST_ARRAY_TEXCOORD1][0];
+ LightVectordata[1] = span->data[0][DPSOFTRAST_ARRAY_TEXCOORD1][1];
+ LightVectordata[2] = span->data[0][DPSOFTRAST_ARRAY_TEXCOORD1][2];
+ LightVectordata[3] = span->data[0][DPSOFTRAST_ARRAY_TEXCOORD1][3];
+ LightVectorslope[0] = span->data[1][DPSOFTRAST_ARRAY_TEXCOORD1][0];
+ LightVectorslope[1] = span->data[1][DPSOFTRAST_ARRAY_TEXCOORD1][1];
+ LightVectorslope[2] = span->data[1][DPSOFTRAST_ARRAY_TEXCOORD1][2];
+ LightVectorslope[3] = span->data[1][DPSOFTRAST_ARRAY_TEXCOORD1][3];
+ DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(span, buffer_texture_normalbgra8, GL20TU_NORMAL, DPSOFTRAST_ARRAY_TEXCOORD0, buffer_z);
+ for (x = startx;x < endx;x++)
{
- DPSOFTRAST_Draw_Span_Texture2DVarying(span, buffer_texture_color, GL20TU_SECOND, 3, buffer_z);
- DPSOFTRAST_Draw_Span_AddBloom(span, buffer_FragColor, buffer_FragColor, buffer_texture_color, dpsoftrast.uniform4f + DPSOFTRAST_UNIFORM_BloomColorSubtract * 4);
+ z = buffer_z[x];
+ diffusetex[0] = buffer_texture_colorbgra8[x*4+0];
+ diffusetex[1] = buffer_texture_colorbgra8[x*4+1];
+ diffusetex[2] = buffer_texture_colorbgra8[x*4+2];
+ diffusetex[3] = buffer_texture_colorbgra8[x*4+3];
+ surfacenormal[0] = buffer_texture_normalbgra8[x*4+2] * (1.0f / 128.0f) - 1.0f;
+ surfacenormal[1] = buffer_texture_normalbgra8[x*4+1] * (1.0f / 128.0f) - 1.0f;
+ surfacenormal[2] = buffer_texture_normalbgra8[x*4+0] * (1.0f / 128.0f) - 1.0f;
+ DPSOFTRAST_Vector3Normalize(surfacenormal);
+
+ lightnormal[0] = (LightVectordata[0] + LightVectorslope[0]*x) * z;
+ lightnormal[1] = (LightVectordata[1] + LightVectorslope[1]*x) * z;
+ lightnormal[2] = (LightVectordata[2] + LightVectorslope[2]*x) * z;
+ DPSOFTRAST_Vector3Normalize(lightnormal);
+
+ diffuse = DPSOFTRAST_Vector3Dot(surfacenormal, lightnormal);if (diffuse < 0.0f) diffuse = 0.0f;
+ if (dpsoftrast.shader_permutation & SHADERPERMUTATION_GLOW)
+ {
+ d[0] = (int)(buffer_texture_glowbgra8[x*4+0] * Color_Glow[0] + diffusetex[0] * (Color_Ambient[0] + Color_Diffuse[0] * diffuse * LightColor[0]));if (d[0] > 255) d[0] = 255;
+ d[1] = (int)(buffer_texture_glowbgra8[x*4+1] * Color_Glow[1] + diffusetex[1] * (Color_Ambient[1] + Color_Diffuse[1] * diffuse * LightColor[1]));if (d[1] > 255) d[1] = 255;
+ d[2] = (int)(buffer_texture_glowbgra8[x*4+2] * Color_Glow[2] + diffusetex[2] * (Color_Ambient[2] + Color_Diffuse[2] * diffuse * LightColor[2]));if (d[2] > 255) d[2] = 255;
+ d[3] = (int)( diffusetex[3] * (Color_Ambient[3] ));if (d[3] > 255) d[3] = 255;
+ }
+ else
+ {
+ d[0] = (int)( + diffusetex[0] * (Color_Ambient[0] + Color_Diffuse[0] * diffuse * LightColor[0]));if (d[0] > 255) d[0] = 255;
+ d[1] = (int)( + diffusetex[1] * (Color_Ambient[1] + Color_Diffuse[1] * diffuse * LightColor[1]));if (d[1] > 255) d[1] = 255;
+ d[2] = (int)( + diffusetex[2] * (Color_Ambient[2] + Color_Diffuse[2] * diffuse * LightColor[2]));if (d[2] > 255) d[2] = 255;
+ d[3] = (int)( diffusetex[3] * (Color_Ambient[3] ));if (d[3] > 255) d[3] = 255;
+ }
+ buffer_FragColorbgra8[x*4+0] = d[0];
+ buffer_FragColorbgra8[x*4+1] = d[1];
+ buffer_FragColorbgra8[x*4+2] = d[2];
+ buffer_FragColorbgra8[x*4+3] = d[3];
}
- DPSOFTRAST_Draw_Span_MixUniformColor(span, buffer_FragColor, buffer_FragColor, dpsoftrast.uniform4f + DPSOFTRAST_UNIFORM_ViewTintColor * 4);
- if (dpsoftrast.shader_permutation & SHADERPERMUTATION_SATURATION)
+ }
+ else // ambient only (plus optional glow)
+ {
+ for (x = startx;x < endx;x++)
{
- // TODO: implement saturation
+ z = buffer_z[x]; // z is assigned but not used in this branch
+ diffusetex[0] = buffer_texture_colorbgra8[x*4+0];
+ diffusetex[1] = buffer_texture_colorbgra8[x*4+1];
+ diffusetex[2] = buffer_texture_colorbgra8[x*4+2];
+ diffusetex[3] = buffer_texture_colorbgra8[x*4+3];
+
+ if (dpsoftrast.shader_permutation & SHADERPERMUTATION_GLOW)
+ {
+ d[0] = (int)(buffer_texture_glowbgra8[x*4+0] * Color_Glow[0] + diffusetex[0] * Color_Ambient[0]);if (d[0] > 255) d[0] = 255;
+ d[1] = (int)(buffer_texture_glowbgra8[x*4+1] * Color_Glow[1] + diffusetex[1] * Color_Ambient[1]);if (d[1] > 255) d[1] = 255;
+ d[2] = (int)(buffer_texture_glowbgra8[x*4+2] * Color_Glow[2] + diffusetex[2] * Color_Ambient[2]);if (d[2] > 255) d[2] = 255;
+ d[3] = (int)( diffusetex[3] * Color_Ambient[3]);if (d[3] > 255) d[3] = 255;
+ }
+ else
+ {
+ d[0] = (int)( diffusetex[0] * Color_Ambient[0]);if (d[0] > 255) d[0] = 255;
+ d[1] = (int)( diffusetex[1] * Color_Ambient[1]);if (d[1] > 255) d[1] = 255;
+ d[2] = (int)( diffusetex[2] * Color_Ambient[2]);if (d[2] > 255) d[2] = 255;
+ d[3] = (int)( diffusetex[3] * Color_Ambient[3]);if (d[3] > 255) d[3] = 255;
+ }
+ buffer_FragColorbgra8[x*4+0] = d[0];
+ buffer_FragColorbgra8[x*4+1] = d[1];
+ buffer_FragColorbgra8[x*4+2] = d[2];
+ buffer_FragColorbgra8[x*4+3] = d[3];
}
- if (dpsoftrast.shader_permutation & SHADERPERMUTATION_GAMMARAMPS)
+ }
+ DPSOFTRAST_Draw_Span_FinishBGRA8(span, buffer_FragColorbgra8);
+}
+
+
+
+/*
+ * Vertex stage for light-source rendering.
+ *
+ * Whole-array work first: POSITION is transformed by ModelViewProjectionMatrixM1, TEXCOORD0 by
+ * TexMatrixM1, the input positions are transformed by ModelToLightM1 into output TEXCOORD3, and
+ * TEXCOORD4 is copied through unchanged.
+ *
+ * Then, per vertex, the vectors from the vertex position to the LightPosition and EyePosition
+ * uniforms are each dotted with the three input vectors stored in TEXCOORD1, TEXCOORD2 and
+ * TEXCOORD3 (used as s-vector, t-vector and normal).  The results are written to output
+ * TEXCOORD1 (light) and TEXCOORD2 (eye) with the fourth component set to zero.
+ */
+void DPSOFTRAST_VertexShader_LightSource(void)
+{
+	const int vertexcount = dpsoftrast.draw.numvertices;
+	int vert;
+	int c;
+	float lightorigin[4];
+	float eyeorigin[4];
+	float origin[3];
+	float sdir[3];
+	float tdir[3];
+	float ndir[3];
+	float tolight[3];
+	float toeye[3];
+
+	// snapshot the two position uniforms before any array is written
+	for (c = 0;c < 4;c++)
+	{
+		lightorigin[c] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_LightPosition*4+c];
+		eyeorigin[c] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_EyePosition*4+c];
+	}
+
+	DPSOFTRAST_Array_Transform(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_POSITION], dpsoftrast.draw.numvertices, dpsoftrast.uniform4f + 4*DPSOFTRAST_UNIFORM_ModelViewProjectionMatrixM1);
+	DPSOFTRAST_Array_Transform(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD0], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD0], dpsoftrast.draw.numvertices, dpsoftrast.uniform4f + 4*DPSOFTRAST_UNIFORM_TexMatrixM1);
+	DPSOFTRAST_Array_Transform(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD3], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_POSITION], dpsoftrast.draw.numvertices, dpsoftrast.uniform4f + 4*DPSOFTRAST_UNIFORM_ModelToLightM1);
+	DPSOFTRAST_Array_Copy(dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD4], dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD4], dpsoftrast.draw.numvertices);
+
+	for (vert = 0;vert < vertexcount;vert++)
+	{
+		const int base = vert*4;
+
+		// read every input of this vertex into locals before writing any output
+		for (c = 0;c < 3;c++)
+		{
+			origin[c] = dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_POSITION][base+c];
+			sdir[c] = dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD1][base+c];
+			tdir[c] = dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD2][base+c];
+			ndir[c] = dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_TEXCOORD3][base+c];
+		}
+		for (c = 0;c < 3;c++)
+		{
+			tolight[c] = lightorigin[c] - origin[c];
+			toeye[c] = eyeorigin[c] - origin[c];
+		}
+
+		// vertex-to-light vector expressed along the s/t/normal vectors
+		dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD1][base+0] = sdir[0] * tolight[0] + sdir[1] * tolight[1] + sdir[2] * tolight[2];
+		dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD1][base+1] = tdir[0] * tolight[0] + tdir[1] * tolight[1] + tdir[2] * tolight[2];
+		dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD1][base+2] = ndir[0] * tolight[0] + ndir[1] * tolight[1] + ndir[2] * tolight[2];
+		dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD1][base+3] = 0.0f;
+
+		// vertex-to-eye vector expressed along the same three vectors
+		dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD2][base+0] = sdir[0] * toeye[0] + sdir[1] * toeye[1] + sdir[2] * toeye[2];
+		dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD2][base+1] = tdir[0] * toeye[0] + tdir[1] * toeye[1] + tdir[2] * toeye[2];
+		dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD2][base+2] = ndir[0] * toeye[0] + ndir[1] * toeye[1] + ndir[2] * toeye[2];
+		dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD2][base+3] = 0.0f;
+	}
+}
+
+void DPSOFTRAST_PixelShader_LightSource(const DPSOFTRAST_State_Draw_Span * RESTRICT span)
+{
+ float buffer_z[DPSOFTRAST_DRAW_MAXSPANLENGTH];
+ unsigned char buffer_texture_colorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
+ unsigned char buffer_texture_normalbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
+ unsigned char buffer_texture_glossbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
+ unsigned char buffer_texture_cubebgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
+ unsigned char buffer_texture_pantsbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
+ unsigned char buffer_texture_shirtbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
+ unsigned char buffer_FragColorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
+ int x, startx = span->startx, endx = span->endx;
+ float Color_Ambient[4], Color_Diffuse[4], Color_Specular[4], Color_Glow[4], Color_Pants[4], Color_Shirt[4], LightColor[4];
+ float CubeVectordata[4];
+ float CubeVectorslope[4];
+ float LightVectordata[4];
+ float LightVectorslope[4];
+ float EyeVectordata[4];
+ float EyeVectorslope[4];
+ float z;
+ float diffusetex[4];
+ float glosstex[4];
+ float surfacenormal[4];
+ float lightnormal[4];
+ float eyenormal[4];
+ float specularnormal[4];
+ float diffuse;
+ float specular;
+ float SpecularPower;
+ float CubeVector[4];
+ float attenuation;
+ int d[4];
+ Color_Glow[2] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Glow*4+0];
+ Color_Glow[1] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Glow*4+1];
+ Color_Glow[0] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Glow*4+2];
+ Color_Glow[3] = 0.0f;
+ Color_Ambient[2] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Ambient*4+0];
+ Color_Ambient[1] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Ambient*4+1];
+ Color_Ambient[0] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Ambient*4+2];
+ Color_Ambient[3] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Alpha*4+0];
+ Color_Diffuse[2] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Diffuse*4+0];
+ Color_Diffuse[1] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Diffuse*4+1];
+ Color_Diffuse[0] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Diffuse*4+2];
+ Color_Diffuse[3] = 0.0f;
+ Color_Specular[2] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Specular*4+0];
+ Color_Specular[1] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Specular*4+1];
+ Color_Specular[0] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Specular*4+2];
+ Color_Specular[3] = 0.0f;
+ Color_Pants[2] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Pants*4+0];
+ Color_Pants[1] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Pants*4+1];
+ Color_Pants[0] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Pants*4+2];
+ Color_Pants[3] = 0.0f;
+ Color_Shirt[2] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Shirt*4+0];
+ Color_Shirt[1] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Shirt*4+1];
+ Color_Shirt[0] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_Color_Shirt*4+2];
+ Color_Shirt[3] = 0.0f;
+ LightColor[2] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_LightColor*4+0];
+ LightColor[1] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_LightColor*4+1];
+ LightColor[0] = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_LightColor*4+2];
+ LightColor[3] = 0.0f;
+ SpecularPower = dpsoftrast.uniform4f[DPSOFTRAST_UNIFORM_SpecularPower*4+0] * (1.0f / 255.0f);
+ EyeVectordata[0] = span->data[0][DPSOFTRAST_ARRAY_TEXCOORD2][0];
+ EyeVectordata[1] = span->data[0][DPSOFTRAST_ARRAY_TEXCOORD2][1];
+ EyeVectordata[2] = span->data[0][DPSOFTRAST_ARRAY_TEXCOORD2][2];
+ EyeVectordata[3] = span->data[0][DPSOFTRAST_ARRAY_TEXCOORD2][3];
+ EyeVectorslope[0] = span->data[1][DPSOFTRAST_ARRAY_TEXCOORD2][0];
+ EyeVectorslope[1] = span->data[1][DPSOFTRAST_ARRAY_TEXCOORD2][1];
+ EyeVectorslope[2] = span->data[1][DPSOFTRAST_ARRAY_TEXCOORD2][2];
+ EyeVectorslope[3] = span->data[1][DPSOFTRAST_ARRAY_TEXCOORD2][3];
+ LightVectordata[0] = span->data[0][DPSOFTRAST_ARRAY_TEXCOORD1][0];
+ LightVectordata[1] = span->data[0][DPSOFTRAST_ARRAY_TEXCOORD1][1];
+ LightVectordata[2] = span->data[0][DPSOFTRAST_ARRAY_TEXCOORD1][2];
+ LightVectordata[3] = span->data[0][DPSOFTRAST_ARRAY_TEXCOORD1][3];
+ LightVectorslope[0] = span->data[1][DPSOFTRAST_ARRAY_TEXCOORD1][0];
+ LightVectorslope[1] = span->data[1][DPSOFTRAST_ARRAY_TEXCOORD1][1];
+ LightVectorslope[2] = span->data[1][DPSOFTRAST_ARRAY_TEXCOORD1][2];
+ LightVectorslope[3] = span->data[1][DPSOFTRAST_ARRAY_TEXCOORD1][3];
+ CubeVectordata[0] = span->data[0][DPSOFTRAST_ARRAY_TEXCOORD3][0];
+ CubeVectordata[1] = span->data[0][DPSOFTRAST_ARRAY_TEXCOORD3][1];
+ CubeVectordata[2] = span->data[0][DPSOFTRAST_ARRAY_TEXCOORD3][2];
+ CubeVectordata[3] = span->data[0][DPSOFTRAST_ARRAY_TEXCOORD3][3];
+ CubeVectorslope[0] = span->data[1][DPSOFTRAST_ARRAY_TEXCOORD3][0];
+ CubeVectorslope[1] = span->data[1][DPSOFTRAST_ARRAY_TEXCOORD3][1];
+ CubeVectorslope[2] = span->data[1][DPSOFTRAST_ARRAY_TEXCOORD3][2];
+ CubeVectorslope[3] = span->data[1][DPSOFTRAST_ARRAY_TEXCOORD3][3];
+ DPSOFTRAST_Draw_Span_Begin(span, buffer_z);
+ memset(buffer_FragColorbgra8 + startx*4, 0, (endx-startx)*4); // clear first, because we skip writing black pixels, and there are a LOT of them...
+ DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(span, buffer_texture_colorbgra8, GL20TU_COLOR, DPSOFTRAST_ARRAY_TEXCOORD0, buffer_z);
+ if (dpsoftrast.shader_permutation & SHADERPERMUTATION_COLORMAPPING)
+ {
+ DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(span, buffer_texture_pantsbgra8, GL20TU_PANTS, DPSOFTRAST_ARRAY_TEXCOORD0, buffer_z);
+ DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(span, buffer_texture_shirtbgra8, GL20TU_SHIRT, DPSOFTRAST_ARRAY_TEXCOORD0, buffer_z);
+ }
+ if (dpsoftrast.shader_permutation & SHADERPERMUTATION_CUBEFILTER)
+ DPSOFTRAST_Draw_Span_TextureCubeVaryingBGRA8(span, buffer_texture_cubebgra8, GL20TU_CUBE, DPSOFTRAST_ARRAY_TEXCOORD3, buffer_z);
+ if (dpsoftrast.shader_permutation & SHADERPERMUTATION_SPECULAR)
+ {
+ DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(span, buffer_texture_normalbgra8, GL20TU_NORMAL, DPSOFTRAST_ARRAY_TEXCOORD0, buffer_z);
+ DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(span, buffer_texture_glossbgra8, GL20TU_GLOSS, DPSOFTRAST_ARRAY_TEXCOORD0, buffer_z);
+ for (x = startx;x < endx;x++)
{
- // TODO: implement gammaramps
+ z = buffer_z[x];
+ CubeVector[0] = (CubeVectordata[0] + CubeVectorslope[0]*x) * z;
+ CubeVector[1] = (CubeVectordata[1] + CubeVectorslope[1]*x) * z;
+ CubeVector[2] = (CubeVectordata[2] + CubeVectorslope[2]*x) * z;
+ attenuation = 1.0f - DPSOFTRAST_Vector3LengthSquared(CubeVector);
+ if (attenuation < 0.01f)
+ continue;
+ if (dpsoftrast.shader_permutation & SHADERPERMUTATION_SHADOWMAP2D)
+ {
+ attenuation *= DPSOFTRAST_SampleShadowmap(CubeVector);
+ if (attenuation < 0.01f)
+ continue;
+ }
+
+ diffusetex[0] = buffer_texture_colorbgra8[x*4+0];
+ diffusetex[1] = buffer_texture_colorbgra8[x*4+1];
+ diffusetex[2] = buffer_texture_colorbgra8[x*4+2];
+ diffusetex[3] = buffer_texture_colorbgra8[x*4+3];
+ if (dpsoftrast.shader_permutation & SHADERPERMUTATION_COLORMAPPING)
+ {
+ diffusetex[0] += buffer_texture_pantsbgra8[x*4+0] * Color_Pants[0] + buffer_texture_shirtbgra8[x*4+0] * Color_Shirt[0];
+ diffusetex[1] += buffer_texture_pantsbgra8[x*4+1] * Color_Pants[1] + buffer_texture_shirtbgra8[x*4+1] * Color_Shirt[1];
+ diffusetex[2] += buffer_texture_pantsbgra8[x*4+2] * Color_Pants[2] + buffer_texture_shirtbgra8[x*4+2] * Color_Shirt[2];
+ diffusetex[3] += buffer_texture_pantsbgra8[x*4+3] * Color_Pants[3] + buffer_texture_shirtbgra8[x*4+3] * Color_Shirt[3];
+ }
+ glosstex[0] = buffer_texture_glossbgra8[x*4+0];
+ glosstex[1] = buffer_texture_glossbgra8[x*4+1];
+ glosstex[2] = buffer_texture_glossbgra8[x*4+2];
+ glosstex[3] = buffer_texture_glossbgra8[x*4+3];
+ surfacenormal[0] = buffer_texture_normalbgra8[x*4+2] * (1.0f / 128.0f) - 1.0f;
+ surfacenormal[1] = buffer_texture_normalbgra8[x*4+1] * (1.0f / 128.0f) - 1.0f;
+ surfacenormal[2] = buffer_texture_normalbgra8[x*4+0] * (1.0f / 128.0f) - 1.0f;
+ DPSOFTRAST_Vector3Normalize(surfacenormal);
+
+ lightnormal[0] = (LightVectordata[0] + LightVectorslope[0]*x) * z;
+ lightnormal[1] = (LightVectordata[1] + LightVectorslope[1]*x) * z;
+ lightnormal[2] = (LightVectordata[2] + LightVectorslope[2]*x) * z;
+ DPSOFTRAST_Vector3Normalize(lightnormal);
+
+ eyenormal[0] = (EyeVectordata[0] + EyeVectorslope[0]*x) * z;
+ eyenormal[1] = (EyeVectordata[1] + EyeVectorslope[1]*x) * z;
+ eyenormal[2] = (EyeVectordata[2] + EyeVectorslope[2]*x) * z;
+ DPSOFTRAST_Vector3Normalize(eyenormal);
+
+ specularnormal[0] = lightnormal[0] + eyenormal[0];
+ specularnormal[1] = lightnormal[1] + eyenormal[1];
+ specularnormal[2] = lightnormal[2] + eyenormal[2];
+ DPSOFTRAST_Vector3Normalize(specularnormal);
+
+ diffuse = DPSOFTRAST_Vector3Dot(surfacenormal, lightnormal);if (diffuse < 0.0f) diffuse = 0.0f;
+ specular = DPSOFTRAST_Vector3Dot(surfacenormal, specularnormal);if (specular < 0.0f) specular = 0.0f;
+ specular = pow(specular, SpecularPower * glosstex[3]);
+ if (dpsoftrast.shader_permutation & SHADERPERMUTATION_CUBEFILTER)
+ {
+ // scale down the attenuation to account for the cubefilter multiplying everything by 255
+ attenuation *= (1.0f / 255.0f);
+ d[0] = (int)((diffusetex[0] * (Color_Ambient[0] + Color_Diffuse[0] * diffuse) + glosstex[0] * Color_Specular[0] * specular) * LightColor[0] * buffer_texture_cubebgra8[x*4+0] * attenuation);if (d[0] > 255) d[0] = 255;
+ d[1] = (int)((diffusetex[1] * (Color_Ambient[1] + Color_Diffuse[1] * diffuse) + glosstex[1] * Color_Specular[1] * specular) * LightColor[1] * buffer_texture_cubebgra8[x*4+1] * attenuation);if (d[1] > 255) d[1] = 255;
+ d[2] = (int)((diffusetex[2] * (Color_Ambient[2] + Color_Diffuse[2] * diffuse) + glosstex[2] * Color_Specular[2] * specular) * LightColor[2] * buffer_texture_cubebgra8[x*4+2] * attenuation);if (d[2] > 255) d[2] = 255;
+ d[3] = (int)( diffusetex[3] );if (d[3] > 255) d[3] = 255;
+ }
+ else
+ {
+ d[0] = (int)((diffusetex[0] * (Color_Ambient[0] + Color_Diffuse[0] * diffuse) + glosstex[0] * Color_Specular[0] * specular) * LightColor[0] * attenuation);if (d[0] > 255) d[0] = 255;
+ d[1] = (int)((diffusetex[1] * (Color_Ambient[1] + Color_Diffuse[1] * diffuse) + glosstex[1] * Color_Specular[1] * specular) * LightColor[1] * attenuation);if (d[1] > 255) d[1] = 255;
+ d[2] = (int)((diffusetex[2] * (Color_Ambient[2] + Color_Diffuse[2] * diffuse) + glosstex[2] * Color_Specular[2] * specular) * LightColor[2] * attenuation);if (d[2] > 255) d[2] = 255;
+ d[3] = (int)( diffusetex[3] );if (d[3] > 255) d[3] = 255;
+ }
+ buffer_FragColorbgra8[x*4+0] = d[0];
+ buffer_FragColorbgra8[x*4+1] = d[1];
+ buffer_FragColorbgra8[x*4+2] = d[2];
+ buffer_FragColorbgra8[x*4+3] = d[3];
}
- DPSOFTRAST_Draw_Span_Finish(span, buffer_FragColor);
- break;
- case SHADERMODE_DEPTH_OR_SHADOW: ///< (depthfirst/shadows) vertex shader only
- break;
- case SHADERMODE_FLATCOLOR: ///< (lightmap) modulate texture by uniform color (q1bsp: q3bsp)
- DPSOFTRAST_Draw_Span_Begin(span, buffer_z);
- DPSOFTRAST_Draw_Span_Texture2DVarying(span, buffer_texture_color, GL20TU_COLOR, 2, buffer_z);
- DPSOFTRAST_Draw_Span_FlatColor(span, buffer_FragColor, buffer_texture_color);
- DPSOFTRAST_Draw_Span_Finish(span, buffer_FragColor);
- break;
- case SHADERMODE_VERTEXCOLOR: ///< (lightmap) modulate texture by vertex colors (q3bsp)
- DPSOFTRAST_Draw_Span_Begin(span, buffer_z);
- DPSOFTRAST_Draw_Span_Texture2DVarying(span, buffer_texture_color, GL20TU_COLOR, 2, buffer_z);
- DPSOFTRAST_Draw_Span_VertexColor(span, buffer_FragColor, buffer_texture_color, buffer_z);
- DPSOFTRAST_Draw_Span_Finish(span, buffer_FragColor);
- break;
- case SHADERMODE_LIGHTMAP: ///< (lightmap) modulate texture by lightmap texture (q1bsp: q3bsp)
- DPSOFTRAST_Draw_Span_Begin(span, buffer_z);
- DPSOFTRAST_Draw_Span_Texture2DVarying(span, buffer_texture_color, GL20TU_COLOR, 2, buffer_z);
- DPSOFTRAST_Draw_Span_Texture2DVarying(span, buffer_texture_lightmap, GL20TU_LIGHTMAP, 6, buffer_z);
- if(!dpsoftrast.user.alphatest && dpsoftrast.fb_blendmode == DPSOFTRAST_BLENDMODE_OPAQUE)
+ }
+ else if (dpsoftrast.shader_permutation & SHADERPERMUTATION_DIFFUSE)
+ {
+ DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(span, buffer_texture_normalbgra8, GL20TU_NORMAL, DPSOFTRAST_ARRAY_TEXCOORD0, buffer_z);
+ for (x = startx;x < endx;x++)
{
- DPSOFTRAST_Draw_Span_Lightmap_Finish(span, buffer_texture_color, buffer_texture_lightmap);
+ z = buffer_z[x];
+ CubeVector[0] = (CubeVectordata[0] + CubeVectorslope[0]*x) * z;
+ CubeVector[1] = (CubeVectordata[1] + CubeVectorslope[1]*x) * z;
+ CubeVector[2] = (CubeVectordata[2] + CubeVectorslope[2]*x) * z;
+ attenuation = 1.0f - DPSOFTRAST_Vector3LengthSquared(CubeVector);
+ if (attenuation < 0.01f)
+ continue;
+ if (dpsoftrast.shader_permutation & SHADERPERMUTATION_SHADOWMAP2D)
+ {
+ attenuation *= DPSOFTRAST_SampleShadowmap(CubeVector);
+ if (attenuation < 0.01f)
+ continue;
+ }
+
+ diffusetex[0] = buffer_texture_colorbgra8[x*4+0];
+ diffusetex[1] = buffer_texture_colorbgra8[x*4+1];
+ diffusetex[2] = buffer_texture_colorbgra8[x*4+2];
+ diffusetex[3] = buffer_texture_colorbgra8[x*4+3];
+ if (dpsoftrast.shader_permutation & SHADERPERMUTATION_COLORMAPPING)
+ {
+ diffusetex[0] += buffer_texture_pantsbgra8[x*4+0] * Color_Pants[0] + buffer_texture_shirtbgra8[x*4+0] * Color_Shirt[0];
+ diffusetex[1] += buffer_texture_pantsbgra8[x*4+1] * Color_Pants[1] + buffer_texture_shirtbgra8[x*4+1] * Color_Shirt[1];
+ diffusetex[2] += buffer_texture_pantsbgra8[x*4+2] * Color_Pants[2] + buffer_texture_shirtbgra8[x*4+2] * Color_Shirt[2];
+ diffusetex[3] += buffer_texture_pantsbgra8[x*4+3] * Color_Pants[3] + buffer_texture_shirtbgra8[x*4+3] * Color_Shirt[3];
+ }
+ surfacenormal[0] = buffer_texture_normalbgra8[x*4+2] * (1.0f / 128.0f) - 1.0f;
+ surfacenormal[1] = buffer_texture_normalbgra8[x*4+1] * (1.0f / 128.0f) - 1.0f;
+ surfacenormal[2] = buffer_texture_normalbgra8[x*4+0] * (1.0f / 128.0f) - 1.0f;
+ DPSOFTRAST_Vector3Normalize(surfacenormal);
+
+ lightnormal[0] = (LightVectordata[0] + LightVectorslope[0]*x) * z;
+ lightnormal[1] = (LightVectordata[1] + LightVectorslope[1]*x) * z;
+ lightnormal[2] = (LightVectordata[2] + LightVectorslope[2]*x) * z;
+ DPSOFTRAST_Vector3Normalize(lightnormal);
+
+ diffuse = DPSOFTRAST_Vector3Dot(surfacenormal, lightnormal);if (diffuse < 0.0f) diffuse = 0.0f;
+ if (dpsoftrast.shader_permutation & SHADERPERMUTATION_CUBEFILTER)
+ {
+ // scale down the attenuation to account for the cubefilter multiplying everything by 255
+ attenuation *= (1.0f / 255.0f);
+ d[0] = (int)((diffusetex[0] * (Color_Ambient[0] + Color_Diffuse[0] * diffuse)) * LightColor[0] * buffer_texture_cubebgra8[x*4+0] * attenuation);if (d[0] > 255) d[0] = 255;
+ d[1] = (int)((diffusetex[1] * (Color_Ambient[1] + Color_Diffuse[1] * diffuse)) * LightColor[1] * buffer_texture_cubebgra8[x*4+1] * attenuation);if (d[1] > 255) d[1] = 255;
+ d[2] = (int)((diffusetex[2] * (Color_Ambient[2] + Color_Diffuse[2] * diffuse)) * LightColor[2] * buffer_texture_cubebgra8[x*4+2] * attenuation);if (d[2] > 255) d[2] = 255;
+ d[3] = (int)( diffusetex[3] );if (d[3] > 255) d[3] = 255;
+ }
+ else
+ {
+ d[0] = (int)((diffusetex[0] * (Color_Ambient[0] + Color_Diffuse[0] * diffuse)) * LightColor[0] * attenuation);if (d[0] > 255) d[0] = 255;
+ d[1] = (int)((diffusetex[1] * (Color_Ambient[1] + Color_Diffuse[1] * diffuse)) * LightColor[1] * attenuation);if (d[1] > 255) d[1] = 255;
+ d[2] = (int)((diffusetex[2] * (Color_Ambient[2] + Color_Diffuse[2] * diffuse)) * LightColor[2] * attenuation);if (d[2] > 255) d[2] = 255;
+ d[3] = (int)( diffusetex[3] );if (d[3] > 255) d[3] = 255;
+ }
+ buffer_FragColorbgra8[x*4+0] = d[0];
+ buffer_FragColorbgra8[x*4+1] = d[1];
+ buffer_FragColorbgra8[x*4+2] = d[2];
+ buffer_FragColorbgra8[x*4+3] = d[3];
}
- else
+ }
+ else
+ {
+ for (x = startx;x < endx;x++)
{
- DPSOFTRAST_Draw_Span_Lightmap(span, buffer_FragColor, buffer_texture_color, buffer_texture_lightmap);
- DPSOFTRAST_Draw_Span_Finish(span, buffer_FragColor);
+ z = buffer_z[x];
+ CubeVector[0] = (CubeVectordata[0] + CubeVectorslope[0]*x) * z;
+ CubeVector[1] = (CubeVectordata[1] + CubeVectorslope[1]*x) * z;
+ CubeVector[2] = (CubeVectordata[2] + CubeVectorslope[2]*x) * z;
+ attenuation = 1.0f - DPSOFTRAST_Vector3LengthSquared(CubeVector);
+ if (attenuation < 0.01f)
+ continue;
+ if (dpsoftrast.shader_permutation & SHADERPERMUTATION_SHADOWMAP2D)
+ {
+ attenuation *= DPSOFTRAST_SampleShadowmap(CubeVector);
+ if (attenuation < 0.01f)
+ continue;
+ }
+
+ diffusetex[0] = buffer_texture_colorbgra8[x*4+0];
+ diffusetex[1] = buffer_texture_colorbgra8[x*4+1];
+ diffusetex[2] = buffer_texture_colorbgra8[x*4+2];
+ diffusetex[3] = buffer_texture_colorbgra8[x*4+3];
+ if (dpsoftrast.shader_permutation & SHADERPERMUTATION_COLORMAPPING)
+ {
+ diffusetex[0] += buffer_texture_pantsbgra8[x*4+0] * Color_Pants[0] + buffer_texture_shirtbgra8[x*4+0] * Color_Shirt[0];
+ diffusetex[1] += buffer_texture_pantsbgra8[x*4+1] * Color_Pants[1] + buffer_texture_shirtbgra8[x*4+1] * Color_Shirt[1];
+ diffusetex[2] += buffer_texture_pantsbgra8[x*4+2] * Color_Pants[2] + buffer_texture_shirtbgra8[x*4+2] * Color_Shirt[2];
+ diffusetex[3] += buffer_texture_pantsbgra8[x*4+3] * Color_Pants[3] + buffer_texture_shirtbgra8[x*4+3] * Color_Shirt[3];
+ }
+ if (dpsoftrast.shader_permutation & SHADERPERMUTATION_CUBEFILTER)
+ {
+ // scale down the attenuation to account for the cubefilter multiplying everything by 255
+ attenuation *= (1.0f / 255.0f);
+ d[0] = (int)((diffusetex[0] * (Color_Ambient[0])) * LightColor[0] * buffer_texture_cubebgra8[x*4+0] * attenuation);if (d[0] > 255) d[0] = 255;
+ d[1] = (int)((diffusetex[1] * (Color_Ambient[1])) * LightColor[1] * buffer_texture_cubebgra8[x*4+1] * attenuation);if (d[1] > 255) d[1] = 255;
+ d[2] = (int)((diffusetex[2] * (Color_Ambient[2])) * LightColor[2] * buffer_texture_cubebgra8[x*4+2] * attenuation);if (d[2] > 255) d[2] = 255;
+ d[3] = (int)( diffusetex[3] );if (d[3] > 255) d[3] = 255;
+ }
+ else
+ {
+ d[0] = (int)((diffusetex[0] * (Color_Ambient[0])) * LightColor[0] * attenuation);if (d[0] > 255) d[0] = 255;
+ d[1] = (int)((diffusetex[1] * (Color_Ambient[1])) * LightColor[1] * attenuation);if (d[1] > 255) d[1] = 255;
+ d[2] = (int)((diffusetex[2] * (Color_Ambient[2])) * LightColor[2] * attenuation);if (d[2] > 255) d[2] = 255;
+ d[3] = (int)( diffusetex[3] );if (d[3] > 255) d[3] = 255;
+ }
+ buffer_FragColorbgra8[x*4+0] = d[0];
+ buffer_FragColorbgra8[x*4+1] = d[1];
+ buffer_FragColorbgra8[x*4+2] = d[2];
+ buffer_FragColorbgra8[x*4+3] = d[3];
}
- break;
- case SHADERMODE_FAKELIGHT: ///< (fakelight) modulate texture by "fake" lighting (no lightmaps: no nothing)
- DPSOFTRAST_Draw_Span_Begin(span, buffer_z);
- DPSOFTRAST_Draw_Span_Texture2DVarying(span, buffer_texture_color, GL20TU_COLOR, 2, buffer_z);
- DPSOFTRAST_Draw_Span_FakeLight(span, buffer_FragColor, buffer_texture_color, buffer_z);
- DPSOFTRAST_Draw_Span_Finish(span, buffer_FragColor);
- break;
- case SHADERMODE_LIGHTDIRECTIONMAP_MODELSPACE: ///< (lightmap) use directional pixel shading from texture containing modelspace light directions (q3bsp deluxemap)
- DPSOFTRAST_Draw_Span_Begin(span, buffer_z);
- DPSOFTRAST_Draw_Span_Texture2DVarying(span, buffer_texture_color, GL20TU_COLOR, 2, buffer_z);
- DPSOFTRAST_Draw_Span_LightDirectionMap_ModelSpace(span, buffer_FragColor, buffer_texture_color, buffer_z);
- DPSOFTRAST_Draw_Span_Finish(span, buffer_FragColor);
- break;
- case SHADERMODE_LIGHTDIRECTIONMAP_TANGENTSPACE: ///< (lightmap) use directional pixel shading from texture containing tangentspace light directions (q1bsp deluxemap)
- DPSOFTRAST_Draw_Span_Begin(span, buffer_z);
- DPSOFTRAST_Draw_Span_Texture2DVarying(span, buffer_texture_color, GL20TU_COLOR, 2, buffer_z);
- DPSOFTRAST_Draw_Span_LightDirectionMap_TangentSpace(span, buffer_FragColor, buffer_texture_color, buffer_z);
- DPSOFTRAST_Draw_Span_Finish(span, buffer_FragColor);
- break;
- case SHADERMODE_LIGHTDIRECTION: ///< (lightmap) use directional pixel shading from fixed light direction (q3bsp)
- DPSOFTRAST_Draw_Span_Begin(span, buffer_z);
- DPSOFTRAST_Draw_Span_Texture2DVarying(span, buffer_texture_color, GL20TU_COLOR, 2, buffer_z);
- DPSOFTRAST_Draw_Span_LightDirection(span, buffer_FragColor, buffer_texture_color, buffer_z);
- DPSOFTRAST_Draw_Span_Finish(span, buffer_FragColor);
- break;
- case SHADERMODE_LIGHTSOURCE: ///< (lightsource) use directional pixel shading from light source (rtlight)
- DPSOFTRAST_Draw_Span_Begin(span, buffer_z);
- DPSOFTRAST_Draw_Span_Texture2DVarying(span, buffer_texture_color, GL20TU_COLOR, 2, buffer_z);
- DPSOFTRAST_Draw_Span_LightSource(span, buffer_FragColor, buffer_texture_color, buffer_z);
- DPSOFTRAST_Draw_Span_Finish(span, buffer_FragColor);
- break;
- case SHADERMODE_REFRACTION: ///< refract background (the material is rendered normally after this pass)
- DPSOFTRAST_Draw_Span_Begin(span, buffer_z);
- DPSOFTRAST_Draw_Span_Texture2DVarying(span, buffer_texture_color, GL20TU_COLOR, 2, buffer_z);
- DPSOFTRAST_Draw_Span_Refraction(span, buffer_FragColor, buffer_texture_color, buffer_z);
- DPSOFTRAST_Draw_Span_Finish(span, buffer_FragColor);
- break;
- case SHADERMODE_WATER: ///< refract background and reflection (the material is rendered normally after this pass)
- DPSOFTRAST_Draw_Span_Begin(span, buffer_z);
- DPSOFTRAST_Draw_Span_Texture2DVarying(span, buffer_texture_color, GL20TU_COLOR, 2, buffer_z);
- DPSOFTRAST_Draw_Span_Water(span, buffer_FragColor, buffer_texture_color, buffer_z);
- DPSOFTRAST_Draw_Span_Finish(span, buffer_FragColor);
- break;
- case SHADERMODE_SHOWDEPTH: ///< (debugging) renders depth as color
- break;
- case SHADERMODE_DEFERREDGEOMETRY: ///< (deferred) render material properties to screenspace geometry buffers
- DPSOFTRAST_Draw_Span_Begin(span, buffer_z);
- DPSOFTRAST_Draw_Span_Texture2DVarying(span, buffer_texture_color, GL20TU_COLOR, 2, buffer_z);
- DPSOFTRAST_Draw_Span_DeferredGeometry(span, buffer_FragColor, buffer_texture_color, buffer_z);
- DPSOFTRAST_Draw_Span_Finish(span, buffer_FragColor);
- break;
- case SHADERMODE_DEFERREDLIGHTSOURCE: ///< (deferred) use directional pixel shading from light source (rtlight) on screenspace geometry buffers
- DPSOFTRAST_Draw_Span_Begin(span, buffer_z);
- DPSOFTRAST_Draw_Span_DeferredLightSource(span, buffer_FragColor, buffer_z);
- DPSOFTRAST_Draw_Span_Finish(span, buffer_FragColor);
- break;
- case SHADERMODE_COUNT:
- break;
}
+ DPSOFTRAST_Draw_Span_FinishBGRA8(span, buffer_FragColorbgra8);
+}
+
+
+
+// Vertex stage of the refraction shader mode.
+// Only the position array is processed: in_array4f[POSITION] is passed through
+// DPSOFTRAST_Array_Transform into post_array4f[POSITION] for all numvertices
+// vertices, using the uniform data starting at ModelViewProjectionMatrixM1
+// (presumably the 4x4 model-view-projection matrix - confirm against
+// DPSOFTRAST_Array_Transform).
+void DPSOFTRAST_VertexShader_Refraction(void)
+{
+	DPSOFTRAST_Array_Transform(
+		dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION],
+		dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_POSITION],
+		dpsoftrast.draw.numvertices,
+		&dpsoftrast.uniform4f[4*DPSOFTRAST_UNIFORM_ModelViewProjectionMatrixM1]);
+}
+
+// Pixel stage of the refraction shader mode.
+// TODO: IMPLEMENT - this is a placeholder that hands a zero-filled BGRA8 color
+// buffer (span->length pixels) to DPSOFTRAST_Draw_Span_FinishBGRA8.
+void DPSOFTRAST_PixelShader_Refraction(const DPSOFTRAST_State_Draw_Span * RESTRICT span)
+{
+	// scratch buffers sized for the longest span the rasterizer produces
+	float zvalues[DPSOFTRAST_DRAW_MAXSPANLENGTH];
+	unsigned char fragcolor_bgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
+
+	// presumably fills zvalues with per-pixel depth data for the span - verify in Draw_Span_Begin
+	DPSOFTRAST_Draw_Span_Begin(span, zvalues);
+	// 4 bytes per pixel, all channels zero
+	memset(fragcolor_bgra8, 0, span->length*4);
+	DPSOFTRAST_Draw_Span_FinishBGRA8(span, fragcolor_bgra8);
+}
+
+
+
+// Vertex stage of the water shader mode.
+// Only the position array is processed: in_array4f[POSITION] is passed through
+// DPSOFTRAST_Array_Transform into post_array4f[POSITION] for all numvertices
+// vertices, using the uniform data starting at ModelViewProjectionMatrixM1
+// (presumably the 4x4 model-view-projection matrix - confirm against
+// DPSOFTRAST_Array_Transform).
+void DPSOFTRAST_VertexShader_Water(void)
+{
+	DPSOFTRAST_Array_Transform(
+		dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION],
+		dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_POSITION],
+		dpsoftrast.draw.numvertices,
+		&dpsoftrast.uniform4f[4*DPSOFTRAST_UNIFORM_ModelViewProjectionMatrixM1]);
+}
+
+
+// Pixel stage of the water shader mode.
+// TODO: IMPLEMENT - this is a placeholder that hands a zero-filled BGRA8 color
+// buffer (span->length pixels) to DPSOFTRAST_Draw_Span_FinishBGRA8.
+void DPSOFTRAST_PixelShader_Water(const DPSOFTRAST_State_Draw_Span * RESTRICT span)
+{
+	// scratch buffers sized for the longest span the rasterizer produces
+	float zvalues[DPSOFTRAST_DRAW_MAXSPANLENGTH];
+	unsigned char fragcolor_bgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
+
+	// presumably fills zvalues with per-pixel depth data for the span - verify in Draw_Span_Begin
+	DPSOFTRAST_Draw_Span_Begin(span, zvalues);
+	// 4 bytes per pixel, all channels zero
+	memset(fragcolor_bgra8, 0, span->length*4);
+	DPSOFTRAST_Draw_Span_FinishBGRA8(span, fragcolor_bgra8);
+}
+
+
+
+// Vertex stage of the show-depth (debugging) shader mode.
+// Only the position array is processed: in_array4f[POSITION] is passed through
+// DPSOFTRAST_Array_Transform into post_array4f[POSITION] for all numvertices
+// vertices, using the uniform data starting at ModelViewProjectionMatrixM1
+// (presumably the 4x4 model-view-projection matrix - confirm against
+// DPSOFTRAST_Array_Transform).
+void DPSOFTRAST_VertexShader_ShowDepth(void)
+{
+	DPSOFTRAST_Array_Transform(
+		dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION],
+		dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_POSITION],
+		dpsoftrast.draw.numvertices,
+		&dpsoftrast.uniform4f[4*DPSOFTRAST_UNIFORM_ModelViewProjectionMatrixM1]);
+}
+
+// Pixel stage of the show-depth (debugging) shader mode.
+// TODO: IMPLEMENT - this is a placeholder that hands a zero-filled BGRA8 color
+// buffer (span->length pixels) to DPSOFTRAST_Draw_Span_FinishBGRA8; the depth
+// values are not yet turned into a color.
+void DPSOFTRAST_PixelShader_ShowDepth(const DPSOFTRAST_State_Draw_Span * RESTRICT span)
+{
+	// scratch buffers sized for the longest span the rasterizer produces
+	float zvalues[DPSOFTRAST_DRAW_MAXSPANLENGTH];
+	unsigned char fragcolor_bgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
+
+	// presumably fills zvalues with per-pixel depth data for the span - verify in Draw_Span_Begin
+	DPSOFTRAST_Draw_Span_Begin(span, zvalues);
+	// 4 bytes per pixel, all channels zero
+	memset(fragcolor_bgra8, 0, span->length*4);
+	DPSOFTRAST_Draw_Span_FinishBGRA8(span, fragcolor_bgra8);
+}
+
+
+
+// Vertex stage of the deferred-geometry shader mode.
+// Only the position array is processed: in_array4f[POSITION] is passed through
+// DPSOFTRAST_Array_Transform into post_array4f[POSITION] for all numvertices
+// vertices, using the uniform data starting at ModelViewProjectionMatrixM1
+// (presumably the 4x4 model-view-projection matrix - confirm against
+// DPSOFTRAST_Array_Transform).
+void DPSOFTRAST_VertexShader_DeferredGeometry(void)
+{
+	DPSOFTRAST_Array_Transform(
+		dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION],
+		dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_POSITION],
+		dpsoftrast.draw.numvertices,
+		&dpsoftrast.uniform4f[4*DPSOFTRAST_UNIFORM_ModelViewProjectionMatrixM1]);
+}
+
+// Pixel stage of the deferred-geometry shader mode.
+// TODO: IMPLEMENT - this is a placeholder that hands a zero-filled BGRA8 color
+// buffer (span->length pixels) to DPSOFTRAST_Draw_Span_FinishBGRA8.
+void DPSOFTRAST_PixelShader_DeferredGeometry(const DPSOFTRAST_State_Draw_Span * RESTRICT span)
+{
+	// scratch buffers sized for the longest span the rasterizer produces
+	float zvalues[DPSOFTRAST_DRAW_MAXSPANLENGTH];
+	unsigned char fragcolor_bgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
+
+	// presumably fills zvalues with per-pixel depth data for the span - verify in Draw_Span_Begin
+	DPSOFTRAST_Draw_Span_Begin(span, zvalues);
+	// 4 bytes per pixel, all channels zero
+	memset(fragcolor_bgra8, 0, span->length*4);
+	DPSOFTRAST_Draw_Span_FinishBGRA8(span, fragcolor_bgra8);
+}
+
+
+
+// Vertex stage of the deferred-lightsource shader mode.
+// Only the position array is processed: in_array4f[POSITION] is passed through
+// DPSOFTRAST_Array_Transform into post_array4f[POSITION] for all numvertices
+// vertices, using the uniform data starting at ModelViewProjectionMatrixM1
+// (presumably the 4x4 model-view-projection matrix - confirm against
+// DPSOFTRAST_Array_Transform).
+void DPSOFTRAST_VertexShader_DeferredLightSource(void)
+{
+	DPSOFTRAST_Array_Transform(
+		dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION],
+		dpsoftrast.draw.in_array4f[DPSOFTRAST_ARRAY_POSITION],
+		dpsoftrast.draw.numvertices,
+		&dpsoftrast.uniform4f[4*DPSOFTRAST_UNIFORM_ModelViewProjectionMatrixM1]);
 }
+// Pixel stage of the deferred-lightsource shader mode.
+// TODO: IMPLEMENT - this is a placeholder that hands a zero-filled BGRA8 color
+// buffer (span->length pixels) to DPSOFTRAST_Draw_Span_FinishBGRA8.
+void DPSOFTRAST_PixelShader_DeferredLightSource(const DPSOFTRAST_State_Draw_Span * RESTRICT span)
+{
+	// scratch buffers sized for the longest span the rasterizer produces
+	float zvalues[DPSOFTRAST_DRAW_MAXSPANLENGTH];
+	unsigned char fragcolor_bgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
+
+	// presumably fills zvalues with per-pixel depth data for the span - verify in Draw_Span_Begin
+	DPSOFTRAST_Draw_Span_Begin(span, zvalues);
+	// 4 bytes per pixel, all channels zero
+	memset(fragcolor_bgra8, 0, span->length*4);
+	DPSOFTRAST_Draw_Span_FinishBGRA8(span, fragcolor_bgra8);
+}
+
+
+
+// Per-shader-mode dispatch record: one entry of DPSOFTRAST_ShaderModeTable.
+typedef struct DPSOFTRAST_ShaderModeInfo_s
+{
+	// index into the per-vertex arrays whose first two components are used
+	// as texture coordinates when the triangle setup picks a mipmap level
+	int lodarrayindex;
+	// vertex stage entry point for this mode (takes no arguments)
+	void (*Vertex)(void);
+	// pixel stage entry point, called once per span
+	void (*Span)(const DPSOFTRAST_State_Draw_Span * RESTRICT span);
+} DPSOFTRAST_ShaderModeInfo;
+
+// Dispatch table looked up with dpsoftrast.shader_mode.
+// NOTE(review): the rows are positional, so they presumably must stay in the
+// same order as the SHADERMODE_* enum (not visible here) - verify before
+// inserting or reordering entries.
+// NOTE(review): the literal 2 in the first column looks like it stands for
+// DPSOFTRAST_ARRAY_TEXCOORD0 - TODO confirm and replace with the named constant.
+DPSOFTRAST_ShaderModeInfo DPSOFTRAST_ShaderModeTable[SHADERMODE_COUNT] =
+{
+	{2, DPSOFTRAST_VertexShader_Generic,                        DPSOFTRAST_PixelShader_Generic},
+	{2, DPSOFTRAST_VertexShader_PostProcess,                    DPSOFTRAST_PixelShader_PostProcess},
+	{2, DPSOFTRAST_VertexShader_Depth_Or_Shadow,                DPSOFTRAST_PixelShader_Depth_Or_Shadow},
+	{2, DPSOFTRAST_VertexShader_FlatColor,                      DPSOFTRAST_PixelShader_FlatColor},
+	{2, DPSOFTRAST_VertexShader_VertexColor,                    DPSOFTRAST_PixelShader_VertexColor},
+	{2, DPSOFTRAST_VertexShader_Lightmap,                       DPSOFTRAST_PixelShader_Lightmap},
+	{2, DPSOFTRAST_VertexShader_FakeLight,                      DPSOFTRAST_PixelShader_FakeLight},
+	{2, DPSOFTRAST_VertexShader_LightDirectionMap_ModelSpace,   DPSOFTRAST_PixelShader_LightDirectionMap_ModelSpace},
+	{2, DPSOFTRAST_VertexShader_LightDirectionMap_TangentSpace, DPSOFTRAST_PixelShader_LightDirectionMap_TangentSpace},
+	{2, DPSOFTRAST_VertexShader_LightDirection,                 DPSOFTRAST_PixelShader_LightDirection},
+	{2, DPSOFTRAST_VertexShader_LightSource,                    DPSOFTRAST_PixelShader_LightSource},
+	{2, DPSOFTRAST_VertexShader_Refraction,                     DPSOFTRAST_PixelShader_Refraction},
+	{2, DPSOFTRAST_VertexShader_Water,                          DPSOFTRAST_PixelShader_Water},
+	{2, DPSOFTRAST_VertexShader_ShowDepth,                      DPSOFTRAST_PixelShader_ShowDepth},
+	{2, DPSOFTRAST_VertexShader_DeferredGeometry,               DPSOFTRAST_PixelShader_DeferredGeometry},
+	{2, DPSOFTRAST_VertexShader_DeferredLightSource,            DPSOFTRAST_PixelShader_DeferredLightSource}
+};
+
+
+
void DPSOFTRAST_Draw_ProcessSpans(void)
{
int i;
// do this before running depthmask code, to allow the pixelshader
// to clear pixelmask values for alpha testing
if (dpsoftrast.fb_colorpixels[0] && dpsoftrast.fb_colormask)
- DPSOFTRAST_Draw_PixelShaderSpan(span);
+ DPSOFTRAST_ShaderModeTable[dpsoftrast.shader_mode].Span(span);
if (dpsoftrast.user.depthmask)
for (x = 0, d = depth;x < span->length;x++, d += depthslope)
if (pixelmask[x])
span->pixelmask = pixelmask;
span->startx = 0;
span->endx = span->length;
- DPSOFTRAST_Draw_PixelShaderSpan(span);
+ DPSOFTRAST_ShaderModeTable[dpsoftrast.shader_mode].Span(span);
}
}
}
}
-void DPSOFTRAST_Draw_ProcessTriangles(int firstvertex, int numvertices, int numtriangles, const int *element3i, const unsigned short *element3s, unsigned char *arraymask)
+void DPSOFTRAST_Draw_ProcessTriangles(int firstvertex, int numtriangles, const int *element3i, const unsigned short *element3s, unsigned char *arraymask)
{
+#ifdef SSE2_PRESENT
int cullface = dpsoftrast.user.cullface;
int width = dpsoftrast.fb_width;
int height = dpsoftrast.fb_height;
int k;
int y;
int e[3];
- int screenx[4];
- int screeny[4];
+ ALIGN(int screeny[4]);
+ int starty, endy;
int screenyless[4];
int numpoints;
int clipflags;
int edge0n;
int edge1p;
int edge1n;
- int extent[6];
int startx;
int endx;
float mip_edge0tc[2];
float spanilength;
float startxlerp;
float yc;
- float w;
float frac;
float ifrac;
- float trianglearea2;
- float triangleedge[2][4];
- float trianglenormal[4];
- float clipdist[4];
- float clipped[DPSOFTRAST_ARRAY_TOTAL][4][4];
- float screen[4][4];
- float proj[DPSOFTRAST_ARRAY_TOTAL][4][4];
+ //float trianglearea2;
+ __m128 triangleedge[2];
+ __m128 trianglenormal;
+ ALIGN(float clipdist[4]);
+ ALIGN(float clipped[DPSOFTRAST_ARRAY_TOTAL][4][4]);
+ ALIGN(float screen[4][4]);
+ __m128 proj[DPSOFTRAST_ARRAY_TOTAL][4];
DPSOFTRAST_Texture *texture;
DPSOFTRAST_State_Draw_Span *span;
DPSOFTRAST_State_Draw_Span *oldspan;
e[1] = i*3+1;
e[2] = i*3+2;
}
- triangleedge[0][0] = dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION][e[0]*4+0] - dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION][e[1]*4+0];
- triangleedge[0][1] = dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION][e[0]*4+1] - dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION][e[1]*4+1];
- triangleedge[0][2] = dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION][e[0]*4+2] - dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION][e[1]*4+2];
- triangleedge[1][0] = dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION][e[2]*4+0] - dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION][e[1]*4+0];
- triangleedge[1][1] = dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION][e[2]*4+1] - dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION][e[1]*4+1];
- triangleedge[1][2] = dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION][e[2]*4+2] - dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION][e[1]*4+2];
+ {
+ __m128 v1 = _mm_load_ps(&dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION][e[1]*4]);
+ triangleedge[0] = _mm_sub_ps(_mm_load_ps(&dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION][e[0]*4]), v1);
+ triangleedge[1] = _mm_sub_ps(_mm_load_ps(&dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION][e[2]*4]), v1);
+ }
+ // store normal in 2, 0, 1 order instead of 0, 1, 2 as it requires fewer shuffles and leaves z component accessible as scalar
+ trianglenormal = _mm_sub_ps(_mm_mul_ps(triangleedge[0], _mm_shuffle_ps(triangleedge[1], triangleedge[1], _MM_SHUFFLE(3, 0, 2, 1))),
+ _mm_mul_ps(_mm_shuffle_ps(triangleedge[0], triangleedge[0], _MM_SHUFFLE(3, 0, 2, 1)), triangleedge[1]));
+#if 0
+ trianglenormal[2] = triangleedge[0][0] * triangleedge[1][1] - triangleedge[0][1] * triangleedge[1][0];
trianglenormal[0] = triangleedge[0][1] * triangleedge[1][2] - triangleedge[0][2] * triangleedge[1][1];
trianglenormal[1] = triangleedge[0][2] * triangleedge[1][0] - triangleedge[0][0] * triangleedge[1][2];
- trianglenormal[2] = triangleedge[0][0] * triangleedge[1][1] - triangleedge[0][1] * triangleedge[1][0];
- trianglearea2 = trianglenormal[0] * trianglenormal[0] + trianglenormal[1] * trianglenormal[1] + trianglenormal[2] * trianglenormal[2];
- // skip degenerate triangles, nothing good can come from them...
- if (trianglearea2 == 0.0f)
- continue;
+#endif
// apply current cullface mode (this culls many triangles)
switch(cullface)
{
case GL_BACK:
- if (trianglenormal[2] < 0)
+ if (_mm_ucomilt_ss(trianglenormal, _mm_setzero_ps()))
continue;
break;
case GL_FRONT:
- if (trianglenormal[2] > 0)
+ if (_mm_ucomigt_ss(trianglenormal, _mm_setzero_ps()))
continue;
break;
}
+#if 0
+ trianglearea2 = trianglenormal[0] * trianglenormal[0] + trianglenormal[1] * trianglenormal[1] + trianglenormal[2] * trianglenormal[2];
+ // skip degenerate triangles, nothing good can come from them...
+ if (trianglearea2 == 0.0f)
+ continue;
+#endif
+
// calculate distance from nearplane
clipdist[0] = dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION][e[0]*4+2] + 1.0f;
clipdist[1] = dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION][e[1]*4+2] + 1.0f;
ifrac = 1.0f - frac;\
for (j = 0;j < DPSOFTRAST_ARRAY_TOTAL;j++)\
{\
- if (arraymask[j])\
+ /*if (arraymask[j])*/\
{\
clipped[j][k][0] = dpsoftrast.draw.post_array4f[j][e[p1]*4+0]*ifrac+dpsoftrast.draw.post_array4f[j][e[p2]*4+0]*frac;\
clipped[j][k][1] = dpsoftrast.draw.post_array4f[j][e[p1]*4+1]*ifrac+dpsoftrast.draw.post_array4f[j][e[p2]*4+1]*frac;\
#define CLIPPEDVERTEXCOPY(k,p1) \
for (j = 0;j < DPSOFTRAST_ARRAY_TOTAL;j++)\
{\
- if (arraymask[j])\
+ /*if (arraymask[j])*/\
{\
clipped[j][k][0] = dpsoftrast.draw.post_array4f[j][e[p1]*4+0];\
clipped[j][k][1] = dpsoftrast.draw.post_array4f[j][e[p1]*4+1];\
// triangle is entirely behind nearplane
continue;
}
- // calculate integer y coords for triangle points
- screenx[0] = (int)(screen[0][0]);
- screeny[0] = (int)(screen[0][1]);
- screenx[1] = (int)(screen[1][0]);
- screeny[1] = (int)(screen[1][1]);
- screenx[2] = (int)(screen[2][0]);
- screeny[2] = (int)(screen[2][1]);
- screenx[3] = (int)(screen[3][0]);
- screeny[3] = (int)(screen[3][1]);
- // figure out the extents (bounding box) of the triangle
- extent[0] = screenx[0];
- extent[1] = screeny[0];
- extent[2] = screenx[0];
- extent[3] = screeny[0];
- for (j = 1;j < numpoints;j++)
- {
- if (extent[0] > screenx[j]) extent[0] = screenx[j];
- if (extent[1] > screeny[j]) extent[1] = screeny[j];
- if (extent[2] < screenx[j]) extent[2] = screenx[j];
- if (extent[3] < screeny[j]) extent[3] = screeny[j];
- }
- //extent[0]--;
- //extent[1]--;
- extent[2]++;
- extent[3]++;
- if (extent[0] < 0)
- extent[0] = 0;
- if (extent[1] < 0)
- extent[1] = 0;
- if (extent[2] > width)
- extent[2] = width;
- if (extent[3] > height)
- extent[3] = height;
- // skip offscreen triangles
- if (extent[2] <= extent[0] || extent[3] <= extent[1])
- continue;
+ {
+ // calculate integer y coords for triangle points
+ __m128i screeni = _mm_packs_epi32(_mm_cvttps_epi32(_mm_shuffle_ps(_mm_load_ps(screen[0]), _mm_load_ps(screen[1]), _MM_SHUFFLE(1, 0, 1, 0))),
+ _mm_cvttps_epi32(_mm_shuffle_ps(_mm_load_ps(screen[2]), _mm_load_ps(screen[3]), _MM_SHUFFLE(1, 0, 1, 0)))),
+ screenir, screenmin, screenmax;
+ if (numpoints <= 3) screeni = _mm_shuffle_epi32(screeni, _MM_SHUFFLE(2, 2, 1, 0));
+ screenir = _mm_shuffle_epi32(screeni, _MM_SHUFFLE(1, 0, 3, 2)),
+ screenmin = _mm_min_epi16(screeni, screenir);
+ screenmax = _mm_max_epi16(screeni, screenir);
+ screenmin = _mm_min_epi16(screenmin, _mm_shufflelo_epi16(screenmin, _MM_SHUFFLE(1, 0, 3, 2)));
+ screenmax = _mm_max_epi16(screenmax, _mm_shufflelo_epi16(screenmax, _MM_SHUFFLE(1, 0, 3, 2)));
+ screenmin = _mm_max_epi16(screenmin, _mm_setzero_si128());
+ screenmax = _mm_min_epi16(screenmax, _mm_setr_epi16(width-1, height-1, 0, 0, 0, 0, 0, 0));
+ // skip offscreen triangles
+ {
+ __m128i cc = _mm_cmplt_epi16(screenmax, screenmin);
+ if (_mm_extract_epi16(cc, 0)|_mm_extract_epi16(cc, 1))
+ continue;
+ }
+ starty = _mm_extract_epi16(screenmin, 1);
+ endy = _mm_extract_epi16(screenmax, 1)+1;
+ _mm_store_si128((__m128i *)screeny, _mm_srai_epi32(screeni, 16));
+ }
+
// okay, this triangle is going to produce spans, we'd better project
// the interpolants now (this is what gives perspective texturing),
// this consists of simply multiplying all arrays by the W coord
// values
for (j = 0;j < DPSOFTRAST_ARRAY_TOTAL;j++)
{
- if (arraymask[j])
+ //if (arraymask[j])
{
for (k = 0;k < numpoints;k++)
{
- w = screen[k][3];
- proj[j][k][0] = clipped[j][k][0] * w;
- proj[j][k][1] = clipped[j][k][1] * w;
- proj[j][k][2] = clipped[j][k][2] * w;
- proj[j][k][3] = clipped[j][k][3] * w;
+ proj[j][k] = _mm_mul_ps(_mm_load_ps(clipped[j][k]), _mm_set1_ps(screen[k][3]));
}
}
}
mip[j] = 0;
continue;
}
- // FIXME: use appropriate array for this texture!
- mip_edge0tc[0] = (clipped[DPSOFTRAST_ARRAY_TEXCOORD0][0][0] - clipped[DPSOFTRAST_ARRAY_TEXCOORD0][1][0]) * texture->mipmap[0][2];
- mip_edge0tc[1] = (clipped[DPSOFTRAST_ARRAY_TEXCOORD0][0][1] - clipped[DPSOFTRAST_ARRAY_TEXCOORD0][1][1]) * texture->mipmap[0][3];
- mip_edge1tc[0] = (clipped[DPSOFTRAST_ARRAY_TEXCOORD0][2][0] - clipped[DPSOFTRAST_ARRAY_TEXCOORD0][1][0]) * texture->mipmap[0][2];
- mip_edge1tc[1] = (clipped[DPSOFTRAST_ARRAY_TEXCOORD0][2][1] - clipped[DPSOFTRAST_ARRAY_TEXCOORD0][1][1]) * texture->mipmap[0][3];
+ k = DPSOFTRAST_ShaderModeTable[dpsoftrast.shader_mode].lodarrayindex;
+ mip_edge0tc[0] = (clipped[k][0][0] - clipped[k][1][0]) * texture->mipmap[0][2];
+ mip_edge0tc[1] = (clipped[k][0][1] - clipped[k][1][1]) * texture->mipmap[0][3];
+ mip_edge1tc[0] = (clipped[k][2][0] - clipped[k][1][0]) * texture->mipmap[0][2];
+ mip_edge1tc[1] = (clipped[k][2][1] - clipped[k][1][1]) * texture->mipmap[0][3];
mip_edge0mip = (mip_edge0tc[0]*mip_edge0tc[0]+mip_edge0tc[1]*mip_edge0tc[1]) * mip_edge0xymul;
mip_edge1mip = (mip_edge1tc[0]*mip_edge1tc[0]+mip_edge1tc[1]*mip_edge1tc[1]) * mip_edge1xymul;
// this will be multiplied in the texturing routine by the texture resolution
mipdensity = mip_edge0mip < mip_edge1mip ? mip_edge0mip : mip_edge1mip;
- y = (int)(log(mipdensity)/log(2.0f) + 0.5f);
+ y = (int)(log(mipdensity)/log(2.0f));
if (y < 0)
y = 0;
if (y > texture->mipmaps - 1)
// TODO: optimize? the edges could have data slopes calculated
// TODO: optimize? the data slopes could be calculated as a plane
// (2D slopes) to avoid any interpolation along edges at all
- for (y = extent[1];y < extent[3];y++)
+ for (y = starty;y < endy;y++)
{
// get center of pixel y
yc = y;
memcpy(span->mip, mip, sizeof(span->mip));
span->start = y * width + startx;
span->length = endx - startx;
- j = DPSOFTRAST_ARRAY_TOTAL;
- if (edge0xf < edge1xf)
{
- span->data[0][j][0] = screen[edge0p][0] * edge0yilerp + screen[edge0n][0] * edge0ylerp;
- span->data[0][j][1] = screen[edge0p][1] * edge0yilerp + screen[edge0n][1] * edge0ylerp;
- span->data[0][j][2] = screen[edge0p][2] * edge0yilerp + screen[edge0n][2] * edge0ylerp;
- span->data[0][j][3] = screen[edge0p][3] * edge0yilerp + screen[edge0n][3] * edge0ylerp;
- span->data[1][j][0] = screen[edge1p][0] * edge1yilerp + screen[edge1n][0] * edge1ylerp;
- span->data[1][j][1] = screen[edge1p][1] * edge1yilerp + screen[edge1n][1] * edge1ylerp;
- span->data[1][j][2] = screen[edge1p][2] * edge1yilerp + screen[edge1n][2] * edge1ylerp;
- span->data[1][j][3] = screen[edge1p][3] * edge1yilerp + screen[edge1n][3] * edge1ylerp;
- for (j = 0;j < DPSOFTRAST_ARRAY_TOTAL;j++)
+ __m128 edge0ylerpm = _mm_set1_ps(edge0ylerp), edge0yilerpm = _mm_set1_ps(edge0yilerp),
+ edge1ylerpm = _mm_set1_ps(edge1ylerp), edge1yilerpm = _mm_set1_ps(edge1yilerp),
+ spanilengthm = _mm_set1_ps(spanilength), startxlerpm = _mm_set1_ps(startxlerp),
+ data0, data1;
+ j = DPSOFTRAST_ARRAY_TOTAL;
+ if (edge0xf < edge1xf)
{
- if (arraymask[j])
+ data0 = _mm_add_ps(_mm_mul_ps(_mm_load_ps(screen[edge0p]), edge0yilerpm), _mm_mul_ps(_mm_load_ps(screen[edge0n]), edge0ylerpm));
+ data1 = _mm_add_ps(_mm_mul_ps(_mm_load_ps(screen[edge1p]), edge1yilerpm), _mm_mul_ps(_mm_load_ps(screen[edge1n]), edge1ylerpm));
+ data1 = _mm_mul_ps(_mm_sub_ps(data1, data0), spanilengthm);
+ data0 = _mm_add_ps(data0, _mm_mul_ps(data1, startxlerpm));
+ _mm_store_ps(span->data[0][j], data0);
+ _mm_store_ps(span->data[1][j], data1);
+ for (j = 0;j < DPSOFTRAST_ARRAY_TOTAL;j++)
{
- span->data[0][j][0] = proj[j][edge0p][0] * edge0yilerp + proj[j][edge0n][0] * edge0ylerp;
- span->data[0][j][1] = proj[j][edge0p][1] * edge0yilerp + proj[j][edge0n][1] * edge0ylerp;
- span->data[0][j][2] = proj[j][edge0p][2] * edge0yilerp + proj[j][edge0n][2] * edge0ylerp;
- span->data[0][j][3] = proj[j][edge0p][3] * edge0yilerp + proj[j][edge0n][3] * edge0ylerp;
- span->data[1][j][0] = proj[j][edge1p][0] * edge1yilerp + proj[j][edge1n][0] * edge1ylerp;
- span->data[1][j][1] = proj[j][edge1p][1] * edge1yilerp + proj[j][edge1n][1] * edge1ylerp;
- span->data[1][j][2] = proj[j][edge1p][2] * edge1yilerp + proj[j][edge1n][2] * edge1ylerp;
- span->data[1][j][3] = proj[j][edge1p][3] * edge1yilerp + proj[j][edge1n][3] * edge1ylerp;
+ //if (arraymask[j])
+ {
+ data0 = _mm_add_ps(_mm_mul_ps(proj[j][edge0p], edge0yilerpm), _mm_mul_ps(proj[j][edge0n], edge0ylerpm));
+ data1 = _mm_add_ps(_mm_mul_ps(proj[j][edge1p], edge1yilerpm), _mm_mul_ps(proj[j][edge1n], edge1ylerpm));
+ data1 = _mm_mul_ps(_mm_sub_ps(data1, data0), spanilengthm);
+ data0 = _mm_add_ps(data0, _mm_mul_ps(data1, startxlerpm));
+ _mm_store_ps(span->data[0][j], data0);
+ _mm_store_ps(span->data[1][j], data1);
+ }
}
}
- }
- else
- {
- span->data[0][j][0] = screen[edge1p][0] * edge1yilerp + screen[edge1n][0] * edge1ylerp;
- span->data[0][j][1] = screen[edge1p][1] * edge1yilerp + screen[edge1n][1] * edge1ylerp;
- span->data[0][j][2] = screen[edge1p][2] * edge1yilerp + screen[edge1n][2] * edge1ylerp;
- span->data[0][j][3] = screen[edge1p][3] * edge1yilerp + screen[edge1n][3] * edge1ylerp;
- span->data[1][j][0] = screen[edge0p][0] * edge0yilerp + screen[edge0n][0] * edge0ylerp;
- span->data[1][j][1] = screen[edge0p][1] * edge0yilerp + screen[edge0n][1] * edge0ylerp;
- span->data[1][j][2] = screen[edge0p][2] * edge0yilerp + screen[edge0n][2] * edge0ylerp;
- span->data[1][j][3] = screen[edge0p][3] * edge0yilerp + screen[edge0n][3] * edge0ylerp;
- for (j = 0;j < DPSOFTRAST_ARRAY_TOTAL;j++)
+ else
{
- if (arraymask[j])
+ data0 = _mm_add_ps(_mm_mul_ps(_mm_load_ps(screen[edge1p]), edge1yilerpm), _mm_mul_ps(_mm_load_ps(screen[edge1n]), edge1ylerpm));
+ data1 = _mm_add_ps(_mm_mul_ps(_mm_load_ps(screen[edge0p]), edge0yilerpm), _mm_mul_ps(_mm_load_ps(screen[edge0n]), edge0ylerpm));
+ data1 = _mm_mul_ps(_mm_sub_ps(data1, data0), spanilengthm);
+ data0 = _mm_add_ps(data0, _mm_mul_ps(data1, startxlerpm));
+ _mm_store_ps(span->data[0][j], data0);
+ _mm_store_ps(span->data[1][j], data1);
+ for (j = 0;j < DPSOFTRAST_ARRAY_TOTAL;j++)
{
- span->data[0][j][0] = proj[j][edge1p][0] * edge1yilerp + proj[j][edge1n][0] * edge1ylerp;
- span->data[0][j][1] = proj[j][edge1p][1] * edge1yilerp + proj[j][edge1n][1] * edge1ylerp;
- span->data[0][j][2] = proj[j][edge1p][2] * edge1yilerp + proj[j][edge1n][2] * edge1ylerp;
- span->data[0][j][3] = proj[j][edge1p][3] * edge1yilerp + proj[j][edge1n][3] * edge1ylerp;
- span->data[1][j][0] = proj[j][edge0p][0] * edge0yilerp + proj[j][edge0n][0] * edge0ylerp;
- span->data[1][j][1] = proj[j][edge0p][1] * edge0yilerp + proj[j][edge0n][1] * edge0ylerp;
- span->data[1][j][2] = proj[j][edge0p][2] * edge0yilerp + proj[j][edge0n][2] * edge0ylerp;
- span->data[1][j][3] = proj[j][edge0p][3] * edge0yilerp + proj[j][edge0n][3] * edge0ylerp;
+ //if (arraymask[j])
+ {
+ data0 = _mm_add_ps(_mm_mul_ps(proj[j][edge1p], edge1yilerpm), _mm_mul_ps(proj[j][edge1n], edge1ylerpm));
+ data1 = _mm_add_ps(_mm_mul_ps(proj[j][edge0p], edge0yilerpm), _mm_mul_ps(proj[j][edge0n], edge0ylerpm));
+ data1 = _mm_mul_ps(_mm_sub_ps(data1, data0), spanilengthm);
+ data0 = _mm_add_ps(data0, _mm_mul_ps(data1, startxlerpm));
+ _mm_store_ps(span->data[0][j], data0);
+ _mm_store_ps(span->data[1][j], data1);
+ }
}
}
}
- // change data[1][n][] to be a data slope
- j = DPSOFTRAST_ARRAY_TOTAL;
- span->data[1][j][0] = (span->data[1][j][0] - span->data[0][j][0]) * spanilength;
- span->data[1][j][1] = (span->data[1][j][1] - span->data[0][j][1]) * spanilength;
- span->data[1][j][2] = (span->data[1][j][2] - span->data[0][j][2]) * spanilength;
- span->data[1][j][3] = (span->data[1][j][3] - span->data[0][j][3]) * spanilength;
- for (j = 0;j < DPSOFTRAST_ARRAY_TOTAL;j++)
- {
- if (arraymask[j])
- {
- span->data[1][j][0] = (span->data[1][j][0] - span->data[0][j][0]) * spanilength;
- span->data[1][j][1] = (span->data[1][j][1] - span->data[0][j][1]) * spanilength;
- span->data[1][j][2] = (span->data[1][j][2] - span->data[0][j][2]) * spanilength;
- span->data[1][j][3] = (span->data[1][j][3] - span->data[0][j][3]) * spanilength;
- }
- }
- // adjust the data[0][n][] to be correct for the pixel centers
- // this also handles horizontal clipping where a major part of the
- // span may be off the left side of the screen
- j = DPSOFTRAST_ARRAY_TOTAL;
- span->data[0][j][0] += span->data[1][j][0] * startxlerp;
- span->data[0][j][1] += span->data[1][j][1] * startxlerp;
- span->data[0][j][2] += span->data[1][j][2] * startxlerp;
- span->data[0][j][3] += span->data[1][j][3] * startxlerp;
- for (j = 0;j < DPSOFTRAST_ARRAY_TOTAL;j++)
- {
- if (arraymask[j])
- {
- span->data[0][j][0] += span->data[1][j][0] * startxlerp;
- span->data[0][j][1] += span->data[1][j][1] * startxlerp;
- span->data[0][j][2] += span->data[1][j][2] * startxlerp;
- span->data[0][j][3] += span->data[1][j][3] * startxlerp;
- }
- }
// to keep the shader routines from needing more than a small
// buffer for pixel intermediate data, we split long spans...
while (span->length > DPSOFTRAST_DRAW_MAXSPANLENGTH)
{
+ __m128 maxspanlengthm;
span->length = DPSOFTRAST_DRAW_MAXSPANLENGTH;
if (dpsoftrast.draw.numspans >= DPSOFTRAST_DRAW_MAXSPANQUEUE)
{
span->start = y * width + startx;
span->length = endx - startx;
j = DPSOFTRAST_ARRAY_TOTAL;
- span->data[0][j][0] += span->data[1][j][0] * DPSOFTRAST_DRAW_MAXSPANLENGTH;
- span->data[0][j][1] += span->data[1][j][1] * DPSOFTRAST_DRAW_MAXSPANLENGTH;
- span->data[0][j][2] += span->data[1][j][2] * DPSOFTRAST_DRAW_MAXSPANLENGTH;
- span->data[0][j][3] += span->data[1][j][3] * DPSOFTRAST_DRAW_MAXSPANLENGTH;
+ maxspanlengthm = _mm_set1_ps(DPSOFTRAST_DRAW_MAXSPANLENGTH);
+ _mm_store_ps(span->data[0][j], _mm_add_ps(_mm_load_ps(span->data[0][j]), _mm_mul_ps(_mm_load_ps(span->data[1][j]), maxspanlengthm)));
for (j = 0;j < DPSOFTRAST_ARRAY_TOTAL;j++)
{
- if (arraymask[j])
+ //if (arraymask[j])
{
- span->data[0][j][0] += span->data[1][j][0] * DPSOFTRAST_DRAW_MAXSPANLENGTH;
- span->data[0][j][1] += span->data[1][j][1] * DPSOFTRAST_DRAW_MAXSPANLENGTH;
- span->data[0][j][2] += span->data[1][j][2] * DPSOFTRAST_DRAW_MAXSPANLENGTH;
- span->data[0][j][3] += span->data[1][j][3] * DPSOFTRAST_DRAW_MAXSPANLENGTH;
+ _mm_store_ps(span->data[0][j], _mm_add_ps(_mm_load_ps(span->data[0][j]), _mm_mul_ps(_mm_load_ps(span->data[1][j]), maxspanlengthm)));
}
}
}
DPSOFTRAST_Draw_ProcessSpans();
dpsoftrast.draw.numspans = 0;
}
+#endif
}
void DPSOFTRAST_Draw_DebugPoints(void)
arraymask[9] = dpsoftrast.pointer_texcoordf[7] != NULL;
DPSOFTRAST_Validate(DPSOFTRAST_VALIDATE_DRAW);
DPSOFTRAST_Draw_LoadVertices(firstvertex, numvertices, true);
- DPSOFTRAST_Draw_VertexShader();
+ DPSOFTRAST_ShaderModeTable[dpsoftrast.shader_mode].Vertex();
DPSOFTRAST_Draw_ProjectVertices(dpsoftrast.draw.screencoord4f, dpsoftrast.draw.post_array4f[DPSOFTRAST_ARRAY_POSITION], numvertices);
- DPSOFTRAST_Draw_ProcessTriangles(firstvertex, numvertices, numtriangles, element3i, element3s, arraymask);
+ DPSOFTRAST_Draw_ProcessTriangles(firstvertex, numtriangles, element3i, element3s, arraymask);
}
void DPSOFTRAST_Init(int width, int height, unsigned int *colorpixels, unsigned int *depthpixels)
free(dpsoftrast.texture);
memset(&dpsoftrast, 0, sizeof(dpsoftrast));
}
+