X-Git-Url: http://de.git.xonotic.org/?p=xonotic%2Fdarkplaces.git;a=blobdiff_plain;f=dpsoftrast.c;h=b3a00ae417f34004967976d6c7278332b0daa0bf;hp=179a4d0c5fe93cae3b4668e1f4d9221c6a79082c;hb=ab9e8a37699390a5dc199e4def9cdbaaa59d82cf;hpb=e1cc58c8345fbadd75421fa7843ec19bdf0b8681 diff --git a/dpsoftrast.c b/dpsoftrast.c index 179a4d0c..b3a00ae4 100644 --- a/dpsoftrast.c +++ b/dpsoftrast.c @@ -15,13 +15,13 @@ typedef qboolean bool; #endif #define ALIGN_SIZE 16 -#define ATOMIC_SIZE 32 +#define ATOMIC_SIZE 4 #ifdef SSE_POSSIBLE #if defined(__APPLE__) #include #define ALIGN(var) var __attribute__((__aligned__(16))) - #define ATOMIC(var) var __attribute__((__aligned__(32))) + #define ATOMIC(var) var __attribute__((__aligned__(4))) #define MEMORY_BARRIER (_mm_sfence()) #define ATOMIC_COUNTER volatile int32_t #define ATOMIC_INCREMENT(counter) (OSAtomicIncrement32Barrier(&(counter))) @@ -29,7 +29,7 @@ typedef qboolean bool; #define ATOMIC_ADD(counter, val) ((void)OSAtomicAdd32Barrier((val), &(counter))) #elif defined(__GNUC__) && defined(WIN32) #define ALIGN(var) var __attribute__((__aligned__(16))) - #define ATOMIC(var) var __attribute__((__aligned__(32))) + #define ATOMIC(var) var __attribute__((__aligned__(4))) #define MEMORY_BARRIER (_mm_sfence()) //(__sync_synchronize()) #define ATOMIC_COUNTER volatile LONG @@ -43,7 +43,7 @@ typedef qboolean bool; #define ATOMIC_ADD(counter, val) ((void)InterlockedExchangeAdd((LONG *) &(counter), (val))) #elif defined(__GNUC__) #define ALIGN(var) var __attribute__((__aligned__(16))) - #define ATOMIC(var) var __attribute__((__aligned__(32))) + #define ATOMIC(var) var __attribute__((__aligned__(4))) #define MEMORY_BARRIER (_mm_sfence()) //(__sync_synchronize()) #define ATOMIC_COUNTER volatile int @@ -52,7 +52,7 @@ typedef qboolean bool; #define ATOMIC_ADD(counter, val) ((void)__sync_fetch_and_add(&(counter), (val))) #elif defined(_MSC_VER) #define ALIGN(var) __declspec(align(16)) var - #define ATOMIC(var) __declspec(align(32)) var + #define ATOMIC(var) __declspec(align(4)) var #define MEMORY_BARRIER (_mm_sfence()) //(MemoryBarrier()) #define ATOMIC_COUNTER volatile LONG @@ -91,11 +91,11 @@ typedef qboolean bool; #define _mm_cvtss_f32(val) (__builtin_ia32_vec_ext_v4sf ((__v4sf)(val), 0)) #endif -#define MM_MALLOC(size) _mm_malloc(size, ATOMIC_SIZE) +#define MM_MALLOC(size) _mm_malloc(size, ALIGN_SIZE) static void *MM_CALLOC(size_t nmemb, size_t size) { - void *ptr = _mm_malloc(nmemb*size, ATOMIC_SIZE); + void *ptr = _mm_malloc(nmemb*size, ALIGN_SIZE); if (ptr != NULL) memset(ptr, 0, nmemb*size); return ptr; } @@ -163,15 +163,15 @@ enum { DPSOFTRAST_OPCODE_Reset = 0 }; #define DPSOFTRAST_DRAW_MAXCOMMANDPOOL 2097152 #define DPSOFTRAST_DRAW_MAXCOMMANDSIZE 16384 -typedef ATOMIC(struct DPSOFTRAST_State_Command_Pool_s +typedef ALIGN(struct DPSOFTRAST_State_Command_Pool_s { int freecommand; int usedcommands; - ATOMIC(unsigned char commands[DPSOFTRAST_DRAW_MAXCOMMANDPOOL]); + ALIGN(unsigned char commands[DPSOFTRAST_DRAW_MAXCOMMANDPOOL]); } DPSOFTRAST_State_Command_Pool); -typedef ATOMIC(struct DPSOFTRAST_State_Triangle_s +typedef ALIGN(struct DPSOFTRAST_State_Triangle_s { unsigned char mip[DPSOFTRAST_MAXTEXTUREUNITS]; // texcoord to screen space density values (for picking mipmap of textures) float w[3]; @@ -236,7 +236,7 @@ typedef enum DPSOFTRAST_BLENDMODE_e } DPSOFTRAST_BLENDMODE; -typedef ATOMIC(struct DPSOFTRAST_State_Thread_s +typedef ALIGN(struct DPSOFTRAST_State_Thread_s { void *thread; int index; @@ -302,7 +302,7 @@ typedef ATOMIC(struct DPSOFTRAST_State_Thread_s } DPSOFTRAST_State_Thread); -typedef ATOMIC(struct DPSOFTRAST_State_s +typedef ALIGN(struct DPSOFTRAST_State_s { int fb_width; int fb_height; @@ -3857,8 +3857,8 @@ void DPSOFTRAST_PixelShader_LightDirection(DPSOFTRAST_State_Thread *thread, cons specular = DPSOFTRAST_Vector3Dot(surfacenormal, specularnormal);if (specular < 0.0f) specular = 0.0f; } + specular = pow(specular, 1.0f + SpecularPower * glosstex[3]); - specular = pow(specular, SpecularPower * glosstex[3]); if (thread->shader_permutation & SHADERPERMUTATION_GLOW) { d[0] = (int)(buffer_texture_glowbgra8[x*4+0] * Color_Glow[0] + diffusetex[0] * Color_Ambient[0] + (diffusetex[0] * Color_Diffuse[0] * diffuse + glosstex[0] * Color_Specular[0] * specular) * LightColor[0]);if (d[0] > 255) d[0] = 255; @@ -4266,7 +4266,7 @@ void DPSOFTRAST_PixelShader_LightSource(DPSOFTRAST_State_Thread *thread, const D specular = DPSOFTRAST_Vector3Dot(surfacenormal, specularnormal);if (specular < 0.0f) specular = 0.0f; } - specular = pow(specular, SpecularPower * glosstex[3]); + specular = pow(specular, 1.0f + SpecularPower * glosstex[3]); if (thread->shader_permutation & SHADERPERMUTATION_CUBEFILTER) { @@ -4739,6 +4739,8 @@ static const DPSOFTRAST_ShaderModeInfo DPSOFTRAST_ShaderModeTable[SHADERMODE_COU {2, DPSOFTRAST_VertexShader_FakeLight, DPSOFTRAST_PixelShader_FakeLight, {DPSOFTRAST_ARRAY_TEXCOORD0, DPSOFTRAST_ARRAY_TEXCOORD1, DPSOFTRAST_ARRAY_TEXCOORD2, DPSOFTRAST_ARRAY_TEXCOORD3, DPSOFTRAST_ARRAY_TEXCOORD5, DPSOFTRAST_ARRAY_TEXCOORD6, ~0}, {GL20TU_COLOR, GL20TU_PANTS, GL20TU_SHIRT, GL20TU_GLOW, GL20TU_NORMAL, GL20TU_GLOSS, ~0}}, {2, DPSOFTRAST_VertexShader_LightDirectionMap_ModelSpace, DPSOFTRAST_PixelShader_LightDirectionMap_ModelSpace, {DPSOFTRAST_ARRAY_TEXCOORD0, DPSOFTRAST_ARRAY_TEXCOORD1, DPSOFTRAST_ARRAY_TEXCOORD2, DPSOFTRAST_ARRAY_TEXCOORD3, DPSOFTRAST_ARRAY_TEXCOORD4, DPSOFTRAST_ARRAY_TEXCOORD5, DPSOFTRAST_ARRAY_TEXCOORD6, ~0}, {GL20TU_COLOR, GL20TU_PANTS, GL20TU_SHIRT, GL20TU_GLOW, GL20TU_NORMAL, GL20TU_GLOSS, GL20TU_LIGHTMAP, GL20TU_DELUXEMAP, ~0}}, {2, DPSOFTRAST_VertexShader_LightDirectionMap_TangentSpace, DPSOFTRAST_PixelShader_LightDirectionMap_TangentSpace, {DPSOFTRAST_ARRAY_TEXCOORD0, DPSOFTRAST_ARRAY_TEXCOORD1, DPSOFTRAST_ARRAY_TEXCOORD2, DPSOFTRAST_ARRAY_TEXCOORD3, DPSOFTRAST_ARRAY_TEXCOORD4, DPSOFTRAST_ARRAY_TEXCOORD5, DPSOFTRAST_ARRAY_TEXCOORD6, ~0}, {GL20TU_COLOR, GL20TU_PANTS, GL20TU_SHIRT, GL20TU_GLOW, GL20TU_NORMAL, GL20TU_GLOSS, GL20TU_LIGHTMAP, GL20TU_DELUXEMAP, ~0}}, + {2, DPSOFTRAST_VertexShader_Lightmap, DPSOFTRAST_PixelShader_Lightmap, {DPSOFTRAST_ARRAY_TEXCOORD0, DPSOFTRAST_ARRAY_TEXCOORD4, ~0}, {GL20TU_COLOR, GL20TU_LIGHTMAP, GL20TU_GLOW, ~0}}, + {2, DPSOFTRAST_VertexShader_VertexColor, DPSOFTRAST_PixelShader_VertexColor, {DPSOFTRAST_ARRAY_COLOR, DPSOFTRAST_ARRAY_TEXCOORD0, ~0}, {GL20TU_COLOR, ~0}}, {2, DPSOFTRAST_VertexShader_LightDirection, DPSOFTRAST_PixelShader_LightDirection, {DPSOFTRAST_ARRAY_TEXCOORD0, DPSOFTRAST_ARRAY_TEXCOORD1, DPSOFTRAST_ARRAY_TEXCOORD2, DPSOFTRAST_ARRAY_TEXCOORD3, DPSOFTRAST_ARRAY_TEXCOORD5, DPSOFTRAST_ARRAY_TEXCOORD6, ~0}, {GL20TU_COLOR, GL20TU_PANTS, GL20TU_SHIRT, GL20TU_GLOW, GL20TU_NORMAL, GL20TU_GLOSS, ~0}}, {2, DPSOFTRAST_VertexShader_LightSource, DPSOFTRAST_PixelShader_LightSource, {DPSOFTRAST_ARRAY_TEXCOORD0, DPSOFTRAST_ARRAY_TEXCOORD1, DPSOFTRAST_ARRAY_TEXCOORD2, DPSOFTRAST_ARRAY_TEXCOORD3, DPSOFTRAST_ARRAY_TEXCOORD4, ~0}, {GL20TU_COLOR, GL20TU_PANTS, GL20TU_SHIRT, GL20TU_GLOW, GL20TU_NORMAL, GL20TU_GLOSS, GL20TU_CUBE, ~0}}, {2, DPSOFTRAST_VertexShader_Refraction, DPSOFTRAST_PixelShader_Refraction, {DPSOFTRAST_ARRAY_TEXCOORD0, DPSOFTRAST_ARRAY_TEXCOORD4, ~0}, {GL20TU_NORMAL, GL20TU_REFRACTION, ~0}},