#define _USE_MATH_DEFINES
#include <math.h>
#include "quakedef.h"
+#include "thread.h"
#include "dpsoftrast.h"
-#ifdef USE_SDL
-#define USE_THREADS
-#endif
-
#ifndef __cplusplus
typedef qboolean bool;
#endif
#define ATOMIC_SIZE 32
#ifdef SSE2_PRESENT
- #if defined(__GNUC__)
+ #if defined(__APPLE__)
+ #include <libkern/OSAtomic.h>
+ #define ALIGN(var) var __attribute__((__aligned__(16)))
+ #define ATOMIC(var) var __attribute__((__aligned__(32)))
+ #define MEMORY_BARRIER (_mm_sfence())
+ #define ATOMIC_COUNTER volatile int32_t
+ #define ATOMIC_INCREMENT(counter) (OSAtomicIncrement32Barrier(&(counter)))
+ #define ATOMIC_DECREMENT(counter) (OSAtomicDecrement32Barrier(&(counter)))
+ #define ATOMIC_ADD(counter, val) ((void)OSAtomicAdd32Barrier((val), &(counter)))
+ #elif defined(__GNUC__)
#define ALIGN(var) var __attribute__((__aligned__(16)))
#define ATOMIC(var) var __attribute__((__aligned__(32)))
- #ifdef USE_THREADS
- #define MEMORY_BARRIER (_mm_sfence())
- //(__sync_synchronize())
- #define ATOMIC_COUNTER volatile int
- #define ATOMIC_INCREMENT(counter) (__sync_add_and_fetch(&(counter), 1))
- #define ATOMIC_DECREMENT(counter) (__sync_add_and_fetch(&(counter), -1))
- #define ATOMIC_ADD(counter, val) ((void)__sync_fetch_and_add(&(counter), (val)))
- #endif
+ #define MEMORY_BARRIER (_mm_sfence())
+ //(__sync_synchronize())
+ #define ATOMIC_COUNTER volatile int
+ #define ATOMIC_INCREMENT(counter) (__sync_add_and_fetch(&(counter), 1))
+ #define ATOMIC_DECREMENT(counter) (__sync_add_and_fetch(&(counter), -1))
+ #define ATOMIC_ADD(counter, val) ((void)__sync_fetch_and_add(&(counter), (val)))
#elif defined(_MSC_VER)
#define ALIGN(var) __declspec(align(16)) var
#define ATOMIC(var) __declspec(align(32)) var
- #ifdef USE_THREADS
- #define MEMORY_BARRIER (_mm_sfence())
- //(MemoryBarrier())
- #define ATOMIC_COUNTER volatile LONG
- #define ATOMIC_INCREMENT(counter) (InterlockedIncrement(&(counter)))
- #define ATOMIC_DECREMENT(counter) (InterlockedDecrement(&(counter)))
- #define ATOMIC_ADD(counter, val) (InterlockedExchangeAdd(&(counter), (val)))
- #endif
- #else
- #undef USE_THREADS
- #undef SSE2_PRESENT
+ #define MEMORY_BARRIER (_mm_sfence())
+ //(MemoryBarrier())
+ #define ATOMIC_COUNTER volatile LONG
+ #define ATOMIC_INCREMENT(counter) (InterlockedIncrement(&(counter)))
+ #define ATOMIC_DECREMENT(counter) (InterlockedDecrement(&(counter)))
+ #define ATOMIC_ADD(counter, val) ((void)InterlockedExchangeAdd(&(counter), (val)))
#endif
#endif
-#ifndef SSE2_PRESENT
- #define ALIGN(var) var
- #define ATOMIC(var) var
+#ifndef ALIGN
+#define ALIGN(var) var
#endif
-
-#ifdef USE_THREADS
-#include <SDL.h>
-#include <SDL_thread.h>
-#else
- #define MEMORY_BARRIER ((void)0)
- #define ATOMIC_COUNTER int
- #define ATOMIC_INCREMENT(counter) (++(counter))
- #define ATOMIC_DECREMENT(counter) (--(counter))
- #define ATOMIC_ADD(counter, val) ((void)((counter) += (val)))
- typedef void SDL_Thread;
- typedef void SDL_cond;
- typedef void SDL_mutex;
+#ifndef ATOMIC
+#define ATOMIC(var) var
+#endif
+#ifndef MEMORY_BARRIER
+#define MEMORY_BARRIER ((void)0)
+#endif
+#ifndef ATOMIC_COUNTER
+#define ATOMIC_COUNTER int
+#endif
+#ifndef ATOMIC_INCREMENT
+#define ATOMIC_INCREMENT(counter) (++(counter))
+#endif
+#ifndef ATOMIC_DECREMENT
+#define ATOMIC_DECREMENT(counter) (--(counter))
+#endif
+#ifndef ATOMIC_ADD
+#define ATOMIC_ADD(counter, val) ((void)((counter) += (val)))
#endif
#ifdef SSE2_PRESENT
typedef ATOMIC(struct DPSOFTRAST_State_Thread_s
{
- SDL_Thread *thread;
+ void *thread;
int index;
int cullface;
volatile bool waiting;
volatile bool starving;
- SDL_cond *waitcond;
- SDL_cond *drawcond;
- SDL_mutex *drawmutex;
+ void *waitcond;
+ void *drawcond;
+ void *drawmutex;
int numspans;
int numtriangles;
// error reporting
const char *errorstring;
+ bool usethreads;
int interlace;
int numthreads;
DPSOFTRAST_State_Thread *threads;
static void DPSOFTRAST_Draw_SyncCommands(void)
{
- MEMORY_BARRIER;
+ if(dpsoftrast.usethreads) MEMORY_BARRIER;
dpsoftrast.drawcommand = dpsoftrast.commandpool.freecommand;
}
static void DPSOFTRAST_Draw_FreeCommandPool(int space)
{
-#ifdef USE_THREADS
DPSOFTRAST_State_Thread *thread;
int i;
int freecommand = dpsoftrast.commandpool.freecommand;
if (usedcommands <= DPSOFTRAST_DRAW_MAXCOMMANDPOOL-space || waitindex < 0)
break;
thread = &dpsoftrast.threads[waitindex];
- SDL_LockMutex(thread->drawmutex);
+ Thread_LockMutex(thread->drawmutex);
if (thread->commandoffset != dpsoftrast.drawcommand)
{
thread->waiting = true;
- if (thread->starving) SDL_CondSignal(thread->drawcond);
- SDL_CondWait(thread->waitcond, thread->drawmutex);
+ if (thread->starving) Thread_CondSignal(thread->drawcond);
+ Thread_CondWait(thread->waitcond, thread->drawmutex);
thread->waiting = false;
}
- SDL_UnlockMutex(thread->drawmutex);
+ Thread_UnlockMutex(thread->drawmutex);
}
dpsoftrast.commandpool.usedcommands = usedcommands;
-#else
- DPSOFTRAST_Draw_FlushThreads();
-#endif
}
#define DPSOFTRAST_ALIGNCOMMAND(size) \
extra += DPSOFTRAST_DRAW_MAXCOMMANDPOOL - freecommand;
if (usedcommands > DPSOFTRAST_DRAW_MAXCOMMANDPOOL - (size + extra))
{
- DPSOFTRAST_Draw_FreeCommandPool(size + extra);
+ if (dpsoftrast.usethreads)
+ DPSOFTRAST_Draw_FreeCommandPool(size + extra);
+ else
+ DPSOFTRAST_Draw_FlushThreads();
freecommand = dpsoftrast.commandpool.freecommand;
usedcommands = dpsoftrast.commandpool.usedcommands;
}
int bx2 = blockx + blockwidth;
int by2 = blocky + blockheight;
int bw;
- int bh;
int x;
int y;
unsigned char *inpixels;
if (bx2 > dpsoftrast.fb_width) bx2 = dpsoftrast.fb_width;
if (by2 > dpsoftrast.fb_height) by2 = dpsoftrast.fb_height;
bw = bx2 - bx1;
- bh = by2 - by1;
inpixels = (unsigned char *)dpsoftrast.fb_colorpixels[0];
if (dpsoftrast.bigendian)
{
DPSOFTRAST_Texture *texture;
texture = DPSOFTRAST_Texture_GetByIndex(index);if (!texture) return;
if (mip < 0 || mip >= texture->mipmaps) return;
- if (texture->binds)
- DPSOFTRAST_Flush();
+ DPSOFTRAST_Flush();
spixels = dpsoftrast.fb_colorpixels[0];
swidth = dpsoftrast.fb_width;
sheight = dpsoftrast.fb_height;
{
__m128 m0, m1, m2, m3;
DPSOFTRAST_Command_UniformMatrix4f *command = DPSOFTRAST_ALLOCATECOMMAND(UniformMatrix4f);
- command->index = index;
+ command->index = (DPSOFTRAST_UNIFORM)index;
if (((size_t)v)&(ALIGN_SIZE-1))
{
m0 = _mm_loadu_ps(v);
*endy = _mm_cvttss_si32(minproj)+1;
return clipmask;
}
-#endif
static int DPSOFTRAST_Vertex_Project(float *out4f, float *screen4f, int *starty, int *endy, const float *in4f, int numitems)
{
-#ifdef SSE2_PRESENT
float *end = out4f + numitems*4;
__m128 viewportcenter = _mm_load_ps(dpsoftrast.fb_viewportcenter), viewportscale = _mm_load_ps(dpsoftrast.fb_viewportscale);
__m128 minpos, maxpos;
_mm_setr_ps(0.0f, 0.0f, 1.0f, 0.0f),
_mm_setr_ps(0.0f, 0.0f, 0.0f, 1.0f));
return 0;
-#endif
}
static int DPSOFTRAST_Vertex_TransformProject(float *out4f, float *screen4f, int *starty, int *endy, const float *in4f, int numitems, const float *inmatrix16f)
{
-#ifdef SSE2_PRESENT
static const float identitymatrix[4][4] = {{1,0,0,0},{0,1,0,0},{0,0,1,0},{0,0,0,1}};
__m128 m0, m1, m2, m3, viewportcenter, viewportscale, minpos, maxpos;
float *end;
if (starty && endy)
return DPSOFTRAST_Vertex_BoundY(starty, endy, minpos, maxpos, viewportcenter, viewportscale, m0, m1, m2, m3);
return 0;
-#endif
}
+#endif
static float *DPSOFTRAST_Array_Load(int outarray, int inarray)
{
+#ifdef SSE2_PRESENT
float *outf = dpsoftrast.post_array4f[outarray];
const unsigned char *inb;
int firstvertex = dpsoftrast.firstvertex;
break;
}
return outf;
+#else
+ return NULL;
+#endif
}
static float *DPSOFTRAST_Array_Transform(int outarray, int inarray, const float *inmatrix16f)
#if 0
static float *DPSOFTRAST_Array_Project(int outarray, int inarray)
{
+#ifdef SSE2_PRESENT
float *data = inarray >= 0 ? DPSOFTRAST_Array_Load(outarray, inarray) : dpsoftrast.post_array4f[outarray];
dpsoftrast.drawclipped = DPSOFTRAST_Vertex_Project(data, dpsoftrast.screencoord4f, &dpsoftrast.drawstarty, &dpsoftrast.drawendy, data, dpsoftrast.numvertices);
return data;
+#else
+ return NULL;
+#endif
}
#endif
static float *DPSOFTRAST_Array_TransformProject(int outarray, int inarray, const float *inmatrix16f)
{
+#ifdef SSE2_PRESENT
float *data = inarray >= 0 ? DPSOFTRAST_Array_Load(outarray, inarray) : dpsoftrast.post_array4f[outarray];
dpsoftrast.drawclipped = DPSOFTRAST_Vertex_TransformProject(data, dpsoftrast.screencoord4f, &dpsoftrast.drawstarty, &dpsoftrast.drawendy, data, dpsoftrast.numvertices, inmatrix16f);
return data;
+#else
+ return NULL;
+#endif
}
void DPSOFTRAST_Draw_Span_Begin(DPSOFTRAST_State_Thread *thread, const DPSOFTRAST_State_Triangle * RESTRICT triangle, const DPSOFTRAST_State_Span * RESTRICT span, float *zf)
{2, DPSOFTRAST_VertexShader_Water, DPSOFTRAST_PixelShader_Water, {~0}},
{2, DPSOFTRAST_VertexShader_ShowDepth, DPSOFTRAST_PixelShader_ShowDepth, {~0}},
{2, DPSOFTRAST_VertexShader_DeferredGeometry, DPSOFTRAST_PixelShader_DeferredGeometry, {~0}},
- {2, DPSOFTRAST_VertexShader_DeferredLightSource, DPSOFTRAST_PixelShader_DeferredLightSource, {~0}}
+ {2, DPSOFTRAST_VertexShader_DeferredLightSource, DPSOFTRAST_PixelShader_DeferredLightSource, {~0}},
};
void DPSOFTRAST_Draw_ProcessSpans(DPSOFTRAST_State_Thread *thread)
ycc = _mm_min_epi16(ycc, _mm_shuffle_epi32(ycc, _MM_SHUFFLE(2, 3, 0, 1)));
nexty = _mm_extract_epi16(ycc, 0);
if (nexty >= bandy) nexty = bandy-1;
- if (_mm_ucomigt_ss(_mm_max_ss(screen[edge0n], screen[edge0p]), _mm_min_ss(screen[edge1n], screen[edge1p])))
- {
- int tmp = edge0n;
- edge0n = edge1n;
- edge1n = tmp;
- tmp = edge0p;
- edge0p = edge1p;
- edge1p = tmp;
- }
xslope = _mm_sub_ps(_mm_movelh_ps(screen[edge0n], screen[edge1n]), _mm_movelh_ps(screen[edge0p], screen[edge1p]));
xslope = _mm_div_ps(xslope, _mm_shuffle_ps(xslope, xslope, _MM_SHUFFLE(3, 3, 1, 1)));
xcoords = _mm_add_ps(_mm_movelh_ps(screen[edge0p], screen[edge1p]),
_mm_mul_ps(xslope, _mm_sub_ps(_mm_set1_ps(y), _mm_shuffle_ps(screen[edge0p], screen[edge1p], _MM_SHUFFLE(1, 1, 1, 1)))));
xcoords = _mm_add_ps(xcoords, _mm_set1_ps(0.5f));
+ if (_mm_ucomigt_ss(xcoords, _mm_shuffle_ps(xcoords, xcoords, _MM_SHUFFLE(1, 0, 3, 2))))
+ {
+ xcoords = _mm_shuffle_ps(xcoords, xcoords, _MM_SHUFFLE(1, 0, 3, 2));
+ xslope = _mm_shuffle_ps(xslope, xslope, _MM_SHUFFLE(1, 0, 3, 2));
+ }
for(; y <= nexty; y++, xcoords = _mm_add_ps(xcoords, xslope))
{
int startx, endx, offset;
command->clipped = dpsoftrast.drawclipped;
command->refcount = dpsoftrast.numthreads;
-#ifdef USE_THREADS
- DPSOFTRAST_Draw_SyncCommands();
+ if (dpsoftrast.usethreads)
{
int i;
+ DPSOFTRAST_Draw_SyncCommands();
for (i = 0; i < dpsoftrast.numthreads; i++)
{
DPSOFTRAST_State_Thread *thread = &dpsoftrast.threads[i];
if (((command->starty < thread->maxy1 && command->endy > thread->miny1) || (command->starty < thread->maxy2 && command->endy > thread->miny2)) && thread->starving)
- SDL_CondSignal(thread->drawcond);
+ Thread_CondSignal(thread->drawcond);
}
}
-#else
- DPSOFTRAST_Draw_FlushThreads();
-#endif
+ else
+ {
+ DPSOFTRAST_Draw_FlushThreads();
+ }
}
static void DPSOFTRAST_Draw_InterpretCommands(DPSOFTRAST_State_Thread *thread, int endoffset)
thread->commandoffset = commandoffset;
}
-#ifdef USE_THREADS
static int DPSOFTRAST_Draw_Thread(void *data)
{
DPSOFTRAST_State_Thread *thread = (DPSOFTRAST_State_Thread *)data;
}
else
{
- SDL_LockMutex(thread->drawmutex);
+ Thread_LockMutex(thread->drawmutex);
if (thread->commandoffset == dpsoftrast.drawcommand && thread->index >= 0)
{
- if (thread->waiting) SDL_CondSignal(thread->waitcond);
+ if (thread->waiting) Thread_CondSignal(thread->waitcond);
thread->starving = true;
- SDL_CondWait(thread->drawcond, thread->drawmutex);
+ Thread_CondWait(thread->drawcond, thread->drawmutex);
thread->starving = false;
}
- SDL_UnlockMutex(thread->drawmutex);
+ Thread_UnlockMutex(thread->drawmutex);
}
}
return 0;
}
-#endif
static void DPSOFTRAST_Draw_FlushThreads(void)
{
DPSOFTRAST_State_Thread *thread;
int i;
DPSOFTRAST_Draw_SyncCommands();
-#ifdef USE_THREADS
- for (i = 0; i < dpsoftrast.numthreads; i++)
+ if (dpsoftrast.usethreads)
{
- thread = &dpsoftrast.threads[i];
- if (thread->commandoffset != dpsoftrast.drawcommand)
+ for (i = 0; i < dpsoftrast.numthreads; i++)
{
- SDL_LockMutex(thread->drawmutex);
- if (thread->commandoffset != dpsoftrast.drawcommand && thread->starving)
- SDL_CondSignal(thread->drawcond);
- SDL_UnlockMutex(thread->drawmutex);
+ thread = &dpsoftrast.threads[i];
+ if (thread->commandoffset != dpsoftrast.drawcommand)
+ {
+ Thread_LockMutex(thread->drawmutex);
+ if (thread->commandoffset != dpsoftrast.drawcommand && thread->starving)
+ Thread_CondSignal(thread->drawcond);
+ Thread_UnlockMutex(thread->drawmutex);
+ }
}
- }
-#endif
- for (i = 0; i < dpsoftrast.numthreads; i++)
- {
- thread = &dpsoftrast.threads[i];
-#ifdef USE_THREADS
- if (thread->commandoffset != dpsoftrast.drawcommand)
+ for (i = 0; i < dpsoftrast.numthreads; i++)
{
- SDL_LockMutex(thread->drawmutex);
+ thread = &dpsoftrast.threads[i];
if (thread->commandoffset != dpsoftrast.drawcommand)
{
- thread->waiting = true;
- SDL_CondWait(thread->waitcond, thread->drawmutex);
- thread->waiting = false;
+ Thread_LockMutex(thread->drawmutex);
+ if (thread->commandoffset != dpsoftrast.drawcommand)
+ {
+ thread->waiting = true;
+ Thread_CondWait(thread->waitcond, thread->drawmutex);
+ thread->waiting = false;
+ }
+ Thread_UnlockMutex(thread->drawmutex);
}
- SDL_UnlockMutex(thread->drawmutex);
}
-#else
- if (thread->commandoffset != dpsoftrast.drawcommand)
- DPSOFTRAST_Draw_InterpretCommands(thread, dpsoftrast.drawcommand);
-#endif
+ }
+ else
+ {
+ for (i = 0; i < dpsoftrast.numthreads; i++)
+ {
+ thread = &dpsoftrast.threads[i];
+ if (thread->commandoffset != dpsoftrast.drawcommand)
+ DPSOFTRAST_Draw_InterpretCommands(thread, dpsoftrast.drawcommand);
+ }
}
dpsoftrast.commandpool.usedcommands = 0;
}
DPSOFTRAST_Flush();
}
-void DPSOFTRAST_Init(int width, int height, int numthreads, int interlace, unsigned int *colorpixels, unsigned int *depthpixels)
+int DPSOFTRAST_Init(int width, int height, int numthreads, int interlace, unsigned int *colorpixels, unsigned int *depthpixels)
{
int i;
union
dpsoftrast.color[1] = 1;
dpsoftrast.color[2] = 1;
dpsoftrast.color[3] = 1;
- dpsoftrast.interlace = bound(0, interlace, 1);
-#ifdef USE_THREADS
- dpsoftrast.numthreads = bound(1, numthreads, 64);
-#else
- dpsoftrast.numthreads = 1;
-#endif
+ dpsoftrast.usethreads = numthreads > 0 && Thread_HasThreads();
+ dpsoftrast.interlace = dpsoftrast.usethreads ? bound(0, interlace, 1) : 0;
+ dpsoftrast.numthreads = dpsoftrast.usethreads ? bound(1, numthreads, 64) : 1;
dpsoftrast.threads = (DPSOFTRAST_State_Thread *)MM_CALLOC(dpsoftrast.numthreads, sizeof(DPSOFTRAST_State_Thread));
for (i = 0; i < dpsoftrast.numthreads; i++)
{
thread->commandoffset = 0;
thread->waiting = false;
thread->starving = false;
-#ifdef USE_THREADS
- thread->waitcond = SDL_CreateCond();
- thread->drawcond = SDL_CreateCond();
- thread->drawmutex = SDL_CreateMutex();
-#endif
-
+
thread->validate = -1;
DPSOFTRAST_Validate(thread, -1);
-#ifdef USE_THREADS
- thread->thread = SDL_CreateThread(DPSOFTRAST_Draw_Thread, thread);
-#endif
+
+ if (dpsoftrast.usethreads)
+ {
+ thread->waitcond = Thread_CreateCond();
+ thread->drawcond = Thread_CreateCond();
+ thread->drawmutex = Thread_CreateMutex();
+ thread->thread = Thread_CreateThread(DPSOFTRAST_Draw_Thread, thread);
+ }
}
+ return 0;
}
void DPSOFTRAST_Shutdown(void)
{
int i;
-#ifdef USE_THREADS
- if (dpsoftrast.numthreads > 0)
+ if (dpsoftrast.usethreads && dpsoftrast.numthreads > 0)
{
DPSOFTRAST_State_Thread *thread;
for (i = 0; i < dpsoftrast.numthreads; i++)
{
thread = &dpsoftrast.threads[i];
- SDL_LockMutex(thread->drawmutex);
+ Thread_LockMutex(thread->drawmutex);
thread->index = -1;
- SDL_CondSignal(thread->drawcond);
- SDL_UnlockMutex(thread->drawmutex);
- SDL_WaitThread(thread->thread, NULL);
- SDL_DestroyCond(thread->waitcond);
- SDL_DestroyCond(thread->drawcond);
- SDL_DestroyMutex(thread->drawmutex);
+ Thread_CondSignal(thread->drawcond);
+ Thread_UnlockMutex(thread->drawmutex);
+ Thread_WaitThread(thread->thread, 0);
+ Thread_DestroyCond(thread->waitcond);
+ Thread_DestroyCond(thread->drawcond);
+ Thread_DestroyMutex(thread->drawmutex);
}
}
-#endif
for (i = 0;i < dpsoftrast.texture_end;i++)
if (dpsoftrast.texture[i].bytes)
MM_FREE(dpsoftrast.texture[i].bytes);