#define ATOMIC_SIZE 32
#ifdef SSE2_PRESENT
- #if defined(__GNUC__)
+ #if defined(__APPLE__)
+ #include <libkern/OSAtomic.h>
+ #define ALIGN(var) var __attribute__((__aligned__(16)))
+ #define ATOMIC(var) var __attribute__((__aligned__(32)))
+ #define MEMORY_BARRIER (_mm_sfence())
+ #define ATOMIC_COUNTER volatile int32_t
+ #define ATOMIC_INCREMENT(counter) (OSAtomicIncrement32Barrier(&(counter)))
+ #define ATOMIC_DECREMENT(counter) (OSAtomicDecrement32Barrier(&(counter)))
+ #define ATOMIC_ADD(counter, val) ((void)OSAtomicAdd32Barrier((val), &(counter)))
+ #elif defined(__GNUC__)
#define ALIGN(var) var __attribute__((__aligned__(16)))
#define ATOMIC(var) var __attribute__((__aligned__(32)))
#define MEMORY_BARRIER (_mm_sfence())
#define ATOMIC_COUNTER volatile LONG
#define ATOMIC_INCREMENT(counter) (InterlockedIncrement(&(counter)))
#define ATOMIC_DECREMENT(counter) (InterlockedDecrement(&(counter)))
- #define ATOMIC_ADD(counter, val) (InterlockedExchangeAdd(&(counter), (val)))
+ #define ATOMIC_ADD(counter, val) ((void)InterlockedExchangeAdd(&(counter), (val)))
#endif
#endif
int bx2 = blockx + blockwidth;
int by2 = blocky + blockheight;
int bw;
- int bh;
int x;
int y;
unsigned char *inpixels;
if (bx2 > dpsoftrast.fb_width) bx2 = dpsoftrast.fb_width;
if (by2 > dpsoftrast.fb_height) by2 = dpsoftrast.fb_height;
bw = bx2 - bx1;
- bh = by2 - by1;
inpixels = (unsigned char *)dpsoftrast.fb_colorpixels[0];
if (dpsoftrast.bigendian)
{
DPSOFTRAST_Texture *texture;
texture = DPSOFTRAST_Texture_GetByIndex(index);if (!texture) return;
if (mip < 0 || mip >= texture->mipmaps) return;
- if (texture->binds)
- DPSOFTRAST_Flush();
+ DPSOFTRAST_Flush();
spixels = dpsoftrast.fb_colorpixels[0];
swidth = dpsoftrast.fb_width;
sheight = dpsoftrast.fb_height;
if (th > sh) th = sh;
if (tw < 1 || th < 1)
return;
+ sy1 = sheight - 1 - sy1;
for (y = 0;y < th;y++)
- memcpy(tpixels + ((ty1 + y) * twidth + tx1), spixels + ((sy1 + y) * swidth + sx1), tw*4);
+ memcpy(tpixels + ((ty1 + y) * twidth + tx1), spixels + ((sy1 - y) * swidth + sx1), tw*4);
if (texture->mipmaps > 1)
DPSOFTRAST_Texture_CalculateMipmaps(index);
}
{
__m128 m0, m1, m2, m3;
DPSOFTRAST_Command_UniformMatrix4f *command = DPSOFTRAST_ALLOCATECOMMAND(UniformMatrix4f);
- command->index = index;
+ command->index = (DPSOFTRAST_UNIFORM)index;
if (((size_t)v)&(ALIGN_SIZE-1))
{
m0 = _mm_loadu_ps(v);
#ifdef SSE2_PRESENT
int x, startx = span->startx, endx = span->endx;
__m128i localcolor = _mm_shuffle_epi32(_mm_cvtps_epi32(_mm_mul_ps(_mm_loadu_ps(subcolor), _mm_set1_ps(255.0f))), _MM_SHUFFLE(3, 0, 1, 2));
- localcolor = _mm_shuffle_epi32(_mm_packs_epi32(localcolor, localcolor), _MM_SHUFFLE(1, 0, 1, 0));
+ localcolor = _mm_packs_epi32(localcolor, localcolor);
for (x = startx;x+2 <= endx;x+=2)
{
__m128i pix1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)&ina4ub[x*4]), _mm_setzero_si128());
#ifdef SSE2_PRESENT
int x, startx = span->startx, endx = span->endx;
__m128i tint = _mm_cvtps_epi32(_mm_mul_ps(_mm_loadu_ps(inbtintbgra), _mm_set1_ps(256.0f)));
- tint = _mm_shuffle_epi32(_mm_packs_epi32(tint, tint), _MM_SHUFFLE(1, 0, 1, 0));
+ tint = _mm_packs_epi32(tint, tint);
for (x = startx;x+2 <= endx;x+=2)
{
__m128i pix1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)&ina4ub[x*4]), _mm_setzero_si128());
#ifdef SSE2_PRESENT
int x, startx = span->startx, endx = span->endx;
__m128i localcolor = _mm_shuffle_epi32(_mm_cvtps_epi32(_mm_mul_ps(_mm_loadu_ps(color), _mm_set1_ps(255.0f))), _MM_SHUFFLE(3, 0, 1, 2)), blend;
- localcolor = _mm_shuffle_epi32(_mm_packs_epi32(localcolor, localcolor), _MM_SHUFFLE(1, 0, 1, 0));
+ localcolor = _mm_packs_epi32(localcolor, localcolor);
blend = _mm_slli_epi16(_mm_shufflehi_epi16(_mm_shufflelo_epi16(localcolor, _MM_SHUFFLE(3, 3, 3, 3)), _MM_SHUFFLE(3, 3, 3, 3)), 4);
for (x = startx;x+2 <= endx;x+=2)
{
{2, DPSOFTRAST_VertexShader_Water, DPSOFTRAST_PixelShader_Water, {~0}},
{2, DPSOFTRAST_VertexShader_ShowDepth, DPSOFTRAST_PixelShader_ShowDepth, {~0}},
{2, DPSOFTRAST_VertexShader_DeferredGeometry, DPSOFTRAST_PixelShader_DeferredGeometry, {~0}},
- {2, DPSOFTRAST_VertexShader_DeferredLightSource, DPSOFTRAST_PixelShader_DeferredLightSource, {~0}}
+ {2, DPSOFTRAST_VertexShader_DeferredLightSource, DPSOFTRAST_PixelShader_DeferredLightSource, {~0}},
};
void DPSOFTRAST_Draw_ProcessSpans(DPSOFTRAST_State_Thread *thread)