de.git.xonotic.org Git - xonotic/darkplaces.git/blobdiff - dpsoftrast.c
fix row order of screen to texture copies
[xonotic/darkplaces.git] / dpsoftrast.c
index c179417cce62c62ae1ce1a1905282aad930215de..652105bb35319d627d3ab3c11dfed6c4b94090e9 100644 (file)
@@ -14,7 +14,16 @@ typedef qboolean bool;
 #define ATOMIC_SIZE 32
 
 #ifdef SSE2_PRESENT
-       #if defined(__GNUC__)
+       #if defined(__APPLE__)
+               #include <libkern/OSAtomic.h>
+               #define ALIGN(var) var __attribute__((__aligned__(16)))
+               #define ATOMIC(var) var __attribute__((__aligned__(32)))
+               #define MEMORY_BARRIER (_mm_sfence())
+               #define ATOMIC_COUNTER volatile int32_t 
+               #define ATOMIC_INCREMENT(counter) (OSAtomicIncrement32Barrier(&(counter)))
+               #define ATOMIC_DECREMENT(counter) (OSAtomicDecrement32Barrier(&(counter)))
+               #define ATOMIC_ADD(counter, val) ((void)OSAtomicAdd32Barrier((val), &(counter)))
+       #elif defined(__GNUC__)
                #define ALIGN(var) var __attribute__((__aligned__(16)))
                #define ATOMIC(var) var __attribute__((__aligned__(32)))
                #define MEMORY_BARRIER (_mm_sfence())
@@ -31,7 +40,7 @@ typedef qboolean bool;
                #define ATOMIC_COUNTER volatile LONG
                #define ATOMIC_INCREMENT(counter) (InterlockedIncrement(&(counter)))
                #define ATOMIC_DECREMENT(counter) (InterlockedDecrement(&(counter)))
-               #define ATOMIC_ADD(counter, val) (InterlockedExchangeAdd(&(counter), (val)))
+               #define ATOMIC_ADD(counter, val) ((void)InterlockedExchangeAdd(&(counter), (val)))
        #endif
 #endif
 
@@ -1150,7 +1159,6 @@ void DPSOFTRAST_GetPixelsBGRA(int blockx, int blocky, int blockwidth, int blockh
        int bx2 = blockx + blockwidth;
        int by2 = blocky + blockheight;
        int bw;
-       int bh;
        int x;
        int y;
        unsigned char *inpixels;
@@ -1162,7 +1170,6 @@ void DPSOFTRAST_GetPixelsBGRA(int blockx, int blocky, int blockwidth, int blockh
        if (bx2 > dpsoftrast.fb_width) bx2 = dpsoftrast.fb_width;
        if (by2 > dpsoftrast.fb_height) by2 = dpsoftrast.fb_height;
        bw = bx2 - bx1;
-       bh = by2 - by1;
        inpixels = (unsigned char *)dpsoftrast.fb_colorpixels[0];
        if (dpsoftrast.bigendian)
        {
@@ -1216,8 +1223,7 @@ void DPSOFTRAST_CopyRectangleToTexture(int index, int mip, int tx, int ty, int s
        DPSOFTRAST_Texture *texture;
        texture = DPSOFTRAST_Texture_GetByIndex(index);if (!texture) return;
        if (mip < 0 || mip >= texture->mipmaps) return;
-       if (texture->binds)
-               DPSOFTRAST_Flush();
+       DPSOFTRAST_Flush();
        spixels = dpsoftrast.fb_colorpixels[0];
        swidth = dpsoftrast.fb_width;
        sheight = dpsoftrast.fb_height;
@@ -1240,8 +1246,9 @@ void DPSOFTRAST_CopyRectangleToTexture(int index, int mip, int tx, int ty, int s
        if (th > sh) th = sh;
        if (tw < 1 || th < 1)
                return;
+       sy1 = sheight - 1 - sy1;
        for (y = 0;y < th;y++)
-               memcpy(tpixels + ((ty1 + y) * twidth + tx1), spixels + ((sy1 + y) * swidth + sx1), tw*4);
+               memcpy(tpixels + ((ty1 + y) * twidth + tx1), spixels + ((sy1 - y) * swidth + sx1), tw*4);
        if (texture->mipmaps > 1)
                DPSOFTRAST_Texture_CalculateMipmaps(index);
 }
@@ -1358,7 +1365,7 @@ void DPSOFTRAST_UniformMatrix4fv(DPSOFTRAST_UNIFORM uniform, int arraysize, int
        {
                __m128 m0, m1, m2, m3;
                DPSOFTRAST_Command_UniformMatrix4f *command = DPSOFTRAST_ALLOCATECOMMAND(UniformMatrix4f);
-               command->index = index;
+               command->index = (DPSOFTRAST_UNIFORM)index;
                if (((size_t)v)&(ALIGN_SIZE-1))
                {
                        m0 = _mm_loadu_ps(v);
@@ -2986,7 +2993,7 @@ void DPSOFTRAST_Draw_Span_AddBloomBGRA8(const DPSOFTRAST_State_Triangle * RESTRI
 #ifdef SSE2_PRESENT
        int x, startx = span->startx, endx = span->endx;
        __m128i localcolor = _mm_shuffle_epi32(_mm_cvtps_epi32(_mm_mul_ps(_mm_loadu_ps(subcolor), _mm_set1_ps(255.0f))), _MM_SHUFFLE(3, 0, 1, 2));
-       localcolor = _mm_shuffle_epi32(_mm_packs_epi32(localcolor, localcolor), _MM_SHUFFLE(1, 0, 1, 0));
+       localcolor = _mm_packs_epi32(localcolor, localcolor);
        for (x = startx;x+2 <= endx;x+=2)
        {
                __m128i pix1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)&ina4ub[x*4]), _mm_setzero_si128());
@@ -3051,7 +3058,7 @@ void DPSOFTRAST_Draw_Span_TintedAddBuffersBGRA8(const DPSOFTRAST_State_Triangle
 #ifdef SSE2_PRESENT
        int x, startx = span->startx, endx = span->endx;
        __m128i tint = _mm_cvtps_epi32(_mm_mul_ps(_mm_loadu_ps(inbtintbgra), _mm_set1_ps(256.0f)));
-       tint = _mm_shuffle_epi32(_mm_packs_epi32(tint, tint), _MM_SHUFFLE(1, 0, 1, 0));
+       tint = _mm_packs_epi32(tint, tint);
        for (x = startx;x+2 <= endx;x+=2)
        {
                __m128i pix1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)&ina4ub[x*4]), _mm_setzero_si128());
@@ -3097,7 +3104,7 @@ void DPSOFTRAST_Draw_Span_MixUniformColorBGRA8(const DPSOFTRAST_State_Triangle *
 #ifdef SSE2_PRESENT
        int x, startx = span->startx, endx = span->endx;
        __m128i localcolor = _mm_shuffle_epi32(_mm_cvtps_epi32(_mm_mul_ps(_mm_loadu_ps(color), _mm_set1_ps(255.0f))), _MM_SHUFFLE(3, 0, 1, 2)), blend;
-       localcolor = _mm_shuffle_epi32(_mm_packs_epi32(localcolor, localcolor), _MM_SHUFFLE(1, 0, 1, 0));
+       localcolor = _mm_packs_epi32(localcolor, localcolor);
        blend = _mm_slli_epi16(_mm_shufflehi_epi16(_mm_shufflelo_epi16(localcolor, _MM_SHUFFLE(3, 3, 3, 3)), _MM_SHUFFLE(3, 3, 3, 3)), 4);
        for (x = startx;x+2 <= endx;x+=2)
        {
@@ -4234,7 +4241,7 @@ static const DPSOFTRAST_ShaderModeInfo DPSOFTRAST_ShaderModeTable[SHADERMODE_COU
        {2, DPSOFTRAST_VertexShader_Water,                          DPSOFTRAST_PixelShader_Water,                          {~0}},
        {2, DPSOFTRAST_VertexShader_ShowDepth,                      DPSOFTRAST_PixelShader_ShowDepth,                      {~0}},
        {2, DPSOFTRAST_VertexShader_DeferredGeometry,               DPSOFTRAST_PixelShader_DeferredGeometry,               {~0}},
-       {2, DPSOFTRAST_VertexShader_DeferredLightSource,            DPSOFTRAST_PixelShader_DeferredLightSource,            {~0}}
+       {2, DPSOFTRAST_VertexShader_DeferredLightSource,            DPSOFTRAST_PixelShader_DeferredLightSource,            {~0}},
 };
 
 void DPSOFTRAST_Draw_ProcessSpans(DPSOFTRAST_State_Thread *thread)