From b3b22b1ce762eaa16a0b25772c54b120771d6728 Mon Sep 17 00:00:00 2001 From: eihrul Date: Sun, 6 Feb 2011 12:44:54 +0000 Subject: [PATCH] implemented scissoring of triangles and extra necessary blendmode for xonotic hud git-svn-id: svn://svn.icculus.org/twilight/trunk/darkplaces@10815 d7cf8633-e32d-0410-b094-e92efae38249 --- dpsoftrast.c | 134 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 81 insertions(+), 53 deletions(-) diff --git a/dpsoftrast.c b/dpsoftrast.c index 072fd564..67a111a7 100644 --- a/dpsoftrast.c +++ b/dpsoftrast.c @@ -179,7 +179,6 @@ typedef ALIGN(struct DPSOFTRAST_State_Span_s int triangle; // triangle this span was generated by int x; // framebuffer x coord int y; // framebuffer y coord - int length; // pixel count int startx; // usable range (according to pixelmask) int endx; // usable range (according to pixelmask) unsigned char *pixelmask; // true for pixels that passed depth test, false for others @@ -205,6 +204,7 @@ typedef enum DPSOFTRAST_BLENDMODE_e DPSOFTRAST_BLENDMODE_MUL2, DPSOFTRAST_BLENDMODE_SUBALPHA, DPSOFTRAST_BLENDMODE_PSEUDOALPHA, + DPSOFTRAST_BLENDMODE_INVADD, DPSOFTRAST_BLENDMODE_TOTAL } DPSOFTRAST_BLENDMODE; @@ -243,7 +243,7 @@ typedef ATOMIC(struct DPSOFTRAST_State_Thread_s // derived values (DPSOFTRAST_VALIDATE_FB) int fb_colormask; - int fb_clearscissor[4]; + int fb_scissor[4]; ALIGN(float fb_viewportcenter[4]); ALIGN(float fb_viewportscale[4]); @@ -365,10 +365,10 @@ static void DPSOFTRAST_RecalcFB(DPSOFTRAST_State_Thread *thread) if (x2 > dpsoftrast.fb_width) x2 = dpsoftrast.fb_width; if (y1 < 0) y1 = 0; if (y2 > dpsoftrast.fb_height) y2 = dpsoftrast.fb_height; - thread->fb_clearscissor[0] = x1; - thread->fb_clearscissor[1] = y1; - thread->fb_clearscissor[2] = x2 - x1; - thread->fb_clearscissor[3] = y2 - y1; + thread->fb_scissor[0] = x1; + thread->fb_scissor[1] = y1; + thread->fb_scissor[2] = x2 - x1; + thread->fb_scissor[3] = y2 - y1; DPSOFTRAST_RecalcViewport(thread->viewport, 
thread->fb_viewportcenter, thread->fb_viewportscale); } @@ -386,7 +386,7 @@ static void DPSOFTRAST_RecalcBlendFunc(DPSOFTRAST_State_Thread *thread) { #define BLENDFUNC(sfactor, dfactor, blendmode) \ case (sfactor<<16)|dfactor: thread->fb_blendmode = blendmode; break; - BLENDFUNC(GL_SRC_COLOR, GL_ONE, DPSOFTRAST_BLENDMODE_SUBALPHA) + BLENDFUNC(GL_SRC_ALPHA, GL_ONE, DPSOFTRAST_BLENDMODE_SUBALPHA) default: thread->fb_blendmode = DPSOFTRAST_BLENDMODE_OPAQUE; break; } } @@ -403,7 +403,7 @@ static void DPSOFTRAST_RecalcBlendFunc(DPSOFTRAST_State_Thread *thread) BLENDFUNC(GL_DST_COLOR, GL_ZERO, DPSOFTRAST_BLENDMODE_MUL) BLENDFUNC(GL_DST_COLOR, GL_SRC_COLOR, DPSOFTRAST_BLENDMODE_MUL2) BLENDFUNC(GL_ONE, GL_ONE_MINUS_SRC_ALPHA, DPSOFTRAST_BLENDMODE_PSEUDOALPHA) - BLENDFUNC(GL_SRC_COLOR, GL_ONE, DPSOFTRAST_BLENDMODE_SUBALPHA) + BLENDFUNC(GL_ONE_MINUS_DST_COLOR, GL_ONE, DPSOFTRAST_BLENDMODE_INVADD) default: thread->fb_blendmode = DPSOFTRAST_BLENDMODE_OPAQUE; break; } } @@ -900,10 +900,10 @@ static void DPSOFTRAST_Interpret_ClearColor(DPSOFTRAST_State_Thread *thread, con unsigned int *p; unsigned int c; DPSOFTRAST_Validate(thread, DPSOFTRAST_VALIDATE_FB); - x1 = thread->fb_clearscissor[0]; - y1 = thread->fb_clearscissor[1]; - x2 = thread->fb_clearscissor[0] + thread->fb_clearscissor[2]; - y2 = thread->fb_clearscissor[1] + thread->fb_clearscissor[3]; + x1 = thread->fb_scissor[0]; + y1 = thread->fb_scissor[1]; + x2 = thread->fb_scissor[0] + thread->fb_scissor[2]; + y2 = thread->fb_scissor[1] + thread->fb_scissor[3]; if (y1 < miny1) y1 = miny1; if (y2 > maxy2) y2 = maxy2; w = x2 - x1; @@ -946,10 +946,10 @@ static void DPSOFTRAST_Interpret_ClearDepth(DPSOFTRAST_State_Thread *thread, DPS unsigned int *p; unsigned int c; DPSOFTRAST_Validate(thread, DPSOFTRAST_VALIDATE_FB); - x1 = thread->fb_clearscissor[0]; - y1 = thread->fb_clearscissor[1]; - x2 = thread->fb_clearscissor[0] + thread->fb_clearscissor[2]; - y2 = thread->fb_clearscissor[1] + thread->fb_clearscissor[3]; + x1 = 
thread->fb_scissor[0]; + y1 = thread->fb_scissor[1]; + x2 = thread->fb_scissor[0] + thread->fb_scissor[2]; + y2 = thread->fb_scissor[1] + thread->fb_scissor[3]; if (y1 < miny1) y1 = miny1; if (y2 > maxy2) y2 = maxy2; w = x2 - x1; @@ -2125,6 +2125,21 @@ void DPSOFTRAST_Draw_Span_Finish(DPSOFTRAST_State_Thread *thread, const DPSOFTRA pixel[x*4+3] = d[3]; } break; + case DPSOFTRAST_BLENDMODE_INVADD: + for (x = startx;x < endx;x++) + { + if (!pixelmask[x]) + continue; + d[0] = (int)((255.0f-pixel[x*4+2])*in4f[x*4+0] + pixel[x*4+2]);if (d[0] > 255) d[0] = 255; + d[1] = (int)((255.0f-pixel[x*4+1])*in4f[x*4+1] + pixel[x*4+1]);if (d[1] > 255) d[1] = 255; + d[2] = (int)((255.0f-pixel[x*4+0])*in4f[x*4+2] + pixel[x*4+0]);if (d[2] > 255) d[2] = 255; + d[3] = (int)((255.0f-pixel[x*4+3])*in4f[x*4+3] + pixel[x*4+3]);if (d[3] > 255) d[3] = 255; + pixel[x*4+0] = d[0]; + pixel[x*4+1] = d[1]; + pixel[x*4+2] = d[2]; + pixel[x*4+3] = d[3]; + } + break; } } @@ -2259,6 +2274,12 @@ void DPSOFTRAST_Draw_Span_FinishBGRA8(DPSOFTRAST_State_Thread *thread, const DPS dst = _mm_add_epi16(src, _mm_sub_epi16(dst, _mm_srli_epi16(_mm_mullo_epi16(dst, blend), 8))); }); break; + case DPSOFTRAST_BLENDMODE_INVADD: + FINISHBLEND({ + dst = _mm_add_epi16(dst, _mm_mulhi_epi16(_mm_slli_epi16(_mm_sub_epi16(_mm_set1_epi16(255), dst), 4), _mm_slli_epi16(src, 4))); + }, { + dst = _mm_add_epi16(dst, _mm_mulhi_epi16(_mm_slli_epi16(_mm_sub_epi16(_mm_set1_epi16(255), dst), 4), _mm_slli_epi16(src, 4))); + }); } #endif } @@ -2469,7 +2490,7 @@ void DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(DPSOFTRAST_State_Thread *thread, // if no texture is bound, just fill it with white if (!texture) { - memset(out4ub + startx*4, 255, span->length*4); + memset(out4ub + startx*4, 255, (span->endx - span->startx)*4); return; } mip = triangle->mip[texunitindex]; @@ -2728,7 +2749,7 @@ void DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(DPSOFTRAST_State_Thread *thread, void DPSOFTRAST_Draw_Span_TextureCubeVaryingBGRA8(const 
DPSOFTRAST_State_Triangle * RESTRICT triangle, const DPSOFTRAST_State_Span * RESTRICT span, unsigned char * RESTRICT out4ub, int texunitindex, int arrayindex, const float * RESTRICT zf) { // TODO: IMPLEMENT - memset(out4ub, 255, span->length*4); + memset(out4ub + span->startx*4, 255, (span->endx - span->startx)*4); } float DPSOFTRAST_SampleShadowmap(const float *vector) @@ -3185,7 +3206,7 @@ void DPSOFTRAST_PixelShader_Depth_Or_Shadow(DPSOFTRAST_State_Thread *thread, con float buffer_z[DPSOFTRAST_DRAW_MAXSPANLENGTH]; unsigned char buffer_FragColorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4]; DPSOFTRAST_Draw_Span_Begin(thread, triangle, span, buffer_z); - memset(buffer_FragColorbgra8, 0, span->length*4); + memset(buffer_FragColorbgra8 + span->startx*4, 0, (span->endx - span->startx)*4); DPSOFTRAST_Draw_Span_FinishBGRA8(thread, triangle, span, buffer_FragColorbgra8); } @@ -3407,7 +3428,7 @@ void DPSOFTRAST_PixelShader_FakeLight(DPSOFTRAST_State_Thread *thread, const DPS float buffer_z[DPSOFTRAST_DRAW_MAXSPANLENGTH]; unsigned char buffer_FragColorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4]; DPSOFTRAST_Draw_Span_Begin(thread, triangle, span, buffer_z); - memset(buffer_FragColorbgra8, 0, span->length*4); + memset(buffer_FragColorbgra8 + span->startx*4, 0, (span->endx - span->startx)*4); DPSOFTRAST_Draw_Span_FinishBGRA8(thread, triangle, span, buffer_FragColorbgra8); } @@ -4090,7 +4111,7 @@ void DPSOFTRAST_PixelShader_Refraction(DPSOFTRAST_State_Thread *thread, const DP float buffer_z[DPSOFTRAST_DRAW_MAXSPANLENGTH]; unsigned char buffer_FragColorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4]; DPSOFTRAST_Draw_Span_Begin(thread, triangle, span, buffer_z); - memset(buffer_FragColorbgra8, 0, span->length*4); + memset(buffer_FragColorbgra8 + span->startx*4, 0, (span->endx - span->startx)*4); DPSOFTRAST_Draw_Span_FinishBGRA8(thread, triangle, span, buffer_FragColorbgra8); } @@ -4108,7 +4129,7 @@ void DPSOFTRAST_PixelShader_Water(DPSOFTRAST_State_Thread *thread, const DPSOFTR float
buffer_z[DPSOFTRAST_DRAW_MAXSPANLENGTH]; unsigned char buffer_FragColorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4]; DPSOFTRAST_Draw_Span_Begin(thread, triangle, span, buffer_z); - memset(buffer_FragColorbgra8, 0, span->length*4); + memset(buffer_FragColorbgra8 + span->startx*4, 0, (span->endx - span->startx)*4); DPSOFTRAST_Draw_Span_FinishBGRA8(thread, triangle, span, buffer_FragColorbgra8); } @@ -4125,7 +4146,7 @@ void DPSOFTRAST_PixelShader_ShowDepth(DPSOFTRAST_State_Thread *thread, const DPS float buffer_z[DPSOFTRAST_DRAW_MAXSPANLENGTH]; unsigned char buffer_FragColorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4]; DPSOFTRAST_Draw_Span_Begin(thread, triangle, span, buffer_z); - memset(buffer_FragColorbgra8, 0, span->length*4); + memset(buffer_FragColorbgra8 + span->startx*4, 0, (span->endx - span->startx)*4); DPSOFTRAST_Draw_Span_FinishBGRA8(thread, triangle, span, buffer_FragColorbgra8); } @@ -4142,7 +4163,7 @@ void DPSOFTRAST_PixelShader_DeferredGeometry(DPSOFTRAST_State_Thread *thread, co float buffer_z[DPSOFTRAST_DRAW_MAXSPANLENGTH]; unsigned char buffer_FragColorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4]; DPSOFTRAST_Draw_Span_Begin(thread, triangle, span, buffer_z); - memset(buffer_FragColorbgra8, 0, span->length*4); + memset(buffer_FragColorbgra8 + span->startx*4, 0, (span->endx - span->startx)*4); DPSOFTRAST_Draw_Span_FinishBGRA8(thread, triangle, span, buffer_FragColorbgra8); } @@ -4159,7 +4180,7 @@ void DPSOFTRAST_PixelShader_DeferredLightSource(DPSOFTRAST_State_Thread *thread, float buffer_z[DPSOFTRAST_DRAW_MAXSPANLENGTH]; unsigned char buffer_FragColorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4]; DPSOFTRAST_Draw_Span_Begin(thread, triangle, span, buffer_z); - memset(buffer_FragColorbgra8, 0, span->length*4); + memset(buffer_FragColorbgra8 + span->startx*4, 0, (span->endx - span->startx)*4); DPSOFTRAST_Draw_Span_FinishBGRA8(thread, triangle, span, buffer_FragColorbgra8); } @@ -4223,23 +4244,23 @@ void DPSOFTRAST_Draw_ProcessSpans(DPSOFTRAST_State_Thread *thread) depthslope = 
(int)(wslope*DPSOFTRAST_DEPTHSCALE); depth = (int)(w*DPSOFTRAST_DEPTHSCALE - DPSOFTRAST_DEPTHOFFSET*(thread->polygonoffset[1] + fabs(wslope)*thread->polygonoffset[0])); depthpixel = dpsoftrast.fb_depthpixels + span->y * dpsoftrast.fb_width + span->x; + startx = span->startx; + endx = span->endx; switch(thread->fb_depthfunc) { default: - case GL_ALWAYS: for (x = 0, d = depth;x < span->length;x++, d += depthslope) pixelmask[x] = true; break; - case GL_LESS: for (x = 0, d = depth;x < span->length;x++, d += depthslope) pixelmask[x] = depthpixel[x] < d; break; - case GL_LEQUAL: for (x = 0, d = depth;x < span->length;x++, d += depthslope) pixelmask[x] = depthpixel[x] <= d; break; - case GL_EQUAL: for (x = 0, d = depth;x < span->length;x++, d += depthslope) pixelmask[x] = depthpixel[x] == d; break; - case GL_GEQUAL: for (x = 0, d = depth;x < span->length;x++, d += depthslope) pixelmask[x] = depthpixel[x] >= d; break; - case GL_GREATER: for (x = 0, d = depth;x < span->length;x++, d += depthslope) pixelmask[x] = depthpixel[x] > d; break; - case GL_NEVER: for (x = 0, d = depth;x < span->length;x++, d += depthslope) pixelmask[x] = false; break; + case GL_ALWAYS: for (x = startx, d = depth + depthslope*startx;x < endx;x++, d += depthslope) pixelmask[x] = true; break; + case GL_LESS: for (x = startx, d = depth + depthslope*startx;x < endx;x++, d += depthslope) pixelmask[x] = depthpixel[x] < d; break; + case GL_LEQUAL: for (x = startx, d = depth + depthslope*startx;x < endx;x++, d += depthslope) pixelmask[x] = depthpixel[x] <= d; break; + case GL_EQUAL: for (x = startx, d = depth + depthslope*startx;x < endx;x++, d += depthslope) pixelmask[x] = depthpixel[x] == d; break; + case GL_GEQUAL: for (x = startx, d = depth + depthslope*startx;x < endx;x++, d += depthslope) pixelmask[x] = depthpixel[x] >= d; break; + case GL_GREATER: for (x = startx, d = depth + depthslope*startx;x < endx;x++, d += depthslope) pixelmask[x] = depthpixel[x] > d; break; + case GL_NEVER: for (x = startx, d = 
depth + depthslope*startx;x < endx;x++, d += depthslope) pixelmask[x] = false; break; } //colorpixel = dpsoftrast.fb_colorpixels[0] + (span->y * dpsoftrast.fb_width + span->x) * 4;; - //for (x = 0;x < span->length;x++) + //for (x = startx;x < endx;x++) // colorpixel[x] = (depthpixel[x] & 0xFF000000) ? (0x00FF0000) : (depthpixel[x] & 0x00FF0000); // if there is no color buffer, skip pixel shader - startx = 0; - endx = span->length; while (startx < endx && !pixelmask[startx]) startx++; while (endx > startx && !pixelmask[endx-1]) @@ -4265,10 +4286,8 @@ void DPSOFTRAST_Draw_ProcessSpans(DPSOFTRAST_State_Thread *thread) // if there is no color buffer, skip pixel shader if (dpsoftrast.fb_colorpixels[0] && thread->fb_colormask) { - memset(pixelmask, 1, span->length); + memset(pixelmask + span->startx, 1, span->endx - span->startx); span->pixelmask = pixelmask; - span->startx = 0; - span->endx = span->length; DPSOFTRAST_ShaderModeTable[thread->shader_mode].Span(thread, triangle, span); } } @@ -4282,11 +4301,8 @@ static void DPSOFTRAST_Interpret_Draw(DPSOFTRAST_State_Thread *thread, DPSOFTRAS { #ifdef SSE2_PRESENT int cullface = thread->cullface; - int width = dpsoftrast.fb_width; - int miny1 = thread->miny1; - int maxy1 = thread->maxy1; - int miny2 = thread->miny2; - int maxy2 = thread->maxy2; + int minx, maxx, miny, maxy; + int miny1, maxy1, miny2, maxy2; __m128i fbmin, fbmax; __m128 viewportcenter, viewportscale; int firstvertex = command->firstvertex; @@ -4310,6 +4326,13 @@ static void DPSOFTRAST_Interpret_Draw(DPSOFTRAST_State_Thread *thread, DPSOFTRAS __m128 screen[4]; DPSOFTRAST_State_Triangle *triangle; DPSOFTRAST_Texture *texture; + DPSOFTRAST_ValidateQuick(thread, DPSOFTRAST_VALIDATE_DRAW); + miny = thread->fb_scissor[1]; + maxy = thread->fb_scissor[1] + thread->fb_scissor[3]; + miny1 = bound(miny, thread->miny1, maxy); + maxy1 = bound(miny, thread->maxy1, maxy); + miny2 = bound(miny, thread->miny2, maxy); + maxy2 = bound(miny, thread->maxy2, maxy); if 
((command->starty >= maxy1 || command->endy <= miny1) && (command->starty >= maxy2 || command->endy <= miny2)) { if (!ATOMIC_DECREMENT(command->refcount)) @@ -4319,9 +4342,10 @@ static void DPSOFTRAST_Interpret_Draw(DPSOFTRAST_State_Thread *thread, DPSOFTRAS } return; } - DPSOFTRAST_ValidateQuick(thread, DPSOFTRAST_VALIDATE_DRAW); - fbmin = _mm_setr_epi16(0, miny1, 0, miny1, 0, miny1, 0, miny1); - fbmax = _mm_sub_epi16(_mm_setr_epi16(width, maxy2, width, maxy2, width, maxy2, width, maxy2), _mm_set1_epi16(1)); + minx = thread->fb_scissor[0]; + maxx = thread->fb_scissor[0] + thread->fb_scissor[2]; + fbmin = _mm_setr_epi16(minx, miny1, minx, miny1, minx, miny1, minx, miny1); + fbmax = _mm_sub_epi16(_mm_setr_epi16(maxx, maxy2, maxx, maxy2, maxx, maxy2, maxx, maxy2), _mm_set1_epi16(1)); viewportcenter = _mm_load_ps(thread->fb_viewportcenter); viewportscale = _mm_load_ps(thread->fb_viewportscale); screen[3] = _mm_setzero_ps(); @@ -4644,19 +4668,23 @@ static void DPSOFTRAST_Interpret_Draw(DPSOFTRAST_State_Thread *thread, DPSOFTRAS int startx, endx, offset; startx = _mm_cvtss_si32(xcoords); endx = _mm_cvtss_si32(_mm_movehl_ps(xcoords, xcoords)); - if (startx < 0) startx = 0; - if (endx > dpsoftrast.fb_width) endx = dpsoftrast.fb_width; + if (startx < minx) + { + if (startx < 0) startx = 0; + startx += (minx-startx)&~(DPSOFTRAST_DRAW_MAXSPANLENGTH-1); + } + if (endx > maxx) endx = maxx; if (startx >= endx) continue; - for (offset = startx; offset < endx;) + for (offset = startx; offset < endx;offset += DPSOFTRAST_DRAW_MAXSPANLENGTH) { DPSOFTRAST_State_Span *span = &thread->spans[thread->numspans]; span->triangle = thread->numtriangles; span->x = offset; span->y = y; - span->length = endx - offset; - if (span -> length > DPSOFTRAST_DRAW_MAXSPANLENGTH) - span -> length = DPSOFTRAST_DRAW_MAXSPANLENGTH; - offset += span->length; + span->startx = max(minx - offset, 0); + span->endx = min(endx - offset, DPSOFTRAST_DRAW_MAXSPANLENGTH); + if (span->startx >= span->endx) + continue; 
if (++thread->numspans >= DPSOFTRAST_DRAW_MAXSPANS) DPSOFTRAST_Draw_ProcessSpans(thread); } -- 2.39.2