- return commandoffset;
-}
-
-void DPSOFTRAST_Draw_GenerateSpans(DPSOFTRAST_State_Thread *thread, int freetriangle) // Turns pooled triangles into horizontal spans for one thread's band of scanlines and passes them to DPSOFTRAST_Draw_ProcessSpans.
-{ // thread: supplies the band index, the resume offsets and the span buffer; freetriangle: pool index at which the walk stops (exclusive).
- int miny = (thread->index*dpsoftrast.fb_height)/dpsoftrast.numthreads; // first row of this thread's band (framebuffer rows are divided by thread index)
- int maxy = ((thread->index+1)*dpsoftrast.fb_height)/dpsoftrast.numthreads; // one past the last row of this thread's band
- int commandoffset = thread->commandoffset; // working copy, stored back to the thread at the end
- int triangleoffset = thread->triangleoffset; // working copy, stored back to the thread at the end
- DPSOFTRAST_State_Triangle *triangle = NULL; // last triangle fetched by the loop; stays NULL if the loop body never runs
- int starty;
- int endy;
- int y;
- int numpoints;
- __m128 coords[4]; // per-vertex floats; the edge math below uses lane 0 as x and lane 1 as y
- __m128i ycoords; // per-vertex integer y, one 32-bit lane per vertex, compared against the current row
- while (triangleoffset != freetriangle) // consume triangles until this thread has caught up with freetriangle
- {
- triangle = &dpsoftrast.trianglepool.triangles[triangleoffset];
- if (++triangleoffset >= DPSOFTRAST_DRAW_MAXTRIANGLEPOOL) // advance the pool index, wrapping back to 0 at the pool size
- triangleoffset = 0;
- starty = triangle->starty + 1; // first row considered is one past triangle->starty
- endy = triangle->endy;
- if (starty >= maxy || endy <= miny) // triangle has no rows inside this thread's band
- continue;
- numpoints = triangle->numpoints;
- coords[0] = _mm_load_ps(triangle->coords[0]);
- coords[1] = _mm_load_ps(triangle->coords[1]);
- coords[2] = _mm_load_ps(triangle->coords[2]);
- coords[3] = _mm_load_ps(triangle->coords[3]);
- ycoords = _mm_load_si128((const __m128i *)triangle->ycoords);
- if (starty < miny) // clamp the row range to the band
- starty = miny;
- if (endy > maxy)
- endy = maxy;
- for (y = starty; y < endy;) // y is advanced inside the body: by the switch shortcuts or by the per-row loop
- {
- __m128 xcoords, xslope;
- __m128i ycc = _mm_cmpgt_epi32(_mm_set1_epi32(y), ycoords); // lane is all ones where y > that vertex's y
- int yccmask = _mm_movemask_epi8(ycc); // 16-bit mask, one nibble per vertex lane: 0xF where y > vertex y, 0x0 otherwise
- int edge0p, edge0n, edge1p, edge1n; // vertex indices: edge0 runs from coords[edge0p] to coords[edge0n], edge1 likewise
- int nexty;
- if (numpoints == 4) // four-vertex case; any other count uses the three-vertex table in the else branch
- {
- switch(yccmask) // pick the two edges crossing this row; the /*bits*/ notes list vertices 0..3, 1 = y is not greater than that vertex's y
- {
- default: // any mask not listed is handled like 0xFFFF
- case 0xFFFF: /*0000*/ y = endy; continue; // y is greater than every vertex y: stop processing this triangle
- case 0xFFF0: /*1000*/ edge0p = 3;edge0n = 0;edge1p = 1;edge1n = 0;break;
- case 0xFF0F: /*0100*/ edge0p = 0;edge0n = 1;edge1p = 2;edge1n = 1;break;
- case 0xFF00: /*1100*/ edge0p = 3;edge0n = 0;edge1p = 2;edge1n = 1;break;
- case 0xF0FF: /*0010*/ edge0p = 1;edge0n = 2;edge1p = 3;edge1n = 2;break;
- case 0xF0F0: /*1010*/ edge0p = 1;edge0n = 2;edge1p = 3;edge1n = 2;break; // concave - nonsense
- case 0xF00F: /*0110*/ edge0p = 0;edge0n = 1;edge1p = 3;edge1n = 2;break;
- case 0xF000: /*1110*/ edge0p = 3;edge0n = 0;edge1p = 3;edge1n = 2;break;
- case 0x0FFF: /*0001*/ edge0p = 2;edge0n = 3;edge1p = 0;edge1n = 3;break;
- case 0x0FF0: /*1001*/ edge0p = 2;edge0n = 3;edge1p = 1;edge1n = 0;break;
- case 0x0F0F: /*0101*/ edge0p = 2;edge0n = 3;edge1p = 2;edge1n = 1;break; // concave - nonsense
- case 0x0F00: /*1101*/ edge0p = 2;edge0n = 3;edge1p = 2;edge1n = 1;break;
- case 0x00FF: /*0011*/ edge0p = 1;edge0n = 2;edge1p = 0;edge1n = 3;break;
- case 0x00F0: /*1011*/ edge0p = 1;edge0n = 2;edge1p = 1;edge1n = 0;break;
- case 0x000F: /*0111*/ edge0p = 0;edge0n = 1;edge1p = 0;edge1n = 3;break;
- case 0x0000: /*1111*/ y++; continue; // y is not greater than any vertex y: move on to the next row
- }
- }
- else
- {
- switch(yccmask) // NOTE(review): every listed mask has lane 3 equal to lane 2, so ycoords[3] presumably duplicates ycoords[2] for triangles - confirm where ycoords is filled
- {
- default: // any mask not listed is handled like 0xFFFF
- case 0xFFFF: /*000*/ y = endy; continue; // y is greater than every vertex y: stop processing this triangle
- case 0xFFF0: /*100*/ edge0p = 2;edge0n = 0;edge1p = 1;edge1n = 0;break;
- case 0xFF0F: /*010*/ edge0p = 0;edge0n = 1;edge1p = 2;edge1n = 1;break;
- case 0xFF00: /*110*/ edge0p = 2;edge0n = 0;edge1p = 2;edge1n = 1;break;
- case 0x00FF: /*001*/ edge0p = 1;edge0n = 2;edge1p = 0;edge1n = 2;break;
- case 0x00F0: /*101*/ edge0p = 1;edge0n = 2;edge1p = 1;edge1n = 0;break;
- case 0x000F: /*011*/ edge0p = 0;edge0n = 1;edge1p = 0;edge1n = 2;break;
- case 0x0000: /*111*/ y++; continue; // y is not greater than any vertex y: move on to the next row
- }
- }
- ycc = _mm_max_epi16(_mm_srli_epi16(ycc, 1), ycoords); // lanes already passed (all ones) become 0x7FFF per 16-bit half; the others keep their vertex y
- ycc = _mm_min_epi16(ycc, _mm_shuffle_epi32(ycc, _MM_SHUFFLE(1, 0, 3, 2))); // min with the lanes two positions away
- ycc = _mm_min_epi16(ycc, _mm_shuffle_epi32(ycc, _MM_SHUFFLE(2, 3, 0, 1))); // min with the adjacent lane: every lane now holds the minimum over all four
- nexty = _mm_extract_epi16(ycc, 0); // smallest vertex y that y has not yet exceeded (low 16 bits only)
- if(nexty >= endy) nexty = endy-1; // keep the per-row loop below (y <= nexty) inside the clamped range
- if (_mm_ucomigt_ss(_mm_max_ss(coords[edge0n], coords[edge0p]), _mm_min_ss(coords[edge1n], coords[edge1p]))) // edge0's larger x exceeds edge1's smaller x: swap so edge0 feeds startx and edge1 feeds endx
- {
- int tmp = edge0n;
- edge0n = edge1n;
- edge1n = tmp;
- tmp = edge0p;
- edge0p = edge1p;
- edge1p = tmp;
- }
- xslope = _mm_sub_ps(_mm_movelh_ps(coords[edge0n], coords[edge1n]), _mm_movelh_ps(coords[edge0p], coords[edge1p])); // lanes: (dx, dy) of edge0 then (dx, dy) of edge1
- xslope = _mm_div_ps(xslope, _mm_shuffle_ps(xslope, xslope, _MM_SHUFFLE(3, 3, 1, 1))); // divide by each edge's dy: lanes 0 and 2 become the x step per row
- xcoords = _mm_add_ps(_mm_movelh_ps(coords[edge0p], coords[edge1p]), // x of each edge at row y: start x plus slope times (y - start y)
- _mm_mul_ps(xslope, _mm_sub_ps(_mm_set1_ps(y), _mm_shuffle_ps(coords[edge0p], coords[edge1p], _MM_SHUFFLE(1, 1, 1, 1)))));
- xcoords = _mm_add_ps(xcoords, _mm_set1_ps(0.5f)); // 0.5 bias applied before the float-to-int conversions below
- for(; y <= nexty; y++, xcoords = _mm_add_ps(xcoords, xslope)) // one iteration per row, stepping both edges by their slope
- {
- int startx, endx, offset;
- startx = _mm_cvtss_si32(xcoords); // lane 0: x on edge0
- endx = _mm_cvtss_si32(_mm_movehl_ps(xcoords, xcoords)); // lane 2: x on edge1
- if (startx < 0) startx = 0; // clamp the row extent to [0, fb_width]
- if (endx > dpsoftrast.fb_width) endx = dpsoftrast.fb_width;
- if (startx >= endx) continue; // nothing to draw on this row
- for (offset = startx; offset < endx;) // cut the row into spans of at most DPSOFTRAST_DRAW_MAXSPANLENGTH pixels
- {
- DPSOFTRAST_State_Span *span = &thread->spans[thread->numspans];
- span->triangle = (int)(triangle - dpsoftrast.trianglepool.triangles); // stored as an index into the triangle pool
- span->x = offset;
- span->y = y;
- span->length = endx - offset;
- if (span -> length > DPSOFTRAST_DRAW_MAXSPANLENGTH)
- span -> length = DPSOFTRAST_DRAW_MAXSPANLENGTH;
- offset += span->length;
- if (++thread->numspans >= DPSOFTRAST_DRAW_MAXSPANS) // span buffer full: process it now (presumably this also resets thread->numspans - confirm)
- commandoffset = DPSOFTRAST_Draw_ProcessSpans(thread, commandoffset);
- }
- }
- }
- }
-
- if (thread->numspans > 0) // process whatever spans are still buffered
- commandoffset = DPSOFTRAST_Draw_ProcessSpans(thread, commandoffset);
- if (commandoffset != triangle->commandoffset) // NOTE(review): triangle is still NULL here if the loop above never ran; the visible caller only calls when thread->triangleoffset differs from the target
- {
- commandoffset = DPSOFTRAST_Draw_InterpretCommands(thread, commandoffset, triangle->commandoffset); // interpret commands up to the last fetched triangle's commandoffset
- DPSOFTRAST_ValidateQuick(thread, DPSOFTRAST_VALIDATE_DRAW);
- }
-
- MEMORY_BARRIER; // placed before the offsets are stored back into the thread state
-
- thread->commandoffset = commandoffset;
- thread->triangleoffset = triangleoffset;
-}
-
-void DPSOFTRAST_Draw_FlushThreads(void)
-{
- DPSOFTRAST_State_Thread *thread;
- int i;
- if(dpsoftrast.drawtriangle != dpsoftrast.trianglepool.freetriangle)
- {
- MEMORY_BARRIER;
- dpsoftrast.drawtriangle = dpsoftrast.trianglepool.freetriangle;
- }
-#ifdef USETHREADS
- SDL_LockMutex(dpsoftrast.trianglemutex);
-#endif
- for (i = 0; i < dpsoftrast.numthreads; i++)
- {
- thread = &dpsoftrast.threads[i];
-#ifdef USETHREADS
- while (thread->triangleoffset != dpsoftrast.drawtriangle)
- {
- thread->waiting = true;
- SDL_CondBroadcast(dpsoftrast.trianglecond);
- SDL_CondWait(thread->waitcond, dpsoftrast.trianglemutex);
- thread->waiting = false;
- }
-#else
- if (thread->triangleoffset != dpsoftrast.drawtriangle)
- DPSOFTRAST_Draw_GenerateSpans(thread, dpsoftrast.drawtriangle);
-#endif
- }
-#ifdef USETHREADS
- SDL_UnlockMutex(dpsoftrast.trianglemutex);
-#endif
- dpsoftrast.trianglepool.usedtriangles = 0;
- dpsoftrast.commandpool.usedcommands = 0;