From f8f2c6e04027f3fb0a1cce5efa4aacac25bbcf60 Mon Sep 17 00:00:00 2001 From: havoc Date: Fri, 11 Mar 2011 10:12:14 +0000 Subject: [PATCH] cache collision trace results from frame to frame increased r_shadow_bouncegrid_photons from 2000 to 5000 (same performance, thanks to the collision cache) git-svn-id: svn://svn.icculus.org/twilight/trunk/darkplaces@10907 d7cf8633-e32d-0410-b094-e92efae38249 --- cl_parse.c | 1 + cl_screen.c | 2 + client.h | 3 + collision.c | 225 ++++++++++++++++++++++++++++++++++++++++++++++++++++ collision.h | 4 + host.c | 5 ++ r_shadow.c | 2 +- sv_main.c | 2 + todo | 3 + 9 files changed, 246 insertions(+), 1 deletion(-) diff --git a/cl_parse.c b/cl_parse.c index 08c726fa..29eb8b72 100644 --- a/cl_parse.c +++ b/cl_parse.c @@ -1620,6 +1620,7 @@ void CL_ParseServerInfo (void) int nummodels, numsounds; Con_DPrint("Serverinfo packet received.\n"); + Collision_Cache_Reset(true); // if server is active, we already began a loading plaque if (!sv.active) diff --git a/cl_screen.c b/cl_screen.c index e152f425..6917068d 100644 --- a/cl_screen.c +++ b/cl_screen.c @@ -803,6 +803,7 @@ void R_TimeReport_EndFrame(void) "%7i lightmap updates (%7i pixels)%8iKB/%8iKB framedata\n" "%4i lights%4i clears%4i scissored%7i light%7i shadow%7i dynamic\n" "bouncegrid:%4i lights%6i particles%6i traces%6i hits%6i splats%6i bounces\n" +"collision cache efficiency:%6i cached%6i traced%6i animated\n" "%6i draws%8i vertices%8i triangles bloompixels%8i copied%8i drawn\n" "updated%5i indexbuffers%8i bytes%5i vertexbuffers%8i bytes\n" "%s" @@ -814,6 +815,7 @@ void R_TimeReport_EndFrame(void) , r_refdef.stats.lightmapupdates, r_refdef.stats.lightmapupdatepixels, (r_refdef.stats.framedatacurrent+512) / 1024, (r_refdef.stats.framedatasize+512)/1024 , r_refdef.stats.lights, r_refdef.stats.lights_clears, r_refdef.stats.lights_scissored, r_refdef.stats.lights_lighttriangles, r_refdef.stats.lights_shadowtriangles, r_refdef.stats.lights_dynamicshadowtriangles , 
r_refdef.stats.bouncegrid_lights, r_refdef.stats.bouncegrid_particles, r_refdef.stats.bouncegrid_traces, r_refdef.stats.bouncegrid_hits, r_refdef.stats.bouncegrid_splats, r_refdef.stats.bouncegrid_bounces +, r_refdef.stats.collisioncache_cached, r_refdef.stats.collisioncache_traced, r_refdef.stats.collisioncache_animated , r_refdef.stats.draws, r_refdef.stats.draws_vertices, r_refdef.stats.draws_elements / 3, r_refdef.stats.bloom_copypixels, r_refdef.stats.bloom_drawpixels , r_refdef.stats.indexbufferuploadcount, r_refdef.stats.indexbufferuploadsize, r_refdef.stats.vertexbufferuploadcount, r_refdef.stats.vertexbufferuploadsize , r_speeds_timestring); diff --git a/client.h b/client.h index 1147c8ac..54c637b2 100644 --- a/client.h +++ b/client.h @@ -1579,6 +1579,9 @@ typedef struct r_refdef_stats_s int bouncegrid_hits; int bouncegrid_splats; int bouncegrid_bounces; + int collisioncache_animated; + int collisioncache_cached; + int collisioncache_traced; int bloom; int bloom_copypixels; int bloom_drawpixels; diff --git a/collision.c b/collision.c index 5e0e017f..9f4a416e 100644 --- a/collision.c +++ b/collision.c @@ -19,6 +19,9 @@ cvar_t collision_prefernudgedfraction = {0, "collision_prefernudgedfraction", "1 cvar_t collision_endposnudge = {0, "collision_endposnudge", "0", "workaround to fix trace_endpos sometimes being returned where it would be inside solid by making that collision hit (recommended: values like 1)"}; #endif cvar_t collision_debug_tracelineasbox = {0, "collision_debug_tracelineasbox", "0", "workaround for any bugs in Collision_TraceLineBrushFloat by using Collision_TraceBrushBrushFloat"}; +cvar_t collision_cache = {0, "collision_cache", "1", "store results of collision traces for next frame to reuse if possible (optimization)"}; + +mempool_t *collision_mempool; void Collision_Init (void) { @@ -32,6 +35,9 @@ void Collision_Init (void) Cvar_RegisterVariable(&collision_endposnudge); #endif Cvar_RegisterVariable(&collision_debug_tracelineasbox); + 
Cvar_RegisterVariable(&collision_cache); + collision_mempool = Mem_AllocPool("collision cache", 0, NULL); + Collision_Cache_Init(collision_mempool); } @@ -1660,9 +1666,172 @@ void Collision_TransformBrush(const matrix4x4_t *matrix, colbrushf_t *brush) } } +typedef struct collision_cachedtrace_s /* one cache slot: the parameters of a trace (the lookup key) together with the result it produced */ +{ + int next; /* array index of the next slot in the same hash bucket, 0 ends the chain */ + int sequence; /* value of collision_cachedtrace_sequence when the slot was last used, 0 marks the slot as free */ + dp_model_t *model; +// const frameblend_t *frameblend; +// const skeleton_t *skeleton; + vec3_t bodymins; + vec3_t bodymaxs; + int bodysupercontents; + matrix4x4_t matrix; + matrix4x4_t inversematrix; /* stored with the slot, but Collision_Cache_Lookup compares only matrix */ + vec3_t start; + vec3_t mins; + vec3_t maxs; + vec3_t end; + int hitsupercontentsmask; + trace_t result; /* written by the Collision_Clip* callers after tracing and copied back to the caller on a cache hit */ +} +collision_cachedtrace_t; + +static mempool_t *collision_cachedtrace_mempool; /* pool that the array and hash table are allocated from */ +static collision_cachedtrace_t *collision_cachedtrace_array; /* slot storage, slot 0 is shared by all traces that are not cached */ +static int collision_cachedtrace_firstfree; /* the search for a free slot starts at this index */ +static int collision_cachedtrace_lastused; /* highest slot index currently in use */ +static int collision_cachedtrace_max; /* number of elements in both the array and the hash table */ +static int collision_cachedtrace_sequence; /* current frame stamp, advanced by Collision_Cache_NewFrame and never 0 */ +static int *collision_cachedtrace_hash; /* per bucket: array index of the first slot in the chain, 0 means empty */ + +void Collision_Cache_Reset(qboolean resetlimits) /* discards every cached trace and reallocates the array and hash table; resetlimits (or a size of 0) restores the default size of 1024 slots */ +{ + if (collision_cachedtrace_hash) + Mem_Free(collision_cachedtrace_hash); + if (collision_cachedtrace_array) + Mem_Free(collision_cachedtrace_array); + if (resetlimits || !collision_cachedtrace_max) + collision_cachedtrace_max = 1024; + collision_cachedtrace_firstfree = 1; /* slot 0 is never handed out as a cached slot */ + collision_cachedtrace_lastused = 0; + collision_cachedtrace_array = Mem_Alloc(collision_cachedtrace_mempool, collision_cachedtrace_max * sizeof(collision_cachedtrace_t)); /* NOTE(review): the cache relies on new slots having sequence 0 and new buckets being 0, presumably Mem_Alloc zero-fills - confirm */ + collision_cachedtrace_hash = Mem_Alloc(collision_cachedtrace_mempool, collision_cachedtrace_max * sizeof(int)); + collision_cachedtrace_sequence = 1; +} + +void Collision_Cache_Init(mempool_t *mempool) /* remembers the pool used for cache allocations and builds the initial default-sized cache */ +{ + collision_cachedtrace_mempool = mempool; + Collision_Cache_Reset(true); +} + +void Collision_Cache_NewFrame(void) /* unlinks slots that were not used during the current sequence, shrinks the used range, then advances the sequence */ +{ + int hashindex; + int index; + int *p; /* points at the link (bucket head or next field) that refers to the slot being examined */ + // unlink all stale traces + for (hashindex = 0;hashindex < collision_cachedtrace_max;hashindex++) + { + if 
(!collision_cachedtrace_hash[hashindex]) + continue; + p = &collision_cachedtrace_hash[hashindex]; + while ((index = *p)) + { + if (collision_cachedtrace_array[index].sequence != collision_cachedtrace_sequence) + { + if (collision_cachedtrace_firstfree > index) + collision_cachedtrace_firstfree = index; + *p = collision_cachedtrace_array[index].next; + collision_cachedtrace_array[index].sequence = 0; + //memset(&collision_cachedtrace_array[index], 0, sizeof(collision_cachedtrace_array[index])); + } + else + p = &collision_cachedtrace_array[index].next; + } + } + // shrink used range if possible + index = collision_cachedtrace_lastused; + while (index && collision_cachedtrace_array[index].sequence == 0) + index--; + collision_cachedtrace_lastused = index; + // increment sequence + collision_cachedtrace_sequence++; + // do not allow sequence to wrap to 0 + if (collision_cachedtrace_sequence >= 1<<30) + collision_cachedtrace_sequence = 1; +} + +static collision_cachedtrace_t *Collision_Cache_Lookup(dp_model_t *model, const frameblend_t *frameblend, const skeleton_t *skeleton, const vec3_t bodymins, const vec3_t bodymaxs, int bodysupercontents, const matrix4x4_t *matrix, const matrix4x4_t *inversematrix, const vec3_t start, const vec3_t mins, const vec3_t maxs, const vec3_t end, int hitsupercontentsmask) /* returns the cache slot for this trace: on a hit the matching slot is returned as stored, otherwise the returned slot has sequence 0 and the caller must perform the trace and store sequence and result; animated traces and traces made while collision_cache is 0 all get slot 0 */ +{ + int hashindex = 0; + int index = 0; + collision_cachedtrace_t *cached = collision_cachedtrace_array + index; + // all non-cached traces use the same index + if ((frameblend && frameblend[0].lerp != 1) || (skeleton && skeleton->relativetransforms)) + r_refdef.stats.collisioncache_animated++; + else if (!collision_cache.integer) + r_refdef.stats.collisioncache_traced++; + else + { + // cached trace lookup; the float sum is converted through int before size_t because converting a negative float directly to an unsigned type is undefined behavior (assumes the sum fits in int, the bucket is unchanged for non-negative sums) + hashindex = (int)(((size_t)model + (size_t) + (size_t)(int)(VectorLength2(bodymins) + VectorLength2(bodymaxs) + start[0] + start[1] + start[2] + end[0] + end[1] + end[2]) + bodysupercontents + hitsupercontentsmask) % collision_cachedtrace_max); + for (index = 
collision_cachedtrace_hash[hashindex];index;index = cached->next) + { + cached = collision_cachedtrace_array + index; + if (cached->model == model + && VectorCompare(cached->bodymins, bodymins) + && VectorCompare(cached->bodymaxs, bodymaxs) + && cached->bodysupercontents == bodysupercontents + && VectorCompare(cached->start, start) + && VectorCompare(cached->mins, mins) + && VectorCompare(cached->maxs, maxs) + && VectorCompare(cached->end, end) + && cached->hitsupercontentsmask == hitsupercontentsmask + && !memcmp(&cached->matrix, matrix, sizeof(*matrix))) + { + r_refdef.stats.collisioncache_cached++; + return cached; // found a match + } + } + r_refdef.stats.collisioncache_traced++; + // find an unused cache entry + for (index = collision_cachedtrace_firstfree;index <= collision_cachedtrace_lastused;index++) + if (!collision_cachedtrace_array[index].sequence) + break; + collision_cachedtrace_firstfree = index; + if (index > collision_cachedtrace_lastused) + { + // see if we need to reset the cache for growth + if (collision_cachedtrace_max <= index) + { + collision_cachedtrace_max *= 2; + Collision_Cache_Reset(false); + collision_cachedtrace_firstfree = index = 1; + } + collision_cachedtrace_lastused = index; + } + // link the new cache entry into the hash bucket + cached = collision_cachedtrace_array + index; + cached->next = collision_cachedtrace_hash[hashindex]; + collision_cachedtrace_hash[hashindex] = index; + cached->model = model; + VectorCopy(bodymins, cached->bodymins); + VectorCopy(bodymaxs, cached->bodymaxs); + cached->bodysupercontents = bodysupercontents; + VectorCopy(start, cached->start); + VectorCopy(mins, cached->mins); + VectorCopy(maxs, cached->maxs); + VectorCopy(end, cached->end); + cached->hitsupercontentsmask = hitsupercontentsmask; + cached->matrix = *matrix; + cached->inversematrix = *inversematrix; + } + cached->sequence = 0; + return cached; +} + void Collision_ClipToGenericEntity(trace_t *trace, dp_model_t *model, const frameblend_t 
*frameblend, const skeleton_t *skeleton, const vec3_t bodymins, const vec3_t bodymaxs, int bodysupercontents, matrix4x4_t *matrix, matrix4x4_t *inversematrix, const vec3_t start, const vec3_t mins, const vec3_t maxs, const vec3_t end, int hitsupercontentsmask) { float starttransformed[3], endtransformed[3]; + collision_cachedtrace_t *cached = Collision_Cache_Lookup(model, frameblend, skeleton, bodymins, bodymaxs, bodysupercontents, matrix, inversematrix, start, mins, maxs, end, hitsupercontentsmask); + if (cached->sequence) + { + cached->sequence = collision_cachedtrace_sequence; + *trace = cached->result; + return; + } memset(trace, 0, sizeof(*trace)); trace->fraction = trace->realfraction = 1; @@ -1703,10 +1872,21 @@ void Collision_ClipToGenericEntity(trace_t *trace, dp_model_t *model, const fram // transform plane // NOTE: this relies on plane.dist being directly after plane.normal Matrix4x4_TransformPositivePlane(matrix, trace->plane.normal[0], trace->plane.normal[1], trace->plane.normal[2], trace->plane.dist, trace->plane.normal); + + cached->sequence = collision_cachedtrace_sequence; + cached->result = *trace; } void Collision_ClipToWorld(trace_t *trace, dp_model_t *model, const vec3_t start, const vec3_t mins, const vec3_t maxs, const vec3_t end, int hitsupercontents) { + collision_cachedtrace_t *cached = Collision_Cache_Lookup(model, NULL, NULL, vec3_origin, vec3_origin, 0, &identitymatrix, &identitymatrix, start, mins, maxs, end, hitsupercontents); + if (cached->sequence) + { + cached->sequence = collision_cachedtrace_sequence; + *trace = cached->result; + return; + } + memset(trace, 0, sizeof(*trace)); trace->fraction = trace->realfraction = 1; // ->TraceBox: TraceBrush not needed here, as worldmodel is never rotated @@ -1715,11 +1895,21 @@ void Collision_ClipToWorld(trace_t *trace, dp_model_t *model, const vec3_t start trace->fraction = bound(0, trace->fraction, 1); trace->realfraction = bound(0, trace->realfraction, 1); VectorLerp(start, 
trace->fraction, end, trace->endpos); + + cached->sequence = collision_cachedtrace_sequence; + cached->result = *trace; } void Collision_ClipLineToGenericEntity(trace_t *trace, dp_model_t *model, const frameblend_t *frameblend, const skeleton_t *skeleton, const vec3_t bodymins, const vec3_t bodymaxs, int bodysupercontents, matrix4x4_t *matrix, matrix4x4_t *inversematrix, const vec3_t start, const vec3_t end, int hitsupercontentsmask, qboolean hitsurfaces) { float starttransformed[3], endtransformed[3]; + collision_cachedtrace_t *cached = Collision_Cache_Lookup(model, frameblend, skeleton, bodymins, bodymaxs, bodysupercontents, matrix, inversematrix, start, vec3_origin, vec3_origin, end, hitsupercontentsmask); + if (cached->sequence) + { + cached->sequence = collision_cachedtrace_sequence; + *trace = cached->result; + return; + } memset(trace, 0, sizeof(*trace)); trace->fraction = trace->realfraction = 1; @@ -1743,10 +1933,21 @@ void Collision_ClipLineToGenericEntity(trace_t *trace, dp_model_t *model, const // transform plane // NOTE: this relies on plane.dist being directly after plane.normal Matrix4x4_TransformPositivePlane(matrix, trace->plane.normal[0], trace->plane.normal[1], trace->plane.normal[2], trace->plane.dist, trace->plane.normal); + + cached->sequence = collision_cachedtrace_sequence; + cached->result = *trace; } void Collision_ClipLineToWorld(trace_t *trace, dp_model_t *model, const vec3_t start, const vec3_t end, int hitsupercontents, qboolean hitsurfaces) { + collision_cachedtrace_t *cached = Collision_Cache_Lookup(model, NULL, NULL, vec3_origin, vec3_origin, 0, &identitymatrix, &identitymatrix, start, vec3_origin, vec3_origin, end, hitsupercontents); + if (cached->sequence) + { + cached->sequence = collision_cachedtrace_sequence; + *trace = cached->result; + return; + } + memset(trace, 0, sizeof(*trace)); trace->fraction = trace->realfraction = 1; if (model && model->TraceLineAgainstSurfaces && hitsurfaces) @@ -1756,11 +1957,21 @@ void 
Collision_ClipLineToWorld(trace_t *trace, dp_model_t *model, const vec3_t s trace->fraction = bound(0, trace->fraction, 1); trace->realfraction = bound(0, trace->realfraction, 1); VectorLerp(start, trace->fraction, end, trace->endpos); + + cached->sequence = collision_cachedtrace_sequence; + cached->result = *trace; } void Collision_ClipPointToGenericEntity(trace_t *trace, dp_model_t *model, const frameblend_t *frameblend, const skeleton_t *skeleton, const vec3_t bodymins, const vec3_t bodymaxs, int bodysupercontents, matrix4x4_t *matrix, matrix4x4_t *inversematrix, const vec3_t start, int hitsupercontentsmask) { float starttransformed[3]; + collision_cachedtrace_t *cached = Collision_Cache_Lookup(model, frameblend, skeleton, bodymins, bodymaxs, bodysupercontents, matrix, inversematrix, start, vec3_origin, vec3_origin, start, hitsupercontentsmask); + if (cached->sequence) + { + cached->sequence = collision_cachedtrace_sequence; + *trace = cached->result; + return; + } memset(trace, 0, sizeof(*trace)); trace->fraction = trace->realfraction = 1; @@ -1779,15 +1990,29 @@ void Collision_ClipPointToGenericEntity(trace_t *trace, dp_model_t *model, const // transform plane // NOTE: this relies on plane.dist being directly after plane.normal Matrix4x4_TransformPositivePlane(matrix, trace->plane.normal[0], trace->plane.normal[1], trace->plane.normal[2], trace->plane.dist, trace->plane.normal); + + cached->sequence = collision_cachedtrace_sequence; + cached->result = *trace; } void Collision_ClipPointToWorld(trace_t *trace, dp_model_t *model, const vec3_t start, int hitsupercontents) { + collision_cachedtrace_t *cached = Collision_Cache_Lookup(model, NULL, NULL, vec3_origin, vec3_origin, 0, &identitymatrix, &identitymatrix, start, vec3_origin, vec3_origin, start, hitsupercontents); + if (cached->sequence) + { + cached->sequence = collision_cachedtrace_sequence; + *trace = cached->result; + return; + } + memset(trace, 0, sizeof(*trace)); trace->fraction = trace->realfraction = 
1; if (model && model->TracePoint) model->TracePoint(model, NULL, NULL, trace, start, hitsupercontents); VectorCopy(start, trace->endpos); + + cached->sequence = collision_cachedtrace_sequence; + cached->result = *trace; } void Collision_CombineTraces(trace_t *cliptrace, const trace_t *trace, void *touch, qboolean isbmodel) diff --git a/collision.h b/collision.h index 1b41f054..919e9dcd 100644 --- a/collision.h +++ b/collision.h @@ -63,6 +63,10 @@ void Collision_Init(void); void Collision_ClipTrace_Box(trace_t *trace, const vec3_t cmins, const vec3_t cmaxs, const vec3_t start, const vec3_t mins, const vec3_t maxs, const vec3_t end, int hitsupercontentsmask, int boxsupercontents, int boxq3surfaceflags, const texture_t *boxtexture); void Collision_ClipTrace_Point(trace_t *trace, const vec3_t cmins, const vec3_t cmaxs, const vec3_t start, int hitsupercontentsmask, int boxsupercontents, int boxq3surfaceflags, const texture_t *boxtexture); +void Collision_Cache_Reset(qboolean resetlimits); +void Collision_Cache_Init(mempool_t *mempool); +void Collision_Cache_NewFrame(void); + typedef struct colpointf_s { vec3_t v; diff --git a/host.c b/host.c index f3de7a18..930eaf56 100644 --- a/host.c +++ b/host.c @@ -786,6 +786,9 @@ void Host_Main(void) double advancetime, aborttime = 0; float offset; + if (cls.state == ca_dedicated) + Collision_Cache_NewFrame(); + // run the world state // don't allow simulation to run too fast or too slow or logic glitches can occur @@ -880,6 +883,8 @@ void Host_Main(void) if (cls.state != ca_dedicated && (cl_timer > 0 || cls.timedemo || ((vid_activewindow ? 
cl_maxfps : cl_maxidlefps).value < 1))) { R_TimeReport("---"); + Collision_Cache_NewFrame(); + R_TimeReport("collisioncache"); // decide the simulation time if (cls.capturevideo.active) { diff --git a/r_shadow.c b/r_shadow.c index 5b7a2f22..f02ef74e 100644 --- a/r_shadow.c +++ b/r_shadow.c @@ -329,7 +329,7 @@ cvar_t r_shadow_bouncegrid_maxbounce = {CVAR_SAVE, "r_shadow_bouncegrid_maxbounc cvar_t r_shadow_bouncegrid_nolerpsplat = {CVAR_SAVE, "r_shadow_bouncegrid_nolerpsplat", "0", "enables slightly quicker (but worse looking) photon accumulation"}; cvar_t r_shadow_bouncegrid_particlebounceintensity = {CVAR_SAVE, "r_shadow_bouncegrid_particlebounceintensity", "4", "amount of energy carried over after each bounce, this is a multiplier of texture color and the result is clamped to 1 or less, to prevent adding energy on each bounce"}; cvar_t r_shadow_bouncegrid_particleintensity = {CVAR_SAVE, "r_shadow_bouncegrid_particleintensity", "2", "brightness of particles contributing to bouncegrid texture"}; -cvar_t r_shadow_bouncegrid_photons = {CVAR_SAVE, "r_shadow_bouncegrid_photons", "2000", "total photons to shoot per update, divided proportionately between lights"}; +cvar_t r_shadow_bouncegrid_photons = {CVAR_SAVE, "r_shadow_bouncegrid_photons", "5000", "total photons to shoot per update, divided proportionately between lights"}; cvar_t r_shadow_bouncegrid_spacingx = {CVAR_SAVE, "r_shadow_bouncegrid_spacingx", "64", "unit size of bouncegrid pixel on X axis"}; cvar_t r_shadow_bouncegrid_spacingy = {CVAR_SAVE, "r_shadow_bouncegrid_spacingy", "64", "unit size of bouncegrid pixel on Y axis"}; cvar_t r_shadow_bouncegrid_spacingz = {CVAR_SAVE, "r_shadow_bouncegrid_spacingz", "64", "unit size of bouncegrid pixel on Z axis"}; diff --git a/sv_main.c b/sv_main.c index 68001f50..74457fec 100644 --- a/sv_main.c +++ b/sv_main.c @@ -3108,6 +3108,8 @@ void SV_SpawnServer (const char *server) return; } + Collision_Cache_Reset(true); + // let's not have any servers with no name if 
(hostname.string[0] == 0) Cvar_Set ("hostname", "UNNAMED"); diff --git a/todo b/todo index 3ed03b1d..c9790741 100644 --- a/todo +++ b/todo @@ -36,6 +36,8 @@ 0 bug darkplaces loader: png loading crashes if the image is transparent (Urre) 0 bug darkplaces loader: q1bsp loader computes wrong submodel size for submodels with no surfaces, such as a func_wall comprised entirely of SKIP or CAULK brushes (neg|ke) 0 bug darkplaces memory: memstats doesn't account for memory used by VBO/EBO buffers in models +0 bug darkplaces mobile: add a command to lock in current accelerometer axes as default orientation (calibrate), which the menuqc/csqc can call +0 bug darkplaces mobile: add a landscape mode for mobile devices where width is > height, which changes 2D and 3D rendering orientation 0 bug darkplaces qw: tf skins not working (xavior) 0 bug darkplaces readme: it would be a very good idea to add documentation of sv_gameplayfix_* cvars in the readme as a means to run broken mods (xaGe) 0 bug darkplaces readme: readme says that q3 shaders are not supported, this is not true, describe the working features in detail (qqshka) @@ -1342,6 +1344,7 @@ d hmap: add support for GTKRadiant stuff d lhfire: post lhfire build with example scripts. d litsupport: fix the one COM_HunkFile call that uses two parameters (glquake took one) and fix the few "//lit support begin" messages at the end of code blocks (metlslime) d lmp2pcx: post new lmp2pcx build. 
+d optimization darkplaces renderer: cache collision trace results for more performance in r_shadow_bouncegrid d optimization darkplaces renderer: initialize more lighting state in R_Shadow_Stage_Light to reduce per-surface overhead (LordHavoc) d optimization darkplaces renderer: rename r_shadow_glsl_geforcefxlowprecision to r_shadow_glsl_usehalffloat and enable it by default if the extension is present, it's about a 20% speed gain on GF6 compared to 5% on GFFX (SavageX) d optimization darkplaces server: optimize pvs checking by caching pvs cluster indices corresponding to entity box (Sajt) -- 2.39.2