From 33cefdeb9044350e531ca015aafcb44dc644c178 Mon Sep 17 00:00:00 2001 From: divverent Date: Sat, 4 Oct 2014 20:12:51 +0000 Subject: [PATCH] Implement GPU-side corona fading Uses GL_ARB_query_buffer_object to retrieve query results directly into a buffer. This allows corona fading to be moved into the fragment shader, preventing a round-trip to the CPU and synchronous rendering. Depending on the machine's configuration, it can give a dramatic performance boost. From: Alex Goins git-svn-id: svn://svn.icculus.org/twilight/trunk/darkplaces@12094 d7cf8633-e32d-0410-b094-e92efae38249 ::stable-branch::merge=121152f1632170629122559482a298ea8b3aaf75 --- client.h | 1 + dpsoftrast.h | 3 ++- gl_rmain.c | 5 ++++- glquake.h | 8 ++++++++ model_brush.h | 2 ++ r_shadow.c | 47 ++++++++++++++++++++++++++++++++++------------- shader_glsl.h | 11 ++++++++++- vid.h | 1 + vid_sdl.c | 1 + vid_shared.c | 3 +++ vid_wgl.c | 1 + 11 files changed, 67 insertions(+), 16 deletions(-) diff --git a/client.h b/client.h index f830c2e7..0bd75f77 100644 --- a/client.h +++ b/client.h @@ -317,6 +317,7 @@ typedef struct rtlight_s vec3_t currentcolor; /// used by corona updates, due to occlusion query float corona_visibility; + unsigned int occlusion_buf; unsigned int corona_queryindex_visiblepixels; unsigned int corona_queryindex_allpixels; /// this is R_GetCubemap(rtlight->cubemapname) diff --git a/dpsoftrast.h b/dpsoftrast.h index 5bda9f36..bc1dbd05 100644 --- a/dpsoftrast.h +++ b/dpsoftrast.h @@ -196,7 +196,8 @@ typedef enum shaderpermutation_e SHADERPERMUTATION_DEPTHRGB = 1<<28, ///< read/write depth values in RGB color coded format for older hardware without depth samplers SHADERPERMUTATION_ALPHAGEN_VERTEX = 1<<29, ///< alphaGen vertex SHADERPERMUTATION_SKELETAL = 1<<30, ///< (skeletal models) use skeletal matrices to deform vertices (gpu-skinning) - SHADERPERMUTATION_COUNT = 31 ///< size of shaderpermutationinfo array + SHADERPERMUTATION_OCCLUDE = 1<<31, ///< use occlusion buffer for corona + SHADERPERMUTATION_COUNT = 32 ///< size of shaderpermutationinfo array } shaderpermutation_t; diff --git a/gl_rmain.c b/gl_rmain.c index 7b074b7e..85721903 100644 --- a/gl_rmain.c +++ b/gl_rmain.c @@ -678,7 +678,8 @@ shaderpermutationinfo_t shaderpermutationinfo[SHADERPERMUTATION_COUNT] = {"#define USETRIPPY\n", " trippy"}, {"#define USEDEPTHRGB\n", " depthrgb"}, {"#define USEALPHAGENVERTEX\n", " alphagenvertex"}, - {"#define USESKELETAL\n", " skeletal"} + {"#define USESKELETAL\n", " skeletal"}, + {"#define USEOCCLUDE\n", " occlude"} }; // NOTE: MUST MATCH ORDER OF SHADERMODE_* ENUMS! @@ -2202,6 +2203,8 @@ void R_SetupShader_Surface(const vec3_t lightcolorbase, qboolean modellighting, permutation |= SHADERPERMUTATION_TRIPPY; if (rsurface.texture->currentmaterialflags & MATERIALFLAG_ALPHATEST) permutation |= SHADERPERMUTATION_ALPHAKILL; + if (rsurface.texture->currentmaterialflags & MATERIALFLAG_OCCLUDE) + permutation |= SHADERPERMUTATION_OCCLUDE; if (rsurface.texture->r_water_waterscroll[0] && rsurface.texture->r_water_waterscroll[1]) permutation |= SHADERPERMUTATION_NORMALMAPSCROLLBLEND; // todo: make generic if (rsurfacepass == RSURFPASS_BACKGROUND) diff --git a/glquake.h b/glquake.h index d642e3e9..781eb310 100644 --- a/glquake.h +++ b/glquake.h @@ -1071,6 +1071,14 @@ extern void (GLAPIENTRY *qglGetQueryObjectuivARB)(GLuint qid, GLenum pname, GLui #define GL_QUERY_RESULT_AVAILABLE_ARB 0x8867 #endif +// GL_ARB_query_buffer_object +#ifndef GL_QUERY_BUFFER_ARB +#define GL_QUERY_BUFFER_ARB 0x9192 +#define GL_QUERY_BUFFER_BINDING_ARB 0x9193 +#define GL_QUERY_RESULT_NO_WAIT_ARB 0x9194 +#define GL_QUERY_BUFFER_BARRIER_BIT_ARB 0x00008000 +#endif + // GL_EXT_bgr #define GL_BGR 0x80E0 diff --git a/model_brush.h b/model_brush.h index d417c62d..d103f454 100644 --- a/model_brush.h +++ b/model_brush.h @@ -120,6 +120,8 @@ mplane_t; #define MATERIALFLAG_NORTLIGHT 134217728 // alphagen vertex #define MATERIALFLAG_ALPHAGEN_VERTEX 268435456 +// use occlusion buffer for corona +#define MATERIALFLAG_OCCLUDE 536870912 // combined mask of all attributes that require depth sorted rendering #define MATERIALFLAGMASK_DEPTHSORTED (MATERIALFLAG_BLENDED | MATERIALFLAG_NODEPTHTEST) // combined mask of all attributes that cause some sort of transparency diff --git a/r_shadow.c b/r_shadow.c index 79ab59f0..ff8792db 100644 --- a/r_shadow.c +++ b/r_shadow.c @@ -3941,6 +3941,7 @@ static void R_Shadow_PrepareLight(rtlight_t *rtlight) rtlight->cached_numshadowentities = 0; rtlight->cached_numshadowentities_noselfshadow = 0; rtlight->cached_numsurfaces = 0; + rtlight->occlusion_buf = 0; rtlight->cached_lightentities = NULL; rtlight->cached_lightentities_noselfshadow = NULL; rtlight->cached_shadowentities = NULL; @@ -5147,7 +5148,9 @@ static float spritetexcoord2f[4*2] = {0, 1, 0, 0, 1, 0, 1, 1}; static void R_DrawCorona(rtlight_t *rtlight, float cscale, float scale) { vec3_t color; + unsigned int occlude = 0; GLint allpixels = 0, visiblepixels = 0; + // now we have to check the query result if (rtlight->corona_queryindex_visiblepixels) { @@ -5160,29 +5163,44 @@ static void R_DrawCorona(rtlight_t *rtlight, float cscale, float scale) case RENDERPATH_GLES2: #if defined(GL_SAMPLES_PASSED_ARB) && !defined(USE_GLES2) CHECKGLERROR - qglGetQueryObjectivARB(rtlight->corona_queryindex_visiblepixels, GL_QUERY_RESULT_ARB, &visiblepixels); - qglGetQueryObjectivARB(rtlight->corona_queryindex_allpixels, GL_QUERY_RESULT_ARB, &allpixels); + // See if we can use the GPU-side method to prevent implicit sync + if (vid.support.arb_query_buffer_object) { +#define BUFFER_OFFSET(i) ((void*)NULL + (i)) + qglGenBuffersARB(1, &rtlight->occlusion_buf); + qglBindBufferARB(GL_QUERY_BUFFER_ARB, rtlight->occlusion_buf); + qglBufferDataARB(GL_QUERY_BUFFER_ARB, 8, NULL, GL_DYNAMIC_COPY); + qglGetQueryObjectivARB(rtlight->corona_queryindex_visiblepixels, GL_QUERY_RESULT_ARB, BUFFER_OFFSET(0)); + qglGetQueryObjectivARB(rtlight->corona_queryindex_allpixels, GL_QUERY_RESULT_ARB, BUFFER_OFFSET(4)); + qglBindBufferBase(GL_UNIFORM_BUFFER, 0, rtlight->occlusion_buf); + occlude = MATERIALFLAG_OCCLUDE; + } else { + qglGetQueryObjectivARB(rtlight->corona_queryindex_visiblepixels, GL_QUERY_RESULT_ARB, &visiblepixels); + qglGetQueryObjectivARB(rtlight->corona_queryindex_allpixels, GL_QUERY_RESULT_ARB, &allpixels); + if (visiblepixels < 1 || allpixels < 1) + return; + rtlight->corona_visibility *= bound(0, (float)visiblepixels / (float)allpixels, 1); + } + cscale *= rtlight->corona_visibility; CHECKGLERROR -#endif break; +#else + return; +#endif case RENDERPATH_D3D9: Con_DPrintf("FIXME D3D9 %s:%i %s\n", __FILE__, __LINE__, __FUNCTION__); - break; + return; case RENDERPATH_D3D10: Con_DPrintf("FIXME D3D10 %s:%i %s\n", __FILE__, __LINE__, __FUNCTION__); - break; + return; case RENDERPATH_D3D11: Con_DPrintf("FIXME D3D11 %s:%i %s\n", __FILE__, __LINE__, __FUNCTION__); - break; + return; case RENDERPATH_SOFT: //Con_DPrintf("FIXME SOFT %s:%i %s\n", __FILE__, __LINE__, __FUNCTION__); - break; - } - //Con_Printf("%i of %i pixels\n", (int)visiblepixels, (int)allpixels); - if (visiblepixels < 1 || allpixels < 1) return; - rtlight->corona_visibility *= bound(0, (float)visiblepixels / (float)allpixels, 1); - cscale *= rtlight->corona_visibility; + default: + return; + } } else { @@ -5202,10 +5220,13 @@ static void R_DrawCorona(rtlight_t *rtlight, float cscale, float scale) } R_CalcSprite_Vertex3f(vertex3f, rtlight->shadoworigin, r_refdef.view.right, r_refdef.view.up, scale, -scale, -scale, scale); RSurf_ActiveCustomEntity(&identitymatrix, &identitymatrix, RENDER_NODEPTHTEST, 0, color[0], color[1], color[2], 1, 4, vertex3f, spritetexcoord2f, NULL, NULL, NULL, NULL, 2, polygonelement3i, polygonelement3s, false, false); - R_DrawCustomSurface(r_shadow_lightcorona, &identitymatrix, MATERIALFLAG_ADD | MATERIALFLAG_BLENDED | MATERIALFLAG_FULLBRIGHT | MATERIALFLAG_NOCULLFACE | MATERIALFLAG_NODEPTHTEST, 0, 4, 0, 2, false, false); + R_DrawCustomSurface(r_shadow_lightcorona, &identitymatrix, MATERIALFLAG_ADD | MATERIALFLAG_BLENDED | MATERIALFLAG_FULLBRIGHT | MATERIALFLAG_NOCULLFACE | MATERIALFLAG_NODEPTHTEST | occlude, 0, 4, 0, 2, false, false); if(negated) GL_BlendEquationSubtract(false); } + if (rtlight->occlusion_buf) { + qglDeleteBuffersARB(1, &rtlight->occlusion_buf); + } } void R_Shadow_DrawCoronas(void) diff --git a/shader_glsl.h b/shader_glsl.h index 3e150a22..1f5803c0 100644 --- a/shader_glsl.h +++ b/shader_glsl.h @@ -2,7 +2,7 @@ "// written by Forest 'LordHavoc' Hale\n", "// shadowmapping enhancements by Lee 'eihrul' Salzman\n", "\n", -"#ifdef USESKELETAL\n", +"#if defined(USESKELETAL) || defined(USEOCCLUDE)\n", "# ifdef GL_ARB_uniform_buffer_object\n", "# extension GL_ARB_uniform_buffer_object : enable\n", "# endif\n", @@ -1433,6 +1433,12 @@ "#ifdef USENORMALMAPSCROLLBLEND\n", "uniform highp vec2 NormalmapScrollBlend;\n", "#endif\n", +"#ifdef USEOCCLUDE\n", +"uniform occludeQuery {\n", +" uint visiblepixels;\n", +" uint allpixels;\n", +"};\n", +"#endif\n", "void main(void)\n", "{\n", "#ifdef USEOFFSETMAPPING\n", @@ -1738,6 +1744,9 @@ " ScreenTexCoord = mix(SafeScreenTexCoord, ScreenTexCoord, f);\n", " color.rgb = mix(color.rgb, cast_myhalf3(dp_texture2D(Texture_Reflection, ScreenTexCoord)) * ReflectColor.rgb, ReflectColor.a);\n", "#endif\n", +"#ifdef USEOCCLUDE\n", +" color.rgb *= clamp(float(visiblepixels) / float(allpixels), 0.0, 1.0);\n", +"#endif\n", "\n", " dp_FragColor = vec4(color);\n", "}\n", diff --git a/vid.h b/vid.h index 7e8f3203..7302386a 100644 --- a/vid.h +++ b/vid.h @@ -53,6 +53,7 @@ typedef struct viddef_support_s qboolean arb_framebuffer_object; qboolean arb_multitexture; qboolean arb_occlusion_query; + qboolean arb_query_buffer_object; qboolean arb_shadow; qboolean arb_texture_compression; qboolean arb_texture_cube_map; diff --git a/vid_sdl.c b/vid_sdl.c index d632a2b4..2016762d 100644 --- a/vid_sdl.c +++ b/vid_sdl.c @@ -1952,6 +1952,7 @@ void GLES_Init(void) vid.support.arb_draw_buffers = false; vid.support.arb_multitexture = false; vid.support.arb_occlusion_query = false; + vid.support.arb_query_buffer_object = false; vid.support.arb_shadow = false; vid.support.arb_texture_compression = false; // different (vendor-specific) formats than on desktop OpenGL... vid.support.arb_texture_cube_map = SDL_GL_ExtensionSupported("GL_OES_texture_cube_map") != 0; diff --git a/vid_shared.c b/vid_shared.c index 2d6497e5..7c321c2a 100644 --- a/vid_shared.c +++ b/vid_shared.c @@ -1065,6 +1065,7 @@ void VID_CheckExtensions(void) vid.support.arb_draw_buffers = GL_CheckExtension("GL_ARB_draw_buffers", drawbuffersfuncs, "-nodrawbuffers", false); vid.support.arb_multitexture = GL_CheckExtension("GL_ARB_multitexture", multitexturefuncs, "-nomtex", false); vid.support.arb_occlusion_query = GL_CheckExtension("GL_ARB_occlusion_query", occlusionqueryfuncs, "-noocclusionquery", false); + vid.support.arb_query_buffer_object = GL_CheckExtension("GL_ARB_query_buffer_object", NULL, "-noquerybuffer", true); vid.support.arb_shadow = GL_CheckExtension("GL_ARB_shadow", NULL, "-noshadow", false); vid.support.arb_texture_compression = GL_CheckExtension("GL_ARB_texture_compression", texturecompressionfuncs, "-notexturecompression", false); vid.support.arb_texture_cube_map = GL_CheckExtension("GL_ARB_texture_cube_map", NULL, "-nocubemap", false); @@ -1110,6 +1111,7 @@ void VID_CheckExtensions(void) // COMMANDLINEOPTION: GL: -nofbo disables GL_EXT_framebuffer_object (which accelerates rendering), only used if GL_ARB_fragment_shader is also available // COMMANDLINEOPTION: GL: -nomtex disables GL_ARB_multitexture (required for faster map rendering) // COMMANDLINEOPTION: GL: -noocclusionquery disables GL_ARB_occlusion_query (which allows coronas to fade according to visibility, and potentially used for rendering optimizations) +// COMMANDLINEOPTION: GL: -noquerybuffer disables GL_ARB_query_buffer_object (which allows corona fading without synchronous rendering) // COMMANDLINEOPTION: GL: -nos3tc disables GL_EXT_texture_compression_s3tc (which allows use of .dds texture caching) // COMMANDLINEOPTION: GL: -noseparatestencil disables use of OpenGL2.0 glStencilOpSeparate and GL_ATI_separate_stencil extensions (which accelerate shadow rendering) // COMMANDLINEOPTION: GL: -noshadow disables use of GL_ARB_shadow (required for hardware shadowmap filtering) @@ -2122,6 +2124,7 @@ void VID_Soft_SharedSetup(void) vid.support.arb_depth_texture = true; vid.support.arb_draw_buffers = true; vid.support.arb_occlusion_query = true; + vid.support.arb_query_buffer_object = false; vid.support.arb_shadow = true; //vid.support.arb_texture_compression = true; vid.support.arb_texture_cube_map = true; diff --git a/vid_wgl.c b/vid_wgl.c index 544d24ba..24379fbf 100644 --- a/vid_wgl.c +++ b/vid_wgl.c @@ -1525,6 +1525,7 @@ qboolean VID_InitModeDX(viddef_mode_t *mode, int version) vid.support.arb_depth_texture = true; vid.support.arb_draw_buffers = vid_d3d9caps.NumSimultaneousRTs > 1; vid.support.arb_occlusion_query = true; // can't find a cap for this + vid.support.arb_query_buffer_object = true; vid.support.arb_shadow = true; vid.support.arb_texture_compression = true; vid.support.arb_texture_cube_map = true; -- 2.39.2