Implement GPU-side corona fading
author divverent <divverent@d7cf8633-e32d-0410-b094-e92efae38249>
Sat, 4 Oct 2014 20:12:51 +0000 (20:12 +0000)
committer divverent <divverent@d7cf8633-e32d-0410-b094-e92efae38249>
Sat, 4 Oct 2014 20:12:51 +0000 (20:12 +0000)
Uses GL_ARB_query_buffer_object to write occlusion query results directly into
a buffer object. This allows corona fading to be computed in the fragment
shader, avoiding both a round-trip to the CPU and the implicit synchronization
that reading the query results back would force. Depending on the machine's
configuration, this can give a dramatic performance boost.

From: Alex Goins <agoins@nvidia.com>

git-svn-id: svn://svn.icculus.org/twilight/trunk/darkplaces@12094 d7cf8633-e32d-0410-b094-e92efae38249

client.h
dpsoftrast.h
gl_rmain.c
glquake.h
model_brush.h
r_shadow.c
shader_glsl.h
vid.h
vid_sdl.c
vid_shared.c
vid_wgl.c

index f830c2e..0bd75f7 100644 (file)
--- a/client.h
+++ b/client.h
@@ -317,6 +317,7 @@ typedef struct rtlight_s
        vec3_t currentcolor;
        /// used by corona updates, due to occlusion query
        float corona_visibility;
+       unsigned int occlusion_buf;
        unsigned int corona_queryindex_visiblepixels;
        unsigned int corona_queryindex_allpixels;
        /// this is R_GetCubemap(rtlight->cubemapname)
index 5bda9f3..bc1dbd0 100644 (file)
--- a/dpsoftrast.h
+++ b/dpsoftrast.h
@@ -196,7 +196,8 @@ typedef enum shaderpermutation_e
        SHADERPERMUTATION_DEPTHRGB = 1<<28, ///< read/write depth values in RGB color coded format for older hardware without depth samplers
        SHADERPERMUTATION_ALPHAGEN_VERTEX = 1<<29, ///< alphaGen vertex
        SHADERPERMUTATION_SKELETAL = 1<<30, ///< (skeletal models) use skeletal matrices to deform vertices (gpu-skinning)
-       SHADERPERMUTATION_COUNT = 31 ///< size of shaderpermutationinfo array
+       SHADERPERMUTATION_OCCLUDE = 1<<31, ///< use occlusion buffer for corona
+       SHADERPERMUTATION_COUNT = 32 ///< size of shaderpermutationinfo array
 }
 shaderpermutation_t;
 
index 7b074b7..8572190 100644 (file)
--- a/gl_rmain.c
+++ b/gl_rmain.c
@@ -678,7 +678,8 @@ shaderpermutationinfo_t shaderpermutationinfo[SHADERPERMUTATION_COUNT] =
        {"#define USETRIPPY\n", " trippy"},
        {"#define USEDEPTHRGB\n", " depthrgb"},
        {"#define USEALPHAGENVERTEX\n", " alphagenvertex"},
-       {"#define USESKELETAL\n", " skeletal"}
+       {"#define USESKELETAL\n", " skeletal"},
+       {"#define USEOCCLUDE\n", " occlude"}
 };
 
 // NOTE: MUST MATCH ORDER OF SHADERMODE_* ENUMS!
@@ -2202,6 +2203,8 @@ void R_SetupShader_Surface(const vec3_t lightcolorbase, qboolean modellighting,
                permutation |= SHADERPERMUTATION_TRIPPY;
        if (rsurface.texture->currentmaterialflags & MATERIALFLAG_ALPHATEST)
                permutation |= SHADERPERMUTATION_ALPHAKILL;
+       if (rsurface.texture->currentmaterialflags & MATERIALFLAG_OCCLUDE)
+               permutation |= SHADERPERMUTATION_OCCLUDE;
        if (rsurface.texture->r_water_waterscroll[0] && rsurface.texture->r_water_waterscroll[1])
                permutation |= SHADERPERMUTATION_NORMALMAPSCROLLBLEND; // todo: make generic
        if (rsurfacepass == RSURFPASS_BACKGROUND)
index d642e3e..781eb31 100644 (file)
--- a/glquake.h
+++ b/glquake.h
@@ -1071,6 +1071,14 @@ extern void (GLAPIENTRY *qglGetQueryObjectuivARB)(GLuint qid, GLenum pname, GLui
 #define GL_QUERY_RESULT_AVAILABLE_ARB                     0x8867
 #endif
 
+// GL_ARB_query_buffer_object
+#ifndef GL_QUERY_BUFFER_ARB
+#define GL_QUERY_BUFFER_ARB                               0x9192
+#define GL_QUERY_BUFFER_BINDING_ARB                       0x9193
+#define GL_QUERY_RESULT_NO_WAIT_ARB                       0x9194
+#define GL_QUERY_BUFFER_BARRIER_BIT_ARB                   0x00008000
+#endif
+
 // GL_EXT_bgr
 #define GL_BGR                                 0x80E0
 
index d417c62..d103f45 100644 (file)
--- a/model_brush.h
+++ b/model_brush.h
@@ -120,6 +120,8 @@ mplane_t;
 #define MATERIALFLAG_NORTLIGHT 134217728
 // alphagen vertex
 #define MATERIALFLAG_ALPHAGEN_VERTEX 268435456
+// use occlusion buffer for corona
+#define MATERIALFLAG_OCCLUDE 536870912
 // combined mask of all attributes that require depth sorted rendering
 #define MATERIALFLAGMASK_DEPTHSORTED (MATERIALFLAG_BLENDED | MATERIALFLAG_NODEPTHTEST)
 // combined mask of all attributes that cause some sort of transparency
index 79ab59f..ff8792d 100644 (file)
--- a/r_shadow.c
+++ b/r_shadow.c
@@ -3941,6 +3941,7 @@ static void R_Shadow_PrepareLight(rtlight_t *rtlight)
        rtlight->cached_numshadowentities              = 0;
        rtlight->cached_numshadowentities_noselfshadow = 0;
        rtlight->cached_numsurfaces                    = 0;
+       rtlight->occlusion_buf                         = 0;
        rtlight->cached_lightentities                  = NULL;
        rtlight->cached_lightentities_noselfshadow     = NULL;
        rtlight->cached_shadowentities                 = NULL;
@@ -5147,7 +5148,9 @@ static float spritetexcoord2f[4*2] = {0, 1, 0, 0, 1, 0, 1, 1};
 static void R_DrawCorona(rtlight_t *rtlight, float cscale, float scale)
 {
        vec3_t color;
+       unsigned int occlude = 0;
        GLint allpixels = 0, visiblepixels = 0;
+
        // now we have to check the query result
        if (rtlight->corona_queryindex_visiblepixels)
        {
@@ -5160,29 +5163,44 @@ static void R_DrawCorona(rtlight_t *rtlight, float cscale, float scale)
                case RENDERPATH_GLES2:
 #if defined(GL_SAMPLES_PASSED_ARB) && !defined(USE_GLES2)
                        CHECKGLERROR
-                       qglGetQueryObjectivARB(rtlight->corona_queryindex_visiblepixels, GL_QUERY_RESULT_ARB, &visiblepixels);
-                       qglGetQueryObjectivARB(rtlight->corona_queryindex_allpixels, GL_QUERY_RESULT_ARB, &allpixels);
+                       // See if we can use the GPU-side method to prevent implicit sync
+                       if (vid.support.arb_query_buffer_object) {
+#define BUFFER_OFFSET(i)    ((void*)NULL + (i))
+                               qglGenBuffersARB(1, &rtlight->occlusion_buf);
+                               qglBindBufferARB(GL_QUERY_BUFFER_ARB, rtlight->occlusion_buf);
+                               qglBufferDataARB(GL_QUERY_BUFFER_ARB, 8, NULL, GL_DYNAMIC_COPY);
+                               qglGetQueryObjectivARB(rtlight->corona_queryindex_visiblepixels, GL_QUERY_RESULT_ARB, BUFFER_OFFSET(0));
+                               qglGetQueryObjectivARB(rtlight->corona_queryindex_allpixels, GL_QUERY_RESULT_ARB, BUFFER_OFFSET(4));
+                               qglBindBufferBase(GL_UNIFORM_BUFFER, 0, rtlight->occlusion_buf);
+                               occlude = MATERIALFLAG_OCCLUDE;
+                       } else {
+                               qglGetQueryObjectivARB(rtlight->corona_queryindex_visiblepixels, GL_QUERY_RESULT_ARB, &visiblepixels);
+                               qglGetQueryObjectivARB(rtlight->corona_queryindex_allpixels, GL_QUERY_RESULT_ARB, &allpixels); 
+                               if (visiblepixels < 1 || allpixels < 1)
+                                       return;
+                               rtlight->corona_visibility *= bound(0, (float)visiblepixels / (float)allpixels, 1);
+                       }
+                       cscale *= rtlight->corona_visibility;
                        CHECKGLERROR
-#endif
                        break;
+#else
+                       return;
+#endif
                case RENDERPATH_D3D9:
                        Con_DPrintf("FIXME D3D9 %s:%i %s\n", __FILE__, __LINE__, __FUNCTION__);
-                       break;
+                       return;
                case RENDERPATH_D3D10:
                        Con_DPrintf("FIXME D3D10 %s:%i %s\n", __FILE__, __LINE__, __FUNCTION__);
-                       break;
+                       return;
                case RENDERPATH_D3D11:
                        Con_DPrintf("FIXME D3D11 %s:%i %s\n", __FILE__, __LINE__, __FUNCTION__);
-                       break;
+                       return;
                case RENDERPATH_SOFT:
                        //Con_DPrintf("FIXME SOFT %s:%i %s\n", __FILE__, __LINE__, __FUNCTION__);
-                       break;
-               }
-               //Con_Printf("%i of %i pixels\n", (int)visiblepixels, (int)allpixels);
-               if (visiblepixels < 1 || allpixels < 1)
                        return;
-               rtlight->corona_visibility *= bound(0, (float)visiblepixels / (float)allpixels, 1);
-               cscale *= rtlight->corona_visibility;
+               default:
+                       return;
+               }
        }
        else
        {
@@ -5202,10 +5220,13 @@ static void R_DrawCorona(rtlight_t *rtlight, float cscale, float scale)
                }
                R_CalcSprite_Vertex3f(vertex3f, rtlight->shadoworigin, r_refdef.view.right, r_refdef.view.up, scale, -scale, -scale, scale);
                RSurf_ActiveCustomEntity(&identitymatrix, &identitymatrix, RENDER_NODEPTHTEST, 0, color[0], color[1], color[2], 1, 4, vertex3f, spritetexcoord2f, NULL, NULL, NULL, NULL, 2, polygonelement3i, polygonelement3s, false, false);
-               R_DrawCustomSurface(r_shadow_lightcorona, &identitymatrix, MATERIALFLAG_ADD | MATERIALFLAG_BLENDED | MATERIALFLAG_FULLBRIGHT | MATERIALFLAG_NOCULLFACE | MATERIALFLAG_NODEPTHTEST, 0, 4, 0, 2, false, false);
+               R_DrawCustomSurface(r_shadow_lightcorona, &identitymatrix, MATERIALFLAG_ADD | MATERIALFLAG_BLENDED | MATERIALFLAG_FULLBRIGHT | MATERIALFLAG_NOCULLFACE | MATERIALFLAG_NODEPTHTEST | occlude, 0, 4, 0, 2, false, false);
                if(negated)
                        GL_BlendEquationSubtract(false);
        }
+       if (rtlight->occlusion_buf) {
+               qglDeleteBuffersARB(1, &rtlight->occlusion_buf);
+       }
 }
 
 void R_Shadow_DrawCoronas(void)
index 3e150a2..1f5803c 100644 (file)
--- a/shader_glsl.h
+++ b/shader_glsl.h
@@ -2,7 +2,7 @@
 "// written by Forest 'LordHavoc' Hale\n",
 "// shadowmapping enhancements by Lee 'eihrul' Salzman\n",
 "\n",
-"#ifdef USESKELETAL\n",
+"#if defined(USESKELETAL) || defined(USEOCCLUDE)\n",
 "#  ifdef GL_ARB_uniform_buffer_object\n",
 "#    extension GL_ARB_uniform_buffer_object : enable\n",
 "#  endif\n",
 "#ifdef USENORMALMAPSCROLLBLEND\n",
 "uniform highp vec2 NormalmapScrollBlend;\n",
 "#endif\n",
+"#ifdef USEOCCLUDE\n",
+"uniform occludeQuery {\n",
+"    uint visiblepixels;\n",
+"    uint allpixels;\n",
+"};\n",
+"#endif\n",
 "void main(void)\n",
 "{\n",
 "#ifdef USEOFFSETMAPPING\n",
 "      ScreenTexCoord = mix(SafeScreenTexCoord, ScreenTexCoord, f);\n",
 "      color.rgb = mix(color.rgb, cast_myhalf3(dp_texture2D(Texture_Reflection, ScreenTexCoord)) * ReflectColor.rgb, ReflectColor.a);\n",
 "#endif\n",
+"#ifdef USEOCCLUDE\n",
+"   color.rgb *= clamp(float(visiblepixels) / float(allpixels), 0.0, 1.0);\n",
+"#endif\n",
 "\n",
 "      dp_FragColor = vec4(color);\n",
 "}\n",
diff --git a/vid.h b/vid.h
index 7e8f320..7302386 100644 (file)
--- a/vid.h
+++ b/vid.h
@@ -53,6 +53,7 @@ typedef struct viddef_support_s
        qboolean arb_framebuffer_object;
        qboolean arb_multitexture;
        qboolean arb_occlusion_query;
+       qboolean arb_query_buffer_object;
        qboolean arb_shadow;
        qboolean arb_texture_compression;
        qboolean arb_texture_cube_map;
index d632a2b..2016762 100644 (file)
--- a/vid_sdl.c
+++ b/vid_sdl.c
@@ -1952,6 +1952,7 @@ void GLES_Init(void)
        vid.support.arb_draw_buffers = false;
        vid.support.arb_multitexture = false;
        vid.support.arb_occlusion_query = false;
+       vid.support.arb_query_buffer_object = false;
        vid.support.arb_shadow = false;
        vid.support.arb_texture_compression = false; // different (vendor-specific) formats than on desktop OpenGL...
        vid.support.arb_texture_cube_map = SDL_GL_ExtensionSupported("GL_OES_texture_cube_map") != 0;
index 2d6497e..7c321c2 100644 (file)
--- a/vid_shared.c
+++ b/vid_shared.c
@@ -1065,6 +1065,7 @@ void VID_CheckExtensions(void)
        vid.support.arb_draw_buffers = GL_CheckExtension("GL_ARB_draw_buffers", drawbuffersfuncs, "-nodrawbuffers", false);
        vid.support.arb_multitexture = GL_CheckExtension("GL_ARB_multitexture", multitexturefuncs, "-nomtex", false);
        vid.support.arb_occlusion_query = GL_CheckExtension("GL_ARB_occlusion_query", occlusionqueryfuncs, "-noocclusionquery", false);
+       vid.support.arb_query_buffer_object = GL_CheckExtension("GL_ARB_query_buffer_object", NULL, "-noquerybuffer", true);
        vid.support.arb_shadow = GL_CheckExtension("GL_ARB_shadow", NULL, "-noshadow", false);
        vid.support.arb_texture_compression = GL_CheckExtension("GL_ARB_texture_compression", texturecompressionfuncs, "-notexturecompression", false);
        vid.support.arb_texture_cube_map = GL_CheckExtension("GL_ARB_texture_cube_map", NULL, "-nocubemap", false);
@@ -1110,6 +1111,7 @@ void VID_CheckExtensions(void)
 // COMMANDLINEOPTION: GL: -nofbo disables GL_EXT_framebuffer_object (which accelerates rendering), only used if GL_ARB_fragment_shader is also available
 // COMMANDLINEOPTION: GL: -nomtex disables GL_ARB_multitexture (required for faster map rendering)
 // COMMANDLINEOPTION: GL: -noocclusionquery disables GL_ARB_occlusion_query (which allows coronas to fade according to visibility, and potentially used for rendering optimizations)
+// COMMANDLINEOPTION: GL: -noquerybuffer disables GL_ARB_query_buffer_object (which allows corona fading without synchronous rendering)
 // COMMANDLINEOPTION: GL: -nos3tc disables GL_EXT_texture_compression_s3tc (which allows use of .dds texture caching)
 // COMMANDLINEOPTION: GL: -noseparatestencil disables use of OpenGL2.0 glStencilOpSeparate and GL_ATI_separate_stencil extensions (which accelerate shadow rendering)
 // COMMANDLINEOPTION: GL: -noshadow disables use of GL_ARB_shadow (required for hardware shadowmap filtering)
@@ -2122,6 +2124,7 @@ void VID_Soft_SharedSetup(void)
        vid.support.arb_depth_texture = true;
        vid.support.arb_draw_buffers = true;
        vid.support.arb_occlusion_query = true;
+       vid.support.arb_query_buffer_object = false;
        vid.support.arb_shadow = true;
        //vid.support.arb_texture_compression = true;
        vid.support.arb_texture_cube_map = true;
index 544d24b..24379fb 100644 (file)
--- a/vid_wgl.c
+++ b/vid_wgl.c
@@ -1525,6 +1525,7 @@ qboolean VID_InitModeDX(viddef_mode_t *mode, int version)
        vid.support.arb_depth_texture = true;
        vid.support.arb_draw_buffers = vid_d3d9caps.NumSimultaneousRTs > 1;
        vid.support.arb_occlusion_query = true; // can't find a cap for this
+       vid.support.arb_query_buffer_object = true;
        vid.support.arb_shadow = true;
        vid.support.arb_texture_compression = true;
        vid.support.arb_texture_cube_map = true;