Fix severe bugs in mul128 implementation which was breaking the Lehmer RNG.

author havoc <havoc@d7cf8633-e32d-0410-b094-e92efae38249>

Mon, 28 Nov 2016 03:56:16 +0000 (03:56 +0000)

committer havoc <havoc@d7cf8633-e32d-0410-b094-e92efae38249>

Mon, 28 Nov 2016 03:56:16 +0000 (03:56 +0000)
author havoc <havoc@d7cf8633-e32d-0410-b094-e92efae38249>
Mon, 28 Nov 2016 03:56:16 +0000 (03:56 +0000)
committer havoc <havoc@d7cf8633-e32d-0410-b094-e92efae38249>
Mon, 28 Nov 2016 03:56:16 +0000 (03:56 +0000)
diff --git a/mathlib.c b/mathlib.c

index b228576803c9f1956fa03040b0eec9e61f651b97..119f8bf79fe75cfa4a5f6ae80833d396b83a8f67 100644 (file)
--- a/mathlib.c
+++ b/mathlib.c
@@ -836,6 +836,7 @@ float RadiusFromBoundsAndOrigin (const vec3_t mins, const vec3_t maxs, const vec
         return sqrt(max(m1[0], m2[0]) + max(m1[1], m2[1]) + max(m1[2], m2[2]));
  }
  
+static void Math_RandomSeed_UnitTests(void);
  void Mathlib_Init(void)
  {
         int a;
@@ -844,6 +845,8 @@ void Mathlib_Init(void)
         ixtable[0] = 0;
         for (a = 1;a < 4096;a++)
                 ixtable[a] = 1.0f / a;
+
+       Math_RandomSeed_UnitTests();
  }
  
  #include "matrixlib.h"
@@ -902,30 +905,129 @@ int LoopingFrameNumberFromDouble(double t, int loopframes)
  
  static unsigned int mul_Lecuyer[4] = { 0x12e15e35, 0xb500f16e, 0x2e714eb2, 0xb37916a5 };
  
-static void mul128(unsigned int a[], unsigned int b[], unsigned int dest[4])
+static void mul128(const unsigned int a[], const unsigned int b[], unsigned int dest[4])
  {
+#ifdef __GNUC__
+       unsigned __int128 ia = (a[0] << 96) + (a[1] << 64) + (a[2] << 32) + (a[3]);
+       unsigned __int128 ib = (b[0] << 96) + (b[1] << 64) + (b[2] << 32) + (b[3]);
+       unsigned __int128 id = ia * ib;
+       dest[0] = (id >> 96) & 0xffffffff;
+       dest[1] = (id >> 64) & 0xffffffff;
+       dest[2] = (id >> 32) & 0xffffffff;
+       dest[3] = (id) & 0xffffffff;
+#else
         unsigned long long t[4];
-       t[0] = a[0] * b[0];
-       t[1] = a[1] * b[1];
-       t[2] = a[2] * b[2];
-       t[3] = a[3] * b[3];
-
-       // this is complicated because C doesn't have a way to make use of the
-       // cpu status carry flag, so we do it all in reverse order from what
-       // would otherwise make sense, and have to make multiple passes...
-       t[3] += t[2] >> 32; t[2] &= 0xffffffff;
-       t[2] += t[1] >> 32; t[1] &= 0xffffffff;
-       t[1] += t[0] >> 32; t[0] &= 0xffffffff;
  
-       t[3] += t[2] >> 32; t[2] &= 0xffffffff;
-       t[2] += t[1] >> 32; t[1] &= 0xffffffff;
-
-       t[3] += t[2] >> 32; t[2] &= 0xffffffff;
+       // this multiply chain is relatively straightforward - a[] is repeatedly
+       // added with shifts based on b[] and the results stored into uint64,
+       // but due to C limitations (no access to carry flag) we have to resolve
+       // carries in a really lame way which wastes a fair number of ops
+       // (repeatedly iterating MSB to LSB, rather than LSB to MSB with carry),
+       // an alternative would be to use 16bit multiplies and resolve carries
+       // only at the end, but that would be twice as many multiplies...
+       //
+       // note: >> 32 is a function call in win32 MSVS2015 debug builds.
+       t[0] = (unsigned long long)a[0] * b[3];
+       t[1] = (unsigned long long)a[1] * b[3];
+       t[2] = (unsigned long long)a[2] * b[3];
+       t[3] = (unsigned long long)a[3] * b[3];
+       t[0] += t[1] >> 32;
+       t[1] &= 0xffffffff;
+       t[1] += t[2] >> 32;
+       t[2] &= 0xffffffff;
+       t[2] += t[3] >> 32;
+
+       t[0] += t[1] >> 32;
+       t[1] &= 0xffffffff;
+       t[1] += t[2] >> 32;
+       t[2] &= 0xffffffff;
+
+       t[0] += t[1] >> 32;
+       t[1] &= 0xffffffff;
+
+       t[0] += (unsigned long long)a[1] * b[2];
+       t[1] += (unsigned long long)a[2] * b[2];
+       t[2] += (unsigned long long)a[3] * b[2];
+       t[0] += t[1] >> 32;
+       t[1] &= 0xffffffff;
+       t[1] += t[2] >> 32;
+
+       t[0] += t[1] >> 32;
+       t[1] &= 0xffffffff;
+
+       t[0] += (unsigned long long)a[2] * b[1];
+       t[1] += (unsigned long long)a[3] * b[1];
+       t[0] += t[1] >> 32;
+
+       t[0] += (unsigned long long)a[3] * b[0];
  
         dest[0] = t[0] & 0xffffffff;
         dest[1] = t[1] & 0xffffffff;
         dest[2] = t[2] & 0xffffffff;
         dest[3] = t[3] & 0xffffffff;
+#endif
+}
+
+void testmul128(unsigned int a0, unsigned int a1, unsigned int a2, unsigned int a3, unsigned int b0, unsigned int b1, unsigned int b2, unsigned int b3, unsigned int x0, unsigned int x1, unsigned int x2, unsigned int x3)
+{
+       unsigned int a[4];
+       unsigned int b[4];
+       unsigned int expected[4];
+       unsigned int result[4];
+       a[0] = a0;
+       a[1] = a1;
+       a[2] = a2;
+       a[3] = a3;
+       b[0] = b0;
+       b[1] = b1;
+       b[2] = b2;
+       b[3] = b3;
+       expected[0] = x0;
+       expected[1] = x1;
+       expected[2] = x2;
+       expected[3] = x3;
+       mul128(a, b, result);
+       if (result[0] != expected[0]
+        || result[1] != expected[1]
+        || result[2] != expected[2]
+        || result[3] != expected[3])
+               Con_Printf("testmul128(\na = %08x %08x %08x %08x,\nb = %08x %08x %08x %08x,\nx = %08x %08x %08x %08x) instead computed\nc = %08x %08x %08x %08x\n", a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3], expected[0], expected[1], expected[2], expected[3], result[0], result[1], result[2], result[3]);
+}
+
+void Math_RandomSeed_UnitTests(void)
+{
+       testmul128(
+               0x00000000, 0x00000000, 0x00000000, 0x00000001,
+               0x00000000, 0x00000000, 0x00000000, 0x00000001,
+               0x00000000, 0x00000000, 0x00000000, 0x00000001);
+       testmul128(
+               0x00000000, 0x00000000, 0x00000000, 0x00000001,
+               0x00000000, 0x00000000, 0x00000001, 0x00000000,
+               0x00000000, 0x00000000, 0x00000001, 0x00000000);
+       testmul128(
+               0x00000000, 0x00000000, 0x00000001, 0x00000000,
+               0x00000000, 0x00000000, 0x00000000, 0x00000001,
+               0x00000000, 0x00000000, 0x00000001, 0x00000000);
+       testmul128(
+               0x00000000, 0x00000000, 0x00000000, 0x00000001,
+               0x00000001, 0x00000001, 0x00000001, 0x00000001,
+               0x00000001, 0x00000001, 0x00000001, 0x00000001);
+       testmul128(
+               0x00000000, 0x00000000, 0x00000000, 0x00000002,
+               0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+               0xffffffff, 0xffffffff, 0xffffffff, 0xfffffffe);
+       testmul128(
+               0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+               0x00000000, 0x00000000, 0x00000000, 0x00000002,
+               0xffffffff, 0xffffffff, 0xffffffff, 0xfffffffe);
+       testmul128(
+               0x00000000, 0x00000000, 0xffffffff, 0xffffffff,
+               0x00000000, 0x00000000, 0x00000002, 0x00000000,
+               0x00000001, 0xffffffff, 0xfffffffe, 0x00000000);
+       testmul128(
+               0x00000000, 0x00000000, 0x00000002, 0x00000000,
+               0x00000000, 0x00000000, 0xffffffff, 0xffffffff,
+               0x00000001, 0xffffffff, 0xfffffffe, 0x00000000);
  }
  
  void Math_RandomSeed_Reset(randomseed_t *r)
@@ -936,13 +1038,12 @@ void Math_RandomSeed_Reset(randomseed_t *r)
         r->s[3] = 0;
  }
  
-void Math_RandomSeed_FromInt(randomseed_t *r, unsigned int n)
+void Math_RandomSeed_FromInts(randomseed_t *r, unsigned int s0, unsigned int s1, unsigned int s2, unsigned int s3)
  {
-       // if the entire s[] is zero the algorithm would break completely, so make sure it isn't zero by putting a 1 here
-       r->s[0] = 1;
-       r->s[1] = 0;
-       r->s[2] = 0;
-       r->s[3] = n;
+       r->s[0] = s0;
+       r->s[1] = s1;
+       r->s[2] = s2;
+       r->s[3] = s3 | 1; // the Lehmer RNG requires that the seed be odd
  }
  
  unsigned long long Math_rand64(randomseed_t *r)
diff --git a/mathlib.h b/mathlib.h

index 472d5759e37a5af2df22bc5d28a51005d63d1fd8..0acb9500627fc2e3aa20f3c20f83c304e05273d3 100644 (file)
--- a/mathlib.h
+++ b/mathlib.h
@@ -311,7 +311,7 @@ typedef struct randomseed_s
  randomseed_t;
  
  void Math_RandomSeed_Reset(randomseed_t *r);
-void Math_RandomSeed_FromInt(randomseed_t *r, unsigned int n);
+void Math_RandomSeed_FromInts(randomseed_t *r, unsigned int s0, unsigned int s1, unsigned int s2, unsigned int s3);
  unsigned long long Math_rand64(randomseed_t *r);
  float Math_randomf(randomseed_t *r);
  float Math_crandomf(randomseed_t *r);
diff --git a/r_shadow.c b/r_shadow.c

index caef494e8cb40f75a34d8a4bff6fab04202fce3e..0d05ac1e4544a73ed4b4fcdfb8caebca3b638f0c 100644 (file)
--- a/r_shadow.c
+++ b/r_shadow.c
@@ -359,6 +359,7 @@ cvar_t r_shadow_bouncegrid_particlebounceintensity = {CVAR_SAVE, "r_shadow_bounc
  cvar_t r_shadow_bouncegrid_particleintensity = {CVAR_SAVE, "r_shadow_bouncegrid_particleintensity", "0.25", "brightness of particles contributing to bouncegrid texture"};
  cvar_t r_shadow_bouncegrid_sortlightpaths = {CVAR_SAVE, "r_shadow_bouncegrid_sortlightpaths", "1", "sort light paths before accumulating them into the bouncegrid texture, this reduces cpu cache misses"};
  cvar_t r_shadow_bouncegrid_lightpathsize = {CVAR_SAVE, "r_shadow_bouncegrid_lightpathsize", "1", "width of the light path for accumulation of light in the bouncegrid texture"};
+cvar_t r_shadow_bouncegrid_normalizevectors = { CVAR_SAVE, "r_shadow_bouncegrid_normalizevectors", "1", "normalize random vectors (otherwise their length can vary, which dims the lighting further from the light)" };
  cvar_t r_shadow_bouncegrid_static = {CVAR_SAVE, "r_shadow_bouncegrid_static", "1", "use static radiosity solution (high quality) rather than dynamic (splotchy)"};
  cvar_t r_shadow_bouncegrid_static_bounceminimumintensity = { CVAR_SAVE, "r_shadow_bouncegrid_static_bounceminimumintensity", "0.01", "stop bouncing once intensity drops below this fraction of the original particle color" };
  cvar_t r_shadow_bouncegrid_static_directionalshading = {CVAR_SAVE, "r_shadow_bouncegrid_static_directionalshading", "1", "whether to use directionalshading when in static mode"};
@@ -823,6 +824,7 @@ void R_Shadow_Init(void)
         Cvar_RegisterVariable(&r_shadow_bouncegrid_includedirectlighting);
         Cvar_RegisterVariable(&r_shadow_bouncegrid_intensity);
         Cvar_RegisterVariable(&r_shadow_bouncegrid_lightpathsize);
+       Cvar_RegisterVariable(&r_shadow_bouncegrid_normalizevectors);
         Cvar_RegisterVariable(&r_shadow_bouncegrid_particlebounceintensity);
         Cvar_RegisterVariable(&r_shadow_bouncegrid_particleintensity);
         Cvar_RegisterVariable(&r_shadow_bouncegrid_sortlightpaths);
@@ -2575,8 +2577,9 @@ static void R_Shadow_BounceGrid_GenerateSettings(r_shadow_bouncegrid_settings_t
         settings->spacing[0]                    = spacing;
         settings->spacing[1]                    = spacing;
         settings->spacing[2]                    = spacing;
-       settings->stablerandom                  = s ? 1 : r_shadow_bouncegrid_dynamic_stablerandom.integer;
+       settings->stablerandom                  = r_shadow_bouncegrid_dynamic_stablerandom.integer;
         settings->bounceminimumintensity2       = bounceminimumintensity * bounceminimumintensity;
+       settings->normalizevectors              = r_shadow_bouncegrid_normalizevectors.integer != 0;
  
         // bound the values for sanity
         settings->maxphotons = bound(1, settings->maxphotons, 25000000);
@@ -3273,7 +3276,7 @@ static void R_Shadow_BounceGrid_TracePhotons(r_shadow_bouncegrid_settings_t sett
         //trace_t cliptrace2;
         //trace_t cliptrace3;
         unsigned int lightindex;
-       unsigned int seed = (unsigned int)(realtime * 1000.0f);
+       unsigned int seed;
         randomseed_t randomseed;
         vec3_t shotcolor;
         vec3_t baseshotcolor;
@@ -3284,8 +3287,18 @@ static void R_Shadow_BounceGrid_TracePhotons(r_shadow_bouncegrid_settings_t sett
         vec_t radius;
         vec_t s;
         rtlight_t *rtlight;
+       union
+       {
+               unsigned int s[4];
+               double d;
+       }
+       rseed;
  
-       Math_RandomSeed_FromInt(&randomseed, seed);
+       // compute a seed for the unstable random modes
+       memset(&rseed, 0, sizeof(rseed));
+       rseed.d = realtime;
+       Math_RandomSeed_FromInts(&randomseed, rseed.s[0], rseed.s[1], rseed.s[2], rseed.s[3]);
+       seed = rseed.s[0] ^ rseed.s[1] ^ rseed.s[2] ^ rseed.s[3];
  
         r_shadow_bouncegrid_state.numsplatpaths = 0;
  
@@ -3325,21 +3338,33 @@ static void R_Shadow_BounceGrid_TracePhotons(r_shadow_bouncegrid_settings_t sett
                 r_refdef.stats[r_stat_bouncegrid_particles] += shootparticles;
                 // we stop caring about bounces once the brightness goes below this fraction of the original intensity
                 bounceminimumintensity2 = VectorLength(baseshotcolor) * settings.bounceminimumintensity2;
-               switch (settings.stablerandom)
+
+               // for stablerandom we start the RNG with the position of the light
+               if (settings.stablerandom > 0)
                 {
-               default:
-                       break;
-               case 1:
-                       Math_RandomSeed_FromInt(&randomseed, lightindex * 11937);
-                       // prime the random number generator a bit
-                       Math_crandomf(&randomseed);
-                       break;
-               case 2:
-                       seed = lightindex * 11937;
-                       // prime the random number generator a bit
-                       lhcheeserand(seed);
-                       break;
+                       union
+                       {
+                               unsigned int i[4];
+                               float f[4];
+                       }
+                       u;
+                       u.f[0] = rtlight->shadoworigin[0];
+                       u.f[1] = rtlight->shadoworigin[1];
+                       u.f[2] = rtlight->shadoworigin[2];
+                       u.f[3] = 1;
+                       switch (settings.stablerandom)
+                       {
+                       default:
+                               break;
+                       case 1:
+                               seed = u.i[0] ^ u.i[1] ^ u.i[2] ^ u.i[3];
+                               break;
+                       case 2:
+                               Math_RandomSeed_FromInts(&randomseed, u.i[0], u.i[1], u.i[2], u.i[3]);
+                               break;
+                       }
                 }
+
                 for (shotparticles = 0;shotparticles < shootparticles;shotparticles++)
                 {
                         VectorCopy(baseshotcolor, shotcolor);
@@ -3358,25 +3383,30 @@ static void R_Shadow_BounceGrid_TracePhotons(r_shadow_bouncegrid_settings_t sett
                                 break;
                         case -1:
                         case 1:
-                               VectorLehmerRandom(&randomseed, clipend);
+                               VectorCheeseRandom(seed, clipend);
                                 if (settings.bounceanglediffuse)
                                 {
                                         // we want random to be stable, so we still have to do all the random we would have done
                                         for (bouncecount = 0; bouncecount < maxbounce; bouncecount++)
-                                               VectorLehmerRandom(&randomseed, bouncerandom[bouncecount]);
+                                               VectorCheeseRandom(seed, bouncerandom[bouncecount]);
                                 }
                                 break;
                         case -2:
                         case 2:
-                               VectorCheeseRandom(seed, clipend);
+                               VectorLehmerRandom(&randomseed, clipend);
                                 if (settings.bounceanglediffuse)
                                 {
                                         // we want random to be stable, so we still have to do all the random we would have done
                                         for (bouncecount = 0; bouncecount < maxbounce; bouncecount++)
-                                               VectorCheeseRandom(seed, bouncerandom[bouncecount]);
+                                               VectorLehmerRandom(&randomseed, bouncerandom[bouncecount]);
                                 }
                                 break;
                         }
+
+                       // we want a uniform distribution spherically, not merely within the sphere
+                       if (settings.normalizevectors)
+                               VectorNormalize(clipend);
+
                         VectorMA(clipstart, radius, clipend, clipend);
                         for (bouncecount = 0;;bouncecount++)
                         {
diff --git a/r_shadow.h b/r_shadow.h

index 10f8e1e0606bf071f99c22a78b5234d93cb737d9..fc36f337765f56694c1419406948f92de21d25af 100644 (file)
--- a/r_shadow.h
+++ b/r_shadow.h
@@ -45,6 +45,7 @@ typedef struct r_shadow_bouncegrid_settings_s
         qboolean directionalshading;
         qboolean includedirectlighting;
         qboolean blur;
+       qboolean normalizevectors;
         int floatcolors;
         float dlightparticlemultiplier;
         qboolean hitmodels;
author	havoc <havoc@d7cf8633-e32d-0410-b094-e92efae38249>
	Mon, 28 Nov 2016 03:56:16 +0000 (03:56 +0000)
committer	havoc <havoc@d7cf8633-e32d-0410-b094-e92efae38249>
	Mon, 28 Nov 2016 03:56:16 +0000 (03:56 +0000)
mathlib.c		patch \| blob \| history
mathlib.h		patch \| blob \| history
r_shadow.c		patch \| blob \| history
r_shadow.h		patch \| blob \| history