util.c

   1 /*
   2  * Copyright (C) 2012, 2013
   3  *     Dale Weiler
   4  *     Wolfgang Bumiller
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a copy of
   7  * this software and associated documentation files (the "Software"), to deal in
   8  * the Software without restriction, including without limitation the rights to
   9  * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10  * of the Software, and to permit persons to whom the Software is furnished to do
  11  * so, subject to the following conditions:
  12  *
  13  * The above copyright notice and this permission notice shall be included in all
  14  * copies or substantial portions of the Software.
  15  *
  16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22  * SOFTWARE.
  23  */
  24 #include <string.h>
  25 #include <stdlib.h>
  26
  27 #include "gmqcc.h"
  28
/*
 * Printable names of the instructions, one string per slot up to
 * VINSTR_END.
 *
 * Initially this was handled with a table in the gmqcc.h header, but
 * the contents of that table were duplicated for each translation unit,
 * causing all these strings to be duplicated for every .c file the
 * header was included into. Defining the table once here avoids that.
 * It is a 'utility' because the executor also depends on it for
 * disassembled bytecode.
 *
 * NOTE(review): presumably indexed by opcode value, so the order of the
 * strings must match the instruction enumeration -- confirm against
 * gmqcc.h before reordering or inserting entries.
 */
const char *util_instr_str[VINSTR_END] = {
    "DONE",       "MUL_F",      "MUL_V",      "MUL_FV",
    "MUL_VF",     "DIV_F",      "ADD_F",      "ADD_V",
    "SUB_F",      "SUB_V",      "EQ_F",       "EQ_V",
    "EQ_S",       "EQ_E",       "EQ_FNC",     "NE_F",
    "NE_V",       "NE_S",       "NE_E",       "NE_FNC",
    "LE",         "GE",         "LT",         "GT",
    "LOAD_F",     "LOAD_V",     "LOAD_S",     "LOAD_ENT",
    "LOAD_FLD",   "LOAD_FNC",   "ADDRESS",    "STORE_F",
    "STORE_V",    "STORE_S",    "STORE_ENT",  "STORE_FLD",
    "STORE_FNC",  "STOREP_F",   "STOREP_V",   "STOREP_S",
    "STOREP_ENT", "STOREP_FLD", "STOREP_FNC", "RETURN",
    "NOT_F",      "NOT_V",      "NOT_S",      "NOT_ENT",
    "NOT_FNC",    "IF",         "IFNOT",      "CALL0",
    "CALL1",      "CALL2",      "CALL3",      "CALL4",
    "CALL5",      "CALL6",      "CALL7",      "CALL8",
    "STATE",      "GOTO",       "AND",        "OR",
    "BITAND",     "BITOR"
};
  56
  57 void util_debug(const char *area, const char *ms, ...) {
  58     va_list  va;
  59     if (!OPTS_OPTION_BOOL(OPTION_DEBUG))
  60         return;
  61
  62     if (!strcmp(area, "MEM") && !OPTS_OPTION_BOOL(OPTION_MEMCHK))
  63         return;
  64
  65     va_start(va, ms);
  66     con_out ("[%s] ", area);
  67     con_vout(ms, va);
  68     va_end  (va);
  69 }
  70
  71 /*
  72  * only required if big endian .. otherwise no need to swap
  73  * data.
  74  */
  75 #if PLATFORM_BYTE_ORDER == GMQCC_BYTE_ORDER_BIG
  76     static GMQCC_INLINE void util_swap16(uint16_t *d, size_t l) {
  77         while (l--) {
  78             d[l] = (d[l] << 8) | (d[l] >> 8);
  79         }
  80     }
  81
  82     static GMQCC_INLINE void util_swap32(uint32_t *d, size_t l) {
  83         while (l--) {
  84             uint32_t v;
  85             v = ((d[l] << 8) & 0xFF00FF00) | ((d[l] >> 8) & 0x00FF00FF);
  86             d[l] = (v << 16) | (v >> 16);
  87         }
  88     }
  89
  90     /* Some strange system doesn't like constants that big, AND doesn't recognize an ULL suffix
  91      * so let's go the safe way
  92      */
  93     static GMQCC_INLINE void util_swap64(uint32_t *d, size_t l) {
  94         /*
  95         while (l--) {
  96             uint64_t v;
  97             v = ((d[l] << 8) & 0xFF00FF00FF00FF00) | ((d[l] >> 8) & 0x00FF00FF00FF00FF);
  98             v = ((v << 16) & 0xFFFF0000FFFF0000) | ((v >> 16) & 0x0000FFFF0000FFFF);
  99             d[l] = (v << 32) | (v >> 32);
 100         }
 101         */
 102         size_t i;
 103         for (i = 0; i < l; i += 2) {
 104             uint32_t v1 = d[i];
 105             d[i] = d[i+1];
 106             d[i+1] = v1;
 107             util_swap32(d+i, 2);
 108         }
 109     }
 110 #endif
 111
/*
 * Convert a buffer between file byte order and host byte order, in place.
 *
 * What this compiles to depends on PLATFORM_BYTE_ORDER:
 *   - little endian: nothing to do, the function returns immediately;
 *   - big endian:    the buffer is byte swapped element by element;
 *   - -1 (unknown):  the byte order is probed at run time.
 *
 *   _data    - the buffer to convert
 *   length   - size of the buffer; it is divided by the element size
 *              before being handed to the swap helpers, so it is
 *              treated as a byte count
 *   typesize - size in bytes of one element: 1, 2, 4 or 8.  Any other
 *              value terminates the process on big endian builds.
 *
 * NOTE(review): in the run time probing branch the function returns
 * early on a little endian host, but on a big endian host it simply
 * falls off the end without swapping anything (the swap helpers are
 * only compiled for PLATFORM_BYTE_ORDER == GMQCC_BYTE_ORDER_BIG).
 * Looks like a missing code path -- confirm.
 */
void util_endianswap(void *_data, size_t length, unsigned int typesize) {
#   if PLATFORM_BYTE_ORDER == -1 /* runtime check */
    /*
     * typesize is a small non-zero number, so its first byte in memory
     * is non-zero only when the least significant byte is stored first
     */
    if (*((char*)&typesize))
        return;
#else
    /* prevent unused warnings */
    (void) _data;
    (void) length;
    (void) typesize;

#   if PLATFORM_BYTE_ORDER == GMQCC_BYTE_ORDER_LITTLE
        return;
#   else
        switch (typesize) {
            case 1: return; /* single bytes have no byte order */
            case 2:
                util_swap16((uint16_t*)_data, length>>1);
                return;
            case 4:
                util_swap32((uint32_t*)_data, length>>2);
                return;
            case 8:
                util_swap64((uint32_t*)_data, length>>3);
                return;

            default: exit(EXIT_FAILURE); /* unsupported element size: give up */
        }
#   endif
#endif
}
 142
/*
 * CRC algorithms vary in the width of the polynomial, the value of said polynomial,
 * the initial value used for the register, whether the bits of each byte are reflected
 * before being processed, whether the algorithm itself feeds input bytes through the
 * register or XORs them with a byte from one end and then straight into the table, as
 * well as (but not limited to the idea of reflected versions) whether the final register
 * value becomes reversed, and finally whether a value is used to XOR the final
 * register value.  As such you can already imagine how painfully annoying CRCs are;
 * of course we target Quake, which expects its own certain set of rules for proper
 * calculation of a CRC.
 *
 * Most traditional CRC algorithms use a reflected, table driven method, where a value
 * or register is reflected if its bits are swapped around its center.  For example:
 * the bits 0101 are the 4-bit reflection of 1010, and respectively 0011 would be the
 * reflection of 1100. Quake however expects a NON-Reflected CRC on the output, but still
 * requires a final XOR on the values (0xFFFF and 0x0000); this is a standard CCITT CRC-16,
 * which I respectfully as a programmer don't agree with.
 *
 * So now you know what we target, and why we target it, despite how unsettling it may seem,
 * but those are what Quake seems to request.
 */
 164
 165 static const uint16_t util_crc16_table[] = {
 166     0x0000,     0x1021,     0x2042,     0x3063,     0x4084,     0x50A5,
 167     0x60C6,     0x70E7,     0x8108,     0x9129,     0xA14A,     0xB16B,
 168     0xC18C,     0xD1AD,     0xE1CE,     0xF1EF,     0x1231,     0x0210,
 169     0x3273,     0x2252,     0x52B5,     0x4294,     0x72F7,     0x62D6,
 170     0x9339,     0x8318,     0xB37B,     0xA35A,     0xD3BD,     0xC39C,
 171     0xF3FF,     0xE3DE,     0x2462,     0x3443,     0x0420,     0x1401,
 172     0x64E6,     0x74C7,     0x44A4,     0x5485,     0xA56A,     0xB54B,
 173     0x8528,     0x9509,     0xE5EE,     0xF5CF,     0xC5AC,     0xD58D,
 174     0x3653,     0x2672,     0x1611,     0x0630,     0x76D7,     0x66F6,
 175     0x5695,     0x46B4,     0xB75B,     0xA77A,     0x9719,     0x8738,
 176     0xF7DF,     0xE7FE,     0xD79D,     0xC7BC,     0x48C4,     0x58E5,
 177     0x6886,     0x78A7,     0x0840,     0x1861,     0x2802,     0x3823,
 178     0xC9CC,     0xD9ED,     0xE98E,     0xF9AF,     0x8948,     0x9969,
 179     0xA90A,     0xB92B,     0x5AF5,     0x4AD4,     0x7AB7,     0x6A96,
 180     0x1A71,     0x0A50,     0x3A33,     0x2A12,     0xDBFD,     0xCBDC,
 181     0xFBBF,     0xEB9E,     0x9B79,     0x8B58,     0xBB3B,     0xAB1A,
 182     0x6CA6,     0x7C87,     0x4CE4,     0x5CC5,     0x2C22,     0x3C03,
 183     0x0C60,     0x1C41,     0xEDAE,     0xFD8F,     0xCDEC,     0xDDCD,
 184     0xAD2A,     0xBD0B,     0x8D68,     0x9D49,     0x7E97,     0x6EB6,
 185     0x5ED5,     0x4EF4,     0x3E13,     0x2E32,     0x1E51,     0x0E70,
 186     0xFF9F,     0xEFBE,     0xDFDD,     0xCFFC,     0xBF1B,     0xAF3A,
 187     0x9F59,     0x8F78,     0x9188,     0x81A9,     0xB1CA,     0xA1EB,
 188     0xD10C,     0xC12D,     0xF14E,     0xE16F,     0x1080,     0x00A1,
 189     0x30C2,     0x20E3,     0x5004,     0x4025,     0x7046,     0x6067,
 190     0x83B9,     0x9398,     0xA3FB,     0xB3DA,     0xC33D,     0xD31C,
 191     0xE37F,     0xF35E,     0x02B1,     0x1290,     0x22F3,     0x32D2,
 192     0x4235,     0x5214,     0x6277,     0x7256,     0xB5EA,     0xA5CB,
 193     0x95A8,     0x8589,     0xF56E,     0xE54F,     0xD52C,     0xC50D,
 194     0x34E2,     0x24C3,     0x14A0,     0x0481,     0x7466,     0x6447,
 195     0x5424,     0x4405,     0xA7DB,     0xB7FA,     0x8799,     0x97B8,
 196     0xE75F,     0xF77E,     0xC71D,     0xD73C,     0x26D3,     0x36F2,
 197     0x0691,     0x16B0,     0x6657,     0x7676,     0x4615,     0x5634,
 198     0xD94C,     0xC96D,     0xF90E,     0xE92F,     0x99C8,     0x89E9,
 199     0xB98A,     0xA9AB,     0x5844,     0x4865,     0x7806,     0x6827,
 200     0x18C0,     0x08E1,     0x3882,     0x28A3,     0xCB7D,     0xDB5C,
 201     0xEB3F,     0xFB1E,     0x8BF9,     0x9BD8,     0xABBB,     0xBB9A,
 202     0x4A75,     0x5A54,     0x6A37,     0x7A16,     0x0AF1,     0x1AD0,
 203     0x2AB3,     0x3A92,     0xFD2E,     0xED0F,     0xDD6C,     0xCD4D,
 204     0xBDAA,     0xAD8B,     0x9DE8,     0x8DC9,     0x7C26,     0x6C07,
 205     0x5C64,     0x4C45,     0x3CA2,     0x2C83,     0x1CE0,     0x0CC1,
 206     0xEF1F,     0xFF3E,     0xCF5D,     0xDF7C,     0xAF9B,     0xBFBA,
 207     0x8FD9,     0x9FF8,     0x6E17,     0x7E36,     0x4E55,     0x5E74,
 208     0x2E93,     0x3EB2,     0x0ED1,     0x1EF0
 209 };
 210
 211 /* Non - Reflected */
 212 uint16_t util_crc16(uint16_t current, const char *k, size_t len) {
 213     register uint16_t h = current;
 214     for (; len; --len, ++k)
 215         h = util_crc16_table[(h>>8)^((unsigned char)*k)]^(h<<8);
 216     return h;
 217 }
/* Reflected variation (kept for reference only, not compiled) */
 219 #if 0
 220 uint16_t util_crc16(const char *k, int len, const short clamp) {
 221     register uint16_t h= (uint16_t)0xFFFFFFFF;
 222     for (; len; --len, ++k)
 223         h = util_crc16_table[(h^((unsigned char)*k))&0xFF]^(h>>8);
 224     return (~h)%clamp;
 225 }
 226 #endif
 227
 228 /*
 229  * modifier is the match to make and the transpsition from it, while add is the upper-value that determines the
 230  * transposion from uppercase to lower case.
 231  */
 232 static GMQCC_INLINE size_t util_strtransform(const char *in, char *out, size_t outsz, const char *mod, int add) {
 233     size_t sz = 1;
 234     for (; *in && sz < outsz; ++in, ++out, ++sz) {
 235         *out = (*in == mod[0])
 236                     ? mod[1]
 237                     : (util_isalpha(*in) && ((add > 0) ? util_isupper(*in) : !util_isupper(*in)))
 238                         ? *in + add
 239                         : *in;
 240     }
 241     *out = 0;
 242     return sz-1;
 243 }
 244
 245 size_t util_strtocmd(const char *in, char *out, size_t outsz) {
 246     return util_strtransform(in, out, outsz, "-_", 'A'-'a');
 247 }
 248 size_t util_strtononcmd(const char *in, char *out, size_t outsz) {
 249     return util_strtransform(in, out, outsz, "_-", 'a'-'A');
 250 }
 251 size_t util_optimizationtostr(const char *in, char *out, size_t outsz) {
 252     return util_strtransform(in, out, outsz, "_ ", 'a'-'A');
 253 }
 254
 255 /*
 256  * Portable implementation of vasprintf/asprintf. Assumes vsnprintf
 257  * exists, otherwise compiler error.
 258  *
 259  * TODO: fix for MSVC ....
 260  */
 261 int util_vasprintf(char **dat, const char *fmt, va_list args) {
 262     int   ret;
 263     int   len;
 264     char *tmp = NULL;
 265
 266     /*
 267      * For visuals tido _vsnprintf doesn't tell you the length of a
 268      * formatted string if it overflows. However there is a MSVC
 269      * intrinsic (which is documented wrong) called _vcsprintf which
 270      * will return the required amount to allocate.
 271      */
 272     #ifdef _MSC_VER
 273         if ((len = _vscprintf(fmt, args)) < 0) {
 274             *dat = NULL;
 275             return -1;
 276         }
 277
 278         tmp = (char*)mem_a(len + 1);
 279         if ((ret = _vsnprintf_s(tmp, len+1, len+1, fmt, args)) != len) {
 280             mem_d(tmp);
 281             *dat = NULL;
 282             return -1;
 283         }
 284         *dat = tmp;
 285         return len;
 286     #else
 287         /*
 288          * For everything else we have a decent conformint vsnprintf that
 289          * returns the number of bytes needed.  We give it a try though on
 290          * a short buffer, since efficently speaking, it could be nice to
 291          * above a second vsnprintf call.
 292          */
 293         char    buf[128];
 294         va_list cpy;
 295         va_copy(cpy, args);
 296         len = vsnprintf(buf, sizeof(buf), fmt, cpy);
 297         va_end (cpy);
 298
 299         if (len < (int)sizeof(buf)) {
 300             *dat = util_strdup(buf);
 301             return len;
 302         }
 303
 304         /* not large enough ... */
 305         tmp = (char*)mem_a(len + 1);
 306         if ((ret = vsnprintf(tmp, len + 1, fmt, args)) != len) {
 307             mem_d(tmp);
 308             *dat = NULL;
 309             return -1;
 310         }
 311
 312         *dat = tmp;
 313         return len;
 314     #endif
 315 }
 316 int util_asprintf(char **ret, const char *fmt, ...) {
 317     va_list  args;
 318     int      read;
 319     va_start(args, fmt);
 320     read = util_vasprintf(ret, fmt, args);
 321     va_end  (args);
 322
 323     return read;
 324 }
 325
/*
 * These are various re-implementations (wrapping the real ones) of
 * string functions that MSVC considers unsafe. We wrap these up and
 * use the safe variations on MSVC.
 */
 331 #ifdef _MSC_VER
 332     static char **util_strerror_allocated() {
 333         static char **data = NULL;
 334         return data;
 335     }
 336
 337     static void util_strerror_cleanup(void) {
 338         size_t i;
 339         char  **data = util_strerror_allocated();
 340         for (i = 0; i < vec_size(data); i++)
 341             mem_d(data[i]);
 342         vec_free(data);
 343     }
 344
 345     const char *util_strerror(int num) {
 346         char         *allocated = NULL;
 347         static bool   install   = false;
 348         static size_t tries     = 0;
 349         char        **vector    = util_strerror_allocated();
 350
 351         /* try installing cleanup handler */
 352         while (!install) {
 353             if (tries == 32)
 354                 return "(unknown)";
 355
 356             install = !atexit(&util_strerror_cleanup);
 357             tries ++;
 358         }
 359
 360         allocated = (char*)mem_a(4096); /* A page must be enough */
 361         strerror_s(allocated, 4096, num);
 362
 363         vec_push(vector, allocated);
 364         return (const char *)allocated;
 365     }
 366
 367     int util_snprintf(char *src, size_t bytes, const char *format, ...) {
 368         int      rt;
 369         va_list  va;
 370         va_start(va, format);
 371
 372         rt = vsprintf_s(src, bytes, format, va);
 373         va_end  (va);
 374
 375         return rt;
 376     }
 377
 378     char *util_strcat(char *dest, const char *src) {
 379         strcat_s(dest, strlen(src), src);
 380         return dest;
 381     }
 382
 383     char *util_strncpy(char *dest, const char *src, size_t num) {
 384         strncpy_s(dest, num, src, num);
 385         return dest;
 386     }
 387 #else
 388     const char *util_strerror(int num) {
 389         return strerror(num);
 390     }
 391
 392     int util_snprintf(char *src, size_t bytes, const char *format, ...) {
 393         int      rt;
 394         va_list  va;
 395         va_start(va, format);
 396         rt = vsnprintf(src, bytes, format, va);
 397         va_end  (va);
 398
 399         return rt;
 400     }
 401
 402     char *util_strcat(char *dest, const char *src) {
 403         return strcat(dest, src);
 404     }
 405
 406     char *util_strncpy(char *dest, const char *src, size_t num) {
 407         return strncpy(dest, src, num);
 408     }
 409
 410 #endif /*! _MSC_VER */
 411
/*
 * Implementation of the Mersenne twister PRNG (pseudo random number
 * generator).  Implementation of MT19937.  Has a period of 2^19937-1
 * which is a Mersenne prime (hence the name).
 *
 * Implemented from specification and original paper:
 * http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/ARTICLES/mt.pdf
 *
 * This code is placed in the public domain by me personally
 * (Dale Weiler, a.k.a graphitemaster).
 */
 423
 424 #define MT_SIZE    624
 425 #define MT_PERIOD  397
 426 #define MT_SPACE   (MT_SIZE - MT_PERIOD)
 427
 428 static uint32_t mt_state[MT_SIZE];
 429 static size_t   mt_index = 0;
 430
 431 static GMQCC_INLINE void mt_generate(void) {
 432     /*
 433      * The loop has been unrolled here: the original paper and implemenation
 434      * Called for the following code:
 435      * for (register unsigned i = 0; i < MT_SIZE; ++i) {
 436      *     register uint32_t load;
 437      *     load  = (0x80000000 & mt_state[i])                 // most  significant 32nd bit
 438      *     load |= (0x7FFFFFFF & mt_state[(i + 1) % MT_SIZE]) // least significant 31nd bit
 439      *
 440      *     mt_state[i] = mt_state[(i + MT_PERIOD) % MT_SIZE] ^ (load >> 1);
 441      *
 442      *     if (load & 1) mt_state[i] ^= 0x9908B0DF;
 443      * }
 444      *
 445      * This essentially is a waste: we have two modulus operations, and
 446      * a branch that is executed every iteration from [0, MT_SIZE).
 447      *
 448      * Please see: http://www.quadibloc.com/crypto/co4814.htm for more
 449      * information on how this clever trick works.
 450      */
 451     static const uint32_t matrix[2] = {
 452         0x00000000,
 453         0x9908B0Df
 454     };
 455     /*
 456      * This register gives up a little more speed by instructing the compiler
 457      * to force these into CPU registers (they're counters for indexing mt_state
 458      * which we can force the compiler to generate prefetch instructions for)
 459      */
 460     register uint32_t y;
 461     register uint32_t i;
 462
 463     /*
 464      * Said loop has been unrolled for MT_SPACE (226 iterations), opposed
 465      * to [0, MT_SIZE)  (634 iterations).
 466      */
 467     for (i = 0; i < MT_SPACE-1; ++i) {
 468         y           = (0x80000000 & mt_state[i]) | (0x7FFFFFF & mt_state[i + 1]);
 469         mt_state[i] = mt_state[i + MT_PERIOD] ^ (y >> 1) ^ matrix[y & 1];
 470
 471         i ++; /* loop unroll */
 472
 473         y           = (0x80000000 & mt_state[i]) | (0x7FFFFFF & mt_state[i + 1]);
 474         mt_state[i] = mt_state[i + MT_PERIOD] ^ (y >> 1) ^ matrix[y & 1];
 475     }
 476
 477     /*
 478      * collapsing the walls unrolled (evenly dividing 396 [632-227 = 396
 479      * = 2*2*3*3*11])
 480      */
 481     i = MT_SPACE;
 482     while (i < MT_SIZE-2) {
 483         /*
 484          * We expand this 11 times .. manually, no macros are required
 485          * here. This all fits in the CPU cache.
 486          */
 487         y           = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
 488         mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
 489         ++i;
 490         y           = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
 491         mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
 492         ++i;
 493         y           = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
 494         mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
 495         ++i;
 496         y           = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
 497         mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
 498         ++i;
 499         y           = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
 500         mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
 501         ++i;
 502         y           = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
 503         mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
 504         ++i;
 505         y           = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
 506         mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
 507         ++i;
 508         y           = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
 509         mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
 510         ++i;
 511         y           = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
 512         mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
 513         ++i;
 514         y           = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
 515         mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
 516         ++i;
 517         y           = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
 518         mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
 519         ++i;
 520     }
 521
 522     /* i = mt_state[623] */
 523     y                     = (0x80000000 & mt_state[MT_SIZE - 1]) | (0x7FFFFFFF & mt_state[MT_SIZE - 1]);
 524     mt_state[MT_SIZE - 1] = mt_state[MT_PERIOD - 1] ^ (y >> 1) ^ matrix[y & 1];
 525 }
 526
 527 void util_seed(uint32_t value) {
 528     /*
 529      * We seed the mt_state with a LCG (linear congruential generator)
 530      * We're operating exactly on exactly m=32, so there is no need to
 531      * use modulus.
 532      *
 533      * The multipler of choice is 0x6C07865, also knows as the Borosh-
 534      * Niederreiter multipler used for modulus 2^32.  More can be read
 535      * about this in Knuth's TAOCP Volume 2, page 106.
 536      *
 537      * If you don't own TAOCP something is wrong with you :-) .. so I
 538      * also provided a link to the original paper by Borosh and
 539      * Niederreiter.  It's called "Optional Multipliers for PRNG by The
 540      * Linear Congruential Method" (1983).
 541      * http://en.wikipedia.org/wiki/Linear_congruential_generator
 542      *
 543      * From said page, it says the following:
 544      * "A common Mersenne twister implementation, interestingly enough
 545      *  used an LCG to generate seed data."
 546      *
 547      * Remarks:
 548      * The data we're operating on is 32-bits for the mt_state array, so
 549      * there is no masking required with 0xFFFFFFFF
 550      */
 551     register size_t i;
 552
 553     mt_state[0] = value;
 554     for (i = 1; i < MT_SIZE; ++i)
 555         mt_state[i] = 0x6C078965 * (mt_state[i - 1] ^ mt_state[i - 1] >> 30) + i;
 556 }
 557
 558 uint32_t util_rand() {
 559     register uint32_t y;
 560
 561     /*
 562      * This is inlined with any sane compiler (I checked)
 563      * for some reason though, SubC seems to be generating invalid
 564      * code when it inlines this.
 565      */
 566     if (!mt_index)
 567         mt_generate();
 568
 569     y = mt_state[mt_index];
 570
 571     /* Standard tempering */
 572     y ^= y >> 11;              /* +7 */
 573     y ^= y << 7  & 0x9D2C5680; /* +4 */
 574     y ^= y << 15 & 0xEFC60000; /* -4 */
 575     y ^= y >> 18;              /* -7 */
 576
 577     if(++mt_index == MT_SIZE)
 578          mt_index = 0;
 579
 580     return y;
 581 }