2 * Copyright (C) 2012, 2013
6 * Permission is hereby granted, free of charge, to any person obtaining a copy of
7 * this software and associated documentation files (the "Software"), to deal in
8 * the Software without restriction, including without limitation the rights to
9 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
10 * of the Software, and to permit persons to whom the Software is furnished to do
11 * so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28 /* TODO: remove globals ... */
29 static uint64_t mem_ab = 0;
30 static uint64_t mem_db = 0;
31 static uint64_t mem_at = 0;
32 static uint64_t mem_dt = 0;
33 static uint64_t mem_pk = 0;
34 static uint64_t mem_hw = 0;
40 struct memblock_t *next;
41 struct memblock_t *prev;
46 if (mem_hw > mem_pk) \
50 static struct memblock_t *mem_start = NULL;
52 void *util_memory_a(size_t byte, unsigned int line, const char *file) {
53 struct memblock_t *info = (struct memblock_t*)malloc(sizeof(struct memblock_t) + byte);
54 void *data = (void*)(info+1);
55 if (!info) return NULL;
60 info->next = mem_start;
62 mem_start->prev = info;
74 void util_memory_d(void *ptrn) {
75 struct memblock_t *info = NULL;
78 info = ((struct memblock_t*)ptrn - 1);
85 info->prev->next = info->next;
87 info->next->prev = info->prev;
88 if (info == mem_start)
89 mem_start = info->next;
94 void *util_memory_r(void *ptrn, size_t byte, unsigned int line, const char *file) {
95 struct memblock_t *oldinfo = NULL;
97 struct memblock_t *newinfo;
100 return util_memory_a(byte, line, file);
106 oldinfo = ((struct memblock_t*)ptrn - 1);
107 newinfo = ((struct memblock_t*)malloc(sizeof(struct memblock_t) + byte));
111 util_memory_d(oldinfo+1);
116 memcpy(newinfo+1, oldinfo+1, oldinfo->byte);
120 oldinfo->prev->next = oldinfo->next;
122 oldinfo->next->prev = oldinfo->prev;
123 if (oldinfo == mem_start)
124 mem_start = oldinfo->next;
127 newinfo->line = line;
128 newinfo->byte = byte;
129 newinfo->file = file;
130 newinfo->prev = NULL;
131 newinfo->next = mem_start;
133 mem_start->prev = newinfo;
136 mem_ab -= oldinfo->byte;
137 mem_hw -= oldinfo->byte;
138 mem_ab += newinfo->byte;
139 mem_hw += newinfo->byte;
148 static void util_dumpmem(struct memblock_t *memory, uint16_t cols) {
150 for (i = 0; i < memory->byte + ((memory->byte % cols) ? (cols - memory->byte % cols) : 0); i++) {
151 if (i % cols == 0) con_out(" 0x%06X: ", i);
152 if (i < memory->byte) con_out("%02X " , 0xFF & ((char*)(memory + 1))[i]);
155 if ((uint16_t)(i % cols) == (cols - 1)) {
156 for (j = i - (cols - 1); j <= i; j++) {
160 : (isprint(((char*)(memory + 1))[j]))
161 ? 0xFF & ((char*)(memory + 1)) [j]
171 * The following is a VERY tight, efficent, hashtable for integer
172 * values and keys, and for nothing more. We could make our existing
173 * hashtable support type-genericness through a void * pointer but,
174 * ideally that would make things more complicated. We also don't need
175 * that much of a bloat for something as basic as this.
183 typedef size_entry_t **size_table_t;
185 size_table_t util_st_new() {
186 return (size_table_t)memset(
187 mem_a(sizeof(size_entry_t*) * ST_SIZE),
188 0, ST_SIZE * sizeof(size_entry_t*)
191 void util_st_del(size_table_t table) {
193 for (; i < ST_SIZE; i++) if(table[i]) mem_d(table[i]);
196 size_entry_t *util_st_get(size_table_t table, size_t key) {
197 size_t hash = (key % ST_SIZE);
198 while (table[hash] && table[hash]->key != key)
199 hash = (hash + 1) % ST_SIZE;
202 void util_st_put(size_table_t table, size_t key, size_t value) {
203 size_t hash = (key % ST_SIZE);
204 while (table[hash] && table[hash]->key != key)
205 hash = (hash + 1) % ST_SIZE;
206 table[hash] = (size_entry_t*)mem_a(sizeof(size_entry_t));
207 table[hash]->key = key;
208 table[hash]->value = value;
211 static uint64_t vectors = 0;
212 static uint64_t vector_sizes = 0;
213 static size_table_t vector_usage = NULL;
215 void util_meminfo() {
216 struct memblock_t *info;
218 if (OPTS_OPTION_BOOL(OPTION_DEBUG)) {
219 for (info = mem_start; info; info = info->next) {
220 con_out("lost: %u (bytes) at %s:%u\n",
225 util_dumpmem(info, OPTS_OPTION_U16(OPTION_MEMDUMPCOLS));
229 if (OPTS_OPTION_BOOL(OPTION_DEBUG) ||
230 OPTS_OPTION_BOOL(OPTION_MEMCHK)) {
231 con_out("Memory information:\n\
232 Total allocations: %llu\n\
233 Total deallocations: %llu\n\
234 Total allocated: %f (MB)\n\
235 Total deallocated: %f (MB)\n\
236 Total peak memory: %f (MB)\n\
237 Total leaked memory: %f (MB) in %llu allocations\n",
240 (float)(mem_ab) / 1048576.0f,
241 (float)(mem_db) / 1048576.0f,
242 (float)(mem_pk) / 1048576.0f,
243 (float)(mem_ab - mem_db) / 1048576.0f,
245 /* could be more clever */
250 if (OPTS_OPTION_BOOL(OPTION_STATISTICS) ||
251 OPTS_OPTION_BOOL(OPTION_MEMCHK)) {
254 con_out("Additional Statistics:\n\
255 Total vectors used: %u\n\
256 Total unique vector sizes: %u\n",
258 (unsigned)vector_sizes
262 for (i = 0; i < ST_SIZE; i++) {
265 if (!(entry = vector_usage[i]))
268 con_out(" # of %3u (bytes) vectors: %u\n",
269 (unsigned)entry->key,
270 (unsigned)entry->value
276 util_st_del(vector_usage);
280 * Some string utility functions, because strdup uses malloc, and we want
281 * to track all memory (without replacing malloc).
283 char *_util_Estrdup(const char *s, const char *file, size_t line) {
287 /* in case of -DNOTRACK */
294 if ((len = strlen(s)) && (ptr = (char*)mem_af(len+1, line, file))) {
301 char *_util_Estrdup_empty(const char *s, const char *file, size_t line) {
305 /* in case of -DNOTRACK */
313 if ((ptr = (char*)mem_af(len+1, line, file))) {
320 void util_debug(const char *area, const char *ms, ...) {
322 if (!OPTS_OPTION_BOOL(OPTION_DEBUG))
325 if (!strcmp(area, "MEM") && !OPTS_OPTION_BOOL(OPTION_MEMCHK))
329 con_out ("[%s] ", area);
335 * only required if big endian .. otherwise no need to swap
338 #if PLATFORM_BYTE_ORDER == GMQCC_BYTE_ORDER_BIG
339 static GMQCC_INLINE void util_swap16(uint16_t *d, size_t l) {
341 d[l] = (d[l] << 8) | (d[l] >> 8);
345 static GMQCC_INLINE void util_swap32(uint32_t *d, size_t l) {
348 v = ((d[l] << 8) & 0xFF00FF00) | ((d[l] >> 8) & 0x00FF00FF);
349 d[l] = (v << 16) | (v >> 16);
353 /* Some strange system doesn't like constants that big, AND doesn't recognize an ULL suffix
354 * so let's go the safe way
356 static GMQCC_INLINE void util_swap64(uint32_t *d, size_t l) {
360 v = ((d[l] << 8) & 0xFF00FF00FF00FF00) | ((d[l] >> 8) & 0x00FF00FF00FF00FF);
361 v = ((v << 16) & 0xFFFF0000FFFF0000) | ((v >> 16) & 0x0000FFFF0000FFFF);
362 d[l] = (v << 32) | (v >> 32);
366 for (i = 0; i < l; i += 2) {
375 void util_endianswap(void *_data, size_t length, unsigned int typesize) {
376 # if PLATFORM_BYTE_ORDER == -1 /* runtime check */
377 if (*((char*)&typesize))
380 /* prevent unused warnings */
385 # if PLATFORM_BYTE_ORDER == GMQCC_BYTE_ORDER_LITTLE
391 util_swap16((uint16_t*)_data, length>>1);
394 util_swap32((uint32_t*)_data, length>>2);
397 util_swap64((uint32_t*)_data, length>>3);
400 default: exit(EXIT_FAILURE); /* please blow the fuck up! */
407 * CRC algorithms vary in the width of the polynomial, the value of said polynomial,
408 * the initial value used for the register, weather the bits of each byte are reflected
409 * before being processed, weather the algorithm itself feeds input bytes through the
410 * register or XORs them with a byte from one end and then straight into the table, as
411 * well as (but not limited to the idea of reflected versions) where the final register
412 * value becomes reversed, and finally weather the value itself is used to XOR the final
413 * register value. AS such you can already imagine how painfully annoying CRCs are,
414 * of course we stand to target Quake, which expects it's certian set of rules for proper
415 * calculation of a CRC.
417 * In most traditional CRC algorithms on uses a reflected table driven method where a value
418 * or register is reflected if it's bits are swapped around it's center. For example:
419 * take the bits 0101 is the 4-bit reflection of 1010, and respectfully 0011 would be the
420 * reflection of 1100. Quake however expects a NON-Reflected CRC on the output, but still
421 * requires a final XOR on the values (0xFFFF and 0x0000) this is a standard CCITT CRC-16
422 * which I respectfully as a programmer don't agree with.
424 * So now you know what we target, and why we target it, despite how unsettling it may seem
425 * but those are what Quake seems to request.
428 static const uint16_t util_crc16_table[] = {
429 0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50A5,
430 0x60C6, 0x70E7, 0x8108, 0x9129, 0xA14A, 0xB16B,
431 0xC18C, 0xD1AD, 0xE1CE, 0xF1EF, 0x1231, 0x0210,
432 0x3273, 0x2252, 0x52B5, 0x4294, 0x72F7, 0x62D6,
433 0x9339, 0x8318, 0xB37B, 0xA35A, 0xD3BD, 0xC39C,
434 0xF3FF, 0xE3DE, 0x2462, 0x3443, 0x0420, 0x1401,
435 0x64E6, 0x74C7, 0x44A4, 0x5485, 0xA56A, 0xB54B,
436 0x8528, 0x9509, 0xE5EE, 0xF5CF, 0xC5AC, 0xD58D,
437 0x3653, 0x2672, 0x1611, 0x0630, 0x76D7, 0x66F6,
438 0x5695, 0x46B4, 0xB75B, 0xA77A, 0x9719, 0x8738,
439 0xF7DF, 0xE7FE, 0xD79D, 0xC7BC, 0x48C4, 0x58E5,
440 0x6886, 0x78A7, 0x0840, 0x1861, 0x2802, 0x3823,
441 0xC9CC, 0xD9ED, 0xE98E, 0xF9AF, 0x8948, 0x9969,
442 0xA90A, 0xB92B, 0x5AF5, 0x4AD4, 0x7AB7, 0x6A96,
443 0x1A71, 0x0A50, 0x3A33, 0x2A12, 0xDBFD, 0xCBDC,
444 0xFBBF, 0xEB9E, 0x9B79, 0x8B58, 0xBB3B, 0xAB1A,
445 0x6CA6, 0x7C87, 0x4CE4, 0x5CC5, 0x2C22, 0x3C03,
446 0x0C60, 0x1C41, 0xEDAE, 0xFD8F, 0xCDEC, 0xDDCD,
447 0xAD2A, 0xBD0B, 0x8D68, 0x9D49, 0x7E97, 0x6EB6,
448 0x5ED5, 0x4EF4, 0x3E13, 0x2E32, 0x1E51, 0x0E70,
449 0xFF9F, 0xEFBE, 0xDFDD, 0xCFFC, 0xBF1B, 0xAF3A,
450 0x9F59, 0x8F78, 0x9188, 0x81A9, 0xB1CA, 0xA1EB,
451 0xD10C, 0xC12D, 0xF14E, 0xE16F, 0x1080, 0x00A1,
452 0x30C2, 0x20E3, 0x5004, 0x4025, 0x7046, 0x6067,
453 0x83B9, 0x9398, 0xA3FB, 0xB3DA, 0xC33D, 0xD31C,
454 0xE37F, 0xF35E, 0x02B1, 0x1290, 0x22F3, 0x32D2,
455 0x4235, 0x5214, 0x6277, 0x7256, 0xB5EA, 0xA5CB,
456 0x95A8, 0x8589, 0xF56E, 0xE54F, 0xD52C, 0xC50D,
457 0x34E2, 0x24C3, 0x14A0, 0x0481, 0x7466, 0x6447,
458 0x5424, 0x4405, 0xA7DB, 0xB7FA, 0x8799, 0x97B8,
459 0xE75F, 0xF77E, 0xC71D, 0xD73C, 0x26D3, 0x36F2,
460 0x0691, 0x16B0, 0x6657, 0x7676, 0x4615, 0x5634,
461 0xD94C, 0xC96D, 0xF90E, 0xE92F, 0x99C8, 0x89E9,
462 0xB98A, 0xA9AB, 0x5844, 0x4865, 0x7806, 0x6827,
463 0x18C0, 0x08E1, 0x3882, 0x28A3, 0xCB7D, 0xDB5C,
464 0xEB3F, 0xFB1E, 0x8BF9, 0x9BD8, 0xABBB, 0xBB9A,
465 0x4A75, 0x5A54, 0x6A37, 0x7A16, 0x0AF1, 0x1AD0,
466 0x2AB3, 0x3A92, 0xFD2E, 0xED0F, 0xDD6C, 0xCD4D,
467 0xBDAA, 0xAD8B, 0x9DE8, 0x8DC9, 0x7C26, 0x6C07,
468 0x5C64, 0x4C45, 0x3CA2, 0x2C83, 0x1CE0, 0x0CC1,
469 0xEF1F, 0xFF3E, 0xCF5D, 0xDF7C, 0xAF9B, 0xBFBA,
470 0x8FD9, 0x9FF8, 0x6E17, 0x7E36, 0x4E55, 0x5E74,
471 0x2E93, 0x3EB2, 0x0ED1, 0x1EF0
474 /* Non - Reflected */
475 uint16_t util_crc16(uint16_t current, const char *k, size_t len) {
476 register uint16_t h = current;
477 for (; len; --len, ++k)
478 h = util_crc16_table[(h>>8)^((unsigned char)*k)]^(h<<8);
481 /* Reflective Varation (for reference) */
483 uint16_t util_crc16(const char *k, int len, const short clamp) {
484 register uint16_t h= (uint16_t)0xFFFFFFFF;
485 for (; len; --len, ++k)
486 h = util_crc16_table[(h^((unsigned char)*k))&0xFF]^(h>>8);
491 size_t util_strtocmd(const char *in, char *out, size_t outsz) {
493 for (; *in && sz < outsz; ++in, ++out, ++sz)
494 *out = (*in == '-') ? '_' : (isalpha(*in) && !isupper(*in)) ? *in + 'A' - 'a': *in;
499 size_t util_strtononcmd(const char *in, char *out, size_t outsz) {
501 for (; *in && sz < outsz; ++in, ++out, ++sz)
502 *out = (*in == '_') ? '-' : (isalpha(*in) && isupper(*in)) ? *in + 'a' - 'A' : *in;
507 /* TODO: rewrite ... when I redo the ve cleanup */
508 void _util_vec_grow(void **a, size_t i, size_t s) {
509 vector_t *d = vec_meta(*a);
511 size_entry_t *e = NULL;
515 m = 2 * d->allocated + i;
516 p = mem_r(d, s * m + sizeof(vector_t));
519 p = mem_a(s * m + sizeof(vector_t));
520 ((vector_t*)p)->used = 0;
525 vector_usage = util_st_new();
527 if ((e = util_st_get(vector_usage, s))) {
530 util_st_put(vector_usage, s, 1); /* start off with 1 */
534 *a = (vector_t*)p + 1;
535 vec_meta(*a)->allocated = m;
539 * Hash table for generic data, based on dynamic memory allocations
540 * all around. This is the internal interface, please look for
541 * EXPOSED INTERFACE comment below
543 typedef struct hash_node_t {
544 char *key; /* the key for this node in table */
545 void *value; /* pointer to the data as void* */
546 struct hash_node_t *next; /* next node (linked list) */
549 GMQCC_INLINE size_t util_hthash(hash_table_t *ht, const char *key) {
550 const uint32_t mix = 0x5BD1E995;
551 const uint32_t rot = 24;
552 size_t size = strlen(key);
553 uint32_t hash = 0x1EF0 /* LICRC TAB */ ^ size;
555 const unsigned char *data = (const unsigned char*)key;
558 alias = (data[0] | (data[1] << 8) | (data[2] << 16) | (data[3] << 24));
560 alias ^= alias >> rot;
571 case 3: hash ^= data[2] << 16;
572 case 2: hash ^= data[1] << 8;
573 case 1: hash ^= data[0];
581 return (size_t) (hash % ht->size);
584 static hash_node_t *_util_htnewpair(const char *key, void *value) {
586 if (!(node = (hash_node_t*)mem_a(sizeof(hash_node_t))))
589 if (!(node->key = util_strdupe(key))) {
601 * EXPOSED INTERFACE for the hashtable implementation
602 * util_htnew(size) -- to make a new hashtable
603 * util_htset(table, key, value, sizeof(value)) -- to set something in the table
604 * util_htget(table, key) -- to get something from the table
605 * util_htdel(table) -- to delete the table
607 hash_table_t *util_htnew(size_t size) {
608 hash_table_t *hashtable = NULL;
612 if (!(hashtable = (hash_table_t*)mem_a(sizeof(hash_table_t))))
615 if (!(hashtable->table = (hash_node_t**)mem_a(sizeof(hash_node_t*) * size))) {
620 hashtable->size = size;
621 memset(hashtable->table, 0, sizeof(hash_node_t*) * size);
626 void util_htseth(hash_table_t *ht, const char *key, size_t bin, void *value) {
627 hash_node_t *newnode = NULL;
628 hash_node_t *next = NULL;
629 hash_node_t *last = NULL;
631 next = ht->table[bin];
633 while (next && next->key && strcmp(key, next->key) > 0)
634 last = next, next = next->next;
636 /* already in table, do a replace */
637 if (next && next->key && strcmp(key, next->key) == 0) {
640 /* not found, grow a pair man :P */
641 newnode = _util_htnewpair(key, value);
642 if (next == ht->table[bin]) {
643 newnode->next = next;
644 ht->table[bin] = newnode;
646 last->next = newnode;
648 newnode->next = next;
649 last->next = newnode;
654 void util_htset(hash_table_t *ht, const char *key, void *value) {
655 util_htseth(ht, key, util_hthash(ht, key), value);
658 void *util_htgeth(hash_table_t *ht, const char *key, size_t bin) {
659 hash_node_t *pair = ht->table[bin];
661 while (pair && pair->key && strcmp(key, pair->key) > 0)
664 if (!pair || !pair->key || strcmp(key, pair->key) != 0)
670 void *util_htget(hash_table_t *ht, const char *key) {
671 return util_htgeth(ht, key, util_hthash(ht, key));
674 void *code_util_str_htgeth(hash_table_t *ht, const char *key, size_t bin) {
679 keylen = strlen(key);
681 pair = ht->table[bin];
682 while (pair && pair->key) {
683 len = strlen(pair->key);
689 cmp = strcmp(key, pair->key);
697 cmp = strcmp(key, pair->key + len - keylen);
699 uintptr_t up = (uintptr_t)pair->value;
709 * Free all allocated data in a hashtable, this is quite the amount
712 void util_htrem(hash_table_t *ht, void (*callback)(void *data)) {
714 for (; i < ht->size; i++) {
715 hash_node_t *n = ht->table[i];
735 void util_htremkey(hash_table_t *ht, void (*callback)(void *data, const char *key)) {
737 for (; i < ht->size; i++) {
738 hash_node_t *n = ht->table[i];
744 callback(n->value, n->key);
757 void util_htrmh(hash_table_t *ht, const char *key, size_t bin, void (*cb)(void*)) {
758 hash_node_t **pair = &ht->table[bin];
761 while (*pair && (*pair)->key && strcmp(key, (*pair)->key) > 0)
762 pair = &(*pair)->next;
765 if (!tmp || !tmp->key || strcmp(key, tmp->key) != 0)
776 void util_htrm(hash_table_t *ht, const char *key, void (*cb)(void*)) {
777 util_htrmh(ht, key, util_hthash(ht, key), cb);
780 void util_htdel(hash_table_t *ht) {
781 util_htrem(ht, NULL);
785 * Portable implementation of vasprintf/asprintf. Assumes vsnprintf
786 * exists, otherwise compiler error.
788 * TODO: fix for MSVC ....
790 int util_vasprintf(char **dat, const char *fmt, va_list args) {
796 * For visuals tido _vsnprintf doesn't tell you the length of a
797 * formatted string if it overflows. However there is a MSVC
798 * intrinsic (which is documented wrong) called _vcsprintf which
799 * will return the required amount to allocate.
802 if ((len = _vscprintf(fmt, args)) < 0) {
807 tmp = (char*)mem_a(len + 1);
808 if ((ret = _vsnprintf_s(tmp, len+1, len+1, fmt, args)) != len) {
817 * For everything else we have a decent conformint vsnprintf that
818 * returns the number of bytes needed. We give it a try though on
819 * a short buffer, since efficently speaking, it could be nice to
820 * above a second vsnprintf call.
825 len = vsnprintf(buf, sizeof(buf), fmt, cpy);
828 if (len < (int)sizeof(buf)) {
829 *dat = util_strdup(buf);
833 /* not large enough ... */
834 tmp = (char*)mem_a(len + 1);
835 if ((ret = vsnprintf(tmp, len + 1, fmt, args)) != len) {
845 int util_asprintf(char **ret, const char *fmt, ...) {
849 read = util_vasprintf(ret, fmt, args);
856 * These are various re-implementations (wrapping the real ones) of
857 * string functions that MSVC consideres unsafe. We wrap these up and
858 * use the safe varations on MSVC.
861 static char **util_strerror_allocated() {
862 static char **data = NULL;
866 static void util_strerror_cleanup(void) {
868 char **data = util_strerror_allocated();
869 for (i = 0; i < vec_size(data); i++)
874 const char *util_strerror(int num) {
875 char *allocated = NULL;
876 static bool install = false;
877 static size_t tries = 0;
878 char **vector = util_strerror_allocated();
880 /* try installing cleanup handler */
885 install = !atexit(&util_strerror_cleanup);
889 allocated = (char*)mem_a(4096); /* A page must be enough */
890 strerror_s(allocated, 4096, num);
892 vec_push(vector, allocated);
893 return (const char *)allocated;
896 int util_snprintf(char *src, size_t bytes, const char *format, ...) {
899 va_start(va, format);
901 rt = vsprintf_s(src, bytes, format, va);
907 char *util_strcat(char *dest, const char *src) {
908 strcat_s(dest, strlen(src), src);
912 char *util_strncpy(char *dest, const char *src, size_t num) {
913 strncpy_s(dest, num, src, num);
917 const char *util_strerror(int num) {
918 return strerror(num);
921 int util_snprintf(char *src, size_t bytes, const char *format, ...) {
924 va_start(va, format);
925 rt = vsnprintf(src, bytes, format, va);
931 char *util_strcat(char *dest, const char *src) {
932 return strcat(dest, src);
935 char *util_strncpy(char *dest, const char *src, size_t num) {
936 return strncpy(dest, src, num);
939 #endif /*! _MSC_VER */
942 * Implementation of the Mersenne twister PRNG (pseudo random numer
943 * generator). Implementation of MT19937. Has a period of 2^19937-1
944 * which is a Mersenne Prime (hence the name).
946 * Implemented from specification and original paper:
947 * http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/ARTICLES/mt.pdf
949 * This code is placed in the public domain by me personally
950 * (Dale Weiler, a.k.a graphitemaster).
954 #define MT_PERIOD 397
955 #define MT_SPACE (MT_SIZE - MT_PERIOD)
957 static uint32_t mt_state[MT_SIZE];
958 static size_t mt_index = 0;
960 static GMQCC_INLINE void mt_generate() {
962 * The loop has been unrolled here: the original paper and implemenation
963 * Called for the following code:
964 * for (register unsigned i = 0; i < MT_SIZE; ++i) {
965 * register uint32_t load;
966 * load = (0x80000000 & mt_state[i]) // most significant 32nd bit
967 * load |= (0x7FFFFFFF & mt_state[(i + 1) % MT_SIZE]) // least significant 31nd bit
969 * mt_state[i] = mt_state[(i + MT_PERIOD) % MT_SIZE] ^ (load >> 1);
971 * if (load & 1) mt_state[i] ^= 0x9908B0DF;
974 * This essentially is a waste: we have two modulus operations, and
975 * a branch that is executed every iteration from [0, MT_SIZE).
977 * Please see: http://www.quadibloc.com/crypto/co4814.htm for more
978 * information on how this clever trick works.
980 static const uint32_t matrix[2] = {
985 * This register gives up a little more speed by instructing the compiler
986 * to force these into CPU registers (they're counters for indexing mt_state
987 * which we can force the compiler to generate prefetch instructions for)
993 * Said loop has been unrolled for MT_SPACE (226 iterations), opposed
994 * to [0, MT_SIZE) (634 iterations).
996 for (i = 0; i < MT_SPACE; ++i) {
997 y = (0x80000000 & mt_state[i]) | (0x7FFFFFF & mt_state[i + 1]);
998 mt_state[i] = mt_state[i + MT_PERIOD] ^ (y >> 1) ^ matrix[y & 1];
1000 i ++; /* loop unroll */
1002 y = (0x80000000 & mt_state[i]) | (0x7FFFFFF & mt_state[i + 1]);
1003 mt_state[i] = mt_state[i + MT_PERIOD] ^ (y >> 1) ^ matrix[y & 1];
1007 * collapsing the walls unrolled (evenly dividing 396 [632-227 = 396
1011 while (i < MT_SIZE - 1) {
1013 * We expand this 11 times .. manually, no macros are required
1014 * here. This all fits in the CPU cache.
1016 y = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
1017 mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
1019 y = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
1020 mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
1022 y = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
1023 mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
1025 y = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
1026 mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
1028 y = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
1029 mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
1031 y = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
1032 mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
1034 y = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
1035 mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
1037 y = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
1038 mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
1040 y = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
1041 mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
1043 y = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
1044 mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
1046 y = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
1047 mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
1051 /* i = mt_state[623] */
1052 y = (0x80000000 & mt_state[MT_SIZE - 1]) | (0x7FFFFFFF & mt_state[MT_SIZE - 1]);
1053 mt_state[MT_SIZE - 1] = mt_state[MT_PERIOD - 1] ^ (y >> 1) ^ matrix[y & 1];
1056 void util_seed(uint32_t value) {
1058 * We seed the mt_state with a LCG (linear congruential generator)
1059 * We're operating exactly on exactly m=32, so there is no need to
1062 * The multipler of choice is 0x6C07865, also knows as the Borosh-
1063 * Niederreiter multipler used for modulus 2^32. More can be read
1064 * about this in Knuth's TAOCP Volume 2, page 106.
1066 * If you don't own TAOCP something is wrong with you :-) .. so I
1067 * also provided a link to the original paper by Borosh and
1068 * Niederreiter. It's called "Optional Multipliers for PRNG by The
1069 * Linear Congruential Method" (1983).
1070 * http://en.wikipedia.org/wiki/Linear_congruential_generator
1072 * From said page, it says the following:
1073 * "A common Mersenne twister implementation, interestingly enough
1074 * used an LCG to generate seed data."
1077 * The data we're operating on is 32-bits for the mt_state array, so
1078 * there is no masking required with 0xFFFFFFFF
1082 mt_state[0] = value;
1083 for (i = 1; i < MT_SIZE; ++i)
1084 mt_state[i] = 0x6C078965 * (mt_state[i - 1] ^ mt_state[i - 1] >> 30) + i;
1087 uint32_t util_rand() {
1088 register uint32_t y;
1091 * This is inlined with any sane compiler (I checked)
1092 * for some reason though, SubC seems to be generating invalid
1093 * code when it inlines this.
1098 y = mt_state[mt_index];
1100 /* Standard tempering */
1101 y ^= y >> 11; /* +7 */
1102 y ^= y << 7 & 0x9D2C5680; /* +4 */
1103 y ^= y << 15 & 0xEFC60000; /* -4 */
1104 y ^= y >> 18; /* -7 */
1106 if(++mt_index == MT_SIZE)