2 * Copyright (C) 2012, 2013
6 * Permission is hereby granted, free of charge, to any person obtaining a copy of
7 * this software and associated documentation files (the "Software"), to deal in
8 * the Software without restriction, including without limitation the rights to
9 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
10 * of the Software, and to permit persons to whom the Software is furnished to do
11 * so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28 /* TODO: remove globals ... */
29 static uint64_t mem_ab = 0;
30 static uint64_t mem_db = 0;
31 static uint64_t mem_at = 0;
32 static uint64_t mem_dt = 0;
33 static uint64_t mem_pk = 0;
34 static uint64_t mem_hw = 0;
40 struct memblock_t *next;
41 struct memblock_t *prev;
46 if (mem_hw > mem_pk) \
50 static struct memblock_t *mem_start = NULL;
52 void *util_memory_a(size_t byte, unsigned int line, const char *file) {
53 struct memblock_t *info = (struct memblock_t*)malloc(sizeof(struct memblock_t) + byte);
54 void *data = (void*)(info+1);
55 if (!info) return NULL;
60 info->next = mem_start;
62 mem_start->prev = info;
74 void util_memory_d(void *ptrn) {
75 struct memblock_t *info = NULL;
78 info = ((struct memblock_t*)ptrn - 1);
85 info->prev->next = info->next;
87 info->next->prev = info->prev;
88 if (info == mem_start)
89 mem_start = info->next;
94 void *util_memory_r(void *ptrn, size_t byte, unsigned int line, const char *file) {
95 struct memblock_t *oldinfo = NULL;
97 struct memblock_t *newinfo;
100 return util_memory_a(byte, line, file);
106 oldinfo = ((struct memblock_t*)ptrn - 1);
107 newinfo = ((struct memblock_t*)malloc(sizeof(struct memblock_t) + byte));
111 util_memory_d(oldinfo+1);
116 memcpy(newinfo+1, oldinfo+1, oldinfo->byte);
120 oldinfo->prev->next = oldinfo->next;
122 oldinfo->next->prev = oldinfo->prev;
123 if (oldinfo == mem_start)
124 mem_start = oldinfo->next;
127 newinfo->line = line;
128 newinfo->byte = byte;
129 newinfo->file = file;
130 newinfo->prev = NULL;
131 newinfo->next = mem_start;
133 mem_start->prev = newinfo;
136 mem_ab -= oldinfo->byte;
137 mem_hw -= oldinfo->byte;
138 mem_ab += newinfo->byte;
139 mem_hw += newinfo->byte;
148 static void util_dumpmem(struct memblock_t *memory, uint16_t cols) {
150 for (i = 0; i < memory->byte + ((memory->byte % cols) ? (cols - memory->byte % cols) : 0); i++) {
151 if (i % cols == 0) con_out(" 0x%06X: ", i);
152 if (i < memory->byte) con_out("%02X " , 0xFF & ((char*)(memory + 1))[i]);
155 if ((uint16_t)(i % cols) == (cols - 1)) {
156 for (j = i - (cols - 1); j <= i; j++) {
160 : (isprint(((char*)(memory + 1))[j]))
161 ? 0xFF & ((char*)(memory + 1)) [j]
171 * The following is a VERY tight, efficent, hashtable for integer
172 * values and keys, and for nothing more. We could make our existing
173 * hashtable support type-genericness through a void * pointer but,
174 * ideally that would make things more complicated. We also don't need
175 * that much of a bloat for something as basic as this.
183 typedef size_entry_t **size_table_t;
185 size_table_t util_st_new() {
186 return (size_table_t)memset(
187 mem_a(sizeof(size_entry_t*) * ST_SIZE),
188 0, ST_SIZE * sizeof(size_entry_t*)
191 void util_st_del(size_table_t table) {
193 for (; i < ST_SIZE; i++) if(table[i]) mem_d(table[i]);
196 size_entry_t *util_st_get(size_table_t table, size_t key) {
197 size_t hash = (key % ST_SIZE);
198 while (table[hash] && table[hash]->key != key)
199 hash = (hash + 1) % ST_SIZE;
202 void util_st_put(size_table_t table, size_t key, size_t value) {
203 size_t hash = (key % ST_SIZE);
204 while (table[hash] && table[hash]->key != key)
205 hash = (hash + 1) % ST_SIZE;
206 table[hash] = (size_entry_t*)mem_a(sizeof(size_entry_t));
207 table[hash]->key = key;
208 table[hash]->value = value;
211 static uint64_t strdups = 0;
212 static uint64_t vectors = 0;
213 static uint64_t vector_sizes = 0;
214 static size_table_t vector_usage = NULL;
216 void util_meminfo() {
217 struct memblock_t *info;
219 if (OPTS_OPTION_BOOL(OPTION_DEBUG)) {
220 for (info = mem_start; info; info = info->next) {
221 con_out("lost: %u (bytes) at %s:%u\n",
226 util_dumpmem(info, OPTS_OPTION_U16(OPTION_MEMDUMPCOLS));
230 if (OPTS_OPTION_BOOL(OPTION_DEBUG) ||
231 OPTS_OPTION_BOOL(OPTION_MEMCHK)) {
232 con_out("Memory information:\n\
233 Total allocations: %llu\n\
234 Total deallocations: %llu\n\
235 Total allocated: %f (MB)\n\
236 Total deallocated: %f (MB)\n\
237 Total peak memory: %f (MB)\n\
238 Total leaked memory: %f (MB) in %llu allocations\n",
241 (float)(mem_ab) / 1048576.0f,
242 (float)(mem_db) / 1048576.0f,
243 (float)(mem_pk) / 1048576.0f,
244 (float)(mem_ab - mem_db) / 1048576.0f,
246 /* could be more clever */
251 if (OPTS_OPTION_BOOL(OPTION_STATISTICS) ||
252 OPTS_OPTION_BOOL(OPTION_MEMCHK)) {
256 con_out("Additional Statistics:\n\
257 Total vectors allocated: %u\n\
258 Total string duplicates: %u\n\
259 Total unique vector sizes: %u\n",
262 (unsigned)vector_sizes
265 for (; i < ST_SIZE; i++) {
268 if (!(entry = vector_usage[i]))
271 con_out(" %u| # of %3u (bytes) vectors: %u\n",
273 (unsigned)entry->key,
274 (unsigned)entry->value
281 util_st_del(vector_usage);
285 * Some string utility functions, because strdup uses malloc, and we want
286 * to track all memory (without replacing malloc).
288 char *_util_Estrdup(const char *s, const char *file, size_t line) {
292 /* in case of -DNOTRACK */
299 if ((len = strlen(s)) && (ptr = (char*)mem_af(len+1, line, file))) {
307 char *_util_Estrdup_empty(const char *s, const char *file, size_t line) {
311 /* in case of -DNOTRACK */
319 if ((ptr = (char*)mem_af(len+1, line, file))) {
327 void util_debug(const char *area, const char *ms, ...) {
329 if (!OPTS_OPTION_BOOL(OPTION_DEBUG))
332 if (!strcmp(area, "MEM") && !OPTS_OPTION_BOOL(OPTION_MEMCHK))
336 con_out ("[%s] ", area);
342 * only required if big endian .. otherwise no need to swap
345 #if PLATFORM_BYTE_ORDER == GMQCC_BYTE_ORDER_BIG
346 static GMQCC_INLINE void util_swap16(uint16_t *d, size_t l) {
348 d[l] = (d[l] << 8) | (d[l] >> 8);
352 static GMQCC_INLINE void util_swap32(uint32_t *d, size_t l) {
355 v = ((d[l] << 8) & 0xFF00FF00) | ((d[l] >> 8) & 0x00FF00FF);
356 d[l] = (v << 16) | (v >> 16);
360 /* Some strange system doesn't like constants that big, AND doesn't recognize an ULL suffix
361 * so let's go the safe way
363 static GMQCC_INLINE void util_swap64(uint32_t *d, size_t l) {
367 v = ((d[l] << 8) & 0xFF00FF00FF00FF00) | ((d[l] >> 8) & 0x00FF00FF00FF00FF);
368 v = ((v << 16) & 0xFFFF0000FFFF0000) | ((v >> 16) & 0x0000FFFF0000FFFF);
369 d[l] = (v << 32) | (v >> 32);
373 for (i = 0; i < l; i += 2) {
382 void util_endianswap(void *_data, size_t length, unsigned int typesize) {
383 # if PLATFORM_BYTE_ORDER == -1 /* runtime check */
384 if (*((char*)&typesize))
387 /* prevent unused warnings */
392 # if PLATFORM_BYTE_ORDER == GMQCC_BYTE_ORDER_LITTLE
398 util_swap16((uint16_t*)_data, length>>1);
401 util_swap32((uint32_t*)_data, length>>2);
404 util_swap64((uint32_t*)_data, length>>3);
407 default: exit(EXIT_FAILURE); /* please blow the fuck up! */
414 * CRC algorithms vary in the width of the polynomial, the value of said polynomial,
415 * the initial value used for the register, weather the bits of each byte are reflected
416 * before being processed, weather the algorithm itself feeds input bytes through the
417 * register or XORs them with a byte from one end and then straight into the table, as
418 * well as (but not limited to the idea of reflected versions) where the final register
419 * value becomes reversed, and finally weather the value itself is used to XOR the final
420 * register value. AS such you can already imagine how painfully annoying CRCs are,
421 * of course we stand to target Quake, which expects it's certian set of rules for proper
422 * calculation of a CRC.
424 * In most traditional CRC algorithms on uses a reflected table driven method where a value
425 * or register is reflected if it's bits are swapped around it's center. For example:
426 * take the bits 0101 is the 4-bit reflection of 1010, and respectfully 0011 would be the
427 * reflection of 1100. Quake however expects a NON-Reflected CRC on the output, but still
428 * requires a final XOR on the values (0xFFFF and 0x0000) this is a standard CCITT CRC-16
429 * which I respectfully as a programmer don't agree with.
431 * So now you know what we target, and why we target it, despite how unsettling it may seem
432 * but those are what Quake seems to request.
435 static const uint16_t util_crc16_table[] = {
436 0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50A5,
437 0x60C6, 0x70E7, 0x8108, 0x9129, 0xA14A, 0xB16B,
438 0xC18C, 0xD1AD, 0xE1CE, 0xF1EF, 0x1231, 0x0210,
439 0x3273, 0x2252, 0x52B5, 0x4294, 0x72F7, 0x62D6,
440 0x9339, 0x8318, 0xB37B, 0xA35A, 0xD3BD, 0xC39C,
441 0xF3FF, 0xE3DE, 0x2462, 0x3443, 0x0420, 0x1401,
442 0x64E6, 0x74C7, 0x44A4, 0x5485, 0xA56A, 0xB54B,
443 0x8528, 0x9509, 0xE5EE, 0xF5CF, 0xC5AC, 0xD58D,
444 0x3653, 0x2672, 0x1611, 0x0630, 0x76D7, 0x66F6,
445 0x5695, 0x46B4, 0xB75B, 0xA77A, 0x9719, 0x8738,
446 0xF7DF, 0xE7FE, 0xD79D, 0xC7BC, 0x48C4, 0x58E5,
447 0x6886, 0x78A7, 0x0840, 0x1861, 0x2802, 0x3823,
448 0xC9CC, 0xD9ED, 0xE98E, 0xF9AF, 0x8948, 0x9969,
449 0xA90A, 0xB92B, 0x5AF5, 0x4AD4, 0x7AB7, 0x6A96,
450 0x1A71, 0x0A50, 0x3A33, 0x2A12, 0xDBFD, 0xCBDC,
451 0xFBBF, 0xEB9E, 0x9B79, 0x8B58, 0xBB3B, 0xAB1A,
452 0x6CA6, 0x7C87, 0x4CE4, 0x5CC5, 0x2C22, 0x3C03,
453 0x0C60, 0x1C41, 0xEDAE, 0xFD8F, 0xCDEC, 0xDDCD,
454 0xAD2A, 0xBD0B, 0x8D68, 0x9D49, 0x7E97, 0x6EB6,
455 0x5ED5, 0x4EF4, 0x3E13, 0x2E32, 0x1E51, 0x0E70,
456 0xFF9F, 0xEFBE, 0xDFDD, 0xCFFC, 0xBF1B, 0xAF3A,
457 0x9F59, 0x8F78, 0x9188, 0x81A9, 0xB1CA, 0xA1EB,
458 0xD10C, 0xC12D, 0xF14E, 0xE16F, 0x1080, 0x00A1,
459 0x30C2, 0x20E3, 0x5004, 0x4025, 0x7046, 0x6067,
460 0x83B9, 0x9398, 0xA3FB, 0xB3DA, 0xC33D, 0xD31C,
461 0xE37F, 0xF35E, 0x02B1, 0x1290, 0x22F3, 0x32D2,
462 0x4235, 0x5214, 0x6277, 0x7256, 0xB5EA, 0xA5CB,
463 0x95A8, 0x8589, 0xF56E, 0xE54F, 0xD52C, 0xC50D,
464 0x34E2, 0x24C3, 0x14A0, 0x0481, 0x7466, 0x6447,
465 0x5424, 0x4405, 0xA7DB, 0xB7FA, 0x8799, 0x97B8,
466 0xE75F, 0xF77E, 0xC71D, 0xD73C, 0x26D3, 0x36F2,
467 0x0691, 0x16B0, 0x6657, 0x7676, 0x4615, 0x5634,
468 0xD94C, 0xC96D, 0xF90E, 0xE92F, 0x99C8, 0x89E9,
469 0xB98A, 0xA9AB, 0x5844, 0x4865, 0x7806, 0x6827,
470 0x18C0, 0x08E1, 0x3882, 0x28A3, 0xCB7D, 0xDB5C,
471 0xEB3F, 0xFB1E, 0x8BF9, 0x9BD8, 0xABBB, 0xBB9A,
472 0x4A75, 0x5A54, 0x6A37, 0x7A16, 0x0AF1, 0x1AD0,
473 0x2AB3, 0x3A92, 0xFD2E, 0xED0F, 0xDD6C, 0xCD4D,
474 0xBDAA, 0xAD8B, 0x9DE8, 0x8DC9, 0x7C26, 0x6C07,
475 0x5C64, 0x4C45, 0x3CA2, 0x2C83, 0x1CE0, 0x0CC1,
476 0xEF1F, 0xFF3E, 0xCF5D, 0xDF7C, 0xAF9B, 0xBFBA,
477 0x8FD9, 0x9FF8, 0x6E17, 0x7E36, 0x4E55, 0x5E74,
478 0x2E93, 0x3EB2, 0x0ED1, 0x1EF0
481 /* Non - Reflected */
482 uint16_t util_crc16(uint16_t current, const char *k, size_t len) {
483 register uint16_t h = current;
484 for (; len; --len, ++k)
485 h = util_crc16_table[(h>>8)^((unsigned char)*k)]^(h<<8);
488 /* Reflective Varation (for reference) */
490 uint16_t util_crc16(const char *k, int len, const short clamp) {
491 register uint16_t h= (uint16_t)0xFFFFFFFF;
492 for (; len; --len, ++k)
493 h = util_crc16_table[(h^((unsigned char)*k))&0xFF]^(h>>8);
498 size_t util_strtocmd(const char *in, char *out, size_t outsz) {
500 for (; *in && sz < outsz; ++in, ++out, ++sz)
501 *out = (*in == '-') ? '_' : (isalpha(*in) && !isupper(*in)) ? *in + 'A' - 'a': *in;
506 size_t util_strtononcmd(const char *in, char *out, size_t outsz) {
508 for (; *in && sz < outsz; ++in, ++out, ++sz)
509 *out = (*in == '_') ? '-' : (isalpha(*in) && isupper(*in)) ? *in + 'a' - 'A' : *in;
514 /* TODO: rewrite ... when I redo the ve cleanup */
515 void _util_vec_grow(void **a, size_t i, size_t s) {
516 vector_t *d = vec_meta(*a);
518 size_entry_t *e = NULL;
522 m = 2 * d->allocated + i;
523 p = mem_r(d, s * m + sizeof(vector_t));
526 p = mem_a(s * m + sizeof(vector_t));
527 ((vector_t*)p)->used = 0;
532 vector_usage = util_st_new();
534 if ((e = util_st_get(vector_usage, s))) {
537 util_st_put(vector_usage, s, 1); /* start off with 1 */
541 *a = (vector_t*)p + 1;
542 vec_meta(*a)->allocated = m;
546 * Hash table for generic data, based on dynamic memory allocations
547 * all around. This is the internal interface, please look for
548 * EXPOSED INTERFACE comment below
550 typedef struct hash_node_t {
551 char *key; /* the key for this node in table */
552 void *value; /* pointer to the data as void* */
553 struct hash_node_t *next; /* next node (linked list) */
556 GMQCC_INLINE size_t util_hthash(hash_table_t *ht, const char *key) {
557 const uint32_t mix = 0x5BD1E995;
558 const uint32_t rot = 24;
559 size_t size = strlen(key);
560 uint32_t hash = 0x1EF0 /* LICRC TAB */ ^ size;
562 const unsigned char *data = (const unsigned char*)key;
565 alias = (data[0] | (data[1] << 8) | (data[2] << 16) | (data[3] << 24));
567 alias ^= alias >> rot;
578 case 3: hash ^= data[2] << 16;
579 case 2: hash ^= data[1] << 8;
580 case 1: hash ^= data[0];
588 return (size_t) (hash % ht->size);
591 static hash_node_t *_util_htnewpair(const char *key, void *value) {
593 if (!(node = (hash_node_t*)mem_a(sizeof(hash_node_t))))
596 if (!(node->key = util_strdupe(key))) {
608 * EXPOSED INTERFACE for the hashtable implementation
609 * util_htnew(size) -- to make a new hashtable
610 * util_htset(table, key, value, sizeof(value)) -- to set something in the table
611 * util_htget(table, key) -- to get something from the table
612 * util_htdel(table) -- to delete the table
614 hash_table_t *util_htnew(size_t size) {
615 hash_table_t *hashtable = NULL;
619 if (!(hashtable = (hash_table_t*)mem_a(sizeof(hash_table_t))))
622 if (!(hashtable->table = (hash_node_t**)mem_a(sizeof(hash_node_t*) * size))) {
627 hashtable->size = size;
628 memset(hashtable->table, 0, sizeof(hash_node_t*) * size);
633 void util_htseth(hash_table_t *ht, const char *key, size_t bin, void *value) {
634 hash_node_t *newnode = NULL;
635 hash_node_t *next = NULL;
636 hash_node_t *last = NULL;
638 next = ht->table[bin];
640 while (next && next->key && strcmp(key, next->key) > 0)
641 last = next, next = next->next;
643 /* already in table, do a replace */
644 if (next && next->key && strcmp(key, next->key) == 0) {
647 /* not found, grow a pair man :P */
648 newnode = _util_htnewpair(key, value);
649 if (next == ht->table[bin]) {
650 newnode->next = next;
651 ht->table[bin] = newnode;
653 last->next = newnode;
655 newnode->next = next;
656 last->next = newnode;
661 void util_htset(hash_table_t *ht, const char *key, void *value) {
662 util_htseth(ht, key, util_hthash(ht, key), value);
665 void *util_htgeth(hash_table_t *ht, const char *key, size_t bin) {
666 hash_node_t *pair = ht->table[bin];
668 while (pair && pair->key && strcmp(key, pair->key) > 0)
671 if (!pair || !pair->key || strcmp(key, pair->key) != 0)
677 void *util_htget(hash_table_t *ht, const char *key) {
678 return util_htgeth(ht, key, util_hthash(ht, key));
681 void *code_util_str_htgeth(hash_table_t *ht, const char *key, size_t bin) {
686 keylen = strlen(key);
688 pair = ht->table[bin];
689 while (pair && pair->key) {
690 len = strlen(pair->key);
696 cmp = strcmp(key, pair->key);
704 cmp = strcmp(key, pair->key + len - keylen);
706 uintptr_t up = (uintptr_t)pair->value;
716 * Free all allocated data in a hashtable, this is quite the amount
719 void util_htrem(hash_table_t *ht, void (*callback)(void *data)) {
721 for (; i < ht->size; i++) {
722 hash_node_t *n = ht->table[i];
742 void util_htrmh(hash_table_t *ht, const char *key, size_t bin, void (*cb)(void*)) {
743 hash_node_t **pair = &ht->table[bin];
746 while (*pair && (*pair)->key && strcmp(key, (*pair)->key) > 0)
747 pair = &(*pair)->next;
750 if (!tmp || !tmp->key || strcmp(key, tmp->key) != 0)
761 void util_htrm(hash_table_t *ht, const char *key, void (*cb)(void*)) {
762 util_htrmh(ht, key, util_hthash(ht, key), cb);
765 void util_htdel(hash_table_t *ht) {
766 util_htrem(ht, NULL);
770 * Portable implementation of vasprintf/asprintf. Assumes vsnprintf
771 * exists, otherwise compiler error.
773 * TODO: fix for MSVC ....
775 int util_vasprintf(char **dat, const char *fmt, va_list args) {
781 * For visuals tido _vsnprintf doesn't tell you the length of a
782 * formatted string if it overflows. However there is a MSVC
783 * intrinsic (which is documented wrong) called _vcsprintf which
784 * will return the required amount to allocate.
787 if ((len = _vscprintf(fmt, args)) < 0) {
792 tmp = (char*)mem_a(len + 1);
793 if ((ret = _vsnprintf_s(tmp, len+1, len+1, fmt, args)) != len) {
802 * For everything else we have a decent conformint vsnprintf that
803 * returns the number of bytes needed. We give it a try though on
804 * a short buffer, since efficently speaking, it could be nice to
805 * above a second vsnprintf call.
810 len = vsnprintf(buf, sizeof(buf), fmt, cpy);
813 if (len < (int)sizeof(buf)) {
814 *dat = util_strdup(buf);
818 /* not large enough ... */
819 tmp = (char*)mem_a(len + 1);
820 if ((ret = vsnprintf(tmp, len + 1, fmt, args)) != len) {
830 int util_asprintf(char **ret, const char *fmt, ...) {
834 read = util_vasprintf(ret, fmt, args);
841 * These are various re-implementations (wrapping the real ones) of
842 * string functions that MSVC consideres unsafe. We wrap these up and
843 * use the safe varations on MSVC.
846 static char **util_strerror_allocated() {
847 static char **data = NULL;
851 static void util_strerror_cleanup(void) {
853 char **data = util_strerror_allocated();
854 for (i = 0; i < vec_size(data); i++)
859 const char *util_strerror(int num) {
860 char *allocated = NULL;
861 static bool install = false;
862 static size_t tries = 0;
863 char **vector = util_strerror_allocated();
865 /* try installing cleanup handler */
870 install = !atexit(&util_strerror_cleanup);
874 allocated = (char*)mem_a(4096); /* A page must be enough */
875 strerror_s(allocated, 4096, num);
877 vec_push(vector, allocated);
878 return (const char *)allocated;
881 int util_snprintf(char *src, size_t bytes, const char *format, ...) {
884 va_start(va, format);
886 rt = vsprintf_s(src, bytes, format, va);
892 char *util_strcat(char *dest, const char *src) {
893 strcat_s(dest, strlen(src), src);
897 char *util_strncpy(char *dest, const char *src, size_t num) {
898 strncpy_s(dest, num, src, num);
902 const char *util_strerror(int num) {
903 return strerror(num);
906 int util_snprintf(char *src, size_t bytes, const char *format, ...) {
909 va_start(va, format);
910 rt = vsnprintf(src, bytes, format, va);
916 char *util_strcat(char *dest, const char *src) {
917 return strcat(dest, src);
920 char *util_strncpy(char *dest, const char *src, size_t num) {
921 return strncpy(dest, src, num);
924 #endif /*! _MSC_VER */
927 * Implementation of the Mersenne twister PRNG (pseudo random numer
928 * generator). Implementation of MT19937. Has a period of 2^19937-1
929 * which is a Mersenne Prime (hence the name).
931 * Implemented from specification and original paper:
932 * http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/ARTICLES/mt.pdf
934 * This code is placed in the public domain by me personally
935 * (Dale Weiler, a.k.a graphitemaster).
939 #define MT_PERIOD 397
940 #define MT_SPACE (MT_SIZE - MT_PERIOD)
942 static uint32_t mt_state[MT_SIZE];
943 static size_t mt_index = 0;
945 static GMQCC_INLINE void mt_generate() {
947 * The loop has been unrolled here: the original paper and implemenation
948 * Called for the following code:
949 * for (register unsigned i = 0; i < MT_SIZE; ++i) {
950 * register uint32_t load;
951 * load = (0x80000000 & mt_state[i]) // most significant 32nd bit
952 * load |= (0x7FFFFFFF & mt_state[(i + 1) % MT_SIZE]) // least significant 31nd bit
954 * mt_state[i] = mt_state[(i + MT_PERIOD) % MT_SIZE] ^ (load >> 1);
956 * if (load & 1) mt_state[i] ^= 0x9908B0DF;
959 * This essentially is a waste: we have two modulus operations, and
960 * a branch that is executed every iteration from [0, MT_SIZE).
962 * Please see: http://www.quadibloc.com/crypto/co4814.htm for more
963 * information on how this clever trick works.
965 static const uint32_t matrix[2] = {
970 * This register gives up a little more speed by instructing the compiler
971 * to force these into CPU registers (they're counters for indexing mt_state
972 * which we can force the compiler to generate prefetch instructions for)
978 * Said loop has been unrolled for MT_SPACE (226 iterations), opposed
979 * to [0, MT_SIZE) (634 iterations).
981 for (i = 0; i < MT_SPACE; ++i) {
982 y = (0x80000000 & mt_state[i]) | (0x7FFFFFF & mt_state[i + 1]);
983 mt_state[i] = mt_state[i + MT_PERIOD] ^ (y >> 1) ^ matrix[y & 1];
985 i ++; /* loop unroll */
987 y = (0x80000000 & mt_state[i]) | (0x7FFFFFF & mt_state[i + 1]);
988 mt_state[i] = mt_state[i + MT_PERIOD] ^ (y >> 1) ^ matrix[y & 1];
992 * collapsing the walls unrolled (evenly dividing 396 [632-227 = 396
996 while (i < MT_SIZE - 1) {
998 * We expand this 11 times .. manually, no macros are required
999 * here. This all fits in the CPU cache.
1001 y = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
1002 mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
1004 y = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
1005 mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
1007 y = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
1008 mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
1010 y = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
1011 mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
1013 y = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
1014 mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
1016 y = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
1017 mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
1019 y = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
1020 mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
1022 y = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
1023 mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
1025 y = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
1026 mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
1028 y = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
1029 mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
1031 y = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
1032 mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
1036 /* i = mt_state[623] */
1037 y = (0x80000000 & mt_state[MT_SIZE - 1]) | (0x7FFFFFFF & mt_state[MT_SIZE - 1]);
1038 mt_state[MT_SIZE - 1] = mt_state[MT_PERIOD - 1] ^ (y >> 1) ^ matrix[y & 1];
1041 void util_seed(uint32_t value) {
1043 * We seed the mt_state with a LCG (linear congruential generator)
1044 * We're operating exactly on exactly m=32, so there is no need to
1047 * The multipler of choice is 0x6C07865, also knows as the Borosh-
1048 * Niederreiter multipler used for modulus 2^32. More can be read
1049 * about this in Knuth's TAOCP Volume 2, page 106.
1051 * If you don't own TAOCP something is wrong with you :-) .. so I
1052 * also provided a link to the original paper by Borosh and
1053 * Niederreiter. It's called "Optional Multipliers for PRNG by The
1054 * Linear Congruential Method" (1983).
1055 * http://en.wikipedia.org/wiki/Linear_congruential_generator
1057 * From said page, it says the following:
1058 * "A common Mersenne twister implementation, interestingly enough
1059 * used an LCG to generate seed data."
1062 * The data we're operating on is 32-bits for the mt_state array, so
1063 * there is no masking required with 0xFFFFFFFF
1067 mt_state[0] = value;
1068 for (i = 1; i < MT_SIZE; ++i)
1069 mt_state[i] = 0x6C078965 * (mt_state[i - 1] ^ mt_state[i - 1] >> 30) + i;
1072 uint32_t util_rand() {
1073 register uint32_t y;
1076 * This is inlined with any sane compiler (I checked)
1077 * for some reason though, SubC seems to be generating invalid
1078 * code when it inlines this.
1083 y = mt_state[mt_index];
1085 /* Standard tempering */
1086 y ^= y >> 11; /* +7 */
1087 y ^= y << 7 & 0x9D2C5680; /* +4 */
1088 y ^= y << 15 & 0xEFC60000; /* -4 */
1089 y ^= y >> 18; /* -7 */
1091 if(++mt_index == MT_SIZE)