Implemented a optimized hash-set that can be used in various parts of the compiler...
authorDale Weiler <killfieldengine@gmail.com>
Tue, 1 Jan 2013 07:59:04 +0000 (07:59 +0000)
committerDale Weiler <killfieldengine@gmail.com>
Tue, 1 Jan 2013 07:59:04 +0000 (07:59 +0000)
gmqcc.h
util.c

diff --git a/gmqcc.h b/gmqcc.h
index 52ae6c72cb3778ac35ff2826ec3c0e6c537ba4e5..02b9bfe147f0b06535c8f14b5115e10b45272587 100644 (file)
--- a/gmqcc.h
+++ b/gmqcc.h
@@ -320,6 +320,14 @@ typedef struct hash_table_t {
     struct hash_node_t **table;
 } hash_table_t, *ht;
 
+typedef struct hash_set_t {
+    size_t  bits;
+    size_t  mask;
+    size_t  capacity;
+    size_t *items;
+    size_t  total;
+} hash_set_t, *hs;
+
 /*
  * hashtable implementation:
  *
@@ -358,6 +366,45 @@ void          util_htseth(hash_table_t *ht, const char *key, size_t hash, void *
 
 void         *util_htget (hash_table_t *ht, const char *key);
 void         *util_htgeth(hash_table_t *ht, const char *key, size_t hash);
+
+/*
+ * hashset implementation:
+ *      This was designed for pointers:  you manage the life of the object yourself
+ *      if you do use this for non-pointers please be warned that the object may not
+ *      be valid if the duration of it exceeds (i.e on stack).  So you need to allocate
+ *      yourself, or put those in global scope to ensure duration is for the whole
+ *      runtime.
+ *
+ * util_hsnew()                             -- to make a new hashset
+ * util_hsadd(set, key)                     -- to add something in the set
+ * util_hshas(set, key)                     -- to check if something is in the set
+ * util_hsrem(set, key)                     -- to remove something in the set
+ * util_hsdel(set)                          -- to delete the set
+ *
+ * example of use:
+ * 
+ * hs    foo = util_hsnew();
+ * char *bar = "hello blub\n";
+ * char *baz = "hello dale\n";
+ *
+ * util_hsadd(foo, bar);
+ * util_hsadd(foo, baz);
+ * util_hsrem(foo, baz);
+ *
+ * printf("bar %d | baz %d\n",
+ *     util_hshas(foo, bar),
+ *     util_hshad(foo, baz)
+ * );
+ *
+ * util_hsdel(foo);  
+ */
+
+hash_set_t *util_hsnew(void);
+int         util_hsadd(hash_set_t *, void *);
+int         util_hshas(hash_set_t *, void *);
+int         util_hsrem(hash_set_t *, void *);
+void        util_hsdel(hash_set_t *);
 /*===================================================================*/
 /*============================ file.c ===============================*/
 /*===================================================================*/
diff --git a/util.c b/util.c
index ddf7805ccf35ba392351241ed02ccbd3d2e81309..5d0d683a0f11f3ac69cd9aa0101d62545c68662e 100644 (file)
--- a/util.c
+++ b/util.c
@@ -573,6 +573,146 @@ void util_htdel(hash_table_t *ht) {
     mem_d(ht);
 }
 
+/*
+ * A basic implementation of a hash-set.  Unlike a hashtable, a hash
+ * set doesn't maintain key-value pairs.  It simply maintains a key
+ * that can be set, removed, and checked for.
+ *
+ * See EXPOSED interface comment below 
+ */
+#define GMQCC_HASHSET_PRIME0 0x0049
+#define GMQCC_HASHSET_PRIME1 0x1391
+
+static int util_hsput(hash_set_t *set, void *item) {
+    size_t hash = (size_t)item; /* shouldn't drop the bits */
+    size_t iter;
+
+    /* a == 0 || a == 1 */
+    if (hash >> 1)
+        return -1;
+
+    iter = set->mask & (GMQCC_HASHSET_PRIME0 * hash);
+
+    /* while (set->items[iter] != 0 && set->items[iter] != 1) */
+    while  (!(set->items[iter] >> 1)) {
+        if (set->items[iter] == hash)
+            return 0;
+
+        iter = set->mask & (iter + GMQCC_HASHSET_PRIME1);
+    }
+
+    set->total ++;
+    set->items[iter] = hash;
+
+    return 1;
+}
+
+static void util_hsupdate(hash_set_t *set) {
+    size_t *old;
+    size_t  end;
+    size_t  itr;
+
+    /* time to rehash? */
+    if ((float)set->total >= (size_t)((double)set->capacity * 0.85)) {
+        old = set->items;
+        end = set->capacity;
+
+        set->bits ++;
+        set->capacity = (size_t)(1 << set->bits);
+        set->mask     = set->capacity - 1;
+        set->items    = mem_a(set->capacity * sizeof(size_t));
+        set->total    = 0;
+
+        /*assert(set->items);*/
+
+        /*
+         * this shouldn't be slow?  if so unroll it a little perhaps
+         * (shouldn't be though)
+         */
+        for (itr = 0; itr < end; itr++)
+            util_hsput(set, (void*)old[itr]);
+
+        mem_d(old);
+    }
+}
+
+/*
+ * EXPOSED interface: all of these functions are exposed to the outside
+ * for use. The stuff above is static because it's the "internal" mechanics
+ * for syncronizing the set for updating, and putting data into the set.
+ */   
+int util_hsadd(hash_set_t *set, void *item) {
+    int run = util_hsput(set, item); /* inlined */
+    util_hsupdate(set);
+
+    return run;
+}
+
+/* remove item in set */
+int util_hsrem(hash_set_t *set, void *item) {
+    size_t hash = (size_t)item;
+    size_t iter = set->mask & (GMQCC_HASHSET_PRIME0 * hash);
+
+    while  (set->items[iter]) {
+        if (set->items[iter] == hash) {
+            set->items[iter] =  1;
+            set->total       --;
+
+            return 1;
+        }
+        iter = set->mask & (iter + GMQCC_HASHSET_PRIME1);
+    }
+
+    return 0;
+}
+
+/* check if item is set */
+int util_hshas(hash_set_t *set, void *item) {
+    size_t hash = (size_t)item;
+    size_t iter = set->mask & (GMQCC_HASHSET_PRIME0 * hash);
+
+    while  (set->items[iter]) {
+        if (set->items[iter] == hash)
+            return 1;
+
+        iter = set->mask & (iter + GMQCC_HASHSET_PRIME1);
+    }
+
+    return 0;
+}
+
+hash_set_t *util_hsnew(void) {
+    hash_set_t *set;
+
+    if (!(set = mem_a(sizeof(hash_set_t))))
+        return NULL;
+
+    set->bits     = 3;
+    set->total    = 0;
+    set->capacity = (size_t)(1 << set->bits);
+    set->mask     = set->capacity - 1;
+    set->items    = mem_a(set->capacity * sizeof(size_t));
+
+    if (!set->items) {
+        util_hsdel(set);
+        return NULL;
+    }
+
+    return set;
+}
+
+void util_hsdel(hash_set_t *set) {
+    if (!set) return;
+
+    if (set->items)
+        mem_d(set->items);
+
+    mem_d(set);
+}
+#undef GMQCC_HASHSET_PRIME0
+#undef GMQCC_HASHSET_PRIME1
+
+
 /*
  * Portable implementation of vasprintf/asprintf. Assumes vsnprintf
  * exists, otherwise compiler error.