de.git.xonotic.org Git - xonotic/gmqcc.git/blobdiff - intrin.c
Make log use the slightly improved algorithm for small values.
[xonotic/gmqcc.git] / intrin.c
index b2ef3ed6b1b149981344fc8142e71f5c4077535c..4df403b82dfdd0bef92f3095ce7c8e21afcdfbe1 100644 (file)
--- a/intrin.c
+++ b/intrin.c
@@ -559,7 +559,7 @@ static ast_expression *intrin_exp2(intrin_t *intrin) {
 
     vec_push(value->expression.params, arg1);
 
-    vec_push(callpow->params, (ast_expression*)fold_constgen_float(intrin->fold, 2.0f));
+    vec_push(callpow->params, (ast_expression*)intrin->fold->imm_float[3]);
     vec_push(callpow->params, (ast_expression*)arg1);
 
     /* return <callpow> */
@@ -1310,7 +1310,7 @@ static ast_expression *intrin_nan(intrin_t *intrin) {
 
 static ast_expression *intrin_inf(intrin_t *intrin) {
     /*
-     * float nan(void) {
+     * float inf(void) {
      *     float x = 1.0f;
      *     float y = 0.0f;
      *     return x / y;
@@ -1379,7 +1379,6 @@ static ast_expression *intrin_ln(intrin_t *intrin) {
      *       }
      *   }
      *
-     *   float out;
      *   float A_i       = 1;
      *   float B_i       = 0;
      *   float A_iminus1 = 0;
@@ -1390,8 +1389,18 @@ static ast_expression *intrin_ln(intrin_t *intrin) {
      *       nth   = 0.0f;
      *
      *       while (whole >= base) {
-     *           whole /= base;
-     *           nth++;
+     *           float base2    = base;
+     *           float n2       = 1.0f;
+     *           float newbase2 = base2 * base2;
+     *
+     *           while (whole >= newbase2) {
+     *               base2     = newbase2;
+     *               n2       *= 2;
+     *               newbase2 *= newbase2;
+     *           }
+     *
+     *           whole /= base2;
+     *           nth += n2;
      *       }
      *
      *       float b_iplus1 = n;
@@ -1419,7 +1428,6 @@ static ast_expression *intrin_ln(intrin_t *intrin) {
     ast_value    *whole      = ast_value_new(intrin_ctx(intrin), "whole",     TYPE_FLOAT);
     ast_value    *nth        = ast_value_new(intrin_ctx(intrin), "nth",       TYPE_FLOAT);
     ast_value    *sign       = ast_value_new(intrin_ctx(intrin), "sign",      TYPE_FLOAT);
-    ast_value    *out        = ast_value_new(intrin_ctx(intrin), "out",       TYPE_FLOAT);
     ast_value    *A_i        = ast_value_new(intrin_ctx(intrin), "A_i",       TYPE_FLOAT);
     ast_value    *B_i        = ast_value_new(intrin_ctx(intrin), "B_i",       TYPE_FLOAT);
     ast_value    *A_iminus1  = ast_value_new(intrin_ctx(intrin), "A_iminus1", TYPE_FLOAT);
@@ -1428,12 +1436,16 @@ static ast_expression *intrin_ln(intrin_t *intrin) {
     ast_value    *A_iplus1   = ast_value_new(intrin_ctx(intrin), "A_iplus1",  TYPE_FLOAT);
     ast_value    *B_iplus1   = ast_value_new(intrin_ctx(intrin), "B_iplus1",  TYPE_FLOAT);
     ast_value    *eps        = ast_value_new(intrin_ctx(intrin), "eps",       TYPE_FLOAT);
+    ast_value    *base2      = ast_value_new(intrin_ctx(intrin), "base2",     TYPE_FLOAT);
+    ast_value    *n2         = ast_value_new(intrin_ctx(intrin), "n2",        TYPE_FLOAT);
+    ast_value    *newbase2   = ast_value_new(intrin_ctx(intrin), "newbase2",  TYPE_FLOAT);
     ast_block    *block      = ast_block_new(intrin_ctx(intrin));
     ast_block    *plt1orblt1 = ast_block_new(intrin_ctx(intrin)); /* (power <= 1.0f || base <= 1.0f) */
     ast_block    *plt1       = ast_block_new(intrin_ctx(intrin)); /* (power < 1.0f) */
     ast_block    *blt1       = ast_block_new(intrin_ctx(intrin)); /* (base  < 1.0f) */
     ast_block    *forloop    = ast_block_new(intrin_ctx(intrin)); /* for(;;) */
     ast_block    *whileloop  = ast_block_new(intrin_ctx(intrin)); /* while (whole >= base) */
+    ast_block    *nestwhile  = ast_block_new(intrin_ctx(intrin)); /* while (whole >= newbase2) */
     ast_function *func       = intrin_value(intrin, &value, "ln", TYPE_FLOAT);
     size_t        i;
 
@@ -1444,7 +1456,6 @@ static ast_expression *intrin_ln(intrin_t *intrin) {
     vec_push(block->locals, nth);
     vec_push(block->locals, sign);
     vec_push(block->locals, eps);
-    vec_push(block->locals, out);
     vec_push(block->locals, A_i);
     vec_push(block->locals, B_i);
     vec_push(block->locals, A_iminus1);
@@ -1608,25 +1619,116 @@ static ast_expression *intrin_ln(intrin_t *intrin) {
         )
     );
 
-    /* whole /= base; */
+    /* base2 = base; */
+    vec_push(whileloop->exprs,
+        (ast_expression*)ast_store_new(
+            intrin_ctx(intrin),
+            INSTR_STORE_F,
+            (ast_expression*)base2,
+            (ast_expression*)base
+        )
+    );
+
+    /* n2 = 1.0f; */
+    vec_push(whileloop->exprs,
+        (ast_expression*)ast_store_new(
+            intrin_ctx(intrin),
+            INSTR_STORE_F,
+            (ast_expression*)n2,
+            (ast_expression*)intrin->fold->imm_float[1]
+        )
+    );
+
+    /* newbase2 = base2 * base2; */
+    vec_push(whileloop->exprs,
+        (ast_expression*)ast_store_new(
+            intrin_ctx(intrin),
+            INSTR_STORE_F,
+            (ast_expression*)newbase2,
+            (ast_expression*)ast_binary_new(
+                intrin_ctx(intrin),
+                INSTR_MUL_F,
+                (ast_expression*)base2,
+                (ast_expression*)base2
+            )
+        )
+    );
+
+    /* while loop locals */
+    vec_push(whileloop->locals, base2);
+    vec_push(whileloop->locals, n2);
+    vec_push(whileloop->locals, newbase2);
+
+    /* base2 = newbase2; */
+    vec_push(nestwhile->exprs,
+        (ast_expression*)ast_store_new(
+            intrin_ctx(intrin),
+            INSTR_STORE_F,
+            (ast_expression*)base2,
+            (ast_expression*)newbase2
+        )
+    );
+
+    /* n2 *= 2; */
+    vec_push(nestwhile->exprs,
+        (ast_expression*)ast_binstore_new(
+            intrin_ctx(intrin),
+            INSTR_STORE_F,
+            INSTR_MUL_F,
+            (ast_expression*)n2,
+            (ast_expression*)intrin->fold->imm_float[3] /* 2.0f */
+        )
+    );
+
+    /* newbase2 *= newbase2; */
+    vec_push(nestwhile->exprs,
+        (ast_expression*)ast_binstore_new(
+            intrin_ctx(intrin),
+            INSTR_STORE_F,
+            INSTR_MUL_F,
+            (ast_expression*)newbase2,
+            (ast_expression*)newbase2
+        )
+    );
+
+    /* while (whole >= newbase2) */
+    vec_push(whileloop->exprs,
+        (ast_expression*)ast_loop_new(
+            intrin_ctx(intrin),
+            NULL,
+            (ast_expression*)ast_binary_new(
+                intrin_ctx(intrin),
+                INSTR_GE,
+                (ast_expression*)whole,
+                (ast_expression*)newbase2
+            ),
+            false,
+            NULL,
+            false,
+            NULL,
+            (ast_expression*)nestwhile
+        )
+    );
+
+    /* whole /= base2; */
     vec_push(whileloop->exprs,
         (ast_expression*)ast_binstore_new(
             intrin_ctx(intrin),
             INSTR_STORE_F,
             INSTR_DIV_F,
             (ast_expression*)whole,
-            (ast_expression*)base
+            (ast_expression*)base2
         )
     );
 
-    /* nth ++; */
+    /* nth += n2; */
     vec_push(whileloop->exprs,
         (ast_expression*)ast_binstore_new(
             intrin_ctx(intrin),
             INSTR_STORE_F,
             INSTR_ADD_F,
             (ast_expression*)nth,
-            (ast_expression*)intrin->fold->imm_float[1]
+            (ast_expression*)n2
         )
     );
 
@@ -1849,14 +1951,14 @@ static const intrin_func_t intrinsics[] = {
     {&intrin_mod,              "__builtin_mod",              "mod",      2},
     {&intrin_pow,              "__builtin_pow",              "pow",      2},
     {&intrin_fabs,             "__builtin_fabs",             "fabs",     1},
-    {&intrin_epsilon,          "__builtin_epsilon",          "",         0},
-    {&intrin_nan,              "__builtin_nan",              "",         0},
-    {&intrin_inf,              "__builtin_inf",              "",         0},
-    {&intrin_ln,               "__builtin_ln",               "",         2},
     {&intrin_log,              "__builtin_log",              "log",      1},
     {&intrin_log10,            "__builtin_log10",            "log10",    1},
     {&intrin_log2,             "__builtin_log2",             "log2",     1},
     {&intrin_logb,             "__builtin_logb",             "logb",     1},
+    {&intrin_epsilon,          "__builtin_epsilon",          "",         0},
+    {&intrin_nan,              "__builtin_nan",              "",         0},
+    {&intrin_inf,              "__builtin_inf",              "",         0},
+    {&intrin_ln,               "__builtin_ln",               "",         2},
     {&intrin_debug_typestring, "__builtin_debug_typestring", "",         0},
     {&intrin_nullfunc,         "#nullfunc",                  "",         0}
 };