Hardcode next_up and next_down instead of relying on nextafter.

Unfortunately, compilers are currently unable to optimize the nextafter function, even in the degenerate case where the second argument is explicitly infinite. So, this commit implements this case by hand. On the following testcase, this gives a 15% speedup. From Coq Require Import Int63 BinPos PrimFloat. Definition foo n := let eps := sub (next_up one) one in Pos.iter (fun x => next_down (add x eps)) two n. Time Eval vm_compute in foo 100000000. And when looking at the cost of just the allocation-free version of next_down, the speedup is 1500%. Said otherwise, the latency of next_down is now on par with the measurement noise due to cache misses and the like.
author: Guillaume Melquiond 2020-10-02 18:17:23 +0200
committer: Guillaume Melquiond 2020-11-13 15:14:35 +0100
commit: ef3ec53e4f74f32a705489b332b037569680d28e (patch)
tree: 2747eea9583d288ba99c01326a09ac19bcbf3d1e /kernel/byterun/coq_float64.c
parent: 5c7f63fa7a88cf2cb9b6837eb2797268c5843030 (diff)
1 files changed, 30 insertions, 2 deletions
diff --git a/kernel/byterun/coq_float64.c b/kernel/byterun/coq_float64.c
index ebbfe35fa6..bea47dd47e 100644
--- a/kernel/byterun/coq_float64.c
+++ b/kernel/byterun/coq_float64.c
@@ -9,12 +9,40 @@
 /************************************************************************/
 
 #include <math.h>
+#include <stdint.h>
 
 #define CAML_INTERNALS
 #include <caml/alloc.h>
 
 #include "coq_values.h"
 
+union double_bits {
+  double d;
+  uint64_t u;
+};
+
+static double next_up(double x) {
+  union double_bits bits;
+  if (!(x < INFINITY)) return x; // x is +oo or NaN
+  bits.d = x;
+  int64_t i = bits.u;
+  if (i >= 0) ++bits.u; // x >= +0.0, go away from zero
+  else if (bits.u + bits.u == 0) bits.u = 1; // x is -0.0, should almost never happen
+  else --bits.u; // x < 0.0, go toward zero
+  return bits.d;
+}
+
+static double next_down(double x) {
+  union double_bits bits;
+  if (!(x > -INFINITY)) return x; // x is -oo or NaN
+  bits.d = x;
+  int64_t i = bits.u;
+  if (i == 0) bits.u = INT64_MIN + 1; // x is +0.0
+  else if (i < 0) ++bits.u; // x <= -0.0, go away from zero
+  else --bits.u; // x > 0.0, go toward zero
+  return bits.d;
+}
+
 #define DECLARE_FBINOP(name, e)                                         \
   double coq_##name(double x, double y) {                               \
     return e;                                                           \
@@ -38,8 +66,8 @@ DECLARE_FBINOP(fadd, x + y)
 DECLARE_FBINOP(fsub, x - y)
 DECLARE_FBINOP(fdiv, x / y)
 DECLARE_FUNOP(fsqrt, sqrt(x))
-DECLARE_FUNOP(next_up, nextafter(x, INFINITY))
-DECLARE_FUNOP(next_down, nextafter(x, -INFINITY))
+DECLARE_FUNOP(next_up, next_up(x))
+DECLARE_FUNOP(next_down, next_down(x))
 
 value coq_is_double(value x) {
   return Val_long(Is_double(x));
author	Guillaume Melquiond	2020-10-02 18:17:23 +0200
committer	Guillaume Melquiond	2020-11-13 15:14:35 +0100
commit	ef3ec53e4f74f32a705489b332b037569680d28e (patch)
tree	2747eea9583d288ba99c01326a09ac19bcbf3d1e /kernel/byterun/coq_float64.c
parent	5c7f63fa7a88cf2cb9b6837eb2797268c5843030 (diff)