5 files changed, 275 insertions, 205 deletions
diff --git a/kernel/byterun/coq_interp.c b/kernel/byterun/coq_interp.c
index d2c88bffcc..4b45608ae5 100644
--- a/kernel/byterun/coq_interp.c
+++ b/kernel/byterun/coq_interp.c
@@ -29,13 +29,6 @@
 #include "coq_uint63_emul.h"
 #endif
 
-/* spiwack: I append here a few macros for value/number manipulation */
-#define uint32_of_value(val) (((uint32_t)(val)) >> 1)
-#define value_of_uint32(i)   ((value)((((uint32_t)(i)) << 1) | 1))
-#define UI64_of_uint32(lo) ((uint64_t)((uint32_t)(lo)))
-#define UI64_of_value(val) (UI64_of_uint32(uint32_of_value(val)))
-/* /spiwack */
-
 
 
 /* Registers for the abstract machine:
@@ -104,7 +97,8 @@ if (sp - num_args < coq_stack_threshold) {                                     \
    several architectures.
 */
 
-#if defined(__GNUC__) && !defined(DEBUG)
+#if defined(__GNUC__) && !defined(DEBUG) && !defined(__INTEL_COMPILER) \
+    && !defined(__llvm__)
 #ifdef __mips__
 #define PC_REG asm("$16")
 #define SP_REG asm("$17")
@@ -133,7 +127,7 @@ if (sp - num_args < coq_stack_threshold) {                                     \
 #define SP_REG asm("%edi")
 #define ACCU_REG
 #endif
-#if defined(PPC) || defined(_POWER) || defined(_IBMR2)
+#if defined(__ppc__) || defined(__ppc64__)
 #define PC_REG asm("26")
 #define SP_REG asm("27")
 #define ACCU_REG asm("28")
@@ -148,8 +142,9 @@ if (sp - num_args < coq_stack_threshold) {                                     \
 #define SP_REG asm("a4")
 #define ACCU_REG asm("d7")
 #endif
-#if defined(__arm__) && !defined(__thumb2__)
-#define PC_REG asm("r9")
+/* OCaml PR#4953: these specific registers not available in Thumb mode */
+#if defined(__arm__) && !defined(__thumb__)
+#define PC_REG asm("r6")
 #define SP_REG asm("r8")
 #define ACCU_REG asm("r7")
 #endif
@@ -159,6 +154,17 @@ if (sp - num_args < coq_stack_threshold) {                                     \
 #define ACCU_REG asm("38")
 #define JUMPTBL_BASE_REG asm("39")
 #endif
+#ifdef __x86_64__
+#define PC_REG asm("%r15")
+#define SP_REG asm("%r14")
+#define ACCU_REG asm("%r13")
+#endif
+#ifdef __aarch64__
+#define PC_REG asm("%x19")
+#define SP_REG asm("%x20")
+#define ACCU_REG asm("%x21")
+#define JUMPTBL_BASE_REG asm("%x22")
+#endif
 #endif
 
 #define CheckInt1() do{                            \
@@ -193,6 +199,12 @@ if (sp - num_args < coq_stack_threshold) {                                     \
 #define AllocCarry(cond) Alloc_small(accu, 1, (cond)? coq_tag_C1 : coq_tag_C0)
 #define AllocPair() Alloc_small(accu, 2, coq_tag_pair)
 
+#define Swap_accu_sp do{                        \
+    value swap_accu_sp_tmp__ = accu;            \
+    accu = *sp;                                 \
+    *sp = swap_accu_sp_tmp__;                   \
+  }while(0)
+
 /* For signal handling, we hijack some code from the caml runtime */
 
 extern intnat caml_signals_are_pending;
@@ -1213,7 +1225,7 @@ value coq_interprete
         /* Adds the integer in the accumulator with 
            the one ontop of the stack (which is poped)*/
         print_instr("ADDINT63");
-        accu = uint63_add(accu, *sp++);
+        Uint63_add(accu, *sp++);
         Next;
       }
 
@@ -1221,10 +1233,12 @@ value coq_interprete
         print_instr("CHECKADDCINT63");
         CheckInt2();
 	/* returns the sum with a carry */
-        value s;
-        s = uint63_add(accu, *sp++);
-        AllocCarry(uint63_lt(s,accu));
-        Field(accu, 0) = s;
+        int c;
+        Uint63_add(accu, *sp);
+        Uint63_lt(c, accu, *sp);
+        Swap_accu_sp;
+        AllocCarry(c);
+        Field(accu, 0) = *sp++;
 	Next;
       }
 
@@ -1232,10 +1246,12 @@ value coq_interprete
         print_instr("ADDCARRYCINT63");
         CheckInt2();
 	/* returns the sum plus one with a carry */
-        value s;
-        s = uint63_addcarry(accu, *sp++);
-        AllocCarry(uint63_leq(s, accu));
-        Field(accu, 0) = s;
+        int c;
+        Uint63_addcarry(accu, *sp);
+        Uint63_leq(c, accu, *sp);
+        Swap_accu_sp;
+        AllocCarry(c);
+        Field(accu, 0) = *sp++;
 	Next;
       }
 
@@ -1246,7 +1262,7 @@ value coq_interprete
       Instruct (SUBINT63) {
         print_instr("SUBINT63");
 	/* returns the subtraction */
-        accu = uint63_sub(accu, *sp++);
+        Uint63_sub(accu, *sp++);
         Next;
       }
 
@@ -1254,12 +1270,12 @@ value coq_interprete
         print_instr("SUBCINT63");
         CheckInt2();
 	/* returns the subtraction with a carry */
-        value b;
-        value s;
-        b = *sp++;
-        s = uint63_sub(accu,b);
-        AllocCarry(uint63_lt(accu,b));
-        Field(accu, 0) = s;
+        int c;
+        Uint63_lt(c, accu, *sp);
+        Uint63_sub(accu, *sp);
+        Swap_accu_sp;
+        AllocCarry(c);
+        Field(accu, 0) = *sp++;
 	Next;
       }
 
@@ -1267,12 +1283,12 @@ value coq_interprete
         print_instr("SUBCARRYCINT63");
         CheckInt2();
 	/* returns the subtraction minus one with a carry */
-        value b;
-        value s;
-        b = *sp++;
-        s = uint63_subcarry(accu,b);
-        AllocCarry(uint63_leq(accu,b));
-        Field(accu, 0) = s;
+        int c;
+        Uint63_leq(c,accu,*sp);
+        Uint63_subcarry(accu,*sp);
+        Swap_accu_sp;
+        AllocCarry(c);
+        Field(accu, 0) = *sp++;
 	Next;
       }
 
@@ -1280,7 +1296,7 @@ value coq_interprete
         print_instr("MULINT63");
         CheckInt2();
 	/* returns the multiplication */
-        accu = uint63_mul(accu,*sp++);
+        Uint63_mul(accu,*sp++);
 	Next;
       }
 
@@ -1288,15 +1304,11 @@ value coq_interprete
         /*returns the multiplication on a pair */
         print_instr("MULCINT63");
         CheckInt2();
-        /*accu = 2v+1, *sp=2w+1 ==> p = 2v*w */
-        /* TODO: implement
-        p = I64_mul (UI64_of_value (accu), UI64_of_uint32 ((*sp++)^1));
-        AllocPair(); */
-        /* Field(accu, 0) = (value)(I64_lsr(p,31)|1) ; */ /*higher part*/
-        /* Field(accu, 1) = (value)(I64_to_int32(p)|1); */ /*lower part*/
-        value x = accu;
+        Uint63_mulc(accu, *sp, sp);
+        *--sp = accu;
         AllocPair();
-        Field(accu, 1) = uint63_mulc(x, *sp++, &Field(accu, 0));
+        Field(accu, 1) = *sp++;
+        Field(accu, 0) = *sp++;
         Next;
       }
 
@@ -1306,13 +1318,13 @@ value coq_interprete
         /* spiwack: a priori no need of the NON_STANDARD_DIV_MOD flag
                     since it probably only concerns negative number.
                     needs to be checked at this point */
-        value divisor;
-        divisor = *sp++;
-        if (uint63_eq0(divisor)) {
-          accu = divisor;
+        int b;
+        Uint63_eq0(b, *sp);
+        if (b) {
+          accu = *sp++;
 	}
 	else {
-          accu = uint63_div(accu, divisor);
+          Uint63_div(accu, *sp++);
         }
 	Next;
       }
@@ -1320,13 +1332,13 @@ value coq_interprete
       Instruct(CHECKMODINT63) {
         print_instr("CHEKMODINT63");
         CheckInt2();
-        value divisor;
-        divisor = *sp++;
-        if (uint63_eq0(divisor)) {
-          accu = divisor;
+        int b;
+        Uint63_eq0(b, *sp);
+        if (b) {
+          accu = *sp++;
 	}
         else {
-          accu = uint63_mod(accu,divisor);
+          Uint63_mod(accu,*sp++);
 	}
 	Next;
       }
@@ -1337,19 +1349,24 @@ value coq_interprete
         /* spiwack: a priori no need of the NON_STANDARD_DIV_MOD flag
                     since it probably only concerns negative number.
                     needs to be checked at this point */
-        value divisor;
-        divisor = *sp++;
-        if (uint63_eq0(divisor)) {
-          Alloc_small(accu, 2, 1); /* ( _ , arity, tag ) */
-          Field(accu, 0) = divisor;
-          Field(accu, 1) = divisor;
+        int b;
+        Uint63_eq0(b, *sp);
+        if (b) {
+          AllocPair();
+          Field(accu, 0) = *sp;
+          Field(accu, 1) = *sp++;
         }
         else {
-          value modulus;
-          modulus = accu;
-          Alloc_small(accu, 2, 1); /* ( _ , arity, tag ) */
-          Field(accu, 0) = uint63_div(modulus,divisor);
-          Field(accu, 1) = uint63_mod(modulus,divisor);
+          *--sp = accu;
+          Uint63_div(sp[0],sp[1]);
+          Swap_accu_sp;
+          Uint63_mod(accu,sp[1]);
+          sp[1] = sp[0];
+          Swap_accu_sp;
+          AllocPair();
+          Field(accu, 0) = sp[1];
+          Field(accu, 1) = sp[0];
+          sp += 2;
         }
 	Next;
       }
@@ -1357,78 +1374,47 @@ value coq_interprete
       Instruct (CHECKDIV21INT63) {
         print_instr("DIV21INT63");
         CheckInt3();
-        /* spiwack: takes three int31 (the two first ones represent an
-                    int62) and performs the euclidian division of the
-                    int62 by the int31 */
-        /* TODO: implement this
-        bigint = UI64_of_value(accu);
-        bigint = I64_or(I64_lsl(bigint, 31),UI64_of_value(*sp++));
-        uint64 divisor;
-        divisor = UI64_of_value(*sp++);
-        Alloc_small(accu, 2, 1); */ /* ( _ , arity, tag ) */
-        /* if (I64_is_zero (divisor)) {
-           Field(accu, 0) = 1; */ /* 2*0+1 */
-        /* Field(accu, 1) = 1; */ /* 2*0+1 */
-        /* }
-        else {
-          uint64 quo, mod;
-          I64_udivmod(bigint, divisor, &quo, &mod);
-          Field(accu, 0) = value_of_uint32(I64_to_int32(quo));
-          Field(accu, 1) = value_of_uint32(I64_to_int32(mod));
-        } */
-        value bigint; /* TODO: fix */
-        bigint = *sp++; /* TODO: take accu into account */
-        value divisor;
-        divisor = *sp++;
-        if (uint63_eq0(divisor)) {
-          Alloc_small(accu, 2, 1);
-          Field(accu, 0) = divisor;
-          Field(accu, 1) = divisor;
-	}
-        else {
-          value quo, mod;
-          mod = uint63_div21(accu, bigint, divisor, &quo);
-          Alloc_small(accu, 2, 1);
-          Field(accu, 0) = quo;
-          Field(accu, 1) = mod;
-	}
+        Uint63_div21(accu, sp[0], sp[1], &(sp[1]));
+        Swap_accu_sp;
+        AllocPair();
+        Field(accu, 0) = sp[1];
+        Field(accu, 1) = sp[0];
+        sp += 2;
         Next;
       }
 
       Instruct(CHECKLXORINT63) {
         print_instr("CHECKLXORINT63");
         CheckInt2();
-        accu = uint63_lxor(accu,*sp++);
+        Uint63_lxor(accu,*sp++);
         Next;
       }
 
       Instruct(CHECKLORINT63) {
         print_instr("CHECKLORINT63");
         CheckInt2();
-        accu = uint63_lor(accu,*sp++);
+        Uint63_lor(accu,*sp++);
         Next;
       }
 
       Instruct(CHECKLANDINT63) {
         print_instr("CHECKLANDINT63");
         CheckInt2();
-        accu = uint63_land(accu,*sp++);
+        Uint63_land(accu,*sp++);
         Next;
       }
 
       Instruct(CHECKLSLINT63) {
         print_instr("CHECKLSLINT63");
         CheckInt2();
-        value p = *sp++;
-        accu = uint63_lsl(accu,p);
+        Uint63_lsl(accu,*sp++);
         Next;
       }
 
       Instruct(CHECKLSRINT63) {
         print_instr("CHECKLSRINT63");
         CheckInt2();
-        value p = *sp++;
-        accu = uint63_lsr(accu,p);
+        Uint63_lsr(accu,*sp++);
         Next;
       }
 
@@ -1436,7 +1422,7 @@ value coq_interprete
         print_instr("CHECKLSLINT63CONST1");
         if (Is_uint63(accu)) {
           pc++;
-          accu = uint63_lsl1(accu);
+          Uint63_lsl1(accu);
           Next;
         } else {
           *--sp = uint63_one();
@@ -1450,7 +1436,7 @@ value coq_interprete
         print_instr("CHECKLSRINT63CONST1");
         if (Is_uint63(accu)) {
           pc++;
-          accu = uint63_lsr1(accu);
+          Uint63_lsr1(accu);
           Next;
         } else {
           *--sp = uint63_one();
@@ -1463,18 +1449,17 @@ value coq_interprete
       Instruct (CHECKADDMULDIVINT63) {
         print_instr("CHECKADDMULDIVINT63");
         CheckInt3();
-        value x;
-        value y;
-        x = *sp++;
-        y = *sp++;
-        accu = uint63_addmuldiv(accu,x,y);
+        Uint63_addmuldiv(accu,sp[0],sp[1]);
+        sp += 2;
         Next;
       }
 
       Instruct (CHECKEQINT63) {
         print_instr("CHECKEQINT63");
         CheckInt2();
-        accu = uint63_eq(accu,*sp++) ? coq_true : coq_false;
+        int b;
+        Uint63_eq(b, accu, *sp++);
+        accu = b ? coq_true : coq_false;
         Next;
       }
 
@@ -1484,7 +1469,9 @@ value coq_interprete
      }
      Instruct (LTINT63) {
        print_instr("LTINT63");
-       accu = uint63_lt(accu,*sp++) ? coq_true : coq_false;
+       int b;
+       Uint63_lt(b,accu,*sp++);
+       accu = b ? coq_true : coq_false;
        Next;
      }
 
@@ -1494,7 +1481,9 @@ value coq_interprete
      }
      Instruct (LEINT63) {
        print_instr("LEINT63");
-       accu = uint63_leq(accu,*sp++) ? coq_true : coq_false;
+       int b;
+       Uint63_leq(b,accu,*sp++);
+       accu = b ? coq_true : coq_false;
        Next;
      }
 
@@ -1503,25 +1492,30 @@ value coq_interprete
         /* assumes Inductive _ : _ := Eq | Lt | Gt */
         print_instr("CHECKCOMPAREINT63");
         CheckInt2();
-        if (uint63_eq(accu,*sp)) {
+        int b;
+        Uint63_eq(b, accu, *sp);
+        if (b) {
           accu = coq_Eq;
           sp++;
         }
-        else accu = uint63_lt(accu,*sp++) ? coq_Lt : coq_Gt;
+        else {
+          Uint63_lt(b, accu, *sp++);
+          accu = b ? coq_Lt : coq_Gt;
+        }
         Next;
       }
 
       Instruct (CHECKHEAD0INT63) {
         print_instr("CHECKHEAD0INT63");
         CheckInt1();
-        accu = uint63_head0(accu);
+        Uint63_head0(accu);
         Next;
       }
 
       Instruct (CHECKTAIL0INT63) {
         print_instr("CHECKTAIL0INT63");
         CheckInt1();
-        accu = uint63_tail0(accu);
+        Uint63_tail0(accu);
         Next;
       }
 
@@ -1593,7 +1587,7 @@ value coq_push_vstack(value stk, value max_stack_size) {
   print_instr("push_vstack");print_int(len);
    for(i = 0; i < len; i++) coq_sp[i] = Field(stk,i);
   sp = coq_sp;
-  CHECK_STACK(uint32_of_value(max_stack_size));
+  CHECK_STACK(uint_of_value(max_stack_size));
   return Val_unit;
 }
 
diff --git a/kernel/byterun/coq_memory.c b/kernel/byterun/coq_memory.c
index 542a05fd25..a1c49bee95 100644
--- a/kernel/byterun/coq_memory.c
+++ b/kernel/byterun/coq_memory.c
@@ -105,7 +105,7 @@ value init_coq_vm(value unit) /* ML */
     init_coq_interpreter();
     
     /* Some predefined pointer code.
-     * It is typically contained in accumlator blocks whose tag is 0 and thus
+     * It is typically contained in accumulator blocks whose tag is 0 and thus
      * scanned by the GC, so make it look like an OCaml block. */
     value accu_block = (value) coq_stat_alloc(2 * sizeof(value));
     Hd_hp (accu_block) = Make_header (1, Abstract_tag, Caml_black);        \
diff --git a/kernel/byterun/coq_memory.h b/kernel/byterun/coq_memory.h
index 9375b15de2..1ea461c5e5 100644
--- a/kernel/byterun/coq_memory.h
+++ b/kernel/byterun/coq_memory.h
@@ -19,7 +19,7 @@
 
 
 #define Coq_stack_size (4096 * sizeof(value))
-#define Coq_stack_threshold (256 * sizeof(value))
+#define Coq_stack_threshold (256 * sizeof(value))  /* see kernel/cbytegen.ml */
 #define Coq_max_stack_size (256 * 1024)
 
 #define TRANSP 0
diff --git a/kernel/byterun/coq_uint63_emul.h b/kernel/byterun/coq_uint63_emul.h
index 5499f124a2..528cc6fc1f 100644
--- a/kernel/byterun/coq_uint63_emul.h
+++ b/kernel/byterun/coq_uint63_emul.h
@@ -6,6 +6,8 @@
 
 #define Is_uint63(v) (Tag_val(v) == Custom_tag)
 
+#define uint_of_value(val) (((uint32_t)(val)) >> 1)
+
 # define DECLARE_NULLOP(name) \
 value uint63_##name() { \
   static value* cb = 0; \
@@ -15,83 +17,142 @@ value uint63_##name() { \
   }
 
 # define DECLARE_UNOP(name) \
-value uint63_##name(value x) { \
+value uint63_##name##_ml(value x) { \
   static value* cb = 0; \
   CAMLparam1(x); \
   if (!cb) cb = caml_named_value("uint63 " #name); \
   CAMLreturn(caml_callback(*cb, x)); \
   }
 
-# define DECLARE_PREDICATE(name) \
-value uint63_##name(value x) { \
-  static value* cb = 0; \
-  CAMLparam1(x); \
-  if (!cb) cb = caml_named_value("uint63 " #name); \
-  CAMLreturn(Int_val(caml_callback(*cb, x))); \
-  }
+# define CALL_UNOP_NOASSIGN(name, x) \
+  value uint63_return_value__; \
+  value uint63_arg_x__ = (x); \
+  Setup_for_gc; \
+  uint63_return_value__ = uint63_##name##_ml(uint63_arg_x__); \
+  Restore_after_gc
+
+# define CALL_UNOP(name, x) do{ \
+    CALL_UNOP_NOASSIGN(name, x); \
+    accu = uint63_return_value__; \
+  }while(0)
+
+# define CALL_PREDICATE(r, name, x) do{ \
+    CALL_UNOP_NOASSIGN(name, x); \
+    (r) = Int_val(uint63_return_value__); \
+  }while(0)
 
 # define DECLARE_BINOP(name) \
-value uint63_##name(value x, value y) { \
+value uint63_##name##_ml(value x, value y) { \
   static value* cb = 0; \
   CAMLparam2(x, y); \
   if (!cb) cb = caml_named_value("uint63 " #name); \
   CAMLreturn(caml_callback2(*cb, x, y)); \
   }
 
-# define DECLARE_RELATION(name) \
-value uint63_##name(value x, value y) { \
-  static value* cb = 0; \
-  CAMLparam2(x, y); \
-  if (!cb) cb = caml_named_value("uint63 " #name); \
-  CAMLreturn(Int_val(caml_callback2(*cb, x, y))); \
-  }
+# define CALL_BINOP_NOASSIGN(name, x, y) \
+  value uint63_return_value__; \
+  value uint63_arg_x__ = (x); \
+  value uint63_arg_y__ = (y); \
+  Setup_for_gc; \
+  uint63_return_value__ = uint63_##name##_ml(uint63_arg_x__, uint63_arg_y__); \
+  Restore_after_gc
+
+# define CALL_BINOP(name, x, y) do{ \
+    CALL_BINOP_NOASSIGN(name, x, y);  \
+    accu = uint63_return_value__; \
+  }while(0)
+
+# define CALL_RELATION(r, name, x, y) do{ \
+    CALL_BINOP_NOASSIGN(name, x, y); \
+    (r) = Int_val(uint63_return_value__); \
+  }while(0)
 
 # define DECLARE_TEROP(name) \
-value uint63_##name(value x, value y, value z) { \
+value uint63_##name##_ml(value x, value y, value z) { \
   static value* cb = 0; \
   CAMLparam3(x, y, z); \
   if (!cb) cb = caml_named_value("uint63 " #name); \
   CAMLreturn(caml_callback3(*cb, x, y, z)); \
   }
 
+# define CALL_TEROP(name, x, y, z) do{ \
+    value uint63_return_value__; \
+    value uint63_arg_x__ = (x); \
+    value uint63_arg_y__ = (y); \
+    value uint63_arg_z__ = (z); \
+    Setup_for_gc; \
+    uint63_return_value__ = uint63_##name##_ml(uint63_arg_x__, uint63_arg_y__, uint63_arg_z__); \
+    Restore_after_gc; \
+    accu = uint63_return_value__; \
+  }while(0)
 
 DECLARE_NULLOP(one)
 DECLARE_BINOP(add)
+#define Uint63_add(x, y) CALL_BINOP(add, x, y)
 DECLARE_BINOP(addcarry)
+#define Uint63_addcarry(x, y) CALL_BINOP(addcarry, x, y)
 DECLARE_TEROP(addmuldiv)
+#define Uint63_addmuldiv(x, y, z) CALL_TEROP(addmuldiv, x, y, z)
 DECLARE_BINOP(div)
-DECLARE_TEROP(div21_ml)
-DECLARE_RELATION(eq)
-DECLARE_PREDICATE(eq0)
+#define Uint63_div(x, y) CALL_BINOP(div, x, y)
+DECLARE_BINOP(eq)
+#define Uint63_eq(r, x, y) CALL_RELATION(r, eq, x, y)
+DECLARE_UNOP(eq0)
+#define Uint63_eq0(r, x) CALL_PREDICATE(r, eq0, x)
 DECLARE_UNOP(head0)
+#define Uint63_head0(x) CALL_UNOP(head0, x)
 DECLARE_BINOP(land)
-DECLARE_RELATION(leq)
+#define Uint63_land(x, y) CALL_BINOP(land, x, y)
+DECLARE_BINOP(leq)
+#define Uint63_leq(r, x, y) CALL_RELATION(r, leq, x, y)
 DECLARE_BINOP(lor)
+#define Uint63_lor(x, y) CALL_BINOP(lor, x, y)
 DECLARE_BINOP(lsl)
+#define Uint63_lsl(x, y) CALL_BINOP(lsl, x, y)
 DECLARE_UNOP(lsl1)
+#define Uint63_lsl1(x) CALL_UNOP(lsl1, x)
 DECLARE_BINOP(lsr)
+#define Uint63_lsr(x, y) CALL_BINOP(lsr, x, y)
 DECLARE_UNOP(lsr1)
-DECLARE_RELATION(lt)
+#define Uint63_lsr1(x) CALL_UNOP(lsr1, x)
+DECLARE_BINOP(lt)
+#define Uint63_lt(r, x, y) CALL_RELATION(r, lt, x, y)
 DECLARE_BINOP(lxor)
+#define Uint63_lxor(x, y) CALL_BINOP(lxor, x, y)
 DECLARE_BINOP(mod)
+#define Uint63_mod(x, y) CALL_BINOP(mod, x, y)
 DECLARE_BINOP(mul)
-DECLARE_BINOP(mulc_ml)
+#define Uint63_mul(x, y) CALL_BINOP(mul, x, y)
 DECLARE_BINOP(sub)
+#define Uint63_sub(x, y) CALL_BINOP(sub, x, y)
 DECLARE_BINOP(subcarry)
+#define Uint63_subcarry(x, y) CALL_BINOP(subcarry, x, y)
 DECLARE_UNOP(tail0)
+#define Uint63_tail0(x) CALL_UNOP(tail0, x)
+
+DECLARE_TEROP(div21_ml)
+# define Uint63_div21(x, y, z, q) do{ \
+    value uint63_return_value__; \
+    value uint63_arg_x__ = (x); \
+    value uint63_arg_y__ = (y); \
+    value uint63_arg_z__ = (z); \
+    Setup_for_gc; \
+    uint63_return_value__ = \
+      uint63_div21_ml_ml(uint63_arg_x__, uint63_arg_y__, uint63_arg_z__); \
+    Restore_after_gc; \
+    *q = Field(uint63_return_value__, 0); \
+    accu = Field(uint63_return_value__, 1); \
+  }while(0)
 
-value uint63_div21(value x, value y, value z, value* q) {
-  CAMLparam3(x, y, z);
-  CAMLlocal1(qr);
-  qr = uint63_div21_ml(x, y, z);
-  *q = Field(qr, 0);
-  CAMLreturn(Field(qr, 1));
-}
-
-value uint63_mulc(value x, value y, value* h) {
-  CAMLparam2(x, y);
-  CAMLlocal1(hl);
-  hl = uint63_mulc_ml(x, y);
-  *h = Field(hl, 0);
-  CAMLreturn(Field(hl, 1));
-}
+DECLARE_BINOP(mulc_ml)
+# define Uint63_mulc(x, y, h) do{ \
+    value uint63_return_value__; \
+    value uint63_arg_x__ = (x); \
+    value uint63_arg_y__ = (y); \
+    Setup_for_gc; \
+    uint63_return_value__ = \
+      uint63_mulc_ml_ml(uint63_arg_x__, uint63_arg_y__); \
+    Restore_after_gc; \
+    *(h) = Field(uint63_return_value__, 0); \
+    accu = Field(uint63_return_value__, 1); \
+  }while(0)
diff --git a/kernel/byterun/coq_uint63_native.h b/kernel/byterun/coq_uint63_native.h
index 92f4dc79bc..9fbd3f83d8 100644
--- a/kernel/byterun/coq_uint63_native.h
+++ b/kernel/byterun/coq_uint63_native.h
@@ -1,5 +1,6 @@
 #define Is_uint63(v) (Is_long(v))
 
+#define uint_of_value(val) (((uint64_t)(val)) >> 1)
 #define uint63_of_value(val) ((uint64_t)(val) >> 1)
 
 /* 2^63 * y + x as a value */
@@ -9,28 +10,43 @@
 #define uint63_one() ((value) 3) /* 2*1 + 1 */
 
 #define uint63_eq(x,y) ((x) == (y))
-#define uint63_eq0(x) ((x) == (uint64_t)1)
+#define Uint63_eq(r,x,y) ((r) = uint63_eq(x,y))
+#define Uint63_eq0(r,x) ((r) = ((x) == (uint64_t)1))
 #define uint63_lt(x,y) ((uint64_t) (x) < (uint64_t) (y))
+#define Uint63_lt(r,x,y) ((r) = uint63_lt(x,y))
 #define uint63_leq(x,y) ((uint64_t) (x) <= (uint64_t) (y))
+#define Uint63_leq(r,x,y) ((r) = uint63_leq(x,y))
 
-#define uint63_add(x,y) ((value)((uint64_t) (x) + (uint64_t) (y) - 1))
-#define uint63_addcarry(x,y) ((value)((uint64_t) (x) + (uint64_t) (y) + 1))
-#define uint63_sub(x,y) ((value)((uint64_t) (x) - (uint64_t) (y) + 1))
-#define uint63_subcarry(x,y) ((value)((uint64_t) (x) - (uint64_t) (y) - 1))
-#define uint63_mul(x,y) (Val_long(uint63_of_value(x) * uint63_of_value(y)))
-#define uint63_div(x,y) (Val_long(uint63_of_value(x) / uint63_of_value(y)))
-#define uint63_mod(x,y) (Val_long(uint63_of_value(x) % uint63_of_value(y)))
+#define Uint63_add(x,y) (accu = (value)((uint64_t) (x) + (uint64_t) (y) - 1))
+#define Uint63_addcarry(x,y) (accu = (value)((uint64_t) (x) + (uint64_t) (y) + 1))
+#define Uint63_sub(x,y) (accu = (value)((uint64_t) (x) - (uint64_t) (y) + 1))
+#define Uint63_subcarry(x,y) (accu = (value)((uint64_t) (x) - (uint64_t) (y) - 1))
+#define Uint63_mul(x,y) (accu = Val_long(uint63_of_value(x) * uint63_of_value(y)))
+#define Uint63_div(x,y) (accu = Val_long(uint63_of_value(x) / uint63_of_value(y)))
+#define Uint63_mod(x,y) (accu = Val_long(uint63_of_value(x) % uint63_of_value(y)))
 
-#define uint63_lxor(x,y) ((value)(((uint64_t)(x) ^ (uint64_t)(y)) | 1))
-#define uint63_lor(x,y) ((value)((uint64_t)(x) | (uint64_t)(y)))
-#define uint63_land(x,y) ((value)((uint64_t)(x) & (uint64_t)(y)))
+#define Uint63_lxor(x,y) (accu = (value)(((uint64_t)(x) ^ (uint64_t)(y)) | 1))
+#define Uint63_lor(x,y) (accu = (value)((uint64_t)(x) | (uint64_t)(y)))
+#define Uint63_land(x,y) (accu = (value)((uint64_t)(x) & (uint64_t)(y)))
 
 /* TODO: is + or | better? OCAML uses + */
 /* TODO: is - or ^ better? */
-#define uint63_lsl(x,y) ((y) < (uint64_t) 127 ? ((value)((((uint64_t)(x)-1) << (uint63_of_value(y))) | 1)) : uint63_zero)
-#define uint63_lsr(x,y) ((y) < (uint64_t) 127 ? ((value)(((uint64_t)(x) >> (uint63_of_value(y))) | 1)) : uint63_zero)
-#define uint63_lsl1(x) ((value)((((uint64_t)(x)-1) << 1) +1))
-#define uint63_lsr1(x) ((value)(((uint64_t)(x) >> 1) |1))
+#define Uint63_lsl(x,y) do{ \
+  value uint63_lsl_y__ = (y); \
+  if (uint63_lsl_y__ < (uint64_t) 127) \
+    accu = (value)((((uint64_t)(x)-1) << uint63_of_value(uint63_lsl_y__)) | 1); \
+  else \
+    accu = uint63_zero; \
+  }while(0)
+#define Uint63_lsr(x,y) do{ \
+  value uint63_lsl_y__ = (y); \
+  if (uint63_lsl_y__ < (uint64_t) 127) \
+    accu = (value)(((uint64_t)(x) >> uint63_of_value(uint63_lsl_y__)) | 1); \
+  else \
+    accu = uint63_zero; \
+  }while(0)
+#define Uint63_lsl1(x) (accu = (value)((((uint64_t)(x)-1) << 1) +1))
+#define Uint63_lsr1(x) (accu = (value)(((uint64_t)(x) >> 1) |1))
 
 /* addmuldiv(p,x,y) = x * 2^p + y / 2 ^ (63 - p) */
 /* (modulo 2^63) for p <= 63 */
@@ -40,6 +56,7 @@ value uint63_addmuldiv(uint64_t p, uint64_t x, uint64_t y) {
   r |= ((uint64_t)y >> (63-shiftby)) | 1;
   return r;
 }
+#define Uint63_addmuldiv(p, x, y) (accu = uint63_addmuldiv(p, x, y))
 
 value uint63_head0(uint64_t x) {
   int r = 0;
@@ -51,6 +68,7 @@ value uint63_head0(uint64_t x) {
   if (!(x & 0x8000000000000000)) { x <<=1;   r += 1; }
   return Val_int(r);
 }
+#define Uint63_head0(x) (accu = uint63_head0(x))
 
 value uint63_tail0(value x) {
   int r = 0;
@@ -63,6 +81,7 @@ value uint63_tail0(value x) {
   if (!(x & 0x00000001)) { x >>=1;   r += 1; }
   return Val_int(r);
 }
+#define Uint63_tail0(x) (accu = uint63_tail0(x))
 
 value uint63_mulc(value x, value y, value* h) {
   x = (uint64_t)x >> 1;
@@ -86,40 +105,36 @@ value uint63_mulc(value x, value y, value* h) {
   *h = Val_int(hr);
   return Val_int(lr);
 }
+#define Uint63_mulc(x, y, h) (accu = uint63_mulc(x, y, h))
 
 #define lt128(xh,xl,yh,yl) (uint63_lt(xh,yh) || (uint63_eq(xh,yh) && uint63_lt(xl,yl)))
 #define le128(xh,xl,yh,yl) (uint63_lt(xh,yh) || (uint63_eq(xh,yh) && uint63_leq(xl,yl)))
 
-value uint63_div21(value xh, value xl, value y, value* q) {
-  xh = (uint64_t)xh >> 1;
-  xl = ((uint64_t)xl >> 1) | ((uint64_t)xh << 63);
-  xh = (uint64_t)xh >> 1;
-  uint64_t maskh = 0;
-  uint64_t maskl = 1;
-  uint64_t dh = 0;
-  uint64_t dl = (uint64_t)y >> 1;
-  int cmp = 1;
-  while (dh >= 0 && cmp) {
-    cmp = lt128(dh,dl,xh,xl);
-    dh = (dh << 1) | (dl >> 63);
-    dl = dl << 1;
-    maskh = (maskh << 1) | (maskl >> 63);
-    maskl = maskl << 1;
+#define maxuint63 ((uint64_t)0x7FFFFFFFFFFFFFFF)
+/* precondition: xh < y */
+/* outputs r and sets ql to q s.t. x = q * y + r, r < y */
+static value uint63_div21_aux(value xh, value xl, value y, value* ql) {
+  uint64_t nh = uint63_of_value(xh);
+  uint64_t nl = uint63_of_value(xl);
+  y = uint63_of_value(y);
+  uint64_t q = 0;
+  for (int i = 0; i < 63; ++i) {
+    // invariants: 0 <= nh < y, nl = (xl*2^i) % 2^64,
+    // (q*y + nh) * 2^(63-i) + (xl % 2^(63-i)) = (xh%y) * 2^63 + xl
+    nl <<= 1;
+    nh = (nh << 1) | (nl >> 63);
+    q <<= 1;
+    if (nh >= y) { q |= 1; nh -= y; }
   }
-  uint64_t remh = xh;
-  uint64_t reml = xl;
-  uint64_t quotient = 0;
-  while (maskh | maskl) {
-    if (le128(dh,dl,remh,reml)) {
-      quotient = quotient | maskl;
-      if (uint63_lt(reml,dl)) {remh = remh - dh - 1;} else {remh = remh - dh;}
-      reml = reml - dl;
-    }
-    maskl = (maskl >> 1) | (maskh << 63);
-    maskh = maskh >> 1;
-    dl = (dl >> 1) | (dh << 63);
-    dh = dh >> 1;
+  *ql = Val_int(q);
+  return Val_int(nh);
+}
+value uint63_div21(value xh, value xl, value y, value* ql) {
+  if (uint63_leq(y, xh)) {
+    *ql = Val_int(0);
+    return Val_int(0);
+  } else {
+    return uint63_div21_aux(xh, xl, y, ql);
   }
-  *q = Val_int(quotient);
-  return Val_int(reml);
 }
+#define Uint63_div21(xh, xl, y, q) (accu = uint63_div21(xh, xl, y, q))