From 5aa6d99982a9d6e2736f1eedb7fcdf6b9d05b6a0 Mon Sep 17 00:00:00 2001
From: Guillaume Melquiond
Date: Mon, 24 Aug 2020 15:11:16 +0200
Subject: Modify bytecode representation of closures to please OCaml's GC (fix
 #12636).

The second field of a closure can no longer be the value of the first free
variable (or another closure of a mutually recursive block) but must be an
offset to the first free variable.

This commit makes the bytecode compiler and interpreter agnostic to the
actual representation of closures. To do so, the execution environment
(variable coq_env) no longer points to the currently executed closure but
to the last one. This has the following consequences:

- OFFSETCLOSURE(n) now always loads the n-th closure of a recursive block
  (counted from last to first);
- ENVACC(n) now always loads the value of the n-th free variable.

These two changes make the bytecode compiler simpler, since it no
longer has to track the relative position of closures and free variables.
The last change makes the interpreter a bit slower, since it has to adjust
coq_env when executing GRABREC. Hopefully, cache locality will make the
overhead negligible.
---
 kernel/byterun/coq_fix_code.c |  12 ++--
 kernel/byterun/coq_interp.c   | 139 ++++++++++++++++++++++--------------------
 kernel/byterun/coq_values.c   |  16 +++--
 kernel/genOpcodeFiles.ml      |  10 ++-
 kernel/vm.ml                  |   2 +-
 kernel/vmbytegen.ml           |  76 +++++++++--------------
 kernel/vmemitcodes.ml         |  16 ++---
 kernel/vmsymtable.ml          |  14 ++---
 kernel/vmvalues.ml            |  57 ++++++++---------
 kernel/vmvalues.mli           |   1 -
 10 files changed, 163 insertions(+), 180 deletions(-)

(limited to 'kernel')

diff --git a/kernel/byterun/coq_fix_code.c b/kernel/byterun/coq_fix_code.c
index 814cdfe1d8..9118410549 100644
--- a/kernel/byterun/coq_fix_code.c
+++ b/kernel/byterun/coq_fix_code.c
@@ -33,12 +33,12 @@ void init_arity () {
   arity[ACC0]=arity[ACC1]=arity[ACC2]=arity[ACC3]=arity[ACC4]=arity[ACC5]=
     arity[ACC6]=arity[ACC7]=arity[PUSH]=arity[PUSHACC0]=arity[PUSHACC1]=
     arity[PUSHACC2]=arity[PUSHACC3]=arity[PUSHACC4]=arity[PUSHACC5]=
-    arity[PUSHACC6]=arity[PUSHACC7]=arity[ENVACC1]=arity[ENVACC2]=
-    arity[ENVACC3]=arity[ENVACC4]=arity[PUSHENVACC1]=arity[PUSHENVACC2]=
-    arity[PUSHENVACC3]=arity[PUSHENVACC4]=arity[APPLY1]=arity[APPLY2]=
-    arity[APPLY3]=arity[APPLY4]=arity[RESTART]=arity[OFFSETCLOSUREM2]=
-    arity[OFFSETCLOSURE0]=arity[OFFSETCLOSURE2]=arity[PUSHOFFSETCLOSUREM2]=
-    arity[PUSHOFFSETCLOSURE0]=arity[PUSHOFFSETCLOSURE2]=
+    arity[PUSHACC6]=arity[PUSHACC7]=
+    arity[ENVACC0]=arity[ENVACC1]=arity[ENVACC2]=arity[ENVACC3]=
+    arity[PUSHENVACC0]=arity[PUSHENVACC1]=arity[PUSHENVACC2]=arity[PUSHENVACC3]=
+    arity[APPLY1]=arity[APPLY2]=arity[APPLY3]=arity[APPLY4]=arity[RESTART]=
+    arity[OFFSETCLOSURE0]=arity[OFFSETCLOSURE1]=
+    arity[PUSHOFFSETCLOSURE0]=arity[PUSHOFFSETCLOSURE1]=
     arity[GETFIELD0]=arity[GETFIELD1]=arity[SETFIELD0]=arity[SETFIELD1]=
     arity[CONST0]=arity[CONST1]=arity[CONST2]=arity[CONST3]=
     arity[PUSHCONST0]=arity[PUSHCONST1]=arity[PUSHCONST2]=arity[PUSHCONST3]=
diff --git a/kernel/byterun/coq_interp.c b/kernel/byterun/coq_interp.c
index 15cc451ea8..b44ae34582 100644
--- a/kernel/byterun/coq_interp.c
+++ b/kernel/byterun/coq_interp.c
@@ -383,37 +383,49 @@ value coq_interprete
       }
       /* Access in heap-allocated environment */
 
+      Instruct(ENVACC0){
+        print_instr("ENVACC0");
+        accu = Field(coq_env, 2);
+        Next;
+      }
       Instruct(ENVACC1){
         print_instr("ENVACC1");
-        accu = Field(coq_env, 1); Next;
+        accu = Field(coq_env, 3);
+        Next;
       }
       Instruct(ENVACC2){
         print_instr("ENVACC2");
-        accu = Field(coq_env, 2); Next;
+        accu = Field(coq_env, 4);
+        Next;
       }
       Instruct(ENVACC3){
         print_instr("ENVACC3");
-        accu = Field(coq_env, 3); Next;
+        accu = Field(coq_env, 5);
+        Next;
       }
-      Instruct(ENVACC4){
-        print_instr("ENVACC4");
-        accu = Field(coq_env, 4); Next;
+      Instruct(PUSHENVACC0){
+        print_instr("PUSHENVACC0");
+        *--sp = accu;
+        accu = Field(coq_env, 2);
+        Next;
       }
       Instruct(PUSHENVACC1){
         print_instr("PUSHENVACC1");
-        *--sp = accu; accu = Field(coq_env, 1); Next;
+        *--sp = accu;
+        accu = Field(coq_env, 3);
+        Next;
       }
       Instruct(PUSHENVACC2){
         print_instr("PUSHENVACC2");
-        *--sp = accu; accu = Field(coq_env, 2); Next;
+        *--sp = accu;
+        accu = Field(coq_env, 4);
+        Next;
       }
       Instruct(PUSHENVACC3){
         print_instr("PUSHENVACC3");
-        *--sp = accu; accu = Field(coq_env, 3); Next;
-      }
-      Instruct(PUSHENVACC4){
-        print_instr("PUSHENVACC4");
-        *--sp = accu; accu = Field(coq_env, 4); Next;
+        *--sp = accu;
+        accu = Field(coq_env, 5);
+        Next;
       }
       Instruct(PUSHENVACC){
         print_instr("PUSHENVACC");
@@ -423,7 +435,7 @@ value coq_interprete
       Instruct(ENVACC){
         print_instr("ENVACC");
         print_int(*pc);
-        accu = Field(coq_env, *pc++);
+        accu = Field(coq_env, 2 + *pc++);
         Next;
       }
       /* Function application */
@@ -598,7 +610,6 @@ value coq_interprete
         sp[0] = arg1;
         sp[1] = arg2;
         pc = Code_val(accu);
-        print_lint(accu);
         coq_env = accu;
         coq_extra_args += 1;
         goto check_stack;
@@ -643,13 +654,13 @@ value coq_interprete
       }
 
       Instruct(RESTART) {
-        int num_args = Wosize_val(coq_env) - 2;
+        int num_args = Wosize_val(coq_env) - 3;
         int i;
         print_instr("RESTART");
         CHECK_STACK(num_args);
         sp -= num_args;
-        for (i = 0; i < num_args; i++) sp[i] = Field(coq_env, i + 2);
-        coq_env = Field(coq_env, 1);
+        for (i = 0; i < num_args; i++) sp[i] = Field(coq_env, i + 3);
+        coq_env = Field(coq_env, 2);
         coq_extra_args += num_args;
         Next;
       }
@@ -663,9 +674,10 @@ value coq_interprete
         } else {
           mlsize_t num_args, i;
           num_args = 1 + coq_extra_args; /* arg1 + extra args */
-          Alloc_small(accu, num_args + 2, Closure_tag);
-          Field(accu, 1) = coq_env;
-          for (i = 0; i < num_args; i++) Field(accu, i + 2) = sp[i];
+          Alloc_small(accu, num_args + 3, Closure_tag);
+          Field(accu, 1) = Val_int(2);
+          Field(accu, 2) = coq_env;
+          for (i = 0; i < num_args; i++) Field(accu, i + 3) = sp[i];
           Code_val(accu) = pc - 3; /* Point to the preceding RESTART instr. */
           sp += num_args;
           pc = (code_t)(sp[0]);
@@ -680,16 +692,18 @@ value coq_interprete
         int rec_pos = *pc++; /* commence a zero */
         print_instr("GRABREC");
         if (rec_pos <= coq_extra_args && !Is_accu(sp[rec_pos])) {
-          pc++;/* On saute le Restart */
+          pc++; /* Skip the next RESTART, then point coq_env at the free variables. */
+          coq_env = coq_env + (Int_val(Field(coq_env, 1)) - 2) * sizeof(value);
         } else {
           if (coq_extra_args < rec_pos) {
             /* Partial application */
             mlsize_t num_args, i;
             num_args = 1 + coq_extra_args; /* arg1 + extra args */
-            Alloc_small(accu, num_args + 2, Closure_tag);
-            Field(accu, 1) = coq_env;
-            for (i = 0; i < num_args; i++) Field(accu, i + 2) = sp[i];
-            Code_val(accu) = pc - 3;
+            Alloc_small(accu, num_args + 3, Closure_tag);
+            Code_val(accu) = pc - 3; /* Point to the preceding RESTART instr. */
+            Field(accu, 1) = Val_int(2);
+            Field(accu, 2) = coq_env;
+            for (i = 0; i < num_args; i++) Field(accu, i + 3) = sp[i];
             sp += num_args;
             pc = (code_t)(sp[0]);
             coq_env = sp[1];
@@ -699,10 +713,11 @@ value coq_interprete
             /* The recursif argument is an accumulator */
             mlsize_t num_args, i;
             /* Construction of fixpoint applied to its [rec_pos-1] first arguments */
-            Alloc_small(accu, rec_pos + 2, Closure_tag);
-            Field(accu, 1) = coq_env; // We store the fixpoint in the first field
-            for (i = 0; i < rec_pos; i++) Field(accu, i + 2) = sp[i]; // Storing args
-            Code_val(accu) = pc;
+            Alloc_small(accu, rec_pos + 3, Closure_tag);
+            Code_val(accu) = pc; /* Point to the next RESTART instr. */
+            Field(accu, 1) = Val_int(2);
+            Field(accu, 2) = coq_env; // We store the fixpoint in the first field
+            for (i = 0; i < rec_pos; i++) Field(accu, i + 3) = sp[i]; // Storing args
             sp += rec_pos;
             *--sp = accu;
               /* Construction of the atom */
@@ -732,12 +747,13 @@ value coq_interprete
         print_instr("CLOSURE");
         print_int(nvars);
         if (nvars > 0) *--sp = accu;
-        Alloc_small(accu, 1 + nvars, Closure_tag);
+        Alloc_small(accu, 2 + nvars, Closure_tag);
+        Field(accu, 1) = Val_int(2);
         Code_val(accu) = pc + *pc;
         pc++;
         for (i = 0; i < nvars; i++) {
           print_lint(sp[i]);
-          Field(accu, i + 1) = sp[i];
+          Field(accu, i + 2) = sp[i];
         }
         sp += nvars;
         Next;
@@ -758,24 +774,19 @@ value coq_interprete
         }
         pc += nfuncs;
         *--sp=accu;
-        Alloc_small(accu, nfuncs * 2 + nvars, Closure_tag);
-        Field(accu, nfuncs * 2 + nvars - 1) = *sp++;
-        /* On remplie la partie pour les variables libres */
-        p = &Field(accu, nfuncs * 2 - 1);
-        for (i = 0; i < nvars; i++) {
-          *p++ = *sp++;
-        }
+        Alloc_small(accu, nfuncs * 3 + nvars, Closure_tag);
+        Field(accu, nfuncs * 3 + nvars - 1) = *sp++;
         p = &Field(accu, 0);
-        *p = (value) (pc + pc[0]);
-        p++;
+        *p++ = (value) (pc + pc[0]);
+        *p++ = Val_int(nfuncs * 3 - 1);
         for (i = 1; i < nfuncs; i++) {
-          *p = Make_header(i * 2, Infix_tag, Caml_white);
-          p++;                                /* color irrelevant. */
-          *p = (value) (pc + pc[i]);
-          p++;
+          *p++ = Make_header(i * 3, Infix_tag, Caml_white); /* color irrelevant. */
+          *p++ = (value) (pc + pc[i]);
+          *p++ = Val_int((nfuncs - i) * 3 - 1);
         }
+        for (i = 0; i < nvars; i++) *p++ = *sp++;
         pc += nfuncs;
-        accu = accu + 2 * start * sizeof(value);
+        accu = accu + start * 3 * sizeof(value);
         Next;
       }
 
@@ -805,14 +816,15 @@ value coq_interprete
         /* creation des fonction cofix */
 
         p = sp;
-        size = nfunc + nvars + 2;
+        size = nfunc + nvars + 3;
         for (i=0; i < nfunc; i++) {
 
           Alloc_small(accu, size, Closure_tag);
           Code_val(accu) = pc+pc[i];
-          for (j = 0; j < nfunc; j++) Field(accu, j+1) = p[j];
+          Field(accu, 1) = Val_int(2);
+          for (j = 0; j < nfunc; ++j) Field(accu, j + 2) = p[j];
           Field(accu, size - 1) = p[nfunc];
-          for (j = nfunc+1; j <= nfunc+nvars; j++) Field(accu, j) = p[j];
+          for (j = nfunc + 1; j <= nfunc + nvars; ++j) Field(accu, j + 1) = p[j];
           *--sp = accu;
           /* creation du block contenant le cofix */
 
@@ -837,15 +849,8 @@ value coq_interprete
       } /* fallthrough */
       Instruct(OFFSETCLOSURE) {
         print_instr("OFFSETCLOSURE");
-        accu = coq_env + *pc++ * sizeof(value); Next;
-      }
-      Instruct(PUSHOFFSETCLOSUREM2) {
-        print_instr("PUSHOFFSETCLOSUREM2");
-        *--sp = accu;
-      } /* fallthrough */
-      Instruct(OFFSETCLOSUREM2) {
-        print_instr("OFFSETCLOSUREM2");
-        accu = coq_env - 2 * sizeof(value); Next;
+        accu = coq_env - *pc++ * 3 * sizeof(value);
+        Next;
       }
       Instruct(PUSHOFFSETCLOSURE0) {
         print_instr("PUSHOFFSETCLOSURE0");
@@ -853,15 +858,17 @@ value coq_interprete
       }/* fallthrough */
       Instruct(OFFSETCLOSURE0) {
         print_instr("OFFSETCLOSURE0");
-        accu = coq_env; Next;
+        accu = coq_env;
+        Next;
       }
-      Instruct(PUSHOFFSETCLOSURE2){
-        print_instr("PUSHOFFSETCLOSURE2");
+      Instruct(PUSHOFFSETCLOSURE1){
+        print_instr("PUSHOFFSETCLOSURE1");
         *--sp = accu; /* fallthrough */
       }
-      Instruct(OFFSETCLOSURE2) {
-        print_instr("OFFSETCLOSURE2");
-        accu = coq_env + 2 * sizeof(value); Next;
+      Instruct(OFFSETCLOSURE1) {
+        print_instr("OFFSETCLOSURE1");
+        accu = coq_env - 3 * sizeof(value);
+        Next;
       }
 
 /* Access to global variables */
@@ -1875,11 +1882,11 @@ value coq_push_val(value v) {
 value coq_push_arguments(value args) {
   int nargs,i;
   value * sp = coq_sp;
-  nargs = Wosize_val(args) - 2;
+  nargs = Wosize_val(args) - 3;
   CHECK_STACK(nargs);
   coq_sp -= nargs;
   print_instr("push_args");print_int(nargs);
-  for(i = 0; i < nargs; i++) coq_sp[i] = Field(args, i+2);
+  for (i = 0; i < nargs; i++) coq_sp[i] = Field(args, i + 3);
   return Val_unit;
 }
 
diff --git a/kernel/byterun/coq_values.c b/kernel/byterun/coq_values.c
index bbe91da628..adfd4e8954 100644
--- a/kernel/byterun/coq_values.c
+++ b/kernel/byterun/coq_values.c
@@ -38,9 +38,9 @@ value coq_closure_arity(value clos) {
   opcode_t * c = Code_val(clos);
   if (Is_instruction(c,RESTART)) {
     c++;
-    if (Is_instruction(c,GRAB)) return Val_int(3 + c[1] - Wosize_val(clos));
+    if (Is_instruction(c,GRAB)) return Val_int(4 + c[1] - Wosize_val(clos));
     else {
-      if (Wosize_val(clos) != 2) caml_failwith("Coq Values : coq_closure_arity");
+      if (Wosize_val(clos) != 3) caml_failwith("Coq Values : coq_closure_arity");
       return Val_int(1);
     }
   }
@@ -50,13 +50,17 @@ value coq_closure_arity(value clos) {
 
 /* Fonction sur les  fix */
 
-value coq_offset(value v) {
+value coq_current_fix(value v) {
   if (Tag_val(v) == Closure_tag) return Val_int(0);
-  else return Val_long(-Wsize_bsize(Infix_offset_val(v)));
+  else return Val_long(Wsize_bsize(Infix_offset_val(v)) / 3);
 }
 
-value coq_offset_closure(value v, value offset){
-  return (value)&Field(v, Int_val(offset));
+value coq_shift_fix(value v, value offset) {
+  return v + Int_val(offset) * 3 * sizeof(value);
+}
+
+value coq_last_fix(value v) {
+  return v + (Int_val(Field(v, 1)) - 2) * sizeof(value);
 }
 
 value coq_set_bytecode_field(value v, value i, value code) {
diff --git a/kernel/genOpcodeFiles.ml b/kernel/genOpcodeFiles.ml
index 2d74cca44c..f052e03cde 100644
--- a/kernel/genOpcodeFiles.ml
+++ b/kernel/genOpcodeFiles.ml
@@ -38,15 +38,15 @@ let opcodes =
     "PUSHACC7";
     "PUSHACC";
     "POP";
+    "ENVACC0";
     "ENVACC1";
     "ENVACC2";
     "ENVACC3";
-    "ENVACC4";
     "ENVACC";
+    "PUSHENVACC0";
     "PUSHENVACC1";
     "PUSHENVACC2";
     "PUSHENVACC3";
-    "PUSHENVACC4";
     "PUSHENVACC";
     "PUSH_RETADDR";
     "APPLY";
@@ -65,13 +65,11 @@ let opcodes =
     "CLOSURE";
     "CLOSUREREC";
     "CLOSURECOFIX";
-    "OFFSETCLOSUREM2";
     "OFFSETCLOSURE0";
-    "OFFSETCLOSURE2";
+    "OFFSETCLOSURE1";
     "OFFSETCLOSURE";
-    "PUSHOFFSETCLOSUREM2";
     "PUSHOFFSETCLOSURE0";
-    "PUSHOFFSETCLOSURE2";
+    "PUSHOFFSETCLOSURE1";
     "PUSHOFFSETCLOSURE";
     "GETGLOBAL";
     "PUSHGETGLOBAL";
diff --git a/kernel/vm.ml b/kernel/vm.ml
index 76954a83d8..3adb2f2113 100644
--- a/kernel/vm.ml
+++ b/kernel/vm.ml
@@ -95,7 +95,7 @@ let reduce_fix k vf =
   (* computing types *)
   let fc_typ = fix_types fb in
   let ndef = Array.length fc_typ in
-  let et = offset_closure_fix fb (2*(ndef - 1)) in
+  let et = fix_env fb in
   let ftyp =
     Array.map
       (fun c -> interprete c crazy_val et 0) fc_typ in
diff --git a/kernel/vmbytegen.ml b/kernel/vmbytegen.ml
index 1274e3a867..a826cf8193 100644
--- a/kernel/vmbytegen.ml
+++ b/kernel/vmbytegen.ml
@@ -28,10 +28,10 @@ open Environ
 (* The virtual machine doesn't distinguish closures and their environment *)
 
 (* Representation of function environments :                              *)
-(*        [clos_t | code | fv1 | fv2 | ... | fvn ]                        *)
+(*        [clos_t | code | envofs=2 | fv1 | fv2 | ... | fvn ]             *)
 (*                ^                                                       *)
-(*  The offset for accessing free variables is 1 (we must skip the code   *)
-(*  pointer).                                                             *)
+(*  The offset for accessing free variables is 2 (we must skip the code   *)
+(*  pointer and the environment offset).                                  *)
 (*  While compiling, free variables are stored in [in_env] in order       *)
 (*  opposite to machine representation, so we can add new free variables  *)
 (*  easily (i.e. without changing the position of previous variables)     *)
@@ -42,9 +42,9 @@ open Environ
 (* In the function body [arg1] is represented by de Bruijn [n], and       *)
 (* [argn] by de Bruijn [1]                                                *)
 
-(* Representation of environments of mutual fixpoints :                  *)
-(* [t1|C1| ... |tc|Cc| ... |t(nbr)|C(nbr)| fv1 | fv2 | .... | fvn | type] *)
-(*                ^<----------offset--------->                            *)
+(* Representation of environments of mutual fixpoints :                   *)
+(* [clos_t|C1|envofs1| ... |infix_t|Ci|envofsi| ... |infix_t|Cnbr|envofsnbr=2| fv1 | fv2 | .... | fvn | type] *)
+(*                                 ^                                      *)
 (* type = [Ct1 | .... | Ctn]                                              *)
 (* Ci is the code pointer of the i-th body                                *)
 (* At runtime, a fixpoint environment (which is the same as the fixpoint  *)
@@ -52,25 +52,25 @@ open Environ
 (* In each fixpoint body, de Bruijn [nbr] represents the first fixpoint   *)
 (* and de Bruijn [1] the last one.                                        *)
 (* Access to these variables is performed by the [Koffsetclosure n]       *)
-(* instruction that shifts the environment pointer of [n] fields.         *)
+(* instruction that shifts the environment pointer by [n] closuress.      *)
 
 (* This allows representing mutual fixpoints in just one block.           *)
 (* [Ct1 | ... | Ctn] is an array holding code pointers of the fixpoint    *)
 (* types. They are used in conversion tests (which requires that          *)
 (* fixpoint types must be convertible). Their environment is the one of   *)
 (* the last fixpoint :                                                    *)
-(* [t1|C1| ... |tc|Cc| ... |t(nbr)|C(nbr)| fv1 | fv2 | .... | fvn | type] *)
-(*                                ^                                       *)
+(* [clos_t|C1| ... |infix_t|Ci| ... |infix_t|Cnbr|envofsnbr=2| fv1 | fv2 | .... | fvn | type] *)
+(*                                          ^                             *)
 
 (* Representation of mutual cofix :                                       *)
 (*  a1 =   [A_t | accumulate | [Cfx_t | fcofix1 ] ]                       *)
 (*                ...                                                     *)
 (*  anbr = [A_t | accumulate | [Cfx_t | fcofixnbr ] ]                     *)
 (*                                                                        *)
-(*  fcofix1 = [clos_t   | code1   | a1 |...| anbr | fv1 |...| fvn | type] *)
+(*  fcofix1   = [clos_t | code1   | envofs=2 | a1 |...| anbr | fv1 |...| fvn | type] *)
 (*                      ^                                                 *)
 (*                ...                                                     *)
-(*  fcofixnbr = [clos_t | codenbr | a1 |...| anbr | fv1 |...| fvn | type] *)
+(*  fcofixnbr = [clos_t | codenbr | envofs=2 | a1 |...| anbr | fv1 |...| fvn | type] *)
 (*                      ^                                                 *)
 (* The [ai] blocks are functions that accumulate their arguments:         *)
 (*           ai arg1  argp --->                                           *)
@@ -87,7 +87,7 @@ open Environ
 (* a cycle, e.g.:                                                         *)
 (*   cofix one := cons 1 one                                              *)
 (*   a1 = [A_t | accumulate | [Cfx_t|fcofix1] ]                           *)
-(*   fcofix1 = [clos_t | code | a1]                                       *)
+(*   fcofix1 = [clos_t | code | envofs | a1]                              *)
 (* The result of evaluating [a1] is [cons_t | 1 | a1].                    *)
 (* When [a1] is updated :                                                 *)
 (*  a1 = [A_t | accumulate | [Cfxe_t | fcofix1 | [cons_t | 1 | a1]] ]     *)
@@ -131,9 +131,7 @@ type comp_env = {
                                  (* universes are always at the bottom.    *)
     nb_stack : int;              (* number of variables on the stack       *)
     in_stack : int Range.t;      (* position in the stack                  *)
-    nb_rec : int;                (* number of mutually recursive functions *)
-    pos_rec  : instruction list; (* instruction d'acces pour les variables *)
-                                 (*  de point fix ou de cofix              *)
+    pos_rec : instruction array; (* instruction to access mutually-defined functions *)
     offset : int;
     in_env : vm_env ref          (* The free variables of the expression   *)
   }
@@ -159,8 +157,7 @@ let empty_comp_env ()=
     nb_uni_stack = 0;
     nb_stack = 0;
     in_stack = Range.empty;
-    nb_rec = 0;
-    pos_rec = [];
+    pos_rec = [||];
     offset = 0;
     in_env = ref empty_fv
   }
@@ -195,9 +192,8 @@ let comp_env_fun ?(univs=0) arity =
     nb_uni_stack = univs ;
     nb_stack = arity;
     in_stack = add_param arity 0 Range.empty;
-    nb_rec = 0;
-    pos_rec = [];
-    offset = 1;
+    pos_rec = [||];
+    offset = 0;
     in_env = ref empty_fv
   }
 
@@ -207,24 +203,18 @@ let comp_env_fix_type  rfv =
     nb_uni_stack = 0;
     nb_stack = 0;
     in_stack = Range.empty;
-    nb_rec = 0;
-    pos_rec = [];
-    offset = 1;
+    pos_rec = [||];
+    offset = 0;
     in_env = rfv
   }
 
-let comp_env_fix ndef curr_pos arity rfv =
-   let prec = ref [] in
-   for i = ndef downto 1 do
-     prec := Koffsetclosure (2 * (ndef - curr_pos - i)) :: !prec
-   done;
+let comp_env_fix ndef arity rfv =
    { arity;
      nb_uni_stack = 0;
      nb_stack = arity;
      in_stack = add_param arity 0 Range.empty;
-     nb_rec = ndef;
-     pos_rec = !prec;
-     offset = 2 * (ndef - curr_pos - 1)+1;
+     pos_rec = Array.init ndef (fun i -> Koffsetclosure i);
+     offset = 0;
      in_env = rfv
    }
 
@@ -233,24 +223,18 @@ let comp_env_cofix_type ndef rfv =
     nb_uni_stack = 0;
     nb_stack = 0;
     in_stack = Range.empty;
-    nb_rec = 0;
-    pos_rec = [];
-    offset = 1+ndef;
+    pos_rec = [||];
+    offset = ndef;
     in_env = rfv
   }
 
 let comp_env_cofix ndef arity rfv =
-   let prec = ref [] in
-   for i = 1 to ndef do
-     prec := Kenvacc i :: !prec
-   done;
    { arity;
      nb_uni_stack = 0;
      nb_stack = arity;
      in_stack = add_param arity 0 Range.empty;
-     nb_rec = ndef;
-     pos_rec = !prec;
-     offset = ndef+1;
+     pos_rec = Array.init ndef (fun i -> Kenvacc (ndef - 1 - i));
+     offset = ndef;
      in_env = rfv
    }
 
@@ -283,11 +267,11 @@ let pos_rel i r sz =
     Kacc(sz - (Range.get r.in_stack (i-1)))
   else
     let i = i - r.nb_stack in
-    if i <= r.nb_rec then
-      try List.nth r.pos_rec (i-1)
-      with (Failure _|Invalid_argument _) -> assert false
+    let nb_rec = Array.length r.pos_rec in
+    if i <= nb_rec then
+      r.pos_rec.(i - 1)
     else
-      let i = i - r.nb_rec in
+      let i = i - nb_rec in
       let db = FVrel(i) in
       let env = !(r.in_env) in
       try Kenvacc(r.offset + find_at db env)
@@ -627,7 +611,7 @@ let rec compile_lam env cenv lam sz cont =
       for i = 0 to ndef - 1 do
         let params,body = decompose_Llam bodies.(i) in
         let arity = Array.length params in
-        let env_body = comp_env_fix ndef i arity rfv in
+        let env_body = comp_env_fix ndef arity rfv in
         let cont1 =
           ensure_stack_capacity (compile_lam env env_body body arity) [Kreturn arity]
         in
diff --git a/kernel/vmemitcodes.ml b/kernel/vmemitcodes.ml
index 2dfc9a2941..f913cb906c 100644
--- a/kernel/vmemitcodes.ml
+++ b/kernel/vmemitcodes.ml
@@ -270,12 +270,12 @@ let emit_instr env = function
   | Kacc n ->
       if n < 8 then out env(opACC0 + n) else (out env opACC; out_int env n)
   | Kenvacc n ->
-      if n >= 1 && n <= 4
-      then out env(opENVACC1 + n - 1)
+      if n >= 0 && n <= 3
+      then out env(opENVACC0 + n)
       else (out env opENVACC; out_int env n)
   | Koffsetclosure ofs ->
-      if Int.equal ofs (-2) || Int.equal ofs 0 || Int.equal ofs 2
-      then out env (opOFFSETCLOSURE0 + ofs / 2)
+      if Int.equal ofs 0 || Int.equal ofs 1
+      then out env (opOFFSETCLOSURE0 + ofs)
       else (out env opOFFSETCLOSURE; out_int env ofs)
   | Kpush ->
       out env opPUSH
@@ -385,13 +385,13 @@ let rec emit env insns remaining = match insns with
       if n < 8 then out env(opPUSHACC0 + n) else (out env opPUSHACC; out_int env n);
       emit env c remaining
   | Kpush :: Kenvacc n :: c ->
-      if n >= 1 && n <= 4
-      then out env(opPUSHENVACC1 + n - 1)
+      if n >= 0 && n <= 3
+      then out env(opPUSHENVACC0 + n)
       else (out env opPUSHENVACC; out_int env n);
       emit env c remaining
   | Kpush :: Koffsetclosure ofs :: c ->
-      if Int.equal ofs (-2) || Int.equal ofs 0 || Int.equal ofs 2
-      then out env(opPUSHOFFSETCLOSURE0 + ofs / 2)
+      if Int.equal ofs 0 || Int.equal ofs 1
+      then out env(opPUSHOFFSETCLOSURE0 + ofs)
       else (out env opPUSHOFFSETCLOSURE; out_int env ofs);
       emit env c remaining
   | Kpush :: Kgetglobal id :: c ->
diff --git a/kernel/vmsymtable.ml b/kernel/vmsymtable.ml
index 85f7369654..4ad830a298 100644
--- a/kernel/vmsymtable.ml
+++ b/kernel/vmsymtable.ml
@@ -206,15 +206,11 @@ and eval_to_patch env (buff,pl,fv) =
   in
   let tc = patch buff pl slots in
   let vm_env =
-    (* Beware, this may look like a call to [Array.map], but it's not.
-       Calling [Array.map f] when the first argument returned by [f]
-       is a float would lead to [vm_env] being an unboxed Double_array
-       (Tag_val = Double_array_tag) whereas eval_tcode expects a
-       regular array (Tag_val = 0).
-       See test-suite/primitive/float/coq_env_double_array.v
-       for an actual instance. *)
-    let a = Array.make (Array.length fv) crazy_val in
-    Array.iteri (fun i v -> a.(i) <- slot_for_fv env v) fv; a in
+    (* Environment should look like a closure, so free variables start at slot 2. *)
+    let a = Array.make (Array.length fv + 2) crazy_val in
+    a.(1) <- Obj.magic 2;
+    Array.iteri (fun i v -> a.(i + 2) <- slot_for_fv env v) fv;
+    a in
   eval_tcode tc (get_atom_rel ()) (vm_global global_data.glob_val) vm_env
 
 and val_of_constr env c =
diff --git a/kernel/vmvalues.ml b/kernel/vmvalues.ml
index de604176cb..177f67210c 100644
--- a/kernel/vmvalues.ml
+++ b/kernel/vmvalues.ml
@@ -166,7 +166,6 @@ let cofix_upd_val v = (Obj.magic v : values)
 type vm_env
 type vm_global
 let fun_env v = (Obj.magic v : vm_env)
-let fix_env v = (Obj.magic v : vm_env)
 let cofix_env v = (Obj.magic v : vm_env)
 let cofix_upd_env v = (Obj.magic v : vm_env)
 type vstack = values array
@@ -207,13 +206,13 @@ type vswitch = {
 (*             dom : values, codom : vfun                                 *)
 (*                                                                        *)
 (* + Functions have two representations :                                 *)
-(*   - unapplied fun :  vf = Ct_[ C | fv1 | ... | fvn]                    *)
+(*   - unapplied fun :  vf = Ct_[ C | envofs=2 | fv1 | ... | fvn]         *)
 (*                                       C:tcode, fvi : values            *)
 (*     Remark : a function and its environment is the same value.         *)
-(*   - partially applied fun : Ct_[Restart:C| vf | arg1 | ... argn]       *)
+(*   - partially applied fun : Ct_[ Restart::C | envofs=2 | vf | arg1 | ... | argn] *)
 (*                                                                        *)
 (* + Fixpoints :                                                          *)
-(*   -        Ct_[C1|Infix_t|C2|...|Infix_t|Cn|fv1|...|fvn]               *)
+(*   - Ct_[C1|envofs1=3*n-1 | Infix_t|C2|envofs2 | ... | Infix_t|Cn|envofsn=2 | fv1|...|fvn] *)
 (*     One single block to represent all of the fixpoints, each fixpoint  *)
 (*     is the pointer to the field holding the pointer to its code, and   *)
 (*     the infix tag is used to know where the block starts.              *)
@@ -291,10 +290,10 @@ type whd =
   | Vuniv_level of Univ.Level.t
 
 (* Functions over arguments *)
-let nargs : arguments -> int = fun args -> (Obj.size (Obj.repr args)) - 2
+let nargs : arguments -> int = fun args -> Obj.size (Obj.repr args) - 3
 let arg args i =
   if  0 <= i && i < (nargs args) then
-    val_of_obj (Obj.field (Obj.repr args) (i+2))
+    val_of_obj (Obj.field (Obj.repr args) (i + 3))
   else invalid_arg
                 ("Vm.arg size = "^(string_of_int (nargs args))^
                  " acces "^(string_of_int i))
@@ -356,7 +355,7 @@ let rec whd_accu a stk =
   | i when Int.equal i fix_app_tag ->
       let fa = Obj.field at 1 in
       let zfix  =
-        Zfix (Obj.obj (Obj.field fa 1), Obj.obj fa) in
+        Zfix (Obj.obj (Obj.field fa 2), Obj.obj fa) in
       whd_accu (Obj.field at 0) (zfix :: stk)
   | i when Int.equal i switch_tag ->
       let zswitch = Zswitch (Obj.obj (Obj.field at 1)) in
@@ -408,7 +407,7 @@ let whd_val : values -> whd =
           (match kind_of_closure o with
            | 0 -> Vfun(Obj.obj o)
            | 1 -> Vfix(Obj.obj o, None)
-           | 2 -> Vfix(Obj.obj (Obj.field o 1), Some (Obj.obj o))
+           | 2 -> Vfix(Obj.obj (Obj.field o 2), Some (Obj.obj o))
            | 3 -> Vatom_stk(Aid(RelKey(int_tcode (fun_code o) 1)), [])
            | _ -> CErrors.anomaly ~label:"Vm.whd " (Pp.str "kind_of_closure does not work."))
         else if Int.equal tag Obj.custom_tag then Vint64 (Obj.magic v)
@@ -516,29 +515,23 @@ external closure_arity : vfun -> int = "coq_closure_arity"
 
 (* Functions over fixpoint *)
 
-external offset : Obj.t -> int = "coq_offset"
-external offset_closure : Obj.t -> int -> Obj.t = "coq_offset_closure"
-external offset_closure_fix : vfix -> int -> vm_env = "coq_offset_closure"
+external current_fix : vfix -> int = "coq_current_fix"
+external shift_fix : vfix -> int -> vfix = "coq_shift_fix"
+external last_fix : vfix -> vfix = "coq_last_fix"
 external tcode_array : tcode_array -> tcode array = "coq_tcode_array"
 
-let first o = (offset_closure o (offset o))
-let first_fix (v:vfix) = (Obj.magic (first (Obj.repr v)) : vfix)
+let first_fix o = shift_fix o (- current_fix o)
+let fix_env v = (Obj.magic (last_fix v) : vm_env)
 
 let last o = (Obj.field o (Obj.size o - 1))
 let fix_types (v:vfix) = tcode_array (Obj.magic (last (Obj.repr v)) : tcode_array)
 let cofix_types (v:vcofix) = tcode_array (Obj.magic (last (Obj.repr v)) : tcode_array)
 
-let current_fix vf = - (offset (Obj.repr vf) / 2)
-
-let unsafe_fb_code fb i =
-  let off = (2 * i) * (Sys.word_size / 8) in
-  Obj.obj (Obj.add_offset (Obj.repr fb) (Int32.of_int off))
-
-let unsafe_rec_arg fb i = int_tcode (unsafe_fb_code fb i) 1
+let unsafe_rec_arg fb i = int_tcode (Obj.magic (shift_fix fb i)) 1
 
 let rec_args vf =
-  let fb = first (Obj.repr vf) in
-  let size = Obj.size (last fb) in
+  let fb = first_fix vf in
+  let size = Obj.size (last (Obj.repr fb)) in
   Array.init size (unsafe_rec_arg fb)
 
 exception FALSE
@@ -547,10 +540,10 @@ let check_fix f1 f2 =
   let i1, i2 = current_fix f1, current_fix f2 in
   (* Checking starting point *)
   if i1 = i2 then
-    let fb1,fb2 = first (Obj.repr f1), first (Obj.repr f2) in
-    let n = Obj.size (last fb1) in
+    let fb1,fb2 = first_fix f1, first_fix f2 in
+    let n = Obj.size (last (Obj.repr fb1)) in
     (* Checking number of definitions *)
-    if n = Obj.size (last fb2) then
+    if n = Obj.size (last (Obj.repr fb2)) then
       (* Checking recursive arguments *)
       try
         for i = 0 to n - 1 do
@@ -593,21 +586,23 @@ let relaccu_code i =
 
 let mk_fix_body k ndef fb =
   let e = Obj.dup (Obj.repr fb) in
+  let env = Obj.repr (fix_env (Obj.obj e)) in
   for i = 0 to ndef - 1 do
-    set_bytecode_field e (2 * i) (relaccu_code (k + i))
+    set_bytecode_field e (3 * i) (relaccu_code (k + i))
   done;
   let fix_body i =
-    let c = offset_tcode (unsafe_fb_code fb i) 2 in
-    let res = Obj.new_block Obj.closure_tag 2 in
+    let c = offset_tcode (Obj.magic (shift_fix fb i)) 2 in
+    let res = Obj.new_block Obj.closure_tag 3 in
     set_bytecode_field res 0 c;
-    Obj.set_field res 1 (offset_closure e (2*i));
+    Obj.set_field res 1 (Obj.repr 2);
+    Obj.set_field res 2 env;
     ((Obj.obj res) : vfun)  in
   Array.init ndef fix_body
 
 (* Functions over vcofix *)
 
 let get_fcofix vcf i =
-  match whd_val (Obj.obj (Obj.field (Obj.repr vcf) (i+1))) with
+  match whd_val (Obj.obj (Obj.field (Obj.repr vcf) (i+2))) with
   | Vcofix(vcfi, _, _) -> vcfi
   | _ -> assert false
 
@@ -628,7 +623,7 @@ let check_cofix vcf1 vcf2 =
 let mk_cofix_body apply_varray k ndef vcf =
   let e = Obj.dup (Obj.repr vcf) in
   for i = 0 to ndef - 1 do
-    Obj.set_field e (i+1) (Obj.repr (val_of_rel (k+i)))
+    Obj.set_field e (i+2) (Obj.repr (val_of_rel (k+i)))
   done;
 
   let cofix_body i =
diff --git a/kernel/vmvalues.mli b/kernel/vmvalues.mli
index f6efd49cfc..ee3e2bd5fc 100644
--- a/kernel/vmvalues.mli
+++ b/kernel/vmvalues.mli
@@ -181,7 +181,6 @@ val rec_args : vfix -> int array
 val first_fix : vfix -> vfix
 val fix_types : vfix -> tcode array
 val cofix_types : vcofix -> tcode array
-external offset_closure_fix : vfix -> int -> vm_env = "coq_offset_closure"
 val mk_fix_body : int -> int -> vfix -> vfun array
 
 (** CoFix *)
-- 
cgit v1.2.3


From 2d63a61a991ab42f2124775b184898d7af6725dd Mon Sep 17 00:00:00 2001
From: Guillaume Melquiond
Date: Wed, 26 Aug 2020 08:34:25 +0200
Subject: Use the same memory layout as closures for accumulators.

That way, accumulators again can be used directly as execution
environments by the bytecode interpreter. This fixes the issue of the
first argument of accumulators being dropped when strongly normalizing.
---
 kernel/byterun/coq_interp.c | 103 ++++++++++++++++++++++----------------------
 kernel/byterun/coq_values.h |   1 -
 kernel/vmbytegen.ml         |  14 +++---
 kernel/vmvalues.ml          |  16 ++++---
 4 files changed, 68 insertions(+), 66 deletions(-)

(limited to 'kernel')

diff --git a/kernel/byterun/coq_interp.c b/kernel/byterun/coq_interp.c
index b44ae34582..40ddcbb213 100644
--- a/kernel/byterun/coq_interp.c
+++ b/kernel/byterun/coq_interp.c
@@ -712,26 +712,26 @@ value coq_interprete
           } else {
             /* The recursif argument is an accumulator */
             mlsize_t num_args, i;
+            value block;
             /* Construction of fixpoint applied to its [rec_pos-1] first arguments */
             Alloc_small(accu, rec_pos + 3, Closure_tag);
             Code_val(accu) = pc; /* Point to the next RESTART instr. */
             Field(accu, 1) = Val_int(2);
             Field(accu, 2) = coq_env; // We store the fixpoint in the first field
-            for (i = 0; i < rec_pos; i++) Field(accu, i + 3) = sp[i]; // Storing args
-            sp += rec_pos;
-            *--sp = accu;
-              /* Construction of the atom */
-            Alloc_small(accu, 2, ATOM_FIX_TAG);
-            Field(accu,1) = sp[0];
-            Field(accu,0)  = sp[1];
-            sp++; sp[0] = accu;
-              /* Construction of the accumulator */
+            for (i = 0; i < rec_pos; i++) Field(accu, i + 3) = *sp++; // Storing args
+            /* Construction of the atom */
+            Alloc_small(block, 2, ATOM_FIX_TAG);
+            Field(block, 0) = *sp++;
+            Field(block, 1) = accu;
+            accu = block;
+            /* Construction of the accumulator */
             num_args = coq_extra_args - rec_pos;
-            Alloc_small(accu, 2+num_args, Accu_tag);
-            Code_val(accu) = accumulate;
-            Field(accu,1) = sp[0]; sp++;
-            for (i = 0; i < num_args;i++)Field(accu, i + 2) = sp[i];
-            sp += num_args;
+            Alloc_small(block, 3 + num_args, Accu_tag);
+            Code_val(block) = accumulate;
+            Field(block, 1) = Val_int(2);
+            Field(block, 2) = accu;
+            for (i = 0; i < num_args; i++) Field(block, i + 3) = *sp++;
+            accu = block;
             pc = (code_t)(sp[0]);
             coq_env = sp[1];
             coq_extra_args = Long_val(sp[2]);
@@ -808,9 +808,10 @@ value coq_interprete
 
         /* Creation des blocks accumulate */
         for(i=0; i < nfunc; i++) {
-          Alloc_small(accu, 2, Accu_tag);
+          Alloc_small(accu, 3, Accu_tag);
           Code_val(accu) = accumulate;
-          Field(accu,1) = Val_int(1);
+          Field(accu, 1) = Val_int(2);
+          Field(accu, 2) = Val_int(1);
           *--sp=accu;
         }
         /* creation des fonction cofix */
@@ -818,22 +819,17 @@ value coq_interprete
         p = sp;
         size = nfunc + nvars + 3;
         for (i=0; i < nfunc; i++) {
-
+          value block;
           Alloc_small(accu, size, Closure_tag);
           Code_val(accu) = pc+pc[i];
           Field(accu, 1) = Val_int(2);
           for (j = 0; j < nfunc; ++j) Field(accu, j + 2) = p[j];
           Field(accu, size - 1) = p[nfunc];
           for (j = nfunc + 1; j <= nfunc + nvars; ++j) Field(accu, j + 1) = p[j];
-          *--sp = accu;
-          /* creation du block contenant le cofix */
-
-          Alloc_small(accu,1, ATOM_COFIX_TAG);
-          Field(accu, 0) = sp[0];
-          *sp = accu;
-          /* mise a jour du block accumulate */
-          caml_modify(&Field(p[i], 1),*sp);
-          sp++;
+          Alloc_small(block, 1, ATOM_COFIX_TAG);
+          Field(block, 0) = accu;
+          /* update the accumulate block */
+          caml_modify(&Field(p[i], 2), block);
         }
         pc += nfunc;
         accu = p[start];
@@ -1028,7 +1024,7 @@ value coq_interprete
         print_instr("PROJ");
         if (Is_accu (accu)) {
           *--sp = accu; // Save matched block on stack
-          accu = Field(accu, 1); // Save atom to accu register
+          accu = Field(accu, 2); // Save atom to accu register
           switch (Tag_val(accu)) {
           case ATOM_COFIX_TAG: // We are forcing a cofix
             {
@@ -1040,11 +1036,11 @@ value coq_interprete
               coq_env = Field(accu, 0); // Pointer to suspension
               accu = sp[2]; // Save accumulator to accu register
               sp[2] = Val_long(coq_extra_args); // Push number of args for return
-              nargs = Wosize_val(accu) - 2; // Number of args = size of accumulator - 1 (accumulator code) - 1 (atom)
+              nargs = Wosize_val(accu) - 3; // Number of args = size of accumulator - 2 (accumulator closure) - 1 (atom)
               // Push arguments to stack
               CHECK_STACK(nargs + 1);
               sp -= nargs;
-              for (i = 0; i < nargs; ++i) sp[i] = Field(accu, i + 2);
+              for (i = 0; i < nargs; ++i) sp[i] = Field(accu, i + 3);
               *--sp = accu; // Last argument is the pointer to the suspension
               coq_extra_args = nargs;
               pc = Code_val(coq_env); // Trigger evaluation
@@ -1066,9 +1062,10 @@ value coq_interprete
               Field(accu, 0) = Field(coq_global_data, *pc++);
               Field(accu, 1) = *sp++;
               /* Create accumulator */
-              Alloc_small(block, 2, Accu_tag);
+              Alloc_small(block, 3, Accu_tag);
               Code_val(block) = accumulate;
-              Field(block, 1) = accu;
+              Field(block, 1) = Val_int(2);
+              Field(block, 2) = accu;
               accu = block;
             }
           }
@@ -1142,7 +1139,7 @@ value coq_interprete
       Instruct(MAKESWITCHBLOCK) {
         print_instr("MAKESWITCHBLOCK");
         *--sp = accu; // Save matched block on stack
-        accu = Field(accu,1); // Save atom to accu register
+        accu = Field(accu, 2); // Save atom to accu register
         switch (Tag_val(accu)) {
         case ATOM_COFIX_TAG: // We are forcing a cofix
           {
@@ -1156,11 +1153,11 @@ value coq_interprete
             coq_env = Field(accu,0); // Pointer to suspension
             accu = sp[2]; // Save accumulator to accu register
             sp[2] = Val_long(coq_extra_args); // Push number of args for return
-            nargs = Wosize_val(accu) - 2; // Number of args = size of accumulator - 1 (accumulator code) - 1 (atom)
+            nargs = Wosize_val(accu) - 3; // Number of args = size of accumulator - 2 (accumulator closure) - 1 (atom)
             // Push arguments to stack
             CHECK_STACK(nargs+1);
             sp -= nargs;
-            for (i = 0; i < nargs; i++) sp[i] = Field(accu, i + 2);
+            for (i = 0; i < nargs; i++) sp[i] = Field(accu, i + 3);
             *--sp = accu; // Leftmost argument is the pointer to the suspension
             print_lint(nargs);
             coq_extra_args = nargs;
@@ -1181,6 +1178,7 @@ value coq_interprete
             mlsize_t sz;
             int i, annot;
             code_t typlbl,swlbl;
+            value block;
             print_instr("MAKESWITCHBLOCK");
 
             typlbl = (code_t)pc + *pc;
@@ -1207,24 +1205,26 @@ value coq_interprete
             *--sp = accu;
             Alloc_small(accu, 1, Abstract_tag);
             Code_val(accu) = swlbl;
-            *--sp = accu;
             /* We create the switch zipper */
-            Alloc_small(accu, 5, Default_tag);
-            Field(accu, 0) = sp[1];
-            Field(accu, 1) = sp[0];
-            Field(accu, 2) = sp[3];
-            Field(accu, 3) = sp[2];
-            Field(accu, 4) = coq_env;
+            Alloc_small(block, 5, Default_tag);
+            Field(block, 0) = sp[0];
+            Field(block, 1) = accu;
+            Field(block, 2) = sp[2];
+            Field(block, 3) = sp[1];
+            Field(block, 4) = coq_env;
             sp += 3;
-            sp[0] = accu;
+            accu = block;
             /* We create the atom */
-            Alloc_small(accu, 2, ATOM_SWITCH_TAG);
-            Field(accu, 0) = sp[1]; Field(accu, 1) = sp[0];
-            sp++;sp[0] = accu;
+            Alloc_small(block, 2, ATOM_SWITCH_TAG);
+            Field(block, 0) = *sp++;
+            Field(block, 1) = accu;
+            accu = block;
             /* We create the accumulator */
-            Alloc_small(accu, 2, Accu_tag);
-            Code_val(accu) = accumulate;
-            Field(accu,1) = *sp++;
+            Alloc_small(block, 3, Accu_tag);
+            Code_val(block) = accumulate;
+            Field(block, 1) = Val_int(2);
+            Field(block, 2) = accu;
+            accu = block;
           }
         }
         Next;
@@ -1235,10 +1235,11 @@ value coq_interprete
       Instruct(MAKEACCU) {
         int i;
         print_instr("MAKEACCU");
-        Alloc_small(accu, coq_extra_args + 3, Accu_tag);
+        Alloc_small(accu, coq_extra_args + 4, Accu_tag);
         Code_val(accu) = accumulate;
-        Field(accu,1) = Field(coq_atom_tbl, *pc);
-        for(i = 2; i < coq_extra_args + 3; i++) Field(accu, i) = *sp++;
+        Field(accu, 1) = Val_int(2);
+        Field(accu, 2) = Field(coq_atom_tbl, *pc);
+        for (i = 2; i < coq_extra_args + 3; i++) Field(accu, i + 1) = *sp++;
         pc = (code_t)(sp[0]);
         coq_env = sp[1];
         coq_extra_args = Long_val(sp[2]);
diff --git a/kernel/byterun/coq_values.h b/kernel/byterun/coq_values.h
index a19f9b56c1..bde6e14367 100644
--- a/kernel/byterun/coq_values.h
+++ b/kernel/byterun/coq_values.h
@@ -30,7 +30,6 @@
 
 /* Les blocs accumulate */
 #define Is_accu(v) (Is_block(v) && (Tag_val(v) == Accu_tag))
-#define IS_EVALUATED_COFIX(v) (Is_accu(v) && Is_block(Field(v,1)) && (Tag_val(Field(v,1)) == ATOM_COFIXEVALUATED_TAG))
 #define Is_double(v) (Tag_val(v) == Double_tag)
 #define Is_tailrec_switch(v) (Field(v,1) == Val_true)
 
diff --git a/kernel/vmbytegen.ml b/kernel/vmbytegen.ml
index a826cf8193..375b1aface 100644
--- a/kernel/vmbytegen.ml
+++ b/kernel/vmbytegen.ml
@@ -74,23 +74,23 @@ open Environ
 (*                      ^                                                 *)
 (* The [ai] blocks are functions that accumulate their arguments:         *)
 (*           ai arg1  argp --->                                           *)
-(*    ai' = [A_t | accumulate | [Cfx_t | fcofixi] | arg1 | ... | argp ]   *)
+(*    ai' = [A_t | accumulate | envofs | [Cfx_t | fcofixi] | arg1 | ... | argp ] *)
 (* If such a block is matched against, we have to force evaluation,       *)
 (* function [fcofixi] is then applied to [ai'] [arg1] ... [argp]          *)
 (* (note that [ai'] is a pointer to the closure, passed as argument)      *)
 (* Once evaluation is completed [ai'] is updated with the result:         *)
 (*  ai' <--                                                               *)
-(*   [A_t | accumulate | [Cfxe_t |fcofixi|result] | arg1 | ... | argp ]   *)
+(*   [A_t | accumulate | envofs | [Cfxe_t |fcofixi|result] | arg1 | ... | argp ] *)
 (* This representation is nice because the application of the cofix is    *)
 (* evaluated only once (it simulates a lazy evaluation)                   *)
 (* Moreover, when cofix don't have arguments, it is possible to create    *)
 (* a cycle, e.g.:                                                         *)
 (*   cofix one := cons 1 one                                              *)
-(*   a1 = [A_t | accumulate | [Cfx_t|fcofix1] ]                           *)
+(*   a1 = [A_t | accumulate | envofs | [Cfx_t|fcofix1] ]                  *)
 (*   fcofix1 = [clos_t | code | envofs | a1]                              *)
 (* The result of evaluating [a1] is [cons_t | 1 | a1].                    *)
 (* When [a1] is updated :                                                 *)
-(*  a1 = [A_t | accumulate | [Cfxe_t | fcofix1 | [cons_t | 1 | a1]] ]     *)
+(*  a1 = [A_t | accumulate | envofs | [Cfxe_t | fcofix1 | [cons_t | 1 | a1]] ] *)
 (* The cycle is created ...                                               *)
 (*                                                                        *)
 (* In Cfxe_t accumulators, we need to store [fcofixi] for testing         *)
@@ -394,16 +394,16 @@ let add_grabrec rec_arg arity lbl cont =
 let cont_cofix arity =
     (* accu = res                                                         *)
     (* stk  = ai::args::ra::...                                           *)
-    (* ai   = [At|accumulate|[Cfx_t|fcofix]|args]                         *)
+    (* ai   = [At|accumulate|envofs|[Cfx_t|fcofix]|args]                  *)
   [ Kpush;
     Kpush;        (*                 stk = res::res::ai::args::ra::...    *)
     Kacc 2;
-    Kfield 1;
+    Kfield 2;
     Kfield 0;
     Kmakeblock(2, cofix_evaluated_tag);
     Kpush;        (*  stk = [Cfxe_t|fcofix|res]::res::ai::args::ra::...*)
     Kacc 2;
-    Ksetfield 1;  (*   ai = [At|accumulate|[Cfxe_t|fcofix|res]|args]      *)
+    Ksetfield 2;  (*   ai = [At|accumulate|envofs|[Cfxe_t|fcofix|res]|args] *)
                   (*  stk = res::ai::args::ra::...                        *)
     Kacc 0;       (* accu = res                                           *)
     Kreturn (arity+2) ]
diff --git a/kernel/vmvalues.ml b/kernel/vmvalues.ml
index 177f67210c..98a1b13373 100644
--- a/kernel/vmvalues.ml
+++ b/kernel/vmvalues.ml
@@ -231,7 +231,7 @@ type vswitch = {
 (* + vm_env is the type of the machine environments (i.e. a function or   *)
 (*   a fixpoint)                                                          *)
 (*                                                                        *)
-(* + Accumulators : At_[accumulate| accu | arg1 | ... | argn ]            *)
+(* + Accumulators : At_[accumulate | envofs=2 | accu | arg1 | ... | argn ] *)
 (*   - representation of [accu] : tag_[....]                              *)
 (*     -- tag <= 3 : encoding atom type (sorts, free vars, etc.)          *)
 (*     -- 10_[accu|proj name] : a projection blocked by an accu           *)
@@ -328,9 +328,9 @@ let uni_lvl_val (v : values) : Univ.Level.t =
 
 let rec whd_accu a stk =
   let stk =
-    if Int.equal (Obj.size a) 2 then stk
+    if Int.equal (Obj.size a) 3 then stk
     else Zapp (Obj.obj a) :: stk in
-  let at = Obj.field a 1 in
+  let at = Obj.field a 2 in
   match Obj.tag at with
   | i when Int.equal i type_atom_tag ->
      begin match stk with
@@ -421,9 +421,10 @@ let whd_val : values -> whd =
 
 let obj_of_atom : atom -> Obj.t =
   fun a ->
-    let res = Obj.new_block accu_tag 2 in
+    let res = Obj.new_block accu_tag 3 in
     set_bytecode_field res 0 accumulate;
-    Obj.set_field res 1 (Obj.repr a);
+    Obj.set_field res 1 (Obj.repr 2);
+    Obj.set_field res 2 (Obj.repr a);
     res
 
 (* obj_of_str_const : structured_constant -> Obj.t *)
@@ -631,9 +632,10 @@ let mk_cofix_body apply_varray k ndef vcf =
     let c = Obj.field (Obj.repr vcfi) 0 in
     Obj.set_field e 0 c;
     let atom = Obj.new_block cofix_tag 1 in
-    let self = Obj.new_block accu_tag 2 in
+    let self = Obj.new_block accu_tag 3 in
     set_bytecode_field self 0 accumulate;
-    Obj.set_field self 1 (Obj.repr atom);
+    Obj.set_field self 1 (Obj.repr 2);
+    Obj.set_field self 2 (Obj.repr atom);
     apply_varray (Obj.obj e) [|Obj.obj self|] in
   Array.init ndef cofix_body
 
-- 
cgit v1.2.3


From 27695b52413f9fd6bcb60d77e0bdba538c16201f Mon Sep 17 00:00:00 2001
From: Guillaume Melquiond
Date: Wed, 26 Aug 2020 11:40:24 +0200
Subject: Use the closure tag for accumulators.

The first field of accumulators points out of the OCaml heap, so using
closures instead of tag-0 objects is better for the GC.

Accumulators are distinguished from closures because their code pointer
necessarily is the "accumulate" address, which points to an ACCUMULATE
instruction.
---
 kernel/byterun/coq_interp.c | 15 ++++++++-----
 kernel/byterun/coq_memory.c |  2 +-
 kernel/byterun/coq_values.h |  3 ---
 kernel/vmvalues.ml          | 55 ++++++++++++++++++++-------------------------
 kernel/vmvalues.mli         |  2 --
 5 files changed, 34 insertions(+), 43 deletions(-)

(limited to 'kernel')

diff --git a/kernel/byterun/coq_interp.c b/kernel/byterun/coq_interp.c
index 40ddcbb213..1b6da7dd6f 100644
--- a/kernel/byterun/coq_interp.c
+++ b/kernel/byterun/coq_interp.c
@@ -193,6 +193,8 @@ if (sp - num_args < coq_stack_threshold) {                                     \
 #endif
 #endif
 
+#define Is_accu(v) (Is_block(v) && Tag_val(v) == Closure_tag && Code_val(v) == accumulate)
+
 #define CheckPrimArgs(cond, apply_lbl) do{         \
     if (cond) pc++;                                \
     else{                                          \
@@ -726,7 +728,7 @@ value coq_interprete
             accu = block;
             /* Construction of the accumulator */
             num_args = coq_extra_args - rec_pos;
-            Alloc_small(block, 3 + num_args, Accu_tag);
+            Alloc_small(block, 3 + num_args, Closure_tag);
             Code_val(block) = accumulate;
             Field(block, 1) = Val_int(2);
             Field(block, 2) = accu;
@@ -808,7 +810,7 @@ value coq_interprete
 
         /* Creation des blocks accumulate */
         for(i=0; i < nfunc; i++) {
-          Alloc_small(accu, 3, Accu_tag);
+          Alloc_small(accu, 3, Closure_tag);
           Code_val(accu) = accumulate;
           Field(accu, 1) = Val_int(2);
           Field(accu, 2) = Val_int(1);
@@ -957,6 +959,7 @@ value coq_interprete
         print_int(sizes & 0xFFFFFF);
         if (Is_block(accu)) {
           long index = Tag_val(accu);
+          if (index == Closure_tag) index = 0;
           print_instr("block");
           print_lint(index);
           pc += pc[(sizes & 0xFFFFFF) + index];
@@ -1062,7 +1065,7 @@ value coq_interprete
               Field(accu, 0) = Field(coq_global_data, *pc++);
               Field(accu, 1) = *sp++;
               /* Create accumulator */
-              Alloc_small(block, 3, Accu_tag);
+              Alloc_small(block, 3, Closure_tag);
               Code_val(block) = accumulate;
               Field(block, 1) = Val_int(2);
               Field(block, 2) = accu;
@@ -1126,7 +1129,7 @@ value coq_interprete
         mlsize_t i, size;
         print_instr("ACCUMULATE");
         size = Wosize_val(coq_env);
-        Alloc_small(accu, size + coq_extra_args + 1, Accu_tag);
+        Alloc_small(accu, size + coq_extra_args + 1, Closure_tag);
         for(i = 0; i < size; i++) Field(accu, i) = Field(coq_env, i);
         for(i = size; i <= coq_extra_args + size; i++)
           Field(accu, i) = *sp++;
@@ -1220,7 +1223,7 @@ value coq_interprete
             Field(block, 1) = accu;
             accu = block;
             /* We create the accumulator */
-            Alloc_small(block, 3, Accu_tag);
+            Alloc_small(block, 3, Closure_tag);
             Code_val(block) = accumulate;
             Field(block, 1) = Val_int(2);
             Field(block, 2) = accu;
@@ -1235,7 +1238,7 @@ value coq_interprete
       Instruct(MAKEACCU) {
         int i;
         print_instr("MAKEACCU");
-        Alloc_small(accu, coq_extra_args + 4, Accu_tag);
+        Alloc_small(accu, coq_extra_args + 4, Closure_tag);
         Code_val(accu) = accumulate;
         Field(accu, 1) = Val_int(2);
         Field(accu, 2) = Field(coq_atom_tbl, *pc);
diff --git a/kernel/byterun/coq_memory.c b/kernel/byterun/coq_memory.c
index 6233675c66..ae5251c252 100644
--- a/kernel/byterun/coq_memory.c
+++ b/kernel/byterun/coq_memory.c
@@ -108,7 +108,7 @@ value init_coq_vm(value unit) /* ML */
     init_coq_interpreter();
 
     /* Some predefined pointer code.
-     * It is typically contained in accumulator blocks whose tag is 0 and thus
+     * It is typically contained in accumulator blocks and thus might be
      * scanned by the GC, so make it look like an OCaml block. */
     value accu_block = (value) coq_stat_alloc(2 * sizeof(value));
     Hd_hp (accu_block) = Make_header (1, Abstract_tag, Caml_black);        \
diff --git a/kernel/byterun/coq_values.h b/kernel/byterun/coq_values.h
index bde6e14367..f07018711b 100644
--- a/kernel/byterun/coq_values.h
+++ b/kernel/byterun/coq_values.h
@@ -17,7 +17,6 @@
 #include <float.h>
 
 #define Default_tag 0
-#define Accu_tag 0
 
 #define ATOM_ID_TAG 0
 #define ATOM_INDUCTIVE_TAG 1
@@ -28,8 +27,6 @@
 #define ATOM_COFIX_TAG 6
 #define ATOM_COFIXEVALUATED_TAG 7
 
-/* Les blocs accumulate */
-#define Is_accu(v) (Is_block(v) && (Tag_val(v) == Accu_tag))
 #define Is_double(v) (Tag_val(v) == Double_tag)
 #define Is_tailrec_switch(v) (Field(v,1) == Val_true)
 
diff --git a/kernel/vmvalues.ml b/kernel/vmvalues.ml
index 98a1b13373..0678f37a0b 100644
--- a/kernel/vmvalues.ml
+++ b/kernel/vmvalues.ml
@@ -34,8 +34,6 @@ let crazy_val = (val_of_obj (Obj.repr 0))
 
 type tag = int
 
-let accu_tag = 0
-
 let type_atom_tag = 2
 let max_atom_tag = 2
 let proj_tag = 3
@@ -225,8 +223,7 @@ type vswitch = {
 (* + Cofixpoints : see cbytegen.ml                                        *)
 (*                                                                        *)
 (* + vblock's encode (non constant) constructors as in Ocaml, but         *)
-(*   starting from 0 up. tag 0 ( = accu_tag) is reserved for              *)
-(*   accumulators.                                                        *)
+(*   starting from 0 up.                                                  *)
 (*                                                                        *)
 (* + vm_env is the type of the machine environments (i.e. a function or   *)
 (*   a fixpoint)                                                          *)
@@ -391,29 +388,28 @@ external accumulate : unit -> tcode = "accumulate_code"
 external set_bytecode_field : Obj.t -> int -> tcode -> unit = "coq_set_bytecode_field"
 let accumulate = accumulate ()
 
-let whd_val : values -> whd =
-  fun v ->
-    let o = Obj.repr v in
-    if Obj.is_int o then Vconstr_const (Obj.obj o)
+let whd_val (v: values) =
+  let o = Obj.repr v in
+  if Obj.is_int o then Vconstr_const (Obj.obj o)
+  else
+    let tag = Obj.tag o in
+    if Int.equal tag 0 then
+      if Int.equal (Obj.size o) 1 then
+        Varray (Obj.obj o)
+      else Vprod (Obj.obj o)
+    else if Int.equal tag Obj.closure_tag && is_accumulate (fun_code o) then
+      whd_accu o []
+    else if Int.equal tag Obj.closure_tag || Int.equal tag Obj.infix_tag then
+      (match kind_of_closure o with
+       | 0 -> Vfun(Obj.obj o)
+       | 1 -> Vfix(Obj.obj o, None)
+       | 2 -> Vfix(Obj.obj (Obj.field o 2), Some (Obj.obj o))
+       | 3 -> Vatom_stk(Aid(RelKey(int_tcode (fun_code o) 1)), [])
+       | _ -> CErrors.anomaly ~label:"Vm.whd " (Pp.str "kind_of_closure does not work."))
+    else if Int.equal tag Obj.custom_tag then Vint64 (Obj.magic v)
+    else if Int.equal tag Obj.double_tag then Vfloat64 (Obj.magic v)
     else
-      let tag = Obj.tag o in
-      if tag = accu_tag then
-        if Int.equal (Obj.size o) 1 then
-          Varray(Obj.obj o)
-        else if is_accumulate (fun_code o) then whd_accu o []
-        else Vprod(Obj.obj o)
-      else
-        if tag = Obj.closure_tag || tag = Obj.infix_tag then
-          (match kind_of_closure o with
-           | 0 -> Vfun(Obj.obj o)
-           | 1 -> Vfix(Obj.obj o, None)
-           | 2 -> Vfix(Obj.obj (Obj.field o 2), Some (Obj.obj o))
-           | 3 -> Vatom_stk(Aid(RelKey(int_tcode (fun_code o) 1)), [])
-           | _ -> CErrors.anomaly ~label:"Vm.whd " (Pp.str "kind_of_closure does not work."))
-        else if Int.equal tag Obj.custom_tag then Vint64 (Obj.magic v)
-        else if Int.equal tag Obj.double_tag then Vfloat64 (Obj.magic v)
-        else
-           Vconstr_block(Obj.obj o)
+      Vconstr_block (Obj.obj o)
 
 (**********************************************)
 (* Constructors *******************************)
@@ -421,7 +417,7 @@ let whd_val : values -> whd =
 
 let obj_of_atom : atom -> Obj.t =
   fun a ->
-    let res = Obj.new_block accu_tag 3 in
+    let res = Obj.new_block Obj.closure_tag 3 in
     set_bytecode_field res 0 accumulate;
     Obj.set_field res 1 (Obj.repr 2);
     Obj.set_field res 2 (Obj.repr a);
@@ -632,10 +628,7 @@ let mk_cofix_body apply_varray k ndef vcf =
     let c = Obj.field (Obj.repr vcfi) 0 in
     Obj.set_field e 0 c;
     let atom = Obj.new_block cofix_tag 1 in
-    let self = Obj.new_block accu_tag 3 in
-    set_bytecode_field self 0 accumulate;
-    Obj.set_field self 1 (Obj.repr 2);
-    Obj.set_field self 2 (Obj.repr atom);
+    let self = obj_of_atom (Obj.obj atom) in
     apply_varray (Obj.obj e) [|Obj.obj self|] in
   Array.init ndef cofix_body
 
diff --git a/kernel/vmvalues.mli b/kernel/vmvalues.mli
index ee3e2bd5fc..6632dc46b2 100644
--- a/kernel/vmvalues.mli
+++ b/kernel/vmvalues.mli
@@ -27,8 +27,6 @@ type to_update
 
 type tag = int
 
-val accu_tag : tag
-
 val type_atom_tag : tag
 val max_atom_tag : tag
 val proj_tag : tag
-- 
cgit v1.2.3