Modify bytecode representation of closures to please OCaml's GC (fix #12636).

The second field of a closure can no longer be the value of the first free variable (or another closure of a mutually recursive block) but must be an offset to the first free variable. This commit makes the bytecode compiler and interpreter agnostic to the actual representation of closures. To do so, the execution environment (variable coq_env) no longer points to the currently executed closure but to the last one. This has the following consequences: - OFFSETCLOSURE(n) now always loads the n-th closure of a recursive block (counted from last to first); - ENVACC(n) now always loads the value of the n-th free variable. These two changes make the bytecode compiler simpler, since it no longer has to track the relative position of closures and free variables. The last change makes the interpreter a bit slower, since it has to adjust coq_env when executing GRABREC. Hopefully, cache locality will make the overhead negligible.
author: Guillaume Melquiond 2020-08-24 15:11:16 +0200
committer: Guillaume Melquiond 2020-09-22 14:55:15 +0200
commit: 5aa6d99982a9d6e2736f1eedb7fcdf6b9d05b6a0 (patch)
tree: 398efe1fc52e59b87364c5c97b7135024359e2fa /kernel/vmbytegen.ml
parent: c3a73c5e923953efea016a81d380e58b2cccb4f9 (diff)
1 files changed, 30 insertions, 46 deletions
diff --git a/kernel/vmbytegen.ml b/kernel/vmbytegen.ml
index 1274e3a867..a826cf8193 100644
--- a/kernel/vmbytegen.ml
+++ b/kernel/vmbytegen.ml
@@ -28,10 +28,10 @@ open Environ
 (* The virtual machine doesn't distinguish closures and their environment *)
 
 (* Representation of function environments :                              *)
-(*        [clos_t | code | fv1 | fv2 | ... | fvn ]                        *)
+(*        [clos_t | code | envofs=2 | fv1 | fv2 | ... | fvn ]             *)
 (*                ^                                                       *)
-(*  The offset for accessing free variables is 1 (we must skip the code   *)
-(*  pointer).                                                             *)
+(*  The offset for accessing free variables is 2 (we must skip the code   *)
+(*  pointer and the environment offset).                                  *)
 (*  While compiling, free variables are stored in [in_env] in order       *)
 (*  opposite to machine representation, so we can add new free variables  *)
 (*  easily (i.e. without changing the position of previous variables)     *)
@@ -42,9 +42,9 @@ open Environ
 (* In the function body [arg1] is represented by de Bruijn [n], and       *)
 (* [argn] by de Bruijn [1]                                                *)
 
-(* Representation of environments of mutual fixpoints :                  *)
-(* [t1|C1| ... |tc|Cc| ... |t(nbr)|C(nbr)| fv1 | fv2 | .... | fvn | type] *)
-(*                ^<----------offset--------->                            *)
+(* Representation of environments of mutual fixpoints :                   *)
+(* [clos_t|C1|envofs1| ... |infix_t|Ci|envofsi| ... |infix_t|Cnbr|envofsnbr=2| fv1 | fv2 | .... | fvn | type] *)
+(*                                 ^                                      *)
 (* type = [Ct1 | .... | Ctn]                                              *)
 (* Ci is the code pointer of the i-th body                                *)
 (* At runtime, a fixpoint environment (which is the same as the fixpoint  *)
@@ -52,25 +52,25 @@ open Environ
 (* In each fixpoint body, de Bruijn [nbr] represents the first fixpoint   *)
 (* and de Bruijn [1] the last one.                                        *)
 (* Access to these variables is performed by the [Koffsetclosure n]       *)
-(* instruction that shifts the environment pointer of [n] fields.         *)
+(* instruction that shifts the environment pointer by [n] closuress.      *)
 
 (* This allows representing mutual fixpoints in just one block.           *)
 (* [Ct1 | ... | Ctn] is an array holding code pointers of the fixpoint    *)
 (* types. They are used in conversion tests (which requires that          *)
 (* fixpoint types must be convertible). Their environment is the one of   *)
 (* the last fixpoint :                                                    *)
-(* [t1|C1| ... |tc|Cc| ... |t(nbr)|C(nbr)| fv1 | fv2 | .... | fvn | type] *)
-(*                                ^                                       *)
+(* [clos_t|C1| ... |infix_t|Ci| ... |infix_t|Cnbr|envofsnbr=2| fv1 | fv2 | .... | fvn | type] *)
+(*                                          ^                             *)
 
 (* Representation of mutual cofix :                                       *)
 (*  a1 =   [A_t | accumulate | [Cfx_t | fcofix1 ] ]                       *)
 (*                ...                                                     *)
 (*  anbr = [A_t | accumulate | [Cfx_t | fcofixnbr ] ]                     *)
 (*                                                                        *)
-(*  fcofix1 = [clos_t   | code1   | a1 |...| anbr | fv1 |...| fvn | type] *)
+(*  fcofix1   = [clos_t | code1   | envofs=2 | a1 |...| anbr | fv1 |...| fvn | type] *)
 (*                      ^                                                 *)
 (*                ...                                                     *)
-(*  fcofixnbr = [clos_t | codenbr | a1 |...| anbr | fv1 |...| fvn | type] *)
+(*  fcofixnbr = [clos_t | codenbr | envofs=2 | a1 |...| anbr | fv1 |...| fvn | type] *)
 (*                      ^                                                 *)
 (* The [ai] blocks are functions that accumulate their arguments:         *)
 (*           ai arg1  argp --->                                           *)
@@ -87,7 +87,7 @@ open Environ
 (* a cycle, e.g.:                                                         *)
 (*   cofix one := cons 1 one                                              *)
 (*   a1 = [A_t | accumulate | [Cfx_t|fcofix1] ]                           *)
-(*   fcofix1 = [clos_t | code | a1]                                       *)
+(*   fcofix1 = [clos_t | code | envofs | a1]                              *)
 (* The result of evaluating [a1] is [cons_t | 1 | a1].                    *)
 (* When [a1] is updated :                                                 *)
 (*  a1 = [A_t | accumulate | [Cfxe_t | fcofix1 | [cons_t | 1 | a1]] ]     *)
@@ -131,9 +131,7 @@ type comp_env = {
                                  (* universes are always at the bottom.    *)
     nb_stack : int;              (* number of variables on the stack       *)
     in_stack : int Range.t;      (* position in the stack                  *)
-    nb_rec : int;                (* number of mutually recursive functions *)
-    pos_rec  : instruction list; (* instruction d'acces pour les variables *)
-                                 (*  de point fix ou de cofix              *)
+    pos_rec : instruction array; (* instruction to access mutually-defined functions *)
     offset : int;
     in_env : vm_env ref          (* The free variables of the expression   *)
   }
@@ -159,8 +157,7 @@ let empty_comp_env ()=
     nb_uni_stack = 0;
     nb_stack = 0;
     in_stack = Range.empty;
-    nb_rec = 0;
-    pos_rec = [];
+    pos_rec = [||];
     offset = 0;
     in_env = ref empty_fv
   }
@@ -195,9 +192,8 @@ let comp_env_fun ?(univs=0) arity =
     nb_uni_stack = univs ;
     nb_stack = arity;
     in_stack = add_param arity 0 Range.empty;
-    nb_rec = 0;
-    pos_rec = [];
-    offset = 1;
+    pos_rec = [||];
+    offset = 0;
     in_env = ref empty_fv
   }
 
@@ -207,24 +203,18 @@ let comp_env_fix_type  rfv =
     nb_uni_stack = 0;
     nb_stack = 0;
     in_stack = Range.empty;
-    nb_rec = 0;
-    pos_rec = [];
-    offset = 1;
+    pos_rec = [||];
+    offset = 0;
     in_env = rfv
   }
 
-let comp_env_fix ndef curr_pos arity rfv =
-   let prec = ref [] in
-   for i = ndef downto 1 do
-     prec := Koffsetclosure (2 * (ndef - curr_pos - i)) :: !prec
-   done;
+let comp_env_fix ndef arity rfv =
    { arity;
      nb_uni_stack = 0;
      nb_stack = arity;
      in_stack = add_param arity 0 Range.empty;
-     nb_rec = ndef;
-     pos_rec = !prec;
-     offset = 2 * (ndef - curr_pos - 1)+1;
+     pos_rec = Array.init ndef (fun i -> Koffsetclosure i);
+     offset = 0;
      in_env = rfv
    }
 
@@ -233,24 +223,18 @@ let comp_env_cofix_type ndef rfv =
     nb_uni_stack = 0;
     nb_stack = 0;
     in_stack = Range.empty;
-    nb_rec = 0;
-    pos_rec = [];
-    offset = 1+ndef;
+    pos_rec = [||];
+    offset = ndef;
     in_env = rfv
   }
 
 let comp_env_cofix ndef arity rfv =
-   let prec = ref [] in
-   for i = 1 to ndef do
-     prec := Kenvacc i :: !prec
-   done;
    { arity;
      nb_uni_stack = 0;
      nb_stack = arity;
      in_stack = add_param arity 0 Range.empty;
-     nb_rec = ndef;
-     pos_rec = !prec;
-     offset = ndef+1;
+     pos_rec = Array.init ndef (fun i -> Kenvacc (ndef - 1 - i));
+     offset = ndef;
      in_env = rfv
    }
 
@@ -283,11 +267,11 @@ let pos_rel i r sz =
     Kacc(sz - (Range.get r.in_stack (i-1)))
   else
     let i = i - r.nb_stack in
-    if i <= r.nb_rec then
-      try List.nth r.pos_rec (i-1)
-      with (Failure _|Invalid_argument _) -> assert false
+    let nb_rec = Array.length r.pos_rec in
+    if i <= nb_rec then
+      r.pos_rec.(i - 1)
     else
-      let i = i - r.nb_rec in
+      let i = i - nb_rec in
       let db = FVrel(i) in
       let env = !(r.in_env) in
       try Kenvacc(r.offset + find_at db env)
@@ -627,7 +611,7 @@ let rec compile_lam env cenv lam sz cont =
       for i = 0 to ndef - 1 do
         let params,body = decompose_Llam bodies.(i) in
         let arity = Array.length params in
-        let env_body = comp_env_fix ndef i arity rfv in
+        let env_body = comp_env_fix ndef arity rfv in
         let cont1 =
           ensure_stack_capacity (compile_lam env env_body body arity) [Kreturn arity]
         in
author	Guillaume Melquiond	2020-08-24 15:11:16 +0200
committer	Guillaume Melquiond	2020-09-22 14:55:15 +0200
commit	5aa6d99982a9d6e2736f1eedb7fcdf6b9d05b6a0 (patch)
tree	398efe1fc52e59b87364c5c97b7135024359e2fa /kernel/vmbytegen.ml
parent	c3a73c5e923953efea016a81d380e58b2cccb4f9 (diff)