Merge PR #13537: More efficient implementation for substitutions.

Reviewed-by: SkySkimmer Ack-by: gares
author: coqbot-app[bot] 2020-12-09 18:01:37 +0000
committer: GitHub 2020-12-09 18:01:37 +0000
commit: 4fbc04b0fe0b89edfe35438d01d3b664fc21e6f8 (patch)
tree: 7ae1df67aecdccd63d86f251306e2707a07f8bc9 /kernel
parent: 9e0ca704fa8273a8337ff9e118d2d08620af8962 (diff)
parent: e0a943d40c6749e602421ac89059e0d6caf57518 (diff)
6 files changed, 234 insertions, 151 deletions
diff --git a/kernel/cClosure.ml b/kernel/cClosure.ml
index c9326615dc..d2256720c4 100644
--- a/kernel/cClosure.ml
+++ b/kernel/cClosure.ml
@@ -759,6 +759,10 @@ let get_nth_arg head n stk =
     | ((ZcaseT _ | Zproj _ | Zfix _ | Zprimitive _) :: _ | []) as s -> (None, List.rev rstk @ s) in
   strip_rec [] head n stk
 
+let rec subs_consn v i n s =
+  if Int.equal i n then s
+  else subs_consn v (i + 1) n (subs_cons v.(i) s)
+
 (* Beta reduction: look for an applied argument in the stack.
    Since the encountered update marks are removed, h must be a whnf *)
 let rec get_args n tys f e = function
@@ -770,14 +774,13 @@ let rec get_args n tys f e = function
         get_args n tys f (subs_shft (k,e)) s
     | Zapp l :: s ->
         let na = Array.length l in
-        if n == na then (Inl (subs_cons(l,e)),s)
+        if n == na then (Inl (subs_consn l 0 na e), s)
         else if n < na then (* more arguments *)
-          let args = Array.sub l 0 n in
           let eargs = Array.sub l n (na-n) in
-          (Inl (subs_cons(args,e)), Zapp eargs :: s)
+          (Inl (subs_consn l 0 n e), Zapp eargs :: s)
         else (* more lambdas *)
           let etys = List.skipn na tys in
-          get_args (n-na) etys f (subs_cons(l,e)) s
+          get_args (n-na) etys f (subs_consn l 0 na e) s
     | ((ZcaseT _ | Zproj _ | Zfix _ | Zprimitive _) :: _ | []) as stk ->
       (Inr {mark=mark Cstr Unknown;term=FLambda(n,tys,f,e)}, stk)
 
@@ -931,7 +934,11 @@ let contract_fix_vect fix =
            env, Array.length bds)
       | _ -> assert false
   in
-  (subs_cons(Array.init nfix make_body, env), thisbody)
+  let rec mk_subs env i =
+    if Int.equal i nfix then env
+    else mk_subs (subs_cons (make_body i) env) (i + 1)
+  in
+  (mk_subs env 0, thisbody)
 
 let unfold_projection info p =
   if red_projection info.i_flags p
@@ -1367,7 +1374,7 @@ let rec knr info tab m stk =
             knit info tab fxe fxbd (args@stk')
         | (_,args, ((Zapp _ | Zfix _ | Zshift _ | Zupdate _ | Zprimitive _) :: _ | [] as s)) -> (m,args@s))
   | FLetIn (_,v,_,bd,e) when red_set info.i_flags fZETA ->
-      knit info tab (subs_cons([|v|],e)) bd stk
+      knit info tab (subs_cons v e) bd stk
   | FEvar(ev,env) ->
       (match info.i_cache.i_sigma ev with
           Some c -> knit info tab env c stk
@@ -1417,7 +1424,7 @@ and case_inversion info tab ci (univs,args) v =
     let env = info_env info in
     let ind = ci.ci_ind in
     let params, indices = Array.chop ci.ci_npar args in
-    let psubst = subs_cons (params, subs_id 0) in
+    let psubst = subs_consn params 0 ci.ci_npar (subs_id 0) in
     let mib = Environ.lookup_mind (fst ind) env in
     let mip = mib.mind_packets.(snd ind) in
     (* indtyping enforces 1 ctor with no letins in the context *)
diff --git a/kernel/esubst.ml b/kernel/esubst.ml
index 3e8502b988..afd8e3ef67 100644
--- a/kernel/esubst.ml
+++ b/kernel/esubst.ml
@@ -60,127 +60,188 @@ let rec is_lift_id = function
 (*  Substitutions    *)
 (*********************)
 
-(* (bounded) explicit substitutions of type 'a *)
-type 'a subs =
-  | ESID of int            (* ESID(n)    = %n END   bounded identity *)
-  | CONS of 'a array * 'a subs
-                           (* CONS([|t1..tn|],S)  =
-                              (S.t1...tn)    parallel substitution
-                              beware of the order *)
-  | SHIFT of int * 'a subs (* SHIFT(n,S) = (^n o S) terms in S are relocated *)
-                           (*                        with n vars *)
-  | LIFT of int * 'a subs  (* LIFT(n,S) = (%n S) stands for ((^n o S).n...1) *)
-
-(* operations of subs: collapses constructors when possible.
- * Needn't be recursive if we always use these functions
- *)
-
-let subs_id i = ESID i
-
-let subs_cons(x,s) = if Int.equal (Array.length x) 0 then s else CONS(x,s)
-
-let subs_liftn n = function
-  | ESID p        -> ESID (p+n) (* bounded identity lifted extends by p *)
-  | LIFT (p,lenv) -> LIFT (p+n, lenv)
-  | lenv          -> LIFT (n,lenv)
-
-let subs_lift a = subs_liftn 1 a
-let subs_liftn n a = if Int.equal n 0 then a else subs_liftn n a
-
-let subs_shft = function
-  | (0, s)            -> s
-  | (n, SHIFT (k,s1)) -> SHIFT (k+n, s1)
-  | (n, s)            -> SHIFT (n,s)
-let subs_shft s = if Int.equal (fst s) 0 then snd s else subs_shft s
-
-let subs_shift_cons = function
-  (0, s, t)           -> CONS(t,s)
-| (k, SHIFT(n,s1), t) -> CONS(t,SHIFT(k+n, s1))
-| (k, s, t)           -> CONS(t,SHIFT(k, s));;
-
-(* Tests whether a substitution is equal to the identity *)
-let rec is_subs_id = function
-    ESID _     -> true
-  | LIFT(_,s)  -> is_subs_id s
-  | SHIFT(0,s) -> is_subs_id s
-  | CONS(x,s)  -> Int.equal (Array.length x) 0 && is_subs_id s
-  | _          -> false
-
-(* Expands de Bruijn k in the explicit substitution subs
- * lams accumulates de shifts to perform when retrieving the i-th value
- * the rules used are the following:
- *
- *    [id]k       --> k
- *    [S.t]1      --> t
- *    [S.t]k      --> [S](k-1)  if k > 1
- *    [^n o S] k  --> [^n]([S]k)
- *    [(%n S)] k  --> k         if k <= n
- *    [(%n S)] k  --> [^n]([S](k-n))
- *
- * the result is (Inr (k+lams,p)) when the variable is just relocated
- * where p is None if the variable points inside subs and Some(k) if the
- * variable points k bindings beyond subs.
- *)
-let rec exp_rel lams k subs =
-  match subs with
-    | CONS (def,_) when k <= Array.length def
-                           -> Inl(lams,def.(Array.length def - k))
-    | CONS (v,l)           -> exp_rel lams (k - Array.length v) l
-    | LIFT (n,_) when k<=n -> Inr(lams+k,None)
-    | LIFT (n,l)           -> exp_rel (n+lams) (k-n) l
-    | SHIFT (n,s)          -> exp_rel (n+lams) k s
-    | ESID n when k<=n     -> Inr(lams+k,None)
-    | ESID n               -> Inr(lams+k,Some (k-n))
-
-let expand_rel k subs = exp_rel 0 k subs
-
-let rec subs_map f = function
-| ESID _ as s -> s
-| CONS (x, s) -> CONS (Array.map f x, subs_map f s)
-| SHIFT (n, s) -> SHIFT (n, subs_map f s)
-| LIFT (n, s) -> LIFT (n, subs_map f s)
-
-let rec lift_subst mk_cl s1 s2 = match s1 with
-| ELID -> subs_map (fun c -> mk_cl ELID c) s2
-| ELSHFT(s, k) -> subs_shft(k, lift_subst mk_cl s s2)
-| ELLFT (k, s) ->
-  match s2 with
-  | CONS(x,s') ->
-      CONS(CArray.Fun1.map mk_cl s1 x, lift_subst mk_cl s1 s')
-  | ESID n -> lift_subst mk_cl s (ESID (n + k))
-  | SHIFT(k',s') ->
-      if k<k'
-      then subs_shft(k, lift_subst mk_cl s (subs_shft(k'-k, s')))
-      else subs_shft(k', lift_subst mk_cl (el_liftn (k-k') s) s')
-  | LIFT(k',s') ->
-      if k<k'
-      then subs_liftn k (lift_subst mk_cl s (subs_liftn (k'-k) s'))
-      else subs_liftn k' (lift_subst mk_cl (el_liftn (k-k') s) s')
-
-let rec comp mk_cl s1 s2 =
-  match (s1, s2) with
-    | _, ESID _ -> s1
-    | ESID _, _ -> s2
-    | SHIFT(k,s), _ -> subs_shft(k, comp mk_cl s s2)
-    | _, CONS(x,s') ->
-        CONS(Array.Fun1.map (fun s t -> mk_cl(s,t)) s1 x, comp mk_cl s1 s')
-    | CONS(x,s), SHIFT(k,s') ->
-        let lg = Array.length x in
-        if k == lg then comp mk_cl s s'
-        else if k > lg then comp mk_cl s (SHIFT(k-lg, s'))
-        else comp mk_cl (CONS(Array.sub x 0 (lg-k), s)) s'
-    | CONS(x,s), LIFT(k,s') ->
-        let lg = Array.length x in
-        if k == lg then CONS(x, comp mk_cl s s')
-        else if k > lg then CONS(x, comp mk_cl s (LIFT(k-lg, s')))
-        else
-          CONS(Array.sub x (lg-k) k,
-               comp mk_cl (CONS(Array.sub x 0 (lg-k),s)) s')
-    | LIFT(k,s), SHIFT(k',s') ->
-        if k<k'
-        then subs_shft(k, comp mk_cl s (subs_shft(k'-k, s')))
-        else subs_shft(k', comp mk_cl (subs_liftn (k-k') s) s')
-    | LIFT(k,s), LIFT(k',s') ->
-        if k<k'
-        then subs_liftn k (comp mk_cl s (subs_liftn (k'-k) s'))
-        else subs_liftn k' (comp mk_cl (subs_liftn (k-k') s) s')
+(* Variant of skewed lists enriched w.r.t. a monoid. See the Range module.
+
+  In addition to the indexed data, every node contains a monoid element, in our
+  case, integers. It corresponds to the number of partial shifts to apply when
+  reaching this subtree. The total shift is obtained by summing all the partial
+  shifts encountered in the tree traversal. For efficiency, we also cache the
+  sum of partial shifts of the whole subtree as the last argument of the [Node]
+  constructor.
+
+  A more intuitive but inefficient representation of this data structure would
+  be a list of terms interspeded with shifts, as in
+
+  type 'a subst = NIL | CONS of 'a or_var * 'a subst | SHIFT of 'a subst
+
+  On this inefficient representation, the typing rules would be:
+
+  · ⊢ NIL : ·
+  Γ ⊢ σ : Δ and Γ ⊢ t : A{σ} implies Γ ⊢ CONS (t, σ) : Δ, A
+  Γ ⊢ σ : Δ implies Γ, A ⊢ SHIFT σ : Δ
+
+  The efficient representation is isomorphic to this naive variant, except that
+  shifts are grouped together, and we use skewed lists instead of lists.
+
+*)
+
+type shf = int
+let cmp n m = n + m
+let idn = 0
+
+type 'a or_var = Arg of 'a | Var of int
+
+type 'a tree =
+| Leaf of shf * 'a or_var
+| Node of shf * 'a or_var * 'a tree * 'a tree * shf
+(*
+  Invariants:
+  - All trees are complete.
+  - Define get_shift inductively as [get_shift (Leaf (w, _)) := w] and
+    [get_shift (Node (w, _, t1, t2, _)) := w + t1 + t2] then for every tree
+    of the form Node (_, _, t1, t2, sub), we must have
+    sub = get_shift t1 + get_shift t2.
+
+  In the naive semantics:
+
+  Leaf (w, x) := SHIFT^w (CONS (x, NIL))
+  Node (w, x, t1, t2, _) := SHIFT^w (CONS (x, t1 @ t2))
+
+*)
+
+type 'a subs = Nil of shf * int | Cons of int * 'a tree * 'a subs
+(*
+  In the naive semantics mentioned above, we have the following.
+
+  Nil (w, n) stands for SHIFT^w (ID n) where ID n is a compact form of identity
+  substitution, defined inductively as
+
+  ID 0 := NIL
+  ID (S n) := CONS (Var 1, SHIFT (ID n))
+
+  Cons (h, t, s) stands for (t @ s) and h is the total number of values in the
+  tree t. In particular, it is always of the form 2^n - 1 for some n.
+*)
+
+(* Returns the number of shifts contained in the whole tree. *)
+let eval = function
+| Leaf (w, _) -> w
+| Node (w1, _, _, _, w2) -> cmp w1 w2
+
+let leaf x = Leaf (idn, x)
+let node x t1 t2 = Node (idn, x, t1, t2, cmp (eval t1) (eval t2))
+
+let rec tree_get h w t i = match t with
+| Leaf (w', x) ->
+  let w = cmp w w' in
+  if i = 0 then w, Inl x else assert false
+| Node (w', x, t1, t2, _) ->
+  let w = cmp w w' in
+  if i = 0 then w, Inl x
+  else
+    let h = h / 2 in
+    if i <= h then tree_get h w t1 (i - 1)
+    else tree_get h (cmp w (eval t1)) t2 (i - h - 1)
+
+let rec get w l i = match l with
+| Nil (w', n) ->
+  let w = cmp w w' in
+  if i < n then w, Inl (Var (i + 1))
+  else n + w, Inr (i - n) (* FIXME: double check *)
+| Cons (h, t, rem) ->
+  if i < h then tree_get h w t i else get (cmp (eval t) w) rem (i - h)
+
+let get l i = get idn l i
+
+let tree_write w = function
+| Leaf (w', x) -> Leaf (cmp w w', x)
+| Node (w', x, t1, t2, wt) -> Node (cmp w w', x, t1, t2, wt)
+
+let write w l = match l with
+| Nil (w', n) -> Nil (cmp w w', n)
+| Cons (h, t, rem) -> Cons (h, tree_write w t, rem)
+
+let cons x l = match l with
+| Cons (h1, t1, Cons (h2, t2, rem)) ->
+  if Int.equal h1 h2 then Cons (1 + h1 + h2, node x t1 t2, rem)
+  else Cons (1, leaf x, l)
+| _ -> Cons (1, leaf x, l)
+
+let expand_rel n s =
+  let k, v = get s (n - 1) in
+  match v with
+  | Inl (Arg v) -> Inl (k, v)
+  | Inl (Var i) -> Inr (k + i, None)
+  | Inr i -> Inr (k + i + 1, Some (i + 1))
+
+let is_subs_id = function
+| Nil (w, _) -> Int.equal w 0
+| Cons (_, _, _) -> false
+
+let subs_cons v s = cons (Arg v) s
+
+let rec push_vars i s =
+  if Int.equal i 0 then s
+  else push_vars (pred i) (cons (Var i) s)
+
+let subs_liftn n s =
+  if Int.equal n 0 then s
+  else match s with
+  | Nil (0, m) -> Nil (0, m + n) (* Preserve identity substitutions *)
+  | Nil _ | Cons _ ->
+    let s = write n s in
+    push_vars n s
+
+let subs_lift s = match s with
+| Nil (0, m) -> Nil (0, m + 1) (* Preserve identity substitutions *)
+| Nil _ | Cons _ ->
+  cons (Var 1) (write 1 s)
+
+let subs_id n = Nil (0, n)
+
+let subs_shft (n, s) = write n s
+
+(* pop is the n-ary tailrec variant of a function whose typing rules would be
+   given as follows. Assume Γ ⊢ e : Δ, A, then
+   - Γ := Ξ, A, Ω for some Ξ and Ω with |Ω| := fst (pop e)
+   - Ξ ⊢ snd (pop e) : Δ
+*)
+let rec pop n i e =
+  if Int.equal n 0 then i, e
+  else match e with
+  | ELID -> i, e
+  | ELLFT (k, e) ->
+    if k <= n then pop (n - k) i e
+    else i, ELLFT (k - n, e)
+  | ELSHFT (e, k) -> pop n (i + k) e
+
+let apply mk e = function
+| Var i -> Var (reloc_rel i e)
+| Arg v -> Arg (mk e v)
+
+let rec tree_map mk e = function
+| Leaf (w, x) ->
+  let (n, e) = pop w 0 e in
+  Leaf (w + n, apply mk e x), e
+| Node (w, x, t1, t2, _) ->
+  let (n, e) = pop w 0 e in
+  let x = apply mk e x in
+  let t1, e = tree_map mk e t1 in
+  let t2, e = tree_map mk e t2 in
+  Node (w + n, x, t1, t2, cmp (eval t1) (eval t2)), e
+
+let rec lift_id e n = match e with
+| ELID -> Nil (0, n)
+| ELSHFT (e, k) -> write k (lift_id e n)
+| ELLFT (k, e) ->
+  if k <= n then subs_liftn k (lift_id e (n - k))
+  else assert false
+
+let rec lift_subst mk e s = match s with
+| Nil (w, m) ->
+  let (n, e) = pop w 0 e in
+  write (w + n) (lift_id e m)
+| Cons (h, t, rem) ->
+  let t, e = tree_map mk e t in
+  let rem = lift_subst mk e rem in
+  Cons (h, t, rem)
diff --git a/kernel/esubst.mli b/kernel/esubst.mli
index 4239e42adc..8ff29ab07a 100644
--- a/kernel/esubst.mli
+++ b/kernel/esubst.mli
@@ -11,28 +11,38 @@
 (** Explicit substitutions *)
 
 (** {6 Explicit substitutions } *)
-(** Explicit substitutions of type ['a].
-    - ESID(n)             = %n END   bounded identity
-    - CONS([|t1..tn|],S)  = (S.t1...tn)    parallel substitution
-        (beware of the order: indice 1 is substituted by tn)
-    - SHIFT(n,S)          = (^n o S) terms in S are relocated with n vars
-    - LIFT(n,S)           = (%n S) stands for ((^n o S).n...1)
-         (corresponds to S crossing n binders) *)
-type 'a subs = private
-  | ESID of int
-  | CONS of 'a array * 'a subs
-  | SHIFT of int * 'a subs
-  | LIFT of int * 'a subs
+(** Explicit substitutions for some type of terms ['a].
+
+    Assuming terms enjoy a notion of typability Γ ⊢ t : A, where Γ is a
+    telescope and A a type, substitutions can be typed as Γ ⊢ σ : Δ, where
+    as a first approximation σ is a list of terms u₁; ...; uₙ s.t.
+    Δ := (x₁ : A₁), ..., (xₙ : Aₙ) and Γ ⊢ uᵢ : Aᵢ{u₁...uᵢ₋₁} for all 1 ≤ i ≤ n.
+
+    Substitutions can be applied to terms as follows, and furthermore
+    if Γ ⊢ σ : Δ and Δ ⊢ t : A, then Γ ⊢ t{σ} : A{σ}.
+
+    We make the typing rules explicit below, but we omit the explicit De Bruijn
+    fidgetting and leave relocations implicit in terms and types.
+
+*)
+type 'a subs
 
 (** Derived constructors granting basic invariants *)
+
+(** Assuming |Γ| = n, Γ ⊢ subs_id n : Γ *)
 val subs_id : int -> 'a subs
-val subs_cons: 'a array * 'a subs -> 'a subs
+
+(** Assuming Γ ⊢ σ : Δ and Γ ⊢ t : A{σ}, then Γ ⊢ subs_cons t σ : Δ, A *)
+val subs_cons: 'a -> 'a subs -> 'a subs
+
+(** Assuming Γ ⊢ σ : Δ and |Ξ| = n, then Γ, Ξ ⊢ subs_shft (n, σ) : Δ *)
 val subs_shft: int * 'a subs -> 'a subs
+
+(** Unary variant of {!subst_liftn}. *)
 val subs_lift: 'a subs -> 'a subs
-val subs_liftn: int -> 'a subs -> 'a subs
 
-(** [subs_shift_cons(k,s,[|t1..tn|])] builds (^k s).t1..tn *)
-val subs_shift_cons: int * 'a subs * 'a array -> 'a subs
+(** Assuming Γ ⊢ σ : Δ and |Ξ| = n, then Γ, Ξ ⊢ subs_liftn n σ : Δ, Ξ *)
+val subs_liftn: int -> 'a subs -> 'a subs
 
 (** [expand_rel k subs] expands de Bruijn [k] in the explicit substitution
     [subs]. The result is either (Inl(lams,v)) when the variable is
@@ -51,7 +61,6 @@ val is_subs_id: 'a subs -> bool
     mk_clos is used when a closure has to be created, i.e. when
     s1 is applied on an element of s2.
 *)
-val comp : ('a subs * 'a -> 'a) -> 'a subs -> 'a subs -> 'a subs
 
 (** {6 Compact representation } *)
 (** Compact representation of explicit relocations
@@ -60,6 +69,10 @@ val comp : ('a subs * 'a -> 'a) -> 'a subs -> 'a subs -> 'a subs
 
     Invariant ensured by the private flag: no lift contains two consecutive
     [ELSHFT] nor two consecutive [ELLFT].
+
+    Relocations are a particular kind of substitutions that only contain
+    variables. In particular, [el_*] enjoys the same typing rules as the
+    equivalent substitution function [subs_*].
 *)
 type lift = private
   | ELID
@@ -77,5 +90,7 @@ val is_lift_id : lift -> bool
     substitution equivalent to applying el then s. Argument
     mk_clos is used when a closure has to be created, i.e. when
     el is applied on an element of s.
+
+    That is, if Γ ⊢ e : Δ and Δ ⊢ σ : Ξ, then Γ ⊢ lift_subst mk e σ : Ξ.
 *)
 val lift_subst : (lift -> 'a -> 'b) -> lift -> 'a subs -> 'b subs
diff --git a/kernel/nativelambda.ml b/kernel/nativelambda.ml
index 18f16f427d..b27c53ef0f 100644
--- a/kernel/nativelambda.ml
+++ b/kernel/nativelambda.ml
@@ -102,7 +102,7 @@ let decompose_Llam_Llet lam =
 let subst_id = subs_id 0
 let lift = subs_lift
 let liftn = subs_liftn
-let cons v subst = subs_cons([|v|], subst)
+let cons v subst = subs_cons v subst
 let shift subst = subs_shft (1, subst)
 
 (* Linked code location utilities *)
diff --git a/kernel/typeops.ml b/kernel/typeops.ml
index 85e24f87b7..802a32b0e7 100644
--- a/kernel/typeops.ml
+++ b/kernel/typeops.ml
@@ -198,7 +198,7 @@ let type_of_apply env func funt argsv argstv =
         let argt = argstv.(i) in
         let c1 = term_of_fconstr c1 in
         begin match conv_leq false env argt c1 with
-        | () -> apply_rec (i+1) (mk_clos (Esubst.subs_cons ([| inject arg |], e)) c2)
+        | () -> apply_rec (i+1) (mk_clos (Esubst.subs_cons (inject arg) e) c2)
         | exception NotConvertible ->
           error_cant_apply_bad_type env
             (i+1,c1,argt)
diff --git a/kernel/vmlambda.ml b/kernel/vmlambda.ml
index 9cca204e8c..390fa58883 100644
--- a/kernel/vmlambda.ml
+++ b/kernel/vmlambda.ml
@@ -179,7 +179,7 @@ let decompose_Llam lam =
 let subst_id = subs_id 0
 let lift = subs_lift
 let liftn = subs_liftn
-let cons v subst = subs_cons([|v|], subst)
+let cons v subst = subs_cons v subst
 let shift subst = subs_shft (1, subst)
 
 (* A generic map function *)
author	coqbot-app[bot]	2020-12-09 18:01:37 +0000
committer	GitHub	2020-12-09 18:01:37 +0000
commit	4fbc04b0fe0b89edfe35438d01d3b664fc21e6f8 (patch)
tree	7ae1df67aecdccd63d86f251306e2707a07f8bc9 /kernel
parent	9e0ca704fa8273a8337ff9e118d2d08620af8962 (diff)
parent	e0a943d40c6749e602421ac89059e0d6caf57518 (diff)