aboutsummaryrefslogtreecommitdiff
path: root/clib/cEphemeron.ml
diff options
context:
space:
mode:
authorwhitequark2018-06-17 12:06:17 +0000
committerPierre-Marie Pédrot2018-10-03 14:00:42 +0200
commit326f52393f03b4255822f80ffaa52c4e585c8a61 (patch)
treec8ae42c708a10a7d733fd19705f0aaa69ebf6a12 /clib/cEphemeron.ml
parent016f4a302090bcbb4ad47197dda2c60d6f598f6a (diff)
Rewrite CEphemeron to use a type-safe implementation based on GADTs.
This eliminates 2 uses of Obj from TCB. Moreover, with the new implementation, a key that is marshalled and then unmarshalled does not compare equal with itself even using the polymorphic equality function. See the comment in CEphemeron.ml for details on implementation.
Diffstat (limited to 'clib/cEphemeron.ml')
-rw-r--r--clib/cEphemeron.ml164
1 files changed, 92 insertions, 72 deletions
diff --git a/clib/cEphemeron.ml b/clib/cEphemeron.ml
index 3136d66e34..75b75a1dd4 100644
--- a/clib/cEphemeron.ml
+++ b/clib/cEphemeron.ml
@@ -8,84 +8,104 @@
(* * (see LICENSE file for the text of the license) *)
(************************************************************************)
-type key_type = int
-
-type boxed_key = key_type ref ref
-
-let mk_key : unit -> boxed_key =
- (* TODO: take a random value here. Is there a random function in OCaml? *)
- let bid = ref 0 in
- (* According to OCaml Gc module documentation, Pervasives.ref is one of the
- few ways of getting a boxed value the compiler will never alias. *)
- fun () -> incr bid; Pervasives.ref (Pervasives.ref !bid)
-
-(* A phantom type to preserve type safety *)
-type 'a key = boxed_key
-
-(* Comparing keys with == grants that if a key is unmarshalled (in the same
- process where it was created or in another one) it is not mistaken for
- an already existing one (unmarshal has no right to alias). If the initial
- value of bid is taken at random, then one also avoids potential collisions *)
-module HT = Hashtbl.Make(struct
- type t = key_type ref
- let equal k1 k2 = k1 == k2
- let hash id = !id
+(* Type-safe implementation by whitequark *)
+
+(* An extensible variant has an internal representation equivalent
+ to the following:
+
+ type constr = {
+ name: string,
+ id: int
+ }
+ type value = (*Object_tag*) constr * v1 * v2...
+
+ and the code generated by the compiler looks like:
+
+ (* type X += Y *)
+ let constr_Y = alloc { "Y", %caml_fresh_oo_id () }
+ (* match x with Y -> a | _ -> b *)
+ if x.0 == constr_Y then a else b
+
+ and the polymorphic comparison function works like:
+
+ let equal = fun (c1, ...) (c2, ...) ->
+ c1.id == c2.id
+
+ In every new extension constructor, the name field is a constant
+ string and the id field is filled with an unique[1] value returned
+ by %caml_fresh_oo_id. Moreover, every value of an extensible variant
+ type is allocated as a new block.
+
+ [1]: On 64-bit systems. On 32-bit systems, calling %caml_fresh_oo_id
+ 2**30 times will result in a wraparound. Note that this does
+ not affect soundness because constructors are compared by
+ physical equality during matching. See OCaml PR7809 for code
+ demonstrating this.
+
+ An extensible variant can be marshalled and unmarshalled, and
+ is guaranteed to not be equal to itself after unmarshalling,
+ since the id field is filled with another unique value.
+
+ Note that the explanation above is purely informative and we
+ do not depend on the exact representation of extensible variants,
+ only on the fact that no two constructor representations ever
+ alias. In particular, if the definition of constr is replaced with:
+
+ type constr = int
+
+ (where the value is truly unique for every created constructor),
+ correctness is preserved.
+ *)
+type 'a typ = ..
+
+(* Erases the contained type so that the key can be put in a hash table. *)
+type boxkey = Box : 'a typ -> boxkey [@@unboxed]
+
+(* Carry the type we just erased with the actual key. *)
+type 'a key = 'a typ * boxkey
+
+module EHashtbl = Ephemeron.K1.Make(struct
+ type t = boxkey
+ let equal = (==)
+ let hash = Hashtbl.hash
end)
-(* A key is the (unique) value inside a boxed key, hence it does not
- keep its corresponding boxed key reachable (replacing key_type by boxed_key
- would make the key always reachable) *)
-let values : Obj.t HT.t = HT.create 1001
-
-(* To avoid a race condition between the finalization function and
- get/create on the values hashtable, the finalization function just
- enqueues in an imperative list the item to be collected. Being the list
- imperative, even if the Gc enqueues an item while run_collection is operating,
- the tail of the list is eventually set to Empty on completion.
- Kudos to the authors of Why3 that came up with this solution for their
- implementation of weak hash tables! *)
-type imperative_list = cell ref
-and cell = Empty | Item of key_type ref * imperative_list
-
-let collection_queue : imperative_list ref = ref (ref Empty)
-
-let enqueue x = collection_queue := ref (Item (!x, !collection_queue))
-
-let run_collection () =
- let rec aux l = match !l with
- | Empty -> ()
- | Item (k, tl) -> HT.remove values k; aux tl in
- let l = !collection_queue in
- aux l;
- l := Empty
-
-(* The only reference to the boxed key is the one returned, when the user drops
- it the value eventually disappears from the values table above *)
-let create (v : 'a) : 'a key =
- run_collection ();
- let k = mk_key () in
- HT.add values !k (Obj.repr v);
- Gc.finalise enqueue k;
- k
+type value = { get : 'k. 'k typ -> 'k } [@@unboxed]
+
+let values : value EHashtbl.t =
+ EHashtbl.create 1001
+
+let create : type v. v -> v key =
+ fun value ->
+ let module M = struct
+ type _ typ += Typ : v typ
+
+ let get : type k. k typ -> k =
+ fun typ ->
+ match typ with
+ | Typ -> value
+ | _ -> assert false
+
+ let boxkey = Box Typ
+ let key = Typ, boxkey
+ let value = { get }
+ end in
+ EHashtbl.add values M.boxkey M.value;
+ M.key
(* Avoid raising Not_found *)
exception InvalidKey
-let get (k : 'a key) : 'a =
- run_collection ();
- try Obj.obj (HT.find values !k)
+let get (typ, boxkey) =
+ try (EHashtbl.find values boxkey).get typ
with Not_found -> raise InvalidKey
-(* Simple utils *)
-let default k v =
- try get k
- with InvalidKey -> v
+let default (typ, boxkey) default =
+ try (EHashtbl.find values boxkey).get typ
+ with Not_found -> default
-let iter_opt k f =
- match
- try Some (get k)
- with InvalidKey -> None
- with
- | None -> ()
- | Some v -> f v
+let iter_opt (typ, boxkey) f =
+ try f ((EHashtbl.find values boxkey).get typ)
+ with Not_found -> ()
-let clear () = run_collection ()
+(* NB: clear only removes dead bindings, so it uses Ephemeron.S.clean *)
+let clear () = EHashtbl.clean values