aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorwhitequark2018-06-17 12:06:17 +0000
committerPierre-Marie Pédrot2018-10-03 14:00:42 +0200
commit326f52393f03b4255822f80ffaa52c4e585c8a61 (patch)
treec8ae42c708a10a7d733fd19705f0aaa69ebf6a12
parent016f4a302090bcbb4ad47197dda2c60d6f598f6a (diff)
Rewrite CEphemeron to use a type-safe implementation based on GADTs.
This eliminates 2 uses of Obj from TCB. Moreover, with the new implementation, a key that is marshalled and then unmarshalled does not compare equal with itself even using the polymorphic equality function. See the comment in CEphemeron.ml for details on implementation.
-rw-r--r--clib/cEphemeron.ml164
-rw-r--r--clib/cEphemeron.mli4
2 files changed, 94 insertions, 74 deletions
diff --git a/clib/cEphemeron.ml b/clib/cEphemeron.ml
index 3136d66e34..75b75a1dd4 100644
--- a/clib/cEphemeron.ml
+++ b/clib/cEphemeron.ml
@@ -8,84 +8,104 @@
(* * (see LICENSE file for the text of the license) *)
(************************************************************************)
-type key_type = int
-
-type boxed_key = key_type ref ref
-
-let mk_key : unit -> boxed_key =
- (* TODO: take a random value here. Is there a random function in OCaml? *)
- let bid = ref 0 in
- (* According to OCaml Gc module documentation, Pervasives.ref is one of the
- few ways of getting a boxed value the compiler will never alias. *)
- fun () -> incr bid; Pervasives.ref (Pervasives.ref !bid)
-
-(* A phantom type to preserve type safety *)
-type 'a key = boxed_key
-
-(* Comparing keys with == grants that if a key is unmarshalled (in the same
- process where it was created or in another one) it is not mistaken for
- an already existing one (unmarshal has no right to alias). If the initial
- value of bid is taken at random, then one also avoids potential collisions *)
-module HT = Hashtbl.Make(struct
- type t = key_type ref
- let equal k1 k2 = k1 == k2
- let hash id = !id
+(* Type-safe implementation by whitequark *)
+
+(* An extensible variant has an internal representation equivalent
+ to the following:
+
+ type constr = {
+ name: string,
+ id: int
+ }
+ type value = (*Object_tag*) constr * v1 * v2...
+
+ and the code generated by the compiler looks like:
+
+ (* type X += Y *)
+ let constr_Y = alloc { "Y", %caml_fresh_oo_id () }
+ (* match x with Y -> a | _ -> b *)
+ if x.0 == constr_Y then a else b
+
+ and the polymorphic comparison function works like:
+
+ let equal = fun (c1, ...) (c2, ...) ->
+ c1.id == c2.id
+
+ In every new extension constructor, the name field is a constant
+ string and the id field is filled with an unique[1] value returned
+ by %caml_fresh_oo_id. Moreover, every value of an extensible variant
+ type is allocated as a new block.
+
+ [1]: On 64-bit systems. On 32-bit systems, calling %caml_fresh_oo_id
+ 2**30 times will result in a wraparound. Note that this does
+ not affect soundness because constructors are compared by
+ physical equality during matching. See OCaml PR7809 for code
+ demonstrating this.
+
+ An extensible variant can be marshalled and unmarshalled, and
+ is guaranteed to not be equal to itself after unmarshalling,
+ since the id field is filled with another unique value.
+
+ Note that the explanation above is purely informative and we
+ do not depend on the exact representation of extensible variants,
+ only on the fact that no two constructor representations ever
+ alias. In particular, if the definition of constr is replaced with:
+
+ type constr = int
+
+ (where the value is truly unique for every created constructor),
+ correctness is preserved.
+ *)
+type 'a typ = ..
+
+(* Erases the contained type so that the key can be put in a hash table. *)
+type boxkey = Box : 'a typ -> boxkey [@@unboxed]
+
+(* Carry the type we just erased with the actual key. *)
+type 'a key = 'a typ * boxkey
+
+module EHashtbl = Ephemeron.K1.Make(struct
+ type t = boxkey
+ let equal = (==)
+ let hash = Hashtbl.hash
end)
-(* A key is the (unique) value inside a boxed key, hence it does not
- keep its corresponding boxed key reachable (replacing key_type by boxed_key
- would make the key always reachable) *)
-let values : Obj.t HT.t = HT.create 1001
-
-(* To avoid a race condition between the finalization function and
- get/create on the values hashtable, the finalization function just
- enqueues in an imperative list the item to be collected. Being the list
- imperative, even if the Gc enqueues an item while run_collection is operating,
- the tail of the list is eventually set to Empty on completion.
- Kudos to the authors of Why3 that came up with this solution for their
- implementation of weak hash tables! *)
-type imperative_list = cell ref
-and cell = Empty | Item of key_type ref * imperative_list
-
-let collection_queue : imperative_list ref = ref (ref Empty)
-
-let enqueue x = collection_queue := ref (Item (!x, !collection_queue))
-
-let run_collection () =
- let rec aux l = match !l with
- | Empty -> ()
- | Item (k, tl) -> HT.remove values k; aux tl in
- let l = !collection_queue in
- aux l;
- l := Empty
-
-(* The only reference to the boxed key is the one returned, when the user drops
- it the value eventually disappears from the values table above *)
-let create (v : 'a) : 'a key =
- run_collection ();
- let k = mk_key () in
- HT.add values !k (Obj.repr v);
- Gc.finalise enqueue k;
- k
+type value = { get : 'k. 'k typ -> 'k } [@@unboxed]
+
+let values : value EHashtbl.t =
+ EHashtbl.create 1001
+
+let create : type v. v -> v key =
+ fun value ->
+ let module M = struct
+ type _ typ += Typ : v typ
+
+ let get : type k. k typ -> k =
+ fun typ ->
+ match typ with
+ | Typ -> value
+ | _ -> assert false
+
+ let boxkey = Box Typ
+ let key = Typ, boxkey
+ let value = { get }
+ end in
+ EHashtbl.add values M.boxkey M.value;
+ M.key
(* Avoid raising Not_found *)
exception InvalidKey
-let get (k : 'a key) : 'a =
- run_collection ();
- try Obj.obj (HT.find values !k)
+let get (typ, boxkey) =
+ try (EHashtbl.find values boxkey).get typ
with Not_found -> raise InvalidKey
-(* Simple utils *)
-let default k v =
- try get k
- with InvalidKey -> v
+let default (typ, boxkey) default =
+ try (EHashtbl.find values boxkey).get typ
+ with Not_found -> default
-let iter_opt k f =
- match
- try Some (get k)
- with InvalidKey -> None
- with
- | None -> ()
- | Some v -> f v
+let iter_opt (typ, boxkey) f =
+ try f ((EHashtbl.find values boxkey).get typ)
+ with Not_found -> ()
-let clear () = run_collection ()
+(* NB: clear only removes dead bindings, so it uses Ephemeron.S.clean *)
+let clear () = EHashtbl.clean values
diff --git a/clib/cEphemeron.mli b/clib/cEphemeron.mli
index 8e753d0b62..00cdf87ab5 100644
--- a/clib/cEphemeron.mli
+++ b/clib/cEphemeron.mli
@@ -33,7 +33,7 @@
An ['a key] can always be marshalled. When marshalled, a key loses its
value. The function [get] raises Not_found on unmarshalled keys.
-
+
If a key is garbage collected, the corresponding value is garbage
collected too (unless extra references to it exist).
In short no memory management hassle, keys can just replace their
@@ -48,7 +48,7 @@ exception InvalidKey
val get : 'a key -> 'a
(* These never fail. *)
-val iter_opt : 'a key -> ('a -> unit) -> unit
val default : 'a key -> 'a -> 'a
+val iter_opt : 'a key -> ('a -> unit) -> unit
val clear : unit -> unit