diff options
| author | whitequark | 2018-06-17 12:06:17 +0000 |
|---|---|---|
| committer | Pierre-Marie Pédrot | 2018-10-03 14:00:42 +0200 |
| commit | 326f52393f03b4255822f80ffaa52c4e585c8a61 (patch) | |
| tree | c8ae42c708a10a7d733fd19705f0aaa69ebf6a12 | |
| parent | 016f4a302090bcbb4ad47197dda2c60d6f598f6a (diff) | |
Rewrite CEphemeron to use a type-safe implementation based on GADTs.
This eliminates 2 uses of Obj from TCB.
Moreover, with the new implementation, a key that is marshalled
and then unmarshalled does not compare equal with itself even using
the polymorphic equality function.
See the comment in CEphemeron.ml for details on implementation.
| -rw-r--r-- | clib/cEphemeron.ml | 164 | ||||
| -rw-r--r-- | clib/cEphemeron.mli | 4 |
2 files changed, 94 insertions, 74 deletions
diff --git a/clib/cEphemeron.ml b/clib/cEphemeron.ml index 3136d66e34..75b75a1dd4 100644 --- a/clib/cEphemeron.ml +++ b/clib/cEphemeron.ml @@ -8,84 +8,104 @@ (* * (see LICENSE file for the text of the license) *) (************************************************************************) -type key_type = int - -type boxed_key = key_type ref ref - -let mk_key : unit -> boxed_key = - (* TODO: take a random value here. Is there a random function in OCaml? *) - let bid = ref 0 in - (* According to OCaml Gc module documentation, Pervasives.ref is one of the - few ways of getting a boxed value the compiler will never alias. *) - fun () -> incr bid; Pervasives.ref (Pervasives.ref !bid) - -(* A phantom type to preserve type safety *) -type 'a key = boxed_key - -(* Comparing keys with == grants that if a key is unmarshalled (in the same - process where it was created or in another one) it is not mistaken for - an already existing one (unmarshal has no right to alias). If the initial - value of bid is taken at random, then one also avoids potential collisions *) -module HT = Hashtbl.Make(struct - type t = key_type ref - let equal k1 k2 = k1 == k2 - let hash id = !id +(* Type-safe implementation by whitequark *) + +(* An extensible variant has an internal representation equivalent + to the following: + + type constr = { + name: string, + id: int + } + type value = (*Object_tag*) constr * v1 * v2... + + and the code generated by the compiler looks like: + + (* type X += Y *) + let constr_Y = alloc { "Y", %caml_fresh_oo_id () } + (* match x with Y -> a | _ -> b *) + if x.0 == constr_Y then a else b + + and the polymorphic comparison function works like: + + let equal = fun (c1, ...) (c2, ...) -> + c1.id == c2.id + + In every new extension constructor, the name field is a constant + string and the id field is filled with an unique[1] value returned + by %caml_fresh_oo_id. Moreover, every value of an extensible variant + type is allocated as a new block. + + [1]: On 64-bit systems. On 32-bit systems, calling %caml_fresh_oo_id + 2**30 times will result in a wraparound. Note that this does + not affect soundness because constructors are compared by + physical equality during matching. See OCaml PR7809 for code + demonstrating this. + + An extensible variant can be marshalled and unmarshalled, and + is guaranteed to not be equal to itself after unmarshalling, + since the id field is filled with another unique value. + + Note that the explanation above is purely informative and we + do not depend on the exact representation of extensible variants, + only on the fact that no two constructor representations ever + alias. In particular, if the definition of constr is replaced with: + + type constr = int + + (where the value is truly unique for every created constructor), + correctness is preserved. + *) +type 'a typ = .. + +(* Erases the contained type so that the key can be put in a hash table. *) +type boxkey = Box : 'a typ -> boxkey [@@unboxed] + +(* Carry the type we just erased with the actual key. *) +type 'a key = 'a typ * boxkey + +module EHashtbl = Ephemeron.K1.Make(struct + type t = boxkey + let equal = (==) + let hash = Hashtbl.hash end) -(* A key is the (unique) value inside a boxed key, hence it does not - keep its corresponding boxed key reachable (replacing key_type by boxed_key - would make the key always reachable) *) -let values : Obj.t HT.t = HT.create 1001 - -(* To avoid a race condition between the finalization function and - get/create on the values hashtable, the finalization function just - enqueues in an imperative list the item to be collected. Being the list - imperative, even if the Gc enqueues an item while run_collection is operating, - the tail of the list is eventually set to Empty on completion. - Kudos to the authors of Why3 that came up with this solution for their - implementation of weak hash tables! *) -type imperative_list = cell ref -and cell = Empty | Item of key_type ref * imperative_list - -let collection_queue : imperative_list ref = ref (ref Empty) - -let enqueue x = collection_queue := ref (Item (!x, !collection_queue)) - -let run_collection () = - let rec aux l = match !l with - | Empty -> () - | Item (k, tl) -> HT.remove values k; aux tl in - let l = !collection_queue in - aux l; - l := Empty - -(* The only reference to the boxed key is the one returned, when the user drops - it the value eventually disappears from the values table above *) -let create (v : 'a) : 'a key = - run_collection (); - let k = mk_key () in - HT.add values !k (Obj.repr v); - Gc.finalise enqueue k; - k +type value = { get : 'k. 'k typ -> 'k } [@@unboxed] + +let values : value EHashtbl.t = + EHashtbl.create 1001 + +let create : type v. v -> v key = + fun value -> + let module M = struct + type _ typ += Typ : v typ + + let get : type k. k typ -> k = + fun typ -> + match typ with + | Typ -> value + | _ -> assert false + + let boxkey = Box Typ + let key = Typ, boxkey + let value = { get } + end in + EHashtbl.add values M.boxkey M.value; + M.key (* Avoid raising Not_found *) exception InvalidKey -let get (k : 'a key) : 'a = - run_collection (); - try Obj.obj (HT.find values !k) +let get (typ, boxkey) = + try (EHashtbl.find values boxkey).get typ with Not_found -> raise InvalidKey -(* Simple utils *) -let default k v = - try get k - with InvalidKey -> v +let default (typ, boxkey) default = + try (EHashtbl.find values boxkey).get typ + with Not_found -> default -let iter_opt k f = - match - try Some (get k) - with InvalidKey -> None - with - | None -> () - | Some v -> f v +let iter_opt (typ, boxkey) f = + try f ((EHashtbl.find values boxkey).get typ) + with Not_found -> () -let clear () = run_collection () +(* NB: clear only removes dead bindings, so it uses Ephemeron.S.clean *) +let clear () = EHashtbl.clean values diff --git a/clib/cEphemeron.mli b/clib/cEphemeron.mli index 8e753d0b62..00cdf87ab5 100644 --- a/clib/cEphemeron.mli +++ b/clib/cEphemeron.mli @@ -33,7 +33,7 @@ An ['a key] can always be marshalled. When marshalled, a key loses its value. The function [get] raises Not_found on unmarshalled keys. - + If a key is garbage collected, the corresponding value is garbage collected too (unless extra references to it exist). In short no memory management hassle, keys can just replace their @@ -48,7 +48,7 @@ exception InvalidKey val get : 'a key -> 'a (* These never fail. *) -val iter_opt : 'a key -> ('a -> unit) -> unit val default : 'a key -> 'a -> 'a +val iter_opt : 'a key -> ('a -> unit) -> unit val clear : unit -> unit |
