From 33509e348a6c9f505a6ebf714d8b142fc9df62a0 Mon Sep 17 00:00:00 2001 From: ppedrot Date: Wed, 26 Sep 2012 19:03:17 +0000 Subject: Cleaning, renaming obscure functions and documenting in Hashcons. git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/coq/trunk@15834 85f007b7-540e-0410-9357-904b9bb8a0f7 --- lib/hashcons.ml | 34 ++++++++-------- lib/hashcons.mli | 67 +++++++++++++++++++++++++------- lib/hashset.ml | 109 ++++++++++++++++++++++++++++++++++++++++++++++++++++ lib/hashset.mli | 41 ++++++++++++++++++++ lib/hashtbl_alt.ml | 109 ---------------------------------------------------- lib/hashtbl_alt.mli | 41 -------------------- lib/lib.mllib | 2 +- 7 files changed, 221 insertions(+), 182 deletions(-) create mode 100644 lib/hashset.ml create mode 100644 lib/hashset.mli delete mode 100644 lib/hashtbl_alt.ml delete mode 100644 lib/hashtbl_alt.mli (limited to 'lib') diff --git a/lib/hashcons.ml b/lib/hashcons.ml index 7c2897be09..f71af15c75 100644 --- a/lib/hashcons.ml +++ b/lib/hashcons.ml @@ -13,29 +13,29 @@ (* [t] is the type of object to hash-cons * [u] is the type of hash-cons functions for the sub-structures * of objects of type t (u usually has the form (t1->t1)*(t2->t2)*...). - * [hash_sub u x] is a function that hash-cons the sub-structures of x using + * [hashcons u x] is a function that hash-cons the sub-structures of x using * the hash-consing functions u provides. * [equal] is a comparison function. It is allowed to use physical equality - * on the sub-terms hash-consed by the hash_sub function. + * on the sub-terms hash-consed by the hashcons function. * [hash] is the hash function given to the Hashtbl.Make function * * Note that this module type coerces to the argument of Hashtbl.Make. *) -module type Comp = +module type HashconsedType = sig type t type u - val hash_sub : u -> t -> t + val hashcons : u -> t -> t val equal : t -> t -> bool val hash : t -> int end -(* The output is a function f such that - * [f ()] has the side-effect of creating (internally) a hash-table of the +(* The output is a function [generate] such that + * [generate ()] has the side-effect of creating (internally) a hash-table of the * hash-consed objects. The result is a function taking the sub-hashcons * functions and an object, and hashcons it. It does not really make sense - * to call f() with different sub-hcons functions. That's why we use the + * to call generate() with different sub-hcons functions. That's why we use the * wrappers simple_hcons, recursive_hcons, ... The latter just take as * argument the sub-hcons functions (the tables are created at that moment), * and returns the hcons function for t. @@ -45,10 +45,10 @@ module type S = sig type t type u - val f : unit -> (u -> t -> t) + val generate : unit -> (u -> t -> t) end -module Make(X:Comp) = +module Make (X : HashconsedType) = struct type t = X.t type u = X.u @@ -66,20 +66,20 @@ module Make(X:Comp) = (* The table is created when () is applied. * Hashconsing is then very simple: - * 1- hashcons the subterms using hash_sub and u + * 1- hashcons the subterms using hashcons and u * 2- look up in the table, if we do not get a hit, we add it *) - let f () = + let generate () = let tab = Htbl.create 97 in (fun u x -> - let y = X.hash_sub u x in + let y = X.hashcons u x in (* incr acces;*) try let r = Htbl.find tab y in(* incr succes;*) r with Not_found -> Htbl.add tab y y; y) end (* A few usefull wrappers: - * takes as argument the function f above and build a function of type + * takes as argument the function [generate] above and build a function of type * u -> t -> t that creates a fresh table each time it is applied to the * sub-hcons functions. *) @@ -136,7 +136,7 @@ module Hstring = Make( struct type t = string type u = unit - let hash_sub () s =(* incr accesstr;*) s + let hashcons () s =(* incr accesstr;*) s let equal s1 s2 =(* incr comparaisonstr; if*) s1=s2(* then (incr successtr; true) else false*) let hash = Hashtbl.hash @@ -175,7 +175,7 @@ module Hobj = Make( struct type t = Obj.t type u = (Obj.t -> Obj.t) * unit - let hash_sub (hrec,_) = hash_obj hrec + let hashcons (hrec,_) = hash_obj hrec let equal = comp_obj let hash = Hashtbl.hash end) @@ -185,8 +185,8 @@ module Hobj = Make( *) (* string : string -> string *) (* obj : Obj.t -> Obj.t *) -let string = register_hcons (simple_hcons Hstring.f) () -let obj = register_hcons (recursive_hcons Hobj.f) () +let string = register_hcons (simple_hcons Hstring.generate) () +let obj = register_hcons (recursive_hcons Hobj.generate) () (* The unsafe polymorphic hashconsing function *) let magic_hash (c : 'a) = diff --git a/lib/hashcons.mli b/lib/hashcons.mli index b4a9da2f86..bafec6f913 100644 --- a/lib/hashcons.mli +++ b/lib/hashcons.mli @@ -8,43 +8,82 @@ (** Generic hash-consing. *) -module type Comp = +module type HashconsedType = sig + (** {6 Generic hashconsing signature} + + Given an equivalence relation [equal], a hashconsing function is a + function that associates the same canonical element to two elements + related by [equal]. Usually, the element chosen is canonical w.r.t. + physical equality [(==)], so as to reduce memory consumption and + enhance efficiency of equality tests. + + In order to ensure canonicality, we need a way to remember the element + associated to a class of equivalence; this is done using a hidden state + generated by the [Make] functor. + *) + type t + (** Type of objects to hashcons. *) type u - val hash_sub : u -> t -> t + (** Type of hashcons functions for the sub-structures contained in [t]. + Usually a tuple of functions. *) + val hashcons : u -> t -> t + (** The actual hashconsing function, using its fist argument to recursively + hashcons substructures. *) val equal : t -> t -> bool + (** A comparison function. It is allowed to use physical equality + on the sub-terms hashconsed by the [hashcons] function. *) val hash : t -> int + (** A hash function passed to the underlying hashtable structure. *) end module type S = sig type t + (** Type of objects to hashcons. *) type u - val f : unit -> (u -> t -> t) + (** Type of hashcons functions for the sub-structures contained in [t]. *) + val generate : unit -> (u -> t -> t) + (** This has the side-effect of creating (internally) a hashtable of the + hashconsed objects. The result is a function taking the sub-hashcons + functions and an object, and hashconsing it. It does not really make sense + to call [generate] with different sub-hcons functions. That's why we use the + wrappers [simple_hcons], [recursive_hcons], ... The latter just take as + argument the sub-hcons functions (the tables are created at that moment), + and returns the hcons function for [t]. *) end -module Make(X:Comp) : (S with type t = X.t and type u = X.u) +module Make (X : HashconsedType) : (S with type t = X.t and type u = X.u) +(** Create a new hashconsing, given canonicalization functions. *) + +(** {6 Wrappers} *) + +(** * These are intended to be used together with instances of the [Make] + functor. *) val simple_hcons : (unit -> 'u -> 't -> 't) -> ('u -> 't -> 't) +(** [simple_hcons f sub obj] creates a new table each time it is applied to any + sub-hash function [sub]. *) + val recursive_hcons : (unit -> ('t -> 't) * 'u -> 't -> 't) -> ('u -> 't -> 't) +(** As [simple_hcons] but intended to be used with well-founded data structures. *) + val recursive_loop_hcons : (unit -> ('t -> 't) * 'u -> 't -> 't) -> ('u -> 't -> 't) +(** As [simple_hcons] but intended to be used with any recursive data structure, + in particular if they contain loops. *) + val recursive2_hcons : (unit -> ('t1 -> 't1) * ('t2 -> 't2) * 'u1 -> 't1 -> 't1) -> (unit -> ('t1 -> 't1) * ('t2 -> 't2) * 'u2 -> 't2 -> 't2) -> 'u1 -> 'u2 -> ('t1 -> 't1) * ('t2 -> 't2) +(** As [recursive_hcons] but with two mutually recursive structures. *) -(** Declaring and reinitializing global hash-consing functions *) - -val init : unit -> unit -val register_hcons : ('u -> 't -> 't) -> ('u -> 't -> 't) +(** {6 Hashconsing of usual structures} *) module Hstring : (S with type t = string and type u = unit) -module Hobj : (S with type t = Obj.t and type u = (Obj.t -> Obj.t) * unit) - -val string : string -> string -val obj : Obj.t -> Obj.t - -val magic_hash : 'a -> 'a +(** Hashconsing of strings. *) +module Hobj : (S with type t = Obj.t and type u = (Obj.t -> Obj.t) * unit) +(** Hashconsing of OCaml values. *) diff --git a/lib/hashset.ml b/lib/hashset.ml new file mode 100644 index 0000000000..28767df8f4 --- /dev/null +++ b/lib/hashset.ml @@ -0,0 +1,109 @@ +(************************************************************************) +(* v * The Coq Proof Assistant / The Coq Development Team *) +(* t -> bool +end + +module type S = sig + type elt + (* [may_add_and_get key constr] uses [key] to look for [constr] + in the hash table [H]. If [constr] is in [H], returns the + specific representation that is stored in [H]. Otherwise, + [constr] is stored in [H] and will be used as the canonical + representation of this value in the future. *) + val may_add_and_get : int -> elt -> elt +end + +module Make (E : EqType) = + struct + + type elt = E.t + + type bucketlist = Empty | Cons of elt * int * bucketlist + + let initial_size = 19991 + let table_data = ref (Array.make initial_size Empty) + let table_size = ref 0 + + let resize () = + let odata = !table_data in + let osize = Array.length odata in + let nsize = min (2 * osize + 1) Sys.max_array_length in + if nsize <> osize then begin + let ndata = Array.create nsize Empty in + let rec insert_bucket = function + | Empty -> () + | Cons (key, hash, rest) -> + let nidx = hash mod nsize in + ndata.(nidx) <- Cons (key, hash, ndata.(nidx)); + insert_bucket rest + in + for i = 0 to osize - 1 do insert_bucket odata.(i) done; + table_data := ndata + end + + let add hash key = + let odata = !table_data in + let osize = Array.length odata in + let i = hash mod osize in + odata.(i) <- Cons (key, hash, odata.(i)); + incr table_size; + if !table_size > osize lsl 1 then resize () + + let find_rec hash key bucket = + let rec aux = function + | Empty -> + add hash key; key + | Cons (k, h, rest) -> + if hash == h && E.equal key k then k else aux rest + in + aux bucket + + let may_add_and_get hash key = + let odata = !table_data in + match odata.(hash mod (Array.length odata)) with + | Empty -> add hash key; key + | Cons (k1, h1, rest1) -> + if hash == h1 && E.equal key k1 then k1 else + match rest1 with + | Empty -> add hash key; key + | Cons (k2, h2, rest2) -> + if hash == h2 && E.equal key k2 then k2 else + match rest2 with + | Empty -> add hash key; key + | Cons (k3, h3, rest3) -> + if hash == h2 && E.equal key k3 then k3 + else find_rec hash key rest3 + +end + +module Combine = struct + (* These are helper functions to combine the hash keys in a similar + way as [Hashtbl.hash] does. The constants [alpha] and [beta] must + be prime numbers. There were chosen empirically. Notice that the + problem of hashing trees is hard and there are plenty of study on + this topic. Therefore, there must be room for improvement here. *) + let alpha = 65599 + let beta = 7 + let combine x y = x * alpha + y + let combine3 x y z = combine x (combine y z) + let combine4 x y z t = combine x (combine3 y z t) + let combinesmall x y = beta * x + y +end diff --git a/lib/hashset.mli b/lib/hashset.mli new file mode 100644 index 0000000000..4b260791b8 --- /dev/null +++ b/lib/hashset.mli @@ -0,0 +1,41 @@ +(************************************************************************) +(* v * The Coq Proof Assistant / The Coq Development Team *) +(* t -> bool +end + +module type S = sig + type elt + (* [may_add_and_get key constr] uses [key] to look for [constr] + in the hash table [H]. If [constr] is in [H], returns the + specific representation that is stored in [H]. Otherwise, + [constr] is stored in [H] and will be used as the canonical + representation of this value in the future. *) + val may_add_and_get : int -> elt -> elt +end + +module Make (E : EqType) : S with type elt = E.t + +module Combine : sig + val combine : int -> int -> int + val combinesmall : int -> int -> int + val combine3 : int -> int -> int -> int + val combine4 : int -> int -> int -> int -> int +end diff --git a/lib/hashtbl_alt.ml b/lib/hashtbl_alt.ml deleted file mode 100644 index 2780afe8fd..0000000000 --- a/lib/hashtbl_alt.ml +++ /dev/null @@ -1,109 +0,0 @@ -(************************************************************************) -(* v * The Coq Proof Assistant / The Coq Development Team *) -(* t -> bool -end - -module type S = sig - type elt - (* [may_add_and_get key constr] uses [key] to look for [constr] - in the hash table [H]. If [constr] is in [H], returns the - specific representation that is stored in [H]. Otherwise, - [constr] is stored in [H] and will be used as the canonical - representation of this value in the future. *) - val may_add_and_get : int -> elt -> elt -end - -module Make (E : Hashtype) = - struct - - type elt = E.t - - type bucketlist = Empty | Cons of elt * int * bucketlist - - let initial_size = 19991 - let table_data = ref (Array.make initial_size Empty) - let table_size = ref 0 - - let resize () = - let odata = !table_data in - let osize = Array.length odata in - let nsize = min (2 * osize + 1) Sys.max_array_length in - if nsize <> osize then begin - let ndata = Array.create nsize Empty in - let rec insert_bucket = function - | Empty -> () - | Cons (key, hash, rest) -> - let nidx = hash mod nsize in - ndata.(nidx) <- Cons (key, hash, ndata.(nidx)); - insert_bucket rest - in - for i = 0 to osize - 1 do insert_bucket odata.(i) done; - table_data := ndata - end - - let add hash key = - let odata = !table_data in - let osize = Array.length odata in - let i = hash mod osize in - odata.(i) <- Cons (key, hash, odata.(i)); - incr table_size; - if !table_size > osize lsl 1 then resize () - - let find_rec hash key bucket = - let rec aux = function - | Empty -> - add hash key; key - | Cons (k, h, rest) -> - if hash == h && E.equals key k then k else aux rest - in - aux bucket - - let may_add_and_get hash key = - let odata = !table_data in - match odata.(hash mod (Array.length odata)) with - | Empty -> add hash key; key - | Cons (k1, h1, rest1) -> - if hash == h1 && E.equals key k1 then k1 else - match rest1 with - | Empty -> add hash key; key - | Cons (k2, h2, rest2) -> - if hash == h2 && E.equals key k2 then k2 else - match rest2 with - | Empty -> add hash key; key - | Cons (k3, h3, rest3) -> - if hash == h2 && E.equals key k3 then k3 - else find_rec hash key rest3 - -end - -module Combine = struct - (* These are helper functions to combine the hash keys in a similar - way as [Hashtbl.hash] does. The constants [alpha] and [beta] must - be prime numbers. There were chosen empirically. Notice that the - problem of hashing trees is hard and there are plenty of study on - this topic. Therefore, there must be room for improvement here. *) - let alpha = 65599 - let beta = 7 - let combine x y = x * alpha + y - let combine3 x y z = combine x (combine y z) - let combine4 x y z t = combine x (combine3 y z t) - let combinesmall x y = beta * x + y -end diff --git a/lib/hashtbl_alt.mli b/lib/hashtbl_alt.mli deleted file mode 100644 index 7a1dcd57f3..0000000000 --- a/lib/hashtbl_alt.mli +++ /dev/null @@ -1,41 +0,0 @@ -(************************************************************************) -(* v * The Coq Proof Assistant / The Coq Development Team *) -(* t -> bool -end - -module type S = sig - type elt - (* [may_add_and_get key constr] uses [key] to look for [constr] - in the hash table [H]. If [constr] is in [H], returns the - specific representation that is stored in [H]. Otherwise, - [constr] is stored in [H] and will be used as the canonical - representation of this value in the future. *) - val may_add_and_get : int -> elt -> elt -end - -module Make (E : Hashtype) : S with type elt = E.t - -module Combine : sig - val combine : int -> int -> int - val combinesmall : int -> int -> int - val combine3 : int -> int -> int -> int - val combine4 : int -> int -> int -> int -> int -end diff --git a/lib/lib.mllib b/lib/lib.mllib index 21eb7a4a51..a7d56c666a 100644 --- a/lib/lib.mllib +++ b/lib/lib.mllib @@ -4,6 +4,7 @@ Compat Loc Errors Bigint +Hashset Hashcons Dyn Segmenttree @@ -23,4 +24,3 @@ Heap Dnet Store Unionfind -Hashtbl_alt -- cgit v1.2.3