From bfb28c7720d9101e2caedff0c1f1f2cdbf0bad65 Mon Sep 17 00:00:00 2001 From: Xavier Leroy Date: Mon, 24 Jun 2019 17:08:11 +0200 Subject: Support extraction of Coq's string type to OCaml's string type Instead of the default extraction to OCaml's "char list" type. --- plugins/extraction/ExtrOcamlStringPlus.v | 105 +++++++++++++++++++++++++++++++ plugins/extraction/common.ml | 61 ++++++++++++++++++ plugins/extraction/common.mli | 8 +++ plugins/extraction/ocaml.ml | 1 + 4 files changed, 175 insertions(+) create mode 100644 plugins/extraction/ExtrOcamlStringPlus.v diff --git a/plugins/extraction/ExtrOcamlStringPlus.v b/plugins/extraction/ExtrOcamlStringPlus.v new file mode 100644 index 0000000000..fff89a4525 --- /dev/null +++ b/plugins/extraction/ExtrOcamlStringPlus.v @@ -0,0 +1,105 @@ +(************************************************************************) +(* * The Coq Proof Assistant / The Coq Development Team *) +(* v * INRIA, CNRS and contributors - Copyright 1999-2019 *) +(* char +[ +"(* If this appears, you're using Ascii internals. Please don't *) + (fun (b0,b1,b2,b3,b4,b5,b6,b7) -> + let f b i = if b then 1 lsl i else 0 in + Char.chr (f b0 0 + f b1 1 + f b2 2 + f b3 3 + f b4 4 + f b5 5 + f b6 6 + f b7 7))" +] +"(* If this appears, you're using Ascii internals. Please don't *) + (fun f c -> + let n = Char.code c in + let h i = (n land (1 lsl i)) <> 0 in + f (h 0) (h 1) (h 2) (h 3) (h 4) (h 5) (h 6) (h 7))". + +Extract Constant zero => "'\000'". +Extract Constant one => "'\001'". +Extract Constant shift => + "fun b c -> Char.chr (((Char.code c) lsl 1) land 255 + if b then 1 else 0)". + +Extract Inlined Constant ascii_dec => "(=)". +Extract Inlined Constant Ascii.eqb => "(=)". + +Extract Inductive string => "char list" [ "[]" "(::)" ]. + +(* python -c 'print(" ".join(r""" "%s" """.strip() % (r"'"'\''"'" if chr(i) == "'"'"'" else repr(""" "" """.strip()) if chr(i) == """ " """.strip() else repr(chr(i))) for i in range(256)))' # " to satisfy Coq's comment parser *) +Extract Inductive byte => char +["'\x00'" "'\x01'" "'\x02'" "'\x03'" "'\x04'" "'\x05'" "'\x06'" "'\x07'" "'\x08'" "'\t'" "'\n'" "'\x0b'" "'\x0c'" "'\r'" "'\x0e'" "'\x0f'" "'\x10'" "'\x11'" "'\x12'" "'\x13'" "'\x14'" "'\x15'" "'\x16'" "'\x17'" "'\x18'" "'\x19'" "'\x1a'" "'\x1b'" "'\x1c'" "'\x1d'" "'\x1e'" "'\x1f'" "' '" "'!'" "'""'" "'#'" "'$'" "'%'" "'&'" "'\''" "'('" "')'" "'*'" "'+'" "','" "'-'" "'.'" "'/'" "'0'" "'1'" "'2'" "'3'" "'4'" "'5'" "'6'" "'7'" "'8'" "'9'" "':'" "';'" "'<'" "'='" "'>'" "'?'" "'@'" "'A'" "'B'" "'C'" "'D'" "'E'" "'F'" "'G'" "'H'" "'I'" "'J'" "'K'" "'L'" "'M'" "'N'" "'O'" "'P'" "'Q'" "'R'" "'S'" "'T'" "'U'" "'V'" "'W'" "'X'" "'Y'" "'Z'" "'['" "'\\'" "']'" "'^'" "'_'" "'`'" "'a'" "'b'" "'c'" "'d'" "'e'" "'f'" "'g'" "'h'" "'i'" "'j'" "'k'" "'l'" "'m'" "'n'" "'o'" "'p'" "'q'" "'r'" "'s'" "'t'" "'u'" "'v'" "'w'" "'x'" "'y'" "'z'" "'{'" "'|'" "'}'" "'~'" "'\x7f'" "'\x80'" "'\x81'" "'\x82'" "'\x83'" "'\x84'" "'\x85'" "'\x86'" "'\x87'" "'\x88'" "'\x89'" "'\x8a'" "'\x8b'" "'\x8c'" "'\x8d'" "'\x8e'" "'\x8f'" "'\x90'" "'\x91'" "'\x92'" "'\x93'" "'\x94'" "'\x95'" "'\x96'" "'\x97'" "'\x98'" "'\x99'" "'\x9a'" "'\x9b'" "'\x9c'" "'\x9d'" "'\x9e'" "'\x9f'" "'\xa0'" "'\xa1'" "'\xa2'" "'\xa3'" "'\xa4'" "'\xa5'" "'\xa6'" "'\xa7'" "'\xa8'" "'\xa9'" "'\xaa'" "'\xab'" "'\xac'" "'\xad'" "'\xae'" "'\xaf'" "'\xb0'" "'\xb1'" "'\xb2'" "'\xb3'" "'\xb4'" "'\xb5'" "'\xb6'" "'\xb7'" "'\xb8'" "'\xb9'" "'\xba'" "'\xbb'" "'\xbc'" "'\xbd'" "'\xbe'" "'\xbf'" "'\xc0'" "'\xc1'" "'\xc2'" "'\xc3'" "'\xc4'" "'\xc5'" "'\xc6'" "'\xc7'" "'\xc8'" "'\xc9'" "'\xca'" "'\xcb'" "'\xcc'" "'\xcd'" "'\xce'" "'\xcf'" "'\xd0'" "'\xd1'" "'\xd2'" "'\xd3'" "'\xd4'" "'\xd5'" "'\xd6'" "'\xd7'" "'\xd8'" "'\xd9'" "'\xda'" "'\xdb'" "'\xdc'" "'\xdd'" "'\xde'" "'\xdf'" "'\xe0'" "'\xe1'" "'\xe2'" "'\xe3'" "'\xe4'" "'\xe5'" "'\xe6'" "'\xe7'" "'\xe8'" "'\xe9'" "'\xea'" "'\xeb'" "'\xec'" "'\xed'" "'\xee'" "'\xef'" "'\xf0'" "'\xf1'" "'\xf2'" "'\xf3'" "'\xf4'" "'\xf5'" "'\xf6'" "'\xf7'" "'\xf8'" "'\xf9'" "'\xfa'" "'\xfb'" "'\xfc'" "'\xfd'" "'\xfe'" "'\xff'"]. + +Extract Inlined Constant Byte.eqb => "(=)". +Extract Inlined Constant Byte.byte_eq_dec => "(=)". +Extract Inlined Constant Ascii.ascii_of_byte => "(fun x -> x)". +Extract Inlined Constant Ascii.byte_of_ascii => "(fun x -> x)". + +(* This differs from ExtrOcamlString.v: the latter extracts "string" + to "char list", and we extract "string" to "string" *) + +Extract Inductive string => "string" +[ +(* EmptyString *) +"(* If this appears, you're using String internals. Please don't *) + """" +" +(* String *) +"(* If this appears, you're using String internals. Please don't *) + (fun (c, s) -> String.make 1 c ^ s) +" +] +"(* If this appears, you're using Ascii internals. Please don't *) + (fun f0 f1 s -> + let l = String.length s in + if l = 0 then f0 else f1 (String.get s 0) (String.sub s 1 (l-1))) +". + +Extract Inlined Constant String.string_dec => "(=)". +Extract Inlined Constant String.eqb => "(=)". +Extract Inlined Constant String.append => "(^)". +Extract Inlined Constant String.concat => "String.concat". +Extract Inlined Constant String.prefix => + "(fun s1 s2 -> + let l1 = String.length s1 and l2 = String.length s2 in + l1 <= l2 && String.sub s2 0 l1 = s1)". +Extract Inlined Constant String.string_of_list_ascii => + "(fun l => String.of_seq (List.to_seq l))". +Extract Inlined Constant String.list_ascii_of_string => + "(fun s => List.of_seq (String.to_seq s))". +Extract Inlined Constant String.string_of_list_byte => + "(fun l => String.of_seq (List.to_seq l))". +Extract Inlined Constant String.list_byte_of_string => + "(fun s => List.of_seq (String.to_seq s))". + +(* Other operations in module String: + String.length + String.get + String.substring + String.index + String.findex + They all use type "nat". If we know that "nat" extracts + to O | S of nat, we can provide OCaml implementations + for these functions that work directly on OCaml's strings. + However "nat" could be extracted to other OCaml types... +*) + +(* +Definition test := "ceci est un test"%string. + +Recursive Extraction test Ascii.zero Ascii.one. +*) diff --git a/plugins/extraction/common.ml b/plugins/extraction/common.ml index 2f3f42c5f6..099a22408a 100644 --- a/plugins/extraction/common.ml +++ b/plugins/extraction/common.ml @@ -649,3 +649,64 @@ let get_native_char c = Char.chr (cumul l) let pp_native_char c = str ("'"^Char.escaped (get_native_char c)^"'") + +(** Special hack for constants of type String.string : if an + [Extract Inductive string => string] has been declared, then + the constants are directly turned into string literals *) + +let ind_string = mk_ind "Coq.Strings.String" "string" + +let check_extract_string () = + try + let string_type = match lang () with + | Ocaml -> "string" + | _ -> raise Not_found + in + String.equal (find_custom (IndRef (ind_string, 0))) string_type + with Not_found -> false + +(* The argument is known to be of type Coq.Strings.String.string. + Check that it is built from constructors EmptyString and String + with constant ascii arguments. *) + +let rec is_native_string_rec = function + (* "EmptyString" constructor *) + | MLcons(_, ConstructRef(_, 1), []) -> true + (* "String" constructor *) + | MLcons(_, ConstructRef(_, 2), [hd; tl]) -> + is_native_char hd && is_native_string_rec tl + (* others *) + | _ -> false + +(* Here we first check that the argument is of type Coq.Strings.String.string + and that extraction to native strings was requested. + Then we check every character via [is_native_string_rec]. *) + +let is_native_string c = + match c with + | MLcons(_, ConstructRef((kn,0), j), l) -> + MutInd.equal kn ind_string + && check_extract_string () + && is_native_string_rec c + | _ -> false + +(* Extract the underlying string. *) + +let rec get_native_string c = + let buf = Buffer.create 64 in + let rec get = function + (* "EmptyString" constructor *) + | MLcons(_, ConstructRef(_, 1), []) -> + Buffer.contents buf + (* "String" constructor *) + | MLcons(_, ConstructRef(_, 2), [hd; tl]) -> + Buffer.add_char buf (get_native_char hd); + get tl + (* others *) + | _ -> assert false + in get c + +(* Printing the underlying string. *) + +let pp_native_string c = + str ("\"" ^ String.escaped (get_native_string c) ^ "\"") diff --git a/plugins/extraction/common.mli b/plugins/extraction/common.mli index e4e9c4c527..d13ac7926a 100644 --- a/plugins/extraction/common.mli +++ b/plugins/extraction/common.mli @@ -81,3 +81,11 @@ val mk_ind : string -> string -> MutInd.t val is_native_char : ml_ast -> bool val get_native_char : ml_ast -> char val pp_native_char : ml_ast -> Pp.t + +(** Special hack for constants of type String.string : if an + [Extract Inductive string => string] has been declared, then + the constants are directly turned into string literals *) + +val is_native_string : ml_ast -> bool +val get_native_string : ml_ast -> string +val pp_native_string : ml_ast -> Pp.t diff --git a/plugins/extraction/ocaml.ml b/plugins/extraction/ocaml.ml index 66429833b9..ab37bd2d76 100644 --- a/plugins/extraction/ocaml.ml +++ b/plugins/extraction/ocaml.ml @@ -249,6 +249,7 @@ let rec pp_expr par env args = assert (List.is_empty args); begin match a with | _ when is_native_char c -> pp_native_char c + | _ when is_native_string c -> pp_native_string c | [a1;a2] when is_infix r -> let pp = pp_expr true env [] in pp_par par (pp a1 ++ str (get_infix r) ++ pp a2) -- cgit v1.2.3