aboutsummaryrefslogtreecommitdiff
path: root/plugins/extraction
diff options
context:
space:
mode:
authorXavier Leroy2019-06-24 17:08:11 +0200
committerMaxime Dénès2020-01-08 15:07:15 +0100
commitbfb28c7720d9101e2caedff0c1f1f2cdbf0bad65 (patch)
treeefe2b91a00c554e79c17cbaddf2f36b8c0ad24ed /plugins/extraction
parent3cdbd61f3acf0722c92f8192425eb1a677270b08 (diff)
Support extraction of Coq's string type to OCaml's string type
Instead of the default extraction to OCaml's "char list" type.
Diffstat (limited to 'plugins/extraction')
-rw-r--r--plugins/extraction/ExtrOcamlStringPlus.v105
-rw-r--r--plugins/extraction/common.ml61
-rw-r--r--plugins/extraction/common.mli8
-rw-r--r--plugins/extraction/ocaml.ml1
4 files changed, 175 insertions, 0 deletions
diff --git a/plugins/extraction/ExtrOcamlStringPlus.v b/plugins/extraction/ExtrOcamlStringPlus.v
new file mode 100644
index 0000000000..fff89a4525
--- /dev/null
+++ b/plugins/extraction/ExtrOcamlStringPlus.v
@@ -0,0 +1,105 @@
+(************************************************************************)
+(* * The Coq Proof Assistant / The Coq Development Team *)
+(* v * INRIA, CNRS and contributors - Copyright 1999-2019 *)
+(* <O___,, * (see CREDITS file for the list of authors) *)
+(* \VV/ **************************************************************)
+(* // * This file is distributed under the terms of the *)
+(* * GNU Lesser General Public License Version 2.1 *)
+(* * (see LICENSE file for the text of the license) *)
+(************************************************************************)
+
+(* Extraction to Ocaml : extract ascii to OCaml's char type
+ and string to OCaml's string type. *)
+
+Require Coq.extraction.Extraction.
+
+Require Import Ascii String Coq.Strings.Byte.
+
+(* This is as in ExtrOcamlString.v *)
+
+Extract Inductive ascii => char
+[
+"(* If this appears, you're using Ascii internals. Please don't *)
+ (fun (b0,b1,b2,b3,b4,b5,b6,b7) ->
+ let f b i = if b then 1 lsl i else 0 in
+ Char.chr (f b0 0 + f b1 1 + f b2 2 + f b3 3 + f b4 4 + f b5 5 + f b6 6 + f b7 7))"
+]
+"(* If this appears, you're using Ascii internals. Please don't *)
+ (fun f c ->
+ let n = Char.code c in
+ let h i = (n land (1 lsl i)) <> 0 in
+ f (h 0) (h 1) (h 2) (h 3) (h 4) (h 5) (h 6) (h 7))".
+
+Extract Constant zero => "'\000'".
+Extract Constant one => "'\001'".
+Extract Constant shift =>
+ "fun b c -> Char.chr (((Char.code c) lsl 1) land 255 + if b then 1 else 0)".
+
+Extract Inlined Constant ascii_dec => "(=)".
+Extract Inlined Constant Ascii.eqb => "(=)".
+
+Extract Inductive string => "char list" [ "[]" "(::)" ].
+
+(* python -c 'print(" ".join(r""" "%s" """.strip() % (r"'"'\''"'" if chr(i) == "'"'"'" else repr(""" "" """.strip()) if chr(i) == """ " """.strip() else repr(chr(i))) for i in range(256)))' # " to satisfy Coq's comment parser *)
+Extract Inductive byte => char
+["'\x00'" "'\x01'" "'\x02'" "'\x03'" "'\x04'" "'\x05'" "'\x06'" "'\x07'" "'\x08'" "'\t'" "'\n'" "'\x0b'" "'\x0c'" "'\r'" "'\x0e'" "'\x0f'" "'\x10'" "'\x11'" "'\x12'" "'\x13'" "'\x14'" "'\x15'" "'\x16'" "'\x17'" "'\x18'" "'\x19'" "'\x1a'" "'\x1b'" "'\x1c'" "'\x1d'" "'\x1e'" "'\x1f'" "' '" "'!'" "'""'" "'#'" "'$'" "'%'" "'&'" "'\''" "'('" "')'" "'*'" "'+'" "','" "'-'" "'.'" "'/'" "'0'" "'1'" "'2'" "'3'" "'4'" "'5'" "'6'" "'7'" "'8'" "'9'" "':'" "';'" "'<'" "'='" "'>'" "'?'" "'@'" "'A'" "'B'" "'C'" "'D'" "'E'" "'F'" "'G'" "'H'" "'I'" "'J'" "'K'" "'L'" "'M'" "'N'" "'O'" "'P'" "'Q'" "'R'" "'S'" "'T'" "'U'" "'V'" "'W'" "'X'" "'Y'" "'Z'" "'['" "'\\'" "']'" "'^'" "'_'" "'`'" "'a'" "'b'" "'c'" "'d'" "'e'" "'f'" "'g'" "'h'" "'i'" "'j'" "'k'" "'l'" "'m'" "'n'" "'o'" "'p'" "'q'" "'r'" "'s'" "'t'" "'u'" "'v'" "'w'" "'x'" "'y'" "'z'" "'{'" "'|'" "'}'" "'~'" "'\x7f'" "'\x80'" "'\x81'" "'\x82'" "'\x83'" "'\x84'" "'\x85'" "'\x86'" "'\x87'" "'\x88'" "'\x89'" "'\x8a'" "'\x8b'" "'\x8c'" "'\x8d'" "'\x8e'" "'\x8f'" "'\x90'" "'\x91'" "'\x92'" "'\x93'" "'\x94'" "'\x95'" "'\x96'" "'\x97'" "'\x98'" "'\x99'" "'\x9a'" "'\x9b'" "'\x9c'" "'\x9d'" "'\x9e'" "'\x9f'" "'\xa0'" "'\xa1'" "'\xa2'" "'\xa3'" "'\xa4'" "'\xa5'" "'\xa6'" "'\xa7'" "'\xa8'" "'\xa9'" "'\xaa'" "'\xab'" "'\xac'" "'\xad'" "'\xae'" "'\xaf'" "'\xb0'" "'\xb1'" "'\xb2'" "'\xb3'" "'\xb4'" "'\xb5'" "'\xb6'" "'\xb7'" "'\xb8'" "'\xb9'" "'\xba'" "'\xbb'" "'\xbc'" "'\xbd'" "'\xbe'" "'\xbf'" "'\xc0'" "'\xc1'" "'\xc2'" "'\xc3'" "'\xc4'" "'\xc5'" "'\xc6'" "'\xc7'" "'\xc8'" "'\xc9'" "'\xca'" "'\xcb'" "'\xcc'" "'\xcd'" "'\xce'" "'\xcf'" "'\xd0'" "'\xd1'" "'\xd2'" "'\xd3'" "'\xd4'" "'\xd5'" "'\xd6'" "'\xd7'" "'\xd8'" "'\xd9'" "'\xda'" "'\xdb'" "'\xdc'" "'\xdd'" "'\xde'" "'\xdf'" "'\xe0'" "'\xe1'" "'\xe2'" "'\xe3'" "'\xe4'" "'\xe5'" "'\xe6'" "'\xe7'" "'\xe8'" "'\xe9'" "'\xea'" "'\xeb'" "'\xec'" "'\xed'" "'\xee'" "'\xef'" "'\xf0'" "'\xf1'" "'\xf2'" "'\xf3'" "'\xf4'" "'\xf5'" "'\xf6'" "'\xf7'" "'\xf8'" "'\xf9'" "'\xfa'" "'\xfb'" "'\xfc'" "'\xfd'" "'\xfe'" "'\xff'"].
+
+Extract Inlined Constant Byte.eqb => "(=)".
+Extract Inlined Constant Byte.byte_eq_dec => "(=)".
+Extract Inlined Constant Ascii.ascii_of_byte => "(fun x -> x)".
+Extract Inlined Constant Ascii.byte_of_ascii => "(fun x -> x)".
+
+(* This differs from ExtrOcamlString.v: the latter extracts "string"
+ to "char list", and we extract "string" to "string" *)
+
+Extract Inductive string => "string"
+[
+(* EmptyString *)
+"(* If this appears, you're using String internals. Please don't *)
+ """"
+"
+(* String *)
+"(* If this appears, you're using String internals. Please don't *)
+ (fun (c, s) -> String.make 1 c ^ s)
+"
+]
+"(* If this appears, you're using Ascii internals. Please don't *)
+ (fun f0 f1 s ->
+ let l = String.length s in
+ if l = 0 then f0 else f1 (String.get s 0) (String.sub s 1 (l-1)))
+".
+
+Extract Inlined Constant String.string_dec => "(=)".
+Extract Inlined Constant String.eqb => "(=)".
+Extract Inlined Constant String.append => "(^)".
+Extract Inlined Constant String.concat => "String.concat".
+Extract Inlined Constant String.prefix =>
+ "(fun s1 s2 ->
+ let l1 = String.length s1 and l2 = String.length s2 in
+ l1 <= l2 && String.sub s2 0 l1 = s1)".
+Extract Inlined Constant String.string_of_list_ascii =>
+ "(fun l => String.of_seq (List.to_seq l))".
+Extract Inlined Constant String.list_ascii_of_string =>
+ "(fun s => List.of_seq (String.to_seq s))".
+Extract Inlined Constant String.string_of_list_byte =>
+ "(fun l => String.of_seq (List.to_seq l))".
+Extract Inlined Constant String.list_byte_of_string =>
+ "(fun s => List.of_seq (String.to_seq s))".
+
+(* Other operations in module String:
+ String.length
+ String.get
+ String.substring
+ String.index
+ String.findex
+ They all use type "nat". If we know that "nat" extracts
+ to O | S of nat, we can provide OCaml implementations
+ for these functions that work directly on OCaml's strings.
+ However "nat" could be extracted to other OCaml types...
+*)
+
+(*
+Definition test := "ceci est un test"%string.
+
+Recursive Extraction test Ascii.zero Ascii.one.
+*)
diff --git a/plugins/extraction/common.ml b/plugins/extraction/common.ml
index 2f3f42c5f6..099a22408a 100644
--- a/plugins/extraction/common.ml
+++ b/plugins/extraction/common.ml
@@ -649,3 +649,64 @@ let get_native_char c =
Char.chr (cumul l)
let pp_native_char c = str ("'"^Char.escaped (get_native_char c)^"'")
+
+(** Special hack for constants of type String.string : if an
+ [Extract Inductive string => string] has been declared, then
+ the constants are directly turned into string literals *)
+
+let ind_string = mk_ind "Coq.Strings.String" "string"
+
+let check_extract_string () =
+ try
+ let string_type = match lang () with
+ | Ocaml -> "string"
+ | _ -> raise Not_found
+ in
+ String.equal (find_custom (IndRef (ind_string, 0))) string_type
+ with Not_found -> false
+
+(* The argument is known to be of type Coq.Strings.String.string.
+ Check that it is built from constructors EmptyString and String
+ with constant ascii arguments. *)
+
+let rec is_native_string_rec = function
+ (* "EmptyString" constructor *)
+ | MLcons(_, ConstructRef(_, 1), []) -> true
+ (* "String" constructor *)
+ | MLcons(_, ConstructRef(_, 2), [hd; tl]) ->
+ is_native_char hd && is_native_string_rec tl
+ (* others *)
+ | _ -> false
+
+(* Here we first check that the argument is of type Coq.Strings.String.string
+ and that extraction to native strings was requested.
+ Then we check every character via [is_native_string_rec]. *)
+
+let is_native_string c =
+ match c with
+ | MLcons(_, ConstructRef((kn,0), j), l) ->
+ MutInd.equal kn ind_string
+ && check_extract_string ()
+ && is_native_string_rec c
+ | _ -> false
+
+(* Extract the underlying string. *)
+
+let rec get_native_string c =
+ let buf = Buffer.create 64 in
+ let rec get = function
+ (* "EmptyString" constructor *)
+ | MLcons(_, ConstructRef(_, 1), []) ->
+ Buffer.contents buf
+ (* "String" constructor *)
+ | MLcons(_, ConstructRef(_, 2), [hd; tl]) ->
+ Buffer.add_char buf (get_native_char hd);
+ get tl
+ (* others *)
+ | _ -> assert false
+ in get c
+
+(* Printing the underlying string. *)
+
+let pp_native_string c =
+ str ("\"" ^ String.escaped (get_native_string c) ^ "\"")
diff --git a/plugins/extraction/common.mli b/plugins/extraction/common.mli
index e4e9c4c527..d13ac7926a 100644
--- a/plugins/extraction/common.mli
+++ b/plugins/extraction/common.mli
@@ -81,3 +81,11 @@ val mk_ind : string -> string -> MutInd.t
val is_native_char : ml_ast -> bool
val get_native_char : ml_ast -> char
val pp_native_char : ml_ast -> Pp.t
+
+(** Special hack for constants of type String.string : if an
+ [Extract Inductive string => string] has been declared, then
+ the constants are directly turned into string literals *)
+
+val is_native_string : ml_ast -> bool
+val get_native_string : ml_ast -> string
+val pp_native_string : ml_ast -> Pp.t
diff --git a/plugins/extraction/ocaml.ml b/plugins/extraction/ocaml.ml
index 66429833b9..ab37bd2d76 100644
--- a/plugins/extraction/ocaml.ml
+++ b/plugins/extraction/ocaml.ml
@@ -249,6 +249,7 @@ let rec pp_expr par env args =
assert (List.is_empty args);
begin match a with
| _ when is_native_char c -> pp_native_char c
+ | _ when is_native_string c -> pp_native_string c
| [a1;a2] when is_infix r ->
let pp = pp_expr true env [] in
pp_par par (pp a1 ++ str (get_infix r) ++ pp a2)