aboutsummaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r--lib/flags.ml5
-rw-r--r--lib/flags.mli9
-rw-r--r--lib/pp.ml19
-rw-r--r--lib/system.ml8
-rw-r--r--lib/system.mli6
-rw-r--r--lib/unicode.ml97
-rw-r--r--lib/unicode.mli15
7 files changed, 113 insertions, 46 deletions
diff --git a/lib/flags.ml b/lib/flags.ml
index d4be81c61a..a178eb7552 100644
--- a/lib/flags.ml
+++ b/lib/flags.ml
@@ -43,11 +43,8 @@ let with_extra_values o l f x =
let boot = ref false
let load_init = ref true
-let batch_mode = ref false
-type compilation_mode = BuildVo | BuildVio | Vio2Vo
-let compilation_mode = ref BuildVo
-let compilation_output_name = ref None
+let record_aux_file = ref false
let test_mode = ref false
diff --git a/lib/flags.mli b/lib/flags.mli
index 3024c60396..8ec2a80730 100644
--- a/lib/flags.mli
+++ b/lib/flags.mli
@@ -13,12 +13,9 @@
val boot : bool ref
val load_init : bool ref
-(* Will affect STM caching *)
-val batch_mode : bool ref
-
-type compilation_mode = BuildVo | BuildVio | Vio2Vo
-val compilation_mode : compilation_mode ref
-val compilation_output_name : string option ref
+(** Set by coqtop to tell the kernel to output to the aux file; will
+ be eventually removed by cleanups such as PR#1103 *)
+val record_aux_file : bool ref
(* Flag set when the test-suite is called. Its only effect to display
verbose information for `Fail` *)
diff --git a/lib/pp.ml b/lib/pp.ml
index 88ddcb35b5..c3338688d2 100644
--- a/lib/pp.ml
+++ b/lib/pp.ml
@@ -82,10 +82,21 @@ let utf8_length s =
done ;
!cnt
-let app s1 s2 = match s1, s2 with
- | Ppcmd_empty, s
- | s, Ppcmd_empty -> s
- | s1, s2 -> Ppcmd_glue [s1; s2]
+let rec app d1 d2 = match d1, d2 with
+ | Ppcmd_empty, d
+ | d, Ppcmd_empty -> d
+
+ (* Optimizations *)
+ | Ppcmd_glue [l1;l2], Ppcmd_glue l3 -> Ppcmd_glue (l1 :: l2 :: l3)
+ | Ppcmd_glue [l1;l2], d2 -> Ppcmd_glue [l1 ; l2 ; d2]
+ | d1, Ppcmd_glue l2 -> Ppcmd_glue (d1 :: l2)
+
+ | Ppcmd_tag(t1,d1), Ppcmd_tag(t2,d2)
+ when t1 = t2 -> Ppcmd_tag(t1,app d1 d2)
+ | d1, d2 -> Ppcmd_glue [d1; d2]
+ (* Optimizations deemed too costly *)
+ (* | Ppcmd_glue l1, Ppcmd_glue l2 -> Ppcmd_glue (l1 @ l2) *)
+ (* | Ppcmd_string s1, Ppcmd_string s2 -> Ppcmd_string (s1 ^ s2) *)
let seq s = Ppcmd_glue s
diff --git a/lib/system.ml b/lib/system.ml
index 0b64b237da..4b5066ef41 100644
--- a/lib/system.ml
+++ b/lib/system.ml
@@ -54,6 +54,8 @@ let make_dir_table dir =
let filter_dotfiles s f = if f.[0] = '.' then s else StrSet.add f s in
Array.fold_left filter_dotfiles StrSet.empty (Sys.readdir dir)
+let trust_file_cache = ref true
+
let exists_in_dir_respecting_case dir bf =
let cache_dir dir =
let contents = make_dir_table dir in
@@ -62,10 +64,10 @@ let exists_in_dir_respecting_case dir bf =
let contents, fresh =
try
(* in batch mode, assume the directory content is still fresh *)
- StrMap.find dir !dirmap, !Flags.batch_mode
+ StrMap.find dir !dirmap, !trust_file_cache
with Not_found ->
(* in batch mode, we are not yet sure the directory exists *)
- if !Flags.batch_mode && not (exists_dir dir) then StrSet.empty, true
+ if !trust_file_cache && not (exists_dir dir) then StrSet.empty, true
else cache_dir dir, true in
StrSet.mem bf contents ||
not fresh &&
@@ -80,7 +82,7 @@ let file_exists_respecting_case path f =
let df = Filename.dirname f in
(String.equal df "." || aux df)
&& exists_in_dir_respecting_case (Filename.concat path df) bf
- in (!Flags.batch_mode || Sys.file_exists (Filename.concat path f)) && aux f
+ in (!trust_file_cache || Sys.file_exists (Filename.concat path f)) && aux f
let rec search paths test =
match paths with
diff --git a/lib/system.mli b/lib/system.mli
index 7281de97c9..aa964abebe 100644
--- a/lib/system.mli
+++ b/lib/system.mli
@@ -54,6 +54,12 @@ val where_in_path_rex :
val find_file_in_path :
?warn:bool -> CUnix.load_path -> string -> CUnix.physical_path * string
+val trust_file_cache : bool ref
+(** [trust_file_cache] indicates whether we trust the underlying
+ mapped file-system not to change along the execution of Coq. This
+ assumption greatly speds up file search, but it is often
+ inconvenient in interactive mode *)
+
val file_exists_respecting_case : string -> string -> bool
(** {6 I/O functions } *)
diff --git a/lib/unicode.ml b/lib/unicode.ml
index 8eb2eb45d4..f193c4e0f8 100644
--- a/lib/unicode.ml
+++ b/lib/unicode.ml
@@ -8,13 +8,14 @@
(** Unicode utilities *)
-type status = Letter | IdentPart | Symbol | Unknown
+type status = Letter | IdentPart | Symbol | IdentSep | Unknown
(* The following table stores classes of Unicode characters that
- are used by the lexer. There are 3 different classes so 2 bits are
- allocated for each character. We only use 16 bits over the 31 bits
- to simplify the masking process. (This choice seems to be a good
- trade-off between speed and space after some benchmarks.) *)
+ are used by the lexer. There are 5 different classes so 3 bits
+ are allocated for each character. We encode the masks of 8
+ characters per word, thus using 24 bits over the 31 available
+ bits. (This choice seems to be a good trade-off between speed
+ and space after some benchmarks.) *)
(* A 256 KiB table, initially filled with zeros. *)
let table = Array.make (1 lsl 17) 0
@@ -24,14 +25,15 @@ let table = Array.make (1 lsl 17) 0
define the position of the pattern in the word.
Notice that pattern "00" means "undefined". *)
let mask i = function
- | Letter -> 1 lsl ((i land 7) lsl 1) (* 01 *)
- | IdentPart -> 2 lsl ((i land 7) lsl 1) (* 10 *)
- | Symbol -> 3 lsl ((i land 7) lsl 1) (* 11 *)
- | Unknown -> 0 lsl ((i land 7) lsl 1) (* 00 *)
+ | Letter -> 1 lsl ((i land 7) * 3) (* 001 *)
+ | IdentPart -> 2 lsl ((i land 7) * 3) (* 010 *)
+ | Symbol -> 3 lsl ((i land 7) * 3) (* 011 *)
+ | IdentSep -> 4 lsl ((i land 7) * 3) (* 100 *)
+ | Unknown -> 0 lsl ((i land 7) * 3) (* 000 *)
-(* Helper to reset 2 bits in a word. *)
+(* Helper to reset 3 bits in a word. *)
let reset_mask i =
- lnot (3 lsl ((i land 7) lsl 1))
+ lnot (7 lsl ((i land 7) * 3))
(* Initialize the lookup table from a list of segments, assigning
a status to every character of each segment. The order of these
@@ -50,13 +52,14 @@ let mk_lookup_table_from_unicode_tables_for status tables =
(* Look up into the table and interpret the found pattern. *)
let lookup x =
- let v = (table.(x lsr 3) lsr ((x land 7) lsl 1)) land 3 in
+ let v = (table.(x lsr 3) lsr ((x land 7) * 3)) land 7 in
if v = 1 then Letter
else if v = 2 then IdentPart
else if v = 3 then Symbol
+ else if v = 4 then IdentSep
else Unknown
-(* [classify] discriminates between 3 different kinds of
+(* [classify] discriminates between 5 different kinds of
symbols based on the standard unicode classification (extracted from
Camomile). *)
let classify =
@@ -67,13 +70,13 @@ let classify =
Unicodetable.sm; (* Symbol, maths. *)
Unicodetable.sc; (* Symbol, currency. *)
Unicodetable.so; (* Symbol, modifier. *)
- Unicodetable.pd; (* Punctation, dash. *)
- Unicodetable.pc; (* Punctation, connector. *)
- Unicodetable.pe; (* Punctation, open. *)
- Unicodetable.ps; (* Punctation, close. *)
- Unicodetable.pi; (* Punctation, initial quote. *)
- Unicodetable.pf; (* Punctation, final quote. *)
- Unicodetable.po; (* Punctation, other. *)
+ Unicodetable.pd; (* Punctuation, dash. *)
+ Unicodetable.pc; (* Punctuation, connector. *)
+ Unicodetable.pe; (* Punctuation, open. *)
+ Unicodetable.ps; (* Punctution, close. *)
+ Unicodetable.pi; (* Punctuation, initial quote. *)
+ Unicodetable.pf; (* Punctuation, final quote. *)
+ Unicodetable.po; (* Punctuation, other. *)
];
mk_lookup_table_from_unicode_tables_for Letter
[
@@ -107,14 +110,14 @@ let classify =
[(0x02074, 0x02079)]; (* Superscript 4-9. *)
single 0x0002E; (* Dot. *)
];
- mk_lookup_table_from_unicode_tables_for Letter
+ mk_lookup_table_from_unicode_tables_for IdentSep
[
single 0x005F; (* Underscore. *)
single 0x00A0; (* Non breaking space. *)
];
mk_lookup_table_from_unicode_tables_for IdentPart
[
- single 0x0027; (* Special space. *)
+ single 0x0027; (* Single quote. *)
];
(* Lookup *)
lookup
@@ -198,22 +201,40 @@ let escaped_if_non_utf8 s =
(* Check the well-formedness of an identifier *)
+let is_valid_ident_initial = function
+ | Letter | IdentSep -> true
+ | IdentPart | Symbol | Unknown -> false
+
let initial_refutation j n s =
- match classify n with
- | Letter -> None
- | _ ->
+ if is_valid_ident_initial (classify n) then None
+ else
let c = String.sub s 0 j in
Some (false,
"Invalid character '"^c^"' at beginning of identifier \""^s^"\".")
+let is_valid_ident_trailing = function
+ | Letter | IdentSep | IdentPart -> true
+ | Symbol | Unknown -> false
+
let trailing_refutation i j n s =
- match classify n with
- | Letter | IdentPart -> None
- | _ ->
+ if is_valid_ident_trailing (classify n) then None
+ else
let c = String.sub s i j in
Some (false,
"Invalid character '"^c^"' in identifier \""^s^"\".")
+let is_unknown = function
+ | Unknown -> true
+ | Letter | IdentSep | IdentPart | Symbol -> false
+
+let is_ident_part = function
+ | IdentPart -> true
+ | Letter | IdentSep | Symbol | Unknown -> false
+
+let is_ident_sep = function
+ | IdentSep -> true
+ | Letter | IdentPart | Symbol | Unknown -> false
+
let ident_refutation s =
if s = ".." then None else try
let j, n = next_utf8 s 0 in
@@ -247,6 +268,26 @@ let lowercase_first_char s =
let j, n = next_utf8 s 0 in
utf8_of_unicode (lowercase_unicode n)
+let split_at_first_letter s =
+ let n, v = next_utf8 s 0 in
+ if ((* optim *) n = 1 && s.[0] != '_') || not (is_ident_sep (classify v)) then None
+ else begin
+ let n = ref n in
+ let p = ref 0 in
+ while !n < String.length s &&
+ let n', v = next_utf8 s !n in
+ p := n';
+ (* Test if not letter *)
+ ((* optim *) n' = 1 && (s.[!n] = '_' || s.[!n] = '\''))
+ || let st = classify v in
+ is_ident_sep st || is_ident_part st
+ do n := !n + !p
+ done;
+ let s1 = String.sub s 0 !n in
+ let s2 = String.sub s !n (String.length s - !n) in
+ Some (s1,s2)
+ end
+
(** For extraction, we need to encode unicode character into ascii ones *)
let is_basic_ascii s =
diff --git a/lib/unicode.mli b/lib/unicode.mli
index b8e7c33ad4..32ffbb8e94 100644
--- a/lib/unicode.mli
+++ b/lib/unicode.mli
@@ -8,7 +8,7 @@
(** Unicode utilities *)
-type status = Letter | IdentPart | Symbol | Unknown
+type status
(** Classify a unicode char into 3 classes or unknown. *)
val classify : int -> status
@@ -17,10 +17,23 @@ val classify : int -> status
Return [Some (b,s)] otherwise, where [s] is an explanation and [b] is severity. *)
val ident_refutation : string -> (bool * string) option
+(** Tells if a valid initial character for an identifier *)
+val is_valid_ident_initial : status -> bool
+
+(** Tells if a valid non-initial character for an identifier *)
+val is_valid_ident_trailing : status -> bool
+
+(** Tells if a character is unclassified *)
+val is_unknown : status -> bool
+
(** First char of a string, converted to lowercase
@raise Assert_failure if the input string is empty. *)
val lowercase_first_char : string -> string
+(** Split a string supposed to be an ident at the first letter;
+ as an optimization, return None if the first character is a letter *)
+val split_at_first_letter : string -> (string * string) option
+
(** Return [true] if all UTF-8 characters in the input string are just plain
ASCII characters. Returns [false] otherwise. *)
val is_basic_ascii : string -> bool