diff options
Diffstat (limited to 'lib')
| -rw-r--r-- | lib/flags.ml | 5 | ||||
| -rw-r--r-- | lib/flags.mli | 9 | ||||
| -rw-r--r-- | lib/pp.ml | 19 | ||||
| -rw-r--r-- | lib/system.ml | 8 | ||||
| -rw-r--r-- | lib/system.mli | 6 | ||||
| -rw-r--r-- | lib/unicode.ml | 97 | ||||
| -rw-r--r-- | lib/unicode.mli | 15 |
7 files changed, 113 insertions, 46 deletions
diff --git a/lib/flags.ml b/lib/flags.ml index d4be81c61a..a178eb7552 100644 --- a/lib/flags.ml +++ b/lib/flags.ml @@ -43,11 +43,8 @@ let with_extra_values o l f x = let boot = ref false let load_init = ref true -let batch_mode = ref false -type compilation_mode = BuildVo | BuildVio | Vio2Vo -let compilation_mode = ref BuildVo -let compilation_output_name = ref None +let record_aux_file = ref false let test_mode = ref false diff --git a/lib/flags.mli b/lib/flags.mli index 3024c60396..8ec2a80730 100644 --- a/lib/flags.mli +++ b/lib/flags.mli @@ -13,12 +13,9 @@ val boot : bool ref val load_init : bool ref -(* Will affect STM caching *) -val batch_mode : bool ref - -type compilation_mode = BuildVo | BuildVio | Vio2Vo -val compilation_mode : compilation_mode ref -val compilation_output_name : string option ref +(** Set by coqtop to tell the kernel to output to the aux file; will + be eventually removed by cleanups such as PR#1103 *) +val record_aux_file : bool ref (* Flag set when the test-suite is called. Its only effect to display verbose information for `Fail` *) @@ -82,10 +82,21 @@ let utf8_length s = done ; !cnt -let app s1 s2 = match s1, s2 with - | Ppcmd_empty, s - | s, Ppcmd_empty -> s - | s1, s2 -> Ppcmd_glue [s1; s2] +let rec app d1 d2 = match d1, d2 with + | Ppcmd_empty, d + | d, Ppcmd_empty -> d + + (* Optimizations *) + | Ppcmd_glue [l1;l2], Ppcmd_glue l3 -> Ppcmd_glue (l1 :: l2 :: l3) + | Ppcmd_glue [l1;l2], d2 -> Ppcmd_glue [l1 ; l2 ; d2] + | d1, Ppcmd_glue l2 -> Ppcmd_glue (d1 :: l2) + + | Ppcmd_tag(t1,d1), Ppcmd_tag(t2,d2) + when t1 = t2 -> Ppcmd_tag(t1,app d1 d2) + | d1, d2 -> Ppcmd_glue [d1; d2] + (* Optimizations deemed too costly *) + (* | Ppcmd_glue l1, Ppcmd_glue l2 -> Ppcmd_glue (l1 @ l2) *) + (* | Ppcmd_string s1, Ppcmd_string s2 -> Ppcmd_string (s1 ^ s2) *) let seq s = Ppcmd_glue s diff --git a/lib/system.ml b/lib/system.ml index 0b64b237da..4b5066ef41 100644 --- a/lib/system.ml +++ b/lib/system.ml @@ -54,6 +54,8 @@ let make_dir_table dir = let filter_dotfiles s f = if f.[0] = '.' then s else StrSet.add f s in Array.fold_left filter_dotfiles StrSet.empty (Sys.readdir dir) +let trust_file_cache = ref true + let exists_in_dir_respecting_case dir bf = let cache_dir dir = let contents = make_dir_table dir in @@ -62,10 +64,10 @@ let exists_in_dir_respecting_case dir bf = let contents, fresh = try (* in batch mode, assume the directory content is still fresh *) - StrMap.find dir !dirmap, !Flags.batch_mode + StrMap.find dir !dirmap, !trust_file_cache with Not_found -> (* in batch mode, we are not yet sure the directory exists *) - if !Flags.batch_mode && not (exists_dir dir) then StrSet.empty, true + if !trust_file_cache && not (exists_dir dir) then StrSet.empty, true else cache_dir dir, true in StrSet.mem bf contents || not fresh && @@ -80,7 +82,7 @@ let file_exists_respecting_case path f = let df = Filename.dirname f in (String.equal df "." || aux df) && exists_in_dir_respecting_case (Filename.concat path df) bf - in (!Flags.batch_mode || Sys.file_exists (Filename.concat path f)) && aux f + in (!trust_file_cache || Sys.file_exists (Filename.concat path f)) && aux f let rec search paths test = match paths with diff --git a/lib/system.mli b/lib/system.mli index 7281de97c9..aa964abebe 100644 --- a/lib/system.mli +++ b/lib/system.mli @@ -54,6 +54,12 @@ val where_in_path_rex : val find_file_in_path : ?warn:bool -> CUnix.load_path -> string -> CUnix.physical_path * string +val trust_file_cache : bool ref +(** [trust_file_cache] indicates whether we trust the underlying + mapped file-system not to change along the execution of Coq. This + assumption greatly speds up file search, but it is often + inconvenient in interactive mode *) + val file_exists_respecting_case : string -> string -> bool (** {6 I/O functions } *) diff --git a/lib/unicode.ml b/lib/unicode.ml index 8eb2eb45d4..f193c4e0f8 100644 --- a/lib/unicode.ml +++ b/lib/unicode.ml @@ -8,13 +8,14 @@ (** Unicode utilities *) -type status = Letter | IdentPart | Symbol | Unknown +type status = Letter | IdentPart | Symbol | IdentSep | Unknown (* The following table stores classes of Unicode characters that - are used by the lexer. There are 3 different classes so 2 bits are - allocated for each character. We only use 16 bits over the 31 bits - to simplify the masking process. (This choice seems to be a good - trade-off between speed and space after some benchmarks.) *) + are used by the lexer. There are 5 different classes so 3 bits + are allocated for each character. We encode the masks of 8 + characters per word, thus using 24 bits over the 31 available + bits. (This choice seems to be a good trade-off between speed + and space after some benchmarks.) *) (* A 256 KiB table, initially filled with zeros. *) let table = Array.make (1 lsl 17) 0 @@ -24,14 +25,15 @@ let table = Array.make (1 lsl 17) 0 define the position of the pattern in the word. Notice that pattern "00" means "undefined". *) let mask i = function - | Letter -> 1 lsl ((i land 7) lsl 1) (* 01 *) - | IdentPart -> 2 lsl ((i land 7) lsl 1) (* 10 *) - | Symbol -> 3 lsl ((i land 7) lsl 1) (* 11 *) - | Unknown -> 0 lsl ((i land 7) lsl 1) (* 00 *) + | Letter -> 1 lsl ((i land 7) * 3) (* 001 *) + | IdentPart -> 2 lsl ((i land 7) * 3) (* 010 *) + | Symbol -> 3 lsl ((i land 7) * 3) (* 011 *) + | IdentSep -> 4 lsl ((i land 7) * 3) (* 100 *) + | Unknown -> 0 lsl ((i land 7) * 3) (* 000 *) -(* Helper to reset 2 bits in a word. *) +(* Helper to reset 3 bits in a word. *) let reset_mask i = - lnot (3 lsl ((i land 7) lsl 1)) + lnot (7 lsl ((i land 7) * 3)) (* Initialize the lookup table from a list of segments, assigning a status to every character of each segment. The order of these @@ -50,13 +52,14 @@ let mk_lookup_table_from_unicode_tables_for status tables = (* Look up into the table and interpret the found pattern. *) let lookup x = - let v = (table.(x lsr 3) lsr ((x land 7) lsl 1)) land 3 in + let v = (table.(x lsr 3) lsr ((x land 7) * 3)) land 7 in if v = 1 then Letter else if v = 2 then IdentPart else if v = 3 then Symbol + else if v = 4 then IdentSep else Unknown -(* [classify] discriminates between 3 different kinds of +(* [classify] discriminates between 5 different kinds of symbols based on the standard unicode classification (extracted from Camomile). *) let classify = @@ -67,13 +70,13 @@ let classify = Unicodetable.sm; (* Symbol, maths. *) Unicodetable.sc; (* Symbol, currency. *) Unicodetable.so; (* Symbol, modifier. *) - Unicodetable.pd; (* Punctation, dash. *) - Unicodetable.pc; (* Punctation, connector. *) - Unicodetable.pe; (* Punctation, open. *) - Unicodetable.ps; (* Punctation, close. *) - Unicodetable.pi; (* Punctation, initial quote. *) - Unicodetable.pf; (* Punctation, final quote. *) - Unicodetable.po; (* Punctation, other. *) + Unicodetable.pd; (* Punctuation, dash. *) + Unicodetable.pc; (* Punctuation, connector. *) + Unicodetable.pe; (* Punctuation, open. *) + Unicodetable.ps; (* Punctution, close. *) + Unicodetable.pi; (* Punctuation, initial quote. *) + Unicodetable.pf; (* Punctuation, final quote. *) + Unicodetable.po; (* Punctuation, other. *) ]; mk_lookup_table_from_unicode_tables_for Letter [ @@ -107,14 +110,14 @@ let classify = [(0x02074, 0x02079)]; (* Superscript 4-9. *) single 0x0002E; (* Dot. *) ]; - mk_lookup_table_from_unicode_tables_for Letter + mk_lookup_table_from_unicode_tables_for IdentSep [ single 0x005F; (* Underscore. *) single 0x00A0; (* Non breaking space. *) ]; mk_lookup_table_from_unicode_tables_for IdentPart [ - single 0x0027; (* Special space. *) + single 0x0027; (* Single quote. *) ]; (* Lookup *) lookup @@ -198,22 +201,40 @@ let escaped_if_non_utf8 s = (* Check the well-formedness of an identifier *) +let is_valid_ident_initial = function + | Letter | IdentSep -> true + | IdentPart | Symbol | Unknown -> false + let initial_refutation j n s = - match classify n with - | Letter -> None - | _ -> + if is_valid_ident_initial (classify n) then None + else let c = String.sub s 0 j in Some (false, "Invalid character '"^c^"' at beginning of identifier \""^s^"\".") +let is_valid_ident_trailing = function + | Letter | IdentSep | IdentPart -> true + | Symbol | Unknown -> false + let trailing_refutation i j n s = - match classify n with - | Letter | IdentPart -> None - | _ -> + if is_valid_ident_trailing (classify n) then None + else let c = String.sub s i j in Some (false, "Invalid character '"^c^"' in identifier \""^s^"\".") +let is_unknown = function + | Unknown -> true + | Letter | IdentSep | IdentPart | Symbol -> false + +let is_ident_part = function + | IdentPart -> true + | Letter | IdentSep | Symbol | Unknown -> false + +let is_ident_sep = function + | IdentSep -> true + | Letter | IdentPart | Symbol | Unknown -> false + let ident_refutation s = if s = ".." then None else try let j, n = next_utf8 s 0 in @@ -247,6 +268,26 @@ let lowercase_first_char s = let j, n = next_utf8 s 0 in utf8_of_unicode (lowercase_unicode n) +let split_at_first_letter s = + let n, v = next_utf8 s 0 in + if ((* optim *) n = 1 && s.[0] != '_') || not (is_ident_sep (classify v)) then None + else begin + let n = ref n in + let p = ref 0 in + while !n < String.length s && + let n', v = next_utf8 s !n in + p := n'; + (* Test if not letter *) + ((* optim *) n' = 1 && (s.[!n] = '_' || s.[!n] = '\'')) + || let st = classify v in + is_ident_sep st || is_ident_part st + do n := !n + !p + done; + let s1 = String.sub s 0 !n in + let s2 = String.sub s !n (String.length s - !n) in + Some (s1,s2) + end + (** For extraction, we need to encode unicode character into ascii ones *) let is_basic_ascii s = diff --git a/lib/unicode.mli b/lib/unicode.mli index b8e7c33ad4..32ffbb8e94 100644 --- a/lib/unicode.mli +++ b/lib/unicode.mli @@ -8,7 +8,7 @@ (** Unicode utilities *) -type status = Letter | IdentPart | Symbol | Unknown +type status (** Classify a unicode char into 3 classes or unknown. *) val classify : int -> status @@ -17,10 +17,23 @@ val classify : int -> status Return [Some (b,s)] otherwise, where [s] is an explanation and [b] is severity. *) val ident_refutation : string -> (bool * string) option +(** Tells if a valid initial character for an identifier *) +val is_valid_ident_initial : status -> bool + +(** Tells if a valid non-initial character for an identifier *) +val is_valid_ident_trailing : status -> bool + +(** Tells if a character is unclassified *) +val is_unknown : status -> bool + (** First char of a string, converted to lowercase @raise Assert_failure if the input string is empty. *) val lowercase_first_char : string -> string +(** Split a string supposed to be an ident at the first letter; + as an optimization, return None if the first character is a letter *) +val split_at_first_letter : string -> (string * string) option + (** Return [true] if all UTF-8 characters in the input string are just plain ASCII characters. Returns [false] otherwise. *) val is_basic_ascii : string -> bool |
