11 files changed, 167 insertions, 44 deletions
diff --git a/lib/cWarnings.ml b/lib/cWarnings.ml
index ff71452672..3699b1c614 100644
--- a/lib/cWarnings.ml
+++ b/lib/cWarnings.ml
@@ -93,8 +93,12 @@ let split_flags s =
     "all" flag, and reverses the list. *)
 let rec cut_before_all_rev acc = function
   | [] -> acc
-  | (_status,name as w) :: warnings ->
-     cut_before_all_rev (w :: if is_all_keyword name then [] else acc) warnings
+  | (_status,name as w) :: warnings ->
+    let acc =
+      if is_all_keyword name then [w] (* "all" drops every flag seen before it *)
+      else if is_none_keyword name then [(Disabled,"all")] (* "none" is recorded as disabled "all" *)
+      else w :: acc in
+    cut_before_all_rev acc warnings
 
 let cut_before_all_rev warnings = cut_before_all_rev [] warnings
 
diff --git a/lib/coqProject_file.ml4 b/lib/coqProject_file.ml4
index 13de731f54..970666638c 100644
--- a/lib/coqProject_file.ml4
+++ b/lib/coqProject_file.ml4
@@ -206,7 +206,7 @@ let rec find_project_file ~from ~projfile_name =
   if Sys.file_exists fname then Some fname
   else
     let newdir = Filename.dirname from in
-    if newdir = "" || newdir = "/" then None
+    if newdir = from then None
     else find_project_file ~from:newdir ~projfile_name
 ;;
 
diff --git a/lib/envars.ml b/lib/envars.ml
index 68604ae6c9..206d750338 100644
--- a/lib/envars.ml
+++ b/lib/envars.ml
@@ -213,6 +213,7 @@ let print_config ?(prefix_var_name="") f coq_src_subdirs =
   fprintf f "%sCAMLP4BIN=%s/\n" prefix_var_name (camlp4bin ());
   fprintf f "%sCAMLP4LIB=%s\n" prefix_var_name (camlp4lib ());
   fprintf f "%sCAMLP4OPTIONS=%s\n" prefix_var_name Coq_config.camlp4compat;
+  fprintf f "%sCAMLFLAGS=%s\n" prefix_var_name Coq_config.caml_flags;
   fprintf f "%sHASNATDYNLINK=%s\n" prefix_var_name
     (if Coq_config.has_natdynlink then "true" else "false");
   fprintf f "%sCOQ_SRC_SUBDIRS=%s\n" prefix_var_name (String.concat " " coq_src_subdirs)
diff --git a/lib/loc.ml b/lib/loc.ml
index 06da13d44f..4a935a9d9c 100644
--- a/lib/loc.ml
+++ b/lib/loc.ml
@@ -62,6 +62,8 @@ let merge_opt l1 l2 = match l1, l2 with
 
 let unloc loc = (loc.bp, loc.ep)
 
+let shift_loc kb kp loc = { loc with bp = loc.bp + kb ; ep = loc.ep + kp }
+
 (** Located type *)
 type 'a located = t option * 'a
 
diff --git a/lib/loc.mli b/lib/loc.mli
index d4061e0446..fde490cc8a 100644
--- a/lib/loc.mli
+++ b/lib/loc.mli
@@ -40,6 +40,11 @@ val merge : t -> t -> t
 val merge_opt : t option -> t option -> t option
 (** Merge locations, usually generating the largest possible span *)
 
+val shift_loc : int -> int -> t -> t
+(** [shift_loc n p loc] shifts the beginning of location [loc] by [n]
+    and the end by [p]; it is assumed that the shifts do not change the
+    lines at which the location starts and ends *)
+
 (** {5 Located exceptions} *)
 
 val add_loc : Exninfo.info -> t -> Exninfo.info
diff --git a/lib/pp.ml b/lib/pp.ml
index 88ddcb35b5..c3338688d2 100644
--- a/lib/pp.ml
+++ b/lib/pp.ml
@@ -82,10 +82,21 @@ let utf8_length s =
   done ;
   !cnt
 
-let app s1 s2 = match s1, s2 with
-  | Ppcmd_empty, s
-  | s, Ppcmd_empty -> s
-  | s1, s2         -> Ppcmd_glue [s1; s2]
+let rec app d1 d2 = match d1, d2 with
+  | Ppcmd_empty,        d
+  | d,                  Ppcmd_empty      -> d
+
+  (* Optimizations *)
+  | Ppcmd_glue [l1;l2], Ppcmd_glue l3    -> Ppcmd_glue (l1 :: l2 :: l3)
+  | Ppcmd_glue [l1;l2], d2               -> Ppcmd_glue [l1 ; l2 ; d2]
+  | d1,                 Ppcmd_glue l2    -> Ppcmd_glue (d1 :: l2)
+
+  | Ppcmd_tag(t1,d1),   Ppcmd_tag(t2,d2)
+    when t1 = t2                         -> Ppcmd_tag(t1,app d1 d2)
+  | d1, d2                               -> Ppcmd_glue [d1; d2]
+  (* Optimizations deemed too costly *)
+  (* | Ppcmd_glue l1,    Ppcmd_glue l2    -> Ppcmd_glue   (l1 @ l2) *)
+  (* | Ppcmd_string s1,  Ppcmd_string s2  -> Ppcmd_string (s1 ^ s2) *)
 
 let seq s = Ppcmd_glue s
 
diff --git a/lib/system.ml b/lib/system.ml
index 12eacf2eaf..0b64b237da 100644
--- a/lib/system.ml
+++ b/lib/system.ml
@@ -52,7 +52,7 @@ let dirmap = ref StrMap.empty
 
 let make_dir_table dir =
   let filter_dotfiles s f = if f.[0] = '.' then s else StrSet.add f s in
-  Array.fold_left filter_dotfiles StrSet.empty (readdir dir)
+  Array.fold_left filter_dotfiles StrSet.empty (Sys.readdir dir)
 
 let exists_in_dir_respecting_case dir bf =
   let cache_dir dir =
diff --git a/lib/unicode.ml b/lib/unicode.ml
index 959ccaf73c..f193c4e0f8 100644
--- a/lib/unicode.ml
+++ b/lib/unicode.ml
@@ -8,13 +8,14 @@
 
 (** Unicode utilities *)
 
-type status = Letter | IdentPart | Symbol | Unknown
+type status = Letter | IdentPart | Symbol | IdentSep | Unknown
 
 (* The following table stores classes of Unicode characters that
-   are used by the lexer. There are 3 different classes so 2 bits are
-   allocated for each character. We only use 16 bits over the 31 bits
-   to simplify the masking process. (This choice seems to be a good
-   trade-off between speed and space after some benchmarks.) *)
+   are used by the lexer. There are 5 different classes so 3 bits
+   are allocated for each character. We encode the masks of 8
+   characters per word, thus using 24 bits over the 31 available
+   bits. (This choice seems to be a good trade-off between speed
+   and space after some benchmarks.) *)
 
 (* A 256 KiB table, initially filled with zeros. *)
 let table = Array.make (1 lsl 17) 0
@@ -24,14 +25,15 @@ let table = Array.make (1 lsl 17) 0
    define the position of the pattern in the word.
    Notice that pattern "00" means "undefined". *)
 let mask i = function
-  | Letter    -> 1 lsl ((i land 7) lsl 1) (* 01 *)
-  | IdentPart -> 2 lsl ((i land 7) lsl 1) (* 10 *)
-  | Symbol    -> 3 lsl ((i land 7) lsl 1) (* 11 *)
-  | Unknown   -> 0 lsl ((i land 7) lsl 1) (* 00 *)
+  | Letter    -> 1 lsl ((i land 7) * 3) (* 001 *)
+  | IdentPart -> 2 lsl ((i land 7) * 3) (* 010 *)
+  | Symbol    -> 3 lsl ((i land 7) * 3) (* 011 *)
+  | IdentSep  -> 4 lsl ((i land 7) * 3) (* 100 *)
+  | Unknown   -> 0 lsl ((i land 7) * 3) (* 000 *)
 
-(* Helper to reset 2 bits in a word. *)
+(* Helper to reset 3 bits in a word. *)
 let reset_mask i =
-  lnot (3 lsl ((i land 7) lsl 1))
+  lnot (7 lsl ((i land 7) * 3))
 
 (* Initialize the lookup table from a list of segments, assigning
    a status to every character of each segment. The order of these
@@ -50,13 +52,14 @@ let mk_lookup_table_from_unicode_tables_for status tables =
 
 (* Look up into the table and interpret the found pattern. *)
 let lookup x =
-  let v = (table.(x lsr 3) lsr ((x land 7) lsl 1)) land 3 in
+  let v = (table.(x lsr 3) lsr ((x land 7) * 3)) land 7 in
     if      v = 1 then Letter
     else if v = 2 then IdentPart
     else if v = 3 then Symbol
+    else if v = 4 then IdentSep
     else Unknown
 
-(* [classify] discriminates between 3 different kinds of
+(* [classify] discriminates between 5 different kinds of
    symbols based on the standard unicode classification (extracted from
    Camomile). *)
 let classify =
@@ -67,13 +70,13 @@ let classify =
         Unicodetable.sm;           (* Symbol, maths.                    *)
         Unicodetable.sc;           (* Symbol, currency.                 *)
         Unicodetable.so;           (* Symbol, modifier.                 *)
-        Unicodetable.pd;           (* Punctation, dash.                 *)
-        Unicodetable.pc;           (* Punctation, connector.            *)
-        Unicodetable.pe;           (* Punctation, open.                 *)
-        Unicodetable.ps;           (* Punctation, close.                *)
-        Unicodetable.pi;           (* Punctation, initial quote.        *)
-        Unicodetable.pf;           (* Punctation, final quote.          *)
-        Unicodetable.po;           (* Punctation, other.                *)
+        Unicodetable.pd;           (* Punctuation, dash.                *)
+        Unicodetable.pc;           (* Punctuation, connector.           *)
+        Unicodetable.pe;           (* Punctuation, close.               *)
+        Unicodetable.ps;           (* Punctuation, open.                *)
+        Unicodetable.pi;           (* Punctuation, initial quote.       *)
+        Unicodetable.pf;           (* Punctuation, final quote.         *)
+        Unicodetable.po;           (* Punctuation, other.               *)
       ];
     mk_lookup_table_from_unicode_tables_for Letter
       [
@@ -107,14 +110,14 @@ let classify =
         [(0x02074, 0x02079)];      (* Superscript 4-9.                  *)
         single 0x0002E;            (* Dot.                              *)
       ];
-    mk_lookup_table_from_unicode_tables_for Letter
+    mk_lookup_table_from_unicode_tables_for IdentSep
       [
         single 0x005F;             (* Underscore.                       *)
         single 0x00A0;             (* Non breaking space.               *)
       ];
     mk_lookup_table_from_unicode_tables_for IdentPart
       [
-        single 0x0027;             (* Special space.                    *)
+        single 0x0027;             (* Single quote.                     *)
       ];
     (* Lookup *)
     lookup
@@ -163,24 +166,75 @@ let is_utf8 s =
   in
   try check 0 with End_of_input -> true | Invalid_argument _ -> false
 
+(* [escaped_non_utf8 s] copies [s], replacing every byte that is not part of
+   a structurally complete UTF-8 sequence by "%" followed by its hex code. *)
+let escaped_non_utf8 s =
+  let len = String.length s in
+  let buff = Buffer.create (len * 3) in
+  (* Only bytes >= 0x80 get escaped, so "%X" always yields two hex digits. *)
+  let add_escape c = Buffer.add_string buff (Printf.sprintf "%%%X" (Char.code c)) in
+  let rec process_trailing_aux i j =
+    if i = j then i else
+      match String.unsafe_get s i with
+      | '\128'..'\191' -> process_trailing_aux (i+1) j
+      | _ -> i in
+  (* Emit the [n]-byte sequence led by [s.[i]]; return the index to resume at. *)
+  let process_trailing i n =
+    let j = if i+n-1 >= len then i+1 else process_trailing_aux (i+1) (i+n) in
+    (if j = i+n then Buffer.add_substring buff s i n   (* complete: copy *)
+     else for k = i to j-1 do add_escape s.[k] done);  (* cut short: escape *)
+    j in
+  let rec process i =
+    if i >= len then Buffer.contents buff else
+      let c = String.unsafe_get s i in
+      match c with
+      | '\000'..'\127' -> Buffer.add_char buff c; process (i+1)
+      | '\128'..'\191' | '\248'..'\255' -> add_escape c; process (i+1)
+      | '\192'..'\223' -> process (process_trailing i 2)
+      | '\224'..'\239' -> process (process_trailing i 3)
+      | '\240'..'\247' -> process (process_trailing i 4)
+  in
+  process 0
+
+let escaped_if_non_utf8 s =
+  if is_utf8 s then s else escaped_non_utf8 s
+
 (* Check the well-formedness of an identifier *)
 
+let is_valid_ident_initial = function
+  | Letter | IdentSep -> true
+  | IdentPart | Symbol | Unknown -> false
+
 let initial_refutation j n s =
-  match classify n with
-  | Letter -> None
-  | _ ->
+  if is_valid_ident_initial (classify n) then None
+  else
       let c = String.sub s 0 j in
       Some (false,
             "Invalid character '"^c^"' at beginning of identifier \""^s^"\".")
 
+let is_valid_ident_trailing = function
+  | Letter | IdentSep | IdentPart -> true
+  | Symbol | Unknown -> false
+
 let trailing_refutation i j n s =
-  match classify n with
-  | Letter | IdentPart -> None
-  | _ ->
+  if is_valid_ident_trailing (classify n) then None
+  else
       let c = String.sub s i j in
       Some (false,
             "Invalid character '"^c^"' in identifier \""^s^"\".")
 
+let is_unknown = function
+  | Unknown -> true
+  | Letter | IdentSep | IdentPart | Symbol -> false
+
+let is_ident_part = function
+  | IdentPart -> true
+  | Letter | IdentSep | Symbol | Unknown -> false
+
+let is_ident_sep = function
+  | IdentSep -> true
+  | Letter | IdentPart | Symbol | Unknown -> false
+
 let ident_refutation s =
   if s = ".." then None else try
     let j, n = next_utf8 s 0 in
@@ -198,7 +252,7 @@ let ident_refutation s =
         |x -> x
   with
   | End_of_input -> Some (true,"The empty string is not an identifier.")
-  | Invalid_argument _ -> Some (true,s^": invalid utf8 sequence.")
+  | Invalid_argument _ -> Some (true,escaped_non_utf8 s^": invalid utf8 sequence.")
 
 let lowercase_unicode =
   let tree = Segmenttree.make Unicodetable.to_lower in
@@ -214,6 +268,26 @@ let lowercase_first_char s =
   let j, n = next_utf8 s 0 in
   utf8_of_unicode (lowercase_unicode n)
 
+let split_at_first_letter s =
+  let len = String.length s in
+  let n, v = next_utf8 s 0 in
+  (* Fast path: a one-byte first character other than '_' gives [None]. *)
+  if (n = 1 && s.[0] <> '_') || not (is_ident_sep (classify v)) then None
+  else begin
+    (* First index >= [i] whose character is neither separator nor ident part. *)
+    let rec skip i =
+      if i >= len then i else
+        let n', v = next_utf8 s i in
+        let non_letter =
+          ((* optim *) n' = 1 && (s.[i] = '_' || s.[i] = '\''))
+          || (let st = classify v in is_ident_sep st || is_ident_part st) in
+        if non_letter then skip (i + n') else i in
+    let cut = skip n in
+    let s1 = String.sub s 0 cut in
+    let s2 = String.sub s cut (len - cut) in
+    Some (s1,s2)
+  end
+
 (** For extraction, we need to encode unicode character into ascii ones *)
 
 let is_basic_ascii s =
@@ -268,9 +342,7 @@ let utf8_length s =
       | '\192'..'\223' -> nc := 1 (* expect 1 continuation byte *)
       | '\224'..'\239' -> nc := 2 (* expect 2 continuation bytes *)
       | '\240'..'\247' -> nc := 3 (* expect 3 continuation bytes *)
-      | '\248'..'\251' -> nc := 4 (* expect 4 continuation bytes *)
-      | '\252'..'\253' -> nc := 5 (* expect 5 continuation bytes *)
-      | '\254'..'\255' -> nc := 0 (* invalid byte *)
+      | '\248'..'\255' -> nc := 0 (* invalid byte *)
     end ;
     incr p ;
     while !p < len && !nc > 0 do
@@ -299,9 +371,7 @@ let utf8_sub s start_u len_u =
       |	'\192'..'\223' -> nc := 1 (* expect 1 continuation byte *)
       |	'\224'..'\239' -> nc := 2 (* expect 2 continuation bytes *)
       |	'\240'..'\247' -> nc := 3 (* expect 3 continuation bytes *)
-      |	'\248'..'\251' -> nc := 4 (* expect 4 continuation bytes *)
-      |	'\252'..'\253' -> nc := 5 (* expect 5 continuation bytes *)
-      |	'\254'..'\255' -> nc := 0 (* invalid byte *)
+      |	'\248'..'\255' -> nc := 0 (* invalid byte *)
     end ;
     incr p ;
     while !p < len_b && !nc > 0 do
diff --git a/lib/unicode.mli b/lib/unicode.mli
index c7d7424801..32ffbb8e94 100644
--- a/lib/unicode.mli
+++ b/lib/unicode.mli
@@ -8,7 +8,7 @@
 
 (** Unicode utilities *)
 
-type status = Letter | IdentPart | Symbol | Unknown
+type status
 
 (** Classify a unicode char into 3 classes or unknown. *)
 val classify : int -> status
@@ -17,10 +17,23 @@ val classify : int -> status
     Return [Some (b,s)] otherwise, where [s] is an explanation and [b] is severity. *)
 val ident_refutation : string -> (bool * string) option
 
+(** Tells if a valid initial character for an identifier *)
+val is_valid_ident_initial : status -> bool
+
+(** Tells if a valid non-initial character for an identifier *)
+val is_valid_ident_trailing : status -> bool
+
+(** Tells if a character is unclassified *)
+val is_unknown : status -> bool
+
 (** First char of a string, converted to lowercase
     @raise Assert_failure if the input string is empty. *)
 val lowercase_first_char : string -> string
 
+(** Split a string supposed to be an ident at the first letter;
+    as an optimization, return None if the first character is a letter *)
+val split_at_first_letter : string -> (string * string) option
+
 (** Return [true] if all UTF-8 characters in the input string are just plain
     ASCII characters. Returns [false] otherwise. *)
 val is_basic_ascii : string -> bool
@@ -40,3 +53,6 @@ val utf8_length : string -> int
 
 (** Variant of {!String.sub} for UTF-8 strings. *)
 val utf8_sub : string -> int -> int -> string
+
+(** Return a "%XX"-escaped string if it contains non UTF-8 characters. *)
+val escaped_if_non_utf8 : string -> string
diff --git a/lib/util.ml b/lib/util.ml
index 36282b2dac..6de012da0e 100644
--- a/lib/util.ml
+++ b/lib/util.ml
@@ -171,3 +171,12 @@ let open_utf8_file_in fname =
   let s = Bytes.make 3 ' ' in
   if input in_chan s 0 3 < 3 || not (is_bom s) then seek_in in_chan 0;
   in_chan
+
+(** [set_temporary_memory ()] returns a pair [(store, fetch)] over a fresh
+   cell: [store x] saves [x] (at most once) and returns it, [fetch ()] gives
+   it back. Handy to compute a value in the "when" clause of a "match" and
+   reuse it in the r.h.s of the clause *)
+let set_temporary_memory () =
+  let cell = ref None in
+  let store x = assert (!cell = None); cell := Some x; x in
+  let fetch () = match !cell with Some x -> x | None -> assert false in (store, fetch)
diff --git a/lib/util.mli b/lib/util.mli
index d910e7e28e..c54f5825cd 100644
--- a/lib/util.mli
+++ b/lib/util.mli
@@ -137,3 +137,8 @@ val sym : ('a, 'b) eq -> ('b, 'a) eq
 
 val open_utf8_file_in : string -> in_channel
 (** Open an utf-8 encoded file and skip the byte-order mark if any. *)
+
+val set_temporary_memory : unit -> ('a -> 'a) * (unit -> 'a)
+(** Returns a pair [(set, get)] sharing a fresh cell: [set x] stores [x] and
+   returns it; [get ()] returns the stored value. Typically [set] is called in
+   the "when" clause of a "match" and [get] in the r.h.s of the clause *)