From a66b57ba4bba866bb626bde2b6fe3b762347eb3e Mon Sep 17 00:00:00 2001
From: Pierre-Marie Pédrot
Date: Mon, 9 May 2016 16:33:21 +0200
Subject: Rename Lexer -> CLexer.

---
 .gitignore             |   2 +-
 dev/printers.mllib     |   2 +-
 grammar/argextend.ml4  |   2 +-
 grammar/grammar.mllib  |   2 +-
 parsing/cLexer.ml4     | 695 +++++++++++++++++++++++++++++++++++++++++++++++++
 parsing/cLexer.mli     |  38 +++
 parsing/compat.ml4     |   2 +-
 parsing/egramcoq.ml    |   6 +-
 parsing/g_constr.ml4   |   2 +-
 parsing/g_prim.ml4     |   2 +-
 parsing/g_tactic.ml4   |   2 +-
 parsing/g_vernac.ml4   |   2 +-
 parsing/lexer.ml4      | 695 -------------------------------------------------
 parsing/lexer.mli      |  38 ---
 parsing/parsing.mllib  |   2 +-
 parsing/pcoq.ml4       |   4 +-
 tactics/tacinterp.ml   |   2 +-
 toplevel/cerrors.ml    |   2 +-
 toplevel/coqloop.ml    |   2 +-
 toplevel/coqtop.ml     |   4 +-
 toplevel/metasyntax.ml |  14 +-
 toplevel/vernac.ml     |   8 +-
 22 files changed, 764 insertions(+), 764 deletions(-)
 create mode 100644 parsing/cLexer.ml4
 create mode 100644 parsing/cLexer.mli
 delete mode 100644 parsing/lexer.ml4
 delete mode 100644 parsing/lexer.mli

diff --git a/.gitignore b/.gitignore
index 3adb9c67c1..6d843661ff 100644
--- a/.gitignore
+++ b/.gitignore
@@ -127,7 +127,7 @@ grammar/tacextend.ml
 grammar/vernacextend.ml
 grammar/argextend.ml
 parsing/pcoq.ml
-parsing/lexer.ml
+parsing/cLexer.ml
 plugins/setoid_ring/newring.ml
 plugins/field/field.ml
 plugins/nsatz/nsatz.ml
diff --git a/dev/printers.mllib b/dev/printers.mllib
index ad9a5d75e6..e054c1bc70 100644
--- a/dev/printers.mllib
+++ b/dev/printers.mllib
@@ -151,7 +151,7 @@ States
 
 Genprint
 Tok
-Lexer
+CLexer
 Ppextend
 Pputils
 Ppannotation
diff --git a/grammar/argextend.ml4 b/grammar/argextend.ml4
index cb0f7d2d31..b3c71a703c 100644
--- a/grammar/argextend.ml4
+++ b/grammar/argextend.ml4
@@ -287,7 +287,7 @@ EXTEND
 	GramNonTerminal (!@loc, t, g, Some (Names.Id.of_string s))
       | s = STRING ->
 	  if String.length s > 0 && Util.is_letter s.[0] then
-	    Lexer.add_keyword s;
+	    CLexer.add_keyword s;
           GramTerminal s
     ] ]
   ;
diff --git a/grammar/grammar.mllib b/grammar/grammar.mllib
index 71e5b8ae2c..1864fae5fc 100644
--- a/grammar/grammar.mllib
+++ b/grammar/grammar.mllib
@@ -50,7 +50,7 @@ Constrexpr_ops
 
 Compat
 Tok
-Lexer
+CLexer
 Pcoq
 G_prim
 G_tactic
diff --git a/parsing/cLexer.ml4 b/parsing/cLexer.ml4
new file mode 100644
index 0000000000..5d96873f31
--- /dev/null
+++ b/parsing/cLexer.ml4
@@ -0,0 +1,695 @@
+(************************************************************************)
+(*  v      *   The Coq Proof Assistant  /  The Coq Development Team     *)
+(* <O___,, *   INRIA - CNRS - LIX - LRI - PPS - Copyright 1999-2016     *)
+(*   \VV/  **************************************************************)
+(*    //   *      This file is distributed under the terms of the       *)
+(*         *       GNU Lesser General Public License Version 2.1        *)
+(************************************************************************)
+
+open Pp
+open Util
+open Compat
+open Tok
+
+(* Dictionaries: trees annotated with string options, each node being a map
+   from chars to dictionaries (the subtrees). A trie, in other words. *)
+
+module CharOrd = struct type t = char let compare : char -> char -> int = compare end
+module CharMap = Map.Make (CharOrd)
+
+type ttree = {
+  node : string option;
+  branch : ttree CharMap.t }
+
+let empty_ttree = { node = None; branch = CharMap.empty }
+
+let ttree_add ttree str =
+  let rec insert tt i =
+    if i == String.length str then
+      {node = Some str; branch = tt.branch}
+    else
+      let c = str.[i] in
+      let br =
+        match try Some (CharMap.find c tt.branch) with Not_found -> None with
+          | Some tt' ->
+              CharMap.add c (insert tt' (i + 1)) (CharMap.remove c tt.branch)
+          | None ->
+              let tt' = {node = None; branch = CharMap.empty} in
+              CharMap.add c (insert tt' (i + 1)) tt.branch
+      in
+      { node = tt.node; branch = br }
+  in
+  insert ttree 0
+
+(* Search a string in a dictionary: raises [Not_found]
+   if the word is not present. *)
+
+let ttree_find ttree str =
+  let rec proc_rec tt i =
+    if i == String.length str then tt
+    else proc_rec (CharMap.find str.[i] tt.branch) (i+1)
+  in
+  proc_rec ttree 0
+
+(* Removes a string from a dictionary: returns an equal dictionary
+   if the word not present. *)
+let ttree_remove ttree str =
+  let rec remove tt i =
+    if i == String.length str then
+      {node = None; branch = tt.branch}
+    else
+      let c = str.[i] in
+      let br =
+        match try Some (CharMap.find c tt.branch) with Not_found -> None with
+          | Some tt' ->
+              CharMap.add c (remove tt' (i + 1)) (CharMap.remove c tt.branch)
+          | None -> tt.branch
+      in
+      { node = tt.node; branch = br }
+  in
+  remove ttree 0
+
+
+(* Errors occurring while lexing (explained as "Lexer error: ...") *)
+
+module Error = struct
+
+  type t =
+    | Illegal_character
+    | Unterminated_comment
+    | Unterminated_string
+    | Undefined_token
+    | Bad_token of string
+    | UnsupportedUnicode of int
+
+  exception E of t
+
+  let to_string x =
+    "Syntax Error: Lexer: " ^
+      (match x with
+         | Illegal_character -> "Illegal character"
+         | Unterminated_comment -> "Unterminated comment"
+         | Unterminated_string -> "Unterminated string"
+         | Undefined_token -> "Undefined token"
+         | Bad_token tok -> Format.sprintf "Bad token %S" tok
+         | UnsupportedUnicode x ->
+             Printf.sprintf "Unsupported Unicode character (0x%x)" x)
+
+  (* Require to fix the Camlp4 signature *)
+  let print ppf x = Pp.pp_with ppf (Pp.str (to_string x))
+
+end
+open Error
+
+let err loc str = Loc.raise (Loc.make_loc loc) (Error.E str)
+
+let bad_token str = raise (Error.E (Bad_token str))
+
+(* Lexer conventions on tokens *)
+
+type token_kind =
+  | Utf8Token of (Unicode.status * int)
+  | AsciiChar
+  | EmptyStream
+
+let error_unsupported_unicode_character n unicode cs =
+  let bp = Stream.count cs in
+  err (bp,bp+n) (UnsupportedUnicode unicode)
+
+let error_utf8 cs =
+  let bp = Stream.count cs in
+  Stream.junk cs; (* consume the char to avoid read it and fail again *)
+  err (bp, bp+1) Illegal_character
+
+let utf8_char_size cs = function
+  (* Utf8 leading byte *)
+  | '\x00'..'\x7F' -> 1
+  | '\xC0'..'\xDF' -> 2
+  | '\xE0'..'\xEF' -> 3
+  | '\xF0'..'\xF7' -> 4
+  | _ (* '\x80'..\xBF'|'\xF8'..'\xFF' *) -> error_utf8 cs
+
+let njunk n = Util.repeat n Stream.junk
+
+let check_utf8_trailing_byte cs c =
+  if not (Int.equal (Char.code c land 0xC0) 0x80) then error_utf8 cs
+
+(* Recognize utf8 blocks (of length less than 4 bytes) *)
+(* but don't certify full utf8 compliance (e.g. no emptyness check) *)
+let lookup_utf8_tail c cs =
+  let c1 = Char.code c in
+  if Int.equal (c1 land 0x40) 0 || Int.equal (c1 land 0x38) 0x38 then error_utf8 cs
+  else
+    let n, unicode =
+      if Int.equal (c1 land 0x20) 0 then
+      match Stream.npeek 2 cs with
+      | [_;c2] ->
+          check_utf8_trailing_byte cs c2;
+          2, (c1 land 0x1F) lsl 6 + (Char.code c2 land 0x3F)
+      | _ -> error_utf8 cs
+      else if Int.equal (c1 land 0x10) 0 then
+      match Stream.npeek 3 cs with
+      | [_;c2;c3] ->
+          check_utf8_trailing_byte cs c2; check_utf8_trailing_byte cs c3;
+          3, (c1 land 0x0F) lsl 12 + (Char.code c2 land 0x3F) lsl 6 +
+          (Char.code c3 land 0x3F)
+      | _ -> error_utf8 cs
+      else match Stream.npeek 4 cs with
+      | [_;c2;c3;c4] ->
+          check_utf8_trailing_byte cs c2; check_utf8_trailing_byte cs c3;
+          check_utf8_trailing_byte cs c4;
+          4, (c1 land 0x07) lsl 18 + (Char.code c2 land 0x3F) lsl 12 +
+          (Char.code c3 land 0x3F) lsl 6 + (Char.code c4 land 0x3F)
+      | _ -> error_utf8 cs
+    in
+    try Unicode.classify unicode, n
+    with Unicode.Unsupported ->
+      njunk n cs; error_unsupported_unicode_character n unicode cs
+
+let lookup_utf8 cs =
+  match Stream.peek cs with
+    | Some ('\x00'..'\x7F') -> AsciiChar
+    | Some ('\x80'..'\xFF' as c) -> Utf8Token (lookup_utf8_tail c cs)
+    | None -> EmptyStream
+
+let unlocated f x = f x
+  (** FIXME: should we still unloc the exception? *)
+(*   try f x with Loc.Exc_located (_, exc) -> raise exc *)
+
+let check_keyword str =
+  let rec loop_symb = parser
+    | [< ' (' ' | '\n' | '\r' | '\t' | '"') >] -> bad_token str
+    | [< s >] ->
+        match unlocated lookup_utf8 s with
+        | Utf8Token (_,n) -> njunk n s; loop_symb s
+        | AsciiChar -> Stream.junk s; loop_symb s
+        | EmptyStream -> ()
+  in
+  loop_symb (Stream.of_string str)
+
+let check_keyword_to_add s =
+  try check_keyword s
+  with Error.E (UnsupportedUnicode unicode) ->
+    Flags.if_verbose msg_warning
+      (strbrk (Printf.sprintf "Token '%s' contains unicode character 0x%x \
+                               which will not be parsable." s unicode))
+
+let check_ident str =
+  let rec loop_id intail = parser
+    | [< ' ('a'..'z' | 'A'..'Z' | '_'); s >] ->
+        loop_id true s
+    | [< ' ('0'..'9' | ''') when intail; s >] ->
+        loop_id true s
+    | [< s >] ->
+        match unlocated lookup_utf8 s with
+        | Utf8Token (Unicode.Letter, n) -> njunk n s; loop_id true s
+        | Utf8Token (Unicode.IdentPart, n) when intail ->
+          njunk n s;
+          loop_id true s
+        | EmptyStream -> ()
+        | Utf8Token _ | AsciiChar -> bad_token str
+  in
+  loop_id false (Stream.of_string str)
+
+let is_ident str =
+  try let _ = check_ident str in true with Error.E _ -> false
+
+(* Keyword and symbol dictionary *)
+let token_tree = ref empty_ttree
+
+let is_keyword s =
+  try match (ttree_find !token_tree s).node with None -> false | Some _ -> true
+  with Not_found -> false
+
+let add_keyword str =
+  if not (is_keyword str) then
+    begin
+      check_keyword_to_add str;
+      token_tree := ttree_add !token_tree str
+    end
+
+let remove_keyword str =
+  token_tree := ttree_remove !token_tree str
+
+(* Freeze and unfreeze the state of the lexer *)
+type frozen_t = ttree
+
+let freeze () = !token_tree
+let unfreeze tt = (token_tree := tt)
+
+(* The string buffering machinery *)
+
+let buff = ref (String.create 80)
+
+let store len x =
+  if len >= String.length !buff then
+    buff := !buff ^ String.create (String.length !buff);
+  !buff.[len] <- x;
+  succ len
+
+let rec nstore n len cs =
+  if n>0 then nstore (n-1) (store len (Stream.next cs)) cs else len
+
+let get_buff len = String.sub !buff 0 len
+
+(* The classical lexer: idents, numbers, quoted strings, comments *)
+
+let rec ident_tail len = parser
+  | [< ' ('a'..'z' | 'A'..'Z' | '0'..'9' | ''' | '_' as c); s >] ->
+      ident_tail (store len c) s
+  | [< s >] ->
+      match lookup_utf8 s with
+      | Utf8Token ((Unicode.IdentPart | Unicode.Letter), n) ->
+          ident_tail (nstore n len s) s
+      | _ -> len
+
+let rec number len = parser
+  | [< ' ('0'..'9' as c); s >] -> number (store len c) s
+  | [< >] -> len
+
+let rec string in_comments bp len = parser
+  | [< ''"'; esc=(parser [<''"' >] -> true | [< >] -> false); s >] ->
+      if esc then string in_comments bp (store len '"') s else len
+  | [< ''('; s >] ->
+      (parser
+        | [< ''*'; s >] ->
+            string
+              (Option.map succ in_comments)
+              bp (store (store len '(') '*')
+              s
+        | [< >] ->
+            string in_comments bp (store len '(') s) s
+  | [< ''*'; s >] ->
+      (parser
+        | [< '')'; s >] ->
+            let () = match in_comments with
+            | Some 0 ->
+                msg_warning
+                  (strbrk
+                     "Not interpreting \"*)\" as the end of current \
+                      non-terminated comment because it occurs in a \
+                      non-terminated string of the comment.")
+            | _ -> ()
+            in
+            let in_comments = Option.map pred in_comments in
+            string in_comments bp (store (store len '*') ')') s
+        | [< >] ->
+            string in_comments bp (store len '*') s) s
+  | [< 'c; s >] -> string in_comments bp (store len c) s
+  | [< _ = Stream.empty >] ep -> err (bp, ep) Unterminated_string
+
+(* Hook for exporting comment into xml theory files *)
+let (f_xml_output_comment, xml_output_comment) = Hook.make ~default:ignore ()
+
+(* Utilities for comments in beautify *)
+let comment_begin = ref None
+let comm_loc bp = match !comment_begin with
+| None -> comment_begin := Some bp
+| _ -> ()
+
+let current = Buffer.create 8192
+let between_com = ref true
+
+type com_state = int option * string * bool
+let restore_com_state (o,s,b) =
+  comment_begin := o;
+  Buffer.clear current; Buffer.add_string current s;
+  between_com := b
+let dflt_com = (None,"",true)
+let com_state () =
+  let s = (!comment_begin, Buffer.contents current, !between_com) in
+  restore_com_state dflt_com; s
+
+let real_push_char c = Buffer.add_char current c
+
+(* Add a char if it is between two commands, if it is a newline or
+   if the last char is not a space itself. *)
+let push_char c =
+  if
+    !between_com || List.mem c ['\n';'\r'] ||
+    (List.mem c [' ';'\t']&&
+     (Int.equal (Buffer.length current) 0 ||
+      not (let s = Buffer.contents current in
+           List.mem s.[String.length s - 1] [' ';'\t';'\n';'\r'])))
+  then
+    real_push_char c
+
+let push_string s = Buffer.add_string current s
+
+let null_comment s =
+  let rec null i =
+    i<0 || (List.mem s.[i] [' ';'\t';'\n';'\r'] && null (i-1)) in
+  null (String.length s - 1)
+
+let comment_stop ep =
+  let current_s = Buffer.contents current in
+  if !Flags.xml_export && Buffer.length current > 0 &&
+    (!between_com || not(null_comment current_s)) then
+      Hook.get f_xml_output_comment current_s;
+  (if Flags.do_beautify() && Buffer.length current > 0 &&
+    (!between_com || not(null_comment current_s)) then
+    let bp = match !comment_begin with
+        Some bp -> bp
+      | None ->
+          msgerrnl(str "No begin location for comment '"
+                   ++ str current_s ++str"' ending at  "
+                   ++ int ep);
+          ep-1 in
+    Pp.comments := ((bp,ep),current_s) :: !Pp.comments);
+  Buffer.clear current;
+  comment_begin := None;
+  between_com := false
+
+(* Does not unescape!!! *)
+let rec comm_string bp = parser
+  | [< ''"' >] -> push_string "\""
+  | [< ''\\'; _ =
+           (parser [< ' ('"' | '\\' as c) >] ->
+              let () = match c with
+              | '"' -> real_push_char c
+              | _ -> ()
+              in
+              real_push_char c
+                 | [< >] -> real_push_char '\\'); s >]
+     -> comm_string bp s
+  | [< _ = Stream.empty >] ep -> err (bp, ep) Unterminated_string
+  | [< 'c; s >] -> real_push_char c; comm_string bp s
+
+let rec comment bp = parser bp2
+  | [< ''(';
+       _ = (parser
+              | [< ''*'; s >] -> push_string "(*"; comment bp s
+              | [< >] -> push_string "(" );
+       s >] -> comment bp s
+  | [< ''*';
+       _ = parser
+         | [< '')' >] -> push_string "*)";
+         | [< s >] -> real_push_char '*'; comment bp s >] -> ()
+  | [< ''"'; s >] ->
+      if Flags.do_beautify() then (push_string"\"";comm_string bp2 s)
+      else ignore (string (Some 0) bp2 0 s);
+      comment bp s
+  | [< _ = Stream.empty >] ep -> err (bp, ep) Unterminated_comment
+  | [< 'z; s >] -> real_push_char z; comment bp s
+
+(* Parse a special token, using the [token_tree] *)
+
+(* Peek as much utf-8 lexemes as possible *)
+(* and retain the longest valid special token obtained *)
+let rec progress_further last nj tt cs =
+  try progress_from_byte last nj tt cs (List.nth (Stream.npeek (nj+1) cs) nj)
+  with Failure _ -> last
+
+and update_longest_valid_token last nj tt cs =
+  match tt.node with
+  | Some _ as last' ->
+    stream_njunk nj cs;
+    progress_further last' 0 tt cs
+  | None ->
+    progress_further last nj tt cs
+
+(* nj is the number of char peeked since last valid token *)
+(* n the number of char in utf8 block *)
+and progress_utf8 last nj n c tt cs =
+  try
+    let tt = CharMap.find c tt.branch in
+    if Int.equal n 1 then
+      update_longest_valid_token last (nj+n) tt cs
+    else
+      match Util.List.skipn (nj+1) (Stream.npeek (nj+n) cs) with
+      | l when Int.equal (List.length l) (n - 1) ->
+         List.iter (check_utf8_trailing_byte cs) l;
+         let tt = List.fold_left (fun tt c -> CharMap.find c tt.branch) tt l in
+         update_longest_valid_token last (nj+n) tt cs
+      | _ ->
+          error_utf8 cs
+  with Not_found ->
+    last
+
+and progress_from_byte last nj tt cs c =
+  progress_utf8 last nj (utf8_char_size cs c) c tt cs
+
+let find_keyword id s =
+  let tt = ttree_find !token_tree id in
+  match progress_further tt.node 0 tt s with
+  | None -> raise Not_found
+  | Some c -> KEYWORD c
+
+let process_sequence bp c cs =
+  let rec aux n cs =
+    match Stream.peek cs with
+    | Some c' when c == c' -> Stream.junk cs; aux (n+1) cs
+    | _ -> BULLET (String.make n c), (bp, Stream.count cs)
+  in
+  aux 1 cs
+
+(* Must be a special token *)
+let process_chars bp c cs =
+  let t = progress_from_byte None (-1) !token_tree cs c in
+  let ep = Stream.count cs in
+  match t with
+    | Some t -> (KEYWORD t, (bp, ep))
+    | None ->
+        let ep' = bp + utf8_char_size cs c in
+        njunk (ep' - ep) cs;
+        err (bp, ep') Undefined_token
+
+let token_of_special c s = match c with
+  | '$' -> METAIDENT s
+  | '.' -> FIELD s
+  | _ -> assert false
+
+(* Parse what follows a dot / a dollar *)
+
+let parse_after_special c bp =
+  parser
+  | [< ' ('a'..'z' | 'A'..'Z' | '_' as d); len = ident_tail (store 0 d) >] ->
+      token_of_special c (get_buff len)
+  | [< s >] ->
+      match lookup_utf8 s with
+      | Utf8Token (Unicode.Letter, n) ->
+          token_of_special c (get_buff (ident_tail (nstore n 0 s) s))
+      | AsciiChar | Utf8Token _ | EmptyStream -> fst (process_chars bp c s)
+
+(* Parse what follows a question mark *)
+
+let parse_after_qmark bp s =
+  match Stream.peek s with
+    | Some ('a'..'z' | 'A'..'Z' | '_') -> LEFTQMARK
+    | None -> KEYWORD "?"
+    | _ ->
+        match lookup_utf8 s with
+          | Utf8Token (Unicode.Letter, _) -> LEFTQMARK
+          | AsciiChar | Utf8Token _ | EmptyStream ->
+            fst (process_chars bp '?' s)
+
+let blank_or_eof cs =
+  match Stream.peek cs with
+    | None -> true
+    | Some (' ' | '\t' | '\n' |'\r') -> true
+    | _ -> false
+
+(* Parse a token in a char stream *)
+
+let rec next_token = parser bp
+  | [< '' ' | '\t' | '\n' |'\r' as c; s >] ->
+      comm_loc bp; push_char c; next_token s
+  | [< ''$' as c; t = parse_after_special c bp >] ep ->
+      comment_stop bp; (t, (ep, bp))
+  | [< ''.' as c; t = parse_after_special c bp; s >] ep ->
+      comment_stop bp;
+      (* We enforce that "." should either be part of a larger keyword,
+         for instance ".(", or followed by a blank or eof. *)
+      let () = match t with
+      | KEYWORD ("." | "...") ->
+        if not (blank_or_eof s) then err (bp,ep+1) Undefined_token;
+        between_com := true;
+      | _ -> ()
+      in
+      (t, (bp,ep))
+  | [< ' ('-'|'+'|'*' as c); s >] ->
+      let t,new_between_com =
+        if !between_com then process_sequence bp c s,true
+        else process_chars bp c s,false
+      in
+      comment_stop bp; between_com := new_between_com; t
+  | [< ''?'; s >] ep ->
+      let t = parse_after_qmark bp s in comment_stop bp; (t, (ep, bp))
+  | [< ' ('a'..'z' | 'A'..'Z' | '_' as c);
+       len = ident_tail (store 0 c); s >] ep ->
+      let id = get_buff len in
+      comment_stop bp;
+      (try find_keyword id s with Not_found -> IDENT id), (bp, ep)
+  | [< ' ('0'..'9' as c); len = number (store 0 c) >] ep ->
+      comment_stop bp;
+      (INT (get_buff len), (bp, ep))
+  | [< ''\"'; len = string None bp 0 >] ep ->
+      comment_stop bp;
+      (STRING (get_buff len), (bp, ep))
+  | [< ' ('(' as c);
+      t = parser
+        | [< ''*'; s >] ->
+            comm_loc bp;
+            push_string "(*";
+            comment bp s;
+            next_token s
+        | [< t = process_chars bp c >] -> comment_stop bp; t >] ->
+      t
+  | [< s >] ->
+      match lookup_utf8 s with
+        | Utf8Token (Unicode.Letter, n) ->
+            let len = ident_tail (nstore n 0 s) s in
+            let id = get_buff len in
+            let ep = Stream.count s in
+            comment_stop bp;
+            (try find_keyword id s with Not_found -> IDENT id), (bp, ep)
+        | AsciiChar | Utf8Token ((Unicode.Symbol | Unicode.IdentPart), _) ->
+            let t = process_chars bp (Stream.next s) s in
+            let new_between_com = match t with
+              (KEYWORD ("{"|"}"),_) -> !between_com | _ -> false in
+            comment_stop bp; between_com := new_between_com; t
+        | EmptyStream ->
+            comment_stop bp; (EOI, (bp, bp + 1))
+
+(* (* Debug: uncomment this for tracing tokens seen by coq...*)
+let next_token s =
+  let (t,(bp,ep)) = next_token s in Printf.eprintf "[%s]\n%!" (Tok.to_string t);
+  (t,(bp,ep))
+*)
+
+(* Location table system for creating tables associating a token count
+   to its location in a char stream (the source) *)
+
+let locerr () = invalid_arg "Lexer: location function"
+
+let loct_create () = Hashtbl.create 207
+
+let loct_func loct i =
+  try Hashtbl.find loct i with Not_found -> locerr ()
+
+let loct_add loct i loc = Hashtbl.add loct i loc
+
+let current_location_table = ref (loct_create ())
+
+type location_table = (int, CompatLoc.t) Hashtbl.t
+let location_table () = !current_location_table
+let restore_location_table t = current_location_table := t
+
+(** {6 The lexer of Coq} *)
+
+(** Note: removing a token.
+   We do nothing because [remove_token] is called only when removing a grammar
+   rule with [Grammar.delete_rule]. The latter command is called only when
+   unfreezing the state of the grammar entries (see GRAMMAR summary, file
+   env/metasyntax.ml). Therefore, instead of removing tokens one by one,
+   we unfreeze the state of the lexer. This restores the behaviour of the
+   lexer. B.B. *)
+
+IFDEF CAMLP5 THEN
+
+type te = Tok.t
+
+(** Names of tokens, for this lexer, used in Grammar error messages *)
+
+let token_text = function
+  | ("", t) -> "'" ^ t ^ "'"
+  | ("IDENT", "") -> "identifier"
+  | ("IDENT", t) -> "'" ^ t ^ "'"
+  | ("INT", "") -> "integer"
+  | ("INT", s) -> "'" ^ s ^ "'"
+  | ("STRING", "") -> "string"
+  | ("EOI", "") -> "end of input"
+  | (con, "") -> con
+  | (con, prm) -> con ^ " \"" ^ prm ^ "\""
+
+let func cs =
+  let loct = loct_create () in
+  let ts =
+    Stream.from
+      (fun i ->
+         let (tok, loc) = next_token cs in
+         loct_add loct i (make_loc loc); Some tok)
+  in
+  current_location_table := loct;
+  (ts, loct_func loct)
+
+let lexer = {
+  Token.tok_func = func;
+  Token.tok_using =
+    (fun pat -> match Tok.of_pattern pat with
+       | KEYWORD s -> add_keyword s
+       | _ -> ());
+  Token.tok_removing = (fun _ -> ());
+  Token.tok_match = Tok.match_pattern;
+  Token.tok_comm = None;
+  Token.tok_text = token_text }
+
+ELSE (* official camlp4 for ocaml >= 3.10 *)
+
+module M_ = Camlp4.ErrorHandler.Register (Error)
+
+module Loc = CompatLoc
+module Token = struct
+  include Tok (* Cf. tok.ml *)
+  module Loc = CompatLoc
+  module Error = Camlp4.Struct.EmptyError
+  module Filter = struct
+    type token_filter = (Tok.t * Loc.t) Stream.t -> (Tok.t * Loc.t) Stream.t
+    type t = unit
+    let mk _is_kwd = ()
+    let keyword_added () kwd _ = add_keyword kwd
+    let keyword_removed () _ = ()
+    let filter () x = x
+    let define_filter () _ = ()
+  end
+end
+
+let mk () _init_loc(*FIXME*) cs =
+  let loct = loct_create () in
+  let rec self =
+    parser i
+       [< (tok, loc) = next_token; s >] ->
+          let loc = make_loc loc in
+            loct_add loct i loc;
+            [< '(tok, loc); self s >]
+      | [< >] -> [< >]
+  in current_location_table := loct; self cs
+
+END
+
+(** Terminal symbols interpretation *)
+
+let is_ident_not_keyword s =
+  is_ident s && not (is_keyword s)
+
+let is_number s =
+  let rec aux i =
+    Int.equal (String.length s) i ||
+    match s.[i] with '0'..'9' -> aux (i+1) | _ -> false
+  in aux 0
+
+let strip s =
+  let len =
+    let rec loop i len =
+      if Int.equal i (String.length s) then len
+      else if s.[i] == ' ' then loop (i + 1) len
+      else loop (i + 1) (len + 1)
+    in
+    loop 0 0
+  in
+  if len == String.length s then s
+  else
+    let s' = String.create len in
+    let rec loop i i' =
+      if i == String.length s then s'
+      else if s.[i] == ' ' then loop (i + 1) i'
+      else begin s'.[i'] <- s.[i]; loop (i + 1) (i' + 1) end
+    in
+    loop 0 0
+
+let terminal s =
+  let s = strip s in
+  let () = match s with "" -> Errors.error "empty token." | _ -> () in
+  if is_ident_not_keyword s then IDENT s
+  else if is_number s then INT s
+  else KEYWORD s
diff --git a/parsing/cLexer.mli b/parsing/cLexer.mli
new file mode 100644
index 0000000000..24b0ec8471
--- /dev/null
+++ b/parsing/cLexer.mli
@@ -0,0 +1,38 @@
+(************************************************************************)
+(*  v      *   The Coq Proof Assistant  /  The Coq Development Team     *)
+(* <O___,, *   INRIA - CNRS - LIX - LRI - PPS - Copyright 1999-2016     *)
+(*   \VV/  **************************************************************)
+(*    //   *      This file is distributed under the terms of the       *)
+(*         *       GNU Lesser General Public License Version 2.1        *)
+(************************************************************************)
+
+val add_keyword : string -> unit
+val remove_keyword : string -> unit
+val is_keyword : string -> bool
+
+(* val location_function : int -> Loc.t *)
+
+(** for coqdoc *)
+type location_table
+val location_table : unit -> location_table
+val restore_location_table : location_table -> unit
+
+val check_ident : string -> unit
+val is_ident : string -> bool
+val check_keyword : string -> unit
+
+type frozen_t
+val freeze : unit -> frozen_t
+val unfreeze : frozen_t -> unit
+
+type com_state
+val com_state: unit -> com_state
+val restore_com_state: com_state -> unit
+
+val xml_output_comment : (string -> unit) Hook.t
+
+val terminal : string -> Tok.t
+
+(** The lexer of Coq: *)
+
+include Compat.LexerSig
diff --git a/parsing/compat.ml4 b/parsing/compat.ml4
index 17038ab5fc..1481c31f45 100644
--- a/parsing/compat.ml4
+++ b/parsing/compat.ml4
@@ -135,7 +135,7 @@ let to_coq_position = function
 END
 
 
-(** Signature of Lexer *)
+(** Signature of CLexer *)
 
 IFDEF CAMLP5 THEN
 
diff --git a/parsing/egramcoq.ml b/parsing/egramcoq.ml
index b0bbdd8136..4c77433038 100644
--- a/parsing/egramcoq.ml
+++ b/parsing/egramcoq.ml
@@ -317,9 +317,9 @@ let recover_constr_grammar ntn prec =
 (* Summary functions: the state of the lexer is included in that of the parser.
    Because the grammar affects the set of keywords when adding or removing
    grammar rules. *)
-type frozen_t = (int * all_grammar_command) list * Lexer.frozen_t
+type frozen_t = (int * all_grammar_command) list * CLexer.frozen_t
 
-let freeze _ : frozen_t = (!grammar_state, Lexer.freeze ())
+let freeze _ : frozen_t = (!grammar_state, CLexer.freeze ())
 
 (* We compare the current state of the grammar and the state to unfreeze,
    by computing the longest common suffixes *)
@@ -335,7 +335,7 @@ let unfreeze (grams, lex) =
   remove_grammars n;
   remove_levels n;
   grammar_state := common;
-  Lexer.unfreeze lex;
+  CLexer.unfreeze lex;
   List.iter extend_grammar (List.rev_map snd redo)
 
 (** No need to provide an init function : the grammar state is
diff --git a/parsing/g_constr.ml4 b/parsing/g_constr.ml4
index 5edb7b8082..73b088b467 100644
--- a/parsing/g_constr.ml4
+++ b/parsing/g_constr.ml4
@@ -29,7 +29,7 @@ let constr_kw =
     "Prop"; "Set"; "Type"; ".("; "_"; "..";
     "`{"; "`("; "{|"; "|}" ]
 
-let _ = List.iter Lexer.add_keyword constr_kw
+let _ = List.iter CLexer.add_keyword constr_kw
 
 let mk_cast = function
     (c,(_,None)) -> c
diff --git a/parsing/g_prim.ml4 b/parsing/g_prim.ml4
index 5297c163bc..20fbccb2db 100644
--- a/parsing/g_prim.ml4
+++ b/parsing/g_prim.ml4
@@ -15,7 +15,7 @@ open Pcoq
 open Pcoq.Prim
 
 let prim_kw = ["{"; "}"; "["; "]"; "("; ")"; "'"]
-let _ = List.iter Lexer.add_keyword prim_kw
+let _ = List.iter CLexer.add_keyword prim_kw
 
 
 let local_make_qualid l id = make_qualid (DirPath.make l) id
diff --git a/parsing/g_tactic.ml4 b/parsing/g_tactic.ml4
index 2a00a17640..8f3d04eeb6 100644
--- a/parsing/g_tactic.ml4
+++ b/parsing/g_tactic.ml4
@@ -25,7 +25,7 @@ open Pcoq
 let all_with delta = Redops.make_red_flag [FBeta;FIota;FZeta;delta]
 
 let tactic_kw = [ "->"; "<-" ; "by" ]
-let _ = List.iter Lexer.add_keyword tactic_kw
+let _ = List.iter CLexer.add_keyword tactic_kw
 
 let err () = raise Stream.Failure
 
diff --git a/parsing/g_vernac.ml4 b/parsing/g_vernac.ml4
index f3766a7d79..b19ad8807e 100644
--- a/parsing/g_vernac.ml4
+++ b/parsing/g_vernac.ml4
@@ -27,7 +27,7 @@ open Pcoq.Vernac_
 open Pcoq.Module
 
 let vernac_kw = [ ";"; ","; ">->"; ":<"; "<:"; "where"; "at" ]
-let _ = List.iter Lexer.add_keyword vernac_kw
+let _ = List.iter CLexer.add_keyword vernac_kw
 
 (* Rem: do not join the different GEXTEND into one, it breaks native *)
 (* compilation on PowerPC and Sun architectures *)
diff --git a/parsing/lexer.ml4 b/parsing/lexer.ml4
deleted file mode 100644
index 5d96873f31..0000000000
--- a/parsing/lexer.ml4
+++ /dev/null
@@ -1,695 +0,0 @@
-(************************************************************************)
-(*  v      *   The Coq Proof Assistant  /  The Coq Development Team     *)
-(* <O___,, *   INRIA - CNRS - LIX - LRI - PPS - Copyright 1999-2016     *)
-(*   \VV/  **************************************************************)
-(*    //   *      This file is distributed under the terms of the       *)
-(*         *       GNU Lesser General Public License Version 2.1        *)
-(************************************************************************)
-
-open Pp
-open Util
-open Compat
-open Tok
-
-(* Dictionaries: trees annotated with string options, each node being a map
-   from chars to dictionaries (the subtrees). A trie, in other words. *)
-
-module CharOrd = struct type t = char let compare : char -> char -> int = compare end
-module CharMap = Map.Make (CharOrd)
-
-type ttree = {
-  node : string option;
-  branch : ttree CharMap.t }
-
-let empty_ttree = { node = None; branch = CharMap.empty }
-
-let ttree_add ttree str =
-  let rec insert tt i =
-    if i == String.length str then
-      {node = Some str; branch = tt.branch}
-    else
-      let c = str.[i] in
-      let br =
-        match try Some (CharMap.find c tt.branch) with Not_found -> None with
-          | Some tt' ->
-              CharMap.add c (insert tt' (i + 1)) (CharMap.remove c tt.branch)
-          | None ->
-              let tt' = {node = None; branch = CharMap.empty} in
-              CharMap.add c (insert tt' (i + 1)) tt.branch
-      in
-      { node = tt.node; branch = br }
-  in
-  insert ttree 0
-
-(* Search a string in a dictionary: raises [Not_found]
-   if the word is not present. *)
-
-let ttree_find ttree str =
-  let rec proc_rec tt i =
-    if i == String.length str then tt
-    else proc_rec (CharMap.find str.[i] tt.branch) (i+1)
-  in
-  proc_rec ttree 0
-
-(* Removes a string from a dictionary: returns an equal dictionary
-   if the word not present. *)
-let ttree_remove ttree str =
-  let rec remove tt i =
-    if i == String.length str then
-      {node = None; branch = tt.branch}
-    else
-      let c = str.[i] in
-      let br =
-        match try Some (CharMap.find c tt.branch) with Not_found -> None with
-          | Some tt' ->
-              CharMap.add c (remove tt' (i + 1)) (CharMap.remove c tt.branch)
-          | None -> tt.branch
-      in
-      { node = tt.node; branch = br }
-  in
-  remove ttree 0
-
-
-(* Errors occurring while lexing (explained as "Lexer error: ...") *)
-
-module Error = struct
-
-  type t =
-    | Illegal_character
-    | Unterminated_comment
-    | Unterminated_string
-    | Undefined_token
-    | Bad_token of string
-    | UnsupportedUnicode of int
-
-  exception E of t
-
-  let to_string x =
-    "Syntax Error: Lexer: " ^
-      (match x with
-         | Illegal_character -> "Illegal character"
-         | Unterminated_comment -> "Unterminated comment"
-         | Unterminated_string -> "Unterminated string"
-         | Undefined_token -> "Undefined token"
-         | Bad_token tok -> Format.sprintf "Bad token %S" tok
-         | UnsupportedUnicode x ->
-             Printf.sprintf "Unsupported Unicode character (0x%x)" x)
-
-  (* Require to fix the Camlp4 signature *)
-  let print ppf x = Pp.pp_with ppf (Pp.str (to_string x))
-
-end
-open Error
-
-let err loc str = Loc.raise (Loc.make_loc loc) (Error.E str)
-
-let bad_token str = raise (Error.E (Bad_token str))
-
-(* Lexer conventions on tokens *)
-
-type token_kind =
-  | Utf8Token of (Unicode.status * int)
-  | AsciiChar
-  | EmptyStream
-
-let error_unsupported_unicode_character n unicode cs =
-  let bp = Stream.count cs in
-  err (bp,bp+n) (UnsupportedUnicode unicode)
-
-let error_utf8 cs =
-  let bp = Stream.count cs in
-  Stream.junk cs; (* consume the char to avoid read it and fail again *)
-  err (bp, bp+1) Illegal_character
-
-let utf8_char_size cs = function
-  (* Utf8 leading byte *)
-  | '\x00'..'\x7F' -> 1
-  | '\xC0'..'\xDF' -> 2
-  | '\xE0'..'\xEF' -> 3
-  | '\xF0'..'\xF7' -> 4
-  | _ (* '\x80'..\xBF'|'\xF8'..'\xFF' *) -> error_utf8 cs
-
-let njunk n = Util.repeat n Stream.junk
-
-let check_utf8_trailing_byte cs c =
-  if not (Int.equal (Char.code c land 0xC0) 0x80) then error_utf8 cs
-
-(* Recognize utf8 blocks (of length less than 4 bytes) *)
-(* but don't certify full utf8 compliance (e.g. no emptyness check) *)
-let lookup_utf8_tail c cs =
-  let c1 = Char.code c in
-  if Int.equal (c1 land 0x40) 0 || Int.equal (c1 land 0x38) 0x38 then error_utf8 cs
-  else
-    let n, unicode =
-      if Int.equal (c1 land 0x20) 0 then
-      match Stream.npeek 2 cs with
-      | [_;c2] ->
-          check_utf8_trailing_byte cs c2;
-          2, (c1 land 0x1F) lsl 6 + (Char.code c2 land 0x3F)
-      | _ -> error_utf8 cs
-      else if Int.equal (c1 land 0x10) 0 then
-      match Stream.npeek 3 cs with
-      | [_;c2;c3] ->
-          check_utf8_trailing_byte cs c2; check_utf8_trailing_byte cs c3;
-          3, (c1 land 0x0F) lsl 12 + (Char.code c2 land 0x3F) lsl 6 +
-          (Char.code c3 land 0x3F)
-      | _ -> error_utf8 cs
-      else match Stream.npeek 4 cs with
-      | [_;c2;c3;c4] ->
-          check_utf8_trailing_byte cs c2; check_utf8_trailing_byte cs c3;
-          check_utf8_trailing_byte cs c4;
-          4, (c1 land 0x07) lsl 18 + (Char.code c2 land 0x3F) lsl 12 +
-          (Char.code c3 land 0x3F) lsl 6 + (Char.code c4 land 0x3F)
-      | _ -> error_utf8 cs
-    in
-    try Unicode.classify unicode, n
-    with Unicode.Unsupported ->
-      njunk n cs; error_unsupported_unicode_character n unicode cs
-
-let lookup_utf8 cs =
-  match Stream.peek cs with
-    | Some ('\x00'..'\x7F') -> AsciiChar
-    | Some ('\x80'..'\xFF' as c) -> Utf8Token (lookup_utf8_tail c cs)
-    | None -> EmptyStream
-
-let unlocated f x = f x
-  (** FIXME: should we still unloc the exception? *)
-(*   try f x with Loc.Exc_located (_, exc) -> raise exc *)
-
-let check_keyword str =
-  let rec loop_symb = parser
-    | [< ' (' ' | '\n' | '\r' | '\t' | '"') >] -> bad_token str
-    | [< s >] ->
-        match unlocated lookup_utf8 s with
-        | Utf8Token (_,n) -> njunk n s; loop_symb s
-        | AsciiChar -> Stream.junk s; loop_symb s
-        | EmptyStream -> ()
-  in
-  loop_symb (Stream.of_string str)
-
-let check_keyword_to_add s =
-  try check_keyword s
-  with Error.E (UnsupportedUnicode unicode) ->
-    Flags.if_verbose msg_warning
-      (strbrk (Printf.sprintf "Token '%s' contains unicode character 0x%x \
-                               which will not be parsable." s unicode))
-
-let check_ident str =
-  let rec loop_id intail = parser
-    | [< ' ('a'..'z' | 'A'..'Z' | '_'); s >] ->
-        loop_id true s
-    | [< ' ('0'..'9' | ''') when intail; s >] ->
-        loop_id true s
-    | [< s >] ->
-        match unlocated lookup_utf8 s with
-        | Utf8Token (Unicode.Letter, n) -> njunk n s; loop_id true s
-        | Utf8Token (Unicode.IdentPart, n) when intail ->
-          njunk n s;
-          loop_id true s
-        | EmptyStream -> ()
-        | Utf8Token _ | AsciiChar -> bad_token str
-  in
-  loop_id false (Stream.of_string str)
-
-let is_ident str =
-  try let _ = check_ident str in true with Error.E _ -> false
-
-(* Keyword and symbol dictionary *)
-let token_tree = ref empty_ttree
-
-let is_keyword s =
-  try match (ttree_find !token_tree s).node with None -> false | Some _ -> true
-  with Not_found -> false
-
-let add_keyword str =
-  if not (is_keyword str) then
-    begin
-      check_keyword_to_add str;
-      token_tree := ttree_add !token_tree str
-    end
-
-let remove_keyword str =
-  token_tree := ttree_remove !token_tree str
-
-(* Freeze and unfreeze the state of the lexer *)
-type frozen_t = ttree
-
-let freeze () = !token_tree
-let unfreeze tt = (token_tree := tt)
-
-(* The string buffering machinery *)
-
-let buff = ref (String.create 80)
-
-let store len x =
-  if len >= String.length !buff then
-    buff := !buff ^ String.create (String.length !buff);
-  !buff.[len] <- x;
-  succ len
-
-let rec nstore n len cs =
-  if n>0 then nstore (n-1) (store len (Stream.next cs)) cs else len
-
-let get_buff len = String.sub !buff 0 len
-
-(* The classical lexer: idents, numbers, quoted strings, comments *)
-
-let rec ident_tail len = parser
-  | [< ' ('a'..'z' | 'A'..'Z' | '0'..'9' | ''' | '_' as c); s >] ->
-      ident_tail (store len c) s
-  | [< s >] ->
-      match lookup_utf8 s with
-      | Utf8Token ((Unicode.IdentPart | Unicode.Letter), n) ->
-          ident_tail (nstore n len s) s
-      | _ -> len
-
-let rec number len = parser
-  | [< ' ('0'..'9' as c); s >] -> number (store len c) s
-  | [< >] -> len
-
-let rec string in_comments bp len = parser
-  | [< ''"'; esc=(parser [<''"' >] -> true | [< >] -> false); s >] ->
-      if esc then string in_comments bp (store len '"') s else len
-  | [< ''('; s >] ->
-      (parser
-        | [< ''*'; s >] ->
-            string
-              (Option.map succ in_comments)
-              bp (store (store len '(') '*')
-              s
-        | [< >] ->
-            string in_comments bp (store len '(') s) s
-  | [< ''*'; s >] ->
-      (parser
-        | [< '')'; s >] ->
-            let () = match in_comments with
-            | Some 0 ->
-                msg_warning
-                  (strbrk
-                     "Not interpreting \"*)\" as the end of current \
-                      non-terminated comment because it occurs in a \
-                      non-terminated string of the comment.")
-            | _ -> ()
-            in
-            let in_comments = Option.map pred in_comments in
-            string in_comments bp (store (store len '*') ')') s
-        | [< >] ->
-            string in_comments bp (store len '*') s) s
-  | [< 'c; s >] -> string in_comments bp (store len c) s
-  | [< _ = Stream.empty >] ep -> err (bp, ep) Unterminated_string
-
-(* Hook for exporting comment into xml theory files *)
-let (f_xml_output_comment, xml_output_comment) = Hook.make ~default:ignore ()
-
-(* Utilities for comments in beautify *)
-let comment_begin = ref None
-let comm_loc bp = match !comment_begin with
-| None -> comment_begin := Some bp
-| _ -> ()
-
-let current = Buffer.create 8192
-let between_com = ref true
-
-type com_state = int option * string * bool
-let restore_com_state (o,s,b) =
-  comment_begin := o;
-  Buffer.clear current; Buffer.add_string current s;
-  between_com := b
-let dflt_com = (None,"",true)
-let com_state () =
-  let s = (!comment_begin, Buffer.contents current, !between_com) in
-  restore_com_state dflt_com; s
-
-let real_push_char c = Buffer.add_char current c
-
-(* Add a char if it is between two commands, if it is a newline or
-   if the last char is not a space itself. *)
-let push_char c =
-  if
-    !between_com || List.mem c ['\n';'\r'] ||
-    (List.mem c [' ';'\t']&&
-     (Int.equal (Buffer.length current) 0 ||
-      not (let s = Buffer.contents current in
-           List.mem s.[String.length s - 1] [' ';'\t';'\n';'\r'])))
-  then
-    real_push_char c
-
-let push_string s = Buffer.add_string current s
-
-let null_comment s =
-  let rec null i =
-    i<0 || (List.mem s.[i] [' ';'\t';'\n';'\r'] && null (i-1)) in
-  null (String.length s - 1)
-
-let comment_stop ep =
-  let current_s = Buffer.contents current in
-  if !Flags.xml_export && Buffer.length current > 0 &&
-    (!between_com || not(null_comment current_s)) then
-      Hook.get f_xml_output_comment current_s;
-  (if Flags.do_beautify() && Buffer.length current > 0 &&
-    (!between_com || not(null_comment current_s)) then
-    let bp = match !comment_begin with
-        Some bp -> bp
-      | None ->
-          msgerrnl(str "No begin location for comment '"
-                   ++ str current_s ++str"' ending at  "
-                   ++ int ep);
-          ep-1 in
-    Pp.comments := ((bp,ep),current_s) :: !Pp.comments);
-  Buffer.clear current;
-  comment_begin := None;
-  between_com := false
-
-(* Does not unescape!!! *)
-let rec comm_string bp = parser
-  | [< ''"' >] -> push_string "\""
-  | [< ''\\'; _ =
-           (parser [< ' ('"' | '\\' as c) >] ->
-              let () = match c with
-              | '"' -> real_push_char c
-              | _ -> ()
-              in
-              real_push_char c
-                 | [< >] -> real_push_char '\\'); s >]
-     -> comm_string bp s
-  | [< _ = Stream.empty >] ep -> err (bp, ep) Unterminated_string
-  | [< 'c; s >] -> real_push_char c; comm_string bp s
-
-let rec comment bp = parser bp2
-  | [< ''(';
-       _ = (parser
-              | [< ''*'; s >] -> push_string "(*"; comment bp s
-              | [< >] -> push_string "(" );
-       s >] -> comment bp s
-  | [< ''*';
-       _ = parser
-         | [< '')' >] -> push_string "*)";
-         | [< s >] -> real_push_char '*'; comment bp s >] -> ()
-  | [< ''"'; s >] ->
-      if Flags.do_beautify() then (push_string"\"";comm_string bp2 s)
-      else ignore (string (Some 0) bp2 0 s);
-      comment bp s
-  | [< _ = Stream.empty >] ep -> err (bp, ep) Unterminated_comment
-  | [< 'z; s >] -> real_push_char z; comment bp s
-
-(* Parse a special token, using the [token_tree] *)
-
-(* Peek as much utf-8 lexemes as possible *)
-(* and retain the longest valid special token obtained *)
-let rec progress_further last nj tt cs =
-  try progress_from_byte last nj tt cs (List.nth (Stream.npeek (nj+1) cs) nj)
-  with Failure _ -> last
-
-and update_longest_valid_token last nj tt cs =
-  match tt.node with
-  | Some _ as last' ->
-    stream_njunk nj cs;
-    progress_further last' 0 tt cs
-  | None ->
-    progress_further last nj tt cs
-
-(* nj is the number of char peeked since last valid token *)
-(* n the number of char in utf8 block *)
-and progress_utf8 last nj n c tt cs =
-  try
-    let tt = CharMap.find c tt.branch in
-    if Int.equal n 1 then
-      update_longest_valid_token last (nj+n) tt cs
-    else
-      match Util.List.skipn (nj+1) (Stream.npeek (nj+n) cs) with
-      | l when Int.equal (List.length l) (n - 1) ->
-         List.iter (check_utf8_trailing_byte cs) l;
-         let tt = List.fold_left (fun tt c -> CharMap.find c tt.branch) tt l in
-         update_longest_valid_token last (nj+n) tt cs
-      | _ ->
-          error_utf8 cs
-  with Not_found ->
-    last
-
-and progress_from_byte last nj tt cs c =
-  progress_utf8 last nj (utf8_char_size cs c) c tt cs
-
-let find_keyword id s =
-  let tt = ttree_find !token_tree id in
-  match progress_further tt.node 0 tt s with
-  | None -> raise Not_found
-  | Some c -> KEYWORD c
-
-let process_sequence bp c cs =
-  let rec aux n cs =
-    match Stream.peek cs with
-    | Some c' when c == c' -> Stream.junk cs; aux (n+1) cs
-    | _ -> BULLET (String.make n c), (bp, Stream.count cs)
-  in
-  aux 1 cs
-
-(* Must be a special token *)
-let process_chars bp c cs =
-  let t = progress_from_byte None (-1) !token_tree cs c in
-  let ep = Stream.count cs in
-  match t with
-    | Some t -> (KEYWORD t, (bp, ep))
-    | None ->
-        let ep' = bp + utf8_char_size cs c in
-        njunk (ep' - ep) cs;
-        err (bp, ep') Undefined_token
-
-let token_of_special c s = match c with
-  | '$' -> METAIDENT s
-  | '.' -> FIELD s
-  | _ -> assert false
-
-(* Parse what follows a dot / a dollar *)
-
-let parse_after_special c bp =
-  parser
-  | [< ' ('a'..'z' | 'A'..'Z' | '_' as d); len = ident_tail (store 0 d) >] ->
-      token_of_special c (get_buff len)
-  | [< s >] ->
-      match lookup_utf8 s with
-      | Utf8Token (Unicode.Letter, n) ->
-          token_of_special c (get_buff (ident_tail (nstore n 0 s) s))
-      | AsciiChar | Utf8Token _ | EmptyStream -> fst (process_chars bp c s)
-
-(* Parse what follows a question mark *)
-
-let parse_after_qmark bp s =
-  match Stream.peek s with
-    | Some ('a'..'z' | 'A'..'Z' | '_') -> LEFTQMARK
-    | None -> KEYWORD "?"
-    | _ ->
-        match lookup_utf8 s with
-          | Utf8Token (Unicode.Letter, _) -> LEFTQMARK
-          | AsciiChar | Utf8Token _ | EmptyStream ->
-            fst (process_chars bp '?' s)
-
-let blank_or_eof cs =
-  match Stream.peek cs with
-    | None -> true
-    | Some (' ' | '\t' | '\n' |'\r') -> true
-    | _ -> false
-
-(* Parse a token in a char stream *)
-
-let rec next_token = parser bp
-  | [< '' ' | '\t' | '\n' |'\r' as c; s >] ->
-      comm_loc bp; push_char c; next_token s
-  | [< ''$' as c; t = parse_after_special c bp >] ep ->
-      comment_stop bp; (t, (ep, bp))
-  | [< ''.' as c; t = parse_after_special c bp; s >] ep ->
-      comment_stop bp;
-      (* We enforce that "." should either be part of a larger keyword,
-         for instance ".(", or followed by a blank or eof. *)
-      let () = match t with
-      | KEYWORD ("." | "...") ->
-        if not (blank_or_eof s) then err (bp,ep+1) Undefined_token;
-        between_com := true;
-      | _ -> ()
-      in
-      (t, (bp,ep))
-  | [< ' ('-'|'+'|'*' as c); s >] ->
-      let t,new_between_com =
-        if !between_com then process_sequence bp c s,true
-        else process_chars bp c s,false
-      in
-      comment_stop bp; between_com := new_between_com; t
-  | [< ''?'; s >] ep ->
-      let t = parse_after_qmark bp s in comment_stop bp; (t, (ep, bp))
-  | [< ' ('a'..'z' | 'A'..'Z' | '_' as c);
-       len = ident_tail (store 0 c); s >] ep ->
-      let id = get_buff len in
-      comment_stop bp;
-      (try find_keyword id s with Not_found -> IDENT id), (bp, ep)
-  | [< ' ('0'..'9' as c); len = number (store 0 c) >] ep ->
-      comment_stop bp;
-      (INT (get_buff len), (bp, ep))
-  | [< ''\"'; len = string None bp 0 >] ep ->
-      comment_stop bp;
-      (STRING (get_buff len), (bp, ep))
-  | [< ' ('(' as c);
-      t = parser
-        | [< ''*'; s >] ->
-            comm_loc bp;
-            push_string "(*";
-            comment bp s;
-            next_token s
-        | [< t = process_chars bp c >] -> comment_stop bp; t >] ->
-      t
-  | [< s >] ->
-      match lookup_utf8 s with
-        | Utf8Token (Unicode.Letter, n) ->
-            let len = ident_tail (nstore n 0 s) s in
-            let id = get_buff len in
-            let ep = Stream.count s in
-            comment_stop bp;
-            (try find_keyword id s with Not_found -> IDENT id), (bp, ep)
-        | AsciiChar | Utf8Token ((Unicode.Symbol | Unicode.IdentPart), _) ->
-            let t = process_chars bp (Stream.next s) s in
-            let new_between_com = match t with
-              (KEYWORD ("{"|"}"),_) -> !between_com | _ -> false in
-            comment_stop bp; between_com := new_between_com; t
-        | EmptyStream ->
-            comment_stop bp; (EOI, (bp, bp + 1))
-
-(* (* Debug: uncomment this for tracing tokens seen by coq...*)
-let next_token s =
-  let (t,(bp,ep)) = next_token s in Printf.eprintf "[%s]\n%!" (Tok.to_string t);
-  (t,(bp,ep))
-*)
-
-(* Location table system for creating tables associating a token count
-   to its location in a char stream (the source) *)
-
-let locerr () = invalid_arg "Lexer: location function"
-
-let loct_create () = Hashtbl.create 207
-
-let loct_func loct i =
-  try Hashtbl.find loct i with Not_found -> locerr ()
-
-let loct_add loct i loc = Hashtbl.add loct i loc
-
-let current_location_table = ref (loct_create ())
-
-type location_table = (int, CompatLoc.t) Hashtbl.t
-let location_table () = !current_location_table
-let restore_location_table t = current_location_table := t
-
-(** {6 The lexer of Coq} *)
-
-(** Note: removing a token.
-   We do nothing because [remove_token] is called only when removing a grammar
-   rule with [Grammar.delete_rule]. The latter command is called only when
-   unfreezing the state of the grammar entries (see GRAMMAR summary, file
-   env/metasyntax.ml). Therefore, instead of removing tokens one by one,
-   we unfreeze the state of the lexer. This restores the behaviour of the
-   lexer. B.B. *)
-
-IFDEF CAMLP5 THEN
-
-type te = Tok.t
-
-(** Names of tokens, for this lexer, used in Grammar error messages *)
-
-let token_text = function
-  | ("", t) -> "'" ^ t ^ "'"
-  | ("IDENT", "") -> "identifier"
-  | ("IDENT", t) -> "'" ^ t ^ "'"
-  | ("INT", "") -> "integer"
-  | ("INT", s) -> "'" ^ s ^ "'"
-  | ("STRING", "") -> "string"
-  | ("EOI", "") -> "end of input"
-  | (con, "") -> con
-  | (con, prm) -> con ^ " \"" ^ prm ^ "\""
-
-let func cs =
-  let loct = loct_create () in
-  let ts =
-    Stream.from
-      (fun i ->
-         let (tok, loc) = next_token cs in
-         loct_add loct i (make_loc loc); Some tok)
-  in
-  current_location_table := loct;
-  (ts, loct_func loct)
-
-let lexer = {
-  Token.tok_func = func;
-  Token.tok_using =
-    (fun pat -> match Tok.of_pattern pat with
-       | KEYWORD s -> add_keyword s
-       | _ -> ());
-  Token.tok_removing = (fun _ -> ());
-  Token.tok_match = Tok.match_pattern;
-  Token.tok_comm = None;
-  Token.tok_text = token_text }
-
-ELSE (* official camlp4 for ocaml >= 3.10 *)
-
-module M_ = Camlp4.ErrorHandler.Register (Error)
-
-module Loc = CompatLoc
-module Token = struct
-  include Tok (* Cf. tok.ml *)
-  module Loc = CompatLoc
-  module Error = Camlp4.Struct.EmptyError
-  module Filter = struct
-    type token_filter = (Tok.t * Loc.t) Stream.t -> (Tok.t * Loc.t) Stream.t
-    type t = unit
-    let mk _is_kwd = ()
-    let keyword_added () kwd _ = add_keyword kwd
-    let keyword_removed () _ = ()
-    let filter () x = x
-    let define_filter () _ = ()
-  end
-end
-
-let mk () _init_loc(*FIXME*) cs =
-  let loct = loct_create () in
-  let rec self =
-    parser i
-       [< (tok, loc) = next_token; s >] ->
-          let loc = make_loc loc in
-            loct_add loct i loc;
-            [< '(tok, loc); self s >]
-      | [< >] -> [< >]
-  in current_location_table := loct; self cs
-
-END
-
-(** Terminal symbols interpretation *)
-
-let is_ident_not_keyword s =
-  is_ident s && not (is_keyword s)
-
-let is_number s =
-  let rec aux i =
-    Int.equal (String.length s) i ||
-    match s.[i] with '0'..'9' -> aux (i+1) | _ -> false
-  in aux 0
-
-let strip s =
-  let len =
-    let rec loop i len =
-      if Int.equal i (String.length s) then len
-      else if s.[i] == ' ' then loop (i + 1) len
-      else loop (i + 1) (len + 1)
-    in
-    loop 0 0
-  in
-  if len == String.length s then s
-  else
-    let s' = String.create len in
-    let rec loop i i' =
-      if i == String.length s then s'
-      else if s.[i] == ' ' then loop (i + 1) i'
-      else begin s'.[i'] <- s.[i]; loop (i + 1) (i' + 1) end
-    in
-    loop 0 0
-
-let terminal s =
-  let s = strip s in
-  let () = match s with "" -> Errors.error "empty token." | _ -> () in
-  if is_ident_not_keyword s then IDENT s
-  else if is_number s then INT s
-  else KEYWORD s
diff --git a/parsing/lexer.mli b/parsing/lexer.mli
deleted file mode 100644
index 24b0ec8471..0000000000
--- a/parsing/lexer.mli
+++ /dev/null
@@ -1,38 +0,0 @@
-(************************************************************************)
-(*  v      *   The Coq Proof Assistant  /  The Coq Development Team     *)
-(* <O___,, *   INRIA - CNRS - LIX - LRI - PPS - Copyright 1999-2016     *)
-(*   \VV/  **************************************************************)
-(*    //   *      This file is distributed under the terms of the       *)
-(*         *       GNU Lesser General Public License Version 2.1        *)
-(************************************************************************)
-
-val add_keyword : string -> unit
-val remove_keyword : string -> unit
-val is_keyword : string -> bool
-
-(* val location_function : int -> Loc.t *)
-
-(** for coqdoc *)
-type location_table
-val location_table : unit -> location_table
-val restore_location_table : location_table -> unit
-
-val check_ident : string -> unit
-val is_ident : string -> bool
-val check_keyword : string -> unit
-
-type frozen_t
-val freeze : unit -> frozen_t
-val unfreeze : frozen_t -> unit
-
-type com_state
-val com_state: unit -> com_state
-val restore_com_state: com_state -> unit
-
-val xml_output_comment : (string -> unit) Hook.t
-
-val terminal : string -> Tok.t
-
-(** The lexer of Coq: *)
-
-include Compat.LexerSig
diff --git a/parsing/parsing.mllib b/parsing/parsing.mllib
index a0cb831931..0e1c79c91b 100644
--- a/parsing/parsing.mllib
+++ b/parsing/parsing.mllib
@@ -1,6 +1,6 @@
 Tok
 Compat
-Lexer
+CLexer
 Pcoq
 Egramml
 Egramcoq
diff --git a/parsing/pcoq.ml4 b/parsing/pcoq.ml4
index 28dc74e81b..f01e25c616 100644
--- a/parsing/pcoq.ml4
+++ b/parsing/pcoq.ml4
@@ -18,7 +18,7 @@ open Tok (* necessary for camlp4 *)
 
 (** The parser of Coq *)
 
-module G = GrammarMake (Lexer)
+module G = GrammarMake (CLexer)
 
 (* TODO: this is a workaround, since there isn't such
    [warning_verbose] in new camlp4. In camlp5, this ref
@@ -51,7 +51,7 @@ ELSE
   | tok -> Stoken ((=) tok, to_string tok)
 END
 
-let gram_token_of_string s = gram_token_of_token (Lexer.terminal s)
+let gram_token_of_string s = gram_token_of_token (CLexer.terminal s)
 
 let camlp4_verbosity silent f x =
   let a = !warning_verbose in
diff --git a/tactics/tacinterp.ml b/tactics/tacinterp.ml
index 5ecc46d670..a7acdf7594 100644
--- a/tactics/tacinterp.ml
+++ b/tactics/tacinterp.ml
@@ -491,7 +491,7 @@ let interp_fresh_id ist env sigma l =
 	String.concat "" (List.map (function
 	  | ArgArg s -> s
 	  | ArgVar (_,id) -> Id.to_string (interp_ident ist env sigma id)) l) in
-      let s = if Lexer.is_keyword s then s^"0" else s in
+      let s = if CLexer.is_keyword s then s^"0" else s in
       Id.of_string s in
   Tactics.fresh_id_in_env avoid id env
 
diff --git a/toplevel/cerrors.ml b/toplevel/cerrors.ml
index 600683d359..d8ee5d90da 100644
--- a/toplevel/cerrors.ml
+++ b/toplevel/cerrors.ml
@@ -35,7 +35,7 @@ let explain_exn_default = function
   (* Basic interaction exceptions *)
   | Stream.Error txt -> hov 0 (str "Syntax error: " ++ str txt ++ str ".")
   | Compat.Token.Error txt -> hov 0 (str "Syntax error: " ++ str txt ++ str ".")
-  | Lexer.Error.E err -> hov 0 (str (Lexer.Error.to_string err))
+  | CLexer.Error.E err -> hov 0 (str (CLexer.Error.to_string err))
   | Sys_error msg -> hov 0 (str "System error: " ++ guill msg)
   | Out_of_memory -> hov 0 (str "Out of memory.")
   | Stack_overflow -> hov 0 (str "Stack overflow.")
diff --git a/toplevel/coqloop.ml b/toplevel/coqloop.ml
index 063ed89643..040c339249 100644
--- a/toplevel/coqloop.ml
+++ b/toplevel/coqloop.ml
@@ -288,7 +288,7 @@ let rec discard_to_dot () =
   try
     Gram.entry_parse parse_to_dot top_buffer.tokens
   with
-    | Compat.Token.Error _ | Lexer.Error.E _ -> discard_to_dot ()
+    | Compat.Token.Error _ | CLexer.Error.E _ -> discard_to_dot ()
     | End_of_input -> raise End_of_input
     | e when Errors.noncritical e -> ()
 
diff --git a/toplevel/coqtop.ml b/toplevel/coqtop.ml
index 9e1a76bbd6..08b38a7037 100644
--- a/toplevel/coqtop.ml
+++ b/toplevel/coqtop.ml
@@ -230,11 +230,11 @@ let compile_files () =
     | [vf] -> compile_file vf (* One compilation : no need to save init state *)
     | l ->
       let init_state = States.freeze ~marshallable:`No in
-      let coqdoc_init_state = Lexer.location_table () in
+      let coqdoc_init_state = CLexer.location_table () in
       List.iter
         (fun vf ->
 	  States.unfreeze init_state;
-	  Lexer.restore_location_table coqdoc_init_state;
+	  CLexer.restore_location_table coqdoc_init_state;
           compile_file vf)
         (List.rev l)
 
diff --git a/toplevel/metasyntax.ml b/toplevel/metasyntax.ml
index 2ccd27acb9..c33726550d 100644
--- a/toplevel/metasyntax.ml
+++ b/toplevel/metasyntax.ml
@@ -31,7 +31,7 @@ open Nameops
 (**********************************************************************)
 (* Tokens                                                             *)
 
-let cache_token (_,s) = Lexer.add_keyword s
+let cache_token (_,s) = CLexer.add_keyword s
 
 let inToken : string -> obj =
   declare_object {(default_object "TOKEN") with
@@ -428,7 +428,7 @@ let rec interp_list_parser hd = function
 (* To protect alphabetic tokens and quotes from being seen as variables *)
 let quote_notation_token x =
   let n = String.length x in
-  let norm = Lexer.is_ident x in
+  let norm = CLexer.is_ident x in
   if (n > 0 && norm) || (n > 2 && x.[0] == '\'') then "'"^x^"'"
   else x
 
@@ -436,7 +436,7 @@ let rec raw_analyze_notation_tokens = function
   | []    -> []
   | String ".." :: sl -> NonTerminal ldots_var :: raw_analyze_notation_tokens sl
   | String "_" :: _ -> error "_ must be quoted."
-  | String x :: sl when Lexer.is_ident x ->
+  | String x :: sl when CLexer.is_ident x ->
       NonTerminal (Names.Id.of_string x) :: raw_analyze_notation_tokens sl
   | String s :: sl ->
       Terminal (String.drop_simple_quotes s) :: raw_analyze_notation_tokens sl
@@ -725,7 +725,7 @@ let rec define_keywords_aux = function
   | GramConstrNonTerminal(e,Some _) as n1 :: GramConstrTerminal(IDENT k) :: l
       when is_not_small_constr e ->
       Flags.if_verbose msg_info (str "Identifier '" ++ str k ++ str "' now a keyword");
-      Lexer.add_keyword k;
+      CLexer.add_keyword k;
       n1 :: GramConstrTerminal(KEYWORD k) :: define_keywords_aux l
   | n :: l -> n :: define_keywords_aux l
   | [] -> []
@@ -734,7 +734,7 @@ let rec define_keywords_aux = function
 let define_keywords = function
   | GramConstrTerminal(IDENT k)::l ->
       Flags.if_verbose msg_info (str "Identifier '" ++ str k ++ str "' now a keyword");
-      Lexer.add_keyword k;
+      CLexer.add_keyword k;
       GramConstrTerminal(KEYWORD k) :: define_keywords_aux l
   | l -> define_keywords_aux l
 
@@ -763,12 +763,12 @@ let make_production etyps symbols =
 	    let typ = List.assoc m etyps in
 	    distribute [GramConstrNonTerminal (typ, Some m)] ll
         | Terminal s ->
-	    distribute [GramConstrTerminal (Lexer.terminal s)] ll
+	    distribute [GramConstrTerminal (CLexer.terminal s)] ll
         | Break _ ->
             ll
         | SProdList (x,sl) ->
             let tkl = List.flatten
-              (List.map (function Terminal s -> [Lexer.terminal s]
+              (List.map (function Terminal s -> [CLexer.terminal s]
                 | Break _ -> []
                 | _ -> anomaly (Pp.str "Found a non terminal token in recursive notation separator")) sl) in
 	    match List.assoc x etyps with
diff --git a/toplevel/vernac.ml b/toplevel/vernac.ml
index 7c4920dfb8..a5f502503b 100644
--- a/toplevel/vernac.ml
+++ b/toplevel/vernac.ml
@@ -164,17 +164,17 @@ let save_translator_coqdoc () =
   (* translator state *)
   let ch = !chan_beautify in
   let cl = !Pp.comments in
-  let cs = Lexer.com_state() in
+  let cs = CLexer.com_state() in
   (* end translator state *)
-  let coqdocstate = Lexer.location_table () in
+  let coqdocstate = CLexer.location_table () in
   ch,cl,cs,coqdocstate
 
 let restore_translator_coqdoc (ch,cl,cs,coqdocstate) =
   if !Flags.beautify_file then close_out !chan_beautify;
   chan_beautify := ch;
   Pp.comments := cl;
-  Lexer.restore_com_state cs;
-  Lexer.restore_location_table coqdocstate
+  CLexer.restore_com_state cs;
+  CLexer.restore_location_table coqdocstate
 
 (* For coqtop -time, we display the position in the file,
    and a glimpse of the executed command *)
-- 
cgit v1.2.3