6 files changed, 538 insertions, 224 deletions
diff --git a/tools/coqdoc/cpretty.mll b/tools/coqdoc/cpretty.mll
index af8bac2c10..b700531739 100644
--- a/tools/coqdoc/cpretty.mll
+++ b/tools/coqdoc/cpretty.mll
@@ -18,7 +18,7 @@
   (* A function that emulates Lexing.new_line (which does not exist in OCaml < 3.11.0) *)
   let new_line lexbuf =
     let pos = lexbuf.lex_curr_p in
-      lexbuf.lex_curr_p <- { pos with 
+      lexbuf.lex_curr_p <- { pos with
 	pos_lnum = pos.pos_lnum + 1;
 	pos_bol = pos.pos_cnum }
 
@@ -40,6 +40,12 @@
     in
       count 0 0
 
+  let remove_newline s =
+    let n = String.length s in
+    let rec count i = if i == n || s.[i] <> '\n' then i else count (i + 1) in
+    let i = count 0 in
+    i, String.sub s i (n - i)
+
   let count_dashes s =
     let c = ref 0 in
     for i = 0 to String.length s - 1 do if s.[i] = '-' then incr c done;
@@ -76,7 +82,7 @@
 
   let start_emph () = in_emph := true; Output.start_emph ()
   let stop_emph () = if !in_emph then (Output.stop_emph (); in_emph := false)
-    
+
   let backtrack lexbuf = lexbuf.lex_curr_pos <- lexbuf.lex_start_pos;
     lexbuf.lex_curr_p <- lexbuf.lex_start_p
 
@@ -247,8 +253,6 @@
     else
       String.sub s 1 (String.length s - 3)
 
-  let symbol lexbuf s = Output.symbol s (lexeme_start lexbuf)
-
   let output_indented_keyword s lexbuf =
     let nbsp,isp = count_spaces s in
     Output.indentation nbsp;
@@ -269,7 +273,7 @@ let firstchar =
   '\194' '\185' |
   (* utf-8 latin 1 supplement *)
   '\195' ['\128'-'\191'] |
-  (* utf-8 letterlike symbols *) 
+  (* utf-8 letterlike symbols *)
   (* '\206' ([ '\145' - '\183'] | '\187') | *)
   (* '\xCF' [ '\x00' - '\xCE' ] |  *)
   (* utf-8 letterlike symbols *)
@@ -284,37 +288,12 @@ let pfx_id = (id '.')*
 let identifier =
   id | pfx_id id
 
-let utf8_multibyte =
-   [ '\xC0'-'\xDF' ] _
- | [ '\xE0'-'\xEF' ] _ _
- | [ '\xF0'-'\xF7' ] _ _ _
-
 (* This misses unicode stuff, and it adds "[" and "]".  It's only an
    approximation of idents - used for detecting whether an underscore
    is part of an identifier or meant to indicate emphasis *)
-let nonidentchar =
-  [^ 'A'-'Z' 'a'-'z' '_' '[' ']'
-      (* iso 8859-1 accents *)
-    '\192'-'\214' '\216'-'\246' '\248'-'\255'
-    '\'' '0'-'9' '@' ]
-
-let symbolchar_symbol_no_brackets =
-  ['!' '$' '%' '&' '*' '+' ',' '^' '#'
-      '\\' '/' '-' '<' '>' '|' ':' '?' '=' '~' ] |
-  (* utf-8 symbols *)
-  '\226' ['\134'-'\143' '\152'-'\155' '\164'-'\165' '\168'-'\171'] _
-let symbolchar_no_brackets = symbolchar_symbol_no_brackets |
-    [ '@' '{' '}' '(' ')' 'A'-'Z' 'a'-'z' '_']
-let symbolchar = symbolchar_no_brackets | '[' | ']'
-let token_no_brackets = symbolchar_symbol_no_brackets symbolchar_no_brackets*
-let token = symbolchar_symbol_no_brackets symbolchar* | '[' [^ '[' ']' ':']* ']'
-let printing_token = (token | id)+
-
-(* tokens with balanced brackets *)
-let token_brackets =
-  ( token_no_brackets ('[' token_no_brackets? ']')*
-  | token_no_brackets? ('[' token_no_brackets? ']')+ )
-  token_no_brackets?
+let nonidentchar = [^ 'A'-'Z' 'a'-'z' '_' '[' ']' '\'' '0'-'9' '@' ]
+
+let printing_token = [^ ' ' '\t']*
 
 let thm_token =
   "Theorem"
@@ -454,8 +433,8 @@ let end_verb = "(*" space* "end" space+ "verb" space* "*)"
 
 rule coq_bol = parse
   | space* nl+
-      { if not (!in_proof <> None && (!Cdglobals.gallina || !Cdglobals.light)) 
-        then Output.empty_line_of_code (); 
+      { if not (!in_proof <> None && (!Cdglobals.gallina || !Cdglobals.light))
+        then Output.empty_line_of_code ();
         coq_bol lexbuf }
   | space* "(**" space_nl
       { Output.end_coq (); Output.start_doc ();
@@ -538,7 +517,7 @@ rule coq_bol = parse
 	ignore (comment lexbuf);
 	coq_bol lexbuf }
   | space* "(**" space+ "remove" space+ "printing" space+
-      (identifier | token) space* "*)"
+      printing_token space* "*)"
       { remove_printing_token (lexeme lexbuf);
 	coq_bol lexbuf }
   | space* "(**" space+ "remove" space+ "printing" space+
@@ -591,7 +570,18 @@ and coq = parse
           else coq lexbuf
       }
   | nl+ space* "]]"
-      { if not !formatted then begin symbol lexbuf (lexeme lexbuf); coq lexbuf end }
+      { if not !formatted then
+	begin
+	  (* Isn't this an anomaly *)
+	  let s = lexeme lexbuf in
+	  let nlsp,s = remove_newline s in
+	  let nbsp,isp = count_spaces s in
+	  Output.indentation nbsp;
+	  let loc = lexeme_start lexbuf + isp + nlsp in
+	  Output.sublexer ']' loc;
+	  Output.sublexer ']' (loc+1);
+	  coq lexbuf
+	end }
   | eof
       { () }
   | gallina_kw_to_hide
@@ -891,22 +881,27 @@ and verbatim = parse
 and escaped_coq = parse
   | "]"
       { decr brackets;
-	if !brackets > 0 then begin Output.char ']'; escaped_coq lexbuf end }
+	if !brackets > 0 then
+	  (Output.sublexer ']' (lexeme_start lexbuf); escaped_coq lexbuf)
+	else Tokens.flush_sublexer () }
   | "["
-      { incr brackets; Output.char '['; escaped_coq lexbuf }
+      { incr brackets;
+        Output.sublexer '[' (lexeme_start lexbuf); escaped_coq lexbuf }
   | "(*"
-      { comment_level := 1; ignore (comment lexbuf); escaped_coq lexbuf }
+      { Tokens.flush_sublexer (); comment_level := 1;
+        ignore (comment lexbuf); escaped_coq lexbuf }
   | "*)"
       { (* likely to be a syntax error: we escape *) backtrack lexbuf }
   | eof
-      { () }
-  | token_brackets
-      { let s = lexeme lexbuf in
-	  symbol lexbuf s; escaped_coq lexbuf }
+      { Tokens.flush_sublexer () }
   | (identifier '.')* identifier
       { Output.ident (lexeme lexbuf) (lexeme_start lexbuf); escaped_coq lexbuf }
+  | space
+      { Tokens.flush_sublexer(); Output.char (lexeme_char lexbuf 0);
+        escaped_coq lexbuf }
   | _
-      { Output.char (lexeme_char lexbuf 0); escaped_coq lexbuf }
+      { Output.sublexer (lexeme_char lexbuf 0) (lexeme_start lexbuf);
+        escaped_coq lexbuf }
 
 (*s Coq "Comments" command. *)
 
@@ -1004,11 +999,18 @@ and body_bol = parse
   | _ { backtrack lexbuf; Output.indentation 0; body lexbuf }
 
 and body = parse
-  | nl {Output.line_break(); new_line lexbuf; body_bol lexbuf}
+  | nl {Tokens.flush_sublexer(); Output.line_break(); new_line lexbuf; body_bol lexbuf}
   | nl+ space* "]]" space* nl
-      { if not !formatted then
+      { Tokens.flush_sublexer();
+        if not !formatted then
           begin
-            symbol lexbuf (lexeme lexbuf);
+            let s = lexeme lexbuf in
+            let nlsp,s = remove_newline s in
+            let _,isp = count_spaces s in
+            let loc = lexeme_start lexbuf + nlsp + isp in
+            Output.sublexer ']' loc;
+            Output.sublexer ']' (loc+1);
+            Tokens.flush_sublexer();
             body lexbuf
           end
         else
@@ -1017,9 +1019,14 @@ and body = parse
             true
           end }
   | "]]" space* nl
-      { if not !formatted then
+      { Tokens.flush_sublexer();
+        if not !formatted then
           begin
-            symbol lexbuf (lexeme lexbuf);
+	    let loc = lexeme_start lexbuf in
+	    Output.sublexer ']' loc;
+	    Output.sublexer ']' (loc+1);
+	    Tokens.flush_sublexer();
+	    Output.line_break();
             body lexbuf
           end
         else
@@ -1027,12 +1034,12 @@ and body = parse
             Output.paragraph ();
             true
           end }
-  | eof { false }
+  | eof { Tokens.flush_sublexer(); false }
   | '.' space* nl | '.' space* eof
-	{ Output.char '.'; Output.line_break();
+	{ Tokens.flush_sublexer(); Output.char '.'; Output.line_break();
 	  if not !formatted then true else body_bol lexbuf }
   | '.' space* nl "]]" space* nl
-	{ Output.char '.';
+	{ Tokens.flush_sublexer(); Output.char '.';
         if not !formatted then
           begin
             eprintf "Error: stray ]] at %d\n"  (lexeme_start lexbuf);
@@ -1045,50 +1052,46 @@ and body = parse
             true
           end
       }
-  | '.' space+ { Output.char '.'; Output.char ' ';
+  | '.' space+
+        { Tokens.flush_sublexer(); Output.char '.'; Output.char ' ';
 	  if not !formatted then false else body lexbuf }
   | "(**" space_nl
-      { Output.end_coq (); Output.start_doc ();
+      { Tokens.flush_sublexer(); Output.end_coq (); Output.start_doc ();
 	let eol = doc_bol lexbuf in
 	  Output.end_doc (); Output.start_coq ();
 	  if eol then body_bol lexbuf else body lexbuf }
-  | "(*" { comment_level := 1;
+  | "(*" { Tokens.flush_sublexer(); comment_level := 1;
 	   if !Cdglobals.parse_comments then Output.start_comment ();
 	   let eol = comment lexbuf in
 	     if eol
 	     then begin if not !Cdglobals.parse_comments then Output.line_break(); body_bol lexbuf end
 	     else body lexbuf }
   | "where" space*
-      { let s = lexeme lexbuf in
-	Output.ident s (lexeme_start lexbuf);
+      { Tokens.flush_sublexer();
+        Output.ident (lexeme lexbuf) (lexeme_start lexbuf);
 	start_notation_string lexbuf }
   | identifier
-      { let s = lexeme lexbuf in
-	  Output.ident s (lexeme_start lexbuf);
-	  body lexbuf }
-  | token_no_brackets
-      { let s = lexeme lexbuf in
-	  symbol lexbuf s; body lexbuf }
+      { Tokens.flush_sublexer();
+        Output.ident (lexeme lexbuf) (lexeme_start lexbuf);
+	body lexbuf }
   | ".."
-      { Output.char '.'; Output.char '.';
+      { Tokens.flush_sublexer(); Output.char '.'; Output.char '.';
         body lexbuf }
-  | '"' 
-      { Output.char '"'; 
+  | '"'
+      { Tokens.flush_sublexer(); Output.char '"';
         string lexbuf;
         body lexbuf }
-
-  | utf8_multibyte
-      { let c = lexeme lexbuf in
-	symbol lexbuf c;
+  | space
+      { Tokens.flush_sublexer(); Output.char (lexeme_char lexbuf 0);
         body lexbuf }
 
   | _ { let c = lexeme_char lexbuf 0 in
-	  Output.char c;
-          body lexbuf }
+	Output.sublexer c (lexeme_start lexbuf);
+        body lexbuf }
 
 and start_notation_string = parse
   | '"' (* a true notation *)
-      { symbol lexbuf "\"";
+      { Output.sublexer '"' (lexeme_start lexbuf);
         notation_string lexbuf;
 	body lexbuf }
   | _ (* an abbreviation *)
@@ -1099,13 +1102,9 @@ and notation_string = parse
       { Output.char '"'; Output.char '"'; (* Unlikely! *)
         notation_string lexbuf }
   | '"'
-      { Output.char '"' }
-  | token
-      { let s = lexeme lexbuf in
-        symbol lexbuf s;
-        notation_string lexbuf }
+      { Tokens.flush_sublexer(); Output.char '"' }
   | _ { let c = lexeme_char lexbuf 0 in
-        Output.char c;
+        Output.sublexer c (lexeme_start lexbuf);
         notation_string lexbuf }
 
 and string = parse
@@ -1164,6 +1163,7 @@ and st_subtitle = parse
     let c = open_in f in
     let lb = from_channel c in
       (Index.current_library := m;
+       Output.initialize ();
        Output.start_module ();
        Output.start_coq (); coq_bol lb; Output.end_coq ();
        close_in c)
diff --git a/tools/coqdoc/main.ml b/tools/coqdoc/main.ml
index b52c28ff29..67c63865af 100644
--- a/tools/coqdoc/main.ml
+++ b/tools/coqdoc/main.ml
@@ -359,7 +359,6 @@ let parse () =
 	add_file (what_file f); parse_rec rem
   in
     parse_rec (List.tl (Array.to_list Sys.argv));
-    Output.initialize ();
     List.rev !files
 
 
diff --git a/tools/coqdoc/output.ml b/tools/coqdoc/output.ml
index 168a28f20b..0f36b775cd 100644
--- a/tools/coqdoc/output.ml
+++ b/tools/coqdoc/output.ml
@@ -96,22 +96,32 @@ let set_module m sub = current_module := (m,sub);
 let item_level = ref 0
 let in_doc = ref false
 
-(*s Customized pretty-print *)
-
-let token_pp = Hashtbl.create 97
-
-let add_printing_token = Hashtbl.replace token_pp
-
-let find_printing_token tok =
-  try Hashtbl.find token_pp tok with Not_found -> None, None
-
-let remove_printing_token = Hashtbl.remove token_pp
-
-(* predefined pretty-prints *)
-let initialize () =
+(*s Customized and predefined pretty-print *)
+
+let initialize_texmacs () =
+  let ensuremath x = sprintf "<with|mode|math|\\<%s\\>>" x in
+  List.fold_right (fun (s,t) tt -> Tokens.ttree_add tt s t)
+    [ "*", ensuremath "times";
+      "->", ensuremath "rightarrow";
+      "<-", ensuremath "leftarrow";
+      "<->", ensuremath "leftrightarrow";
+      "=>", ensuremath "Rightarrow";
+      "<=", ensuremath "le";
+      ">=", ensuremath "ge";
+      "<>", ensuremath "noteq";
+      "~",  ensuremath "lnot";
+      "/\\", ensuremath "land";
+      "\\/", ensuremath "lor";
+      "|-", ensuremath "vdash"
+    ] Tokens.empty_ttree
+
+let token_tree_texmacs = lazy (initialize_texmacs ())
+
+let initialize_tex_html () =
   let if_utf8 = if !Cdglobals.utf8 then fun x -> Some x else fun _ -> None in
-    List.iter
-      (fun (s,l,l') -> Hashtbl.add token_pp s (Some l, l'))
+    List.fold_right (fun (s,l,l') (tt,tt') ->
+      (Tokens.ttree_add tt s l,
+       match l' with None -> tt' | Some l' -> Tokens.ttree_add tt' s l'))
       [ "*" ,  "\\ensuremath{\\times}", if_utf8 "×";
 	"|", "\\ensuremath{|}", None;
 	"->",  "\\ensuremath{\\rightarrow}", if_utf8 "→";
@@ -132,7 +142,20 @@ let initialize () =
 	"Π", "\\ensuremath{\\Pi}", if_utf8 "Π";
 	"λ", "\\ensuremath{\\lambda}", if_utf8 "λ";
 	(* "fun", "\\ensuremath{\\lambda}" ? *)
-      ]
+      ] (Tokens.empty_ttree,Tokens.empty_ttree)
+
+let token_tree_latex = lazy (fst (initialize_tex_html ()))
+let token_tree_html = lazy (snd (initialize_tex_html ()))
+
+let add_printing_token s (t1,t2) =
+  match
+    (match !Cdglobals.target_language with LaTeX -> t1 | HTML -> t2 | _ -> None)
+  with
+  | None -> ()
+  | Some t -> Tokens.token_tree := Tokens.ttree_add !Tokens.token_tree s t
+
+let remove_printing_token s =
+  Tokens.token_tree := Tokens.ttree_remove !Tokens.token_tree s
 
 (*s Table of contents *)
 
@@ -187,6 +210,8 @@ module Latex = struct
       printf "\\end{document}\n"
     end
 
+  (*s Latex low-level translation *)
+
   let nbsp () = output_char '~'
 
   let char c = match c with
@@ -200,10 +225,13 @@ module Latex = struct
 	output_char c
 
   let label_char c = match c with
-    | '\\' | '$' | '#' | '%' | '&' | '{' | '}' | '_'
-    | '^' | '~' -> ()
-    | _ ->
-	output_char c
+    | '_' -> output_char ' '
+    | '\\' | '$' | '#' | '%' | '&' | '{' | '}'
+    | '^' | '~' -> printf "x%X" (Char.code c)
+    | _ -> if c >= '\x80' then printf "x%X" (Char.code c) else output_char c
+
+  let label_ident s =
+    for i = 0 to String.length s - 1 do label_char s.[i] done
 
   let latex_char = output_char
   let latex_string = output_string
@@ -211,11 +239,27 @@ module Latex = struct
   let html_char _ = ()
   let html_string _ = ()
 
-  let raw_ident s =
-    for i = 0 to String.length s - 1 do char s.[i] done
-
-  let label_ident s =
-    for i = 0 to String.length s - 1 do label_char s.[i] done
+  (*s Latex char escaping *)
+
+  let escaped =
+    let buff = Buffer.create 5 in
+    fun s ->
+      Buffer.clear buff;
+      for i = 0 to String.length s - 1 do
+	match s.[i] with
+	| '\\' ->
+	    Buffer.add_string buff "\\symbol{92}"
+	| '$' | '#' | '%' | '&' | '{' | '}' | '_' as c ->
+	    Buffer.add_char buff '\\'; Buffer.add_char buff c
+	| '^' | '~' as c ->
+	    Buffer.add_char buff '\\'; Buffer.add_char buff c;
+	    Buffer.add_string buff "{}"
+	| c ->
+	    Buffer.add_char buff c
+      done;
+      Buffer.contents buff
+
+  (*s Latex reference and symbol translation *)
 
   let start_module () =
     let ln = !lib_name in
@@ -224,9 +268,7 @@ module Latex = struct
         label_ident (get_module false);
         printf "}{";
         if ln <> "" then printf "%s " ln;
-        printf "}{";
-        raw_ident (get_module true);
-        printf "}\n\n"
+        printf "}{%s}\n\n" (escaped (get_module true))
     end
 
   let start_latex_math () = output_char '$'
@@ -244,35 +286,33 @@ module Latex = struct
       let space = 0.5 *. (float n) in
       printf "\\coqdocindent{%2.2fem}\n" space
 
-  let with_latex_printing f tok =
-    try
-      (match Hashtbl.find token_pp tok with
-	 | Some s, _ -> output_string s
-	 | _ -> f tok)
-    with Not_found ->
-      f tok
-
   let module_ref m s =
-    printf "\\moduleid{%s}{" m; raw_ident s; printf "}"
+    printf "\\moduleid{%s}{%s}" m (escaped s)
 
   let ident_ref m fid typ s =
     let id = if fid <> "" then (m ^ "." ^ fid) else m in
     match find_module m with
-      | Local -> 
-	  if typ = Variable then (printf "\\coqdoc%s{" (type_name typ); raw_ident s; printf "}")
+      | Local ->
+	  if typ = Variable then
+	    printf "\\coqdoc%s{%s}" (type_name typ) s
 	  else
-	    (printf "\\coqref{"; label_ident id; printf "}{\\coqdoc%s{" (type_name typ);
-	     raw_ident s; printf "}}")
+	    (printf "\\coqref{"; label_ident id;
+	     printf "}{\\coqdoc%s{%s}}" (type_name typ) s)
       | External m when !externals ->
-	  printf "\\coqexternalref{"; raw_ident m; printf "}{";
-	  label_ident fid; printf "}{\\coqdoc%s{" (type_name typ); raw_ident s; printf "}}"
+	  printf "\\coqexternalref{"; label_ident fid;
+	  printf "}{%s}{\\coqdoc%s{%s}}" (escaped m) (type_name typ) s
       | External _ | Unknown ->
-	  (* printf "\\coqref{"; label_ident id; printf "}{" *)
-	  printf "\\coqdoc%s{" (type_name typ); raw_ident s; printf "}"
+	  printf "\\coqdoc%s{%s}" (type_name typ) s
 
   let defref m id ty s =
-    printf "\\coqdef{"; label_ident (m ^ "." ^ id); printf "}{"; raw_ident s; printf "}{";
-    printf "\\coqdoc%s{" (type_name ty); raw_ident s; printf "}}"
+    if ty <> Notation then
+      (printf "\\coqdef{"; label_ident (m ^ "." ^ id);
+       printf "}{%s}{\\coqdoc%s{%s}}" s (type_name ty) s)
+    else
+      (* Glob file still not able to say the exact extent of the definition *)
+      (* so we currently renounce to highlight the notation location *)
+      (printf "\\coqdef{"; label_ident (m ^ "." ^ id);
+       printf "}{%s}{%s}" s s)
 
   let reference s = function
     | Def (fullid,typ) ->
@@ -282,31 +322,60 @@ module Latex = struct
     | Ref (m,fullid,typ) ->
 	ident_ref m fullid typ s
     | Mod _ ->
-	printf "\\coqdocvar{"; raw_ident s; printf "}"
+	printf "\\coqdocvar{%s}" (escaped s)
+
+  (*s The sublexer buffers symbol characters and attached
+       uninterpreted ident and try to apply special translation such as,
+       predefined, translation "->" to "\ensuremath{\rightarrow}" or,
+       virtually, a user-level translation from "=_h" to "\ensuremath{=_{h}}" *)
+
+  let output_sublexer_string doescape issymbchar tag s =
+    let s = if doescape then escaped s else s in
+    match tag with
+    | Some ref -> reference s ref
+    | None -> if issymbchar then output_string s else printf "\\coqdocvar{%s}" s
+
+  let sublexer c loc =
+    let tag =
+      try Some (Index.find (get_module false) loc) with Not_found -> None
+    in
+    Tokens.output_tagged_symbol_char tag c
+
+  let initialize () =
+    Tokens.token_tree := Lazy.force token_tree_latex;
+    Tokens.outfun := output_sublexer_string
+
+  (*s Interpreting ident with fallback on sublexer if unknown ident *)
+
+  let translate s =
+    match Tokens.translate s with Some s -> s | None -> escaped s
 
   let ident s loc =
     try
-      reference s (Index.find (get_module false) loc)
+      let tag = Index.find (get_module false) loc in
+      reference (translate s) tag
     with Not_found ->
       if is_tactic s then
-	(printf "\\coqdoctac{"; raw_ident s; printf "}")
+	printf "\\coqdoctac{%s}" (translate s)
       else if is_keyword s then
-	(printf "\\coqdockw{"; raw_ident s; printf "}")
+	printf "\\coqdockw{%s}" (translate s)
       else if !Cdglobals.interpolate && !in_doc (* always a var otherwise *)
       then
-	try reference s (Index.find_string (get_module false) s)
-	with _ -> (printf "\\coqdocvar{"; raw_ident s; printf "}")
-      else (printf "\\coqdocvar{"; raw_ident s; printf "}")
+	try
+	  let tag = Index.find_string (get_module false) s in
+	  reference (translate s) tag
+	with _ -> Tokens.output_tagged_ident_string s
+      else Tokens.output_tagged_ident_string s
 
   let ident s l =
     if !in_title then (
       printf "\\texorpdfstring{\\protect";
-      with_latex_printing (fun s -> ident s l) s;
-      printf "}{"; raw_ident s; printf "}")
+      ident s l;
+      printf "}{%s}" (translate s))
     else
-      with_latex_printing (fun s -> ident s l) s
+      ident s l
 
-  let symbol s l = with_latex_printing raw_ident s
+  (*s Translating structure *)
 
   let proofbox () = printf "\\ensuremath{\\Box}"
 
@@ -330,8 +399,6 @@ module Latex = struct
 
   let end_doc () = in_doc := false; stop_item ()
 
-  let comment c = char c
-
   (* This is broken if we are in math mode, but coqdoc currently isn't
      tracking that *)
   let start_emph () = printf "\\textit{"
@@ -460,7 +527,21 @@ module Html = struct
     | '&' -> printf "&amp;"
     | c -> output_char c
 
-  let raw_ident s = for i = 0 to String.length s - 1 do char s.[i] done
+  let raw_string s =
+    for i = 0 to String.length s - 1 do char s.[i] done
+
+  let escaped =
+    let buff = Buffer.create 5 in
+    fun s ->
+      Buffer.clear buff;
+      for i = 0 to String.length s - 1 do
+	match s.[i] with
+	| '<' -> Buffer.add_string buff "&lt;"
+	| '>' -> Buffer.add_string buff "&gt;"
+	| '&' -> Buffer.add_string buff "&amp;"
+	| c -> Buffer.add_char buff c
+      done;
+      Buffer.contents buff
 
   let latex_char _ = ()
   let latex_string _ = ()
@@ -477,78 +558,72 @@ module Html = struct
   let module_ref m s =
     match find_module m with
       | Local ->
-	  printf "<a class=\"modref\" href=\"%s.html\">" m; raw_ident s; printf "</a>"
+	  printf "<a class=\"modref\" href=\"%s.html\">%s</a>" m s
       | External m when !externals ->
-	    printf "<a class=\"modref\" href=\"%s.html\">" m; raw_ident s; printf "</a>"
+	  printf "<a class=\"modref\" href=\"%s.html\">%s</a>" m s
       | External _ | Unknown ->
-	  raw_ident s
+	  output_string s
 
   let ident_ref m fid typ s =
     match find_module m with
     | Local ->
 	printf "<a class=\"idref\" href=\"%s.html#%s\">" m fid;
-	printf "<span class=\"id\" type=\"%s\">" typ;
-	raw_ident s;
-	printf "</span></a>"
+	printf "<span class=\"id\" type=\"%s\">%s</span></a>" typ s
     | External m when !externals ->
-	  printf "<a class=\"idref\" href=\"%s.html#%s\">" m fid;
-	  printf "<span class=\"id\" type=\"%s\">" typ;
-	  raw_ident s; printf "</span></a>"
+	printf "<a class=\"idref\" href=\"%s.html#%s\">" m fid;
+	printf "<span class=\"id\" type=\"%s\">%s</span></a>" typ s
     | External _ | Unknown ->
-	printf "<span class=\"id\" type=\"%s\">" typ; raw_ident s; printf "</span>"
+	printf "<span class=\"id\" type=\"%s\">%s</span>" typ s
 
   let reference s r =
     match r with
     | Def (fullid,ty) ->
-	  printf "<a name=\"%s\">" fullid;
-	  printf "<span class=\"id\" type=\"%s\">" (type_name ty);
-	  raw_ident s; printf "</span></a>"
+	printf "<a name=\"%s\">" fullid;
+	printf "<span class=\"id\" type=\"%s\">%s</span></a>" (type_name ty) s
     | Mod (m,s') when s = s' ->
-	  module_ref m s
+	module_ref m s
     | Ref (m,fullid,ty) ->
-	  ident_ref m fullid (type_name ty) s
+	ident_ref m fullid (type_name ty) s
     | Mod _ ->
-	  printf "<span class=\"id\" type=\"mod\">"; raw_ident s ; printf "</span>"
-
-  let ident s loc =
-    if is_keyword s then begin
-      printf "<span class=\"id\" type=\"keyword\">";
-      raw_ident s;
-      printf "</span>"
-    end else
-      begin
-	  try reference s (Index.find (get_module false) loc)
-	  with Not_found ->
-	    if is_tactic s then
-		(printf "<span class=\"id\" type=\"tactic\">"; raw_ident s; printf "</span>")
-	    else
-		if !Cdglobals.interpolate && !in_doc (* always a var otherwise *)
-		then
-	        try reference s (Index.find_string (get_module false) s)
-	        with _ ->
-		    (printf "<span class=\"id\" type=\"var\">"; raw_ident s ; printf "</span>")
-	    else (printf "<span class=\"id\" type=\"var\">"; raw_ident s ; printf "</span>")
-      end
-
-  let with_html_printing f tok loc =
-    try
-      (match Hashtbl.find token_pp tok with
-	 | _, Some s ->
-	     (try reference s (Index.find (get_module false) loc)
-	      with Not_found -> output_string s)
-	 | _ -> f tok loc)
-    with Not_found ->
-      f tok loc
+	printf "<span class=\"id\" type=\"mod\">%s</span>" s
+
+  let output_sublexer_string doescape issymbchar tag s =
+    let s = if doescape then escaped s else s in
+    match tag with
+    | Some ref -> reference s ref
+    | None ->
+	if issymbchar then output_string s
+	else printf "<span class=\"id\" type=\"var\">%s</span>" s
+
+  let sublexer c loc =
+    let tag =
+      try Some (Index.find (get_module false) loc) with Not_found -> None
+    in
+    Tokens.output_tagged_symbol_char tag c
 
-  let ident s l =
-    with_html_printing ident s l
+  let initialize () =
+    Tokens.token_tree := Lazy.force token_tree_html;
+    Tokens.outfun := output_sublexer_string
 
-  let raw_symbol s loc =
-    try reference s (Index.find (get_module false) loc)
-    with Not_found -> raw_ident s
+  let translate s =
+    match Tokens.translate s with Some s -> s | None -> escaped s
 
-  let symbol s l =
-    with_html_printing raw_symbol s l
+  let ident s loc =
+    if is_keyword s then begin
+      printf "<span class=\"id\" type=\"keyword\">%s</span>" (translate s)
+    end else begin
+      try reference (translate s) (Index.find (get_module false) loc)
+      with Not_found ->
+	if is_tactic s then
+	  printf "<span class=\"id\" type=\"tactic\">%s</span>" (translate s)
+	else
+	  if !Cdglobals.interpolate && !in_doc (* always a var otherwise *)
+	  then
+	    try reference (translate s) (Index.find_string (get_module false) s)
+	    with _ -> Tokens.output_tagged_ident_string s
+	  else
+	    Tokens.output_tagged_ident_string s
+    end
 
   let proofbox () = printf "<font size=-2>&#9744;</font>"
 
@@ -803,24 +878,15 @@ module TeXmacs = struct
 
   let ident s _ = if !in_doc then ident_true s else raw_ident s
 
-  let symbol_true s =
-    let ensuremath x = printf "<with|mode|math|\\<%s\\>>" x in
-      match s with
-	| "*"  -> ensuremath "times"
-	| "->" -> ensuremath "rightarrow"
-	| "<-" -> ensuremath "leftarrow"
-	| "<->" ->ensuremath "leftrightarrow"
-	| "=>" -> ensuremath "Rightarrow"
-	| "<=" -> ensuremath "le"
-	| ">=" -> ensuremath "ge"
-	| "<>" -> ensuremath "noteq"
-	| "~" ->  ensuremath "lnot"
-	| "/\\" -> ensuremath "land"
-	| "\\/" -> ensuremath "lor"
-	| "|-" -> ensuremath "vdash"
-	| s    -> raw_ident s
-
-  let symbol s _ = if !in_doc then symbol_true s else raw_ident s
+  let output_sublexer_string doescape issymbchar tag s =
+    if doescape then raw_ident s else output_string s
+
+  let sublexer c l =
+    if !in_doc then Tokens.output_tagged_symbol_char None c else char c
+
+  let initialize () =
+    Tokens.token_tree := Lazy.force token_tree_texmacs;
+    Tokens.outfun := output_sublexer_string
 
   let proofbox () = printf "QED"
 
@@ -897,7 +963,7 @@ module TeXmacs = struct
 end
 
 
-(*s LaTeX output *)
+(*s Raw output *)
 
 module Raw = struct
 
@@ -909,12 +975,6 @@ module Raw = struct
 
   let char = output_char
 
-  let label_char c = match c with
-    | '\\' | '$' | '#' | '%' | '&' | '{' | '}' | '_'
-    | '^' | '~' -> ()
-    | _ ->
-	output_char c
-
   let latex_char = output_char
   let latex_string = output_string
 
@@ -939,7 +999,11 @@ module Raw = struct
 
   let ident s loc = raw_ident s
 
-  let symbol s loc = raw_ident s
+  let sublexer c l = char c
+
+  let initialize () =
+    Tokens.token_tree := Tokens.empty_ttree;
+    Tokens.outfun := (fun _ _ _ _ -> failwith "Useless")
 
   let proofbox () = printf "[]"
 
@@ -1027,7 +1091,7 @@ let end_inline_coq =
   select Latex.end_inline_coq Html.end_inline_coq TeXmacs.end_inline_coq Raw.end_inline_coq
 
 let start_inline_coq_block =
-  select Latex.start_inline_coq_block Html.start_inline_coq_block 
+  select Latex.start_inline_coq_block Html.start_inline_coq_block
     TeXmacs.start_inline_coq_block Raw.start_inline_coq_block
 let end_inline_coq_block =
   select Latex.end_inline_coq_block Html.end_inline_coq_block TeXmacs.end_inline_coq_block Raw.end_inline_coq_block
@@ -1047,7 +1111,8 @@ let rule = select Latex.rule Html.rule TeXmacs.rule Raw.rule
 let nbsp = select Latex.nbsp Html.nbsp TeXmacs.nbsp Raw.nbsp
 let char = select Latex.char Html.char TeXmacs.char Raw.char
 let ident = select Latex.ident Html.ident TeXmacs.ident Raw.ident
-let symbol = select Latex.symbol Html.symbol TeXmacs.symbol Raw.symbol
+let sublexer = select Latex.sublexer Html.sublexer TeXmacs.sublexer Raw.sublexer
+let initialize = select Latex.initialize Html.initialize TeXmacs.initialize Raw.initialize
 
 let proofbox = select Latex.proofbox Html.proofbox TeXmacs.proofbox Raw.proofbox
 
diff --git a/tools/coqdoc/output.mli b/tools/coqdoc/output.mli
index 1ccbac2f94..d836f6b397 100644
--- a/tools/coqdoc/output.mli
+++ b/tools/coqdoc/output.mli
@@ -63,7 +63,8 @@ val rule : unit -> unit
 val nbsp : unit -> unit
 val char : char -> unit
 val ident : string -> loc -> unit
-val symbol : string -> loc -> unit
+val sublexer : char -> loc -> unit
+val initialize : unit -> unit
 
 val proofbox : unit -> unit
 
diff --git a/tools/coqdoc/tokens.ml b/tools/coqdoc/tokens.ml
new file mode 100644
index 0000000000..7de3ad80da
--- /dev/null
+++ b/tools/coqdoc/tokens.ml
@@ -0,0 +1,171 @@
+(************************************************************************)
+(*  v      *   The Coq Proof Assistant  /  The Coq Development Team     *)
+(* <O___,, * CNRS-Ecole Polytechnique-INRIA Futurs-Universite Paris Sud *)
+(*   \VV/  **************************************************************)
+(*    //   *      This file is distributed under the terms of the       *)
+(*         *       GNU Lesser General Public License Version 2.1        *)
+(************************************************************************)
+
+(* Application of printing rules based on a dictionary specific to the
+   target language *)
+
+open Cdglobals
+
+(*s Dictionaries: trees annotated with string options, each node being a map
+    from chars to dictionaries (the subtrees). A trie, in other words.
+
+    (code copied from parsing/lexer.ml4 for the use of coqdoc, Apr 2010)
+*)
+
+module CharMap = Map.Make (struct type t = char let compare = compare end)
+
+type ttree = {
+  node : string option;
+  branch : ttree CharMap.t }
+
+let empty_ttree = { node = None; branch = CharMap.empty }
+
+let ttree_add ttree str translated =
+  let rec insert tt i =
+    if i == String.length str then
+      {node = Some translated; branch = tt.branch}
+    else
+      let c = str.[i] in
+      let br =
+        match try Some (CharMap.find c tt.branch) with Not_found -> None with
+          | Some tt' ->
+              CharMap.add c (insert tt' (i + 1)) (CharMap.remove c tt.branch)
+          | None ->
+              let tt' = {node = None; branch = CharMap.empty} in
+              CharMap.add c (insert tt' (i + 1)) tt.branch
+      in
+      { node = tt.node; branch = br }
+  in
+  insert ttree 0
+
+(* Removes a string from a dictionary: returns an equal dictionary
+   if the word not present. *)
+let ttree_remove ttree str =
+  let rec remove tt i =
+    if i == String.length str then
+      {node = None; branch = tt.branch}
+    else
+      let c = str.[i] in
+      let br =
+        match try Some (CharMap.find c tt.branch) with Not_found -> None with
+          | Some tt' ->
+              CharMap.add c (remove tt' (i + 1)) (CharMap.remove c tt.branch)
+          | None -> tt.branch
+      in
+      { node = tt.node; branch = br }
+  in
+  remove ttree 0
+
+let ttree_descend ttree c = CharMap.find c ttree.branch
+
+let ttree_find ttree str =
+  let rec proc_rec tt i =
+    if i == String.length str then tt
+    else proc_rec (CharMap.find str.[i] tt.branch) (i+1)
+  in
+  proc_rec ttree 0
+
+(*s Parameters of the translation automaton *)
+
+type out_function = bool -> bool -> Index.index_entry option -> string -> unit
+
+let token_tree = ref empty_ttree
+let outfun = ref (fun _ _ _ _ -> failwith "outfun not initialized")
+
+(*s Translation automaton *)
+
+let buff = Buffer.create 4
+
+let flush_buffer was_symbolchar tag tok =
+  let hastr = String.length tok <> 0 in
+  if hastr then !outfun false was_symbolchar tag tok;
+  if Buffer.length buff <> 0 then
+    !outfun true (if hastr then not was_symbolchar else was_symbolchar)
+      tag (Buffer.contents buff);
+  Buffer.clear buff
+
+type sublexer_state =
+  | Neutral
+  | Buffering of bool * Index.index_entry option * string * ttree
+
+let translation_state = ref Neutral
+
+let buffer_char is_symbolchar ctag c =
+  let rec aux = function
+  | Neutral ->
+      restart_buffering ()
+  | Buffering (was_symbolchar,tag,translated,tt) ->
+      if tag <> ctag then
+	(* A strong tag comes from Coq; if different Coq tags *)
+	(* hence, we don't try to see the chars as part of a single token *)
+	let translated =
+	  match tt.node with
+	  | Some tok -> Buffer.clear buff; tok
+	  | None -> translated in
+	flush_buffer was_symbolchar tag translated;
+	restart_buffering ()
+      else
+	begin
+	(* If we change the category of characters (symbol vs ident) *)
+	(* we accept this as a possible token cut point and remember the *)
+	(* translated token up to that point *)
+	let translated =
+	  if is_symbolchar <> was_symbolchar then
+	    match tt.node with
+	    | Some tok -> Buffer.clear buff; tok
+	    | None -> translated
+	  else translated in
+	(* We try to make a significant token from the current *)
+	(* buffer and the new character *)
+	try
+	  let tt = ttree_descend tt c in
+	  Buffer.add_char buff c;
+	  Buffering (is_symbolchar,ctag,translated,tt)
+	with Not_found ->
+	(* No existing translation for the given set of chars *)
+	if is_symbolchar <> was_symbolchar then
+	  (* If we changed the category of character read, we accept it *)
+	  (* as a possible cut point and restart looking for a translation *)
+	  (flush_buffer was_symbolchar tag translated;
+	   restart_buffering ())
+	else
+	  (* If we did not change the category of character read, we do *)
+	  (* not want to cut arbitrarily in the middle of the sequence of *)
+	  (*  symbol characters or identifier characters *)
+	  (Buffer.add_char buff c;
+	   Buffering (is_symbolchar,tag,translated,empty_ttree))
+	end
+
+  and restart_buffering () =
+    let tt = try ttree_descend !token_tree c with Not_found -> empty_ttree in
+    Buffer.add_char buff c;
+    Buffering (is_symbolchar,ctag,"",tt)
+
+  in
+  translation_state := aux !translation_state
+
+let output_tagged_ident_string s =
+  for i = 0 to String.length s - 1 do buffer_char false None s.[i] done
+
+let output_tagged_symbol_char tag c =
+  buffer_char true tag c
+
+let flush_sublexer () =
+  match !translation_state with
+  | Neutral -> ()
+  | Buffering (was_symbolchar,tag,translated,tt) ->
+      let translated =
+	match tt.node with
+	| Some tok -> Buffer.clear buff; tok
+	| None -> translated in
+      flush_buffer was_symbolchar tag translated;
+      translation_state := Neutral
+
+(* Translation not using the automaton *)
+let translate s =
+  try (ttree_find !token_tree s).node with Not_found -> None
diff --git a/tools/coqdoc/tokens.mli b/tools/coqdoc/tokens.mli
new file mode 100644
index 0000000000..4e53108ac0
--- /dev/null
+++ b/tools/coqdoc/tokens.mli
@@ -0,0 +1,78 @@
+(************************************************************************)
+(*  v      *   The Coq Proof Assistant  /  The Coq Development Team     *)
+(* <O___,, * CNRS-Ecole Polytechnique-INRIA Futurs-Universite Paris Sud *)
+(*   \VV/  **************************************************************)
+(*    //   *      This file is distributed under the terms of the       *)
+(*         *       GNU Lesser General Public License Version 2.1        *)
+(************************************************************************)
+
+(* Type of dictionaries *)
+
+type ttree
+
+val empty_ttree : ttree
+
+(* Add a string with some translation in dictionary *)
+val ttree_add : ttree -> string -> string -> ttree
+
+(* Remove a translation from a dictionary: returns an equal dictionary
+   if the word not present *)
+val ttree_remove : ttree -> string -> ttree
+
+(* Translate a string *)
+val translate : string -> string option
+
+(* Sublexer automaton *)
+
+(* The sublexer buffers the chars it receives; if after some time, it
+   recognizes that a sequence of chars has a translation in the
+   current dictionary, it replaces the buffer by the translation *)
+
+(* Received chars can come with a "tag" (usually made from
+   informations from the globalization file). A sequence of chars can
+   be considered a word only, if all chars have the same "tag". Rules
+   for cutting words are the following:
+
+   - in a sequence like "**" where * is in the dictionary but not **,
+     "**" is not translated; otherwise said, to be translated, a sequence
+     must not be surrounded by other symbol-like chars
+
+   - in a sequence like "<>_h*", where <>_h is in the dictionary, the
+     translation is done because the switch from a letter to a symbol char
+     is an acceptable cutting point
+
+   - in a sequence like "<>_ha", where <>_h is in the dictionary, the
+     translation is not done because it is considered that h and a are
+     not separable (however, if h and a have different tags, and h has
+     the same tags as <, > and _, the translation happens)
+
+   - in a sequence like "<>_ha", where <> but not <>_h is in the
+     dictionary, the translation is done for <> and _ha is considered
+     independently because  the switch from a symbol char to a letter
+     is considered to be an acceptable cutting point
+
+   - the longest-word rule applies: if both <> and <>_h are in the
+     dictionary, "<>_h" is one word and gets translated
+*)
+
+(* Warning: do not output anything on output channel inbetween a call
+   to [output_tagged_*] and [flush_sublexer]!! *)
+
+type out_function =
+  bool (* needs escape *) ->
+  bool (* it is a symbol, not a pure ident *) ->
+  Index.index_entry option (* the index type of the token if any *) ->
+  string -> unit
+
+(* This must be initialized before calling the sublexer *)
+val token_tree : ttree ref
+val outfun : out_function ref
+
+(* Process an ident part that might be a symbol part *)
+val output_tagged_ident_string : string -> unit
+
+(* Process a non-ident char (possibly equipped with a tag) *)
+val output_tagged_symbol_char : Index.index_entry option -> char -> unit
+
+(* Flush the buffered content of the lexer using [outfun] *)
+val flush_sublexer : unit -> unit