aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile.common2
-rw-r--r--test-suite/coqdoc/links.v2
-rw-r--r--tools/coqdoc/cpretty.mll160
-rw-r--r--tools/coqdoc/main.ml1
-rw-r--r--tools/coqdoc/output.ml349
-rw-r--r--tools/coqdoc/output.mli3
-rw-r--r--tools/coqdoc/tokens.ml171
-rw-r--r--tools/coqdoc/tokens.mli78
8 files changed, 540 insertions, 226 deletions
diff --git a/Makefile.common b/Makefile.common
index c4169009b2..e3a94f56de 100644
--- a/Makefile.common
+++ b/Makefile.common
@@ -249,7 +249,7 @@ DEBUGPRINTERS:=dev/top_printers.cmo dev/vm_printers.cmo dev/printers.cma
COQDEPCMO:=$(COQENVCMO) tools/coqdep_lexer.cmo tools/coqdep_common.cmo tools/coqdep.cmo
COQDOCCMO:=$(CONFIG) $(addprefix tools/coqdoc/, \
- cdglobals.cmo alpha.cmo index.cmo output.cmo cpretty.cmo main.cmo )
+ cdglobals.cmo alpha.cmo index.cmo tokens.cmo output.cmo cpretty.cmo main.cmo )
###########################################################################
# vo files
diff --git a/test-suite/coqdoc/links.v b/test-suite/coqdoc/links.v
index 16028fe17d..581029bdd2 100644
--- a/test-suite/coqdoc/links.v
+++ b/test-suite/coqdoc/links.v
@@ -3,7 +3,7 @@
- symbols should not be inlined in string g
- links to both kinds of notations in a' should work to the right notation
- with utf8 option, forall must be unicode
-- spliting between symbols and ident should be correct in a' and c
+- splitting between symbols and ident should be correct in a' and c
- ".." should be rendered correctly
*)
diff --git a/tools/coqdoc/cpretty.mll b/tools/coqdoc/cpretty.mll
index af8bac2c10..b700531739 100644
--- a/tools/coqdoc/cpretty.mll
+++ b/tools/coqdoc/cpretty.mll
@@ -18,7 +18,7 @@
(* A function that emulates Lexing.new_line (which does not exist in OCaml < 3.11.0) *)
let new_line lexbuf =
let pos = lexbuf.lex_curr_p in
- lexbuf.lex_curr_p <- { pos with
+ lexbuf.lex_curr_p <- { pos with
pos_lnum = pos.pos_lnum + 1;
pos_bol = pos.pos_cnum }
@@ -40,6 +40,12 @@
in
count 0 0
+ let remove_newline s =
+ let n = String.length s in
+ let rec count i = if i == n || s.[i] <> '\n' then i else count (i + 1) in
+ let i = count 0 in
+ i, String.sub s i (n - i)
+
let count_dashes s =
let c = ref 0 in
for i = 0 to String.length s - 1 do if s.[i] = '-' then incr c done;
@@ -76,7 +82,7 @@
let start_emph () = in_emph := true; Output.start_emph ()
let stop_emph () = if !in_emph then (Output.stop_emph (); in_emph := false)
-
+
let backtrack lexbuf = lexbuf.lex_curr_pos <- lexbuf.lex_start_pos;
lexbuf.lex_curr_p <- lexbuf.lex_start_p
@@ -247,8 +253,6 @@
else
String.sub s 1 (String.length s - 3)
- let symbol lexbuf s = Output.symbol s (lexeme_start lexbuf)
-
let output_indented_keyword s lexbuf =
let nbsp,isp = count_spaces s in
Output.indentation nbsp;
@@ -269,7 +273,7 @@ let firstchar =
'\194' '\185' |
(* utf-8 latin 1 supplement *)
'\195' ['\128'-'\191'] |
- (* utf-8 letterlike symbols *)
+ (* utf-8 letterlike symbols *)
(* '\206' ([ '\145' - '\183'] | '\187') | *)
(* '\xCF' [ '\x00' - '\xCE' ] | *)
(* utf-8 letterlike symbols *)
@@ -284,37 +288,12 @@ let pfx_id = (id '.')*
let identifier =
id | pfx_id id
-let utf8_multibyte =
- [ '\xC0'-'\xDF' ] _
- | [ '\xE0'-'\xEF' ] _ _
- | [ '\xF0'-'\xF7' ] _ _ _
-
(* This misses unicode stuff, and it adds "[" and "]". It's only an
approximation of idents - used for detecting whether an underscore
is part of an identifier or meant to indicate emphasis *)
-let nonidentchar =
- [^ 'A'-'Z' 'a'-'z' '_' '[' ']'
- (* iso 8859-1 accents *)
- '\192'-'\214' '\216'-'\246' '\248'-'\255'
- '\'' '0'-'9' '@' ]
-
-let symbolchar_symbol_no_brackets =
- ['!' '$' '%' '&' '*' '+' ',' '^' '#'
- '\\' '/' '-' '<' '>' '|' ':' '?' '=' '~' ] |
- (* utf-8 symbols *)
- '\226' ['\134'-'\143' '\152'-'\155' '\164'-'\165' '\168'-'\171'] _
-let symbolchar_no_brackets = symbolchar_symbol_no_brackets |
- [ '@' '{' '}' '(' ')' 'A'-'Z' 'a'-'z' '_']
-let symbolchar = symbolchar_no_brackets | '[' | ']'
-let token_no_brackets = symbolchar_symbol_no_brackets symbolchar_no_brackets*
-let token = symbolchar_symbol_no_brackets symbolchar* | '[' [^ '[' ']' ':']* ']'
-let printing_token = (token | id)+
-
-(* tokens with balanced brackets *)
-let token_brackets =
- ( token_no_brackets ('[' token_no_brackets? ']')*
- | token_no_brackets? ('[' token_no_brackets? ']')+ )
- token_no_brackets?
+let nonidentchar = [^ 'A'-'Z' 'a'-'z' '_' '[' ']' '\'' '0'-'9' '@' ]
+
+let printing_token = [^ ' ' '\t']*
let thm_token =
"Theorem"
@@ -454,8 +433,8 @@ let end_verb = "(*" space* "end" space+ "verb" space* "*)"
rule coq_bol = parse
| space* nl+
- { if not (!in_proof <> None && (!Cdglobals.gallina || !Cdglobals.light))
- then Output.empty_line_of_code ();
+ { if not (!in_proof <> None && (!Cdglobals.gallina || !Cdglobals.light))
+ then Output.empty_line_of_code ();
coq_bol lexbuf }
| space* "(**" space_nl
{ Output.end_coq (); Output.start_doc ();
@@ -538,7 +517,7 @@ rule coq_bol = parse
ignore (comment lexbuf);
coq_bol lexbuf }
| space* "(**" space+ "remove" space+ "printing" space+
- (identifier | token) space* "*)"
+ printing_token space* "*)"
{ remove_printing_token (lexeme lexbuf);
coq_bol lexbuf }
| space* "(**" space+ "remove" space+ "printing" space+
@@ -591,7 +570,18 @@ and coq = parse
else coq lexbuf
}
| nl+ space* "]]"
- { if not !formatted then begin symbol lexbuf (lexeme lexbuf); coq lexbuf end }
+ { if not !formatted then
+ begin
+ (* Isn't this an anomaly *)
+ let s = lexeme lexbuf in
+ let nlsp,s = remove_newline s in
+ let nbsp,isp = count_spaces s in
+ Output.indentation nbsp;
+ let loc = lexeme_start lexbuf + isp + nlsp in
+ Output.sublexer ']' loc;
+ Output.sublexer ']' (loc+1);
+ coq lexbuf
+ end }
| eof
{ () }
| gallina_kw_to_hide
@@ -891,22 +881,27 @@ and verbatim = parse
and escaped_coq = parse
| "]"
{ decr brackets;
- if !brackets > 0 then begin Output.char ']'; escaped_coq lexbuf end }
+ if !brackets > 0 then
+ (Output.sublexer ']' (lexeme_start lexbuf); escaped_coq lexbuf)
+ else Tokens.flush_sublexer () }
| "["
- { incr brackets; Output.char '['; escaped_coq lexbuf }
+ { incr brackets;
+ Output.sublexer '[' (lexeme_start lexbuf); escaped_coq lexbuf }
| "(*"
- { comment_level := 1; ignore (comment lexbuf); escaped_coq lexbuf }
+ { Tokens.flush_sublexer (); comment_level := 1;
+ ignore (comment lexbuf); escaped_coq lexbuf }
| "*)"
{ (* likely to be a syntax error: we escape *) backtrack lexbuf }
| eof
- { () }
- | token_brackets
- { let s = lexeme lexbuf in
- symbol lexbuf s; escaped_coq lexbuf }
+ { Tokens.flush_sublexer () }
| (identifier '.')* identifier
{ Output.ident (lexeme lexbuf) (lexeme_start lexbuf); escaped_coq lexbuf }
+ | space
+ { Tokens.flush_sublexer(); Output.char (lexeme_char lexbuf 0);
+ escaped_coq lexbuf }
| _
- { Output.char (lexeme_char lexbuf 0); escaped_coq lexbuf }
+ { Output.sublexer (lexeme_char lexbuf 0) (lexeme_start lexbuf);
+ escaped_coq lexbuf }
(*s Coq "Comments" command. *)
@@ -1004,11 +999,18 @@ and body_bol = parse
| _ { backtrack lexbuf; Output.indentation 0; body lexbuf }
and body = parse
- | nl {Output.line_break(); new_line lexbuf; body_bol lexbuf}
+ | nl {Tokens.flush_sublexer(); Output.line_break(); new_line lexbuf; body_bol lexbuf}
| nl+ space* "]]" space* nl
- { if not !formatted then
+ { Tokens.flush_sublexer();
+ if not !formatted then
begin
- symbol lexbuf (lexeme lexbuf);
+ let s = lexeme lexbuf in
+ let nlsp,s = remove_newline s in
+ let _,isp = count_spaces s in
+ let loc = lexeme_start lexbuf + nlsp + isp in
+ Output.sublexer ']' loc;
+ Output.sublexer ']' (loc+1);
+ Tokens.flush_sublexer();
body lexbuf
end
else
@@ -1017,9 +1019,14 @@ and body = parse
true
end }
| "]]" space* nl
- { if not !formatted then
+ { Tokens.flush_sublexer();
+ if not !formatted then
begin
- symbol lexbuf (lexeme lexbuf);
+ let loc = lexeme_start lexbuf in
+ Output.sublexer ']' loc;
+ Output.sublexer ']' (loc+1);
+ Tokens.flush_sublexer();
+ Output.line_break();
body lexbuf
end
else
@@ -1027,12 +1034,12 @@ and body = parse
Output.paragraph ();
true
end }
- | eof { false }
+ | eof { Tokens.flush_sublexer(); false }
| '.' space* nl | '.' space* eof
- { Output.char '.'; Output.line_break();
+ { Tokens.flush_sublexer(); Output.char '.'; Output.line_break();
if not !formatted then true else body_bol lexbuf }
| '.' space* nl "]]" space* nl
- { Output.char '.';
+ { Tokens.flush_sublexer(); Output.char '.';
if not !formatted then
begin
eprintf "Error: stray ]] at %d\n" (lexeme_start lexbuf);
@@ -1045,50 +1052,46 @@ and body = parse
true
end
}
- | '.' space+ { Output.char '.'; Output.char ' ';
+ | '.' space+
+ { Tokens.flush_sublexer(); Output.char '.'; Output.char ' ';
if not !formatted then false else body lexbuf }
| "(**" space_nl
- { Output.end_coq (); Output.start_doc ();
+ { Tokens.flush_sublexer(); Output.end_coq (); Output.start_doc ();
let eol = doc_bol lexbuf in
Output.end_doc (); Output.start_coq ();
if eol then body_bol lexbuf else body lexbuf }
- | "(*" { comment_level := 1;
+ | "(*" { Tokens.flush_sublexer(); comment_level := 1;
if !Cdglobals.parse_comments then Output.start_comment ();
let eol = comment lexbuf in
if eol
then begin if not !Cdglobals.parse_comments then Output.line_break(); body_bol lexbuf end
else body lexbuf }
| "where" space*
- { let s = lexeme lexbuf in
- Output.ident s (lexeme_start lexbuf);
+ { Tokens.flush_sublexer();
+ Output.ident (lexeme lexbuf) (lexeme_start lexbuf);
start_notation_string lexbuf }
| identifier
- { let s = lexeme lexbuf in
- Output.ident s (lexeme_start lexbuf);
- body lexbuf }
- | token_no_brackets
- { let s = lexeme lexbuf in
- symbol lexbuf s; body lexbuf }
+ { Tokens.flush_sublexer();
+ Output.ident (lexeme lexbuf) (lexeme_start lexbuf);
+ body lexbuf }
| ".."
- { Output.char '.'; Output.char '.';
+ { Tokens.flush_sublexer(); Output.char '.'; Output.char '.';
body lexbuf }
- | '"'
- { Output.char '"';
+ | '"'
+ { Tokens.flush_sublexer(); Output.char '"';
string lexbuf;
body lexbuf }
-
- | utf8_multibyte
- { let c = lexeme lexbuf in
- symbol lexbuf c;
+ | space
+ { Tokens.flush_sublexer(); Output.char (lexeme_char lexbuf 0);
body lexbuf }
| _ { let c = lexeme_char lexbuf 0 in
- Output.char c;
- body lexbuf }
+ Output.sublexer c (lexeme_start lexbuf);
+ body lexbuf }
and start_notation_string = parse
| '"' (* a true notation *)
- { symbol lexbuf "\"";
+ { Output.sublexer '"' (lexeme_start lexbuf);
notation_string lexbuf;
body lexbuf }
| _ (* an abbreviation *)
@@ -1099,13 +1102,9 @@ and notation_string = parse
{ Output.char '"'; Output.char '"'; (* Unlikely! *)
notation_string lexbuf }
| '"'
- { Output.char '"' }
- | token
- { let s = lexeme lexbuf in
- symbol lexbuf s;
- notation_string lexbuf }
+ { Tokens.flush_sublexer(); Output.char '"' }
| _ { let c = lexeme_char lexbuf 0 in
- Output.char c;
+ Output.sublexer c (lexeme_start lexbuf);
notation_string lexbuf }
and string = parse
@@ -1164,6 +1163,7 @@ and st_subtitle = parse
let c = open_in f in
let lb = from_channel c in
(Index.current_library := m;
+ Output.initialize ();
Output.start_module ();
Output.start_coq (); coq_bol lb; Output.end_coq ();
close_in c)
diff --git a/tools/coqdoc/main.ml b/tools/coqdoc/main.ml
index b52c28ff29..67c63865af 100644
--- a/tools/coqdoc/main.ml
+++ b/tools/coqdoc/main.ml
@@ -359,7 +359,6 @@ let parse () =
add_file (what_file f); parse_rec rem
in
parse_rec (List.tl (Array.to_list Sys.argv));
- Output.initialize ();
List.rev !files
diff --git a/tools/coqdoc/output.ml b/tools/coqdoc/output.ml
index 168a28f20b..0f36b775cd 100644
--- a/tools/coqdoc/output.ml
+++ b/tools/coqdoc/output.ml
@@ -96,22 +96,32 @@ let set_module m sub = current_module := (m,sub);
let item_level = ref 0
let in_doc = ref false
-(*s Customized pretty-print *)
-
-let token_pp = Hashtbl.create 97
-
-let add_printing_token = Hashtbl.replace token_pp
-
-let find_printing_token tok =
- try Hashtbl.find token_pp tok with Not_found -> None, None
-
-let remove_printing_token = Hashtbl.remove token_pp
-
-(* predefined pretty-prints *)
-let initialize () =
+(*s Customized and predefined pretty-print *)
+
+let initialize_texmacs () =
+ let ensuremath x = sprintf "<with|mode|math|\\<%s\\>>" x in
+ List.fold_right (fun (s,t) tt -> Tokens.ttree_add tt s t)
+ [ "*", ensuremath "times";
+ "->", ensuremath "rightarrow";
+ "<-", ensuremath "leftarrow";
+ "<->", ensuremath "leftrightarrow";
+ "=>", ensuremath "Rightarrow";
+ "<=", ensuremath "le";
+ ">=", ensuremath "ge";
+ "<>", ensuremath "noteq";
+ "~", ensuremath "lnot";
+ "/\\", ensuremath "land";
+ "\\/", ensuremath "lor";
+ "|-", ensuremath "vdash"
+ ] Tokens.empty_ttree
+
+let token_tree_texmacs = lazy (initialize_texmacs ())
+
+let initialize_tex_html () =
let if_utf8 = if !Cdglobals.utf8 then fun x -> Some x else fun _ -> None in
- List.iter
- (fun (s,l,l') -> Hashtbl.add token_pp s (Some l, l'))
+ List.fold_right (fun (s,l,l') (tt,tt') ->
+ (Tokens.ttree_add tt s l,
+ match l' with None -> tt' | Some l' -> Tokens.ttree_add tt' s l'))
[ "*" , "\\ensuremath{\\times}", if_utf8 "×";
"|", "\\ensuremath{|}", None;
"->", "\\ensuremath{\\rightarrow}", if_utf8 "→";
@@ -132,7 +142,20 @@ let initialize () =
"Π", "\\ensuremath{\\Pi}", if_utf8 "Π";
"λ", "\\ensuremath{\\lambda}", if_utf8 "λ";
(* "fun", "\\ensuremath{\\lambda}" ? *)
- ]
+ ] (Tokens.empty_ttree,Tokens.empty_ttree)
+
+let token_tree_latex = lazy (fst (initialize_tex_html ()))
+let token_tree_html = lazy (snd (initialize_tex_html ()))
+
+let add_printing_token s (t1,t2) =
+ match
+ (match !Cdglobals.target_language with LaTeX -> t1 | HTML -> t2 | _ -> None)
+ with
+ | None -> ()
+ | Some t -> Tokens.token_tree := Tokens.ttree_add !Tokens.token_tree s t
+
+let remove_printing_token s =
+ Tokens.token_tree := Tokens.ttree_remove !Tokens.token_tree s
(*s Table of contents *)
@@ -187,6 +210,8 @@ module Latex = struct
printf "\\end{document}\n"
end
+ (*s Latex low-level translation *)
+
let nbsp () = output_char '~'
let char c = match c with
@@ -200,10 +225,13 @@ module Latex = struct
output_char c
let label_char c = match c with
- | '\\' | '$' | '#' | '%' | '&' | '{' | '}' | '_'
- | '^' | '~' -> ()
- | _ ->
- output_char c
+ | '_' -> output_char ' '
+ | '\\' | '$' | '#' | '%' | '&' | '{' | '}'
+ | '^' | '~' -> printf "x%X" (Char.code c)
+ | _ -> if c >= '\x80' then printf "x%X" (Char.code c) else output_char c
+
+ let label_ident s =
+ for i = 0 to String.length s - 1 do label_char s.[i] done
let latex_char = output_char
let latex_string = output_string
@@ -211,11 +239,27 @@ module Latex = struct
let html_char _ = ()
let html_string _ = ()
- let raw_ident s =
- for i = 0 to String.length s - 1 do char s.[i] done
-
- let label_ident s =
- for i = 0 to String.length s - 1 do label_char s.[i] done
+ (*s Latex char escaping *)
+
+ let escaped =
+ let buff = Buffer.create 5 in
+ fun s ->
+ Buffer.clear buff;
+ for i = 0 to String.length s - 1 do
+ match s.[i] with
+ | '\\' ->
+ Buffer.add_string buff "\\symbol{92}"
+ | '$' | '#' | '%' | '&' | '{' | '}' | '_' as c ->
+ Buffer.add_char buff '\\'; Buffer.add_char buff c
+ | '^' | '~' as c ->
+ Buffer.add_char buff '\\'; Buffer.add_char buff c;
+ Buffer.add_string buff "{}"
+ | c ->
+ Buffer.add_char buff c
+ done;
+ Buffer.contents buff
+
+ (*s Latex reference and symbol translation *)
let start_module () =
let ln = !lib_name in
@@ -224,9 +268,7 @@ module Latex = struct
label_ident (get_module false);
printf "}{";
if ln <> "" then printf "%s " ln;
- printf "}{";
- raw_ident (get_module true);
- printf "}\n\n"
+ printf "}{%s}\n\n" (escaped (get_module true))
end
let start_latex_math () = output_char '$'
@@ -244,35 +286,33 @@ module Latex = struct
let space = 0.5 *. (float n) in
printf "\\coqdocindent{%2.2fem}\n" space
- let with_latex_printing f tok =
- try
- (match Hashtbl.find token_pp tok with
- | Some s, _ -> output_string s
- | _ -> f tok)
- with Not_found ->
- f tok
-
let module_ref m s =
- printf "\\moduleid{%s}{" m; raw_ident s; printf "}"
+ printf "\\moduleid{%s}{%s}" m (escaped s)
let ident_ref m fid typ s =
let id = if fid <> "" then (m ^ "." ^ fid) else m in
match find_module m with
- | Local ->
- if typ = Variable then (printf "\\coqdoc%s{" (type_name typ); raw_ident s; printf "}")
+ | Local ->
+ if typ = Variable then
+ printf "\\coqdoc%s{%s}" (type_name typ) s
else
- (printf "\\coqref{"; label_ident id; printf "}{\\coqdoc%s{" (type_name typ);
- raw_ident s; printf "}}")
+ (printf "\\coqref{"; label_ident id;
+ printf "}{\\coqdoc%s{%s}}" (type_name typ) s)
| External m when !externals ->
- printf "\\coqexternalref{"; raw_ident m; printf "}{";
- label_ident fid; printf "}{\\coqdoc%s{" (type_name typ); raw_ident s; printf "}}"
+ printf "\\coqexternalref{"; label_ident fid;
+ printf "}{%s}{\\coqdoc%s{%s}}" (escaped m) (type_name typ) s
| External _ | Unknown ->
- (* printf "\\coqref{"; label_ident id; printf "}{" *)
- printf "\\coqdoc%s{" (type_name typ); raw_ident s; printf "}"
+ printf "\\coqdoc%s{%s}" (type_name typ) s
let defref m id ty s =
- printf "\\coqdef{"; label_ident (m ^ "." ^ id); printf "}{"; raw_ident s; printf "}{";
- printf "\\coqdoc%s{" (type_name ty); raw_ident s; printf "}}"
+ if ty <> Notation then
+ (printf "\\coqdef{"; label_ident (m ^ "." ^ id);
+ printf "}{%s}{\\coqdoc%s{%s}}" s (type_name ty) s)
+ else
+ (* Glob file still not able to say the exact extent of the definition *)
+ (* so we currently renounce to highlight the notation location *)
+ (printf "\\coqdef{"; label_ident (m ^ "." ^ id);
+ printf "}{%s}{%s}" s s)
let reference s = function
| Def (fullid,typ) ->
@@ -282,31 +322,60 @@ module Latex = struct
| Ref (m,fullid,typ) ->
ident_ref m fullid typ s
| Mod _ ->
- printf "\\coqdocvar{"; raw_ident s; printf "}"
+ printf "\\coqdocvar{%s}" (escaped s)
+
+ (*s The sublexer buffers symbol characters and attached
+ uninterpreted ident and try to apply special translation such as,
+ predefined, translation "->" to "\ensuremath{\rightarrow}" or,
+ virtually, a user-level translation from "=_h" to "\ensuremath{=_{h}}" *)
+
+ let output_sublexer_string doescape issymbchar tag s =
+ let s = if doescape then escaped s else s in
+ match tag with
+ | Some ref -> reference s ref
+ | None -> if issymbchar then output_string s else printf "\\coqdocvar{%s}" s
+
+ let sublexer c loc =
+ let tag =
+ try Some (Index.find (get_module false) loc) with Not_found -> None
+ in
+ Tokens.output_tagged_symbol_char tag c
+
+ let initialize () =
+ Tokens.token_tree := Lazy.force token_tree_latex;
+ Tokens.outfun := output_sublexer_string
+
+ (*s Interpreting ident with fallback on sublexer if unknown ident *)
+
+ let translate s =
+ match Tokens.translate s with Some s -> s | None -> escaped s
let ident s loc =
try
- reference s (Index.find (get_module false) loc)
+ let tag = Index.find (get_module false) loc in
+ reference (translate s) tag
with Not_found ->
if is_tactic s then
- (printf "\\coqdoctac{"; raw_ident s; printf "}")
+ printf "\\coqdoctac{%s}" (translate s)
else if is_keyword s then
- (printf "\\coqdockw{"; raw_ident s; printf "}")
+ printf "\\coqdockw{%s}" (translate s)
else if !Cdglobals.interpolate && !in_doc (* always a var otherwise *)
then
- try reference s (Index.find_string (get_module false) s)
- with _ -> (printf "\\coqdocvar{"; raw_ident s; printf "}")
- else (printf "\\coqdocvar{"; raw_ident s; printf "}")
+ try
+ let tag = Index.find_string (get_module false) s in
+ reference (translate s) tag
+ with _ -> Tokens.output_tagged_ident_string s
+ else Tokens.output_tagged_ident_string s
let ident s l =
if !in_title then (
printf "\\texorpdfstring{\\protect";
- with_latex_printing (fun s -> ident s l) s;
- printf "}{"; raw_ident s; printf "}")
+ ident s l;
+ printf "}{%s}" (translate s))
else
- with_latex_printing (fun s -> ident s l) s
+ ident s l
- let symbol s l = with_latex_printing raw_ident s
+ (*s Translating structure *)
let proofbox () = printf "\\ensuremath{\\Box}"
@@ -330,8 +399,6 @@ module Latex = struct
let end_doc () = in_doc := false; stop_item ()
- let comment c = char c
-
(* This is broken if we are in math mode, but coqdoc currently isn't
tracking that *)
let start_emph () = printf "\\textit{"
@@ -460,7 +527,21 @@ module Html = struct
| '&' -> printf "&amp;"
| c -> output_char c
- let raw_ident s = for i = 0 to String.length s - 1 do char s.[i] done
+ let raw_string s =
+ for i = 0 to String.length s - 1 do char s.[i] done
+
+ let escaped =
+ let buff = Buffer.create 5 in
+ fun s ->
+ Buffer.clear buff;
+ for i = 0 to String.length s - 1 do
+ match s.[i] with
+ | '<' -> Buffer.add_string buff "&lt;"
+ | '>' -> Buffer.add_string buff "&gt;"
+ | '&' -> Buffer.add_string buff "&amp;"
+ | c -> Buffer.add_char buff c
+ done;
+ Buffer.contents buff
let latex_char _ = ()
let latex_string _ = ()
@@ -477,78 +558,72 @@ module Html = struct
let module_ref m s =
match find_module m with
| Local ->
- printf "<a class=\"modref\" href=\"%s.html\">" m; raw_ident s; printf "</a>"
+ printf "<a class=\"modref\" href=\"%s.html\">%s</a>" m s
| External m when !externals ->
- printf "<a class=\"modref\" href=\"%s.html\">" m; raw_ident s; printf "</a>"
+ printf "<a class=\"modref\" href=\"%s.html\">%s</a>" m s
| External _ | Unknown ->
- raw_ident s
+ output_string s
let ident_ref m fid typ s =
match find_module m with
| Local ->
printf "<a class=\"idref\" href=\"%s.html#%s\">" m fid;
- printf "<span class=\"id\" type=\"%s\">" typ;
- raw_ident s;
- printf "</span></a>"
+ printf "<span class=\"id\" type=\"%s\">%s</span></a>" typ s
| External m when !externals ->
- printf "<a class=\"idref\" href=\"%s.html#%s\">" m fid;
- printf "<span class=\"id\" type=\"%s\">" typ;
- raw_ident s; printf "</span></a>"
+ printf "<a class=\"idref\" href=\"%s.html#%s\">" m fid;
+ printf "<span class=\"id\" type=\"%s\">%s</span></a>" typ s
| External _ | Unknown ->
- printf "<span class=\"id\" type=\"%s\">" typ; raw_ident s; printf "</span>"
+ printf "<span class=\"id\" type=\"%s\">%s</span>" typ s
let reference s r =
match r with
| Def (fullid,ty) ->
- printf "<a name=\"%s\">" fullid;
- printf "<span class=\"id\" type=\"%s\">" (type_name ty);
- raw_ident s; printf "</span></a>"
+ printf "<a name=\"%s\">" fullid;
+ printf "<span class=\"id\" type=\"%s\">%s</span></a>" (type_name ty) s
| Mod (m,s') when s = s' ->
- module_ref m s
+ module_ref m s
| Ref (m,fullid,ty) ->
- ident_ref m fullid (type_name ty) s
+ ident_ref m fullid (type_name ty) s
| Mod _ ->
- printf "<span class=\"id\" type=\"mod\">"; raw_ident s ; printf "</span>"
-
- let ident s loc =
- if is_keyword s then begin
- printf "<span class=\"id\" type=\"keyword\">";
- raw_ident s;
- printf "</span>"
- end else
- begin
- try reference s (Index.find (get_module false) loc)
- with Not_found ->
- if is_tactic s then
- (printf "<span class=\"id\" type=\"tactic\">"; raw_ident s; printf "</span>")
- else
- if !Cdglobals.interpolate && !in_doc (* always a var otherwise *)
- then
- try reference s (Index.find_string (get_module false) s)
- with _ ->
- (printf "<span class=\"id\" type=\"var\">"; raw_ident s ; printf "</span>")
- else (printf "<span class=\"id\" type=\"var\">"; raw_ident s ; printf "</span>")
- end
-
- let with_html_printing f tok loc =
- try
- (match Hashtbl.find token_pp tok with
- | _, Some s ->
- (try reference s (Index.find (get_module false) loc)
- with Not_found -> output_string s)
- | _ -> f tok loc)
- with Not_found ->
- f tok loc
+ printf "<span class=\"id\" type=\"mod\">%s</span>" s
+
+ let output_sublexer_string doescape issymbchar tag s =
+ let s = if doescape then escaped s else s in
+ match tag with
+ | Some ref -> reference s ref
+ | None ->
+ if issymbchar then output_string s
+ else printf "<span class=\"id\" type=\"var\">%s</span>" s
+
+ let sublexer c loc =
+ let tag =
+ try Some (Index.find (get_module false) loc) with Not_found -> None
+ in
+ Tokens.output_tagged_symbol_char tag c
- let ident s l =
- with_html_printing ident s l
+ let initialize () =
+ Tokens.token_tree := Lazy.force token_tree_html;
+ Tokens.outfun := output_sublexer_string
- let raw_symbol s loc =
- try reference s (Index.find (get_module false) loc)
- with Not_found -> raw_ident s
+ let translate s =
+ match Tokens.translate s with Some s -> s | None -> escaped s
- let symbol s l =
- with_html_printing raw_symbol s l
+ let ident s loc =
+ if is_keyword s then begin
+ printf "<span class=\"id\" type=\"keyword\">%s</span>" (translate s)
+ end else begin
+ try reference (translate s) (Index.find (get_module false) loc)
+ with Not_found ->
+ if is_tactic s then
+ printf "<span class=\"id\" type=\"tactic\">%s</span>" (translate s)
+ else
+ if !Cdglobals.interpolate && !in_doc (* always a var otherwise *)
+ then
+ try reference (translate s) (Index.find_string (get_module false) s)
+ with _ -> Tokens.output_tagged_ident_string s
+ else
+ Tokens.output_tagged_ident_string s
+ end
let proofbox () = printf "<font size=-2>&#9744;</font>"
@@ -803,24 +878,15 @@ module TeXmacs = struct
let ident s _ = if !in_doc then ident_true s else raw_ident s
- let symbol_true s =
- let ensuremath x = printf "<with|mode|math|\\<%s\\>>" x in
- match s with
- | "*" -> ensuremath "times"
- | "->" -> ensuremath "rightarrow"
- | "<-" -> ensuremath "leftarrow"
- | "<->" ->ensuremath "leftrightarrow"
- | "=>" -> ensuremath "Rightarrow"
- | "<=" -> ensuremath "le"
- | ">=" -> ensuremath "ge"
- | "<>" -> ensuremath "noteq"
- | "~" -> ensuremath "lnot"
- | "/\\" -> ensuremath "land"
- | "\\/" -> ensuremath "lor"
- | "|-" -> ensuremath "vdash"
- | s -> raw_ident s
-
- let symbol s _ = if !in_doc then symbol_true s else raw_ident s
+ let output_sublexer_string doescape issymbchar tag s =
+ if doescape then raw_ident s else output_string s
+
+ let sublexer c l =
+ if !in_doc then Tokens.output_tagged_symbol_char None c else char c
+
+ let initialize () =
+ Tokens.token_tree := Lazy.force token_tree_texmacs;
+ Tokens.outfun := output_sublexer_string
let proofbox () = printf "QED"
@@ -897,7 +963,7 @@ module TeXmacs = struct
end
-(*s LaTeX output *)
+(*s Raw output *)
module Raw = struct
@@ -909,12 +975,6 @@ module Raw = struct
let char = output_char
- let label_char c = match c with
- | '\\' | '$' | '#' | '%' | '&' | '{' | '}' | '_'
- | '^' | '~' -> ()
- | _ ->
- output_char c
-
let latex_char = output_char
let latex_string = output_string
@@ -939,7 +999,11 @@ module Raw = struct
let ident s loc = raw_ident s
- let symbol s loc = raw_ident s
+ let sublexer c l = char c
+
+ let initialize () =
+ Tokens.token_tree := Tokens.empty_ttree;
+ Tokens.outfun := (fun _ _ _ _ -> failwith "Useless")
let proofbox () = printf "[]"
@@ -1027,7 +1091,7 @@ let end_inline_coq =
select Latex.end_inline_coq Html.end_inline_coq TeXmacs.end_inline_coq Raw.end_inline_coq
let start_inline_coq_block =
- select Latex.start_inline_coq_block Html.start_inline_coq_block
+ select Latex.start_inline_coq_block Html.start_inline_coq_block
TeXmacs.start_inline_coq_block Raw.start_inline_coq_block
let end_inline_coq_block =
select Latex.end_inline_coq_block Html.end_inline_coq_block TeXmacs.end_inline_coq_block Raw.end_inline_coq_block
@@ -1047,7 +1111,8 @@ let rule = select Latex.rule Html.rule TeXmacs.rule Raw.rule
let nbsp = select Latex.nbsp Html.nbsp TeXmacs.nbsp Raw.nbsp
let char = select Latex.char Html.char TeXmacs.char Raw.char
let ident = select Latex.ident Html.ident TeXmacs.ident Raw.ident
-let symbol = select Latex.symbol Html.symbol TeXmacs.symbol Raw.symbol
+let sublexer = select Latex.sublexer Html.sublexer TeXmacs.sublexer Raw.sublexer
+let initialize = select Latex.initialize Html.initialize TeXmacs.initialize Raw.initialize
let proofbox = select Latex.proofbox Html.proofbox TeXmacs.proofbox Raw.proofbox
diff --git a/tools/coqdoc/output.mli b/tools/coqdoc/output.mli
index 1ccbac2f94..d836f6b397 100644
--- a/tools/coqdoc/output.mli
+++ b/tools/coqdoc/output.mli
@@ -63,7 +63,8 @@ val rule : unit -> unit
val nbsp : unit -> unit
val char : char -> unit
val ident : string -> loc -> unit
-val symbol : string -> loc -> unit
+val sublexer : char -> loc -> unit
+val initialize : unit -> unit
val proofbox : unit -> unit
diff --git a/tools/coqdoc/tokens.ml b/tools/coqdoc/tokens.ml
new file mode 100644
index 0000000000..7de3ad80da
--- /dev/null
+++ b/tools/coqdoc/tokens.ml
@@ -0,0 +1,171 @@
+(************************************************************************)
+(* v * The Coq Proof Assistant / The Coq Development Team *)
+(* <O___,, * CNRS-Ecole Polytechnique-INRIA Futurs-Universite Paris Sud *)
+(* \VV/ **************************************************************)
+(* // * This file is distributed under the terms of the *)
+(* * GNU Lesser General Public License Version 2.1 *)
+(************************************************************************)
+
+(* Application of printing rules based on a dictionary specific to the
+ target language *)
+
+open Cdglobals
+
+(*s Dictionaries: trees annotated with string options, each node being a map
+ from chars to dictionaries (the subtrees). A trie, in other words.
+
+ (code copied from parsing/lexer.ml4 for the use of coqdoc, Apr 2010)
+*)
+
+module CharMap = Map.Make (struct type t = char let compare = compare end)
+
+type ttree = {
+ node : string option;
+ branch : ttree CharMap.t }
+
+let empty_ttree = { node = None; branch = CharMap.empty }
+
+let ttree_add ttree str translated =
+ let rec insert tt i =
+ if i == String.length str then
+ {node = Some translated; branch = tt.branch}
+ else
+ let c = str.[i] in
+ let br =
+ match try Some (CharMap.find c tt.branch) with Not_found -> None with
+ | Some tt' ->
+ CharMap.add c (insert tt' (i + 1)) (CharMap.remove c tt.branch)
+ | None ->
+ let tt' = {node = None; branch = CharMap.empty} in
+ CharMap.add c (insert tt' (i + 1)) tt.branch
+ in
+ { node = tt.node; branch = br }
+ in
+ insert ttree 0
+
+(* Removes a string from a dictionary: returns an equal dictionary
+ if the word not present. *)
+let ttree_remove ttree str =
+ let rec remove tt i =
+ if i == String.length str then
+ {node = None; branch = tt.branch}
+ else
+ let c = str.[i] in
+ let br =
+ match try Some (CharMap.find c tt.branch) with Not_found -> None with
+ | Some tt' ->
+ CharMap.add c (remove tt' (i + 1)) (CharMap.remove c tt.branch)
+ | None -> tt.branch
+ in
+ { node = tt.node; branch = br }
+ in
+ remove ttree 0
+
+let ttree_descend ttree c = CharMap.find c ttree.branch
+
+let ttree_find ttree str =
+ let rec proc_rec tt i =
+ if i == String.length str then tt
+ else proc_rec (CharMap.find str.[i] tt.branch) (i+1)
+ in
+ proc_rec ttree 0
+
+(*s Parameters of the translation automaton *)
+
+type out_function = bool -> bool -> Index.index_entry option -> string -> unit
+
+let token_tree = ref empty_ttree
+let outfun = ref (fun _ _ _ _ -> failwith "outfun not initialized")
+
+(*s Translation automaton *)
+
+let buff = Buffer.create 4
+
+let flush_buffer was_symbolchar tag tok =
+ let hastr = String.length tok <> 0 in
+ if hastr then !outfun false was_symbolchar tag tok;
+ if Buffer.length buff <> 0 then
+ !outfun true (if hastr then not was_symbolchar else was_symbolchar)
+ tag (Buffer.contents buff);
+ Buffer.clear buff
+
+type sublexer_state =
+ | Neutral
+ | Buffering of bool * Index.index_entry option * string * ttree
+
+let translation_state = ref Neutral
+
+let buffer_char is_symbolchar ctag c =
+ let rec aux = function
+ | Neutral ->
+ restart_buffering ()
+ | Buffering (was_symbolchar,tag,translated,tt) ->
+ if tag <> ctag then
+ (* A strong tag comes from Coq; if different Coq tags *)
+ (* hence, we don't try to see the chars as part of a single token *)
+ let translated =
+ match tt.node with
+ | Some tok -> Buffer.clear buff; tok
+ | None -> translated in
+ flush_buffer was_symbolchar tag translated;
+ restart_buffering ()
+ else
+ begin
+ (* If we change the category of characters (symbol vs ident) *)
+ (* we accept this as a possible token cut point and remember the *)
+ (* translated token up to that point *)
+ let translated =
+ if is_symbolchar <> was_symbolchar then
+ match tt.node with
+ | Some tok -> Buffer.clear buff; tok
+ | None -> translated
+ else translated in
+ (* We try to make a significant token from the current *)
+ (* buffer and the new character *)
+ try
+ let tt = ttree_descend tt c in
+ Buffer.add_char buff c;
+ Buffering (is_symbolchar,ctag,translated,tt)
+ with Not_found ->
+ (* No existing translation for the given set of chars *)
+ if is_symbolchar <> was_symbolchar then
+ (* If we changed the category of character read, we accept it *)
+ (* as a possible cut point and restart looking for a translation *)
+ (flush_buffer was_symbolchar tag translated;
+ restart_buffering ())
+ else
+ (* If we did not change the category of character read, we do *)
+ (* not want to cut arbitrarily in the middle of the sequence of *)
+ (* symbol characters or identifier characters *)
+ (Buffer.add_char buff c;
+ Buffering (is_symbolchar,tag,translated,empty_ttree))
+ end
+
+ and restart_buffering () =
+ let tt = try ttree_descend !token_tree c with Not_found -> empty_ttree in
+ Buffer.add_char buff c;
+ Buffering (is_symbolchar,ctag,"",tt)
+
+ in
+ translation_state := aux !translation_state
+
+let output_tagged_ident_string s =
+ for i = 0 to String.length s - 1 do buffer_char false None s.[i] done
+
+let output_tagged_symbol_char tag c =
+ buffer_char true tag c
+
+let flush_sublexer () =
+ match !translation_state with
+ | Neutral -> ()
+ | Buffering (was_symbolchar,tag,translated,tt) ->
+ let translated =
+ match tt.node with
+ | Some tok -> Buffer.clear buff; tok
+ | None -> translated in
+ flush_buffer was_symbolchar tag translated;
+ translation_state := Neutral
+
+(* Translation not using the automaton *)
+let translate s =
+ try (ttree_find !token_tree s).node with Not_found -> None
diff --git a/tools/coqdoc/tokens.mli b/tools/coqdoc/tokens.mli
new file mode 100644
index 0000000000..4e53108ac0
--- /dev/null
+++ b/tools/coqdoc/tokens.mli
@@ -0,0 +1,78 @@
+(************************************************************************)
+(* v * The Coq Proof Assistant / The Coq Development Team *)
+(* <O___,, * CNRS-Ecole Polytechnique-INRIA Futurs-Universite Paris Sud *)
+(* \VV/ **************************************************************)
+(* // * This file is distributed under the terms of the *)
+(* * GNU Lesser General Public License Version 2.1 *)
+(************************************************************************)
+
+(* Type of dictionaries *)
+
+type ttree
+
+val empty_ttree : ttree
+
+(* Add a string with some translation in dictionary *)
+val ttree_add : ttree -> string -> string -> ttree
+
+(* Remove a translation from a dictionary: returns an equal dictionary
+ if the word not present *)
+val ttree_remove : ttree -> string -> ttree
+
+(* Translate a string *)
+val translate : string -> string option
+
+(* Sublexer automaton *)
+
+(* The sublexer buffers the chars it receives; if after some time, it
+ recognizes that a sequence of chars has a translation in the
+ current dictionary, it replaces the buffer by the translation *)
+
+(* Received chars can come with a "tag" (usually made from
+ informations from the globalization file). A sequence of chars can
+ be considered a word only, if all chars have the same "tag". Rules
+ for cutting words are the following:
+
+ - in a sequence like "**" where * is in the dictionary but not **,
+ "**" is not translated; otherwise said, to be translated, a sequence
+ must not be surrounded by other symbol-like chars
+
+ - in a sequence like "<>_h*", where <>_h is in the dictionary, the
+ translation is done because the switch from a letter to a symbol char
+ is an acceptable cutting point
+
+ - in a sequence like "<>_ha", where <>_h is in the dictionary, the
+ translation is not done because it is considered that h and a are
+ not separable (however, if h and a have different tags, and h has
+ the same tags as <, > and _, the translation happens)
+
+ - in a sequence like "<>_ha", where <> but not <>_h is in the
+ dictionary, the translation is done for <> and _ha is considered
+ independently because the switch from a symbol char to a letter
+ is considered to be an acceptable cutting point
+
+ - the longest-word rule applies: if both <> and <>_h are in the
+ dictionary, "<>_h" is one word and gets translated
+*)
+
+(* Warning: do not output anything on output channel inbetween a call
+ to [output_tagged_*] and [flush_sublexer]!! *)
+
+type out_function =
+ bool (* needs escape *) ->
+ bool (* it is a symbol, not a pure ident *) ->
+ Index.index_entry option (* the index type of the token if any *) ->
+ string -> unit
+
+(* This must be initialized before calling the sublexer *)
+val token_tree : ttree ref
+val outfun : out_function ref
+
+(* Process an ident part that might be a symbol part *)
+val output_tagged_ident_string : string -> unit
+
+(* Process a non-ident char (possibly equipped with a tag) *)
+val output_tagged_symbol_char : Index.index_entry option -> char -> unit
+
+(* Flush the buffered content of the lexer using [outfun] *)
+val flush_sublexer : unit -> unit