diff options
Diffstat (limited to 'tools')
| -rw-r--r-- | tools/coqdoc/cpretty.mll | 160 | ||||
| -rw-r--r-- | tools/coqdoc/main.ml | 1 | ||||
| -rw-r--r-- | tools/coqdoc/output.ml | 349 | ||||
| -rw-r--r-- | tools/coqdoc/output.mli | 3 | ||||
| -rw-r--r-- | tools/coqdoc/tokens.ml | 171 | ||||
| -rw-r--r-- | tools/coqdoc/tokens.mli | 78 |
6 files changed, 538 insertions, 224 deletions
diff --git a/tools/coqdoc/cpretty.mll b/tools/coqdoc/cpretty.mll index af8bac2c10..b700531739 100644 --- a/tools/coqdoc/cpretty.mll +++ b/tools/coqdoc/cpretty.mll @@ -18,7 +18,7 @@ (* A function that emulates Lexing.new_line (which does not exist in OCaml < 3.11.0) *) let new_line lexbuf = let pos = lexbuf.lex_curr_p in - lexbuf.lex_curr_p <- { pos with + lexbuf.lex_curr_p <- { pos with pos_lnum = pos.pos_lnum + 1; pos_bol = pos.pos_cnum } @@ -40,6 +40,12 @@ in count 0 0 + let remove_newline s = + let n = String.length s in + let rec count i = if i == n || s.[i] <> '\n' then i else count (i + 1) in + let i = count 0 in + i, String.sub s i (n - i) + let count_dashes s = let c = ref 0 in for i = 0 to String.length s - 1 do if s.[i] = '-' then incr c done; @@ -76,7 +82,7 @@ let start_emph () = in_emph := true; Output.start_emph () let stop_emph () = if !in_emph then (Output.stop_emph (); in_emph := false) - + let backtrack lexbuf = lexbuf.lex_curr_pos <- lexbuf.lex_start_pos; lexbuf.lex_curr_p <- lexbuf.lex_start_p @@ -247,8 +253,6 @@ else String.sub s 1 (String.length s - 3) - let symbol lexbuf s = Output.symbol s (lexeme_start lexbuf) - let output_indented_keyword s lexbuf = let nbsp,isp = count_spaces s in Output.indentation nbsp; @@ -269,7 +273,7 @@ let firstchar = '\194' '\185' | (* utf-8 latin 1 supplement *) '\195' ['\128'-'\191'] | - (* utf-8 letterlike symbols *) + (* utf-8 letterlike symbols *) (* '\206' ([ '\145' - '\183'] | '\187') | *) (* '\xCF' [ '\x00' - '\xCE' ] | *) (* utf-8 letterlike symbols *) @@ -284,37 +288,12 @@ let pfx_id = (id '.')* let identifier = id | pfx_id id -let utf8_multibyte = - [ '\xC0'-'\xDF' ] _ - | [ '\xE0'-'\xEF' ] _ _ - | [ '\xF0'-'\xF7' ] _ _ _ - (* This misses unicode stuff, and it adds "[" and "]". It's only an approximation of idents - used for detecting whether an underscore is part of an identifier or meant to indicate emphasis *) -let nonidentchar = - [^ 'A'-'Z' 'a'-'z' '_' '[' ']' - (* iso 8859-1 accents *) - '\192'-'\214' '\216'-'\246' '\248'-'\255' - '\'' '0'-'9' '@' ] - -let symbolchar_symbol_no_brackets = - ['!' '$' '%' '&' '*' '+' ',' '^' '#' - '\\' '/' '-' '<' '>' '|' ':' '?' '=' '~' ] | - (* utf-8 symbols *) - '\226' ['\134'-'\143' '\152'-'\155' '\164'-'\165' '\168'-'\171'] _ -let symbolchar_no_brackets = symbolchar_symbol_no_brackets | - [ '@' '{' '}' '(' ')' 'A'-'Z' 'a'-'z' '_'] -let symbolchar = symbolchar_no_brackets | '[' | ']' -let token_no_brackets = symbolchar_symbol_no_brackets symbolchar_no_brackets* -let token = symbolchar_symbol_no_brackets symbolchar* | '[' [^ '[' ']' ':']* ']' -let printing_token = (token | id)+ - -(* tokens with balanced brackets *) -let token_brackets = - ( token_no_brackets ('[' token_no_brackets? ']')* - | token_no_brackets? ('[' token_no_brackets? ']')+ ) - token_no_brackets? +let nonidentchar = [^ 'A'-'Z' 'a'-'z' '_' '[' ']' '\'' '0'-'9' '@' ] + +let printing_token = [^ ' ' '\t']* let thm_token = "Theorem" @@ -454,8 +433,8 @@ let end_verb = "(*" space* "end" space+ "verb" space* "*)" rule coq_bol = parse | space* nl+ - { if not (!in_proof <> None && (!Cdglobals.gallina || !Cdglobals.light)) - then Output.empty_line_of_code (); + { if not (!in_proof <> None && (!Cdglobals.gallina || !Cdglobals.light)) + then Output.empty_line_of_code (); coq_bol lexbuf } | space* "(**" space_nl { Output.end_coq (); Output.start_doc (); @@ -538,7 +517,7 @@ rule coq_bol = parse ignore (comment lexbuf); coq_bol lexbuf } | space* "(**" space+ "remove" space+ "printing" space+ - (identifier | token) space* "*)" + printing_token space* "*)" { remove_printing_token (lexeme lexbuf); coq_bol lexbuf } | space* "(**" space+ "remove" space+ "printing" space+ @@ -591,7 +570,18 @@ and coq = parse else coq lexbuf } | nl+ space* "]]" - { if not !formatted then begin symbol lexbuf (lexeme lexbuf); coq lexbuf end } + { if not !formatted then + begin + (* Isn't this an anomaly *) + let s = lexeme lexbuf in + let nlsp,s = remove_newline s in + let nbsp,isp = count_spaces s in + Output.indentation nbsp; + let loc = lexeme_start lexbuf + isp + nlsp in + Output.sublexer ']' loc; + Output.sublexer ']' (loc+1); + coq lexbuf + end } | eof { () } | gallina_kw_to_hide @@ -891,22 +881,27 @@ and verbatim = parse and escaped_coq = parse | "]" { decr brackets; - if !brackets > 0 then begin Output.char ']'; escaped_coq lexbuf end } + if !brackets > 0 then + (Output.sublexer ']' (lexeme_start lexbuf); escaped_coq lexbuf) + else Tokens.flush_sublexer () } | "[" - { incr brackets; Output.char '['; escaped_coq lexbuf } + { incr brackets; + Output.sublexer '[' (lexeme_start lexbuf); escaped_coq lexbuf } | "(*" - { comment_level := 1; ignore (comment lexbuf); escaped_coq lexbuf } + { Tokens.flush_sublexer (); comment_level := 1; + ignore (comment lexbuf); escaped_coq lexbuf } | "*)" { (* likely to be a syntax error: we escape *) backtrack lexbuf } | eof - { () } - | token_brackets - { let s = lexeme lexbuf in - symbol lexbuf s; escaped_coq lexbuf } + { Tokens.flush_sublexer () } | (identifier '.')* identifier { Output.ident (lexeme lexbuf) (lexeme_start lexbuf); escaped_coq lexbuf } + | space + { Tokens.flush_sublexer(); Output.char (lexeme_char lexbuf 0); + escaped_coq lexbuf } | _ - { Output.char (lexeme_char lexbuf 0); escaped_coq lexbuf } + { Output.sublexer (lexeme_char lexbuf 0) (lexeme_start lexbuf); + escaped_coq lexbuf } (*s Coq "Comments" command. *) @@ -1004,11 +999,18 @@ and body_bol = parse | _ { backtrack lexbuf; Output.indentation 0; body lexbuf } and body = parse - | nl {Output.line_break(); new_line lexbuf; body_bol lexbuf} + | nl {Tokens.flush_sublexer(); Output.line_break(); new_line lexbuf; body_bol lexbuf} | nl+ space* "]]" space* nl - { if not !formatted then + { Tokens.flush_sublexer(); + if not !formatted then begin - symbol lexbuf (lexeme lexbuf); + let s = lexeme lexbuf in + let nlsp,s = remove_newline s in + let _,isp = count_spaces s in + let loc = lexeme_start lexbuf + nlsp + isp in + Output.sublexer ']' loc; + Output.sublexer ']' (loc+1); + Tokens.flush_sublexer(); body lexbuf end else @@ -1017,9 +1019,14 @@ and body = parse true end } | "]]" space* nl - { if not !formatted then + { Tokens.flush_sublexer(); + if not !formatted then begin - symbol lexbuf (lexeme lexbuf); + let loc = lexeme_start lexbuf in + Output.sublexer ']' loc; + Output.sublexer ']' (loc+1); + Tokens.flush_sublexer(); + Output.line_break(); body lexbuf end else @@ -1027,12 +1034,12 @@ and body = parse Output.paragraph (); true end } - | eof { false } + | eof { Tokens.flush_sublexer(); false } | '.' space* nl | '.' space* eof - { Output.char '.'; Output.line_break(); + { Tokens.flush_sublexer(); Output.char '.'; Output.line_break(); if not !formatted then true else body_bol lexbuf } | '.' space* nl "]]" space* nl - { Output.char '.'; + { Tokens.flush_sublexer(); Output.char '.'; if not !formatted then begin eprintf "Error: stray ]] at %d\n" (lexeme_start lexbuf); @@ -1045,50 +1052,46 @@ and body = parse true end } - | '.' space+ { Output.char '.'; Output.char ' '; + | '.' space+ + { Tokens.flush_sublexer(); Output.char '.'; Output.char ' '; if not !formatted then false else body lexbuf } | "(**" space_nl - { Output.end_coq (); Output.start_doc (); + { Tokens.flush_sublexer(); Output.end_coq (); Output.start_doc (); let eol = doc_bol lexbuf in Output.end_doc (); Output.start_coq (); if eol then body_bol lexbuf else body lexbuf } - | "(*" { comment_level := 1; + | "(*" { Tokens.flush_sublexer(); comment_level := 1; if !Cdglobals.parse_comments then Output.start_comment (); let eol = comment lexbuf in if eol then begin if not !Cdglobals.parse_comments then Output.line_break(); body_bol lexbuf end else body lexbuf } | "where" space* - { let s = lexeme lexbuf in - Output.ident s (lexeme_start lexbuf); + { Tokens.flush_sublexer(); + Output.ident (lexeme lexbuf) (lexeme_start lexbuf); start_notation_string lexbuf } | identifier - { let s = lexeme lexbuf in - Output.ident s (lexeme_start lexbuf); - body lexbuf } - | token_no_brackets - { let s = lexeme lexbuf in - symbol lexbuf s; body lexbuf } + { Tokens.flush_sublexer(); + Output.ident (lexeme lexbuf) (lexeme_start lexbuf); + body lexbuf } | ".." - { Output.char '.'; Output.char '.'; + { Tokens.flush_sublexer(); Output.char '.'; Output.char '.'; body lexbuf } - | '"' - { Output.char '"'; + | '"' + { Tokens.flush_sublexer(); Output.char '"'; string lexbuf; body lexbuf } - - | utf8_multibyte - { let c = lexeme lexbuf in - symbol lexbuf c; + | space + { Tokens.flush_sublexer(); Output.char (lexeme_char lexbuf 0); body lexbuf } | _ { let c = lexeme_char lexbuf 0 in - Output.char c; - body lexbuf } + Output.sublexer c (lexeme_start lexbuf); + body lexbuf } and start_notation_string = parse | '"' (* a true notation *) - { symbol lexbuf "\""; + { Output.sublexer '"' (lexeme_start lexbuf); notation_string lexbuf; body lexbuf } | _ (* an abbreviation *) @@ -1099,13 +1102,9 @@ and notation_string = parse { Output.char '"'; Output.char '"'; (* Unlikely! *) notation_string lexbuf } | '"' - { Output.char '"' } - | token - { let s = lexeme lexbuf in - symbol lexbuf s; - notation_string lexbuf } + { Tokens.flush_sublexer(); Output.char '"' } | _ { let c = lexeme_char lexbuf 0 in - Output.char c; + Output.sublexer c (lexeme_start lexbuf); notation_string lexbuf } and string = parse @@ -1164,6 +1163,7 @@ and st_subtitle = parse let c = open_in f in let lb = from_channel c in (Index.current_library := m; + Output.initialize (); Output.start_module (); Output.start_coq (); coq_bol lb; Output.end_coq (); close_in c) diff --git a/tools/coqdoc/main.ml b/tools/coqdoc/main.ml index b52c28ff29..67c63865af 100644 --- a/tools/coqdoc/main.ml +++ b/tools/coqdoc/main.ml @@ -359,7 +359,6 @@ let parse () = add_file (what_file f); parse_rec rem in parse_rec (List.tl (Array.to_list Sys.argv)); - Output.initialize (); List.rev !files diff --git a/tools/coqdoc/output.ml b/tools/coqdoc/output.ml index 168a28f20b..0f36b775cd 100644 --- a/tools/coqdoc/output.ml +++ b/tools/coqdoc/output.ml @@ -96,22 +96,32 @@ let set_module m sub = current_module := (m,sub); let item_level = ref 0 let in_doc = ref false -(*s Customized pretty-print *) - -let token_pp = Hashtbl.create 97 - -let add_printing_token = Hashtbl.replace token_pp - -let find_printing_token tok = - try Hashtbl.find token_pp tok with Not_found -> None, None - -let remove_printing_token = Hashtbl.remove token_pp - -(* predefined pretty-prints *) -let initialize () = +(*s Customized and predefined pretty-print *) + +let initialize_texmacs () = + let ensuremath x = sprintf "<with|mode|math|\\<%s\\>>" x in + List.fold_right (fun (s,t) tt -> Tokens.ttree_add tt s t) + [ "*", ensuremath "times"; + "->", ensuremath "rightarrow"; + "<-", ensuremath "leftarrow"; + "<->", ensuremath "leftrightarrow"; + "=>", ensuremath "Rightarrow"; + "<=", ensuremath "le"; + ">=", ensuremath "ge"; + "<>", ensuremath "noteq"; + "~", ensuremath "lnot"; + "/\\", ensuremath "land"; + "\\/", ensuremath "lor"; + "|-", ensuremath "vdash" + ] Tokens.empty_ttree + +let token_tree_texmacs = lazy (initialize_texmacs ()) + +let initialize_tex_html () = let if_utf8 = if !Cdglobals.utf8 then fun x -> Some x else fun _ -> None in - List.iter - (fun (s,l,l') -> Hashtbl.add token_pp s (Some l, l')) + List.fold_right (fun (s,l,l') (tt,tt') -> + (Tokens.ttree_add tt s l, + match l' with None -> tt' | Some l' -> Tokens.ttree_add tt' s l')) [ "*" , "\\ensuremath{\\times}", if_utf8 "×"; "|", "\\ensuremath{|}", None; "->", "\\ensuremath{\\rightarrow}", if_utf8 "→"; @@ -132,7 +142,20 @@ let initialize () = "Π", "\\ensuremath{\\Pi}", if_utf8 "Π"; "λ", "\\ensuremath{\\lambda}", if_utf8 "λ"; (* "fun", "\\ensuremath{\\lambda}" ? *) - ] + ] (Tokens.empty_ttree,Tokens.empty_ttree) + +let token_tree_latex = lazy (fst (initialize_tex_html ())) +let token_tree_html = lazy (snd (initialize_tex_html ())) + +let add_printing_token s (t1,t2) = + match + (match !Cdglobals.target_language with LaTeX -> t1 | HTML -> t2 | _ -> None) + with + | None -> () + | Some t -> Tokens.token_tree := Tokens.ttree_add !Tokens.token_tree s t + +let remove_printing_token s = + Tokens.token_tree := Tokens.ttree_remove !Tokens.token_tree s (*s Table of contents *) @@ -187,6 +210,8 @@ module Latex = struct printf "\\end{document}\n" end + (*s Latex low-level translation *) + let nbsp () = output_char '~' let char c = match c with @@ -200,10 +225,13 @@ module Latex = struct output_char c let label_char c = match c with - | '\\' | '$' | '#' | '%' | '&' | '{' | '}' | '_' - | '^' | '~' -> () - | _ -> - output_char c + | '_' -> output_char ' ' + | '\\' | '$' | '#' | '%' | '&' | '{' | '}' + | '^' | '~' -> printf "x%X" (Char.code c) + | _ -> if c >= '\x80' then printf "x%X" (Char.code c) else output_char c + + let label_ident s = + for i = 0 to String.length s - 1 do label_char s.[i] done let latex_char = output_char let latex_string = output_string @@ -211,11 +239,27 @@ module Latex = struct let html_char _ = () let html_string _ = () - let raw_ident s = - for i = 0 to String.length s - 1 do char s.[i] done - - let label_ident s = - for i = 0 to String.length s - 1 do label_char s.[i] done + (*s Latex char escaping *) + + let escaped = + let buff = Buffer.create 5 in + fun s -> + Buffer.clear buff; + for i = 0 to String.length s - 1 do + match s.[i] with + | '\\' -> + Buffer.add_string buff "\\symbol{92}" + | '$' | '#' | '%' | '&' | '{' | '}' | '_' as c -> + Buffer.add_char buff '\\'; Buffer.add_char buff c + | '^' | '~' as c -> + Buffer.add_char buff '\\'; Buffer.add_char buff c; + Buffer.add_string buff "{}" + | c -> + Buffer.add_char buff c + done; + Buffer.contents buff + + (*s Latex reference and symbol translation *) let start_module () = let ln = !lib_name in @@ -224,9 +268,7 @@ module Latex = struct label_ident (get_module false); printf "}{"; if ln <> "" then printf "%s " ln; - printf "}{"; - raw_ident (get_module true); - printf "}\n\n" + printf "}{%s}\n\n" (escaped (get_module true)) end let start_latex_math () = output_char '$' @@ -244,35 +286,33 @@ module Latex = struct let space = 0.5 *. (float n) in printf "\\coqdocindent{%2.2fem}\n" space - let with_latex_printing f tok = - try - (match Hashtbl.find token_pp tok with - | Some s, _ -> output_string s - | _ -> f tok) - with Not_found -> - f tok - let module_ref m s = - printf "\\moduleid{%s}{" m; raw_ident s; printf "}" + printf "\\moduleid{%s}{%s}" m (escaped s) let ident_ref m fid typ s = let id = if fid <> "" then (m ^ "." ^ fid) else m in match find_module m with - | Local -> - if typ = Variable then (printf "\\coqdoc%s{" (type_name typ); raw_ident s; printf "}") + | Local -> + if typ = Variable then + printf "\\coqdoc%s{%s}" (type_name typ) s else - (printf "\\coqref{"; label_ident id; printf "}{\\coqdoc%s{" (type_name typ); - raw_ident s; printf "}}") + (printf "\\coqref{"; label_ident id; + printf "}{\\coqdoc%s{%s}}" (type_name typ) s) | External m when !externals -> - printf "\\coqexternalref{"; raw_ident m; printf "}{"; - label_ident fid; printf "}{\\coqdoc%s{" (type_name typ); raw_ident s; printf "}}" + printf "\\coqexternalref{"; label_ident fid; + printf "}{%s}{\\coqdoc%s{%s}}" (escaped m) (type_name typ) s | External _ | Unknown -> - (* printf "\\coqref{"; label_ident id; printf "}{" *) - printf "\\coqdoc%s{" (type_name typ); raw_ident s; printf "}" + printf "\\coqdoc%s{%s}" (type_name typ) s let defref m id ty s = - printf "\\coqdef{"; label_ident (m ^ "." ^ id); printf "}{"; raw_ident s; printf "}{"; - printf "\\coqdoc%s{" (type_name ty); raw_ident s; printf "}}" + if ty <> Notation then + (printf "\\coqdef{"; label_ident (m ^ "." ^ id); + printf "}{%s}{\\coqdoc%s{%s}}" s (type_name ty) s) + else + (* Glob file still not able to say the exact extent of the definition *) + (* so we currently renounce to highlight the notation location *) + (printf "\\coqdef{"; label_ident (m ^ "." ^ id); + printf "}{%s}{%s}" s s) let reference s = function | Def (fullid,typ) -> @@ -282,31 +322,60 @@ module Latex = struct | Ref (m,fullid,typ) -> ident_ref m fullid typ s | Mod _ -> - printf "\\coqdocvar{"; raw_ident s; printf "}" + printf "\\coqdocvar{%s}" (escaped s) + + (*s The sublexer buffers symbol characters and attached + uninterpreted ident and try to apply special translation such as, + predefined, translation "->" to "\ensuremath{\rightarrow}" or, + virtually, a user-level translation from "=_h" to "\ensuremath{=_{h}}" *) + + let output_sublexer_string doescape issymbchar tag s = + let s = if doescape then escaped s else s in + match tag with + | Some ref -> reference s ref + | None -> if issymbchar then output_string s else printf "\\coqdocvar{%s}" s + + let sublexer c loc = + let tag = + try Some (Index.find (get_module false) loc) with Not_found -> None + in + Tokens.output_tagged_symbol_char tag c + + let initialize () = + Tokens.token_tree := Lazy.force token_tree_latex; + Tokens.outfun := output_sublexer_string + + (*s Interpreting ident with fallback on sublexer if unknown ident *) + + let translate s = + match Tokens.translate s with Some s -> s | None -> escaped s let ident s loc = try - reference s (Index.find (get_module false) loc) + let tag = Index.find (get_module false) loc in + reference (translate s) tag with Not_found -> if is_tactic s then - (printf "\\coqdoctac{"; raw_ident s; printf "}") + printf "\\coqdoctac{%s}" (translate s) else if is_keyword s then - (printf "\\coqdockw{"; raw_ident s; printf "}") + printf "\\coqdockw{%s}" (translate s) else if !Cdglobals.interpolate && !in_doc (* always a var otherwise *) then - try reference s (Index.find_string (get_module false) s) - with _ -> (printf "\\coqdocvar{"; raw_ident s; printf "}") - else (printf "\\coqdocvar{"; raw_ident s; printf "}") + try + let tag = Index.find_string (get_module false) s in + reference (translate s) tag + with _ -> Tokens.output_tagged_ident_string s + else Tokens.output_tagged_ident_string s let ident s l = if !in_title then ( printf "\\texorpdfstring{\\protect"; - with_latex_printing (fun s -> ident s l) s; - printf "}{"; raw_ident s; printf "}") + ident s l; + printf "}{%s}" (translate s)) else - with_latex_printing (fun s -> ident s l) s + ident s l - let symbol s l = with_latex_printing raw_ident s + (*s Translating structure *) let proofbox () = printf "\\ensuremath{\\Box}" @@ -330,8 +399,6 @@ module Latex = struct let end_doc () = in_doc := false; stop_item () - let comment c = char c - (* This is broken if we are in math mode, but coqdoc currently isn't tracking that *) let start_emph () = printf "\\textit{" @@ -460,7 +527,21 @@ module Html = struct | '&' -> printf "&" | c -> output_char c - let raw_ident s = for i = 0 to String.length s - 1 do char s.[i] done + let raw_string s = + for i = 0 to String.length s - 1 do char s.[i] done + + let escaped = + let buff = Buffer.create 5 in + fun s -> + Buffer.clear buff; + for i = 0 to String.length s - 1 do + match s.[i] with + | '<' -> Buffer.add_string buff "<" + | '>' -> Buffer.add_string buff ">" + | '&' -> Buffer.add_string buff "&" + | c -> Buffer.add_char buff c + done; + Buffer.contents buff let latex_char _ = () let latex_string _ = () @@ -477,78 +558,72 @@ module Html = struct let module_ref m s = match find_module m with | Local -> - printf "<a class=\"modref\" href=\"%s.html\">" m; raw_ident s; printf "</a>" + printf "<a class=\"modref\" href=\"%s.html\">%s</a>" m s | External m when !externals -> - printf "<a class=\"modref\" href=\"%s.html\">" m; raw_ident s; printf "</a>" + printf "<a class=\"modref\" href=\"%s.html\">%s</a>" m s | External _ | Unknown -> - raw_ident s + output_string s let ident_ref m fid typ s = match find_module m with | Local -> printf "<a class=\"idref\" href=\"%s.html#%s\">" m fid; - printf "<span class=\"id\" type=\"%s\">" typ; - raw_ident s; - printf "</span></a>" + printf "<span class=\"id\" type=\"%s\">%s</span></a>" typ s | External m when !externals -> - printf "<a class=\"idref\" href=\"%s.html#%s\">" m fid; - printf "<span class=\"id\" type=\"%s\">" typ; - raw_ident s; printf "</span></a>" + printf "<a class=\"idref\" href=\"%s.html#%s\">" m fid; + printf "<span class=\"id\" type=\"%s\">%s</span></a>" typ s | External _ | Unknown -> - printf "<span class=\"id\" type=\"%s\">" typ; raw_ident s; printf "</span>" + printf "<span class=\"id\" type=\"%s\">%s</span>" typ s let reference s r = match r with | Def (fullid,ty) -> - printf "<a name=\"%s\">" fullid; - printf "<span class=\"id\" type=\"%s\">" (type_name ty); - raw_ident s; printf "</span></a>" + printf "<a name=\"%s\">" fullid; + printf "<span class=\"id\" type=\"%s\">%s</span></a>" (type_name ty) s | Mod (m,s') when s = s' -> - module_ref m s + module_ref m s | Ref (m,fullid,ty) -> - ident_ref m fullid (type_name ty) s + ident_ref m fullid (type_name ty) s | Mod _ -> - printf "<span class=\"id\" type=\"mod\">"; raw_ident s ; printf "</span>" - - let ident s loc = - if is_keyword s then begin - printf "<span class=\"id\" type=\"keyword\">"; - raw_ident s; - printf "</span>" - end else - begin - try reference s (Index.find (get_module false) loc) - with Not_found -> - if is_tactic s then - (printf "<span class=\"id\" type=\"tactic\">"; raw_ident s; printf "</span>") - else - if !Cdglobals.interpolate && !in_doc (* always a var otherwise *) - then - try reference s (Index.find_string (get_module false) s) - with _ -> - (printf "<span class=\"id\" type=\"var\">"; raw_ident s ; printf "</span>") - else (printf "<span class=\"id\" type=\"var\">"; raw_ident s ; printf "</span>") - end - - let with_html_printing f tok loc = - try - (match Hashtbl.find token_pp tok with - | _, Some s -> - (try reference s (Index.find (get_module false) loc) - with Not_found -> output_string s) - | _ -> f tok loc) - with Not_found -> - f tok loc + printf "<span class=\"id\" type=\"mod\">%s</span>" s + + let output_sublexer_string doescape issymbchar tag s = + let s = if doescape then escaped s else s in + match tag with + | Some ref -> reference s ref + | None -> + if issymbchar then output_string s + else printf "<span class=\"id\" type=\"var\">%s</span>" s + + let sublexer c loc = + let tag = + try Some (Index.find (get_module false) loc) with Not_found -> None + in + Tokens.output_tagged_symbol_char tag c - let ident s l = - with_html_printing ident s l + let initialize () = + Tokens.token_tree := Lazy.force token_tree_html; + Tokens.outfun := output_sublexer_string - let raw_symbol s loc = - try reference s (Index.find (get_module false) loc) - with Not_found -> raw_ident s + let translate s = + match Tokens.translate s with Some s -> s | None -> escaped s - let symbol s l = - with_html_printing raw_symbol s l + let ident s loc = + if is_keyword s then begin + printf "<span class=\"id\" type=\"keyword\">%s</span>" (translate s) + end else begin + try reference (translate s) (Index.find (get_module false) loc) + with Not_found -> + if is_tactic s then + printf "<span class=\"id\" type=\"tactic\">%s</span>" (translate s) + else + if !Cdglobals.interpolate && !in_doc (* always a var otherwise *) + then + try reference (translate s) (Index.find_string (get_module false) s) + with _ -> Tokens.output_tagged_ident_string s + else + Tokens.output_tagged_ident_string s + end let proofbox () = printf "<font size=-2>☐</font>" @@ -803,24 +878,15 @@ module TeXmacs = struct let ident s _ = if !in_doc then ident_true s else raw_ident s - let symbol_true s = - let ensuremath x = printf "<with|mode|math|\\<%s\\>>" x in - match s with - | "*" -> ensuremath "times" - | "->" -> ensuremath "rightarrow" - | "<-" -> ensuremath "leftarrow" - | "<->" ->ensuremath "leftrightarrow" - | "=>" -> ensuremath "Rightarrow" - | "<=" -> ensuremath "le" - | ">=" -> ensuremath "ge" - | "<>" -> ensuremath "noteq" - | "~" -> ensuremath "lnot" - | "/\\" -> ensuremath "land" - | "\\/" -> ensuremath "lor" - | "|-" -> ensuremath "vdash" - | s -> raw_ident s - - let symbol s _ = if !in_doc then symbol_true s else raw_ident s + let output_sublexer_string doescape issymbchar tag s = + if doescape then raw_ident s else output_string s + + let sublexer c l = + if !in_doc then Tokens.output_tagged_symbol_char None c else char c + + let initialize () = + Tokens.token_tree := Lazy.force token_tree_texmacs; + Tokens.outfun := output_sublexer_string let proofbox () = printf "QED" @@ -897,7 +963,7 @@ module TeXmacs = struct end -(*s LaTeX output *) +(*s Raw output *) module Raw = struct @@ -909,12 +975,6 @@ module Raw = struct let char = output_char - let label_char c = match c with - | '\\' | '$' | '#' | '%' | '&' | '{' | '}' | '_' - | '^' | '~' -> () - | _ -> - output_char c - let latex_char = output_char let latex_string = output_string @@ -939,7 +999,11 @@ module Raw = struct let ident s loc = raw_ident s - let symbol s loc = raw_ident s + let sublexer c l = char c + + let initialize () = + Tokens.token_tree := Tokens.empty_ttree; + Tokens.outfun := (fun _ _ _ _ -> failwith "Useless") let proofbox () = printf "[]" @@ -1027,7 +1091,7 @@ let end_inline_coq = select Latex.end_inline_coq Html.end_inline_coq TeXmacs.end_inline_coq Raw.end_inline_coq let start_inline_coq_block = - select Latex.start_inline_coq_block Html.start_inline_coq_block + select Latex.start_inline_coq_block Html.start_inline_coq_block TeXmacs.start_inline_coq_block Raw.start_inline_coq_block let end_inline_coq_block = select Latex.end_inline_coq_block Html.end_inline_coq_block TeXmacs.end_inline_coq_block Raw.end_inline_coq_block @@ -1047,7 +1111,8 @@ let rule = select Latex.rule Html.rule TeXmacs.rule Raw.rule let nbsp = select Latex.nbsp Html.nbsp TeXmacs.nbsp Raw.nbsp let char = select Latex.char Html.char TeXmacs.char Raw.char let ident = select Latex.ident Html.ident TeXmacs.ident Raw.ident -let symbol = select Latex.symbol Html.symbol TeXmacs.symbol Raw.symbol +let sublexer = select Latex.sublexer Html.sublexer TeXmacs.sublexer Raw.sublexer +let initialize = select Latex.initialize Html.initialize TeXmacs.initialize Raw.initialize let proofbox = select Latex.proofbox Html.proofbox TeXmacs.proofbox Raw.proofbox diff --git a/tools/coqdoc/output.mli b/tools/coqdoc/output.mli index 1ccbac2f94..d836f6b397 100644 --- a/tools/coqdoc/output.mli +++ b/tools/coqdoc/output.mli @@ -63,7 +63,8 @@ val rule : unit -> unit val nbsp : unit -> unit val char : char -> unit val ident : string -> loc -> unit -val symbol : string -> loc -> unit +val sublexer : char -> loc -> unit +val initialize : unit -> unit val proofbox : unit -> unit diff --git a/tools/coqdoc/tokens.ml b/tools/coqdoc/tokens.ml new file mode 100644 index 0000000000..7de3ad80da --- /dev/null +++ b/tools/coqdoc/tokens.ml @@ -0,0 +1,171 @@ +(************************************************************************) +(* v * The Coq Proof Assistant / The Coq Development Team *) +(* <O___,, * CNRS-Ecole Polytechnique-INRIA Futurs-Universite Paris Sud *) +(* \VV/ **************************************************************) +(* // * This file is distributed under the terms of the *) +(* * GNU Lesser General Public License Version 2.1 *) +(************************************************************************) + +(* Application of printing rules based on a dictionary specific to the + target language *) + +open Cdglobals + +(*s Dictionaries: trees annotated with string options, each node being a map + from chars to dictionaries (the subtrees). A trie, in other words. + + (code copied from parsing/lexer.ml4 for the use of coqdoc, Apr 2010) +*) + +module CharMap = Map.Make (struct type t = char let compare = compare end) + +type ttree = { + node : string option; + branch : ttree CharMap.t } + +let empty_ttree = { node = None; branch = CharMap.empty } + +let ttree_add ttree str translated = + let rec insert tt i = + if i == String.length str then + {node = Some translated; branch = tt.branch} + else + let c = str.[i] in + let br = + match try Some (CharMap.find c tt.branch) with Not_found -> None with + | Some tt' -> + CharMap.add c (insert tt' (i + 1)) (CharMap.remove c tt.branch) + | None -> + let tt' = {node = None; branch = CharMap.empty} in + CharMap.add c (insert tt' (i + 1)) tt.branch + in + { node = tt.node; branch = br } + in + insert ttree 0 + +(* Removes a string from a dictionary: returns an equal dictionary + if the word not present. *) +let ttree_remove ttree str = + let rec remove tt i = + if i == String.length str then + {node = None; branch = tt.branch} + else + let c = str.[i] in + let br = + match try Some (CharMap.find c tt.branch) with Not_found -> None with + | Some tt' -> + CharMap.add c (remove tt' (i + 1)) (CharMap.remove c tt.branch) + | None -> tt.branch + in + { node = tt.node; branch = br } + in + remove ttree 0 + +let ttree_descend ttree c = CharMap.find c ttree.branch + +let ttree_find ttree str = + let rec proc_rec tt i = + if i == String.length str then tt + else proc_rec (CharMap.find str.[i] tt.branch) (i+1) + in + proc_rec ttree 0 + +(*s Parameters of the translation automaton *) + +type out_function = bool -> bool -> Index.index_entry option -> string -> unit + +let token_tree = ref empty_ttree +let outfun = ref (fun _ _ _ _ -> failwith "outfun not initialized") + +(*s Translation automaton *) + +let buff = Buffer.create 4 + +let flush_buffer was_symbolchar tag tok = + let hastr = String.length tok <> 0 in + if hastr then !outfun false was_symbolchar tag tok; + if Buffer.length buff <> 0 then + !outfun true (if hastr then not was_symbolchar else was_symbolchar) + tag (Buffer.contents buff); + Buffer.clear buff + +type sublexer_state = + | Neutral + | Buffering of bool * Index.index_entry option * string * ttree + +let translation_state = ref Neutral + +let buffer_char is_symbolchar ctag c = + let rec aux = function + | Neutral -> + restart_buffering () + | Buffering (was_symbolchar,tag,translated,tt) -> + if tag <> ctag then + (* A strong tag comes from Coq; if different Coq tags *) + (* hence, we don't try to see the chars as part of a single token *) + let translated = + match tt.node with + | Some tok -> Buffer.clear buff; tok + | None -> translated in + flush_buffer was_symbolchar tag translated; + restart_buffering () + else + begin + (* If we change the category of characters (symbol vs ident) *) + (* we accept this as a possible token cut point and remember the *) + (* translated token up to that point *) + let translated = + if is_symbolchar <> was_symbolchar then + match tt.node with + | Some tok -> Buffer.clear buff; tok + | None -> translated + else translated in + (* We try to make a significant token from the current *) + (* buffer and the new character *) + try + let tt = ttree_descend tt c in + Buffer.add_char buff c; + Buffering (is_symbolchar,ctag,translated,tt) + with Not_found -> + (* No existing translation for the given set of chars *) + if is_symbolchar <> was_symbolchar then + (* If we changed the category of character read, we accept it *) + (* as a possible cut point and restart looking for a translation *) + (flush_buffer was_symbolchar tag translated; + restart_buffering ()) + else + (* If we did not change the category of character read, we do *) + (* not want to cut arbitrarily in the middle of the sequence of *) + (* symbol characters or identifier characters *) + (Buffer.add_char buff c; + Buffering (is_symbolchar,tag,translated,empty_ttree)) + end + + and restart_buffering () = + let tt = try ttree_descend !token_tree c with Not_found -> empty_ttree in + Buffer.add_char buff c; + Buffering (is_symbolchar,ctag,"",tt) + + in + translation_state := aux !translation_state + +let output_tagged_ident_string s = + for i = 0 to String.length s - 1 do buffer_char false None s.[i] done + +let output_tagged_symbol_char tag c = + buffer_char true tag c + +let flush_sublexer () = + match !translation_state with + | Neutral -> () + | Buffering (was_symbolchar,tag,translated,tt) -> + let translated = + match tt.node with + | Some tok -> Buffer.clear buff; tok + | None -> translated in + flush_buffer was_symbolchar tag translated; + translation_state := Neutral + +(* Translation not using the automaton *) +let translate s = + try (ttree_find !token_tree s).node with Not_found -> None diff --git a/tools/coqdoc/tokens.mli b/tools/coqdoc/tokens.mli new file mode 100644 index 0000000000..4e53108ac0 --- /dev/null +++ b/tools/coqdoc/tokens.mli @@ -0,0 +1,78 @@ +(************************************************************************) +(* v * The Coq Proof Assistant / The Coq Development Team *) +(* <O___,, * CNRS-Ecole Polytechnique-INRIA Futurs-Universite Paris Sud *) +(* \VV/ **************************************************************) +(* // * This file is distributed under the terms of the *) +(* * GNU Lesser General Public License Version 2.1 *) +(************************************************************************) + +(* Type of dictionaries *) + +type ttree + +val empty_ttree : ttree + +(* Add a string with some translation in dictionary *) +val ttree_add : ttree -> string -> string -> ttree + +(* Remove a translation from a dictionary: returns an equal dictionary + if the word not present *) +val ttree_remove : ttree -> string -> ttree + +(* Translate a string *) +val translate : string -> string option + +(* Sublexer automaton *) + +(* The sublexer buffers the chars it receives; if after some time, it + recognizes that a sequence of chars has a translation in the + current dictionary, it replaces the buffer by the translation *) + +(* Received chars can come with a "tag" (usually made from + informations from the globalization file). A sequence of chars can + be considered a word only, if all chars have the same "tag". Rules + for cutting words are the following: + + - in a sequence like "**" where * is in the dictionary but not **, + "**" is not translated; otherwise said, to be translated, a sequence + must not be surrounded by other symbol-like chars + + - in a sequence like "<>_h*", where <>_h is in the dictionary, the + translation is done because the switch from a letter to a symbol char + is an acceptable cutting point + + - in a sequence like "<>_ha", where <>_h is in the dictionary, the + translation is not done because it is considered that h and a are + not separable (however, if h and a have different tags, and h has + the same tags as <, > and _, the translation happens) + + - in a sequence like "<>_ha", where <> but not <>_h is in the + dictionary, the translation is done for <> and _ha is considered + independently because the switch from a symbol char to a letter + is considered to be an acceptable cutting point + + - the longest-word rule applies: if both <> and <>_h are in the + dictionary, "<>_h" is one word and gets translated +*) + +(* Warning: do not output anything on output channel inbetween a call + to [output_tagged_*] and [flush_sublexer]!! *) + +type out_function = + bool (* needs escape *) -> + bool (* it is a symbol, not a pure ident *) -> + Index.index_entry option (* the index type of the token if any *) -> + string -> unit + +(* This must be initialized before calling the sublexer *) +val token_tree : ttree ref +val outfun : out_function ref + +(* Process an ident part that might be a symbol part *) +val output_tagged_ident_string : string -> unit + +(* Process a non-ident char (possibly equipped with a tag) *) +val output_tagged_symbol_char : Index.index_entry option -> char -> unit + +(* Flush the buffered content of the lexer using [outfun] *) +val flush_sublexer : unit -> unit |
