diff options
| author | ppedrot | 2011-11-24 17:12:59 +0000 |
|---|---|---|
| committer | ppedrot | 2011-11-24 17:12:59 +0000 |
| commit | 2ed4b1e88e3e304c5146d74124d7057ac62c59a2 (patch) | |
| tree | 766fd5553916b90d06eb253b72192942d80ca692 /lib/xml_parser.ml | |
| parent | fe891a11536b64cd9be0ea9ad3e7de026031ae57 (diff) | |
Fixed the XML parser CDATA handling (and changed the EOL convention of these files which where Windows-like, whoever knows why).
git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/coq/trunk@14726 85f007b7-540e-0410-9357-904b9bb8a0f7
Diffstat (limited to 'lib/xml_parser.ml')
| -rw-r--r-- | lib/xml_parser.ml | 434 |
1 files changed, 227 insertions, 207 deletions
diff --git a/lib/xml_parser.ml b/lib/xml_parser.ml index 16d9922aed..0463c6e30b 100644 --- a/lib/xml_parser.ml +++ b/lib/xml_parser.ml @@ -1,207 +1,227 @@ -(*
- * Xml Light, an small Xml parser/printer with DTD support.
- * Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com)
- * Copyright (C) 2003 Jacques Garrigue
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *)
-
-open Printf
-
-type xml =
- | Element of (string * (string * string) list * xml list)
- | PCData of string
-
-type error_pos = {
- eline : int;
- eline_start : int;
- emin : int;
- emax : int;
-}
-
-type error_msg =
- | UnterminatedComment
- | UnterminatedString
- | UnterminatedEntity
- | IdentExpected
- | CloseExpected
- | NodeExpected
- | AttributeNameExpected
- | AttributeValueExpected
- | EndOfTagExpected of string
- | EOFExpected
-
-type error = error_msg * error_pos
-
-exception Error of error
-
-exception File_not_found of string
-
-type t = {
- mutable check_eof : bool;
- mutable concat_pcdata : bool;
-}
-
-type source =
- | SFile of string
- | SChannel of in_channel
- | SString of string
- | SLexbuf of Lexing.lexbuf
-
-type state = {
- source : Lexing.lexbuf;
- stack : Xml_lexer.token Stack.t;
- xparser : t;
-}
-
-exception Internal_error of error_msg
-exception NoMoreData
-
-let xml_error = ref (fun _ -> assert false)
-let file_not_found = ref (fun _ -> assert false)
-
-let _raises e f =
- xml_error := e;
- file_not_found := f
-
-let make () =
- {
- check_eof = true;
- concat_pcdata = true;
- }
-
-let check_eof p v = p.check_eof <- v
-let concat_pcdata p v = p.concat_pcdata <- v
-
-let pop s =
- try
- Stack.pop s.stack
- with
- Stack.Empty ->
- Xml_lexer.token s.source
-
-let push t s =
- Stack.push t s.stack
-
-let rec read_node s =
- match pop s with
- | Xml_lexer.PCData s -> PCData s
- | Xml_lexer.Tag (tag, attr, true) -> Element (tag, attr, [])
- | Xml_lexer.Tag (tag, attr, false) -> Element (tag, attr, read_elems ~tag s)
- | t ->
- push t s;
- raise NoMoreData
-and
- read_elems ?tag s =
- let elems = ref [] in
- (try
- while true do
- match s.xparser.concat_pcdata , read_node s , !elems with
- | true , PCData c , (PCData c2) :: q ->
- elems := PCData (sprintf "%s\n%s" c2 c) :: q
- | _ , x , l ->
- elems := x :: l
- done
- with
- NoMoreData -> ());
- match pop s with
- | Xml_lexer.Endtag s when Some s = tag -> List.rev !elems
- | Xml_lexer.Eof when tag = None -> List.rev !elems
- | t ->
- match tag with
- | None -> raise (Internal_error EOFExpected)
- | Some s -> raise (Internal_error (EndOfTagExpected s))
-
-let read_xml s = read_node s
-
-let convert = function
- | Xml_lexer.EUnterminatedComment -> UnterminatedComment
- | Xml_lexer.EUnterminatedString -> UnterminatedString
- | Xml_lexer.EIdentExpected -> IdentExpected
- | Xml_lexer.ECloseExpected -> CloseExpected
- | Xml_lexer.ENodeExpected -> NodeExpected
- | Xml_lexer.EAttributeNameExpected -> AttributeNameExpected
- | Xml_lexer.EAttributeValueExpected -> AttributeValueExpected
- | Xml_lexer.EUnterminatedEntity -> UnterminatedEntity
-
-let do_parse xparser source =
- try
- Xml_lexer.init source;
- let s = { source = source; xparser = xparser; stack = Stack.create(); } in
- let x = read_xml s in
- if xparser.check_eof && pop s <> Xml_lexer.Eof then raise (Internal_error EOFExpected);
- Xml_lexer.close source;
- x
- with
- | NoMoreData ->
- Xml_lexer.close source;
- raise (!xml_error NodeExpected source)
- | Internal_error e ->
- Xml_lexer.close source;
- raise (!xml_error e source)
- | Xml_lexer.Error e ->
- Xml_lexer.close source;
- raise (!xml_error (convert e) source)
-
-let parse p = function
- | SChannel ch -> do_parse p (Lexing.from_channel ch)
- | SString str -> do_parse p (Lexing.from_string str)
- | SLexbuf lex -> do_parse p lex
- | SFile fname ->
- let ch = (try open_in fname with Sys_error _ -> raise (!file_not_found fname)) in
- try
- let x = do_parse p (Lexing.from_channel ch) in
- close_in ch;
- x
- with
- e ->
- close_in ch;
- raise e
-
-
-let error_msg = function
- | UnterminatedComment -> "Unterminated comment"
- | UnterminatedString -> "Unterminated string"
- | UnterminatedEntity -> "Unterminated entity"
- | IdentExpected -> "Ident expected"
- | CloseExpected -> "Element close expected"
- | NodeExpected -> "Xml node expected"
- | AttributeNameExpected -> "Attribute name expected"
- | AttributeValueExpected -> "Attribute value expected"
- | EndOfTagExpected tag -> sprintf "End of tag expected : '%s'" tag
- | EOFExpected -> "End of file expected"
-
-let error (msg,pos) =
- if pos.emin = pos.emax then
- sprintf "%s line %d character %d" (error_msg msg) pos.eline (pos.emin - pos.eline_start)
- else
- sprintf "%s line %d characters %d-%d" (error_msg msg) pos.eline (pos.emin - pos.eline_start) (pos.emax - pos.eline_start)
-
-let line e = e.eline
-
-let range e =
- e.emin - e.eline_start , e.emax - e.eline_start
-
-let abs_range e =
- e.emin , e.emax
-
-let pos source =
- let line, lstart, min, max = Xml_lexer.pos source in
- {
- eline = line;
- eline_start = lstart;
- emin = min;
- emax = max;
- }
\ No newline at end of file +(* + * Xml Light, an small Xml parser/printer with DTD support. + * Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com) + * Copyright (C) 2003 Jacques Garrigue + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +open Printf + +type xml = + | Element of (string * (string * string) list * xml list) + | PCData of string + +type error_pos = { + eline : int; + eline_start : int; + emin : int; + emax : int; +} + +type error_msg = + | UnterminatedComment + | UnterminatedString + | UnterminatedEntity + | IdentExpected + | CloseExpected + | NodeExpected + | AttributeNameExpected + | AttributeValueExpected + | EndOfTagExpected of string + | EOFExpected + +type error = error_msg * error_pos + +exception Error of error + +exception File_not_found of string + +type t = { + mutable check_eof : bool; + mutable concat_pcdata : bool; +} + +type source = + | SFile of string + | SChannel of in_channel + | SString of string + | SLexbuf of Lexing.lexbuf + +type state = { + source : Lexing.lexbuf; + stack : Xml_lexer.token Stack.t; + xparser : t; +} + +exception Internal_error of error_msg +exception NoMoreData + +let xml_error = ref (fun _ -> assert false) +let file_not_found = ref (fun _ -> assert false) + +let is_blank s = + let len = String.length s in + let break = ref true in + let i = ref 0 in + while !break && !i < len do + let c = s.[!i] in + (* no '\r' because we replaced them in the lexer *) + if c = ' ' || c = '\n' || c = '\t' then incr i + else break := false + done; + !i = len + +let _raises e f = + xml_error := e; + file_not_found := f + +let make () = + { + check_eof = true; + concat_pcdata = true; + } + +let check_eof p v = p.check_eof <- v +let concat_pcdata p v = p.concat_pcdata <- v + +let pop s = + try + Stack.pop s.stack + with + Stack.Empty -> + Xml_lexer.token s.source + +let push t s = + Stack.push t s.stack + +let canonicalize l = + let has_elt = List.exists (function Element _ -> true | _ -> false) l in + if has_elt then List.filter (function PCData s -> not (is_blank s) | _ -> true) l + else l + +let rec read_node s = + match pop s with + | Xml_lexer.PCData s -> PCData s + | Xml_lexer.Tag (tag, attr, true) -> Element (tag, attr, []) + | Xml_lexer.Tag (tag, attr, false) -> + let elements = read_elems ~tag s in + Element (tag, attr, canonicalize elements) + | t -> + push t s; + raise NoMoreData +and + read_elems ?tag s = + let elems = ref [] in + (try + while true do + let node = read_node s in + match node, !elems with + | PCData c , (PCData c2) :: q -> + elems := PCData (c2 ^ c) :: q + | _, l -> + elems := node :: l + done + with + NoMoreData -> ()); + match pop s with + | Xml_lexer.Endtag s when Some s = tag -> List.rev !elems + | Xml_lexer.Eof when tag = None -> List.rev !elems + | t -> + match tag with + | None -> raise (Internal_error EOFExpected) + | Some s -> raise (Internal_error (EndOfTagExpected s)) + +let read_xml s = read_node s + +let convert = function + | Xml_lexer.EUnterminatedComment -> UnterminatedComment + | Xml_lexer.EUnterminatedString -> UnterminatedString + | Xml_lexer.EIdentExpected -> IdentExpected + | Xml_lexer.ECloseExpected -> CloseExpected + | Xml_lexer.ENodeExpected -> NodeExpected + | Xml_lexer.EAttributeNameExpected -> AttributeNameExpected + | Xml_lexer.EAttributeValueExpected -> AttributeValueExpected + | Xml_lexer.EUnterminatedEntity -> UnterminatedEntity + +let do_parse xparser source = + try + Xml_lexer.init source; + let s = { source = source; xparser = xparser; stack = Stack.create(); } in + let x = read_xml s in + if xparser.check_eof && pop s <> Xml_lexer.Eof then raise (Internal_error EOFExpected); + Xml_lexer.close source; + x + with + | NoMoreData -> + Xml_lexer.close source; + raise (!xml_error NodeExpected source) + | Internal_error e -> + Xml_lexer.close source; + raise (!xml_error e source) + | Xml_lexer.Error e -> + Xml_lexer.close source; + raise (!xml_error (convert e) source) + +let parse p = function + | SChannel ch -> do_parse p (Lexing.from_channel ch) + | SString str -> do_parse p (Lexing.from_string str) + | SLexbuf lex -> do_parse p lex + | SFile fname -> + let ch = (try open_in fname with Sys_error _ -> raise (!file_not_found fname)) in + try + let x = do_parse p (Lexing.from_channel ch) in + close_in ch; + x + with + e -> + close_in ch; + raise e + + +let error_msg = function + | UnterminatedComment -> "Unterminated comment" + | UnterminatedString -> "Unterminated string" + | UnterminatedEntity -> "Unterminated entity" + | IdentExpected -> "Ident expected" + | CloseExpected -> "Element close expected" + | NodeExpected -> "Xml node expected" + | AttributeNameExpected -> "Attribute name expected" + | AttributeValueExpected -> "Attribute value expected" + | EndOfTagExpected tag -> sprintf "End of tag expected : '%s'" tag + | EOFExpected -> "End of file expected" + +let error (msg,pos) = + if pos.emin = pos.emax then + sprintf "%s line %d character %d" (error_msg msg) pos.eline (pos.emin - pos.eline_start) + else + sprintf "%s line %d characters %d-%d" (error_msg msg) pos.eline (pos.emin - pos.eline_start) (pos.emax - pos.eline_start) + +let line e = e.eline + +let range e = + e.emin - e.eline_start , e.emax - e.eline_start + +let abs_range e = + e.emin , e.emax + +let pos source = + let line, lstart, min, max = Xml_lexer.pos source in + { + eline = line; + eline_start = lstart; + emin = min; + emax = max; + } |
