From 2ed4b1e88e3e304c5146d74124d7057ac62c59a2 Mon Sep 17 00:00:00 2001 From: ppedrot Date: Thu, 24 Nov 2011 17:12:59 +0000 Subject: Fixed the XML parser CDATA handling (and changed the EOL convention of these files which where Windows-like, whoever knows why). git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/coq/trunk@14726 85f007b7-540e-0410-9357-904b9bb8a0f7 --- lib/xml_lexer.mli | 86 ++++---- lib/xml_lexer.mll | 603 ++++++++++++++++++++++++++--------------------------- lib/xml_parser.ml | 434 ++++++++++++++++++++------------------ lib/xml_parser.mli | 224 ++++++++++---------- lib/xml_utils.ml | 458 ++++++++++++++++++++-------------------- lib/xml_utils.mli | 186 ++++++++--------- 6 files changed, 1000 insertions(+), 991 deletions(-) diff --git a/lib/xml_lexer.mli b/lib/xml_lexer.mli index 6edc414bfd..a1ca057651 100644 --- a/lib/xml_lexer.mli +++ b/lib/xml_lexer.mli @@ -1,44 +1,44 @@ -(* - * Xml Light, an small Xml parser/printer with DTD support. - * Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com) - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - *) - -type error = - | EUnterminatedComment - | EUnterminatedString - | EIdentExpected - | ECloseExpected - | ENodeExpected - | EAttributeNameExpected - | EAttributeValueExpected - | EUnterminatedEntity - -exception Error of error - -type token = - | Tag of string * (string * string) list * bool - | PCData of string - | Endtag of string - | Eof - -type pos = int * int * int * int - -val init : Lexing.lexbuf -> unit -val close : Lexing.lexbuf -> unit -val token : Lexing.lexbuf -> token -val pos : Lexing.lexbuf -> pos +(* + * Xml Light, an small Xml parser/printer with DTD support. + * Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +type error = + | EUnterminatedComment + | EUnterminatedString + | EIdentExpected + | ECloseExpected + | ENodeExpected + | EAttributeNameExpected + | EAttributeValueExpected + | EUnterminatedEntity + +exception Error of error + +type token = + | Tag of string * (string * string) list * bool + | PCData of string + | Endtag of string + | Eof + +type pos = int * int * int * int + +val init : Lexing.lexbuf -> unit +val close : Lexing.lexbuf -> unit +val token : Lexing.lexbuf -> token +val pos : Lexing.lexbuf -> pos val restore : pos -> unit \ No newline at end of file diff --git a/lib/xml_lexer.mll b/lib/xml_lexer.mll index 3e5a47ed94..2be4bf98bf 100644 --- a/lib/xml_lexer.mll +++ b/lib/xml_lexer.mll @@ -1,304 +1,299 @@ -{(* - * Xml Light, an small Xml parser/printer with DTD support. - * Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com) - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - *) - -open Lexing - -type error = - | EUnterminatedComment - | EUnterminatedString - | EIdentExpected - | ECloseExpected - | ENodeExpected - | EAttributeNameExpected - | EAttributeValueExpected - | EUnterminatedEntity - -exception Error of error - -type pos = int * int * int * int - -type token = - | Tag of string * (string * string) list * bool - | PCData of string - | Endtag of string - | Eof - -let last_pos = ref 0 -and current_line = ref 0 -and current_line_start = ref 0 - -let tmp = Buffer.create 200 - -let idents = Hashtbl.create 0 - -let _ = begin - Hashtbl.add idents "gt;" ">"; - Hashtbl.add idents "lt;" "<"; - Hashtbl.add idents "amp;" "&"; - Hashtbl.add idents "apos;" "'"; - Hashtbl.add idents "quot;" "\""; -end - -let init lexbuf = - current_line := 1; - current_line_start := lexeme_start lexbuf; - last_pos := !current_line_start - -let close lexbuf = - Buffer.reset tmp - -let pos lexbuf = - !current_line , !current_line_start , - !last_pos , - lexeme_start lexbuf - -let restore (cl,cls,lp,_) = - current_line := cl; - current_line_start := cls; - last_pos := lp - -let newline lexbuf = - incr current_line; - last_pos := lexeme_end lexbuf; - current_line_start := !last_pos - -let error lexbuf e = - last_pos := lexeme_start lexbuf; - raise (Error e) - -} - -let newline = ['\n'] -let break = ['\r'] -let space = [' ' '\t'] -let identchar = ['A'-'Z' 'a'-'z' '_' '0'-'9' ':' '-'] -let entitychar = ['A'-'Z' 'a'-'z'] -let pcchar = [^ '\r' '\n' '<' '>' '&'] - -rule token = parse - | newline - { - newline lexbuf; - token lexbuf - } - | (space | break) + - { - last_pos := lexeme_end lexbuf; - token lexbuf - } - | "" - { () } - | eof - { raise (Error EUnterminatedComment) } - | _ - { comment lexbuf } - -and header = parse - | newline - { - newline lexbuf; - header lexbuf - } - | "?>" - { () } - | eof - { error lexbuf ECloseExpected } - | _ - { header lexbuf } - -and pcdata = parse - | pcchar+ - { - Buffer.add_string tmp (lexeme lexbuf); - pcdata lexbuf - } - | "&#" - { - Buffer.add_string tmp (lexeme lexbuf); - pcdata lexbuf; - } - | '&' - { - Buffer.add_string tmp (entity lexbuf); - pcdata lexbuf - } - | "" - { Buffer.contents tmp } - -and entity = parse - | entitychar+ ';' - { - let ident = lexeme lexbuf in - try - Hashtbl.find idents (String.lowercase ident) - with - Not_found -> "&" ^ ident - } - | _ | eof - { raise (Error EUnterminatedEntity) } - -and ident_name = parse - | identchar+ - { lexeme lexbuf } - | _ | eof - { error lexbuf EIdentExpected } - -and close_tag = parse - | '>' - { () } - | _ | eof - { error lexbuf ECloseExpected } - -and attributes = parse - | '>' - { [], false } - | "/>" - { [], true } - | "" (* do not read a char ! *) - { - let key = attribute lexbuf in - let data = attribute_data lexbuf in - ignore_spaces lexbuf; - let others, closed = attributes lexbuf in - (key, data) :: others, closed - } - -and attribute = parse - | identchar+ - { lexeme lexbuf } - | _ | eof - { error lexbuf EAttributeNameExpected } - -and attribute_data = parse - | space* '=' space* '"' - { - Buffer.reset tmp; - last_pos := lexeme_end lexbuf; - dq_string lexbuf - } - | space* '=' space* '\'' - { - Buffer.reset tmp; - last_pos := lexeme_end lexbuf; - q_string lexbuf - } - | _ | eof - { error lexbuf EAttributeValueExpected } - -and dq_string = parse - | '"' - { Buffer.contents tmp } - | '\\' [ '"' '\\' ] - { - Buffer.add_char tmp (lexeme_char lexbuf 1); - dq_string lexbuf - } - | eof - { raise (Error EUnterminatedString) } - | _ - { - Buffer.add_char tmp (lexeme_char lexbuf 0); - dq_string lexbuf - } - -and q_string = parse - | '\'' - { Buffer.contents tmp } - | '\\' [ '\'' '\\' ] - { - Buffer.add_char tmp (lexeme_char lexbuf 1); - q_string lexbuf - } - | eof - { raise (Error EUnterminatedString) } - | _ - { - Buffer.add_char tmp (lexeme_char lexbuf 0); - q_string lexbuf - } +{(* + * Xml Light, an small Xml parser/printer with DTD support. + * Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +open Lexing + +type error = + | EUnterminatedComment + | EUnterminatedString + | EIdentExpected + | ECloseExpected + | ENodeExpected + | EAttributeNameExpected + | EAttributeValueExpected + | EUnterminatedEntity + +exception Error of error + +type pos = int * int * int * int + +type token = + | Tag of string * (string * string) list * bool + | PCData of string + | Endtag of string + | Eof + +let last_pos = ref 0 +and current_line = ref 0 +and current_line_start = ref 0 + +let tmp = Buffer.create 200 + +let idents = Hashtbl.create 0 + +let _ = begin + Hashtbl.add idents "gt;" ">"; + Hashtbl.add idents "lt;" "<"; + Hashtbl.add idents "amp;" "&"; + Hashtbl.add idents "apos;" "'"; + Hashtbl.add idents "quot;" "\""; +end + +let init lexbuf = + current_line := 1; + current_line_start := lexeme_start lexbuf; + last_pos := !current_line_start + +let close lexbuf = + Buffer.reset tmp + +let pos lexbuf = + !current_line , !current_line_start , + !last_pos , + lexeme_start lexbuf + +let restore (cl,cls,lp,_) = + current_line := cl; + current_line_start := cls; + last_pos := lp + +let newline lexbuf = + incr current_line; + last_pos := lexeme_end lexbuf; + current_line_start := !last_pos + +let error lexbuf e = + last_pos := lexeme_start lexbuf; + raise (Error e) + +} + +let newline = ['\n'] +let break = ['\r'] +let space = [' ' '\t'] +let identchar = ['A'-'Z' 'a'-'z' '_' '0'-'9' ':' '-'] +let entitychar = ['A'-'Z' 'a'-'z'] +let pcchar = [^ '\r' '\n' '<' '>' '&'] + +rule token = parse + | newline | (newline break) | break + { + newline lexbuf; + PCData "\n" + } + | "" + { () } + | eof + { raise (Error EUnterminatedComment) } + | _ + { comment lexbuf } + +and header = parse + | newline | (newline break) | break + { + newline lexbuf; + header lexbuf + } + | "?>" + { () } + | eof + { error lexbuf ECloseExpected } + | _ + { header lexbuf } + +and pcdata = parse + | pcchar+ + { + Buffer.add_string tmp (lexeme lexbuf); + pcdata lexbuf + } + | "&#" + { + Buffer.add_string tmp (lexeme lexbuf); + pcdata lexbuf; + } + | '&' + { + Buffer.add_string tmp (entity lexbuf); + pcdata lexbuf + } + | "" + { Buffer.contents tmp } + +and entity = parse + | entitychar+ ';' + { + let ident = lexeme lexbuf in + try + Hashtbl.find idents (String.lowercase ident) + with + Not_found -> "&" ^ ident + } + | _ | eof + { raise (Error EUnterminatedEntity) } + +and ident_name = parse + | identchar+ + { lexeme lexbuf } + | _ | eof + { error lexbuf EIdentExpected } + +and close_tag = parse + | '>' + { () } + | _ | eof + { error lexbuf ECloseExpected } + +and attributes = parse + | '>' + { [], false } + | "/>" + { [], true } + | "" (* do not read a char ! *) + { + let key = attribute lexbuf in + let data = attribute_data lexbuf in + ignore_spaces lexbuf; + let others, closed = attributes lexbuf in + (key, data) :: others, closed + } + +and attribute = parse + | identchar+ + { lexeme lexbuf } + | _ | eof + { error lexbuf EAttributeNameExpected } + +and attribute_data = parse + | space* '=' space* '"' + { + Buffer.reset tmp; + last_pos := lexeme_end lexbuf; + dq_string lexbuf + } + | space* '=' space* '\'' + { + Buffer.reset tmp; + last_pos := lexeme_end lexbuf; + q_string lexbuf + } + | _ | eof + { error lexbuf EAttributeValueExpected } + +and dq_string = parse + | '"' + { Buffer.contents tmp } + | '\\' [ '"' '\\' ] + { + Buffer.add_char tmp (lexeme_char lexbuf 1); + dq_string lexbuf + } + | eof + { raise (Error EUnterminatedString) } + | _ + { + Buffer.add_char tmp (lexeme_char lexbuf 0); + dq_string lexbuf + } + +and q_string = parse + | '\'' + { Buffer.contents tmp } + | '\\' [ '\'' '\\' ] + { + Buffer.add_char tmp (lexeme_char lexbuf 1); + q_string lexbuf + } + | eof + { raise (Error EUnterminatedString) } + | _ + { + Buffer.add_char tmp (lexeme_char lexbuf 0); + q_string lexbuf + } diff --git a/lib/xml_parser.ml b/lib/xml_parser.ml index 16d9922aed..0463c6e30b 100644 --- a/lib/xml_parser.ml +++ b/lib/xml_parser.ml @@ -1,207 +1,227 @@ -(* - * Xml Light, an small Xml parser/printer with DTD support. - * Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com) - * Copyright (C) 2003 Jacques Garrigue - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - *) - -open Printf - -type xml = - | Element of (string * (string * string) list * xml list) - | PCData of string - -type error_pos = { - eline : int; - eline_start : int; - emin : int; - emax : int; -} - -type error_msg = - | UnterminatedComment - | UnterminatedString - | UnterminatedEntity - | IdentExpected - | CloseExpected - | NodeExpected - | AttributeNameExpected - | AttributeValueExpected - | EndOfTagExpected of string - | EOFExpected - -type error = error_msg * error_pos - -exception Error of error - -exception File_not_found of string - -type t = { - mutable check_eof : bool; - mutable concat_pcdata : bool; -} - -type source = - | SFile of string - | SChannel of in_channel - | SString of string - | SLexbuf of Lexing.lexbuf - -type state = { - source : Lexing.lexbuf; - stack : Xml_lexer.token Stack.t; - xparser : t; -} - -exception Internal_error of error_msg -exception NoMoreData - -let xml_error = ref (fun _ -> assert false) -let file_not_found = ref (fun _ -> assert false) - -let _raises e f = - xml_error := e; - file_not_found := f - -let make () = - { - check_eof = true; - concat_pcdata = true; - } - -let check_eof p v = p.check_eof <- v -let concat_pcdata p v = p.concat_pcdata <- v - -let pop s = - try - Stack.pop s.stack - with - Stack.Empty -> - Xml_lexer.token s.source - -let push t s = - Stack.push t s.stack - -let rec read_node s = - match pop s with - | Xml_lexer.PCData s -> PCData s - | Xml_lexer.Tag (tag, attr, true) -> Element (tag, attr, []) - | Xml_lexer.Tag (tag, attr, false) -> Element (tag, attr, read_elems ~tag s) - | t -> - push t s; - raise NoMoreData -and - read_elems ?tag s = - let elems = ref [] in - (try - while true do - match s.xparser.concat_pcdata , read_node s , !elems with - | true , PCData c , (PCData c2) :: q -> - elems := PCData (sprintf "%s\n%s" c2 c) :: q - | _ , x , l -> - elems := x :: l - done - with - NoMoreData -> ()); - match pop s with - | Xml_lexer.Endtag s when Some s = tag -> List.rev !elems - | Xml_lexer.Eof when tag = None -> List.rev !elems - | t -> - match tag with - | None -> raise (Internal_error EOFExpected) - | Some s -> raise (Internal_error (EndOfTagExpected s)) - -let read_xml s = read_node s - -let convert = function - | Xml_lexer.EUnterminatedComment -> UnterminatedComment - | Xml_lexer.EUnterminatedString -> UnterminatedString - | Xml_lexer.EIdentExpected -> IdentExpected - | Xml_lexer.ECloseExpected -> CloseExpected - | Xml_lexer.ENodeExpected -> NodeExpected - | Xml_lexer.EAttributeNameExpected -> AttributeNameExpected - | Xml_lexer.EAttributeValueExpected -> AttributeValueExpected - | Xml_lexer.EUnterminatedEntity -> UnterminatedEntity - -let do_parse xparser source = - try - Xml_lexer.init source; - let s = { source = source; xparser = xparser; stack = Stack.create(); } in - let x = read_xml s in - if xparser.check_eof && pop s <> Xml_lexer.Eof then raise (Internal_error EOFExpected); - Xml_lexer.close source; - x - with - | NoMoreData -> - Xml_lexer.close source; - raise (!xml_error NodeExpected source) - | Internal_error e -> - Xml_lexer.close source; - raise (!xml_error e source) - | Xml_lexer.Error e -> - Xml_lexer.close source; - raise (!xml_error (convert e) source) - -let parse p = function - | SChannel ch -> do_parse p (Lexing.from_channel ch) - | SString str -> do_parse p (Lexing.from_string str) - | SLexbuf lex -> do_parse p lex - | SFile fname -> - let ch = (try open_in fname with Sys_error _ -> raise (!file_not_found fname)) in - try - let x = do_parse p (Lexing.from_channel ch) in - close_in ch; - x - with - e -> - close_in ch; - raise e - - -let error_msg = function - | UnterminatedComment -> "Unterminated comment" - | UnterminatedString -> "Unterminated string" - | UnterminatedEntity -> "Unterminated entity" - | IdentExpected -> "Ident expected" - | CloseExpected -> "Element close expected" - | NodeExpected -> "Xml node expected" - | AttributeNameExpected -> "Attribute name expected" - | AttributeValueExpected -> "Attribute value expected" - | EndOfTagExpected tag -> sprintf "End of tag expected : '%s'" tag - | EOFExpected -> "End of file expected" - -let error (msg,pos) = - if pos.emin = pos.emax then - sprintf "%s line %d character %d" (error_msg msg) pos.eline (pos.emin - pos.eline_start) - else - sprintf "%s line %d characters %d-%d" (error_msg msg) pos.eline (pos.emin - pos.eline_start) (pos.emax - pos.eline_start) - -let line e = e.eline - -let range e = - e.emin - e.eline_start , e.emax - e.eline_start - -let abs_range e = - e.emin , e.emax - -let pos source = - let line, lstart, min, max = Xml_lexer.pos source in - { - eline = line; - eline_start = lstart; - emin = min; - emax = max; - } \ No newline at end of file +(* + * Xml Light, an small Xml parser/printer with DTD support. + * Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com) + * Copyright (C) 2003 Jacques Garrigue + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +open Printf + +type xml = + | Element of (string * (string * string) list * xml list) + | PCData of string + +type error_pos = { + eline : int; + eline_start : int; + emin : int; + emax : int; +} + +type error_msg = + | UnterminatedComment + | UnterminatedString + | UnterminatedEntity + | IdentExpected + | CloseExpected + | NodeExpected + | AttributeNameExpected + | AttributeValueExpected + | EndOfTagExpected of string + | EOFExpected + +type error = error_msg * error_pos + +exception Error of error + +exception File_not_found of string + +type t = { + mutable check_eof : bool; + mutable concat_pcdata : bool; +} + +type source = + | SFile of string + | SChannel of in_channel + | SString of string + | SLexbuf of Lexing.lexbuf + +type state = { + source : Lexing.lexbuf; + stack : Xml_lexer.token Stack.t; + xparser : t; +} + +exception Internal_error of error_msg +exception NoMoreData + +let xml_error = ref (fun _ -> assert false) +let file_not_found = ref (fun _ -> assert false) + +let is_blank s = + let len = String.length s in + let break = ref true in + let i = ref 0 in + while !break && !i < len do + let c = s.[!i] in + (* no '\r' because we replaced them in the lexer *) + if c = ' ' || c = '\n' || c = '\t' then incr i + else break := false + done; + !i = len + +let _raises e f = + xml_error := e; + file_not_found := f + +let make () = + { + check_eof = true; + concat_pcdata = true; + } + +let check_eof p v = p.check_eof <- v +let concat_pcdata p v = p.concat_pcdata <- v + +let pop s = + try + Stack.pop s.stack + with + Stack.Empty -> + Xml_lexer.token s.source + +let push t s = + Stack.push t s.stack + +let canonicalize l = + let has_elt = List.exists (function Element _ -> true | _ -> false) l in + if has_elt then List.filter (function PCData s -> not (is_blank s) | _ -> true) l + else l + +let rec read_node s = + match pop s with + | Xml_lexer.PCData s -> PCData s + | Xml_lexer.Tag (tag, attr, true) -> Element (tag, attr, []) + | Xml_lexer.Tag (tag, attr, false) -> + let elements = read_elems ~tag s in + Element (tag, attr, canonicalize elements) + | t -> + push t s; + raise NoMoreData +and + read_elems ?tag s = + let elems = ref [] in + (try + while true do + let node = read_node s in + match node, !elems with + | PCData c , (PCData c2) :: q -> + elems := PCData (c2 ^ c) :: q + | _, l -> + elems := node :: l + done + with + NoMoreData -> ()); + match pop s with + | Xml_lexer.Endtag s when Some s = tag -> List.rev !elems + | Xml_lexer.Eof when tag = None -> List.rev !elems + | t -> + match tag with + | None -> raise (Internal_error EOFExpected) + | Some s -> raise (Internal_error (EndOfTagExpected s)) + +let read_xml s = read_node s + +let convert = function + | Xml_lexer.EUnterminatedComment -> UnterminatedComment + | Xml_lexer.EUnterminatedString -> UnterminatedString + | Xml_lexer.EIdentExpected -> IdentExpected + | Xml_lexer.ECloseExpected -> CloseExpected + | Xml_lexer.ENodeExpected -> NodeExpected + | Xml_lexer.EAttributeNameExpected -> AttributeNameExpected + | Xml_lexer.EAttributeValueExpected -> AttributeValueExpected + | Xml_lexer.EUnterminatedEntity -> UnterminatedEntity + +let do_parse xparser source = + try + Xml_lexer.init source; + let s = { source = source; xparser = xparser; stack = Stack.create(); } in + let x = read_xml s in + if xparser.check_eof && pop s <> Xml_lexer.Eof then raise (Internal_error EOFExpected); + Xml_lexer.close source; + x + with + | NoMoreData -> + Xml_lexer.close source; + raise (!xml_error NodeExpected source) + | Internal_error e -> + Xml_lexer.close source; + raise (!xml_error e source) + | Xml_lexer.Error e -> + Xml_lexer.close source; + raise (!xml_error (convert e) source) + +let parse p = function + | SChannel ch -> do_parse p (Lexing.from_channel ch) + | SString str -> do_parse p (Lexing.from_string str) + | SLexbuf lex -> do_parse p lex + | SFile fname -> + let ch = (try open_in fname with Sys_error _ -> raise (!file_not_found fname)) in + try + let x = do_parse p (Lexing.from_channel ch) in + close_in ch; + x + with + e -> + close_in ch; + raise e + + +let error_msg = function + | UnterminatedComment -> "Unterminated comment" + | UnterminatedString -> "Unterminated string" + | UnterminatedEntity -> "Unterminated entity" + | IdentExpected -> "Ident expected" + | CloseExpected -> "Element close expected" + | NodeExpected -> "Xml node expected" + | AttributeNameExpected -> "Attribute name expected" + | AttributeValueExpected -> "Attribute value expected" + | EndOfTagExpected tag -> sprintf "End of tag expected : '%s'" tag + | EOFExpected -> "End of file expected" + +let error (msg,pos) = + if pos.emin = pos.emax then + sprintf "%s line %d character %d" (error_msg msg) pos.eline (pos.emin - pos.eline_start) + else + sprintf "%s line %d characters %d-%d" (error_msg msg) pos.eline (pos.emin - pos.eline_start) (pos.emax - pos.eline_start) + +let line e = e.eline + +let range e = + e.emin - e.eline_start , e.emax - e.eline_start + +let abs_range e = + e.emin , e.emax + +let pos source = + let line, lstart, min, max = Xml_lexer.pos source in + { + eline = line; + eline_start = lstart; + emin = min; + emax = max; + } diff --git a/lib/xml_parser.mli b/lib/xml_parser.mli index 34dec792d9..7f501986be 100644 --- a/lib/xml_parser.mli +++ b/lib/xml_parser.mli @@ -1,115 +1,109 @@ -(* - * Xml Light, an small Xml parser/printer with DTD support. - * Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com) - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - *) - -(** Xml Light Parser - - While basic parsing functions can be used in the {!Xml} module, this module - is providing a way to create, configure and run an Xml parser. - -*) - - -(** An Xml node is either - [Element (tag-name, attributes, children)] or [PCData text] *) -type xml = - | Element of (string * (string * string) list * xml list) - | PCData of string - -(** Abstract type for an Xml parser. *) -type t - -(** {6:exc Xml Exceptions} *) - -(** Several exceptions can be raised when parsing an Xml document : {ul - {li {!Xml.Error} is raised when an xml parsing error occurs. the - {!Xml.error_msg} tells you which error occured during parsing - and the {!Xml.error_pos} can be used to retreive the document - location where the error occured at.} - {li {!Xml.File_not_found} is raised when and error occured while - opening a file with the {!Xml.parse_file} function. - } - *) - -type error_pos - -type error_msg = - | UnterminatedComment - | UnterminatedString - | UnterminatedEntity - | IdentExpected - | CloseExpected - | NodeExpected - | AttributeNameExpected - | AttributeValueExpected - | EndOfTagExpected of string - | EOFExpected - -type error = error_msg * error_pos - -exception Error of error - -exception File_not_found of string - -(** Get a full error message from an Xml error. *) -val error : error -> string - -(** Get the Xml error message as a string. *) -val error_msg : error_msg -> string - -(** Get the line the error occured at. *) -val line : error_pos -> int - -(** Get the relative character range (in current line) the error occured at.*) -val range : error_pos -> int * int - -(** Get the absolute character range the error occured at. *) -val abs_range : error_pos -> int * int - -val pos : Lexing.lexbuf -> error_pos - -(** Several kind of resources can contain Xml documents. *) -type source = - | SFile of string - | SChannel of in_channel - | SString of string - | SLexbuf of Lexing.lexbuf - -(** This function returns a new parser with default options. *) -val make : unit -> t - -(** When a Xml document is parsed, the parser will check that the end of the - document is reached, so for example parsing [""] will fail instead - of returning only the A element. You can turn off this check by setting - [check_eof] to [false] {i (by default, check_eof is true)}. *) -val check_eof : t -> bool -> unit - -(** Once the parser is configurated, you can run the parser on a any kind - of xml document source to parse its contents into an Xml data structure. *) -val parse : t -> source -> xml - -(** When several PCData elements are separed by a \n (or \r\n), you can - either split the PCData in two distincts PCData or merge them with \n - as seperator into one PCData. The default behavior is to concat the - PCData, but this can be changed for a given parser with this flag. *) -val concat_pcdata : t -> bool -> unit - -(**/**) - -(* internal usage only... *) -val _raises : (error_msg -> Lexing.lexbuf -> exn) -> (string -> exn) -> unit +(* + * Xml Light, an small Xml parser/printer with DTD support. + * Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +(** Xml Light Parser + + While basic parsing functions can be used in the {!Xml} module, this module + is providing a way to create, configure and run an Xml parser. + +*) + + +(** An Xml node is either + [Element (tag-name, attributes, children)] or [PCData text] *) +type xml = + | Element of (string * (string * string) list * xml list) + | PCData of string + +(** Abstract type for an Xml parser. *) +type t + +(** {6:exc Xml Exceptions} *) + +(** Several exceptions can be raised when parsing an Xml document : {ul + {li {!Xml.Error} is raised when an xml parsing error occurs. the + {!Xml.error_msg} tells you which error occured during parsing + and the {!Xml.error_pos} can be used to retreive the document + location where the error occured at.} + {li {!Xml.File_not_found} is raised when and error occured while + opening a file with the {!Xml.parse_file} function. + } + *) + +type error_pos + +type error_msg = + | UnterminatedComment + | UnterminatedString + | UnterminatedEntity + | IdentExpected + | CloseExpected + | NodeExpected + | AttributeNameExpected + | AttributeValueExpected + | EndOfTagExpected of string + | EOFExpected + +type error = error_msg * error_pos + +exception Error of error + +exception File_not_found of string + +(** Get a full error message from an Xml error. *) +val error : error -> string + +(** Get the Xml error message as a string. *) +val error_msg : error_msg -> string + +(** Get the line the error occured at. *) +val line : error_pos -> int + +(** Get the relative character range (in current line) the error occured at.*) +val range : error_pos -> int * int + +(** Get the absolute character range the error occured at. *) +val abs_range : error_pos -> int * int + +val pos : Lexing.lexbuf -> error_pos + +(** Several kind of resources can contain Xml documents. *) +type source = + | SFile of string + | SChannel of in_channel + | SString of string + | SLexbuf of Lexing.lexbuf + +(** This function returns a new parser with default options. *) +val make : unit -> t + +(** When a Xml document is parsed, the parser will check that the end of the + document is reached, so for example parsing [""] will fail instead + of returning only the A element. You can turn off this check by setting + [check_eof] to [false] {i (by default, check_eof is true)}. *) +val check_eof : t -> bool -> unit + +(** Once the parser is configurated, you can run the parser on a any kind + of xml document source to parse its contents into an Xml data structure. *) +val parse : t -> source -> xml + +(**/**) + +(* internal usage only... *) +val _raises : (error_msg -> Lexing.lexbuf -> exn) -> (string -> exn) -> unit diff --git a/lib/xml_utils.ml b/lib/xml_utils.ml index 0a73cec09e..2fc1b9065b 100644 --- a/lib/xml_utils.ml +++ b/lib/xml_utils.ml @@ -1,229 +1,229 @@ -(* - * Xml Light, an small Xml parser/printer with DTD support. - * Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com) - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - *) - -open Printf -open Xml_parser - -exception Not_element of xml -exception Not_pcdata of xml -exception No_attribute of string - -let default_parser = Xml_parser.make() - -let parse (p:Xml_parser.t) (source:Xml_parser.source) = - (* local cast Xml.xml -> xml *) - (Obj.magic Xml_parser.parse p source : xml) - -let parse_in ch = parse default_parser (Xml_parser.SChannel ch) -let parse_string str = parse default_parser (Xml_parser.SString str) - -let parse_file f = parse default_parser (Xml_parser.SFile f) - -let tag = function - | Element (tag,_,_) -> tag - | x -> raise (Not_element x) - -let pcdata = function - | PCData text -> text - | x -> raise (Not_pcdata x) - -let attribs = function - | Element (_,attr,_) -> attr - | x -> raise (Not_element x) - -let attrib x att = - match x with - | Element (_,attr,_) -> - (try - let att = String.lowercase att in - snd (List.find (fun (n,_) -> String.lowercase n = att) attr) - with - Not_found -> - raise (No_attribute att)) - | x -> - raise (Not_element x) - -let children = function - | Element (_,_,clist) -> clist - | x -> raise (Not_element x) - -(*let enum = function - | Element (_,_,clist) -> List.to_enum clist - | x -> raise (Not_element x) -*) - -let iter f = function - | Element (_,_,clist) -> List.iter f clist - | x -> raise (Not_element x) - -let map f = function - | Element (_,_,clist) -> List.map f clist - | x -> raise (Not_element x) - -let fold f v = function - | Element (_,_,clist) -> List.fold_left f v clist - | x -> raise (Not_element x) - -let tmp = Buffer.create 200 - -let buffer_pcdata text = - let l = String.length text in - for p = 0 to l-1 do - match text.[p] with - | '>' -> Buffer.add_string tmp ">" - | '<' -> Buffer.add_string tmp "<" - | '&' -> - if p < l-1 && text.[p+1] = '#' then - Buffer.add_char tmp '&' - else - Buffer.add_string tmp "&" - | '\'' -> Buffer.add_string tmp "'" - | '"' -> Buffer.add_string tmp """ - | c -> Buffer.add_char tmp c - done - -let print_pcdata chan text = - let l = String.length text in - for p = 0 to l-1 do - match text.[p] with - | '>' -> Printf.fprintf chan ">" - | '<' -> Printf.fprintf chan "<" - | '&' -> - if p < l-1 && text.[p+1] = '#' then - Printf.fprintf chan "&" - else - Printf.fprintf chan "&" - | '\'' -> Printf.fprintf chan "'" - | '"' -> Printf.fprintf chan """ - | c -> Printf.fprintf chan "%c" c - done - -let buffer_attr (n,v) = - Buffer.add_char tmp ' '; - Buffer.add_string tmp n; - Buffer.add_string tmp "=\""; - let l = String.length v in - for p = 0 to l-1 do - match v.[p] with - | '\\' -> Buffer.add_string tmp "\\\\" - | '"' -> Buffer.add_string tmp "\\\"" - | c -> Buffer.add_char tmp c - done; - Buffer.add_char tmp '"' - -let rec print_attr chan (n, v) = - Printf.fprintf chan " %s=\"" n; - let l = String.length v in - for p = 0 to l-1 do - match v.[p] with - | '\\' -> Printf.fprintf chan "\\\\" - | '"' -> Printf.fprintf chan "\\\"" - | c -> Printf.fprintf chan "%c" c - done; - Printf.fprintf chan "\"" - -let print_attrs chan l = List.iter (print_attr chan) l - -let rec print_xml chan = function -| Element (tag, alist, []) -> - Printf.fprintf chan "<%s%a/>" tag print_attrs alist; -| Element (tag, alist, l) -> - Printf.fprintf chan "<%s%a>%a" tag print_attrs alist - (fun chan -> List.iter (print_xml chan)) l tag -| PCData text -> - print_pcdata chan text - -let to_string x = - let pcdata = ref false in - let rec loop = function - | Element (tag,alist,[]) -> - Buffer.add_char tmp '<'; - Buffer.add_string tmp tag; - List.iter buffer_attr alist; - Buffer.add_string tmp "/>"; - pcdata := false; - | Element (tag,alist,l) -> - Buffer.add_char tmp '<'; - Buffer.add_string tmp tag; - List.iter buffer_attr alist; - Buffer.add_char tmp '>'; - pcdata := false; - List.iter loop l; - Buffer.add_string tmp "'; - pcdata := false; - | PCData text -> - if !pcdata then Buffer.add_char tmp ' '; - buffer_pcdata text; - pcdata := true; - in - Buffer.reset tmp; - loop x; - let s = Buffer.contents tmp in - Buffer.reset tmp; - s - -let to_string_fmt x = - let rec loop ?(newl=false) tab = function - | Element (tag,alist,[]) -> - Buffer.add_string tmp tab; - Buffer.add_char tmp '<'; - Buffer.add_string tmp tag; - List.iter buffer_attr alist; - Buffer.add_string tmp "/>"; - if newl then Buffer.add_char tmp '\n'; - | Element (tag,alist,[PCData text]) -> - Buffer.add_string tmp tab; - Buffer.add_char tmp '<'; - Buffer.add_string tmp tag; - List.iter buffer_attr alist; - Buffer.add_string tmp ">"; - buffer_pcdata text; - Buffer.add_string tmp "'; - if newl then Buffer.add_char tmp '\n'; - | Element (tag,alist,l) -> - Buffer.add_string tmp tab; - Buffer.add_char tmp '<'; - Buffer.add_string tmp tag; - List.iter buffer_attr alist; - Buffer.add_string tmp ">\n"; - List.iter (loop ~newl:true (tab^" ")) l; - Buffer.add_string tmp tab; - Buffer.add_string tmp "'; - if newl then Buffer.add_char tmp '\n'; - | PCData text -> - buffer_pcdata text; - if newl then Buffer.add_char tmp '\n'; - in - Buffer.reset tmp; - loop "" x; - let s = Buffer.contents tmp in - Buffer.reset tmp; - s - -;; -Xml_parser._raises (fun x p -> - (* local cast : Xml.error_msg -> error_msg *) - Error (x, pos p)) - (fun f -> File_not_found f);; +(* + * Xml Light, an small Xml parser/printer with DTD support. + * Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +open Printf +open Xml_parser + +exception Not_element of xml +exception Not_pcdata of xml +exception No_attribute of string + +let default_parser = Xml_parser.make() + +let parse (p:Xml_parser.t) (source:Xml_parser.source) = + (* local cast Xml.xml -> xml *) + (Obj.magic Xml_parser.parse p source : xml) + +let parse_in ch = parse default_parser (Xml_parser.SChannel ch) +let parse_string str = parse default_parser (Xml_parser.SString str) + +let parse_file f = parse default_parser (Xml_parser.SFile f) + +let tag = function + | Element (tag,_,_) -> tag + | x -> raise (Not_element x) + +let pcdata = function + | PCData text -> text + | x -> raise (Not_pcdata x) + +let attribs = function + | Element (_,attr,_) -> attr + | x -> raise (Not_element x) + +let attrib x att = + match x with + | Element (_,attr,_) -> + (try + let att = String.lowercase att in + snd (List.find (fun (n,_) -> String.lowercase n = att) attr) + with + Not_found -> + raise (No_attribute att)) + | x -> + raise (Not_element x) + +let children = function + | Element (_,_,clist) -> clist + | x -> raise (Not_element x) + +(*let enum = function + | Element (_,_,clist) -> List.to_enum clist + | x -> raise (Not_element x) +*) + +let iter f = function + | Element (_,_,clist) -> List.iter f clist + | x -> raise (Not_element x) + +let map f = function + | Element (_,_,clist) -> List.map f clist + | x -> raise (Not_element x) + +let fold f v = function + | Element (_,_,clist) -> List.fold_left f v clist + | x -> raise (Not_element x) + +let tmp = Buffer.create 200 + +let buffer_pcdata text = + let l = String.length text in + for p = 0 to l-1 do + match text.[p] with + | '>' -> Buffer.add_string tmp ">" + | '<' -> Buffer.add_string tmp "<" + | '&' -> + if p < l-1 && text.[p+1] = '#' then + Buffer.add_char tmp '&' + else + Buffer.add_string tmp "&" + | '\'' -> Buffer.add_string tmp "'" + | '"' -> Buffer.add_string tmp """ + | c -> Buffer.add_char tmp c + done + +let print_pcdata chan text = + let l = String.length text in + for p = 0 to l-1 do + match text.[p] with + | '>' -> Printf.fprintf chan ">" + | '<' -> Printf.fprintf chan "<" + | '&' -> + if p < l-1 && text.[p+1] = '#' then + Printf.fprintf chan "&" + else + Printf.fprintf chan "&" + | '\'' -> Printf.fprintf chan "'" + | '"' -> Printf.fprintf chan """ + | c -> Printf.fprintf chan "%c" c + done + +let buffer_attr (n,v) = + Buffer.add_char tmp ' '; + Buffer.add_string tmp n; + Buffer.add_string tmp "=\""; + let l = String.length v in + for p = 0 to l-1 do + match v.[p] with + | '\\' -> Buffer.add_string tmp "\\\\" + | '"' -> Buffer.add_string tmp "\\\"" + | c -> Buffer.add_char tmp c + done; + Buffer.add_char tmp '"' + +let rec print_attr chan (n, v) = + Printf.fprintf chan " %s=\"" n; + let l = String.length v in + for p = 0 to l-1 do + match v.[p] with + | '\\' -> Printf.fprintf chan "\\\\" + | '"' -> Printf.fprintf chan "\\\"" + | c -> Printf.fprintf chan "%c" c + done; + Printf.fprintf chan "\"" + +let print_attrs chan l = List.iter (print_attr chan) l + +let rec print_xml chan = function +| Element (tag, alist, []) -> + Printf.fprintf chan "<%s%a/>" tag print_attrs alist; +| Element (tag, alist, l) -> + Printf.fprintf chan "<%s%a>%a" tag print_attrs alist + (fun chan -> List.iter (print_xml chan)) l tag +| PCData text -> + print_pcdata chan text + +let to_string x = + let pcdata = ref false in + let rec loop = function + | Element (tag,alist,[]) -> + Buffer.add_char tmp '<'; + Buffer.add_string tmp tag; + List.iter buffer_attr alist; + Buffer.add_string tmp "/>"; + pcdata := false; + | Element (tag,alist,l) -> + Buffer.add_char tmp '<'; + Buffer.add_string tmp tag; + List.iter buffer_attr alist; + Buffer.add_char tmp '>'; + pcdata := false; + List.iter loop l; + Buffer.add_string tmp "'; + pcdata := false; + | PCData text -> + if !pcdata then Buffer.add_char tmp ' '; + buffer_pcdata text; + pcdata := true; + in + Buffer.reset tmp; + loop x; + let s = Buffer.contents tmp in + Buffer.reset tmp; + s + +let to_string_fmt x = + let rec loop ?(newl=false) tab = function + | Element (tag,alist,[]) -> + Buffer.add_string tmp tab; + Buffer.add_char tmp '<'; + Buffer.add_string tmp tag; + List.iter buffer_attr alist; + Buffer.add_string tmp "/>"; + if newl then Buffer.add_char tmp '\n'; + | Element (tag,alist,[PCData text]) -> + Buffer.add_string tmp tab; + Buffer.add_char tmp '<'; + Buffer.add_string tmp tag; + List.iter buffer_attr alist; + Buffer.add_string tmp ">"; + buffer_pcdata text; + Buffer.add_string tmp "'; + if newl then Buffer.add_char tmp '\n'; + | Element (tag,alist,l) -> + Buffer.add_string tmp tab; + Buffer.add_char tmp '<'; + Buffer.add_string tmp tag; + List.iter buffer_attr alist; + Buffer.add_string tmp ">\n"; + List.iter (loop ~newl:true (tab^" ")) l; + Buffer.add_string tmp tab; + Buffer.add_string tmp "'; + if newl then Buffer.add_char tmp '\n'; + | PCData text -> + buffer_pcdata text; + if newl then Buffer.add_char tmp '\n'; + in + Buffer.reset tmp; + loop "" x; + let s = Buffer.contents tmp in + Buffer.reset tmp; + s + +;; +Xml_parser._raises (fun x p -> + (* local cast : Xml.error_msg -> error_msg *) + Error (x, pos p)) + (fun f -> File_not_found f);; diff --git a/lib/xml_utils.mli b/lib/xml_utils.mli index 606e3182d2..4a4a1309b7 100644 --- a/lib/xml_utils.mli +++ b/lib/xml_utils.mli @@ -1,93 +1,93 @@ -(* - * Xml Light, an small Xml parser/printer with DTD support. - * Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com) - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - *) - -(** Xml Light - - Xml Light is a minimal Xml parser & printer for OCaml. - It provide few functions to parse a basic Xml document into - an OCaml data structure and to print back the data structures - to an Xml document. - - Xml Light has also support for {b DTD} (Document Type Definition). - - {i (c)Copyright 2002-2003 Nicolas Cannasse} -*) - -open Xml_parser - -(** {6 Xml Functions} *) - -exception Not_element of xml -exception Not_pcdata of xml -exception No_attribute of string - -(** [tag xdata] returns the tag value of the xml node. - Raise {!Xml.Not_element} if the xml is not an element *) -val tag : xml -> string - -(** [pcdata xdata] returns the PCData value of the xml node. - Raise {!Xml.Not_pcdata} if the xml is not a PCData *) -val pcdata : xml -> string - -(** [attribs xdata] returns the attribute list of the xml node. - First string if the attribute name, second string is attribute value. - Raise {!Xml.Not_element} if the xml is not an element *) -val attribs : xml -> (string * string) list - -(** [attrib xdata "href"] returns the value of the ["href"] - attribute of the xml node (attribute matching is case-insensitive). - Raise {!Xml.No_attribute} if the attribute does not exists in the node's - attribute list - Raise {!Xml.Not_element} if the xml is not an element *) -val attrib : xml -> string -> string - -(** [children xdata] returns the children list of the xml node - Raise {!Xml.Not_element} if the xml is not an element *) -val children : xml -> xml list - -(*** [enum xdata] returns the children enumeration of the xml node - Raise {!Xml.Not_element} if the xml is not an element *) -(* val enum : xml -> xml Enum.t *) - -(** [iter f xdata] calls f on all children of the xml node. - Raise {!Xml.Not_element} if the xml is not an element *) -val iter : (xml -> unit) -> xml -> unit - -(** [map f xdata] is equivalent to [List.map f (Xml.children xdata)] - Raise {!Xml.Not_element} if the xml is not an element *) -val map : (xml -> 'a) -> xml -> 'a list - -(** [fold f init xdata] is equivalent to - [List.fold_left f init (Xml.children xdata)] - Raise {!Xml.Not_element} if the xml is not an element *) -val fold : ('a -> xml -> 'a) -> 'a -> xml -> 'a - -(** {6 Xml Printing} *) - -(** Print the xml data structure to a channel into a compact xml string (without - any user-readable formating ). *) -val print_xml : out_channel -> xml -> unit - -(** Print the xml data structure into a compact xml string (without - any user-readable formating ). *) -val to_string : xml -> string - -(** Print the xml data structure into an user-readable string with - tabs and lines break between different nodes. *) -val to_string_fmt : xml -> string +(* + * Xml Light, an small Xml parser/printer with DTD support. + * Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +(** Xml Light + + Xml Light is a minimal Xml parser & printer for OCaml. + It provide few functions to parse a basic Xml document into + an OCaml data structure and to print back the data structures + to an Xml document. + + Xml Light has also support for {b DTD} (Document Type Definition). + + {i (c)Copyright 2002-2003 Nicolas Cannasse} +*) + +open Xml_parser + +(** {6 Xml Functions} *) + +exception Not_element of xml +exception Not_pcdata of xml +exception No_attribute of string + +(** [tag xdata] returns the tag value of the xml node. + Raise {!Xml.Not_element} if the xml is not an element *) +val tag : xml -> string + +(** [pcdata xdata] returns the PCData value of the xml node. + Raise {!Xml.Not_pcdata} if the xml is not a PCData *) +val pcdata : xml -> string + +(** [attribs xdata] returns the attribute list of the xml node. + First string if the attribute name, second string is attribute value. + Raise {!Xml.Not_element} if the xml is not an element *) +val attribs : xml -> (string * string) list + +(** [attrib xdata "href"] returns the value of the ["href"] + attribute of the xml node (attribute matching is case-insensitive). + Raise {!Xml.No_attribute} if the attribute does not exists in the node's + attribute list + Raise {!Xml.Not_element} if the xml is not an element *) +val attrib : xml -> string -> string + +(** [children xdata] returns the children list of the xml node + Raise {!Xml.Not_element} if the xml is not an element *) +val children : xml -> xml list + +(*** [enum xdata] returns the children enumeration of the xml node + Raise {!Xml.Not_element} if the xml is not an element *) +(* val enum : xml -> xml Enum.t *) + +(** [iter f xdata] calls f on all children of the xml node. + Raise {!Xml.Not_element} if the xml is not an element *) +val iter : (xml -> unit) -> xml -> unit + +(** [map f xdata] is equivalent to [List.map f (Xml.children xdata)] + Raise {!Xml.Not_element} if the xml is not an element *) +val map : (xml -> 'a) -> xml -> 'a list + +(** [fold f init xdata] is equivalent to + [List.fold_left f init (Xml.children xdata)] + Raise {!Xml.Not_element} if the xml is not an element *) +val fold : ('a -> xml -> 'a) -> 'a -> xml -> 'a + +(** {6 Xml Printing} *) + +(** Print the xml data structure to a channel into a compact xml string (without + any user-readable formating ). *) +val print_xml : out_channel -> xml -> unit + +(** Print the xml data structure into a compact xml string (without + any user-readable formating ). *) +val to_string : xml -> string + +(** Print the xml data structure into an user-readable string with + tabs and lines break between different nodes. *) +val to_string_fmt : xml -> string -- cgit v1.2.3