aboutsummaryrefslogtreecommitdiff
path: root/lib/xml_parser.mli
blob: 34dec792d951d3cbd7a1dc8d2132ee5444bb35b9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
(*
 * Xml Light, an small Xml parser/printer with DTD support.
 * Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *)

(** Xml Light Parser

 While basic parsing functions can be used in the {!Xml} module, this module
 provides a way to create, configure and run an Xml parser.

*)


(** An Xml node is either
        [Element (tag-name, attributes, children)] or [PCData text].

        - [Element] carries a single triple: the tag name, the attribute list
          and the list of child nodes (the type is recursive).
        - [PCData] carries the text of a character-data node.

        NOTE(review): each attribute is a [(string * string)] pair, presumably
        (name, value) -- confirm against the implementation. *)
type xml = 
        | Element of (string * (string * string) list * xml list)
        | PCData of string

(** Abstract type for an Xml parser.

    A parser is obtained with {!make}, configured with {!check_eof} and
    {!concat_pcdata}, and run with {!parse}. *)
type t

(** {6:exc Xml Exceptions} *)

(** Several exceptions can be raised when parsing an Xml document : {ul
        {li {!Xml.Error} is raised when an xml parsing error occurs. The
                {!Xml.error_msg} tells you which error occurred during parsing
                and the {!Xml.error_pos} can be used to retrieve the document
                location where the error occurred.}
        {li {!Xml.File_not_found} is raised when an error occurred while
                opening a file with the {!Xml.parse_file} function.}
        }
 *)

(** Abstract location of an error in a document. Inspect it with {!line},
    {!range} and {!abs_range}; build one from a lexer buffer with {!pos}. *)
type error_pos

(** The kind of error reported by the parser. Use the {!error_msg} function
    to turn one of these into a string.

    NOTE(review): the constructor names suggest what the parser was expecting
    or what was left unterminated; the [string] carried by [EndOfTagExpected]
    is presumably the name of the tag that was not closed -- confirm against
    the implementation. *)
type error_msg =
        | UnterminatedComment
        | UnterminatedString
        | UnterminatedEntity
        | IdentExpected
        | CloseExpected
        | NodeExpected
        | AttributeNameExpected
        | AttributeValueExpected
        | EndOfTagExpected of string
        | EOFExpected

(** A complete Xml error: the kind of error paired with the position it
    refers to. *)
type error = error_msg * error_pos

(** Exception carrying an Xml {!error} (error kind and position).
    NOTE(review): presumably raised by {!parse} when the document is
    malformed -- confirm against the implementation. *)
exception Error of error

(** Exception carrying a [string].
    NOTE(review): presumably raised when a file source cannot be opened, with
    the file name as argument -- confirm against the implementation. *)
exception File_not_found of string

(** [error e] returns a full error message for the Xml error [e], that is
    for an {!error_msg} paired with its {!error_pos}. *)
val error : error -> string

(** [error_msg m] returns the Xml error message [m] as a string. Unlike
    {!error}, it takes no position. *)
val error_msg : error_msg -> string 

(** [line p] returns the line the error occurred at.
    NOTE(review): whether lines are counted from 0 or 1 is not visible in
    this interface -- confirm against the implementation. *)
val line : error_pos -> int

(** [range p] returns the relative character range (in the current line) the
    error occurred at, as a pair of integers -- presumably (start, end);
    confirm against the implementation. *)
val range : error_pos -> int * int

(** [abs_range p] returns the absolute character range the error occurred
    at, as a pair of integers -- presumably (start, end) offsets in the whole
    document; confirm against the implementation. *)
val abs_range : error_pos -> int * int

(** [pos lexbuf] builds an {!error_pos} from a lexer buffer.
    NOTE(review): presumably this captures the current position of [lexbuf]
    -- confirm against the implementation. *)
val pos : Lexing.lexbuf -> error_pos

(** Several kinds of resources can contain Xml documents. A [source] value is
 what {!parse} reads the document from: {ul
 {li [SFile] carries a [string], presumably a file path (TODO confirm).}
 {li [SChannel] carries an [in_channel].}
 {li [SString] carries a [string], presumably the document text itself
     (TODO confirm).}
 {li [SLexbuf] carries a [Lexing.lexbuf].}} *)
type source = 
	| SFile of string
	| SChannel of in_channel
	| SString of string
	| SLexbuf of Lexing.lexbuf

(** [make ()] returns a new parser with default options. The options can
 then be changed with {!check_eof} and {!concat_pcdata}. *)
val make : unit -> t

(** [check_eof p b] turns the end-of-document check of parser [p] on
 ([true]) or off ([false]).

 When an Xml document is parsed, the parser will check that the end of the
 document is reached, so for example parsing ["<A/><B/>"] will fail instead
 of returning only the A element. You can turn off this check by setting
 [check_eof] to [false] {i (by default, check_eof is true)}. *)
val check_eof : t -> bool -> unit

(** [parse p src] runs the parser [p] on the document source [src] and
 returns its contents as an {!xml} data structure. Once the parser is
 configured, it can be run on any kind of {!source}.

 NOTE(review): parsing errors are presumably reported with {!Error} and
 unreadable files with {!File_not_found} -- confirm against the
 implementation. *)
val parse :  t -> source -> xml

(** [concat_pcdata p b] sets the PCData concatenation flag of parser [p].

 When several PCData elements are separated by a \n (or \r\n), you can
 either split the PCData into two distinct PCData or merge them with \n
 as separator into one PCData. The default behavior is to concat the
 PCData, but this can be changed for a given parser with this flag. *)
val concat_pcdata : t -> bool -> unit

(**/**)

(* Internal usage only; not part of the documented interface.
   Takes two functions that build exceptions: one from an error_msg and a
   lexer buffer, one from a string.
   NOTE(review): presumably this registers the exception constructors the
   parser uses to report parse errors and missing files -- confirm against
   the implementation. *)
val _raises : (error_msg -> Lexing.lexbuf -> exn) -> (string -> exn) -> unit