1 /* $Id: markup.hg,v 1.5 2005/01/21 12:48:05 murrayc Exp $ */
3 /* Copyright (C) 2002 The gtkmm Development Team
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Library General Public License for more details.
15 * You should have received a copy of the GNU Library General Public
16 * License along with this library; if not, write to the Free
17 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22 #include <glibmm/error.h>
23 #include <sigc++/sigc++.h>
26 #include <glibmmconfig.h>
30 #ifndef DOXYGEN_SHOULD_SKIP_THIS
31 extern "C" { typedef struct _GMarkupParseContext GMarkupParseContext; }
38 /** @defgroup Markup Simple XML Subset Parser
40 * The Glib::Markup parser is intended to parse a simple markup format that's a
41 * subset of XML. This is a small, efficient, easy-to-use parser. It should not
42 * be used if you expect to interoperate with other applications generating
43 * full-scale XML. However, it's very useful for application data files, config
44 * files, etc. where you know your application will be the only one writing the
45 * file. Full-scale XML parsers should be able to parse the subset used by
46 * Glib::Markup parser, so you can easily migrate to full-scale XML at a later
47 * time if the need arises.
49 * Glib::Markup is not guaranteed to signal an error on all invalid XML;
50 * the parser may accept documents that an XML parser would not. However,
51 * invalid XML documents are not considered valid Glib::Markup documents.
53 * @par Simplifications to XML include:
55 * - Only UTF-8 encoding is allowed.
56 * - No user-defined entities.
57 * - Processing instructions, comments and the doctype declaration are "passed
58 * through" but are not interpreted in any way.
59 * - No DTD or validation.
61 * @par The markup format does support:
65 * - 5 standard entities: <tt>\& \< \> \" \'</tt>
66 * - Character references
67 * - Sections marked as <tt>CDATA</tt>
72 /** %Exception class for markup parsing errors.
74 _WRAP_GERROR(MarkupError, GMarkupError, G_MARKUP_ERROR, NO_GTYPE)
76 /*! @var MarkupError::Code MarkupError::BAD_UTF8
77 * Text being parsed was not valid UTF-8.
79 /*! @var MarkupError::Code MarkupError::EMPTY
80 * Document contained nothing, or only whitespace.
82 /*! @var MarkupError::Code MarkupError::PARSE
83 * Document was ill-formed.
85 /*! @var MarkupError::Code MarkupError::UNKNOWN_ELEMENT
86 * This error should be thrown by Glib::Markup::Parser virtual methods
87 * when an element wasn't known.
89 /*! @var MarkupError::Code MarkupError::UNKNOWN_ATTRIBUTE
90 * This error should be thrown by Glib::Markup::Parser virtual methods
91 * when an attribute wasn't known.
93 /*! @var MarkupError::Code MarkupError::INVALID_CONTENT
94 * This error should be thrown by Glib::Markup::Parser virtual methods when
95 * something was wrong with contents of the document, e.g. invalid attribute value.
98 /** @} group Markup */
106 /** Alias for Glib::MarkupError. @ingroup Markup */
107 typedef Glib::MarkupError Error;
110 /** Escapes text so that the markup parser will parse it verbatim.
111 * Less-than, greater-than, ampersand, etc. are replaced with the corresponding
112 * entities. This function would typically be used when writing out a file to
113 * be parsed with the markup parser.
115 * @param text The valid UTF-8 text to escape.
116 * @return A new string holding the escaped text.
118 Glib::ustring escape_text(const Glib::ustring& text);
121 /** There are no flags right now. Pass <tt>Glib::Markup::ParseFlags(0)</tt> for
122 * the flags argument to all functions (this is the default argument anyway).
125 _WRAP_ENUM(ParseFlags, GMarkupParseFlags, NO_GTYPE, s#^MARKUP_##)
127 /*! @var Markup::ParseFlags DO_NOT_USE_THIS_UNSUPPORTED_FLAG
128 * Flag you should not use.
132 /** Binary predicate used as the key comparison of Markup::Parser::AttributeMap.
134 * Unlike <tt>operator\<(const ustring& lhs, const ustring& rhs)</tt>
135 * which would be used by the default <tt>std::less\<\></tt> predicate,
136 * the AttributeKeyLess predicate is locale-independent. This is both
137 * more correct and much more efficient.
139 class AttributeKeyLess
142 typedef Glib::ustring first_argument_type;
143 typedef Glib::ustring second_argument_type;
144 typedef bool result_type;
146 bool operator()(const Glib::ustring& lhs, const Glib::ustring& rhs) const;
150 #ifndef DOXYGEN_SHOULD_SKIP_THIS
151 class ParserCallbacks;
154 /** The abstract markup parser base class.
156 * To implement a parser for your markup format, derive from
157 * Glib::Markup::Parser and implement the virtual methods.
159 * You don't have to override all of the virtual methods. If a particular
160 * method is not implemented, the data passed to it will be ignored. Except for
161 * on_error(), any of these callbacks can throw an error exception; in
162 * particular the MarkupError::UNKNOWN_ELEMENT,
163 * MarkupError::UNKNOWN_ATTRIBUTE, and MarkupError::INVALID_CONTENT errors
164 * are intended to be thrown from these overridden methods. If you throw an
165 * error from a method, Glib::Markup::ParseContext::parse() will report that
166 * error back to its caller.
168 class Parser : public sigc::trackable
171 typedef std::map<Glib::ustring, Glib::ustring, Glib::Markup::AttributeKeyLess> AttributeMap;
173 virtual ~Parser() = 0;
176 /** Constructs a Parser object.
177 * Note that Markup::Parser is an abstract class which can't be instantiated
178 * directly. To implement the parser for your markup format, derive from
179 * Markup::Parser and implement the virtual methods.
183 /** Called for open tags <tt>\<foo bar="baz"\></tt>.
184 * This virtual method is invoked when the opening tag of an element is seen.
185 * @param context The Markup::ParseContext object the parsed data belongs to.
186 * @param element_name The name of the element.
187 * @param attributes A map of attribute name/value pairs.
188 * @throw Glib::MarkupError An exception <em>you</em> should throw if
189 * something went wrong, for instance if an unknown attribute name was
190 * encountered. In particular the MarkupError::UNKNOWN_ELEMENT,
191 * MarkupError::UNKNOWN_ATTRIBUTE, and MarkupError::INVALID_CONTENT
192 * errors are intended to be thrown from user-implemented methods.
194 virtual void on_start_element(ParseContext& context,
195 const Glib::ustring& element_name,
196 const AttributeMap& attributes);
198 /** Called for close tags <tt>\</foo\></tt>.
199 * This virtual method is invoked when the closing tag of an element is seen.
200 * @param context The Markup::ParseContext object the parsed data belongs to.
201 * @param element_name The name of the element.
202 * @throw Glib::MarkupError An exception <em>you</em> should throw if
203 * something went wrong, for instance if an unknown attribute name was
204 * encountered. In particular the MarkupError::UNKNOWN_ELEMENT,
205 * MarkupError::UNKNOWN_ATTRIBUTE, and MarkupError::INVALID_CONTENT
206 * errors are intended to be thrown from user-implemented methods.
208 virtual void on_end_element(ParseContext& context, const Glib::ustring& element_name);
210 /** Called for character data.
211 * This virtual method is invoked when some text is seen (text is always
212 * inside an element).
213 * @param context The Markup::ParseContext object the parsed data belongs to.
214 * @param text The parsed text in UTF-8 encoding.
215 * @throw Glib::MarkupError An exception <em>you</em> should throw if
216 * something went wrong, for instance if an unknown attribute name was
217 * encountered. In particular the MarkupError::UNKNOWN_ELEMENT,
218 * MarkupError::UNKNOWN_ATTRIBUTE, and MarkupError::INVALID_CONTENT
219 * errors are intended to be thrown from user-implemented methods.
221 virtual void on_text(ParseContext& context, const Glib::ustring& text);
223 /** Called for strings that should be re-saved verbatim in this same
224 * position, but are not otherwise interpretable.
225 * This virtual method is invoked for comments, processing instructions and
226 * doctype declarations; if you're re-writing the parsed document, write the
227 * passthrough text back out in the same position.
228 * @param context The Markup::ParseContext object the parsed data belongs to.
229 * @param passthrough_text The text that should be passed through.
230 * @throw Glib::MarkupError An exception <em>you</em> should throw if
231 * something went wrong, for instance if an unknown attribute name was
232 * encountered. In particular the MarkupError::UNKNOWN_ELEMENT,
233 * MarkupError::UNKNOWN_ATTRIBUTE, and MarkupError::INVALID_CONTENT
234 * errors are intended to be thrown from user-implemented methods.
236 virtual void on_passthrough(ParseContext& context, const Glib::ustring& passthrough_text);
238 /** Called on error, including one thrown by an overridden virtual method.
239 * @param context The Markup::ParseContext object the parsed data belongs to.
240 * @param error A MarkupError object with detailed information about the error.
242 virtual void on_error(ParseContext& context, const MarkupError& error);
246 Parser(const Parser&);
247 Parser& operator=(const Parser&);
249 #ifndef DOXYGEN_SHOULD_SKIP_THIS
250 friend class Glib::Markup::ParserCallbacks;
255 /** A parse context is used to parse marked-up documents.
257 * You can feed any number of documents into a context, as long as no errors
258 * occur; once an error occurs, the parse context can't continue to parse text
259 * (you have to destroy it and create a new parse context).
261 class ParseContext : public sigc::trackable
264 /** Creates a new parse context.
265 * @param parser A Markup::Parser instance.
266 * @param flags Bitwise combination of Markup::ParseFlags.
268 explicit ParseContext(Parser& parser, ParseFlags flags = ParseFlags(0));
269 virtual ~ParseContext();
271 /** Feed some data to the ParseContext.
272 * The data need not be valid UTF-8; an error will be signalled if it's
273 * invalid. The data need not be an entire document; you can feed a document
274 * into the parser incrementally, via multiple calls to this function.
275 * Typically, as you receive data from a network connection or file, you feed
276 * each received chunk of data into this function, aborting the process if an
277 * error occurs. Once an error is reported, no further data may be fed to the
278 * ParseContext; all errors are fatal.
279 * @param text Chunk of text to parse.
280 * @throw Glib::MarkupError If the data is invalid or a Parser method threw an error.
282 void parse(const Glib::ustring& text);
284 /** Feed some data to the ParseContext.
285 * The data need not be valid UTF-8; an error will be signalled if it's
286 * invalid. The data need not be an entire document; you can feed a document
287 * into the parser incrementally, via multiple calls to this function.
288 * Typically, as you receive data from a network connection or file, you feed
289 * each received chunk of data into this function, aborting the process if an
290 * error occurs. Once an error is reported, no further data may be fed to the
291 * ParseContext; all errors are fatal.
292 * @param text_begin Beginning of the chunk of text to parse.
293 * @param text_end End of the chunk of text to parse.
294 * @throw Glib::MarkupError If the data is invalid or a Parser method threw an error.
296 void parse(const char* text_begin, const char* text_end);
298 /** Signals to the ParseContext that all data has been fed into the parse
299 * context with parse(). This method reports an error if the document isn't
300 * complete, for example if elements are still open.
301 * @throw Glib::MarkupError If the document isn't complete.
305 /** Retrieves the name of the currently open element.
306 * @return The name of the currently open element, or <tt>""</tt>.
308 Glib::ustring get_element() const;
310 /** Retrieves the current line number.
311 * Intended for use in error messages; there are no strict semantics for what
312 * constitutes the "current" line number other than "the best number we could
313 * come up with for error messages."
315 int get_line_number() const;
317 /** Retrieves the number of the current character on the current line.
318 * Intended for use in error messages; there are no strict semantics for what
319 * constitutes the "current" character number other than "the best number we
320 * could come up with for error messages."
322 int get_char_number() const;
324 Parser* get_parser() { return parser_; }
325 const Parser* get_parser() const { return parser_; }
327 #ifndef DOXYGEN_SHOULD_SKIP_THIS
328 GMarkupParseContext* gobj() { return gobject_; }
329 const GMarkupParseContext* gobj() const { return gobject_; }
333 Markup::Parser* parser_;
334 GMarkupParseContext* gobject_;
337 ParseContext(const ParseContext&);
338 ParseContext& operator=(const ParseContext&);
340 static void destroy_notify_callback(void* data);
343 } // namespace Markup