/* $Id: markup.hg,v 1.5 2005/01/21 12:48:05 murrayc Exp $ */ /* Copyright (C) 2002 The gtkmm Development Team * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Library General Public License for more details. * * You should have received a copy of the GNU Library General Public * License along with this library; if not, write to the Free * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ _DEFS(glibmm,glib) #include #include #include #include GLIBMM_USING_STD(map) #ifndef DOXYGEN_SHOULD_SKIP_THIS extern "C" { typedef struct _GMarkupParseContext GMarkupParseContext; } #endif namespace Glib { /** @defgroup Markup Simple XML Subset Parser * * The Glib::Markup parser is intended to parse a simple markup format that's a * subset of XML. This is a small, efficient, easy-to-use parser. It should not * be used if you expect to interoperate with other applications generating * full-scale XML. However, it's very useful for application data files, config * files, etc. where you know your application will be the only one writing the * file. Full-scale XML parsers should be able to parse the subset used by * Glib::Markup parser, so you can easily migrate to full-scale XML at a later * time if the need arises. * * Glib::Markup is not guaranteed to signal an error on all invalid XML; * the parser may accept documents that an XML parser would not. However, * invalid XML documents are not considered valid Glib::Markup documents. * * @par Simplifications to XML include: * * - Only UTF-8 encoding is allowed. * - No user-defined entities. * - Processing instructions, comments and the doctype declaration are "passed * through" but are not interpreted in any way. * - No DTD or validation. * * @par The markup format does support: * * - Elements * - Attributes * - 5 standard entities: \& \< \> \" \' * - Character references * - Sections marked as CDATA * * @{ */ /** %Exception class for markup parsing errors. */ _WRAP_GERROR(MarkupError, GMarkupError, G_MARKUP_ERROR, NO_GTYPE) /*! @var MarkupError::Code MarkupError::BAD_UTF8 * Text being parsed was not valid UTF-8. */ /*! @var MarkupError::Code MarkupError::EMPTY * Document contained nothing, or only whitespace. */ /*! @var MarkupError::Code MarkupError::PARSE * Document was ill-formed. */ /*! @var MarkupError::Code MarkupError::UNKNOWN_ELEMENT * This error should be set by Glib::Markup::Parser virtual methods; * element wasn't known. */ /*! @var MarkupError::Code MarkupError::UNKNOWN_ATTRIBUTE * This error should be set by Glib::Markup::Parser virtual methods; * attribute wasn't known. */ /*! @var MarkupError::Code MarkupError::INVALID_CONTENT * This error should be set by Glib::Markup::Parser virtual methods; * something was wrong with contents of the document, e.g. invalid attribute value. */ /** @} group Markup */ namespace Markup { class ParseContext; /** @ingroup Markup */ typedef Glib::MarkupError Error; /** Escapes text so that the markup parser will parse it verbatim. * Less than, greater than, ampersand, etc. are replaced with the corresponding * entities. This function would typically be used when writing out a file to * be parsed with the markup parser. * @ingroup Markup * @param text Some valid UTF-8 text. * @return Escaped text. */ Glib::ustring escape_text(const Glib::ustring& text); /** There are no flags right now. Pass Glib::Markup::ParseFlags(0) for * the flags argument to all functions (this should be the default argument * anyway). */ _WRAP_ENUM(ParseFlags, GMarkupParseFlags, NO_GTYPE, s#^MARKUP_##) /*! @var Markup::ParseFlags DO_NOT_USE_THIS_UNSUPPORTED_FLAG * Flag you should not use. */ /** Binary predicate used by Markup::Parser::AttributeMap. * @ingroup Markup * Unlike operator<(const ustring& lhs, const ustring& rhs) * which would be used by the default std::less<> predicate, * the AttributeKeyLess predicate is locale-independent. This is both * more correct and much more efficient. */ class AttributeKeyLess { public: typedef Glib::ustring first_argument_type; typedef Glib::ustring second_argument_type; typedef bool result_type; bool operator()(const Glib::ustring& lhs, const Glib::ustring& rhs) const; }; #ifndef DOXYGEN_SHOULD_SKIP_THIS class ParserCallbacks; #endif /** The abstract markup parser base class. * @ingroup Markup * To implement a parser for your markup format, derive from * Glib::Markup::Parser and implement the virtual methods. * * You don't have to override all of the virtual methods. If a particular * method is not implement the data passed to it will be ignored. Except for * the error method, any of these callbacks can throw an error exception; in * particular the MarkupError::UNKNOWN_ELEMENT, * MarkupError::UNKNOWN_ATTRIBUTE, and MarkupError::INVALID_CONTENT errors * are intended to be thrown from these overridden methods. If you throw an * error from a method, Glib::Markup::ParseContext::parse() will report that * error back to its caller. */ class Parser : public sigc::trackable { public: typedef std::map AttributeMap; virtual ~Parser() = 0; protected: /** Constructs a Parser object. * Note that Markup::Parser is an abstract class which can't be instantiated * directly. To implement the parser for your markup format, derive from * Markup::Parser and implement the virtual methods. */ Parser(); /** Called for open tags \. * This virtual method is invoked when the opening tag of an element is seen. * @param context The Markup::ParseContext object the parsed data belongs to. * @param element_name The name of the element. * @param attributes A map of attribute name/value pairs. * @throw Glib::MarkupError An exception you should throw if * something went wrong, for instance if an unknown attribute name was * encountered. In particular the MarkupError::UNKNOWN_ELEMENT, * MarkupError::UNKNOWN_ATTRIBUTE, and MarkupError::INVALID_CONTENT * errors are intended to be thrown from user-implemented methods. */ virtual void on_start_element(ParseContext& context, const Glib::ustring& element_name, const AttributeMap& attributes); /** Called for close tags \. * This virtual method is invoked when the closing tag of an element is seen. * @param context The Markup::ParseContext object the parsed data belongs to. * @param element_name The name of the element. * @throw Glib::MarkupError An exception you should throw if * something went wrong, for instance if an unknown attribute name was * encountered. In particular the MarkupError::UNKNOWN_ELEMENT, * MarkupError::UNKNOWN_ATTRIBUTE, and MarkupError::INVALID_CONTENT * errors are intended to be thrown from user-implemented methods. */ virtual void on_end_element(ParseContext& context, const Glib::ustring& element_name); /** Called for character data. * This virtual method is invoked when some text is seen (text is always * inside an element). * @param context The Markup::ParseContext object the parsed data belongs to. * @param text The parsed text in UTF-8 encoding. * @throw Glib::MarkupError An exception you should throw if * something went wrong, for instance if an unknown attribute name was * encountered. In particular the MarkupError::UNKNOWN_ELEMENT, * MarkupError::UNKNOWN_ATTRIBUTE, and MarkupError::INVALID_CONTENT * errors are intended to be thrown from user-implemented methods. */ virtual void on_text(ParseContext& context, const Glib::ustring& text); /** Called for strings that should be re-saved verbatim in this same * position, but are not otherwise interpretable. * This virtual method is invoked for comments, processing instructions and * doctype declarations; if you're re-writing the parsed document, write the * passthrough text back out in the same position. * @param context The Markup::ParseContext object the parsed data belongs to. * @param passthrough_text The text that should be passed through. * @throw Glib::MarkupError An exception you should throw if * something went wrong, for instance if an unknown attribute name was * encountered. In particular the MarkupError::UNKNOWN_ELEMENT, * MarkupError::UNKNOWN_ATTRIBUTE, and MarkupError::INVALID_CONTENT * errors are intended to be thrown from user-implemented methods. */ virtual void on_passthrough(ParseContext& context, const Glib::ustring& passthrough_text); /** Called on error, including one thrown by an overridden virtual method. * @param context The Markup::ParseContext object the parsed data belongs to. * @param error A MarkupError object with detailed information about the error. */ virtual void on_error(ParseContext& context, const MarkupError& error); private: // noncopyable Parser(const Parser&); Parser& operator=(const Parser&); #ifndef DOXYGEN_SHOULD_SKIP_THIS friend class Glib::Markup::ParserCallbacks; #endif }; /** A parse context is used to parse marked-up documents. * @ingroup Markup * You can feed any number of documents into a context, as long as no errors * occur; once an error occurs, the parse context can't continue to parse text * (you have to destroy it and create a new parse context). */ class ParseContext : public sigc::trackable { public: /** Creates a new parse context. * @param parser A Markup::Parser instance. * @param flags Bitwise combination of Markup::ParseFlags. */ explicit ParseContext(Parser& parser, ParseFlags flags = ParseFlags(0)); virtual ~ParseContext(); /** Feed some data to the ParseContext. * The data need not be valid UTF-8; an error will be signalled if it's * invalid. The data need not be an entire document; you can feed a document * into the parser incrementally, via multiple calls to this function. * Typically, as you receive data from a network connection or file, you feed * each received chunk of data into this function, aborting the process if an * error occurs. Once an error is reported, no further data may be fed to the * ParseContext; all errors are fatal. * @param text Chunk of text to parse. * @throw Glib::MarkupError */ void parse(const Glib::ustring& text); /** Feed some data to the ParseContext. * The data need not be valid UTF-8; an error will be signalled if it's * invalid. The data need not be an entire document; you can feed a document * into the parser incrementally, via multiple calls to this function. * Typically, as you receive data from a network connection or file, you feed * each received chunk of data into this function, aborting the process if an * error occurs. Once an error is reported, no further data may be fed to the * ParseContext; all errors are fatal. * @param text_begin Begin of chunk of text to parse. * @param text_end End of chunk of text to parse. * @throw Glib::MarkupError */ void parse(const char* text_begin, const char* text_end); /** Signals to the ParseContext that all data has been fed into the parse * context with parse(). This method reports an error if the document isn't * complete, for example if elements are still open. * @throw Glib::MarkupError */ void end_parse(); /** Retrieves the name of the currently open element. * @return The name of the currently open element, or "". */ Glib::ustring get_element() const; /** Retrieves the current line number. * Intended for use in error messages; there are no strict semantics for what * constitutes the "current" line number other than "the best number we could * come up with for error messages." */ int get_line_number() const; /** Retrieves the number of the current character on the current line. * Intended for use in error messages; there are no strict semantics for what * constitutes the "current" character number other than "the best number we * could come up with for error messages." */ int get_char_number() const; Parser* get_parser() { return parser_; } const Parser* get_parser() const { return parser_; } #ifndef DOXYGEN_SHOULD_SKIP_THIS GMarkupParseContext* gobj() { return gobject_; } const GMarkupParseContext* gobj() const { return gobject_; } #endif private: Markup::Parser* parser_; GMarkupParseContext* gobject_; // noncopyable ParseContext(const ParseContext&); ParseContext& operator=(const ParseContext&); static void destroy_notify_callback(void* data); }; } // namespace Markup } // namespace Glib