/* Copyright (C) 2007 The glibmm Development Team * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Library General Public License for more details. * * You should have received a copy of the GNU Library General Public * License along with this library; if not, write to the Free * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ _DEFS(glibmm,glib) #include #include #include #include #include #ifndef DOXYGEN_SHOULD_SKIP_THIS typedef struct _GRegex GRegex; #endif namespace Glib { _WRAP_ENUM(RegexCompileFlags, GRegexCompileFlags, NO_GTYPE) _WRAP_ENUM(RegexMatchFlags, GRegexMatchFlags, NO_GTYPE) /** Exception class for Regex */ _WRAP_GERROR(RegexError, GRegexError, G_REGEX_ERROR, NO_GTYPE) /** Perl-compatible regular expressions - matches strings against regular expressions. * * The Glib::Regex functions implement regular expression pattern matching using * syntax and semantics similar to Perl regular expression. * * Some functions accept a start_position argument, setting it differs from just * passing over a shortened string and setting REGEX_MATCH_NOTBOL in the case * of a pattern that begins with any kind of lookbehind assertion. For example, * consider the pattern "\Biss\B" which finds occurrences of "iss" in the middle * of words. ("\B" matches only if the current position in the subject is not a * word boundary.) When applied to the string "Mississipi" from the fourth byte, * namely "issipi", it does not match, because "\B" is always false at the * start of the subject, which is deemed to be a word boundary. However, if * the entire string is passed , but with start_position set to 4, it finds the * second occurrence of "iss" because it is able to look behind the starting point * to discover that it is preceded by a letter. * * Note that, unless you set the REGEX_RAW flag, all the strings passed to these * functions must be encoded in UTF-8. The lengths and the positions inside the * strings are in bytes and not in characters, so, for instance, * "\xc3\xa0" (i.e. "à") is two bytes long but it is treated as a single * character. If you set REGEX_RAW the strings can be non-valid UTF-8 strings * and a byte is treated as a character, so "\xc3\xa0" is two bytes and * two characters long. * * When matching a pattern, "\n" matches only against a "\n" character in the * string, and "\r" matches only a "\r" character. To match any newline sequence * use "\R". This particular group matches either the two-character sequence * CR + LF ("\r\n"), or one of the single characters LF (linefeed, U+000A, "\n"), * VT (vertical tab, U+000B, "\v"), FF (formfeed, U+000C, "\f"), CR (carriage * return, U+000D, "\r"), NEL (next line, U+0085), LS (line separator, U+2028), * or PS (paragraph separator, U+2029). * * The behaviour of the dot, circumflex, and dollar metacharacters are affected * by newline characters, the default is to recognize any newline character (the * same characters recognized by "\R"). This can be changed with REGEX_NEWLINE_CR, * REGEX_NEWLINE_LF and REGEX_NEWLINE_CRLF compile options, and with * REGEX_MATCH_NEWLINE_ANY, REGEX_MATCH_NEWLINE_CR, REGEX_MATCH_NEWLINE_LF * and REGEX_MATCH_NEWLINE_CRLF match options. These settings are also * relevant when compiling a pattern if REGEX_EXTENDED is set, and an unescaped * "#" outside a character class is encountered. This indicates a comment that * lasts until after the next newline. * * Creating and manipulating the same Glib::Regex class from different threads is * not a problem as Glib::Regex does not modify its internal state between creation and * destruction, on the other hand Glib::MatchInfo is not threadsafe. * * The regular expressions low level functionalities are obtained through the * excellent PCRE library written by Philip Hazel. * * @newin2p14 */ class Regex { _CLASS_OPAQUE_REFCOUNTED(Regex, GRegex, NONE, g_regex_ref, g_regex_unref) _IGNORE(g_regex_ref, g_regex_unref) public: #ifdef GLIBMM_EXCEPTIONS_ENABLED static Glib::RefPtr create(const Glib::ustring& pattern, RegexCompileFlags compile_options = static_cast(0), RegexMatchFlags match_options = static_cast(0)); #else static Glib::RefPtr create(const Glib::ustring& pattern, RegexCompileFlags compile_options = static_cast(0), RegexMatchFlags match_options = static_cast(0), std::auto_ptr& error); #endif /* !GLIBMM_EXCEPTIONS_ENABLED */ _WRAP_METHOD(Glib::ustring get_pattern() const, g_regex_get_pattern) _WRAP_METHOD(int get_max_backref() const, g_regex_get_max_backref) _WRAP_METHOD(int get_capture_count() const, g_regex_get_capture_count) _WRAP_METHOD(int get_string_number(const Glib::ustring& name) const, g_regex_get_string_number) static Glib::ustring escape_string(const Glib::ustring& string); _WRAP_METHOD(static bool match_simple(const Glib::ustring& pattern, const Glib::ustring& string, RegexCompileFlags compile_options = static_cast(0), RegexMatchFlags match_options = static_cast(0)), g_regex_match_simple) //TODO: _WRAP_METHOD(bool match(const Glib::ustring& string, RegexMatchFlags match_options = (RegexMatchFlags)0, GMatchInfo **match_info = 0), g_regex_match) bool match(const Glib::ustring& string, RegexMatchFlags match_options = static_cast(0)); //TODO: Wrap GMatchInfo as an iterator: //_WRAP_METHOD(bool match_full(const gchar* string, gssize string_len, int start_position, RegexMatchFlags match_options = (RegexMatchFlags)0, GMatchInfo** match_info = 0), g_regex_match_full, errthrow) #ifdef GLIBMM_EXCEPTIONS_ENABLED bool match(const Glib::ustring& string, int start_position, RegexMatchFlags match_options); #else bool match(const Glib::ustring& string, int start_position, RegexMatchFlags match_options, std::auto_ptr& error); #endif /* !GLIBMM_EXCEPTIONS_ENABLED */ #ifdef GLIBMM_EXCEPTIONS_ENABLED bool match(const Glib::ustring& string, gssize string_len, int start_position, RegexMatchFlags match_options); #else bool match(const Glib::ustring& string, gssize string_len, int start_position, RegexMatchFlags match_options, std::auto_ptr& error); #endif /* !GLIBMM_EXCEPTIONS_ENABLED */ //TODO: _WRAP_METHOD(bool match_all(const Glib::ustring& string, RegexMatchFlags match_options = (RegexMatchFlags)0, GMatchInfo ** match_info = 0), g_regex_match_all) bool match_all(const Glib::ustring& string, RegexMatchFlags match_options = static_cast(0)); //TODO: _WRAP_METHOD(bool match_all_full(const gchar* string, gssize string_len, int start_position, RegexMatchFlags match_options = (RegexMatchFlags)0, GMatchInfo** match_info = 0), g_regex_match_all_full, errthrow) #ifdef GLIBMM_EXCEPTIONS_ENABLED bool match_all(const Glib::ustring& string, int start_position, RegexMatchFlags match_options); #else bool match_all(const Glib::ustring& string, int start_position, RegexMatchFlags match_options, std::auto_ptr& error); #endif /* !GLIBMM_EXCEPTIONS_ENABLED */ #ifdef GLIBMM_EXCEPTIONS_ENABLED bool match_all(const Glib::ustring& string, gssize string_len, int start_position, RegexMatchFlags match_options); #else bool match_all(const Glib::ustring& string, gssize string_len, int start_position, RegexMatchFlags match_options, std::auto_ptr& error); #endif /* !GLIBMM_EXCEPTIONS_ENABLED */ #m4 _CONVERSION(`gchar**',`Glib::StringArrayHandle',`Glib::StringArrayHandle($3, Glib::OWNERSHIP_DEEP)') _WRAP_METHOD(static Glib::StringArrayHandle split_simple(const Glib::ustring& pattern, const Glib::ustring& string, RegexCompileFlags compile_options = static_cast(0), RegexMatchFlags match_options = static_cast(0)), g_regex_split_simple) _WRAP_METHOD(Glib::StringArrayHandle split(const Glib::ustring& string, RegexMatchFlags match_options = static_cast(0)), g_regex_split) _WRAP_METHOD(Glib::StringArrayHandle split(const gchar* string, gssize string_len, int start_position, RegexMatchFlags match_options = static_cast(0), int max_tokens = 0) const, g_regex_split_full, errthrow) #ifdef GLIBMM_EXCEPTIONS_ENABLED Glib::StringArrayHandle split(const Glib::ustring& string, int start_position, RegexMatchFlags match_options, int max_tokens) const; #else Glib::StringArrayHandle split(const Glib::ustring& string, int start_position, RegexMatchFlags match_options, int max_tokens, std::auto_ptr& error) const; #endif /* !GLIBMM_EXCEPTIONS_ENABLED */ _WRAP_METHOD(Glib::ustring replace(const gchar* string, gssize string_len, int start_position, const Glib::ustring& replacement, RegexMatchFlags match_options = static_cast(0)), g_regex_replace, errthrow) #ifdef GLIBMM_EXCEPTIONS_ENABLED Glib::ustring replace(const Glib::ustring& string, int start_position, const Glib::ustring& replacement, RegexMatchFlags match_options); #else Glib::ustring replace(const Glib::ustring& string, int start_position, const Glib::ustring& replacement, RegexMatchFlags match_options, std::auto_ptr& error); #endif /* !GLIBMM_EXCEPTIONS_ENABLED */ _WRAP_METHOD(Glib::ustring replace_literal(const gchar *string, gssize string_len, int start_position, const Glib::ustring& replacement, RegexMatchFlags match_options = static_cast(0)), g_regex_replace_literal, errthrow) #ifdef GLIBMM_EXCEPTIONS_ENABLED Glib::ustring replace_literal(const Glib::ustring& string, int start_position, const Glib::ustring& replacement, RegexMatchFlags match_options); #else Glib::ustring replace_literal(const Glib::ustring& string, int start_position, const Glib::ustring& replacement, RegexMatchFlags match_options, std::auto_ptr& error); #endif /* !GLIBMM_EXCEPTIONS_ENABLED */ _WRAP_METHOD(Glib::ustring replace_eval(const Glib::ustring& string, gssize string_len, int start_position, RegexMatchFlags match_options, GRegexEvalCallback eval, gpointer user_data), g_regex_replace_eval, errthrow) _WRAP_METHOD(static bool check_replacement(const Glib::ustring& replacement, gboolean* has_references), g_regex_check_replacement, errthrow) /* Match info */ /* GRegex *g_match_info_get_regex (const GMatchInfo *match_info); const gchar *g_match_info_get_string (const GMatchInfo *match_info); void g_match_info_free (GMatchInfo *match_info); _WRAP_METHOD(bool g_match_info_next (GMatchInfo *match_info, GError **error); _WRAP_METHOD(bool g_match_info_matches (const GMatchInfo *match_info); _WRAP_METHOD(int g_match_info_get_match_count (const GMatchInfo *match_info); _WRAP_METHOD(bool g_match_info_is_partial_match (const GMatchInfo *match_info); Glib::ustring g_match_info_expand_references(const GMatchInfo *match_info, Glib::ustring& string_to_expand, GError **error); Glib::ustring g_match_info_fetch (const GMatchInfo *match_info, int match_num); _WRAP_METHOD(bool g_match_info_fetch_pos (const GMatchInfo *match_info, int match_num, int *start_pos, int *end_pos); Glib::ustring g_match_info_fetch_named (const GMatchInfo *match_info, Glib::ustring& name); _WRAP_METHOD(bool g_match_info_fetch_named_pos (const GMatchInfo *match_info, Glib::ustring& name, int *start_pos, int *end_pos); gchar **g_match_info_fetch_all (const GMatchInfo *match_info); */ }; } // namespace Glib