1 /* $Id: convert.hg,v 1.5 2006/05/12 08:08:44 murrayc Exp $ */
3 /* Copyright (C) 2002 The gtkmm Development Team
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Library General Public License for more details.
15 * You should have received a copy of the GNU Library General Public
16 * License along with this library; if not, write to the Free
17 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
22 #include <glib/gtypes.h> /* for gsize */
24 #include <glibmm/error.h>
25 #include <glibmm/ustring.h>
27 #ifndef DOXYGEN_SHOULD_SKIP_THIS
28 extern "C" { typedef struct _GIConv* GIConv; } // GIConv: pointer to the incomplete C struct _GIConv
29 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
35 /** @defgroup CharsetConv Character Set Conversion
36 * Utility functions for converting strings between different character sets.
37 * @{
38 */
40 /** Exception class for charset conversion errors.
41 * Glib::convert() and friends throw a ConvertError exception if the charset
42 * conversion failed for some reason. When writing non-trivial applications
43 * you should always catch those errors, and then try to recover, or tell the
44 * user the input was invalid.
45 */
46 _WRAP_GERROR(ConvertError, GConvertError, G_CONVERT_ERROR, NO_GTYPE)
49 /** Thin %iconv() wrapper.
50 * glibmm provides Glib::convert() and Glib::locale_to_utf8() which
51 * are likely more convenient than the raw iconv wrappers. However,
52 * creating an IConv object once and using the convert() method could
53 * be useful when converting multiple times between the same charsets.
54 */
58 /** Open new conversion descriptor.
59 * @param to_codeset Destination codeset.
60 * @param from_codeset %Source codeset.
61 * @throw Glib::ConvertError
62 */
63 IConv(const std::string& to_codeset, const std::string& from_codeset);
65 explicit IConv(GIConv gobject); // NOTE(review): presumably wraps an already-opened GIConv descriptor -- confirm which side closes it
67 /** Close conversion descriptor.
68 */
71 /** Same as the standard UNIX routine %iconv(), but may be implemented
72 * via libiconv on UNIX flavors that lack a native implementation. glibmm
73 * provides Glib::convert() and Glib::locale_to_utf8() which are likely
74 * more convenient than the raw iconv wrappers.
75 * @param inbuf Bytes to convert.
76 * @param inbytes_left In/out parameter, bytes remaining to convert in @a inbuf.
77 * @param outbuf Converted output bytes.
78 * @param outbytes_left In/out parameter, bytes available to fill in @a outbuf.
79 * @return Count of non-reversible conversions, or <tt>static_cast<size_t>(-1)</tt> on error.
80 */
81 size_t iconv(char** inbuf, gsize* inbytes_left, char** outbuf, gsize* outbytes_left);
83 /** Reset conversion descriptor to initial state.
84 * Same as <tt>iconv(0, 0, 0, 0)</tt>, but implemented slightly differently
85 * in order to work on Sun Solaris <= 7. It's also more obvious so you're
86 * encouraged to use it.
87 */
90 /** Convert from one encoding to another.
91 * @param str The string to convert.
92 * @return The converted string.
93 * @throw Glib::ConvertError
95 #ifdef GLIBMM_EXCEPTIONS_ENABLED
96 std::string convert(const std::string& str);
98 std::string convert(const std::string& str, std::auto_ptr<Glib::Error>& error);
99 #endif //GLIBMM_EXCEPTIONS_ENABLED
101 GIConv gobj() { return gobject_; } // accessor: returns the gobject_ member (the GIConv handle) by value
108 IConv& operator=(const IConv&); // declaration only in this view; NOTE(review): presumably left undefined so IConv is not assignable -- confirm
112 /** Get the charset used by the current locale.
113 * @return Whether the current locale uses the UTF-8 charset.
114 */
117 /** Get the charset used by the current locale.
118 * @param charset Will be filled with the charset's name.
119 * @return Whether the current locale uses the UTF-8 charset.
120 */
121 bool get_charset(std::string& charset);
123 /** Convert from one encoding to another.
124 * @param str The string to convert.
125 * @param to_codeset Name of the target charset.
126 * @param from_codeset Name of the source charset.
127 * @return The converted string.
128 * @throw Glib::ConvertError
130 #ifdef GLIBMM_EXCEPTIONS_ENABLED
131 std::string convert(const std::string& str,
132 const std::string& to_codeset,
133 const std::string& from_codeset);
135 std::string convert(const std::string& str,
136 const std::string& to_codeset,
137 const std::string& from_codeset, std::auto_ptr<Glib::Error>& error);
138 #endif //GLIBMM_EXCEPTIONS_ENABLED
140 /** Converts a string from one character set to another, possibly including
141 * fallback sequences for characters not representable in the output.
142 * Characters not in the target encoding will be represented as Unicode
143 * escapes <tt>\\x{XXXX}</tt> or <tt>\\x{XXXXXX}</tt>.
144 * @param str The string to convert.
145 * @param to_codeset Name of the target charset.
146 * @param from_codeset Name of the source charset.
147 * @return The converted string.
148 * @throw Glib::ConvertError
150 #ifdef GLIBMM_EXCEPTIONS_ENABLED
151 std::string convert_with_fallback(const std::string& str,
152 const std::string& to_codeset,
153 const std::string& from_codeset);
155 std::string convert_with_fallback(const std::string& str,
156 const std::string& to_codeset,
157 const std::string& from_codeset, std::auto_ptr<Glib::Error>& error);
158 #endif //GLIBMM_EXCEPTIONS_ENABLED
160 /** Converts a string from one character set to another, possibly including
161 * fallback sequences for characters not representable in the output.
162 * @note It is not guaranteed that the specification for the fallback sequences
163 * in @a fallback will be honored. Some systems may do a approximate conversion
164 * from @a from_codeset to @a to_codeset in their iconv() functions, in which
165 * case Glib will simply return that approximate conversion.
167 * @param str The string to convert.
168 * @param to_codeset Name of the target charset.
169 * @param from_codeset Name of the source charset.
170 * @param fallback UTF-8 string to be used in place of characters which aren't
171 * available in the target encoding. All characters in the fallback string
172 * @em must be available in the target encoding.
173 * @return The converted string.
174 * @throw Glib::ConvertError
176 #ifdef GLIBMM_EXCEPTIONS_ENABLED
177 std::string convert_with_fallback(const std::string& str,
178 const std::string& to_codeset,
179 const std::string& from_codeset,
180 const Glib::ustring& fallback);
182 std::string convert_with_fallback(const std::string& str,
183 const std::string& to_codeset,
184 const std::string& from_codeset,
185 const Glib::ustring& fallback, std::auto_ptr<Glib::Error>& error);
186 #endif //GLIBMM_EXCEPTIONS_ENABLED
188 /** Convert from the current locale's encoding to UTF-8.
189 * Convenience wrapper around Glib::convert().
190 * @param opsys_string The string to convert. Must be encoded in the charset
191 * used by the operating system's current locale.
192 * @return The input string converted to UTF-8 encoding.
193 * @throw Glib::ConvertError
195 #ifdef GLIBMM_EXCEPTIONS_ENABLED
196 Glib::ustring locale_to_utf8(const std::string& opsys_string);
198 Glib::ustring locale_to_utf8(const std::string& opsys_string, std::auto_ptr<Glib::Error>& error);
199 #endif //GLIBMM_EXCEPTIONS_ENABLED
201 /** Convert from UTF-8 to the current locale's encoding.
202 * Convenience wrapper around Glib::convert().
203 * @param utf8_string The UTF-8 string to convert.
204 * @return The input string converted to the charset used by the operating
205 * system's current locale.
206 * @throw Glib::ConvertError
208 #ifdef GLIBMM_EXCEPTIONS_ENABLED
209 std::string locale_from_utf8(const Glib::ustring& utf8_string);
211 std::string locale_from_utf8(const Glib::ustring& utf8_string, std::auto_ptr<Glib::Error>& error);
212 #endif //GLIBMM_EXCEPTIONS_ENABLED
214 /** Converts a string which is in the encoding used for filenames into
216 * @param opsys_string A string in the encoding for filenames.
217 * @return The converted string.
218 * @throw Glib::ConvertError
220 #ifdef GLIBMM_EXCEPTIONS_ENABLED
221 Glib::ustring filename_to_utf8(const std::string& opsys_string);
223 Glib::ustring filename_to_utf8(const std::string& opsys_string, std::auto_ptr<Glib::Error>& error);
224 #endif //GLIBMM_EXCEPTIONS_ENABLED
226 /** Converts a string from UTF-8 to the encoding used for filenames.
227 * @param utf8_string A UTF-8 encoded string.
228 * @return The converted string.
229 * @throw Glib::ConvertError
231 #ifdef GLIBMM_EXCEPTIONS_ENABLED
232 std::string filename_from_utf8(const Glib::ustring& utf8_string);
234 std::string filename_from_utf8(const Glib::ustring& utf8_string, std::auto_ptr<Glib::Error>& error);
235 #endif //GLIBMM_EXCEPTIONS_ENABLED
237 /** Converts an escaped UTF-8 encoded URI to a local filename
238 * in the encoding used for filenames.
239 * @param uri A string in the encoding for filenames.
240 * @param hostname Location to store hostname for the URI. If there is no
241 * hostname in the URI, <tt>""</tt> will be stored in this location.
242 * @return The resulting filename.
243 * @throw Glib::ConvertError
245 #ifdef GLIBMM_EXCEPTIONS_ENABLED
246 std::string filename_from_uri(const Glib::ustring& uri, Glib::ustring& hostname);
248 std::string filename_from_uri(const Glib::ustring& uri, Glib::ustring& hostname, std::auto_ptr<Glib::Error>& error);
249 #endif //GLIBMM_EXCEPTIONS_ENABLED
251 /** Converts an escaped UTF-8 encoded URI to a local filename in the encoding
252 * used for filenames.
253 * @param uri A string in the encoding for filenames.
254 * @return The resulting filename.
255 * @throw Glib::ConvertError
257 #ifdef GLIBMM_EXCEPTIONS_ENABLED
258 std::string filename_from_uri(const Glib::ustring& uri);
260 std::string filename_from_uri(const Glib::ustring& uri, std::auto_ptr<Glib::Error>& error);
261 #endif //GLIBMM_EXCEPTIONS_ENABLED
263 /** Converts an absolute filename to an escaped UTF-8 encoded URI.
264 * @param filename An absolute filename specified in the encoding used
265 * for filenames by the operating system.
266 * @param hostname A UTF-8 encoded hostname.
267 * @return The resulting URI.
268 * @throw Glib::ConvertError
270 #ifdef GLIBMM_EXCEPTIONS_ENABLED
271 Glib::ustring filename_to_uri(const std::string& filename, const Glib::ustring& hostname);
273 Glib::ustring filename_to_uri(const std::string& filename, const Glib::ustring& hostname, std::auto_ptr<Glib::Error>& error);
274 #endif //GLIBMM_EXCEPTIONS_ENABLED
276 /** Converts an absolute filename to an escaped UTF-8 encoded URI.
277 * @param filename An absolute filename specified in the encoding used
278 * for filenames by the operating system.
279 * @return The resulting URI.
280 * @throw Glib::ConvertError
282 #ifdef GLIBMM_EXCEPTIONS_ENABLED
283 Glib::ustring filename_to_uri(const std::string& filename);
285 Glib::ustring filename_to_uri(const std::string& filename, std::auto_ptr<Glib::Error>& error);
286 #endif //GLIBMM_EXCEPTIONS_ENABLED
288 /** Returns the display basename for the particular filename, guaranteed
289 * to be valid UTF-8. The display name might not be identical to the filename,
290 * for instance there might be problems converting it to UTF-8, and some files
291 * can be translated in the display.
292 *
293 * You must pass the whole absolute pathname to this function so that
294 * translation of well known locations can be done.
295 *
296 * This function is preferred over filename_display_name() if you know the
297 * whole path, as it allows translation.
298 *
299 * @param filename An absolute pathname in the GLib file name encoding.
300 * @return A string containing a rendition of the basename of the filename in valid UTF-8.
301 */
302 Glib::ustring filename_display_basename(const std::string& filename);
304 /** Converts a filename into a valid UTF-8 string. The
305 * conversion is not necessarily reversible, so you
306 * should keep the original around and use the return
307 * value of this function only for display purposes.
308 * Unlike filename_to_utf8(), the result is guaranteed
309 * to be non-empty even if the filename actually isn't in the GLib
310 * file name encoding.
311 *
312 * If you know the whole pathname of the file you should use
313 * filename_display_basename(), since that allows location-based
314 * translation of filenames.
315 *
316 * @param filename A pathname, hopefully in the GLib file name encoding.
317 * @return A string containing a rendition of the filename in valid UTF-8.
318 */
319 Glib::ustring filename_display_name(const std::string& filename);
321 /** @} group CharsetConv */