libs/glibmm2/glib/glibmm/ustring.cc

   1 // -*- c++ -*-
   2 /* $Id: ustring.cc 749 2008-12-10 14:23:33Z jjongsma $ */
   3
   4 /* Copyright (C) 2002 The gtkmm Development Team
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Library General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Library General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Library General Public
  17  * License along with this library; if not, write to the Free
  18  * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  19  */
  20
  21 #include <glibmm/ustring.h>
  22 #include <glibmm/convert.h>
  23 #include <glibmm/error.h>
  24 #include <glibmm/utility.h>
  25
  26 #include <algorithm>
  27 #include <iostream>
  28 #include <cstring>
  29
  30 #ifdef HAVE_CONFIG_H
  31 #include <config.h>
  32 #endif
  33
  34 #include <glibmmconfig.h>
  35 #ifdef GLIBMM_EXCEPTIONS_ENABLED
  36 # include <stdexcept>
  37 #endif
  38 GLIBMM_USING_STD(find)
  39
  40 namespace
  41 {
  42
  43 using Glib::ustring;
  44
  45 // Little helper to make the conversion from gunichar to UTF-8 a one-liner.
  46 //
  47 struct UnicharToUtf8
  48 {
  49   char                buf[6];
  50   ustring::size_type  len;
  51
  52   explicit UnicharToUtf8(gunichar uc)
  53     : len (g_unichar_to_utf8(uc, buf)) {}
  54 };
  55
  56
// All utf8_*_offset() functions return npos if offset is out of range.
// The caller decides whether npos is a valid argument that simply denotes
// the rest of the string, or whether it is not allowed (e.g. for start
// positions).  In the latter case std::out_of_range should be thrown, but
// usually std::string will do that for us.
  62
  63 // First overload: stop on '\0' character.
  64 static
  65 ustring::size_type utf8_byte_offset(const char* str, ustring::size_type offset)
  66 {
  67   if(offset == ustring::npos)
  68     return ustring::npos;
  69
  70   const char *const utf8_skip = g_utf8_skip;
  71   const char* p = str;
  72
  73   for(; offset != 0; --offset)
  74   {
  75     const unsigned int c = static_cast<unsigned char>(*p);
  76
  77     if(c == 0)
  78       return ustring::npos;
  79
  80     p += utf8_skip[c];
  81   }
  82
  83   return (p - str);
  84 }
  85
  86 // Second overload: stop when reaching maxlen.
  87 static
  88 ustring::size_type utf8_byte_offset(const char* str, ustring::size_type offset,
  89                                     ustring::size_type maxlen)
  90 {
  91   if(offset == ustring::npos)
  92     return ustring::npos;
  93
  94   const char *const utf8_skip = g_utf8_skip;
  95   const char *const pend = str + maxlen;
  96   const char* p = str;
  97
  98   for(; offset != 0; --offset)
  99   {
 100     if(p >= pend)
 101       return ustring::npos;
 102
 103     p += utf8_skip[static_cast<unsigned char>(*p)];
 104   }
 105
 106   return (p - str);
 107 }
 108
 109 // Third overload: stop when reaching str.size().
 110 //
 111 inline
 112 ustring::size_type utf8_byte_offset(const std::string& str, ustring::size_type offset)
 113 {
 114   return utf8_byte_offset(str.data(), offset, str.size());
 115 }
 116
 117 // Takes UTF-8 character offset and count in ci and cn.
 118 // Returns the byte offset and count in i and n.
 119 //
 120 struct Utf8SubstrBounds
 121 {
 122   ustring::size_type i;
 123   ustring::size_type n;
 124
 125   Utf8SubstrBounds(const std::string& str, ustring::size_type ci, ustring::size_type cn)
 126   :
 127     i (utf8_byte_offset(str, ci)),
 128     n (ustring::npos)
 129   {
 130     if(i != ustring::npos)
 131       n = utf8_byte_offset(str.data() + i, cn, str.size() - i);
 132   }
 133 };
 134
 135 // Converts byte offset to UTF-8 character offset.
 136 inline
 137 ustring::size_type utf8_char_offset(const std::string& str, ustring::size_type offset)
 138 {
 139   if(offset == ustring::npos)
 140     return ustring::npos;
 141
 142   const char *const pdata = str.data();
 143   return g_utf8_pointer_to_offset(pdata, pdata + offset);
 144 }
 145
 146
 147 // Helper to implement ustring::find_first_of() and find_first_not_of().
 148 // Returns the UTF-8 character offset, or ustring::npos if not found.
 149 static
 150 ustring::size_type utf8_find_first_of(const std::string& str, ustring::size_type offset,
 151                                       const char* utf8_match, long utf8_match_size,
 152                                       bool find_not_of)
 153 {
 154   const ustring::size_type byte_offset = utf8_byte_offset(str, offset);
 155   if(byte_offset == ustring::npos)
 156     return ustring::npos;
 157
 158   long ucs4_match_size = 0;
 159   const Glib::ScopedPtr<gunichar> ucs4_match
 160       (g_utf8_to_ucs4_fast(utf8_match, utf8_match_size, &ucs4_match_size));
 161
 162   const gunichar *const match_begin = ucs4_match.get();
 163   const gunichar *const match_end   = match_begin + ucs4_match_size;
 164
 165   const char *const str_begin = str.data();
 166   const char *const str_end   = str_begin + str.size();
 167
 168   for(const char* pstr = str_begin + byte_offset;
 169       pstr < str_end;
 170       pstr = g_utf8_next_char(pstr))
 171   {
 172     const gunichar *const pfound = std::find(match_begin, match_end, g_utf8_get_char(pstr));
 173
 174     if((pfound != match_end) != find_not_of)
 175       return offset;
 176
 177     ++offset;
 178   }
 179
 180   return ustring::npos;
 181 }
 182
 183 // Helper to implement ustring::find_last_of() and find_last_not_of().
 184 // Returns the UTF-8 character offset, or ustring::npos if not found.
 185 static
 186 ustring::size_type utf8_find_last_of(const std::string& str, ustring::size_type offset,
 187                                      const char* utf8_match, long utf8_match_size,
 188                                      bool find_not_of)
 189 {
 190   long ucs4_match_size = 0;
 191   const Glib::ScopedPtr<gunichar> ucs4_match
 192       (g_utf8_to_ucs4_fast(utf8_match, utf8_match_size, &ucs4_match_size));
 193
 194   const gunichar *const match_begin = ucs4_match.get();
 195   const gunichar *const match_end   = match_begin + ucs4_match_size;
 196
 197   const char *const str_begin = str.data();
 198   const char* pstr = str_begin;
 199
 200   // Set pstr one byte beyond the actual start position.
 201   const ustring::size_type byte_offset = utf8_byte_offset(str, offset);
 202   pstr += (byte_offset < str.size()) ? byte_offset + 1 : str.size();
 203
 204   while(pstr > str_begin)
 205   {
 206     // Move to previous character.
 207     do
 208       --pstr;
 209     while((static_cast<unsigned char>(*pstr) & 0xC0u) == 0x80);
 210
 211     const gunichar *const pfound = std::find(match_begin, match_end, g_utf8_get_char(pstr));
 212
 213     if((pfound != match_end) != find_not_of)
 214       return g_utf8_pointer_to_offset(str_begin, pstr);
 215   }
 216
 217   return ustring::npos;
 218 }
 219
 220 } // anonymous namespace
 221
 222
 223 namespace Glib
 224 {
 225
#ifndef GLIBMM_HAVE_ALLOWS_STATIC_INLINE_NPOS
// Initialize the static member here,
// because the compiler did not allow us to do it inline.
const ustring::size_type ustring::npos = std::string::npos;
#endif
 231
 232 /*
 233  * We need our own version of g_utf8_get_char(), because the std::string
 234  * iterator is not necessarily a plain pointer (it's in fact not in GCC's
 235  * libstdc++-v3).  Copying the UTF-8 data into a temporary buffer isn't an
 236  * option since this operation is quite time critical.  The implementation
 237  * is quite different from g_utf8_get_char() -- both more generic and likely
 238  * faster.
 239  *
 240  * By looking at the first byte of a UTF-8 character one can determine the
 241  * number of bytes used.  GLib offers the g_utf8_skip[] array for this purpose,
 242  * but accessing this global variable would, on IA32 at least, introduce
 243  * a function call to fetch the Global Offset Table, plus two levels of
 244  * indirection in order to read the value.  Even worse, fetching the GOT is
 245  * always done right at the start of the function instead of the branch that
 246  * actually uses the variable.
 247  *
 248  * Fortunately, there's a better way to get the byte count.  As this table
 249  * shows, there's a nice regular pattern in the UTF-8 encoding scheme:
 250  *
 251  * 0x00000000 - 0x0000007F: 0xxxxxxx
 252  * 0x00000080 - 0x000007FF: 110xxxxx 10xxxxxx
 253  * 0x00000800 - 0x0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
 254  * 0x00010000 - 0x001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 255  * 0x00200000 - 0x03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 256  * 0x04000000 - 0x7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 257  *
 258  * Except for the single byte case, the number of leading 1-bits equals the
 259  * byte count.  All that is needed is to shift the first byte to the left
 260  * until bit 7 becomes 0.  Naturally, doing so requires a loop -- but since
 261  * we already have one, no additional cost is introduced.  This shifting can
 262  * further be combined with the computation of the bitmask needed to eliminate
 263  * the leading length bits, thus saving yet another register.
 264  *
 265  * Note:  If you change this code, it is advisable to also review what the
 266  * compiler makes of it in the assembler output.  Except for some pointless
 267  * register moves, the generated code is sufficiently close to the optimum
 268  * with GCC 4.1.2 on x86_64.
 269  */
// Decodes the UTF-8 character that starts at pos and returns its code point.
// No validation is performed here: the loop simply keeps reading bytes for
// as long as the length bits of the first byte say so.
gunichar get_unichar_from_std_iterator(std::string::const_iterator pos)
{
  unsigned int result = static_cast<unsigned char>(*pos);

  // A clear bit 7 means a single-byte character; nothing more to do.
  if((result & 0x80) != 0)
  {
    // Selects the second length bit (bit 6) of the first byte.
    unsigned int mask = 0x40;

    do
    {
      // Make room for the 6 payload bits of the next byte.
      result <<= 6;
      const unsigned int c = static_cast<unsigned char>(*++pos);
      // result moved up by 6 but mask only by 5, so mask now selects
      // the next lower length bit of the (shifted) first byte.
      mask   <<= 5;
      // Subtracting 0x80 removes the 10xxxxxx marker of the trailing byte.
      result  += c - 0x80;
    }
    // A set length bit means yet another byte belongs to this character.
    while((result & mask) != 0);

    // Strip the length bits of the first byte, which sit at and above mask.
    result &= mask - 1;
  }

  return result;
}
 292
 293
 294 /**** Glib::ustring ********************************************************/
 295
 296 ustring::ustring()
 297 :
 298   string_ ()
 299 {}
 300
 301 ustring::ustring(const ustring& other)
 302 :
 303   string_ (other.string_)
 304 {}
 305
 306 ustring::ustring(const ustring& src, ustring::size_type i, ustring::size_type n)
 307 :
 308   string_ ()
 309 {
 310   const Utf8SubstrBounds bounds (src.string_, i, n);
 311   string_.assign(src.string_, bounds.i, bounds.n);
 312 }
 313
 314 ustring::ustring(const char* src, ustring::size_type n)
 315 :
 316   string_ (src, utf8_byte_offset(src, n))
 317 {}
 318
 319 ustring::ustring(const char* src)
 320 :
 321   string_ (src)
 322 {}
 323
 324 ustring::ustring(ustring::size_type n, gunichar uc)
 325 :
 326   string_ ()
 327 {
 328   if(uc < 0x80)
 329   {
 330     // Optimize the probably most common case.
 331     string_.assign(n, static_cast<char>(uc));
 332   }
 333   else
 334   {
 335     const UnicharToUtf8 conv (uc);
 336     string_.reserve(n * conv.len);
 337
 338     for(; n > 0; --n)
 339       string_.append(conv.buf, conv.len);
 340   }
 341 }
 342
 343 ustring::ustring(ustring::size_type n, char c)
 344 :
 345   string_ (n, c)
 346 {}
 347
 348 ustring::ustring(const std::string& src)
 349 :
 350   string_ (src)
 351 {}
 352
 353 ustring::~ustring()
 354 {}
 355
 356 void ustring::swap(ustring& other)
 357 {
 358   string_.swap(other.string_);
 359 }
 360
 361
 362 /**** Glib::ustring::operator=() *******************************************/
 363
 364 ustring& ustring::operator=(const ustring& other)
 365 {
 366   string_ = other.string_;
 367   return *this;
 368 }
 369
 370 ustring& ustring::operator=(const std::string& src)
 371 {
 372   string_ = src;
 373   return *this;
 374 }
 375
 376 ustring& ustring::operator=(const char* src)
 377 {
 378   string_ = src;
 379   return *this;
 380 }
 381
 382 ustring& ustring::operator=(gunichar uc)
 383 {
 384   const UnicharToUtf8 conv (uc);
 385   string_.assign(conv.buf, conv.len);
 386   return *this;
 387 }
 388
 389 ustring& ustring::operator=(char c)
 390 {
 391   string_ = c;
 392   return *this;
 393 }
 394
 395
 396 /**** Glib::ustring::assign() **********************************************/
 397
 398 ustring& ustring::assign(const ustring& src)
 399 {
 400   string_ = src.string_;
 401   return *this;
 402 }
 403
 404 ustring& ustring::assign(const ustring& src, ustring::size_type i, ustring::size_type n)
 405 {
 406   const Utf8SubstrBounds bounds (src.string_, i, n);
 407   string_.assign(src.string_, bounds.i, bounds.n);
 408   return *this;
 409 }
 410
 411 ustring& ustring::assign(const char* src, ustring::size_type n)
 412 {
 413   string_.assign(src, utf8_byte_offset(src, n));
 414   return *this;
 415 }
 416
 417 ustring& ustring::assign(const char* src)
 418 {
 419   string_ = src;
 420   return *this;
 421 }
 422
 423 ustring& ustring::assign(ustring::size_type n, gunichar uc)
 424 {
 425   ustring temp (n, uc);
 426   string_.swap(temp.string_);
 427   return *this;
 428 }
 429
 430 ustring& ustring::assign(ustring::size_type n, char c)
 431 {
 432   string_.assign(n, c);
 433   return *this;
 434 }
 435
 436
 437 /**** Glib::ustring::operator+=() ******************************************/
 438
 439 ustring& ustring::operator+=(const ustring& src)
 440 {
 441   string_ += src.string_;
 442   return *this;
 443 }
 444
 445 ustring& ustring::operator+=(const char* src)
 446 {
 447   string_ += src;
 448   return *this;
 449 }
 450
 451 ustring& ustring::operator+=(gunichar uc)
 452 {
 453   const UnicharToUtf8 conv (uc);
 454   string_.append(conv.buf, conv.len);
 455   return *this;
 456 }
 457
 458 ustring& ustring::operator+=(char c)
 459 {
 460   string_ += c;
 461   return *this;
 462 }
 463
 464
 465 /**** Glib::ustring::push_back() *******************************************/
 466
 467 void ustring::push_back(gunichar uc)
 468 {
 469   const UnicharToUtf8 conv (uc);
 470   string_.append(conv.buf, conv.len);
 471 }
 472
 473 void ustring::push_back(char c)
 474 {
 475   string_ += c;
 476 }
 477
 478
 479 /**** Glib::ustring::append() **********************************************/
 480
 481 ustring& ustring::append(const ustring& src)
 482 {
 483   string_ += src.string_;
 484   return *this;
 485 }
 486
 487 ustring& ustring::append(const ustring& src, ustring::size_type i, ustring::size_type n)
 488 {
 489   const Utf8SubstrBounds bounds (src.string_, i, n);
 490   string_.append(src.string_, bounds.i, bounds.n);
 491   return *this;
 492 }
 493
 494 ustring& ustring::append(const char* src, ustring::size_type n)
 495 {
 496   string_.append(src, utf8_byte_offset(src, n));
 497   return *this;
 498 }
 499
 500 ustring& ustring::append(const char* src)
 501 {
 502   string_ += src;
 503   return *this;
 504 }
 505
 506 ustring& ustring::append(ustring::size_type n, gunichar uc)
 507 {
 508   string_.append(ustring(n, uc).string_);
 509   return *this;
 510 }
 511
 512 ustring& ustring::append(ustring::size_type n, char c)
 513 {
 514   string_.append(n, c);
 515   return *this;
 516 }
 517
 518
 519 /**** Glib::ustring::insert() **********************************************/
 520
 521 ustring& ustring::insert(ustring::size_type i, const ustring& src)
 522 {
 523   string_.insert(utf8_byte_offset(string_, i), src.string_);
 524   return *this;
 525 }
 526
 527 ustring& ustring::insert(ustring::size_type i, const ustring& src,
 528                          ustring::size_type i2, ustring::size_type n)
 529 {
 530   const Utf8SubstrBounds bounds2 (src.string_, i2, n);
 531   string_.insert(utf8_byte_offset(string_, i), src.string_, bounds2.i, bounds2.n);
 532   return *this;
 533 }
 534
 535 ustring& ustring::insert(ustring::size_type i, const char* src, ustring::size_type n)
 536 {
 537   string_.insert(utf8_byte_offset(string_, i), src, utf8_byte_offset(src, n));
 538   return *this;
 539 }
 540
 541 ustring& ustring::insert(ustring::size_type i, const char* src)
 542 {
 543   string_.insert(utf8_byte_offset(string_, i), src);
 544   return *this;
 545 }
 546
 547 ustring& ustring::insert(ustring::size_type i, ustring::size_type n, gunichar uc)
 548 {
 549   string_.insert(utf8_byte_offset(string_, i), ustring(n, uc).string_);
 550   return *this;
 551 }
 552
 553 ustring& ustring::insert(ustring::size_type i, ustring::size_type n, char c)
 554 {
 555   string_.insert(utf8_byte_offset(string_, i), n, c);
 556   return *this;
 557 }
 558
 559 ustring::iterator ustring::insert(ustring::iterator p, gunichar uc)
 560 {
 561   const size_type offset = p.base() - string_.begin();
 562   const UnicharToUtf8 conv (uc);
 563   string_.insert(offset, conv.buf, conv.len);
 564   return iterator(string_.begin() + offset);
 565 }
 566
 567 ustring::iterator ustring::insert(ustring::iterator p, char c)
 568 {
 569   return iterator(string_.insert(p.base(), c));
 570 }
 571
 572 void ustring::insert(ustring::iterator p, ustring::size_type n, gunichar uc)
 573 {
 574   string_.insert(p.base() - string_.begin(), ustring(n, uc).string_);
 575 }
 576
 577 void ustring::insert(ustring::iterator p, ustring::size_type n, char c)
 578 {
 579   string_.insert(p.base(), n, c);
 580 }
 581
 582
 583 /**** Glib::ustring::replace() *********************************************/
 584
 585 ustring& ustring::replace(ustring::size_type i, ustring::size_type n, const ustring& src)
 586 {
 587   const Utf8SubstrBounds bounds (string_, i, n);
 588   string_.replace(bounds.i, bounds.n, src.string_);
 589   return *this;
 590 }
 591
 592 ustring& ustring::replace(ustring::size_type i, ustring::size_type n,
 593                           const ustring& src, ustring::size_type i2, ustring::size_type n2)
 594 {
 595   const Utf8SubstrBounds bounds (string_, i, n);
 596   const Utf8SubstrBounds bounds2 (src.string_, i2, n2);
 597   string_.replace(bounds.i, bounds.n, src.string_, bounds2.i, bounds2.n);
 598   return *this;
 599 }
 600
 601 ustring& ustring::replace(ustring::size_type i, ustring::size_type n,
 602                           const char* src, ustring::size_type n2)
 603 {
 604   const Utf8SubstrBounds bounds (string_, i, n);
 605   string_.replace(bounds.i, bounds.n, src, utf8_byte_offset(src, n2));
 606   return *this;
 607 }
 608
 609 ustring& ustring::replace(ustring::size_type i, ustring::size_type n, const char* src)
 610 {
 611   const Utf8SubstrBounds bounds (string_, i, n);
 612   string_.replace(bounds.i, bounds.n, src);
 613   return *this;
 614 }
 615
 616 ustring& ustring::replace(ustring::size_type i, ustring::size_type n,
 617                           ustring::size_type n2, gunichar uc)
 618 {
 619   const Utf8SubstrBounds bounds (string_, i, n);
 620   string_.replace(bounds.i, bounds.n, ustring(n2, uc).string_);
 621   return *this;
 622 }
 623
 624 ustring& ustring::replace(ustring::size_type i, ustring::size_type n,
 625                           ustring::size_type n2, char c)
 626 {
 627   const Utf8SubstrBounds bounds (string_, i, n);
 628   string_.replace(bounds.i, bounds.n, n2, c);
 629   return *this;
 630 }
 631
 632 ustring& ustring::replace(ustring::iterator pbegin, ustring::iterator pend, const ustring& src)
 633 {
 634   string_.replace(pbegin.base(), pend.base(), src.string_);
 635   return *this;
 636 }
 637
 638 ustring& ustring::replace(ustring::iterator pbegin, ustring::iterator pend,
 639                           const char* src, ustring::size_type n)
 640 {
 641   string_.replace(pbegin.base(), pend.base(), src, utf8_byte_offset(src, n));
 642   return *this;
 643 }
 644
 645 ustring& ustring::replace(ustring::iterator pbegin, ustring::iterator pend, const char* src)
 646 {
 647   string_.replace(pbegin.base(), pend.base(), src);
 648   return *this;
 649 }
 650
 651 ustring& ustring::replace(ustring::iterator pbegin, ustring::iterator pend,
 652                           ustring::size_type n, gunichar uc)
 653 {
 654   string_.replace(pbegin.base(), pend.base(), ustring(n, uc).string_);
 655   return *this;
 656 }
 657
 658 ustring& ustring::replace(ustring::iterator pbegin, ustring::iterator pend,
 659                           ustring::size_type n, char c)
 660 {
 661   string_.replace(pbegin.base(), pend.base(), n, c);
 662   return *this;
 663 }
 664
 665
 666 /**** Glib::ustring::erase() ***********************************************/
 667
 668 void ustring::clear()
 669 {
 670   string_.erase();
 671 }
 672
 673 ustring& ustring::erase(ustring::size_type i, ustring::size_type n)
 674 {
 675   const Utf8SubstrBounds bounds (string_, i, n);
 676   string_.erase(bounds.i, bounds.n);
 677   return *this;
 678 }
 679
 680 ustring& ustring::erase()
 681 {
 682   string_.erase();
 683   return *this;
 684 }
 685
 686 ustring::iterator ustring::erase(ustring::iterator p)
 687 {
 688   ustring::iterator iter_end = p;
 689   ++iter_end;
 690
 691   return iterator(string_.erase(p.base(), iter_end.base()));
 692 }
 693
 694 ustring::iterator ustring::erase(ustring::iterator pbegin, ustring::iterator pend)
 695 {
 696   return iterator(string_.erase(pbegin.base(), pend.base()));
 697 }
 698
 699
 700 /**** Glib::ustring::compare() *********************************************/
 701
 702 int ustring::compare(const ustring& rhs) const
 703 {
 704   return g_utf8_collate(string_.c_str(), rhs.string_.c_str());
 705 }
 706
 707 int ustring::compare(const char* rhs) const
 708 {
 709   return g_utf8_collate(string_.c_str(), rhs);
 710 }
 711
 712 int ustring::compare(ustring::size_type i, ustring::size_type n, const ustring& rhs) const
 713 {
 714   return ustring(*this, i, n).compare(rhs);
 715 }
 716
 717 int ustring::compare(ustring::size_type i, ustring::size_type n,
 718                      const ustring& rhs, ustring::size_type i2, ustring::size_type n2) const
 719 {
 720   return ustring(*this, i, n).compare(ustring(rhs, i2, n2));
 721 }
 722
 723 int ustring::compare(ustring::size_type i, ustring::size_type n,
 724                      const char* rhs, ustring::size_type n2) const
 725 {
 726   return ustring(*this, i, n).compare(ustring(rhs, n2));
 727 }
 728
 729 int ustring::compare(ustring::size_type i, ustring::size_type n, const char* rhs) const
 730 {
 731   return ustring(*this, i, n).compare(rhs);
 732 }
 733
 734
 735 /**** Glib::ustring -- index access ****************************************/
 736
 737 ustring::value_type ustring::operator[](ustring::size_type i) const
 738 {
 739   return g_utf8_get_char(g_utf8_offset_to_pointer(string_.data(), i));
 740 }
 741
 742 ustring::value_type ustring::at(ustring::size_type i) const
 743 {
 744   const size_type byte_offset = utf8_byte_offset(string_, i);
 745
 746   // Throws std::out_of_range if the index is invalid.
 747   return g_utf8_get_char(&string_.at(byte_offset));
 748 }
 749
 750
 751 /**** Glib::ustring -- iterator access *************************************/
 752
 753 ustring::iterator ustring::begin()
 754 {
 755   return iterator(string_.begin());
 756 }
 757
 758 ustring::iterator ustring::end()
 759 {
 760   return iterator(string_.end());
 761 }
 762
 763 ustring::const_iterator ustring::begin() const
 764 {
 765   return const_iterator(string_.begin());
 766 }
 767
 768 ustring::const_iterator ustring::end() const
 769 {
 770   return const_iterator(string_.end());
 771 }
 772
 773 ustring::reverse_iterator ustring::rbegin()
 774 {
 775   return reverse_iterator(iterator(string_.end()));
 776 }
 777
 778 ustring::reverse_iterator ustring::rend()
 779 {
 780   return reverse_iterator(iterator(string_.begin()));
 781 }
 782
 783 ustring::const_reverse_iterator ustring::rbegin() const
 784 {
 785   return const_reverse_iterator(const_iterator(string_.end()));
 786 }
 787
 788 ustring::const_reverse_iterator ustring::rend() const
 789 {
 790   return const_reverse_iterator(const_iterator(string_.begin()));
 791 }
 792
 793
 794 /**** Glib::ustring::find() ************************************************/
 795
 796 ustring::size_type ustring::find(const ustring& str, ustring::size_type i) const
 797 {
 798   return utf8_char_offset(string_, string_.find(str.string_, utf8_byte_offset(string_, i)));
 799 }
 800
 801 ustring::size_type ustring::find(const char* str, ustring::size_type i, ustring::size_type n) const
 802 {
 803   return utf8_char_offset(string_, string_.find(str, utf8_byte_offset(string_, i),
 804                                                      utf8_byte_offset(str, n)));
 805 }
 806
 807 ustring::size_type ustring::find(const char* str, ustring::size_type i) const
 808 {
 809   return utf8_char_offset(string_, string_.find(str, utf8_byte_offset(string_, i)));
 810 }
 811
 812 ustring::size_type ustring::find(gunichar uc, ustring::size_type i) const
 813 {
 814   const UnicharToUtf8 conv (uc);
 815   return utf8_char_offset(string_, string_.find(conv.buf, utf8_byte_offset(string_, i), conv.len));
 816 }
 817
 818 ustring::size_type ustring::find(char c, ustring::size_type i) const
 819 {
 820   return utf8_char_offset(string_, string_.find(c, utf8_byte_offset(string_, i)));
 821 }
 822
 823
 824 /**** Glib::ustring::rfind() ***********************************************/
 825
 826 ustring::size_type ustring::rfind(const ustring& str, ustring::size_type i) const
 827 {
 828   return utf8_char_offset(string_, string_.rfind(str.string_, utf8_byte_offset(string_, i)));
 829 }
 830
 831 ustring::size_type ustring::rfind(const char* str, ustring::size_type i,
 832                                   ustring::size_type n) const
 833 {
 834   return utf8_char_offset(string_, string_.rfind(str, utf8_byte_offset(string_, i),
 835                                                       utf8_byte_offset(str, n)));
 836 }
 837
 838 ustring::size_type ustring::rfind(const char* str, ustring::size_type i) const
 839 {
 840   return utf8_char_offset(string_, string_.rfind(str, utf8_byte_offset(string_, i)));
 841 }
 842
 843 ustring::size_type ustring::rfind(gunichar uc, ustring::size_type i) const
 844 {
 845   const UnicharToUtf8 conv (uc);
 846   return utf8_char_offset(string_, string_.rfind(conv.buf, utf8_byte_offset(string_, i), conv.len));
 847 }
 848
 849 ustring::size_type ustring::rfind(char c, ustring::size_type i) const
 850 {
 851   return utf8_char_offset(string_, string_.rfind(c, utf8_byte_offset(string_, i)));
 852 }
 853
 854
 855 /**** Glib::ustring::find_first_of() ***************************************/
 856
 857 ustring::size_type ustring::find_first_of(const ustring& match, ustring::size_type i) const
 858 {
 859   return utf8_find_first_of(string_, i, match.string_.data(), match.string_.size(), false);
 860 }
 861
 862 ustring::size_type ustring::find_first_of(const char* match,
 863                                           ustring::size_type i, ustring::size_type n) const
 864 {
 865   return utf8_find_first_of(string_, i, match, n, false);
 866 }
 867
 868 ustring::size_type ustring::find_first_of(const char* match, ustring::size_type i) const
 869 {
 870   return utf8_find_first_of(string_, i, match, -1, false);
 871 }
 872
 873 ustring::size_type ustring::find_first_of(gunichar uc, ustring::size_type i) const
 874 {
 875   return find(uc, i);
 876 }
 877
 878 ustring::size_type ustring::find_first_of(char c, ustring::size_type i) const
 879 {
 880   return find(c, i);
 881 }
 882
 883
 884 /**** Glib::ustring::find_last_of() ****************************************/
 885
 886 ustring::size_type ustring::find_last_of(const ustring& match, ustring::size_type i) const
 887 {
 888   return utf8_find_last_of(string_, i, match.string_.data(), match.string_.size(), false);
 889 }
 890
 891 ustring::size_type ustring::find_last_of(const char* match,
 892                                          ustring::size_type i, ustring::size_type n) const
 893 {
 894   return utf8_find_last_of(string_, i, match, n, false);
 895 }
 896
 897 ustring::size_type ustring::find_last_of(const char* match, ustring::size_type i) const
 898 {
 899   return utf8_find_last_of(string_, i, match, -1, false);
 900 }
 901
 902 ustring::size_type ustring::find_last_of(gunichar uc, ustring::size_type i) const
 903 {
 904   return rfind(uc, i);
 905 }
 906
 907 ustring::size_type ustring::find_last_of(char c, ustring::size_type i) const
 908 {
 909   return rfind(c, i);
 910 }
 911
 912
 913 /**** Glib::ustring::find_first_not_of() ***********************************/
 914
 915 ustring::size_type ustring::find_first_not_of(const ustring& match, ustring::size_type i) const
 916 {
 917   return utf8_find_first_of(string_, i, match.string_.data(), match.string_.size(), true);
 918 }
 919
 920 ustring::size_type ustring::find_first_not_of(const char* match,
 921                                               ustring::size_type i, ustring::size_type n) const
 922 {
 923   return utf8_find_first_of(string_, i, match, n, true);
 924 }
 925
 926 ustring::size_type ustring::find_first_not_of(const char* match, ustring::size_type i) const
 927 {
 928   return utf8_find_first_of(string_, i, match, -1, true);
 929 }
 930
 931 // Unfortunately, all of the find_*_not_of() methods for single
 932 // characters need their own special implementation.
 933 //
 934 ustring::size_type ustring::find_first_not_of(gunichar uc, ustring::size_type i) const
 935 {
 936   const size_type bi = utf8_byte_offset(string_, i);
 937   if(bi != npos)
 938   {
 939     const char *const pbegin = string_.data();
 940     const char *const pend   = pbegin + string_.size();
 941
 942     for(const char* p = pbegin + bi;
 943         p < pend;
 944         p = g_utf8_next_char(p), ++i)
 945     {
 946       if(g_utf8_get_char(p) != uc)
 947         return i;
 948     }
 949   }
 950   return npos;
 951 }
 952
 953 ustring::size_type ustring::find_first_not_of(char c, ustring::size_type i) const
 954 {
 955   const size_type bi = utf8_byte_offset(string_, i);
 956   if(bi != npos)
 957   {
 958     const char *const pbegin = string_.data();
 959     const char *const pend   = pbegin + string_.size();
 960
 961     for(const char* p = pbegin + bi;
 962         p < pend;
 963         p = g_utf8_next_char(p), ++i)
 964     {
 965       if(*p != c)
 966         return i;
 967     }
 968   }
 969   return npos;
 970 }
 971
 972
 973 /**** Glib::ustring::find_last_not_of() ************************************/
 974
 975 ustring::size_type ustring::find_last_not_of(const ustring& match, ustring::size_type i) const
 976 {
 977   return utf8_find_last_of(string_, i, match.string_.data(), match.string_.size(), true);
 978 }
 979
 980 ustring::size_type ustring::find_last_not_of(const char* match,
 981                                              ustring::size_type i, ustring::size_type n) const
 982 {
 983   return utf8_find_last_of(string_, i, match, n, true);
 984 }
 985
 986 ustring::size_type ustring::find_last_not_of(const char* match, ustring::size_type i) const
 987 {
 988   return utf8_find_last_of(string_, i, match, -1, true);
 989 }
 990
 991 // Unfortunately, all of the find_*_not_of() methods for single
 992 // characters need their own special implementation.
 993 //
 994 ustring::size_type ustring::find_last_not_of(gunichar uc, ustring::size_type i) const
 995 {
 996   const char *const pbegin = string_.data();
 997   const char *const pend   = pbegin + string_.size();
 998   size_type i_cur   = 0;
 999   size_type i_found = npos;
1000
1001   for(const char* p = pbegin;
1002       p < pend && i_cur <= i;
1003       p = g_utf8_next_char(p), ++i_cur)
1004   {
1005     if(g_utf8_get_char(p) != uc)
1006       i_found = i_cur;
1007   }
1008   return i_found;
1009 }
1010
1011 ustring::size_type ustring::find_last_not_of(char c, ustring::size_type i) const
1012 {
1013   const char *const pbegin = string_.data();
1014   const char *const pend   = pbegin + string_.size();
1015   size_type i_cur   = 0;
1016   size_type i_found = npos;
1017
1018   for(const char* p = pbegin;
1019       p < pend && i_cur <= i;
1020       p = g_utf8_next_char(p), ++i_cur)
1021   {
1022     if(*p != c)
1023       i_found = i_cur;
1024   }
1025   return i_found;
1026 }
1027
1028
1029 /**** Glib::ustring -- get size and resize *********************************/
1030
1031 bool ustring::empty() const
1032 {
1033   return string_.empty();
1034 }
1035
1036 ustring::size_type ustring::size() const
1037 {
1038   const char *const pdata = string_.data();
1039   return g_utf8_pointer_to_offset(pdata, pdata + string_.size());
1040 }
1041
1042 ustring::size_type ustring::length() const
1043 {
1044   const char *const pdata = string_.data();
1045   return g_utf8_pointer_to_offset(pdata, pdata + string_.size());
1046 }
1047
1048 ustring::size_type ustring::bytes() const
1049 {
1050   return string_.size();
1051 }
1052
1053 ustring::size_type ustring::capacity() const
1054 {
1055   return string_.capacity();
1056 }
1057
1058 ustring::size_type ustring::max_size() const
1059 {
1060   return string_.max_size();
1061 }
1062
1063 void ustring::resize(ustring::size_type n, gunichar uc)
1064 {
1065   const size_type size_now = size();
1066   if(n < size_now)
1067     erase(n, npos);
1068   else if(n > size_now)
1069     append(n - size_now, uc);
1070 }
1071
1072 void ustring::resize(ustring::size_type n, char c)
1073 {
1074   const size_type size_now = size();
1075   if(n < size_now)
1076     erase(n, npos);
1077   else if(n > size_now)
1078     string_.append(n - size_now, c);
1079 }
1080
1081 void ustring::reserve(ustring::size_type n)
1082 {
1083   string_.reserve(n);
1084 }
1085
1086
1087 /**** Glib::ustring -- C string access *************************************/
1088
1089 const char* ustring::data() const
1090 {
1091   return string_.data();
1092 }
1093
1094 const char* ustring::c_str() const
1095 {
1096   return string_.c_str();
1097 }
1098
1099 // Note that copy() requests UTF-8 character offsets as
1100 // parameters, but returns the number of copied bytes.
1101 //
1102 ustring::size_type ustring::copy(char* dest, ustring::size_type n, ustring::size_type i) const
1103 {
1104   const Utf8SubstrBounds bounds (string_, i, n);
1105   return string_.copy(dest, bounds.n, bounds.i);
1106 }
1107
1108
1109 /**** Glib::ustring -- UTF-8 utilities *************************************/
1110
1111 bool ustring::validate() const
1112 {
1113   return (g_utf8_validate(string_.data(), string_.size(), 0) != 0);
1114 }
1115
1116 bool ustring::validate(ustring::iterator& first_invalid)
1117 {
1118   const char *const pdata = string_.data();
1119   const char* valid_end = pdata;
1120   const int is_valid = g_utf8_validate(pdata, string_.size(), &valid_end);
1121
1122   first_invalid = iterator(string_.begin() + (valid_end - pdata));
1123   return (is_valid != 0);
1124 }
1125
1126 bool ustring::validate(ustring::const_iterator& first_invalid) const
1127 {
1128   const char *const pdata = string_.data();
1129   const char* valid_end = pdata;
1130   const int is_valid = g_utf8_validate(pdata, string_.size(), &valid_end);
1131
1132   first_invalid = const_iterator(string_.begin() + (valid_end - pdata));
1133   return (is_valid != 0);
1134 }
1135
1136 bool ustring::is_ascii() const
1137 {
1138   const char* p = string_.data();
1139   const char *const pend = p + string_.size();
1140
1141   for(; p != pend; ++p)
1142   {
1143     if((static_cast<unsigned char>(*p) & 0x80u) != 0)
1144       return false;
1145   }
1146
1147   return true;
1148 }
1149
1150 ustring ustring::normalize(NormalizeMode mode) const
1151 {
1152   const ScopedPtr<char> buf (g_utf8_normalize(string_.data(), string_.size(),
1153                                               static_cast<GNormalizeMode>(int(mode))));
1154   return ustring(buf.get());
1155 }
1156
1157 ustring ustring::uppercase() const
1158 {
1159   const ScopedPtr<char> buf (g_utf8_strup(string_.data(), string_.size()));
1160   return ustring(buf.get());
1161 }
1162
1163 ustring ustring::lowercase() const
1164 {
1165   const ScopedPtr<char> buf (g_utf8_strdown(string_.data(), string_.size()));
1166   return ustring(buf.get());
1167 }
1168
1169 ustring ustring::casefold() const
1170 {
1171   const ScopedPtr<char> buf (g_utf8_casefold(string_.data(), string_.size()));
1172   return ustring(buf.get());
1173 }
1174
1175 std::string ustring::collate_key() const
1176 {
1177   const ScopedPtr<char> buf (g_utf8_collate_key(string_.data(), string_.size()));
1178   return std::string(buf.get());
1179 }
1180
1181 std::string ustring::casefold_collate_key() const
1182 {
1183   char *const casefold_buf = g_utf8_casefold(string_.data(), string_.size());
1184   char *const key_buf      = g_utf8_collate_key(casefold_buf, -1);
1185   g_free(casefold_buf);
1186   return std::string(ScopedPtr<char>(key_buf).get());
1187 }
1188
1189 /**** Glib::ustring -- Message formatting **********************************/
1190
1191 // static
1192 ustring ustring::compose_argv(const Glib::ustring& fmt, int argc, const ustring* const* argv)
1193 {
1194   std::string::size_type result_size = fmt.raw().size();
1195
1196   // Guesstimate the final string size.
1197   for (int i = 0; i < argc; ++i)
1198     result_size += argv[i]->raw().size();
1199
1200   std::string result;
1201   result.reserve(result_size);
1202
1203   const char* const pfmt = fmt.raw().c_str();
1204   const char* start = pfmt;
1205
1206   while (const char* const stop = std::strchr(start, '%'))
1207   {
1208     if (stop[1] == '%')
1209     {
1210       result.append(start, stop - start + 1);
1211       start = stop + 2;
1212     }
1213     else
1214     {
1215       const int index = Ascii::digit_value(stop[1]) - 1;
1216
1217       if (index >= 0 && index < argc)
1218       {
1219         result.append(start, stop - start);
1220         result += argv[index]->raw();
1221         start = stop + 2;
1222       }
1223       else
1224       {
1225         const char* const next = (stop[1] != '\0') ? g_utf8_next_char(stop + 1) : (stop + 1);
1226
1227         // Copy invalid substitutions literally to the output.
1228         result.append(start, next - start);
1229
1230         g_warning("invalid substitution \"%s\" in fmt string \"%s\"",
1231                   result.c_str() + result.size() - (next - stop), pfmt);
1232         start = next;
1233       }
1234     }
1235   }
1236
1237   result.append(start, pfmt + fmt.raw().size() - start);
1238
1239   return result;
1240 }
1241
1242 /**** Glib::ustring::SequenceToString **************************************/
1243
1244 ustring::SequenceToString<Glib::ustring::iterator,gunichar>
1245   ::SequenceToString(Glib::ustring::iterator pbegin, Glib::ustring::iterator pend)
1246 :
1247   std::string(pbegin.base(), pend.base())
1248 {}
1249
1250 ustring::SequenceToString<Glib::ustring::const_iterator,gunichar>
1251   ::SequenceToString(Glib::ustring::const_iterator pbegin, Glib::ustring::const_iterator pend)
1252 :
1253   std::string(pbegin.base(), pend.base())
1254 {}
1255
1256 /**** Glib::ustring::FormatStream ******************************************/
1257
1258 ustring::FormatStream::FormatStream()
1259 :
1260   stream_ ()
1261 {}
1262
1263 ustring::FormatStream::~FormatStream()
1264 {}
1265
1266 ustring ustring::FormatStream::to_string() const
1267 {
1268   GError* error = 0;
1269
1270 #ifdef GLIBMM_HAVE_WIDE_STREAM
1271   const std::wstring str = stream_.str();
1272
1273 # if defined(__STDC_ISO_10646__) && SIZEOF_WCHAR_T == 4
1274   // Avoid going through iconv if wchar_t always contains UCS-4.
1275   glong n_bytes = 0;
1276   const ScopedPtr<char> buf (g_ucs4_to_utf8(reinterpret_cast<const gunichar*>(str.data()),
1277                                             str.size(), 0, &n_bytes, &error));
1278 # elif defined(G_OS_WIN32) && SIZEOF_WCHAR_T == 2
1279   // Avoid going through iconv if wchar_t always contains UTF-16.
1280   glong n_bytes = 0;
1281   const ScopedPtr<char> buf (g_utf16_to_utf8(reinterpret_cast<const gunichar2*>(str.data()),
1282                                              str.size(), 0, &n_bytes, &error));
1283 # else
1284   gsize n_bytes = 0;
1285   const ScopedPtr<char> buf (g_convert(reinterpret_cast<const char*>(str.data()),
1286                                        str.size() * sizeof(std::wstring::value_type),
1287                                        "UTF-8", "WCHAR_T", 0, &n_bytes, &error));
1288 # endif /* !(__STDC_ISO_10646__ || G_OS_WIN32) */
1289
1290 #else /* !GLIBMM_HAVE_WIDE_STREAM */
1291   const std::string str = stream_.str();
1292
1293   gsize n_bytes = 0;
1294   const ScopedPtr<char> buf (g_locale_to_utf8(str.data(), str.size(), 0, &n_bytes, &error));
1295 #endif /* !GLIBMM_HAVE_WIDE_STREAM */
1296
1297   if (error)
1298   {
1299 #ifdef GLIBMM_EXCEPTIONS_ENABLED
1300     Glib::Error::throw_exception(error);
1301 #else
1302     g_warning("%s: %s", G_STRFUNC, error->message);
1303     g_error_free(error);
1304     return ustring();
1305 #endif
1306   }
1307
1308   return ustring(buf.get(), buf.get() + n_bytes);
1309 }
1310
1311 /**** Glib::ustring -- stream I/O operators ********************************/
1312
1313 std::istream& operator>>(std::istream& is, Glib::ustring& utf8_string)
1314 {
1315   std::string str;
1316   is >> str;
1317
1318   GError* error = 0;
1319   gsize n_bytes = 0;
1320   const ScopedPtr<char> buf (g_locale_to_utf8(str.data(), str.size(), 0, &n_bytes, &error));
1321
1322   if (error)
1323   {
1324 #ifdef GLIBMM_EXCEPTIONS_ENABLED
1325     Glib::Error::throw_exception(error);
1326 #else
1327     g_warning("%s: %s", G_STRFUNC, error->message);
1328     g_error_free(error);
1329     return is;
1330 #endif
1331   }
1332
1333   utf8_string.assign(buf.get(), buf.get() + n_bytes);
1334
1335   return is;
1336 }
1337
1338 std::ostream& operator<<(std::ostream& os, const Glib::ustring& utf8_string)
1339 {
1340   GError* error = 0;
1341   const ScopedPtr<char> buf (g_locale_from_utf8(utf8_string.raw().data(),
1342                                                 utf8_string.raw().size(), 0, 0, &error));
1343   if (error)
1344   {
1345 #ifdef GLIBMM_EXCEPTIONS_ENABLED
1346     Glib::Error::throw_exception(error);
1347 #else
1348     g_warning("%s: %s", G_STRFUNC, error->message);
1349     g_error_free(error);
1350     return os;
1351 #endif
1352   }
1353
1354   // This won't work if the string contains NUL characters.  Unfortunately,
1355   // std::ostream::write() ignores format flags, so we cannot use that.
1356   // The only option would be to create a temporary std::string.  However,
1357   // even then GCC's libstdc++-v3 prints only the characters up to the first
1358   // NUL.  Given this, there doesn't seem much of a point in allowing NUL in
1359   // formatted output.  The semantics would be unclear anyway: what's the
1360   // screen width of a NUL?
1361   os << buf.get();
1362
1363   return os;
1364 }
1365
1366 #ifdef GLIBMM_HAVE_WIDE_STREAM
1367
1368 std::wistream& operator>>(std::wistream& is, ustring& utf8_string)
1369 {
1370   GError* error = 0;
1371
1372   std::wstring wstr;
1373   is >> wstr;
1374
1375 #if defined(__STDC_ISO_10646__) && SIZEOF_WCHAR_T == 4
1376   // Avoid going through iconv if wchar_t always contains UCS-4.
1377   glong n_bytes = 0;
1378   const ScopedPtr<char> buf (g_ucs4_to_utf8(reinterpret_cast<const gunichar*>(wstr.data()),
1379                                             wstr.size(), 0, &n_bytes, &error));
1380 #elif defined(G_OS_WIN32) && SIZEOF_WCHAR_T == 2
1381   // Avoid going through iconv if wchar_t always contains UTF-16.
1382   glong n_bytes = 0;
1383   const ScopedPtr<char> buf (g_utf16_to_utf8(reinterpret_cast<const gunichar2*>(wstr.data()),
1384                                              wstr.size(), 0, &n_bytes, &error));
1385 #else
1386   gsize n_bytes = 0;
1387   const ScopedPtr<char> buf (g_convert(reinterpret_cast<const char*>(wstr.data()),
1388                                        wstr.size() * sizeof(std::wstring::value_type),
1389                                        "UTF-8", "WCHAR_T", 0, &n_bytes, &error));
1390 #endif /* !(__STDC_ISO_10646__ || G_OS_WIN32) */
1391
1392   if (error)
1393   {
1394 #ifdef GLIBMM_EXCEPTIONS_ENABLED
1395     Glib::Error::throw_exception(error);
1396 #else
1397     g_warning("%s: %s", G_STRFUNC, error->message);
1398     g_error_free(error);
1399     return is;
1400 #endif
1401   }
1402
1403   utf8_string.assign(buf.get(), buf.get() + n_bytes);
1404
1405   return is;
1406 }
1407
1408 std::wostream& operator<<(std::wostream& os, const ustring& utf8_string)
1409 {
1410   GError* error = 0;
1411
1412 #if defined(__STDC_ISO_10646__) && SIZEOF_WCHAR_T == 4
1413   // Avoid going through iconv if wchar_t always contains UCS-4.
1414   const ScopedPtr<gunichar> buf (g_utf8_to_ucs4(utf8_string.raw().data(),
1415                                                 utf8_string.raw().size(), 0, 0, &error));
1416 #elif defined(G_OS_WIN32) && SIZEOF_WCHAR_T == 2
1417   // Avoid going through iconv if wchar_t always contains UTF-16.
1418   const ScopedPtr<gunichar2> buf (g_utf8_to_utf16(utf8_string.raw().data(),
1419                                                   utf8_string.raw().size(), 0, 0, &error));
1420 #else
1421   // TODO: For some reason the conversion from UTF-8 to WCHAR_T doesn't work
1422   // with g_convert(), while iconv on the command line handles it just fine.
1423   // Maybe a bug in GLib?
1424   const ScopedPtr<char> buf (g_convert(utf8_string.raw().data(), utf8_string.raw().size(),
1425                                        "WCHAR_T", "UTF-8", 0, 0, &error));
1426 #endif /* !(__STDC_ISO_10646__ || G_OS_WIN32) */
1427
1428   if (error)
1429   {
1430 #ifdef GLIBMM_EXCEPTIONS_ENABLED
1431     Glib::Error::throw_exception(error);
1432 #else
1433     g_warning("%s: %s", G_STRFUNC, error->message);
1434     g_error_free(error);
1435     return os;
1436 #endif
1437   }
1438
1439   // This won't work if the string contains NUL characters.  Unfortunately,
1440   // std::wostream::write() ignores format flags, so we cannot use that.
1441   // The only option would be to create a temporary std::wstring.  However,
1442   // even then GCC's libstdc++-v3 prints only the characters up to the first
1443   // NUL.  Given this, there doesn't seem much of a point in allowing NUL in
1444   // formatted output.  The semantics would be unclear anyway: what's the
1445   // screen width of a NUL?
1446   os << reinterpret_cast<wchar_t*>(buf.get());
1447
1448   return os;
1449 }
1450
1451 #endif /* GLIBMM_HAVE_WIDE_STREAM */
1452
1453 } // namespace Glib