2 Copyright (c) 2005-2015, John Hurst
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions
8 1. Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13 3. The name of the author may not be used to endorse or promote products
14 derived from this software without specific prior written permission.
16 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 # error "Both HAVE_EXPAT and HAVE_XERCES_C defined"
47 # error "Both HAVE_EXPAT and HAVE_XERCES_C defined"
50 #include <xercesc/util/PlatformUtils.hpp>
51 #include <xercesc/util/XMLString.hpp>
52 #include <xercesc/util/TransService.hpp>
53 #include <xercesc/sax/AttributeList.hpp>
54 #include <xercesc/sax/HandlerBase.hpp>
55 #include <xercesc/sax/ErrorHandler.hpp>
56 #include <xercesc/sax/SAXParseException.hpp>
57 #include <xercesc/parsers/SAXParser.hpp>
58 #include <xercesc/framework/MemBufInputSource.hpp>
59 #include <xercesc/framework/XMLPScanToken.hpp>
62 XERCES_CPP_NAMESPACE_USE
// Forward declarations for the Xerces-C helpers used further down this file.
// The four converters translate between UTF-8 (std::string / const char*)
// and Xerces XMLCh strings; the "_p" variants take a raw pointer as input.
// Each converter returns bool; callers in this file treat a false result as
// an encoding error.
66 void kumu_init_xml_dom();
67 bool kumu_UTF_8_to_XercesString(const std::string& in_str, std::basic_string<XMLCh>& out_str);
68 bool kumu_UTF_8_to_XercesString_p(const char* in_str, std::basic_string<XMLCh>& out_str);
69 bool kumu_XercesString_to_UTF_8(const std::basic_string<XMLCh>& in_str, std::string& out_str);
70 bool kumu_XercesString_to_UTF_8_p(const XMLCh* in_str, std::string& out_str);
// Map from a std::string key to XMLNamespace pointers. In this file the key
// is the namespace name on the expat path and the prefix on the Xerces path.
78 class ns_map : public std::map<std::string, XMLNamespace*>
// NOTE(review): only this iterator declaration of the class body is visible
// in this excerpt; presumably it starts a walk over the stored pointers
// (e.g. cleanup) -- confirm against the full class.
85 ns_map::iterator ni = begin();
// Constructs an element; both namespace pointers start out null.
// NOTE(review): the constructor body (use of `name`) is not visible in this excerpt.
93 Kumu::XMLElement::XMLElement(const char* name) : m_Namespace(0), m_NamespaceOwner(0)
// Destructor: walks the child list (the per-child action is not visible in
// this excerpt) and releases the namespace map this element may own.
98 Kumu::XMLElement::~XMLElement()
100 for ( Elem_i i = m_ChildList.begin(); i != m_ChildList.end(); i++ )
// m_NamespaceOwner is assigned from an ns_map* cast to void* by the parse
// routines, so it is cast back to ns_map* before being deleted.
103 delete (ns_map*)m_NamespaceOwner;
// Appends an attribute record carrying `value` to m_AttrList.
// NOTE(review): the declaration of TmpVal and the assignment of the
// attribute name are not visible in this excerpt.
108 Kumu::XMLElement::SetAttr(const char* name, const char* value)
112 TmpVal.value = value;
114 m_AttrList.push_back(TmpVal);
// Appends an already-constructed element to this element's child list.
119 Kumu::XMLElement::AddChild(Kumu::XMLElement* element)
121 m_ChildList.push_back(element); // takes possession!
// Allocates a new XMLElement constructed from `name` and appends it to the
// child list.
// NOTE(review): the return statement is not visible here; the parse handlers
// in this file push the result of AddChild(name) onto a stack of XMLElement*.
127 Kumu::XMLElement::AddChild(const char* name)
129 XMLElement* tmpE = new XMLElement(name);
130 m_ChildList.push_back(tmpE);
// std::string convenience overload: forwards to the const char* overload
// using value.c_str().
136 Kumu::XMLElement::AddChildWithContent(const char* name, const std::string& value)
138 return AddChildWithContent(name, value.c_str());
143 Kumu::XMLElement::AppendBody(const std::string& value)
150 Kumu::XMLElement::SetBody(const std::string& value)
// Allocates a new child element constructed from `name`, sets its body to
// `value`, and appends it to the child list.
// NOTE(review): any argument checks and the return statement are not visible
// in this excerpt.
157 Kumu::XMLElement::AddChildWithContent(const char* name, const char* value)
161 XMLElement* tmpE = new XMLElement(name);
162 tmpE->m_Body = value;
163 m_ChildList.push_back(tmpE);
// Allocates a new child element constructed from `name` whose body is
// `prefix` immediately followed by `value`, and appends it to the child list.
// NOTE(review): the return statement is not visible in this excerpt.
169 Kumu::XMLElement::AddChildWithPrefixedContent(const char* name, const char* prefix, const char* value)
171 XMLElement* tmpE = new XMLElement(name);
172 tmpE->m_Body = prefix;
173 tmpE->m_Body += value;
174 m_ChildList.push_back(tmpE);
180 Kumu::XMLElement::AddComment(const char* value)
// Renders this element as a document into outbuf. Any previous contents of
// outbuf are replaced by the XML declaration, after which this element is
// rendered at depth 0 with the caller's `pretty` setting.
189 Kumu::XMLElement::Render(std::string& outbuf, const bool& pretty) const
191 outbuf = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
192 RenderElement(outbuf, 0, pretty);
197 add_spacer(std::string& outbuf, i32_t depth)
// Appends the textual form of this element to outbuf. `depth` is handed to
// add_spacer() and is increased by one for each level of children; `pretty`
// is forwarded unchanged to the children.
// NOTE(review): the statements that emit the tag text itself are not visible
// in this excerpt.
205 Kumu::XMLElement::RenderElement(std::string& outbuf, const ui32_t& depth, const bool& pretty) const
209 add_spacer(outbuf, depth);
// Walk the attribute list; each attribute's value is appended to outbuf.
216 for ( Attr_i i = m_AttrList.begin(); i != m_AttrList.end(); ++i )
221 outbuf += (*i).value;
227 // body contents and children
228 if ( ! m_ChildList.empty() )
// With children present, a non-empty body is checked for first and the
// children are then rendered one level deeper.
233 if ( m_Body.length() > 0 )
238 for ( Elem_i i = m_ChildList.begin(); i != m_ChildList.end(); ++i )
240 (*i)->RenderElement(outbuf, depth + 1, pretty);
245 add_spacer(outbuf, depth);
248 else if ( m_Body.length() > 0 )
// Tests whether this element's name equals `name`.
260 Kumu::XMLElement::HasName(const char* name) const
// A null or empty `name` is handled separately.
// NOTE(review): the result returned for that case is not visible here.
262 if ( name == 0 || *name == 0 )
265 return (m_Name == name);
270 Kumu::XMLElement::SetName(const char* name)
// Scans the attribute list in order and returns the value of the first
// attribute whose name equals `name`. The returned pointer comes from the
// stored value's c_str(), so it refers to storage held by the attribute list.
// NOTE(review): the result for "no match" is not visible in this excerpt.
278 Kumu::XMLElement::GetAttrWithName(const char* name) const
280 for ( Attr_i i = m_AttrList.begin(); i != m_AttrList.end(); i++ )
282 if ( (*i).name == name )
283 return (*i).value.c_str();
// Scans the direct children in order, testing each with HasName(name).
// NOTE(review): the return statements (match and no-match) are not visible
// in this excerpt.
291 Kumu::XMLElement::GetChildWithName(const char* name) const
293 for ( Elem_i i = m_ChildList.begin(); i != m_ChildList.end(); i++ )
295 if ( (*i)->HasName(name) )
// Collects every descendant (not only direct children) for which
// HasName(name) is true, appending the matches to outList.
// NOTE(review): the return statement is not visible; presumably outList
// itself is returned -- confirm.
303 const Kumu::ElementList&
304 Kumu::XMLElement::GetChildrenWithName(const char* name, ElementList& outList) const
307 for ( Elem_i i = m_ChildList.begin(); i != m_ChildList.end(); i++ )
// A matching child is appended before its own subtree is searched.
309 if ( (*i)->HasName(name) )
310 outList.push_back(*i);
// Recurse only into children that themselves have children.
312 if ( ! (*i)->m_ChildList.empty() )
313 (*i)->GetChildrenWithName(name, outList);
321 Kumu::XMLElement::DeleteAttributes()
// Removes every attribute whose name equals `name` from m_AttrList.
328 Kumu::XMLElement::DeleteAttrWithName(const char* name)
331 AttributeList::iterator i = m_AttrList.begin();
333 while ( i != m_AttrList.end() )
335 if ( i->name == std::string(name) )
// Post-increment hands erase() the current position while `i` already points
// at the next entry. NOTE(review): this stays valid only for containers whose
// erase() leaves other iterators intact (list-like) -- confirm AttributeList.
// The non-matching branch that advances `i` is not visible in this excerpt.
336 m_AttrList.erase(i++);
// Deletes every child element, working from the back of the child list until
// the list is empty.
344 Kumu::XMLElement::DeleteChildren()
346 while ( ! m_ChildList.empty() )
348 delete m_ChildList.back();
349 m_ChildList.pop_back();
// Searches the child list and erases an entry from it.
// NOTE(review): the comparison against `element` and the delete of the child
// are not visible in this excerpt.
355 Kumu::XMLElement::DeleteChild(const XMLElement* element)
359 for ( ElementList::iterator i = m_ChildList.begin(); i != m_ChildList.end(); i++ )
// NOTE(review): `i` must not be used after this erase; the loop has to exit
// immediately afterwards -- confirm against the lines not shown here.
364 m_ChildList.erase(i);
// Searches the child list and erases an entry from it. No delete of the child
// is visible here, which fits the name: the entry is dropped from the list.
// NOTE(review): the comparison against `element` is not visible in this excerpt.
373 Kumu::XMLElement::ForgetChild(const XMLElement* element)
377 for ( ElementList::iterator i = m_ChildList.begin(); i != m_ChildList.end(); i++ )
// NOTE(review): `i` must not be used after this erase; the loop has to exit
// immediately afterwards -- confirm against the lines not shown here.
381 m_ChildList.erase(i);
// ByteString convenience overload: forwards the buffer's read-only data
// pointer and length to ParseString(const char*, ui32_t).
390 Kumu::XMLElement::ParseString(const ByteString& document)
392 return ParseString((const char*)document.RoData(), document.Length());
// std::string convenience overload: forwards c_str() and size() to
// ParseString(const char*, ui32_t).
397 Kumu::XMLElement::ParseString(const std::string& document)
399 return ParseString(document.c_str(), document.size());
// ByteString convenience overload: forwards the buffer's read-only data
// pointer and length to ParseFirstFromString(const char*, ui32_t).
404 Kumu::XMLElement::ParseFirstFromString(const ByteString& document)
406 return ParseFirstFromString((const char*)document.RoData(), document.Length());
// std::string convenience overload: forwards c_str() and size() to
// ParseFirstFromString(const char*, ui32_t).
411 Kumu::XMLElement::ParseFirstFromString(const std::string& document)
413 return ParseFirstFromString(document.c_str(), document.size());
417 //----------------------------------------------------------------------------------------------------
// State shared by the expat callbacks during one parse: the root element
// supplied by the caller, a stack of the elements currently open, and a
// namespace map allocated for this parse.
422 class ExpatParseContext
424 KM_NO_COPY_CONSTRUCT(ExpatParseContext);
428 std::stack<XMLElement*> Scope;
431 ExpatParseContext(XMLElement* root) : Root(root) {
432 Namespaces = new ns_map;
// The destructor is empty, so it does not delete Namespaces. The parse
// routines in this file hand the map to the XMLElement only when it is
// non-empty. NOTE(review): no other release of the map is visible in this
// excerpt, so an empty map may be leaked -- confirm.
436 ~ExpatParseContext() {}
439 // expat wrapper functions
// expat start-element handler. `p` is the user-data pointer, interpreted as
// an ExpatParseContext*. The parsers in this file are created with
// XML_ParserCreateNS(..., '|'), so a namespaced name contains a '|'
// separating the namespace part from the local name.
442 xph_start(void* p, const XML_Char* name, const XML_Char** attrs)
444 assert(p); assert(name); assert(attrs);
445 ExpatParseContext* Ctx = (ExpatParseContext*)p;
// Keep the start of the full name in ns_root; if a '|' is present, advance
// `name` to the local part that follows it.
448 const char* ns_root = name;
449 const char* local_name = strchr(name, '|');
450 if ( local_name != 0 )
451 name = local_name + 1;
// The first element seen reuses the caller-supplied root; every later element
// is added as a child of the current top of the scope stack.
453 if ( Ctx->Scope.empty() )
455 Ctx->Scope.push(Ctx->Root);
459 Element = Ctx->Scope.top();
460 Ctx->Scope.push(Element->AddChild(name));
463 Element = Ctx->Scope.top();
464 Element->SetName(name);
// When a namespace part was present, `key` receives the text before the '|'
// and is used to look up the matching entry in the context's namespace map.
468 if ( ns_root != name )
469 key.assign(ns_root, name - ns_root - 1);
471 ns_map::iterator ni = Ctx->Namespaces->find(key);
472 if ( ni != Ctx->Namespaces->end() )
473 Element->SetNamespace(ni->second);
// attrs is walked two entries at a time (name, value) until a null name.
476 for ( int i = 0; attrs[i] != 0; i += 2 )
// Attribute names without a '|' are used as-is.
// NOTE(review): the handling of names that do contain '|' is not visible here.
478 if ( ( local_name = strchr(attrs[i], '|') ) == 0 )
479 local_name = attrs[i];
483 Element->SetAttr(local_name, attrs[i+1]);
// expat end-element handler; `p` is interpreted as an ExpatParseContext*.
// NOTE(review): the action taken on the context is not visible in this
// excerpt; presumably the scope stack is popped -- confirm.
489 xph_end(void* p, const XML_Char* name)
491 assert(p); assert(name);
492 ExpatParseContext* Ctx = (ExpatParseContext*)p;
// expat character-data handler: copies the `len` characters at `data` into a
// string and appends it to the body of the element on top of the scope stack.
// NOTE(review): any guard on `len` and the declaration of tmp_str are not
// visible in this excerpt.
498 xph_char(void* p, const XML_Char* data, int len)
500 assert(p); assert(data);
501 ExpatParseContext* Ctx = (ExpatParseContext*)p;
// `data` is used with an explicit length rather than as a NUL-terminated string.
506 tmp_str.assign(data, len);
507 Ctx->Scope.top()->AppendBody(tmp_str);
// expat namespace-declaration handler: records a (prefix, name) pair in the
// context's namespace map, keyed by the namespace name.
513 xph_namespace_start(void* p, const XML_Char* ns_prefix, const XML_Char* ns_name)
515 assert(p); assert(ns_name);
516 ExpatParseContext* Ctx = (ExpatParseContext*)p;
// A null prefix is handled specially.
// NOTE(review): the substitute value is not visible in this excerpt.
518 if ( ns_prefix == 0 )
521 ns_map::iterator ni = Ctx->Namespaces->find(ns_name);
523 if ( ni != Ctx->Namespaces->end() )
// NOTE(review): the lookup key is ns_name, so if XMLNamespace::Name() returns
// the name the object was constructed with, this comparison can never be true
// and the "Duplicate prefix" message is unreachable -- confirm.
525 if ( ni->second->Name() != std::string(ns_name) )
527 DefaultLogSink().Error("Duplicate prefix: %s\n", ns_prefix);
// Not seen before: allocate a new XMLNamespace and store it under ns_name.
533 XMLNamespace* Namespace = new XMLNamespace(ns_prefix, ns_name);
534 Ctx->Namespaces->insert(ns_map::value_type(ns_name, Namespace));
// expat build: parses `document` (doc_len bytes) and builds the element tree
// rooted at this element through the xph_* handlers.
// NOTE(review): the return statements are not visible in this excerpt.
540 Kumu::XMLElement::ParseString(const char* document, ui32_t doc_len)
// Namespace-aware parser; '|' is the separator expat places between the
// namespace part and the local name it reports to the handlers.
547 XML_Parser Parser = XML_ParserCreateNS("UTF-8", '|');
551 DefaultLogSink().Error("Error allocating memory for XML parser.\n");
// The context is shared with every handler through the user-data pointer.
555 ExpatParseContext Ctx(this);
556 XML_SetUserData(Parser, (void*)&Ctx);
557 XML_SetElementHandler(Parser, xph_start, xph_end);
558 XML_SetCharacterDataHandler(Parser, xph_char);
559 XML_SetStartNamespaceDeclHandler(Parser, xph_namespace_start);
// The whole document is passed in a single call with the final-chunk flag set.
561 if ( ! XML_Parse(Parser, document, doc_len, 1) )
563 DefaultLogSink().Error("XML Parse error on line %d: %s\n",
564 XML_GetCurrentLineNumber(Parser),
565 XML_ErrorString(XML_GetErrorCode(Parser)));
566 XML_ParserFree(Parser);
570 XML_ParserFree(Parser);
// A non-empty namespace map is handed over to this element, whose destructor
// deletes m_NamespaceOwner.
// NOTE(review): ExpatParseContext's destructor is empty, so an empty map (and
// the map on the parse-error path above) does not appear to be freed anywhere
// visible -- confirm.
572 if ( ! Ctx.Namespaces->empty() )
574 m_NamespaceOwner = (void*)Ctx.Namespaces;
580 // expat wrapper functions
// Start-element handler used by ParseFirstFromString: processes the element
// through xph_start() and then asks expat to stop (non-resumable).
583 xph_start_one_shot(void* p, const XML_Char* name, const XML_Char** attrs)
585 xph_start(p, name, attrs);
// NOTE(review): `p` is the handler user-data pointer. ParseFirstFromString
// registers an ExpatParseContext* with XML_SetUserData, and xph_start() above
// casts this same `p` to ExpatParseContext*. Casting it to XML_Parser is only
// correct if XML_UseParserAsHandlerArg() is in effect, which is not visible
// in this file excerpt -- this looks like a type confusion; confirm.
586 XML_Parser parser = (XML_Parser)p;
587 XML_StopParser(parser, false);
// expat build: same setup as ParseString, but installs xph_start_one_shot as
// the start-element handler so that parsing is stopped after the first
// element has been processed.
// NOTE(review): the return statements are not visible in this excerpt.
592 Kumu::XMLElement::ParseFirstFromString(const char* document, ui32_t doc_len)
599 XML_Parser Parser = XML_ParserCreateNS("UTF-8", '|');
603 DefaultLogSink().Error("Error allocating memory for XML parser.\n");
// The context is shared with every handler through the user-data pointer.
607 ExpatParseContext Ctx(this);
608 XML_SetUserData(Parser, (void*)&Ctx);
609 XML_SetElementHandler(Parser, xph_start_one_shot, xph_end);
610 XML_SetCharacterDataHandler(Parser, xph_char);
611 XML_SetStartNamespaceDeclHandler(Parser, xph_namespace_start);
// NOTE(review): a parse aborted via XML_StopParser(parser, false) is reported
// by XML_Parse as a failure status, so this branch presumably also covers the
// intended early stop -- confirm how the two cases are told apart.
613 if ( ! XML_Parse(Parser, document, doc_len, 1) )
615 XML_ParserFree(Parser);
619 XML_ParserFree(Parser);
// A non-empty namespace map is handed over to this element; see the note on
// ExpatParseContext's empty destructor regarding the other cases.
621 if ( ! Ctx.Namespaces->empty() )
623 m_NamespaceOwner = (void*)Ctx.Namespaces;
633 //----------------------------------------------------------------------------------------------------
// File-scope state for the Xerces-C back end. The transcoder and its scratch
// buffers are shared by all conversions, which is why the converter functions
// take sg_coder_lock before touching them.
637 static Mutex sg_xerces_init_lock; // protect the xerces initialized
638 static bool sg_xml_init = false; // signal initialization
639 static Mutex sg_coder_lock; // protect the transcoder context
640 static XMLTranscoder* sg_coder = 0;
// Scratch buffer size in bytes; the buffer itself carries 8 extra bytes.
641 static const int sg_coder_buf_len = 128 * 1024;
642 static char sg_coder_buf[sg_coder_buf_len + 8];
// One entry per XMLCh that fits in the scratch buffer.
643 static unsigned char sg_coder_counts[sg_coder_buf_len / sizeof(XMLCh)]; // see XMLTranscoder::transcodeFrom
// XMLCh string constants spelling "LS" and "UTF-8".
645 static const XMLCh sg_LS[] = { chLatin_L, chLatin_S, chNull };
646 static const XMLCh sg_label_UTF_8[] = { chLatin_U, chLatin_T, chLatin_F,
647 chDash, chDigit_8, chNull};
// Xerces start-up sequence, run while holding sg_xerces_init_lock.
// NOTE(review): the enclosing function signature is not visible in this
// excerpt; presumably this is the body of kumu_init_xml_dom() -- confirm.
655 AutoMutex AL(sg_xerces_init_lock);
661 XMLPlatformUtils::Initialize();
// Create the shared UTF-8 transcoder, sized to the static scratch buffer.
664 XMLTransService::Codes ret;
665 sg_coder = XMLPlatformUtils::fgTransService->makeNewTranscoderFor(sg_label_UTF_8, ret, sg_coder_buf_len);
// Translate a non-Ok result code into a log message.
667 if ( ret != XMLTransService::Ok )
669 const char* message = "Undefined Error";
673 case XMLTransService::UnsupportedEncoding: message = "Unsupported encoding"; break;
674 case XMLTransService::InternalFailure: message = "Internal failure"; break;
675 case XMLTransService::SupportFilesNotFound: message = "Support files not found"; break;
678 DefaultLogSink().Error("Xerces transform initialization error: %s\n", message);
681 catch (const XMLException &e)
// NOTE(review): other handlers in this file pass e.getMessage() through
// XMLString::transcode() before logging with %s; here it is passed directly,
// which looks like an XMLCh*/char* mismatch -- confirm.
683 DefaultLogSink().Error("Xerces initialization error: %s\n", e.getMessage());
// std::basic_string<XMLCh> convenience overload: forwards in_str.c_str() to
// the pointer-based converter and returns its result.
691 kumu_XercesString_to_UTF_8(const std::basic_string<XMLCh>& in_str, std::string& out_str) {
692 return kumu_XercesString_to_UTF_8_p(in_str.c_str(), out_str);
// Converts the NUL-terminated XMLCh string in_str through the shared
// transcoder and appends the produced bytes to out_str chunk by chunk.
// NOTE(review): any earlier reset of out_str, the exception handling around
// transcodeTo (UnRep_Throw is requested) and the return statements are not
// visible in this excerpt.
697 kumu_XercesString_to_UTF_8_p(const XMLCh* in_str, std::string& out_str)
// sg_coder and sg_coder_buf are shared statics, so the conversion runs under
// sg_coder_lock.
701 AutoMutex AL(sg_coder_lock);
702 ui32_t str_len = XMLString::stringLen(in_str);
703 ui32_t read_total = 0;
// Each pass consumes read_count input characters starting at
// in_str + read_total and yields write_count bytes in sg_coder_buf.
707 while ( str_len > 0 )
// The type of the "characters consumed" out-parameter depends on the Xerces
// major version.
709 #if XERCES_VERSION_MAJOR < 3
710 ui32_t read_count = 0;
712 XMLSize_t read_count = 0;
714 ui32_t write_count = sg_coder->transcodeTo(in_str + read_total, str_len,
715 (XMLByte*)sg_coder_buf, sg_coder_buf_len,
716 read_count, XMLTranscoder::UnRep_Throw);
718 out_str.append(sg_coder_buf, write_count);
719 str_len -= read_count;
720 read_total += read_count;
// NOTE(review): str_len is a ui32_t; if that type is unsigned (as its name
// suggests) this assertion can never fail -- confirm.
721 assert(str_len >= 0);
// std::string convenience overload: forwards in_str.c_str() to the
// pointer-based converter and returns its result.
734 kumu_UTF_8_to_XercesString(const std::string& in_str, std::basic_string<XMLCh>& out_str) {
735 return kumu_UTF_8_to_XercesString_p(in_str.c_str(), out_str);
// Converts the NUL-terminated UTF-8 string in_str through the shared
// transcoder and appends the produced XMLCh characters to out_str chunk by
// chunk.
// NOTE(review): any earlier reset of out_str, the exception handling and the
// return statements are not visible in this excerpt.
740 kumu_UTF_8_to_XercesString_p(const char* in_str, std::basic_string<XMLCh>& out_str)
// sg_coder, sg_coder_buf and sg_coder_counts are shared statics, so the
// conversion runs under sg_coder_lock.
744 AutoMutex AL(sg_coder_lock);
745 ui32_t str_len = strlen(in_str);
746 ui32_t read_total = 0;
// Each pass consumes read_count input bytes starting at in_str + read_total;
// the scratch buffer is reinterpreted as an array of XMLCh for the output.
750 while ( str_len > 0 )
// The type of the "bytes consumed" out-parameter depends on the Xerces major
// version.
752 #if XERCES_VERSION_MAJOR < 3
753 ui32_t read_count = 0;
755 XMLSize_t read_count = 0;
757 ui32_t write_count = sg_coder->transcodeFrom((const XMLByte*)(in_str + read_total), str_len,
758 (XMLCh*)sg_coder_buf, sg_coder_buf_len / sizeof(XMLCh),
759 read_count, sg_coder_counts);
// NOTE(review): basic_string::append(ptr, count) takes a count of characters
// (XMLCh units), not bytes. If write_count is the number of XMLCh produced,
// multiplying by sizeof(XMLCh) appends data beyond the converted text --
// confirm against XMLTranscoder::transcodeFrom's return value.
761 out_str.append((XMLCh*)sg_coder_buf, write_count * sizeof(XMLCh));
762 str_len -= read_count;
763 read_total += read_count;
// NOTE(review): str_len is a ui32_t; if that type is unsigned (as its name
// suggests) this assertion can never fail -- confirm.
764 assert(str_len >= 0);
// Xerces SAX handler that builds an XMLElement tree below a caller-supplied
// root. It keeps a stack of the elements currently open, a namespace map
// keyed by prefix, and a flag recording whether any XMLCh -> UTF-8
// conversion reported failure.
776 class MyTreeHandler : public HandlerBase
778 ns_map* m_Namespaces;
779 std::stack<XMLElement*> m_Scope;
781 bool m_HasEncodeErrors;
// The namespace map is allocated in the constructor.
784 MyTreeHandler(XMLElement* root) : m_Namespaces(0), m_Root(root), m_HasEncodeErrors(false)
787 m_Namespaces = new ns_map;
794 bool HasEncodeErrors() const { return m_HasEncodeErrors; }
// Hands the namespace map to the caller; a null or empty map is treated as a
// separate case. NOTE(review): what is returned in that case, and whether
// m_Namespaces is cleared afterwards, is not visible in this excerpt.
796 ns_map* TakeNamespaceMap()
798 if ( m_Namespaces == 0 || m_Namespaces->empty() )
801 ns_map* ret = m_Namespaces;
// Registers a namespace under its prefix. startElement() passes the part of
// the attribute name that follows "xmlns", so ns_prefix either starts with
// ':' or is expected to be empty (asserted below).
807 void AddNamespace(const char* ns_prefix, const char* ns_name)
812 if ( ns_prefix[0] == ':' )
818 assert(ns_prefix[0] == 0);
822 ns_map::iterator ni = m_Namespaces->find(ns_prefix);
// An existing entry for the prefix with a different namespace name is logged.
824 if ( ni != m_Namespaces->end() )
826 if ( ni->second->Name() != std::string(ns_name) )
828 DefaultLogSink().Error("Duplicate prefix: %s\n", ns_prefix);
// Not seen before: allocate a new XMLNamespace and store it under the prefix.
834 XMLNamespace* Namespace = new XMLNamespace(ns_prefix, ns_name);
835 m_Namespaces->insert(ns_map::value_type(ns_prefix, Namespace));
838 assert(!m_Namespaces->empty());
// SAX start-element callback: converts the element name to UTF-8, creates the
// element (or reuses the root for the first one), copies the attributes, and
// finally resolves the element's namespace from its prefix.
842 void startElement(const XMLCh* const x_name,
843 XERCES_CPP_NAMESPACE::AttributeList& attributes)
// The raw XMLCh pointer is passed to the overload that takes a
// std::basic_string<XMLCh>, so a temporary string is constructed from it.
848 if ( ! kumu_XercesString_to_UTF_8(x_name, tx_name) )
849 m_HasEncodeErrors = true;
851 const char* name = tx_name.c_str();
// Keep the start of the qualified name in ns_root; if a ':' is present,
// advance `name` to the local part that follows it.
853 const char* ns_root = name;
854 const char* local_name = strchr(name, ':');
856 if ( local_name != 0 )
857 name = local_name + 1;
// The first element seen reuses the caller-supplied root; every later element
// is added as a child of the current top of the scope stack.
859 if ( m_Scope.empty() )
861 m_Scope.push(m_Root);
865 Element = m_Scope.top();
866 m_Scope.push(Element->AddChild(name));
869 Element = m_Scope.top();
870 Element->SetName(name);
873 ui32_t a_len = attributes.getLength();
875 for ( ui32_t i = 0; i < a_len; i++)
877 std::string aname, value;
878 if ( ! kumu_XercesString_to_UTF_8(attributes.getName(i), aname) )
879 m_HasEncodeErrors = true;
881 if ( ! kumu_XercesString_to_UTF_8(attributes.getValue(i), value) )
882 m_HasEncodeErrors = true;
884 const char* x_aname = aname.c_str();
885 const char* x_value = value.c_str();
// Attributes whose name begins with "xmlns" declare a namespace; the rest of
// the name (":prefix" or nothing) is passed on as the prefix.
887 if ( strncmp(x_aname, "xmlns", 5) == 0 )
888 AddNamespace(x_aname+5, x_value);
// Attribute names without a ':' are used as-is.
// NOTE(review): the handling of names that do contain ':' is not visible here.
890 if ( ( local_name = strchr(x_aname, ':') ) == 0 )
891 local_name = x_aname;
895 Element->SetAttr(local_name, x_value);
// The namespace lookup follows the attribute loop in this listing, so
// declarations made on this same element are already in the map.
900 if ( ns_root != name )
901 key.assign(ns_root, name - ns_root - 1);
903 ns_map::iterator ni = m_Namespaces->find(key);
904 if ( ni != m_Namespaces->end() )
905 Element->SetNamespace(ni->second);
// NOTE(review): the body of endElement is not visible in this excerpt;
// presumably it pops the scope stack -- confirm.
908 void endElement(const XMLCh *const name) {
// The signature of characters() depends on the Xerces major version.
912 #if XERCES_VERSION_MAJOR < 3
913 void characters(const XMLCh *const chars, const unsigned int length)
915 void characters(const XMLCh* const chars, const XMLSize_t length)
// NOTE(review): the visible conversion does not pass `length`, so it relies
// on `chars` being NUL-terminated -- confirm.
921 if ( ! kumu_XercesString_to_UTF_8(chars, tmp) )
922 m_HasEncodeErrors = true;
924 m_Scope.top()->AppendBody(tmp);
// Xerces build: parses `document` (doc_len bytes) with a SAXParser driving a
// MyTreeHandler rooted at this element. Returns true only when errorCount is
// zero at the end.
// NOTE(review): the declaration and updates of errorCount, the Xerces
// initialization call and the release of `parser` / `docHandler` are not
// visible in this excerpt.
931 Kumu::XMLElement::ParseString(const char* document, ui32_t doc_len)
941 SAXParser* parser = new SAXParser();
943 parser->setValidationScheme(SAXParser::Val_Always);
944 parser->setDoNamespaces(true); // optional
// One handler object receives both document events and error callbacks.
946 MyTreeHandler* docHandler = new MyTreeHandler(this);
947 parser->setDocumentHandler(docHandler);
948 parser->setErrorHandler(docHandler);
// The document is parsed in place from the caller's memory buffer.
952 MemBufInputSource xmlSource(reinterpret_cast<const XMLByte*>(document),
953 static_cast<const unsigned int>(doc_len),
956 parser->parse(xmlSource);
// Exception messages are XMLCh strings; they are transcoded to char* for
// logging and the transcoded copy is released afterwards.
958 catch (const XMLException& e)
960 char* message = XMLString::transcode(e.getMessage());
961 DefaultLogSink().Error("Parser error: %s\n", message);
962 XMLString::release(&message);
965 catch (const SAXParseException& e)
967 char* message = XMLString::transcode(e.getMessage());
968 DefaultLogSink().Error("Parser error: %s at line %d\n", message, e.getLineNumber());
969 XMLString::release(&message);
974 DefaultLogSink().Error("Unexpected XML parser error\n");
// On success this element takes over the handler's namespace map; the
// element's destructor deletes m_NamespaceOwner.
978 if ( errorCount == 0 )
979 m_NamespaceOwner = (void*)docHandler->TakeNamespaceMap();
984 return errorCount > 0 ? false : true;
// Xerces build: same setup as ParseString, but drives the parser through the
// progressive parseFirst()/parseNext() interface instead of a single parse()
// call. Returns true only when errorCount is zero at the end.
// NOTE(review): the declaration and updates of errorCount, the exception
// handling and the release of `parser` / `docHandler` are not visible in this
// excerpt.
989 Kumu::XMLElement::ParseFirstFromString(const char* document, ui32_t doc_len)
999 SAXParser* parser = new SAXParser();
1001 parser->setValidationScheme(SAXParser::Val_Always);
1002 parser->setDoNamespaces(true); // optional
// One handler object receives both document events and error callbacks.
1004 MyTreeHandler* docHandler = new MyTreeHandler(this);
1005 parser->setDocumentHandler(docHandler);
1006 parser->setErrorHandler(docHandler);
// Scan state shared between parseFirst() and parseNext().
1007 XMLPScanToken token;
1011 MemBufInputSource xmlSource(reinterpret_cast<const XMLByte*>(document),
1012 static_cast<const unsigned int>(doc_len),
1015 if ( ! parser->parseFirst(xmlSource, token) )
1020 if ( ! parser->parseNext(token) )
// On success this element takes over the handler's namespace map; the
// element's destructor deletes m_NamespaceOwner.
1030 if ( errorCount == 0 )
1032 m_NamespaceOwner = (void*)docHandler->TakeNamespaceMap();
1038 return errorCount > 0 ? false : true;
1044 //----------------------------------------------------------------------------------------------------
// Fallback implementations compiled when neither expat nor Xerces-C support
// is enabled: each entry point only logs that no XML parser is available.
// NOTE(review): the return statements are not visible in this excerpt.
1046 #if ! defined(HAVE_EXPAT) && ! defined(HAVE_XERCES_C)
1050 Kumu::XMLElement::ParseString(const char*, ui32_t)
1052 DefaultLogSink().Error("Kumu compiled without XML parser support.\n");
1057 Kumu::XMLElement::ParseFirstFromString(const char*, ui32_t)
1059 DefaultLogSink().Error("Kumu compiled without XML parser support.\n");
1066 //----------------------------------------------------------------------------------------------------
// ByteString convenience overload: forwards the buffer's read-only data
// pointer and length, plus all output arguments, to the
// (const byte_t*, ui32_t, ...) overload.
1070 Kumu::GetXMLDocType(const ByteString& buf, std::string& ns_prefix, std::string& type_name, std::string& namespace_name,
1071 AttributeList& doc_attr_list)
1073 return GetXMLDocType(buf.RoData(), buf.Length(), ns_prefix, type_name, namespace_name, doc_attr_list);
// std::string convenience overload: forwards c_str() (as bytes) and size(),
// plus all output arguments, to the (const byte_t*, ui32_t, ...) overload.
1078 Kumu::GetXMLDocType(const std::string& buf, std::string& ns_prefix, std::string& type_name, std::string& namespace_name,
1079 AttributeList& doc_attr_list)
1081 return GetXMLDocType((const byte_t*)buf.c_str(), buf.size(), ns_prefix, type_name, namespace_name, doc_attr_list);
// Reports the document's first element: parses only the first element of the
// buffer into a temporary XMLElement, then copies out its namespace prefix
// and name (when a namespace is available), its element name and its
// attribute list.
// NOTE(review): the return statements and the guard around the use of `ns`
// are not visible in this excerpt.
1086 Kumu::GetXMLDocType(const byte_t* buf, ui32_t buf_len, std::string& ns_prefix, std::string& type_name, std::string& namespace_name,
1087 AttributeList& doc_attr_list)
// The "tmp" name is a placeholder; the parse handlers call SetName() on the
// root element with the name found in the document.
1089 XMLElement tmp_element("tmp");
1091 if ( ! tmp_element.ParseFirstFromString((const char*)buf, buf_len) )
1096 const XMLNamespace* ns = tmp_element.Namespace();
1100 ns_prefix = ns->Prefix();
1101 namespace_name = ns->Name();
1104 type_name = tmp_element.GetName();
1105 doc_attr_list = tmp_element.GetAttributes();