// ---------------------------------------------------------------------------- // Copyright (C) 2002-2005 Marcin Kalicinski // // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) // // Based on XML grammar by Daniel C. Nuffer // http://spirit.sourceforge.net/repository/applications/xml.zip // // For more information, see www.boost.org // ---------------------------------------------------------------------------- #ifndef BOOST_PROPERTY_TREE_DETAIL_XML_PARSER_READ_SPIRIT_HPP_INCLUDED #define BOOST_PROPERTY_TREE_DETAIL_XML_PARSER_READ_SPIRIT_HPP_INCLUDED //#define BOOST_SPIRIT_DEBUG #include #include #include #include #include #include #include #include #include #include namespace boost { namespace property_tree { namespace xml_parser { // XML parser context template struct context { typedef typename Ptree::char_type Ch; typedef std::basic_string Str; typedef boost::spirit::position_iterator::const_iterator> It; int flags; std::vector stack; /////////////////////////////////////////////////////////////////////// // Actions struct a_key_s { context &c; a_key_s(context &c): c(c) { } void operator()(It b, It e) const { if (c.stack.empty()) throw xml_parser_error("xml parse error", b.get_position().file, b.get_position().line); Str name(b, e); Ptree *child = &c.stack.back()->push_back(std::make_pair(name, Ptree()))->second; c.stack.push_back(child); } }; struct a_key_e { context &c; a_key_e(context &c): c(c) { } void operator()(It b, It e) const { if (c.stack.size() <= 1) throw xml_parser_error("xml parse error", b.get_position().file, b.get_position().line); c.stack.pop_back(); } }; struct a_content { context &c; a_content(context &c): c(c) { } void operator()(It b, It e) const { Str s = decode_char_entities(detail::trim(condense(Str(b, e)))); if (!s.empty()) { if (c.flags & no_concat_text) c.stack.back()->push_back(std::make_pair(xmltext(), Ptree(s))); else c.stack.back()->put_own(c.stack.back()->template get_own >() + s); } } }; struct a_attr_key { context &c; a_attr_key(context &c): c(c) { } void operator()(It b, It e) const { c.stack.back()->put_child(Ch('/'), xmlattr() + Ch('/') + Str(b, e), empty_ptree()); } }; struct a_attr_data { context &c; a_attr_data(context &c): c(c) { } void operator()(It b, It e) const { Ptree &attr = c.stack.back()->get_child(xmlattr()); attr.back().second.put_own(Str(b + 1, e - 1)); } }; struct a_comment { context &c; a_comment(context &c): c(c) { } void operator()(It b, It e) const { c.stack.back()->push_back(std::make_pair(xmlcomment(), Ptree(Str(b, e)))); } }; }; /////////////////////////////////////////////////////////////////////// // Grammar template struct xml_grammar: public boost::spirit::grammar > { typedef context context_t; mutable context_t c; template struct definition { typedef typename ScannerT::value_t char_t; typedef boost::spirit::chset chset_t; boost::spirit::rule prolog, element, Misc, PEReference, Reference, PITarget, CData, doctypedecl, XMLDecl, SDDecl, VersionInfo, EncodingDecl, VersionNum, Eq, DeclSep, ExternalID, markupdecl, NotationDecl, EntityDecl, AttlistDecl, elementdecl, TextDecl, extSubsetDecl, conditionalSect, EmptyElemTag, STag, content, ETag, Attribute, contentspec, Mixed, children, choice, seq, cp, AttDef, AttType, DefaultDecl, StringType, TokenizedType, EnumeratedType, NotationType, Enumeration, EntityValue, AttValue, SystemLiteral, PubidLiteral, CharDataChar, CharData, Comment, PI, CDSect, extSubset, includeSect, ignoreSect, ignoreSectContents, Ignore, CharRef, EntityRef, GEDecl, PEDecl, EntityDef, PEDef, NDataDecl, extParsedEnt, EncName, PublicID, document, S, Name, Names, Nmtoken, Nmtokens, STagB, STagE1, STagE2; definition(const xml_grammar &self) { using namespace boost::spirit; // XML Char sets chset_t Char("\x9\xA\xD\x20-\x7F"); chset_t Sch("\x20\x9\xD\xA"); chset_t Letter("\x41-\x5A\x61-\x7A"); chset_t Digit("0-9"); chset_t XDigit("0-9A-Fa-f"); chset_t Extender("\xB7"); chset_t NameChar = Letter | Digit | (char_t)'.' | (char_t)'-' | (char_t)'_' | (char_t)':' | Extender; document = prolog >> element >> *Misc ; S = +(Sch) ; Name = (Letter | '_' | ':') >> *(NameChar) ; Names = Name >> *(S >> Name) ; Nmtoken = +NameChar ; Nmtokens = Nmtoken >> *(S >> Nmtoken) ; EntityValue = '"' >> *( (anychar_p - (chset_t(detail::widen("%&\"").c_str()))) | PEReference | Reference) >> '"' | '\'' >> *( (anychar_p - (chset_t("%&'"))) | PEReference | Reference) >> '\'' ; AttValue = '"' >> *( (anychar_p - (chset_t("<&\""))) | Reference) >> '"' | '\'' >> *( (anychar_p - (chset_t("<&'"))) | Reference) >> '\'' ; SystemLiteral= ('"' >> *(anychar_p - '"') >> '"') | ('\'' >> *(anychar_p - '\'') >> '\'') ; chset_t PubidChar("\x20\xD\xA'a-zA-Z0-9()+,./:=?;!*#@$_%-"); PubidLiteral = '"' >> *PubidChar >> '"' | '\'' >> *(PubidChar - '\'') >> '\'' ; CharDataChar = //anychar_p - (chset_t("<&")) anychar_p - (chset_t("<")) ; CharData = *(CharDataChar - "]]>") ; Comment = "" ; PI = "> PITarget >> !(S >> (*(Char - "?>"))) >> "?>" ; PITarget = Name - (as_lower_d["xml"]) ; CDSect = "> CData >> "]]>" ; CData = *(Char - "]]>") ; prolog = !XMLDecl >> *Misc >> !(doctypedecl >> *Misc) ; XMLDecl = "> VersionInfo >> !EncodingDecl >> !SDDecl >> !S >> "?>" ; VersionInfo = S >> "version" >> Eq >> ( '\'' >> VersionNum >> '\'' | '"' >> VersionNum >> '"' ) ; Eq = !S >> '=' >> !S ; chset_t VersionNumCh("a-zA-Z0-9_.:-"); VersionNum = +(VersionNumCh) ; Misc = Comment | PI | S ; doctypedecl = "> S >> Name >> !(S >> ExternalID) >> !S >> !( '[' >> *(markupdecl | DeclSep) >> ']' >> !S ) >> '>' ; DeclSep = PEReference | S ; markupdecl = elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment ; extSubset = !TextDecl >> extSubsetDecl ; extSubsetDecl = *( markupdecl | conditionalSect | DeclSep ) ; SDDecl = S >> "standalone" >> Eq >> ( ('\'' >> (str_p("yes") | "no") >> '\'') | ('"' >> (str_p("yes") | "no") >> '"') ) ; /* element = EmptyElemTag | STag >> content >> ETag ; */ element = STagB >> (STagE2 | (STagE1 >> content >> ETag))[typename context_t::a_key_e(self.c)] ; STag = '<' >> Name >> *(S >> Attribute) >> !S >> '>' ; STagB = '<' >> Name[typename context_t::a_key_s(self.c)] >> *(S >> Attribute) >> !S ; STagE1 = ch_p(">") ; STagE2 = str_p("/>") ; Attribute = Name[typename context_t::a_attr_key(self.c)] >> Eq >> AttValue[typename context_t::a_attr_data(self.c)] ; ETag = "> Name >> !S >> '>' ; content = !(CharData[typename context_t::a_content(self.c)]) >> *( ( element // | Reference | CDSect | PI | Comment ) >> !(CharData[typename context_t::a_content(self.c)]) ) ; EmptyElemTag = '<' >> Name >> *(S >> Attribute) >> !S >> "/>" ; elementdecl = "> S >> Name >> S >> contentspec >> !S >> '>' ; contentspec = str_p("EMPTY") | "ANY" | Mixed | children ; children = (choice | seq) >> !(ch_p('?') | '*' | '+') ; cp = (Name | choice | seq) >> !(ch_p('?') | '*' | '+') ; choice = '(' >> !S >> cp >> +(!S >> '|' >> !S >> cp) >> !S >> ')' ; seq = '(' >> !S >> cp >> *(!S >> ',' >> !S >> cp) >> !S >> ')' ; Mixed = '(' >> !S >> "#PCDATA" >> *(!S >> '|' >> !S >> Name) >> !S >> ")*" | '(' >> !S >> "#PCDATA" >> !S >> ')' ; AttlistDecl = "> S >> Name >> *AttDef >> !S >> '>' ; AttDef = S >> Name >> S >> AttType >> S >> DefaultDecl ; AttType = StringType | TokenizedType | EnumeratedType ; StringType = str_p("CDATA") ; TokenizedType = longest_d[ str_p("ID") | "IDREF" | "IDREFS" | "ENTITY" | "ENTITIES" | "NMTOKEN" | "NMTOKENS" ] ; EnumeratedType = NotationType | Enumeration ; NotationType = "NOTATION" >> S >> '(' >> !S >> Name >> *(!S >> '|' >> !S >> Name) >> !S >> ')' ; Enumeration = '(' >> !S >> Nmtoken >> *(!S >> '|' >> !S >> Nmtoken) >> !S >> ')' ; DefaultDecl = str_p("#REQUIRED") | "#IMPLIED" | !("#FIXED" >> S) >> AttValue ; conditionalSect = includeSect | ignoreSect ; includeSect = "> !S >> "INCLUDE" >> !S >> '[' >> extSubsetDecl >> "]]>" ; ignoreSect = "> !S >> "IGNORE" >> !S >> '[' >> *ignoreSectContents >> "]]>" ; ignoreSectContents = Ignore >> *("> ignoreSectContents >> "]]>" >> Ignore) ; Ignore = *(Char - (str_p("")) ; CharRef = "&#" >> +Digit >> ';' | "&#x" >> +XDigit >> ';' ; Reference = EntityRef | CharRef ; EntityRef = '&' >> Name >> ';' ; PEReference = '%' >> Name >> ';' ; EntityDecl = GEDecl | PEDecl ; GEDecl = "> S >> Name >> S >> EntityDef >> !S >> '>' ; PEDecl = "> S >> '%' >> S >> Name >> S >> PEDef >> !S >> '>' ; EntityDef = EntityValue | ExternalID >> !NDataDecl ; PEDef = EntityValue | ExternalID ; ExternalID = "SYSTEM" >> S >> SystemLiteral | "PUBLIC" >> S >> PubidLiteral >> S >> SystemLiteral ; NDataDecl = S >> "NDATA" >> S >> Name ; TextDecl = "> !VersionInfo >> EncodingDecl >> !S >> "?>" ; extParsedEnt = !TextDecl >> content ; EncodingDecl = S >> "encoding" >> Eq >> ( '"' >> EncName >> '"' | '\'' >> EncName >> '\'' ) ; EncName = Letter >> *(Letter | Digit | '.' | '_' | '-') ; NotationDecl = "> S >> Name >> S >> (ExternalID | PublicID) >> !S >> '>' ; PublicID = "PUBLIC" >> S >> PubidLiteral ; BOOST_SPIRIT_DEBUG_RULE(document); BOOST_SPIRIT_DEBUG_RULE(prolog); BOOST_SPIRIT_DEBUG_RULE(element); BOOST_SPIRIT_DEBUG_RULE(Misc); BOOST_SPIRIT_DEBUG_RULE(PEReference); BOOST_SPIRIT_DEBUG_RULE(Reference); BOOST_SPIRIT_DEBUG_RULE(PITarget); BOOST_SPIRIT_DEBUG_RULE(CData); BOOST_SPIRIT_DEBUG_RULE(doctypedecl); BOOST_SPIRIT_DEBUG_RULE(XMLDecl); BOOST_SPIRIT_DEBUG_RULE(SDDecl); BOOST_SPIRIT_DEBUG_RULE(VersionInfo); BOOST_SPIRIT_DEBUG_RULE(EncodingDecl); BOOST_SPIRIT_DEBUG_RULE(VersionNum); BOOST_SPIRIT_DEBUG_RULE(Eq); BOOST_SPIRIT_DEBUG_RULE(DeclSep); BOOST_SPIRIT_DEBUG_RULE(ExternalID); BOOST_SPIRIT_DEBUG_RULE(markupdecl); BOOST_SPIRIT_DEBUG_RULE(NotationDecl); BOOST_SPIRIT_DEBUG_RULE(EntityDecl); BOOST_SPIRIT_DEBUG_RULE(AttlistDecl); BOOST_SPIRIT_DEBUG_RULE(elementdecl); BOOST_SPIRIT_DEBUG_RULE(TextDecl); BOOST_SPIRIT_DEBUG_RULE(extSubsetDecl); BOOST_SPIRIT_DEBUG_RULE(conditionalSect); BOOST_SPIRIT_DEBUG_RULE(EmptyElemTag); BOOST_SPIRIT_DEBUG_RULE(STag); BOOST_SPIRIT_DEBUG_RULE(content); BOOST_SPIRIT_DEBUG_RULE(ETag); BOOST_SPIRIT_DEBUG_RULE(Attribute); BOOST_SPIRIT_DEBUG_RULE(contentspec); BOOST_SPIRIT_DEBUG_RULE(Mixed); BOOST_SPIRIT_DEBUG_RULE(children); BOOST_SPIRIT_DEBUG_RULE(choice); BOOST_SPIRIT_DEBUG_RULE(seq); BOOST_SPIRIT_DEBUG_RULE(cp); BOOST_SPIRIT_DEBUG_RULE(AttDef); BOOST_SPIRIT_DEBUG_RULE(AttType); BOOST_SPIRIT_DEBUG_RULE(DefaultDecl); BOOST_SPIRIT_DEBUG_RULE(StringType); BOOST_SPIRIT_DEBUG_RULE(TokenizedType); BOOST_SPIRIT_DEBUG_RULE(EnumeratedType); BOOST_SPIRIT_DEBUG_RULE(NotationType); BOOST_SPIRIT_DEBUG_RULE(Enumeration); BOOST_SPIRIT_DEBUG_RULE(EntityValue); BOOST_SPIRIT_DEBUG_RULE(AttValue); BOOST_SPIRIT_DEBUG_RULE(SystemLiteral); BOOST_SPIRIT_DEBUG_RULE(PubidLiteral); BOOST_SPIRIT_DEBUG_RULE(CharDataChar); BOOST_SPIRIT_DEBUG_RULE(CharData); BOOST_SPIRIT_DEBUG_RULE(Comment); BOOST_SPIRIT_DEBUG_RULE(PI); BOOST_SPIRIT_DEBUG_RULE(CDSect); BOOST_SPIRIT_DEBUG_RULE(extSubset); BOOST_SPIRIT_DEBUG_RULE(includeSect); BOOST_SPIRIT_DEBUG_RULE(ignoreSect); BOOST_SPIRIT_DEBUG_RULE(ignoreSectContents); BOOST_SPIRIT_DEBUG_RULE(Ignore); BOOST_SPIRIT_DEBUG_RULE(CharRef); BOOST_SPIRIT_DEBUG_RULE(EntityRef); BOOST_SPIRIT_DEBUG_RULE(GEDecl); BOOST_SPIRIT_DEBUG_RULE(PEDecl); BOOST_SPIRIT_DEBUG_RULE(EntityDef); BOOST_SPIRIT_DEBUG_RULE(PEDef); BOOST_SPIRIT_DEBUG_RULE(NDataDecl); BOOST_SPIRIT_DEBUG_RULE(extParsedEnt); BOOST_SPIRIT_DEBUG_RULE(EncName); BOOST_SPIRIT_DEBUG_RULE(PublicID); BOOST_SPIRIT_DEBUG_RULE(document); BOOST_SPIRIT_DEBUG_RULE(S); BOOST_SPIRIT_DEBUG_RULE(Name); BOOST_SPIRIT_DEBUG_RULE(Names); BOOST_SPIRIT_DEBUG_RULE(Nmtoken); BOOST_SPIRIT_DEBUG_RULE(Nmtokens); BOOST_SPIRIT_DEBUG_RULE(STagB); BOOST_SPIRIT_DEBUG_RULE(STagE1); BOOST_SPIRIT_DEBUG_RULE(STagE2); } const boost::spirit::rule &start() const { return document; } }; }; template void read_xml_internal(std::basic_istream &stream, Ptree &pt, int flags, const std::string &filename) { typedef typename Ptree::char_type Ch; typedef boost::spirit::position_iterator::const_iterator> It; BOOST_ASSERT(validate_flags(flags)); // Load data into vector std::vector v(std::istreambuf_iterator(stream.rdbuf()), std::istreambuf_iterator()); if (!stream.good()) throw xml_parser_error("read error", filename, 0); // Initialize iterators It begin(v.begin(), v.end()); It end; begin.set_position(filename); // Prepare grammar Ptree local; xml_grammar g; g.c.stack.push_back(&local); // Push root ptree on context stack g.c.flags = flags; // Parse into local boost::spirit::parse_info result = boost::spirit::parse(begin, end, g); if (!result.full || g.c.stack.size() != 1) throw xml_parser_error("xml parse error", result.stop.get_position().file, result.stop.get_position().line); // Swap local and pt pt.swap(local); } } } } #endif