From 1bdc8e6badc4fb34368dd88e69a3cd6d2144226c Mon Sep 17 00:00:00 2001 From: artemp Date: Wed, 1 Mar 2017 18:46:48 +0100 Subject: [PATCH] handle utf16 encoding correctly by parsing as sequence and applying utf16->utf32->utf8 conversion. --- .../json/unicode_string_grammar_x3_def.hpp | 35 ++++++++++++++++--- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/include/mapnik/json/unicode_string_grammar_x3_def.hpp b/include/mapnik/json/unicode_string_grammar_x3_def.hpp index 788ea7247..43d40c9aa 100644 --- a/include/mapnik/json/unicode_string_grammar_x3_def.hpp +++ b/include/mapnik/json/unicode_string_grammar_x3_def.hpp @@ -24,8 +24,9 @@ #define MAPNIK_JSON_UNICODE_STRING_GRAMMAR_X3_DEF_HPP #include -#include - +// boost +#include +// namespace mapnik { namespace json { namespace grammar { namespace x3 = boost::spirit::x3; @@ -52,6 +53,25 @@ auto push_char = [](auto const& ctx) { _val(ctx).push_back(_attr(ctx));}; auto push_utf8 = [](auto const& ctx) { detail::push_utf8_impl(_val(ctx), _attr(ctx));}; +auto push_utf16 = [](auto const& ctx) +{ + using iterator_type = std::vector::const_iterator; + auto const& utf16 = _attr(ctx); + try + { + boost::u16_to_u32_iterator itr(utf16.begin()); + boost::u16_to_u32_iterator end(utf16.end()); + for (; itr != end; ++itr) + { + detail::push_utf8_impl(_val(ctx), *itr); + } + } + catch( ... 
) + { + // NOTE(review): catch-all swallows any exception raised while decoding the UTF-16 sequence; output already pushed into _val(ctx) is kept and the remaining code units are skipped (presumably malformed surrogates, confirm) + } +}; + auto push_esc = [] (auto const& ctx) { std::string & utf8 = _val(ctx); @@ -85,7 +105,7 @@ using x3::eol; using x3::no_skip; x3::uint_parser const hex2 {}; -x3::uint_parser const hex4 {}; +x3::uint_parser const hex4 {}; x3::uint_parser const hex8 {}; // start rule @@ -93,13 +113,17 @@ unicode_string_grammar_type const unicode_string("Unicode String"); // rules x3::rule const double_quoted("Double-quoted string"); x3::rule const escaped("Escaped Characted"); +x3::rule> const utf16_string("UTF16 encoded string"); auto unicode_string_def = double_quoted ; + +auto utf16_string_def = lit('u') > hex4 > *(lit("\\u") > hex4); + auto const escaped_def = lit('\\') > ((lit('x') > hex2[push_char]) | - (lit('u') > hex4[push_utf8]) + utf16_string[push_utf16] | (lit('U') > hex8[push_utf8]) | @@ -116,7 +140,8 @@ auto const double_quoted_def = lit('"') > no_skip[*(escaped[append] | (~char_('" BOOST_SPIRIT_DEFINE( unicode_string, double_quoted, - escaped + escaped, + utf16_string ); #pragma GCC diagnostic pop