From 6fa266674783fdc6ed346c266a4ddad9f3d4da20 Mon Sep 17 00:00:00 2001 From: Artem Pavlenko Date: Mon, 17 May 2021 16:06:34 +0100 Subject: [PATCH 1/2] Add conversion functions `from_u8string` to support c++20 + update tests --- benchmark/src/test_utf_encoding.cpp | 11 ++++-- include/mapnik/util/from_u8string.hpp | 48 +++++++++++++++++++++++++++ test/unit/core/expressions_test.cpp | 8 +++-- test/unit/datasource/csv.cpp | 5 +-- test/unit/text/shaping.cpp | 11 +++--- 5 files changed, 71 insertions(+), 12 deletions(-) create mode 100644 include/mapnik/util/from_u8string.hpp diff --git a/benchmark/src/test_utf_encoding.cpp b/benchmark/src/test_utf_encoding.cpp index a4ba10a88..87224ecbf 100644 --- a/benchmark/src/test_utf_encoding.cpp +++ b/benchmark/src/test_utf_encoding.cpp @@ -1,17 +1,22 @@ #include "bench_framework.hpp" #include +#include #include #include #ifndef __linux__ #include +#endif +using mapnik::util::from_u8string; + +#ifndef __linux__ class test : public benchmark::test_case { std::string utf8_; public: test(mapnik::parameters const& params) : test_case(params), - utf8_(u8"שלום") {} + utf8_(from_u8string(u8"שלום")) {} bool validate() const { std::wstring_convert, char32_t> utf32conv; @@ -42,7 +47,7 @@ class test2 : public benchmark::test_case public: test2(mapnik::parameters const& params) : test_case(params), - utf8_(u8"שלום") {} + utf8_(from_u8string(u8"שלום")) {} bool validate() const { std::u32string utf32 = boost::locale::conv::utf_to_utf(utf8_); @@ -69,7 +74,7 @@ class test3 : public benchmark::test_case public: test3(mapnik::parameters const& params) : test_case(params), - utf8_(u8"שלום") {} + utf8_(from_u8string(u8"שלום")) {} bool validate() const { mapnik::transcoder tr_("utf-8"); diff --git a/include/mapnik/util/from_u8string.hpp b/include/mapnik/util/from_u8string.hpp new file mode 100644 index 000000000..fc7bf640e --- /dev/null +++ b/include/mapnik/util/from_u8string.hpp @@ -0,0 +1,48 @@ 
+/***************************************************************************** + * + * This file is part of Mapnik (c++ mapping toolkit) + * + * Copyright (C) 2021 Artem Pavlenko + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *****************************************************************************/ + +#ifndef FROM_U8STRING_HPP +#define FROM_U8STRING_HPP + +// stl +#include + +// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2019/p1423r2.html +// Explicit conversion functions can be used, in a C++17 compatible manner, to cope with the change of return type to// the std::filesystem::path member functions when a UTF-8 encoded path is desired in an object of type std::string. 
+ +namespace mapnik { namespace util { + +std::string from_u8string(const std::string &s) { + return s; +} +std::string from_u8string(std::string &&s) { + return std::move(s); +} +#if defined(__cpp_lib_char8_t) +std::string from_u8string(const std::u8string &s) { + return std::string(s.begin(), s.end()); +} +#endif + +}} // end of namespace mapnik + +#endif // FROM_U8STRING_HPP diff --git a/test/unit/core/expressions_test.cpp b/test/unit/core/expressions_test.cpp index 3e8720d59..77ed51784 100644 --- a/test/unit/core/expressions_test.cpp +++ b/test/unit/core/expressions_test.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -57,6 +58,7 @@ std::string parse_and_dump(std::string const& str) TEST_CASE("expressions") { using namespace std::placeholders; + using namespace mapnik::util; using properties_type = std::map; mapnik::transcoder tr("utf8"); @@ -102,11 +104,11 @@ TEST_CASE("expressions") // unicode attribute name TRY_CHECK(eval("[τ]") == prop.at("τ")); - TRY_CHECK(eval("[τ]") == eval(u8"[\u03C4]")); + TRY_CHECK(eval("[τ]") == eval(from_u8string(u8"[\u03C4]"))); // change to TRY_CHECK once \u1234 escape sequence in attribute name // is implemented in expression grammar - CHECK_NOFAIL(eval("[τ]") == eval("[\\u03C3]")); + CHECK_NOFAIL(eval("[τ]") == eval(from_u8string(u8"[\\u03C3]"))); // unary functions // sin / cos @@ -190,7 +192,7 @@ TEST_CASE("expressions") // https://en.wikipedia.org/wiki/Chess_symbols_in_Unicode //'\u265C\u265E\u265D\u265B\u265A\u265D\u265E\u265C' - black chess figures // replace black knights with white knights - auto val0 = eval(u8"'\u265C\u265E\u265D\u265B\u265A\u265D\u265E\u265C'.replace('\u265E','\u2658')"); + auto val0 = eval(from_u8string(u8"'\u265C\u265E\u265D\u265B\u265A\u265D\u265E\u265C'.replace('\u265E','\u2658')")); auto val1 = eval("'♜♞♝♛♚♝♞♜'.replace('♞','♘')"); // ==> expected ♜♘♝♛♚♝♘♜ TRY_CHECK(val0 == val1); TRY_CHECK(val0.to_string() == val1.to_string()); // UTF-8 diff --git 
a/test/unit/datasource/csv.cpp b/test/unit/datasource/csv.cpp index 1f0091302..448feec7f 100644 --- a/test/unit/datasource/csv.cpp +++ b/test/unit/datasource/csv.cpp @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -84,7 +85,7 @@ mapnik::datasource_ptr get_csv_ds(std::string const& file_name, bool strict = tr } // anonymous namespace TEST_CASE("csv") { - + using mapnik::util::from_u8string; std::string csv_plugin("./plugins/input/csv.input"); if (mapnik::util::exists(csv_plugin)) { @@ -862,7 +863,7 @@ TEST_CASE("csv") { { using ustring = mapnik::value_unicode_string; - for (auto const &name : {std::string("Winthrop, WA"), std::string(u8"Qu\u00e9bec")}) { + for (auto const &name : {std::string("Winthrop, WA"), from_u8string(u8"Qu\u00e9bec")}) { std::string csv_string = (boost::format( "wkt,Name\n" diff --git a/test/unit/text/shaping.cpp b/test/unit/text/shaping.cpp index ea1fbd44a..c48595a3e 100644 --- a/test/unit/text/shaping.cpp +++ b/test/unit/text/shaping.cpp @@ -3,9 +3,12 @@ #include #include #include +#include namespace { +using mapnik::util::from_u8string; + void test_shaping( mapnik::font_set const& fontset, mapnik::face_manager& fm, std::vector> const& expected, char const* str, bool debug = false) { @@ -67,19 +70,19 @@ TEST_CASE("shaping") { std::vector> expected = {{977, 0}, {1094, 3}, {1038, 4}, {1168, 4}, {9, 7}, {3, 8}, {11, 9}, {68, 10}, {69, 11}, {70, 12}, {12, 13}}; - test_shaping(fontset, fm, expected, u8"སྤུ་ཧྲེང (abc)"); + test_shaping(fontset, fm, expected, from_u8string(u8"སྤུ་ཧྲེང (abc)").c_str()); } { std::vector> expected = {{977, 0}, {1094, 3}, {1038, 4}, {1168, 4}, {9, 7}, {3, 8}, {11, 9}, {0, 10}, {0, 11}, {0, 12}, {12, 13}}; - test_shaping(fontset, fm, expected, u8"སྤུ་ཧྲེང (普兰镇)"); + test_shaping(fontset, fm, expected, from_u8string(u8"སྤུ་ཧྲེང (普兰镇)").c_str()); } { std::vector> expected = {{68, 0}, {69, 1}, {70, 2}, {3, 3}, {11, 4}, {0, 5}, {0, 6}, {0, 7}, {12, 8}}; - test_shaping(fontset, fm, 
expected, u8"abc (普兰镇)"); + test_shaping(fontset, fm, expected, from_u8string(u8"abc (普兰镇)").c_str()); } { @@ -94,7 +97,7 @@ TEST_CASE("shaping") {{0, 0}, {0, 1}, {0, 2}, {3, 3}, {0, 4}, {0, 5}, {0, 6}, {0, 7}, {0, 8}, {0, 9}, {3, 10}, {509, 22}, {481, 21}, {438, 20}, {503, 19}, {470, 18}, {496, 17}, {43, 16}, {3, 15}, {509, 14}, {454, 13}, {496, 12}, {43, 11}}; - test_shaping(fontset, fm, expected, u8"ⵃⴰⵢ ⵚⵉⵏⴰⵄⵉ الحي الصناعي"); + test_shaping(fontset, fm, expected, from_u8string(u8"ⵃⴰⵢ ⵚⵉⵏⴰⵄⵉ الحي الصناعي").c_str()); } From 0efdcafe565831caa5d307d87fd4663a15d451f6 Mon Sep 17 00:00:00 2001 From: Artem Pavlenko Date: Mon, 17 May 2021 16:08:35 +0100 Subject: [PATCH 2/2] make `from_u8string` inline --- include/mapnik/util/from_u8string.hpp | 54 +++++++++++++++++++++++++-- 1 file changed, 51 insertions(+), 3 deletions(-) diff --git a/include/mapnik/util/from_u8string.hpp b/include/mapnik/util/from_u8string.hpp index fc7bf640e..d5f2bb605 100644 --- a/include/mapnik/util/from_u8string.hpp +++ b/include/mapnik/util/from_u8string.hpp @@ -25,24 +25,72 @@ // stl #include +#include // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2019/p1423r2.html // Explicit conversion functions can be used, in a C++17 compatible manner, to cope with the change of return type to// the std::filesystem::path member functions when a UTF-8 encoded path is desired in an object of type std::string. 
namespace mapnik { namespace util { -std::string from_u8string(const std::string &s) { +inline std::string from_u8string(std::string const& s) { return s; } -std::string from_u8string(std::string &&s) { + +inline std::string from_u8string(std::string &&s) { return std::move(s); } #if defined(__cpp_lib_char8_t) -std::string from_u8string(const std::u8string &s) { +inline std::string from_u8string(std::u8string const&s) { return std::string(s.begin(), s.end()); } #endif +/* +template +struct char_array { + template + constexpr char_array( + const char (&r)[P], + std::index_sequence) + : + data{(I> + constexpr char_array(const char(&r)[P]) + : char_array(r, std::make_index_sequence()) + {} +#if defined(__cpp_char8_t) + template + constexpr char_array( + const char8_t (&r)[P], + std::index_sequence) + : + data{(I(r[I]):'\0')...} + {} + template> + constexpr char_array(const char8_t(&r)[P]) + : char_array(r, std::make_index_sequence()) + {} +#endif + + constexpr (&operator const char() const)[N] { + return data; + } + constexpr (&operator char())[N] { + return data; + } + + char data[N]; +}; + +template +char_array(const char(&)[N]) -> char_array; + +#if defined(__cpp_char8_t) +template +char_array(const char8_t(&)[N]) -> char_array; +#endif +*/ }} // end of namespace mapnik #endif // FROM_U8STRING_HPP