commit
0a8e353e83
5 changed files with 119 additions and 12 deletions
|
@ -1,17 +1,22 @@
|
||||||
#include "bench_framework.hpp"
|
#include "bench_framework.hpp"
|
||||||
#include <mapnik/unicode.hpp>
|
#include <mapnik/unicode.hpp>
|
||||||
|
#include <mapnik/util/from_u8string.hpp>
|
||||||
#include <mapnik/value.hpp>
|
#include <mapnik/value.hpp>
|
||||||
#include <boost/locale.hpp>
|
#include <boost/locale.hpp>
|
||||||
#ifndef __linux__
|
#ifndef __linux__
|
||||||
#include <codecvt>
|
#include <codecvt>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
using mapnik::util::from_u8string;
|
||||||
|
|
||||||
|
#ifndef __linux__
|
||||||
class test : public benchmark::test_case
|
class test : public benchmark::test_case
|
||||||
{
|
{
|
||||||
std::string utf8_;
|
std::string utf8_;
|
||||||
public:
|
public:
|
||||||
test(mapnik::parameters const& params)
|
test(mapnik::parameters const& params)
|
||||||
: test_case(params),
|
: test_case(params),
|
||||||
utf8_(u8"שלום") {}
|
utf8_(from_u8string(u8"שלום")) {}
|
||||||
bool validate() const
|
bool validate() const
|
||||||
{
|
{
|
||||||
std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> utf32conv;
|
std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> utf32conv;
|
||||||
|
@ -42,7 +47,7 @@ class test2 : public benchmark::test_case
|
||||||
public:
|
public:
|
||||||
test2(mapnik::parameters const& params)
|
test2(mapnik::parameters const& params)
|
||||||
: test_case(params),
|
: test_case(params),
|
||||||
utf8_(u8"שלום") {}
|
utf8_(from_u8string(u8"שלום")) {}
|
||||||
bool validate() const
|
bool validate() const
|
||||||
{
|
{
|
||||||
std::u32string utf32 = boost::locale::conv::utf_to_utf<char32_t>(utf8_);
|
std::u32string utf32 = boost::locale::conv::utf_to_utf<char32_t>(utf8_);
|
||||||
|
@ -69,7 +74,7 @@ class test3 : public benchmark::test_case
|
||||||
public:
|
public:
|
||||||
test3(mapnik::parameters const& params)
|
test3(mapnik::parameters const& params)
|
||||||
: test_case(params),
|
: test_case(params),
|
||||||
utf8_(u8"שלום") {}
|
utf8_(from_u8string(u8"שלום")) {}
|
||||||
bool validate() const
|
bool validate() const
|
||||||
{
|
{
|
||||||
mapnik::transcoder tr_("utf-8");
|
mapnik::transcoder tr_("utf-8");
|
||||||
|
|
96
include/mapnik/util/from_u8string.hpp
Normal file
96
include/mapnik/util/from_u8string.hpp
Normal file
|
@ -0,0 +1,96 @@
|
||||||
|
/*****************************************************************************
|
||||||
|
*
|
||||||
|
* This file is part of Mapnik (c++ mapping toolkit)
|
||||||
|
*
|
||||||
|
* Copyright (C) 2021 Artem Pavlenko
|
||||||
|
*
|
||||||
|
* This library is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This library is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with this library; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*
|
||||||
|
*****************************************************************************/
|
||||||
|
|
||||||
|
#ifndef FROM_U8STRING_HPP
|
||||||
|
#define FROM_U8STRING_HPP
|
||||||
|
|
||||||
|
// stl
|
||||||
|
#include <string>
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
|
// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2019/p1423r2.html
|
||||||
|
// Explicit conversion functions can be used, in a C++17 compatible manner, to cope with the change of return type to
// the std::filesystem::path member functions when a UTF-8 encoded path is desired in an object of type std::string.
|
||||||
|
|
||||||
|
namespace mapnik {
namespace util {

// from_u8string: explicit conversion helpers (after P1423R2) that always
// yield a std::string, whichever string type the argument arrives as.
// Arguments that are, or convert to, std::string are handled by the first two
// overloads; std::u8string arguments are handled by the third overload, which
// only exists when the standard library advertises char8_t support.

// Lvalue / const std::string: hand back a copy of the argument.
inline std::string from_u8string(std::string const& s)
{
    return std::string(s);
}

// Rvalue std::string: move the argument into the result instead of copying.
inline std::string from_u8string(std::string&& s)
{
    std::string result(std::move(s));
    return result;
}

#ifdef __cpp_lib_char8_t
// std::u8string: build a std::string from the same sequence of code units,
// each char8_t element being converted to char.
inline std::string from_u8string(std::u8string const& s)
{
    std::string result(s.begin(), s.end());
    return result;
}
#endif

/*
  Disabled (commented-out) char_array helper; none of this is compiled.

template<std::size_t N>
struct char_array {
    template<std::size_t P, std::size_t... I>
    constexpr char_array(
        const char (&r)[P],
        std::index_sequence<I...>)
        :
        data{(I<P?r[I]:'\0')...}
    {}
    template<std::size_t P, typename = std::enable_if_t<(P<=N)>>
    constexpr char_array(const char(&r)[P])
        : char_array(r, std::make_index_sequence<N>())
    {}

#if defined(__cpp_char8_t)
    template<std::size_t P, std::size_t... I>
    constexpr char_array(
        const char8_t (&r)[P],
        std::index_sequence<I...>)
        :
        data{(I<P?static_cast<char>(r[I]):'\0')...}
    {}
    template<std::size_t P, typename = std::enable_if_t<(P<=N)>>
    constexpr char_array(const char8_t(&r)[P])
        : char_array(r, std::make_index_sequence<N>())
    {}
#endif

    constexpr (&operator const char() const)[N] {
        return data;
    }
    constexpr (&operator char())[N] {
        return data;
    }

    char data[N];
};

template<std::size_t N>
char_array(const char(&)[N]) -> char_array<N>;

#if defined(__cpp_char8_t)
template<std::size_t N>
char_array(const char8_t(&)[N]) -> char_array<N>;
#endif
*/

} // namespace util
} // namespace mapnik
|
||||||
|
|
||||||
|
#endif // FROM_U8STRING_HPP
|
|
@ -7,6 +7,7 @@
|
||||||
#include <mapnik/feature.hpp>
|
#include <mapnik/feature.hpp>
|
||||||
#include <mapnik/feature_factory.hpp>
|
#include <mapnik/feature_factory.hpp>
|
||||||
#include <mapnik/unicode.hpp>
|
#include <mapnik/unicode.hpp>
|
||||||
|
#include <mapnik/util/from_u8string.hpp>
|
||||||
|
|
||||||
#include <functional>
|
#include <functional>
|
||||||
#include <map>
|
#include <map>
|
||||||
|
@ -57,6 +58,7 @@ std::string parse_and_dump(std::string const& str)
|
||||||
TEST_CASE("expressions")
|
TEST_CASE("expressions")
|
||||||
{
|
{
|
||||||
using namespace std::placeholders;
|
using namespace std::placeholders;
|
||||||
|
using namespace mapnik::util;
|
||||||
using properties_type = std::map<std::string, mapnik::value>;
|
using properties_type = std::map<std::string, mapnik::value>;
|
||||||
mapnik::transcoder tr("utf8");
|
mapnik::transcoder tr("utf8");
|
||||||
|
|
||||||
|
@ -102,11 +104,11 @@ TEST_CASE("expressions")
|
||||||
|
|
||||||
// unicode attribute name
|
// unicode attribute name
|
||||||
TRY_CHECK(eval("[τ]") == prop.at("τ"));
|
TRY_CHECK(eval("[τ]") == prop.at("τ"));
|
||||||
TRY_CHECK(eval("[τ]") == eval(u8"[\u03C4]"));
|
TRY_CHECK(eval("[τ]") == eval(from_u8string(u8"[\u03C4]")));
|
||||||
|
|
||||||
// change to TRY_CHECK once \u1234 escape sequence in attribute name
|
// change to TRY_CHECK once \u1234 escape sequence in attribute name
|
||||||
// is implemented in expression grammar
|
// is implemented in expression grammar
|
||||||
CHECK_NOFAIL(eval("[τ]") == eval("[\\u03C3]"));
|
CHECK_NOFAIL(eval("[τ]") == eval(from_u8string(u8"[\\u03C3]")));
|
||||||
|
|
||||||
// unary functions
|
// unary functions
|
||||||
// sin / cos
|
// sin / cos
|
||||||
|
@ -190,7 +192,7 @@ TEST_CASE("expressions")
|
||||||
// https://en.wikipedia.org/wiki/Chess_symbols_in_Unicode
|
// https://en.wikipedia.org/wiki/Chess_symbols_in_Unicode
|
||||||
//'\u265C\u265E\u265D\u265B\u265A\u265D\u265E\u265C' - black chess figures
|
//'\u265C\u265E\u265D\u265B\u265A\u265D\u265E\u265C' - black chess figures
|
||||||
// replace black knights with white knights
|
// replace black knights with white knights
|
||||||
auto val0 = eval(u8"'\u265C\u265E\u265D\u265B\u265A\u265D\u265E\u265C'.replace('\u265E','\u2658')");
|
auto val0 = eval(from_u8string(u8"'\u265C\u265E\u265D\u265B\u265A\u265D\u265E\u265C'.replace('\u265E','\u2658')"));
|
||||||
auto val1 = eval("'♜♞♝♛♚♝♞♜'.replace('♞','♘')"); // ==> expected ♜♘♝♛♚♝♘♜
|
auto val1 = eval("'♜♞♝♛♚♝♞♜'.replace('♞','♘')"); // ==> expected ♜♘♝♛♚♝♘♜
|
||||||
TRY_CHECK(val0 == val1);
|
TRY_CHECK(val0 == val1);
|
||||||
TRY_CHECK(val0.to_string() == val1.to_string()); // UTF-8
|
TRY_CHECK(val0.to_string() == val1.to_string()); // UTF-8
|
||||||
|
|
|
@ -33,6 +33,7 @@
|
||||||
#include <mapnik/expression.hpp>
|
#include <mapnik/expression.hpp>
|
||||||
#include <mapnik/expression_evaluator.hpp>
|
#include <mapnik/expression_evaluator.hpp>
|
||||||
#include <mapnik/debug.hpp>
|
#include <mapnik/debug.hpp>
|
||||||
|
#include <mapnik/util/from_u8string.hpp>
|
||||||
#include <mapnik/util/fs.hpp>
|
#include <mapnik/util/fs.hpp>
|
||||||
#include <boost/format.hpp>
|
#include <boost/format.hpp>
|
||||||
#include <boost/optional/optional_io.hpp>
|
#include <boost/optional/optional_io.hpp>
|
||||||
|
@ -84,7 +85,7 @@ mapnik::datasource_ptr get_csv_ds(std::string const& file_name, bool strict = tr
|
||||||
} // anonymous namespace
|
} // anonymous namespace
|
||||||
|
|
||||||
TEST_CASE("csv") {
|
TEST_CASE("csv") {
|
||||||
|
using mapnik::util::from_u8string;
|
||||||
std::string csv_plugin("./plugins/input/csv.input");
|
std::string csv_plugin("./plugins/input/csv.input");
|
||||||
if (mapnik::util::exists(csv_plugin))
|
if (mapnik::util::exists(csv_plugin))
|
||||||
{
|
{
|
||||||
|
@ -862,7 +863,7 @@ TEST_CASE("csv") {
|
||||||
{
|
{
|
||||||
using ustring = mapnik::value_unicode_string;
|
using ustring = mapnik::value_unicode_string;
|
||||||
|
|
||||||
for (auto const &name : {std::string("Winthrop, WA"), std::string(u8"Qu\u00e9bec")}) {
|
for (auto const &name : {std::string("Winthrop, WA"), from_u8string(u8"Qu\u00e9bec")}) {
|
||||||
std::string csv_string =
|
std::string csv_string =
|
||||||
(boost::format(
|
(boost::format(
|
||||||
"wkt,Name\n"
|
"wkt,Name\n"
|
||||||
|
|
|
@ -3,9 +3,12 @@
|
||||||
#include <mapnik/text/harfbuzz_shaper.hpp>
|
#include <mapnik/text/harfbuzz_shaper.hpp>
|
||||||
#include <mapnik/text/font_library.hpp>
|
#include <mapnik/text/font_library.hpp>
|
||||||
#include <mapnik/unicode.hpp>
|
#include <mapnik/unicode.hpp>
|
||||||
|
#include <mapnik/util/from_u8string.hpp>
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
|
using mapnik::util::from_u8string;
|
||||||
|
|
||||||
void test_shaping( mapnik::font_set const& fontset, mapnik::face_manager& fm,
|
void test_shaping( mapnik::font_set const& fontset, mapnik::face_manager& fm,
|
||||||
std::vector<std::pair<unsigned, unsigned>> const& expected, char const* str, bool debug = false)
|
std::vector<std::pair<unsigned, unsigned>> const& expected, char const* str, bool debug = false)
|
||||||
{
|
{
|
||||||
|
@ -67,19 +70,19 @@ TEST_CASE("shaping")
|
||||||
{
|
{
|
||||||
std::vector<std::pair<unsigned, unsigned>> expected =
|
std::vector<std::pair<unsigned, unsigned>> expected =
|
||||||
{{977, 0}, {1094, 3}, {1038, 4}, {1168, 4}, {9, 7}, {3, 8}, {11, 9}, {68, 10}, {69, 11}, {70, 12}, {12, 13}};
|
{{977, 0}, {1094, 3}, {1038, 4}, {1168, 4}, {9, 7}, {3, 8}, {11, 9}, {68, 10}, {69, 11}, {70, 12}, {12, 13}};
|
||||||
test_shaping(fontset, fm, expected, u8"སྤུ་ཧྲེང (abc)");
|
test_shaping(fontset, fm, expected, from_u8string(u8"སྤུ་ཧྲེང (abc)").c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
std::vector<std::pair<unsigned, unsigned>> expected =
|
std::vector<std::pair<unsigned, unsigned>> expected =
|
||||||
{{977, 0}, {1094, 3}, {1038, 4}, {1168, 4}, {9, 7}, {3, 8}, {11, 9}, {0, 10}, {0, 11}, {0, 12}, {12, 13}};
|
{{977, 0}, {1094, 3}, {1038, 4}, {1168, 4}, {9, 7}, {3, 8}, {11, 9}, {0, 10}, {0, 11}, {0, 12}, {12, 13}};
|
||||||
test_shaping(fontset, fm, expected, u8"སྤུ་ཧྲེང (普兰镇)");
|
test_shaping(fontset, fm, expected, from_u8string(u8"སྤུ་ཧྲེང (普兰镇)").c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
std::vector<std::pair<unsigned, unsigned>> expected =
|
std::vector<std::pair<unsigned, unsigned>> expected =
|
||||||
{{68, 0}, {69, 1}, {70, 2}, {3, 3}, {11, 4}, {0, 5}, {0, 6}, {0, 7}, {12, 8}};
|
{{68, 0}, {69, 1}, {70, 2}, {3, 3}, {11, 4}, {0, 5}, {0, 6}, {0, 7}, {12, 8}};
|
||||||
test_shaping(fontset, fm, expected, u8"abc (普兰镇)");
|
test_shaping(fontset, fm, expected, from_u8string(u8"abc (普兰镇)").c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
|
@ -94,7 +97,7 @@ TEST_CASE("shaping")
|
||||||
{{0, 0}, {0, 1}, {0, 2}, {3, 3}, {0, 4}, {0, 5}, {0, 6}, {0, 7},
|
{{0, 0}, {0, 1}, {0, 2}, {3, 3}, {0, 4}, {0, 5}, {0, 6}, {0, 7},
|
||||||
{0, 8}, {0, 9}, {3, 10}, {509, 22}, {481, 21}, {438, 20}, {503, 19},
|
{0, 8}, {0, 9}, {3, 10}, {509, 22}, {481, 21}, {438, 20}, {503, 19},
|
||||||
{470, 18}, {496, 17}, {43, 16}, {3, 15}, {509, 14}, {454, 13}, {496, 12}, {43, 11}};
|
{470, 18}, {496, 17}, {43, 16}, {3, 15}, {509, 14}, {454, 13}, {496, 12}, {43, 11}};
|
||||||
test_shaping(fontset, fm, expected, u8"ⵃⴰⵢ ⵚⵉⵏⴰⵄⵉ الحي الصناعي");
|
test_shaping(fontset, fm, expected, from_u8string(u8"ⵃⴰⵢ ⵚⵉⵏⴰⵄⵉ الحي الصناعي").c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue