Merge pull request #3485 from mapnik/regex-unicode

Regex unicode
This commit is contained in:
Dane Springmeyer 2016-08-09 14:58:51 -07:00 committed by GitHub
commit 3bfc358cb8
4 changed files with 35 additions and 13 deletions

View file

@ -1,6 +1,7 @@
#!/usr/bin/env bash #!/usr/bin/env bash
#set -eu set -eu
set -o pipefail
: ' : '
@ -10,7 +11,7 @@ todo
- shrink icu data - shrink icu data
' '
MASON_VERSION="b709931" MASON_VERSION="7ed8931"
function setup_mason() { function setup_mason() {
if [[ ! -d ./.mason ]]; then if [[ ! -d ./.mason ]]; then
@ -33,7 +34,7 @@ function install() {
if [[ ${3:-false} != false ]]; then if [[ ${3:-false} != false ]]; then
LA_FILE=$(mason prefix $1 $2)/lib/$3.la LA_FILE=$(mason prefix $1 $2)/lib/$3.la
if [[ -f ${LA_FILE} ]]; then if [[ -f ${LA_FILE} ]]; then
perl -i -p -e 's:\Q$ENV{HOME}/build/mapbox/mason\E:$ENV{PWD}:g' ${LA_FILE} perl -i -p -e 's:\Q$ENV{HOME}/build/mapbox/mason\E:$ENV{PWD}:g' ${LA_FILE}
else else
echo "$LA_FILE not found" echo "$LA_FILE not found"
fi fi
@ -44,6 +45,7 @@ function install() {
ICU_VERSION="55.1" ICU_VERSION="55.1"
function install_mason_deps() { function install_mason_deps() {
FAIL=0
install ccache 3.2.4 & install ccache 3.2.4 &
install jpeg_turbo 1.4.0 libjpeg & install jpeg_turbo 1.4.0 libjpeg &
install libpng 1.6.20 libpng & install libpng 1.6.20 libpng &
@ -51,7 +53,6 @@ function install_mason_deps() {
install libpq 9.4.1 & install libpq 9.4.1 &
install sqlite 3.8.8.3 libsqlite3 & install sqlite 3.8.8.3 libsqlite3 &
install expat 2.1.0 libexpat & install expat 2.1.0 libexpat &
wait
install icu ${ICU_VERSION} & install icu ${ICU_VERSION} &
install proj 4.8.0 libproj & install proj 4.8.0 libproj &
install pixman 0.32.6 libpixman-1 & install pixman 0.32.6 libpixman-1 &
@ -59,17 +60,22 @@ function install_mason_deps() {
install protobuf 2.6.1 & install protobuf 2.6.1 &
# technically protobuf is not a mapnik core dep, but installing # technically protobuf is not a mapnik core dep, but installing
# here by default helps make mapnik-vector-tile builds easier # here by default helps make mapnik-vector-tile builds easier
wait
install webp 0.4.2 libwebp & install webp 0.4.2 libwebp &
install gdal 1.11.2 libgdal & install gdal 1.11.2 libgdal &
install boost 1.61.0 & install boost 1.61.0 &
install boost_libsystem 1.61.0 & install boost_libsystem 1.61.0 &
install boost_libfilesystem 1.61.0 & install boost_libfilesystem 1.61.0 &
install boost_libprogram_options 1.61.0 & install boost_libprogram_options 1.61.0 &
install boost_libregex 1.61.0 & install boost_libregex_icu 1.61.0 &
install freetype 2.6 libfreetype & install freetype 2.6 libfreetype &
install harfbuzz 0.9.41 libharfbuzz & install harfbuzz 0.9.41 libharfbuzz &
wait for job in $(jobs -p)
do
wait $job || let "FAIL+=1"
done
if [[ "$FAIL" != "0" ]]; then
exit ${FAIL}
fi
} }
MASON_LINKED_ABS=$(pwd)/mason_packages/.link MASON_LINKED_ABS=$(pwd)/mason_packages/.link
@ -140,3 +146,8 @@ function main() {
} }
main main
# allow sourcing of script without
# causing the terminal to bail on error
set +eu
set +o pipefail

View file

@ -21,7 +21,6 @@ dependencies:
cache_directories: cache_directories:
- "~/.ccache" - "~/.ccache"
- "~/.apt-cache" - "~/.apt-cache"
- "mason_packages"
pre: pre:
# https://discuss.circleci.com/t/add-ability-to-cache-apt-get-programs/598/3 # https://discuss.circleci.com/t/add-ability-to-cache-apt-get-programs/598/3
- sudo rm -rf /var/cache/apt/archives && sudo ln -s ~/.apt-cache /var/cache/apt/archives && mkdir -p ~/.apt-cache/partial - sudo rm -rf /var/cache/apt/archives && sudo ln -s ~/.apt-cache /var/cache/apt/archives && mkdir -p ~/.apt-cache/partial

View file

@ -130,9 +130,9 @@ value regex_replace_node::apply(value const& v) const
auto const& pattern = impl_.get()->pattern_; auto const& pattern = impl_.get()->pattern_;
auto const& format = impl_.get()->format_; auto const& format = impl_.get()->format_;
#if defined(BOOST_REGEX_HAS_ICU) #if defined(BOOST_REGEX_HAS_ICU)
return boost::u32regex_replace(v.to_unicode(),pattern,format); return boost::u32regex_replace(v.to_unicode(), pattern, format);
#else #else
std::string repl = boost::regex_replace(v.to_string(),pattern,format); std::string repl = boost::regex_replace(v.to_string(), pattern, format);
transcoder tr_("utf8"); transcoder tr_("utf8");
return tr_.transcode(repl.c_str()); return tr_.transcode(repl.c_str());
#endif #endif

View file

@ -1,4 +1,3 @@
#include "catch_ext.hpp" #include "catch_ext.hpp"
#include <mapnik/expression.hpp> #include <mapnik/expression.hpp>
@ -176,6 +175,19 @@ TEST_CASE("expressions")
// regex // regex
// replace // replace
TRY_CHECK(eval(" [foo].replace('(\\B)|( )','$1 ') ") == tr.transcode("b a r")); TRY_CHECK(eval(" [foo].replace('(\\B)|( )','$1 ') ") == tr.transcode("b a r"));
// https://en.wikipedia.org/wiki/Chess_symbols_in_Unicode
//'\u265C\u265E\u265D\u265B\u265A\u265D\u265E\u265C' - black chess figures
// replace black knights with white knights
auto val0 = eval(u8"'\u265C\u265E\u265D\u265B\u265A\u265D\u265E\u265C'.replace('\u265E','\u2658')");
auto val1 = eval(u8"'♜♞♝♛♚♝♞♜'.replace('♞','♘')"); // ==> expected ♜♘♝♛♚♝♘♜
TRY_CHECK(val0 == val1);
TRY_CHECK(val0.to_string() == val1.to_string()); // UTF-8
TRY_CHECK(val0.to_unicode() == val1.to_unicode()); // Unicode (UTF-16)
// following test will fail if boost_regex is built without ICU support (unpaired surrogates in output)
TRY_CHECK(eval("[name].replace('(\\B)|( )',' ') ") == tr.transcode(u8"Q u é b e c"));
TRY_CHECK(eval("'Москва'.replace('(?<!^)(\\B|b)(?!$)',' ')") == tr.transcode(u8"М о с к в а"));
// 'foo' =~ s:(\w)\1:$1x:r // 'foo' =~ s:(\w)\1:$1x:r
TRY_CHECK(eval(" 'foo'.replace('(\\w)\\1', '$1x') ") == tr.transcode("fox")); TRY_CHECK(eval(" 'foo'.replace('(\\w)\\1', '$1x') ") == tr.transcode("fox"));
TRY_CHECK(parse_and_dump(" 'foo'.replace('(\\w)\\1', '$1x') ") == "'foo'.replace('(\\w)\\1','$1x')"); TRY_CHECK(parse_and_dump(" 'foo'.replace('(\\w)\\1', '$1x') ") == "'foo'.replace('(\\w)\\1','$1x')");
@ -187,8 +199,8 @@ TEST_CASE("expressions")
TRY_CHECK(parse_and_dump(" [name].match('^Q\\S*$') ") == "[name].match('^Q\\S*$')"); TRY_CHECK(parse_and_dump(" [name].match('^Q\\S*$') ") == "[name].match('^Q\\S*$')");
// string & value concatenation // string & value concatenation
// this should evaluate as two strings concatenating, but currently fails // this should evaluate as two strings concatenating
TRY_CHECK(eval("Hello + '!'") == eval("'Hello!'")); TRY_CHECK(eval("Hello + '!'") == eval("'Hello!'"));
// this should evaluate as a combination of an int value and string, but fails // this should evaluate as a combination of an int value and string
TRY_CHECK(eval("[int]+m") == eval("'123m'")); TRY_CHECK(eval("[int]+m") == eval("'123m'"));
} }