From dec6bc095081e19024a6045241ac5847880c8b70 Mon Sep 17 00:00:00 2001 From: Artem Pavlenko Date: Fri, 1 Nov 2019 13:06:21 +0000 Subject: [PATCH 01/13] avoid potential out-of-bounds array access (undefined behaviour) + add c++ `C-array` size implementation --- src/text/scrptrun.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/text/scrptrun.cpp b/src/text/scrptrun.cpp index 750998764..bc16cb811 100644 --- a/src/text/scrptrun.cpp +++ b/src/text/scrptrun.cpp @@ -21,8 +21,13 @@ #pragma GCC diagnostic pop #include +#include -#define ARRAY_SIZE(array) (sizeof(array) / sizeof(array[0])) +template +constexpr std::size_t ARRAY_SIZE(const T (&array)[N]) noexcept +{ + return N; +} const char ScriptRun::fgClassID=0; @@ -156,7 +161,8 @@ UBool ScriptRun::next() // characters above it on the stack will be poped. if (pairIndex >= 0) { if ((pairIndex & 1) == 0) { - parenStack[++parenSP].pairIndex = pairIndex; + parenSP = (++parenSP) % ARRAY_SIZE(parenStack); // avoid out-of-bounds access + parenStack[parenSP].pairIndex = pairIndex; parenStack[parenSP].scriptCode = scriptCode; } else if (parenSP >= 0) { int32_t pi = pairIndex & ~1; From 1edd3b7a930f6e2f2f8656ab8f946a8cc61c9d6f Mon Sep 17 00:00:00 2001 From: Artem Pavlenko Date: Fri, 1 Nov 2019 16:15:05 +0000 Subject: [PATCH 02/13] use `& mask` for array bounds clipping (provided array size is 2^n) --- include/mapnik/text/scrptrun.hpp | 5 ++++- src/text/scrptrun.cpp | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/include/mapnik/text/scrptrun.hpp b/include/mapnik/text/scrptrun.hpp index c219a2a0d..7cee74197 100644 --- a/include/mapnik/text/scrptrun.hpp +++ b/include/mapnik/text/scrptrun.hpp @@ -24,6 +24,9 @@ #include #pragma GCC diagnostic pop +const unsigned int STACK_SIZE = 1 << 7; // 2^n +const unsigned int STACK_MASK = STACK_SIZE - 1; + struct ScriptRecord { UChar32 startChar = 0; @@ -85,7 +88,7 @@ private: int32_t scriptEnd; UScriptCode scriptCode; - ParenStackEntry parenStack[128]; + ParenStackEntry parenStack[STACK_SIZE]; int32_t parenSP; static int8_t highBit(int32_t value); diff --git a/src/text/scrptrun.cpp b/src/text/scrptrun.cpp index bc16cb811..c2985397d 100644 --- a/src/text/scrptrun.cpp +++ b/src/text/scrptrun.cpp @@ -161,7 +161,7 @@ UBool ScriptRun::next() // characters above it on the stack will be poped. if (pairIndex >= 0) { if ((pairIndex & 1) == 0) { - parenSP = (++parenSP) % ARRAY_SIZE(parenStack); // avoid out-of-bounds access + parenSP = (++parenSP) & STACK_MASK; // avoid out-of-bounds access parenStack[parenSP].pairIndex = pairIndex; parenStack[parenSP].scriptCode = scriptCode; } else if (parenSP >= 0) { From 8aae32ec4c4cc859bf2bf70a0d087d9960806f6d Mon Sep 17 00:00:00 2001 From: Artem Pavlenko Date: Fri, 1 Nov 2019 17:09:12 +0000 Subject: [PATCH 03/13] better syntax --- src/text/scrptrun.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/text/scrptrun.cpp b/src/text/scrptrun.cpp index c2985397d..4dd25eb03 100644 --- a/src/text/scrptrun.cpp +++ b/src/text/scrptrun.cpp @@ -161,7 +161,7 @@ UBool ScriptRun::next() // characters above it on the stack will be poped. if (pairIndex >= 0) { if ((pairIndex & 1) == 0) { - parenSP = (++parenSP) & STACK_MASK; // avoid out-of-bounds access + parenSP = (parenSP + 1) & STACK_MASK; // avoid out-of-bounds access parenStack[parenSP].pairIndex = pairIndex; parenStack[parenSP].scriptCode = scriptCode; } else if (parenSP >= 0) { From 05d488f98dada1fec2417fbfc29b6a7e40ab3fe0 Mon Sep 17 00:00:00 2001 From: Artem Pavlenko Date: Mon, 4 Nov 2019 10:47:35 +0000 Subject: [PATCH 04/13] add script runs unit test --- test/build.py | 4 ++++ test/unit/text/script_runs.cpp | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) create mode 100644 test/unit/text/script_runs.cpp diff --git a/test/build.py b/test/build.py index 8a9a44523..53db47476 100644 --- a/test/build.py +++ b/test/build.py @@ -20,13 +20,16 @@ else: test_env.AppendUnique(LIBS='dl') test_env.AppendUnique(CXXFLAGS='-g') test_env['CXXFLAGS'] = copy(test_env['LIBMAPNIK_CXXFLAGS']) + test_env['CPPFLAGS'] = '-fsanitize=bounds' test_env.Append(CPPDEFINES = env['LIBMAPNIK_DEFINES']) + if test_env['HAS_CAIRO']: test_env.PrependUnique(CPPPATH=test_env['CAIRO_CPPPATHS']) test_env.Append(CPPDEFINES = '-DHAVE_CAIRO') test_env.PrependUnique(CPPPATH=['./']) if test_env['PLATFORM'] == 'Linux': test_env['LINKFLAGS'].append('-pthread') + test_env['LINKFLAGS'].append('-fsanitize=bounds') test_env.AppendUnique(LIBS='boost_program_options%s' % env['BOOST_APPEND']) test_env_local = test_env.Clone() @@ -34,6 +37,7 @@ else: # unit tests sources = glob.glob('./unit/*/*.cpp') sources.extend(glob.glob('./unit/*.cpp')) + sources.append('../src/text/scrptrun.cpp') test_program = test_env_local.Program("./unit/run", source=sources) Depends(test_program, env.subst('../src/%s' % env['MAPNIK_LIB_NAME'])) Depends(test_program, env.subst('../src/json/libmapnik-json${LIBSUFFIX}')) diff --git a/test/unit/text/script_runs.cpp b/test/unit/text/script_runs.cpp new file mode 100644 index 000000000..c5afff039 --- /dev/null +++ b/test/unit/text/script_runs.cpp @@ -0,0 +1,33 @@ +#include "catch.hpp" +#include +#include +#include + +TEST_CASE("nested script runs") +{ + mapnik::value_unicode_string text("Nested text runs(первый(second(третий)))"); //mixed scripts + ScriptRun runs(text.getBuffer(), text.length()); + std::size_t count = 0; + std::size_t size = 0; + while (runs.next()) + { + if (count & 1) CHECK(runs.getScriptCode() == USCRIPT_CYRILLIC); + else CHECK(runs.getScriptCode() == USCRIPT_LATIN); + size += runs.getScriptEnd() - runs.getScriptStart(); + ++count; + } + REQUIRE(count == 7); + REQUIRE(size == text.length()); +} + +TEST_CASE("many punctuation chars") +{ + mapnik::value_unicode_string text("(((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((test"); // more than 128 "paired" chars + ScriptRun runs(text.getBuffer(), text.length()); + while (runs.next()) + { + CHECK(runs.getScriptCode() == 25); + CHECK(runs.getScriptStart() == 0); + CHECK(runs.getScriptEnd() == text.length()); + } +} From e72803935a354461e66b24906210914aac51f1c1 Mon Sep 17 00:00:00 2001 From: Artem Pavlenko Date: Mon, 4 Nov 2019 11:22:41 +0000 Subject: [PATCH 05/13] add ICU include --- test/unit/text/script_runs.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/unit/text/script_runs.cpp b/test/unit/text/script_runs.cpp index c5afff039..e9bbcb030 100644 --- a/test/unit/text/script_runs.cpp +++ b/test/unit/text/script_runs.cpp @@ -1,7 +1,7 @@ #include "catch.hpp" +#include #include #include -#include TEST_CASE("nested script runs") { From f2732ed517a84a89563490d9edfd19082b88b957 Mon Sep 17 00:00:00 2001 From: Artem Pavlenko Date: Mon, 4 Nov 2019 11:35:47 +0000 Subject: [PATCH 06/13] remove sanitize=bounds --- test/build.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test/build.py b/test/build.py index 53db47476..57537bbee 100644 --- a/test/build.py +++ b/test/build.py @@ -29,7 +29,6 @@ else: test_env.PrependUnique(CPPPATH=['./']) if test_env['PLATFORM'] == 'Linux': test_env['LINKFLAGS'].append('-pthread') - test_env['LINKFLAGS'].append('-fsanitize=bounds') test_env.AppendUnique(LIBS='boost_program_options%s' % env['BOOST_APPEND']) test_env_local = test_env.Clone() From 370f38a2c35c50ad164f2b1de8b7d1a5c2d3d39b Mon Sep 17 00:00:00 2001 From: Artem Pavlenko Date: Mon, 4 Nov 2019 11:41:49 +0000 Subject: [PATCH 07/13] remove sanitize=bounds --- test/build.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test/build.py b/test/build.py index 57537bbee..12c74446c 100644 --- a/test/build.py +++ b/test/build.py @@ -20,7 +20,6 @@ else: test_env.AppendUnique(LIBS='dl') test_env.AppendUnique(CXXFLAGS='-g') test_env['CXXFLAGS'] = copy(test_env['LIBMAPNIK_CXXFLAGS']) - test_env['CPPFLAGS'] = '-fsanitize=bounds' test_env.Append(CPPDEFINES = env['LIBMAPNIK_DEFINES']) if test_env['HAS_CAIRO']: From 33fac6d47da91b609c4818beebfd803ad384df2e Mon Sep 17 00:00:00 2001 From: Artem Pavlenko Date: Mon, 4 Nov 2019 13:59:23 +0000 Subject: [PATCH 08/13] use std::vector to avoid limiting number of paired characters. --- include/mapnik/text/scrptrun.hpp | 9 +++++++-- src/text/scrptrun.cpp | 6 ++---- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/include/mapnik/text/scrptrun.hpp b/include/mapnik/text/scrptrun.hpp index 7cee74197..d7e7b0bf6 100644 --- a/include/mapnik/text/scrptrun.hpp +++ b/include/mapnik/text/scrptrun.hpp @@ -23,9 +23,9 @@ #include #include #pragma GCC diagnostic pop +#include const unsigned int STACK_SIZE = 1 << 7; // 2^n -const unsigned int STACK_MASK = STACK_SIZE - 1; struct ScriptRecord { @@ -36,6 +36,8 @@ struct ScriptRecord struct ParenStackEntry { + ParenStackEntry(int32_t pairIndex_, UScriptCode scriptCode_) + : pairIndex(pairIndex_), scriptCode(scriptCode_) {} int32_t pairIndex = 0; UScriptCode scriptCode = USCRIPT_INVALID_CODE; }; @@ -88,7 +90,7 @@ private: int32_t scriptEnd; UScriptCode scriptCode; - ParenStackEntry parenStack[STACK_SIZE]; + std::vector parenStack; int32_t parenSP; static int8_t highBit(int32_t value); @@ -108,16 +110,19 @@ private: inline ScriptRun::ScriptRun() { + parenStack.reserve(STACK_SIZE); reset(nullptr, 0, 0); } inline ScriptRun::ScriptRun(const UChar chars[], int32_t length) { + parenStack.reserve(STACK_SIZE); reset(chars, 0, length); } inline ScriptRun::ScriptRun(const UChar chars[], int32_t start, int32_t length) { + parenStack.reserve(STACK_SIZE); reset(chars, start, length); } diff --git a/src/text/scrptrun.cpp b/src/text/scrptrun.cpp index 4dd25eb03..071b7b239 100644 --- a/src/text/scrptrun.cpp +++ b/src/text/scrptrun.cpp @@ -21,7 +21,6 @@ #pragma GCC diagnostic pop #include -#include template constexpr std::size_t ARRAY_SIZE(const T (&array)[N]) noexcept @@ -161,9 +160,8 @@ UBool ScriptRun::next() // characters above it on the stack will be poped. if (pairIndex >= 0) { if ((pairIndex & 1) == 0) { - parenSP = (parenSP + 1) & STACK_MASK; // avoid out-of-bounds access - parenStack[parenSP].pairIndex = pairIndex; - parenStack[parenSP].scriptCode = scriptCode; + ++parenSP; + parenStack.emplace_back(pairIndex, scriptCode); } else if (parenSP >= 0) { int32_t pi = pairIndex & ~1; From 7003255c0e6d9e80295d04ee416f25db38fa7b0d Mon Sep 17 00:00:00 2001 From: Artem Pavlenko Date: Tue, 5 Nov 2019 09:50:51 +0000 Subject: [PATCH 09/13] avoid negative indicies access to `parenStack` - thanks @talaj! (ref #4096) --- src/text/scrptrun.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/text/scrptrun.cpp b/src/text/scrptrun.cpp index 071b7b239..1bdf797b7 100644 --- a/src/text/scrptrun.cpp +++ b/src/text/scrptrun.cpp @@ -162,6 +162,7 @@ UBool ScriptRun::next() if ((pairIndex & 1) == 0) { ++parenSP; parenStack.emplace_back(pairIndex, scriptCode); + startSP = parenSP; } else if (parenSP >= 0) { int32_t pi = pairIndex & ~1; From b84c414f2e4d8461f6a6cd44e5deb81a1753bce4 Mon Sep 17 00:00:00 2001 From: Artem Pavlenko Date: Tue, 5 Nov 2019 09:53:41 +0000 Subject: [PATCH 10/13] Add unit test for 7003255c0e6d9e80295d04ee416f25db38fa7b0d (#4096) --- test/unit/text/script_runs.cpp | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/test/unit/text/script_runs.cpp b/test/unit/text/script_runs.cpp index e9bbcb030..dd40e1b1e 100644 --- a/test/unit/text/script_runs.cpp +++ b/test/unit/text/script_runs.cpp @@ -31,3 +31,18 @@ TEST_CASE("many punctuation chars") CHECK(runs.getScriptEnd() == text.length()); } } + +TEST_CASE("empty runs") +{ + mapnik::value_unicode_string text("()text"); + ScriptRun runs(text.getBuffer(), text.length()); + std::size_t count = 0; + std::size_t size = 0; + while (runs.next()) + { + size += runs.getScriptEnd() - runs.getScriptStart(); + ++count; + } + REQUIRE(count == 1); + REQUIRE(size == text.length()); +} From f33e318ac56a7d0f55d765a5eeb7308d3acecb48 Mon Sep 17 00:00:00 2001 From: Artem Pavlenko Date: Tue, 5 Nov 2019 10:55:16 +0000 Subject: [PATCH 11/13] more compact test string initialisation via @talaj (#4096) --- test/unit/text/script_runs.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/unit/text/script_runs.cpp b/test/unit/text/script_runs.cpp index dd40e1b1e..10eec295f 100644 --- a/test/unit/text/script_runs.cpp +++ b/test/unit/text/script_runs.cpp @@ -22,7 +22,7 @@ TEST_CASE("nested script runs") TEST_CASE("many punctuation chars") { - mapnik::value_unicode_string text("(((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((test"); // more than 128 "paired" chars + mapnik::value_unicode_string text((std::string(791, '(') + "test").data()); ScriptRun runs(text.getBuffer(), text.length()); while (runs.next()) { From 9e82006314da0997c576fcdffbd2777b68ec38a0 Mon Sep 17 00:00:00 2001 From: Artem Pavlenko Date: Wed, 6 Nov 2019 15:08:30 +0000 Subject: [PATCH 12/13] make `ScriptRun` class visible --- include/mapnik/text/scrptrun.hpp | 3 ++- test/build.py | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/mapnik/text/scrptrun.hpp b/include/mapnik/text/scrptrun.hpp index d7e7b0bf6..bbf479881 100644 --- a/include/mapnik/text/scrptrun.hpp +++ b/include/mapnik/text/scrptrun.hpp @@ -17,6 +17,7 @@ #ifndef __SCRPTRUN_H #define __SCRPTRUN_H +#include #pragma GCC diagnostic push #include #include @@ -42,7 +43,7 @@ struct ParenStackEntry UScriptCode scriptCode = USCRIPT_INVALID_CODE; }; -class ScriptRun : public icu::UObject { +class MAPNIK_DECL ScriptRun : public icu::UObject { public: ScriptRun(); diff --git a/test/build.py b/test/build.py index 12c74446c..6771bae1f 100644 --- a/test/build.py +++ b/test/build.py @@ -35,7 +35,6 @@ else: # unit tests sources = glob.glob('./unit/*/*.cpp') sources.extend(glob.glob('./unit/*.cpp')) - sources.append('../src/text/scrptrun.cpp') test_program = test_env_local.Program("./unit/run", source=sources) Depends(test_program, env.subst('../src/%s' % env['MAPNIK_LIB_NAME'])) Depends(test_program, env.subst('../src/json/libmapnik-json${LIBSUFFIX}')) From bf6ce8dd8db3c5c0ec6f2a828dfe02362a7d548f Mon Sep 17 00:00:00 2001 From: Artem Pavlenko Date: Wed, 6 Nov 2019 15:21:25 +0000 Subject: [PATCH 13/13] attempting to fix circleci --- .circleci/config.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 24ac78369..19cb56764 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -18,8 +18,9 @@ jobs: # To see the list of pre-built images that CircleCI provides for most common languages see # https://circleci.com/docs/2.0/circleci-images/ docker: + # https://discuss.circleci.com/t/aug-13-14-unable-to-run-the-job-runner-issue/31958 - image: circleci/build-image:ubuntu-14.04-XXL-upstart-1189-5614f37 - command: /sbin/init + # command: /sbin/init steps: - checkout # Prepare for artifact and test results collection equivalent to how it was done on 1.0.