From 7c7dd0fa80f55293cbe437e0d7468430e117e5df Mon Sep 17 00:00:00 2001
From: Hermann Kraus <hermr2d2@gmail.com>
Date: Thu, 28 Jun 2012 21:43:28 +0200
Subject: [PATCH] Add files from https://github.com/herm/harfbuzz-test.

---
 SConstruct                            |   5 +
 include/mapnik/symbolizer_helpers.hpp |   2 +-
 include/mapnik/text/itemizer.hpp      |  65 ++++++++
 include/mapnik/text/layout.hpp        |  41 ++++++
 include/mapnik/text/scrptrun.hpp      | 157 ++++++++++++++++++++
 include/mapnik/text/shaping.hpp       |  33 +++++
 src/build.py                          |   6 +-
 src/text/itemizer.cpp                 | 126 ++++++++++++++++
 src/text/layout.cpp                   |  61 ++++++++
 src/text/scrptrun.cpp                 | 205 ++++++++++++++++++++++++++
 src/text/shaping.cpp                  |  90 +++++++++++
 11 files changed, 789 insertions(+), 2 deletions(-)
 create mode 100644 include/mapnik/text/itemizer.hpp
 create mode 100644 include/mapnik/text/layout.hpp
 create mode 100644 include/mapnik/text/scrptrun.hpp
 create mode 100644 include/mapnik/text/shaping.hpp
 create mode 100644 src/text/itemizer.cpp
 create mode 100644 src/text/layout.cpp
 create mode 100644 src/text/scrptrun.cpp
 create mode 100644 src/text/shaping.cpp

diff --git a/SConstruct b/SConstruct
index 6c45529d7..75d4f1e8b 100644
--- a/SConstruct
+++ b/SConstruct
@@ -66,6 +66,7 @@ pretty_dep_names = {
     'tiff':'TIFF C library | configure with TIFF_LIBS & TIFF_INCLUDES',
     'png':'PNG C library | configure with PNG_LIBS & PNG_INCLUDES',
     'icuuc':'ICU C++ library | configure with ICU_LIBS & ICU_INCLUDES or use ICU_LIB_NAME to specify custom lib name  | more info: http://site.icu-project.org/',
+    'harfbuzz':'HarfBuzz text shaping library | configure with HB_LIBS & HB_INCLUDES',
     'ltdl':'GNU Libtool | more info: http://www.gnu.org/software/libtool',
     'z':'Z compression library | more info: http://www.zlib.net/',
     'm':'Basic math library, part of C++ stlib',
@@ -310,6 +311,9 @@ opts.AddVariables(
     PathVariable('ICU_INCLUDES', 'Search path for ICU include files', '/usr/include', PathVariable.PathAccept),
     PathVariable('ICU_LIBS','Search path for ICU include files','/usr/' + LIBDIR_SCHEMA, PathVariable.PathAccept),
     ('ICU_LIB_NAME', 'The library name for icu (such as icuuc, sicuuc, or icucore)', 'icuuc'),
+    PathVariable('HB_INCLUDES', 'Search path for HarfBuzz include files', '/usr/include', PathVariable.PathAccept),
+    PathVariable('HB_LIBS','Search path for HarfBuzz library files','/usr/' + LIBDIR_SCHEMA, PathVariable.PathAccept),
+    # HarfBuzz has no *_LIB_NAME option: the library name 'harfbuzz' is used as-is.
     PathVariable('PNG_INCLUDES', 'Search path for libpng include files', '/usr/include', PathVariable.PathAccept),
     PathVariable('PNG_LIBS','Search path for libpng include files','/usr/' + LIBDIR_SCHEMA, PathVariable.PathAccept),
     BoolVariable('JPEG', 'Build Mapnik with JPEG read and write support', 'True'),
@@ -1087,6 +1091,7 @@ if not preconfigured:
         ['z', 'zlib.h', True,'C'],
         ['proj', 'proj_api.h', True,'C'],
         [env['ICU_LIB_NAME'],'unicode/unistr.h',True,'C++'],
+        ['harfbuzz', 'harfbuzz/hb.h',True,'C++'],
     ]
 
     if env['JPEG']:
diff --git a/include/mapnik/symbolizer_helpers.hpp b/include/mapnik/symbolizer_helpers.hpp
index aafb8864b..768fab869 100644
--- a/include/mapnik/symbolizer_helpers.hpp
+++ b/include/mapnik/symbolizer_helpers.hpp
@@ -80,7 +80,7 @@ public:
     }
 
     /** Return next placement.
-     * If no more placements are found returns null pointer.
+     * If no more placements are found, false is returned.
      */
     bool next();
 
diff --git a/include/mapnik/text/itemizer.hpp b/include/mapnik/text/itemizer.hpp
new file mode 100644
index 000000000..3b0da3c0e
--- /dev/null
+++ b/include/mapnik/text/itemizer.hpp
@@ -0,0 +1,65 @@
+#ifndef MAPNIK_TEXT_ITEMIZER_HPP
+#define MAPNIK_TEXT_ITEMIZER_HPP
+
+//mapnik
+#include <mapnik/text_properties.hpp> //TODO: Move to text/properties.hpp
+
+// stl
+#include <string>
+#include <list>
+
+// ICU
+#include <unicode/unistr.h>
+#include <unicode/uscript.h>
+#include <unicode/ubidi.h>
+namespace mapnik
+{
+
+struct text_item // One piece of text that is uniform in script, format and direction.
+{
+    UnicodeString str;      // text of this item
+    UScriptCode script;     // ICU script code of the item
+    char_properties format; // formatting properties of the item
+    UBiDiDirection rtl;     // ICU bidi direction of the item (an enum, not a boolean)
+    text_item(UnicodeString const& str) :
+        str(str), script(), format(), rtl(UBIDI_LTR) // direction defaults to left-to-right
+    {
+
+    }
+};
+
+/** This class splits text into parts which all have the same
+ * - direction (LTR, RTL)
+ * - format
+ * - script (http://en.wikipedia.org/wiki/Scripts_in_Unicode)
+ **/
+class text_itemizer
+{
+public:
+    text_itemizer();
+    void add_text(UnicodeString str, char_properties const& format); // append str; it is rendered with format
+    std::list<text_item> const& itemize(); // split the accumulated text; the list is owned by this object
+    void clear(); // drop the accumulated text, its formats and the item list
+    UnicodeString const& get_text() const { return text; } // all text added so far
+private:
+    template<typename T> struct run // a property value that applies up to (not including) offset limit
+    {
+        run(T data, unsigned limit) :  limit(limit), data(data){}
+        unsigned limit;
+        T data;
+    };
+    typedef run<char_properties> format_run_t;
+    typedef run<UBiDiDirection> direction_run_t;
+    typedef run<UScriptCode> script_run_t;
+    UnicodeString text; // concatenation of everything passed to add_text()
+    std::list<format_run_t> format_runs; // filled by add_text()
+    std::list<direction_run_t> direction_runs; // filled by itemize_direction()
+    std::list<script_run_t> script_runs; // filled by itemize_script()
+    void itemize_direction();
+    void itemize_script();
+    void create_item_list(); // merges the three run lists into output
+    std::list<text_item> output;
+};
+} //ns mapnik
+
+#endif // MAPNIK_TEXT_ITEMIZER_HPP
diff --git a/include/mapnik/text/layout.hpp b/include/mapnik/text/layout.hpp
new file mode 100644
index 000000000..a283370e2
--- /dev/null
+++ b/include/mapnik/text/layout.hpp
@@ -0,0 +1,41 @@
+#ifndef MAPNIK_TEXT_LAYOUT_HPP
+#define MAPNIK_TEXT_LAYOUT_HPP
+
+//mapnik
+#include <mapnik/text/itemizer.hpp>
+
+//stl
+#include <vector>
+
+namespace mapnik
+{
+
+struct glyph_info // One shaped glyph as recorded by text_layout::shape_text().
+{
+      uint32_t codepoint;     // copied from hb_glyph_info_t::codepoint
+      uint32_t byte_position; // UTF-8 byte offset of the glyph's item plus hb_glyph_info_t::cluster
+      uint32_t x_advance;     // copied from hb_glyph_position_t::x_advance
+};
+
+class text_layout
+{
+public:
+    text_layout(double text_ratio, double wrap_width);
+    inline void add_text(UnicodeString const& str, char_properties const& format)
+    {
+        itemizer.add_text(str, format); // text is only collected here; shaping happens in shape_text()
+    }
+
+    void break_lines();
+    void shape_text(); // itemizes the collected text and appends the shaped glyphs to glyphs_
+
+
+private:
+    text_itemizer itemizer;
+    double text_ratio_; // stored by the constructor
+    double wrap_width_; // stored by the constructor
+    std::vector<glyph_info> glyphs_; // filled by shape_text()
+};
+}
+
+#endif // MAPNIK_TEXT_LAYOUT_HPP
diff --git a/include/mapnik/text/scrptrun.hpp b/include/mapnik/text/scrptrun.hpp
new file mode 100644
index 000000000..c8acec63b
--- /dev/null
+++ b/include/mapnik/text/scrptrun.hpp
@@ -0,0 +1,157 @@
+/*
+ *******************************************************************************
+ *
+ *   Copyright (C) 1999-2003, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ *
+ *******************************************************************************
+ *   file name:  scrptrun.h
+ *
+ *   created on: 10/17/2001
+ *   created by: Eric R. Mader
+ *
+ * NOTE: This file is copied from ICU.
+ * http://source.icu-project.org/repos/icu/icu/trunk/license.html
+ */
+
+#ifndef __SCRPTRUN_H
+#define __SCRPTRUN_H
+
+#include "unicode/utypes.h"
+#include "unicode/uobject.h"
+#include "unicode/uscript.h"
+
+struct ScriptRecord // NOTE(review): not referenced by any code in this patch
+{
+    UChar32 startChar;
+    UChar32 endChar;
+    UScriptCode scriptCode;
+};
+
+struct ParenStackEntry // one opening paired character remembered by ScriptRun::next()
+{
+    int32_t pairIndex;      // index of the opening character in ScriptRun::pairedChars
+    UScriptCode scriptCode; // script code recorded for that opening character
+};
+
+class ScriptRun : public UObject {
+public:
+    ScriptRun(); // no text attached: null array, empty range
+
+    ScriptRun(const UChar chars[], int32_t length); // scan chars[0, length)
+
+    ScriptRun(const UChar chars[], int32_t start, int32_t length); // scan chars[start, start + length)
+
+    void reset(); // rewind to the start of the current range and empty the paren stack
+
+    void reset(int32_t start, int32_t count); // select a new range, then rewind
+
+    void reset(const UChar chars[], int32_t start, int32_t length); // attach new text and range, then rewind
+
+    int32_t getScriptStart(); // offset of the first code unit of the current run
+
+    int32_t getScriptEnd(); // offset one past the last code unit of the current run
+
+    UScriptCode getScriptCode(); // script code of the current run
+
+    UBool next(); // advance to the next run; returns false once the range is exhausted
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for the actual class.
+     *
+     * @stable ICU 2.2
+     */
+    virtual inline UClassID getDynamicClassID() const { return getStaticClassID(); }
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for this class.
+     *
+     * @stable ICU 2.2
+     */
+    static inline UClassID getStaticClassID() { return (UClassID)&fgClassID; }
+
+private:
+
+    static UBool sameScript(int32_t scriptOne, int32_t scriptTwo);
+
+    int32_t charStart; // first offset of the range to scan
+    int32_t charLimit; // one past the last offset of the range to scan
+    const UChar *charArray; // text being scanned; not owned
+
+    int32_t scriptStart;
+    int32_t scriptEnd;
+    UScriptCode scriptCode;
+
+    ParenStackEntry parenStack[128]; // currently open paired characters
+    int32_t parenSP; // index of the top entry of parenStack, -1 when it is empty
+
+    static int8_t highBit(int32_t value);
+    static int32_t getPairIndex(UChar32 ch);
+
+    static UChar32 pairedChars[];
+    static const int32_t pairedCharCount; // number of entries in pairedChars
+    static const int32_t pairedCharPower; // 1 << highBit(pairedCharCount)
+    static const int32_t pairedCharExtra; // pairedCharCount - pairedCharPower
+
+    /**
+     * The address of this static class variable serves as this class's ID
+     * for ICU "poor man's RTTI".
+     */
+    static const char fgClassID;
+};
+
+inline ScriptRun::ScriptRun() // every member is set through the reset() chain
+{
+    reset(NULL, 0, 0);
+}
+
+inline ScriptRun::ScriptRun(const UChar chars[], int32_t length) // range starts at offset 0
+{
+    reset(chars, 0, length);
+}
+
+inline ScriptRun::ScriptRun(const UChar chars[], int32_t start, int32_t length) // range is [start, start + length)
+{
+    reset(chars, start, length);
+}
+
+inline int32_t ScriptRun::getScriptStart() // offset at which the current run starts
+{
+    return scriptStart;
+}
+
+inline int32_t ScriptRun::getScriptEnd() // offset one past the end of the current run
+{
+    return scriptEnd;
+}
+
+inline UScriptCode ScriptRun::getScriptCode() // USCRIPT_INVALID_CODE until next() has been called
+{
+    return scriptCode;
+}
+
+inline void ScriptRun::reset() // rewind: the next call to next() starts again at charStart
+{
+    scriptStart = charStart;
+    scriptEnd   = charStart;
+    scriptCode  = USCRIPT_INVALID_CODE;
+    parenSP     = -1; // empty paren stack
+}
+
+inline void ScriptRun::reset(int32_t start, int32_t length) // new range on the same text
+{
+    charStart = start;
+    charLimit = start + length;
+
+    reset();
+}
+
+inline void ScriptRun::reset(const UChar chars[], int32_t start, int32_t length) // new text and range
+{
+    charArray = chars; // the pointer is stored, the text is not copied
+
+    reset(start, length);
+}
+
+
+#endif
diff --git a/include/mapnik/text/shaping.hpp b/include/mapnik/text/shaping.hpp
new file mode 100644
index 000000000..550c38d8f
--- /dev/null
+++ b/include/mapnik/text/shaping.hpp
@@ -0,0 +1,33 @@
+#ifndef MAPNIK_TEXT_SHAPING_HPP
+#define MAPNIK_TEXT_SHAPING_HPP
+
+//ICU
+#include <unicode/unistr.h>
+struct hb_font_t; // forward declarations use struct, the class-key HarfBuzz itself uses
+struct hb_buffer_t;
+struct hb_glyph_info_t;
+
+namespace mapnik
+{
+
+class text_shaping // Shapes text with HarfBuzz; owns one hb_font_t and one hb_buffer_t.
+{
+public:
+    //TODO: Get font file from font name
+    text_shaping();  // creates the buffer and tries to load the font
+    ~text_shaping(); // destroys the buffer and the font
+    // Shapes text into the internal buffer; returns the UTF-8 byte length of text (0 without a font).
+    uint32_t process_text(UnicodeString const& text);
+    hb_buffer_t *get_buffer() { return buffer_; } // the buffer stays owned by this object
+protected:
+    static void free_data(void *data); // blob destroy callback: delete[]s the font data
+    void load_font();
+    hb_font_t *font_;     // null when the font could not be loaded
+    hb_buffer_t *buffer_;
+private:
+    text_shaping(text_shaping const&);            // not implemented: a copy would make
+    text_shaping& operator=(text_shaping const&); // font_ and buffer_ get destroyed twice
+};
+} //ns mapnik
+
+#endif // MAPNIK_TEXT_SHAPING_HPP
diff --git a/src/build.py b/src/build.py
index f34a067d8..cbbce7d96 100644
--- a/src/build.py
+++ b/src/build.py
@@ -56,7 +56,7 @@ regex = 'boost_regex%s' % env['BOOST_APPEND']
 system = 'boost_system%s' % env['BOOST_APPEND']
 
 # clear out and re-set libs for this env
-lib_env['LIBS'] = ['freetype','ltdl','png','tiff','z','proj',env['ICU_LIB_NAME'],filesystem,system,regex]
+lib_env['LIBS'] = ['freetype','ltdl','png','tiff','z','proj',env['ICU_LIB_NAME'],filesystem,system,regex,'harfbuzz']
 
 if env['JPEG']:
    lib_env['LIBS'].append('jpeg')
@@ -181,6 +181,10 @@ source = Split(
     text_properties.cpp
     xml_tree.cpp
     config_error.cpp
+    text/shaping.cpp
+    text/layout.cpp
+    text/itemizer.cpp
+    text/scrptrun.cpp
     """
     )
 
diff --git a/src/text/itemizer.cpp b/src/text/itemizer.cpp
new file mode 100644
index 000000000..87355ab67
--- /dev/null
+++ b/src/text/itemizer.cpp
@@ -0,0 +1,126 @@
+//mapnik
+#include <mapnik/text/itemizer.hpp>
+#include <mapnik/text/scrptrun.hpp>
+
+// stl
+#include <iostream>
+#include <algorithm>
+
+namespace mapnik
+{
+
+text_itemizer::text_itemizer() : text(), format_runs(), direction_runs(), script_runs()
+{
+    // Nothing else to do: the text and all run lists start out empty.
+}
+
+void text_itemizer::add_text(UnicodeString str, char_properties const& format)
+{
+    text += str;
+    format_runs.push_back(format_run_t(format, text.length())); // format applies up to the new end of the text
+}
+
+std::list<text_item> const& text_itemizer::itemize()
+{
+    // format itemization is done by add_text()
+    itemize_direction();
+    itemize_script();
+    create_item_list();
+    return output; // reference to a member of this object
+}
+
+void text_itemizer::clear()
+{
+    output.clear();
+    text.remove();
+    format_runs.clear(); // direction_runs/script_runs are left alone: itemize_direction()/itemize_script() clear them
+}
+
+void text_itemizer::itemize_direction()
+{
+    // Rebuilds direction_runs; each entry holds the logical offset at which its run ends.
+    direction_runs.clear();
+    UErrorCode error = U_ZERO_ERROR;
+    int32_t const length = text.length();
+    UBiDi *bidi = ubidi_openSized(length, 0, &error);
+    ubidi_setPara(bidi, text.getBuffer(), length, UBIDI_DEFAULT_LTR, 0, &error);
+    if (U_SUCCESS(error))
+    {
+        UBiDiDirection const direction = ubidi_getDirection(bidi);
+        if (direction != UBIDI_MIXED)
+        {
+            direction_runs.push_back(direction_run_t(direction, length));
+        } else {
+            // Mixed-directional text. Visual runs are not necessarily in storage order,
+            // so walk the logical runs to keep the recorded limits increasing.
+            for (int32_t position = 0; position < length; )
+            {
+                int32_t limit = length; // ends the loop even if ICU leaves it untouched
+                UBiDiLevel level = 0;
+                ubidi_getLogicalRun(bidi, position, &limit, &level);
+                // Odd embedding levels are right-to-left, even ones left-to-right.
+                direction_runs.push_back(direction_run_t((level & 1) ? UBIDI_RTL : UBIDI_LTR, limit));
+                position = limit;
+            }
+        }
+    } else{
+        std::cerr << "ERROR:" << u_errorName(error) << "\n"; //TODO: Exception
+        // Fall back to one left-to-right run so that the runs still cover the whole text.
+        direction_runs.push_back(direction_run_t(UBIDI_LTR, length));
+    }
+    if (bidi) ubidi_close(bidi);
+}
+
+void text_itemizer::itemize_script()
+{
+    script_runs.clear();
+    // Rebuild script_runs from scratch: one entry per run reported by ScriptRun,
+    // holding the run's script code and the offset at which the run ends.
+    ScriptRun runs(text.getBuffer(), text.length());
+    while (runs.next()) {
+        script_runs.push_back(script_run_t(runs.getScriptCode(), runs.getScriptEnd()));
+    }
+}
+
+void text_itemizer::create_item_list()
+{
+    // Merges the script, direction and format runs into items. A new item starts wherever
+    // one of the three properties changes, so every item is uniform in all of them.
+    output.clear(); // rebuilt from scratch, just like the direction and script run lists
+    std::list<script_run_t>::const_iterator script_itr = script_runs.begin(), script_end = script_runs.end();
+    std::list<direction_run_t>::const_iterator dir_itr = direction_runs.begin(), dir_end = direction_runs.end();
+    std::list<format_run_t>::const_iterator format_itr = format_runs.begin(), format_end = format_runs.end();
+    unsigned const text_length = text.length();
+    unsigned position = 0;
+    while (position < text_length)
+    {
+        // Test before dereferencing: an exhausted list means the runs do not cover the whole text.
+        if (script_itr == script_end || dir_itr == dir_end || format_itr == format_end)
+        {
+            //TODO: EXCEPTION
+            std::cerr << "Limit error\n";
+            break;
+        }
+        unsigned next_position = std::min(script_itr->limit, std::min(dir_itr->limit, format_itr->limit));
+        text_item item(text.tempSubStringBetween(position, next_position));
+        item.format = format_itr->data;
+        item.script = script_itr->data;
+        item.rtl = dir_itr->data;
+        output.push_back(item);
+        // Step past every run that ends at this boundary.
+        if (script_itr->limit == next_position)
+        {
+            ++script_itr;
+        }
+        if (dir_itr->limit == next_position)
+        {
+            ++dir_itr;
+        }
+        if (format_itr->limit == next_position)
+        {
+            ++format_itr;
+        }
+        position = next_position;
+    }
+}
+} //ns mapnik
diff --git a/src/text/layout.cpp b/src/text/layout.cpp
new file mode 100644
index 000000000..6a1ab2265
--- /dev/null
+++ b/src/text/layout.cpp
@@ -0,0 +1,61 @@
+#include <mapnik/text/layout.hpp>
+#include <mapnik/text/shaping.hpp>
+
+//stl
+#include <iostream>
+
+// harf-buzz
+#include <harfbuzz/hb.h>
+
+namespace mapnik
+{
+text_layout::text_layout(double text_ratio, double wrap_width) : text_ratio_(text_ratio), wrap_width_(wrap_width)
+{ // both values are only stored; the itemizer and the glyph list are default-constructed
+}
+
+void text_layout::break_lines()
+{
+} // NOTE: empty body - line breaking is not implemented in this patch
+
+void text_layout::shape_text()
+{
+    // Shapes every item produced by the itemizer and appends the resulting glyphs to glyphs_.
+    glyphs_.reserve(itemizer.get_text().length()); //Preallocate memory
+    uint32_t byte_offset = 0;
+    std::list<text_item> const& list = itemizer.itemize();
+    std::list<text_item>::const_iterator itr = list.begin(), end = list.end();
+    // One shaper serves all items: constructing it reads the font file, and process_text()
+    // resets the buffer before each use.
+    text_shaping shaper;
+    for (;itr!=end; ++itr)
+    {
+        uint32_t bytes = shaper.process_text(itr->str);
+        hb_buffer_t *buffer = shaper.get_buffer();
+        unsigned num_glyphs = hb_buffer_get_length(buffer);
+        hb_glyph_info_t *glyphs = hb_buffer_get_glyph_infos(buffer, NULL);
+        hb_glyph_position_t *positions = hb_buffer_get_glyph_positions(buffer, NULL);
+        std::string s;
+        std::cout << "Processing item '" << itr->str.toUTF8String(s) << "' (" << uscript_getName(itr->script) << "," << itr->str.length() << "," << num_glyphs << ")\n";
+        for (unsigned i=0; i<num_glyphs; i++)
+        {
+            glyph_info tmp;
+            // cluster is relative to this item; byte_offset makes it relative to the whole text
+            tmp.byte_position = byte_offset + glyphs[i].cluster;
+            tmp.codepoint = glyphs[i].codepoint;
+            tmp.x_advance = positions[i].x_advance;
+            glyphs_.push_back(tmp);
+        }
+        byte_offset += bytes;
+    }
+    std::string s;
+    std::cout << "text_length: unicode chars: " << itemizer.get_text().length() << " bytes: " <<itemizer.get_text().toUTF8String(s).length() << " glyphs: " << glyphs_.size()  << "\n";
+    std::vector<glyph_info>::const_iterator itr2 = glyphs_.begin(), end2 = glyphs_.end();
+    for (;itr2 != end2; itr2++)
+    {
+        std::cout << "glyph codepoint:" << itr2->codepoint <<
+                 " cluster: " << itr2->byte_position <<
+                 " x_advance: "<< itr2->x_advance << "\n";
+    }
+}
+
+} //ns mapnik
diff --git a/src/text/scrptrun.cpp b/src/text/scrptrun.cpp
new file mode 100644
index 000000000..37a02e7c8
--- /dev/null
+++ b/src/text/scrptrun.cpp
@@ -0,0 +1,205 @@
+/*
+ *******************************************************************************
+ *
+ *   Copyright (C) 1999-2001, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ *
+ *******************************************************************************
+ *   file name:  scrptrun.cpp
+ *
+ *   created on: 10/17/2001
+ *   created by: Eric R. Mader
+ *
+ * NOTE: This file is copied from ICU.
+ * http://source.icu-project.org/repos/icu/icu/trunk/license.html
+ */
+
+#include <unicode/utypes.h>
+#include <unicode/uscript.h>
+
+#include <mapnik/text/scrptrun.hpp>
+
+#define ARRAY_SIZE(array) (sizeof array  / sizeof array[0])
+
+const char ScriptRun::fgClassID=0;
+
+UChar32 ScriptRun::pairedChars[] = { // sorted ascending for getPairIndex(); even index = opening, odd index = closing
+    0x0028, 0x0029, // ascii paired punctuation
+    0x003c, 0x003e,
+    0x005b, 0x005d,
+    0x007b, 0x007d,
+    0x00ab, 0x00bb, // guillemets
+    0x2018, 0x2019, // general punctuation
+    0x201c, 0x201d,
+    0x2039, 0x203a,
+    0x3008, 0x3009, // chinese paired punctuation
+    0x300a, 0x300b,
+    0x300c, 0x300d,
+    0x300e, 0x300f,
+    0x3010, 0x3011,
+    0x3014, 0x3015,
+    0x3016, 0x3017,
+    0x3018, 0x3019,
+    0x301a, 0x301b
+};
+
+const int32_t ScriptRun::pairedCharCount = ARRAY_SIZE(pairedChars);
+const int32_t ScriptRun::pairedCharPower = 1 << highBit(pairedCharCount);
+const int32_t ScriptRun::pairedCharExtra = pairedCharCount - pairedCharPower;
+
+int8_t ScriptRun::highBit(int32_t value) // index of the highest set bit of value, or -32 if value <= 0
+{
+    if (value <= 0) {
+        return -32;
+    }
+
+    int8_t bit = 0; // accumulated while halving the search window: 16, 8, 4, 2, 1 bits
+
+    if (value >= 1 << 16) {
+        value >>= 16;
+        bit += 16;
+    }
+
+    if (value >= 1 << 8) {
+        value >>= 8;
+        bit += 8;
+    }
+
+    if (value >= 1 << 4) {
+        value >>= 4;
+        bit += 4;
+    }
+
+    if (value >= 1 << 2) {
+        value >>= 2;
+        bit += 2;
+    }
+
+    if (value >= 1 << 1) {
+        value >>= 1;
+        bit += 1;
+    }
+
+    return bit;
+}
+
+int32_t ScriptRun::getPairIndex(UChar32 ch) // index of ch in pairedChars, or -1 if ch is not in the table
+{
+    int32_t probe = pairedCharPower; // 1 << highBit(pairedCharCount)
+    int32_t index = 0;
+
+    if (ch >= pairedChars[pairedCharExtra]) { // skip the entries that do not fit into the power-of-two window
+        index = pairedCharExtra;
+    }
+
+    while (probe > (1 << 0)) { // binary search; relies on pairedChars being sorted ascending
+        probe >>= 1;
+
+        if (ch >= pairedChars[index + probe]) {
+            index += probe;
+        }
+    }
+
+    if (pairedChars[index] != ch) {
+        index = -1;
+    }
+
+    return index;
+}
+
+UBool ScriptRun::sameScript(int32_t scriptOne, int32_t scriptTwo)
+{ // true when the codes are equal or when either one is <= USCRIPT_INHERITED
+    return scriptOne <= USCRIPT_INHERITED || scriptTwo <= USCRIPT_INHERITED || scriptOne == scriptTwo;
+}
+
+UBool ScriptRun::next()
+{
+    // Advances to the next script run: afterwards [scriptStart, scriptEnd) is the run and
+    // scriptCode its script. Returns false once the whole range has been consumed.
+    int32_t startSP  = parenSP;  // used to find the first new open character
+    UErrorCode error = U_ZERO_ERROR;
+    // if we've fallen off the end of the text, we're done
+    if (scriptEnd >= charLimit) {
+        return false;
+    }
+
+    scriptCode = USCRIPT_COMMON;
+    for (scriptStart = scriptEnd; scriptEnd < charLimit; scriptEnd += 1) {
+        UChar   high = charArray[scriptEnd];
+        UChar32 ch   = high;
+
+        // if the character is a high surrogate and it's not the last one
+        // in the text, see if it's followed by a low surrogate
+        if (high >= 0xD800 && high <= 0xDBFF && scriptEnd < charLimit - 1)
+        {
+            UChar low = charArray[scriptEnd + 1];
+            // if it is followed by a low surrogate,
+            // consume it and form the full character
+            if (low >= 0xDC00 && low <= 0xDFFF) {
+                ch = (high - 0xD800) * 0x0400 + low - 0xDC00 + 0x10000;
+                scriptEnd += 1;
+            }
+        }
+
+        UScriptCode sc = uscript_getScript(ch, &error);
+        int32_t pairIndex = getPairIndex(ch);
+
+        // Paired character handling:
+        // if it's an open character, push it onto the stack.
+        // if it's a close character, find the matching open on the
+        // stack, and use that script code. Any non-matching open
+        // characters above it on the stack will be popped.
+        if (pairIndex >= 0) {
+            if ((pairIndex & 1) == 0) {
+                // The stack has a fixed size: when it is full, empty it instead of writing
+                // past its end. Deeply nested pairs then lose their script matching.
+                if (++parenSP >= (int32_t) ARRAY_SIZE(parenStack)) {
+                    parenSP = 0;
+                    startSP = -1;
+                }
+                parenStack[parenSP].pairIndex = pairIndex;
+                parenStack[parenSP].scriptCode  = scriptCode;
+            } else if (parenSP >= 0) {
+                int32_t pi = pairIndex & ~1;
+                while (parenSP >= 0 && parenStack[parenSP].pairIndex != pi) {
+                    parenSP -= 1;
+                }
+                if (parenSP < startSP) {
+                    startSP = parenSP;
+                }
+                if (parenSP >= 0) {
+                    sc = parenStack[parenSP].scriptCode;
+                }
+            }
+        }
+
+        if (sameScript(scriptCode, sc)) {
+            if (scriptCode <= USCRIPT_INHERITED && sc > USCRIPT_INHERITED) {
+                scriptCode = sc;
+                // now that we have a final script code, fix any open
+                // characters we pushed before we knew the script code.
+                while (startSP < parenSP) {
+                    parenStack[++startSP].scriptCode = scriptCode;
+                }
+            }
+
+            // if this character is a close paired character, pop it from the stack.
+            // startSP only follows parenSP downwards: decrementing it blindly can take it
+            // below -1, and the fix-up loop above would then write to parenStack[-1].
+            if (pairIndex >= 0 && (pairIndex & 1) != 0 && parenSP >= 0) {
+                parenSP -= 1;
+                if (parenSP < startSP) startSP = parenSP;
+            }
+        } else {
+            // if the run broke on a surrogate pair,
+            // end it before the high surrogate
+            if (ch >= 0x10000) {
+                scriptEnd -= 1;
+            }
+            break;
+        }
+    }
+
+    return true;
+}
+
diff --git a/src/text/shaping.cpp b/src/text/shaping.cpp
new file mode 100644
index 000000000..0e9467f33
--- /dev/null
+++ b/src/text/shaping.cpp
@@ -0,0 +1,90 @@
+#include <mapnik/text/shaping.hpp>
+
+
+//stl
+#include <iostream>
+#include <fstream>
+
+//harf-buzz
+#define HAVE_FREETYPE
+#include <harfbuzz/hb.h>
+#include <harfbuzz/hb-ft.h>
+
+
+namespace mapnik
+{
+
+
+text_shaping::text_shaping()
+    : font_(0),
+      buffer_ (hb_buffer_create())
+{
+    load_font(); // leaves font_ null if the font file cannot be opened
+}
+
+text_shaping::~text_shaping()
+{
+    hb_buffer_destroy(buffer_);
+    hb_font_destroy(font_); // NOTE(review): font_ may be null here - confirm hb_font_destroy() accepts that
+}
+
+uint32_t text_shaping::process_text(const UnicodeString &text)
+{
+    if (!font_) return 0; // no font loaded: nothing is shaped and the buffer is left untouched
+    hb_buffer_reset(buffer_);
+    // HarfBuzz is fed UTF-8 here, so the returned length is a byte count, not a UTF-16 length.
+    std::string s;
+    text.toUTF8String(s);
+    hb_buffer_add_utf8(buffer_, s.c_str(), s.length(), 0, -1);
+#if 0
+    hb_buffer_set_direction(buffer, hb_direction_from_string (direction, -1));
+    hb_buffer_set_script(buffer, hb_script_from_string (script, -1));
+    hb_buffer_set_language(buffer, hb_language_from_string (language, -1));
+#endif
+    hb_shape(font_, buffer_, 0 /*features*/, 0 /*num_features*/);
+    return s.length();
+}
+
+void text_shaping::free_data(void *data)
+{
+    // Destroy callback of the font blob: load_font() allocates the buffer with new char[].
+    delete [] static_cast<char *>(data);
+}
+
+void text_shaping::load_font()
+{
+    //TODO: hb_ft_font_create
+    if (font_) return;
+//    std::ifstream file("./unifont-5.1.20080907.ttf" /*TODO*/, std::ios::in|std::ios::binary|std::ios::ate);
+    std::ifstream file("./DejaVuSans.ttf" /*TODO*/, std::ios::in|std::ios::binary|std::ios::ate);
+    if (!file.is_open())
+    {
+        std::cerr << "Could not open font!\n";
+        return ;//TODO: Raise exception
+    }
+    // Opened with ios::ate, so the position is the file size; tellg() yields -1 on failure.
+    std::streamoff const file_size = file.tellg();
+    unsigned int const size = file_size > 0 ? static_cast<unsigned int>(file_size) : 0;
+    char *font_data = new char[size];
+    file.seekg(0, std::ios::beg);
+    if (size == 0 || !file.read(font_data, size))
+    {
+        std::cerr << "Could not read font!\n";
+        delete [] font_data; // never hand an empty or partially read buffer to HarfBuzz
+        return ;//TODO: Raise exception
+    }
+    // The blob takes ownership of font_data and releases it through free_data().
+    hb_blob_t *blob = hb_blob_create(font_data, size, HB_MEMORY_MODE_WRITABLE, font_data, &free_data);
+    hb_face_t *face = hb_face_create(blob, 0 /*face_index*/);
+    hb_blob_destroy(blob);
+    font_ = hb_font_create(face);
+#if 1
+    //TODO: Font size
+    unsigned int upem = hb_face_get_upem(face);
+    hb_font_set_scale(font_, upem, upem);
+#endif
+    hb_face_destroy(face);
+    hb_ft_font_set_funcs(font_);
+}
+
+}