set most common language based on script for each text run (#3655)

This commit is contained in:
artemp 2017-05-03 10:23:42 +02:00
parent 82b876e267
commit 85aebf7268

View file

@ -43,8 +43,7 @@
#include <unicode/uscript.h>
#pragma GCC diagnostic pop
namespace mapnik
{
namespace mapnik { namespace detail {
static inline hb_script_t _icu_script_to_script(UScriptCode script)
{
@ -62,6 +61,73 @@ static inline const uint16_t * uchar_to_utf16(const UChar* src)
#endif
}
// Pick a representative language for a HarfBuzz script so that a text run
// can be given a language hint when only its script is known.
//
// script: the script of the run.
// Returns the language handle produced by hb_language_from_string() for the
// tag listed below, or HB_LANGUAGE_INVALID when the script has no entry in
// this table.
static hb_language_t script_to_language(hb_script_t script)
{
    switch (script)
    {
    // Unicode 1.1
    case HB_SCRIPT_ARABIC:              return hb_language_from_string("ar", -1);
    case HB_SCRIPT_ARMENIAN:            return hb_language_from_string("hy", -1);
    case HB_SCRIPT_BENGALI:             return hb_language_from_string("bn", -1);
    case HB_SCRIPT_CANADIAN_ABORIGINAL: return hb_language_from_string("iu", -1);
    case HB_SCRIPT_CHEROKEE:            return hb_language_from_string("chr", -1);
    case HB_SCRIPT_COPTIC:              return hb_language_from_string("cop", -1);
    case HB_SCRIPT_CYRILLIC:            return hb_language_from_string("ru", -1);
    case HB_SCRIPT_DEVANAGARI:          return hb_language_from_string("hi", -1);
    case HB_SCRIPT_GEORGIAN:            return hb_language_from_string("ka", -1);
    case HB_SCRIPT_GREEK:               return hb_language_from_string("el", -1);
    case HB_SCRIPT_GUJARATI:            return hb_language_from_string("gu", -1);
    case HB_SCRIPT_GURMUKHI:            return hb_language_from_string("pa", -1);
    case HB_SCRIPT_HANGUL:              return hb_language_from_string("ko", -1);
    case HB_SCRIPT_HAN:                 return hb_language_from_string("zh-Hans", -1);
    case HB_SCRIPT_HEBREW:              return hb_language_from_string("he", -1);
    case HB_SCRIPT_HIRAGANA:            return hb_language_from_string("ja", -1);
    case HB_SCRIPT_KANNADA:             return hb_language_from_string("kn", -1);
    case HB_SCRIPT_KATAKANA:            return hb_language_from_string("ja", -1);
    case HB_SCRIPT_LAO:                 return hb_language_from_string("lo", -1);
    case HB_SCRIPT_LATIN:               return hb_language_from_string("en", -1);
    case HB_SCRIPT_MALAYALAM:           return hb_language_from_string("ml", -1);
    case HB_SCRIPT_MONGOLIAN:           return hb_language_from_string("mn", -1);
    case HB_SCRIPT_ORIYA:               return hb_language_from_string("or", -1);
    case HB_SCRIPT_SYRIAC:              return hb_language_from_string("syr", -1);
    case HB_SCRIPT_TAMIL:               return hb_language_from_string("ta", -1);
    case HB_SCRIPT_TELUGU:              return hb_language_from_string("te", -1);
    case HB_SCRIPT_THAI:                return hb_language_from_string("th", -1);
    // Unicode 2.0
    case HB_SCRIPT_TIBETAN:             return hb_language_from_string("bo", -1);
    // Unicode 3.0
    case HB_SCRIPT_ETHIOPIC:            return hb_language_from_string("am", -1);
    case HB_SCRIPT_KHMER:               return hb_language_from_string("km", -1);
    case HB_SCRIPT_MYANMAR:             return hb_language_from_string("my", -1);
    case HB_SCRIPT_SINHALA:             return hb_language_from_string("si", -1);
    case HB_SCRIPT_THAANA:              return hb_language_from_string("dv", -1);
    // Unicode 3.2
    case HB_SCRIPT_BUHID:               return hb_language_from_string("bku", -1);
    case HB_SCRIPT_HANUNOO:             return hb_language_from_string("hnn", -1);
    case HB_SCRIPT_TAGALOG:             return hb_language_from_string("tl", -1);
    case HB_SCRIPT_TAGBANWA:            return hb_language_from_string("tbw", -1);
    // Unicode 4.0
    case HB_SCRIPT_UGARITIC:            return hb_language_from_string("uga", -1);
    // Unicode 4.1
    case HB_SCRIPT_BUGINESE:            return hb_language_from_string("bug", -1);
    case HB_SCRIPT_OLD_PERSIAN:         return hb_language_from_string("peo", -1);
    case HB_SCRIPT_SYLOTI_NAGRI:        return hb_language_from_string("syl", -1);
    // Unicode 5.0
    // The language code for N'Ko is "nqo"; "nko" is the code for Nkonya,
    // an unrelated language, so it must not be used here.
    case HB_SCRIPT_NKO:                 return hb_language_from_string("nqo", -1);
    // no representative language exists
    default:                            return HB_LANGUAGE_INVALID;
    }
}
} // ns detail
struct harfbuzz_shaper
{
static void shape_text(text_line & line,
@ -111,10 +177,18 @@ static void shape_text(text_line & line,
{
++pos;
hb_buffer_clear_contents(buffer.get());
hb_buffer_add_utf16(buffer.get(), uchar_to_utf16(text.getBuffer()), text.length(), text_item.start, static_cast<int>(text_item.end - text_item.start));
hb_buffer_add_utf16(buffer.get(), detail::uchar_to_utf16(text.getBuffer()), text.length(), text_item.start, static_cast<int>(text_item.end - text_item.start));
hb_buffer_set_direction(buffer.get(), (text_item.dir == UBIDI_RTL) ? HB_DIRECTION_RTL : HB_DIRECTION_LTR);
hb_buffer_set_script(buffer.get(), _icu_script_to_script(text_item.script));
hb_font_t *font(hb_ft_font_create(face->get_face(), nullptr));
auto script = detail::_icu_script_to_script(text_item.script);
auto language = detail::script_to_language(script);
if (language != HB_LANGUAGE_INVALID)
{
hb_buffer_set_language(buffer.get(), language); // set most common language for the run based script
}
hb_buffer_set_script(buffer.get(), script);
// https://github.com/mapnik/test-data-visual/pull/25
#if HB_VERSION_MAJOR > 0
#if HB_VERSION_ATLEAST(1, 0 , 5)