mapnik/src/unicode.cpp

226 lines
5.9 KiB
C++
Raw Normal View History

2008-01-03 12:41:00 +01:00
/*****************************************************************************
*
* This file is part of Mapnik (c++ mapping toolkit)
*
* Copyright (C) 2006 Artem Pavlenko
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*****************************************************************************/
//$Id$
#include <mapnik/unicode.hpp>
#ifdef USE_FRIBIDI
#include <fribidi/fribidi.h>
#endif
#include <cstdlib>
#include <cwchar>
#include <string>
#ifdef MAPNIK_DEBUG
#include <iostream>
#endif
namespace mapnik {
/*
** Run a NUL-terminated wide string through fribidi_log2vis and return the
** reordered (visual) text.
**
** logical : input string in logical order. Its characters are handed to
**           FriBidi through a pointer cast, so wchar_t and FriBidiChar must
**           have the same size -- NOTE(review): confirm this for every
**           platform the library is built on.
**
** Returns a calloc'ed, NUL-terminated buffer that the caller must release
** with free(), or 0 when the allocation or fribidi_log2vis fails.
*/
#ifdef USE_FRIBIDI
inline wchar_t* bidi_string(const wchar_t *logical)
{
    const size_t len = std::wcslen(logical);

    // The extra zero-initialised slot keeps the result NUL-terminated.
    FriBidiChar *visual =
        static_cast<FriBidiChar*>(std::calloc(len + 1, sizeof(FriBidiChar)));
    if (!visual)
    {
        return 0;
    }

    // Base direction is passed by address and starts out as FRIBIDI_TYPE_ON.
    FriBidiCharType base = FRIBIDI_TYPE_ON;

    // The cast drops const only because the FriBidi prototype this file was
    // written against takes a non-const input pointer.
    FriBidiChar *input =
        reinterpret_cast<FriBidiChar*>(const_cast<wchar_t*>(logical));

    // Only the visual string is requested: both position maps and the
    // embedding-level array are passed as null.
    if (!fribidi_log2vis(input, static_cast<FriBidiStrIndex>(len), &base,
                         /* output */
                         visual, 0, 0, 0))
    {
        // Release the buffer on failure; it used to leak here.
        std::free(visual);
        return 0;
    }

    return reinterpret_cast<wchar_t*>(visual);
}
#endif
2008-02-18 22:40:34 +01:00
/*
2008-01-03 12:41:00 +01:00
inline std::wstring to_unicode(std::string const& text)
{
std::wstring out;
unsigned long code = 0;
int expect = 0;
std::string::const_iterator itr=text.begin();
std::string::const_iterator end=text.end();
while ( itr != end)
{
unsigned p = (*itr++) & 0xff;
if ( p >= 0xc0)
{
if ( p < 0xe0) // U+0080 - U+07ff
{
expect = 1;
code = p & 0x1f;
}
else if ( p < 0xf0) // U+0800 - U+ffff
{
expect = 2;
code = p & 0x0f;
}
else if ( p < 0xf8) // U+1000 - U+10ffff
{
expect = 3;
code = p & 0x07;
}
continue;
}
else if (p >= 0x80)
{
--expect;
if (expect >= 0)
{
code <<= 6;
code += p & 0x3f;
}
if (expect > 0)
continue;
expect = 0;
}
else
{
code = p; // U+0000 - U+007f (ascii)
}
out.push_back(wchar_t(code));
}
#ifdef USE_FRIBIDI
wchar_t *bidi_text = bidi_string(out.c_str());
out = bidi_text;
free(bidi_text);
#endif
return out;
}
inline std::wstring latin1_to_unicode(std::string const& text)
{
std::wstring out;
std::string::const_iterator itr=text.begin();
std::string::const_iterator end=text.end();
while ( itr != end)
{
unsigned p = (*itr++) & 0xff;
out.push_back(wchar_t(p));
}
return out;
}
inline std::string latin1_to_unicode2(std::string const& text)
{
std::string out;
std::string::const_iterator itr=text.begin();
std::string::const_iterator end=text.end();
while ( itr != end)
{
out.push_back(0x00);
out.push_back(*itr++);
}
return out;
}
2008-02-18 22:40:34 +01:00
*/
2008-01-03 12:41:00 +01:00
// Open an ICU converter for the named source encoding.
// ok_ becomes true only when ucnv_open reports success; otherwise it keeps
// the false value assigned in the initialiser list.
transcoder::transcoder (std::string const& encoding)
    : ok_(false),
      conv_(0)
{
#ifdef MAPNIK_DEBUG
    std::cerr << "ENCODING = " << encoding << "\n";
#endif

    UErrorCode status = U_ZERO_ERROR;
    conv_ = ucnv_open(encoding.c_str(), &status);
    if (U_FAILURE(status))
    {
        return; // leave ok_ == false
    }
    ok_ = true;
    // TODO
}
2008-02-18 22:40:34 +01:00
// Convert a NUL-terminated byte string to a UnicodeString using conv_.
// A bogus result is cleared with remove() before being returned, so the
// caller never receives a string in the bogus state.
// (This ICU path replaces the earlier iconv-based implementation.)
UnicodeString transcoder::transcode(const char* data) const
{
    UErrorCode status = U_ZERO_ERROR;

    // A length of -1 is passed for the NUL-terminated input.
    UnicodeString converted(data, -1, conv_, status);
    if (!converted.isBogus())
    {
        return converted;
    }

    converted.remove();
    return converted;
}
// Close the converter held in conv_ when it is non-null.
transcoder::~transcoder()
{
    if (!conv_)
    {
        return;
    }
    ucnv_close(conv_);
}
}