2012-08-17 22:46:32 +02:00
|
|
|
/*****************************************************************************
|
|
|
|
*
|
|
|
|
* This file is part of Mapnik (c++ mapping toolkit)
|
|
|
|
*
|
2015-06-16 12:49:16 +02:00
|
|
|
* Copyright (C) 2015 Artem Pavlenko
|
2012-08-17 22:46:32 +02:00
|
|
|
*
|
|
|
|
* This library is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
|
|
* License as published by the Free Software Foundation; either
|
|
|
|
* version 2.1 of the License, or (at your option) any later version.
|
|
|
|
*
|
|
|
|
* This library is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
* Lesser General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
|
|
* License along with this library; if not, write to the Free Software
|
|
|
|
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
|
*
|
|
|
|
*****************************************************************************/
|
|
|
|
|
|
|
|
#ifndef MAPNIK_CSV_UTILS_DATASOURCE_HPP
|
|
|
|
#define MAPNIK_CSV_UTILS_DATASOURCE_HPP
|
|
|
|
|
2015-08-24 12:23:59 +02:00
|
|
|
// mapnik
|
2015-08-21 13:52:42 +02:00
|
|
|
#include <mapnik/debug.hpp>
|
|
|
|
#include <mapnik/geometry.hpp>
|
|
|
|
#include <mapnik/geometry_correct.hpp>
|
|
|
|
#include <mapnik/wkt/wkt_factory.hpp>
|
|
|
|
#include <mapnik/json/geometry_parser.hpp>
|
|
|
|
#include <mapnik/util/conversions.hpp>
|
2015-08-24 12:23:59 +02:00
|
|
|
#include <mapnik/csv/csv_grammar.hpp>
|
2015-08-24 14:13:13 +02:00
|
|
|
#include <mapnik/util/trim.hpp>
|
2015-08-24 12:23:59 +02:00
|
|
|
// boost
|
2014-10-22 01:37:27 +02:00
|
|
|
#pragma GCC diagnostic push
|
|
|
|
#pragma GCC diagnostic ignored "-Wunused-parameter"
|
|
|
|
#pragma GCC diagnostic ignored "-Wunused-local-typedef"
|
2015-06-16 04:36:36 +02:00
|
|
|
#pragma GCC diagnostic ignored "-Wsign-conversion"
|
2014-10-22 01:37:27 +02:00
|
|
|
#include <boost/algorithm/string.hpp>
|
|
|
|
#pragma GCC diagnostic pop
|
2012-08-17 22:46:32 +02:00
|
|
|
|
|
|
|
#include <string>
|
2014-10-01 01:30:36 +02:00
|
|
|
#include <cstdio>
|
2015-08-24 12:23:59 +02:00
|
|
|
#include <algorithm>
|
2012-08-17 22:46:32 +02:00
|
|
|
|
2015-09-09 11:53:17 +02:00
|
|
|
#ifndef _WINDOWS
|
|
|
|
#define CSV_MEMORY_MAPPED_FILE
|
|
|
|
#endif
|
|
|
|
|
2012-08-17 22:46:32 +02:00
|
|
|
namespace csv_utils
|
|
|
|
{
|
2015-08-24 12:23:59 +02:00
|
|
|
|
|
|
|
// CSV line grammar instantiated for `char const*` iterators. parse_line() below
// invokes it as `line_g(separator, quote)`. Declared `static` in a header, so
// each translation unit that includes this file gets its own instance.
static const mapnik::csv_line_grammar<char const*> line_g;
|
2015-10-07 13:34:20 +02:00
|
|
|
// Skipper passed to qi::phrase_parse by parse_line() below (presumably skips
// insignificant white space between fields, per its type name - confirm in
// csv_grammar.hpp). Like line_g, one instance exists per translation unit.
static const mapnik::csv_white_space_skipper<char const*> skipper;
|
2015-08-24 12:23:59 +02:00
|
|
|
|
2015-08-25 15:05:04 +02:00
|
|
|
template <typename Iterator>
|
2015-10-05 10:34:02 +02:00
|
|
|
static mapnik::csv_line parse_line(Iterator start, Iterator end, char separator, char quote, std::size_t num_columns)
|
2015-08-24 12:23:59 +02:00
|
|
|
{
|
|
|
|
mapnik::csv_line values;
|
2015-08-24 16:35:32 +02:00
|
|
|
if (num_columns > 0) values.reserve(num_columns);
|
2015-10-07 13:34:20 +02:00
|
|
|
if (!boost::spirit::qi::phrase_parse(start, end, (line_g)(separator, quote), skipper, values))
|
2014-10-01 01:30:36 +02:00
|
|
|
{
|
2015-08-24 15:41:04 +02:00
|
|
|
throw std::runtime_error("Failed to parse CSV line:\n" + std::string(start, end));
|
2014-10-01 01:30:36 +02:00
|
|
|
}
|
2015-08-24 12:23:59 +02:00
|
|
|
return values;
|
|
|
|
}
|
2014-10-01 01:30:36 +02:00
|
|
|
|
2015-10-05 10:34:02 +02:00
|
|
|
// Convenience overload: parses a whole std::string as a single CSV record by
// forwarding its character range to the iterator-based parse_line(), with no
// column-count hint (num_columns == 0).
static inline mapnik::csv_line parse_line(std::string const& line_str, char separator, char quote)
{
    char const* const first = line_str.c_str();
    char const* const last = first + line_str.length();
    return parse_line(first, last, separator, quote, 0);
}
|
|
|
|
|
2015-08-24 12:23:59 +02:00
|
|
|
// Cheap pre-filter deciding whether `value` is worth handing to a numeric
// parser: returns true when every character is one of "e-.+0123456789".
//
// This is a character-set test only, not a validation: strings such as "-",
// "e" or "1.2.3" pass, and so does the empty string (there is no offending
// character). Callers must still run a real conversion and handle failure.
//
// Implemented with std::string::find_first_not_of rather than std::strspn so
// the check does not depend on <cstring> (which this header never includes)
// and does not need a round trip through c_str(). A string containing an
// embedded NUL is rejected, exactly as before.
static inline bool is_likely_number(std::string const& value)
{
    return value.find_first_not_of("e-.+0123456789") == std::string::npos;
}
|
2014-11-20 15:25:50 +01:00
|
|
|
|
2015-08-24 12:23:59 +02:00
|
|
|
// Binary predicate comparing two characters without regard to case.
// Parameters are `unsigned char` so that the values handed to std::tolower
// are always representable as unsigned char, as <cctype> requires.
struct ignore_case_equal_pred
{
    bool operator () (unsigned char a, unsigned char b) const
    {
        return std::tolower(a) == std::tolower(b);
    }
};

// Returns true when s0 and s1 have the same length and are equal character by
// character, ignoring case (per std::tolower in the current C locale).
//
// The length check is essential: the three-iterator form of std::equal only
// walks [s0.begin(), s0.end()), so without it a prefix such as "t" compared
// equal to "true", and an s0 longer than s1 read past the end of s1.
inline bool ignore_case_equal(std::string const& s0, std::string const& s1)
{
    return s0.size() == s1.size()
        && std::equal(s0.begin(), s0.end(), s1.begin(), ignore_case_equal_pred());
}
|
|
|
|
|
2015-10-01 19:33:32 +02:00
|
|
|
// Quote-aware variant of std::getline for CSV input.
//
// Extracts characters from `is` into `s` (which is cleared first) until an
// unquoted `delim` is read or end of input is reached. Every occurrence of
// `quote` toggles an "inside quotes" state; a `delim` seen while inside quotes
// is kept as data, so quoted fields may span several physical lines. Quote
// characters themselves are stored in `s` unchanged.
//
//   is     input stream (any std::basic_istream<CharT, Traits>)
//   s      receives the record; the terminating delimiter is consumed but
//          not stored
//   delim  record delimiter
//   quote  quoting character
//
// Returns `is`. Stream state mirrors std::getline: eofbit is set when the
// input ends, failbit when nothing at all could be extracted or when
// s.max_size() characters were read.
//
// The stream parameter uses the same CharT/Traits as the string and the
// character read from the buffer is held in Traits::int_type, so the template
// is well-formed for character types other than `char` as well.
template <class CharT, class Traits, class Allocator>
std::basic_istream<CharT, Traits>& getline_csv(std::basic_istream<CharT, Traits>& is, std::basic_string<CharT,Traits,Allocator>& s, CharT delim, CharT quote)
{
    using int_type = typename Traits::int_type;

    typename std::basic_string<CharT,Traits,Allocator>::size_type nread = 0;
    // noskipws == true: leading white space is part of the record.
    typename std::basic_istream<CharT, Traits>::sentry sentry(is, true);
    if (sentry)
    {
        std::basic_streambuf<CharT, Traits>* buf = is.rdbuf();
        s.clear();
        bool has_quote = false; // true while between an opening and a closing quote
        while (nread < s.max_size())
        {
            int_type const c1 = buf->sbumpc();
            if (Traits::eq_int_type(c1, Traits::eof()))
            {
                is.setstate(std::ios_base::eofbit);
                break;
            }
            ++nread;
            CharT const c = Traits::to_char_type(c1);
            if (Traits::eq(c, quote))
            {
                has_quote = !has_quote;
            }
            if (Traits::eq(c, delim) && !has_quote)
            {
                break; // Delimiter is extracted but not appended.
            }
            s.push_back(c);
        }
    }
    if (nread == 0 || nread >= s.max_size())
    {
        is.setstate(std::ios_base::failbit);
    }
    return is;
}
|
|
|
|
|
2012-08-17 22:46:32 +02:00
|
|
|
}
|
|
|
|
|
2015-08-21 13:52:42 +02:00
|
|
|
|
|
|
|
namespace detail {
|
|
|
|
|
|
|
|
// Returns the length of `stream` in characters by seeking to its end and
// reading the resulting position.
//
// Side effect: the read position is left at the end of the stream; callers
// that want to read the content must seek back themselves.
//
// If the position cannot be determined (tellg() reports pos_type(-1), e.g. the
// stream is in a failed state) 0 is returned instead of letting -1 wrap around
// to a huge std::size_t.
template <typename T>
std::size_t file_length(T & stream)
{
    stream.seekg(0, std::ios::end);
    auto const end_offset = static_cast<typename T::off_type>(stream.tellg());
    if (end_offset < 0)
    {
        return 0;
    }
    return static_cast<std::size_t>(end_offset);
}
|
|
|
|
|
2015-10-05 16:22:09 +02:00
|
|
|
static inline char detect_separator(std::string const& str)
|
2015-08-21 13:52:42 +02:00
|
|
|
{
|
2015-10-05 10:34:02 +02:00
|
|
|
char separator = ','; // default
|
2015-08-21 13:52:42 +02:00
|
|
|
int num_commas = std::count(str.begin(), str.end(), ',');
|
|
|
|
// detect tabs
|
|
|
|
int num_tabs = std::count(str.begin(), str.end(), '\t');
|
|
|
|
if (num_tabs > 0)
|
|
|
|
{
|
|
|
|
if (num_tabs > num_commas)
|
|
|
|
{
|
2015-10-05 10:34:02 +02:00
|
|
|
separator = '\t';
|
2015-08-21 13:52:42 +02:00
|
|
|
MAPNIK_LOG_DEBUG(csv) << "csv_datasource: auto detected tab separator";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else // pipes
|
|
|
|
{
|
|
|
|
int num_pipes = std::count(str.begin(), str.end(), '|');
|
|
|
|
if (num_pipes > num_commas)
|
|
|
|
{
|
2015-10-05 10:34:02 +02:00
|
|
|
separator = '|';
|
2015-08-21 13:52:42 +02:00
|
|
|
MAPNIK_LOG_DEBUG(csv) << "csv_datasource: auto detected '|' separator";
|
|
|
|
}
|
|
|
|
else // semicolons
|
|
|
|
{
|
|
|
|
int num_semicolons = std::count(str.begin(), str.end(), ';');
|
|
|
|
if (num_semicolons > num_commas)
|
|
|
|
{
|
2015-10-05 10:34:02 +02:00
|
|
|
separator = ';';
|
2015-08-21 13:52:42 +02:00
|
|
|
MAPNIK_LOG_DEBUG(csv) << "csv_datasource: auto detected ';' separator";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return separator;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Inspects the beginning of `stream` (at most 4000 characters, and never more
// than `file_length`) to guess the newline and quote conventions of a CSV
// file. Reading starts at the stream's current position and is not rewound.
//
// Returns a tuple (newline, has_newline, quote):
//   newline      '\r' if any carriage return was seen in the sample,
//                otherwise '\n'
//   has_newline  true if a '\r' or '\n' was seen at all
//   quote        the first quoting character encountered, either '"' or '\'';
//                '"' when the sample contains neither
//
// The first quote character seen decides the result. Previously any
// apostrophe anywhere in the sample switched the quote to '\'', which
// mis-detected ordinary double-quoted data such as "it's".
template <typename T>
std::tuple<char,bool,char> autodect_newline_and_quote(T & stream, std::size_t file_length)
{
    static std::size_t const max_size = 4000;

    char newline = '\n';
    bool has_newline = false;
    char quote = '"';
    bool has_quote = false;

    std::size_t const size = std::min(file_length, max_size);
    for (std::size_t lidx = 0; lidx < size; ++lidx)
    {
        char const c = static_cast<char>(stream.get());
        if (!stream)
        {
            // Ran out of input before `size` characters; nothing more to learn.
            break;
        }
        if (c == '\r')
        {
            newline = '\r';
            has_newline = true;
        }
        else if (c == '\n')
        {
            has_newline = true;
        }
        else if (!has_quote && (c == '"' || c == '\''))
        {
            quote = c;
            has_quote = true;
        }
    }
    return std::make_tuple(newline, has_newline, quote);
}
|
|
|
|
|
|
|
|
|
|
|
|
// Records which CSV column(s) hold the geometry and how it is encoded.
// Filled in by locate_geometry_column() and read by extract_geometry().
struct geometry_column_locator
{
    // Starts out as "nothing located": type UNKNOWN, both indexes set to the
    // largest std::size_t value (i.e. -1 converted to unsigned).
    geometry_column_locator()
        : type(UNKNOWN),
          index(static_cast<std::size_t>(-1)),
          index2(static_cast<std::size_t>(-1))
    {}

    // Encoding of the geometry column(s).
    enum
    {
        UNKNOWN = 0,
        WKT = 1,
        GEOJSON = 2,
        LON_LAT = 3
    } type;

    // Column of the WKT / GeoJSON text, or of the longitude (x) value.
    std::size_t index;
    // Column of the latitude (y) value; only assigned for LON_LAT.
    std::size_t index2;
};
|
|
|
|
|
|
|
|
static inline void locate_geometry_column(std::string const& header, std::size_t index, geometry_column_locator & locator)
|
|
|
|
{
|
|
|
|
std::string lower_val(header);
|
|
|
|
std::transform(lower_val.begin(), lower_val.end(), lower_val.begin(), ::tolower);
|
|
|
|
if (lower_val == "wkt" || (lower_val.find("geom") != std::string::npos))
|
|
|
|
{
|
|
|
|
locator.type = geometry_column_locator::WKT;
|
|
|
|
locator.index = index;
|
|
|
|
}
|
|
|
|
else if (lower_val == "geojson")
|
|
|
|
{
|
|
|
|
locator.type = geometry_column_locator::GEOJSON;
|
|
|
|
locator.index = index;
|
|
|
|
}
|
|
|
|
else if (lower_val == "x" || lower_val == "lon"
|
2015-08-24 12:23:59 +02:00
|
|
|
|| lower_val == "lng" || lower_val == "long"
|
2015-08-21 13:52:42 +02:00
|
|
|
|| (lower_val.find("longitude") != std::string::npos))
|
|
|
|
{
|
|
|
|
locator.index = index;
|
|
|
|
locator.type = geometry_column_locator::LON_LAT;
|
|
|
|
}
|
|
|
|
|
|
|
|
else if (lower_val == "y"
|
|
|
|
|| lower_val == "lat"
|
|
|
|
|| (lower_val.find("latitude") != std::string::npos))
|
|
|
|
{
|
|
|
|
locator.index2 = index;
|
|
|
|
locator.type = geometry_column_locator::LON_LAT;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-10-09 12:26:10 +02:00
|
|
|
// Builds the geometry for one CSV row according to `locator`.
//
//   WKT      parses row[locator.index] as WKT and corrects ring orientation
//   GEOJSON  parses row[locator.index] as a GeoJSON geometry
//   LON_LAT  builds a point from row[locator.index] (x) and
//            row[locator.index2] (y)
//   UNKNOWN  returns a default-constructed geometry
//
// Throws std::runtime_error when a value cannot be parsed, or when a column
// index held by `locator` does not exist in `row` (short row, or a LON_LAT
// locator for which only one of the two columns was ever found). The index
// check replaces what used to be an unchecked out-of-bounds vector access.
static inline mapnik::geometry::geometry<double> extract_geometry(std::vector<std::string> const& row, geometry_column_locator const& locator)
{
    // Bounds-checked column access; `what` names the column in the error text.
    auto const column = [&row](std::size_t idx, char const* what) -> std::string const&
    {
        if (idx >= row.size())
        {
            throw std::runtime_error(std::string("Failed to locate ") + what + " column in CSV row");
        }
        return row[idx];
    };

    mapnik::geometry::geometry<double> geom;
    if (locator.type == geometry_column_locator::WKT)
    {
        std::string const& wkt = column(locator.index, "WKT");
        if (mapnik::from_wkt(wkt, geom))
        {
            // correct orientations ..
            mapnik::geometry::correct(geom);
        }
        else
        {
            throw std::runtime_error("Failed to parse WKT:" + wkt);
        }
    }
    else if (locator.type == geometry_column_locator::GEOJSON)
    {
        std::string const& json = column(locator.index, "GeoJSON");
        if (!mapnik::json::from_geojson(json, geom))
        {
            throw std::runtime_error("Failed to parse GeoJSON:" + json);
        }
    }
    else if (locator.type == geometry_column_locator::LON_LAT)
    {
        double x = 0.0;
        double y = 0.0;
        std::string const& lon = column(locator.index, "Longitude(Easting)");
        if (!mapnik::util::string2double(lon, x))
        {
            throw std::runtime_error("Failed to parse Longitude(Easting):" + lon);
        }
        std::string const& lat = column(locator.index2, "Latitude(Northing)");
        if (!mapnik::util::string2double(lat, y))
        {
            throw std::runtime_error("Failed to parse Latitude(Northing):" + lat);
        }
        geom = mapnik::geometry::point<double>(x, y);
    }
    return geom;
}
|
|
|
|
|
2015-08-24 14:13:13 +02:00
|
|
|
template <typename Feature, typename Headers, typename Values, typename Locator, typename Transcoder>
|
|
|
|
void process_properties(Feature & feature, Headers const& headers, Values const& values, Locator const& locator, Transcoder const& tr)
|
|
|
|
{
|
|
|
|
auto val_beg = values.begin();
|
|
|
|
auto val_end = values.end();
|
|
|
|
auto num_headers = headers.size();
|
|
|
|
for (std::size_t i = 0; i < num_headers; ++i)
|
|
|
|
{
|
|
|
|
std::string const& fld_name = headers.at(i);
|
|
|
|
if (val_beg == val_end)
|
|
|
|
{
|
|
|
|
feature.put(fld_name,tr.transcode(""));
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
std::string value = mapnik::util::trim_copy(*val_beg++);
|
|
|
|
int value_length = value.length();
|
|
|
|
|
|
|
|
if (locator.index == i && (locator.type == detail::geometry_column_locator::WKT
|
|
|
|
|| locator.type == detail::geometry_column_locator::GEOJSON) ) continue;
|
|
|
|
|
|
|
|
|
|
|
|
bool matched = false;
|
|
|
|
bool has_dot = value.find(".") != std::string::npos;
|
|
|
|
if (value.empty() ||
|
|
|
|
(value_length > 20) ||
|
|
|
|
(value_length > 1 && !has_dot && value[0] == '0'))
|
|
|
|
{
|
|
|
|
matched = true;
|
|
|
|
feature.put(fld_name,std::move(tr.transcode(value.c_str())));
|
|
|
|
}
|
|
|
|
else if (csv_utils::is_likely_number(value))
|
|
|
|
{
|
|
|
|
bool has_e = value.find("e") != std::string::npos;
|
|
|
|
if (has_dot || has_e)
|
|
|
|
{
|
|
|
|
double float_val = 0.0;
|
|
|
|
if (mapnik::util::string2double(value,float_val))
|
|
|
|
{
|
|
|
|
matched = true;
|
|
|
|
feature.put(fld_name,float_val);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
mapnik::value_integer int_val = 0;
|
|
|
|
if (mapnik::util::string2int(value,int_val))
|
|
|
|
{
|
|
|
|
matched = true;
|
|
|
|
feature.put(fld_name,int_val);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (!matched)
|
|
|
|
{
|
|
|
|
if (csv_utils::ignore_case_equal(value, "true"))
|
|
|
|
{
|
|
|
|
feature.put(fld_name, true);
|
|
|
|
}
|
|
|
|
else if (csv_utils::ignore_case_equal(value, "false"))
|
|
|
|
{
|
|
|
|
feature.put(fld_name, false);
|
|
|
|
}
|
|
|
|
else // fallback to string
|
|
|
|
{
|
|
|
|
feature.put(fld_name,std::move(tr.transcode(value.c_str())));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2015-08-21 13:52:42 +02:00
|
|
|
}// ns detail
|
|
|
|
|
2012-08-17 22:46:32 +02:00
|
|
|
#endif // MAPNIK_CSV_UTILS_DATASOURCE_HPP
|