2012-04-08 02:20:56 +02:00
|
|
|
/*****************************************************************************
|
|
|
|
*
|
|
|
|
* This file is part of Mapnik (c++ mapping toolkit)
|
|
|
|
*
|
2015-06-16 12:49:16 +02:00
|
|
|
* Copyright (C) 2015 Artem Pavlenko
|
2012-04-08 02:20:56 +02:00
|
|
|
*
|
|
|
|
* This library is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
|
|
* License as published by the Free Software Foundation; either
|
|
|
|
* version 2.1 of the License, or (at your option) any later version.
|
|
|
|
*
|
|
|
|
* This library is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
* Lesser General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
|
|
* License along with this library; if not, write to the Free Software
|
|
|
|
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
|
*
|
|
|
|
*****************************************************************************/
|
|
|
|
|
2011-10-12 03:11:59 +02:00
|
|
|
#include "csv_datasource.hpp"
|
2012-08-17 22:46:32 +02:00
|
|
|
#include "csv_utils.hpp"
|
2011-10-12 03:11:59 +02:00
|
|
|
|
|
|
|
// boost
|
|
|
|
#include <boost/algorithm/string.hpp>
|
2015-06-09 16:22:37 +02:00
|
|
|
#include <boost/spirit/include/qi.hpp>
|
2011-10-12 03:11:59 +02:00
|
|
|
|
|
|
|
// mapnik
|
2012-04-08 02:20:56 +02:00
|
|
|
#include <mapnik/debug.hpp>
|
2015-06-02 12:10:41 +02:00
|
|
|
#include <mapnik/util/utf_conv_win.hpp>
|
2013-01-04 04:27:53 +01:00
|
|
|
#include <mapnik/unicode.hpp>
|
2011-10-12 03:11:59 +02:00
|
|
|
#include <mapnik/feature_layer_desc.hpp>
|
|
|
|
#include <mapnik/feature_factory.hpp>
|
2015-03-24 13:32:05 +01:00
|
|
|
#include <mapnik/geometry.hpp>
|
2015-05-20 23:00:30 +02:00
|
|
|
#include <mapnik/geometry_correct.hpp>
|
2015-08-19 12:04:56 +02:00
|
|
|
#include <mapnik/geometry_is_empty.hpp>
|
2011-10-12 03:11:59 +02:00
|
|
|
#include <mapnik/memory_featureset.hpp>
|
|
|
|
#include <mapnik/wkt/wkt_factory.hpp>
|
2012-08-17 22:46:32 +02:00
|
|
|
#include <mapnik/json/geometry_parser.hpp>
|
2012-06-22 22:49:53 +02:00
|
|
|
#include <mapnik/util/conversions.hpp>
|
2012-03-07 19:16:41 +01:00
|
|
|
#include <mapnik/boolean.hpp>
|
2012-12-07 23:06:13 +01:00
|
|
|
#include <mapnik/util/trim.hpp>
|
2015-03-19 12:09:07 +01:00
|
|
|
#include <mapnik/util/geometry_to_ds_type.hpp>
|
2013-01-08 23:17:31 +01:00
|
|
|
#include <mapnik/value_types.hpp>
|
2015-06-09 16:22:37 +02:00
|
|
|
#include <mapnik/csv/csv_grammar.hpp>
|
2011-10-12 03:11:59 +02:00
|
|
|
// stl
|
|
|
|
#include <sstream>
|
2011-11-10 01:45:18 +01:00
|
|
|
#include <fstream>
|
2011-11-14 04:33:57 +01:00
|
|
|
#include <vector>
|
2011-10-12 03:11:59 +02:00
|
|
|
#include <string>
|
2013-01-04 04:27:53 +01:00
|
|
|
#include <algorithm>
|
2011-10-12 03:11:59 +02:00
|
|
|
|
|
|
|
using mapnik::datasource;
|
|
|
|
using mapnik::parameters;
|
|
|
|
|
|
|
|
// NOTE(review): DATASOURCE_PLUGIN is defined outside this file; presumably it
// emits the plugin entry points for csv_datasource - confirm against the macro definition.
DATASOURCE_PLUGIN(csv_datasource)
|
|
|
|
|
2015-06-09 16:22:37 +02:00
|
|
|
namespace mapnik {
|
|
|
|
|
|
|
|
static const csv_line_grammar<char const*> line_g;
|
|
|
|
|
|
|
|
csv_line parse_line(std::string & line_str, std::string const& separator)
|
|
|
|
{
|
|
|
|
csv_line values;
|
|
|
|
auto start = line_str.c_str();
|
|
|
|
auto end = start + line_str.length();
|
|
|
|
boost::spirit::standard::blank_type blank;
|
|
|
|
if (!boost::spirit::qi::phrase_parse(start, end, (line_g)(boost::phoenix::cref(separator)), blank, values))
|
|
|
|
{
|
|
|
|
throw std::runtime_error("Failed to parse CSV line:\n" + line_str);
|
|
|
|
}
|
|
|
|
return values;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-12-17 19:03:07 +01:00
|
|
|
// Builds the datasource from its parameters and eagerly parses the CSV data.
//
// Parameters read here: "encoding", "row_limit", "escape", "separator",
// "quote", "headers", "strict", "filesize_max", "extent", "inline", "file"
// and "base".
//
// Throws mapnik::datasource_exception when neither "inline" nor "file" is
// given, or when the file cannot be opened; parse_csv may throw as well.
csv_datasource::csv_datasource(parameters const& params)
    : datasource(params)
    , desc_(csv_datasource::name(), *params.get<std::string>("encoding", "utf-8"))
    , extent_()
    , filename_()
    , inline_string_()
    , row_limit_(*params.get<mapnik::value_integer>("row_limit", 0))
    , features_()
    , escape_(*params.get<std::string>("escape", ""))
    , separator_(*params.get<std::string>("separator", ""))
    , quote_(*params.get<std::string>("quote", ""))
    , headers_()
    , manual_headers_(mapnik::util::trim_copy(*params.get<std::string>("headers", "")))
    , strict_(*params.get<mapnik::boolean_type>("strict", false))
    , filesize_max_(*params.get<double>("filesize_max", 20.0)) // MB
    , ctx_(std::make_shared<mapnik::context_type>())
    , extent_initialized_(false)
{
    // A user supplied extent, when it parses, replaces the extent that
    // parse_csv would otherwise accumulate from the features.
    boost::optional<std::string> const user_extent = params.get<std::string>("extent");
    if (user_extent && !user_extent->empty())
    {
        extent_initialized_ = extent_.from_string(*user_extent);
    }

    // Inline data wins over a file; the file parameter is only consulted
    // when no "inline" parameter exists at all.
    boost::optional<std::string> const inline_csv = params.get<std::string>("inline");
    if (!inline_csv)
    {
        boost::optional<std::string> const file = params.get<std::string>("file");
        if (!file)
        {
            throw mapnik::datasource_exception("CSV Plugin: missing <file> parameter");
        }
        boost::optional<std::string> const base = params.get<std::string>("base");
        filename_ = base ? *base + "/" + *file : *file;
    }
    else
    {
        inline_string_ = *inline_csv;
    }

    if (inline_string_.empty())
    {
        // Note: this branch is also reached for an empty "inline" value, in
        // which case filename_ is empty and the open below fails.
#if defined (_WINDOWS)
        std::ifstream in(mapnik::utf8_to_utf16(filename_),std::ios_base::in | std::ios_base::binary);
#else
        std::ifstream in(filename_.c_str(),std::ios_base::in | std::ios_base::binary);
#endif
        if (!in.is_open())
        {
            throw mapnik::datasource_exception("CSV Plugin: could not open: '" + filename_ + "'");
        }
        parse_csv(in, escape_, separator_, quote_);
        in.close();
    }
    else
    {
        std::istringstream in(inline_string_);
        parse_csv(in, escape_, separator_, quote_);
    }
}
|
|
|
|
|
2012-12-17 19:03:07 +01:00
|
|
|
|
|
|
|
// Nothing to release by hand; the members clean up after themselves.
csv_datasource::~csv_datasource() = default;
|
|
|
|
|
2015-06-04 11:12:21 +02:00
|
|
|
namespace detail {
|
|
|
|
|
|
|
|
// Returns the read offset reported by the stream after seeking to its end.
// The read position is left at the end; callers rewind it themselves.
template <typename T>
std::size_t file_length(T & stream)
{
    stream.seekg(0, std::ios::end);
    typename T::pos_type const end_pos = stream.tellg();
    return static_cast<std::size_t>(end_pos);
}
|
|
|
|
|
2015-06-09 11:17:55 +02:00
|
|
|
std::string detect_separator(std::string const& str)
|
|
|
|
{
|
|
|
|
std::string separator = ","; // default
|
|
|
|
int num_commas = std::count(str.begin(), str.end(), ',');
|
|
|
|
// detect tabs
|
|
|
|
int num_tabs = std::count(str.begin(), str.end(), '\t');
|
|
|
|
if (num_tabs > 0)
|
|
|
|
{
|
|
|
|
if (num_tabs > num_commas)
|
|
|
|
{
|
|
|
|
separator = "\t";
|
|
|
|
MAPNIK_LOG_DEBUG(csv) << "csv_datasource: auto detected tab separator";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else // pipes
|
|
|
|
{
|
|
|
|
int num_pipes = std::count(str.begin(), str.end(), '|');
|
|
|
|
if (num_pipes > num_commas)
|
|
|
|
{
|
|
|
|
separator = "|";
|
|
|
|
MAPNIK_LOG_DEBUG(csv) << "csv_datasource: auto detected '|' separator";
|
|
|
|
}
|
|
|
|
else // semicolons
|
|
|
|
{
|
|
|
|
int num_semicolons = std::count(str.begin(), str.end(), ';');
|
|
|
|
if (num_semicolons > num_commas)
|
|
|
|
{
|
|
|
|
separator = ";";
|
|
|
|
MAPNIK_LOG_DEBUG(csv) << "csv_datasource: auto detected ';' separator";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return separator;
|
|
|
|
}
|
|
|
|
|
2015-06-19 13:30:00 +02:00
|
|
|
// Scans the beginning of the stream for the first line terminator.
//
// At most min(file_length, 4000) characters are read from the current read
// position. Returns (newline character, whether one was found): '\r' if a
// carriage return is met first, '\n' if a line feed is met first, and
// ('\n', false) when neither shows up in the scanned range.
// The read position is not restored.
template <typename T>
std::tuple<char,bool> autodect_newline(T & stream, std::size_t file_length)
{
    std::size_t const scan_limit = std::min<std::size_t>(file_length, 4000);
    for (std::size_t pos = 0; pos != scan_limit; ++pos)
    {
        switch (static_cast<char>(stream.get()))
        {
        case '\r':
            return std::make_tuple('\r', true);
        case '\n':
            return std::make_tuple('\n', true);
        default:
            break;
        }
    }
    return std::make_tuple('\n', false);
}
|
|
|
|
|
2015-06-04 11:12:21 +02:00
|
|
|
|
2015-08-19 12:04:56 +02:00
|
|
|
// Records which column(s) of a CSV row carry the geometry and in what form.
struct geometry_column_locator
{
    // Encoding of the geometry column(s); UNKNOWN until a header matched.
    enum { UNKNOWN = 0, WKT, GEOJSON, LON_LAT } type;
    // Column of the WKT / GeoJSON text, or of the longitude for LON_LAT.
    std::size_t index;
    // Column of the latitude for LON_LAT.
    std::size_t index2;

    // Both indexes start at the largest std::size_t value (i.e. "not set").
    geometry_column_locator()
        : type(UNKNOWN),
          index(static_cast<std::size_t>(-1)),
          index2(static_cast<std::size_t>(-1))
    {}
};
|
|
|
|
|
|
|
|
void locate_geometry_column(std::string const& header, std::size_t index, geometry_column_locator & locator)
|
|
|
|
{
|
|
|
|
std::string lower_val(header);
|
|
|
|
std::transform(lower_val.begin(), lower_val.end(), lower_val.begin(), ::tolower);
|
|
|
|
if (lower_val == "wkt" || (lower_val.find("geom") != std::string::npos))
|
|
|
|
{
|
|
|
|
locator.type = geometry_column_locator::WKT;
|
|
|
|
locator.index = index;
|
|
|
|
}
|
|
|
|
else if (lower_val == "geojson")
|
|
|
|
{
|
|
|
|
locator.type = geometry_column_locator::GEOJSON;
|
|
|
|
locator.index = index;
|
|
|
|
}
|
|
|
|
else if (lower_val == "x" || lower_val == "lon"
|
|
|
|
|| lower_val == "lng" || lower_val == "long"
|
|
|
|
|| (lower_val.find("longitude") != std::string::npos))
|
|
|
|
{
|
|
|
|
locator.index = index;
|
|
|
|
locator.type = geometry_column_locator::LON_LAT;
|
|
|
|
}
|
|
|
|
|
|
|
|
else if (lower_val == "y"
|
|
|
|
|| lower_val == "lat"
|
|
|
|
|| (lower_val.find("latitude") != std::string::npos))
|
|
|
|
{
|
|
|
|
locator.index2 = index;
|
|
|
|
locator.type = geometry_column_locator::LON_LAT;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Builds the geometry of one parsed CSV row.
//
// row     - the field values of the row, in column order
// locator - tells which column(s) hold the geometry and how it is encoded
//
// Returns the parsed geometry. When locator.type is UNKNOWN no column is
// read and the default-constructed geometry is returned.
// Throws std::runtime_error when the row has no field at a geometry column
// (row shorter than the headers, or a lon/lat column that was never located)
// or when the field cannot be parsed.
mapnik::geometry::geometry<double> extract_geometry(std::vector<std::string> const& row, geometry_column_locator const& locator)
{
    // Bounds-checked field access: rows may legitimately be shorter than the
    // header list, and an unset locator index is the largest std::size_t value.
    auto const field = [&row](std::size_t column, char const* what) -> std::string const&
    {
        if (column >= row.size())
        {
            throw std::runtime_error(std::string("CSV Plugin: row has no value for the ") + what + " column");
        }
        return row[column];
    };

    mapnik::geometry::geometry<double> geom;
    if (locator.type == geometry_column_locator::WKT)
    {
        std::string const& wkt = field(locator.index, "WKT");
        if (!mapnik::from_wkt(wkt, geom))
        {
            throw std::runtime_error("Failed to parse WKT: '" + wkt + "'");
        }
        // correct orientations ..
        mapnik::geometry::correct(geom);
    }
    else if (locator.type == geometry_column_locator::GEOJSON)
    {
        std::string const& json = field(locator.index, "GeoJSON");
        if (!mapnik::json::from_geojson(json, geom))
        {
            throw std::runtime_error("Failed to parse GeoJSON: '" + json + "'");
        }
    }
    else if (locator.type == geometry_column_locator::LON_LAT)
    {
        std::string const& lon = field(locator.index, "longitude");
        std::string const& lat = field(locator.index2, "latitude");
        double x = 0.0;
        double y = 0.0;
        if (!mapnik::util::string2double(lon, x))
        {
            throw std::runtime_error("Failed to parse longitude: '" + lon + "'");
        }
        if (!mapnik::util::string2double(lat, y))
        {
            throw std::runtime_error("Failed to parse latitude: '" + lat + "'");
        }
        geom = mapnik::geometry::point<double>(x,y);
    }
    return geom;
}
|
|
|
|
|
|
|
|
} // ns detail
|
2015-06-09 11:17:55 +02:00
|
|
|
|
2011-10-12 03:11:59 +02:00
|
|
|
template <typename T>
|
2012-08-31 21:07:35 +02:00
|
|
|
void csv_datasource::parse_csv(T & stream,
|
2011-10-15 05:28:23 +02:00
|
|
|
std::string const& escape,
|
|
|
|
std::string const& separator,
|
2012-12-17 21:59:15 +01:00
|
|
|
std::string const& quote)
|
2011-10-12 03:11:59 +02:00
|
|
|
{
|
2015-06-04 11:12:21 +02:00
|
|
|
auto file_length = detail::file_length(stream);
|
2011-10-19 03:21:19 +02:00
|
|
|
// set back to start
|
2011-11-14 04:33:57 +01:00
|
|
|
stream.seekg(0, std::ios::beg);
|
2015-06-19 13:30:00 +02:00
|
|
|
char newline;
|
|
|
|
bool has_newline;
|
|
|
|
std::tie(newline, has_newline) = detail::autodect_newline(stream, file_length);
|
2011-10-15 05:28:23 +02:00
|
|
|
// set back to start
|
2011-11-14 04:33:57 +01:00
|
|
|
stream.seekg(0, std::ios::beg);
|
|
|
|
|
2011-10-19 03:21:19 +02:00
|
|
|
// get first line
|
|
|
|
std::string csv_line;
|
2015-06-09 16:22:37 +02:00
|
|
|
std::getline(stream,csv_line,stream.widen(newline));
|
2011-10-19 03:21:19 +02:00
|
|
|
|
|
|
|
// if user has not passed a separator manually
|
2011-10-15 05:28:23 +02:00
|
|
|
// then attempt to detect by reading first line
|
2012-12-07 23:06:13 +01:00
|
|
|
std::string sep = mapnik::util::trim_copy(separator);
|
2015-08-19 12:04:56 +02:00
|
|
|
if (sep.empty()) sep = detail::detect_separator(csv_line);
|
2011-10-19 03:21:19 +02:00
|
|
|
// set back to start
|
2011-11-14 04:33:57 +01:00
|
|
|
stream.seekg(0, std::ios::beg);
|
|
|
|
|
2012-12-07 23:06:13 +01:00
|
|
|
std::string esc = mapnik::util::trim_copy(escape);
|
2011-10-15 05:28:23 +02:00
|
|
|
if (esc.empty()) esc = "\\";
|
2011-11-14 04:33:57 +01:00
|
|
|
|
2012-12-07 23:06:13 +01:00
|
|
|
std::string quo = mapnik::util::trim_copy(quote);
|
2011-10-15 05:28:23 +02:00
|
|
|
if (quo.empty()) quo = "\"";
|
2011-10-12 03:11:59 +02:00
|
|
|
|
2012-10-05 02:59:43 +02:00
|
|
|
MAPNIK_LOG_DEBUG(csv) << "csv_datasource: csv grammar: sep: '" << sep
|
|
|
|
<< "' quo: '" << quo << "' esc: '" << esc << "'";
|
2011-10-15 05:28:23 +02:00
|
|
|
|
2015-06-01 15:03:53 +02:00
|
|
|
int line_number = 1;
|
2015-08-19 12:04:56 +02:00
|
|
|
detail::geometry_column_locator locator;
|
2011-10-15 05:28:23 +02:00
|
|
|
|
|
|
|
if (!manual_headers_.empty())
|
|
|
|
{
|
2015-08-19 12:04:56 +02:00
|
|
|
std::size_t index = 0;
|
|
|
|
auto headers = mapnik::parse_line(manual_headers_, sep);
|
2015-06-09 16:22:37 +02:00
|
|
|
for (auto const& header : headers)
|
2011-10-12 03:11:59 +02:00
|
|
|
{
|
2015-06-09 16:22:37 +02:00
|
|
|
std::string val = mapnik::util::trim_copy(header);
|
2015-08-19 12:04:56 +02:00
|
|
|
detail::locate_geometry_column(val, index++, locator);
|
2011-10-15 05:28:23 +02:00
|
|
|
headers_.push_back(val);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else // parse first line as headers
|
|
|
|
{
|
2015-06-09 16:22:37 +02:00
|
|
|
while (std::getline(stream,csv_line,stream.widen(newline)))
|
2011-10-15 05:28:23 +02:00
|
|
|
{
|
|
|
|
try
|
|
|
|
{
|
2015-06-09 16:22:37 +02:00
|
|
|
auto headers = mapnik::parse_line(csv_line, sep);
|
2011-10-15 05:28:23 +02:00
|
|
|
// skip blank lines
|
2015-06-09 16:22:37 +02:00
|
|
|
std::string val;
|
2015-08-19 12:04:56 +02:00
|
|
|
if (headers.size() > 0 && headers[0].empty()) ++line_number;
|
2011-10-15 05:28:23 +02:00
|
|
|
else
|
2011-10-12 03:11:59 +02:00
|
|
|
{
|
2015-08-19 12:04:56 +02:00
|
|
|
std::size_t index = 0;
|
2015-06-09 16:22:37 +02:00
|
|
|
for (auto const& header : headers)
|
2011-10-15 05:28:23 +02:00
|
|
|
{
|
2015-06-09 16:22:37 +02:00
|
|
|
val = mapnik::util::trim_copy(header);
|
2011-10-15 05:28:23 +02:00
|
|
|
if (val.empty())
|
|
|
|
{
|
2011-11-02 01:33:05 +01:00
|
|
|
if (strict_)
|
|
|
|
{
|
|
|
|
std::ostringstream s;
|
2012-12-07 08:06:12 +01:00
|
|
|
s << "CSV Plugin: expected a column header at line ";
|
2015-08-19 12:04:56 +02:00
|
|
|
s << line_number << ", column " << index;
|
2012-12-07 08:06:12 +01:00
|
|
|
s << " - ensure this row contains valid header fields: '";
|
|
|
|
s << csv_line << "'\n";
|
2011-11-02 01:33:05 +01:00
|
|
|
throw mapnik::datasource_exception(s.str());
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
// create a placeholder for the empty header
|
|
|
|
std::ostringstream s;
|
2015-08-19 12:04:56 +02:00
|
|
|
s << "_" << index;
|
2011-11-02 01:33:05 +01:00
|
|
|
headers_.push_back(s.str());
|
|
|
|
}
|
2011-10-15 05:28:23 +02:00
|
|
|
}
|
|
|
|
else
|
2011-11-14 04:33:57 +01:00
|
|
|
{
|
2015-08-19 12:04:56 +02:00
|
|
|
detail::locate_geometry_column(val, index, locator);
|
2011-10-15 05:28:23 +02:00
|
|
|
headers_.push_back(val);
|
|
|
|
}
|
2015-08-19 12:04:56 +02:00
|
|
|
++index;
|
2011-10-15 05:28:23 +02:00
|
|
|
}
|
|
|
|
++line_number;
|
|
|
|
break;
|
2011-10-12 03:11:59 +02:00
|
|
|
}
|
|
|
|
}
|
2015-08-19 12:04:56 +02:00
|
|
|
catch (std::exception const& ex)
|
2011-10-15 05:28:23 +02:00
|
|
|
{
|
2012-12-07 08:06:12 +01:00
|
|
|
std::string s("CSV Plugin: error parsing headers: ");
|
|
|
|
s += ex.what();
|
|
|
|
throw mapnik::datasource_exception(s);
|
2011-10-15 05:28:23 +02:00
|
|
|
}
|
2011-10-12 03:11:59 +02:00
|
|
|
}
|
2011-10-15 05:28:23 +02:00
|
|
|
}
|
|
|
|
|
2015-08-19 12:04:56 +02:00
|
|
|
if (locator.type == detail::geometry_column_locator::UNKNOWN)
|
2011-10-15 05:28:23 +02:00
|
|
|
{
|
2015-08-19 12:04:56 +02:00
|
|
|
throw mapnik::datasource_exception("CSV Plugin: could not detect column headers with the name of wkt, geojson, x/y, or "
|
|
|
|
"latitude/longitude - this is required for reading geometry data");
|
2011-10-15 05:28:23 +02:00
|
|
|
}
|
|
|
|
|
2015-06-01 15:03:53 +02:00
|
|
|
mapnik::value_integer feature_count = 0;
|
2013-10-30 19:57:03 +01:00
|
|
|
bool extent_started = false;
|
|
|
|
|
2012-01-17 19:34:08 +01:00
|
|
|
std::size_t num_headers = headers_.size();
|
2015-06-01 14:58:37 +02:00
|
|
|
std::for_each(headers_.begin(), headers_.end(),
|
|
|
|
[ & ](std::string const& header){ ctx_->push(header); });
|
2012-01-17 19:34:08 +01:00
|
|
|
|
2011-10-15 05:28:23 +02:00
|
|
|
mapnik::transcoder tr(desc_.get_encoding());
|
|
|
|
|
2012-08-31 21:07:35 +02:00
|
|
|
// handle rare case of a single line of data and user-provided headers
|
|
|
|
// where a lack of a newline will mean that std::getline returns false
|
|
|
|
bool is_first_row = false;
|
|
|
|
if (!has_newline)
|
2011-10-15 05:28:23 +02:00
|
|
|
{
|
2012-08-31 21:07:35 +02:00
|
|
|
stream >> csv_line;
|
|
|
|
if (!csv_line.empty())
|
|
|
|
{
|
|
|
|
is_first_row = true;
|
|
|
|
}
|
|
|
|
}
|
2015-06-09 16:22:37 +02:00
|
|
|
while (std::getline(stream,csv_line, stream.widen(newline)) || is_first_row)
|
2012-08-31 21:07:35 +02:00
|
|
|
{
|
|
|
|
is_first_row = false;
|
2011-10-15 05:28:23 +02:00
|
|
|
if ((row_limit_ > 0) && (line_number > row_limit_))
|
2011-10-12 03:11:59 +02:00
|
|
|
{
|
2012-04-09 03:00:51 +02:00
|
|
|
MAPNIK_LOG_DEBUG(csv) << "csv_datasource: row limit hit, exiting at feature: " << feature_count;
|
2011-10-15 05:28:23 +02:00
|
|
|
break;
|
|
|
|
}
|
2011-11-14 04:33:57 +01:00
|
|
|
|
2011-10-17 20:03:50 +02:00
|
|
|
// skip blank lines
|
2012-08-17 03:20:48 +02:00
|
|
|
unsigned line_length = csv_line.length();
|
|
|
|
if (line_length <= 10)
|
2011-11-04 12:18:40 +01:00
|
|
|
{
|
|
|
|
std::string trimmed = csv_line;
|
2012-08-17 03:20:48 +02:00
|
|
|
boost::trim_if(trimmed,boost::algorithm::is_any_of("\",'\r\n "));
|
2012-04-08 02:20:56 +02:00
|
|
|
if (trimmed.empty())
|
|
|
|
{
|
2011-11-04 12:18:40 +01:00
|
|
|
++line_number;
|
2012-04-09 03:00:51 +02:00
|
|
|
MAPNIK_LOG_DEBUG(csv) << "csv_datasource: empty row encountered at line: " << line_number;
|
2012-04-08 02:20:56 +02:00
|
|
|
continue;
|
2011-11-04 12:18:40 +01:00
|
|
|
}
|
2011-10-17 20:03:50 +02:00
|
|
|
}
|
|
|
|
|
2011-10-15 05:28:23 +02:00
|
|
|
try
|
|
|
|
{
|
2015-08-19 12:04:56 +02:00
|
|
|
auto values = mapnik::parse_line(csv_line, sep);
|
2015-06-09 16:22:37 +02:00
|
|
|
unsigned num_fields = values.size();
|
2012-08-20 23:06:07 +02:00
|
|
|
if (num_fields > num_headers)
|
|
|
|
{
|
|
|
|
std::ostringstream s;
|
|
|
|
s << "CSV Plugin: # of columns("
|
2014-06-26 11:51:24 +02:00
|
|
|
<< num_fields << ") > # of headers("
|
|
|
|
<< num_headers << ") parsed for row " << line_number << "\n";
|
2012-08-20 23:06:07 +02:00
|
|
|
throw mapnik::datasource_exception(s.str());
|
|
|
|
}
|
|
|
|
else if (num_fields < num_headers)
|
2011-10-12 03:11:59 +02:00
|
|
|
{
|
2012-08-20 23:06:07 +02:00
|
|
|
std::ostringstream s;
|
|
|
|
s << "CSV Plugin: # of headers("
|
2014-06-26 11:51:24 +02:00
|
|
|
<< num_headers << ") > # of columns("
|
|
|
|
<< num_fields << ") parsed for row " << line_number << "\n";
|
2012-08-20 23:06:07 +02:00
|
|
|
if (strict_)
|
2011-10-15 05:28:23 +02:00
|
|
|
{
|
|
|
|
throw mapnik::datasource_exception(s.str());
|
|
|
|
}
|
2012-08-20 23:06:07 +02:00
|
|
|
else
|
|
|
|
{
|
|
|
|
MAPNIK_LOG_WARN(csv) << s.str();
|
|
|
|
}
|
2011-10-12 03:11:59 +02:00
|
|
|
}
|
2011-11-14 04:33:57 +01:00
|
|
|
|
2015-06-09 16:22:37 +02:00
|
|
|
auto beg = values.begin();
|
|
|
|
auto end = values.end();
|
2015-08-19 12:04:56 +02:00
|
|
|
|
|
|
|
|
|
|
|
auto geom = detail::extract_geometry(values, locator);
|
|
|
|
if (!geom.is<mapnik::geometry::geometry_empty>())
|
2011-10-12 03:11:59 +02:00
|
|
|
{
|
2011-11-01 00:09:29 +01:00
|
|
|
|
2015-08-19 12:04:56 +02:00
|
|
|
mapnik::feature_ptr feature(mapnik::feature_factory::create(ctx_, ++feature_count));
|
|
|
|
feature->set_geometry(std::move(geom));
|
2011-11-14 04:33:57 +01:00
|
|
|
|
2015-08-19 12:04:56 +02:00
|
|
|
std::vector<std::string> collected;
|
|
|
|
for (unsigned i = 0; i < num_headers; ++i)
|
2011-10-12 03:11:59 +02:00
|
|
|
{
|
2015-08-19 12:04:56 +02:00
|
|
|
std::string const& fld_name = headers_.at(i);
|
|
|
|
collected.push_back(fld_name);
|
|
|
|
std::string value;
|
|
|
|
if (beg == end) // there are more headers than column values for this row
|
2011-11-14 04:33:57 +01:00
|
|
|
{
|
2015-08-19 12:04:56 +02:00
|
|
|
// add an empty string here to represent a missing value
|
|
|
|
// not using null type here since nulls are not a csv thing
|
|
|
|
feature->put(fld_name,tr.transcode(value.c_str()));
|
|
|
|
if (feature_count == 1)
|
2011-11-14 04:33:57 +01:00
|
|
|
{
|
2015-08-19 12:04:56 +02:00
|
|
|
desc_.add_descriptor(mapnik::attribute_descriptor(fld_name,mapnik::String));
|
2011-11-14 04:33:57 +01:00
|
|
|
}
|
2015-08-19 12:04:56 +02:00
|
|
|
// continue here instead of break so that all missing values are
|
|
|
|
// encoded consistenly as empty strings
|
|
|
|
continue;
|
2011-11-14 04:33:57 +01:00
|
|
|
}
|
2015-08-19 12:04:56 +02:00
|
|
|
else
|
2012-08-17 22:46:32 +02:00
|
|
|
{
|
2015-08-19 12:04:56 +02:00
|
|
|
value = mapnik::util::trim_copy(*beg++);
|
2012-12-03 14:12:09 +01:00
|
|
|
}
|
2015-08-19 12:04:56 +02:00
|
|
|
int value_length = value.length();
|
|
|
|
|
|
|
|
// now, add attributes, skipping any WKT or JSON fields
|
|
|
|
if (locator.index == i && (locator.type == detail::geometry_column_locator::WKT
|
|
|
|
|| locator.type == detail::geometry_column_locator::GEOJSON) ) continue;
|
|
|
|
|
|
|
|
// First we detect likely strings,
|
|
|
|
// then try parsing likely numbers,
|
|
|
|
// then try converting to bool,
|
|
|
|
// finally falling back to string type.
|
|
|
|
// An empty string or a string of "null" will be parsed
|
|
|
|
// as a string rather than a true null value.
|
|
|
|
// Likely strings are either empty values, very long values
|
|
|
|
// or values with leading zeros like 001 (which are not safe
|
|
|
|
// to assume are numbers)
|
|
|
|
|
|
|
|
bool matched = false;
|
|
|
|
bool has_dot = value.find(".") != std::string::npos;
|
|
|
|
if (value.empty() ||
|
|
|
|
(value_length > 20) ||
|
|
|
|
(value_length > 1 && !has_dot && value[0] == '0'))
|
2011-10-12 03:11:59 +02:00
|
|
|
{
|
2015-08-19 12:04:56 +02:00
|
|
|
matched = true;
|
|
|
|
feature->put(fld_name,std::move(tr.transcode(value.c_str())));
|
|
|
|
if (feature_count == 1)
|
2011-10-15 05:28:23 +02:00
|
|
|
{
|
2015-08-19 12:04:56 +02:00
|
|
|
desc_.add_descriptor(mapnik::attribute_descriptor(fld_name,mapnik::String));
|
2011-10-15 05:28:23 +02:00
|
|
|
}
|
2015-08-19 12:04:56 +02:00
|
|
|
}
|
|
|
|
else if (csv_utils::is_likely_number(value))
|
|
|
|
{
|
|
|
|
bool has_e = value.find("e") != std::string::npos;
|
|
|
|
if (has_dot || has_e)
|
2011-10-15 05:28:23 +02:00
|
|
|
{
|
2015-08-19 12:04:56 +02:00
|
|
|
double float_val = 0.0;
|
|
|
|
if (mapnik::util::string2double(value,float_val))
|
|
|
|
{
|
|
|
|
matched = true;
|
|
|
|
feature->put(fld_name,float_val);
|
|
|
|
if (feature_count == 1)
|
|
|
|
{
|
|
|
|
desc_.add_descriptor(
|
|
|
|
mapnik::attribute_descriptor(
|
|
|
|
fld_name,mapnik::Double));
|
|
|
|
}
|
|
|
|
}
|
2011-10-15 05:28:23 +02:00
|
|
|
}
|
2012-06-22 22:49:53 +02:00
|
|
|
else
|
2011-10-15 05:28:23 +02:00
|
|
|
{
|
2015-08-19 12:04:56 +02:00
|
|
|
mapnik::value_integer int_val = 0;
|
|
|
|
if (mapnik::util::string2int(value,int_val))
|
2011-10-15 05:28:23 +02:00
|
|
|
{
|
2015-08-19 12:04:56 +02:00
|
|
|
matched = true;
|
|
|
|
feature->put(fld_name,int_val);
|
|
|
|
if (feature_count == 1)
|
|
|
|
{
|
|
|
|
desc_.add_descriptor(
|
|
|
|
mapnik::attribute_descriptor(
|
|
|
|
fld_name,mapnik::Integer));
|
|
|
|
}
|
2011-10-15 05:28:23 +02:00
|
|
|
}
|
|
|
|
}
|
2011-10-12 03:11:59 +02:00
|
|
|
}
|
2015-08-19 12:04:56 +02:00
|
|
|
if (!matched)
|
2011-10-12 03:11:59 +02:00
|
|
|
{
|
2015-08-19 12:04:56 +02:00
|
|
|
// NOTE: we don't use mapnik::util::string2bool
|
|
|
|
// here because we don't want to treat 'on' and 'off'
|
|
|
|
// as booleans, only 'true' and 'false'
|
|
|
|
bool bool_val = false;
|
|
|
|
std::string lower_val = value;
|
|
|
|
std::transform(lower_val.begin(), lower_val.end(), lower_val.begin(), ::tolower);
|
|
|
|
if (lower_val == "true")
|
2011-10-15 05:28:23 +02:00
|
|
|
{
|
2015-08-19 12:04:56 +02:00
|
|
|
matched = true;
|
|
|
|
bool_val = true;
|
2011-10-15 05:28:23 +02:00
|
|
|
}
|
2015-08-19 12:04:56 +02:00
|
|
|
else if (lower_val == "false")
|
2011-10-15 05:28:23 +02:00
|
|
|
{
|
2015-08-19 12:04:56 +02:00
|
|
|
matched = true;
|
|
|
|
bool_val = false;
|
2011-10-15 05:28:23 +02:00
|
|
|
}
|
2015-08-19 12:04:56 +02:00
|
|
|
if (matched)
|
2011-10-12 03:11:59 +02:00
|
|
|
{
|
2015-08-19 12:04:56 +02:00
|
|
|
feature->put(fld_name,bool_val);
|
2011-10-29 06:50:31 +02:00
|
|
|
if (feature_count == 1)
|
2011-11-02 01:48:30 +01:00
|
|
|
{
|
2011-12-13 00:55:33 +01:00
|
|
|
desc_.add_descriptor(
|
|
|
|
mapnik::attribute_descriptor(
|
2015-08-19 12:04:56 +02:00
|
|
|
fld_name,mapnik::Boolean));
|
2011-11-02 01:48:30 +01:00
|
|
|
}
|
2011-10-12 03:11:59 +02:00
|
|
|
}
|
2015-08-19 12:04:56 +02:00
|
|
|
else
|
2011-10-12 03:11:59 +02:00
|
|
|
{
|
2015-08-19 12:04:56 +02:00
|
|
|
// fallback to normal string
|
|
|
|
feature->put(fld_name,std::move(tr.transcode(value.c_str())));
|
2011-10-29 06:50:31 +02:00
|
|
|
if (feature_count == 1)
|
2011-11-02 01:48:30 +01:00
|
|
|
{
|
2011-12-13 00:55:33 +01:00
|
|
|
desc_.add_descriptor(
|
|
|
|
mapnik::attribute_descriptor(
|
2015-08-19 12:04:56 +02:00
|
|
|
fld_name,mapnik::String));
|
2011-11-02 01:48:30 +01:00
|
|
|
}
|
2011-10-12 03:11:59 +02:00
|
|
|
}
|
|
|
|
}
|
2011-10-15 05:28:23 +02:00
|
|
|
}
|
2015-08-19 12:04:56 +02:00
|
|
|
|
2015-08-19 15:24:38 +02:00
|
|
|
if (!extent_initialized_)
|
2011-10-12 03:11:59 +02:00
|
|
|
{
|
2015-08-19 15:24:38 +02:00
|
|
|
if (!extent_started)
|
2011-10-15 05:28:23 +02:00
|
|
|
{
|
2015-08-19 15:24:38 +02:00
|
|
|
extent_started = true;
|
|
|
|
extent_ = feature->envelope();
|
2011-10-12 03:11:59 +02:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2015-08-19 15:24:38 +02:00
|
|
|
extent_.expand_to_include(feature->envelope());
|
2011-10-12 03:11:59 +02:00
|
|
|
}
|
|
|
|
}
|
2015-08-19 15:24:38 +02:00
|
|
|
features_.push_back(feature);
|
2011-10-12 03:11:59 +02:00
|
|
|
}
|
2015-08-19 12:04:56 +02:00
|
|
|
else
|
2012-08-20 23:06:07 +02:00
|
|
|
{
|
|
|
|
std::ostringstream s;
|
2015-08-19 12:04:56 +02:00
|
|
|
s << "CSV Plugin: expected geometry column: could not parse row "
|
|
|
|
<< line_number << " "
|
|
|
|
<< values[locator.index] << "'";
|
2012-08-20 23:06:07 +02:00
|
|
|
if (strict_)
|
|
|
|
{
|
|
|
|
throw mapnik::datasource_exception(s.str());
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
MAPNIK_LOG_ERROR(csv) << s.str();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-08-19 12:04:56 +02:00
|
|
|
|
2011-10-15 05:28:23 +02:00
|
|
|
++line_number;
|
|
|
|
}
|
2015-08-19 12:04:56 +02:00
|
|
|
catch (mapnik::datasource_exception const& ex )
|
2011-11-01 00:09:29 +01:00
|
|
|
{
|
|
|
|
if (strict_)
|
|
|
|
{
|
|
|
|
throw mapnik::datasource_exception(ex.what());
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2012-04-09 03:00:51 +02:00
|
|
|
MAPNIK_LOG_ERROR(csv) << ex.what();
|
2011-11-01 00:09:29 +01:00
|
|
|
}
|
|
|
|
}
|
2012-08-20 23:06:07 +02:00
|
|
|
catch(std::exception const& ex)
|
2011-10-15 05:28:23 +02:00
|
|
|
{
|
|
|
|
std::ostringstream s;
|
|
|
|
s << "CSV Plugin: unexpected error parsing line: " << line_number
|
|
|
|
<< " - found " << headers_.size() << " with values like: " << csv_line << "\n"
|
|
|
|
<< " and got error like: " << ex.what();
|
|
|
|
if (strict_)
|
|
|
|
{
|
|
|
|
throw mapnik::datasource_exception(s.str());
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2012-04-09 03:00:51 +02:00
|
|
|
MAPNIK_LOG_ERROR(csv) << s.str();
|
2011-10-15 05:28:23 +02:00
|
|
|
}
|
2011-10-12 03:11:59 +02:00
|
|
|
}
|
|
|
|
}
|
2013-05-21 21:55:08 +02:00
|
|
|
if (feature_count < 1)
|
2011-10-29 06:50:31 +02:00
|
|
|
{
|
2012-04-09 03:00:51 +02:00
|
|
|
MAPNIK_LOG_ERROR(csv) << "CSV Plugin: could not parse any lines of data";
|
2011-10-29 06:50:31 +02:00
|
|
|
}
|
2011-10-12 03:11:59 +02:00
|
|
|
}
|
|
|
|
|
2012-07-21 03:34:41 +02:00
|
|
|
// Name of this plugin; the constructor also hands it to the layer descriptor.
const char * csv_datasource::name()
{
    return "csv";
}
|
|
|
|
|
2012-01-17 07:09:46 +01:00
|
|
|
// This datasource always reports itself as a vector source.
datasource::datasource_t csv_datasource::type() const
{
    return datasource::Vector;
}
|
|
|
|
|
|
|
|
// Returns the stored extent: either the one parsed from the "extent"
// parameter or the one accumulated from the features while parsing.
mapnik::box2d<double> csv_datasource::envelope() const
{
    return extent_;
}
|
|
|
|
|
|
|
|
// Returns a copy of the layer descriptor that parse_csv filled with the
// attribute descriptors.
mapnik::layer_descriptor csv_datasource::get_descriptor() const
{
    return desc_;
}
|
|
|
|
|
2015-03-24 12:13:31 +01:00
|
|
|
// Derives the geometry type of the datasource from (at most) its first five
// features.
//
// Returns Collection as soon as two sampled features report different types;
// otherwise returns whatever the last sampled feature reported (which is an
// empty optional when that feature yields no type, or when there are no
// features at all).
boost::optional<mapnik::datasource_geometry_t> csv_datasource::get_geometry_type() const
{
    boost::optional<mapnik::datasource_geometry_t> result;
    int seen_type = 0;
    std::size_t const sample_size = std::min<std::size_t>(features_.size(), 5);
    for (std::size_t idx = 0; idx != sample_size; ++idx)
    {
        result = mapnik::util::to_ds_type(features_[idx]->get_geometry());
        if (!result)
        {
            continue;
        }
        int const current_type = static_cast<int>(*result);
        if (seen_type > 0 && seen_type != current_type)
        {
            result = mapnik::datasource_geometry_t::Collection;
            return result;
        }
        seen_type = current_type;
    }
    return result;
}
|
|
|
|
|
2011-10-12 03:11:59 +02:00
|
|
|
// Returns an in-memory featureset over all parsed features for the query's
// bounding box.
//
// Every property name requested by the query must match one of the parsed
// headers; otherwise a mapnik::datasource_exception listing the valid
// attribute names is thrown.
mapnik::featureset_ptr csv_datasource::features(mapnik::query const& q) const
{
    std::set<std::string> const& requested_names = q.property_names();
    for (std::string const& requested : requested_names)
    {
        bool const known = std::find(headers_.begin(), headers_.end(), requested) != headers_.end();
        if (known)
        {
            continue;
        }
        std::ostringstream s;
        s << "CSV Plugin: no attribute '" << requested << "'. Valid attributes are: "
          << boost::algorithm::join(headers_, ",") << ".";
        throw mapnik::datasource_exception(s.str());
    }
    return std::make_shared<mapnik::memory_featureset>(q.get_bbox(),features_);
}
|
|
|
|
|
2012-09-28 15:12:10 +02:00
|
|
|
// Point queries are not implemented: both arguments are ignored and a
// mapnik::datasource_exception is always thrown.
mapnik::featureset_ptr csv_datasource::features_at_point(mapnik::coord2d const& pt, double tol) const
{
    throw mapnik::datasource_exception("CSV Plugin: features_at_point is not supported yet");
}
|