mapnik/plugins/input/csv/csv_datasource.cpp

968 lines
34 KiB
C++
Raw Normal View History

2012-04-08 02:20:56 +02:00
/*****************************************************************************
*
* This file is part of Mapnik (c++ mapping toolkit)
*
* Copyright (C) 2011 Artem Pavlenko
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*****************************************************************************/
2011-10-12 03:11:59 +02:00
#include "csv_datasource.hpp"
#include "csv_utils.hpp"
2011-10-12 03:11:59 +02:00
// boost
#include <boost/make_shared.hpp>
#include <boost/tokenizer.hpp>
#include <boost/algorithm/string.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
2011-10-12 03:11:59 +02:00
// mapnik
2012-04-08 02:20:56 +02:00
#include <mapnik/debug.hpp>
2011-10-12 03:11:59 +02:00
#include <mapnik/feature_layer_desc.hpp>
#include <mapnik/feature_factory.hpp>
#include <mapnik/geometry.hpp>
#include <mapnik/memory_featureset.hpp>
#include <mapnik/wkt/wkt_factory.hpp>
#include <mapnik/json/geometry_parser.hpp>
2012-01-15 07:35:40 +01:00
#include <mapnik/util/geometry_to_ds_type.hpp>
#include <mapnik/util/conversions.hpp>
2012-03-07 19:16:41 +01:00
#include <mapnik/boolean.hpp>
2011-10-12 03:11:59 +02:00
// stl
#include <algorithm>
#include <fstream>
#include <iostream>
#include <limits>
#include <sstream>
#include <string>
#include <vector>
using mapnik::datasource;
using mapnik::parameters;
using namespace boost::spirit;
2011-10-12 03:11:59 +02:00
DATASOURCE_PLUGIN(csv_datasource)
// Builds the datasource from its parameter set.
//
// Options read here: "type", "encoding" (default "utf-8"), "row_limit"
// (default 0), "escape", "separator", "quote", "headers", "strict", "quiet",
// "filesize_max" (in MB, default 20.0), "inline", "file" and "base".
//
// When "inline" is present its content is kept as the CSV text; otherwise
// "file" is required (optionally prefixed with "base" and a '/').
// If `bind` is true the data is parsed immediately.
//
// Throws mapnik::datasource_exception when neither "inline" nor "file" is given.
csv_datasource::csv_datasource(parameters const& params, bool bind)
    : datasource(params),
      desc_(*params_.get<std::string>("type"), *params_.get<std::string>("encoding", "utf-8")),
      extent_(),
      filename_(),
      inline_string_(),
      file_length_(0),
      row_limit_(*params_.get<int>("row_limit", 0)),
      features_(),
      escape_(*params_.get<std::string>("escape", "")),
      separator_(*params_.get<std::string>("separator", "")),
      quote_(*params_.get<std::string>("quote", "")),
      headers_(),
      manual_headers_(boost::trim_copy(*params_.get<std::string>("headers", ""))),
      strict_(*params_.get<mapnik::boolean>("strict", false)),
      quiet_(*params_.get<mapnik::boolean>("quiet", false)),
      filesize_max_(*params_.get<float>("filesize_max", 20.0)), // MB
      ctx_(boost::make_shared<mapnik::context_type>())
{
    /* TODO:
       general:
        - refactor parser into generic class
        - tests of grid_renderer output
        - ensure that the attribute desc_ matches the first feature added
       alternate large file pipeline:
        - stat file, detect > 15 MB
        - build up csv line-by-line iterator
        - creates opportunity to filter attributes by map query
       speed:
        - add properties for wkt/json/lon/lat at parse time
        - add ability to pass 'filter' keyword to drop attributes at layer init
        - create quad tree on the fly for small/med size files
        - memory map large files for reading
        - smaller features (less memory overhead)
       usability:
        - enforce column names without leading digit
        - better error messages (add filepath) if not reading from string
        - move to spirit to tokenize and add character level error feedback:
          http://boost-spirit.com/home/articles/qi-example/tracking-the-input-position-while-parsing/
    */

    boost::optional<std::string> const inline_csv = params_.get<std::string>("inline");
    if (!inline_csv)
    {
        // No inline data: a file path is mandatory.
        boost::optional<std::string> const file_name = params_.get<std::string>("file");
        if (!file_name)
        {
            throw mapnik::datasource_exception("CSV Plugin: missing <file> parameter");
        }

        boost::optional<std::string> const base_dir = params_.get<std::string>("base");
        if (!base_dir)
        {
            filename_ = *file_name;
        }
        else
        {
            filename_ = *base_dir + "/" + *file_name;
        }
    }
    else
    {
        inline_string_ = *inline_csv;
    }

    if (bind)
    {
        this->bind();
    }
}
// Destructor: performs no explicit cleanup.
csv_datasource::~csv_datasource()
{
}
void csv_datasource::bind() const
{
if (is_bound_) return;
2011-11-14 04:33:57 +01:00
2011-10-12 03:11:59 +02:00
if (!inline_string_.empty())
{
std::istringstream in(inline_string_);
parse_csv(in,escape_, separator_, quote_);
2011-10-12 03:11:59 +02:00
}
else
{
std::ifstream in(filename_.c_str(),std::ios_base::in | std::ios_base::binary);
2011-10-12 03:11:59 +02:00
if (!in.is_open())
throw mapnik::datasource_exception("CSV Plugin: could not open: '" + filename_ + "'");
parse_csv(in,escape_, separator_, quote_);
2011-10-12 03:11:59 +02:00
in.close();
}
is_bound_ = true;
}
template <typename T>
void csv_datasource::parse_csv(T & stream,
std::string const& escape,
std::string const& separator,
std::string const& quote) const
2011-10-12 03:11:59 +02:00
{
2011-11-14 04:33:57 +01:00
stream.seekg(0, std::ios::end);
file_length_ = stream.tellg();
2011-11-14 04:33:57 +01:00
if (filesize_max_ > 0)
{
double file_mb = static_cast<double>(file_length_)/1048576;
2011-11-14 04:33:57 +01:00
// throw if this is an unreasonably large file to read into memory
if (file_mb > filesize_max_)
{
std::ostringstream s;
s << "CSV Plugin: csv file is greater than " << filesize_max_ << "MB "
<< " - you should use a more efficient data format like sqlite, postgis or a shapefile "
<< " to render this data (set 'filesize_max=0' to disable this restriction if you have lots of memory)";
throw mapnik::datasource_exception(s.str());
}
}
// set back to start
2011-11-14 04:33:57 +01:00
stream.seekg(0, std::ios::beg);
2011-10-12 03:11:59 +02:00
// autodetect newlines
char newline = '\n';
bool has_newline = false;
for (unsigned lidx = 0; lidx < file_length_ && lidx < 4000; lidx++)
2011-10-12 03:11:59 +02:00
{
char c = static_cast<char>(stream.get());
if (c == '\r')
{
newline = '\r';
has_newline = true;
break;
}
if (c == '\n')
{
has_newline = true;
break;
}
}
// set back to start
2011-11-14 04:33:57 +01:00
stream.seekg(0, std::ios::beg);
// get first line
std::string csv_line;
std::getline(stream,csv_line,newline);
// if user has not passed a separator manually
// then attempt to detect by reading first line
std::string sep = boost::trim_copy(separator);
if (sep.empty())
{
// default to ','
sep = ",";
int num_commas = std::count(csv_line.begin(), csv_line.end(), ',');
// detect tabs
int num_tabs = std::count(csv_line.begin(), csv_line.end(), '\t');
if (num_tabs > 0)
{
if (num_tabs > num_commas)
{
sep = "\t";
2012-04-08 02:20:56 +02:00
MAPNIK_LOG_DEBUG(csv) << "csv_datasource: auto detected tab separator";
}
}
else // pipes
{
int num_pipes = std::count(csv_line.begin(), csv_line.end(), '|');
if (num_pipes > num_commas)
{
sep = "|";
2012-04-08 02:20:56 +02:00
MAPNIK_LOG_DEBUG(csv) << "csv_datasource: auto detected '|' separator";
}
else // semicolons
{
int num_semicolons = std::count(csv_line.begin(), csv_line.end(), ';');
if (num_semicolons > num_commas)
{
sep = ";";
2012-04-08 02:20:56 +02:00
MAPNIK_LOG_DEBUG(csv) << "csv_datasource: auto detected ';' separator";
}
}
}
}
// set back to start
2011-11-14 04:33:57 +01:00
stream.seekg(0, std::ios::beg);
typedef boost::escaped_list_separator<char> escape_type;
2011-10-12 03:11:59 +02:00
std::string esc = boost::trim_copy(escape);
if (esc.empty()) esc = "\\";
2011-11-14 04:33:57 +01:00
std::string quo = boost::trim_copy(quote);
if (quo.empty()) quo = "\"";
2011-10-12 03:11:59 +02:00
MAPNIK_LOG_DEBUG(csv) << "csv_datasource: csv grammer: sep: '" << sep << "' quo: '" << quo << "' esc: '" << esc;
boost::escaped_list_separator<char> grammer;
try
{
2011-11-14 04:33:57 +01:00
// grammer = boost::escaped_list_separator<char>('\\', ',', '\"');
grammer = boost::escaped_list_separator<char>(esc, sep, quo);
}
catch(std::exception const& ex)
{
std::ostringstream s;
s << "CSV Plugin: " << ex.what();
throw mapnik::datasource_exception(s.str());
}
2011-11-14 04:33:57 +01:00
2011-10-17 20:18:44 +02:00
typedef boost::tokenizer< escape_type > Tokenizer;
int line_number(1);
bool has_wkt_field = false;
bool has_json_field = false;
bool has_lat_field = false;
bool has_lon_field = false;
2012-07-24 02:43:21 +02:00
unsigned wkt_idx(0);
unsigned json_idx(0);
2012-07-24 02:43:21 +02:00
unsigned lat_idx(0);
unsigned lon_idx(0);
if (!manual_headers_.empty())
{
2011-10-17 20:18:44 +02:00
Tokenizer tok(manual_headers_, grammer);
Tokenizer::iterator beg = tok.begin();
unsigned idx(0);
for (; beg != tok.end(); ++beg)
2011-10-12 03:11:59 +02:00
{
std::string val = boost::trim_copy(*beg);
std::string lower_val = boost::algorithm::to_lower_copy(val);
if (lower_val == "wkt"
|| (lower_val.find("geom") != std::string::npos))
2011-10-12 03:11:59 +02:00
{
wkt_idx = idx;
has_wkt_field = true;
}
if (lower_val == "geojson")
{
json_idx = idx;
has_json_field = true;
}
if (lower_val == "x"
|| lower_val == "lon"
|| lower_val == "lng"
|| lower_val == "long"
|| (lower_val.find("longitude") != std::string::npos))
{
lon_idx = idx;
has_lon_field = true;
}
if (lower_val == "y"
|| lower_val == "lat"
|| (lower_val.find("latitude") != std::string::npos))
{
lat_idx = idx;
has_lat_field = true;
}
++idx;
headers_.push_back(val);
}
}
else // parse first line as headers
{
while (std::getline(stream,csv_line,newline))
{
try
{
2011-10-17 20:18:44 +02:00
Tokenizer tok(csv_line, grammer);
Tokenizer::iterator beg = tok.begin();
2011-11-14 09:34:26 +01:00
std::string val;
if (beg != tok.end())
val = boost::trim_copy(*beg);
2011-11-14 04:33:57 +01:00
// skip blank lines
if (val.empty())
2011-10-12 03:11:59 +02:00
{
// do nothing
++line_number;
2011-10-12 03:11:59 +02:00
}
else
2011-10-12 03:11:59 +02:00
{
int idx = -1;
for (; beg != tok.end(); ++beg)
{
++idx;
val = boost::trim_copy(*beg);
if (val.empty())
{
if (strict_)
{
std::ostringstream s;
s << "CSV Plugin: expected a column header at line "
<< line_number << ", column " << idx
<< " - ensure this row contains valid header fields: '"
<< csv_line << "'\n";
throw mapnik::datasource_exception(s.str());
}
else
{
// create a placeholder for the empty header
std::ostringstream s;
s << "_" << idx;
headers_.push_back(s.str());
}
}
else
2011-11-14 04:33:57 +01:00
{
std::string lower_val = boost::algorithm::to_lower_copy(val);
if (lower_val == "wkt"
|| (lower_val.find("geom") != std::string::npos))
{
wkt_idx = idx;
has_wkt_field = true;
}
if (lower_val == "geojson")
{
json_idx = idx;
has_json_field = true;
}
if (lower_val == "x"
|| lower_val == "lon"
|| lower_val == "lng"
|| lower_val == "long"
|| (lower_val.find("longitude") != std::string::npos))
{
lon_idx = idx;
has_lon_field = true;
}
if (lower_val == "y"
|| lower_val == "lat"
|| (lower_val.find("latitude") != std::string::npos))
{
lat_idx = idx;
has_lat_field = true;
}
headers_.push_back(val);
}
}
++line_number;
break;
2011-10-12 03:11:59 +02:00
}
}
2011-11-14 04:33:57 +01:00
catch(const std::exception & ex)
{
std::ostringstream s;
s << "CSV Plugin: error parsing headers: " << ex.what();
throw mapnik::datasource_exception(s.str());
}
2011-10-12 03:11:59 +02:00
}
}
if (!has_wkt_field && !has_json_field && (!has_lon_field || !has_lat_field) )
{
std::ostringstream s;
s << "CSV Plugin: could not detect column headers with the name of wkt, geojson, x/y, or latitude/longitude - this is required for reading geometry data";
throw mapnik::datasource_exception(s.str());
}
int feature_count(0);
bool extent_initialized = false;
std::size_t num_headers = headers_.size();
for (std::size_t i = 0; i < headers_.size(); ++i)
{
ctx_->push(headers_[i]);
}
mapnik::transcoder tr(desc_.get_encoding());
mapnik::wkt_parser parse_wkt;
mapnik::json::geometry_parser<std::string::const_iterator> parse_json;
// handle rare case of a single line of data and user-provided headers
// where a lack of a newline will mean that std::getline returns false
bool is_first_row = false;
if (!has_newline)
{
stream >> csv_line;
if (!csv_line.empty())
{
is_first_row = true;
}
}
while (std::getline(stream,csv_line,newline) || is_first_row)
{
is_first_row = false;
if ((row_limit_ > 0) && (line_number > row_limit_))
2011-10-12 03:11:59 +02:00
{
MAPNIK_LOG_DEBUG(csv) << "csv_datasource: row limit hit, exiting at feature: " << feature_count;
break;
}
2011-11-14 04:33:57 +01:00
// skip blank lines
unsigned line_length = csv_line.length();
if (line_length <= 10)
{
std::string trimmed = csv_line;
boost::trim_if(trimmed,boost::algorithm::is_any_of("\",'\r\n "));
2012-04-08 02:20:56 +02:00
if (trimmed.empty())
{
++line_number;
MAPNIK_LOG_DEBUG(csv) << "csv_datasource: empty row encountered at line: " << line_number;
2012-04-08 02:20:56 +02:00
continue;
}
}
try
{
// special handling for varieties of quoting that we will enounter with json
// TODO - test with custom "quo" option
if (has_json_field && (quo == "\"") && (std::count(csv_line.begin(), csv_line.end(), '"') >= 6))
{
csv_utils::fix_json_quoting(csv_line);
}
2011-10-17 20:18:44 +02:00
Tokenizer tok(csv_line, grammer);
Tokenizer::iterator beg = tok.begin();
2011-11-14 04:33:57 +01:00
unsigned num_fields = std::distance(beg,tok.end());
if (num_fields > num_headers)
{
std::ostringstream s;
s << "CSV Plugin: # of columns("
<< num_fields << ") > # of headers("
<< num_headers << ") parsed for row " << line_number << "\n";
throw mapnik::datasource_exception(s.str());
}
else if (num_fields < num_headers)
2011-10-12 03:11:59 +02:00
{
std::ostringstream s;
s << "CSV Plugin: # of headers("
<< num_headers << ") > # of columns("
<< num_fields << ") parsed for row " << line_number << "\n";
if (strict_)
{
throw mapnik::datasource_exception(s.str());
}
else
{
MAPNIK_LOG_WARN(csv) << s.str();
}
2011-10-12 03:11:59 +02:00
}
2011-11-14 04:33:57 +01:00
// NOTE: we use ++feature_count here because feature id's should start at 1;
mapnik::feature_ptr feature(mapnik::feature_factory::create(ctx_,++feature_count));
double x(0);
double y(0);
bool parsed_x = false;
bool parsed_y = false;
bool parsed_wkt = false;
bool parsed_json = false;
std::vector<std::string> collected;
for (unsigned i = 0; i < num_headers; ++i)
2011-10-12 03:11:59 +02:00
{
std::string fld_name(headers_.at(i));
collected.push_back(fld_name);
std::string value;
if (beg == tok.end()) // there are more headers than column values for this row
{
// add an empty string here to represent a missing value
// not using null type here since nulls are not a csv thing
feature->put(fld_name,tr.transcode(value.c_str()));
if (feature_count == 1)
{
desc_.add_descriptor(mapnik::attribute_descriptor(fld_name,mapnik::String));
}
// continue here instead of break so that all missing values are
// encoded consistenly as empty strings
continue;
}
else
{
value = boost::trim_copy(*beg);
++beg;
}
int value_length = value.length();
2011-11-14 04:33:57 +01:00
2011-10-12 03:11:59 +02:00
// parse wkt
if (has_wkt_field)
2011-10-12 03:11:59 +02:00
{
2011-11-14 04:33:57 +01:00
if (i == wkt_idx)
{
// skip empty geoms
if (value.empty())
{
break;
}
if (parse_wkt.parse(value, feature->paths()))
2011-11-14 04:33:57 +01:00
{
parsed_wkt = true;
2011-11-14 04:33:57 +01:00
}
else
{
std::ostringstream s;
s << "CSV Plugin: expected well known text geometry: could not parse row "
<< line_number
<< ",column "
<< i << " - found: '"
<< value << "'";
if (strict_)
2011-11-14 04:33:57 +01:00
{
throw mapnik::datasource_exception(s.str());
2011-11-14 04:33:57 +01:00
}
else
{
MAPNIK_LOG_ERROR(csv) << s.str();
2011-11-14 04:33:57 +01:00
}
}
}
2011-10-12 03:11:59 +02:00
}
// TODO - support both wkt/geojson columns
// at once to create multi-geoms?
// parse as geojson
else if (has_json_field)
{
if (i == json_idx)
{
// skip empty geoms
if (value.empty())
{
break;
}
if (parse_json.parse(value.begin(),value.end(), feature->paths()))
{
parsed_json = true;
}
else
{
std::ostringstream s;
s << "CSV Plugin: expected geojson geometry: could not parse row "
<< line_number
<< ",column "
<< i << " - found: '"
<< value << "'";
if (strict_)
{
throw mapnik::datasource_exception(s.str());
}
else
{
MAPNIK_LOG_ERROR(csv) << s.str();
}
}
}
}
else
2011-10-12 03:11:59 +02:00
{
// longitude
if (i == lon_idx)
2011-10-12 03:11:59 +02:00
{
// skip empty geoms
if (value.empty())
{
break;
}
if (mapnik::util::string2double(value,x))
{
parsed_x = true;
}
else
{
std::ostringstream s;
s << "CSV Plugin: expected a float value for longitude: could not parse row "
<< line_number
<< ", column "
<< i << " - found: '"
<< value << "'";
if (strict_)
{
2011-11-14 04:33:57 +01:00
throw mapnik::datasource_exception(s.str());
}
else
{
MAPNIK_LOG_ERROR(csv) << s.str();
}
}
2011-10-12 03:11:59 +02:00
}
// latitude
else if (i == lat_idx)
2011-10-12 03:11:59 +02:00
{
// skip empty geoms
if (value.empty())
{
break;
}
if (mapnik::util::string2double(value,y))
{
parsed_y = true;
}
else
{
std::ostringstream s;
s << "CSV Plugin: expected a float value for latitude: could not parse row "
<< line_number
<< ", column "
<< i << " - found: '"
<< value << "'";
if (strict_)
{
2011-11-14 04:33:57 +01:00
throw mapnik::datasource_exception(s.str());
}
else
{
MAPNIK_LOG_ERROR(csv) << s.str();
}
}
2011-10-12 03:11:59 +02:00
}
}
2011-11-14 04:33:57 +01:00
// now, add attributes, skipping any WKT or JSON fiels
if ((has_wkt_field) && (i == wkt_idx)) continue;
if ((has_json_field) && (i == json_idx)) continue;
/* First we detect likely strings, then try parsing likely numbers,
finally falling back to string type
* We intentionally do not try to detect boolean or null types
2012-02-02 02:37:35 +01:00
since they are not common in csv
* Likely strings are either empty values, very long values
2012-02-02 02:37:35 +01:00
or value with leading zeros like 001 (which are not safe
to assume are numbers)
*/
bool has_dot = value.find(".") != std::string::npos;
if (value.empty() ||
2012-02-02 02:37:35 +01:00
(value_length > 20) ||
(value_length > 1 && !has_dot && value[0] == '0'))
2011-10-12 03:11:59 +02:00
{
feature->put(fld_name,tr.transcode(value.c_str()));
if (feature_count == 1)
2011-11-02 01:48:30 +01:00
{
desc_.add_descriptor(mapnik::attribute_descriptor(fld_name,mapnik::String));
2011-11-02 01:48:30 +01:00
}
2011-10-12 03:11:59 +02:00
}
else if ((value[0] >= '0' && value[0] <= '9') || value[0] == '-')
2011-10-12 03:11:59 +02:00
{
double float_val = 0.0;
std::string::const_iterator str_beg = value.begin();
std::string::const_iterator str_end = value.end();
bool r = qi::phrase_parse(str_beg,str_end,qi::double_,ascii::space,float_val);
if (r && (str_beg == str_end))
2011-10-12 03:11:59 +02:00
{
if (has_dot)
2011-10-12 03:11:59 +02:00
{
feature->put(fld_name,float_val);
if (feature_count == 1)
2011-11-02 01:48:30 +01:00
{
desc_.add_descriptor(
mapnik::attribute_descriptor(
fld_name,mapnik::Double));
2011-11-02 01:48:30 +01:00
}
2011-10-12 03:11:59 +02:00
}
else
{
feature->put(fld_name,static_cast<int>(float_val));
if (feature_count == 1)
2011-11-02 01:48:30 +01:00
{
desc_.add_descriptor(
mapnik::attribute_descriptor(
fld_name,mapnik::Integer));
2011-11-02 01:48:30 +01:00
}
2011-10-12 03:11:59 +02:00
}
}
else
{
// fallback to normal string
feature->put(fld_name,tr.transcode(value.c_str()));
if (feature_count == 1)
2011-11-02 01:48:30 +01:00
{
desc_.add_descriptor(
mapnik::attribute_descriptor(
fld_name,mapnik::String));
2011-11-02 01:48:30 +01:00
}
}
}
else
{
// fallback to normal string
feature->put(fld_name,tr.transcode(value.c_str()));
if (feature_count == 1)
{
desc_.add_descriptor(
mapnik::attribute_descriptor(
fld_name,mapnik::String));
2011-10-12 03:11:59 +02:00
}
}
}
2011-11-14 04:33:57 +01:00
bool null_geom = true;
if (has_wkt_field || has_json_field)
2011-10-12 03:11:59 +02:00
{
if (parsed_wkt || parsed_json)
2011-10-12 03:11:59 +02:00
{
if (!extent_initialized)
2011-10-12 03:11:59 +02:00
{
extent_initialized = true;
extent_ = feature->envelope();
}
else
{
extent_.expand_to_include(feature->envelope());
}
features_.push_back(feature);
null_geom = false;
2011-10-12 03:11:59 +02:00
}
else
2011-10-12 03:11:59 +02:00
{
std::ostringstream s;
s << "CSV Plugin: could not read WKT or GeoJSON geometry "
2011-11-14 04:33:57 +01:00
<< "for line " << line_number << " - found " << headers_.size()
<< " with values like: " << csv_line << "\n";
if (strict_)
{
throw mapnik::datasource_exception(s.str());
}
else
{
MAPNIK_LOG_ERROR(csv) << s.str();
continue;
}
2011-10-12 03:11:59 +02:00
}
}
else if (has_lat_field || has_lon_field)
{
if (parsed_x && parsed_y)
2011-10-12 03:11:59 +02:00
{
mapnik::geometry_type * pt = new mapnik::geometry_type(mapnik::Point);
pt->move_to(x,y);
feature->add_geometry(pt);
features_.push_back(feature);
null_geom = false;
if (!extent_initialized)
{
extent_initialized = true;
extent_ = feature->envelope();
}
else
{
extent_.expand_to_include(feature->envelope());
}
2011-10-12 03:11:59 +02:00
}
else if (parsed_x || parsed_y)
2011-10-12 03:11:59 +02:00
{
std::ostringstream s;
s << "CSV Plugin: does your csv have valid headers?\n";
if (!parsed_x)
2011-10-12 03:11:59 +02:00
{
s << "Could not detect or parse any rows named 'x' or 'longitude' "
2011-11-14 04:33:57 +01:00
<< "for line " << line_number << " but found " << headers_.size()
<< " with values like: " << csv_line << "\n"
<< "for: " << boost::algorithm::join(collected, ",") << "\n";
}
if (!parsed_y)
{
s << "Could not detect or parse any rows named 'y' or 'latitude' "
2011-11-14 04:33:57 +01:00
<< "for line " << line_number << " but found " << headers_.size()
<< " with values like: " << csv_line << "\n"
<< "for: " << boost::algorithm::join(collected, ",") << "\n";
}
if (strict_)
{
throw mapnik::datasource_exception(s.str());
2011-10-12 03:11:59 +02:00
}
else
{
MAPNIK_LOG_ERROR(csv) << s.str();
continue;
2011-10-12 03:11:59 +02:00
}
}
}
if (null_geom)
{
std::ostringstream s;
s << "CSV Plugin: could not detect and parse valid lat/lon fields or wkt/json geometry for line "
<< line_number;
if (strict_)
{
throw mapnik::datasource_exception(s.str());
}
else
{
MAPNIK_LOG_ERROR(csv) << s.str();
// with no geometry we will never
// add this feature so drop the count
feature_count--;
continue;
}
}
++line_number;
}
catch(mapnik::datasource_exception const& ex )
{
if (strict_)
{
throw mapnik::datasource_exception(ex.what());
}
else
{
MAPNIK_LOG_ERROR(csv) << ex.what();
}
}
catch(std::exception const& ex)
{
std::ostringstream s;
s << "CSV Plugin: unexpected error parsing line: " << line_number
<< " - found " << headers_.size() << " with values like: " << csv_line << "\n"
<< " and got error like: " << ex.what();
if (strict_)
{
throw mapnik::datasource_exception(s.str());
}
else
{
MAPNIK_LOG_ERROR(csv) << s.str();
}
2011-10-12 03:11:59 +02:00
}
}
if (!feature_count > 0)
{
MAPNIK_LOG_ERROR(csv) << "CSV Plugin: could not parse any lines of data";
}
2011-10-12 03:11:59 +02:00
}
// Returns the plugin's name, "csv".
char const* csv_datasource::name()
{
    return "csv";
}
// Reports this datasource as a vector datasource.
datasource::datasource_t csv_datasource::type() const
{
    datasource::datasource_t const kind = datasource::Vector;
    return kind;
}
// Returns the extent accumulated while parsing, binding the data first if
// that has not happened yet.
mapnik::box2d<double> csv_datasource::envelope() const
{
    if (!is_bound_)
    {
        bind();
    }
    return extent_;
}
// Determines the geometry type by sampling at most the first five features.
//
// Returns Collection as soon as two sampled features report different types;
// otherwise returns whatever to_ds_type left in the result after the last
// sampled feature (unset when there are no features).
boost::optional<mapnik::datasource::geometry_t> csv_datasource::get_geometry_type() const
{
    if (!is_bound_)
    {
        bind();
    }

    boost::optional<mapnik::datasource::geometry_t> detected;
    int previous_type = 0;
    unsigned const total = features_.size();
    unsigned const sample_size = (total < 5) ? total : 5;

    for (unsigned idx = 0; idx != sample_size; ++idx)
    {
        mapnik::util::to_ds_type(features_[idx]->paths(), detected);
        if (!detected)
        {
            continue;
        }

        int const current_type = static_cast<int>(*detected);
        if (previous_type > 0 && previous_type != current_type)
        {
            // mixed geometry types among the sampled features
            detected.reset(mapnik::datasource::Collection);
            return detected;
        }
        previous_type = current_type;
    }
    return detected;
}
2011-10-12 03:11:59 +02:00
// Returns a copy of the layer descriptor, binding the data first if needed.
mapnik::layer_descriptor csv_datasource::get_descriptor() const
{
    if (!is_bound_)
    {
        bind();
    }
    return desc_;
}
// Returns an in-memory featureset restricted to the query's bounding box.
//
// Every property name requested by the query must match one of the parsed
// headers exactly; otherwise mapnik::datasource_exception is thrown listing
// the valid attribute names.
mapnik::featureset_ptr csv_datasource::features(mapnik::query const& q) const
{
    if (!is_bound_)
    {
        bind();
    }

    std::set<std::string> const& requested = q.property_names();
    for (std::set<std::string>::const_iterator name_it = requested.begin();
         name_it != requested.end();
         ++name_it)
    {
        // linear search for the requested name among the headers
        std::size_t column = 0;
        while (column < headers_.size() && !(headers_[column] == *name_it))
        {
            ++column;
        }

        if (column == headers_.size())
        {
            std::ostringstream s;
            s << "CSV Plugin: no attribute '" << *name_it << "'. Valid attributes are: "
              << boost::algorithm::join(headers_, ",") << ".";
            throw mapnik::datasource_exception(s.str());
        }
    }

    return boost::make_shared<mapnik::memory_featureset>(q.get_bbox(),features_);
}
// Point queries are not implemented: after making sure the data is bound,
// this always throws mapnik::datasource_exception.
mapnik::featureset_ptr csv_datasource::features_at_point(mapnik::coord2d const& pt) const
{
    if (!is_bound_)
    {
        bind();
    }

    throw mapnik::datasource_exception("CSV Plugin: features_at_point is not supported yet");
}