CSV - implement spatial index access to features on disk + preserve support for inline data (work-in-progress)
This commit is contained in:
parent
4943cb4cf8
commit
4babec802a
10 changed files with 793 additions and 232 deletions
|
@ -26,6 +26,7 @@
|
|||
//#define BOOST_SPIRIT_DEBUG
|
||||
|
||||
#include <boost/spirit/include/qi.hpp>
|
||||
#include <boost/spirit/include/phoenix.hpp>
|
||||
|
||||
namespace mapnik {
|
||||
|
||||
|
|
|
@ -30,6 +30,8 @@ plugin_env = plugin_base.Clone()
|
|||
plugin_sources = Split(
|
||||
"""
|
||||
%(PLUGIN_NAME)s_datasource.cpp
|
||||
%(PLUGIN_NAME)s_featureset.cpp
|
||||
%(PLUGIN_NAME)s_inline_featureset.cpp
|
||||
""" % locals()
|
||||
)
|
||||
|
||||
|
|
|
@ -20,12 +20,12 @@
|
|||
*
|
||||
*****************************************************************************/
|
||||
|
||||
#include "csv_datasource.hpp"
|
||||
#include "csv_utils.hpp"
|
||||
|
||||
#include "csv_datasource.hpp"
|
||||
#include "csv_featureset.hpp"
|
||||
#include "csv_inline_featureset.hpp"
|
||||
// boost
|
||||
#include <boost/algorithm/string.hpp>
|
||||
#include <boost/spirit/include/qi.hpp>
|
||||
|
||||
// mapnik
|
||||
#include <mapnik/debug.hpp>
|
||||
|
@ -33,18 +33,11 @@
|
|||
#include <mapnik/unicode.hpp>
|
||||
#include <mapnik/feature_layer_desc.hpp>
|
||||
#include <mapnik/feature_factory.hpp>
|
||||
#include <mapnik/geometry.hpp>
|
||||
#include <mapnik/geometry_correct.hpp>
|
||||
#include <mapnik/geometry_is_empty.hpp>
|
||||
#include <mapnik/memory_featureset.hpp>
|
||||
#include <mapnik/wkt/wkt_factory.hpp>
|
||||
#include <mapnik/json/geometry_parser.hpp>
|
||||
#include <mapnik/util/conversions.hpp>
|
||||
#include <mapnik/boolean.hpp>
|
||||
#include <mapnik/util/trim.hpp>
|
||||
#include <mapnik/util/geometry_to_ds_type.hpp>
|
||||
#include <mapnik/value_types.hpp>
|
||||
#include <mapnik/csv/csv_grammar.hpp>
|
||||
// stl
|
||||
#include <sstream>
|
||||
#include <fstream>
|
||||
|
@ -57,24 +50,6 @@ using mapnik::parameters;
|
|||
|
||||
DATASOURCE_PLUGIN(csv_datasource)
|
||||
|
||||
namespace mapnik {
|
||||
|
||||
static const csv_line_grammar<char const*> line_g;
|
||||
|
||||
csv_line parse_line(std::string & line_str, std::string const& separator)
|
||||
{
|
||||
csv_line values;
|
||||
auto start = line_str.c_str();
|
||||
auto end = start + line_str.length();
|
||||
boost::spirit::standard::blank_type blank;
|
||||
if (!boost::spirit::qi::phrase_parse(start, end, (line_g)(boost::phoenix::cref(separator)), blank, values))
|
||||
{
|
||||
throw std::runtime_error("Failed to parse CSV line:\n" + line_str);
|
||||
}
|
||||
return values;
|
||||
}
|
||||
}
|
||||
|
||||
csv_datasource::csv_datasource(parameters const& params)
|
||||
: datasource(params),
|
||||
desc_(csv_datasource::name(), *params.get<std::string>("encoding", "utf-8")),
|
||||
|
@ -91,7 +66,9 @@ csv_datasource::csv_datasource(parameters const& params)
|
|||
strict_(*params.get<mapnik::boolean_type>("strict", false)),
|
||||
filesize_max_(*params.get<double>("filesize_max", 20.0)), // MB
|
||||
ctx_(std::make_shared<mapnik::context_type>()),
|
||||
extent_initialized_(false)
|
||||
extent_initialized_(false),
|
||||
tree_(nullptr),
|
||||
locator_()
|
||||
{
|
||||
boost::optional<std::string> ext = params.get<std::string>("extent");
|
||||
if (ext && !ext->empty())
|
||||
|
@ -136,160 +113,7 @@ csv_datasource::csv_datasource(parameters const& params)
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
csv_datasource::~csv_datasource() { }
|
||||
|
||||
namespace detail {
|
||||
|
||||
// Return the length of `stream` in bytes.
//
// The stream is positioned at its end on return; callers that want to read
// from it afterwards must seek back themselves.
//
// Returns 0 when the end position cannot be determined. tellg() reports
// failure as -1, which would otherwise wrap around to SIZE_MAX once
// converted to std::size_t.
template <typename T>
std::size_t file_length(T & stream)
{
    stream.seekg(0, std::ios::end);
    std::streamoff const end_offset = stream.tellg();
    if (end_offset < 0)
    {
        return 0;
    }
    return static_cast<std::size_t>(end_offset);
}
|
||||
|
||||
std::string detect_separator(std::string const& str)
|
||||
{
|
||||
std::string separator = ","; // default
|
||||
int num_commas = std::count(str.begin(), str.end(), ',');
|
||||
// detect tabs
|
||||
int num_tabs = std::count(str.begin(), str.end(), '\t');
|
||||
if (num_tabs > 0)
|
||||
{
|
||||
if (num_tabs > num_commas)
|
||||
{
|
||||
separator = "\t";
|
||||
MAPNIK_LOG_DEBUG(csv) << "csv_datasource: auto detected tab separator";
|
||||
}
|
||||
}
|
||||
else // pipes
|
||||
{
|
||||
int num_pipes = std::count(str.begin(), str.end(), '|');
|
||||
if (num_pipes > num_commas)
|
||||
{
|
||||
separator = "|";
|
||||
MAPNIK_LOG_DEBUG(csv) << "csv_datasource: auto detected '|' separator";
|
||||
}
|
||||
else // semicolons
|
||||
{
|
||||
int num_semicolons = std::count(str.begin(), str.end(), ';');
|
||||
if (num_semicolons > num_commas)
|
||||
{
|
||||
separator = ";";
|
||||
MAPNIK_LOG_DEBUG(csv) << "csv_datasource: auto detected ';' separator";
|
||||
}
|
||||
}
|
||||
}
|
||||
return separator;
|
||||
}
|
||||
|
||||
// Detect the line terminator by scanning the start of `stream`.
//
// Reads at most min(file_length, 4000) characters from the current read
// position and stops at the first '\r' or '\n'.
//
// Returns (newline, has_newline): newline is '\r' when a carriage return is
// seen first and '\n' otherwise; has_newline is false when neither character
// was found in the scanned prefix.
//
// The scan also stops as soon as the stream reports EOF or an error, so an
// overstated file_length no longer causes repeated reads on a dead stream.
template <typename T>
std::tuple<char,bool> autodect_newline(T & stream, std::size_t file_length)
{
    char newline = '\n';
    bool has_newline = false;
    for (std::size_t lidx = 0; lidx < file_length && lidx < 4000; ++lidx)
    {
        auto const raw = stream.get();
        if (!stream)
        {
            // nothing more to read
            break;
        }
        char const c = static_cast<char>(raw);
        if (c == '\r')
        {
            newline = '\r';
            has_newline = true;
            break;
        }
        if (c == '\n')
        {
            has_newline = true;
            break;
        }
    }
    return std::make_tuple(newline, has_newline);
}
|
||||
|
||||
|
||||
// Records which column(s) of a CSV row carry the geometry and how it is encoded.
//
// type   - encoding of the geometry (UNKNOWN until a geometry header is recognised)
// index  - column of the WKT / GeoJSON text, or of the longitude (x) value
// index2 - column of the latitude (y) value; only set for LON_LAT
//
// Both indexes start out as the largest std::size_t value, meaning "not set".
struct geometry_column_locator
{
    geometry_column_locator()
        : type(UNKNOWN),
          // explicit cast: the "not set" sentinel is deliberately SIZE_MAX
          index(static_cast<std::size_t>(-1)),
          index2(static_cast<std::size_t>(-1)) {}

    enum { UNKNOWN = 0, WKT, GEOJSON, LON_LAT } type;
    std::size_t index;
    std::size_t index2;
};
|
||||
|
||||
void locate_geometry_column(std::string const& header, std::size_t index, geometry_column_locator & locator)
|
||||
{
|
||||
std::string lower_val(header);
|
||||
std::transform(lower_val.begin(), lower_val.end(), lower_val.begin(), ::tolower);
|
||||
if (lower_val == "wkt" || (lower_val.find("geom") != std::string::npos))
|
||||
{
|
||||
locator.type = geometry_column_locator::WKT;
|
||||
locator.index = index;
|
||||
}
|
||||
else if (lower_val == "geojson")
|
||||
{
|
||||
locator.type = geometry_column_locator::GEOJSON;
|
||||
locator.index = index;
|
||||
}
|
||||
else if (lower_val == "x" || lower_val == "lon"
|
||||
|| lower_val == "lng" || lower_val == "long"
|
||||
|| (lower_val.find("longitude") != std::string::npos))
|
||||
{
|
||||
locator.index = index;
|
||||
locator.type = geometry_column_locator::LON_LAT;
|
||||
}
|
||||
|
||||
else if (lower_val == "y"
|
||||
|| lower_val == "lat"
|
||||
|| (lower_val.find("latitude") != std::string::npos))
|
||||
{
|
||||
locator.index2 = index;
|
||||
locator.type = geometry_column_locator::LON_LAT;
|
||||
}
|
||||
}
|
||||
|
||||
// Build the geometry for one parsed CSV row.
//
// row     - the column values of the row
// locator - which column(s) hold the geometry and in which encoding
//
// Returns the parsed geometry; a default-constructed geometry is returned
// when locator.type is UNKNOWN.
//
// Throws std::runtime_error when the geometry text cannot be parsed, or when
// the row does not contain the column the locator points at. The latter
// covers short rows and a LON_LAT locator for which only one of the two
// coordinate columns was ever found (the other index is still "not set").
mapnik::geometry::geometry<double> extract_geometry(std::vector<std::string> const& row, geometry_column_locator const& locator)
{
    // bounds-checked column access; indexing past the end of `row` would be undefined behaviour
    auto const column = [&row](std::size_t idx, char const* what) -> std::string const&
    {
        if (idx >= row.size())
        {
            throw std::runtime_error(std::string("CSV Plugin: row is missing the ") + what + " column");
        }
        return row[idx];
    };

    mapnik::geometry::geometry<double> geom;
    if (locator.type == geometry_column_locator::WKT)
    {
        if (mapnik::from_wkt(column(locator.index, "WKT"), geom))
        {
            // correct orientations ..
            mapnik::geometry::correct(geom);
        }
        else
        {
            throw std::runtime_error("FIXME WKT");
        }
    }
    else if (locator.type == geometry_column_locator::GEOJSON)
    {
        if (!mapnik::json::from_geojson(column(locator.index, "GeoJSON"), geom))
        {
            throw std::runtime_error("FIXME GEOJSON");
        }
    }
    else if (locator.type == geometry_column_locator::LON_LAT)
    {
        double x = 0.0;
        double y = 0.0;
        if (!mapnik::util::string2double(column(locator.index, "longitude"), x))
        {
            throw std::runtime_error("FIXME Lon");
        }
        if (!mapnik::util::string2double(column(locator.index2, "latitude"), y))
        {
            throw std::runtime_error("FIXME Lat");
        }
        geom = mapnik::geometry::point<double>(x, y);
    }
    return geom;
}
|
||||
|
||||
} // ns detail
|
||||
csv_datasource::~csv_datasource() {}
|
||||
|
||||
template <typename T>
|
||||
void csv_datasource::parse_csv(T & stream,
|
||||
|
@ -305,15 +129,17 @@ void csv_datasource::parse_csv(T & stream,
|
|||
std::tie(newline, has_newline) = detail::autodect_newline(stream, file_length);
|
||||
// set back to start
|
||||
stream.seekg(0, std::ios::beg);
|
||||
|
||||
// get first line
|
||||
std::string csv_line;
|
||||
std::getline(stream,csv_line,stream.widen(newline));
|
||||
|
||||
// if user has not passed a separator manually
|
||||
// then attempt to detect by reading first line
|
||||
|
||||
std::string sep = mapnik::util::trim_copy(separator);
|
||||
if (sep.empty()) sep = detail::detect_separator(csv_line);
|
||||
separator_ = sep; // <------------------- FIXME !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
// set back to start
|
||||
stream.seekg(0, std::ios::beg);
|
||||
|
||||
|
@ -327,8 +153,6 @@ void csv_datasource::parse_csv(T & stream,
|
|||
<< "' quo: '" << quo << "' esc: '" << esc << "'";
|
||||
|
||||
int line_number = 1;
|
||||
detail::geometry_column_locator locator;
|
||||
|
||||
if (!manual_headers_.empty())
|
||||
{
|
||||
std::size_t index = 0;
|
||||
|
@ -336,7 +160,7 @@ void csv_datasource::parse_csv(T & stream,
|
|||
for (auto const& header : headers)
|
||||
{
|
||||
std::string val = mapnik::util::trim_copy(header);
|
||||
detail::locate_geometry_column(val, index++, locator);
|
||||
detail::locate_geometry_column(val, index++, locator_);
|
||||
headers_.push_back(val);
|
||||
}
|
||||
}
|
||||
|
@ -377,7 +201,7 @@ void csv_datasource::parse_csv(T & stream,
|
|||
}
|
||||
else
|
||||
{
|
||||
detail::locate_geometry_column(val, index, locator);
|
||||
detail::locate_geometry_column(val, index, locator_);
|
||||
headers_.push_back(val);
|
||||
}
|
||||
++index;
|
||||
|
@ -395,7 +219,7 @@ void csv_datasource::parse_csv(T & stream,
|
|||
}
|
||||
}
|
||||
|
||||
if (locator.type == detail::geometry_column_locator::UNKNOWN)
|
||||
if (locator_.type == detail::geometry_column_locator::UNKNOWN)
|
||||
{
|
||||
throw mapnik::datasource_exception("CSV Plugin: could not detect column headers with the name of wkt, geojson, x/y, or "
|
||||
"latitude/longitude - this is required for reading geometry data");
|
||||
|
@ -421,8 +245,15 @@ void csv_datasource::parse_csv(T & stream,
|
|||
is_first_row = true;
|
||||
}
|
||||
}
|
||||
while (std::getline(stream,csv_line, stream.widen(newline)) || is_first_row)
|
||||
|
||||
std::vector<item_type> boxes;
|
||||
auto pos = stream.tellg();
|
||||
while (std::getline(stream, csv_line, stream.widen(newline)) || is_first_row)
|
||||
{
|
||||
auto record_offset = pos;
|
||||
auto record_size = csv_line.length();
|
||||
|
||||
pos = stream.tellg();
|
||||
is_first_row = false;
|
||||
if ((row_limit_ > 0) && (line_number > row_limit_))
|
||||
{
|
||||
|
@ -474,14 +305,13 @@ void csv_datasource::parse_csv(T & stream,
|
|||
|
||||
auto beg = values.begin();
|
||||
auto end = values.end();
|
||||
|
||||
|
||||
auto geom = detail::extract_geometry(values, locator);
|
||||
auto geom = detail::extract_geometry(values, locator_);
|
||||
if (!geom.is<mapnik::geometry::geometry_empty>())
|
||||
{
|
||||
auto box = mapnik::geometry::envelope(geom);
|
||||
|
||||
mapnik::feature_ptr feature(mapnik::feature_factory::create(ctx_, ++feature_count));
|
||||
feature->set_geometry(std::move(geom));
|
||||
boxes.emplace_back(std::move(box), make_pair(record_offset, record_size));
|
||||
++feature_count;
|
||||
|
||||
std::vector<std::string> collected;
|
||||
for (unsigned i = 0; i < num_headers; ++i)
|
||||
|
@ -493,7 +323,7 @@ void csv_datasource::parse_csv(T & stream,
|
|||
{
|
||||
// add an empty string here to represent a missing value
|
||||
// not using null type here since nulls are not a csv thing
|
||||
feature->put(fld_name,tr.transcode(value.c_str()));
|
||||
//feature->put(fld_name,tr.transcode(value.c_str()));
|
||||
if (feature_count == 1)
|
||||
{
|
||||
desc_.add_descriptor(mapnik::attribute_descriptor(fld_name,mapnik::String));
|
||||
|
@ -509,8 +339,8 @@ void csv_datasource::parse_csv(T & stream,
|
|||
int value_length = value.length();
|
||||
|
||||
// now, add attributes, skipping any WKT or JSON fields
|
||||
if (locator.index == i && (locator.type == detail::geometry_column_locator::WKT
|
||||
|| locator.type == detail::geometry_column_locator::GEOJSON) ) continue;
|
||||
if (locator_.index == i && (locator_.type == detail::geometry_column_locator::WKT
|
||||
|| locator_.type == detail::geometry_column_locator::GEOJSON) ) continue;
|
||||
|
||||
// First we detect likely strings,
|
||||
// then try parsing likely numbers,
|
||||
|
@ -529,7 +359,7 @@ void csv_datasource::parse_csv(T & stream,
|
|||
(value_length > 1 && !has_dot && value[0] == '0'))
|
||||
{
|
||||
matched = true;
|
||||
feature->put(fld_name,std::move(tr.transcode(value.c_str())));
|
||||
//feature->put(fld_name,std::move(tr.transcode(value.c_str())));
|
||||
if (feature_count == 1)
|
||||
{
|
||||
desc_.add_descriptor(mapnik::attribute_descriptor(fld_name,mapnik::String));
|
||||
|
@ -544,7 +374,7 @@ void csv_datasource::parse_csv(T & stream,
|
|||
if (mapnik::util::string2double(value,float_val))
|
||||
{
|
||||
matched = true;
|
||||
feature->put(fld_name,float_val);
|
||||
//feature->put(fld_name,float_val);
|
||||
if (feature_count == 1)
|
||||
{
|
||||
desc_.add_descriptor(
|
||||
|
@ -559,7 +389,7 @@ void csv_datasource::parse_csv(T & stream,
|
|||
if (mapnik::util::string2int(value,int_val))
|
||||
{
|
||||
matched = true;
|
||||
feature->put(fld_name,int_val);
|
||||
//feature->put(fld_name,int_val);
|
||||
if (feature_count == 1)
|
||||
{
|
||||
desc_.add_descriptor(
|
||||
|
@ -589,7 +419,6 @@ void csv_datasource::parse_csv(T & stream,
|
|||
}
|
||||
if (matched)
|
||||
{
|
||||
feature->put(fld_name,bool_val);
|
||||
if (feature_count == 1)
|
||||
{
|
||||
desc_.add_descriptor(
|
||||
|
@ -600,7 +429,6 @@ void csv_datasource::parse_csv(T & stream,
|
|||
else
|
||||
{
|
||||
// fallback to normal string
|
||||
feature->put(fld_name,std::move(tr.transcode(value.c_str())));
|
||||
if (feature_count == 1)
|
||||
{
|
||||
desc_.add_descriptor(
|
||||
|
@ -616,21 +444,21 @@ void csv_datasource::parse_csv(T & stream,
|
|||
if (!extent_started)
|
||||
{
|
||||
extent_started = true;
|
||||
extent_ = feature->envelope();
|
||||
extent_ = mapnik::geometry::envelope(geom);
|
||||
}
|
||||
else
|
||||
{
|
||||
extent_.expand_to_include(feature->envelope());
|
||||
extent_.expand_to_include(mapnik::geometry::envelope(geom));
|
||||
}
|
||||
}
|
||||
features_.push_back(feature);
|
||||
//features_.push_back(feature);
|
||||
}
|
||||
else
|
||||
{
|
||||
std::ostringstream s;
|
||||
s << "CSV Plugin: expected geometry column: could not parse row "
|
||||
<< line_number << " "
|
||||
<< values[locator.index] << "'";
|
||||
<< values[locator_.index] << "'";
|
||||
if (strict_)
|
||||
{
|
||||
throw mapnik::datasource_exception(s.str());
|
||||
|
@ -640,8 +468,6 @@ void csv_datasource::parse_csv(T & stream,
|
|||
MAPNIK_LOG_ERROR(csv) << s.str();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
++line_number;
|
||||
}
|
||||
catch (mapnik::datasource_exception const& ex )
|
||||
|
@ -671,10 +497,12 @@ void csv_datasource::parse_csv(T & stream,
|
|||
}
|
||||
}
|
||||
}
|
||||
if (feature_count < 1)
|
||||
{
|
||||
MAPNIK_LOG_ERROR(csv) << "CSV Plugin: could not parse any lines of data";
|
||||
}
|
||||
//if (feature_count < 1)
|
||||
//{
|
||||
// MAPNIK_LOG_ERROR(csv) << "CSV Plugin: could not parse any lines of data";
|
||||
//}
|
||||
// bulk insert initialise r-tree
|
||||
tree_ = std::make_unique<spatial_index_type>(boxes);
|
||||
}
|
||||
|
||||
const char * csv_datasource::name()
|
||||
|
@ -701,10 +529,43 @@ boost::optional<mapnik::datasource_geometry_t> csv_datasource::get_geometry_type
|
|||
{
|
||||
boost::optional<mapnik::datasource_geometry_t> result;
|
||||
int multi_type = 0;
|
||||
unsigned num_features = features_.size();
|
||||
for (unsigned i = 0; i < num_features && i < 5; ++i)
|
||||
auto itr = tree_->qbegin(boost::geometry::index::intersects(extent_));
|
||||
auto end = tree_->qend();
|
||||
mapnik::context_ptr ctx = std::make_shared<mapnik::context_type>();
|
||||
for (std::size_t count = 0; itr !=end && count < 5; ++itr, ++count)
|
||||
{
|
||||
result = mapnik::util::to_ds_type(features_[i]->get_geometry());
|
||||
csv_datasource::item_type const& item = *itr;
|
||||
std::size_t file_offset = item.second.first;
|
||||
std::size_t size = item.second.second;
|
||||
|
||||
std::string str;
|
||||
if (inline_string_.empty())
|
||||
{
|
||||
#if defined (_WINDOWS)
|
||||
std::ifstream in(mapnik::utf8_to_utf16(filename_),std::ios_base::in | std::ios_base::binary);
|
||||
#else
|
||||
std::ifstream in(filename_.c_str(),std::ios_base::in | std::ios_base::binary);
|
||||
#endif
|
||||
if (!in.is_open())
|
||||
{
|
||||
throw mapnik::datasource_exception("CSV Plugin: could not open: '" + filename_ + "'");
|
||||
}
|
||||
in.seekg(file_offset);
|
||||
std::vector<char> record;
|
||||
record.resize(size);
|
||||
in.read(record.data(), size);
|
||||
str = std::string(record.begin(), record.end());
|
||||
}
|
||||
else
|
||||
{
|
||||
str = inline_string_.substr(file_offset, size);
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
auto values = mapnik::parse_line(str, separator_);
|
||||
auto geom = detail::extract_geometry(values, locator_);
|
||||
result = mapnik::util::to_ds_type(geom);
|
||||
if (result)
|
||||
{
|
||||
int type = static_cast<int>(*result);
|
||||
|
@ -716,34 +577,66 @@ boost::optional<mapnik::datasource_geometry_t> csv_datasource::get_geometry_type
|
|||
multi_type = type;
|
||||
}
|
||||
}
|
||||
catch (std::exception const& ex)
|
||||
{
|
||||
//std::ostringstream s;
|
||||
//s << "CSV Plugin: unexpected error parsing line: " << line_number
|
||||
// << " - found " << headers_.size() << " with values like: " << csv_line << "\n"
|
||||
// << " and got error like: " << ex.what();
|
||||
if (strict_)
|
||||
{
|
||||
throw ex;
|
||||
}
|
||||
else
|
||||
{
|
||||
MAPNIK_LOG_ERROR(csv) << ex.what();
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
mapnik::featureset_ptr csv_datasource::features(mapnik::query const& q) const
|
||||
{
|
||||
std::set<std::string> const& attribute_names = q.property_names();
|
||||
std::set<std::string>::const_iterator pos = attribute_names.begin();
|
||||
while (pos != attribute_names.end())
|
||||
for (auto const& name : q.property_names())
|
||||
{
|
||||
bool found_name = false;
|
||||
for (std::size_t i = 0; i < headers_.size(); ++i)
|
||||
for (auto const& header : headers_)
|
||||
{
|
||||
if (headers_[i] == *pos)
|
||||
if (header == name)
|
||||
{
|
||||
found_name = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (! found_name)
|
||||
if (!found_name)
|
||||
{
|
||||
std::ostringstream s;
|
||||
s << "CSV Plugin: no attribute '" << *pos << "'. Valid attributes are: "
|
||||
s << "CSV Plugin: no attribute '" << name << "'. Valid attributes are: "
|
||||
<< boost::algorithm::join(headers_, ",") << ".";
|
||||
throw mapnik::datasource_exception(s.str());
|
||||
}
|
||||
++pos;
|
||||
}
|
||||
return std::make_shared<mapnik::memory_featureset>(q.get_bbox(),features_);
|
||||
|
||||
mapnik::box2d<double> const& box = q.get_bbox();
|
||||
if (extent_.intersects(box))
|
||||
{
|
||||
csv_featureset::array_type index_array;
|
||||
if (tree_)
|
||||
{
|
||||
tree_->query(boost::geometry::index::intersects(box),std::back_inserter(index_array));
|
||||
std::sort(index_array.begin(),index_array.end(),
|
||||
[] (item_type const& item0, item_type const& item1)
|
||||
{
|
||||
return item0.second.first < item1.second.first;
|
||||
});
|
||||
if (inline_string_.empty())
|
||||
return std::make_shared<csv_featureset>(filename_, locator_, separator_, headers_, ctx_, std::move(index_array));
|
||||
else
|
||||
return std::make_shared<csv_inline_featureset>(inline_string_, locator_, separator_, headers_, ctx_, std::move(index_array));
|
||||
}
|
||||
}
|
||||
return mapnik::featureset_ptr();
|
||||
}
|
||||
|
||||
mapnik::featureset_ptr csv_datasource::features_at_point(mapnik::coord2d const& pt, double tol) const
|
||||
|
|
|
@ -35,15 +35,72 @@
|
|||
|
||||
// boost
|
||||
#include <boost/optional.hpp>
|
||||
#include <boost/spirit/include/qi.hpp>
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wunused-parameter"
|
||||
#pragma GCC diagnostic ignored "-Wunused-variable"
|
||||
#pragma GCC diagnostic ignored "-Wunused-local-typedef"
|
||||
#pragma GCC diagnostic ignored "-Wshadow"
|
||||
#pragma GCC diagnostic ignored "-Wsign-conversion"
|
||||
#pragma GCC diagnostic ignored "-Wconversion"
|
||||
#include <boost/version.hpp>
|
||||
#include <boost/geometry/index/rtree.hpp>
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
#include <mapnik/csv/csv_grammar.hpp>
|
||||
|
||||
// stl
|
||||
#include <vector>
|
||||
#include <deque>
|
||||
#include <string>
|
||||
|
||||
namespace mapnik {
|
||||
|
||||
static const csv_line_grammar<char const*> line_g;
|
||||
|
||||
static csv_line parse_line(std::string const& line_str, std::string const& separator)
|
||||
{
|
||||
csv_line values;
|
||||
auto start = line_str.c_str();
|
||||
auto end = start + line_str.length();
|
||||
boost::spirit::standard::blank_type blank;
|
||||
if (!boost::spirit::qi::phrase_parse(start, end, (line_g)(boost::phoenix::cref(separator)), blank, values))
|
||||
{
|
||||
throw std::runtime_error("Failed to parse CSV line:\n" + line_str);
|
||||
}
|
||||
return values;
|
||||
}
|
||||
}
|
||||
|
||||
template <std::size_t Max, std::size_t Min>
|
||||
struct csv_linear : boost::geometry::index::linear<Max,Min> {};
|
||||
|
||||
namespace boost { namespace geometry { namespace index { namespace detail { namespace rtree {
|
||||
|
||||
template <std::size_t Max, std::size_t Min>
|
||||
struct options_type<csv_linear<Max,Min> >
|
||||
{
|
||||
using type = options<csv_linear<Max, Min>,
|
||||
insert_default_tag,
|
||||
choose_by_content_diff_tag,
|
||||
split_default_tag,
|
||||
linear_tag,
|
||||
#if BOOST_VERSION >= 105700
|
||||
node_variant_static_tag>;
|
||||
#else
|
||||
node_s_mem_static_tag>;
|
||||
|
||||
#endif
|
||||
};
|
||||
}}}}}
|
||||
|
||||
class csv_datasource : public mapnik::datasource
|
||||
{
|
||||
public:
|
||||
using box_type = mapnik::box2d<double>;
|
||||
using item_type = std::pair<box_type, std::pair<std::size_t, std::size_t>>;
|
||||
using spatial_index_type = boost::geometry::index::rtree<item_type,csv_linear<16,4>>;
|
||||
|
||||
csv_datasource(mapnik::parameters const& params);
|
||||
virtual ~csv_datasource ();
|
||||
mapnik::datasource::datasource_t type() const;
|
||||
|
@ -75,6 +132,8 @@ private:
|
|||
double filesize_max_;
|
||||
mapnik::context_ptr ctx_;
|
||||
bool extent_initialized_;
|
||||
std::unique_ptr<spatial_index_type> tree_;
|
||||
detail::geometry_column_locator locator_;
|
||||
};
|
||||
|
||||
#endif // MAPNIK_CSV_DATASOURCE_HPP
|
||||
|
|
168
plugins/input/csv/csv_featureset.cpp
Normal file
168
plugins/input/csv/csv_featureset.cpp
Normal file
|
@ -0,0 +1,168 @@
|
|||
/*****************************************************************************
|
||||
*
|
||||
* This file is part of Mapnik (c++ mapping toolkit)
|
||||
*
|
||||
* Copyright (C) 2015 Artem Pavlenko
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*****************************************************************************/
|
||||
|
||||
// mapnik
|
||||
#include "csv_featureset.hpp"
|
||||
#include <mapnik/debug.hpp>
|
||||
#include <mapnik/feature.hpp>
|
||||
#include <mapnik/feature_factory.hpp>
|
||||
#include <mapnik/util/utf_conv_win.hpp>
|
||||
#include <mapnik/util/trim.hpp>
|
||||
// stl
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <deque>
|
||||
|
||||
// Open `filename` for binary reading and take ownership of the list of
// (bounding box, (offset, size)) index entries to iterate over.
//
// Throws std::runtime_error when the file cannot be opened.
csv_featureset::csv_featureset(std::string const& filename,
                               detail::geometry_column_locator const& locator,
                               std::string const& separator,
                               std::vector<std::string> const& headers,
                               mapnik::context_ptr const& ctx,
                               array_type && index_array)
    :
#if defined(_WINDOWS)
      file_(_wfopen(mapnik::utf8_to_utf16(filename).c_str(), L"rb"), std::fclose),
#else
      file_(std::fopen(filename.c_str(), "rb"), std::fclose),
#endif
      separator_(separator),
      headers_(headers),
      index_array_(std::move(index_array)),
      // the iterators refer to the member just initialised above, not to the argument
      index_itr_(index_array_.begin()),
      index_end_(index_array_.end()),
      ctx_(ctx),
      locator_(locator),
      tr_("utf8")
{
    if (file_.get() == nullptr)
    {
        throw std::runtime_error("Can't open " + filename);
    }
}
|
||||
|
||||
// Nothing to release by hand: file_ closes the FILE handle through its deleter.
csv_featureset::~csv_featureset() = default;
|
||||
|
||||
// Turn the text of one CSV record into a feature.
//
// str - the raw record, as read from the file
//
// Returns a feature with the geometry and one attribute per header, or an
// empty feature_ptr when the row yields an empty geometry. Exceptions from
// mapnik::parse_line and detail::extract_geometry propagate to the caller.
//
// Attribute typing, per column:
//   - empty, longer than 20 characters, or starting with '0' without a '.':
//     stored as string
//   - otherwise, if it looks like a number: stored as double when it contains
//     '.' or 'e', else as integer
//   - "true"/"false" (any case): stored as bool
//   - anything else: stored as string
// A WKT or GeoJSON geometry column is not stored as an attribute.
mapnik::feature_ptr csv_featureset::parse_feature(std::string const& str)
{
    auto values = mapnik::parse_line(str, separator_);
    auto val_beg = values.begin();
    auto val_end = values.end();
    auto geom = detail::extract_geometry(values, locator_);
    if (geom.is<mapnik::geometry::geometry_empty>())
    {
        return mapnik::feature_ptr();
    }

    mapnik::feature_ptr feature(mapnik::feature_factory::create(ctx_, ++feature_id_));
    feature->set_geometry(std::move(geom));

    bool const geometry_in_single_column =
        locator_.type == detail::geometry_column_locator::WKT ||
        locator_.type == detail::geometry_column_locator::GEOJSON;

    std::size_t const num_headers = headers_.size();
    for (std::size_t i = 0; i < num_headers; ++i)
    {
        std::string const& fld_name = headers_[i];
        std::string value;
        if (val_beg == val_end)
        {
            // row is shorter than the header list: store an empty string
            feature->put(fld_name, tr_.transcode(value.c_str()));
            continue;
        }
        value = mapnik::util::trim_copy(*val_beg++);

        // skip the geometry text itself
        if (locator_.index == i && geometry_in_single_column) continue;

        std::size_t const value_length = value.length();
        bool matched = false;
        bool const has_dot = value.find('.') != std::string::npos;
        if (value.empty() ||
            (value_length > 20) ||
            (value_length > 1 && !has_dot && value[0] == '0'))
        {
            matched = true;
            feature->put(fld_name, tr_.transcode(value.c_str()));
        }
        else if (csv_utils::is_likely_number(value))
        {
            bool const has_e = value.find('e') != std::string::npos;
            if (has_dot || has_e)
            {
                double float_val = 0.0;
                if (mapnik::util::string2double(value, float_val))
                {
                    matched = true;
                    feature->put(fld_name, float_val);
                }
            }
            else
            {
                mapnik::value_integer int_val = 0;
                if (mapnik::util::string2int(value, int_val))
                {
                    matched = true;
                    feature->put(fld_name, int_val);
                }
            }
        }
        if (!matched)
        {
            // NOTE: we don't use mapnik::util::string2bool
            // here because we don't want to treat 'on' and 'off'
            // as booleans, only 'true' and 'false'
            bool bool_val = false;
            std::string lower_val = value;
            // tolower() must not be handed a negative char (non-ASCII bytes): cast first
            std::transform(lower_val.begin(), lower_val.end(), lower_val.begin(),
                           [](char c) { return static_cast<char>(::tolower(static_cast<unsigned char>(c))); });
            if (lower_val == "true")
            {
                matched = true;
                bool_val = true;
            }
            else if (lower_val == "false")
            {
                matched = true;
                bool_val = false;
            }
            if (matched)
            {
                feature->put(fld_name, bool_val);
            }
            else
            {
                // fallback to normal string
                feature->put(fld_name, tr_.transcode(value.c_str()));
            }
        }
    }
    return feature;
}
|
||||
|
||||
// Return the next feature selected by the index, or an empty feature_ptr
// once every index entry has been consumed.
//
// Each index entry carries the byte offset and length of one record in the
// file; the record is read from disk and handed to parse_feature().
//
// Throws std::runtime_error when the record cannot be located or read in
// full, instead of parsing a partially filled buffer.
mapnik::feature_ptr csv_featureset::next()
{
    if (index_itr_ == index_end_)
    {
        return mapnik::feature_ptr();
    }

    csv_datasource::item_type const& item = *index_itr_++;
    std::size_t const file_offset = item.second.first;
    std::size_t const size = item.second.second;

    if (std::fseek(file_.get(), static_cast<long>(file_offset), SEEK_SET) != 0)
    {
        throw std::runtime_error("CSV Plugin: failed to seek to record at offset " + std::to_string(file_offset));
    }

    // read straight into the string that gets parsed; no intermediate buffer
    std::string record(size, '\0');
    if (size > 0 && std::fread(&record[0], 1, size, file_.get()) != size)
    {
        throw std::runtime_error("CSV Plugin: failed to read record at offset " + std::to_string(file_offset));
    }
    return parse_feature(record);
}
|
62
plugins/input/csv/csv_featureset.hpp
Normal file
62
plugins/input/csv/csv_featureset.hpp
Normal file
|
@ -0,0 +1,62 @@
|
|||
/*****************************************************************************
|
||||
*
|
||||
* This file is part of Mapnik (c++ mapping toolkit)
|
||||
*
|
||||
* Copyright (C) 2015 Artem Pavlenko
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef CSV_FEATURESET_HPP
|
||||
#define CSV_FEATURESET_HPP
|
||||
|
||||
#include <mapnik/feature.hpp>
|
||||
#include <mapnik/unicode.hpp>
|
||||
#include "csv_utils.hpp"
|
||||
#include "csv_datasource.hpp"
|
||||
#include <deque>
|
||||
#include <cstdio>
|
||||
|
||||
class csv_featureset : public mapnik::Featureset
|
||||
{
|
||||
using file_ptr = std::unique_ptr<std::FILE, int (*)(std::FILE *)>;
|
||||
using locator_type = detail::geometry_column_locator;
|
||||
public:
|
||||
using array_type = std::deque<csv_datasource::item_type>;
|
||||
csv_featureset(std::string const& filename,
|
||||
locator_type const& locator,
|
||||
std::string const& separator,
|
||||
std::vector<std::string> const& headers,
|
||||
mapnik::context_ptr const& ctx,
|
||||
array_type && index_array);
|
||||
~csv_featureset();
|
||||
mapnik::feature_ptr next();
|
||||
private:
|
||||
mapnik::feature_ptr parse_feature(std::string const& str);
|
||||
file_ptr file_;
|
||||
std::string const& separator_;
|
||||
std::vector<std::string> headers_;
|
||||
const array_type index_array_;
|
||||
array_type::const_iterator index_itr_;
|
||||
array_type::const_iterator index_end_;
|
||||
mapnik::context_ptr ctx_;
|
||||
mapnik::value_integer feature_id_ = 0;
|
||||
detail::geometry_column_locator const& locator_;
|
||||
mapnik::transcoder tr_;
|
||||
};
|
||||
|
||||
|
||||
#endif // CSV_FEATURESET_HPP
|
156
plugins/input/csv/csv_inline_featureset.cpp
Normal file
156
plugins/input/csv/csv_inline_featureset.cpp
Normal file
|
@ -0,0 +1,156 @@
|
|||
/*****************************************************************************
|
||||
*
|
||||
* This file is part of Mapnik (c++ mapping toolkit)
|
||||
*
|
||||
* Copyright (C) 2015 Artem Pavlenko
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*****************************************************************************/
|
||||
|
||||
// mapnik
|
||||
#include "csv_inline_featureset.hpp"
|
||||
#include <mapnik/debug.hpp>
|
||||
#include <mapnik/feature.hpp>
|
||||
#include <mapnik/feature_factory.hpp>
|
||||
#include <mapnik/util/utf_conv_win.hpp>
|
||||
#include <mapnik/util/trim.hpp>
|
||||
// stl
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <deque>
|
||||
|
||||
// Sets up iteration over CSV text that is already held in memory.
//
// The index array is moved into the featureset; the begin/end iterators are
// then taken from the *member* copy (index_array_), never from the moved-from
// argument. The transcoder is always created for "utf8".
csv_inline_featureset::csv_inline_featureset(std::string const& inline_string,
                                             detail::geometry_column_locator const& locator,
                                             std::string const& separator,
                                             std::vector<std::string> const& headers,
                                             mapnik::context_ptr const& ctx,
                                             array_type && index_array)
    : inline_string_(inline_string)
    , separator_(separator)
    , headers_(headers)
    , index_array_(std::move(index_array))
    , index_itr_(index_array_.begin())
    , index_end_(index_array_.end())
    , ctx_(ctx)
    , locator_(locator)
    , tr_("utf8")
{
}
|
||||
|
||||
// Nothing to release by hand: every member cleans up after itself.
csv_inline_featureset::~csv_inline_featureset() = default;
|
||||
|
||||
/**
 * Builds a feature from a single CSV record.
 *
 * The record is split with mapnik::parse_line() using the configured
 * separator and the geometry is taken from the column(s) named by the
 * locator. For every header an attribute is stored, typed as follows:
 *   - missing trailing columns                       -> empty string
 *   - empty, longer than 20 chars, or leading '0'
 *     without a '.' (presumably identifiers such as
 *     postal codes)                                  -> string
 *   - "likely number" containing '.' or 'e'          -> double
 *   - other "likely number"                          -> integer
 *   - "true"/"false" (case-insensitive)              -> bool
 *   - anything else                                  -> string
 * A WKT / GeoJSON geometry column is not stored as an attribute.
 *
 * @param str  one CSV record
 * @return the new feature, or an empty feature_ptr when the record yields an
 *         empty geometry (feature_id_ is not advanced in that case)
 * @throws whatever detail::extract_geometry() throws for unparsable geometry
 */
mapnik::feature_ptr csv_inline_featureset::parse_feature(std::string const& str)
{
    auto values = mapnik::parse_line(str, separator_);
    auto val_beg = values.begin();
    auto val_end = values.end();
    auto geom = detail::extract_geometry(values, locator_);
    if (!geom.is<mapnik::geometry::geometry_empty>())
    {
        mapnik::feature_ptr feature(mapnik::feature_factory::create(ctx_, ++feature_id_));
        feature->set_geometry(std::move(geom));
        std::size_t const num_headers = headers_.size();
        for (std::size_t i = 0; i < num_headers; ++i)
        {
            std::string const& fld_name = headers_.at(i);
            std::string value;
            if (val_beg == val_end)
            {
                // record is shorter than the header row: pad with an empty string
                feature->put(fld_name, tr_.transcode(value.c_str()));
                continue;
            }
            else
            {
                value = mapnik::util::trim_copy(*val_beg++);
            }
            std::size_t const value_length = value.length();
            // the textual geometry column is consumed above but not exposed
            if (locator_.index == i && (locator_.type == detail::geometry_column_locator::WKT
                                        || locator_.type == detail::geometry_column_locator::GEOJSON)) continue;
            bool matched = false;
            bool const has_dot = value.find('.') != std::string::npos;
            if (value.empty() ||
                (value_length > 20) ||
                (value_length > 1 && !has_dot && value[0] == '0'))
            {
                matched = true;
                feature->put(fld_name, tr_.transcode(value.c_str()));
            }
            else if (csv_utils::is_likely_number(value))
            {
                bool const has_e = value.find('e') != std::string::npos;
                if (has_dot || has_e)
                {
                    double float_val = 0.0;
                    if (mapnik::util::string2double(value, float_val))
                    {
                        matched = true;
                        feature->put(fld_name, float_val);
                    }
                }
                else
                {
                    mapnik::value_integer int_val = 0;
                    if (mapnik::util::string2int(value, int_val))
                    {
                        matched = true;
                        feature->put(fld_name, int_val);
                    }
                }
            }
            if (!matched)
            {
                // NOTE: we don't use mapnik::util::string2bool
                // here because we don't want to treat 'on' and 'off'
                // as booleans, only 'true' and 'false'
                bool bool_val = false;
                std::string lower_val = value;
                // Go through unsigned char: handing a negative char (any
                // UTF-8 byte >= 0x80 where char is signed) to tolower is
                // undefined behaviour.
                std::transform(lower_val.begin(), lower_val.end(), lower_val.begin(),
                               [](unsigned char c) { return static_cast<char>(::tolower(c)); });
                if (lower_val == "true")
                {
                    matched = true;
                    bool_val = true;
                }
                else if (lower_val == "false")
                {
                    matched = true;
                    bool_val = false;
                }
                if (matched)
                {
                    feature->put(fld_name, bool_val);
                }
                else
                {
                    // fallback to normal string
                    feature->put(fld_name, tr_.transcode(value.c_str()));
                }
            }
        }
        return feature;
    }
    return mapnik::feature_ptr();
}
|
||||
|
||||
// Hands out the feature for the next index entry.
//
// Each entry's `second` member is an (offset, length) pair into the inline
// CSV string; that slice is copied out and parsed as one record. Once the
// index is exhausted an empty feature_ptr is returned.
mapnik::feature_ptr csv_inline_featureset::next()
{
    if (index_itr_ == index_end_)
    {
        return mapnik::feature_ptr();
    }
    csv_datasource::item_type const& entry = *index_itr_;
    ++index_itr_;
    std::size_t const record_offset = entry.second.first;
    std::size_t const record_size = entry.second.second;
    std::string const record = inline_string_.substr(record_offset, record_size);
    return parse_feature(record);
}
|
61
plugins/input/csv/csv_inline_featureset.hpp
Normal file
61
plugins/input/csv/csv_inline_featureset.hpp
Normal file
|
@ -0,0 +1,61 @@
|
|||
/*****************************************************************************
|
||||
*
|
||||
* This file is part of Mapnik (c++ mapping toolkit)
|
||||
*
|
||||
* Copyright (C) 2015 Artem Pavlenko
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef CSV_INLINE_FEATURESET_HPP
|
||||
#define CSV_INLINE_FEATURESET_HPP
|
||||
|
||||
#include <mapnik/feature.hpp>
|
||||
#include <mapnik/unicode.hpp>
|
||||
#include "csv_utils.hpp"
|
||||
#include "csv_datasource.hpp"
|
||||
#include <deque>
|
||||
#include <cstdio>
|
||||
|
||||
class csv_inline_featureset : public mapnik::Featureset
|
||||
{
|
||||
using locator_type = detail::geometry_column_locator;
|
||||
public:
|
||||
using array_type = std::deque<csv_datasource::item_type>;
|
||||
csv_inline_featureset(std::string const& inline_string,
|
||||
locator_type const& locator,
|
||||
std::string const& separator,
|
||||
std::vector<std::string> const& headers,
|
||||
mapnik::context_ptr const& ctx,
|
||||
array_type && index_array);
|
||||
~csv_inline_featureset();
|
||||
mapnik::feature_ptr next();
|
||||
private:
|
||||
mapnik::feature_ptr parse_feature(std::string const& str);
|
||||
std::string const& inline_string_;
|
||||
std::string const& separator_;
|
||||
std::vector<std::string> headers_;
|
||||
const array_type index_array_;
|
||||
array_type::const_iterator index_itr_;
|
||||
array_type::const_iterator index_end_;
|
||||
mapnik::context_ptr ctx_;
|
||||
mapnik::value_integer feature_id_ = 0;
|
||||
detail::geometry_column_locator const& locator_;
|
||||
mapnik::transcoder tr_;
|
||||
};
|
||||
|
||||
|
||||
#endif // CSV_INLINE_FEATURESET_HPP
|
|
@ -23,6 +23,12 @@
|
|||
#ifndef MAPNIK_CSV_UTILS_DATASOURCE_HPP
|
||||
#define MAPNIK_CSV_UTILS_DATASOURCE_HPP
|
||||
|
||||
#include <mapnik/debug.hpp>
|
||||
#include <mapnik/geometry.hpp>
|
||||
#include <mapnik/geometry_correct.hpp>
|
||||
#include <mapnik/wkt/wkt_factory.hpp>
|
||||
#include <mapnik/json/geometry_parser.hpp>
|
||||
#include <mapnik/util/conversions.hpp>
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wunused-parameter"
|
||||
#pragma GCC diagnostic ignored "-Wunused-local-typedef"
|
||||
|
@ -94,4 +100,157 @@ namespace csv_utils
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
namespace detail {
|
||||
|
||||
/**
 * Returns the length of an input stream in bytes.
 *
 * Seeks to the end of the stream and reports that position. The read
 * position is left at the end, so the caller has to seek back before
 * reading.
 *
 * @param stream  seekable input stream (anything with seekg()/tellg())
 * @return the end position, or 0 when the position cannot be determined
 *         (tellg() reports -1 on failure, which would otherwise turn into a
 *         huge unsigned value)
 */
template <typename T>
std::size_t file_length(T & stream)
{
    stream.seekg(0, std::ios::end);
    std::streamoff const end_offset = stream.tellg();
    if (end_offset < 0)
    {
        return 0;
    }
    return static_cast<std::size_t>(end_offset);
}
|
||||
|
||||
static inline std::string detect_separator(std::string const& str)
|
||||
{
|
||||
std::string separator = ","; // default
|
||||
int num_commas = std::count(str.begin(), str.end(), ',');
|
||||
// detect tabs
|
||||
int num_tabs = std::count(str.begin(), str.end(), '\t');
|
||||
if (num_tabs > 0)
|
||||
{
|
||||
if (num_tabs > num_commas)
|
||||
{
|
||||
separator = "\t";
|
||||
MAPNIK_LOG_DEBUG(csv) << "csv_datasource: auto detected tab separator";
|
||||
}
|
||||
}
|
||||
else // pipes
|
||||
{
|
||||
int num_pipes = std::count(str.begin(), str.end(), '|');
|
||||
if (num_pipes > num_commas)
|
||||
{
|
||||
separator = "|";
|
||||
MAPNIK_LOG_DEBUG(csv) << "csv_datasource: auto detected '|' separator";
|
||||
}
|
||||
else // semicolons
|
||||
{
|
||||
int num_semicolons = std::count(str.begin(), str.end(), ';');
|
||||
if (num_semicolons > num_commas)
|
||||
{
|
||||
separator = ";";
|
||||
MAPNIK_LOG_DEBUG(csv) << "csv_datasource: auto detected ';' separator";
|
||||
}
|
||||
}
|
||||
}
|
||||
return separator;
|
||||
}
|
||||
|
||||
// Probes the start of `stream` for the first line terminator.
//
// At most min(file_length, 4000) characters are read from the current
// position. Reading stops right after the first '\r' or '\n'; that character
// is returned together with `true`. If neither is seen, the result is
// ('\n', false). The stream is left wherever the probing stopped.
template <typename T>
std::tuple<char,bool> autodect_newline(T & stream, std::size_t file_length)
{
    std::size_t const max_probe = 4000;
    std::size_t const probe_limit = (file_length < max_probe) ? file_length : max_probe;

    for (std::size_t pos = 0; pos != probe_limit; ++pos)
    {
        char const ch = static_cast<char>(stream.get());
        if (ch == '\r' || ch == '\n')
        {
            return std::make_tuple(ch, true);
        }
    }
    return std::make_tuple('\n', false);
}
|
||||
|
||||
|
||||
// Records where the geometry lives in a CSV row.
//
// `type` names the encoding. `index` is the WKT / GeoJSON / longitude column
// and `index2` the latitude column, as filled in by locate_geometry_column().
// A fresh locator is UNKNOWN with both indices set to size_t(-1).
struct geometry_column_locator
{
    enum { UNKNOWN = 0, WKT, GEOJSON, LON_LAT } type;
    std::size_t index;
    std::size_t index2;

    geometry_column_locator()
        : type(UNKNOWN),
          index(static_cast<std::size_t>(-1)),
          index2(static_cast<std::size_t>(-1))
    {}
};
|
||||
|
||||
static inline void locate_geometry_column(std::string const& header, std::size_t index, geometry_column_locator & locator)
|
||||
{
|
||||
std::string lower_val(header);
|
||||
std::transform(lower_val.begin(), lower_val.end(), lower_val.begin(), ::tolower);
|
||||
if (lower_val == "wkt" || (lower_val.find("geom") != std::string::npos))
|
||||
{
|
||||
locator.type = geometry_column_locator::WKT;
|
||||
locator.index = index;
|
||||
}
|
||||
else if (lower_val == "geojson")
|
||||
{
|
||||
locator.type = geometry_column_locator::GEOJSON;
|
||||
locator.index = index;
|
||||
}
|
||||
else if (lower_val == "x" || lower_val == "lon"
|
||||
|| lower_val == "lng" || lower_val == "long"
|
||||
|| (lower_val.find("longitude") != std::string::npos))
|
||||
{
|
||||
locator.index = index;
|
||||
locator.type = geometry_column_locator::LON_LAT;
|
||||
}
|
||||
|
||||
else if (lower_val == "y"
|
||||
|| lower_val == "lat"
|
||||
|| (lower_val.find("latitude") != std::string::npos))
|
||||
{
|
||||
locator.index2 = index;
|
||||
locator.type = geometry_column_locator::LON_LAT;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Builds the geometry for one parsed CSV row.
 *
 * Depending on `locator.type` the geometry is parsed from a WKT column
 * (orientation corrected afterwards), parsed from a GeoJSON column, or
 * assembled as a point from the longitude (`index`) and latitude (`index2`)
 * columns. For an UNKNOWN locator the default-constructed geometry is
 * returned unchanged.
 *
 * @param row      the fields of one record
 * @param locator  which column(s) to read and how to interpret them
 * @return the extracted geometry
 * @throws std::runtime_error when a required column is missing from `row`
 *         (row too short, or the locator index was never set) or when its
 *         content cannot be parsed
 */
static inline mapnik::geometry::geometry<double> extract_geometry(std::vector<std::string> const& row, geometry_column_locator const& locator)
{
    // Checked access: a locator index defaults to size_t(-1) and rows may be
    // shorter than the header, so plain row[idx] could read out of bounds.
    auto const column = [&row](std::size_t idx) -> std::string const&
    {
        if (idx >= row.size())
        {
            throw std::runtime_error("CSV Plugin: geometry column index is out of range for this row");
        }
        return row[idx];
    };

    mapnik::geometry::geometry<double> geom;
    if (locator.type == geometry_column_locator::WKT)
    {
        if (mapnik::from_wkt(column(locator.index), geom))
        {
            // correct orientations ..
            mapnik::geometry::correct(geom);
        }
        else
        {
            throw std::runtime_error("FIXME WKT");
        }
    }
    else if (locator.type == geometry_column_locator::GEOJSON)
    {
        if (!mapnik::json::from_geojson(column(locator.index), geom))
        {
            throw std::runtime_error("FIXME GEOJSON");
        }
    }
    else if (locator.type == geometry_column_locator::LON_LAT)
    {
        double x = 0.0;
        double y = 0.0;
        if (!mapnik::util::string2double(column(locator.index), x))
        {
            throw std::runtime_error("FIXME Lon");
        }
        if (!mapnik::util::string2double(column(locator.index2), y))
        {
            throw std::runtime_error("FIXME Lat");
        }
        geom = mapnik::geometry::point<double>(x, y);
    }
    return geom;
}
|
||||
|
||||
}// ns detail
|
||||
|
||||
#endif // MAPNIK_CSV_UTILS_DATASOURCE_HPP
|
||||
|
|
|
@ -213,7 +213,7 @@ TEST_CASE("csv") {
|
|||
|
||||
SECTION("lon/lat detection")
|
||||
{
|
||||
for (auto const &lon_name : {std::string("lon"), std::string("lng")})
|
||||
for (auto const& lon_name : {std::string("lon"), std::string("lng")})
|
||||
{
|
||||
auto ds = get_csv_ds((boost::format("test/data/csv/%1%_lat.csv") % lon_name).str());
|
||||
auto fields = ds->get_descriptor().get_descriptors();
|
||||
|
|
Loading…
Reference in a new issue