Merge pull request #3326 from lightmare/faster-csv-compile
merge mapnik-index::process_csv_file and csv_datasource::parse_csv
This commit is contained in:
commit
31171824ec
11 changed files with 376 additions and 512 deletions
|
@ -69,21 +69,13 @@ DATASOURCE_PLUGIN(csv_datasource)
|
||||||
csv_datasource::csv_datasource(parameters const& params)
|
csv_datasource::csv_datasource(parameters const& params)
|
||||||
: datasource(params),
|
: datasource(params),
|
||||||
desc_(csv_datasource::name(), *params.get<std::string>("encoding", "utf-8")),
|
desc_(csv_datasource::name(), *params.get<std::string>("encoding", "utf-8")),
|
||||||
extent_(),
|
|
||||||
filename_(),
|
|
||||||
row_limit_(*params.get<mapnik::value_integer>("row_limit", 0)),
|
|
||||||
inline_string_(),
|
|
||||||
separator_(0),
|
|
||||||
quote_(0),
|
|
||||||
headers_(),
|
|
||||||
manual_headers_(mapnik::util::trim_copy(*params.get<std::string>("headers", ""))),
|
|
||||||
strict_(*params.get<mapnik::boolean_type>("strict", false)),
|
|
||||||
ctx_(std::make_shared<mapnik::context_type>()),
|
ctx_(std::make_shared<mapnik::context_type>()),
|
||||||
extent_initialized_(false),
|
tree_(nullptr)
|
||||||
tree_(nullptr),
|
|
||||||
locator_(),
|
|
||||||
has_disk_index_(false)
|
|
||||||
{
|
{
|
||||||
|
row_limit_ = *params.get<mapnik::value_integer>("row_limit", 0);
|
||||||
|
manual_headers_ = mapnik::util::trim_copy(*params.get<std::string>("headers", ""));
|
||||||
|
strict_ = *params.get<mapnik::boolean_type>("strict", false);
|
||||||
|
|
||||||
auto quote_param = params.get<std::string>("quote");
|
auto quote_param = params.get<std::string>("quote");
|
||||||
if (quote_param)
|
if (quote_param)
|
||||||
{
|
{
|
||||||
|
@ -174,203 +166,33 @@ csv_datasource::csv_datasource(parameters const& params)
|
||||||
|
|
||||||
csv_datasource::~csv_datasource() {}
|
csv_datasource::~csv_datasource() {}
|
||||||
|
|
||||||
template <typename T>
|
void csv_datasource::parse_csv(std::istream & csv_file)
|
||||||
void csv_datasource::parse_csv(T & stream)
|
|
||||||
{
|
{
|
||||||
auto file_length = detail::file_length(stream);
|
std::vector<item_type> boxes;
|
||||||
// set back to start
|
csv_utils::csv_file_parser::parse_csv(csv_file, boxes);
|
||||||
stream.seekg(0, std::ios::beg);
|
|
||||||
char newline;
|
|
||||||
bool has_newline;
|
|
||||||
char detected_quote;
|
|
||||||
char detected_separator;
|
|
||||||
std::tie(newline, has_newline, detected_separator, detected_quote) = detail::autodect_csv_flavour(stream, file_length);
|
|
||||||
if (quote_ == 0) quote_ = detected_quote;
|
|
||||||
if (separator_ == 0) separator_ = detected_separator;
|
|
||||||
|
|
||||||
// set back to start
|
|
||||||
MAPNIK_LOG_DEBUG(csv) << "csv_datasource: separator: '" << separator_
|
|
||||||
<< "' quote: '" << quote_ << "'";
|
|
||||||
|
|
||||||
// rewind stream
|
|
||||||
stream.seekg(0, std::ios::beg);
|
|
||||||
//
|
|
||||||
std::string csv_line;
|
|
||||||
csv_utils::getline_csv(stream, csv_line, newline, quote_);
|
|
||||||
stream.seekg(0, std::ios::beg);
|
|
||||||
int line_number = 0;
|
|
||||||
if (!manual_headers_.empty())
|
|
||||||
{
|
|
||||||
std::size_t index = 0;
|
|
||||||
auto headers = csv_utils::parse_line(manual_headers_, separator_, quote_);
|
|
||||||
for (auto const& header : headers)
|
|
||||||
{
|
|
||||||
detail::locate_geometry_column(header, index++, locator_);
|
|
||||||
headers_.push_back(header);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else // parse first line as headers
|
|
||||||
{
|
|
||||||
while (csv_utils::getline_csv(stream, csv_line, newline, quote_))
|
|
||||||
{
|
|
||||||
try
|
|
||||||
{
|
|
||||||
auto headers = csv_utils::parse_line(csv_line, separator_, quote_);
|
|
||||||
// skip blank lines
|
|
||||||
std::string val;
|
|
||||||
if (headers.size() > 0 && headers[0].empty()) ++line_number;
|
|
||||||
else
|
|
||||||
{
|
|
||||||
std::size_t index = 0;
|
|
||||||
for (auto const& header : headers)
|
|
||||||
{
|
|
||||||
val = mapnik::util::trim_copy(header);
|
|
||||||
if (val.empty())
|
|
||||||
{
|
|
||||||
if (strict_)
|
|
||||||
{
|
|
||||||
std::ostringstream s;
|
|
||||||
s << "CSV Plugin: expected a column header at line ";
|
|
||||||
s << line_number << ", column " << index;
|
|
||||||
s << " - ensure this row contains valid header fields: '";
|
|
||||||
s << csv_line;
|
|
||||||
throw mapnik::datasource_exception(s.str());
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// create a placeholder for the empty header
|
|
||||||
std::ostringstream s;
|
|
||||||
s << "_" << index;
|
|
||||||
headers_.push_back(s.str());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
detail::locate_geometry_column(val, index, locator_);
|
|
||||||
headers_.push_back(val);
|
|
||||||
}
|
|
||||||
++index;
|
|
||||||
}
|
|
||||||
++line_number;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
catch (std::exception const& ex)
|
|
||||||
{
|
|
||||||
std::string s("CSV Plugin: error parsing headers: ");
|
|
||||||
s += ex.what();
|
|
||||||
throw mapnik::datasource_exception(s);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
std::size_t num_headers = headers_.size();
|
|
||||||
if (!detail::valid(locator_, num_headers))
|
|
||||||
{
|
|
||||||
std::string str("CSV Plugin: could not detect column(s) with the name(s) of wkt, geojson, x/y, or ");
|
|
||||||
str += "latitude/longitude in:\n";
|
|
||||||
str += csv_line;
|
|
||||||
str += "\n - this is required for reading geometry data";
|
|
||||||
throw mapnik::datasource_exception(str);
|
|
||||||
}
|
|
||||||
|
|
||||||
mapnik::value_integer feature_count = 0;
|
|
||||||
bool extent_started = false;
|
|
||||||
|
|
||||||
std::for_each(headers_.begin(), headers_.end(),
|
std::for_each(headers_.begin(), headers_.end(),
|
||||||
[ & ](std::string const& header){ ctx_->push(header); });
|
[ & ](std::string const& header){ ctx_->push(header); });
|
||||||
|
|
||||||
mapnik::transcoder tr(desc_.get_encoding());
|
if (!has_disk_index_)
|
||||||
|
{
|
||||||
|
// bulk insert initialise r-tree
|
||||||
|
tree_ = std::make_unique<spatial_index_type>(boxes);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
auto pos = stream.tellg();
|
void csv_datasource::add_feature(mapnik::value_integer index,
|
||||||
// handle rare case of a single line of data and user-provided headers
|
mapnik::csv_line const & values)
|
||||||
// where a lack of a newline will mean that csv_utils::getline_csv returns false
|
{
|
||||||
bool is_first_row = false;
|
if (index != 1) return;
|
||||||
|
|
||||||
if (!has_newline)
|
for (std::size_t i = 0; i < values.size(); ++i)
|
||||||
{
|
|
||||||
stream.setstate(std::ios::failbit);
|
|
||||||
pos = 0;
|
|
||||||
if (!csv_line.empty())
|
|
||||||
{
|
|
||||||
is_first_row = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<item_type> boxes;
|
|
||||||
while (is_first_row || csv_utils::getline_csv(stream, csv_line, newline, quote_))
|
|
||||||
{
|
|
||||||
++line_number;
|
|
||||||
if ((row_limit_ > 0) && (line_number > row_limit_))
|
|
||||||
{
|
|
||||||
MAPNIK_LOG_DEBUG(csv) << "csv_datasource: row limit hit, exiting at feature: " << feature_count;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
auto record_offset = pos;
|
|
||||||
auto record_size = csv_line.length();
|
|
||||||
pos = stream.tellg();
|
|
||||||
is_first_row = false;
|
|
||||||
|
|
||||||
// skip blank lines
|
|
||||||
if (record_size <= 10)
|
|
||||||
{
|
|
||||||
std::string trimmed = csv_line;
|
|
||||||
boost::trim_if(trimmed,boost::algorithm::is_any_of("\",'\r\n "));
|
|
||||||
if (trimmed.empty())
|
|
||||||
{
|
|
||||||
MAPNIK_LOG_DEBUG(csv) << "csv_datasource: empty row encountered at line: " << line_number;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
try
|
|
||||||
{
|
|
||||||
auto const* line_start = csv_line.data();
|
|
||||||
auto const* line_end = line_start + csv_line.size();
|
|
||||||
auto values = csv_utils::parse_line(line_start, line_end, separator_, quote_, num_headers);
|
|
||||||
unsigned num_fields = values.size();
|
|
||||||
if (num_fields != num_headers)
|
|
||||||
{
|
|
||||||
std::ostringstream s;
|
|
||||||
s << "CSV Plugin: # of columns(" << num_fields << ")";
|
|
||||||
if (num_fields > num_headers)
|
|
||||||
{
|
|
||||||
s << " > ";
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
s << " < ";
|
|
||||||
}
|
|
||||||
s << "# of headers(" << num_headers << ") parsed";
|
|
||||||
throw mapnik::datasource_exception(s.str());
|
|
||||||
}
|
|
||||||
|
|
||||||
auto geom = detail::extract_geometry(values, locator_);
|
|
||||||
if (!geom.is<mapnik::geometry::geometry_empty>())
|
|
||||||
{
|
|
||||||
auto box = mapnik::geometry::envelope(geom);
|
|
||||||
boxes.emplace_back(std::move(box), make_pair(record_offset, record_size));
|
|
||||||
if (!extent_initialized_)
|
|
||||||
{
|
|
||||||
if (!extent_started)
|
|
||||||
{
|
|
||||||
extent_started = true;
|
|
||||||
extent_ = mapnik::geometry::envelope(geom);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
extent_.expand_to_include(mapnik::geometry::envelope(geom));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (++feature_count != 1) continue;
|
|
||||||
auto beg = values.begin();
|
|
||||||
for (std::size_t i = 0; i < num_headers; ++i)
|
|
||||||
{
|
{
|
||||||
std::string const& header = headers_.at(i);
|
std::string const& header = headers_.at(i);
|
||||||
std::string value = mapnik::util::trim_copy(*beg++);
|
std::string value = mapnik::util::trim_copy(values[i]);
|
||||||
int value_length = value.length();
|
int value_length = value.length();
|
||||||
if (locator_.index == i && (locator_.type == detail::geometry_column_locator::WKT
|
if (locator_.index == i && (locator_.type == csv_utils::geometry_column_locator::WKT
|
||||||
|| locator_.type == detail::geometry_column_locator::GEOJSON)) continue;
|
|| locator_.type == csv_utils::geometry_column_locator::GEOJSON)) continue;
|
||||||
|
|
||||||
// First we detect likely strings,
|
// First we detect likely strings,
|
||||||
// then try parsing likely numbers,
|
// then try parsing likely numbers,
|
||||||
|
@ -427,44 +249,6 @@ void csv_datasource::parse_csv(T & stream)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
std::ostringstream s;
|
|
||||||
s << "CSV Plugin: expected geometry column: could not parse row "
|
|
||||||
<< line_number << " "
|
|
||||||
<< values.at(locator_.index) << "'";
|
|
||||||
throw mapnik::datasource_exception(s.str());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
catch (mapnik::datasource_exception const& ex )
|
|
||||||
{
|
|
||||||
if (strict_) throw ex;
|
|
||||||
else
|
|
||||||
{
|
|
||||||
MAPNIK_LOG_ERROR(csv) << ex.what() << " at line: " << line_number;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
catch (std::exception const& ex)
|
|
||||||
{
|
|
||||||
std::ostringstream s;
|
|
||||||
s << "CSV Plugin: unexpected error parsing line: " << line_number
|
|
||||||
<< " - found " << headers_.size() << " with values like: " << csv_line << "\n"
|
|
||||||
<< " and got error like: " << ex.what();
|
|
||||||
if (strict_)
|
|
||||||
{
|
|
||||||
throw mapnik::datasource_exception(s.str());
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
MAPNIK_LOG_ERROR(csv) << s.str();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// return early if *.index is present
|
|
||||||
if (has_disk_index_) return;
|
|
||||||
}
|
|
||||||
// bulk insert initialise r-tree
|
|
||||||
tree_ = std::make_unique<spatial_index_type>(boxes);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const char * csv_datasource::name()
|
const char * csv_datasource::name()
|
||||||
|
@ -487,8 +271,8 @@ mapnik::layer_descriptor csv_datasource::get_descriptor() const
|
||||||
return desc_;
|
return desc_;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
boost::optional<mapnik::datasource_geometry_t>
|
||||||
boost::optional<mapnik::datasource_geometry_t> csv_datasource::get_geometry_type_impl(T & stream) const
|
csv_datasource::get_geometry_type_impl(std::istream & stream) const
|
||||||
{
|
{
|
||||||
boost::optional<mapnik::datasource_geometry_t> result;
|
boost::optional<mapnik::datasource_geometry_t> result;
|
||||||
if (tree_)
|
if (tree_)
|
||||||
|
@ -509,7 +293,7 @@ boost::optional<mapnik::datasource_geometry_t> csv_datasource::get_geometry_type
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
auto values = csv_utils::parse_line(str, separator_, quote_);
|
auto values = csv_utils::parse_line(str, separator_, quote_);
|
||||||
auto geom = detail::extract_geometry(values, locator_);
|
auto geom = csv_utils::extract_geometry(values, locator_);
|
||||||
result = mapnik::util::to_ds_type(geom);
|
result = mapnik::util::to_ds_type(geom);
|
||||||
if (result)
|
if (result)
|
||||||
{
|
{
|
||||||
|
@ -552,7 +336,7 @@ boost::optional<mapnik::datasource_geometry_t> csv_datasource::get_geometry_type
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
auto values = csv_utils::parse_line(str, separator_, quote_);
|
auto values = csv_utils::parse_line(str, separator_, quote_);
|
||||||
auto geom = detail::extract_geometry(values, locator_);
|
auto geom = csv_utils::extract_geometry(values, locator_);
|
||||||
result = mapnik::util::to_ds_type(geom);
|
result = mapnik::util::to_ds_type(geom);
|
||||||
if (result)
|
if (result)
|
||||||
{
|
{
|
||||||
|
|
|
@ -42,6 +42,7 @@
|
||||||
#pragma GCC diagnostic pop
|
#pragma GCC diagnostic pop
|
||||||
|
|
||||||
// stl
|
// stl
|
||||||
|
#include <iosfwd>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
|
@ -67,7 +68,8 @@ struct options_type<csv_linear<Max,Min> >
|
||||||
};
|
};
|
||||||
}}}}}
|
}}}}}
|
||||||
|
|
||||||
class csv_datasource : public mapnik::datasource
|
class csv_datasource : public mapnik::datasource,
|
||||||
|
private csv_utils::csv_file_parser
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
using box_type = mapnik::box2d<double>;
|
using box_type = mapnik::box2d<double>;
|
||||||
|
@ -84,26 +86,15 @@ public:
|
||||||
mapnik::layer_descriptor get_descriptor() const;
|
mapnik::layer_descriptor get_descriptor() const;
|
||||||
boost::optional<mapnik::datasource_geometry_t> get_geometry_type() const;
|
boost::optional<mapnik::datasource_geometry_t> get_geometry_type() const;
|
||||||
private:
|
private:
|
||||||
template <typename T>
|
void parse_csv(std::istream & );
|
||||||
void parse_csv(T & stream);
|
virtual void add_feature(mapnik::value_integer index, mapnik::csv_line const & values);
|
||||||
template <typename T>
|
boost::optional<mapnik::datasource_geometry_t> get_geometry_type_impl(std::istream & ) const;
|
||||||
boost::optional<mapnik::datasource_geometry_t> get_geometry_type_impl(T & stream) const;
|
|
||||||
|
|
||||||
mapnik::layer_descriptor desc_;
|
mapnik::layer_descriptor desc_;
|
||||||
mapnik::box2d<double> extent_;
|
|
||||||
std::string filename_;
|
std::string filename_;
|
||||||
mapnik::value_integer row_limit_;
|
|
||||||
std::string inline_string_;
|
std::string inline_string_;
|
||||||
char separator_;
|
|
||||||
char quote_;
|
|
||||||
std::vector<std::string> headers_;
|
|
||||||
std::string manual_headers_;
|
|
||||||
bool strict_;
|
|
||||||
mapnik::context_ptr ctx_;
|
mapnik::context_ptr ctx_;
|
||||||
bool extent_initialized_;
|
|
||||||
std::unique_ptr<spatial_index_type> tree_;
|
std::unique_ptr<spatial_index_type> tree_;
|
||||||
detail::geometry_column_locator locator_;
|
|
||||||
bool has_disk_index_;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // MAPNIK_CSV_DATASOURCE_HPP
|
#endif // MAPNIK_CSV_DATASOURCE_HPP
|
||||||
|
|
|
@ -31,7 +31,7 @@
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <deque>
|
#include <deque>
|
||||||
|
|
||||||
csv_featureset::csv_featureset(std::string const& filename, detail::geometry_column_locator const& locator, char separator, char quote,
|
csv_featureset::csv_featureset(std::string const& filename, locator_type const& locator, char separator, char quote,
|
||||||
std::vector<std::string> const& headers, mapnik::context_ptr const& ctx, array_type && index_array)
|
std::vector<std::string> const& headers, mapnik::context_ptr const& ctx, array_type && index_array)
|
||||||
:
|
:
|
||||||
#if defined(MAPNIK_MEMORY_MAPPED_FILE)
|
#if defined(MAPNIK_MEMORY_MAPPED_FILE)
|
||||||
|
@ -72,12 +72,12 @@ csv_featureset::~csv_featureset() {}
|
||||||
mapnik::feature_ptr csv_featureset::parse_feature(char const* beg, char const* end)
|
mapnik::feature_ptr csv_featureset::parse_feature(char const* beg, char const* end)
|
||||||
{
|
{
|
||||||
auto values = csv_utils::parse_line(beg, end, separator_, quote_, headers_.size());
|
auto values = csv_utils::parse_line(beg, end, separator_, quote_, headers_.size());
|
||||||
auto geom = detail::extract_geometry(values, locator_);
|
auto geom = csv_utils::extract_geometry(values, locator_);
|
||||||
if (!geom.is<mapnik::geometry::geometry_empty>())
|
if (!geom.is<mapnik::geometry::geometry_empty>())
|
||||||
{
|
{
|
||||||
mapnik::feature_ptr feature(mapnik::feature_factory::create(ctx_, ++feature_id_));
|
mapnik::feature_ptr feature(mapnik::feature_factory::create(ctx_, ++feature_id_));
|
||||||
feature->set_geometry(std::move(geom));
|
feature->set_geometry(std::move(geom));
|
||||||
detail::process_properties(*feature, headers_, values, locator_, tr_);
|
csv_utils::process_properties(*feature, headers_, values, locator_, tr_);
|
||||||
return feature;
|
return feature;
|
||||||
}
|
}
|
||||||
return mapnik::feature_ptr();
|
return mapnik::feature_ptr();
|
||||||
|
|
|
@ -40,7 +40,7 @@
|
||||||
|
|
||||||
class csv_featureset : public mapnik::Featureset
|
class csv_featureset : public mapnik::Featureset
|
||||||
{
|
{
|
||||||
using locator_type = detail::geometry_column_locator;
|
using locator_type = csv_utils::geometry_column_locator;
|
||||||
public:
|
public:
|
||||||
using array_type = std::deque<csv_datasource::item_type>;
|
using array_type = std::deque<csv_datasource::item_type>;
|
||||||
csv_featureset(std::string const& filename,
|
csv_featureset(std::string const& filename,
|
||||||
|
@ -69,7 +69,7 @@ private:
|
||||||
array_type::const_iterator index_end_;
|
array_type::const_iterator index_end_;
|
||||||
mapnik::context_ptr ctx_;
|
mapnik::context_ptr ctx_;
|
||||||
mapnik::value_integer feature_id_ = 0;
|
mapnik::value_integer feature_id_ = 0;
|
||||||
detail::geometry_column_locator const& locator_;
|
locator_type const& locator_;
|
||||||
mapnik::transcoder tr_;
|
mapnik::transcoder tr_;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -37,7 +37,7 @@
|
||||||
|
|
||||||
csv_index_featureset::csv_index_featureset(std::string const& filename,
|
csv_index_featureset::csv_index_featureset(std::string const& filename,
|
||||||
mapnik::filter_in_box const& filter,
|
mapnik::filter_in_box const& filter,
|
||||||
detail::geometry_column_locator const& locator,
|
locator_type const& locator,
|
||||||
char separator,
|
char separator,
|
||||||
char quote,
|
char quote,
|
||||||
std::vector<std::string> const& headers,
|
std::vector<std::string> const& headers,
|
||||||
|
@ -89,12 +89,12 @@ csv_index_featureset::~csv_index_featureset() {}
|
||||||
mapnik::feature_ptr csv_index_featureset::parse_feature(char const* beg, char const* end)
|
mapnik::feature_ptr csv_index_featureset::parse_feature(char const* beg, char const* end)
|
||||||
{
|
{
|
||||||
auto values = csv_utils::parse_line(beg, end, separator_, quote_, headers_.size());
|
auto values = csv_utils::parse_line(beg, end, separator_, quote_, headers_.size());
|
||||||
auto geom = detail::extract_geometry(values, locator_);
|
auto geom = csv_utils::extract_geometry(values, locator_);
|
||||||
if (!geom.is<mapnik::geometry::geometry_empty>())
|
if (!geom.is<mapnik::geometry::geometry_empty>())
|
||||||
{
|
{
|
||||||
mapnik::feature_ptr feature(mapnik::feature_factory::create(ctx_, ++feature_id_));
|
mapnik::feature_ptr feature(mapnik::feature_factory::create(ctx_, ++feature_id_));
|
||||||
feature->set_geometry(std::move(geom));
|
feature->set_geometry(std::move(geom));
|
||||||
detail::process_properties(*feature, headers_, values, locator_, tr_);
|
csv_utils::process_properties(*feature, headers_, values, locator_, tr_);
|
||||||
return feature;
|
return feature;
|
||||||
}
|
}
|
||||||
return mapnik::feature_ptr();
|
return mapnik::feature_ptr();
|
||||||
|
|
|
@ -41,7 +41,7 @@
|
||||||
class csv_index_featureset : public mapnik::Featureset
|
class csv_index_featureset : public mapnik::Featureset
|
||||||
{
|
{
|
||||||
using value_type = std::pair<std::size_t, std::size_t>;
|
using value_type = std::pair<std::size_t, std::size_t>;
|
||||||
using locator_type = detail::geometry_column_locator;
|
using locator_type = csv_utils::geometry_column_locator;
|
||||||
public:
|
public:
|
||||||
|
|
||||||
csv_index_featureset(std::string const& filename,
|
csv_index_featureset(std::string const& filename,
|
||||||
|
@ -60,7 +60,7 @@ private:
|
||||||
std::vector<std::string> headers_;
|
std::vector<std::string> headers_;
|
||||||
mapnik::context_ptr ctx_;
|
mapnik::context_ptr ctx_;
|
||||||
mapnik::value_integer feature_id_ = 0;
|
mapnik::value_integer feature_id_ = 0;
|
||||||
detail::geometry_column_locator const& locator_;
|
locator_type const& locator_;
|
||||||
mapnik::transcoder tr_;
|
mapnik::transcoder tr_;
|
||||||
#if defined (MAPNIK_MEMORY_MAPPED_FILE)
|
#if defined (MAPNIK_MEMORY_MAPPED_FILE)
|
||||||
using file_source_type = boost::interprocess::ibufferstream;
|
using file_source_type = boost::interprocess::ibufferstream;
|
||||||
|
|
|
@ -33,7 +33,7 @@
|
||||||
#include <deque>
|
#include <deque>
|
||||||
|
|
||||||
csv_inline_featureset::csv_inline_featureset(std::string const& inline_string,
|
csv_inline_featureset::csv_inline_featureset(std::string const& inline_string,
|
||||||
detail::geometry_column_locator const& locator,
|
locator_type const& locator,
|
||||||
char separator,
|
char separator,
|
||||||
char quote,
|
char quote,
|
||||||
std::vector<std::string> const& headers,
|
std::vector<std::string> const& headers,
|
||||||
|
@ -57,12 +57,12 @@ mapnik::feature_ptr csv_inline_featureset::parse_feature(std::string const& str)
|
||||||
auto const* start = str.data();
|
auto const* start = str.data();
|
||||||
auto const* end = start + str.size();
|
auto const* end = start + str.size();
|
||||||
auto values = csv_utils::parse_line(start, end, separator_, quote_, headers_.size());
|
auto values = csv_utils::parse_line(start, end, separator_, quote_, headers_.size());
|
||||||
auto geom = detail::extract_geometry(values, locator_);
|
auto geom = csv_utils::extract_geometry(values, locator_);
|
||||||
if (!geom.is<mapnik::geometry::geometry_empty>())
|
if (!geom.is<mapnik::geometry::geometry_empty>())
|
||||||
{
|
{
|
||||||
mapnik::feature_ptr feature(mapnik::feature_factory::create(ctx_, ++feature_id_));
|
mapnik::feature_ptr feature(mapnik::feature_factory::create(ctx_, ++feature_id_));
|
||||||
feature->set_geometry(std::move(geom));
|
feature->set_geometry(std::move(geom));
|
||||||
detail::process_properties(*feature, headers_, values, locator_, tr_);
|
csv_utils::process_properties(*feature, headers_, values, locator_, tr_);
|
||||||
return feature;
|
return feature;
|
||||||
}
|
}
|
||||||
return mapnik::feature_ptr();
|
return mapnik::feature_ptr();
|
||||||
|
|
|
@ -31,7 +31,7 @@
|
||||||
|
|
||||||
class csv_inline_featureset : public mapnik::Featureset
|
class csv_inline_featureset : public mapnik::Featureset
|
||||||
{
|
{
|
||||||
using locator_type = detail::geometry_column_locator;
|
using locator_type = csv_utils::geometry_column_locator;
|
||||||
public:
|
public:
|
||||||
using array_type = std::deque<csv_datasource::item_type>;
|
using array_type = std::deque<csv_datasource::item_type>;
|
||||||
csv_inline_featureset(std::string const& inline_string,
|
csv_inline_featureset(std::string const& inline_string,
|
||||||
|
@ -54,7 +54,7 @@ private:
|
||||||
array_type::const_iterator index_end_;
|
array_type::const_iterator index_end_;
|
||||||
mapnik::context_ptr ctx_;
|
mapnik::context_ptr ctx_;
|
||||||
mapnik::value_integer feature_id_ = 0;
|
mapnik::value_integer feature_id_ = 0;
|
||||||
detail::geometry_column_locator const& locator_;
|
locator_type const& locator_;
|
||||||
mapnik::transcoder tr_;
|
mapnik::transcoder tr_;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -82,11 +82,231 @@ bool ignore_case_equal(std::string const& s0, std::string const& s1)
|
||||||
s1.begin(), ignore_case_equal_pred());
|
s1.begin(), ignore_case_equal_pred());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void csv_file_parser::add_feature(mapnik::value_integer, mapnik::csv_line const & )
|
||||||
|
{
|
||||||
|
// no-op by default
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void csv_file_parser::parse_csv(std::istream & csv_file, boxes_type & boxes)
|
||||||
|
{
|
||||||
|
auto file_length = detail::file_length(csv_file);
|
||||||
|
// set back to start
|
||||||
|
csv_file.seekg(0, std::ios::beg);
|
||||||
|
char newline;
|
||||||
|
bool has_newline;
|
||||||
|
char detected_quote;
|
||||||
|
char detected_separator;
|
||||||
|
std::tie(newline, has_newline, detected_separator, detected_quote) = detail::autodect_csv_flavour(csv_file, file_length);
|
||||||
|
if (quote_ == 0) quote_ = detected_quote;
|
||||||
|
if (separator_ == 0) separator_ = detected_separator;
|
||||||
|
|
||||||
|
// set back to start
|
||||||
|
MAPNIK_LOG_DEBUG(csv) << "csv_datasource: separator: '" << separator_
|
||||||
|
<< "' quote: '" << quote_ << "'";
|
||||||
|
|
||||||
|
// rewind stream
|
||||||
|
csv_file.seekg(0, std::ios::beg);
|
||||||
|
//
|
||||||
|
std::string csv_line;
|
||||||
|
csv_utils::getline_csv(csv_file, csv_line, newline, quote_);
|
||||||
|
csv_file.seekg(0, std::ios::beg);
|
||||||
|
int line_number = 0;
|
||||||
|
if (!manual_headers_.empty())
|
||||||
|
{
|
||||||
|
std::size_t index = 0;
|
||||||
|
auto headers = csv_utils::parse_line(manual_headers_, separator_, quote_);
|
||||||
|
for (auto const& header : headers)
|
||||||
|
{
|
||||||
|
detail::locate_geometry_column(header, index++, locator_);
|
||||||
|
headers_.push_back(header);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else // parse first line as headers
|
||||||
|
{
|
||||||
|
while (csv_utils::getline_csv(csv_file, csv_line, newline, quote_))
|
||||||
|
{
|
||||||
|
try
|
||||||
|
{
|
||||||
|
auto headers = csv_utils::parse_line(csv_line, separator_, quote_);
|
||||||
|
// skip blank lines
|
||||||
|
if (headers.size() > 0 && headers[0].empty()) ++line_number;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
std::size_t index = 0;
|
||||||
|
for (auto & header : headers)
|
||||||
|
{
|
||||||
|
mapnik::util::trim(header);
|
||||||
|
if (header.empty())
|
||||||
|
{
|
||||||
|
if (strict_)
|
||||||
|
{
|
||||||
|
std::ostringstream s;
|
||||||
|
s << "CSV Plugin: expected a column header at line ";
|
||||||
|
s << line_number << ", column " << index;
|
||||||
|
s << " - ensure this row contains valid header fields: '";
|
||||||
|
s << csv_line;
|
||||||
|
throw mapnik::datasource_exception(s.str());
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// create a placeholder for the empty header
|
||||||
|
std::ostringstream s;
|
||||||
|
s << "_" << index;
|
||||||
|
headers_.push_back(s.str());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
detail::locate_geometry_column(header, index, locator_);
|
||||||
|
headers_.push_back(header);
|
||||||
|
}
|
||||||
|
++index;
|
||||||
|
}
|
||||||
|
++line_number;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (std::exception const& ex)
|
||||||
|
{
|
||||||
|
std::string s("CSV Plugin: error parsing headers: ");
|
||||||
|
s += ex.what();
|
||||||
|
throw mapnik::datasource_exception(s);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::size_t num_headers = headers_.size();
|
||||||
|
if (!detail::valid(locator_, num_headers))
|
||||||
|
{
|
||||||
|
std::string str("CSV Plugin: could not detect column(s) with the name(s) of wkt, geojson, x/y, or ");
|
||||||
|
str += "latitude/longitude in:\n";
|
||||||
|
str += csv_line;
|
||||||
|
str += "\n - this is required for reading geometry data";
|
||||||
|
throw mapnik::datasource_exception(str);
|
||||||
|
}
|
||||||
|
|
||||||
|
mapnik::value_integer feature_count = 0;
|
||||||
|
auto pos = csv_file.tellg();
|
||||||
|
// handle rare case of a single line of data and user-provided headers
|
||||||
|
// where a lack of a newline will mean that csv_utils::getline_csv returns false
|
||||||
|
bool is_first_row = false;
|
||||||
|
|
||||||
|
if (!has_newline)
|
||||||
|
{
|
||||||
|
csv_file.setstate(std::ios::failbit);
|
||||||
|
pos = 0;
|
||||||
|
if (!csv_line.empty())
|
||||||
|
{
|
||||||
|
is_first_row = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
while (is_first_row || csv_utils::getline_csv(csv_file, csv_line, newline, quote_))
|
||||||
|
{
|
||||||
|
++line_number;
|
||||||
|
if ((row_limit_ > 0) && (line_number > row_limit_))
|
||||||
|
{
|
||||||
|
MAPNIK_LOG_DEBUG(csv) << "csv_datasource: row limit hit, exiting at feature: " << feature_count;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
auto record_offset = pos;
|
||||||
|
auto record_size = csv_line.length();
|
||||||
|
pos = csv_file.tellg();
|
||||||
|
is_first_row = false;
|
||||||
|
|
||||||
|
// skip blank lines
|
||||||
|
if (record_size <= 10)
|
||||||
|
{
|
||||||
|
std::string trimmed = csv_line;
|
||||||
|
boost::trim_if(trimmed, boost::algorithm::is_any_of("\",'\r\n "));
|
||||||
|
if (trimmed.empty())
|
||||||
|
{
|
||||||
|
MAPNIK_LOG_DEBUG(csv) << "csv_datasource: empty row encountered at line: " << line_number;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
try
|
||||||
|
{
|
||||||
|
auto const* line_start = csv_line.data();
|
||||||
|
auto const* line_end = line_start + csv_line.size();
|
||||||
|
auto values = csv_utils::parse_line(line_start, line_end, separator_, quote_, num_headers);
|
||||||
|
unsigned num_fields = values.size();
|
||||||
|
if (num_fields != num_headers)
|
||||||
|
{
|
||||||
|
std::ostringstream s;
|
||||||
|
s << "CSV Plugin: # of columns(" << num_fields << ")";
|
||||||
|
if (num_fields > num_headers)
|
||||||
|
{
|
||||||
|
s << " > ";
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
s << " < ";
|
||||||
|
}
|
||||||
|
s << "# of headers(" << num_headers << ") parsed";
|
||||||
|
throw mapnik::datasource_exception(s.str());
|
||||||
|
}
|
||||||
|
|
||||||
|
auto geom = extract_geometry(values, locator_);
|
||||||
|
if (!geom.is<mapnik::geometry::geometry_empty>())
|
||||||
|
{
|
||||||
|
auto box = mapnik::geometry::envelope(geom);
|
||||||
|
if (!extent_initialized_)
|
||||||
|
{
|
||||||
|
if (extent_.valid())
|
||||||
|
extent_.expand_to_include(box);
|
||||||
|
else
|
||||||
|
extent_ = box;
|
||||||
|
}
|
||||||
|
boxes.emplace_back(box, make_pair(record_offset, record_size));
|
||||||
|
add_feature(++feature_count, values);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
std::ostringstream s;
|
||||||
|
s << "CSV Plugin: expected geometry column: could not parse row "
|
||||||
|
<< line_number << " "
|
||||||
|
<< values.at(locator_.index) << "'";
|
||||||
|
throw mapnik::datasource_exception(s.str());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (mapnik::datasource_exception const& ex )
|
||||||
|
{
|
||||||
|
if (strict_) throw ex;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
MAPNIK_LOG_ERROR(csv) << ex.what() << " at line: " << line_number;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (std::exception const& ex)
|
||||||
|
{
|
||||||
|
std::ostringstream s;
|
||||||
|
s << "CSV Plugin: unexpected error parsing line: " << line_number
|
||||||
|
<< " - found " << headers_.size() << " with values like: " << csv_line << "\n"
|
||||||
|
<< " and got error like: " << ex.what();
|
||||||
|
if (strict_)
|
||||||
|
{
|
||||||
|
throw mapnik::datasource_exception(s.str());
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
MAPNIK_LOG_ERROR(csv) << s.str();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// return early if *.index is present
|
||||||
|
if (has_disk_index_) return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
namespace detail {
|
namespace detail {
|
||||||
|
|
||||||
|
std::size_t file_length(std::istream & stream)
|
||||||
|
{
|
||||||
|
stream.seekg(0, std::ios::end);
|
||||||
|
return stream.tellg();
|
||||||
|
}
|
||||||
|
|
||||||
std::tuple<char, bool, char, char> autodect_csv_flavour(std::istream & stream, std::size_t file_length)
|
std::tuple<char, bool, char, char> autodect_csv_flavour(std::istream & stream, std::size_t file_length)
|
||||||
{
|
{
|
||||||
// autodetect newlines/quotes/separators
|
// autodetect newlines/quotes/separators
|
||||||
|
@ -228,6 +448,8 @@ bool valid(geometry_column_locator const& locator, std::size_t max_size)
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} // namespace detail
|
||||||
|
|
||||||
mapnik::geometry::geometry<double> extract_geometry(std::vector<std::string> const& row, geometry_column_locator const& locator)
|
mapnik::geometry::geometry<double> extract_geometry(std::vector<std::string> const& row, geometry_column_locator const& locator)
|
||||||
{
|
{
|
||||||
mapnik::geometry::geometry<double> geom;
|
mapnik::geometry::geometry<double> geom;
|
||||||
|
@ -271,4 +493,4 @@ mapnik::geometry::geometry<double> extract_geometry(std::vector<std::string> con
|
||||||
return geom;
|
return geom;
|
||||||
}
|
}
|
||||||
|
|
||||||
}// ns detail
|
} // namespace csv_utils
|
||||||
|
|
|
@ -24,16 +24,19 @@
|
||||||
#define MAPNIK_CSV_UTILS_DATASOURCE_HPP
|
#define MAPNIK_CSV_UTILS_DATASOURCE_HPP
|
||||||
|
|
||||||
// mapnik
|
// mapnik
|
||||||
#include <mapnik/util/conversions.hpp>
|
#include <mapnik/box2d.hpp>
|
||||||
#include <mapnik/geometry.hpp>
|
#include <mapnik/geometry.hpp>
|
||||||
|
#include <mapnik/value_types.hpp>
|
||||||
|
#include <mapnik/util/conversions.hpp>
|
||||||
#include <mapnik/util/trim.hpp>
|
#include <mapnik/util/trim.hpp>
|
||||||
#include <mapnik/csv/csv_types.hpp>
|
#include <mapnik/csv/csv_types.hpp>
|
||||||
|
|
||||||
|
// std
|
||||||
|
#include <iosfwd>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <ios>
|
#include <vector>
|
||||||
|
|
||||||
namespace csv_utils
|
namespace csv_utils {
|
||||||
{
|
|
||||||
|
|
||||||
mapnik::csv_line parse_line(char const* start, char const* end, char separator, char quote, std::size_t num_columns);
|
mapnik::csv_line parse_line(char const* start, char const* end, char separator, char quote, std::size_t num_columns);
|
||||||
mapnik::csv_line parse_line(std::string const& line_str, char separator, char quote);
|
mapnik::csv_line parse_line(std::string const& line_str, char separator, char quote);
|
||||||
|
@ -42,10 +45,6 @@ bool is_likely_number(std::string const& value);
|
||||||
|
|
||||||
bool ignore_case_equal(std::string const& s0, std::string const& s1);
|
bool ignore_case_equal(std::string const& s0, std::string const& s1);
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
namespace detail {
|
|
||||||
|
|
||||||
struct geometry_column_locator
|
struct geometry_column_locator
|
||||||
{
|
{
|
||||||
geometry_column_locator()
|
geometry_column_locator()
|
||||||
|
@ -56,17 +55,17 @@ struct geometry_column_locator
|
||||||
std::size_t index2;
|
std::size_t index2;
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename T>
|
namespace detail {
|
||||||
std::size_t file_length(T & stream)
|
|
||||||
{
|
std::size_t file_length(std::istream & stream);
|
||||||
stream.seekg(0, std::ios::end);
|
|
||||||
return stream.tellg();
|
|
||||||
}
|
|
||||||
|
|
||||||
std::tuple<char, bool, char, char> autodect_csv_flavour(std::istream & stream, std::size_t file_length);
|
std::tuple<char, bool, char, char> autodect_csv_flavour(std::istream & stream, std::size_t file_length);
|
||||||
|
|
||||||
void locate_geometry_column(std::string const& header, std::size_t index, geometry_column_locator & locator);
|
void locate_geometry_column(std::string const& header, std::size_t index, geometry_column_locator & locator);
|
||||||
bool valid(geometry_column_locator const& locator, std::size_t max_size);
|
bool valid(geometry_column_locator const& locator, std::size_t max_size);
|
||||||
|
|
||||||
|
} // namespace detail
|
||||||
|
|
||||||
mapnik::geometry::geometry<double> extract_geometry(std::vector<std::string> const& row, geometry_column_locator const& locator);
|
mapnik::geometry::geometry<double> extract_geometry(std::vector<std::string> const& row, geometry_column_locator const& locator);
|
||||||
|
|
||||||
template <typename Feature, typename Headers, typename Values, typename Locator, typename Transcoder>
|
template <typename Feature, typename Headers, typename Values, typename Locator, typename Transcoder>
|
||||||
|
@ -139,6 +138,28 @@ void process_properties(Feature & feature, Headers const& headers, Values const&
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}// ns detail
|
struct csv_file_parser
|
||||||
|
{
|
||||||
|
using box_type = mapnik::box2d<double>;
|
||||||
|
using item_type = std::pair<box_type, std::pair<std::size_t, std::size_t>>;
|
||||||
|
using boxes_type = std::vector<item_type>;
|
||||||
|
|
||||||
|
void parse_csv(std::istream & csv_file, boxes_type & boxes);
|
||||||
|
|
||||||
|
virtual void add_feature(mapnik::value_integer index, mapnik::csv_line const & values);
|
||||||
|
|
||||||
|
std::vector<std::string> headers_;
|
||||||
|
std::string manual_headers_;
|
||||||
|
geometry_column_locator locator_;
|
||||||
|
mapnik::box2d<double> extent_;
|
||||||
|
mapnik::value_integer row_limit_ = 0;
|
||||||
|
char separator_ = '\0';
|
||||||
|
char quote_ = '\0';
|
||||||
|
bool strict_ = false;
|
||||||
|
bool extent_initialized_ = false;
|
||||||
|
bool has_disk_index_ = false;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace csv_utils
|
||||||
|
|
||||||
#endif // MAPNIK_CSV_UTILS_DATASOURCE_HPP
|
#endif // MAPNIK_CSV_UTILS_DATASOURCE_HPP
|
||||||
|
|
|
@ -46,7 +46,11 @@ namespace mapnik { namespace detail {
|
||||||
template <typename T>
|
template <typename T>
|
||||||
std::pair<bool,box2d<double>> process_csv_file(T & boxes, std::string const& filename, std::string const& manual_headers, char separator, char quote)
|
std::pair<bool,box2d<double>> process_csv_file(T & boxes, std::string const& filename, std::string const& manual_headers, char separator, char quote)
|
||||||
{
|
{
|
||||||
mapnik::box2d<double> extent;
|
csv_utils::csv_file_parser p;
|
||||||
|
p.manual_headers_ = manual_headers;
|
||||||
|
p.separator_ = separator;
|
||||||
|
p.quote_ = quote;
|
||||||
|
|
||||||
#if defined(MAPNIK_MEMORY_MAPPED_FILE)
|
#if defined(MAPNIK_MEMORY_MAPPED_FILE)
|
||||||
using file_source_type = boost::interprocess::ibufferstream;
|
using file_source_type = boost::interprocess::ibufferstream;
|
||||||
file_source_type csv_file;
|
file_source_type csv_file;
|
||||||
|
@ -61,7 +65,7 @@ std::pair<bool,box2d<double>> process_csv_file(T & boxes, std::string const& fil
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
std::clog << "Error : cannot mmap " << filename << std::endl;
|
std::clog << "Error : cannot mmap " << filename << std::endl;
|
||||||
return std::make_pair(false, extent);
|
return std::make_pair(false, p.extent_);
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
#if defined(_WINDOWS)
|
#if defined(_WINDOWS)
|
||||||
|
@ -72,177 +76,19 @@ std::pair<bool,box2d<double>> process_csv_file(T & boxes, std::string const& fil
|
||||||
if (!csv_file.is_open())
|
if (!csv_file.is_open())
|
||||||
{
|
{
|
||||||
std::clog << "Error : cannot open " << filename << std::endl;
|
std::clog << "Error : cannot open " << filename << std::endl;
|
||||||
return std::make_pair(false, extent);
|
return std::make_pair(false, p.extent_);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
auto file_length = ::detail::file_length(csv_file);
|
|
||||||
// set back to start
|
|
||||||
csv_file.seekg(0, std::ios::beg);
|
|
||||||
char newline;
|
|
||||||
bool has_newline;
|
|
||||||
char detected_quote;
|
|
||||||
char detected_separator;
|
|
||||||
std::tie(newline, has_newline, detected_separator, detected_quote) = ::detail::autodect_csv_flavour(csv_file, file_length);
|
|
||||||
if (quote == 0) quote = detected_quote;
|
|
||||||
if (separator == 0) separator = detected_separator;
|
|
||||||
// set back to start
|
|
||||||
csv_file.seekg(0, std::ios::beg);
|
|
||||||
std::string csv_line;
|
|
||||||
csv_utils::getline_csv(csv_file, csv_line, newline, quote);
|
|
||||||
csv_file.seekg(0, std::ios::beg);
|
|
||||||
int line_number = 0;
|
|
||||||
|
|
||||||
::detail::geometry_column_locator locator;
|
|
||||||
std::vector<std::string> headers;
|
|
||||||
std::clog << "Parsing CSV using SEPARATOR=" << separator << " QUOTE=" << quote << std::endl;
|
|
||||||
if (!manual_headers.empty())
|
|
||||||
{
|
|
||||||
std::size_t index = 0;
|
|
||||||
headers = csv_utils::parse_line(manual_headers, separator, quote);
|
|
||||||
for (auto const& header : headers)
|
|
||||||
{
|
|
||||||
::detail::locate_geometry_column(header, index++, locator);
|
|
||||||
headers.push_back(header);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else // parse first line as headers
|
|
||||||
{
|
|
||||||
while (csv_utils::getline_csv(csv_file,csv_line,newline, quote))
|
|
||||||
{
|
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
headers = csv_utils::parse_line(csv_line, separator, quote);
|
p.parse_csv(csv_file, boxes);
|
||||||
// skip blank lines
|
return std::make_pair(true, p.extent_);
|
||||||
if (headers.size() > 0 && headers[0].empty()) ++line_number;
|
|
||||||
else
|
|
||||||
{
|
|
||||||
std::size_t index = 0;
|
|
||||||
for (auto & header : headers)
|
|
||||||
{
|
|
||||||
mapnik::util::trim(header);
|
|
||||||
if (header.empty())
|
|
||||||
{
|
|
||||||
// create a placeholder for the empty header
|
|
||||||
std::ostringstream s;
|
|
||||||
s << "_" << index;
|
|
||||||
header = s.str();
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
::detail::locate_geometry_column(header, index, locator);
|
|
||||||
}
|
|
||||||
++index;
|
|
||||||
}
|
|
||||||
++line_number;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
catch (std::exception const& ex)
|
catch (std::exception const& ex)
|
||||||
{
|
{
|
||||||
std::string s("CSV index: error parsing headers: ");
|
std::clog << ex.what() << std::endl;
|
||||||
s += ex.what();
|
return std::make_pair(false, p.extent_);
|
||||||
std::clog << s << std::endl;
|
|
||||||
return std::make_pair(false, extent);
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
std::size_t num_headers = headers.size();
|
|
||||||
if (!::detail::valid(locator, num_headers))
|
|
||||||
{
|
|
||||||
std::clog << "CSV index: could not detect column(s) with the name(s) of wkt, geojson, x/y, or "
|
|
||||||
<< "latitude/longitude in:\n"
|
|
||||||
<< csv_line
|
|
||||||
<< "\n - this is required for reading geometry data"
|
|
||||||
<< std::endl;
|
|
||||||
return std::make_pair(false, extent);
|
|
||||||
}
|
|
||||||
|
|
||||||
auto pos = csv_file.tellg();
|
|
||||||
|
|
||||||
// handle rare case of a single line of data and user-provided headers
|
|
||||||
// where a lack of a newline will mean that csv_utils::getline_csv returns false
|
|
||||||
bool is_first_row = false;
|
|
||||||
if (!has_newline)
|
|
||||||
{
|
|
||||||
csv_file.setstate(std::ios::failbit);
|
|
||||||
pos = 0;
|
|
||||||
if (!csv_line.empty())
|
|
||||||
{
|
|
||||||
is_first_row = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
while (is_first_row || csv_utils::getline_csv(csv_file, csv_line, newline, quote))
|
|
||||||
{
|
|
||||||
++line_number;
|
|
||||||
auto record_offset = pos;
|
|
||||||
auto record_size = csv_line.length();
|
|
||||||
pos = csv_file.tellg();
|
|
||||||
is_first_row = false;
|
|
||||||
// skip blank lines
|
|
||||||
if (record_size <= 10)
|
|
||||||
{
|
|
||||||
std::string trimmed = csv_line;
|
|
||||||
boost::trim_if(trimmed, boost::algorithm::is_any_of("\",'\r\n "));
|
|
||||||
if (trimmed.empty())
|
|
||||||
{
|
|
||||||
std::clog << "CSV index: empty row encountered at line: " << line_number << std::endl;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
try
|
|
||||||
{
|
|
||||||
auto const* start_line = csv_line.data();
|
|
||||||
auto const* end_line = start_line + csv_line.size();
|
|
||||||
auto values = csv_utils::parse_line(start_line, end_line, separator, quote, num_headers);
|
|
||||||
unsigned num_fields = values.size();
|
|
||||||
if (num_fields != num_headers)
|
|
||||||
{
|
|
||||||
std::ostringstream s;
|
|
||||||
s << "CSV Plugin: # of columns(" << num_fields << ")";
|
|
||||||
if (num_fields > num_headers)
|
|
||||||
{
|
|
||||||
s << " > ";
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
s << " < ";
|
|
||||||
}
|
|
||||||
s << "# of headers(" << num_headers << ") parsed";
|
|
||||||
throw mapnik::datasource_exception(s.str());
|
|
||||||
}
|
|
||||||
|
|
||||||
auto geom = ::detail::extract_geometry(values, locator);
|
|
||||||
if (!geom.is<mapnik::geometry::geometry_empty>())
|
|
||||||
{
|
|
||||||
auto box = mapnik::geometry::envelope(geom);
|
|
||||||
if (!extent.valid()) extent = box;
|
|
||||||
else extent.expand_to_include(box);
|
|
||||||
boxes.emplace_back(std::move(box), make_pair(record_offset, record_size));
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
std::ostringstream s;
|
|
||||||
s << "CSV Index: expected geometry column: could not parse row "
|
|
||||||
<< line_number << " "
|
|
||||||
<< values[locator.index] << "'";
|
|
||||||
throw mapnik::datasource_exception(s.str());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
catch (mapnik::datasource_exception const& ex )
|
|
||||||
{
|
|
||||||
std::clog << ex.what() << " at line: " << line_number << std::endl;
|
|
||||||
}
|
|
||||||
catch (std::exception const& ex)
|
|
||||||
{
|
|
||||||
std::ostringstream s;
|
|
||||||
s << "CSV Index: unexpected error parsing line: " << line_number
|
|
||||||
<< " - found " << headers.size() << " with values like: " << csv_line << "\n"
|
|
||||||
<< " and got error like: " << ex.what();
|
|
||||||
std::clog << s.str() << std::endl;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return std::make_pair(true, extent);;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
using box_type = mapnik::box2d<double>;
|
using box_type = mapnik::box2d<double>;
|
||||||
|
|
Loading…
Reference in a new issue