Merge pull request #3010 from mapnik/large_csv

Large csv
This commit is contained in:
Artem Pavlenko 2015-08-25 15:06:26 +02:00
commit 35c263612f
15 changed files with 1527 additions and 1282 deletions

2
.gitmodules vendored
View file

@ -1,7 +1,7 @@
[submodule "test/data"]
path = test/data
url = https://github.com/mapnik/test-data.git
branch = master
branch = large_csv
[submodule "test/data-visual"]
path = test/data-visual
url = https://github.com/mapnik/test-data-visual.git

View file

@ -27,6 +27,7 @@ Import('env')
base = './mapnik/'
subdirs = [
'',
'csv',
'svg',
'wkt',
'cairo',

View file

@ -0,0 +1,103 @@
/*****************************************************************************
*
* This file is part of Mapnik (c++ mapping toolkit)
*
* Copyright (C) 2015 Artem Pavlenko
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*****************************************************************************/
#ifndef MAPNIK_CVS_GRAMMAR_HPP
#define MAPNIK_CVS_GRAMMAR_HPP
//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
namespace mapnik {
namespace qi = boost::spirit::qi;
using csv_value = std::string;
using csv_line = std::vector<csv_value>;
using csv_data = std::vector<csv_line>;
// Boost.Spirit Qi grammar that splits one CSV record into its column values.
//
// Synthesized attribute: csv_line (a vector of strings, one per column).
// Inherited attribute (_r1): the separator, passed in as a std::string.
// Skipper: qi::blank_type on the top-level `line` rule; the `column`, `text`
// and `quoted` rules are declared without a skipper.
template <typename Iterator>
struct csv_line_grammar : qi::grammar<Iterator, csv_line(std::string const&), qi::blank_type>
{
// Builds the escape table and wires up the rules; `line` is the start rule.
csv_line_grammar() : csv_line_grammar::base_type(line)
{
using namespace qi;
qi::_a_type _a;
qi::_r1_type _r1;
qi::lit_type lit;
//qi::eol_type eol;
qi::_1_type _1;
qi::char_type char_;
qi::omit_type omit;
// Two-character input sequences that are replaced by a single character
// while reading the text of a quoted column (see `text` below).
unesc_char.add
("\\a", '\a')
("\\b", '\b')
("\\f", '\f')
("\\n", '\n')
("\\r", '\r')
("\\t", '\t')
("\\v", '\v')
("\\\\",'\\')
("\\\'", '\'')
("\\\"", '\"')
("\"\"", '\"') // double quote
;
// A record is a list of columns separated by the separator.
// NOTE(review): char_ receives the whole separator string here, so presumably
// any character contained in it acts as a delimiter - confirm for
// multi-character separators.
line = column(_r1) % char_(_r1)
;
// A column is either a quoted value or a (possibly empty) run of characters
// that stops at the separator.
column = quoted | *(char_ - (lit(_r1) /*| eol*/))
;
// The opening quote (either " or ') is remembered in the local _a and handed
// to `text`; the closing quote is optional (-lit(_a)).
quoted = omit[char_("\"'")[_a = _1]] > text(_a) > -lit(_a)
;
// Quoted text: escape sequences from unesc_char, or any character other than
// the quote character passed in as _r1.
text = *(unesc_char | (char_ - char_(_r1)))
;
BOOST_SPIRIT_DEBUG_NODES((line)(column)(quoted));
}
private:
qi::rule<Iterator, csv_line(std::string const&), qi::blank_type> line;
qi::rule<Iterator, csv_value(std::string const&)> column; // no-skip
qi::rule<Iterator, csv_value(char)> text;
qi::rule<Iterator, qi::locals<char>, csv_value()> quoted;
qi::symbols<char const, char const> unesc_char;
};
// Boost.Spirit Qi grammar for a whole CSV document.
//
// Synthesized attribute: csv_data (a vector of csv_line).
// Inherited attribute (_r1): the separator string, forwarded unchanged to
// csv_line_grammar for every record.
template <typename Iterator>
struct csv_file_grammar : qi::grammar<Iterator, csv_data(std::string const&), qi::blank_type>
{
// Wires up the single `start` rule.
csv_file_grammar() : csv_file_grammar::base_type(start)
{
using namespace qi;
qi::eol_type eol;
qi::_r1_type _r1;
// Unary '-' binds tighter than '%': this is a list of *optional* lines
// separated by end-of-line.
start = -line(_r1) % eol
;
BOOST_SPIRIT_DEBUG_NODES((start));
}
private:
qi::rule<Iterator, csv_data(std::string const&), qi::blank_type> start;
csv_line_grammar<Iterator> line;
};
}
#endif // MAPNIK_CVS_GRAMMAR_HPP

View file

@ -30,6 +30,8 @@ plugin_env = plugin_base.Clone()
plugin_sources = Split(
"""
%(PLUGIN_NAME)s_datasource.cpp
%(PLUGIN_NAME)s_featureset.cpp
%(PLUGIN_NAME)s_inline_featureset.cpp
""" % locals()
)

File diff suppressed because it is too large Load diff

View file

@ -35,15 +35,51 @@
// boost
#include <boost/optional.hpp>
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#pragma GCC diagnostic ignored "-Wunused-variable"
#pragma GCC diagnostic ignored "-Wunused-local-typedef"
#pragma GCC diagnostic ignored "-Wshadow"
#pragma GCC diagnostic ignored "-Wsign-conversion"
#pragma GCC diagnostic ignored "-Wconversion"
#include <boost/version.hpp>
#include <boost/geometry/index/rtree.hpp>
#pragma GCC diagnostic pop
// stl
#include <vector>
#include <deque>
#include <string>
// R-tree parameters type for the CSV plugin. It derives from
// boost::geometry::index::linear<Max,Min> without adding members; being a
// distinct type, it lets the options_type specialization below be selected
// for it.
template <std::size_t Max, std::size_t Min>
struct csv_linear : boost::geometry::index::linear<Max,Min> {};
// Specialization of Boost.Geometry's internal rtree options_type for
// csv_linear: it spells out the tag set (default insert, choose-by-content-diff,
// default split, linear redistribution) used when the tree is parameterized
// with csv_linear<Max,Min>.
// NOTE(review): this lives in boost::geometry::index::detail, which is not a
// documented extension point - re-check when upgrading Boost.
namespace boost { namespace geometry { namespace index { namespace detail { namespace rtree {
template <std::size_t Max, std::size_t Min>
struct options_type<csv_linear<Max,Min> >
{
using type = options<csv_linear<Max, Min>,
insert_default_tag,
choose_by_content_diff_tag,
split_default_tag,
linear_tag,
// The static-node tag is spelled differently depending on BOOST_VERSION
// (threshold 105700).
#if BOOST_VERSION >= 105700
node_variant_static_tag>;
#else
node_s_mem_static_tag>;
#endif
};
}}}}}
class csv_datasource : public mapnik::datasource
{
public:
using box_type = mapnik::box2d<double>;
using item_type = std::pair<box_type, std::pair<std::size_t, std::size_t>>;
using spatial_index_type = boost::geometry::index::rtree<item_type,csv_linear<16,4>>;
csv_datasource(mapnik::parameters const& params);
virtual ~csv_datasource ();
mapnik::datasource::datasource_t type() const;
@ -63,19 +99,18 @@ private:
mapnik::layer_descriptor desc_;
mapnik::box2d<double> extent_;
std::string filename_;
std::string inline_string_;
unsigned file_length_;
mapnik::value_integer row_limit_;
std::deque<mapnik::feature_ptr> features_;
std::string inline_string_;
std::string escape_;
std::string separator_;
std::string quote_;
std::vector<std::string> headers_;
std::string manual_headers_;
bool strict_;
double filesize_max_;
mapnik::context_ptr ctx_;
bool extent_initialized_;
std::unique_ptr<spatial_index_type> tree_;
detail::geometry_column_locator locator_;
};
#endif // MAPNIK_CSV_DATASOURCE_HPP

View file

@ -0,0 +1,86 @@
/*****************************************************************************
*
* This file is part of Mapnik (c++ mapping toolkit)
*
* Copyright (C) 2015 Artem Pavlenko
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*****************************************************************************/
// mapnik
#include "csv_featureset.hpp"
#include <mapnik/debug.hpp>
#include <mapnik/feature.hpp>
#include <mapnik/feature_factory.hpp>
#include <mapnik/util/utf_conv_win.hpp>
// stl
#include <string>
#include <vector>
#include <deque>
// Opens `filename` for binary reading and takes ownership of the index entries
// that next() will walk.
//
// filename    - path of the CSV file; on _WINDOWS it is converted with
//               mapnik::utf8_to_utf16 and opened via _wfopen
// locator     - describes which column(s) hold the geometry
// separator   - column separator handed to csv_utils::parse_line
// headers     - column names, used as property names
// ctx         - feature context passed to the feature factory
// index_array - (bounding box, (offset, size)) entries; moved into the featureset
//
// Throws std::runtime_error if the file cannot be opened.
csv_featureset::csv_featureset(std::string const& filename, detail::geometry_column_locator const& locator, std::string const& separator,
std::vector<std::string> const& headers, mapnik::context_ptr const& ctx, array_type && index_array)
:
#ifdef _WINDOWS
file_(_wfopen(mapnik::utf8_to_utf16(filename).c_str(), L"rb"), std::fclose),
#else
file_(std::fopen(filename.c_str(),"rb"), std::fclose),
#endif
separator_(separator),
headers_(headers),
index_array_(std::move(index_array)),
// The iterators refer to the member index_array_, not the moved-from argument,
// so index_array_ has to be initialized before them.
index_itr_(index_array_.begin()),
index_end_(index_array_.end()),
ctx_(ctx),
locator_(locator),
tr_("utf8")
{
if (!file_) throw std::runtime_error("Can't open " + filename);
}
// Nothing to release by hand: every member cleans up after itself
// (file_ closes the stream through its std::fclose deleter).
csv_featureset::~csv_featureset() = default;
// Turns the raw bytes [beg, end) of one CSV record into a feature.
//
// The record is split into column values, the geometry is extracted using
// locator_, and the columns are stored as properties on a feature created
// with the next feature id. Returns a null feature_ptr when the extracted
// geometry is empty; the id counter is not advanced in that case.
mapnik::feature_ptr csv_featureset::parse_feature(char const* beg, char const* end)
{
    auto columns = csv_utils::parse_line(beg, end, separator_, headers_.size());
    auto geometry = detail::extract_geometry(columns, locator_);
    if (geometry.is<mapnik::geometry::geometry_empty>())
    {
        return mapnik::feature_ptr();
    }

    mapnik::feature_ptr result(mapnik::feature_factory::create(ctx_, ++feature_id_));
    result->set_geometry(std::move(geometry));
    detail::process_properties(*result, headers_, columns, locator_, tr_);
    return result;
}
// Returns the feature for the next index entry, or a null feature_ptr once all
// entries have been consumed.
//
// Each index entry carries the byte offset and length of one record in the
// CSV file; the record is read into a temporary buffer and handed to
// parse_feature(). Note that parse_feature() also returns a null pointer for
// a record whose geometry is empty, so callers cannot tell that case apart
// from the end of the featureset.
//
// Throws std::runtime_error if seeking to or reading the record fails, rather
// than parsing a zero-filled buffer, in addition to whatever parse_feature()
// throws.
mapnik::feature_ptr csv_featureset::next()
{
    if (index_itr_ == index_end_)
    {
        return mapnik::feature_ptr();
    }

    csv_datasource::item_type const& item = *index_itr_++;
    std::size_t const file_offset = item.second.first;
    std::size_t const size = item.second.second;

    // std::fseek takes a long offset; the conversion matches what the implicit
    // conversion did before, but the result is now checked.
    if (std::fseek(file_.get(), static_cast<long>(file_offset), SEEK_SET) != 0)
    {
        throw std::runtime_error("csv_featureset: failed to seek to offset "
                                 + std::to_string(file_offset));
    }

    std::vector<char> record(size);
    // Read byte-wise so the return value is the number of bytes obtained; a
    // short read means the file no longer matches the index.
    if (size > 0 && std::fread(record.data(), 1, size, file_.get()) != size)
    {
        throw std::runtime_error("csv_featureset: failed to read "
                                 + std::to_string(size) + " bytes at offset "
                                 + std::to_string(file_offset));
    }

    char const* start = record.data();
    char const* end = start + record.size();
    return parse_feature(start, end);
}

View file

@ -0,0 +1,62 @@
/*****************************************************************************
*
* This file is part of Mapnik (c++ mapping toolkit)
*
* Copyright (C) 2015 Artem Pavlenko
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*****************************************************************************/
#ifndef CSV_FEATURESET_HPP
#define CSV_FEATURESET_HPP
#include <mapnik/feature.hpp>
#include <mapnik/unicode.hpp>
#include "csv_utils.hpp"
#include "csv_datasource.hpp"
#include <deque>
#include <cstdio>
// Featureset that reads CSV records from a file on demand.
//
// It is constructed with a list of index entries (bounding box plus the byte
// offset/size of a record) and yields one feature per entry from next().
//
// Lifetime: separator_, headers_ and locator_ are stored as references to the
// objects passed to the constructor, so those objects must outlive the
// featureset (presumably they are members of the csv_datasource that created
// it - confirm at the call site).
class csv_featureset : public mapnik::Featureset
{
// FILE handle closed through the supplied deleter (std::fclose in the constructor).
using file_ptr = std::unique_ptr<std::FILE, int (*)(std::FILE *)>;
using locator_type = detail::geometry_column_locator;
public:
using array_type = std::deque<csv_datasource::item_type>;
csv_featureset(std::string const& filename,
locator_type const& locator,
std::string const& separator,
std::vector<std::string> const& headers,
mapnik::context_ptr const& ctx,
array_type && index_array);
~csv_featureset();
// Returns the next feature, or a null pointer when there is nothing more to return.
mapnik::feature_ptr next();
private:
// Parses the record stored in [beg, end) into a feature.
mapnik::feature_ptr parse_feature(char const* beg, char const* end);
file_ptr file_;
std::string const& separator_;
std::vector<std::string> const& headers_;
// Declared before the two iterators below: they are initialized from this
// member in the constructor, and members are initialized in declaration order.
const array_type index_array_;
array_type::const_iterator index_itr_;
array_type::const_iterator index_end_;
mapnik::context_ptr ctx_;
// Pre-incremented for every feature created, so the first id handed out is 1.
mapnik::value_integer feature_id_ = 0;
detail::geometry_column_locator const& locator_;
mapnik::transcoder tr_;
};
#endif // CSV_FEATURESET_HPP

View file

@ -0,0 +1,78 @@
/*****************************************************************************
*
* This file is part of Mapnik (c++ mapping toolkit)
*
* Copyright (C) 2015 Artem Pavlenko
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*****************************************************************************/
// mapnik
#include "csv_inline_featureset.hpp"
#include <mapnik/debug.hpp>
#include <mapnik/feature.hpp>
#include <mapnik/feature_factory.hpp>
#include <mapnik/util/utf_conv_win.hpp>
#include <mapnik/util/trim.hpp>
// stl
#include <string>
#include <vector>
#include <deque>
// Builds a featureset over CSV text held in memory.
//
// inline_string - the complete CSV text; index entries address substrings of it
// locator       - describes which column(s) hold the geometry
// separator     - column separator handed to csv_utils::parse_line
// headers       - column names, used as property names
// ctx           - feature context passed to the feature factory
// index_array   - (bounding box, (offset, size)) entries; moved into the featureset
csv_inline_featureset::csv_inline_featureset(std::string const& inline_string,
detail::geometry_column_locator const& locator,
std::string const& separator,
std::vector<std::string> const& headers,
mapnik::context_ptr const& ctx,
array_type && index_array)
: inline_string_(inline_string),
separator_(separator),
headers_(headers),
index_array_(std::move(index_array)),
// The iterators refer to the member index_array_, not the moved-from argument,
// so index_array_ has to be initialized before them.
index_itr_(index_array_.begin()),
index_end_(index_array_.end()),
ctx_(ctx),
locator_(locator),
tr_("utf8") {}
// No manual cleanup required; all members manage their own resources.
csv_inline_featureset::~csv_inline_featureset() = default;
// Converts one CSV record, given as a string, into a feature.
//
// The record is split into column values, the geometry is extracted using
// locator_, and the columns are stored as properties on a feature created
// with the next feature id. Returns a null feature_ptr when the extracted
// geometry is empty; the id counter is not advanced in that case.
mapnik::feature_ptr csv_inline_featureset::parse_feature(std::string const& str)
{
    auto columns = csv_utils::parse_line(str, separator_);
    auto geometry = detail::extract_geometry(columns, locator_);
    if (geometry.is<mapnik::geometry::geometry_empty>())
    {
        return mapnik::feature_ptr();
    }

    mapnik::feature_ptr result(mapnik::feature_factory::create(ctx_, ++feature_id_));
    result->set_geometry(std::move(geometry));
    detail::process_properties(*result, headers_, columns, locator_, tr_);
    return result;
}
// Returns the feature for the next index entry, or a null feature_ptr once
// every entry has been consumed.
//
// Each entry stores the offset and length of one record inside
// inline_string_; that substring is copied out and parsed.
mapnik::feature_ptr csv_inline_featureset::next()
{
    if (index_itr_ == index_end_)
    {
        return mapnik::feature_ptr();
    }

    csv_datasource::item_type const& entry = *index_itr_;
    ++index_itr_;

    std::size_t const offset = entry.second.first;
    std::size_t const length = entry.second.second;
    std::string record = inline_string_.substr(offset, length);
    return parse_feature(record);
}

View file

@ -0,0 +1,61 @@
/*****************************************************************************
*
* This file is part of Mapnik (c++ mapping toolkit)
*
* Copyright (C) 2015 Artem Pavlenko
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*****************************************************************************/
#ifndef CSV_INLINE_FEATURESET_HPP
#define CSV_INLINE_FEATURESET_HPP
#include <mapnik/feature.hpp>
#include <mapnik/unicode.hpp>
#include "csv_utils.hpp"
#include "csv_datasource.hpp"
#include <deque>
#include <cstdio>
// Featureset that yields features from CSV text held in memory.
//
// It is constructed with a list of index entries (bounding box plus the
// offset/size of a record inside the text) and yields one feature per entry
// from next().
//
// Lifetime: inline_string_, separator_ and locator_ are stored as references
// to the objects passed to the constructor, so those objects must outlive the
// featureset. headers_ is stored as a copy.
class csv_inline_featureset : public mapnik::Featureset
{
using locator_type = detail::geometry_column_locator;
public:
using array_type = std::deque<csv_datasource::item_type>;
csv_inline_featureset(std::string const& inline_string,
locator_type const& locator,
std::string const& separator,
std::vector<std::string> const& headers,
mapnik::context_ptr const& ctx,
array_type && index_array);
~csv_inline_featureset();
// Returns the next feature, or a null pointer when there is nothing more to return.
mapnik::feature_ptr next();
private:
// Parses one record, given as a string, into a feature.
mapnik::feature_ptr parse_feature(std::string const& str);
std::string const& inline_string_;
std::string const& separator_;
std::vector<std::string> headers_;
// Declared before the two iterators below: they are initialized from this
// member in the constructor, and members are initialized in declaration order.
const array_type index_array_;
array_type::const_iterator index_itr_;
array_type::const_iterator index_end_;
mapnik::context_ptr ctx_;
// Pre-incremented for every feature created, so the first id handed out is 1.
mapnik::value_integer feature_id_ = 0;
detail::geometry_column_locator const& locator_;
mapnik::transcoder tr_;
};
#endif // CSV_INLINE_FEATURESET_HPP

View file

@ -23,6 +23,16 @@
#ifndef MAPNIK_CSV_UTILS_DATASOURCE_HPP
#define MAPNIK_CSV_UTILS_DATASOURCE_HPP
// mapnik
#include <mapnik/debug.hpp>
#include <mapnik/geometry.hpp>
#include <mapnik/geometry_correct.hpp>
#include <mapnik/wkt/wkt_factory.hpp>
#include <mapnik/json/geometry_parser.hpp>
#include <mapnik/util/conversions.hpp>
#include <mapnik/csv/csv_grammar.hpp>
#include <mapnik/util/trim.hpp>
// boost
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#pragma GCC diagnostic ignored "-Wunused-local-typedef"
@ -32,66 +42,275 @@
#include <string>
#include <cstdio>
#include <algorithm>
namespace csv_utils
{
// Grammar instance used by parse_line(); note it is instantiated for
// char const* iterators.
static const mapnik::csv_line_grammar<char const*> line_g;

// Splits the record in [start, end) into column values using `separator`.
//
// num_columns is only a capacity hint: when non-zero the result vector is
// reserved up front. Blanks are skipped by the parser. Throws
// std::runtime_error if the parser reports failure.
template <typename Iterator>
static mapnik::csv_line parse_line(Iterator start, Iterator end, std::string const& separator, std::size_t num_columns)
{
    namespace qi = boost::spirit::qi;

    mapnik::csv_line columns;
    if (num_columns != 0)
    {
        columns.reserve(num_columns);
    }

    boost::spirit::standard::blank_type skipper;
    bool const parsed = qi::phrase_parse(start, end, (line_g)(boost::phoenix::cref(separator)), skipper, columns);
    if (!parsed)
    {
        throw std::runtime_error("Failed to parse CSV line:\n" + std::string(start, end));
    }
    return columns;
}
// Convenience overload: parses a whole string as one CSV record, without a
// column-count hint.
static inline mapnik::csv_line parse_line(std::string const& line_str, std::string const& separator)
{
    char const* first = line_str.c_str();
    char const* last = first + line_str.length();
    return parse_line(first, last, separator, 0);
}
// Cheap pre-check before attempting a numeric conversion: true when every
// character of `value` is a digit or one of 'e', '-', '.', '+'.
// An empty string passes; this is not a validity check ("e-+" passes too).
static inline bool is_likely_number(std::string const& value)
{
    static char const* const numeric_chars = "e-.+0123456789";
    return value.find_first_not_of(numeric_chars) == std::string::npos;
}
static inline void fix_json_quoting(std::string & csv_line)
struct ignore_case_equal_pred
{
std::string wrapping_char;
std::string::size_type j_idx = std::string::npos;
std::string::size_type post_idx = std::string::npos;
std::string::size_type j_idx_double = csv_line.find("\"{");
std::string::size_type j_idx_single = csv_line.find("'{");
if (j_idx_double != std::string::npos)
bool operator () (unsigned char a, unsigned char b) const
{
wrapping_char = "\"";
j_idx = j_idx_double;
post_idx = csv_line.find("}\"");
return std::tolower(a) == std::tolower(b);
}
};
// Case-insensitive equality of two strings, compared byte by byte with
// ignore_case_equal_pred.
//
// The lengths are compared first: the three-iterator form of std::equal only
// walks the first range, so without this check a shorter s0 would match any
// s1 it is a prefix of (e.g. "t" == "true"), and a longer s0 would read past
// the end of s1.
inline bool ignore_case_equal(std::string const& s0, std::string const& s1)
{
    return s0.size() == s1.size() &&
           std::equal(s0.begin(), s0.end(),
                      s1.begin(), ignore_case_equal_pred());
}
}
else if (j_idx_single != std::string::npos)
namespace detail {
template <typename T>
std::size_t file_length(T & stream)
{
wrapping_char = "'";
j_idx = j_idx_single;
post_idx = csv_line.find("}'");
stream.seekg(0, std::ios::end);
return stream.tellg();
}
// we are positive it is valid json
if (!wrapping_char.empty())
static inline std::string detect_separator(std::string const& str)
{
// grab the json chunk
std::string json_chunk = csv_line.substr(j_idx,post_idx+wrapping_char.size());
bool does_not_have_escaped_double_quotes = (json_chunk.find("\\\"") == std::string::npos);
// ignore properly escaped quotes like \" which need no special handling
if (does_not_have_escaped_double_quotes)
std::string separator = ","; // default
int num_commas = std::count(str.begin(), str.end(), ',');
// detect tabs
int num_tabs = std::count(str.begin(), str.end(), '\t');
if (num_tabs > 0)
{
std::string pre_json = csv_line.substr(0,j_idx);
std::string post_json = csv_line.substr(post_idx+wrapping_char.size());
// handle "" in a string wrapped in "
// http://tools.ietf.org/html/rfc4180#section-2 item 7.
// e.g. "{""type"":""Point"",""coordinates"":[30.0,10.0]}"
if (json_chunk.find("\"\"") != std::string::npos)
if (num_tabs > num_commas)
{
boost::algorithm::replace_all(json_chunk,"\"\"","\\\"");
csv_line = pre_json + json_chunk + post_json;
separator = "\t";
MAPNIK_LOG_DEBUG(csv) << "csv_datasource: auto detected tab separator";
}
}
else // pipes
{
int num_pipes = std::count(str.begin(), str.end(), '|');
if (num_pipes > num_commas)
{
separator = "|";
MAPNIK_LOG_DEBUG(csv) << "csv_datasource: auto detected '|' separator";
}
else // semicolons
{
int num_semicolons = std::count(str.begin(), str.end(), ';');
if (num_semicolons > num_commas)
{
separator = ";";
MAPNIK_LOG_DEBUG(csv) << "csv_datasource: auto detected ';' separator";
}
}
}
return separator;
}
// Scans the beginning of `stream` for the first line terminator.
//
// At most min(file_length, 4000) characters are read with stream.get().
// Returns (newline character, found): ('\r', true) if a carriage return is
// seen first, ('\n', true) if a line feed is seen first, and ('\n', false)
// if neither occurs within the scanned prefix. The stream position is left
// wherever the scan stopped.
template <typename T>
std::tuple<char,bool> autodect_newline(T & stream, std::size_t file_length)
{
    std::size_t const max_scan = 4000;
    std::size_t const limit = (file_length < max_scan) ? file_length : max_scan;

    for (std::size_t scanned = 0; scanned != limit; ++scanned)
    {
        switch (static_cast<char>(stream.get()))
        {
        case '\r':
            return std::make_tuple('\r', true);
        case '\n':
            return std::make_tuple('\n', true);
        default:
            break;
        }
    }
    return std::make_tuple('\n', false);
}
// Records where the geometry lives in a CSV row.
//
// type   - how the geometry is encoded (UNKNOWN until a matching column is found)
// index  - column of the WKT/GeoJSON value, or of the longitude for LON_LAT
// index2 - column of the latitude for LON_LAT
//
// Both indices start out as the largest std::size_t value (-1 converted to
// unsigned).
struct geometry_column_locator
{
    enum { UNKNOWN = 0, WKT, GEOJSON, LON_LAT } type;
    std::size_t index;
    std::size_t index2;

    geometry_column_locator()
        : type(UNKNOWN),
          index(static_cast<std::size_t>(-1)),
          index2(static_cast<std::size_t>(-1))
    {}
};
// Inspects one header name and, if it looks like a geometry column, records
// its position in `locator`.
//
// header  - column name as read from the file (matched case-insensitively)
// index   - zero-based position of that column
// locator - updated in place; left untouched when the name is not recognized
//
// Recognized names:
//   "wkt" or anything containing "geom"                         -> WKT, index
//   "geojson"                                                   -> GEOJSON, index
//   "x", "lon", "lng", "long" or anything containing "longitude" -> LON_LAT, index
//   "y", "lat" or anything containing "latitude"                -> LON_LAT, index2
// Later matching headers overwrite the type set by earlier ones.
static inline void locate_geometry_column(std::string const& header, std::size_t index, geometry_column_locator & locator)
{
    std::string lower_val(header);
    // tolower() has undefined behaviour for negative arguments other than EOF,
    // which is what plain char yields for non-ASCII (e.g. UTF-8) bytes on
    // platforms where char is signed; go through unsigned char.
    std::transform(lower_val.begin(), lower_val.end(), lower_val.begin(),
                   [](unsigned char c) { return static_cast<char>(::tolower(c)); });

    if (lower_val == "wkt" || (lower_val.find("geom") != std::string::npos))
    {
        locator.type = geometry_column_locator::WKT;
        locator.index = index;
    }
    else if (lower_val == "geojson")
    {
        locator.type = geometry_column_locator::GEOJSON;
        locator.index = index;
    }
    else if (lower_val == "x" || lower_val == "lon"
             || lower_val == "lng" || lower_val == "long"
             || (lower_val.find("longitude") != std::string::npos))
    {
        locator.index = index;
        locator.type = geometry_column_locator::LON_LAT;
    }
    else if (lower_val == "y"
             || lower_val == "lat"
             || (lower_val.find("latitude") != std::string::npos))
    {
        locator.index2 = index;
        locator.type = geometry_column_locator::LON_LAT;
    }
}
static mapnik::geometry::geometry<double> extract_geometry(std::vector<std::string> const& row, geometry_column_locator const& locator)
{
mapnik::geometry::geometry<double> geom;
if (locator.type == geometry_column_locator::WKT)
{
if (mapnik::from_wkt(row[locator.index], geom))
{
// correct orientations ..
mapnik::geometry::correct(geom);
}
// handle " in a string wrapped in '
// e.g. '{"type":"Point","coordinates":[30.0,10.0]}'
else
{
// escape " because we cannot exchange for single quotes
// https://github.com/mapnik/mapnik/issues/1408
boost::algorithm::replace_all(json_chunk,"\"","\\\"");
boost::algorithm::replace_all(json_chunk,"'","\"");
csv_line = pre_json + json_chunk + post_json;
throw std::runtime_error("Failed to parse WKT:" + row[locator.index]);
}
}
else if (locator.type == geometry_column_locator::GEOJSON)
{
if (!mapnik::json::from_geojson(row[locator.index], geom))
{
throw std::runtime_error("Failed to parse GeoJSON:" + row[locator.index]);
}
}
else if (locator.type == geometry_column_locator::LON_LAT)
{
double x, y;
if (!mapnik::util::string2double(row[locator.index],x))
{
throw std::runtime_error("Failed to parse Longitude(Easting):" + row[locator.index]);
}
if (!mapnik::util::string2double(row[locator.index2],y))
{
throw std::runtime_error("Failed to parse Latitude(Northing):" + row[locator.index2]);
}
geom = mapnik::geometry::point<double>(x,y);
}
return geom;
}
// Stores the column values of one CSV row as properties on `feature`.
//
// feature - receives one put(name, value) call per stored column
// headers - column names; headers.at(i) names the i-th value
// values  - column values of the row, consumed in order
// locator - identifies the geometry column; a WKT or GeoJSON geometry column
//           is not stored as a property
// tr      - transcoder used to convert string values
//
// Typing rules, applied to the trimmed value:
//   * empty, longer than 20 characters, or a leading '0' without a '.'
//     (e.g. zero-padded codes)                          -> string
//   * looks numeric and contains '.' or 'e'             -> double, if it converts
//   * looks numeric otherwise                           -> integer, if it converts
//   * "true"/"false" (any case)                         -> boolean
//   * anything else, including failed conversions       -> string
// When the row has fewer values than headers, the remaining properties are
// set to an empty string.
template <typename Feature, typename Headers, typename Values, typename Locator, typename Transcoder>
void process_properties(Feature & feature, Headers const& headers, Values const& values, Locator const& locator, Transcoder const& tr)
{
    auto val_beg = values.begin();
    auto val_end = values.end();
    auto num_headers = headers.size();
    for (std::size_t i = 0; i < num_headers; ++i)
    {
        std::string const& fld_name = headers.at(i);
        if (val_beg == val_end)
        {
            // Row is shorter than the header line.
            feature.put(fld_name, tr.transcode(""));
            continue;
        }

        // The value is consumed before the geometry-column check below so the
        // value iterator stays aligned with the header index.
        std::string value = mapnik::util::trim_copy(*val_beg++);
        std::size_t const value_length = value.length();
        if (locator.index == i && (locator.type == detail::geometry_column_locator::WKT
                                   || locator.type == detail::geometry_column_locator::GEOJSON))
        {
            continue;
        }

        bool matched = false;
        bool const has_dot = value.find('.') != std::string::npos;
        if (value.empty() ||
            (value_length > 20) ||
            (value_length > 1 && !has_dot && value[0] == '0'))
        {
            matched = true;
            // transcode() already yields a temporary; no std::move needed.
            feature.put(fld_name, tr.transcode(value.c_str()));
        }
        else if (csv_utils::is_likely_number(value))
        {
            bool const has_e = value.find('e') != std::string::npos;
            if (has_dot || has_e)
            {
                double float_val = 0.0;
                if (mapnik::util::string2double(value, float_val))
                {
                    matched = true;
                    feature.put(fld_name, float_val);
                }
            }
            else
            {
                mapnik::value_integer int_val = 0;
                if (mapnik::util::string2int(value, int_val))
                {
                    matched = true;
                    feature.put(fld_name, int_val);
                }
            }
        }

        if (!matched)
        {
            if (csv_utils::ignore_case_equal(value, "true"))
            {
                feature.put(fld_name, true);
            }
            else if (csv_utils::ignore_case_equal(value, "false"))
            {
                feature.put(fld_name, false);
            }
            else // fallback to string
            {
                feature.put(fld_name, tr.transcode(value.c_str()));
            }
        }
    }
}
}// ns detail
#endif // MAPNIK_CSV_UTILS_DATASOURCE_HPP

View file

@ -29,7 +29,6 @@
// stl
#include <string>
#include <vector>
#include <deque>
#include "large_geojson_featureset.hpp"

View file

@ -26,9 +26,7 @@
#include <mapnik/feature.hpp>
#include "geojson_datasource.hpp"
#include <vector>
#include <deque>
#include <fstream>
#include <cstdio>
class large_geojson_featureset : public mapnik::Featureset

@ -1 +1 @@
Subproject commit cb1e7f2ed8f2482bf8fb370981ec450922fa36de
Subproject commit cbf02d3a9d173c27c69541df347dfbd22c6c1612

View file

@ -21,34 +21,40 @@
namespace bfs = boost::filesystem;
namespace {
void add_csv_files(bfs::path dir, std::vector<bfs::path> &csv_files) {
void add_csv_files(bfs::path dir, std::vector<bfs::path> &csv_files)
{
for (auto const &entry : boost::make_iterator_range(
bfs::directory_iterator(dir), bfs::directory_iterator())) {
bfs::directory_iterator(dir), bfs::directory_iterator()))
{
auto path = entry.path();
if (path.extension().native() == ".csv") {
if (path.extension().native() == ".csv")
{
csv_files.emplace_back(path);
}
}
}
mapnik::datasource_ptr get_csv_ds(std::string const &file_name, bool strict = true) {
mapnik::datasource_ptr get_csv_ds(std::string const &file_name, bool strict = true)
{
mapnik::parameters params;
params["type"] = std::string("csv");
params["file"] = file_name;
params["strict"] = mapnik::value_bool(strict);
auto ds = mapnik::datasource_cache::instance().create(params);
// require a non-null pointer returned
REQUIRE(bool(ds));
REQUIRE(ds != nullptr);
return ds;
}
void require_field_names(std::vector<mapnik::attribute_descriptor> const &fields,
std::initializer_list<std::string> const &names) {
std::initializer_list<std::string> const &names)
{
REQUIRE(fields.size() == names.size());
auto itr_a = fields.begin();
auto const end_a = fields.end();
auto itr_b = names.begin();
for (; itr_a != end_a; ++itr_a, ++itr_b) {
for (; itr_a != end_a; ++itr_a, ++itr_b)
{
CHECK(itr_a->get_name() == *itr_b);
}
}
@ -165,9 +171,7 @@ TEST_CASE("csv") {
if (mapnik::util::exists(csv_plugin))
{
REQUIRE(registered);
// make the tests silent since we intentionally test error conditions that are noisy
auto const severity = mapnik::logger::instance().get_severity();
mapnik::logger::instance().set_severity(mapnik::logger::none);
@ -185,7 +189,8 @@ TEST_CASE("csv") {
add_csv_files("test/data/csv/warns", broken);
broken.emplace_back("test/data/csv/fails/does_not_exist.csv");
for (auto const &path : broken) {
for (auto const &path : broken)
{
REQUIRE_THROWS(get_csv_ds(path.native()));
}
}
@ -197,7 +202,8 @@ TEST_CASE("csv") {
add_csv_files("test/data/csv", good);
add_csv_files("test/data/csv/warns", good);
for (auto const &path : good) {
for (auto const& path : good)
{
auto ds = get_csv_ds(path.native(), false);
// require a non-null pointer returned
REQUIRE(bool(ds));
@ -205,8 +211,10 @@ TEST_CASE("csv") {
}
} // END SECTION
SECTION("lon/lat detection") {
for (auto const &lon_name : {std::string("lon"), std::string("lng")}) {
SECTION("lon/lat detection")
{
for (auto const& lon_name : {std::string("lon"), std::string("lng")})
{
auto ds = get_csv_ds((boost::format("test/data/csv/%1%_lat.csv") % lon_name).str());
auto fields = ds->get_descriptor().get_descriptors();
require_field_names(fields, {lon_name, "lat"});
@ -215,7 +223,8 @@ TEST_CASE("csv") {
CHECK(ds->get_geometry_type() == mapnik::datasource_geometry_t::Point);
mapnik::query query(ds->envelope());
for (auto const &field : fields) {
for (auto const &field : fields)
{
query.add_property_name(field.get_name());
}
auto features = ds->features(query);
@ -587,17 +596,16 @@ TEST_CASE("csv") {
using row = std::pair<std::string, std::size_t>;
for (auto const &r : {
row{"test/data/csv/fails/needs_headers_two_lines.csv", 2}
, row{"test/data/csv/fails/needs_headers_one_line.csv", 1}
, row{"test/data/csv/fails/needs_headers_one_line_no_newline.csv", 1}
}) {
row{"test/data/csv/fails/needs_headers_two_lines.csv", 2},
row{"test/data/csv/fails/needs_headers_one_line.csv", 1},
row{"test/data/csv/fails/needs_headers_one_line_no_newline.csv", 1}})
{
mapnik::parameters params;
params["type"] = std::string("csv");
params["file"] = r.first;
params["headers"] = "x,y,name";
auto ds = mapnik::datasource_cache::instance().create(params);
REQUIRE(bool(ds));
auto fields = ds->get_descriptor().get_descriptors();
require_field_names(fields, {"x", "y", "name"});
require_field_types(fields, {mapnik::Integer, mapnik::Integer, mapnik::String});
@ -633,7 +641,6 @@ TEST_CASE("csv") {
auto fields = ds->get_descriptor().get_descriptors();
require_field_names(fields, {"x", "y", "floats"});
require_field_types(fields, {mapnik::Integer, mapnik::Integer, mapnik::Double});
auto fs = all_features(ds);
for (double d : { .0, +.0, 1e-06, -1e-06, 0.000001, 1.234e+16, 1.234e+16 }) {
auto feature = fs->next();
@ -650,7 +657,6 @@ TEST_CASE("csv") {
params["extent"] = "-180,-90,180,90";
auto ds = mapnik::datasource_cache::instance().create(params);
REQUIRE(bool(ds));
auto box = ds->envelope();
CHECK(box.minx() == -180);
CHECK(box.miny() == -90);
@ -676,7 +682,6 @@ TEST_CASE("csv") {
auto feat = fs->next();
CHECK(feature_count(feat->get_geometry()) == 1);
} // END SECTION
mapnik::logger::instance().set_severity(severity);
}
} // END TEST CASE