CSV - implement transduction parsing to defer std::string's ctor

This commit is contained in:
artemp 2015-08-25 15:11:54 +02:00
parent 3753d50b75
commit fb3f8704b8
5 changed files with 47 additions and 34 deletions

View file

@ -31,16 +31,20 @@
namespace mapnik { namespace mapnik {
namespace qi = boost::spirit::qi; namespace qi = boost::spirit::qi;
using csv_value = std::string; using csv_value = boost::iterator_range<char const*>;
using csv_line = std::vector<csv_value>; using csv_record = std::vector<csv_value>;
using csv_data = std::vector<csv_line>; using csv_data = std::vector<csv_record>;
template <typename Iterator> template <typename Iterator>
struct csv_line_grammar : qi::grammar<Iterator, csv_line(std::string const&), qi::blank_type> struct csv_record_grammar : qi::grammar<Iterator, csv_record(std::string const&), qi::blank_type>
{ {
csv_line_grammar() : csv_line_grammar::base_type(line) using iterator_range = boost::iterator_range<Iterator>;
csv_record_grammar()
: csv_record_grammar::base_type(line)
{ {
using namespace qi; using namespace qi;
qi::raw_type raw;
qi::_a_type _a; qi::_a_type _a;
qi::_r1_type _r1; qi::_r1_type _r1;
qi::lit_type lit; qi::lit_type lit;
@ -61,25 +65,25 @@ struct csv_line_grammar : qi::grammar<Iterator, csv_line(std::string const&), qi
("\\\"", '\"') ("\\\"", '\"')
("\"\"", '\"') // double quote ("\"\"", '\"') // double quote
; ;
line = column(_r1) % char_(_r1)
line = column(_r1) % char_(_r1)
; ;
column = quoted | *(char_ - (lit(_r1) /*| eol*/)) column = quoted | raw[*(char_ - (lit(_r1) /*| eol*/))]
; ;
quoted = omit[char_("\"'")[_a = _1]] > text(_a) > -lit(_a) quoted = omit[char_("\"'")[_a = _1]] > text(_a) > -lit(_a)
; ;
text = *(unesc_char | (char_ - char_(_r1))) text = raw[*(unesc_char | (char_ - char_(_r1)))]
; ;
BOOST_SPIRIT_DEBUG_NODES((line)(column)(quoted)); BOOST_SPIRIT_DEBUG_NODES((line)(column)(quoted));
} }
private: private:
qi::rule<Iterator, csv_line(std::string const&), qi::blank_type> line; qi::rule<Iterator, csv_record(std::string const&), qi::blank_type> line;
qi::rule<Iterator, csv_value(std::string const&)> column; // no-skip qi::rule<Iterator, csv_value(std::string const&)> column; // no-skip
qi::rule<Iterator, csv_value(char)> text; qi::rule<Iterator, csv_value(char)> text;
qi::rule<Iterator, qi::locals<char>, csv_value()> quoted; qi::rule<Iterator, qi::locals<char>, csv_value()> quoted;
qi::symbols<char const, char const> unesc_char; qi::symbols<char const, char const> unesc_char;
}; };
template <typename Iterator> template <typename Iterator>
struct csv_file_grammar : qi::grammar<Iterator, csv_data(std::string const&), qi::blank_type> struct csv_file_grammar : qi::grammar<Iterator, csv_data(std::string const&), qi::blank_type>
{ {
@ -94,10 +98,9 @@ struct csv_file_grammar : qi::grammar<Iterator, csv_data(std::string const&), qi
} }
private: private:
qi::rule<Iterator, csv_data(std::string const&), qi::blank_type> start; qi::rule<Iterator, csv_data(std::string const&), qi::blank_type> start;
csv_line_grammar<Iterator> line; csv_record_grammar<Iterator> line;
}; };
} }
#endif // MAPNIK_CVS_GRAMMAR_HPP #endif // MAPNIK_CVS_GRAMMAR_HPP

View file

@ -39,8 +39,8 @@ inline bool from_wkt(std::string const& wkt, mapnik::geometry::geometry<double>
using namespace boost::spirit; using namespace boost::spirit;
static const mapnik::wkt::wkt_grammar<std::string::const_iterator> g; static const mapnik::wkt::wkt_grammar<std::string::const_iterator> g;
ascii::space_type space; ascii::space_type space;
std::string::const_iterator first = wkt.begin(); auto first = wkt.begin();
std::string::const_iterator last = wkt.end(); auto last = wkt.end();
return qi::phrase_parse(first, last, (g)(boost::phoenix::ref(geom)), space); return qi::phrase_parse(first, last, (g)(boost::phoenix::ref(geom)), space);
} }

View file

@ -162,7 +162,7 @@ void csv_datasource::parse_csv(T & stream,
auto headers = csv_utils::parse_line(manual_headers_, sep); auto headers = csv_utils::parse_line(manual_headers_, sep);
for (auto const& header : headers) for (auto const& header : headers)
{ {
std::string val = mapnik::util::trim_copy(header); std::string val = mapnik::util::trim_copy(std::string(header.begin(), header.end()));
detail::locate_geometry_column(val, index++, locator_); detail::locate_geometry_column(val, index++, locator_);
headers_.push_back(val); headers_.push_back(val);
} }
@ -176,13 +176,13 @@ void csv_datasource::parse_csv(T & stream,
auto headers = csv_utils::parse_line(csv_line, sep); auto headers = csv_utils::parse_line(csv_line, sep);
// skip blank lines // skip blank lines
std::string val; std::string val;
if (headers.size() > 0 && headers[0].empty()) ++line_number; if (headers.size() > 0 && headers[0].begin() == headers[0].end()) ++line_number;
else else
{ {
std::size_t index = 0; std::size_t index = 0;
for (auto const& header : headers) for (auto const& header : headers)
{ {
val = mapnik::util::trim_copy(header); val = mapnik::util::trim_copy(std::string(header.begin(), header.end()));
if (val.empty()) if (val.empty())
{ {
if (strict_) if (strict_)
@ -338,7 +338,8 @@ void csv_datasource::parse_csv(T & stream,
// encoded consistently as empty strings // encoded consistently as empty strings
continue; continue;
} }
std::string value = mapnik::util::trim_copy(*beg++); std::string value = mapnik::util::trim_copy(std::string(beg->begin(),beg->end()));
++beg;
int value_length = value.length(); int value_length = value.length();
if (locator_.index == i && (locator_.type == detail::geometry_column_locator::WKT if (locator_.index == i && (locator_.type == detail::geometry_column_locator::WKT
|| locator_.type == detail::geometry_column_locator::GEOJSON)) continue; || locator_.type == detail::geometry_column_locator::GEOJSON)) continue;
@ -404,7 +405,7 @@ void csv_datasource::parse_csv(T & stream,
std::ostringstream s; std::ostringstream s;
s << "CSV Plugin: expected geometry column: could not parse row " s << "CSV Plugin: expected geometry column: could not parse row "
<< line_number << " " << line_number << " "
<< values[locator_.index] << "'"; << "FIXME values[locator_.index]" << "'";
if (strict_) if (strict_)
{ {
throw mapnik::datasource_exception(s.str()); throw mapnik::datasource_exception(s.str());

View file

@ -47,12 +47,12 @@
namespace csv_utils namespace csv_utils
{ {
static const mapnik::csv_line_grammar<char const*> line_g; static const mapnik::csv_record_grammar<char const*> line_g;
template <typename Iterator> template <typename Iterator>
static mapnik::csv_line parse_line(Iterator start, Iterator end, std::string const& separator, std::size_t num_columns) static mapnik::csv_record parse_line(Iterator start, Iterator end, std::string const& separator, std::size_t num_columns)
{ {
mapnik::csv_line values; mapnik::csv_record values;
if (num_columns > 0) values.reserve(num_columns); if (num_columns > 0) values.reserve(num_columns);
boost::spirit::standard::blank_type blank; boost::spirit::standard::blank_type blank;
if (!boost::spirit::qi::phrase_parse(start, end, (line_g)(boost::phoenix::cref(separator)), blank, values)) if (!boost::spirit::qi::phrase_parse(start, end, (line_g)(boost::phoenix::cref(separator)), blank, values))
@ -62,7 +62,7 @@ static mapnik::csv_line parse_line(Iterator start, Iterator end, std::string con
return values; return values;
} }
static inline mapnik::csv_line parse_line(std::string const& line_str, std::string const& separator) static inline mapnik::csv_record parse_line(std::string const& line_str, std::string const& separator)
{ {
auto start = line_str.c_str(); auto start = line_str.c_str();
auto end = start + line_str.length(); auto end = start + line_str.length();
@ -201,39 +201,47 @@ static inline void locate_geometry_column(std::string const& header, std::size_t
} }
} }
static mapnik::geometry::geometry<double> extract_geometry(std::vector<std::string> const& row, geometry_column_locator const& locator) static mapnik::geometry::geometry<double> extract_geometry(std::vector<boost::iterator_range<char const*> > const& row,
geometry_column_locator const& locator)
{ {
mapnik::geometry::geometry<double> geom; mapnik::geometry::geometry<double> geom;
if (locator.type == geometry_column_locator::WKT) if (locator.type == geometry_column_locator::WKT)
{ {
if (mapnik::from_wkt(row[locator.index], geom)) std::string wkt(row[locator.index].begin(), row[locator.index].end());
//auto itr = row[locator.index].begin();
//auto end = row[locator.index].end();
if (mapnik::from_wkt(wkt, geom))
{ {
// correct orientations .. // correct orientations ..
mapnik::geometry::correct(geom); mapnik::geometry::correct(geom);
} }
else else
{ {
throw std::runtime_error("Failed to parse WKT:" + row[locator.index]); throw std::runtime_error("Failed to parse WKT:" + std::string(row[locator.index].begin(),row[locator.index].end()));
} }
} }
else if (locator.type == geometry_column_locator::GEOJSON) else if (locator.type == geometry_column_locator::GEOJSON)
{ {
if (!mapnik::json::from_geojson(row[locator.index], geom)) std::string json(row[locator.index].begin(), row[locator.index].end());
//if (!mapnik::json::from_geojson(row[locator.index].begin(), row[locator.index].end(), geom))
if (!mapnik::json::from_geojson(json, geom))
{ {
throw std::runtime_error("Failed to parse GeoJSON:" + row[locator.index]); throw std::runtime_error("Failed to parse geojson:" + std::string(row[locator.index].begin(),row[locator.index].end()));
} }
} }
else if (locator.type == geometry_column_locator::LON_LAT) else if (locator.type == geometry_column_locator::LON_LAT)
{ {
double x, y; double x, y;
if (!mapnik::util::string2double(row[locator.index],x)) if (!mapnik::util::string2double(row[locator.index].begin(),row[locator.index].end(), x))
{ {
throw std::runtime_error("Failed to parse Longitude(Easting):" + row[locator.index]); throw std::runtime_error("Failed to parser longitude (easting)" + std::string(row[locator.index].begin(),row[locator.index].end()));
} }
if (!mapnik::util::string2double(row[locator.index2],y)) if (!mapnik::util::string2double(row[locator.index2].begin(), row[locator.index2].end(), y))
{ {
throw std::runtime_error("Failed to parse Latitude(Northing):" + row[locator.index2]);
throw std::runtime_error("Failed to parser latitude (northing)" + std::string(row[locator.index2].begin(),row[locator.index2].end()));
} }
geom = mapnik::geometry::point<double>(x,y); geom = mapnik::geometry::point<double>(x,y);
} }
@ -254,7 +262,8 @@ void process_properties(Feature & feature, Headers const& headers, Values const&
feature.put(fld_name,tr.transcode("")); feature.put(fld_name,tr.transcode(""));
continue; continue;
} }
std::string value = mapnik::util::trim_copy(*val_beg++); std::string value = mapnik::util::trim_copy(std::string(val_beg->begin(),val_beg->end()));
++val_beg;
int value_length = value.length(); int value_length = value.length();
if (locator.index == i && (locator.type == detail::geometry_column_locator::WKT if (locator.index == i && (locator.type == detail::geometry_column_locator::WKT

View file

@ -534,7 +534,7 @@ TEST_CASE("csv") {
using mapnik::geometry::geometry_types; using mapnik::geometry::geometry_types;
for (auto const &file : { for (auto const &file : {
std::string("test/data/csv/geojson_double_quote_escape.csv") std::string("test/data/csv/geojson_double_quote_escape.csv")
, std::string("test/data/csv/geojson_single_quote.csv") , std::string("test/data/csv/geojson_single_quote.csv")
, std::string("test/data/csv/geojson_2x_double_quote_filebakery_style.csv") , std::string("test/data/csv/geojson_2x_double_quote_filebakery_style.csv")
}) { }) {