CSV - revive row_limit parameter + fix stderr

This commit is contained in:
artemp 2015-08-25 15:05:04 +02:00
parent 8709fb6f7c
commit 3753d50b75
4 changed files with 27 additions and 25 deletions

View file

@ -2,7 +2,7 @@
* *
* This file is part of Mapnik (c++ mapping toolkit) * This file is part of Mapnik (c++ mapping toolkit)
* *
* Copyright (C) 2014 Artem Pavlenko * Copyright (C) 2015 Artem Pavlenko
* *
* This library is free software; you can redistribute it and/or * This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public * modify it under the terms of the GNU Lesser General Public
@ -31,26 +31,22 @@
namespace mapnik { namespace mapnik {
namespace qi = boost::spirit::qi; namespace qi = boost::spirit::qi;
using column = std::string; using csv_value = std::string;
using columns = std::vector<column>; using csv_line = std::vector<csv_value>;
using csv_line = columns;
using csv_data = std::vector<csv_line>; using csv_data = std::vector<csv_line>;
template <typename Iterator> template <typename Iterator>
struct csv_line_grammar : qi::grammar<Iterator, void(csv_line&, std::string const&), qi::blank_type> struct csv_line_grammar : qi::grammar<Iterator, csv_line(std::string const&), qi::blank_type>
{ {
csv_line_grammar() : csv_line_grammar::base_type(line) csv_line_grammar() : csv_line_grammar::base_type(line)
{ {
using namespace qi; using namespace qi;
qi::_a_type _a; qi::_a_type _a;
qi::_r1_type _r1; qi::_r1_type _r1;
qi::_r2_type _r2;
qi::lit_type lit; qi::lit_type lit;
//qi::eol_type eol; //qi::eol_type eol;
qi::_val_type _val;
qi::_1_type _1; qi::_1_type _1;
qi::char_type char_; qi::char_type char_;
qi::eps_type eps;
qi::omit_type omit; qi::omit_type omit;
unesc_char.add unesc_char.add
("\\a", '\a') ("\\a", '\a')
@ -66,21 +62,21 @@ struct csv_line_grammar : qi::grammar<Iterator, void(csv_line&, std::string cons
("\"\"", '\"') // double quote ("\"\"", '\"') // double quote
; ;
line = column(_r2)[boost::phoenix::push_back(_r1,_1)] % char_(_r2) line = column(_r1) % char_(_r1)
; ;
column = quoted | *(char_ - (lit(_r1) /*| eol*/)) column = quoted | *(char_ - (lit(_r1) /*| eol*/))
; ;
quoted = omit[char_("\"'")[_a = _1]] > text(_a)[boost::phoenix::swap(_val,_1)] > -lit(_a) quoted = omit[char_("\"'")[_a = _1]] > text(_a) > -lit(_a)
; ;
text = *(unesc_char | (char_ - char_(_r1))) text = *(unesc_char | (char_ - char_(_r1)))
; ;
//BOOST_SPIRIT_DEBUG_NODES((line)(column)(quoted)); BOOST_SPIRIT_DEBUG_NODES((line)(column)(quoted));
} }
private: private:
qi::rule<Iterator, void(csv_line&,std::string const&), qi::blank_type> line; qi::rule<Iterator, csv_line(std::string const&), qi::blank_type> line;
qi::rule<Iterator, column(std::string const&)> column; // no-skip qi::rule<Iterator, csv_value(std::string const&)> column; // no-skip
qi::rule<Iterator, std::string(char)> text; qi::rule<Iterator, csv_value(char)> text;
qi::rule<Iterator, qi::locals<char>, std::string()> quoted; qi::rule<Iterator, qi::locals<char>, csv_value()> quoted;
qi::symbols<char const, char const> unesc_char; qi::symbols<char const, char const> unesc_char;
}; };

View file

@ -61,6 +61,7 @@ csv_datasource::csv_datasource(parameters const& params)
desc_(csv_datasource::name(), *params.get<std::string>("encoding", "utf-8")), desc_(csv_datasource::name(), *params.get<std::string>("encoding", "utf-8")),
extent_(), extent_(),
filename_(), filename_(),
row_limit_(*params.get<mapnik::value_integer>("row_limit", 0)),
inline_string_(), inline_string_(),
escape_(*params.get<std::string>("escape", "")), escape_(*params.get<std::string>("escape", "")),
separator_(*params.get<std::string>("separator", "")), separator_(*params.get<std::string>("separator", "")),
@ -140,7 +141,7 @@ void csv_datasource::parse_csv(T & stream,
std::string sep = mapnik::util::trim_copy(separator); std::string sep = mapnik::util::trim_copy(separator);
if (sep.empty()) sep = detail::detect_separator(csv_line); if (sep.empty()) sep = detail::detect_separator(csv_line);
separator_ = sep; // <------------------- FIXME !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! separator_ = sep;
// set back to start // set back to start
stream.seekg(0, std::ios::beg); stream.seekg(0, std::ios::beg);
@ -252,6 +253,11 @@ void csv_datasource::parse_csv(T & stream,
auto pos = stream.tellg(); auto pos = stream.tellg();
while (std::getline(stream, csv_line, stream.widen(newline)) || is_first_row) while (std::getline(stream, csv_line, stream.widen(newline)) || is_first_row)
{ {
if ((row_limit_ > 0) && (line_number++ > row_limit_))
{
MAPNIK_LOG_DEBUG(csv) << "csv_datasource: row limit hit, exiting at feature: " << feature_count;
break;
}
auto record_offset = pos; auto record_offset = pos;
auto record_size = csv_line.length(); auto record_size = csv_line.length();
pos = stream.tellg(); pos = stream.tellg();
@ -264,7 +270,6 @@ void csv_datasource::parse_csv(T & stream,
boost::trim_if(trimmed,boost::algorithm::is_any_of("\",'\r\n ")); boost::trim_if(trimmed,boost::algorithm::is_any_of("\",'\r\n "));
if (trimmed.empty()) if (trimmed.empty())
{ {
++line_number;
MAPNIK_LOG_DEBUG(csv) << "csv_datasource: empty row encountered at line: " << line_number; MAPNIK_LOG_DEBUG(csv) << "csv_datasource: empty row encountered at line: " << line_number;
continue; continue;
} }
@ -409,7 +414,6 @@ void csv_datasource::parse_csv(T & stream,
MAPNIK_LOG_ERROR(csv) << s.str(); MAPNIK_LOG_ERROR(csv) << s.str();
} }
} }
++line_number;
} }
catch (mapnik::datasource_exception const& ex ) catch (mapnik::datasource_exception const& ex )
{ {

View file

@ -99,6 +99,7 @@ private:
mapnik::layer_descriptor desc_; mapnik::layer_descriptor desc_;
mapnik::box2d<double> extent_; mapnik::box2d<double> extent_;
std::string filename_; std::string filename_;
mapnik::value_integer row_limit_;
std::string inline_string_; std::string inline_string_;
std::string escape_; std::string escape_;
std::string separator_; std::string separator_;

View file

@ -49,19 +49,20 @@ namespace csv_utils
static const mapnik::csv_line_grammar<char const*> line_g; static const mapnik::csv_line_grammar<char const*> line_g;
static mapnik::csv_line parse_line(char const* start, char const* end, std::string const& separator, std::size_t num_columns) template <typename Iterator>
static mapnik::csv_line parse_line(Iterator start, Iterator end, std::string const& separator, std::size_t num_columns)
{ {
mapnik::csv_line values; mapnik::csv_line values;
if (num_columns > 0) values.reserve(num_columns); if (num_columns > 0) values.reserve(num_columns);
boost::spirit::standard::blank_type blank; boost::spirit::standard::blank_type blank;
if (!boost::spirit::qi::phrase_parse(start, end, (line_g)(boost::phoenix::ref(values), boost::phoenix::cref(separator)), blank)) if (!boost::spirit::qi::phrase_parse(start, end, (line_g)(boost::phoenix::cref(separator)), blank, values))
{ {
throw std::runtime_error("Failed to parse CSV line:\n" + std::string(start, end)); throw std::runtime_error("Failed to parse CSV line:\n" + std::string(start, end));
} }
return values; return values;
} }
static mapnik::csv_line parse_line(std::string const& line_str, std::string const& separator) static inline mapnik::csv_line parse_line(std::string const& line_str, std::string const& separator)
{ {
auto start = line_str.c_str(); auto start = line_str.c_str();
auto end = start + line_str.length(); auto end = start + line_str.length();
@ -212,7 +213,7 @@ static mapnik::geometry::geometry<double> extract_geometry(std::vector<std::stri
} }
else else
{ {
throw std::runtime_error("FIXME WKT"); throw std::runtime_error("Failed to parse WKT:" + row[locator.index]);
} }
} }
else if (locator.type == geometry_column_locator::GEOJSON) else if (locator.type == geometry_column_locator::GEOJSON)
@ -220,7 +221,7 @@ static mapnik::geometry::geometry<double> extract_geometry(std::vector<std::stri
if (!mapnik::json::from_geojson(row[locator.index], geom)) if (!mapnik::json::from_geojson(row[locator.index], geom))
{ {
throw std::runtime_error("FIXME GEOJSON"); throw std::runtime_error("Failed to parse GeoJSON:" + row[locator.index]);
} }
} }
else if (locator.type == geometry_column_locator::LON_LAT) else if (locator.type == geometry_column_locator::LON_LAT)
@ -228,11 +229,11 @@ static mapnik::geometry::geometry<double> extract_geometry(std::vector<std::stri
double x, y; double x, y;
if (!mapnik::util::string2double(row[locator.index],x)) if (!mapnik::util::string2double(row[locator.index],x))
{ {
throw std::runtime_error("FIXME Lon"); throw std::runtime_error("Failed to parse Longitude(Easting):" + row[locator.index]);
} }
if (!mapnik::util::string2double(row[locator.index2],y)) if (!mapnik::util::string2double(row[locator.index2],y))
{ {
throw std::runtime_error("FIXME Lat"); throw std::runtime_error("Failed to parse Latitude(Northing):" + row[locator.index2]);
} }
geom = mapnik::geometry::point<double>(x,y); geom = mapnik::geometry::point<double>(x,y);
} }