CSV - revive row_limit parameter + fix stderr

This commit is contained in:
artemp 2015-08-25 15:05:04 +02:00
parent 8709fb6f7c
commit 3753d50b75
4 changed files with 27 additions and 25 deletions

View file

@ -2,7 +2,7 @@
*
* This file is part of Mapnik (c++ mapping toolkit)
*
* Copyright (C) 2014 Artem Pavlenko
* Copyright (C) 2015 Artem Pavlenko
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@ -31,26 +31,22 @@
namespace mapnik {
namespace qi = boost::spirit::qi;
using column = std::string;
using columns = std::vector<column>;
using csv_line = columns;
using csv_value = std::string;
using csv_line = std::vector<csv_value>;
using csv_data = std::vector<csv_line>;
template <typename Iterator>
struct csv_line_grammar : qi::grammar<Iterator, void(csv_line&, std::string const&), qi::blank_type>
struct csv_line_grammar : qi::grammar<Iterator, csv_line(std::string const&), qi::blank_type>
{
csv_line_grammar() : csv_line_grammar::base_type(line)
{
using namespace qi;
qi::_a_type _a;
qi::_r1_type _r1;
qi::_r2_type _r2;
qi::lit_type lit;
//qi::eol_type eol;
qi::_val_type _val;
qi::_1_type _1;
qi::char_type char_;
qi::eps_type eps;
qi::omit_type omit;
unesc_char.add
("\\a", '\a')
@ -66,21 +62,21 @@ struct csv_line_grammar : qi::grammar<Iterator, void(csv_line&, std::string cons
("\"\"", '\"') // double quote
;
line = column(_r2)[boost::phoenix::push_back(_r1,_1)] % char_(_r2)
line = column(_r1) % char_(_r1)
;
column = quoted | *(char_ - (lit(_r1) /*| eol*/))
;
quoted = omit[char_("\"'")[_a = _1]] > text(_a)[boost::phoenix::swap(_val,_1)] > -lit(_a)
quoted = omit[char_("\"'")[_a = _1]] > text(_a) > -lit(_a)
;
text = *(unesc_char | (char_ - char_(_r1)))
;
//BOOST_SPIRIT_DEBUG_NODES((line)(column)(quoted));
BOOST_SPIRIT_DEBUG_NODES((line)(column)(quoted));
}
private:
qi::rule<Iterator, void(csv_line&,std::string const&), qi::blank_type> line;
qi::rule<Iterator, column(std::string const&)> column; // no-skip
qi::rule<Iterator, std::string(char)> text;
qi::rule<Iterator, qi::locals<char>, std::string()> quoted;
qi::rule<Iterator, csv_line(std::string const&), qi::blank_type> line;
qi::rule<Iterator, csv_value(std::string const&)> column; // no-skip
qi::rule<Iterator, csv_value(char)> text;
qi::rule<Iterator, qi::locals<char>, csv_value()> quoted;
qi::symbols<char const, char const> unesc_char;
};

View file

@ -61,6 +61,7 @@ csv_datasource::csv_datasource(parameters const& params)
desc_(csv_datasource::name(), *params.get<std::string>("encoding", "utf-8")),
extent_(),
filename_(),
row_limit_(*params.get<mapnik::value_integer>("row_limit", 0)),
inline_string_(),
escape_(*params.get<std::string>("escape", "")),
separator_(*params.get<std::string>("separator", "")),
@ -140,7 +141,7 @@ void csv_datasource::parse_csv(T & stream,
std::string sep = mapnik::util::trim_copy(separator);
if (sep.empty()) sep = detail::detect_separator(csv_line);
separator_ = sep; // <------------------- FIXME !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
separator_ = sep;
// set back to start
stream.seekg(0, std::ios::beg);
@ -252,6 +253,11 @@ void csv_datasource::parse_csv(T & stream,
auto pos = stream.tellg();
while (std::getline(stream, csv_line, stream.widen(newline)) || is_first_row)
{
if ((row_limit_ > 0) && (line_number++ > row_limit_))
{
MAPNIK_LOG_DEBUG(csv) << "csv_datasource: row limit hit, exiting at feature: " << feature_count;
break;
}
auto record_offset = pos;
auto record_size = csv_line.length();
pos = stream.tellg();
@ -264,7 +270,6 @@ void csv_datasource::parse_csv(T & stream,
boost::trim_if(trimmed,boost::algorithm::is_any_of("\",'\r\n "));
if (trimmed.empty())
{
++line_number;
MAPNIK_LOG_DEBUG(csv) << "csv_datasource: empty row encountered at line: " << line_number;
continue;
}
@ -409,7 +414,6 @@ void csv_datasource::parse_csv(T & stream,
MAPNIK_LOG_ERROR(csv) << s.str();
}
}
++line_number;
}
catch (mapnik::datasource_exception const& ex )
{

View file

@ -99,6 +99,7 @@ private:
mapnik::layer_descriptor desc_;
mapnik::box2d<double> extent_;
std::string filename_;
mapnik::value_integer row_limit_;
std::string inline_string_;
std::string escape_;
std::string separator_;

View file

@ -49,19 +49,20 @@ namespace csv_utils
static const mapnik::csv_line_grammar<char const*> line_g;
static mapnik::csv_line parse_line(char const* start, char const* end, std::string const& separator, std::size_t num_columns)
template <typename Iterator>
static mapnik::csv_line parse_line(Iterator start, Iterator end, std::string const& separator, std::size_t num_columns)
{
mapnik::csv_line values;
if (num_columns > 0) values.reserve(num_columns);
boost::spirit::standard::blank_type blank;
if (!boost::spirit::qi::phrase_parse(start, end, (line_g)(boost::phoenix::ref(values), boost::phoenix::cref(separator)), blank))
if (!boost::spirit::qi::phrase_parse(start, end, (line_g)(boost::phoenix::cref(separator)), blank, values))
{
throw std::runtime_error("Failed to parse CSV line:\n" + std::string(start, end));
}
return values;
}
static mapnik::csv_line parse_line(std::string const& line_str, std::string const& separator)
static inline mapnik::csv_line parse_line(std::string const& line_str, std::string const& separator)
{
auto start = line_str.c_str();
auto end = start + line_str.length();
@ -212,7 +213,7 @@ static mapnik::geometry::geometry<double> extract_geometry(std::vector<std::stri
}
else
{
throw std::runtime_error("FIXME WKT");
throw std::runtime_error("Failed to parse WKT:" + row[locator.index]);
}
}
else if (locator.type == geometry_column_locator::GEOJSON)
@ -220,7 +221,7 @@ static mapnik::geometry::geometry<double> extract_geometry(std::vector<std::stri
if (!mapnik::json::from_geojson(row[locator.index], geom))
{
throw std::runtime_error("FIXME GEOJSON");
throw std::runtime_error("Failed to parse GeoJSON:" + row[locator.index]);
}
}
else if (locator.type == geometry_column_locator::LON_LAT)
@ -228,11 +229,11 @@ static mapnik::geometry::geometry<double> extract_geometry(std::vector<std::stri
double x, y;
if (!mapnik::util::string2double(row[locator.index],x))
{
throw std::runtime_error("FIXME Lon");
throw std::runtime_error("Failed to parse Longitude(Easting):" + row[locator.index]);
}
if (!mapnik::util::string2double(row[locator.index2],y))
{
throw std::runtime_error("FIXME Lat");
throw std::runtime_error("Failed to parse Latitude(Northing):" + row[locator.index2]);
}
geom = mapnik::geometry::point<double>(x,y);
}