CSV - implement transduction parsing to defer std::string's ctor

This commit is contained in:
artemp 2015-08-25 15:11:54 +02:00
parent 3753d50b75
commit fb3f8704b8
5 changed files with 47 additions and 34 deletions

View file

@ -31,16 +31,20 @@
namespace mapnik {
namespace qi = boost::spirit::qi;
using csv_value = std::string;
using csv_line = std::vector<csv_value>;
using csv_data = std::vector<csv_line>;
using csv_value = boost::iterator_range<char const*>;
using csv_record = std::vector<csv_value>;
using csv_data = std::vector<csv_record>;
template <typename Iterator>
struct csv_line_grammar : qi::grammar<Iterator, csv_line(std::string const&), qi::blank_type>
struct csv_record_grammar : qi::grammar<Iterator, csv_record(std::string const&), qi::blank_type>
{
csv_line_grammar() : csv_line_grammar::base_type(line)
using iterator_range = boost::iterator_range<Iterator>;
csv_record_grammar()
: csv_record_grammar::base_type(line)
{
using namespace qi;
qi::raw_type raw;
qi::_a_type _a;
qi::_r1_type _r1;
qi::lit_type lit;
@ -61,25 +65,25 @@ struct csv_line_grammar : qi::grammar<Iterator, csv_line(std::string const&), qi
("\\\"", '\"')
("\"\"", '\"') // double quote
;
line = column(_r1) % char_(_r1)
;
column = quoted | *(char_ - (lit(_r1) /*| eol*/))
column = quoted | raw[*(char_ - (lit(_r1) /*| eol*/))]
;
quoted = omit[char_("\"'")[_a = _1]] > text(_a) > -lit(_a)
;
text = *(unesc_char | (char_ - char_(_r1)))
text = raw[*(unesc_char | (char_ - char_(_r1)))]
;
BOOST_SPIRIT_DEBUG_NODES((line)(column)(quoted));
}
private:
qi::rule<Iterator, csv_line(std::string const&), qi::blank_type> line;
qi::rule<Iterator, csv_record(std::string const&), qi::blank_type> line;
qi::rule<Iterator, csv_value(std::string const&)> column; // no-skip
qi::rule<Iterator, csv_value(char)> text;
qi::rule<Iterator, qi::locals<char>, csv_value()> quoted;
qi::symbols<char const, char const> unesc_char;
};
template <typename Iterator>
struct csv_file_grammar : qi::grammar<Iterator, csv_data(std::string const&), qi::blank_type>
{
@ -94,10 +98,9 @@ struct csv_file_grammar : qi::grammar<Iterator, csv_data(std::string const&), qi
}
private:
qi::rule<Iterator, csv_data(std::string const&), qi::blank_type> start;
csv_line_grammar<Iterator> line;
csv_record_grammar<Iterator> line;
};
}
#endif // MAPNIK_CVS_GRAMMAR_HPP

View file

@ -39,8 +39,8 @@ inline bool from_wkt(std::string const& wkt, mapnik::geometry::geometry<double>
using namespace boost::spirit;
static const mapnik::wkt::wkt_grammar<std::string::const_iterator> g;
ascii::space_type space;
std::string::const_iterator first = wkt.begin();
std::string::const_iterator last = wkt.end();
auto first = wkt.begin();
auto last = wkt.end();
return qi::phrase_parse(first, last, (g)(boost::phoenix::ref(geom)), space);
}

View file

@ -162,7 +162,7 @@ void csv_datasource::parse_csv(T & stream,
auto headers = csv_utils::parse_line(manual_headers_, sep);
for (auto const& header : headers)
{
std::string val = mapnik::util::trim_copy(header);
std::string val = mapnik::util::trim_copy(std::string(header.begin(), header.end()));
detail::locate_geometry_column(val, index++, locator_);
headers_.push_back(val);
}
@ -176,13 +176,13 @@ void csv_datasource::parse_csv(T & stream,
auto headers = csv_utils::parse_line(csv_line, sep);
// skip blank lines
std::string val;
if (headers.size() > 0 && headers[0].empty()) ++line_number;
if (headers.size() > 0 && headers[0].begin() == headers[0].end()) ++line_number;
else
{
std::size_t index = 0;
for (auto const& header : headers)
{
val = mapnik::util::trim_copy(header);
val = mapnik::util::trim_copy(std::string(header.begin(), header.end()));
if (val.empty())
{
if (strict_)
@ -338,7 +338,8 @@ void csv_datasource::parse_csv(T & stream,
// encoded consistenly as empty strings
continue;
}
std::string value = mapnik::util::trim_copy(*beg++);
std::string value = mapnik::util::trim_copy(std::string(beg->begin(),beg->end()));
++beg;
int value_length = value.length();
if (locator_.index == i && (locator_.type == detail::geometry_column_locator::WKT
|| locator_.type == detail::geometry_column_locator::GEOJSON)) continue;
@ -404,7 +405,7 @@ void csv_datasource::parse_csv(T & stream,
std::ostringstream s;
s << "CSV Plugin: expected geometry column: could not parse row "
<< line_number << " "
<< values[locator_.index] << "'";
<< "FIXME values[locator_.index]" << "'";
if (strict_)
{
throw mapnik::datasource_exception(s.str());

View file

@ -47,12 +47,12 @@
namespace csv_utils
{
static const mapnik::csv_line_grammar<char const*> line_g;
static const mapnik::csv_record_grammar<char const*> line_g;
template <typename Iterator>
static mapnik::csv_line parse_line(Iterator start, Iterator end, std::string const& separator, std::size_t num_columns)
static mapnik::csv_record parse_line(Iterator start, Iterator end, std::string const& separator, std::size_t num_columns)
{
mapnik::csv_line values;
mapnik::csv_record values;
if (num_columns > 0) values.reserve(num_columns);
boost::spirit::standard::blank_type blank;
if (!boost::spirit::qi::phrase_parse(start, end, (line_g)(boost::phoenix::cref(separator)), blank, values))
@ -62,7 +62,7 @@ static mapnik::csv_line parse_line(Iterator start, Iterator end, std::string con
return values;
}
static inline mapnik::csv_line parse_line(std::string const& line_str, std::string const& separator)
static inline mapnik::csv_record parse_line(std::string const& line_str, std::string const& separator)
{
auto start = line_str.c_str();
auto end = start + line_str.length();
@ -201,39 +201,47 @@ static inline void locate_geometry_column(std::string const& header, std::size_t
}
}
static mapnik::geometry::geometry<double> extract_geometry(std::vector<std::string> const& row, geometry_column_locator const& locator)
static mapnik::geometry::geometry<double> extract_geometry(std::vector<boost::iterator_range<char const*> > const& row,
geometry_column_locator const& locator)
{
mapnik::geometry::geometry<double> geom;
if (locator.type == geometry_column_locator::WKT)
{
if (mapnik::from_wkt(row[locator.index], geom))
std::string wkt(row[locator.index].begin(), row[locator.index].end());
//auto itr = row[locator.index].begin();
//auto end = row[locator.index].end();
if (mapnik::from_wkt(wkt, geom))
{
// correct orientations ..
mapnik::geometry::correct(geom);
}
else
{
throw std::runtime_error("Failed to parse WKT:" + row[locator.index]);
throw std::runtime_error("Failed to parse WKT:" + std::string(row[locator.index].begin(),row[locator.index].end()));
}
}
else if (locator.type == geometry_column_locator::GEOJSON)
{
if (!mapnik::json::from_geojson(row[locator.index], geom))
std::string json(row[locator.index].begin(), row[locator.index].end());
//if (!mapnik::json::from_geojson(row[locator.index].begin(), row[locator.index].end(), geom))
if (!mapnik::json::from_geojson(json, geom))
{
throw std::runtime_error("Failed to parse GeoJSON:" + row[locator.index]);
throw std::runtime_error("Failed to parse geojson:" + std::string(row[locator.index].begin(),row[locator.index].end()));
}
}
else if (locator.type == geometry_column_locator::LON_LAT)
{
double x, y;
if (!mapnik::util::string2double(row[locator.index],x))
if (!mapnik::util::string2double(row[locator.index].begin(),row[locator.index].end(), x))
{
throw std::runtime_error("Failed to parse Longitude(Easting):" + row[locator.index]);
throw std::runtime_error("Failed to parser longitude (easting)" + std::string(row[locator.index].begin(),row[locator.index].end()));
}
if (!mapnik::util::string2double(row[locator.index2],y))
if (!mapnik::util::string2double(row[locator.index2].begin(), row[locator.index2].end(), y))
{
throw std::runtime_error("Failed to parse Latitude(Northing):" + row[locator.index2]);
throw std::runtime_error("Failed to parser latitude (northing)" + std::string(row[locator.index2].begin(),row[locator.index2].end()));
}
geom = mapnik::geometry::point<double>(x,y);
}
@ -254,7 +262,8 @@ void process_properties(Feature & feature, Headers const& headers, Values const&
feature.put(fld_name,tr.transcode(""));
continue;
}
std::string value = mapnik::util::trim_copy(*val_beg++);
std::string value = mapnik::util::trim_copy(std::string(val_beg->begin(),val_beg->end()));
++val_beg;
int value_length = value.length();
if (locator.index == i && (locator.type == detail::geometry_column_locator::WKT