upgrade CSV parser to boost::spirit::x3

This commit is contained in:
artemp 2016-10-14 14:24:44 +02:00
parent 01fbbafc26
commit 83cc819c39
5 changed files with 202 additions and 145 deletions

View file

@ -1,79 +0,0 @@
/*****************************************************************************
*
* This file is part of Mapnik (c++ mapping toolkit)
*
* Copyright (C) 2015 Artem Pavlenko
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*****************************************************************************/
#ifndef MAPNIK_CSV_GRAMMAR_HPP
#define MAPNIK_CSV_GRAMMAR_HPP
#include <mapnik/csv/csv_types.hpp>
#include <boost/spirit/include/qi.hpp>
namespace mapnik {
namespace qi = boost::spirit::qi;
// Qi primitive parser that consumes a run of one or more ' ' characters.
// The parser itself only ever compares against the space character; any
// other kind of white space is matched solely by the enclosing skipper (if any).
struct csv_white_space_skipper : qi::primitive_parser<csv_white_space_skipper>
{
    // This parser does not expose an attribute.
    template <typename Context, typename Iterator>
    struct attribute
    {
        using type = qi::unused_type;
    };

    // Applies the enclosing skipper first, then advances `first` past every
    // consecutive space. Returns true iff at least one space was consumed;
    // on failure `first` stays where the enclosing skipper left it.
    template <typename Iterator, typename Context,
              typename Skipper, typename Attribute>
    bool parse(Iterator& first, Iterator const& last,
               Context& /*context*/, Skipper const& skipper,
               Attribute& /*attr*/) const
    {
        qi::skip_over(first, last, skipper);

        // Guard: nothing to do unless we are looking at a space.
        if (first == last || *first != ' ')
        {
            return false;
        }

        // At least one space is present - swallow the whole run.
        do
        {
            ++first;
        }
        while (first != last && *first == ' ');
        return true;
    }

    // Name reported in Qi diagnostics.
    template <typename Context>
    qi::info what(Context& /*context*/) const
    {
        return qi::info("csv_white_space_skipper");
    }
};
// Qi grammar that parses one CSV line into a csv_line.
// The grammar takes two inherited `char` attributes; the caller visible in
// this change invokes it as `(line_g)(separator, quote)`, i.e. the separator
// character first and the quote character second.
// `Skipper` defaults to csv_white_space_skipper.
template <typename Iterator, typename Skipper = csv_white_space_skipper>
struct csv_line_grammar : qi::grammar<Iterator, csv_line(char, char), Skipper>
{
// Defined out of line (see the constructor template for this class).
csv_line_grammar();
private:
// Start rule; the only rule that uses the skipper.
qi::rule<Iterator, csv_line(char, char), Skipper> line;
// The remaining rules are declared without a skipper type.
qi::rule<Iterator, csv_value(char, char)> column; // no-skip
qi::rule<Iterator, csv_value(char)> text; // no-skip
qi::rule<Iterator, csv_value(char)> quoted; // no-skip
// Maps escape sequences to the single character they represent.
qi::symbols<char const, char const> unesc_char;
};
}
#endif // MAPNIK_CSV_GRAMMAR_HPP

View file

@ -1,61 +0,0 @@
/*****************************************************************************
*
* This file is part of Mapnik (c++ mapping toolkit)
*
* Copyright (C) 2016 Artem Pavlenko
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*****************************************************************************/
#include <mapnik/csv/csv_grammar.hpp>
namespace mapnik {
namespace qi = boost::spirit::qi;
// Builds the CSV line grammar: fills the escape table and defines the rules.
// `line` is registered as the start rule.
template <typename Iterator, typename Skipper>
csv_line_grammar<Iterator, Skipper>::csv_line_grammar()
: csv_line_grammar::base_type(line)
{
// _r1 / _r2 are Qi placeholders for a rule's first / second inherited attribute.
qi::_r1_type _r1;
qi::_r2_type _r2;
qi::lit_type lit;
qi::char_type char_;
// Escape sequences recognised by `text`, each mapped to one output character.
unesc_char.add
("\\a", '\a')
("\\b", '\b')
("\\f", '\f')
("\\n", '\n')
("\\r", '\r')
("\\t", '\t')
("\\v", '\v')
("\\\\",'\\')
("\\\'", '\'')
("\\\"", '\"')
("\"\"", '\"') // a doubled quote stands for a single '"'
;
// Optional CR, optional LF, then one or more columns delimited by _r1.
// Both inherited attributes are forwarded to `column`.
line = -lit("\r") > -lit("\n") > column(_r1, _r2) % lit(_r1)
;
// Either a quoted value (quote character = _r2) or every character up to
// the next occurrence of _r1.
column = quoted(_r2) | *(char_ - lit(_r1))
;
// Here _r1 is the character `column` passed in (its _r2). `>` is the
// expectation operator, so a missing closing character is not a plain mismatch.
quoted = lit(_r1) > text(_r1) > lit(_r1) // support unmatched quotes or not (??)
;
// Escape sequences are tried first; otherwise any character other than _r1.
text = *(unesc_char | (char_ - lit(_r1)))
;
// Debug registration covers line, column and quoted only (`text` is not listed).
BOOST_SPIRIT_DEBUG_NODES((line)(column)(quoted));
}
} // namespace mapnik

View file

@ -0,0 +1,67 @@
/*****************************************************************************
*
* This file is part of Mapnik (c++ mapping toolkit)
*
* Copyright (C) 2015 Artem Pavlenko
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*****************************************************************************/
#ifndef MAPNIK_CSV_GRAMMAR_X3_HPP
#define MAPNIK_CSV_GRAMMAR_X3_HPP
#include <mapnik/csv/csv_types.hpp>
#include <boost/spirit/home/x3.hpp>
#include <iostream>
namespace mapnik {
namespace x3 = boost::spirit::x3;
// X3 parser that consumes a run of one or more ' ' characters.
// The parser itself only ever compares against the space character.
struct csv_white_space_skipper : x3::parser<csv_white_space_skipper>
{
    // No attribute is produced.
    using attribute_type = x3::unused_type;
    static bool const has_attribute = false;

    // Calls x3::skip_over with the supplied context, then advances `first`
    // past every consecutive space. Returns true iff at least one space was
    // consumed.
    template <typename Iterator, typename Context, typename Attribute>
    bool parse(Iterator& first, Iterator const& last,
               Context const& context, x3::unused_type, Attribute& ) const
    {
        x3::skip_over(first, last, context);

        // Guard: nothing to do unless we are looking at a space.
        if (first == last || *first != ' ')
        {
            return false;
        }

        // At least one space is present - swallow the whole run.
        do
        {
            ++first;
        }
        while (first != last && *first == ' ');
        return true;
    }
};

// Ready-made skipper instance for use with x3::phrase_parse.
auto static const csv_white_space = csv_white_space_skipper{};
namespace grammar {
// Context tags: incomplete types used only as keys for values stored in the
// X3 parse context (x3::with<Tag>(value) to store, x3::get<Tag>(context) to read).
struct separator_tag; // key for the column separator character
struct quote_tag;     // key for the quote character
// Identifier (tag) type of the top-level CSV line rule.
struct csv_line_class;
// Type of the top-level rule; its synthesized attribute is csv_line.
using csv_line_grammar_type = x3::rule<csv_line_class, csv_line>;
// Declares the parse function for the rule type above; the matching
// definition is expected to be provided via BOOST_SPIRIT_DEFINE.
BOOST_SPIRIT_DECLARE(csv_line_grammar_type);
}}
#endif // MAPNIK_CSV_GRAMMAR_X3_HPP

View file

@ -0,0 +1,127 @@
/*****************************************************************************
*
* This file is part of Mapnik (c++ mapping toolkit)
*
* Copyright (C) 2016 Artem Pavlenko
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*****************************************************************************/
#pragma GCC diagnostic push
#include <mapnik/warning_ignore.hpp>
#include <mapnik/csv/csv_grammar_x3.hpp>
#pragma GCC diagnostic pop
#include <iostream>
namespace mapnik { namespace grammar {
namespace x3 = boost::spirit::x3;
namespace ascii = boost::spirit::x3::ascii;
using x3::lit;
using x3::lexeme;
using ascii::char_;
// Symbol table translating the escape sequences recognised inside CSV text
// into the single character each one stands for. Besides the backslash
// escapes it maps a doubled quote ("") to one '"'.
//
// The instance is declared const: the table is fully populated by the
// constructor and only read afterwards, and a const namespace-scope object
// has internal linkage, so this definition does not clash if the enclosing
// file is included from more than one translation unit.
struct unesc_char_ : x3::symbols<char>
{
    unesc_char_()
    {
        add("\\a", '\a')
            ("\\b", '\b')
            ("\\f", '\f')
            ("\\n", '\n')
            ("\\r", '\r')
            ("\\t", '\t')
            ("\\v", '\v')
            ("\\\\",'\\')
            ("\\\'", '\'')
            ("\\\"", '\"')
            ("\"\"", '\"') // double quote
            ;
    }
} const unesc_char;
struct separator_ : x3::parser<separator_>
{
using attribute_type = x3::unused_type;
static bool const has_attribute = false;
template <typename Iterator, typename Context, typename Attribute>
bool parse(Iterator& first, Iterator const& last,
Context const& context, x3::unused_type, Attribute& ) const
{
x3::skip_over(first, last, context);
if (first != last && *first == x3::get<separator_tag>(context))
{
++first;
return true;
}
return false;
}
} separator;
struct quote_ : x3::parser<quote_>
{
using attribute_type = x3::unused_type;
static bool const has_attribute = false;
template <typename Iterator, typename Context, typename Attribute>
bool parse(Iterator& first, Iterator const& last,
Context const& context, x3::unused_type, Attribute& ) const
{
x3::skip_over(first, last, context);
if (first != last && *first == x3::get<quote_tag>(context))
{
++first;
return true;
}
return false;
}
} quote;
// starting rule
csv_line_grammar_type const line("csv-line");
// rules
// All rule objects are const (as `line` already was): const namespace-scope
// objects have internal linkage and cannot be reassigned by accident.
x3::rule<class csv_column, csv_value> const column("csv-column");
x3::rule<class csv_text, csv_value> const text("csv-text");
x3::rule<class csv_quoted_text, csv_value> const quoted_text("csv-quoted-text");

// Optional CR, optional LF, then one or more columns delimited by the
// separator. Each column is wrapped in lexeme[], so the skipper is not
// applied between the characters of a column.
auto const line_def = -lit('\r') > -lit('\n') > lexeme[column] % separator
    ;
// Either a quoted value or every character up to the next separator.
auto const column_def = quoted_text | *(char_ - separator)
    ;
// `>` is the expectation operator: once the opening quote has matched, the
// text and the closing quote are required rather than merely attempted.
auto const quoted_text_def = quote > text > quote // support unmatched quotes or not (??)
    ;
// Escape sequences are tried first; otherwise any character except the quote.
auto const text_def = *(unesc_char | (char_ - quote))
    ;

// Ties each rule to its `<name>_def` definition.
BOOST_SPIRIT_DEFINE (
    line,
    column,
    quoted_text,
    text
    );
} // grammar
// Returns the start rule of the CSV line grammar.
// The rule reads the separator and quote characters from the parse context,
// so callers are expected to wrap it in x3::with<grammar::separator_tag>(...)
// and x3::with<grammar::quote_tag>(...) before parsing.
//
// Marked inline because this definition lives in a file that is pulled in via
// #include; without it, a second including translation unit would produce a
// duplicate definition at link time.
inline grammar::csv_line_grammar_type const& csv_line_grammar()
{
    return grammar::line;
}
} // namespace mapnik

View file

@ -30,7 +30,7 @@
#include <mapnik/util/trim.hpp>
#include <mapnik/datasource.hpp>
// csv grammar
#include <mapnik/csv/csv_grammar_impl.hpp>
#include <mapnik/csv/csv_grammar_x3_def.hpp>
//
#include "csv_getline.hpp"
#include "csv_utils.hpp"
@ -192,14 +192,17 @@ bool valid(geometry_column_locator const& locator, std::size_t max_size)
} // namespace detail
// File-local Qi grammar instance for `char const*` input.
static const mapnik::csv_line_grammar<char const*> line_g;
// File-local skipper instance handed to the phrase parser.
static const mapnik::csv_white_space_skipper skipper{};
mapnik::csv_line parse_line(char const* start, char const* end, char separator, char quote, std::size_t num_columns)
{
namespace x3 = boost::spirit::x3;
auto parser = x3::with<mapnik::grammar::quote_tag>(quote)
[ x3::with<mapnik::grammar::separator_tag>(separator)
[ mapnik::csv_line_grammar()]
];
mapnik::csv_line values;
if (num_columns > 0) values.reserve(num_columns);
if (!boost::spirit::qi::phrase_parse(start, end, (line_g)(separator, quote), skipper, values))
if (!x3::phrase_parse(start, end, parser, mapnik::csv_white_space, values))
{
throw mapnik::datasource_exception("Failed to parse CSV line:\n" + std::string(start, end));
}