From 83cc819c392813d71978da8fd2399639e9fdc445 Mon Sep 17 00:00:00 2001 From: artemp Date: Fri, 14 Oct 2016 14:24:44 +0200 Subject: [PATCH] upgrade CSV parser to boost::spirit::x3 --- include/mapnik/csv/csv_grammar.hpp | 79 -------------- include/mapnik/csv/csv_grammar_impl.hpp | 61 ----------- include/mapnik/csv/csv_grammar_x3.hpp | 67 ++++++++++++ include/mapnik/csv/csv_grammar_x3_def.hpp | 127 ++++++++++++++++++++++ plugins/input/csv/csv_utils.cpp | 13 ++- 5 files changed, 202 insertions(+), 145 deletions(-) delete mode 100644 include/mapnik/csv/csv_grammar.hpp delete mode 100644 include/mapnik/csv/csv_grammar_impl.hpp create mode 100644 include/mapnik/csv/csv_grammar_x3.hpp create mode 100644 include/mapnik/csv/csv_grammar_x3_def.hpp diff --git a/include/mapnik/csv/csv_grammar.hpp b/include/mapnik/csv/csv_grammar.hpp deleted file mode 100644 index 240f11ce1..000000000 --- a/include/mapnik/csv/csv_grammar.hpp +++ /dev/null @@ -1,79 +0,0 @@ -/***************************************************************************** - * - * This file is part of Mapnik (c++ mapping toolkit) - * - * Copyright (C) 2015 Artem Pavlenko - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - *****************************************************************************/ - -#ifndef MAPNIK_CSV_GRAMMAR_HPP -#define MAPNIK_CSV_GRAMMAR_HPP - -#include -#include - -namespace mapnik { - -namespace qi = boost::spirit::qi; - -struct csv_white_space_skipper : qi::primitive_parser -{ - template - struct attribute - { - typedef qi::unused_type type; - }; - - template - bool parse(Iterator& first, Iterator const& last - , Context& /*context*/, Skipper const& skipper - , Attribute& /*attr*/) const - { - qi::skip_over(first, last, skipper); - if (first != last && *first == ' ') - { - while (++first != last && *first == ' ') - ; - return true; - } - return false; - } - - template - qi::info what(Context& /*context*/) const - { - return qi::info("csv_white_space_skipper"); - } -}; - - -template -struct csv_line_grammar : qi::grammar -{ - csv_line_grammar(); -private: - qi::rule line; - qi::rule column; // no-skip - qi::rule text; // no-skip - qi::rule quoted; // no-skip - qi::symbols unesc_char; -}; - -} - -#endif // MAPNIK_CSV_GRAMMAR_HPP diff --git a/include/mapnik/csv/csv_grammar_impl.hpp b/include/mapnik/csv/csv_grammar_impl.hpp deleted file mode 100644 index 560835443..000000000 --- a/include/mapnik/csv/csv_grammar_impl.hpp +++ /dev/null @@ -1,61 +0,0 @@ -/***************************************************************************** - * - * This file is part of Mapnik (c++ mapping toolkit) - * - * Copyright (C) 2016 Artem Pavlenko - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - *****************************************************************************/ - -#include - -namespace mapnik { - -namespace qi = boost::spirit::qi; - -template -csv_line_grammar::csv_line_grammar() - : csv_line_grammar::base_type(line) -{ - qi::_r1_type _r1; - qi::_r2_type _r2; - qi::lit_type lit; - qi::char_type char_; - unesc_char.add - ("\\a", '\a') - ("\\b", '\b') - ("\\f", '\f') - ("\\n", '\n') - ("\\r", '\r') - ("\\t", '\t') - ("\\v", '\v') - ("\\\\",'\\') - ("\\\'", '\'') - ("\\\"", '\"') - ("\"\"", '\"') // double quote - ; - line = -lit("\r") > -lit("\n") > column(_r1, _r2) % lit(_r1) - ; - column = quoted(_r2) | *(char_ - lit(_r1)) - ; - quoted = lit(_r1) > text(_r1) > lit(_r1) // support unmatched quotes or not (??) - ; - text = *(unesc_char | (char_ - lit(_r1))) - ; - BOOST_SPIRIT_DEBUG_NODES((line)(column)(quoted)); -} - -} // namespace mapnik diff --git a/include/mapnik/csv/csv_grammar_x3.hpp b/include/mapnik/csv/csv_grammar_x3.hpp new file mode 100644 index 000000000..2d66a39ab --- /dev/null +++ b/include/mapnik/csv/csv_grammar_x3.hpp @@ -0,0 +1,67 @@ +/***************************************************************************** + * + * This file is part of Mapnik (c++ mapping toolkit) + * + * Copyright (C) 2015 Artem Pavlenko + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *****************************************************************************/ + +#ifndef MAPNIK_CSV_GRAMMAR_X3_HPP +#define MAPNIK_CSV_GRAMMAR_X3_HPP + +#include +#include +#include +namespace mapnik { + +namespace x3 = boost::spirit::x3; + +struct csv_white_space_skipper : x3::parser +{ + using attribute_type = x3::unused_type; + static bool const has_attribute = false; + + template + bool parse(Iterator& first, Iterator const& last, + Context const& context, x3::unused_type, Attribute& ) const + { + x3::skip_over(first, last, context); + if (first != last && *first == ' ') + { + while (++first != last && *first == ' ') + ; + return true; + } + return false; + } +}; + +auto static const csv_white_space = csv_white_space_skipper{}; + +namespace grammar { + +struct separator_tag; +struct quote_tag; + +struct csv_line_class; +using csv_line_grammar_type = x3::rule; + +BOOST_SPIRIT_DECLARE(csv_line_grammar_type); + +}} + +#endif // MAPNIK_CSV_GRAMMAR_X3_HPP diff --git a/include/mapnik/csv/csv_grammar_x3_def.hpp b/include/mapnik/csv/csv_grammar_x3_def.hpp new file mode 100644 index 000000000..9b1d3ba6f --- /dev/null +++ b/include/mapnik/csv/csv_grammar_x3_def.hpp @@ -0,0 +1,127 @@ +/***************************************************************************** + * + * This file is part of Mapnik (c++ mapping toolkit) + * + * Copyright (C) 2016 Artem Pavlenko + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *****************************************************************************/ + +#pragma GCC diagnostic push +#include +#include +#pragma GCC diagnostic pop +#include +namespace mapnik { namespace grammar { + +namespace x3 = boost::spirit::x3; +namespace ascii = boost::spirit::x3::ascii; + +using x3::lit; +using x3::lexeme; +using ascii::char_; + +struct unesc_char_ : x3::symbols +{ + unesc_char_() + { + add("\\a", '\a') + ("\\b", '\b') + ("\\f", '\f') + ("\\n", '\n') + ("\\r", '\r') + ("\\t", '\t') + ("\\v", '\v') + ("\\\\",'\\') + ("\\\'", '\'') + ("\\\"", '\"') + ("\"\"", '\"') // double quote + ; + } +} unesc_char; + +struct separator_ : x3::parser +{ + using attribute_type = x3::unused_type; + static bool const has_attribute = false; + + template + bool parse(Iterator& first, Iterator const& last, + Context const& context, x3::unused_type, Attribute& ) const + { + x3::skip_over(first, last, context); + if (first != last && *first == x3::get(context)) + { + ++first; + return true; + } + return false; + } +} separator; + +struct quote_ : x3::parser +{ + using attribute_type = x3::unused_type; + static bool const has_attribute = false; + + template + bool parse(Iterator& first, Iterator const& last, + Context const& context, x3::unused_type, Attribute& ) const + { + x3::skip_over(first, last, context); + if (first != last && *first == x3::get(context)) + { + ++first; + return true; + } + return false; + } +} quote; + +// starting rule +csv_line_grammar_type const line("csv-line"); +// rules +x3::rule column("csv-column"); +x3::rule text("csv-text"); +x3::rule quoted_text("csv-quoted-text"); + +auto const line_def = -lit('\r') > -lit('\n') > lexeme[column] % separator + ; + +auto const column_def = quoted_text | *(char_ - separator) + ; + +auto const quoted_text_def = quote > text > quote // support unmatched quotes or not (??) + ; + +auto const text_def = *(unesc_char | (char_ - quote)) + ; + +BOOST_SPIRIT_DEFINE ( + line, + column, + quoted_text, + text + ); + +} // grammar + +grammar::csv_line_grammar_type const& csv_line_grammar() +{ + return grammar::line; +} + +} // namespace mapnik diff --git a/plugins/input/csv/csv_utils.cpp b/plugins/input/csv/csv_utils.cpp index 62e9e760b..b9e7ea762 100644 --- a/plugins/input/csv/csv_utils.cpp +++ b/plugins/input/csv/csv_utils.cpp @@ -30,7 +30,7 @@ #include #include // csv grammar -#include +#include // #include "csv_getline.hpp" #include "csv_utils.hpp" @@ -192,14 +192,17 @@ bool valid(geometry_column_locator const& locator, std::size_t max_size) } // namespace detail -static const mapnik::csv_line_grammar line_g; -static const mapnik::csv_white_space_skipper skipper{}; - mapnik::csv_line parse_line(char const* start, char const* end, char separator, char quote, std::size_t num_columns) { + namespace x3 = boost::spirit::x3; + auto parser = x3::with(quote) + [ x3::with(separator) + [ mapnik::csv_line_grammar()] + ]; + mapnik::csv_line values; if (num_columns > 0) values.reserve(num_columns); - if (!boost::spirit::qi::phrase_parse(start, end, (line_g)(separator, quote), skipper, values)) + if (!x3::phrase_parse(start, end, parser, mapnik::csv_white_space, values)) { throw mapnik::datasource_exception("Failed to parse CSV line:\n" + std::string(start, end)); }