upgrade CSV parser to boost::spirit::x3
This commit is contained in:
parent
01fbbafc26
commit
83cc819c39
5 changed files with 202 additions and 145 deletions
|
@ -1,79 +0,0 @@
|
|||
/*****************************************************************************
|
||||
*
|
||||
* This file is part of Mapnik (c++ mapping toolkit)
|
||||
*
|
||||
* Copyright (C) 2015 Artem Pavlenko
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef MAPNIK_CSV_GRAMMAR_HPP
|
||||
#define MAPNIK_CSV_GRAMMAR_HPP
|
||||
|
||||
#include <mapnik/csv/csv_types.hpp>
|
||||
#include <boost/spirit/include/qi.hpp>
|
||||
|
||||
namespace mapnik {
|
||||
|
||||
namespace qi = boost::spirit::qi;
|
||||
|
||||
// Qi primitive parser used as the skipper of the CSV line grammar.
// It matches a run of one or more ' ' characters (spaces only).
struct csv_white_space_skipper : qi::primitive_parser<csv_white_space_skipper>
{
    // This parser exposes no attribute.
    template <typename Context, typename Iterator>
    struct attribute
    {
        using type = qi::unused_type;
    };

    // Runs qi::skip_over with the supplied skipper, then consumes every
    // consecutive ' ' found at `first`.
    // Returns true when at least one space was consumed, false otherwise.
    template <typename Iterator, typename Context
              , typename Skipper, typename Attribute>
    bool parse(Iterator& first, Iterator const& last
               , Context& /*context*/, Skipper const& skipper
               , Attribute& /*attr*/) const
    {
        qi::skip_over(first, last, skipper);

        // Guard: nothing to match unless the current character is a space.
        if (first == last || *first != ' ')
        {
            return false;
        }

        // At least one space is present; swallow the whole run.
        do
        {
            ++first;
        }
        while (first != last && *first == ' ');

        return true;
    }

    // Name reported by Qi diagnostics for this parser.
    template <typename Context>
    qi::info what(Context& /*context*/) const
    {
        return qi::info("csv_white_space_skipper");
    }
};
|
||||
|
||||
|
||||
template <typename Iterator, typename Skipper = csv_white_space_skipper>
|
||||
struct csv_line_grammar : qi::grammar<Iterator, csv_line(char, char), Skipper>
|
||||
{
|
||||
csv_line_grammar();
|
||||
private:
|
||||
qi::rule<Iterator, csv_line(char, char), Skipper> line;
|
||||
qi::rule<Iterator, csv_value(char, char)> column; // no-skip
|
||||
qi::rule<Iterator, csv_value(char)> text; // no-skip
|
||||
qi::rule<Iterator, csv_value(char)> quoted; // no-skip
|
||||
qi::symbols<char const, char const> unesc_char;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif // MAPNIK_CSV_GRAMMAR_HPP
|
|
@ -1,61 +0,0 @@
|
|||
/*****************************************************************************
|
||||
*
|
||||
* This file is part of Mapnik (c++ mapping toolkit)
|
||||
*
|
||||
* Copyright (C) 2016 Artem Pavlenko
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*****************************************************************************/
|
||||
|
||||
#include <mapnik/csv/csv_grammar.hpp>
|
||||
|
||||
namespace mapnik {
|
||||
|
||||
namespace qi = boost::spirit::qi;
|
||||
|
||||
template <typename Iterator, typename Skipper>
|
||||
csv_line_grammar<Iterator, Skipper>::csv_line_grammar()
|
||||
: csv_line_grammar::base_type(line)
|
||||
{
|
||||
qi::_r1_type _r1;
|
||||
qi::_r2_type _r2;
|
||||
qi::lit_type lit;
|
||||
qi::char_type char_;
|
||||
unesc_char.add
|
||||
("\\a", '\a')
|
||||
("\\b", '\b')
|
||||
("\\f", '\f')
|
||||
("\\n", '\n')
|
||||
("\\r", '\r')
|
||||
("\\t", '\t')
|
||||
("\\v", '\v')
|
||||
("\\\\",'\\')
|
||||
("\\\'", '\'')
|
||||
("\\\"", '\"')
|
||||
("\"\"", '\"') // double quote
|
||||
;
|
||||
line = -lit("\r") > -lit("\n") > column(_r1, _r2) % lit(_r1)
|
||||
;
|
||||
column = quoted(_r2) | *(char_ - lit(_r1))
|
||||
;
|
||||
quoted = lit(_r1) > text(_r1) > lit(_r1) // support unmatched quotes or not (??)
|
||||
;
|
||||
text = *(unesc_char | (char_ - lit(_r1)))
|
||||
;
|
||||
BOOST_SPIRIT_DEBUG_NODES((line)(column)(quoted));
|
||||
}
|
||||
|
||||
} // namespace mapnik
|
67
include/mapnik/csv/csv_grammar_x3.hpp
Normal file
67
include/mapnik/csv/csv_grammar_x3.hpp
Normal file
|
@ -0,0 +1,67 @@
|
|||
/*****************************************************************************
|
||||
*
|
||||
* This file is part of Mapnik (c++ mapping toolkit)
|
||||
*
|
||||
* Copyright (C) 2015 Artem Pavlenko
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef MAPNIK_CSV_GRAMMAR_X3_HPP
|
||||
#define MAPNIK_CSV_GRAMMAR_X3_HPP
|
||||
|
||||
#include <mapnik/csv/csv_types.hpp>
|
||||
#include <boost/spirit/home/x3.hpp>
|
||||
#include <iostream>
|
||||
namespace mapnik {
|
||||
|
||||
namespace x3 = boost::spirit::x3;
|
||||
|
||||
struct csv_white_space_skipper : x3::parser<csv_white_space_skipper>
|
||||
{
|
||||
using attribute_type = x3::unused_type;
|
||||
static bool const has_attribute = false;
|
||||
|
||||
template <typename Iterator, typename Context, typename Attribute>
|
||||
bool parse(Iterator& first, Iterator const& last,
|
||||
Context const& context, x3::unused_type, Attribute& ) const
|
||||
{
|
||||
x3::skip_over(first, last, context);
|
||||
if (first != last && *first == ' ')
|
||||
{
|
||||
while (++first != last && *first == ' ')
|
||||
;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
auto static const csv_white_space = csv_white_space_skipper{};
|
||||
|
||||
namespace grammar {
|
||||
|
||||
struct separator_tag;
|
||||
struct quote_tag;
|
||||
|
||||
struct csv_line_class;
|
||||
using csv_line_grammar_type = x3::rule<csv_line_class, csv_line>;
|
||||
|
||||
BOOST_SPIRIT_DECLARE(csv_line_grammar_type);
|
||||
|
||||
}}
|
||||
|
||||
#endif // MAPNIK_CSV_GRAMMAR_X3_HPP
|
127
include/mapnik/csv/csv_grammar_x3_def.hpp
Normal file
127
include/mapnik/csv/csv_grammar_x3_def.hpp
Normal file
|
@ -0,0 +1,127 @@
|
|||
/*****************************************************************************
|
||||
*
|
||||
* This file is part of Mapnik (c++ mapping toolkit)
|
||||
*
|
||||
* Copyright (C) 2016 Artem Pavlenko
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*****************************************************************************/
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#include <mapnik/warning_ignore.hpp>
|
||||
#include <mapnik/csv/csv_grammar_x3.hpp>
|
||||
#pragma GCC diagnostic pop
|
||||
#include <iostream>
|
||||
namespace mapnik { namespace grammar {
|
||||
|
||||
namespace x3 = boost::spirit::x3;
|
||||
namespace ascii = boost::spirit::x3::ascii;
|
||||
|
||||
using x3::lit;
|
||||
using x3::lexeme;
|
||||
using ascii::char_;
|
||||
|
||||
// Symbol table mapping escape sequences found in quoted CSV text to the
// character they stand for. A doubled quote ("") maps to a single '"'.
struct unesc_char_ : x3::symbols<char>
{
    unesc_char_()
    {
        add("\\a", '\a')
            ("\\b", '\b')
            ("\\f", '\f')
            ("\\n", '\n')
            ("\\r", '\r')
            ("\\t", '\t')
            ("\\v", '\v')
            ("\\\\",'\\')
            ("\\\'", '\'')
            ("\\\"", '\"')
            ("\"\"", '\"') // double quote
            ;
    }
}
// `const` gives the instance internal linkage: this object is defined in a
// header, and a non-const namespace-scope object would be a duplicate symbol
// as soon as the header is included from a second translation unit.
const unesc_char;
|
||||
|
||||
struct separator_ : x3::parser<separator_>
|
||||
{
|
||||
using attribute_type = x3::unused_type;
|
||||
static bool const has_attribute = false;
|
||||
|
||||
template <typename Iterator, typename Context, typename Attribute>
|
||||
bool parse(Iterator& first, Iterator const& last,
|
||||
Context const& context, x3::unused_type, Attribute& ) const
|
||||
{
|
||||
x3::skip_over(first, last, context);
|
||||
if (first != last && *first == x3::get<separator_tag>(context))
|
||||
{
|
||||
++first;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
} separator;
|
||||
|
||||
struct quote_ : x3::parser<quote_>
|
||||
{
|
||||
using attribute_type = x3::unused_type;
|
||||
static bool const has_attribute = false;
|
||||
|
||||
template <typename Iterator, typename Context, typename Attribute>
|
||||
bool parse(Iterator& first, Iterator const& last,
|
||||
Context const& context, x3::unused_type, Attribute& ) const
|
||||
{
|
||||
x3::skip_over(first, last, context);
|
||||
if (first != last && *first == x3::get<quote_tag>(context))
|
||||
{
|
||||
++first;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
} quote;
|
||||
|
||||
// starting rule
|
||||
csv_line_grammar_type const line("csv-line");
|
||||
// rules
|
||||
x3::rule<class csv_column, csv_value> column("csv-column");
|
||||
x3::rule<class csv_text, csv_value> text("csv-text");
|
||||
x3::rule<class csc_quoted_text, csv_value> quoted_text("csv-quoted-text");
|
||||
|
||||
auto const line_def = -lit('\r') > -lit('\n') > lexeme[column] % separator
|
||||
;
|
||||
|
||||
auto const column_def = quoted_text | *(char_ - separator)
|
||||
;
|
||||
|
||||
auto const quoted_text_def = quote > text > quote // support unmatched quotes or not (??)
|
||||
;
|
||||
|
||||
auto const text_def = *(unesc_char | (char_ - quote))
|
||||
;
|
||||
|
||||
BOOST_SPIRIT_DEFINE (
|
||||
line,
|
||||
column,
|
||||
quoted_text,
|
||||
text
|
||||
);
|
||||
|
||||
} // grammar
|
||||
|
||||
grammar::csv_line_grammar_type const& csv_line_grammar()
|
||||
{
|
||||
return grammar::line;
|
||||
}
|
||||
|
||||
} // namespace mapnik
|
|
@ -30,7 +30,7 @@
|
|||
#include <mapnik/util/trim.hpp>
|
||||
#include <mapnik/datasource.hpp>
|
||||
// csv grammar
|
||||
#include <mapnik/csv/csv_grammar_impl.hpp>
|
||||
#include <mapnik/csv/csv_grammar_x3_def.hpp>
|
||||
//
|
||||
#include "csv_getline.hpp"
|
||||
#include "csv_utils.hpp"
|
||||
|
@ -192,14 +192,17 @@ bool valid(geometry_column_locator const& locator, std::size_t max_size)
|
|||
|
||||
} // namespace detail
|
||||
|
||||
static const mapnik::csv_line_grammar<char const*> line_g;
|
||||
static const mapnik::csv_white_space_skipper skipper{};
|
||||
|
||||
mapnik::csv_line parse_line(char const* start, char const* end, char separator, char quote, std::size_t num_columns)
|
||||
{
|
||||
namespace x3 = boost::spirit::x3;
|
||||
auto parser = x3::with<mapnik::grammar::quote_tag>(quote)
|
||||
[ x3::with<mapnik::grammar::separator_tag>(separator)
|
||||
[ mapnik::csv_line_grammar()]
|
||||
];
|
||||
|
||||
mapnik::csv_line values;
|
||||
if (num_columns > 0) values.reserve(num_columns);
|
||||
if (!boost::spirit::qi::phrase_parse(start, end, (line_g)(separator, quote), skipper, values))
|
||||
if (!x3::phrase_parse(start, end, parser, mapnik::csv_white_space, values))
|
||||
{
|
||||
throw mapnik::datasource_exception("Failed to parse CSV line:\n" + std::string(start, end));
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue