csv_grammar - handle various quoting options + disable csv_utils::fix_json_quoting(csv_line)

This commit is contained in:
artemp 2015-06-10 13:40:55 +01:00
parent 793a2f9ffb
commit 40b963f9ad
2 changed files with 17 additions and 10 deletions

View file

@ -41,11 +41,15 @@ struct csv_line_grammar : qi::grammar<Iterator, csv_line(std::string const&), qi
csv_line_grammar() : csv_line_grammar::base_type(line)
{
using namespace qi;
qi::_a_type _a;
qi::_r1_type _r1;
qi::lit_type lit;
qi::eol_type eol;
//qi::eol_type eol;
qi::_val_type _val;
qi::_1_type _1;
qi::char_type char_;
qi::eps_type eps;
qi::omit_type omit;
unesc_char.add
("\\a", '\a')
("\\b", '\b')
@ -55,24 +59,26 @@ struct csv_line_grammar : qi::grammar<Iterator, csv_line(std::string const&), qi
("\\t", '\t')
("\\v", '\v')
("\\\\",'\\')
//("\\\'", '\')
("\\\'", '\'')
("\\\"", '\"')
("\"\"", '\"') // double quote
;
line = column(_r1) % char_(_r1)
line = column(_r1) % char_(_r1)
;
column = quoted | *(char_ - (lit(_r1) /*| eol*/))
;
quoted = '"' >> *("\"\"" | unesc_char | ~char_('"')) >> '"'
text = *(unesc_char | (char_ - char_(_r1)))
;
quoted = omit[char_("\"'")[_a = _1]] >> text(_a)[_val = _1] >> lit(_a)
;
//http://stackoverflow.com/questions/7436481/how-to-make-my-split-work-only-on-one-real-line-and-be-capable-to-skeep-quoted-p/7462539#7462539
BOOST_SPIRIT_DEBUG_NODES((line)(column)(quoted));
}
private:
qi::rule<Iterator, csv_line(std::string const&), qi::blank_type> line;
qi::rule<Iterator, column(std::string const&)> column; // no-skip
qi::rule<Iterator, std::string()> quoted;
qi::rule<Iterator, std::string(char)> text;
qi::rule<Iterator, qi::locals<char>, std::string()> quoted;
qi::symbols<char const, char const> unesc_char;
};

View file

@ -141,7 +141,7 @@ csv_datasource::csv_datasource(parameters const& params)
if (!inline_string_.empty())
{
std::istringstream in(inline_string_);
parse_csv(in,escape_, separator_, quote_);
parse_csv(in, escape_, separator_, quote_);
}
else
{
@ -474,11 +474,12 @@ void csv_datasource::parse_csv(T & stream,
{
// special handling for varieties of quoting that we will encounter with JSON
// TODO - test with custom "quo" option
#if 0 // TODO - remove
if (has_json_field && (quo == "\"") && (std::count(csv_line.begin(), csv_line.end(), '"') >= 6))
{
csv_utils::fix_json_quoting(csv_line);
}
#endif
auto values = mapnik::parse_line(csv_line, sep);
unsigned num_fields = values.size();