csv_grammar - handle various quoting options + disable csv_utils::fix_json_quoting(csv_line)

This commit is contained in:
artemp 2015-06-10 13:40:55 +01:00
parent 793a2f9ffb
commit 40b963f9ad
2 changed files with 17 additions and 10 deletions

View file

@ -41,11 +41,15 @@ struct csv_line_grammar : qi::grammar<Iterator, csv_line(std::string const&), qi
csv_line_grammar() : csv_line_grammar::base_type(line) csv_line_grammar() : csv_line_grammar::base_type(line)
{ {
using namespace qi; using namespace qi;
qi::_a_type _a;
qi::_r1_type _r1; qi::_r1_type _r1;
qi::lit_type lit; qi::lit_type lit;
qi::eol_type eol; //qi::eol_type eol;
qi::_val_type _val;
qi::_1_type _1;
qi::char_type char_; qi::char_type char_;
qi::eps_type eps;
qi::omit_type omit;
unesc_char.add unesc_char.add
("\\a", '\a') ("\\a", '\a')
("\\b", '\b') ("\\b", '\b')
@ -55,24 +59,26 @@ struct csv_line_grammar : qi::grammar<Iterator, csv_line(std::string const&), qi
("\\t", '\t') ("\\t", '\t')
("\\v", '\v') ("\\v", '\v')
("\\\\",'\\') ("\\\\",'\\')
//("\\\'", '\') ("\\\'", '\'')
("\\\"", '\"') ("\\\"", '\"')
("\"\"", '\"') // double quote
; ;
line = column(_r1) % char_(_r1) line = column(_r1) % char_(_r1)
; ;
column = quoted | *(char_ - (lit(_r1) /*| eol*/)) column = quoted | *(char_ - (lit(_r1) /*| eol*/))
; ;
quoted = '"' >> *("\"\"" | unesc_char | ~char_('"')) >> '"' text = *(unesc_char | (char_ - char_(_r1)))
;
quoted = omit[char_("\"'")[_a = _1]] >> text(_a)[_val = _1] >> lit(_a)
; ;
//http://stackoverflow.com/questions/7436481/how-to-make-my-split-work-only-on-one-real-line-and-be-capable-to-skeep-quoted-p/7462539#7462539
BOOST_SPIRIT_DEBUG_NODES((line)(column)(quoted)); BOOST_SPIRIT_DEBUG_NODES((line)(column)(quoted));
} }
private: private:
qi::rule<Iterator, csv_line(std::string const&), qi::blank_type> line; qi::rule<Iterator, csv_line(std::string const&), qi::blank_type> line;
qi::rule<Iterator, column(std::string const&)> column; // no-skip qi::rule<Iterator, column(std::string const&)> column; // no-skip
qi::rule<Iterator, std::string()> quoted; qi::rule<Iterator, std::string(char)> text;
qi::rule<Iterator, qi::locals<char>, std::string()> quoted;
qi::symbols<char const, char const> unesc_char; qi::symbols<char const, char const> unesc_char;
}; };

View file

@ -141,7 +141,7 @@ csv_datasource::csv_datasource(parameters const& params)
if (!inline_string_.empty()) if (!inline_string_.empty())
{ {
std::istringstream in(inline_string_); std::istringstream in(inline_string_);
parse_csv(in,escape_, separator_, quote_); parse_csv(in, escape_, separator_, quote_);
} }
else else
{ {
@ -474,11 +474,12 @@ void csv_datasource::parse_csv(T & stream,
{ {
// special handling for varieties of quoting that we will encounter with json // special handling for varieties of quoting that we will encounter with json
// TODO - test with custom "quo" option // TODO - test with custom "quo" option
#if 0 // TODO - remove
if (has_json_field && (quo == "\"") && (std::count(csv_line.begin(), csv_line.end(), '"') >= 6)) if (has_json_field && (quo == "\"") && (std::count(csv_line.begin(), csv_line.end(), '"') >= 6))
{ {
csv_utils::fix_json_quoting(csv_line); csv_utils::fix_json_quoting(csv_line);
} }
#endif
auto values = mapnik::parse_line(csv_line, sep); auto values = mapnik::parse_line(csv_line, sep);
unsigned num_fields = values.size(); unsigned num_fields = values.size();