CSV utils - strengthen quote detection logic + tests

This commit is contained in:
artemp 2016-02-19 15:05:15 +01:00
parent 62de76a66e
commit 9fe049d8af
2 changed files with 21 additions and 10 deletions

View file

@ -142,7 +142,7 @@ std::tuple<char, bool, char, char> autodect_csv_flavour(T & stream, std::size_t
// autodetect newlines/quotes/separators // autodetect newlines/quotes/separators
char newline = '\n'; // default char newline = '\n'; // default
bool has_newline = false; bool has_newline = false;
bool has_quote = false; bool has_single_quote = false;
char quote = '"'; // default char quote = '"'; // default
char separator = ','; // default char separator = ','; // default
// local counters // local counters
@ -168,11 +168,10 @@ std::tuple<char, bool, char, char> autodect_csv_flavour(T & stream, std::size_t
has_newline = true; has_newline = true;
break; break;
case '\'': case '\'':
case '"': if (!has_single_quote)
if (!has_quote)
{ {
quote = c; quote = c;
has_quote = true; has_single_quote = true;
} }
break; break;
case ',': case ',':
@ -185,7 +184,7 @@ std::tuple<char, bool, char, char> autodect_csv_flavour(T & stream, std::size_t
if (!has_newline) ++num_pipes; if (!has_newline) ++num_pipes;
break; break;
case ';': case ';':
if (!has_newline) ++num_semicolons; if (!has_newline) ++num_semicolons;
break; break;
} }
} }
@ -209,17 +208,29 @@ std::tuple<char, bool, char, char> autodect_csv_flavour(T & stream, std::size_t
} }
} }
if (has_newline) if (has_newline && has_single_quote)
{ {
std::istringstream ss(std::string(buffer.begin(), buffer.end())); std::istringstream ss(std::string(buffer.begin(), buffer.end()));
std::size_t num_columns = 0; std::size_t num_columns = 0;
for (std::string line; csv_utils::getline_csv(ss, line, newline, quote) && !ss.eof(); ) for (std::string line; csv_utils::getline_csv(ss, line, newline, quote); )
{ {
if (line.size() == 0) continue; if (size < file_length && ss.eof())
{
// we can't be sure last line
// is not truncated so skip it
break;
}
if (line.size() == 0) continue; // empty lines are not interesting
auto num_quotes = std::count(line.begin(), line.end(), quote);
if (num_quotes % 2 != 0)
{
quote = '"';
break;
}
auto columns = csv_utils::parse_line(line, separator, quote); auto columns = csv_utils::parse_line(line, separator, quote);
if (num_columns > 0 && num_columns != columns.size()) if (num_columns > 0 && num_columns != columns.size())
{ {
quote = (quote == '"') ? '\'' : '"'; quote = '"';
break; break;
} }
num_columns = columns.size(); num_columns = columns.size();

@ -1 +1 @@
Subproject commit a49ef259427514faa2cc21242ee840c0caa1e290 Subproject commit 2a8261be8cca79a4b6fd62e8f4a93b2808613fef