CSV utils - strengthen quote detection logic + tests
This commit is contained in:
parent
62de76a66e
commit
9fe049d8af
2 changed files with 21 additions and 10 deletions
|
@ -142,7 +142,7 @@ std::tuple<char, bool, char, char> autodect_csv_flavour(T & stream, std::size_t
|
|||
// autodetect newlines/quotes/separators
|
||||
char newline = '\n'; // default
|
||||
bool has_newline = false;
|
||||
bool has_quote = false;
|
||||
bool has_single_quote = false;
|
||||
char quote = '"'; // default
|
||||
char separator = ','; // default
|
||||
// local counters
|
||||
|
@ -168,11 +168,10 @@ std::tuple<char, bool, char, char> autodect_csv_flavour(T & stream, std::size_t
|
|||
has_newline = true;
|
||||
break;
|
||||
case '\'':
|
||||
case '"':
|
||||
if (!has_quote)
|
||||
if (!has_single_quote)
|
||||
{
|
||||
quote = c;
|
||||
has_quote = true;
|
||||
has_single_quote = true;
|
||||
}
|
||||
break;
|
||||
case ',':
|
||||
|
@ -185,7 +184,7 @@ std::tuple<char, bool, char, char> autodect_csv_flavour(T & stream, std::size_t
|
|||
if (!has_newline) ++num_pipes;
|
||||
break;
|
||||
case ';':
|
||||
if (!has_newline) ++num_semicolons;
|
||||
if (!has_newline) ++num_semicolons;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -209,17 +208,29 @@ std::tuple<char, bool, char, char> autodect_csv_flavour(T & stream, std::size_t
|
|||
}
|
||||
}
|
||||
|
||||
if (has_newline)
|
||||
if (has_newline && has_single_quote)
|
||||
{
|
||||
std::istringstream ss(std::string(buffer.begin(), buffer.end()));
|
||||
std::size_t num_columns = 0;
|
||||
for (std::string line; csv_utils::getline_csv(ss, line, newline, quote) && !ss.eof(); )
|
||||
for (std::string line; csv_utils::getline_csv(ss, line, newline, quote); )
|
||||
{
|
||||
if (line.size() == 0) continue;
|
||||
if (size < file_length && ss.eof())
|
||||
{
|
||||
// we can't be sure the last line
|
||||
// is not truncated, so skip it
|
||||
break;
|
||||
}
|
||||
if (line.size() == 0) continue; // empty lines are not interesting
|
||||
auto num_quotes = std::count(line.begin(), line.end(), quote);
|
||||
if (num_quotes % 2 != 0)
|
||||
{
|
||||
quote = '"';
|
||||
break;
|
||||
}
|
||||
auto columns = csv_utils::parse_line(line, separator, quote);
|
||||
if (num_columns > 0 && num_columns != columns.size())
|
||||
{
|
||||
quote = (quote == '"') ? '\'' : '"';
|
||||
quote = '"';
|
||||
break;
|
||||
}
|
||||
num_columns = columns.size();
|
||||
|
|
|
@ -1 +1 @@
|
|||
Subproject commit a49ef259427514faa2cc21242ee840c0caa1e290
|
||||
Subproject commit 2a8261be8cca79a4b6fd62e8f4a93b2808613fef
|
Loading…
Reference in a new issue