CSV utils - strengthen quote detection logic + tests
This commit is contained in:
parent
62de76a66e
commit
9fe049d8af
2 changed files with 21 additions and 10 deletions
|
@ -142,7 +142,7 @@ std::tuple<char, bool, char, char> autodect_csv_flavour(T & stream, std::size_t
|
||||||
// autodetect newlines/quotes/separators
|
// autodetect newlines/quotes/separators
|
||||||
char newline = '\n'; // default
|
char newline = '\n'; // default
|
||||||
bool has_newline = false;
|
bool has_newline = false;
|
||||||
bool has_quote = false;
|
bool has_single_quote = false;
|
||||||
char quote = '"'; // default
|
char quote = '"'; // default
|
||||||
char separator = ','; // default
|
char separator = ','; // default
|
||||||
// local counters
|
// local counters
|
||||||
|
@ -168,11 +168,10 @@ std::tuple<char, bool, char, char> autodect_csv_flavour(T & stream, std::size_t
|
||||||
has_newline = true;
|
has_newline = true;
|
||||||
break;
|
break;
|
||||||
case '\'':
|
case '\'':
|
||||||
case '"':
|
if (!has_single_quote)
|
||||||
if (!has_quote)
|
|
||||||
{
|
{
|
||||||
quote = c;
|
quote = c;
|
||||||
has_quote = true;
|
has_single_quote = true;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case ',':
|
case ',':
|
||||||
|
@ -185,7 +184,7 @@ std::tuple<char, bool, char, char> autodect_csv_flavour(T & stream, std::size_t
|
||||||
if (!has_newline) ++num_pipes;
|
if (!has_newline) ++num_pipes;
|
||||||
break;
|
break;
|
||||||
case ';':
|
case ';':
|
||||||
if (!has_newline) ++num_semicolons;
|
if (!has_newline) ++num_semicolons;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -209,17 +208,29 @@ std::tuple<char, bool, char, char> autodect_csv_flavour(T & stream, std::size_t
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (has_newline)
|
if (has_newline && has_single_quote)
|
||||||
{
|
{
|
||||||
std::istringstream ss(std::string(buffer.begin(), buffer.end()));
|
std::istringstream ss(std::string(buffer.begin(), buffer.end()));
|
||||||
std::size_t num_columns = 0;
|
std::size_t num_columns = 0;
|
||||||
for (std::string line; csv_utils::getline_csv(ss, line, newline, quote) && !ss.eof(); )
|
for (std::string line; csv_utils::getline_csv(ss, line, newline, quote); )
|
||||||
{
|
{
|
||||||
if (line.size() == 0) continue;
|
if (size < file_length && ss.eof())
|
||||||
|
{
|
||||||
|
// we can't be sure the last line
|
||||||
|
// is not truncated so skip it
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (line.size() == 0) continue; // empty lines are not interesting
|
||||||
|
auto num_quotes = std::count(line.begin(), line.end(), quote);
|
||||||
|
if (num_quotes % 2 != 0)
|
||||||
|
{
|
||||||
|
quote = '"';
|
||||||
|
break;
|
||||||
|
}
|
||||||
auto columns = csv_utils::parse_line(line, separator, quote);
|
auto columns = csv_utils::parse_line(line, separator, quote);
|
||||||
if (num_columns > 0 && num_columns != columns.size())
|
if (num_columns > 0 && num_columns != columns.size())
|
||||||
{
|
{
|
||||||
quote = (quote == '"') ? '\'' : '"';
|
quote = '"';
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
num_columns = columns.size();
|
num_columns = columns.size();
|
||||||
|
|
|
@ -1 +1 @@
|
||||||
Subproject commit a49ef259427514faa2cc21242ee840c0caa1e290
|
Subproject commit 2a8261be8cca79a4b6fd62e8f4a93b2808613fef
|
Loading…
Reference in a new issue