Merge branch 'master' into spirit-x3
This commit is contained in:
commit
349b7e6786
5 changed files with 59 additions and 55 deletions
|
@ -80,7 +80,7 @@ struct csv_line_grammar : qi::grammar<Iterator, csv_line(char, char), Skipper>
|
||||||
;
|
;
|
||||||
column = quoted(_r2) | *(char_ - (lit(_r1)))
|
column = quoted(_r2) | *(char_ - (lit(_r1)))
|
||||||
;
|
;
|
||||||
quoted = omit[char_(_r1)[_a = _1]] > text(_a) > -lit(_a) // support unmatched quotes or not (??)
|
quoted = omit[char_(_r1)[_a = _1]] > text(_a) > lit(_a) // support unmatched quotes or not (??)
|
||||||
;
|
;
|
||||||
text = *(unesc_char | (char_ - lit(_r1)))
|
text = *(unesc_char | (char_ - lit(_r1)))
|
||||||
;
|
;
|
||||||
|
|
|
@ -179,21 +179,21 @@ void csv_datasource::parse_csv(T & stream)
|
||||||
char newline;
|
char newline;
|
||||||
bool has_newline;
|
bool has_newline;
|
||||||
char detected_quote;
|
char detected_quote;
|
||||||
std::tie(newline, has_newline, detected_quote) = detail::autodect_newline_and_quote(stream, file_length);
|
char detected_separator;
|
||||||
|
std::tie(newline, has_newline, detected_separator, detected_quote) = detail::autodect_csv_flavour(stream, file_length);
|
||||||
if (quote_ == 0) quote_ = detected_quote;
|
if (quote_ == 0) quote_ = detected_quote;
|
||||||
// set back to start
|
if (separator_ == 0) separator_ = detected_separator;
|
||||||
stream.seekg(0, std::ios::beg);
|
|
||||||
std::string csv_line;
|
|
||||||
csv_utils::getline_csv(stream, csv_line, newline, quote_);
|
|
||||||
if (separator_ == 0)
|
|
||||||
{
|
|
||||||
separator_ = detail::detect_separator(csv_line);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
// set back to start
|
||||||
MAPNIK_LOG_DEBUG(csv) << "csv_datasource: separator: '" << separator_
|
MAPNIK_LOG_DEBUG(csv) << "csv_datasource: separator: '" << separator_
|
||||||
<< "' quote: '" << quote_ << "'";
|
<< "' quote: '" << quote_ << "'";
|
||||||
stream.seekg(0, std::ios::beg);
|
|
||||||
|
|
||||||
|
// rewind stream
|
||||||
|
stream.seekg(0, std::ios::beg);
|
||||||
|
//
|
||||||
|
std::string csv_line;
|
||||||
|
csv_utils::getline_csv(stream, csv_line, newline, quote_);
|
||||||
|
stream.seekg(0, std::ios::beg);
|
||||||
int line_number = 0;
|
int line_number = 0;
|
||||||
if (!manual_headers_.empty())
|
if (!manual_headers_.empty())
|
||||||
{
|
{
|
||||||
|
|
|
@ -136,49 +136,21 @@ std::size_t file_length(T & stream)
|
||||||
return stream.tellg();
|
return stream.tellg();
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline char detect_separator(std::string const& str)
|
|
||||||
{
|
|
||||||
char separator = ','; // default
|
|
||||||
int num_commas = std::count(str.begin(), str.end(), ',');
|
|
||||||
// detect tabs
|
|
||||||
int num_tabs = std::count(str.begin(), str.end(), '\t');
|
|
||||||
if (num_tabs > 0)
|
|
||||||
{
|
|
||||||
if (num_tabs > num_commas)
|
|
||||||
{
|
|
||||||
separator = '\t';
|
|
||||||
MAPNIK_LOG_DEBUG(csv) << "csv_datasource: auto detected tab separator";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else // pipes
|
|
||||||
{
|
|
||||||
int num_pipes = std::count(str.begin(), str.end(), '|');
|
|
||||||
if (num_pipes > num_commas)
|
|
||||||
{
|
|
||||||
separator = '|';
|
|
||||||
MAPNIK_LOG_DEBUG(csv) << "csv_datasource: auto detected '|' separator";
|
|
||||||
}
|
|
||||||
else // semicolons
|
|
||||||
{
|
|
||||||
int num_semicolons = std::count(str.begin(), str.end(), ';');
|
|
||||||
if (num_semicolons > num_commas)
|
|
||||||
{
|
|
||||||
separator = ';';
|
|
||||||
MAPNIK_LOG_DEBUG(csv) << "csv_datasource: auto detected ';' separator";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return separator;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
std::tuple<char,bool,char> autodect_newline_and_quote(T & stream, std::size_t file_length)
|
std::tuple<char, bool, char, char> autodect_csv_flavour(T & stream, std::size_t file_length)
|
||||||
{
|
{
|
||||||
// autodetect newlines
|
// autodetect newlines/quotes/separators
|
||||||
char newline = '\n';
|
char newline = '\n'; // default
|
||||||
bool has_newline = false;
|
bool has_newline = false;
|
||||||
bool has_quote = false;
|
bool has_quote = false;
|
||||||
char quote = '"';
|
char quote = '"'; // default
|
||||||
|
char separator = ','; // default
|
||||||
|
// local counters
|
||||||
|
int num_commas = 0;
|
||||||
|
int num_tabs = 0;
|
||||||
|
int num_pipes = 0;
|
||||||
|
int num_semicolons = 0;
|
||||||
|
|
||||||
static std::size_t const max_size = 4000;
|
static std::size_t const max_size = 4000;
|
||||||
std::size_t size = std::min(file_length, max_size);
|
std::size_t size = std::min(file_length, max_size);
|
||||||
for (std::size_t lidx = 0; lidx < size; ++lidx)
|
for (std::size_t lidx = 0; lidx < size; ++lidx)
|
||||||
|
@ -201,9 +173,40 @@ std::tuple<char,bool,char> autodect_newline_and_quote(T & stream, std::size_t fi
|
||||||
has_quote = true;
|
has_quote = true;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
case ',':
|
||||||
|
if (!has_newline) ++num_commas;
|
||||||
|
break;
|
||||||
|
case '\t':
|
||||||
|
if (!has_newline) ++num_tabs;
|
||||||
|
break;
|
||||||
|
case '|':
|
||||||
|
if (!has_newline) ++num_pipes;
|
||||||
|
break;
|
||||||
|
case ';':
|
||||||
|
if (!has_newline) ++num_semicolons;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return std::make_tuple(newline, has_newline, quote);
|
// detect separator
|
||||||
|
if (num_tabs > 0 && num_tabs > num_commas)
|
||||||
|
{
|
||||||
|
separator = '\t';
|
||||||
|
MAPNIK_LOG_DEBUG(csv) << "csv_datasource: auto detected tab separator";
|
||||||
|
}
|
||||||
|
else // pipes/semicolons
|
||||||
|
{
|
||||||
|
if (num_pipes > num_commas)
|
||||||
|
{
|
||||||
|
separator = '|';
|
||||||
|
MAPNIK_LOG_DEBUG(csv) << "csv_datasource: auto detected '|' separator";
|
||||||
|
}
|
||||||
|
else if (num_semicolons > num_commas)
|
||||||
|
{
|
||||||
|
separator = ';';
|
||||||
|
MAPNIK_LOG_DEBUG(csv) << "csv_datasource: auto detected ';' separator";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return std::make_tuple(newline, has_newline, separator, quote);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -987,7 +987,7 @@ TEST_CASE("csv") {
|
||||||
using ustring = mapnik::value_unicode_string;
|
using ustring = mapnik::value_unicode_string;
|
||||||
using row = std::pair<std::string, std::size_t>;
|
using row = std::pair<std::string, std::size_t>;
|
||||||
|
|
||||||
for (auto const &r : {
|
for (auto const& r : {
|
||||||
row{"test/data/csv/fails/needs_headers_two_lines.csv", 2},
|
row{"test/data/csv/fails/needs_headers_two_lines.csv", 2},
|
||||||
row{"test/data/csv/fails/needs_headers_one_line.csv", 1},
|
row{"test/data/csv/fails/needs_headers_one_line.csv", 1},
|
||||||
row{"test/data/csv/fails/needs_headers_one_line_no_newline.csv", 1}})
|
row{"test/data/csv/fails/needs_headers_one_line_no_newline.csv", 1}})
|
||||||
|
|
|
@ -76,16 +76,17 @@ std::pair<bool,box2d<double>> process_csv_file(T & boxes, std::string const& fil
|
||||||
char newline;
|
char newline;
|
||||||
bool has_newline;
|
bool has_newline;
|
||||||
char detected_quote;
|
char detected_quote;
|
||||||
std::tie(newline, has_newline, detected_quote) = ::detail::autodect_newline_and_quote(csv_file, file_length);
|
char detected_separator;
|
||||||
|
std::tie(newline, has_newline, detected_separator, detected_quote) = ::detail::autodect_csv_flavour(csv_file, file_length);
|
||||||
if (quote == 0) quote = detected_quote;
|
if (quote == 0) quote = detected_quote;
|
||||||
|
if (separator == 0) separator = detected_separator;
|
||||||
// set back to start
|
// set back to start
|
||||||
csv_file.seekg(0, std::ios::beg);
|
csv_file.seekg(0, std::ios::beg);
|
||||||
// get first line
|
|
||||||
std::string csv_line;
|
std::string csv_line;
|
||||||
csv_utils::getline_csv(csv_file, csv_line, newline, quote);
|
csv_utils::getline_csv(csv_file, csv_line, newline, quote);
|
||||||
if (separator == 0) separator = ::detail::detect_separator(csv_line);
|
|
||||||
csv_file.seekg(0, std::ios::beg);
|
csv_file.seekg(0, std::ios::beg);
|
||||||
int line_number = 0;
|
int line_number = 0;
|
||||||
|
|
||||||
::detail::geometry_column_locator locator;
|
::detail::geometry_column_locator locator;
|
||||||
std::vector<std::string> headers;
|
std::vector<std::string> headers;
|
||||||
std::clog << "Parsing CSV using SEPARATOR=" << separator << " QUOTE=" << quote << std::endl;
|
std::clog << "Parsing CSV using SEPARATOR=" << separator << " QUOTE=" << quote << std::endl;
|
||||||
|
|
Loading…
Reference in a new issue