Merge branch 'master' into spirit-x3

This commit is contained in:
artemp 2016-02-15 16:11:18 +01:00
commit 349b7e6786
5 changed files with 59 additions and 55 deletions

View file

@ -80,7 +80,7 @@ struct csv_line_grammar : qi::grammar<Iterator, csv_line(char, char), Skipper>
; ;
column = quoted(_r2) | *(char_ - (lit(_r1))) column = quoted(_r2) | *(char_ - (lit(_r1)))
; ;
quoted = omit[char_(_r1)[_a = _1]] > text(_a) > -lit(_a) // support unmatched quotes or not (??) quoted = omit[char_(_r1)[_a = _1]] > text(_a) > lit(_a) // support unmatched quotes or not (??)
; ;
text = *(unesc_char | (char_ - lit(_r1))) text = *(unesc_char | (char_ - lit(_r1)))
; ;

View file

@ -179,21 +179,21 @@ void csv_datasource::parse_csv(T & stream)
char newline; char newline;
bool has_newline; bool has_newline;
char detected_quote; char detected_quote;
std::tie(newline, has_newline, detected_quote) = detail::autodect_newline_and_quote(stream, file_length); char detected_separator;
std::tie(newline, has_newline, detected_separator, detected_quote) = detail::autodect_csv_flavour(stream, file_length);
if (quote_ == 0) quote_ = detected_quote; if (quote_ == 0) quote_ = detected_quote;
// set back to start if (separator_ == 0) separator_ = detected_separator;
stream.seekg(0, std::ios::beg);
std::string csv_line;
csv_utils::getline_csv(stream, csv_line, newline, quote_);
if (separator_ == 0)
{
separator_ = detail::detect_separator(csv_line);
}
// set back to start
MAPNIK_LOG_DEBUG(csv) << "csv_datasource: separator: '" << separator_ MAPNIK_LOG_DEBUG(csv) << "csv_datasource: separator: '" << separator_
<< "' quote: '" << quote_ << "'"; << "' quote: '" << quote_ << "'";
stream.seekg(0, std::ios::beg);
// rewind stream
stream.seekg(0, std::ios::beg);
//
std::string csv_line;
csv_utils::getline_csv(stream, csv_line, newline, quote_);
stream.seekg(0, std::ios::beg);
int line_number = 0; int line_number = 0;
if (!manual_headers_.empty()) if (!manual_headers_.empty())
{ {

View file

@ -136,49 +136,21 @@ std::size_t file_length(T & stream)
return stream.tellg(); return stream.tellg();
} }
// Guess the field separator used by a single line of CSV text.
//
// Counts how often each candidate character occurs in `str` and returns the
// first of tab, '|' or ';' (checked in that order) that occurs strictly more
// often than ','. Returns ',' when no candidate outnumbers the commas, which
// includes the empty string.
//
// Every occurrence in `str` is counted; nothing here skips quoted fields.
static inline char detect_separator(std::string const& str)
{
    // Keep std::count's own (signed) result type instead of narrowing to int.
    auto const occurrences = [&str](char candidate) {
        return std::count(str.begin(), str.end(), candidate);
    };

    auto const num_commas = occurrences(',');

    if (occurrences('\t') > num_commas)
    {
        MAPNIK_LOG_DEBUG(csv) << "csv_datasource: auto detected tab separator";
        return '\t';
    }
    // Pipes and semicolons are examined even when the line contains a few
    // tabs. Previously the mere presence of a tab skipped these checks and
    // the comma default was returned regardless of the other counts.
    if (occurrences('|') > num_commas)
    {
        MAPNIK_LOG_DEBUG(csv) << "csv_datasource: auto detected '|' separator";
        return '|';
    }
    if (occurrences(';') > num_commas)
    {
        MAPNIK_LOG_DEBUG(csv) << "csv_datasource: auto detected ';' separator";
        return ';';
    }
    return ','; // default
}
template <typename T> template <typename T>
std::tuple<char,bool,char> autodect_newline_and_quote(T & stream, std::size_t file_length) std::tuple<char, bool, char, char> autodect_csv_flavour(T & stream, std::size_t file_length)
{ {
// autodetect newlines // autodetect newlines/quotes/separators
char newline = '\n'; char newline = '\n'; // default
bool has_newline = false; bool has_newline = false;
bool has_quote = false; bool has_quote = false;
char quote = '"'; char quote = '"'; // default
char separator = ','; // default
// local counters
int num_commas = 0;
int num_tabs = 0;
int num_pipes = 0;
int num_semicolons = 0;
static std::size_t const max_size = 4000; static std::size_t const max_size = 4000;
std::size_t size = std::min(file_length, max_size); std::size_t size = std::min(file_length, max_size);
for (std::size_t lidx = 0; lidx < size; ++lidx) for (std::size_t lidx = 0; lidx < size; ++lidx)
@ -201,9 +173,40 @@ std::tuple<char,bool,char> autodect_newline_and_quote(T & stream, std::size_t fi
has_quote = true; has_quote = true;
} }
break; break;
case ',':
if (!has_newline) ++num_commas;
break;
case '\t':
if (!has_newline) ++num_tabs;
break;
case '|':
if (!has_newline) ++num_pipes;
break;
case ';':
if (!has_newline) ++num_semicolons;
break;
} }
} }
return std::make_tuple(newline, has_newline, quote); // detect separator
if (num_tabs > 0 && num_tabs > num_commas)
{
separator = '\t';
MAPNIK_LOG_DEBUG(csv) << "csv_datasource: auto detected tab separator";
}
else // pipes/semicolons
{
if (num_pipes > num_commas)
{
separator = '|';
MAPNIK_LOG_DEBUG(csv) << "csv_datasource: auto detected '|' separator";
}
else if (num_semicolons > num_commas)
{
separator = ';';
MAPNIK_LOG_DEBUG(csv) << "csv_datasource: auto detected ';' separator";
}
}
return std::make_tuple(newline, has_newline, separator, quote);
} }

View file

@ -987,7 +987,7 @@ TEST_CASE("csv") {
using ustring = mapnik::value_unicode_string; using ustring = mapnik::value_unicode_string;
using row = std::pair<std::string, std::size_t>; using row = std::pair<std::string, std::size_t>;
for (auto const &r : { for (auto const& r : {
row{"test/data/csv/fails/needs_headers_two_lines.csv", 2}, row{"test/data/csv/fails/needs_headers_two_lines.csv", 2},
row{"test/data/csv/fails/needs_headers_one_line.csv", 1}, row{"test/data/csv/fails/needs_headers_one_line.csv", 1},
row{"test/data/csv/fails/needs_headers_one_line_no_newline.csv", 1}}) row{"test/data/csv/fails/needs_headers_one_line_no_newline.csv", 1}})

View file

@ -76,16 +76,17 @@ std::pair<bool,box2d<double>> process_csv_file(T & boxes, std::string const& fil
char newline; char newline;
bool has_newline; bool has_newline;
char detected_quote; char detected_quote;
std::tie(newline, has_newline, detected_quote) = ::detail::autodect_newline_and_quote(csv_file, file_length); char detected_separator;
std::tie(newline, has_newline, detected_separator, detected_quote) = ::detail::autodect_csv_flavour(csv_file, file_length);
if (quote == 0) quote = detected_quote; if (quote == 0) quote = detected_quote;
if (separator == 0) separator = detected_separator;
// set back to start // set back to start
csv_file.seekg(0, std::ios::beg); csv_file.seekg(0, std::ios::beg);
// get first line
std::string csv_line; std::string csv_line;
csv_utils::getline_csv(csv_file, csv_line, newline, quote); csv_utils::getline_csv(csv_file, csv_line, newline, quote);
if (separator == 0) separator = ::detail::detect_separator(csv_line);
csv_file.seekg(0, std::ios::beg); csv_file.seekg(0, std::ios::beg);
int line_number = 0; int line_number = 0;
::detail::geometry_column_locator locator; ::detail::geometry_column_locator locator;
std::vector<std::string> headers; std::vector<std::string> headers;
std::clog << "Parsing CSV using SEPARATOR=" << separator << " QUOTE=" << quote << std::endl; std::clog << "Parsing CSV using SEPARATOR=" << separator << " QUOTE=" << quote << std::endl;