From 5c6c8ff7a3a65b1cfb008374a324631fb8e11bea Mon Sep 17 00:00:00 2001 From: artemp Date: Mon, 5 Oct 2015 15:22:09 +0100 Subject: [PATCH] csv.input - restore handling of inline headers and only one line of data without new line --- plugins/input/csv/csv_datasource.cpp | 26 +++++++++++++------------- plugins/input/csv/csv_utils.hpp | 5 +---- utils/csvindex/csvindex.cpp | 3 +-- 3 files changed, 15 insertions(+), 19 deletions(-) diff --git a/plugins/input/csv/csv_datasource.cpp b/plugins/input/csv/csv_datasource.cpp index c72f3ac46..f06dfc62c 100644 --- a/plugins/input/csv/csv_datasource.cpp +++ b/plugins/input/csv/csv_datasource.cpp @@ -179,17 +179,20 @@ void csv_datasource::parse_csv(T & stream) stream.seekg(0, std::ios::beg); char newline; bool has_newline; + std::tie(newline, has_newline) = detail::autodect_newline(stream, file_length); + // set back to start stream.seekg(0, std::ios::beg); - + std::string csv_line; + csv_utils::getline_csv(stream, csv_line, newline, quote_); if (separator_ == 0) { - separator_ = detail::detect_separator(stream, newline, quote_); + separator_ = detail::detect_separator(csv_line); } + MAPNIK_LOG_DEBUG(csv) << "csv_datasource: separator: '" << separator_ << "' quote: '" << quote_ << "'"; - stream.seekg(0, std::ios::beg); int line_number = 1; @@ -206,7 +209,6 @@ void csv_datasource::parse_csv(T & stream) } else // parse first line as headers { - std::string csv_line; while (csv_utils::getline_csv(stream, csv_line, newline, quote_)) { try @@ -274,13 +276,12 @@ void csv_datasource::parse_csv(T & stream) [ & ](std::string const& header){ ctx_->push(header); }); mapnik::transcoder tr(desc_.get_encoding()); - auto pos = stream.tellg(); + auto pos = stream.tellg(); // handle rare case of a single line of data and user-provided headers // where a lack of a newline will mean that csv_utils::getline_csv returns false - -#if 0 // FIXME bool is_first_row = false; + if (!has_newline) { stream.setstate(std::ios::failbit); @@ -290,14 +291,13 @@ void csv_datasource::parse_csv(T & stream) is_first_row = true; } } -#endif if (has_disk_index_) return; std::vector boxes; - std::string csv_line; - while (/*is_first_row || */csv_utils::getline_csv(stream, csv_line, newline, quote_)) + while (is_first_row || csv_utils::getline_csv(stream, csv_line, newline, quote_)) { + if ((row_limit_ > 0) && (line_number++ > row_limit_)) { MAPNIK_LOG_DEBUG(csv) << "csv_datasource: row limit hit, exiting at feature: " << feature_count; @@ -306,10 +306,10 @@ void csv_datasource::parse_csv(T & stream) auto record_offset = pos; auto record_size = csv_line.length(); pos = stream.tellg(); - //is_first_row = false; // FIXME + is_first_row = false; + // skip blank lines - unsigned line_length = csv_line.length(); - if (line_length <= 10) + if (record_size <= 10) { std::string trimmed = csv_line; boost::trim_if(trimmed,boost::algorithm::is_any_of("\",'\r\n ")); diff --git a/plugins/input/csv/csv_utils.hpp b/plugins/input/csv/csv_utils.hpp index 5f850571b..de308f480 100644 --- a/plugins/input/csv/csv_utils.hpp +++ b/plugins/input/csv/csv_utils.hpp @@ -141,11 +141,8 @@ std::size_t file_length(T & stream) return stream.tellg(); } -template -static inline char detect_separator(InputStream & stream, char delim, char quote) +static inline char detect_separator(std::string const& str) { - std::string str; - csv_utils::getline_csv(stream, str, delim, quote); char separator = ','; // default int num_commas = std::count(str.begin(), str.end(), ','); // detect tabs diff --git a/utils/csvindex/csvindex.cpp b/utils/csvindex/csvindex.cpp index 7543169cb..bf347ef3c 100644 --- a/utils/csvindex/csvindex.cpp +++ b/utils/csvindex/csvindex.cpp @@ -183,8 +183,7 @@ int main (int argc, char** argv) // get first line std::string csv_line; csv_utils::getline_csv(csv_file, csv_line, newline, quote); - //mapnik::util::trim(separator); - if (separator == 0) separator = detail::detect_separator(csv_file, newline, quote); + if (separator == 0) separator = detail::detect_separator(csv_line); csv_file.seekg(0, std::ios::beg); int line_number = 1; detail::geometry_column_locator locator;