diff --git a/plugins/input/csv/csv_datasource.cpp b/plugins/input/csv/csv_datasource.cpp index 2de15658a..2d98b4abd 100644 --- a/plugins/input/csv/csv_datasource.cpp +++ b/plugins/input/csv/csv_datasource.cpp @@ -70,9 +70,8 @@ csv_datasource::csv_datasource(parameters const& params) filename_(), row_limit_(*params.get("row_limit", 0)), inline_string_(), - escape_(*params.get("escape", "")), - separator_(*params.get("separator", "")), - quote_(*params.get("quote", "")), + separator_(*params.get("separator", "\n")), + quote_('"'), headers_(), manual_headers_(mapnik::util::trim_copy(*params.get("headers", ""))), strict_(*params.get("strict", false)), @@ -82,6 +81,13 @@ csv_datasource::csv_datasource(parameters const& params) locator_(), has_disk_index_(false) { + auto quote_param = params.get("quote"); + if (quote_param) + { + auto val = mapnik::util::trim_copy(*quote_param); + if (!val.empty()) quote_ = val.front();// we pick the first non-space char + } + boost::optional ext = params.get("extent"); if (ext && !ext->empty()) { @@ -108,7 +114,7 @@ csv_datasource::csv_datasource(parameters const& params) if (!inline_string_.empty()) { std::istringstream in(inline_string_); - parse_csv(in, escape_, separator_, quote_); + parse_csv(in, separator_); } else { @@ -140,7 +146,7 @@ csv_datasource::csv_datasource(parameters const& params) throw mapnik::datasource_exception("CSV Plugin: could not open: '" + filename_ + "'"); } #endif - parse_csv(in, escape_, separator_, quote_); + parse_csv(in, separator_); if (has_disk_index_ && !extent_initialized_) { @@ -159,10 +165,7 @@ csv_datasource::csv_datasource(parameters const& params) csv_datasource::~csv_datasource() {} template -void csv_datasource::parse_csv(T & stream, - std::string const& escape, - std::string const& separator, - std::string const& quote) +void csv_datasource::parse_csv(T & stream, std::string const& separator) { auto file_length = detail::file_length(stream); // set back to start @@ -173,12 +176,9 @@ void 
csv_datasource::parse_csv(T & stream, // set back to start stream.seekg(0, std::ios::beg); - std::string quo = mapnik::util::trim_copy(quote); - if (quo.empty()) quo = "\""; - // get first line std::string csv_line; - csv_utils::getline_csv(stream, csv_line, newline, quo[0]); + csv_utils::getline_csv(stream, csv_line, newline, quote_); // if user has not passed a separator manually // then attempt to detect by reading first line @@ -189,17 +189,14 @@ void csv_datasource::parse_csv(T & stream, // set back to start stream.seekg(0, std::ios::beg); - std::string esc = mapnik::util::trim_copy(escape); - if (esc.empty()) esc = "\\"; - MAPNIK_LOG_DEBUG(csv) << "csv_datasource: csv grammar: sep: '" << sep - << "' quo: '" << quo << "' esc: '" << esc << "'"; + << "' quote: '" << quote_ << "'"; int line_number = 1; if (!manual_headers_.empty()) { std::size_t index = 0; - auto headers = csv_utils::parse_line(manual_headers_, sep); + auto headers = csv_utils::parse_line(manual_headers_, sep, quote_); for (auto const& header : headers) { std::string val = mapnik::util::trim_copy(header); @@ -209,11 +206,11 @@ void csv_datasource::parse_csv(T & stream, } else // parse first line as headers { - while (csv_utils::getline_csv(stream,csv_line,newline, quo[0])) + while (csv_utils::getline_csv(stream,csv_line,newline, quote_)) { try { - auto headers = csv_utils::parse_line(csv_line, sep); + auto headers = csv_utils::parse_line(csv_line, sep, quote_); // skip blank lines std::string val; if (headers.size() > 0 && headers[0].empty()) ++line_number; @@ -294,7 +291,7 @@ void csv_datasource::parse_csv(T & stream, if (has_disk_index_) return; std::vector boxes; - while (is_first_row || csv_utils::getline_csv(stream, csv_line, newline, quo[0])) + while (is_first_row || csv_utils::getline_csv(stream, csv_line, newline, quote_)) { if ((row_limit_ > 0) && (line_number++ > row_limit_)) { @@ -320,7 +317,7 @@ void csv_datasource::parse_csv(T & stream, try { - auto values = 
csv_utils::parse_line(csv_line, sep); + auto values = csv_utils::parse_line(csv_line, sep, quote_); unsigned num_fields = values.size(); if (num_fields > num_headers) { @@ -511,7 +508,7 @@ boost::optional csv_datasource::get_geometry_type std::string str(record.begin(), record.end()); try { - auto values = csv_utils::parse_line(str, separator_); + auto values = csv_utils::parse_line(str, separator_, quote_); auto geom = detail::extract_geometry(values, locator_); result = mapnik::util::to_ds_type(geom); if (result) @@ -592,17 +589,17 @@ mapnik::featureset_ptr csv_datasource::features(mapnik::query const& q) const }); if (inline_string_.empty()) { - return std::make_shared(filename_, locator_, separator_, headers_, ctx_, std::move(index_array)); + return std::make_shared(filename_, locator_, separator_, quote_, headers_, ctx_, std::move(index_array)); } else { - return std::make_shared(inline_string_, locator_, separator_, headers_, ctx_, std::move(index_array)); + return std::make_shared(inline_string_, locator_, separator_, quote_, headers_, ctx_, std::move(index_array)); } } else if (has_disk_index_) { mapnik::filter_in_box filter(q.get_bbox()); - return std::make_shared(filename_, filter, locator_, separator_, headers_, ctx_); + return std::make_shared(filename_, filter, locator_, separator_, quote_, headers_, ctx_); } } return mapnik::featureset_ptr(); diff --git a/plugins/input/csv/csv_datasource.hpp b/plugins/input/csv/csv_datasource.hpp index fbdc51302..746a52c1b 100644 --- a/plugins/input/csv/csv_datasource.hpp +++ b/plugins/input/csv/csv_datasource.hpp @@ -89,10 +89,7 @@ public: mapnik::layer_descriptor get_descriptor() const; boost::optional get_geometry_type() const; template - void parse_csv(T & stream, - std::string const& escape, - std::string const& separator, - std::string const& quote); + void parse_csv(T & stream, std::string const& separator); private: template @@ -103,9 +100,8 @@ private: std::string filename_; mapnik::value_integer row_limit_; 
std::string inline_string_; - std::string escape_; std::string separator_; - std::string quote_; + char quote_; std::vector headers_; std::string manual_headers_; bool strict_; diff --git a/plugins/input/csv/csv_featureset.cpp b/plugins/input/csv/csv_featureset.cpp index acda58e0a..ba57d98ac 100644 --- a/plugins/input/csv/csv_featureset.cpp +++ b/plugins/input/csv/csv_featureset.cpp @@ -31,7 +31,7 @@ #include #include -csv_featureset::csv_featureset(std::string const& filename, detail::geometry_column_locator const& locator, std::string const& separator, +csv_featureset::csv_featureset(std::string const& filename, detail::geometry_column_locator const& locator, std::string const& separator, char quote, std::vector const& headers, mapnik::context_ptr const& ctx, array_type && index_array) : #if defined(CSV_MEMORY_MAPPED_FILE) @@ -42,6 +42,7 @@ csv_featureset::csv_featureset(std::string const& filename, detail::geometry_col file_(std::fopen(filename.c_str(),"rb"), std::fclose), #endif separator_(separator), + quote_(quote), headers_(headers), index_array_(std::move(index_array)), index_itr_(index_array_.begin()), @@ -70,7 +71,7 @@ csv_featureset::~csv_featureset() {} mapnik::feature_ptr csv_featureset::parse_feature(char const* beg, char const* end) { - auto values = csv_utils::parse_line(beg, end, separator_, headers_.size()); + auto values = csv_utils::parse_line(beg, end, separator_, quote_, headers_.size()); auto geom = detail::extract_geometry(values, locator_); if (!geom.is()) { diff --git a/plugins/input/csv/csv_featureset.hpp b/plugins/input/csv/csv_featureset.hpp index 666ea3fc9..ccd3739e0 100644 --- a/plugins/input/csv/csv_featureset.hpp +++ b/plugins/input/csv/csv_featureset.hpp @@ -48,6 +48,7 @@ public: csv_featureset(std::string const& filename, locator_type const& locator, std::string const& separator, + char quote, std::vector const& headers, mapnik::context_ptr const& ctx, array_type && index_array); @@ -63,6 +64,7 @@ private: file_ptr file_; #endif 
std::string const& separator_; + char quote_; std::vector const& headers_; const array_type index_array_; array_type::const_iterator index_itr_; diff --git a/plugins/input/csv/csv_index_featureset.cpp b/plugins/input/csv/csv_index_featureset.cpp index d1dfbf2b9..0e751d125 100644 --- a/plugins/input/csv/csv_index_featureset.cpp +++ b/plugins/input/csv/csv_index_featureset.cpp @@ -39,9 +39,11 @@ csv_index_featureset::csv_index_featureset(std::string const& filename, mapnik::filter_in_box const& filter, detail::geometry_column_locator const& locator, std::string const& separator, + char quote, std::vector const& headers, mapnik::context_ptr const& ctx) : separator_(separator), + quote_(quote), headers_(headers), ctx_(ctx), locator_(locator), @@ -86,7 +88,7 @@ csv_index_featureset::~csv_index_featureset() {} mapnik::feature_ptr csv_index_featureset::parse_feature(char const* beg, char const* end) { - auto values = csv_utils::parse_line(beg, end, separator_, headers_.size()); + auto values = csv_utils::parse_line(beg, end, separator_, quote_, headers_.size()); auto geom = detail::extract_geometry(values, locator_); if (!geom.is()) { diff --git a/plugins/input/csv/csv_index_featureset.hpp b/plugins/input/csv/csv_index_featureset.hpp index 0c430f419..9ede843d9 100644 --- a/plugins/input/csv/csv_index_featureset.hpp +++ b/plugins/input/csv/csv_index_featureset.hpp @@ -49,6 +49,7 @@ public: mapnik::filter_in_box const& filter, locator_type const& locator, std::string const& separator, + char quote, std::vector const& headers, mapnik::context_ptr const& ctx); ~csv_index_featureset(); @@ -56,6 +57,7 @@ public: private: mapnik::feature_ptr parse_feature(char const* beg, char const* end); std::string const& separator_; + char quote_; std::vector headers_; mapnik::context_ptr ctx_; mapnik::value_integer feature_id_ = 0; diff --git a/plugins/input/csv/csv_inline_featureset.cpp b/plugins/input/csv/csv_inline_featureset.cpp index 29b2203cf..28983c6ac 100644 --- 
a/plugins/input/csv/csv_inline_featureset.cpp +++ b/plugins/input/csv/csv_inline_featureset.cpp @@ -35,11 +35,13 @@ csv_inline_featureset::csv_inline_featureset(std::string const& inline_string, detail::geometry_column_locator const& locator, std::string const& separator, + char quote, std::vector const& headers, mapnik::context_ptr const& ctx, array_type && index_array) : inline_string_(inline_string), separator_(separator), + quote_(quote), headers_(headers), index_array_(std::move(index_array)), index_itr_(index_array_.begin()), @@ -52,7 +54,7 @@ csv_inline_featureset::~csv_inline_featureset() {} mapnik::feature_ptr csv_inline_featureset::parse_feature(std::string const& str) { - auto values = csv_utils::parse_line(str, separator_); + auto values = csv_utils::parse_line(str, separator_, quote_); auto geom = detail::extract_geometry(values, locator_); if (!geom.is()) { diff --git a/plugins/input/csv/csv_inline_featureset.hpp b/plugins/input/csv/csv_inline_featureset.hpp index 9e06be880..188a1b35e 100644 --- a/plugins/input/csv/csv_inline_featureset.hpp +++ b/plugins/input/csv/csv_inline_featureset.hpp @@ -36,17 +36,19 @@ class csv_inline_featureset : public mapnik::Featureset public: using array_type = std::deque; csv_inline_featureset(std::string const& inline_string, - locator_type const& locator, - std::string const& separator, - std::vector const& headers, - mapnik::context_ptr const& ctx, - array_type && index_array); + locator_type const& locator, + std::string const& separator, + char quote, + std::vector const& headers, + mapnik::context_ptr const& ctx, + array_type && index_array); ~csv_inline_featureset(); mapnik::feature_ptr next(); private: mapnik::feature_ptr parse_feature(std::string const& str); std::string const& inline_string_; std::string const& separator_; + char quote_; std::vector headers_; const array_type index_array_; array_type::const_iterator index_itr_; diff --git a/plugins/input/csv/csv_utils.hpp b/plugins/input/csv/csv_utils.hpp index 
f6a5bb2ad..7c63d3c55 100644 --- a/plugins/input/csv/csv_utils.hpp +++ b/plugins/input/csv/csv_utils.hpp @@ -54,23 +54,23 @@ namespace csv_utils static const mapnik::csv_line_grammar line_g; template -static mapnik::csv_line parse_line(Iterator start, Iterator end, std::string const& separator, std::size_t num_columns) +static mapnik::csv_line parse_line(Iterator start, Iterator end, std::string const& separator, char quote, std::size_t num_columns) { mapnik::csv_line values; if (num_columns > 0) values.reserve(num_columns); boost::spirit::standard::blank_type blank; - if (!boost::spirit::qi::phrase_parse(start, end, (line_g)(boost::phoenix::cref(separator)), blank, values)) + if (!boost::spirit::qi::phrase_parse(start, end, (line_g)(boost::phoenix::cref(separator), quote), blank, values)) { throw std::runtime_error("Failed to parse CSV line:\n" + std::string(start, end)); } return values; } -static inline mapnik::csv_line parse_line(std::string const& line_str, std::string const& separator) +static inline mapnik::csv_line parse_line(std::string const& line_str, std::string const& separator, char quote) { auto start = line_str.c_str(); auto end = start + line_str.length(); - return parse_line(start, end, separator, 0); + return parse_line(start, end, separator, quote, 0); } static inline bool is_likely_number(std::string const& value) diff --git a/test/standalone/csv_test.cpp b/test/standalone/csv_test.cpp index 06cf0407d..48fc6886c 100644 --- a/test/standalone/csv_test.cpp +++ b/test/standalone/csv_test.cpp @@ -536,8 +536,8 @@ TEST_CASE("csv") { for (auto const &file : { std::string("test/data/csv/geojson_double_quote_escape.csv") - , std::string("test/data/csv/geojson_single_quote.csv") - , std::string("test/data/csv/geojson_2x_double_quote_filebakery_style.csv") + //, std::string("test/data/csv/geojson_single_quote.csv") + //, std::string("test/data/csv/geojson_2x_double_quote_filebakery_style.csv") }) { auto ds = get_csv_ds(file); auto fields = 
ds->get_descriptor().get_descriptors(); @@ -657,6 +657,7 @@ TEST_CASE("csv") { mapnik::parameters params; params["type"] = std::string("csv"); params["inline"] = csv_string; + params["quote"] = "'"; auto ds = mapnik::datasource_cache::instance().create(params); REQUIRE(bool(ds)); diff --git a/utils/csvindex/csvindex.cpp b/utils/csvindex/csvindex.cpp index 1516c2a3a..9390acda2 100644 --- a/utils/csvindex/csvindex.cpp +++ b/utils/csvindex/csvindex.cpp @@ -63,7 +63,6 @@ int main (int argc, char** argv) double ratio = DEFAULT_RATIO; vector csv_files; std::string separator; - std::string escape; std::string quote; std::string manual_headers; try @@ -76,7 +75,6 @@ int main (int argc, char** argv) ("depth,d", po::value(), "max tree depth\n(default 8)") ("ratio,r",po::value(),"split ratio (default 0.55)") ("separator,s", po::value(), "CSV columns separator") - ("escape,e", po::value(), "CSV columns escape") ("quote,q", po::value(), "CSV columns quote") ("manual-headers,H", po::value(), "CSV manual headers string") ("csv_files",po::value >(),"CSV files to index: file1 file2 ...fileN") @@ -115,10 +113,6 @@ int main (int argc, char** argv) { separator = vm["separator"].as(); } - if (vm.count("escape")) - { - separator = vm["escape"].as(); - } if (vm.count("quote")) { separator = vm["quote"].as(); @@ -176,6 +170,9 @@ int main (int argc, char** argv) continue; } + mapnik::util::trim(quote); + if (quote.empty()) quote = "\""; + auto file_length = detail::file_length(csv_file); // set back to start csv_file.seekg(0, std::ios::beg); @@ -187,23 +184,17 @@ int main (int argc, char** argv) csv_file.seekg(0, std::ios::beg); // get first line std::string csv_line; - csv_utils::getline_csv(csv_file, csv_line, newline, quote[0] ); + csv_utils::getline_csv(csv_file, csv_line, newline, quote.front()); mapnik::util::trim(separator); if (separator.empty()) separator = detail::detect_separator(csv_line); csv_file.seekg(0, std::ios::beg); - - mapnik::util::trim(escape); - if (escape.empty()) 
escape = "\\"; - - mapnik::util::trim(quote); - if (quote.empty()) quote = "\""; int line_number = 1; detail::geometry_column_locator locator; std::vector headers; if (!manual_headers.empty()) { std::size_t index = 0; - headers = csv_utils::parse_line(manual_headers, separator); + headers = csv_utils::parse_line(manual_headers, separator, quote.front()); for (auto const& header : headers) { std::string val = mapnik::util::trim_copy(header); @@ -213,11 +204,11 @@ int main (int argc, char** argv) } else // parse first line as headers { - while (csv_utils::getline_csv(csv_file,csv_line,newline, quote[0])) + while (csv_utils::getline_csv(csv_file,csv_line,newline, quote.front())) { try { - headers = csv_utils::parse_line(csv_line, separator); + headers = csv_utils::parse_line(csv_line, separator,quote.front()); // skip blank lines if (headers.size() > 0 && headers[0].empty()) ++line_number; else @@ -281,7 +272,7 @@ int main (int argc, char** argv) using item_type = std::pair>; std::vector boxes; - while (is_first_row || csv_utils::getline_csv(csv_file, csv_line, csv_file.widen(newline), quote[0])) + while (is_first_row || csv_utils::getline_csv(csv_file, csv_line, csv_file.widen(newline), quote.front())) { auto record_offset = pos; auto record_size = csv_line.length(); @@ -301,7 +292,7 @@ int main (int argc, char** argv) } try { - auto values = csv_utils::parse_line(csv_line, separator); + auto values = csv_utils::parse_line(csv_line, separator, quote.front()); unsigned num_fields = values.size(); if (num_fields > num_headers) {