diff --git a/.gitmodules b/.gitmodules index 4cca9a4a8..49ec16134 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,7 +1,7 @@ [submodule "test/data"] path = test/data url = https://github.com/mapnik/test-data.git - branch = master + branch = large_csv [submodule "test/data-visual"] path = test/data-visual url = https://github.com/mapnik/test-data-visual.git diff --git a/include/build.py b/include/build.py index 54bb16b59..9c225a767 100644 --- a/include/build.py +++ b/include/build.py @@ -27,6 +27,7 @@ Import('env') base = './mapnik/' subdirs = [ '', + 'csv', 'svg', 'wkt', 'cairo', diff --git a/include/mapnik/csv/csv_grammar.hpp b/include/mapnik/csv/csv_grammar.hpp new file mode 100644 index 000000000..195542b5f --- /dev/null +++ b/include/mapnik/csv/csv_grammar.hpp @@ -0,0 +1,103 @@ +/***************************************************************************** + * + * This file is part of Mapnik (c++ mapping toolkit) + * + * Copyright (C) 2015 Artem Pavlenko + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *****************************************************************************/ + +#ifndef MAPNIK_CVS_GRAMMAR_HPP +#define MAPNIK_CVS_GRAMMAR_HPP + +//#define BOOST_SPIRIT_DEBUG + +#include +#include + +namespace mapnik { + +namespace qi = boost::spirit::qi; +using csv_value = std::string; +using csv_line = std::vector; +using csv_data = std::vector; + +template +struct csv_line_grammar : qi::grammar +{ + csv_line_grammar() : csv_line_grammar::base_type(line) + { + using namespace qi; + qi::_a_type _a; + qi::_r1_type _r1; + qi::lit_type lit; + //qi::eol_type eol; + qi::_1_type _1; + qi::char_type char_; + qi::omit_type omit; + unesc_char.add + ("\\a", '\a') + ("\\b", '\b') + ("\\f", '\f') + ("\\n", '\n') + ("\\r", '\r') + ("\\t", '\t') + ("\\v", '\v') + ("\\\\",'\\') + ("\\\'", '\'') + ("\\\"", '\"') + ("\"\"", '\"') // double quote + ; + + line = column(_r1) % char_(_r1) + ; + column = quoted | *(char_ - (lit(_r1) /*| eol*/)) + ; + quoted = omit[char_("\"'")[_a = _1]] > text(_a) > -lit(_a) + ; + text = *(unesc_char | (char_ - char_(_r1))) + ; + BOOST_SPIRIT_DEBUG_NODES((line)(column)(quoted)); + } + private: + qi::rule line; + qi::rule column; // no-skip + qi::rule text; + qi::rule, csv_value()> quoted; + qi::symbols unesc_char; +}; + +template +struct csv_file_grammar : qi::grammar +{ + csv_file_grammar() : csv_file_grammar::base_type(start) + { + using namespace qi; + qi::eol_type eol; + qi::_r1_type _r1; + start = -line(_r1) % eol + ; + BOOST_SPIRIT_DEBUG_NODES((start)); + } + private: + qi::rule start; + csv_line_grammar line; +}; + + +} + +#endif // MAPNIK_CVS_GRAMMAR_HPP diff --git a/plugins/input/csv/build.py b/plugins/input/csv/build.py index d1f3716d5..c2beb2452 100644 --- a/plugins/input/csv/build.py +++ b/plugins/input/csv/build.py @@ -30,6 +30,8 @@ plugin_env = plugin_base.Clone() plugin_sources = Split( """ %(PLUGIN_NAME)s_datasource.cpp + %(PLUGIN_NAME)s_featureset.cpp + %(PLUGIN_NAME)s_inline_featureset.cpp """ % locals() ) diff --git a/plugins/input/csv/csv_datasource.cpp b/plugins/input/csv/csv_datasource.cpp index fef1a5195..a727524d0 100644 --- a/plugins/input/csv/csv_datasource.cpp +++ b/plugins/input/csv/csv_datasource.cpp @@ -20,34 +20,26 @@ * *****************************************************************************/ -#include "csv_datasource.hpp" #include "csv_utils.hpp" - +#include "csv_datasource.hpp" +#include "csv_featureset.hpp" +#include "csv_inline_featureset.hpp" // boost -#include #include - // mapnik #include #include #include #include #include -#include -#include #include -#include -#include -#include #include #include #include #include - // stl #include #include -#include #include #include #include @@ -57,47 +49,31 @@ using mapnik::parameters; DATASOURCE_PLUGIN(csv_datasource) + +namespace { + +using cvs_value = mapnik::util::variant; + +} + csv_datasource::csv_datasource(parameters const& params) - : datasource(params), +: datasource(params), desc_(csv_datasource::name(), *params.get("encoding", "utf-8")), extent_(), filename_(), - inline_string_(), - file_length_(0), row_limit_(*params.get("row_limit", 0)), - features_(), + inline_string_(), escape_(*params.get("escape", "")), separator_(*params.get("separator", "")), quote_(*params.get("quote", "")), headers_(), manual_headers_(mapnik::util::trim_copy(*params.get("headers", ""))), strict_(*params.get("strict", false)), - filesize_max_(*params.get("filesize_max", 20.0)), // MB ctx_(std::make_shared()), - extent_initialized_(false) + extent_initialized_(false), + tree_(nullptr), + locator_() { - /* TODO: - general: - - refactor parser into generic class - - tests of grid_renderer output - - ensure that the attribute desc_ matches the first feature added - alternate large file pipeline: - - stat file, detect > 15 MB - - build up csv line-by-line iterator - - creates opportunity to filter attributes by map query - speed: - - add properties for wkt/json/lon/lat at parse time - - add ability to pass 'filter' keyword to drop attributes at layer init - - create quad tree on the fly for small/med size files - - memory map large files for reading - - smaller features (less memory overhead) - usability: - - enforce column names without leading digit - - better error messages (add filepath) if not reading from string - - move to spirit to tokenize and add character level error feedback: - http://boost-spirit.com/home/articles/qi-example/tracking-the-input-position-while-parsing/ - */ - boost::optional ext = params.get("extent"); if (ext && !ext->empty()) { @@ -113,7 +89,6 @@ csv_datasource::csv_datasource(parameters const& params) { boost::optional file = params.get("file"); if (!file) throw mapnik::datasource_exception("CSV Plugin: missing parameter"); - boost::optional base = params.get("base"); if (base) filename_ = *base + "/" + *file; @@ -123,7 +98,7 @@ csv_datasource::csv_datasource(parameters const& params) if (!inline_string_.empty()) { std::istringstream in(inline_string_); - parse_csv(in,escape_, separator_, quote_); + parse_csv(in, escape_, separator_, quote_); } else { @@ -136,13 +111,12 @@ csv_datasource::csv_datasource(parameters const& params) { throw mapnik::datasource_exception("CSV Plugin: could not open: '" + filename_ + "'"); } - parse_csv(in,escape_, separator_, quote_); + parse_csv(in, escape_, separator_, quote_); in.close(); } } - -csv_datasource::~csv_datasource() { } +csv_datasource::~csv_datasource() {} template void csv_datasource::parse_csv(T & stream, @@ -150,98 +124,28 @@ void csv_datasource::parse_csv(T & stream, std::string const& separator, std::string const& quote) { - stream.seekg(0, std::ios::end); - file_length_ = stream.tellg(); - - if (filesize_max_ > 0) - { - double file_mb = static_cast(file_length_)/1048576; - - // throw if this is an unreasonably large file to read into memory - if (file_mb > filesize_max_) - { - std::ostringstream s; - s << "CSV Plugin: csv file is greater than "; - s << filesize_max_ << "MB - you should use a more efficient data format like sqlite, postgis or a shapefile to render this data (set 'filesize_max=0' to disable this restriction if you have lots of memory)"; - throw mapnik::datasource_exception(s.str()); - } - } - + auto file_length = detail::file_length(stream); // set back to start stream.seekg(0, std::ios::beg); - - // autodetect newlines - char newline = '\n'; - bool has_newline = false; - for (unsigned lidx = 0; lidx < file_length_ && lidx < 4000; lidx++) - { - char c = static_cast(stream.get()); - if (c == '\r') - { - newline = '\r'; - has_newline = true; - break; - } - if (c == '\n') - { - has_newline = true; - break; - } - } - + char newline; + bool has_newline; + std::tie(newline, has_newline) = detail::autodect_newline(stream, file_length); // set back to start stream.seekg(0, std::ios::beg); - // get first line std::string csv_line; - std::getline(stream,csv_line,newline); + std::getline(stream,csv_line,stream.widen(newline)); // if user has not passed a separator manually // then attempt to detect by reading first line + std::string sep = mapnik::util::trim_copy(separator); - if (sep.empty()) - { - // default to ',' - sep = ","; - int num_commas = std::count(csv_line.begin(), csv_line.end(), ','); - // detect tabs - int num_tabs = std::count(csv_line.begin(), csv_line.end(), '\t'); - if (num_tabs > 0) - { - if (num_tabs > num_commas) - { - sep = "\t"; - - MAPNIK_LOG_DEBUG(csv) << "csv_datasource: auto detected tab separator"; - } - } - else // pipes - { - int num_pipes = std::count(csv_line.begin(), csv_line.end(), '|'); - if (num_pipes > num_commas) - { - sep = "|"; - - MAPNIK_LOG_DEBUG(csv) << "csv_datasource: auto detected '|' separator"; - } - else // semicolons - { - int num_semicolons = std::count(csv_line.begin(), csv_line.end(), ';'); - if (num_semicolons > num_commas) - { - sep = ";"; - - MAPNIK_LOG_DEBUG(csv) << "csv_datasource: auto detected ';' separator"; - } - } - } - } + if (sep.empty()) sep = detail::detect_separator(csv_line); + separator_ = sep; // set back to start stream.seekg(0, std::ios::beg); - using escape_type = boost::escaped_list_separator; - std::string esc = mapnik::util::trim_copy(escape); if (esc.empty()) esc = "\\"; @@ -251,104 +155,41 @@ void csv_datasource::parse_csv(T & stream, MAPNIK_LOG_DEBUG(csv) << "csv_datasource: csv grammar: sep: '" << sep << "' quo: '" << quo << "' esc: '" << esc << "'"; - boost::escaped_list_separator grammer; - try - { - // grammer = boost::escaped_list_separator('\\', ',', '\"'); - grammer = boost::escaped_list_separator(esc, sep, quo); - } - catch(std::exception const& ex) - { - std::string s("CSV Plugin: "); - s += ex.what(); - throw mapnik::datasource_exception(s); - } - - using Tokenizer = boost::tokenizer< escape_type >; - int line_number = 1; - bool has_wkt_field = false; - bool has_json_field = false; - bool has_lat_field = false; - bool has_lon_field = false; - unsigned wkt_idx = 0; - unsigned json_idx = 0; - unsigned lat_idx = 0; - unsigned lon_idx = 0; - if (!manual_headers_.empty()) { - Tokenizer tok(manual_headers_, grammer); - Tokenizer::iterator beg = tok.begin(); - unsigned idx = 0; - for (; beg != tok.end(); ++beg) + std::size_t index = 0; + auto headers = csv_utils::parse_line(manual_headers_, sep); + for (auto const& header : headers) { - std::string val = mapnik::util::trim_copy(*beg); - std::string lower_val = val; - std::transform(lower_val.begin(), lower_val.end(), lower_val.begin(), ::tolower); - if (lower_val == "wkt" - || (lower_val.find("geom") != std::string::npos)) - { - wkt_idx = idx; - has_wkt_field = true; - } - if (lower_val == "geojson") - { - json_idx = idx; - has_json_field = true; - } - if (lower_val == "x" - || lower_val == "lon" - || lower_val == "lng" - || lower_val == "long" - || (lower_val.find("longitude") != std::string::npos)) - { - lon_idx = idx; - has_lon_field = true; - } - if (lower_val == "y" - || lower_val == "lat" - || (lower_val.find("latitude") != std::string::npos)) - { - lat_idx = idx; - has_lat_field = true; - } - ++idx; + std::string val = mapnik::util::trim_copy(header); + detail::locate_geometry_column(val, index++, locator_); headers_.push_back(val); } } else // parse first line as headers { - while (std::getline(stream,csv_line,newline)) + while (std::getline(stream,csv_line,stream.widen(newline))) { try { - Tokenizer tok(csv_line, grammer); - Tokenizer::iterator beg = tok.begin(); - std::string val; - if (beg != tok.end()) - val = mapnik::util::trim_copy(*beg); - + auto headers = csv_utils::parse_line(csv_line, sep); // skip blank lines - if (val.empty()) - { - // do nothing - ++line_number; - } + std::string val; + if (headers.size() > 0 && headers[0].empty()) ++line_number; else { - int idx = -1; - for (; beg != tok.end(); ++beg) + std::size_t index = 0; + for (auto const& header : headers) { - ++idx; - val = mapnik::util::trim_copy(*beg); + val = mapnik::util::trim_copy(header); if (val.empty()) { if (strict_) { std::ostringstream s; s << "CSV Plugin: expected a column header at line "; - s << line_number << ", column " << idx; + s << line_number << ", column " << index; s << " - ensure this row contains valid header fields: '"; s << csv_line << "'\n"; throw mapnik::datasource_exception(s.str()); @@ -357,49 +198,22 @@ void csv_datasource::parse_csv(T & stream, { // create a placeholder for the empty header std::ostringstream s; - s << "_" << idx; + s << "_" << index; headers_.push_back(s.str()); } } else { - std::string lower_val = val; - std::transform(lower_val.begin(), lower_val.end(), lower_val.begin(), ::tolower); - if (lower_val == "wkt" - || (lower_val.find("geom") != std::string::npos)) - { - wkt_idx = idx; - has_wkt_field = true; - } - if (lower_val == "geojson") - { - json_idx = idx; - has_json_field = true; - } - if (lower_val == "x" - || lower_val == "lon" - || lower_val == "lng" - || lower_val == "long" - || (lower_val.find("longitude") != std::string::npos)) - { - lon_idx = idx; - has_lon_field = true; - } - if (lower_val == "y" - || lower_val == "lat" - || (lower_val.find("latitude") != std::string::npos)) - { - lat_idx = idx; - has_lat_field = true; - } + detail::locate_geometry_column(val, index, locator_); headers_.push_back(val); } + ++index; } ++line_number; break; } } - catch(const std::exception & ex) + catch (std::exception const& ex) { std::string s("CSV Plugin: error parsing headers: "); s += ex.what(); @@ -408,16 +222,16 @@ void csv_datasource::parse_csv(T & stream, } } - if (!has_wkt_field && !has_json_field && (!has_lon_field || !has_lat_field) ) + if (locator_.type == detail::geometry_column_locator::UNKNOWN) { - throw mapnik::datasource_exception("CSV Plugin: could not detect column headers with the name of wkt, geojson, x/y, or latitude/longitude - this is required for reading geometry data"); + throw mapnik::datasource_exception("CSV Plugin: could not detect column headers with the name of wkt, geojson, x/y, or " + "latitude/longitude - this is required for reading geometry data"); } mapnik::value_integer feature_count = 0; bool extent_started = false; std::size_t num_headers = headers_.size(); - std::for_each(headers_.begin(), headers_.end(), [ & ](std::string const& header){ ctx_->push(header); }); @@ -434,15 +248,20 @@ void csv_datasource::parse_csv(T & stream, is_first_row = true; } } - while (std::getline(stream,csv_line,newline) || is_first_row) + + std::vector boxes; + auto pos = stream.tellg(); + while (std::getline(stream, csv_line, stream.widen(newline)) || is_first_row) { - is_first_row = false; - if ((row_limit_ > 0) && (line_number > row_limit_)) + if ((row_limit_ > 0) && (line_number++ > row_limit_)) { MAPNIK_LOG_DEBUG(csv) << "csv_datasource: row limit hit, exiting at feature: " << feature_count; break; } - + auto record_offset = pos; + auto record_size = csv_line.length(); + pos = stream.tellg(); + is_first_row = false; // skip blank lines unsigned line_length = csv_line.length(); if (line_length <= 10) @@ -451,7 +270,6 @@ void csv_datasource::parse_csv(T & stream, boost::trim_if(trimmed,boost::algorithm::is_any_of("\",'\r\n ")); if (trimmed.empty()) { - ++line_number; MAPNIK_LOG_DEBUG(csv) << "csv_datasource: empty row encountered at line: " << line_number; continue; } @@ -459,17 +277,8 @@ void csv_datasource::parse_csv(T & stream, try { - // special handling for varieties of quoting that we will enounter with json - // TODO - test with custom "quo" option - if (has_json_field && (quo == "\"") && (std::count(csv_line.begin(), csv_line.end(), '"') >= 6)) - { - csv_utils::fix_json_quoting(csv_line); - } - - Tokenizer tok(csv_line, grammer); - Tokenizer::iterator beg = tok.begin(); - - unsigned num_fields = std::distance(beg,tok.end()); + auto values = csv_utils::parse_line(csv_line, sep); + unsigned num_fields = values.size(); if (num_fields > num_headers) { std::ostringstream s; @@ -494,378 +303,108 @@ void csv_datasource::parse_csv(T & stream, } } - // NOTE: we use ++feature_count here because feature id's should start at 1; - mapnik::feature_ptr feature(mapnik::feature_factory::create(ctx_,++feature_count)); - double x = 0; - double y = 0; - bool parsed_x = false; - bool parsed_y = false; - bool parsed_wkt = false; - bool parsed_json = false; - std::vector collected; - for (unsigned i = 0; i < num_headers; ++i) + auto geom = detail::extract_geometry(values, locator_); + if (!geom.is()) { - std::string fld_name(headers_.at(i)); - collected.push_back(fld_name); - std::string value; - if (beg == tok.end()) // there are more headers than column values for this row + auto box = mapnik::geometry::envelope(geom); + boxes.emplace_back(std::move(box), make_pair(record_offset, record_size)); + if (!extent_initialized_) { - // add an empty string here to represent a missing value - // not using null type here since nulls are not a csv thing - feature->put(fld_name,tr.transcode(value.c_str())); - if (feature_count == 1) + if (!extent_started) { - desc_.add_descriptor(mapnik::attribute_descriptor(fld_name,mapnik::String)); - } - // continue here instead of break so that all missing values are - // encoded consistenly as empty strings - continue; - } - else - { - value = mapnik::util::trim_copy(*beg); - ++beg; - } - - int value_length = value.length(); - - // parse wkt - if (has_wkt_field) - { - if (i == wkt_idx) - { - // skip empty geoms - if (value.empty()) - { - break; - } - mapnik::geometry::geometry geom; - if (mapnik::from_wkt(value, geom)) - { - // correct orientations etc - mapnik::geometry::correct(geom); - // set geometry - feature->set_geometry(std::move(geom)); - parsed_wkt = true; - } - else - { - std::ostringstream s; - s << "CSV Plugin: expected well known text geometry: could not parse row " - << line_number - << ",column " - << i << " - found: '" - << value << "'"; - if (strict_) - { - throw mapnik::datasource_exception(s.str()); - } - else - { - MAPNIK_LOG_ERROR(csv) << s.str(); - } - } - } - } - // TODO - support both wkt/geojson columns - // at once to create multi-geoms? - // parse as geojson - else if (has_json_field) - { - if (i == json_idx) - { - // skip empty geoms - if (value.empty()) - { - break; - } - mapnik::geometry::geometry geom; - if (mapnik::json::from_geojson(value, geom)) - { - feature->set_geometry(std::move(geom)); - parsed_json = true; - } - else - { - std::ostringstream s; - s << "CSV Plugin: expected geojson geometry: could not parse row " - << line_number - << ",column " - << i << " - found: '" - << value << "'"; - if (strict_) - { - throw mapnik::datasource_exception(s.str()); - } - else - { - MAPNIK_LOG_ERROR(csv) << s.str(); - } - } - } - } - else - { - // longitude - if (i == lon_idx) - { - // skip empty geoms - if (value.empty()) - { - break; - } - - if (mapnik::util::string2double(value,x)) - { - parsed_x = true; - } - else - { - std::ostringstream s; - s << "CSV Plugin: expected a float value for longitude: could not parse row " - << line_number - << ", column " - << i << " - found: '" - << value << "'"; - if (strict_) - { - throw mapnik::datasource_exception(s.str()); - } - else - { - MAPNIK_LOG_ERROR(csv) << s.str(); - } - } - } - // latitude - else if (i == lat_idx) - { - // skip empty geoms - if (value.empty()) - { - break; - } - - if (mapnik::util::string2double(value,y)) - { - parsed_y = true; - } - else - { - std::ostringstream s; - s << "CSV Plugin: expected a float value for latitude: could not parse row " - << line_number - << ", column " - << i << " - found: '" - << value << "'"; - if (strict_) - { - throw mapnik::datasource_exception(s.str()); - } - else - { - MAPNIK_LOG_ERROR(csv) << s.str(); - } - } - } - } - - // now, add attributes, skipping any WKT or JSON fields - if ((has_wkt_field) && (i == wkt_idx)) continue; - if ((has_json_field) && (i == json_idx)) continue; - /* First we detect likely strings, - then try parsing likely numbers, - then try converting to bool, - finally falling back to string type. - An empty string or a string of "null" will be parsed - as a string rather than a true null value. - Likely strings are either empty values, very long values - or values with leading zeros like 001 (which are not safe - to assume are numbers) - */ - - bool matched = false; - bool has_dot = value.find(".") != std::string::npos; - if (value.empty() || - (value_length > 20) || - (value_length > 1 && !has_dot && value[0] == '0')) - { - matched = true; - feature->put(fld_name,std::move(tr.transcode(value.c_str()))); - if (feature_count == 1) - { - desc_.add_descriptor(mapnik::attribute_descriptor(fld_name,mapnik::String)); - } - } - else if (csv_utils::is_likely_number(value)) - { - bool has_e = value.find("e") != std::string::npos; - if (has_dot || has_e) - { - double float_val = 0.0; - if (mapnik::util::string2double(value,float_val)) - { - matched = true; - feature->put(fld_name,float_val); - if (feature_count == 1) - { - desc_.add_descriptor( - mapnik::attribute_descriptor( - fld_name,mapnik::Double)); - } - } + extent_started = true; + extent_ = mapnik::geometry::envelope(geom); } else { - mapnik::value_integer int_val = 0; - if (mapnik::util::string2int(value,int_val)) - { - matched = true; - feature->put(fld_name,int_val); - if (feature_count == 1) - { - desc_.add_descriptor( - mapnik::attribute_descriptor( - fld_name,mapnik::Integer)); - } - } + extent_.expand_to_include(mapnik::geometry::envelope(geom)); } } - if (!matched) + if (++feature_count != 1) continue; + auto beg = values.begin(); + auto end = values.end(); + for (std::size_t i = 0; i < num_headers; ++i) { - // NOTE: we don't use mapnik::util::string2bool - // here because we don't want to treat 'on' and 'off' - // as booleans, only 'true' and 'false' - bool bool_val = false; - std::string lower_val = value; - std::transform(lower_val.begin(), lower_val.end(), lower_val.begin(), ::tolower); - if (lower_val == "true") + std::string const& header = headers_.at(i); + if (beg == end) // there are more headers than column values for this row { - matched = true; - bool_val = true; - } - else if (lower_val == "false") - { - matched = true; - bool_val = false; - } - if (matched) - { - feature->put(fld_name,bool_val); + // add an empty string here to represent a missing value + // not using null type here since nulls are not a csv thing if (feature_count == 1) { - desc_.add_descriptor( - mapnik::attribute_descriptor( - fld_name,mapnik::Boolean)); + desc_.add_descriptor(mapnik::attribute_descriptor(header, mapnik::String)); } + // continue here instead of break so that all missing values are + // encoded consistenly as empty strings + continue; } - else - { - // fallback to normal string - feature->put(fld_name,std::move(tr.transcode(value.c_str()))); - if (feature_count == 1) - { - desc_.add_descriptor( - mapnik::attribute_descriptor( - fld_name,mapnik::String)); - } - } - } - } + std::string value = mapnik::util::trim_copy(*beg++); + int value_length = value.length(); + if (locator_.index == i && (locator_.type == detail::geometry_column_locator::WKT + || locator_.type == detail::geometry_column_locator::GEOJSON)) continue; - bool null_geom = true; - if (has_wkt_field || has_json_field) - { - if (parsed_wkt || parsed_json) - { - if (!extent_initialized_) + // First we detect likely strings, + // then try parsing likely numbers, + // then try converting to bool, + // finally falling back to string type. + + // An empty string or a string of "null" will be parsed + // as a string rather than a true null value. + // Likely strings are either empty values, very long values + // or values with leading zeros like 001 (which are not safe + // to assume are numbers) + + bool matched = false; + bool has_dot = value.find(".") != std::string::npos; + if (value.empty() || (value_length > 20) || (value_length > 1 && !has_dot && value[0] == '0')) { - if (!extent_started) + matched = true; + desc_.add_descriptor(mapnik::attribute_descriptor(header, mapnik::String)); + } + else if (csv_utils::is_likely_number(value)) + { + bool has_e = value.find("e") != std::string::npos; + if (has_dot || has_e) { - extent_started = true; - extent_ = feature->envelope(); + double float_val = 0.0; + if (mapnik::util::string2double(value,float_val)) + { + matched = true; + desc_.add_descriptor(mapnik::attribute_descriptor(header,mapnik::Double)); + } } else { - extent_.expand_to_include(feature->envelope()); + mapnik::value_integer int_val = 0; + if (mapnik::util::string2int(value,int_val)) + { + matched = true; + desc_.add_descriptor(mapnik::attribute_descriptor(header,mapnik::Integer)); + } } } - features_.push_back(feature); - null_geom = false; - } - else - { - std::ostringstream s; - s << "CSV Plugin: could not read WKT or GeoJSON geometry " - << "for line " << line_number << " - found " << headers_.size() - << " with values like: " << csv_line << "\n"; - if (strict_) + if (!matched) { - throw mapnik::datasource_exception(s.str()); - } - else - { - MAPNIK_LOG_ERROR(csv) << s.str(); - continue; + // NOTE: we don't use mapnik::util::string2bool + // here because we don't want to treat 'on' and 'off' + // as booleans, only 'true' and 'false' + if (csv_utils::ignore_case_equal(value, "true") || csv_utils::ignore_case_equal(value, "false")) + { + desc_.add_descriptor(mapnik::attribute_descriptor(header, mapnik::Boolean)); + } + else // fallback to normal string + { + desc_.add_descriptor(mapnik::attribute_descriptor(header, mapnik::String)); + } } } } - else if (has_lat_field || has_lon_field) - { - if (parsed_x && parsed_y) - { - mapnik::geometry::point pt(x,y); - feature->set_geometry(std::move(pt)); - features_.push_back(feature); - null_geom = false; - if (!extent_initialized_) - { - if (!extent_started) - { - extent_started = true; - extent_ = feature->envelope(); - } - else - { - extent_.expand_to_include(feature->envelope()); - } - } - } - else if (parsed_x || parsed_y) - { - std::ostringstream s; - s << "CSV Plugin: does your csv have valid headers?\n"; - if (!parsed_x) - { - s << "Could not detect or parse any rows named 'x' or 'longitude' " - << "for line " << line_number << " but found " << headers_.size() - << " with values like: " << csv_line << "\n" - << "for: " << boost::algorithm::join(collected, ",") << "\n"; - } - if (!parsed_y) - { - s << "Could not detect or parse any rows named 'y' or 'latitude' " - << "for line " << line_number << " but found " << headers_.size() - << " with values like: " << csv_line << "\n" - << "for: " << boost::algorithm::join(collected, ",") << "\n"; - } - if (strict_) - { - throw mapnik::datasource_exception(s.str()); - } - else - { - MAPNIK_LOG_ERROR(csv) << s.str(); - continue; - } - } - } - - if (null_geom) + else { std::ostringstream s; - s << "CSV Plugin: could not detect and parse valid lat/lon fields or wkt/json geometry for line " - << line_number; + s << "CSV Plugin: expected geometry column: could not parse row " + << line_number << " " + << values[locator_.index] << "'"; if (strict_) { throw mapnik::datasource_exception(s.str()); @@ -873,27 +412,18 @@ void csv_datasource::parse_csv(T & stream, else { MAPNIK_LOG_ERROR(csv) << s.str(); - // with no geometry we will never - // add this feature so drop the count - feature_count--; - continue; } } - - ++line_number; } - catch(mapnik::datasource_exception const& ex ) + catch (mapnik::datasource_exception const& ex ) { - if (strict_) - { - throw mapnik::datasource_exception(ex.what()); - } + if (strict_) throw ex; else { MAPNIK_LOG_ERROR(csv) << ex.what(); } } - catch(std::exception const& ex) + catch (std::exception const& ex) { std::ostringstream s; s << "CSV Plugin: unexpected error parsing line: " << line_number @@ -909,10 +439,8 @@ void csv_datasource::parse_csv(T & stream, } } } - if (feature_count < 1) - { - MAPNIK_LOG_ERROR(csv) << "CSV Plugin: could not parse any lines of data"; - } + // bulk insert initialise r-tree + tree_ = std::make_unique(boxes); } const char * csv_datasource::name() @@ -939,19 +467,58 @@ boost::optional csv_datasource::get_geometry_type { boost::optional result; int multi_type = 0; - unsigned num_features = features_.size(); - for (unsigned i = 0; i < num_features && i < 5; ++i) + auto itr = tree_->qbegin(boost::geometry::index::intersects(extent_)); + auto end = tree_->qend(); + mapnik::context_ptr ctx = std::make_shared(); + for (std::size_t count = 0; itr !=end && count < 5; ++itr, ++count) { - result = mapnik::util::to_ds_type(features_[i]->get_geometry()); - if (result) + csv_datasource::item_type const& item = *itr; + std::size_t file_offset = item.second.first; + std::size_t size = item.second.second; + + std::string str; + if (inline_string_.empty()) { - int type = static_cast(*result); - if (multi_type > 0 && multi_type != type) +#if defined (_WINDOWS) + std::ifstream in(mapnik::utf8_to_utf16(filename_),std::ios_base::in | std::ios_base::binary); +#else + std::ifstream in(filename_.c_str(),std::ios_base::in | std::ios_base::binary); +#endif + if (!in.is_open()) { - result.reset(mapnik::datasource_geometry_t::Collection); - return result; + throw mapnik::datasource_exception("CSV Plugin: could not open: '" + filename_ + "'"); } - multi_type = type; + in.seekg(file_offset); + std::vector record; + record.resize(size); + in.read(record.data(), size); + str = std::string(record.begin(), record.end()); + } + else + { + str = inline_string_.substr(file_offset, size); + } + + try + { + auto values = csv_utils::parse_line(str, separator_); + auto geom = detail::extract_geometry(values, locator_); + result = mapnik::util::to_ds_type(geom); + if (result) + { + int type = static_cast(*result); + if (multi_type > 0 && multi_type != type) + { + result.reset(mapnik::datasource_geometry_t::Collection); + return result; + } + multi_type = type; + } + } + catch (std::exception const& ex) + { + if (strict_) throw ex; + else MAPNIK_LOG_ERROR(csv) << ex.what(); } } return result; @@ -959,32 +526,61 @@ boost::optional csv_datasource::get_geometry_type mapnik::featureset_ptr csv_datasource::features(mapnik::query const& q) const { - const std::set& attribute_names = q.property_names(); - std::set::const_iterator pos = attribute_names.begin(); - while (pos != attribute_names.end()) + + for (auto const& name : q.property_names()) { bool found_name = false; - for (std::size_t i = 0; i < headers_.size(); ++i) + for (auto const& header : headers_) { - if (headers_[i] == *pos) + if (header == name) { found_name = true; break; } } - if (! found_name) + if (!found_name) { std::ostringstream s; - s << "CSV Plugin: no attribute '" << *pos << "'. Valid attributes are: " + s << "CSV Plugin: no attribute '" << name << "'. Valid attributes are: " << boost::algorithm::join(headers_, ",") << "."; throw mapnik::datasource_exception(s.str()); } - ++pos; } - return std::make_shared(q.get_bbox(),features_); + + mapnik::box2d const& box = q.get_bbox(); + if (extent_.intersects(box)) + { + csv_featureset::array_type index_array; + if (tree_) + { + tree_->query(boost::geometry::index::intersects(box),std::back_inserter(index_array)); + std::sort(index_array.begin(),index_array.end(), + [] (item_type const& item0, item_type const& item1) + { + return item0.second.first < item1.second.first; + }); + if (inline_string_.empty()) + { + return std::make_shared(filename_, locator_, separator_, headers_, ctx_, std::move(index_array)); + } + else + { + return std::make_shared(inline_string_, locator_, separator_, headers_, ctx_, std::move(index_array)); + } + } + } + return mapnik::featureset_ptr(); } mapnik::featureset_ptr csv_datasource::features_at_point(mapnik::coord2d const& pt, double tol) const { - throw mapnik::datasource_exception("CSV Plugin: features_at_point is not supported yet"); + mapnik::box2d query_bbox(pt, pt); + query_bbox.pad(tol); + mapnik::query q(query_bbox); + std::vector const& desc = desc_.get_descriptors(); + for (auto const& item : desc) + { + q.add_property_name(item.get_name()); + } + return features(q); } diff --git a/plugins/input/csv/csv_datasource.hpp b/plugins/input/csv/csv_datasource.hpp index 7881af858..0c8864b14 100644 --- a/plugins/input/csv/csv_datasource.hpp +++ b/plugins/input/csv/csv_datasource.hpp @@ -35,15 +35,51 @@ // boost #include +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-parameter" +#pragma GCC diagnostic ignored "-Wunused-variable" +#pragma GCC diagnostic ignored "-Wunused-local-typedef" +#pragma GCC diagnostic ignored "-Wshadow" +#pragma GCC diagnostic ignored "-Wsign-conversion" +#pragma GCC diagnostic ignored "-Wconversion" +#include +#include +#pragma GCC diagnostic pop // stl #include #include #include +template +struct csv_linear : boost::geometry::index::linear {}; + +namespace boost { namespace geometry { namespace index { namespace detail { namespace rtree { + +template +struct options_type > +{ + using type = options, + insert_default_tag, + choose_by_content_diff_tag, + split_default_tag, + linear_tag, +#if BOOST_VERSION >= 105700 + node_variant_static_tag>; +#else + node_s_mem_static_tag>; + +#endif +}; +}}}}} + class csv_datasource : public mapnik::datasource { public: + using box_type = mapnik::box2d; + using item_type = std::pair>; + using spatial_index_type = boost::geometry::index::rtree>; + csv_datasource(mapnik::parameters const& params); virtual ~csv_datasource (); mapnik::datasource::datasource_t type() const; @@ -63,19 +99,18 @@ private: mapnik::layer_descriptor desc_; mapnik::box2d extent_; std::string filename_; - std::string inline_string_; - unsigned file_length_; mapnik::value_integer row_limit_; - std::deque features_; + std::string inline_string_; std::string escape_; std::string separator_; std::string quote_; std::vector headers_; std::string manual_headers_; bool strict_; - double filesize_max_; mapnik::context_ptr ctx_; bool extent_initialized_; + std::unique_ptr tree_; + detail::geometry_column_locator locator_; }; #endif // MAPNIK_CSV_DATASOURCE_HPP diff --git a/plugins/input/csv/csv_featureset.cpp b/plugins/input/csv/csv_featureset.cpp new file mode 100644 index 000000000..4a9e74a9f --- /dev/null +++ b/plugins/input/csv/csv_featureset.cpp @@ -0,0 +1,86 @@ +/***************************************************************************** + * + * This file is part of Mapnik (c++ mapping toolkit) + * + * Copyright (C) 2015 Artem Pavlenko + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *****************************************************************************/ + +// mapnik +#include "csv_featureset.hpp" +#include +#include +#include +#include +// stl +#include +#include +#include + +csv_featureset::csv_featureset(std::string const& filename, detail::geometry_column_locator const& locator, std::string const& separator, + std::vector const& headers, mapnik::context_ptr const& ctx, array_type && index_array) + : +#ifdef _WINDOWS + file_(_wfopen(mapnik::utf8_to_utf16(filename).c_str(), L"rb"), std::fclose), +#else + file_(std::fopen(filename.c_str(),"rb"), std::fclose), +#endif + separator_(separator), + headers_(headers), + index_array_(std::move(index_array)), + index_itr_(index_array_.begin()), + index_end_(index_array_.end()), + ctx_(ctx), + locator_(locator), + tr_("utf8") +{ + if (!file_) throw std::runtime_error("Can't open " + filename); +} + +csv_featureset::~csv_featureset() {} + +mapnik::feature_ptr csv_featureset::parse_feature(char const* beg, char const* end) +{ + auto values = csv_utils::parse_line(beg, end, separator_, headers_.size()); + auto geom = detail::extract_geometry(values, locator_); + if (!geom.is()) + { + mapnik::feature_ptr feature(mapnik::feature_factory::create(ctx_, ++feature_id_)); + feature->set_geometry(std::move(geom)); + detail::process_properties(*feature, headers_, values, locator_, tr_); + return feature; + } + return mapnik::feature_ptr(); +} + +mapnik::feature_ptr csv_featureset::next() +{ + if (index_itr_ != index_end_) + { + csv_datasource::item_type const& item = *index_itr_++; + std::size_t file_offset = item.second.first; + std::size_t size = item.second.second; + std::fseek(file_.get(), file_offset, SEEK_SET); + std::vector record; + record.resize(size); + std::fread(record.data(), size, 1, file_.get()); + auto const* start = record.data(); + auto const* end = start + record.size(); + return parse_feature(start, end); + } + return mapnik::feature_ptr(); +} diff --git a/plugins/input/csv/csv_featureset.hpp b/plugins/input/csv/csv_featureset.hpp new file mode 100644 index 000000000..1fc2103f2 --- /dev/null +++ b/plugins/input/csv/csv_featureset.hpp @@ -0,0 +1,62 @@ +/***************************************************************************** + * + * This file is part of Mapnik (c++ mapping toolkit) + * + * Copyright (C) 2015 Artem Pavlenko + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *****************************************************************************/ + +#ifndef CSV_FEATURESET_HPP +#define CSV_FEATURESET_HPP + +#include +#include +#include "csv_utils.hpp" +#include "csv_datasource.hpp" +#include +#include + +class csv_featureset : public mapnik::Featureset +{ + using file_ptr = std::unique_ptr; + using locator_type = detail::geometry_column_locator; +public: + using array_type = std::deque; + csv_featureset(std::string const& filename, + locator_type const& locator, + std::string const& separator, + std::vector const& headers, + mapnik::context_ptr const& ctx, + array_type && index_array); + ~csv_featureset(); + mapnik::feature_ptr next(); +private: + mapnik::feature_ptr parse_feature(char const* beg, char const* end); + file_ptr file_; + std::string const& separator_; + std::vector const& headers_; + const array_type index_array_; + array_type::const_iterator index_itr_; + array_type::const_iterator index_end_; + mapnik::context_ptr ctx_; + mapnik::value_integer feature_id_ = 0; + detail::geometry_column_locator const& locator_; + mapnik::transcoder tr_; +}; + + +#endif // CSV_FEATURESET_HPP diff --git a/plugins/input/csv/csv_inline_featureset.cpp b/plugins/input/csv/csv_inline_featureset.cpp new file mode 100644 index 000000000..29b2203cf --- /dev/null +++ b/plugins/input/csv/csv_inline_featureset.cpp @@ -0,0 +1,78 @@ +/***************************************************************************** + * + * This file is part of Mapnik (c++ mapping toolkit) + * + * Copyright (C) 2015 Artem Pavlenko + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *****************************************************************************/ + +// mapnik +#include "csv_inline_featureset.hpp" +#include +#include +#include +#include +#include +// stl +#include +#include +#include + +csv_inline_featureset::csv_inline_featureset(std::string const& inline_string, + detail::geometry_column_locator const& locator, + std::string const& separator, + std::vector const& headers, + mapnik::context_ptr const& ctx, + array_type && index_array) + : inline_string_(inline_string), + separator_(separator), + headers_(headers), + index_array_(std::move(index_array)), + index_itr_(index_array_.begin()), + index_end_(index_array_.end()), + ctx_(ctx), + locator_(locator), + tr_("utf8") {} + +csv_inline_featureset::~csv_inline_featureset() {} + +mapnik::feature_ptr csv_inline_featureset::parse_feature(std::string const& str) +{ + auto values = csv_utils::parse_line(str, separator_); + auto geom = detail::extract_geometry(values, locator_); + if (!geom.is()) + { + mapnik::feature_ptr feature(mapnik::feature_factory::create(ctx_, ++feature_id_)); + feature->set_geometry(std::move(geom)); + detail::process_properties(*feature, headers_, values, locator_, tr_); + return feature; + } + return mapnik::feature_ptr(); +} + +mapnik::feature_ptr csv_inline_featureset::next() +{ + if (index_itr_ != index_end_) + { + csv_datasource::item_type const& item = *index_itr_++; + std::size_t file_offset = item.second.first; + std::size_t size = item.second.second; + std::string str = inline_string_.substr(file_offset, size); + return parse_feature(str); + } + return mapnik::feature_ptr(); +} diff --git a/plugins/input/csv/csv_inline_featureset.hpp b/plugins/input/csv/csv_inline_featureset.hpp new file mode 100644 index 000000000..9e06be880 --- /dev/null +++ b/plugins/input/csv/csv_inline_featureset.hpp @@ -0,0 +1,61 @@ +/***************************************************************************** + * + * This file is part of Mapnik (c++ mapping toolkit) + * + * Copyright (C) 2015 Artem Pavlenko + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *****************************************************************************/ + +#ifndef CSV_INLINE_FEATURESET_HPP +#define CSV_INLINE_FEATURESET_HPP + +#include +#include +#include "csv_utils.hpp" +#include "csv_datasource.hpp" +#include +#include + +class csv_inline_featureset : public mapnik::Featureset +{ + using locator_type = detail::geometry_column_locator; +public: + using array_type = std::deque; + csv_inline_featureset(std::string const& inline_string, + locator_type const& locator, + std::string const& separator, + std::vector const& headers, + mapnik::context_ptr const& ctx, + array_type && index_array); + ~csv_inline_featureset(); + mapnik::feature_ptr next(); +private: + mapnik::feature_ptr parse_feature(std::string const& str); + std::string const& inline_string_; + std::string const& separator_; + std::vector headers_; + const array_type index_array_; + array_type::const_iterator index_itr_; + array_type::const_iterator index_end_; + mapnik::context_ptr ctx_; + mapnik::value_integer feature_id_ = 0; + detail::geometry_column_locator const& locator_; + mapnik::transcoder tr_; +}; + + +#endif // CSV_INLINE_FEATURESET_HPP diff --git a/plugins/input/csv/csv_utils.hpp b/plugins/input/csv/csv_utils.hpp index c55065e9a..b2981cefd 100644 --- a/plugins/input/csv/csv_utils.hpp +++ b/plugins/input/csv/csv_utils.hpp @@ -23,6 +23,16 @@ #ifndef MAPNIK_CSV_UTILS_DATASOURCE_HPP #define MAPNIK_CSV_UTILS_DATASOURCE_HPP +// mapnik +#include +#include +#include +#include +#include +#include +#include +#include +// boost #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-parameter" #pragma GCC diagnostic ignored "-Wunused-local-typedef" @@ -32,66 +42,275 @@ #include #include +#include namespace csv_utils { - static inline bool is_likely_number(std::string const& value) + +static const mapnik::csv_line_grammar line_g; + +template +static mapnik::csv_line parse_line(Iterator start, Iterator end, std::string const& separator, std::size_t num_columns) +{ + mapnik::csv_line values; + if (num_columns > 0) values.reserve(num_columns); + boost::spirit::standard::blank_type blank; + if (!boost::spirit::qi::phrase_parse(start, end, (line_g)(boost::phoenix::cref(separator)), blank, values)) { - return( strspn( value.c_str(), "e-.+0123456789" ) == value.size() ); + throw std::runtime_error("Failed to parse CSV line:\n" + std::string(start, end)); + } + return values; +} + +static inline mapnik::csv_line parse_line(std::string const& line_str, std::string const& separator) +{ + auto start = line_str.c_str(); + auto end = start + line_str.length(); + return parse_line(start, end, separator, 0); +} + +static inline bool is_likely_number(std::string const& value) +{ + return( strspn( value.c_str(), "e-.+0123456789" ) == value.size() ); +} + +struct ignore_case_equal_pred +{ + bool operator () (unsigned char a, unsigned char b) const + { + return std::tolower(a) == std::tolower(b); + } +}; + +inline bool ignore_case_equal(std::string const& s0, std::string const& s1) +{ + return std::equal(s0.begin(), s0.end(), + s1.begin(), ignore_case_equal_pred()); +} + +} + + +namespace detail { + +template +std::size_t file_length(T & stream) +{ + stream.seekg(0, std::ios::end); + return stream.tellg(); +} + +static inline std::string detect_separator(std::string const& str) +{ + std::string separator = ","; // default + int num_commas = std::count(str.begin(), str.end(), ','); + // detect tabs + int num_tabs = std::count(str.begin(), str.end(), '\t'); + if (num_tabs > 0) + { + if (num_tabs > num_commas) + { + separator = "\t"; + MAPNIK_LOG_DEBUG(csv) << "csv_datasource: auto detected tab separator"; + } + } + else // pipes + { + int num_pipes = std::count(str.begin(), str.end(), '|'); + if (num_pipes > num_commas) + { + separator = "|"; + MAPNIK_LOG_DEBUG(csv) << "csv_datasource: auto detected '|' separator"; + } + else // semicolons + { + int num_semicolons = std::count(str.begin(), str.end(), ';'); + if (num_semicolons > num_commas) + { + separator = ";"; + MAPNIK_LOG_DEBUG(csv) << "csv_datasource: auto detected ';' separator"; + } + } + } + return separator; +} + +template +std::tuple autodect_newline(T & stream, std::size_t file_length) +{ + // autodetect newlines + char newline = '\n'; + bool has_newline = false; + for (std::size_t lidx = 0; lidx < file_length && lidx < 4000; ++lidx) + { + char c = static_cast(stream.get()); + if (c == '\r') + { + newline = '\r'; + has_newline = true; + break; + } + if (c == '\n') + { + has_newline = true; + break; + } + } + return std::make_tuple(newline,has_newline); +} + + +struct geometry_column_locator +{ + geometry_column_locator() + : type(UNKNOWN), index(-1), index2(-1) {} + + enum { UNKNOWN = 0, WKT, GEOJSON, LON_LAT } type; + std::size_t index; + std::size_t index2; +}; + +static inline void locate_geometry_column(std::string const& header, std::size_t index, geometry_column_locator & locator) +{ + std::string lower_val(header); + std::transform(lower_val.begin(), lower_val.end(), lower_val.begin(), ::tolower); + if (lower_val == "wkt" || (lower_val.find("geom") != std::string::npos)) + { + locator.type = geometry_column_locator::WKT; + locator.index = index; + } + else if (lower_val == "geojson") + { + locator.type = geometry_column_locator::GEOJSON; + locator.index = index; + } + else if (lower_val == "x" || lower_val == "lon" + || lower_val == "lng" || lower_val == "long" + || (lower_val.find("longitude") != std::string::npos)) + { + locator.index = index; + locator.type = geometry_column_locator::LON_LAT; } - static inline void fix_json_quoting(std::string & csv_line) + else if (lower_val == "y" + || lower_val == "lat" + || (lower_val.find("latitude") != std::string::npos)) { - std::string wrapping_char; - std::string::size_type j_idx = std::string::npos; - std::string::size_type post_idx = std::string::npos; - std::string::size_type j_idx_double = csv_line.find("\"{"); - std::string::size_type j_idx_single = csv_line.find("'{"); - if (j_idx_double != std::string::npos) - { - wrapping_char = "\""; - j_idx = j_idx_double; - post_idx = csv_line.find("}\""); + locator.index2 = index; + locator.type = geometry_column_locator::LON_LAT; + } +} - } - else if (j_idx_single != std::string::npos) +static mapnik::geometry::geometry extract_geometry(std::vector const& row, geometry_column_locator const& locator) +{ + mapnik::geometry::geometry geom; + if (locator.type == geometry_column_locator::WKT) + { + if (mapnik::from_wkt(row[locator.index], geom)) { - wrapping_char = "'"; - j_idx = j_idx_single; - post_idx = csv_line.find("}'"); + // correct orientations .. + mapnik::geometry::correct(geom); } - // we are positive it is valid json - if (!wrapping_char.empty()) + else { - // grab the json chunk - std::string json_chunk = csv_line.substr(j_idx,post_idx+wrapping_char.size()); - bool does_not_have_escaped_double_quotes = (json_chunk.find("\\\"") == std::string::npos); - // ignore properly escaped quotes like \" which need no special handling - if (does_not_have_escaped_double_quotes) + throw std::runtime_error("Failed to parse WKT:" + row[locator.index]); + } + } + else if (locator.type == geometry_column_locator::GEOJSON) + { + + if (!mapnik::json::from_geojson(row[locator.index], geom)) + { + throw std::runtime_error("Failed to parse GeoJSON:" + row[locator.index]); + } + } + else if (locator.type == geometry_column_locator::LON_LAT) + { + double x, y; + if (!mapnik::util::string2double(row[locator.index],x)) + { + throw std::runtime_error("Failed to parse Longitude(Easting):" + row[locator.index]); + } + if (!mapnik::util::string2double(row[locator.index2],y)) + { + throw std::runtime_error("Failed to parse Latitude(Northing):" + row[locator.index2]); + } + geom = mapnik::geometry::point(x,y); + } + return geom; +} + +template +void process_properties(Feature & feature, Headers const& headers, Values const& values, Locator const& locator, Transcoder const& tr) +{ + auto val_beg = values.begin(); + auto val_end = values.end(); + auto num_headers = headers.size(); + for (std::size_t i = 0; i < num_headers; ++i) + { + std::string const& fld_name = headers.at(i); + if (val_beg == val_end) + { + feature.put(fld_name,tr.transcode("")); + continue; + } + std::string value = mapnik::util::trim_copy(*val_beg++); + int value_length = value.length(); + + if (locator.index == i && (locator.type == detail::geometry_column_locator::WKT + || locator.type == detail::geometry_column_locator::GEOJSON) ) continue; + + + bool matched = false; + bool has_dot = value.find(".") != std::string::npos; + if (value.empty() || + (value_length > 20) || + (value_length > 1 && !has_dot && value[0] == '0')) + { + matched = true; + feature.put(fld_name,std::move(tr.transcode(value.c_str()))); + } + else if (csv_utils::is_likely_number(value)) + { + bool has_e = value.find("e") != std::string::npos; + if (has_dot || has_e) { - std::string pre_json = csv_line.substr(0,j_idx); - std::string post_json = csv_line.substr(post_idx+wrapping_char.size()); - // handle "" in a string wrapped in " - // http://tools.ietf.org/html/rfc4180#section-2 item 7. - // e.g. "{""type"":""Point"",""coordinates"":[30.0,10.0]}" - if (json_chunk.find("\"\"") != std::string::npos) + double float_val = 0.0; + if (mapnik::util::string2double(value,float_val)) { - boost::algorithm::replace_all(json_chunk,"\"\"","\\\""); - csv_line = pre_json + json_chunk + post_json; + matched = true; + feature.put(fld_name,float_val); } - // handle " in a string wrapped in ' - // e.g. '{"type":"Point","coordinates":[30.0,10.0]}' - else + } + else + { + mapnik::value_integer int_val = 0; + if (mapnik::util::string2int(value,int_val)) { - // escape " because we cannot exchange for single quotes - // https://github.com/mapnik/mapnik/issues/1408 - boost::algorithm::replace_all(json_chunk,"\"","\\\""); - boost::algorithm::replace_all(json_chunk,"'","\""); - csv_line = pre_json + json_chunk + post_json; + matched = true; + feature.put(fld_name,int_val); } } } + if (!matched) + { + if (csv_utils::ignore_case_equal(value, "true")) + { + feature.put(fld_name, true); + } + else if (csv_utils::ignore_case_equal(value, "false")) + { + feature.put(fld_name, false); + } + else // fallback to string + { + feature.put(fld_name,std::move(tr.transcode(value.c_str()))); + } + } } } + +}// ns detail + #endif // MAPNIK_CSV_UTILS_DATASOURCE_HPP diff --git a/plugins/input/geojson/large_geojson_featureset.cpp b/plugins/input/geojson/large_geojson_featureset.cpp index 1df7dce4a..6f61d53c7 100644 --- a/plugins/input/geojson/large_geojson_featureset.cpp +++ b/plugins/input/geojson/large_geojson_featureset.cpp @@ -29,7 +29,6 @@ // stl #include #include -#include #include "large_geojson_featureset.hpp" diff --git a/plugins/input/geojson/large_geojson_featureset.hpp b/plugins/input/geojson/large_geojson_featureset.hpp index a67eec5bf..8321ff313 100644 --- a/plugins/input/geojson/large_geojson_featureset.hpp +++ b/plugins/input/geojson/large_geojson_featureset.hpp @@ -26,9 +26,7 @@ #include #include "geojson_datasource.hpp" -#include #include -#include #include class large_geojson_featureset : public mapnik::Featureset diff --git a/test/data b/test/data index cb1e7f2ed..cbf02d3a9 160000 --- a/test/data +++ b/test/data @@ -1 +1 @@ -Subproject commit cb1e7f2ed8f2482bf8fb370981ec450922fa36de +Subproject commit cbf02d3a9d173c27c69541df347dfbd22c6c1612 diff --git a/test/standalone/csv_test.cpp b/test/standalone/csv_test.cpp index 2023f67f4..c15692906 100644 --- a/test/standalone/csv_test.cpp +++ b/test/standalone/csv_test.cpp @@ -21,139 +21,145 @@ namespace bfs = boost::filesystem; namespace { -void add_csv_files(bfs::path dir, std::vector &csv_files) { - for (auto const &entry : boost::make_iterator_range( - bfs::directory_iterator(dir), bfs::directory_iterator())) { - auto path = entry.path(); - if (path.extension().native() == ".csv") { - csv_files.emplace_back(path); +void add_csv_files(bfs::path dir, std::vector &csv_files) +{ + for (auto const &entry : boost::make_iterator_range( + bfs::directory_iterator(dir), bfs::directory_iterator())) + { + auto path = entry.path(); + if (path.extension().native() == ".csv") + { + csv_files.emplace_back(path); + } } - } } -mapnik::datasource_ptr get_csv_ds(std::string const &file_name, bool strict = true) { - mapnik::parameters params; - params["type"] = std::string("csv"); - params["file"] = file_name; - params["strict"] = mapnik::value_bool(strict); - auto ds = mapnik::datasource_cache::instance().create(params); - // require a non-null pointer returned - REQUIRE(bool(ds)); - return ds; +mapnik::datasource_ptr get_csv_ds(std::string const &file_name, bool strict = true) +{ + mapnik::parameters params; + params["type"] = std::string("csv"); + params["file"] = file_name; + params["strict"] = mapnik::value_bool(strict); + auto ds = mapnik::datasource_cache::instance().create(params); + // require a non-null pointer returned + REQUIRE(ds != nullptr); + return ds; } void require_field_names(std::vector const &fields, - std::initializer_list const &names) { - REQUIRE(fields.size() == names.size()); - auto itr_a = fields.begin(); - auto const end_a = fields.end(); - auto itr_b = names.begin(); - for (; itr_a != end_a; ++itr_a, ++itr_b) { - CHECK(itr_a->get_name() == *itr_b); - } + std::initializer_list const &names) +{ + REQUIRE(fields.size() == names.size()); + auto itr_a = fields.begin(); + auto const end_a = fields.end(); + auto itr_b = names.begin(); + for (; itr_a != end_a; ++itr_a, ++itr_b) + { + CHECK(itr_a->get_name() == *itr_b); + } } void require_field_types(std::vector const &fields, std::initializer_list const &types) { - REQUIRE(fields.size() == types.size()); - auto itr_a = fields.begin(); - auto const end_a = fields.end(); - auto itr_b = types.begin(); - for (; itr_a != end_a; ++itr_a, ++itr_b) { - CHECK(itr_a->get_type() == *itr_b); - } + REQUIRE(fields.size() == types.size()); + auto itr_a = fields.begin(); + auto const end_a = fields.end(); + auto itr_b = types.begin(); + for (; itr_a != end_a; ++itr_a, ++itr_b) { + CHECK(itr_a->get_type() == *itr_b); + } } mapnik::featureset_ptr all_features(mapnik::datasource_ptr ds) { - auto fields = ds->get_descriptor().get_descriptors(); - mapnik::query query(ds->envelope()); - for (auto const &field : fields) { - query.add_property_name(field.get_name()); - } - return ds->features(query); + auto fields = ds->get_descriptor().get_descriptors(); + mapnik::query query(ds->envelope()); + for (auto const &field : fields) { + query.add_property_name(field.get_name()); + } + return ds->features(query); } std::size_t count_features(mapnik::featureset_ptr features) { - std::size_t count = 0; - while (features->next()) { - ++count; - } - return count; + std::size_t count = 0; + while (features->next()) { + ++count; + } + return count; } using attr = std::tuple; void require_attributes(mapnik::feature_ptr feature, std::initializer_list const &attrs) { - REQUIRE(bool(feature)); - for (auto const &kv : attrs) { - REQUIRE(feature->has_key(std::get<0>(kv))); - CHECK(feature->get(std::get<0>(kv)) == std::get<1>(kv)); - } + REQUIRE(bool(feature)); + for (auto const &kv : attrs) { + REQUIRE(feature->has_key(std::get<0>(kv))); + CHECK(feature->get(std::get<0>(kv)) == std::get<1>(kv)); + } } namespace detail { struct feature_count { - template - std::size_t operator()(T const &geom) const { - return mapnik::util::apply_visitor(*this, geom); - } - - std::size_t operator()(mapnik::geometry::geometry_empty const &) const { - return 0; - } - - template - std::size_t operator()(mapnik::geometry::point const &) const { - return 1; - } - - template - std::size_t operator()(mapnik::geometry::line_string const &) const { - return 1; - } - - template - std::size_t operator()(mapnik::geometry::polygon const &) const { - return 1; - } - - template - std::size_t operator()(mapnik::geometry::multi_point const &mp) const { - return mp.size(); - } - - template - std::size_t operator()(mapnik::geometry::multi_line_string const &mls) const { - return mls.size(); - } - - template - std::size_t operator()(mapnik::geometry::multi_polygon const &mp) const { - return mp.size(); - } - - template - std::size_t operator()(mapnik::geometry::geometry_collection const &col) const { - std::size_t sum = 0; - for (auto const &geom : col) { - sum += operator()(geom); + template + std::size_t operator()(T const &geom) const { + return mapnik::util::apply_visitor(*this, geom); + } + + std::size_t operator()(mapnik::geometry::geometry_empty const &) const { + return 0; + } + + template + std::size_t operator()(mapnik::geometry::point const &) const { + return 1; + } + + template + std::size_t operator()(mapnik::geometry::line_string const &) const { + return 1; + } + + template + std::size_t operator()(mapnik::geometry::polygon const &) const { + return 1; + } + + template + std::size_t operator()(mapnik::geometry::multi_point const &mp) const { + return mp.size(); + } + + template + std::size_t operator()(mapnik::geometry::multi_line_string const &mls) const { + return mls.size(); + } + + template + std::size_t operator()(mapnik::geometry::multi_polygon const &mp) const { + return mp.size(); + } + + template + std::size_t operator()(mapnik::geometry::geometry_collection const &col) const { + std::size_t sum = 0; + for (auto const &geom : col) { + sum += operator()(geom); + } + return sum; } - return sum; - } }; } // namespace detail template std::size_t feature_count(mapnik::geometry::geometry const &g) { - return detail::feature_count()(g); + return detail::feature_count()(g); } void require_geometry(mapnik::feature_ptr feature, std::size_t num_parts, mapnik::geometry::geometry_types type) { - REQUIRE(bool(feature)); - CHECK(mapnik::geometry::geometry_type(feature->get_geometry()) == type); - CHECK(feature_count(feature->get_geometry()) == num_parts); + REQUIRE(bool(feature)); + CHECK(mapnik::geometry::geometry_type(feature->get_geometry()) == type); + CHECK(feature_count(feature->get_geometry()) == num_parts); } } // anonymous namespace @@ -163,520 +169,519 @@ const bool registered = mapnik::datasource_cache::instance().register_datasource TEST_CASE("csv") { - if (mapnik::util::exists(csv_plugin)) - { + if (mapnik::util::exists(csv_plugin)) + { + REQUIRE(registered); + // make the tests silent since we intentially test error conditions that are noisy + auto const severity = mapnik::logger::instance().get_severity(); + mapnik::logger::instance().set_severity(mapnik::logger::none); - REQUIRE(registered); + // check the CSV datasource is loaded + const std::vector plugin_names = + mapnik::datasource_cache::instance().plugin_names(); + const bool have_csv_plugin = + std::find(plugin_names.begin(), plugin_names.end(), "csv") != plugin_names.end(); - // make the tests silent since we intentially test error conditions that are noisy - auto const severity = mapnik::logger::instance().get_severity(); - mapnik::logger::instance().set_severity(mapnik::logger::none); + SECTION("broken files") { + if (have_csv_plugin) { + std::vector broken; + add_csv_files("test/data/csv/fails", broken); + add_csv_files("test/data/csv/warns", broken); + broken.emplace_back("test/data/csv/fails/does_not_exist.csv"); - // check the CSV datasource is loaded - const std::vector plugin_names = - mapnik::datasource_cache::instance().plugin_names(); - const bool have_csv_plugin = - std::find(plugin_names.begin(), plugin_names.end(), "csv") != plugin_names.end(); + for (auto const &path : broken) + { + REQUIRE_THROWS(get_csv_ds(path.native())); + } + } + } // END SECTION - SECTION("broken files") { - if (have_csv_plugin) { - std::vector broken; - add_csv_files("test/data/csv/fails", broken); - add_csv_files("test/data/csv/warns", broken); - broken.emplace_back("test/data/csv/fails/does_not_exist.csv"); + SECTION("good files") { + if (have_csv_plugin) { + std::vector good; + add_csv_files("test/data/csv", good); + add_csv_files("test/data/csv/warns", good); - for (auto const &path : broken) { - REQUIRE_THROWS(get_csv_ds(path.native())); - } - } - } // END SECTION + for (auto const& path : good) + { + auto ds = get_csv_ds(path.native(), false); + // require a non-null pointer returned + REQUIRE(bool(ds)); + } + } + } // END SECTION - SECTION("good files") { - if (have_csv_plugin) { - std::vector good; - add_csv_files("test/data/csv", good); - add_csv_files("test/data/csv/warns", good); + SECTION("lon/lat detection") + { + for (auto const& lon_name : {std::string("lon"), std::string("lng")}) + { + auto ds = get_csv_ds((boost::format("test/data/csv/%1%_lat.csv") % lon_name).str()); + auto fields = ds->get_descriptor().get_descriptors(); + require_field_names(fields, {lon_name, "lat"}); + require_field_types(fields, {mapnik::Integer, mapnik::Integer}); - for (auto const &path : good) { - auto ds = get_csv_ds(path.native(), false); - // require a non-null pointer returned + CHECK(ds->get_geometry_type() == mapnik::datasource_geometry_t::Point); + + mapnik::query query(ds->envelope()); + for (auto const &field : fields) + { + query.add_property_name(field.get_name()); + } + auto features = ds->features(query); + auto feature = features->next(); + + require_attributes(feature, { + attr { lon_name, mapnik::value_integer(0) }, + attr { "lat", mapnik::value_integer(0) } + }); + } + } // END SECTION + + SECTION("type detection") { + auto ds = get_csv_ds("test/data/csv/nypd.csv"); + auto fields = ds->get_descriptor().get_descriptors(); + require_field_names(fields, {"Precinct", "Phone", "Address", "City", "geo_longitude", "geo_latitude", "geo_accuracy"}); + require_field_types(fields, {mapnik::String, mapnik::String, mapnik::String, mapnik::String, mapnik::Double, mapnik::Double, mapnik::String}); + + CHECK(ds->get_geometry_type() == mapnik::datasource_geometry_t::Point); + CHECK(count_features(all_features(ds)) == 2); + + auto feature = all_features(ds)->next(); + require_attributes(feature, { + attr { "City", mapnik::value_unicode_string("New York, NY") } + , attr { "geo_accuracy", mapnik::value_unicode_string("house") } + , attr { "Phone", mapnik::value_unicode_string("(212) 334-0711") } + , attr { "Address", mapnik::value_unicode_string("19 Elizabeth Street") } + , attr { "Precinct", mapnik::value_unicode_string("5th Precinct") } + , attr { "geo_longitude", mapnik::value_integer(-70) } + , attr { "geo_latitude", mapnik::value_integer(40) } + }); + } // END SECTION + + SECTION("skipping blank rows") { + auto ds = get_csv_ds("test/data/csv/blank_rows.csv"); + auto fields = ds->get_descriptor().get_descriptors(); + require_field_names(fields, {"x", "y", "name"}); + require_field_types(fields, {mapnik::Integer, mapnik::Integer, mapnik::String}); + + CHECK(ds->get_geometry_type() == mapnik::datasource_geometry_t::Point); + CHECK(count_features(all_features(ds)) == 2); + } // END SECTION + + SECTION("empty rows") { + auto ds = get_csv_ds("test/data/csv/empty_rows.csv"); + auto fields = ds->get_descriptor().get_descriptors(); + require_field_names(fields, {"x", "y", "text", "date", "integer", "boolean", "float", "time", "datetime", "empty_column"}); + require_field_types(fields, {mapnik::Integer, mapnik::Integer, mapnik::String, mapnik::String, mapnik::Integer, mapnik::Boolean, mapnik::Double, mapnik::String, mapnik::String, mapnik::String}); + + CHECK(ds->get_geometry_type() == mapnik::datasource_geometry_t::Point); + CHECK(count_features(all_features(ds)) == 4); + + auto featureset = all_features(ds); + auto feature = featureset->next(); + require_attributes(feature, { + attr { "x", mapnik::value_integer(0) } + , attr { "empty_column", mapnik::value_unicode_string("") } + , attr { "text", mapnik::value_unicode_string("a b") } + , attr { "float", mapnik::value_double(1.0) } + , attr { "datetime", mapnik::value_unicode_string("1971-01-01T04:14:00") } + , attr { "y", mapnik::value_integer(0) } + , attr { "boolean", mapnik::value_bool(true) } + , attr { "time", mapnik::value_unicode_string("04:14:00") } + , attr { "date", mapnik::value_unicode_string("1971-01-01") } + , attr { "integer", mapnik::value_integer(40) } + }); + + while (bool(feature = featureset->next())) { + CHECK(feature->size() == 10); + CHECK(feature->get("empty_column") == mapnik::value_unicode_string("")); + } + } // END SECTION + + SECTION("slashes") { + auto ds = get_csv_ds("test/data/csv/has_attributes_with_slashes.csv"); + auto fields = ds->get_descriptor().get_descriptors(); + require_field_names(fields, {"x", "y", "name"}); + // NOTE: y column is integer, even though a double value is used below in the test? + require_field_types(fields, {mapnik::Integer, mapnik::Integer, mapnik::String}); + + auto featureset = all_features(ds); + require_attributes(featureset->next(), { + attr{"x", 0} + , attr{"y", 0} + , attr{"name", mapnik::value_unicode_string("a/a") } }); + require_attributes(featureset->next(), { + attr{"x", 1} + , attr{"y", 4} + , attr{"name", mapnik::value_unicode_string("b/b") } }); + require_attributes(featureset->next(), { + attr{"x", 10} + , attr{"y", 2.5} + , attr{"name", mapnik::value_unicode_string("c/c") } }); + } // END SECTION + + SECTION("wkt field") { + using mapnik::geometry::geometry_types; + + auto ds = get_csv_ds("test/data/csv/wkt.csv"); + auto fields = ds->get_descriptor().get_descriptors(); + require_field_names(fields, {"type"}); + require_field_types(fields, {mapnik::String}); + + auto featureset = all_features(ds); + require_geometry(featureset->next(), 1, geometry_types::Point); + require_geometry(featureset->next(), 1, geometry_types::LineString); + require_geometry(featureset->next(), 1, geometry_types::Polygon); + require_geometry(featureset->next(), 1, geometry_types::Polygon); + require_geometry(featureset->next(), 4, geometry_types::MultiPoint); + require_geometry(featureset->next(), 2, geometry_types::MultiLineString); + require_geometry(featureset->next(), 2, geometry_types::MultiPolygon); + require_geometry(featureset->next(), 2, geometry_types::MultiPolygon); + } // END SECTION + + SECTION("handling of missing header") { + // TODO: does this mean 'missing_header.csv' should be in the warnings + // subdirectory, since it doesn't work in strict mode? + auto ds = get_csv_ds("test/data/csv/missing_header.csv", false); + auto fields = ds->get_descriptor().get_descriptors(); + require_field_names(fields, {"one", "two", "x", "y", "_4", "aftermissing"}); + auto feature = all_features(ds)->next(); + REQUIRE(feature); + REQUIRE(feature->has_key("_4")); + CHECK(feature->get("_4") == mapnik::value_unicode_string("missing")); + } // END SECTION + + SECTION("handling of headers that are numbers") { + auto ds = get_csv_ds("test/data/csv/numbers_for_headers.csv"); + auto fields = ds->get_descriptor().get_descriptors(); + require_field_names(fields, {"x", "y", "1990", "1991", "1992"}); + auto feature = all_features(ds)->next(); + require_attributes(feature, { + attr{"x", 0} + , attr{"y", 0} + , attr{"1990", 1} + , attr{"1991", 2} + , attr{"1992", 3} + }); + auto expression = mapnik::parse_expression("[1991]=2"); + REQUIRE(bool(expression)); + auto value = mapnik::util::apply_visitor( + mapnik::evaluate( + *feature, mapnik::attributes()), *expression); + CHECK(value == true); + } // END SECTION + + SECTION("quoted numbers") { + using ustring = mapnik::value_unicode_string; + + auto ds = get_csv_ds("test/data/csv/quoted_numbers.csv"); + auto fields = ds->get_descriptor().get_descriptors(); + require_field_names(fields, {"x", "y", "label"}); + auto featureset = all_features(ds); + + require_attributes(featureset->next(), { + attr{"x", 0}, attr{"y", 0}, attr{"label", ustring("0,0") } }); + require_attributes(featureset->next(), { + attr{"x", 5}, attr{"y", 5}, attr{"label", ustring("5,5") } }); + require_attributes(featureset->next(), { + attr{"x", 0}, attr{"y", 5}, attr{"label", ustring("0,5") } }); + require_attributes(featureset->next(), { + attr{"x", 5}, attr{"y", 0}, attr{"label", ustring("5,0") } }); + require_attributes(featureset->next(), { + attr{"x", 2.5}, attr{"y", 2.5}, attr{"label", ustring("2.5,2.5") } }); + + } // END SECTION + + SECTION("reading newlines") { + for (auto const &platform : {std::string("windows"), std::string("mac")}) { + std::string file_name = (boost::format("test/data/csv/%1%_newlines.csv") % platform).str(); + auto ds = get_csv_ds(file_name); + auto fields = ds->get_descriptor().get_descriptors(); + require_field_names(fields, {"x", "y", "z"}); + require_attributes(all_features(ds)->next(), { + attr{"x", 1}, attr{"y", 10}, attr{"z", 9999.9999} }); + } + } // END SECTION + + SECTION("mixed newlines") { + using ustring = mapnik::value_unicode_string; + + for (auto const &file : { + std::string("test/data/csv/mac_newlines_with_unix_inline.csv") + , std::string("test/data/csv/mac_newlines_with_unix_inline_escaped.csv") + , std::string("test/data/csv/windows_newlines_with_unix_inline.csv") + , std::string("test/data/csv/windows_newlines_with_unix_inline_escaped.csv") + }) { + auto ds = get_csv_ds(file); + auto fields = ds->get_descriptor().get_descriptors(); + require_field_names(fields, {"x", "y", "line"}); + require_attributes(all_features(ds)->next(), { + attr{"x", 0}, attr{"y", 0} + , attr{"line", ustring("many\n lines\n of text\n with unix newlines")} }); + } + } // END SECTION + + SECTION("tabs") { + auto ds = get_csv_ds("test/data/csv/tabs_in_csv.csv"); + auto fields = ds->get_descriptor().get_descriptors(); + require_field_names(fields, {"x", "y", "z"}); + require_attributes(all_features(ds)->next(), { + attr{"x", -122}, attr{"y", 48}, attr{"z", 0} }); + } // END SECTION + + SECTION("separators") { + using ustring = mapnik::value_unicode_string; + + for (auto const &file : { + std::string("test/data/csv/pipe_delimiters.csv") + , std::string("test/data/csv/semicolon_delimiters.csv") + }) { + auto ds = get_csv_ds(file); + auto fields = ds->get_descriptor().get_descriptors(); + require_field_names(fields, {"x", "y", "z"}); + require_attributes(all_features(ds)->next(), { + attr{"x", 0}, attr{"y", 0}, attr{"z", ustring("hello")} }); + } + } // END SECTION + + SECTION("null and bool keywords are empty strings") { + using ustring = mapnik::value_unicode_string; + + auto ds = get_csv_ds("test/data/csv/nulls_and_booleans_as_strings.csv"); + auto fields = ds->get_descriptor().get_descriptors(); + require_field_names(fields, {"x", "y", "null", "boolean"}); + require_field_types(fields, {mapnik::Integer, mapnik::Integer, mapnik::String, mapnik::Boolean}); + + auto featureset = all_features(ds); + require_attributes(featureset->next(), { + attr{"x", 0}, attr{"y", 0}, attr{"null", ustring("null")}, attr{"boolean", true}}); + require_attributes(featureset->next(), { + attr{"x", 0}, attr{"y", 0}, attr{"null", ustring("")}, attr{"boolean", false}}); + } // END SECTION + + SECTION("nonexistent query fields throw") { + auto ds = get_csv_ds("test/data/csv/lon_lat.csv"); + auto fields = ds->get_descriptor().get_descriptors(); + require_field_names(fields, {"lon", "lat"}); + require_field_types(fields, {mapnik::Integer, mapnik::Integer}); + + mapnik::query query(ds->envelope()); + for (auto const &field : fields) { + query.add_property_name(field.get_name()); + } + // also add an invalid one, triggering throw + query.add_property_name("bogus"); + + REQUIRE_THROWS(ds->features(query)); + } // END SECTION + + SECTION("leading zeros mean strings") { + using ustring = mapnik::value_unicode_string; + + auto ds = get_csv_ds("test/data/csv/leading_zeros.csv"); + auto fields = ds->get_descriptor().get_descriptors(); + require_field_names(fields, {"x", "y", "fips"}); + require_field_types(fields, {mapnik::Integer, mapnik::Integer, mapnik::String}); + + auto featureset = all_features(ds); + require_attributes(featureset->next(), { + attr{"x", 0}, attr{"y", 0}, attr{"fips", ustring("001")}}); + require_attributes(featureset->next(), { + attr{"x", 0}, attr{"y", 0}, attr{"fips", ustring("003")}}); + require_attributes(featureset->next(), { + attr{"x", 0}, attr{"y", 0}, attr{"fips", ustring("005")}}); + } // END SECTION + + SECTION("advanced geometry detection") { + using row = std::pair; + + for (row r : { + row{"point", mapnik::datasource_geometry_t::Point} + , row{"poly", mapnik::datasource_geometry_t::Polygon} + , row{"multi_poly", mapnik::datasource_geometry_t::Polygon} + , row{"line", mapnik::datasource_geometry_t::LineString} + }) { + std::string file_name = (boost::format("test/data/csv/%1%_wkt.csv") % r.first).str(); + auto ds = get_csv_ds(file_name); + CHECK(ds->get_geometry_type() == r.second); + } + } // END SECTION + + SECTION("creation of CSV from in-memory strings") { + using ustring = mapnik::value_unicode_string; + + for (auto const &name : {std::string("Winthrop, WA"), std::string(u8"Qu\u00e9bec")}) { + std::string csv_string = + (boost::format( + "wkt,Name\n" + "\"POINT (120.15 48.47)\",\"%1%\"\n" + ) % name).str(); + + mapnik::parameters params; + params["type"] = std::string("csv"); + params["inline"] = csv_string; + auto ds = mapnik::datasource_cache::instance().create(params); + REQUIRE(bool(ds)); + + auto feature = all_features(ds)->next(); + REQUIRE(bool(feature)); + REQUIRE(feature->has_key("Name")); + CHECK(feature->get("Name") == ustring(name.c_str())); + } + } // END SECTION + + SECTION("geojson quoting") { + using mapnik::geometry::geometry_types; + + for (auto const &file : { + std::string("test/data/csv/geojson_double_quote_escape.csv") + , std::string("test/data/csv/geojson_single_quote.csv") + , std::string("test/data/csv/geojson_2x_double_quote_filebakery_style.csv") + }) { + auto ds = get_csv_ds(file); + auto fields = ds->get_descriptor().get_descriptors(); + require_field_names(fields, {"type"}); + require_field_types(fields, {mapnik::String}); + + auto featureset = all_features(ds); + require_geometry(featureset->next(), 1, geometry_types::Point); + require_geometry(featureset->next(), 1, geometry_types::LineString); + require_geometry(featureset->next(), 1, geometry_types::Polygon); + require_geometry(featureset->next(), 1, geometry_types::Polygon); + require_geometry(featureset->next(), 4, geometry_types::MultiPoint); + require_geometry(featureset->next(), 2, geometry_types::MultiLineString); + require_geometry(featureset->next(), 2, geometry_types::MultiPolygon); + require_geometry(featureset->next(), 2, geometry_types::MultiPolygon); + } + } // END SECTION + + SECTION("blank undelimited rows are still parsed") { + using ustring = mapnik::value_unicode_string; + + // TODO: does this mean this CSV file should be in the warnings + // subdirectory, since it doesn't work in strict mode? + auto ds = get_csv_ds("test/data/csv/more_headers_than_column_values.csv", false); + auto fields = ds->get_descriptor().get_descriptors(); + require_field_names(fields, {"x", "y", "one", "two", "three"}); + require_field_types(fields, {mapnik::Integer, mapnik::Integer, mapnik::String, mapnik::String, mapnik::String}); + + require_attributes(all_features(ds)->next(), { + attr{"x", 0}, attr{"y", 0}, attr{"one", ustring("")}, attr{"two", ustring("")}, attr{"three", ustring("")} }); + } // END SECTION + + SECTION("fewer headers than rows throws") { + REQUIRE_THROWS(get_csv_ds("test/data/csv/more_column_values_than_headers.csv")); + } // END SECTION + + SECTION("feature ID only incremented for valid rows") { + auto ds = get_csv_ds("test/data/csv/warns/feature_id_counting.csv", false); + auto fs = all_features(ds); + + // first + auto feature = fs->next(); + REQUIRE(bool(feature)); + CHECK(feature->id() == 1); + + // second, should have skipped bogus one + feature = fs->next(); + REQUIRE(bool(feature)); + CHECK(feature->id() == 2); + + feature = fs->next(); + CHECK(!feature); + } // END SECTION + + SECTION("dynamically defining headers") { + using ustring = mapnik::value_unicode_string; + using row = std::pair; + + for (auto const &r : { + row{"test/data/csv/fails/needs_headers_two_lines.csv", 2}, + row{"test/data/csv/fails/needs_headers_one_line.csv", 1}, + row{"test/data/csv/fails/needs_headers_one_line_no_newline.csv", 1}}) + { + mapnik::parameters params; + params["type"] = std::string("csv"); + params["file"] = r.first; + params["headers"] = "x,y,name"; + auto ds = mapnik::datasource_cache::instance().create(params); + REQUIRE(bool(ds)); + auto fields = ds->get_descriptor().get_descriptors(); + require_field_names(fields, {"x", "y", "name"}); + require_field_types(fields, {mapnik::Integer, mapnik::Integer, mapnik::String}); + require_attributes(all_features(ds)->next(), { + attr{"x", 0}, attr{"y", 0}, attr{"name", ustring("data_name")} }); + REQUIRE(count_features(all_features(ds)) == r.second); + } + } // END SECTION + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wlong-long" + SECTION("64bit int fields work") { + auto ds = get_csv_ds("test/data/csv/64bit_int.csv"); + auto fields = ds->get_descriptor().get_descriptors(); + require_field_names(fields, {"x", "y", "bigint"}); + require_field_types(fields, {mapnik::Integer, mapnik::Integer, mapnik::Integer}); + + auto fs = all_features(ds); + auto feature = fs->next(); + require_attributes(feature, { + attr{"x", 0}, attr{"y", 0}, attr{"bigint", 2147483648} }); + + feature = fs->next(); + require_attributes(feature, { + attr{"x", 0}, attr{"y", 0}, attr{"bigint", 9223372036854775807ll} }); + require_attributes(feature, { + attr{"x", 0}, attr{"y", 0}, attr{"bigint", 0x7FFFFFFFFFFFFFFFll} }); + } // END SECTION +#pragma GCC diagnostic pop + + SECTION("various number types") { + auto ds = get_csv_ds("test/data/csv/number_types.csv"); + auto fields = ds->get_descriptor().get_descriptors(); + require_field_names(fields, {"x", "y", "floats"}); + require_field_types(fields, {mapnik::Integer, mapnik::Integer, mapnik::Double}); + auto fs = all_features(ds); + for (double d : { .0, +.0, 1e-06, -1e-06, 0.000001, 1.234e+16, 1.234e+16 }) { + auto feature = fs->next(); + REQUIRE(bool(feature)); + CHECK(feature->get("floats").get() == Approx(d)); + } + } // END SECTION + + SECTION("manually supplied extent") { + std::string csv_string("wkt,Name\n"); + mapnik::parameters params; + params["type"] = std::string("csv"); + params["inline"] = csv_string; + params["extent"] = "-180,-90,180,90"; + auto ds = mapnik::datasource_cache::instance().create(params); REQUIRE(bool(ds)); - } - } - } // END SECTION - - SECTION("lon/lat detection") { - for (auto const &lon_name : {std::string("lon"), std::string("lng")}) { - auto ds = get_csv_ds((boost::format("test/data/csv/%1%_lat.csv") % lon_name).str()); - auto fields = ds->get_descriptor().get_descriptors(); - require_field_names(fields, {lon_name, "lat"}); - require_field_types(fields, {mapnik::Integer, mapnik::Integer}); - - CHECK(ds->get_geometry_type() == mapnik::datasource_geometry_t::Point); - - mapnik::query query(ds->envelope()); - for (auto const &field : fields) { - query.add_property_name(field.get_name()); - } - auto features = ds->features(query); - auto feature = features->next(); - - require_attributes(feature, { - attr { lon_name, mapnik::value_integer(0) }, - attr { "lat", mapnik::value_integer(0) } - }); - } - } // END SECTION - - SECTION("type detection") { - auto ds = get_csv_ds("test/data/csv/nypd.csv"); - auto fields = ds->get_descriptor().get_descriptors(); - require_field_names(fields, {"Precinct", "Phone", "Address", "City", "geo_longitude", "geo_latitude", "geo_accuracy"}); - require_field_types(fields, {mapnik::String, mapnik::String, mapnik::String, mapnik::String, mapnik::Double, mapnik::Double, mapnik::String}); - - CHECK(ds->get_geometry_type() == mapnik::datasource_geometry_t::Point); - CHECK(count_features(all_features(ds)) == 2); - - auto feature = all_features(ds)->next(); - require_attributes(feature, { - attr { "City", mapnik::value_unicode_string("New York, NY") } - , attr { "geo_accuracy", mapnik::value_unicode_string("house") } - , attr { "Phone", mapnik::value_unicode_string("(212) 334-0711") } - , attr { "Address", mapnik::value_unicode_string("19 Elizabeth Street") } - , attr { "Precinct", mapnik::value_unicode_string("5th Precinct") } - , attr { "geo_longitude", mapnik::value_integer(-70) } - , attr { "geo_latitude", mapnik::value_integer(40) } - }); - } // END SECTION - - SECTION("skipping blank rows") { - auto ds = get_csv_ds("test/data/csv/blank_rows.csv"); - auto fields = ds->get_descriptor().get_descriptors(); - require_field_names(fields, {"x", "y", "name"}); - require_field_types(fields, {mapnik::Integer, mapnik::Integer, mapnik::String}); - - CHECK(ds->get_geometry_type() == mapnik::datasource_geometry_t::Point); - CHECK(count_features(all_features(ds)) == 2); - } // END SECTION - - SECTION("empty rows") { - auto ds = get_csv_ds("test/data/csv/empty_rows.csv"); - auto fields = ds->get_descriptor().get_descriptors(); - require_field_names(fields, {"x", "y", "text", "date", "integer", "boolean", "float", "time", "datetime", "empty_column"}); - require_field_types(fields, {mapnik::Integer, mapnik::Integer, mapnik::String, mapnik::String, mapnik::Integer, mapnik::Boolean, mapnik::Double, mapnik::String, mapnik::String, mapnik::String}); - - CHECK(ds->get_geometry_type() == mapnik::datasource_geometry_t::Point); - CHECK(count_features(all_features(ds)) == 4); - - auto featureset = all_features(ds); - auto feature = featureset->next(); - require_attributes(feature, { - attr { "x", mapnik::value_integer(0) } - , attr { "empty_column", mapnik::value_unicode_string("") } - , attr { "text", mapnik::value_unicode_string("a b") } - , attr { "float", mapnik::value_double(1.0) } - , attr { "datetime", mapnik::value_unicode_string("1971-01-01T04:14:00") } - , attr { "y", mapnik::value_integer(0) } - , attr { "boolean", mapnik::value_bool(true) } - , attr { "time", mapnik::value_unicode_string("04:14:00") } - , attr { "date", mapnik::value_unicode_string("1971-01-01") } - , attr { "integer", mapnik::value_integer(40) } - }); - - while (bool(feature = featureset->next())) { - CHECK(feature->size() == 10); - CHECK(feature->get("empty_column") == mapnik::value_unicode_string("")); - } - } // END SECTION - - SECTION("slashes") { - auto ds = get_csv_ds("test/data/csv/has_attributes_with_slashes.csv"); - auto fields = ds->get_descriptor().get_descriptors(); - require_field_names(fields, {"x", "y", "name"}); - // NOTE: y column is integer, even though a double value is used below in the test? - require_field_types(fields, {mapnik::Integer, mapnik::Integer, mapnik::String}); - - auto featureset = all_features(ds); - require_attributes(featureset->next(), { - attr{"x", 0} - , attr{"y", 0} - , attr{"name", mapnik::value_unicode_string("a/a") } }); - require_attributes(featureset->next(), { - attr{"x", 1} - , attr{"y", 4} - , attr{"name", mapnik::value_unicode_string("b/b") } }); - require_attributes(featureset->next(), { - attr{"x", 10} - , attr{"y", 2.5} - , attr{"name", mapnik::value_unicode_string("c/c") } }); - } // END SECTION - - SECTION("wkt field") { - using mapnik::geometry::geometry_types; - - auto ds = get_csv_ds("test/data/csv/wkt.csv"); - auto fields = ds->get_descriptor().get_descriptors(); - require_field_names(fields, {"type"}); - require_field_types(fields, {mapnik::String}); - - auto featureset = all_features(ds); - require_geometry(featureset->next(), 1, geometry_types::Point); - require_geometry(featureset->next(), 1, geometry_types::LineString); - require_geometry(featureset->next(), 1, geometry_types::Polygon); - require_geometry(featureset->next(), 1, geometry_types::Polygon); - require_geometry(featureset->next(), 4, geometry_types::MultiPoint); - require_geometry(featureset->next(), 2, geometry_types::MultiLineString); - require_geometry(featureset->next(), 2, geometry_types::MultiPolygon); - require_geometry(featureset->next(), 2, geometry_types::MultiPolygon); - } // END SECTION - - SECTION("handling of missing header") { - // TODO: does this mean 'missing_header.csv' should be in the warnings - // subdirectory, since it doesn't work in strict mode? - auto ds = get_csv_ds("test/data/csv/missing_header.csv", false); - auto fields = ds->get_descriptor().get_descriptors(); - require_field_names(fields, {"one", "two", "x", "y", "_4", "aftermissing"}); - auto feature = all_features(ds)->next(); - REQUIRE(feature); - REQUIRE(feature->has_key("_4")); - CHECK(feature->get("_4") == mapnik::value_unicode_string("missing")); - } // END SECTION - - SECTION("handling of headers that are numbers") { - auto ds = get_csv_ds("test/data/csv/numbers_for_headers.csv"); - auto fields = ds->get_descriptor().get_descriptors(); - require_field_names(fields, {"x", "y", "1990", "1991", "1992"}); - auto feature = all_features(ds)->next(); - require_attributes(feature, { - attr{"x", 0} - , attr{"y", 0} - , attr{"1990", 1} - , attr{"1991", 2} - , attr{"1992", 3} - }); - auto expression = mapnik::parse_expression("[1991]=2"); - REQUIRE(bool(expression)); - auto value = mapnik::util::apply_visitor( - mapnik::evaluate( - *feature, mapnik::attributes()), *expression); - CHECK(value == true); - } // END SECTION - - SECTION("quoted numbers") { - using ustring = mapnik::value_unicode_string; - - auto ds = get_csv_ds("test/data/csv/quoted_numbers.csv"); - auto fields = ds->get_descriptor().get_descriptors(); - require_field_names(fields, {"x", "y", "label"}); - auto featureset = all_features(ds); - - require_attributes(featureset->next(), { - attr{"x", 0}, attr{"y", 0}, attr{"label", ustring("0,0") } }); - require_attributes(featureset->next(), { - attr{"x", 5}, attr{"y", 5}, attr{"label", ustring("5,5") } }); - require_attributes(featureset->next(), { - attr{"x", 0}, attr{"y", 5}, attr{"label", ustring("0,5") } }); - require_attributes(featureset->next(), { - attr{"x", 5}, attr{"y", 0}, attr{"label", ustring("5,0") } }); - require_attributes(featureset->next(), { - attr{"x", 2.5}, attr{"y", 2.5}, attr{"label", ustring("2.5,2.5") } }); - - } // END SECTION - - SECTION("reading newlines") { - for (auto const &platform : {std::string("windows"), std::string("mac")}) { - std::string file_name = (boost::format("test/data/csv/%1%_newlines.csv") % platform).str(); - auto ds = get_csv_ds(file_name); - auto fields = ds->get_descriptor().get_descriptors(); - require_field_names(fields, {"x", "y", "z"}); - require_attributes(all_features(ds)->next(), { - attr{"x", 1}, attr{"y", 10}, attr{"z", 9999.9999} }); - } - } // END SECTION - - SECTION("mixed newlines") { - using ustring = mapnik::value_unicode_string; - - for (auto const &file : { - std::string("test/data/csv/mac_newlines_with_unix_inline.csv") - , std::string("test/data/csv/mac_newlines_with_unix_inline_escaped.csv") - , std::string("test/data/csv/windows_newlines_with_unix_inline.csv") - , std::string("test/data/csv/windows_newlines_with_unix_inline_escaped.csv") - }) { - auto ds = get_csv_ds(file); - auto fields = ds->get_descriptor().get_descriptors(); - require_field_names(fields, {"x", "y", "line"}); - require_attributes(all_features(ds)->next(), { - attr{"x", 0}, attr{"y", 0} - , attr{"line", ustring("many\n lines\n of text\n with unix newlines")} }); - } - } // END SECTION - - SECTION("tabs") { - auto ds = get_csv_ds("test/data/csv/tabs_in_csv.csv"); - auto fields = ds->get_descriptor().get_descriptors(); - require_field_names(fields, {"x", "y", "z"}); - require_attributes(all_features(ds)->next(), { - attr{"x", -122}, attr{"y", 48}, attr{"z", 0} }); - } // END SECTION - - SECTION("separators") { - using ustring = mapnik::value_unicode_string; - - for (auto const &file : { - std::string("test/data/csv/pipe_delimiters.csv") - , std::string("test/data/csv/semicolon_delimiters.csv") - }) { - auto ds = get_csv_ds(file); - auto fields = ds->get_descriptor().get_descriptors(); - require_field_names(fields, {"x", "y", "z"}); - require_attributes(all_features(ds)->next(), { - attr{"x", 0}, attr{"y", 0}, attr{"z", ustring("hello")} }); - } - } // END SECTION - - SECTION("null and bool keywords are empty strings") { - using ustring = mapnik::value_unicode_string; - - auto ds = get_csv_ds("test/data/csv/nulls_and_booleans_as_strings.csv"); - auto fields = ds->get_descriptor().get_descriptors(); - require_field_names(fields, {"x", "y", "null", "boolean"}); - require_field_types(fields, {mapnik::Integer, mapnik::Integer, mapnik::String, mapnik::Boolean}); - - auto featureset = all_features(ds); - require_attributes(featureset->next(), { - attr{"x", 0}, attr{"y", 0}, attr{"null", ustring("null")}, attr{"boolean", true}}); - require_attributes(featureset->next(), { - attr{"x", 0}, attr{"y", 0}, attr{"null", ustring("")}, attr{"boolean", false}}); - } // END SECTION - - SECTION("nonexistent query fields throw") { - auto ds = get_csv_ds("test/data/csv/lon_lat.csv"); - auto fields = ds->get_descriptor().get_descriptors(); - require_field_names(fields, {"lon", "lat"}); - require_field_types(fields, {mapnik::Integer, mapnik::Integer}); - - mapnik::query query(ds->envelope()); - for (auto const &field : fields) { - query.add_property_name(field.get_name()); - } - // also add an invalid one, triggering throw - query.add_property_name("bogus"); - - REQUIRE_THROWS(ds->features(query)); - } // END SECTION - - SECTION("leading zeros mean strings") { - using ustring = mapnik::value_unicode_string; - - auto ds = get_csv_ds("test/data/csv/leading_zeros.csv"); - auto fields = ds->get_descriptor().get_descriptors(); - require_field_names(fields, {"x", "y", "fips"}); - require_field_types(fields, {mapnik::Integer, mapnik::Integer, mapnik::String}); - - auto featureset = all_features(ds); - require_attributes(featureset->next(), { - attr{"x", 0}, attr{"y", 0}, attr{"fips", ustring("001")}}); - require_attributes(featureset->next(), { - attr{"x", 0}, attr{"y", 0}, attr{"fips", ustring("003")}}); - require_attributes(featureset->next(), { - attr{"x", 0}, attr{"y", 0}, attr{"fips", ustring("005")}}); - } // END SECTION - - SECTION("advanced geometry detection") { - using row = std::pair; - - for (row r : { - row{"point", mapnik::datasource_geometry_t::Point} - , row{"poly", mapnik::datasource_geometry_t::Polygon} - , row{"multi_poly", mapnik::datasource_geometry_t::Polygon} - , row{"line", mapnik::datasource_geometry_t::LineString} - }) { - std::string file_name = (boost::format("test/data/csv/%1%_wkt.csv") % r.first).str(); - auto ds = get_csv_ds(file_name); - CHECK(ds->get_geometry_type() == r.second); - } - } // END SECTION - - SECTION("creation of CSV from in-memory strings") { - using ustring = mapnik::value_unicode_string; - - for (auto const &name : {std::string("Winthrop, WA"), std::string(u8"Qu\u00e9bec")}) { - std::string csv_string = - (boost::format( - "wkt,Name\n" - "\"POINT (120.15 48.47)\",\"%1%\"\n" - ) % name).str(); - - mapnik::parameters params; - params["type"] = std::string("csv"); - params["inline"] = csv_string; - auto ds = mapnik::datasource_cache::instance().create(params); - REQUIRE(bool(ds)); - - auto feature = all_features(ds)->next(); - REQUIRE(bool(feature)); - REQUIRE(feature->has_key("Name")); - CHECK(feature->get("Name") == ustring(name.c_str())); - } - } // END SECTION - - SECTION("geojson quoting") { - using mapnik::geometry::geometry_types; - - for (auto const &file : { - std::string("test/data/csv/geojson_double_quote_escape.csv") - , std::string("test/data/csv/geojson_single_quote.csv") - , std::string("test/data/csv/geojson_2x_double_quote_filebakery_style.csv") - }) { - auto ds = get_csv_ds(file); - auto fields = ds->get_descriptor().get_descriptors(); - require_field_names(fields, {"type"}); - require_field_types(fields, {mapnik::String}); - - auto featureset = all_features(ds); - require_geometry(featureset->next(), 1, geometry_types::Point); - require_geometry(featureset->next(), 1, geometry_types::LineString); - require_geometry(featureset->next(), 1, geometry_types::Polygon); - require_geometry(featureset->next(), 1, geometry_types::Polygon); - require_geometry(featureset->next(), 4, geometry_types::MultiPoint); - require_geometry(featureset->next(), 2, geometry_types::MultiLineString); - require_geometry(featureset->next(), 2, geometry_types::MultiPolygon); - require_geometry(featureset->next(), 2, geometry_types::MultiPolygon); - } - } // END SECTION - - SECTION("blank undelimited rows are still parsed") { - using ustring = mapnik::value_unicode_string; - - // TODO: does this mean this CSV file should be in the warnings - // subdirectory, since it doesn't work in strict mode? - auto ds = get_csv_ds("test/data/csv/more_headers_than_column_values.csv", false); - auto fields = ds->get_descriptor().get_descriptors(); - require_field_names(fields, {"x", "y", "one", "two", "three"}); - require_field_types(fields, {mapnik::Integer, mapnik::Integer, mapnik::String, mapnik::String, mapnik::String}); - - require_attributes(all_features(ds)->next(), { - attr{"x", 0}, attr{"y", 0}, attr{"one", ustring("")}, attr{"two", ustring("")}, attr{"three", ustring("")} }); - } // END SECTION - - SECTION("fewer headers than rows throws") { - REQUIRE_THROWS(get_csv_ds("test/data/csv/more_column_values_than_headers.csv")); - } // END SECTION - - SECTION("feature ID only incremented for valid rows") { - auto ds = get_csv_ds("test/data/csv/warns/feature_id_counting.csv", false); - auto fs = all_features(ds); - - // first - auto feature = fs->next(); - REQUIRE(bool(feature)); - CHECK(feature->id() == 1); - - // second, should have skipped bogus one - feature = fs->next(); - REQUIRE(bool(feature)); - CHECK(feature->id() == 2); - - feature = fs->next(); - CHECK(!feature); - } // END SECTION - - SECTION("dynamically defining headers") { - using ustring = mapnik::value_unicode_string; - using row = std::pair; - - for (auto const &r : { - row{"test/data/csv/fails/needs_headers_two_lines.csv", 2} - , row{"test/data/csv/fails/needs_headers_one_line.csv", 1} - , row{"test/data/csv/fails/needs_headers_one_line_no_newline.csv", 1} - }) { - mapnik::parameters params; - params["type"] = std::string("csv"); - params["file"] = r.first; - params["headers"] = "x,y,name"; - auto ds = mapnik::datasource_cache::instance().create(params); - REQUIRE(bool(ds)); - - auto fields = ds->get_descriptor().get_descriptors(); - require_field_names(fields, {"x", "y", "name"}); - require_field_types(fields, {mapnik::Integer, mapnik::Integer, mapnik::String}); - require_attributes(all_features(ds)->next(), { - attr{"x", 0}, attr{"y", 0}, attr{"name", ustring("data_name")} }); - REQUIRE(count_features(all_features(ds)) == r.second); - } - } // END SECTION - - #pragma GCC diagnostic push - #pragma GCC diagnostic ignored "-Wlong-long" - SECTION("64bit int fields work") { - auto ds = get_csv_ds("test/data/csv/64bit_int.csv"); - auto fields = ds->get_descriptor().get_descriptors(); - require_field_names(fields, {"x", "y", "bigint"}); - require_field_types(fields, {mapnik::Integer, mapnik::Integer, mapnik::Integer}); - - auto fs = all_features(ds); - auto feature = fs->next(); - require_attributes(feature, { - attr{"x", 0}, attr{"y", 0}, attr{"bigint", 2147483648} }); - - feature = fs->next(); - require_attributes(feature, { - attr{"x", 0}, attr{"y", 0}, attr{"bigint", 9223372036854775807ll} }); - require_attributes(feature, { - attr{"x", 0}, attr{"y", 0}, attr{"bigint", 0x7FFFFFFFFFFFFFFFll} }); - } // END SECTION - #pragma GCC diagnostic pop - - SECTION("various number types") { - auto ds = get_csv_ds("test/data/csv/number_types.csv"); - auto fields = ds->get_descriptor().get_descriptors(); - require_field_names(fields, {"x", "y", "floats"}); - require_field_types(fields, {mapnik::Integer, mapnik::Integer, mapnik::Double}); - - auto fs = all_features(ds); - for (double d : { .0, +.0, 1e-06, -1e-06, 0.000001, 1.234e+16, 1.234e+16 }) { - auto feature = fs->next(); - REQUIRE(bool(feature)); - CHECK(feature->get("floats").get() == Approx(d)); - } - } // END SECTION - - SECTION("manually supplied extent") { - std::string csv_string("wkt,Name\n"); - mapnik::parameters params; - params["type"] = std::string("csv"); - params["inline"] = csv_string; - params["extent"] = "-180,-90,180,90"; - auto ds = mapnik::datasource_cache::instance().create(params); - REQUIRE(bool(ds)); - - auto box = ds->envelope(); - CHECK(box.minx() == -180); - CHECK(box.miny() == -90); - CHECK(box.maxx() == 180); - CHECK(box.maxy() == 90); - } // END SECTION - - SECTION("inline geojson") { - std::string csv_string = "geojson\n'{\"coordinates\":[-92.22568,38.59553],\"type\":\"Point\"}'"; - mapnik::parameters params; - params["type"] = std::string("csv"); - params["inline"] = csv_string; - auto ds = mapnik::datasource_cache::instance().create(params); - REQUIRE(bool(ds)); - - auto fields = ds->get_descriptor().get_descriptors(); - require_field_names(fields, {}); - - // TODO: this originally had the following comment: - // - re-enable after https://github.com/mapnik/mapnik/issues/2319 is fixed - // but that seems to have been merged and tested separately? - auto fs = all_features(ds); - auto feat = fs->next(); - CHECK(feature_count(feat->get_geometry()) == 1); - } // END SECTION - - mapnik::logger::instance().set_severity(severity); - } + auto box = ds->envelope(); + CHECK(box.minx() == -180); + CHECK(box.miny() == -90); + CHECK(box.maxx() == 180); + CHECK(box.maxy() == 90); + } // END SECTION + + SECTION("inline geojson") { + std::string csv_string = "geojson\n'{\"coordinates\":[-92.22568,38.59553],\"type\":\"Point\"}'"; + mapnik::parameters params; + params["type"] = std::string("csv"); + params["inline"] = csv_string; + auto ds = mapnik::datasource_cache::instance().create(params); + REQUIRE(bool(ds)); + + auto fields = ds->get_descriptor().get_descriptors(); + require_field_names(fields, {}); + + // TODO: this originally had the following comment: + // - re-enable after https://github.com/mapnik/mapnik/issues/2319 is fixed + // but that seems to have been merged and tested separately? + auto fs = all_features(ds); + auto feat = fs->next(); + CHECK(feature_count(feat->get_geometry()) == 1); + } // END SECTION + mapnik::logger::instance().set_severity(severity); + } } // END TEST CASE