diff --git a/plugins/input/csv/csv_datasource.cpp b/plugins/input/csv/csv_datasource.cpp index f98ff2d33..2e37d1d43 100644 --- a/plugins/input/csv/csv_datasource.cpp +++ b/plugins/input/csv/csv_datasource.cpp @@ -13,12 +13,13 @@ #include #include #include -#include // mapnik::boolean +#include // mapnik::boolean // stl #include #include #include +#include #include using mapnik::datasource; @@ -28,46 +29,46 @@ using namespace boost::spirit; DATASOURCE_PLUGIN(csv_datasource) csv_datasource::csv_datasource(parameters const& params, bool bind) - : datasource(params), - desc_(*params_.get("type"), *params_.get("encoding","utf-8")), - extent_(), - filename_(), - inline_string_(), - file_length_(0), - row_limit_(*params_.get("row_limit",0)), - features_(), - escape_(*params_.get("escape","")), - separator_(*params_.get("separator","")), - quote_(*params_.get("quote","")), - headers_(), - manual_headers_(boost::trim_copy(*params_.get("headers",""))), - strict_(*params_.get("strict",false)), - quiet_(*params_.get("quiet",false)), - filesize_max_(*params_.get("filesize_max",20.0)) // MB +: datasource(params), + desc_(*params_.get("type"), *params_.get("encoding", "utf-8")), + extent_(), + filename_(), + inline_string_(), + file_length_(0), + row_limit_(*params_.get("row_limit", 0)), + features_(), + escape_(*params_.get("escape", "")), + separator_(*params_.get("separator", "")), + quote_(*params_.get("quote", "")), + headers_(), + manual_headers_(boost::trim_copy(*params_.get("headers", ""))), + strict_(*params_.get("strict", false)), + quiet_(*params_.get("quiet", false)), + filesize_max_(*params_.get("filesize_max", 20.0)) // MB { /* TODO: - general: - - refactor parser into generic class - - tests of grid_renderer output - - ensure that the attribute desc_ matches the first feature added - alternate large file pipeline: - - stat file, detect > 15 MB - - build up csv line-by-line iterator - - creates opportunity to filter attributes by map query - speed: - - add properties for wkt/lon/lat at parse time - - remove boost::lexical_cast - - add ability to pass 'filter' keyword to drop attributes at layer init - - create quad tree on the fly for small/med size files - - memory map large files for reading - - smaller features (less memory overhead) - usability: - - enforce column names without leading digit - - better error messages (add filepath) if not reading from string - - move to spirit to tokenize and add character level error feedback: - http://boost-spirit.com/home/articles/qi-example/tracking-the-input-position-while-parsing/ + general: + - refactor parser into generic class + - tests of grid_renderer output + - ensure that the attribute desc_ matches the first feature added + alternate large file pipeline: + - stat file, detect > 15 MB + - build up csv line-by-line iterator + - creates opportunity to filter attributes by map query + speed: + - add properties for wkt/lon/lat at parse time + - remove boost::lexical_cast + - add ability to pass 'filter' keyword to drop attributes at layer init + - create quad tree on the fly for small/med size files + - memory map large files for reading + - smaller features (less memory overhead) + usability: + - enforce column names without leading digit + - better error messages (add filepath) if not reading from string + - move to spirit to tokenize and add character level error feedback: + http://boost-spirit.com/home/articles/qi-example/tracking-the-input-position-while-parsing/ */ - + boost::optional inline_string = params_.get("inline"); if (inline_string) { @@ -77,14 +78,14 @@ csv_datasource::csv_datasource(parameters const& params, bool bind) { boost::optional file = params_.get("file"); if (!file) throw mapnik::datasource_exception("CSV Plugin: missing parameter"); - + boost::optional base = params_.get("base"); if (base) filename_ = *base + "/" + *file; else filename_ = *file; } - + if (bind) { this->bind(); @@ -97,7 +98,7 @@ csv_datasource::~csv_datasource() { } void csv_datasource::bind() const { if (is_bound_) return; - + if (!inline_string_.empty()) { std::istringstream in(inline_string_); @@ -120,13 +121,13 @@ void csv_datasource::parse_csv(T& stream, std::string const& separator, std::string const& quote) const { - stream.seekg (0, std::ios::end); + stream.seekg(0, std::ios::end); file_length_ = stream.tellg(); - + if (filesize_max_ > 0) { double file_mb = static_cast(file_length_)/1048576; - + // throw if this is an unreasonably large file to read into memory if (file_mb > filesize_max_) { @@ -139,13 +140,13 @@ void csv_datasource::parse_csv(T& stream, } // set back to start - stream.seekg (0, std::ios::beg); + stream.seekg(0, std::ios::beg); // autodetect newlines char newline = '\n'; int newline_count = 0; int carriage_count = 0; - for(unsigned idx = 0; idx < file_length_; idx++) + for (unsigned idx = 0; idx < file_length_; idx++) { char c = static_cast(stream.get()); if (c == '\n') @@ -172,8 +173,8 @@ void csv_datasource::parse_csv(T& stream, } // set back to start - stream.seekg (0, std::ios::beg); - + stream.seekg(0, std::ios::beg); + // get first line std::string csv_line; std::getline(stream,csv_line,newline); @@ -214,22 +215,22 @@ void csv_datasource::parse_csv(T& stream, if (num_semicolons > num_commas) { sep = ";"; - #ifdef MAPNIK_DEBUG +#ifdef MAPNIK_DEBUG std::clog << "CSV Plugin: auto detected ';' separator\n"; - #endif +#endif } } } } // set back to start - stream.seekg (0, std::ios::beg); - + stream.seekg(0, std::ios::beg); + typedef boost::escaped_list_separator escape_type; std::string esc = boost::trim_copy(escape); if (esc.empty()) esc = "\\"; - + std::string quo = boost::trim_copy(quote); if (quo.empty()) quo = "\""; @@ -240,16 +241,16 @@ void csv_datasource::parse_csv(T& stream, boost::escaped_list_separator grammer; try { - //grammer = boost::escaped_list_separator('\\', ',', '\"'); + // grammer = boost::escaped_list_separator('\\', ',', '\"'); grammer = boost::escaped_list_separator(esc, sep, quo); } - catch (const std::exception & ex ) + catch(const std::exception & ex) { std::ostringstream s; s << "CSV Plugin: " << ex.what(); throw mapnik::datasource_exception(s.str()); } - + typedef boost::tokenizer< escape_type > Tokenizer; int line_number(1); @@ -303,7 +304,7 @@ void csv_datasource::parse_csv(T& stream, Tokenizer tok(csv_line, grammer); Tokenizer::iterator beg = tok.begin(); std::string val = boost::trim_copy(*beg); - + // skip blank lines if (val.empty()) { @@ -337,7 +338,7 @@ void csv_datasource::parse_csv(T& stream, } } else - { + { std::string lower_val = boost::algorithm::to_lower_copy(val); if (lower_val == "wkt" || (lower_val.find("geom") != std::string::npos)) @@ -367,7 +368,7 @@ void csv_datasource::parse_csv(T& stream, break; } } - catch (const std::exception & ex ) + catch(const std::exception & ex) { std::ostringstream s; s << "CSV Plugin: error parsing headers: " << ex.what(); @@ -397,7 +398,7 @@ void csv_datasource::parse_csv(T& stream, #endif break; } - + unsigned line_length = csv_line.length(); // skip blank lines @@ -418,7 +419,7 @@ void csv_datasource::parse_csv(T& stream, { Tokenizer tok(csv_line, grammer); Tokenizer::iterator beg = tok.begin(); - + // early return for strict mode if (strict_) { @@ -430,7 +431,7 @@ void csv_datasource::parse_csv(T& stream, throw mapnik::datasource_exception(s.str()); } } - + mapnik::feature_ptr feature(mapnik::feature_factory::create(feature_count)); double x(0); double y(0); @@ -439,7 +440,7 @@ void csv_datasource::parse_csv(T& stream, bool parsed_wkt = false; bool null_geom = false; std::vector collected; - + for (unsigned i = 0; i < num_headers; ++i) { std::string fld_name(headers_.at(i)); @@ -449,7 +450,7 @@ void csv_datasource::parse_csv(T& stream, { UnicodeString ustr = tr.transcode(value.c_str()); boost::put(*feature,fld_name,ustr); - //boost::put(*feature,fld_name,mapnik::value_null()); + // boost::put(*feature,fld_name,mapnik::value_null()); null_geom = true; if (feature_count == 1) { @@ -464,83 +465,83 @@ void csv_datasource::parse_csv(T& stream, } int value_length = value.length(); - + // parse wkt if (has_wkt_field) { - if (i == wkt_idx) - { - // skip empty geoms - if (value.empty()) - { - null_geom = true; - break; - } + if (i == wkt_idx) + { + // skip empty geoms + if (value.empty()) + { + null_geom = true; + break; + } - // optimize simple "POINT (x y)" - // using this shaved 2 seconds off csv that took 8 seconds total to parse - if (value.find("POINT") == 0) - { - using boost::phoenix::ref; - using boost::spirit::qi::_1; - std::string::const_iterator str_beg = value.begin(); - std::string::const_iterator str_end = value.end(); - bool r = qi::phrase_parse(str_beg,str_end, - ( - qi::lit("POINT") >> '(' >> double_[ref(x) = _1] >> double_[ref(y) = _1] >> ')' - ), - ascii::space); - - if (r && (str_beg == str_end)) - { - mapnik::geometry_type * pt = new mapnik::geometry_type(mapnik::Point); - pt->move_to(x,y); - feature->add_geometry(pt); - parsed_wkt = true; - } - else - { - std::ostringstream s; - s << "CSV Plugin: expected well known text geometry: could not parse row " - << line_number - << ",column " - << i << " - found: '" - << value << "'"; - if (strict_) - { - throw mapnik::datasource_exception(s.str()); - } - else - { - if (!quiet_) std::clog << s.str() << "\n"; - } - } - } - else - { - if (mapnik::from_wkt(value, feature->paths())) - { - parsed_wkt = true; - } - else - { - std::ostringstream s; - s << "CSV Plugin: expected well known text geometry: could not parse row " - << line_number - << ",column " - << i << " - found: '" - << value << "'"; - if (strict_) - { - throw mapnik::datasource_exception(s.str()); - } - else - { - if (!quiet_) std::clog << s.str() << "\n"; - } - } - } - } + // optimize simple "POINT (x y)" + // using this shaved 2 seconds off csv that took 8 seconds total to parse + if (value.find("POINT") == 0) + { + using boost::phoenix::ref; + using boost::spirit::qi::_1; + std::string::const_iterator str_beg = value.begin(); + std::string::const_iterator str_end = value.end(); + bool r = qi::phrase_parse(str_beg,str_end, + ( + qi::lit("POINT") >> '(' >> double_[ref(x) = _1] >> double_[ref(y) = _1] >> ')' + ), + ascii::space); + + if (r && (str_beg == str_end)) + { + mapnik::geometry_type * pt = new mapnik::geometry_type(mapnik::Point); + pt->move_to(x,y); + feature->add_geometry(pt); + parsed_wkt = true; + } + else + { + std::ostringstream s; + s << "CSV Plugin: expected well known text geometry: could not parse row " + << line_number + << ",column " + << i << " - found: '" + << value << "'"; + if (strict_) + { + throw mapnik::datasource_exception(s.str()); + } + else + { + if (!quiet_) std::clog << s.str() << "\n"; + } + } + } + else + { + if (mapnik::from_wkt(value, feature->paths())) + { + parsed_wkt = true; + } + else + { + std::ostringstream s; + s << "CSV Plugin: expected well known text geometry: could not parse row " + << line_number + << ",column " + << i << " - found: '" + << value << "'"; + if (strict_) + { + throw mapnik::datasource_exception(s.str()); + } + else + { + if (!quiet_) std::clog << s.str() << "\n"; + } + } + } + } } else { @@ -554,12 +555,12 @@ void csv_datasource::parse_csv(T& stream, break; } - try + try { x = boost::lexical_cast(value); parsed_x = true; } - catch (boost::bad_lexical_cast & ex) + catch(boost::bad_lexical_cast & ex) { std::ostringstream s; s << "CSV Plugin: expected a float value for longitude: could not parse row " @@ -569,7 +570,7 @@ void csv_datasource::parse_csv(T& stream, << value << "'"; if (strict_) { - throw mapnik::datasource_exception(s.str()); + throw mapnik::datasource_exception(s.str()); } else { @@ -587,12 +588,12 @@ void csv_datasource::parse_csv(T& stream, break; } - try + try { y = boost::lexical_cast(value); parsed_y = true; } - catch (boost::bad_lexical_cast & ex) + catch(boost::bad_lexical_cast & ex) { std::ostringstream s; s << "CSV Plugin: expected a float value for latitude: could not parse row " @@ -602,7 +603,7 @@ void csv_datasource::parse_csv(T& stream, << value << "'"; if (strict_) { - throw mapnik::datasource_exception(s.str()); + throw mapnik::datasource_exception(s.str()); } else { @@ -611,7 +612,7 @@ void csv_datasource::parse_csv(T& stream, } } } - + // add all values as attributes // here we detect numbers and treat everything else as pure strings // this is intentional since boolean and null types are not common in csv editors @@ -633,7 +634,7 @@ void csv_datasource::parse_csv(T& stream, { desc_.add_descriptor(mapnik::attribute_descriptor(fld_name,mapnik::String)); } - + } else if ((value[0] >= '0' && value[0] <= '9') || value[0] == '-') { @@ -683,7 +684,7 @@ void csv_datasource::parse_csv(T& stream, } } } - + if (null_geom) { ++line_number; @@ -700,7 +701,7 @@ void csv_datasource::parse_csv(T& stream, continue; } } - + if (has_wkt_field) { if (parsed_wkt) @@ -721,7 +722,7 @@ void csv_datasource::parse_csv(T& stream, { std::ostringstream s; s << "CSV Plugin: could not read WKT geometry " - << "for line " << line_number << " - found " << headers_.size() + << "for line " << line_number << " - found " << headers_.size() << " with values like: " << csv_line << "\n"; if (strict_) { @@ -743,12 +744,12 @@ void csv_datasource::parse_csv(T& stream, feature->add_geometry(pt); features_.push_back(feature); ++feature_count; - + if (!extent_initialized) { extent_initialized = true; extent_ = feature->envelope(); - + } else { @@ -762,7 +763,7 @@ void csv_datasource::parse_csv(T& stream, { s << "CSV Plugin: does your csv have valid headers?\n" << "Could not detect or parse any rows named 'x' or 'longitude' " - << "for line " << line_number << " but found " << headers_.size() + << "for line " << line_number << " but found " << headers_.size() << " with values like: " << csv_line << "\n" << "for: " << boost::algorithm::join(collected, ",") << "\n"; } @@ -770,7 +771,7 @@ void csv_datasource::parse_csv(T& stream, { s << "CSV Plugin: does your csv have valid headers?\n" << "Could not detect or parse any rows named 'y' or 'latitude' " - << "for line " << line_number << " but found " << headers_.size() + << "for line " << line_number << " but found " << headers_.size() << " with values like: " << csv_line << "\n" << "for: " << boost::algorithm::join(collected, ",") << "\n"; } @@ -787,7 +788,7 @@ void csv_datasource::parse_csv(T& stream, } ++line_number; } - catch (const mapnik::datasource_exception & ex ) + catch(const mapnik::datasource_exception & ex ) { if (strict_) { @@ -798,7 +799,7 @@ void csv_datasource::parse_csv(T& stream, if (!quiet_) std::clog << ex.what() << "\n"; } } - catch (const std::exception & ex ) + catch(const std::exception & ex) { std::ostringstream s; s << "CSV Plugin: unexpected error parsing line: " << line_number @@ -840,23 +841,23 @@ mapnik::box2d csv_datasource::envelope() const mapnik::layer_descriptor csv_datasource::get_descriptor() const { if (!is_bound_) bind(); - + return desc_; } mapnik::featureset_ptr csv_datasource::features(mapnik::query const& q) const { if (!is_bound_) bind(); - - // TODO - should we check q.property_names() and throw if not found in headers_? - //const std::set& attribute_names = q.property_names(); - + + // TODO - should we check q.property_names() and throw if not found in headers_? + // const std::set& attribute_names = q.property_names(); + return boost::make_shared(q.get_bbox(),features_); } mapnik::featureset_ptr csv_datasource::features_at_point(mapnik::coord2d const& pt) const { if (!is_bound_) bind(); - + throw mapnik::datasource_exception("CSV Plugin: features_at_point is not supported yet"); } diff --git a/plugins/input/csv/csv_datasource.hpp b/plugins/input/csv/csv_datasource.hpp index d2e51bf8c..e8fff4b5b 100644 --- a/plugins/input/csv/csv_datasource.hpp +++ b/plugins/input/csv/csv_datasource.hpp @@ -7,39 +7,39 @@ // stl #include -class csv_datasource : public mapnik::datasource +class csv_datasource : public mapnik::datasource { - public: - csv_datasource(mapnik::parameters const& params, bool bind=true); - virtual ~csv_datasource (); - int type() const; - static std::string name(); - mapnik::featureset_ptr features(mapnik::query const& q) const; - mapnik::featureset_ptr features_at_point(mapnik::coord2d const& pt) const; - mapnik::box2d envelope() const; - mapnik::layer_descriptor get_descriptor() const; - void bind() const; - template - void parse_csv(T& stream, - std::string const& escape, - std::string const& separator, - std::string const& quote) const; - private: - mutable mapnik::layer_descriptor desc_; - mutable mapnik::box2d extent_; - mutable std::string filename_; - mutable std::string inline_string_; - mutable unsigned file_length_; - mutable int row_limit_; - mutable std::vector features_; - mutable std::string escape_; - mutable std::string separator_; - mutable std::string quote_; - mutable std::vector headers_; - mutable std::string manual_headers_; - mutable bool strict_; - mutable bool quiet_; - mutable double filesize_max_; +public: + csv_datasource(mapnik::parameters const& params, bool bind=true); + virtual ~csv_datasource (); + int type() const; + static std::string name(); + mapnik::featureset_ptr features(mapnik::query const& q) const; + mapnik::featureset_ptr features_at_point(mapnik::coord2d const& pt) const; + mapnik::box2d envelope() const; + mapnik::layer_descriptor get_descriptor() const; + void bind() const; + template + void parse_csv(T& stream, + std::string const& escape, + std::string const& separator, + std::string const& quote) const; +private: + mutable mapnik::layer_descriptor desc_; + mutable mapnik::box2d extent_; + mutable std::string filename_; + mutable std::string inline_string_; + mutable unsigned file_length_; + mutable int row_limit_; + mutable std::vector features_; + mutable std::string escape_; + mutable std::string separator_; + mutable std::string quote_; + mutable std::vector headers_; + mutable std::string manual_headers_; + mutable bool strict_; + mutable bool quiet_; + mutable double filesize_max_; };