#include "csv_datasource.hpp" // boost #include #include #include #include #include // mapnik #include #include #include #include #include // stl #include #include // fstream #include #include // ostream_operator // clib #include #include //#include using mapnik::datasource; using mapnik::parameters; DATASOURCE_PLUGIN(csv_datasource) csv_datasource::csv_datasource(parameters const& params, bool bind) : datasource(params), desc_(*params_.get("type"), *params_.get("encoding","utf-8")), extent_(), filename_(), inline_string_(), features_(), separator_(*params_.get("separator",",")), escape_(*params_.get("escape","\\")), quote_(*params_.get("quote","\"")) { /* TODO: build up features lazily, and filter cols using query support for newlines other than \n https://docs.google.com/a/dbsgeo.com/spreadsheet/pub?key=0AqV4OJpywingdFBCV1o3SXp3OU94U3VJWTRoLWRPbGc&output=csv spatial index */ boost::optional inline_string = params_.get("inline"); if (inline_string) { inline_string_ = *inline_string; } else { boost::optional file = params_.get("file"); if (!file) throw mapnik::datasource_exception("CSV Plugin: missing parameter"); boost::optional base = params_.get("base"); if (base) filename_ = *base + "/" + *file; else filename_ = *file; } if (bind) { this->bind(); } } csv_datasource::~csv_datasource() { } void csv_datasource::bind() const { if (is_bound_) return; if (!inline_string_.empty()) { std::istringstream in(inline_string_); parse_csv(in); } else { std::ifstream in(filename_.c_str()); if (!in.is_open()) throw mapnik::datasource_exception("CSV Plugin: could not open: '" + filename_ + "'"); parse_csv(in); in.close(); } is_bound_ = true; } template void csv_datasource::parse_csv(T& stream) const { typedef boost::escaped_list_separator separator_type; typedef boost::tokenizer< separator_type > Tokenizer; std::string csv_line; boost::escaped_list_separator grammer(escape_, separator_, quote_); mapnik::transcoder tr(desc_.get_encoding()); int line_no(1); int feature_count(0); while (std::getline(stream,csv_line)) { Tokenizer tok(csv_line, grammer); Tokenizer::iterator beg = tok.begin(); std::string val = boost::trim_copy(*beg); // skip lines with leading blanks (assume whole line is empty) if (val.empty()) continue; // handle headers if (line_no == 1) { unsigned i = 0; for (; beg != tok.end(); ++beg) { std::string value = boost::trim_copy(*beg); // todo - ensure col names do not start with digit try { headers_.push_back(boost::lexical_cast(value)); } catch (boost::bad_lexical_cast & ex) { std::ostringstream s; s << "CSV Plugin: expected string type column header - could not parse column " << i << " - found: '" << value << "'"; throw mapnik::datasource_exception(s.str()); } } ++i; } else { double x; double y; bool parsed_x = false; bool parsed_y = false; bool has_wkt_field = false; bool parsed_wkt = false; bool extent_initialized = false; // look for wkt field if (std::find(headers_.begin(), headers_.end(), "wkt") != headers_.end()) { has_wkt_field = true; } mapnik::feature_ptr feature(mapnik::feature_factory::create(feature_count)); ++feature_count; unsigned i = 0; for (;beg != tok.end(); ++beg) { std::string value = boost::trim_copy(*beg); // avoid range error if trailing separator on last col // TODO - should we throw instead? if (i >= headers_.size()) break; std::string fld_name(headers_.at(i)); // parse wkt if (has_wkt_field && fld_name == "wkt" && !parsed_wkt) { // skip empty geoms if (value.empty()) break; bool result = mapnik::from_wkt(value, feature->paths()); if (!result) { std::ostringstream s; s << "CSV Plugin: expected well known text geometry: could not parse row " << line_no << ",column " << i << " - found: '" << value << "'"; throw mapnik::datasource_exception(s.str()); } parsed_wkt = true; } // longitude else if ( !parsed_x && (fld_name == "x" || fld_name == "lon" || fld_name == "longitude") ) { try { x = boost::lexical_cast(value); parsed_x = true; } catch (boost::bad_lexical_cast & ex) { std::ostringstream s; s << "CSV Plugin: expected longitude: could not parse row " << line_no << ", column " << i << " - found: '" << value << "'"; throw mapnik::datasource_exception(s.str()); } } // latitude else if ( !parsed_y && (fld_name == "y" || fld_name == "lat" || fld_name == "latitude") ) { try { y = boost::lexical_cast(value); parsed_y = true; } catch (boost::bad_lexical_cast & ex) { std::ostringstream s; s << "CSV Plugin: expected latitude: could not parse row " << line_no << ", column " << i << " - found: '" << value << "'"; throw mapnik::datasource_exception(s.str()); } } // add all values as attributes try { if (value.find(".") != std::string::npos) { double val = boost::lexical_cast(value); boost::put(*feature,fld_name,val); if (line_no == 2) desc_.add_descriptor(mapnik::attribute_descriptor(fld_name,mapnik::Double)); } else { int val = boost::lexical_cast(value); boost::put(*feature,fld_name,val); if (line_no == 2) desc_.add_descriptor(mapnik::attribute_descriptor(fld_name,mapnik::Integer)); } } catch (boost::bad_lexical_cast & ex) { std::string val = boost::lexical_cast(value); if (!val.empty()) { if (val == "true") { boost::put(*feature,fld_name,true); if (line_no == 2) desc_.add_descriptor(mapnik::attribute_descriptor(fld_name,mapnik::Boolean)); } else if(val == "false") { boost::put(*feature,fld_name,false); if (line_no == 2) desc_.add_descriptor(mapnik::attribute_descriptor(fld_name,mapnik::Boolean)); } else { UnicodeString ustr = tr.transcode(val.c_str()); boost::put(*feature,fld_name,ustr); if (line_no == 2) desc_.add_descriptor(mapnik::attribute_descriptor(fld_name,mapnik::String)); } } else { boost::put(*feature,headers_.at(i),mapnik::value_null()); } } ++i; } if (has_wkt_field) { if (parsed_wkt) { if (line_no >= 2 && !extent_initialized) { extent_initialized = true; extent_ = feature->envelope(); } else { extent_.expand_to_include(feature->envelope()); } features_.push_back(feature); } } else { if (!parsed_x) { std::ostringstream s; s << "CSV Plugin: could not detect or parse any rows named 'x', 'lon' or 'longitude' " << "does your csv have headers?"; throw mapnik::datasource_exception(s.str()); } else if (!parsed_y) { std::ostringstream s; s << "CSV Plugin: could not detect or parse rows named 'y', 'lat' or 'latitude' " << "does your csv have headers?"; throw mapnik::datasource_exception(s.str()); } else { if (line_no >= 2 && !extent_initialized) { extent_initialized = true; extent_.init(x, y, x, y); } else { extent_.expand_to_include(x,y); } mapnik::geometry_type * pt = new mapnik::geometry_type(mapnik::Point); pt->move_to(x,y); feature->add_geometry(pt); features_.push_back(feature); } } } ++line_no; } } std::string csv_datasource::name() { return "csv"; } int csv_datasource::type() const { return datasource::Vector; } mapnik::box2d csv_datasource::envelope() const { if (!is_bound_) bind(); return extent_; } mapnik::layer_descriptor csv_datasource::get_descriptor() const { if (!is_bound_) bind(); return desc_; } mapnik::featureset_ptr csv_datasource::features(mapnik::query const& q) const { if (!is_bound_) bind(); // TODO - should we check q.property_names() and throw if not found in headers_? //const std::set& attribute_names = q.property_names(); return boost::make_shared(q.get_bbox(),features_); } mapnik::featureset_ptr csv_datasource::features_at_point(mapnik::coord2d const& pt) const { if (!is_bound_) bind(); throw mapnik::datasource_exception("CSV Plugin: features_at_point is not supported yet"); }