csv: code formatting

This commit is contained in:
Dane Springmeyer 2011-11-13 19:33:57 -08:00
parent b3b938a804
commit 5205960326
2 changed files with 193 additions and 192 deletions

View file

@ -13,12 +13,13 @@
#include <mapnik/geometry.hpp>
#include <mapnik/memory_featureset.hpp>
#include <mapnik/wkt/wkt_factory.hpp>
#include <mapnik/ptree_helpers.hpp> // mapnik::boolean
#include <mapnik/ptree_helpers.hpp> // mapnik::boolean
// stl
#include <sstream>
#include <fstream>
#include <iostream>
#include <vector>
#include <string>
using mapnik::datasource;
@ -28,46 +29,46 @@ using namespace boost::spirit;
DATASOURCE_PLUGIN(csv_datasource)
csv_datasource::csv_datasource(parameters const& params, bool bind)
: datasource(params),
desc_(*params_.get<std::string>("type"), *params_.get<std::string>("encoding","utf-8")),
extent_(),
filename_(),
inline_string_(),
file_length_(0),
row_limit_(*params_.get<int>("row_limit",0)),
features_(),
escape_(*params_.get<std::string>("escape","")),
separator_(*params_.get<std::string>("separator","")),
quote_(*params_.get<std::string>("quote","")),
headers_(),
manual_headers_(boost::trim_copy(*params_.get<std::string>("headers",""))),
strict_(*params_.get<mapnik::boolean>("strict",false)),
quiet_(*params_.get<mapnik::boolean>("quiet",false)),
filesize_max_(*params_.get<float>("filesize_max",20.0)) // MB
: datasource(params),
desc_(*params_.get<std::string>("type"), *params_.get<std::string>("encoding", "utf-8")),
extent_(),
filename_(),
inline_string_(),
file_length_(0),
row_limit_(*params_.get<int>("row_limit", 0)),
features_(),
escape_(*params_.get<std::string>("escape", "")),
separator_(*params_.get<std::string>("separator", "")),
quote_(*params_.get<std::string>("quote", "")),
headers_(),
manual_headers_(boost::trim_copy(*params_.get<std::string>("headers", ""))),
strict_(*params_.get<mapnik::boolean>("strict", false)),
quiet_(*params_.get<mapnik::boolean>("quiet", false)),
filesize_max_(*params_.get<float>("filesize_max", 20.0)) // MB
{
/* TODO:
general:
- refactor parser into generic class
- tests of grid_renderer output
- ensure that the attribute desc_ matches the first feature added
alternate large file pipeline:
- stat file, detect > 15 MB
- build up csv line-by-line iterator
- creates opportunity to filter attributes by map query
speed:
- add properties for wkt/lon/lat at parse time
- remove boost::lexical_cast
- add ability to pass 'filter' keyword to drop attributes at layer init
- create quad tree on the fly for small/med size files
- memory map large files for reading
- smaller features (less memory overhead)
usability:
- enforce column names without leading digit
- better error messages (add filepath) if not reading from string
- move to spirit to tokenize and add character level error feedback:
http://boost-spirit.com/home/articles/qi-example/tracking-the-input-position-while-parsing/
general:
- refactor parser into generic class
- tests of grid_renderer output
- ensure that the attribute desc_ matches the first feature added
alternate large file pipeline:
- stat file, detect > 15 MB
- build up csv line-by-line iterator
- creates opportunity to filter attributes by map query
speed:
- add properties for wkt/lon/lat at parse time
- remove boost::lexical_cast
- add ability to pass 'filter' keyword to drop attributes at layer init
- create quad tree on the fly for small/med size files
- memory map large files for reading
- smaller features (less memory overhead)
usability:
- enforce column names without leading digit
- better error messages (add filepath) if not reading from string
- move to spirit to tokenize and add character level error feedback:
http://boost-spirit.com/home/articles/qi-example/tracking-the-input-position-while-parsing/
*/
boost::optional<std::string> inline_string = params_.get<std::string>("inline");
if (inline_string)
{
@ -77,14 +78,14 @@ csv_datasource::csv_datasource(parameters const& params, bool bind)
{
boost::optional<std::string> file = params_.get<std::string>("file");
if (!file) throw mapnik::datasource_exception("CSV Plugin: missing <file> parameter");
boost::optional<std::string> base = params_.get<std::string>("base");
if (base)
filename_ = *base + "/" + *file;
else
filename_ = *file;
}
if (bind)
{
this->bind();
@ -97,7 +98,7 @@ csv_datasource::~csv_datasource() { }
void csv_datasource::bind() const
{
if (is_bound_) return;
if (!inline_string_.empty())
{
std::istringstream in(inline_string_);
@ -120,13 +121,13 @@ void csv_datasource::parse_csv(T& stream,
std::string const& separator,
std::string const& quote) const
{
stream.seekg (0, std::ios::end);
stream.seekg(0, std::ios::end);
file_length_ = stream.tellg();
if (filesize_max_ > 0)
{
double file_mb = static_cast<double>(file_length_)/1048576;
// throw if this is an unreasonably large file to read into memory
if (file_mb > filesize_max_)
{
@ -139,13 +140,13 @@ void csv_datasource::parse_csv(T& stream,
}
// set back to start
stream.seekg (0, std::ios::beg);
stream.seekg(0, std::ios::beg);
// autodetect newlines
char newline = '\n';
int newline_count = 0;
int carriage_count = 0;
for(unsigned idx = 0; idx < file_length_; idx++)
for (unsigned idx = 0; idx < file_length_; idx++)
{
char c = static_cast<char>(stream.get());
if (c == '\n')
@ -172,8 +173,8 @@ void csv_datasource::parse_csv(T& stream,
}
// set back to start
stream.seekg (0, std::ios::beg);
stream.seekg(0, std::ios::beg);
// get first line
std::string csv_line;
std::getline(stream,csv_line,newline);
@ -214,22 +215,22 @@ void csv_datasource::parse_csv(T& stream,
if (num_semicolons > num_commas)
{
sep = ";";
#ifdef MAPNIK_DEBUG
#ifdef MAPNIK_DEBUG
std::clog << "CSV Plugin: auto detected ';' separator\n";
#endif
#endif
}
}
}
}
// set back to start
stream.seekg (0, std::ios::beg);
stream.seekg(0, std::ios::beg);
typedef boost::escaped_list_separator<char> escape_type;
std::string esc = boost::trim_copy(escape);
if (esc.empty()) esc = "\\";
std::string quo = boost::trim_copy(quote);
if (quo.empty()) quo = "\"";
@ -240,16 +241,16 @@ void csv_datasource::parse_csv(T& stream,
boost::escaped_list_separator<char> grammer;
try
{
//grammer = boost::escaped_list_separator<char>('\\', ',', '\"');
// grammer = boost::escaped_list_separator<char>('\\', ',', '\"');
grammer = boost::escaped_list_separator<char>(esc, sep, quo);
}
catch (const std::exception & ex )
catch(const std::exception & ex)
{
std::ostringstream s;
s << "CSV Plugin: " << ex.what();
throw mapnik::datasource_exception(s.str());
}
typedef boost::tokenizer< escape_type > Tokenizer;
int line_number(1);
@ -303,7 +304,7 @@ void csv_datasource::parse_csv(T& stream,
Tokenizer tok(csv_line, grammer);
Tokenizer::iterator beg = tok.begin();
std::string val = boost::trim_copy(*beg);
// skip blank lines
if (val.empty())
{
@ -337,7 +338,7 @@ void csv_datasource::parse_csv(T& stream,
}
}
else
{
{
std::string lower_val = boost::algorithm::to_lower_copy(val);
if (lower_val == "wkt"
|| (lower_val.find("geom") != std::string::npos))
@ -367,7 +368,7 @@ void csv_datasource::parse_csv(T& stream,
break;
}
}
catch (const std::exception & ex )
catch(const std::exception & ex)
{
std::ostringstream s;
s << "CSV Plugin: error parsing headers: " << ex.what();
@ -397,7 +398,7 @@ void csv_datasource::parse_csv(T& stream,
#endif
break;
}
unsigned line_length = csv_line.length();
// skip blank lines
@ -418,7 +419,7 @@ void csv_datasource::parse_csv(T& stream,
{
Tokenizer tok(csv_line, grammer);
Tokenizer::iterator beg = tok.begin();
// early return for strict mode
if (strict_)
{
@ -430,7 +431,7 @@ void csv_datasource::parse_csv(T& stream,
throw mapnik::datasource_exception(s.str());
}
}
mapnik::feature_ptr feature(mapnik::feature_factory::create(feature_count));
double x(0);
double y(0);
@ -439,7 +440,7 @@ void csv_datasource::parse_csv(T& stream,
bool parsed_wkt = false;
bool null_geom = false;
std::vector<std::string> collected;
for (unsigned i = 0; i < num_headers; ++i)
{
std::string fld_name(headers_.at(i));
@ -449,7 +450,7 @@ void csv_datasource::parse_csv(T& stream,
{
UnicodeString ustr = tr.transcode(value.c_str());
boost::put(*feature,fld_name,ustr);
//boost::put(*feature,fld_name,mapnik::value_null());
// boost::put(*feature,fld_name,mapnik::value_null());
null_geom = true;
if (feature_count == 1)
{
@ -464,83 +465,83 @@ void csv_datasource::parse_csv(T& stream,
}
int value_length = value.length();
// parse wkt
if (has_wkt_field)
{
if (i == wkt_idx)
{
// skip empty geoms
if (value.empty())
{
null_geom = true;
break;
}
if (i == wkt_idx)
{
// skip empty geoms
if (value.empty())
{
null_geom = true;
break;
}
// optimize simple "POINT (x y)"
// using this shaved 2 seconds off csv that took 8 seconds total to parse
if (value.find("POINT") == 0)
{
using boost::phoenix::ref;
using boost::spirit::qi::_1;
std::string::const_iterator str_beg = value.begin();
std::string::const_iterator str_end = value.end();
bool r = qi::phrase_parse(str_beg,str_end,
(
qi::lit("POINT") >> '(' >> double_[ref(x) = _1] >> double_[ref(y) = _1] >> ')'
),
ascii::space);
if (r && (str_beg == str_end))
{
mapnik::geometry_type * pt = new mapnik::geometry_type(mapnik::Point);
pt->move_to(x,y);
feature->add_geometry(pt);
parsed_wkt = true;
}
else
{
std::ostringstream s;
s << "CSV Plugin: expected well known text geometry: could not parse row "
<< line_number
<< ",column "
<< i << " - found: '"
<< value << "'";
if (strict_)
{
throw mapnik::datasource_exception(s.str());
}
else
{
if (!quiet_) std::clog << s.str() << "\n";
}
}
}
else
{
if (mapnik::from_wkt(value, feature->paths()))
{
parsed_wkt = true;
}
else
{
std::ostringstream s;
s << "CSV Plugin: expected well known text geometry: could not parse row "
<< line_number
<< ",column "
<< i << " - found: '"
<< value << "'";
if (strict_)
{
throw mapnik::datasource_exception(s.str());
}
else
{
if (!quiet_) std::clog << s.str() << "\n";
}
}
}
}
// optimize simple "POINT (x y)"
// using this shaved 2 seconds off csv that took 8 seconds total to parse
if (value.find("POINT") == 0)
{
using boost::phoenix::ref;
using boost::spirit::qi::_1;
std::string::const_iterator str_beg = value.begin();
std::string::const_iterator str_end = value.end();
bool r = qi::phrase_parse(str_beg,str_end,
(
qi::lit("POINT") >> '(' >> double_[ref(x) = _1] >> double_[ref(y) = _1] >> ')'
),
ascii::space);
if (r && (str_beg == str_end))
{
mapnik::geometry_type * pt = new mapnik::geometry_type(mapnik::Point);
pt->move_to(x,y);
feature->add_geometry(pt);
parsed_wkt = true;
}
else
{
std::ostringstream s;
s << "CSV Plugin: expected well known text geometry: could not parse row "
<< line_number
<< ",column "
<< i << " - found: '"
<< value << "'";
if (strict_)
{
throw mapnik::datasource_exception(s.str());
}
else
{
if (!quiet_) std::clog << s.str() << "\n";
}
}
}
else
{
if (mapnik::from_wkt(value, feature->paths()))
{
parsed_wkt = true;
}
else
{
std::ostringstream s;
s << "CSV Plugin: expected well known text geometry: could not parse row "
<< line_number
<< ",column "
<< i << " - found: '"
<< value << "'";
if (strict_)
{
throw mapnik::datasource_exception(s.str());
}
else
{
if (!quiet_) std::clog << s.str() << "\n";
}
}
}
}
}
else
{
@ -554,12 +555,12 @@ void csv_datasource::parse_csv(T& stream,
break;
}
try
try
{
x = boost::lexical_cast<double>(value);
parsed_x = true;
}
catch (boost::bad_lexical_cast & ex)
catch(boost::bad_lexical_cast & ex)
{
std::ostringstream s;
s << "CSV Plugin: expected a float value for longitude: could not parse row "
@ -569,7 +570,7 @@ void csv_datasource::parse_csv(T& stream,
<< value << "'";
if (strict_)
{
throw mapnik::datasource_exception(s.str());
throw mapnik::datasource_exception(s.str());
}
else
{
@ -587,12 +588,12 @@ void csv_datasource::parse_csv(T& stream,
break;
}
try
try
{
y = boost::lexical_cast<double>(value);
parsed_y = true;
}
catch (boost::bad_lexical_cast & ex)
catch(boost::bad_lexical_cast & ex)
{
std::ostringstream s;
s << "CSV Plugin: expected a float value for latitude: could not parse row "
@ -602,7 +603,7 @@ void csv_datasource::parse_csv(T& stream,
<< value << "'";
if (strict_)
{
throw mapnik::datasource_exception(s.str());
throw mapnik::datasource_exception(s.str());
}
else
{
@ -611,7 +612,7 @@ void csv_datasource::parse_csv(T& stream,
}
}
}
// add all values as attributes
// here we detect numbers and treat everything else as pure strings
// this is intentional since boolean and null types are not common in csv editors
@ -633,7 +634,7 @@ void csv_datasource::parse_csv(T& stream,
{
desc_.add_descriptor(mapnik::attribute_descriptor(fld_name,mapnik::String));
}
}
else if ((value[0] >= '0' && value[0] <= '9') || value[0] == '-')
{
@ -683,7 +684,7 @@ void csv_datasource::parse_csv(T& stream,
}
}
}
if (null_geom)
{
++line_number;
@ -700,7 +701,7 @@ void csv_datasource::parse_csv(T& stream,
continue;
}
}
if (has_wkt_field)
{
if (parsed_wkt)
@ -721,7 +722,7 @@ void csv_datasource::parse_csv(T& stream,
{
std::ostringstream s;
s << "CSV Plugin: could not read WKT geometry "
<< "for line " << line_number << " - found " << headers_.size()
<< "for line " << line_number << " - found " << headers_.size()
<< " with values like: " << csv_line << "\n";
if (strict_)
{
@ -743,12 +744,12 @@ void csv_datasource::parse_csv(T& stream,
feature->add_geometry(pt);
features_.push_back(feature);
++feature_count;
if (!extent_initialized)
{
extent_initialized = true;
extent_ = feature->envelope();
}
else
{
@ -762,7 +763,7 @@ void csv_datasource::parse_csv(T& stream,
{
s << "CSV Plugin: does your csv have valid headers?\n"
<< "Could not detect or parse any rows named 'x' or 'longitude' "
<< "for line " << line_number << " but found " << headers_.size()
<< "for line " << line_number << " but found " << headers_.size()
<< " with values like: " << csv_line << "\n"
<< "for: " << boost::algorithm::join(collected, ",") << "\n";
}
@ -770,7 +771,7 @@ void csv_datasource::parse_csv(T& stream,
{
s << "CSV Plugin: does your csv have valid headers?\n"
<< "Could not detect or parse any rows named 'y' or 'latitude' "
<< "for line " << line_number << " but found " << headers_.size()
<< "for line " << line_number << " but found " << headers_.size()
<< " with values like: " << csv_line << "\n"
<< "for: " << boost::algorithm::join(collected, ",") << "\n";
}
@ -787,7 +788,7 @@ void csv_datasource::parse_csv(T& stream,
}
++line_number;
}
catch (const mapnik::datasource_exception & ex )
catch(const mapnik::datasource_exception & ex )
{
if (strict_)
{
@ -798,7 +799,7 @@ void csv_datasource::parse_csv(T& stream,
if (!quiet_) std::clog << ex.what() << "\n";
}
}
catch (const std::exception & ex )
catch(const std::exception & ex)
{
std::ostringstream s;
s << "CSV Plugin: unexpected error parsing line: " << line_number
@ -840,23 +841,23 @@ mapnik::box2d<double> csv_datasource::envelope() const
// Returns a copy of the layer descriptor (desc_) for this datasource.
// If the datasource has not been bound yet, bind() is invoked first.
mapnik::layer_descriptor csv_datasource::get_descriptor() const
{
    if (!is_bound_)
    {
        bind();
    }

    return desc_;
}
// Builds a memory_featureset over features_ using the bounding box of the
// query. If the datasource has not been bound yet, bind() is invoked first.
mapnik::featureset_ptr csv_datasource::features(mapnik::query const& q) const
{
    if (!is_bound_)
    {
        bind();
    }

    // TODO - should we check q.property_names() and throw if not found in headers_?
    // const std::set<std::string>& attribute_names = q.property_names();

    return boost::make_shared<mapnik::memory_featureset>(q.get_bbox(), features_);
}
// Point queries are not implemented: after binding (if not already bound)
// this always throws mapnik::datasource_exception. The pt argument is unused.
mapnik::featureset_ptr csv_datasource::features_at_point(mapnik::coord2d const& pt) const
{
    if (!is_bound_)
    {
        bind();
    }

    throw mapnik::datasource_exception("CSV Plugin: features_at_point is not supported yet");
}

View file

@ -7,39 +7,39 @@
// stl
#include <vector>
class csv_datasource : public mapnik::datasource
class csv_datasource : public mapnik::datasource
{
public:
csv_datasource(mapnik::parameters const& params, bool bind=true);
virtual ~csv_datasource ();
int type() const;
static std::string name();
mapnik::featureset_ptr features(mapnik::query const& q) const;
mapnik::featureset_ptr features_at_point(mapnik::coord2d const& pt) const;
mapnik::box2d<double> envelope() const;
mapnik::layer_descriptor get_descriptor() const;
void bind() const;
template <typename T>
void parse_csv(T& stream,
std::string const& escape,
std::string const& separator,
std::string const& quote) const;
private:
mutable mapnik::layer_descriptor desc_;
mutable mapnik::box2d<double> extent_;
mutable std::string filename_;
mutable std::string inline_string_;
mutable unsigned file_length_;
mutable int row_limit_;
mutable std::vector<mapnik::feature_ptr> features_;
mutable std::string escape_;
mutable std::string separator_;
mutable std::string quote_;
mutable std::vector<std::string> headers_;
mutable std::string manual_headers_;
mutable bool strict_;
mutable bool quiet_;
mutable double filesize_max_;
public:
csv_datasource(mapnik::parameters const& params, bool bind=true);
virtual ~csv_datasource ();
int type() const;
static std::string name();
mapnik::featureset_ptr features(mapnik::query const& q) const;
mapnik::featureset_ptr features_at_point(mapnik::coord2d const& pt) const;
mapnik::box2d<double> envelope() const;
mapnik::layer_descriptor get_descriptor() const;
void bind() const;
template <typename T>
void parse_csv(T& stream,
std::string const& escape,
std::string const& separator,
std::string const& quote) const;
private:
mutable mapnik::layer_descriptor desc_;
mutable mapnik::box2d<double> extent_;
mutable std::string filename_;
mutable std::string inline_string_;
mutable unsigned file_length_;
mutable int row_limit_;
mutable std::vector<mapnik::feature_ptr> features_;
mutable std::string escape_;
mutable std::string separator_;
mutable std::string quote_;
mutable std::vector<std::string> headers_;
mutable std::string manual_headers_;
mutable bool strict_;
mutable bool quiet_;
mutable double filesize_max_;
};