csv: code formatting
This commit is contained in:
parent
b3b938a804
commit
5205960326
2 changed files with 193 additions and 192 deletions
|
@ -13,12 +13,13 @@
|
||||||
#include <mapnik/geometry.hpp>
|
#include <mapnik/geometry.hpp>
|
||||||
#include <mapnik/memory_featureset.hpp>
|
#include <mapnik/memory_featureset.hpp>
|
||||||
#include <mapnik/wkt/wkt_factory.hpp>
|
#include <mapnik/wkt/wkt_factory.hpp>
|
||||||
#include <mapnik/ptree_helpers.hpp> // mapnik::boolean
|
#include <mapnik/ptree_helpers.hpp> // mapnik::boolean
|
||||||
|
|
||||||
// stl
|
// stl
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
#include <vector>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
using mapnik::datasource;
|
using mapnik::datasource;
|
||||||
|
@ -28,44 +29,44 @@ using namespace boost::spirit;
|
||||||
DATASOURCE_PLUGIN(csv_datasource)
|
DATASOURCE_PLUGIN(csv_datasource)
|
||||||
|
|
||||||
csv_datasource::csv_datasource(parameters const& params, bool bind)
|
csv_datasource::csv_datasource(parameters const& params, bool bind)
|
||||||
: datasource(params),
|
: datasource(params),
|
||||||
desc_(*params_.get<std::string>("type"), *params_.get<std::string>("encoding","utf-8")),
|
desc_(*params_.get<std::string>("type"), *params_.get<std::string>("encoding", "utf-8")),
|
||||||
extent_(),
|
extent_(),
|
||||||
filename_(),
|
filename_(),
|
||||||
inline_string_(),
|
inline_string_(),
|
||||||
file_length_(0),
|
file_length_(0),
|
||||||
row_limit_(*params_.get<int>("row_limit",0)),
|
row_limit_(*params_.get<int>("row_limit", 0)),
|
||||||
features_(),
|
features_(),
|
||||||
escape_(*params_.get<std::string>("escape","")),
|
escape_(*params_.get<std::string>("escape", "")),
|
||||||
separator_(*params_.get<std::string>("separator","")),
|
separator_(*params_.get<std::string>("separator", "")),
|
||||||
quote_(*params_.get<std::string>("quote","")),
|
quote_(*params_.get<std::string>("quote", "")),
|
||||||
headers_(),
|
headers_(),
|
||||||
manual_headers_(boost::trim_copy(*params_.get<std::string>("headers",""))),
|
manual_headers_(boost::trim_copy(*params_.get<std::string>("headers", ""))),
|
||||||
strict_(*params_.get<mapnik::boolean>("strict",false)),
|
strict_(*params_.get<mapnik::boolean>("strict", false)),
|
||||||
quiet_(*params_.get<mapnik::boolean>("quiet",false)),
|
quiet_(*params_.get<mapnik::boolean>("quiet", false)),
|
||||||
filesize_max_(*params_.get<float>("filesize_max",20.0)) // MB
|
filesize_max_(*params_.get<float>("filesize_max", 20.0)) // MB
|
||||||
{
|
{
|
||||||
/* TODO:
|
/* TODO:
|
||||||
general:
|
general:
|
||||||
- refactor parser into generic class
|
- refactor parser into generic class
|
||||||
- tests of grid_renderer output
|
- tests of grid_renderer output
|
||||||
- ensure that the attribute desc_ matches the first feature added
|
- ensure that the attribute desc_ matches the first feature added
|
||||||
alternate large file pipeline:
|
alternate large file pipeline:
|
||||||
- stat file, detect > 15 MB
|
- stat file, detect > 15 MB
|
||||||
- build up csv line-by-line iterator
|
- build up csv line-by-line iterator
|
||||||
- creates opportunity to filter attributes by map query
|
- creates opportunity to filter attributes by map query
|
||||||
speed:
|
speed:
|
||||||
- add properties for wkt/lon/lat at parse time
|
- add properties for wkt/lon/lat at parse time
|
||||||
- remove boost::lexical_cast
|
- remove boost::lexical_cast
|
||||||
- add ability to pass 'filter' keyword to drop attributes at layer init
|
- add ability to pass 'filter' keyword to drop attributes at layer init
|
||||||
- create quad tree on the fly for small/med size files
|
- create quad tree on the fly for small/med size files
|
||||||
- memory map large files for reading
|
- memory map large files for reading
|
||||||
- smaller features (less memory overhead)
|
- smaller features (less memory overhead)
|
||||||
usability:
|
usability:
|
||||||
- enforce column names without leading digit
|
- enforce column names without leading digit
|
||||||
- better error messages (add filepath) if not reading from string
|
- better error messages (add filepath) if not reading from string
|
||||||
- move to spirit to tokenize and add character level error feedback:
|
- move to spirit to tokenize and add character level error feedback:
|
||||||
http://boost-spirit.com/home/articles/qi-example/tracking-the-input-position-while-parsing/
|
http://boost-spirit.com/home/articles/qi-example/tracking-the-input-position-while-parsing/
|
||||||
*/
|
*/
|
||||||
|
|
||||||
boost::optional<std::string> inline_string = params_.get<std::string>("inline");
|
boost::optional<std::string> inline_string = params_.get<std::string>("inline");
|
||||||
|
@ -120,7 +121,7 @@ void csv_datasource::parse_csv(T& stream,
|
||||||
std::string const& separator,
|
std::string const& separator,
|
||||||
std::string const& quote) const
|
std::string const& quote) const
|
||||||
{
|
{
|
||||||
stream.seekg (0, std::ios::end);
|
stream.seekg(0, std::ios::end);
|
||||||
file_length_ = stream.tellg();
|
file_length_ = stream.tellg();
|
||||||
|
|
||||||
if (filesize_max_ > 0)
|
if (filesize_max_ > 0)
|
||||||
|
@ -139,13 +140,13 @@ void csv_datasource::parse_csv(T& stream,
|
||||||
}
|
}
|
||||||
|
|
||||||
// set back to start
|
// set back to start
|
||||||
stream.seekg (0, std::ios::beg);
|
stream.seekg(0, std::ios::beg);
|
||||||
|
|
||||||
// autodetect newlines
|
// autodetect newlines
|
||||||
char newline = '\n';
|
char newline = '\n';
|
||||||
int newline_count = 0;
|
int newline_count = 0;
|
||||||
int carriage_count = 0;
|
int carriage_count = 0;
|
||||||
for(unsigned idx = 0; idx < file_length_; idx++)
|
for (unsigned idx = 0; idx < file_length_; idx++)
|
||||||
{
|
{
|
||||||
char c = static_cast<char>(stream.get());
|
char c = static_cast<char>(stream.get());
|
||||||
if (c == '\n')
|
if (c == '\n')
|
||||||
|
@ -172,7 +173,7 @@ void csv_datasource::parse_csv(T& stream,
|
||||||
}
|
}
|
||||||
|
|
||||||
// set back to start
|
// set back to start
|
||||||
stream.seekg (0, std::ios::beg);
|
stream.seekg(0, std::ios::beg);
|
||||||
|
|
||||||
// get first line
|
// get first line
|
||||||
std::string csv_line;
|
std::string csv_line;
|
||||||
|
@ -214,16 +215,16 @@ void csv_datasource::parse_csv(T& stream,
|
||||||
if (num_semicolons > num_commas)
|
if (num_semicolons > num_commas)
|
||||||
{
|
{
|
||||||
sep = ";";
|
sep = ";";
|
||||||
#ifdef MAPNIK_DEBUG
|
#ifdef MAPNIK_DEBUG
|
||||||
std::clog << "CSV Plugin: auto detected ';' separator\n";
|
std::clog << "CSV Plugin: auto detected ';' separator\n";
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// set back to start
|
// set back to start
|
||||||
stream.seekg (0, std::ios::beg);
|
stream.seekg(0, std::ios::beg);
|
||||||
|
|
||||||
typedef boost::escaped_list_separator<char> escape_type;
|
typedef boost::escaped_list_separator<char> escape_type;
|
||||||
|
|
||||||
|
@ -240,10 +241,10 @@ void csv_datasource::parse_csv(T& stream,
|
||||||
boost::escaped_list_separator<char> grammer;
|
boost::escaped_list_separator<char> grammer;
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
//grammer = boost::escaped_list_separator<char>('\\', ',', '\"');
|
// grammer = boost::escaped_list_separator<char>('\\', ',', '\"');
|
||||||
grammer = boost::escaped_list_separator<char>(esc, sep, quo);
|
grammer = boost::escaped_list_separator<char>(esc, sep, quo);
|
||||||
}
|
}
|
||||||
catch (const std::exception & ex )
|
catch(const std::exception & ex)
|
||||||
{
|
{
|
||||||
std::ostringstream s;
|
std::ostringstream s;
|
||||||
s << "CSV Plugin: " << ex.what();
|
s << "CSV Plugin: " << ex.what();
|
||||||
|
@ -367,7 +368,7 @@ void csv_datasource::parse_csv(T& stream,
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
catch (const std::exception & ex )
|
catch(const std::exception & ex)
|
||||||
{
|
{
|
||||||
std::ostringstream s;
|
std::ostringstream s;
|
||||||
s << "CSV Plugin: error parsing headers: " << ex.what();
|
s << "CSV Plugin: error parsing headers: " << ex.what();
|
||||||
|
@ -449,7 +450,7 @@ void csv_datasource::parse_csv(T& stream,
|
||||||
{
|
{
|
||||||
UnicodeString ustr = tr.transcode(value.c_str());
|
UnicodeString ustr = tr.transcode(value.c_str());
|
||||||
boost::put(*feature,fld_name,ustr);
|
boost::put(*feature,fld_name,ustr);
|
||||||
//boost::put(*feature,fld_name,mapnik::value_null());
|
// boost::put(*feature,fld_name,mapnik::value_null());
|
||||||
null_geom = true;
|
null_geom = true;
|
||||||
if (feature_count == 1)
|
if (feature_count == 1)
|
||||||
{
|
{
|
||||||
|
@ -468,79 +469,79 @@ void csv_datasource::parse_csv(T& stream,
|
||||||
// parse wkt
|
// parse wkt
|
||||||
if (has_wkt_field)
|
if (has_wkt_field)
|
||||||
{
|
{
|
||||||
if (i == wkt_idx)
|
if (i == wkt_idx)
|
||||||
{
|
{
|
||||||
// skip empty geoms
|
// skip empty geoms
|
||||||
if (value.empty())
|
if (value.empty())
|
||||||
{
|
{
|
||||||
null_geom = true;
|
null_geom = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
// optimize simple "POINT (x y)"
|
// optimize simple "POINT (x y)"
|
||||||
// using this shaved 2 seconds off csv that took 8 seconds total to parse
|
// using this shaved 2 seconds off csv that took 8 seconds total to parse
|
||||||
if (value.find("POINT") == 0)
|
if (value.find("POINT") == 0)
|
||||||
{
|
{
|
||||||
using boost::phoenix::ref;
|
using boost::phoenix::ref;
|
||||||
using boost::spirit::qi::_1;
|
using boost::spirit::qi::_1;
|
||||||
std::string::const_iterator str_beg = value.begin();
|
std::string::const_iterator str_beg = value.begin();
|
||||||
std::string::const_iterator str_end = value.end();
|
std::string::const_iterator str_end = value.end();
|
||||||
bool r = qi::phrase_parse(str_beg,str_end,
|
bool r = qi::phrase_parse(str_beg,str_end,
|
||||||
(
|
(
|
||||||
qi::lit("POINT") >> '(' >> double_[ref(x) = _1] >> double_[ref(y) = _1] >> ')'
|
qi::lit("POINT") >> '(' >> double_[ref(x) = _1] >> double_[ref(y) = _1] >> ')'
|
||||||
),
|
),
|
||||||
ascii::space);
|
ascii::space);
|
||||||
|
|
||||||
if (r && (str_beg == str_end))
|
if (r && (str_beg == str_end))
|
||||||
{
|
{
|
||||||
mapnik::geometry_type * pt = new mapnik::geometry_type(mapnik::Point);
|
mapnik::geometry_type * pt = new mapnik::geometry_type(mapnik::Point);
|
||||||
pt->move_to(x,y);
|
pt->move_to(x,y);
|
||||||
feature->add_geometry(pt);
|
feature->add_geometry(pt);
|
||||||
parsed_wkt = true;
|
parsed_wkt = true;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
std::ostringstream s;
|
std::ostringstream s;
|
||||||
s << "CSV Plugin: expected well known text geometry: could not parse row "
|
s << "CSV Plugin: expected well known text geometry: could not parse row "
|
||||||
<< line_number
|
<< line_number
|
||||||
<< ",column "
|
<< ",column "
|
||||||
<< i << " - found: '"
|
<< i << " - found: '"
|
||||||
<< value << "'";
|
<< value << "'";
|
||||||
if (strict_)
|
if (strict_)
|
||||||
{
|
{
|
||||||
throw mapnik::datasource_exception(s.str());
|
throw mapnik::datasource_exception(s.str());
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (!quiet_) std::clog << s.str() << "\n";
|
if (!quiet_) std::clog << s.str() << "\n";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (mapnik::from_wkt(value, feature->paths()))
|
if (mapnik::from_wkt(value, feature->paths()))
|
||||||
{
|
{
|
||||||
parsed_wkt = true;
|
parsed_wkt = true;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
std::ostringstream s;
|
std::ostringstream s;
|
||||||
s << "CSV Plugin: expected well known text geometry: could not parse row "
|
s << "CSV Plugin: expected well known text geometry: could not parse row "
|
||||||
<< line_number
|
<< line_number
|
||||||
<< ",column "
|
<< ",column "
|
||||||
<< i << " - found: '"
|
<< i << " - found: '"
|
||||||
<< value << "'";
|
<< value << "'";
|
||||||
if (strict_)
|
if (strict_)
|
||||||
{
|
{
|
||||||
throw mapnik::datasource_exception(s.str());
|
throw mapnik::datasource_exception(s.str());
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (!quiet_) std::clog << s.str() << "\n";
|
if (!quiet_) std::clog << s.str() << "\n";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -559,7 +560,7 @@ void csv_datasource::parse_csv(T& stream,
|
||||||
x = boost::lexical_cast<double>(value);
|
x = boost::lexical_cast<double>(value);
|
||||||
parsed_x = true;
|
parsed_x = true;
|
||||||
}
|
}
|
||||||
catch (boost::bad_lexical_cast & ex)
|
catch(boost::bad_lexical_cast & ex)
|
||||||
{
|
{
|
||||||
std::ostringstream s;
|
std::ostringstream s;
|
||||||
s << "CSV Plugin: expected a float value for longitude: could not parse row "
|
s << "CSV Plugin: expected a float value for longitude: could not parse row "
|
||||||
|
@ -592,7 +593,7 @@ void csv_datasource::parse_csv(T& stream,
|
||||||
y = boost::lexical_cast<double>(value);
|
y = boost::lexical_cast<double>(value);
|
||||||
parsed_y = true;
|
parsed_y = true;
|
||||||
}
|
}
|
||||||
catch (boost::bad_lexical_cast & ex)
|
catch(boost::bad_lexical_cast & ex)
|
||||||
{
|
{
|
||||||
std::ostringstream s;
|
std::ostringstream s;
|
||||||
s << "CSV Plugin: expected a float value for latitude: could not parse row "
|
s << "CSV Plugin: expected a float value for latitude: could not parse row "
|
||||||
|
@ -787,7 +788,7 @@ void csv_datasource::parse_csv(T& stream,
|
||||||
}
|
}
|
||||||
++line_number;
|
++line_number;
|
||||||
}
|
}
|
||||||
catch (const mapnik::datasource_exception & ex )
|
catch(const mapnik::datasource_exception & ex )
|
||||||
{
|
{
|
||||||
if (strict_)
|
if (strict_)
|
||||||
{
|
{
|
||||||
|
@ -798,7 +799,7 @@ void csv_datasource::parse_csv(T& stream,
|
||||||
if (!quiet_) std::clog << ex.what() << "\n";
|
if (!quiet_) std::clog << ex.what() << "\n";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
catch (const std::exception & ex )
|
catch(const std::exception & ex)
|
||||||
{
|
{
|
||||||
std::ostringstream s;
|
std::ostringstream s;
|
||||||
s << "CSV Plugin: unexpected error parsing line: " << line_number
|
s << "CSV Plugin: unexpected error parsing line: " << line_number
|
||||||
|
@ -848,8 +849,8 @@ mapnik::featureset_ptr csv_datasource::features(mapnik::query const& q) const
|
||||||
{
|
{
|
||||||
if (!is_bound_) bind();
|
if (!is_bound_) bind();
|
||||||
|
|
||||||
// TODO - should we check q.property_names() and throw if not found in headers_?
|
// TODO - should we check q.property_names() and throw if not found in headers_?
|
||||||
//const std::set<std::string>& attribute_names = q.property_names();
|
// const std::set<std::string>& attribute_names = q.property_names();
|
||||||
|
|
||||||
return boost::make_shared<mapnik::memory_featureset>(q.get_bbox(),features_);
|
return boost::make_shared<mapnik::memory_featureset>(q.get_bbox(),features_);
|
||||||
}
|
}
|
||||||
|
|
|
@ -9,37 +9,37 @@
|
||||||
|
|
||||||
class csv_datasource : public mapnik::datasource
|
class csv_datasource : public mapnik::datasource
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
csv_datasource(mapnik::parameters const& params, bool bind=true);
|
csv_datasource(mapnik::parameters const& params, bool bind=true);
|
||||||
virtual ~csv_datasource ();
|
virtual ~csv_datasource ();
|
||||||
int type() const;
|
int type() const;
|
||||||
static std::string name();
|
static std::string name();
|
||||||
mapnik::featureset_ptr features(mapnik::query const& q) const;
|
mapnik::featureset_ptr features(mapnik::query const& q) const;
|
||||||
mapnik::featureset_ptr features_at_point(mapnik::coord2d const& pt) const;
|
mapnik::featureset_ptr features_at_point(mapnik::coord2d const& pt) const;
|
||||||
mapnik::box2d<double> envelope() const;
|
mapnik::box2d<double> envelope() const;
|
||||||
mapnik::layer_descriptor get_descriptor() const;
|
mapnik::layer_descriptor get_descriptor() const;
|
||||||
void bind() const;
|
void bind() const;
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void parse_csv(T& stream,
|
void parse_csv(T& stream,
|
||||||
std::string const& escape,
|
std::string const& escape,
|
||||||
std::string const& separator,
|
std::string const& separator,
|
||||||
std::string const& quote) const;
|
std::string const& quote) const;
|
||||||
private:
|
private:
|
||||||
mutable mapnik::layer_descriptor desc_;
|
mutable mapnik::layer_descriptor desc_;
|
||||||
mutable mapnik::box2d<double> extent_;
|
mutable mapnik::box2d<double> extent_;
|
||||||
mutable std::string filename_;
|
mutable std::string filename_;
|
||||||
mutable std::string inline_string_;
|
mutable std::string inline_string_;
|
||||||
mutable unsigned file_length_;
|
mutable unsigned file_length_;
|
||||||
mutable int row_limit_;
|
mutable int row_limit_;
|
||||||
mutable std::vector<mapnik::feature_ptr> features_;
|
mutable std::vector<mapnik::feature_ptr> features_;
|
||||||
mutable std::string escape_;
|
mutable std::string escape_;
|
||||||
mutable std::string separator_;
|
mutable std::string separator_;
|
||||||
mutable std::string quote_;
|
mutable std::string quote_;
|
||||||
mutable std::vector<std::string> headers_;
|
mutable std::vector<std::string> headers_;
|
||||||
mutable std::string manual_headers_;
|
mutable std::string manual_headers_;
|
||||||
mutable bool strict_;
|
mutable bool strict_;
|
||||||
mutable bool quiet_;
|
mutable bool quiet_;
|
||||||
mutable double filesize_max_;
|
mutable double filesize_max_;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue