csv plugin: support single row data and fix warning when no data can be parsed

This commit is contained in:
Dane Springmeyer 2012-08-31 12:07:35 -07:00
parent c1102cbb7a
commit affecb0f32
7 changed files with 105 additions and 8 deletions

View file

@ -143,7 +143,7 @@ void csv_datasource::bind() const
} }
template <typename T> template <typename T>
void csv_datasource::parse_csv(T& stream, void csv_datasource::parse_csv(T & stream,
std::string const& escape, std::string const& escape,
std::string const& separator, std::string const& separator,
std::string const& quote) const std::string const& quote) const
@ -171,6 +171,7 @@ void csv_datasource::parse_csv(T& stream,
// autodetect newlines // autodetect newlines
char newline = '\n'; char newline = '\n';
bool has_newline = false;
int newline_count = 0; int newline_count = 0;
int carriage_count = 0; int carriage_count = 0;
for (unsigned idx = 0; idx < file_length_; idx++) for (unsigned idx = 0; idx < file_length_; idx++)
@ -179,10 +180,12 @@ void csv_datasource::parse_csv(T& stream,
if (c == '\n') if (c == '\n')
{ {
++newline_count; ++newline_count;
has_newline = true;
} }
else if (c == '\r') else if (c == '\r')
{ {
++carriage_count; ++carriage_count;
has_newline = true;
} }
// read at least 2000 bytes before testing // read at least 2000 bytes before testing
if (idx == file_length_-1 || idx > 4000) if (idx == file_length_-1 || idx > 4000)
@ -422,7 +425,7 @@ void csv_datasource::parse_csv(T& stream,
throw mapnik::datasource_exception(s.str()); throw mapnik::datasource_exception(s.str());
} }
int feature_count(1); int feature_count(0);
bool extent_initialized = false; bool extent_initialized = false;
std::size_t num_headers = headers_.size(); std::size_t num_headers = headers_.size();
@ -435,12 +438,23 @@ void csv_datasource::parse_csv(T& stream,
mapnik::wkt_parser parse_wkt; mapnik::wkt_parser parse_wkt;
mapnik::json::geometry_parser<std::string::const_iterator> parse_json; mapnik::json::geometry_parser<std::string::const_iterator> parse_json;
while (std::getline(stream,csv_line,newline)) // handle rare case of a single line of data and user-provided headers
// where a lack of a newline will mean that std::getline returns false
bool is_first_row = false;
if (!has_newline)
{ {
stream >> csv_line;
if (!csv_line.empty())
{
is_first_row = true;
}
}
while (std::getline(stream,csv_line,newline) || is_first_row)
{
is_first_row = false;
if ((row_limit_ > 0) && (line_number > row_limit_)) if ((row_limit_ > 0) && (line_number > row_limit_))
{ {
MAPNIK_LOG_DEBUG(csv) << "csv_datasource: row limit hit, exiting at feature: " << feature_count; MAPNIK_LOG_DEBUG(csv) << "csv_datasource: row limit hit, exiting at feature: " << feature_count;
break; break;
} }
@ -495,7 +509,8 @@ void csv_datasource::parse_csv(T& stream,
} }
} }
mapnik::feature_ptr feature(mapnik::feature_factory::create(ctx_,feature_count)); // NOTE: we use ++feature_count here because feature id's should start at 1;
mapnik::feature_ptr feature(mapnik::feature_factory::create(ctx_,++feature_count));
double x(0); double x(0);
double y(0); double y(0);
bool parsed_x = false; bool parsed_x = false;
@ -754,7 +769,6 @@ void csv_datasource::parse_csv(T& stream,
extent_.expand_to_include(feature->envelope()); extent_.expand_to_include(feature->envelope());
} }
features_.push_back(feature); features_.push_back(feature);
++feature_count;
null_geom = false; null_geom = false;
} }
else else
@ -782,7 +796,6 @@ void csv_datasource::parse_csv(T& stream,
pt->move_to(x,y); pt->move_to(x,y);
feature->add_geometry(pt); feature->add_geometry(pt);
features_.push_back(feature); features_.push_back(feature);
++feature_count;
null_geom = false; null_geom = false;
if (!extent_initialized) if (!extent_initialized)
{ {
@ -836,6 +849,9 @@ void csv_datasource::parse_csv(T& stream,
else else
{ {
MAPNIK_LOG_ERROR(csv) << s.str(); MAPNIK_LOG_ERROR(csv) << s.str();
// with no geometry we will never
// add this feature so drop the count
feature_count--;
continue; continue;
} }
} }

View file

@ -54,7 +54,7 @@ public:
void bind() const; void bind() const;
template <typename T> template <typename T>
void parse_csv(T& stream, void parse_csv(T & stream,
std::string const& escape, std::string const& escape,
std::string const& separator, std::string const& separator,
std::string const& quote) const; std::string const& quote) const;

View file

@ -0,0 +1 @@
0,0,data_name
1 0 0 data_name

View file

@ -0,0 +1 @@
0,0,data_name
1 0 0 data_name

View file

@ -0,0 +1,2 @@
0,0,data_name
0,0,data_name
1 0 0 data_name
2 0 0 data_name

View file

@ -0,0 +1,4 @@
x,y,id
0,0,1
bad,bad,2
0,0,2
1 x y id
2 0 0 1
3 bad bad 2
4 0 0 2

View file

@ -418,6 +418,79 @@ if 'csv' in mapnik.DatasourceCache.instance().plugin_names():
# this has invalid header # so throw # this has invalid header # so throw
ds = get_csv_ds('more_column_values_than_headers.csv') ds = get_csv_ds('more_column_values_than_headers.csv')
def test_that_feature_id_only_incremented_for_valid_rows(**kwargs):
    """Check that an unparseable row in feature_id_counting.csv is skipped.

    Opens the fixture through the csv datasource with ``quiet=True`` and
    verifies the field names/types, the two features that come back (in
    order), the reported geometry type, and the total feature count.

    NOTE(review): the per-feature assertions read the ``id`` attribute
    column, not the datasource-assigned feature id -- confirm whether a
    check on the latter was also intended.
    """
    csv_path = os.path.join('../data/csv/warns', 'feature_id_counting.csv')
    datasource = mapnik.Datasource(type='csv', file=csv_path, quiet=True)
    eq_(len(datasource.fields()), 3)
    eq_(datasource.fields(), ['x', 'y', 'id'])
    eq_(datasource.field_types(), ['int', 'int', 'int'])
    features = datasource.featureset()
    # Exactly two features are pulled, in order; the bogus row that sits
    # between them in the fixture must have been skipped.
    for expected_id in (1, 2):
        feature = features.next()
        eq_(feature['x'], 0)
        eq_(feature['y'], 0)
        eq_(feature['id'], expected_id)
    description = datasource.describe()
    eq_(description['geometry_type'], mapnik.DataGeometryType.Point)
    eq_(len(datasource.all_features()), 2)
def test_dynamically_defining_headers1(**kwargs):
    """Headers passed via ``headers=`` are applied to needs_headers_two_lines.csv.

    The datasource is opened with ``headers='x,y,name'``; the test then
    checks the resulting field names/types, the first feature's
    attributes, the geometry type, and that two features are produced.
    """
    csv_path = os.path.join('../data/csv/fails', 'needs_headers_two_lines.csv')
    datasource = mapnik.Datasource(type='csv',
                                   file=csv_path,
                                   quiet=True,
                                   headers='x,y,name')
    eq_(len(datasource.fields()), 3)
    eq_(datasource.fields(), ['x', 'y', 'name'])
    eq_(datasource.field_types(), ['int', 'int', 'str'])
    features = datasource.featureset()
    first = features.next()
    # Only the first feature's attributes are inspected.
    for field, expected in (('x', 0), ('y', 0), ('name', 'data_name')):
        eq_(first[field], expected)
    description = datasource.describe()
    eq_(description['geometry_type'], mapnik.DataGeometryType.Point)
    eq_(len(datasource.all_features()), 2)
def test_dynamically_defining_headers2(**kwargs):
    """Headers passed via ``headers=`` are applied to needs_headers_one_line.csv.

    Same checks as the two-line variant, except that exactly one feature
    is expected from this fixture.
    """
    csv_path = os.path.join('../data/csv/fails', 'needs_headers_one_line.csv')
    datasource = mapnik.Datasource(type='csv',
                                   file=csv_path,
                                   quiet=True,
                                   headers='x,y,name')
    eq_(len(datasource.fields()), 3)
    eq_(datasource.fields(), ['x', 'y', 'name'])
    eq_(datasource.field_types(), ['int', 'int', 'str'])
    features = datasource.featureset()
    first = features.next()
    # The single data row must come back as a feature with these values.
    for field, expected in (('x', 0), ('y', 0), ('name', 'data_name')):
        eq_(first[field], expected)
    description = datasource.describe()
    eq_(description['geometry_type'], mapnik.DataGeometryType.Point)
    eq_(len(datasource.all_features()), 1)
def test_dynamically_defining_headers3(**kwargs):
    """Headers passed via ``headers=`` work for needs_headers_one_line_no_newline.csv.

    Opens the fixture with ``headers='x,y,name'`` and expects exactly one
    feature carrying the values 0, 0 and 'data_name'.
    """
    csv_path = os.path.join('../data/csv/fails',
                            'needs_headers_one_line_no_newline.csv')
    datasource = mapnik.Datasource(type='csv',
                                   file=csv_path,
                                   quiet=True,
                                   headers='x,y,name')
    eq_(len(datasource.fields()), 3)
    eq_(datasource.fields(), ['x', 'y', 'name'])
    eq_(datasource.field_types(), ['int', 'int', 'str'])
    features = datasource.featureset()
    first = features.next()
    # The lone row must still be parsed into a feature.
    for field, expected in (('x', 0), ('y', 0), ('name', 'data_name')):
        eq_(first[field], expected)
    description = datasource.describe()
    eq_(description['geometry_type'], mapnik.DataGeometryType.Point)
    eq_(len(datasource.all_features()), 1)
if __name__ == "__main__": if __name__ == "__main__":
setup() setup()
[eval(run)(visual=True) for run in dir() if 'test_' in run] [eval(run)(visual=True) for run in dir() if 'test_' in run]