csv plugin: support single row data and fix warning when no data can be parsed
This commit is contained in:
parent
c1102cbb7a
commit
affecb0f32
7 changed files with 105 additions and 8 deletions
|
@ -143,7 +143,7 @@ void csv_datasource::bind() const
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void csv_datasource::parse_csv(T& stream,
|
void csv_datasource::parse_csv(T & stream,
|
||||||
std::string const& escape,
|
std::string const& escape,
|
||||||
std::string const& separator,
|
std::string const& separator,
|
||||||
std::string const& quote) const
|
std::string const& quote) const
|
||||||
|
@ -171,6 +171,7 @@ void csv_datasource::parse_csv(T& stream,
|
||||||
|
|
||||||
// autodetect newlines
|
// autodetect newlines
|
||||||
char newline = '\n';
|
char newline = '\n';
|
||||||
|
bool has_newline = false;
|
||||||
int newline_count = 0;
|
int newline_count = 0;
|
||||||
int carriage_count = 0;
|
int carriage_count = 0;
|
||||||
for (unsigned idx = 0; idx < file_length_; idx++)
|
for (unsigned idx = 0; idx < file_length_; idx++)
|
||||||
|
@ -179,10 +180,12 @@ void csv_datasource::parse_csv(T& stream,
|
||||||
if (c == '\n')
|
if (c == '\n')
|
||||||
{
|
{
|
||||||
++newline_count;
|
++newline_count;
|
||||||
|
has_newline = true;
|
||||||
}
|
}
|
||||||
else if (c == '\r')
|
else if (c == '\r')
|
||||||
{
|
{
|
||||||
++carriage_count;
|
++carriage_count;
|
||||||
|
has_newline = true;
|
||||||
}
|
}
|
||||||
// scan up to ~4000 bytes (or the whole file, if shorter) before testing
|
// scan up to ~4000 bytes (or the whole file, if shorter) before testing
|
||||||
if (idx == file_length_-1 || idx > 4000)
|
if (idx == file_length_-1 || idx > 4000)
|
||||||
|
@ -422,7 +425,7 @@ void csv_datasource::parse_csv(T& stream,
|
||||||
throw mapnik::datasource_exception(s.str());
|
throw mapnik::datasource_exception(s.str());
|
||||||
}
|
}
|
||||||
|
|
||||||
int feature_count(1);
|
int feature_count(0);
|
||||||
bool extent_initialized = false;
|
bool extent_initialized = false;
|
||||||
std::size_t num_headers = headers_.size();
|
std::size_t num_headers = headers_.size();
|
||||||
|
|
||||||
|
@ -435,12 +438,23 @@ void csv_datasource::parse_csv(T& stream,
|
||||||
mapnik::wkt_parser parse_wkt;
|
mapnik::wkt_parser parse_wkt;
|
||||||
mapnik::json::geometry_parser<std::string::const_iterator> parse_json;
|
mapnik::json::geometry_parser<std::string::const_iterator> parse_json;
|
||||||
|
|
||||||
while (std::getline(stream,csv_line,newline))
|
// handle rare case of a single line of data and user-provided headers
|
||||||
|
// where a lack of a newline will mean that std::getline returns false
|
||||||
|
bool is_first_row = false;
|
||||||
|
if (!has_newline)
|
||||||
{
|
{
|
||||||
|
stream >> csv_line;
|
||||||
|
if (!csv_line.empty())
|
||||||
|
{
|
||||||
|
is_first_row = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
while (std::getline(stream,csv_line,newline) || is_first_row)
|
||||||
|
{
|
||||||
|
is_first_row = false;
|
||||||
if ((row_limit_ > 0) && (line_number > row_limit_))
|
if ((row_limit_ > 0) && (line_number > row_limit_))
|
||||||
{
|
{
|
||||||
MAPNIK_LOG_DEBUG(csv) << "csv_datasource: row limit hit, exiting at feature: " << feature_count;
|
MAPNIK_LOG_DEBUG(csv) << "csv_datasource: row limit hit, exiting at feature: " << feature_count;
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -495,7 +509,8 @@ void csv_datasource::parse_csv(T& stream,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
mapnik::feature_ptr feature(mapnik::feature_factory::create(ctx_,feature_count));
|
// NOTE: feature_count starts at 0 and is pre-incremented here so that feature ids start at 1
|
||||||
|
mapnik::feature_ptr feature(mapnik::feature_factory::create(ctx_,++feature_count));
|
||||||
double x(0);
|
double x(0);
|
||||||
double y(0);
|
double y(0);
|
||||||
bool parsed_x = false;
|
bool parsed_x = false;
|
||||||
|
@ -754,7 +769,6 @@ void csv_datasource::parse_csv(T& stream,
|
||||||
extent_.expand_to_include(feature->envelope());
|
extent_.expand_to_include(feature->envelope());
|
||||||
}
|
}
|
||||||
features_.push_back(feature);
|
features_.push_back(feature);
|
||||||
++feature_count;
|
|
||||||
null_geom = false;
|
null_geom = false;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -782,7 +796,6 @@ void csv_datasource::parse_csv(T& stream,
|
||||||
pt->move_to(x,y);
|
pt->move_to(x,y);
|
||||||
feature->add_geometry(pt);
|
feature->add_geometry(pt);
|
||||||
features_.push_back(feature);
|
features_.push_back(feature);
|
||||||
++feature_count;
|
|
||||||
null_geom = false;
|
null_geom = false;
|
||||||
if (!extent_initialized)
|
if (!extent_initialized)
|
||||||
{
|
{
|
||||||
|
@ -836,6 +849,9 @@ void csv_datasource::parse_csv(T& stream,
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
MAPNIK_LOG_ERROR(csv) << s.str();
|
MAPNIK_LOG_ERROR(csv) << s.str();
|
||||||
|
// with no geometry we will never
|
||||||
|
// add this feature so drop the count
|
||||||
|
feature_count--;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -54,7 +54,7 @@ public:
|
||||||
void bind() const;
|
void bind() const;
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void parse_csv(T& stream,
|
void parse_csv(T & stream,
|
||||||
std::string const& escape,
|
std::string const& escape,
|
||||||
std::string const& separator,
|
std::string const& separator,
|
||||||
std::string const& quote) const;
|
std::string const& quote) const;
|
||||||
|
|
1
tests/data/csv/fails/needs_headers_one_line.csv
Normal file
1
tests/data/csv/fails/needs_headers_one_line.csv
Normal file
|
@ -0,0 +1 @@
|
||||||
|
0,0,data_name
|
|
|
@ -0,0 +1 @@
|
||||||
|
0,0,data_name
|
|
2
tests/data/csv/fails/needs_headers_two_lines.csv
Normal file
2
tests/data/csv/fails/needs_headers_two_lines.csv
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
0,0,data_name
|
||||||
|
0,0,data_name
|
|
4
tests/data/csv/warns/feature_id_counting.csv
Normal file
4
tests/data/csv/warns/feature_id_counting.csv
Normal file
|
@ -0,0 +1,4 @@
|
||||||
|
x,y,id
|
||||||
|
0,0,1
|
||||||
|
bad,bad,2
|
||||||
|
0,0,2
|
|
|
@ -418,6 +418,79 @@ if 'csv' in mapnik.DatasourceCache.instance().plugin_names():
|
||||||
# this file has more column values than headers, so loading it should throw
|
# this file has more column values than headers, so loading it should throw
|
||||||
ds = get_csv_ds('more_column_values_than_headers.csv')
|
ds = get_csv_ds('more_column_values_than_headers.csv')
|
||||||
|
|
||||||
|
def test_that_feature_id_only_incremented_for_valid_rows(**kwargs):
|
||||||
|
ds = mapnik.Datasource(type='csv',
|
||||||
|
file=os.path.join('../data/csv/warns','feature_id_counting.csv'),
|
||||||
|
quiet=True)
|
||||||
|
eq_(len(ds.fields()),3)
|
||||||
|
eq_(ds.fields(),['x','y','id'])
|
||||||
|
eq_(ds.field_types(),['int','int','int'])
|
||||||
|
fs = ds.featureset()
|
||||||
|
# first
|
||||||
|
feat = fs.next()
|
||||||
|
eq_(feat['x'],0)
|
||||||
|
eq_(feat['y'],0)
|
||||||
|
eq_(feat['id'],1)
|
||||||
|
# second, should have skipped bogus one
|
||||||
|
feat = fs.next()
|
||||||
|
eq_(feat['x'],0)
|
||||||
|
eq_(feat['y'],0)
|
||||||
|
eq_(feat['id'],2)
|
||||||
|
desc = ds.describe()
|
||||||
|
eq_(desc['geometry_type'],mapnik.DataGeometryType.Point)
|
||||||
|
eq_(len(ds.all_features()),2)
|
||||||
|
|
||||||
|
def test_dynamically_defining_headers1(**kwargs):
|
||||||
|
ds = mapnik.Datasource(type='csv',
|
||||||
|
file=os.path.join('../data/csv/fails','needs_headers_two_lines.csv'),
|
||||||
|
quiet=True,
|
||||||
|
headers='x,y,name')
|
||||||
|
eq_(len(ds.fields()),3)
|
||||||
|
eq_(ds.fields(),['x','y','name'])
|
||||||
|
eq_(ds.field_types(),['int','int','str'])
|
||||||
|
fs = ds.featureset()
|
||||||
|
feat = fs.next()
|
||||||
|
eq_(feat['x'],0)
|
||||||
|
eq_(feat['y'],0)
|
||||||
|
eq_(feat['name'],'data_name')
|
||||||
|
desc = ds.describe()
|
||||||
|
eq_(desc['geometry_type'],mapnik.DataGeometryType.Point)
|
||||||
|
eq_(len(ds.all_features()),2)
|
||||||
|
|
||||||
|
def test_dynamically_defining_headers2(**kwargs):
|
||||||
|
ds = mapnik.Datasource(type='csv',
|
||||||
|
file=os.path.join('../data/csv/fails','needs_headers_one_line.csv'),
|
||||||
|
quiet=True,
|
||||||
|
headers='x,y,name')
|
||||||
|
eq_(len(ds.fields()),3)
|
||||||
|
eq_(ds.fields(),['x','y','name'])
|
||||||
|
eq_(ds.field_types(),['int','int','str'])
|
||||||
|
fs = ds.featureset()
|
||||||
|
feat = fs.next()
|
||||||
|
eq_(feat['x'],0)
|
||||||
|
eq_(feat['y'],0)
|
||||||
|
eq_(feat['name'],'data_name')
|
||||||
|
desc = ds.describe()
|
||||||
|
eq_(desc['geometry_type'],mapnik.DataGeometryType.Point)
|
||||||
|
eq_(len(ds.all_features()),1)
|
||||||
|
|
||||||
|
def test_dynamically_defining_headers3(**kwargs):
|
||||||
|
ds = mapnik.Datasource(type='csv',
|
||||||
|
file=os.path.join('../data/csv/fails','needs_headers_one_line_no_newline.csv'),
|
||||||
|
quiet=True,
|
||||||
|
headers='x,y,name')
|
||||||
|
eq_(len(ds.fields()),3)
|
||||||
|
eq_(ds.fields(),['x','y','name'])
|
||||||
|
eq_(ds.field_types(),['int','int','str'])
|
||||||
|
fs = ds.featureset()
|
||||||
|
feat = fs.next()
|
||||||
|
eq_(feat['x'],0)
|
||||||
|
eq_(feat['y'],0)
|
||||||
|
eq_(feat['name'],'data_name')
|
||||||
|
desc = ds.describe()
|
||||||
|
eq_(desc['geometry_type'],mapnik.DataGeometryType.Point)
|
||||||
|
eq_(len(ds.all_features()),1)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
setup()
|
setup()
|
||||||
[eval(run)(visual=True) for run in dir() if 'test_' in run]
|
[eval(run)(visual=True) for run in dir() if 'test_' in run]
|
||||||
|
|
Loading…
Reference in a new issue