CSV plugin: support reading geojson encoded geometries in various flavors of quoting fun - closes #1392
This commit is contained in:
parent
99705308a9
commit
f42805a532
6 changed files with 220 additions and 11 deletions
|
@ -21,6 +21,7 @@
|
|||
*****************************************************************************/
|
||||
|
||||
#include "csv_datasource.hpp"
|
||||
#include "csv_utils.hpp"
|
||||
|
||||
// boost
|
||||
#include <boost/make_shared.hpp>
|
||||
|
@ -36,6 +37,7 @@
|
|||
#include <mapnik/geometry.hpp>
|
||||
#include <mapnik/memory_featureset.hpp>
|
||||
#include <mapnik/wkt/wkt_factory.hpp>
|
||||
#include <mapnik/json/geometry_parser.hpp>
|
||||
#include <mapnik/util/geometry_to_ds_type.hpp>
|
||||
#include <mapnik/util/conversions.hpp>
|
||||
#include <mapnik/boolean.hpp>
|
||||
|
@ -82,7 +84,7 @@ csv_datasource::csv_datasource(parameters const& params, bool bind)
|
|||
- build up csv line-by-line iterator
|
||||
- creates opportunity to filter attributes by map query
|
||||
speed:
|
||||
- add properties for wkt/lon/lat at parse time
|
||||
- add properties for wkt/json/lon/lat at parse time
|
||||
- add ability to pass 'filter' keyword to drop attributes at layer init
|
||||
- create quad tree on the fly for small/med size files
|
||||
- memory map large files for reading
|
||||
|
@ -264,7 +266,7 @@ void csv_datasource::parse_csv(T& stream,
|
|||
// grammer = boost::escaped_list_separator<char>('\\', ',', '\"');
|
||||
grammer = boost::escaped_list_separator<char>(esc, sep, quo);
|
||||
}
|
||||
catch(const std::exception & ex)
|
||||
catch(std::exception const& ex)
|
||||
{
|
||||
std::ostringstream s;
|
||||
s << "CSV Plugin: " << ex.what();
|
||||
|
@ -275,9 +277,11 @@ void csv_datasource::parse_csv(T& stream,
|
|||
|
||||
int line_number(1);
|
||||
bool has_wkt_field = false;
|
||||
bool has_json_field = false;
|
||||
bool has_lat_field = false;
|
||||
bool has_lon_field = false;
|
||||
unsigned wkt_idx(0);
|
||||
unsigned json_idx(0);
|
||||
unsigned lat_idx(0);
|
||||
unsigned lon_idx(0);
|
||||
|
||||
|
@ -296,6 +300,11 @@ void csv_datasource::parse_csv(T& stream,
|
|||
wkt_idx = idx;
|
||||
has_wkt_field = true;
|
||||
}
|
||||
if (lower_val == "geojson")
|
||||
{
|
||||
json_idx = idx;
|
||||
has_json_field = true;
|
||||
}
|
||||
if (lower_val == "x"
|
||||
|| lower_val == "lon"
|
||||
|| lower_val == "lng"
|
||||
|
@ -369,6 +378,11 @@ void csv_datasource::parse_csv(T& stream,
|
|||
wkt_idx = idx;
|
||||
has_wkt_field = true;
|
||||
}
|
||||
if (lower_val == "geojson")
|
||||
{
|
||||
json_idx = idx;
|
||||
has_json_field = true;
|
||||
}
|
||||
if (lower_val == "x"
|
||||
|| lower_val == "lon"
|
||||
|| lower_val == "lng"
|
||||
|
@ -401,10 +415,10 @@ void csv_datasource::parse_csv(T& stream,
|
|||
}
|
||||
}
|
||||
|
||||
if (!has_wkt_field && (!has_lon_field || !has_lat_field) )
|
||||
if (!has_wkt_field && !has_json_field && (!has_lon_field || !has_lat_field) )
|
||||
{
|
||||
std::ostringstream s;
|
||||
s << "CSV Plugin: could not detect column headers with the name of wkt, x/y, or latitude/longitude - this is required for reading geometry data";
|
||||
s << "CSV Plugin: could not detect column headers with the name of wkt, geojson, x/y, or latitude/longitude - this is required for reading geometry data";
|
||||
throw mapnik::datasource_exception(s.str());
|
||||
}
|
||||
|
||||
|
@ -444,6 +458,13 @@ void csv_datasource::parse_csv(T& stream,
|
|||
|
||||
try
|
||||
{
|
||||
// special handling for varieties of quoting that we will encounter with json
|
||||
// TODO - test with custom "quo" option
|
||||
if (has_json_field && (quo == "\"") && (std::count(csv_line.begin(), csv_line.end(), '"') >= 6))
|
||||
{
|
||||
csv_utils::fix_json_quoting(csv_line);
|
||||
}
|
||||
|
||||
Tokenizer tok(csv_line, grammer);
|
||||
Tokenizer::iterator beg = tok.begin();
|
||||
|
||||
|
@ -465,6 +486,7 @@ void csv_datasource::parse_csv(T& stream,
|
|||
bool parsed_x = false;
|
||||
bool parsed_y = false;
|
||||
bool parsed_wkt = false;
|
||||
bool parsed_json = false;
|
||||
bool null_geom = false;
|
||||
std::vector<std::string> collected;
|
||||
|
||||
|
@ -570,6 +592,42 @@ void csv_datasource::parse_csv(T& stream,
|
|||
}
|
||||
}
|
||||
}
|
||||
// TODO - support both wkt/geojson columns
|
||||
// at once to create multi-geoms?
|
||||
// parse as geojson
|
||||
else if (has_json_field)
|
||||
{
|
||||
if (i == json_idx)
|
||||
{
|
||||
// skip empty geoms
|
||||
if (value.empty())
|
||||
{
|
||||
null_geom = true;
|
||||
break;
|
||||
}
|
||||
if (mapnik::json::from_geojson(value, feature->paths()))
|
||||
{
|
||||
parsed_json = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
std::ostringstream s;
|
||||
s << "CSV Plugin: expected geojson geometry: could not parse row "
|
||||
<< line_number
|
||||
<< ",column "
|
||||
<< i << " - found: '"
|
||||
<< value << "'";
|
||||
if (strict_)
|
||||
{
|
||||
throw mapnik::datasource_exception(s.str());
|
||||
}
|
||||
else
|
||||
{
|
||||
MAPNIK_LOG_ERROR(csv) << s.str();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// longitude
|
||||
|
@ -730,9 +788,9 @@ void csv_datasource::parse_csv(T& stream,
|
|||
}
|
||||
}
|
||||
|
||||
if (has_wkt_field)
|
||||
if (has_wkt_field || has_json_field)
|
||||
{
|
||||
if (parsed_wkt)
|
||||
if (parsed_wkt || parsed_json)
|
||||
{
|
||||
if (!extent_initialized)
|
||||
{
|
||||
|
@ -749,7 +807,7 @@ void csv_datasource::parse_csv(T& stream,
|
|||
else
|
||||
{
|
||||
std::ostringstream s;
|
||||
s << "CSV Plugin: could not read WKT geometry "
|
||||
s << "CSV Plugin: could not read WKT or GeoJSON geometry "
|
||||
<< "for line " << line_number << " - found " << headers_.size()
|
||||
<< " with values like: " << csv_line << "\n";
|
||||
if (strict_)
|
||||
|
|
86
plugins/input/csv/csv_utils.hpp
Normal file
86
plugins/input/csv/csv_utils.hpp
Normal file
|
@ -0,0 +1,86 @@
|
|||
/*****************************************************************************
|
||||
*
|
||||
* This file is part of Mapnik (c++ mapping toolkit)
|
||||
*
|
||||
* Copyright (C) 2012 Artem Pavlenko
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef MAPNIK_CSV_UTILS_DATASOURCE_HPP
|
||||
#define MAPNIK_CSV_UTILS_DATASOURCE_HPP
|
||||
|
||||
|
||||
#include <string>
|
||||
#include <boost/algorithm/string.hpp>
|
||||
|
||||
namespace csv_utils
{

namespace detail
{
// Replace every occurrence of `from` in `s` with `to`, in place.
// std-only stand-in for boost::algorithm::replace_all so this helper
// has no boost dependency.
static void replace_all(std::string & s, std::string const& from, std::string const& to)
{
    if (from.empty()) return;
    std::string::size_type pos = 0;
    while ((pos = s.find(from, pos)) != std::string::npos)
    {
        s.replace(pos, from.size(), to);
        pos += to.size();
    }
}
}

// Normalize the quoting of an inline GeoJSON field inside a raw CSV line so
// that a boost::escaped_list_separator tokenizer (escape='\\', quote='"')
// later yields the JSON text intact.
//
// Handles two flavors seen in the wild:
//  * RFC 4180 doubled quotes inside a double-quoted field:
//      "{""type"":""Point"",...}"  ->  "{\"type\":\"Point\",...}"
//  * a single-quoted JSON chunk:
//      '{"type":"Point",...}'      ->  "{\"type\":\"Point\",...}"
// Chunks that already use backslash-escaped quotes (\") need no special
// handling and are left untouched, as is any line without a complete
// wrapped {...} chunk.
static void fix_json_quoting(std::string & csv_line)
{
    std::string wrapping_char;
    std::string::size_type j_idx = std::string::npos;
    std::string::size_type post_idx = std::string::npos;
    std::string::size_type j_idx_double = csv_line.find("\"{");
    std::string::size_type j_idx_single = csv_line.find("'{");
    if (j_idx_double != std::string::npos)
    {
        wrapping_char = "\"";
        j_idx = j_idx_double;
        post_idx = csv_line.find("}\"");
    }
    else if (j_idx_single != std::string::npos)
    {
        wrapping_char = "'";
        j_idx = j_idx_single;
        post_idx = csv_line.find("}'");
    }
    // bail out unless we found both the opening and the closing wrapper
    // (previously only the opening marker was checked, so a line like
    // `a,"{broken` read npos-based indices and corrupted the line)
    if (wrapping_char.empty() || post_idx == std::string::npos || post_idx < j_idx)
    {
        return;
    }
    // index one past the closing wrapper character ('}' + quote)
    std::string::size_type chunk_end = post_idx + 1 + wrapping_char.size();
    // grab the json chunk, wrapping quotes included
    // (fixes the earlier substr misuse that passed an index as a length and
    // then re-appended part of the chunk, duplicating the closing quote)
    std::string json_chunk = csv_line.substr(j_idx, chunk_end - j_idx);
    // ignore properly escaped quotes like \" which need no special handling
    if (json_chunk.find("\\\"") != std::string::npos)
    {
        return;
    }
    std::string pre_json = csv_line.substr(0, j_idx);
    std::string post_json = csv_line.substr(chunk_end);
    if (json_chunk.find("\"\"") != std::string::npos)
    {
        // handle "" in a string wrapped in "
        // http://tools.ietf.org/html/rfc4180#section-2 item 7.
        // e.g. "{""type"":""Point"",""coordinates"":[30.0,10.0]}"
        detail::replace_all(json_chunk, "\"\"", "\\\"");
    }
    else
    {
        // handle " in a string wrapped in '
        // e.g. '{"type":"Point","coordinates":[30.0,10.0]}'
        // escape " because we cannot exchange for single quotes
        // https://github.com/mapnik/mapnik/issues/1408
        detail::replace_all(json_chunk, "\"", "\\\"");
        detail::replace_all(json_chunk, "'", "\"");
    }
    csv_line = pre_json + json_chunk + post_json;
}
}
|
||||
|
||||
#endif // MAPNIK_CSV_UTILS_DATASOURCE_HPP
|
10
tests/data/csv/geojson_2x_double_quote_filebakery_style.csv
Normal file
10
tests/data/csv/geojson_2x_double_quote_filebakery_style.csv
Normal file
|
@ -0,0 +1,10 @@
|
|||
type,GeoJSON
|
||||
point, "{""type"":""Point"",""coordinates"":[30.0,10.0]}"
|
||||
linestring, "{""type"":""LineString"",""coordinates"":[[30.0,10.0],[10.0,30.0],[40.0,40.0]]}"
|
||||
polygon, "{""type"":""Polygon"",""coordinates"":[[[30.0,10.0],[10.0,20.0],[20.0,40.0],[40.0,40.0],[30.0,10.0]]]}"
|
||||
polygon, "{""type"":""Polygon"",""coordinates"":[[[35.0,10.0],[10.0,20.0],[15.0,40.0],[45.0,45.0],[35.0,10.0]],[[20.0,30.0],[35.0,35.0],[30.0,20.0],[20.0,30.0]]]}"
|
||||
multipoint, "{""type"":""MultiPoint"",""coordinates"":[[10.0,40.0],[40.0,30.0],[20.0,20.0],[30.0,10.0]]}"
|
||||
multilinestring, "{""type"":""MultiLineString"",""coordinates"":[[[10.0,10.0],[20.0,20.0],[10.0,40.0]],[[40.0,40.0],[30.0,30.0],[40.0,20.0],[30.0,10.0]]]}"
|
||||
multipolygon, "{""type"":""MultiPolygon"",""coordinates"":[[[[30.0,20.0],[10.0,40.0],[45.0,40.0],[30.0,20.0]]],[[[15.0,5.0],[40.0,10.0],[10.0,20.0],[5.0,10.0],[15.0,5.0]]]]}"
|
||||
multipolygon, "{""type"":""MultiPolygon"",""coordinates"":[[[[40.0,40.0],[20.0,45.0],[45.0,30.0],[40.0,40.0]]],[[[20.0,35.0],[45.0,20.0],[30.0,5.0],[10.0,10.0],[10.0,30.0],[20.0,35.0]],[[30.0,20.0],[20.0,25.0],[20.0,15.0],[30.0,20.0]]]]}"
|
||||
collection, "{""type"":""GeometryCollection"",""geometries"":[{""type"":""Polygon"",""coordinates"":[[[1.0,1.0],[2.0,1.0],[2.0,2.0],[1.0,2.0],[1.0,1.0]]]},{""type"":""Point"",""coordinates"":[2.0,3.0]},{""type"":""LineString"",""coordinates"":[[2.0,3.0],[3.0,4.0]]}]}"
|
|
10
tests/data/csv/geojson_double_quote_escape.csv
Normal file
10
tests/data/csv/geojson_double_quote_escape.csv
Normal file
|
@ -0,0 +1,10 @@
|
|||
type,GeoJSON
|
||||
point, "{\"type\":\"Point\",\"coordinates\":[30.0,10.0]}"
|
||||
linestring, "{\"type\":\"LineString\",\"coordinates\":[[30.0,10.0],[10.0,30.0],[40.0,40.0]]}"
|
||||
polygon, "{\"type\":\"Polygon\",\"coordinates\":[[[30.0,10.0],[10.0,20.0],[20.0,40.0],[40.0,40.0],[30.0,10.0]]]}"
|
||||
polygon, "{\"type\":\"Polygon\",\"coordinates\":[[[35.0,10.0],[10.0,20.0],[15.0,40.0],[45.0,45.0],[35.0,10.0]],[[20.0,30.0],[35.0,35.0],[30.0,20.0],[20.0,30.0]]]}"
|
||||
multipoint, "{\"type\":\"MultiPoint\",\"coordinates\":[[10.0,40.0],[40.0,30.0],[20.0,20.0],[30.0,10.0]]}"
|
||||
multilinestring, "{\"type\":\"MultiLineString\",\"coordinates\":[[[10.0,10.0],[20.0,20.0],[10.0,40.0]],[[40.0,40.0],[30.0,30.0],[40.0,20.0],[30.0,10.0]]]}"
|
||||
multipolygon, "{\"type\":\"MultiPolygon\",\"coordinates\":[[[[30.0,20.0],[10.0,40.0],[45.0,40.0],[30.0,20.0]]],[[[15.0,5.0],[40.0,10.0],[10.0,20.0],[5.0,10.0],[15.0,5.0]]]]}"
|
||||
multipolygon, "{\"type\":\"MultiPolygon\",\"coordinates\":[[[[40.0,40.0],[20.0,45.0],[45.0,30.0],[40.0,40.0]]],[[[20.0,35.0],[45.0,20.0],[30.0,5.0],[10.0,10.0],[10.0,30.0],[20.0,35.0]],[[30.0,20.0],[20.0,25.0],[20.0,15.0],[30.0,20.0]]]]}"
|
||||
collection, "{\"type\":\"GeometryCollection\",\"geometries\":[{\"type\":\"Polygon\",\"coordinates\":[[[1.0,1.0],[2.0,1.0],[2.0,2.0],[1.0,2.0],[1.0,1.0]]]},{\"type\":\"Point\",\"coordinates\":[2.0,3.0]},{\"type\":\"LineString\",\"coordinates\":[[2.0,3.0],[3.0,4.0]]}]}"
|
Can't render this file because it contains an unexpected character in line 2 and column 21.
|
10
tests/data/csv/geojson_single_quote.csv
Normal file
10
tests/data/csv/geojson_single_quote.csv
Normal file
|
@ -0,0 +1,10 @@
|
|||
type,GeoJSON
|
||||
point, '{"type":"Point","coordinates":[30.0,10.0]}'
|
||||
linestring, '{"type":"LineString","coordinates":[[30.0,10.0],[10.0,30.0],[40.0,40.0]]}'
|
||||
polygon, '{"type":"Polygon","coordinates":[[[30.0,10.0],[10.0,20.0],[20.0,40.0],[40.0,40.0],[30.0,10.0]]]}'
|
||||
polygon, '{"type":"Polygon","coordinates":[[[35.0,10.0],[10.0,20.0],[15.0,40.0],[45.0,45.0],[35.0,10.0]],[[20.0,30.0],[35.0,35.0],[30.0,20.0],[20.0,30.0]]]}'
|
||||
multipoint, '{"type":"MultiPoint","coordinates":[[10.0,40.0],[40.0,30.0],[20.0,20.0],[30.0,10.0]]}'
|
||||
multilinestring, '{"type":"MultiLineString","coordinates":[[[10.0,10.0],[20.0,20.0],[10.0,40.0]],[[40.0,40.0],[30.0,30.0],[40.0,20.0],[30.0,10.0]]]}'
|
||||
multipolygon, '{"type":"MultiPolygon","coordinates":[[[[30.0,20.0],[10.0,40.0],[45.0,40.0],[30.0,20.0]]],[[[15.0,5.0],[40.0,10.0],[10.0,20.0],[5.0,10.0],[15.0,5.0]]]]}'
|
||||
multipolygon, '{"type":"MultiPolygon","coordinates":[[[[40.0,40.0],[20.0,45.0],[45.0,30.0],[40.0,40.0]]],[[[20.0,35.0],[45.0,20.0],[30.0,5.0],[10.0,10.0],[10.0,30.0],[20.0,35.0]],[[30.0,20.0],[20.0,25.0],[20.0,15.0],[30.0,20.0]]]]}'
|
||||
collection, '{"type":"GeometryCollection","geometries":[{"type":"Polygon","coordinates":[[[1.0,1.0],[2.0,1.0],[2.0,2.0],[1.0,2.0],[1.0,1.0]]]},{"type":"Point","coordinates":[2.0,3.0]},{"type":"LineString","coordinates":[[2.0,3.0],[3.0,4.0]]}]}'
|
Can't render this file because it contains an unexpected character in line 2 and column 20.
|
|
@ -141,7 +141,6 @@ if 'csv' in mapnik.DatasourceCache.instance().plugin_names():
|
|||
eq_(ds.fields(),['type','WKT'])
|
||||
eq_(ds.field_types(),['str','str'])
|
||||
fs = ds.all_features()
|
||||
#import pdb;pdb.set_trace()
|
||||
eq_(len(fs[0].geometries()),1)
|
||||
eq_(fs[0].geometries()[0].type(),mapnik.DataGeometryType.Point)
|
||||
eq_(len(fs[1].geometries()),1)
|
||||
|
@ -150,9 +149,6 @@ if 'csv' in mapnik.DatasourceCache.instance().plugin_names():
|
|||
eq_(fs[2].geometries()[0].type(),mapnik.DataGeometryType.Polygon)
|
||||
eq_(len(fs[3].geometries()),1) # one geometry, two parts
|
||||
eq_(fs[3].geometries()[0].type(),mapnik.DataGeometryType.Polygon)
|
||||
# tests assuming we want to flatten geometries
|
||||
# ideally we should not have to:
|
||||
# https://github.com/mapnik/mapnik/issues?labels=multigeom+robustness&sort=created&direction=desc&state=open&page=1
|
||||
eq_(len(fs[4].geometries()),4)
|
||||
eq_(fs[4].geometries()[0].type(),mapnik.DataGeometryType.Point)
|
||||
eq_(len(fs[5].geometries()),2)
|
||||
|
@ -363,6 +359,45 @@ if 'csv' in mapnik.DatasourceCache.instance().plugin_names():
|
|||
feat = fs.next()
|
||||
eq_(feat['Name'],u"Winthrop, WA")
|
||||
|
||||
def validate_geojson_datasource(ds):
    # Shared checks for a CSV datasource whose geometries come from a
    # 'GeoJSON' column: schema, per-row geometry counts/types, and the
    # datasource-level description.
    eq_(len(ds.fields()),2)
    eq_(ds.fields(),['type','GeoJSON'])
    eq_(ds.field_types(),['str','str'])
    feats = ds.all_features()
    # (geometry count, type of first geometry) expected for each row, in file order
    expected = [
        (1, mapnik.DataGeometryType.Point),
        (1, mapnik.DataGeometryType.LineString),
        (1, mapnik.DataGeometryType.Polygon),
        (1, mapnik.DataGeometryType.Polygon),  # one geometry, two parts
        (4, mapnik.DataGeometryType.Point),
        (2, mapnik.DataGeometryType.LineString),
        (2, mapnik.DataGeometryType.Polygon),
        (2, mapnik.DataGeometryType.Polygon),
    ]
    for row, (count, geom_type) in enumerate(expected):
        geoms = feats[row].geometries()
        eq_(len(geoms), count)
        eq_(geoms[0].type(), geom_type)
    desc = ds.describe()
    eq_(desc['geometry_type'],mapnik.DataGeometryType.Collection)
    eq_(desc['name'],'csv')
    eq_(desc['type'],mapnik.DataType.Vector)
    eq_(desc['encoding'],'utf-8')
|
||||
|
||||
def test_json_field1(**kwargs):
    # RFC 4180 flavor: double quotes doubled inside a double-quoted field
    validate_geojson_datasource(get_csv_ds('geojson_double_quote_escape.csv'))
|
||||
|
||||
def test_json_field2(**kwargs):
    # single-quoted GeoJSON chunks containing bare double quotes
    validate_geojson_datasource(get_csv_ds('geojson_single_quote.csv'))
|
||||
|
||||
def test_json_field3(**kwargs):
    # "filebakery" flavor: doubled double quotes, same as RFC 4180
    validate_geojson_datasource(get_csv_ds('geojson_2x_double_quote_filebakery_style.csv'))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
setup()
|
||||
|
|
Loading…
Add table
Reference in a new issue