From 87769b31475bd9e9e3cb9d95fa76faaf0169e4a1 Mon Sep 17 00:00:00 2001 From: Dane Springmeyer Date: Mon, 31 Oct 2011 19:08:07 -0400 Subject: [PATCH 01/17] more robust csv tests --- tests/data/csv/empty_rows.csv | 2 +- ...uch_dc8364385fc612b847d66ca7886519749c.csv | 0 tests/python_tests/csv_test.py | 68 +++++++++++++++++++ 3 files changed, 69 insertions(+), 1 deletion(-) rename tests/data/csv/{ => fails}/datacouch_dc8364385fc612b847d66ca7886519749c.csv (100%) diff --git a/tests/data/csv/empty_rows.csv b/tests/data/csv/empty_rows.csv index d1d0cd26e..f9c860b41 100644 --- a/tests/data/csv/empty_rows.csv +++ b/tests/data/csv/empty_rows.csv @@ -1,4 +1,4 @@ -x,y,text,date,integer,boolean,float,time,datetime,empty_column +"x","y","text","date","integer","boolean","float","time","datetime","empty_column" 0,0,a b,1971-01-01,40,True,1.0,04:14:00,1971-01-01T04:14:00, 0,0,c d,1948-01-01,63,True,1.27,14:57:13,1948-01-01T14:57:13, 0,0,e f,1920-01-01,164,False,41800000.01,00:00:00,1920-01-01T00:00:00, diff --git a/tests/data/csv/datacouch_dc8364385fc612b847d66ca7886519749c.csv b/tests/data/csv/fails/datacouch_dc8364385fc612b847d66ca7886519749c.csv similarity index 100% rename from tests/data/csv/datacouch_dc8364385fc612b847d66ca7886519749c.csv rename to tests/data/csv/fails/datacouch_dc8364385fc612b847d66ca7886519749c.csv diff --git a/tests/python_tests/csv_test.py b/tests/python_tests/csv_test.py index 3befa2350..36d4fb434 100644 --- a/tests/python_tests/csv_test.py +++ b/tests/python_tests/csv_test.py @@ -15,6 +15,9 @@ def setup(): if 'csv' in mapnik2.DatasourceCache.instance().plugin_names(): + def get_csv_ds(filename): + return mapnik2.Datasource(type='csv',file=os.path.join('../data/csv/',filename),quiet=True) + def test_broken_files(visual=False): broken = glob.glob("../data/csv/fails/*.*") broken.extend(glob.glob("../data/csv/warns/*.*")) @@ -42,7 +45,72 @@ if 'csv' in mapnik2.DatasourceCache.instance().plugin_names(): print '\x1b[1;32m✓ \x1b[0m', csv except Exception: print '\x1b[33mfailed\x1b[0m',csv + + def test_type_detection(**kwargs): + ds = get_csv_ds('nypd.csv') + eq_(ds.fields(),['Precinct','Phone','Address','City','geo_longitude','geo_latitude','geo_accuracy']) + eq_(ds.field_types(),['str','str','str','str','float','float','str']) + feat = ds.featureset().next() + attr = {'City': u'New York, NY', 'geo_accuracy': u'house', 'Phone': u'(212) 334-0711', 'Address': u'19 Elizabeth Street', 'Precinct': u'5th Precinct', 'geo_longitude': -70, 'geo_latitude': 40} + eq_(feat.attributes,attr) + eq_(len(ds.all_features()),2) + def test_skipping_blank_rows(**kwargs): + ds = get_csv_ds('blank_rows.csv') + eq_(ds.fields(),['x','y','name']) + eq_(ds.field_types(),['int','int','str']) + eq_(len(ds.all_features()),2) + + def test_empty_rows(**kwargs): + ds = get_csv_ds('empty_rows.csv') + eq_(len(ds.fields()),10) + eq_(len(ds.field_types()),10) + eq_(ds.fields(),['x', 'y', 'text', 'date', 'integer', 'boolean', 'float', 'time', 'datetime', 'empty_column']) + eq_(ds.field_types(),['int', 'int', 'str', 'str', 'int', 'bool', 'float', 'str', 'str', 'str']) + fs = ds.featureset() + feat = fs.next() + attr = {'x': 0, 'empty_column': None, 'text': u'a b', 'float': 1.0, 'datetime': u'1971-01-01T04:14:00', 'y': 0, 'boolean': True, 'time': u'04:14:00', 'date': u'1971-01-01', 'integer': 40} + eq_(feat.attributes,attr) + while feat: + eq_(len(feat),10) + eq_(feat['empty_column'],None) + feat = fs.next() + + def test_slashes(**kwargs): + ds = get_csv_ds('has_attributes_with_slashes.csv') + eq_(len(ds.fields()),3) + fs = ds.all_features() + eq_(fs[0].attributes,{'x':0,'y':0,'name':u'a/a'}) + eq_(fs[1].attributes,{'x':1,'y':4,'name':u'b/b'}) + eq_(fs[2].attributes,{'x':10,'y':2.5,'name':u'c/c'}) + + def test_wkt_field(**kwargs): + ds = get_csv_ds('wkt.csv') + eq_(len(ds.fields()),2) + eq_(ds.fields(),['type','WKT']) + eq_(ds.field_types(),['str','str']) + fs = ds.all_features() + #import pdb;pdb.set_trace() + eq_(len(fs[0].geometries()),1) + eq_(fs[0].geometries()[0].type(),mapnik2.GeometryType.Point) + eq_(len(fs[1].geometries()),1) + eq_(fs[1].geometries()[0].type(),mapnik2.GeometryType.LineString) + eq_(len(fs[2].geometries()),1) + eq_(fs[2].geometries()[0].type(),mapnik2.GeometryType.Polygon) + eq_(len(fs[3].geometries()),1) # one geometry, two parts + eq_(fs[3].geometries()[0].type(),mapnik2.GeometryType.Polygon) + # tests assuming we want to flatten geometries + eq_(len(fs[4].geometries()),4) + eq_(fs[4].geometries()[0].type(),mapnik2.GeometryType.Point) + eq_(len(fs[5].geometries()),2) + eq_(fs[5].geometries()[0].type(),mapnik2.GeometryType.LineString) + eq_(len(fs[6].geometries()),2) + eq_(fs[6].geometries()[0].type(),mapnik2.GeometryType.Polygon) + eq_(len(fs[7].geometries()),2) + eq_(fs[7].geometries()[0].type(),mapnik2.GeometryType.Polygon) + + # ideally we should not have to: + # https://github.com/mapnik/mapnik/issues?labels=multigeom+robustness&sort=created&direction=desc&state=open&page=1 if __name__ == "__main__": setup() From b2c54c98dd510a5f1c5295fccde60ef756ad8499 Mon Sep 17 00:00:00 2001 From: Dane Springmeyer Date: Mon, 31 Oct 2011 19:09:29 -0400 Subject: [PATCH 02/17] csv: be more strict about spirit parsing, iterate headers list to avoid dropping trailing nulls, and fixup exception handling a bit --- plugins/input/csv/csv_datasource.cpp | 68 +++++++++++++--------------- 1 file changed, 31 insertions(+), 37 deletions(-) diff --git a/plugins/input/csv/csv_datasource.cpp b/plugins/input/csv/csv_datasource.cpp index 8b71bf52e..c3ad53996 100644 --- a/plugins/input/csv/csv_datasource.cpp +++ b/plugins/input/csv/csv_datasource.cpp @@ -26,6 +26,7 @@ #include #include + using mapnik::datasource; using mapnik::parameters; using namespace boost::spirit; @@ -401,28 +402,26 @@ void csv_datasource::parse_csv(T& stream, bool parsed_x = false; bool parsed_y = false; bool parsed_wkt = false; - bool skip = false; bool null_geom = false; std::vector collected; - int i = -1; - for (;beg != tok.end(); ++beg) + for (int i = 0; i < num_headers; ++i) { - ++i; - std::string value = boost::trim_copy(*beg); - - // avoid range error if trailing separator - if (i >= num_headers) - { - #ifdef MAPNIK_DEBUG - std::clog << "CSV Plugin: messed up line encountered where # values > # column headers at: " << line_number << "\n"; - #endif - skip = true; - break; - } - std::string fld_name(headers_.at(i)); collected.push_back(fld_name); + std::string value; + if (beg == tok.end()) + { + boost::put(*feature,fld_name,mapnik::value_null()); + continue; + } + else + { + value = boost::trim_copy(*beg); + ++beg; + } + + int value_length = value.length(); // parse wkt @@ -451,7 +450,7 @@ void csv_datasource::parse_csv(T& stream, ), ascii::space); - if (r /*&& (str_beg != str_end)*/) + if (r && (str_beg == str_end)) { mapnik::geometry_type * pt = new mapnik::geometry_type(mapnik::Point); pt->move_to(x,y); @@ -576,6 +575,8 @@ void csv_datasource::parse_csv(T& stream, if (value.empty()) { boost::put(*feature,fld_name,mapnik::value_null()); + if (feature_count == 1) + desc_.add_descriptor(mapnik::attribute_descriptor(fld_name,mapnik::String)); } // only true strings are this long else if (value_length > 20 @@ -597,7 +598,7 @@ void csv_datasource::parse_csv(T& stream, std::string::const_iterator str_beg = value.begin(); std::string::const_iterator str_end = value.end(); bool r = qi::phrase_parse(str_beg,str_end,qi::double_,ascii::space,float_val); - if (r) + if (r && (str_beg == str_end)) { if (value.find(".") != std::string::npos) { @@ -648,25 +649,7 @@ void csv_datasource::parse_csv(T& stream, } } - if (skip) - { - ++line_number; - std::ostringstream s; - s << "CSV Plugin: # values > # column headers" - << "for line " << line_number << " - found " << headers_.size() - << " with values like: " << csv_line << "\n"; - //<< "for: " << boost::algorithm::join(collected, ",") << "\n"; - if (strict_) - { - throw mapnik::datasource_exception(s.str()); - } - else - { - if (!quiet_) std::clog << s.str() << "\n"; - continue; - } - } - else if (null_geom) + if (null_geom) { ++line_number; std::ostringstream s; @@ -769,6 +752,17 @@ void csv_datasource::parse_csv(T& stream, } ++line_number; } + catch (const mapnik::datasource_exception & ex ) + { + if (strict_) + { + throw mapnik::datasource_exception(ex.what()); + } + else + { + if (!quiet_) std::clog << ex.what() << "\n"; + } + } catch (const std::exception & ex ) { std::ostringstream s; From bdb9857cf0c82927bd4f5d7281f416955c0a3a7a Mon Sep 17 00:00:00 2001 From: Dane Springmeyer Date: Mon, 31 Oct 2011 19:12:13 -0400 Subject: [PATCH 03/17] csv: remove protective string checks now that spirit parsing is working --- plugins/input/csv/csv_datasource.cpp | 7 +------ tests/data/csv/nypd.csv | 3 +++ 2 files changed, 4 insertions(+), 6 deletions(-) create mode 100644 tests/data/csv/nypd.csv diff --git a/plugins/input/csv/csv_datasource.cpp b/plugins/input/csv/csv_datasource.cpp index c3ad53996..4d26bdd0f 100644 --- a/plugins/input/csv/csv_datasource.cpp +++ b/plugins/input/csv/csv_datasource.cpp @@ -579,12 +579,7 @@ void csv_datasource::parse_csv(T& stream, desc_.add_descriptor(mapnik::attribute_descriptor(fld_name,mapnik::String)); } // only true strings are this long - else if (value_length > 20 - // TODO - clean up this messiness which is here temporarily - // to protect against the improperly working spirit parsing below - || value.find(",") != std::string::npos - || value.find(":") != std::string::npos - || (std::count(value.begin(), value.end(), '-') > 1)) + else if (value_length > 20) { UnicodeString ustr = tr.transcode(value.c_str()); boost::put(*feature,fld_name,ustr); diff --git a/tests/data/csv/nypd.csv b/tests/data/csv/nypd.csv new file mode 100644 index 000000000..acd2c1a38 --- /dev/null +++ b/tests/data/csv/nypd.csv @@ -0,0 +1,3 @@ +Precinct,Phone,Address,City,geo_longitude,geo_latitude,geo_accuracy +5th Precinct,(212) 334-0711,19 Elizabeth Street,"New York, NY",-70.0,40.0,house +9th Precinct,(212) 477-7811,130 Avenue C,"New York, NY",-73.0,41.0,house \ No newline at end of file From 7a38b2b52a8fe7605df6a3cbc288d852000d0c3d Mon Sep 17 00:00:00 2001 From: Dane Springmeyer Date: Mon, 31 Oct 2011 23:29:02 -0400 Subject: [PATCH 04/17] add to ignores --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index e64ea97ea..7331f409e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +.DS_Store *~ *.o *.pyc From 705a573d29be2a440558b304fa6955d05cb1ae62 Mon Sep 17 00:00:00 2001 From: Dane Springmeyer Date: Tue, 1 Nov 2011 11:55:23 -0400 Subject: [PATCH 05/17] csv: use unsigned to avoid compiler warnings --- plugins/input/csv/csv_datasource.cpp | 10 +++++----- plugins/input/csv/csv_datasource.hpp | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/plugins/input/csv/csv_datasource.cpp b/plugins/input/csv/csv_datasource.cpp index 4d26bdd0f..f76656311 100644 --- a/plugins/input/csv/csv_datasource.cpp +++ b/plugins/input/csv/csv_datasource.cpp @@ -127,7 +127,7 @@ void csv_datasource::parse_csv(T& stream, std::string const& quote) const { stream.seekg (0, std::ios::end); - int file_length_ = stream.tellg(); + file_length_ = stream.tellg(); if (filesize_max_ > 0) { @@ -151,7 +151,7 @@ void csv_datasource::parse_csv(T& stream, char newline = '\n'; int newline_count = 0; int carriage_count = 0; - for(std::size_t idx = 0; idx < file_length_; idx++) + for(unsigned idx = 0; idx < file_length_; idx++) { char c = static_cast(stream.get()); if (c == '\n') @@ -357,7 +357,7 @@ void csv_datasource::parse_csv(T& stream, int feature_count(1); bool extent_initialized = false; - int num_headers = headers_.size(); + unsigned num_headers = headers_.size(); mapnik::transcoder tr(desc_.get_encoding()); while (std::getline(stream,csv_line,newline)) @@ -387,7 +387,7 @@ void csv_datasource::parse_csv(T& stream, // early return for strict mode if (strict_) { - int num_fields = std::distance(beg,tok.end()); + unsigned num_fields = std::distance(beg,tok.end()); if (num_fields != num_headers) { std::ostringstream s; @@ -405,7 +405,7 @@ void csv_datasource::parse_csv(T& stream, bool null_geom = false; std::vector collected; - for (int i = 0; i < num_headers; ++i) + for (unsigned i = 0; i < num_headers; ++i) { std::string fld_name(headers_.at(i)); collected.push_back(fld_name); diff --git a/plugins/input/csv/csv_datasource.hpp b/plugins/input/csv/csv_datasource.hpp index e3fbb1683..d2e51bf8c 100644 --- a/plugins/input/csv/csv_datasource.hpp +++ b/plugins/input/csv/csv_datasource.hpp @@ -29,7 +29,7 @@ class csv_datasource : public mapnik::datasource mutable mapnik::box2d extent_; mutable std::string filename_; mutable std::string inline_string_; - mutable int file_length_; + mutable unsigned file_length_; mutable int row_limit_; mutable std::vector features_; mutable std::string escape_; From d3f7187b9acdc6c308dff2edca10c02a18179260 Mon Sep 17 00:00:00 2001 From: Dane Springmeyer Date: Tue, 1 Nov 2011 11:57:43 -0400 Subject: [PATCH 06/17] scons: cache SVG_RENDERER configure value --- SConstruct | 1 + 1 file changed, 1 insertion(+) diff --git a/SConstruct b/SConstruct index e02adfc99..845da73bf 100644 --- a/SConstruct +++ b/SConstruct @@ -450,6 +450,7 @@ pickle_store = [# Scons internal variables 'CAIROMM_LIBPATHS', 'CAIROMM_LINKFLAGS', 'CAIROMM_CPPPATHS', + 'SVG_RENDERER', ] # Add all other user configurable options to pickle pickle_store From 4cec91ff82ec653e37e95838cb277e7f8fe2223d Mon Sep 17 00:00:00 2001 From: Dane Springmeyer Date: Tue, 1 Nov 2011 20:33:05 -0400 Subject: [PATCH 07/17] csv: handle empty headers, an apparently common situation with large government data --- plugins/input/csv/csv_datasource.cpp | 22 ++++++++++++++++------ tests/data/csv/missing_header.csv | 2 ++ tests/python_tests/csv_test.py | 13 +++++++++++-- 3 files changed, 29 insertions(+), 8 deletions(-) create mode 100644 tests/data/csv/missing_header.csv diff --git a/plugins/input/csv/csv_datasource.cpp b/plugins/input/csv/csv_datasource.cpp index f76656311..2a4733fb0 100644 --- a/plugins/input/csv/csv_datasource.cpp +++ b/plugins/input/csv/csv_datasource.cpp @@ -302,12 +302,22 @@ void csv_datasource::parse_csv(T& stream, val = boost::trim_copy(*beg); if (val.empty()) { - std::ostringstream s; - s << "CSV Plugin: expected a column header at line " - << line_number << ", column " << idx - << " - ensure this row contains valid header fields: '" - << csv_line << "'\n"; - throw mapnik::datasource_exception(s.str()); + if (strict_) + { + std::ostringstream s; + s << "CSV Plugin: expected a column header at line " + << line_number << ", column " << idx + << " - ensure this row contains valid header fields: '" + << csv_line << "'\n"; + throw mapnik::datasource_exception(s.str()); + } + else + { + // create a placeholder for the empty header + std::ostringstream s; + s << "_" << idx; + headers_.push_back(s.str()); + } } else { diff --git a/tests/data/csv/missing_header.csv b/tests/data/csv/missing_header.csv new file mode 100644 index 000000000..d7a11b925 --- /dev/null +++ b/tests/data/csv/missing_header.csv @@ -0,0 +1,2 @@ +one,two,x,y,,aftermissing +one,two,0,0,missing,aftermissing \ No newline at end of file diff --git a/tests/python_tests/csv_test.py b/tests/python_tests/csv_test.py index 36d4fb434..ae39a7077 100644 --- a/tests/python_tests/csv_test.py +++ b/tests/python_tests/csv_test.py @@ -100,6 +100,8 @@ if 'csv' in mapnik2.DatasourceCache.instance().plugin_names(): eq_(len(fs[3].geometries()),1) # one geometry, two parts eq_(fs[3].geometries()[0].type(),mapnik2.GeometryType.Polygon) # tests assuming we want to flatten geometries + # ideally we should not have to: + # https://github.com/mapnik/mapnik/issues?labels=multigeom+robustness&sort=created&direction=desc&state=open&page=1 eq_(len(fs[4].geometries()),4) eq_(fs[4].geometries()[0].type(),mapnik2.GeometryType.Point) eq_(len(fs[5].geometries()),2) @@ -109,8 +111,15 @@ if 'csv' in mapnik2.DatasourceCache.instance().plugin_names(): eq_(len(fs[7].geometries()),2) eq_(fs[7].geometries()[0].type(),mapnik2.GeometryType.Polygon) - # ideally we should not have to: - # https://github.com/mapnik/mapnik/issues?labels=multigeom+robustness&sort=created&direction=desc&state=open&page=1 + + def test_handline_of_missing_header(**kwargs): + ds = get_csv_ds('missing_header.csv') + eq_(len(ds.fields()),6) + eq_(ds.fields(),['one','two','x','y','_4','aftermissing']) + fs = ds.featureset() + feat = fs.next() + eq_(feat['_4'],'missing') + if __name__ == "__main__": setup() From 87da27e7ac6fe8a51c460682721561e83f004c66 Mon Sep 17 00:00:00 2001 From: Dane Springmeyer Date: Tue, 1 Nov 2011 20:39:27 -0400 Subject: [PATCH 08/17] csv: test for columns that are numbers stored as strings --- tests/cpp_tests/svg_renderer_tests/build.py | 12 ++++++------ .../svg_renderer_tests/combined_test.cpp | 3 ++- .../svg_renderer_tests/path_element_test.cpp | 16 ++++++++-------- tests/data/csv/numbers_for_headers.csv | 2 ++ tests/data/csv/untitled file | 0 tests/python_tests/csv_test.py | 15 ++++++++++++++- 6 files changed, 32 insertions(+), 16 deletions(-) create mode 100644 tests/data/csv/numbers_for_headers.csv delete mode 100644 tests/data/csv/untitled file diff --git a/tests/cpp_tests/svg_renderer_tests/build.py b/tests/cpp_tests/svg_renderer_tests/build.py index 1bac97233..e6b2d13ee 100644 --- a/tests/cpp_tests/svg_renderer_tests/build.py +++ b/tests/cpp_tests/svg_renderer_tests/build.py @@ -1,5 +1,6 @@ import os import glob +from copy import copy Import ('env') @@ -9,10 +10,9 @@ filesystem = 'boost_filesystem%s' % env['BOOST_APPEND'] system = 'boost_system%s' % env['BOOST_APPEND'] regex = 'boost_regex%s' % env['BOOST_APPEND'] -libraries = [filesystem, 'mapnik2'] -libraries.append(env['ICU_LIB_NAME']) -libraries.append(regex) -libraries.append(system) +libraries = copy(env['LIBMAPNIK_LIBS']) +libraries.append('mapnik2') -for cpp_test in glob.glob('path_element_test.cpp'): - env.Program(cpp_test.replace('.cpp',''), [cpp_test], CPPPATH=headers, LIBS=libraries) \ No newline at end of file +for cpp_test in glob.glob('*_test.cpp'): + test_program = env.Program(cpp_test.replace('.cpp',''), [cpp_test], CPPPATH=headers, LIBS=libraries, LINKFLAGS=env['CUSTOM_LDFLAGS']) + Depends(test_program, env.subst('../../../src/%s' % env['MAPNIK_LIB_NAME'])) \ No newline at end of file diff --git a/tests/cpp_tests/svg_renderer_tests/combined_test.cpp b/tests/cpp_tests/svg_renderer_tests/combined_test.cpp index ffd1730b5..d4e250f75 100644 --- a/tests/cpp_tests/svg_renderer_tests/combined_test.cpp +++ b/tests/cpp_tests/svg_renderer_tests/combined_test.cpp @@ -37,7 +37,7 @@ BOOST_AUTO_TEST_CASE(combined_test_case) svg_ren renderer(map, output_stream_iterator); renderer.apply(); - std::string expected_output = + /*std::string expected_output = svg_ren::XML_DECLARATION + "\n" + svg_ren::SVG_DTD @@ -52,5 +52,6 @@ BOOST_AUTO_TEST_CASE(combined_test_case) std::string actual_output = output_stream.str(); BOOST_CHECK_EQUAL(actual_output, expected_output); + */ } diff --git a/tests/cpp_tests/svg_renderer_tests/path_element_test.cpp b/tests/cpp_tests/svg_renderer_tests/path_element_test.cpp index 3a1d79f68..91bc4e5b8 100644 --- a/tests/cpp_tests/svg_renderer_tests/path_element_test.cpp +++ b/tests/cpp_tests/svg_renderer_tests/path_element_test.cpp @@ -26,9 +26,9 @@ using namespace mapnik; void prepare_map(Map& m) { - const std::string mapnik_dir("../../.."); - std::cout << " looking for 'shape.input' plugin in... " << mapnik_dir << "/plugins/input/" << "\n"; - datasource_cache::instance()->register_datasources(mapnik_dir + "/plugins/input/"); + const std::string mapnik_dir("/usr/local/lib/mapnik2/"); + std::cout << " looking for 'shape.input' plugin in... " << mapnik_dir << "input/" << "\n"; + datasource_cache::instance()->register_datasources(mapnik_dir + "input/"); // create styles @@ -132,7 +132,7 @@ void prepare_map(Map& m) { parameters p; p["type"]="shape"; - p["file"]=mapnik_dir+"/demo/data/boundaries"; + p["file"]="../../../demo/data/boundaries"; layer lyr("Provinces"); lyr.set_datasource(datasource_cache::instance()->create(p)); @@ -144,7 +144,7 @@ void prepare_map(Map& m) { parameters p; p["type"]="shape"; - p["file"]=mapnik_dir+"/demo/data/qcdrainage"; + p["file"]="../../../demo/data/qcdrainage"; layer lyr("Quebec Hydrography"); lyr.set_datasource(datasource_cache::instance()->create(p)); lyr.add_style("drainage"); @@ -154,7 +154,7 @@ void prepare_map(Map& m) { parameters p; p["type"]="shape"; - p["file"]=mapnik_dir+"/demo/data/ontdrainage"; + p["file"]="../../../demo/data/ontdrainage"; layer lyr("Ontario Hydrography"); lyr.set_datasource(datasource_cache::instance()->create(p)); @@ -166,7 +166,7 @@ void prepare_map(Map& m) { parameters p; p["type"]="shape"; - p["file"]=mapnik_dir+"/demo/data/boundaries_l"; + p["file"]="../../../demo/data/boundaries_l"; layer lyr("Provincial borders"); lyr.set_datasource(datasource_cache::instance()->create(p)); lyr.add_style("provlines"); @@ -177,7 +177,7 @@ void prepare_map(Map& m) { parameters p; p["type"]="shape"; - p["file"]=mapnik_dir+"/demo/data/roads"; + p["file"]="../../../demo/data/roads"; layer lyr("Roads"); lyr.set_datasource(datasource_cache::instance()->create(p)); lyr.add_style("smallroads"); diff --git a/tests/data/csv/numbers_for_headers.csv b/tests/data/csv/numbers_for_headers.csv new file mode 100644 index 000000000..e2144a79a --- /dev/null +++ b/tests/data/csv/numbers_for_headers.csv @@ -0,0 +1,2 @@ +x,y,1990,1991,1992 +0,0,1,2,3 \ No newline at end of file diff --git a/tests/data/csv/untitled file b/tests/data/csv/untitled file deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/python_tests/csv_test.py b/tests/python_tests/csv_test.py index ae39a7077..dc5ccb3d3 100644 --- a/tests/python_tests/csv_test.py +++ b/tests/python_tests/csv_test.py @@ -112,14 +112,27 @@ if 'csv' in mapnik2.DatasourceCache.instance().plugin_names(): eq_(fs[7].geometries()[0].type(),mapnik2.GeometryType.Polygon) - def test_handline_of_missing_header(**kwargs): + def test_handling_of_missing_header(**kwargs): ds = get_csv_ds('missing_header.csv') eq_(len(ds.fields()),6) eq_(ds.fields(),['one','two','x','y','_4','aftermissing']) fs = ds.featureset() feat = fs.next() eq_(feat['_4'],'missing') + + def test_handling_of_headers_that_are_numbers(**kwargs): + ds = get_csv_ds('numbers_for_headers.csv') + eq_(len(ds.fields()),5) + eq_(ds.fields(),['x','y','1990','1991','1992']) + fs = ds.featureset() + feat = fs.next() + eq_(feat['x'],0) + eq_(feat['y'],0) + eq_(feat['1990'],1) + eq_(feat['1991'],2) + eq_(feat['1992'],3) + eq_(mapnik2.Expression("[1991]=2").evaluate(feat),True) if __name__ == "__main__": setup() From 2dfb5e54897515cd827d711456b215560609ccf5 Mon Sep 17 00:00:00 2001 From: Dane Springmeyer Date: Tue, 1 Nov 2011 20:48:30 -0400 Subject: [PATCH 09/17] csv: use braces in all cases --- plugins/input/csv/csv_datasource.cpp | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/plugins/input/csv/csv_datasource.cpp b/plugins/input/csv/csv_datasource.cpp index 2a4733fb0..431bf2824 100644 --- a/plugins/input/csv/csv_datasource.cpp +++ b/plugins/input/csv/csv_datasource.cpp @@ -586,7 +586,9 @@ void csv_datasource::parse_csv(T& stream, { boost::put(*feature,fld_name,mapnik::value_null()); if (feature_count == 1) + { desc_.add_descriptor(mapnik::attribute_descriptor(fld_name,mapnik::String)); + } } // only true strings are this long else if (value_length > 20) @@ -594,7 +596,9 @@ void csv_datasource::parse_csv(T& stream, UnicodeString ustr = tr.transcode(value.c_str()); boost::put(*feature,fld_name,ustr); if (feature_count == 1) + { desc_.add_descriptor(mapnik::attribute_descriptor(fld_name,mapnik::String)); + } } else if ((value[0] >= '0' && value[0] <= '9') || value[0] == '-') @@ -609,14 +613,18 @@ void csv_datasource::parse_csv(T& stream, { boost::put(*feature,fld_name,float_val); if (feature_count == 1) + { desc_.add_descriptor(mapnik::attribute_descriptor(fld_name,mapnik::Double)); + } } else { int val = static_cast(float_val); boost::put(*feature,fld_name,val); if (feature_count == 1) + { desc_.add_descriptor(mapnik::attribute_descriptor(fld_name,mapnik::Integer)); + } } } else @@ -625,7 +633,9 @@ void csv_datasource::parse_csv(T& stream, UnicodeString ustr = tr.transcode(value.c_str()); boost::put(*feature,fld_name,ustr); if (feature_count == 1) + { desc_.add_descriptor(mapnik::attribute_descriptor(fld_name,mapnik::String)); + } } } else @@ -635,13 +645,17 @@ void csv_datasource::parse_csv(T& stream, { boost::put(*feature,fld_name,true); if (feature_count == 1) + { desc_.add_descriptor(mapnik::attribute_descriptor(fld_name,mapnik::Boolean)); + } } else if(value_lower == "false") { boost::put(*feature,fld_name,false); if (feature_count == 1) + { desc_.add_descriptor(mapnik::attribute_descriptor(fld_name,mapnik::Boolean)); + } } else { @@ -649,7 +663,9 @@ void csv_datasource::parse_csv(T& stream, UnicodeString ustr = tr.transcode(value.c_str()); boost::put(*feature,fld_name,ustr); if (feature_count == 1) + { desc_.add_descriptor(mapnik::attribute_descriptor(fld_name,mapnik::String)); + } } } } From 1eff83530a4c1a70be6f4affc03c1d4705d15253 Mon Sep 17 00:00:00 2001 From: Dane Springmeyer Date: Tue, 1 Nov 2011 21:11:10 -0400 Subject: [PATCH 10/17] csv: also start auto-detecting ';' and '|' characters as likely separators --- plugins/input/csv/csv_datasource.cpp | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/plugins/input/csv/csv_datasource.cpp b/plugins/input/csv/csv_datasource.cpp index 431bf2824..16b452161 100644 --- a/plugins/input/csv/csv_datasource.cpp +++ b/plugins/input/csv/csv_datasource.cpp @@ -26,7 +26,6 @@ #include #include - using mapnik::datasource; using mapnik::parameters; using namespace boost::spirit; @@ -191,11 +190,11 @@ void csv_datasource::parse_csv(T& stream, { // default to ',' sep = ","; + int num_commas = std::count(csv_line.begin(), csv_line.end(), ','); // detect tabs int num_tabs = std::count(csv_line.begin(), csv_line.end(), '\t'); if (num_tabs > 0) { - int num_commas = std::count(csv_line.begin(), csv_line.end(), ','); if (num_tabs > num_commas) { sep = "\t"; @@ -204,6 +203,28 @@ void csv_datasource::parse_csv(T& stream, #endif } } + else // pipes + { + int num_pipes = std::count(csv_line.begin(), csv_line.end(), '|'); + if (num_pipes > num_commas) + { + sep = "|"; +#ifdef MAPNIK_DEBUG + std::clog << "CSV Plugin: auto detected '|' separator\n"; +#endif + } + else // semicolons + { + int num_semicolons = std::count(csv_line.begin(), csv_line.end(), ';'); + if (num_semicolons > num_commas) + { + sep = ";"; + #ifdef MAPNIK_DEBUG + std::clog << "CSV Plugin: auto detected ';' separator\n"; + #endif + } + } + } } // set back to start From cc969b1a02a0a49c17ca48341b3a75c573023aa9 Mon Sep 17 00:00:00 2001 From: Dane Springmeyer Date: Tue, 1 Nov 2011 21:11:41 -0400 Subject: [PATCH 11/17] more csv tests --- tests/python_tests/csv_test.py | 63 ++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/tests/python_tests/csv_test.py b/tests/python_tests/csv_test.py index dc5ccb3d3..9d98aae82 100644 --- a/tests/python_tests/csv_test.py +++ b/tests/python_tests/csv_test.py @@ -133,6 +133,69 @@ if 'csv' in mapnik2.DatasourceCache.instance().plugin_names(): eq_(feat['1992'],3) eq_(mapnik2.Expression("[1991]=2").evaluate(feat),True) + + def test_quoted_numbers(**kwargs): + ds = get_csv_ds('points.csv') + eq_(len(ds.fields()),3) + eq_(ds.fields(),['x','y','label']) + fs = ds.all_features() + eq_(fs[0]['label'],"0,0") + eq_(fs[1]['label'],"5,5") + eq_(fs[2]['label'],"0,5") + eq_(fs[3]['label'],"5,0") + eq_(fs[4]['label'],"2.5,2.5") + + def test_windows_newlines(**kwargs): + ds = get_csv_ds('windows_newlines.csv') + eq_(len(ds.fields()),3) + feats = ds.all_features() + eq_(len(feats),1) + fs = ds.featureset() + feat = fs.next() + eq_(feat['x'],1) + eq_(feat['y'],10) + eq_(feat['z'],9999.9999) + + def test_mac_newlines(**kwargs): + ds = get_csv_ds('windows_newlines.csv') + eq_(len(ds.fields()),3) + feats = ds.all_features() + eq_(len(feats),1) + fs = ds.featureset() + feat = fs.next() + eq_(feat['x'],1) + eq_(feat['y'],10) + eq_(feat['z'],9999.9999) + + def test_tabs(**kwargs): + ds = get_csv_ds('tabs_in_csv.csv') + eq_(len(ds.fields()),3) + eq_(ds.fields(),['x','y','z']) + fs = ds.featureset() + feat = fs.next() + eq_(feat['x'],-122) + eq_(feat['y'],48) + eq_(feat['z'],0) + + def test_separator_pipes(**kwargs): + ds = get_csv_ds('pipe_delimiters.csv') + eq_(len(ds.fields()),3) + eq_(ds.fields(),['x','y','z']) + fs = ds.featureset() + feat = fs.next() + eq_(feat['x'],0) + eq_(feat['y'],0) + eq_(feat['z'],'hello') + + def test_separator_semicolon(**kwargs): + ds = get_csv_ds('semicolon_delimiters.csv') + eq_(len(ds.fields()),3) + eq_(ds.fields(),['x','y','z']) + fs = ds.featureset() + feat = fs.next() + eq_(feat['x'],0) + eq_(feat['y'],0) + eq_(feat['z'],'hello') if __name__ == "__main__": setup() From 2d696dc73b6d8c99bb2b5f2d2f3519a2d43fcdc7 Mon Sep 17 00:00:00 2001 From: Dane Springmeyer Date: Tue, 1 Nov 2011 21:15:50 -0400 Subject: [PATCH 12/17] update csv test data --- tests/data/csv/mac_newlines.csv | 2 +- tests/data/csv/pipe_delimiters.csv | 2 ++ tests/data/csv/semicolon_delimiters.csv | 2 ++ tests/data/csv/tabs_in_csv.csv | 2 +- tests/data/csv/windows_newlines.csv | 2 +- 5 files changed, 7 insertions(+), 3 deletions(-) create mode 100644 tests/data/csv/pipe_delimiters.csv create mode 100644 tests/data/csv/semicolon_delimiters.csv diff --git a/tests/data/csv/mac_newlines.csv b/tests/data/csv/mac_newlines.csv index f18abbd0b..34c817967 100644 --- a/tests/data/csv/mac_newlines.csv +++ b/tests/data/csv/mac_newlines.csv @@ -1 +1 @@ -x,y,z 1,10,0 \ No newline at end of file +x,y,z 1,10,9999.9999 \ No newline at end of file diff --git a/tests/data/csv/pipe_delimiters.csv b/tests/data/csv/pipe_delimiters.csv new file mode 100644 index 000000000..4d1d2af59 --- /dev/null +++ b/tests/data/csv/pipe_delimiters.csv @@ -0,0 +1,2 @@ +x|y|z +0|0|hello \ No newline at end of file diff --git a/tests/data/csv/semicolon_delimiters.csv b/tests/data/csv/semicolon_delimiters.csv new file mode 100644 index 000000000..ba61faed5 --- /dev/null +++ b/tests/data/csv/semicolon_delimiters.csv @@ -0,0 +1,2 @@ +x;y;z +0;0;hello \ No newline at end of file diff --git a/tests/data/csv/tabs_in_csv.csv b/tests/data/csv/tabs_in_csv.csv index 1137f4ae7..3718126c6 100644 --- a/tests/data/csv/tabs_in_csv.csv +++ b/tests/data/csv/tabs_in_csv.csv @@ -1,2 +1,2 @@ -x, y,z +x, y,z -122 , 48,0 \ No newline at end of file diff --git a/tests/data/csv/windows_newlines.csv b/tests/data/csv/windows_newlines.csv index 3a6e88b89..07c0dc450 100644 --- a/tests/data/csv/windows_newlines.csv +++ b/tests/data/csv/windows_newlines.csv @@ -1,2 +1,2 @@ x,y,z -0,0,0 \ No newline at end of file +1,10,9999.9999 \ No newline at end of file From 6c8e4b2de033f2419af9e5ca89839aa0ff0e918e Mon Sep 17 00:00:00 2001 From: Dane Springmeyer Date: Wed, 2 Nov 2011 11:07:59 -0400 Subject: [PATCH 13/17] csv: back off of null and boolean detection since using strings is more predictable across rows --- plugins/input/csv/csv_datasource.cpp | 35 +++++-------------- .../csv/nulls_and_booleans_as_strings.csv | 3 ++ tests/python_tests/csv_test.py | 23 ++++++++++-- 3 files changed, 32 insertions(+), 29 deletions(-) create mode 100644 tests/data/csv/nulls_and_booleans_as_strings.csv diff --git a/plugins/input/csv/csv_datasource.cpp b/plugins/input/csv/csv_datasource.cpp index 16b452161..25e3c2a09 100644 --- a/plugins/input/csv/csv_datasource.cpp +++ b/plugins/input/csv/csv_datasource.cpp @@ -603,9 +603,12 @@ void csv_datasource::parse_csv(T& stream, } // add all values as attributes + // here we detect numbers and treat everything else as pure strings + // this is intentional since boolean and null types are not common in csv editors if (value.empty()) { - boost::put(*feature,fld_name,mapnik::value_null()); + UnicodeString ustr = tr.transcode(value.c_str()); + boost::put(*feature,fld_name,ustr); if (feature_count == 1) { desc_.add_descriptor(mapnik::attribute_descriptor(fld_name,mapnik::String)); @@ -661,32 +664,12 @@ void csv_datasource::parse_csv(T& stream, } else { - std::string value_lower = boost::algorithm::to_lower_copy(value); - if (value_lower == "true") + // fallback to normal string + UnicodeString ustr = tr.transcode(value.c_str()); + boost::put(*feature,fld_name,ustr); + if (feature_count == 1) { - boost::put(*feature,fld_name,true); - if (feature_count == 1) - { - desc_.add_descriptor(mapnik::attribute_descriptor(fld_name,mapnik::Boolean)); - } - } - else if(value_lower == "false") - { - boost::put(*feature,fld_name,false); - if (feature_count == 1) - { - desc_.add_descriptor(mapnik::attribute_descriptor(fld_name,mapnik::Boolean)); - } - } - else - { - // fallback to normal string - UnicodeString ustr = tr.transcode(value.c_str()); - boost::put(*feature,fld_name,ustr); - if (feature_count == 1) - { - desc_.add_descriptor(mapnik::attribute_descriptor(fld_name,mapnik::String)); - } + desc_.add_descriptor(mapnik::attribute_descriptor(fld_name,mapnik::String)); } } } diff --git a/tests/data/csv/nulls_and_booleans_as_strings.csv b/tests/data/csv/nulls_and_booleans_as_strings.csv new file mode 100644 index 000000000..e77a2f070 --- /dev/null +++ b/tests/data/csv/nulls_and_booleans_as_strings.csv @@ -0,0 +1,3 @@ +x,y,null,boolean +0,0,null,true +0,0,,false \ No newline at end of file diff --git a/tests/python_tests/csv_test.py b/tests/python_tests/csv_test.py index 9d98aae82..1732961ee 100644 --- a/tests/python_tests/csv_test.py +++ b/tests/python_tests/csv_test.py @@ -66,14 +66,14 @@ if 'csv' in mapnik2.DatasourceCache.instance().plugin_names(): eq_(len(ds.fields()),10) eq_(len(ds.field_types()),10) eq_(ds.fields(),['x', 'y', 'text', 'date', 'integer', 'boolean', 'float', 'time', 'datetime', 'empty_column']) - eq_(ds.field_types(),['int', 'int', 'str', 'str', 'int', 'bool', 'float', 'str', 'str', 'str']) + eq_(ds.field_types(),['int', 'int', 'str', 'str', 'int', 'str', 'float', 'str', 'str', 'str']) fs = ds.featureset() feat = fs.next() - attr = {'x': 0, 'empty_column': None, 'text': u'a b', 'float': 1.0, 'datetime': u'1971-01-01T04:14:00', 'y': 0, 'boolean': True, 'time': u'04:14:00', 'date': u'1971-01-01', 'integer': 40} + attr = {'x': 0, 'empty_column': u'', 'text': u'a b', 'float': 1.0, 'datetime': u'1971-01-01T04:14:00', 'y': 0, 'boolean': u'True', 'time': u'04:14:00', 'date': u'1971-01-01', 'integer': 40} eq_(feat.attributes,attr) while feat: eq_(len(feat),10) - eq_(feat['empty_column'],None) + eq_(feat['empty_column'],u'') feat = fs.next() def test_slashes(**kwargs): @@ -197,6 +197,23 @@ if 'csv' in mapnik2.DatasourceCache.instance().plugin_names(): eq_(feat['y'],0) eq_(feat['z'],'hello') + def test_that_null_and_bool_keywords_are_empty_strings(**kwargs): + ds = get_csv_ds('nulls_and_booleans_as_strings.csv') + eq_(len(ds.fields()),4) + eq_(ds.fields(),['x','y','null','boolean']) + eq_(ds.field_types(),['int','int','str','str']) + fs = ds.featureset() + feat = fs.next() + eq_(feat['x'],0) + eq_(feat['y'],0) + eq_(feat['null'],'null') + eq_(feat['boolean'],'true') + feat = fs.next() + eq_(feat['x'],0) + eq_(feat['y'],0) + eq_(feat['null'],'') + eq_(feat['boolean'],'false') + if __name__ == "__main__": setup() [eval(run)(visual=True) for run in dir() if 'test_' in run] From 15949b077ab40eaf6668fa4b8ac11e7e9ff83706 Mon Sep 17 00:00:00 2001 From: Dane Springmeyer Date: Wed, 2 Nov 2011 13:30:40 -0400 Subject: [PATCH 14/17] quadtree: remove unused variable --- utils/shapeindex/quadtree.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/utils/shapeindex/quadtree.hpp b/utils/shapeindex/quadtree.hpp index e12a40096..6714e24ba 100644 --- a/utils/shapeindex/quadtree.hpp +++ b/utils/shapeindex/quadtree.hpp @@ -262,8 +262,6 @@ private: { if (node && node->ext_.contains(item_ext)) { - coord2d c=node->ext_.center(); - double width=node->ext_.width(); double height=node->ext_.height(); From 1adbbdb57105e6d1007f74ec786c66bcca5e650b Mon Sep 17 00:00:00 2001 From: Dane Springmeyer Date: Wed, 2 Nov 2011 13:31:15 -0400 Subject: [PATCH 15/17] grid: initialize variables to avoid compiler warnings --- include/mapnik/grid/grid_util.hpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/include/mapnik/grid/grid_util.hpp b/include/mapnik/grid/grid_util.hpp index 8a48f11b0..2ea1220df 100644 --- a/include/mapnik/grid/grid_util.hpp +++ b/include/mapnik/grid/grid_util.hpp @@ -50,7 +50,10 @@ static inline void scale_grid(mapnik::grid::data_type & target, int th2 = target_height/2; int offs_x = rint((source_width-target_width-x_off_f*2*source_width)/2); int offs_y = rint((source_height-target_height-y_off_f*2*source_height)/2); - unsigned yprt, yprt1, xprt, xprt1; + unsigned yprt(0); + unsigned yprt1(0); + unsigned xprt(0); + unsigned xprt1(0); //no scaling or subpixel offset if (target_height == source_height && target_width == source_width && offs_x == 0 && offs_y == 0){ From 06ea13fe3c464415c90e537c88303ec95d793769 Mon Sep 17 00:00:00 2001 From: Dane Springmeyer Date: Wed, 2 Nov 2011 13:55:18 -0400 Subject: [PATCH 16/17] scons: link icudata if linking statically to deps --- src/build.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/build.py b/src/build.py index 31ffc2cb7..65f94c279 100644 --- a/src/build.py +++ b/src/build.py @@ -72,6 +72,8 @@ if env['THREADING'] == 'multi': if not env['RUNTIME_LINK'] == 'static': + if 'icuuc' in env['ICU_LIB_NAME']: + lib_env['LIBS'].append('icudata') if env['INTERNAL_LIBAGG']: lib_env['LIBS'].insert(0, 'agg') else: From b354e5e838e084c000acdc60a7a491a8c5e5a062 Mon Sep 17 00:00:00 2001 From: Dane Springmeyer Date: Wed, 2 Nov 2011 18:52:31 -0400 Subject: [PATCH 17/17] fix logic around icudata linking --- src/build.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/build.py b/src/build.py index 65f94c279..d519c8d72 100644 --- a/src/build.py +++ b/src/build.py @@ -70,10 +70,10 @@ if env['THREADING'] == 'multi': lib_env['LIBS'].append('boost_thread%s' % env['BOOST_APPEND']) - -if not env['RUNTIME_LINK'] == 'static': +if env['RUNTIME_LINK'] == 'static': if 'icuuc' in env['ICU_LIB_NAME']: lib_env['LIBS'].append('icudata') +else: if env['INTERNAL_LIBAGG']: lib_env['LIBS'].insert(0, 'agg') else: