fix number handling in csv plugin and add tests for 64bit integer support in sqlite and postgis datasources - refs #1669

Dane Springmeyer 2012-12-21 19:53:33 -08:00
parent 9b3d2b7d8e
commit aee275a08c
9 changed files with 138 additions and 29 deletions

View file

@@ -652,7 +652,7 @@ void csv_datasource::parse_csv(T & stream,
                 }
             }
-            // now, add attributes, skipping any WKT or JSON fiels
+            // now, add attributes, skipping any WKT or JSON fields
             if ((has_wkt_field) && (i == wkt_idx)) continue;
             if ((has_json_field) && (i == json_idx)) continue;
             /* First we detect likely strings, then try parsing likely numbers,
@@ -664,27 +664,34 @@ void csv_datasource::parse_csv(T & stream,
                to assume are numbers)
             */
+            bool matched = false;
             bool has_dot = value.find(".") != std::string::npos;
             if (value.empty() ||
                 (value_length > 20) ||
                 (value_length > 1 && !has_dot && value[0] == '0'))
             {
+                matched = true;
                 feature->put(fld_name,tr.transcode(value.c_str()));
                 if (feature_count == 1)
                 {
                     desc_.add_descriptor(mapnik::attribute_descriptor(fld_name,mapnik::String));
                 }
             }
-            else if ((value[0] >= '0' && value[0] <= '9') || value[0] == '-')
+            else if ((value[0] >= '0' && value[0] <= '9') ||
+                     value[0] == '-' ||
+                     value[0] == '+' ||
+                     value[0] == '.')
+            {
+                bool has_e = value.find("e") != std::string::npos;
+                if (has_dot || has_e)
                 {
                     double float_val = 0.0;
                     std::string::const_iterator str_beg = value.begin();
                     std::string::const_iterator str_end = value.end();
-                    bool r = qi::phrase_parse(str_beg,str_end,qi::double_,ascii::space,float_val);
-                    if (r && (str_beg == str_end))
-                    {
-                        if (has_dot)
+                    if (qi::phrase_parse(str_beg,str_end,qi::double_,ascii::space,float_val)
+                        && (str_beg == str_end))
                     {
+                        matched = true;
                         feature->put(fld_name,float_val);
                         if (feature_count == 1)
                         {
@@ -693,9 +700,17 @@ void csv_datasource::parse_csv(T & stream,
                                     fld_name,mapnik::Double));
                         }
                     }
+                }
                 else
                 {
-                    feature->put(fld_name,static_cast<mapnik::value_integer>(float_val));
+                    mapnik::value_integer int_val = 0;
+                    std::string::const_iterator str_beg = value.begin();
+                    std::string::const_iterator str_end = value.end();
+                    if (qi::phrase_parse(str_beg,str_end,qi::long_long,ascii::space,int_val)
+                        && (str_beg == str_end))
+                    {
+                        matched = true;
+                        feature->put(fld_name,int_val);
                         if (feature_count == 1)
                         {
                             desc_.add_descriptor(
@@ -704,19 +719,8 @@ void csv_datasource::parse_csv(T & stream,
                             }
                         }
                     }
-                    else
-                    {
-                        // fallback to normal string
-                        feature->put(fld_name,tr.transcode(value.c_str()));
-                        if (feature_count == 1)
-                        {
-                            desc_.add_descriptor(
-                                mapnik::attribute_descriptor(
-                                    fld_name,mapnik::String));
-                        }
-                    }
                 }
-            }
-            else
+            if (!matched)
             {
                 // fallback to normal string
                 feature->put(fld_name,tr.transcode(value.c_str()));
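In short, the parse step now classifies each cell in three passes: obvious strings first (empty, longer than 20 characters, or leading-zero values), then doubles for anything containing a '.' or an 'e', then 64-bit integers via qi::long_long, with anything that fails to parse falling through to the string fallback guarded by matched. Below is a rough standalone sketch of that heuristic, assuming only Boost.Spirit is available; classify() and csv_type are illustrative names rather than plugin API, and the real code writes the parsed value straight into a mapnik feature instead of returning a tag.

#include <boost/spirit/include/qi.hpp>
#include <iostream>
#include <string>

namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;

enum class csv_type { String, Integer, Double };

csv_type classify(std::string const& value)
{
    // empty, very long, or leading-zero values (e.g. zip codes) stay strings
    if (value.empty() || value.size() > 20) return csv_type::String;
    bool has_dot = value.find(".") != std::string::npos;
    if (value.size() > 1 && !has_dot && value[0] == '0') return csv_type::String;
    // only values that look numeric are handed to the Spirit parsers
    if (!((value[0] >= '0' && value[0] <= '9') ||
          value[0] == '-' || value[0] == '+' || value[0] == '.'))
        return csv_type::String;
    bool has_e = value.find("e") != std::string::npos;
    std::string::const_iterator beg = value.begin();
    std::string::const_iterator end = value.end();
    if (has_dot || has_e)
    {
        double d = 0.0;
        if (qi::phrase_parse(beg, end, qi::double_, ascii::space, d) && beg == end)
            return csv_type::Double;
    }
    else
    {
        long long i = 0; // stands in for mapnik::value_integer
        if (qi::phrase_parse(beg, end, qi::long_long, ascii::space, i) && beg == end)
            return csv_type::Integer;
    }
    return csv_type::String; // the "if (!matched)" fallback
}

int main()
{
    std::cout << (classify("9223372036854775807") == csv_type::Integer) << "\n"; // 1: full 64-bit range
    std::cout << (classify("1e-06") == csv_type::Double)  << "\n";               // 1: exponent but no dot
    std::cout << (classify("0123") == csv_type::String)   << "\n";               // 1: leading zero kept as string
}

Run against the columns of the new test CSVs, this heuristic reproduces the field types the python tests assert: 'int','int','int' for 64bit_int.csv and 'int','int','float' for number_types.csv.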

View file

@@ -0,0 +1,3 @@
+x,y,bigint
+0,0,2147483648
+0,0,9223372036854775807
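The two values are not arbitrary: 2147483648 is INT32_MAX + 1, the first integer that no longer fits in a signed 32-bit int, and 9223372036854775807 is INT64_MAX, which the python test also spells as sys.maxint and 0x7FFFFFFFFFFFFFFF. A minimal standalone check of those boundaries:

#include <cstdint>
#include <iostream>
#include <limits>

int main()
{
    // 2147483648 is one past the largest signed 32-bit value ...
    std::cout << std::numeric_limits<std::int32_t>::max() << "\n"; // 2147483647
    // ... so it only round-trips through a 64-bit type such as the
    // mapnik::value_integer used in the csv hunk above
    std::cout << std::numeric_limits<std::int64_t>::max() << "\n"; // 9223372036854775807
    // INT64_MAX is the same number the tests write as 0x7FFFFFFFFFFFFFFF
    std::cout << (std::numeric_limits<std::int64_t>::max() == 0x7FFFFFFFFFFFFFFF) << "\n"; // 1
}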

View file

@@ -0,0 +1,9 @@
+x,y,floats
+0,0,.0
+0,0,+.0
+0,0,1e-06
+0,0,-1e-06
+0,0,0.000001
+0,0,1.234e+16
+0,0,1.234e16
+0,0,"1.234e16"
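These rows exercise the corners of the new float path: '.' and '+' are now accepted as a leading character, and the has_e check keeps exponent forms without a dot (1e-06, -1e-06) on the double branch, where the old code would have pushed them through static_cast<mapnik::value_integer> and truncated them to 0. A quick standalone way to confirm that Spirit's double_ parser consumes every one of these literals (a sketch, not test code from the repo):

#include <boost/spirit/include/qi.hpp>
#include <iostream>
#include <string>
#include <vector>

namespace qi = boost::spirit::qi;

int main()
{
    // the value column from number_types.csv
    std::vector<std::string> literals = {".0", "+.0", "1e-06", "-1e-06",
                                         "0.000001", "1.234e+16", "1.234e16"};
    for (std::string const& v : literals)
    {
        double d = 0.0;
        std::string::const_iterator beg = v.begin();
        std::string::const_iterator end = v.end();
        bool ok = qi::parse(beg, end, qi::double_, d) && beg == end;
        std::cout << v << " -> " << (ok ? "double " : "string ") << d << "\n";
    }
}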

Binary file not shown.

View file

@@ -2,6 +2,7 @@
 # -*- coding: utf-8 -*-
 import glob
+import sys
 from nose.tools import *
 from utilities import execution_path
@@ -529,6 +530,46 @@ if 'csv' in mapnik.DatasourceCache.plugin_names():
         eq_(desc['geometry_type'],mapnik.DataGeometryType.Point)
         eq_(len(ds.all_features()),1)

+    def test_that_64bit_int_fields_work(**kwargs):
+        ds = get_csv_ds('64bit_int.csv')
+        eq_(len(ds.fields()),3)
+        eq_(ds.fields(),['x','y','bigint'])
+        eq_(ds.field_types(),['int','int','int'])
+        fs = ds.featureset()
+        feat = fs.next()
+        eq_(feat['bigint'],2147483648)
+        feat = fs.next()
+        eq_(feat['bigint'],sys.maxint)
+        eq_(feat['bigint'],9223372036854775807)
+        eq_(feat['bigint'],0x7FFFFFFFFFFFFFFF)
+        desc = ds.describe()
+        eq_(desc['geometry_type'],mapnik.DataGeometryType.Point)
+        eq_(len(ds.all_features()),2)
+
+    def test_various_number_types(**kwargs):
+        ds = get_csv_ds('number_types.csv')
+        eq_(len(ds.fields()),3)
+        eq_(ds.fields(),['x','y','floats'])
+        eq_(ds.field_types(),['int','int','float'])
+        fs = ds.featureset()
+        feat = fs.next()
+        eq_(feat['floats'],.0)
+        feat = fs.next()
+        eq_(feat['floats'],+.0)
+        feat = fs.next()
+        eq_(feat['floats'],1e-06)
+        feat = fs.next()
+        eq_(feat['floats'],-1e-06)
+        feat = fs.next()
+        eq_(feat['floats'],0.000001)
+        feat = fs.next()
+        eq_(feat['floats'],1.234e+16)
+        feat = fs.next()
+        eq_(feat['floats'],1.234e+16)
+        desc = ds.describe()
+        eq_(desc['geometry_type'],mapnik.DataGeometryType.Point)
+        eq_(len(ds.all_features()),8)
+
 if __name__ == "__main__":
     setup()
     [eval(run)(visual=True) for run in dir() if 'test_' in run]

View file

@@ -239,7 +239,9 @@ null_equality = [
     [.1,False,float],
     [False,False,int], # TODO - should become bool
     [True,False,int], # TODO - should become bool
-    [None,True,None]
+    [None,True,None],
+    [2147483648,False,int],
+    [922337203685477580,False,int]
 ]

 def test_expressions_with_null_equality():
@@ -285,7 +287,9 @@ truthyness = [
     [.1,True,float],
     [False,False,int], # TODO - should become bool
     [True,True,int], # TODO - should become bool
-    [None,False,None]
+    [None,False,None],
+    [2147483648,True,int],
+    [922337203685477580,True,int]
 ]

 def test_expressions_for_thruthyness():

View file

@@ -132,6 +132,12 @@ CREATE TABLE test7(gid serial PRIMARY KEY, geom geometry);
 INSERT INTO test7(gid, geom) values (1, GeomFromEWKT('SRID=4326;GEOMETRYCOLLECTION(MULTILINESTRING((10 10,20 20,10 40),(40 40,30 30,40 20,30 10)),LINESTRING EMPTY)'));
 '''

+insert_table_8 = '''
+CREATE TABLE test8(gid serial PRIMARY KEY,int_field bigint, geom geometry);
+INSERT INTO test8(gid, int_field, geom) values (1, 2147483648, ST_MakePoint(1,1));
+INSERT INTO test8(gid, int_field, geom) values (2, 922337203685477580, ST_MakePoint(1,1));
+'''
+
 def postgis_setup():
     call('dropdb %s' % MAPNIK_TEST_DBNAME,silent=True)
@@ -146,6 +152,7 @@ def postgis_setup():
     call("""psql -q %s -c '%s'""" % (MAPNIK_TEST_DBNAME,insert_table_5b),silent=False)
     call('''psql -q %s -c "%s"''' % (MAPNIK_TEST_DBNAME,insert_table_6),silent=False)
     call('''psql -q %s -c "%s"''' % (MAPNIK_TEST_DBNAME,insert_table_7),silent=False)
+    call('''psql -q %s -c "%s"''' % (MAPNIK_TEST_DBNAME,insert_table_8),silent=False)

 def postgis_takedown():
     pass
@@ -472,7 +479,21 @@ if 'postgis' in mapnik.DatasourceCache.plugin_names() \
         t.start()
         t.join()

+    def test_that_64bit_int_fields_work():
+        ds = mapnik.PostGIS(dbname=MAPNIK_TEST_DBNAME,
+                            table='test8')
+        eq_(len(ds.fields()),2)
+        eq_(ds.fields(),['gid','int_field'])
+        eq_(ds.field_types(),['int','int'])
+        fs = ds.featureset()
+        feat = fs.next()
+        eq_(feat.id(),1)
+        eq_(feat['gid'],1)
+        eq_(feat['int_field'],2147483648)
+        feat = fs.next()
+        eq_(feat.id(),2)
+        eq_(feat['gid'],2)
+        eq_(feat['int_field'],922337203685477580)
+
     atexit.register(postgis_takedown)

View file

@@ -268,16 +268,24 @@ def test_32bit_int_id():
     eq_(grid.get_pixel(128,128),int32)
     utf1 = grid.encode('utf',resolution=4)
     eq_(utf1['keys'],[str(int32)])
-    # this will fail because it is used internally to mark alpha
-    #max_neg = -(int32+1)
-    # so we use max neg-1
     max_neg = -(int32)
     grid = gen_grid_for_id(max_neg)
     eq_(grid.get_pixel(128,128),max_neg)
     utf1 = grid.encode('utf',resolution=4)
     eq_(utf1['keys'],[str(max_neg)])

+def test_64bit_int_id():
+    int64 = 0x7FFFFFFFFFFFFFFF
+    grid = gen_grid_for_id(int64)
+    eq_(grid.get_pixel(128,128),int64)
+    utf1 = grid.encode('utf',resolution=4)
+    eq_(utf1['keys'],[str(int64)])
+    max_neg = -(int64)
+    grid = gen_grid_for_id(max_neg)
+    eq_(grid.get_pixel(128,128),max_neg)
+    utf1 = grid.encode('utf',resolution=4)
+    eq_(utf1['keys'],[str(max_neg)])
+
 def test_id_zero():
     grid = gen_grid_for_id(0)
     eq_(grid.get_pixel(128,128),0)

View file

@@ -360,6 +360,25 @@ if 'sqlite' in mapnik.DatasourceCache.plugin_names():
         eq_(len(feat.geometries()),1)
         eq_(feat.geometries()[0].to_wkt(),'Point(0 0)')

+    def test_that_64bit_int_fields_work():
+        ds = mapnik.SQLite(file='../data/sqlite/64bit_int.sqlite',
+                           table='int_table',
+                           use_spatial_index=False
+                           )
+        eq_(len(ds.fields()),3)
+        eq_(ds.fields(),['OGC_FID','id','bigint'])
+        eq_(ds.field_types(),['int','int','int'])
+        fs = ds.featureset()
+        feat = fs.next()
+        eq_(feat.id(),1)
+        eq_(feat['OGC_FID'],1)
+        eq_(feat['bigint'],2147483648)
+        feat = fs.next()
+        eq_(feat.id(),2)
+        eq_(feat['OGC_FID'],2)
+        eq_(feat['bigint'],922337203685477580)
+
 if __name__ == "__main__":
     setup()
     [eval(run)() for run in dir() if 'test_' in run]