csv: back off from null and boolean detection, since treating those values as strings is more predictable across rows

This commit is contained in:
Dane Springmeyer 2011-11-02 11:07:59 -04:00
parent 2d696dc73b
commit 6c8e4b2de0
3 changed files with 32 additions and 29 deletions

View file

@ -603,9 +603,12 @@ void csv_datasource::parse_csv(T& stream,
}
// add all values as attributes
// here we detect numbers and treat everything else as pure strings
// this is intentional since boolean and null types are not common in csv editors
if (value.empty())
{
boost::put(*feature,fld_name,mapnik::value_null());
UnicodeString ustr = tr.transcode(value.c_str());
boost::put(*feature,fld_name,ustr);
if (feature_count == 1)
{
desc_.add_descriptor(mapnik::attribute_descriptor(fld_name,mapnik::String));
@ -661,32 +664,12 @@ void csv_datasource::parse_csv(T& stream,
}
else
{
std::string value_lower = boost::algorithm::to_lower_copy(value);
if (value_lower == "true")
// fallback to normal string
UnicodeString ustr = tr.transcode(value.c_str());
boost::put(*feature,fld_name,ustr);
if (feature_count == 1)
{
boost::put(*feature,fld_name,true);
if (feature_count == 1)
{
desc_.add_descriptor(mapnik::attribute_descriptor(fld_name,mapnik::Boolean));
}
}
else if(value_lower == "false")
{
boost::put(*feature,fld_name,false);
if (feature_count == 1)
{
desc_.add_descriptor(mapnik::attribute_descriptor(fld_name,mapnik::Boolean));
}
}
else
{
// fallback to normal string
UnicodeString ustr = tr.transcode(value.c_str());
boost::put(*feature,fld_name,ustr);
if (feature_count == 1)
{
desc_.add_descriptor(mapnik::attribute_descriptor(fld_name,mapnik::String));
}
desc_.add_descriptor(mapnik::attribute_descriptor(fld_name,mapnik::String));
}
}
}

View file

@ -0,0 +1,3 @@
x,y,null,boolean
0,0,null,true
0,0,,false
1 x y null boolean
2 0 0 null true
3 0 0 false

View file

@ -66,14 +66,14 @@ if 'csv' in mapnik2.DatasourceCache.instance().plugin_names():
eq_(len(ds.fields()),10)
eq_(len(ds.field_types()),10)
eq_(ds.fields(),['x', 'y', 'text', 'date', 'integer', 'boolean', 'float', 'time', 'datetime', 'empty_column'])
eq_(ds.field_types(),['int', 'int', 'str', 'str', 'int', 'bool', 'float', 'str', 'str', 'str'])
eq_(ds.field_types(),['int', 'int', 'str', 'str', 'int', 'str', 'float', 'str', 'str', 'str'])
fs = ds.featureset()
feat = fs.next()
attr = {'x': 0, 'empty_column': None, 'text': u'a b', 'float': 1.0, 'datetime': u'1971-01-01T04:14:00', 'y': 0, 'boolean': True, 'time': u'04:14:00', 'date': u'1971-01-01', 'integer': 40}
attr = {'x': 0, 'empty_column': u'', 'text': u'a b', 'float': 1.0, 'datetime': u'1971-01-01T04:14:00', 'y': 0, 'boolean': u'True', 'time': u'04:14:00', 'date': u'1971-01-01', 'integer': 40}
eq_(feat.attributes,attr)
while feat:
eq_(len(feat),10)
eq_(feat['empty_column'],None)
eq_(feat['empty_column'],u'')
feat = fs.next()
def test_slashes(**kwargs):
@ -197,6 +197,23 @@ if 'csv' in mapnik2.DatasourceCache.instance().plugin_names():
eq_(feat['y'],0)
eq_(feat['z'],'hello')
# Checks that the datasource returned by get_csv_ds() for
# 'nulls_and_booleans_as_strings.csv' exposes the 'null' and 'boolean'
# columns as string fields instead of null/boolean values.
# NOTE(review): despite the function name, only the empty cell is expected to
# come back as an empty string; the keywords 'null', 'true' and 'false' are
# expected back as their literal text.
def test_that_null_and_bool_keywords_are_empty_strings(**kwargs):
ds = get_csv_ds('nulls_and_booleans_as_strings.csv')
# Schema: four columns, with x/y typed as int and the other two as str.
eq_(len(ds.fields()),4)
eq_(ds.fields(),['x','y','null','boolean'])
eq_(ds.field_types(),['int','int','str','str'])
fs = ds.featureset()
# First feature: the keywords 'null' and 'true' are returned as plain text.
feat = fs.next()
eq_(feat['x'],0)
eq_(feat['y'],0)
eq_(feat['null'],'null')
eq_(feat['boolean'],'true')
# Second feature: the 'null' column is expected to be an empty string (not
# None), and 'false' is returned as plain text.
feat = fs.next()
eq_(feat['x'],0)
eq_(feat['y'],0)
eq_(feat['null'],'')
eq_(feat['boolean'],'false')
# When executed as a script: call setup(), then look up every name returned by
# dir() that contains 'test_' and call it with visual=True.
if __name__ == "__main__":
setup()
[eval(run)(visual=True) for run in dir() if 'test_' in run]