csv plugin: allow values with leading zeros to stay as strings - closes #993 - refs https://github.com/mapbox/tilemill/issues/1007

This commit is contained in:
Dane Springmeyer 2011-12-12 15:55:33 -08:00
parent 1c4b67ba98
commit 0bb1073b3f
2 changed files with 58 additions and 30 deletions

View file

@ -489,10 +489,12 @@ void csv_datasource::parse_csv(T& stream,
std::string::const_iterator str_beg = value.begin(); std::string::const_iterator str_beg = value.begin();
std::string::const_iterator str_end = value.end(); std::string::const_iterator str_end = value.end();
bool r = qi::phrase_parse(str_beg,str_end, bool r = qi::phrase_parse(str_beg,str_end,
( (
qi::lit("POINT") >> '(' >> double_[ref(x) = _1] >> double_[ref(y) = _1] >> ')' qi::lit("POINT") >> '('
), >> double_[ref(x) = _1]
ascii::space); >> double_[ref(y) = _1] >> ')'
),
ascii::space);
if (r && (str_beg == str_end)) if (r && (str_beg == str_end))
{ {
@ -615,28 +617,27 @@ void csv_datasource::parse_csv(T& stream,
} }
} }
// add all values as attributes // now, add all values as attributes
// here we detect numbers and treat everything else as pure strings /* First we detect likely strings, then try parsing likely numbers,
// this is intentional since boolean and null types are not common in csv editors finally falling back to string type
if (value.empty()) * We intentionally do not try to detect boolean or null types
{ since they are not common in csv
UnicodeString ustr = tr.transcode(value.c_str()); * Likely strings are either empty values, very long values
boost::put(*feature,fld_name,ustr); or value with leading zeros like 001 (which are not safe
if (feature_count == 1) to assume are numbers)
{ */
desc_.add_descriptor(mapnik::attribute_descriptor(fld_name,mapnik::String));
}
}
// only true strings are this long
else if (value_length > 20)
{
UnicodeString ustr = tr.transcode(value.c_str());
boost::put(*feature,fld_name,ustr);
if (feature_count == 1)
{
desc_.add_descriptor(mapnik::attribute_descriptor(fld_name,mapnik::String));
}
bool has_dot = value.find(".") != std::string::npos;
if (value.empty() ||
(value_length > 20) ||
(value_length > 1 && !has_dot && value[0] == '0'))
{
UnicodeString ustr = tr.transcode(value.c_str());
boost::put(*feature,fld_name,ustr);
if (feature_count == 1)
{
desc_.add_descriptor(mapnik::attribute_descriptor(fld_name,mapnik::String));
}
} }
else if ((value[0] >= '0' && value[0] <= '9') || value[0] == '-') else if ((value[0] >= '0' && value[0] <= '9') || value[0] == '-')
{ {
@ -646,12 +647,14 @@ void csv_datasource::parse_csv(T& stream,
bool r = qi::phrase_parse(str_beg,str_end,qi::double_,ascii::space,float_val); bool r = qi::phrase_parse(str_beg,str_end,qi::double_,ascii::space,float_val);
if (r && (str_beg == str_end)) if (r && (str_beg == str_end))
{ {
if (value.find(".") != std::string::npos) if (has_dot)
{ {
boost::put(*feature,fld_name,float_val); boost::put(*feature,fld_name,float_val);
if (feature_count == 1) if (feature_count == 1)
{ {
desc_.add_descriptor(mapnik::attribute_descriptor(fld_name,mapnik::Double)); desc_.add_descriptor(
mapnik::attribute_descriptor(
fld_name,mapnik::Double));
} }
} }
else else
@ -660,7 +663,9 @@ void csv_datasource::parse_csv(T& stream,
boost::put(*feature,fld_name,val); boost::put(*feature,fld_name,val);
if (feature_count == 1) if (feature_count == 1)
{ {
desc_.add_descriptor(mapnik::attribute_descriptor(fld_name,mapnik::Integer)); desc_.add_descriptor(
mapnik::attribute_descriptor(
fld_name,mapnik::Integer));
} }
} }
} }
@ -671,7 +676,9 @@ void csv_datasource::parse_csv(T& stream,
boost::put(*feature,fld_name,ustr); boost::put(*feature,fld_name,ustr);
if (feature_count == 1) if (feature_count == 1)
{ {
desc_.add_descriptor(mapnik::attribute_descriptor(fld_name,mapnik::String)); desc_.add_descriptor(
mapnik::attribute_descriptor(
fld_name,mapnik::String));
} }
} }
} }
@ -682,7 +689,9 @@ void csv_datasource::parse_csv(T& stream,
boost::put(*feature,fld_name,ustr); boost::put(*feature,fld_name,ustr);
if (feature_count == 1) if (feature_count == 1)
{ {
desc_.add_descriptor(mapnik::attribute_descriptor(fld_name,mapnik::String)); desc_.add_descriptor(
mapnik::attribute_descriptor(
fld_name,mapnik::String));
} }
} }
} }

View file

@ -227,6 +227,25 @@ if 'csv' in mapnik.DatasourceCache.instance().plugin_names():
query.add_property_name('bogus') query.add_property_name('bogus')
fs = ds.features(query) fs = ds.features(query)
def test_that_leading_zeros_mean_strings(**kwargs):
    """Check that zero-padded values in 'leading_zeros.csv' stay strings.

    The datasource must expose the fields x, y and fips with the types
    int, int and str, and the first three features must carry the fips
    values '001', '003' and '005' with their leading zeros intact.
    """
    datasource = get_csv_ds('leading_zeros.csv')

    # Schema checks: field count, field names, then detected types.
    eq_(len(datasource.fields()), 3)
    eq_(datasource.fields(), ['x', 'y', 'fips'])
    eq_(datasource.field_types(), ['int', 'int', 'str'])

    # Pull exactly three features, one per expected fips value, in order.
    featureset = datasource.featureset()
    for expected_fips in ('001', '003', '005'):
        feature = featureset.next()
        eq_(feature['x'], 0)
        eq_(feature['y'], 0)
        eq_(feature['fips'], expected_fips)
if __name__ == "__main__": if __name__ == "__main__":
setup() setup()
[eval(run)(visual=True) for run in dir() if 'test_' in run] [eval(run)(visual=True) for run in dir() if 'test_' in run]