csv plugin: allow values with leading zeros to stay as strings - closes #993 - refs https://github.com/mapbox/tilemill/issues/1007
This commit is contained in:
parent
1c4b67ba98
commit
0bb1073b3f
2 changed files with 58 additions and 30 deletions
|
@ -490,7 +490,9 @@ void csv_datasource::parse_csv(T& stream,
|
||||||
std::string::const_iterator str_end = value.end();
|
std::string::const_iterator str_end = value.end();
|
||||||
bool r = qi::phrase_parse(str_beg,str_end,
|
bool r = qi::phrase_parse(str_beg,str_end,
|
||||||
(
|
(
|
||||||
qi::lit("POINT") >> '(' >> double_[ref(x) = _1] >> double_[ref(y) = _1] >> ')'
|
qi::lit("POINT") >> '('
|
||||||
|
>> double_[ref(x) = _1]
|
||||||
|
>> double_[ref(y) = _1] >> ')'
|
||||||
),
|
),
|
||||||
ascii::space);
|
ascii::space);
|
||||||
|
|
||||||
|
@ -615,28 +617,27 @@ void csv_datasource::parse_csv(T& stream,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// add all values as attributes
|
// now, add all values as attributes
|
||||||
// here we detect numbers and treat everything else as pure strings
|
/* First we detect likely strings, then try parsing likely numbers,
|
||||||
// this is intentional since boolean and null types are not common in csv editors
|
finally falling back to string type
|
||||||
if (value.empty())
|
* We intentionally do not try to detect boolean or null types
|
||||||
{
|
since they are not common in CSV
|
||||||
UnicodeString ustr = tr.transcode(value.c_str());
|
* Likely strings are either empty values, very long values
|
||||||
boost::put(*feature,fld_name,ustr);
|
or values with leading zeros like 001 (which are not safe
|
||||||
if (feature_count == 1)
|
to assume are numbers)
|
||||||
{
|
*/
|
||||||
desc_.add_descriptor(mapnik::attribute_descriptor(fld_name,mapnik::String));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// only true strings are this long
|
|
||||||
else if (value_length > 20)
|
|
||||||
{
|
|
||||||
UnicodeString ustr = tr.transcode(value.c_str());
|
|
||||||
boost::put(*feature,fld_name,ustr);
|
|
||||||
if (feature_count == 1)
|
|
||||||
{
|
|
||||||
desc_.add_descriptor(mapnik::attribute_descriptor(fld_name,mapnik::String));
|
|
||||||
}
|
|
||||||
|
|
||||||
|
bool has_dot = value.find(".") != std::string::npos;
|
||||||
|
if (value.empty() ||
|
||||||
|
(value_length > 20) ||
|
||||||
|
(value_length > 1 && !has_dot && value[0] == '0'))
|
||||||
|
{
|
||||||
|
UnicodeString ustr = tr.transcode(value.c_str());
|
||||||
|
boost::put(*feature,fld_name,ustr);
|
||||||
|
if (feature_count == 1)
|
||||||
|
{
|
||||||
|
desc_.add_descriptor(mapnik::attribute_descriptor(fld_name,mapnik::String));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else if ((value[0] >= '0' && value[0] <= '9') || value[0] == '-')
|
else if ((value[0] >= '0' && value[0] <= '9') || value[0] == '-')
|
||||||
{
|
{
|
||||||
|
@ -646,12 +647,14 @@ void csv_datasource::parse_csv(T& stream,
|
||||||
bool r = qi::phrase_parse(str_beg,str_end,qi::double_,ascii::space,float_val);
|
bool r = qi::phrase_parse(str_beg,str_end,qi::double_,ascii::space,float_val);
|
||||||
if (r && (str_beg == str_end))
|
if (r && (str_beg == str_end))
|
||||||
{
|
{
|
||||||
if (value.find(".") != std::string::npos)
|
if (has_dot)
|
||||||
{
|
{
|
||||||
boost::put(*feature,fld_name,float_val);
|
boost::put(*feature,fld_name,float_val);
|
||||||
if (feature_count == 1)
|
if (feature_count == 1)
|
||||||
{
|
{
|
||||||
desc_.add_descriptor(mapnik::attribute_descriptor(fld_name,mapnik::Double));
|
desc_.add_descriptor(
|
||||||
|
mapnik::attribute_descriptor(
|
||||||
|
fld_name,mapnik::Double));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -660,7 +663,9 @@ void csv_datasource::parse_csv(T& stream,
|
||||||
boost::put(*feature,fld_name,val);
|
boost::put(*feature,fld_name,val);
|
||||||
if (feature_count == 1)
|
if (feature_count == 1)
|
||||||
{
|
{
|
||||||
desc_.add_descriptor(mapnik::attribute_descriptor(fld_name,mapnik::Integer));
|
desc_.add_descriptor(
|
||||||
|
mapnik::attribute_descriptor(
|
||||||
|
fld_name,mapnik::Integer));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -671,7 +676,9 @@ void csv_datasource::parse_csv(T& stream,
|
||||||
boost::put(*feature,fld_name,ustr);
|
boost::put(*feature,fld_name,ustr);
|
||||||
if (feature_count == 1)
|
if (feature_count == 1)
|
||||||
{
|
{
|
||||||
desc_.add_descriptor(mapnik::attribute_descriptor(fld_name,mapnik::String));
|
desc_.add_descriptor(
|
||||||
|
mapnik::attribute_descriptor(
|
||||||
|
fld_name,mapnik::String));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -682,7 +689,9 @@ void csv_datasource::parse_csv(T& stream,
|
||||||
boost::put(*feature,fld_name,ustr);
|
boost::put(*feature,fld_name,ustr);
|
||||||
if (feature_count == 1)
|
if (feature_count == 1)
|
||||||
{
|
{
|
||||||
desc_.add_descriptor(mapnik::attribute_descriptor(fld_name,mapnik::String));
|
desc_.add_descriptor(
|
||||||
|
mapnik::attribute_descriptor(
|
||||||
|
fld_name,mapnik::String));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -227,6 +227,25 @@ if 'csv' in mapnik.DatasourceCache.instance().plugin_names():
|
||||||
query.add_property_name('bogus')
|
query.add_property_name('bogus')
|
||||||
fs = ds.features(query)
|
fs = ds.features(query)
|
||||||
|
|
||||||
|
def test_that_leading_zeros_mean_strings(**kwargs):
|
||||||
|
ds = get_csv_ds('leading_zeros.csv')
|
||||||
|
eq_(len(ds.fields()),3)
|
||||||
|
eq_(ds.fields(),['x','y','fips'])
|
||||||
|
eq_(ds.field_types(),['int','int','str'])
|
||||||
|
fs = ds.featureset()
|
||||||
|
feat = fs.next()
|
||||||
|
eq_(feat['x'],0)
|
||||||
|
eq_(feat['y'],0)
|
||||||
|
eq_(feat['fips'],'001')
|
||||||
|
feat = fs.next()
|
||||||
|
eq_(feat['x'],0)
|
||||||
|
eq_(feat['y'],0)
|
||||||
|
eq_(feat['fips'],'003')
|
||||||
|
feat = fs.next()
|
||||||
|
eq_(feat['x'],0)
|
||||||
|
eq_(feat['y'],0)
|
||||||
|
eq_(feat['fips'],'005')
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
setup()
|
setup()
|
||||||
[eval(run)(visual=True) for run in dir() if 'test_' in run]
|
[eval(run)(visual=True) for run in dir() if 'test_' in run]
|
||||||
|
|
Loading…
Reference in a new issue