csv plugin: improve newline detection - closes #1497
This commit is contained in:
parent
91a932d7e9
commit
fca564614f
3 changed files with 49 additions and 25 deletions
|
@ -8,6 +8,8 @@ For a complete change history, see the git log.
|
|||
|
||||
## Future
|
||||
|
||||
- Improved detection of newlines in CSV files - now more robust in the face of mixed newline types (#1497)
|
||||
|
||||
- Allow style level compositing operations to work outside of featureset extents across tiled requests (#1477)
|
||||
|
||||
- Support for encoding `literal` postgres types as strings 69fb17cd3/#1466
|
||||
|
|
|
@ -172,33 +172,19 @@ void csv_datasource::parse_csv(T & stream,
|
|||
// autodetect newlines
|
||||
char newline = '\n';
|
||||
bool has_newline = false;
|
||||
int newline_count = 0;
|
||||
int carriage_count = 0;
|
||||
for (unsigned idx = 0; idx < file_length_; idx++)
|
||||
for (unsigned lidx = 0; lidx < file_length_ && lidx < 4000; lidx++)
|
||||
{
|
||||
char c = static_cast<char>(stream.get());
|
||||
if (c == '\r')
|
||||
{
|
||||
newline = '\r';
|
||||
has_newline = true;
|
||||
break;
|
||||
}
|
||||
if (c == '\n')
|
||||
{
|
||||
++newline_count;
|
||||
has_newline = true;
|
||||
}
|
||||
else if (c == '\r')
|
||||
{
|
||||
++carriage_count;
|
||||
has_newline = true;
|
||||
}
|
||||
// scan up to ~4000 bytes (or the whole file, if shorter) before comparing counts
|
||||
if (idx == file_length_-1 || idx > 4000)
|
||||
{
|
||||
if (newline_count > carriage_count)
|
||||
{
|
||||
break;
|
||||
}
|
||||
else if (carriage_count > newline_count)
|
||||
{
|
||||
newline = '\r';
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -206,7 +206,7 @@ if 'csv' in mapnik.DatasourceCache.plugin_names():
|
|||
eq_(desc['type'],mapnik.DataType.Vector)
|
||||
eq_(desc['encoding'],'utf-8')
|
||||
|
||||
def test_windows_newlines(**kwargs):
|
||||
def test_reading_windows_newlines(**kwargs):
|
||||
ds = get_csv_ds('windows_newlines.csv')
|
||||
eq_(len(ds.fields()),3)
|
||||
feats = ds.all_features()
|
||||
|
@ -222,8 +222,8 @@ if 'csv' in mapnik.DatasourceCache.plugin_names():
|
|||
eq_(desc['type'],mapnik.DataType.Vector)
|
||||
eq_(desc['encoding'],'utf-8')
|
||||
|
||||
def test_mac_newlines(**kwargs):
|
||||
ds = get_csv_ds('windows_newlines.csv')
|
||||
def test_reading_mac_newlines(**kwargs):
|
||||
ds = get_csv_ds('mac_newlines.csv')
|
||||
eq_(len(ds.fields()),3)
|
||||
feats = ds.all_features()
|
||||
eq_(len(feats),1)
|
||||
|
@ -238,6 +238,42 @@ if 'csv' in mapnik.DatasourceCache.plugin_names():
|
|||
eq_(desc['type'],mapnik.DataType.Vector)
|
||||
eq_(desc['encoding'],'utf-8')
|
||||
|
||||
def check_newlines(filename):
|
||||
ds = get_csv_ds(filename)
|
||||
eq_(len(ds.fields()),3)
|
||||
feats = ds.all_features()
|
||||
eq_(len(feats),1)
|
||||
fs = ds.featureset()
|
||||
feat = fs.next()
|
||||
eq_(feat['x'],0)
|
||||
eq_(feat['y'],0)
|
||||
eq_(feat['line'],'many\n lines\n of text\n with unix newlines')
|
||||
desc = ds.describe()
|
||||
eq_(desc['geometry_type'],mapnik.DataGeometryType.Point)
|
||||
eq_(desc['name'],'csv')
|
||||
eq_(desc['type'],mapnik.DataType.Vector)
|
||||
eq_(desc['encoding'],'utf-8')
|
||||
|
||||
def test_mixed_mac_unix_newlines(**kwargs):
|
||||
check_newlines('mac_newlines_with_unix_inline.csv')
|
||||
|
||||
def test_mixed_mac_unix_newlines_escaped(**kwargs):
|
||||
check_newlines('mac_newlines_with_unix_inline_escaped.csv')
|
||||
|
||||
# Too hard to support this case
|
||||
#def test_mixed_unix_windows_newlines(**kwargs):
|
||||
# check_newlines('unix_newlines_with_windows_inline.csv')
|
||||
|
||||
# Too hard to support this case
|
||||
#def test_mixed_unix_windows_newlines_escaped(**kwargs):
|
||||
# check_newlines('unix_newlines_with_windows_inline_escaped.csv')
|
||||
|
||||
def test_mixed_windows_unix_newlines(**kwargs):
|
||||
check_newlines('windows_newlines_with_unix_inline.csv')
|
||||
|
||||
def test_mixed_windows_unix_newlines_escaped(**kwargs):
|
||||
check_newlines('windows_newlines_with_unix_inline_escaped.csv')
|
||||
|
||||
def test_tabs(**kwargs):
|
||||
ds = get_csv_ds('tabs_in_csv.csv')
|
||||
eq_(len(ds.fields()),3)
|
||||
|
|
Loading…
Reference in a new issue