csv plugin: support single row data and fix warning when no data can be parsed

2012-08-31 12:07:35 -07:00 · 2012-08-31 12:07:35 -07:00 · affecb0f32
commit affecb0f32
parent c1102cbb7a
7 changed files with 105 additions and 8 deletions
--- a/plugins/input/csv/csv_datasource.cpp
+++ b/plugins/input/csv/csv_datasource.cpp
@ -143,7 +143,7 @@ void csv_datasource::bind() const
 }
 template <typename T>
-void csv_datasource::parse_csv(T& stream,
+void csv_datasource::parse_csv(T & stream,
                               std::string const& escape,
                               std::string const& separator,
                               std::string const& quote) const
@ -171,6 +171,7 @@ void csv_datasource::parse_csv(T& stream,
    // autodetect newlines
    char newline = '\n';
    bool has_newline = false;
    int newline_count = 0;
    int carriage_count = 0;
    for (unsigned idx = 0; idx < file_length_; idx++)
@ -179,10 +180,12 @@ void csv_datasource::parse_csv(T& stream,
        if (c == '\n')
        {
            ++newline_count;
            has_newline = true;
        }
        else if (c == '\r')
        {
            ++carriage_count;
            has_newline = true;
        }
        // scan to the end of the file, or past the first 4000 bytes, before testing
        if (idx == file_length_-1 || idx > 4000)
@ -422,7 +425,7 @@ void csv_datasource::parse_csv(T& stream,
        throw mapnik::datasource_exception(s.str());
    }
-    int feature_count(1);
+    int feature_count(0);
    bool extent_initialized = false;
    std::size_t num_headers = headers_.size();
@ -435,12 +438,23 @@ void csv_datasource::parse_csv(T& stream,
    mapnik::wkt_parser parse_wkt;
    mapnik::json::geometry_parser<std::string::const_iterator> parse_json;
-    while (std::getline(stream,csv_line,newline))
+    // handle rare case of a single line of data and user-provided headers
    // where a lack of a newline will mean that std::getline returns false
    bool is_first_row = false;
    if (!has_newline)
    {
        stream >> csv_line;
        if (!csv_line.empty())
        {
            is_first_row = true;
        }
    }
    while (std::getline(stream,csv_line,newline) || is_first_row)
    {
        is_first_row = false;
        if ((row_limit_ > 0) && (line_number > row_limit_))
        {
            MAPNIK_LOG_DEBUG(csv) << "csv_datasource: row limit hit, exiting at feature: " << feature_count;
            break;
        }
@ -495,7 +509,8 @@ void csv_datasource::parse_csv(T& stream,
                }
            }
-            mapnik::feature_ptr feature(mapnik::feature_factory::create(ctx_,feature_count));
+            // NOTE: feature ids should start at 1, so feature_count is pre-incremented here
            mapnik::feature_ptr feature(mapnik::feature_factory::create(ctx_,++feature_count));
            double x(0);
            double y(0);
            bool parsed_x = false;
@ -754,7 +769,6 @@ void csv_datasource::parse_csv(T& stream,
                        extent_.expand_to_include(feature->envelope());
                    }
                    features_.push_back(feature);
                    ++feature_count;
                    null_geom = false;
                }
                else
@ -782,7 +796,6 @@ void csv_datasource::parse_csv(T& stream,
                    pt->move_to(x,y);
                    feature->add_geometry(pt);
                    features_.push_back(feature);
                    ++feature_count;
                    null_geom = false;
                    if (!extent_initialized)
                    {
@ -836,6 +849,9 @@ void csv_datasource::parse_csv(T& stream,
                else
                {
                    MAPNIK_LOG_ERROR(csv) << s.str();
                    // with no geometry we will never
                    // add this feature so drop the count
                    feature_count--;
                    continue;
                }
            }
--- a/plugins/input/csv/csv_datasource.hpp
+++ b/plugins/input/csv/csv_datasource.hpp
@ -54,7 +54,7 @@ public:
    void bind() const;
    template <typename T>
-    void parse_csv(T& stream,
+    void parse_csv(T & stream,
                   std::string const& escape,
                   std::string const& separator,
                   std::string const& quote) const;
--- a/tests/data/csv/fails/needs_headers_one_line.csv
+++ b/tests/data/csv/fails/needs_headers_one_line.csv
@ -0,0 +1 @@
 0,0,data_name
--- a/tests/data/csv/fails/needs_headers_one_line_no_newline.csv
+++ b/tests/data/csv/fails/needs_headers_one_line_no_newline.csv
@ -0,0 +1 @@
 0,0,data_name
--- a/tests/data/csv/fails/needs_headers_two_lines.csv
+++ b/tests/data/csv/fails/needs_headers_two_lines.csv
@ -0,0 +1,2 @@
 0,0,data_name
 0,0,data_name
--- a/tests/data/csv/warns/feature_id_counting.csv
+++ b/tests/data/csv/warns/feature_id_counting.csv
@ -0,0 +1,4 @@
 x,y,id
 0,0,1
 bad,bad,2
 0,0,2
--- a/tests/python_tests/csv_test.py
+++ b/tests/python_tests/csv_test.py
@ -418,6 +418,79 @@ if 'csv' in mapnik.DatasourceCache.instance().plugin_names():
        # this has invalid header # so throw
        ds = get_csv_ds('more_column_values_than_headers.csv')
    def test_that_feature_id_only_incremented_for_valid_rows(**kwargs):
        """Rows that cannot be parsed must not show up as features.

        The fixture ``warns/feature_id_counting.csv`` has an ``x,y,id`` header
        followed by three data rows; the middle row (``bad,bad,2``) has
        non-numeric coordinates, so only the first and last rows are expected.

        NOTE(review): the assertions below read the ``id`` attribute column,
        not the feature's own id -- confirm whether that should be checked too.
        """
        ds = mapnik.Datasource(type='csv',
                               file=os.path.join('../data/csv/warns','feature_id_counting.csv'),
                               quiet=True)
        # field names match the header row of the fixture
        eq_(len(ds.fields()),3)
        eq_(ds.fields(),['x','y','id'])
        eq_(ds.field_types(),['int','int','int'])
        fs = ds.featureset()
        # first feature: data row "0,0,1"
        feat = fs.next()
        eq_(feat['x'],0)
        eq_(feat['y'],0)
        eq_(feat['id'],1)
        # second feature: data row "0,0,2"; the "bad,bad,2" row in between
        # should have been skipped
        feat = fs.next()
        eq_(feat['x'],0)
        eq_(feat['y'],0)
        eq_(feat['id'],2)
        desc = ds.describe()
        eq_(desc['geometry_type'],mapnik.DataGeometryType.Point)
        # only the two valid rows are counted
        eq_(len(ds.all_features()),2)
    def test_dynamically_defining_headers1(**kwargs):
        """Column names can be supplied via ``headers`` for a file without a header row.

        The fixture ``fails/needs_headers_two_lines.csv`` holds two identical
        data rows (``0,0,data_name``); both are expected to become features.
        """
        ds = mapnik.Datasource(type='csv',
                               file=os.path.join('../data/csv/fails','needs_headers_two_lines.csv'),
                               quiet=True,
                               headers='x,y,name')
        # field names come from the headers option, not from the file
        eq_(len(ds.fields()),3)
        eq_(ds.fields(),['x','y','name'])
        eq_(ds.field_types(),['int','int','str'])
        fs = ds.featureset()
        # only the first feature's attributes are inspected
        feat = fs.next()
        eq_(feat['x'],0)
        eq_(feat['y'],0)
        eq_(feat['name'],'data_name')
        desc = ds.describe()
        eq_(desc['geometry_type'],mapnik.DataGeometryType.Point)
        # both rows of the fixture are treated as data
        eq_(len(ds.all_features()),2)
    def test_dynamically_defining_headers2(**kwargs):
        """A file with a single data row is readable when ``headers`` is supplied.

        The fixture ``fails/needs_headers_one_line.csv`` holds one data row
        (``0,0,data_name``) -- presumably newline-terminated, in contrast to
        the ``_no_newline`` fixture; confirm against the file on disk.
        """
        ds = mapnik.Datasource(type='csv',
                               file=os.path.join('../data/csv/fails','needs_headers_one_line.csv'),
                               quiet=True,
                               headers='x,y,name')
        # field names come from the headers option, not from the file
        eq_(len(ds.fields()),3)
        eq_(ds.fields(),['x','y','name'])
        eq_(ds.field_types(),['int','int','str'])
        fs = ds.featureset()
        feat = fs.next()
        eq_(feat['x'],0)
        eq_(feat['y'],0)
        eq_(feat['name'],'data_name')
        desc = ds.describe()
        eq_(desc['geometry_type'],mapnik.DataGeometryType.Point)
        # the single row must be treated as data, not consumed as a header
        eq_(len(ds.all_features()),1)
    def test_dynamically_defining_headers3(**kwargs):
        """A single data row without a line terminator is still read as one feature.

        The fixture ``fails/needs_headers_one_line_no_newline.csv`` holds one
        data row (``0,0,data_name``); going by its name it has no trailing
        newline -- confirm against the file on disk. Column names are supplied
        through ``headers``.
        """
        ds = mapnik.Datasource(type='csv',
                               file=os.path.join('../data/csv/fails','needs_headers_one_line_no_newline.csv'),
                               quiet=True,
                               headers='x,y,name')
        # field names come from the headers option, not from the file
        eq_(len(ds.fields()),3)
        eq_(ds.fields(),['x','y','name'])
        eq_(ds.field_types(),['int','int','str'])
        fs = ds.featureset()
        feat = fs.next()
        eq_(feat['x'],0)
        eq_(feat['y'],0)
        eq_(feat['name'],'data_name')
        desc = ds.describe()
        eq_(desc['geometry_type'],mapnik.DataGeometryType.Point)
        # exactly one feature is expected from the lone row
        eq_(len(ds.all_features()),1)
 if __name__ == "__main__":
    # Script entry point: call setup() once, then run the tests directly.
    setup()
    # Every name in dir() containing 'test_' is looked up with eval() and
    # called with visual=True; the list comprehension is used only for its
    # side effects and its result is discarded.
    [eval(run)(visual=True) for run in dir() if 'test_' in run]