From 763e84a6e9c39c3b0bc5f454da607e6b674aaf43 Mon Sep 17 00:00:00 2001 From: Dane Springmeyer Date: Mon, 2 Apr 2012 16:20:41 -0700 Subject: [PATCH] postgis: add support for auto-detection of primary key field - closes #804 - refs #753 --- plugins/input/postgis/postgis_datasource.cpp | 119 ++++++++++++++--- plugins/input/postgis/postgis_datasource.hpp | 2 +- tests/python_tests/postgis_test.py | 131 ++++++++++++++++++- 3 files changed, 232 insertions(+), 20 deletions(-) diff --git a/plugins/input/postgis/postgis_datasource.cpp b/plugins/input/postgis/postgis_datasource.cpp index e992ea051..34f95d495 100644 --- a/plugins/input/postgis/postgis_datasource.cpp +++ b/plugins/input/postgis/postgis_datasource.cpp @@ -107,6 +107,7 @@ void postgis_datasource::bind() const boost::optional initial_size = params_.get("initial_size", 1); boost::optional max_size = params_.get("max_size", 10); + boost::optional require_key = params_.get("require_key", false); ConnectionManager* mgr = ConnectionManager::instance(); mgr->registerPool(creator_, *initial_size, *max_size); @@ -228,6 +229,71 @@ void postgis_datasource::bind() const } } + // detect primary key + if (key_field_.empty()) + { + std::ostringstream s; + s << "SELECT a.attname, a.attnum, t.typname, t.typname in ('int2','int4','int8') " + "AS is_int FROM pg_class c, pg_attribute a, pg_type t, pg_namespace n, pg_index i " + "WHERE a.attnum > 0 AND a.attrelid = c.oid " + "AND a.atttypid = t.oid AND c.relnamespace = n.oid " + "AND c.oid = i.indrelid AND i.indisprimary = 't' " + "AND t.typname !~ '^geom' AND c.relname =" + << " '" << mapnik::sql_utils::unquote_double(geometry_table_) << "' " + //"AND a.attnum = ANY (i.indkey) " // postgres >= 8.1 + << "AND (i.indkey[0]=a.attnum OR i.indkey[1]=a.attnum OR i.indkey[2]=a.attnum " + "OR i.indkey[3]=a.attnum OR i.indkey[4]=a.attnum OR i.indkey[5]=a.attnum " + "OR i.indkey[6]=a.attnum OR i.indkey[7]=a.attnum OR i.indkey[8]=a.attnum " + "OR i.indkey[9]=a.attnum) "; + if (! schema_.empty()) + { + s << "AND n.nspname='" + << mapnik::sql_utils::unquote_double(schema_) + << "' "; + } + s << "ORDER BY a.attnum"; + + shared_ptr rs_key = conn->executeQuery(s.str()); + if (rs_key->next()) + { + unsigned int result_rows = rs_key->size(); + if (result_rows == 1) + { + bool is_int = (std::string(rs_key->getValue(3)) == "t"); + if (is_int) + { + const char* key_field_string = rs_key->getValue(0); + if (key_field_string) + { + key_field_ = std::string(key_field_string); +#ifdef MAPNIK_DEBUG + std::clog << "Postgis Plugin: auto-detected key field of '" << key_field_ << "' on '" << geometry_table_ << "'\n"; +#endif + } + } + } + else if (result_rows > 1) + { + std::clog << "PostGIS Plugin: warning, multi column primary key detected but is not supported\n"; + } + } +#ifdef MAPNIK_DEBUG + else + { + std::clog << "Postgis Plugin: no primary key could be detected for '" << geometry_table_ << "'\n"; + } +#endif + rs_key->close(); + } + + // if a globally unique key field/primary key is required + // but still not known at this point, then throw + if (*require_key && key_field_.empty()) + { + throw mapnik::datasource_exception(std::string("PostGIS Plugin: Error: primary key required for table '") + + geometry_table_ + "', please supply 'key_field' option to specify field to use for primary key"); + } + if (srid_ == 0) { srid_ = -1; @@ -268,9 +334,9 @@ void postgis_datasource::bind() const // validate type of key_field if (! found_key_field && ! key_field_.empty() && fld_name == key_field_) { - found_key_field = true; if (type_oid == 20 || type_oid == 21 || type_oid == 23) { + found_key_field = true; desc_.add_descriptor(attribute_descriptor(fld_name, mapnik::Integer)); } else @@ -314,7 +380,7 @@ void postgis_datasource::bind() const break; case 700: // float4 case 701: // float8 - case 1700: // numeric ?? + case 1700: // numeric desc_.add_descriptor(attribute_descriptor(fld_name, mapnik::Double)); case 1042: // bpchar case 1043: // varchar @@ -545,21 +611,31 @@ featureset_ptr postgis_datasource::features(const query& q) const s << "SELECT ST_AsBinary(\"" << geometryColumn_ << "\") AS geom"; mapnik::context_ptr ctx = boost::make_shared(); + std::set const& props = q.property_names(); + std::set::const_iterator pos = props.begin(); + std::set::const_iterator end = props.end(); if (! key_field_.empty()) { mapnik::sql_utils::quote_attr(s, key_field_); ctx->push(key_field_); + + for (; pos != end; ++pos) + { + if (*pos != key_field_) + { + mapnik::sql_utils::quote_attr(s, *pos); + ctx->push(*pos); + } + } } - - std::set const& props = q.property_names(); - std::set::const_iterator pos = props.begin(); - std::set::const_iterator end = props.end(); - - for (; pos != end; ++pos) + else { - mapnik::sql_utils::quote_attr(s, *pos); - ctx->push(*pos); + for (; pos != end; ++pos) + { + mapnik::sql_utils::quote_attr(s, *pos); + ctx->push(*pos); + } } std::string table_with_bbox = populate_tokens(table_, scale_denom, box); @@ -625,20 +701,29 @@ featureset_ptr postgis_datasource::features_at_point(coord2d const& pt) const s << "SELECT ST_AsBinary(\"" << geometryColumn_ << "\") AS geom"; mapnik::context_ptr ctx = boost::make_shared(); + std::vector::const_iterator itr = desc_.get_descriptors().begin(); + std::vector::const_iterator end = desc_.get_descriptors().end(); if (! key_field_.empty()) { mapnik::sql_utils::quote_attr(s, key_field_); ctx->push(key_field_); + for (; itr != end; ++itr) + { + if (itr->get_name() != key_field_) + { + mapnik::sql_utils::quote_attr(s, itr->get_name()); + ctx->push(itr->get_name()); + } + } } - - std::vector::const_iterator itr = desc_.get_descriptors().begin(); - std::vector::const_iterator end = desc_.get_descriptors().end(); - - for (; itr != end; ++itr) + else { - mapnik::sql_utils::quote_attr(s, itr->get_name()); - ctx->push(itr->get_name()); + for (; itr != end; ++itr) + { + mapnik::sql_utils::quote_attr(s, itr->get_name()); + ctx->push(itr->get_name()); + } } box2d box(pt.x, pt.y, pt.x, pt.y); diff --git a/plugins/input/postgis/postgis_datasource.hpp b/plugins/input/postgis/postgis_datasource.hpp index c353c5216..f309f47fe 100644 --- a/plugins/input/postgis/postgis_datasource.hpp +++ b/plugins/input/postgis/postgis_datasource.hpp @@ -80,7 +80,7 @@ private: mutable std::string schema_; mutable std::string geometry_table_; const std::string geometry_field_; - const std::string key_field_; + mutable std::string key_field_; const int cursor_fetch_size_; const int row_limit_; mutable std::string geometryColumn_; diff --git a/tests/python_tests/postgis_test.py b/tests/python_tests/postgis_test.py index aec77cabc..b77922a30 100644 --- a/tests/python_tests/postgis_test.py +++ b/tests/python_tests/postgis_test.py @@ -63,7 +63,8 @@ def createdb_and_dropdb_on_path(): print 'Notice: skipping postgis tests (createdb/dropdb)' return False -insert_sql = """ +insert_table_1 = """ +CREATE TABLE test(gid serial PRIMARY KEY, geom geometry); INSERT INTO test(geom) values (GeomFromEWKT('SRID=4326;POINT(0 0)')); INSERT INTO test(geom) values (GeomFromEWKT('SRID=4326;POINT(-2 2)')); INSERT INTO test(geom) values (GeomFromEWKT('SRID=4326;MULTIPOINT(2 1,1 2)')); @@ -74,12 +75,46 @@ INSERT INTO test(geom) values (GeomFromEWKT('SRID=4326;MULTIPOLYGON(((1 1,3 1,3 INSERT INTO test(geom) values (GeomFromEWKT('SRID=4326;GEOMETRYCOLLECTION(POLYGON((1 1, 2 1, 2 2, 1 2,1 1)),POINT(2 3),LINESTRING(2 3,3 4))')); """ +insert_table_2 = """ +CREATE TABLE test2(manual_id int4 PRIMARY KEY, geom geometry); +INSERT INTO test2(manual_id, geom) values (0, GeomFromEWKT('SRID=4326;POINT(0 0)')); +INSERT INTO test2(manual_id, geom) values (1, GeomFromEWKT('SRID=4326;POINT(0 0)')); +INSERT INTO test2(manual_id, geom) values (1000, GeomFromEWKT('SRID=4326;POINT(0 0)')); +INSERT INTO test2(manual_id, geom) values (-1000, GeomFromEWKT('SRID=4326;POINT(0 0)')); +INSERT INTO test2(manual_id, geom) values (2147483647, GeomFromEWKT('SRID=4326;POINT(0 0)')); +INSERT INTO test2(manual_id, geom) values (-2147483648, GeomFromEWKT('SRID=4326;POINT(0 0)')); +""" + +insert_table_3 = """ +CREATE TABLE test3(non_id bigint, manual_id int4, geom geometry); +INSERT INTO test3(non_id, manual_id, geom) values (9223372036854775807, 0, GeomFromEWKT('SRID=4326;POINT(0 0)')); +INSERT INTO test3(non_id, manual_id, geom) values (9223372036854775807, 1, GeomFromEWKT('SRID=4326;POINT(0 0)')); +INSERT INTO test3(non_id, manual_id, geom) values (9223372036854775807, 1000, GeomFromEWKT('SRID=4326;POINT(0 0)')); +INSERT INTO test3(non_id, manual_id, geom) values (9223372036854775807, -1000, GeomFromEWKT('SRID=4326;POINT(0 0)')); +INSERT INTO test3(non_id, manual_id, geom) values (9223372036854775807, 2147483647, GeomFromEWKT('SRID=4326;POINT(0 0)')); +INSERT INTO test3(non_id, manual_id, geom) values (9223372036854775807, -2147483648, GeomFromEWKT('SRID=4326;POINT(0 0)')); +""" + +insert_table_4 = """ +CREATE TABLE test4(non_id int4, manual_id int8 PRIMARY KEY, geom geometry); +INSERT INTO test4(non_id, manual_id, geom) values (0, 0, GeomFromEWKT('SRID=4326;POINT(0 0)')); +INSERT INTO test4(non_id, manual_id, geom) values (0, 1, GeomFromEWKT('SRID=4326;POINT(0 0)')); +INSERT INTO test4(non_id, manual_id, geom) values (0, 1000, GeomFromEWKT('SRID=4326;POINT(0 0)')); +INSERT INTO test4(non_id, manual_id, geom) values (0, -1000, GeomFromEWKT('SRID=4326;POINT(0 0)')); +INSERT INTO test4(non_id, manual_id, geom) values (0, 2147483647, GeomFromEWKT('SRID=4326;POINT(0 0)')); +INSERT INTO test4(non_id, manual_id, geom) values (0, -2147483648, GeomFromEWKT('SRID=4326;POINT(0 0)')); +""" + def postgis_setup(): call('dropdb %s' % MAPNIK_TEST_DBNAME,silent=True) call('createdb -T %s %s' % (POSTGIS_TEMPLATE_DBNAME,MAPNIK_TEST_DBNAME),silent=False) call('shp2pgsql -s 3857 -g geom -W LATIN1 %s world_merc | psql -q %s' % (SHAPEFILE,MAPNIK_TEST_DBNAME), silent=True) call('''psql -q %s -c "CREATE TABLE \"empty\" (key serial);SELECT AddGeometryColumn('','empty','geom','-1','GEOMETRY',4);"''' % MAPNIK_TEST_DBNAME,silent=False) - call('''psql -q %s -c "create table test(gid serial PRIMARY KEY, geom geometry);%s"''' % (MAPNIK_TEST_DBNAME,insert_sql),silent=False) + call('''psql -q %s -c "%s"''' % (MAPNIK_TEST_DBNAME,insert_table_1),silent=False) + call('''psql -q %s -c "%s"''' % (MAPNIK_TEST_DBNAME,insert_table_2),silent=False) + call('''psql -q %s -c "%s"''' % (MAPNIK_TEST_DBNAME,insert_table_3),silent=False) + call('''psql -q %s -c "%s"''' % (MAPNIK_TEST_DBNAME,insert_table_4),silent=False) + def postgis_takedown(): pass # fails as the db is in use: https://github.com/mapnik/mapnik/issues/960 @@ -182,6 +217,98 @@ if 'postgis' in mapnik.DatasourceCache.instance().plugin_names() \ query.add_property_name('bogus') fs = ds.features(query) + def test_auto_detection_of_unique_feature_id_32_bit(): + ds = mapnik.PostGIS(dbname=MAPNIK_TEST_DBNAME,table='test2', + geometry_field='geom') + fs = ds.featureset() + eq_(fs.next()['manual_id'],0) + eq_(fs.next()['manual_id'],1) + eq_(fs.next()['manual_id'],1000) + eq_(fs.next()['manual_id'],-1000) + eq_(fs.next()['manual_id'],2147483647) + eq_(fs.next()['manual_id'],-2147483648) + + fs = ds.featureset() + eq_(fs.next().id(),0) + eq_(fs.next().id(),1) + eq_(fs.next().id(),1000) + eq_(fs.next().id(),-1000) + eq_(fs.next().id(),2147483647) + eq_(fs.next().id(),-2147483648) + + def test_auto_detection_will_fail_since_no_primary_key(): + ds = mapnik.PostGIS(dbname=MAPNIK_TEST_DBNAME,table='test3', + geometry_field='geom', + require_key=False) + fs = ds.featureset() + feat = fs.next() + eq_(feat['manual_id'],0) + # will fail: https://github.com/mapnik/mapnik/issues/895 + #eq_(feat['non_id'],9223372036854775807) + eq_(fs.next()['manual_id'],1) + eq_(fs.next()['manual_id'],1000) + eq_(fs.next()['manual_id'],-1000) + eq_(fs.next()['manual_id'],2147483647) + eq_(fs.next()['manual_id'],-2147483648) + + # since no valid primary key will be detected the fallback + # is auto-incrementing counter + fs = ds.featureset() + eq_(fs.next().id(),1) + eq_(fs.next().id(),2) + eq_(fs.next().id(),3) + eq_(fs.next().id(),4) + eq_(fs.next().id(),5) + eq_(fs.next().id(),6) + + @raises(RuntimeError) + def test_auto_detection_will_fail_and_should_throw(): + ds = mapnik.PostGIS(dbname=MAPNIK_TEST_DBNAME,table='test3', + geometry_field='geom', + require_key=True) + fs = ds.featureset() + + def test_auto_detection_of_unique_feature_id_64_bit(): + ds = mapnik.PostGIS(dbname=MAPNIK_TEST_DBNAME,table='test4', + geometry_field='geom', + require_key=False) + fs = ds.featureset() + eq_(fs.next()['manual_id'],0) + eq_(fs.next()['manual_id'],1) + eq_(fs.next()['manual_id'],1000) + eq_(fs.next()['manual_id'],-1000) + eq_(fs.next()['manual_id'],2147483647) + eq_(fs.next()['manual_id'],-2147483648) + + fs = ds.featureset() + eq_(fs.next().id(),0) + eq_(fs.next().id(),1) + eq_(fs.next().id(),1000) + eq_(fs.next().id(),-1000) + eq_(fs.next().id(),2147483647) + eq_(fs.next().id(),-2147483648) + + def test_manually_specified_feature_id_field(): + ds = mapnik.PostGIS(dbname=MAPNIK_TEST_DBNAME,table='test4', + geometry_field='geom', + key_field='manual_id', + require_key=True) + fs = ds.featureset() + eq_(fs.next()['manual_id'],0) + eq_(fs.next()['manual_id'],1) + eq_(fs.next()['manual_id'],1000) + eq_(fs.next()['manual_id'],-1000) + eq_(fs.next()['manual_id'],2147483647) + eq_(fs.next()['manual_id'],-2147483648) + + fs = ds.featureset() + eq_(fs.next().id(),0) + eq_(fs.next().id(),1) + eq_(fs.next().id(),1000) + eq_(fs.next().id(),-1000) + eq_(fs.next().id(),2147483647) + eq_(fs.next().id(),-2147483648) + atexit.register(postgis_takedown) if __name__ == "__main__":