/*****************************************************************************
 *
 * This file is part of Mapnik (c++ mapping toolkit)
 *
 * Copyright (C) 2011 Artem Pavlenko
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 *
 *****************************************************************************/

#include "sqlite_datasource.hpp"
#include "sqlite_featureset.hpp"
#include "sqlite_resultset.hpp"
#include "sqlite_utils.hpp"

// NOTE(review): the targets of the angle-bracket includes below, and the
// template arguments used throughout this file, were reconstructed from the
// names the code uses -- confirm them against the project headers.

// mapnik
#include <mapnik/ptree_helpers.hpp>
#include <mapnik/sql_utils.hpp>
#include <mapnik/util/geometry_to_ds_type.hpp>
#include <mapnik/wkb.hpp>

// boost
#include <boost/algorithm/string.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/tokenizer.hpp>
#include <boost/filesystem/operations.hpp>
#include <boost/make_shared.hpp>

using mapnik::box2d;
using mapnik::coord2d;
using mapnik::query;
using mapnik::featureset_ptr;
using mapnik::layer_descriptor;
using mapnik::attribute_descriptor;
using mapnik::datasource_exception;
using mapnik::datasource;
using mapnik::parameters;

DATASOURCE_PLUGIN(sqlite_datasource)

// Reads the string/integer options from `params` into members and verifies
// that the mandatory 'file' parameter is present. When `bind` is true the
// datasource is bound immediately; otherwise binding is deferred until the
// first accessor that needs it.
//
// Throws datasource_exception when 'file' is missing.
sqlite_datasource::sqlite_datasource(parameters const& params, bool bind)
    : datasource(params),
      extent_(),
      extent_initialized_(false),
      type_(datasource::Vector),
      table_(*params_.get<std::string>("table", "")),
      fields_(*params_.get<std::string>("fields", "*")),
      metadata_(*params_.get<std::string>("metadata", "")),
      geometry_table_(*params_.get<std::string>("geometry_table", "")),
      geometry_field_(*params_.get<std::string>("geometry_field", "")),
      index_table_(*params_.get<std::string>("index_table", "")),
      key_field_(*params_.get<std::string>("key_field", "")),
      row_offset_(*params_.get<int>("row_offset", 0)),
      row_limit_(*params_.get<int>("row_limit", 0)),
      intersects_token_("!intersects!"),
      // A default is supplied for 'type' so that the optional is never
      // dereferenced while empty.
      desc_(*params_.get<std::string>("type", "sqlite"),
            *params_.get<std::string>("encoding", "utf-8")),
      format_(mapnik::wkbAuto)
{
    /* TODO
       - throw if no primary key but spatial index is present?
       - remove auto-indexing
       - if spatialite - leverage more of the metadata for geometry type detection
    */

    boost::optional<std::string> file = params_.get<std::string>("file");
    if (! file) throw datasource_exception("Sqlite Plugin: missing <file> parameter");

    if (bind)
    {
        this->bind();
    }
}

// Opens the database and resolves everything the query methods rely on:
// dataset path, wkb format, init statements, table / geometry table /
// geometry field, attribute descriptors, spatial index and extent.
// Returns immediately when the datasource is already bound.
//
// Throws datasource_exception when a required parameter is missing, the
// database file does not exist, the table cannot be queried, no geometry
// column can be found, auto-indexing is requested without a key_field, or
// the extent cannot be determined.
void sqlite_datasource::bind() const
{
    if (is_bound_) return;

    boost::optional<std::string> file = params_.get<std::string>("file");
    if (! file) throw datasource_exception("Sqlite Plugin: missing <file> parameter");

    boost::optional<std::string> base = params_.get<std::string>("base");
    if (base)
        dataset_name_ = *base + "/" + *file;
    else
        dataset_name_ = *file;

    if ((dataset_name_.compare(":memory:") != 0) && (!boost::filesystem::exists(dataset_name_)))
    {
        throw datasource_exception("Sqlite Plugin: " + dataset_name_ + " does not exist");
    }

    use_spatial_index_ = *params_.get<mapnik::boolean>("use_spatial_index", true);

    // TODO - remove this option once all datasources have an indexing api
    bool auto_index = *params_.get<mapnik::boolean>("auto_index", true);

    boost::optional<std::string> ext = params_.get<std::string>("extent");
    if (ext) extent_initialized_ = extent_.from_string(*ext);

    boost::optional<std::string> wkb = params_.get<std::string>("wkb_format");
    if (wkb)
    {
        if (*wkb == "spatialite")
        {
            format_ = mapnik::wkbSpatiaLite;
        }
        else if (*wkb == "generic")
        {
            format_ = mapnik::wkbGeneric;
        }
        else
        {
            format_ = mapnik::wkbAuto;
        }
    }

    // Populate init_statements_
    //   1. Build attach database statements from the "attachdb" parameter
    //   2. Add explicit init statements from "initdb" parameter
    // Note that we do some extra work to make sure that any attached
    // databases are relative to directory containing dataset_name_.  Sqlite
    // will default to attaching from cwd.  Typical usage means that the
    // map loader will produce full paths here.
    boost::optional<std::string> attachdb = params_.get<std::string>("attachdb");
    if (attachdb)
    {
        parse_attachdb(*attachdb);
    }

    boost::optional<std::string> initdb = params_.get<std::string>("initdb");
    if (initdb)
    {
        init_statements_.push_back(*initdb);
    }

    // now actually create the connection and start executing setup sql
    dataset_ = boost::make_shared<sqlite_connection>(dataset_name_);

    boost::optional<unsigned> table_by_index = params_.get<unsigned>("table_by_index");

    // 'table' and 'table_by_index' are mutually exclusive
    int passed_parameters = 0;
    passed_parameters += params_.get<std::string>("table") ? 1 : 0;
    passed_parameters += table_by_index ? 1 : 0;

    if (passed_parameters > 1)
    {
        throw datasource_exception("SQLite Plugin: you can only select a table by name "
                                   "('table' parameter) or by number ('table_by_index' parameter), "
                                   "do not supply 2 or more of them at the same time" );
    }

    if (table_by_index)
    {
        std::vector<std::string> tables;
        sqlite_utils::get_tables(dataset_,tables);
        if (*table_by_index >= tables.size())
        {
            std::ostringstream s;
            s << "SQLite Plugin: only "
              << tables.size()
              << " table(s) exist, cannot find table by index '" << *table_by_index << "'";

            throw datasource_exception(s.str());
        }
        table_ = tables[*table_by_index];
    }

    if (table_.empty())
    {
        throw mapnik::datasource_exception("Sqlite Plugin: missing <table> parameter");
    }

    if (geometry_table_.empty())
    {
        geometry_table_ = mapnik::sql_utils::table_from_sql(table_);
    }

    // if 'table_' is a subquery then we try to deduce names
    // and types from the first row returned from that query
    using_subquery_ = false;
    if (table_ != geometry_table_)
    {
        using_subquery_ = true;
    }
    else
    {
        // attempt to auto-quote table if needed
        if (sqlite_utils::needs_quoting(table_))
        {
            table_ = std::string("[") + table_ + "]";
            geometry_table_ = table_;
        }
    }

    // Execute init_statements_
    for (std::vector<std::string>::const_iterator iter = init_statements_.begin();
         iter != init_statements_.end(); ++iter)
    {
#ifdef MAPNIK_DEBUG
        std::clog << "Sqlite Plugin: Execute init sql: " << *iter << std::endl;
#endif
        dataset_->execute(*iter);
    }

    bool found_types_via_subquery = false;
    if (using_subquery_)
    {
        std::ostringstream s;
        std::string query = populate_tokens(table_);
        s << "SELECT " << fields_ << " FROM (" << query << ") LIMIT 1";
        found_types_via_subquery = sqlite_utils::detect_types_from_subquery(
            s.str(),
            geometry_field_,
            desc_,
            dataset_);
    }

    // TODO - consider removing this
    if (key_field_ == "rowid")
    {
        desc_.add_descriptor(attribute_descriptor("rowid", mapnik::Integer));
    }

    bool found_table = sqlite_utils::table_info(key_field_,
                                                found_types_via_subquery,
                                                geometry_field_,
                                                geometry_table_,
                                                desc_,
                                                dataset_);

    if (! found_table)
    {
        std::ostringstream s;
        s << "Sqlite Plugin: could not query table '" << geometry_table_ << "'";
        if (using_subquery_)
        {
            s << " from subquery '" << table_ << "'";
        }

        // report the available tables
        std::vector<std::string> tables;
        sqlite_utils::get_tables(dataset_,tables);
        if (tables.size() > 0)
        {
            s << " (available tables for " << dataset_name_ << " are: '"
              << boost::algorithm::join(tables, ", ") << "')";
        }

        throw datasource_exception(s.str());
    }

    if (geometry_field_.empty())
    {
        std::ostringstream s;
        s << "Sqlite Plugin: unable to detect the column "
          << "containing a valid geometry on table '" << geometry_table_ << "'. "
          << "Please provide a column name by passing the 'geometry_field' option "
          << "or indicate a different spatial table to use by passing the 'geometry_table' option";
        throw datasource_exception(s.str());
    }

    if (index_table_.empty())
    {
        // Generate implicit index_table name - need to do this after
        // we have discovered meta-data or else we don't know the column name
        index_table_ = sqlite_utils::index_for_table(geometry_table_,geometry_field_);
    }

    std::string index_db = sqlite_utils::index_for_db(dataset_name_);

    has_spatial_index_ = false;
    if (use_spatial_index_)
    {
        if (boost::filesystem::exists(index_db))
        {
            dataset_->execute("attach database '" + index_db + "' as " + index_table_);
        }
        has_spatial_index_ = sqlite_utils::has_rtree(index_table_,dataset_);

        if (!has_spatial_index_ && auto_index)
        {
            if (! key_field_.empty())
            {
                std::ostringstream query;
                query << "SELECT "
                      << geometry_field_
                      << "," << key_field_
                      << " FROM ("
                      << geometry_table_ << ")";

                /*
                  std::vector rtree_list;
                  {
                      boost::shared_ptr rs = dataset_->execute_query(query.str());
                      sqlite_utils::build_tree(rs,rtree_list);
                  }
                  if (sqlite_utils::create_spatial_index2(index_db,index_table_,rtree_list))
                  {
                      //extent_initialized_ = true;
                      has_spatial_index_ = true;
                      if (boost::filesystem::exists(index_db))
                      {
                          dataset_->execute("attach database '" + index_db + "' as " + index_table_);
                      }
                  }
                */

                boost::shared_ptr<sqlite_resultset> rs = dataset_->execute_query(query.str());
                if (sqlite_utils::create_spatial_index(index_db,index_table_,rs))
                {
                    //extent_initialized_ = true;
                    has_spatial_index_ = true;
                    if (boost::filesystem::exists(index_db))
                    {
                        dataset_->execute("attach database '" + index_db + "' as " + index_table_);
                    }
                }
            }
            else
            {
                std::ostringstream s;
                s << "Sqlite Plugin: key_field is empty for "
                  << geometry_field_
                  << " and "
                  << geometry_table_;
                throw datasource_exception(s.str());
            }
        }
    }

    if (! extent_initialized_)
    {
        // TODO - clean this up - reducing arguments
        std::string query = populate_tokens(table_);
        if (!sqlite_utils::detect_extent(dataset_,
                                         has_spatial_index_,
                                         extent_,
                                         index_table_,
                                         metadata_,
                                         geometry_field_,
                                         geometry_table_,
                                         key_field_,
                                         query))
        {
            std::ostringstream s;
            s << "Sqlite Plugin: extent could not be determined for table '"
              << geometry_table_ << "' and geometry field '" << geometry_field_ << "'"
              << " because an rtree spatial index is missing or empty."
              << " - either set the table 'extent' or create an rtree spatial index";

            throw datasource_exception(s.str());
        }
    }

    is_bound_ = true;
}

// Returns a copy of `sql` in which the first case-insensitive occurrence of
// the intersects token is replaced by the always-true comparison "1=1".
// The input is returned unchanged when the token does not occur.
std::string sqlite_datasource::populate_tokens(const std::string& sql) const
{
    std::string populated_sql = sql;
    if (boost::algorithm::ifind_first(populated_sql, intersects_token_))
    {
        // replace with dummy comparison that is true
        boost::algorithm::ireplace_first(populated_sql, intersects_token_, "1=1");
    }
    return populated_sql;
}

sqlite_datasource::~sqlite_datasource()
{
}

#if (BOOST_FILESYSTEM_VERSION <= 2)
namespace boost {
namespace filesystem {

// Fallback read_symlink() for Boost.Filesystem versions <= 2.
// Returns the target of symlink `p`; outside BOOST_POSIX_API builds it
// returns an empty path. Throws std::runtime_error when readlink() fails.
path read_symlink(const path& p)
{
    path symlink_path;

#ifdef BOOST_POSIX_API
    for (std::size_t path_max = 64;; path_max *= 2)// loop 'til buffer is large enough
    {
        boost::scoped_array<char> buf(new char[path_max]);
        ssize_t result;
        if ((result=::readlink(p.string().c_str(), buf.get(), path_max))== -1)
        {
            throw std::runtime_error("could not read symlink");
        }
        else
        {
            // a completely filled buffer may mean truncation: retry with a larger one
            if(result != static_cast<ssize_t>(path_max))
            {
                symlink_path.assign(buf.get(), buf.get() + result);
                break;
            }
        }
    }
#endif
    return symlink_path;
}

}
}
#endif

// Parses the 'attachdb' parameter and appends one "attach database"
// statement per entry to init_statements_. Relative filenames are resolved
// against the directory of dataset_name_ (following a symlink if
// dataset_name_ is one); ":memory:" is passed through untouched.
//
// Throws datasource_exception when an entry contains no '@'.
void sqlite_datasource::parse_attachdb(std::string const& attachdb) const
{
    boost::char_separator<char> sep(",");
    boost::tokenizer<boost::char_separator<char> > tok(attachdb, sep);

    // The attachdb line is a comma separated list of dbname@filename
    for (boost::tokenizer<boost::char_separator<char> >::iterator beg = tok.begin();
         beg != tok.end(); ++beg)
    {
        std::string const& spec(*beg);
        size_t atpos = spec.find('@');

        // See if it contains an @ sign
        if (atpos == spec.npos)
        {
            throw datasource_exception("attachdb parameter has syntax dbname@filename[,...]");
        }

        // Break out the dbname and the filename
        std::string dbname = boost::trim_copy(spec.substr(0, atpos));
        std::string filename = boost::trim_copy(spec.substr(atpos + 1));

        // Normalize the filename and make it relative to dataset_name_
        if (filename.compare(":memory:") != 0)
        {
            boost::filesystem::path child_path(filename);

            // It is a relative path.  Fix it.
            if (! child_path.has_root_directory() && ! child_path.has_root_name())
            {
                boost::filesystem::path absolute_path(dataset_name_);

                // support symlinks
                if (boost::filesystem::is_symlink(absolute_path))
                {
                    absolute_path = boost::filesystem::read_symlink(absolute_path);
                }

#if (BOOST_FILESYSTEM_VERSION == 3)
                filename = boost::filesystem::absolute(absolute_path.parent_path() / filename).string();
#else
                filename = boost::filesystem::complete(absolute_path.branch_path() / filename).normalize().string();
#endif
            }
        }

        // And add an init_statement_
        init_statements_.push_back("attach database '" + filename + "' as " + dbname);
    }
}

// Name under which this plugin is registered.
std::string sqlite_datasource::name()
{
    return "sqlite";
}

// Datasource kind; set to datasource::Vector by the constructor.
mapnik::datasource::datasource_t sqlite_datasource::type() const
{
    return type_;
}

// Returns the extent, binding the datasource first if necessary.
box2d<double> sqlite_datasource::envelope() const
{
    if (! is_bound_) bind();

    return extent_;
}

namespace {

// True when statistics should be gathered for `attr`: it is not the key
// field and its type is Double or Integer. Both loops in get_statistics()
// share this predicate so that the generated SELECT list and the column
// read-back stay aligned.
bool is_statistics_field(attribute_descriptor const& attr, std::string const& key_field)
{
    if (attr.get_name() == key_field)
    {
        return false;
    }
    return attr.get_type() == mapnik::Double || attr.get_type() == mapnik::Integer;
}

}

// Computes MIN / MAX / AVG for every numeric, non-key attribute and returns
// them keyed by attribute name, each as parameters "min", "max" and "mean".
// The result is empty when there is no such attribute.
mapnik::statistics_ptr sqlite_datasource::get_statistics() const
{
    if (! is_bound_) bind();

    std::map<std::string, mapnik::parameters> stats;

    std::vector<attribute_descriptor>::const_iterator itr = desc_.get_descriptors().begin();
    std::vector<attribute_descriptor>::const_iterator end = desc_.get_descriptors().end();

    std::vector<std::string> field_names;
    for ( ; itr != end; ++itr)
    {
        if (is_statistics_field(*itr, key_field_))
        {
            field_names.push_back("MIN([" + itr->get_name() + "])");
            field_names.push_back("MAX([" + itr->get_name() + "])");
            field_names.push_back("AVG([" + itr->get_name() + "])");
        }
    }

    // Nothing to aggregate: an empty column list would make the SQL invalid.
    if (! dataset_ || field_names.empty())
    {
        return boost::make_shared<mapnik::statistics>(stats);
    }

    std::ostringstream s;
    s << "SELECT ";
    s << boost::algorithm::join(field_names, ",");
    s << " FROM ";
    s << populate_tokens(table_);

#ifdef MAPNIK_DEBUG
    std::clog << "Sqlite Plugin: " << s.str() << std::endl;
#endif

    boost::shared_ptr<sqlite_resultset> rs(dataset_->execute_query(s.str()));
    while (rs->is_valid() && rs->step_next())
    {
        // columns come back in the order they were pushed: min, max, avg per field
        int col = 0;
        for (itr = desc_.get_descriptors().begin(); itr != end; ++itr)
        {
            if (is_statistics_field(*itr, key_field_))
            {
                mapnik::parameters p;
                p["min"] = rs->column_double(col++);
                p["max"] = rs->column_double(col++);
                p["mean"] = rs->column_double(col++);
                stats[itr->get_name()] = p;
            }
        }
    }

    return boost::make_shared<mapnik::statistics>(stats);
}

// Samples up to five geometries (fewer when row_limit_ is between 1 and 4)
// and returns their common geometry type, Collection as soon as two sampled
// rows disagree, or an empty optional when no type could be derived.
boost::optional<mapnik::datasource::geometry_t> sqlite_datasource::get_geometry_type() const
{
    if (! is_bound_) bind();

    boost::optional<mapnik::datasource::geometry_t> result;

    if (dataset_)
    {
        // finally, get geometry type by querying first feature
        std::ostringstream s;
        s << "SELECT " << geometry_field_
          << " FROM " << geometry_table_;
        if (row_limit_ > 0 && row_limit_ < 5)
        {
            s << " LIMIT " << row_limit_;
        }
        else
        {
            s << " LIMIT 5";
        }

        boost::shared_ptr<sqlite_resultset> rs = dataset_->execute_query(s.str());
        int multi_type = 0;
        while (rs->is_valid() && rs->step_next())
        {
            int size = 0;
            const char* data = (const char*) rs->column_blob(0, size);
            if (data)
            {
                boost::ptr_vector<mapnik::geometry_type> paths;
                mapnik::geometry_utils::from_wkb(paths, data, size, mapnik::wkbAuto);
                mapnik::util::to_ds_type(paths,result);
                if (result)
                {
                    int type = static_cast<int>(*result);
                    if (multi_type > 0 && multi_type != type)
                    {
                        result.reset(mapnik::datasource::Collection);
                        return result;
                    }
                    multi_type = type;
                }
            }
        }
    }

    return result;
}

// Returns the layer descriptor, binding the datasource first if necessary.
layer_descriptor sqlite_datasource::get_descriptor() const
{
    if (! is_bound_) bind();

    return desc_;
}

// Builds and runs the SELECT for query `q` (geometry, optional key field and
// the requested property names) and wraps the result in a sqlite_featureset.
// The spatial filter is applied only when a key field and a spatial index
// are both available; otherwise the intersects token is neutralised.
// Returns an empty featureset_ptr when there is no connection.
featureset_ptr sqlite_datasource::features(query const& q) const
{
    if (! is_bound_) bind();

    if (dataset_)
    {
        mapnik::box2d<double> const& e = q.get_bbox();

        std::ostringstream s;
        mapnik::context_ptr ctx = boost::make_shared<mapnik::context_type>();

        s << "SELECT " << geometry_field_;
        if (!key_field_.empty())
        {
            s << "," << key_field_;
            ctx->push(key_field_);
        }

        std::set<std::string> const& props = q.property_names();
        std::set<std::string>::const_iterator pos = props.begin();
        std::set<std::string>::const_iterator end = props.end();
        for ( ;pos != end;++pos)
        {
            // TODO - should we restrict duplicate key query?
            //if (*pos != key_field_)
            s << ",[" << *pos << "]";
            ctx->push(*pos);
        }

        s << " FROM ";

        std::string query(table_);

        if (! key_field_.empty() && has_spatial_index_)
        {
            // TODO - debug warn if fails
            sqlite_utils::apply_spatial_filter(query,
                                               e,
                                               table_,
                                               key_field_,
                                               index_table_,
                                               geometry_table_,
                                               intersects_token_);
        }
        else
        {
            query = populate_tokens(table_);
        }

        s << query ;

        if (row_limit_ > 0)
        {
            s << " LIMIT " << row_limit_;
        }

        if (row_offset_ > 0)
        {
            s << " OFFSET " << row_offset_;
        }

#ifdef MAPNIK_DEBUG
        std::clog << "Sqlite Plugin: table: " << table_ << "\n\n";
        std::clog << "Sqlite Plugin: query: " << s.str() << "\n\n";
#endif

        boost::shared_ptr<sqlite_resultset> rs(dataset_->execute_query(s.str()));

        return boost::make_shared<sqlite_featureset>(rs,
                                                     ctx,
                                                     desc_.get_encoding(),
                                                     format_,
                                                     using_subquery_);
    }

    return featureset_ptr();
}

// Like features(), but for the zero-size box at `pt`, and selecting every
// described attribute except the key field (which is selected once, up
// front). Returns an empty featureset_ptr when there is no connection.
featureset_ptr sqlite_datasource::features_at_point(coord2d const& pt) const
{
    if (! is_bound_) bind();

    if (dataset_)
    {
        // TODO - need tolerance
        mapnik::box2d<double> const e(pt.x, pt.y, pt.x, pt.y);

        std::ostringstream s;
        mapnik::context_ptr ctx = boost::make_shared<mapnik::context_type>();

        s << "SELECT " << geometry_field_;
        if (!key_field_.empty())
        {
            s << "," << key_field_;
            ctx->push(key_field_);
        }

        std::vector<attribute_descriptor>::const_iterator itr = desc_.get_descriptors().begin();
        std::vector<attribute_descriptor>::const_iterator end = desc_.get_descriptors().end();

        for ( ; itr != end; ++itr)
        {
            std::string fld_name = itr->get_name();
            if (fld_name != key_field_)
            {
                s << ",[" << itr->get_name() << "]";
                ctx->push(itr->get_name());
            }
        }

        s << " FROM ";

        std::string query(table_);

        if (! key_field_.empty() && has_spatial_index_)
        {
            // TODO - debug warn if fails
            sqlite_utils::apply_spatial_filter(query,
                                               e,
                                               table_,
                                               key_field_,
                                               index_table_,
                                               geometry_table_,
                                               intersects_token_);
        }
        else
        {
            query = populate_tokens(table_);
        }

        s << query ;

        if (row_limit_ > 0)
        {
            s << " LIMIT " << row_limit_;
        }

        if (row_offset_ > 0)
        {
            s << " OFFSET " << row_offset_;
        }

#ifdef MAPNIK_DEBUG
        std::clog << "Sqlite Plugin: " << s.str() << std::endl;
#endif

        boost::shared_ptr<sqlite_resultset> rs(dataset_->execute_query(s.str()));

        return boost::make_shared<sqlite_featureset>(rs,
                                                     ctx,
                                                     desc_.get_encoding(),
                                                     format_,
                                                     using_subquery_);
    }

    return featureset_ptr();
}