From 2426a44671daf901d2517ba9ec3f67ea37930d30 Mon Sep 17 00:00:00 2001 From: artemp Date: Wed, 16 Aug 2017 10:35:04 +0100 Subject: [PATCH] initial updated spatial-index implementation --- include/mapnik/geom_util.hpp | 54 +++++++------- plugins/input/geojson/geojson_datasource.cpp | 63 ++++++++++------ plugins/input/geojson/geojson_datasource.hpp | 21 ++++-- .../geojson/geojson_index_featureset.cpp | 32 +++++--- .../geojson/geojson_index_featureset.hpp | 5 +- utils/mapnik-index/mapnik-index.cpp | 73 ++++++++++++++----- 6 files changed, 163 insertions(+), 85 deletions(-) diff --git a/include/mapnik/geom_util.hpp b/include/mapnik/geom_util.hpp index 8dea3d16f..86e2b0cc6 100644 --- a/include/mapnik/geom_util.hpp +++ b/include/mapnik/geom_util.hpp @@ -132,87 +132,91 @@ inline T sqr(T x) return x * x; } -inline double distance2(double x0,double y0,double x1,double y1) +inline double distance2(double x0, double y0, double x1, double y1) { double dx = x1 - x0; double dy = y1 - y0; return sqr(dx) + sqr(dy); } -inline double distance(double x0,double y0, double x1,double y1) +inline double distance(double x0, double y0, double x1, double y1) { - return std::sqrt(distance2(x0,y0,x1,y1)); + return std::sqrt(distance2(x0, y0, x1, y1)); } inline double point_to_segment_distance(double x, double y, double ax, double ay, double bx, double by) { - double len2 = distance2(ax,ay,bx,by); + double len2 = distance2(ax, ay, bx, by); if (len2 < 1e-14) { - return distance(x,y,ax,ay); + return distance(x, y, ax, ay); } - double r = ((x - ax)*(bx - ax) + (y - ay)*(by -ay))/len2; - if ( r < 0 ) + double r = ((x - ax) * (bx - ax) + (y - ay) * (by - ay)) / len2; + if (r < 0) { - return distance(x,y,ax,ay); + return distance(x, y, ax, ay); } else if (r > 1) { - return distance(x,y,bx,by); + return distance(x, y, bx, by); } - double s = ((ay - y)*(bx - ax) - (ax - x)*(by - ay))/len2; + double s = ((ay - y) * (bx - ax) - (ax - x) * (by - ay)) / len2; return std::fabs(s) * std::sqrt(len2); } template -inline bool point_on_path(double x,double y,Iter start,Iter end, double tol) +inline bool point_on_path(double x, double y, Iter start, Iter end, double tol) { - double x0=std::get<0>(*start); - double y0=std::get<1>(*start); + double x0 = std::get<0>(*start); + double y0 = std::get<1>(*start); double x1 = 0; double y1 = 0; while (++start != end) { - if ( std::get<2>(*start) == SEG_MOVETO) + if (std::get<2>(*start) == SEG_MOVETO) { x0 = std::get<0>(*start); y0 = std::get<1>(*start); continue; } - x1=std::get<0>(*start); - y1=std::get<1>(*start); + x1 = std::get<0>(*start); + y1 = std::get<1>(*start); - double distance = point_to_segment_distance(x,y,x0,y0,x1,y1); + double distance = point_to_segment_distance(x, y, x0, y0, x1, y1); if (distance < tol) return true; - x0=x1; - y0=y1; + x0 = x1; + y0 = y1; } return false; } // filters -struct filter_in_box +template +struct bounding_box_filter { - box2d box_; - explicit filter_in_box(box2d const& box) + using value_type = T; + box2d box_; + explicit bounding_box_filter(box2d const& box) : box_(box) {} - bool pass(box2d const& extent) const + bool pass(box2d const& extent) const { return extent.intersects(box_); } }; +using filter_in_box = bounding_box_filter; + struct filter_at_point { box2d box_; - explicit filter_at_point(coord2d const& pt, double tol=0) - : box_(pt,pt) + explicit filter_at_point(coord2d const& pt, double tol = 0) + : box_(pt, pt) { box_.pad(tol); } diff --git a/plugins/input/geojson/geojson_datasource.cpp b/plugins/input/geojson/geojson_datasource.cpp index 7bbf54627..048b889cf 100644 --- a/plugins/input/geojson/geojson_datasource.cpp +++ b/plugins/input/geojson/geojson_datasource.cpp @@ -175,7 +175,21 @@ geojson_datasource::geojson_datasource(parameters const& params) } else { - initialise_index(start, end); + //initialise_index(start, end); + std::string index_filename = filename_ + ".bbox.rtree"; + std::cerr << "loading " << index_filename << " .." << std::endl; + index_file_ = boost::interprocess::managed_mapped_file(boost::interprocess::open_only, index_filename.c_str()); + tree_ = std::unique_ptr> + (index_file_.find("rtree-index").first, + [](spatial_index_type* si) {std::cerr << "calling deleter:" << si << std::endl;}); + std::cerr << "TREE:" << tree_.get() << std::endl; + auto bounds = tree_->bounds(); + double min_x = bounds.min_corner().get<0>(); + double min_y = bounds.min_corner().get<1>(); + double max_x = bounds.max_corner().get<0>(); + double max_y = bounds.max_corner().get<1>(); + std::cerr << "Bounding box: " << min_x << "," << min_y << "," << max_x << "," << max_y << std::endl; + extent_ = { min_x, min_y, max_x, max_y }; } } } @@ -206,27 +220,30 @@ void geojson_datasource::initialise_descriptor(mapnik::feature_ptr const& featur void geojson_datasource::initialise_disk_index(std::string const& filename) { // read extent - using value_type = std::pair; + using value_type = record;//std::pair; std::ifstream index(filename_ + ".index", std::ios::binary); if (!index) throw mapnik::datasource_exception("GeoJSON Plugin: could not open: '" + filename_ + ".index'"); - extent_ = mapnik::util::spatial_index::bounding_box(index); - mapnik::filter_in_box filter(extent_); + std::ifstream, + mapnik::box2d>::bounding_box(index); + extent_ = { ext_f.minx(), ext_f.miny(),ext_f.maxx(), ext_f.maxy() }; + std::cerr << "EXTENT:" << extent_ << std::endl; + mapnik::bounding_box_filter filter(ext_f); std::vector positions; mapnik::util::spatial_index::query_first_n(filter, index, positions, num_features_to_query_); + mapnik::bounding_box_filter, + std::ifstream, mapnik::box2d>::query_first_n(filter, index, positions, num_features_to_query_); mapnik::util::file file(filename_); if (!file) throw mapnik::datasource_exception("GeoJSON Plugin: could not open: '" + filename_ + "'"); mapnik::context_ptr ctx = std::make_shared(); for (auto const& pos : positions) { - std::fseek(file.get(), pos.first, SEEK_SET); + std::fseek(file.get(), pos.off, SEEK_SET); std::vector record; - record.resize(pos.second); - auto count = std::fread(record.data(), pos.second, 1, file.get()); + record.resize(pos.size); + auto count = std::fread(record.data(), pos.size, 1, file.get()); auto const* start = record.data(); auto const* end = (count == 1) ? start + record.size() : start; mapnik::feature_ptr feature(mapnik::feature_factory::create(ctx, -1)); @@ -348,7 +365,7 @@ void geojson_datasource::initialise_index(Iterator start, Iterator end) } } // packing algorithm - tree_ = std::make_unique(values); + //tree_ = std::make_unique(values); } desc_.order_by_name(); } @@ -393,7 +410,6 @@ void geojson_datasource::parse_geojson(Iterator start, Iterator end) features_.push_back(std::move(feature)); } - using values_container = std::vector< std::pair>>; values_container values; values.reserve(features_.size()); @@ -421,8 +437,7 @@ void geojson_datasource::parse_geojson(Iterator start, Iterator end) ++geometry_index; } // packing algorithm - tree_ = std::make_unique(values); - + //tree_ = std::make_unique(values); } geojson_datasource::~geojson_datasource() {} @@ -453,14 +468,15 @@ boost::optional geojson_datasource::get_geometry_ int multi_type = 0; if (has_disk_index_) { - using value_type = std::pair; + using value_type = record;//std::pair; std::ifstream index(filename_ + ".index", std::ios::binary); if (!index) throw mapnik::datasource_exception("GeoJSON Plugin: could not open: '" + filename_ + ".index'"); - mapnik::filter_in_box filter(extent_); + mapnik::bounding_box_filter filter(mapnik::box2d(extent_.minx(),extent_.miny(), extent_.maxx(),extent_.maxy())); std::vector positions; mapnik::util::spatial_index::query_first_n(filter, index, positions, num_features_to_query_); + mapnik::bounding_box_filter, + std::ifstream, + mapnik::box2d>::query_first_n(filter, index, positions, num_features_to_query_); mapnik::util::file file(filename_); @@ -468,10 +484,10 @@ boost::optional geojson_datasource::get_geometry_ mapnik::context_ptr ctx = std::make_shared(); for (auto const& pos : positions) { - std::fseek(file.get(), pos.first, SEEK_SET); + std::fseek(file.get(), pos.off, SEEK_SET); std::vector record; - record.resize(pos.second); - auto count = std::fread(record.data(), pos.second, 1, file.get()); + record.resize(pos.size); + auto count = std::fread(record.data(), pos.size, 1, file.get()); auto const* start = record.data(); auto const* end = (count == 1) ? start + record.size() : start; mapnik::feature_ptr feature(mapnik::feature_factory::create(ctx, -1)); // temp feature @@ -586,12 +602,15 @@ mapnik::featureset_ptr geojson_datasource::features(mapnik::query const& q) cons } else { + std::cerr << "Num features:" << index_array.size() << std::endl; return std::make_shared(filename_, std::move(index_array)); } } else if (has_disk_index_) { - mapnik::filter_in_box filter(q.get_bbox()); + //mapnik::filter_in_box filter(q.get_bbox()); + auto const& bbox = q.get_bbox(); + mapnik::bounding_box_filter const filter(mapnik::box2d(bbox.minx(), bbox.miny(), bbox.maxx(), bbox.maxy())); return std::make_shared(filename_, filter); } diff --git a/plugins/input/geojson/geojson_datasource.hpp b/plugins/input/geojson/geojson_datasource.hpp index 6b3e1bcbd..e4221d0f4 100644 --- a/plugins/input/geojson/geojson_datasource.hpp +++ b/plugins/input/geojson/geojson_datasource.hpp @@ -38,6 +38,7 @@ #include #include #include +#include #pragma GCC diagnostic pop // stl @@ -46,7 +47,7 @@ #include #include #include - +#include template struct geojson_linear : boost::geometry::index::linear {}; @@ -74,10 +75,13 @@ struct options_type > class geojson_datasource : public mapnik::datasource { public: - using box_type = mapnik::box2d; + using box_type = mapnik::box2d; using item_type = std::pair >; - using spatial_index_type = boost::geometry::index::rtree >; - + using indexable_type = boost::geometry::index::indexable; + using equal_to_type = boost::geometry::index::equal_to; + using allocator_type = boost::interprocess::allocator; + using spatial_index_type = boost::geometry::index::rtree, indexable_type, equal_to_type, allocator_type>; + //using spatial_index_type = boost::geometry::index::rtree >; // constructor geojson_datasource(mapnik::parameters const& params); virtual ~geojson_datasource (); @@ -101,11 +105,18 @@ private: bool from_inline_string_; mapnik::box2d extent_; std::vector features_; - std::unique_ptr tree_; + std::unique_ptr> tree_; + boost::interprocess::managed_mapped_file index_file_; bool cache_features_ = true; bool has_disk_index_ = false; const std::size_t num_features_to_query_; }; +struct record +{ + std::uint64_t off; + std::uint64_t size; + float box[4]; +}; #endif // GEOJSON_DATASOURCE_HPP diff --git a/plugins/input/geojson/geojson_index_featureset.cpp b/plugins/input/geojson/geojson_index_featureset.cpp index 611640d96..ec84bfc23 100644 --- a/plugins/input/geojson/geojson_index_featureset.cpp +++ b/plugins/input/geojson/geojson_index_featureset.cpp @@ -34,8 +34,9 @@ #include #include #include +#include -geojson_index_featureset::geojson_index_featureset(std::string const& filename, mapnik::filter_in_box const& filter) +geojson_index_featureset::geojson_index_featureset(std::string const& filename, mapnik::bounding_box_filter const& filter) : #if defined(MAPNIK_MEMORY_MAPPED_FILE) // @@ -44,7 +45,8 @@ geojson_index_featureset::geojson_index_featureset(std::string const& filename, #else file_(std::fopen(filename.c_str(),"rb"), std::fclose), #endif - ctx_(std::make_shared()) + ctx_(std::make_shared()), + query_box_(filter.box_) { #if defined (MAPNIK_MEMORY_MAPPED_FILE) @@ -65,11 +67,20 @@ geojson_index_featureset::geojson_index_featureset(std::string const& filename, std::ifstream index(indexname.c_str(), std::ios::binary); if (!index) throw mapnik::datasource_exception("GeoJSON Plugin: can't open index file " + indexname); mapnik::util::spatial_index::query(filter, index, positions_); + mapnik::bounding_box_filter, + std::ifstream, + mapnik::box2d>::query(filter, index, positions_); + + std::cerr << "#1 Num features:" << positions_.size() << std::endl; + positions_.erase(std::remove_if(positions_.begin(), + positions_.end(), + [&](value_type const& pos) + { return !mapnik::box2d{pos.box[0], pos.box[1], pos.box[2], pos.box[3]}.intersects(query_box_);}), + positions_.end()); + std::cerr << "#2 Num features:" << positions_.size() << std::endl; std::sort(positions_.begin(), positions_.end(), - [](value_type const& lhs, value_type const& rhs) { return lhs.first < rhs.first;}); + [](value_type const& lhs, value_type const& rhs) { return lhs.off < rhs.off;}); itr_ = positions_.begin(); } @@ -81,13 +92,13 @@ mapnik::feature_ptr geojson_index_featureset::next() { auto pos = *itr_++; #if defined(MAPNIK_MEMORY_MAPPED_FILE) - char const* start = (char const*)mapped_region_->get_address() + pos.first; - char const* end = start + pos.second; + char const* start = (char const*)mapped_region_->get_address() + pos.off; + char const* end = start + pos.size; #else - std::fseek(file_.get(), pos.first, SEEK_SET); + std::fseek(file_.get(), pos.off, SEEK_SET); std::vector record; record.resize(pos.second); - auto count = std::fread(record.data(), pos.second, 1, file_.get()); + auto count = std::fread(record.data(), pos.size, 1, file_.get()); auto const* start = record.data(); auto const* end = (count == 1) ? start + record.size() : start; #endif @@ -96,8 +107,7 @@ mapnik::feature_ptr geojson_index_featureset::next() using mapnik::json::grammar::iterator_type; mapnik::json::parse_feature(start, end, *feature, tr); // throw on failure // skip empty geometries - if (mapnik::geometry::is_empty(feature->get_geometry())) - continue; + if (mapnik::geometry::is_empty(feature->get_geometry())) continue; return feature; } return mapnik::feature_ptr(); diff --git a/plugins/input/geojson/geojson_index_featureset.hpp b/plugins/input/geojson/geojson_index_featureset.hpp index 23e9023bb..af98d2738 100644 --- a/plugins/input/geojson/geojson_index_featureset.hpp +++ b/plugins/input/geojson/geojson_index_featureset.hpp @@ -41,9 +41,9 @@ class geojson_index_featureset : public mapnik::Featureset { - using value_type = std::pair; + using value_type = record; public: - geojson_index_featureset(std::string const& filename, mapnik::filter_in_box const& filter); + geojson_index_featureset(std::string const& filename, mapnik::bounding_box_filter const& filter); virtual ~geojson_index_featureset(); mapnik::feature_ptr next(); @@ -57,6 +57,7 @@ private: #endif mapnik::value_integer feature_id_ = 1; mapnik::context_ptr ctx_; + mapnik::box2d query_box_; std::vector positions_; std::vector::iterator itr_; }; diff --git a/utils/mapnik-index/mapnik-index.cpp b/utils/mapnik-index/mapnik-index.cpp index 334641739..02ede132f 100644 --- a/utils/mapnik-index/mapnik-index.cpp +++ b/utils/mapnik-index/mapnik-index.cpp @@ -68,6 +68,7 @@ int main (int argc, char** argv) char separator = 0; char quote = 0; std::string manual_headers; + po::variables_map vm; try { po::options_description desc("Mapnik CSV/GeoJSON index utility"); @@ -82,11 +83,11 @@ int main (int argc, char** argv) ("manual-headers,H", po::value(), "CSV manual headers string") ("files",po::value >(),"Files to index: file1 file2 ...fileN") ("validate-features", "Validate GeoJSON features") + ("bbox,b", "output bounding boxes") ; po::positional_options_description p; p.add("files",-1); - po::variables_map vm; po::store(po::command_line_parser(argc, argv) .options(desc) .style(po::command_line_style::unix_style | po::command_line_style::allow_long_disguise) @@ -208,29 +209,61 @@ int main (int argc, char** argv) { std::clog << extent << std::endl; mapnik::box2d extent_d(extent.minx(), extent.miny(), extent.maxx(), extent.maxy()); - mapnik::quad_tree> tree(extent_d, depth, ratio); - for (auto const& item : boxes) + if (vm.count("bbox")) { - auto ext_f = std::get<0>(item); - tree.insert(std::get<1>(item), mapnik::box2d(ext_f.minx(), ext_f.miny(), ext_f.maxx(), ext_f.maxy())); - } - - std::fstream file((filename + ".index").c_str(), - std::ios::in | std::ios::out | std::ios::trunc | std::ios::binary); - if (!file) - { - std::clog << "cannot open index file for writing file \"" - << (filename + ".index") << "\"" << std::endl; + std::fstream file((filename + ".bbox").c_str(), + std::ios::in | std::ios::out | std::ios::trunc | std::ios::binary); + if (!file) + { + std::clog << "cannot open index file for writing file \"" + << (filename + ".bbox") << "\"" << std::endl; + } + else + { + for (auto const& item : boxes) + { + auto ext_f = std::get<0>(item); + auto pos = std::get<1>(item); + file.write(reinterpret_cast(&pos.first), sizeof(std::uint64_t)); + file.write(reinterpret_cast(&pos.second), sizeof(std::uint64_t)); + file.write(reinterpret_cast(&ext_f), sizeof(ext_f)); + } + } } else { - tree.trim(); - std::clog << "number nodes=" << tree.count() << std::endl; - std::clog << "number element=" << tree.count_items() << std::endl; - file.exceptions(std::ios::failbit | std::ios::badbit); - tree.write(file); - file.flush(); - file.close(); + struct record { + std::uint64_t off; + std::uint64_t size; + float box[4]; + } rec; + + mapnik::quad_tree> tree(extent, depth, ratio); + for (auto const& item : boxes) + { + auto ext_f = std::get<0>(item); + rec = { std::get<1>(item).first, std::get<1>(item).second, { ext_f.minx(), ext_f.miny(), ext_f.maxx(), ext_f.maxy() }}; + //tree.insert(rec, mapnik::box2d(ext_f.minx(), ext_f.miny(), ext_f.maxx(), ext_f.maxy())); + tree.insert(rec, ext_f); + } + + std::fstream file((filename + ".index").c_str(), + std::ios::in | std::ios::out | std::ios::trunc | std::ios::binary); + if (!file) + { + std::clog << "cannot open index file for writing file \"" + << (filename + ".index") << "\"" << std::endl; + } + else + { + tree.trim(); + std::clog << "number nodes=" << tree.count() << std::endl; + std::clog << "number element=" << tree.count_items() << std::endl; + file.exceptions(std::ios::failbit | std::ios::badbit); + tree.write(file); + file.flush(); + file.close(); + } } } else