initial updated spatial-index implementation

This commit is contained in:
artemp 2017-08-16 10:35:04 +01:00
parent e00152b262
commit 2426a44671
6 changed files with 163 additions and 85 deletions

View file

@ -196,18 +196,22 @@ inline bool point_on_path(double x,double y,Iter start,Iter end, double tol)
}
// filters
struct filter_in_box
template <typename T>
struct bounding_box_filter
{
box2d<double> box_;
explicit filter_in_box(box2d<double> const& box)
using value_type = T;
box2d<value_type> box_;
explicit bounding_box_filter(box2d<value_type> const& box)
: box_(box) {}
bool pass(box2d<double> const& extent) const
bool pass(box2d<value_type> const& extent) const
{
return extent.intersects(box_);
}
};
using filter_in_box = bounding_box_filter<double>;
struct filter_at_point
{
box2d<double> box_;

View file

@ -175,7 +175,21 @@ geojson_datasource::geojson_datasource(parameters const& params)
}
else
{
initialise_index(start, end);
//initialise_index(start, end);
std::string index_filename = filename_ + ".bbox.rtree";
std::cerr << "loading " << index_filename << " .." << std::endl;
index_file_ = boost::interprocess::managed_mapped_file(boost::interprocess::open_only, index_filename.c_str());
tree_ = std::unique_ptr<spatial_index_type, std::function<void(spatial_index_type*)>>
(index_file_.find<spatial_index_type>("rtree-index").first,
[](spatial_index_type* si) {std::cerr << "calling deleter:" << si << std::endl;});
std::cerr << "TREE:" << tree_.get() << std::endl;
auto bounds = tree_->bounds();
double min_x = bounds.min_corner().get<0>();
double min_y = bounds.min_corner().get<1>();
double max_x = bounds.max_corner().get<0>();
double max_y = bounds.max_corner().get<1>();
std::cerr << "Bounding box: " << min_x << "," << min_y << "," << max_x << "," << max_y << std::endl;
extent_ = { min_x, min_y, max_x, max_y };
}
}
}
@ -206,27 +220,30 @@ void geojson_datasource::initialise_descriptor(mapnik::feature_ptr const& featur
void geojson_datasource::initialise_disk_index(std::string const& filename)
{
// read extent
using value_type = std::pair<std::uint64_t, std::uint64_t>;
using value_type = record;//std::pair<std::uint64_t, std::uint64_t>;
std::ifstream index(filename_ + ".index", std::ios::binary);
if (!index) throw mapnik::datasource_exception("GeoJSON Plugin: could not open: '" + filename_ + ".index'");
extent_ = mapnik::util::spatial_index<value_type,
auto ext_f = mapnik::util::spatial_index<value_type,
mapnik::filter_in_box,
std::ifstream>::bounding_box(index);
mapnik::filter_in_box filter(extent_);
std::ifstream,
mapnik::box2d<float>>::bounding_box(index);
extent_ = { ext_f.minx(), ext_f.miny(),ext_f.maxx(), ext_f.maxy() };
std::cerr << "EXTENT:" << extent_ << std::endl;
mapnik::bounding_box_filter<float> filter(ext_f);
std::vector<value_type> positions;
mapnik::util::spatial_index<value_type,
mapnik::filter_in_box,
std::ifstream>::query_first_n(filter, index, positions, num_features_to_query_);
mapnik::bounding_box_filter<float>,
std::ifstream, mapnik::box2d<float>>::query_first_n(filter, index, positions, num_features_to_query_);
mapnik::util::file file(filename_);
if (!file) throw mapnik::datasource_exception("GeoJSON Plugin: could not open: '" + filename_ + "'");
mapnik::context_ptr ctx = std::make_shared<mapnik::context_type>();
for (auto const& pos : positions)
{
std::fseek(file.get(), pos.first, SEEK_SET);
std::fseek(file.get(), pos.off, SEEK_SET);
std::vector<char> record;
record.resize(pos.second);
auto count = std::fread(record.data(), pos.second, 1, file.get());
record.resize(pos.size);
auto count = std::fread(record.data(), pos.size, 1, file.get());
auto const* start = record.data();
auto const* end = (count == 1) ? start + record.size() : start;
mapnik::feature_ptr feature(mapnik::feature_factory::create(ctx, -1));
@ -348,7 +365,7 @@ void geojson_datasource::initialise_index(Iterator start, Iterator end)
}
}
// packing algorithm
tree_ = std::make_unique<spatial_index_type>(values);
//tree_ = std::make_unique<spatial_index_type>(values);
}
desc_.order_by_name();
}
@ -393,7 +410,6 @@ void geojson_datasource::parse_geojson(Iterator start, Iterator end)
features_.push_back(std::move(feature));
}
using values_container = std::vector< std::pair<box_type, std::pair<std::uint64_t, std::uint64_t>>>;
values_container values;
values.reserve(features_.size());
@ -421,8 +437,7 @@ void geojson_datasource::parse_geojson(Iterator start, Iterator end)
++geometry_index;
}
// packing algorithm
tree_ = std::make_unique<spatial_index_type>(values);
//tree_ = std::make_unique<spatial_index_type>(values);
}
geojson_datasource::~geojson_datasource() {}
@ -453,14 +468,15 @@ boost::optional<mapnik::datasource_geometry_t> geojson_datasource::get_geometry_
int multi_type = 0;
if (has_disk_index_)
{
using value_type = std::pair<std::uint64_t, std::uint64_t>;
using value_type = record;//std::pair<std::uint64_t, std::uint64_t>;
std::ifstream index(filename_ + ".index", std::ios::binary);
if (!index) throw mapnik::datasource_exception("GeoJSON Plugin: could not open: '" + filename_ + ".index'");
mapnik::filter_in_box filter(extent_);
mapnik::bounding_box_filter<float> filter(mapnik::box2d<float>(extent_.minx(),extent_.miny(), extent_.maxx(),extent_.maxy()));
std::vector<value_type> positions;
mapnik::util::spatial_index<value_type,
mapnik::filter_in_box,
std::ifstream>::query_first_n(filter, index, positions, num_features_to_query_);
mapnik::bounding_box_filter<float>,
std::ifstream,
mapnik::box2d<float>>::query_first_n(filter, index, positions, num_features_to_query_);
mapnik::util::file file(filename_);
@ -468,10 +484,10 @@ boost::optional<mapnik::datasource_geometry_t> geojson_datasource::get_geometry_
mapnik::context_ptr ctx = std::make_shared<mapnik::context_type>();
for (auto const& pos : positions)
{
std::fseek(file.get(), pos.first, SEEK_SET);
std::fseek(file.get(), pos.off, SEEK_SET);
std::vector<char> record;
record.resize(pos.second);
auto count = std::fread(record.data(), pos.second, 1, file.get());
record.resize(pos.size);
auto count = std::fread(record.data(), pos.size, 1, file.get());
auto const* start = record.data();
auto const* end = (count == 1) ? start + record.size() : start;
mapnik::feature_ptr feature(mapnik::feature_factory::create(ctx, -1)); // temp feature
@ -586,12 +602,15 @@ mapnik::featureset_ptr geojson_datasource::features(mapnik::query const& q) cons
}
else
{
std::cerr << "Num features:" << index_array.size() << std::endl;
return std::make_shared<geojson_memory_index_featureset>(filename_, std::move(index_array));
}
}
else if (has_disk_index_)
{
mapnik::filter_in_box filter(q.get_bbox());
//mapnik::filter_in_box filter(q.get_bbox());
auto const& bbox = q.get_bbox();
mapnik::bounding_box_filter<float> const filter(mapnik::box2d<float>(bbox.minx(), bbox.miny(), bbox.maxx(), bbox.maxy()));
return std::make_shared<geojson_index_featureset>(filename_, filter);
}

View file

@ -38,6 +38,7 @@
#include <boost/optional.hpp>
#include <boost/version.hpp>
#include <boost/geometry/index/rtree.hpp>
#include <boost/interprocess/managed_mapped_file.hpp>
#pragma GCC diagnostic pop
// stl
@ -46,7 +47,7 @@
#include <string>
#include <map>
#include <deque>
#include <functional>
template <std::size_t Max, std::size_t Min>
struct geojson_linear : boost::geometry::index::linear<Max,Min> {};
@ -74,10 +75,13 @@ struct options_type<geojson_linear<Max,Min> >
class geojson_datasource : public mapnik::datasource
{
public:
using box_type = mapnik::box2d<double>;
using box_type = mapnik::box2d<float>;
using item_type = std::pair<box_type, std::pair<std::uint64_t, std::uint64_t> >;
using spatial_index_type = boost::geometry::index::rtree<item_type,geojson_linear<16,4> >;
using indexable_type = boost::geometry::index::indexable<item_type>;
using equal_to_type = boost::geometry::index::equal_to<item_type>;
using allocator_type = boost::interprocess::allocator<item_type, boost::interprocess::managed_mapped_file::segment_manager>;
using spatial_index_type = boost::geometry::index::rtree<item_type, boost::geometry::index::linear<16>, indexable_type, equal_to_type, allocator_type>;
//using spatial_index_type = boost::geometry::index::rtree<item_type,geojson_linear<16,4> >;
// constructor
geojson_datasource(mapnik::parameters const& params);
virtual ~geojson_datasource ();
@ -101,11 +105,18 @@ private:
bool from_inline_string_;
mapnik::box2d<double> extent_;
std::vector<mapnik::feature_ptr> features_;
std::unique_ptr<spatial_index_type> tree_;
std::unique_ptr<spatial_index_type, std::function<void(spatial_index_type*)>> tree_;
boost::interprocess::managed_mapped_file index_file_;
bool cache_features_ = true;
bool has_disk_index_ = false;
const std::size_t num_features_to_query_;
};
// One entry of the on-disk spatial index: where a feature's JSON text lives in
// the GeoJSON file, plus that feature's bounding box.
// NOTE(review): the index utility declares a local struct with the same three
// members in the same order when it builds the ".index" file; the two
// definitions presumably have to stay in sync - confirm before changing either.
struct record
{
// Byte offset of the feature within the GeoJSON file (used as the fseek target
// and as the offset from the mapped region's base address).
std::uint64_t off;
// Length in bytes of the feature's JSON text (used to size the read buffer).
std::uint64_t size;
// Feature bounding box as {minx, miny, maxx, maxy} in single precision.
float box[4];
};
#endif // GEOJSON_DATASOURCE_HPP

View file

@ -34,8 +34,9 @@
#include <string>
#include <vector>
#include <fstream>
#include <algorithm>
geojson_index_featureset::geojson_index_featureset(std::string const& filename, mapnik::filter_in_box const& filter)
geojson_index_featureset::geojson_index_featureset(std::string const& filename, mapnik::bounding_box_filter<float> const& filter)
:
#if defined(MAPNIK_MEMORY_MAPPED_FILE)
//
@ -44,7 +45,8 @@ geojson_index_featureset::geojson_index_featureset(std::string const& filename,
#else
file_(std::fopen(filename.c_str(),"rb"), std::fclose),
#endif
ctx_(std::make_shared<mapnik::context_type>())
ctx_(std::make_shared<mapnik::context_type>()),
query_box_(filter.box_)
{
#if defined (MAPNIK_MEMORY_MAPPED_FILE)
@ -65,11 +67,20 @@ geojson_index_featureset::geojson_index_featureset(std::string const& filename,
std::ifstream index(indexname.c_str(), std::ios::binary);
if (!index) throw mapnik::datasource_exception("GeoJSON Plugin: can't open index file " + indexname);
mapnik::util::spatial_index<value_type,
mapnik::filter_in_box,
std::ifstream>::query(filter, index, positions_);
mapnik::bounding_box_filter<float>,
std::ifstream,
mapnik::box2d<float>>::query(filter, index, positions_);
std::cerr << "#1 Num features:" << positions_.size() << std::endl;
positions_.erase(std::remove_if(positions_.begin(),
positions_.end(),
[&](value_type const& pos)
{ return !mapnik::box2d<float>{pos.box[0], pos.box[1], pos.box[2], pos.box[3]}.intersects(query_box_);}),
positions_.end());
std::cerr << "#2 Num features:" << positions_.size() << std::endl;
std::sort(positions_.begin(), positions_.end(),
[](value_type const& lhs, value_type const& rhs) { return lhs.first < rhs.first;});
[](value_type const& lhs, value_type const& rhs) { return lhs.off < rhs.off;});
itr_ = positions_.begin();
}
@ -81,13 +92,13 @@ mapnik::feature_ptr geojson_index_featureset::next()
{
auto pos = *itr_++;
#if defined(MAPNIK_MEMORY_MAPPED_FILE)
char const* start = (char const*)mapped_region_->get_address() + pos.first;
char const* end = start + pos.second;
char const* start = (char const*)mapped_region_->get_address() + pos.off;
char const* end = start + pos.size;
#else
std::fseek(file_.get(), pos.first, SEEK_SET);
std::fseek(file_.get(), pos.off, SEEK_SET);
std::vector<char> record;
// value_type is now `record` (members off/size/box), not a std::pair, so the
// buffer has to be sized from pos.size; pos.second no longer exists.
record.resize(pos.size);
auto count = std::fread(record.data(), pos.second, 1, file_.get());
auto count = std::fread(record.data(), pos.size, 1, file_.get());
auto const* start = record.data();
auto const* end = (count == 1) ? start + record.size() : start;
#endif
@ -96,8 +107,7 @@ mapnik::feature_ptr geojson_index_featureset::next()
using mapnik::json::grammar::iterator_type;
mapnik::json::parse_feature(start, end, *feature, tr); // throw on failure
// skip empty geometries
if (mapnik::geometry::is_empty(feature->get_geometry()))
continue;
if (mapnik::geometry::is_empty(feature->get_geometry())) continue;
return feature;
}
return mapnik::feature_ptr();

View file

@ -41,9 +41,9 @@
class geojson_index_featureset : public mapnik::Featureset
{
using value_type = std::pair<std::uint64_t, std::uint64_t>;
using value_type = record;
public:
geojson_index_featureset(std::string const& filename, mapnik::filter_in_box const& filter);
geojson_index_featureset(std::string const& filename, mapnik::bounding_box_filter<float> const& filter);
virtual ~geojson_index_featureset();
mapnik::feature_ptr next();
@ -57,6 +57,7 @@ private:
#endif
mapnik::value_integer feature_id_ = 1;
mapnik::context_ptr ctx_;
mapnik::box2d<float> query_box_;
std::vector<value_type> positions_;
std::vector<value_type>::iterator itr_;
};

View file

@ -68,6 +68,7 @@ int main (int argc, char** argv)
char separator = 0;
char quote = 0;
std::string manual_headers;
po::variables_map vm;
try
{
po::options_description desc("Mapnik CSV/GeoJSON index utility");
@ -82,11 +83,11 @@ int main (int argc, char** argv)
("manual-headers,H", po::value<std::string>(), "CSV manual headers string")
("files",po::value<std::vector<std::string> >(),"Files to index: file1 file2 ...fileN")
("validate-features", "Validate GeoJSON features")
("bbox,b", "output bounding boxes")
;
po::positional_options_description p;
p.add("files",-1);
po::variables_map vm;
po::store(po::command_line_parser(argc, argv)
.options(desc)
.style(po::command_line_style::unix_style | po::command_line_style::allow_long_disguise)
@ -208,11 +209,42 @@ int main (int argc, char** argv)
{
std::clog << extent << std::endl;
mapnik::box2d<double> extent_d(extent.minx(), extent.miny(), extent.maxx(), extent.maxy());
mapnik::quad_tree<std::pair<std::uint64_t, std::uint64_t>> tree(extent_d, depth, ratio);
if (vm.count("bbox"))
{
std::fstream file((filename + ".bbox").c_str(),
std::ios::in | std::ios::out | std::ios::trunc | std::ios::binary);
if (!file)
{
std::clog << "cannot open index file for writing file \""
<< (filename + ".bbox") << "\"" << std::endl;
}
else
{
for (auto const& item : boxes)
{
auto ext_f = std::get<0>(item);
tree.insert(std::get<1>(item), mapnik::box2d<double>(ext_f.minx(), ext_f.miny(), ext_f.maxx(), ext_f.maxy()));
auto pos = std::get<1>(item);
file.write(reinterpret_cast<char const*>(&pos.first), sizeof(std::uint64_t));
file.write(reinterpret_cast<char const*>(&pos.second), sizeof(std::uint64_t));
file.write(reinterpret_cast<char const*>(&ext_f), sizeof(ext_f));
}
}
}
else
{
// Payload stored in the quad tree for each indexed feature. `rec` is a single
// scratch instance that is refilled for every item in the loop below and then
// handed to tree.insert().
// NOTE(review): the GeoJSON datasource header declares a `record` with the same
// members in the same order - presumably the reader's view of this layout; confirm.
struct record {
// Taken from std::get<1>(item).first.
std::uint64_t off;
// Taken from std::get<1>(item).second.
std::uint64_t size;
// The item's float extent as {minx, miny, maxx, maxy}.
float box[4];
} rec;
mapnik::quad_tree<record, mapnik::box2d<float>> tree(extent, depth, ratio);
for (auto const& item : boxes)
{
auto ext_f = std::get<0>(item);
rec = { std::get<1>(item).first, std::get<1>(item).second, { ext_f.minx(), ext_f.miny(), ext_f.maxx(), ext_f.maxy() }};
//tree.insert(rec, mapnik::box2d<double>(ext_f.minx(), ext_f.miny(), ext_f.maxx(), ext_f.maxy()));
tree.insert(rec, ext_f);
}
std::fstream file((filename + ".index").c_str(),
@ -233,6 +265,7 @@ int main (int argc, char** argv)
file.close();
}
}
}
else
{
std::clog << "Invalid extent " << extent << std::endl;