New *.index format (bounding box per item) implementation

This commit is contained in:
artemp 2017-08-18 11:16:44 +01:00
parent a682575da2
commit 2aa0ce9d74
9 changed files with 76 additions and 125 deletions

View file

@ -37,6 +37,12 @@ using mapnik::query;
namespace mapnik { namespace util {
// Value type stored in a *.index spatial index: locates one feature record in
// the data file and carries that record's own bounding box, so query results
// can be filtered per item after the tree lookup.
struct index_record
{
// Byte offset of the record in the data file; readers seek to this position.
std::uint64_t off;
// Length of the record in bytes; readers read exactly this many bytes.
std::uint64_t size;
// Bounding box of the record as floats, in the order minx, miny, maxx, maxy.
float box[4];
};
template <typename InputStream>
bool check_spatial_index(InputStream& in)

View file

@ -153,12 +153,15 @@ csv_datasource::csv_datasource(parameters const& params)
if (has_disk_index_ && !extent_initialized_)
{
// read bounding box from *.index
using value_type = std::pair<std::uint64_t, std::uint64_t>;
using value_type = mapnik::util::index_record;
std::ifstream index(filename_ + ".index", std::ios::binary);
if (!index) throw mapnik::datasource_exception("CSV Plugin: could not open: '" + filename_ + ".index'");
extent_ = mapnik::util::spatial_index<value_type,
mapnik::filter_in_box,
std::ifstream>::bounding_box(index);
auto ext_f = mapnik::util::spatial_index<value_type,
mapnik::bounding_box_filter<float>,
std::ifstream,
mapnik::box2d<float>>::bounding_box(index);
extent_ = { ext_f.minx(), ext_f.miny(),ext_f.maxx(), ext_f.maxy() };
}
//in.close(); no need to call close, rely on dtor
}
@ -316,22 +319,22 @@ csv_datasource::get_geometry_type_impl(std::istream & stream) const
else
{
// try reading *.index
using value_type = std::pair<std::uint64_t, std::uint64_t>;
using value_type = mapnik::util::index_record;
std::ifstream index(filename_ + ".index", std::ios::binary);
if (!index) throw mapnik::datasource_exception("CSV Plugin: could not open: '" + filename_ + ".index'");
mapnik::filter_in_box filter(extent_);
mapnik::bounding_box_filter<float> filter{mapnik::box2d<float>(extent_.minx(), extent_.miny(), extent_.maxx(), extent_.maxy())};
std::vector<value_type> positions;
mapnik::util::spatial_index<value_type,
mapnik::filter_in_box,
std::ifstream>::query_first_n(filter, index, positions, 5);
mapnik::bounding_box_filter<float>,
std::ifstream,
mapnik::box2d<float>>::query_first_n(filter, index, positions, 5);
int multi_type = 0;
for (auto const& val : positions)
{
stream.seekg(val.first);
stream.seekg(val.off);
std::vector<char> record;
record.resize(val.second);
stream.read(record.data(), val.second);
record.resize(val.size);
stream.read(record.data(), val.size);
std::string str(record.begin(), record.end());
try
{
@ -427,7 +430,8 @@ mapnik::featureset_ptr csv_datasource::features(mapnik::query const& q) const
}
else if (has_disk_index_)
{
mapnik::filter_in_box filter(q.get_bbox());
auto const& bbox = q.get_bbox();
mapnik::bounding_box_filter<float> const filter(mapnik::box2d<float>(bbox.minx(), bbox.miny(), bbox.maxx(), bbox.maxy()));
return std::make_shared<csv_index_featureset>(filename_, filter, locator_, separator_, quote_, headers_, ctx_);
}
}

View file

@ -27,7 +27,6 @@
#include <mapnik/feature_factory.hpp>
#include <mapnik/util/utf_conv_win.hpp>
#include <mapnik/util/trim.hpp>
#include <mapnik/util/spatial_index.hpp>
#include <mapnik/geometry.hpp>
// stl
#include <string>
@ -36,7 +35,7 @@
#include <fstream>
csv_index_featureset::csv_index_featureset(std::string const& filename,
mapnik::filter_in_box const& filter,
mapnik::bounding_box_filter<float> const& filter,
locator_type const& locator,
char separator,
char quote,
@ -76,11 +75,16 @@ csv_index_featureset::csv_index_featureset(std::string const& filename,
std::ifstream index(indexname.c_str(), std::ios::binary);
if (!index) throw mapnik::datasource_exception("CSV Plugin: can't open index file " + indexname);
mapnik::util::spatial_index<value_type,
mapnik::filter_in_box,
std::ifstream>::query(filter, index, positions_);
mapnik::bounding_box_filter<float>,
std::ifstream,
mapnik::box2d<float>>::query(filter, index, positions_);
positions_.erase(std::remove_if(positions_.begin(),
positions_.end(),
[&](value_type const& pos)
{ return !mapnik::box2d<float>{pos.box[0], pos.box[1], pos.box[2], pos.box[3]}.intersects(filter.box_);}),
positions_.end());
std::sort(positions_.begin(), positions_.end(),
[](value_type const& lhs, value_type const& rhs) { return lhs.first < rhs.first;});
[](value_type const& lhs, value_type const& rhs) { return lhs.off < rhs.off;});
itr_ = positions_.begin();
}
@ -113,13 +117,13 @@ mapnik::feature_ptr csv_index_featureset::next()
{
auto pos = *itr_++;
#if defined(MAPNIK_MEMORY_MAPPED_FILE)
char const* start = (char const*)mapped_region_->get_address() + pos.first;
char const* end = start + pos.second;
char const* start = (char const*)mapped_region_->get_address() + pos.off;
char const* end = start + pos.size;
#else
std::fseek(file_.get(), pos.first, SEEK_SET);
std::fseek(file_.get(), pos.off, SEEK_SET);
std::vector<char> record;
record.resize(pos.second);
if (std::fread(record.data(), pos.second, 1, file_.get()) != 1)
if (std::fread(record.data(), pos.size, 1, file_.get()) != 1)
{
return mapnik::feature_ptr();
}

View file

@ -26,6 +26,7 @@
#include <mapnik/feature.hpp>
#include <mapnik/unicode.hpp>
#include <mapnik/geom_util.hpp>
#include <mapnik/util/spatial_index.hpp>
#include "csv_utils.hpp"
#include "csv_datasource.hpp"
@ -40,12 +41,12 @@
class csv_index_featureset : public mapnik::Featureset
{
using value_type = std::pair<std::uint64_t, std::uint64_t>;
using value_type = mapnik::util::index_record;
using locator_type = csv_utils::geometry_column_locator;
public:
csv_index_featureset(std::string const& filename,
mapnik::filter_in_box const& filter,
mapnik::bounding_box_filter<float> const& filter,
locator_type const& locator,
char separator,
char quote,

View file

@ -175,21 +175,7 @@ geojson_datasource::geojson_datasource(parameters const& params)
}
else
{
//initialise_index(start, end);
std::string index_filename = filename_ + ".bbox.rtree";
std::cerr << "loading " << index_filename << " .." << std::endl;
index_file_ = boost::interprocess::managed_mapped_file(boost::interprocess::open_only, index_filename.c_str());
tree_ = std::unique_ptr<spatial_index_type, std::function<void(spatial_index_type*)>>
(index_file_.find<spatial_index_type>("rtree-index").first,
[](spatial_index_type* si) {std::cerr << "calling deleter:" << si << std::endl;});
std::cerr << "TREE:" << tree_.get() << std::endl;
auto bounds = tree_->bounds();
double min_x = bounds.min_corner().get<0>();
double min_y = bounds.min_corner().get<1>();
double max_x = bounds.max_corner().get<0>();
double max_y = bounds.max_corner().get<1>();
std::cerr << "Bounding box: " << min_x << "," << min_y << "," << max_x << "," << max_y << std::endl;
extent_ = { min_x, min_y, max_x, max_y };
initialise_index(start, end);
}
}
}
@ -220,15 +206,14 @@ void geojson_datasource::initialise_descriptor(mapnik::feature_ptr const& featur
void geojson_datasource::initialise_disk_index(std::string const& filename)
{
// read extent
using value_type = record;//std::pair<std::uint64_t, std::uint64_t>;
using value_type = mapnik::util::index_record;
std::ifstream index(filename_ + ".index", std::ios::binary);
if (!index) throw mapnik::datasource_exception("GeoJSON Plugin: could not open: '" + filename_ + ".index'");
auto ext_f = mapnik::util::spatial_index<value_type,
mapnik::filter_in_box,
mapnik::bounding_box_filter<float>,
std::ifstream,
mapnik::box2d<float>>::bounding_box(index);
extent_ = { ext_f.minx(), ext_f.miny(),ext_f.maxx(), ext_f.maxy() };
std::cerr << "EXTENT:" << extent_ << std::endl;
mapnik::bounding_box_filter<float> filter(ext_f);
std::vector<value_type> positions;
mapnik::util::spatial_index<value_type,
@ -365,7 +350,7 @@ void geojson_datasource::initialise_index(Iterator start, Iterator end)
}
}
// packing algorithm
//tree_ = std::make_unique<spatial_index_type>(values);
tree_ = std::make_unique<spatial_index_type>(values);
}
desc_.order_by_name();
}
@ -437,7 +422,7 @@ void geojson_datasource::parse_geojson(Iterator start, Iterator end)
++geometry_index;
}
// packing algorithm
//tree_ = std::make_unique<spatial_index_type>(values);
tree_ = std::make_unique<spatial_index_type>(values);
}
geojson_datasource::~geojson_datasource() {}
@ -468,7 +453,7 @@ boost::optional<mapnik::datasource_geometry_t> geojson_datasource::get_geometry_
int multi_type = 0;
if (has_disk_index_)
{
using value_type = record;//std::pair<std::uint64_t, std::uint64_t>;
using value_type = mapnik::util::index_record;
std::ifstream index(filename_ + ".index", std::ios::binary);
if (!index) throw mapnik::datasource_exception("GeoJSON Plugin: could not open: '" + filename_ + ".index'");
mapnik::bounding_box_filter<float> filter(mapnik::box2d<float>(extent_.minx(),extent_.miny(), extent_.maxx(),extent_.maxy()));
@ -602,18 +587,15 @@ mapnik::featureset_ptr geojson_datasource::features(mapnik::query const& q) cons
}
else
{
std::cerr << "Num features:" << index_array.size() << std::endl;
return std::make_shared<geojson_memory_index_featureset>(filename_, std::move(index_array));
}
}
else if (has_disk_index_)
{
//mapnik::filter_in_box filter(q.get_bbox());
auto const& bbox = q.get_bbox();
mapnik::bounding_box_filter<float> const filter(mapnik::box2d<float>(bbox.minx(), bbox.miny(), bbox.maxx(), bbox.maxy()));
return std::make_shared<geojson_index_featureset>(filename_, filter);
}
}
// otherwise return an empty featureset
return mapnik::make_invalid_featureset();

View file

@ -38,7 +38,6 @@
#include <boost/optional.hpp>
#include <boost/version.hpp>
#include <boost/geometry/index/rtree.hpp>
#include <boost/interprocess/managed_mapped_file.hpp>
#pragma GCC diagnostic pop
// stl
@ -75,13 +74,9 @@ struct options_type<geojson_linear<Max,Min> >
class geojson_datasource : public mapnik::datasource
{
public:
using box_type = mapnik::box2d<float>;
using box_type = mapnik::box2d<double>;
using item_type = std::pair<box_type, std::pair<std::uint64_t, std::uint64_t> >;
using indexable_type = boost::geometry::index::indexable<item_type>;
using equal_to_type = boost::geometry::index::equal_to<item_type>;
using allocator_type = boost::interprocess::allocator<item_type, boost::interprocess::managed_mapped_file::segment_manager>;
using spatial_index_type = boost::geometry::index::rtree<item_type, boost::geometry::index::linear<16>, indexable_type, equal_to_type, allocator_type>;
//using spatial_index_type = boost::geometry::index::rtree<item_type,geojson_linear<16,4> >;
using spatial_index_type = boost::geometry::index::rtree<item_type,geojson_linear<16,4> >;
// constructor
geojson_datasource(mapnik::parameters const& params);
virtual ~geojson_datasource ();
@ -105,18 +100,10 @@ private:
bool from_inline_string_;
mapnik::box2d<double> extent_;
std::vector<mapnik::feature_ptr> features_;
std::unique_ptr<spatial_index_type, std::function<void(spatial_index_type*)>> tree_;
boost::interprocess::managed_mapped_file index_file_;
std::unique_ptr<spatial_index_type> tree_;
bool cache_features_ = true;
bool has_disk_index_ = false;
const std::size_t num_features_to_query_;
};
// Index entry used as the spatial-index value_type by the GeoJSON plugin.
// Has the same fields as mapnik::util::index_record.
struct record
{
// Offset of the entry; index results are sorted by this field before reading.
// Presumably a byte offset into the GeoJSON file -- TODO confirm against reader.
std::uint64_t off;
// Presumably the length in bytes of the entry at `off` -- TODO confirm.
std::uint64_t size;
// Per-item bounding box, consumed as box2d<float>{box[0], box[1], box[2], box[3]}.
float box[4];
};
#endif // GEOJSON_DATASOURCE_HPP

View file

@ -25,7 +25,6 @@
#include <mapnik/feature.hpp>
#include <mapnik/feature_factory.hpp>
#include <mapnik/util/utf_conv_win.hpp>
#include <mapnik/util/spatial_index.hpp>
#include <mapnik/util/conversions.hpp>
#include <mapnik/geometry/is_empty.hpp>
#include <mapnik/json/parse_feature.hpp>
@ -45,8 +44,7 @@ geojson_index_featureset::geojson_index_featureset(std::string const& filename,
#else
file_(std::fopen(filename.c_str(),"rb"), std::fclose),
#endif
ctx_(std::make_shared<mapnik::context_type>()),
query_box_(filter.box_)
ctx_(std::make_shared<mapnik::context_type>())
{
#if defined (MAPNIK_MEMORY_MAPPED_FILE)
@ -71,13 +69,11 @@ geojson_index_featureset::geojson_index_featureset(std::string const& filename,
std::ifstream,
mapnik::box2d<float>>::query(filter, index, positions_);
std::cerr << "#1 Num features:" << positions_.size() << std::endl;
positions_.erase(std::remove_if(positions_.begin(),
positions_.end(),
[&](value_type const& pos)
{ return !mapnik::box2d<float>{pos.box[0], pos.box[1], pos.box[2], pos.box[3]}.intersects(query_box_);}),
{ return !mapnik::box2d<float>{pos.box[0], pos.box[1], pos.box[2], pos.box[3]}.intersects(filter.box_);}),
positions_.end());
std::cerr << "#2 Num features:" << positions_.size() << std::endl;
std::sort(positions_.begin(), positions_.end(),
[](value_type const& lhs, value_type const& rhs) { return lhs.off < rhs.off;});

View file

@ -26,6 +26,7 @@
#include "geojson_datasource.hpp"
#include <mapnik/feature.hpp>
#include <mapnik/geom_util.hpp>
#include <mapnik/util/spatial_index.hpp>
#if defined(MAPNIK_MEMORY_MAPPED_FILE)
#pragma GCC diagnostic push
@ -41,7 +42,7 @@
class geojson_index_featureset : public mapnik::Featureset
{
using value_type = record;
using value_type = mapnik::util::index_record;
public:
geojson_index_featureset(std::string const& filename, mapnik::bounding_box_filter<float> const& filter);
virtual ~geojson_index_featureset();
@ -57,7 +58,6 @@ private:
#endif
mapnik::value_integer feature_id_ = 1;
mapnik::context_ptr ctx_;
mapnik::box2d<float> query_box_;
std::vector<value_type> positions_;
std::vector<value_type>::iterator itr_;
};

View file

@ -27,6 +27,7 @@
#include <mapnik/version.hpp>
#include <mapnik/util/fs.hpp>
#include <mapnik/quad_tree.hpp>
#include <mapnik/util/spatial_index.hpp>
#include "process_csv_file.hpp"
#include "process_geojson_file_x3.hpp"
@ -83,7 +84,6 @@ int main (int argc, char** argv)
("manual-headers,H", po::value<std::string>(), "CSV manual headers string")
("files",po::value<std::vector<std::string> >(),"Files to index: file1 file2 ...fileN")
("validate-features", "Validate GeoJSON features")
("bbox,b", "output bounding boxes")
;
po::positional_options_description p;
@ -209,61 +209,32 @@ int main (int argc, char** argv)
{
std::clog << extent << std::endl;
mapnik::box2d<double> extent_d(extent.minx(), extent.miny(), extent.maxx(), extent.maxy());
if (vm.count("bbox"))
mapnik::quad_tree<mapnik::util::index_record, mapnik::box2d<float>> tree(extent, depth, ratio);
for (auto const& item : boxes)
{
std::fstream file((filename + ".bbox").c_str(),
std::ios::in | std::ios::out | std::ios::trunc | std::ios::binary);
if (!file)
{
std::clog << "cannot open index file for writing file \""
<< (filename + ".bbox") << "\"" << std::endl;
}
else
{
for (auto const& item : boxes)
{
auto ext_f = std::get<0>(item);
auto pos = std::get<1>(item);
file.write(reinterpret_cast<char const*>(&pos.first), sizeof(std::uint64_t));
file.write(reinterpret_cast<char const*>(&pos.second), sizeof(std::uint64_t));
file.write(reinterpret_cast<char const*>(&ext_f), sizeof(ext_f));
}
}
auto ext_f = std::get<0>(item);
mapnik::util::index_record rec =
{std::get<1>(item).first, std::get<1>(item).second, {ext_f.minx(), ext_f.miny(), ext_f.maxx(), ext_f.maxy()}};
tree.insert(rec, ext_f);
}
std::fstream file((filename + ".index").c_str(),
std::ios::in | std::ios::out | std::ios::trunc | std::ios::binary);
if (!file)
{
std::clog << "cannot open index file for writing file \""
<< (filename + ".index") << "\"" << std::endl;
}
else
{
struct record {
std::uint64_t off;
std::uint64_t size;
float box[4];
} rec;
mapnik::quad_tree<record, mapnik::box2d<float>> tree(extent, depth, ratio);
for (auto const& item : boxes)
{
auto ext_f = std::get<0>(item);
rec = { std::get<1>(item).first, std::get<1>(item).second, { ext_f.minx(), ext_f.miny(), ext_f.maxx(), ext_f.maxy() }};
//tree.insert(rec, mapnik::box2d<double>(ext_f.minx(), ext_f.miny(), ext_f.maxx(), ext_f.maxy()));
tree.insert(rec, ext_f);
}
std::fstream file((filename + ".index").c_str(),
std::ios::in | std::ios::out | std::ios::trunc | std::ios::binary);
if (!file)
{
std::clog << "cannot open index file for writing file \""
<< (filename + ".index") << "\"" << std::endl;
}
else
{
tree.trim();
std::clog << "number nodes=" << tree.count() << std::endl;
std::clog << "number element=" << tree.count_items() << std::endl;
file.exceptions(std::ios::failbit | std::ios::badbit);
tree.write(file);
file.flush();
file.close();
}
tree.trim();
std::clog << "number nodes=" << tree.count() << std::endl;
std::clog << "number element=" << tree.count_items() << std::endl;
file.exceptions(std::ios::failbit | std::ios::badbit);
tree.write(file);
file.flush();
file.close();
}
}
else