From 942fb3c562cc47e055a03d720425a3629dff0c19 Mon Sep 17 00:00:00 2001 From: artemp Date: Fri, 16 Oct 2015 13:15:20 +0100 Subject: [PATCH] geojson.input - implement spatial disk index featureset --- plugins/input/geojson/geojson_datasource.cpp | 71 ++++++++++++++++++- plugins/input/geojson/geojson_datasource.hpp | 2 + .../geojson/geojson_index_featureset.cpp | 62 ++++++++++++++-- .../geojson/geojson_index_featureset.hpp | 27 ++++++- 4 files changed, 153 insertions(+), 9 deletions(-) diff --git a/plugins/input/geojson/geojson_datasource.cpp b/plugins/input/geojson/geojson_datasource.cpp index 8d9bdb50c..ef3b0beb8 100644 --- a/plugins/input/geojson/geojson_datasource.cpp +++ b/plugins/input/geojson/geojson_datasource.cpp @@ -22,6 +22,7 @@ #include "geojson_datasource.hpp" #include "geojson_featureset.hpp" +#include "geojson_index_featureset.hpp" #include "large_geojson_featureset.hpp" #include #include @@ -57,6 +58,9 @@ #include #include #include +#include +#include +#include #if defined(SHAPE_MEMORY_MAPPED_FILE) #pragma GCC diagnostic push @@ -136,13 +140,19 @@ geojson_datasource::geojson_datasource(parameters const& params) filename_ = *base + "/" + *file; else filename_ = *file; + has_disk_index_ = mapnik::util::exists(filename_ + ".index"); } + if (!inline_string_.empty()) { char const* start = inline_string_.c_str(); char const* end = start + inline_string_.size(); parse_geojson(start, end); } + else if (has_disk_index_) + { + initialise_disk_index(filename_); + } else { cache_features_ = *params.get("cache_features", true); @@ -197,6 +207,55 @@ const mapnik::json::feature_grammar ge const mapnik::json::extract_bounding_box_grammar geojson_datasource_static_bbox_grammar; } +void geojson_datasource::initialise_disk_index(std::string const& filename) +{ + // read extent + using value_type = std::pair; + std::ifstream index(filename_ + ".index", std::ios::binary); + if (!index) throw mapnik::datasource_exception("GeoJSON Plugin: could not open: '" + filename_ + ".index'"); + extent_ = mapnik::util::spatial_index::bounding_box(index); + mapnik::filter_in_box filter(extent_); + std::vector positions; + mapnik::util::spatial_index::query_first_n(filter, index, positions, 5); + + mapnik::util::file file(filename_); + if (!file.open()) throw mapnik::datasource_exception("GeoJSON Plugin: could not open: '" + filename_ + "'"); + + for (auto const& pos : positions) + { + std::fseek(file.get(), pos.first, SEEK_SET); + std::vector record; + record.resize(pos.second); + std::fread(record.data(), pos.second, 1, file.get()); + auto const* start = record.data(); + auto const* end = start + record.size(); + mapnik::context_ptr ctx = std::make_shared(); + mapnik::feature_ptr feature(mapnik::feature_factory::create(ctx,1)); + using namespace boost::spirit; + standard::space_type space; + if (!boost::spirit::qi::phrase_parse(start, end, + (geojson_datasource_static_feature_grammar)(boost::phoenix::ref(*feature)), space) + || start != end) + { + throw std::runtime_error("Failed to parse geojson feature"); + } + for ( auto const& kv : *feature) + { + auto const& name = std::get<0>(kv); + if (!desc_.has_name(name)) + { + desc_.add_descriptor(mapnik::attribute_descriptor(name, + mapnik::util::apply_visitor(attr_value_converter(), + std::get<1>(kv)))); + } + } + } +} + template void geojson_datasource::initialise_index(Iterator start, Iterator end) { @@ -381,7 +440,11 @@ boost::optional geojson_datasource::get_geometry_ { boost::optional result; int multi_type = 0; - if (cache_features_) + if (has_disk_index_) + { + + } + else if (cache_features_) { unsigned num_features = features_.size(); for (unsigned i = 0; i < num_features && i < 5; ++i) @@ -472,6 +535,12 @@ mapnik::featureset_ptr geojson_datasource::features(mapnik::query const& q) cons return std::make_shared(filename_, std::move(index_array)); } } + else if (has_disk_index_) + { + mapnik::filter_in_box filter(q.get_bbox()); + return std::make_shared(filename_, filter); + } + } // otherwise return an empty featureset pointer return mapnik::featureset_ptr(); diff --git a/plugins/input/geojson/geojson_datasource.hpp b/plugins/input/geojson/geojson_datasource.hpp index 870f64985..212796c11 100644 --- a/plugins/input/geojson/geojson_datasource.hpp +++ b/plugins/input/geojson/geojson_datasource.hpp @@ -98,6 +98,7 @@ public: void parse_geojson(Iterator start, Iterator end); template void initialise_index(Iterator start, Iterator end); + void initialise_disk_index(std::string const& filename); private: mapnik::datasource::datasource_t type_; mapnik::layer_descriptor desc_; @@ -107,6 +108,7 @@ private: std::vector features_; std::unique_ptr tree_; bool cache_features_ = true; + bool has_disk_index_ = false; }; diff --git a/plugins/input/geojson/geojson_index_featureset.cpp b/plugins/input/geojson/geojson_index_featureset.cpp index 27a0e732a..733da76bf 100644 --- a/plugins/input/geojson/geojson_index_featureset.cpp +++ b/plugins/input/geojson/geojson_index_featureset.cpp @@ -21,32 +21,84 @@ *****************************************************************************/ // mapnik +#include "geojson_index_featureset.hpp" #include #include #include #include #include +#include // stl #include #include +#include -#include "geojson_index_featureset.hpp" - -geojson_index_featureset::geojson_index_featureset(std::string const& filename) -: -#ifdef _WINDOWS +geojson_index_featureset::geojson_index_featureset(std::string const& filename, mapnik::filter_in_box const& filter) + : +#if defined(GEOJSON_MEMORY_MAPPED_FILE) + // +#elif defined _WINDOWS file_(_wfopen(mapnik::utf8_to_utf16(filename).c_str(), L"rb"), std::fclose), #else file_(std::fopen(filename.c_str(),"rb"), std::fclose), #endif ctx_(std::make_shared()) { + +#if defined (GEOJSON_MEMORY_MAPPED_FILE) + boost::optional memory = + mapnik::mapped_memory_cache::instance().find(filename, true); + if (memory) + { + mapped_region_ = *memory; + } + else + { + throw std::runtime_error("could not create file mapping for " + filename); + } +#else if (!file_) throw std::runtime_error("Can't open " + filename); +#endif + std::string indexname = filename + ".index"; + std::ifstream index(indexname.c_str(), std::ios::binary); + if (!index) throw mapnik::datasource_exception("GeoJSON Plugin: can't open index file " + indexname); + mapnik::util::spatial_index::query(filter, index, positions_); + + std::sort(positions_.begin(), positions_.end(), + [](value_type const& lhs, value_type const& rhs) { return lhs.first < rhs.first;}); + itr_ = positions_.begin(); } geojson_index_featureset::~geojson_index_featureset() {} mapnik::feature_ptr geojson_index_featureset::next() { + while( itr_ != positions_.end()) + { + auto pos = *itr_++; +#if defined(GEOJSON_MEMORY_MAPPED_FILE) + char const* start = (char const*)mapped_region_->get_address() + pos.first; + char const* end = start + pos.second; +#else + std::fseek(file_.get(), pos.first, SEEK_SET); + std::vector record; + record.resize(pos.second); + std::fread(record.data(), pos.second, 1, file_.get()); + auto const* start = record.data(); + auto const* end = start + record.size(); +#endif + static const mapnik::transcoder tr("utf8"); + static const mapnik::json::feature_grammar grammar(tr); + using namespace boost::spirit; + standard::space_type space; + mapnik::feature_ptr feature(mapnik::feature_factory::create(ctx_,1)); + if (!qi::phrase_parse(start, end, (grammar)(boost::phoenix::ref(*feature)), space) || start != end) + { + throw std::runtime_error("Failed to parse geojson feature"); + } + return feature; + } return mapnik::feature_ptr(); } diff --git a/plugins/input/geojson/geojson_index_featureset.hpp b/plugins/input/geojson/geojson_index_featureset.hpp index dcaf7d915..87b3f38a0 100644 --- a/plugins/input/geojson/geojson_index_featureset.hpp +++ b/plugins/input/geojson/geojson_index_featureset.hpp @@ -23,23 +23,44 @@ #ifndef GEOJSON_INDEX_FEATURESET_HPP #define GEOJSON_INDEX_FEATURESET_HPP -#include +#define GEOJSON_MEMORY_MAPPED_FILE + #include "geojson_datasource.hpp" +#include +#include + +#ifdef GEOJSON_MEMORY_MAPPED_FILE +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wshadow" +#pragma GCC diagnostic ignored "-Wsign-conversion" +#include +#include +#pragma GCC diagnostic pop +#include +#endif #include #include class geojson_index_featureset : public mapnik::Featureset { + using value_type = std::pair; public: - using file_ptr = std::unique_ptr; - geojson_index_featureset(std::string const& filename); + geojson_index_featureset(std::string const& filename, mapnik::filter_in_box const& filter); virtual ~geojson_index_featureset(); mapnik::feature_ptr next(); private: +#if defined (GEOJSON_MEMORY_MAPPED_FILE) + using file_source_type = boost::interprocess::ibufferstream; + mapnik::mapped_region_ptr mapped_region_; +#else + using file_ptr = std::unique_ptr; file_ptr file_; +#endif mapnik::context_ptr ctx_; + std::vector positions_; + std::vector::iterator itr_; }; #endif // GEOJSON_INDEX_FEATURESE_HPP