// mapnik/plugins/input/geojson/geojson_datasource.cpp

/*****************************************************************************
*
* This file is part of Mapnik (c++ mapping toolkit)
*
* Copyright (C) 2021 Artem Pavlenko
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*****************************************************************************/
#include "geojson_datasource.hpp"
#include "geojson_featureset.hpp"
#include "geojson_index_featureset.hpp"
#include "geojson_memory_index_featureset.hpp"
#include <fstream>
#include <algorithm>
#include <mapnik/warning.hpp>
MAPNIK_DISABLE_WARNING_PUSH
#include <mapnik/warning_ignore.hpp>
#include <boost/algorithm/string.hpp>
MAPNIK_DISABLE_WARNING_POP
// mapnik
#include <mapnik/boolean.hpp>
#include <mapnik/unicode.hpp>
#include <mapnik/util/utf_conv_win.hpp>
#include <mapnik/feature.hpp>
#include <mapnik/feature_factory.hpp>
#include <mapnik/feature_kv_iterator.hpp>
#include <mapnik/value/types.hpp>
#include <mapnik/geometry/box2d.hpp>
#include <mapnik/debug.hpp>
#include <mapnik/proj_transform.hpp>
#include <mapnik/projection.hpp>
#include <mapnik/util/variant.hpp>
#include <mapnik/util/file_io.hpp>
#include <mapnik/util/geometry_to_ds_type.hpp>
#include <mapnik/geometry/boost_adapters.hpp>
#include <mapnik/util/fs.hpp>
#include <mapnik/util/spatial_index.hpp>
#include <mapnik/geom_util.hpp>
#include <mapnik/json/parse_feature.hpp>
#include <mapnik/json/extract_bounding_boxes_x3.hpp>
#if defined(MAPNIK_MEMORY_MAPPED_FILE)
#include <mapnik/warning.hpp>
MAPNIK_DISABLE_WARNING_PUSH
#include <mapnik/warning_ignore.hpp>
#include <boost/interprocess/mapped_region.hpp>
MAPNIK_DISABLE_WARNING_POP
#include <mapnik/mapped_memory_cache.hpp>
#endif
using mapnik::datasource;
using mapnik::parameters;
DATASOURCE_PLUGIN(geojson_datasource)
struct attr_value_converter
{
mapnik::eAttributeType operator()(mapnik::value_integer) const { return mapnik::Integer; }
mapnik::eAttributeType operator()(double) const { return mapnik::Double; }
mapnik::eAttributeType operator()(float) const { return mapnik::Double; }
mapnik::eAttributeType operator()(bool) const { return mapnik::Boolean; }
mapnik::eAttributeType operator()(std::string const&) const { return mapnik::String; }
mapnik::eAttributeType operator()(mapnik::value_unicode_string const&) const { return mapnik::String; }
mapnik::eAttributeType operator()(mapnik::value_null const&) const { return mapnik::String; }
};
/// Constructs the datasource from its parameters.
///
/// Three sources are supported, tried in this order:
///  1. an `inline` GeoJSON string, parsed straight into cached features;
///  2. a `file` (optionally prefixed by `base`) with a sibling `<file>.index`,
///     in which case only the on-disk index is consulted;
///  3. a plain `file`, read (or memory mapped when MAPNIK_MEMORY_MAPPED_FILE
///     is defined) and either fully parsed or merely indexed, depending on
///     the `cache_features` parameter (default: true).
///
/// @throws mapnik::datasource_exception when neither `inline` nor `file` is
///         given, or when the file cannot be opened (non memory-mapped build).
/// @throws std::runtime_error when the file mapping cannot be obtained
///         (memory-mapped build).
geojson_datasource::geojson_datasource(parameters const& params)
    : datasource(params)
    , type_(datasource::Vector)
    , desc_(geojson_datasource::name(), *params.get<std::string>("encoding", "utf-8"))
    , filename_()
    , from_inline_string_(false)
    , extent_()
    , features_()
    , tree_(nullptr)
    , num_features_to_query_(
        std::max(mapnik::value_integer(1), *params.get<mapnik::value_integer>("num_features_to_query", 5)))
{
    boost::optional<std::string> inline_json = params.get<std::string>("inline");

    // 1. inline GeoJSON takes precedence over any file parameter
    if (inline_json)
    {
        from_inline_string_ = true;
        char const* first = inline_json->c_str();
        char const* last = first + inline_json->size();
        parse_geojson(first, last);
        return;
    }

    // resolve the file name, honouring the optional base directory
    boost::optional<std::string> file_param = params.get<std::string>("file");
    if (!file_param)
    {
        throw mapnik::datasource_exception("GeoJSON Plugin: missing <file> parameter");
    }
    boost::optional<std::string> base_param = params.get<std::string>("base");
    if (base_param)
    {
        filename_ = *base_param + "/" + *file_param;
    }
    else
    {
        filename_ = *file_param;
    }

    // 2. a pre-built disk index next to the file short-circuits parsing
    has_disk_index_ = mapnik::util::exists(filename_ + ".index");
    if (has_disk_index_)
    {
        initialise_disk_index(filename_);
        return;
    }

    // 3. plain file: obtain the raw bytes, then parse or index them
    cache_features_ = *params.get<mapnik::boolean_type>("cache_features", true);
#if !defined(MAPNIK_MEMORY_MAPPED_FILE)
    mapnik::util::file file(filename_);
    if (!file)
    {
        throw mapnik::datasource_exception("GeoJSON Plugin: could not open: '" + filename_ + "'");
    }
    std::string file_buffer;
    file_buffer.resize(file.size());
    auto const items_read = std::fread(&file_buffer[0], file.size(), 1, file.get());
    char const* start = file_buffer.c_str();
    // a short read leaves an empty range, so parsing fails instead of
    // consuming a partially filled buffer
    char const* end = (items_read == 1) ? start + file_buffer.length() : start;
#else
    boost::optional<mapnik::mapped_region_ptr> mapped_region =
      mapnik::mapped_memory_cache::instance().find(filename_, false);
    if (!mapped_region)
    {
        throw std::runtime_error("could not get file mapping for " + filename_);
    }
    char const* start = reinterpret_cast<char const*>((*mapped_region)->get_address());
    char const* end = start + (*mapped_region)->get_size();
#endif
    if (!cache_features_)
    {
        initialise_index(start, end);
    }
    else
    {
        parse_geojson(start, end);
    }
}
2014-07-30 12:28:23 +02:00
namespace {
2016-11-28 10:28:12 +01:00
using box_type = box2d<double>;
using boxes_type = std::vector<std::pair<box_type, std::pair<std::uint64_t, std::uint64_t>>>;
using base_iterator_type = char const*;
2015-03-07 21:00:38 +01:00
const mapnik::transcoder geojson_datasource_static_tr("utf8");
} // namespace
2014-07-30 12:28:23 +02:00
/// Adds a descriptor to desc_ for every attribute of `feature` whose name is
/// not known yet. The attribute type is derived from the stored value via
/// attr_value_converter; names that are already present are left untouched.
void geojson_datasource::initialise_descriptor(mapnik::feature_ptr const& feature)
{
    for (auto const& attribute : *feature)
    {
        auto const& attr_name = std::get<0>(attribute);
        if (desc_.has_name(attr_name))
        {
            continue; // first occurrence wins
        }
        auto const attr_type = mapnik::util::apply_visitor(attr_value_converter(), std::get<1>(attribute));
        desc_.add_descriptor(mapnik::attribute_descriptor(attr_name, attr_type));
    }
}
void geojson_datasource::initialise_disk_index(std::string const& filename)
{
// read extent
using value_type = mapnik::util::index_record;
std::ifstream index(filename_ + ".index", std::ios::binary);
if (!index)
throw mapnik::datasource_exception("GeoJSON Plugin: could not open: '" + filename_ + ".index'");
auto ext_f =
mapnik::util::spatial_index<value_type, mapnik::bounding_box_filter<float>, std::ifstream, mapnik::box2d<float>>::
bounding_box(index);
extent_ = {ext_f.minx(), ext_f.miny(), ext_f.maxx(), ext_f.maxy()};
mapnik::bounding_box_filter<float> filter(ext_f);
std::vector<value_type> positions;
mapnik::util::spatial_index<value_type, mapnik::bounding_box_filter<float>, std::ifstream, mapnik::box2d<float>>::
query_first_n(filter, index, positions, num_features_to_query_);
mapnik::util::file file(filename_);
if (!file)
throw mapnik::datasource_exception("GeoJSON Plugin: could not open: '" + filename_ + "'");
mapnik::context_ptr ctx = std::make_shared<mapnik::context_type>();
for (auto const& pos : positions)
{
std::fseek(file.get(), pos.off, SEEK_SET);
std::vector<char> record;
record.resize(pos.size);
auto count = std::fread(record.data(), pos.size, 1, file.get());
auto const* start = record.data();
auto const* end = (count == 1) ? start + record.size() : start;
mapnik::feature_ptr feature(mapnik::feature_factory::create(ctx, -1));
try
{
mapnik::json::parse_feature(start, end, *feature, geojson_datasource_static_tr);
} catch (...)
{
throw std::runtime_error("Failed to parse geojson feature");
}
initialise_descriptor(feature);
}
desc_.order_by_name();
}
namespace mapnik {
namespace json {
2016-11-28 18:41:38 +01:00
template<typename Iterator>
void parse_geometry_or_feature(Iterator& itr, Iterator const& end, feature_impl& feature, transcoder const& tr)
2016-11-28 18:41:38 +01:00
{
Iterator start = itr;
try
{
parse_feature(start, end, feature, tr);
} catch (...)
2016-11-28 18:41:38 +01:00
{
start = itr;
parse_geometry(start, end, feature);
}
}
} // namespace json
} // namespace mapnik
2016-11-28 18:41:38 +01:00
template<typename Iterator>
void geojson_datasource::initialise_index(Iterator start, Iterator end)
{
boxes_type boxes;
2016-11-28 18:41:38 +01:00
mapnik::context_ptr ctx = std::make_shared<mapnik::context_type>();
Iterator itr = start;
try
{
mapnik::json::extract_bounding_boxes(itr, end, boxes);
if (itr != end || boxes.empty())
throw std::exception();
// bulk insert initialise r-tree
tree_ = std::make_unique<spatial_index_type>(boxes);
// calculate total extent
std::size_t feature_count = 0;
2016-11-28 18:41:38 +01:00
for (auto const& item : boxes)
{
auto const& box = std::get<0>(item);
auto const& geometry_index = std::get<1>(item);
if (!extent_.valid())
extent_ = box;
else
extent_.expand_to_include(box);
if (feature_count++ < num_features_to_query_)
{
// parse first N features to extract attributes schema.
// NOTE: this doesn't yield correct answer for geoJSON in general, just an indication
mapnik::feature_ptr feature(mapnik::feature_factory::create(ctx, -1)); // temp feature
Iterator itr2 = start + geometry_index.first;
Iterator end2 = itr2 + geometry_index.second;
2016-11-28 10:28:12 +01:00
try
{
mapnik::json::parse_feature(itr2, end2, *feature, geojson_datasource_static_tr);
} catch (...)
{
throw std::runtime_error("Failed to parse geojson feature");
}
initialise_descriptor(feature);
}
}
} catch (...)
{
cache_features_ = true; // force caching single feature
itr = start; // reset iteraror
std::size_t start_id = 1;
2016-11-28 18:41:38 +01:00
mapnik::feature_ptr feature(mapnik::feature_factory::create(ctx, start_id)); // single feature
try
{
mapnik::json::parse_geometry_or_feature(itr, end, *feature, geojson_datasource_static_tr);
} catch (...)
{
if (from_inline_string_)
throw mapnik::datasource_exception(
"geojson_datasource: Failed to parse GeoJSON file from in-memory string");
else
throw mapnik::datasource_exception("geojson_datasource: Failed to parse GeoJSON file '" + filename_ +
"'");
}
2016-11-28 18:41:38 +01:00
features_.push_back(std::move(feature));
using values_container = std::vector<std::pair<box_type, std::pair<std::uint64_t, std::uint64_t>>>;
values_container values;
values.reserve(features_.size());
std::size_t geometry_index = 0;
for (mapnik::feature_ptr const& f : features_)
{
mapnik::box2d<double> box = f->envelope();
if (box.valid())
{
if (geometry_index == 0)
{
extent_ = box;
}
else
{
extent_.expand_to_include(box);
}
values.emplace_back(box, std::make_pair(geometry_index, 0));
}
if (geometry_index++ < num_features_to_query_)
{
initialise_descriptor(f);
}
}
// packing algorithm
tree_ = std::make_unique<spatial_index_type>(values);
}
desc_.order_by_name();
}
template<typename Iterator>
void geojson_datasource::parse_geojson(Iterator start, Iterator end)
{
mapnik::context_ptr ctx = std::make_shared<mapnik::context_type>();
std::size_t start_id = 1;
Iterator itr = start;
try
{
2016-11-28 18:41:38 +01:00
boxes_type boxes;
mapnik::json::extract_bounding_boxes(itr, end, boxes);
if (itr != end || boxes.empty())
throw std::exception(); // ensure we've consumed all input and we extracted at least one bbox;
2016-11-28 18:41:38 +01:00
for (auto const& item : boxes)
{
2016-11-28 18:41:38 +01:00
auto const& geometry_index = std::get<1>(item);
Iterator itr2 = start + geometry_index.first;
Iterator end2 = itr2 + geometry_index.second;
mapnik::feature_ptr feature(mapnik::feature_factory::create(ctx, start_id++));
mapnik::json::parse_feature(itr2, end2, *feature, geojson_datasource_static_tr);
features_.push_back(std::move(feature));
}
} catch (...)
{
itr = start;
// try parsing as single Feature or single Geometry JSON
2016-11-28 18:41:38 +01:00
mapnik::feature_ptr feature(mapnik::feature_factory::create(ctx, start_id)); // single feature
try
{
2016-11-28 18:41:38 +01:00
mapnik::json::parse_geometry_or_feature(itr, end, *feature, geojson_datasource_static_tr);
} catch (...)
2016-11-28 18:41:38 +01:00
{
if (from_inline_string_)
throw mapnik::datasource_exception(
"geojson_datasource: Failed to parse GeoJSON file from in-memory string");
else
throw mapnik::datasource_exception("geojson_datasource: Failed to parse GeoJSON file '" + filename_ +
"'");
2016-11-28 18:41:38 +01:00
}
features_.push_back(std::move(feature));
}
using values_container = std::vector<std::pair<box_type, std::pair<std::uint64_t, std::uint64_t>>>;
values_container values;
values.reserve(features_.size());
std::size_t geometry_index = 0;
for (mapnik::feature_ptr const& f : features_)
{
mapnik::box2d<double> box = f->envelope();
if (box.valid())
{
if (geometry_index == 0)
{
extent_ = box;
}
else
{
extent_.expand_to_include(box);
}
values.emplace_back(box, std::make_pair(geometry_index, 0));
}
if (geometry_index < num_features_to_query_)
{
initialise_descriptor(f);
}
++geometry_index;
}
// packing algorithm
tree_ = std::make_unique<spatial_index_type>(values);
}
/// Nothing to release explicitly; members clean up after themselves.
geojson_datasource::~geojson_datasource() = default;
/// @return the plugin name, "geojson".
const char* geojson_datasource::name()
{
    static constexpr char const* const plugin_name = "geojson";
    return plugin_name;
}
/// @return the datasource kind stored in type_ (set to Vector at construction).
mapnik::datasource::datasource_t geojson_datasource::type() const
{
    return this->type_;
}
/// @return a copy of the total extent computed while loading or indexing.
mapnik::box2d<double> geojson_datasource::envelope() const
{
    return this->extent_;
}
/// @return a copy of the layer descriptor holding the collected attributes.
mapnik::layer_descriptor geojson_datasource::get_descriptor() const
{
    return this->desc_;
}
/// Determines the geometry type of the datasource by sampling up to
/// num_features_to_query_ features.
///
/// Features come from the on-disk index, the feature cache, or the in-memory
/// index plus the data file, depending on how the datasource was set up. As
/// soon as two sampled features report different types, Collection is
/// returned. Otherwise the result is whatever the last sampled feature
/// reported, which may be empty.
///
/// @throws mapnik::datasource_exception when the index or data file cannot be
///         opened.
/// @throws std::runtime_error when a sampled feature fails to parse.
boost::optional<mapnik::datasource_geometry_t> geojson_datasource::get_geometry_type() const
{
    boost::optional<mapnik::datasource_geometry_t> result;
    int multi_type = 0;

    // Records the type of one geometry in `result`. Returns true when it
    // conflicts with a previously seen type, in which case `result` has been
    // set to Collection and sampling must stop.
    auto const conflicts_with_seen = [&result, &multi_type](auto const& geom) -> bool {
        result = mapnik::util::to_ds_type(geom);
        if (!result)
        {
            return false;
        }
        int const type = static_cast<int>(*result);
        if (multi_type > 0 && multi_type != type)
        {
            result.reset(mapnik::datasource_geometry_t::Collection);
            return true;
        }
        multi_type = type;
        return false;
    };

    // Reads `length` bytes at `offset` from `fp` and parses them as a feature.
    auto const read_feature = [](std::FILE* fp, auto offset, auto length, mapnik::context_ptr const& ctx) {
        std::fseek(fp, offset, SEEK_SET);
        std::vector<char> json;
        json.resize(length);
        auto const items_read = std::fread(json.data(), length, 1, fp);
        char const* first = json.data();
        // a short read yields an empty range, which the parser rejects
        char const* last = (items_read == 1) ? first + json.size() : first;
        mapnik::feature_ptr feature(mapnik::feature_factory::create(ctx, -1)); // temp feature
        try
        {
            mapnik::json::parse_feature(first, last, *feature, geojson_datasource_static_tr);
        }
        catch (...)
        {
            throw std::runtime_error("Failed to parse geojson feature");
        }
        return feature;
    };

    if (has_disk_index_)
    {
        using record_type = mapnik::util::index_record;
        using filter_type = mapnik::bounding_box_filter<float>;
        using disk_index_type =
          mapnik::util::spatial_index<record_type, filter_type, std::ifstream, mapnik::box2d<float>>;

        std::ifstream index(filename_ + ".index", std::ios::binary);
        if (!index)
        {
            throw mapnik::datasource_exception("GeoJSON Plugin: could not open: '" + filename_ + ".index'");
        }
        filter_type filter(mapnik::box2d<float>(extent_.minx(), extent_.miny(), extent_.maxx(), extent_.maxy()));
        std::vector<record_type> positions;
        disk_index_type::query_first_n(filter, index, positions, num_features_to_query_);

        mapnik::util::file file(filename_);
        if (!file)
        {
            throw mapnik::datasource_exception("GeoJSON Plugin: could not open: '" + filename_ + "'");
        }
        mapnik::context_ptr ctx = std::make_shared<mapnik::context_type>();
        for (auto const& pos : positions)
        {
            mapnik::feature_ptr feature = read_feature(file.get(), pos.off, pos.size, ctx);
            if (conflicts_with_seen(feature->get_geometry()))
            {
                return result;
            }
        }
    }
    else if (cache_features_)
    {
        std::size_t const num_features = features_.size();
        for (std::size_t i = 0; i < num_features && i < num_features_to_query_; ++i)
        {
            if (conflicts_with_seen(features_[i]->get_geometry()))
            {
                return result;
            }
        }
    }
    else
    {
        mapnik::util::file file(filename_);
        if (!file)
        {
            throw mapnik::datasource_exception("GeoJSON Plugin: could not open: '" + filename_ + "'");
        }
        auto itr = tree_->qbegin(boost::geometry::index::intersects(extent_));
        auto end = tree_->qend();
        mapnik::context_ptr ctx = std::make_shared<mapnik::context_type>();
        for (std::size_t count = 0; itr != end && count < num_features_to_query_; ++itr, ++count)
        {
            geojson_datasource::item_type const& item = *itr;
            // index entries carry (file offset, size) of the feature's JSON
            std::uint64_t const file_offset = item.second.first;
            std::uint64_t const size = item.second.second;
            mapnik::feature_ptr feature = read_feature(file.get(), file_offset, size, ctx);
            if (conflicts_with_seen(feature->get_geometry()))
            {
                return result;
            }
        }
    }
    return result;
}
/// Returns the features whose bounding boxes intersect the query box.
///
/// With an in-memory index the hits are sorted by the first component of
/// their index entry to preserve the original GeoJSON ordering, and served
/// either from the feature cache or from the file. Without an in-memory index
/// but with a disk index, an index-backed featureset is returned. In every
/// other case, including a query box outside the extent, the result is an
/// invalid (empty) featureset.
mapnik::featureset_ptr geojson_datasource::features(mapnik::query const& q) const
{
    mapnik::box2d<double> const& query_box = q.get_bbox();

    // nothing to do when the query misses our world extent
    if (!extent_.intersects(query_box))
    {
        return mapnik::make_invalid_featureset();
    }

    if (tree_)
    {
        geojson_featureset::array_type hits;
        tree_->query(boost::geometry::index::intersects(query_box), std::back_inserter(hits));
        // sort index array to preserve original feature ordering in GeoJSON
        auto const by_position = [](item_type const& lhs, item_type const& rhs) {
            return lhs.second.first < rhs.second.first;
        };
        std::sort(hits.begin(), hits.end(), by_position);

        if (!cache_features_)
        {
            return std::make_shared<geojson_memory_index_featureset>(filename_, std::move(hits));
        }
        return std::make_shared<geojson_featureset>(features_, std::move(hits));
    }

    if (has_disk_index_)
    {
        // the disk index works on single-precision boxes
        auto const& bbox = q.get_bbox();
        mapnik::box2d<float> const bbox_f(bbox.minx(), bbox.miny(), bbox.maxx(), bbox.maxy());
        mapnik::bounding_box_filter<float> const filter(bbox_f);
        return std::make_shared<geojson_index_featureset>(filename_, filter);
    }

    // otherwise return an empty featureset
    return mapnik::make_invalid_featureset();
}
/// Returns the features around a point.
///
/// A degenerate box at `pt` is padded by `tol` on every side and used as the
/// query box; all attributes known to the descriptor are requested.
mapnik::featureset_ptr geojson_datasource::features_at_point(mapnik::coord2d const& pt, double tol) const
{
    mapnik::box2d<double> search_box(pt, pt);
    search_box.pad(tol);

    mapnik::query point_query(search_box);
    for (auto const& descriptor : desc_.get_descriptors())
    {
        point_query.add_property_name(descriptor.get_name());
    }
    return features(point_query);
}