Use memory-mapped files by default for both index and data parsing (improves loading times by ~50%); enabled by default on non-Windows platforms

This commit is contained in:
artemp 2015-09-09 11:53:17 +02:00
parent 310dc968ea
commit 0de6d36000
5 changed files with 71 additions and 8 deletions

View file

@ -37,6 +37,13 @@
#include <mapnik/util/trim.hpp>
#include <mapnik/util/geometry_to_ds_type.hpp>
#include <mapnik/value_types.hpp>
#ifdef CSV_MEMORY_MAPPED_FILE
#include <boost/interprocess/mapped_region.hpp>
#include <boost/interprocess/streams/bufferstream.hpp>
#include <mapnik/mapped_memory_cache.hpp>
#endif
// stl
#include <sstream>
#include <fstream>
@ -102,17 +109,36 @@ csv_datasource::csv_datasource(parameters const& params)
}
else
{
#if defined (_WINDOWS)
#if defined (CSV_MEMORY_MAPPED_FILE)
using file_source_type = boost::interprocess::ibufferstream;
file_source_type in;
mapnik::mapped_region_ptr mapped_region;
boost::optional<mapnik::mapped_region_ptr> memory =
mapnik::mapped_memory_cache::instance().find(filename_, true);
if (memory)
{
mapped_region = *memory;
in.buffer(static_cast<char*>(mapped_region->get_address()),mapped_region->get_size());
}
else
{
throw std::runtime_error("could not create file mapping for " + filename_);
}
#elif defined (_WINDOWS)
std::ifstream in(mapnik::utf8_to_utf16(filename_),std::ios_base::in | std::ios_base::binary);
#else
std::ifstream in(filename_.c_str(),std::ios_base::in | std::ios_base::binary);
#endif
if (!in.is_open())
{
throw mapnik::datasource_exception("CSV Plugin: could not open: '" + filename_ + "'");
}
#else
std::ifstream in(filename_.c_str(),std::ios_base::in | std::ios_base::binary);
if (!in.is_open())
{
throw mapnik::datasource_exception("CSV Plugin: could not open: '" + filename_ + "'");
}
#endif
parse_csv(in, escape_, separator_, quote_);
in.close();
//in.close(); no need to call close, rely on dtor
}
}
@ -556,11 +582,13 @@ mapnik::featureset_ptr csv_datasource::features(mapnik::query const& q) const
if (tree_)
{
tree_->query(boost::geometry::index::intersects(box),std::back_inserter(index_array));
#if 0
std::sort(index_array.begin(),index_array.end(),
[] (item_type const& item0, item_type const& item1)
{
return item0.second.first < item1.second.first;
});
#endif
if (inline_string_.empty())
{
return std::make_shared<csv_featureset>(filename_, locator_, separator_, headers_, ctx_, std::move(index_array));

View file

@ -45,7 +45,6 @@
#include <boost/version.hpp>
#include <boost/geometry/index/rtree.hpp>
#pragma GCC diagnostic pop
// stl
#include <vector>
#include <deque>

View file

@ -34,7 +34,9 @@
csv_featureset::csv_featureset(std::string const& filename, detail::geometry_column_locator const& locator, std::string const& separator,
std::vector<std::string> const& headers, mapnik::context_ptr const& ctx, array_type && index_array)
:
#ifdef _WINDOWS
#if defined(CSV_MEMORY_MAPPED_FILE)
//
#elif defined( _WINDOWS)
file_(_wfopen(mapnik::utf8_to_utf16(filename).c_str(), L"rb"), std::fclose),
#else
file_(std::fopen(filename.c_str(),"rb"), std::fclose),
@ -48,7 +50,20 @@ csv_featureset::csv_featureset(std::string const& filename, detail::geometry_col
locator_(locator),
tr_("utf8")
{
// Acquire the data source for this featureset. Which branch is compiled
// depends on whether CSV_MEMORY_MAPPED_FILE is defined.
#if defined (CSV_MEMORY_MAPPED_FILE)
// Memory-mapped build: ask mapped_memory_cache::instance() for the mapping of
// `filename` and keep the returned region in mapped_region_.
// NOTE(review): the meaning of the second find() argument (`true`) is not
// visible here -- presumably "create/update the cache entry"; confirm against
// mapped_memory_cache.
boost::optional<mapnik::mapped_region_ptr> memory =
mapnik::mapped_memory_cache::instance().find(filename, true);
if (memory)
{
mapped_region_ = *memory;
}
else
{
// An empty optional means no mapping was obtained for this file.
throw std::runtime_error("could not create file mapping for " + filename);
}
#else
// stdio build: file_ is set up from fopen/_wfopen in the initializer list;
// a null handle means the open failed.
if (!file_) throw std::runtime_error("Can't open " + filename);
#endif
}
// No explicit cleanup is performed here; member destructors do all the work.
csv_featureset::~csv_featureset() = default;
@ -74,12 +89,17 @@ mapnik::feature_ptr csv_featureset::next()
csv_datasource::item_type const& item = *index_itr_++;
std::size_t file_offset = item.second.first;
std::size_t size = item.second.second;
// Locate the raw bytes of the current record as the half-open range
// [start, end), which is then handed to parse_feature.
#if defined(CSV_MEMORY_MAPPED_FILE)
// Memory-mapped build: address the record in place inside the mapping,
// `size` bytes starting at `file_offset`; nothing is copied.
// NOTE(review): the range is not checked against the mapped region's size
// here -- presumably the index is trusted to match the file; confirm.
char const* start = (char const*)mapped_region_->get_address() + file_offset;
char const* end = start + size;
#else
// stdio build: seek to the record and read it into a temporary buffer that
// lives until the end of the enclosing scope.
// NOTE(review): the return values of fseek and fread are ignored, so a failed
// or short read is not detected before parsing.
std::fseek(file_.get(), file_offset, SEEK_SET);
std::vector<char> record;
record.resize(size);
std::fread(record.data(), size, 1, file_.get());
auto const* start = record.data();
auto const* end = start + record.size();
#endif
return parse_feature(start, end);
}
return mapnik::feature_ptr();

View file

@ -30,9 +30,15 @@
#include <deque>
#include <cstdio>
#ifdef CSV_MEMORY_MAPPED_FILE
#include <boost/interprocess/mapped_region.hpp>
#include <boost/interprocess/streams/bufferstream.hpp>
#include <mapnik/mapped_memory_cache.hpp>
#endif
class csv_featureset : public mapnik::Featureset
{
using file_ptr = std::unique_ptr<std::FILE, int (*)(std::FILE *)>;
using locator_type = detail::geometry_column_locator;
public:
using array_type = std::deque<csv_datasource::item_type>;
@ -46,7 +52,13 @@ public:
mapnik::feature_ptr next();
private:
mapnik::feature_ptr parse_feature(char const* beg, char const* end);
#if defined (CSV_MEMORY_MAPPED_FILE)
using file_source_type = boost::interprocess::ibufferstream;
mapnik::mapped_region_ptr mapped_region_;
#else
using file_ptr = std::unique_ptr<std::FILE, int (*)(std::FILE *)>;
file_ptr file_;
#endif
std::string const& separator_;
std::vector<std::string> const& headers_;
const array_type index_array_;

View file

@ -44,6 +44,10 @@
#include <cstdio>
#include <algorithm>
// Build-time switch for the CSV plugin: memory-mapped file access is enabled
// whenever _WINDOWS is not defined. Code guarded by CSV_MEMORY_MAPPED_FILE
// uses a mapped region instead of std::ifstream / std::FILE.
#ifndef _WINDOWS
#define CSV_MEMORY_MAPPED_FILE
#endif
namespace csv_utils
{