use memory mapped files for reading shape files

This commit is contained in:
Artem Pavlenko 2008-02-04 11:12:32 +00:00
parent 57cb2edef1
commit a8859645b3
14 changed files with 307 additions and 331 deletions

View file

@ -162,6 +162,7 @@ BOOST_LIBSHEADERS = [
# ['system', 'boost/system/system_error.hpp', True], # uncomment this on Darwin + boost_1_35
['filesystem', 'boost/filesystem/operations.hpp', True],
['regex', 'boost/regex.hpp', True],
['iostreams','boost/iostreams/device/mapped_file.hpp',True],
['program_options', 'boost/program_options.hpp', False]
]

View file

@ -33,11 +33,17 @@ shape_src = Split(
shapefile.cpp
shape_index_featureset.cpp
shape_io.cpp
shp_index.cpp
"""
)
libraries = []
thread_suffix = '-mt'
if env['PLATFORM'] == 'FreeBSD':
thread_suffix = ''
if env['THREADING'] == 'multi':
libraries = ['boost_iostreams%s%s' % (env['BOOST_APPEND'],thread_suffix) ]
else:
libraries = ['boost_iostreams%s' % (env['BOOST_APPEND']) ]
if env['PLATFORM'] == 'Darwin':
libraries.append('mapnik')
libraries.append('iconv')

View file

@ -35,13 +35,13 @@ dbf_file::dbf_file()
record_length_(0),
record_(0) {}
dbf_file::dbf_file(const char* file_name)
dbf_file::dbf_file(std::string const& file_name)
:num_records_(0),
num_fields_(0),
record_length_(0),
file_(file_name),
record_(0)
{
file_.open(file_name);
if (file_.is_open())
{
read_header();
@ -52,16 +52,6 @@ dbf_file::dbf_file(const char* file_name)
dbf_file::~dbf_file()
{
::operator delete(record_);
file_.close();
}
bool dbf_file::open(const std::string& file_name)
{
file_.open(file_name.c_str(),std::ios::in|std::ios::binary);
if (file_.is_open())
read_header();
return file_?true:false;
}

View file

@ -23,12 +23,16 @@
#ifndef DBFFILE_HPP
#define DBFFILE_HPP
#include <mapnik/feature.hpp>
// boost
#include <boost/iostreams/stream.hpp>
#include <boost/iostreams/device/file.hpp>
#include <boost/iostreams/device/mapped_file.hpp>
// stl
#include <vector>
#include <string>
#include <fstream>
#include <cassert>
#include <mapnik/feature.hpp>
using mapnik::transcoder;
using mapnik::Feature;
@ -44,37 +48,36 @@ struct field_descriptor
int offset_;
};
using namespace boost::iostreams;
class dbf_file
{
private:
int num_records_;
int num_fields_;
int record_length_;
std::vector<field_descriptor> fields_;
std::ifstream file_;
char* record_;
private:
int num_records_;
int num_fields_;
int record_length_;
std::vector<field_descriptor> fields_;
stream<mapped_file_source> file_;
char* record_;
public:
dbf_file();
dbf_file(const char* file_name);
dbf_file(const std::string& file_name);
~dbf_file();
bool open(const std::string& file_name);
bool is_open();
void close();
int num_records() const;
int num_fields() const;
field_descriptor const& descriptor(int col) const;
void move_to(int index);
std::string string_value(int col) const;
void add_attribute(int col, transcoder const& tr, Feature const& f) const throw();
private:
dbf_file(const dbf_file&);
dbf_file& operator=(const dbf_file&);
void read_header();
int read_short();
int read_int();
void skip(int bytes);
dbf_file();
dbf_file(const std::string& file_name);
~dbf_file();
bool is_open();
void close();
int num_records() const;
int num_fields() const;
field_descriptor const& descriptor(int col) const;
void move_to(int index);
std::string string_value(int col) const;
void add_attribute(int col, transcoder const& tr, Feature const& f) const throw();
private:
dbf_file(const dbf_file&);
dbf_file& operator=(const dbf_file&);
void read_header();
int read_short();
int read_int();
void skip(int bytes);
};
#endif //DBFFILE_HPP

View file

@ -23,8 +23,15 @@
//$Id: shape_index_featureset.cc 36 2005-04-05 14:32:18Z pavlenko $
#include <mapnik/feature_factory.hpp>
// boost
#include <boost/iostreams/stream.hpp>
#include <boost/iostreams/device/file.hpp>
#include <boost/iostreams/device/mapped_file.hpp>
#include "shape_index_featureset.hpp"
using namespace boost::iostreams;
template <typename filterT>
shape_index_featureset<filterT>::shape_index_featureset(const filterT& filter,
const std::string& shape_file,
@ -38,17 +45,17 @@ shape_index_featureset<filterT>::shape_index_featureset(const filterT& filter,
{
shape_.shp().skip(100);
std::string indexname(shape_file + ".index");
std::ifstream file(indexname.c_str(),std::ios::in|std::ios::binary);
stream<mapped_file_source> file(shape_file + ".index");
if (file)
{
shp_index<filterT>::query(filter,file,ids_);
file.close();
shp_index<filterT,stream<mapped_file_source> >::query(filter,file,ids_);
file.close();
}
#ifdef MAPNIK_DEBUG
std::sort(ids_.begin(),ids_.end());
//#ifdef MAPNIK_DEBUG
std::clog<< "query size=" << ids_.size() << "\n";
#endif
//#endif
itr_ = ids_.begin();
@ -60,7 +67,7 @@ shape_index_featureset<filterT>::shape_index_featureset(const filterT& filter,
{
if (shape_.dbf().descriptor(i).name_ == *pos)
{
attr_ids_.push_back(i);
attr_ids_.insert(i);
break;
}
}
@ -170,7 +177,7 @@ feature_ptr shape_index_featureset<filterT>::next()
if (attr_ids_.size())
{
shape_.dbf().move_to(shape_.id_);
std::vector<int>::const_iterator pos=attr_ids_.begin();
std::set<int>::const_iterator pos=attr_ids_.begin();
while (pos!=attr_ids_.end())
{
try

View file

@ -35,9 +35,9 @@ class shape_index_featureset : public Featureset
int shape_type_;
shape_io shape_;
boost::scoped_ptr<transcoder> tr_;
std::set<int> ids_;
std::set<int>::iterator itr_;
std::vector<int> attr_ids_;
std::vector<int> ids_;
std::vector<int>::iterator itr_;
std::set<int> attr_ids_;
mutable Envelope<double> feature_ext_;
mutable int total_geom_size;
mutable int count_;

View file

@ -32,11 +32,12 @@ const std::string shape_io::DBF = ".dbf";
shape_io::shape_io(const std::string& shape_name)
: type_(shape_null),
shp_(shape_name + SHP),
dbf_(shape_name + DBF),
reclength_(0),
id_(0)
{
bool ok = (shp_.open(shape_name + SHP) &&
dbf_.open(shape_name + DBF));
bool ok = (shp_.is_open() && dbf_.is_open());
if (!ok)
{
throw datasource_exception("cannot read shape file");
@ -49,7 +50,6 @@ shape_io::~shape_io()
dbf_.close();
}
void shape_io::move_to (int pos)
{
shp_.seek(pos);
@ -57,6 +57,12 @@ void shape_io::move_to (int pos)
reclength_ = shp_.read_xdr_integer();
type_ = shp_.read_ndr_integer();
if (shp_.is_eof()) {
id_ = 0;
reclength_ = 0;
type_ = shape_null;
}
if (type_ != shape_point && type_ != shape_pointm && type_ != shape_pointz)
{
shp_.read_envelope(cur_extent_);

View file

@ -31,36 +31,36 @@ using mapnik::geometry2d;
struct shape_io
{
static const std::string SHP;
static const std::string SHX;
static const std::string DBF;
static const std::string SHP;
static const std::string SHX;
static const std::string DBF;
unsigned type_;
shape_file shp_;
shape_file shx_;
dbf_file dbf_;
unsigned reclength_;
unsigned id_;
Envelope<double> cur_extent_;
shape_file shp_;
shape_file shx_;
dbf_file dbf_;
unsigned type_;
unsigned reclength_;
unsigned id_;
Envelope<double> cur_extent_;
public:
enum shapeType
{
shape_null = 0,
shape_point = 1,
shape_polyline = 3,
shape_polygon = 5,
shape_multipoint = 8,
shape_pointz = 11,
shape_polylinez = 13,
shape_polygonz = 15,
shape_multipointz = 18,
shape_pointm = 21,
shape_polylinem = 23,
shape_polygonm = 25,
shape_multipointm = 28,
shape_multipatch = 31
};
public:
enum shapeType
{
shape_null = 0,
shape_point = 1,
shape_polyline = 3,
shape_polygon = 5,
shape_multipoint = 8,
shape_pointz = 11,
shape_polylinez = 13,
shape_polygonz = 15,
shape_multipointz = 18,
shape_pointm = 21,
shape_polylinem = 23,
shape_polygonm = 25,
shape_multipointm = 28,
shape_multipatch = 31
};
shape_io(const std::string& shape_name);
~shape_io();

View file

@ -25,25 +25,9 @@
shape_file::shape_file() {}
shape_file::shape_file(const std::string& file_name)
{
//file_.rdbuf()->pubsetbuf(buff_,buffer_size);
file_.open(file_name.c_str(),std::ios::in|std::ios::binary);
}
shape_file::~shape_file()
{
if (file_ && file_.is_open())
file_.close();
}
bool shape_file::open(const std::string& file_name)
{
//file_.rdbuf()->pubsetbuf(buff_,buffer_size);
file_.open(file_name.c_str(),std::ios::in | std::ios::binary);
return file_?true:false;
}
: file_(file_name) {}
shape_file::~shape_file() {}
bool shape_file::is_open()
{

View file

@ -20,64 +20,66 @@
*
*****************************************************************************/
//$Id: shapefile.hh 33 2005-04-04 13:01:03Z pavlenko $
//$Id: shapefile.hpp 33 2005-04-04 13:01:03Z pavlenko $
#ifndef SHAPEFILE_HPP
#define SHAPEFILE_HPP
#include <fstream>
#include <mapnik/envelope.hpp>
// boost
#include <boost/utility.hpp>
#include <boost/iostreams/stream.hpp>
#include <boost/iostreams/device/file.hpp>
#include <boost/iostreams/device/mapped_file.hpp>
using mapnik::Envelope;
struct shape_record
{
char* data;
size_t size;
size_t pos;
explicit shape_record(size_t size)
: data(static_cast<char*>(::operator new(sizeof(char)*size))),
size(size),
pos(0) {}
char* rawdata()
{
return &data[0];
}
void skip(unsigned n)
{
pos+=n;
}
int read_ndr_integer()
{
int val=(data[pos] & 0xff) |
const char* data;
size_t size;
mutable size_t pos;
explicit shape_record(size_t size)
: size(size),
pos(0) {}
void set_data(const char * data_)
{
data = data_;
}
void skip(unsigned n)
{
pos+=n;
}
int read_ndr_integer()
{
int val=(data[pos] & 0xff) |
(data[pos+1] & 0xff) << 8 |
(data[pos+2] & 0xff) << 16 |
(data[pos+3] & 0xff) << 24;
pos+=4;
return val;
}
int read_xdr_integer()
{
int val=(data[pos] & 0xff) << 24 |
pos+=4;
return val;
}
int read_xdr_integer()
{
int val=(data[pos] & 0xff) << 24 |
(data[pos+1] & 0xff) << 16 |
(data[pos+2] & 0xff) << 8 |
(data[pos+3] & 0xff);
pos+=4;
return val;
}
double read_double()
{
double val;
pos+=4;
return val;
}
double read_double()
{
double val;
#ifndef WORDS_BIGENDIAN
std::memcpy(&val,&data[pos],8);
std::memcpy(&val,&data[pos],8);
#else
long long bits = ((long long)data[pos] & 0xff) |
long long bits = ((long long)data[pos] & 0xff) |
((long long)data[pos+1] & 0xff) << 8 |
((long long)data[pos+2] & 0xff) << 16 |
((long long)data[pos+3] & 0xff) << 24 |
@ -85,64 +87,62 @@ struct shape_record
((long long)data[pos+5] & 0xff) << 40 |
((long long)data[pos+6] & 0xff) << 48 |
((long long)data[pos+7] & 0xff) << 56 ;
std::memcpy(&val,&bits,8);
std::memcpy(&val,&bits,8);
#endif
pos+=8;
return val;
}
long remains()
{
return (size-pos);
}
~shape_record()
{
::operator delete(data);
}
pos+=8;
return val;
}
long remains()
{
return (size-pos);
}
~shape_record() {}
};
class shape_file
using namespace boost::iostreams;
class shape_file : boost::noncopyable
{
std::ifstream file_;
//static const int buffer_size = 16;
//char buff_[buffer_size];
public:
shape_file();
shape_file(const std::string& file_name);
~shape_file();
bool open(const std::string& file_name);
bool is_open();
void close();
inline void read_record(shape_record& rec)
{
file_.read(rec.rawdata(),rec.size);
}
inline int read_xdr_integer()
{
char b[4];
file_.read(b, 4);
return b[3] & 0xffu | (b[2] & 0xffu) << 8 |
stream<mapped_file_source> file_;
public:
shape_file();
shape_file(const std::string& file_name);
~shape_file();
bool is_open();
void close();
inline void read_record(shape_record& rec)
{
rec.set_data(file_->data() + file_.tellg());
file_.seekg(rec.size,std::ios::cur);
}
inline int read_xdr_integer()
{
char b[4];
file_.read(b, 4);
return b[3] & 0xffu | (b[2] & 0xffu) << 8 |
(b[1] & 0xffu) << 16 | (b[0] & 0xffu) << 24;
}
inline int read_ndr_integer()
{
char b[4];
file_.read(b,4);
return b[0]&0xffu | (b[1]&0xffu) << 8 |
}
inline int read_ndr_integer()
{
char b[4];
file_.read(b,4);
return b[0]&0xffu | (b[1]&0xffu) << 8 |
(b[2]&0xffu) << 16 | (b[3]&0xffu) << 24;
}
inline double read_double()
{
double val;
}
inline double read_double()
{
double val;
#ifndef WORDS_BIGENDIAN
file_.read(reinterpret_cast<char*>(&val),8);
file_.read(reinterpret_cast<char*>(&val),8);
#else
char b[8];
file_.read(b,8);
long long bits = ((long long)b[0] & 0xff) |
char b[8];
file_.read(b,8);
long long bits = ((long long)b[0] & 0xff) |
((long long)b[1] & 0xff) << 8 |
((long long)b[2] & 0xff) << 16 |
((long long)b[3] & 0xff) << 24 |
@ -150,52 +150,48 @@ public:
((long long)b[5] & 0xff) << 40 |
((long long)b[6] & 0xff) << 48 |
((long long)b[7] & 0xff) << 56 ;
memcpy(&val,&bits,8);
memcpy(&val,&bits,8);
#endif
return val;
}
inline void read_envelope(Envelope<double>& envelope)
{
return val;
}
inline void read_envelope(Envelope<double>& envelope)
{
#ifndef WORDS_BIGENDIAN
file_.read(reinterpret_cast<char*>(&envelope),sizeof(envelope));
file_.read(reinterpret_cast<char*>(&envelope),sizeof(envelope));
#else
double minx=read_double();
double miny=read_double();
double maxx=read_double();
double maxy=read_double();
envelope.init(minx,miny,maxx,maxy);
double minx=read_double();
double miny=read_double();
double maxx=read_double();
double maxy=read_double();
envelope.init(minx,miny,maxx,maxy);
#endif
}
inline void skip(std::streampos bytes)
{
file_.seekg(bytes,std::ios::cur);
}
inline void rewind()
{
seek(100);
}
inline void seek(std::streampos pos)
{
file_.seekg(pos,std::ios::beg);
}
inline std::streampos pos()
{
return file_.tellg();
}
inline bool is_eof()
{
return file_.eof();
}
private:
shape_file(const shape_file&);
shape_file& operator=(const shape_file&);
}
inline void skip(std::streampos bytes)
{
file_.seekg(bytes,std::ios::cur);
}
inline void rewind()
{
seek(100);
}
inline void seek(std::streampos pos)
{
file_.seekg(pos,std::ios::beg);
}
inline std::streampos pos()
{
return file_.tellg();
}
inline bool is_eof()
{
return file_.eof();
}
};
#endif //SHAPEFILE_HPP

View file

@ -1,80 +0,0 @@
/*****************************************************************************
*
* This file is part of Mapnik (c++ mapping toolkit)
*
* Copyright (C) 2006 Artem Pavlenko
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*****************************************************************************/
#include <mapnik/geom_util.hpp>
#include "shp_index.hpp"
template <typename filterT>
void shp_index<filterT>::query(const filterT& filter,std::ifstream& file,std::set<int>& pos)
{
file.seekg(16,std::ios::beg);
query_node(filter,file,pos);
}
template <typename filterT>
void shp_index<filterT>::query_node(const filterT& filter,std::ifstream& file,std::set<int>& ids)
{
int offset=read_ndr_integer(file);
Envelope<double> node_ext;
read_envelope(file,node_ext);
int num_shapes=read_ndr_integer(file);
if (!filter.pass(node_ext))
{
file.seekg(offset+num_shapes*4+4,std::ios::cur);
return;
}
for (int i=0;i<num_shapes;++i)
{
int id=read_ndr_integer(file);
ids.insert(id);
}
int children=read_ndr_integer(file);
for (int j=0;j<children;++j)
{
query_node(filter,file,ids);
}
}
template <typename filterT>
int shp_index<filterT>::read_ndr_integer(std::ifstream& file)
{
char b[4];
file.read(b,4);
return (b[0]&0xff) | (b[1]&0xff)<<8 | (b[2]&0xff)<<16 | (b[3]&0xff)<<24;
}
template <typename filterT>
void shp_index<filterT>::read_envelope(std::ifstream& file,Envelope<double>& envelope)
{
file.read(reinterpret_cast<char*>(&envelope),sizeof(envelope));
}
template class shp_index<mapnik::filter_in_box>;
template class shp_index<mapnik::filter_at_point>;

View file

@ -25,7 +25,7 @@
// stl
#include <fstream>
#include <set>
#include <vector>
// mapnik
#include <mapnik/envelope.hpp>
#include <mapnik/query.hpp>
@ -33,19 +33,73 @@
using mapnik::Envelope;
using mapnik::query;
template <typename filterT>
template <typename filterT, typename IStream = std::ifstream>
class shp_index
{
public:
static void query(const filterT& filter,std::ifstream& file,std::set<int>& pos);
static void query(const filterT& filter, IStream& file,std::vector<int>& pos);
private:
shp_index();
~shp_index();
shp_index(const shp_index&);
shp_index& operator=(const shp_index&);
static int read_ndr_integer(std::ifstream& in);
static void read_envelope(std::ifstream& in,Envelope<double> &envelope);
static void query_node(const filterT& filter,std::ifstream& file,std::set<int>& pos);
static int read_ndr_integer(IStream & in);
static void read_envelope(IStream & in,Envelope<double> &envelope);
static void query_node(const filterT& filter,IStream & in,std::vector<int>& pos);
};
// Public entry point of the index lookup.
// Repositions `file` to byte 16 (counted from the beginning of the stream)
// and hands over to query_node(), which appends the ids it accepts to `pos`.
template <typename filterT,typename IStream>
void shp_index<filterT, IStream>::query(const filterT& filter,IStream & file,std::vector<int>& pos)
{
    const int start_offset = 16;
    file.seekg(start_offset, std::ios::beg);
    query_node(filter, file, pos);
}
// Visits one index node and, when the node passes the filter, recurses into
// its children, appending every id stored in an accepted node to `ids`.
//
// Per node the stream is consumed in this order: a 4-byte integer `offset`,
// the node envelope, a 4-byte shape count, that many 4-byte ids, and a 4-byte
// child count followed by the children themselves.
//
// The stream state is checked after each group of reads. Once the stream has
// failed (e.g. a truncated index) the values just "read" are meaningless, so
// the traversal stops instead of looping or recursing on them.
template <typename filterT, typename IStream>
void shp_index<filterT,IStream>::query_node(const filterT& filter,IStream & file,std::vector<int>& ids)
{
    const int offset = read_ndr_integer(file);
    Envelope<double> node_ext;
    read_envelope(file,node_ext);
    const int num_shapes = read_ndr_integer(file);
    if (file.fail())
    {
        return;
    }

    if (!filter.pass(node_ext))
    {
        // Rejected node: step over its ids (num_shapes*4), the child count (4)
        // and a further `offset` bytes.
        // NOTE(review): `offset` is presumably the serialized size of the
        // child nodes - confirm against the index writer.
        file.seekg(offset+num_shapes*4+4,std::ios::cur);
        return;
    }

    for (int i=0;i<num_shapes;++i)
    {
        const int id = read_ndr_integer(file);
        if (file.fail())
        {
            // Do not record an id that was never actually read.
            return;
        }
        ids.push_back(id);
    }

    const int children = read_ndr_integer(file);
    if (file.fail())
    {
        return;
    }

    // Stop descending as soon as a child leaves the stream in a failed state.
    for (int j=0;j<children && !file.fail();++j)
    {
        query_node(filter,file,ids);
    }
}
// Reads four bytes from `file` and assembles them into an int with the first
// byte as the least significant one.
//
// The buffer is zero-filled before the read so that a short or failed read
// produces a deterministic value instead of indeterminate bytes; callers can
// still detect the failure through the stream state.
// The bytes are combined in unsigned arithmetic so the top byte is never
// shifted into the sign bit of a signed int.
template <typename filterT,typename IStream>
int shp_index<filterT,IStream>::read_ndr_integer(IStream & file)
{
    char b[4] = {0,0,0,0};
    file.read(b,4);
    const unsigned value = (b[0]&0xffu)
                         | (b[1]&0xffu) << 8
                         | (b[2]&0xffu) << 16
                         | (b[3]&0xffu) << 24;
    return static_cast<int>(value);
}
// Fills `envelope` by copying sizeof(envelope) raw bytes from the stream
// directly over the object. No byte-order conversion is performed here.
template <typename filterT,typename IStream>
void shp_index<filterT,IStream>::read_envelope(IStream & file,Envelope<double>& envelope)
{
    char* const raw = reinterpret_cast<char*>(&envelope);
    file.read(raw, sizeof(envelope));
}
#endif //SHP_INDEX_HH

View file

@ -42,9 +42,13 @@ source = Split(
headers = ['#plugins/input/shape'] + env['CPPPATH']
boost_program_options = 'boost_program_options%s' % env['BOOST_APPEND']
boost_iostreams = 'boost_iostreams%s' % env['BOOST_APPEND']
if env['THREADING'] == 'multi':
boost_program_options = '%s%s' % (boost_program_options,thread_suffix)
shapeindex = env.Program('shapeindex', source, CPPPATH=headers, LIBS=boost_program_options)
boost_iostreams = '%s%s' % (boost_iostreams,thread_suffix)
libraries = [boost_program_options,boost_iostreams]
shapeindex = env.Program('shapeindex', source, CPPPATH=headers, LIBS=libraries)
env.Install(install_prefix + '/bin', shapeindex)
env.Alias('install', install_prefix + '/bin')

View file

@ -31,7 +31,8 @@
#include <boost/algorithm/string.hpp>
#include <boost/program_options.hpp>
#include "quadtree.hpp"
#include "shape.hpp"
#include "shapefile.hpp"
#include "shape_io.hpp"
const int MAXDEPTH = 64;
const int DEFAULT_DEPTH = 8;
@ -45,6 +46,7 @@ int main (int argc,char** argv)
namespace po = boost::program_options;
using std::string;
using std::vector;
using std::clog;
bool verbose=false;
unsigned int depth=DEFAULT_DEPTH;
@ -112,14 +114,17 @@ int main (int argc,char** argv)
while (itr != shape_files.end())
{
std::clog<<"processing "<<*itr << std::endl;
shape_file shp;
//shape_file shp;
std::string shapename(*itr++);
if (!shp.open(shapename+".shp")) {
shape_file shp(shapename+".shp");
if (!shp.is_open()) {
std::clog<<"error : cannot open "<< (shapename+".shp") <<"\n";
continue;
}
shp.read_xdr_integer(); //file_code == 9994
int code = shp.read_xdr_integer(); //file_code == 9994
std::clog << code << "\n";
shp.skip(5*4);
int file_length=shp.read_xdr_integer();