shapeindex - refactor to read each record's offset and content length from the *.shx index file (allows reading erroneous shapefiles from NE)

This commit is contained in:
artemp 2015-10-13 13:29:16 +01:00
parent 4093f10f61
commit 3368b6d1de

View file

@ -43,15 +43,11 @@ int main (int argc,char** argv)
{ {
using namespace mapnik; using namespace mapnik;
namespace po = boost::program_options; namespace po = boost::program_options;
using std::string;
using std::vector;
using std::clog;
using std::endl;
bool verbose=false; bool verbose=false;
unsigned int depth=DEFAULT_DEPTH; unsigned int depth=DEFAULT_DEPTH;
double ratio=DEFAULT_RATIO; double ratio=DEFAULT_RATIO;
vector<string> shape_files; std::vector<std::string> shape_files;
try try
{ {
@ -62,7 +58,7 @@ int main (int argc,char** argv)
("verbose,v","verbose output") ("verbose,v","verbose output")
("depth,d", po::value<unsigned int>(), "max tree depth\n(default 8)") ("depth,d", po::value<unsigned int>(), "max tree depth\n(default 8)")
("ratio,r",po::value<double>(),"split ratio (default 0.55)") ("ratio,r",po::value<double>(),"split ratio (default 0.55)")
("shape_files",po::value<vector<string> >(),"shape files to index: file1 file2 ...fileN") ("shape_files",po::value<std::vector<std::string> >(),"shape files to index: file1 file2 ...fileN")
; ;
po::positional_options_description p; po::positional_options_description p;
@ -73,13 +69,13 @@ int main (int argc,char** argv)
if (vm.count("version")) if (vm.count("version"))
{ {
clog << "version 0.3.0" <<std::endl; std::clog << "version 0.3.0" <<std::endl;
return 1; return 1;
} }
if (vm.count("help")) if (vm.count("help"))
{ {
clog << desc << endl; std::clog << desc << std::endl;
return 1; return 1;
} }
if (vm.count("verbose")) if (vm.count("verbose"))
@ -97,148 +93,125 @@ int main (int argc,char** argv)
if (vm.count("shape_files")) if (vm.count("shape_files"))
{ {
shape_files=vm["shape_files"].as< vector<string> >(); shape_files=vm["shape_files"].as< std::vector<std::string> >();
} }
} }
catch (std::exception const& ex) catch (std::exception const& ex)
{ {
clog << "Error: " << ex.what() << endl; std::clog << "Error: " << ex.what() << std::endl;
return -1; return -1;
} }
clog << "max tree depth:" << depth << endl; std::clog << "max tree depth:" << depth << std::endl;
clog << "split ratio:" << ratio << endl; std::clog << "split ratio:" << ratio << std::endl;
//vector<string>::const_iterator itr = shape_files.begin();
if (shape_files.size() == 0) if (shape_files.size() == 0)
{ {
clog << "no shape files to index" << endl; std::clog << "no shape files to index" << std::endl;
return 0; return 0;
} }
for (auto const& filename : shape_files) for (auto const& filename : shape_files)
{ {
clog << "processing " << filename << endl; std::clog << "processing " << filename << std::endl;
std::string shapename (filename); std::string shapename (filename);
boost::algorithm::ireplace_last(shapename,".shp",""); boost::algorithm::ireplace_last(shapename,".shp","");
std::string shapename_full (shapename + ".shp"); std::string shapename_full (shapename + ".shp");
std::string shxname(shapename + ".shx");
if (! mapnik::util::exists (shapename_full)) if (! mapnik::util::exists (shapename_full))
{ {
clog << "Error : file " << shapename_full << " does not exist" << endl; std::clog << "Error : file " << shapename_full << " does not exist" << std::endl;
continue;
}
if (! mapnik::util::exists(shxname))
{
std::clog << "Error : shapefile index file (*.shx) " << shxname << " does not exist" << std::endl;
continue; continue;
} }
shape_file shp (shapename_full); shape_file shp (shapename_full);
if (! shp.is_open()) { if (! shp.is_open())
clog << "Error : cannot open " << shapename_full << endl; {
std::clog << "Error : cannot open " << shapename_full << std::endl;
continue; continue;
} }
int code = shp.read_xdr_integer(); //file_code == 9994 shape_file shx (shxname);
clog << code << endl; if (!shx.is_open())
shp.skip(5*4); {
std::clog << "Error : cannot open " << shxname << std::endl;
continue;
}
int file_length=shp.read_xdr_integer(); int code = shx.read_xdr_integer(); //file_code == 9994
int version=shp.read_ndr_integer(); std::clog << code << std::endl;
int shape_type=shp.read_ndr_integer(); shx.skip(5*4);
int file_length=shx.read_xdr_integer();
int version=shx.read_ndr_integer();
int shape_type=shx.read_ndr_integer();
box2d<double> extent; box2d<double> extent;
shp.read_envelope(extent); shx.read_envelope(extent);
clog << "length=" << file_length << endl; std::clog << "length=" << file_length << std::endl;
clog << "version=" << version << endl; std::clog << "version=" << version << std::endl;
clog << "type=" << shape_type << endl; std::clog << "type=" << shape_type << std::endl;
clog << "extent:" << extent << endl; std::clog << "extent:" << extent << std::endl;
int pos=50; int pos = 50;
shp.seek(pos*2); shx.seek(pos * 2);
mapnik::quad_tree<int> tree(extent,depth,ratio); mapnik::quad_tree<int> tree(extent, depth, ratio);
int count=0; int count = 0;
while (true) {
long offset=shp.pos(); while (true)
int record_number=shp.read_xdr_integer(); {
int content_length=shp.read_xdr_integer(); int offset = shx.read_xdr_integer();
shape_type = shp.read_ndr_integer(); int content_length = shx.read_xdr_integer();
pos += 4;
box2d<double> item_ext; box2d<double> item_ext;
if (shape_type==shape_io::shape_null) shp.seek(offset * 2);
int record_number = shp.read_xdr_integer();
if (content_length != shp.read_xdr_integer())
{ {
if (pos >= file_length) std::clog << "Content length mismatch for record number " << record_number << std::endl;
{ continue;
break;
}
else
{
// still need to increment pos, or the pos counter
// won't indicate EOF until too late.
pos+=4+content_length;
continue;
}
} }
else if (shape_type==shape_io::shape_point) shape_type = shp.read_ndr_integer();
if (shape_type==shape_io::shape_point
|| shape_type==shape_io::shape_pointm
|| shape_type == shape_io::shape_pointz)
{ {
double x=shp.read_double(); double x=shp.read_double();
double y=shp.read_double(); double y=shp.read_double();
item_ext=box2d<double>(x,y,x,y); item_ext=box2d<double>(x,y,x,y);
} }
else if (shape_type==shape_io::shape_pointm)
{
double x=shp.read_double();
double y=shp.read_double();
// skip m
shp.read_double();
item_ext=box2d<double>(x,y,x,y);
}
else if (shape_type==shape_io::shape_pointz)
{
double x=shp.read_double();
double y=shp.read_double();
// skip z
shp.read_double();
// According to ESRI shapefile doc
// A PointZ consists of a triplet of double-precision coordinates in the order X, Y, Z plus a
// measure.
// PointZ
// {
// Double X // X coordinate
// Double Y // Y coordinate
// Double Z // Z coordinate
// Double M // Measure
// }
// But OGR creates shapefiles with M missing so we need to skip M only if present
// NOTE: content_length is in 16-bit words
if ( content_length == 18)
{
shp.read_double();
}
item_ext=box2d<double>(x,y,x,y);
}
else else
{ {
shp.read_envelope(item_ext); shp.read_envelope(item_ext);
shp.skip(2*content_length-4*8-4);
} }
tree.insert(offset,item_ext);
tree.insert(offset * 2,item_ext);
if (verbose) if (verbose)
{ {
clog << "record number " << record_number << " box=" << item_ext << endl; std::clog << "record number " << record_number << " box=" << item_ext << std::endl;
} }
pos+=4+content_length;
++count; ++count;
if (pos >= file_length) break; if (pos >= file_length) break;
} }
clog << " number shapes=" << count << endl; std::clog << " number shapes=" << count << std::endl;
std::fstream file((shapename+".index").c_str(), std::fstream file((shapename+".index").c_str(),
std::ios::in | std::ios::out | std::ios::trunc | std::ios::binary); std::ios::in | std::ios::out | std::ios::trunc | std::ios::binary);
if (!file) { if (!file)
clog << "cannot open index file for writing file \"" {
<< (shapename+".index") << "\"" << endl; std::clog << "cannot open index file for writing file \""
} else { << (shapename+".index") << "\"" << std::endl;
}
else
{
tree.trim(); tree.trim();
std::clog << " number nodes=" << tree.count() << std::endl; std::clog << " number nodes=" << tree.count() << std::endl;
file.exceptions(std::ios::failbit | std::ios::badbit); file.exceptions(std::ios::failbit | std::ios::badbit);
@ -248,6 +221,6 @@ int main (int argc,char** argv)
} }
} }
clog << "done!" << endl; std::clog << "done!" << std::endl;
return 0; return 0;
} }