mapnik/utils/shapefile/shapefile_reader.py

116 lines
3.7 KiB
Python
Raw Normal View History

#!/usr/bin/env python
# Utility to interrogate ESRI shape files
import os
import sys
import struct
2012-12-10 22:06:55 +00:00
# Maps the numeric shape-type code stored in a shapefile to a readable name.
ShapeType = {
    0: "NullShape",
    1: "Point",
    3: "PolyLine",
    5: "Polygon",
    8: "MultiPoint",
    11: "PointZ",
    13: "PolyLineZ",
    15: "PolygonZ",
    18: "MultiPointZ",
    21: "PointM",
    23: "PolyLineM",
    25: "PolygonM",
    28: "MultiPointM",
    31: "MultiPatch",
}
def test_record(_type, record) :
if _type == 0:
2019-12-31 11:39:57 +00:00
print("NULL shape")
2012-12-10 22:06:55 +00:00
elif _type == 11: #PointZ
test_pointz(record)
elif _type == 5:
test_polygon(record)
2012-12-10 22:06:55 +00:00
def test_pointz(record):
_type, = struct.unpack("<i", record[0:4])
if _type == 0:
2019-12-31 11:39:57 +00:00
print("NULL shape")
return
2012-12-10 22:06:55 +00:00
if len(record) != 36 :
2019-12-31 11:39:57 +00:00
print("BAD SHAPE FILE: expected 36 bytes got", len(record), file=sys.stderr)
2012-12-10 22:06:55 +00:00
sys.exit(1)
x,y,z,m = struct.unpack("<dddd",record[4:36])
2012-12-10 22:06:55 +00:00
if _type != 11:
2019-12-31 11:39:57 +00:00
print("BAD SHAPE FILE: expected PointZ or NullShape got", _type, file=sys.stderr)
2012-12-10 22:06:55 +00:00
sys.exit(1)
def test_polygon(record):
_type, = struct.unpack("<i", record[0:4])
if _type == 0:
2019-12-31 11:39:57 +00:00
print("NULL shape")
return
x0, y0, x1, y0, num_parts, num_points = struct.unpack("<ddddii", record[4:44])
if _type != 5:
2019-12-31 11:39:57 +00:00
print("BAD SHAPE FILE: expected Polygon or NullShape got", _type, file=sys.stderr)
sys.exit(1)
length = len(record)
rec_length = 44 + num_parts * 4 + num_points * 16
2019-12-31 11:39:57 +00:00
if rec_length != length:
print("BAD SHAPE FILE: expected", rec_length, "got", length, file=sys.stderr)
sys.exit(1)
if __name__ == "__main__":
    # Script entry point: cross-check a shapefile's *.shp against its *.shx
    # index and validate every record.
    if len(sys.argv) != 2:
        print("Usage:", sys.argv[0], "<shapefile>", file=sys.stderr)
        sys.exit(1)

    # Derive both file names from the argument regardless of which extension
    # (if any) was supplied.
    base_name = os.path.splitext(sys.argv[1])[0]
    shx_filename = base_name + ".shx"
    shp_filename = base_name + ".shp"

    # The 100-byte file header is read in two pieces: 7 big-endian ints
    # (28 bytes, the last one being the file length) followed by 2
    # little-endian ints and 8 doubles (72 bytes: version, shape type,
    # bounding box values).
    header = (struct.Struct(">IIIIIII"), struct.Struct("<IIdddddddd"))
    record_header = struct.Struct(">II")
    record = struct.Struct(">II")

    # Binary mode is required: the contents are decoded with struct. The
    # context manager closes both files even when sys.exit() is called.
    with open(shx_filename, "rb") as shx, open(shp_filename, "rb") as shp:
        # SHX header
        _, _, _, _, _, _, shx_file_length = header[0].unpack_from(shx.read(28))
        _, _, lox, loy, hix, hiy, _, _, _, _ = header[1].unpack_from(shx.read(72))
        shx_bbox = [lox, loy, hix, hiy]

        # SHP header
        _, _, _, _, _, _, shp_file_length = header[0].unpack_from(shp.read(28))
        version, _type, lox, loy, hix, hiy, _, _, _, _ = header[1].unpack_from(shp.read(72))
        shp_bbox = [lox, loy, hix, hiy]

        if shx_bbox != shp_bbox:
            print("BAD SHAPE FILE: bounding box mismatch in *.shp and *.shx", shp_bbox, shx_bbox)
            sys.exit(1)

        print("SHX FILE_LENGTH=", shx_file_length, "bytes")
        print("SHP FILE_LENGTH=", shp_file_length, "bytes")

        # Fall back to the raw code for shape types missing from the table
        # instead of failing with KeyError.
        print("TYPE", ShapeType.get(_type, _type))
        print("BBOX(", lox, loy, hix, hiy, ")")

        # Lengths and offsets are stored in 16-bit words, hence the factors
        # of 2 when converting to byte positions. The running total starts at
        # 50 words (the 100-byte header) and grows by 4 words (the 8-byte
        # record header) plus the content length for every record.
        calc_total_size = 50
        count = 0
        # Keep going while a full 8-byte index entry remains in the SHX file.
        while shx.tell() <= shx_file_length * 2 - 4 * 2:
            offset, shx_content_length = record.unpack_from(shx.read(8))
            shp.seek(offset * 2, os.SEEK_SET)
            record_number, content_length = record_header.unpack_from(shp.read(8))
            if shx_content_length != content_length:
                print("BAD SHAPE FILE: content_lenght mismatch in SHP and SHX", shx_content_length, content_length)
                sys.exit(1)
            test_record(_type, shp.read(2 * content_length))
            calc_total_size += (4 + content_length)
            count += 1

    print("SHAPES COUNT=", count)
    delta = shp_file_length - calc_total_size
    if delta > 0:
        print("BAD SHAPE FILE: extra ", 2 * delta, "bytes")
    elif delta < 0:
        # delta is negative here; report the shortfall as a positive count.
        print("BAD SHAPE FILE: missing ", -2 * delta, "bytes")
    else:
        print("SHAPE FILE LOOKS GOOD!")