mapnik/utils/pgsql2sqlite/pgsql2sqlite.hpp

420 lines
13 KiB
C++
Raw Normal View History

/*****************************************************************************
*
* This file is part of Mapnik (c++ mapping toolkit)
*
* Copyright (C) 2009 Artem Pavlenko
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*****************************************************************************/
//$Id$
2009-02-21 00:04:30 +01:00
#include "sqlite.hpp"
#include <mapnik/datasource.hpp>
#include <mapnik/wkb.hpp>
#include "connection_manager.hpp"
#include "cursorresultset.hpp"
// boost
#include <boost/cstdint.hpp>
#include <boost/optional.hpp>
#include <boost/scoped_ptr.hpp>
#include <boost/shared_ptr.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/format.hpp>
#include <boost/algorithm/string.hpp>
#include <boost/program_options.hpp>
//stl
#include <cctype>
#include <iostream>
#include <fstream>
#include <string>
namespace mapnik {
2010-06-02 13:03:30 +02:00
// Reads a signed 16-bit integer stored in network byte order (big-endian) at p.
// Decoded locally so that numeric2string does not depend on byte-order macros
// supplied by other headers.
static inline int numeric_read_int16(const char* p)
{
    const int hi = static_cast<unsigned char>(p[0]);
    const int lo = static_cast<unsigned char>(p[1]);
    const int raw = (hi << 8) | lo;
    return raw >= 0x8000 ? raw - 0x10000 : raw;
}

// Appends one base-10000 digit group (0..9999) to out as decimal characters.
// With pad == true the group is always written as exactly four characters;
// with pad == false leading zeros are dropped (at least one character is written).
static inline void numeric_append_group(std::string& out, int group, bool pad)
{
    // A well-formed NUMERIC never carries a group outside 0..9999; treat
    // anything else as zero rather than emitting non-digit characters.
    if (group < 0 || group > 9999) group = 0;

    char text[4];
    text[0] = static_cast<char>('0' + (group / 1000) % 10);
    text[1] = static_cast<char>('0' + (group / 100) % 10);
    text[2] = static_cast<char>('0' + (group / 10) % 10);
    text[3] = static_cast<char>('0' + group % 10);

    int first = 0;
    if (!pad)
    {
        while (first < 3 && text[first] == '0') ++first;
    }
    out.append(text + first, 4 - first);
}

// Converts a PostgreSQL NUMERIC value in binary wire format to its decimal
// text representation.
//
// Layout read from buf (all fields big-endian 16-bit):
//   buf+0  ndigits  number of base-10000 digit groups that follow
//   buf+2  weight   power of 10000 of the first group
//   buf+4  sign     0x0000 positive, 0x4000 negative, 0xC000 NaN
//   buf+6  dscale   number of decimal digits to print after the point
//   buf+8  ndigits groups, each in the range 0..9999
//
// Every group except the leading one of the integer part stands for exactly
// four decimal digits, so it has to be zero padded ([1][1] with weight 1 is
// 10001, not 11). Groups missing from the buffer are implied zeros.
//
// Returns "NaN" for the NaN sign marker; otherwise an optional '-', the
// integer part (at least "0") and, when dscale > 0, a '.' followed by exactly
// dscale fractional digits.
std::string numeric2string(const char* buf)
{
    const int ndigits = numeric_read_int16(buf);
    const int weight  = numeric_read_int16(buf + 2);
    const int sign    = numeric_read_int16(buf + 4) & 0xffff;
    const int dscale  = numeric_read_int16(buf + 6);
    const char* groups = buf + 8;

    if (sign == 0xC000) return "NaN";

    std::string out;
    if (sign == 0x4000) out += '-';

    // d indexes the next digit group to emit. It is negative while we are
    // still in front of the first stored group (values below 1/10000).
    int d = 0;
    if (weight < 0)
    {
        out += '0';
        d = weight + 1;
    }
    else
    {
        for (d = 0; d <= weight; ++d)
        {
            const int group = (d < ndigits) ? numeric_read_int16(groups + 2 * d) : 0;
            // only the very first group may drop its leading zeros
            numeric_append_group(out, group, d > 0);
        }
    }

    if (dscale > 0)
    {
        out += '.';
        const std::string::size_type end = out.size() + static_cast<std::string::size_type>(dscale);
        // Emit whole groups of four digits, then cut back to dscale digits.
        for (int emitted = 0; emitted < dscale; emitted += 4, ++d)
        {
            const int group = (d >= 0 && d < ndigits) ? numeric_read_int16(groups + 2 * d) : 0;
            numeric_append_group(out, group, true);
        }
        out.resize(end);
    }
    return out;
}
2010-06-02 13:03:30 +02:00
// Function object that renders a binary buffer as an upper-case hexadecimal
// string, two characters per input byte.
struct blob_to_hex
{
    // blob: pointer to the first byte; size: number of bytes to convert.
    // Returns a string of length size*2 (empty when size is 0).
    std::string operator() (const char* blob, unsigned size)
    {
        static const char nibble[] = "0123456789ABCDEF";

        std::string encoded;
        encoded.reserve(size*2);
        const char* const last = blob + size;
        for (const char* cur = blob; cur != last; ++cur)
        {
            // go through unsigned char so bytes >= 0x80 are not sign-extended
            const unsigned byte = static_cast<unsigned char>(*cur);
            encoded += nibble[byte >> 4];
            encoded += nibble[byte & 0x0f];
        }
        return encoded;
    }
};
2010-06-02 13:03:30 +02:00
// Returns true when the envelope is ordered on both axes, i.e.
// minx <= maxx and miny <= maxy. Any comparison that does not hold
// (including one involving NaN) makes the envelope invalid.
bool valid_envelope(mapnik::box2d<double> const& e)
{
    if (!(e.minx() <= e.maxx()))
    {
        return false;
    }
    return e.miny() <= e.maxy();
}
2010-06-02 13:03:30 +02:00
// True for characters that can be part of an unquoted SQL identifier
// (letters, digits, underscore). Used to tell the FROM keyword apart from
// identifiers that merely contain the letters "from".
static inline bool sql_identifier_char(char c)
{
    return c == '_' || std::isalnum(static_cast<unsigned char>(c)) != 0;
}

// Heuristically extracts the table name referenced by an SQL statement.
//
// The statement is lower-cased and every whitespace character (newline, tab,
// carriage return, ...) is turned into a plain space. The token following the
// LAST standalone FROM keyword is returned, cut at the first space, ')' or
// ','. "from" embedded in an identifier such as valid_from is not treated as
// the keyword.
//
// When no FROM keyword is present the normalised input is returned unchanged,
// so a bare table name passes straight through.
//
// Limitations: this is not an SQL parser. The result is always lower case,
// and a sub-select appearing after the main FROM clause wins because the last
// FROM is used.
std::string table_from_sql(std::string const& sql)
{
    std::string table_name(sql);
    for (std::string::size_type i = 0; i < table_name.size(); ++i)
    {
        const unsigned char c = static_cast<unsigned char>(table_name[i]);
        table_name[i] = std::isspace(c) ? ' ' : static_cast<char>(std::tolower(c));
    }

    const std::string::size_type keyword_length = 4; // strlen("from")

    // Walk backwards over every occurrence of "from" until one is found
    // that is delimited like a keyword on both sides.
    std::string::size_type idx = table_name.rfind("from");
    while (idx != std::string::npos)
    {
        const std::string::size_type after = idx + keyword_length;
        const bool starts_word = (idx == 0) || !sql_identifier_char(table_name[idx - 1]);
        const bool ends_word = (after >= table_name.size()) || !sql_identifier_char(table_name[after]);
        if (starts_word && ends_word) break;
        idx = (idx == 0) ? std::string::npos : table_name.rfind("from", idx - 1);
    }

    if (idx != std::string::npos)
    {
        // drop everything up to the first non-blank character after FROM
        idx = table_name.find_first_not_of(' ', idx + keyword_length);
        if (idx != std::string::npos)
        {
            table_name.erase(0, idx);
        }
        // the table name ends at the next delimiter
        idx = table_name.find_first_of(" ),");
        if (idx != std::string::npos)
        {
            table_name.erase(idx);
        }
    }
    return table_name;
}
2010-06-02 13:03:30 +02:00
template <typename Connection>
void pgsql2sqlite(Connection conn,
std::string const& query,
std::string const& output_table_name,
std::string const& output_filename)
{
namespace sqlite = mapnik::sqlite;
sqlite::database db(output_filename);
2009-02-21 00:04:30 +01:00
2010-06-02 13:03:30 +02:00
boost::shared_ptr<ResultSet> rs = conn->executeQuery("select * from (" + query + ") as query limit 0;");
int count = rs->getNumFields();
2010-06-02 13:03:30 +02:00
std::ostringstream select_sql;
2009-02-21 00:04:30 +01:00
2010-06-02 13:03:30 +02:00
select_sql << "select ";
2010-06-02 13:03:30 +02:00
for (int i=0; i<count; ++i)
{
if (i!=0) select_sql << ",";
select_sql << "\"" << rs->getFieldName(i) << "\"";
}
2009-02-21 00:04:30 +01:00
2010-06-02 13:03:30 +02:00
select_sql << " from (" << query << ") as query";
2009-02-21 00:04:30 +01:00
2010-06-02 13:03:30 +02:00
std::string table_name = table_from_sql(query);
2010-06-02 13:03:30 +02:00
std::string schema_name="";
std::string::size_type idx=table_name.find_last_of('.');
if (idx!=std::string::npos)
{
schema_name=table_name.substr(0,idx);
table_name=table_name.substr(idx+1);
}
else
{
table_name=table_name.substr(0);
}
2010-06-02 13:03:30 +02:00
std::ostringstream geom_col_sql;
geom_col_sql << "select f_geometry_column,srid,type from geometry_columns ";
geom_col_sql << "where f_table_name='" << table_name << "'";
if (schema_name.length() > 0)
{
geom_col_sql <<" and f_table_schema='"<< schema_name <<"'";
}
2010-06-02 13:03:30 +02:00
rs = conn->executeQuery(geom_col_sql.str());
2009-02-21 00:04:30 +01:00
2010-06-02 13:03:30 +02:00
int srid = -1;
std::string geom_col = "UNKNOWN";
std::string geom_type = "UNKNOWN";
2010-06-02 13:03:30 +02:00
if ( rs->next())
{
try
{
2009-02-21 00:04:30 +01:00
srid = boost::lexical_cast<int>(rs->getValue("srid"));
2010-06-02 13:03:30 +02:00
}
catch (boost::bad_lexical_cast &ex)
{
2009-02-21 00:04:30 +01:00
std::clog << ex.what() << std::endl;
2010-06-02 13:03:30 +02:00
}
geom_col = rs->getValue("f_geometry_column");
geom_type = rs->getValue("type");
}
2009-02-21 00:04:30 +01:00
2010-06-02 13:03:30 +02:00
// add AsBinary(<geometry_column>) modifier
std::string select_sql_str = select_sql.str();
boost::algorithm::replace_all(select_sql_str, "\"" + geom_col + "\"","AsBinary(" + geom_col+") as " + geom_col);
#ifdef MAPNIK_DEBUG
2010-06-02 13:03:30 +02:00
std::cout << select_sql_str << "\n";
#endif
2010-06-02 13:03:30 +02:00
std::ostringstream cursor_sql;
std::string cursor_name("my_cursor");
2009-02-21 00:04:30 +01:00
2010-06-02 13:03:30 +02:00
cursor_sql << "DECLARE " << cursor_name << " BINARY INSENSITIVE NO SCROLL CURSOR WITH HOLD FOR " << select_sql_str << " FOR READ ONLY";
conn->execute(cursor_sql.str());
2009-02-21 00:04:30 +01:00
2010-06-02 13:03:30 +02:00
boost::shared_ptr<CursorResultSet> cursor(new CursorResultSet(conn,cursor_name,10000));
2009-02-21 00:04:30 +01:00
2010-06-02 13:03:30 +02:00
unsigned num_fields = cursor->getNumFields();
2010-06-02 13:03:30 +02:00
if (num_fields == 0) return;
2009-07-08 22:49:28 +02:00
2010-06-02 13:03:30 +02:00
std::string feature_id = "fid";
2009-02-21 00:04:30 +01:00
2010-06-02 13:03:30 +02:00
std::ostringstream create_sql;
create_sql << "create table if not exists " << output_table_name << " (" << feature_id << " INTEGER PRIMARY KEY AUTOINCREMENT,";
2009-02-26 16:35:07 +01:00
2010-06-02 13:03:30 +02:00
int geometry_oid = -1;
2009-02-21 01:23:41 +01:00
2010-06-02 13:03:30 +02:00
std::string output_table_insert_sql = "insert into " + output_table_name + " values (?";
2009-02-21 01:23:41 +01:00
2010-06-02 13:03:30 +02:00
for ( unsigned pos = 0; pos < num_fields ; ++pos)
{
if (pos > 0)
{
create_sql << ",";
}
output_table_insert_sql +=",?";
int oid = cursor->getTypeOID(pos);
if (geom_col == cursor->getFieldName(pos))
{
geometry_oid = oid;
create_sql << "'" << cursor->getFieldName(pos) << "' BLOB";
}
2009-02-21 01:23:41 +01:00
else
{
2010-06-02 13:03:30 +02:00
create_sql << "'" << cursor->getFieldName(pos);
switch (oid)
{
case 20:
case 21:
case 23:
create_sql << "' INTEGER";
break;
case 700:
case 701:
create_sql << "' REAL";
break;
default:
create_sql << "' TEXT";
break;
}
2009-02-21 01:23:41 +01:00
}
2010-06-02 13:03:30 +02:00
}
2010-06-02 13:03:30 +02:00
create_sql << ");";
output_table_insert_sql +=")";
2009-02-21 01:23:41 +01:00
2010-06-02 13:03:30 +02:00
std::cout << "client_encoding=" << conn->client_encoding() << "\n";
std::cout << "geometry_column=" << geom_col << "(" << geom_type
<< ") srid=" << srid << " oid=" << geometry_oid << "\n";
2009-02-21 00:04:30 +01:00
2010-06-02 13:03:30 +02:00
db.execute("begin;");
// output table sql
db.execute(create_sql.str());
2009-02-21 01:23:41 +01:00
2010-06-02 13:03:30 +02:00
// spatial index sql
std::string spatial_index_sql = "create virtual table idx_" + output_table_name
+ "_" + geom_col + " using rtree(pkid, xmin, xmax, ymin, ymax)";
2009-02-21 01:23:41 +01:00
2010-06-02 13:03:30 +02:00
db.execute(spatial_index_sql);
2010-06-02 13:03:30 +02:00
//blob_to_hex hex;
int pkid = 0;
2010-06-02 13:03:30 +02:00
std::string spatial_index_insert_sql = "insert into idx_" + output_table_name + "_"
+ geom_col + " values (?,?,?,?,?)" ;
2010-06-02 13:03:30 +02:00
sqlite::prepared_statement spatial_index(db,spatial_index_insert_sql);
#ifdef MAPNIK_DEBUG
2010-06-02 13:03:30 +02:00
std::cout << output_table_insert_sql << "\n";
#endif
2010-06-02 13:03:30 +02:00
sqlite::prepared_statement output_table(db,output_table_insert_sql);
2009-02-21 00:04:30 +01:00
2010-06-02 13:03:30 +02:00
while (cursor->next())
{
++pkid;
2009-02-21 12:22:17 +01:00
2010-06-02 13:03:30 +02:00
sqlite::record_type output_rec;
output_rec.push_back(sqlite::value_type(pkid));
bool empty_geom = true;
const char * buf = 0;
for (unsigned pos=0 ; pos < num_fields; ++pos)
{
if (! cursor->isNull(pos))
{
2010-06-02 13:03:30 +02:00
int size=cursor->getFieldLength(pos);
int oid = cursor->getTypeOID(pos);
buf=cursor->getValue(pos);
2010-06-02 13:03:30 +02:00
switch (oid)
{
case 25:
case 1042:
case 1043:
{
std::string text(buf);
boost::algorithm::replace_all(text,"'","''");
output_rec.push_back(sqlite::value_type(text));
break;
}
case 23:
output_rec.push_back(sqlite::value_type(int4net(buf)));
break;
case 21:
output_rec.push_back(sqlite::value_type(int2net(buf)));
break;
case 700:
{
float val;
float4net(val,buf);
output_rec.push_back(sqlite::value_type(val));
break;
}
case 701:
{
double val;
float8net(val,buf);
output_rec.push_back(sqlite::value_type(val));
break;
}
case 1700:
{
std::string str = numeric2string(buf);
try
{
double val = boost::lexical_cast<double>(str);
output_rec.push_back(sqlite::value_type(val));
2010-06-02 13:03:30 +02:00
}
catch (boost::bad_lexical_cast & ex)
{
std::clog << ex.what() << "\n";
2010-06-02 13:03:30 +02:00
}
break;
}
2010-06-02 13:03:30 +02:00
default:
{
if (oid == geometry_oid)
{
mapnik::Feature feat(pkid);
geometry_utils::from_wkb(feat,buf,size,false,wkbGeneric);
if (feat.num_geometries() > 0)
{
geometry_type const& geom=feat.get_geometry(0);
2010-06-02 13:03:30 +02:00
box2d<double> bbox = geom.envelope();
if (valid_envelope(bbox))
{
sqlite::record_type rec;
rec.push_back(sqlite::value_type(pkid));
rec.push_back(sqlite::value_type(bbox.minx()));
rec.push_back(sqlite::value_type(bbox.maxx()));
rec.push_back(sqlite::value_type(bbox.miny()));
rec.push_back(sqlite::value_type(bbox.maxy()));
spatial_index.insert_record(rec);
empty_geom = false;
}
}
2009-02-21 12:22:17 +01:00
2010-06-02 13:03:30 +02:00
//output_rec.push_back(sqlite::value_type("X'" + hex(buf,size) + "'"));
2009-02-21 12:22:17 +01:00
output_rec.push_back(sqlite::blob(buf,size));
2010-06-02 13:03:30 +02:00
}
else
{
output_rec.push_back(sqlite::null_type());
}
break;
}
}
}
else
{
2010-06-02 13:03:30 +02:00
output_rec.push_back(sqlite::null_type());
}
2010-06-02 13:03:30 +02:00
}
2009-02-21 12:22:17 +01:00
2010-06-02 13:03:30 +02:00
if (!empty_geom) output_table.insert_record(output_rec);
if (pkid % 1000 == 0)
{
std::cout << "\r processing " << pkid << " features";
std::cout.flush();
2010-06-02 13:03:30 +02:00
}
2009-02-21 01:23:41 +01:00
2010-06-02 13:03:30 +02:00
if (pkid % 100000 == 0)
{
db.execute("commit;begin;");
}
}
// commit
db.execute("commit;");
std::cout << "\r processed " << pkid << " features";
std::cout << "\n Done!" << std::endl;
}
}