2015-10-01 14:16:32 +00:00
|
|
|
/*****************************************************************************
 *
 * This file is part of Mapnik (c++ mapping toolkit)
 *
 * Copyright (C) 2021 Artem Pavlenko
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 *
 *****************************************************************************/
|
|
|
|
|
|
|
|
#include <iostream>
|
|
|
|
#include <vector>
|
|
|
|
#include <string>
|
|
|
|
#include <fstream>
|
2017-02-27 12:22:55 +00:00
|
|
|
#include <mapnik/version.hpp>
|
2015-10-01 14:16:32 +00:00
|
|
|
#include <mapnik/util/fs.hpp>
|
|
|
|
#include <mapnik/quad_tree.hpp>
|
2017-08-18 10:16:44 +00:00
|
|
|
#include <mapnik/util/spatial_index.hpp>
|
2015-10-09 10:27:35 +00:00
|
|
|
|
|
|
|
#include "process_csv_file.hpp"
|
2016-11-11 09:40:52 +00:00
|
|
|
#include "process_geojson_file_x3.hpp"
|
2015-11-08 01:53:09 +00:00
|
|
|
|
2020-11-19 14:30:30 +00:00
|
|
|
#include <mapnik/warning.hpp>
|
|
|
|
MAPNIK_DISABLE_WARNING_PUSH
|
2015-11-08 01:53:09 +00:00
|
|
|
#include <mapnik/warning_ignore.hpp>
|
2015-10-01 14:16:32 +00:00
|
|
|
#include <boost/algorithm/string.hpp>
|
|
|
|
#include <boost/program_options.hpp>
|
2020-11-19 14:30:30 +00:00
|
|
|
MAPNIK_DISABLE_WARNING_POP
|
2015-10-01 14:16:32 +00:00
|
|
|
|
|
|
|
// Initial value of the quad-tree depth; main() overrides it when --depth is given.
constexpr int DEFAULT_DEPTH{8};
// Initial value of the quad-tree split ratio; main() overrides it when --ratio is given.
constexpr double DEFAULT_RATIO{0.55};
|
|
|
|
|
2022-01-26 19:41:37 +00:00
|
|
|
namespace mapnik {
|
|
|
|
namespace detail {
|
2015-10-09 10:27:35 +00:00
|
|
|
|
|
|
|
bool is_csv(std::string const& filename)
|
|
|
|
{
|
2022-01-26 19:41:37 +00:00
|
|
|
return boost::iends_with(filename, ".csv") || boost::iends_with(filename, ".tsv");
|
2015-10-09 10:27:35 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
bool is_geojson(std::string const& filename)
|
|
|
|
{
|
2022-01-26 19:41:37 +00:00
|
|
|
return boost::iends_with(filename, ".geojson") || boost::iends_with(filename, ".json");
|
2015-10-09 10:27:35 +00:00
|
|
|
}
|
|
|
|
|
2022-01-26 19:41:37 +00:00
|
|
|
} // namespace detail
|
|
|
|
} // namespace mapnik
|
2015-10-09 10:27:35 +00:00
|
|
|
|
2022-01-26 19:41:37 +00:00
|
|
|
int main(int argc, char** argv)
|
2015-10-01 14:16:32 +00:00
|
|
|
{
|
2022-01-26 19:41:37 +00:00
|
|
|
// using namespace mapnik;
|
2015-10-01 14:16:32 +00:00
|
|
|
namespace po = boost::program_options;
|
|
|
|
bool verbose = false;
|
2015-11-12 13:45:25 +00:00
|
|
|
bool validate_features = false;
|
2015-10-01 14:16:32 +00:00
|
|
|
unsigned int depth = DEFAULT_DEPTH;
|
|
|
|
double ratio = DEFAULT_RATIO;
|
2015-10-09 10:27:35 +00:00
|
|
|
std::vector<std::string> files;
|
2015-10-05 15:56:33 +00:00
|
|
|
char separator = 0;
|
|
|
|
char quote = 0;
|
2015-10-01 14:16:32 +00:00
|
|
|
std::string manual_headers;
|
2017-08-18 12:42:54 +00:00
|
|
|
mapnik::box2d<float> bbox;
|
|
|
|
bool use_bbox = false;
|
2017-08-16 09:35:04 +00:00
|
|
|
po::variables_map vm;
|
2015-10-01 14:16:32 +00:00
|
|
|
try
|
|
|
|
{
|
2015-10-09 10:27:35 +00:00
|
|
|
po::options_description desc("Mapnik CSV/GeoJSON index utility");
|
2022-01-26 19:41:37 +00:00
|
|
|
// clang-format off
|
2015-10-01 14:16:32 +00:00
|
|
|
desc.add_options()
|
2017-08-18 12:42:54 +00:00
|
|
|
("help,h", "Produce usage message")
|
|
|
|
("version,V","Print version string")
|
|
|
|
("verbose,v","Verbose output")
|
|
|
|
("depth,d", po::value<unsigned int>(), "Max tree depth\n(default 8)")
|
|
|
|
("ratio,r",po::value<double>(),"Split ratio (default 0.55)")
|
2015-10-05 08:34:02 +00:00
|
|
|
("separator,s", po::value<char>(), "CSV columns separator")
|
|
|
|
("quote,q", po::value<char>(), "CSV columns quote")
|
2015-10-01 14:16:32 +00:00
|
|
|
("manual-headers,H", po::value<std::string>(), "CSV manual headers string")
|
2015-10-09 10:27:35 +00:00
|
|
|
("files",po::value<std::vector<std::string> >(),"Files to index: file1 file2 ...fileN")
|
2015-11-12 13:45:25 +00:00
|
|
|
("validate-features", "Validate GeoJSON features")
|
2017-08-18 12:42:54 +00:00
|
|
|
("bbox,b", po::value<std::string>(), "Only index features within bounding box: --bbox=minx,miny,maxx,maxy")
|
2015-10-01 14:16:32 +00:00
|
|
|
;
|
2022-01-26 19:41:37 +00:00
|
|
|
// clang-format on
|
2015-10-01 14:16:32 +00:00
|
|
|
po::positional_options_description p;
|
2022-01-26 19:41:37 +00:00
|
|
|
p.add("files", -1);
|
2016-11-11 09:48:03 +00:00
|
|
|
po::store(po::command_line_parser(argc, argv)
|
2022-01-26 19:41:37 +00:00
|
|
|
.options(desc)
|
|
|
|
.style(po::command_line_style::unix_style | po::command_line_style::allow_long_disguise)
|
|
|
|
.positional(p)
|
|
|
|
.run(),
|
|
|
|
vm);
|
2015-10-01 14:16:32 +00:00
|
|
|
po::notify(vm);
|
|
|
|
|
|
|
|
if (vm.count("version"))
|
|
|
|
{
|
2017-02-27 12:22:55 +00:00
|
|
|
std::clog << "version " << MAPNIK_VERSION_STRING << std::endl;
|
2015-10-01 14:16:32 +00:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
if (vm.count("help"))
|
|
|
|
{
|
2015-10-09 10:27:35 +00:00
|
|
|
std::clog << desc << std::endl;
|
2015-10-01 14:16:32 +00:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
if (vm.count("verbose"))
|
|
|
|
{
|
|
|
|
verbose = true;
|
|
|
|
}
|
2015-11-12 13:45:25 +00:00
|
|
|
if (vm.count("validate-features"))
|
|
|
|
{
|
|
|
|
validate_features = true;
|
|
|
|
}
|
2015-10-01 14:16:32 +00:00
|
|
|
if (vm.count("depth"))
|
|
|
|
{
|
|
|
|
depth = vm["depth"].as<unsigned int>();
|
|
|
|
}
|
|
|
|
if (vm.count("ratio"))
|
|
|
|
{
|
|
|
|
ratio = vm["ratio"].as<double>();
|
|
|
|
}
|
|
|
|
if (vm.count("separator"))
|
|
|
|
{
|
2015-10-05 08:34:02 +00:00
|
|
|
separator = vm["separator"].as<char>();
|
2015-10-01 14:16:32 +00:00
|
|
|
}
|
|
|
|
if (vm.count("quote"))
|
|
|
|
{
|
2015-10-05 08:34:02 +00:00
|
|
|
quote = vm["quote"].as<char>();
|
2015-10-01 14:16:32 +00:00
|
|
|
}
|
|
|
|
if (vm.count("manual-headers"))
|
|
|
|
{
|
|
|
|
manual_headers = vm["manual-headers"].as<std::string>();
|
|
|
|
}
|
2015-10-09 10:27:35 +00:00
|
|
|
if (vm.count("files"))
|
2015-10-01 14:16:32 +00:00
|
|
|
{
|
2022-01-26 19:41:37 +00:00
|
|
|
files = vm["files"].as<std::vector<std::string>>();
|
2015-10-01 14:16:32 +00:00
|
|
|
}
|
2017-08-18 12:42:54 +00:00
|
|
|
if (vm.count("bbox") && bbox.from_string(vm["bbox"].as<std::string>()))
|
|
|
|
{
|
|
|
|
use_bbox = true;
|
|
|
|
}
|
2022-01-26 19:41:37 +00:00
|
|
|
} catch (std::exception const& ex)
|
2015-10-01 14:16:32 +00:00
|
|
|
{
|
2015-10-09 10:27:35 +00:00
|
|
|
std::clog << "Error: " << ex.what() << std::endl;
|
|
|
|
return EXIT_FAILURE;
|
2015-10-01 14:16:32 +00:00
|
|
|
}
|
|
|
|
|
2015-10-20 21:49:58 +00:00
|
|
|
std::vector<std::string> files_to_process;
|
2015-10-01 14:16:32 +00:00
|
|
|
|
2015-10-20 21:49:58 +00:00
|
|
|
for (auto const& filename : files)
|
|
|
|
{
|
|
|
|
if (!mapnik::util::exists(filename))
|
|
|
|
{
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (mapnik::detail::is_csv(filename) || mapnik::detail::is_geojson(filename))
|
|
|
|
{
|
|
|
|
files_to_process.push_back(filename);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (files_to_process.size() == 0)
|
2015-10-01 14:16:32 +00:00
|
|
|
{
|
2015-10-09 10:27:35 +00:00
|
|
|
std::clog << "no files to index" << std::endl;
|
|
|
|
return EXIT_FAILURE;
|
2015-10-01 14:16:32 +00:00
|
|
|
}
|
|
|
|
|
2015-10-20 21:49:58 +00:00
|
|
|
std::clog << "max tree depth:" << depth << std::endl;
|
|
|
|
std::clog << "split ratio:" << ratio << std::endl;
|
|
|
|
|
2016-03-24 17:12:16 +00:00
|
|
|
using box_type = mapnik::box2d<float>;
|
2017-08-15 10:14:11 +00:00
|
|
|
using item_type = std::pair<box_type, std::pair<std::uint64_t, std::uint64_t>>;
|
2015-10-09 10:27:35 +00:00
|
|
|
|
2015-10-20 21:49:58 +00:00
|
|
|
for (auto const& filename : files_to_process)
|
2015-10-01 14:16:32 +00:00
|
|
|
{
|
2015-10-20 19:15:51 +00:00
|
|
|
if (!mapnik::util::exists(filename))
|
2015-10-01 14:16:32 +00:00
|
|
|
{
|
2015-10-09 10:27:35 +00:00
|
|
|
std::clog << "Error : file " << filename << " does not exist" << std::endl;
|
2015-10-01 14:16:32 +00:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2015-10-09 10:27:35 +00:00
|
|
|
std::vector<item_type> boxes;
|
2016-03-24 17:12:16 +00:00
|
|
|
box_type extent;
|
2015-10-09 10:27:35 +00:00
|
|
|
if (mapnik::detail::is_csv(filename))
|
2015-10-01 14:16:32 +00:00
|
|
|
{
|
2015-10-20 19:15:51 +00:00
|
|
|
std::clog << "processing '" << filename << "' as CSV\n";
|
2015-10-09 10:27:35 +00:00
|
|
|
auto result = mapnik::detail::process_csv_file(boxes, filename, manual_headers, separator, quote);
|
2016-04-25 08:11:55 +00:00
|
|
|
if (!result.first)
|
|
|
|
{
|
|
|
|
std::clog << "Error: failed to process " << filename << std::endl;
|
|
|
|
return EXIT_FAILURE;
|
|
|
|
}
|
2015-10-09 10:27:35 +00:00
|
|
|
extent = result.second;
|
2015-10-01 14:16:32 +00:00
|
|
|
}
|
2015-10-09 10:27:35 +00:00
|
|
|
else if (mapnik::detail::is_geojson(filename))
|
2015-10-01 14:16:32 +00:00
|
|
|
{
|
2015-10-20 19:15:51 +00:00
|
|
|
std::clog << "processing '" << filename << "' as GeoJSON\n";
|
2022-01-26 19:41:37 +00:00
|
|
|
std::pair<bool, mapnik::box2d<float>> result;
|
2016-11-29 08:20:41 +00:00
|
|
|
result = mapnik::detail::process_geojson_file_x3(boxes, filename, validate_features, verbose);
|
2015-11-12 16:12:06 +00:00
|
|
|
if (!result.first)
|
|
|
|
{
|
|
|
|
std::clog << "Error: failed to process " << filename << std::endl;
|
2016-04-25 08:11:55 +00:00
|
|
|
return EXIT_FAILURE;
|
2015-11-12 16:12:06 +00:00
|
|
|
}
|
2015-10-09 11:49:58 +00:00
|
|
|
extent = result.second;
|
2015-10-01 14:16:32 +00:00
|
|
|
}
|
|
|
|
|
2015-10-09 10:27:35 +00:00
|
|
|
if (extent.valid())
|
2015-10-01 14:16:32 +00:00
|
|
|
{
|
2017-08-18 12:55:04 +00:00
|
|
|
auto tree_extent = use_bbox ? bbox : extent;
|
|
|
|
std::clog << tree_extent << std::endl;
|
|
|
|
mapnik::quad_tree<mapnik::util::index_record, mapnik::box2d<float>> tree(tree_extent, depth, ratio);
|
2017-08-18 10:16:44 +00:00
|
|
|
for (auto const& item : boxes)
|
|
|
|
{
|
|
|
|
auto ext_f = std::get<0>(item);
|
2022-01-26 19:41:37 +00:00
|
|
|
if (use_bbox && !bbox.intersects(ext_f))
|
|
|
|
continue;
|
|
|
|
mapnik::util::index_record rec = {std::get<1>(item).first, std::get<1>(item).second, ext_f};
|
2017-08-18 10:16:44 +00:00
|
|
|
tree.insert(rec, ext_f);
|
|
|
|
}
|
|
|
|
|
|
|
|
std::fstream file((filename + ".index").c_str(),
|
|
|
|
std::ios::in | std::ios::out | std::ios::trunc | std::ios::binary);
|
|
|
|
if (!file)
|
2015-10-01 14:16:32 +00:00
|
|
|
{
|
2022-01-26 19:41:37 +00:00
|
|
|
std::clog << "cannot open index file for writing file \"" << (filename + ".index") << "\"" << std::endl;
|
2015-10-01 14:16:32 +00:00
|
|
|
}
|
2015-10-09 10:27:35 +00:00
|
|
|
else
|
2015-10-01 14:16:32 +00:00
|
|
|
{
|
2017-08-18 10:16:44 +00:00
|
|
|
tree.trim();
|
|
|
|
std::clog << "number nodes=" << tree.count() << std::endl;
|
|
|
|
std::clog << "number element=" << tree.count_items() << std::endl;
|
|
|
|
file.exceptions(std::ios::failbit | std::ios::badbit);
|
|
|
|
tree.write(file);
|
|
|
|
file.flush();
|
|
|
|
file.close();
|
2015-10-01 14:16:32 +00:00
|
|
|
}
|
|
|
|
}
|
2016-04-25 08:11:55 +00:00
|
|
|
else
|
|
|
|
{
|
|
|
|
std::clog << "Invalid extent " << extent << std::endl;
|
|
|
|
return EXIT_FAILURE;
|
|
|
|
}
|
2015-10-01 14:16:32 +00:00
|
|
|
}
|
2015-10-09 10:27:35 +00:00
|
|
|
std::clog << "done!" << std::endl;
|
|
|
|
return EXIT_SUCCESS;
|
2015-10-01 14:16:32 +00:00
|
|
|
}
|