Compare commits

...

19 commits

Author SHA1 Message Date
Tom MacWright
a2ab9b5588 More fixes 2012-02-06 16:48:51 -05:00
Tom MacWright
d12b4e322f Updating ogr and csv to output null stats 2012-02-06 15:36:29 -05:00
Tom MacWright
534d111e02 Clarify naming, addresses @springmeyer's comment 2012-02-02 13:30:01 -05:00
Tom MacWright
8d9fb2b540 Fix up shared_ptr impl. Fixes #1060. 2012-02-02 13:22:38 -05:00
Tom MacWright
9c1af869b6 Switched to shared_ptr at the cost of a segfault 2012-02-02 10:36:57 -05:00
Tom MacWright
f836a43bca Head of stats changes - moving towards shared_ptr 2012-02-02 10:06:37 -05:00
Tom MacWright
6f06316f28 Merge branch 'master' of https://github.com/mapnik/mapnik into statistics 2012-02-01 15:53:36 -05:00
Tom MacWright
ff450b4998 Actual data-based sqlite statistics test, fix kinks in the implementation. 2012-02-01 15:52:17 -05:00
Tom MacWright
d1c65beb1c First working shot of statistics in sqlite. Needs test coverage next 2012-02-01 15:44:05 -05:00
Tom MacWright
55d5a8ae82 Merge branch 'master' of https://github.com/mapnik/mapnik into statistics 2012-02-01 13:58:22 -05:00
Tom MacWright
abe4a39717 Merge branch 'master' into statistics 2012-01-31 14:58:16 -05:00
Tom MacWright
95bef08977 merge and adapt to new features work 2012-01-30 18:55:20 -05:00
Tom MacWright
96b3281bd6 Stub out statistics on common datasources 2012-01-30 17:37:36 -05:00
Tom MacWright
441c13646c Adding passing python test for statistics. 2012-01-27 15:11:45 -05:00
Tom MacWright
f3249c8fda Bind statistics in Python 2012-01-26 18:32:16 -05:00
Tom MacWright
4c46385bf6 Work around non-numeric input 2012-01-26 17:40:02 -05:00
Tom MacWright
d466b49671 Working accumulator-based statistics for memory datasource 2012-01-26 16:58:32 -05:00
Tom MacWright
a6b03cc21c Working loop of values 2012-01-26 15:32:24 -05:00
Tom MacWright
38aadc16af First bit stats push. 2012-01-20 18:46:13 -05:00
19 changed files with 362 additions and 19 deletions

View file

@ -92,6 +92,24 @@ boost::python::dict describe(boost::shared_ptr<mapnik::datasource> const& ds)
return description;
}
/// Python binding helper: exports a datasource's per-field statistics as a
/// nested Python dict of the form {field_name: {statistic_name: value}}.
///
/// @param ds  datasource whose get_statistics() result is converted.
/// @return    dict keyed by field name; empty when the datasource hands back
///            no statistics object.
boost::python::dict statistics(boost::shared_ptr<mapnik::datasource> const& ds)
{
    boost::python::dict description;
    mapnik::statistics_ptr stats = ds->get_statistics();
    // get_statistics() returns a shared pointer; guard against an unset one
    // instead of dereferencing it and crashing the interpreter.
    if (!stats)
    {
        return description;
    }
    std::map<std::string, mapnik::parameters>::const_iterator it = stats->begin();
    std::map<std::string, mapnik::parameters>::const_iterator const last = stats->end();
    for (; it != last; ++it)
    {
        boost::python::dict field;
        mapnik::parameters::const_iterator k = it->second.begin();
        for (; k != it->second.end(); ++k)
        {
            // NOTE(review): boost::get<double> is expected to throw when the
            // stored value is not a double - confirm every datasource only
            // stores doubles in its statistics.
            field[k->first] = boost::get<double>(k->second);
        }
        description[it->first] = field;
    }
    return description;
}
boost::python::list fields(boost::shared_ptr<mapnik::datasource> const& ds)
{
boost::python::list flds;
@ -164,6 +182,7 @@ void export_datasource()
.def("type",&datasource::type)
.def("geometry_type",&datasource::get_geometry_type)
.def("describe",&describe)
.def("statistics",&statistics)
.def("envelope",&datasource::envelope)
.def("features",&datasource::features)
.def("bind",&datasource::bind)

View file

@ -32,6 +32,7 @@
#include <mapnik/feature_layer_desc.hpp>
// boost
#include <boost/shared_container_iterator.hpp>
#include <boost/utility.hpp>
#include <boost/shared_ptr.hpp>
@ -50,6 +51,9 @@ struct MAPNIK_DECL Featureset : private boost::noncopyable
};
// Shared handle to a feature set.
typedef MAPNIK_DECL boost::shared_ptr<Featureset> featureset_ptr;

// Per-field statistics: field name -> named values stored as mapnik::parameters.
typedef MAPNIK_DECL std::map<std::string, mapnik::parameters> statistics;
// Shared handle to a statistics map; this is what get_statistics() returns.
typedef MAPNIK_DECL boost::shared_ptr<statistics> statistics_ptr;
// Iterator over a statistics map that is held through a shared pointer.
typedef MAPNIK_DECL boost::shared_container_iterator<statistics> statistics_ptr_iterator;
class MAPNIK_DECL datasource_exception : public std::exception
{
@ -114,6 +118,7 @@ public:
virtual box2d<double> envelope() const=0;
virtual boost::optional<geometry_t> get_geometry_type() const=0;
virtual layer_descriptor get_descriptor() const=0;
virtual statistics_ptr get_statistics() const=0;
virtual ~datasource() {};
protected:
parameters params_;

View file

@ -0,0 +1,110 @@
/*****************************************************************************
*
* This file is part of Mapnik (c++ mapping toolkit)
*
* Copyright (C) 2011 Artem Pavlenko
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*****************************************************************************/
#ifndef MAPNIK_LAYER_STATISTICS_HPP
#define MAPNIK_LAYER_STATISTICS_HPP

// NOTE: everything between the comment markers below is a disabled draft.
// The include guard is kept outside the comment so that #ifndef/#define and
// #endif stay balanced for the preprocessor.
//
// The draft class is named ds_statistics, but its constructors and the
// stream operator still refer to layer_descriptor, so it will not compile
// as-is if it is ever re-enabled.

/*
// mapnik
#include <mapnik/attribute_descriptor.hpp>
// stl
#include <string>
#include <vector>
#include <iostream>
namespace mapnik
{
class ds_statistics
{
public:
    layer_descriptor(std::string const& name, std::string const& encoding)
        : name_(name),
          encoding_(encoding) {}
    layer_descriptor(layer_descriptor const& other)
        : name_(other.name_),
          encoding_(other.encoding_),
          desc_ar_(other.desc_ar_) {}
    void set_name(std::string const& name)
    {
        name_ = name;
    }
    std::string const& get_name() const
    {
        return name_;
    }
    void set_encoding(std::string const& encoding)
    {
        encoding_ = encoding;
    }
    std::string const& get_encoding() const
    {
        return encoding_;
    }
    void add_descriptor(attribute_descriptor const& desc)
    {
        desc_ar_.push_back(desc);
    }
    std::vector<attribute_descriptor> const& get_descriptors() const
    {
        return desc_ar_;
    }
    std::vector<attribute_descriptor>& get_descriptors()
    {
        return desc_ar_;
    }
private:
    std::string name_;
    std::string encoding_;
    std::vector<attribute_descriptor> desc_ar_;
};
template <typename charT,typename traits>
inline std::basic_ostream<charT,traits>&
operator << (std::basic_ostream<charT,traits>& out,
             layer_descriptor const& ld)
{
    out << "name: " << ld.get_name() << "\n";
    out << "encoding: " << ld.get_encoding() << "\n";
    std::vector<attribute_descriptor> const& desc_ar = ld.get_descriptors();
    std::vector<attribute_descriptor>::const_iterator pos = desc_ar.begin();
    while (pos != desc_ar.end())
    {
        out << *pos++ << "\n";
    }
    return out;
}
}
*/

#endif // MAPNIK_LAYER_STATISTICS_HPP

View file

@ -27,11 +27,30 @@
#include <mapnik/datasource.hpp>
#include <mapnik/feature_layer_desc.hpp>
// boost
#include <boost/accumulators/accumulators.hpp>
#include <boost/accumulators/statistics/stats.hpp>
#include <boost/accumulators/statistics/min.hpp>
#include <boost/accumulators/statistics/max.hpp>
#include <boost/accumulators/statistics/mean.hpp>
#include <boost/accumulators/statistics/variance.hpp>
#include <boost/accumulators/statistics/median.hpp>
#include <boost/accumulators/framework/accumulator_set.hpp>
// stl
#include <vector>
namespace mapnik {
// Running statistics collected for a single attribute: every sample is a
// double, and the set tracks mean, median, variance, min and max.
// NOTE(review): per the Boost.Accumulators documentation tag::median is an
// estimate rather than an exact median - confirm that is acceptable here.
typedef boost::accumulators::accumulator_set<
double, boost::accumulators::features<
boost::accumulators::tag::mean,
boost::accumulators::tag::median,
boost::accumulators::tag::variance,
boost::accumulators::tag::min,
boost::accumulators::tag::max
> > statistics_accumulator;
class MAPNIK_DECL memory_datasource : public datasource
{
friend class memory_featureset;
@ -45,10 +64,12 @@ public:
box2d<double> envelope() const;
boost::optional<geometry_t> get_geometry_type() const;
layer_descriptor get_descriptor() const;
statistics_ptr get_statistics() const;
size_t size() const;
void clear();
private:
std::vector<feature_ptr> features_;
std::map<std::string, statistics_accumulator> accumulators_;
mapnik::layer_descriptor desc_;
};

View file

@ -851,6 +851,12 @@ mapnik::box2d<double> csv_datasource::envelope() const
return extent_;
}
// Returns a freshly allocated copy of the statistics map held by this
// datasource, binding the datasource first if that has not happened yet.
mapnik::statistics_ptr csv_datasource::get_statistics() const
{
    if (!is_bound_)
    {
        bind();
    }
    mapnik::statistics_ptr snapshot = boost::make_shared<mapnik::statistics>(stats_);
    return snapshot;
}
boost::optional<mapnik::datasource::geometry_t> csv_datasource::get_geometry_type() const
{
if (! is_bound_) bind();

View file

@ -19,6 +19,7 @@ public:
mapnik::box2d<double> envelope() const;
boost::optional<mapnik::datasource::geometry_t> get_geometry_type() const;
mapnik::layer_descriptor get_descriptor() const;
mapnik::statistics_ptr get_statistics() const;
void bind() const;
template <typename T>
void parse_csv(T& stream,
@ -38,6 +39,7 @@ private:
mutable std::string quote_;
mutable std::vector<std::string> headers_;
mutable std::string manual_headers_;
mutable std::map<std::string, mapnik::parameters> stats_;
mutable bool strict_;
mutable bool quiet_;
mutable double filesize_max_;

View file

@ -25,6 +25,7 @@
#include "gdal_featureset.hpp"
// mapnik
#include <boost/make_shared.hpp>
#include <mapnik/ptree_helpers.hpp>
#include <mapnik/geom_util.hpp>
@ -199,6 +200,12 @@ gdal_datasource::~gdal_datasource()
{
}
// Binds the datasource on first use, then hands back a shared copy of the
// stats_ map so callers never alias the member itself.
mapnik::statistics_ptr gdal_datasource::get_statistics() const
{
    if (!is_bound_)
    {
        bind();
    }

    mapnik::statistics_ptr copy_of_stats = boost::make_shared<mapnik::statistics>(stats_);
    return copy_of_stats;
}
datasource::datasource_t gdal_datasource::type() const
{
return datasource::Raster;

View file

@ -44,6 +44,7 @@ public:
mapnik::box2d<double> envelope() const;
boost::optional<mapnik::datasource::geometry_t> get_geometry_type() const;
mapnik::layer_descriptor get_descriptor() const;
mapnik::statistics_ptr get_statistics() const;
void bind() const;
private:
mutable mapnik::box2d<double> extent_;
@ -56,6 +57,7 @@ private:
mutable double dy_;
mutable int nbands_;
mutable bool shared_dataset_;
mutable std::map<std::string, mapnik::parameters> stats_;
double filter_factor_;
inline GDALDataset* open_dataset() const;
};

View file

@ -429,6 +429,12 @@ layer_descriptor ogr_datasource::get_descriptor() const
return desc_;
}
// Statistics accessor: makes sure the datasource is bound, then returns a
// newly allocated shared copy of the stats_ member.
mapnik::statistics_ptr ogr_datasource::get_statistics() const
{
    bool const needs_bind = !is_bound_;
    if (needs_bind)
    {
        bind();
    }
    return mapnik::statistics_ptr(boost::make_shared<mapnik::statistics>(stats_));
}
void validate_attribute_names(query const& q, std::vector<attribute_descriptor> const& names )
{
std::set<std::string> const& attribute_names = q.property_names();

View file

@ -48,6 +48,7 @@ public:
mapnik::box2d<double> envelope() const;
boost::optional<mapnik::datasource::geometry_t> get_geometry_type() const;
mapnik::layer_descriptor get_descriptor() const;
mapnik::statistics_ptr get_statistics() const;
void bind() const;
private:
@ -59,6 +60,7 @@ private:
mutable ogr_layer_ptr layer_;
mutable std::string layer_name_;
mutable mapnik::layer_descriptor desc_;
mutable std::map<std::string, mapnik::parameters> stats_;
mutable bool indexed_;
};

View file

@ -358,6 +358,12 @@ layer_descriptor postgis_datasource::get_descriptor() const
}
// Returns the statistics collected for this datasource, binding it first if
// necessary. The result is never a null pointer: when no statistics object
// has been stored in stats_, an empty map is returned instead, so callers can
// iterate the result without a null check.
mapnik::statistics_ptr postgis_datasource::get_statistics() const
{
    if (! is_bound_) bind();
    if (stats_)
    {
        return stats_;
    }
    // Constructed directly from the types the return value already needs, so
    // no additional header is required.
    return mapnik::statistics_ptr(new mapnik::statistics());
}
std::string postgis_datasource::sql_bbox(box2d<double> const& env) const
{
std::ostringstream b;

View file

@ -85,11 +85,13 @@ public:
mapnik::box2d<double> envelope() const;
boost::optional<mapnik::datasource::geometry_t> get_geometry_type() const;
layer_descriptor get_descriptor() const;
mapnik::statistics_ptr get_statistics() const;
postgis_datasource(const parameters &params, bool bind=true);
~postgis_datasource();
void bind() const;
private:
std::string sql_bbox(box2d<double> const& env) const;
mapnik::statistics_ptr stats_;
std::string populate_tokens(const std::string& sql, double scale_denom, box2d<double> const& env) const;
std::string populate_tokens(const std::string& sql) const;
static std::string unquote(const std::string& sql);

View file

@ -240,6 +240,12 @@ layer_descriptor shape_datasource::get_descriptor() const
return desc_;
}
// Returns the statistics collected for this datasource, binding it first if
// necessary. The result is never a null pointer: when stats_ has not been
// populated, an empty map is returned instead, so callers can iterate the
// result without a null check.
mapnik::statistics_ptr shape_datasource::get_statistics() const
{
    if (! is_bound_) bind();
    if (stats_)
    {
        return stats_;
    }
    // Constructed directly from the types the return value already needs, so
    // no additional header is required.
    return mapnik::statistics_ptr(new mapnik::statistics());
}
featureset_ptr shape_datasource::features(const query& q) const
{
if (!is_bound_) bind();

View file

@ -52,6 +52,7 @@ public:
box2d<double> envelope() const;
boost::optional<mapnik::datasource::geometry_t> get_geometry_type() const;
layer_descriptor get_descriptor() const;
mapnik::statistics_ptr get_statistics() const;
void bind() const;
private:
shape_datasource(const shape_datasource&);
@ -62,6 +63,7 @@ private:
std::string shape_name_;
mutable boost::shared_ptr<shape_io> shape_;
mutable shape_io::shapeType shape_type_;
mutable mapnik::statistics_ptr stats_;
mutable long file_length_;
mutable box2d<double> extent_;
mutable bool indexed_;

View file

@ -316,7 +316,6 @@ void sqlite_datasource::bind() const
}
}
*/
boost::shared_ptr<sqlite_resultset> rs = dataset_->execute_query(query.str());
if (sqlite_utils::create_spatial_index(index_db,index_table_,rs))
@ -484,6 +483,65 @@ box2d<double> sqlite_datasource::envelope() const
return extent_;
}
// Computes min / max / mean for every numeric (Double or Integer) attribute
// other than the key field by running a single aggregate query against the
// table.
//
// Returns a map keyed by field name; each entry holds the keys "min", "max"
// and "mean". The map is empty when the layer has no eligible numeric field.
mapnik::statistics_ptr sqlite_datasource::get_statistics() const
{
    if (! is_bound_) bind();

    std::map<std::string, mapnik::parameters> stats;

    // Pick the fields once so the SELECT list and the result columns are
    // guaranteed to line up.
    std::vector<std::string> numeric_fields;
    std::vector<std::string> aggregates;
    std::vector<attribute_descriptor>::const_iterator itr = desc_.get_descriptors().begin();
    std::vector<attribute_descriptor>::const_iterator end = desc_.get_descriptors().end();
    for ( ; itr != end; ++itr)
    {
        std::string const fld_name = itr->get_name();
        // The parentheses matter: without them "a && b || c" would admit an
        // Integer key field.
        bool const is_numeric = (itr->get_type() == mapnik::Double ||
                                 itr->get_type() == mapnik::Integer);
        if (fld_name != key_field_ && is_numeric)
        {
            numeric_fields.push_back(fld_name);
            aggregates.push_back("MIN([" + fld_name + "])");
            aggregates.push_back("MAX([" + fld_name + "])");
            aggregates.push_back("AVG([" + fld_name + "])");
        }
    }

    // With nothing to aggregate the statement would read "SELECT  FROM ...",
    // which is not valid SQL, so skip the query altogether.
    if (numeric_fields.empty())
    {
        return boost::make_shared<mapnik::statistics>(stats);
    }

    std::ostringstream s;
    s << "SELECT ";
    s << boost::algorithm::join(aggregates, ",");
    s << " FROM ";
    s << populate_tokens(table_);

#ifdef MAPNIK_DEBUG
    std::clog << "Sqlite Plugin: " << s.str() << std::endl;
#endif

    boost::shared_ptr<sqlite_resultset> rs(dataset_->execute_query(s.str()));
    while (rs->is_valid() && rs->step_next())
    {
        // Columns come back in the order they were requested: three
        // aggregates (min, max, avg) per selected field.
        int col = 0;
        std::vector<std::string>::const_iterator field = numeric_fields.begin();
        for ( ; field != numeric_fields.end(); ++field)
        {
            mapnik::parameters p;
            p["min"] = rs->column_double(col);
            col++;
            p["max"] = rs->column_double(col);
            col++;
            p["mean"] = rs->column_double(col);
            col++;
            stats[*field] = p;
        }
    }
    return boost::make_shared<mapnik::statistics>(stats);
}
boost::optional<mapnik::datasource::geometry_t> sqlite_datasource::get_geometry_type() const
{
if (! is_bound_) bind();

View file

@ -49,6 +49,7 @@ public:
mapnik::box2d<double> envelope() const;
boost::optional<mapnik::datasource::geometry_t> get_geometry_type() const;
mapnik::layer_descriptor get_descriptor() const;
mapnik::statistics_ptr get_statistics() const;
void bind() const;
private:

View file

@ -24,7 +24,12 @@
// mapnik
#include <mapnik/memory_datasource.hpp>
#include <mapnik/memory_featureset.hpp>
#include <mapnik/params.hpp>
#include <mapnik/feature_factory.hpp>
#include <mapnik/feature_kv_iterator.hpp>
#include <boost/make_shared.hpp>
#include <boost/math/distributions/normal.hpp>
// stl
#include <algorithm>
@ -64,8 +69,15 @@ memory_datasource::~memory_datasource() {}
// Appends a feature to the datasource and feeds each of its attribute values
// into the per-field statistics accumulator. Values whose conversion to
// double throws boost::bad_lexical_cast are left out of the statistics.
void memory_datasource::push(feature_ptr feature)
{
    // TODO - collect attribute descriptors?
    //desc_.add_descriptor(attribute_descriptor(fld_name,mapnik::Integer));

    // Name the type itself: the qualified form
    // feature_kv_iterator::feature_kv_iterator refers to the constructor and
    // is rejected by conforming compilers.
    mapnik::feature_kv_iterator it(*feature, true);
    mapnik::feature_kv_iterator end(*feature);
    for (; it != end; ++it)
    {
        try
        {
            // Convert before touching the map: otherwise operator[] could
            // create an empty accumulator for the field before the
            // conversion throws, and that entry would later show up in the
            // statistics.
            double const value = boost::get<1>(*it).to_double();
            accumulators_[boost::get<0>(*it)](value);
        }
        catch (boost::bad_lexical_cast const&)
        {
            // string values are not accumulated.
        }
    }
    features_.push_back(feature);
}
@ -108,6 +120,23 @@ layer_descriptor memory_datasource::get_descriptor() const
return desc_;
}
// Builds a statistics map from the accumulators filled by push(): one entry
// per accumulated field, holding "mean", "median", "min", "stddev" (square
// root of the variance) and "max".
statistics_ptr memory_datasource::get_statistics() const
{
    typedef std::map<std::string, statistics_accumulator>::const_iterator accumulator_iterator;
    namespace acc = boost::accumulators;

    std::map<std::string, mapnik::parameters> summary;
    for (accumulator_iterator entry = accumulators_.begin();
         entry != accumulators_.end();
         ++entry)
    {
        statistics_accumulator const& samples = entry->second;

        mapnik::parameters field_stats;
        field_stats["mean"] = acc::mean(samples);
        field_stats["median"] = acc::median(samples);
        field_stats["min"] = acc::min(samples);
        field_stats["stddev"] = sqrt(acc::variance(samples));
        field_stats["max"] = acc::max(samples);

        summary[entry->first] = field_stats;
    }
    return boost::make_shared<mapnik::statistics>(summary);
}
size_t memory_datasource::size() const
{
return features_.size();

View file

@ -38,6 +38,14 @@ if 'sqlite' in mapnik.DatasourceCache.instance().plugin_names():
# the above should not throw but will result in no features
eq_(feature,None)
def test_attachdb_with_multiple_files():
ds = mapnik.SQLite(file='../data/sqlite/world.sqlite', table='world_merc')
stats = ds.statistics()
eq_(stats['pop2005']['min'], 0)
eq_(stats['pop2005']['max'], 1312978855)
eq_(stats['area']['max'], 1638094)
print stats
def test_attachdb_with_absolute_file():
# The point table and index is in the qgis_spatiallite.sqlite
# database. If either is not found, then this fails

View file

@ -0,0 +1,51 @@
#encoding: utf8
import itertools
import unittest
class MemoryDatasource(unittest.TestCase):
ids = itertools.count(0)
def makeOne(self, *args, **kw):
from mapnik import MemoryDatasource
return MemoryDatasource(*args, **kw)
def makeFeature(self, wkt, **properties):
from mapnik import Feature
f = Feature(self.ids.next())
f.add_geometries_from_wkt(wkt)
for k,v in properties.iteritems():
f[k] = v
return f
def test_default_constructor(self):
f = self.makeOne()
self.failUnless(f is not None)
def test_add_feature(self):
md = self.makeOne()
self.failUnlessEqual(md.num_features(), 0)
md.add_feature(self.makeFeature('Point(2 3)', foo='bar'))
self.failUnlessEqual(md.num_features(), 1)
from mapnik import Coord
retrieved = md.features_at_point(Coord(2,3)).features
self.failUnlessEqual(len(retrieved), 1)
f = retrieved[0]
self.failUnlessEqual(f['foo'], 'bar')
retrieved = md.features_at_point(Coord(20,30)).features
self.failUnlessEqual(len(retrieved), 0)
def test_statistics(self):
md = self.makeOne()
self.failUnlessEqual(md.num_features(), 0)
md.add_feature(self.makeFeature('Point(2 3)', a=1))
self.failUnlessEqual(md.num_features(), 1)
stats = md.statistics()
self.failUnless(stats.has_key('a'))
self.failUnlessEqual(stats['a']['min'], 1)
self.failUnlessEqual(stats['a']['max'], 1)
self.failUnlessEqual(stats['a']['mean'], 1)
if __name__ == "__main__":
    # The tests in this file are TestCase methods, not module-level
    # functions, so scanning dir() for 'test_' names would run nothing.
    # Let unittest collect and run them.
    unittest.main()