// File: cgal/Classification/include/CGAL/Classifier.h
// (1035 lines, 30 KiB, C++)
// Copyright (c) 2016 INRIA Sophia-Antipolis (France).
// All rights reserved.
//
// This file is part of CGAL (www.cgal.org).
// You can redistribute it and/or modify it under the terms of the GNU
// General Public License as published by the Free Software Foundation,
// either version 3 of the License, or (at your option) any later version.
//
// Licensees holding a valid commercial license may use this file in
// accordance with the commercial license agreement provided with the software.
//
// This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
// WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
//
// $URL$
// $Id$
//
// Author(s) : Simon Giraudot, Florent Lafarge
#ifndef CGAL_CLASSIFIER_H
#define CGAL_CLASSIFIER_H
#include <cassert>
#include <cstdio>

#include <algorithm>
#include <cmath>
#include <iostream>
#include <limits>
#include <list>
#include <queue>
#include <set>
#include <string>
#include <utility>
#include <vector>

#include <CGAL/bounding_box.h>
#include <CGAL/centroid.h>
#include <CGAL/compute_average_spacing.h>
#include <CGAL/linear_least_squares_fitting_3.h>
#include <CGAL/Classification/Planimetric_grid.h>
#include <CGAL/Classification/Attribute.h>
#include <CGAL/Classification/Type.h>
#include <CGAL/internal/Surface_mesh_segmentation/Alpha_expansion_graph_cut.h>
#define CGAL_CLASSIFICATION_VERBOSE
#if defined(CGAL_CLASSIFICATION_VERBOSE)
#define CGAL_CLASSIFICATION_CERR std::cerr
#else
#define CGAL_CLASSIFICATION_CERR std::ostream(0)
#endif
//#define CGAL_CLASSTRAINING_VERBOSE
#if defined(CGAL_CLASSTRAINING_VERBOSE)
#define CGAL_CLASSTRAINING_CERR std::cerr
#else
#define CGAL_CLASSTRAINING_CERR std::ostream(0)
#endif
namespace CGAL {
/*!
\ingroup PkgClassification
\brief Classifies a data set based on a set of attribute and a set of classification types.
This class implement the core of the algorithm. It uses a data set as
input and assign each input iterator to a classification type among a
set of user defined classification types.
To achieve this classification algorithm, a set of local geometric
attributes are used, such as:
- planarity
- elevation
- vertical dispersion
The user must define a set of classification types such as:
- building
- ground
- vegetation
Each pair of attribute/type must be assigned an effect (for example,
vegetation has a low planarity and a high vertical dispersion) and
each attribute must be assigned a weight. These parameters can be set
up by hand or by providing a training set for each classification
type.
\tparam RandomAccessIterator Iterator over the input items
\tparam ItemMap is a model of `ReadablePropertyMap`
*/
template <typename RandomAccessIterator,
typename ItemMap>
class Classifier
{
public:
/// \cond SKIP_IN_MANUAL
typedef typename ItemMap::value_type Item;
typedef typename Classification::Type_handle Type_handle;
typedef typename Classification::Attribute_handle Attribute_handle;
// Graph-cut backend used by run_with_graphcut(): the Boost
// implementation when the Boykov-Kolmogorov software is disabled,
// the Boykov-Kolmogorov one otherwise.
#ifdef CGAL_DO_NOT_USE_BOYKOV_KOLMOGOROV_MAXFLOW_SOFTWARE
typedef internal::Alpha_expansion_graph_cut_boost Alpha_expansion;
#else
typedef internal::Alpha_expansion_graph_cut_boykov_kolmogorov Alpha_expansion;
#endif
private:
// Lightweight random-access wrapper around the input iterator
// range: gives indexed access to the items through the property map.
class Item_range
{
std::size_t m_size;
RandomAccessIterator m_begin;
ItemMap m_item_map;
public:
Item_range (RandomAccessIterator begin, RandomAccessIterator end,
ItemMap item_map)
: m_size (end - begin), m_begin (begin), m_item_map (item_map)
{ }
// Number of items in the range.
std::size_t size() const { return m_size; }
// Item at the given position, fetched through the property map
// (uses the unqualified get() of the property-map protocol).
const Item& operator[] (std::size_t index) const { return get (m_item_map, *(m_begin + index)); }
RandomAccessIterator begin() { return m_begin; }
RandomAccessIterator end() { return m_begin + m_size; }
// Convenience: range + index yields the corresponding iterator.
friend RandomAccessIterator operator+ (Item_range& range, std::size_t index)
{
return range.begin() + index;
}
};
Item_range m_input;
// Per-item index into m_types; (std::size_t)(-1) means "unclassified".
std::vector<std::size_t> m_assigned_type;
// Per-item index into m_types; (std::size_t)(-1) means "not a
// training inlier". Empty until a training set is declared.
std::vector<std::size_t> m_training_type;
// Per-item confidence of the assigned type (gap between the two
// lowest classification energies).
std::vector<double> m_confidence;
std::vector<Type_handle> m_types;
std::vector<Attribute_handle> m_attributes;
typedef Classification::Type::Attribute_effect Attribute_effect;
// m_effect_table[type][attribute]; rebuilt by prepare_classification().
std::vector<std::vector<Attribute_effect> > m_effect_table;
/// \endcond
public:
/// \name Constructor
/// @{
/*!
\brief Constructs a classification object based on the input iterators.
This method just initializes the structure and does not compute
anything.
\param begin Iterator to the first input object
\param end Past-the-end iterator
\param item_map Property map to access the input items
*/
Classifier (RandomAccessIterator begin,
RandomAccessIterator end,
ItemMap item_map)
: m_input (begin, end, item_map)
{
// Nothing else to initialize here: classification types and
// attributes are added afterwards, and the per-item tables are
// allocated by prepare_classification().
}
/// @}
/// \name Classification
/// @{
/*!
\brief Runs the classification algorithm without any regularization.
There is no relationship between items, the classification energy
is only minimized itemwise. This method is quick but produce
suboptimal results.
*/
void run()
{
  prepare_classification ();

  // Itemwise energy minimization: assign to each item the
  // classification type with the lowest classification value.
  for (std::size_t s = 0; s < m_input.size(); s++)
  {
    std::size_t nb_class_best = 0;
    double val_class_best = (std::numeric_limits<double>::max)();
    std::vector<double> values;

    for (std::size_t k = 0; k < m_effect_table.size(); ++ k)
    {
      double value = classification_value (k, s);
      values.push_back (value);
      if (val_class_best > value)
      {
        val_class_best = value;
        nb_class_best = k;
      }
    }
    m_assigned_type[s] = nb_class_best;

    // Confidence = gap between the two lowest energies. Guarded:
    // the previous code read values[1] even when fewer than two
    // classification types were registered (out-of-bounds read).
    if (values.size() > 1)
    {
      std::sort (values.begin(), values.end());
      m_confidence[s] = values[1] - values[0];
    }
    else
      m_confidence[s] = 0.;
  }
}
/*!
\brief Runs the classification algorithm with a local smoothing.
The computed classification energy is smoothed on a user defined
local neighborhood of items. This method is a compromise between
efficiency and reliability.
\tparam NeighborQuery is a model of `NeighborQuery`
\param neighbor_query is used to access neighborhoods of items
*/
/*
  Smoothed variant: the classification energy of each item is
  averaged over its neighborhood before picking the best type.
*/
template <typename NeighborQuery>
void run_with_local_smoothing (const NeighborQuery& neighbor_query)
{
  prepare_classification ();

  CGAL_CLASSIFICATION_CERR << "Labeling... ";

  // Lazily-filled cache: values[k][i] is the energy of type k for
  // item i, or -1 if not computed yet (energies are assumed
  // non-negative, as in the original caching test).
  std::vector<std::vector<double> > values
    (m_types.size(),
     std::vector<double> (m_input.size(), -1.));

  for (std::size_t s = 0; s < m_input.size(); ++ s)
  {
    std::vector<std::size_t> neighbors;
    neighbor_query (m_input[s], std::back_inserter (neighbors));

    // An item always belongs to its own neighborhood; this also
    // prevents a division by zero (NaN mean) when the query
    // returns no neighbor at all.
    if (neighbors.empty())
      neighbors.push_back (s);

    std::vector<double> mean (values.size(), 0.);
    for (std::size_t n = 0; n < neighbors.size(); ++ n)
    {
      if (values[0][neighbors[n]] < 0.) // not cached yet
        for (std::size_t k = 0; k < m_effect_table.size(); ++ k)
        {
          values[k][neighbors[n]] = classification_value (k, neighbors[n]);
          mean[k] += values[k][neighbors[n]];
        }
      else
        for (std::size_t j = 0; j < values.size(); ++ j)
          mean[j] += values[j][neighbors[n]];
    }

    // Pick the type whose averaged energy is lowest.
    std::size_t nb_class_best = 0;
    double val_class_best = (std::numeric_limits<double>::max)();
    for (std::size_t k = 0; k < mean.size(); ++ k)
    {
      mean[k] /= neighbors.size();
      if (val_class_best > mean[k])
      {
        val_class_best = mean[k];
        nb_class_best = k;
      }
    }
    m_assigned_type[s] = nb_class_best;

    // Confidence = gap between the two lowest averaged energies.
    // Guarded: the previous code read mean[1] even with fewer than
    // two classification types (out-of-bounds read).
    if (mean.size() > 1)
    {
      std::sort (mean.begin(), mean.end());
      m_confidence[s] = mean[1] - mean[0];
    }
    else
      m_confidence[s] = 0.;
  }
}
/*!
\brief Runs the classification algorithm with a global
regularizarion based on a graphcut.
The computed classification energy is globally regularized through
and alpha-expansion algorithm. This method is slow but provides
the user with good quality results.
\tparam NeighborQuery is a model of `NeighborQuery`
\param neighbor_query is used to access neighborhoods of items
\param weight Weight of the regularization with respect to the
classification energy. Higher values produce more regularized
output but may result in a loss of details.
*/
template <typename NeighborQuery>
void run_with_graphcut (const NeighborQuery& neighbor_query,
const double& weight)
{
prepare_classification ();
// data term initialisation
CGAL_CLASSIFICATION_CERR << "Labeling with regularization weight " << weight << "... ";
// Graph structure: one vertex per input item, one edge per
// (item, neighbor) pair, every edge weighted by the regularization
// weight.
std::vector<std::pair<std::size_t, std::size_t> > edges;
std::vector<double> edge_weights;
// probability_matrix[k][s] = data term: energy of type k for item s.
std::vector<std::vector<double> > probability_matrix
(m_effect_table.size(), std::vector<double>(m_input.size(), 0.));
// m_assigned_type serves both as the initial labeling handed to the
// alpha expansion and as its output.
std::vector<std::size_t>(m_input.size()).swap(m_assigned_type);
for (std::size_t s = 0; s < m_input.size(); ++ s)
{
std::vector<std::size_t> neighbors;
neighbor_query (m_input[s], std::back_inserter (neighbors));
// NOTE(review): with symmetric neighborhoods each edge is inserted
// twice (s->n and n->s), effectively doubling the regularization
// strength — confirm this is intended.
for (std::size_t i = 0; i < neighbors.size(); ++ i)
if (s != neighbors[i])
{
edges.push_back (std::make_pair (s, neighbors[i]));
edge_weights.push_back (weight);
}
// Initial labeling: itemwise best (lowest-energy) type.
std::size_t nb_class_best = 0;
double val_class_best = (std::numeric_limits<double>::max)();
for(std::size_t k = 0; k < m_effect_table.size(); ++ k)
{
double value = classification_value (k, s);
probability_matrix[k][s] = value;
if(val_class_best > value)
{
val_class_best = value;
nb_class_best = k;
}
}
m_assigned_type[s] = nb_class_best;
}
// Global regularization; the result is written back into
// m_assigned_type. NOTE(review): m_confidence is left zero-filled
// by this method (confidence_of() returns 0 after a graphcut run).
Alpha_expansion graphcut;
graphcut(edges, edge_weights, probability_matrix, m_assigned_type);
}
/// @}
/// \name Classification Types
/// @{
/*!
\brief Instanciates and adds a classification type.
\param name ID of the classification type.
\return A handle to the newly added classification type.
*/
// Creates a new classification type with the given ID, registers it
// and returns its handle.
Type_handle add_classification_type (const char* name)
{
  m_types.push_back (Type_handle (new Classification::Type (name)));
  return m_types.back();
}
/*!
\brief Adds a classification type.
\param type The handle to the classification type that must be added.
*/
void add_classification_type (Type_handle type)
{
// Registers an externally created classification type.
m_types.push_back (type);
}
/*!
\brief Removes a classification type.
\param type The handle to the classification type that must be removed.
\return `true` if the classification type was correctly removed,
`false` if its handle was not found inside the object.
*/
bool remove_classification_type (Type_handle type)
{
  // Locate the type and erase it from the list.
  std::size_t idx = (std::size_t)(-1);
  for (std::size_t i = 0; i < m_types.size(); ++ i)
    if (m_types[i] == type)
    {
      m_types.erase (m_types.begin() + i);
      idx = i;
      break;
    }
  if (idx == (std::size_t)(-1))
    return false;

  // (Removed a leftover debug trace that printed the index to cerr.)

  // Re-map the stored indices of both the classification output and
  // the training sets: indices above the removed one shift down by
  // one, indices equal to it become invalid.
  for (std::size_t i = 0; i < m_assigned_type.size(); ++ i)
    if (m_assigned_type[i] == (std::size_t)(-1))
      continue;
    else if (m_assigned_type[i] > idx)
      m_assigned_type[i] --;
    else if (m_assigned_type[i] == idx)
      m_assigned_type[i] = (std::size_t)(-1);

  for (std::size_t i = 0; i < m_training_type.size(); ++ i)
    // Bug fix: this loop previously tested m_assigned_type[i]
    // (copy/paste from the loop above) while updating
    // m_training_type, so invalid training entries were not skipped.
    if (m_training_type[i] == (std::size_t)(-1))
      continue;
    else if (m_training_type[i] > idx)
      m_training_type[i] --;
    else if (m_training_type[i] == idx)
      m_training_type[i] = (std::size_t)(-1);

  return true;
}
/// \cond SKIP_IN_MANUAL
// Number of registered classification types.
std::size_t number_of_classification_types () const
{
return m_types.size();
}
// Returns the idx-th classification type (no bounds checking).
Type_handle get_classification_type (std::size_t idx)
{
return m_types[idx];
}
/// \endcond
/*!
\brief Removes all classification types.
*/
void clear_classification_types ()
{
// NOTE(review): the per-item assigned/training indices are NOT
// reset here and keep referring to the removed types — confirm
// callers re-run classification (or reset the training sets)
// after clearing.
m_types.clear();
}
/// @}
/// \name Attributes
/// @{
/*!
\brief Adds an attribute.
\param attribute Handle of the attribute to add.
*/
void add_attribute (Attribute_handle attribute)
{
// Registers an attribute to be used by the classification energy.
m_attributes.push_back (attribute);
}
/*!
\brief Removes all attributes.
*/
void clear_attributes ()
{
m_attributes.clear();
}
/// \cond SKIP_IN_MANUAL
// Number of registered attributes.
std::size_t number_of_attributes() const
{
return m_attributes.size();
}
// Returns the idx-th attribute (no bounds checking).
Attribute_handle get_attribute(std::size_t idx)
{
return m_attributes[idx];
}
/// \endcond
/// @}
/// \name Output
/// @{
/*!
\brief Gets the classification type of an indexed item.
\note If classification was not performed (using `run()`,
`run_with_local_smoothing()` or `run_with_graphcut()`), this
function always returns the default empty `Type_handle`.
\param index Index of the input item
\return Pointer to the classification type
*/
Type_handle classification_type_of (std::size_t index) const
{
  // Returns a default-constructed handle when the index is out of
  // range (classification not run) or the item was left unassigned.
  if (index < m_assigned_type.size())
  {
    std::size_t type_index = m_assigned_type[index];
    if (type_index != (std::size_t)(-1))
      return m_types[type_index];
  }
  return Type_handle();
}
/// \cond SKIP_IN_MANUAL
// True once prepare_classification() (or one of the run methods)
// has allocated the per-item assignment table.
bool classification_prepared() const
{
  return m_assigned_type.size() > 0;
}
// Forces the classification type of one item; stores the invalid
// index when the handle does not belong to this classifier.
void set_classification_type_of (std::size_t index, Type_handle class_type)
{
  std::size_t found = (std::size_t)(-1);
  for (std::size_t i = 0; i < m_types.size() && found == (std::size_t)(-1); ++ i)
    if (m_types[i] == class_type)
      found = i;
  m_assigned_type[index] = found;
}
/// \endcond
/*!
\brief Gets the confidence of the classification type of an indexed item.
\note If classification was not performed (using `run()`,
`run_with_local_smoothing()` or `run_with_graphcut()`), this
function always returns 0.
\param index Index of the input item
\return Confidence ranging from 0 (not confident at all) to 1 (very confident).
*/
double confidence_of (std::size_t index) const
{
  // Out-of-range index (classification not yet run) yields a null
  // confidence.
  return (index < m_confidence.size()) ? m_confidence[index] : 0.;
}
/// @}
/// \name Training
/// @{
/*!
\brief Runs the training algorithm.
The object must have been filled with the `Classification::Type`
and `Classification::Attribute` objects before running this
function.
Each classification type must be given a small set of user-defined
inliers to provide the training algorithm with a ground truth.
\param nb_tests Number of tests to perform. Higher values may
provide the user with better results at the cost of higher
computation time. Using a value of at least 10 times the number of
attributes is advised.
\return Minimum ratio (over all classification types) of provided
ground truth items correctly classified using the best
configuration found.
*/
double training (std::size_t nb_tests = 300)
{
// No training possible without user-provided inliers.
if (m_training_type.empty())
return 0.;
// Group the inlier indices by classification type.
std::vector<std::vector<std::size_t> > training_sets (m_types.size());
for (std::size_t i = 0; i < m_training_type.size(); ++ i)
if (m_training_type[i] != (std::size_t)(-1))
training_sets[m_training_type[i]].push_back (i);
for (std::size_t i = 0; i < training_sets.size(); ++ i)
if (training_sets[i].empty())
std::cerr << "WARNING: \"" << m_types[i]->id() << "\" doesn't have a training set." << std::endl;
std::vector<double> best_weights (m_attributes.size(), 1.);
// Per-attribute weight search range (log scale).
struct Attribute_training
{
bool skipped;
double wmin;
double wmax;
double factor;
};
std::vector<Attribute_training> att_train;
std::size_t nb_trials = 100;
double wmin = 1e-5, wmax = 1e5;
double factor = std::pow (wmax/wmin, 1. / (double)nb_trials);
std::size_t att_used = 0;
// First pass: for each attribute, sweep the weight over
// [wmin;wmax] on a log scale (100 steps, so nb_useful is directly
// a percentage) and record the sub-interval in which the attribute
// is "useful", i.e. discriminates at least two types. Attributes
// that are almost never useful are disabled (weight 0).
for (std::size_t j = 0; j < m_attributes.size(); ++ j)
{
Attribute_handle att = m_attributes[j];
best_weights[j] = att->weight;
std::size_t nb_useful = 0;
double min = (std::numeric_limits<double>::max)();
double max = -(std::numeric_limits<double>::max)();
att->weight = wmin;
for (std::size_t i = 0; i < 100; ++ i)
{
estimate_attribute_effect(training_sets, att);
if (attribute_useful(att))
{
CGAL_CLASSTRAINING_CERR << "#";
nb_useful ++;
min = (std::min) (min, att->weight);
max = (std::max) (max, att->weight);
}
else
CGAL_CLASSTRAINING_CERR << "-";
att->weight *= factor;
}
CGAL_CLASSTRAINING_CERR << std::endl;
// NOTE(review): if nb_useful == 0, min/max still hold their
// sentinel values here and are printed raw in the trace below.
CGAL_CLASSTRAINING_CERR << att->id() << " useful in "
<< nb_useful << "% of the cases, in interval [ "
<< min << " ; " << max << " ]" << std::endl;
att_train.push_back (Attribute_training());
att_train.back().skipped = false;
att_train.back().wmin = min / factor;
att_train.back().wmax = max * factor;
if (nb_useful < 2)
{
// Attribute (almost) never useful: disable it.
att_train.back().skipped = true;
att->weight = 0.;
best_weights[j] = att->weight;
}
else if (best_weights[j] == 1.)
{
// Default weight: start from the middle of the useful interval.
att->weight = 0.5 * (att_train.back().wmin + att_train.back().wmax);
best_weights[j] = att->weight;
++ att_used;
}
else
{
// User-provided weight: keep it as the starting point.
att->weight = best_weights[j];
++ att_used;
}
estimate_attribute_effect(training_sets, att);
}
// Budget: split the requested number of tests evenly among the
// attributes actually used.
// NOTE(review): if att_used == 0 (all attributes skipped), the
// division below is by zero and the cast is undefined —
// presumably at least one useful attribute is expected; verify.
std::size_t nb_trials_per_attribute = 1 + (std::size_t)(nb_tests / (double)(att_used));
std::cerr << "Trials = " << nb_tests << ", attributes = " << att_used
<< ", trials per att = " << nb_trials_per_attribute << std::endl;
for (std::size_t i = 0; i < att_train.size(); ++ i)
if (!(att_train[i].skipped))
att_train[i].factor = std::pow (att_train[i].wmax / att_train[i].wmin,
1. / (double)nb_trials_per_attribute);
prepare_classification();
// Baseline scores with the starting weights.
double best_score = training_compute_worst_score(training_sets, 0.);
double best_confidence = training_compute_worst_confidence(training_sets, 0.);
std::cerr << "TRAINING GLOBALLY: Best score evolution: " << std::endl;
std::cerr << 100. * best_score << "% (found at initialization)" << std::endl;
// Second pass: coordinate-descent-like search. One attribute at a
// time, sweep its weight over its useful interval (the others
// fixed at their current best) and keep any configuration that
// improves both the worst score and the worst confidence.
std::size_t current_att_changed = 0;
for (std::size_t i = 0; i < att_used; ++ i)
{
// Move to the next non-skipped attribute (wrapping around).
while (att_train[current_att_changed].skipped)
{
++ current_att_changed;
if (current_att_changed == m_attributes.size())
current_att_changed = 0;
}
// Fix all other attributes to their best weights so far.
std::size_t nb_used = 0;
for (std::size_t j = 0; j < m_attributes.size(); ++ j)
{
if (j == current_att_changed)
continue;
m_attributes[j]->weight = best_weights[j];
estimate_attribute_effect(training_sets, m_attributes[j]);
if (attribute_useful(m_attributes[j]))
nb_used ++;
}
Attribute_handle current_att = m_attributes[current_att_changed];
const Attribute_training& tr = att_train[current_att_changed];
current_att->weight = tr.wmin;
for (std::size_t j = 0; j < nb_trials_per_attribute; ++ j)
{
estimate_attribute_effect(training_sets, current_att);
prepare_classification();
// Both evaluations abort early as soon as they fall below the
// current best, so rejected trials are cheap.
double worst_confidence = training_compute_worst_confidence(training_sets,
best_confidence);
double worst_score = training_compute_worst_score(training_sets,
best_score);
if (worst_score > best_score
&& worst_confidence > best_confidence)
{
best_score = worst_score;
best_confidence = worst_confidence;
std::cerr << 100. * best_score << "% (found at iteration "
<< (i * nb_trials_per_attribute) + j << "/" << nb_tests << ", "
<< nb_used + (attribute_useful(current_att) ? 1 : 0)
<< "/" << m_attributes.size() << " attribute(s) used)" << std::endl;
for (std::size_t k = 0; k < m_attributes.size(); ++ k)
{
Attribute_handle att = m_attributes[k];
best_weights[k] = att->weight;
}
}
current_att->weight *= tr.factor;
}
++ current_att_changed;
}
// Restore the best configuration found.
for (std::size_t i = 0; i < best_weights.size(); ++ i)
{
Attribute_handle att = m_attributes[i];
att->weight = best_weights[i];
}
estimate_attributes_effects(training_sets);
std::cerr << std::endl << "Best score found is at least " << 100. * best_score
<< "% of correct classification" << std::endl;
// Report which attributes ended up useless (same effect on every
// classification type).
std::size_t nb_removed = 0;
for (std::size_t i = 0; i < best_weights.size(); ++ i)
{
Attribute_handle att = m_attributes[i];
CGAL_CLASSTRAINING_CERR << "ATTRIBUTE " << att->id() << ": " << best_weights[i] << std::endl;
att->weight = best_weights[i];
Classification::Type::Attribute_effect side = m_types[0]->attribute_effect(att);
bool to_remove = true;
for (std::size_t j = 0; j < m_types.size(); ++ j)
{
Type_handle ctype = m_types[j];
if (ctype->attribute_effect(att) == Classification::Type::FAVORED_ATT)
CGAL_CLASSTRAINING_CERR << " * Favored for ";
else if (ctype->attribute_effect(att) == Classification::Type::PENALIZED_ATT)
CGAL_CLASSTRAINING_CERR << " * Penalized for ";
else
CGAL_CLASSTRAINING_CERR << " * Neutral for ";
if (ctype->attribute_effect(att) != side)
to_remove = false;
CGAL_CLASSTRAINING_CERR << ctype->id() << std::endl;
}
if (to_remove)
{
CGAL_CLASSTRAINING_CERR << " -> Useless! Should be removed" << std::endl;
++ nb_removed;
}
}
std::cerr << nb_removed
<< " attribute(s) out of " << m_attributes.size() << " are useless" << std::endl;
return best_score;
}
/*!
\brief Resets training sets.
*/
void reset_training_sets()
{
  // Mark every input item as belonging to no training set.
  m_training_type.assign (m_input.size(), (std::size_t)(-1));
}
/*!
\brief Adds the input item specified by index `idx` as an inlier
of `class_type` for the training algorithm.
\param class_type Handle to the classification type.
\param idx Index of the input item.
*/
bool add_training_index (Type_handle class_type,
                         std::size_t idx)
{
  // Find the internal index of the classification type.
  std::size_t type_idx = (std::size_t)(-1);
  for (std::size_t i = 0; i < m_types.size(); ++ i)
    if (m_types[i] == class_type)
    {
      type_idx = i;
      break;
    }
  if (type_idx == (std::size_t)(-1))
    return false;

  // Lazily allocate the training table on first use.
  if (m_training_type.empty())
    reset_training_sets();

  // Robustness: reject out-of-range item indices instead of writing
  // past the end of the table (previously undefined behavior).
  if (idx >= m_training_type.size())
    return false;

  m_training_type[idx] = type_idx;
  return true;
}
/*!
\brief Adds input items specified by a range of indices as
inliers of `class_type` for the training algorithm.
\param class_type Handle to the classification type.
\param first Iterator to the first index to add
\param beyond Past-the-end iterator
\tparam IndexIterator Iterator with `std::size_t` as a
`value_type`. \cgalModels InputIterator
*/
template <class IndexIterator>
bool add_training_set (Type_handle class_type,
                       IndexIterator first,
                       IndexIterator beyond)
{
  // Find the internal index of the classification type.
  std::size_t type_idx = (std::size_t)(-1);
  for (std::size_t i = 0; i < m_types.size(); ++ i)
    if (m_types[i] == class_type)
    {
      type_idx = i;
      break;
    }
  if (type_idx == (std::size_t)(-1))
    return false;

  // Lazily allocate the training table on first use.
  if (m_training_type.empty())
    reset_training_sets();

  for (IndexIterator it = first; it != beyond; ++ it)
  {
    // Robustness: skip out-of-range item indices instead of writing
    // past the end of the table (previously undefined behavior).
    if (*it < m_training_type.size())
      m_training_type[*it] = type_idx;
  }
  return true;
}
/// @}
/// \cond SKIP_IN_MANUAL
Type_handle training_type_of (std::size_t index) const
{
  // Empty handle when the item is out of range or is not an inlier
  // of any training set.
  if (index < m_training_type.size())
  {
    std::size_t type_index = m_training_type[index];
    if (type_index != (std::size_t)(-1))
      return m_types[type_index];
  }
  return Type_handle();
}
void prepare_classification ()
{
  // Reset the per-item output tables: everything unclassified,
  // null confidence.
  m_assigned_type.assign (m_input.size(), (std::size_t)(-1));
  m_confidence.assign (m_input.size(), 0.);

  // Rebuild the type x attribute effect lookup table from the
  // current types and attributes.
  m_effect_table.clear();
  m_effect_table.resize (m_types.size(),
                         std::vector<Attribute_effect> (m_attributes.size(),
                                                        Classification::Type::NEUTRAL_ATT));
  for (std::size_t i = 0; i < m_effect_table.size (); ++ i)
    for (std::size_t j = 0; j < m_effect_table[i].size (); ++ j)
      m_effect_table[i][j] = m_types[i]->attribute_effect (m_attributes[j]);
}
/// \endcond
private:
double classification_value (const std::size_t& class_type,
const std::size_t& pt_index) const
{
double out = 0.;
for (std::size_t i = 0; i < m_effect_table[class_type].size(); ++ i)
{
if (m_attributes[i]->weight == 0.)
continue;
if (m_effect_table[class_type][i] == Classification::Type::FAVORED_ATT)
out += m_attributes[i]->favored (pt_index);
else if (m_effect_table[class_type][i] == Classification::Type::PENALIZED_ATT)
out += m_attributes[i]->penalized (pt_index);
else if (m_effect_table[class_type][i] == Classification::Type::NEUTRAL_ATT)
out += m_attributes[i]->ignored (pt_index);
}
return out;
}
void estimate_attributes_effects
(const std::vector<std::vector<std::size_t> >& training_sets)
{
for (std::size_t i = 0; i < m_attributes.size(); ++ i)
estimate_attribute_effect (training_sets, m_attributes[i]);
}
// Estimates, for each classification type, whether the attribute is
// favored, penalized or neutral, from the distribution of its
// normalized values over the type's training set.
void estimate_attribute_effect
(const std::vector<std::vector<std::size_t> >& training_sets,
 Attribute_handle att)
{
  // Mean of the normalized attribute value over each training set.
  std::vector<double> mean (m_types.size(), 0.);
  for (std::size_t j = 0; j < m_types.size(); ++ j)
  {
    if (training_sets[j].empty()) // guard against division by zero
      continue;
    for (std::size_t k = 0; k < training_sets[j].size(); ++ k)
    {
      double val = att->normalized(training_sets[j][k]);
      mean[j] += val;
    }
    mean[j] /= training_sets[j].size();
  }

  // Standard deviation, then thresholding: the attribute is favored
  // (resp. penalized) for a type when its values sit significantly
  // above (resp. below) 0.5 on the type's training set.
  std::vector<double> sd (m_types.size(), 0.);
  for (std::size_t j = 0; j < m_types.size(); ++ j)
  {
    Type_handle ctype = m_types[j];

    // Empty training set: no information, keep the attribute
    // neutral. (The previous code divided by zero and reached the
    // same neutral branch through always-false NaN comparisons.)
    if (training_sets[j].empty())
    {
      ctype->set_attribute_effect (att, Classification::Type::NEUTRAL_ATT);
      continue;
    }

    for (std::size_t k = 0; k < training_sets[j].size(); ++ k)
    {
      double val = att->normalized(training_sets[j][k]);
      sd[j] += (val - mean[j]) * (val - mean[j]);
    }
    sd[j] = std::sqrt (sd[j] / training_sets[j].size());

    if (mean[j] - sd[j] > 0.5)
      ctype->set_attribute_effect (att, Classification::Type::FAVORED_ATT);
    else if (mean[j] + sd[j] < 0.5)
      ctype->set_attribute_effect (att, Classification::Type::PENALIZED_ATT);
    else
      ctype->set_attribute_effect (att, Classification::Type::NEUTRAL_ATT);
  }
}
// Ratio of correctly classified inliers, computed per type; returns
// the worst ratio over all types. The scan aborts early as soon as
// the running worst drops below lower_bound.
double training_compute_worst_score
(const std::vector<std::vector<std::size_t> >& training_sets,
 double lower_bound)
{
  double worst_score = 1.;
  for (std::size_t j = 0; j < m_types.size(); ++ j)
  {
    std::size_t nb_okay = 0;
    for (std::size_t k = 0; k < training_sets[j].size(); ++ k)
    {
      std::size_t item = training_sets[j][k];

      // Itemwise best type = lowest classification energy.
      std::size_t best_type = 0;
      double best_value = (std::numeric_limits<double>::max)();
      for (std::size_t l = 0; l < m_effect_table.size(); ++ l)
      {
        double value = classification_value (l, item);
        if (value < best_value)
        {
          best_value = value;
          best_type = l;
        }
      }

      if (best_type == j)
        ++ nb_okay;
    }

    double score = nb_okay / (double)(training_sets[j].size());
    if (score < worst_score)
      worst_score = score;
    if (worst_score < lower_bound)
      return worst_score;
  }
  return worst_score;
}
// Mean confidence (gap between the two lowest energies) restricted
// to correctly classified inliers, computed per type; returns the
// worst mean over all types, aborting early below lower_bound.
double training_compute_worst_confidence
(const std::vector<std::vector<std::size_t> >& training_sets,
double lower_bound)
{
double worst_confidence = (std::numeric_limits<double>::max)();
for (std::size_t j = 0; j < m_types.size(); ++ j)
{
double confidence = 0.;
for (std::size_t k = 0; k < training_sets[j].size(); ++ k)
{
// Sort the (energy, type) pairs of this inlier by energy.
std::vector<std::pair<double, std::size_t> > values;
for(std::size_t l = 0; l < m_effect_table.size(); ++ l)
values.push_back (std::make_pair (classification_value (l, training_sets[j][k]),
l));
std::sort (values.begin(), values.end());
// Only correctly classified inliers contribute a (positive) gap.
if (values[0].second == j)
confidence += values[1].first - values[0].first;
else
{
// Disabled alternative: penalize misclassified inliers by the
// (negative) gap to the expected type.
// for(std::size_t l = 0; l < values.size(); ++ l)
// if (values[l].second == j)
// {
// confidence += values[0].first - values[l].first;
// break;
// }
}
}
// NOTE(review): the normalization divides by the number of
// attributes as well as the training set size — confirm this
// scaling is intended.
confidence /= (double)(training_sets[j].size() * m_attributes.size());
if (confidence < worst_confidence)
worst_confidence = confidence;
if (worst_confidence < lower_bound)
return worst_confidence;
}
return worst_confidence;
}
// An attribute is useful as soon as two classification types react
// differently to it (different effects).
bool attribute_useful (Attribute_handle att)
{
  Classification::Type::Attribute_effect first_effect
    = m_types[0]->attribute_effect(att);
  bool useful = false;
  for (std::size_t k = 1; k < m_types.size() && !useful; ++ k)
    useful = (m_types[k]->attribute_effect(att) != first_effect);
  return useful;
}
};
} // namespace CGAL
#endif // CGAL_CLASSIFIER_H