cgal/Classification/include/CGAL/Classification/Sum_of_weighted_features_pr...

965 lines
32 KiB
C++

// Copyright (c) 2012 INRIA Sophia-Antipolis (France).
// Copyright (c) 2017 GeometryFactory Sarl (France).
// All rights reserved.
//
// This file is part of CGAL (www.cgal.org).
// You can redistribute it and/or modify it under the terms of the GNU
// General Public License as published by the Free Software Foundation,
// either version 3 of the License, or (at your option) any later version.
//
// Licensees holding a valid commercial license may use this file in
// accordance with the commercial license agreement provided with the software.
//
// This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
// WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
//
// $URL$
// $Id$
//
// Author(s) : Simon Giraudot, Florent Lafarge
#ifndef CLASSIFICATION_SUM_OF_WEIGHTED_FEATURES_PREDICATE_H
#define CLASSIFICATION_SUM_OF_WEIGHTED_FEATURES_PREDICATE_H
#include <CGAL/Classification/Feature_set.h>
#include <CGAL/Classification/Label_set.h>
#include <boost/property_tree/ptree.hpp>
#include <boost/property_tree/xml_parser.hpp>
#include <boost/foreach.hpp>
#include <boost/algorithm/string/predicate.hpp>
#include <boost/algorithm/string.hpp>
#include <boost/lexical_cast.hpp>
#ifdef CGAL_LINKED_WITH_TBB
#include <tbb/parallel_for.h>
#include <tbb/blocked_range.h>
#include <tbb/scalable_allocator.h>
#include <tbb/mutex.h>
#endif // CGAL_LINKED_WITH_TBB
//#define CGAL_CLASSIFICATION_VERBOSE
#if defined(CGAL_CLASSIFICATION_VERBOSE)
#define CGAL_CLASSIFICATION_CERR std::cerr
#else
#define CGAL_CLASSIFICATION_CERR std::ostream(0)
#endif
//#define CGAL_CLASSTRAINING_VERBOSE
#if defined(CGAL_CLASSTRAINING_VERBOSE)
#define CGAL_CLASSTRAINING_CERR std::cerr
#else
#define CGAL_CLASSTRAINING_CERR std::ostream(0)
#endif
namespace CGAL {
namespace Classification {
/*!
\ingroup PkgClassificationPredicates
\brief %Classification predicate based on the sum of weighted
features with user-defined effects on labels.
\cgalModels `CGAL::Classification::Predicate`
*/
class Sum_of_weighted_features_predicate
{
public:
enum Effect /// Defines the effect of an feature on a type.
{
FAVORING = 0, ///< High values of the feature favor this type
NEUTRAL = 1, ///< The feature has no effect on this type
PENALIZING = 2 ///< Low values of the feature favor this type
};
private:
#ifdef CGAL_LINKED_WITH_TBB
class Compute_iou
{
std::vector<std::size_t>& m_training_set;
const Sum_of_weighted_features_predicate& m_predicate;
std::size_t m_label;
std::vector<std::size_t>& m_true_positives;
std::vector<std::size_t>& m_false_positives;
std::vector<std::size_t>& m_false_negatives;
std::vector<tbb::mutex>& m_tp_mutex;
std::vector<tbb::mutex>& m_fp_mutex;
std::vector<tbb::mutex>& m_fn_mutex;
public:
Compute_iou (std::vector<std::size_t>& training_set,
const Sum_of_weighted_features_predicate& predicate,
std::size_t label,
std::vector<std::size_t>& true_positives,
std::vector<std::size_t>& false_positives,
std::vector<std::size_t>& false_negatives,
std::vector<tbb::mutex>& tp_mutex,
std::vector<tbb::mutex>& fp_mutex,
std::vector<tbb::mutex>& fn_mutex)
: m_training_set (training_set)
, m_predicate (predicate)
, m_label (label)
, m_true_positives (true_positives)
, m_false_positives (false_positives)
, m_false_negatives (false_negatives)
, m_tp_mutex (tp_mutex)
, m_fp_mutex (fp_mutex)
, m_fn_mutex (fn_mutex)
{ }
void operator()(const tbb::blocked_range<std::size_t>& r) const
{
for (std::size_t k = r.begin(); k != r.end(); ++ k)
{
std::size_t res = 0;
std::vector<float> v;
m_predicate (m_training_set[k], v);
float min = std::numeric_limits<float>::max();
for(std::size_t l = 0; l < v.size(); ++ l)
if (v[l] < min)
{
min = v[l];
res = l;
}
if (m_label == res)
{
m_tp_mutex[m_label].lock();
++ m_true_positives[m_label];
m_tp_mutex[m_label].unlock();
continue;
}
m_fp_mutex[res].lock();
++ m_false_positives[res];
m_fp_mutex[res].unlock();
m_fn_mutex[m_label].lock();
++ m_false_negatives[m_label];
m_fn_mutex[m_label].unlock();
}
}
};
#endif // CGAL_LINKED_WITH_TBB
Label_set& m_labels;
Feature_set& m_features;
std::vector<float> m_weights;
std::vector<std::vector<Effect> > m_effect_table;
mutable std::map<Label_handle, std::size_t> m_map_labels;
mutable std::map<Feature_handle, std::size_t> m_map_features;
public:
/// \name Constructor
/// @{
/*!
\brief Instantiate the predicate using the sets of `labels` and `features`.
\note If the label set of the feature set are modified after
instantiating this object (addition of removal of a label and/or of
a feature), another predicate object should be instantiated as the
internal data structures of this one are invalidated.
*/
Sum_of_weighted_features_predicate (Label_set& labels,
Feature_set& features)
: m_labels (labels), m_features (features),
m_weights (features.size(), 1.),
m_effect_table (labels.size(), std::vector<Effect>
(features.size(),
NEUTRAL))
{
for (std::size_t i = 0; i < labels.size(); ++ i)
m_map_labels[labels[i]] = i;
for (std::size_t i = 0; i < features.size(); ++ i)
m_map_features[features[i]] = i;
}
/// @}
/// \name Weights and Effects
/// @{
/*!
\brief Sets the weight of `feature` (`weight` must be positive).
*/
void set_weight (Feature_handle feature, float weight)
{
m_weights[m_map_features[feature]] = weight;
}
/// \cond SKIP_IN_MANUAL
void set_weight (std::size_t feature, float weight)
{
m_weights[feature] = weight;
}
/// \endcond
/*!
\brief Returns the weight of `feature`.
*/
float weight (Feature_handle feature) const
{
return m_weights[m_map_features[feature]];
}
/// \cond SKIP_IN_MANUAL
float weight (std::size_t feature) const
{
return m_weights[feature];
}
/// \endcond
/*!
\brief Sets the `effect` of `feature` on `label`.
*/
void set_effect (Label_handle label, Feature_handle feature,
Effect effect)
{
m_effect_table[m_map_labels[label]][m_map_features[feature]] = effect;
}
/// \cond SKIP_IN_MANUAL
void set_effect (std::size_t label, std::size_t feature,
Effect effect)
{
m_effect_table[label][feature] = effect;
}
/// \endcond
/*!
\brief Returns the `effect` of `feature` on `label`.
*/
Effect effect (Label_handle label, Feature_handle feature) const
{
return m_effect_table[m_map_labels[label]][m_map_features[feature]];
}
/// \cond SKIP_IN_MANUAL
Effect effect (std::size_t label, std::size_t feature) const
{
return m_effect_table[label][feature];
}
/// \endcond
/// @}
/// \cond SKIP_IN_MANUAL
void operator() (std::size_t item_index,
std::vector<float>& out) const
{
out.resize (m_labels.size());
for (std::size_t l = 0; l < m_labels.size(); ++ l)
{
out[l] = 0.;
for (std::size_t f = 0; f < m_features.size(); ++ f)
if (weight(f) != 0.)
out[l] += value (l, f, item_index);
}
}
/// \endcond
/// \name Training
/// @{
/*!
\brief Runs the training algorithm.
From the set of provided ground truth, this algorithm estimates
the sets of weights and effects that produce the most accurate
result with respect to this ground truth.
\note Each label should be assigned at least one ground truth
item.
\param ground_truth vector of label indices. It should contain for
each input item, in the same order as the input set, the index of
the corresponding label in the `Label_set` provided in the
constructor. Input items that do not have a ground truth
information should be given the value `std::size_t(-1)`.
\param nb_tests number of tests to perform. Higher values may
provide the user with better results at the cost of a higher
computation time. Using a value of at least 10 times the number of
features is advised.
\return mean intersection-over-union over each label between the
provided ground truth and the best classification found by the
training set.
*/
template <typename ConcurrencyTag>
float train (const std::vector<std::size_t>& ground_truth,
std::size_t nb_tests = 300)
{
std::vector<std::vector<std::size_t> > training_sets (m_labels.size());
std::size_t nb_tot = 0;
for (std::size_t i = 0; i < ground_truth.size(); ++ i)
if (ground_truth[i] != std::size_t(-1))
{
training_sets[ground_truth[i]].push_back (i);
++ nb_tot;
}
CGAL_CLASSIFICATION_CERR << "Training using " << nb_tot << " inliers" << std::endl;
for (std::size_t i = 0; i < m_labels.size(); ++ i)
if (training_sets.size() <= i || training_sets[i].empty())
std::cerr << "WARNING: \"" << m_labels[i]->name() << "\" doesn't have a training set." << std::endl;
std::vector<float> best_weights (m_features.size(), 1.);
struct Feature_training
{
std::size_t i;
float wmin;
float wmax;
float factor;
bool operator<(const Feature_training& other) const
{
return (wmin / wmax) < (other.wmin / other.wmax);
}
};
std::vector<Feature_training> feature_train;
std::size_t nb_trials = 100;
float wmin = 1e-5, wmax = 1e5;
float factor = std::pow (wmax/wmin, 1. / (float)nb_trials);
for (std::size_t j = 0; j < m_features.size(); ++ j)
{
Feature_handle feature = m_features[j];
best_weights[j] = weight(j);
std::size_t nb_useful = 0;
float min = (std::numeric_limits<float>::max)();
float max = -(std::numeric_limits<float>::max)();
set_weight(j, wmin);
for (std::size_t i = 0; i < 100; ++ i)
{
estimate_feature_effect(j, training_sets);
if (feature_useful(j))
{
CGAL_CLASSTRAINING_CERR << "#";
nb_useful ++;
min = (std::min) (min, weight(j));
max = (std::max) (max, weight(j));
}
else
CGAL_CLASSTRAINING_CERR << "-";
set_weight(j, factor * weight(j));
}
CGAL_CLASSTRAINING_CERR << std::endl;
CGAL_CLASSTRAINING_CERR << feature->name() << " useful in "
<< nb_useful << "% of the cases, in interval [ "
<< min << " ; " << max << " ]" << std::endl;
if (nb_useful < 2)
{
set_weight(j, 0.);
best_weights[j] = weight(j);
continue;
}
feature_train.push_back (Feature_training());
feature_train.back().i = j;
feature_train.back().wmin = min / factor;
feature_train.back().wmax = max * factor;
if (best_weights[j] == 1.)
{
set_weight(j, 0.5 * (feature_train.back().wmin + feature_train.back().wmax));
best_weights[j] = weight(j);
}
else
set_weight(j, best_weights[j]);
estimate_feature_effect(j, training_sets);
}
std::size_t nb_trials_per_feature = 1 + (std::size_t)(nb_tests / (float)(feature_train.size()));
CGAL_CLASSIFICATION_CERR << "Trials = " << nb_tests << ", features = " << feature_train.size()
<< ", trials per feature = " << nb_trials_per_feature << std::endl;
for (std::size_t i = 0; i < feature_train.size(); ++ i)
feature_train[i].factor
= std::pow (feature_train[i].wmax / feature_train[i].wmin,
1. / (float)nb_trials_per_feature);
float best_score = 0.;
best_score = compute_mean_iou<ConcurrencyTag>(training_sets);
CGAL_CLASSIFICATION_CERR << "TRAINING GLOBALLY: Best score evolution: " << std::endl;
CGAL_CLASSIFICATION_CERR << 100. * best_score << "% (found at initialization)" << std::endl;
std::sort (feature_train.begin(), feature_train.end());
for (std::size_t i = 0; i < feature_train.size(); ++ i)
{
const Feature_training& tr = feature_train[i];
std::size_t current_feature_changed = tr.i;
Feature_handle current_feature = m_features[current_feature_changed];
std::size_t nb_used = 0;
for (std::size_t j = 0; j < m_features.size(); ++ j)
{
if (j == current_feature_changed)
continue;
set_weight(j, best_weights[j]);
estimate_feature_effect(j, training_sets);
if (feature_useful(j))
nb_used ++;
else
set_weight(j, 0.);
}
set_weight(current_feature_changed, tr.wmin);
for (std::size_t j = 0; j < nb_trials_per_feature; ++ j)
{
estimate_feature_effect(current_feature_changed, training_sets);
float worst_score = 0.;
worst_score = compute_mean_iou<ConcurrencyTag>(training_sets);
if (worst_score > best_score)
{
best_score = worst_score;
CGAL_CLASSIFICATION_CERR << 100. * best_score << "% (found at iteration "
<< (i * nb_trials_per_feature) + j << "/" << nb_tests << ", "
<< nb_used + (feature_useful(current_feature_changed) ? 1 : 0)
<< "/" << m_features.size() << " feature(s) used)" << std::endl;
for (std::size_t k = 0; k < m_features.size(); ++ k)
best_weights[k] = weight(k);
}
set_weight(current_feature_changed, weight(current_feature_changed) * tr.factor);
}
}
for (std::size_t i = 0; i < best_weights.size(); ++ i)
set_weight(i, best_weights[i]);
estimate_features_effects(training_sets);
CGAL_CLASSIFICATION_CERR << std::endl << "Best score found is at least " << 100. * best_score
<< "% of correct classification" << std::endl;
std::size_t nb_removed = 0;
for (std::size_t i = 0; i < best_weights.size(); ++ i)
{
Feature_handle feature = m_features[i];
CGAL_CLASSTRAINING_CERR << "FEATURE " << feature->name() << ": " << best_weights[i] << std::endl;
set_weight(i, best_weights[i]);
Effect side = effect(0, i);
bool to_remove = true;
for (std::size_t j = 0; j < m_labels.size(); ++ j)
{
Label_handle clabel = m_labels[j];
if (effect(j,i) == FAVORING)
CGAL_CLASSTRAINING_CERR << " * Favored for ";
else if (effect(j,i) == PENALIZING)
CGAL_CLASSTRAINING_CERR << " * Penalized for ";
else
CGAL_CLASSTRAINING_CERR << " * Neutral for ";
if (effect(j,i) != side)
to_remove = false;
CGAL_CLASSTRAINING_CERR << clabel->name() << std::endl;
}
if (to_remove)
{
CGAL_CLASSTRAINING_CERR << " -> Useless! Should be removed" << std::endl;
++ nb_removed;
}
}
CGAL_CLASSIFICATION_CERR << nb_removed
<< " feature(s) out of " << m_features.size() << " are useless" << std::endl;
return best_score;
}
/// @}
/// \cond SKIP_IN_MANUAL
template <typename ConcurrencyTag>
float train_random (const std::vector<std::size_t>& ground_truth,
std::size_t nb_tests = 300)
{
std::vector<std::vector<std::size_t> > training_sets (m_labels.size());
std::size_t nb_tot = 0;
for (std::size_t i = 0; i < ground_truth.size(); ++ i)
if (ground_truth[i] != std::size_t(-1))
{
training_sets[ground_truth[i]].push_back (i);
++ nb_tot;
}
CGAL_CLASSIFICATION_CERR << "Training using " << nb_tot << " inliers" << std::endl;
for (std::size_t i = 0; i < m_labels.size(); ++ i)
if (training_sets.size() <= i || training_sets[i].empty())
std::cerr << "WARNING: \"" << m_labels[i]->name() << "\" doesn't have a training set." << std::endl;
std::vector<float> best_weights (m_features.size(), 1.);
struct Feature_training
{
std::size_t i;
float wmin;
float wmax;
float factor;
bool operator<(const Feature_training& other) const
{
return (wmin / wmax) < (other.wmin / other.wmax);
}
};
std::vector<Feature_training> feature_train;
std::size_t nb_trials = 100;
float wmin = 1e-5, wmax = 1e5;
float factor = std::pow (wmax/wmin, 1. / (float)nb_trials);
for (std::size_t j = 0; j < m_features.size(); ++ j)
{
Feature_handle feature = m_features[j];
best_weights[j] = weight(j);
std::size_t nb_useful = 0;
float min = (std::numeric_limits<float>::max)();
float max = -(std::numeric_limits<float>::max)();
set_weight(j, wmin);
for (std::size_t i = 0; i < 100; ++ i)
{
estimate_feature_effect(j, training_sets);
if (feature_useful(j))
{
CGAL_CLASSTRAINING_CERR << "#";
nb_useful ++;
min = (std::min) (min, weight(j));
max = (std::max) (max, weight(j));
}
else
CGAL_CLASSTRAINING_CERR << "-";
set_weight(j, factor * weight(j));
}
CGAL_CLASSTRAINING_CERR << std::endl;
CGAL_CLASSTRAINING_CERR << feature->name() << " useful in "
<< nb_useful << "% of the cases, in interval [ "
<< min << " ; " << max << " ]" << std::endl;
if (nb_useful < 2)
{
set_weight(j, 0.);
best_weights[j] = weight(j);
continue;
}
feature_train.push_back (Feature_training());
feature_train.back().i = j;
feature_train.back().wmin = min / factor;
feature_train.back().wmax = max * factor;
if (best_weights[j] == 1.)
{
set_weight(j, 0.5 * (feature_train.back().wmin + feature_train.back().wmax));
best_weights[j] = weight(j);
}
else
set_weight(j, best_weights[j]);
estimate_feature_effect(j, training_sets);
}
CGAL_CLASSIFICATION_CERR << "Trials = " << nb_tests << ", features = " << feature_train.size() << std::endl;
float best_score = compute_mean_iou<ConcurrencyTag>(training_sets);
CGAL_CLASSIFICATION_CERR << "TRAINING GLOBALLY: Best score evolution: " << std::endl;
CGAL_CLASSIFICATION_CERR << 100. * best_score << "% (found at initialization)" << std::endl;
for (std::size_t i = 0; i < nb_tests; ++ i)
{
std::size_t nb_used = 0;
std::size_t j = rand() % feature_train.size();
set_weight (feature_train[j].i,
feature_train[j].wmin + ((feature_train[j].wmax - feature_train[j].wmin)
* (rand() / float(RAND_MAX))));
estimate_feature_effect(feature_train[j].i, training_sets);
float worst_score = compute_mean_iou<ConcurrencyTag>(training_sets);
if (worst_score > best_score)
{
best_score = worst_score;
CGAL_CLASSIFICATION_CERR << 100. * best_score << "% (found at iteration "
<< i << "/" << nb_tests << ", "
<< nb_used
<< "/" << m_features.size() << " feature(s) used)" << std::endl;
for (std::size_t k = 0; k < m_features.size(); ++ k)
best_weights[k] = weight(k);
}
set_weight (feature_train[j].i,
best_weights[feature_train[j].i]);
estimate_feature_effect(feature_train[j].i, training_sets);
}
for (std::size_t i = 0; i < best_weights.size(); ++ i)
set_weight(i, best_weights[i]);
estimate_features_effects(training_sets);
CGAL_CLASSIFICATION_CERR << std::endl << "Best score found is at least " << 100. * best_score
<< "% of correct classification" << std::endl;
std::size_t nb_removed = 0;
for (std::size_t i = 0; i < best_weights.size(); ++ i)
{
Feature_handle feature = m_features[i];
CGAL_CLASSTRAINING_CERR << "FEATURE " << feature->name() << ": " << best_weights[i] << std::endl;
set_weight(i, best_weights[i]);
Effect side = effect(0, i);
bool to_remove = true;
for (std::size_t j = 0; j < m_labels.size(); ++ j)
{
Label_handle clabel = m_labels[j];
if (effect(j,i) == FAVORING)
CGAL_CLASSTRAINING_CERR << " * Favored for ";
else if (effect(j,i) == PENALIZING)
CGAL_CLASSTRAINING_CERR << " * Penalized for ";
else
CGAL_CLASSTRAINING_CERR << " * Neutral for ";
if (effect(j,i) != side)
to_remove = false;
CGAL_CLASSTRAINING_CERR << clabel->name() << std::endl;
}
if (to_remove)
{
CGAL_CLASSTRAINING_CERR << " -> Useless! Should be removed" << std::endl;
++ nb_removed;
}
}
CGAL_CLASSIFICATION_CERR << nb_removed
<< " feature(s) out of " << m_features.size() << " are useless" << std::endl;
return best_score;
}
/// \endcond
/// \name Input/Output
/// @{
/*!
\brief Saves the current configuration in the stream `output`.
This allows to easily save and recover a specific classification
configuration, that is to say:
- The weight of each feature
- The effects of each feature on each label
The output file is written in an XML format that is readable by
the `load_configuration()` method.
*/
void save_configuration (std::ostream& output)
{
boost::property_tree::ptree tree;
for (std::size_t i = 0; i < m_features.size(); ++ i)
{
if (weight(m_features[i]) == 0)
continue;
boost::property_tree::ptree ptr;
ptr.put("name", m_features[i]->name());
ptr.put("weight", weight(m_features[i]));
tree.add_child("classification.features.feature", ptr);
}
for (std::size_t i = 0; i < m_labels.size(); ++ i)
{
boost::property_tree::ptree ptr;
ptr.put("name", m_labels[i]->name());
for (std::size_t j = 0; j < m_features.size(); ++ j)
{
if (weight(j) == 0)
continue;
boost::property_tree::ptree ptr2;
ptr2.put("name", m_features[j]->name());
Effect e = effect(i, j);
if (e == PENALIZING)
ptr2.put("effect", "penalized");
else if (e == NEUTRAL)
ptr2.put("effect", "neutral");
else if (e == FAVORING)
ptr2.put("effect", "favored");
ptr.add_child("feature", ptr2);
}
tree.add_child("classification.labels.label", ptr);
}
// Write property tree to XML file
boost::property_tree::xml_writer_settings<std::string> settings(' ', 3);
boost::property_tree::write_xml(output, tree, settings);
}
/*!
\brief Loads a configuration from the stream `input`.
The input file should be in the XML format written by the
`save_configuration()` method. Labels and features are described
in the XML file by their name and the corresponding `Label` and
`Feature_base` object should therefore be given the same names as
the ones they had when saving the configuration.
\note If a feature (or label) found in the input file is not found
in the `Feature_set` (`Label_set`) provided by the user in the
constructor, and if `verbose` is set up to `true`, a warning is
displayed.
\note If a feature (or label) provided by the user in the
constructor is not described in the input file, the corresponding
weights and effects are kept to their default values (1 for the
weight and `NEUTRAL` for the effect).
\param input input stream.
\param verbose displays warning if set to `true`. The method is
silent otherwise.
*/
bool load_configuration (std::istream& input, bool verbose = false)
{
bool out = true;
std::map<std::string, std::size_t> map_n2l;
std::map<std::string, std::size_t> map_n2f;
for (std::size_t i = 0; i < m_labels.size(); ++ i)
map_n2l.insert (std::make_pair (m_labels[i]->name(), i));
for (std::size_t i = 0; i < m_features.size(); ++ i)
map_n2f.insert (std::make_pair (m_features[i]->name(), i));
boost::property_tree::ptree tree;
boost::property_tree::read_xml(input, tree);
BOOST_FOREACH(boost::property_tree::ptree::value_type &v, tree.get_child("classification.features"))
{
std::string name = v.second.get<std::string>("name");
typename std::map<std::string, std::size_t>::iterator
found = map_n2f.find (name);
if (found != map_n2f.end())
m_weights[found->second] = v.second.get<float>("weight");
else
{
if (verbose)
std::cerr << "Warning: feature \"" << name << "\" in configuration file not found" << std::endl;
out = false;
}
}
BOOST_FOREACH(boost::property_tree::ptree::value_type &v, tree.get_child("classification.labels"))
{
std::string label_name = v.second.get<std::string>("name");
typename std::map<std::string, std::size_t>::iterator
found = map_n2l.find (label_name);
std::size_t l = 0;
if (found != map_n2l.end())
l = found->second;
else
{
if (verbose)
std::cerr << "Warning: label \"" << label_name << "\" in configuration file not found" << std::endl;
out = false;
continue;
}
BOOST_FOREACH(boost::property_tree::ptree::value_type &v2, v.second)
{
if (v2.first == "name")
continue;
std::string feature_name = v2.second.get<std::string>("name");
typename std::map<std::string, std::size_t>::iterator
found2 = map_n2f.find (feature_name);
std::size_t f = 0;
if (found2 != map_n2f.end())
f = found2->second;
else if (verbose)
{
if (verbose)
std::cerr << "Warning: feature \"" << feature_name << "\" in configuration file not found" << std::endl;
out = false;
continue;
}
std::string e = v2.second.get<std::string>("effect");
if (e == "penalized")
set_effect (l, f, PENALIZING);
else if (e == "neutral")
set_effect (l, f, NEUTRAL);
else
set_effect (l, f, FAVORING);
}
}
return out;
}
/// @}
private:
float value (std::size_t label, std::size_t feature, std::size_t index) const
{
if (m_effect_table[label][feature] == FAVORING)
return favored (feature, index);
else if (m_effect_table[label][feature] == PENALIZING)
return penalized (feature, index);
else
return ignored (feature, index);
}
float normalized (std::size_t feature, std::size_t index) const
{
return (std::max) (0.f, (std::min) (1.f, m_features[feature]->value(index) / m_weights[feature]));
}
float favored (std::size_t feature, std::size_t index) const
{
return (1. - normalized (feature, index));
}
float penalized (std::size_t feature, std::size_t index) const
{
return normalized (feature, index);
}
float ignored (std::size_t, std::size_t) const
{
return 0.5;
}
void estimate_features_effects(std::vector<std::vector<std::size_t> >& training_sets)
{
for (std::size_t i = 0; i < m_features.size(); ++ i)
estimate_feature_effect (i, training_sets);
}
void estimate_feature_effect (std::size_t feature,
std::vector<std::vector<std::size_t> >& training_sets)
{
std::vector<float> mean (m_labels.size(), 0.);
for (std::size_t j = 0; j < m_labels.size(); ++ j)
{
for (std::size_t k = 0; k < training_sets[j].size(); ++ k)
{
float val = normalized(feature, training_sets[j][k]);
mean[j] += val;
}
mean[j] /= training_sets[j].size();
}
std::vector<float> sd (m_labels.size(), 0.);
for (std::size_t j = 0; j < m_labels.size(); ++ j)
{
Label_handle clabel = m_labels[j];
for (std::size_t k = 0; k < training_sets[j].size(); ++ k)
{
float val = normalized(feature, training_sets[j][k]);
sd[j] += (val - mean[j]) * (val - mean[j]);
}
sd[j] = std::sqrt (sd[j] / training_sets[j].size());
if (mean[j] - sd[j] > (2./3.))
set_effect (j, feature, FAVORING);
else if (mean[j] + sd[j] < (1./3.))
set_effect (j, feature, PENALIZING);
else
set_effect (j, feature, NEUTRAL);
}
}
template <typename ConcurrencyTag>
float compute_mean_iou (std::vector<std::vector<std::size_t> >& training_sets)
{
std::vector<std::size_t> true_positives (m_labels.size());
std::vector<std::size_t> false_positives (m_labels.size());
std::vector<std::size_t> false_negatives (m_labels.size());
for (std::size_t j = 0; j < training_sets.size(); ++ j)
{
std::size_t gt = j;
#ifndef CGAL_LINKED_WITH_TBB
CGAL_static_assertion_msg (!(boost::is_convertible<ConcurrencyTag, Parallel_tag>::value),
"Parallel_tag is enabled but TBB is unavailable.");
#else
if (boost::is_convertible<ConcurrencyTag,Parallel_tag>::value)
{
std::vector<tbb::mutex> tp_mutex (m_labels.size());
std::vector<tbb::mutex> fp_mutex (m_labels.size());
std::vector<tbb::mutex> fn_mutex (m_labels.size());
Compute_iou f(training_sets[j], *this, j,
true_positives, false_positives, false_negatives,
tp_mutex, fp_mutex, fn_mutex);
tbb::parallel_for(tbb::blocked_range<size_t>(0, training_sets[j].size ()), f);
}
else
#endif
for (std::size_t k = 0; k < training_sets[j].size(); ++ k)
{
std::size_t res = 0;
std::vector<float> v;
(*this) (training_sets[j][k], v);
float min = std::numeric_limits<float>::max();
for(std::size_t l = 0; l < m_labels.size(); ++ l)
if (v[l] < min)
{
min = v[l];
res = l;
}
if (gt == res)
{
++ true_positives[gt];
continue;
}
++ false_positives[res];
++ false_negatives[gt];
}
}
float out = 0.;
for (std::size_t j = 0; j < m_labels.size(); ++ j)
{
float iou = true_positives[j] / float(true_positives[j] + false_positives[j] + false_negatives[j]);
out += iou;
}
return out / m_labels.size();
}
bool feature_useful (std::size_t feature)
{
Effect side = effect(0, feature);
for (std::size_t k = 1; k < m_labels.size(); ++ k)
if (effect(k, feature) != side)
return true;
return false;
}
};
}
}
#endif // CLASSIFICATION_SUM_OF_WEIGHTED_FEATURES_PREDICATE_H