// Copyright (c) 2012 INRIA Sophia-Antipolis (France).
// Copyright (c) 2017 GeometryFactory Sarl (France).
// All rights reserved.
//
// This file is part of CGAL (www.cgal.org).
// You can redistribute it and/or modify it under the terms of the GNU
// General Public License as published by the Free Software Foundation,
// either version 3 of the License, or (at your option) any later version.
//
// Licensees holding a valid commercial license may use this file in
// accordance with the commercial license agreement provided with the software.
//
// This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
// WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
//
// $URL$
// $Id$
// SPDX-License-Identifier: GPL-3.0+
//
// Author(s)     : Simon Giraudot, Florent Lafarge

#ifndef CLASSIFICATION_SUM_OF_WEIGHTED_FEATURES_CLASSIFIER_H
#define CLASSIFICATION_SUM_OF_WEIGHTED_FEATURES_CLASSIFIER_H

#include <CGAL/license/Classification.h>

#include <CGAL/Classification/Feature_set.h>
#include <CGAL/Classification/Label_set.h>
#include <CGAL/Classification/internal/verbosity.h>
#include <CGAL/tags.h>

#include <boost/property_tree/ptree.hpp>
#include <boost/property_tree/xml_parser.hpp>
#include <boost/foreach.hpp>
#include <boost/algorithm/string/predicate.hpp>
#include <boost/algorithm/string.hpp>
#include <boost/lexical_cast.hpp>

#include <algorithm> // std::sort, std::shuffle, std::min, std::max
#include <cmath>     // std::pow, std::sqrt
#include <limits>
#include <map>
#include <iostream>
#include <random>    // std::mt19937 for std::shuffle

#ifdef CGAL_LINKED_WITH_TBB
#include <tbb/parallel_for.h>
#include <tbb/blocked_range.h>
#include <tbb/scalable_allocator.h>
#include <tbb/mutex.h>
#endif // CGAL_LINKED_WITH_TBB

#define CLASSIFICATION_TRAINING_QUICK_ESTIMATION

namespace CGAL {

namespace Classification {

/*!
  \ingroup PkgClassificationClassifiers

  \brief %Classifier based on the sum of weighted features with
  user-defined effects on labels.

  \cgalModels `CGAL::Classification::Classifier`
*/
class Sum_of_weighted_features_classifier
{
public:

  /// Defines the effect of a feature on a type.
  enum Effect
  {
    FAVORING = 0,   ///< High values of the feature favor this type
    NEUTRAL = 1,    ///< The feature has no effect on this type
    PENALIZING = 2  ///< Low values of the feature favor this type
  };

private:

#ifdef CGAL_LINKED_WITH_TBB
  class Compute_iou
  {
    std::vector<std::size_t>& m_training_set;
    const Sum_of_weighted_features_classifier& m_classifier;
    std::size_t m_label;
    std::vector<std::size_t>& m_true_positives;
    std::vector<std::size_t>& m_false_positives;
    std::vector<std::size_t>& m_false_negatives;
    std::vector<tbb::mutex>& m_tp_mutex;
    std::vector<tbb::mutex>& m_fp_mutex;
    std::vector<tbb::mutex>& m_fn_mutex;

  public:

    Compute_iou (std::vector<std::size_t>& training_set,
                 const Sum_of_weighted_features_classifier& classifier,
                 std::size_t label,
                 std::vector<std::size_t>& true_positives,
                 std::vector<std::size_t>& false_positives,
                 std::vector<std::size_t>& false_negatives,
                 std::vector<tbb::mutex>& tp_mutex,
                 std::vector<tbb::mutex>& fp_mutex,
                 std::vector<tbb::mutex>& fn_mutex)
      : m_training_set (training_set)
      , m_classifier (classifier)
      , m_label (label)
      , m_true_positives (true_positives)
      , m_false_positives (false_positives)
      , m_false_negatives (false_negatives)
      , m_tp_mutex (tp_mutex)
      , m_fp_mutex (fp_mutex)
      , m_fn_mutex (fn_mutex)
    { }

    void operator()(const tbb::blocked_range<std::size_t>& r) const
    {
      for (std::size_t k = r.begin(); k != r.end(); ++ k)
      {
        std::size_t res = 0;

        std::vector<float> v;
        m_classifier (m_training_set[k], v);

        float min = (std::numeric_limits<float>::max)();
        for (std::size_t l = 0; l < v.size(); ++ l)
          if (v[l] < min)
          {
            min = v[l];
            res = l;
          }

        if (m_label == res)
        {
          m_tp_mutex[m_label].lock();
          ++ m_true_positives[m_label];
          m_tp_mutex[m_label].unlock();
          continue;
        }
        m_fp_mutex[res].lock();
        ++ m_false_positives[res];
        m_fp_mutex[res].unlock();

        m_fn_mutex[m_label].lock();
        ++ m_false_negatives[m_label];
        m_fn_mutex[m_label].unlock();
      }
    }

  };
#endif // CGAL_LINKED_WITH_TBB

  struct Feature_training
  {
    std::size_t i;
    float wmin;
    float wmax;
    float factor;

    bool operator<(const Feature_training& other) const
    {
      return (wmin / wmax) < (other.wmin / other.wmax);
    }
  };

  const Label_set& m_labels;
  const Feature_set& m_features;
  std::vector<float> m_weights;
  std::vector<std::vector<Effect> > m_effect_table;
  mutable std::map<Label_handle, std::size_t> m_map_labels;
  mutable std::map<Feature_handle, std::size_t> m_map_features;

public:

  /// \name Constructor
  /// @{

  /*!
    \brief Instantiates the classifier using the sets of `labels` and `features`.

    \note If the label set or the feature set is modified after
    instantiating this object (addition or removal of a label and/or of
    a feature), another classifier object should be instantiated, as the
    internal data structures of this one are invalidated.
  */
  Sum_of_weighted_features_classifier (const Label_set& labels,
                                       const Feature_set& features)
    : m_labels (labels), m_features (features),
      m_weights (features.size(), 1.),
      m_effect_table (labels.size(), std::vector<Effect>
                      (features.size(),
                       NEUTRAL))
  {
    for (std::size_t i = 0; i < labels.size(); ++ i)
      m_map_labels[labels[i]] = i;
    for (std::size_t i = 0; i < features.size(); ++ i)
      m_map_features[features[i]] = i;
  }

  /// @}
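
  /* A minimal usage sketch; the label names and the way `features` is
     filled are illustrative assumptions, not part of this header:

     \code{.cpp}
     CGAL::Classification::Label_set labels;
     CGAL::Classification::Label_handle ground = labels.add ("ground");
     CGAL::Classification::Label_handle vegetation = labels.add ("vegetation");

     CGAL::Classification::Feature_set features;
     // ... fill `features`, e.g. with a feature generator ...

     CGAL::Classification::Sum_of_weighted_features_classifier
       classifier (labels, features);
     \endcode
  */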

  /// \name Weights and Effects
  /// @{

  /*!
    \brief Sets the weight of `feature` (`weight` must be positive).
  */
  void set_weight (Feature_handle feature, float weight)
  {
    m_weights[m_map_features[feature]] = weight;
  }
  /// \cond SKIP_IN_MANUAL
  void set_weight (std::size_t feature, float weight)
  {
    m_weights[feature] = weight;
  }
  /// \endcond

  /*!
    \brief Returns the weight of `feature`.
  */
  float weight (Feature_handle feature) const
  {
    return m_weights[m_map_features[feature]];
  }
  /// \cond SKIP_IN_MANUAL
  float weight (std::size_t feature) const
  {
    return m_weights[feature];
  }
  /// \endcond

  /*!
    \brief Sets the `effect` of `feature` on `label`.
  */
  void set_effect (Label_handle label, Feature_handle feature,
                   Effect effect)
  {
    m_effect_table[m_map_labels[label]][m_map_features[feature]] = effect;
  }
  /// \cond SKIP_IN_MANUAL
  void set_effect (std::size_t label, std::size_t feature,
                   Effect effect)
  {
    m_effect_table[label][feature] = effect;
  }
  /// \endcond

  /*!
    \brief Returns the effect of `feature` on `label`.
  */
  Effect effect (Label_handle label, Feature_handle feature) const
  {
    return m_effect_table[m_map_labels[label]][m_map_features[feature]];
  }
  /// \cond SKIP_IN_MANUAL
  Effect effect (std::size_t label, std::size_t feature) const
  {
    return m_effect_table[label][feature];
  }
  /// \endcond

  /// @}
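
  /* A minimal tuning sketch; `ground`, `vegetation` and the feature handle
     `elevation` are assumed to exist, and the numeric weight is illustrative:

     \code{.cpp}
     classifier.set_weight (elevation, 10.f);
     classifier.set_effect (ground, elevation,
       CGAL::Classification::Sum_of_weighted_features_classifier::PENALIZING);
     classifier.set_effect (vegetation, elevation,
       CGAL::Classification::Sum_of_weighted_features_classifier::FAVORING);
     \endcode
  */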

  /// \cond SKIP_IN_MANUAL
  void operator() (std::size_t item_index,
                   std::vector<float>& out) const
  {
    out.resize (m_labels.size());
    for (std::size_t l = 0; l < m_labels.size(); ++ l)
    {
      out[l] = 0.;
      for (std::size_t f = 0; f < m_features.size(); ++ f)
        if (weight(f) != 0.)
          out[l] += value (l, f, item_index);
    }
  }
  /// \endcond

  /// \name Training
  /// @{

  /*!
    \brief Runs the training algorithm.

    From the provided ground truth, this algorithm estimates the sets
    of weights and effects that produce the most accurate result with
    respect to this ground truth. Old weights and effects are
    discarded.

    \pre At least one ground truth item should be assigned to each
    label.

    \param ground_truth vector of label indices. It should contain,
    for each input item and in the same order as the input set, the
    index of the corresponding label in the `Label_set` provided in
    the constructor. Input items without ground truth information
    should be given the value `-1`.

    \param nb_tests number of tests to perform. Higher values may
    provide the user with better results at the cost of a higher
    computation time. Using a value of at least 10 times the number of
    features is advised.

    \return mean intersection-over-union over each label between the
    provided ground truth and the best classification found by the
    training algorithm.
  */
  template <typename ConcurrencyTag, typename LabelIndexRange>
  float train (const LabelIndexRange& ground_truth,
               unsigned int nb_tests = 300)
  {
    std::vector<std::vector<std::size_t> > training_sets (m_labels.size());
    std::size_t nb_tot = 0;
    for (std::size_t i = 0; i < ground_truth.size(); ++ i)
      if (ground_truth[i] != -1)
      {
        training_sets[std::size_t(ground_truth[i])].push_back (i);
        ++ nb_tot;
      }

#ifdef CLASSIFICATION_TRAINING_QUICK_ESTIMATION
    // Shuffle each training set so that the quick estimation below works
    // on a random subset (std::shuffle with an explicit engine;
    // std::random_shuffle is removed in C++17).
    std::mt19937 shuffle_engine (std::random_device{}());
    for (std::size_t i = 0; i < m_labels.size(); ++ i)
      std::shuffle (training_sets[i].begin(), training_sets[i].end(), shuffle_engine);
#endif

    CGAL_CLASSIFICATION_CERR << "Training using " << nb_tot << " inliers" << std::endl;

    for (std::size_t i = 0; i < m_labels.size(); ++ i)
      if (training_sets.size() <= i || training_sets[i].empty())
        std::cerr << "WARNING: \"" << m_labels[i]->name() << "\" doesn't have a training set." << std::endl;

    std::vector<float> best_weights (m_features.size(), 1.);

    std::vector<Feature_training> feature_train;
    std::size_t nb_trials = 100;
    float wmin = 1e-5f, wmax = 1e5f;
    float factor = std::pow (wmax / wmin, 1.f / float(nb_trials));

    for (std::size_t j = 0; j < m_features.size(); ++ j)
    {
      Feature_handle feature = m_features[j];
      best_weights[j] = weight(j);

      std::size_t nb_useful = 0;
      float min = (std::numeric_limits<float>::max)();
      float max = -(std::numeric_limits<float>::max)();

      set_weight(j, wmin);
      for (std::size_t i = 0; i < 100; ++ i)
      {
        estimate_feature_effect(j, training_sets);
        if (feature_useful(j))
        {
          CGAL_CLASSTRAINING_CERR << "#";
          nb_useful ++;
          min = (std::min) (min, weight(j));
          max = (std::max) (max, weight(j));
        }
        else
          CGAL_CLASSTRAINING_CERR << "-";
        set_weight(j, factor * weight(j));
      }
      CGAL_CLASSTRAINING_CERR << std::endl;
      CGAL_CLASSTRAINING_CERR << feature->name() << " useful in "
                              << nb_useful << "% of the cases, in interval [ "
                              << min << " ; " << max << " ]" << std::endl;
      if (nb_useful < 2)
      {
        set_weight(j, 0.);
        best_weights[j] = weight(j);
        continue;
      }

      feature_train.push_back (Feature_training());
      feature_train.back().i = j;
      feature_train.back().wmin = min / factor;
      feature_train.back().wmax = max * factor;

      if (best_weights[j] == 1.)
      {
        set_weight(j, 0.5f * (feature_train.back().wmin + feature_train.back().wmax));
        best_weights[j] = weight(j);
      }
      else
        set_weight(j, best_weights[j]);
      estimate_feature_effect(j, training_sets);
    }

    std::size_t nb_trials_per_feature = 1 + (std::size_t)(nb_tests / (float)(feature_train.size()));
    CGAL_CLASSIFICATION_CERR << "Trials = " << nb_tests << ", features = " << feature_train.size()
                             << ", trials per feature = " << nb_trials_per_feature << std::endl;
    for (std::size_t i = 0; i < feature_train.size(); ++ i)
      feature_train[i].factor
        = std::pow (feature_train[i].wmax / feature_train[i].wmin,
                    1.f / float(nb_trials_per_feature));

    float best_score = compute_mean_iou<ConcurrencyTag>(training_sets);

    CGAL_CLASSIFICATION_CERR << "TRAINING GLOBALLY: Best score evolution: " << std::endl;

    CGAL_CLASSIFICATION_CERR << 100. * best_score << "% (found at initialization)" << std::endl;

    std::sort (feature_train.begin(), feature_train.end());
    for (std::size_t i = 0; i < feature_train.size(); ++ i)
    {
      const Feature_training& tr = feature_train[i];
      std::size_t current_feature_changed = tr.i;

      std::size_t nb_used = 0;
      for (std::size_t j = 0; j < m_features.size(); ++ j)
      {
        if (j == current_feature_changed)
          continue;

        set_weight(j, best_weights[j]);
        estimate_feature_effect(j, training_sets);
        if (feature_useful(j))
          nb_used ++;
        else
          set_weight(j, 0.);
      }

      set_weight(current_feature_changed, tr.wmin);
      for (std::size_t j = 0; j < nb_trials_per_feature; ++ j)
      {
        estimate_feature_effect(current_feature_changed, training_sets);

        float worst_score = compute_mean_iou<ConcurrencyTag>(training_sets);
        if (worst_score > best_score)
        {
          best_score = worst_score;
          CGAL_CLASSIFICATION_CERR << 100. * best_score << "% (found at iteration "
                                   << (i * nb_trials_per_feature) + j << "/" << nb_tests << ", "
                                   << nb_used + (feature_useful(current_feature_changed) ? 1 : 0)
                                   << "/" << m_features.size() << " feature(s) used)" << std::endl;
          for (std::size_t k = 0; k < m_features.size(); ++ k)
            best_weights[k] = weight(k);
        }
        set_weight(current_feature_changed, weight(current_feature_changed) * tr.factor);
      }
    }

    for (std::size_t i = 0; i < best_weights.size(); ++ i)
      set_weight(i, best_weights[i]);

    estimate_features_effects(training_sets);

    CGAL_CLASSIFICATION_CERR << std::endl << "Best score found is at least " << 100. * best_score
                             << "% of correct classification" << std::endl;

    std::size_t nb_removed = 0;
    for (std::size_t i = 0; i < best_weights.size(); ++ i)
    {
      Feature_handle feature = m_features[i];
      CGAL_CLASSTRAINING_CERR << "FEATURE " << feature->name() << ": " << best_weights[i] << std::endl;
      set_weight(i, best_weights[i]);

      Effect side = effect(0, i);
      bool to_remove = true;
      for (std::size_t j = 0; j < m_labels.size(); ++ j)
      {
        Label_handle clabel = m_labels[j];
        if (effect(j,i) == FAVORING)
          CGAL_CLASSTRAINING_CERR << " * Favored for ";
        else if (effect(j,i) == PENALIZING)
          CGAL_CLASSTRAINING_CERR << " * Penalized for ";
        else
          CGAL_CLASSTRAINING_CERR << " * Neutral for ";
        if (effect(j,i) != side)
          to_remove = false;
        CGAL_CLASSTRAINING_CERR << clabel->name() << std::endl;
      }
      if (to_remove)
      {
        CGAL_CLASSTRAINING_CERR << " -> Useless! Should be removed" << std::endl;
        ++ nb_removed;
      }
    }
    CGAL_CLASSIFICATION_CERR << nb_removed
                             << " feature(s) out of " << m_features.size() << " are useless" << std::endl;

    return best_score;
  }
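
  /* A minimal training sketch, assuming `ground_truth` holds one label index
     per input item (-1 for unlabeled items) and that sequential execution is
     wanted; `input_size` and the value 800 are illustrative placeholders:

     \code{.cpp}
     std::vector<int> ground_truth (input_size, -1);
     // ... assign label indices to the items with known labels ...
     float mean_iou = classifier.train<CGAL::Sequential_tag> (ground_truth, 800);
     std::cerr << "Mean IoU after training: " << mean_iou << std::endl;
     \endcode
  */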

  /// @}

  /// \cond SKIP_IN_MANUAL
  template <typename ConcurrencyTag, typename LabelIndexRange>
  float train_random (const LabelIndexRange& ground_truth,
                      unsigned int nb_tests = 300)
  {
    std::vector<std::vector<std::size_t> > training_sets (m_labels.size());
    std::size_t nb_tot = 0;
    for (std::size_t i = 0; i < ground_truth.size(); ++ i)
      if (ground_truth[i] != -1)
      {
        training_sets[std::size_t(ground_truth[i])].push_back (i);
        ++ nb_tot;
      }

    CGAL_CLASSIFICATION_CERR << "Training using " << nb_tot << " inliers" << std::endl;

    for (std::size_t i = 0; i < m_labels.size(); ++ i)
      if (training_sets.size() <= i || training_sets[i].empty())
        std::cerr << "WARNING: \"" << m_labels[i]->name() << "\" doesn't have a training set." << std::endl;

    std::vector<float> best_weights (m_features.size(), 1.);

    std::vector<Feature_training> feature_train;
    std::size_t nb_trials = 100;
    float wmin = 1e-5f, wmax = 1e5f;
    float factor = std::pow (wmax / wmin, 1.f / float(nb_trials));

    for (std::size_t j = 0; j < m_features.size(); ++ j)
    {
      Feature_handle feature = m_features[j];
      best_weights[j] = weight(j);

      std::size_t nb_useful = 0;
      float min = (std::numeric_limits<float>::max)();
      float max = -(std::numeric_limits<float>::max)();

      set_weight(j, wmin);
      for (std::size_t i = 0; i < 100; ++ i)
      {
        estimate_feature_effect(j, training_sets);
        if (feature_useful(j))
        {
          CGAL_CLASSTRAINING_CERR << "#";
          nb_useful ++;
          min = (std::min) (min, weight(j));
          max = (std::max) (max, weight(j));
        }
        else
          CGAL_CLASSTRAINING_CERR << "-";
        set_weight(j, factor * weight(j));
      }
      CGAL_CLASSTRAINING_CERR << std::endl;
      CGAL_CLASSTRAINING_CERR << feature->name() << " useful in "
                              << nb_useful << "% of the cases, in interval [ "
                              << min << " ; " << max << " ]" << std::endl;
      if (nb_useful < 2)
      {
        set_weight(j, 0.);
        best_weights[j] = weight(j);
        continue;
      }

      feature_train.push_back (Feature_training());
      feature_train.back().i = j;
      feature_train.back().wmin = min / factor;
      feature_train.back().wmax = max * factor;

      if (best_weights[j] == 1.)
      {
        set_weight(j, 0.5f * (feature_train.back().wmin + feature_train.back().wmax));
        best_weights[j] = weight(j);
      }
      else
        set_weight(j, best_weights[j]);
      estimate_feature_effect(j, training_sets);
    }

    CGAL_CLASSIFICATION_CERR << "Trials = " << nb_tests << ", features = " << feature_train.size() << std::endl;

    float best_score = compute_mean_iou<ConcurrencyTag>(training_sets);

    CGAL_CLASSIFICATION_CERR << "TRAINING GLOBALLY: Best score evolution: " << std::endl;

    CGAL_CLASSIFICATION_CERR << 100. * best_score << "% (found at initialization)" << std::endl;

    for (std::size_t i = 0; i < std::size_t(nb_tests); ++ i)
    {
      std::size_t nb_used = 0;
      std::size_t j = rand() % feature_train.size();
      set_weight (feature_train[j].i,
                  feature_train[j].wmin + ((feature_train[j].wmax - feature_train[j].wmin)
                                           * (rand() / float(RAND_MAX))));
      estimate_feature_effect(feature_train[j].i, training_sets);

      float worst_score = compute_mean_iou<ConcurrencyTag>(training_sets);

      if (worst_score > best_score)
      {
        best_score = worst_score;
        CGAL_CLASSIFICATION_CERR << 100. * best_score << "% (found at iteration "
                                 << i << "/" << nb_tests << ", "
                                 << nb_used
                                 << "/" << m_features.size() << " feature(s) used)" << std::endl;
        for (std::size_t k = 0; k < m_features.size(); ++ k)
          best_weights[k] = weight(k);
      }
      set_weight (feature_train[j].i,
                  best_weights[feature_train[j].i]);
      estimate_feature_effect(feature_train[j].i, training_sets);
    }

    for (std::size_t i = 0; i < best_weights.size(); ++ i)
      set_weight(i, best_weights[i]);

    estimate_features_effects(training_sets);

    CGAL_CLASSIFICATION_CERR << std::endl << "Best score found is at least " << 100. * best_score
                             << "% of correct classification" << std::endl;

    std::size_t nb_removed = 0;
    for (std::size_t i = 0; i < best_weights.size(); ++ i)
    {
      Feature_handle feature = m_features[i];
      CGAL_CLASSTRAINING_CERR << "FEATURE " << feature->name() << ": " << best_weights[i] << std::endl;
      set_weight(i, best_weights[i]);

      Effect side = effect(0, i);
      bool to_remove = true;
      for (std::size_t j = 0; j < m_labels.size(); ++ j)
      {
        Label_handle clabel = m_labels[j];
        if (effect(j,i) == FAVORING)
          CGAL_CLASSTRAINING_CERR << " * Favored for ";
        else if (effect(j,i) == PENALIZING)
          CGAL_CLASSTRAINING_CERR << " * Penalized for ";
        else
          CGAL_CLASSTRAINING_CERR << " * Neutral for ";
        if (effect(j,i) != side)
          to_remove = false;
        CGAL_CLASSTRAINING_CERR << clabel->name() << std::endl;
      }
      if (to_remove)
      {
        CGAL_CLASSTRAINING_CERR << " -> Useless! Should be removed" << std::endl;
        ++ nb_removed;
      }
    }
    CGAL_CLASSIFICATION_CERR << nb_removed
                             << " feature(s) out of " << m_features.size() << " are useless" << std::endl;

    return best_score;
  }
  /// \endcond

  /// \name Input/Output
  /// @{

  /*!
    \brief Saves the current configuration in the stream `output`.

    This makes it easy to save and recover a specific classification
    configuration, that is to say:

    - the weight of each feature;
    - the effect of each feature on each label.

    The output file is written in an XML format that is readable by
    the `load_configuration()` method.
  */
  void save_configuration (std::ostream& output)
  {
    boost::property_tree::ptree tree;

    for (std::size_t i = 0; i < m_features.size(); ++ i)
    {
      if (weight(m_features[i]) == 0)
        continue;
      boost::property_tree::ptree ptr;

      ptr.put("name", m_features[i]->name());
      ptr.put("weight", weight(m_features[i]));
      tree.add_child("classification.features.feature", ptr);
    }

    for (std::size_t i = 0; i < m_labels.size(); ++ i)
    {
      boost::property_tree::ptree ptr;
      ptr.put("name", m_labels[i]->name());
      for (std::size_t j = 0; j < m_features.size(); ++ j)
      {
        if (weight(j) == 0)
          continue;
        boost::property_tree::ptree ptr2;
        ptr2.put("name", m_features[j]->name());
        Effect e = effect(i, j);
        if (e == PENALIZING)
          ptr2.put("effect", "penalized");
        else if (e == NEUTRAL)
          ptr2.put("effect", "neutral");
        else if (e == FAVORING)
          ptr2.put("effect", "favored");
        ptr.add_child("feature", ptr2);
      }
      tree.add_child("classification.labels.label", ptr);
    }
    // Write property tree to XML file
    boost::property_tree::write_xml(output, tree,
#if BOOST_VERSION >= 105600
                                    boost::property_tree::xml_writer_make_settings<std::string>(' ', 3));
#else
                                    boost::property_tree::xml_writer_make_settings<char>(' ', 3));
#endif
  }

  /*!
    \brief Loads a configuration from the stream `input`. A
    configuration is a set of weights and effects.

    The input file should be in the XML format written by the
    `save_configuration()` method. Labels and features are described
    in the XML file by their name, so the corresponding `Label` and
    `Feature_base` objects should be given the same names as the ones
    they had when the configuration was saved.

    \note If a feature (or label) found in the input file is not found
    in the `Feature_set` (`Label_set`) provided by the user in the
    constructor, and if `verbose` is set to `true`, a warning is
    displayed.

    \note If a feature (or label) provided by the user in the
    constructor is not described in the input file, the corresponding
    weights and effects are kept at their default values (1 for the
    weight and `NEUTRAL` for the effect).

    \param input input stream.
    \param verbose displays warnings if set to `true`. The method is
    silent otherwise.

    \return `true` if all weights and effects found in the
    configuration file were applicable to the feature set and label
    set of this classifier, `false` otherwise.
  */
  bool load_configuration (std::istream& input, bool verbose = false)
  {
    bool out = true;
    std::map<std::string, std::size_t> map_n2l;
    std::map<std::string, std::size_t> map_n2f;
    for (std::size_t i = 0; i < m_labels.size(); ++ i)
      map_n2l.insert (std::make_pair (m_labels[i]->name(), i));
    for (std::size_t i = 0; i < m_features.size(); ++ i)
      map_n2f.insert (std::make_pair (m_features[i]->name(), i));

    boost::property_tree::ptree tree;
    boost::property_tree::read_xml(input, tree);

    BOOST_FOREACH(boost::property_tree::ptree::value_type &v, tree.get_child("classification.features"))
    {
      std::string name = v.second.get<std::string>("name");
      std::map<std::string, std::size_t>::iterator
        found = map_n2f.find (name);
      if (found != map_n2f.end())
        m_weights[found->second] = v.second.get<float>("weight");
      else
      {
        if (verbose)
          std::cerr << "Warning: feature \"" << name << "\" in configuration file not found" << std::endl;
        out = false;
      }
    }

    BOOST_FOREACH(boost::property_tree::ptree::value_type &v, tree.get_child("classification.labels"))
    {
      std::string label_name = v.second.get<std::string>("name");
      std::map<std::string, std::size_t>::iterator
        found = map_n2l.find (label_name);
      std::size_t l = 0;
      if (found != map_n2l.end())
        l = found->second;
      else
      {
        if (verbose)
          std::cerr << "Warning: label \"" << label_name << "\" in configuration file not found" << std::endl;
        out = false;
        continue;
      }

      BOOST_FOREACH(boost::property_tree::ptree::value_type &v2, v.second)
      {
        if (v2.first == "name")
          continue;

        std::string feature_name = v2.second.get<std::string>("name");

        std::map<std::string, std::size_t>::iterator
          found2 = map_n2f.find (feature_name);
        std::size_t f = 0;
        if (found2 != map_n2f.end())
          f = found2->second;
        else // unknown feature: skip it even in silent mode
        {
          if (verbose)
            std::cerr << "Warning: feature \"" << feature_name << "\" in configuration file not found" << std::endl;
          out = false;
          continue;
        }
        std::string e = v2.second.get<std::string>("effect");
        if (e == "penalized")
          set_effect (l, f, PENALIZING);
        else if (e == "neutral")
          set_effect (l, f, NEUTRAL);
        else
          set_effect (l, f, FAVORING);
      }
    }
    return out;
  }
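
  /* A minimal save/load round trip; the file name is an illustrative
     assumption and `<fstream>` is assumed to be included by the caller:

     \code{.cpp}
     std::ofstream fout ("classifier_config.xml");
     classifier.save_configuration (fout);
     fout.close();

     std::ifstream fin ("classifier_config.xml");
     if (!classifier.load_configuration (fin, true)) // verbose warnings
       std::cerr << "Some weights/effects could not be applied" << std::endl;
     \endcode
  */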

  /// @}

private:

  float value (std::size_t label, std::size_t feature, std::size_t index) const
  {
    if (m_effect_table[label][feature] == FAVORING)
      return favored (feature, index);
    else if (m_effect_table[label][feature] == PENALIZING)
      return penalized (feature, index);
    else
      return ignored (feature, index);
  }

  float normalized (std::size_t feature, std::size_t index) const
  {
    return (std::max) (0.f, (std::min) (1.f, m_features[feature]->value(index) / m_weights[feature]));
  }
  float favored (std::size_t feature, std::size_t index) const
  {
    return (1.f - normalized (feature, index));
  }
  float penalized (std::size_t feature, std::size_t index) const
  {
    return normalized (feature, index);
  }
  float ignored (std::size_t, std::size_t) const
  {
    return 0.5f;
  }
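
  /* The helpers above define the per-feature energy terms: the raw feature
     value is divided by the feature weight and clamped to [0,1]; a FAVORING
     feature then contributes 1 - normalized, a PENALIZING one contributes
     normalized, and a NEUTRAL one a constant 0.5. The label minimizing the
     summed energy wins. With illustrative numbers: weight 2 and raw value
     1.5 give normalized = 0.75, hence a term of 0.25 when favoring and 0.75
     when penalizing. */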

  void estimate_features_effects(std::vector<std::vector<std::size_t> >& training_sets)
  {
    for (std::size_t i = 0; i < m_features.size(); ++ i)
      estimate_feature_effect (i, training_sets);
  }

  void estimate_feature_effect (std::size_t feature,
                                std::vector<std::vector<std::size_t> >& training_sets)
  {
    std::vector<float> mean (m_labels.size(), 0.);

    for (std::size_t j = 0; j < m_labels.size(); ++ j)
    {
#ifdef CLASSIFICATION_TRAINING_QUICK_ESTIMATION
      std::size_t training_set_size = (std::min) (std::size_t(0.1 * training_sets[j].size()),
                                                  std::size_t(10000));
#else
      std::size_t training_set_size = training_sets[j].size();
#endif

      for (std::size_t k = 0; k < training_set_size; ++ k)
      {
        float val = normalized(feature, training_sets[j][k]);
        mean[j] += val;
      }
      mean[j] /= training_set_size;
    }

    std::vector<float> sd (m_labels.size(), 0.);

    for (std::size_t j = 0; j < m_labels.size(); ++ j)
    {
#ifdef CLASSIFICATION_TRAINING_QUICK_ESTIMATION
      std::size_t training_set_size = (std::min) (std::size_t(0.1 * training_sets[j].size()),
                                                  std::size_t(10000));
#else
      std::size_t training_set_size = training_sets[j].size();
#endif
      for (std::size_t k = 0; k < training_set_size; ++ k)
      {
        float val = normalized(feature, training_sets[j][k]);
        sd[j] += (val - mean[j]) * (val - mean[j]);
      }
      sd[j] = std::sqrt (sd[j] / training_set_size);
      if (mean[j] - sd[j] > (2./3.))
        set_effect (j, feature, FAVORING);
      else if (mean[j] + sd[j] < (1./3.))
        set_effect (j, feature, PENALIZING);
      else
        set_effect (j, feature, NEUTRAL);
    }
  }
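
  /* Heuristic used above: for each label, if the normalized values of its
     training set are concentrated in the upper third of [0,1]
     (mean - sd > 2/3), high values are typical of the label and the feature
     is marked FAVORING; if they are concentrated in the lower third
     (mean + sd < 1/3), it is marked PENALIZING; otherwise it is NEUTRAL.
     For instance, a mean of 0.8 with a standard deviation of 0.1 gives
     0.8 - 0.1 = 0.7 > 2/3, hence FAVORING. */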

  template <typename ConcurrencyTag>
  float compute_mean_iou (std::vector<std::vector<std::size_t> >& training_sets)
  {
    std::vector<std::size_t> true_positives (m_labels.size());
    std::vector<std::size_t> false_positives (m_labels.size());
    std::vector<std::size_t> false_negatives (m_labels.size());

    for (std::size_t j = 0; j < training_sets.size(); ++ j)
    {
      std::size_t gt = j;

#ifndef CGAL_LINKED_WITH_TBB
      CGAL_static_assertion_msg (!(boost::is_convertible<ConcurrencyTag, Parallel_tag>::value),
                                 "Parallel_tag is enabled but TBB is unavailable.");
#else
      if (boost::is_convertible<ConcurrencyTag, Parallel_tag>::value)
      {
        std::vector<tbb::mutex> tp_mutex (m_labels.size());
        std::vector<tbb::mutex> fp_mutex (m_labels.size());
        std::vector<tbb::mutex> fn_mutex (m_labels.size());
        Compute_iou f(training_sets[j], *this, j,
                      true_positives, false_positives, false_negatives,
                      tp_mutex, fp_mutex, fn_mutex);
        tbb::parallel_for(tbb::blocked_range<size_t>(0, training_sets[j].size ()), f);
      }
      else
#endif
        for (std::size_t k = 0; k < training_sets[j].size(); ++ k)
        {
          std::size_t res = 0;

          std::vector<float> v;
          (*this) (training_sets[j][k], v);

          float min = (std::numeric_limits<float>::max)();
          for (std::size_t l = 0; l < m_labels.size(); ++ l)
            if (v[l] < min)
            {
              min = v[l];
              res = l;
            }

          if (gt == res)
          {
            ++ true_positives[gt];
            continue;
          }
          ++ false_positives[res];
          ++ false_negatives[gt];
        }
    }

    float out = 0.;

    for (std::size_t j = 0; j < m_labels.size(); ++ j)
    {
      float iou = true_positives[j] / float(true_positives[j] + false_positives[j] + false_negatives[j]);
      out += iou;
    }

    return out / m_labels.size();
  }
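
  /* The score returned above is the mean over labels of the
     intersection-over-union IoU(l) = TP(l) / (TP(l) + FP(l) + FN(l)).
     For instance, 80 true positives with 10 false positives and 10 false
     negatives give IoU = 80 / 100 = 0.8. */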

  bool feature_useful (std::size_t feature)
  {
    Effect side = effect(0, feature);
    for (std::size_t k = 1; k < m_labels.size(); ++ k)
      if (effect(k, feature) != side)
        return true;
    return false;
  }

};

} // namespace Classification

} // namespace CGAL

#endif // CLASSIFICATION_SUM_OF_WEIGHTED_FEATURES_CLASSIFIER_H