Add random forest predicate

2017-03-27 15:17:27 +02:00 · 2017-03-27 15:17:27 +02:00 · 735c3d5d54
parent 34070bfd52
commit 735c3d5d54
1 changed files with 139 additions and 0 deletions
--- a/Classification/include/CGAL/Classification/Random_forest_predicate.h
+++ b/Classification/include/CGAL/Classification/Random_forest_predicate.h
@ -0,0 +1,139 @@
 #ifndef CGAL_CLASSIFICATION_RANDOM_FOREST_PREDICATE_H
 #define CGAL_CLASSIFICATION_RANDOM_FOREST_PREDICATE_H
 #include <CGAL/Classification/Feature_set.h>
 #include <CGAL/Classification/Label_set.h>
 #include <cv.h>       // opencv general include file
 #include <ml.h>		  // opencv machine learning include file
 namespace CGAL {
 namespace Classification {
 /*!
  \ingroup PkgClassificationPredicates
  \brief %Classification predicate based on a random forest algorithm.
  \note This class requires the \ref thirdpartyOpenCV library.
  \cgalModels `CGAL::Classification::Predicate`
 */
 class Random_forest_predicate
 {
  Label_set& m_labels;
  Feature_set& m_features;
  CvRTrees* rtree;
 public:
 /*!
  \brief Instantiate the predicate using the sets of `labels` and `features`.
 */
  Random_forest_predicate (Label_set& labels,
                           Feature_set& features)
    : m_labels (labels), m_features (features), rtree (NULL)
  {  }
  /// \cond SKIP_IN_MANUAL
  ~Random_forest_predicate ()
  {
    if (rtree != NULL)
      delete rtree;
  }
  /// \endcond
  /*!
    \brief Runs the training algorithm.
    From the set of provided ground truth, this algorithm estimates
    sets up the random trees that produce the most accurate result
    with respect to this ground truth.
    For more details on the parameters of this algorithm, please refer
    to [the official documentation of OpenCV](http://docs.opencv.org/2.4/modules/ml/doc/random_trees.html).
    \note Each label should be assigned at least one ground truth
    item.
    \param ground_truth vector of label indices. It should contain for
    each input item, in the same order as the input set, the index of
    the corresponding label in the `Label_set` provided in the
    constructor. Input items that do not have a ground truth
    information should be given the value `std::size_t(-1)`.
  */
  void train (const std::vector<std::size_t>& ground_truth,
              int max_depth = 20,
              int min_sample_count = 5,
              int max_categories = 15,
              int max_number_of_trees_in_the_forest = 100,
              float forest_accuracy = 0.01f)
  {
    if (rtree != NULL)
      delete rtree;
    std::size_t nb_samples = 0;
    for (std::size_t i = 0; i < ground_truth.size(); ++ i)
      if (ground_truth[i] != std::size_t(-1))
        ++ nb_samples;
    cv::Mat training_features (nb_samples, m_features.size(), CV_32FC1);
    cv::Mat training_labels (nb_samples, 1, CV_32FC1);
    for (std::size_t i = 0, index = 0; i < ground_truth.size(); ++ i)
      if (ground_truth[i] != std::size_t(-1))
        {
          for (std::size_t f = 0; f < m_features.size(); ++ f)
            training_features.at<float>(index, f) = m_features[f]->value(i);
          training_labels.at<float>(index, 0) = ground_truth[i];
          ++ index;
        }
    float* priors = new float[m_labels.size()];
    for (std::size_t i = 0; i < m_labels.size(); ++ i)
      priors[i] = 1.;
    CvRTParams params (max_depth, min_sample_count,
                       0, false, max_categories, priors, false, 0,
                       max_number_of_trees_in_the_forest,
                       forest_accuracy,
                       CV_TERMCRIT_ITER | CV_TERMCRIT_EPS
                       );
    cv::Mat var_type (m_features.size() + 1, 1, CV_8U);
    var_type.setTo (cv::Scalar(CV_VAR_NUMERICAL));
    rtree = new CvRTrees;
    rtree->train (training_features, CV_ROW_SAMPLE, training_labels,
                  cv::Mat(), cv::Mat(), var_type, cv::Mat(), params);
    delete[] priors;
  }
  /// \cond SKIP_IN_MANUAL
  void probabilities (std::size_t item_index, std::vector<float>& out) const
  {
    out.resize (m_labels.size(), 1.);
    cv::Mat feature (1, m_features.size(), CV_32FC1);
    for (std::size_t f = 0; f < m_features.size(); ++ f)
      feature.at<float>(0, f) = m_features[f]->value(item_index);
    float result = rtree->predict (feature, cv::Mat());
    std::size_t label = std::size_t(result);
    if (label < out.size())
      out[label] = 0.;
  }
  /// \endcond
 };
 }
 }
 #endif // CGAL_CLASSIFICATION_RANDOM_FOREST_PREDICATE_H