mirror of https://github.com/CGAL/cgal
Some modifications to EM; first version of k-means clustering.
parent b6f2edc59d
commit ae1b4eb61f
@@ -4013,6 +4013,7 @@ Surface_mesh_segmentation/example/Surface_mesh_segmentation/CMakeLists.txt -text
Surface_mesh_segmentation/example/Surface_mesh_segmentation/simple_example.cpp -text
Surface_mesh_segmentation/include/CGAL/Surface_mesh_segmentation.h -text
Surface_mesh_segmentation/include/CGAL/internal/Surface_mesh_segmentation/Expectation_maximization.h -text
Surface_mesh_segmentation/include/CGAL/internal/Surface_mesh_segmentation/K_means_clustering.h -text
Surface_mesh_segmentation/package_info/Surface_mesh_segmentation/copyright -text
Surface_mesh_segmentation/package_info/Surface_mesh_segmentation/license.txt -text
Surface_mesh_segmentation/package_info/Surface_mesh_segmentation/maintainer -text
Surface_mesh_segmentation/include/CGAL/Surface_mesh_segmentation.h
@@ -1,18 +1,18 @@
#ifndef CGAL_SURFACE_MESH_SEGMENTATION_H
#define CGAL_SURFACE_MESH_SEGMENTATION_H
/* NEED TO BE DONE
* About implementation:
* +) I am not using BGL, as far as I checked there is a progress on BGL redesign
/* NEED TO BE DONE */
/* About implementation:
/* +) I am not using BGL, as far as I checked there is a progress on BGL redesign
(https://cgal.geometryfactory.com/CGAL/Members/wiki/Features/BGL) which introduces some features
for face-based traversal / manipulation by FaceGraphs
* +) Deciding on which parameters will be taken from user
* +) Make it more readable: calculate_sdf_value_of_facet function.
for face-based traversal / manipulation by FaceGraphs */
/* +) Deciding on which parameters will be taken from user */
/* +) Make it more readable: calculate_sdf_value_of_facet function.

/* About paper (and correctness / efficiency etc.):
/* +) Weighting ray distances with inverse of their angles: not sure how to weight exactly */
/* +) Anisotropic smoothing: have no idea what it is exactly, should read some material (google search is not enough) */
/* +) Deciding how to generate rays in cone: for now using "polar angle" and "accept-reject (square)" and "concentric mapping" techniques */

* About paper (and correctness / efficiency etc.):
* +) Weighting ray distances with inverse of their angles: not sure how to weight exactly
* +) Anisotropic smoothing: have no idea what it is exactly, should read some material (google search is not enough)
* +) Deciding how to generate rays in cone: for now using "polar angle" and "generate in square then accept-reject" techniques
*/
#include <iostream>
#include <fstream>
#include <cmath>
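One way to read the "weighting ray distances with inverse of their angles" item above is as a weighted mean over the cast rays. The formula below is only a sketch of that reading (the exact weighting is still marked as undecided in the comments), with d_i the hit distance of ray i and theta_i its angle to the cone axis:

\mathrm{SDF}(f) \;=\; \frac{\sum_i w_i\, d_i}{\sum_i w_i}, \qquad w_i \;=\; \frac{1}{\theta_i}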
@@ -21,7 +21,9 @@
#include <utility>

//#include "Expectation_maximization.h"
//#include "K_means_clustering.h"
#include <CGAL/internal/Surface_mesh_segmentation/Expectation_maximization.h>
#include <CGAL/internal/Surface_mesh_segmentation/K_means_clustering.h>

#include <CGAL/AABB_tree.h>
#include <CGAL/AABB_traits.h>
@@ -58,7 +60,7 @@ protected:
typedef std::map<Facet_handle, int> Face_center_map;
/*Sampled points from disk, t1 = coordinate-x, t2 = coordinate-y, t3 = angle with cone-normal. */
typedef CGAL::Triple<double, double, double> Disk_sample;
typedef std::vector<CGAL::Triple<double, double, double> > Disk_samples_list;
typedef std::vector<CGAL::Triple<double, double, double>> Disk_samples_list;

template <typename ValueTypeName>
struct compare_pairs {
@@ -69,7 +71,7 @@ protected:

template <typename ValueTypeName>
struct compare_pairs_using_first {
bool operator()(const ValueTypeName& v1, const ValueTypeName& v2) {
bool operator()(ValueTypeName& v1, ValueTypeName& v2) {
return v1.first < v2.first;
}
};
@@ -88,11 +90,13 @@ public:

std::ofstream log_file;

//std::vector<int> center_memberships_temp;

//member functions
public:
Surface_mesh_segmentation(Polyhedron* mesh,
int number_of_rays_sqrt = 6, double cone_angle = (2.0 / 3.0) * CGAL_PI,
int number_of_centers = 1);
int number_of_rays_sqrt = 9, double cone_angle = (2.0 / 3.0) * CGAL_PI,
int number_of_centers = 2);

void calculate_sdf_values();

@@ -118,6 +122,8 @@ public:
void smooth_sdf_values();

void apply_GMM_fitting();
void apply_K_means_clustering();
void apply_GMM_fitting_with_K_means_init();

void write_sdf_values(const char* file_name);
void read_sdf_values(const char* file_name);
@@ -133,9 +139,8 @@ inline Surface_mesh_segmentation<Polyhedron>::Surface_mesh_segmentation(
{
disk_sampling_concentric_mapping();
calculate_sdf_values();
//write_sdf_values("sdf_values_2.txt");
//read_sdf_values("sdf_values.txt");
//apply_GMM_fitting();
//write_sdf_values("sdf_values_sample_9_p.txt");
//read_sdf_values("sdf_values_sample_9_p.txt");
}

template <class Polyhedron>
@@ -173,10 +178,12 @@ Surface_mesh_segmentation<Polyhedron>::calculate_sdf_value_of_facet(
v2 = v2 / CGAL::sqrt(v2.squared_length());

int ray_count = number_of_rays_sqrt * number_of_rays_sqrt;

std::vector<double> ray_distances, ray_weights;
ray_distances.reserve(ray_count);
ray_weights.reserve(ray_count);
double angle_st_dev = cone_angle / 4; //Not sure what to use here.

double angle_st_dev = cone_angle / 3; //Not sure what to use here.
double length_of_normal = 1.0 / tan(cone_angle / 2);
Vector normal = normal_const * length_of_normal;
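A short note on the length_of_normal line above, added only as a reading of the geometry: the disk samples lie at radius at most 1 around the cone axis, so scaling the unit facet normal to length L = 1/tan(cone_angle/2) makes a sample at the disk boundary leave the facet at exactly half the cone opening angle:

\tan\!\left(\tfrac{\text{cone\_angle}}{2}\right) \;=\; \frac{r_{\max}}{L}
\quad\Longrightarrow\quad
L \;=\; \frac{1}{\tan(\text{cone\_angle}/2)} \quad\text{for } r_{\max} = 1.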
@@ -200,6 +207,7 @@ Surface_mesh_segmentation<Polyhedron>::calculate_sdf_value_of_facet(
return calculate_sdf_value_from_rays_with_trimmed_mean(ray_distances,
ray_weights);
}

template <class Polyhedron>
void Surface_mesh_segmentation<Polyhedron>::cast_and_return_minimum(
const Ray& ray, const Tree& tree, const Facet_handle& facet,
@@ -261,8 +269,8 @@ Surface_mesh_segmentation<Polyhedron>::calculate_sdf_value_from_rays(
double total_weights = 0.0, total_distance = 0.0;
double median_sdf = 0.0, mean_sdf = 0.0, st_dev = 0.0;
/* Calculate mean sdf */
std::vector<double>::iterator w_it = ray_weights.begin();
for(std::vector<double>::iterator dist_it = ray_distances.begin();
typename std::vector<double>::iterator w_it = ray_weights.begin();
for(typename std::vector<double>::iterator dist_it = ray_distances.begin();
dist_it != ray_distances.end();
++dist_it, ++w_it) {
total_distance += (*dist_it) * (*w_it);
@@ -284,7 +292,7 @@ Surface_mesh_segmentation<Polyhedron>::calculate_sdf_value_from_rays(
median_sdf = ray_distances[half_ray_count];
}
/* Calculate st dev using mean_sdf as mean */
for(std::vector<double>::iterator dist_it = ray_distances.begin();
for(typename std::vector<double>::iterator dist_it = ray_distances.begin();
dist_it != ray_distances.end(); ++dist_it) {
double dif = (*dist_it) - mean_sdf;
st_dev += dif * dif;
@@ -292,7 +300,7 @@ Surface_mesh_segmentation<Polyhedron>::calculate_sdf_value_from_rays(
st_dev = CGAL::sqrt(st_dev / (ray_distances.size()));
/* Calculate sdf, accept rays : ray_dist - median < st dev */
w_it = ray_weights.begin();
for(std::vector<double>::iterator dist_it = ray_distances.begin();
for(typename std::vector<double>::iterator dist_it = ray_distances.begin();
dist_it != ray_distances.end();
++dist_it, ++w_it) {
if(fabs((*dist_it) - median_sdf) > st_dev * 0.5) {
@@ -312,8 +320,8 @@ Surface_mesh_segmentation<Polyhedron>::calculate_sdf_value_from_rays_with_mean(
{
double total_weights = 0.0, total_distance = 0.0;
double mean_sdf = 0.0, st_dev = 0.0;
std::vector<double>::iterator w_it = ray_weights.begin();
for(std::vector<double>::iterator dist_it = ray_distances.begin();
typename std::vector<double>::iterator w_it = ray_weights.begin();
for(typename std::vector<double>::iterator dist_it = ray_distances.begin();
dist_it != ray_distances.end(); ++dist_it, ++w_it) {
total_distance += (*dist_it) * (*w_it);
total_weights += (*w_it);
@@ -321,7 +329,7 @@ Surface_mesh_segmentation<Polyhedron>::calculate_sdf_value_from_rays_with_mean(
mean_sdf = total_distance / total_weights;
total_weights = 0.0;
total_distance = 0.0;
for(std::vector<double>::iterator dist_it = ray_distances.begin();
for(typename std::vector<double>::iterator dist_it = ray_distances.begin();
dist_it != ray_distances.end(); ++dist_it) {
double dif = (*dist_it) - mean_sdf;
st_dev += dif * dif;
@@ -329,7 +337,7 @@ Surface_mesh_segmentation<Polyhedron>::calculate_sdf_value_from_rays_with_mean(
st_dev = CGAL::sqrt(st_dev / (ray_distances.size()));

w_it = ray_weights.begin();
for(std::vector<double>::iterator dist_it = ray_distances.begin();
for(typename std::vector<double>::iterator dist_it = ray_distances.begin();
dist_it != ray_distances.end(); ++dist_it, ++w_it) {
if(fabs((*dist_it) - mean_sdf) > st_dev) {
continue;
@@ -346,16 +354,16 @@ Surface_mesh_segmentation<Polyhedron>::calculate_sdf_value_from_rays_with_trimme
std::vector<double>& ray_distances,
std::vector<double>& ray_weights) const
{
std::vector<std::pair<double, double> > distances_with_weights;
std::vector<std::pair<double, double>> distances_with_weights;
distances_with_weights.reserve(ray_distances.size());
typename std::vector<double>::iterator w_it = ray_weights.begin();
for(std::vector<double>::iterator dist_it = ray_distances.begin();
for(typename std::vector<double>::iterator dist_it = ray_distances.begin();
dist_it != ray_distances.end(); ++dist_it, ++w_it) {
distances_with_weights.push_back(std::pair<double, double>((*dist_it),
(*w_it)));
}
std::sort(distances_with_weights.begin(), distances_with_weights.end(),
compare_pairs_using_first<std::pair<double, double> >());
compare_pairs_using_first<std::pair<double, double>>());
int b = floor(distances_with_weights.size() / 20.0 + 0.5); // Eliminate %5.
int e = distances_with_weights.size() - b; // Eliminate %5.
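A quick arithmetic check of the trim bounds above ("%5" reads as 5%), assuming the 9 x 9 = 81 rays implied by the new constructor default:

n = 81:  b = floor(81 / 20.0 + 0.5) = floor(4.55) = 4,  e = 81 - 4 = 77

so presumably the sorted distances with indices in [4, 77) are kept, i.e. 73 of the 81 rays, trimming roughly 5% from each end before the weighted mean is taken.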
@@ -371,7 +379,7 @@ Surface_mesh_segmentation<Polyhedron>::calculate_sdf_value_from_rays_with_trimme

total_weights = 0.0;
total_distance = 0.0;
for(std::vector<double>::iterator dist_it = ray_distances.begin();
for(typename std::vector<double>::iterator dist_it = ray_distances.begin();
dist_it != ray_distances.end(); ++dist_it) {
double dif = (*dist_it) - trimmed_mean;
st_dev += dif * dif;
@@ -518,12 +526,55 @@ inline void Surface_mesh_segmentation<Polyhedron>::apply_GMM_fitting()
Expectation_maximization fitter(number_of_centers, sdf_vector);
std::vector<int> center_memberships;
fitter.fill_with_center_ids(center_memberships);
std::vector<int>::iterator center_it = center_memberships.begin();
typename std::vector<int>::iterator center_it = center_memberships.begin();
for(Facet_iterator facet_it = mesh->facets_begin();
facet_it != mesh->facets_end(); ++facet_it, ++center_it) {
centers.insert(std::pair<Facet_handle, int>(facet_it, (*center_it)));
}
}

template <class Polyhedron>
inline void Surface_mesh_segmentation<Polyhedron>::apply_K_means_clustering()
{
centers.clear();
std::vector<double> sdf_vector;
for(Facet_iterator facet_it = mesh->facets_begin();
facet_it != mesh->facets_end(); ++facet_it) {
sdf_vector.push_back(sdf_values[facet_it]);
}
K_means_clustering clusterer(number_of_centers, sdf_vector);
std::vector<int> center_memberships;
clusterer.fill_with_center_ids(center_memberships);
typename std::vector<int>::iterator center_it = center_memberships.begin();
for(Facet_iterator facet_it = mesh->facets_begin();
facet_it != mesh->facets_end(); ++facet_it, ++center_it) {
centers.insert(std::pair<Facet_handle, int>(facet_it, (*center_it)));
}
//center_memberships_temp = center_memberships; //remove
}
template <class Polyhedron>
inline void
Surface_mesh_segmentation<Polyhedron>::apply_GMM_fitting_with_K_means_init()
{
centers.clear();
std::vector<double> sdf_vector;
for(Facet_iterator facet_it = mesh->facets_begin();
facet_it != mesh->facets_end(); ++facet_it) {
sdf_vector.push_back(sdf_values[facet_it]);
}
K_means_clustering clusterer(number_of_centers, sdf_vector);
std::vector<int> center_memberships;
clusterer.fill_with_center_ids(center_memberships);
//std::vector<int> center_memberships = center_memberships_temp;
Expectation_maximization fitter(number_of_centers, sdf_vector,
center_memberships);
center_memberships.clear();
fitter.fill_with_center_ids(center_memberships);
typename std::vector<int>::iterator center_it = center_memberships.begin();
for(Facet_iterator facet_it = mesh->facets_begin();
facet_it != mesh->facets_end(); ++facet_it, ++center_it) {
centers.insert(std::pair<Facet_handle, int>(facet_it, (*center_it)));
}
}

template <class Polyhedron>
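Below is a minimal driver sketch for the member functions added above. It is hypothetical glue code, not part of this commit: the kernel/Polyhedron typedefs, the OFF file name and the assumption that the class sits in namespace CGAL are guesses; the constructor defaults (9x9 rays, (2/3)*pi cone, 2 centers) and the apply_* / write_sdf_values calls come from the hunks above.

#include <CGAL/Exact_predicates_inexact_constructions_kernel.h>
#include <CGAL/Polyhedron_3.h>
#include <CGAL/Surface_mesh_segmentation.h>
#include <fstream>

typedef CGAL::Exact_predicates_inexact_constructions_kernel Kernel;
typedef CGAL::Polyhedron_3<Kernel>                          Polyhedron;

int main()
{
  Polyhedron mesh;
  std::ifstream input("model.off");   // any closed triangle mesh (file name is a placeholder)
  input >> mesh;

  // The constructor samples the cone, casts the rays and fills the per-facet SDF map.
  CGAL::Surface_mesh_segmentation<Polyhedron> segmenter(&mesh);

  segmenter.apply_K_means_clustering();            // hard clustering of the SDF values
  segmenter.apply_GMM_fitting_with_K_means_init(); // EM refined from the k-means labels
  segmenter.write_sdf_values("sdf_values.txt");    // dump SDF values for inspection
  return 0;
}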
Surface_mesh_segmentation/include/CGAL/internal/Surface_mesh_segmentation/Expectation_maximization.h
@@ -1,10 +1,12 @@
#ifndef CGAL_EXPECTATION_MAXIMIZATION_H
#define CGAL_EXPECTATION_MAXIMIZATION_H
#ifndef CGAL_SEGMENTATION_EXPECTATION_MAXIMIZATION_H
#define CGAL_SEGMENTATION_EXPECTATION_MAXIMIZATION_H
/* NEED TO BE DONE */
/* About implementation:
/* Calculating probability multiple times: need to solved. */
/* Calculating probability multiple times */
/* About safe division: where centers have no members */
#include <vector>
#include <cmath>
#include <algorithm>
//#define DIV_INV_SQRT_2_PI 0.3989422804
namespace CGAL
{
@@ -18,17 +20,22 @@ public:
double deviation;
double mixing_coefficient;

Gaussian_center(): mean(0), deviation(0), mixing_coefficient(1.0) {
}
Gaussian_center(double mean, double deviation, double mixing_coefficient)
: mean(mean), deviation(deviation), mixing_coefficient(mixing_coefficient) {
}
double probability(double x) const {
double e_over = -0.5 * pow((x - mean) / deviation, 2);
return exp(e_over) * (1.0 / deviation) ;
return exp(e_over) / deviation;
}
double probability_proportional(double x) const {
double e_over = -0.5 * pow((x - mean) / deviation, 2);
return exp(e_over);
}
bool operator < (const Gaussian_center& center) {
return mean < center.mean;
}
void calculate_parameters(const std::vector<Gaussian_point>& points);
};
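A side note on probability() above, added only as a reading of the code: together with the commented-out DIV_INV_SQRT_2_PI constant it suggests the 1/sqrt(2*pi) factor of the normal density is left out on purpose; since that factor is the same for every center, it cancels when the per-center responsibilities are normalized:

p_k(x) \;\propto\; \frac{1}{\sigma_k}\exp\!\left(-\frac{(x-\mu_k)^2}{2\sigma_k^2}\right),
\qquad
r_k(x) \;=\; \frac{\pi_k\, p_k(x)}{\sum_j \pi_j\, p_j(x)}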
@@ -72,7 +79,7 @@ inline void Gaussian_center::calculate_parameters(const
}
new_deviation = sqrt(new_deviation/total_membership);
/* Calculate new mixing coefficient */
mixing_coefficient = total_membership / points.size();
mixing_coefficient = total_membership;

deviation = new_deviation;
mean = new_mean;
@@ -84,13 +91,19 @@ public:
std::vector<Gaussian_center> centers;
std::vector<Gaussian_point> points;
double threshold;
int maximum_iteration;
bool is_converged;
Expectation_maximization(int number_of_centers, const std::vector<double>& data,
const std::vector<int>& initial_centers = std::vector<int>(),
int maximum_iteration = 500)
: points(data.begin(), data.end()), threshold(1e-4),
maximum_iteration(maximum_iteration), is_converged(false) {

Expectation_maximization(int number_of_centers, const std::vector<double>& data)
: points(data.begin(), data.end()), threshold(0.00000001) {
initiate_centers(number_of_centers);
initiate_centers(number_of_centers, initial_centers);
calculate_fitting();
}
void fill_with_center_ids(std::vector<int>& data_centers) {
data_centers.reserve(points.size());
for(std::vector<Gaussian_point>::iterator point_it = points.begin();
point_it != points.end(); ++point_it) {
double max_likelihood = 0.0;
@@ -108,15 +121,46 @@ public:
}
}
protected:
void initiate_centers(int number_of_centers) {
double initial_deviation = 1.0 / (2.0 * number_of_centers);
double initial_mixing_coefficient = 1.0 / number_of_centers;
for(int i = 0; i < number_of_centers; ++i) {
double initial_mean = (i + 1.0) / (number_of_centers + 1.0);
Gaussian_center center(initial_mean, initial_deviation,
initial_mixing_coefficient);
centers.push_back(center);
void initiate_centers(int number_of_centers,
const std::vector<int>& initial_centers) {
if(initial_centers.empty()) {
/* Uniformly generate centers */
double initial_deviation = 1.0 / (2.0 * number_of_centers);
double initial_mixing_coefficient = 1.0 / number_of_centers;
for(int i = 0; i < number_of_centers; ++i) {
double initial_mean = (i + 1.0) / (number_of_centers + 1.0);
centers.push_back(Gaussian_center(initial_mean, initial_deviation,
initial_mixing_coefficient));
}
} else {
/* Calculate mean */
int number_of_point = initial_centers.size();
centers = std::vector<Gaussian_center>(number_of_centers);
std::vector<int> member_count(number_of_centers, 0);

for(int i = 0; i < number_of_point; ++i) {
int center_id = initial_centers[i];
double data = points[i].data;
centers[center_id].mean += data;
member_count[center_id] += 1;
}
/* Assign mean, and mixing coef */
for(int i = 0; i < number_of_centers; ++i) {
centers[i].mean /= member_count[i];
centers[i].mixing_coefficient = member_count[i] / static_cast<double>
(number_of_point);
}
/* Calculate deviation */
for(int i = 0; i < number_of_point; ++i) {
int center_id = initial_centers[i];
double data = points[i].data;
centers[center_id].deviation += pow(data - centers[center_id].mean, 2);
}
for(int i = 0; i < number_of_centers; ++i) {
centers[i].deviation = sqrt(centers[i].deviation / member_count[i]);
}
}
sort(centers.begin(), centers.end());
}
/*Calculates total membership values for a point */
void calculate_membership() {
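Written as formulas, the else branch above initializes each Gaussian from the hard assignment c_i delivered by k-means (a restatement of the code, with n_k the member count of center k and N the number of points):

\mu_k = \frac{1}{n_k}\sum_{i:\,c_i = k} x_i,
\qquad
\pi_k = \frac{n_k}{N},
\qquad
\sigma_k = \sqrt{\frac{1}{n_k}\sum_{i:\,c_i = k} (x_i - \mu_k)^2}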
@@ -147,15 +191,22 @@ protected:
}
return likelihood;
}
double iterate() {
calculate_membership();
calculate_parameters();
return calculate_likelihood();
}
void calculate_fitting() {
double prev_likelihood, likelihood = 0.0;
double prev_likelihood = 0.0;
double likelihood = iterate();
int iteration_count = 0;
is_converged = false;
do {
calculate_membership();
calculate_parameters();
prev_likelihood = likelihood;
likelihood = calculate_likelihood();
} while(likelihood - prev_likelihood > threshold * likelihood);
likelihood = iterate();
is_converged = likelihood - prev_likelihood < threshold * likelihood;
} while(!is_converged && ++iteration_count < maximum_iteration);
}
};
}//namespace CGAL
#endif //CGAL_EXPECTATION_MAXIMIZATION_H
#endif //CGAL_SEGMENTATION_EXPECTATION_MAXIMIZATION_H
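Restating the reworked stopping rule above: after each iterate() the fit is considered converged when the likelihood improvement falls below a relative threshold, and the loop is additionally capped at maximum_iteration (default 500) passes:

\text{is\_converged} \;\Longleftrightarrow\; L_t - L_{t-1} < \text{threshold}\cdot L_t,
\qquad \text{threshold} = 10^{-4}.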
Surface_mesh_segmentation/include/CGAL/internal/Surface_mesh_segmentation/K_means_clustering.h (new file)
@@ -0,0 +1,157 @@
#ifndef CGAL_SEGMENTATION_K_MEANS_CLUSTERING_H
#define CGAL_SEGMENTATION_K_MEANS_CLUSTERING_H

#include <vector>
#include <cmath>
#include <ctime>
#include <cstdlib>

namespace CGAL
{

class K_means_point;

class K_means_center
{
public:
double mean;
int number_of_points;
protected:
double new_mean;

public:
K_means_center(double mean): mean(mean), new_mean(0), number_of_points(0) {
}
void add_point(double data) {
++number_of_points;
new_mean += data;
}
void calculate_mean() {
mean = new_mean / number_of_points;
new_mean = 0;
number_of_points = 0;
}
bool operator < (const K_means_center& center) {
return mean < center.mean;
}
};

class K_means_point
{
public:
double data;
int center_id;
K_means_point(double data, int center_id = -1) : data(data),
center_id(center_id) {
}
/* Finds closest center to point,
/* Adds itself to the closest center's mean,
/* Returns true if center is changed.*/
bool calculate_new_center(std::vector<K_means_center>& centers) {
int new_center_id = 0;
double min_distance = fabs(centers[0].mean - data);
for(int i = 1; i < centers.size(); ++i) {
double new_distance = fabs(centers[i].mean - data);
if(new_distance < min_distance) {
new_center_id = i;
min_distance = new_distance;
}
}
bool is_center_changed = (new_center_id != center_id);
center_id = new_center_id;

centers[center_id].add_point(data);
return is_center_changed;
}
};

class K_means_clustering
{
public:
std::vector<K_means_center> centers;
std::vector<K_means_point> points;
int maximum_iteration;
bool is_converged;
K_means_clustering(int number_of_centers, const std::vector<double>& data,
int number_of_run = 1000, int maximum_iteration = 500)
: points(data.begin(), data.end()), maximum_iteration(maximum_iteration),
is_converged(false) {
calculate_clustering_with_multiple_run(number_of_centers, number_of_run);
}

void fill_with_center_ids(std::vector<int>& data_centers) {
for(std::vector<K_means_point>::iterator point_it = points.begin();
point_it != points.end(); ++point_it) {
data_centers.push_back(point_it->center_id);
}
}

void initiate_centers(int number_of_centers, bool randomly = false) {
centers.clear();
if(!randomly) {
for(int i = 0; i < number_of_centers; ++i) {
double initial_mean = (i + 1.0) / (number_of_centers + 1.0);
centers.push_back(K_means_center(initial_mean));
}
} else {
srand(time(NULL));
for(int i = 0; i < number_of_centers; ++i) {
double initial_mean = points[rand() % points.size()].data;
centers.push_back(K_means_center(initial_mean));
}
}
sort(centers.begin(), centers.end());
}

void calculate_clustering() {
int iteration_count = 0;
is_converged = false;
do {
for(std::vector<K_means_point>::iterator point_it = points.begin();
point_it != points.end(); ++point_it) {
bool center_changed = point_it->calculate_new_center(centers);
is_converged |= center_changed;
}
for(std::vector<K_means_center>::iterator center_it = centers.begin();
center_it != centers.end(); ++center_it) {
center_it->calculate_mean();
}
} while(!is_converged && ++iteration_count < maximum_iteration);
}

void calculate_clustering_with_multiple_run(int number_of_centers,
int number_of_run) {
initiate_centers(number_of_centers);
calculate_clustering();

std::vector<K_means_center> min_centers = centers;
double error = sum_of_squares();
while(--number_of_run > 0) {
initiate_centers(number_of_centers, true);
/* here, clearing center_ids of points might be neccessary */
calculate_clustering();
double new_error = sum_of_squares();
if(error > new_error) {
error = new_error;
min_centers = centers;
}
}
/* By saving points (min_points) also, we can get rid of this part */
/* But since centers are already converged this step will require one iteration */
centers = min_centers;
calculate_clustering();
}
double sum_of_squares() const {
double sum = 0;
for(std::vector<K_means_center>::const_iterator center_it = centers.begin();
center_it != centers.end(); ++center_it) {
for(std::vector<K_means_point>::const_iterator point_it = points.begin();
point_it != points.end(); ++point_it) {
sum += pow(center_it->mean - point_it->data, 2);
}
}
return sum;
}
};
}//namespace CGAL
#endif //CGAL_SEGMENTATION_K_MEANS_CLUSTERING_H
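A standalone sketch of how the new K_means_clustering class is meant to be driven (hypothetical test code, not part of the commit; the sample values are made up):

#include <CGAL/internal/Surface_mesh_segmentation/K_means_clustering.h>
#include <iostream>
#include <vector>

int main()
{
  // Two well separated groups of 1D samples, e.g. SDF values of thin and thick parts.
  double raw[] = { 0.11, 0.12, 0.13, 0.72, 0.74, 0.75 };
  std::vector<double> data(raw, raw + 6);

  // 2 centers; the constructor runs the clustering (1000 restarts by default).
  CGAL::K_means_clustering clusterer(2, data);

  std::vector<int> center_ids;
  clusterer.fill_with_center_ids(center_ids);   // one center id per input value
  for(std::size_t i = 0; i < center_ids.size(); ++i) {
    std::cout << data[i] << " -> center " << center_ids[i] << std::endl;
  }
  return 0;
}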