some modifications on EM, first version of k-means clustering.

This commit is contained in:
Ílker Yaz 2012-05-31 16:30:18 +00:00
parent b6f2edc59d
commit ae1b4eb61f
4 changed files with 314 additions and 54 deletions

1
.gitattributes vendored
View File

@ -4013,6 +4013,7 @@ Surface_mesh_segmentation/example/Surface_mesh_segmentation/CMakeLists.txt -text
Surface_mesh_segmentation/example/Surface_mesh_segmentation/simple_example.cpp -text
Surface_mesh_segmentation/include/CGAL/Surface_mesh_segmentation.h -text
Surface_mesh_segmentation/include/CGAL/internal/Surface_mesh_segmentation/Expectation_maximization.h -text
Surface_mesh_segmentation/include/CGAL/internal/Surface_mesh_segmentation/K_means_clustering.h -text
Surface_mesh_segmentation/package_info/Surface_mesh_segmentation/copyright -text
Surface_mesh_segmentation/package_info/Surface_mesh_segmentation/license.txt -text
Surface_mesh_segmentation/package_info/Surface_mesh_segmentation/maintainer -text

View File

@ -1,18 +1,18 @@
#ifndef CGAL_SURFACE_MESH_SEGMENTATION_H
#define CGAL_SURFACE_MESH_SEGMENTATION_H
/* NEED TO BE DONE
* About implementation:
* +) I am not using BGL, as far as I checked there is a progress on BGL redesign
/* NEED TO BE DONE */
/* About implementation:
/* +) I am not using BGL, as far as I checked there is a progress on BGL redesign
(https://cgal.geometryfactory.com/CGAL/Members/wiki/Features/BGL) which introduces some features
for face-based traversal / manipulation by FaceGraphs
* +) Deciding on which parameters will be taken from user
* +) Make it more readable: calculate_sdf_value_of_facet function.
for face-based traversal / manipulation by FaceGraphs */
/* +) Deciding on which parameters will be taken from user */
/* +) Make it more readable: calculate_sdf_value_of_facet function.
/* About paper (and correctness / efficiency etc.):
/* +) Weighting ray distances with inverse of their angles: not sure how to weight exactly */
/* +) Anisotropic smoothing: have no idea what it is exactly, should read some material (google search is not enough) */
/* +) Deciding how to generate rays in cone: for now using "polar angle" and "accept-reject (square)" and "concentric mapping" techniques */
* About paper (and correctness / efficiency etc.):
* +) Weighting ray distances with inverse of their angles: not sure how to weight exactly
* +) Anisotropic smoothing: have no idea what it is exactly, should read some material (google search is not enough)
* +) Deciding how to generate rays in cone: for now using "polar angle" and "generate in square then accept-reject" techniques
*/
#include <iostream>
#include <fstream>
#include <cmath>
@ -21,7 +21,9 @@
#include <utility>
//#include "Expectation_maximization.h"
//#include "K_means_clustering.h"
#include <CGAL/internal/Surface_mesh_segmentation/Expectation_maximization.h>
#include <CGAL/internal/Surface_mesh_segmentation/K_means_clustering.h>
#include <CGAL/AABB_tree.h>
#include <CGAL/AABB_traits.h>
@ -58,7 +60,7 @@ protected:
typedef std::map<Facet_handle, int> Face_center_map;
/*Sampled points from disk, t1 = coordinate-x, t2 = coordinate-y, t3 = angle with cone-normal. */
typedef CGAL::Triple<double, double, double> Disk_sample;
typedef std::vector<CGAL::Triple<double, double, double> > Disk_samples_list;
typedef std::vector<CGAL::Triple<double, double, double>> Disk_samples_list;
template <typename ValueTypeName>
struct compare_pairs {
@ -69,7 +71,7 @@ protected:
template <typename ValueTypeName>
struct compare_pairs_using_first {
bool operator()(const ValueTypeName& v1, const ValueTypeName& v2) {
bool operator()(ValueTypeName& v1, ValueTypeName& v2) {
return v1.first < v2.first;
}
};
@ -88,11 +90,13 @@ public:
std::ofstream log_file;
//std::vector<int> center_memberships_temp;
//member functions
public:
Surface_mesh_segmentation(Polyhedron* mesh,
int number_of_rays_sqrt = 6, double cone_angle = (2.0 / 3.0) * CGAL_PI,
int number_of_centers = 1);
int number_of_rays_sqrt = 9, double cone_angle = (2.0 / 3.0) * CGAL_PI,
int number_of_centers = 2);
void calculate_sdf_values();
@ -118,6 +122,8 @@ public:
void smooth_sdf_values();
void apply_GMM_fitting();
void apply_K_means_clustering();
void apply_GMM_fitting_with_K_means_init();
void write_sdf_values(const char* file_name);
void read_sdf_values(const char* file_name);
@ -133,9 +139,8 @@ inline Surface_mesh_segmentation<Polyhedron>::Surface_mesh_segmentation(
{
disk_sampling_concentric_mapping();
calculate_sdf_values();
//write_sdf_values("sdf_values_2.txt");
//read_sdf_values("sdf_values.txt");
//apply_GMM_fitting();
//write_sdf_values("sdf_values_sample_9_p.txt");
//read_sdf_values("sdf_values_sample_9_p.txt");
}
template <class Polyhedron>
@ -173,10 +178,12 @@ Surface_mesh_segmentation<Polyhedron>::calculate_sdf_value_of_facet(
v2 = v2 / CGAL::sqrt(v2.squared_length());
int ray_count = number_of_rays_sqrt * number_of_rays_sqrt;
std::vector<double> ray_distances, ray_weights;
ray_distances.reserve(ray_count);
ray_weights.reserve(ray_count);
double angle_st_dev = cone_angle / 4; //Not sure what to use here.
double angle_st_dev = cone_angle / 3; //Not sure what to use here.
double length_of_normal = 1.0 / tan(cone_angle / 2);
Vector normal = normal_const * length_of_normal;
@ -200,6 +207,7 @@ Surface_mesh_segmentation<Polyhedron>::calculate_sdf_value_of_facet(
return calculate_sdf_value_from_rays_with_trimmed_mean(ray_distances,
ray_weights);
}
template <class Polyhedron>
void Surface_mesh_segmentation<Polyhedron>::cast_and_return_minimum(
const Ray& ray, const Tree& tree, const Facet_handle& facet,
@ -261,8 +269,8 @@ Surface_mesh_segmentation<Polyhedron>::calculate_sdf_value_from_rays(
double total_weights = 0.0, total_distance = 0.0;
double median_sdf = 0.0, mean_sdf = 0.0, st_dev = 0.0;
/* Calculate mean sdf */
std::vector<double>::iterator w_it = ray_weights.begin();
for(std::vector<double>::iterator dist_it = ray_distances.begin();
typename std::vector<double>::iterator w_it = ray_weights.begin();
for(typename std::vector<double>::iterator dist_it = ray_distances.begin();
dist_it != ray_distances.end();
++dist_it, ++w_it) {
total_distance += (*dist_it) * (*w_it);
@ -284,7 +292,7 @@ Surface_mesh_segmentation<Polyhedron>::calculate_sdf_value_from_rays(
median_sdf = ray_distances[half_ray_count];
}
/* Calculate st dev using mean_sdf as mean */
for(std::vector<double>::iterator dist_it = ray_distances.begin();
for(typename std::vector<double>::iterator dist_it = ray_distances.begin();
dist_it != ray_distances.end(); ++dist_it) {
double dif = (*dist_it) - mean_sdf;
st_dev += dif * dif;
@ -292,7 +300,7 @@ Surface_mesh_segmentation<Polyhedron>::calculate_sdf_value_from_rays(
st_dev = CGAL::sqrt(st_dev / (ray_distances.size()));
/* Calculate sdf, accept rays : ray_dist - median < st dev */
w_it = ray_weights.begin();
for(std::vector<double>::iterator dist_it = ray_distances.begin();
for(typename std::vector<double>::iterator dist_it = ray_distances.begin();
dist_it != ray_distances.end();
++dist_it, ++w_it) {
if(fabs((*dist_it) - median_sdf) > st_dev * 0.5) {
@ -312,8 +320,8 @@ Surface_mesh_segmentation<Polyhedron>::calculate_sdf_value_from_rays_with_mean(
{
double total_weights = 0.0, total_distance = 0.0;
double mean_sdf = 0.0, st_dev = 0.0;
std::vector<double>::iterator w_it = ray_weights.begin();
for(std::vector<double>::iterator dist_it = ray_distances.begin();
typename std::vector<double>::iterator w_it = ray_weights.begin();
for(typename std::vector<double>::iterator dist_it = ray_distances.begin();
dist_it != ray_distances.end(); ++dist_it, ++w_it) {
total_distance += (*dist_it) * (*w_it);
total_weights += (*w_it);
@ -321,7 +329,7 @@ Surface_mesh_segmentation<Polyhedron>::calculate_sdf_value_from_rays_with_mean(
mean_sdf = total_distance / total_weights;
total_weights = 0.0;
total_distance = 0.0;
for(std::vector<double>::iterator dist_it = ray_distances.begin();
for(typename std::vector<double>::iterator dist_it = ray_distances.begin();
dist_it != ray_distances.end(); ++dist_it) {
double dif = (*dist_it) - mean_sdf;
st_dev += dif * dif;
@ -329,7 +337,7 @@ Surface_mesh_segmentation<Polyhedron>::calculate_sdf_value_from_rays_with_mean(
st_dev = CGAL::sqrt(st_dev / (ray_distances.size()));
w_it = ray_weights.begin();
for(std::vector<double>::iterator dist_it = ray_distances.begin();
for(typename std::vector<double>::iterator dist_it = ray_distances.begin();
dist_it != ray_distances.end(); ++dist_it, ++w_it) {
if(fabs((*dist_it) - mean_sdf) > st_dev) {
continue;
@ -346,16 +354,16 @@ Surface_mesh_segmentation<Polyhedron>::calculate_sdf_value_from_rays_with_trimme
std::vector<double>& ray_distances,
std::vector<double>& ray_weights) const
{
std::vector<std::pair<double, double> > distances_with_weights;
std::vector<std::pair<double, double>> distances_with_weights;
distances_with_weights.reserve(ray_distances.size());
typename std::vector<double>::iterator w_it = ray_weights.begin();
for(std::vector<double>::iterator dist_it = ray_distances.begin();
for(typename std::vector<double>::iterator dist_it = ray_distances.begin();
dist_it != ray_distances.end(); ++dist_it, ++w_it) {
distances_with_weights.push_back(std::pair<double, double>((*dist_it),
(*w_it)));
}
std::sort(distances_with_weights.begin(), distances_with_weights.end(),
compare_pairs_using_first<std::pair<double, double> >());
compare_pairs_using_first<std::pair<double, double>>());
int b = floor(distances_with_weights.size() / 20.0 + 0.5); // Eliminate %5.
int e = distances_with_weights.size() - b; // Eliminate %5.
@ -371,7 +379,7 @@ Surface_mesh_segmentation<Polyhedron>::calculate_sdf_value_from_rays_with_trimme
total_weights = 0.0;
total_distance = 0.0;
for(std::vector<double>::iterator dist_it = ray_distances.begin();
for(typename std::vector<double>::iterator dist_it = ray_distances.begin();
dist_it != ray_distances.end(); ++dist_it) {
double dif = (*dist_it) - trimmed_mean;
st_dev += dif * dif;
@ -518,12 +526,55 @@ inline void Surface_mesh_segmentation<Polyhedron>::apply_GMM_fitting()
Expectation_maximization fitter(number_of_centers, sdf_vector);
std::vector<int> center_memberships;
fitter.fill_with_center_ids(center_memberships);
std::vector<int>::iterator center_it = center_memberships.begin();
typename std::vector<int>::iterator center_it = center_memberships.begin();
for(Facet_iterator facet_it = mesh->facets_begin();
facet_it != mesh->facets_end(); ++facet_it, ++center_it) {
centers.insert(std::pair<Facet_handle, int>(facet_it, (*center_it)));
}
}
template <class Polyhedron>
inline void Surface_mesh_segmentation<Polyhedron>::apply_K_means_clustering()
{
centers.clear();
std::vector<double> sdf_vector;
for(Facet_iterator facet_it = mesh->facets_begin();
facet_it != mesh->facets_end(); ++facet_it) {
sdf_vector.push_back(sdf_values[facet_it]);
}
K_means_clustering clusterer(number_of_centers, sdf_vector);
std::vector<int> center_memberships;
clusterer.fill_with_center_ids(center_memberships);
typename std::vector<int>::iterator center_it = center_memberships.begin();
for(Facet_iterator facet_it = mesh->facets_begin();
facet_it != mesh->facets_end(); ++facet_it, ++center_it) {
centers.insert(std::pair<Facet_handle, int>(facet_it, (*center_it)));
}
//center_memberships_temp = center_memberships; //remove
}
template <class Polyhedron>
inline void
Surface_mesh_segmentation<Polyhedron>::apply_GMM_fitting_with_K_means_init()
{
centers.clear();
std::vector<double> sdf_vector;
for(Facet_iterator facet_it = mesh->facets_begin();
facet_it != mesh->facets_end(); ++facet_it) {
sdf_vector.push_back(sdf_values[facet_it]);
}
K_means_clustering clusterer(number_of_centers, sdf_vector);
std::vector<int> center_memberships;
clusterer.fill_with_center_ids(center_memberships);
//std::vector<int> center_memberships = center_memberships_temp;
Expectation_maximization fitter(number_of_centers, sdf_vector,
center_memberships);
center_memberships.clear();
fitter.fill_with_center_ids(center_memberships);
typename std::vector<int>::iterator center_it = center_memberships.begin();
for(Facet_iterator facet_it = mesh->facets_begin();
facet_it != mesh->facets_end(); ++facet_it, ++center_it) {
centers.insert(std::pair<Facet_handle, int>(facet_it, (*center_it)));
}
}
template <class Polyhedron>

View File

@ -1,10 +1,12 @@
#ifndef CGAL_EXPECTATION_MAXIMIZATION_H
#define CGAL_EXPECTATION_MAXIMIZATION_H
#ifndef CGAL_SEGMENTATION_EXPECTATION_MAXIMIZATION_H
#define CGAL_SEGMENTATION_EXPECTATION_MAXIMIZATION_H
/* NEED TO BE DONE */
/* About implementation:
/* Calculating probability multiple times: need to solved. */
/* Calculating probability multiple times */
/* About safe division: where centers have no members */
#include <vector>
#include <cmath>
#include <algorithm>
//#define DIV_INV_SQRT_2_PI 0.3989422804
namespace CGAL
{
@ -18,17 +20,22 @@ public:
double deviation;
double mixing_coefficient;
Gaussian_center(): mean(0), deviation(0), mixing_coefficient(1.0) {
}
Gaussian_center(double mean, double deviation, double mixing_coefficient)
: mean(mean), deviation(deviation), mixing_coefficient(mixing_coefficient) {
}
double probability(double x) const {
double e_over = -0.5 * pow((x - mean) / deviation, 2);
return exp(e_over) * (1.0 / deviation) ;
return exp(e_over) / deviation;
}
double probability_proportional(double x) const {
double e_over = -0.5 * pow((x - mean) / deviation, 2);
return exp(e_over);
}
bool operator < (const Gaussian_center& center) {
return mean < center.mean;
}
void calculate_parameters(const std::vector<Gaussian_point>& points);
};
@ -72,7 +79,7 @@ inline void Gaussian_center::calculate_parameters(const
}
new_deviation = sqrt(new_deviation/total_membership);
/* Calculate new mixing coefficient */
mixing_coefficient = total_membership / points.size();
mixing_coefficient = total_membership;
deviation = new_deviation;
mean = new_mean;
@ -84,13 +91,19 @@ public:
std::vector<Gaussian_center> centers;
std::vector<Gaussian_point> points;
double threshold;
int maximum_iteration;
bool is_converged;
Expectation_maximization(int number_of_centers, const std::vector<double>& data,
const std::vector<int>& initial_centers = std::vector<int>(),
int maximum_iteration = 500)
: points(data.begin(), data.end()), threshold(1e-4),
maximum_iteration(maximum_iteration), is_converged(false) {
Expectation_maximization(int number_of_centers, const std::vector<double>& data)
: points(data.begin(), data.end()), threshold(0.00000001) {
initiate_centers(number_of_centers);
initiate_centers(number_of_centers, initial_centers);
calculate_fitting();
}
void fill_with_center_ids(std::vector<int>& data_centers) {
data_centers.reserve(points.size());
for(std::vector<Gaussian_point>::iterator point_it = points.begin();
point_it != points.end(); ++point_it) {
double max_likelihood = 0.0;
@ -108,15 +121,46 @@ public:
}
}
protected:
void initiate_centers(int number_of_centers) {
double initial_deviation = 1.0 / (2.0 * number_of_centers);
double initial_mixing_coefficient = 1.0 / number_of_centers;
for(int i = 0; i < number_of_centers; ++i) {
double initial_mean = (i + 1.0) / (number_of_centers + 1.0);
Gaussian_center center(initial_mean, initial_deviation,
initial_mixing_coefficient);
centers.push_back(center);
void initiate_centers(int number_of_centers,
const std::vector<int>& initial_centers) {
if(initial_centers.empty()) {
/* Uniformly generate centers */
double initial_deviation = 1.0 / (2.0 * number_of_centers);
double initial_mixing_coefficient = 1.0 / number_of_centers;
for(int i = 0; i < number_of_centers; ++i) {
double initial_mean = (i + 1.0) / (number_of_centers + 1.0);
centers.push_back(Gaussian_center(initial_mean, initial_deviation,
initial_mixing_coefficient));
}
} else {
/* Calculate mean */
int number_of_point = initial_centers.size();
centers = std::vector<Gaussian_center>(number_of_centers);
std::vector<int> member_count(number_of_centers, 0);
for(int i = 0; i < number_of_point; ++i) {
int center_id = initial_centers[i];
double data = points[i].data;
centers[center_id].mean += data;
member_count[center_id] += 1;
}
/* Assign mean, and mixing coef */
for(int i = 0; i < number_of_centers; ++i) {
centers[i].mean /= member_count[i];
centers[i].mixing_coefficient = member_count[i] / static_cast<double>
(number_of_point);
}
/* Calculate deviation */
for(int i = 0; i < number_of_point; ++i) {
int center_id = initial_centers[i];
double data = points[i].data;
centers[center_id].deviation += pow(data - centers[center_id].mean, 2);
}
for(int i = 0; i < number_of_centers; ++i) {
centers[i].deviation = sqrt(centers[i].deviation / member_count[i]);
}
}
sort(centers.begin(), centers.end());
}
/*Calculates total membership values for a point */
void calculate_membership() {
@ -147,15 +191,22 @@ protected:
}
return likelihood;
}
double iterate() {
calculate_membership();
calculate_parameters();
return calculate_likelihood();
}
void calculate_fitting() {
double prev_likelihood, likelihood = 0.0;
double prev_likelihood = 0.0;
double likelihood = iterate();
int iteration_count = 0;
is_converged = false;
do {
calculate_membership();
calculate_parameters();
prev_likelihood = likelihood;
likelihood = calculate_likelihood();
} while(likelihood - prev_likelihood > threshold * likelihood);
likelihood = iterate();
is_converged = likelihood - prev_likelihood < threshold * likelihood;
} while(!is_converged && ++iteration_count < maximum_iteration);
}
};
}//namespace CGAL
#endif //CGAL_EXPECTATION_MAXIMIZATION_H
#endif //CGAL_SEGMENTATION_EXPECTATION_MAXIMIZATION_H

View File

@ -0,0 +1,157 @@
#ifndef CGAL_SEGMENTATION_K_MEANS_CLUSTERING_H
#define CGAL_SEGMENTATION_K_MEANS_CLUSTERING_H
#include <vector>
#include <cmath>
#include <ctime>
#include <cstdlib>
namespace CGAL
{
class K_means_point;
class K_means_center
{
public:
double mean;
int number_of_points;
protected:
double new_mean;
public:
K_means_center(double mean): mean(mean), new_mean(0), number_of_points(0) {
}
void add_point(double data) {
++number_of_points;
new_mean += data;
}
void calculate_mean() {
mean = new_mean / number_of_points;
new_mean = 0;
number_of_points = 0;
}
bool operator < (const K_means_center& center) {
return mean < center.mean;
}
};
class K_means_point
{
public:
double data;
int center_id;
K_means_point(double data, int center_id = -1) : data(data),
center_id(center_id) {
}
/* Finds closest center to point,
/* Adds itself to the closest center's mean,
/* Returns true if center is changed.*/
bool calculate_new_center(std::vector<K_means_center>& centers) {
int new_center_id = 0;
double min_distance = fabs(centers[0].mean - data);
for(int i = 1; i < centers.size(); ++i) {
double new_distance = fabs(centers[i].mean - data);
if(new_distance < min_distance) {
new_center_id = i;
min_distance = new_distance;
}
}
bool is_center_changed = (new_center_id != center_id);
center_id = new_center_id;
centers[center_id].add_point(data);
return is_center_changed;
}
};
class K_means_clustering
{
public:
std::vector<K_means_center> centers;
std::vector<K_means_point> points;
int maximum_iteration;
bool is_converged;
K_means_clustering(int number_of_centers, const std::vector<double>& data,
int number_of_run = 1000, int maximum_iteration = 500)
: points(data.begin(), data.end()), maximum_iteration(maximum_iteration),
is_converged(false) {
calculate_clustering_with_multiple_run(number_of_centers, number_of_run);
}
void fill_with_center_ids(std::vector<int>& data_centers) {
for(std::vector<K_means_point>::iterator point_it = points.begin();
point_it != points.end(); ++point_it) {
data_centers.push_back(point_it->center_id);
}
}
void initiate_centers(int number_of_centers, bool randomly = false) {
centers.clear();
if(!randomly) {
for(int i = 0; i < number_of_centers; ++i) {
double initial_mean = (i + 1.0) / (number_of_centers + 1.0);
centers.push_back(K_means_center(initial_mean));
}
} else {
srand(time(NULL));
for(int i = 0; i < number_of_centers; ++i) {
double initial_mean = points[rand() % points.size()].data;
centers.push_back(K_means_center(initial_mean));
}
}
sort(centers.begin(), centers.end());
}
void calculate_clustering() {
int iteration_count = 0;
is_converged = false;
do {
for(std::vector<K_means_point>::iterator point_it = points.begin();
point_it != points.end(); ++point_it) {
bool center_changed = point_it->calculate_new_center(centers);
is_converged |= center_changed;
}
for(std::vector<K_means_center>::iterator center_it = centers.begin();
center_it != centers.end(); ++center_it) {
center_it->calculate_mean();
}
} while(!is_converged && ++iteration_count < maximum_iteration);
}
void calculate_clustering_with_multiple_run(int number_of_centers,
int number_of_run) {
initiate_centers(number_of_centers);
calculate_clustering();
std::vector<K_means_center> min_centers = centers;
double error = sum_of_squares();
while(--number_of_run > 0) {
initiate_centers(number_of_centers, true);
/* here, clearing center_ids of points might be neccessary */
calculate_clustering();
double new_error = sum_of_squares();
if(error > new_error) {
error = new_error;
min_centers = centers;
}
}
/* By saving points (min_points) also, we can get rid of this part */
/* But since centers are already converged this step will require one iteration */
centers = min_centers;
calculate_clustering();
}
double sum_of_squares() const {
double sum = 0;
for(std::vector<K_means_center>::const_iterator center_it = centers.begin();
center_it != centers.end(); ++center_it) {
for(std::vector<K_means_point>::const_iterator point_it = points.begin();
point_it != points.end(); ++point_it) {
sum += pow(center_it->mean - point_it->data, 2);
}
}
return sum;
}
};
}//namespace CGAL
#endif //CGAL_SEGMENTATION_K_MEANS_CLUSTERING_H