mirror of https://github.com/CGAL/cgal
EM - using probability-matrix (center x point), a faster implementation (nearly 2x).
This commit is contained in:
parent
b96af69120
commit
8aa3d37806
|
|
@ -6,7 +6,6 @@
|
||||||
* (https://cgal.geometryfactory.com/CGAL/Members/wiki/Features/BGL) which introduces some features
|
* (https://cgal.geometryfactory.com/CGAL/Members/wiki/Features/BGL) which introduces some features
|
||||||
* for face-based traversal / manipulation by FaceGraphs
|
* for face-based traversal / manipulation by FaceGraphs
|
||||||
* +) Deciding on which parameters will be taken from user
|
* +) Deciding on which parameters will be taken from user
|
||||||
* +) Make it more readable: calculate_sdf_value_of_facet function.
|
|
||||||
*
|
*
|
||||||
* About paper (and correctness / efficiency etc.):
|
* About paper (and correctness / efficiency etc.):
|
||||||
* +) Weighting ray distances with inverse of their angles: not sure how to weight exactly
|
* +) Weighting ray distances with inverse of their angles: not sure how to weight exactly
|
||||||
|
|
@ -28,6 +27,8 @@
|
||||||
#include <CGAL/AABB_polyhedron_triangle_primitive.h>
|
#include <CGAL/AABB_polyhedron_triangle_primitive.h>
|
||||||
#include <CGAL/utility.h>
|
#include <CGAL/utility.h>
|
||||||
|
|
||||||
|
#include <boost/optional.hpp>
|
||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
|
|
@ -70,11 +71,13 @@ protected:
|
||||||
Tree;
|
Tree;
|
||||||
typedef typename Tree::Object_and_primitive_id
|
typedef typename Tree::Object_and_primitive_id
|
||||||
Object_and_primitive_id;
|
Object_and_primitive_id;
|
||||||
|
typedef typename Tree::Primitive_id
|
||||||
|
Primitive_id;
|
||||||
|
|
||||||
typedef std::map<Facet_handle, double> Face_value_map;
|
typedef std::map<Facet_handle, double> Face_value_map;
|
||||||
typedef std::map<Facet_handle, int> Face_center_map;
|
typedef std::map<Facet_handle, int> Face_center_map;
|
||||||
/*Sampled points from disk, t1 = coordinate-x, t2 = coordinate-y, t3 = angle with cone-normal (weight). */
|
/*Sampled points from disk, t1 = coordinate-x, t2 = coordinate-y, t3 = angle with cone-normal (weight). */
|
||||||
typedef CGAL::Triple<double, double, double> Disk_sample;
|
typedef CGAL::Triple<double, double, double> Disk_sample;
|
||||||
typedef std::vector<CGAL::Triple<double, double, double> > Disk_samples_list;
|
typedef std::vector<CGAL::Triple<double, double, double> > Disk_samples_list;
|
||||||
|
|
||||||
template <typename ValueTypeName>
|
template <typename ValueTypeName>
|
||||||
|
|
@ -111,12 +114,10 @@ public:
|
||||||
|
|
||||||
double calculate_sdf_value_of_facet (const Facet_handle& facet,
|
double calculate_sdf_value_of_facet (const Facet_handle& facet,
|
||||||
const Tree& tree) const;
|
const Tree& tree) const;
|
||||||
void cast_and_return_minimum (const Ray& ray, const Tree& tree,
|
boost::optional<double> cast_and_return_minimum(const Ray& ray,
|
||||||
const Facet_handle& facet,
|
const Tree& tree, const Facet_handle& facet) const;
|
||||||
bool& is_found, double& min_distance) const;
|
boost::optional<double> cast_and_return_minimum_use_closest(const Ray& ray,
|
||||||
void cast_and_return_minimum_use_closest (const Ray& ray, const Tree& tree,
|
const Tree& tree, const Facet_handle& facet) const;
|
||||||
const Facet_handle& facet,
|
|
||||||
bool& is_found, double& min_distance) const;
|
|
||||||
|
|
||||||
double calculate_sdf_value_from_rays (std::vector<double>& ray_distances,
|
double calculate_sdf_value_from_rays (std::vector<double>& ray_distances,
|
||||||
std::vector<double>& ray_weights) const;
|
std::vector<double>& ray_weights) const;
|
||||||
|
|
@ -157,8 +158,8 @@ inline Surface_mesh_segmentation<Polyhedron>::Surface_mesh_segmentation(
|
||||||
calculate_sdf_values();
|
calculate_sdf_values();
|
||||||
SEG_DEBUG(std::cout << t)
|
SEG_DEBUG(std::cout << t)
|
||||||
apply_GMM_fitting();
|
apply_GMM_fitting();
|
||||||
//write_sdf_values("sdf_values_sample_dino_2.txt");
|
//write_sdf_values("sdf_values_sample_dino_ws.txt");
|
||||||
//read_sdf_values("sdf_values_sample_dino.txt");
|
//read_sdf_values("sdf_values_sample_elephant.txt");
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Polyhedron>
|
template <class Polyhedron>
|
||||||
|
|
@ -211,55 +212,57 @@ Surface_mesh_segmentation<Polyhedron>::calculate_sdf_value_of_facet(
|
||||||
sample_it != disk_samples.end(); ++sample_it) {
|
sample_it != disk_samples.end(); ++sample_it) {
|
||||||
Vector disk_vector = v1 * sample_it->first + v2 * sample_it->second;
|
Vector disk_vector = v1 * sample_it->first + v2 * sample_it->second;
|
||||||
Ray ray(center, normal + disk_vector);
|
Ray ray(center, normal + disk_vector);
|
||||||
double min_distance;
|
|
||||||
bool is_found;
|
//boost::optional<double> min_distance = cast_and_return_minimum(ray, tree, facet);
|
||||||
cast_and_return_minimum_use_closest(ray, tree, facet, is_found, min_distance);
|
boost::optional<double> min_distance = cast_and_return_minimum_use_closest(ray,
|
||||||
if(!is_found) {
|
tree, facet);
|
||||||
|
if(!min_distance) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
ray_weights.push_back(sample_it->third);
|
ray_weights.push_back(sample_it->third);
|
||||||
ray_distances.push_back(min_distance);
|
ray_distances.push_back(*min_distance);
|
||||||
}
|
}
|
||||||
return calculate_sdf_value_from_rays_with_trimmed_mean(ray_distances,
|
return calculate_sdf_value_from_rays_with_trimmed_mean(ray_distances,
|
||||||
ray_weights);
|
ray_weights);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Polyhedron>
|
template <class Polyhedron>
|
||||||
void Surface_mesh_segmentation<Polyhedron>::cast_and_return_minimum(
|
boost::optional<double>
|
||||||
const Ray& ray, const Tree& tree, const Facet_handle& facet,
|
Surface_mesh_segmentation<Polyhedron>::cast_and_return_minimum(
|
||||||
bool& is_found, double& min_distance) const
|
const Ray& ray, const Tree& tree, const Facet_handle& facet) const
|
||||||
{
|
{
|
||||||
|
boost::optional<double> min_distance;
|
||||||
std::list<Object_and_primitive_id> intersections;
|
std::list<Object_and_primitive_id> intersections;
|
||||||
tree.all_intersection(ray, std::back_inserter(intersections));
|
tree.all_intersections(ray, std::back_inserter(intersections));
|
||||||
Vector min_i_ray;
|
Vector min_i_ray;
|
||||||
typename Tree::Primitive_id min_id;
|
Primitive_id min_id;
|
||||||
is_found = false;
|
|
||||||
for(typename std::list<Object_and_primitive_id>::iterator op_it =
|
for(typename std::list<Object_and_primitive_id>::iterator op_it =
|
||||||
intersections.begin();
|
intersections.begin();
|
||||||
op_it != intersections.end() ; ++op_it) {
|
op_it != intersections.end() ; ++op_it) {
|
||||||
CGAL::Object object = op_it->first;
|
CGAL::Object object = op_it->first;
|
||||||
typename Tree::Primitive_id id = op_it->second;
|
Primitive_id id = op_it->second;
|
||||||
Point i_point;
|
if(id == facet) {
|
||||||
if(id == facet) {
|
|
||||||
continue; //Since center is located on related facet, we should skip it if there is an intersection with it.
|
continue; //Since center is located on related facet, we should skip it if there is an intersection with it.
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Point i_point;
|
||||||
if(!CGAL::assign(i_point, object)) {
|
if(!CGAL::assign(i_point, object)) {
|
||||||
continue; //What to do here (in case of intersection object is a segment), I am not sure ???
|
continue; //What to do here (in case of intersection object is a segment), I am not sure ???
|
||||||
}
|
}
|
||||||
|
|
||||||
Vector i_ray = (ray.source() - i_point);
|
Vector i_ray = (ray.source() - i_point);
|
||||||
double new_distance = i_ray.squared_length();
|
double new_distance = i_ray.squared_length();
|
||||||
if(!is_found || new_distance < min_distance) {
|
if(!min_distance || new_distance < min_distance) {
|
||||||
min_distance = new_distance;
|
min_distance = new_distance;
|
||||||
min_id = id;
|
min_id = id;
|
||||||
min_i_ray = i_ray;
|
min_i_ray = i_ray;
|
||||||
is_found = true;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if(!is_found) {
|
if(!min_distance) {
|
||||||
return;
|
return min_distance;
|
||||||
}
|
}
|
||||||
min_distance = sqrt(min_distance);
|
|
||||||
Point min_v1 = min_id->halfedge()->vertex()->point();
|
Point min_v1 = min_id->halfedge()->vertex()->point();
|
||||||
Point min_v2 = min_id->halfedge()->next()->vertex()->point();
|
Point min_v2 = min_id->halfedge()->next()->vertex()->point();
|
||||||
Point min_v3 = min_id->halfedge()->next()->next()->vertex()->point();
|
Point min_v3 = min_id->halfedge()->next()->next()->vertex()->point();
|
||||||
|
|
@ -267,43 +270,52 @@ void Surface_mesh_segmentation<Polyhedron>::cast_and_return_minimum(
|
||||||
|
|
||||||
if(CGAL::angle(CGAL::ORIGIN + min_i_ray, Point(CGAL::ORIGIN),
|
if(CGAL::angle(CGAL::ORIGIN + min_i_ray, Point(CGAL::ORIGIN),
|
||||||
CGAL::ORIGIN + min_normal) != CGAL::ACUTE) {
|
CGAL::ORIGIN + min_normal) != CGAL::ACUTE) {
|
||||||
is_found = false;
|
return boost::optional<double>();
|
||||||
}
|
}
|
||||||
//total_intersection += intersections.size();
|
return (min_distance = sqrt(*min_distance));
|
||||||
//total_cast++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Polyhedron>
|
template <class Polyhedron>
|
||||||
void Surface_mesh_segmentation<Polyhedron>::cast_and_return_minimum_use_closest (
|
boost::optional<double>
|
||||||
|
Surface_mesh_segmentation<Polyhedron>::cast_and_return_minimum_use_closest (
|
||||||
const Ray& ray, const Tree& tree,
|
const Ray& ray, const Tree& tree,
|
||||||
const Facet_handle& facet, bool& is_found, double& min_distance) const
|
const Facet_handle& facet) const
|
||||||
{
|
{
|
||||||
is_found = false;
|
//static double dist = 0.1;
|
||||||
|
//boost::optional<double> min_distance_2 = dist++;
|
||||||
|
//return min_distance_2;
|
||||||
|
|
||||||
|
boost::optional<double> min_distance;
|
||||||
boost::optional<Object_and_primitive_id> intersection =
|
boost::optional<Object_and_primitive_id> intersection =
|
||||||
tree.closest_intersection(ray);
|
tree.closest_intersection(ray);
|
||||||
|
//boost::optional<Object_and_primitive_id> intersection = tree.any_intersection(ray);
|
||||||
if(!intersection) {
|
if(!intersection) {
|
||||||
return;
|
return min_distance;
|
||||||
}
|
}
|
||||||
|
|
||||||
CGAL::Object object = intersection->first;
|
CGAL::Object object = intersection->first;
|
||||||
typename Tree::Primitive_id min_id = intersection->second;
|
Primitive_id min_id = intersection->second;
|
||||||
|
|
||||||
|
if(min_id == facet) {
|
||||||
|
CGAL_error(); // There should be no such case, after center-facet arrangments.
|
||||||
|
}
|
||||||
|
|
||||||
Point i_point;
|
Point i_point;
|
||||||
if(!CGAL::assign(i_point, object)) {
|
if(!CGAL::assign(i_point, object)) {
|
||||||
return;
|
return min_distance;
|
||||||
}
|
}
|
||||||
|
|
||||||
Vector min_i_ray = ray.source() - i_point;
|
Vector min_i_ray = ray.source() - i_point;
|
||||||
min_distance = sqrt(min_i_ray.squared_length());
|
|
||||||
|
|
||||||
Point min_v1 = min_id->halfedge()->vertex()->point();
|
Point min_v1 = min_id->halfedge()->vertex()->point();
|
||||||
Point min_v2 = min_id->halfedge()->next()->vertex()->point();
|
Point min_v2 = min_id->halfedge()->next()->vertex()->point();
|
||||||
Point min_v3 = min_id->halfedge()->next()->next()->vertex()->point();
|
Point min_v3 = min_id->halfedge()->next()->next()->vertex()->point();
|
||||||
Vector min_normal = CGAL::normal(min_v1, min_v2, min_v3) * -1.0;
|
Vector min_normal = CGAL::normal(min_v1, min_v2, min_v3) * -1.0;
|
||||||
|
|
||||||
if(CGAL::angle(CGAL::ORIGIN + min_i_ray, Point(CGAL::ORIGIN),
|
if(CGAL::angle(CGAL::ORIGIN + min_i_ray, Point(CGAL::ORIGIN),
|
||||||
CGAL::ORIGIN + min_normal) == CGAL::ACUTE) {
|
CGAL::ORIGIN + min_normal) != CGAL::ACUTE) {
|
||||||
is_found = true;
|
return min_distance;
|
||||||
}
|
}
|
||||||
|
return (min_distance = sqrt(min_i_ray.squared_length()));
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Polyhedron>
|
template <class Polyhedron>
|
||||||
|
|
@ -629,7 +641,7 @@ inline void Surface_mesh_segmentation<Polyhedron>::apply_GMM_fitting()
|
||||||
sdf_vector.push_back(pair_it->second);
|
sdf_vector.push_back(pair_it->second);
|
||||||
}
|
}
|
||||||
SEG_DEBUG(Timer t)
|
SEG_DEBUG(Timer t)
|
||||||
internal::Expectation_maximization fitter(number_of_centers, sdf_vector);
|
internal::Expectation_maximization fitter(number_of_centers, sdf_vector, 50);
|
||||||
SEG_DEBUG(std::cout << t)
|
SEG_DEBUG(std::cout << t)
|
||||||
std::vector<int> center_memberships;
|
std::vector<int> center_memberships;
|
||||||
fitter.fill_with_center_ids(center_memberships);
|
fitter.fill_with_center_ids(center_memberships);
|
||||||
|
|
|
||||||
|
|
@ -2,18 +2,26 @@
|
||||||
#define CGAL_SEGMENTATION_EXPECTATION_MAXIMIZATION_H
|
#define CGAL_SEGMENTATION_EXPECTATION_MAXIMIZATION_H
|
||||||
/* NEED TO BE DONE */
|
/* NEED TO BE DONE */
|
||||||
/* About implementation:
|
/* About implementation:
|
||||||
* Calculating probability multiple times, solution: storing matrix(cluster, point) for probability values.
|
* + There are two implementations : one of them is using probability matrix (center x point), the other is not.
|
||||||
* About safe division: where centers have no members, in graph cut it may happen
|
* + Also code size is twice large, I did not try to simplify before deciding on whether we are going to make
|
||||||
|
* both approaches (with matrix and without) available or not.
|
||||||
|
* + There are a lot of parameters (with default values) and initialization types,
|
||||||
|
* so I am planning to use whether 'Named Parameter Idiom' or Boost Parameter Library...
|
||||||
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <ctime>
|
#include <ctime>
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
#include <limits>
|
#include <limits>
|
||||||
|
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
|
||||||
|
#define DEF_MAX_ITER 100
|
||||||
|
#define DEF_THRESHOLD 1e-4
|
||||||
|
#define USE_MATRIX true
|
||||||
//#define ACTIVATE_SEGMENTATION_EM_DEBUG
|
//#define ACTIVATE_SEGMENTATION_EM_DEBUG
|
||||||
#ifdef ACTIVATE_SEGMENTATION_EM_DEBUG
|
#ifdef ACTIVATE_SEGMENTATION_EM_DEBUG
|
||||||
#define SEG_DEBUG(x) x;
|
#define SEG_DEBUG(x) x;
|
||||||
|
|
@ -23,6 +31,8 @@
|
||||||
|
|
||||||
namespace CGAL
|
namespace CGAL
|
||||||
{
|
{
|
||||||
|
namespace internal
|
||||||
|
{
|
||||||
|
|
||||||
class Gaussian_point;
|
class Gaussian_point;
|
||||||
|
|
||||||
|
|
@ -46,6 +56,9 @@ public:
|
||||||
double e_over = -0.5 * pow((x - mean) / deviation, 2);
|
double e_over = -0.5 * pow((x - mean) / deviation, 2);
|
||||||
return exp(e_over);
|
return exp(e_over);
|
||||||
}
|
}
|
||||||
|
double probability_with_coef(double x) const {
|
||||||
|
return probability(x) * mixing_coefficient;
|
||||||
|
}
|
||||||
bool operator < (const Gaussian_center& center) const {
|
bool operator < (const Gaussian_center& center) const {
|
||||||
return mean < center.mean;
|
return mean < center.mean;
|
||||||
}
|
}
|
||||||
|
|
@ -60,12 +73,13 @@ public:
|
||||||
|
|
||||||
Gaussian_point(double data): data(data), total_membership(0.0) {
|
Gaussian_point(double data): data(data), total_membership(0.0) {
|
||||||
}
|
}
|
||||||
void calculate_total_membership(const std::vector<Gaussian_center>& centers) {
|
double calculate_total_membership(const std::vector<Gaussian_center>& centers) {
|
||||||
total_membership = 0.0;
|
total_membership = 0.0;
|
||||||
for(std::vector<Gaussian_center>::const_iterator it = centers.begin();
|
for(std::vector<Gaussian_center>::const_iterator it = centers.begin();
|
||||||
it != centers.end(); ++it) {
|
it != centers.end(); ++it) {
|
||||||
total_membership += it->probability(data) * it->mixing_coefficient;
|
total_membership += it->probability(data) * it->mixing_coefficient;
|
||||||
}
|
}
|
||||||
|
return total_membership;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -106,22 +120,52 @@ public:
|
||||||
int maximum_iteration;
|
int maximum_iteration;
|
||||||
bool is_converged;
|
bool is_converged;
|
||||||
protected:
|
protected:
|
||||||
int seed;
|
unsigned int seed;
|
||||||
|
std::vector<std::vector<double> > probability_matrix;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
/* For uniform initialization, with one run */
|
||||||
Expectation_maximization(int number_of_centers, const std::vector<double>& data,
|
Expectation_maximization(int number_of_centers, const std::vector<double>& data,
|
||||||
const std::vector<int>& initial_centers = std::vector<int>(),
|
double threshold = DEF_THRESHOLD, int maximum_iteration = DEF_MAX_ITER,
|
||||||
int number_of_runs = 50, int maximum_iteration = 100)
|
bool use_matrix = USE_MATRIX)
|
||||||
: points(data.begin(), data.end()), threshold(1e-4),
|
: points(data.begin(), data.end()), threshold(threshold),
|
||||||
maximum_iteration(maximum_iteration), is_converged(false), seed(time(NULL)) {
|
maximum_iteration(maximum_iteration), is_converged(false), seed(0),
|
||||||
srand(seed);
|
probability_matrix( use_matrix ? std::vector<std::vector<double> >
|
||||||
if(initial_centers.empty()) {
|
(number_of_centers, std::vector<double>(points.size())) :
|
||||||
calculate_fitting_with_multiple_run(number_of_centers, number_of_runs);
|
std::vector<std::vector<double> >()) {
|
||||||
} else {
|
for(int i = 0; i < 20; ++i) {
|
||||||
initiate_centers(number_of_centers, initial_centers);
|
initiate_centers_uniformly(number_of_centers);
|
||||||
calculate_fitting();
|
use_matrix ? calculate_fitting_with_matrix() : calculate_fitting();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
/* For initialization with provided center ids per point, with one run */
|
||||||
|
Expectation_maximization(int number_of_centers, const std::vector<double>& data,
|
||||||
|
const std::vector<int>& initial_center_ids,
|
||||||
|
double threshold = DEF_THRESHOLD, int maximum_iteration = DEF_MAX_ITER,
|
||||||
|
bool use_matrix = USE_MATRIX)
|
||||||
|
: points(data.begin(), data.end()), threshold(threshold),
|
||||||
|
maximum_iteration(maximum_iteration), is_converged(false), seed(0),
|
||||||
|
probability_matrix( use_matrix ? std::vector<std::vector<double> >
|
||||||
|
(number_of_centers, std::vector<double>(points.size())) :
|
||||||
|
std::vector<std::vector<double> >()) {
|
||||||
|
initiate_centers_from_memberships(number_of_centers, initial_center_ids);
|
||||||
|
use_matrix ? calculate_fitting_with_matrix() : calculate_fitting();
|
||||||
|
}
|
||||||
|
/* For initialization with random center selection (Forgy), with multiple run */
|
||||||
|
Expectation_maximization(int number_of_centers, const std::vector<double>& data,
|
||||||
|
int number_of_runs,
|
||||||
|
double threshold = DEF_THRESHOLD, int maximum_iteration = DEF_MAX_ITER,
|
||||||
|
bool use_matrix = USE_MATRIX)
|
||||||
|
: points(data.begin(), data.end()), threshold(threshold),
|
||||||
|
maximum_iteration(maximum_iteration), is_converged(false),
|
||||||
|
seed(static_cast<unsigned int>(time(NULL))),
|
||||||
|
probability_matrix( use_matrix ? std::vector<std::vector<double> >
|
||||||
|
(number_of_centers, std::vector<double>(points.size())) :
|
||||||
|
std::vector<std::vector<double> >()) {
|
||||||
|
srand(seed);
|
||||||
|
calculate_fitting_with_multiple_run(number_of_centers, number_of_runs,
|
||||||
|
use_matrix);
|
||||||
|
}
|
||||||
|
|
||||||
void fill_with_center_ids(std::vector<int>& data_centers) {
|
void fill_with_center_ids(std::vector<int>& data_centers) {
|
||||||
data_centers.reserve(points.size());
|
data_centers.reserve(points.size());
|
||||||
|
|
@ -142,6 +186,7 @@ public:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
protected:
|
protected:
|
||||||
|
|
||||||
void initiate_centers_randomly(int number_of_centers) {
|
void initiate_centers_randomly(int number_of_centers) {
|
||||||
centers.clear();
|
centers.clear();
|
||||||
/* Randomly generate means of centers */
|
/* Randomly generate means of centers */
|
||||||
|
|
@ -149,7 +194,7 @@ protected:
|
||||||
double initial_mixing_coefficient = 1.0;
|
double initial_mixing_coefficient = 1.0;
|
||||||
double initial_deviation = 1.0 / (2.0 * number_of_centers);
|
double initial_deviation = 1.0 / (2.0 * number_of_centers);
|
||||||
for(int i = 0; i < number_of_centers; ++i) {
|
for(int i = 0; i < number_of_centers; ++i) {
|
||||||
double random_index = rand() % points.size();
|
int random_index = rand() % points.size();
|
||||||
//center_indexes.push_back(random_index);
|
//center_indexes.push_back(random_index);
|
||||||
Gaussian_point mean_point = points[random_index];
|
Gaussian_point mean_point = points[random_index];
|
||||||
double initial_mean = mean_point.data;
|
double initial_mean = mean_point.data;
|
||||||
|
|
@ -162,7 +207,7 @@ protected:
|
||||||
|
|
||||||
void initiate_centers_uniformly(int number_of_centers) {
|
void initiate_centers_uniformly(int number_of_centers) {
|
||||||
centers.clear();
|
centers.clear();
|
||||||
/* Uniformly generate centers */
|
/* Uniformly generate means of centers */
|
||||||
double initial_deviation = 1.0 / (2.0 * number_of_centers);
|
double initial_deviation = 1.0 / (2.0 * number_of_centers);
|
||||||
double initial_mixing_coefficient = 1.0;
|
double initial_mixing_coefficient = 1.0;
|
||||||
for(int i = 0; i < number_of_centers; ++i) {
|
for(int i = 0; i < number_of_centers; ++i) {
|
||||||
|
|
@ -174,15 +219,14 @@ protected:
|
||||||
}
|
}
|
||||||
|
|
||||||
void initiate_centers_from_memberships(int number_of_centers,
|
void initiate_centers_from_memberships(int number_of_centers,
|
||||||
const std::vector<int>& initial_centers) {
|
const std::vector<int>& initial_center_ids) {
|
||||||
centers.clear();
|
|
||||||
/* Calculate mean */
|
/* Calculate mean */
|
||||||
int number_of_point = initial_centers.size();
|
int number_of_points = initial_center_ids.size();
|
||||||
centers = std::vector<Gaussian_center>(number_of_centers);
|
centers = std::vector<Gaussian_center>(number_of_centers);
|
||||||
std::vector<int> member_count(number_of_centers, 0);
|
std::vector<int> member_count(number_of_centers, 0);
|
||||||
|
|
||||||
for(int i = 0; i < number_of_point; ++i) {
|
for(int i = 0; i < number_of_points; ++i) {
|
||||||
int center_id = initial_centers[i];
|
int center_id = initial_center_ids[i];
|
||||||
double data = points[i].data;
|
double data = points[i].data;
|
||||||
centers[center_id].mean += data;
|
centers[center_id].mean += data;
|
||||||
member_count[center_id] += 1;
|
member_count[center_id] += 1;
|
||||||
|
|
@ -191,11 +235,11 @@ protected:
|
||||||
for(int i = 0; i < number_of_centers; ++i) {
|
for(int i = 0; i < number_of_centers; ++i) {
|
||||||
centers[i].mean /= member_count[i];
|
centers[i].mean /= member_count[i];
|
||||||
centers[i].mixing_coefficient = member_count[i] / static_cast<double>
|
centers[i].mixing_coefficient = member_count[i] / static_cast<double>
|
||||||
(number_of_point);
|
(number_of_points);
|
||||||
}
|
}
|
||||||
/* Calculate deviation */
|
/* Calculate deviation */
|
||||||
for(int i = 0; i < number_of_point; ++i) {
|
for(int i = 0; i < number_of_points; ++i) {
|
||||||
int center_id = initial_centers[i];
|
int center_id = initial_center_ids[i];
|
||||||
double data = points[i].data;
|
double data = points[i].data;
|
||||||
centers[center_id].deviation += pow(data - centers[center_id].mean, 2);
|
centers[center_id].deviation += pow(data - centers[center_id].mean, 2);
|
||||||
}
|
}
|
||||||
|
|
@ -205,29 +249,6 @@ protected:
|
||||||
sort(centers.begin(), centers.end());
|
sort(centers.begin(), centers.end());
|
||||||
}
|
}
|
||||||
|
|
||||||
void initiate_centers(int number_of_centers,
|
|
||||||
const std::vector<int>& initial_centers) {
|
|
||||||
if(initial_centers.empty()) {
|
|
||||||
initiate_centers_uniformly(number_of_centers);
|
|
||||||
} else {
|
|
||||||
initiate_centers_from_memberships(number_of_centers, initial_centers);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
/*Calculates total membership values for a point */
|
|
||||||
void calculate_membership() {
|
|
||||||
for(std::vector<Gaussian_point>::iterator point_it = points.begin();
|
|
||||||
point_it != points.end(); ++point_it) {
|
|
||||||
point_it->calculate_total_membership(centers);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
double calculate_deviation(const Gaussian_point& point) const {
|
|
||||||
double deviation = 0.0;
|
|
||||||
for(std::vector<Gaussian_point>::const_iterator point_it = points.begin();
|
|
||||||
point_it != points.end(); ++point_it) {
|
|
||||||
deviation += pow(point_it->data - point.data, 2);
|
|
||||||
}
|
|
||||||
return sqrt(deviation / points.size());
|
|
||||||
}
|
|
||||||
/*Calculates new parameter values for each cluster */
|
/*Calculates new parameter values for each cluster */
|
||||||
void calculate_parameters() {
|
void calculate_parameters() {
|
||||||
for(std::vector<Gaussian_center>::iterator center_it = centers.begin();
|
for(std::vector<Gaussian_center>::iterator center_it = centers.begin();
|
||||||
|
|
@ -235,35 +256,109 @@ protected:
|
||||||
center_it->calculate_parameters(points);
|
center_it->calculate_parameters(points);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void calculate_parameters_with_matrix() {
|
||||||
|
for(std::size_t center_i = 0; center_i < centers.size(); ++center_i) {
|
||||||
|
// Calculate new mean
|
||||||
|
double new_mean = 0.0, total_membership = 0.0;
|
||||||
|
for(std::size_t point_i = 0; point_i < points.size(); ++point_i) {
|
||||||
|
double membership = probability_matrix[center_i][point_i];
|
||||||
|
new_mean += membership * points[point_i].data;
|
||||||
|
total_membership += membership;
|
||||||
|
}
|
||||||
|
new_mean /= total_membership;
|
||||||
|
// Calculate new deviation
|
||||||
|
double new_deviation = 0.0;
|
||||||
|
for(std::size_t point_i = 0; point_i < points.size(); ++point_i) {
|
||||||
|
double membership = probability_matrix[center_i][point_i];
|
||||||
|
new_deviation += membership * pow(points[point_i].data - new_mean, 2);
|
||||||
|
}
|
||||||
|
new_deviation = sqrt(new_deviation/total_membership);
|
||||||
|
// Assign new parameters
|
||||||
|
centers[center_i].mixing_coefficient = total_membership;
|
||||||
|
centers[center_i].deviation = new_deviation;
|
||||||
|
centers[center_i].mean = new_mean;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/*Calculates how much this adjustment is likely to represent data*/
|
/*Calculates how much this adjustment is likely to represent data*/
|
||||||
double calculate_likelihood() {
|
double calculate_likelihood() {
|
||||||
double likelihood = 0.0;
|
double likelihood = 0.0;
|
||||||
for(std::vector<Gaussian_point>::iterator point_it = points.begin();
|
for(std::vector<Gaussian_point>::iterator point_it = points.begin();
|
||||||
point_it != points.end(); ++point_it) {
|
point_it != points.end(); ++point_it) {
|
||||||
double point_likelihood = 0.0;
|
double point_likelihood = point_it->calculate_total_membership(centers);
|
||||||
for(std::vector<Gaussian_center>::iterator center_it = centers.begin();
|
|
||||||
center_it != centers.end(); ++center_it) {
|
|
||||||
point_likelihood += center_it->mixing_coefficient * center_it->probability(
|
|
||||||
point_it->data);
|
|
||||||
}
|
|
||||||
likelihood += log(point_likelihood);
|
likelihood += log(point_likelihood);
|
||||||
}
|
}
|
||||||
return likelihood;
|
return likelihood;
|
||||||
}
|
}
|
||||||
|
|
||||||
double iterate() {
|
double calculate_likelihood_with_matrix() {
|
||||||
calculate_membership();
|
/**
|
||||||
|
* Calculate Log-likelihood
|
||||||
|
* The trick (merely a trick) is while calculating log-likelihood, we also store probability results into matrix,
|
||||||
|
* so that in next iteration we do not have to calculate them again.
|
||||||
|
*/
|
||||||
|
double likelihood = 0.0;
|
||||||
|
for(std::size_t point_i = 0; point_i < points.size(); ++point_i) {
|
||||||
|
double total_membership = 0.0;
|
||||||
|
for(std::size_t center_i = 0; center_i < centers.size(); ++center_i) {
|
||||||
|
double membership = centers[center_i].probability_with_coef(
|
||||||
|
points[point_i].data);
|
||||||
|
total_membership += membership;
|
||||||
|
probability_matrix[center_i][point_i] = membership;
|
||||||
|
}
|
||||||
|
for(std::size_t center_i = 0; center_i < centers.size(); ++center_i) {
|
||||||
|
probability_matrix[center_i][point_i] /= total_membership;
|
||||||
|
}
|
||||||
|
likelihood += log(total_membership);
|
||||||
|
}
|
||||||
|
return likelihood;
|
||||||
|
}
|
||||||
|
|
||||||
|
double iterate(bool first_iteration) {
|
||||||
|
// E-step
|
||||||
|
if(first_iteration) {
|
||||||
|
calculate_likelihood();
|
||||||
|
}
|
||||||
|
// M-step
|
||||||
calculate_parameters();
|
calculate_parameters();
|
||||||
|
// Likelihood step
|
||||||
|
// It also stores total_membership values into points, so corresponds to E-step of next iteration.
|
||||||
return calculate_likelihood();
|
return calculate_likelihood();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
double iterate_with_matrix(bool first_iteration) {
|
||||||
|
// E-step
|
||||||
|
if(first_iteration) {
|
||||||
|
calculate_likelihood_with_matrix();
|
||||||
|
}
|
||||||
|
// M-step
|
||||||
|
calculate_parameters_with_matrix();
|
||||||
|
// Likelihood step
|
||||||
|
// It also stores membership values into matrix, so corresponds to E-step of next iteration.
|
||||||
|
return calculate_likelihood_with_matrix();
|
||||||
|
}
|
||||||
|
|
||||||
double calculate_fitting() {
|
double calculate_fitting() {
|
||||||
double likelihood = -(std::numeric_limits<double>::max)(), prev_likelihood;
|
double likelihood = -(std::numeric_limits<double>::max)(), prev_likelihood;
|
||||||
int iteration_count = 0;
|
int iteration_count = 0;
|
||||||
is_converged = false;
|
is_converged = false;
|
||||||
while(!is_converged && iteration_count++ < maximum_iteration) {
|
while(!is_converged && iteration_count++ < maximum_iteration) {
|
||||||
prev_likelihood = likelihood;
|
prev_likelihood = likelihood;
|
||||||
likelihood = iterate();
|
likelihood = iterate(iteration_count == 1);
|
||||||
|
is_converged = likelihood - prev_likelihood < threshold * fabs(likelihood);
|
||||||
|
}
|
||||||
|
SEG_DEBUG(std::cout << likelihood << " " << iteration_count << std::endl)
|
||||||
|
return likelihood;
|
||||||
|
}
|
||||||
|
|
||||||
|
double calculate_fitting_with_matrix() {
|
||||||
|
double likelihood = -(std::numeric_limits<double>::max)(), prev_likelihood;
|
||||||
|
int iteration_count = 0;
|
||||||
|
is_converged = false;
|
||||||
|
while(!is_converged && iteration_count++ < maximum_iteration) {
|
||||||
|
prev_likelihood = likelihood;
|
||||||
|
likelihood = iterate_with_matrix(iteration_count == 1);
|
||||||
is_converged = likelihood - prev_likelihood < threshold * fabs(likelihood);
|
is_converged = likelihood - prev_likelihood < threshold * fabs(likelihood);
|
||||||
}
|
}
|
||||||
SEG_DEBUG(std::cout << likelihood << " " << iteration_count << std::endl)
|
SEG_DEBUG(std::cout << likelihood << " " << iteration_count << std::endl)
|
||||||
|
|
@ -271,15 +366,16 @@ protected:
|
||||||
}
|
}
|
||||||
|
|
||||||
void calculate_fitting_with_multiple_run(int number_of_centers,
|
void calculate_fitting_with_multiple_run(int number_of_centers,
|
||||||
int number_of_run) {
|
int number_of_run, bool use_matrix) {
|
||||||
double max_likelihood = -(std::numeric_limits<double>::max)();
|
double max_likelihood = -(std::numeric_limits<double>::max)();
|
||||||
std::vector<Gaussian_center> max_centers;
|
std::vector<Gaussian_center> max_centers;
|
||||||
|
|
||||||
while(number_of_run-- > 0) {
|
while(number_of_run-- > 0) {
|
||||||
initiate_centers_randomly(number_of_centers);
|
initiate_centers_randomly(number_of_centers);
|
||||||
|
|
||||||
double likelihood = calculate_fitting();
|
double likelihood = use_matrix ? calculate_fitting_with_matrix() :
|
||||||
write_center_parameters("center_paramters.txt");
|
calculate_fitting();
|
||||||
|
//write_center_parameters("center_paramters.txt");
|
||||||
if(likelihood > max_likelihood) {
|
if(likelihood > max_likelihood) {
|
||||||
max_centers = centers;
|
max_centers = centers;
|
||||||
max_likelihood = likelihood;
|
max_likelihood = likelihood;
|
||||||
|
|
@ -307,7 +403,11 @@ protected:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
}//namespace internal
|
||||||
}//namespace CGAL
|
}//namespace CGAL
|
||||||
|
#undef DEF_MAX_ITER
|
||||||
|
#undef DEF_THRESHOLD
|
||||||
|
#undef USE_MATRIX
|
||||||
#ifdef SEG_DEBUG
|
#ifdef SEG_DEBUG
|
||||||
#undef SEG_DEBUG
|
#undef SEG_DEBUG
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
|
|
@ -16,6 +16,8 @@
|
||||||
|
|
||||||
namespace CGAL
|
namespace CGAL
|
||||||
{
|
{
|
||||||
|
namespace internal
|
||||||
|
{
|
||||||
|
|
||||||
class K_means_point;
|
class K_means_point;
|
||||||
|
|
||||||
|
|
@ -81,13 +83,14 @@ public:
|
||||||
int maximum_iteration;
|
int maximum_iteration;
|
||||||
bool is_converged;
|
bool is_converged;
|
||||||
protected:
|
protected:
|
||||||
int seed;
|
unsigned int seed;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
K_means_clustering(int number_of_centers, const std::vector<double>& data,
|
K_means_clustering(int number_of_centers, const std::vector<double>& data,
|
||||||
int number_of_run = 50, int maximum_iteration = 100)
|
int number_of_run = 50, int maximum_iteration = 100)
|
||||||
: points(data.begin(), data.end()), maximum_iteration(maximum_iteration),
|
: points(data.begin(), data.end()), maximum_iteration(maximum_iteration),
|
||||||
is_converged(false), seed(time(NULL)) {
|
is_converged(false),
|
||||||
|
seed(static_cast<unsigned int>(time(NULL))) {
|
||||||
srand(seed);
|
srand(seed);
|
||||||
calculate_clustering_with_multiple_run(number_of_centers, number_of_run);
|
calculate_clustering_with_multiple_run(number_of_centers, number_of_run);
|
||||||
}
|
}
|
||||||
|
|
@ -180,6 +183,7 @@ public:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
}//namespace internal
|
||||||
}//namespace CGAL
|
}//namespace CGAL
|
||||||
#ifdef SEG_DEBUG
|
#ifdef SEG_DEBUG
|
||||||
#undef SEG_DEBUG
|
#undef SEG_DEBUG
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue