From ae1b4eb61f6eb0c76132d91d3152e2039649c059 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=8Dlker=20Yaz?= Date: Thu, 31 May 2012 16:30:18 +0000 Subject: [PATCH] some modifications on EM, first version of k-means clustering. --- .gitattributes | 1 + .../include/CGAL/Surface_mesh_segmentation.h | 115 +++++++++---- .../Expectation_maximization.h | 95 ++++++++--- .../K_means_clustering.h | 157 ++++++++++++++++++ 4 files changed, 314 insertions(+), 54 deletions(-) create mode 100644 Surface_mesh_segmentation/include/CGAL/internal/Surface_mesh_segmentation/K_means_clustering.h diff --git a/.gitattributes b/.gitattributes index 4b60dfa3431..f727eb99162 100644 --- a/.gitattributes +++ b/.gitattributes @@ -4013,6 +4013,7 @@ Surface_mesh_segmentation/example/Surface_mesh_segmentation/CMakeLists.txt -text Surface_mesh_segmentation/example/Surface_mesh_segmentation/simple_example.cpp -text Surface_mesh_segmentation/include/CGAL/Surface_mesh_segmentation.h -text Surface_mesh_segmentation/include/CGAL/internal/Surface_mesh_segmentation/Expectation_maximization.h -text +Surface_mesh_segmentation/include/CGAL/internal/Surface_mesh_segmentation/K_means_clustering.h -text Surface_mesh_segmentation/package_info/Surface_mesh_segmentation/copyright -text Surface_mesh_segmentation/package_info/Surface_mesh_segmentation/license.txt -text Surface_mesh_segmentation/package_info/Surface_mesh_segmentation/maintainer -text diff --git a/Surface_mesh_segmentation/include/CGAL/Surface_mesh_segmentation.h b/Surface_mesh_segmentation/include/CGAL/Surface_mesh_segmentation.h index 36d98b88af4..01750615bc7 100644 --- a/Surface_mesh_segmentation/include/CGAL/Surface_mesh_segmentation.h +++ b/Surface_mesh_segmentation/include/CGAL/Surface_mesh_segmentation.h @@ -1,18 +1,18 @@ #ifndef CGAL_SURFACE_MESH_SEGMENTATION_H #define CGAL_SURFACE_MESH_SEGMENTATION_H -/* NEED TO BE DONE - * About implementation: - * +) I am not using BGL, as far as I checked there is a progress on BGL redesign +/* NEED TO BE DONE */ +/* About implementation: +/* +) I am not using BGL, as far as I checked there is a progress on BGL redesign (https://cgal.geometryfactory.com/CGAL/Members/wiki/Features/BGL) which introduces some features - for face-based traversal / manipulation by FaceGraphs - * +) Deciding on which parameters will be taken from user - * +) Make it more readable: calculate_sdf_value_of_facet function. + for face-based traversal / manipulation by FaceGraphs */ +/* +) Deciding on which parameters will be taken from user */ +/* +) Make it more readable: calculate_sdf_value_of_facet function. + +/* About paper (and correctness / efficiency etc.): +/* +) Weighting ray distances with inverse of their angles: not sure how to weight exactly */ +/* +) Anisotropic smoothing: have no idea what it is exactly, should read some material (google search is not enough) */ +/* +) Deciding how to generate rays in cone: for now using "polar angle" and "accept-reject (square)" and "concentric mapping" techniques */ - * About paper (and correctness / efficiency etc.): - * +) Weighting ray distances with inverse of their angles: not sure how to weight exactly - * +) Anisotropic smoothing: have no idea what it is exactly, should read some material (google search is not enough) - * +) Deciding how to generate rays in cone: for now using "polar angle" and "generate in square then accept-reject" techniques - */ #include #include #include @@ -21,7 +21,9 @@ #include //#include "Expectation_maximization.h" +//#include "K_means_clustering.h" #include +#include #include #include @@ -58,7 +60,7 @@ protected: typedef std::map Face_center_map; /*Sampled points from disk, t1 = coordinate-x, t2 = coordinate-y, t3 = angle with cone-normal. */ typedef CGAL::Triple Disk_sample; - typedef std::vector > Disk_samples_list; + typedef std::vector> Disk_samples_list; template struct compare_pairs { @@ -69,7 +71,7 @@ protected: template struct compare_pairs_using_first { - bool operator()(const ValueTypeName& v1, const ValueTypeName& v2) { + bool operator()(ValueTypeName& v1, ValueTypeName& v2) { return v1.first < v2.first; } }; @@ -88,11 +90,13 @@ public: std::ofstream log_file; + //std::vector center_memberships_temp; + //member functions public: Surface_mesh_segmentation(Polyhedron* mesh, - int number_of_rays_sqrt = 6, double cone_angle = (2.0 / 3.0) * CGAL_PI, - int number_of_centers = 1); + int number_of_rays_sqrt = 9, double cone_angle = (2.0 / 3.0) * CGAL_PI, + int number_of_centers = 2); void calculate_sdf_values(); @@ -118,6 +122,8 @@ public: void smooth_sdf_values(); void apply_GMM_fitting(); + void apply_K_means_clustering(); + void apply_GMM_fitting_with_K_means_init(); void write_sdf_values(const char* file_name); void read_sdf_values(const char* file_name); @@ -133,9 +139,8 @@ inline Surface_mesh_segmentation::Surface_mesh_segmentation( { disk_sampling_concentric_mapping(); calculate_sdf_values(); - //write_sdf_values("sdf_values_2.txt"); - //read_sdf_values("sdf_values.txt"); - //apply_GMM_fitting(); + //write_sdf_values("sdf_values_sample_9_p.txt"); + //read_sdf_values("sdf_values_sample_9_p.txt"); } template @@ -173,10 +178,12 @@ Surface_mesh_segmentation::calculate_sdf_value_of_facet( v2 = v2 / CGAL::sqrt(v2.squared_length()); int ray_count = number_of_rays_sqrt * number_of_rays_sqrt; + std::vector ray_distances, ray_weights; ray_distances.reserve(ray_count); ray_weights.reserve(ray_count); - double angle_st_dev = cone_angle / 4; //Not sure what to use here. + + double angle_st_dev = cone_angle / 3; //Not sure what to use here. double length_of_normal = 1.0 / tan(cone_angle / 2); Vector normal = normal_const * length_of_normal; @@ -200,6 +207,7 @@ Surface_mesh_segmentation::calculate_sdf_value_of_facet( return calculate_sdf_value_from_rays_with_trimmed_mean(ray_distances, ray_weights); } + template void Surface_mesh_segmentation::cast_and_return_minimum( const Ray& ray, const Tree& tree, const Facet_handle& facet, @@ -261,8 +269,8 @@ Surface_mesh_segmentation::calculate_sdf_value_from_rays( double total_weights = 0.0, total_distance = 0.0; double median_sdf = 0.0, mean_sdf = 0.0, st_dev = 0.0; /* Calculate mean sdf */ - std::vector::iterator w_it = ray_weights.begin(); - for(std::vector::iterator dist_it = ray_distances.begin(); + typename std::vector::iterator w_it = ray_weights.begin(); + for(typename std::vector::iterator dist_it = ray_distances.begin(); dist_it != ray_distances.end(); ++dist_it, ++w_it) { total_distance += (*dist_it) * (*w_it); @@ -284,7 +292,7 @@ Surface_mesh_segmentation::calculate_sdf_value_from_rays( median_sdf = ray_distances[half_ray_count]; } /* Calculate st dev using mean_sdf as mean */ - for(std::vector::iterator dist_it = ray_distances.begin(); + for(typename std::vector::iterator dist_it = ray_distances.begin(); dist_it != ray_distances.end(); ++dist_it) { double dif = (*dist_it) - mean_sdf; st_dev += dif * dif; @@ -292,7 +300,7 @@ Surface_mesh_segmentation::calculate_sdf_value_from_rays( st_dev = CGAL::sqrt(st_dev / (ray_distances.size())); /* Calculate sdf, accept rays : ray_dist - median < st dev */ w_it = ray_weights.begin(); - for(std::vector::iterator dist_it = ray_distances.begin(); + for(typename std::vector::iterator dist_it = ray_distances.begin(); dist_it != ray_distances.end(); ++dist_it, ++w_it) { if(fabs((*dist_it) - median_sdf) > st_dev * 0.5) { @@ -312,8 +320,8 @@ Surface_mesh_segmentation::calculate_sdf_value_from_rays_with_mean( { double total_weights = 0.0, total_distance = 0.0; double mean_sdf = 0.0, st_dev = 0.0; - std::vector::iterator w_it = ray_weights.begin(); - for(std::vector::iterator dist_it = ray_distances.begin(); + typename std::vector::iterator w_it = ray_weights.begin(); + for(typename std::vector::iterator dist_it = ray_distances.begin(); dist_it != ray_distances.end(); ++dist_it, ++w_it) { total_distance += (*dist_it) * (*w_it); total_weights += (*w_it); @@ -321,7 +329,7 @@ Surface_mesh_segmentation::calculate_sdf_value_from_rays_with_mean( mean_sdf = total_distance / total_weights; total_weights = 0.0; total_distance = 0.0; - for(std::vector::iterator dist_it = ray_distances.begin(); + for(typename std::vector::iterator dist_it = ray_distances.begin(); dist_it != ray_distances.end(); ++dist_it) { double dif = (*dist_it) - mean_sdf; st_dev += dif * dif; @@ -329,7 +337,7 @@ Surface_mesh_segmentation::calculate_sdf_value_from_rays_with_mean( st_dev = CGAL::sqrt(st_dev / (ray_distances.size())); w_it = ray_weights.begin(); - for(std::vector::iterator dist_it = ray_distances.begin(); + for(typename std::vector::iterator dist_it = ray_distances.begin(); dist_it != ray_distances.end(); ++dist_it, ++w_it) { if(fabs((*dist_it) - mean_sdf) > st_dev) { continue; @@ -346,16 +354,16 @@ Surface_mesh_segmentation::calculate_sdf_value_from_rays_with_trimme std::vector& ray_distances, std::vector& ray_weights) const { - std::vector > distances_with_weights; + std::vector> distances_with_weights; distances_with_weights.reserve(ray_distances.size()); typename std::vector::iterator w_it = ray_weights.begin(); - for(std::vector::iterator dist_it = ray_distances.begin(); + for(typename std::vector::iterator dist_it = ray_distances.begin(); dist_it != ray_distances.end(); ++dist_it, ++w_it) { distances_with_weights.push_back(std::pair((*dist_it), (*w_it))); } std::sort(distances_with_weights.begin(), distances_with_weights.end(), - compare_pairs_using_first >()); + compare_pairs_using_first>()); int b = floor(distances_with_weights.size() / 20.0 + 0.5); // Eliminate %5. int e = distances_with_weights.size() - b; // Eliminate %5. @@ -371,7 +379,7 @@ Surface_mesh_segmentation::calculate_sdf_value_from_rays_with_trimme total_weights = 0.0; total_distance = 0.0; - for(std::vector::iterator dist_it = ray_distances.begin(); + for(typename std::vector::iterator dist_it = ray_distances.begin(); dist_it != ray_distances.end(); ++dist_it) { double dif = (*dist_it) - trimmed_mean; st_dev += dif * dif; @@ -518,12 +526,55 @@ inline void Surface_mesh_segmentation::apply_GMM_fitting() Expectation_maximization fitter(number_of_centers, sdf_vector); std::vector center_memberships; fitter.fill_with_center_ids(center_memberships); - std::vector::iterator center_it = center_memberships.begin(); + typename std::vector::iterator center_it = center_memberships.begin(); for(Facet_iterator facet_it = mesh->facets_begin(); facet_it != mesh->facets_end(); ++facet_it, ++center_it) { centers.insert(std::pair(facet_it, (*center_it))); } +} +template +inline void Surface_mesh_segmentation::apply_K_means_clustering() +{ + centers.clear(); + std::vector sdf_vector; + for(Facet_iterator facet_it = mesh->facets_begin(); + facet_it != mesh->facets_end(); ++facet_it) { + sdf_vector.push_back(sdf_values[facet_it]); + } + K_means_clustering clusterer(number_of_centers, sdf_vector); + std::vector center_memberships; + clusterer.fill_with_center_ids(center_memberships); + typename std::vector::iterator center_it = center_memberships.begin(); + for(Facet_iterator facet_it = mesh->facets_begin(); + facet_it != mesh->facets_end(); ++facet_it, ++center_it) { + centers.insert(std::pair(facet_it, (*center_it))); + } + //center_memberships_temp = center_memberships; //remove +} +template +inline void +Surface_mesh_segmentation::apply_GMM_fitting_with_K_means_init() +{ + centers.clear(); + std::vector sdf_vector; + for(Facet_iterator facet_it = mesh->facets_begin(); + facet_it != mesh->facets_end(); ++facet_it) { + sdf_vector.push_back(sdf_values[facet_it]); + } + K_means_clustering clusterer(number_of_centers, sdf_vector); + std::vector center_memberships; + clusterer.fill_with_center_ids(center_memberships); + //std::vector center_memberships = center_memberships_temp; + Expectation_maximization fitter(number_of_centers, sdf_vector, + center_memberships); + center_memberships.clear(); + fitter.fill_with_center_ids(center_memberships); + typename std::vector::iterator center_it = center_memberships.begin(); + for(Facet_iterator facet_it = mesh->facets_begin(); + facet_it != mesh->facets_end(); ++facet_it, ++center_it) { + centers.insert(std::pair(facet_it, (*center_it))); + } } template diff --git a/Surface_mesh_segmentation/include/CGAL/internal/Surface_mesh_segmentation/Expectation_maximization.h b/Surface_mesh_segmentation/include/CGAL/internal/Surface_mesh_segmentation/Expectation_maximization.h index 01fa28d7f0a..1bf8ecd6d99 100644 --- a/Surface_mesh_segmentation/include/CGAL/internal/Surface_mesh_segmentation/Expectation_maximization.h +++ b/Surface_mesh_segmentation/include/CGAL/internal/Surface_mesh_segmentation/Expectation_maximization.h @@ -1,10 +1,12 @@ -#ifndef CGAL_EXPECTATION_MAXIMIZATION_H -#define CGAL_EXPECTATION_MAXIMIZATION_H +#ifndef CGAL_SEGMENTATION_EXPECTATION_MAXIMIZATION_H +#define CGAL_SEGMENTATION_EXPECTATION_MAXIMIZATION_H /* NEED TO BE DONE */ /* About implementation: -/* Calculating probability multiple times: need to solved. */ +/* Calculating probability multiple times */ +/* About safe division: where centers have no members */ #include #include +#include //#define DIV_INV_SQRT_2_PI 0.3989422804 namespace CGAL { @@ -18,17 +20,22 @@ public: double deviation; double mixing_coefficient; + Gaussian_center(): mean(0), deviation(0), mixing_coefficient(1.0) { + } Gaussian_center(double mean, double deviation, double mixing_coefficient) : mean(mean), deviation(deviation), mixing_coefficient(mixing_coefficient) { } double probability(double x) const { double e_over = -0.5 * pow((x - mean) / deviation, 2); - return exp(e_over) * (1.0 / deviation) ; + return exp(e_over) / deviation; } double probability_proportional(double x) const { double e_over = -0.5 * pow((x - mean) / deviation, 2); return exp(e_over); } + bool operator < (const Gaussian_center& center) { + return mean < center.mean; + } void calculate_parameters(const std::vector& points); }; @@ -72,7 +79,7 @@ inline void Gaussian_center::calculate_parameters(const } new_deviation = sqrt(new_deviation/total_membership); /* Calculate new mixing coefficient */ - mixing_coefficient = total_membership / points.size(); + mixing_coefficient = total_membership; deviation = new_deviation; mean = new_mean; @@ -84,13 +91,19 @@ public: std::vector centers; std::vector points; double threshold; + int maximum_iteration; + bool is_converged; + Expectation_maximization(int number_of_centers, const std::vector& data, + const std::vector& initial_centers = std::vector(), + int maximum_iteration = 500) + : points(data.begin(), data.end()), threshold(1e-4), + maximum_iteration(maximum_iteration), is_converged(false) { - Expectation_maximization(int number_of_centers, const std::vector& data) - : points(data.begin(), data.end()), threshold(0.00000001) { - initiate_centers(number_of_centers); + initiate_centers(number_of_centers, initial_centers); calculate_fitting(); } void fill_with_center_ids(std::vector& data_centers) { + data_centers.reserve(points.size()); for(std::vector::iterator point_it = points.begin(); point_it != points.end(); ++point_it) { double max_likelihood = 0.0; @@ -108,15 +121,46 @@ public: } } protected: - void initiate_centers(int number_of_centers) { - double initial_deviation = 1.0 / (2.0 * number_of_centers); - double initial_mixing_coefficient = 1.0 / number_of_centers; - for(int i = 0; i < number_of_centers; ++i) { - double initial_mean = (i + 1.0) / (number_of_centers + 1.0); - Gaussian_center center(initial_mean, initial_deviation, - initial_mixing_coefficient); - centers.push_back(center); + void initiate_centers(int number_of_centers, + const std::vector& initial_centers) { + if(initial_centers.empty()) { + /* Uniformly generate centers */ + double initial_deviation = 1.0 / (2.0 * number_of_centers); + double initial_mixing_coefficient = 1.0 / number_of_centers; + for(int i = 0; i < number_of_centers; ++i) { + double initial_mean = (i + 1.0) / (number_of_centers + 1.0); + centers.push_back(Gaussian_center(initial_mean, initial_deviation, + initial_mixing_coefficient)); + } + } else { + /* Calculate mean */ + int number_of_point = initial_centers.size(); + centers = std::vector(number_of_centers); + std::vector member_count(number_of_centers, 0); + + for(int i = 0; i < number_of_point; ++i) { + int center_id = initial_centers[i]; + double data = points[i].data; + centers[center_id].mean += data; + member_count[center_id] += 1; + } + /* Assign mean, and mixing coef */ + for(int i = 0; i < number_of_centers; ++i) { + centers[i].mean /= member_count[i]; + centers[i].mixing_coefficient = member_count[i] / static_cast + (number_of_point); + } + /* Calculate deviation */ + for(int i = 0; i < number_of_point; ++i) { + int center_id = initial_centers[i]; + double data = points[i].data; + centers[center_id].deviation += pow(data - centers[center_id].mean, 2); + } + for(int i = 0; i < number_of_centers; ++i) { + centers[i].deviation = sqrt(centers[i].deviation / member_count[i]); + } } + sort(centers.begin(), centers.end()); } /*Calculates total membership values for a point */ void calculate_membership() { @@ -147,15 +191,22 @@ protected: } return likelihood; } + double iterate() { + calculate_membership(); + calculate_parameters(); + return calculate_likelihood(); + } void calculate_fitting() { - double prev_likelihood, likelihood = 0.0; + double prev_likelihood = 0.0; + double likelihood = iterate(); + int iteration_count = 0; + is_converged = false; do { - calculate_membership(); - calculate_parameters(); prev_likelihood = likelihood; - likelihood = calculate_likelihood(); - } while(likelihood - prev_likelihood > threshold * likelihood); + likelihood = iterate(); + is_converged = likelihood - prev_likelihood < threshold * likelihood; + } while(!is_converged && ++iteration_count < maximum_iteration); } }; }//namespace CGAL -#endif //CGAL_EXPECTATION_MAXIMIZATION_H \ No newline at end of file +#endif //CGAL_SEGMENTATION_EXPECTATION_MAXIMIZATION_H \ No newline at end of file diff --git a/Surface_mesh_segmentation/include/CGAL/internal/Surface_mesh_segmentation/K_means_clustering.h b/Surface_mesh_segmentation/include/CGAL/internal/Surface_mesh_segmentation/K_means_clustering.h new file mode 100644 index 00000000000..35eba5b044b --- /dev/null +++ b/Surface_mesh_segmentation/include/CGAL/internal/Surface_mesh_segmentation/K_means_clustering.h @@ -0,0 +1,157 @@ +#ifndef CGAL_SEGMENTATION_K_MEANS_CLUSTERING_H +#define CGAL_SEGMENTATION_K_MEANS_CLUSTERING_H + +#include +#include +#include +#include + +namespace CGAL +{ + +class K_means_point; + +class K_means_center +{ +public: + double mean; + int number_of_points; +protected: + double new_mean; + +public: + K_means_center(double mean): mean(mean), new_mean(0), number_of_points(0) { + } + void add_point(double data) { + ++number_of_points; + new_mean += data; + } + void calculate_mean() { + mean = new_mean / number_of_points; + new_mean = 0; + number_of_points = 0; + } + bool operator < (const K_means_center& center) { + return mean < center.mean; + } +}; + +class K_means_point +{ +public: + double data; + int center_id; + K_means_point(double data, int center_id = -1) : data(data), + center_id(center_id) { + } + /* Finds closest center to point, + /* Adds itself to the closest center's mean, + /* Returns true if center is changed.*/ + bool calculate_new_center(std::vector& centers) { + int new_center_id = 0; + double min_distance = fabs(centers[0].mean - data); + for(int i = 1; i < centers.size(); ++i) { + double new_distance = fabs(centers[i].mean - data); + if(new_distance < min_distance) { + new_center_id = i; + min_distance = new_distance; + } + } + bool is_center_changed = (new_center_id != center_id); + center_id = new_center_id; + + centers[center_id].add_point(data); + return is_center_changed; + } +}; + +class K_means_clustering +{ +public: + std::vector centers; + std::vector points; + int maximum_iteration; + bool is_converged; + K_means_clustering(int number_of_centers, const std::vector& data, + int number_of_run = 1000, int maximum_iteration = 500) + : points(data.begin(), data.end()), maximum_iteration(maximum_iteration), + is_converged(false) { + calculate_clustering_with_multiple_run(number_of_centers, number_of_run); + } + + void fill_with_center_ids(std::vector& data_centers) { + for(std::vector::iterator point_it = points.begin(); + point_it != points.end(); ++point_it) { + data_centers.push_back(point_it->center_id); + } + } + + void initiate_centers(int number_of_centers, bool randomly = false) { + centers.clear(); + if(!randomly) { + for(int i = 0; i < number_of_centers; ++i) { + double initial_mean = (i + 1.0) / (number_of_centers + 1.0); + centers.push_back(K_means_center(initial_mean)); + } + } else { + srand(time(NULL)); + for(int i = 0; i < number_of_centers; ++i) { + double initial_mean = points[rand() % points.size()].data; + centers.push_back(K_means_center(initial_mean)); + } + } + sort(centers.begin(), centers.end()); + } + + void calculate_clustering() { + int iteration_count = 0; + is_converged = false; + do { + for(std::vector::iterator point_it = points.begin(); + point_it != points.end(); ++point_it) { + bool center_changed = point_it->calculate_new_center(centers); + is_converged |= center_changed; + } + for(std::vector::iterator center_it = centers.begin(); + center_it != centers.end(); ++center_it) { + center_it->calculate_mean(); + } + } while(!is_converged && ++iteration_count < maximum_iteration); + } + + void calculate_clustering_with_multiple_run(int number_of_centers, + int number_of_run) { + initiate_centers(number_of_centers); + calculate_clustering(); + + std::vector min_centers = centers; + double error = sum_of_squares(); + while(--number_of_run > 0) { + initiate_centers(number_of_centers, true); + /* here, clearing center_ids of points might be neccessary */ + calculate_clustering(); + double new_error = sum_of_squares(); + if(error > new_error) { + error = new_error; + min_centers = centers; + } + } + /* By saving points (min_points) also, we can get rid of this part */ + /* But since centers are already converged this step will require one iteration */ + centers = min_centers; + calculate_clustering(); + } + double sum_of_squares() const { + double sum = 0; + for(std::vector::const_iterator center_it = centers.begin(); + center_it != centers.end(); ++center_it) { + for(std::vector::const_iterator point_it = points.begin(); + point_it != points.end(); ++point_it) { + sum += pow(center_it->mean - point_it->data, 2); + } + } + return sum; + } +}; +}//namespace CGAL +#endif //CGAL_SEGMENTATION_K_MEANS_CLUSTERING_H