From a9215c5c0071e446b45a3bfcd2a773c6a9183112 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=8Dlker=20Yaz?= Date: Tue, 5 Jun 2012 13:11:44 +0000 Subject: [PATCH] small modifications on k-means and segmentation, random initialization is added in EM. --- .../include/CGAL/Surface_mesh_segmentation.h | 39 ++++- .../Expectation_maximization.h | 142 ++++++++++++------ .../K_means_clustering.h | 36 +++-- 3 files changed, 150 insertions(+), 67 deletions(-) diff --git a/Surface_mesh_segmentation/include/CGAL/Surface_mesh_segmentation.h b/Surface_mesh_segmentation/include/CGAL/Surface_mesh_segmentation.h index da4d741c414..f21087e3ad2 100644 --- a/Surface_mesh_segmentation/include/CGAL/Surface_mesh_segmentation.h +++ b/Surface_mesh_segmentation/include/CGAL/Surface_mesh_segmentation.h @@ -21,6 +21,7 @@ #include #include +//#include "Timer.h" //#include "Expectation_maximization.h" //#include "K_means_clustering.h" #include @@ -98,7 +99,7 @@ public: //member functions public: Surface_mesh_segmentation(Polyhedron* mesh, - int number_of_rays_sqrt = 9, double cone_angle = (2.0 / 3.0) * CGAL_PI, + int number_of_rays_sqrt = 7, double cone_angle = (2.0 / 3.0) * CGAL_PI, int number_of_centers = 2); void calculate_sdf_values(); @@ -130,7 +131,7 @@ public: void write_sdf_values(const char* file_name); void read_sdf_values(const char* file_name); - + void read_center_ids(const char* file_name); }; template @@ -140,11 +141,13 @@ inline Surface_mesh_segmentation::Surface_mesh_segmentation( : mesh(mesh), cone_angle(cone_angle), number_of_rays_sqrt(number_of_rays_sqrt), number_of_centers(number_of_centers), log_file("log_file.txt") { + //Timer t; disk_sampling_concentric_mapping(); calculate_sdf_values(); + //std::cout << t; apply_GMM_fitting_with_K_means_init(); - //write_sdf_values("sdf_values_sample_cactus.txt"); - //read_sdf_values("sdf_values_sample_cactus.txt"); + //write_sdf_values("sdf_values_sample_dino_2.txt"); + //read_sdf_values("sdf_values_sample_camel.txt"); } template @@ -232,7 +235,7 @@ void Surface_mesh_segmentation::cast_and_return_minimum( continue; //What to do here (in case of intersection object is a segment), I am not sure ??? } Vector i_ray = (ray.source() - i_point); - double new_distance = CGAL::sqrt(i_ray.squared_length()); + double new_distance = i_ray.squared_length(); if(!is_found || new_distance < min_distance) { min_distance = new_distance; min_id = id; @@ -243,6 +246,7 @@ void Surface_mesh_segmentation::cast_and_return_minimum( if(!is_found) { return; } + min_distance = sqrt(min_distance); Point min_v1 = min_id->halfedge()->vertex()->point(); Point min_v2 = min_id->halfedge()->next()->vertex()->point(); Point min_v3 = min_id->halfedge()->next()->next()->vertex()->point(); @@ -298,7 +302,7 @@ Surface_mesh_segmentation::calculate_sdf_value_from_rays( double dif = (*dist_it) - mean_sdf; st_dev += dif * dif; } - st_dev = CGAL::sqrt(st_dev / (ray_distances.size())); + st_dev = sqrt(st_dev / ray_distances.size()); /* Calculate sdf, accept rays : ray_dist - median < st dev */ w_it = ray_weights.begin(); for(std::vector::iterator dist_it = ray_distances.begin(); @@ -335,7 +339,7 @@ Surface_mesh_segmentation::calculate_sdf_value_from_rays_with_mean( double dif = (*dist_it) - mean_sdf; st_dev += dif * dif; } - st_dev = CGAL::sqrt(st_dev / (ray_distances.size())); + st_dev = sqrt(st_dev / ray_distances.size()); w_it = ray_weights.begin(); for(std::vector::iterator dist_it = ray_distances.begin(); @@ -385,7 +389,7 @@ Surface_mesh_segmentation::calculate_sdf_value_from_rays_with_trimme double dif = (*dist_it) - trimmed_mean; st_dev += dif * dif; } - st_dev = CGAL::sqrt(st_dev / (ray_distances.size())); + st_dev = sqrt(st_dev / ray_distances.size()); w_it = ray_weights.begin(); for(std::vector::iterator dist_it = ray_distances.begin(); @@ -613,6 +617,25 @@ inline void Surface_mesh_segmentation::read_sdf_values( sdf_values.insert(std::pair(facet_it, sdf_value)); } } + +template +inline void Surface_mesh_segmentation::read_center_ids( + const char* file_name) +{ + std::ifstream input(file_name); + centers.clear(); + int max_center = 0; + for(Facet_iterator facet_it = mesh->facets_begin(); + facet_it != mesh->facets_end(); ++facet_it) { + int center_id; + input >> center_id; + centers.insert(std::pair(facet_it, center_id)); + if(center_id > max_center) { + max_center = center_id; + } + } + number_of_centers = max_center + 1; +} } //namespace CGAL #undef ANGLE_ST_DEV_DIVIDER #undef LOG_5 diff --git a/Surface_mesh_segmentation/include/CGAL/internal/Surface_mesh_segmentation/Expectation_maximization.h b/Surface_mesh_segmentation/include/CGAL/internal/Surface_mesh_segmentation/Expectation_maximization.h index a06d594a0cd..b106bed6038 100644 --- a/Surface_mesh_segmentation/include/CGAL/internal/Surface_mesh_segmentation/Expectation_maximization.h +++ b/Surface_mesh_segmentation/include/CGAL/internal/Surface_mesh_segmentation/Expectation_maximization.h @@ -8,7 +8,10 @@ #include #include #include -//#define DIV_INV_SQRT_2_PI 0.3989422804 +#include +#include +#include + namespace CGAL { @@ -81,7 +84,6 @@ inline void Gaussian_center::calculate_parameters(const new_deviation = sqrt(new_deviation/total_membership); /* Calculate new mixing coefficient */ mixing_coefficient = total_membership; - deviation = new_deviation; mean = new_mean; } @@ -94,14 +96,21 @@ public: double threshold; int maximum_iteration; bool is_converged; +protected: + int seed; + +public: Expectation_maximization(int number_of_centers, const std::vector& data, const std::vector& initial_centers = std::vector(), int maximum_iteration = 100) - : points(data.begin(), data.end()), threshold(1e-4), - maximum_iteration(maximum_iteration), is_converged(false) { + : points(data.begin(), data.end()), threshold(1e-3), + maximum_iteration(maximum_iteration), is_converged(false), seed(time(NULL)) { + srand(seed); + //calculate_fitting_with_multiple_run(number_of_centers, 50); initiate_centers(number_of_centers, initial_centers); calculate_fitting(); } + void fill_with_center_ids(std::vector& data_centers) { data_centers.reserve(points.size()); for(std::vector::iterator point_it = points.begin(); @@ -121,46 +130,71 @@ public: } } protected: + void initiate_centers_randomly(int number_of_centers) { + centers.clear(); + /* Randomly generate means of centers */ + double initial_deviation = 1.0 / (2.0 * number_of_centers); + double initial_mixing_coefficient = 1.0 / number_of_centers; + for(int i = 0; i < number_of_centers; ++i) { + double initial_mean = points[rand() % points.size()].data; + centers.push_back(Gaussian_center(initial_mean, initial_deviation, + initial_mixing_coefficient)); + } + sort(centers.begin(), centers.end()); + } + + void initiate_centers_uniformly(int number_of_centers) { + centers.clear(); + /* Uniformly generate centers */ + double initial_deviation = 1.0 / (2.0 * number_of_centers); + double initial_mixing_coefficient = 1.0 / number_of_centers; + for(int i = 0; i < number_of_centers; ++i) { + double initial_mean = (i + 1.0) / (number_of_centers + 1.0); + centers.push_back(Gaussian_center(initial_mean, initial_deviation, + initial_mixing_coefficient)); + } + sort(centers.begin(), centers.end()); + } + + void initiate_centers_from_memberships(int number_of_centers, + const std::vector& initial_centers) { + centers.clear(); + /* Calculate mean */ + int number_of_point = initial_centers.size(); + centers = std::vector(number_of_centers); + std::vector member_count(number_of_centers, 0); + + for(int i = 0; i < number_of_point; ++i) { + int center_id = initial_centers[i]; + double data = points[i].data; + centers[center_id].mean += data; + member_count[center_id] += 1; + } + /* Assign mean, and mixing coef */ + for(int i = 0; i < number_of_centers; ++i) { + centers[i].mean /= member_count[i]; + centers[i].mixing_coefficient = member_count[i] / static_cast + (number_of_point); + } + /* Calculate deviation */ + for(int i = 0; i < number_of_point; ++i) { + int center_id = initial_centers[i]; + double data = points[i].data; + centers[center_id].deviation += pow(data - centers[center_id].mean, 2); + } + for(int i = 0; i < number_of_centers; ++i) { + centers[i].deviation = sqrt(centers[i].deviation / member_count[i]); + } + sort(centers.begin(), centers.end()); + } + void initiate_centers(int number_of_centers, const std::vector& initial_centers) { if(initial_centers.empty()) { - /* Uniformly generate centers */ - double initial_deviation = 1.0 / (2.0 * number_of_centers); - double initial_mixing_coefficient = 1.0 / number_of_centers; - for(int i = 0; i < number_of_centers; ++i) { - double initial_mean = (i + 1.0) / (number_of_centers + 1.0); - centers.push_back(Gaussian_center(initial_mean, initial_deviation, - initial_mixing_coefficient)); - } + initiate_centers_uniformly(number_of_centers); } else { - /* Calculate mean */ - int number_of_point = initial_centers.size(); - centers = std::vector(number_of_centers); - std::vector member_count(number_of_centers, 0); - - for(int i = 0; i < number_of_point; ++i) { - int center_id = initial_centers[i]; - double data = points[i].data; - centers[center_id].mean += data; - member_count[center_id] += 1; - } - /* Assign mean, and mixing coef */ - for(int i = 0; i < number_of_centers; ++i) { - centers[i].mean /= member_count[i]; - centers[i].mixing_coefficient = member_count[i] / static_cast - (number_of_point); - } - /* Calculate deviation */ - for(int i = 0; i < number_of_point; ++i) { - int center_id = initial_centers[i]; - double data = points[i].data; - centers[center_id].deviation += pow(data - centers[center_id].mean, 2); - } - for(int i = 0; i < number_of_centers; ++i) { - centers[i].deviation = sqrt(centers[i].deviation / member_count[i]); - } + initiate_centers_from_memberships(number_of_centers, initial_centers); } - sort(centers.begin(), centers.end()); } /*Calculates total membership values for a point */ void calculate_membership() { @@ -191,21 +225,41 @@ protected: } return likelihood; } + double iterate() { calculate_membership(); calculate_parameters(); return calculate_likelihood(); } - void calculate_fitting() { - double prev_likelihood = 0.0; - double likelihood = iterate(); + + double calculate_fitting() { + double likelihood = (std::numeric_limits::min)(), prev_likelihood; int iteration_count = 0; is_converged = false; - do { + while(!is_converged && iteration_count++ < maximum_iteration) { prev_likelihood = likelihood; likelihood = iterate(); is_converged = likelihood - prev_likelihood < threshold * likelihood; - } while(!is_converged && ++iteration_count < maximum_iteration); + } + //std::cout << likelihood << " " << iteration_count << std::endl; + return likelihood; + } + + void calculate_fitting_with_multiple_run(int number_of_centers, + int number_of_run) { + double max_likelihood = (std::numeric_limits::min)(); + std::vector max_centers; + + while(number_of_run-- > 0) { + initiate_centers_randomly(number_of_centers); + + double likelihood = calculate_fitting(); + if(likelihood > max_likelihood) { + max_centers = centers; + max_likelihood = likelihood; + } + } + centers = max_centers; } }; }//namespace CGAL diff --git a/Surface_mesh_segmentation/include/CGAL/internal/Surface_mesh_segmentation/K_means_clustering.h b/Surface_mesh_segmentation/include/CGAL/internal/Surface_mesh_segmentation/K_means_clustering.h index d5c000356f1..f927b285c3c 100644 --- a/Surface_mesh_segmentation/include/CGAL/internal/Surface_mesh_segmentation/K_means_clustering.h +++ b/Surface_mesh_segmentation/include/CGAL/internal/Surface_mesh_segmentation/K_means_clustering.h @@ -73,10 +73,15 @@ public: std::vector points; int maximum_iteration; bool is_converged; +protected: + int seed; + +public: K_means_clustering(int number_of_centers, const std::vector& data, int number_of_run = 50, int maximum_iteration = 100) : points(data.begin(), data.end()), maximum_iteration(maximum_iteration), - is_converged(false) { + is_converged(false), seed(time(NULL)) { + srand(seed); calculate_clustering_with_multiple_run(number_of_centers, number_of_run); } @@ -87,19 +92,20 @@ public: } } - void initiate_centers(int number_of_centers, bool randomly = false) { + void initiate_centers_uniformly(int number_of_centers) { centers.clear(); - if(!randomly) { - for(int i = 0; i < number_of_centers; ++i) { - double initial_mean = (i + 1.0) / (number_of_centers + 1.0); - centers.push_back(K_means_center(initial_mean)); - } - } else { - srand(time(NULL)); - for(int i = 0; i < number_of_centers; ++i) { - double initial_mean = points[rand() % points.size()].data; - centers.push_back(K_means_center(initial_mean)); - } + for(int i = 0; i < number_of_centers; ++i) { + double initial_mean = (i + 1.0) / (number_of_centers + 1.0); + centers.push_back(K_means_center(initial_mean)); + } + sort(centers.begin(), centers.end()); + } + /* Forgy method */ + void initiate_centers_randomly(int number_of_centers) { + centers.clear(); + for(int i = 0; i < number_of_centers; ++i) { + double initial_mean = points[rand() % points.size()].data; + centers.push_back(K_means_center(initial_mean)); } sort(centers.begin(), centers.end()); } @@ -120,7 +126,7 @@ public: } } is_converged = !any_center_changed; - //std::cout << iteration_count << std::endl; + //std::cout << iteration_count << " " << is_converged << std::endl; } void calculate_clustering_with_multiple_run(int number_of_centers, @@ -130,7 +136,7 @@ public: while(--number_of_run > 0) { clear_center_ids(); - initiate_centers(number_of_centers, true); + initiate_centers_randomly(number_of_centers); calculate_clustering(); double new_error = sum_of_squares(); if(error > new_error) {