From 69272e4d9af732529249ba1ac8fef4ebdf1f133a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Jamin?= Date: Tue, 10 Apr 2012 13:23:51 +0000 Subject: [PATCH] Test with a worksharing strategy (based on TBB task scheduler) + test with parallel_do. --- .gitattributes | 1 + Mesh_2/include/CGAL/Mesher_level.h | 280 ++++++++++++--- Mesh_3/benchmark/Mesh_3/concurrency.cpp | 59 +++- Mesh_3/demo/Mesh_3/Mesh_3_plugin.cpp | 34 +- Mesh_3/demo/Mesh_3/Mesh_function.h | 8 + Mesh_3/demo/Mesh_3/Scene_c3t3_item.cpp | 25 +- Mesh_3/demo/Mesh_3/config.h | 28 +- .../CGAL/Mesh_3/Locking_data_structures.h | 4 +- .../CGAL/Mesh_3/Worksharing_data_structures.h | 322 ++++++++++++++++++ .../CGAL/Concurrent_compact_container.h | 3 +- .../include/CGAL/Triangulation_3.h | 34 +- .../CGAL/Triangulation_ds_vertex_base_3.h | 33 +- 12 files changed, 745 insertions(+), 86 deletions(-) create mode 100644 Mesh_3/include/CGAL/Mesh_3/Worksharing_data_structures.h diff --git a/.gitattributes b/.gitattributes index c9c544a15e5..d76de42573e 100644 --- a/.gitattributes +++ b/.gitattributes @@ -2709,6 +2709,7 @@ Mesh_3/examples/Mesh_3/old_stuff/inputs/cube.mesh -text svneol=unset#application Mesh_3/examples/Mesh_3/old_stuff/inputs/tangle.mesh -text svneol=unset#application/octet-stream Mesh_3/include/CGAL/Mesh_3/Locking_data_structures.h -text Mesh_3/include/CGAL/Mesh_3/Profiling_tools.h -text +Mesh_3/include/CGAL/Mesh_3/Worksharing_data_structures.h -text Mesh_3/include/CGAL/Meshes/Filtered_multimap_container.h -text Mesh_3/include/CGAL/Triangulation_lazy_ds_cell_base_3.h -text Mesh_3/package_info/Mesh_3/description.txt -text diff --git a/Mesh_2/include/CGAL/Mesher_level.h b/Mesh_2/include/CGAL/Mesher_level.h index af44636a10a..710ad4da9cf 100644 --- a/Mesh_2/include/CGAL/Mesher_level.h +++ b/Mesh_2/include/CGAL/Mesher_level.h @@ -26,24 +26,28 @@ #endif #ifdef CONCURRENT_MESH_3 - #include +# include - #include +# include - #include //CJTODO: remove? - #include //CJTODO: remove? - #include // CJODO TEMP? 
- #include - - #ifdef CGAL_CONCURRENT_MESH_3_PROFILING - #define CGAL_PROFILE - #include - #endif +# include //CJTODO: remove? +# include //CJTODO: remove? +# include // CJODO TEMP? +# ifdef CGAL_MESH_3_WORKSHARING_USES_TASKS +# include +# include +# endif +# include + +# ifdef CGAL_CONCURRENT_MESH_3_PROFILING +# define CGAL_PROFILE +# include +# endif // CJTODO TEMP TEST -#ifdef CGAL_MESH_3_DO_NOT_LOCK_INFINITE_VERTEX +# ifdef CGAL_MESH_3_DO_NOT_LOCK_INFINITE_VERTEX extern bool g_is_set_cell_active; -#endif +# endif // CJTODO TEMP: not thread-safe => move it to Mesher_3 extern CGAL::Bbox_3 g_bbox; @@ -545,6 +549,11 @@ public: typedef typename Derived::Container::Element Container_element; typedef typename Derived::Container::Quality Container_quality; + //======================================================= + //================= PARALLEL_FOR? + //======================================================= + +# ifdef CGAL_MESH_3_WORKSHARING_USES_PARALLEL_FOR /*std::pair raw_elements[ELEMENT_BATCH_SIZE];*/ std::vector container_elements; @@ -573,9 +582,9 @@ public: indices.push_back(iElt); } -# ifdef CGAL_CONCURRENT_MESH_3_VERBOSE +# ifdef CGAL_CONCURRENT_MESH_3_VERBOSE std::cerr << "Refining a batch of " << iElt << " elements..."; -# endif +# endif // Doesn't help much //typedef Spatial_sort_traits_adapter_3 Search_traits; @@ -599,8 +608,6 @@ public: { for( size_t i = r.begin() ; i != r.end() ; ) { - before_next_element_refinement(visitor); - std::ptrdiff_t index = indices[i]; Container_element ce = container_elements[index]; @@ -616,7 +623,6 @@ public: break; case COULD_NOT_LOCK_ZONE: - case COULD_NOT_LOCK_ELEMENT: { // Swap indices[i] and indices[i+1] if (i+1 != r.end()) @@ -634,11 +640,15 @@ public: break; } + case COULD_NOT_LOCK_ELEMENT: + // We retry it now case THE_FACET_TO_REFINE_IS_NOT_IN_ITS_CONFLICT_ZONE: // We retry it since we switched to exact computation // for the adjacent cells circumcenters break; } + + before_next_element_refinement(visitor); } } 
); @@ -681,9 +691,195 @@ public: } } -# ifdef CGAL_CONCURRENT_MESH_3_VERBOSE - std::cerr << " batch done." << std::endl; -# endif +# ifdef CGAL_CONCURRENT_MESH_3_VERBOSE + std::cerr << " batch done." << std::endl; +# endif + + //======================================================= + //================= PARALLEL_DO? + //======================================================= + +# elif defined(CGAL_MESH_3_WORKSHARING_USES_PARALLEL_DO) + std::vector container_elements; + container_elements.reserve(ELEMENT_BATCH_SIZE); + + while(!no_longer_element_to_refine()) + { + Container_element ce = derived().get_next_raw_element_impl().second; + pop_next_element(); + container_elements.push_back(ce); + } + +# ifdef CGAL_CONCURRENT_MESH_3_VERBOSE + std::cerr << "Refining elements in parallel..."; +# endif + + // CJTODO: lambda functions OK? + + //g_is_set_cell_active = false; + previous_level.add_to_TLS_lists(true); + add_to_TLS_lists(true); + tbb::parallel_do( + container_elements.begin(), container_elements.end(), + [&] (Container_element& ce, tbb::parallel_do_feeder& feeder) + { + Mesher_level_conflict_status status; + do + { + status = try_lock_and_refine_element(ce, visitor); + } + while (status == COULD_NOT_LOCK_ELEMENT + || status == THE_FACET_TO_REFINE_IS_NOT_IN_ITS_CONFLICT_ZONE); + + switch (status) + { + case NO_CONFLICT: + case CONFLICT_AND_ELEMENT_SHOULD_BE_DROPPED: + case ELEMENT_WAS_A_ZOMBIE: + break; + + case COULD_NOT_LOCK_ZONE: + { + feeder.add(ce); + break; + } + + /*case COULD_NOT_LOCK_ELEMENT: + // We retry it now + case THE_FACET_TO_REFINE_IS_NOT_IN_ITS_CONFLICT_ZONE: + // We retry it since we switched to exact computation + // for the adjacent cells circumcenters + break;*/ + } + + before_next_element_refinement(visitor); + + // Finally we add the new local bad_elements to the feeder + while (no_longer_local_element_to_refine() == false) + { + typedef typename Derived::Container::Element Container_element; + Container_element ce = 
derived().get_next_local_raw_element_impl().second; + pop_next_local_element(); + + feeder.add(ce); + } + } + ); + splice_local_lists(); + CGAL_assertion(no_longer_element_to_refine()); + //previous_level.splice_local_lists(); // useless + previous_level.add_to_TLS_lists(false); + add_to_TLS_lists(false); + //g_is_set_cell_active = true; + + +# ifdef CGAL_CONCURRENT_MESH_3_VERBOSE + std::cerr << " done." << std::endl; +# endif + //======================================================= + //================= TASKS? + //======================================================= + +# elif defined(CGAL_MESH_3_WORKSHARING_USES_TASKS) + + std::vector container_elements; + container_elements.reserve(ELEMENT_BATCH_SIZE); + + int iElt = 0; + for( ; + iElt < ELEMENT_BATCH_SIZE && !no_longer_element_to_refine() ; + ++iElt ) + { + Container_element ce = derived().get_next_raw_element_impl().second; + pop_next_element(); + container_elements.push_back(ce); + } + +# ifdef CGAL_CONCURRENT_MESH_3_VERBOSE + std::cerr << "Refining a batch of " << iElt << " elements..."; +# endif + + // CJTODO: lambda functions OK? 
+ if (iElt > 20) + { + //g_is_set_cell_active = false; + previous_level.add_to_TLS_lists(true); + add_to_TLS_lists(true); + + tbb::task& empty_root_task = *new( tbb::task::allocate_root() ) tbb::empty_task; + empty_root_task.set_ref_count(iElt + 1); + + for( size_t i = 0 ; i < iElt ; ++i) + { + Container_element ce = container_elements[i]; + + Mesh_3::enqueue_work( + [&, ce, visitor]() + { + Mesher_level_conflict_status status; + do + { + status = try_lock_and_refine_element(ce, visitor); + before_next_element_refinement(visitor); + } + while (status != NO_CONFLICT + && status != CONFLICT_AND_ELEMENT_SHOULD_BE_DROPPED + && status != ELEMENT_WAS_A_ZOMBIE); + }, + empty_root_task, + circumcenter(derived().extract_element_from_container_value(ce))); + } + empty_root_task.wait_for_all(); + tbb::task::destroy(empty_root_task); + + splice_local_lists(); + //previous_level.splice_local_lists(); // useless + previous_level.add_to_TLS_lists(false); + add_to_TLS_lists(false); + //g_is_set_cell_active = true; + } + // Go sequential + else + { + for (int i = 0 ; i < iElt ; ) + { + std::ptrdiff_t index = i; + + Derived &derivd = derived(); + //Container_element ce = raw_elements[index].second; + Container_element ce = container_elements[index]; + if( !derivd.is_zombie(ce) ) + { + // Lock the element area on the grid + Element element = derivd.extract_element_from_container_value(ce); + + const Mesher_level_conflict_status result + = try_to_refine_element(element, visitor); + + if (result != CONFLICT_BUT_ELEMENT_CAN_BE_RECONSIDERED + && result != THE_FACET_TO_REFINE_IS_NOT_IN_ITS_CONFLICT_ZONE) + { + ++i; + } + } + else + { + ++i; + } + // Unlock + unlock_all_thread_local_elements(); + } + } + +# ifdef CGAL_CONCURRENT_MESH_3_VERBOSE + std::cerr << " batch done." 
<< std::endl; +# endif + +#endif + //======================================================= + //================= / WORKSHARING STRATEGY + //======================================================= + } /** @@ -712,36 +908,37 @@ public: std::cerr << "Trying to insert point: " << p << std::endl; #endif + +//========================================= //==== Simple Grid locking +//========================================= #if defined(CGAL_MESH_3_CONCURRENT_REFINEMENT) && \ defined(CGAL_MESH_3_LOCKING_STRATEGY_SIMPLE_GRID_LOCKING) Mesher_level_conflict_status result; Zone zone; - if( g_lock_grid.try_lock(p).first ) - { - before_conflicts(e, p, visitor); + + before_conflicts(e, p, visitor); - bool could_lock_zone; - bool facet_not_in_its_cz = false; - zone = conflicts_zone(p, e, facet_not_in_its_cz, could_lock_zone); + bool could_lock_zone; + bool facet_not_in_its_cz = false; + zone = conflicts_zone(p, e, facet_not_in_its_cz, could_lock_zone); - if (!could_lock_zone) - result = COULD_NOT_LOCK_ZONE; - else if (facet_not_in_its_cz) - result = THE_FACET_TO_REFINE_IS_NOT_IN_ITS_CONFLICT_ZONE; - else - result = test_point_conflict(p, zone, visitor); - } - else - { + if (!could_lock_zone) result = COULD_NOT_LOCK_ZONE; - } + else if (facet_not_in_its_cz) + result = THE_FACET_TO_REFINE_IS_NOT_IN_ITS_CONFLICT_ZONE; + else + result = test_point_conflict(p, zone, visitor); -//==== !Simple Grid locking +//========================================= +//==== NOT Simple Grid locking +//========================================= #else + + before_conflicts(e, p, visitor); - // Concurrent? + //=========== Concurrent? ============= # ifdef CGAL_MESH_3_CONCURRENT_REFINEMENT bool could_lock_zone; bool facet_not_in_its_cz = false; @@ -754,7 +951,7 @@ public: else result = test_point_conflict(p, zone, visitor); - // ... or not? + //=========== or not? 
================= # else bool facet_not_in_its_cz = false; Zone zone = conflicts_zone(p, e, facet_not_in_its_cz); @@ -766,6 +963,9 @@ public: # endif #endif +//========================================= +//==== / Simple Grid locking +//========================================= #ifdef CGAL_MESHES_DEBUG_REFINEMENT_POINTS std::cerr << "(" << p << ") "; diff --git a/Mesh_3/benchmark/Mesh_3/concurrency.cpp b/Mesh_3/benchmark/Mesh_3/concurrency.cpp index c9e8b1ae092..34a2ccce719 100644 --- a/Mesh_3/benchmark/Mesh_3/concurrency.cpp +++ b/Mesh_3/benchmark/Mesh_3/concurrency.cpp @@ -14,7 +14,7 @@ # define CGAL_MESH_3_CONCURRENT_REFINEMENT // In case some code uses CGAL_PROFILE, it needs to be concurrent # define CGAL_CONCURRENT_PROFILE -//# define CGAL_CONCURRENT_MESH_3_VERBOSE +# define CGAL_CONCURRENT_MESH_3_VERBOSE // ========================================================================== // Locking strategy @@ -24,10 +24,29 @@ //# define CGAL_MESH_3_LOCKING_STRATEGY_CELL_LOCK # define CGAL_MESH_3_LOCKING_STRATEGY_SIMPLE_GRID_LOCKING //# define CGAL_MESH_3_CONCURRENT_REFINEMENT_LOCK_ADJ_CELLS +//# define CGAL_MESH_3_DO_NOT_LOCK_INFINITE_VERTEX +//# define CGAL_MESH_3_ACTIVATE_GRID_INDEX_CACHE_IN_VERTEX +//# define CGAL_MESH_3_WORKSHARING_USES_TASKS +//# define CGAL_MESH_3_WORKSHARING_USES_PARALLEL_FOR +# define CGAL_MESH_3_WORKSHARING_USES_PARALLEL_DO + +# ifdef CGAL_MESH_3_WORKSHARING_USES_TASKS + const int MESH_3_LOCKING_GRID_NUM_CELLS_PER_AXIS = 25; + const int MESH_3_FIRST_GRID_LOCK_RADIUS = 0; + + const int MESH_3_WORK_STATS_GRID_NUM_CELLS_PER_AXIS = 2; + const int MESH_3_WORK_STATS_GRID_NUM_CELLS = + MESH_3_WORK_STATS_GRID_NUM_CELLS_PER_AXIS* + MESH_3_WORK_STATS_GRID_NUM_CELLS_PER_AXIS* + MESH_3_WORK_STATS_GRID_NUM_CELLS_PER_AXIS; + +# else const int MESH_3_LOCKING_GRID_NUM_CELLS_PER_AXIS = 30; const int MESH_3_FIRST_GRID_LOCK_RADIUS = 2; const int MESH_3_REFINEMENT_GRAINSIZE = 10; +# endif + # ifdef CGAL_MESH_3_LOCKING_STRATEGY_CELL_LOCK # include @@ -46,7 +65,7 
@@ // Concurrency Parameters // ========================================================================== - const size_t ELEMENT_BATCH_SIZE = 10000; + const size_t ELEMENT_BATCH_SIZE = 100000; // ========================================================================== // Profiling @@ -85,8 +104,20 @@ bool g_temp = false; Global_mutex_type g_global_mutex; // CJTODO: temporary // CJTODO TEMP: not thread-safe => move it to Mesher_3 + // Elephant.off => BBox (x,y,z): [ -0.358688, 0.356308 ], [ -0.498433, 0.49535 ], [ -0.298931, 0.298456 ] - CGAL::Bbox_3 g_bbox(-0.35, 0.35, -0.5, 0.5, -0.3, 0.3); + //const char *INPUT_FILE_NAME = "D:/INRIA/CGAL/workingcopy/Mesh_3/examples/Mesh_3/data/elephant.off"; + //CGAL::Bbox_3 g_bbox(-0.36, 0.36, -0.5, 0.5, -0.3, 0.3); + + // Fandisk.off => BBox (x,y,z): [ -0.4603, 0.4603 ], [ -0.254894, 0.25555 ], [ -0.499801, 0.499177 ], + const char *INPUT_FILE_NAME = "D:/INRIA/CGAL/workingcopy/Mesh_3/examples/Mesh_3/data/fandisk.off"; + CGAL::Bbox_3 g_bbox(-0.47, 0.47, -0.26, 0.26, -0.5, 0.5); + +# ifdef CGAL_MESH_3_WORKSHARING_USES_TASKS +# include // CJODO TEMP? 
+ CGAL::Mesh_3::Worksharing_ds_type g_worksharing_ds; +# endif + # ifdef CGAL_MESH_3_LOCKING_STRATEGY_SIMPLE_GRID_LOCKING CGAL::Mesh_3::Refinement_grid_type g_lock_grid(g_bbox, MESH_3_LOCKING_GRID_NUM_CELLS_PER_AXIS); @@ -172,14 +203,24 @@ bool refine_mesh(const std::string &input_filename) // Create domain Mesh_domain domain(polyhedron); + // Very small elements Mesh_parameters params; params.facet_angle = 25; + params.facet_sizing = 0.001; + params.facet_approx = 0.0068; + params.tet_shape = 3; + params.tet_sizing = 0.001; + + // Middle-size elements + /*Mesh_parameters params; + params.facet_angle = 25; params.facet_sizing = 0.002; params.facet_approx = 0.0068; - /*params.tet_shape = 3; - params.tet_sizing = 1.;*/ - + params.tet_shape = 3; + params.tet_sizing = 0.005;*/ + std::cerr + << "File: " << input_filename << std::endl << "Parameters: " << std::endl << params.log() << std::endl; @@ -187,9 +228,9 @@ bool refine_mesh(const std::string &input_filename) Mesh_criteria criteria( facet_angle=params.facet_angle, facet_size=params.facet_sizing, - facet_distance=params.facet_approx/*, + facet_distance=params.facet_approx, cell_size=params.tet_sizing, - cell_radius_edge_ratio=params.tet_shape*/ + cell_radius_edge_ratio=params.tet_shape ); // Mesh generation @@ -219,7 +260,7 @@ int main() for(int i = 1 ; ; ++i) { std::cerr << "Refinement #" << i << "..." << std::endl; - refine_mesh("D:/INRIA/CGAL/workingcopy/Mesh_3/examples/Mesh_3/data/elephant.off"); + refine_mesh(INPUT_FILE_NAME); std::cerr << "Refinement #" << i << " done." << std::endl; std::cerr << std::endl << "---------------------------------" << std::endl << std::endl; } diff --git a/Mesh_3/demo/Mesh_3/Mesh_3_plugin.cpp b/Mesh_3/demo/Mesh_3/Mesh_3_plugin.cpp index 3d3d3b54099..60ac014b9d7 100644 --- a/Mesh_3/demo/Mesh_3/Mesh_3_plugin.cpp +++ b/Mesh_3/demo/Mesh_3/Mesh_3_plugin.cpp @@ -34,28 +34,38 @@ bool g_temp = false; #ifdef CONCURRENT_MESH_3 - #include // CJODO TEMP? 
- #include + +# include +# include // CJODO TEMP? // CJTODO TEMP TEST -#ifdef CGAL_MESH_3_DO_NOT_LOCK_INFINITE_VERTEX +# ifdef CGAL_MESH_3_DO_NOT_LOCK_INFINITE_VERTEX bool g_is_set_cell_active = true; -#endif +# endif Global_mutex_type g_global_mutex; // CJTODO: temporary - - // CJTODO TEMP: not thread-safe => move it to Mesher_3 // Elephant.off => BBox (x,y,z): [ -0.358688, 0.356308 ], [ -0.498433, 0.49535 ], [ -0.298931, 0.298456 ] - CGAL::Bbox_3 g_bbox(-0.35, 0.35, -0.5, 0.5, -0.3, 0.3); + const char *INPUT_FILE_NAME = "D:/INRIA/CGAL/workingcopy/Mesh_3/examples/Mesh_3/data/elephant.off"; + CGAL::Bbox_3 g_bbox(-0.36, 0.36, -0.5, 0.5, -0.3, 0.3); + + // Fandisk.off => BBox (x,y,z): [ -0.4603, 0.4603 ], [ -0.254894, 0.25555 ], [ -0.499801, 0.499177 ], + //const char *INPUT_FILE_NAME = "D:/INRIA/CGAL/workingcopy/Mesh_3/examples/Mesh_3/data/fandisk.off"; + //CGAL::Bbox_3 g_bbox(-0.47, 0.47, -0.26, 0.26, -0.5, 0.5); + +# ifdef CGAL_MESH_3_WORKSHARING_USES_TASKS +# include // CJODO TEMP? + CGAL::Mesh_3::Worksharing_ds_type g_worksharing_ds; +# endif + # ifdef CGAL_MESH_3_LOCKING_STRATEGY_SIMPLE_GRID_LOCKING - CGAL::Mesh_3::Refinement_grid_type g_lock_grid(g_bbox, MESH_3_LOCKING_GRID_NUM_CELLS_PER_AXIS); + CGAL::Mesh_3::Refinement_grid_type g_lock_grid(g_bbox, MESH_3_LOCKING_GRID_NUM_CELLS_PER_AXIS); # elif defined(CGAL_MESH_3_LOCKING_STRATEGY_CELL_LOCK) -# include -# include -# include - tbb::enumerable_thread_specific > > g_tls_locked_cells; +# include +# include +# include + tbb::enumerable_thread_specific > > g_tls_locked_cells; # endif #endif diff --git a/Mesh_3/demo/Mesh_3/Mesh_function.h b/Mesh_3/demo/Mesh_3/Mesh_function.h index 376cfa62c6b..7178b13e6da 100644 --- a/Mesh_3/demo/Mesh_3/Mesh_function.h +++ b/Mesh_3/demo/Mesh_3/Mesh_function.h @@ -37,6 +37,11 @@ #include "C3t3_type.h" #include "Meshing_thread.h" +// CJTODO TEMP: not thread-safe => move it to Mesher_3 +#include // CJODO TEMP? 
+#ifdef CGAL_MESH_3_LOCKING_STRATEGY_SIMPLE_GRID_LOCKING + extern CGAL::Mesh_3::Refinement_grid_type g_lock_grid; +#endif struct Mesh_parameters { @@ -158,6 +163,9 @@ launch() ++it ) { Vertex_handle v = c3t3_.triangulation().insert(it->first); +# ifdef CGAL_MESH_3_LOCKING_STRATEGY_SIMPLE_GRID_LOCKING + g_lock_grid.unlock_all_tls_locked_cells(); +#endif c3t3_.set_dimension(v,2); // by construction, points are on surface c3t3_.set_index(v,it->second); } diff --git a/Mesh_3/demo/Mesh_3/Scene_c3t3_item.cpp b/Mesh_3/demo/Mesh_3/Scene_c3t3_item.cpp index 1ab13c86ea6..723d7d74ab4 100644 --- a/Mesh_3/demo/Mesh_3/Scene_c3t3_item.cpp +++ b/Mesh_3/demo/Mesh_3/Scene_c3t3_item.cpp @@ -20,19 +20,30 @@ #include #ifdef CONCURRENT_MESH_3 - #include // CJODO TEMP? - #include +# include // CJODO TEMP? + +# include // CJTODO TEMP TEST -#ifdef CGAL_MESH_3_DO_NOT_LOCK_INFINITE_VERTEX +# ifdef CGAL_MESH_3_DO_NOT_LOCK_INFINITE_VERTEX bool g_is_set_cell_active = true; -#endif +# endif //Global_mutex_type g_global_mutex; // CJTODO: temporary - - // CJTODO TEMP: not thread-safe => move it to Mesher_3 + // Elephant.off => BBox (x,y,z): [ -0.358688, 0.356308 ], [ -0.498433, 0.49535 ], [ -0.298931, 0.298456 ] - CGAL::Bbox_3 g_bbox(-0.35, 0.35, -0.5, 0.5, -0.3, 0.3); + const char *INPUT_FILE_NAME = "D:/INRIA/CGAL/workingcopy/Mesh_3/examples/Mesh_3/data/elephant.off"; + CGAL::Bbox_3 g_bbox(-0.36, 0.36, -0.5, 0.5, -0.3, 0.3); + + // Fandisk.off => BBox (x,y,z): [ -0.4603, 0.4603 ], [ -0.254894, 0.25555 ], [ -0.499801, 0.499177 ], + //const char *INPUT_FILE_NAME = "D:/INRIA/CGAL/workingcopy/Mesh_3/examples/Mesh_3/data/fandisk.off"; + //CGAL::Bbox_3 g_bbox(-0.47, 0.47, -0.26, 0.26, -0.5, 0.5); + +# ifdef CGAL_MESH_3_WORKSHARING_USES_TASKS +# include // CJODO TEMP? 
+ CGAL::Mesh_3::Worksharing_ds_type g_worksharing_ds; +# endif + # ifdef CGAL_MESH_3_LOCKING_STRATEGY_SIMPLE_GRID_LOCKING CGAL::Mesh_3::Refinement_grid_type g_lock_grid(g_bbox, MESH_3_LOCKING_GRID_NUM_CELLS_PER_AXIS); diff --git a/Mesh_3/demo/Mesh_3/config.h b/Mesh_3/demo/Mesh_3/config.h index 644d58c550a..e8207b758d7 100644 --- a/Mesh_3/demo/Mesh_3/config.h +++ b/Mesh_3/demo/Mesh_3/config.h @@ -50,11 +50,29 @@ # define CGAL_MESH_3_LOCKING_STRATEGY_SIMPLE_GRID_LOCKING //# define CGAL_MESH_3_CONCURRENT_REFINEMENT_LOCK_ADJ_CELLS //# define CGAL_MESH_3_DO_NOT_LOCK_INFINITE_VERTEX - - const int MESH_3_LOCKING_GRID_NUM_CELLS_PER_AXIS = 30; - const int MESH_3_FIRST_GRID_LOCK_RADIUS = 2; - const int MESH_3_REFINEMENT_GRAINSIZE = 10; +//# define CGAL_MESH_3_ACTIVATE_GRID_INDEX_CACHE_IN_VERTEX +# define CGAL_MESH_3_WORKSHARING_USES_TASKS +//# define CGAL_MESH_3_WORKSHARING_USES_PARALLEL_FOR +//# define CGAL_MESH_3_WORKSHARING_USES_PARALLEL_DO + +# ifdef CGAL_MESH_3_WORKSHARING_USES_TASKS + const int MESH_3_LOCKING_GRID_NUM_CELLS_PER_AXIS = 25; + const int MESH_3_FIRST_GRID_LOCK_RADIUS = 0; + + const int MESH_3_WORK_STATS_GRID_NUM_CELLS_PER_AXIS = 2; + const int MESH_3_WORK_STATS_GRID_NUM_CELLS = + MESH_3_WORK_STATS_GRID_NUM_CELLS_PER_AXIS* + MESH_3_WORK_STATS_GRID_NUM_CELLS_PER_AXIS* + MESH_3_WORK_STATS_GRID_NUM_CELLS_PER_AXIS; + +# else + const int MESH_3_LOCKING_GRID_NUM_CELLS_PER_AXIS = 30; + const int MESH_3_FIRST_GRID_LOCK_RADIUS = 2; + const int MESH_3_REFINEMENT_GRAINSIZE = 10; +# endif + + # ifdef CGAL_MESH_3_LOCKING_STRATEGY_CELL_LOCK # include typedef tbb::recursive_mutex Cell_mutex_type; // CJTODO try others @@ -72,7 +90,7 @@ // Concurrency Parameters // ========================================================================== - const size_t ELEMENT_BATCH_SIZE = 30000; + const size_t ELEMENT_BATCH_SIZE = 100000; // ========================================================================== // Profiling diff --git 
a/Mesh_3/include/CGAL/Mesh_3/Locking_data_structures.h b/Mesh_3/include/CGAL/Mesh_3/Locking_data_structures.h index f85d10042c7..5a923a792ea 100644 --- a/Mesh_3/include/CGAL/Mesh_3/Locking_data_structures.h +++ b/Mesh_3/include/CGAL/Mesh_3/Locking_data_structures.h @@ -719,9 +719,9 @@ protected: TLS_locked_cells m_tls_locked_cells; }; -typedef Simple_grid_locking_ds Refinement_grid_type; +//typedef Simple_grid_locking_ds Refinement_grid_type; //typedef Simple_grid_locking_ds_with_mutex Refinement_grid_type; -//typedef Simple_grid_locking_ds_with_thread_ids Refinement_grid_type; +typedef Simple_grid_locking_ds_with_thread_ids Refinement_grid_type; } //namespace Mesh_3 diff --git a/Mesh_3/include/CGAL/Mesh_3/Worksharing_data_structures.h b/Mesh_3/include/CGAL/Mesh_3/Worksharing_data_structures.h new file mode 100644 index 00000000000..61b5497765c --- /dev/null +++ b/Mesh_3/include/CGAL/Mesh_3/Worksharing_data_structures.h @@ -0,0 +1,322 @@ +// Copyright (c) 2012 INRIA Sophia-Antipolis (France). +// All rights reserved. +// +// This file is part of CGAL (www.cgal.org). +// You can redistribute it and/or modify it under the terms of the GNU +// General Public License as published by the Free Software Foundation, +// either version 3 of the License, or (at your option) any later version. +// +// Licensees holding a valid commercial license may use this file in +// accordance with the commercial license agreement provided with the software. +// +// This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE +// WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
+// +// $URL: $ +// $Id: $ +// +// Author(s) : Clement Jamin + +#ifdef CONCURRENT_MESH_3 + +#ifndef CGAL_MESH_3_WORKSHARING_DATA_STRUCTURES_H +#define CGAL_MESH_3_WORKSHARING_DATA_STRUCTURES_H + +#include + +#include +#include + +// CJTODO TEMP: not thread-safe => move it to Mesher_3 +extern CGAL::Bbox_3 g_bbox; + +namespace CGAL { +namespace Mesh_3 { + +// Forward declarations +class Dynamic_load_based_worksharing_ds; +// Typedef +typedef Dynamic_load_based_worksharing_ds Worksharing_ds_type; + + + +class Work_statistics +{ +public: + // Constructors + + Work_statistics(const Bbox_3 &bbox, + int num_grid_cells_per_axis) + : m_num_grid_cells_per_axis(num_grid_cells_per_axis) + { + m_laziest_cell_index = 0; + m_laziest_cell_occupation = 1000; + + int num_cells = + num_grid_cells_per_axis*num_grid_cells_per_axis*num_grid_cells_per_axis; + m_occupation_grid = new tbb::atomic[num_cells]; + // Initialize grid + for (int i = 0 ; i < num_cells ; ++i) + m_occupation_grid[i] = 0; + + // Keep mins and resolutions + m_xmin = bbox.xmin(); + m_ymin = bbox.ymin(); + m_zmin = bbox.zmin(); + double n = static_cast(num_grid_cells_per_axis); + m_resolution_x = n / (bbox.xmax() - m_xmin); + m_resolution_y = n / (bbox.ymax() - m_ymin); + m_resolution_z = n / (bbox.zmax() - m_zmin); + } + + /// Destructor + ~Work_statistics() + { + delete [] m_occupation_grid; + } + + void add_occupation(int cell_index, int to_add, int num_items_in_work_queue) + { + int new_occupation = + (m_occupation_grid[cell_index].fetch_and_add(to_add)) + + to_add; + + // If this cell is the current most lazy, update the value + if (cell_index == m_laziest_cell_index) + { + if (num_items_in_work_queue == 0) + // So that it won't stay long the laziest + m_laziest_cell_occupation = 999999; + else + m_laziest_cell_occupation = new_occupation; + } + else if (num_items_in_work_queue > 0 + && new_occupation <= m_laziest_cell_occupation) + { + m_laziest_cell_index = cell_index; + m_laziest_cell_occupation = 
new_occupation; + } + } + + void add_occupation(int index_x, int index_y, int index_z, + int to_add, int num_items_in_work_queue) + { + int index = + index_z*m_num_grid_cells_per_axis*m_num_grid_cells_per_axis + + index_y*m_num_grid_cells_per_axis + + index_x; + return add_occupation(index, to_add, num_items_in_work_queue); + } + + /// P3 must provide .x(), .y(), .z() + template + int compute_index(const P3 &point) + { + // Compute indices on grid + int index_x = static_cast( (to_double(point.x()) - m_xmin) * m_resolution_x); + index_x = std::max( 0, std::min(index_x, m_num_grid_cells_per_axis - 1) ); + int index_y = static_cast( (to_double(point.y()) - m_ymin) * m_resolution_y); + index_y = std::max( 0, std::min(index_y, m_num_grid_cells_per_axis - 1) ); + int index_z = static_cast( (to_double(point.z()) - m_zmin) * m_resolution_z); + index_z = std::max( 0, std::min(index_z, m_num_grid_cells_per_axis - 1) ); + + int index = + index_z*m_num_grid_cells_per_axis*m_num_grid_cells_per_axis + + index_y*m_num_grid_cells_per_axis + + index_x; + + return index; + } + + /// P3 must provide .x(), .y(), .z() + // Returns index in grid + template + int add_occupation(const P3 &point, int to_add, int num_items_in_work_queue) + { + int index = compute_index(point); + add_occupation(index, to_add, num_items_in_work_queue); + return index; + } + + int get_laziest_cell_index() + { + return m_laziest_cell_index; + } + +protected: + int m_num_grid_cells_per_axis; + double m_xmin; + double m_ymin; + double m_zmin; + double m_resolution_x; + double m_resolution_y; + double m_resolution_z; + tbb::atomic * m_occupation_grid; + + tbb::atomic m_laziest_cell_index; + tbb::atomic m_laziest_cell_occupation; +}; + + +/* + * ============== + * class WorkItem + * Abstract base class for a piece of work. + * ============== + */ +class WorkItem +{ +public: + WorkItem() {} + // Derived class defines the actual work. 
+ virtual void run() = 0; + virtual void set_index(int) = 0; + virtual int get_index() const = 0; +}; + +template +class ConcreteWorkItem + : public WorkItem +{ +public: + ConcreteWorkItem(const Func& func) + : m_func(func), m_index(-1) + {} + + void run() + { + m_func(); + delete this; + } + + void set_index(int index) + { + m_index = index; + } + + int get_index() const + { + return m_index; + } + +private: + Func m_func; + int m_index; +}; + + + +/* + * ================= + * class RunWorkItem + * ================= + */ +class RunWorkItem + : public tbb::task +{ +public: + RunWorkItem() {} + +private: + /*override*/inline tbb::task* execute(); +}; + + + +/* + * ======================================= + * class Dynamic_load_based_worksharing_ds + * ======================================= + */ +class Dynamic_load_based_worksharing_ds +{ +public: + // Constructors + Dynamic_load_based_worksharing_ds() + : m_stats(g_bbox, MESH_3_WORK_STATS_GRID_NUM_CELLS_PER_AXIS) + { + for (int i = 0 ; i < MESH_3_WORK_STATS_GRID_NUM_CELLS ; ++i) + m_num_items[i] = 0; + } + + /// Destructor + ~Dynamic_load_based_worksharing_ds() + { + } + + template + void add(WorkItem * p_item, const P3 &point, tbb::task &parent_task) + { + int index = m_stats.compute_index(point); + p_item->set_index(index); + m_work_items[index].push(p_item); + ++m_num_items[index]; + // CJTODO: try "spawn" instead of enqueue + tbb::task::enqueue(*new(parent_task.allocate_child()) RunWorkItem); + } + + void run_next_work_item() + { + WorkItem *p_item = 0; + int index = m_stats.get_laziest_cell_index(); + bool popped = m_work_items[index].try_pop(p_item); + // If queue is empty + if (!popped) + { + // Look for a non-empty queue + for (index = 0 ; !popped ; ++index) + { + CGAL_assertion(index < MESH_3_WORK_STATS_GRID_NUM_CELLS); + popped = m_work_items[index].try_pop(p_item); + } + + --index; + } + --m_num_items[index]; + CGAL_assertion(p_item != 0); + m_stats.add_occupation(index, 1, m_num_items[index]); +
p_item->run(); + m_stats.add_occupation(index, -1, m_num_items[index]); + } + +protected: + Work_statistics m_stats; + tbb::concurrent_queue m_work_items[MESH_3_WORK_STATS_GRID_NUM_CELLS]; + tbb::atomic m_num_items [MESH_3_WORK_STATS_GRID_NUM_CELLS]; +}; + + +} //namespace Mesh_3 +} //namespace CGAL + +extern CGAL::Mesh_3::Worksharing_ds_type g_worksharing_ds; + +namespace CGAL +{ +namespace Mesh_3 +{ + +inline tbb::task* RunWorkItem::execute() +{ + g_worksharing_ds.run_next_work_item(); + return NULL; +} + +/* + * ===================== + * function enqueue_work + * ===================== + */ +template +void enqueue_work(Func f, tbb::task &parent_task, const P3 &point) +{ + g_worksharing_ds.add(new ConcreteWorkItem(f), + point, + parent_task); +} + +} //namespace Mesh_3 +} //namespace CGAL + +#endif // CGAL_MESH_3_WORKSHARING_DATA_STRUCTURES_H +#endif // CONCURRENT_MESH_3 diff --git a/STL_Extension/include/CGAL/Concurrent_compact_container.h b/STL_Extension/include/CGAL/Concurrent_compact_container.h index 58745cb7bbf..0c9358d07ee 100644 --- a/STL_Extension/include/CGAL/Concurrent_compact_container.h +++ b/STL_Extension/include/CGAL/Concurrent_compact_container.h @@ -562,7 +562,8 @@ private: } typedef tbb::enumerable_thread_specific Free_lists; - typedef tbb::spin_mutex Mutex; // CJTODO: try others + typedef tbb::queuing_mutex Mutex; // CJTODO: try others + //typedef tbb::spin_mutex Mutex; // CJTODO: try others // We store a vector of pointers to all allocated blocks and their sizes. 
// Knowing all pointers, we don't have to walk to the end of a block to reach diff --git a/Triangulation_3/include/CGAL/Triangulation_3.h b/Triangulation_3/include/CGAL/Triangulation_3.h index 2b8eccfb2b1..5f37e466cd1 100644 --- a/Triangulation_3/include/CGAL/Triangulation_3.h +++ b/Triangulation_3/include/CGAL/Triangulation_3.h @@ -523,6 +523,31 @@ public: #ifdef CGAL_MESH_3_CONCURRENT_REFINEMENT + bool try_lock_vertex(Vertex_handle vh, int lock_radius = 0) const + { +#ifdef CGAL_MESH_3_ACTIVATE_GRID_INDEX_CACHE_IN_VERTEX + int grid_index = vh->get_grid_index_cache(); + if (grid_index >= 0) + { + if (g_lock_grid.try_lock(grid_index, lock_radius)) + { + // Has the cached value changed in the meantime? + if (vh->get_grid_index_cache() == grid_index) + return true; + } + return false; + } + else + { + std::pair r = g_lock_grid.try_lock(vh->point(), lock_radius); + vh->set_grid_index_cache(r.second); + return r.first; + } +#else + return g_lock_grid.try_lock(vh->point(), lock_radius).first; +#endif + } + bool try_lock_element(Cell_handle cell_handle, int lock_radius = 0) const { bool success = true; @@ -537,10 +562,10 @@ public: // We do not lock the infinite vertex if (!is_infinite(vh)) { - success = g_lock_grid.try_lock(vh->point(), lock_radius).first; + success = try_lock_vertex(vh, lock_radius); } # else - success = g_lock_grid.try_lock(vh->point(), lock_radius).first; + success = try_lock_vertex(vh, lock_radius); # endif } # elif defined(CGAL_MESH_3_LOCKING_STRATEGY_CELL_LOCK) @@ -556,10 +581,11 @@ public: # ifdef CGAL_MESH_3_LOCKING_STRATEGY_SIMPLE_GRID_LOCKING // Lock the element area on the grid Cell_handle cell = facet.first; - for (int iVertex = (facet.second+1)&3 ; success && iVertex != facet.second ; iVertex = (iVertex+1)&3) + for (int iVertex = (facet.second+1)&3 ; + success && iVertex != facet.second ; iVertex = (iVertex+1)&3) { Vertex_handle vh = cell->vertex(iVertex); - success = g_lock_grid.try_lock(vh->point(), lock_radius).first; + success =
try_lock_vertex(vh, lock_radius); } # elif defined(CGAL_MESH_3_LOCKING_STRATEGY_CELL_LOCK) success = facet.first->try_lock(); // CJTODO: we lock the cell => stupid? diff --git a/Triangulation_3/include/CGAL/Triangulation_ds_vertex_base_3.h b/Triangulation_3/include/CGAL/Triangulation_ds_vertex_base_3.h index 4a3b452fc26..ca58888ee53 100644 --- a/Triangulation_3/include/CGAL/Triangulation_ds_vertex_base_3.h +++ b/Triangulation_3/include/CGAL/Triangulation_ds_vertex_base_3.h @@ -32,6 +32,10 @@ # endif #endif +#ifdef CGAL_MESH_3_ACTIVATE_GRID_INDEX_CACHE_IN_VERTEX +# include +#endif + namespace CGAL { template < typename TDS = void > @@ -46,13 +50,16 @@ public: struct Rebind_TDS { typedef Triangulation_ds_vertex_base_3 Other; }; + Triangulation_ds_vertex_base_3() + : _c() #ifdef CGAL_MESH_3_DO_NOT_LOCK_INFINITE_VERTEX - Triangulation_ds_vertex_base_3() - : _c(), m_visited(false) {} -#else - Triangulation_ds_vertex_base_3() - : _c() {} + , m_visited(false) #endif + { +#ifdef CGAL_MESH_3_ACTIVATE_GRID_INDEX_CACHE_IN_VERTEX + m_grid_index_cache = -1; +#endif + } Triangulation_ds_vertex_base_3(Cell_handle c) : _c(c) {} @@ -90,6 +97,17 @@ public: { return cell() != Cell_handle(); } + +#ifdef CGAL_MESH_3_ACTIVATE_GRID_INDEX_CACHE_IN_VERTEX + void set_grid_index_cache (int index) + { + m_grid_index_cache = index; + } + int get_grid_index_cache() + { + return m_grid_index_cache; + } +#endif // For use by the Compact_container. void * for_compact_container() const @@ -101,9 +119,12 @@ private: #ifdef CGAL_MESH_3_DO_NOT_LOCK_INFINITE_VERTEX mutable tbb::spin_mutex m_mutex; +#endif +#ifdef CGAL_MESH_3_ACTIVATE_GRID_INDEX_CACHE_IN_VERTEX + tbb::atomic m_grid_index_cache; #endif Cell_handle _c; - + #ifdef CGAL_MESH_3_DO_NOT_LOCK_INFINITE_VERTEX public: bool m_visited; // CJTODO TEMP TEST