Merge pull request #4389 from lrineau/DT_3-issue4388-GF

Fix DT3 parallel performance loss (issue #4388)
This commit is contained in:
Laurent Rineau 2019-12-17 09:16:52 +01:00
commit 674627937c
5 changed files with 230 additions and 32 deletions

View File

@ -288,6 +288,24 @@ private:
CGAL::cpp11::atomic<bool>* const stop_ptr;
#endif
#ifdef CGAL_LINKED_WITH_TBB
// Parallel overload: reports the approximate number of vertices as tracked
// by the concurrent vertex container. This is only available when the code
// is compiled with CGAL_CONCURRENT_COMPACT_CONTAINER_APPROXIMATE_SIZE set
// to a non-zero value; otherwise calling this overload raises a CGAL error.
std::size_t approximate_number_of_vertices(CGAL::Parallel_tag) const {
# if CGAL_CONCURRENT_COMPACT_CONTAINER_APPROXIMATE_SIZE
  const auto& vertex_container = r_c3t3_.triangulation().tds().vertices();
  return vertex_container.approximate_size();
# else // not CGAL_CONCURRENT_COMPACT_CONTAINER_APPROXIMATE_SIZE
  // No approximate counter is maintained in this configuration.
  CGAL_error_msg(
    "If you want to use the Mesh_3 feature \"maximal_number_of_vertices\"\n"
    "with CGAL::Parallel_tag then you need to recompile the code with the\n"
    "preprocessor macro CGAL_CONCURRENT_COMPACT_CONTAINER_APPROXIMATE_SIZE\n"
    "set to 1. That will induce a performance loss of 3%.\n");
# endif // not CGAL_CONCURRENT_COMPACT_CONTAINER_APPROXIMATE_SIZE
}
#endif // CGAL_LINKED_WITH_TBB
// Sequential overload: simply forwards the vertex count reported by the
// triangulation.
std::size_t approximate_number_of_vertices(CGAL::Sequential_tag) const {
  const std::size_t nb_vertices = r_c3t3_.triangulation().number_of_vertices();
  return nb_vertices;
}
bool forced_stop() const {
#ifndef CGAL_NO_ATOMIC
if(stop_ptr != 0 &&
@ -298,8 +316,7 @@ private:
}
#endif // not defined CGAL_NO_ATOMIC
if(maximal_number_of_vertices_ != 0 &&
r_c3t3_.triangulation().number_of_vertices() >=
maximal_number_of_vertices_)
approximate_number_of_vertices(Concurrency_tag()) >= maximal_number_of_vertices_)
{
if(error_code_ != 0) {
*error_code_ = CGAL_MESH_3_MAXIMAL_NUMBER_OF_VERTICES_REACHED;
@ -502,15 +519,15 @@ refine_mesh(std::string dump_after_refine_surface_prefix)
% r_tr.number_of_vertices()
% nbsteps % cells_mesher_.debug_info()
% (nbsteps / timer.time());
if(! forced_stop() &&
refinement_stage == REFINE_FACETS &&
if(refinement_stage == REFINE_FACETS &&
! forced_stop() &&
facets_mesher_.is_algorithm_done())
{
facets_mesher_.scan_edges();
refinement_stage = REFINE_FACETS_AND_EDGES;
}
if(! forced_stop() &&
refinement_stage == REFINE_FACETS_AND_EDGES &&
if(refinement_stage == REFINE_FACETS_AND_EDGES &&
! forced_stop() &&
facets_mesher_.is_algorithm_done())
{
facets_mesher_.scan_vertices();
@ -800,7 +817,7 @@ status() const
if(boost::is_convertible<Concurrency_tag, Parallel_tag>::value) {
const WorksharingDataStructureType* ws_ds =
this->get_worksharing_data_structure();
return Mesher_status(r_c3t3_.triangulation().number_of_vertices(),
return Mesher_status(approximate_number_of_vertices(Concurrency_tag()),
0,
ws_ds->approximate_number_of_enqueued_element());
}

View File

@ -115,15 +115,40 @@ namespace CCC_internal {
template< typename pointer, typename size_type, typename CCC >
class Free_list {
public:
Free_list() : m_head(nullptr), m_size(0) {}
// Builds an empty free list: no head element and a size of zero.
Free_list()
  : m_head(nullptr)
  , m_size(0)
{
#if CGAL_CONCURRENT_COMPACT_CONTAINER_APPROXIMATE_SIZE
  // Publish the initial size to the approximate counters.
  // The cost of CGAL_CONCURRENT_COMPACT_CONTAINER_APPROXIMATE_SIZE=1 was
  // measured at 3% for a parallel insertion of 100k random points in
  // Delaunay_triangulation_3.
  refresh_approximate_size();
#endif // CGAL_CONCURRENT_COMPACT_CONTAINER_APPROXIMATE_SIZE
}
// Resets the free list to the empty state (no head, size zero).
// When approximate sizes are enabled, the approximate counters are
// refreshed as well: otherwise approximate_size() would keep returning the
// size the list had before the reset, and inc_size() would not correct it
// until the list grows past that stale value.
void init() {
  m_head = nullptr;
  m_size = 0;
#if CGAL_CONCURRENT_COMPACT_CONTAINER_APPROXIMATE_SIZE
  refresh_approximate_size();
#endif // CGAL_CONCURRENT_COMPACT_CONTAINER_APPROXIMATE_SIZE
}
pointer head() const { return m_head; }
void set_head(pointer p) { m_head = p; }
size_type size() const { return m_size; }
void set_size(size_type s) { m_size = s; }
void inc_size() { ++m_size; }
void dec_size() { --m_size; }
// Overwrites the exact size of the free list. When approximate sizes are
// enabled, the approximate counters are re-synchronized immediately.
void set_size(size_type s) {
#if CGAL_CONCURRENT_COMPACT_CONTAINER_APPROXIMATE_SIZE
  m_size = s;
  refresh_approximate_size();
#else
  m_size = s;
#endif
}
// Increments the exact size by one. When approximate sizes are enabled, the
// approximate counters are refreshed only once the exact size has grown
// beyond the last published value times the tolerance factor.
void inc_size() {
  ++m_size;
#if CGAL_CONCURRENT_COMPACT_CONTAINER_APPROXIMATE_SIZE
  const bool grew_past_tolerance =
    m_size > (m_approximate_size * precision_of_approximate_size_plus_1);
  if(grew_past_tolerance) {
    refresh_approximate_size();
  }
#endif // CGAL_CONCURRENT_COMPACT_CONTAINER_APPROXIMATE_SIZE
}
// Decrements the exact size by one. When approximate sizes are enabled, the
// approximate counters are refreshed only once the exact size, scaled by
// the tolerance factor, has dropped below the last published value.
void dec_size() {
  --m_size;
#if CGAL_CONCURRENT_COMPACT_CONTAINER_APPROXIMATE_SIZE
  const bool shrank_past_tolerance =
    (m_size * precision_of_approximate_size_plus_1) < m_approximate_size;
  if(shrank_past_tolerance) {
    refresh_approximate_size();
  }
#endif // CGAL_CONCURRENT_COMPACT_CONTAINER_APPROXIMATE_SIZE
}
bool empty() { return size() == 0; }
// Warning: copy the pointer, not the data!
Free_list& operator= (const Free_list& other)
@ -149,9 +174,26 @@ public:
other.init(); // clear other
}
#if CGAL_CONCURRENT_COMPACT_CONTAINER_APPROXIMATE_SIZE
// Returns the last published approximate size, read from the atomic copy
// with relaxed memory ordering.
size_type approximate_size() const {
  const size_type published =
    m_atomic_approximate_size.load(std::memory_order_relaxed);
  return published;
}
#endif // CGAL_CONCURRENT_COMPACT_CONTAINER_APPROXIMATE_SIZE
protected:
pointer m_head; // the free list head pointer
size_type m_size; // the free list size
#if CGAL_CONCURRENT_COMPACT_CONTAINER_APPROXIMATE_SIZE
// Relative tolerance factor used by inc_size() and dec_size(): the
// approximate size is refreshed when `m_size` exceeds
// `m_approximate_size * 1.10`, or when `m_size * 1.10` drops below
// `m_approximate_size`.
static constexpr double precision_of_approximate_size_plus_1 = 1.10;
// Value of `m_size` at the time of the last refresh.
size_type m_approximate_size;
// Atomic copy of `m_approximate_size`; this is the value returned by
// approximate_size().
std::atomic<size_type> m_atomic_approximate_size;
// Copies the exact size into both approximate counters. The atomic store
// uses relaxed memory ordering.
void refresh_approximate_size() {
m_approximate_size = m_size;
m_atomic_approximate_size.store(m_size, std::memory_order_relaxed);
}
#endif // CGAL_CONCURRENT_COMPACT_CONTAINER_APPROXIMATE_SIZE
};
// Class Concurrent_compact_container
@ -247,12 +289,16 @@ public:
void swap(Self &c)
{
std::swap(m_alloc, c.m_alloc);
std::swap(m_capacity, c.m_capacity);
#if CGAL_CONCURRENT_COMPACT_CONTAINER_APPROXIMATE_SIZE
{ // non-atomic swap
size_type other_size = c.m_size;
c.m_size = size_type(m_size);
m_size = other_size;
size_type other_capacity = c.m_capacity;
c.m_capacity = size_type(m_capacity);
m_capacity = other_capacity;
}
#else // not CGAL_CONCURRENT_COMPACT_CONTAINER_APPROXIMATE_SIZE
std::swap(m_capacity, c.m_capacity);
#endif // not CGAL_CONCURRENT_COMPACT_CONTAINER_APPROXIMATE_SIZE
std::swap(m_block_size, c.m_block_size);
std::swap(m_first_item, c.m_first_item);
std::swap(m_last_item, c.m_last_item);
@ -338,7 +384,6 @@ private:
#ifndef CGAL_NO_ASSERTIONS
std::memset(&*x, 0, sizeof(T));
#endif*/
--m_size;
put_on_free_list(&*x, fl);
}
public:
@ -359,12 +404,14 @@ public:
// The complexity is O(size(free list = capacity-size)).
void merge(Self &d);
// If `CGAL_NO_ATOMIC` is defined, do not call this function while others
// are inserting/erasing elements
// Do not call this function while others are inserting/erasing elements
size_type size() const
{
#ifdef CGAL_NO_ATOMIC
#if CGAL_CONCURRENT_COMPACT_CONTAINER_APPROXIMATE_SIZE
size_type size = m_capacity.load(std::memory_order_relaxed);
#else // not CGAL_CONCURRENT_COMPACT_CONTAINER_APPROXIMATE_SIZE
size_type size = m_capacity;
#endif // not CGAL_CONCURRENT_COMPACT_CONTAINER_APPROXIMATE_SIZE
for( typename Free_lists::iterator it_free_list = m_free_lists.begin() ;
it_free_list != m_free_lists.end() ;
++it_free_list )
@ -372,11 +419,22 @@ public:
size -= it_free_list->size();
}
return size;
#else // atomic can be used
return m_size;
#endif
}
#if CGAL_CONCURRENT_COMPACT_CONTAINER_APPROXIMATE_SIZE
// Returns an approximation of the number of elements: the capacity (read
// with relaxed memory ordering) minus the approximate size of every free
// list.
size_type approximate_size() const
{
  size_type result = m_capacity.load(std::memory_order_relaxed);
  for(const auto& free_list : m_free_lists) {
    result -= free_list.approximate_size();
  }
  return result;
}
#endif // CGAL_CONCURRENT_COMPACT_CONTAINER_APPROXIMATE_SIZE
size_type max_size() const
{
return std::allocator_traits<allocator_type>::max_size(m_alloc);
@ -384,7 +442,11 @@ public:
// Returns the current capacity of the container.
// With CGAL_CONCURRENT_COMPACT_CONTAINER_APPROXIMATE_SIZE enabled,
// `m_capacity` is a std::atomic and is read with relaxed memory ordering;
// otherwise it is a plain `size_type` returned directly.
size_type capacity() const
{
#if CGAL_CONCURRENT_COMPACT_CONTAINER_APPROXIMATE_SIZE
return m_capacity.load(std::memory_order_relaxed);
#else // not CGAL_CONCURRENT_COMPACT_CONTAINER_APPROXIMATE_SIZE
return m_capacity;
#endif // not CGAL_CONCURRENT_COMPACT_CONTAINER_APPROXIMATE_SIZE
}
// void resize(size_type sz, T c = T()); // TODO makes sense ???
@ -482,7 +544,6 @@ private:
{
CGAL_assertion(type(ret) == USED);
fl->dec_size();
++m_size;
m_time_stamper->set_time_stamp(ret);
return iterator(ret, 0);
}
@ -575,23 +636,21 @@ private:
m_first_item = nullptr;
m_last_item = nullptr;
m_all_items = All_items();
m_size = 0;
m_time_stamper->reset();
}
allocator_type m_alloc;
#if CGAL_CONCURRENT_COMPACT_CONTAINER_APPROXIMATE_SIZE
std::atomic<size_type> m_capacity;
#else // not CGAL_CONCURRENT_COMPACT_CONTAINER_APPROXIMATE_SIZE
size_type m_capacity;
#endif // not CGAL_CONCURRENT_COMPACT_CONTAINER_APPROXIMATE_SIZE
size_type m_block_size;
Free_lists m_free_lists;
pointer m_first_item;
pointer m_last_item;
All_items m_all_items;
mutable Mutex m_mutex;
#ifdef CGAL_NO_ATOMIC
size_type m_size;
#else
CGAL::cpp11::atomic<size_type> m_size;
#endif
// This is a pointer, so that the definition of Compact_container does
// not require a complete type `T`.
@ -601,7 +660,6 @@ private:
template < class T, class Allocator >
void Concurrent_compact_container<T, Allocator>::merge(Self &d)
{
m_size += d.m_size;
CGAL_precondition(&d != this);
// Allocators must be "compatible" :
@ -640,7 +698,11 @@ void Concurrent_compact_container<T, Allocator>::merge(Self &d)
}
m_all_items.insert(m_all_items.end(), d.m_all_items.begin(), d.m_all_items.end());
// Add the capacities.
#if CGAL_CONCURRENT_COMPACT_CONTAINER_APPROXIMATE_SIZE
m_capacity.fetch_add(d.m_capacity, std::memory_order_relaxed);
#else // not CGAL_CONCURRENT_COMPACT_CONTAINER_APPROXIMATE_SIZE
m_capacity += d.m_capacity;
#endif // not CGAL_CONCURRENT_COMPACT_CONTAINER_APPROXIMATE_SIZE
// It seems reasonnable to take the max of the block sizes.
m_block_size = (std::max)(m_block_size, d.m_block_size);
// Clear d.
@ -678,7 +740,11 @@ void Concurrent_compact_container<T, Allocator>::
old_block_size = m_block_size;
new_block = m_alloc.allocate(old_block_size + 2);
m_all_items.push_back(std::make_pair(new_block, old_block_size + 2));
#if CGAL_CONCURRENT_COMPACT_CONTAINER_APPROXIMATE_SIZE
m_capacity.fetch_add(old_block_size, std::memory_order_relaxed);
#else // not CGAL_CONCURRENT_COMPACT_CONTAINER_APPROXIMATE_SIZE
m_capacity += old_block_size;
#endif // not CGAL_CONCURRENT_COMPACT_CONTAINER_APPROXIMATE_SIZE
// We insert this new block at the end.
if (m_last_item == nullptr) // First time

View File

@ -90,6 +90,8 @@ create_cmake_script_with_options()
# Created by the script cgal_create_CMakeLists
# This is the CMake script for compiling a set of CGAL applications.
cmake_minimum_required(VERSION 3.1...3.15)
EOF
#---------------------------------------------------------------------------
if [ "$SINGLE_SOURCE" = "n" ]; then
@ -102,8 +104,6 @@ EOF
cat << 'EOF'
cmake_minimum_required(VERSION 2.8.11)
# CGAL and its components
EOF
if [ -n "$ENABLE_CTEST" ]; then

View File

@ -0,0 +1,62 @@
# Created by the script cgal_create_CMakeLists
# This is the CMake script for compiling a set of CGAL applications.
# The benchmark programs of the Triangulation_3 package are declared here.
cmake_minimum_required(VERSION 3.1...3.15)
project( Triangulation_3 )
# CGAL and its components
# CGAL is looked up quietly; without it nothing in this directory is built.
find_package( CGAL QUIET COMPONENTS )
if ( NOT CGAL_FOUND )
message(STATUS "This project requires the CGAL library, and will not be compiled.")
return()
endif()
# Boost and its components
# NOTE(review): Boost is requested with REQUIRED, so the fallback branch
# below is presumably never reached -- confirm before relying on it.
find_package( Boost REQUIRED )
if ( NOT Boost_FOUND )
message(STATUS "This project requires the Boost library, and will not be compiled.")
return()
endif()
# include for local directory
# include for local package
# Creating entries for all C++ files with "main" routine
# ##########################################################
# One executable is declared per source file listed below.
create_single_source_cgal_program( "incident_edges.cpp" )
create_single_source_cgal_program( "simple_2.cpp" )
create_single_source_cgal_program( "simple.cpp" )
create_single_source_cgal_program( "Triangulation_benchmark_3.cpp" )
# Optional Google Benchmark programs for the parallel Delaunay triangulation.
# They need both Google Benchmark and TBB. Both packages are looked up
# quietly and without REQUIRED, so that a missing optional dependency only
# disables these two programs instead of aborting the configuration of the
# whole directory.
find_package(benchmark QUIET)
find_package(TBB QUIET)
if(TARGET benchmark::benchmark AND TBB_FOUND)
  include( CGAL_target_use_TBB )

  # Reference build, without the approximate-size bookkeeping.
  create_single_source_cgal_program( "DT3_benchmark_with_TBB.cpp" )
  CGAL_target_use_TBB(DT3_benchmark_with_TBB)
  target_link_libraries(DT3_benchmark_with_TBB PRIVATE benchmark::benchmark)

  # Same source, compiled with the approximate-size bookkeeping of
  # Concurrent_compact_container enabled, to measure its overhead.
  add_executable(DT3_benchmark_with_TBB_CCC_approximate_size DT3_benchmark_with_TBB.cpp)
  CGAL_target_use_TBB(DT3_benchmark_with_TBB_CCC_approximate_size)
  # The value is spelled out because the headers test the macro with `#if`.
  target_compile_definitions(DT3_benchmark_with_TBB_CCC_approximate_size PRIVATE CGAL_CONCURRENT_COMPACT_CONTAINER_APPROXIMATE_SIZE=1)
  target_link_libraries(DT3_benchmark_with_TBB_CCC_approximate_size PRIVATE CGAL::CGAL benchmark::benchmark)
else()
  message(STATUS "NOTICE: the DT3 benchmarks require Google Benchmark and TBB, and will not be compiled.")
endif()

View File

@ -0,0 +1,53 @@
#include <CGAL/Real_timer.h>
#include <CGAL/Exact_predicates_inexact_constructions_kernel.h>
#include <CGAL/Delaunay_triangulation_3.h>
#include <CGAL/bounding_box.h>
#include <CGAL/Random.h>
#include <iostream>
#include <fstream>
#include <benchmark/benchmark.h>
// Kernel and point type used by the benchmark.
using K = CGAL::Exact_predicates_inexact_constructions_kernel;
using Point_3 = K::Point_3;

// Triangulation data structure tagged for parallel use, and the Delaunay
// triangulation built on top of it.
using Tds = CGAL::Triangulation_data_structure_3<
  CGAL::Triangulation_vertex_base_3<K>,
  CGAL::Triangulation_cell_base_3<K>,
  CGAL::Parallel_tag>;
using PDT = CGAL::Delaunay_triangulation_3<K, Tds>;
// global variables used by bench_dt3
// They are copies of the arguments of main(), which assigns them before
// running the benchmarks. bench_dt3 reads argv[1] as the path of the input
// point file.
int argc;
char** argv;
// Benchmarks the construction of a parallel 3D Delaunay triangulation from
// the points stored in the file named by the first command-line argument.
//
// The benchmark is skipped with an error message, instead of crashing or
// timing a meaningless empty run, when no file name was given, when the
// file cannot be opened, or when it contains no point.
void bench_dt3(benchmark::State& state) {
  // Reset the default random generator before the timed loop.
  CGAL::get_default_random() = CGAL::Random(0);

  if(argc < 2 || argv == nullptr || argv[1] == nullptr) {
    state.SkipWithError("missing argument: path of the input point file");
    return;
  }

  std::ifstream in(argv[1]);
  if(!in) {
    state.SkipWithError("cannot open the input point file");
    return;
  }

  // Load the whole point set once, outside of the timed loop.
  std::vector<Point_3> points;
  Point_3 p;
  while(in >> p)
    points.push_back(p);
  if(points.empty()) {
    state.SkipWithError("the input point file contains no point");
    return;
  }

  for(auto _ : state) {
    // Each iteration builds the lock grid over the bounding box of the
    // points, then the triangulation itself.
    CGAL::Bbox_3 bb = CGAL::bounding_box(points.begin(), points.end()).bbox();
    PDT::Lock_data_structure locking_ds(bb, 50);
    PDT pdt(points.begin(), points.end(), &locking_ds);
  }
}
// Register bench_dt3 and report its timings in milliseconds.
BENCHMARK(bench_dt3)->Unit(benchmark::kMillisecond);
// Entry point: hands the command line to Google Benchmark, publishes it to
// the globals read by bench_dt3, then runs the registered benchmarks.
int main(int argc, char* argv[])
{
  // Initialize() receives argc by pointer, so it runs before the copies
  // below are taken.
  benchmark::Initialize(&argc, argv);

  ::argv = argv;
  ::argc = argc;

  benchmark::RunSpecifiedBenchmarks();
  return 0;
}