From bf0a42d7407acb35e053df4544cc56ddc44110ed Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Fri, 13 Nov 2020 01:03:57 +0100 Subject: [PATCH] lock-like protection for update_exact The exact choice of memory_order_* is hard, and even for versions that seem to work on x86_64 TSAN complains like crazy. TODO: a version that, instead of blocking if another thread is updating, also does the computation, with suitable protection to ensure that pruning the tree cannot happen while anyone is computing on it. TODO: try having AT in Lazy instead of Lazy_rep (i.e. not shared) to avoid the indirection. We can possibly choose one or the other depending on the type. --- Filtered_kernel/include/CGAL/Lazy.h | 216 ++++++++++++++-------- Number_types/include/CGAL/Lazy_exact_nt.h | 68 +++---- STL_Extension/include/CGAL/Handle.h | 7 +- 3 files changed, 174 insertions(+), 117 deletions(-) diff --git a/Filtered_kernel/include/CGAL/Lazy.h b/Filtered_kernel/include/CGAL/Lazy.h index e19b19ee9cd..6e14b66831c 100644 --- a/Filtered_kernel/include/CGAL/Lazy.h +++ b/Filtered_kernel/include/CGAL/Lazy.h @@ -53,6 +53,7 @@ #include #include #include +#include namespace CGAL { @@ -65,10 +66,10 @@ struct Indirect_AT { Indirect_AT():p(new AT()){} Indirect_AT(AT const& a):p(new AT(a)){} Indirect_AT(AT&& a):p(new AT(std::move(a))){} - Indirect_AT& operator=(AT const& a){ old.reset(p.load(std::memory_order_relaxed)); p.store(new AT(a), std::memory_order_relaxed); return *this; } - Indirect_AT& operator=(AT&& a){ old.reset(p.load(std::memory_order_relaxed)); p.store(new AT(std::move(a)), std::memory_order_relaxed); return *this; } - operator AT const&()const{return *p.load(std::memory_order_relaxed);} - ~Indirect_AT() { delete p.load(std::memory_order_relaxed); } + Indirect_AT& operator=(AT const& a){ old.reset(p.load(std::memory_order_relaxed)); p.store(new AT(a), std::memory_order_release); return *this; } + Indirect_AT& operator=(AT&& a){ old.reset(p.load(std::memory_order_relaxed)); p.store(new AT(std::move(a)), std::memory_order_release); return *this; } + operator AT const&()const{return *p.load(std::memory_order_acquire);} + ~Indirect_AT() { delete p.load(std::memory_order_acquire); } }; template & l) return l.approx(); } -#if 0 -// Where is this one (non-const) needed ? Is it ? -template -inline -AT& -approx(Lazy& l) -{ - return l.approx(); -} -#endif - - template inline const ET& @@ -259,8 +248,8 @@ public: typedef AT_ AT; - mutable Indirect_AT at; - mutable std::atomic et; + mutable Indirect_AT at{}; + mutable std::atomic et { nullptr }; Lazy_rep () : at(), et(nullptr){} @@ -283,6 +272,69 @@ public: return at; } +#if 1 +#define CGAL_UPDATE_EXACT_BEGIN \ + if (!this->start_update()) return; +#define CGAL_UPDATE_EXACT_MIDDLE \ + this->et.store(pet, std::memory_order_release); +#define CGAL_UPDATE_EXACT_END + + // true if you should update + bool start_update() const + { + ET* other = nullptr; + // TODO specify memory_order_* + bool doit = et.compare_exchange_strong(other, (ET*)1); + if (!doit) { + // Wait until it becomes available + while ((uintptr_t)other == 1) { + std::this_thread::yield(); // or do nothing? + other = et.load(std::memory_order_relaxed); + } + return false; + } + return true; + } + + const ET & exact() const + { + uintptr_t pet = (uintptr_t) et.load(std::memory_order_relaxed); + if (pet <= 1) { + if (pet == 0) + update_exact(); + else + // Wait until it becomes available + while((uintptr_t)et.load(std::memory_order_relaxed)==1) + std::this_thread::yield(); // or do nothing? + } + return *et.load(std::memory_order_consume); + // Should we return the pointer from update_exact, to avoid an extra load and synchronization? + } + +#elif 1 + mutable std::atomic worker{}; // thread that is allowed to updatee + +#define CGAL_UPDATE_EXACT_BEGIN \ + if (!this->start_update()) return; +#define CGAL_UPDATE_EXACT_MIDDLE \ + this->et.store(pet, std::memory_order_release); +#define CGAL_UPDATE_EXACT_END + + // true if you should update + bool start_update() const + { + std::thread::id none{}; + // TODO specify memory_order_* + bool doit = worker.compare_exchange_strong(none, std::this_thread::get_id()); + if (!doit) { + // Wait until it becomes available + while(et.load(std::memory_order_relaxed)==nullptr) + std::this_thread::yield(); // or do nothing? + return false; + } + return true; + } + const ET & exact() const { if (et.load(std::memory_order_relaxed)==nullptr) @@ -291,20 +343,53 @@ public: // Should we return the pointer from update_exact, to avoid an extra load and synchronization? } - // ??? Is this needed? Safe? - ET & exact() - { - if (et.load(std::memory_order_relaxed)==nullptr) - update_exact(); - return *et.load(std::memory_order_consume); +#else + mutable std::atomic_uint workers{}; // number of threads running update_exact + // not written yet +#define CGAL_UPDATE_EXACT_BEGIN \ + workers.fetch_add(1, std::memory_order_relaxed); \ + if (et.load(std::memory_order_relaxed)!=nullptr) \ + if (!this->start_update()) return; +#define CGAL_UPDATE_EXACT_MIDDLE \ + bool updated = this->set_exact(pet); \ + if (!updated) { /* some other thread was faster */ \ + pet->~ET(); \ + } else if (this->end_update()) { +#define CGAL_UPDATE_EXACT_END \ } - bool set_exact(ET* pet) const { ET* other = nullptr; return this->et.compare_exchange_strong(other, pet, std::memory_order_release); } + // true if you should update + bool start_update() const + { + unsigned int w = workers.fetch_add(1, std::memory_order_relaxed); + ??? + } + + // true if you are the last one and can clean-up + bool end_update() const + { + bool b = workers.load(std::memory_order_relaxed) == 1 + || workers.fetch_sub(1, std::memory_order_release) == 1; + // unneeded for Lazy_rep_0? + if(b) std::atomic_thread_fence(std::memory_order_acquire); + return b; + } + + const ET & exact() const + { + if (et.load(std::memory_order_relaxed)==nullptr) + update_exact(); + return *et.load(std::memory_order_consume); + // Should we return the pointer from update_exact, to avoid an extra load and synchronization? + } + +#endif + #ifdef CGAL_LAZY_KERNEL_DEBUG void print_at_et(std::ostream& os, int level) const { @@ -350,14 +435,12 @@ class Lazy_rep_n : const EC& ec() const { return *this; } template void update_exact_helper(std::index_sequence) const { - ET* pet = new ET(ec()( CGAL::exact( std::get(l) ) ... ) ); - bool updated = this->set_exact(pet); - if (!updated) { // some other thread was faster - pet->~ET(); - } else { + CGAL_UPDATE_EXACT_BEGIN + ET* pet = new ET(ec()( CGAL::exact( std::get(l) ) ... ) ); + CGAL_UPDATE_EXACT_MIDDLE this->at = E2A()(*pet); l = std::tuple{}; - } + CGAL_UPDATE_EXACT_END } public: void update_exact() const { @@ -403,11 +486,10 @@ public: void update_exact() const { - ET* pet = new ET(); - bool updated = this->set_exact(pet); - if (!updated) { - pet->~ET(); - } + CGAL_UPDATE_EXACT_BEGIN + ET* pet = new ET(); + CGAL_UPDATE_EXACT_MIDDLE + CGAL_UPDATE_EXACT_END } Lazy_rep_0() @@ -518,19 +600,17 @@ public: void update_exact() const { - ET* pet = new ET(); + CGAL_UPDATE_EXACT_BEGIN + ET* pet = new ET(); // TODO : This looks really unfinished... - std::vector vec; - //this->et->reserve(this->at.size()); - ec()(CGAL::exact(l1_), std::back_inserter(pet)); - bool updated = this->set_exact(pet); - if (!updated) { - pet->~ET(); - } else { + std::vector vec; + //this->et->reserve(this->at.size()); + ec()(CGAL::exact(l1_), std::back_inserter(*pet)); + CGAL_UPDATE_EXACT_MIDDLE this->at = E2A()(*pet); // Prune lazy tree l1_ = L1(); - } + CGAL_UPDATE_EXACT_END } Lazy_rep_with_vector_1(const AC& ac, const EC& /*ec*/, const L1& l1) @@ -574,18 +654,16 @@ public: void update_exact() const { - ET* pet = new ET(); - pet->reserve(this->at.size()); - ec()(CGAL::exact(l1_), CGAL::exact(l2_), std::back_inserter(pet)); - bool updated = this->set_exact(pet); - if (!updated) { - pet->~ET(); - } else { + CGAL_UPDATE_EXACT_BEGIN + ET* pet = new ET(); + pet->reserve(this->at.size()); + ec()(CGAL::exact(l1_), CGAL::exact(l2_), std::back_inserter(*pet)); + CGAL_UPDATE_EXACT_MIDDLE this->at = E2A()(*pet); // Prune lazy tree l1_ = L1(); l2_ = L2(); - } + CGAL_UPDATE_EXACT_END } Lazy_rep_with_vector_2(const AC& ac, const EC& /*ec*/, const L1& l1, const L2& l2) @@ -629,17 +707,15 @@ public: void update_exact() const { - ET* pet = new ET(); - ec()(CGAL::exact(l1_), CGAL::exact(l2_), pet); - bool updated = this->set_exact(pet); - if (!updated) { - pet->~ET(); - } else { + CGAL_UPDATE_EXACT_BEGIN + ET* pet = new ET(); + ec()(CGAL::exact(l1_), CGAL::exact(l2_), *pet); + CGAL_UPDATE_EXACT_MIDDLE this->at = E2A()(*pet); // Prune lazy tree l1_ = L1(); l2_ = L2(); - } + CGAL_UPDATE_EXACT_END } Lazy_rep_2_1(const AC& ac, const EC& /*ec*/, const L1& l1, const L2& l2) @@ -686,17 +762,15 @@ public: void update_exact() const { - ET* pet = new ET(); - ec()(CGAL::exact(l1_), CGAL::exact(l2_), pet->first, pet->second ); - bool updated = this->set_exact(pet); - if (!updated) { - pet->~ET(); - } else { + CGAL_UPDATE_EXACT_BEGIN + ET* pet = new ET(); + ec()(CGAL::exact(l1_), CGAL::exact(l2_), pet->first, pet->second ); + CGAL_UPDATE_EXACT_MIDDLE this->at = E2A()(*pet); // Prune lazy tree l1_ = L1(); l2_ = L2(); - } + CGAL_UPDATE_EXACT_END } Lazy_rep_2_2(const AC& ac, const EC& /*ec*/, const L1& l1, const L2& l2) @@ -799,14 +873,6 @@ public : const ET& exact() const { return ptr()->exact(); } -#if 0 - AT& approx() - { return ptr()->approx(); } -#endif - - ET& exact() - { return ptr()->exact(); } - unsigned depth() const { return ptr()->depth(); diff --git a/Number_types/include/CGAL/Lazy_exact_nt.h b/Number_types/include/CGAL/Lazy_exact_nt.h index c505e839eb7..c9e196e7cd6 100644 --- a/Number_types/include/CGAL/Lazy_exact_nt.h +++ b/Number_types/include/CGAL/Lazy_exact_nt.h @@ -130,11 +130,10 @@ struct Lazy_exact_Int_Cst : public Lazy_exact_nt_rep : Lazy_exact_nt_rep(double(i)) {} void update_exact() const { - ET* pet = new ET((int)this->approx().inf()); - bool updated = this->set_exact(pet); - if (!updated) { - pet->~ET(); - } + CGAL_UPDATE_EXACT_BEGIN + ET* pet = new ET((int)this->approx().inf()); + CGAL_UPDATE_EXACT_MIDDLE + CGAL_UPDATE_EXACT_END } }; @@ -146,11 +145,10 @@ struct Lazy_exact_Cst : public Lazy_exact_nt_rep : Lazy_exact_nt_rep(x), cste(x) {} void update_exact() const { - ET* pet = new ET(cste); - bool updated = this->set_exact(pet); - if (!updated) { - pet->~ET(); - } + CGAL_UPDATE_EXACT_BEGIN + ET* pet = new ET(cste); + CGAL_UPDATE_EXACT_MIDDLE + CGAL_UPDATE_EXACT_END } private: @@ -191,14 +189,12 @@ public: void update_exact() const { - ET* pet = new ET(l.exact()); - bool updated = this->set_exact(pet); - if (!updated) { - pet->~ET(); - } else { + CGAL_UPDATE_EXACT_BEGIN + ET* pet = new ET(l.exact()); + CGAL_UPDATE_EXACT_MIDDLE this->at = l.approx(); prune_dag(); - } + CGAL_UPDATE_EXACT_END } void prune_dag() const { l.reset(); } @@ -284,15 +280,13 @@ struct NAME : public Lazy_exact_unary \ \ void update_exact() const \ { \ - ET* pet = new ET(OP(this->op1.exact())); \ - bool updated = this->set_exact(pet); \ - if (!updated) { \ - pet->~ET(); \ - } else { \ + CGAL_UPDATE_EXACT_BEGIN \ + ET* pet = new ET(OP(this->op1.exact())); \ + CGAL_UPDATE_EXACT_MIDDLE \ if (!this->approx().is_point()) \ this->at = CGAL_NTS to_interval(*(this->et)); \ this->prune_dag(); \ - } \ + CGAL_UPDATE_EXACT_END \ } \ }; @@ -312,15 +306,13 @@ struct NAME : public Lazy_exact_binary \ \ void update_exact() const \ { \ - ET* pet = new ET(this->op1.exact() OP this->op2.exact()); \ - bool updated = this->set_exact(pet); \ - if (!updated) { \ - pet->~ET(); \ - } else { \ + CGAL_UPDATE_EXACT_BEGIN \ + ET* pet = new ET(this->op1.exact() OP this->op2.exact()); \ + CGAL_UPDATE_EXACT_MIDDLE \ if (!this->approx().is_point()) \ this->at = CGAL_NTS to_interval(*(this->et)); \ this->prune_dag(); \ - } \ + CGAL_UPDATE_EXACT_END \ } \ }; @@ -338,15 +330,13 @@ struct Lazy_exact_Min : public Lazy_exact_binary void update_exact() const { - ET* pet = new ET((CGAL::min)(this->op1.exact(), this->op2.exact())); - bool updated = this->set_exact(pet); - if (!updated) { - pet->~ET(); - } else { + CGAL_UPDATE_EXACT_BEGIN + ET* pet = new ET((CGAL::min)(this->op1.exact(), this->op2.exact())); + CGAL_UPDATE_EXACT_MIDDLE if (!this->approx().is_point()) this->at = CGAL_NTS to_interval(*(this->et)); this->prune_dag(); - } + CGAL_UPDATE_EXACT_END } }; @@ -359,15 +349,13 @@ struct Lazy_exact_Max : public Lazy_exact_binary void update_exact() const { - ET* pet = new ET((CGAL::max)(this->op1.exact(), this->op2.exact())); - bool updated = this->set_exact(pet); - if (!updated) { - pet->~ET(); - } else { + CGAL_UPDATE_EXACT_BEGIN + ET* pet = new ET((CGAL::max)(this->op1.exact(), this->op2.exact())); + CGAL_UPDATE_EXACT_MIDDLE if (!this->approx().is_point()) this->at = CGAL_NTS to_interval(*(this->et)); this->prune_dag(); - } + CGAL_UPDATE_EXACT_END } }; diff --git a/STL_Extension/include/CGAL/Handle.h b/STL_Extension/include/CGAL/Handle.h index 60cd005969b..2d58f2ec8e9 100644 --- a/STL_Extension/include/CGAL/Handle.h +++ b/STL_Extension/include/CGAL/Handle.h @@ -79,11 +79,14 @@ class Handle { if (PTR) { - // TODO: the first condition tries to avoid the expensive - // release synchronization, check that it is still safe. + // The advanced version seems to work in practice on x86_64, but TSAN complains like crazy, even if I remove the questionable first test. +#if 0 if (PTR->count.load(std::memory_order_relaxed) == 1 || PTR->count.fetch_sub(1, std::memory_order_release) == 1) { std::atomic_thread_fence(std::memory_order_acquire); +#else + if (PTR->count.fetch_sub(1, std::memory_order_acq_rel) == 1) { +#endif delete PTR; } PTR=0;