From 2bcfc8bca6651e0c4e6ba5e6b019d2ec4a3715e5 Mon Sep 17 00:00:00 2001
From: Marc Glisse
Date: Tue, 8 Jan 2019 17:57:31 +0100
Subject: [PATCH] Make operators friends and remove some.

---
 Number_types/include/CGAL/Interval_nt.h | 1113 +++++++++++------------
 1 file changed, 532 insertions(+), 581 deletions(-)

diff --git a/Number_types/include/CGAL/Interval_nt.h b/Number_types/include/CGAL/Interval_nt.h
index 8bf898d4ffc..bff07867b2f 100644
--- a/Number_types/include/CGAL/Interval_nt.h
+++ b/Number_types/include/CGAL/Interval_nt.h
@@ -166,7 +166,7 @@ public:
   // But MSVC++ 2012 optimizes the test "!(i>s)" to "i<=s", even with
   // /fp:strict. If 'i' or 's' is a NaN, that makes a difference.
   CGAL_assertion_msg( (!is_valid(i)) || (!is_valid(s)) || (!(i>s)),
-          " Variable used before being initialized (or CGAL bug)");
+                      " Variable used before being initialized (or CGAL bug)");
 #ifndef CGAL_DISABLE_ROUNDING_MATH_CHECK
   CGAL_assertion_code((void) tester;) // Necessary to trigger a runtime test of rounding modes.
 #endif
@@ -294,6 +294,537 @@ private:
 #ifndef CGAL_DISABLE_ROUNDING_MATH_CHECK
   static const Test_runtime_rounding_modes tester;
 #endif
+
+  friend
+  Uncertain<bool>
+  operator<(const Interval_nt &a, const Interval_nt &b)
+  {
+    if (a.sup() < b.inf()) return true;
+    if (a.inf() >= b.sup()) return false;
+    return Uncertain<bool>::indeterminate();
+  }
+
+  friend
+  Uncertain<bool>
+  operator>(const Interval_nt &a, const Interval_nt &b)
+  { return b < a; }
+
+  friend
+  Uncertain<bool>
+  operator<=(const Interval_nt &a, const Interval_nt &b)
+  {
+    if (a.sup() <= b.inf()) return true;
+    if (a.inf() > b.sup()) return false;
+    return Uncertain<bool>::indeterminate();
+  }
+
+  friend
+  Uncertain<bool>
+  operator>=(const Interval_nt &a, const Interval_nt &b)
+  { return b <= a; }
+
+  friend
+  Uncertain<bool>
+  operator==(const Interval_nt &a, const Interval_nt &b)
+  {
+    if (b.inf() > a.sup() || b.sup() < a.inf()) return false;
+    if (b.inf() == a.sup() && b.sup() == a.inf()) return true;
+    return Uncertain<bool>::indeterminate();
+  }
+
+  friend
+  Uncertain<bool>
+  operator!=(const Interval_nt &a, const Interval_nt &b)
+  { return ! (a == b); }
+
+
+  // Mixed operators with double.
+
+  friend
+  Uncertain<bool>
+  operator<(double a, const Interval_nt &b)
+  {
+    if (a < b.inf()) return true;
+    if (a >= b.sup()) return false;
+    return Uncertain<bool>::indeterminate();
+  }
+
+  friend
+  Uncertain<bool>
+  operator>(double a, const Interval_nt &b)
+  { return b < a; }
+
+  friend
+  Uncertain<bool>
+  operator<=(double a, const Interval_nt &b)
+  {
+    if (a <= b.inf()) return true;
+    if (a > b.sup()) return false;
+    return Uncertain<bool>::indeterminate();
+  }
+
+  friend
+  Uncertain<bool>
+  operator>=(double a, const Interval_nt &b)
+  { return b <= a; }
+
+  friend
+  Uncertain<bool>
+  operator==(double a, const Interval_nt &b)
+  {
+    if (b.inf() > a || b.sup() < a) return false;
+    if (b.is_point()) return true;
+    return Uncertain<bool>::indeterminate();
+  }
+
+  friend
+  Uncertain<bool>
+  operator!=(double a, const Interval_nt &b)
+  { return ! (a == b); }
+
+  friend
+  Uncertain<bool>
+  operator<(const Interval_nt &a, double b)
+  {
+    if (a.sup() < b) return true;
+    if (a.inf() >= b) return false;
+    return Uncertain<bool>::indeterminate();
+  }
+
+  friend
+  Uncertain<bool>
+  operator>(const Interval_nt &a, double b)
+  { return b < a; }
+
+  friend
+  Uncertain<bool>
+  operator<=(const Interval_nt &a, double b)
+  {
+    if (a.sup() <= b) return true;
+    if (a.inf() > b) return false;
+    return Uncertain<bool>::indeterminate();
+  }
+
+  friend
+  Uncertain<bool>
+  operator>=(const Interval_nt &a, double b)
+  { return b <= a; }
+
+  friend
+  Uncertain<bool>
+  operator==(const Interval_nt &a, double b)
+  {
+    return b == a;
+  }
+
+  friend
+  Uncertain<bool>
+  operator!=(const Interval_nt &a, double b)
+  { return b != a; }
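
A minimal usage sketch, not part of the diff: how a filtered predicate
consumes the three-valued comparisons above.  `certainly` and `possibly` are
assumed to be the Uncertain<bool> helpers from <CGAL/Uncertain.h>.

    bool filtered_less (double px, double py)
    {
      CGAL::Interval_nt<> x (px), y (py);
      CGAL::Uncertain<bool> u = x < y;       // indeterminate if x and y overlap
      if (CGAL::certainly(u)) return true;   // x < y for every represented value
      if (!CGAL::possibly(u)) return false;  // x < y for no represented value
      return px < py;  // indeterminate: a real filter falls back to exact arithmetic
    }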
+
+  friend
+  std::ostream & operator<< (std::ostream &os, const Interval_nt & I )
+  {
+    return os << "[" << I.inf() << ";" << I.sup() << "]";
+  }
+
+#define CGAL_SWALLOW(IS,CHAR) \
+  { \
+    char c; \
+    do is.get(c); while (isspace(c)); \
+    if (c != CHAR) { \
+      is.setstate(std::ios_base::failbit); \
+    } \
+  } \
+
+  friend
+  std::istream & operator>> (std::istream &is, Interval_nt & I)
+  {
+    char c;
+    do is.get(c); while (isspace(c));
+    is.putback(c);
+    if(c == '['){ // read original output from operator <<
+      double inf,sup;
+      CGAL_SWALLOW(is, '[');// read the "["
+      is >> iformat(inf);
+      CGAL_SWALLOW(is, ';');// read the ";"
+      is >> iformat(sup);
+      CGAL_SWALLOW(is, ']');// read the "]"
+      I = Interval_nt(inf,sup);
+    }else{ //read double (backward compatibility)
+      double d;
+      is >> d;
+      I = d;
+    }
+    return is;
+  }
+#undef CGAL_SWALLOW
+
+  friend
+  Interval_nt
+  operator+ (const Interval_nt &a, const Interval_nt & b)
+  {
+    Internal_protector P;
+#ifdef CGAL_USE_SSE2
+    __m128d aa = IA_opacify128(a.simd());
+    __m128d bb = IA_opacify128_weak(b.simd());
+    __m128d r = _mm_add_pd(aa, bb);
+    return Interval_nt(IA_opacify128(r));
+#else
+    return Interval_nt (-CGAL_IA_ADD(-a.inf(), -b.inf()),
+                         CGAL_IA_ADD(a.sup(), b.sup()));
+#endif
+  }
+
+  // MSVC does not define __SSE3__
+#if defined CGAL_USE_SSE2 && (defined __SSE3__ || defined __AVX__)
+  friend
+  Interval_nt
+  operator+ (double a, const Interval_nt & b)
+  {
+    Internal_protector P;
+    __m128d aa = _mm_set1_pd(IA_opacify(a));
+    __m128d bb = IA_opacify128_weak(b.simd());
+    __m128d r = _mm_addsub_pd(bb, aa);
+    return Interval_nt(IA_opacify128(r));
+  }
+
+  friend
+  Interval_nt
+  operator+ (const Interval_nt & a, double b)
+  {
+    return b + a;
+  }
+#endif
+
+  friend
+  Interval_nt
+  operator+( const Interval_nt& a ) {
+    return a;
+  }
+
+  friend
+  Interval_nt
+  operator- (const Interval_nt &a, const Interval_nt & b)
+  {
+#ifdef CGAL_USE_SSE2
+    return a+-b;
+#else
+    Internal_protector P;
+    return Interval_nt(-CGAL_IA_ADD(b.sup(), -a.inf()),
+                        CGAL_IA_ADD(a.sup(), -b.inf()));
+#endif
+  }
+
+#ifdef CGAL_USE_SSE2
+  friend
+  Interval_nt
+  operator- (double a, const Interval_nt & b)
+  {
+    return a+-b;
+  }
+#endif
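
The negated-lower-bound trick behind CGAL_IA_ADD and the {-inf,sup} layout, in
scalar form (a sketch, not part of the diff): with the rounding mode set to
+infinity, which is what Internal_protector arranges, one addition rounds the
upper bound up, and adding the negated lower bounds rounds the lower bound
down after a final negation.  This is why the SSE2 branch computes both
bounds of a sum with a single _mm_add_pd, and why the real operands are
laundered through IA_opacify*, so the compiler cannot fold these operations
across the rounding-mode change.

    #include <cfenv>

    // Bounds of [ai,as] + [bi,bs], rounded outward.
    void interval_add (double ai, double as, double bi, double bs,
                       double& ri, double& rs)
    {
      std::fesetround(FE_UPWARD);
      rs = as + bs;           // upper bound, rounded up
      ri = -((-ai) + (-bi));  // -( -(ai+bi) rounded up ) == ai+bi rounded down
      std::fesetround(FE_TONEAREST);
    }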
+
+  friend
+  Interval_nt
+  operator* (const Interval_nt &a, const Interval_nt & b)
+  {
+#if 0
+    // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88626
+    if(CGAL_CST_TRUE(a.is_point()))
+      return a.inf() * b;
+    else if(CGAL_CST_TRUE(b.is_point()))
+      return a * b.inf();
+#endif
+    Internal_protector P;
+#ifdef CGAL_USE_SSE2
+# if 1
+    // Brutal, compute all products in all directions.
+    // The actual winner (by a hair) on recent hardware
+    __m128d aa = IA_opacify128_weak(a.simd()); // {-ai,as}
+    __m128d bb = b.simd(); // {-bi,bs}
+    __m128d m = _mm_set_sd(-0.); // {-0,+0}
+    __m128d m1 = _mm_set1_pd(-0.); // {-0,-0}
+    __m128d ax = swap_m128d (aa); // {as,-ai}
+    __m128d ap = _mm_xor_pd (ax, m1); // {-as,ai}
+    __m128d bz = _mm_xor_pd(bb, m); // {bi,bs}
+    bz = IA_opacify128(bz);
+    __m128d c = swap_m128d (bz); // {bs,bi}
+    __m128d x1 = _mm_mul_pd(aa,bz); // {-ai*bi,as*bs}
+    __m128d x2 = _mm_mul_pd(aa,c); // {-ai*bs,as*bi}
+    __m128d x3 = _mm_mul_pd(ap,bz); // {-as*bi,ai*bs}
+    __m128d x4 = _mm_mul_pd(ap,c); // {-as*bs,ai*bi}
+    __m128d y1 = _mm_max_pd(x1,x2);
+    __m128d y2 = _mm_max_pd(x3,x4);
+    __m128d r = _mm_max_pd (y1, y2);
+    return IA (IA_opacify128(r));
+# elif 0
+    // we want to multiply ai,as with {ai<0?-bs:-bi,as<0?bi:bs}
+    // we want to multiply as,ai with {as<0?-bs:-bi,ai<0?bi:bs}
+    // requires SSE4 (otherwise use _mm_cmplt_pd, _mm_and_pd, _mm_andnot_pd and _mm_or_pd to avoid blendv)
+    // probably faster on older hardware
+    __m128d m = _mm_set_sd(-0.); // {-0,+0}
+    __m128d m1 = _mm_set1_pd(-0.); // {-0,-0}
+    __m128d aa = a.simd(); // {-ai,as}
+    __m128d az = _mm_xor_pd(aa, m); // {ai,as}
+    az = IA_opacify128_weak(az);
+    __m128d azp = swap_m128d (az); // {as,ai}
+    __m128d bb = IA_opacify128(b.simd()); // {-bi,bs}
+    __m128d bx = swap_m128d (bb); // {bs,-bi}
+    __m128d bp = _mm_xor_pd(bx, m1); // {-bs,bi}
+    __m128d x = _mm_blendv_pd (bb, bp, az); // {ai<0?-bs:-bi,as<0?bi:bs}
+    __m128d y = _mm_blendv_pd (bb, bp, azp); // {as<0?-bs:-bi,ai<0?bi:bs}
+    __m128d p1 = _mm_mul_pd (az, x);
+    __m128d p2 = _mm_mul_pd (azp, y);
+    __m128d r = _mm_max_pd (p1, p2);
+    return IA (IA_opacify128(r));
+# elif 0
+    // we want to multiply -ai,as with {ai>0?bi:bs,as<0?bi:bs}
+    // we want to multiply -as,ai with {as<0?bs:bi,ai>0?bs:bi}
+    // slightly worse than the previous one
+    __m128d m1 = _mm_set1_pd(-0.); // {-0,-0}
+    __m128d aa = IA_opacify128_weak(a.simd()); // {-ai,as}
+    __m128d ax = swap_m128d (aa); // {as,-ai}
+    __m128d ap = _mm_xor_pd (ax, m1); // {-as,ai}
+    __m128d bb = IA_opacify128(b.simd()); // {-bi,bs}
+    double bi = -_mm_cvtsd_f64(bb);
+    double bs = _mm_cvtsd_f64(_mm_unpackhi_pd(bb,bb));
+    __m128d bbi = _mm_set1_pd(bi); // {bi,bi}
+    __m128d bbs = _mm_set1_pd(bs); // {bs,bs}
+    __m128d x = _mm_blendv_pd (bbs, bbi, aa); // {ai>0?bi:bs,as<0?bi:bs}
+    __m128d y = _mm_blendv_pd (bbi, bbs, ax); // {as<0?bs:bi,ai>0?bs:bi}
+    __m128d p1 = _mm_mul_pd (aa, x);
+    __m128d p2 = _mm_mul_pd (ap, y);
+    __m128d r = _mm_max_pd (p1, p2);
+    return IA (IA_opacify128(r));
+# else
+    // AVX version of the brutal method, same running time or slower
+    __m128d aa = IA_opacify128_weak(a.simd()); // {-ai,as}
+    __m128d bb = b.simd(); // {-bi,bs}
+    __m128d m = _mm_set_sd(-0.); // {-0,+0}
+    __m128d m1 = _mm_set1_pd(-0.); // {-0,-0}
+    __m128d ax = swap_m128d (aa); // {as,-ai}
+    __m128d ap = _mm_xor_pd (ax, m1); // {-as,ai}
+    __m128d bz = _mm_xor_pd(bb, m); // {bi,bs}
+    bz = IA_opacify128(bz);
+    __m256d X = _mm256_set_m128d(ap,aa); // {-ai,as,-as,ai}
+    __m256d Y1 = _mm256_set_m128d(bz,bz); // {bi,bs,bi,bs}
+    __m256d Y2 = _mm256_permute_pd(Y1,5); // {bs,bi,bs,bi}
+    __m256d Z1 = _mm256_mul_pd(X,Y1);
+    __m256d Z2 = _mm256_mul_pd(X,Y2);
+    __m256d Z = _mm256_max_pd(Z1,Z2);
+    __m128d z1 = _mm256_castpd256_pd128(Z);
+    __m128d z2 = _mm256_extractf128_pd(Z,1);
+    __m128d r = _mm_max_pd (z1, z2);
+    return IA (IA_opacify128(r));
+# endif
+#else
+    if (a.inf() >= 0.0)           // a>=0
+    {
+      // b>=0    [a.inf()*b.inf(); a.sup()*b.sup()]
+      // b<=0    [a.sup()*b.inf(); a.inf()*b.sup()]
+      // b~=0    [a.sup()*b.inf(); a.sup()*b.sup()]
+      double aa = a.inf(), bb = a.sup();
+      if (b.inf() < 0.0)
+      {
+        aa = bb;
+        if (b.sup() < 0.0)
+          bb = a.inf();
+      }
+      return IA(-CGAL_IA_MUL(aa, -b.inf()), CGAL_IA_MUL(bb, b.sup()));
+    }
+    else if (a.sup()<=0.0)        // a<=0
+    {
+      // b>=0    [a.inf()*b.sup(); a.sup()*b.inf()]
+      // b<=0    [a.sup()*b.sup(); a.inf()*b.inf()]
+      // b~=0    [a.inf()*b.sup(); a.inf()*b.inf()]
+      double aa = a.sup(), bb = a.inf();
+      if (b.inf() < 0.0)
+      {
+        aa=bb;
+        if (b.sup() < 0.0)
+          bb=a.sup();
+      }
+      return IA(-CGAL_IA_MUL(-bb, b.sup()), CGAL_IA_MUL(-aa, -b.inf()));
+    }
+    else                          // 0 \in a
+    {
+      if (b.inf()>=0.0)           // b>=0
+        return IA(-CGAL_IA_MUL(-a.inf(), b.sup()),
+                   CGAL_IA_MUL( a.sup(), b.sup()));
+      if (b.sup()<=0.0)           // b<=0
+        return IA(-CGAL_IA_MUL( a.sup(), -b.inf()),
+                   CGAL_IA_MUL(-a.inf(), -b.inf()));
+      // 0 \in b
+      double tmp1 = CGAL_IA_MUL(-a.inf(),  b.sup());
+      double tmp2 = CGAL_IA_MUL( a.sup(), -b.inf());
+      double tmp3 = CGAL_IA_MUL(-a.inf(), -b.inf());
+      double tmp4 = CGAL_IA_MUL( a.sup(),  b.sup());
+      return IA(-(std::max)(tmp1,tmp2), (std::max)(tmp3,tmp4));
+    }
+#endif
+  }
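
In scalar terms the "brutal" variant above computes, for any sign combination
(a sketch, not part of the diff; directed rounding omitted):

    #include <algorithm>

    // [ri,rs] = [ai,as] * [bi,bs]: min and max over all four products.
    void interval_mul (double ai, double as, double bi, double bs,
                       double& ri, double& rs)
    {
      double p1 = ai*bi, p2 = ai*bs, p3 = as*bi, p4 = as*bs;
      ri = (std::min)({p1, p2, p3, p4});
      rs = (std::max)({p1, p2, p3, p4});
    }

The {-inf,sup} encoding turns the min into one more max, which is why the SSE
branches get away with _mm_max_pd alone, while the #else branch picks the two
relevant products from the case tables in its comments.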
+
+  friend
+  Interval_nt
+  operator* (double a, Interval_nt b)
+  {
+    // return Interval_nt(a)*b;
+    Internal_protector P;
+    if (a < 0) { a = -a; b = -b; }
+    // Now a >= 0
+#ifdef CGAL_USE_SSE2
+    // TODO: try/benchmark a branchless version
+    __m128d bb = IA_opacify128_weak(b.simd());
+    __m128d aa = _mm_set1_pd(IA_opacify(a));
+    __m128d r = _mm_mul_pd(aa, bb);
+    return IA(IA_opacify128(r));
+#else
+    return IA(-CGAL_IA_MUL(a, -b.inf()), CGAL_IA_MUL(a, b.sup()));
+#endif
+  }
+
+  friend
+  Interval_nt
+  operator* (const Interval_nt & a, double b)
+  {
+    return b * a;
+  }
+
+  friend
+  Interval_nt
+  operator/ (const Interval_nt &a, const Interval_nt & b)
+  {
+#if 0
+    // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88626
+    if(CGAL_CST_TRUE(a.is_point()))
+      return a.inf() / b;
+    else if(CGAL_CST_TRUE(b.is_point()))
+      return a / b.inf();
+#endif
+    Internal_protector P;
+#if defined CGAL_USE_SSE2 && (defined __SSE4_1__ || defined __AVX__)
+    //// not a tight bound, but easy:
+    // return CGAL::inverse(b)*a;
+# if 1
+    // Current fastest
+    // if b>0 we want [ai/(ai>0?bs:bi),as/(as>0?bi:bs)]
+    // if b<0 we want [as/(as>0?bs:bi),ai/(ai>0?bi:bs)]
+    __m128d m = _mm_set_sd(-0.);
+    __m128d aa = a.simd();
+    __m128d bb = b.simd();
+    int i = _mm_movemask_pd(_mm_cmpge_pd(bb, _mm_set1_pd(0.)));
+    if(i==3) return largest(); // bi<=0 && bs>=0
+    __m128d ap = _mm_xor_pd(aa, m); // {ai, as}
+    __m128d ax = swap_m128d(ap); // {as, ai}
+    __m128d bp = _mm_xor_pd(bb, m); // {bi, bs}
+    __m128d bx = swap_m128d(bp); // {bs, bi}
+    __m128d num = _mm_blendv_pd(ap, ax, bp); // {(b>0)?ai:as, (b>0)?as:ai}
+    __m128d d = _mm_blendv_pd(bx, bp, num);
+    // Can we rearrange things so we need fewer xor?
+    __m128d den = _mm_xor_pd(d, m);
+    num = IA_opacify128_weak(num);
+    den = IA_opacify128(den);
+    __m128d r = _mm_div_pd(num, den);
+    return IA (IA_opacify128(r));
+# else
+    // Similar to the multiplication, but slow, because divisions are slow
+    // if b>0 we want [-max(-ai/bi,-ai/bs),max(as/bi,as/bs)] {-ai,as}/{bi,bs} {-ai,as}/{bs,bi}
+    // if b<0 we want [-max(-as/bi,-as/bs),max(ai/bi,ai/bs)] {-as,ai}/{bi,bs} {-as,ai}/{bs,bi}
+    __m128d m = _mm_set_sd(-0.);
+    __m128d m1 = _mm_set1_pd(-0.);
+    __m128d aa = a.simd(); // {-ai, as}
+    __m128d bb = b.simd(); // {-bi, bs}
+    int i = _mm_movemask_pd(_mm_cmpge_pd(bb, _mm_set1_pd(0.)));
+    if(i==3) return largest(); // bi<=0 && bs>=0
+    __m128d ap = _mm_xor_pd(aa, m1); // {ai, -as}
+    __m128d ax = swap_m128d(ap); // {-as, ai}
+    __m128d bp = _mm_xor_pd(bb, m); // {bi, bs}
+    __m128d num = _mm_blendv_pd(aa, ax, bp);
+    num = IA_opacify128_weak(num);
+    bp = IA_opacify128(bp);
+    __m128d bx = swap_m128d(bp); // {bs, bi}
+    __m128d d1 = _mm_div_pd(num, bp);
+    __m128d d2 = _mm_div_pd(num, bx);
+    __m128d r = _mm_max_pd(d1, d2);
+    return IA (IA_opacify128(r));
+# endif
+#else
+    if (b.inf() > 0.0)            // b>0
+    {
+      // a>=0    [a.inf()/b.sup(); a.sup()/b.inf()]
+      // a<=0    [a.inf()/b.inf(); a.sup()/b.sup()]
+      // a~=0    [a.inf()/b.inf(); a.sup()/b.inf()]
+      double aa = b.sup(), bb = b.inf();
+      if (a.inf() < 0.0)
+      {
+        aa = bb;
+        if (a.sup() < 0.0)
+          bb = b.sup();
+      }
+      return IA(-CGAL_IA_DIV(-a.inf(), aa), CGAL_IA_DIV(a.sup(), bb));
+    }
+    else if (b.sup()<0.0)         // b<0
+    {
+      // a>=0    [a.sup()/b.sup(); a.inf()/b.inf()]
+      // a<=0    [a.sup()/b.inf(); a.inf()/b.sup()]
+      // a~=0    [a.sup()/b.sup(); a.inf()/b.sup()]
+      double aa = b.sup(), bb = b.inf();
+      if (a.inf() < 0.0)
+      {
+        bb = aa;
+        if (a.sup() < 0.0)
+          aa = b.inf();
+      }
+      return IA(-CGAL_IA_DIV(a.sup(), -aa), CGAL_IA_DIV(a.inf(), bb));
+    }
+    else                          // b~0
+      return largest();
+    // We could do slightly better -> [0;infinity] when b.sup()==0,
+    // but is it worth it?
+#endif
+  }
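
The sign analysis of the non-SSE division branch, in scalar form (a sketch,
not part of the diff; directed rounding again omitted).  When 0 lies weakly
inside b there is no finite bound, which is why the code above returns
largest(), the widest representable interval.

    #include <limits>

    // [ri,rs] = [ai,as] / [bi,bs].
    void interval_div (double ai, double as, double bi, double bs,
                       double& ri, double& rs)
    {
      if (bi > 0.0) {          // b > 0: quotients keep the ordering of a
        ri = (ai >= 0.0) ? ai/bs : ai/bi;
        rs = (as >= 0.0) ? as/bi : as/bs;
      } else if (bs < 0.0) {   // b < 0: quotients reverse the ordering of a
        ri = (as >= 0.0) ? as/bs : as/bi;
        rs = (ai >= 0.0) ? ai/bi : ai/bs;
      } else {                 // 0 in b: no finite bound is valid
        ri = -std::numeric_limits<double>::infinity();
        rs =  std::numeric_limits<double>::infinity();
      }
    }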
+
+  // Without SSE2, let it use the function above.
+#ifdef CGAL_USE_SSE2
+  friend
+  Interval_nt
+  operator/ (double a, const Interval_nt & b)
+  {
+    int i = _mm_movemask_pd(_mm_cmpge_pd(b.simd(), _mm_set1_pd(0.)));
+    if(i==3) return largest(); // bi<=0 && bs>=0
+    __m128d aa, xx;
+    if(a>0){
+      aa = _mm_set1_pd(-a);
+      xx = (-b).simd();
+    } else if(a<0){
+      aa = _mm_set1_pd(a);
+      xx = b.simd();
+    } else return 0.;
+    Internal_protector P;
+    __m128d r = _mm_div_pd(IA_opacify128_weak(aa), IA_opacify128(xx));
+    return Interval_nt(IA_opacify128(r));
+  }
+
+  friend
+  Interval_nt
+  operator/ (Interval_nt a, double b)
+  {
+    if(b<0){ a = -a; b = -b; }
+    else if(b==0) return largest();
+    // Now b > 0
+    Internal_protector P;
+# ifdef __GNUC__
+    // Paradoxically, constants should be safe, and this lets the compiler optimize x/2 to x*.5
+    if (!__builtin_constant_p(b))
+# endif
+      b = IA_opacify(b);
+    __m128d bb = _mm_set1_pd(b);
+    __m128d aa = IA_opacify128(a.simd());
+    __m128d r = _mm_div_pd(aa, bb);
+    return Interval_nt(IA_opacify128(r));
+  }
+#endif
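
IA_opacify and IA_opacify128, used throughout the operators above, hide a
value from the optimizer so that computations cannot be constant-folded or
hoisted across the rounding-mode changes made by Internal_protector.  A
typical GCC/Clang-style barrier of this kind looks like the sketch below
(assumed shape only; the definitions actually used live elsewhere in this
header):

    inline double opacify (double x)
    {
      // Empty asm that claims to rewrite x in an SSE register (x86-64):
      // it emits no code, but the compiler must treat x as unknown afterwards.
      asm volatile ("" : "+x"(x));
      return x;
    }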
 };
 
 #ifndef CGAL_DISABLE_ROUNDING_MATH_CHECK
@@ -302,151 +833,6 @@
 const typename Interval_nt<Protected>::Test_runtime_rounding_modes
 Interval_nt<Protected>::tester;
 #endif
 
-template <bool Protected>
-inline
-Uncertain<bool>
-operator<(const Interval_nt<Protected> &a, const Interval_nt<Protected> &b)
-{
-  if (a.sup() < b.inf()) return true;
-  if (a.inf() >= b.sup()) return false;
-  return Uncertain<bool>::indeterminate();
-}
-
-template <bool Protected>
-inline
-Uncertain<bool>
-operator>(const Interval_nt<Protected> &a, const Interval_nt<Protected> &b)
-{ return b < a; }
-
-template <bool Protected>
-inline
-Uncertain<bool>
-operator<=(const Interval_nt<Protected> &a, const Interval_nt<Protected> &b)
-{
-  if (a.sup() <= b.inf()) return true;
-  if (a.inf() > b.sup()) return false;
-  return Uncertain<bool>::indeterminate();
-}
-
-template <bool Protected>
-inline
-Uncertain<bool>
-operator>=(const Interval_nt<Protected> &a, const Interval_nt<Protected> &b)
-{ return b <= a; }
-
-template <bool Protected>
-inline
-Uncertain<bool>
-operator==(const Interval_nt<Protected> &a, const Interval_nt<Protected> &b)
-{
-  if (b.inf() > a.sup() || b.sup() < a.inf()) return false;
-  if (b.inf() == a.sup() && b.sup() == a.inf()) return true;
-  return Uncertain<bool>::indeterminate();
-}
-
-template <bool Protected>
-inline
-Uncertain<bool>
-operator!=(const Interval_nt<Protected> &a, const Interval_nt<Protected> &b)
-{ return ! (a == b); }
-
-
-// Mixed operators with double.
-
-template <bool Protected>
-inline
-Uncertain<bool>
-operator<(double a, const Interval_nt<Protected> &b)
-{
-  if (a < b.inf()) return true;
-  if (a >= b.sup()) return false;
-  return Uncertain<bool>::indeterminate();
-}
-
-template <bool Protected>
-inline
-Uncertain<bool>
-operator>(double a, const Interval_nt<Protected> &b)
-{ return b < a; }
-
-template <bool Protected>
-inline
-Uncertain<bool>
-operator<=(double a, const Interval_nt<Protected> &b)
-{
-  if (a <= b.inf()) return true;
-  if (a > b.sup()) return false;
-  return Uncertain<bool>::indeterminate();
-}
-
-template <bool Protected>
-inline
-Uncertain<bool>
-operator>=(double a, const Interval_nt<Protected> &b)
-{ return b <= a; }
-
-template <bool Protected>
-inline
-Uncertain<bool>
-operator==(double a, const Interval_nt<Protected> &b)
-{
-  if (b.inf() > a || b.sup() < a) return false;
-  if (b.is_point()) return true;
-  return Uncertain<bool>::indeterminate();
-}
-
-template <bool Protected>
-inline
-Uncertain<bool>
-operator!=(double a, const Interval_nt<Protected> &b)
-{ return ! (a == b); }
-
-template <bool Protected>
-inline
-Uncertain<bool>
-operator<(const Interval_nt<Protected> &a, double b)
-{
-  if (a.sup() < b) return true;
-  if (a.inf() >= b) return false;
-  return Uncertain<bool>::indeterminate();
-}
-
-template <bool Protected>
-inline
-Uncertain<bool>
-operator>(const Interval_nt<Protected> &a, double b)
-{ return b < a; }
-
-template <bool Protected>
-inline
-Uncertain<bool>
-operator<=(const Interval_nt<Protected> &a, double b)
-{
-  if (a.sup() <= b) return true;
-  if (a.inf() > b) return false;
-  return Uncertain<bool>::indeterminate();
-}
-
-template <bool Protected>
-inline
-Uncertain<bool>
-operator>=(const Interval_nt<Protected> &a, double b)
-{ return b <= a; }
-
-template <bool Protected>
-inline
-Uncertain<bool>
-operator==(const Interval_nt<Protected> &a, double b)
-{
-  return b == a;
-}
-
-template <bool Protected>
-inline
-Uncertain<bool>
-operator!=(const Interval_nt<Protected> &a, double b)
-{ return b != a; }
-
 
 // Non-documented
@@ -538,445 +924,10 @@ class Is_valid< Interval_nt<Protected> >
   }
 };
 
-template <bool Protected>
-std::ostream & operator<< (std::ostream &os, const Interval_nt<Protected> & I )
-{
-  return os << "[" << I.inf() << ";" << I.sup() << "]";
-}
-
-#define CGAL_SWALLOW(IS,CHAR) \
-  { \
-    char c; \
-    do is.get(c); while (isspace(c)); \
-    if (c != CHAR) { \
-      is.setstate(std::ios_base::failbit); \
-    } \
-  } \
-
-template <bool Protected>
-std::istream & operator>> (std::istream &is, Interval_nt<Protected> & I)
-{
-  char c;
-  do is.get(c); while (isspace(c));
-  is.putback(c);
-  if(c == '['){ // read original output from operator <<
-    double inf,sup;
-    CGAL_SWALLOW(is, '[');// read the "["
-    is >> iformat(inf);
-    CGAL_SWALLOW(is, ';');// read the ";"
-    is >> iformat(sup);
-    CGAL_SWALLOW(is, ']');// read the "]"
-    I = Interval_nt<Protected>(inf,sup);
-  }else{ //read double (backward compatibility)
-    double d;
-    is >> d;
-    I = d;
-  }
-  return is;
-}
-#undef CGAL_SWALLOW
 
 typedef Interval_nt<false> Interval_nt_advanced; // for backward-compatibility
 
-template <bool Protected>
-inline
-Interval_nt<Protected>
-operator+ (const Interval_nt<Protected> &a, const Interval_nt<Protected> & b)
-{
-  typename Interval_nt<Protected>::Internal_protector P;
-#ifdef CGAL_USE_SSE2
-  __m128d aa = IA_opacify128(a.simd());
-  __m128d bb = IA_opacify128_weak(b.simd());
-  __m128d r = _mm_add_pd(aa, bb);
-  return Interval_nt<Protected>(IA_opacify128(r));
-#else
-  return Interval_nt<Protected> (-CGAL_IA_ADD(-a.inf(), -b.inf()),
-                                  CGAL_IA_ADD(a.sup(), b.sup()));
-#endif
-}
-
-template <bool Protected>
-inline
-Interval_nt<Protected>
-operator+ (double a, const Interval_nt<Protected> & b)
-{
-  // MSVC does not define __SSE3__
-#if defined CGAL_USE_SSE2 && (defined __SSE3__ || defined __AVX__)
-  typename Interval_nt<Protected>::Internal_protector P;
-  __m128d aa = _mm_set1_pd(IA_opacify(a));
-  __m128d bb = IA_opacify128_weak(b.simd());
-  __m128d r = _mm_addsub_pd(bb, aa);
-  return Interval_nt<Protected>(IA_opacify128(r));
-#else
-  return Interval_nt<Protected>(a)+b;
-#endif
-}
-
-template <bool Protected>
-inline
-Interval_nt<Protected>
-operator+ (const Interval_nt<Protected> & a, double b)
-{
-  return b + a;
-}
-
-template< bool Protected >
-inline
-Interval_nt<Protected>
-operator+( const Interval_nt<Protected>& a ) {
-  return a;
-}
-
-template <bool Protected>
-inline
-Interval_nt<Protected>
-operator- (const Interval_nt<Protected> &a, const Interval_nt<Protected> & b)
-{
-#ifdef CGAL_USE_SSE2
-  return a+-b;
-#else
-  typename Interval_nt<Protected>::Internal_protector P;
-  return Interval_nt<Protected>(-CGAL_IA_ADD(b.sup(), -a.inf()),
-                                 CGAL_IA_ADD(a.sup(), -b.inf()));
-#endif
-}
-
-template <bool Protected>
-inline
-Interval_nt<Protected>
-operator- (double a, const Interval_nt<Protected> & b)
-{
-#ifdef CGAL_USE_SSE2
-  return a+-b;
-#else
-  return Interval_nt<Protected>(a)-b;
-#endif
-}
-
-template <bool Protected>
-inline
-Interval_nt<Protected>
-operator- (const Interval_nt<Protected> & a, double b)
-{
-  return a-Interval_nt<Protected>(b);
-}
-
-template <bool Protected>
-inline
-Interval_nt<Protected>
-operator* (const Interval_nt<Protected> &a, const Interval_nt<Protected> & b)
-{
-#if 0
-  // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88626
-  if(CGAL_CST_TRUE(a.is_point()))
-    return a.inf() * b;
-  else if(CGAL_CST_TRUE(b.is_point()))
-    return a * b.inf();
-#endif
-  typedef Interval_nt<Protected> IA;
-  typename Interval_nt<Protected>::Internal_protector P;
-#ifdef CGAL_USE_SSE2
-# if 1
-  // Brutal, compute all products in all directions.
-  // The actual winner (by a hair) on recent hardware
-  __m128d aa = IA_opacify128_weak(a.simd()); // {-ai,as}
-  __m128d bb = b.simd(); // {-bi,bs}
-  __m128d m = _mm_set_sd(-0.); // {-0,+0}
-  __m128d m1 = _mm_set1_pd(-0.); // {-0,-0}
-  __m128d ax = swap_m128d (aa); // {as,-ai}
-  __m128d ap = _mm_xor_pd (ax, m1); // {-as,ai}
-  __m128d bz = _mm_xor_pd(bb, m); // {bi,bs}
-  bz = IA_opacify128(bz);
-  __m128d c = swap_m128d (bz); // {bs,bi}
-  __m128d x1 = _mm_mul_pd(aa,bz); // {-ai*bi,as*bs}
-  __m128d x2 = _mm_mul_pd(aa,c); // {-ai*bs,as*bi}
-  __m128d x3 = _mm_mul_pd(ap,bz); // {-as*bi,ai*bs}
-  __m128d x4 = _mm_mul_pd(ap,c); // {-as*bs,ai*bi}
-  __m128d y1 = _mm_max_pd(x1,x2);
-  __m128d y2 = _mm_max_pd(x3,x4);
-  __m128d r = _mm_max_pd (y1, y2);
-  return IA (IA_opacify128(r));
-# elif 0
-// we want to multiply ai,as with {ai<0?-bs:-bi,as<0?bi:bs}
-// we want to multiply as,ai with {as<0?-bs:-bi,ai<0?bi:bs}
-// requires SSE4 (otherwise use _mm_cmplt_pd, _mm_and_pd, _mm_andnot_pd and _mm_or_pd to avoid blendv)
-// probably faster on older hardware
-  __m128d m = _mm_set_sd(-0.); // {-0,+0}
-  __m128d m1 = _mm_set1_pd(-0.); // {-0,-0}
-  __m128d aa = a.simd(); // {-ai,as}
-  __m128d az = _mm_xor_pd(aa, m); // {ai,as}
-  az = IA_opacify128_weak(az);
-  __m128d azp = swap_m128d (az); // {as,ai}
-  __m128d bb = IA_opacify128(b.simd()); // {-bi,bs}
-  __m128d bx = swap_m128d (bb); // {bs,-bi}
-  __m128d bp = _mm_xor_pd(bx, m1); // {-bs,bi}
-  __m128d x = _mm_blendv_pd (bb, bp, az); // {ai<0?-bs:-bi,as<0?bi:bs}
-  __m128d y = _mm_blendv_pd (bb, bp, azp); // {as<0?-bs:-bi,ai<0?bi:bs}
-  __m128d p1 = _mm_mul_pd (az, x);
-  __m128d p2 = _mm_mul_pd (azp, y);
-  __m128d r = _mm_max_pd (p1, p2);
-  return IA (IA_opacify128(r));
-# elif 0
-// we want to multiply -ai,as with {ai>0?bi:bs,as<0?bi:bs}
-// we want to multiply -as,ai with {as<0?bs:bi,ai>0?bs:bi}
-// slightly worse than the previous one
-  __m128d m1 = _mm_set1_pd(-0.); // {-0,-0}
-  __m128d aa = IA_opacify128_weak(a.simd()); // {-ai,as}
-  __m128d ax = swap_m128d (aa); // {as,-ai}
-  __m128d ap = _mm_xor_pd (ax, m1); // {-as,ai}
-  __m128d bb = IA_opacify128(b.simd()); // {-bi,bs}
-  double bi = -_mm_cvtsd_f64(bb);
-  double bs = _mm_cvtsd_f64(_mm_unpackhi_pd(bb,bb));
-  __m128d bbi = _mm_set1_pd(bi); // {bi,bi}
-  __m128d bbs = _mm_set1_pd(bs); // {bs,bs}
-  __m128d x = _mm_blendv_pd (bbs, bbi, aa); // {ai>0?bi:bs,as<0?bi:bs}
-  __m128d y = _mm_blendv_pd (bbi, bbs, ax); // {as<0?bs:bi,ai>0?bs:bi}
-  __m128d p1 = _mm_mul_pd (aa, x);
-  __m128d p2 = _mm_mul_pd (ap, y);
-  __m128d r = _mm_max_pd (p1, p2);
-  return IA (IA_opacify128(r));
-# else
-  // AVX version of the brutal method, same running time or slower
-  __m128d aa = IA_opacify128_weak(a.simd()); // {-ai,as}
-  __m128d bb = b.simd(); // {-bi,bs}
-  __m128d m = _mm_set_sd(-0.); // {-0,+0}
-  __m128d m1 = _mm_set1_pd(-0.); // {-0,-0}
-  __m128d ax = swap_m128d (aa); // {as,-ai}
-  __m128d ap = _mm_xor_pd (ax, m1); // {-as,ai}
-  __m128d bz = _mm_xor_pd(bb, m); // {bi,bs}
-  bz = IA_opacify128(bz);
-  __m256d X = _mm256_set_m128d(ap,aa); // {-ai,as,-as,ai}
-  __m256d Y1 = _mm256_set_m128d(bz,bz); // {bi,bs,bi,bs}
-  __m256d Y2 = _mm256_permute_pd(Y1,5); // {bs,bi,bs,bi}
-  __m256d Z1 = _mm256_mul_pd(X,Y1);
-  __m256d Z2 = _mm256_mul_pd(X,Y2);
-  __m256d Z = _mm256_max_pd(Z1,Z2);
-  __m128d z1 = _mm256_castpd256_pd128(Z);
-  __m128d z2 = _mm256_extractf128_pd(Z,1);
-  __m128d r = _mm_max_pd (z1, z2);
-  return IA (IA_opacify128(r));
-# endif
-#else
-  if (a.inf() >= 0.0)           // a>=0
-  {
-    // b>=0    [a.inf()*b.inf(); a.sup()*b.sup()]
-    // b<=0    [a.sup()*b.inf(); a.inf()*b.sup()]
-    // b~=0    [a.sup()*b.inf(); a.sup()*b.sup()]
-    double aa = a.inf(), bb = a.sup();
-    if (b.inf() < 0.0)
-    {
-      aa = bb;
-      if (b.sup() < 0.0)
-        bb = a.inf();
-    }
-    return IA(-CGAL_IA_MUL(aa, -b.inf()), CGAL_IA_MUL(bb, b.sup()));
-  }
-  else if (a.sup()<=0.0)        // a<=0
-  {
-    // b>=0    [a.inf()*b.sup(); a.sup()*b.inf()]
-    // b<=0    [a.sup()*b.sup(); a.inf()*b.inf()]
-    // b~=0    [a.inf()*b.sup(); a.inf()*b.inf()]
-    double aa = a.sup(), bb = a.inf();
-    if (b.inf() < 0.0)
-    {
-      aa=bb;
-      if (b.sup() < 0.0)
-        bb=a.sup();
-    }
-    return IA(-CGAL_IA_MUL(-bb, b.sup()), CGAL_IA_MUL(-aa, -b.inf()));
-  }
-  else                          // 0 \in a
-  {
-    if (b.inf()>=0.0)           // b>=0
-      return IA(-CGAL_IA_MUL(-a.inf(), b.sup()),
-                 CGAL_IA_MUL( a.sup(), b.sup()));
-    if (b.sup()<=0.0)           // b<=0
-      return IA(-CGAL_IA_MUL( a.sup(), -b.inf()),
-                 CGAL_IA_MUL(-a.inf(), -b.inf()));
-    // 0 \in b
-    double tmp1 = CGAL_IA_MUL(-a.inf(),  b.sup());
-    double tmp2 = CGAL_IA_MUL( a.sup(), -b.inf());
-    double tmp3 = CGAL_IA_MUL(-a.inf(), -b.inf());
-    double tmp4 = CGAL_IA_MUL( a.sup(),  b.sup());
-    return IA(-(std::max)(tmp1,tmp2), (std::max)(tmp3,tmp4));
-  }
-#endif
-}
-
-template <bool Protected>
-inline
-Interval_nt<Protected>
-operator* (double a, Interval_nt<Protected> b)
-{
-  // return Interval_nt<Protected>(a)*b;
-  typedef Interval_nt<Protected> IA;
-  typename IA::Internal_protector P;
-  if (a < 0) { a = -a; b = -b; }
-  // Now a >= 0
-#ifdef CGAL_USE_SSE2
-  // TODO: try/benchmark a branchless version
-  __m128d bb = IA_opacify128_weak(b.simd());
-  __m128d aa = _mm_set1_pd(IA_opacify(a));
-  __m128d r = _mm_mul_pd(aa, bb);
-  return IA(IA_opacify128(r));
-#else
-  return IA(-CGAL_IA_MUL(a, -b.inf()), CGAL_IA_MUL(a, b.sup()));
-#endif
-}
-
-template <bool Protected>
-inline
-Interval_nt<Protected>
-operator* (const Interval_nt<Protected> & a, double b)
-{
-  return b * a;
-}
-
-template <bool Protected>
-inline
-Interval_nt<Protected>
-operator/ (const Interval_nt<Protected> &a, const Interval_nt<Protected> & b)
-{
-#if 0
-  // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88626
-  if(CGAL_CST_TRUE(a.is_point()))
-    return a.inf() / b;
-  else if(CGAL_CST_TRUE(b.is_point()))
-    return a / b.inf();
-#endif
-  typedef Interval_nt<Protected> IA;
-  typename Interval_nt<Protected>::Internal_protector P;
-#if defined CGAL_USE_SSE2 && (defined __SSE4_1__ || defined __AVX__)
-  //// not a tight bound, but easy:
-  // return CGAL::inverse(b)*a;
-# if 1
-  // Current fastest
-  // if b>0 we want [ai/(ai>0?bs:bi),as/(as>0?bi:bs)]
-  // if b<0 we want [as/(as>0?bs:bi),ai/(ai>0?bi:bs)]
-  __m128d m = _mm_set_sd(-0.);
-  __m128d aa = a.simd();
-  __m128d bb = b.simd();
-  int i = _mm_movemask_pd(_mm_cmpge_pd(bb, _mm_set1_pd(0.)));
-  if(i==3) return IA::largest(); // bi<=0 && bs>=0
-  __m128d ap = _mm_xor_pd(aa, m); // {ai, as}
-  __m128d ax = swap_m128d(ap); // {as, ai}
-  __m128d bp = _mm_xor_pd(bb, m); // {bi, bs}
-  __m128d bx = swap_m128d(bp); // {bs, bi}
-  __m128d num = _mm_blendv_pd(ap, ax, bp); // {(b>0)?ai:as, (b>0)?as:ai}
-  __m128d d = _mm_blendv_pd(bx, bp, num);
-  // Can we rearrange things so we need fewer xor?
-  __m128d den = _mm_xor_pd(d, m);
-  num = IA_opacify128_weak(num);
-  den = IA_opacify128(den);
-  __m128d r = _mm_div_pd(num, den);
-  return IA (IA_opacify128(r));
-# else
-  // Similar to the multiplication, but slow, because divisions are slow
-  // if b>0 we want [-max(-ai/bi,-ai/bs),max(as/bi,as/bs)] {-ai,as}/{bi,bs} {-ai,as}/{bs,bi}
-  // if b<0 we want [-max(-as/bi,-as/bs),max(ai/bi,ai/bs)] {-as,ai}/{bi,bs} {-as,ai}/{bs,bi}
-  __m128d m = _mm_set_sd(-0.);
-  __m128d m1 = _mm_set1_pd(-0.);
-  __m128d aa = a.simd(); // {-ai, as}
-  __m128d bb = b.simd(); // {-bi, bs}
-  int i = _mm_movemask_pd(_mm_cmpge_pd(bb, _mm_set1_pd(0.)));
-  if(i==3) return IA::largest(); // bi<=0 && bs>=0
-  __m128d ap = _mm_xor_pd(aa, m1); // {ai, -as}
-  __m128d ax = swap_m128d(ap); // {-as, ai}
-  __m128d bp = _mm_xor_pd(bb, m); // {bi, bs}
-  __m128d num = _mm_blendv_pd(aa, ax, bp);
-  num = IA_opacify128_weak(num);
-  bp = IA_opacify128(bp);
-  __m128d bx = swap_m128d(bp); // {bs, bi}
-  __m128d d1 = _mm_div_pd(num, bp);
-  __m128d d2 = _mm_div_pd(num, bx);
-  __m128d r = _mm_max_pd(d1, d2);
-  return IA (IA_opacify128(r));
-# endif
-#else
-  if (b.inf() > 0.0)            // b>0
-  {
-    // e>=0    [a.inf()/b.sup(); a.sup()/b.inf()]
-    // e<=0    [a.inf()/b.inf(); a.sup()/b.sup()]
-    // e~=0    [a.inf()/b.inf(); a.sup()/b.inf()]
-    double aa = b.sup(), bb = b.inf();
-    if (a.inf() < 0.0)
-    {
-      aa = bb;
-      if (a.sup() < 0.0)
-        bb = b.sup();
-    }
-    return IA(-CGAL_IA_DIV(-a.inf(), aa), CGAL_IA_DIV(a.sup(), bb));
-  }
-  else if (b.sup()<0.0)         // b<0
-  {
-    // e>=0    [a.sup()/b.sup(); a.inf()/b.inf()]
-    // e<=0    [a.sup()/b.inf(); a.inf()/b.sup()]
-    // e~=0    [a.sup()/b.sup(); a.inf()/b.sup()]
-    double aa = b.sup(), bb = b.inf();
-    if (a.inf() < 0.0)
-    {
-      bb = aa;
-      if (a.sup() < 0.0)
-        aa = b.inf();
-    }
-    return IA(-CGAL_IA_DIV(a.sup(), -aa), CGAL_IA_DIV(a.inf(), bb));
-  }
-  else                          // b~0
-    return IA::largest();
-  // We could do slightly better -> [0;infinity] when b.sup()==0,
-  // but is this worth ?
-#endif
-}
-
-template <bool Protected>
-inline
-Interval_nt<Protected>
-operator/ (double a, const Interval_nt<Protected> & b)
-{
-#ifdef CGAL_USE_SSE2
-  int i = _mm_movemask_pd(_mm_cmpge_pd(b.simd(), _mm_set1_pd(0.)));
-  if(i==3) return Interval_nt<Protected>::largest(); // bi<=0 && bs>=0
-  __m128d aa, xx;
-  if(a>0){
-    aa = _mm_set1_pd(-a);
-    xx = (-b).simd();
-  } else if(a<0){
-    aa = _mm_set1_pd(a);
-    xx = b.simd();
-  } else return 0.;
-  typename Interval_nt<Protected>::Internal_protector P;
-  __m128d r = _mm_div_pd(IA_opacify128_weak(aa), IA_opacify128(xx));
-  return Interval_nt<Protected>(IA_opacify128(r));
-#else
-  return Interval_nt<Protected>(a) / b;
-#endif
-}
-
-template <bool Protected>
-inline
-Interval_nt<Protected>
-operator/ (Interval_nt<Protected> a, double b)
-{
-#ifdef CGAL_USE_SSE2
-  if(b<0){ a = -a; b = -b; }
-  else if(b==0) return Interval_nt<Protected>::largest();
-  // Now b > 0
-  typename Interval_nt<Protected>::Internal_protector P;
-#ifdef __GNUC__
-  // Paradoxically, constants should be safe, and this lets the compiler optimize x/2 to x*.5
-  if (!__builtin_constant_p(b))
-#endif
-    b = IA_opacify(b);
-  __m128d bb = _mm_set1_pd(b);
-  __m128d aa = IA_opacify128(a.simd());
-  __m128d r = _mm_div_pd(aa, bb);
-  return Interval_nt<Protected>(IA_opacify128(r));
-#else
-  return a / Interval_nt<Protected>(b);
-#endif
-}
-
 // TODO: What about these two guys? Where do they belong to?
 template <bool Protected>
 struct Min< Interval_nt<Protected> >
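
The pattern that carries this rewrite: operators defined as friends inside the
class body are "hidden friends", found only by argument-dependent lookup, so
the separate template <bool Protected> free functions can be removed and the
operators stay out of overload resolution for unrelated types.  A
self-contained sketch of the mechanism, with hypothetical names:

    #include <iostream>

    template <bool B>
    struct Wrapped {
      double v;
      // Hidden friends: visible only when a Wrapped<B> argument triggers ADL.
      friend Wrapped operator+ (Wrapped a, Wrapped b) { return {a.v + b.v}; }
      friend Wrapped operator+ (double a, Wrapped b)  { return {a + b.v}; }
    };

    int main ()
    {
      Wrapped<true> x{1.5};
      Wrapped<true> y = 2.0 + x;  // ADL finds the friend; B is never deduced from a double
      std::cout << y.v << '\n';   // prints 3.5
    }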