mirror of https://github.com/CGAL/cgal
Extend the min trick to the other multiplication methods, change the default.
This commit is contained in:
parent
fefc2658b7
commit
e305b72f68
|
|
@ -539,9 +539,9 @@ private:
|
||||||
#endif
|
#endif
|
||||||
Internal_protector P;
|
Internal_protector P;
|
||||||
#ifdef CGAL_USE_SSE2
|
#ifdef CGAL_USE_SSE2
|
||||||
# if 1
|
# if !defined __SSE4_1__ && !defined __AVX__
|
||||||
// Brutal, compute all products in all directions.
|
// Brutal, compute all products in all directions.
|
||||||
// The actual winner (by a hair) on recent hardware
|
// The actual winner (by a hair) on recent hardware before removing NaNs.
|
||||||
__m128d aa = IA_opacify128_weak(a.simd()); // {-ai,as}
|
__m128d aa = IA_opacify128_weak(a.simd()); // {-ai,as}
|
||||||
__m128d bb = b.simd(); // {-bi,bs}
|
__m128d bb = b.simd(); // {-bi,bs}
|
||||||
__m128d m = _mm_set_sd(-0.); // {-0,+0}
|
__m128d m = _mm_set_sd(-0.); // {-0,+0}
|
||||||
|
|
@ -569,13 +569,14 @@ private:
|
||||||
__m128d r = _mm_max_pd (y1, y2);
|
__m128d r = _mm_max_pd (y1, y2);
|
||||||
// __m128d r = _mm_max_pd(x1,_mm_max_pd(x2,_mm_max_pd(x3,_mm_min_pd(x4,big))));
|
// __m128d r = _mm_max_pd(x1,_mm_max_pd(x2,_mm_max_pd(x3,_mm_min_pd(x4,big))));
|
||||||
return IA (IA_opacify128(r));
|
return IA (IA_opacify128(r));
|
||||||
# elif 0
|
# elif 1
|
||||||
// we want to multiply ai,as with {ai<0?-bs:-bi,as<0?bi:bs}
|
// we want to multiply ai,as with {ai<0?-bs:-bi,as<0?bi:bs}
|
||||||
// we want to multiply as,ai with {as<0?-bs:-bi,ai<0?bi:bs}
|
// we want to multiply as,ai with {as<0?-bs:-bi,ai<0?bi:bs}
|
||||||
// requires SSE4 (otherwise use _mm_cmplt_pd, _mm_and_pd, _mm_andnot_pd and _mm_or_pd to avoid blendv)
|
// requires SSE4 (otherwise use _mm_cmplt_pd, _mm_and_pd, _mm_andnot_pd and _mm_or_pd to avoid blendv)
|
||||||
// probably faster on older hardware
|
// probably faster on older hardware
|
||||||
__m128d m = _mm_set_sd(-0.); // {-0,+0}
|
__m128d m = _mm_set_sd(-0.); // {-0,+0}
|
||||||
__m128d m1 = _mm_set1_pd(-0.); // {-0,-0}
|
__m128d m1 = _mm_set1_pd(-0.); // {-0,-0}
|
||||||
|
__m128d big = IA::largest().simd();
|
||||||
__m128d aa = a.simd(); // {-ai,as}
|
__m128d aa = a.simd(); // {-ai,as}
|
||||||
__m128d az = _mm_xor_pd(aa, m); // {ai,as}
|
__m128d az = _mm_xor_pd(aa, m); // {ai,as}
|
||||||
az = IA_opacify128_weak(az);
|
az = IA_opacify128_weak(az);
|
||||||
|
|
@ -586,7 +587,9 @@ private:
|
||||||
__m128d x = _mm_blendv_pd (bb, bp, az); // {ai<0?-bs:-bi,as<0?bi:bs}
|
__m128d x = _mm_blendv_pd (bb, bp, az); // {ai<0?-bs:-bi,as<0?bi:bs}
|
||||||
__m128d y = _mm_blendv_pd (bb, bp, azp); // {as<0?-bs:-bi,ai<0?bi:bs}
|
__m128d y = _mm_blendv_pd (bb, bp, azp); // {as<0?-bs:-bi,ai<0?bi:bs}
|
||||||
__m128d p1 = _mm_mul_pd (az, x);
|
__m128d p1 = _mm_mul_pd (az, x);
|
||||||
|
p1 = _mm_min_pd(p1,big); // no NaN
|
||||||
__m128d p2 = _mm_mul_pd (azp, y);
|
__m128d p2 = _mm_mul_pd (azp, y);
|
||||||
|
p2 = _mm_min_pd(p2,big); // no NaN
|
||||||
__m128d r = _mm_max_pd (p1, p2);
|
__m128d r = _mm_max_pd (p1, p2);
|
||||||
return IA (IA_opacify128(r));
|
return IA (IA_opacify128(r));
|
||||||
# elif 0
|
# elif 0
|
||||||
|
|
@ -594,6 +597,7 @@ private:
|
||||||
// we want to multiply -as,ai with {as<0?bs:bi,ai>0?bs:bi}
|
// we want to multiply -as,ai with {as<0?bs:bi,ai>0?bs:bi}
|
||||||
// slightly worse than the previous one
|
// slightly worse than the previous one
|
||||||
__m128d m1 = _mm_set1_pd(-0.); // {-0,-0}
|
__m128d m1 = _mm_set1_pd(-0.); // {-0,-0}
|
||||||
|
__m128d big = IA::largest().simd();
|
||||||
__m128d aa = IA_opacify128_weak(a.simd()); // {-ai,as}
|
__m128d aa = IA_opacify128_weak(a.simd()); // {-ai,as}
|
||||||
__m128d ax = swap_m128d (aa); // {as,-ai}
|
__m128d ax = swap_m128d (aa); // {as,-ai}
|
||||||
__m128d ap = _mm_xor_pd (ax, m1); // {-as,ai}
|
__m128d ap = _mm_xor_pd (ax, m1); // {-as,ai}
|
||||||
|
|
@ -605,13 +609,16 @@ private:
|
||||||
__m128d x = _mm_blendv_pd (bbs, bbi, aa); // {ai>0?bi:bs,as<0?bi:bs}
|
__m128d x = _mm_blendv_pd (bbs, bbi, aa); // {ai>0?bi:bs,as<0?bi:bs}
|
||||||
__m128d y = _mm_blendv_pd (bbi, bbs, ax); // {as<0?bs:bi,ai>0?bs:bi}
|
__m128d y = _mm_blendv_pd (bbi, bbs, ax); // {as<0?bs:bi,ai>0?bs:bi}
|
||||||
__m128d p1 = _mm_mul_pd (aa, x);
|
__m128d p1 = _mm_mul_pd (aa, x);
|
||||||
|
p1 = _mm_min_pd(p1,big); // no NaN
|
||||||
__m128d p2 = _mm_mul_pd (ap, y);
|
__m128d p2 = _mm_mul_pd (ap, y);
|
||||||
|
p2 = _mm_min_pd(p2,big); // no NaN
|
||||||
__m128d r = _mm_max_pd (p1, p2);
|
__m128d r = _mm_max_pd (p1, p2);
|
||||||
return IA (IA_opacify128(r));
|
return IA (IA_opacify128(r));
|
||||||
# else
|
# else
|
||||||
// AVX version of the brutal method, same running time or slower
|
// AVX version of the brutal method, same running time or slower
|
||||||
__m128d aa = IA_opacify128_weak(a.simd()); // {-ai,as}
|
__m128d aa = IA_opacify128_weak(a.simd()); // {-ai,as}
|
||||||
__m128d bb = b.simd(); // {-bi,bs}
|
__m128d bb = b.simd(); // {-bi,bs}
|
||||||
|
__m256d big = _mm256_set1_pd(std::numeric_limits<double>::infinity());
|
||||||
__m128d m = _mm_set_sd(-0.); // {-0,+0}
|
__m128d m = _mm_set_sd(-0.); // {-0,+0}
|
||||||
__m128d m1 = _mm_set1_pd(-0.); // {-0,-0}
|
__m128d m1 = _mm_set1_pd(-0.); // {-0,-0}
|
||||||
__m128d ax = swap_m128d (aa); // {as,-ai}
|
__m128d ax = swap_m128d (aa); // {as,-ai}
|
||||||
|
|
@ -622,7 +629,9 @@ private:
|
||||||
__m256d Y1 = _mm256_set_m128d(bz,bz); // {bi,bs,bi,bs}
|
__m256d Y1 = _mm256_set_m128d(bz,bz); // {bi,bs,bi,bs}
|
||||||
__m256d Y2 = _mm256_permute_pd(Y1,5); // {bs,bi,bs,bi}
|
__m256d Y2 = _mm256_permute_pd(Y1,5); // {bs,bi,bs,bi}
|
||||||
__m256d Z1 = _mm256_mul_pd(X,Y1);
|
__m256d Z1 = _mm256_mul_pd(X,Y1);
|
||||||
|
Z1 = _mm256_min_pd(Z1,big); // no NaN
|
||||||
__m256d Z2 = _mm256_mul_pd(X,Y2);
|
__m256d Z2 = _mm256_mul_pd(X,Y2);
|
||||||
|
Z2 = _mm256_min_pd(Z2,big); // no NaN
|
||||||
__m256d Z = _mm256_max_pd(Z1,Z2);
|
__m256d Z = _mm256_max_pd(Z1,Z2);
|
||||||
__m128d z1 = _mm256_castpd256_pd128(Z);
|
__m128d z1 = _mm256_castpd256_pd128(Z);
|
||||||
__m128d z2 = _mm256_extractf128_pd(Z,1);
|
__m128d z2 = _mm256_extractf128_pd(Z,1);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue