From bce0e8c4b47af848dadf8dcb7e5a476f551161ec Mon Sep 17 00:00:00 2001 From: Andreas Fabri Date: Fri, 1 Apr 2011 14:01:17 +0000 Subject: [PATCH] Add sse2 based functions for fabs, min, and max --- Number_types/include/CGAL/double.h | 5 +- Number_types/include/CGAL/sse2.h | 35 +++++ Number_types/include/CGAL/utils_classes.h | 182 ++++++++++++++++++++++ 3 files changed, 220 insertions(+), 2 deletions(-) create mode 100644 Number_types/include/CGAL/sse2.h diff --git a/Number_types/include/CGAL/double.h b/Number_types/include/CGAL/double.h index e872037461e..bc14bba427f 100644 --- a/Number_types/include/CGAL/double.h +++ b/Number_types/include/CGAL/double.h @@ -32,7 +32,7 @@ #include #ifdef CGAL_USE_SSE2_FABS -#include +#include #endif #ifdef _MSC_VER @@ -127,7 +127,7 @@ template <> class Algebraic_structure_traits< double > #ifdef CGAL_USE_SSE2_FABS inline double sse2fabs(double a) { - static _CRT_ALIGN(16) const union{ + static CGAL_ALIGN_16 const union{ __int64 i[2]; __m128d m; } absMask = {0x7fffffffffffffff, 0x7fffffffffffffff}; @@ -138,6 +138,7 @@ inline double sse2fabs(double a) _mm_store_sd(&a, temp); return a; } + #endif template <> class Real_embeddable_traits< double > diff --git a/Number_types/include/CGAL/sse2.h b/Number_types/include/CGAL/sse2.h new file mode 100644 index 00000000000..1f9a139441a --- /dev/null +++ b/Number_types/include/CGAL/sse2.h @@ -0,0 +1,35 @@ +// Copyright (c) 1999,2007 Utrecht University (The Netherlands), +// ETH Zurich (Switzerland), Freie Universitaet Berlin (Germany), +// INRIA Sophia-Antipolis (France), Martin-Luther-University Halle-Wittenberg +// (Germany), Max-Planck-Institute Saarbruecken (Germany), RISC Linz (Austria), +// and Tel-Aviv University (Israel). All rights reserved. +// +// This file is part of CGAL (www.cgal.org); you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; version 2.1 of the License. 
+// See the file LICENSE.LGPL distributed with CGAL.
+//
+// Licensees holding a valid commercial license may use this file in
+// accordance with the commercial license agreement provided with the software.
+//
+// This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
+// WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// $URL$
+// $Id$
+//
+//
+// Author(s) : Andreas Fabri
+
+#ifndef CGAL_SSE2_H
+#define CGAL_SSE2_H
+
+#include
+
+#if defined ( _MSC_VER )  // Microsoft compiler: 16-byte alignment spelled with __declspec
+#define CGAL_ALIGN_16 __declspec(align(16))
+#elif defined( __GNUC__ )  // GCC and GCC-compatible compilers (was __GNU__, which is predefined only on GNU/Hurd)
+#define CGAL_ALIGN_16 __attribute__((aligned(16)))
+#endif
+
+#endif // CGAL_SSE2_H
diff --git a/Number_types/include/CGAL/utils_classes.h b/Number_types/include/CGAL/utils_classes.h
index bc23a837b59..156f7394bc6 100644
--- a/Number_types/include/CGAL/utils_classes.h
+++ b/Number_types/include/CGAL/utils_classes.h
@@ -19,6 +19,10 @@
 #define CGAL_UTILS_CLASSES_H
 #include
+#ifdef CGAL_USE_SSE2_MAX
+#include
+#endif
+
 namespace CGAL {
 template < class A, class B = A >
@@ -77,6 +81,184 @@ protected:
   Less c;
 };
+#ifdef CGAL_USE_SSE2_MAX
+
+inline double sse2max(double a, double b, double c, double d)  // largest of four doubles, computed with scalar SSE2 max
+{
+  __m128d A =_mm_load_sd(&a);
+  __m128d B =_mm_load_sd(&b);
+  __m128d C =_mm_load_sd(&c);
+  __m128d D =_mm_load_sd(&d);
+
+  __m128d AB = _mm_max_sd(A,B);  // the two pairwise maxima do not depend on each other
+  __m128d CD = _mm_max_sd(C,D);
+  A = _mm_max_sd(AB,CD);
+  _mm_store_sd(&a, A);  // the by-value parameter a doubles as the result slot
+  return a;
+}
+
+inline double sse2max(double a, double b, double c)  // largest of three doubles, computed with scalar SSE2 max
+{
+  __m128d A =_mm_load_sd(&a);
+  __m128d B =_mm_load_sd(&b);
+  __m128d C =_mm_load_sd(&c);
+
+  __m128d AB = _mm_max_sd(A,B);
+  A = _mm_max_sd(AB,C);
+  _mm_store_sd(&a, A);  // the by-value parameter a doubles as the result slot
+  return a;
+}
+
+inline double sse2max(double a, double b)  // larger of two doubles, computed with scalar SSE2 max
+{
+  __m128d A =_mm_load_sd(&a);
+  __m128d B =_mm_load_sd(&b);
+
+  __m128d C = _mm_max_sd(A,B);
+  _mm_store_sd(&a, C);  // the by-value parameter a doubles as the result slot
+  return a;
+}
+
+
+#if 0
+// Doing things in parallel seems the way to go
+// but copying to/from arrays has too much overhead
+// a = max(a,a2) b =
max(b,b2)
+inline void sse2mmax2(double& a, double a2, double& b, double b2)  // disabled by the enclosing #if 0: packed variant doing both max operations at once
+{
+  CGAL_ALIGN_16 double res[2];  // _mm_load_pd / _mm_store_pd require a 16-byte aligned address
+  res[0] = a;
+  res[1] = b;
+  __m128d F =_mm_load_pd(res);
+  res[0] = a2;
+  res[1] = b2;
+  __m128d S =_mm_load_pd(res);
+
+  __m128d C = _mm_max_pd(F,S);
+
+  _mm_store_pd(res, C);
+  a = res[0];
+  b = res[1];
+}
+#endif
+
+
+inline double sse2min(double a, double b, double c, double d)  // smallest of four doubles, computed with scalar SSE2 min
+{
+  __m128d A =_mm_load_sd(&a);
+  __m128d B =_mm_load_sd(&b);
+  __m128d C =_mm_load_sd(&c);
+  __m128d D =_mm_load_sd(&d);
+
+  __m128d AB = _mm_min_sd(A,B);  // the two pairwise minima do not depend on each other
+  __m128d CD = _mm_min_sd(C,D);
+  A = _mm_min_sd(AB,CD);
+  _mm_store_sd(&a, A);  // the by-value parameter a doubles as the result slot
+  return a;
+}
+
+inline double sse2min(double a, double b, double c)  // smallest of three doubles, computed with scalar SSE2 min
+{
+  __m128d A =_mm_load_sd(&a);
+  __m128d B =_mm_load_sd(&b);
+  __m128d C =_mm_load_sd(&c);
+
+  __m128d AB = _mm_min_sd(A,B);
+  A = _mm_min_sd(AB,C);
+  _mm_store_sd(&a, A);  // the by-value parameter a doubles as the result slot
+  return a;
+}
+
+inline double sse2min(double a, double b)  // smaller of two doubles, computed with scalar SSE2 min
+{
+  __m128d A =_mm_load_sd(&a);
+  __m128d B =_mm_load_sd(&b);
+
+  __m128d C = _mm_min_sd(A,B);
+  _mm_store_sd(&a, C);  // the by-value parameter a doubles as the result slot
+  return a;
+}
+
+inline void sse2minmax(double& a, double b, double& c)  // in place: a <- min(a,b,c), c <- max(a,b,c)
+{
+  __m128d A =_mm_load_sd(&a);  // all three inputs are read before either reference is written
+  __m128d B =_mm_load_sd(&b);
+  __m128d C =_mm_load_sd(&c);
+
+  __m128d AB = _mm_min_sd(A,B);
+  AB = _mm_min_sd(AB,C);  // A is left untouched so the max below still sees the original a
+  _mm_store_sd(&a, AB);
+
+  AB = _mm_max_sd(A,B);  // scalar op like everywhere else in this file (was the packed _mm_max_pd)
+  C = _mm_max_sd(AB,C);
+  _mm_store_sd(&c, C);
+}
+
+#endif // CGAL_USE_SSE2_MAX
+
+template <>
+struct Max<double> :public std::binary_function< double, double, double > {  // double specialization; forwards to sse2max when CGAL_USE_SSE2_MAX is defined
+  Max() {}
+
+  double operator()( const double& x, const double& y) const  // NOTE(review): for NaN inputs the SSE2 and std::max branches may pick different operands
+  {
+#ifdef CGAL_USE_SSE2_MAX
+    return sse2max(x,y);
+#else
+    return (std::max)( x, y);  // parentheses keep a function-like max macro from expanding
+#endif
+  }
+
+  double operator()( double x, double y, double z) const  // maximum of three values
+  {
+#ifdef CGAL_USE_SSE2_MAX
+    return sse2max(x,y,z);
+#else
+    return (std::max)((std::max)( x, y), z);
+#endif
+  }
+
+  double operator()( double w,double x, double y, double z) const  // maximum of four values
+  {
+#ifdef CGAL_USE_SSE2_MAX
+    return sse2max(w,x,y,z);
+#else
+    return (std::max)((std::max)( x, y), (std::max)(w,z));
+#endif
+  }
+};
+
+template <>
+struct Min<double> :public std::binary_function< double, double, double > {  // double specialization; forwards to sse2min when CGAL_USE_SSE2_MAX is defined
+  Min() {}
+
+  double operator()( const double& x, const double& y) const  // NOTE(review): for NaN inputs the SSE2 and std::min branches may pick different operands
+  {
+#ifdef CGAL_USE_SSE2_MAX
+    return sse2min(x,y);
+#else
+    return (std::min)( x, y);  // parentheses keep a function-like min macro from expanding
+#endif
+  }
+
+  double operator()( double x, double y, double z) const  // minimum of three values
+  {
+#ifdef CGAL_USE_SSE2_MAX
+    return sse2min(x,y,z);
+#else
+    return (std::min)((std::min)( x, y), z);
+#endif
+  }
+
+  double operator()( double w,double x, double y, double z) const  // minimum of four values
+  {
+#ifdef CGAL_USE_SSE2_MAX
+    return sse2min(w,x,y,z);
+#else
+    return (std::min)((std::min)( x, y), (std::min)(w,z));
+#endif
+  }
+};
 template< class T >
 class Is_valid : public std::unary_function< T, bool > {