Add sse2 based functions for fabs, min, and max

2011-04-01 14:01:17 +00:00 · 2011-04-01 14:01:17 +00:00 · bce0e8c4b4
parent 202c401d41
commit bce0e8c4b4
3 changed files with 220 additions and 2 deletions
--- a/Number_types/include/CGAL/double.h
+++ b/Number_types/include/CGAL/double.h
@ -32,7 +32,7 @@
 #include <limits>
 #ifdef CGAL_USE_SSE2_FABS
-#include <emmintrin.h>
+#include <CGAL/sse2.h>
 #endif
 #ifdef _MSC_VER
@ -127,7 +127,7 @@ template <> class Algebraic_structure_traits< double >
 #ifdef CGAL_USE_SSE2_FABS   
 inline double sse2fabs(double a)
 {
-  static _CRT_ALIGN(16) const union{
+  static CGAL_ALIGN_16 const union{
    __int64 i[2];
    __m128d m;
  } absMask = {0x7fffffffffffffff, 0x7fffffffffffffff};
@ -138,6 +138,7 @@ inline double sse2fabs(double a)
  _mm_store_sd(&a, temp);
  return a;
 }
 #endif
 template <> class Real_embeddable_traits< double >
--- a/Number_types/include/CGAL/sse2.h
+++ b/Number_types/include/CGAL/sse2.h
@ -0,0 +1,35 @@
 // Copyright (c) 1999,2007  Utrecht University (The Netherlands),
 // ETH Zurich (Switzerland), Freie Universitaet Berlin (Germany),
 // INRIA Sophia-Antipolis (France), Martin-Luther-University Halle-Wittenberg
 // (Germany), Max-Planck-Institute Saarbruecken (Germany), RISC Linz (Austria),
 // and Tel-Aviv University (Israel).  All rights reserved.
 //
 // This file is part of CGAL (www.cgal.org); you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public License as
 // published by the Free Software Foundation; version 2.1 of the License.
 // See the file LICENSE.LGPL distributed with CGAL.
 //
 // Licensees holding a valid commercial license may use this file in
 // accordance with the commercial license agreement provided with the software.
 //
 // This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
 // WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 //
 // $URL$
 // $Id$
 //
 //
 // Author(s)     :  Andreas Fabri
 #ifndef CGAL_SSE2_H
 #define CGAL_SSE2_H
 #include <emmintrin.h>
 // CGAL_ALIGN_16 expands to the compiler-specific specifier that requests
 // 16-byte alignment for the declaration it is attached to.
 #if defined ( _MSC_VER )
 #define CGAL_ALIGN_16  __declspec(align(16))
 #elif defined( __GNUC__ )
 // GCC-compatible compilers identify themselves through __GNUC__.
 // (__GNU__ denotes the GNU/Hurd operating system, not the compiler, so
 // testing it left CGAL_ALIGN_16 undefined on ordinary GCC builds.)
 #define  CGAL_ALIGN_16 __attribute__((aligned(16)))
 #endif
 #endif // CGAL_SSE2_H
--- a/Number_types/include/CGAL/utils_classes.h
+++ b/Number_types/include/CGAL/utils_classes.h
@ -19,6 +19,10 @@
 #define CGAL_UTILS_CLASSES_H
 #include <CGAL/basic.h>
 #ifdef CGAL_USE_SSE2_MAX
 #include <CGAL/sse2.h>
 #endif
 namespace CGAL {
 template < class A, class B = A >
@ -77,6 +81,184 @@ protected:
 Less c;
 };
 #ifdef CGAL_USE_SSE2_MAX
  // Returns the maximum of four doubles using scalar SSE2 instructions.
  // The two pairwise maxima (a,b) and (c,d) are formed first and then combined.
  inline double sse2max(double a, double b, double c, double d)
 {
   const __m128d left  = _mm_max_sd(_mm_load_sd(&a), _mm_load_sd(&b));
   const __m128d right = _mm_max_sd(_mm_load_sd(&c), _mm_load_sd(&d));
   // Write the low lane of the combined maximum back into a and return it.
   _mm_store_sd(&a, _mm_max_sd(left, right));
   return a;
 }
 // Returns the maximum of three doubles using scalar SSE2 instructions,
 // folding b and then c into a running maximum that starts at a.
 inline double sse2max(double a, double b, double c)
 {
   __m128d running = _mm_load_sd(&a);
   running = _mm_max_sd(running, _mm_load_sd(&b));
   running = _mm_max_sd(running, _mm_load_sd(&c));
   _mm_store_sd(&a, running);
   return a;
 }
 // Returns the larger of two doubles using the scalar SSE2 max instruction.
 inline double sse2max(double a, double b)
 {
   const __m128d larger = _mm_max_sd(_mm_load_sd(&a), _mm_load_sd(&b));
   _mm_store_sd(&a, larger);   // low lane holds the result
   return a;
 }
 #if 0  
 // Disabled experiment: two maxima computed at once with one packed operation.
 // Doing things in parallel seems the way to go,
 // but copying to/from arrays has too much overhead.
 // Effect:  a = max(a,a2)   b = max(b,b2)
 inline void sse2mmax2(double& a, double a2, double& b, double b2)
 {
    // Scratch buffer, declared 16-byte aligned for the packed load/store below.
    CGAL_ALIGN_16 double res[2];
   // First operand: lanes (a, b).
   res[0] = a;
   res[1] = b;
   __m128d F =_mm_load_pd(res);
   // Second operand: lanes (a2, b2); the scratch buffer is reused.
   res[0] = a2;
   res[1] = b2;
   __m128d S =_mm_load_pd(res);
   // Lane-wise maximum of both pairs.
   __m128d C = _mm_max_pd(F,S); 
   _mm_store_pd(res, C);
   // Copy the two results back to the in/out arguments.
   a = res[0];
   b = res[1];
 }
 #endif
  // Returns the minimum of four doubles using scalar SSE2 instructions.
  // The two pairwise minima (a,b) and (c,d) are formed first and then combined.
  inline double sse2min(double a, double b, double c, double d)
 {
   const __m128d left  = _mm_min_sd(_mm_load_sd(&a), _mm_load_sd(&b));
   const __m128d right = _mm_min_sd(_mm_load_sd(&c), _mm_load_sd(&d));
   // Write the low lane of the combined minimum back into a and return it.
   _mm_store_sd(&a, _mm_min_sd(left, right));
   return a;
 }
 // Returns the minimum of three doubles using scalar SSE2 instructions,
 // folding b and then c into a running minimum that starts at a.
 inline double sse2min(double a, double b, double c)
 {
   __m128d running = _mm_load_sd(&a);
   running = _mm_min_sd(running, _mm_load_sd(&b));
   running = _mm_min_sd(running, _mm_load_sd(&c));
   _mm_store_sd(&a, running);
   return a;
 }
 // Returns the smaller of two doubles using the scalar SSE2 min instruction.
 inline double sse2min(double a, double b)
 {
   const __m128d smaller = _mm_min_sd(_mm_load_sd(&a), _mm_load_sd(&b));
   _mm_store_sd(&a, smaller);   // low lane holds the result
   return a;
 }
 // Computes the minimum and the maximum of the three values a, b and c.
 // On return, a holds the minimum and c holds the maximum; b is only read.
 inline void sse2minmax(double& a, double b, double& c)
 {
   const __m128d A = _mm_load_sd(&a);
   const __m128d B = _mm_load_sd(&b);
   const __m128d C = _mm_load_sd(&c);
   // Both extrema are derived from the ORIGINAL inputs. Reusing the register
   // that already holds the minimum would drop the original value of a from
   // the maximum computation.
   const __m128d lo = _mm_min_sd(_mm_min_sd(A, B), C);
   const __m128d hi = _mm_max_sd(_mm_max_sd(A, B), C);
   _mm_store_sd(&a, lo);
   _mm_store_sd(&c, hi);
 }
 #endif // CGAL_USE_SSE2_MAX
 // Specialization of Max for double with two-, three- and four-argument
 // call operators. When CGAL_USE_SSE2_MAX is defined the calls are forwarded
 // to the sse2max overloads; otherwise std::max is used.
 template <>
 struct Max<double>
   : public std::binary_function< double, double, double >
 {
   Max() {}

   // Maximum of x and y.
   double operator()( const double& x, const double& y) const
   {
 #ifdef CGAL_USE_SSE2_MAX
     return sse2max(x, y);
 #else
     return (std::max)(x, y);
 #endif
   }

   // Maximum of x, y and z.
   double operator()( double x, double y, double z) const
   {
 #ifdef CGAL_USE_SSE2_MAX
     return sse2max(x, y, z);
 #else
     const double xy = (std::max)(x, y);
     return (std::max)(xy, z);
 #endif
   }

   // Maximum of w, x, y and z.
   double operator()( double w, double x, double y, double z) const
   {
 #ifdef CGAL_USE_SSE2_MAX
     return sse2max(w, x, y, z);
 #else
     const double xy = (std::max)(x, y);
     const double wz = (std::max)(w, z);
     return (std::max)(xy, wz);
 #endif
   }
 };
 // Specialization of Min for double with two-, three- and four-argument
 // call operators. When CGAL_USE_SSE2_MAX is defined (the same switch that
 // guards the max variants) the calls are forwarded to the sse2min overloads;
 // otherwise std::min is used.
 template <>
 struct Min<double>
   : public std::binary_function< double, double, double >
 {
   Min() {}

   // Minimum of x and y.
   double operator()( const double& x, const double& y) const
   {
 #ifdef CGAL_USE_SSE2_MAX
     return sse2min(x, y);
 #else
     return (std::min)(x, y);
 #endif
   }

   // Minimum of x, y and z.
   double operator()( double x, double y, double z) const
   {
 #ifdef CGAL_USE_SSE2_MAX
     return sse2min(x, y, z);
 #else
     const double xy = (std::min)(x, y);
     return (std::min)(xy, z);
 #endif
   }

   // Minimum of w, x, y and z.
   double operator()( double w, double x, double y, double z) const
   {
 #ifdef CGAL_USE_SSE2_MAX
     return sse2min(w, x, y, z);
 #else
     const double xy = (std::min)(x, y);
     const double wz = (std::min)(w, z);
     return (std::min)(xy, wz);
 #endif
   }
 };
 template< class T >
 class Is_valid
  : public std::unary_function< T, bool > {