Add sse2 based functions for fabs, min, and max

2011-04-01 14:01:17 +00:00 · 2011-04-01 14:01:17 +00:00 · bce0e8c4b4
parent 202c401d41
commit bce0e8c4b4
3 changed files with 220 additions and 2 deletions
--- a/Number_types/include/CGAL/double.h
+++ b/Number_types/include/CGAL/double.h
@ -32,7 +32,7 @@
 #include <limits>

 #ifdef CGAL_USE_SSE2_FABS
-#include <emmintrin.h>
+#include <CGAL/sse2.h>
 #endif

 #ifdef _MSC_VER
@ -127,7 +127,7 @@ template <> class Algebraic_structure_traits< double >
 #ifdef CGAL_USE_SSE2_FABS   
 inline double sse2fabs(double a)
 {
-  static _CRT_ALIGN(16) const union{
+  static CGAL_ALIGN_16 const union{
    __int64 i[2];
    __m128d m;
  } absMask = {0x7fffffffffffffff, 0x7fffffffffffffff};
@ -138,6 +138,7 @@ inline double sse2fabs(double a)
  _mm_store_sd(&a, temp);
  return a;
 }
+
 #endif

 template <> class Real_embeddable_traits< double >
--- a/Number_types/include/CGAL/sse2.h
+++ b/Number_types/include/CGAL/sse2.h
@ -0,0 +1,35 @@
+// Copyright (c) 1999,2007  Utrecht University (The Netherlands),
+// ETH Zurich (Switzerland), Freie Universitaet Berlin (Germany),
+// INRIA Sophia-Antipolis (France), Martin-Luther-University Halle-Wittenberg
+// (Germany), Max-Planck-Institute Saarbruecken (Germany), RISC Linz (Austria),
+// and Tel-Aviv University (Israel).  All rights reserved.
+//
+// This file is part of CGAL (www.cgal.org); you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public License as
+// published by the Free Software Foundation; version 2.1 of the License.
+// See the file LICENSE.LGPL distributed with CGAL.
+//
+// Licensees holding a valid commercial license may use this file in
+// accordance with the commercial license agreement provided with the software.
+//
+// This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
+// WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// $URL$
+// $Id$
+//
+//
+// Author(s)     :  Andreas Fabri
+
+#ifndef CGAL_SSE2_H
+#define CGAL_SSE2_H
+
+#include <emmintrin.h>
+
+#if defined ( _MSC_VER )   // Microsoft compiler: alignment is requested via __declspec
+#define CGAL_ALIGN_16  __declspec(align(16))
+#elif defined( __GNUC__ )  // GCC-compatible compilers (__GNU__ is only predefined on GNU/Hurd)
+#define  CGAL_ALIGN_16 __attribute__((aligned(16)))
+#endif
+
+#endif // CGAL_SSE2_H
--- a/Number_types/include/CGAL/utils_classes.h
+++ b/Number_types/include/CGAL/utils_classes.h
@ -19,6 +19,10 @@
 #define CGAL_UTILS_CLASSES_H
 #include <CGAL/basic.h>

+#ifdef CGAL_USE_SSE2_MAX
+#include <CGAL/sse2.h>
+#endif
+
 namespace CGAL {

 template < class A, class B = A >
@ -77,6 +81,184 @@ protected:
 Less c;
 };

+#ifdef CGAL_USE_SSE2_MAX
+
+// Combines four doubles with the scalar SSE2 maximum: max(max(a,b), max(c,d)).
+inline double sse2max(double a, double b, double c, double d)
+{
+  const __m128d va = _mm_load_sd(&a);
+  const __m128d vb = _mm_load_sd(&b);
+  const __m128d vc = _mm_load_sd(&c);
+  const __m128d vd = _mm_load_sd(&d);
+  const __m128d left  = _mm_max_sd(va, vb);   // max(a, b)
+  const __m128d right = _mm_max_sd(vc, vd);   // max(c, d)
+  double result;
+  _mm_store_sd(&result, _mm_max_sd(left, right));
+  return result;
+}
+     
+// Combines three doubles with the scalar SSE2 maximum: max(max(a,b), c).
+inline double sse2max(double a, double b, double c)
+{
+  const __m128d va = _mm_load_sd(&a);
+  const __m128d vb = _mm_load_sd(&b);
+  const __m128d vc = _mm_load_sd(&c);
+  const __m128d firstTwo = _mm_max_sd(va, vb);
+  double result;
+  _mm_store_sd(&result, _mm_max_sd(firstTwo, vc));
+  return result;
+}
+      
+// Returns the scalar SSE2 maximum (_mm_max_sd) of a and b.
+inline double sse2max(double a, double b)
+{
+  const __m128d lhs = _mm_load_sd(&a);
+  const __m128d rhs = _mm_load_sd(&b);
+  double result;
+  _mm_store_sd(&result, _mm_max_sd(lhs, rhs));
+  return result;
+}
+
+ 
+#if 0  
+// Doing things in parallel seems the way to go
+// but copying to/from arrays has too much overhead  
+//  a = max(a,a2) b = max(b,b2)    
+// Pairwise maximum of two pairs in one packed operation:
+// a becomes max(a, a2) and b becomes max(b, b2).
+inline void sse2mmax2(double& a, double a2, double& b, double b2)
+{
+  // 16-byte aligned staging buffer for the aligned packed load/store below.
+  CGAL_ALIGN_16 double lanes[2];
+  lanes[0] = a;
+  lanes[1] = b;
+  const __m128d current = _mm_load_pd(lanes);
+  lanes[0] = a2;
+  lanes[1] = b2;
+  const __m128d candidate = _mm_load_pd(lanes);
+  _mm_store_pd(lanes, _mm_max_pd(current, candidate));
+  a = lanes[0];
+  b = lanes[1];
+}
+#endif
+
+
+// Combines four doubles with the scalar SSE2 minimum: min(min(a,b), min(c,d)).
+inline double sse2min(double a, double b, double c, double d)
+{
+  const __m128d va = _mm_load_sd(&a);
+  const __m128d vb = _mm_load_sd(&b);
+  const __m128d vc = _mm_load_sd(&c);
+  const __m128d vd = _mm_load_sd(&d);
+  const __m128d left  = _mm_min_sd(va, vb);   // min(a, b)
+  const __m128d right = _mm_min_sd(vc, vd);   // min(c, d)
+  double result;
+  _mm_store_sd(&result, _mm_min_sd(left, right));
+  return result;
+}
+     
+// Combines three doubles with the scalar SSE2 minimum: min(min(a,b), c).
+inline double sse2min(double a, double b, double c)
+{
+  const __m128d va = _mm_load_sd(&a);
+  const __m128d vb = _mm_load_sd(&b);
+  const __m128d vc = _mm_load_sd(&c);
+  const __m128d firstTwo = _mm_min_sd(va, vb);
+  double result;
+  _mm_store_sd(&result, _mm_min_sd(firstTwo, vc));
+  return result;
+}
+      
+// Returns the scalar SSE2 minimum (_mm_min_sd) of a and b.
+inline double sse2min(double a, double b)
+{
+  const __m128d lhs = _mm_load_sd(&a);
+  const __m128d rhs = _mm_load_sd(&b);
+  double result;
+  _mm_store_sd(&result, _mm_min_sd(lhs, rhs));
+  return result;
+}
+
+// Sets a to the scalar SSE2 minimum and c to the scalar SSE2 maximum of a, b, c.
+inline void sse2minmax(double& a, double b, double& c)
+{
+  const __m128d A =_mm_load_sd(&a);
+  const __m128d B =_mm_load_sd(&b);
+  const __m128d C =_mm_load_sd(&c);
+
+  // Both results are derived from the original inputs before anything is
+  // written back, so the incoming value of a still takes part in the maximum.
+  const __m128d lo = _mm_min_sd(_mm_min_sd(A,B), C);
+  const __m128d hi = _mm_max_sd(_mm_max_sd(A,B), C);
+  _mm_store_sd(&a, lo);
+  _mm_store_sd(&c, hi);
+}
+
+#endif // CGAL_USE_SSE2_MAX
+
+// Max specialised for double: forwards to the sse2max overloads when
+// CGAL_USE_SSE2_MAX is defined, and to std::max otherwise.
+template <>
+struct Max<double> : public std::binary_function< double, double, double > {
+  Max() {}
+
+  double operator()( const double& x, const double& y) const   // two operands
+  {
+#ifndef CGAL_USE_SSE2_MAX
+    return (std::max)( x, y);
+#else
+    return sse2max(x,y);
+#endif
+  }
+  double operator()( double x, double y, double z) const   // three operands
+  {
+#ifndef CGAL_USE_SSE2_MAX
+    return (std::max)((std::max)( x, y), z);
+#else
+    return sse2max(x,y,z);
+#endif
+  }
+  double operator()( double w,double x, double y, double z) const   // four operands
+  {
+#ifndef CGAL_USE_SSE2_MAX
+    return (std::max)((std::max)( x, y), (std::max)(w,z));
+#else
+    return sse2max(w,x,y,z);
+#endif
+  }
+};
+
+// Min specialised for double: forwards to the sse2min overloads when
+// CGAL_USE_SSE2_MAX is defined, and to std::min otherwise.
+template <>
+struct Min<double> : public std::binary_function< double, double, double > {
+  Min() {}
+
+  double operator()( const double& x, const double& y) const   // two operands
+  {
+#ifndef CGAL_USE_SSE2_MAX
+    return (std::min)( x, y);
+#else
+    return sse2min(x,y);
+#endif
+  }
+  double operator()( double x, double y, double z) const   // three operands
+  {
+#ifndef CGAL_USE_SSE2_MAX
+    return (std::min)((std::min)( x, y), z);
+#else
+    return sse2min(x,y,z);
+#endif
+  }
+  double operator()( double w,double x, double y, double z) const   // four operands
+  {
+#ifndef CGAL_USE_SSE2_MAX
+    return (std::min)((std::min)( x, y), (std::min)(w,z));
+#else
+    return sse2min(w,x,y,z);
+#endif
+  }
+};
 template< class T >
 class Is_valid
  : public std::unary_function< T, bool > {