Add SSE2-based functions for fabs, min, and max

This commit is contained in:
Andreas Fabri 2011-04-01 14:01:17 +00:00
parent 202c401d41
commit bce0e8c4b4
3 changed files with 220 additions and 2 deletions

View File

@ -32,7 +32,7 @@
#include <limits> #include <limits>
#ifdef CGAL_USE_SSE2_FABS #ifdef CGAL_USE_SSE2_FABS
#include <emmintrin.h> #include <CGAL/sse2.h>
#endif #endif
#ifdef _MSC_VER #ifdef _MSC_VER
@ -127,7 +127,7 @@ template <> class Algebraic_structure_traits< double >
#ifdef CGAL_USE_SSE2_FABS #ifdef CGAL_USE_SSE2_FABS
inline double sse2fabs(double a) inline double sse2fabs(double a)
{ {
static _CRT_ALIGN(16) const union{ static CGAL_ALIGN_16 const union{
__int64 i[2]; __int64 i[2];
__m128d m; __m128d m;
} absMask = {0x7fffffffffffffff, 0x7fffffffffffffff}; } absMask = {0x7fffffffffffffff, 0x7fffffffffffffff};
@ -138,6 +138,7 @@ inline double sse2fabs(double a)
_mm_store_sd(&a, temp); _mm_store_sd(&a, temp);
return a; return a;
} }
#endif #endif
template <> class Real_embeddable_traits< double > template <> class Real_embeddable_traits< double >

View File

@ -0,0 +1,35 @@
// Copyright (c) 1999,2007 Utrecht University (The Netherlands),
// ETH Zurich (Switzerland), Freie Universitaet Berlin (Germany),
// INRIA Sophia-Antipolis (France), Martin-Luther-University Halle-Wittenberg
// (Germany), Max-Planck-Institute Saarbruecken (Germany), RISC Linz (Austria),
// and Tel-Aviv University (Israel). All rights reserved.
//
// This file is part of CGAL (www.cgal.org); you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public License as
// published by the Free Software Foundation; version 2.1 of the License.
// See the file LICENSE.LGPL distributed with CGAL.
//
// Licensees holding a valid commercial license may use this file in
// accordance with the commercial license agreement provided with the software.
//
// This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
// WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
//
// $URL$
// $Id$
//
//
// Author(s) : Andreas Fabri
#ifndef CGAL_SSE2_H
#define CGAL_SSE2_H
#include <emmintrin.h>
// CGAL_ALIGN_16: declaration prefix requesting 16-byte alignment, as needed
// for data that is accessed through aligned SSE2 loads and stores.
// MSVC spells this with __declspec; GCC-compatible compilers (which all
// predefine __GNUC__) use the `aligned` attribute. On any other compiler the
// macro is left undefined.
#if defined ( _MSC_VER )
#define CGAL_ALIGN_16 __declspec(align(16))
#elif defined( __GNUC__ )
#define CGAL_ALIGN_16 __attribute__((aligned(16)))
#endif
#endif // CGAL_SSE2_H

View File

@ -19,6 +19,10 @@
#define CGAL_UTILS_CLASSES_H #define CGAL_UTILS_CLASSES_H
#include <CGAL/basic.h> #include <CGAL/basic.h>
#ifdef CGAL_USE_SSE2_MAX
#include <CGAL/sse2.h>
#endif
namespace CGAL { namespace CGAL {
template < class A, class B = A > template < class A, class B = A >
@ -77,6 +81,184 @@ protected:
Less c; Less c;
}; };
#ifdef CGAL_USE_SSE2_MAX
// Returns the largest of a, b, c and d using scalar SSE2 maximum instructions.
// The two pairs (a,b) and (c,d) are reduced first, then their results combined.
inline double sse2max(double a, double b, double c, double d)
{
  const __m128d left  = _mm_max_sd(_mm_load_sd(&a), _mm_load_sd(&b));
  const __m128d right = _mm_max_sd(_mm_load_sd(&c), _mm_load_sd(&d));
  double result;
  _mm_store_sd(&result, _mm_max_sd(left, right));
  return result;
}
// Returns the largest of a, b and c using scalar SSE2 maximum instructions:
// max(a,b) is formed first and then compared against c.
inline double sse2max(double a, double b, double c)
{
  const __m128d firstTwo = _mm_max_sd(_mm_load_sd(&a), _mm_load_sd(&b));
  double result;
  _mm_store_sd(&result, _mm_max_sd(firstTwo, _mm_load_sd(&c)));
  return result;
}
// Returns the larger of a and b using the scalar SSE2 maximum instruction.
inline double sse2max(double a, double b)
{
  double result;
  _mm_store_sd(&result, _mm_max_sd(_mm_load_sd(&a), _mm_load_sd(&b)));
  return result;
}
#if 0
// Doing things in parallel seems the way to go
// but copying to/from arrays has too much overhead
// a = max(a,a2) b = max(b,b2)
inline void sse2mmax2(double& a, double a2, double& b, double b2)
{
CGAL_ALIGN_16 double res[2];
res[0] = a;
res[1] = b;
__m128d F =_mm_load_pd(res);
res[0] = a2;
res[1] = b2;
__m128d S =_mm_load_pd(res);
__m128d C = _mm_max_pd(F,S);
_mm_store_pd(res, C);
a = res[0];
b = res[1];
}
#endif
// Returns the smallest of a, b, c and d using scalar SSE2 minimum instructions.
// The two pairs (a,b) and (c,d) are reduced first, then their results combined.
inline double sse2min(double a, double b, double c, double d)
{
  const __m128d left  = _mm_min_sd(_mm_load_sd(&a), _mm_load_sd(&b));
  const __m128d right = _mm_min_sd(_mm_load_sd(&c), _mm_load_sd(&d));
  double result;
  _mm_store_sd(&result, _mm_min_sd(left, right));
  return result;
}
// Returns the smallest of a, b and c using scalar SSE2 minimum instructions:
// min(a,b) is formed first and then compared against c.
inline double sse2min(double a, double b, double c)
{
  const __m128d firstTwo = _mm_min_sd(_mm_load_sd(&a), _mm_load_sd(&b));
  double result;
  _mm_store_sd(&result, _mm_min_sd(firstTwo, _mm_load_sd(&c)));
  return result;
}
// Returns the smaller of a and b using the scalar SSE2 minimum instruction.
inline double sse2min(double a, double b)
{
  double result;
  _mm_store_sd(&result, _mm_min_sd(_mm_load_sd(&a), _mm_load_sd(&b)));
  return result;
}
// Computes the minimum and the maximum of the three values a, b and c in one
// go. On return, a holds the smallest and c the largest of the three input
// values; b is only read.
//
// All three inputs are loaded before anything is written back, and both
// reductions start from the untouched inputs, so the maximum is not affected
// by the minimum having been computed first.
inline void sse2minmax(double& a, double b, double& c)
{
  const __m128d A = _mm_load_sd(&a);
  const __m128d B = _mm_load_sd(&b);
  const __m128d C = _mm_load_sd(&c);

  const __m128d smallest = _mm_min_sd(_mm_min_sd(A, B), C);
  const __m128d largest  = _mm_max_sd(_mm_max_sd(A, B), C);

  _mm_store_sd(&a, smallest);
  _mm_store_sd(&c, largest);
}
#endif // CGAL_USE_SSE2_MAX
// Specialization of Max for double, offering two-, three- and four-argument
// call operators. When CGAL_USE_SSE2_MAX is defined the work is forwarded to
// the sse2max overloads; otherwise it is expressed with std::max.
template <>
struct Max<double> : public std::binary_function< double, double, double > {
  Max() {}

  // Larger of x and y.
  double operator()( const double& x, const double& y) const
  {
#ifndef CGAL_USE_SSE2_MAX
    return (std::max)( x, y);
#else
    return sse2max(x,y);
#endif
  }

  // Largest of x, y and z.
  double operator()( double x, double y, double z) const
  {
#ifndef CGAL_USE_SSE2_MAX
    const double xy = (std::max)( x, y);
    return (std::max)( xy, z);
#else
    return sse2max(x,y,z);
#endif
  }

  // Largest of w, x, y and z.
  double operator()( double w,double x, double y, double z) const
  {
#ifndef CGAL_USE_SSE2_MAX
    const double xy = (std::max)( x, y);
    const double wz = (std::max)( w, z);
    return (std::max)( xy, wz);
#else
    return sse2max(w,x,y,z);
#endif
  }
};
// Specialization of Min for double, offering two-, three- and four-argument
// call operators. When CGAL_USE_SSE2_MAX is defined the work is forwarded to
// the sse2min overloads; otherwise it is expressed with std::min.
template <>
struct Min<double> : public std::binary_function< double, double, double > {
  Min() {}

  // Smaller of x and y.
  double operator()( const double& x, const double& y) const
  {
#ifndef CGAL_USE_SSE2_MAX
    return (std::min)( x, y);
#else
    return sse2min(x,y);
#endif
  }

  // Smallest of x, y and z.
  double operator()( double x, double y, double z) const
  {
#ifndef CGAL_USE_SSE2_MAX
    const double xy = (std::min)( x, y);
    return (std::min)( xy, z);
#else
    return sse2min(x,y,z);
#endif
  }

  // Smallest of w, x, y and z.
  double operator()( double w,double x, double y, double z) const
  {
#ifndef CGAL_USE_SSE2_MAX
    const double xy = (std::min)( x, y);
    const double wz = (std::min)( w, z);
    return (std::min)( xy, wz);
#else
    return sse2min(w,x,y,z);
#endif
  }
};
template< class T > template< class T >
class Is_valid class Is_valid
: public std::unary_function< T, bool > { : public std::unary_function< T, bool > {