mirror of https://github.com/CGAL/cgal
Add sse2 based functions for fabs, min, and max
This commit is contained in:
parent
202c401d41
commit
bce0e8c4b4
|
|
@ -32,7 +32,7 @@
|
|||
#include <limits>
|
||||
|
||||
#ifdef CGAL_USE_SSE2_FABS
|
||||
#include <emmintrin.h>
|
||||
#include <CGAL/sse2.h>
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
|
|
@ -127,7 +127,7 @@ template <> class Algebraic_structure_traits< double >
|
|||
#ifdef CGAL_USE_SSE2_FABS
|
||||
inline double sse2fabs(double a)
|
||||
{
|
||||
static _CRT_ALIGN(16) const union{
|
||||
static CGAL_ALIGN_16 const union{
|
||||
__int64 i[2];
|
||||
__m128d m;
|
||||
} absMask = {0x7fffffffffffffff, 0x7fffffffffffffff};
|
||||
|
|
@ -138,6 +138,7 @@ inline double sse2fabs(double a)
|
|||
_mm_store_sd(&a, temp);
|
||||
return a;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
template <> class Real_embeddable_traits< double >
|
||||
|
|
|
|||
|
|
@ -0,0 +1,35 @@
|
|||
// Copyright (c) 1999,2007 Utrecht University (The Netherlands),
|
||||
// ETH Zurich (Switzerland), Freie Universitaet Berlin (Germany),
|
||||
// INRIA Sophia-Antipolis (France), Martin-Luther-University Halle-Wittenberg
|
||||
// (Germany), Max-Planck-Institute Saarbruecken (Germany), RISC Linz (Austria),
|
||||
// and Tel-Aviv University (Israel). All rights reserved.
|
||||
//
|
||||
// This file is part of CGAL (www.cgal.org); you can redistribute it and/or
|
||||
// modify it under the terms of the GNU Lesser General Public License as
|
||||
// published by the Free Software Foundation; version 2.1 of the License.
|
||||
// See the file LICENSE.LGPL distributed with CGAL.
|
||||
//
|
||||
// Licensees holding a valid commercial license may use this file in
|
||||
// accordance with the commercial license agreement provided with the software.
|
||||
//
|
||||
// This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
|
||||
// WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
//
|
||||
// $URL$
|
||||
// $Id$
|
||||
//
|
||||
//
|
||||
// Author(s) : Andreas Fabri
|
||||
|
||||
#ifndef CGAL_SSE2_H
|
||||
#define CGAL_SSE2_H
|
||||
|
||||
#include <emmintrin.h>
|
||||
|
||||
// CGAL_ALIGN_16: compiler-specific prefix that requests 16-byte alignment
// for the declaration it precedes (needed for operands of aligned SSE2
// loads/stores such as _mm_load_pd).
// The GCC-family branch must test __GNUC__ (defined by GCC and compatible
// compilers); __GNU__ identifies the GNU/Hurd operating system and would
// leave the macro undefined on ordinary GCC builds.
#if defined ( _MSC_VER )
#define CGAL_ALIGN_16 __declspec(align(16))
#elif defined( __GNUC__ )
#define CGAL_ALIGN_16 __attribute__((aligned(16)))
#endif
|
||||
|
||||
#endif // CGAL_SSE2_H
|
||||
|
|
@ -19,6 +19,10 @@
|
|||
#define CGAL_UTILS_CLASSES_H
|
||||
#include <CGAL/basic.h>
|
||||
|
||||
#ifdef CGAL_USE_SSE2_MAX
|
||||
#include <CGAL/sse2.h>
|
||||
#endif
|
||||
|
||||
namespace CGAL {
|
||||
|
||||
template < class A, class B = A >
|
||||
|
|
@ -77,6 +81,184 @@ protected:
|
|||
Less c;
|
||||
};
|
||||
|
||||
#ifdef CGAL_USE_SSE2_MAX
|
||||
|
||||
// Returns the largest of four doubles using scalar SSE2 instructions.
// The reduction is pairwise: max( max(a,b), max(c,d) ).
// Operand order is kept exactly as written because the SSE2 max instruction
// is not symmetric when NaNs or differently signed zeros are involved.
inline double sse2max(double a, double b, double c, double d)
{
  const __m128d ab = _mm_max_sd(_mm_load_sd(&a), _mm_load_sd(&b));
  const __m128d cd = _mm_max_sd(_mm_load_sd(&c), _mm_load_sd(&d));

  double result;
  _mm_store_sd(&result, _mm_max_sd(ab, cd));
  return result;
}
|
||||
|
||||
// Returns the largest of three doubles using scalar SSE2 instructions.
// Evaluated left to right as max( max(a,b), c ); the operand order is
// preserved because the SSE2 max instruction is not symmetric for NaNs
// and signed zeros.
inline double sse2max(double a, double b, double c)
{
  const __m128d ab = _mm_max_sd(_mm_load_sd(&a), _mm_load_sd(&b));

  double result;
  _mm_store_sd(&result, _mm_max_sd(ab, _mm_load_sd(&c)));
  return result;
}
|
||||
|
||||
// Returns the larger of two doubles using the scalar SSE2 max instruction.
// The operands are passed as (a, b); the order matters only for NaNs and
// signed zeros, where the instruction is not symmetric.
inline double sse2max(double a, double b)
{
  double result;
  _mm_store_sd(&result, _mm_max_sd(_mm_load_sd(&a), _mm_load_sd(&b)));
  return result;
}
|
||||
|
||||
|
||||
#if 0
// Disabled experiment, kept for reference.
// Doing things in parallel seems the way to go
// but copying to/from arrays has too much overhead
//
// Computes two independent maxima with one packed instruction:
//   a = max(a,a2)   b = max(b,b2)
inline void sse2mmax2(double& a, double a2, double& b, double b2)
{
  // _mm_load_pd / _mm_store_pd need 16-byte aligned memory, hence the
  // aligned staging arrays.
  CGAL_ALIGN_16 double first[2];
  first[0] = a;
  first[1] = b;
  const __m128d F = _mm_load_pd(first);

  CGAL_ALIGN_16 double second[2];
  second[0] = a2;
  second[1] = b2;
  const __m128d S = _mm_load_pd(second);

  // Lane 0 holds max(a,a2), lane 1 holds max(b,b2).
  _mm_store_pd(first, _mm_max_pd(F, S));
  a = first[0];
  b = first[1];
}
#endif
|
||||
|
||||
|
||||
// Returns the smallest of four doubles using scalar SSE2 instructions.
// The reduction is pairwise: min( min(a,b), min(c,d) ).
// Operand order is kept exactly as written because the SSE2 min instruction
// is not symmetric when NaNs or differently signed zeros are involved.
inline double sse2min(double a, double b, double c, double d)
{
  const __m128d ab = _mm_min_sd(_mm_load_sd(&a), _mm_load_sd(&b));
  const __m128d cd = _mm_min_sd(_mm_load_sd(&c), _mm_load_sd(&d));

  double result;
  _mm_store_sd(&result, _mm_min_sd(ab, cd));
  return result;
}
|
||||
|
||||
// Returns the smallest of three doubles using scalar SSE2 instructions.
// Evaluated left to right as min( min(a,b), c ); the operand order is
// preserved because the SSE2 min instruction is not symmetric for NaNs
// and signed zeros.
inline double sse2min(double a, double b, double c)
{
  const __m128d ab = _mm_min_sd(_mm_load_sd(&a), _mm_load_sd(&b));

  double result;
  _mm_store_sd(&result, _mm_min_sd(ab, _mm_load_sd(&c)));
  return result;
}
|
||||
|
||||
// Returns the smaller of two doubles using the scalar SSE2 min instruction.
// The operands are passed as (a, b); the order matters only for NaNs and
// signed zeros, where the instruction is not symmetric.
inline double sse2min(double a, double b)
{
  double result;
  _mm_store_sd(&result, _mm_min_sd(_mm_load_sd(&a), _mm_load_sd(&b)));
  return result;
}
|
||||
|
||||
// Computes the minimum and maximum of three doubles in one call.
//   a  (in/out): on return holds min(a, b, c)
//   b  (in)    : only read
//   c  (in/out): on return holds max(a, b, c)
// Both results are computed from the values the arguments had on entry.
// All three inputs are loaded before anything is written back, so the
// maximum still sees the original 'a' (an earlier version overwrote it
// with the minimum first and could therefore lose the largest value).
// Only scalar (_sd) operations are used; the upper SSE lane is irrelevant.
inline void sse2minmax(double& a, double b, double& c)
{
  const __m128d A = _mm_load_sd(&a);
  const __m128d B = _mm_load_sd(&b);
  const __m128d C = _mm_load_sd(&c);

  const __m128d lowest  = _mm_min_sd(_mm_min_sd(A, B), C);
  const __m128d highest = _mm_max_sd(_mm_max_sd(A, B), C);

  _mm_store_sd(&a, lowest);
  _mm_store_sd(&c, highest);
}
|
||||
|
||||
#endif // CGAL_USE_SSE2_MAX
|
||||
|
||||
template <>
|
||||
struct Max<double> :public std::binary_function< double, double, double > {
|
||||
Max() {}
|
||||
|
||||
double operator()( const double& x, const double& y) const
|
||||
{
|
||||
#ifdef CGAL_USE_SSE2_MAX
|
||||
return sse2max(x,y);
|
||||
#else
|
||||
return (std::max)( x, y);
|
||||
#endif
|
||||
}
|
||||
|
||||
double operator()( double x, double y, double z) const
|
||||
{
|
||||
#ifdef CGAL_USE_SSE2_MAX
|
||||
return sse2max(x,y,z);
|
||||
#else
|
||||
return (std::max)((std::max)( x, y), z);
|
||||
#endif
|
||||
}
|
||||
|
||||
double operator()( double w,double x, double y, double z) const
|
||||
{
|
||||
#ifdef CGAL_USE_SSE2_MAX
|
||||
return sse2max(w,x,y,z);
|
||||
#else
|
||||
return (std::max)((std::max)( x, y), (std::max)(w,z));
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct Min<double> :public std::binary_function< double, double, double > {
|
||||
Min() {}
|
||||
|
||||
double operator()( const double& x, const double& y) const
|
||||
{
|
||||
#ifdef CGAL_USE_SSE2_MAX
|
||||
return sse2min(x,y);
|
||||
#else
|
||||
return (std::min)( x, y);
|
||||
#endif
|
||||
}
|
||||
|
||||
double operator()( double x, double y, double z) const
|
||||
{
|
||||
#ifdef CGAL_USE_SSE2_MAX
|
||||
return sse2min(x,y,z);
|
||||
#else
|
||||
return (std::min)((std::min)( x, y), z);
|
||||
#endif
|
||||
}
|
||||
|
||||
double operator()( double w,double x, double y, double z) const
|
||||
{
|
||||
#ifdef CGAL_USE_SSE2_MAX
|
||||
return sse2min(w,x,y,z);
|
||||
#else
|
||||
return (std::min)((std::min)( x, y), (std::min)(w,z));
|
||||
#endif
|
||||
}
|
||||
};
|
||||
template< class T >
|
||||
class Is_valid
|
||||
: public std::unary_function< T, bool > {
|
||||
|
|
|
|||
Loading…
Reference in New Issue