mapnik/include/mapnik/sse.hpp
2015-06-16 12:49:16 +02:00

156 lines
4.1 KiB
C++

/*****************************************************************************
*
* This file is part of Mapnik (c++ mapping toolkit)
*
* Copyright (C) 2015 Artem Pavlenko
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*****************************************************************************/
#ifndef MAPNIK_SSE_HPP
#define MAPNIK_SSE_HPP
#include <emmintrin.h>
#include <stdint.h>
#include <xmmintrin.h>
// Round x down to a multiple of s by clearing the low bits of x.
// The mask ~(s - 1) only yields a multiple of s when s is a power of two.
// Caution: the mask is computed in the (promoted) type of s, so an unsigned s
// that is narrower than x zero-extends the mask and clears x's upper bits too.
#define ROUND_DOWN(x, s) ((x) & ~((s)-1))
// View of one 128-bit SSE integer vector as arrays of 32-, 16- and 8-bit
// integers; all members share the same 16 bytes of storage.
typedef union
{
__m128i v; // the whole 128-bit vector
int32_t i32[4]; // four signed 32-bit elements
uint32_t u32[4]; // four unsigned 32-bit elements
uint16_t u16[8]; // eight unsigned 16-bit elements
uint8_t u8[16]; // sixteen unsigned bytes
} m128_int;
// Unsigned "less than or equal" on each of the eight 16-bit lanes.
// A result lane is 0xFFFF where x <= y and 0 otherwise.
static inline __m128i _mm_cmple_epu16(__m128i x, __m128i y)
{
    // The saturating difference x - y clamps to zero exactly when x <= y.
    const __m128i clamped_diff = _mm_subs_epu16(x, y);
    const __m128i zero = _mm_setzero_si128();
    return _mm_cmpeq_epi16(clamped_diff, zero);
}
// Unsigned "less than or equal" on each of the sixteen 8-bit lanes.
// A result byte is 0xFF where x <= y and 0 otherwise.
static inline __m128i _mm_cmple_epu8(__m128i x, __m128i y)
{
    // x <= y exactly when x is the unsigned minimum of the pair.
    const __m128i smaller = _mm_min_epu8(x, y);
    return _mm_cmpeq_epi8(smaller, x);
}
// Unsigned "greater than" on each of the eight 16-bit lanes.
// A result lane is 0xFFFF where x > y and 0 otherwise.
static inline __m128i _mm_cmpgt_epu16(__m128i x, __m128i y)
{
    // y <= x: the saturating difference y - x clamps to zero.
    const __m128i y_le_x = _mm_cmpeq_epi16(_mm_subs_epu16(y, x), _mm_setzero_si128());
    const __m128i same = _mm_cmpeq_epi16(x, y);
    // Keep the lanes where y <= x holds but x == y does not.
    return _mm_andnot_si128(same, y_le_x);
}
// Unsigned "greater than" on each of the sixteen 8-bit lanes.
// A result byte is 0xFF where x > y and 0 otherwise.
static inline __m128i _mm_cmpgt_epu8(__m128i x, __m128i y)
{
    const __m128i same = _mm_cmpeq_epi8(x, y);
    // x >= y exactly when x is the unsigned maximum of the pair.
    const __m128i x_ge_y = _mm_cmpeq_epi8(_mm_max_epu8(x, y), x);
    // Strictly greater: x >= y with the equal lanes masked out.
    return _mm_andnot_si128(same, x_ge_y);
}
// Unsigned "less than" on each of the eight 16-bit lanes.
// A result lane is 0xFFFF where x < y and 0 otherwise
// (the same test as y > x).
static inline __m128i _mm_cmplt_epu16(__m128i x, __m128i y)
{
    // x <= y: the saturating difference x - y clamps to zero.
    const __m128i x_le_y = _mm_cmpeq_epi16(_mm_subs_epu16(x, y), _mm_setzero_si128());
    const __m128i same = _mm_cmpeq_epi16(y, x);
    // Strictly less: x <= y with the equal lanes masked out.
    return _mm_andnot_si128(same, x_le_y);
}
// Unsigned "less than" on each of the sixteen 8-bit lanes.
// A result byte is 0xFF where x < y and 0 otherwise
// (the same test as y > x).
static inline __m128i _mm_cmplt_epu8(__m128i x, __m128i y)
{
    const __m128i same = _mm_cmpeq_epi8(y, x);
    // y >= x exactly when y is the unsigned maximum of the pair.
    const __m128i y_ge_x = _mm_cmpeq_epi8(_mm_max_epu8(y, x), y);
    // Strictly less: y >= x with the equal lanes masked out.
    return _mm_andnot_si128(same, y_ge_x);
}
// Unsigned "greater than or equal" on each of the eight 16-bit lanes.
// A result lane is 0xFFFF where x >= y and 0 otherwise
// (the same test as y <= x).
static inline __m128i _mm_cmpge_epu16(__m128i x, __m128i y)
{
    // The saturating difference y - x clamps to zero exactly when y <= x.
    const __m128i clamped_diff = _mm_subs_epu16(y, x);
    const __m128i zero = _mm_setzero_si128();
    return _mm_cmpeq_epi16(clamped_diff, zero);
}
// Unsigned "greater than or equal" on each of the sixteen 8-bit lanes.
// A result byte is 0xFF where x >= y and 0 otherwise
// (the same test as y <= x).
static inline __m128i _mm_cmpge_epu8(__m128i x, __m128i y)
{
    // y <= x exactly when y is the unsigned minimum of the pair.
    const __m128i smaller = _mm_min_epu8(y, x);
    return _mm_cmpeq_epi8(smaller, y);
}
// Bitwise complement (~x) of all 128 bits of x.
// It's not often that you want to use this!
static inline __m128i _mm_not_si128(__m128i x)
{
    // XOR against an all-ones vector flips every bit.
    const __m128i all_ones = _mm_set1_epi32(-1);
    return _mm_xor_si128(x, all_ones);
}
// Absolute difference |x - y| of each pair of unsigned 16-bit lanes.
static inline __m128i _mm_absdiff_epu16(__m128i x, __m128i y)
{
    // Each saturating difference is zero in the lanes where it would go
    // negative, so at most one of the two is non-zero per lane and OR-ing
    // them selects the positive one.
    const __m128i x_minus_y = _mm_subs_epu16(x, y);
    const __m128i y_minus_x = _mm_subs_epu16(y, x);
    return _mm_or_si128(x_minus_y, y_minus_x);
}
// Absolute difference |x - y| of each pair of unsigned 8-bit lanes.
static inline __m128i _mm_absdiff_epu8(__m128i x, __m128i y)
{
    // Each saturating difference is zero in the lanes where it would go
    // negative, so at most one of the two is non-zero per lane and OR-ing
    // them selects the positive one.
    const __m128i x_minus_y = _mm_subs_epu8(x, y);
    const __m128i y_minus_x = _mm_subs_epu8(y, x);
    return _mm_or_si128(x_minus_y, y_minus_x);
}
// Divides each of the eight unsigned 16-bit lanes by 255 without a division
// instruction, using  x / 255 == ((x + 1) + (x >> 8)) >> 8.
// The result is exact for any product of two 8-bit values (0..65025).
static inline __m128i _mm_div255_epu16(__m128i x)
{
    const __m128i one = _mm_set1_epi16(1);
    const __m128i x_plus_one = _mm_adds_epu16(x, one);
    const __m128i high_byte = _mm_srli_epi16(x, 8);
    // Saturating adds, so the sum cannot wrap around within a lane.
    const __m128i sum = _mm_adds_epu16(x_plus_one, high_byte);
    return _mm_srli_epi16(sum, 8);
}
// Scales each unsigned byte of x by the matching byte of y, where y is read
// as a fraction of 255 (an "alpha blend" of x by y):
//     result = (x * y) / 255      (truncating division, per byte)
// Returns the sixteen scaled bytes in the same lane order as the inputs.
static inline __m128i _mm_scale_epu8(__m128i x, __m128i y)
{
    const __m128i zero = _mm_setzero_si128();

    // Widen the bytes to 16-bit lanes by interleaving with zero bytes; the
    // low and high halves of each input are handled as separate vectors.
    const __m128i x_lo = _mm_unpacklo_epi8(x, zero);
    const __m128i y_lo = _mm_unpacklo_epi8(y, zero);
    const __m128i x_hi = _mm_unpackhi_epi8(x, zero);
    const __m128i y_hi = _mm_unpackhi_epi8(y, zero);

    // 255 * 255 = 65025 fits in 16 bits, so the low half of the product
    // is the full product.
    const __m128i prod_lo = _mm_mullo_epi16(x_lo, y_lo);
    const __m128i prod_hi = _mm_mullo_epi16(x_hi, y_hi);

    // Divide by 255 to bring each lane back into the 0..255 range.
    const __m128i scaled_lo = _mm_div255_epu16(prod_lo);
    const __m128i scaled_hi = _mm_div255_epu16(prod_hi);

    // Narrow the 16-bit lanes back to bytes (with unsigned saturation).
    return _mm_packus_epi16(scaled_lo, scaled_hi);
}
#endif // MAPNIK_SSE_HPP