File 0004-use-integer-sse2-operations-instead-of-single-precis.patch of Package libdvbcsa
From e2192e3617e0b652275f2dc6dfe3be63dd6b8c46 Mon Sep 17 00:00:00 2001
From: diaxen <diaxen@3589dd83-fcde-432d-8375-59f0e9bff529>
Date: Mon, 20 May 2013 20:15:51 +0000
Subject: [PATCH 4/8] use integer sse2 operations instead of single precision
operations
git-svn-id: svn://svn.videolan.org/libdvbcsa/trunk@16 3589dd83-fcde-432d-8375-59f0e9bff529
---
src/dvbcsa_bs_sse.h | 22 +++++++++++-----------
1 file changed, 11 insertions(+), 11 deletions(-)
diff --git a/src/dvbcsa_bs_sse.h b/src/dvbcsa_bs_sse.h
index 51edbe3..d2ffef3 100644
--- a/src/dvbcsa_bs_sse.h
+++ b/src/dvbcsa_bs_sse.h
@@ -29,27 +29,27 @@
#include <xmmintrin.h>
#include <emmintrin.h>
-typedef __m128 dvbcsa_bs_word_t;
+typedef __m128i dvbcsa_bs_word_t;
#define BS_BATCH_SIZE 128
#define BS_BATCH_BYTES 16
-#define BS_VAL(n, m) _mm_castsi128_ps(_mm_set_epi64x(n, m))
+#define BS_VAL(n, m) _mm_set_epi64x(n, m)
#define BS_VAL64(n) BS_VAL(0x##n##ULL, 0x##n##ULL)
#define BS_VAL32(n) BS_VAL64(n##n)
#define BS_VAL16(n) BS_VAL32(n##n)
#define BS_VAL8(n) BS_VAL16(n##n)
-#define BS_AND(a, b) _mm_and_ps((a), (b))
-#define BS_OR(a, b) _mm_or_ps((a), (b))
-#define BS_XOR(a, b) _mm_xor_ps((a), (b))
-#define BS_XOREQ(a, b) { dvbcsa_bs_word_t *_t = &(a); *_t = _mm_xor_ps(*_t, (b)); }
-#define BS_NOT(a) _mm_xor_ps((a), BS_VAL8(ff))
+#define BS_AND(a, b) _mm_and_si128((a), (b))
+#define BS_OR(a, b) _mm_or_si128((a), (b))
+#define BS_XOR(a, b) _mm_xor_si128((a), (b))
+#define BS_XOREQ(a, b) { dvbcsa_bs_word_t *_t = &(a); *_t = _mm_xor_si128(*_t, (b)); }
+#define BS_NOT(a) _mm_andnot_si128((a), BS_VAL8(ff))
-#define BS_SHL(a, n) _mm_castsi128_ps(_mm_slli_epi64(_mm_castps_si128(a), n))
-#define BS_SHR(a, n) _mm_castsi128_ps(_mm_srli_epi64(_mm_castps_si128(a), n))
-#define BS_SHL8(a, n) _mm_castsi128_ps(_mm_slli_si128(_mm_castps_si128(a), n))
-#define BS_SHR8(a, n) _mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(a), n))
+#define BS_SHL(a, n) _mm_slli_epi64(a, n)
+#define BS_SHR(a, n) _mm_srli_epi64(a, n)
+#define BS_SHL8(a, n) _mm_slli_si128(a, n)
+#define BS_SHR8(a, n) _mm_srli_si128(a, n)
#define BS_EXTRACT8(a, n) ((uint8_t*)&(a))[n]
--
2.1.4