File libvorbis-sse-optimize.diff of Package libvorbis-sse
diff -ruN --exclude '*.in' libvorbis-1.2.0/lib/block.c libvorbis-1.2.0-sse/lib/block.c
--- libvorbis-1.2.0/lib/block.c 2007-08-02 12:42:08.000000000 +0200
+++ libvorbis-1.2.0-sse/lib/block.c 2007-08-02 12:43:10.000000000 +0200
@@ -30,6 +30,9 @@
#include "lpc.h"
#include "registry.h"
#include "misc.h"
+#ifdef __SSE__ /* SSE Optimize */
+#include "xmmlib.h"
+#endif /* SSE Optimize */
static int ilog2(unsigned int v){
int ret=0;
@@ -81,6 +84,10 @@
/* block abstraction setup *********************************************/
+#ifdef __SSE__ /* SSE Optimize */
+#undef DWORD_ALIGN
+#define DWORD_ALIGN 16
+#endif /* SSE Optimize */
#ifndef WORD_ALIGN
#define WORD_ALIGN 8
#endif
@@ -111,7 +118,12 @@
}
void *_vorbis_block_alloc(vorbis_block *vb,long bytes){
+ void *ret = NULL;
+#ifdef __SSE__
+ bytes=(bytes+(DWORD_ALIGN-1)) & ~(DWORD_ALIGN-1);
+#else // for __SSE__
bytes=(bytes+(WORD_ALIGN-1)) & ~(WORD_ALIGN-1);
+#endif // for __SSE__
if(bytes+vb->localtop>vb->localalloc){
/* can't just _ogg_realloc... there are outstanding pointers */
if(vb->localstore){
@@ -127,10 +139,10 @@
vb->localtop=0;
}
{
- void *ret=(void *)(((char *)vb->localstore)+vb->localtop);
+ ret=(void *)(((char *)vb->localstore)+vb->localtop);
vb->localtop+=bytes;
- return ret;
}
+ return ret;
}
/* reap the chain, pull the ripcord */
@@ -609,7 +621,39 @@
for(i=0;i<vi->channels;i++){
vbi->pcmdelay[i]=
_vorbis_block_alloc(vb,(vb->pcmend+beginW)*sizeof(*vbi->pcmdelay[i]));
+#ifdef __SSE__ /* SSE Optimize */
+ {
+ int j;
+ float *d = (float*)(vbi->pcmdelay[i]);
+ float *s = (float*)(v->pcm[i]);
+ for(j=0;j<vb->pcmend+beginW;)
+ {
+ __m128 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7;
+ XMM0 = _mm_load_ps(s );
+ XMM1 = _mm_load_ps(s+ 4);
+ XMM2 = _mm_load_ps(s+ 8);
+ XMM3 = _mm_load_ps(s+12);
+ XMM4 = _mm_load_ps(s+16);
+ XMM5 = _mm_load_ps(s+20);
+ XMM6 = _mm_load_ps(s+24);
+ XMM7 = _mm_load_ps(s+28);
+ _mm_store_ps(d , XMM0);
+ _mm_store_ps(d+ 4, XMM1);
+ _mm_store_ps(d+ 8, XMM2);
+ _mm_store_ps(d+12, XMM3);
+ _mm_store_ps(d+16, XMM4);
+ _mm_store_ps(d+20, XMM5);
+ _mm_store_ps(d+24, XMM6);
+ _mm_store_ps(d+28, XMM7);
+ _mm_prefetch((const char*)(s+64), _MM_HINT_T0);
+ s += 32;
+ d += 32;
+ j += 32;
+ }
+ }
+#else /* SSE Optimize */
memcpy(vbi->pcmdelay[i],v->pcm[i],(vb->pcmend+beginW)*sizeof(*vbi->pcmdelay[i]));
+#endif /* SSE Optimize */
vb->pcm[i]=vbi->pcmdelay[i]+beginW;
/* before we added the delay
@@ -642,8 +686,72 @@
v->pcm_current-=movementW;
for(i=0;i<vi->channels;i++)
+#ifdef __SSE__ /* SSE Optimize */
+ {
+ int j;
+ float *d = (float*)(v->pcm[i]);
+ float *s = (float*)(v->pcm[i]+movementW);
+ if(s>=d)
+ {
+ for(j=0;j<v->pcm_current;)
+ {
+ __m128 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7;
+ XMM0 = _mm_load_ps(s );
+ XMM1 = _mm_load_ps(s+ 4);
+ XMM2 = _mm_load_ps(s+ 8);
+ XMM3 = _mm_load_ps(s+12);
+ XMM4 = _mm_load_ps(s+16);
+ XMM5 = _mm_load_ps(s+20);
+ XMM6 = _mm_load_ps(s+24);
+ XMM7 = _mm_load_ps(s+28);
+ _mm_store_ps(d , XMM0);
+ _mm_store_ps(d+ 4, XMM1);
+ _mm_store_ps(d+ 8, XMM2);
+ _mm_store_ps(d+12, XMM3);
+ _mm_store_ps(d+16, XMM4);
+ _mm_store_ps(d+20, XMM5);
+ _mm_store_ps(d+24, XMM6);
+ _mm_store_ps(d+28, XMM7);
+ s += 32;
+ d += 32;
+ j += 32;
+ _mm_prefetch((const char*)(s+64), _MM_HINT_NTA);
+ }
+ }
+ else
+ {
+ d += v->pcm_current;
+ s += v->pcm_current;
+ for(j=0;j<v->pcm_current;)
+ {
+ __m128 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7;
+ XMM0 = _mm_load_ps(s-32);
+ XMM1 = _mm_load_ps(s-28);
+ XMM2 = _mm_load_ps(s-24);
+ XMM3 = _mm_load_ps(s-20);
+ XMM4 = _mm_load_ps(s-16);
+ XMM5 = _mm_load_ps(s-12);
+ XMM6 = _mm_load_ps(s- 8);
+ XMM7 = _mm_load_ps(s- 4);
+ _mm_store_ps(d-32, XMM0);
+ _mm_store_ps(d-28, XMM1);
+ _mm_store_ps(d-24, XMM2);
+ _mm_store_ps(d-20, XMM3);
+ _mm_store_ps(d-16, XMM4);
+ _mm_store_ps(d-12, XMM5);
+ _mm_store_ps(d- 8, XMM6);
+ _mm_store_ps(d- 4, XMM7);
+ s -= 32;
+ d -= 32;
+ j += 32;
+ _mm_prefetch((const char*)(s-64), _MM_HINT_NTA);
+ }
+ }
+ }
+#else /* SSE Optimize */
memmove(v->pcm[i],v->pcm[i]+movementW,
v->pcm_current*sizeof(*v->pcm[i]));
+#endif /* SSE Optimize */
v->lW=v->W;
@@ -699,6 +807,53 @@
return 0;
}
+#ifdef __SSE__ /* SSE Optimize */
+/* Windowed overlap-add, 16 floats per loop iteration:
+
+     pcm[i] = pcm[i]*w[count-1-i] + p[i]*w[i]     for 0 <= i < count
+
+   pcm   - accumulated output, updated in place
+   w     - window; read ascending (w[i]) and descending (w[count-1-i])
+   p     - newly decoded samples
+   count - number of samples to process
+
+   The descending window taps are fetched four at a time from
+   w+count-i-4, w+count-i-8, ... and lane-reversed with
+   _MM_SHUFFLE(0,1,2,3).
+
+   Every access is an aligned _mm_load_ps/_mm_store_ps and each iteration
+   always handles a full group of 16, so pcm, p, w and w+count have to be
+   16-byte aligned and count has to be a multiple of 16; nothing here
+   checks either condition. */
+static inline void vorbis_synthesis_blockin_pmadd(float *pcm, float *w, float *p, int count)
+{
+ int i;
+ for(i=0;i<count;i+=16)
+ {
+ __m128 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7;
+ /* outputs i..i+7: load reversed-window taps, new samples, forward
+ window taps and the running pcm */
+ XMM1 = _mm_load_ps(w+count-i- 4);
+ XMM4 = _mm_load_ps(w+count-i- 8);
+ XMM2 = _mm_load_ps(p+i );
+ XMM6 = _mm_load_ps(w+i );
+ XMM5 = _mm_load_ps(p+i+ 4);
+ XMM7 = _mm_load_ps(w+i+ 4);
+ XMM0 = _mm_load_ps(pcm+i );
+ XMM3 = _mm_load_ps(pcm+i+ 4);
+ /* reverse lane order so lane k holds w[count-1-(i+k)] */
+ XMM1 = _mm_shuffle_ps(XMM1, XMM1, _MM_SHUFFLE(0,1,2,3));
+ XMM4 = _mm_shuffle_ps(XMM4, XMM4, _MM_SHUFFLE(0,1,2,3));
+ XMM2 = _mm_mul_ps(XMM2, XMM6);
+ XMM5 = _mm_mul_ps(XMM5, XMM7);
+ XMM0 = _mm_mul_ps(XMM0, XMM1);
+ XMM3 = _mm_mul_ps(XMM3, XMM4);
+ /* loads for outputs i+8..i+15 are interleaved with the first sums */
+ XMM1 = _mm_load_ps(pcm+i+ 8);
+ XMM4 = _mm_load_ps(pcm+i+12);
+ XMM0 = _mm_add_ps(XMM0, XMM2);
+ XMM3 = _mm_add_ps(XMM3, XMM5);
+ XMM2 = _mm_load_ps(w+count-i-12);
+ XMM6 = _mm_load_ps(w+i+ 8);
+ XMM5 = _mm_load_ps(w+count-i-16);
+ XMM7 = _mm_load_ps(w+i+12);
+ _mm_store_ps(pcm+i , XMM0);
+ _mm_store_ps(pcm+i+ 4, XMM3);
+ XMM0 = _mm_load_ps(p+i+ 8);
+ XMM3 = _mm_load_ps(p+i+12);
+ XMM2 = _mm_shuffle_ps(XMM2, XMM2, _MM_SHUFFLE(0,1,2,3));
+ XMM5 = _mm_shuffle_ps(XMM5, XMM5, _MM_SHUFFLE(0,1,2,3));
+ XMM0 = _mm_mul_ps(XMM0, XMM6);
+ XMM3 = _mm_mul_ps(XMM3, XMM7);
+ XMM1 = _mm_mul_ps(XMM1, XMM2);
+ XMM4 = _mm_mul_ps(XMM4, XMM5);
+ XMM1 = _mm_add_ps(XMM1, XMM0);
+ XMM4 = _mm_add_ps(XMM4, XMM3);
+ _mm_store_ps(pcm+i+ 8, XMM1);
+ _mm_store_ps(pcm+i+12, XMM4);
+ }
+}
+#endif /* SSE Optimize */
+
/* Unlike in analysis, the window is only partially applied for each
block. The time domain envelope is not yet handled at the point of
calling (as it relies on the previous block). */
@@ -754,6 +909,36 @@
for(j=0;j<vi->channels;j++){
/* the overlap/add section */
if(v->lW){
+#ifdef __SSE__ /* SSE Optimize */
+ if(v->W){
+ /* large/large */
+ float *w = _vorbis_window_get(b->window[1]-hs);
+ float *pcm = v->pcm[j]+prevCenter;
+ float *p = vb->pcm[j];
+ vorbis_synthesis_blockin_pmadd(pcm, w, p, n1);
+ }else{
+ /* large/small */
+ float *w = _vorbis_window_get(b->window[0]-hs);
+ float *pcm = v->pcm[j]+prevCenter+n1/2-n0/2;
+ float *p = vb->pcm[j];
+ vorbis_synthesis_blockin_pmadd(pcm, w, p, n0);
+ }
+ }else{
+ if(v->W){
+ /* small/large */
+ float *w = _vorbis_window_get(b->window[0]-hs);
+ float *pcm = v->pcm[j]+prevCenter;
+ float *p = vb->pcm[j]+n1/2-n0/2;
+ vorbis_synthesis_blockin_pmadd(pcm, w, p, n0);
+ memcpy(pcm+n0, p+n0, (n1/2-n0/2)*sizeof(float));
+ }else{
+ /* small/small */
+ float *w = _vorbis_window_get(b->window[0]-hs);
+ float *pcm = v->pcm[j]+prevCenter;
+ float *p = vb->pcm[j];
+ vorbis_synthesis_blockin_pmadd(pcm, w, p, n0);
+ }
+#else /* SSE Optimize */
if(v->W){
/* large/large */
float *w=_vorbis_window_get(b->window[1]-hs);
@@ -787,14 +972,38 @@
for(i=0;i<n0;i++)
pcm[i]=pcm[i]*w[n0-i-1] +p[i]*w[i];
}
+#endif /* SSE Optimize */
}
/* the copy section */
{
float *pcm=v->pcm[j]+thisCenter;
float *p=vb->pcm[j]+n;
+#ifdef __SSE__ /* SSE Optimize */
+ for(i=0;i<n;i+=32)
+ {
+ __m128 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7;
+ XMM0 = _mm_load_ps(p+i );
+ XMM1 = _mm_load_ps(p+i+ 4);
+ XMM2 = _mm_load_ps(p+i+ 8);
+ XMM3 = _mm_load_ps(p+i+12);
+ XMM4 = _mm_load_ps(p+i+16);
+ XMM5 = _mm_load_ps(p+i+20);
+ XMM6 = _mm_load_ps(p+i+24);
+ XMM7 = _mm_load_ps(p+i+28);
+ _mm_store_ps(pcm+i , XMM0);
+ _mm_store_ps(pcm+i+ 4, XMM1);
+ _mm_store_ps(pcm+i+ 8, XMM2);
+ _mm_store_ps(pcm+i+12, XMM3);
+ _mm_store_ps(pcm+i+16, XMM4);
+ _mm_store_ps(pcm+i+20, XMM5);
+ _mm_store_ps(pcm+i+24, XMM6);
+ _mm_store_ps(pcm+i+28, XMM7);
+ }
+#else /* SSE Optimize */
for(i=0;i<n;i++)
pcm[i]=p[i];
+#endif /* SSE Optimize */
}
}
diff -ruN --exclude '*.in' libvorbis-1.2.0/lib/codebook.c libvorbis-1.2.0-sse/lib/codebook.c
--- libvorbis-1.2.0/lib/codebook.c 2007-07-24 02:09:47.000000000 +0200
+++ libvorbis-1.2.0-sse/lib/codebook.c 2007-08-02 12:52:26.000000000 +0200
@@ -24,6 +24,148 @@
#include "scales.h"
#include "misc.h"
#include "os.h"
+#ifdef __SSE__ /* SSE Optimize */
+#include "xmmlib.h"
+#endif /* SSE Optimize */
+
+#define BUFFER_INCREMENT 256
+
+#if 1
+/* bitrev8[v] is the byte v with its eight bits in reverse order
+   (bitrev8[0x01]==0x80, bitrev8[0x02]==0x40, ...).  Indexed with one byte
+   at a time by the table-driven bitreverse(). */
+static const unsigned char bitrev8[256] = {
+ 0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0,
+ 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0,
+ 0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8,
+ 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8,
+ 0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4,
+ 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4,
+ 0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC,
+ 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC,
+ 0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2,
+ 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2,
+ 0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA,
+ 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA,
+ 0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6,
+ 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6,
+ 0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE,
+ 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE,
+ 0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1,
+ 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1,
+ 0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9,
+ 0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9,
+ 0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5,
+ 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5,
+ 0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED,
+ 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD,
+ 0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3,
+ 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3,
+ 0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB,
+ 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB,
+ 0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7,
+ 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7,
+ 0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF,
+ 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF
+};
+#endif
+
+/* mask[n] has the low n bits set, for n = 0..32 (33 entries).  The local
+   bit-packer helpers index it with a bit count, so that count must stay
+   within 0..32. */
+static const uint32_t mask[]=
+{0x00000000,0x00000001,0x00000003,0x00000007,0x0000000f,
+ 0x0000001f,0x0000003f,0x0000007f,0x000000ff,0x000001ff,
+ 0x000003ff,0x000007ff,0x00000fff,0x00001fff,0x00003fff,
+ 0x00007fff,0x0000ffff,0x0001ffff,0x0003ffff,0x0007ffff,
+ 0x000fffff,0x001fffff,0x003fffff,0x007fffff,0x00ffffff,
+ 0x01ffffff,0x03ffffff,0x07ffffff,0x0fffffff,0x1fffffff,
+ 0x3fffffff,0x7fffffff,0xffffffff };
+
+#if !defined(_USRDLL)
+/* Append the low `bits` bits of `value` to the write buffer `b`, least
+   significant bit first.  Takes only up to 32 bits (bits indexes mask[]).
+
+   The buffer is grown by BUFFER_INCREMENT bytes whenever fewer than five
+   bytes of room remain, because one call may touch b->ptr[0..4].
+
+   If growing fails, the old storage is released and every field of `b` is
+   reset to zero/NULL, so the failed write and everything buffered so far
+   are dropped instead of writing through a NULL pointer.  (The function
+   returns void, so the caller can only notice via the emptied buffer.)
+
+   Bytes are stored one at a time, low byte first, instead of through a
+   uint32_t* cast: this function is compiled whenever _USRDLL is not
+   defined, so it must not depend on host byte order or on b->ptr being
+   4-byte aligned.
+
+   On return the byte at the new b->ptr holds zeros above bit b->endbit,
+   which is what a following "|=" style writer expects. */
+static void vorbis_oggpack_write(oggpack_buffer *b, unsigned long value, int bits)
+{
+  uint32_t lvalue;
+
+  if(b->endbyte+4>=b->storage){
+    /* keep the old pointer until the new block is known to be valid */
+    void *grown=realloc(b->buffer,b->storage+BUFFER_INCREMENT+4);
+    if(!grown){
+      free(b->buffer);
+      b->buffer=NULL;
+      b->ptr=NULL;
+      b->storage=0;
+      b->endbyte=0;
+      b->endbit=0;
+      return;
+    }
+    b->buffer=grown;
+    b->storage+=BUFFER_INCREMENT;
+    b->ptr=b->buffer+b->endbyte;
+  }
+
+  value&=mask[bits];
+  bits+=b->endbit;
+
+  /* bytes 0..3: the new bits shifted past the b->endbit bits that are
+     already pending in ptr[0] */
+  lvalue=(uint32_t)(value<<b->endbit);
+  lvalue|=(b->ptr[0]&mask[b->endbit]);
+  b->ptr[0]=(unsigned char)(lvalue);
+  b->ptr[1]=(unsigned char)(lvalue>>8);
+  b->ptr[2]=(unsigned char)(lvalue>>16);
+  b->ptr[3]=(unsigned char)(lvalue>>24);
+
+  /* byte 4 is only reached once 32 or more bits are pending.  With
+     endbit==0 there is no spill, and value>>32 would be an undefined
+     shift for a 32-bit unsigned long, so store a clean zero instead. */
+  if(bits>=32)
+    b->ptr[4]=(unsigned char)(b->endbit?value>>(32-b->endbit):0);
+
+  b->endbyte+=bits/8;
+  b->ptr+=bits/8;
+  b->endbit=bits&7;
+}
+#endif
+
+#if defined(_OPENMP)
+/* Queue one (value, bits) pair in the next free slot of the write cache
+   `c` and bump its fill count.  No capacity check is done here. */
+void vorbis_oggpack_writecache(oggpack_writecache *c, unsigned long value, int bits)
+{
+  c->data[c->count].value = value;
+  c->data[c->count].size = bits;
+  ++c->count;
+}
+
+/* Cache-based counterpart of vorbis_book_encode(): queues the codeword for
+   entry `a` in `c` instead of writing it to a bit buffer.
+   Returns the codeword length in bits, or 0 without queuing anything when
+   `a` is outside 0..entries-1 -- the same guard vorbis_book_encode()
+   applies before it indexes codelist[] and lengthlist[]. */
+int vorbis_book_encode_cache(codebook *book, int a, oggpack_writecache *c){
+  if(a<0 || a>=book->c->entries)return(0);
+  vorbis_oggpack_writecache(c,book->codelist[a],book->c->lengthlist[a]);
+  return(book->c->lengthlist[a]);
+}
+
+/* Replay every queued (value, size) pair from `c` into `b` through
+   oggpack_write(), in the order they were queued, then mark the cache
+   empty. */
+void vorbis_oggpack_cacheflush(oggpack_writecache *c, oggpack_buffer *b)
+{
+  int i = 0;
+  while(i < c->count){
+    oggpack_write(b, c->data[i].value, c->data[i].size);
+    i++;
+  }
+  c->count = 0;
+}
+#endif
+
+/* Read in bits without advancing the bitptr; bits <= 32.
+
+   Returns the next `bits` bits at the read position of `b`, first bit in
+   the least significant position.  Near the end of the storage, a request
+   that would run past it returns (uint32_t)-1.
+
+   NOTE: the return type is unsigned, so that end-of-data value is
+   0xFFFFFFFF and never negative.  A caller that stores the result in a
+   uint32_t and tests it with "<0" / ">=0" will not detect end of data;
+   and for a full 32-bit look the sentinel cannot be told apart from 32
+   one-bits.
+
+   Each byte is widened to uint32_t before it is shifted: b->ptr[n] would
+   otherwise promote to int, and shifting an int left by up to 31 places
+   can overflow it, which is undefined. */
+static inline uint32_t vorbis_oggpack_look(oggpack_buffer *b,int bits){
+  uint32_t ret;
+  uint32_t m=mask[bits];
+
+  bits+=b->endbit;
+
+  if(b->endbyte+4>=b->storage){
+    /* not the main path */
+    if(b->endbyte*8+bits>b->storage*8)return((uint32_t)-1);
+  }
+
+  ret=(uint32_t)b->ptr[0]>>b->endbit;
+  if(bits>8){
+    ret|=(uint32_t)b->ptr[1]<<(8-b->endbit);
+    if(bits>16){
+      ret|=(uint32_t)b->ptr[2]<<(16-b->endbit);
+      if(bits>24){
+        ret|=(uint32_t)b->ptr[3]<<(24-b->endbit);
+        /* a fifth byte is only involved when the read is not byte aligned */
+        if(bits>32 && b->endbit)
+          ret|=(uint32_t)b->ptr[4]<<(32-b->endbit);
+      }
+    }
+  }
+  return(m&ret);
+}
+
+/* Move the read position of `b` forward by `bits` bits: whole bytes go to
+   ptr/endbyte, the remainder becomes the new endbit.  No bounds check is
+   performed. */
+static inline void vorbis_oggpack_adv(oggpack_buffer *b,int bits){
+  const int total=bits+b->endbit;
+  const int whole=total/8;
+
+  b->endbyte+=whole;
+  b->ptr+=whole;
+  b->endbit=total&7;
+}
/* packs the given codebook into the bitstream **************************/
@@ -256,7 +398,11 @@
/* returns the number of bits ************************************************/
int vorbis_book_encode(codebook *book, int a, oggpack_buffer *b){
if(a<0 || a>=book->c->entries)return(0);
+/* builds without _USRDLL write through the file-local
+   vorbis_oggpack_write(); _USRDLL builds keep calling oggpack_write() */
+#if !defined(_USRDLL)
+ vorbis_oggpack_write(b,book->codelist[a],book->c->lengthlist[a]);
+#else
oggpack_write(b,book->codelist[a],book->c->lengthlist[a]);
+#endif
return(book->c->lengthlist[a]);
}
@@ -300,25 +446,47 @@
bitreverse is not in the main execution path. */
static ogg_uint32_t bitreverse(ogg_uint32_t x){
+#if 0
x= ((x>>16)&0x0000ffff) | ((x<<16)&0xffff0000);
x= ((x>> 8)&0x00ff00ff) | ((x<< 8)&0xff00ff00);
x= ((x>> 4)&0x0f0f0f0f) | ((x<< 4)&0xf0f0f0f0);
x= ((x>> 2)&0x33333333) | ((x<< 2)&0xcccccccc);
return((x>> 1)&0x55555555) | ((x<< 1)&0xaaaaaaaa);
+#else
+  /* table-driven variant: reverse the bits inside each byte with
+     bitrev8[], then put the four bytes back in the opposite order */
+  const ogg_uint32_t from_b0 = bitrev8[x&0xFF];
+  const ogg_uint32_t from_b1 = bitrev8[(x>>8)&0xFF];
+  const ogg_uint32_t from_b2 = bitrev8[(x>>16)&0xFF];
+  const ogg_uint32_t from_b3 = bitrev8[x>>24];
+  return (from_b0<<24) | (from_b1<<16) | (from_b2<<8) | from_b3;
+#endif
}
STIN long decode_packed_entry_number(codebook *book, oggpack_buffer *b){
int read=book->dec_maxlength;
- long lo,hi;
- long lok = oggpack_look(b,book->dec_firsttablen);
+ uint32_t lo,hi;
+ uint32_t lok = vorbis_oggpack_look(b,book->dec_firsttablen);
if (lok >= 0) {
- long entry = book->dec_firsttable[lok];
+ uint32_t entry = book->dec_firsttable[lok];
if(entry&0x80000000UL){
lo=(entry>>15)&0x7fff;
hi=book->used_entries-(entry&0x7fff);
}else{
- oggpack_adv(b, book->dec_codelengths[entry-1]);
+ vorbis_oggpack_adv(b, book->dec_codelengths[entry-1]);
return(entry-1);
}
}else{
@@ -326,10 +494,10 @@
hi=book->used_entries;
}
- lok = oggpack_look(b, read);
+ lok = vorbis_oggpack_look(b, read);
while(lok<0 && read>1)
- lok = oggpack_look(b, --read);
+ lok = vorbis_oggpack_look(b, --read);
if(lok<0)return -1;
/* bisect search for the codeword in the ordered list */
@@ -344,12 +512,12 @@
}
if(book->dec_codelengths[lo]<=read){
- oggpack_adv(b, book->dec_codelengths[lo]);
+ vorbis_oggpack_adv(b, book->dec_codelengths[lo]);
return(lo);
}
}
- oggpack_adv(b, read);
+ vorbis_oggpack_adv(b, read);
return(-1);
}
@@ -470,7 +638,341 @@
long vorbis_book_decodevv_add(codebook *book,float **a,long offset,int ch,
oggpack_buffer *b,int n){
-
+#ifdef __SSE__ /* SSE Optimize */
+ long i,j;
+ int chptr=0;
+
+ if(ch==2)
+ {
+ int mid0 = (offset/2+3)&(~3);
+ int mid1 = ((offset+n)/2)&(~3);
+ float *bvl = book->valuelist;
+ float *a0 = a[0];
+ float *a1 = a[1];
+ switch(book->dim)
+ {
+ default :
+ for(i=offset/2;i<(offset+n)/2;)
+ {
+ long entry = decode_packed_entry_number(book,b);
+ if(entry==-1)
+ return(-1);
+ {
+ const float *t = bvl+entry*book->dim;
+ for (j=0;j<book->dim;j++)
+ {
+ a[chptr++][i] += t[j];
+ if(chptr==2)
+ {
+ chptr = 0;
+ i ++;
+ }
+ }
+ }
+ }
+ break;
+ case 2:
+  /* dim==2 with two channels: every decoded entry t supplies exactly
+     one sample per channel, a0[i]+=t[0] and a1[i]+=t[1].
+       head: one entry at a time until i reaches mid0 (offset/2 rounded
+             up to a multiple of four), never past the end of the range
+       body: four entries per pass with aligned loads/stores, up to mid1
+       tail: one entry at a time for whatever is left
+     Returns -1 as soon as an entry cannot be decoded. */
+  for(i=offset/2;i<mid0 && i<(offset+n)/2;)
+  {
+    long entry = decode_packed_entry_number(book,b);
+    if(entry==-1)
+      return(-1);
+    {
+      const float *t = bvl+entry*2;
+      __m128 XMM0 = _mm_load_ss(t );
+      __m128 XMM1 = _mm_load_ss(a0+i);
+      /* second channel takes t[1]; loading t[0] again here would add
+         the channel-0 value to both channels */
+      __m128 XMM2 = _mm_load_ss(t+1);
+      __m128 XMM3 = _mm_load_ss(a1+i);
+      XMM0 = _mm_add_ss(XMM0, XMM1);
+      XMM2 = _mm_add_ss(XMM2, XMM3);
+      _mm_store_ss(a0+i , XMM0);
+      _mm_store_ss(a1+i++, XMM2);
+    }
+  }
+  for(;i<mid1;)
+  {
+    /*
+      after the four loads:
+      XMM0 (T11 T10 T01 T00)
+      XMM2 (T31 T30 T21 T20)
+    */
+    __m128 XMM0, XMM1, XMM2, XMM3, XMM4;
+    const float *t0, *t1,*t2, *t3;
+    long entry = decode_packed_entry_number(book,b);
+    if(entry==-1)
+      return(-1);
+    t0 = bvl+entry*2;
+    entry = decode_packed_entry_number(book,b);
+    if(entry==-1)
+      return(-1);
+    t1 = bvl+entry*2;
+    entry = decode_packed_entry_number(book,b);
+    if(entry==-1)
+      return(-1);
+    t2 = bvl+entry*2;
+    entry = decode_packed_entry_number(book,b);
+    if(entry==-1)
+      return(-1);
+    t3 = bvl+entry*2;
+    XMM0 = _mm_loadl_pi(XMM0, (__m64*)t0);
+    XMM2 = _mm_loadl_pi(XMM2, (__m64*)t2);
+    XMM3 = _mm_load_ps(a0+i);
+    XMM0 = _mm_loadh_pi(XMM0, (__m64*)t1);
+    XMM2 = _mm_loadh_pi(XMM2, (__m64*)t3);
+    /*
+      after the two shuffles:
+      XMM0 (T30 T20 T10 T00)  -> channel 0
+      XMM1 (T31 T21 T11 T01)  -> channel 1
+    */
+    XMM4 = _mm_load_ps(a1+i);
+    XMM1 = XMM0;
+    XMM0 = _mm_shuffle_ps(XMM0, XMM2, _MM_SHUFFLE(2,0,2,0));
+    XMM1 = _mm_shuffle_ps(XMM1, XMM2, _MM_SHUFFLE(3,1,3,1));
+    XMM0 = _mm_add_ps(XMM0, XMM3);
+    XMM1 = _mm_add_ps(XMM1, XMM4);
+    _mm_store_ps(a0+i, XMM0);
+    _mm_store_ps(a1+i, XMM1);
+    i += 4;
+  }
+  for(;i<(offset+n)/2;)
+  {
+    long entry = decode_packed_entry_number(book,b);
+    if(entry==-1)
+      return(-1);
+    {
+      const float *t = bvl+entry*2;
+      __m128 XMM0 = _mm_load_ss(t );
+      __m128 XMM1 = _mm_load_ss(a0+i);
+      __m128 XMM2 = _mm_load_ss(t+1); /* t[1] for the second channel */
+      __m128 XMM3 = _mm_load_ss(a1+i);
+      XMM0 = _mm_add_ss(XMM0, XMM1);
+      XMM2 = _mm_add_ss(XMM2, XMM3);
+      _mm_store_ss(a0+i , XMM0);
+      _mm_store_ss(a1+i++, XMM2);
+    }
+  }
+  break;
+ case 4:
+ for(i=offset/2;i<mid0;)
+ {
+ long entry = decode_packed_entry_number(book,b);
+ if(entry==-1)
+ return(-1);
+ {
+ const float *t = bvl+entry*4;
+ __m128 XMM0 = _mm_load_ss(t );
+ __m128 XMM1 = _mm_load_ss(a0+i );
+ __m128 XMM2 = _mm_load_ss(t+1);
+ __m128 XMM3 = _mm_load_ss(a1+i );
+ __m128 XMM4 = _mm_load_ss(t+2);
+ __m128 XMM5 = _mm_load_ss(a0+i+1);
+ __m128 XMM6 = _mm_load_ss(t+3);
+ __m128 XMM7 = _mm_load_ss(a1+i+1);
+ XMM0 = _mm_add_ss(XMM0, XMM1);
+ XMM2 = _mm_add_ss(XMM2, XMM3);
+ XMM4 = _mm_add_ss(XMM4, XMM5);
+ XMM6 = _mm_add_ss(XMM6, XMM7);
+ _mm_store_ss(a0+i , XMM0);
+ _mm_store_ss(a1+i , XMM2);
+ _mm_store_ss(a0+i+1, XMM4);
+ _mm_store_ss(a1+i+1, XMM6);
+ i += 2;
+ }
+ }
+ for(;i<mid1;)
+ {
+ /*
+ XMM0 (T03 T02 T01 T00)
+ XMM1 (T13 T12 T11 T10)
+ XMM2 (T23 T22 T21 T20)
+ XMM3 (T33 T32 T31 T30)
+ */
+ __m128 XMM0, XMM1, XMM2, XMM3;
+ __m128 XMM4, XMM5, XMM6, XMM7;
+ long entry = decode_packed_entry_number(book,b);
+ if(entry==-1)
+ return(-1);
+ XMM0 = _mm_lddqu_ps(bvl+entry*4);
+ entry = decode_packed_entry_number(book,b);
+ if(entry==-1)
+ return(-1);
+ XMM1 = _mm_lddqu_ps(bvl+entry*4);
+ entry = decode_packed_entry_number(book,b);
+ if(entry==-1)
+ return(-1);
+ XMM2 = _mm_lddqu_ps(bvl+entry*4);
+ entry = decode_packed_entry_number(book,b);
+ if(entry==-1)
+ return(-1);
+ XMM3 = _mm_lddqu_ps(bvl+entry*4);
+ /*
+ XMM0 (T12 T10 T02 T00)
+ XMM4 (T13 T11 T03 T01)
+ XMM2 (T32 T30 T22 T20)
+ XMM5 (T33 T31 T23 T21)
+ */
+ XMM4 = XMM0;
+ XMM5 = XMM2;
+ XMM0 = _mm_shuffle_ps(XMM0, XMM1, _MM_SHUFFLE(2,0,2,0));
+ XMM4 = _mm_shuffle_ps(XMM4, XMM1, _MM_SHUFFLE(3,1,3,1));
+ XMM1 = _mm_load_ps(a0+i );
+ XMM2 = _mm_shuffle_ps(XMM2, XMM3, _MM_SHUFFLE(2,0,2,0));
+ XMM5 = _mm_shuffle_ps(XMM5, XMM3, _MM_SHUFFLE(3,1,3,1));
+ XMM3 = _mm_load_ps(a1+i );
+ XMM6 = _mm_load_ps(a0+i+4);
+ XMM7 = _mm_load_ps(a1+i+4);
+ XMM0 = _mm_add_ps(XMM0, XMM1);
+ XMM4 = _mm_add_ps(XMM4, XMM3);
+ XMM2 = _mm_add_ps(XMM2, XMM6);
+ XMM5 = _mm_add_ps(XMM5, XMM7);
+ _mm_store_ps(a0+i , XMM0);
+ _mm_store_ps(a1+i , XMM4);
+ _mm_store_ps(a0+i+4, XMM2);
+ _mm_store_ps(a1+i+4, XMM5);
+ i += 8;
+ }
+ for(;i<(offset+n)/2;)
+ {
+ long entry = decode_packed_entry_number(book,b);
+ if(entry==-1)
+ return(-1);
+ {
+ const float *t = bvl+entry*4;
+ __m128 XMM0 = _mm_load_ss(t );
+ __m128 XMM1 = _mm_load_ss(a0+i );
+ __m128 XMM2 = _mm_load_ss(t+1);
+ __m128 XMM3 = _mm_load_ss(a1+i );
+ __m128 XMM4 = _mm_load_ss(t+2);
+ __m128 XMM5 = _mm_load_ss(a0+i+1);
+ __m128 XMM6 = _mm_load_ss(t+3);
+ __m128 XMM7 = _mm_load_ss(a1+i+1);
+ XMM0 = _mm_add_ss(XMM0, XMM1);
+ XMM2 = _mm_add_ss(XMM2, XMM3);
+ XMM4 = _mm_add_ss(XMM4, XMM5);
+ XMM6 = _mm_add_ss(XMM6, XMM7);
+ _mm_store_ss(a0+i , XMM0);
+ _mm_store_ss(a1+i , XMM2);
+ _mm_store_ss(a0+i+1, XMM4);
+ _mm_store_ss(a1+i+1, XMM6);
+ i += 2;
+ }
+ }
+ break;
+ case 8:
+ for(i=offset/2;i<mid0;)
+ {
+ long entry = decode_packed_entry_number(book,b);
+ if(entry==-1)
+ return(-1);
+ {
+ const float *t = bvl+entry*8;
+ __m128 XMM0 = _mm_lddqu_ps(t );
+ __m128 XMM1 = _mm_lddqu_ps(t+4);
+ __m128 XMM2 = _mm_load_ps(a0+i);
+ __m128 XMM3 = _mm_load_ps(a1+i);
+ __m128 XMM4 = XMM0;
+ XMM0 = _mm_shuffle_ps(XMM0, XMM1, _MM_SHUFFLE(2,0,2,0));
+ XMM4 = _mm_shuffle_ps(XMM4, XMM1, _MM_SHUFFLE(3,1,3,1));
+ XMM0 = _mm_add_ps(XMM0, XMM2);
+ XMM4 = _mm_add_ps(XMM4, XMM3);
+ _mm_store_ps(a0+i , XMM0);
+ _mm_store_ps(a1+i , XMM4);
+ i += 4;
+ }
+ }
+ for(;i<mid1;)
+ {
+ /*
+ XMM0 (T03 T02 T01 T00)
+ XMM1 (T07 T06 T05 T04)
+ XMM2 (T13 T12 T11 T10)
+ XMM3 (T17 T16 T15 T14)
+ */
+ __m128 XMM0, XMM1, XMM2, XMM3;
+ __m128 XMM4, XMM5, XMM6, XMM7;
+ const float *t;
+ long entry = decode_packed_entry_number(book,b);
+ if(entry==-1)
+ return(-1);
+ t = bvl+entry*8;
+ XMM0 = _mm_lddqu_ps(t );
+ XMM1 = _mm_lddqu_ps(t+4);
+ entry = decode_packed_entry_number(book,b);
+ if(entry==-1)
+ return(-1);
+ t = bvl+entry*8;
+ XMM2 = _mm_lddqu_ps(t );
+ XMM3 = _mm_lddqu_ps(t+4);
+ /*
+ XMM0 (T06 T04 T02 T00)
+ XMM4 (T07 T05 T03 T01)
+ XMM2 (T16 T14 T12 T10)
+ XMM5 (T17 T15 T13 T11)
+ */
+ XMM4 = XMM0;
+ XMM5 = XMM2;
+ XMM0 = _mm_shuffle_ps(XMM0, XMM1, _MM_SHUFFLE(2,0,2,0));
+ XMM4 = _mm_shuffle_ps(XMM4, XMM1, _MM_SHUFFLE(3,1,3,1));
+ XMM2 = _mm_shuffle_ps(XMM2, XMM3, _MM_SHUFFLE(2,0,2,0));
+ XMM5 = _mm_shuffle_ps(XMM5, XMM3, _MM_SHUFFLE(3,1,3,1));
+ XMM1 = _mm_load_ps(a0+i );
+ XMM3 = _mm_load_ps(a1+i );
+ XMM6 = _mm_load_ps(a0+i+4);
+ XMM7 = _mm_load_ps(a1+i+4);
+ XMM0 = _mm_add_ps(XMM0, XMM1);
+ XMM4 = _mm_add_ps(XMM4, XMM3);
+ XMM2 = _mm_add_ps(XMM2, XMM6);
+ XMM5 = _mm_add_ps(XMM5, XMM7);
+ _mm_store_ps(a0+i , XMM0);
+ _mm_store_ps(a1+i , XMM4);
+ _mm_store_ps(a0+i+4, XMM2);
+ _mm_store_ps(a1+i+4, XMM5);
+ i += 8;
+ }
+ for(;i<(offset+n)/2;)
+ {
+ long entry = decode_packed_entry_number(book,b);
+ if(entry==-1)
+ return(-1);
+ {
+ const float *t = bvl+entry*8;
+ __m128 XMM0 = _mm_lddqu_ps(t );
+ __m128 XMM1 = _mm_lddqu_ps(t+4);
+ __m128 XMM4 = XMM0;
+ __m128 XMM2 = _mm_load_ps(a0+i);
+ __m128 XMM3 = _mm_load_ps(a1+i);
+ XMM0 = _mm_shuffle_ps(XMM0, XMM1, _MM_SHUFFLE(2,0,2,0));
+ XMM4 = _mm_shuffle_ps(XMM4, XMM1, _MM_SHUFFLE(3,1,3,1));
+ XMM0 = _mm_add_ps(XMM0, XMM2);
+ XMM4 = _mm_add_ps(XMM4, XMM3);
+ _mm_store_ps(a0+i , XMM0);
+ _mm_store_ps(a1+i , XMM4);
+ i += 4;
+ }
+ }
+ break;
+ }
+ }
+ else
+ {
+ for(i=offset/ch;i<(offset+n)/ch;)
+ {
+ long entry = decode_packed_entry_number(book,b);
+ if(entry==-1)
+ return(-1);
+ {
+ const float *t = book->valuelist+entry*book->dim;
+ for (j=0;j<book->dim;j++)
+ {
+ a[chptr++][i] += t[j];
+ if(chptr==ch)
+ {
+ chptr = 0;
+ i ++;
+ }
+ }
+ }
+ }
+ }
+#else /* SSE Optimize */
long i,j,entry;
int chptr=0;
if(book->used_entries>0){
@@ -489,6 +991,7 @@
}
}
}
+#endif /* SSE Optimize */
return(0);
}
diff -ruN --exclude '*.in' libvorbis-1.2.0/lib/codebook.h libvorbis-1.2.0-sse/lib/codebook.h
--- libvorbis-1.2.0/lib/codebook.h 2007-07-24 02:09:47.000000000 +0200
+++ libvorbis-1.2.0-sse/lib/codebook.h 2007-08-02 12:43:10.000000000 +0200
@@ -155,6 +155,4 @@
long off,int ch,
oggpack_buffer *b,int n);
-
-
#endif
diff -ruN --exclude '*.in' libvorbis-1.2.0/lib/cpu.c libvorbis-1.2.0-sse/lib/cpu.c
--- libvorbis-1.2.0/lib/cpu.c 1970-01-01 01:00:00.000000000 +0100
+++ libvorbis-1.2.0-sse/lib/cpu.c 2007-08-02 12:43:10.000000000 +0200
@@ -0,0 +1,50 @@
+/********************************************************************
+ * *
+ * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE. *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
+ * *
+ * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2003 *
+ * by the XIPHOPHORUS Company http://www.xiph.org/ *
+ * *
+ ********************************************************************
+
+ function: CPU ID Check
+ last mod: $Id: cpu.c,v 1.1 2006-06-09 00:00:00+09 blacksword Exp $
+
+ ********************************************************************/
+
+#if defined(__INTEL_COMPILER)&&defined(_WIN32)&&defined(_USRDLL)
+extern int __intel_cpu_indicator;
+
+/* Set __intel_cpu_indicator from CPUID function 1 (EAX=1).  The highest
+   feature found wins:
+     ECX & 0x0000001 (SSE3) -> 0x800
+     EDX & 0x4000000 (SSE2) -> 0x200
+     EDX & 0x2000000 (SSE)  -> 0x100
+     none of them           -> 1
+   Only built with the Intel compiler for a Win32 DLL (_USRDLL).
+   NOTE(review): presumably this stands in for the Intel runtime's own
+   routine of the same name -- confirm against the compiler docs. */
+void __intel_cpu_indicator_init(void)
+{
+  unsigned int feat_edx, feat_ecx;
+  int level;
+  _asm {
+    mov eax,1
+    cpuid
+    mov feat_edx, edx
+    mov feat_ecx, ecx
+  }
+  if(feat_ecx&0x0000001)        /* SSE3 Check */
+    level = 0x800;
+  else if(feat_edx&0x4000000)   /* SSE2 Check */
+    level = 0x200;
+  else if(feat_edx&0x2000000)   /* SSE Check */
+    level = 0x100;
+  else
+    level = 1;
+  __intel_cpu_indicator = level;
+}
+#endif
diff -ruN --exclude '*.in' libvorbis-1.2.0/lib/envelope.c libvorbis-1.2.0-sse/lib/envelope.c
--- libvorbis-1.2.0/lib/envelope.c 2007-07-24 02:09:47.000000000 +0200
+++ libvorbis-1.2.0-sse/lib/envelope.c 2007-08-02 12:43:10.000000000 +0200
@@ -28,6 +28,9 @@
#include "envelope.h"
#include "mdct.h"
#include "misc.h"
+#ifdef __SSE__ /* SSE Optimize */
+#include "xmmlib.h"
+#endif /* SSE Optimize */
void _ve_envelope_init(envelope_lookup *e,vorbis_info *vi){
codec_setup_info *ci=vi->codec_setup;
@@ -103,7 +106,11 @@
itself (for low power signals) */
float minV=ve->minenergy;
+#ifdef __SSE__ /* SSE Optimize */
+ float *vec = (float*)_ogg_alloca(n*sizeof(*vec));
+#else /* SSE Optimize */
float *vec=alloca(n*sizeof(*vec));
+#endif /* SSE Optimize */
/* stretch is used to gradually lengthen the number of windows
considered prevoius-to-potential-trigger */
@@ -116,9 +123,50 @@
totalshift+pos*ve->searchstep);*/
/* window and transform */
+#ifdef __SSE__ /* SSE Optimize */
+ for(i=0;i<n;i+=32)
+ {
+ __m128 XMM0, XMM1, XMM2, XMM3;
+ __m128 XMM4, XMM5, XMM6, XMM7;
+ XMM0 = _mm_load_ps(data+i );
+ XMM4 = _mm_load_ps(ve->mdct_win+i );
+ XMM1 = _mm_load_ps(data+i+ 4);
+ XMM5 = _mm_load_ps(ve->mdct_win+i+ 4);
+ XMM2 = _mm_load_ps(data+i+ 8);
+ XMM6 = _mm_load_ps(ve->mdct_win+i+ 8);
+ XMM3 = _mm_load_ps(data+i+12);
+ XMM7 = _mm_load_ps(ve->mdct_win+i+12);
+ XMM0 = _mm_mul_ps(XMM0, XMM4);
+ XMM4 = _mm_load_ps(data+i+16);
+ XMM1 = _mm_mul_ps(XMM1, XMM5);
+ XMM5 = _mm_load_ps(ve->mdct_win+i+16);
+ XMM2 = _mm_mul_ps(XMM2, XMM6);
+ XMM6 = _mm_load_ps(data+i+20);
+ XMM3 = _mm_mul_ps(XMM3, XMM7);
+ XMM7 = _mm_load_ps(ve->mdct_win+i+20);
+ _mm_store_ps(vec+i , XMM0);
+ XMM0 = _mm_load_ps(data+i+24);
+ _mm_store_ps(vec+i+ 4, XMM1);
+ XMM1 = _mm_load_ps(ve->mdct_win+i+24);
+ _mm_store_ps(vec+i+ 8, XMM2);
+ XMM2 = _mm_load_ps(data+i+28);
+ _mm_store_ps(vec+i+12, XMM3);
+ XMM3 = _mm_load_ps(ve->mdct_win+i+28);
+ XMM4 = _mm_mul_ps(XMM4, XMM5);
+ XMM6 = _mm_mul_ps(XMM6, XMM7);
+ XMM0 = _mm_mul_ps(XMM0, XMM1);
+ XMM2 = _mm_mul_ps(XMM2, XMM3);
+ _mm_store_ps(vec+i+16, XMM4);
+ _mm_store_ps(vec+i+20, XMM6);
+ _mm_store_ps(vec+i+24, XMM0);
+ _mm_store_ps(vec+i+28, XMM2);
+ }
+ mdct_forward(&ve->mdct, vec, vec, NULL);
+#else /* SSE Optimize */
for(i=0;i<n;i++)
vec[i]=data[i]*ve->mdct_win[i];
mdct_forward(&ve->mdct,vec,vec);
+#endif /* SSE Optimize */
/*_analysis_output_always("mdct",seq2,vec,n/2,0,1,0); */
@@ -149,7 +197,231 @@
/* perform spreading and limiting, also smooth the spectrum. yes,
the MDCT results in all real coefficients, but it still *behaves*
like real/imaginary pairs */
- for(i=0;i<n/2;i+=2){
+#ifdef __SSE__ /* SSE Optimize */
+ {
+ static _MM_ALIGN16 const float mparm[4] = {
+ 7.17711438e-7f/2.f, 7.17711438e-7f/2.f, 7.17711438e-7f/2.f, 7.17711438e-7f/2.f
+ };
+ static _MM_ALIGN16 const float aparm[4] = {
+ -764.6161886f/2.f, -764.6161886f/2.f, -764.6161886f/2.f, -764.6161886f/2.f
+ };
+ static _MM_ALIGN16 const float decayinit0[4] = {
+ 0.f, 8.f, 16.f, 24.f
+ };
+ static _MM_ALIGN16 const float decayinit1[4] = {
+ 32.f, 40.f, 48.f, 56.f
+ };
+ static _MM_ALIGN16 const float p16[4] = {
+ 64.f, 64.f, 64.f, 64.f
+ };
+ __m128 MINV = _mm_set_ps1(minV);
+ float *p = vec;
+ int midpoint = ((int)(-(minV-decay)/4.f)+15)&(~15);
+ int last_n = n/2;
+ __m128 DECAY0 = _mm_set_ps1(decay);
+ __m128 DECAY1 = _mm_set_ps1(decay);
+ DECAY0 = _mm_sub_ps(DECAY0, PM128(decayinit0));
+ DECAY1 = _mm_sub_ps(DECAY1, PM128(decayinit1));
+#if defined(__SSE2__)
+ for(i=0;i<midpoint;i+=16,p+=8)
+ {
+ __m128 XMM0, XMM2;
+ __m128 XMM1, XMM3;
+#if defined(__SSE3__)
+ XMM0 = _mm_load_ps(vec+i );
+ XMM1 = _mm_load_ps(vec+i+ 4);
+ XMM2 = _mm_load_ps(vec+i+ 8);
+ XMM3 = _mm_load_ps(vec+i+12);
+ XMM0 = _mm_mul_ps(XMM0, XMM0);
+ XMM1 = _mm_mul_ps(XMM1, XMM1);
+ XMM2 = _mm_mul_ps(XMM2, XMM2);
+ XMM3 = _mm_mul_ps(XMM3, XMM3);
+ XMM0 = _mm_hadd_ps(XMM0, XMM1);
+ XMM2 = _mm_hadd_ps(XMM2, XMM3);
+#else
+ __m128 XMM4, XMM5;
+ XMM0 = _mm_load_ps(vec+i );
+ XMM2 = _mm_load_ps(vec+i+ 8);
+ XMM4 = _mm_load_ps(vec+i+ 4);
+ XMM5 = _mm_load_ps(vec+i+12);
+ XMM1 = XMM0;
+ XMM3 = XMM2;
+ XMM0 = _mm_shuffle_ps(XMM0, XMM4,_MM_SHUFFLE(2,0,2,0));
+ XMM1 = _mm_shuffle_ps(XMM1, XMM4,_MM_SHUFFLE(3,1,3,1));
+ XMM2 = _mm_shuffle_ps(XMM2, XMM5,_MM_SHUFFLE(2,0,2,0));
+ XMM3 = _mm_shuffle_ps(XMM3, XMM5,_MM_SHUFFLE(3,1,3,1));
+ XMM0 = _mm_mul_ps(XMM0, XMM0);
+ XMM1 = _mm_mul_ps(XMM1, XMM1);
+ XMM2 = _mm_mul_ps(XMM2, XMM2);
+ XMM3 = _mm_mul_ps(XMM3, XMM3);
+ XMM0 = _mm_add_ps(XMM0, XMM1);
+ XMM2 = _mm_add_ps(XMM2, XMM3);
+#endif
+ XMM0 = _mm_cvtepi32_ps(_mm_castps_si128(XMM0));
+ XMM2 = _mm_cvtepi32_ps(_mm_castps_si128(XMM2));
+ XMM0 = _mm_mul_ps(XMM0, PM128(mparm));
+ XMM2 = _mm_mul_ps(XMM2, PM128(mparm));
+ XMM0 = _mm_add_ps(XMM0, PM128(aparm));
+ XMM2 = _mm_add_ps(XMM2, PM128(aparm));
+ XMM0 = _mm_max_ps(XMM0, DECAY0);
+ XMM2 = _mm_max_ps(XMM2, DECAY1);
+ XMM0 = _mm_max_ps(XMM0, MINV);
+ XMM2 = _mm_max_ps(XMM2, MINV);
+ _mm_store_ps(p , XMM0);
+ _mm_store_ps(p+4, XMM2);
+ DECAY0 = _mm_sub_ps(DECAY0, PM128(p16));
+ DECAY1 = _mm_sub_ps(DECAY1, PM128(p16));
+ }
+ for(;i<last_n;i+=16,p+=8)
+ {
+ __m128 XMM0, XMM2;
+ __m128 XMM1, XMM3;
+#if defined(__SSE3__)
+ XMM0 = _mm_load_ps(vec+i );
+ XMM1 = _mm_load_ps(vec+i+ 4);
+ XMM2 = _mm_load_ps(vec+i+ 8);
+ XMM3 = _mm_load_ps(vec+i+12);
+ XMM0 = _mm_mul_ps(XMM0, XMM0);
+ XMM1 = _mm_mul_ps(XMM1, XMM1);
+ XMM2 = _mm_mul_ps(XMM2, XMM2);
+ XMM3 = _mm_mul_ps(XMM3, XMM3);
+ XMM0 = _mm_hadd_ps(XMM0, XMM1);
+ XMM2 = _mm_hadd_ps(XMM2, XMM3);
+#else
+ __m128 XMM4, XMM5;
+ XMM0 = _mm_load_ps(vec+i );
+ XMM2 = _mm_load_ps(vec+i+ 8);
+ XMM4 = _mm_load_ps(vec+i+ 4);
+ XMM5 = _mm_load_ps(vec+i+12);
+ XMM1 = XMM0;
+ XMM3 = XMM2;
+ XMM0 = _mm_shuffle_ps(XMM0, XMM4,_MM_SHUFFLE(2,0,2,0));
+ XMM1 = _mm_shuffle_ps(XMM1, XMM4,_MM_SHUFFLE(3,1,3,1));
+ XMM2 = _mm_shuffle_ps(XMM2, XMM5,_MM_SHUFFLE(2,0,2,0));
+ XMM3 = _mm_shuffle_ps(XMM3, XMM5,_MM_SHUFFLE(3,1,3,1));
+ XMM0 = _mm_mul_ps(XMM0, XMM0);
+ XMM1 = _mm_mul_ps(XMM1, XMM1);
+ XMM2 = _mm_mul_ps(XMM2, XMM2);
+ XMM3 = _mm_mul_ps(XMM3, XMM3);
+ XMM0 = _mm_add_ps(XMM0, XMM1);
+ XMM2 = _mm_add_ps(XMM2, XMM3);
+#endif
+ XMM0 = _mm_cvtepi32_ps(_mm_castps_si128(XMM0));
+ XMM2 = _mm_cvtepi32_ps(_mm_castps_si128(XMM2));
+ XMM0 = _mm_mul_ps(XMM0, PM128(mparm));
+ XMM2 = _mm_mul_ps(XMM2, PM128(mparm));
+ XMM0 = _mm_add_ps(XMM0, PM128(aparm));
+ XMM2 = _mm_add_ps(XMM2, PM128(aparm));
+ XMM0 = _mm_max_ps(XMM0, MINV);
+ XMM2 = _mm_max_ps(XMM2, MINV);
+ _mm_store_ps(p , XMM0);
+ _mm_store_ps(p+4, XMM2);
+ }
+#else /* for __SSE2__ */
+/*
+ SSE optimized code
+*/
+ for(i=0;i<midpoint;i+=16,p+=8)
+ {
+ __m64 MM0, MM1, MM2, MM3;
+ __m128x U0, U1;
+ {
+ __m128 XMM0, XMM2;
+ __m128 XMM1, XMM3;
+ __m128 XMM4, XMM5;
+ XMM0 = _mm_load_ps(vec+i );
+ XMM2 = _mm_load_ps(vec+i+ 8);
+ XMM4 = _mm_load_ps(vec+i+ 4);
+ XMM5 = _mm_load_ps(vec+i+12);
+ XMM1 = XMM0;
+ XMM3 = XMM2;
+ XMM0 = _mm_shuffle_ps(XMM0, XMM4,_MM_SHUFFLE(2,0,2,0));
+ XMM1 = _mm_shuffle_ps(XMM1, XMM4,_MM_SHUFFLE(3,1,3,1));
+ XMM2 = _mm_shuffle_ps(XMM2, XMM5,_MM_SHUFFLE(2,0,2,0));
+ XMM3 = _mm_shuffle_ps(XMM3, XMM5,_MM_SHUFFLE(3,1,3,1));
+ XMM0 = _mm_mul_ps(XMM0, XMM0);
+ XMM1 = _mm_mul_ps(XMM1, XMM1);
+ XMM2 = _mm_mul_ps(XMM2, XMM2);
+ XMM3 = _mm_mul_ps(XMM3, XMM3);
+ XMM0 = _mm_add_ps(XMM0, XMM1);
+ XMM2 = _mm_add_ps(XMM2, XMM3);
+ U0.ps = XMM0;
+ U1.ps = XMM2;
+ MM0 = U0.pi64[1];
+ MM1 = U1.pi64[1];
+ MM2 = U0.pi64[0];
+ MM3 = U1.pi64[0];
+ XMM0 = _mm_cvtpi32_ps(XMM0, MM0);
+ XMM2 = _mm_cvtpi32_ps(XMM2, MM1);
+ XMM0 = _mm_movelh_ps(XMM0, XMM0);
+ XMM2 = _mm_movelh_ps(XMM2, XMM2);
+ XMM0 = _mm_cvtpi32_ps(XMM0, MM2);
+ XMM2 = _mm_cvtpi32_ps(XMM2, MM3);
+ XMM0 = _mm_mul_ps(XMM0, PM128(mparm));
+ XMM2 = _mm_mul_ps(XMM2, PM128(mparm));
+ XMM0 = _mm_add_ps(XMM0, PM128(aparm));
+ XMM2 = _mm_add_ps(XMM2, PM128(aparm));
+ XMM0 = _mm_max_ps(XMM0, DECAY0);
+ XMM2 = _mm_max_ps(XMM2, DECAY1);
+ XMM0 = _mm_max_ps(XMM0, MINV);
+ XMM2 = _mm_max_ps(XMM2, MINV);
+ _mm_store_ps(p , XMM0);
+ _mm_store_ps(p+4, XMM2);
+ }
+ DECAY0 = _mm_sub_ps(DECAY0, PM128(p16));
+ DECAY1 = _mm_sub_ps(DECAY1, PM128(p16));
+ }
+ for(;i<last_n;i+=16,p+=8)
+ {
+ __m64 MM0, MM1, MM2, MM3;
+ __m128x U0, U1;
+ {
+ __m128 XMM0, XMM2;
+ __m128 XMM1, XMM3;
+ __m128 XMM4, XMM5;
+ XMM0 = _mm_load_ps(vec+i );
+ XMM2 = _mm_load_ps(vec+i+ 8);
+ XMM4 = _mm_load_ps(vec+i+ 4);
+ XMM5 = _mm_load_ps(vec+i+12);
+ XMM1 = XMM0;
+ XMM3 = XMM2;
+ XMM0 = _mm_shuffle_ps(XMM0, XMM4,_MM_SHUFFLE(2,0,2,0));
+ XMM1 = _mm_shuffle_ps(XMM1, XMM4,_MM_SHUFFLE(3,1,3,1));
+ XMM2 = _mm_shuffle_ps(XMM2, XMM5,_MM_SHUFFLE(2,0,2,0));
+ XMM3 = _mm_shuffle_ps(XMM3, XMM5,_MM_SHUFFLE(3,1,3,1));
+ XMM0 = _mm_mul_ps(XMM0, XMM0);
+ XMM1 = _mm_mul_ps(XMM1, XMM1);
+ XMM2 = _mm_mul_ps(XMM2, XMM2);
+ XMM3 = _mm_mul_ps(XMM3, XMM3);
+ XMM0 = _mm_add_ps(XMM0, XMM1);
+ XMM2 = _mm_add_ps(XMM2, XMM3);
+ U0.ps = XMM0;
+ U1.ps = XMM2;
+ MM0 = U0.pi64[1];
+ MM1 = U1.pi64[1];
+ MM2 = U0.pi64[0];
+ MM3 = U1.pi64[0];
+ XMM0 = _mm_cvtpi32_ps(XMM0, MM0);
+ XMM2 = _mm_cvtpi32_ps(XMM2, MM1);
+ XMM0 = _mm_movelh_ps(XMM0, XMM0);
+ XMM2 = _mm_movelh_ps(XMM2, XMM2);
+ XMM0 = _mm_cvtpi32_ps(XMM0, MM2);
+ XMM2 = _mm_cvtpi32_ps(XMM2, MM3);
+ XMM0 = _mm_mul_ps(XMM0, PM128(mparm));
+ XMM2 = _mm_mul_ps(XMM2, PM128(mparm));
+ XMM0 = _mm_add_ps(XMM0, PM128(aparm));
+ XMM2 = _mm_add_ps(XMM2, PM128(aparm));
+ XMM0 = _mm_max_ps(XMM0, MINV);
+ XMM2 = _mm_max_ps(XMM2, MINV);
+ _mm_store_ps(p , XMM0);
+ _mm_store_ps(p+4, XMM2);
+ }
+ }
+ _mm_empty();
+#endif /* for __SSE2__ */
+ }
+#else /* SSE Optimize */
+ for(i=0;i<n/2;i+=2){
float val=vec[i]*vec[i]+vec[i+1]*vec[i+1];
val=todB(&val)*.5f;
if(val<decay)val=decay;
@@ -157,17 +429,70 @@
vec[i>>1]=val;
decay-=8.;
}
+#endif /* SSE Optimize */
/*_analysis_output_always("spread",seq2++,vec,n/4,0,0,0);*/
/* perform preecho/postecho triggering by band */
for(j=0;j<VE_BANDS;j++){
- float acc=0.;
- float valmax,valmin;
/* accumulate amplitude */
+#ifdef __SSE__ /* SSE Optimize */
+ float acc;
+ float valmax,valmin;
+ {
+ __m128 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5;
+ if(bands[j].end!=8)
+ {
+ switch(bands[j].end)
+ {
+ case 4 : /* bands[j].end==4(14.286%) */
+ XMM0 = _mm_lddqu_ps(vec+bands[j].begin);
+ XMM1 = _mm_load_ps(bands[j].window );
+ XMM0 = _mm_mul_ps(XMM0, XMM1);
+ break;
+ case 5 : /* bands[j].end==5(14.286%) */
+ XMM0 = _mm_lddqu_ps(vec+bands[j].begin);
+ XMM2 = _mm_load_ss(vec+bands[j].begin+4);
+ XMM1 = _mm_load_ps(bands[j].window );
+ XMM3 = _mm_load_ss(bands[j].window+4);
+ XMM0 = _mm_mul_ps(XMM0, XMM1);
+ XMM2 = _mm_mul_ss(XMM2, XMM3);
+ XMM0 = _mm_add_ss(XMM0, XMM2);
+ break;
+ case 6 : /* bands[j].end==6(14.286%) */
+ XMM0 = _mm_lddqu_ps(vec+bands[j].begin);
+ XMM2 = _mm_load_ss(vec+bands[j].begin+4);
+ XMM4 = _mm_load_ss(vec+bands[j].begin+5);
+ XMM1 = _mm_load_ps(bands[j].window );
+ XMM3 = _mm_load_ss(bands[j].window+4);
+ XMM5 = _mm_load_ss(bands[j].window+5);
+ XMM0 = _mm_mul_ps(XMM0, XMM1);
+ XMM2 = _mm_mul_ss(XMM2, XMM3);
+ XMM4 = _mm_mul_ss(XMM4, XMM5);
+ XMM2 = _mm_add_ss(XMM2, XMM4);
+ XMM0 = _mm_add_ss(XMM0, XMM2);
+ break;
+ }
+ }
+ else /* bands[j].end==8(57.143%) */
+ {
+ XMM0 = _mm_lddqu_ps(vec+bands[j].begin );
+ XMM1 = _mm_load_ps(bands[j].window );
+ XMM2 = _mm_lddqu_ps(vec+bands[j].begin+4);
+ XMM3 = _mm_load_ps(bands[j].window+4);
+ XMM0 = _mm_mul_ps(XMM0, XMM1);
+ XMM2 = _mm_mul_ps(XMM2, XMM3);
+ XMM0 = _mm_add_ps(XMM0, XMM2);
+ }
+ acc = _mm_add_horz(XMM0);
+ }
+#else /* SSE Optimize */
+ float acc=0.;
+ float valmax,valmin;
for(i=0;i<bands[j].end;i++)
acc+=vec[i+bands[j].begin]*bands[j].window[i];
+#endif /* SSE Optimize */
acc*=bands[j].total;
@@ -278,7 +603,7 @@
if(ve->mark[j/ve->searchstep]){
if(j>centerW){
-#if 0
+ #if 0
if(j>ve->curmark){
float *marker=alloca(v->pcm_current*sizeof(*marker));
int l,m;
@@ -361,13 +686,13 @@
memmove(e->mark,e->mark+smallshift,(smallsize-smallshift)*sizeof(*e->mark));
-#if 0
+ #if 0
for(i=0;i<VE_BANDS*e->ch;i++)
memmove(e->filter[i].markers,
e->filter[i].markers+smallshift,
(1024-smallshift)*sizeof(*(*e->filter).markers));
totalshift+=shift;
-#endif
+ #endif
e->current-=shift;
if(e->curmark>=0)
diff -ruN --exclude '*.in' libvorbis-1.2.0/lib/floor0.c libvorbis-1.2.0-sse/lib/floor0.c
--- libvorbis-1.2.0/lib/floor0.c 2007-07-24 02:09:47.000000000 +0200
+++ libvorbis-1.2.0-sse/lib/floor0.c 2007-08-02 12:43:10.000000000 +0200
@@ -28,6 +28,9 @@
#include "scales.h"
#include "misc.h"
#include "os.h"
+#ifdef __SSE__ /* SSE Optimize */
+#include "xmmlib.h"
+#endif /* SSE Optimize */
#include "misc.h"
#include <stdio.h>
diff -ruN --exclude '*.in' libvorbis-1.2.0/lib/floor1.c libvorbis-1.2.0-sse/lib/floor1.c
--- libvorbis-1.2.0/lib/floor1.c 2007-08-02 12:42:12.000000000 +0200
+++ libvorbis-1.2.0-sse/lib/floor1.c 2007-08-02 13:50:49.000000000 +0200
@@ -25,6 +25,12 @@
#include "codebook.h"
#include "misc.h"
#include "scales.h"
+#ifdef __SSE__ /* SSE Optimize */
+#include "xmmlib.h"
+#endif /* SSE Optimize */
+#if defined(__INTEL_COMPILER)
+#include <ia32intrin.h>
+#endif
#include <stdio.h>
@@ -49,15 +55,15 @@
} vorbis_look_floor1;
typedef struct lsfit_acc{
- long x0;
- long x1;
+ int32_t x0; /* fields are 32-bit and ordered in pairs: fit_line()'s MMX path reads this struct as four __m64 values */
+ int32_t x1; /* (x0,x1) is pair 0, which fit_line() skips when summing */
- long xa;
- long ya;
- long x2a;
- long y2a;
- long xya;
- long an;
+ int32_t xa; /* pair 1: (xa,ya) */
+ int32_t ya;
+ int32_t x2a; /* pair 2: (x2a,y2a) */
+ int32_t y2a;
+ int32_t xya; /* pair 3: (xya,an) */
+ int32_t an;
} lsfit_acc;
/***********************************************/
@@ -83,6 +89,16 @@
}
}
+#if defined(__INTEL_COMPILER)
+static int ilog(unsigned int v){ /* number of bits needed to hold v; 0 for v==0, like the portable loop */
+ return(v ? _bit_scan_reverse(v) + 1 : 0); /* a bit scan of 0 has no defined result, so answer it explicitly */
+}
+
+static int ilog2(unsigned int v){ /* ilog(v-1), with v==0 mapping to 0 */
+ if(v)--v;
+ return(v ? _bit_scan_reverse(v) + 1 : 0); /* v is 0 here for inputs 0 and 1; the portable loop returns 0 */
+}
+#else
static int ilog(unsigned int v){
int ret=0;
while(v){
@@ -101,6 +117,7 @@
}
return(ret);
}
+#endif
static void floor1_pack (vorbis_info_floor *i,oggpack_buffer *opb){
vorbis_info_floor1 *info=(vorbis_info_floor1 *)i;
@@ -283,15 +300,64 @@
return(y0+off);
}
}
+#if defined(__SSE__) /* SSE Optimize */
+static _MM_ALIGN16 const __m128x pfv0 =
+ { .sf = {7.3142857f, 7.3142857f, 7.3142857f, 7.3142857f} };
+static _MM_ALIGN16 const __m128x pfv1 =
+ { .sf = {1023.5f, 1023.5f, 1023.5f, 1023.5f} };
+static _MM_ALIGN16 const __m128x pfv2 =
+ { .sf = {1023.f, 1023.f, 1023.f, 1023.f} };
+#endif /* SSE Optimize */
static int vorbis_dBquant(const float *x){
+#if defined(__SSE__) /* SSE Optimize */
+ __m128 XMM0 = _mm_load_ss(x); /* same formula as the scalar branch below, done in the low SSE lane */
+ XMM0 = _mm_mul_ss(XMM0, pfv0.ps); /* * 7.3142857f */
+ XMM0 = _mm_add_ss(XMM0, pfv1.ps); /* + 1023.5f */
+ XMM0 = _mm_max_ss(XMM0, PFV_0.ps); /* lower clamp; PFV_0 is not defined in this file, presumably all zeros -- confirm */
+ XMM0 = _mm_min_ss(XMM0, pfv2.ps); /* upper clamp at 1023.f */
+ return _mm_cvttss_si32(XMM0); /* truncating float->int conversion */
+#else /* SSE Optimize */
int i= *x*7.3142857f+1023.5f;
if(i>1023)return(1023);
if(i<0)return(0);
return i;
+#endif /* SSE Optimize */
}
-static float FLOOR1_fromdB_LOOKUP[256]={
+#if 0
+#if defined(__SSE__) /* SSE Optimize */
+static __m128 vorbis_dBquant_ps(float *x)
+{
+#if defined(__SSE2__)
+ register __m128 pi;
+ pi = _mm_load_ps(x);
+ pi = _mm_mul_ps(pi, pfv0.ps);
+ pi = _mm_add_ps(pi, pfv1.ps);
+ pi = _mm_max_ps(pi, PFV_0.ps);
+ pi = _mm_min_ps(pi, pfv2.ps);
+ pi = _mm_cvtepi32_ps(_mm_cvttps_epi32(pi));
+ return pi;
+#else
+ register __m128 pi = PM128(x);
+ register __m64 MM0, MM1;
+ pi = _mm_mul_ps(pi, pfv0.ps);
+ pi = _mm_add_ps(pi, pfv1.ps);
+ pi = _mm_max_ps(pi, PFV_0.ps);
+ pi = _mm_min_ps(pi, pfv2.ps);
+ MM0 = _mm_cvttps_pi32(pi);
+ pi = _mm_movehl_ps(pi, pi);
+ MM1 = _mm_cvttps_pi32(pi);
+ pi = _mm_cvtpi32_ps(pi, MM1);
+ pi = _mm_movelh_ps(pi, pi);
+ pi = _mm_cvtpi32_ps(pi, MM0);
+ return pi;
+#endif
+}
+#endif /* SSE Optimize */
+#endif
+
+static const float FLOOR1_fromdB_LOOKUP[256]={
1.0649863e-07F, 1.1341951e-07F, 1.2079015e-07F, 1.2863978e-07F,
1.3699951e-07F, 1.4590251e-07F, 1.5538408e-07F, 1.6548181e-07F,
1.7623575e-07F, 1.8768855e-07F, 1.9988561e-07F, 2.128753e-07F,
@@ -358,74 +424,420 @@
0.82788260F, 0.88168307F, 0.9389798F, 1.F,
};
-static void render_line(int n, int x0,int x1,int y0,int y1,float *d){
- int dy=y1-y0;
- int adx=x1-x0;
- int ady=abs(dy);
- int base=dy/adx;
- int sy=(dy<0?base-1:base+1);
- int x=x0;
- int y=y0;
- int err=0;
-
- ady-=abs(base*adx);
+static void render_line(int n, int x, int x2, int y, int y2,float *d) /* d[x .. min(n,x2)) *= FLOOR1_fromdB_LOOKUP[] along the line (x,y)-(x2,y2) */
+{
+ int shortLen = y2-y;
+ int longLen;
+ int decInc;
+ int j;
+
+ if(n>x2) n=x2;
+
+ longLen = n - x;
+ if (longLen <= 0) /* nothing to draw; also guarantees x2>x for the slope division below */
+ return;
- if(n>x1)n=x1;
-
- if(x<n)
- d[x]*=FLOOR1_fromdB_LOOKUP[y];
-
- while(++x<n){
- err=err+ady;
- if(err>=adx){
- err-=adx;
- y+=sy;
- }else{
- y+=base;
- }
- d[x]*=FLOOR1_fromdB_LOOKUP[y];
- }
+ if(shortLen==0)
+ {
+#if defined(__SSE__) /* SSE Optimize */
+ __m128 XMM0 = _mm_set1_ps(FLOOR1_fromdB_LOOKUP[y]);
+ decInc = x + (longLen&(~7)); /* end index of the 8-wide loop (an index, not a length) */
+ j = x + (longLen&(~3)); /* end index of the 4-wide loop */
+ for(;x<decInc;x+=8)
+ {
+ __m128 XMM1 = _mm_lddqu_ps(d+x );
+ __m128 XMM2 = _mm_lddqu_ps(d+x+4);
+ XMM1 = _mm_mul_ps(XMM1, XMM0);
+ XMM2 = _mm_mul_ps(XMM2, XMM0);
+ _mm_storeu_ps(d+x , XMM1);
+ _mm_storeu_ps(d+x+4, XMM2);
+ }
+ for(;x<j;x+=4)
+ {
+ __m128 XMM1 = _mm_lddqu_ps(d+x );
+ XMM1 = _mm_mul_ps(XMM1, XMM0);
+ _mm_storeu_ps(d+x , XMM1);
+ }
+#endif /* SSE Optimize */
+ for(;x<n;x++)
+ d[x] *= FLOOR1_fromdB_LOOKUP[y];
+ }
+ else
+ {
+ decInc = (shortLen << 21) / (x2 - x); /* slope over the whole segment, as the replaced code did, not just the part clipped to n */
+ if(shortLen>=0)
+ j = 0x200 + (y<<21);
+ else
+ j = 0x1FF800 + (y<<21);
+
+ for (;x<n;x++) /* NOTE(review): fixed-point stepping (21 fraction bits) may not match the replaced integer error-term loop bit for bit -- confirm */
+ {
+ d[x] *= FLOOR1_fromdB_LOOKUP[j >> 21];
+ j += decInc;
+ }
+ }
+ return;
}
-static void render_line0(int x0,int x1,int y0,int y1,int *d){
- int dy=y1-y0;
- int adx=x1-x0;
- int ady=abs(dy);
- int base=dy/adx;
- int sy=(dy<0?base-1:base+1);
- int x=x0;
- int y=y0;
- int err=0;
-
- ady-=abs(base*adx);
+static void render_line0(int x, int x2,int y, int y2, int *d)
+{
+ int shortLen = y2-y;
+ int longLen = x2-x;
+ int decInc = (shortLen << 21) / longLen;
+ int j;
+ if(shortLen>=0)
+ j = 0x200 + (y<<21);
+ else
+ j = 0x1FF800 + (y<<21);
- d[x]=y;
- while(++x<x1){
- err=err+ady;
- if(err>=adx){
- err-=adx;
- y+=sy;
- }else{
- y+=base;
- }
- d[x]=y;
- }
+#if defined(__SSE2__)
+ if(longLen>=4)
+ {
+ __m128i PJ0 = _mm_set_epi32(
+ j+decInc*3, j+decInc*2, j+decInc , j
+ );
+ __m128i PJ1 = _mm_set_epi32(
+ j+decInc*7, j+decInc*6, j+decInc*5, j+decInc*4
+ );
+ __m128i PDECINC = _mm_set1_epi32(decInc*8);
+ int x1 = x+(longLen&(~7));
+ for(;x<x1;x+=8)
+ {
+ __m128i XMM0 = PJ0;
+ __m128i XMM1 = PJ1;
+ XMM0 = _mm_srai_epi32(XMM0, 21);
+ XMM1 = _mm_srai_epi32(XMM1, 21);
+ _mm_storeu_si128((__m128i*)(d+x ), XMM0);
+ _mm_storeu_si128((__m128i*)(d+x+4), XMM1);
+ PJ0 = _mm_add_epi32(PJ0, PDECINC);
+ PJ1 = _mm_add_epi32(PJ1, PDECINC);
+ }
+ if(x2-x>=4)
+ {
+ __m128i XMM0 = PJ0;
+ XMM0 = _mm_srai_epi32(XMM0, 21);
+ _mm_storeu_si128((__m128i*)(d+x ), XMM0);
+ PJ0 = PJ1;
+ x += 4;
+ }
+ j = _mm_cvtsi128_si32(PJ0);
+ }
+#elif defined(__SSE__)
+ if(longLen>=4)
+ {
+ __m64 PJ0 = _mm_set_pi32(j+decInc , j );
+ __m64 PJ1 = _mm_set_pi32(j+decInc*3, j+decInc*2);
+ __m64 PDECINC = _mm_set1_pi32(decInc*4);
+ int x1 = x+(longLen&(~3));
+ for(;x<x1;x+=4)
+ {
+ __m64 MM0 = PJ0;
+ __m64 MM1 = PJ1;
+ MM0 = _mm_srai_pi32(MM0, 21);
+ MM1 = _mm_srai_pi32(MM1, 21);
+ PM64(d+x ) = MM0;
+ PM64(d+x+2) = MM1;
+ PJ0 = _mm_add_pi32(PJ0, PDECINC);
+ PJ1 = _mm_add_pi32(PJ1, PDECINC);
+ }
+ j = _mm_cvtsi64_si32(PJ0); _mm_empty(); /* carry the accumulator into the scalar tail, then clear MMX state as the other MMX blocks in this patch do */
+ }
+#endif
+ for (;x<x2;x++)
+ {
+ d[x] = j >> 21;
+ j += decInc;
+ }
+ return;
}
/* the floor has already been filtered to only include relevant sections */
static int accumulate_fit(const float *flr,const float *mdct,
int x0, int x1,lsfit_acc *a,
- int n,vorbis_info_floor1 *info){
+#if defined(__SSE__) /* SSE Optimize */
+ int n,vorbis_info_floor1 *info, const float *tflr,
+ const float *tmask, const int *tcres)
+#else /* SSE Optimize */
+ int n,vorbis_info_floor1 *info)
+#endif /* SSE Optimize */
+{
long i;
- /*int quantized=vorbis_dBquant(flr+x0);*/
-
- long xa=0,ya=0,x2a=0,y2a=0,xya=0,na=0, xb=0,yb=0,x2b=0,y2b=0,xyb=0,nb=0;
+ int xa=0,ya=0,x2a=0,y2a=0,xya=0,na=0, xb=0,yb=0,x2b=0,y2b=0,xyb=0,nb=0;
+#ifdef __SSE__ /* SSE Optimize */
+ int x05;
+ int j;
+
+ extern float findex[2048];
+ extern float findex2[2048];
+#endif /* SSE Optimize */
memset(a,0,sizeof(*a));
a->x0=x0;
a->x1=x1;
if(x1>=n)x1=n-1;
+#ifdef __SSE__ /* SSE Optimize */
+ x1 ++;
+ {
+ static const int parm0[16] = { /* indexed by the 4-bit lane mask tcres[i]: sum of the positions (0..3) of the set bits */
+ 0, 0, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6
+ };
+ static const int parm3[16] = { /* same sum for the clear bits, i.e. 6 - parm0[mask] */
+ 6, 6, 5, 5, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 0, 0
+ };
+ __m128 PYA;
+ __m128 PY2A;
+ __m128 PX2A;
+ __m128 PXYA;
+ __m128 PYB;
+ __m128 PY2B;
+ __m128 PX2B;
+ __m128 PXYB;
+ x05 = (x0+3)&(~3);
+ x05 = (x05>x1)?x1:x05;
+ if(x1-x05<4)
+ {
+ for(i=x0;i<x1;i++)
+ {
+ int quantized = (int)tflr[i];
+ if(quantized)
+ {
+ if(mdct[i]+info->twofitatten>=flr[i])
+ {
+ xa += i;
+ ya += quantized;
+ x2a += i*i;
+ y2a += quantized*quantized;
+ xya += i*quantized;
+ na ++;
+ }
+ else
+ {
+ xb += i;
+ yb += quantized;
+ x2b += i*i;
+ y2b += quantized*quantized;
+ xyb += i*quantized;
+ nb ++;
+ }
+ }
+ }
+ }
+ else
+ {
+ _mm_prefetch((const float*)(findex +x0) , _MM_HINT_NTA);
+ _mm_prefetch((const float*)(findex2+x0) , _MM_HINT_NTA);
+ PYA = _mm_setzero_ps();
+ PY2A = _mm_setzero_ps();
+ PX2A = _mm_setzero_ps();
+ PXYA = _mm_setzero_ps();
+ PYB = _mm_setzero_ps();
+ PY2B = _mm_setzero_ps();
+ PX2B = _mm_setzero_ps();
+ PXYB = _mm_setzero_ps();
+#if 1
+ j = 16>>(x05-x0);
+ for(i=x0;i<x05;i++)
+ {
+ __m128 XMM0, XMM1, XMM2, XMM3;
+ XMM0 = _mm_load_ss(tflr+i);
+ XMM3 = _mm_load_ss(findex+i);
+ XMM1 = XMM0;
+ XMM2 = XMM0;
+ XMM1 = _mm_mul_ss(XMM1, XMM1);
+ XMM2 = _mm_mul_ss(XMM2, XMM3);
+ if((tcres[x05-4]&j)!=0) /* Type-A 1 unit burst mode */
+ {
+ xa += i;
+ PYA = _mm_add_ss(PYA, XMM0);
+ x2a += i*i;
+ PY2A = _mm_add_ss(PY2A, XMM1);
+ PXYA = _mm_add_ss(PXYA, XMM2);
+ na ++;
+ }
+ else /* Type-B 1 unit burst mode */
+ {
+ xb += i;
+ PYB = _mm_add_ss(PYB, XMM0);
+ x2b += i*i;
+ PY2B = _mm_add_ss(PY2B, XMM1);
+ PXYB = _mm_add_ss(PXYB, XMM2);
+ nb ++;
+ }
+ j = j << 1;
+ }
+#else
+ for(i=x0;i<x05;i++)
+ {
+ int quantized = (int)tflr[i];
+ if(quantized)
+ {
+ if(mdct[i]+info->twofitatten>=flr[i])
+ {
+ xa += i;
+ ya += quantized;
+ x2a += i*i;
+ y2a += quantized*quantized;
+ xya += i*quantized;
+ na ++;
+ }
+ else
+ {
+ xb += i;
+ yb += quantized;
+ x2b += i*i;
+ y2b += quantized*quantized;
+ xyb += i*quantized;
+ nb ++;
+ }
+ }
+ }
+#endif
+ x05 = ((x1-i)&(~3))+i;
+ for(;i<x05;i+=4)
+ {
+ __m128 XMM0, XMM1, XMM2;
+ XMM0 = _mm_load_ps(tflr+i);
+ _mm_prefetch((const float*)(findex +i+16) , _MM_HINT_NTA);
+ _mm_prefetch((const float*)(findex2+i+16) , _MM_HINT_NTA);
+ XMM1 = XMM0;
+ XMM2 = XMM0;
+ XMM1 = _mm_mul_ps(XMM1, XMM1);
+ XMM2 = _mm_mul_ps(XMM2, PM128(findex+i));
+ if(tcres[i]==0xF) /* Type-A 4 unit burst mode */
+ {
+ xa += (i*4+6); /* i+(i+1)+(i+2)+(i+3) */
+ PYA = _mm_add_ps(PYA, XMM0);
+ x2a += 4*i*(i+3)+14; /* i*i+(i+1)*(i+1)+(i+2)*(i+2)+(i+3)*(i+3) */
+ PY2A = _mm_add_ps(PY2A, XMM1);
+ PXYA = _mm_add_ps(PXYA, XMM2);
+ na += 4; /* NOTE(review): counts all four points even where tflr is 0; the scalar loops skip those via if(quantized) -- confirm intended */
+ }
+ else if(tcres[i]==0x0) /* Type-B 4 unit burst mode */
+ {
+ xb += (i*4+6); /* i+(i+1)+(i+2)+(i+3) */
+ PYB = _mm_add_ps(PYB, XMM0);
+ x2b += 4*i*(i+3)+14; /* sum of the four squared indices, as in the Type-A branch */
+ PY2B = _mm_add_ps(PY2B, XMM1);
+ PXYB = _mm_add_ps(PXYB, XMM2);
+ nb += 4;
+ }
+ else
+ {
+ int p = bitCountTable[tcres[i]];
+ int q = 4 - p;
+ __m128 PMASKA = _mm_load_ps(tmask+i);
+ __m128 PMASKB = _mm_xor_ps(PMASKA, PMASKTABLE[15].ps);
+ xa += i*p+parm0[tcres[i]];
+ PYA = _mm_add_ps(PYA , _mm_and_ps(XMM0, PMASKA));
+ PX2A = _mm_add_ps(PX2A, _mm_and_ps(PM128(findex2+i), PMASKA));
+ PY2A = _mm_add_ps(PY2A, _mm_and_ps(XMM1, PMASKA));
+ PXYA = _mm_add_ps(PXYA, _mm_and_ps(XMM2, PMASKA));
+ na += p;
+ xb += i*q+parm3[tcres[i]];
+ PYB = _mm_add_ps(PYB , _mm_and_ps(XMM0, PMASKB));
+ PX2B = _mm_add_ps(PX2B, _mm_and_ps(PM128(findex2+i), PMASKB));
+ PY2B = _mm_add_ps(PY2B, _mm_and_ps(XMM1, PMASKB));
+ PXYB = _mm_add_ps(PXYB, _mm_and_ps(XMM2, PMASKB));
+ nb += q;
+ }
+ }
+ {
+ __m128 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5;
+ __m128x TMA, TMB;
+ XMM0 = XMM1 = PYA;
+ XMM3 = XMM2 = PY2A;
+ XMM0 = _mm_shuffle_ps(XMM0, PX2A, _MM_SHUFFLE(1,0,1,0));
+ XMM1 = _mm_shuffle_ps(XMM1, PX2A, _MM_SHUFFLE(3,2,3,2));
+ XMM2 = _mm_shuffle_ps(XMM2, PXYA, _MM_SHUFFLE(1,0,1,0));
+ XMM3 = _mm_shuffle_ps(XMM3, PXYA, _MM_SHUFFLE(3,2,3,2));
+ XMM4 = XMM0;
+ XMM5 = XMM1;
+ XMM0 = _mm_shuffle_ps(XMM0, XMM2, _MM_SHUFFLE(2,0,2,0));
+ XMM4 = _mm_shuffle_ps(XMM4, XMM2, _MM_SHUFFLE(3,1,3,1));
+ XMM1 = _mm_shuffle_ps(XMM1, XMM3, _MM_SHUFFLE(2,0,2,0));
+ XMM5 = _mm_shuffle_ps(XMM5, XMM3, _MM_SHUFFLE(3,1,3,1));
+ XMM0 = _mm_add_ps(XMM0, XMM4);
+ XMM1 = _mm_add_ps(XMM1, XMM5);
+ XMM0 = _mm_add_ps(XMM0, XMM1);
+
+ TMA.ps = XMM0;
+
+ XMM0 = XMM1 = PYB;
+ XMM3 = XMM2 = PY2B;
+ XMM0 = _mm_shuffle_ps(XMM0, PX2B, _MM_SHUFFLE(1,0,1,0));
+ XMM1 = _mm_shuffle_ps(XMM1, PX2B, _MM_SHUFFLE(3,2,3,2));
+ XMM2 = _mm_shuffle_ps(XMM2, PXYB, _MM_SHUFFLE(1,0,1,0));
+ XMM3 = _mm_shuffle_ps(XMM3, PXYB, _MM_SHUFFLE(3,2,3,2));
+ XMM4 = XMM0;
+ XMM5 = XMM1;
+ XMM0 = _mm_shuffle_ps(XMM0, XMM2, _MM_SHUFFLE(2,0,2,0));
+ XMM4 = _mm_shuffle_ps(XMM4, XMM2, _MM_SHUFFLE(3,1,3,1));
+ XMM1 = _mm_shuffle_ps(XMM1, XMM3, _MM_SHUFFLE(2,0,2,0));
+ XMM5 = _mm_shuffle_ps(XMM5, XMM3, _MM_SHUFFLE(3,1,3,1));
+ XMM0 = _mm_add_ps(XMM0, XMM4);
+ XMM1 = _mm_add_ps(XMM1, XMM5);
+ XMM0 = _mm_add_ps(XMM0, XMM1);
+
+ TMB.ps = XMM0;
+
+#if defined(__SSE2__)
+ TMA.pi = _mm_cvttps_epi32(TMA.ps);
+ TMB.pi = _mm_cvttps_epi32(TMB.ps);
+#else
+ {
+ __m64 MM0, MM1, MM2, MM3;
+ MM0 = _mm_cvttps_pi32(TMA.ps);
+ MM2 = _mm_cvttps_pi32(TMB.ps);
+ TMA.ps = _mm_movehl_ps(TMA.ps, TMA.ps);
+ TMB.ps = _mm_movehl_ps(TMB.ps, TMB.ps);
+ MM1 = _mm_cvttps_pi32(TMA.ps);
+ MM3 = _mm_cvttps_pi32(TMB.ps);
+ TMA.pi64[0] = MM0;
+ TMB.pi64[0] = MM2;
+ TMA.pi64[1] = MM1;
+ TMB.pi64[1] = MM3;
+ }
+ _mm_empty();
+#endif
+
+ ya += TMA.si32[0];
+ x2a += TMA.si32[1];
+ y2a += TMA.si32[2];
+ xya += TMA.si32[3];
+ yb += TMB.si32[0];
+ x2b += TMB.si32[1];
+ y2b += TMB.si32[2];
+ xyb += TMB.si32[3];
+ }
+ for(;i<x1;i++)
+ {
+ int quantized = (int)tflr[i];
+ if(quantized)
+ {
+ if(mdct[i]+info->twofitatten>=flr[i])
+ {
+ xa += i;
+ ya += quantized;
+ x2a += i*i;
+ y2a += quantized*quantized;
+ xya += i*quantized;
+ na ++;
+ }
+ else
+ {
+ xb += i;
+ yb += quantized;
+ x2b += i*i;
+ y2b += quantized*quantized;
+ xyb += i*quantized;
+ nb ++;
+ }
+ }
+ }
+ }
+ }
+#else /* SSE Optimize */
for(i=x0;i<=x1;i++){
int quantized=vorbis_dBquant(flr+i);
if(quantized){
@@ -446,6 +858,7 @@
}
}
}
+#endif /* SSE Optimize */
xb+=xa;
yb+=ya;
@@ -470,10 +883,93 @@
}
static void fit_line(lsfit_acc *a,int fits,int *y0,int *y1){
- long x=0,y=0,x2=0,y2=0,xy=0,an=0,i;
- long x0=a[0].x0;
- long x1=a[fits-1].x1;
+ int32_t x=0, y=0, x2=0, y2=0, xy=0, an=0, i; /* zeroed: the non-SSE loop below accumulates into these with += */
+ int32_t x0 = a[0].x0;
+ int32_t x1 = a[fits-1].x1;
+#ifdef __SSE__ /* SSE Optimize */
+//#if defined(__SSE2__)&&!defined(__PROF__)
+#if 0 // defined(__SSE2__)
+ __m128i XMM0, XMM1, XMM2, XMM3;
+ __m128x T;
+ __m128i *PA = (__m128i*)a;
+ XMM0 = XMM1 = XMM2 = XMM3 = _mm_setzero_si128();
+ for(i=0;i<(fits&(~1));i+=2)
+ {
+ __m128i XMM4 = _mm_load_si128(PA+i*2 );
+ __m128i XMM5 = _mm_load_si128(PA+i*2+1);
+ __m128i XMM6 = _mm_load_si128(PA+i*2+2);
+ __m128i XMM7 = _mm_load_si128(PA+i*2+3);
+ XMM0 = _mm_add_epi32(XMM0, XMM4);
+ XMM1 = _mm_add_epi32(XMM1, XMM5);
+ XMM2 = _mm_add_epi32(XMM2, XMM6);
+ XMM3 = _mm_add_epi32(XMM3, XMM7);
+ }
+ for(;i<fits;i++)
+ {
+ __m128i XMM4 = _mm_load_si128(PA+i*2 );
+ __m128i XMM5 = _mm_load_si128(PA+i*2+1);
+ XMM0 = _mm_add_epi32(XMM0, XMM4);
+ XMM1 = _mm_add_epi32(XMM1, XMM5);
+ }
+ XMM0 = _mm_add_epi32(XMM0, XMM2);
+ XMM1 = _mm_add_epi32(XMM1, XMM3);
+ T.pi = XMM0;
+ x = T.si32[2];
+ y = T.si32[3];
+ T.pi = XMM1;
+ x2 = T.si32[0];
+ y2 = T.si32[1];
+ xy = T.si32[2];
+ an = T.si32[3];
+#else
+ __m64 XY, X2Y2, XYAN;
+ __m64 *PA = (__m64*)a;
+ XY =
+ X2Y2 =
+ XYAN = _mm_setzero_si64();
+
+ for(i=0;i<(fits&~1);i+=2)
+ {
+ __m64 MM0 = *(PA+1);
+ __m64 MM1 = *(PA+2);
+ __m64 MM2 = *(PA+3);
+ XY = _mm_add_pi32(XY, MM0);
+ X2Y2 = _mm_add_pi32(X2Y2, MM1);
+ XYAN = _mm_add_pi32(XYAN, MM2);
+ MM0 = *(PA+5);
+ MM1 = *(PA+6);
+ MM2 = *(PA+7);
+ XY = _mm_add_pi32(XY, MM0);
+ X2Y2 = _mm_add_pi32(X2Y2, MM1);
+ XYAN = _mm_add_pi32(XYAN, MM2);
+ PA += 8;
+ }
+ for(;i<fits;i++)
+ {
+ __m64 MM0 = *(PA+1);
+ __m64 MM1 = *(PA+2);
+ __m64 MM2 = *(PA+3);
+ XY = _mm_add_pi32(XY, MM0);
+ X2Y2 = _mm_add_pi32(X2Y2, MM1);
+ XYAN = _mm_add_pi32(XYAN, MM2);
+ PA += 4;
+ }
+ {
+ __m64x M0X, M1X, M2X;
+ M0X.pi64 = XY;
+ M1X.pi64 = X2Y2;
+ M2X.pi64 = XYAN;
+ x = M0X.ssi32[0];
+ y = M0X.ssi32[1];
+ x2 = M1X.ssi32[0];
+ y2 = M1X.ssi32[1];
+ xy = M2X.ssi32[0];
+ an = M2X.ssi32[1];
+ }
+ _mm_empty();
+#endif
+#else /* SSE Optimize */
for(i=0;i<fits;i++){
x+=a[i].xa;
y+=a[i].ya;
@@ -482,6 +978,7 @@
xy+=a[i].xya;
an+=a[i].an;
}
+#endif /* SSE Optimize */
if(*y0>=0){
x+= x0;
@@ -537,7 +1034,138 @@
static int inspect_error(int x0,int x1,int y0,int y1,const float *mask,
const float *mdct,
+#if defined(__SSE__) /* SSE Optimize */
+ vorbis_info_floor1 *info, const float *tflr,
+ const float *tmask, const int *tcres){
+#else /* SSE Optimize */
vorbis_info_floor1 *info){
+#endif /* SSE Optimize */
+#if defined(__SSE__) /* SSE Optimize */
+ int x = x0;
+ int y = y0;
+ int val = vorbis_dBquant(mask+x);
+ int mse = 0;
+ int n = 0;
+ int shortLen = y1-y;
+ int longLen = x1-x;
+ int decInc = (shortLen << 21) / longLen;
+ int j;
+
+ if(shortLen>=0)
+ j = 0x200 + (y<<21);
+ else
+ j = 0x1FF800 + (y<<21);
+ {
+ int x05;
+ x05 = (x+3)&(~3);
+ x05 = (x05>x1)?x1:x05;
+ for(;x<x05;x++)
+ {
+ y = j >> 21;
+ val = tflr[x];
+ mse += ((y-val)*(y-val));
+ n++;
+ if(mdct[x]+info->twofitatten>=mask[x])
+ {
+ if(y+info->maxover<val)return(1);
+ if(y-info->maxunder>val)return(1);
+ }
+ j += decInc;
+ }
+ }
+ {
+ register __m128 PMSE;
+ __m128 PIMOVER = _mm_set1_ps(info->maxover);
+ __m128 PIMUNDER = _mm_set1_ps(info->maxunder);
+#if defined(__SSE2__)
+ __m128i PJ0 = _mm_set_epi32(
+ j+decInc*3, j+decInc*2, j+decInc , j
+ );
+ __m128i PDECINC = _mm_set1_epi32(decInc*4);
+#else
+ __m64 PJ0 = _mm_set_pi32(j+decInc , j );
+ __m64 PJ1 = _mm_set_pi32(j+decInc*3, j+decInc*2);
+ __m64 PDECINC = _mm_set1_pi32(decInc*4);
+#endif
+ int x05 = x1&(~3);
+
+ x05 = (x05>x1)?x1:x05;
+ PMSE = _mm_setzero_ps();
+ for(;x<x05;x+=4)
+ {
+ __m128 PY;
+ register __m128 PVAL, PDMSE;
+#if defined(__SSE2__)
+ {
+ __m128i XMM0 = PJ0;
+ XMM0 = _mm_srai_epi32(XMM0, 21);
+ PY = _mm_cvtepi32_ps(XMM0);
+ PJ0 = _mm_add_epi32(PJ0, PDECINC);
+ }
+#else
+ {
+ __m64 MM1 = PJ1;
+ __m64 MM0 = PJ0;
+ MM1 = _mm_srai_pi32(MM1, 21);
+ MM0 = _mm_srai_pi32(MM0, 21);
+#pragma warning(disable : 592)
+ PY = _mm_cvtpi32_ps(PY, MM1); /* PY is read before being set: only the low two lanes written here are kept, the upper two are replaced by the movelh below */
+#pragma warning(default : 592)
+ PJ1 = _mm_add_pi32(PJ1, PDECINC);
+ PY = _mm_movelh_ps(PY, PY);
+ PJ0 = _mm_add_pi32(PJ0, PDECINC);
+ PY = _mm_cvtpi32_ps(PY, MM0);
+ }
+#endif
+
+ PVAL = _mm_load_ps(tflr+x);
+ PDMSE = PY;
+ PDMSE = _mm_sub_ps(PDMSE, PVAL);
+ PDMSE = _mm_mul_ps(PDMSE, PDMSE);
+ PMSE = _mm_add_ps(PMSE, PDMSE);
+ n += 4;
+ if(tcres[x]){
+ register __m128 PMASK1, PMASK2;
+ PMASK1 = PY;
+ PMASK2 = PY;
+ PMASK1 = _mm_add_ps(PMASK1, PIMOVER);
+ PMASK2 = _mm_sub_ps(PMASK2, PIMUNDER);
+ PMASK1 = _mm_cmplt_ps(PMASK1, PVAL);
+ PMASK2 = _mm_cmpgt_ps(PMASK2, PVAL);
+ PMASK1 = _mm_or_ps(PMASK1, PMASK2);
+ if(_mm_movemask_ps(PMASK1)&tcres[x])
+ {
+#if !defined(__SSE2__)
+ _mm_empty();
+#endif
+ return(1);
+ }
+ }
+ }
+#if defined(__SSE2__)
+ j = _mm_cvtsi128_si32(PJ0);
+#else
+ j = _mm_cvtsi64_si32(PJ0);
+ _mm_empty();
+#endif
+ mse += (int)_mm_add_horz(PMSE);
+ }
+ {
+ for(;x<x1;x++)
+ {
+ y = j >> 21;
+ val = tflr[x];
+ mse += ((y-val)*(y-val));
+ n++;
+ if(mdct[x]+info->twofitatten>=mask[x])
+ {
+ if(y+info->maxover<val)return(1);
+ if(y-info->maxunder>val)return(1);
+ }
+ j += decInc;
+ }
+ }
+#else /* SSE Optimize */
int dy=y1-y0;
int adx=x1-x0;
int ady=abs(dy);
@@ -579,6 +1207,7 @@
}
}
}
+#endif /* SSE Optimize */
if(info->maxover*info->maxover/n>info->maxerr)return(0);
if(info->maxunder*info->maxunder/n>info->maxerr)return(0);
@@ -614,6 +1243,185 @@
int *output=NULL;
int memo[VIF_POSIT+2];
+#if defined(__SSE__) /* SSE Optimize */
+ float *tflr = (float*)_ogg_alloca(sizeof(float)*n);
+ float *tmask = (float*)_ogg_alloca(sizeof(float)*n);
+ int *tcres = (int*)_ogg_alloca(sizeof(int)*n);
+ __m128 PIT = _mm_set1_ps(info->twofitatten);
+
+ /*
+ preprocess (vorbis_dbQuant)
+ */
+ for(i=0;i<n;i+=16) /* NOTE(review): each pass stores 16 floats into the n-float tflr buffer, so n is presumably a multiple of 16 -- confirm */
+ {
+ __m128 XMM0, XMM1, XMM2, XMM3;
+#if !defined(__SSE2__)
+ register __m64 MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7;
+#endif
+ XMM0 = _mm_load_ps(logmask+i );
+ XMM1 = _mm_load_ps(logmask+i+ 4);
+ XMM2 = _mm_load_ps(logmask+i+ 8);
+ XMM3 = _mm_load_ps(logmask+i+12);
+ XMM0 = _mm_mul_ps(XMM0, pfv0.ps);
+ XMM1 = _mm_mul_ps(XMM1, pfv0.ps);
+ XMM2 = _mm_mul_ps(XMM2, pfv0.ps);
+ XMM3 = _mm_mul_ps(XMM3, pfv0.ps);
+ XMM0 = _mm_add_ps(XMM0, pfv1.ps);
+ XMM1 = _mm_add_ps(XMM1, pfv1.ps);
+ XMM2 = _mm_add_ps(XMM2, pfv1.ps);
+ XMM3 = _mm_add_ps(XMM3, pfv1.ps);
+ XMM0 = _mm_max_ps(XMM0, PFV_0.ps);
+ XMM1 = _mm_max_ps(XMM1, PFV_0.ps);
+ XMM2 = _mm_max_ps(XMM2, PFV_0.ps);
+ XMM3 = _mm_max_ps(XMM3, PFV_0.ps);
+ XMM0 = _mm_min_ps(XMM0, pfv2.ps);
+ XMM1 = _mm_min_ps(XMM1, pfv2.ps);
+ XMM2 = _mm_min_ps(XMM2, pfv2.ps);
+ XMM3 = _mm_min_ps(XMM3, pfv2.ps);
+#if defined(__SSE2__)
+ XMM0 = _mm_cvtepi32_ps(_mm_cvttps_epi32(XMM0));
+ XMM1 = _mm_cvtepi32_ps(_mm_cvttps_epi32(XMM1));
+ XMM2 = _mm_cvtepi32_ps(_mm_cvttps_epi32(XMM2));
+ XMM3 = _mm_cvtepi32_ps(_mm_cvttps_epi32(XMM3));
+#else
+ MM0 = _mm_cvttps_pi32(XMM0);
+ MM2 = _mm_cvttps_pi32(XMM1);
+ MM4 = _mm_cvttps_pi32(XMM2);
+ MM6 = _mm_cvttps_pi32(XMM3);
+ XMM0 = _mm_movehl_ps(XMM0, XMM0);
+ XMM1 = _mm_movehl_ps(XMM1, XMM1);
+ XMM2 = _mm_movehl_ps(XMM2, XMM2);
+ XMM3 = _mm_movehl_ps(XMM3, XMM3);
+ MM1 = _mm_cvttps_pi32(XMM0);
+ MM3 = _mm_cvttps_pi32(XMM1);
+ MM5 = _mm_cvttps_pi32(XMM2);
+ MM7 = _mm_cvttps_pi32(XMM3);
+ XMM0 = _mm_cvtpi32_ps(XMM0, MM1);
+ XMM1 = _mm_cvtpi32_ps(XMM1, MM3);
+ XMM2 = _mm_cvtpi32_ps(XMM2, MM5);
+ XMM3 = _mm_cvtpi32_ps(XMM3, MM7);
+ XMM0 = _mm_movelh_ps(XMM0, XMM0);
+ XMM1 = _mm_movelh_ps(XMM1, XMM1);
+ XMM2 = _mm_movelh_ps(XMM2, XMM2);
+ XMM3 = _mm_movelh_ps(XMM3, XMM3);
+ XMM0 = _mm_cvtpi32_ps(XMM0, MM0);
+ XMM1 = _mm_cvtpi32_ps(XMM1, MM2);
+ XMM2 = _mm_cvtpi32_ps(XMM2, MM4);
+ XMM3 = _mm_cvtpi32_ps(XMM3, MM6);
+#endif
+ _mm_store_ps(tflr+i , XMM0);
+ _mm_store_ps(tflr+i+ 4, XMM1);
+ _mm_store_ps(tflr+i+ 8, XMM2);
+ _mm_store_ps(tflr+i+12, XMM3);
+ }
+#if !defined(__SSE2__)
+ _mm_empty();
+#endif
+ /*
+ preprocess (mdct+info->twofitatten>=flr)
+ */
+ for(i=0;i<n;i+=64) /* NOTE(review): each pass writes indices i..i+63 of the n-element tmask/tcres buffers, so n is presumably a multiple of 64 -- confirm */
+ {
+ __m128 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6;
+ XMM0 = _mm_load_ps(logmdct+i );
+ XMM1 = _mm_load_ps(logmdct+i+ 4);
+ XMM2 = _mm_load_ps(logmask+i );
+ XMM3 = _mm_load_ps(logmask+i+ 4);
+ XMM0 = _mm_add_ps(XMM0, PIT);
+ XMM1 = _mm_add_ps(XMM1, PIT);
+ XMM4 = _mm_load_ps(logmdct+i+ 8);
+ XMM5 = _mm_load_ps(logmdct+i+12);
+ XMM2 = _mm_cmple_ps(XMM2, XMM0); /* mask <= mdct+twofitatten: the same >= test the scalar loops use (equality included) */
+ XMM3 = _mm_cmple_ps(XMM3, XMM1);
+ XMM0 = _mm_load_ps(logmask+i+ 8);
+ XMM1 = _mm_load_ps(logmask+i+12);
+ XMM4 = _mm_add_ps(XMM4, PIT);
+ XMM5 = _mm_add_ps(XMM5, PIT);
+ _mm_store_ps(tmask+i , XMM2);
+ _mm_store_ps(tmask+i+ 4, XMM3);
+ XMM6 = _mm_load_ps(logmdct+i+16);
+ XMM0 = _mm_cmple_ps(XMM0, XMM4);
+ XMM4 = _mm_load_ps(logmdct+i+20);
+ XMM1 = _mm_cmple_ps(XMM1, XMM5);
+ XMM5 = _mm_load_ps(logmask+i+16);
+ tcres[i ] = _mm_movemask_ps(XMM2);
+ XMM6 = _mm_add_ps(XMM6, PIT);
+ XMM2 = _mm_load_ps(logmask+i+20);
+ tcres[i+ 4] = _mm_movemask_ps(XMM3);
+ XMM4 = _mm_add_ps(XMM4, PIT);
+ XMM3 = _mm_load_ps(logmdct+i+24);
+ _mm_store_ps(tmask+i+ 8, XMM0);
+ tcres[i+ 8] = _mm_movemask_ps(XMM0);
+ _mm_store_ps(tmask+i+12, XMM1);
+ XMM0 = _mm_load_ps(logmdct+i+28);
+ tcres[i+12] = _mm_movemask_ps(XMM1);
+ XMM1 = _mm_load_ps(logmdct+i+32);
+ XMM5 = _mm_cmple_ps(XMM5, XMM6);
+ XMM2 = _mm_cmple_ps(XMM2, XMM4);
+ XMM6 = _mm_load_ps(logmask+i+24);
+ XMM4 = _mm_load_ps(logmask+i+28);
+ XMM3 = _mm_add_ps(XMM3, PIT);
+ XMM0 = _mm_add_ps(XMM0, PIT);
+ _mm_store_ps(tmask+i+16, XMM5);
+ _mm_store_ps(tmask+i+20, XMM2);
+ XMM6 = _mm_cmple_ps(XMM6, XMM3);
+ XMM3 = _mm_load_ps(logmdct+i+36);
+ XMM4 = _mm_cmple_ps(XMM4, XMM0);
+ XMM0 = _mm_load_ps(logmask+i+32);
+ tcres[i+16] = _mm_movemask_ps(XMM5);
+ XMM5 = _mm_load_ps(logmask+i+36);
+ XMM1 = _mm_add_ps(XMM1, PIT);
+ XMM3 = _mm_add_ps(XMM3, PIT);
+ tcres[i+20] = _mm_movemask_ps(XMM2);
+ XMM2 = _mm_load_ps(logmdct+i+40);
+ _mm_store_ps(tmask+i+24, XMM6);
+ tcres[i+24] = _mm_movemask_ps(XMM6);
+ XMM6 = _mm_load_ps(logmdct+i+44);
+ _mm_store_ps(tmask+i+28, XMM4);
+ XMM0 = _mm_cmple_ps(XMM0, XMM1);
+ tcres[i+28] = _mm_movemask_ps(XMM4);
+ XMM5 = _mm_cmple_ps(XMM5, XMM3);
+ XMM1 = _mm_load_ps(logmask+i+40);
+ XMM3 = _mm_load_ps(logmask+i+44);
+ XMM2 = _mm_add_ps(XMM2, PIT);
+ XMM6 = _mm_add_ps(XMM6, PIT);
+ _mm_store_ps(tmask+i+32, XMM0);
+ _mm_store_ps(tmask+i+36, XMM5);
+ XMM4 = _mm_load_ps(logmdct+i+48);
+ XMM1 = _mm_cmple_ps(XMM1, XMM2);
+ XMM2 = _mm_load_ps(logmdct+i+52);
+ XMM3 = _mm_cmple_ps(XMM3, XMM6);
+ XMM6 = _mm_load_ps(logmask+i+48);
+ tcres[i+32] = _mm_movemask_ps(XMM0);
+ XMM4 = _mm_add_ps(XMM4, PIT);
+ XMM0 = _mm_load_ps(logmask+i+52);
+ tcres[i+36] = _mm_movemask_ps(XMM5);
+ XMM2 = _mm_add_ps(XMM2, PIT);
+ XMM5 = _mm_load_ps(logmdct+i+56);
+ _mm_store_ps(tmask+i+40, XMM1);
+ tcres[i+40] = _mm_movemask_ps(XMM1);
+ _mm_store_ps(tmask+i+44, XMM3);
+ XMM1 = _mm_load_ps(logmdct+i+60);
+ tcres[i+44] = _mm_movemask_ps(XMM3);
+ XMM6 = _mm_cmple_ps(XMM6, XMM4);
+ XMM0 = _mm_cmple_ps(XMM0, XMM2);
+ XMM4 = _mm_load_ps(logmask+i+56);
+ XMM2 = _mm_load_ps(logmask+i+60);
+ XMM5 = _mm_add_ps(XMM5, PIT);
+ XMM1 = _mm_add_ps(XMM1, PIT);
+ _mm_store_ps(tmask+i+48, XMM6);
+ _mm_store_ps(tmask+i+52, XMM0);
+ XMM4 = _mm_cmple_ps(XMM4, XMM5);
+ XMM2 = _mm_cmple_ps(XMM2, XMM1);
+ tcres[i+48] = _mm_movemask_ps(XMM6);
+ tcres[i+52] = _mm_movemask_ps(XMM0);
+ _mm_store_ps(tmask+i+56, XMM4);
+ tcres[i+56] = _mm_movemask_ps(XMM4);
+ _mm_store_ps(tmask+i+60, XMM2);
+ tcres[i+60] = _mm_movemask_ps(XMM2);
+ }
+#endif /* SSE Optimize */
+
for(i=0;i<posts;i++)fit_valueA[i]=-200; /* mark all unused */
for(i=0;i<posts;i++)fit_valueB[i]=-200; /* mark all unused */
for(i=0;i<posts;i++)loneighbor[i]=0; /* 0 for the implicit 0 post */
@@ -622,6 +1430,19 @@
/* quantize the relevant floor points and collect them into line fit
structures (one per minimal division) at the same time */
+#if defined(__SSE__) /* SSE Optimize */
+ if(posts==0)
+ {
+ nonzero+=accumulate_fit(logmask,logmdct,0,n,fits,n,info, tflr, tmask, tcres);
+ }
+ else
+ {
+ for(i=0;i<posts-1;i++)
+ nonzero+=accumulate_fit(logmask,logmdct,look->sorted_index[i],
+ look->sorted_index[i+1],fits+i,
+ n,info, tflr, tmask, tcres);
+ }
+#else /* SSE Optimize */
if(posts==0){
nonzero+=accumulate_fit(logmask,logmdct,0,n,fits,n,info);
}else{
@@ -630,6 +1451,7 @@
look->sorted_index[i+1],fits+i,
n,info);
}
+#endif /* SSE Optimize */
if(nonzero){
/* start by fitting the implicit base case.... */
@@ -669,7 +1491,11 @@
exit(1);
}
+#if defined(__SSE__) /* SSE Optimize */
+ if(inspect_error(lx,hx,ly,hy,logmask,logmdct,info, tflr, tmask, tcres)){
+#else /* SSE Optimize */
if(inspect_error(lx,hx,ly,hy,logmask,logmdct,info)){
+#endif /* SSE Optimize */
/* outside error bounds/begin search area. Split it. */
int ly0=-200;
int ly1=-200;
@@ -763,7 +1589,7 @@
int floor1_encode(oggpack_buffer *opb,vorbis_block *vb,
vorbis_look_floor1 *look,
- int *post,int *ilogmask){
+ int *post,int *ilogmask){
long i,j;
vorbis_info_floor1 *info=look->vi;
@@ -777,6 +1603,324 @@
/* quantize values to multiplier spec */
if(post){
+#if defined(__SSE2__)
+ int posts4 = posts&(~3);
+ int posts8 = posts&(~7);
+ static _MM_ALIGN16 const __m128x PIV0 =
+ { .si32 = {0x00007FFF, 0x00007FFF, 0x00007FFF, 0x00007FFF} };
+ static _MM_ALIGN16 const __m128x PIV1 =
+ { .si32 = {0xFFFF8000, 0xFFFF8000, 0xFFFF8000, 0xFFFF8000} };
+
+ i = 0;
+ switch(info->mult)
+ {
+ case 1:
+ for(;i<posts8;i+=8)
+ {
+ __m128i PV0 = PM128I(post+i );
+ __m128i PV1 = PM128I(post+i+4);
+ __m128i PV2 = PV0;
+ __m128i PV3 = PV1;
+ PV0 = _mm_and_si128(PV0, PIV0.pi);
+ PV1 = _mm_and_si128(PV1, PIV0.pi);
+ PV2 = _mm_and_si128(PV2, PIV1.pi);
+ PV3 = _mm_and_si128(PV3, PIV1.pi);
+ PV0 = _mm_srli_epi32(PV0, 2);
+ PV1 = _mm_srli_epi32(PV1, 2);
+ PV0 = _mm_or_si128(PV0, PV2);
+ PV1 = _mm_or_si128(PV1, PV3);
+ _mm_store_si128(post+i, PV0);
+ _mm_store_si128(post+i+4, PV1);
+ }
+ for(;i<posts4;i+=4)
+ {
+ __m128i PV0 = PM128I(post+i );
+ __m128i PV2 = PV0;
+ PV0 = _mm_and_si128(PV0, PIV0.pi);
+ PV2 = _mm_and_si128(PV2, PIV1.pi);
+ PV0 = _mm_srli_epi32(PV0, 2);
+ PV0 = _mm_or_si128(PV0, PV2);
+ _mm_store_si128(post+i, PV0);
+ }
+ break;
+ case 2:
+ for(;i<posts8;i+=8)
+ {
+ __m128i PV0 = PM128I(post+i );
+ __m128i PV1 = PM128I(post+i+4);
+ __m128i PV2 = PV0;
+ __m128i PV3 = PV1;
+ PV0 = _mm_and_si128(PV0, PIV0.pi);
+ PV1 = _mm_and_si128(PV1, PIV0.pi);
+ PV2 = _mm_and_si128(PV2, PIV1.pi);
+ PV3 = _mm_and_si128(PV3, PIV1.pi);
+ PV0 = _mm_srli_epi32(PV0, 3);
+ PV1 = _mm_srli_epi32(PV1, 3);
+ PV0 = _mm_or_si128(PV0, PV2);
+ PV1 = _mm_or_si128(PV1, PV3);
+ _mm_store_si128(post+i, PV0);
+ _mm_store_si128(post+i+4, PV1);
+ }
+ for(;i<posts4;i+=4)
+ {
+ __m128i PV0 = PM128I(post+i );
+ __m128i PV2 = PV0;
+ PV0 = _mm_and_si128(PV0, PIV0.pi);
+ PV2 = _mm_and_si128(PV2, PIV1.pi);
+ PV0 = _mm_srli_epi32(PV0, 3);
+ PV0 = _mm_or_si128(PV0, PV2);
+ _mm_store_si128(post+i, PV0);
+ }
+ break;
+ case 3: /* 1024 -> 86: val/12, computed exactly as ((val>>2)*0xAAAB)>>17 */
+ for(;i<posts8;i+=8)
+ {
+ __m128i PV0 = PM128I(post+i );
+ __m128i PV1 = PM128I(post+i+4);
+ __m128i PV2 = PV0;
+ __m128i PV3 = PV1;
+ __m128i PV4, PV5;
+ PV0 = _mm_and_si128(PV0, PIV0.pi); /* value bits (low 15) */
+ PV1 = _mm_and_si128(PV1, PIV0.pi);
+ PV2 = _mm_and_si128(PV2, PIV1.pi); /* flag bits, OR-ed back in below */
+ PV3 = _mm_and_si128(PV3, PIV1.pi);
+ PV0 = _mm_srli_epi32(PV0, 2); /* x = val/4, at most 13 bits */
+ PV1 = _mm_srli_epi32(PV1, 2);
+ PV4 = _mm_set1_epi32(0xAAAB); /* ceil(2^17/3); upper 16 bits of each lane are 0 */
+ PV5 = PV4;
+ PV0 = _mm_mulhi_epu16(PV0, PV4); /* (x*0xAAAB)>>16 in the low half, 0 in the high half */
+ PV1 = _mm_mulhi_epu16(PV1, PV5);
+ PV0 = _mm_srli_epi32(PV0, 1); /* >>17 total: x/3 == val/12, matching the scalar tail */
+ PV1 = _mm_srli_epi32(PV1, 1);
+ PV0 = _mm_or_si128(PV0, PV2);
+ PV1 = _mm_or_si128(PV1, PV3);
+ _mm_store_si128(post+i, PV0);
+ _mm_store_si128(post+i+4, PV1);
+ }
+ for(;i<posts4;i+=4)
+ {
+ __m128i PV0 = PM128I(post+i );
+ __m128i PV2 = PV0;
+ __m128i PV4;
+ PV0 = _mm_and_si128(PV0, PIV0.pi);
+ PV2 = _mm_and_si128(PV2, PIV1.pi);
+ PV0 = _mm_srli_epi32(PV0, 2); /* x = val/4 */
+ PV4 = _mm_set1_epi32(0xAAAB); /* ceil(2^17/3) */
+ PV0 = _mm_mulhi_epu16(PV0, PV4); /* (x*0xAAAB)>>16 */
+ PV0 = _mm_srli_epi32(PV0, 1); /* x/3 == val/12 */
+ PV0 = _mm_or_si128(PV0, PV2);
+ _mm_store_si128(post+i, PV0);
+ }
+ break;
+ case 4:
+ for(;i<posts8;i+=8)
+ {
+ __m128i PV0 = PM128I(post+i );
+ __m128i PV1 = PM128I(post+i+4);
+ __m128i PV2 = PV0;
+ __m128i PV3 = PV1;
+ PV0 = _mm_and_si128(PV0, PIV0.pi);
+ PV1 = _mm_and_si128(PV1, PIV0.pi);
+ PV2 = _mm_and_si128(PV2, PIV1.pi);
+ PV3 = _mm_and_si128(PV3, PIV1.pi);
+ PV0 = _mm_srli_epi32(PV0, 4);
+ PV1 = _mm_srli_epi32(PV1, 4);
+ PV0 = _mm_or_si128(PV0, PV2);
+ PV1 = _mm_or_si128(PV1, PV3);
+ _mm_store_si128(post+i, PV0);
+ _mm_store_si128(post+i+4, PV1);
+ }
+ for(;i<posts4;i+=4)
+ {
+ __m128i PV0 = PM128I(post+i );
+ __m128i PV2 = PV0;
+ PV0 = _mm_and_si128(PV0, PIV0.pi);
+ PV2 = _mm_and_si128(PV2, PIV1.pi);
+ PV0 = _mm_srli_epi32(PV0, 4);
+ PV0 = _mm_or_si128(PV0, PV2);
+ _mm_store_si128(post+i, PV0);
+ }
+ break;
+ }
+ for(;i<posts;i++)
+ {
+ int val = post[i]&0x7fff;
+ switch(info->mult)
+ {
+ case 1: /* 1024 -> 256 */
+ val>>=2;
+ break;
+ case 2: /* 1024 -> 128 */
+ val>>=3;
+ break;
+ case 3: /* 1024 -> 86 */
+ val/=12;
+ break;
+ case 4: /* 1024 -> 64 */
+ val>>=4;
+ break;
+ }
+ post[i]=val | (post[i]&0x8000);
+ }
+#elif defined(__SSE__)
+ int posts2 = posts&(~1);
+ int posts4 = posts&(~3);
+ static uint32_t PIV0[2] = {0x00007FFF, 0x00007FFF};
+ static uint32_t PIV1[2] = {0xFFFF8000, 0xFFFF8000};
+
+ i = 0;
+ switch(info->mult)
+ {
+ case 1:
+ for(;i<posts4;i+=4)
+ {
+ __m64 PV0 = PM64(post+i );
+ __m64 PV1 = PM64(post+i+2);
+ __m64 PV2 = PV0;
+ __m64 PV3 = PV1;
+ PV0 = _mm_and_si64(PV0, PM64(PIV0));
+ PV1 = _mm_and_si64(PV1, PM64(PIV0));
+ PV2 = _mm_and_si64(PV2, PM64(PIV1));
+ PV3 = _mm_and_si64(PV3, PM64(PIV1));
+ PV0 = _mm_srli_pi32(PV0, 2);
+ PV1 = _mm_srli_pi32(PV1, 2);
+ PV0 = _mm_or_si64(PV0, PV2);
+ PV1 = _mm_or_si64(PV1, PV3);
+ PM64(post+i ) = PV0;
+ PM64(post+i+2) = PV1;
+ }
+ for(;i<posts2;i+=2)
+ {
+ __m64 PV0 = PM64(post+i );
+ __m64 PV2 = PV0;
+ PV0 = _mm_and_si64(PV0, PM64(PIV0));
+ PV2 = _mm_and_si64(PV2, PM64(PIV1));
+ PV0 = _mm_srli_pi32(PV0, 2);
+ PV0 = _mm_or_si64(PV0, PV2);
+ PM64(post+i ) = PV0;
+ }
+ break;
+ case 2:
+ for(;i<posts4;i+=4)
+ {
+ __m64 PV0 = PM64(post+i );
+ __m64 PV1 = PM64(post+i+2);
+ __m64 PV2 = PV0;
+ __m64 PV3 = PV1;
+ PV0 = _mm_and_si64(PV0, PM64(PIV0));
+ PV1 = _mm_and_si64(PV1, PM64(PIV0));
+ PV2 = _mm_and_si64(PV2, PM64(PIV1));
+ PV3 = _mm_and_si64(PV3, PM64(PIV1));
+ PV0 = _mm_srli_pi32(PV0, 3);
+ PV1 = _mm_srli_pi32(PV1, 3);
+ PV0 = _mm_or_si64(PV0, PV2);
+ PV1 = _mm_or_si64(PV1, PV3);
+ PM64(post+i ) = PV0;
+ PM64(post+i+2) = PV1;
+ }
+ for(;i<posts2;i+=2)
+ {
+ __m64 PV0 = PM64(post+i );
+ __m64 PV2 = PV0;
+ PV0 = _mm_and_si64(PV0, PM64(PIV0));
+ PV2 = _mm_and_si64(PV2, PM64(PIV1));
+ PV0 = _mm_srli_pi32(PV0, 3);
+ PV0 = _mm_or_si64(PV0, PV2);
+ PM64(post+i ) = PV0;
+ }
+ break;
+ case 3: /* 1024 -> 86: val/12, computed exactly as ((val>>2)*0xAAAB)>>17 */
+ for(;i<posts4;i+=4)
+ {
+ __m64 PV0 = PM64(post+i );
+ __m64 PV1 = PM64(post+i+2);
+ __m64 PV2 = PV0;
+ __m64 PV3 = PV1;
+ __m64 PV4, PV5;
+ PV0 = _mm_and_si64(PV0, PM64(PIV0)); /* value bits (low 15) */
+ PV1 = _mm_and_si64(PV1, PM64(PIV0));
+ PV2 = _mm_and_si64(PV2, PM64(PIV1)); /* flag bits, OR-ed back in below */
+ PV3 = _mm_and_si64(PV3, PM64(PIV1));
+ PV0 = _mm_srli_pi32(PV0, 2); /* x = val/4, at most 13 bits */
+ PV1 = _mm_srli_pi32(PV1, 2);
+ PV4 = _mm_set1_pi32(0xAAAB); /* ceil(2^17/3); upper 16 bits of each lane are 0 */
+ PV5 = PV4;
+ PV0 = _mm_mulhi_pu16(PV0, PV4); /* (x*0xAAAB)>>16 in the low half, 0 in the high half */
+ PV1 = _mm_mulhi_pu16(PV1, PV5);
+ PV0 = _mm_srli_pi32(PV0, 1); /* >>17 total: x/3 == val/12, matching the scalar tail */
+ PV1 = _mm_srli_pi32(PV1, 1);
+ PV0 = _mm_or_si64(PV0, PV2);
+ PV1 = _mm_or_si64(PV1, PV3);
+ PM64(post+i ) = PV0;
+ PM64(post+i+2) = PV1;
+ }
+ for(;i<posts2;i+=2)
+ {
+ __m64 PV0 = PM64(post+i );
+ __m64 PV2 = PV0;
+ __m64 PV4;
+ PV0 = _mm_and_si64(PV0, PM64(PIV0));
+ PV2 = _mm_and_si64(PV2, PM64(PIV1));
+ PV0 = _mm_srli_pi32(PV0, 2); /* x = val/4 */
+ PV4 = _mm_set1_pi32(0xAAAB); /* ceil(2^17/3) */
+ PV0 = _mm_mulhi_pu16(PV0, PV4); /* (x*0xAAAB)>>16 */
+ PV0 = _mm_srli_pi32(PV0, 1); /* x/3 == val/12 */
+ PV0 = _mm_or_si64(PV0, PV2);
+ PM64(post+i ) = PV0;
+ }
+ break;
+ case 4:
+ for(;i<posts4;i+=4)
+ {
+ __m64 PV0 = PM64(post+i );
+ __m64 PV1 = PM64(post+i+2);
+ __m64 PV2 = PV0;
+ __m64 PV3 = PV1;
+ PV0 = _mm_and_si64(PV0, PM64(PIV0));
+ PV1 = _mm_and_si64(PV1, PM64(PIV0));
+ PV2 = _mm_and_si64(PV2, PM64(PIV1));
+ PV3 = _mm_and_si64(PV3, PM64(PIV1));
+ PV0 = _mm_srli_pi32(PV0, 4);
+ PV1 = _mm_srli_pi32(PV1, 4);
+ PV0 = _mm_or_si64(PV0, PV2);
+ PV1 = _mm_or_si64(PV1, PV3);
+ PM64(post+i ) = PV0;
+ PM64(post+i+2) = PV1;
+ }
+ for(;i<posts2;i+=2)
+ {
+ __m64 PV0 = PM64(post+i );
+ __m64 PV2 = PV0;
+ PV0 = _mm_and_si64(PV0, PM64(PIV0));
+ PV2 = _mm_and_si64(PV2, PM64(PIV1));
+ PV0 = _mm_srli_pi32(PV0, 4);
+ PV0 = _mm_or_si64(PV0, PV2);
+ PM64(post+i ) = PV0;
+ }
+ break;
+ }
+ _mm_empty();
+ for(;i<posts;i++)
+ {
+ int val = post[i]&0x7fff;
+ switch(info->mult)
+ {
+ case 1: /* 1024 -> 256 */
+ val>>=2;
+ break;
+ case 2: /* 1024 -> 128 */
+ val>>=3;
+ break;
+ case 3: /* 1024 -> 86 */
+ val/=12;
+ break;
+ case 4: /* 1024 -> 64 */
+ val>>=4;
+ break;
+ }
+ post[i]=val | (post[i]&0x8000);
+ }
+#else /* SSE Optimize */
for(i=0;i<posts;i++){
int val=post[i]&0x7fff;
switch(info->mult){
@@ -795,6 +1939,7 @@
}
post[i]=val | (post[i]&0x8000);
}
+#endif /* SSE Optimize */
out[0]=post[0];
out[1]=post[1];
@@ -853,7 +1998,6 @@
oggpack_write(opb,out[0],ilog(look->quant_q-1));
oggpack_write(opb,out[1],ilog(look->quant_q-1));
-
/* partition by partition */
for(i=0,j=2;i<info->partitions;i++){
int class=info->partitionclass[i];
@@ -951,6 +2095,9 @@
ly=hy;
}
}
+#if defined(__SSE__)&&!defined(__SSE2__) /* SSE Optimize */
+ _mm_empty();
+#endif /* SSE Optimize */
for(j=hx;j<vb->pcmend/2;j++)ilogmask[j]=ly; /* be certain */
seq++;
return(1);
@@ -1081,7 +2228,21 @@
for(j=hx;j<n;j++)out[j]*=FLOOR1_fromdB_LOOKUP[ly]; /* be certain */
return(1);
}
+#if defined(__SSE__) /* SSE Optimize */
+ {
+ __m128 XMM0 = _mm_setzero_ps();
+ for(j=0;j<n;j+=16)
+ {
+ _mm_store_ps(out+j , XMM0);
+ _mm_store_ps(out+j+ 4, XMM0);
+ _mm_store_ps(out+j+ 8, XMM0);
+ _mm_store_ps(out+j+12, XMM0);
+ }
+ }
+#else /* SSE Optimize */
memset(out,0,sizeof(*out)*n);
+#endif /* SSE Optimize */
+
return(0);
}
diff -ruN --exclude '*.in' libvorbis-1.2.0/lib/info.c libvorbis-1.2.0-sse/lib/info.c
--- libvorbis-1.2.0/lib/info.c 2007-08-02 12:42:08.000000000 +0200
+++ libvorbis-1.2.0-sse/lib/info.c 2007-08-02 12:44:44.000000000 +0200
@@ -30,6 +30,12 @@
#include "psy.h"
#include "misc.h"
#include "os.h"
+#ifdef __SSE__ /* SSE Optimize */
+#include "xmmlib.h"
+#if !defined(CPDATE)
+#define CPDATE __DATE__
+#endif
+#endif /* SSE Optimize */
/* helpers */
static int ilog2(unsigned int v){
@@ -60,12 +66,21 @@
}
void vorbis_comment_add(vorbis_comment *vc,char *comment){
+#ifdef __SSE__ /* SSE Optimize */
+ vc->user_comments=realloc(vc->user_comments,
+ (vc->comments+2)*sizeof(*vc->user_comments));
+ vc->comment_lengths=realloc(vc->comment_lengths,
+ (vc->comments+2)*sizeof(*vc->comment_lengths));
+ vc->comment_lengths[vc->comments]=strlen(comment);
+ vc->user_comments[vc->comments]=malloc(vc->comment_lengths[vc->comments]+1);
+#else /* SSE Optimize */
vc->user_comments=_ogg_realloc(vc->user_comments,
(vc->comments+2)*sizeof(*vc->user_comments));
vc->comment_lengths=_ogg_realloc(vc->comment_lengths,
(vc->comments+2)*sizeof(*vc->comment_lengths));
vc->comment_lengths[vc->comments]=strlen(comment);
vc->user_comments[vc->comments]=_ogg_malloc(vc->comment_lengths[vc->comments]+1);
+#endif /* SSE Optimize */
strcpy(vc->user_comments[vc->comments], comment);
vc->comments++;
vc->user_comments[vc->comments]=NULL;
@@ -130,11 +145,19 @@
void vorbis_comment_clear(vorbis_comment *vc){
if(vc){
long i;
+#ifdef __SSE__ /* SSE Optimize */
+ for(i=0;i<vc->comments;i++)
+ if(vc->user_comments[i])free(vc->user_comments[i]);
+ if(vc->user_comments)free(vc->user_comments);
+ if(vc->comment_lengths)free(vc->comment_lengths);
+ if(vc->vendor)free(vc->vendor);
+#else /* SSE Optimize */
for(i=0;i<vc->comments;i++)
if(vc->user_comments[i])_ogg_free(vc->user_comments[i]);
if(vc->user_comments)_ogg_free(vc->user_comments);
if(vc->comment_lengths)_ogg_free(vc->comment_lengths);
if(vc->vendor)_ogg_free(vc->vendor);
+#endif /* SSE Optimize */
memset(vc,0,sizeof(*vc));
}
}
@@ -236,10 +259,25 @@
int i;
int vendorlen=oggpack_read(opb,32);
if(vendorlen<0)goto err_out;
+#ifdef __SSE__ /* SSE Optimize */
+ vc->vendor=calloc(vendorlen+1,1);
+#else /* SSE Optimize */
vc->vendor=_ogg_calloc(vendorlen+1,1);
+#endif /* SSE Optimize */
_v_readstring(opb,vc->vendor,vendorlen);
vc->comments=oggpack_read(opb,32);
if(vc->comments<0)goto err_out;
+#ifdef __SSE__ /* SSE Optimize */
+ vc->user_comments=calloc(vc->comments+1,sizeof(*vc->user_comments));
+ vc->comment_lengths=calloc(vc->comments+1, sizeof(*vc->comment_lengths));
+ for(i=0;i<vc->comments;i++){
+ int len=oggpack_read(opb,32);
+ if(len<0)goto err_out;
+ vc->comment_lengths[i]=len;
+ vc->user_comments[i]=calloc(len+1,1);
+ _v_readstring(opb,vc->user_comments[i],len);
+ }
+#else /* SSE Optimize */
vc->user_comments=_ogg_calloc(vc->comments+1,sizeof(*vc->user_comments));
vc->comment_lengths=_ogg_calloc(vc->comments+1, sizeof(*vc->comment_lengths));
@@ -250,6 +288,7 @@
vc->user_comments[i]=_ogg_calloc(len+1,1);
_v_readstring(opb,vc->user_comments[i],len);
}
+#endif /* SSE Optimize */
if(oggpack_read(opb,1)!=1)goto err_out; /* EOP check */
return(0);
@@ -451,7 +490,15 @@
}
static int _vorbis_pack_comment(oggpack_buffer *opb,vorbis_comment *vc){
+#if defined(__SSE3__)
+ char temp[]="BS; Lancer(SSE3) [" CPDATE "] (based on aoTuV b5 [20061024])";
+#elif defined(__SSE2__)
+ char temp[]="BS; Lancer(SSE2) [" CPDATE "] (based on aoTuV b5 [20061024])";
+#elif defined(__SSE__)
+ char temp[]="BS; Lancer(SSE) [" CPDATE "] (based on aoTuV b5 [20061024])";
+#else
char temp[]="AO; aoTuV b5 [20061024] (based on Xiph.Org's I 20070622)";
+#endif
int bytes = strlen(temp);
/* preamble */
diff -ruN --exclude '*.in' libvorbis-1.2.0/lib/Makefile.am libvorbis-1.2.0-sse/lib/Makefile.am
--- libvorbis-1.2.0/lib/Makefile.am 2004-07-26 15:31:38.000000000 +0200
+++ libvorbis-1.2.0-sse/lib/Makefile.am 2007-08-02 12:43:10.000000000 +0200
@@ -10,16 +10,16 @@
lpc.c analysis.c synthesis.c psy.c info.c \
floor1.c floor0.c\
res0.c mapping0.c registry.c codebook.c sharedbook.c\
- lookup.c bitrate.c\
+ lookup.c bitrate.c xmmlib.c \
envelope.h lpc.h lsp.h codebook.h misc.h psy.h\
masking.h os.h mdct.h smallft.h highlevel.h\
registry.h scales.h window.h lookup.h lookup_data.h\
- codec_internal.h backends.h bitrate.h
+ codec_internal.h backends.h bitrate.h xmmlib.h
libvorbis_la_LDFLAGS = -no-undefined -version-info @V_LIB_CURRENT@:@V_LIB_REVISION@:@V_LIB_AGE@
libvorbis_la_LIBADD = @OGG_LIBS@ @VORBIS_LIBS@
libvorbisfile_la_SOURCES = vorbisfile.c
-libvorbisfile_la_LDFLAGS = -no-undefined -version-info @VF_LIB_CURRENT@:@VF_LIB_REVISION@:@VF_LIB_AGE@
+libvorbisfile_la_LDFLAGS = -no-undefined -version-info @VF_LIB_CURRENT@:@VF_LIB_REVISION@:@VF_LIB_AGE@ @OGG_LIBS@
libvorbisfile_la_LIBADD = libvorbis.la
libvorbisenc_la_SOURCES = vorbisenc.c
diff -ruN --exclude '*.in' libvorbis-1.2.0/lib/mapping0.c libvorbis-1.2.0-sse/lib/mapping0.c
--- libvorbis-1.2.0/lib/mapping0.c 2007-08-02 12:42:12.000000000 +0200
+++ libvorbis-1.2.0-sse/lib/mapping0.c 2007-08-02 12:43:10.000000000 +0200
@@ -27,6 +27,10 @@
#include "registry.h"
#include "psy.h"
#include "misc.h"
+#ifdef __SSE__ /* SSE Optimize */
+#include <float.h>
+#include "xmmlib.h"
+#endif /* SSE Optimize */
/* simplistic, wasteful way of doing this (unique lookup for each
mode/submapping); there should be a central repository for
@@ -239,6 +243,508 @@
vorbis_look_floor *look,
int *post,int *ilogmask);
+#ifdef __SSE__ /* SSE Optimize */
+static void mapping_forward_sub0(float *pcm, float *logfft, float scale_dB,
+ float *local_ampmax, int i, int n)
+{
+ _MM_ALIGN16 const float mparm[4] = {
+ 7.17711438e-7f/2.f, 7.17711438e-7f/2.f, 7.17711438e-7f/2.f, 7.17711438e-7f/2.f,
+ };
+ __m128 SCALEdB;
+ __m128 LAM0;
+#if !defined(__SSE2__)
+ __m128 LAM1;
+#endif
+ int j, k;
+ SCALEdB = _mm_set_ps1(scale_dB+.345f-764.6161886f/2.f);
+ LAM0 = _mm_set_ps1(local_ampmax[i]);
+#if defined(__SSE2__)
+ if(n>=256&&n<=4096)
+ {
+ /*
+ Caution! This routine is for the SSE-optimized FFT only.
+ */
+ float rfv = logfft[0];
+ logfft[0] = 0.f;
+ logfft[1] = 0.f;
+#if defined(__SSE3__)
+ /*
+ SSE3 optimized code
+ */
+ for(j=0,k=0;j<n;j+=16,k+=8)
+ {
+ __m128 XMM0, XMM2;
+ __m128 XMM1, XMM3;
+ XMM0 = _mm_load_ps(pcm+j );
+ XMM1 = _mm_load_ps(pcm+j+ 4);
+ XMM2 = _mm_load_ps(pcm+j+ 8);
+ XMM3 = _mm_load_ps(pcm+j+12);
+ XMM0 = _mm_mul_ps(XMM0, XMM0);
+ XMM1 = _mm_mul_ps(XMM1, XMM1);
+ XMM2 = _mm_mul_ps(XMM2, XMM2);
+ XMM3 = _mm_mul_ps(XMM3, XMM3);
+ XMM0 = _mm_hadd_ps(XMM0, XMM1);
+ XMM2 = _mm_hadd_ps(XMM2, XMM3);
+ XMM0 = _mm_cvtepi32_ps(_mm_castps_si128(XMM0));
+ XMM2 = _mm_cvtepi32_ps(_mm_castps_si128(XMM2));
+ XMM0 = _mm_mul_ps(XMM0, PM128(mparm));
+ XMM2 = _mm_mul_ps(XMM2, PM128(mparm));
+ XMM0 = _mm_add_ps(XMM0, SCALEdB);
+ XMM2 = _mm_add_ps(XMM2, SCALEdB);
+ _mm_store_ps(logfft+k , XMM0);
+ _mm_store_ps(logfft+k+ 4, XMM2);
+ XMM0 = _mm_max_ps(XMM0, XMM2);
+ LAM0 = _mm_max_ps(LAM0, XMM0);
+ }
+#else /* for SSE2 */
+ /*
+ SSE2 optimized code
+ */
+ for(j=0,k=0;j<n;j+=16,k+=8)
+ {
+ __m128 XMM0, XMM2;
+ __m128 XMM1, XMM3;
+ __m128 XMM4, XMM5;
+ XMM0 = _mm_load_ps(pcm+j );
+ XMM2 = _mm_load_ps(pcm+j+ 8);
+ XMM4 = _mm_load_ps(pcm+j+ 4);
+ XMM5 = _mm_load_ps(pcm+j+12);
+ XMM1 = XMM0;
+ XMM3 = XMM2;
+ XMM0 = _mm_shuffle_ps(XMM0, XMM4,_MM_SHUFFLE(2,0,2,0));
+ XMM1 = _mm_shuffle_ps(XMM1, XMM4,_MM_SHUFFLE(3,1,3,1));
+ XMM2 = _mm_shuffle_ps(XMM2, XMM5,_MM_SHUFFLE(2,0,2,0));
+ XMM3 = _mm_shuffle_ps(XMM3, XMM5,_MM_SHUFFLE(3,1,3,1));
+ XMM0 = _mm_mul_ps(XMM0, XMM0);
+ XMM1 = _mm_mul_ps(XMM1, XMM1);
+ XMM2 = _mm_mul_ps(XMM2, XMM2);
+ XMM3 = _mm_mul_ps(XMM3, XMM3);
+ XMM0 = _mm_add_ps(XMM0, XMM1);
+ XMM2 = _mm_add_ps(XMM2, XMM3);
+ XMM0 = _mm_cvtepi32_ps(_mm_castps_si128(XMM0));
+ XMM2 = _mm_cvtepi32_ps(_mm_castps_si128(XMM2));
+ XMM0 = _mm_mul_ps(XMM0, PM128(mparm));
+ XMM2 = _mm_mul_ps(XMM2, PM128(mparm));
+ XMM0 = _mm_add_ps(XMM0, SCALEdB);
+ XMM2 = _mm_add_ps(XMM2, SCALEdB);
+ _mm_store_ps(logfft+k , XMM0);
+ _mm_store_ps(logfft+k+ 4, XMM2);
+ XMM0 = _mm_max_ps(XMM0, XMM2);
+ LAM0 = _mm_max_ps(LAM0, XMM0);
+ }
+#endif
+ local_ampmax[i] = _mm_max_horz(LAM0);
+ logfft[0] = rfv;
+ }
+ else
+ {
+ /*
+ SSE2 optimized code
+ */
+ int Cnt = ((n-2)&(~15))+1;
+ for(j=1;j<Cnt;j+=16){ /* 8 (pcm[j],pcm[j+1]) pairs per pass, unaligned since j is odd */
+ __m128 XMM0, XMM3;
+#if defined(__SSE3__)
+ {
+ __m128 XMM2, XMM5;
+ XMM0 = _mm_lddqu_ps(pcm+j );
+ XMM2 = _mm_lddqu_ps(pcm+j+ 4);
+ XMM3 = _mm_lddqu_ps(pcm+j+ 8);
+ XMM5 = _mm_lddqu_ps(pcm+j+12);
+ XMM0 = _mm_mul_ps(XMM0, XMM0);
+ XMM2 = _mm_mul_ps(XMM2, XMM2);
+ XMM3 = _mm_mul_ps(XMM3, XMM3);
+ XMM5 = _mm_mul_ps(XMM5, XMM5);
+ XMM0 = _mm_hadd_ps(XMM0, XMM2); /* a*a+b*b for each adjacent pair */
+ XMM3 = _mm_hadd_ps(XMM3, XMM5);
+ }
+#else
+ {
+ __m128 XMM2, XMM5;
+ {
+ __m128 XMM1, XMM4;
+ XMM0 = _mm_loadu_ps(pcm+j );
+ XMM1 = _mm_loadu_ps(pcm+j+ 4);
+ XMM3 = _mm_loadu_ps(pcm+j+ 8);
+ XMM4 = _mm_loadu_ps(pcm+j+12);
+ XMM2 = XMM0;
+ XMM5 = XMM3;
+ XMM0 = _mm_shuffle_ps(XMM0, XMM1,_MM_SHUFFLE(2,0,2,0)); /* first element of each pair */
+ XMM2 = _mm_shuffle_ps(XMM2, XMM1,_MM_SHUFFLE(3,1,3,1)); /* second element of each pair */
+ XMM3 = _mm_shuffle_ps(XMM3, XMM4,_MM_SHUFFLE(2,0,2,0));
+ XMM5 = _mm_shuffle_ps(XMM5, XMM4,_MM_SHUFFLE(3,1,3,1));
+ }
+ XMM0 = _mm_mul_ps(XMM0, XMM0);
+ XMM3 = _mm_mul_ps(XMM3, XMM3);
+ XMM2 = _mm_mul_ps(XMM2, XMM2);
+ XMM5 = _mm_mul_ps(XMM5, XMM5);
+ XMM0 = _mm_add_ps(XMM0, XMM2);
+ XMM3 = _mm_add_ps(XMM3, XMM5);
+ }
+#endif
+ XMM0 = _mm_cvtepi32_ps(_mm_castps_si128(XMM0)); /* float bit pattern read as int32, then converted to float */
+ XMM3 = _mm_cvtepi32_ps(_mm_castps_si128(XMM3));
+ XMM0 = _mm_mul_ps(XMM0, PM128(mparm ));
+ XMM3 = _mm_mul_ps(XMM3, PM128(mparm)); /* mparm holds only 4 floats; mparm+4 addressed memory past its end */
+ XMM0 = _mm_add_ps(XMM0, SCALEdB);
+ XMM3 = _mm_add_ps(XMM3, SCALEdB);
+ _mm_storeu_ps(logfft+((j+1)>>1), XMM0);
+ _mm_storeu_ps(logfft+((j+9)>>1), XMM3);
+ XMM0 = _mm_max_ps(XMM0, XMM3);
+ LAM0 = _mm_max_ps(LAM0, XMM0); /* running per-lane maximum, reduced after the loops */
+ }
+ Cnt = ((n-2)&(~7))+1;
+ for(;j<Cnt;j+=8){
+ __m128 XMM0;
+#if defined(__SSE3__)
+ {
+ __m128 XMM1;
+ XMM0 = _mm_lddqu_ps(pcm+j );
+ XMM1 = _mm_lddqu_ps(pcm+j+ 4);
+ XMM0 = _mm_mul_ps(XMM0, XMM0);
+ XMM1 = _mm_mul_ps(XMM1, XMM1);
+ XMM0 = _mm_hadd_ps(XMM0, XMM1);
+ }
+#else
+ {
+ __m128 XMM2;
+ {
+ __m128 XMM1;
+ XMM0 = _mm_loadu_ps(pcm+j );
+ XMM1 = _mm_loadu_ps(pcm+j+ 4);
+ XMM2 = XMM0;
+ XMM0 = _mm_shuffle_ps(XMM0, XMM1,_MM_SHUFFLE(2,0,2,0));
+ XMM2 = _mm_shuffle_ps(XMM2, XMM1,_MM_SHUFFLE(3,1,3,1));
+ }
+ XMM0 = _mm_mul_ps(XMM0, XMM0);
+ XMM2 = _mm_mul_ps(XMM2, XMM2);
+ XMM0 = _mm_add_ps(XMM0, XMM2);
+ }
+#endif
+ XMM0 = _mm_cvtepi32_ps(_mm_castps_si128(XMM0));
+ XMM0 = _mm_mul_ps(XMM0, PM128(mparm));
+ XMM0 = _mm_add_ps(XMM0, SCALEdB);
+ _mm_storeu_ps(&logfft[(j+1)>>1], XMM0);
+ LAM0 = _mm_max_ps(LAM0, XMM0);
+ }
+ local_ampmax[i] = _mm_max_horz(LAM0);
+ for(;j<n-1;j+=2){ /* scalar remainder; bound matches the reference loop (j odd, last pair is n-3,n-2) so pcm[n] is never read */
+ float temp = pcm[j]*pcm[j]+pcm[j+1]*pcm[j+1];
+ temp=logfft[(j+1)>>1]=scale_dB+.5f*todB(&temp) + .345; /* +
+ .345 is a hack; the original todB
+ estimation used on IEEE 754
+ compliant machines had a bug that
+ returned dB values about a third
+ of a decibel too high. The bug
+ was harmless because tunings
+ implicitly took that into
+ account. However, fixing the bug
+ in the estimator requires
+ changing all the tunings as well.
+ For now, it's easier to sync
+ things back up here, and
+ recalibrate the tunings in the
+ next major model upgrade. */
+ if(temp>local_ampmax[i])
+ local_ampmax[i] = temp;
+ }
+ }
+#else /* for __SSE2__ */
+ /*
+ SSE optimized code
+ */
+ LAM1 = LAM0;
+ if(n>=256&&n<=4096)
+ {
+ /*
+ Caution! This routine is for the SSE-optimized FFT only.
+ */
+ float rfv = logfft[0];
+ logfft[0] = 0.f;
+ logfft[1] = 0.f;
+ for(j=0,k=0;j<n;j+=32,k+=16)
+ {
+ __m64 MM0, MM1, MM2, MM3;
+ __m64 MM4, MM5, MM6, MM7;
+ __m128x U0, U1, U2, U3;
+ {
+ __m128 XMM0, XMM1, XMM2, XMM3;
+ __m128 XMM4, XMM5;
+ XMM0 = _mm_load_ps(pcm+j );
+ XMM2 = _mm_load_ps(pcm+j+ 8);
+ XMM4 = _mm_load_ps(pcm+j+ 4);
+ XMM5 = _mm_load_ps(pcm+j+12);
+ XMM1 = XMM0;
+ XMM3 = XMM2;
+ XMM0 = _mm_shuffle_ps(XMM0, XMM4,_MM_SHUFFLE(2,0,2,0));
+ XMM1 = _mm_shuffle_ps(XMM1, XMM4,_MM_SHUFFLE(3,1,3,1));
+ XMM2 = _mm_shuffle_ps(XMM2, XMM5,_MM_SHUFFLE(2,0,2,0));
+ XMM3 = _mm_shuffle_ps(XMM3, XMM5,_MM_SHUFFLE(3,1,3,1));
+ XMM0 = _mm_mul_ps(XMM0, XMM0);
+ XMM1 = _mm_mul_ps(XMM1, XMM1);
+ XMM2 = _mm_mul_ps(XMM2, XMM2);
+ XMM3 = _mm_mul_ps(XMM3, XMM3);
+ XMM0 = _mm_add_ps(XMM0, XMM1);
+ XMM2 = _mm_add_ps(XMM2, XMM3);
+ XMM4 = _mm_load_ps(pcm+j+16);
+ U0.ps = XMM0;
+ U1.ps = XMM2;
+ XMM1 = _mm_load_ps(pcm+j+24);
+ XMM0 = _mm_load_ps(pcm+j+20);
+ XMM2 = _mm_load_ps(pcm+j+28);
+ XMM5 = XMM4;
+ XMM3 = XMM1;
+ XMM4 = _mm_shuffle_ps(XMM4, XMM0,_MM_SHUFFLE(2,0,2,0));
+ XMM5 = _mm_shuffle_ps(XMM5, XMM0,_MM_SHUFFLE(3,1,3,1));
+ XMM1 = _mm_shuffle_ps(XMM1, XMM2,_MM_SHUFFLE(2,0,2,0));
+ XMM3 = _mm_shuffle_ps(XMM3, XMM2,_MM_SHUFFLE(3,1,3,1));
+ MM0 = U0.pi64[1];
+ MM1 = U1.pi64[1];
+ MM2 = U0.pi64[0];
+ MM3 = U1.pi64[0];
+ XMM4 = _mm_mul_ps(XMM4, XMM4);
+ XMM5 = _mm_mul_ps(XMM5, XMM5);
+ XMM1 = _mm_mul_ps(XMM1, XMM1);
+ XMM3 = _mm_mul_ps(XMM3, XMM3);
+ XMM4 = _mm_add_ps(XMM4, XMM5);
+ XMM1 = _mm_add_ps(XMM1, XMM3);
+ XMM0 = _mm_cvtpi32_ps(XMM0, MM0);
+ XMM2 = _mm_cvtpi32_ps(XMM2, MM1);
+ U2.ps = XMM4;
+ U3.ps = XMM1;
+ MM4 = U2.pi64[1];
+ MM5 = U3.pi64[1];
+ MM6 = U2.pi64[0];
+ MM7 = U3.pi64[0];
+ XMM5 = _mm_cvtpi32_ps(XMM5, MM4);
+ XMM3 = _mm_cvtpi32_ps(XMM3, MM5);
+ XMM0 = _mm_movelh_ps(XMM0, XMM0);
+ XMM2 = _mm_movelh_ps(XMM2, XMM2);
+ XMM5 = _mm_movelh_ps(XMM5, XMM5);
+ XMM3 = _mm_movelh_ps(XMM3, XMM3);
+ XMM0 = _mm_cvtpi32_ps(XMM0, MM2);
+ XMM2 = _mm_cvtpi32_ps(XMM2, MM3);
+ XMM5 = _mm_cvtpi32_ps(XMM5, MM6);
+ XMM3 = _mm_cvtpi32_ps(XMM3, MM7);
+ XMM0 = _mm_mul_ps(XMM0, PM128(mparm));
+ XMM2 = _mm_mul_ps(XMM2, PM128(mparm));
+ XMM5 = _mm_mul_ps(XMM5, PM128(mparm));
+ XMM3 = _mm_mul_ps(XMM3, PM128(mparm));
+ XMM0 = _mm_add_ps(XMM0, SCALEdB);
+ XMM2 = _mm_add_ps(XMM2, SCALEdB);
+ XMM5 = _mm_add_ps(XMM5, SCALEdB);
+ XMM3 = _mm_add_ps(XMM3, SCALEdB);
+ _mm_store_ps(logfft+k , XMM0);
+ _mm_store_ps(logfft+k+ 4, XMM2);
+ _mm_store_ps(logfft+k+ 8, XMM5);
+ _mm_store_ps(logfft+k+12, XMM3);
+ XMM0 = _mm_max_ps(XMM0, XMM2);
+ XMM5 = _mm_max_ps(XMM5, XMM3);
+ LAM0 = _mm_max_ps(LAM0, XMM0);
+ LAM1 = _mm_max_ps(LAM1, XMM5);
+ }
+ }
+ _mm_empty();
+ logfft[0] = rfv;
+ LAM0 = _mm_max_ps(LAM0, LAM1);
+ local_ampmax[i] = _mm_max_horz(LAM0);
+ }
+ else
+ {
+ __m64 MM0, MM1, MM2, MM3;
+ __m128x U0, U1;
+ int Cnt = ((n-2)&(~15))+1;
+ for(j=1;j<Cnt;j+=16){
+ __m128 XMM0, XMM3;
+ {
+ __m128 XMM2, XMM5;
+ {
+ __m128 XMM1, XMM4;
+ XMM0 = _mm_loadu_ps(pcm+j );
+ XMM1 = _mm_loadu_ps(pcm+j+ 4);
+ XMM3 = _mm_loadu_ps(pcm+j+ 8);
+ XMM4 = _mm_loadu_ps(pcm+j+12);
+ XMM2 = XMM0;
+ XMM5 = XMM3;
+ XMM0 = _mm_shuffle_ps(XMM0, XMM1,_MM_SHUFFLE(2,0,2,0));
+ XMM2 = _mm_shuffle_ps(XMM2, XMM1,_MM_SHUFFLE(3,1,3,1));
+ XMM3 = _mm_shuffle_ps(XMM3, XMM4,_MM_SHUFFLE(2,0,2,0));
+ XMM5 = _mm_shuffle_ps(XMM5, XMM4,_MM_SHUFFLE(3,1,3,1));
+ }
+ XMM0 = _mm_mul_ps(XMM0, XMM0);
+ XMM3 = _mm_mul_ps(XMM3, XMM3);
+ XMM2 = _mm_mul_ps(XMM2, XMM2);
+ XMM5 = _mm_mul_ps(XMM5, XMM5);
+ XMM0 = _mm_add_ps(XMM0, XMM2);
+ XMM3 = _mm_add_ps(XMM3, XMM5);
+ }
+ U0.ps = XMM0;
+ U1.ps = XMM3;
+ MM0 = U0.pi64[1];
+ MM1 = U1.pi64[1];
+ MM2 = U0.pi64[0];
+ MM3 = U1.pi64[0];
+ XMM0 = _mm_cvtpi32_ps(XMM0, MM0);
+ XMM3 = _mm_cvtpi32_ps(XMM3, MM1);
+ XMM0 = _mm_movelh_ps(XMM0, XMM0);
+ XMM3 = _mm_movelh_ps(XMM3, XMM3);
+ XMM0 = _mm_cvtpi32_ps(XMM0, MM2);
+ XMM3 = _mm_cvtpi32_ps(XMM3, MM3);
+ XMM0 = _mm_mul_ps(XMM0, PM128(mparm));
+ XMM3 = _mm_mul_ps(XMM3, PM128(mparm));
+ XMM0 = _mm_add_ps(XMM0, SCALEdB);
+ XMM3 = _mm_add_ps(XMM3, SCALEdB);
+ _mm_storeu_ps(logfft+((j+1)>>1), XMM0);
+ _mm_storeu_ps(logfft+((j+9)>>1), XMM3);
+ LAM0 = _mm_max_ps(LAM0, XMM0);
+ LAM1 = _mm_max_ps(LAM1, XMM3);
+ }
+ Cnt = ((n-2)&(~7))+1;
+ for(;j<Cnt;j+=8){
+ __m128 XMM0;
+ {
+ __m128 XMM2;
+ {
+ __m128 XMM1;
+ XMM0 = _mm_loadu_ps(pcm+j );
+ XMM1 = _mm_loadu_ps(pcm+j+ 4);
+ XMM2 = XMM0;
+ XMM0 = _mm_shuffle_ps(XMM0, XMM1,_MM_SHUFFLE(2,0,2,0));
+ XMM2 = _mm_shuffle_ps(XMM2, XMM1,_MM_SHUFFLE(3,1,3,1));
+ }
+ XMM0 = _mm_mul_ps(XMM0, XMM0);
+ XMM2 = _mm_mul_ps(XMM2, XMM2);
+ XMM0 = _mm_add_ps(XMM0, XMM2);
+ }
+ U0.ps = XMM0;
+ MM0 = U0.pi64[1];
+ MM1 = U0.pi64[0];
+ XMM0 = _mm_cvtpi32_ps(XMM0, MM0);
+ XMM0 = _mm_movelh_ps(XMM0, XMM0);
+ XMM0 = _mm_cvtpi32_ps(XMM0, MM1);
+ XMM0 = _mm_mul_ps(XMM0, PM128(mparm));
+ XMM0 = _mm_add_ps(XMM0, SCALEdB);
+ _mm_storeu_ps(logfft+((j+1)>>1), XMM0);
+ LAM0 = _mm_max_ps(LAM0, XMM0);
+ }
+ LAM0 = _mm_max_ps(LAM0, LAM1);
+ _mm_empty();
+ local_ampmax[i] = _mm_max_horz(LAM0);
+ for(;j<n-1;j+=2){ /* scalar remainder; bound matches the reference loop (j odd, last pair is n-3,n-2) so pcm[n] is never read */
+ float temp = pcm[j]*pcm[j]+pcm[j+1]*pcm[j+1];
+ temp=logfft[(j+1)>>1]=scale_dB+.5f*todB(&temp) + .345; /* +
+ .345 is a hack; the original todB
+ estimation used on IEEE 754
+ compliant machines had a bug that
+ returned dB values about a third
+ of a decibel too high. The bug
+ was harmless because tunings
+ implicitly took that into
+ account. However, fixing the bug
+ in the estimator requires
+ changing all the tunings as well.
+ For now, it's easier to sync
+ things back up here, and
+ recalibrate the tunings in the
+ next major model upgrade. */
+ if(temp>local_ampmax[i])
+ local_ampmax[i] = temp;
+ }
+ }
+#endif
+}
+
+static void mapping_forward_sub1(float *mdct, float *logmdct, int n) /* fills logmdct[0..n/2) from mdct[0..n/2); SIMD stand-in for the scalar todB(mdct+j)+.345 loop */
+{
+ static _MM_ALIGN16 const float mparm[4] = { /* scale applied to the integer view of |x| */
+ 7.17711438e-7f, 7.17711438e-7f, 7.17711438e-7f, 7.17711438e-7f
+ };
+ static _MM_ALIGN16 const float PFV0[4] = { /* additive offset, with the .345 adjustment folded in */
+ 0.345f-764.6161886f, 0.345f-764.6161886f,
+ 0.345f-764.6161886f, 0.345f-764.6161886f
+ };
+ int j;
+#if defined(__SSE2__)
+ /*
+ SSE2 optimized code: 16 floats per pass, so n/2 is assumed to be a multiple of 16 (TODO confirm against callers)
+ */
+ for(j=0;j<n/2;j+=16)
+ {
+ __m128 XMM0, XMM1, XMM2, XMM3;
+ XMM0 = _mm_load_ps(mdct+j ); /* aligned loads: mdct must be 16-byte aligned */
+ XMM1 = _mm_load_ps(mdct+j+ 4);
+ XMM2 = _mm_load_ps(mdct+j+ 8);
+ XMM3 = _mm_load_ps(mdct+j+12);
+ XMM0 = _mm_and_ps(XMM0, PABSMASK.ps); /* presumably clears the sign bit (PABSMASK comes from xmmlib.h) */
+ XMM1 = _mm_and_ps(XMM1, PABSMASK.ps);
+ XMM2 = _mm_and_ps(XMM2, PABSMASK.ps);
+ XMM3 = _mm_and_ps(XMM3, PABSMASK.ps);
+ XMM0 = _mm_cvtepi32_ps(_mm_castps_si128(XMM0)); /* float bit pattern read as int32, then converted to float */
+ XMM1 = _mm_cvtepi32_ps(_mm_castps_si128(XMM1));
+ XMM2 = _mm_cvtepi32_ps(_mm_castps_si128(XMM2));
+ XMM3 = _mm_cvtepi32_ps(_mm_castps_si128(XMM3));
+ XMM0 = _mm_mul_ps(XMM0, PM128(mparm));
+ XMM1 = _mm_mul_ps(XMM1, PM128(mparm));
+ XMM2 = _mm_mul_ps(XMM2, PM128(mparm));
+ XMM3 = _mm_mul_ps(XMM3, PM128(mparm));
+ XMM0 = _mm_add_ps(XMM0, PM128(PFV0));
+ XMM1 = _mm_add_ps(XMM1, PM128(PFV0));
+ XMM2 = _mm_add_ps(XMM2, PM128(PFV0));
+ XMM3 = _mm_add_ps(XMM3, PM128(PFV0));
+ _mm_store_ps(logmdct+j , XMM0); /* aligned stores: logmdct must be 16-byte aligned */
+ _mm_store_ps(logmdct+j+ 4, XMM1);
+ _mm_store_ps(logmdct+j+ 8, XMM2);
+ _mm_store_ps(logmdct+j+12, XMM3);
+ }
+#else /* __SSE2__ */
+ /*
+ SSE optimized code: same arithmetic, but the int->float conversion goes two lanes at a time through MMX
+ */
+ for(j=0;j<n/2;j+=16)
+ {
+ __m128x U0, U1, U2, U3; /* used to view each vector's 128 bits as two __m64 halves */
+ __m128 XMM0, XMM1, XMM2, XMM3;
+ XMM0 = _mm_load_ps(mdct+j );
+ XMM1 = _mm_load_ps(mdct+j+ 4);
+ XMM2 = _mm_load_ps(mdct+j+ 8);
+ XMM3 = _mm_load_ps(mdct+j+12);
+ XMM0 = _mm_and_ps(XMM0, PABSMASK.ps);
+ XMM1 = _mm_and_ps(XMM1, PABSMASK.ps);
+ XMM2 = _mm_and_ps(XMM2, PABSMASK.ps);
+ XMM3 = _mm_and_ps(XMM3, PABSMASK.ps);
+ U0.ps = XMM0;
+ U1.ps = XMM1;
+ U2.ps = XMM2;
+ U3.ps = XMM3;
+ XMM0 = _mm_cvtpi32_ps(XMM0, U0.pi64[1]); /* convert the upper two ints into the low two lanes */
+ XMM1 = _mm_cvtpi32_ps(XMM1, U1.pi64[1]);
+ XMM2 = _mm_cvtpi32_ps(XMM2, U2.pi64[1]);
+ XMM3 = _mm_cvtpi32_ps(XMM3, U3.pi64[1]);
+ XMM0 = _mm_movelh_ps(XMM0, XMM0); /* move them up to the high two lanes */
+ XMM1 = _mm_movelh_ps(XMM1, XMM1);
+ XMM2 = _mm_movelh_ps(XMM2, XMM2);
+ XMM3 = _mm_movelh_ps(XMM3, XMM3);
+ XMM0 = _mm_cvtpi32_ps(XMM0, U0.pi64[0]); /* convert the lower two ints into the low two lanes */
+ XMM1 = _mm_cvtpi32_ps(XMM1, U1.pi64[0]);
+ XMM2 = _mm_cvtpi32_ps(XMM2, U2.pi64[0]);
+ XMM3 = _mm_cvtpi32_ps(XMM3, U3.pi64[0]);
+ XMM0 = _mm_mul_ps(XMM0, PM128(mparm));
+ XMM1 = _mm_mul_ps(XMM1, PM128(mparm));
+ XMM2 = _mm_mul_ps(XMM2, PM128(mparm));
+ XMM3 = _mm_mul_ps(XMM3, PM128(mparm));
+ XMM0 = _mm_add_ps(XMM0, PM128(PFV0));
+ XMM1 = _mm_add_ps(XMM1, PM128(PFV0));
+ XMM2 = _mm_add_ps(XMM2, PM128(PFV0));
+ XMM3 = _mm_add_ps(XMM3, PM128(PFV0));
+ _mm_store_ps(logmdct+j , XMM0);
+ _mm_store_ps(logmdct+j+ 4, XMM1);
+ _mm_store_ps(logmdct+j+ 8, XMM2);
+ _mm_store_ps(logmdct+j+12, XMM3);
+ }
+ _mm_empty(); /* leave MMX state so later x87 floating-point code is safe */
+#endif
+}
+#endif /* SSE Optimize */
static int mapping0_forward(vorbis_block *vb){
vorbis_dsp_state *vd=vb->vd;
@@ -315,8 +821,12 @@
/* transform the PCM data */
/* only MDCT right now.... */
+#if defined(__SSE__) /* SSE Optimize */
+ mdct_forward(b->transform[vb->W][0],pcm,gmdct[i], gmdct_org[i]);
+#else /* SSE Optimize */
mdct_forward(b->transform[vb->W][0],pcm,gmdct[i]);
memcpy(gmdct_org[i], gmdct[i], n/2*sizeof(**gmdct_org));
+#endif /* SSE Optimize */
/* FFT yields more accurate tonal estimation (not phase sensitive) */
drft_forward(&b->fft_look[vb->W],pcm);
@@ -335,6 +845,9 @@
recalibrate the tunings in the
next major model upgrade. */
local_ampmax[i]=logfft[0];
+#ifdef __SSE__ /* SSE Optimize */
+ mapping_forward_sub0(pcm, logfft, scale_dB, local_ampmax, i, n);
+#else /* SSE Optimize */
for(j=1;j<n-1;j+=2){
float temp=pcm[j]*pcm[j]+pcm[j+1]*pcm[j+1];
temp=logfft[(j+1)>>1]=scale_dB+.5f*todB(&temp) + .345; /* +
@@ -354,6 +867,7 @@
next major model upgrade. */
if(temp>local_ampmax[i])local_ampmax[i]=temp;
}
+#endif /* SSE Optimize */
if(local_ampmax[i]>0.f)local_ampmax[i]=0.f;
if(local_ampmax[i]>global_ampmax)global_ampmax=local_ampmax[i];
@@ -397,6 +911,9 @@
floor_posts[i]=_vorbis_block_alloc(vb,PACKETBLOBS*sizeof(**floor_posts));
memset(floor_posts[i],0,sizeof(**floor_posts)*PACKETBLOBS);
+#ifdef __SSE__ /* SSE Optimize */
+ mapping_forward_sub1(mdct, logmdct, n);
+#else /* SSE Optimize */
for(j=0;j<n/2;j++)
logmdct[j]=todB(mdct+j) + .345; /* + .345 is a hack; the original
todB estimation used on IEEE 754
@@ -412,6 +929,7 @@
things back up here, and
recalibrate the tunings in the
next major model upgrade. */
+#endif /* SSE Optimize */
#if 0
if(vi->channels==2){
@@ -492,7 +1010,12 @@
vif->n,
blocktype, modenumber,
vb->nW,
+#ifdef __SSE__ /* SSE Optimize */
+ b->lW_blocktype, b->lW_modenumber, b->lW_no,
+ res_org[i]);
+#else /* SSE Optimize */
b->lW_blocktype, b->lW_modenumber, b->lW_no);
+#endif /* SSE Optimize */
#if 0
if(vi->channels==2){
@@ -541,7 +1064,12 @@
vif->n,
blocktype, modenumber,
vb->nW,
+#ifdef __SSE__ /* SSE Optimize */
+ b->lW_blocktype, b->lW_modenumber, b->lW_no,
+ res_org[i]);
+#else /* SSE Optimize */
b->lW_blocktype, b->lW_modenumber, b->lW_no);
+#endif /* SSE Optimize */
#if 0
if(vi->channels==2){
@@ -570,7 +1098,12 @@
vif->n,
blocktype, modenumber,
vb->nW,
+#ifdef __SSE__ /* SSE Optimize */
+ b->lW_blocktype, b->lW_modenumber, b->lW_no,
+ res_org[i]);
+#else /* SSE Optimize */
b->lW_blocktype, b->lW_modenumber, b->lW_no);
+#endif /* SSE Optimize */
#if 0
if(vi->channels==2)
@@ -636,7 +1169,12 @@
mag_sort=_vp_quantize_couple_sort(vb,
psy_look,
info,
+#ifdef __SSE__ /* SSE Optimize */
+ mag_memo,
+ res_org[0]);
+#else /* SSE Optimize */
mag_memo);
+#endif /* SSE Optimize */
}
memset(sortindex,0,sizeof(*sortindex)*vi->channels);
@@ -644,7 +1182,11 @@
for(i=0;i<vi->channels;i++){
float *mdct =gmdct[i];
sortindex[i]=alloca(sizeof(**sortindex)*n/2);
+#ifdef __SSE__ /* SSE Optimize */
+ _vp_noise_normalize_sort(psy_look,mdct,sortindex[i],res_org[0]);
+#else /* SSE Optimize */
_vp_noise_normalize_sort(psy_look,mdct,sortindex[i]);
+#endif /* SSE Optimize */
}
}
@@ -835,6 +1377,77 @@
/* channel coupling */
for(i=info->coupling_steps-1;i>=0;i--){
+#ifdef __SSE__ /* SSE Optimize */
+ { /* SSE channel coupling for step i: 8-wide loop, 4-wide loop, then a scalar tail */
+ float *PCMM = vb->pcm[info->coupling_mag[i]];
+ float *PCMA = vb->pcm[info->coupling_ang[i]];
+ int Lim = (n/2)&(~7); /* n/2 rounded down to a multiple of 8 */
+ for(j=0;j<Lim;j+=8){
+ __m128 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5;
+ XMM0 = _mm_load_ps(PCMA+j ); /* ang, elements j..j+3 */
+ XMM3 = _mm_load_ps(PCMA+j+4); /* ang, elements j+4..j+7 */
+ XMM1 = _mm_load_ps(PCMM+j ); /* mag, elements j..j+3 */
+ XMM4 = _mm_load_ps(PCMM+j+4); /* mag, elements j+4..j+7 */
+ XMM2 = XMM0;
+ XMM5 = XMM3;
+ XMM0 = _mm_cmpnle_ps(XMM0, PFV_0.ps); /* lane mask !(ang<=PFV_0); presumably PFV_0 is 0.f -- TODO confirm */
+ XMM3 = _mm_cmpnle_ps(XMM3, PFV_0.ps);
+ XMM1 = _mm_xor_ps(XMM1, XMM2); /* mag^ang */
+ XMM4 = _mm_xor_ps(XMM4, XMM5);
+ XMM1 = _mm_andnot_ps(XMM1, PCS_RRRR.ps); /* ~(mag^ang) & PCS_RRRR; presumably the sign-bit mask -- TODO confirm */
+ XMM4 = _mm_andnot_ps(XMM4, PCS_RRRR.ps);
+ XMM1 = _mm_xor_ps(XMM1, XMM2); /* t = ang with the bits kept above toggled */
+ XMM4 = _mm_xor_ps(XMM4, XMM5);
+ XMM2 = XMM1;
+ XMM5 = XMM4;
+ XMM1 = _mm_and_ps(XMM1, XMM0); /* t where the mask is set, zero elsewhere */
+ XMM4 = _mm_and_ps(XMM4, XMM3);
+ XMM0 = _mm_andnot_ps(XMM0, XMM2); /* t where the mask is clear, zero elsewhere */
+ XMM3 = _mm_andnot_ps(XMM3, XMM5);
+ XMM2 = _mm_load_ps(PCMM+j ); /* reload mag */
+ XMM5 = _mm_load_ps(PCMM+j+4);
+ XMM1 = _mm_add_ps(XMM1, XMM2);
+ XMM4 = _mm_add_ps(XMM4, XMM5);
+ XMM0 = _mm_add_ps(XMM0, XMM2);
+ XMM3 = _mm_add_ps(XMM3, XMM5);
+ _mm_store_ps(PCMA+j , XMM1); /* new ang = mag + (mask ? t : 0) */
+ _mm_store_ps(PCMA+j+4, XMM4);
+ _mm_store_ps(PCMM+j , XMM0); /* new mag = mag + (mask ? 0 : t) */
+ _mm_store_ps(PCMM+j+4, XMM3);
+ }
+ Lim = (n/2)&(~3); /* n/2 rounded down to a multiple of 4 */
+ for(;j<Lim;j+=4){ /* same sequence as above on a single 4-float vector */
+ __m128 XMM0, XMM1, XMM2;
+ XMM0 = _mm_load_ps(PCMA+j );
+ XMM1 = _mm_load_ps(PCMM+j );
+ XMM2 = XMM0;
+ XMM0 = _mm_cmpnle_ps(XMM0, PFV_0.ps);
+ XMM1 = _mm_xor_ps(XMM1, XMM2);
+ XMM1 = _mm_andnot_ps(XMM1, PCS_RRRR.ps);
+ XMM1 = _mm_xor_ps(XMM1, XMM2);
+ XMM2 = XMM1;
+ XMM1 = _mm_and_ps(XMM1, XMM0);
+ XMM0 = _mm_andnot_ps(XMM0, XMM2);
+ XMM2 = _mm_load_ps(PCMM+j );
+ XMM1 = _mm_add_ps(XMM1, XMM2);
+ XMM0 = _mm_add_ps(XMM0, XMM2);
+ _mm_store_ps(PCMA+j , XMM1);
+ _mm_store_ps(PCMM+j , XMM0);
+ }
+ for(;j<n/2;j++){ /* scalar tail for the remaining (n/2)%4 elements */
+ float mag=PCMM[j];
+ float ang=PCMA[j];
+ /* NOTE(review): the vector loops pick +/- from the bits of mag^ang, this tail from mag>0; if PCS_RRRR is the sign mask they look different for mag==+0.0 with ang!=0 -- confirm */
+ if(ang>0){
+ PCMM[j]=mag;
+ PCMA[j]=mag > 0 ? mag-ang : mag+ang;
+ }else{
+ PCMM[j]=mag > 0 ? mag+ang : mag-ang;
+ PCMA[j]=mag;
+ }
+ }
+ }
+#else /* SSE Optimize */
float *pcmM=vb->pcm[info->coupling_mag[i]];
float *pcmA=vb->pcm[info->coupling_ang[i]];
@@ -859,6 +1472,7 @@
pcmM[j]=mag-ang;
}
}
+#endif /* SSE Optimize */
}
/* compute and apply spectral envelope */
diff -ruN --exclude '*.in' libvorbis-1.2.0/lib/mdct.c libvorbis-1.2.0-sse/lib/mdct.c
--- libvorbis-1.2.0/lib/mdct.c 2007-07-24 02:09:47.000000000 +0200
+++ libvorbis-1.2.0-sse/lib/mdct.c 2007-08-02 12:43:10.000000000 +0200
@@ -45,6 +45,10 @@
#include "mdct.h"
#include "os.h"
#include "misc.h"
+#ifdef __SSE__ /* SSE Optimize */
+#include "xmmlib.h"
+#endif /* SSE Optimize */
+
/* build lookups for trig functions; also pre-figure scaling and
some window function algebra. */
@@ -88,10 +92,342 @@
}
}
lookup->scale=FLOAT_CONV(4.f/n);
+#ifdef __SSE__ /* SSE Optimize */
+ {
+ __m128 pscalem = _mm_set_ps1(lookup->scale);
+ float *S, *U;
+ int n2 = n>>1;
+ int n4 = n>>2;
+ int n8 = n>>3;
+ int j;
+ /* Table for mdct_bitreverse: n2 floats built from lookup->trig+n.
+ Each 4 source floats are stored unchanged, followed by a copy with
+ adjacent pairs swapped and XORed with PCS_RNRN. */
+ T = _ogg_malloc(sizeof(*T)*n2);
+ lookup->trig_bitreverse = T;
+ S = lookup->trig+n;
+ for(i=0;i<n4;i+=8) /* 8 source floats in, 16 table floats out per pass */
+ {
+ __m128 XMM0 = _mm_load_ps(S+i );
+ __m128 XMM1 = _mm_load_ps(S+i+ 4);
+ __m128 XMM2 = XMM0;
+ __m128 XMM3 = XMM1;
+ XMM2 = _mm_shuffle_ps(XMM2, XMM2, _MM_SHUFFLE(2,3,0,1)); /* {a1,a0,a3,a2} */
+ XMM3 = _mm_shuffle_ps(XMM3, XMM3, _MM_SHUFFLE(2,3,0,1));
+ XMM2 = _mm_xor_ps(XMM2, PCS_RNRN.ps);
+ XMM3 = _mm_xor_ps(XMM3, PCS_RNRN.ps);
+ _mm_store_ps(T+i*2 , XMM0);
+ _mm_store_ps(T+i*2+ 4, XMM2);
+ _mm_store_ps(T+i*2+ 8, XMM1);
+ _mm_store_ps(T+i*2+12, XMM3);
+ }
+ /* Table for mdct_forward, part 0: first n floats of a 2n-float buffer.
+ Pairs are taken from both ends of lookup->trig (S+j walks down, S+i
+ walks up); the XOR masks change once i reaches n8. */
+ T = _ogg_malloc(sizeof(*T)*(n*2));
+ lookup->trig_forward = T;
+ S = lookup->trig;
+ for(i=0,j=n2-4;i<n8;i+=4,j-=4)
+ {
+ __m128 XMM0, XMM1, XMM2, XMM3;
+ XMM0 = _mm_loadl_pi(XMM0, (__m64*)(S+j+2)); /* XMM0 is uninitialized here; its upper half is replaced by the loadh below */
+ XMM2 = _mm_loadl_pi(XMM2, (__m64*)(S+j )); /* likewise for XMM2 */
+ XMM0 = _mm_loadh_pi(XMM0, (__m64*)(S+i ));
+ XMM2 = _mm_loadh_pi(XMM2, (__m64*)(S+i+2));
+ XMM1 = XMM0;
+ XMM3 = XMM2;
+ XMM0 = _mm_shuffle_ps(XMM0, XMM0, _MM_SHUFFLE(2,3,0,1)); /* swap adjacent pairs */
+ XMM2 = _mm_shuffle_ps(XMM2, XMM2, _MM_SHUFFLE(2,3,0,1));
+ XMM0 = _mm_xor_ps(XMM0, PCS_RRNN.ps);
+ XMM1 = _mm_xor_ps(XMM1, PCS_RNNR.ps);
+ XMM2 = _mm_xor_ps(XMM2, PCS_RRNN.ps);
+ XMM3 = _mm_xor_ps(XMM3, PCS_RNNR.ps);
+ _mm_store_ps(T+i*4 , XMM0);
+ _mm_store_ps(T+i*4+ 4, XMM1);
+ _mm_store_ps(T+i*4+ 8, XMM2);
+ _mm_store_ps(T+i*4+12, XMM3);
+ }
+ for(;i<n4;i+=4,j-=4) /* same loads and shuffles; the swapped copies use PCS_NNRR instead of PCS_RRNN */
+ {
+ __m128 XMM0, XMM1, XMM2, XMM3;
+ XMM0 = _mm_loadl_pi(XMM0, (__m64*)(S+j+2));
+ XMM2 = _mm_loadl_pi(XMM2, (__m64*)(S+j ));
+ XMM0 = _mm_loadh_pi(XMM0, (__m64*)(S+i ));
+ XMM2 = _mm_loadh_pi(XMM2, (__m64*)(S+i+2));
+ XMM1 = XMM0;
+ XMM3 = XMM2;
+ XMM0 = _mm_shuffle_ps(XMM0, XMM0, _MM_SHUFFLE(2,3,0,1));
+ XMM2 = _mm_shuffle_ps(XMM2, XMM2, _MM_SHUFFLE(2,3,0,1));
+ XMM0 = _mm_xor_ps(XMM0, PCS_NNRR.ps);
+ XMM2 = _mm_xor_ps(XMM2, PCS_NNRR.ps);
+ XMM1 = _mm_xor_ps(XMM1, PCS_RNNR.ps);
+ XMM3 = _mm_xor_ps(XMM3, PCS_RNNR.ps);
+ _mm_store_ps(T+i*4 , XMM0);
+ _mm_store_ps(T+i*4+ 4, XMM1);
+ _mm_store_ps(T+i*4+ 8, XMM2);
+ _mm_store_ps(T+i*4+12, XMM3);
+ }
+ /* Table for mdct_forward, part 1: second n floats of trig_forward.
+ Each 8 floats from lookup->trig+n2 are split into odd- and even-indexed
+ elements, scaled by lookup->scale, and stored again lane-reversed. */
+ T = lookup->trig_forward+n;
+ S = lookup->trig+n2;
+ for(i=0;i<n4;i+=4){
+ __m128 XMM0, XMM1, XMM2;
+ XMM0 = _mm_load_ps(S+4);
+ XMM2 = _mm_load_ps(S );
+ XMM1 = XMM0;
+ XMM0 = _mm_shuffle_ps(XMM0, XMM2,_MM_SHUFFLE(1,3,1,3)); /* {S[7],S[5],S[3],S[1]} */
+ XMM1 = _mm_shuffle_ps(XMM1, XMM2,_MM_SHUFFLE(0,2,0,2)); /* {S[6],S[4],S[2],S[0]} */
+ XMM0 = _mm_mul_ps(XMM0, pscalem);
+ XMM1 = _mm_mul_ps(XMM1, pscalem);
+ _mm_store_ps(T , XMM0);
+ _mm_store_ps(T+ 4, XMM1);
+ XMM1 = _mm_shuffle_ps(XMM1, XMM1, _MM_SHUFFLE(0,1,2,3)); /* reverse lane order */
+ XMM0 = _mm_shuffle_ps(XMM0, XMM0, _MM_SHUFFLE(0,1,2,3));
+ _mm_store_ps(T+ 8, XMM1);
+ _mm_store_ps(T+12, XMM0);
+ S += 8;
+ T += 16;
+ }
+ /* Table for mdct_backward, part 0: first n floats of an (n+n2)-float
+ buffer. S walks up and U walks down from lookup->trig+n4, 4 floats
+ each per pass, producing 16 table floats. */
+ S = U = lookup->trig+n4;
+ T = _ogg_malloc(sizeof(*T)*(n+n2));
+ lookup->trig_backward = T;
+ for(i=0;i<n4;i+=4)
+ {
+ __m128 XMM0, XMM1, XMM2, XMM3;
+ U -= 4;
+ XMM0 = _mm_load_ps(S);
+ XMM2 = _mm_load_ps(U);
+ XMM1 = XMM0;
+ XMM3 = XMM2;
+ XMM0 = _mm_shuffle_ps(XMM0, XMM0, _MM_SHUFFLE(1,1,3,3)); /* {S[3],S[3],S[1],S[1]} */
+ XMM1 = _mm_shuffle_ps(XMM1, XMM1, _MM_SHUFFLE(0,0,2,2)); /* {S[2],S[2],S[0],S[0]} */
+ XMM2 = _mm_shuffle_ps(XMM2, XMM2, _MM_SHUFFLE(0,1,2,3)); /* {U[3],U[2],U[1],U[0]} */
+ XMM3 = _mm_shuffle_ps(XMM3, XMM3, _MM_SHUFFLE(1,0,3,2)); /* {U[2],U[3],U[0],U[1]} */
+ XMM0 = _mm_xor_ps(XMM0, PCS_NRNR.ps);
+ XMM3 = _mm_xor_ps(XMM3, PCS_NRNR.ps);
+ _mm_store_ps(T , XMM0);
+ _mm_store_ps(T+ 4, XMM1);
+ _mm_store_ps(T+ 8, XMM2);
+ _mm_store_ps(T+12, XMM3);
+ S += 4;
+ T += 16;
+ }
+ /* Table for mdct_backward, part 1: n2 floats at trig_backward+n.
+ Each 8 floats from lookup->trig+n2 are stored as the four odd-indexed
+ elements followed by the four even-indexed ones. */
+ S = lookup->trig+n2;
+ T = lookup->trig_backward+n;
+ for(i=0;i<n4;i+=4)
+ {
+ __m128 XMM0, XMM1, XMM2;
+ XMM0 = _mm_load_ps(S );
+ XMM2 = _mm_load_ps(S+4);
+ XMM1 = XMM0;
+ XMM0 = _mm_shuffle_ps(XMM0, XMM2, _MM_SHUFFLE(3,1,3,1)); /* {S[1],S[3],S[5],S[7]} */
+ XMM1 = _mm_shuffle_ps(XMM1, XMM2, _MM_SHUFFLE(2,0,2,0)); /* {S[0],S[2],S[4],S[6]} */
+ _mm_store_ps(T , XMM0);
+ _mm_store_ps(T+4, XMM1);
+ S += 8;
+ T += 8;
+ }
+ /* Table for mdct_butterfly_first: 2n floats. The first n are shuffled
+ out of lookup->trig 16 floats at a time; the second n (at T+n) hold
+ the same vectors XORed with PCS_RRRR. */
+ S = lookup->trig;
+ T = _ogg_malloc(sizeof(*T)*n*2);
+ lookup->trig_butterfly_first = T;
+ for(i=0;i<n4;i+=4)
+ {
+ __m128 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5;
+ XMM2 = _mm_load_ps(S );
+ XMM0 = _mm_load_ps(S+ 4);
+ XMM5 = _mm_load_ps(S+ 8);
+ XMM3 = _mm_load_ps(S+12);
+ XMM1 = XMM0;
+ XMM4 = XMM3;
+ XMM0 = _mm_shuffle_ps(XMM0, XMM2, _MM_SHUFFLE(0,1,0,1)); /* {S[5],S[4],S[1],S[0]} */
+ XMM1 = _mm_shuffle_ps(XMM1, XMM2, _MM_SHUFFLE(1,0,1,0)); /* {S[4],S[5],S[0],S[1]} */
+ XMM3 = _mm_shuffle_ps(XMM3, XMM5, _MM_SHUFFLE(0,1,0,1)); /* {S[13],S[12],S[9],S[8]} */
+ XMM4 = _mm_shuffle_ps(XMM4, XMM5, _MM_SHUFFLE(1,0,1,0)); /* {S[12],S[13],S[8],S[9]} */
+ XMM1 = _mm_xor_ps(XMM1, PCS_RNRN.ps);
+ XMM4 = _mm_xor_ps(XMM4, PCS_RNRN.ps);
+ _mm_store_ps(T , XMM1);
+ _mm_store_ps(T+ 4, XMM4);
+ _mm_store_ps(T+ 8, XMM0);
+ _mm_store_ps(T+12, XMM3);
+ XMM1 = _mm_xor_ps(XMM1, PCS_RRRR.ps); /* second copy of all four vectors for the T+n half */
+ XMM4 = _mm_xor_ps(XMM4, PCS_RRRR.ps);
+ XMM0 = _mm_xor_ps(XMM0, PCS_RRRR.ps);
+ XMM3 = _mm_xor_ps(XMM3, PCS_RRRR.ps);
+ _mm_store_ps(T+n , XMM1);
+ _mm_store_ps(T+n+ 4, XMM4);
+ _mm_store_ps(T+n+ 8, XMM0);
+ _mm_store_ps(T+n+12, XMM3);
+ S += 16;
+ T += 16;
+ }
+ /* Table for mdct_butterfly_generic with trigint==8: n2 floats.
+ Each pass reads 4 floats at S, S+8, S+16 and S+24, stores 16 floats
+ and advances S by 32. */
+ S = lookup->trig;
+ T = _ogg_malloc(sizeof(*T)*n2);
+ lookup->trig_butterfly_generic8 = T;
+ for(i=0;i<n;i+=32)
+ {
+ __m128 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5;
+
+ XMM0 = _mm_load_ps(S+ 24);
+ XMM2 = _mm_load_ps(S+ 16);
+ XMM3 = _mm_load_ps(S+ 8);
+ XMM5 = _mm_load_ps(S );
+ XMM1 = XMM0;
+ XMM4 = XMM3;
+ XMM0 = _mm_shuffle_ps(XMM0, XMM2, _MM_SHUFFLE(0,1,0,1)); /* {S[25],S[24],S[17],S[16]} */
+ XMM1 = _mm_shuffle_ps(XMM1, XMM2, _MM_SHUFFLE(1,0,1,0)); /* {S[24],S[25],S[16],S[17]} */
+ XMM3 = _mm_shuffle_ps(XMM3, XMM5, _MM_SHUFFLE(0,1,0,1)); /* {S[9],S[8],S[1],S[0]} */
+ XMM4 = _mm_shuffle_ps(XMM4, XMM5, _MM_SHUFFLE(1,0,1,0)); /* {S[8],S[9],S[0],S[1]} */
+ XMM1 = _mm_xor_ps(XMM1, PCS_RNRN.ps);
+ XMM4 = _mm_xor_ps(XMM4, PCS_RNRN.ps);
+ _mm_store_ps(T , XMM0);
+ _mm_store_ps(T+ 4, XMM1);
+ _mm_store_ps(T+ 8, XMM3);
+ _mm_store_ps(T+12, XMM4);
+ S += 32;
+ T += 16;
+ }
+ /* Table for mdct_butterfly_generic with trigint==16: n4 floats.
+ Each pass reads 4 floats at S, S+16, S+32 and S+48, stores 16 floats
+ and advances S by 64. */
+ S = lookup->trig;
+ T = _ogg_malloc(sizeof(*T)*n4);
+ lookup->trig_butterfly_generic16 = T;
+ for(i=0;i<n;i+=64)
+ {
+ __m128 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5;
+
+ XMM0 = _mm_load_ps(S+ 48);
+ XMM2 = _mm_load_ps(S+ 32);
+ XMM3 = _mm_load_ps(S+ 16);
+ XMM5 = _mm_load_ps(S );
+ XMM1 = XMM0;
+ XMM4 = XMM3;
+ XMM0 = _mm_shuffle_ps(XMM0, XMM2, _MM_SHUFFLE(0,1,0,1)); /* {S[49],S[48],S[33],S[32]} */
+ XMM1 = _mm_shuffle_ps(XMM1, XMM2, _MM_SHUFFLE(1,0,1,0)); /* {S[48],S[49],S[32],S[33]} */
+ XMM3 = _mm_shuffle_ps(XMM3, XMM5, _MM_SHUFFLE(0,1,0,1)); /* {S[17],S[16],S[1],S[0]} */
+ XMM4 = _mm_shuffle_ps(XMM4, XMM5, _MM_SHUFFLE(1,0,1,0)); /* {S[16],S[17],S[0],S[1]} */
+ XMM1 = _mm_xor_ps(XMM1, PCS_RNRN.ps);
+ XMM4 = _mm_xor_ps(XMM4, PCS_RNRN.ps);
+ _mm_store_ps(T , XMM0);
+ _mm_store_ps(T+ 4, XMM1);
+ _mm_store_ps(T+ 8, XMM3);
+ _mm_store_ps(T+12, XMM4);
+ S += 64;
+ T += 16;
+ }
+ /* Table for mdct_butterfly_generic with trigint==32: n8 floats, left
+ NULL when n<128. Each pass reads 4 floats at S, S+32, S+64 and S+96,
+ stores 16 floats and advances S by 128. */
+ if(n<128)
+ lookup->trig_butterfly_generic32 = NULL;
+ else
+ {
+ S = lookup->trig;
+ T = _ogg_malloc(sizeof(*T)*n8);
+ lookup->trig_butterfly_generic32 = T;
+ for(i=0;i<n;i+=128)
+ {
+ __m128 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5;
+
+ XMM0 = _mm_load_ps(S+ 96);
+ XMM2 = _mm_load_ps(S+ 64);
+ XMM3 = _mm_load_ps(S+ 32);
+ XMM5 = _mm_load_ps(S );
+ XMM1 = XMM0;
+ XMM4 = XMM3;
+ XMM0 = _mm_shuffle_ps(XMM0, XMM2, _MM_SHUFFLE(0,1,0,1)); /* {S[97],S[96],S[65],S[64]} */
+ XMM1 = _mm_shuffle_ps(XMM1, XMM2, _MM_SHUFFLE(1,0,1,0)); /* {S[96],S[97],S[64],S[65]} */
+ XMM3 = _mm_shuffle_ps(XMM3, XMM5, _MM_SHUFFLE(0,1,0,1)); /* {S[33],S[32],S[1],S[0]} */
+ XMM4 = _mm_shuffle_ps(XMM4, XMM5, _MM_SHUFFLE(1,0,1,0)); /* {S[32],S[33],S[0],S[1]} */
+ XMM1 = _mm_xor_ps(XMM1, PCS_RNRN.ps);
+ XMM4 = _mm_xor_ps(XMM4, PCS_RNRN.ps);
+ _mm_store_ps(T , XMM0);
+ _mm_store_ps(T+ 4, XMM1);
+ _mm_store_ps(T+ 8, XMM3);
+ _mm_store_ps(T+12, XMM4);
+ S += 128;
+ T += 16;
+ }
+ }
+ /* Table for mdct_butterfly_generic with trigint==64: n8>>1 floats, left
+ NULL when n<256. Each pass reads 4 floats at S, S+64, S+128 and S+192,
+ stores 16 floats and advances S by 256. */
+ if(n<256)
+ lookup->trig_butterfly_generic64 = NULL;
+ else
+ {
+ S = lookup->trig;
+ T = _ogg_malloc(sizeof(*T)*(n8>>1));
+ lookup->trig_butterfly_generic64 = T;
+ for(i=0;i<n;i+=256)
+ {
+ __m128 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5;
+
+ XMM0 = _mm_load_ps(S+192);
+ XMM2 = _mm_load_ps(S+128);
+ XMM3 = _mm_load_ps(S+ 64);
+ XMM5 = _mm_load_ps(S );
+ XMM1 = XMM0;
+ XMM4 = XMM3;
+ XMM0 = _mm_shuffle_ps(XMM0, XMM2, _MM_SHUFFLE(0,1,0,1)); /* {S[193],S[192],S[129],S[128]} */
+ XMM1 = _mm_shuffle_ps(XMM1, XMM2, _MM_SHUFFLE(1,0,1,0)); /* {S[192],S[193],S[128],S[129]} */
+ XMM3 = _mm_shuffle_ps(XMM3, XMM5, _MM_SHUFFLE(0,1,0,1)); /* {S[65],S[64],S[1],S[0]} */
+ XMM4 = _mm_shuffle_ps(XMM4, XMM5, _MM_SHUFFLE(1,0,1,0)); /* {S[64],S[65],S[0],S[1]} */
+ XMM1 = _mm_xor_ps(XMM1, PCS_RNRN.ps);
+ XMM4 = _mm_xor_ps(XMM4, PCS_RNRN.ps);
+ _mm_store_ps(T , XMM0);
+ _mm_store_ps(T+ 4, XMM1);
+ _mm_store_ps(T+ 8, XMM3);
+ _mm_store_ps(T+12, XMM4);
+ S += 256;
+ T += 16;
+ }
+ }
+ }
+#endif /* SSE Optimize */
}
/* 8 point butterfly (in place, 4 register) */
STIN void mdct_butterfly_8(DATA_TYPE *x){
+#ifdef __SSE__ /* SSE Optimize: whole 8-point butterfly on two 4-float vectors */
+ __m128 XMM0, XMM1, XMM2, XMM3;
+ XMM0 = _mm_load_ps(x+4);
+ XMM1 = _mm_load_ps(x );
+ XMM2 = XMM0;
+ XMM0 = _mm_sub_ps(XMM0, XMM1); /* d = x[4..7]-x[0..3] */
+ XMM2 = _mm_add_ps(XMM2, XMM1); /* s = x[4..7]+x[0..3] */
+
+ XMM1 = XMM0;
+ XMM3 = XMM2;
+
+ XMM0 = _mm_shuffle_ps(XMM0, XMM0, _MM_SHUFFLE(3,2,3,2)); /* {d2,d3,d2,d3} */
+ XMM1 = _mm_shuffle_ps(XMM1, XMM1, _MM_SHUFFLE(0,1,0,1)); /* {d1,d0,d1,d0} */
+ XMM2 = _mm_shuffle_ps(XMM2, XMM2, _MM_SHUFFLE(3,2,3,2)); /* {s2,s3,s2,s3} */
+ XMM3 = _mm_shuffle_ps(XMM3, XMM3, _MM_SHUFFLE(1,0,1,0)); /* {s0,s1,s0,s1} */
+
+ XMM1 = _mm_xor_ps(XMM1, PCS_NRRN.ps); /* presumably flips the sign in the lanes marked R -- TODO confirm in xmmlib.h */
+ XMM3 = _mm_xor_ps(XMM3, PCS_NNRR.ps);
+
+ XMM0 = _mm_add_ps(XMM0, XMM1);
+ XMM2 = _mm_add_ps(XMM2, XMM3);
+
+ _mm_store_ps(x , XMM0); /* x[0..3] come from the differences */
+ _mm_store_ps(x+4, XMM2); /* x[4..7] come from the sums */
+#else /* SSE Optimize */
REG_TYPE r0 = x[6] + x[2];
REG_TYPE r1 = x[6] - x[2];
REG_TYPE r2 = x[4] + x[0];
@@ -112,10 +448,49 @@
x[7] = r1 + r0;
x[5] = r1 - r0;
+#endif /* SSE Optimize */
}
/* 16 point butterfly (in place, 4 register) */
STIN void mdct_butterfly_16(DATA_TYPE *x){
+#ifdef __SSE__ /* SSE Optimize: first stage of the 16-point butterfly; the two 8-point calls below the #endif are shared */
+ static _MM_ALIGN16 const float PFV0[4] = { cPI2_8, cPI2_8, 1.f, -1.f};
+ static _MM_ALIGN16 const float PFV1[4] = { cPI2_8, -cPI2_8, 0.f, 0.f};
+ static _MM_ALIGN16 const float PFV2[4] = { cPI2_8, cPI2_8, 1.f, 1.f};
+ static _MM_ALIGN16 const float PFV3[4] = {-cPI2_8, cPI2_8, 0.f, 0.f};
+ __m128 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7;
+
+ XMM3 = _mm_load_ps(x+12);
+ XMM0 = _mm_load_ps(x );
+ XMM1 = _mm_load_ps(x+ 4);
+ XMM2 = _mm_load_ps(x+ 8);
+ XMM4 = XMM3;
+ XMM5 = XMM0;
+ XMM0 = _mm_sub_ps(XMM0, XMM2); /* x[0..3]-x[8..11] */
+ XMM3 = _mm_sub_ps(XMM3, XMM1); /* x[12..15]-x[4..7] */
+ XMM2 = _mm_add_ps(XMM2, XMM5); /* x[8..11]+x[0..3] */
+ XMM4 = _mm_add_ps(XMM4, XMM1); /* x[12..15]+x[4..7] */
+ XMM1 = XMM0;
+ XMM5 = XMM3;
+ _mm_store_ps(x+ 8, XMM2); /* the sums replace x[8..15] */
+ _mm_store_ps(x+12, XMM4);
+ XMM0 = _mm_shuffle_ps(XMM0, XMM0, _MM_SHUFFLE(2,3,1,1)); /* loads of PFV0..PFV3 are interleaved with the shuffles */
+ XMM2 = _mm_load_ps(PFV0);
+ XMM1 = _mm_shuffle_ps(XMM1, XMM1, _MM_SHUFFLE(2,3,0,0));
+ XMM4 = _mm_load_ps(PFV1);
+ XMM3 = _mm_shuffle_ps(XMM3, XMM3, _MM_SHUFFLE(3,2,0,0));
+ XMM6 = _mm_load_ps(PFV2);
+ XMM5 = _mm_shuffle_ps(XMM5, XMM5, _MM_SHUFFLE(3,2,1,1));
+ XMM7 = _mm_load_ps(PFV3);
+ XMM0 = _mm_mul_ps(XMM0, XMM2); /* shuffled differences times the constant vectors */
+ XMM1 = _mm_mul_ps(XMM1, XMM4);
+ XMM3 = _mm_mul_ps(XMM3, XMM6);
+ XMM5 = _mm_mul_ps(XMM5, XMM7);
+ XMM0 = _mm_add_ps(XMM0, XMM1);
+ XMM3 = _mm_add_ps(XMM3, XMM5);
+ _mm_store_ps(x , XMM0); /* the weighted differences replace x[0..7] */
+ _mm_store_ps(x+ 4, XMM3);
+#else /* SSE Optimize */
REG_TYPE r0 = x[1] - x[9];
REG_TYPE r1 = x[0] - x[8];
@@ -144,6 +519,7 @@
x[15] += x[7];
x[6] = r0;
x[7] = r1;
+#endif /* SSE Optimize */
mdct_butterfly_8(x);
mdct_butterfly_8(x+8);
@@ -151,6 +527,85 @@
/* 32 point butterfly (in place, 4 register) */
STIN void mdct_butterfly_32(DATA_TYPE *x){
+#ifdef __SSE__ /* SSE Optimize: first stage of the 32-point butterfly; the two 16-point calls below the #endif are shared */
+ static _MM_ALIGN16 const __m128x PFV0 =
+ { .sf = {-cPI3_8, -cPI1_8, -cPI2_8, -cPI2_8} };
+ static _MM_ALIGN16 const __m128x PFV1 =
+ { .sf = {-cPI1_8, cPI3_8, -cPI2_8, cPI2_8} };
+ static _MM_ALIGN16 const __m128x PFV2 =
+ { .sf = {-cPI1_8, -cPI3_8, -1.f, 1.f} };
+ static _MM_ALIGN16 const __m128x PFV3 =
+ { .sf = {-cPI3_8, cPI1_8, 0.f, 0.f} };
+ static _MM_ALIGN16 const __m128x PFV4 =
+ { .sf = { cPI3_8, cPI3_8, cPI2_8, cPI2_8} };
+ static _MM_ALIGN16 const __m128x PFV5 =
+ { .sf = {-cPI1_8, cPI1_8, -cPI2_8, cPI2_8} };
+ static _MM_ALIGN16 const __m128x PFV6 =
+ { .sf = { cPI1_8, cPI3_8, 1.f, 1.f} };
+ static _MM_ALIGN16 const __m128x PFV7 =
+ { .sf = {-cPI3_8, cPI1_8, 0.f, 0.f} };
+ __m128 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7;
+
+ XMM0 = _mm_load_ps(x+16);
+ XMM1 = _mm_load_ps(x+20);
+ XMM2 = _mm_load_ps(x+24);
+ XMM3 = _mm_load_ps(x+28);
+ XMM4 = XMM0;
+ XMM5 = XMM1;
+ XMM6 = XMM2;
+ XMM7 = XMM3;
+
+ XMM0 = _mm_sub_ps(XMM0, PM128(x )); /* XMM0..3 = x[16..31]-x[0..15]; PM128 presumably reads x as a __m128 -- TODO confirm */
+ XMM1 = _mm_sub_ps(XMM1, PM128(x+ 4));
+ XMM2 = _mm_sub_ps(XMM2, PM128(x+ 8));
+ XMM3 = _mm_sub_ps(XMM3, PM128(x+12));
+ XMM4 = _mm_add_ps(XMM4, PM128(x )); /* XMM4..7 = x[16..31]+x[0..15] */
+ XMM5 = _mm_add_ps(XMM5, PM128(x+ 4));
+ XMM6 = _mm_add_ps(XMM6, PM128(x+ 8));
+ XMM7 = _mm_add_ps(XMM7, PM128(x+12));
+ _mm_store_ps(x+16, XMM4); /* the sums replace x[16..31] */
+ _mm_store_ps(x+20, XMM5);
+ _mm_store_ps(x+24, XMM6);
+ _mm_store_ps(x+28, XMM7);
+
+#if defined(__SSE3__)
+ XMM4 = _mm_moveldup_ps(XMM0); /* even lanes duplicated, same result as the (2,2,0,0) shuffle below */
+ XMM5 = XMM1;
+ XMM0 = _mm_movehdup_ps(XMM0); /* odd lanes duplicated, same result as the (3,3,1,1) shuffle below */
+ XMM6 = XMM2;
+ XMM7 = XMM3;
+#else
+ XMM4 = XMM0;
+ XMM5 = XMM1;
+ XMM6 = XMM2;
+ XMM7 = XMM3;
+
+ XMM0 = _mm_shuffle_ps(XMM0, XMM0, _MM_SHUFFLE(3,3,1,1));
+ XMM4 = _mm_shuffle_ps(XMM4, XMM4, _MM_SHUFFLE(2,2,0,0));
+#endif
+ XMM1 = _mm_shuffle_ps(XMM1, XMM1, _MM_SHUFFLE(2,3,1,1));
+ XMM5 = _mm_shuffle_ps(XMM5, XMM5, _MM_SHUFFLE(2,3,0,0));
+ XMM2 = _mm_shuffle_ps(XMM2, XMM2, _MM_SHUFFLE(2,2,1,0));
+ XMM6 = _mm_shuffle_ps(XMM6, XMM6, _MM_SHUFFLE(3,3,0,1));
+ XMM3 = _mm_shuffle_ps(XMM3, XMM3, _MM_SHUFFLE(3,2,0,0));
+ XMM7 = _mm_shuffle_ps(XMM7, XMM7, _MM_SHUFFLE(3,2,1,1));
+ XMM0 = _mm_mul_ps(XMM0, PFV0.ps); /* each shuffled difference is weighted by one of PFV0..PFV7 */
+ XMM4 = _mm_mul_ps(XMM4, PFV1.ps);
+ XMM1 = _mm_mul_ps(XMM1, PFV2.ps);
+ XMM5 = _mm_mul_ps(XMM5, PFV3.ps);
+ XMM2 = _mm_mul_ps(XMM2, PFV4.ps);
+ XMM6 = _mm_mul_ps(XMM6, PFV5.ps);
+ XMM3 = _mm_mul_ps(XMM3, PFV6.ps);
+ XMM7 = _mm_mul_ps(XMM7, PFV7.ps);
+ XMM0 = _mm_add_ps(XMM0, XMM4); /* the two weighted copies of each difference are summed */
+ XMM1 = _mm_add_ps(XMM1, XMM5);
+ XMM2 = _mm_add_ps(XMM2, XMM6);
+ XMM3 = _mm_add_ps(XMM3, XMM7);
+ _mm_store_ps(x , XMM0); /* the results replace x[0..15] */
+ _mm_store_ps(x+ 4, XMM1);
+ _mm_store_ps(x+ 8, XMM2);
+ _mm_store_ps(x+12, XMM3);
+#else /* SSE Optimize */
REG_TYPE r0 = x[30] - x[14];
REG_TYPE r1 = x[31] - x[15];
@@ -207,6 +662,7 @@
x[17] += x[1];
x[0] = MULT_NORM( r1 * cPI3_8 + r0 * cPI1_8 );
x[1] = MULT_NORM( r1 * cPI1_8 - r0 * cPI3_8 );
+#endif /* SSE Optimize */
mdct_butterfly_16(x);
mdct_butterfly_16(x+16);
@@ -214,10 +670,194 @@
}
/* N point first stage butterfly (in place, 2 register) */
+#ifdef __SSE__ /* SSE Optimize */
+STIN void mdct_butterfly_first_backward(int n,float *T,
+ float *x,
+ int points, float *zX0, float *zX1)
+{ /* three-phase first-stage butterfly: zX0 ends phase 1, zX1 ends phase 2, n is added to T before phase 3 */
+ float *X1 = x + points - 8;
+ float *X2 = x + (points>>1) - 8;
+
+ /* Phase 1, while X2>=zX0: X2 is written but never read (the original
+ note says this part of X2[*] is 0.f), so X1 is left as it is and X2
+ receives the twiddle product of X1 alone. */
+ while(X2>=zX0){
+ __m128 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7;
+ XMM0 = _mm_load_ps(X1+4);
+ XMM1 = _mm_load_ps(X1 );
+#if defined(__SSE3__)
+ XMM2 = _mm_moveldup_ps(XMM0); /* even lanes duplicated */
+ XMM3 = _mm_moveldup_ps(XMM1);
+ XMM0 = _mm_movehdup_ps(XMM0); /* odd lanes duplicated */
+ XMM1 = _mm_movehdup_ps(XMM1);
+#else
+ XMM2 = XMM0;
+ XMM3 = XMM1;
+ XMM0 = _mm_shuffle_ps(XMM0 , XMM0 , _MM_SHUFFLE(3,3,1,1));
+ XMM1 = _mm_shuffle_ps(XMM1 , XMM1 , _MM_SHUFFLE(3,3,1,1));
+ XMM2 = _mm_shuffle_ps(XMM2 , XMM2 , _MM_SHUFFLE(2,2,0,0));
+ XMM3 = _mm_shuffle_ps(XMM3 , XMM3 , _MM_SHUFFLE(2,2,0,0));
+#endif
+ XMM4 = _mm_load_ps(T );
+ XMM5 = _mm_load_ps(T+ 4);
+ XMM6 = _mm_load_ps(T+ 8);
+ XMM7 = _mm_load_ps(T+12);
+ XMM2 = _mm_mul_ps(XMM2, XMM4); /* even-lane copies use T[0..7], odd-lane copies use T[8..15] */
+ XMM3 = _mm_mul_ps(XMM3, XMM5);
+ XMM0 = _mm_mul_ps(XMM0, XMM6);
+ XMM1 = _mm_mul_ps(XMM1, XMM7);
+ XMM0 = _mm_add_ps(XMM0, XMM2);
+ XMM1 = _mm_add_ps(XMM1, XMM3);
+ _mm_store_ps(X2+4, XMM0);
+ _mm_store_ps(X2 , XMM1);
+ X1 -= 8;
+ X2 -= 8;
+ T += 16;
+ }
+ /* Phase 2, while X1>=zX1: the normal butterfly. X1 receives X1+X2 and
+ X2 receives the twiddle product of X1-X2, the same sequence as the
+ SSE branch of mdct_butterfly_first. */
+ while(X1>=zX1){
+ __m128 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7;
+ XMM0 = _mm_load_ps(X1+4);
+ XMM1 = _mm_load_ps(X1 );
+ XMM2 = _mm_load_ps(X2+4);
+ XMM3 = _mm_load_ps(X2 );
+ XMM4 = XMM0;
+ XMM5 = XMM1;
+ XMM0 = _mm_sub_ps(XMM0, XMM2); /* X1-X2 */
+ XMM1 = _mm_sub_ps(XMM1, XMM3);
+ XMM4 = _mm_add_ps(XMM4, XMM2); /* X1+X2 */
+ XMM5 = _mm_add_ps(XMM5, XMM3);
+#if defined(__SSE3__)
+ XMM2 = _mm_moveldup_ps(XMM0);
+ XMM3 = _mm_moveldup_ps(XMM1);
+ _mm_store_ps(X1+4, XMM4);
+ _mm_store_ps(X1 , XMM5);
+ XMM0 = _mm_movehdup_ps(XMM0);
+ XMM1 = _mm_movehdup_ps(XMM1);
+#else
+ XMM2 = XMM0;
+ XMM3 = XMM1;
+ _mm_store_ps(X1+4, XMM4);
+ _mm_store_ps(X1 , XMM5);
+ XMM0 = _mm_shuffle_ps(XMM0 , XMM0 , _MM_SHUFFLE(3,3,1,1));
+ XMM1 = _mm_shuffle_ps(XMM1 , XMM1 , _MM_SHUFFLE(3,3,1,1));
+ XMM2 = _mm_shuffle_ps(XMM2 , XMM2 , _MM_SHUFFLE(2,2,0,0));
+ XMM3 = _mm_shuffle_ps(XMM3 , XMM3 , _MM_SHUFFLE(2,2,0,0));
+#endif
+ XMM4 = _mm_load_ps(T );
+ XMM5 = _mm_load_ps(T+ 4);
+ XMM6 = _mm_load_ps(T+ 8);
+ XMM7 = _mm_load_ps(T+12);
+ XMM2 = _mm_mul_ps(XMM2, XMM4);
+ XMM3 = _mm_mul_ps(XMM3, XMM5);
+ XMM0 = _mm_mul_ps(XMM0, XMM6);
+ XMM1 = _mm_mul_ps(XMM1, XMM7);
+ XMM0 = _mm_add_ps(XMM0, XMM2);
+ XMM1 = _mm_add_ps(XMM1, XMM3);
+ _mm_store_ps(X2+4, XMM0);
+ _mm_store_ps(X2 , XMM1);
+ X1 -= 8;
+ X2 -= 8;
+ T += 16;
+ }
+ /* Phase 3, while X2>=x: X1 is written but never read (the original
+ note says this part of X1[*] is 0.f). X2 is copied into X1 and then
+ multiplied by the table n floats further on. */
+ T += n; /* when T is trig_butterfly_first, this is the half mdct_init builds by XORing with PCS_RRRR */
+ while(X2>=x){
+ __m128 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7;
+ XMM0 = _mm_load_ps(X2+4);
+ XMM1 = _mm_load_ps(X2 );
+ _mm_store_ps(X1+4, XMM0); /* X1 <- X2 */
+ _mm_store_ps(X1 , XMM1);
+#if defined(__SSE3__)
+ XMM2 = _mm_moveldup_ps(XMM0);
+ XMM3 = _mm_moveldup_ps(XMM1);
+ XMM0 = _mm_movehdup_ps(XMM0);
+ XMM1 = _mm_movehdup_ps(XMM1);
+#else
+ XMM2 = XMM0;
+ XMM3 = XMM1;
+ XMM0 = _mm_shuffle_ps(XMM0 , XMM0 , _MM_SHUFFLE(3,3,1,1));
+ XMM1 = _mm_shuffle_ps(XMM1 , XMM1 , _MM_SHUFFLE(3,3,1,1));
+ XMM2 = _mm_shuffle_ps(XMM2 , XMM2 , _MM_SHUFFLE(2,2,0,0));
+ XMM3 = _mm_shuffle_ps(XMM3 , XMM3 , _MM_SHUFFLE(2,2,0,0));
+#endif
+ XMM4 = _mm_load_ps(T );
+ XMM5 = _mm_load_ps(T+ 4);
+ XMM6 = _mm_load_ps(T+ 8);
+ XMM7 = _mm_load_ps(T+12);
+ XMM2 = _mm_mul_ps(XMM2, XMM4);
+ XMM3 = _mm_mul_ps(XMM3, XMM5);
+ XMM0 = _mm_mul_ps(XMM0, XMM6);
+ XMM1 = _mm_mul_ps(XMM1, XMM7);
+ XMM0 = _mm_add_ps(XMM0, XMM2);
+ XMM1 = _mm_add_ps(XMM1, XMM3);
+ _mm_store_ps(X2+4, XMM0);
+ _mm_store_ps(X2 , XMM1);
+ X1 -= 8;
+ X2 -= 8;
+ T += 16;
+ }
+}
+#endif /* SSE Optimize */
+
STIN void mdct_butterfly_first(DATA_TYPE *T,
DATA_TYPE *x,
int points){
+#ifdef __SSE__ /* SSE Optimize: 8 floats of each half per pass, 16 table floats per pass */
+ float *X1 = x + points - 8; /* last 8 floats of the upper half */
+ float *X2 = x + (points>>1) - 8; /* last 8 floats of the lower half */
+
+ do{
+ __m128 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7;
+ XMM0 = _mm_load_ps(X1+4);
+ XMM1 = _mm_load_ps(X1 );
+ XMM2 = _mm_load_ps(X2+4);
+ XMM3 = _mm_load_ps(X2 );
+ XMM4 = XMM0;
+ XMM5 = XMM1;
+ XMM0 = _mm_sub_ps(XMM0, XMM2); /* X1-X2 */
+ XMM1 = _mm_sub_ps(XMM1, XMM3);
+ XMM4 = _mm_add_ps(XMM4, XMM2); /* X1+X2 */
+ XMM5 = _mm_add_ps(XMM5, XMM3);
+#if defined(__SSE3__)
+ XMM2 = _mm_moveldup_ps(XMM0); /* even lanes of the difference duplicated */
+ XMM3 = _mm_moveldup_ps(XMM1);
+ _mm_store_ps(X1+4, XMM4); /* X1 <- X1+X2 */
+ _mm_store_ps(X1 , XMM5);
+ XMM0 = _mm_movehdup_ps(XMM0); /* odd lanes of the difference duplicated */
+ XMM1 = _mm_movehdup_ps(XMM1);
+#else
+ XMM2 = XMM0;
+ XMM3 = XMM1;
+ _mm_store_ps(X1+4, XMM4); /* X1 <- X1+X2 */
+ _mm_store_ps(X1 , XMM5);
+ XMM0 = _mm_shuffle_ps(XMM0 , XMM0 , _MM_SHUFFLE(3,3,1,1));
+ XMM1 = _mm_shuffle_ps(XMM1 , XMM1 , _MM_SHUFFLE(3,3,1,1));
+ XMM2 = _mm_shuffle_ps(XMM2 , XMM2 , _MM_SHUFFLE(2,2,0,0));
+ XMM3 = _mm_shuffle_ps(XMM3 , XMM3 , _MM_SHUFFLE(2,2,0,0));
+#endif
+ XMM4 = _mm_load_ps(T );
+ XMM5 = _mm_load_ps(T+ 4);
+ XMM6 = _mm_load_ps(T+ 8);
+ XMM7 = _mm_load_ps(T+12);
+ XMM2 = _mm_mul_ps(XMM2, XMM4); /* even-lane copies use T[0..7], odd-lane copies use T[8..15] */
+ XMM3 = _mm_mul_ps(XMM3, XMM5);
+ XMM0 = _mm_mul_ps(XMM0, XMM6);
+ XMM1 = _mm_mul_ps(XMM1, XMM7);
+ XMM0 = _mm_add_ps(XMM0, XMM2);
+ XMM1 = _mm_add_ps(XMM1, XMM3);
+ _mm_store_ps(X2+4, XMM0); /* X2 <- twiddle product of X1-X2 */
+ _mm_store_ps(X2 , XMM1);
+ X1 -= 8;
+ X2 -= 8;
+ T += 16;
+ }while(X2>=x);
+#else /* SSE Optimize */
DATA_TYPE *x1 = x + points - 8;
DATA_TYPE *x2 = x + (points>>1) - 8;
REG_TYPE r0;
@@ -258,14 +898,144 @@
T+=16;
}while(x2>=x);
+#endif /* SSE Optimize */
}
/* N/stage point generic N stage butterfly (in place, 2 register) */
+#ifdef __SSE__ /* SSE Optimize */
+STIN void mdct_butterfly_generic(mdct_lookup *init,
+#else /* SSE Optimize */
STIN void mdct_butterfly_generic(DATA_TYPE *T,
+#endif /* SSE Optimize */
DATA_TYPE *x,
int points,
int trigint){
+#ifdef __SSE__ /* SSE Optimize: takes the lookup instead of a trig pointer so a prebuilt table can be chosen by trigint */
+ float *T;
+ float *x1 = x + points - 8;
+ float *x2 = x + (points>>1) - 8;
+ switch(trigint)
+ {
+ default : /* no prebuilt table for this trigint: shuffle the twiddles out of init->trig inside the loop, then return */
+ T = init->trig;
+ do
+ {
+ __m128 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6;
+ XMM0 = _mm_load_ps(x1 );
+ XMM1 = _mm_load_ps(x2 );
+ XMM2 = _mm_load_ps(x1+4);
+ XMM3 = _mm_load_ps(x2+4);
+ XMM4 = XMM0;
+ XMM5 = XMM2;
+ XMM0 = _mm_sub_ps(XMM0, XMM1); /* x1-x2 */
+ XMM2 = _mm_sub_ps(XMM2, XMM3);
+ XMM4 = _mm_add_ps(XMM4, XMM1); /* x1+x2 */
+ XMM5 = _mm_add_ps(XMM5, XMM3);
+ XMM1 = XMM0;
+ XMM3 = XMM2;
+ _mm_store_ps(x1 , XMM4); /* x1 <- x1+x2 */
+ _mm_store_ps(x1+4, XMM5);
+#if defined(__SSE3__)
+ XMM0 = _mm_movehdup_ps(XMM0);
+ XMM1 = _mm_moveldup_ps(XMM1);
+ XMM2 = _mm_movehdup_ps(XMM2);
+ XMM3 = _mm_moveldup_ps(XMM3);
+#else
+ XMM0 = _mm_shuffle_ps(XMM0, XMM0, _MM_SHUFFLE(3,3,1,1));
+ XMM1 = _mm_shuffle_ps(XMM1, XMM1, _MM_SHUFFLE(2,2,0,0));
+ XMM2 = _mm_shuffle_ps(XMM2, XMM2, _MM_SHUFFLE(3,3,1,1));
+ XMM3 = _mm_shuffle_ps(XMM3, XMM3, _MM_SHUFFLE(2,2,0,0));
+#endif
+ XMM4 = _mm_load_ps(T+trigint*3);
+ XMM5 = _mm_load_ps(T+trigint*3); /* same address as XMM4; the two copies are shuffled differently below */
+ XMM6 = _mm_load_ps(T+trigint*2);
+ XMM1 = _mm_xor_ps(XMM1, PCS_RNRN.ps); /* the XOR is applied to the data here, to the table in the prebuilt tables */
+ XMM4 = _mm_shuffle_ps(XMM4, XMM6, _MM_SHUFFLE(0,1,0,1));
+ XMM5 = _mm_shuffle_ps(XMM5, XMM6, _MM_SHUFFLE(1,0,1,0));
+ XMM0 = _mm_mul_ps(XMM0, XMM4);
+ XMM1 = _mm_mul_ps(XMM1, XMM5);
+ XMM4 = _mm_load_ps(T+trigint );
+ XMM5 = _mm_load_ps(T+trigint );
+ XMM6 = _mm_load_ps(T );
+ XMM3 = _mm_xor_ps(XMM3, PCS_RNRN.ps);
+ XMM4 = _mm_shuffle_ps(XMM4, XMM6, _MM_SHUFFLE(0,1,0,1));
+ XMM5 = _mm_shuffle_ps(XMM5, XMM6, _MM_SHUFFLE(1,0,1,0));
+ XMM2 = _mm_mul_ps(XMM2, XMM4);
+ XMM3 = _mm_mul_ps(XMM3, XMM5);
+ XMM0 = _mm_add_ps(XMM0, XMM1);
+ XMM2 = _mm_add_ps(XMM2, XMM3);
+ _mm_store_ps(x2 , XMM0); /* x2 <- twiddle product of x1-x2 */
+ _mm_store_ps(x2+4, XMM2);
+ T += trigint*4;
+ x1 -= 8;
+ x2 -= 8;
+ }
+ while(x2>=x);
+ return;
+ case 8: /* cases 8..64 only pick the matching prebuilt table and fall out to the shared loop below */
+ T = init->trig_butterfly_generic8;
+ break;
+ case 16:
+ T = init->trig_butterfly_generic16;
+ break;
+ case 32:
+ T = init->trig_butterfly_generic32; /* mdct_init leaves this NULL when n<128 */
+ break;
+ case 64:
+ T = init->trig_butterfly_generic64; /* mdct_init leaves this NULL when n<256 */
+ break;
+ }
+ _mm_prefetch(T , _MM_HINT_NTA);
+ do
+ {
+ __m128 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7;
+ _mm_prefetch(T+16, _MM_HINT_NTA); /* prefetch the table entries of the next pass */
+ XMM0 = _mm_load_ps(x1 );
+ XMM1 = _mm_load_ps(x2 );
+ XMM2 = _mm_load_ps(x1+4);
+ XMM3 = _mm_load_ps(x2+4);
+ XMM4 = XMM0;
+ XMM5 = XMM2;
+ XMM0 = _mm_sub_ps(XMM0, XMM1); /* x1-x2 */
+ XMM2 = _mm_sub_ps(XMM2, XMM3);
+ XMM4 = _mm_add_ps(XMM4, XMM1); /* x1+x2 */
+ XMM5 = _mm_add_ps(XMM5, XMM3);
+#if defined(__SSE3__)
+ XMM1 = _mm_moveldup_ps(XMM0);
+ XMM3 = _mm_moveldup_ps(XMM2);
+ _mm_store_ps(x1 , XMM4);
+ _mm_store_ps(x1+4, XMM5);
+ XMM0 = _mm_movehdup_ps(XMM0);
+ XMM2 = _mm_movehdup_ps(XMM2);
+#else
+ XMM1 = XMM0;
+ XMM3 = XMM2;
+ XMM0 = _mm_shuffle_ps(XMM0, XMM0, _MM_SHUFFLE(3,3,1,1));
+ XMM1 = _mm_shuffle_ps(XMM1, XMM1, _MM_SHUFFLE(2,2,0,0));
+ _mm_store_ps(x1 , XMM4);
+ _mm_store_ps(x1+4, XMM5);
+ XMM2 = _mm_shuffle_ps(XMM2, XMM2, _MM_SHUFFLE(3,3,1,1));
+ XMM3 = _mm_shuffle_ps(XMM3, XMM3, _MM_SHUFFLE(2,2,0,0));
+#endif
+ XMM4 = _mm_load_ps(T ); /* 16 prebuilt table floats per pass */
+ XMM5 = _mm_load_ps(T+ 4);
+ XMM6 = _mm_load_ps(T+ 8);
+ XMM7 = _mm_load_ps(T+12);
+ XMM0 = _mm_mul_ps(XMM0, XMM4);
+ XMM1 = _mm_mul_ps(XMM1, XMM5);
+ XMM2 = _mm_mul_ps(XMM2, XMM6);
+ XMM3 = _mm_mul_ps(XMM3, XMM7);
+ XMM0 = _mm_add_ps(XMM0, XMM1);
+ XMM2 = _mm_add_ps(XMM2, XMM3);
+ _mm_store_ps(x2 , XMM0);
+ _mm_store_ps(x2+4, XMM2);
+ T += 16;
+ x1 -= 8;
+ x2 -= 8;
+ }
+ while(x2>=x);
+#else /* SSE Optimize */
DATA_TYPE *x1 = x + points - 8;
DATA_TYPE *x2 = x + (points>>1) - 8;
REG_TYPE r0;
@@ -312,23 +1082,57 @@
x2-=8;
}while(x2>=x);
+#endif /* SSE Optimize */
+}
+
+#ifdef __SSE__ /* SSE Optimize */
+STIN void mdct_butterflies_backward(mdct_lookup *init,
+ float *x,
+ int points, float *x0, float *x1){
+ /* Same stage sequence as mdct_butterflies; only the first stage differs: it gets init->n and the x0/x1 limits. */
+ int stages=init->log2n-5;
+ int i,j;
+
+ if(--stages>0){ /* first stage over all of x */
+ mdct_butterfly_first_backward(init->n,init->trig_butterfly_first,x,points,x0,x1);
+ }
+
+ for(i=1;--stages>0;i++){ /* stage i: 1<<i sub-blocks of points>>i floats, trigint 4<<i */
+ for(j=0;j<(1<<i);j++)
+ mdct_butterfly_generic(init,x+(points>>i)*j,points>>i,4<<i);
+ }
+
+ for(j=0;j<points;j+=32) /* finish with a 32-point butterfly on every 32-float chunk */
+ mdct_butterfly_32(x+j);
+
+}
+#endif /* SSE Optimize */
STIN void mdct_butterflies(mdct_lookup *init,
DATA_TYPE *x,
int points){
+#ifndef __SSE__ /* SSE Optimize */
DATA_TYPE *T=init->trig;
+#endif /* SSE Optimize */
int stages=init->log2n-5;
int i,j;
if(--stages>0){
+#ifdef __SSE__ /* SSE Optimize */
+ mdct_butterfly_first(init->trig_butterfly_first,x,points);
+#else /* SSE Optimize */
mdct_butterfly_first(T,x,points);
+#endif /* SSE Optimize */
}
for(i=1;--stages>0;i++){
for(j=0;j<(1<<i);j++)
+#ifdef __SSE__ /* SSE Optimize */
+ mdct_butterfly_generic(init,x+(points>>i)*j,points>>i,4<<i);
+#else /* SSE Optimize */
mdct_butterfly_generic(T,x+(points>>i)*j,points>>i,4<<i);
+#endif /* SSE Optimize */
}
for(j=0;j<points;j+=32)
@@ -340,6 +1144,16 @@
if(l){
if(l->trig)_ogg_free(l->trig);
if(l->bitrev)_ogg_free(l->bitrev);
+#ifdef __SSE__ /* SSE Optimize */
+ if(l->trig_bitreverse)_ogg_free(l->trig_bitreverse);
+ if(l->trig_forward)_ogg_free(l->trig_forward);
+ if(l->trig_backward)_ogg_free(l->trig_backward);
+ if(l->trig_butterfly_first)_ogg_free(l->trig_butterfly_first);
+ if(l->trig_butterfly_generic8)_ogg_free(l->trig_butterfly_generic8);
+ if(l->trig_butterfly_generic16)_ogg_free(l->trig_butterfly_generic16);
+ if(l->trig_butterfly_generic32)_ogg_free(l->trig_butterfly_generic32);
+ if(l->trig_butterfly_generic64)_ogg_free(l->trig_butterfly_generic64);
+#endif /* SSE Optimize */
memset(l,0,sizeof(*l));
}
}
@@ -348,6 +1162,76 @@
DATA_TYPE *x){
int n = init->n;
int *bit = init->bitrev;
+#ifdef __SSE__ /* SSE Optimize: four bitrev entries and 8 table floats per pass */
+ float *w0 = x; /* walks up from the start of the buffer */
+ float *w1 = x = w0+(n>>1); /* walks down from the midpoint; x is rebased to the midpoint too */
+ float *T = init->trig_bitreverse;
+
+ do
+ {
+ float *x0 = x+bit[0];
+ float *x1 = x+bit[1];
+ float *x2 = x+bit[2];
+ float *x3 = x+bit[3];
+
+ __m128 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7;
+ w1 -= 4;
+
+ XMM0 = _mm_lddqu_ps(x0); /* presumably an unaligned 4-float load -- TODO confirm in xmmlib.h */
+ XMM1 = _mm_lddqu_ps(x1);
+ XMM4 = _mm_lddqu_ps(x2);
+ XMM7 = _mm_lddqu_ps(x3);
+ XMM2 = XMM0;
+ XMM3 = XMM1;
+ XMM5 = XMM0;
+ XMM6 = XMM1;
+
+ XMM0 = _mm_shuffle_ps(XMM0, XMM4, _MM_SHUFFLE(0,1,0,1)); /* {x0[1],x0[0],x2[1],x2[0]} */
+ XMM1 = _mm_shuffle_ps(XMM1, XMM7, _MM_SHUFFLE(0,1,0,1)); /* {x1[1],x1[0],x3[1],x3[0]} */
+ XMM2 = _mm_shuffle_ps(XMM2, XMM4, _MM_SHUFFLE(0,0,0,0)); /* {x0[0],x0[0],x2[0],x2[0]} */
+ XMM3 = _mm_shuffle_ps(XMM3, XMM7, _MM_SHUFFLE(0,0,0,0)); /* {x1[0],x1[0],x3[0],x3[0]} */
+ XMM5 = _mm_shuffle_ps(XMM5, XMM4, _MM_SHUFFLE(1,1,1,1)); /* {x0[1],x0[1],x2[1],x2[1]} */
+ XMM6 = _mm_shuffle_ps(XMM6, XMM7, _MM_SHUFFLE(1,1,1,1)); /* {x1[1],x1[1],x3[1],x3[1]} */
+ XMM4 = _mm_load_ps(T );
+ XMM7 = _mm_load_ps(T+4);
+
+ XMM1 = _mm_xor_ps(XMM1, PCS_RNRN.ps);
+ XMM2 = _mm_add_ps(XMM2, XMM3);
+ XMM5 = _mm_sub_ps(XMM5, XMM6);
+
+ XMM0 = _mm_add_ps(XMM0, XMM1);
+ XMM2 = _mm_mul_ps(XMM2, XMM4);
+ XMM5 = _mm_mul_ps(XMM5, XMM7);
+
+ XMM0 = _mm_mul_ps(XMM0, PFV_0P5.ps); /* presumably a vector of 0.5f -- TODO confirm in xmmlib.h */
+ XMM2 = _mm_add_ps(XMM2, XMM5);
+
+ XMM1 = XMM0;
+ XMM3 = XMM2;
+
+#if defined(__SSE3__)
+ XMM1 = _mm_xor_ps(XMM1, PCS_RNRN.ps);
+
+ XMM0 = _mm_add_ps(XMM0, XMM2);
+ XMM1 = _mm_addsub_ps(XMM1, XMM3); /* subtract in lanes 0 and 2, add in lanes 1 and 3 */
+#else
+ XMM1 = _mm_xor_ps(XMM1, PCS_RNRN.ps);
+ XMM3 = _mm_xor_ps(XMM3, PCS_RNRN.ps);
+
+ XMM0 = _mm_add_ps(XMM0, XMM2);
+ XMM1 = _mm_sub_ps(XMM1, XMM3);
+#endif
+ _mm_store_ps(w0, XMM0);
+ _mm_storeh_pi((__m64*)(w1 ), XMM1); /* upper two lanes go to w1[0..1] */
+ _mm_storel_pi((__m64*)(w1+2), XMM1); /* lower two lanes go to w1[2..3] */
+
+ T += 8;
+ bit += 4;
+ w0 += 4;
+
+ }
+ while(w0<w1);
+#else /* SSE Optimize */
DATA_TYPE *w0 = x;
DATA_TYPE *w1 = x = w0+(n>>1);
DATA_TYPE *T = init->trig+n;
@@ -392,6 +1276,7 @@
w0 += 4;
}while(w0<w1);
+#endif /* SSE Optimize */
}
void mdct_backward(mdct_lookup *init, DATA_TYPE *in, DATA_TYPE *out){
@@ -399,6 +1284,263 @@
int n2=n>>1;
int n4=n>>2;
+#ifdef __SSE__ /* SSE Optimize */
+ /* rotate (SSE path): scan the first half of in[] from the top down, skipping groups that are entirely zero */
+
+ float *iX = in+n2-8; /* read position; only ever moves downward */
+ float *oX0 = out+n2+n4; /* written downward from the three-quarter point */
+ float *T = init->trig_backward;
+ float *oX1 = oX0; /* written upward from the same point */
+ float *zX0, *zX1;
+
+ if(n<1024) /* short transforms test 16 input floats per pass, longer ones 32 */
+ {
+ do
+ {
+ int c0, c1;
+ __m128 XMM0, XMM1, XMM2, XMM3;
+ XMM0 = _mm_load_ps(iX- 8);
+ XMM1 = _mm_load_ps(iX- 4);
+ XMM2 = _mm_load_ps(iX );
+ XMM3 = _mm_load_ps(iX+ 4);
+ XMM0 = _mm_cmpneq_ps(XMM0, PFV_0.ps); /* lane mask, set where the input differs from PFV_0 */
+ XMM1 = _mm_cmpneq_ps(XMM1, PFV_0.ps);
+ XMM2 = _mm_cmpneq_ps(XMM2, PFV_0.ps);
+ XMM3 = _mm_cmpneq_ps(XMM3, PFV_0.ps);
+ XMM0 = _mm_or_ps(XMM0, XMM1);
+ XMM2 = _mm_or_ps(XMM2, XMM3);
+ c0 = _mm_movemask_ps(XMM0);
+ c1 = _mm_movemask_ps(XMM2);
+ c0 |= c1;
+ if(!c0) /* no lane was flagged: store PFV_0 straight to the output instead of multiplying */
+ {
+ oX0 -= 8;
+ _mm_store_ps(oX0 , PFV_0.ps);
+ _mm_store_ps(oX0+ 4, PFV_0.ps);
+ _mm_store_ps(oX1 , PFV_0.ps);
+ _mm_store_ps(oX1+ 4, PFV_0.ps);
+ iX -= 16;
+ oX1 += 8;
+ T += 32; /* keep the table pointer in step with the 16 skipped inputs */
+ }
+ else
+ break; /* first group containing a flagged lane ends the skip phase */
+ }while(iX>=in);
+ }
+ else
+ {
+ do
+ {
+ int c0, c1;
+ __m128 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5;
+ XMM0 = _mm_load_ps(iX-24);
+ XMM1 = _mm_load_ps(iX-20);
+ XMM2 = _mm_load_ps(iX-16);
+ XMM3 = _mm_load_ps(iX-12);
+ XMM0 = _mm_cmpneq_ps(XMM0, PFV_0.ps);
+ XMM1 = _mm_cmpneq_ps(XMM1, PFV_0.ps);
+ XMM2 = _mm_cmpneq_ps(XMM2, PFV_0.ps);
+ XMM3 = _mm_cmpneq_ps(XMM3, PFV_0.ps);
+ XMM0 = _mm_or_ps(XMM0, XMM1);
+ XMM2 = _mm_or_ps(XMM2, XMM3);
+ XMM4 = _mm_load_ps(iX- 8);
+ XMM5 = _mm_load_ps(iX- 4);
+ XMM1 = _mm_load_ps(iX );
+ XMM3 = _mm_load_ps(iX+ 4);
+ XMM4 = _mm_cmpneq_ps(XMM4, PFV_0.ps);
+ XMM5 = _mm_cmpneq_ps(XMM5, PFV_0.ps);
+ XMM1 = _mm_cmpneq_ps(XMM1, PFV_0.ps);
+ XMM3 = _mm_cmpneq_ps(XMM3, PFV_0.ps);
+ XMM4 = _mm_or_ps(XMM4, XMM5);
+ XMM1 = _mm_or_ps(XMM1, XMM3);
+ XMM0 = _mm_or_ps(XMM0, XMM4);
+ XMM2 = _mm_or_ps(XMM2, XMM1);
+ c0 = _mm_movemask_ps(XMM0);
+ c1 = _mm_movemask_ps(XMM2);
+ c0 |= c1;
+ if(!c0) /* same shortcut as the short-transform branch, 32 inputs at a time */
+ {
+ oX0 -= 16;
+ _mm_store_ps(oX0 , PFV_0.ps);
+ _mm_store_ps(oX0+ 4, PFV_0.ps);
+ _mm_store_ps(oX0+ 8, PFV_0.ps);
+ _mm_store_ps(oX0+12, PFV_0.ps);
+ _mm_store_ps(oX1 , PFV_0.ps);
+ _mm_store_ps(oX1+ 4, PFV_0.ps);
+ _mm_store_ps(oX1+ 8, PFV_0.ps);
+ _mm_store_ps(oX1+12, PFV_0.ps);
+ iX -= 32;
+ oX1 += 16;
+ T += 64;
+ }
+ else
+ break;
+ }while(iX>=in);
+ }
+ zX0 = oX0; /* bounds of the region already filled by the skip phase; passed to the butterflies below */
+ zX1 = oX1;
+ while(iX>=in) /* whatever the skip phase left over, 8 input floats per pass */
+ {
+ __m128 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7;
+ oX0 -= 4;
+ XMM0 = _mm_load_ps(iX );
+ XMM4 = _mm_load_ps(iX+4);
+ XMM2 = _mm_load_ps(T );
+ XMM3 = _mm_load_ps(T+ 4);
+ XMM1 = XMM0;
+ XMM5 = XMM0;
+ XMM6 = XMM4;
+ XMM7 = XMM4;
+ XMM0 = _mm_shuffle_ps(XMM0, XMM4, _MM_SHUFFLE(1,3,1,3));
+ XMM1 = _mm_shuffle_ps(XMM1, XMM4, _MM_SHUFFLE(3,1,3,1));
+ XMM6 = _mm_shuffle_ps(XMM6, XMM5, _MM_SHUFFLE(0,0,0,0));
+ XMM7 = _mm_shuffle_ps(XMM7, XMM5, _MM_SHUFFLE(2,2,2,2));
+ XMM4 = _mm_load_ps(T+ 8);
+ XMM5 = _mm_load_ps(T+12);
+ XMM0 = _mm_mul_ps(XMM0, XMM2);
+ XMM1 = _mm_mul_ps(XMM1, XMM3);
+ XMM6 = _mm_mul_ps(XMM6, XMM4);
+ XMM7 = _mm_mul_ps(XMM7, XMM5);
+ XMM0 = _mm_sub_ps(XMM0, XMM1);
+ XMM6 = _mm_sub_ps(XMM6, XMM7);
+ _mm_store_ps(oX0, XMM0);
+ _mm_store_ps(oX1, XMM6);
+ iX -= 8;
+ oX1 += 4;
+ T += 16; /* 16 table floats per 8 inputs, the same ratio the skip phase uses */
+ }
+
+ mdct_butterflies_backward(init,out+n2,n2,zX0,zX1);
+ mdct_bitreverse(init,out);
+
+ /* rotate + window */
+
+ {
+ float *oX1 = out+n2+n4;
+ float *oX2 = out+n2+n4;
+ float *iX = out;
+ float *T = init->trig_backward+n; /* second-stage table starts n floats into trig_backward */
+
+ do
+ {
+ __m128 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5;
+ oX1 -=4;
+ XMM0 = _mm_load_ps(iX );
+ XMM4 = _mm_load_ps(iX+4);
+ XMM2 = _mm_load_ps(T );
+ XMM3 = _mm_load_ps(T+4);
+ XMM1 = XMM0;
+ XMM0 = _mm_shuffle_ps(XMM0, XMM4, _MM_SHUFFLE(2,0,2,0)); /* even-indexed inputs */
+ XMM1 = _mm_shuffle_ps(XMM1, XMM4, _MM_SHUFFLE(3,1,3,1)); /* odd-indexed inputs */
+ XMM4 = XMM0;
+ XMM5 = XMM1;
+ XMM0 = _mm_mul_ps(XMM0, XMM2);
+ XMM1 = _mm_mul_ps(XMM1, XMM3);
+ XMM4 = _mm_mul_ps(XMM4, XMM3);
+ XMM5 = _mm_mul_ps(XMM5, XMM2);
+ XMM0 = _mm_sub_ps(XMM0, XMM1);
+ XMM4 = _mm_add_ps(XMM4, XMM5);
+ XMM0 = _mm_shuffle_ps(XMM0, XMM0, _MM_SHUFFLE(0,1,2,3)); /* reverse the lane order */
+ XMM4 = _mm_xor_ps(XMM4, PCS_RRRR.ps); /* presumably flips the sign of all four lanes -- PCS_RRRR is defined elsewhere, confirm */
+ _mm_store_ps(oX1, XMM0);
+ _mm_store_ps(oX2, XMM4);
+ oX2 += 4;
+ iX += 8;
+ T += 8;
+ }while(iX<oX1);
+
+ iX = out+n2+n4;
+ oX1 = out+n4;
+ oX2 = oX1;
+
+ do /* copy 16 floats downward and write their lane-reversed, PCS_RRRR-xored image upward */
+ {
+ __m128 XMM0, XMM1, XMM2, XMM3;
+ oX1 -= 16;
+ iX -= 16;
+ XMM0 = _mm_load_ps(iX+12);
+ XMM1 = _mm_load_ps(iX+ 8);
+ XMM2 = _mm_load_ps(iX+ 4);
+ XMM3 = _mm_load_ps(iX );
+ _mm_store_ps(oX1+12, XMM0);
+ _mm_store_ps(oX1+ 8, XMM1);
+ _mm_store_ps(oX1+ 4, XMM2);
+ _mm_store_ps(oX1 , XMM3);
+ XMM0 = _mm_shuffle_ps(XMM0, XMM0, _MM_SHUFFLE(0,1,2,3));
+ XMM1 = _mm_shuffle_ps(XMM1, XMM1, _MM_SHUFFLE(0,1,2,3));
+ XMM2 = _mm_shuffle_ps(XMM2, XMM2, _MM_SHUFFLE(0,1,2,3));
+ XMM3 = _mm_shuffle_ps(XMM3, XMM3, _MM_SHUFFLE(0,1,2,3));
+ XMM0 = _mm_xor_ps(XMM0, PCS_RRRR.ps);
+ XMM1 = _mm_xor_ps(XMM1, PCS_RRRR.ps);
+ XMM2 = _mm_xor_ps(XMM2, PCS_RRRR.ps);
+ XMM3 = _mm_xor_ps(XMM3, PCS_RRRR.ps);
+ _mm_store_ps(oX2 , XMM0);
+ _mm_store_ps(oX2+ 4, XMM1);
+ _mm_store_ps(oX2+ 8, XMM2);
+ _mm_store_ps(oX2+12, XMM3);
+ oX2 += 16;
+ }while(oX2<iX);
+
+ iX = out+n2+n4;
+ oX1 = out+n2+n4;
+ oX2 = out+n2;
+
+ if(n4>16) /* 32 floats per pass when the quarter is larger than 16, otherwise 16 */
+ {
+ do
+ {
+ __m128 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7;
+ oX1 -= 32;
+ XMM0 = _mm_load_ps(iX+28);
+ XMM1 = _mm_load_ps(iX+24);
+ XMM2 = _mm_load_ps(iX+20);
+ XMM3 = _mm_load_ps(iX+16);
+ XMM4 = _mm_load_ps(iX+12);
+ XMM5 = _mm_load_ps(iX+ 8);
+ XMM6 = _mm_load_ps(iX+ 4);
+ XMM7 = _mm_load_ps(iX );
+ XMM0 = _mm_shuffle_ps(XMM0, XMM0, _MM_SHUFFLE(0,1,2,3));
+ XMM1 = _mm_shuffle_ps(XMM1, XMM1, _MM_SHUFFLE(0,1,2,3));
+ XMM2 = _mm_shuffle_ps(XMM2, XMM2, _MM_SHUFFLE(0,1,2,3));
+ XMM3 = _mm_shuffle_ps(XMM3, XMM3, _MM_SHUFFLE(0,1,2,3));
+ XMM4 = _mm_shuffle_ps(XMM4, XMM4, _MM_SHUFFLE(0,1,2,3));
+ XMM5 = _mm_shuffle_ps(XMM5, XMM5, _MM_SHUFFLE(0,1,2,3));
+ XMM6 = _mm_shuffle_ps(XMM6, XMM6, _MM_SHUFFLE(0,1,2,3));
+ XMM7 = _mm_shuffle_ps(XMM7, XMM7, _MM_SHUFFLE(0,1,2,3));
+ _mm_store_ps(oX1 , XMM0);
+ _mm_store_ps(oX1+ 4, XMM1);
+ _mm_store_ps(oX1+ 8, XMM2);
+ _mm_store_ps(oX1+12, XMM3);
+ _mm_store_ps(oX1+16, XMM4);
+ _mm_store_ps(oX1+20, XMM5);
+ _mm_store_ps(oX1+24, XMM6);
+ _mm_store_ps(oX1+28, XMM7);
+ iX += 32;
+ }while(oX1>oX2);
+ }
+ else
+ {
+ do
+ {
+ __m128 XMM0, XMM1, XMM2, XMM3;
+ oX1 -= 16;
+ XMM0 = _mm_load_ps(iX+12);
+ XMM1 = _mm_load_ps(iX+ 8);
+ XMM2 = _mm_load_ps(iX+ 4);
+ XMM3 = _mm_load_ps(iX );
+ XMM0 = _mm_shuffle_ps(XMM0, XMM0, _MM_SHUFFLE(0,1,2,3));
+ XMM1 = _mm_shuffle_ps(XMM1, XMM1, _MM_SHUFFLE(0,1,2,3));
+ XMM2 = _mm_shuffle_ps(XMM2, XMM2, _MM_SHUFFLE(0,1,2,3));
+ XMM3 = _mm_shuffle_ps(XMM3, XMM3, _MM_SHUFFLE(0,1,2,3));
+ _mm_store_ps(oX1 , XMM0);
+ _mm_store_ps(oX1+ 4, XMM1);
+ _mm_store_ps(oX1+ 8, XMM2);
+ _mm_store_ps(oX1+12, XMM3);
+ iX += 16;
+ }while(oX1>oX2);
+ }
+ }
+#else /* SSE Optimize */
/* rotate */
DATA_TYPE *iX = in+n2-7;
@@ -488,8 +1630,175 @@
iX+=4;
}while(oX1>oX2);
}
+#endif /* SSE Optimize */
}
+#ifdef __SSE__ /* SSE Optimize */
+void mdct_forward(mdct_lookup *init, DATA_TYPE *in, DATA_TYPE *out, DATA_TYPE *out1){ /* SSE forward MDCT; out1 may be NULL, otherwise it receives the same stores as out. Aligned load/store intrinsics are used on in, out and out1. */
+ int n = init->n;
+ int n2 = n>>1;
+ int n4 = n>>2;
+ int n8 = n>>3;
+ float *w = (float*)_ogg_alloca(n*sizeof(*w)); /* forward needs working space */
+ float *w2 = w+n2; /* upper half of the scratch buffer; both loops below write here */
+
+ /* rotate */
+
+ /* window + rotate + step 1 */
+
+ int i, j;
+
+ float *x0 = in+n2+n4-8;
+ float *x1 = in+n2+n4; /* not read by the first loop; reassigned before the second */
+ float *T = init->trig_forward;
+
+ for(i=0,j=n2-2;i<n8;i+=4,j-=4) /* i climbs from 0 while j descends from n2-2; each pass stores two float pairs at each end of w2 */
+ {
+ __m128 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7;
+ XMM0 = _mm_load_ps(x0 + 4);
+ XMM4 = _mm_load_ps(x0 );
+ XMM1 = _mm_load_ps(x0+i*4+ 8); /* x0 drops by 8 while i rises by 4 per pass, so this address climbs by 8 from in+n2+n4 */
+ XMM5 = _mm_load_ps(x0+i*4+12);
+ XMM2 = _mm_load_ps(T );
+ XMM3 = _mm_load_ps(T+ 4);
+ XMM6 = _mm_load_ps(T+ 8);
+ XMM7 = _mm_load_ps(T+12);
+ XMM0 = _mm_shuffle_ps(XMM0, XMM0, _MM_SHUFFLE(0,1,2,3)); /* reverse the lane order of the descending operand */
+ XMM4 = _mm_shuffle_ps(XMM4, XMM4, _MM_SHUFFLE(0,1,2,3));
+ XMM0 = _mm_add_ps(XMM0, XMM1);
+ XMM4 = _mm_add_ps(XMM4, XMM5);
+ XMM1 = XMM0;
+ XMM5 = XMM4;
+ XMM0 = _mm_shuffle_ps(XMM0, XMM0, _MM_SHUFFLE(0,0,3,3));
+ XMM1 = _mm_shuffle_ps(XMM1, XMM1, _MM_SHUFFLE(2,2,1,1));
+ XMM4 = _mm_shuffle_ps(XMM4, XMM4, _MM_SHUFFLE(0,0,3,3));
+ XMM5 = _mm_shuffle_ps(XMM5, XMM5, _MM_SHUFFLE(2,2,1,1));
+ XMM0 = _mm_mul_ps(XMM0, XMM2);
+ XMM1 = _mm_mul_ps(XMM1, XMM3);
+ XMM4 = _mm_mul_ps(XMM4, XMM6);
+ XMM5 = _mm_mul_ps(XMM5, XMM7);
+ XMM0 = _mm_sub_ps(XMM0, XMM1);
+ XMM4 = _mm_sub_ps(XMM4, XMM5);
+ _mm_storel_pi((__m64*)(w2+i ), XMM0); /* low pair to the ascending slot, high pair to the descending one */
+ _mm_storeh_pi((__m64*)(w2+j ), XMM0);
+ _mm_storel_pi((__m64*)(w2+i+2), XMM4);
+ _mm_storeh_pi((__m64*)(w2+j-2), XMM4);
+ x0 -= 8;
+ T += 16;
+ }
+
+ x0 = in;
+ x1 = in+n2-8;
+
+ for(;i<n4;i+=4,j-=4) /* i and j carry on from the first loop; operands are now subtracted and the products added */
+ {
+ __m128 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7;
+ XMM1 = _mm_load_ps(x1+4);
+ XMM5 = _mm_load_ps(x1 );
+ XMM0 = _mm_load_ps(x0 );
+ XMM4 = _mm_load_ps(x0+4);
+ XMM2 = _mm_load_ps(T );
+ XMM3 = _mm_load_ps(T+ 4);
+ XMM6 = _mm_load_ps(T+ 8);
+ XMM7 = _mm_load_ps(T+12);
+ XMM1 = _mm_shuffle_ps(XMM1, XMM1, _MM_SHUFFLE(0,1,2,3));
+ XMM5 = _mm_shuffle_ps(XMM5, XMM5, _MM_SHUFFLE(0,1,2,3));
+ XMM0 = _mm_sub_ps(XMM0, XMM1);
+ XMM4 = _mm_sub_ps(XMM4, XMM5);
+ XMM1 = XMM0;
+ XMM5 = XMM4;
+ XMM0 = _mm_shuffle_ps(XMM0, XMM0, _MM_SHUFFLE(0,0,3,3));
+ XMM1 = _mm_shuffle_ps(XMM1, XMM1, _MM_SHUFFLE(2,2,1,1));
+ XMM4 = _mm_shuffle_ps(XMM4, XMM4, _MM_SHUFFLE(0,0,3,3));
+ XMM5 = _mm_shuffle_ps(XMM5, XMM5, _MM_SHUFFLE(2,2,1,1));
+ XMM0 = _mm_mul_ps(XMM0, XMM2);
+ XMM1 = _mm_mul_ps(XMM1, XMM3);
+ XMM4 = _mm_mul_ps(XMM4, XMM6);
+ XMM5 = _mm_mul_ps(XMM5, XMM7);
+ XMM0 = _mm_add_ps(XMM0, XMM1);
+ XMM4 = _mm_add_ps(XMM4, XMM5);
+ _mm_storel_pi((__m64*)(w2+i ), XMM0);
+ _mm_storeh_pi((__m64*)(w2+j ), XMM0);
+ _mm_storel_pi((__m64*)(w2+i+2), XMM4);
+ _mm_storeh_pi((__m64*)(w2+j-2), XMM4);
+ x0 += 8;
+ x1 -= 8;
+ T += 16;
+ }
+
+ mdct_butterflies(init, w+n2, n2);
+ mdct_bitreverse(init, w);
+
+ /* rotate + window */
+
+ T = init->trig_forward+n; /* second-stage table starts n floats into trig_forward */
+ x0 =out +n2; /* written downward from out+n2; out+i is written upward */
+
+ if(out1!=NULL) /* duplicate every store into out1 */
+ {
+ x1 =out1+n2;
+ for(i=0;i<n4;i+=4){
+ __m128 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7;
+ x0 -= 4;
+ x1 -= 4;
+ XMM0 = _mm_load_ps(w+4);
+ XMM4 = _mm_load_ps(w );
+ XMM2 = XMM0;
+ XMM1 = _mm_load_ps(T );
+ XMM3 = _mm_load_ps(T+ 4);
+ XMM6 = _mm_load_ps(T+ 8);
+ XMM7 = _mm_load_ps(T+12);
+ XMM0 = _mm_shuffle_ps(XMM0, XMM4,_MM_SHUFFLE(0,2,0,2));
+ XMM2 = _mm_shuffle_ps(XMM2, XMM4,_MM_SHUFFLE(1,3,1,3));
+ XMM4 = XMM0;
+ XMM5 = XMM2;
+ XMM0 = _mm_mul_ps(XMM0, XMM1);
+ XMM2 = _mm_mul_ps(XMM2, XMM3);
+ XMM4 = _mm_shuffle_ps(XMM4, XMM4, _MM_SHUFFLE(0,1,2,3));
+ XMM5 = _mm_shuffle_ps(XMM5, XMM5, _MM_SHUFFLE(0,1,2,3));
+ XMM4 = _mm_mul_ps(XMM4, XMM6);
+ XMM5 = _mm_mul_ps(XMM5, XMM7);
+ XMM0 = _mm_sub_ps(XMM0, XMM2);
+ XMM4 = _mm_add_ps(XMM4, XMM5);
+ _mm_store_ps(x0 , XMM0);
+ _mm_store_ps(x1 , XMM0);
+ _mm_store_ps(out +i, XMM4);
+ _mm_store_ps(out1+i, XMM4);
+ w += 8; /* w itself is advanced; the scratch base is not needed again */
+ T += 16;
+ }
+ }
+ else /* out1 is NULL: identical arithmetic, stores go to out only */
+ {
+ for(i=0;i<n4;i+=4){
+ __m128 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7;
+ x0 -= 4;
+ XMM0 = _mm_load_ps(w+4);
+ XMM4 = _mm_load_ps(w );
+ XMM2 = XMM0;
+ XMM1 = _mm_load_ps(T );
+ XMM3 = _mm_load_ps(T+ 4);
+ XMM6 = _mm_load_ps(T+ 8);
+ XMM7 = _mm_load_ps(T+12);
+ XMM0 = _mm_shuffle_ps(XMM0, XMM4,_MM_SHUFFLE(0,2,0,2));
+ XMM2 = _mm_shuffle_ps(XMM2, XMM4,_MM_SHUFFLE(1,3,1,3));
+ XMM4 = XMM0;
+ XMM5 = XMM2;
+ XMM0 = _mm_mul_ps(XMM0, XMM1);
+ XMM2 = _mm_mul_ps(XMM2, XMM3);
+ XMM4 = _mm_shuffle_ps(XMM4, XMM4, _MM_SHUFFLE(0,1,2,3));
+ XMM5 = _mm_shuffle_ps(XMM5, XMM5, _MM_SHUFFLE(0,1,2,3));
+ XMM4 = _mm_mul_ps(XMM4, XMM6);
+ XMM5 = _mm_mul_ps(XMM5, XMM7);
+ XMM0 = _mm_sub_ps(XMM0, XMM2);
+ XMM4 = _mm_add_ps(XMM4, XMM5);
+ _mm_store_ps(x0 , XMM0);
+ _mm_store_ps(out +i, XMM4);
+ w += 8;
+ T += 16;
+ }
+ }
+#else /* SSE Optimize */
void mdct_forward(mdct_lookup *init, DATA_TYPE *in, DATA_TYPE *out){
int n=init->n;
int n2=n>>1;
@@ -560,5 +1869,5 @@
w+=2;
T+=2;
}
+#endif /* SSE Optimize */
}
-
diff -ruN --exclude '*.in' libvorbis-1.2.0/lib/mdct.h libvorbis-1.2.0-sse/lib/mdct.h
--- libvorbis-1.2.0/lib/mdct.h 2007-07-24 02:09:47.000000000 +0200
+++ libvorbis-1.2.0-sse/lib/mdct.h 2007-08-02 12:43:10.000000000 +0200
@@ -58,6 +58,16 @@
int log2n;
DATA_TYPE *trig;
+#ifdef __SSE__ /* SSE Optimize: extra lookup tables, present only in SSE builds (the struct layout differs between builds) */
+ DATA_TYPE *trig_bitreverse; /* read by the SSE mdct_bitreverse */
+ DATA_TYPE *trig_forward; /* read by the SSE mdct_forward; its second stage starts n entries in */
+ DATA_TYPE *trig_backward; /* read by the SSE mdct_backward; its second stage starts n entries in */
+ DATA_TYPE *trig_butterfly_first; /* presumably for the SSE butterfly stages -- users are outside this excerpt, confirm */
+ DATA_TYPE *trig_butterfly_generic8;
+ DATA_TYPE *trig_butterfly_generic16;
+ DATA_TYPE *trig_butterfly_generic32;
+ DATA_TYPE *trig_butterfly_generic64;
+#endif /* SSE Optimize */
int *bitrev;
DATA_TYPE scale;
@@ -65,7 +75,11 @@
extern void mdct_init(mdct_lookup *lookup,int n);
extern void mdct_clear(mdct_lookup *l);
+#ifdef __SSE__ /* SSE Optimize */
+extern void mdct_forward(mdct_lookup *init, DATA_TYPE *in, DATA_TYPE *out, DATA_TYPE *out1); /* SSE build only: out1, when non-NULL, receives the same stores as out */
+#else /* SSE Optimize */
extern void mdct_forward(mdct_lookup *init, DATA_TYPE *in, DATA_TYPE *out);
+#endif /* SSE Optimize */
extern void mdct_backward(mdct_lookup *init, DATA_TYPE *in, DATA_TYPE *out);
#endif
diff -ruN --exclude '*.in' libvorbis-1.2.0/lib/misc.h libvorbis-1.2.0-sse/lib/misc.h
--- libvorbis-1.2.0/lib/misc.h 2007-07-24 02:09:47.000000000 +0200
+++ libvorbis-1.2.0-sse/lib/misc.h 2007-08-02 12:43:10.000000000 +0200
@@ -45,6 +45,20 @@
#endif
#endif
+#ifdef __SSE__ /* SSE Optimize: reroute the _ogg_* allocation macros to the xmm_* helpers */
+#undef _ogg_malloc /* discard the definitions currently in effect before substituting new ones */
+#undef _ogg_calloc
+#undef _ogg_realloc
+#undef _ogg_free
+#undef _ogg_alloca
+
+#define _ogg_malloc(x) xmm_malloc(x) /* presumably returns 16-byte-aligned storage -- xmm_malloc is defined outside this header, confirm */
+#define _ogg_calloc(x,y) xmm_calloc((x), (y))
+#define _ogg_realloc(x,y) xmm_realloc((x), (y))
+#define _ogg_alloca(x) xmm_align(alloca((x)+16)) /* asks alloca for 16 spare bytes so xmm_align has room to adjust the pointer; presumably rounds up to a 16-byte boundary -- confirm */
+#define _ogg_free(x) xmm_free(x) /* memory from the xmm_* allocators goes back through xmm_free */
+#endif /* SSE Optimize */
+
#endif
diff -ruN --exclude '*.in' libvorbis-1.2.0/lib/psy.c libvorbis-1.2.0-sse/lib/psy.c
--- libvorbis-1.2.0/lib/psy.c 2007-08-02 12:42:08.000000000 +0200
+++ libvorbis-1.2.0-sse/lib/psy.c 2007-08-02 12:43:10.000000000 +0200
@@ -21,6 +21,9 @@
#include "vorbis/codec.h"
#include "codec_internal.h"
+#ifdef __SSE__ /* SSE Optimize */
+#include "xmmlib.h"
+#endif /* SSE Optimize */
#include "masking.h"
#include "psy.h"
#include "os.h"
@@ -59,6 +62,186 @@
static float nnmid_th=0.2;
+#ifdef __SSE__ /* SSE Optimize */
+static _MM_ALIGN16 const float PNEGINF[4] = {NEGINF, NEGINF, NEGINF, NEGINF};
+
+static const int temp_bfn8[128] = {
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,
+17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,
+25,25,25,25,25,25,25,25,17,17,17,17,17,17,17,17,
+ 9, 9, 9, 9, 9, 9, 9, 9, 1, 1, 1, 1, 1, 1, 1, 1,
+};
+
+static const int temp_bfn4[128] = {
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,
+17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,
+21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,
+25,25,25,25,25,25,25,25,21,21,21,21,17,17,17,17,
+13,13,13,13, 9, 9, 9, 9, 5, 5, 5, 5, 1, 1, 1, 1,
+};
+
+static _MM_ALIGN16 const float PTEMP_BFN1[1] = {
+ -8.0000000e+001,
+};
+static _MM_ALIGN16 const float PTEMP_BFN2[2] = {
+ -4.2000000e+001, -7.9000000e+001,
+};
+static _MM_ALIGN16 const float PTEMP_BFN3[3] = {
+ -3.0000000e+001, -5.5000000e+001, -8.0000000e+001,
+};
+static _MM_ALIGN16 const float PTEMP_BFN4[4] = {
+ -2.3000000e+001, -4.1000000e+001, -5.9000000e+001, -7.7000000e+001,
+
+};
+static _MM_ALIGN16 const float PTEMP_BFN5[5] = {
+ -2.0000000e+001, -3.5000000e+001, -5.0000000e+001, -6.5000000e+001,
+ -8.0000000e+001,
+};
+static _MM_ALIGN16 const float PTEMP_BFN6[6] = {
+ -1.7000000e+001, -2.9000000e+001, -4.1000000e+001, -5.3000000e+001,
+ -6.5000000e+001, -7.7000000e+001,
+};
+static _MM_ALIGN16 const float PTEMP_BFN7[7] = {
+ -1.5000000e+001, -2.5000000e+001, -3.5000000e+001, -4.5000000e+001,
+ -5.5000000e+001, -6.5000000e+001, -7.5000000e+001,
+};
+static _MM_ALIGN16 const float PTEMP_BFN8[8] = {
+ -1.4000000e+001, -2.3000000e+001, -3.2000000e+001, -4.1000000e+001,
+ -5.0000000e+001, -5.9000000e+001, -6.8000000e+001, -7.7000000e+001,
+
+};
+static _MM_ALIGN16 const float PTEMP_BFN9[9] = {
+ -1.3000000e+001, -2.1000000e+001, -2.9000000e+001, -3.7000000e+001,
+ -4.5000000e+001, -5.3000000e+001, -6.1000000e+001, -6.9000000e+001,
+ -7.7000000e+001,
+};
+static _MM_ALIGN16 const float PTEMP_BFN10[10] = {
+ -1.2000000e+001, -1.9000000e+001, -2.6000000e+001, -3.3000000e+001,
+ -4.0000000e+001, -4.7000000e+001, -5.4000000e+001, -6.1000000e+001,
+ -6.8000000e+001, -7.5000000e+001,
+};
+static _MM_ALIGN16 const float PTEMP_BFN12[12] = {
+ -1.1000000e+001, -1.7000000e+001, -2.3000000e+001, -2.9000000e+001,
+ -3.5000000e+001, -4.1000000e+001, -4.7000000e+001, -5.3000000e+001,
+ -5.9000000e+001, -6.5000000e+001, -7.1000000e+001, -7.7000000e+001,
+
+};
+static _MM_ALIGN16 const float PTEMP_BFN15[15] = {
+ -1.0000000e+001, -1.5000000e+001, -2.0000000e+001, -2.5000000e+001,
+ -3.0000000e+001, -3.5000000e+001, -4.0000000e+001, -4.5000000e+001,
+ -5.0000000e+001, -5.5000000e+001, -6.0000000e+001, -6.5000000e+001,
+ -7.0000000e+001, -7.5000000e+001, -8.0000000e+001,
+};
+static _MM_ALIGN16 const float PTEMP_BFN18[18] = {
+ -9.0000000e+000, -1.3000000e+001, -1.7000000e+001, -2.1000000e+001,
+ -2.5000000e+001, -2.9000000e+001, -3.3000000e+001, -3.7000000e+001,
+ -4.1000000e+001, -4.5000000e+001, -4.9000000e+001, -5.3000000e+001,
+ -5.7000000e+001, -6.1000000e+001, -6.5000000e+001, -6.9000000e+001,
+ -7.3000000e+001, -7.7000000e+001,
+};
+static _MM_ALIGN16 const float PTEMP_BFN25[25] = {
+ -8.0000000e+000, -1.1000000e+001, -1.4000000e+001, -1.7000000e+001,
+ -2.0000000e+001, -2.3000000e+001, -2.6000000e+001, -2.9000000e+001,
+ -3.2000000e+001, -3.5000000e+001, -3.8000000e+001, -4.1000000e+001,
+ -4.4000000e+001, -4.7000000e+001, -5.0000000e+001, -5.3000000e+001,
+ -5.6000000e+001, -5.9000000e+001, -6.2000000e+001, -6.5000000e+001,
+ -6.8000000e+001, -7.1000000e+001, -7.4000000e+001, -7.7000000e+001,
+ -8.0000000e+001,
+};
+
+static const float *PTEMP_BFN[26] = { /* indexed 0..25; slot 0 is unused (NULL) */
+ NULL,
+ PTEMP_BFN1, PTEMP_BFN2, PTEMP_BFN3, PTEMP_BFN4,
+ PTEMP_BFN5, PTEMP_BFN6, PTEMP_BFN7, PTEMP_BFN8,
+ PTEMP_BFN9, PTEMP_BFN10, PTEMP_BFN12, PTEMP_BFN12, /* no table for 11: it shares the 12-entry one */
+ PTEMP_BFN15, PTEMP_BFN15, PTEMP_BFN15, PTEMP_BFN18, /* 13 and 14 share the 15-entry table; 16 starts the 18-entry one */
+ PTEMP_BFN18, PTEMP_BFN18, PTEMP_BFN25, PTEMP_BFN25, /* 17 shares the 18-entry table */
+ PTEMP_BFN25, PTEMP_BFN25, PTEMP_BFN25, PTEMP_BFN25, /* 19..24 share the 25-entry table */
+ PTEMP_BFN25
+};
+
+/*
+ for shellsort fix4 by SSE compare
+*/
+static _MM_ALIGN16 const __m128x Sort4IndexConvTable[64] = {
+ {.si32 = {3,2,1,0}}, /* A>B>C>D 000000 00 */
+ {.si32 = {3,2,0,1}}, /* B>A>C>D 000001 01 */
+ {.si32 = {3,1,2,0}}, /* A>C>B>D 000010 02 */
+ {.si32 = {0,1,2,3}}, /* 03 */
+ {.si32 = {2,3,1,0}}, /* A>B>D>C 000100 04 */
+ {.si32 = {2,3,0,1}}, /* B>A>D>C 000101 05 */
+ {.si32 = {0,1,2,3}}, /* 06 */
+ {.si32 = {0,1,2,3}}, /* 07 */
+ {.si32 = {0,1,2,3}}, /* 08 */
+ {.si32 = {0,1,2,3}}, /* 09 */
+ {.si32 = {0,1,2,3}}, /* 10 */
+ {.si32 = {0,1,2,3}}, /* 11 */
+ {.si32 = {0,1,2,3}}, /* 12 */
+ {.si32 = {2,0,3,1}}, /* B>D>A>C 001101 13 */
+ {.si32 = {0,1,2,3}}, /* 14 */
+ {.si32 = {0,1,2,3}}, /* 15 */
+ {.si32 = {0,1,2,3}}, /* 16 */
+ {.si32 = {3,0,2,1}}, /* B>C>A>D 010001 17 */
+ {.si32 = {3,1,0,2}}, /* C>A>B>D 010010 18 */
+ {.si32 = {3,0,1,2}}, /* C>B>A>D 010011 19 */
+ {.si32 = {0,1,2,3}}, /* 20 */
+ {.si32 = {0,1,2,3}}, /* 21 */
+ {.si32 = {0,1,2,3}}, /* 22 */
+ {.si32 = {0,1,2,3}}, /* 23 */
+ {.si32 = {0,1,2,3}}, /* 24 */
+ {.si32 = {0,3,2,1}}, /* B>C>D>A 011001 25 */
+ {.si32 = {0,1,2,3}}, /* 26 */
+ {.si32 = {0,3,1,2}}, /* C>B>D>A 011011 27 */
+ {.si32 = {0,1,2,3}}, /* 28 */
+ {.si32 = {0,2,3,1}}, /* B>D>C>A 011101 29 */
+ {.si32 = {0,1,2,3}}, /* 30 */
+ {.si32 = {0,1,2,3}}, /* 31 */
+ {.si32 = {0,1,2,3}}, /* 32 */
+ {.si32 = {0,1,2,3}}, /* 33 */
+ {.si32 = {1,3,2,0}}, /* A>C>D>B 100010 34 */
+ {.si32 = {0,1,2,3}}, /* 35 */
+ {.si32 = {2,1,3,0}}, /* A>D>B>C 100100 36 */
+ {.si32 = {0,1,2,3}}, /* 37 */
+ {.si32 = {1,2,3,0}}, /* A>D>C>B 100110 38 */
+ {.si32 = {0,1,2,3}}, /* 39 */
+ {.si32 = {0,1,2,3}}, /* 40 */
+ {.si32 = {0,1,2,3}}, /* 41 */
+ {.si32 = {0,1,2,3}}, /* 42 */
+ {.si32 = {0,1,2,3}}, /* 43 */
+ {.si32 = {2,1,0,3}}, /* D>A>B>C 101100 44 */
+ {.si32 = {2,0,1,3}}, /* D>B>A>C 101101 45 */
+ {.si32 = {1,2,0,3}}, /* D>A>C>B 101110 46 */
+ {.si32 = {0,1,2,3}}, /* 47 */
+ {.si32 = {0,1,2,3}}, /* 48 */
+ {.si32 = {0,1,2,3}}, /* 49 */
+ {.si32 = {1,3,0,2}}, /* C>A>D>B 110010 50 */
+ {.si32 = {0,1,2,3}}, /* 51 */
+ {.si32 = {0,1,2,3}}, /* 52 */
+ {.si32 = {0,1,2,3}}, /* 53 */
+ {.si32 = {0,1,2,3}}, /* 54 */
+ {.si32 = {0,1,2,3}}, /* 55 */
+ {.si32 = {0,1,2,3}}, /* 56 */
+ {.si32 = {0,1,2,3}}, /* 57 */
+ {.si32 = {1,0,3,2}}, /* C>D>A>B 111010 58 */
+ {.si32 = {0,1,3,2}}, /* C>D>B>A 111011 59 */
+ {.si32 = {0,1,2,3}}, /* 60 */
+ {.si32 = {0,2,1,3}}, /* D>B>C>A 111101 61 */
+ {.si32 = {1,0,2,3}}, /* D>C>A>B 111110 62 */
+ {.si32 = {0,1,2,3}} /* D>C>B>A 111111 63 */
+};
+
+_MM_ALIGN16 float findex[2048]; /* findex[i] == (float)i once _vp_psy_init has run; NOTE(review): non-static, so the symbol is exported */
+_MM_ALIGN16 float findex2[2048]; /* findex2[i] == (float)(i*i), filled together with findex */
+
+#endif /* SSE Optimize */
+
vorbis_look_psy_global *_vp_global_look(vorbis_info *vi){
codec_setup_info *ci=vi->codec_setup;
vorbis_info_psy_global *gi=&ci->psy_g_param;
@@ -422,6 +605,66 @@
_analysis_output_always("noiseoff2",ls++,p->noiseoffset[2],n,1,0,0);
}
#endif
+#ifdef __SSE__ /* SSE Optimize: precompute per-line tables used by the SSE psychoacoustic code */
+ if(findex[1]==0.f) /* one-time fill of the shared tables; NOTE(review): no locking is visible around this lazy init */
+ {
+ for(i=0;i<2048;i++)
+ {
+ findex[i] = (float)(i);
+ findex2[i] = (float)(i*i);
+ }
+ }
+ {
+ short* sb = (short*)p->bark; /* NOTE(review): reads bark[] as 16-bit halves, two per entry -- assumes 4-byte little-endian elements, confirm for LP64 targets */
+ for(i=0;i<n;i++)
+ {
+ if(sb[i*2+1]>=0) /* first line whose upper half is non-negative */
+ break;
+ }
+ p->midpoint1 = i;
+ p->midpoint1_4 = p->midpoint1&(~3); /* midpoint1 rounded down to a multiple of 4 */
+ p->midpoint1_8 = p->midpoint1_4&(~7); /* ... of 8 */
+ p->midpoint1_16 = p->midpoint1_8&(~15); /* ... of 16 */
+ for(;i<n;i++)
+ {
+ if(sb[i*2]>=n) /* first line whose lower half reaches n */
+ break;
+ }
+ p->midpoint2 = i;
+ i = (p->midpoint1+3)&(~3); /* midpoint1 rounded up to a multiple of 4: base for the second range */
+ p->midpoint2_4 = (p->midpoint2-i)&(~3); /* length of the second range rounded down to 4, 8, 16 ... */
+ p->midpoint2_8 = p->midpoint2_4&(~7);
+ p->midpoint2_16 = p->midpoint2_8&(~15);
+ p->midpoint2_4 += i; /* ... then converted back to absolute line indices */
+ p->midpoint2_8 += i;
+ p->midpoint2_16 += i;
+ }
+ p->octsft=_ogg_malloc(n*sizeof(*p->octsft));
+ p->octend=_ogg_malloc(n*sizeof(*p->octend));
+ p->octpos=_ogg_malloc(n*sizeof(*p->octpos));
+ for(i=0;i<n;i++)
+ {
+ long oc = p->octave[i];
+ oc = oc>>p->shiftoc;
+
+ if(oc>=P_BANDS)oc=P_BANDS-1; /* clamp the shifted octave into [0, P_BANDS-1] */
+ if(oc<0)oc=0;
+
+ p->octsft[i] = oc;
+ p->octpos[i] = (i+1<n) ? ((p->octave[i]+p->octave[i+1])>>1)-p->firstoc : p->octave[i]-p->firstoc; /* midpoint to the next line; the last line has no successor, so octave[n] must not be read */
+
+ }
+ for(i=0;i<n;i++) /* octend[k] = index of the last line in the run of equal octave[] values containing k */
+ {
+ long oc=p->octave[i];
+ long j = i, k;
+ while(i+1<n && p->octave[i+1]==oc){
+ i++;
+ }
+ for(k=j;k<=i;k++)
+ p->octend[k] = i;
+ }
+#endif /* SSE Optimize */
}
void _vp_psy_clear(vorbis_look_psy *p){
@@ -445,6 +688,11 @@
}
_ogg_free(p->noiseoffset);
}
+#ifdef __SSE__ /* SSE Optimize */
+ if(p->octsft)_ogg_free(p->octsft);
+ if(p->octend)_ogg_free(p->octend);
+ if(p->octpos)_ogg_free(p->octpos);
+#endif /* SSE Optimize */
memset(p,0,sizeof(*p));
}
}
@@ -458,6 +706,9 @@
int i,post1;
int seedptr;
const float *posts,*curve;
+#ifdef __SSE__ /* SSE Optimize */
+ __m128 SAMP = _mm_load_ss(&amp); /* amp in the low lane; added to each curve sample by the SSE loop below */
+#endif /* SSE Optimize */
int choice=(int)((amp+dBoffset-P_LEVEL_0)*.1f);
choice=max(choice,0);
@@ -467,6 +718,42 @@
post1=(int)posts[1];
seedptr=oc+(posts[0]-EHMER_OFFSET)*linesper-(linesper>>1);
+#ifdef __SSE__ /* SSE Optimize: seed[pos] = max(seed[pos], amp+curve[i]); NOTE(review): the stride is hard-coded as 8 while the scalar path advances by linesper */
+ i = posts[0];
+ if(seedptr<=0) /* the scalar loop only touches seed[seedptr] when seedptr>0, so position 0 is skipped too */
+ {
+ int preseedptr = seedptr;
+ seedptr = (8-((-seedptr)&7)); /* first strictly positive position on the same stride-8 grid */
+ i += ((seedptr-preseedptr)>>3); /* drop the curve samples that belonged to the skipped positions */
+ }
+ if((post1-i)*8+seedptr>=n)
+ post1 = (n-1-seedptr)/8+i+1; /* shorten the run so no position reaches n */
+ {
+ int post05 = ((post1-i)&(~1))+i; /* end of the part handled two samples per pass */
+ for(;i<post05;i+=2)
+ {
+ __m128 XMM0 = _mm_load_ss(curve+i );
+ __m128 XMM1 = _mm_load_ss(curve+i+1);
+ __m128 XMM2 = _mm_load_ss(seed+seedptr );
+ __m128 XMM3 = _mm_load_ss(seed+seedptr+ 8);
+ XMM0 = _mm_add_ss(XMM0, SAMP);
+ XMM1 = _mm_add_ss(XMM1, SAMP);
+ XMM0 = _mm_max_ss(XMM0, XMM2);
+ XMM1 = _mm_max_ss(XMM1, XMM3);
+ _mm_store_ss(seed+seedptr , XMM0);
+ _mm_store_ss(seed+seedptr+ 8, XMM1);
+ seedptr += 16;
+ }
+ if(i<post1) /* one odd sample left; never taken when the skip above has moved i past post1 */
+ {
+ __m128 XMM0 = _mm_load_ss(curve+i );
+ __m128 XMM2 = _mm_load_ss(seed+seedptr );
+ XMM0 = _mm_add_ss(XMM0, SAMP);
+ XMM0 = _mm_max_ss(XMM0, XMM2);
+ _mm_store_ss(seed+seedptr , XMM0);
+ }
+ }
+#else /* SSE Optimize */
for(i=posts[0];i<post1;i++){
if(seedptr>0){
float lin=amp+curve[i];
@@ -475,6 +762,7 @@
seedptr+=linesper;
if(seedptr>=n)break;
}
+#endif /* SSE Optimize */
}
static void seed_loop(vorbis_look_psy *p,
@@ -491,6 +779,22 @@
for(i=0;i<n;i++){
float max=f[i];
+#ifdef __SSE__ /* SSE Optimize */
+ long oc;
+ long ei=p->octend[i];
+ if(i>ei)
+ continue;
+ oc = p->octave[i];
+ while(i<ei)
+ {
+ i++;
+ if(f[i]>max)max = f[i];
+ }
+
+ if(max+6.f>flr[i])
+ {
+ oc = p->octsft[i];
+#else
long oc=p->octave[i];
while(i+1<n && p->octave[i+1]==oc){
i++;
@@ -502,6 +806,7 @@
if(oc>=P_BANDS)oc=P_BANDS-1;
if(oc<0)oc=0;
+#endif
seed_curve(seed,
curves[oc],
@@ -519,6 +824,80 @@
float *ampstack=alloca(n*sizeof(*ampstack));
long stack=0;
long pos=0;
+#ifdef __SSE__ /* SSE Optimize */
+ long i=0;
+
+ for(;i<n;i++)
+ {
+ if(stack<2)
+ {
+ posstack[stack]=i;
+ ampstack[stack++]=seeds[i];
+ }
+ else
+ {
+ while(1)
+ {
+ if(seeds[i]<ampstack[stack-1])
+ {
+ posstack[stack]=i;
+ ampstack[stack++]=seeds[i];
+ break;
+ }
+ else
+ {
+ if(i<posstack[stack-1]+linesper)
+ {
+ if(stack>1 && ampstack[stack-1]<=ampstack[stack-2] && i<posstack[stack-2]+linesper)
+ {
+ /* we completely overlap, making stack-1 irrelevant. pop it */
+ stack--;
+LOOP_WITH_CHECK_STACK:
+ continue;
+ }
+ }
+ posstack[stack]=i;
+ ampstack[stack++]=seeds[i];
+ break;
+ }
+ }
+ i ++;
+ break;
+ }
+ }
+ for(;i<n;i++)
+ {
+ while(1)
+ {
+ if(seeds[i]<ampstack[stack-1])
+ {
+ posstack[stack]=i;
+ ampstack[stack++]=seeds[i];
+ break;
+ }
+ else
+ {
+ if(i<posstack[stack-1]+linesper)
+ {
+ if(ampstack[stack-1]<=ampstack[stack-2] && i<posstack[stack-2]+linesper)
+ {
+ /* we completely overlap, making stack-1 irrelevant. pop it */
+ stack--;
+ if(stack<2)
+ {
+ goto LOOP_WITH_CHECK_STACK;
+ }
+ else
+ continue;
+ }
+ }
+ posstack[stack]=i;
+ ampstack[stack++]=seeds[i];
+ break;
+ }
+ }
+ }
+#else /* SSE Optimize */
long i;
for(i=0;i<n;i++){
@@ -548,10 +927,40 @@
}
}
}
+#endif /* SSE Optimize */
/* the stack now contains only the positions that are relevant. Scan
'em straight through */
+#ifdef __SSE__ /* SSE Optimize */
+ for(i=0;i<stack-1;i++)
+ {
+ long endpos;
+ if(ampstack[i+1]>ampstack[i])
+ {
+ endpos = posstack[i+1];
+ }
+ else
+ {
+ endpos = posstack[i]+linesper+1; /* +1 is important, else bin 0 is
+ discarded in short frames */
+ }
+ if(endpos>n)
+ endpos = n;
+ for(;pos<endpos;pos++)
+ seeds[pos]=ampstack[i];
+ }
+ if(i<stack)
+ {
+ long endpos;
+ endpos = posstack[i]+linesper+1; /* +1 is important, else bin 0 is
+ discarded in short frames */
+ if(endpos>n)
+ endpos = n;
+ for(;pos<endpos;pos++)
+ seeds[pos]=ampstack[i];
+ }
+#else /* SSE Optimize */
for(i=0;i<stack;i++){
long endpos;
if(i<stack-1 && ampstack[i+1]>ampstack[i]){
@@ -564,6 +973,7 @@
for(;pos<endpos;pos++)
seeds[pos]=ampstack[i];
}
+#endif /* SSE Optimize */
/* there. Linear time. I now remember this was on a problem set I
had in Grad Skool... I didn't solve it at the time ;-) */
@@ -575,6 +985,121 @@
static void max_seeds(vorbis_look_psy *p,
float *seed,
float *flr){
+#ifdef __SSE__ /* SSE Optimize */
+ long n = p->total_octave_lines; /* SSE path: clamp seed[], take a per-line minimum into TEMP[], then flr = max(flr, TEMP) */
+ int linesper = p->eighth_octave_lines;
+ long linpos = 0;
+ long pos;
+ float TEMP[p->n] __attribute__((aligned(16))); /* variable-length stack buffer, one float per spectral line */
+
+ seed_chase(seed,linesper,n); /* for masking */
+ {
+ __m128 PVAL = _mm_set_ps1(p->vi->tone_abs_limit); /* upper bound applied to every seed value */
+ long ln = n&(~15);
+ for(pos=0;pos<ln;pos+=16) /* clamp in chunks of 16, then 8, then 4, then single floats */
+ {
+ __m128 XMM0 = _mm_load_ps(seed+pos );
+ __m128 XMM1 = _mm_load_ps(seed+pos+ 4);
+ __m128 XMM2 = _mm_load_ps(seed+pos+ 8);
+ __m128 XMM3 = _mm_load_ps(seed+pos+12);
+ XMM0 = _mm_min_ps(XMM0, PVAL);
+ XMM1 = _mm_min_ps(XMM1, PVAL);
+ XMM2 = _mm_min_ps(XMM2, PVAL);
+ XMM3 = _mm_min_ps(XMM3, PVAL);
+ _mm_store_ps(seed+pos , XMM0);
+ _mm_store_ps(seed+pos+ 4, XMM1);
+ _mm_store_ps(seed+pos+ 8, XMM2);
+ _mm_store_ps(seed+pos+12, XMM3);
+ }
+ ln = n&(~7);
+ for(;pos<ln;pos+=8)
+ {
+ __m128 XMM0 = _mm_load_ps(seed+pos );
+ __m128 XMM1 = _mm_load_ps(seed+pos+ 4);
+ XMM0 = _mm_min_ps(XMM0, PVAL);
+ XMM1 = _mm_min_ps(XMM1, PVAL);
+ _mm_store_ps(seed+pos , XMM0);
+ _mm_store_ps(seed+pos+ 4, XMM1);
+ }
+ ln = n&(~3);
+ for(;pos<ln;pos+=4)
+ {
+ __m128 XMM0 = _mm_load_ps(seed+pos );
+ XMM0 = _mm_min_ps(XMM0, PVAL);
+ _mm_store_ps(seed+pos , XMM0);
+ }
+ for(;pos<n;pos++)
+ {
+ __m128 XMM0 = _mm_load_ss(seed+pos );
+ XMM0 = _mm_min_ss(XMM0, PVAL);
+ _mm_store_ss(seed+pos, XMM0);
+ }
+ }
+ pos = p->octave[0]-p->firstoc-(linesper>>1);
+ if(linpos+1<p->n) /* first segment, handled separately because of its NEGINF rule */
+ {
+ float minV = seed[pos];
+ long end = p->octpos[linpos]; /* precomputed segment boundary for this line */
+ while(pos+1<=end)
+ {
+ pos ++;
+ if((seed[pos]>NEGINF && seed[pos]<minV) || minV==NEGINF) /* NEGINF samples are ignored unless the running minimum is itself NEGINF */
+ minV = seed[pos];
+ }
+ end = pos+p->firstoc;
+ for(;linpos<p->n&&p->octave[linpos]<=end;)
+ {
+ int ep = p->octend[linpos]; /* fill the whole run of lines ending at octend[linpos] in one go */
+ for(;linpos<=ep;linpos++)
+ TEMP[linpos] = minV;
+ }
+ }
+ while(linpos+1<p->n) /* remaining segments use a plain running minimum */
+ {
+ float minV = seed[pos];
+ long end = p->octpos[linpos];
+ while(pos+1<=end)
+ {
+ pos ++;
+ if(seed[pos]<minV)
+ minV = seed[pos];
+ }
+ end = pos+p->firstoc;
+ for(;linpos<p->n&&p->octave[linpos]<=end;)
+ {
+ int ep = p->octend[linpos];
+ for(;linpos<=ep;linpos++)
+ TEMP[linpos] = minV;
+ }
+ }
+
+ {
+ float minV = seed[p->total_octave_lines-1]; /* any lines still unassigned take the last seed value */
+ for(;linpos<p->n;linpos++)
+ TEMP[linpos] = minV;
+ }
+ {
+ for(pos=0;pos<p->n;pos+=16) /* flr = max(flr, TEMP), 16 floats per pass; assumes p->n is a multiple of 16 -- TODO confirm */
+ {
+ __m128 XMM0 = _mm_load_ps(flr+pos );
+ __m128 XMM4 = _mm_load_ps(TEMP+pos );
+ __m128 XMM1 = _mm_load_ps(flr+pos+ 4);
+ __m128 XMM5 = _mm_load_ps(TEMP+pos+ 4);
+ __m128 XMM2 = _mm_load_ps(flr+pos+ 8);
+ __m128 XMM6 = _mm_load_ps(TEMP+pos+ 8);
+ __m128 XMM3 = _mm_load_ps(flr+pos+ 12);
+ __m128 XMM7 = _mm_load_ps(TEMP+pos+12);
+ XMM0 = _mm_max_ps(XMM0, XMM4);
+ XMM1 = _mm_max_ps(XMM1, XMM5);
+ XMM2 = _mm_max_ps(XMM2, XMM6);
+ XMM3 = _mm_max_ps(XMM3, XMM7);
+ _mm_store_ps(flr+pos , XMM0);
+ _mm_store_ps(flr+pos+ 4, XMM1);
+ _mm_store_ps(flr+pos+ 8, XMM2);
+ _mm_store_ps(flr+pos+12, XMM3);
+ }
+ }
+#else /* SSE Optimize */
long n=p->total_octave_lines;
int linesper=p->eighth_octave_lines;
long linpos=0;
@@ -605,8 +1130,2399 @@
if(flr[linpos]<minV)flr[linpos]=minV;
}
+#endif /* SSE Optimize */
+}
+
+#ifdef __SSE__ /* SSE Optimize */
+/*
+ A = tY * tXX - tX * tXY;
+ B = tN * tXY - tX * tY;
+ D = tN * tXX - tX * tX;
+ R = (A + x * B) / D;
+
+ Input
+ TN = (N3 ,N2 ,N1 ,N0 )
+ XMM0 = (XY0,Y0 ,XX0,X0 )
+ XMM1 = (XY1,Y1 ,XX1,X1 )
+ XMM4 = (XY2,Y2 ,XX2,X2 )
+ XMM3 = (XY3,Y3 ,XX3,X3 )
+
+ Phase 1.
+ (pairwise interleave of the four inputs; intermediates in XMM0, XMM2, XMM4, XMM5)
+ Phase 2.
+ XMM0 = (X3 ,X2 ,X1 ,X0 )
+ XMM1 = (XX3,XX2,XX1,XX0)
+ XMM2 = (Y3 ,Y2 ,Y1 ,Y0 )
+ XMM3 = (XY3,XY2,XY1,XY0)
+
+ Phase 3. (XMM3 is then reloaded with PX = findex[i..i+3])
+ XMM4 = Y*XX
+ XMM5 = X*XY
+ XMM6 = XY*TN
+ XMM7 = X*Y
+
+ Phase 4.
+ XMM4 = Y*XX - X*XY ... A
+ XMM6 = XY*TN - X*Y ... B
+ XMM1 = XX*TN
+ XMM0 = X*X
+ XMM1 = XX*TN - X*X ... D
+
+ Phase 5.
+ XMM6 = PX*B
+ XMM4 = PX*B+A
+ XMM4 = (A+PX*B)/D (as a multiply by _mm_rcp_ps(D) refined with one Newton-Raphson step)
+*/
+#define bark_noise_hybridmp_SSE_SUBC() /* 4-lane R=(A+x*B)/D; reads caller-scope XMM0,XMM1,XMM4,XMM3,TN.ps,findex,i; result left in XMM4 (XMM0,XMM1,XMM3 are clobbered) */ \
+{ \
+ __m128 XMM2, XMM5, XMM6, XMM7; /* scratch registers local to this expansion */ \
+ XMM2 = XMM0; \
+ XMM5 = XMM4; \
+ XMM0 = _mm_shuffle_ps(XMM0, XMM1, _MM_SHUFFLE(1,0,1,0)); /* (XX1,X1 ,XX0,X0 ) */ \
+ XMM2 = _mm_shuffle_ps(XMM2, XMM1, _MM_SHUFFLE(3,2,3,2)); /* (XY1,Y1 ,XY0,Y0 ) */ \
+ XMM4 = _mm_shuffle_ps(XMM4, XMM3, _MM_SHUFFLE(1,0,1,0)); /* (XX3,X3 ,XX2,X2 ) */ \
+ XMM5 = _mm_shuffle_ps(XMM5, XMM3, _MM_SHUFFLE(3,2,3,2)); /* (XY3,Y3 ,XY2,Y2 ) */ \
+ XMM1 = XMM0; \
+ XMM3 = XMM2; \
+ XMM0 = _mm_shuffle_ps(XMM0, XMM4, _MM_SHUFFLE(2,0,2,0)); /* X = (X3 ,X2 ,X1 ,X0 ) */ \
+ XMM1 = _mm_shuffle_ps(XMM1, XMM4, _MM_SHUFFLE(3,1,3,1)); /* XX = (XX3,XX2,XX1,XX0) */ \
+ XMM2 = _mm_shuffle_ps(XMM2, XMM5, _MM_SHUFFLE(2,0,2,0)); /* Y = (Y3 ,Y2 ,Y1 ,Y0 ) */ \
+ XMM3 = _mm_shuffle_ps(XMM3, XMM5, _MM_SHUFFLE(3,1,3,1)); /* XY = (XY3,XY2,XY1,XY0) */ \
+ XMM4 = XMM2; \
+ XMM5 = XMM0; \
+ XMM6 = XMM3; \
+ XMM7 = XMM0; \
+ XMM4 = _mm_mul_ps(XMM4, XMM1); /* Y*XX */ \
+ XMM5 = _mm_mul_ps(XMM5, XMM3); /* X*XY */ \
+ XMM3 = _mm_load_ps(findex+i); /* x for lanes i..i+3 (XY is no longer needed in XMM3) */ \
+ XMM6 = _mm_mul_ps(XMM6, TN.ps); /* XY*N */ \
+ XMM1 = _mm_mul_ps(XMM1, TN.ps); /* XX*N */ \
+ XMM7 = _mm_mul_ps(XMM7, XMM2); /* X*Y */ \
+ XMM0 = _mm_mul_ps(XMM0, XMM0); /* X*X */ \
+ XMM4 = _mm_sub_ps(XMM4, XMM5); /* A = Y*XX - X*XY */ \
+ XMM6 = _mm_sub_ps(XMM6, XMM7); /* B = N*XY - X*Y */ \
+ XMM1 = _mm_sub_ps(XMM1, XMM0); /* D = N*XX - X*X */ \
+ XMM6 = _mm_mul_ps(XMM6, XMM3); /* x*B */ \
+ XMM3 = _mm_rcp_ps(XMM1); /* r = approximate 1/D */ \
+ XMM4 = _mm_add_ps(XMM4, XMM6); /* A + x*B */ \
+ XMM1 = _mm_mul_ps(XMM1, XMM3); /* D*r */ \
+ XMM1 = _mm_mul_ps(XMM1, XMM3); /* D*r*r */ \
+ XMM3 = _mm_add_ps(XMM3, XMM3); /* 2*r */ \
+ XMM3 = _mm_sub_ps(XMM3, XMM1); /* 2*r - D*r*r: one Newton-Raphson refinement of 1/D */ \
+ XMM4 = _mm_mul_ps(XMM4, XMM3); /* R = (A + x*B) * (1/D) */ \
}
+#define bark_noise_hybridmp_SSE_SUBC2() /* same computation as bark_noise_hybridmp_SSE_SUBC, but also stores A, B and the refined 1/D into caller-scope PA, PB, PD */ \
+{ \
+ __m128 XMM2, XMM5, XMM6, XMM7; /* scratch registers local to this expansion */ \
+ XMM2 = XMM0; \
+ XMM5 = XMM4; \
+ XMM0 = _mm_shuffle_ps(XMM0, XMM1, _MM_SHUFFLE(1,0,1,0)); /* (XX1,X1 ,XX0,X0 ) */ \
+ XMM2 = _mm_shuffle_ps(XMM2, XMM1, _MM_SHUFFLE(3,2,3,2)); /* (XY1,Y1 ,XY0,Y0 ) */ \
+ XMM4 = _mm_shuffle_ps(XMM4, XMM3, _MM_SHUFFLE(1,0,1,0)); /* (XX3,X3 ,XX2,X2 ) */ \
+ XMM5 = _mm_shuffle_ps(XMM5, XMM3, _MM_SHUFFLE(3,2,3,2)); /* (XY3,Y3 ,XY2,Y2 ) */ \
+ XMM1 = XMM0; \
+ XMM3 = XMM2; \
+ XMM0 = _mm_shuffle_ps(XMM0, XMM4, _MM_SHUFFLE(2,0,2,0)); /* X = (X3 ,X2 ,X1 ,X0 ) */ \
+ XMM1 = _mm_shuffle_ps(XMM1, XMM4, _MM_SHUFFLE(3,1,3,1)); /* XX = (XX3,XX2,XX1,XX0) */ \
+ XMM2 = _mm_shuffle_ps(XMM2, XMM5, _MM_SHUFFLE(2,0,2,0)); /* Y = (Y3 ,Y2 ,Y1 ,Y0 ) */ \
+ XMM3 = _mm_shuffle_ps(XMM3, XMM5, _MM_SHUFFLE(3,1,3,1)); /* XY = (XY3,XY2,XY1,XY0) */ \
+ XMM4 = XMM2; \
+ XMM5 = XMM0; \
+ XMM6 = XMM3; \
+ XMM7 = XMM0; \
+ XMM4 = _mm_mul_ps(XMM4, XMM1); /* Y*XX */ \
+ XMM5 = _mm_mul_ps(XMM5, XMM3); /* X*XY */ \
+ XMM3 = _mm_load_ps(findex+i); /* x for lanes i..i+3 (XY is no longer needed in XMM3) */ \
+ XMM6 = _mm_mul_ps(XMM6, TN.ps); /* XY*N */ \
+ XMM1 = _mm_mul_ps(XMM1, TN.ps); /* XX*N */ \
+ XMM7 = _mm_mul_ps(XMM7, XMM2); /* X*Y */ \
+ XMM0 = _mm_mul_ps(XMM0, XMM0); /* X*X */ \
+ XMM4 = _mm_sub_ps(XMM4, XMM5); /* A = Y*XX - X*XY */ \
+ XMM6 = _mm_sub_ps(XMM6, XMM7); /* B = N*XY - X*Y */ \
+ XMM1 = _mm_sub_ps(XMM1, XMM0); /* D = N*XX - X*X */ \
+ PA = XMM4; /* export A before it is folded into the result */ \
+ PB = XMM6; /* export B before it is scaled by x */ \
+ XMM6 = _mm_mul_ps(XMM6, XMM3); /* x*B */ \
+ XMM3 = _mm_rcp_ps(XMM1); /* r = approximate 1/D */ \
+ XMM4 = _mm_add_ps(XMM4, XMM6); /* A + x*B */ \
+ XMM1 = _mm_mul_ps(XMM1, XMM3); /* D*r */ \
+ XMM1 = _mm_mul_ps(XMM1, XMM3); /* D*r*r */ \
+ XMM3 = _mm_add_ps(XMM3, XMM3); /* 2*r */ \
+ XMM3 = _mm_sub_ps(XMM3, XMM1); /* 2*r - D*r*r: one Newton-Raphson refinement of 1/D */ \
+ PD = XMM3; /* export the refined reciprocal of D (not D itself) */ \
+ XMM4 = _mm_mul_ps(XMM4, XMM3); /* R = (A + x*B) * (1/D) */ \
+}
+#endif /* SSE Optimize */
+
+#ifdef __SSE__ /* SSE Optimize */
+static void bark_noise_hybridmp(vorbis_look_psy *p,
+ const float *f,
+ float *noise,
+ const float offset,
+ const int fixed,
+ float *work,
+ float *tf){
+ int n = p->n;
+ float *N = work;
+ __m128 *XXYY = (__m128*)(N+n);
+ float *xxyy = N+n;
+ short *sb = (short*)p->bark;
+
+ int i, j;
+ int lo, hi;
+ int midpoint1, midpoint2;
+ float tN, tX, tXX, tY, tXY;
+ float R, A, B, D;
+ float x;
+ float *TN = N;
+ __m128 *TXXYY = XXYY;
+
+ __m128 OFFSET;
+ __m128 PXXYY = _mm_setzero_ps();
+ __m128 PA, PB, PD;
+ _MM_ALIGN16 __m128 TEMP[16];
+ int p0, p1;
+
+ // Phase 1
+ _mm_prefetch((const char*)(f ), _MM_HINT_NTA);
+ _mm_prefetch((const char*)(findex2 ), _MM_HINT_NTA);
+ _mm_prefetch((const char*)(f +16), _MM_HINT_NTA);
+ _mm_prefetch((const char*)(findex2 +16), _MM_HINT_NTA);
+ OFFSET = _mm_set_ps1(offset);
+ {
+ __m128 XMM0 = _mm_load_ps(f );
+ __m128 XMM1 = _mm_load_ps(f+ 4);
+ __m128 XMM2 = _mm_load_ps(f+ 8);
+ __m128 XMM3 = _mm_load_ps(f+12);
+ __m128 XMM4, XMM5, XMM6, XMM7;
+ XMM4 = OFFSET;
+ XMM5 = _mm_load_ps(PFV_1.sf);
+ XMM0 = _mm_add_ps(XMM0, XMM4);
+ XMM1 = _mm_add_ps(XMM1, XMM4);
+ XMM2 = _mm_add_ps(XMM2, XMM4);
+ XMM3 = _mm_add_ps(XMM3, XMM4);
+ XMM0 = _mm_max_ps(XMM0, XMM5);
+ XMM1 = _mm_max_ps(XMM1, XMM5);
+ XMM2 = _mm_max_ps(XMM2, XMM5);
+ XMM3 = _mm_max_ps(XMM3, XMM5);
+ XMM4 = XMM0;
+ XMM5 = XMM1;
+ XMM6 = XMM2;
+ XMM7 = XMM3;
+ XMM0 = _mm_mul_ps(XMM0, XMM0);
+ XMM1 = _mm_mul_ps(XMM1, XMM1);
+ XMM2 = _mm_mul_ps(XMM2, XMM2);
+ XMM3 = _mm_mul_ps(XMM3, XMM3);
+ _mm_store_ps(TN , XMM0); /* N */
+ _mm_store_ps(TN+ 4, XMM1);
+ _mm_store_ps(TN+ 8, XMM2);
+ _mm_store_ps(TN+12, XMM3);
+ XMM0 = _mm_mul_ps(XMM0, XMM4);
+ XMM1 = _mm_mul_ps(XMM1, XMM5);
+ XMM2 = _mm_mul_ps(XMM2, XMM6);
+ XMM3 = _mm_mul_ps(XMM3, XMM7);
+ TEMP[ 1] = XMM0; /* Y */
+ PXXYY = _mm_move_ss(PXXYY, TEMP[1]);
+ XMM4 = _mm_load_ps(findex );
+ TEMP[ 5] = XMM1;
+ XMM5 = _mm_load_ps(findex+ 4);
+ TEMP[ 9] = XMM2;
+ XMM6 = _mm_load_ps(findex+ 8);
+ TEMP[13] = XMM3;
+ XMM7 = _mm_load_ps(findex+12);
+ XMM0 = _mm_mul_ps(XMM0, XMM4);
+ XMM1 = _mm_mul_ps(XMM1, XMM5);
+ XMM2 = _mm_mul_ps(XMM2, XMM6);
+ XMM3 = _mm_mul_ps(XMM3, XMM7);
+ TEMP[ 3] = XMM0; /* XY */
+ TEMP[ 7] = XMM1;
+ TEMP[11] = XMM2;
+ TEMP[15] = XMM3;
+ XMM0 = _mm_load_ps(TN ); /* N */
+ XMM1 = _mm_load_ps(TN+ 4);
+ XMM2 = _mm_load_ps(TN+ 8);
+ XMM3 = _mm_load_ps(TN+12);
+ XMM4 = _mm_mul_ps(XMM4, XMM0);
+ XMM5 = _mm_mul_ps(XMM5, XMM1);
+ XMM6 = _mm_mul_ps(XMM6, XMM2);
+ XMM7 = _mm_mul_ps(XMM7, XMM3);
+ TEMP[ 0] = XMM4; /* X */
+ TEMP[ 4] = XMM5;
+ TEMP[ 8] = XMM6;
+ TEMP[12] = XMM7;
+ XMM4 = _mm_load_ps(findex2 );
+ XMM5 = _mm_load_ps(findex2+ 4);
+ XMM6 = _mm_load_ps(findex2+ 8);
+ XMM7 = _mm_load_ps(findex2+12);
+ XMM0 = _mm_mul_ps(XMM0, XMM4);
+ XMM4 = TEMP[0]; // X
+ XMM1 = _mm_mul_ps(XMM1, XMM5);
+ XMM5 = TEMP[1]; // Y
+ XMM2 = _mm_mul_ps(XMM2, XMM6);
+ XMM6 = XMM0; // XX
+ XMM3 = _mm_mul_ps(XMM3, XMM7);
+ XMM7 = TEMP[3]; // XY
+ XMM0 = XMM4;
+ TEMP[ 6] = XMM1;
+ XMM1 = XMM5;
+ // i=0-3
+ // PXXYY = (0, 0, 0, Y^2)
+ XMM4 = _mm_shuffle_ps(XMM4, XMM6, _MM_SHUFFLE(1,0,1,0));
+ XMM0 = _mm_shuffle_ps(XMM0, XMM6, _MM_SHUFFLE(3,2,3,2));
+ TEMP[10] = XMM2;
+ XMM5 = _mm_shuffle_ps(XMM5, XMM7, _MM_SHUFFLE(1,0,1,0));
+ XMM1 = _mm_shuffle_ps(XMM1, XMM7, _MM_SHUFFLE(3,2,3,2));
+ TEMP[14] = XMM3;
+ XMM6 = XMM4;
+ XMM7 = XMM0;
+ XMM4 = _mm_shuffle_ps(XMM4, XMM5, _MM_SHUFFLE(2,0,2,0));
+ XMM6 = _mm_shuffle_ps(XMM6, XMM5, _MM_SHUFFLE(3,1,3,1));
+ XMM5 = TEMP[ 4]; // X
+ XMM0 = _mm_shuffle_ps(XMM0, XMM1, _MM_SHUFFLE(2,0,2,0));
+ XMM7 = _mm_shuffle_ps(XMM7, XMM1, _MM_SHUFFLE(3,1,3,1));
+ XMM1 = TEMP[ 5]; // Y
+ // XXYY[i+0] = (XY, Y, XX, X) = (0, Y^3, 0, 0)
+ // To Fix (0, Y^3*.5f, 0, Y^2*.5f)
+ XMM4 = _mm_add_ps(XMM4, PXXYY);
+ TN[ 0] *= 0.5;
+ XMM4 = _mm_mul_ps(XMM4, PFV_0P5.ps);
+ TN[ 1] += TN[ 0];
+ XMM6 = _mm_add_ps(XMM6, XMM4);
+ TN[ 2] += TN[ 1];
+ XMM0 = _mm_add_ps(XMM0, XMM6);
+ TN[ 3] += TN[ 2];
+ XMM7 = _mm_add_ps(XMM7, XMM0);
+ TXXYY[ 0] = XMM4;
+ XMM4 = TEMP[ 6]; // XX
+ TXXYY[ 1] = XMM6;
+ XMM6 = TEMP[ 7]; // XY
+ TXXYY[ 2] = XMM0;
+ XMM0 = XMM5;
+ TXXYY[ 3] = XMM7;
+ XMM7 = XMM1;
+ XMM5 = _mm_shuffle_ps(XMM5, XMM4, _MM_SHUFFLE(1,0,1,0));
+ XMM0 = _mm_shuffle_ps(XMM0, XMM4, _MM_SHUFFLE(3,2,3,2));
+ XMM1 = _mm_shuffle_ps(XMM1, XMM6, _MM_SHUFFLE(1,0,1,0));
+ XMM7 = _mm_shuffle_ps(XMM7, XMM6, _MM_SHUFFLE(3,2,3,2));
+ XMM4 = XMM5;
+ XMM6 = XMM0;
+ XMM5 = _mm_shuffle_ps(XMM5, XMM1, _MM_SHUFFLE(2,0,2,0));
+ XMM4 = _mm_shuffle_ps(XMM4, XMM1, _MM_SHUFFLE(3,1,3,1));
+ XMM1 = TEMP[ 8]; // X
+ XMM0 = _mm_shuffle_ps(XMM0, XMM7, _MM_SHUFFLE(2,0,2,0));
+ XMM6 = _mm_shuffle_ps(XMM6, XMM7, _MM_SHUFFLE(3,1,3,1));
+ XMM7 = TEMP[ 9]; // Y
+ XMM5 = _mm_add_ps(XMM5, TXXYY[ 3]);
+ TN[ 4] += TN[ 3];
+ XMM4 = _mm_add_ps(XMM4, XMM5);
+ TN[ 5] += TN[ 4];
+ XMM0 = _mm_add_ps(XMM0, XMM4);
+ TN[ 6] += TN[ 5];
+ XMM6 = _mm_add_ps(XMM6, XMM0);
+ TN[ 7] += TN[ 6];
+ TXXYY[ 4] = XMM5;
+ XMM5 = TEMP[10]; // XX
+ TXXYY[ 5] = XMM4;
+ XMM4 = TEMP[11]; // XY
+ TXXYY[ 6] = XMM0;
+ XMM0 = XMM1;
+ TXXYY[ 7] = XMM6;
+ XMM6 = XMM7;
+ XMM1 = _mm_shuffle_ps(XMM1, XMM5, _MM_SHUFFLE(1,0,1,0));
+ XMM0 = _mm_shuffle_ps(XMM0, XMM5, _MM_SHUFFLE(3,2,3,2));
+ XMM7 = _mm_shuffle_ps(XMM7, XMM4, _MM_SHUFFLE(1,0,1,0));
+ XMM6 = _mm_shuffle_ps(XMM6, XMM4, _MM_SHUFFLE(3,2,3,2));
+ XMM5 = XMM1;
+ XMM4 = XMM0;
+ XMM1 = _mm_shuffle_ps(XMM1, XMM7, _MM_SHUFFLE(2,0,2,0));
+ XMM5 = _mm_shuffle_ps(XMM5, XMM7, _MM_SHUFFLE(3,1,3,1));
+ XMM7 = TEMP[12]; // X
+ XMM0 = _mm_shuffle_ps(XMM0, XMM6, _MM_SHUFFLE(2,0,2,0));
+ XMM4 = _mm_shuffle_ps(XMM4, XMM6, _MM_SHUFFLE(3,1,3,1));
+ XMM6 = TEMP[13]; // Y
+ XMM1 = _mm_add_ps(XMM1, TXXYY[ 7]);
+ TN[ 8] += TN[ 7];
+ XMM5 = _mm_add_ps(XMM5, XMM1);
+ TN[ 9] += TN[ 8];
+ XMM0 = _mm_add_ps(XMM0, XMM5);
+ TN[10] += TN[ 9];
+ XMM4 = _mm_add_ps(XMM4, XMM0);
+ TN[11] += TN[10];
+ TXXYY[ 8] = XMM1;
+ XMM1 = TEMP[14]; // XX
+ TXXYY[ 9] = XMM5;
+ XMM5 = TEMP[15]; // XY
+ TXXYY[10] = XMM0;
+ XMM0 = XMM7;
+ TXXYY[11] = XMM4;
+ XMM4 = XMM6;
+ XMM7 = _mm_shuffle_ps(XMM7, XMM1, _MM_SHUFFLE(1,0,1,0));
+ XMM0 = _mm_shuffle_ps(XMM0, XMM1, _MM_SHUFFLE(3,2,3,2));
+ XMM6 = _mm_shuffle_ps(XMM6, XMM5, _MM_SHUFFLE(1,0,1,0));
+ XMM4 = _mm_shuffle_ps(XMM4, XMM5, _MM_SHUFFLE(3,2,3,2));
+ XMM1 = XMM7;
+ XMM5 = XMM0;
+ XMM7 = _mm_shuffle_ps(XMM7, XMM6, _MM_SHUFFLE(2,0,2,0));
+ XMM1 = _mm_shuffle_ps(XMM1, XMM6, _MM_SHUFFLE(3,1,3,1));
+ XMM0 = _mm_shuffle_ps(XMM0, XMM4, _MM_SHUFFLE(2,0,2,0));
+ XMM5 = _mm_shuffle_ps(XMM5, XMM4, _MM_SHUFFLE(3,1,3,1));
+ XMM7 = _mm_add_ps(XMM7, TXXYY[11]);
+ TN[12] += TN[11];
+ XMM1 = _mm_add_ps(XMM1, XMM7);
+ TN[13] += TN[12];
+ XMM0 = _mm_add_ps(XMM0, XMM1);
+ TN[14] += TN[13];
+ XMM5 = _mm_add_ps(XMM5, XMM0);
+ TN[15] += TN[14];
+ TXXYY[12] = XMM7;
+ TXXYY[13] = XMM1;
+ TXXYY[14] = XMM0;
+ TXXYY[15] = XMM5;
+ TN += 16;
+ TXXYY += 16;
+ }
+ for(i=16;i<n;i+=16)
+ {
+ __m128 XMM0, XMM1, XMM2, XMM3;
+ __m128 XMM4, XMM5, XMM6, XMM7;
+ _mm_prefetch((const char*)(f+i+16), _MM_HINT_NTA);
+ _mm_prefetch((const char*)(findex2+i+16), _MM_HINT_NTA);
+ XMM0 = _mm_load_ps(f+i );
+ XMM1 = _mm_load_ps(f+i+ 4);
+ XMM2 = _mm_load_ps(f+i+ 8);
+ XMM3 = _mm_load_ps(f+i+12);
+ XMM4 = OFFSET;
+ XMM5 = _mm_load_ps(PFV_1.sf);
+ XMM0 = _mm_add_ps(XMM0, XMM4);
+ XMM1 = _mm_add_ps(XMM1, XMM4);
+ XMM2 = _mm_add_ps(XMM2, XMM4);
+ XMM3 = _mm_add_ps(XMM3, XMM4);
+ XMM0 = _mm_max_ps(XMM0, XMM5);
+ XMM1 = _mm_max_ps(XMM1, XMM5);
+ XMM2 = _mm_max_ps(XMM2, XMM5);
+ XMM3 = _mm_max_ps(XMM3, XMM5);
+ XMM4 = XMM0;
+ XMM5 = XMM1;
+ XMM6 = XMM2;
+ XMM7 = XMM3;
+ XMM0 = _mm_mul_ps(XMM0, XMM0);
+ XMM1 = _mm_mul_ps(XMM1, XMM1);
+ XMM2 = _mm_mul_ps(XMM2, XMM2);
+ XMM3 = _mm_mul_ps(XMM3, XMM3);
+ _mm_store_ps(TN , XMM0);
+ _mm_store_ps(TN+ 4, XMM1);
+ _mm_store_ps(TN+ 8, XMM2);
+ _mm_store_ps(TN+12, XMM3);
+ XMM0 = _mm_mul_ps(XMM0, XMM4);
+ XMM1 = _mm_mul_ps(XMM1, XMM5);
+ XMM2 = _mm_mul_ps(XMM2, XMM6);
+ XMM3 = _mm_mul_ps(XMM3, XMM7);
+ TEMP[ 1] = XMM0; /* Y */
+ XMM4 = _mm_load_ps(findex+i );
+ TEMP[ 5] = XMM1;
+ XMM5 = _mm_load_ps(findex+i+ 4);
+ TEMP[ 9] = XMM2;
+ XMM6 = _mm_load_ps(findex+i+ 8);
+ TEMP[13] = XMM3;
+ XMM7 = _mm_load_ps(findex+i+12);
+ XMM0 = _mm_mul_ps(XMM0, XMM4);
+ XMM1 = _mm_mul_ps(XMM1, XMM5);
+ XMM2 = _mm_mul_ps(XMM2, XMM6);
+ XMM3 = _mm_mul_ps(XMM3, XMM7);
+ TEMP[ 3] = XMM0; /* XY */
+ TEMP[ 7] = XMM1;
+ TEMP[11] = XMM2;
+ TEMP[15] = XMM3;
+ XMM0 = _mm_load_ps(TN ); /* N */
+ XMM1 = _mm_load_ps(TN+ 4);
+ XMM2 = _mm_load_ps(TN+ 8);
+ XMM3 = _mm_load_ps(TN+12);
+ XMM4 = _mm_mul_ps(XMM4, XMM0);
+ XMM5 = _mm_mul_ps(XMM5, XMM1);
+ XMM6 = _mm_mul_ps(XMM6, XMM2);
+ XMM7 = _mm_mul_ps(XMM7, XMM3);
+ TEMP[ 0] = XMM4; /* X */
+ TEMP[ 4] = XMM5;
+ TEMP[ 8] = XMM6;
+ TEMP[12] = XMM7;
+ XMM4 = _mm_load_ps(findex2+i );
+ XMM5 = _mm_load_ps(findex2+i+ 4);
+ XMM6 = _mm_load_ps(findex2+i+ 8);
+ XMM7 = _mm_load_ps(findex2+i+12);
+ XMM0 = _mm_mul_ps(XMM0, XMM4);
+ XMM4 = TEMP[ 0]; // X
+ XMM1 = _mm_mul_ps(XMM1, XMM5);
+ XMM5 = TEMP[ 1]; // Y
+ XMM2 = _mm_mul_ps(XMM2, XMM6);
+ XMM6 = XMM0; /* XX */
+ XMM0 = XMM4;
+ XMM3 = _mm_mul_ps(XMM3, XMM7);
+ XMM7 = TEMP[ 3]; // XY
+ TEMP[ 6] = XMM1;
+ XMM1 = XMM5;
+ XMM4 = _mm_shuffle_ps(XMM4, XMM6, _MM_SHUFFLE(1,0,1,0));
+ XMM0 = _mm_shuffle_ps(XMM0, XMM6, _MM_SHUFFLE(3,2,3,2));
+ TEMP[10] = XMM2;
+ XMM5 = _mm_shuffle_ps(XMM5, XMM7, _MM_SHUFFLE(1,0,1,0));
+ XMM1 = _mm_shuffle_ps(XMM1, XMM7, _MM_SHUFFLE(3,2,3,2));
+ TEMP[14] = XMM3;
+ XMM6 = XMM4;
+ XMM7 = XMM0;
+ XMM4 = _mm_shuffle_ps(XMM4, XMM5, _MM_SHUFFLE(2,0,2,0));
+ XMM6 = _mm_shuffle_ps(XMM6, XMM5, _MM_SHUFFLE(3,1,3,1));
+ XMM5 = TEMP[ 4]; // X
+ XMM0 = _mm_shuffle_ps(XMM0, XMM1, _MM_SHUFFLE(2,0,2,0));
+ XMM7 = _mm_shuffle_ps(XMM7, XMM1, _MM_SHUFFLE(3,1,3,1));
+ XMM1 = TEMP[ 5]; // Y
+ XMM4 = _mm_add_ps(XMM4, TXXYY[-1]);
+ TN[ 0] += TN[-1];
+ XMM6 = _mm_add_ps(XMM6, XMM4);
+ TN[ 1] += TN[ 0];
+ XMM0 = _mm_add_ps(XMM0, XMM6);
+ TN[ 2] += TN[ 1];
+ XMM7 = _mm_add_ps(XMM7, XMM0);
+ TN[ 3] += TN[ 2];
+ TXXYY[ 0] = XMM4;
+ XMM4 = TEMP[ 6]; // XX
+ TXXYY[ 1] = XMM6;
+ XMM6 = TEMP[ 7]; // XY
+ TXXYY[ 2] = XMM0;
+ XMM0 = XMM5;
+ TXXYY[ 3] = XMM7;
+ XMM7 = XMM1;
+ XMM5 = _mm_shuffle_ps(XMM5, XMM4, _MM_SHUFFLE(1,0,1,0));
+ XMM0 = _mm_shuffle_ps(XMM0, XMM4, _MM_SHUFFLE(3,2,3,2));
+ XMM1 = _mm_shuffle_ps(XMM1, XMM6, _MM_SHUFFLE(1,0,1,0));
+ XMM7 = _mm_shuffle_ps(XMM7, XMM6, _MM_SHUFFLE(3,2,3,2));
+ XMM4 = XMM5;
+ XMM6 = XMM0;
+ XMM5 = _mm_shuffle_ps(XMM5, XMM1, _MM_SHUFFLE(2,0,2,0));
+ XMM4 = _mm_shuffle_ps(XMM4, XMM1, _MM_SHUFFLE(3,1,3,1));
+ XMM1 = TEMP[ 8]; // X
+ XMM0 = _mm_shuffle_ps(XMM0, XMM7, _MM_SHUFFLE(2,0,2,0));
+ XMM6 = _mm_shuffle_ps(XMM6, XMM7, _MM_SHUFFLE(3,1,3,1));
+ XMM7 = TEMP[ 9]; // Y
+ XMM5 = _mm_add_ps(XMM5, TXXYY[ 3]);
+ TN[ 4] += TN[ 3];
+ XMM4 = _mm_add_ps(XMM4, XMM5);
+ TN[ 5] += TN[ 4];
+ XMM0 = _mm_add_ps(XMM0, XMM4);
+ TN[ 6] += TN[ 5];
+ XMM6 = _mm_add_ps(XMM6, XMM0);
+ TN[ 7] += TN[ 6];
+ TXXYY[ 4] = XMM5;
+ XMM5 = TEMP[10]; // XX
+ TXXYY[ 5] = XMM4;
+ XMM4 = TEMP[11]; // XY
+ TXXYY[ 6] = XMM0;
+ XMM0 = XMM1;
+ TXXYY[ 7] = XMM6;
+ XMM6 = XMM7;
+ XMM1 = _mm_shuffle_ps(XMM1, XMM5, _MM_SHUFFLE(1,0,1,0));
+ XMM0 = _mm_shuffle_ps(XMM0, XMM5, _MM_SHUFFLE(3,2,3,2));
+ XMM7 = _mm_shuffle_ps(XMM7, XMM4, _MM_SHUFFLE(1,0,1,0));
+ XMM6 = _mm_shuffle_ps(XMM6, XMM4, _MM_SHUFFLE(3,2,3,2));
+ XMM5 = XMM1;
+ XMM4 = XMM0;
+ XMM1 = _mm_shuffle_ps(XMM1, XMM7, _MM_SHUFFLE(2,0,2,0));
+ XMM5 = _mm_shuffle_ps(XMM5, XMM7, _MM_SHUFFLE(3,1,3,1));
+ XMM7 = TEMP[12]; // X
+ XMM0 = _mm_shuffle_ps(XMM0, XMM6, _MM_SHUFFLE(2,0,2,0));
+ XMM4 = _mm_shuffle_ps(XMM4, XMM6, _MM_SHUFFLE(3,1,3,1));
+ XMM6 = TEMP[13]; // Y
+ XMM1 = _mm_add_ps(XMM1, TXXYY[ 7]);
+ TN[ 8] += TN[ 7];
+ XMM5 = _mm_add_ps(XMM5, XMM1);
+ TN[ 9] += TN[ 8];
+ XMM0 = _mm_add_ps(XMM0, XMM5);
+ TN[10] += TN[ 9];
+ XMM4 = _mm_add_ps(XMM4, XMM0);
+ TN[11] += TN[10];
+ TXXYY[ 8] = XMM1;
+ XMM1 = TEMP[14]; // XX
+ TXXYY[ 9] = XMM5;
+ XMM5 = TEMP[15]; // XY
+ TXXYY[10] = XMM0;
+ XMM0 = XMM7;
+ TXXYY[11] = XMM4;
+ XMM4 = XMM6;
+ XMM7 = _mm_shuffle_ps(XMM7, XMM1, _MM_SHUFFLE(1,0,1,0));
+ XMM0 = _mm_shuffle_ps(XMM0, XMM1, _MM_SHUFFLE(3,2,3,2));
+ XMM6 = _mm_shuffle_ps(XMM6, XMM5, _MM_SHUFFLE(1,0,1,0));
+ XMM4 = _mm_shuffle_ps(XMM4, XMM5, _MM_SHUFFLE(3,2,3,2));
+ XMM1 = XMM7;
+ XMM5 = XMM0;
+ XMM7 = _mm_shuffle_ps(XMM7, XMM6, _MM_SHUFFLE(2,0,2,0));
+ XMM1 = _mm_shuffle_ps(XMM1, XMM6, _MM_SHUFFLE(3,1,3,1));
+ XMM0 = _mm_shuffle_ps(XMM0, XMM4, _MM_SHUFFLE(2,0,2,0));
+ XMM5 = _mm_shuffle_ps(XMM5, XMM4, _MM_SHUFFLE(3,1,3,1));
+ XMM7 = _mm_add_ps(XMM7, TXXYY[11]);
+ TN[12] += TN[11];
+ XMM1 = _mm_add_ps(XMM1, XMM7);
+ TN[13] += TN[12];
+ XMM0 = _mm_add_ps(XMM0, XMM1);
+ TN[14] += TN[13];
+ XMM5 = _mm_add_ps(XMM5, XMM0);
+ TN[15] += TN[14];
+ TXXYY[12] = XMM7;
+ TXXYY[13] = XMM1;
+ TXXYY[14] = XMM0;
+ TXXYY[15] = XMM5;
+ TN += 16;
+ TXXYY += 16;
+ }
+ for(i=0;i<p->midpoint1_4;i+=4)
+ {
+ __m128 XMM0, XMM1, XMM4, XMM3;
+ __m128x TN, TN1;
+ int p0, p1, p2, p3;
+ p0 =-sb[i*2+1];
+ p1 =-sb[i*2+3];
+ p2 =-sb[i*2+5];
+ p3 =-sb[i*2+7];
+
+ XMM0 = XXYY[p0];
+ XMM1 = XXYY[p1];
+ XMM4 = XXYY[p2];
+ XMM3 = XXYY[p3];
+
+ TN.sf[0] = N[p0];
+ TN.sf[1] = N[p1];
+ TN.sf[2] = N[p2];
+ TN.sf[3] = N[p3];
+
+ XMM0 = _mm_xor_ps(XMM0, PCS_RNNR.ps);
+ XMM1 = _mm_xor_ps(XMM1, PCS_RNNR.ps);
+ XMM4 = _mm_xor_ps(XMM4, PCS_RNNR.ps);
+ XMM3 = _mm_xor_ps(XMM3, PCS_RNNR.ps);
+
+ p0 = sb[i*2 ];
+ p1 = sb[i*2+2];
+ p2 = sb[i*2+4];
+ p3 = sb[i*2+6];
+
+ XMM0 = _mm_add_ps(XMM0, XXYY[p0]);
+ XMM1 = _mm_add_ps(XMM1, XXYY[p1]);
+ XMM4 = _mm_add_ps(XMM4, XXYY[p2]);
+ XMM3 = _mm_add_ps(XMM3, XXYY[p3]);
+
+ TN1.sf[0] = N[p0];
+ TN1.sf[1] = N[p1];
+ TN1.sf[2] = N[p2];
+ TN1.sf[3] = N[p3];
+
+ TN.ps = _mm_add_ps(TN.ps, TN1.ps);
+
+ bark_noise_hybridmp_SSE_SUBC();
+ XMM4 = _mm_max_ps(XMM4, PFV_0.ps);
+ XMM4 = _mm_sub_ps(XMM4, OFFSET);
+ _mm_store_ps(noise+i , XMM4);
+ }
+ if(p->midpoint2-i<4)
+ {
+ x = (float)i;
+ for (;i<p->midpoint1;i++,x+=1.f)
+ {
+ lo = sb[i*2+1];
+ hi = sb[i*2];
+
+ tN = N[hi] + N[-lo];
+ tX = xxyy[hi*4 ] - xxyy[-lo*4 ];
+ tXX = xxyy[hi*4+1] + xxyy[-lo*4+1];
+ tY = xxyy[hi*4+2] + xxyy[-lo*4+2];
+ tXY = xxyy[hi*4+3] - xxyy[-lo*4+3];
+
+ A = tY * tXX - tX * tXY;
+ B = tN * tXY - tX * tY;
+ D = tN * tXX - tX * tX;
+ R = (A + x * B) / D;
+ if(R<0.f)
+ R = 0.f;
+
+ noise[i] = R - offset;
+ }
+ for (;i<p->midpoint2;i++,x+=1.f)
+ {
+ lo = sb[i*2+1];
+ hi = sb[i*2];
+
+ tN = N[hi] - N[lo];
+ tX = xxyy[hi*4 ] - xxyy[lo*4 ];
+ tXX = xxyy[hi*4+1] - xxyy[lo*4+1];
+ tY = xxyy[hi*4+2] - xxyy[lo*4+2];
+ tXY = xxyy[hi*4+3] - xxyy[lo*4+3];
+
+ A = tY * tXX - tX * tXY;
+ B = tN * tXY - tX * tY;
+ D = tN * tXX - tX * tX;
+ R = (A + x * B) / D;
+ if(R<0.f)
+ R = 0.f;
+ noise[i] = R - offset;
+ }
+ j = (i+3)&(~3);
+ j = (j>=n)?n:j;
+ for (;i<j;i++,x+=1.f)
+ {
+ R = (A + x * B) / D;
+ if(R<0.f)
+ R = 0.f;
+
+ noise[i] = R - offset;
+ }
+ PA = _mm_set_ps1(A);
+ PB = _mm_set_ps1(B);
+ PD = _mm_set_ps1(1.f/D);
+ }
+ else
+ {
+ switch(p->midpoint1%4)
+ {
+ case 0:
+ break;
+ case 1:
+ {
+ __m128 XMM0, XMM1, XMM4, XMM3;
+ __m128x TN, TN1;
+ int p0, p1, p2, p3;
+ p0 =-sb[i*2+1];
+ p1 = sb[i*2+2];
+ p2 = sb[i*2+4];
+ p3 = sb[i*2+6];
+
+ XMM0 = XXYY[p0];
+ XMM1 = XXYY[p1];
+ XMM4 = XXYY[p2];
+ XMM3 = XXYY[p3];
+
+ TN.sf[0] = N[p0];
+ TN.sf[1] = N[p1];
+ TN.sf[2] = N[p2];
+ TN.sf[3] = N[p3];
+
+ XMM0 = _mm_xor_ps(XMM0, PCS_RNNR.ps);
+
+ p0 = sb[i*2 ];
+ p1 = sb[i*2+3];
+ p2 = sb[i*2+5];
+ p3 = sb[i*2+7];
+
+ XMM0 = _mm_add_ps(XMM0, XXYY[p0]);
+ XMM1 = _mm_sub_ps(XMM1, XXYY[p1]);
+ XMM4 = _mm_sub_ps(XMM4, XXYY[p2]);
+ XMM3 = _mm_sub_ps(XMM3, XXYY[p3]);
+
+ TN1.sf[0] = N[p0];
+ TN1.sf[1] = N[p1];
+ TN1.sf[2] = N[p2];
+ TN1.sf[3] = N[p3];
+
+ TN.ps = _mm_sub_ps(TN.ps, _mm_xor_ps(TN1.ps, PCS_NNNR.ps));
+
+ bark_noise_hybridmp_SSE_SUBC();
+ XMM4 = _mm_max_ps(XMM4, PFV_0.ps);
+ XMM4 = _mm_sub_ps(XMM4, OFFSET);
+ _mm_store_ps(noise+i , XMM4);
+ i += 4;
+ }
+ break;
+ case 2:
+ {
+ __m128 XMM0, XMM1, XMM4, XMM3;
+ __m128x TN, TN1;
+ int p0, p1, p2, p3;
+ p0 =-sb[i*2+1];
+ p1 =-sb[i*2+3];
+ p2 = sb[i*2+4];
+ p3 = sb[i*2+6];
+
+ XMM0 = XXYY[p0];
+ XMM1 = XXYY[p1];
+ XMM4 = XXYY[p2];
+ XMM3 = XXYY[p3];
+
+ TN.sf[0] = N[p0];
+ TN.sf[1] = N[p1];
+ TN.sf[2] = N[p2];
+ TN.sf[3] = N[p3];
+
+ XMM0 = _mm_xor_ps(XMM0, PCS_RNNR.ps);
+ XMM1 = _mm_xor_ps(XMM1, PCS_RNNR.ps);
+
+ p0 = sb[i*2 ];
+ p1 = sb[i*2+2];
+ p2 = sb[i*2+5];
+ p3 = sb[i*2+7];
+
+ XMM0 = _mm_add_ps(XMM0, XXYY[p0]);
+ XMM1 = _mm_add_ps(XMM1, XXYY[p1]);
+ XMM4 = _mm_sub_ps(XMM4, XXYY[p2]);
+ XMM3 = _mm_sub_ps(XMM3, XXYY[p3]);
+
+ TN1.sf[0] = N[p0];
+ TN1.sf[1] = N[p1];
+ TN1.sf[2] = N[p2];
+ TN1.sf[3] = N[p3];
+
+ TN.ps = _mm_sub_ps(TN.ps, _mm_xor_ps(TN1.ps, PCS_NNRR.ps));
+
+ bark_noise_hybridmp_SSE_SUBC();
+ XMM4 = _mm_max_ps(XMM4, PFV_0.ps);
+ XMM4 = _mm_sub_ps(XMM4, OFFSET);
+ _mm_store_ps(noise+i , XMM4);
+ i += 4;
+ }
+ break;
+ case 3:
+ {
+ __m128 XMM0, XMM1, XMM4, XMM3;
+ __m128x TN, TN1;
+ int p0, p1, p2, p3;
+ p0 =-sb[i*2+1];
+ p1 =-sb[i*2+3];
+ p2 =-sb[i*2+5];
+ p3 = sb[i*2+6];
+
+ XMM0 = XXYY[p0];
+ XMM1 = XXYY[p1];
+ XMM4 = XXYY[p2];
+ XMM3 = XXYY[p3];
+
+ TN.sf[0] = N[p0];
+ TN.sf[1] = N[p1];
+ TN.sf[2] = N[p2];
+ TN.sf[3] = N[p3];
+
+ XMM0 = _mm_xor_ps(XMM0, PCS_RNNR.ps);
+ XMM1 = _mm_xor_ps(XMM1, PCS_RNNR.ps);
+ XMM4 = _mm_xor_ps(XMM4, PCS_RNNR.ps);
+
+ p0 = sb[i*2 ];
+ p1 = sb[i*2+2];
+ p2 = sb[i*2+4];
+ p3 = sb[i*2+7];
+
+ XMM0 = _mm_add_ps(XMM0, XXYY[p0]);
+ XMM1 = _mm_add_ps(XMM1, XXYY[p1]);
+ XMM4 = _mm_add_ps(XMM4, XXYY[p2]);
+ XMM3 = _mm_sub_ps(XMM3, XXYY[p3]);
+
+ TN1.sf[0] = N[p0];
+ TN1.sf[1] = N[p1];
+ TN1.sf[2] = N[p2];
+ TN1.sf[3] = N[p3];
+
+ TN.ps = _mm_sub_ps(TN.ps, _mm_xor_ps(TN1.ps, PCS_NRRR.ps));
+
+ bark_noise_hybridmp_SSE_SUBC();
+ XMM4 = _mm_max_ps(XMM4, PFV_0.ps);
+ XMM4 = _mm_sub_ps(XMM4, OFFSET);
+ _mm_store_ps(noise+i , XMM4);
+ i += 4;
+ }
+ break;
+ }
+ for(;i<p->midpoint2_16;i+=16)
+ {
+ register __m128 XMM0, XMM1, XMM2, XMM3;
+ register __m128 XMM4, XMM5, XMM6, XMM7;
+ __m128x TN0, TN1, TN2;
+ int p0, p1, p2, p3;
+ p0 = sb[i*2 ];
+ p1 = sb[i*2+ 2];
+ p2 = sb[i*2+ 4];
+ p3 = sb[i*2+ 6];
+ XMM0 = XXYY[p0];
+ XMM1 = XXYY[p1];
+ XMM4 = XXYY[p2];
+ XMM3 = XXYY[p3];
+ TN0.sf[0] = N[p0];
+ TN0.sf[1] = N[p1];
+ TN0.sf[2] = N[p2];
+ TN0.sf[3] = N[p3];
+ p0 = sb[i*2+ 1];
+ p1 = sb[i*2+ 3];
+ p2 = sb[i*2+ 5];
+ p3 = sb[i*2+ 7];
+ XMM2 = XXYY[p0];
+ XMM5 = XXYY[p1];
+ XMM6 = XXYY[p2];
+ XMM7 = XXYY[p3];
+ XMM0 = _mm_sub_ps(XMM0, XMM2);
+ XMM1 = _mm_sub_ps(XMM1, XMM5);
+ XMM4 = _mm_sub_ps(XMM4, XMM6);
+ XMM3 = _mm_sub_ps(XMM3, XMM7);
+ TN1.sf[0] = N[p0];
+ TN1.sf[1] = N[p1];
+ TN1.sf[2] = N[p2];
+ TN1.sf[3] = N[p3];
+ XMM2 = XMM0;
+ XMM5 = XMM4;
+ XMM0 = _mm_shuffle_ps(XMM0, XMM1, _MM_SHUFFLE(1,0,1,0));
+ XMM2 = _mm_shuffle_ps(XMM2, XMM1, _MM_SHUFFLE(3,2,3,2));
+ XMM7 = TN0.ps;
+ XMM6 = TN1.ps;
+ XMM4 = _mm_shuffle_ps(XMM4, XMM3, _MM_SHUFFLE(1,0,1,0));
+ XMM5 = _mm_shuffle_ps(XMM5, XMM3, _MM_SHUFFLE(3,2,3,2));
+ p0 = sb[i*2+ 8];
+ p1 = sb[i*2+10];
+ XMM1 = XMM0;
+ XMM3 = XMM2;
+ XMM0 = _mm_shuffle_ps(XMM0, XMM4, _MM_SHUFFLE(2,0,2,0));
+ XMM1 = _mm_shuffle_ps(XMM1, XMM4, _MM_SHUFFLE(3,1,3,1));
+ XMM7 = _mm_sub_ps(XMM7, XMM6);
+ p2 = sb[i*2+12];
+ XMM2 = _mm_shuffle_ps(XMM2, XMM5, _MM_SHUFFLE(2,0,2,0));
+ XMM3 = _mm_shuffle_ps(XMM3, XMM5, _MM_SHUFFLE(3,1,3,1));
+ TN0.ps = XMM7;
+ p3 = sb[i*2+14];
+ XMM4 = XMM2;
+ XMM5 = XMM0;
+ XMM6 = XMM3;
+ XMM7 = XMM0;
+ XMM4 = _mm_mul_ps(XMM4, XMM1);
+ XMM5 = _mm_mul_ps(XMM5, XMM3);
+ XMM3 = TN0.ps;
+ XMM6 = _mm_mul_ps(XMM6, XMM3);
+ XMM1 = _mm_mul_ps(XMM1, XMM3);
+ XMM3 = _mm_load_ps(findex+i );
+ XMM7 = _mm_mul_ps(XMM7, XMM2);
+ XMM2 = XXYY[p0];
+ XMM0 = _mm_mul_ps(XMM0, XMM0);
+ XMM4 = _mm_sub_ps(XMM4, XMM5);
+ XMM5 = XXYY[p1];
+ XMM6 = _mm_sub_ps(XMM6, XMM7);
+ XMM7 = XXYY[p2];
+ XMM1 = _mm_sub_ps(XMM1, XMM0);
+ XMM0 = XXYY[p3];
+ XMM6 = _mm_mul_ps(XMM6, XMM3);
+ XMM3 = _mm_rcp_ps(XMM1);
+ TN0.sf[0] = N[p0];
+ TN0.sf[1] = N[p1];
+ XMM4 = _mm_add_ps(XMM4, XMM6);
+ XMM1 = _mm_mul_ps(XMM1, XMM3);
+ TN0.sf[2] = N[p2];
+ TN0.sf[3] = N[p3];
+ XMM1 = _mm_mul_ps(XMM1, XMM3);
+ p0 = sb[i*2+ 9];
+ p1 = sb[i*2+11];
+ XMM3 = _mm_add_ps(XMM3, XMM3);
+ p2 = sb[i*2+13];
+ p3 = sb[i*2+15];
+ XMM3 = _mm_sub_ps(XMM3, XMM1);
+ XMM1 = _mm_load_ps(PFV_0.sf);
+ XMM6 = XXYY[p0];
+ XMM4 = _mm_mul_ps(XMM4, XMM3);
+ XMM3 = OFFSET;
+ XMM4 = _mm_max_ps(XMM4, XMM1);
+ XMM1 = XXYY[p1];
+ XMM4 = _mm_sub_ps(XMM4, XMM3);
+ XMM3 = XXYY[p2];
+ _mm_store_ps(noise+i , XMM4);
+ XMM4 = XXYY[p3];
+ XMM2 = _mm_sub_ps(XMM2, XMM6);
+ XMM5 = _mm_sub_ps(XMM5, XMM1);
+ XMM7 = _mm_sub_ps(XMM7, XMM3);
+ XMM0 = _mm_sub_ps(XMM0, XMM4);
+ TN1.sf[0] = N[p0];
+ TN1.sf[1] = N[p1];
+ TN1.sf[2] = N[p2];
+ TN1.sf[3] = N[p3];
+ XMM6 = XMM2;
+ XMM1 = XMM7;
+ XMM2 = _mm_shuffle_ps(XMM2, XMM5, _MM_SHUFFLE(1,0,1,0));
+ XMM6 = _mm_shuffle_ps(XMM6, XMM5, _MM_SHUFFLE(3,2,3,2));
+ XMM4 = TN0.ps;
+ XMM3 = TN1.ps;
+ XMM7 = _mm_shuffle_ps(XMM7, XMM0, _MM_SHUFFLE(1,0,1,0));
+ XMM1 = _mm_shuffle_ps(XMM1, XMM0, _MM_SHUFFLE(3,2,3,2));
+ p0 = sb[i*2+16];
+ p1 = sb[i*2+18];
+ XMM4 = _mm_sub_ps(XMM4, XMM3);
+ XMM5 = XMM2;
+ XMM0 = XMM6;
+ XMM2 = _mm_shuffle_ps(XMM2, XMM7, _MM_SHUFFLE(2,0,2,0));
+ XMM5 = _mm_shuffle_ps(XMM5, XMM7, _MM_SHUFFLE(3,1,3,1));
+ p2 = sb[i*2+20];
+ p3 = sb[i*2+22];
+ TN0.ps = XMM4;
+ XMM6 = _mm_shuffle_ps(XMM6, XMM1, _MM_SHUFFLE(2,0,2,0));
+ XMM0 = _mm_shuffle_ps(XMM0, XMM1, _MM_SHUFFLE(3,1,3,1));
+ TN2.sf[0] = N[p0];
+ TN2.sf[1] = N[p1];
+ TN2.sf[2] = N[p2];
+ TN2.sf[3] = N[p3];
+ XMM7 = XMM6;
+ XMM1 = XMM2;
+ XMM3 = XMM0;
+ XMM4 = XMM2;
+ XMM7 = _mm_mul_ps(XMM7, XMM5);
+ XMM1 = _mm_mul_ps(XMM1, XMM0);
+ XMM0 = TN0.ps;
+ XMM3 = _mm_mul_ps(XMM3, XMM0);
+ XMM5 = _mm_mul_ps(XMM5, XMM0);
+ XMM0 = _mm_load_ps(findex+i+ 4);
+ XMM4 = _mm_mul_ps(XMM4, XMM6);
+ XMM6 = XXYY[p0];
+ XMM2 = _mm_mul_ps(XMM2, XMM2);
+ XMM7 = _mm_sub_ps(XMM7, XMM1);
+ XMM1 = XXYY[p1];
+ XMM3 = _mm_sub_ps(XMM3, XMM4);
+ XMM4 = XXYY[p2];
+ XMM5 = _mm_sub_ps(XMM5, XMM2);
+ XMM2 = XXYY[p3];
+ XMM3 = _mm_mul_ps(XMM3, XMM0);
+ XMM0 = _mm_rcp_ps(XMM5);
+ p0 = sb[i*2+17];
+ p1 = sb[i*2+19];
+ XMM7 = _mm_add_ps(XMM7, XMM3);
+ XMM3 = XXYY[p0];
+ XMM5 = _mm_mul_ps(XMM5, XMM0);
+ p2 = sb[i*2+21];
+ p3 = sb[i*2+23];
+ XMM5 = _mm_mul_ps(XMM5, XMM0);
+ XMM0 = _mm_add_ps(XMM0, XMM0);
+ TN1.sf[0] = N[p0];
+ XMM0 = _mm_sub_ps(XMM0, XMM5);
+ XMM5 = _mm_load_ps(PFV_0.sf);
+ XMM7 = _mm_mul_ps(XMM7, XMM0);
+ TN1.sf[1] = N[p1];
+ XMM0 = OFFSET;
+ XMM7 = _mm_max_ps(XMM7, XMM5);
+ TN1.sf[2] = N[p2];
+ XMM5 = XXYY[p1];
+ XMM7 = _mm_sub_ps(XMM7, XMM0);
+ TN1.sf[3] = N[p3];
+ XMM0 = XXYY[p2];
+ _mm_store_ps(noise+i+ 4, XMM7);
+ XMM7 = XXYY[p3];
+ XMM6 = _mm_sub_ps(XMM6, XMM3);
+ XMM1 = _mm_sub_ps(XMM1, XMM5);
+ XMM4 = _mm_sub_ps(XMM4, XMM0);
+ XMM2 = _mm_sub_ps(XMM2, XMM7);
+ XMM3 = XMM6;
+ XMM5 = XMM4;
+ XMM6 = _mm_shuffle_ps(XMM6, XMM1, _MM_SHUFFLE(1,0,1,0));
+ XMM3 = _mm_shuffle_ps(XMM3, XMM1, _MM_SHUFFLE(3,2,3,2));
+ XMM7 = TN2.ps;
+ XMM0 = TN1.ps;
+ XMM4 = _mm_shuffle_ps(XMM4, XMM2, _MM_SHUFFLE(1,0,1,0));
+ XMM5 = _mm_shuffle_ps(XMM5, XMM2, _MM_SHUFFLE(3,2,3,2));
+ p0 = sb[i*2+24];
+ p1 = sb[i*2+26];
+ XMM1 = XMM6;
+ XMM2 = XMM3;
+ XMM6 = _mm_shuffle_ps(XMM6, XMM4, _MM_SHUFFLE(2,0,2,0));
+ XMM1 = _mm_shuffle_ps(XMM1, XMM4, _MM_SHUFFLE(3,1,3,1));
+ XMM7 = _mm_sub_ps(XMM7, XMM0);
+ p2 = sb[i*2+28];
+ XMM3 = _mm_shuffle_ps(XMM3, XMM5, _MM_SHUFFLE(2,0,2,0));
+ XMM2 = _mm_shuffle_ps(XMM2, XMM5, _MM_SHUFFLE(3,1,3,1));
+ TN0.ps = XMM7;
+ p3 = sb[i*2+30];
+ XMM4 = XMM3;
+ XMM5 = XMM6;
+ XMM0 = XMM2;
+ XMM7 = XMM6;
+ XMM4 = _mm_mul_ps(XMM4, XMM1);
+ XMM5 = _mm_mul_ps(XMM5, XMM2);
+ XMM2 = TN0.ps;
+ XMM0 = _mm_mul_ps(XMM0, XMM2);
+ XMM1 = _mm_mul_ps(XMM1, XMM2);
+ XMM2 = _mm_load_ps(findex+i+ 8);
+ XMM7 = _mm_mul_ps(XMM7, XMM3);
+ XMM3 = XXYY[p0];
+ XMM6 = _mm_mul_ps(XMM6, XMM6);
+ XMM4 = _mm_sub_ps(XMM4, XMM5);
+ XMM5 = XXYY[p1];
+ XMM0 = _mm_sub_ps(XMM0, XMM7);
+ XMM7 = XXYY[p2];
+ XMM1 = _mm_sub_ps(XMM1, XMM6);
+ XMM6 = XXYY[p3];
+ XMM0 = _mm_mul_ps(XMM0, XMM2);
+ XMM2 = _mm_rcp_ps(XMM1);
+ TN0.sf[0] = N[p0];
+ TN0.sf[1] = N[p1];
+ XMM4 = _mm_add_ps(XMM4, XMM0);
+ XMM1 = _mm_mul_ps(XMM1, XMM2);
+ TN0.sf[2] = N[p2];
+ TN0.sf[3] = N[p3];
+ XMM1 = _mm_mul_ps(XMM1, XMM2);
+ p0 = sb[i*2+25];
+ p1 = sb[i*2+27];
+ XMM2 = _mm_add_ps(XMM2, XMM2);
+ p2 = sb[i*2+29];
+ p3 = sb[i*2+31];
+ XMM2 = _mm_sub_ps(XMM2, XMM1);
+ XMM1 = _mm_load_ps(PFV_0.sf);
+ XMM0 = XXYY[p0];
+ XMM4 = _mm_mul_ps(XMM4, XMM2);
+ XMM2 = OFFSET;
+ XMM4 = _mm_max_ps(XMM4, XMM1);
+ XMM1 = XXYY[p1];
+ XMM4 = _mm_sub_ps(XMM4, XMM2);
+ XMM2 = XXYY[p2];
+ _mm_store_ps(noise+i+ 8, XMM4);
+ XMM4 = XXYY[p3];
+ XMM3 = _mm_sub_ps(XMM3, XMM0);
+ XMM5 = _mm_sub_ps(XMM5, XMM1);
+ XMM7 = _mm_sub_ps(XMM7, XMM2);
+ XMM6 = _mm_sub_ps(XMM6, XMM4);
+ TN1.sf[0] = N[p0];
+ TN1.sf[1] = N[p1];
+ TN1.sf[2] = N[p2];
+ TN1.sf[3] = N[p3];
+ XMM0 = XMM3;
+ XMM1 = XMM7;
+ XMM3 = _mm_shuffle_ps(XMM3, XMM5, _MM_SHUFFLE(1,0,1,0));
+ XMM0 = _mm_shuffle_ps(XMM0, XMM5, _MM_SHUFFLE(3,2,3,2));
+ XMM4 = TN0.ps;
+ XMM2 = TN1.ps;
+ XMM7 = _mm_shuffle_ps(XMM7, XMM6, _MM_SHUFFLE(1,0,1,0));
+ XMM1 = _mm_shuffle_ps(XMM1, XMM6, _MM_SHUFFLE(3,2,3,2));
+ XMM4 = _mm_sub_ps(XMM4, XMM2);
+ XMM5 = XMM3;
+ XMM6 = XMM0;
+ XMM3 = _mm_shuffle_ps(XMM3, XMM7, _MM_SHUFFLE(2,0,2,0));
+ XMM5 = _mm_shuffle_ps(XMM5, XMM7, _MM_SHUFFLE(3,1,3,1));
+ TN0.ps = XMM4;
+ XMM0 = _mm_shuffle_ps(XMM0, XMM1, _MM_SHUFFLE(2,0,2,0));
+ XMM6 = _mm_shuffle_ps(XMM6, XMM1, _MM_SHUFFLE(3,1,3,1));
+ XMM7 = XMM0;
+ XMM1 = XMM3;
+ XMM2 = XMM6;
+ XMM4 = XMM3;
+ XMM7 = _mm_mul_ps(XMM7, XMM5);
+ XMM1 = _mm_mul_ps(XMM1, XMM6);
+ XMM6 = TN0.ps;
+ XMM2 = _mm_mul_ps(XMM2, XMM6);
+ XMM5 = _mm_mul_ps(XMM5, XMM6);
+ XMM6 = _mm_load_ps(findex+i+12);
+ XMM4 = _mm_mul_ps(XMM4, XMM0);
+ XMM3 = _mm_mul_ps(XMM3, XMM3);
+ XMM7 = _mm_sub_ps(XMM7, XMM1);
+ XMM2 = _mm_sub_ps(XMM2, XMM4);
+ XMM5 = _mm_sub_ps(XMM5, XMM3);
+ XMM2 = _mm_mul_ps(XMM2, XMM6);
+ XMM6 = _mm_rcp_ps(XMM5);
+ XMM7 = _mm_add_ps(XMM7, XMM2);
+ XMM5 = _mm_mul_ps(XMM5, XMM6);
+ XMM5 = _mm_mul_ps(XMM5, XMM6);
+ XMM6 = _mm_add_ps(XMM6, XMM6);
+ XMM6 = _mm_sub_ps(XMM6, XMM5);
+ XMM5 = _mm_load_ps(PFV_0.sf);
+ XMM7 = _mm_mul_ps(XMM7, XMM6);
+ XMM6 = OFFSET;
+ XMM7 = _mm_max_ps(XMM7, XMM5);
+ XMM7 = _mm_sub_ps(XMM7, XMM6);
+ _mm_store_ps(noise+i+12, XMM7);
+ }
+ for(;i<p->midpoint2_8;i+=8)
+ {
+ register __m128 XMM0, XMM1, XMM2, XMM3;
+ register __m128 XMM4, XMM5, XMM6, XMM7;
+ __m128x TN0, TN1;
+ int p0, p1, p2, p3;
+ p0 = sb[i*2 ];
+ p1 = sb[i*2+ 2];
+ p2 = sb[i*2+ 4];
+ p3 = sb[i*2+ 6];
+ XMM0 = XXYY[p0];
+ XMM1 = XXYY[p1];
+ XMM4 = XXYY[p2];
+ XMM3 = XXYY[p3];
+ TN0.sf[0] = N[p0];
+ TN0.sf[1] = N[p1];
+ TN0.sf[2] = N[p2];
+ TN0.sf[3] = N[p3];
+ p0 = sb[i*2+ 1];
+ p1 = sb[i*2+ 3];
+ p2 = sb[i*2+ 5];
+ p3 = sb[i*2+ 7];
+ XMM2 = XXYY[p0];
+ XMM5 = XXYY[p1];
+ XMM6 = XXYY[p2];
+ XMM7 = XXYY[p3];
+ XMM0 = _mm_sub_ps(XMM0, XMM2);
+ XMM1 = _mm_sub_ps(XMM1, XMM5);
+ XMM4 = _mm_sub_ps(XMM4, XMM6);
+ XMM3 = _mm_sub_ps(XMM3, XMM7);
+ TN1.sf[0] = N[p0];
+ TN1.sf[1] = N[p1];
+ TN1.sf[2] = N[p2];
+ TN1.sf[3] = N[p3];
+ XMM2 = XMM0;
+ XMM5 = XMM4;
+ XMM0 = _mm_shuffle_ps(XMM0, XMM1, _MM_SHUFFLE(1,0,1,0));
+ XMM2 = _mm_shuffle_ps(XMM2, XMM1, _MM_SHUFFLE(3,2,3,2));
+ XMM7 = TN0.ps;
+ XMM6 = TN1.ps;
+ XMM4 = _mm_shuffle_ps(XMM4, XMM3, _MM_SHUFFLE(1,0,1,0));
+ XMM5 = _mm_shuffle_ps(XMM5, XMM3, _MM_SHUFFLE(3,2,3,2));
+ p0 = sb[i*2+ 8];
+ p1 = sb[i*2+10];
+ XMM1 = XMM0;
+ XMM3 = XMM2;
+ XMM0 = _mm_shuffle_ps(XMM0, XMM4, _MM_SHUFFLE(2,0,2,0));
+ XMM1 = _mm_shuffle_ps(XMM1, XMM4, _MM_SHUFFLE(3,1,3,1));
+ XMM7 = _mm_sub_ps(XMM7, XMM6);
+ p2 = sb[i*2+12];
+ XMM2 = _mm_shuffle_ps(XMM2, XMM5, _MM_SHUFFLE(2,0,2,0));
+ XMM3 = _mm_shuffle_ps(XMM3, XMM5, _MM_SHUFFLE(3,1,3,1));
+ TN0.ps = XMM7;
+ p3 = sb[i*2+14];
+ XMM4 = XMM2;
+ XMM5 = XMM0;
+ XMM6 = XMM3;
+ XMM7 = XMM0;
+ XMM4 = _mm_mul_ps(XMM4, XMM1);
+ XMM5 = _mm_mul_ps(XMM5, XMM3);
+ XMM3 = TN0.ps;
+ XMM6 = _mm_mul_ps(XMM6, XMM3);
+ XMM1 = _mm_mul_ps(XMM1, XMM3);
+ XMM3 = _mm_load_ps(findex+i );
+ XMM7 = _mm_mul_ps(XMM7, XMM2);
+ XMM2 = XXYY[p0];
+ XMM0 = _mm_mul_ps(XMM0, XMM0);
+ XMM4 = _mm_sub_ps(XMM4, XMM5);
+ XMM5 = XXYY[p1];
+ XMM6 = _mm_sub_ps(XMM6, XMM7);
+ XMM7 = XXYY[p2];
+ XMM1 = _mm_sub_ps(XMM1, XMM0);
+ XMM0 = XXYY[p3];
+ XMM6 = _mm_mul_ps(XMM6, XMM3);
+ XMM3 = _mm_rcp_ps(XMM1);
+ TN0.sf[0] = N[p0];
+ TN0.sf[1] = N[p1];
+ XMM4 = _mm_add_ps(XMM4, XMM6);
+ XMM1 = _mm_mul_ps(XMM1, XMM3);
+ TN0.sf[2] = N[p2];
+ TN0.sf[3] = N[p3];
+ XMM1 = _mm_mul_ps(XMM1, XMM3);
+ p0 = sb[i*2+ 9];
+ p1 = sb[i*2+11];
+ XMM3 = _mm_add_ps(XMM3, XMM3);
+ p2 = sb[i*2+13];
+ p3 = sb[i*2+15];
+ XMM3 = _mm_sub_ps(XMM3, XMM1);
+ XMM1 = _mm_load_ps(PFV_0.sf);
+ XMM6 = XXYY[p0];
+ XMM4 = _mm_mul_ps(XMM4, XMM3);
+ XMM3 = OFFSET;
+ XMM4 = _mm_max_ps(XMM4, XMM1);
+ XMM1 = XXYY[p1];
+ XMM4 = _mm_sub_ps(XMM4, XMM3);
+ XMM3 = XXYY[p2];
+ _mm_store_ps(noise+i , XMM4);
+ XMM4 = XXYY[p3];
+ XMM2 = _mm_sub_ps(XMM2, XMM6);
+ XMM5 = _mm_sub_ps(XMM5, XMM1);
+ XMM7 = _mm_sub_ps(XMM7, XMM3);
+ XMM0 = _mm_sub_ps(XMM0, XMM4);
+ TN1.sf[0] = N[p0];
+ TN1.sf[1] = N[p1];
+ TN1.sf[2] = N[p2];
+ TN1.sf[3] = N[p3];
+ XMM6 = XMM2;
+ XMM1 = XMM7;
+ XMM2 = _mm_shuffle_ps(XMM2, XMM5, _MM_SHUFFLE(1,0,1,0));
+ XMM6 = _mm_shuffle_ps(XMM6, XMM5, _MM_SHUFFLE(3,2,3,2));
+ XMM4 = TN0.ps;
+ XMM3 = TN1.ps;
+ XMM7 = _mm_shuffle_ps(XMM7, XMM0, _MM_SHUFFLE(1,0,1,0));
+ XMM1 = _mm_shuffle_ps(XMM1, XMM0, _MM_SHUFFLE(3,2,3,2));
+ XMM4 = _mm_sub_ps(XMM4, XMM3);
+ XMM5 = XMM2;
+ XMM0 = XMM6;
+ XMM2 = _mm_shuffle_ps(XMM2, XMM7, _MM_SHUFFLE(2,0,2,0));
+ XMM5 = _mm_shuffle_ps(XMM5, XMM7, _MM_SHUFFLE(3,1,3,1));
+ TN0.ps = XMM4;
+ XMM6 = _mm_shuffle_ps(XMM6, XMM1, _MM_SHUFFLE(2,0,2,0));
+ XMM0 = _mm_shuffle_ps(XMM0, XMM1, _MM_SHUFFLE(3,1,3,1));
+ XMM7 = XMM6;
+ XMM1 = XMM2;
+ XMM3 = XMM0;
+ XMM4 = XMM2;
+ XMM7 = _mm_mul_ps(XMM7, XMM5);
+ XMM1 = _mm_mul_ps(XMM1, XMM0);
+ XMM0 = TN0.ps;
+ XMM3 = _mm_mul_ps(XMM3, XMM0);
+ XMM5 = _mm_mul_ps(XMM5, XMM0);
+ XMM0 = _mm_load_ps(findex+i+ 4);
+ XMM4 = _mm_mul_ps(XMM4, XMM6);
+ XMM2 = _mm_mul_ps(XMM2, XMM2);
+ XMM7 = _mm_sub_ps(XMM7, XMM1);
+ XMM3 = _mm_sub_ps(XMM3, XMM4);
+ XMM5 = _mm_sub_ps(XMM5, XMM2);
+ XMM3 = _mm_mul_ps(XMM3, XMM0);
+ XMM0 = _mm_rcp_ps(XMM5);
+ XMM7 = _mm_add_ps(XMM7, XMM3);
+ XMM5 = _mm_mul_ps(XMM5, XMM0);
+ XMM5 = _mm_mul_ps(XMM5, XMM0);
+ XMM0 = _mm_add_ps(XMM0, XMM0);
+ XMM0 = _mm_sub_ps(XMM0, XMM5);
+ XMM5 = _mm_load_ps(PFV_0.sf);
+ XMM7 = _mm_mul_ps(XMM7, XMM0);
+ XMM0 = OFFSET;
+ XMM7 = _mm_max_ps(XMM7, XMM5);
+ XMM7 = _mm_sub_ps(XMM7, XMM0);
+ _mm_store_ps(noise+i+ 4, XMM7);
+ }
+ for(;i<p->midpoint2_4;i+=4)
+ {
+ register __m128 XMM0, XMM1, XMM2, XMM3;
+ register __m128 XMM4, XMM5, XMM6, XMM7;
+ __m128x TN0, TN1;
+ int p0, p1, p2, p3;
+ p0 = sb[i*2 ];
+ p1 = sb[i*2+ 2];
+ p2 = sb[i*2+ 4];
+ p3 = sb[i*2+ 6];
+
+ XMM0 = XXYY[p0];
+ XMM1 = XXYY[p1];
+ XMM4 = XXYY[p2];
+ XMM3 = XXYY[p3];
+
+ TN0.sf[0] = N[p0];
+ TN0.sf[1] = N[p1];
+ TN0.sf[2] = N[p2];
+ TN0.sf[3] = N[p3];
+
+ p0 = sb[i*2+ 1];
+ p1 = sb[i*2+ 3];
+ p2 = sb[i*2+ 5];
+ p3 = sb[i*2+ 7];
+
+ XMM2 = XXYY[p0];
+ XMM5 = XXYY[p1];
+ XMM6 = XXYY[p2];
+ XMM7 = XXYY[p3];
+
+ XMM0 = _mm_sub_ps(XMM0, XMM2);
+ XMM1 = _mm_sub_ps(XMM1, XMM5);
+ XMM4 = _mm_sub_ps(XMM4, XMM6);
+ XMM3 = _mm_sub_ps(XMM3, XMM7);
+
+ XMM2 = XMM0;
+ XMM5 = XMM4;
+ XMM0 = _mm_shuffle_ps(XMM0, XMM1, _MM_SHUFFLE(1,0,1,0));
+ XMM2 = _mm_shuffle_ps(XMM2, XMM1, _MM_SHUFFLE(3,2,3,2));
+ TN1.sf[0] = N[p0];
+ TN1.sf[1] = N[p1];
+ XMM7 = TN0.ps;
+ XMM4 = _mm_shuffle_ps(XMM4, XMM3, _MM_SHUFFLE(1,0,1,0));
+ XMM5 = _mm_shuffle_ps(XMM5, XMM3, _MM_SHUFFLE(3,2,3,2));
+ TN1.sf[2] = N[p2];
+ TN1.sf[3] = N[p3];
+ XMM1 = XMM0;
+ XMM3 = XMM2;
+ XMM6 = TN1.ps;
+ XMM0 = _mm_shuffle_ps(XMM0, XMM4, _MM_SHUFFLE(2,0,2,0));
+ XMM1 = _mm_shuffle_ps(XMM1, XMM4, _MM_SHUFFLE(3,1,3,1));
+ XMM7 = _mm_sub_ps(XMM7, XMM6);
+ XMM2 = _mm_shuffle_ps(XMM2, XMM5, _MM_SHUFFLE(2,0,2,0));
+ XMM3 = _mm_shuffle_ps(XMM3, XMM5, _MM_SHUFFLE(3,1,3,1));
+ TN0.ps = XMM7;
+ XMM4 = XMM2;
+ XMM5 = XMM0;
+ XMM6 = XMM3;
+ XMM7 = XMM0;
+ XMM4 = _mm_mul_ps(XMM4, XMM1);
+ XMM5 = _mm_mul_ps(XMM5, XMM3);
+ XMM3 = TN0.ps;
+ XMM6 = _mm_mul_ps(XMM6, XMM3);
+ XMM1 = _mm_mul_ps(XMM1, XMM3);
+ XMM3 = _mm_load_ps(findex+i );
+ XMM7 = _mm_mul_ps(XMM7, XMM2);
+ XMM0 = _mm_mul_ps(XMM0, XMM0);
+ XMM4 = _mm_sub_ps(XMM4, XMM5);
+ XMM6 = _mm_sub_ps(XMM6, XMM7);
+ XMM1 = _mm_sub_ps(XMM1, XMM0);
+ XMM6 = _mm_mul_ps(XMM6, XMM3);
+ XMM3 = _mm_rcp_ps(XMM1);
+ XMM4 = _mm_add_ps(XMM4, XMM6);
+ XMM1 = _mm_mul_ps(XMM1, XMM3);
+ XMM1 = _mm_mul_ps(XMM1, XMM3);
+ XMM3 = _mm_add_ps(XMM3, XMM3);
+ XMM3 = _mm_sub_ps(XMM3, XMM1);
+ XMM1 = _mm_load_ps(PFV_0.sf);
+ XMM4 = _mm_mul_ps(XMM4, XMM3);
+ XMM3 = OFFSET;
+ XMM4 = _mm_max_ps(XMM4, XMM1);
+ XMM4 = _mm_sub_ps(XMM4, XMM3);
+ _mm_store_ps(noise+i , XMM4);
+ }
+ if(i!=n)
+ {
+ __m128 XMM0, XMM1, XMM4, XMM3;
+ __m128x TN, TN1;
+ int p0, p1, p2;
+ switch(p->midpoint2%4)
+ {
+ case 0:
+ {
+ lo = sb[i*2-1];
+ hi = sb[i*2-2];
+
+ tN = N[hi] - N[lo];
+ tX = xxyy[hi*4 ] - xxyy[lo*4 ];
+ tXX = xxyy[hi*4+1] - xxyy[lo*4+1];
+ tY = xxyy[hi*4+2] - xxyy[lo*4+2];
+ tXY = xxyy[hi*4+3] - xxyy[lo*4+3];
+
+ A = tY * tXX - tX * tXY;
+ B = tN * tXY - tX * tY;
+ D = tN * tXX - tX * tX;
+ PA = _mm_set_ps1(A);
+ PB = _mm_set_ps1(B);
+ PD = _mm_set_ps1(1.f/D);
+ }
+ break;
+ case 1:
+ {
+ p0 = sb[i*2 ];
+
+ XMM0 = XXYY[p0];
+
+ TN.ps = _mm_set_ps1(N[p0]);
+
+ p0 = sb[i*2+1];
+
+ XMM1 =
+ XMM4 =
+ XMM3 =
+ XMM0 = _mm_sub_ps(XMM0, XXYY[p0]);
+
+ TN1.ps = _mm_set_ps1(N[p0]);
+
+ TN.ps = _mm_sub_ps(TN.ps, TN1.ps);
+
+ bark_noise_hybridmp_SSE_SUBC2();
+ XMM4 = _mm_max_ps(XMM4, PFV_0.ps);
+ XMM4 = _mm_sub_ps(XMM4, OFFSET);
+ _mm_store_ps(noise+i , XMM4);
+ i += 4;
+ PA = _mm_shuffle_ps(PA, PA, _MM_SHUFFLE(0,0,0,0));
+ PB = _mm_shuffle_ps(PB, PB, _MM_SHUFFLE(0,0,0,0));
+ PD = _mm_shuffle_ps(PD, PD, _MM_SHUFFLE(0,0,0,0));
+ }
+ break;
+ case 2:
+ {
+ p0 = sb[i*2 ];
+ p1 = sb[i*2+2];
+
+ XMM0 = XXYY[p0];
+ XMM1 = XXYY[p1];
+
+ TN.sf[0] = N[p0];
+ TN.sf[1] =
+ TN.sf[2] =
+ TN.sf[3] = N[p1];
+
+ p0 = sb[i*2+1];
+ p1 = sb[i*2+3];
+
+ XMM0 = _mm_sub_ps(XMM0, XXYY[p0]);
+ XMM4 =
+ XMM3 =
+ XMM1 = _mm_sub_ps(XMM1, XXYY[p1]);
+
+ TN1.sf[0] = N[p0];
+ TN1.sf[1] =
+ TN1.sf[2] =
+ TN1.sf[3] = N[p1];
+
+ TN.ps = _mm_sub_ps(TN.ps, TN1.ps);
+
+ bark_noise_hybridmp_SSE_SUBC2();
+ XMM4 = _mm_max_ps(XMM4, PFV_0.ps);
+ XMM4 = _mm_sub_ps(XMM4, OFFSET);
+ _mm_store_ps(noise+i , XMM4);
+ i += 4;
+ PA = _mm_shuffle_ps(PA, PA, _MM_SHUFFLE(1,1,1,1));
+ PB = _mm_shuffle_ps(PB, PB, _MM_SHUFFLE(1,1,1,1));
+ PD = _mm_shuffle_ps(PD, PD, _MM_SHUFFLE(1,1,1,1));
+ }
+ break;
+ case 3:
+ {
+ p0 = sb[i*2 ];
+ p1 = sb[i*2+2];
+ p2 = sb[i*2+4];
+
+ XMM0 = XXYY[p0];
+ XMM1 = XXYY[p1];
+ XMM4 = XXYY[p2];
+
+ TN.sf[0] = N[p0];
+ TN.sf[1] = N[p1];
+ TN.sf[2] =
+ TN.sf[3] = N[p2];
+
+ p0 = sb[i*2+1];
+ p1 = sb[i*2+3];
+ p2 = sb[i*2+5];
+
+ XMM0 = _mm_sub_ps(XMM0, XXYY[p0]);
+ XMM1 = _mm_sub_ps(XMM1, XXYY[p1]);
+ XMM3 =
+ XMM4 = _mm_sub_ps(XMM4, XXYY[p2]);
+
+ TN1.sf[0] = N[p0];
+ TN1.sf[1] = N[p1];
+ TN1.sf[2] =
+ TN1.sf[3] = N[p2];
+
+ TN.ps = _mm_sub_ps(TN.ps, TN1.ps);
+
+ bark_noise_hybridmp_SSE_SUBC2();
+ XMM4 = _mm_max_ps(XMM4, PFV_0.ps);
+ XMM4 = _mm_sub_ps(XMM4, OFFSET);
+ _mm_store_ps(noise+i , XMM4);
+ i += 4;
+ PA = _mm_shuffle_ps(PA, PA, _MM_SHUFFLE(2,2,2,2));
+ PB = _mm_shuffle_ps(PB, PB, _MM_SHUFFLE(2,2,2,2));
+ PD = _mm_shuffle_ps(PD, PD, _MM_SHUFFLE(2,2,2,2));
+ }
+ break;
+ }
+ }
+ }
+ if(i<n)
+ {
+ __m128 XMM0 = PA;
+ __m128 XMM1 = PB;
+ __m128 XMM2 = _mm_set_ps1(-offset);
+ XMM0 = _mm_mul_ps(XMM0, PD);
+ XMM1 = _mm_mul_ps(XMM1, PD);
+ XMM0 = _mm_sub_ps(XMM0, OFFSET);
+ if(i%8!=0)
+ {
+ __m128 XMM4 = _mm_load_ps(findex+i );
+ XMM4 = _mm_mul_ps(XMM4, XMM1);
+ XMM4 = _mm_add_ps(XMM4, XMM0);
+ XMM4 = _mm_max_ps(XMM4, XMM2);
+ _mm_store_ps(noise+i , XMM4);
+ i += 4;
+ }
+ if(i%16!=0)
+ {
+ __m128 XMM4 = _mm_load_ps(findex+i );
+ __m128 XMM5 = _mm_load_ps(findex+i+ 4);
+ XMM4 = _mm_mul_ps(XMM4, XMM1);
+ XMM5 = _mm_mul_ps(XMM5, XMM1);
+ XMM4 = _mm_add_ps(XMM4, XMM0);
+ XMM5 = _mm_add_ps(XMM5, XMM0);
+ XMM4 = _mm_max_ps(XMM4, XMM2);
+ XMM5 = _mm_max_ps(XMM5, XMM2);
+ _mm_store_ps(noise+i , XMM4);
+ _mm_store_ps(noise+i+4, XMM5);
+ i += 8;
+ }
+ for(;i<n;i+=16)
+ {
+ __m128 XMM4 = _mm_load_ps(findex+i );
+ __m128 XMM5 = _mm_load_ps(findex+i+ 4);
+ __m128 XMM6 = _mm_load_ps(findex+i+ 8);
+ __m128 XMM7 = _mm_load_ps(findex+i+12);
+ XMM4 = _mm_mul_ps(XMM4, XMM1);
+ XMM5 = _mm_mul_ps(XMM5, XMM1);
+ XMM6 = _mm_mul_ps(XMM6, XMM1);
+ XMM7 = _mm_mul_ps(XMM7, XMM1);
+ XMM4 = _mm_add_ps(XMM4, XMM0);
+ XMM5 = _mm_add_ps(XMM5, XMM0);
+ XMM6 = _mm_add_ps(XMM6, XMM0);
+ XMM7 = _mm_add_ps(XMM7, XMM0);
+ XMM4 = _mm_max_ps(XMM4, XMM2);
+ XMM5 = _mm_max_ps(XMM5, XMM2);
+ XMM6 = _mm_max_ps(XMM6, XMM2);
+ XMM7 = _mm_max_ps(XMM7, XMM2);
+ _mm_store_ps(noise+i , XMM4);
+ _mm_store_ps(noise+i+ 4, XMM5);
+ _mm_store_ps(noise+i+ 8, XMM6);
+ _mm_store_ps(noise+i+12, XMM7);
+ }
+ }
+
+ if (fixed <= 0) return;
+
+ midpoint1 = (fixed+1)/2;
+ midpoint2 = n-fixed/2;
+
+ j = midpoint1&(~7);
+ p1 = fixed / 2;
+ p0 = p1 - 3;
+
+ for(i=0;i<j;i+=8)
+ {
+ __m128 XMM0, XMM1, XMM2, XMM3;
+ __m128 XMM4, XMM5, XMM6, XMM7;
+ __m128x TN, TN1;
+
+ XMM5 = _mm_lddqu_ps(N+p0);
+ XMM0 = XXYY[p0+3];
+ XMM1 = XXYY[p0+2];
+ XMM4 = XXYY[p0+1];
+ XMM3 = XXYY[p0 ];
+ TN.ps = _mm_shuffle_ps(XMM5, XMM5, _MM_SHUFFLE(0,1,2,3));
+ XMM0 = _mm_xor_ps(XMM0, PCS_RNNR.ps);
+ XMM1 = _mm_xor_ps(XMM1, PCS_RNNR.ps);
+ XMM4 = _mm_xor_ps(XMM4, PCS_RNNR.ps);
+ XMM3 = _mm_xor_ps(XMM3, PCS_RNNR.ps);
+ XMM5 = _mm_lddqu_ps(N+p1);
+ XMM0 = _mm_add_ps(XMM0, XXYY[p1 ]);
+ XMM1 = _mm_add_ps(XMM1, XXYY[p1+1]);
+ XMM4 = _mm_add_ps(XMM4, XXYY[p1+2]);
+ XMM3 = _mm_add_ps(XMM3, XXYY[p1+3]);
+ TN.ps = _mm_add_ps(TN.ps, XMM5);
+ XMM2 = XMM0;
+ XMM5 = XMM4;
+ XMM0 = _mm_shuffle_ps(XMM0, XMM1, _MM_SHUFFLE(1,0,1,0));
+ XMM2 = _mm_shuffle_ps(XMM2, XMM1, _MM_SHUFFLE(3,2,3,2));
+ XMM4 = _mm_shuffle_ps(XMM4, XMM3, _MM_SHUFFLE(1,0,1,0));
+ XMM5 = _mm_shuffle_ps(XMM5, XMM3, _MM_SHUFFLE(3,2,3,2));
+ XMM1 = XMM0;
+ XMM3 = XMM2;
+ XMM0 = _mm_shuffle_ps(XMM0, XMM4, _MM_SHUFFLE(2,0,2,0));
+ XMM1 = _mm_shuffle_ps(XMM1, XMM4, _MM_SHUFFLE(3,1,3,1));
+ XMM2 = _mm_shuffle_ps(XMM2, XMM5, _MM_SHUFFLE(2,0,2,0));
+ XMM3 = _mm_shuffle_ps(XMM3, XMM5, _MM_SHUFFLE(3,1,3,1));
+ XMM4 = XMM2;
+ XMM5 = XMM0;
+ XMM6 = XMM3;
+ XMM7 = XMM0;
+ XMM4 = _mm_mul_ps(XMM4, XMM1);
+ XMM5 = _mm_mul_ps(XMM5, XMM3);
+ XMM3 = _mm_load_ps(findex+i);
+ XMM6 = _mm_mul_ps(XMM6, TN.ps);
+ XMM1 = _mm_mul_ps(XMM1, TN.ps);
+ XMM7 = _mm_mul_ps(XMM7, XMM2);
+ XMM2 = _mm_lddqu_ps(N+p0-4);
+ XMM0 = _mm_mul_ps(XMM0, XMM0);
+ XMM4 = _mm_sub_ps(XMM4, XMM5);
+ XMM5 = XXYY[p0-1];
+ XMM6 = _mm_sub_ps(XMM6, XMM7);
+ XMM7 = XXYY[p0-2];
+ XMM1 = _mm_sub_ps(XMM1, XMM0);
+ XMM0 = XXYY[p0-3];
+ XMM6 = _mm_mul_ps(XMM6, XMM3);
+ XMM3 = _mm_rcp_ps(XMM1);
+ XMM4 = _mm_add_ps(XMM4, XMM6);
+ XMM6 = XXYY[p0-4];
+ XMM1 = _mm_mul_ps(XMM1, XMM3);
+ TN1.ps = _mm_shuffle_ps(XMM2, XMM2, _MM_SHUFFLE(0,1,2,3));
+ XMM1 = _mm_mul_ps(XMM1, XMM3);
+ XMM5 = _mm_xor_ps(XMM5, PCS_RNNR.ps);
+ XMM3 = _mm_add_ps(XMM3, XMM3);
+ XMM7 = _mm_xor_ps(XMM7, PCS_RNNR.ps);
+ XMM3 = _mm_sub_ps(XMM3, XMM1);
+ XMM0 = _mm_xor_ps(XMM0, PCS_RNNR.ps);
+ XMM4 = _mm_mul_ps(XMM4, XMM3);
+ XMM6 = _mm_xor_ps(XMM6, PCS_RNNR.ps);
+ XMM2 = _mm_lddqu_ps(N+p1+4);
+ XMM4 = _mm_sub_ps(XMM4, OFFSET);
+ XMM5 = _mm_add_ps(XMM5, XXYY[p1+4]);
+ XMM4 = _mm_min_ps(XMM4, PM128(noise+i ));
+ XMM7 = _mm_add_ps(XMM7, XXYY[p1+5]);
+ XMM0 = _mm_add_ps(XMM0, XXYY[p1+6]);
+ _mm_store_ps(noise+i , XMM4);
+ XMM6 = _mm_add_ps(XMM6, XXYY[p1+7]);
+ TN1.ps = _mm_add_ps(TN1.ps, XMM2);
+ XMM1 = XMM5;
+ XMM2 = XMM0;
+ XMM5 = _mm_shuffle_ps(XMM5, XMM7, _MM_SHUFFLE(1,0,1,0));
+ XMM1 = _mm_shuffle_ps(XMM1, XMM7, _MM_SHUFFLE(3,2,3,2));
+ XMM0 = _mm_shuffle_ps(XMM0, XMM6, _MM_SHUFFLE(1,0,1,0));
+ XMM2 = _mm_shuffle_ps(XMM2, XMM6, _MM_SHUFFLE(3,2,3,2));
+ XMM7 = XMM5;
+ XMM6 = XMM1;
+ XMM5 = _mm_shuffle_ps(XMM5, XMM0, _MM_SHUFFLE(2,0,2,0));
+ XMM7 = _mm_shuffle_ps(XMM7, XMM0, _MM_SHUFFLE(3,1,3,1));
+ XMM1 = _mm_shuffle_ps(XMM1, XMM2, _MM_SHUFFLE(2,0,2,0));
+ XMM6 = _mm_shuffle_ps(XMM6, XMM2, _MM_SHUFFLE(3,1,3,1));
+ XMM0 = XMM1;
+ XMM2 = XMM5;
+ XMM3 = XMM6;
+ XMM4 = XMM5;
+ XMM0 = _mm_mul_ps(XMM0, XMM7);
+ XMM2 = _mm_mul_ps(XMM2, XMM6);
+ XMM6 = _mm_load_ps(findex+i+4);
+ XMM3 = _mm_mul_ps(XMM3, TN1.ps);
+ XMM7 = _mm_mul_ps(XMM7, TN1.ps);
+ XMM4 = _mm_mul_ps(XMM4, XMM1);
+ XMM5 = _mm_mul_ps(XMM5, XMM5);
+ XMM0 = _mm_sub_ps(XMM0, XMM2);
+ XMM3 = _mm_sub_ps(XMM3, XMM4);
+ XMM7 = _mm_sub_ps(XMM7, XMM5);
+ XMM3 = _mm_mul_ps(XMM3, XMM6);
+ XMM6 = _mm_rcp_ps(XMM7);
+ XMM0 = _mm_add_ps(XMM0, XMM3);
+ XMM7 = _mm_mul_ps(XMM7, XMM6);
+ XMM7 = _mm_mul_ps(XMM7, XMM6);
+ XMM6 = _mm_add_ps(XMM6, XMM6);
+ XMM6 = _mm_sub_ps(XMM6, XMM7);
+ XMM6 = _mm_mul_ps(XMM6, XMM0);
+ XMM6 = _mm_sub_ps(XMM6, OFFSET);
+ XMM6 = _mm_min_ps(XMM6, PM128(noise+i+4));
+ _mm_store_ps(noise+i+4, XMM6);
+ p0 -= 8;
+ p1 += 8;
+ }
+ j = midpoint1&(~3);
+ for(;i<j;i+=4)
+ {
+ __m128 XMM0, XMM1, XMM4, XMM3, XMM5;
+ __m128x TN;
+
+ XMM5 = _mm_lddqu_ps(N+p0);
+ XMM0 = XXYY[p0+3];
+ XMM1 = XXYY[p0+2];
+ XMM4 = XXYY[p0+1];
+ XMM3 = XXYY[p0 ];
+ TN.ps = _mm_shuffle_ps(XMM5, XMM5, _MM_SHUFFLE(0,1,2,3));
+
+ XMM0 = _mm_xor_ps(XMM0, PCS_RNNR.ps);
+ XMM1 = _mm_xor_ps(XMM1, PCS_RNNR.ps);
+ XMM4 = _mm_xor_ps(XMM4, PCS_RNNR.ps);
+ XMM3 = _mm_xor_ps(XMM3, PCS_RNNR.ps);
+
+ XMM5 = _mm_lddqu_ps(N+p1);
+ XMM0 = _mm_add_ps(XMM0, XXYY[p1 ]);
+ XMM1 = _mm_add_ps(XMM1, XXYY[p1+1]);
+ XMM4 = _mm_add_ps(XMM4, XXYY[p1+2]);
+ XMM3 = _mm_add_ps(XMM3, XXYY[p1+3]);
+
+ TN.ps = _mm_add_ps(TN.ps, XMM5);
+
+ bark_noise_hybridmp_SSE_SUBC();
+ XMM4 = _mm_sub_ps(XMM4, OFFSET);
+ XMM4 = _mm_min_ps(XMM4, PM128(noise+i ));
+ _mm_store_ps(noise+i , XMM4);
+ p0 -= 4;
+ p1 += 4;
+ }
+ if(midpoint2-i<4) /* fewer than 4 samples remain before midpoint2: finish them in scalar code */
+ {
+ x = (float)i;
+ for (;i<midpoint1;i++,x+=1.f) /* samples below midpoint1: combine the sums at hi with those at -lo */
+ {
+ hi = i + fixed / 2;
+ lo = hi - fixed;
+ /* X and XY terms are subtracted, N, XX and Y terms are added */
+ tN = N[hi] + N[-lo];
+ tX = xxyy[hi*4 ] - xxyy[-lo*4 ];
+ tXX = xxyy[hi*4+1] + xxyy[-lo*4+1];
+ tY = xxyy[hi*4+2] + xxyy[-lo*4+2];
+ tXY = xxyy[hi*4+3] - xxyy[-lo*4+3];
+ /* line fit over the window: value at x is (A + x*B) / D */
+ A = tY * tXX - tX * tXY;
+ B = tN * tXY - tX * tY;
+ D = tN * tXX - tX * tX;
+ R = (A + x * B) / D;
+ /* keep the smaller of the existing estimate and the new one */
+ if(R - offset < noise[i])
+ noise[i] = R - offset;
+ }
+ for (;i<midpoint2;i++,x+=1.f) /* samples up to midpoint2: plain hi-minus-lo window sums */
+ {
+ hi = i + fixed / 2;
+ lo = hi - fixed;
+ /* every term is the difference of the running sums at hi and lo */
+ tN = N[hi] - N[lo];
+ tX = xxyy[hi*4 ] - xxyy[lo*4 ];
+ tXX = xxyy[hi*4+1] - xxyy[lo*4+1];
+ tY = xxyy[hi*4+2] - xxyy[lo*4+2];
+ tXY = xxyy[hi*4+3] - xxyy[lo*4+3];
+ /* same line fit as in the loop above */
+ A = tY * tXX - tX * tXY;
+ B = tN * tXY - tX * tY;
+ D = tN * tXX - tX * tX;
+ R = (A + x * B) / D;
+ if(R - offset < noise[i])
+ noise[i] = R - offset;
+ }
+ j = (i+3)&(~3); /* round i up to the next multiple of 4 ... */
+ j = (j>=n)?n:j; /* ... but never past n */
+ for (;i<j;i++,x+=1.f) /* reuse the last A, B, D so the vector tail can start on a 4-sample boundary */
+ {
+ R = (A + x * B) / D;
+ if(R - offset < noise[i])
+ noise[i] = R - offset;
+ }
+ PA = _mm_set_ps1(A); /* hand the last fit to the vector tail that follows this if/else */
+ PB = _mm_set_ps1(B);
+ PD = _mm_set_ps1(1.f/D); /* reciprocal, as in the case 0 paths: the tail multiplies PA and PB by PD (was D) */
+ }
+ else
+ {
+ switch(midpoint1%4)
+ {
+ case 0:
+ break;
+ case 1:
+ {
+ __m128 XMM0, XMM1, XMM4, XMM3;
+ __m128x TN, TN1;
+ int p0, p1;
+ p0 = -((i ) + fixed / 2 - fixed);
+ p1 = (i+1) + fixed / 2;
+
+ XMM0 = XXYY[p0 ];
+ XMM1 = XXYY[p1 ];
+ XMM4 = XXYY[p1+1];
+ XMM3 = XXYY[p1+2];
+
+ TN.sf[0] = N[p0 ];
+ TN.sf[1] = N[p1 ];
+ TN.sf[2] = N[p1+1];
+ TN.sf[3] = N[p1+2];
+
+ XMM0 = _mm_xor_ps(XMM0, PCS_RNNR.ps);
+
+ p0 = (i ) + fixed / 2;
+ p1 -= fixed;
+
+ XMM0 = _mm_add_ps(XMM0, XXYY[p0 ]);
+ XMM1 = _mm_sub_ps(XMM1, XXYY[p1 ]);
+ XMM4 = _mm_sub_ps(XMM4, XXYY[p1+1]);
+ XMM3 = _mm_sub_ps(XMM3, XXYY[p1+2]);
+
+ TN1.sf[0] = N[p0 ];
+ TN1.sf[1] = N[p1 ];
+ TN1.sf[2] = N[p1+1];
+ TN1.sf[3] = N[p1+2];
+
+ TN.ps = _mm_sub_ps(TN.ps, _mm_xor_ps(TN1.ps, PCS_NNNR.ps));
+
+ bark_noise_hybridmp_SSE_SUBC();
+ XMM4 = _mm_sub_ps(XMM4, OFFSET);
+ XMM4 = _mm_min_ps(XMM4, PM128(noise+i ));
+ _mm_store_ps(noise+i , XMM4);
+ i += 4;
+ }
+ break;
+ case 2:
+ {
+ __m128 XMM0, XMM1, XMM4, XMM3;
+ __m128x TN, TN1;
+ int p0, p1;
+ p0 = -((i ) + fixed / 2 - fixed);
+ p1 = (i+2) + fixed / 2;
+
+ XMM0 = XXYY[p0 ];
+ XMM1 = XXYY[p0-1];
+ XMM4 = XXYY[p1 ];
+ XMM3 = XXYY[p1+1];
+
+ TN.sf[0] = N[p0 ];
+ TN.sf[1] = N[p0-1];
+ TN.sf[2] = N[p1 ];
+ TN.sf[3] = N[p1+1];
+
+ XMM0 = _mm_xor_ps(XMM0, PCS_RNNR.ps);
+ XMM1 = _mm_xor_ps(XMM1, PCS_RNNR.ps);
+
+ p0 = (i ) + fixed / 2;
+ p1 -= fixed;
+
+ XMM0 = _mm_add_ps(XMM0, XXYY[p0 ]);
+ XMM1 = _mm_add_ps(XMM1, XXYY[p0+1]);
+ XMM4 = _mm_sub_ps(XMM4, XXYY[p1 ]);
+ XMM3 = _mm_sub_ps(XMM3, XXYY[p1+1]);
+
+ TN1.sf[0] = N[p0 ];
+ TN1.sf[1] = N[p0+1];
+ TN1.sf[2] = N[p1 ];
+ TN1.sf[3] = N[p1+1];
+
+ TN.ps = _mm_sub_ps(TN.ps, _mm_xor_ps(TN1.ps, PCS_NNRR.ps));
+
+ bark_noise_hybridmp_SSE_SUBC();
+ XMM4 = _mm_sub_ps(XMM4, OFFSET);
+ XMM4 = _mm_min_ps(XMM4, PM128(noise+i ));
+ _mm_store_ps(noise+i , XMM4);
+ i += 4;
+ }
+ break;
+ case 3: /* midpoint1%4==3: lanes 0-2 are mirrored windows, lane 3 is a regular hi-lo window */
+ {
+ __m128 XMM0, XMM1, XMM4, XMM3;
+ __m128x TN, TN1;
+ int p0, p1;
+ p0 = -((i ) + fixed / 2 - fixed); /* -lo for lane 0; lanes 1 and 2 use p0-1 and p0-2 */
+ p1 = (i+3) + fixed / 2; /* hi for lane 3 */
+ /* first operand per lane: rows at -lo for lanes 0-2, row at hi for lane 3 */
+ XMM0 = XXYY[p0 ];
+ XMM1 = XXYY[p0-1];
+ XMM4 = XXYY[p0-2];
+ XMM3 = XXYY[p1 ];
+ /* matching N values, one per lane */
+ TN.sf[0] = N[p0 ];
+ TN.sf[1] = N[p0-1];
+ TN.sf[2] = N[p0-2];
+ TN.sf[3] = N[p1 ];
+ /* PCS_RNNR is defined elsewhere; NOTE(review): expected to negate the X and XY components of the mirrored rows - confirm */
+ XMM0 = _mm_xor_ps(XMM0, PCS_RNNR.ps);
+ XMM1 = _mm_xor_ps(XMM1, PCS_RNNR.ps);
+ XMM4 = _mm_xor_ps(XMM4, PCS_RNNR.ps);
+ /* second operand per lane: rows at hi for lanes 0-2, row at lo for lane 3 */
+ p0 = (i ) + fixed / 2;
+ p1 -= fixed;
+ /* mirrored lanes are added to their hi row, the regular lane subtracts its lo row */
+ XMM0 = _mm_add_ps(XMM0, XXYY[p0 ]);
+ XMM1 = _mm_add_ps(XMM1, XXYY[p0+1]);
+ XMM4 = _mm_add_ps(XMM4, XXYY[p0+2]); /* lane 2 is mirrored like lanes 0 and 1, so add (was _mm_sub_ps) */
+ XMM3 = _mm_sub_ps(XMM3, XXYY[p1 ]);
+
+ TN1.sf[0] = N[p0 ];
+ TN1.sf[1] = N[p0+1];
+ TN1.sf[2] = N[p0+2];
+ TN1.sf[3] = N[p1 ];
+ /* PCS_NRRR is defined elsewhere; NOTE(review): expected to negate lanes 0-2 so they become sums while lane 3 stays a difference - confirm */
+ TN.ps = _mm_sub_ps(TN.ps, _mm_xor_ps(TN1.ps, PCS_NRRR.ps));
+ /* macro defined elsewhere; its result is taken from XMM4 below */
+ bark_noise_hybridmp_SSE_SUBC();
+ XMM4 = _mm_sub_ps(XMM4, OFFSET);
+ XMM4 = _mm_min_ps(XMM4, PM128(noise+i )); /* keep the smaller of the old and new estimate */
+ _mm_store_ps(noise+i , XMM4);
+ i += 4;
+ }
+ break;
+ }
+ p0 = i + fixed / 2;
+ p1 = p0 - fixed;
+ j = ((midpoint2-i)&(~15))+i;
+ for(;i<j;i+=16)
+ {
+ __m128 XMM0, XMM1, XMM2, XMM3;
+ __m128 XMM4, XMM5, XMM6, XMM7;
+ __m128x TN, TN1;
+
+ XMM0 = XXYY[p0 ];
+ XMM1 = XXYY[p0+ 1];
+ XMM4 = XXYY[p0+ 2];
+ XMM3 = XXYY[p0+ 3];
+ TN.ps = _mm_lddqu_ps(N+p0 );
+ XMM5 = _mm_lddqu_ps(N+p1 );
+ XMM0 = _mm_sub_ps(XMM0, XXYY[p1 ]);
+ XMM1 = _mm_sub_ps(XMM1, XXYY[p1+ 1]);
+ XMM4 = _mm_sub_ps(XMM4, XXYY[p1+ 2]);
+ XMM3 = _mm_sub_ps(XMM3, XXYY[p1+ 3]);
+ TN.ps = _mm_sub_ps(TN.ps, XMM5);
+ XMM2 = XMM0;
+ XMM5 = XMM4;
+ XMM0 = _mm_shuffle_ps(XMM0, XMM1, _MM_SHUFFLE(1,0,1,0));
+ XMM2 = _mm_shuffle_ps(XMM2, XMM1, _MM_SHUFFLE(3,2,3,2));
+ XMM4 = _mm_shuffle_ps(XMM4, XMM3, _MM_SHUFFLE(1,0,1,0));
+ XMM5 = _mm_shuffle_ps(XMM5, XMM3, _MM_SHUFFLE(3,2,3,2));
+ XMM1 = XMM0;
+ XMM3 = XMM2;
+ XMM0 = _mm_shuffle_ps(XMM0, XMM4, _MM_SHUFFLE(2,0,2,0));
+ XMM1 = _mm_shuffle_ps(XMM1, XMM4, _MM_SHUFFLE(3,1,3,1));
+ XMM2 = _mm_shuffle_ps(XMM2, XMM5, _MM_SHUFFLE(2,0,2,0));
+ XMM3 = _mm_shuffle_ps(XMM3, XMM5, _MM_SHUFFLE(3,1,3,1));
+ XMM4 = XMM2;
+ XMM5 = XMM0;
+ XMM6 = XMM3;
+ XMM7 = XMM0;
+ XMM4 = _mm_mul_ps(XMM4, XMM1);
+ XMM5 = _mm_mul_ps(XMM5, XMM3);
+ XMM3 = _mm_load_ps(findex+i );
+ XMM6 = _mm_mul_ps(XMM6, TN.ps);
+ XMM1 = _mm_mul_ps(XMM1, TN.ps);
+ XMM7 = _mm_mul_ps(XMM7, XMM2);
+ XMM2 = XXYY[p0+ 4];
+ XMM0 = _mm_mul_ps(XMM0, XMM0);
+ XMM4 = _mm_sub_ps(XMM4, XMM5);
+ XMM5 = XXYY[p0+ 5];
+ XMM6 = _mm_sub_ps(XMM6, XMM7);
+ XMM7 = XXYY[p0+ 6];
+ XMM1 = _mm_sub_ps(XMM1, XMM0);
+ XMM0 = XXYY[p0+ 7];
+ XMM6 = _mm_mul_ps(XMM6, XMM3);
+
+ TN1.ps = _mm_lddqu_ps(N+p0+ 4);
+ XMM3 = _mm_rcp_ps(XMM1);
+ XMM4 = _mm_add_ps(XMM4, XMM6);
+ XMM6 = _mm_lddqu_ps(N+p1+ 4);
+ XMM1 = _mm_mul_ps(XMM1, XMM3);
+ XMM2 = _mm_sub_ps(XMM2, XXYY[p1+ 4]);
+ XMM1 = _mm_mul_ps(XMM1, XMM3);
+ XMM5 = _mm_sub_ps(XMM5, XXYY[p1+ 5]);
+ XMM3 = _mm_add_ps(XMM3, XMM3);
+ XMM7 = _mm_sub_ps(XMM7, XXYY[p1+ 6]);
+ XMM3 = _mm_sub_ps(XMM3, XMM1);
+ XMM0 = _mm_sub_ps(XMM0, XXYY[p1+ 7]);
+ XMM4 = _mm_mul_ps(XMM4, XMM3);
+ TN1.ps = _mm_sub_ps(TN1.ps, XMM6);
+ XMM4 = _mm_sub_ps(XMM4, OFFSET);
+ XMM1 = XMM2;
+ XMM4 = _mm_min_ps(XMM4, PM128(noise+i ));
+ XMM6 = XMM7;
+ _mm_store_ps(noise+i , XMM4);
+ XMM2 = _mm_shuffle_ps(XMM2, XMM5, _MM_SHUFFLE(1,0,1,0));
+ XMM1 = _mm_shuffle_ps(XMM1, XMM5, _MM_SHUFFLE(3,2,3,2));
+ XMM7 = _mm_shuffle_ps(XMM7, XMM0, _MM_SHUFFLE(1,0,1,0));
+ XMM6 = _mm_shuffle_ps(XMM6, XMM0, _MM_SHUFFLE(3,2,3,2));
+ XMM5 = XMM2;
+ XMM0 = XMM1;
+ XMM2 = _mm_shuffle_ps(XMM2, XMM7, _MM_SHUFFLE(2,0,2,0));
+ XMM5 = _mm_shuffle_ps(XMM5, XMM7, _MM_SHUFFLE(3,1,3,1));
+ XMM1 = _mm_shuffle_ps(XMM1, XMM6, _MM_SHUFFLE(2,0,2,0));
+ XMM0 = _mm_shuffle_ps(XMM0, XMM6, _MM_SHUFFLE(3,1,3,1));
+ XMM7 = XMM1;
+ XMM6 = XMM2;
+ XMM3 = XMM0;
+ XMM4 = XMM2;
+ XMM7 = _mm_mul_ps(XMM7, XMM5);
+ XMM6 = _mm_mul_ps(XMM6, XMM0);
+ XMM0 = _mm_load_ps(findex+i+ 4);
+ XMM3 = _mm_mul_ps(XMM3, TN1.ps);
+ XMM5 = _mm_mul_ps(XMM5, TN1.ps);
+ XMM4 = _mm_mul_ps(XMM4, XMM1);
+ XMM1 = XXYY[p0+ 8];
+ XMM2 = _mm_mul_ps(XMM2, XMM2);
+ XMM7 = _mm_sub_ps(XMM7, XMM6);
+ XMM6 = XXYY[p0+ 9];
+ XMM3 = _mm_sub_ps(XMM3, XMM4);
+ XMM4 = XXYY[p0+10];
+ XMM5 = _mm_sub_ps(XMM5, XMM2);
+ XMM2 = XXYY[p0+11];
+ XMM3 = _mm_mul_ps(XMM3, XMM0);
+ TN.ps = _mm_lddqu_ps(N+p0+ 8);
+ XMM0 = _mm_rcp_ps(XMM5);
+ XMM7 = _mm_add_ps(XMM7, XMM3);
+ XMM3 = _mm_lddqu_ps(N+p1+ 8);
+ XMM5 = _mm_mul_ps(XMM5, XMM0);
+ XMM1 = _mm_sub_ps(XMM1, XXYY[p1+ 8]);
+ XMM5 = _mm_mul_ps(XMM5, XMM0);
+ XMM6 = _mm_sub_ps(XMM6, XXYY[p1+ 9]);
+ XMM0 = _mm_add_ps(XMM0, XMM0);
+ XMM4 = _mm_sub_ps(XMM4, XXYY[p1+10]);
+ XMM0 = _mm_sub_ps(XMM0, XMM5);
+ XMM2 = _mm_sub_ps(XMM2, XXYY[p1+11]);
+ XMM7 = _mm_mul_ps(XMM7, XMM0);
+ TN.ps = _mm_sub_ps(TN.ps, XMM3);
+ XMM7 = _mm_sub_ps(XMM7, OFFSET);
+ XMM5 = XMM1;
+ XMM7 = _mm_min_ps(XMM7, PM128(noise+i+ 4));
+ XMM3 = XMM4;
+ _mm_store_ps(noise+i+ 4, XMM7);
+ XMM1 = _mm_shuffle_ps(XMM1, XMM6, _MM_SHUFFLE(1,0,1,0));
+ XMM5 = _mm_shuffle_ps(XMM5, XMM6, _MM_SHUFFLE(3,2,3,2));
+ XMM4 = _mm_shuffle_ps(XMM4, XMM2, _MM_SHUFFLE(1,0,1,0));
+ XMM3 = _mm_shuffle_ps(XMM3, XMM2, _MM_SHUFFLE(3,2,3,2));
+ XMM6 = XMM1;
+ XMM2 = XMM5;
+ XMM1 = _mm_shuffle_ps(XMM1, XMM4, _MM_SHUFFLE(2,0,2,0));
+ XMM6 = _mm_shuffle_ps(XMM6, XMM4, _MM_SHUFFLE(3,1,3,1));
+ XMM5 = _mm_shuffle_ps(XMM5, XMM3, _MM_SHUFFLE(2,0,2,0));
+ XMM2 = _mm_shuffle_ps(XMM2, XMM3, _MM_SHUFFLE(3,1,3,1));
+ XMM4 = XMM5;
+ XMM3 = XMM1;
+ XMM0 = XMM2;
+ XMM7 = XMM1;
+ XMM4 = _mm_mul_ps(XMM4, XMM6);
+ XMM3 = _mm_mul_ps(XMM3, XMM2);
+ XMM2 = _mm_load_ps(findex+i+ 8);
+ XMM0 = _mm_mul_ps(XMM0, TN.ps);
+ XMM6 = _mm_mul_ps(XMM6, TN.ps);
+ XMM7 = _mm_mul_ps(XMM7, XMM5);
+ XMM5 = XXYY[p0+12];
+ XMM1 = _mm_mul_ps(XMM1, XMM1);
+ XMM4 = _mm_sub_ps(XMM4, XMM3);
+ XMM3 = XXYY[p0+13];
+ XMM0 = _mm_sub_ps(XMM0, XMM7);
+ XMM7 = XXYY[p0+14];
+ XMM6 = _mm_sub_ps(XMM6, XMM1);
+ XMM1 = XXYY[p0+15];
+ XMM0 = _mm_mul_ps(XMM0, XMM2);
+ TN1.ps = _mm_lddqu_ps(N+p0+12);
+ XMM2 = _mm_rcp_ps(XMM6);
+ XMM4 = _mm_add_ps(XMM4, XMM0);
+ XMM0 = _mm_lddqu_ps(N+p1+12);
+ XMM6 = _mm_mul_ps(XMM6, XMM2);
+ XMM5 = _mm_sub_ps(XMM5, XXYY[p1+12]);
+ XMM6 = _mm_mul_ps(XMM6, XMM2);
+ XMM3 = _mm_sub_ps(XMM3, XXYY[p1+13]);
+ XMM2 = _mm_add_ps(XMM2, XMM2);
+ XMM7 = _mm_sub_ps(XMM7, XXYY[p1+14]);
+ XMM2 = _mm_sub_ps(XMM2, XMM6);
+ XMM1 = _mm_sub_ps(XMM1, XXYY[p1+15]);
+ XMM4 = _mm_mul_ps(XMM4, XMM2);
+ TN1.ps = _mm_sub_ps(TN1.ps, XMM0);
+ XMM4 = _mm_sub_ps(XMM4, OFFSET);
+ XMM6 = XMM5;
+ XMM4 = _mm_min_ps(XMM4, PM128(noise+i+ 8));
+ XMM0 = XMM7;
+ _mm_store_ps(noise+i+ 8, XMM4);
+ XMM5 = _mm_shuffle_ps(XMM5, XMM3, _MM_SHUFFLE(1,0,1,0));
+ XMM6 = _mm_shuffle_ps(XMM6, XMM3, _MM_SHUFFLE(3,2,3,2));
+ XMM7 = _mm_shuffle_ps(XMM7, XMM1, _MM_SHUFFLE(1,0,1,0));
+ XMM0 = _mm_shuffle_ps(XMM0, XMM1, _MM_SHUFFLE(3,2,3,2));
+ XMM3 = XMM5;
+ XMM1 = XMM6;
+ XMM5 = _mm_shuffle_ps(XMM5, XMM7, _MM_SHUFFLE(2,0,2,0));
+ XMM3 = _mm_shuffle_ps(XMM3, XMM7, _MM_SHUFFLE(3,1,3,1));
+ XMM6 = _mm_shuffle_ps(XMM6, XMM0, _MM_SHUFFLE(2,0,2,0));
+ XMM1 = _mm_shuffle_ps(XMM1, XMM0, _MM_SHUFFLE(3,1,3,1));
+ XMM7 = XMM6;
+ XMM0 = XMM5;
+ XMM2 = XMM1;
+ XMM4 = XMM5;
+ XMM7 = _mm_mul_ps(XMM7, XMM3);
+ XMM0 = _mm_mul_ps(XMM0, XMM1);
+ XMM1 = _mm_load_ps(findex+i+12);
+ XMM2 = _mm_mul_ps(XMM2, TN1.ps);
+ XMM3 = _mm_mul_ps(XMM3, TN1.ps);
+ XMM4 = _mm_mul_ps(XMM4, XMM6);
+ XMM5 = _mm_mul_ps(XMM5, XMM5);
+ XMM7 = _mm_sub_ps(XMM7, XMM0);
+ XMM2 = _mm_sub_ps(XMM2, XMM4);
+ XMM3 = _mm_sub_ps(XMM3, XMM5);
+ XMM2 = _mm_mul_ps(XMM2, XMM1);
+ XMM1 = _mm_rcp_ps(XMM3);
+ XMM7 = _mm_add_ps(XMM7, XMM2);
+ XMM3 = _mm_mul_ps(XMM3, XMM1);
+ XMM3 = _mm_mul_ps(XMM3, XMM1);
+ XMM1 = _mm_add_ps(XMM1, XMM1);
+ XMM1 = _mm_sub_ps(XMM1, XMM3);
+ XMM7 = _mm_mul_ps(XMM7, XMM1);
+ XMM7 = _mm_sub_ps(XMM7, OFFSET);
+ XMM7 = _mm_min_ps(XMM7, PM128(noise+i+12));
+ _mm_store_ps(noise+i+12, XMM7);
+ p0 += 16;
+ p1 += 16;
+ }
+ j = ((midpoint2-i)&(~7))+i;
+ for(;i<j;i+=8)
+ {
+ __m128 XMM0, XMM1, XMM2, XMM3;
+ __m128 XMM4, XMM5, XMM6, XMM7;
+ __m128x TN, TN1;
+
+ XMM0 = XXYY[p0 ];
+ XMM1 = XXYY[p0+1];
+ XMM4 = XXYY[p0+2];
+ XMM3 = XXYY[p0+3];
+ TN.ps = _mm_lddqu_ps(N+p0 );
+ XMM5 = _mm_lddqu_ps(N+p1 );
+ XMM0 = _mm_sub_ps(XMM0, XXYY[p1 ]);
+ XMM1 = _mm_sub_ps(XMM1, XXYY[p1+1]);
+ XMM4 = _mm_sub_ps(XMM4, XXYY[p1+2]);
+ XMM3 = _mm_sub_ps(XMM3, XXYY[p1+3]);
+ TN.ps = _mm_sub_ps(TN.ps, XMM5);
+ XMM2 = XMM0;
+ XMM5 = XMM4;
+ XMM0 = _mm_shuffle_ps(XMM0, XMM1, _MM_SHUFFLE(1,0,1,0));
+ XMM2 = _mm_shuffle_ps(XMM2, XMM1, _MM_SHUFFLE(3,2,3,2));
+ XMM4 = _mm_shuffle_ps(XMM4, XMM3, _MM_SHUFFLE(1,0,1,0));
+ XMM5 = _mm_shuffle_ps(XMM5, XMM3, _MM_SHUFFLE(3,2,3,2));
+ XMM1 = XMM0;
+ XMM3 = XMM2;
+ XMM0 = _mm_shuffle_ps(XMM0, XMM4, _MM_SHUFFLE(2,0,2,0));
+ XMM1 = _mm_shuffle_ps(XMM1, XMM4, _MM_SHUFFLE(3,1,3,1));
+ XMM2 = _mm_shuffle_ps(XMM2, XMM5, _MM_SHUFFLE(2,0,2,0));
+ XMM3 = _mm_shuffle_ps(XMM3, XMM5, _MM_SHUFFLE(3,1,3,1));
+ XMM4 = XMM2;
+ XMM5 = XMM0;
+ XMM6 = XMM3;
+ XMM7 = XMM0;
+ XMM4 = _mm_mul_ps(XMM4, XMM1);
+ XMM5 = _mm_mul_ps(XMM5, XMM3);
+ XMM3 = _mm_load_ps(findex+i );
+ XMM6 = _mm_mul_ps(XMM6, TN.ps);
+ XMM1 = _mm_mul_ps(XMM1, TN.ps);
+ XMM7 = _mm_mul_ps(XMM7, XMM2);
+ XMM2 = XXYY[p0+4];
+ XMM0 = _mm_mul_ps(XMM0, XMM0);
+ XMM4 = _mm_sub_ps(XMM4, XMM5);
+ XMM5 = XXYY[p0+5];
+ XMM6 = _mm_sub_ps(XMM6, XMM7);
+ XMM7 = XXYY[p0+6];
+ XMM1 = _mm_sub_ps(XMM1, XMM0);
+ XMM0 = XXYY[p0+7];
+ XMM6 = _mm_mul_ps(XMM6, XMM3);
+ TN1.ps = _mm_lddqu_ps(N+p0+ 4);
+ XMM3 = _mm_rcp_ps(XMM1);
+ XMM4 = _mm_add_ps(XMM4, XMM6);
+ XMM6 = _mm_lddqu_ps(N+p1+ 4);
+ XMM1 = _mm_mul_ps(XMM1, XMM3);
+ XMM2 = _mm_sub_ps(XMM2, XXYY[p1+4]);
+ XMM1 = _mm_mul_ps(XMM1, XMM3);
+ XMM5 = _mm_sub_ps(XMM5, XXYY[p1+5]);
+ XMM3 = _mm_add_ps(XMM3, XMM3);
+ XMM7 = _mm_sub_ps(XMM7, XXYY[p1+6]);
+ XMM3 = _mm_sub_ps(XMM3, XMM1);
+ XMM0 = _mm_sub_ps(XMM0, XXYY[p1+7]);
+ XMM4 = _mm_mul_ps(XMM4, XMM3);
+ TN1.ps = _mm_sub_ps(TN1.ps, XMM6);
+ XMM4 = _mm_sub_ps(XMM4, OFFSET);
+ XMM1 = XMM2;
+ XMM4 = _mm_min_ps(XMM4, PM128(noise+i ));
+ XMM6 = XMM7;
+ _mm_store_ps(noise+i , XMM4);
+ XMM2 = _mm_shuffle_ps(XMM2, XMM5, _MM_SHUFFLE(1,0,1,0));
+ XMM1 = _mm_shuffle_ps(XMM1, XMM5, _MM_SHUFFLE(3,2,3,2));
+ XMM7 = _mm_shuffle_ps(XMM7, XMM0, _MM_SHUFFLE(1,0,1,0));
+ XMM6 = _mm_shuffle_ps(XMM6, XMM0, _MM_SHUFFLE(3,2,3,2));
+ XMM5 = XMM2;
+ XMM0 = XMM1;
+ XMM2 = _mm_shuffle_ps(XMM2, XMM7, _MM_SHUFFLE(2,0,2,0));
+ XMM5 = _mm_shuffle_ps(XMM5, XMM7, _MM_SHUFFLE(3,1,3,1));
+ XMM1 = _mm_shuffle_ps(XMM1, XMM6, _MM_SHUFFLE(2,0,2,0));
+ XMM0 = _mm_shuffle_ps(XMM0, XMM6, _MM_SHUFFLE(3,1,3,1));
+ XMM7 = XMM1;
+ XMM6 = XMM2;
+ XMM3 = XMM0;
+ XMM4 = XMM2;
+ XMM7 = _mm_mul_ps(XMM7, XMM5);
+ XMM6 = _mm_mul_ps(XMM6, XMM0);
+ XMM0 = _mm_load_ps(findex+i+4);
+ XMM3 = _mm_mul_ps(XMM3, TN1.ps);
+ XMM5 = _mm_mul_ps(XMM5, TN1.ps);
+ XMM4 = _mm_mul_ps(XMM4, XMM1);
+ XMM2 = _mm_mul_ps(XMM2, XMM2);
+ XMM7 = _mm_sub_ps(XMM7, XMM6);
+ XMM3 = _mm_sub_ps(XMM3, XMM4);
+ XMM5 = _mm_sub_ps(XMM5, XMM2);
+ XMM3 = _mm_mul_ps(XMM3, XMM0);
+ XMM0 = _mm_rcp_ps(XMM5);
+ XMM7 = _mm_add_ps(XMM7, XMM3);
+ XMM5 = _mm_mul_ps(XMM5, XMM0);
+ XMM5 = _mm_mul_ps(XMM5, XMM0);
+ XMM0 = _mm_add_ps(XMM0, XMM0);
+ XMM0 = _mm_sub_ps(XMM0, XMM5);
+ XMM7 = _mm_mul_ps(XMM7, XMM0);
+ XMM7 = _mm_sub_ps(XMM7, OFFSET);
+ XMM7 = _mm_min_ps(XMM7, PM128(noise+i+4));
+ _mm_store_ps(noise+i+4, XMM7);
+
+ p0 += 8;
+ p1 += 8;
+ }
+ j = midpoint2&(~3);
+ for(;i<j;i+=4)
+ {
+ __m128 XMM0, XMM1, XMM4, XMM3;
+ __m128x TN;
+ __m128 XMM5;
+
+ XMM0 = XXYY[p0 ];
+ XMM1 = XXYY[p0+1];
+ XMM4 = XXYY[p0+2];
+ XMM3 = XXYY[p0+3];
+ TN.ps = _mm_lddqu_ps(N+p0 );
+ XMM5 = _mm_lddqu_ps(N+p1 );
+ XMM0 = _mm_sub_ps(XMM0, XXYY[p1 ]);
+ XMM1 = _mm_sub_ps(XMM1, XXYY[p1+1]);
+ XMM4 = _mm_sub_ps(XMM4, XXYY[p1+2]);
+ XMM3 = _mm_sub_ps(XMM3, XXYY[p1+3]);
+
+ TN.ps = _mm_sub_ps(TN.ps, XMM5);
+
+ bark_noise_hybridmp_SSE_SUBC();
+ XMM4 = _mm_sub_ps(XMM4, OFFSET);
+ XMM4 = _mm_min_ps(XMM4, PM128(noise+i ));
+ _mm_store_ps(noise+i , XMM4);
+ p0 += 4;
+ p1 += 4;
+ }
+ if(i!=n)
+ {
+ switch(midpoint2%4)
+ {
+ case 0:
+ {
+ hi = (i-1) + fixed / 2;
+ lo = hi - fixed;
+
+ tN = N[hi] - N[lo];
+ tX = xxyy[hi*4 ] - xxyy[lo*4 ];
+ tXX = xxyy[hi*4+1] - xxyy[lo*4+1];
+ tY = xxyy[hi*4+2] - xxyy[lo*4+2];
+ tXY = xxyy[hi*4+3] - xxyy[lo*4+3];
+
+ A = tY * tXX - tX * tXY;
+ B = tN * tXY - tX * tY;
+ D = tN * tXX - tX * tX;
+ PA = _mm_set_ps1(A);
+ PB = _mm_set_ps1(B);
+ PD = _mm_set_ps1(1.f/D);
+ }
+ break;
+ case 1:
+ {
+ __m128 XMM0, XMM1, XMM4, XMM3;
+ __m128x TN, TN1;
+ int p0 = (i ) + fixed / 2;
+
+ XMM0 =
+ XMM1 =
+ XMM4 =
+ XMM3 = XXYY[p0];
+
+ TN.ps = _mm_set_ps1(N[p0]);
+
+ p0 -= fixed;
+
+ XMM0 =
+ XMM4 =
+ XMM3 =
+ XMM1 = _mm_sub_ps(XMM3, XXYY[p0]);
+
+ TN1.ps = _mm_set_ps1(N[p0]);
+
+ TN.ps = _mm_sub_ps(TN.ps, TN1.ps);
+
+ bark_noise_hybridmp_SSE_SUBC2();
+ XMM4 = _mm_sub_ps(XMM4, OFFSET);
+ XMM4 = _mm_min_ps(XMM4, PM128(noise+i ));
+ _mm_store_ps(noise+i , XMM4);
+ i += 4;
+ PA = _mm_shuffle_ps(PA, PA, _MM_SHUFFLE(0,0,0,0));
+ PB = _mm_shuffle_ps(PB, PB, _MM_SHUFFLE(0,0,0,0));
+ PD = _mm_shuffle_ps(PD, PD, _MM_SHUFFLE(0,0,0,0));
+ }
+ break;
+ case 2:
+ {
+ __m128 XMM0, XMM1, XMM4, XMM3;
+ __m128x TN;
+ __m128 XMM5;
+ int p0 = (i ) + fixed / 2;
+
+ XMM5 = _mm_lddqu_ps(N+p0);
+ XMM0 = XXYY[p0 ];
+ XMM1 =
+ XMM4 =
+ XMM3 = XXYY[p0+1];
+ TN.ps = _mm_shuffle_ps(XMM5, XMM5, _MM_SHUFFLE(1,1,1,0));
+
+ p0 -= fixed;
+
+ XMM5 = _mm_lddqu_ps(N+p0);
+ XMM0 = _mm_sub_ps(XMM0, XXYY[p0 ]);
+ XMM4 =
+ XMM3 =
+ XMM1 = _mm_sub_ps(XMM3, XXYY[p0+1]);
+ XMM5 = _mm_shuffle_ps(XMM5, XMM5, _MM_SHUFFLE(1,1,1,0));
+
+ TN.ps = _mm_sub_ps(TN.ps, XMM5);
+
+ bark_noise_hybridmp_SSE_SUBC2();
+ XMM4 = _mm_sub_ps(XMM4, OFFSET);
+ XMM4 = _mm_min_ps(XMM4, PM128(noise+i ));
+ _mm_store_ps(noise+i , XMM4);
+ i += 4;
+ PA = _mm_shuffle_ps(PA, PA, _MM_SHUFFLE(1,1,1,1));
+ PB = _mm_shuffle_ps(PB, PB, _MM_SHUFFLE(1,1,1,1));
+ PD = _mm_shuffle_ps(PD, PD, _MM_SHUFFLE(1,1,1,1));
+ }
+ break;
+ case 3:
+ {
+ __m128 XMM0, XMM1, XMM4, XMM3;
+ __m128x TN;
+ __m128 XMM5;
+ int p0 = (i ) + fixed / 2;
+
+ XMM5 = _mm_lddqu_ps(N+p0);
+ XMM0 = XXYY[p0 ];
+ XMM1 = XXYY[p0+1];
+ XMM4 =
+ XMM3 = XXYY[p0+2];
+ TN.ps = _mm_shuffle_ps(XMM5, XMM5, _MM_SHUFFLE(2,2,1,0));
+
+ p0 -= fixed;
+
+ XMM5 = _mm_lddqu_ps(N+p0);
+ XMM0 = _mm_sub_ps(XMM0, XXYY[p0 ]);
+ XMM1 = _mm_sub_ps(XMM1, XXYY[p0+1]);
+ XMM4 =
+ XMM3 = _mm_sub_ps(XMM3, XXYY[p0+2]);
+ XMM5 = _mm_shuffle_ps(XMM5, XMM5, _MM_SHUFFLE(2,2,1,0));
+
+ TN.ps = _mm_sub_ps(TN.ps, XMM5);
+
+ bark_noise_hybridmp_SSE_SUBC2();
+ XMM4 = _mm_sub_ps(XMM4, OFFSET);
+ XMM4 = _mm_min_ps(XMM4, PM128(noise+i ));
+ _mm_store_ps(noise+i , XMM4);
+ i += 4;
+ PA = _mm_shuffle_ps(PA, PA, _MM_SHUFFLE(2,2,2,2));
+ PB = _mm_shuffle_ps(PB, PB, _MM_SHUFFLE(2,2,2,2));
+ PD = _mm_shuffle_ps(PD, PD, _MM_SHUFFLE(2,2,2,2));
+ }
+ break;
+ }
+ }
+ }
+ if(i<n)
+ {
+ __m128 XMM0 = PA;
+ __m128 XMM1 = PB;
+ XMM0 = _mm_mul_ps(XMM0, PD);
+ XMM1 = _mm_mul_ps(XMM1, PD);
+ XMM0 = _mm_sub_ps(XMM0, OFFSET);
+ if(i%8!=0)
+ {
+ __m128 XMM4 = _mm_load_ps(findex+i);
+ XMM4 = _mm_mul_ps(XMM4, XMM1);
+ XMM4 = _mm_add_ps(XMM4, XMM0);
+ XMM4 = _mm_min_ps(XMM4, PM128(noise+i ));
+ _mm_store_ps(noise+i , XMM4);
+ i += 4;
+ }
+ if(i%16!=0)
+ {
+ __m128 XMM4 = _mm_load_ps(findex+i );
+ __m128 XMM5 = _mm_load_ps(findex+i+4);
+ __m128 XMM6 = _mm_load_ps(noise+i );
+ __m128 XMM7 = _mm_load_ps(noise+i+ 4);
+ XMM4 = _mm_mul_ps(XMM4, XMM1);
+ XMM5 = _mm_mul_ps(XMM5, XMM1);
+ XMM4 = _mm_add_ps(XMM4, XMM0);
+ XMM5 = _mm_add_ps(XMM5, XMM0);
+ XMM6 = _mm_min_ps(XMM6, XMM4);
+ XMM7 = _mm_min_ps(XMM7, XMM5);
+ _mm_store_ps(noise+i , XMM6);
+ _mm_store_ps(noise+i+ 4, XMM7);
+ i += 8;
+ }
+ for(;i<n;i+=16) /* body covers 16 floats per pass, so step by 16 (was 32, which skipped every second group) */
+ {
+ __m128 XMM2, XMM3, XMM4, XMM5, XMM6, XMM7;
+ XMM4 = _mm_load_ps(findex+i ); /* findex values for floats 0-7 of this group */
+ XMM5 = _mm_load_ps(findex+i+ 4);
+ XMM6 = _mm_load_ps(noise+i ); /* current noise estimates for floats 0-7 */
+ XMM7 = _mm_load_ps(noise+i+ 4);
+ XMM2 = _mm_load_ps(findex+i+ 8); /* findex values for floats 8-15 */
+ XMM3 = _mm_load_ps(findex+i+12);
+ XMM4 = _mm_mul_ps(XMM4, XMM1); /* findex * XMM1, where XMM1 = PB*PD is set before the loop */
+ XMM5 = _mm_mul_ps(XMM5, XMM1);
+ XMM4 = _mm_add_ps(XMM4, XMM0); /* + XMM0, where XMM0 = PA*PD - OFFSET is set before the loop */
+ XMM5 = _mm_add_ps(XMM5, XMM0);
+ XMM6 = _mm_min_ps(XMM6, XMM4); /* keep the smaller of the old and new estimate */
+ XMM7 = _mm_min_ps(XMM7, XMM5);
+ XMM4 = _mm_load_ps(noise+i+ 8); /* current noise estimates for floats 8-15 */
+ XMM5 = _mm_load_ps(noise+i+ 12);
+ _mm_store_ps(noise+i , XMM6);
+ _mm_store_ps(noise+i+ 4, XMM7);
+ XMM2 = _mm_mul_ps(XMM2, XMM1); /* same multiply, add and min for floats 8-15 */
+ XMM3 = _mm_mul_ps(XMM3, XMM1);
+ XMM2 = _mm_add_ps(XMM2, XMM0);
+ XMM3 = _mm_add_ps(XMM3, XMM0);
+ XMM2 = _mm_min_ps(XMM2, XMM4);
+ XMM3 = _mm_min_ps(XMM3, XMM5);
+ _mm_store_ps(noise+i+ 8, XMM2);
+ _mm_store_ps(noise+i+12, XMM3);
+ }
+ }
+#else /* SSE Optimize */
static void bark_noise_hybridmp(int n,const long *b,
const float *f,
float *noise,
@@ -760,6 +3676,7 @@
R = (A + x * B) / D;
if (R - offset < noise[i]) noise[i] = R - offset;
}
+#endif /* SSE Optimize */
}
static float FLOOR1_fromdB_INV_LOOKUP[256]={
@@ -839,10 +3756,145 @@
if(sliding_lowpass>n)sliding_lowpass=n;
+#ifdef __SSE__ /* SSE Optimize */
+{
+#if defined(_MSC_VER)
+ int j;
+ for(j=0;j<256;j+=16)
+ {
+ _mm_prefetch((const char*)(FLOOR1_fromdB_INV_LOOKUP+j ), _MM_HINT_NTA);
+ _mm_prefetch((const char*)(FLOOR1_fromdB_INV_LOOKUP+j+8), _MM_HINT_NTA);
+ }
+ _asm{
+ push ebp
+ push ebx
+ mov ecx, sliding_lowpass
+ mov edi, mdct
+ mov esi, codedflr
+ mov ebx, residue
+ lea ecx, [esi+ecx*4]
+ align 4
+ _vp_remove_floor_0:
+ mov eax, [esi ]
+ mov edx, [esi+ 4]
+ movss xmm0, FLOOR1_fromdB_INV_LOOKUP[eax*4]
+ movss xmm1, FLOOR1_fromdB_INV_LOOKUP[edx*4]
+ mov eax, [esi+ 8]
+ mov edx, [esi+12]
+ mulss xmm0, [edi ]
+ mulss xmm1, [edi+ 4]
+ movss xmm2, FLOOR1_fromdB_INV_LOOKUP[eax*4]
+ movss xmm3, FLOOR1_fromdB_INV_LOOKUP[edx*4]
+ mov eax, [esi+16]
+ mov edx, [esi+20]
+ mulss xmm2, [edi+ 8]
+ mulss xmm3, [edi+12]
+ movss xmm4, FLOOR1_fromdB_INV_LOOKUP[eax*4]
+ movss xmm5, FLOOR1_fromdB_INV_LOOKUP[edx*4]
+ mov eax, [esi+24]
+ mov edx, [esi+28]
+ movss [ebx ], xmm0
+ movss [ebx+ 4], xmm1
+ movss xmm6, FLOOR1_fromdB_INV_LOOKUP[eax*4]
+ movss xmm7, FLOOR1_fromdB_INV_LOOKUP[edx*4]
+ mulss xmm4, [edi+16]
+ mulss xmm5, [edi+20]
+ mov eax, [esi+32]
+ mov edx, [esi+36]
+ movss [ebx+ 8], xmm2
+ movss [ebx+12], xmm3
+ movss xmm0, FLOOR1_fromdB_INV_LOOKUP[eax*4]
+ movss xmm1, FLOOR1_fromdB_INV_LOOKUP[edx*4]
+ mulss xmm6, [edi+24]
+ mulss xmm7, [edi+28]
+ mov eax, [esi+40]
+ mov edx, [esi+44]
+ movss [ebx+16], xmm4
+ movss [ebx+20], xmm5
+ mulss xmm0, [edi+32]
+ mulss xmm1, [edi+36]
+ movss [ebx+24], xmm6
+ movss [ebx+28], xmm7
+ movss xmm2, FLOOR1_fromdB_INV_LOOKUP[eax*4]
+ movss xmm3, FLOOR1_fromdB_INV_LOOKUP[edx*4]
+ mov eax, [esi+48]
+ mov edx, [esi+52]
+ mulss xmm2, [edi+40]
+ mulss xmm3, [edi+44]
+ movss xmm4, FLOOR1_fromdB_INV_LOOKUP[eax*4]
+ movss xmm5, FLOOR1_fromdB_INV_LOOKUP[edx*4]
+ mov eax, [esi+56]
+ mov edx, [esi+60]
+ movss [ebx+32], xmm0
+ movss [ebx+36], xmm1
+ movss xmm6, FLOOR1_fromdB_INV_LOOKUP[eax*4]
+ movss xmm7, FLOOR1_fromdB_INV_LOOKUP[edx*4]
+ mulss xmm4, [edi+48]
+ mulss xmm5, [edi+52]
+ movss [ebx+40], xmm2
+ movss [ebx+44], xmm3
+ mulss xmm6, [edi+56]
+ mulss xmm7, [edi+60]
+ movss [ebx+48], xmm4
+ movss [ebx+52], xmm5
+ lea ebx, [ebx+64]
+ lea esi, [esi+64]
+ lea edi, [edi+64]
+ movss [ebx+56-64], xmm6
+ movss [ebx+60-64], xmm7
+
+ cmp esi, ecx
+ jl _vp_remove_floor_0
+ pop ebx
+ pop ebp
+ };
+ for(i=sliding_lowpass;i<n;i++)
+ residue[i]=0.;
+#else
+ int j;
+ float *work = (float*)_ogg_alloca(sliding_lowpass*sizeof(float)); /* scratch for the looked-up inverse floor values; NOTE(review): sized exactly sliding_lowpass floats while the loops below step by 4 and 16 - confirm sliding_lowpass is a multiple of 16 */
+
+ for(j=0;j<256;j+=16) /* prefetch the whole 256-entry lookup table, two hints per 16 floats */
+ {
+ _mm_prefetch((const char*)(FLOOR1_fromdB_INV_LOOKUP+j ), _MM_HINT_NTA);
+ _mm_prefetch((const char*)(FLOOR1_fromdB_INV_LOOKUP+j+8), _MM_HINT_NTA);
+ }
+ for(i=0;i<sliding_lowpass;i+=4) /* pass 1: scalar table lookups (no SSE gather), unrolled by 4 */
+ {
+ work[i ] = FLOOR1_fromdB_INV_LOOKUP[codedflr[i ]];
+ work[i+1] = FLOOR1_fromdB_INV_LOOKUP[codedflr[i+1]];
+ work[i+2] = FLOOR1_fromdB_INV_LOOKUP[codedflr[i+2]];
+ work[i+3] = FLOOR1_fromdB_INV_LOOKUP[codedflr[i+3]];
+ }
+ for(i=0;i<sliding_lowpass;i+=16) /* pass 2: residue[i] = mdct[i]*work[i], 16 floats per iteration */
+ {
+ __m128 XMM0, XMM1, XMM2, XMM3;
+ __m128 XMM4, XMM5, XMM6, XMM7;
+ XMM0 = _mm_load_ps(mdct+i ); /* aligned loads: mdct, work and residue must all be 16-byte aligned - TODO confirm for the alloca'd work */
+ XMM4 = _mm_load_ps(work+i );
+ XMM1 = _mm_load_ps(mdct+i+ 4);
+ XMM5 = _mm_load_ps(work+i+ 4);
+ XMM2 = _mm_load_ps(mdct+i+ 8);
+ XMM6 = _mm_load_ps(work+i+ 8);
+ XMM3 = _mm_load_ps(mdct+i+12);
+ XMM7 = _mm_load_ps(work+i+12);
+ XMM0 = _mm_mul_ps(XMM0, XMM4);
+ XMM1 = _mm_mul_ps(XMM1, XMM5);
+ XMM2 = _mm_mul_ps(XMM2, XMM6);
+ XMM3 = _mm_mul_ps(XMM3, XMM7);
+ _mm_store_ps(residue+i , XMM0);
+ _mm_store_ps(residue+i+ 4, XMM1);
+ _mm_store_ps(residue+i+ 8, XMM2);
+ _mm_store_ps(residue+i+12, XMM3);
+ } /* i is left at the first multiple of 16 >= sliding_lowpass; the zero-fill after the #endif resumes from there */
+#endif
+}
+#else /* SSE Optimize */
for(i=0;i<sliding_lowpass;i++){
residue[i]=
mdct[i]*FLOOR1_fromdB_INV_LOOKUP[codedflr[i]];
}
+#endif /* SSE Optimize */
for(;i<n;i++)
residue[i]=0.;
@@ -854,8 +3906,43 @@
float *logmask){
int i,n=p->n;
+#ifdef __SSE__ /* SSE Optimize */
+ float *work = (float*)_ogg_alloca(n*sizeof(*work)*2);
+ float *bwork = (float*)_ogg_alloca(n*sizeof(float)*5);
+
+#else /* SSE Optimize */
float *work=alloca(n*sizeof(*work));
+#endif /* SSE Optimize */
+
+#ifdef __SSE__ /* SSE Optimize */
+ bark_noise_hybridmp(p,logmdct,logmask,
+ 140.,-1, bwork, work+n);
+
+ for(i=0;i<n;i+=16)
+ {
+ __m128 XMM0, XMM1, XMM2, XMM3;
+ __m128 XMM4, XMM5, XMM6, XMM7;
+ XMM0 = _mm_load_ps(logmdct+i );
+ XMM4 = _mm_load_ps(logmask+i );
+ XMM1 = _mm_load_ps(logmdct+i+ 4);
+ XMM5 = _mm_load_ps(logmask+i+ 4);
+ XMM2 = _mm_load_ps(logmdct+i+ 8);
+ XMM6 = _mm_load_ps(logmask+i+ 8);
+ XMM3 = _mm_load_ps(logmdct+i+12);
+ XMM7 = _mm_load_ps(logmask+i+12);
+ XMM0 = _mm_sub_ps(XMM0, XMM4);
+ XMM1 = _mm_sub_ps(XMM1, XMM5);
+ XMM2 = _mm_sub_ps(XMM2, XMM6);
+ XMM3 = _mm_sub_ps(XMM3, XMM7);
+ _mm_store_ps(work+i , XMM0);
+ _mm_store_ps(work+i+ 4, XMM1);
+ _mm_store_ps(work+i+ 8, XMM2);
+ _mm_store_ps(work+i+12, XMM3);
+ }
+ bark_noise_hybridmp(p,work,logmask,0.,
+ p->vi->noisewindowfixed, bwork, work+n);
+#else /* SSE Optimize */
bark_noise_hybridmp(n,p->bark,logmdct,logmask,
140.,-1);
@@ -863,8 +3950,33 @@
bark_noise_hybridmp(n,p->bark,work,logmask,0.,
p->vi->noisewindowfixed);
+#endif /* SSE Optimize */
+#ifdef __SSE__ /* SSE Optimize */
+ for(i=0;i<n;i+=16)
+ {
+ __m128 XMM0, XMM1, XMM2, XMM3;
+ __m128 XMM4, XMM5, XMM6, XMM7;
+ XMM0 = _mm_load_ps(logmdct+i );
+ XMM4 = _mm_load_ps(work+i );
+ XMM1 = _mm_load_ps(logmdct+i+ 4);
+ XMM5 = _mm_load_ps(work+i+ 4);
+ XMM2 = _mm_load_ps(logmdct+i+ 8);
+ XMM6 = _mm_load_ps(work+i+ 8);
+ XMM3 = _mm_load_ps(logmdct+i+12);
+ XMM7 = _mm_load_ps(work+i+12);
+ XMM0 = _mm_sub_ps(XMM0, XMM4);
+ XMM1 = _mm_sub_ps(XMM1, XMM5);
+ XMM2 = _mm_sub_ps(XMM2, XMM6);
+ XMM3 = _mm_sub_ps(XMM3, XMM7);
+ _mm_store_ps(work+i , XMM0);
+ _mm_store_ps(work+i+ 4, XMM1);
+ _mm_store_ps(work+i+ 8, XMM2);
+ _mm_store_ps(work+i+12, XMM3);
+ }
+#else /* SSE Optimize */
for(i=0;i<n;i++)work[i]=logmdct[i]-work[i];
+#endif /* SSE Optimize */
#if 0
{
@@ -900,12 +4012,109 @@
((p->vi->noisecompand[dB]-p->vi->noisecompand_high[dB])*noise_compand_level);
}
}
+#ifdef __SSE__ /* SSE Optimize */
+ {
+ static _MM_ALIGN16 const __m128x NCLMAX = {
+ .sf = {
+ NOISE_COMPAND_LEVELS-1, NOISE_COMPAND_LEVELS-1,
+ NOISE_COMPAND_LEVELS-1, NOISE_COMPAND_LEVELS-1
+ }
+ };
+ int spm4 = (i+15)&(~15);
+ for(;i<spm4;i++){
+ int dB = logmask[i]+.5;
+ if(dB>=NOISE_COMPAND_LEVELS)
+ dB = NOISE_COMPAND_LEVELS-1;
+ if(dB<0)
+ dB = 0;
+ logmask[i] = work[i]+p->vi->noisecompand[dB];
+ }
+ {
+ register float* fwork2 = (float*)(work+n);
+ for(i=spm4;i<n;i+=16)
+ {
+#if !defined(__SSE2__)
+ __m64 MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7;
+#endif
+ __m128 XMM0, XMM1, XMM2, XMM3;
+ XMM0 = _mm_load_ps(logmask+i );
+ XMM1 = _mm_load_ps(logmask+i+ 4);
+ XMM2 = _mm_load_ps(logmask+i+ 8);
+ XMM3 = _mm_load_ps(logmask+i+12);
+ XMM0 = _mm_min_ps(XMM0, NCLMAX.ps);
+ XMM1 = _mm_min_ps(XMM1, NCLMAX.ps);
+ XMM2 = _mm_min_ps(XMM2, NCLMAX.ps);
+ XMM3 = _mm_min_ps(XMM3, NCLMAX.ps);
+ XMM0 = _mm_max_ps(XMM0, PFV_0.ps);
+ XMM1 = _mm_max_ps(XMM1, PFV_0.ps);
+ XMM2 = _mm_max_ps(XMM2, PFV_0.ps);
+ XMM3 = _mm_max_ps(XMM3, PFV_0.ps);
+#if defined(__SSE2__)
+ _mm_store_si128((__m128i*)(fwork2+i ), _mm_cvtps_epi32(XMM0));
+ _mm_store_si128((__m128i*)(fwork2+i+ 4), _mm_cvtps_epi32(XMM1));
+ _mm_store_si128((__m128i*)(fwork2+i+ 8), _mm_cvtps_epi32(XMM2));
+ _mm_store_si128((__m128i*)(fwork2+i+12), _mm_cvtps_epi32(XMM3));
+ }
+#else
+ MM0 = _mm_cvtps_pi32(XMM0);
+ MM2 = _mm_cvtps_pi32(XMM1);
+ MM4 = _mm_cvtps_pi32(XMM2);
+ MM6 = _mm_cvtps_pi32(XMM3);
+ XMM0 = _mm_movehl_ps(XMM0, XMM0);
+ XMM1 = _mm_movehl_ps(XMM1, XMM1);
+ XMM2 = _mm_movehl_ps(XMM2, XMM2);
+ XMM3 = _mm_movehl_ps(XMM3, XMM3);
+ MM1 = _mm_cvtps_pi32(XMM0);
+ MM3 = _mm_cvtps_pi32(XMM1);
+ MM5 = _mm_cvtps_pi32(XMM2);
+ MM7 = _mm_cvtps_pi32(XMM3);
+ PM64(fwork2+i ) = MM0;
+ PM64(fwork2+i+ 4) = MM2;
+ PM64(fwork2+i+ 8) = MM4;
+ PM64(fwork2+i+ 2) = MM1;
+ PM64(fwork2+i+12) = MM6;
+ PM64(fwork2+i+ 6) = MM3;
+ PM64(fwork2+i+10) = MM5;
+ PM64(fwork2+i+14) = MM7;
+ }
+ _mm_empty();
+#endif
+ for(i=spm4;i<n;i+=4)
+ {
+ fwork2[i ] = p->vi->noisecompand[*((int*)(fwork2+i ))];
+ fwork2[i+1] = p->vi->noisecompand[*((int*)(fwork2+i+1))];
+ fwork2[i+2] = p->vi->noisecompand[*((int*)(fwork2+i+2))];
+ fwork2[i+3] = p->vi->noisecompand[*((int*)(fwork2+i+3))];
+ }
+ for(i=spm4;i<n;i+=16)
+ {
+ __m128 XMM0 = _mm_load_ps(fwork2+i );
+ __m128 XMM4 = _mm_load_ps(work+i );
+ __m128 XMM1 = _mm_load_ps(fwork2+i+ 4);
+ __m128 XMM5 = _mm_load_ps(work+i+ 4);
+ __m128 XMM2 = _mm_load_ps(fwork2+i+ 8);
+ __m128 XMM6 = _mm_load_ps(work+i+ 8);
+ __m128 XMM3 = _mm_load_ps(fwork2+i+12);
+ __m128 XMM7 = _mm_load_ps(work+i+12);
+ XMM0 = _mm_add_ps(XMM0, XMM4);
+ XMM1 = _mm_add_ps(XMM1, XMM5);
+ XMM2 = _mm_add_ps(XMM2, XMM6);
+ XMM3 = _mm_add_ps(XMM3, XMM7);
+ _mm_store_ps(logmask+i , XMM0);
+ _mm_store_ps(logmask+i+ 4, XMM1);
+ _mm_store_ps(logmask+i+ 8, XMM2);
+ _mm_store_ps(logmask+i+12, XMM3);
+ }
+ }
+ }
+#else /* SSE Optimize */
for(;i<n;i++){
int dB=logmask[i]+.5;
if(dB>=NOISE_COMPAND_LEVELS)dB=NOISE_COMPAND_LEVELS-1;
if(dB<0)dB=0;
logmask[i]= work[i]+p->vi->noisecompand[dB];
}
+#endif /* SSE Optimize */
}
@@ -917,6 +4126,48 @@
int i,n=p->n;
+#ifdef __SSE__ /* SSE Optimize */
+ int seedsize = (p->total_octave_lines+31)&(~31); /* round up to a multiple of 32 floats so the fill loop below needs no tail */
+ float *seed = (float*)_ogg_alloca(sizeof(*seed)*seedsize);
+ float att=local_specmax+p->vi->ath_adjatt;
+ {
+ __m128 XMM0 = _mm_load_ps(PNEGINF); /* NOTE(review): PNEGINF is defined outside this hunk; presumably four copies of NEGINF as used by the scalar path - confirm */
+ for(i=0;i<seedsize;i+=32) /* fill every seed slot, including the round-up padding, 32 floats per pass */
+ {
+ _mm_store_ps(seed+i , XMM0);
+ _mm_store_ps(seed+i+ 4, XMM0);
+ _mm_store_ps(seed+i+ 8, XMM0);
+ _mm_store_ps(seed+i+12, XMM0);
+ _mm_store_ps(seed+i+16, XMM0);
+ _mm_store_ps(seed+i+20, XMM0);
+ _mm_store_ps(seed+i+24, XMM0);
+ _mm_store_ps(seed+i+28, XMM0);
+ }
+ }
+ /* set the ATH (floating below localmax, not global max by a
+ specified att) */
+ if(att<p->vi->ath_maxatt)att=p->vi->ath_maxatt; /* att is clamped from below by ath_maxatt */
+
+ {
+ __m128 pm = _mm_set_ps1(att); /* broadcast att to all four lanes */
+ for(i=0;i<n;i+=16) /* logmask[i] = p->ath[i] + att, 16 floats per pass; aligned access, so n is assumed a multiple of 16 - TODO confirm */
+ {
+ __m128 XMM0, XMM1, XMM2, XMM3;
+ XMM0 = _mm_load_ps(p->ath+i );
+ XMM1 = _mm_load_ps(p->ath+i+ 4);
+ XMM2 = _mm_load_ps(p->ath+i+ 8);
+ XMM3 = _mm_load_ps(p->ath+i+12);
+ XMM0 = _mm_add_ps(XMM0, pm);
+ XMM1 = _mm_add_ps(XMM1, pm);
+ XMM2 = _mm_add_ps(XMM2, pm);
+ XMM3 = _mm_add_ps(XMM3, pm);
+ _mm_store_ps(logmask+i , XMM0);
+ _mm_store_ps(logmask+i+ 4, XMM1);
+ _mm_store_ps(logmask+i+ 8, XMM2);
+ _mm_store_ps(logmask+i+12, XMM3);
+ }
+ }
+#else /* SSE Optimize */
float *seed=alloca(sizeof(*seed)*p->total_octave_lines);
float att=local_specmax+p->vi->ath_adjatt;
for(i=0;i<p->total_octave_lines;i++)seed[i]=NEGINF;
@@ -927,6 +4178,7 @@
for(i=0;i<n;i++)
logmask[i]=p->ath[i]+att;
+#endif /* SSE Optimize */
/* tone masking */
seed_loop(p,(const float ***)p->tonecurves,logfft,logmask,seed,global_specmax);
@@ -946,17 +4198,52 @@
int end_block,
int blocktype, int modenumber,
int nW_modenumber,
+#ifdef __SSE__ /* SSE Optimize */
+ int lW_blocktype, int lW_modenumber, int lW_no,
+ float *tlogmdct){
+#else /* SSE Optimize */
int lW_blocktype, int lW_modenumber, int lW_no){
+#endif /* SSE Optimize */
int i,j,n=p->n;
int m2_sw=0, padth; /* aoTuV for M2 */
int it_sw, *m3n, m3_count; /* aoTuV for M3 */
int m4_end, lp_pos, m4_start; /* aoTuV for M4 */
float de, coeffi, cx; /* aoTuV for M1 */
- float toneth; /* aoTuV for M2 */
+ /*float toneth;*/ /* aoTuV for M2 */
float noise_rate, noise_rate_low, noise_center, rate_mod; /* aoTuV for M3 */
float m4_thres; /* aoTuV for M4 */
float toneatt=p->vi->tone_masteratt[offset_select];
+#ifdef __SSE__ /* SSE Optimize */
+ static _MM_ALIGN16 const __m128x PCOEFFI =
+ { .sf = {-17.2f, -17.2f, -17.2f, -17.2f} }; /* vector form of the scalar coeffi = -17.2 threshold */
+ __m128 PCX0; /* fix: was static; rewritten from the per-call cx on every call, so static storage was shared mutable state between concurrent encoders */
+ __m128 PCX1; /* fix: was static; same reason as PCX0 */
+ static _MM_ALIGN16 const __m128x PM160 =
+ { .sf = {-160.f, -160.f, -160.f, -160.f} }; /* value stored into logmdct at and beyond lp_pos */
+ static _MM_ALIGN16 const __m128x PM140 =
+ { .sf = {-140.f, -140.f, -140.f, -140.f} }; /* logmdct > -140 test in the m4 range */
+ static _MM_ALIGN16 const __m128x PP0001 =
+ { .sf = {0.0001f, 0.0001f, 0.0001f, 0.0001f} }; /* lower clamp for the mdct scale factor */
+ static _MM_ALIGN16 const __m128x PP1 =
+ { .sf = {0.1f, 0.1f, 0.1f, 0.1f} }; /* multiply by 0.1 in place of the scalar /10 */
+ static _MM_ALIGN16 const __m128x P5 =
+ { .sf = {5.f, 5.f, 5.f, 5.f} }; /* the -5 offset applied to tempmdct/tlogmdct */
+ static _MM_ALIGN16 const __m128x P20 =
+ { .sf = {20.f, 20.f, 20.f, 20.f} }; /* m2_sw threshold on logmdct-lastmdct */
+ static _MM_ALIGN16 const __m128x P30 =
+ { .sf = {30.f, 30.f, 30.f, 30.f} }; /* knee of the valmask curve used above m3n[1] */
+ __m128 PTONEATT; /* the locals below are broadcast from per-call scalars when offset_select==1 */
+ __m128 PNOISEMAXSUPP;
+ __m128 PLOW_COMPAND;
+ __m128 PPADTH; /* holds 1.0f/padth, so the vector code multiplies instead of dividing */
+ __m128 PNOISE_CENTER;
+ __m128 PNOISE_RATE;
+ __m128 PNOISE_RATE_LOW;
+ __m128 PFV_C0, PFV_C1; /* constant terms of the two de formulas: 1 - 17.2*cx*0.005 and 1 - 17.2*cx*0.0003 */
+ __m128 PM4_THRES;
+ int midpoint; /* first index handled by the vector loops; bins below it take the scalar head loop */
+#endif /* SSE Optimize */
cx = p->m_val;
m3n = p->m3n;
@@ -997,13 +4284,49 @@
if((lW_no*m3_count) < 24) noise_center = lW_no*m3_count;
}
if(offset_select == 1){
+#ifdef __SSE__ /* SSE Optimize */
+ for(i=0; i<128; i+=16)
+ {
+ __m128 XMM0 = _mm_load_ps(tempmdct+i );
+ __m128 XMM1 = _mm_load_ps(tempmdct+i+ 4);
+ __m128 XMM2 = _mm_load_ps(tempmdct+i+ 8);
+ __m128 XMM3 = _mm_load_ps(tempmdct+i+12);
+ XMM0 = _mm_sub_ps(XMM0, P5.ps);
+ XMM1 = _mm_sub_ps(XMM1, P5.ps);
+ XMM2 = _mm_sub_ps(XMM2, P5.ps);
+ XMM3 = _mm_sub_ps(XMM3, P5.ps);
+ _mm_store_ps(tempmdct+i , XMM0);
+ _mm_store_ps(tempmdct+i+ 4, XMM1);
+ _mm_store_ps(tempmdct+i+ 8, XMM2);
+ _mm_store_ps(tempmdct+i+12, XMM3);
+ }
+#else /* SSE Optimize */
for(i=0; i<128; i++) tempmdct[i] -= 5;
+#endif /* SSE Optimize */
}
}else{ /* non_impulse - @Short(impulse) case */
noise_rate = 0.7;
noise_center = 0;
if(offset_select == 1){
+#ifdef __SSE__ /* SSE Optimize */
+ for(i=0; i<128; i+=16)
+ {
+ __m128 XMM0 = _mm_load_ps(lastmdct+i );
+ __m128 XMM1 = _mm_load_ps(lastmdct+i+ 4);
+ __m128 XMM2 = _mm_load_ps(lastmdct+i+ 8);
+ __m128 XMM3 = _mm_load_ps(lastmdct+i+12);
+ XMM0 = _mm_sub_ps(XMM0, P5.ps);
+ XMM1 = _mm_sub_ps(XMM1, P5.ps);
+ XMM2 = _mm_sub_ps(XMM2, P5.ps);
+ XMM3 = _mm_sub_ps(XMM3, P5.ps);
+ _mm_store_ps(tempmdct+i , XMM0);
+ _mm_store_ps(tempmdct+i+ 4, XMM1);
+ _mm_store_ps(tempmdct+i+ 8, XMM2);
+ _mm_store_ps(tempmdct+i+12, XMM3);
+ }
+#else /* SSE Optimize */
for(i=0; i<128; i++) tempmdct[i] = lastmdct[i] - 5;
+#endif /* SSE Optimize */
}
}
noise_rate_low = 0;
@@ -1023,6 +4346,686 @@
else lp_pos=end_block;
}
+#ifdef __SSE__ /* SSE Optimize */
+/*
+ printf("M4S = %d\n", m4_start);
+ printf("M4E = %d\n", m4_end);
+ printf("LP = %d\n\n", lp_pos);
+*/
+ if(offset_select==1)
+ {
+ PTONEATT = _mm_set_ps1(toneatt);
+ PNOISEMAXSUPP = _mm_set_ps1(p->vi->noisemaxsupp);
+ PLOW_COMPAND = _mm_set_ps1(low_compand);
+ PPADTH = _mm_set_ps1(1.0f/padth);
+ PNOISE_CENTER = _mm_set_ps1(noise_center);
+ PNOISE_RATE = _mm_set_ps1(noise_rate);
+ PNOISE_RATE_LOW = _mm_set_ps1(noise_rate_low);
+ PCX0 = _mm_set_ps1(-0.005 *cx);
+ PCX1 = _mm_set_ps1(-0.0003*cx);
+ PFV_C0 = _mm_set_ps1(1.0f-17.2f*cx*0.005f);
+ PFV_C1 = _mm_set_ps1(1.0f-17.2f*cx*0.0003f);
+ PM4_THRES = _mm_set_ps1(m4_thres);
+ if(it_sw){
+ for(i=0;i<n;i+=16)
+ {
+ __m128 XMM0 = _mm_load_ps(logmdct+i );
+ __m128 XMM1 = _mm_load_ps(logmdct+i+ 4);
+ __m128 XMM2 = _mm_load_ps(logmdct+i+ 8);
+ __m128 XMM3 = _mm_load_ps(logmdct+i+12);
+ XMM0 = _mm_sub_ps(XMM0, P5.ps);
+ XMM1 = _mm_sub_ps(XMM1, P5.ps);
+ XMM2 = _mm_sub_ps(XMM2, P5.ps);
+ XMM3 = _mm_sub_ps(XMM3, P5.ps);
+ _mm_store_ps(tlogmdct+i , XMM0);
+ _mm_store_ps(tlogmdct+i+ 4, XMM1);
+ _mm_store_ps(tlogmdct+i+ 8, XMM2);
+ _mm_store_ps(tlogmdct+i+12, XMM3);
+ }
+ }
+ midpoint = ((n<=m4_start&&n<=lp_pos)||(lp_pos>=m4_end&&n>lp_pos)) ? ((m3n[1]+4)&(~3)) : 0; /* fix: run the scalar head only when a vector branch below will follow; otherwise SAFE_MODE restarts at i=0 and would process these bins twice */
+ for(i=0;i<midpoint;i++) /* scalar head: bins [0,midpoint), which cover every index up to m3n[1] */
+ {
+ float val= noise[i]+p->noiseoffset[1][i];
+ float tval= tone[i]+toneatt;
+ tval-=low_compand;
+ if(val>p->vi->noisemaxsupp)val=p->vi->noisemaxsupp;
+
+ if(m2_sw){
+ if((logmdct[i]-lastmdct[i]) > 20){ /* bin rose by more than 20 relative to lastmdct: lower val by the excess over padth */
+ if(i > m3n[3]) val -= (logmdct[i]-lastmdct[i]-20)/padth;
+ else val -= (logmdct[i]-lastmdct[i]-20)/(padth+padth);
+ }
+ }
+
+ if(it_sw){
+ const float* ptempbuf = PTEMP_BFN[temp_bfn[i]]; /* table of offsets added to logmdct[i], one per following bin */
+ for(j=1; j<=temp_bfn[i]; j++,ptempbuf++){ /* raise tempmdct for the temp_bfn[i] bins after i */
+ float tempbuf = logmdct[i]+(*ptempbuf);
+ if( (tempmdct[i+j] < tempbuf) && (tempmdct[i+j] < tlogmdct[i+j]) )
+ tempmdct[i+j] = tlogmdct[i+j];
+ }
+ if(val > tval){
+ if( (val>lastmdct[i]) && (logmdct[i]>(tempmdct[i]+noise_center)) ){
+ float valmask=0;
+ tempmdct[i] = logmdct[i];
+
+ if(logmdct[i]>lastmdct[i]){
+ rate_mod = noise_rate;
+ }else{
+ rate_mod = noise_rate_low;
+ }
+ if(i > m3n[1]){ /* knee and weight of valmask depend on which m3n band i falls in */
+ if((val-tval)>30) valmask=((val-tval-30)/10+30)*rate_mod;
+ else valmask=(val-tval)*rate_mod;
+ }else if(i > m3n[2]){
+ if((val-tval)>20) valmask=((val-tval-20)/10+20)*rate_mod;
+ else valmask=(val-tval)*rate_mod;
+ }else if(i > m3n[3]){
+ if((val-tval)>10) valmask=((val-tval-10)/10+10)*rate_mod*0.5;
+ else valmask=(val-tval)*rate_mod*0.5;
+ }else{
+ if((val-tval)>10) valmask=((val-tval-10)/10+10)*rate_mod*0.3;
+ else valmask=(val-tval)*rate_mod*0.3;
+ }
+ if((val-valmask)>lastmdct[i])val-=valmask; /* val is lowered by valmask but never below lastmdct[i] */
+ else val=lastmdct[i];
+ }
+ }
+ }
+
+ if(val>tval){ /* logmask[i] = max(val, tval) */
+ logmask[i]=val;
+ }else logmask[i]=tval;
+
+ coeffi = -17.2;
+ val = val - logmdct[i];
+
+ if(val > coeffi){
+ de = 1.0-((val-coeffi)*0.005*cx);
+ if(de < 0) de = 0.0001;
+ }else
+ de = 1.0-((val-coeffi)*0.0003*cx);
+ mdct[i] *= de; /* in-place scaling: running this loop and then SAFE_MODE over the same bins would apply de twice */
+ }
+ if(n<=m4_start&&n<=lp_pos)
+ {
+ for(;i<n;i+=4)
+ {
+ __m128 PVAL = _mm_load_ps(noise+i);
+ __m128 PTVAL = _mm_load_ps(tone+i);
+ PVAL = _mm_add_ps(PVAL, PM128(p->noiseoffset[1]+i));
+ PTVAL = _mm_add_ps(PTVAL, PTONEATT);
+ PVAL = _mm_min_ps(PVAL, PNOISEMAXSUPP);
+ PTVAL = _mm_sub_ps(PTVAL, PLOW_COMPAND);
+ if(m2_sw)
+ {
+ __m128 XMM0 = _mm_load_ps(logmdct+i);
+ __m128 XMM1 = _mm_load_ps(lastmdct+i);
+ __m128 XMM2 = _mm_load_ps(P20.sf);
+ XMM0 = _mm_sub_ps(XMM0, XMM1);
+ XMM0 = _mm_sub_ps(XMM0, XMM2);
+ XMM1 = XMM0;
+ XMM0 = _mm_mul_ps(XMM0, PPADTH);
+ XMM1 = _mm_cmplt_ps(XMM1, PFV_0.ps);
+ XMM1 = _mm_andnot_ps(XMM1, XMM0);
+ PVAL = _mm_sub_ps(PVAL, XMM1);
+ }
+ if(it_sw){
+ int k;
+ for(k=0;k<4;k++)
+ {
+ const float* ptempbuf = PTEMP_BFN[temp_bfn[i+k]];
+ __m128 PLOGMDCT = _mm_set_ps1(logmdct[i+k]);
+ if(((i+k)&3)==3)
+ {
+ for(j=1; j<temp_bfn8[i+k]; j+=8, ptempbuf+=8)
+ {
+ __m128 XMM0, XMM1, XMM2;
+ __m128 XMM3, XMM4, XMM5;
+ XMM0 = _mm_load_ps(ptempbuf );
+ XMM3 = _mm_load_ps(ptempbuf+4);
+ XMM1 = _mm_load_ps(tempmdct+i+j+k );
+ XMM4 = _mm_load_ps(tempmdct+i+j+k+4);
+ XMM0 = _mm_add_ps(XMM0, PLOGMDCT);
+ XMM3 = _mm_add_ps(XMM3, PLOGMDCT);
+ XMM2 = _mm_load_ps(tlogmdct+i+j+k );
+ XMM5 = _mm_load_ps(tlogmdct+i+j+k+4);
+ XMM0 = _mm_min_ps(XMM0, XMM2);
+ XMM3 = _mm_min_ps(XMM3, XMM5);
+ XMM0 = _mm_cmple_ps(XMM0, XMM1);
+ XMM3 = _mm_cmple_ps(XMM3, XMM4);
+ XMM1 = _mm_and_ps(XMM1, XMM0);
+ XMM4 = _mm_and_ps(XMM4, XMM3);
+ XMM0 = _mm_andnot_ps(XMM0, XMM2);
+ XMM3 = _mm_andnot_ps(XMM3, XMM5);
+ XMM1 = _mm_or_ps(XMM1, XMM0);
+ XMM4 = _mm_or_ps(XMM4, XMM3);
+ _mm_store_ps(tempmdct+i+j+k , XMM1);
+ _mm_store_ps(tempmdct+i+j+k+4, XMM4);
+ }
+ for(; j<temp_bfn4[i+k]; j+=4, ptempbuf+=4)
+ {
+ __m128 XMM0, XMM1, XMM2;
+ XMM0 = _mm_load_ps(ptempbuf);
+ XMM1 = _mm_load_ps(tempmdct+i+j+k);
+ XMM0 = _mm_add_ps(XMM0, PLOGMDCT);
+ XMM2 = _mm_load_ps(tlogmdct+i+j+k);
+ XMM0 = _mm_min_ps(XMM0, XMM2);
+ XMM0 = _mm_cmple_ps(XMM0, XMM1);
+ XMM1 = _mm_and_ps(XMM1, XMM0);
+ XMM0 = _mm_andnot_ps(XMM0, XMM2);
+ XMM1 = _mm_or_ps(XMM1, XMM0);
+ _mm_store_ps(tempmdct+i+j+k, XMM1);
+ }
+ }
+ else
+ {
+ for(j=1; j<temp_bfn8[i+k]; j+=8, ptempbuf+=8)
+ {
+ __m128 XMM0, XMM1, XMM2;
+ __m128 XMM3, XMM4, XMM5;
+ XMM0 = _mm_load_ps(ptempbuf );
+ XMM3 = _mm_load_ps(ptempbuf+4);
+ XMM1 = _mm_lddqu_ps(tempmdct+i+j+k );
+ XMM4 = _mm_lddqu_ps(tempmdct+i+j+k+4);
+ XMM0 = _mm_add_ps(XMM0, PLOGMDCT);
+ XMM3 = _mm_add_ps(XMM3, PLOGMDCT);
+ XMM2 = _mm_lddqu_ps(tlogmdct+i+j+k );
+ XMM5 = _mm_lddqu_ps(tlogmdct+i+j+k+4);
+ XMM0 = _mm_min_ps(XMM0, XMM2);
+ XMM3 = _mm_min_ps(XMM3, XMM5);
+ XMM0 = _mm_cmple_ps(XMM0, XMM1);
+ XMM3 = _mm_cmple_ps(XMM3, XMM4);
+ XMM1 = _mm_and_ps(XMM1, XMM0);
+ XMM4 = _mm_and_ps(XMM4, XMM3);
+ XMM0 = _mm_andnot_ps(XMM0, XMM2);
+ XMM3 = _mm_andnot_ps(XMM3, XMM5);
+ XMM1 = _mm_or_ps(XMM1, XMM0);
+ XMM4 = _mm_or_ps(XMM4, XMM3);
+ _mm_storeu_ps(tempmdct+i+j+k , XMM1);
+ _mm_storeu_ps(tempmdct+i+j+k+4, XMM4);
+ }
+ for(; j<temp_bfn4[i+k]; j+=4, ptempbuf+=4)
+ {
+ __m128 XMM0, XMM1, XMM2;
+ XMM0 = _mm_load_ps(ptempbuf);
+ XMM1 = _mm_lddqu_ps(tempmdct+i+j+k );
+ XMM0 = _mm_add_ps(XMM0, PLOGMDCT);
+ XMM2 = _mm_lddqu_ps(tlogmdct+i+j+k );
+ XMM0 = _mm_min_ps(XMM0, XMM2);
+ XMM0 = _mm_cmple_ps(XMM0, XMM1);
+ XMM1 = _mm_and_ps(XMM1, XMM0);
+ XMM0 = _mm_andnot_ps(XMM0, XMM2);
+ XMM1 = _mm_or_ps(XMM1, XMM0);
+ _mm_storeu_ps(tempmdct+i+j+k, XMM1);
+ }
+ }
+ switch(temp_bfn[i+k]-j)
+ {
+ case 0 :
+ {
+ __m128 XMM0, XMM1, XMM2;
+ XMM0 = _mm_load_ss(ptempbuf);
+ XMM1 = _mm_load_ss(tempmdct+i+j+k);
+ XMM0 = _mm_add_ss(XMM0, PLOGMDCT);
+ XMM2 = _mm_load_ss(tlogmdct+i+j+k);
+ XMM0 = _mm_min_ss(XMM0, XMM2);
+ XMM0 = _mm_cmple_ss(XMM0, XMM1);
+ XMM1 = _mm_and_ps(XMM1, XMM0);
+ XMM0 = _mm_andnot_ps(XMM0, XMM2);
+ XMM1 = _mm_or_ps(XMM1, XMM0);
+ _mm_store_ss(tempmdct+i+j+k, XMM1);
+ }
+ break;
+ case 1 :
+ {
+ __m128 XMM0, XMM1, XMM2;
+ XMM0 = _mm_loadl_pi(XMM0, (__m64*)ptempbuf);
+ XMM1 = _mm_loadl_pi(XMM1, (__m64*)(tempmdct+i+j+k));
+ XMM0 = _mm_add_ps(XMM0, PLOGMDCT);
+ XMM2 = _mm_loadl_pi(XMM2, (__m64*)(tlogmdct+i+j+k));
+ XMM0 = _mm_min_ps(XMM0, XMM2);
+ XMM0 = _mm_cmple_ps(XMM0, XMM1);
+ XMM1 = _mm_and_ps(XMM1, XMM0);
+ XMM0 = _mm_andnot_ps(XMM0, XMM2);
+ XMM1 = _mm_or_ps(XMM1, XMM0);
+ _mm_storel_pi((__m64*)(tempmdct+i+j+k), XMM1);
+ }
+ break;
+ case 2 :
+ {
+ __m128 XMM0, XMM1, XMM2;
+ XMM0 = _mm_load_ps(ptempbuf);
+ XMM1 = _mm_lddqu_ps(tempmdct+i+j+k);
+ XMM0 = _mm_add_ps(XMM0, PLOGMDCT);
+ XMM2 = _mm_lddqu_ps(tlogmdct+i+j+k);
+ XMM0 = _mm_min_ps(XMM0, XMM2);
+ XMM0 = _mm_cmple_ps(XMM0, XMM1);
+ XMM1 = _mm_and_ps(XMM1, XMM0);
+ XMM0 = _mm_andnot_ps(XMM0, XMM2);
+ XMM1 = _mm_or_ps(XMM1, XMM0);
+ _mm_storel_pi((__m64*)(tempmdct+i+j+k), XMM1);
+ XMM1 = _mm_movehl_ps(XMM1, XMM1);
+ _mm_store_ss(tempmdct+i+j+k+2, XMM1);
+ }
+ break;
+ case 3 :
+ break;
+ }
+ }
+ {
+ __m128 XMM0 = _mm_cmpgt_ps(PVAL, _mm_max_ps(PTVAL, PM128(lastmdct+i)));
+ if(_mm_movemask_ps(XMM0))
+ {
+ __m128 XMM1 = _mm_cmpgt_ps(PM128(logmdct+i), _mm_add_ps(PM128(tempmdct+i), PNOISE_CENTER));
+ __m128 XMM2, XMM3, XMM4;
+ XMM0 = _mm_and_ps(XMM0, XMM1);
+ if(_mm_movemask_ps(XMM0))
+ {
+ XMM1 = _mm_load_ps(logmdct+i);
+ XMM2 = XMM0;
+ XMM3 = XMM1;
+ XMM3 = _mm_or_ps(
+ _mm_and_ps(XMM3, XMM2),
+ _mm_andnot_ps(XMM2, PM128(tempmdct+i))
+ );
+ _mm_store_ps(tempmdct+i, XMM3);
+ XMM1 = _mm_cmpgt_ps(XMM1, PM128(lastmdct+i));
+ XMM2 = _mm_or_ps(
+ _mm_and_ps(PNOISE_RATE, XMM1),
+ _mm_andnot_ps(XMM1, PNOISE_RATE_LOW)
+ ); /* rate_mod */
+ XMM1 = _mm_sub_ps(PVAL, PTVAL);
+ XMM3 = XMM1;
+ XMM1 = _mm_sub_ps(XMM1, P30.ps);
+ XMM4 = _mm_cmpgt_ps(XMM1, PFV_0.ps);
+ XMM1 = _mm_mul_ps(XMM1, PP1.ps);
+ XMM1 = _mm_add_ps(XMM1, P30.ps);
+ XMM1 = _mm_and_ps(XMM1, XMM4);
+ XMM4 = _mm_andnot_ps(XMM4, XMM3);
+ XMM1 = _mm_or_ps(XMM1, XMM4);
+ XMM1 = _mm_mul_ps(XMM1, XMM2);
+ XMM3 = PVAL;
+ XMM3 = _mm_sub_ps(XMM3, XMM1);
+ XMM3 = _mm_max_ps(XMM3, PM128(lastmdct+i));
+ XMM3 = _mm_and_ps(XMM3, XMM0);
+ XMM0 = _mm_andnot_ps(XMM0, PVAL);
+ PVAL = _mm_or_ps(XMM3, XMM0);
+ }
+ }
+ }
+ }
+ _mm_store_ps(logmask+i, _mm_max_ps(PVAL, PTVAL));
+ {
+ __m128 XMM0, XMM1, XMM2;
+ PVAL = _mm_sub_ps(PVAL, PM128(logmdct+i));
+ XMM0 = PVAL;
+ XMM1 = PVAL;
+ XMM2 = PVAL;
+ XMM0 = _mm_cmpgt_ps(XMM0, PCOEFFI.ps);
+ XMM1 = _mm_mul_ps(XMM1, PCX0);
+ XMM2 = _mm_mul_ps(XMM2, PCX1);
+ XMM1 = _mm_add_ps(XMM1, PFV_C0);
+ XMM2 = _mm_add_ps(XMM2, PFV_C1);
+ XMM1 = _mm_max_ps(XMM1, PP0001.ps);
+ XMM1 = _mm_and_ps(XMM1, XMM0);
+ XMM0 = _mm_andnot_ps(XMM0, XMM2);
+ XMM1 = _mm_or_ps(XMM1, XMM0);
+ XMM1 = _mm_mul_ps(XMM1, PM128(mdct+i));
+ _mm_store_ps(mdct+i, XMM1);
+ }
+ }
+ }
+ else if(lp_pos>=m4_end&&n>lp_pos)
+ {
+ char RunMode[2048];
+ j = (m3n[1]+4)&(~3);
+ midpoint = m4_start&(~3);
+ for(;j<midpoint;j+=4)
+ RunMode[j] = 1; /* SSE-1 */
+ midpoint = (m4_start+4)&(~3); /* i>m4_start is not 1=>m4_start */
+ for(;j<midpoint;j+=4)
+ RunMode[j] = 0; /* Normal */
+ midpoint = m4_end&(~3);
+ for(;j<midpoint;j+=4)
+ RunMode[j] = 2; /* SSE-2 */
+ midpoint = (m4_end+3)&(~3);
+ for(;j<midpoint;j+=4)
+ RunMode[j] = 0; /* Normal */
+ midpoint = lp_pos&(~3);
+ for(;j<midpoint;j+=4)
+ RunMode[j] = 1; /* SSE-1 */
+ midpoint = (lp_pos+3)&(~3);
+ for(;j<midpoint;j+=4)
+ RunMode[j] = 3; /* SSE-3 */
+ for(;j<n;j+=4)
+ RunMode[j] = 4; /* SSE-4 */
+ for(;i<n;i+=4)
+ {
+ __m128 PVAL = _mm_load_ps(noise+i);
+ __m128 PTVAL = _mm_load_ps(tone+i);
+ PVAL = _mm_add_ps(PVAL, PM128(p->noiseoffset[1]+i));
+ PTVAL = _mm_add_ps(PTVAL, PTONEATT);
+ PVAL = _mm_min_ps(PVAL, PNOISEMAXSUPP);
+ PTVAL = _mm_sub_ps(PTVAL, PLOW_COMPAND);
+ if(m2_sw)
+ {
+ __m128 XMM0 = _mm_load_ps(logmdct+i);
+ __m128 XMM1 = _mm_load_ps(lastmdct+i);
+ __m128 XMM2 = _mm_load_ps(P20.sf);
+ XMM0 = _mm_sub_ps(XMM0, XMM1);
+ XMM0 = _mm_sub_ps(XMM0, XMM2);
+ XMM1 = XMM0;
+ XMM0 = _mm_mul_ps(XMM0, PPADTH);
+ XMM1 = _mm_cmplt_ps(XMM1, PFV_0.ps);
+ XMM1 = _mm_andnot_ps(XMM1, XMM0);
+ PVAL = _mm_sub_ps(PVAL, XMM1);
+ }
+ if(it_sw){
+ int k;
+ for(k=0;k<4;k++)
+ {
+ const float* ptempbuf = PTEMP_BFN[temp_bfn[i+k]];
+ __m128 PLOGMDCT = _mm_set_ps1(logmdct[i+k]);
+ if(((i+k)&3)==3)
+ {
+ for(j=1; j<temp_bfn8[i+k]; j+=8, ptempbuf+=8)
+ {
+ __m128 XMM0, XMM1, XMM2;
+ __m128 XMM3, XMM4, XMM5;
+ XMM0 = _mm_load_ps(ptempbuf );
+ XMM3 = _mm_load_ps(ptempbuf+4);
+ XMM1 = _mm_load_ps(tempmdct+i+j+k );
+ XMM4 = _mm_load_ps(tempmdct+i+j+k+4);
+ XMM0 = _mm_add_ps(XMM0, PLOGMDCT);
+ XMM3 = _mm_add_ps(XMM3, PLOGMDCT);
+ XMM2 = _mm_load_ps(tlogmdct+i+j+k );
+ XMM5 = _mm_load_ps(tlogmdct+i+j+k+4);
+ XMM0 = _mm_min_ps(XMM0, XMM2);
+ XMM3 = _mm_min_ps(XMM3, XMM5);
+ XMM0 = _mm_cmple_ps(XMM0, XMM1);
+ XMM3 = _mm_cmple_ps(XMM3, XMM4);
+ XMM1 = _mm_and_ps(XMM1, XMM0);
+ XMM4 = _mm_and_ps(XMM4, XMM3);
+ XMM0 = _mm_andnot_ps(XMM0, XMM2);
+ XMM3 = _mm_andnot_ps(XMM3, XMM5);
+ XMM1 = _mm_or_ps(XMM1, XMM0);
+ XMM4 = _mm_or_ps(XMM4, XMM3);
+ _mm_store_ps(tempmdct+i+j+k , XMM1);
+ _mm_store_ps(tempmdct+i+j+k+4, XMM4);
+ }
+ for(; j<temp_bfn4[i+k]; j+=4, ptempbuf+=4)
+ {
+ __m128 XMM0, XMM1, XMM2;
+ XMM0 = _mm_load_ps(ptempbuf);
+ XMM1 = _mm_load_ps(tempmdct+i+j+k);
+ XMM0 = _mm_add_ps(XMM0, PLOGMDCT);
+ XMM2 = _mm_load_ps(tlogmdct+i+j+k);
+ XMM0 = _mm_min_ps(XMM0, XMM2);
+ XMM0 = _mm_cmple_ps(XMM0, XMM1);
+ XMM1 = _mm_and_ps(XMM1, XMM0);
+ XMM0 = _mm_andnot_ps(XMM0, XMM2);
+ XMM1 = _mm_or_ps(XMM1, XMM0);
+ _mm_store_ps(tempmdct+i+j+k, XMM1);
+ }
+ }
+ else
+ {
+ for(j=1; j<temp_bfn8[i+k]; j+=8, ptempbuf+=8)
+ {
+ __m128 XMM0, XMM1, XMM2;
+ __m128 XMM3, XMM4, XMM5;
+ XMM0 = _mm_load_ps(ptempbuf );
+ XMM3 = _mm_load_ps(ptempbuf+4);
+ XMM1 = _mm_lddqu_ps(tempmdct+i+j+k );
+ XMM4 = _mm_lddqu_ps(tempmdct+i+j+k+4);
+ XMM0 = _mm_add_ps(XMM0, PLOGMDCT);
+ XMM3 = _mm_add_ps(XMM3, PLOGMDCT);
+ XMM2 = _mm_lddqu_ps(tlogmdct+i+j+k );
+ XMM5 = _mm_lddqu_ps(tlogmdct+i+j+k+4);
+ XMM0 = _mm_min_ps(XMM0, XMM2);
+ XMM3 = _mm_min_ps(XMM3, XMM5);
+ XMM0 = _mm_cmple_ps(XMM0, XMM1);
+ XMM3 = _mm_cmple_ps(XMM3, XMM4);
+ XMM1 = _mm_and_ps(XMM1, XMM0);
+ XMM4 = _mm_and_ps(XMM4, XMM3);
+ XMM0 = _mm_andnot_ps(XMM0, XMM2);
+ XMM3 = _mm_andnot_ps(XMM3, XMM5);
+ XMM1 = _mm_or_ps(XMM1, XMM0);
+ XMM4 = _mm_or_ps(XMM4, XMM3);
+ _mm_storeu_ps(tempmdct+i+j+k , XMM1);
+ _mm_storeu_ps(tempmdct+i+j+k+4, XMM4);
+ }
+ for(; j<temp_bfn4[i+k]; j+=4, ptempbuf+=4)
+ {
+ __m128 XMM0, XMM1, XMM2;
+ XMM0 = _mm_load_ps(ptempbuf);
+ XMM1 = _mm_lddqu_ps(tempmdct+i+j+k );
+ XMM0 = _mm_add_ps(XMM0, PLOGMDCT);
+ XMM2 = _mm_lddqu_ps(tlogmdct+i+j+k );
+ XMM0 = _mm_min_ps(XMM0, XMM2);
+ XMM0 = _mm_cmple_ps(XMM0, XMM1);
+ XMM1 = _mm_and_ps(XMM1, XMM0);
+ XMM0 = _mm_andnot_ps(XMM0, XMM2);
+ XMM1 = _mm_or_ps(XMM1, XMM0);
+ _mm_storeu_ps(tempmdct+i+j+k, XMM1);
+ }
+ }
+ switch(temp_bfn[i+k]-j)
+ {
+ case 0 :
+ {
+ __m128 XMM0, XMM1, XMM2;
+ XMM0 = _mm_load_ss(ptempbuf);
+ XMM1 = _mm_load_ss(tempmdct+i+j+k);
+ XMM0 = _mm_add_ss(XMM0, PLOGMDCT);
+ XMM2 = _mm_load_ss(tlogmdct+i+j+k);
+ XMM0 = _mm_min_ss(XMM0, XMM2);
+ XMM0 = _mm_cmple_ss(XMM0, XMM1);
+ XMM1 = _mm_and_ps(XMM1, XMM0);
+ XMM0 = _mm_andnot_ps(XMM0, XMM2);
+ XMM1 = _mm_or_ps(XMM1, XMM0);
+ _mm_store_ss(tempmdct+i+j+k, XMM1);
+ }
+ break;
+ case 1 :
+ {
+ __m128 XMM0, XMM1, XMM2;
+ XMM0 = _mm_loadl_pi(XMM0, (__m64*)ptempbuf);
+ XMM1 = _mm_loadl_pi(XMM1, (__m64*)(tempmdct+i+j+k));
+ XMM0 = _mm_add_ps(XMM0, PLOGMDCT);
+ XMM2 = _mm_loadl_pi(XMM2, (__m64*)(tlogmdct+i+j+k));
+ XMM0 = _mm_min_ps(XMM0, XMM2);
+ XMM0 = _mm_cmple_ps(XMM0, XMM1);
+ XMM1 = _mm_and_ps(XMM1, XMM0);
+ XMM0 = _mm_andnot_ps(XMM0, XMM2);
+ XMM1 = _mm_or_ps(XMM1, XMM0);
+ _mm_storel_pi((__m64*)(tempmdct+i+j+k), XMM1);
+ }
+ break;
+ case 2 :
+ {
+ __m128 XMM0, XMM1, XMM2;
+ XMM0 = _mm_load_ps(ptempbuf);
+ XMM1 = _mm_lddqu_ps(tempmdct+i+j+k);
+ XMM0 = _mm_add_ps(XMM0, PLOGMDCT);
+ XMM2 = _mm_lddqu_ps(tlogmdct+i+j+k);
+ XMM0 = _mm_min_ps(XMM0, XMM2);
+ XMM0 = _mm_cmple_ps(XMM0, XMM1);
+ XMM1 = _mm_and_ps(XMM1, XMM0);
+ XMM0 = _mm_andnot_ps(XMM0, XMM2);
+ XMM1 = _mm_or_ps(XMM1, XMM0);
+ _mm_storel_pi((__m64*)(tempmdct+i+j+k), XMM1);
+ XMM1 = _mm_movehl_ps(XMM1, XMM1);
+ _mm_store_ss(tempmdct+i+j+k+2, XMM1);
+ }
+ break;
+ case 3 :
+ break;
+ }
+ }
+ {
+ __m128 XMM0 = _mm_cmpgt_ps(PVAL, _mm_max_ps(PTVAL, PM128(lastmdct+i)));
+ if(_mm_movemask_ps(XMM0))
+ {
+ __m128 XMM1 = _mm_cmpgt_ps(PM128(logmdct+i), _mm_add_ps(PM128(tempmdct+i), PNOISE_CENTER));
+ __m128 XMM2, XMM3, XMM4;
+ XMM0 = _mm_and_ps(XMM0, XMM1);
+ if(_mm_movemask_ps(XMM0))
+ {
+ XMM1 = _mm_load_ps(logmdct+i);
+ XMM2 = XMM0;
+ XMM3 = XMM1;
+ XMM3 = _mm_or_ps(
+ _mm_and_ps(XMM3, XMM2),
+ _mm_andnot_ps(XMM2, PM128(tempmdct+i))
+ );
+ _mm_store_ps(tempmdct+i, XMM3);
+ XMM1 = _mm_cmpgt_ps(XMM1, PM128(lastmdct+i));
+ XMM2 = _mm_or_ps(
+ _mm_and_ps(PNOISE_RATE, XMM1),
+ _mm_andnot_ps(XMM1, PNOISE_RATE_LOW)
+ ); /* rate_mod */
+ XMM1 = _mm_sub_ps(PVAL, PTVAL);
+ XMM3 = XMM1;
+ XMM1 = _mm_sub_ps(XMM1, P30.ps);
+ XMM4 = _mm_cmpgt_ps(XMM1, PFV_0.ps);
+ XMM1 = _mm_mul_ps(XMM1, PP1.ps);
+ XMM1 = _mm_add_ps(XMM1, P30.ps);
+ XMM1 = _mm_and_ps(XMM1, XMM4);
+ XMM4 = _mm_andnot_ps(XMM4, XMM3);
+ XMM1 = _mm_or_ps(XMM1, XMM4);
+ XMM1 = _mm_mul_ps(XMM1, XMM2);
+ XMM3 = PVAL;
+ XMM3 = _mm_sub_ps(XMM3, XMM1);
+ XMM3 = _mm_max_ps(XMM3, PM128(lastmdct+i));
+ XMM3 = _mm_and_ps(XMM3, XMM0);
+ XMM0 = _mm_andnot_ps(XMM0, PVAL);
+ PVAL = _mm_or_ps(XMM3, XMM0);
+ }
+ }
+ }
+ }
+ switch(RunMode[i])
+ {
+ default:
+ case 0: /* Default */
+ {
+ int k;
+ __m128x T0, T1;
+ T0.ps = PVAL;
+ T1.ps = PTVAL;
+ for(k=0;k<4;k++){
+ float val = T0.sf[k];
+ float tval = T1.sf[k];
+ if(i+k>=lp_pos)logmdct[i+k]=-160;
+ if(val>tval){
+ logmask[i+k]=val;
+ }else if((i+k>m4_start) && (i+k<m4_end) && (logmdct[i+k]>-140)){
+ if(logmdct[i+k]>val){
+ if(logmdct[i+k]<tval)tval-=(tval-val)*m4_thres;
+ }else{
+ if(val<tval)tval-=(tval-val)*m4_thres;
+ }
+ logmask[i+k]=tval;
+ }else logmask[i+k]=tval;
+ T1.sf[k] = tval;
+ }
+ PTVAL = T1.ps;
+ }
+ break;
+ case 1: /* SSE-1 */
+ _mm_store_ps(logmask+i, _mm_max_ps(PVAL, PTVAL));
+ break;
+ case 2: /* SSE-2(m4_start - m4_end) */
+ {
+ /*
+ A: val>tval
+ B: logmdct>-140
+ C: logmdct>val
+ D: logmdct<tval
+ E: val<tval
+ T0 = A for val
+ T1 = a(b|B(Cd|ce)) for logmdct
+ T2 = T0|T1 for tval
+ T3 = t2 for tval*
+ logmask = val&T0 | tval&T1 | tval*&T3
+ tval = tval&T2 | tval*&T3
+ */
+ __m128 XMM0, XMM1, XMM2, XMM3, XMM4;
+ XMM4 = _mm_cmpgt_ps(PVAL, PTVAL); /* T0:A */
+ if(_mm_movemask_ps(XMM4)==15)
+ _mm_store_ps(logmask+i, PVAL);
+ else
+ {
+ XMM2 = _mm_cmple_ps(PM128(logmdct+i), PVAL); /* c */
+ XMM0 = _mm_cmple_ps(PTVAL, PM128(logmdct+i)); /* d */
+ XMM1 = _mm_cmple_ps(PTVAL, PVAL); /* e */
+ XMM1 = _mm_and_ps(XMM1, XMM2); /* ce */
+ XMM2 = _mm_andnot_ps(XMM2, XMM0); /* Cd */
+ XMM1 = _mm_or_ps(XMM1, XMM2); /* Cd|ce */
+ XMM3 = _mm_cmple_ps(PM128(logmdct+i),PM140.ps); /* b */
+ XMM2 = XMM3;
+ XMM2 = _mm_andnot_ps(XMM2, XMM1); /* B(Cd|ce) */
+ XMM3 = _mm_or_ps(XMM3, XMM2); /* b|B(Cd|ce) */
+ XMM1 = XMM4;
+ XMM1 = _mm_andnot_ps(XMM1, XMM3); /* T1:a(b|B(Cd|ce)) */
+ XMM2 = _mm_or_ps(XMM4, XMM1); /* T2:T0|T1 */
+ XMM4 = _mm_and_ps(XMM4, PVAL); /* val&T0 */
+ XMM1 = _mm_and_ps(XMM1, PTVAL); /* tval&T1 */
+ XMM3 = _mm_sub_ps(PVAL, PTVAL);
+ XMM3 = _mm_mul_ps(XMM3, PM4_THRES);
+ XMM3 = _mm_add_ps(XMM3, PTVAL); /* tval* */
+ PTVAL = _mm_and_ps(PTVAL, XMM2); /* tval&T2 */
+ XMM2 = _mm_andnot_ps(XMM2, XMM3); /* tval*&T3 */
+ PTVAL = _mm_or_ps(PTVAL, XMM2); /* tval = tval&T2 | tval*&T3 */
+ XMM1 = _mm_or_ps(XMM1, XMM2); /* tval&T1 | tval*&T3 */
+ XMM4 = _mm_or_ps(XMM4, XMM1); /* val&T0 | tval&T1 | tval*&T3 */
+ _mm_store_ps(logmask+i, XMM4);
+ }
+ }
+ break;
+ case 3: /* SSE-3(block include lp_pos) */
+ {
+ int k;
+ for(k=0;k<4;k++)
+ if(i+k>=lp_pos)logmdct[i+k]=-160;
+ }
+ _mm_store_ps(logmask+i, _mm_max_ps(PVAL, PTVAL));
+ break;
+ case 4: /* SSE-4(i>=lp_pos) */
+ _mm_store_ps(logmdct+i, PM160.ps);
+ _mm_store_ps(logmask+i, _mm_max_ps(PVAL, PTVAL));
+ break;
+ }
+ {
+ __m128 XMM0, XMM1, XMM2;
+ PVAL = _mm_sub_ps(PVAL, PM128(logmdct+i));
+ XMM0 = PVAL;
+ XMM1 = PVAL;
+ XMM2 = PVAL;
+ XMM0 = _mm_cmpgt_ps(XMM0, PCOEFFI.ps);
+ XMM1 = _mm_mul_ps(XMM1, PCX0);
+ XMM2 = _mm_mul_ps(XMM2, PCX1);
+ XMM1 = _mm_add_ps(XMM1, PFV_C0);
+ XMM2 = _mm_add_ps(XMM2, PFV_C1);
+ XMM1 = _mm_max_ps(XMM1, PP0001.ps);
+ XMM1 = _mm_and_ps(XMM1, XMM0);
+ XMM0 = _mm_andnot_ps(XMM0, XMM2);
+ XMM1 = _mm_or_ps(XMM1, XMM0);
+ XMM1 = _mm_mul_ps(XMM1, PM128(mdct+i));
+ _mm_store_ps(mdct+i, XMM1);
+ }
+ }
+ }
+ else
+ goto SAFE_MODE;
+ }
+ else
+ {
+SAFE_MODE:
+#endif /* SSE Optimize */
for(i=0;i<n;i++){
float val= noise[i]+p->noiseoffset[offset_select][i];
float tval= tone[i]+toneatt;
@@ -1144,9 +5147,160 @@
}
}
+#ifdef __SSE__ /* SSE Optimize */
+ }
+#endif /* SSE Optimize */
/** @ M3 SET lastmdct **/
if(offset_select == 1){
+#ifdef __SSE__ /* SSE Optimize */
+ if(n == 1024)
+ {
+ if(!nW_modenumber)
+ {
+ for(i=0; i<128; i+=16)
+ {
+ __m128 XMM0, XMM1, XMM2, XMM3;
+ __m128 XMM4, XMM5, XMM6, XMM7;
+ XMM0 = _mm_load_ps(logmdct+i*8 );
+ XMM1 = _mm_load_ps(logmdct+i*8+ 4);
+ XMM2 = _mm_load_ps(logmdct+i*8+ 8);
+ XMM3 = _mm_load_ps(logmdct+i*8+ 12);
+ XMM4 = _mm_load_ps(logmdct+i*8+ 16);
+ XMM5 = _mm_load_ps(logmdct+i*8+ 20);
+ XMM6 = _mm_load_ps(logmdct+i*8+ 24);
+ XMM7 = _mm_load_ps(logmdct+i*8+ 28);
+ XMM0 = _mm_min_ps(XMM0, XMM1);
+ XMM2 = _mm_min_ps(XMM2, XMM3);
+ XMM4 = _mm_min_ps(XMM4, XMM5);
+ XMM6 = _mm_min_ps(XMM6, XMM7);
+ XMM1 = XMM0;
+ XMM5 = XMM4;
+ XMM0 = _mm_shuffle_ps(XMM0, XMM2, _MM_SHUFFLE(2,0,2,0));
+ XMM4 = _mm_shuffle_ps(XMM4, XMM6, _MM_SHUFFLE(2,0,2,0));
+ XMM3 = _mm_load_ps(logmdct+i*8+ 32);
+ XMM7 = _mm_load_ps(logmdct+i*8+ 36);
+ XMM1 = _mm_shuffle_ps(XMM1, XMM2, _MM_SHUFFLE(3,1,3,1));
+ XMM5 = _mm_shuffle_ps(XMM5, XMM6, _MM_SHUFFLE(3,1,3,1));
+ XMM2 = XMM0;
+ XMM6 = XMM1;
+ XMM0 = _mm_shuffle_ps(XMM0, XMM4, _MM_SHUFFLE(2,0,2,0));
+ XMM1 = _mm_shuffle_ps(XMM1, XMM5, _MM_SHUFFLE(2,0,2,0));
+ XMM2 = _mm_shuffle_ps(XMM2, XMM4, _MM_SHUFFLE(3,1,3,1));
+ XMM6 = _mm_shuffle_ps(XMM6, XMM5, _MM_SHUFFLE(3,1,3,1));
+ XMM4 = _mm_load_ps(logmdct+i*8+ 40);
+ XMM5 = _mm_load_ps(logmdct+i*8+ 44);
+ XMM1 = _mm_min_ps(XMM1, XMM0);
+ XMM0 = _mm_load_ps(logmdct+i*8+ 48);
+ XMM6 = _mm_min_ps(XMM6, XMM2);
+ XMM2 = _mm_load_ps(logmdct+i*8+ 52);
+ XMM6 = _mm_min_ps(XMM6, XMM1);
+ XMM1 = _mm_load_ps(logmdct+i*8+ 56);
+ _mm_store_ps(lastmdct+i , XMM6);
+ XMM6 = _mm_load_ps(logmdct+i*8+ 60);
+ XMM3 = _mm_min_ps(XMM3, XMM7);
+ XMM4 = _mm_min_ps(XMM4, XMM5);
+ XMM0 = _mm_min_ps(XMM0, XMM2);
+ XMM1 = _mm_min_ps(XMM1, XMM6);
+ XMM7 = XMM3;
+ XMM2 = XMM0;
+ XMM3 = _mm_shuffle_ps(XMM3, XMM4, _MM_SHUFFLE(2,0,2,0));
+ XMM0 = _mm_shuffle_ps(XMM0, XMM1, _MM_SHUFFLE(2,0,2,0));
+ XMM5 = _mm_load_ps(logmdct+i*8+ 64);
+ XMM6 = _mm_load_ps(logmdct+i*8+ 68);
+ XMM7 = _mm_shuffle_ps(XMM7, XMM4, _MM_SHUFFLE(3,1,3,1));
+ XMM2 = _mm_shuffle_ps(XMM2, XMM1, _MM_SHUFFLE(3,1,3,1));
+ XMM4 = XMM3;
+ XMM1 = XMM7;
+ XMM3 = _mm_shuffle_ps(XMM3, XMM0, _MM_SHUFFLE(2,0,2,0));
+ XMM7 = _mm_shuffle_ps(XMM7, XMM2, _MM_SHUFFLE(2,0,2,0));
+ XMM4 = _mm_shuffle_ps(XMM4, XMM0, _MM_SHUFFLE(3,1,3,1));
+ XMM1 = _mm_shuffle_ps(XMM1, XMM2, _MM_SHUFFLE(3,1,3,1));
+ XMM0 = _mm_load_ps(logmdct+i*8+ 72);
+ XMM2 = _mm_load_ps(logmdct+i*8+ 76);
+ XMM7 = _mm_min_ps(XMM7, XMM3);
+ XMM3 = _mm_load_ps(logmdct+i*8+ 80);
+ XMM1 = _mm_min_ps(XMM1, XMM4);
+ XMM4 = _mm_load_ps(logmdct+i*8+ 84);
+ XMM1 = _mm_min_ps(XMM1, XMM7);
+ XMM7 = _mm_load_ps(logmdct+i*8+ 88);
+ _mm_store_ps(lastmdct+i+ 4, XMM1);
+ XMM1 = _mm_load_ps(logmdct+i*8+ 92);
+ XMM5 = _mm_min_ps(XMM5, XMM6);
+ XMM0 = _mm_min_ps(XMM0, XMM2);
+ XMM3 = _mm_min_ps(XMM3, XMM4);
+ XMM7 = _mm_min_ps(XMM7, XMM1);
+ XMM6 = XMM5;
+ XMM4 = XMM3;
+ XMM5 = _mm_shuffle_ps(XMM5, XMM0, _MM_SHUFFLE(2,0,2,0));
+ XMM3 = _mm_shuffle_ps(XMM3, XMM7, _MM_SHUFFLE(2,0,2,0));
+ XMM2 = _mm_load_ps(logmdct+i*8+ 96);
+ XMM1 = _mm_load_ps(logmdct+i*8+100);
+ XMM6 = _mm_shuffle_ps(XMM6, XMM0, _MM_SHUFFLE(3,1,3,1));
+ XMM4 = _mm_shuffle_ps(XMM4, XMM7, _MM_SHUFFLE(3,1,3,1));
+ XMM0 = XMM5;
+ XMM7 = XMM6;
+ XMM5 = _mm_shuffle_ps(XMM5, XMM3, _MM_SHUFFLE(2,0,2,0));
+ XMM6 = _mm_shuffle_ps(XMM6, XMM4, _MM_SHUFFLE(2,0,2,0));
+ XMM0 = _mm_shuffle_ps(XMM0, XMM3, _MM_SHUFFLE(3,1,3,1));
+ XMM7 = _mm_shuffle_ps(XMM7, XMM4, _MM_SHUFFLE(3,1,3,1));
+ XMM3 = _mm_load_ps(logmdct+i*8+104);
+ XMM4 = _mm_load_ps(logmdct+i*8+108);
+ XMM6 = _mm_min_ps(XMM6, XMM5);
+ XMM5 = _mm_load_ps(logmdct+i*8+112);
+ XMM7 = _mm_min_ps(XMM7, XMM0);
+ XMM0 = _mm_load_ps(logmdct+i*8+116);
+ XMM7 = _mm_min_ps(XMM7, XMM6);
+ XMM6 = _mm_load_ps(logmdct+i*8+120);
+ _mm_store_ps(lastmdct+i+ 8, XMM7);
+ XMM7 = _mm_load_ps(logmdct+i*8+124);
+ XMM2 = _mm_min_ps(XMM2, XMM1);
+ XMM3 = _mm_min_ps(XMM3, XMM4);
+ XMM5 = _mm_min_ps(XMM5, XMM0);
+ XMM6 = _mm_min_ps(XMM6, XMM7);
+ XMM1 = XMM2;
+ XMM0 = XMM5;
+ XMM2 = _mm_shuffle_ps(XMM2, XMM3, _MM_SHUFFLE(2,0,2,0));
+ XMM5 = _mm_shuffle_ps(XMM5, XMM6, _MM_SHUFFLE(2,0,2,0));
+ XMM1 = _mm_shuffle_ps(XMM1, XMM3, _MM_SHUFFLE(3,1,3,1));
+ XMM0 = _mm_shuffle_ps(XMM0, XMM6, _MM_SHUFFLE(3,1,3,1));
+ XMM3 = XMM2;
+ XMM6 = XMM1;
+ XMM2 = _mm_shuffle_ps(XMM2, XMM5, _MM_SHUFFLE(2,0,2,0));
+ XMM1 = _mm_shuffle_ps(XMM1, XMM0, _MM_SHUFFLE(2,0,2,0));
+ XMM3 = _mm_shuffle_ps(XMM3, XMM5, _MM_SHUFFLE(3,1,3,1));
+ XMM6 = _mm_shuffle_ps(XMM6, XMM0, _MM_SHUFFLE(3,1,3,1));
+ XMM1 = _mm_min_ps(XMM1, XMM2);
+ XMM6 = _mm_min_ps(XMM6, XMM3);
+ XMM6 = _mm_min_ps(XMM6, XMM1);
+ _mm_store_ps(lastmdct+i+12, XMM6);
+ }
+ }
+ }
+ else
+ if(n == 128)
+ {
+ for(i=0;i<128;i+=32)
+ {
+ __m128 XMM0 = _mm_load_ps(logmdct+i );
+ __m128 XMM1 = _mm_load_ps(logmdct+i+ 4);
+ __m128 XMM2 = _mm_load_ps(logmdct+i+ 8);
+ __m128 XMM3 = _mm_load_ps(logmdct+i+12);
+ __m128 XMM4 = _mm_load_ps(logmdct+i+16);
+ __m128 XMM5 = _mm_load_ps(logmdct+i+20);
+ __m128 XMM6 = _mm_load_ps(logmdct+i+24);
+ __m128 XMM7 = _mm_load_ps(logmdct+i+28);
+ _mm_store_ps(lastmdct+i , XMM0);
+ _mm_store_ps(lastmdct+i+ 4, XMM1);
+ _mm_store_ps(lastmdct+i+ 8, XMM2);
+ _mm_store_ps(lastmdct+i+12, XMM3);
+ _mm_store_ps(lastmdct+i+16, XMM4);
+ _mm_store_ps(lastmdct+i+20, XMM5);
+ _mm_store_ps(lastmdct+i+24, XMM6);
+ _mm_store_ps(lastmdct+i+28, XMM7);
+ }
+ }
+#else /* SSE Optimize */
if(n == 1024){
if(!nW_modenumber){
for(i=0; i<128; i++){
@@ -1161,6 +5315,7 @@
}else if(n == 128){
for(i=0; i<128; i++) lastmdct[i] = logmdct[i];
}
+#endif /* SSE Optimize */
}
}
@@ -1177,7 +5332,11 @@
return(amp);
}
+#ifdef __SSE__ /* SSE Optimize */
+static inline void couple_lossless(float A, float B,
+#else /* SSE Optimize */
static void couple_lossless(float A, float B,
+#endif /* SSE Optimize */
float *qA, float *qB){
int test1=fabs(*qA)>fabs(*qB);
test1-= fabs(*qA)<fabs(*qB);
@@ -1197,7 +5356,80 @@
}
}
-static float hypot_lookup[32]={
+#ifdef __SSE__ /* SSE Optimize */
+ /*
+ Phase 1.
+ fabs(*qA)>fabs(*qB) test1 = 1
+ fabs(*qA)<fabs(*qB) test1 = -1
+ fabs(*qA)=fabs(*qB) fabs(A)> fabs(B) test1 = 1
+ fabs(*qA)=fabs(*qB) fabs(A)<=fabs(B) test1 = -1
+
+ Phase 2.
+ *qB = S(*qA)^(*qA-*qB) (test1==1)
+ *qB = S(*qB)^(*qA-*qB) (test1!=1)
+ *qA= Old *qA (test1==1)
+ *qA= Old *qB (test1!=1)
+
+ Phase 3.
+ *qB = -fabs(*qA)*2.f (*qB >fabs(*qA)*1.9999f)
+ *qB = *qB (*qB<=fabs(*qA)*1.9999f)
+ *qA = -*qA (*qB >fabs(*qA)*1.9999f)
+ *qA = *qA (*qB<=fabs(*qA)*1.9999f)
+ */
+static inline void couple_lossless_ps(float *A, float *B, float *qA, float *qB)
+{
+ /*
+ Phase 1: per lane, build the mask that is all-ones where test1 == 1 (four floats are handled per call)
+ */
+ __m128 PQA = _mm_load_ps(qA); /* four qA values (_mm_load_ps requires a 16-byte aligned pointer) */
+ __m128 PQB = _mm_load_ps(qB); /* four qB values */
+ __m128 FQA = _mm_and_ps(PQA, PABSMASK.ps); /* fabs(qA) */
+ __m128 FQB = _mm_and_ps(PQB, PABSMASK.ps); /* fabs(qB) */
+ __m128 XMM0 = _mm_and_ps(PM128(A), PABSMASK.ps); /* fabs(A); PM128 is defined elsewhere, presumably a __m128 view of the pointer */
+ __m128 XMM1 = _mm_and_ps(PM128(B), PABSMASK.ps); /* fabs(B) */
+ __m128 PTEST1;
+ __m128 PTEST2;
+ __m128 PFQA2M;
+
+ XMM0 = _mm_cmpgt_ps(XMM0, XMM1); /* tie-break mask: fabs(A) > fabs(B) */
+ XMM1 = _mm_cmpneq_ps(FQA, FQB); /* mask: fabs(qA) != fabs(qB) */
+ PTEST1 = _mm_or_ps(
+ _mm_and_ps(_mm_cmpgt_ps(FQA, FQB), XMM1),
+ _mm_andnot_ps(XMM1, XMM0)
+ ); /* set where fabs(qA)>fabs(qB), or on a tie where fabs(A)>fabs(B) */
+ PTEST2 = PTEST1;
+
+ /*
+ Phase 2: select the new qA/qB per lane according to the Phase 1 mask
+ */
+ XMM0 = _mm_and_ps(PQA, PCS_RRRR.ps); /* Sign of PQA */
+ XMM1 = _mm_and_ps(PQB, PCS_RRRR.ps); /* Sign of PQB */
+ XMM0 = _mm_and_ps(XMM0, PTEST2); /* keep sign of qA where the mask is set */
+ XMM1 = _mm_andnot_ps(PTEST2, XMM1); /* keep sign of qB where the mask is clear */
+ XMM0 = _mm_or_ps(XMM0, XMM1); /* Sign of new *qB */
+ XMM1 = _mm_sub_ps(PQA, PQB); /* New *qB Body */
+ XMM1 = _mm_xor_ps(XMM1, XMM0); /* New qB */
+ PQA = _mm_and_ps(PQA, PTEST1); /* old qA where the mask is set */
+ PQB = _mm_andnot_ps(PTEST1, PQB); /* old qB where the mask is clear */
+ XMM0 = _mm_or_ps(PQA, PQB); /* New qA */
+
+ /*
+ Phase 3: where new qB >= 2*fabs(qA), replace qB by -2*fabs(qA) and negate qA
+ */
+ PFQA2M = _mm_mul_ps(FQA, PFV_2.ps); /* fabs(qA)*2.f, computed from qA as loaded at entry (FQA) */
+
+ PTEST1 = _mm_cmpge_ps(XMM1, PFQA2M); /* Mask of *qB >= fabs(*qA)*2.f */
+ PTEST2 = PTEST1;
+ PQB = _mm_xor_ps(PFQA2M, PCS_RRRR.ps); /* -fabs(qA)*2.f */
+ PQA = _mm_xor_ps(XMM0 , PCS_RRRR.ps); /* -qA */
+ PQB = _mm_or_ps(_mm_and_ps(PQB, PTEST1), _mm_andnot_ps(PTEST1, XMM1)); /* per-lane select for qB */
+ PQA = _mm_or_ps(_mm_and_ps(PQA, PTEST2), _mm_andnot_ps(PTEST2, XMM0)); /* per-lane select for qA */
+ _mm_store_ps(qB, PQB); /* results are written back in place */
+ _mm_store_ps(qA, PQA);
+}
+#endif /* SSE Optimize */
+
+static const float hypot_lookup[32]={
-0.009935, -0.011245, -0.012726, -0.014397,
-0.016282, -0.018407, -0.020800, -0.023494,
-0.026522, -0.029923, -0.033737, -0.038010,
@@ -1207,7 +5439,11 @@
-0.159093, -0.175146, -0.192286, -0.210490,
-0.229718, -0.249913, -0.271001, -0.292893};
+#ifdef __SSE__ /* SSE Optimize */
+static inline void precomputed_couple_point(float premag,
+#else /* SSE Optimize */
static void precomputed_couple_point(float premag,
+#endif /* SSE Optimize */
int floorA,int floorB,
float *mag, float *ang){
@@ -1221,6 +5457,73 @@
*ang=0.f;
}
+#ifdef __SSE__ /* SSE Optimize */
+static inline void precomputed_couple_point_ps(float *premag,
+ int *floorA,int *floorB,
+ float *mag, float *ang){ /* four lanes per call: writes 4 floats to mag and zeroes 4 floats of ang */
+ __m128 XMM0;
+ __m128x PI0, PI1; /* PI0: hypot_lookup index then value; PI1: FLOOR1_fromdB_INV_LOOKUP index then value */
+#ifdef __SSE2__
+ {
+ __m128i PFA = PM128I(floorA); /* PM128I is defined elsewhere; presumably a __m128i view of the pointer */
+ __m128i PFB = PM128I(floorB);
+ __m128i XMM0 = PFA;
+ __m128i XMM1 = PFA;
+ __m128i XMM2 = _mm_set_epi32(31, 31, 31, 31);
+ __m128i PFI0 = _mm_setzero_si128();
+ __m128i XMM3 = PFI0;
+ XMM0 = _mm_cmpgt_epi32(XMM0, PFB); /* mask: floorA > floorB */
+ PFA = _mm_and_si128(PFA, XMM0); /* floorA where it is the larger */
+ XMM0 = _mm_andnot_si128(XMM0, PFB); /* floorB elsewhere */
+ PFA = _mm_or_si128(PFA, XMM0); /* per-lane max(floorA, floorB) */
+ PI1.pi = PFA;
+
+ XMM1 = _mm_sub_epi32(XMM1, PFB); /* floorA - floorB */
+ XMM3 = _mm_cmpgt_epi32(XMM3, XMM1); /* mask: difference < 0 */
+ XMM1 = _mm_xor_si128(XMM1, XMM3); /* with the next line: negate negative lanes ... */
+ XMM1 = _mm_sub_epi32(XMM1, XMM3); /* ... giving abs(floorA - floorB) */
+ XMM2 = _mm_sub_epi32(XMM2, XMM1); /* 31 - abs(floorA - floorB) */
+ XMM3 = XMM2;
+ XMM3 = _mm_cmpgt_epi32(XMM3, PFI0); /* mask: offset > 0 */
+ XMM2 = _mm_and_si128(XMM2, XMM3); /* offsets that are not positive become 0 */
+ PI0.pi = XMM2;
+ }
+ PI0.sf[0] = hypot_lookup[PI0.si32[0]]; /* each lane's integer index is replaced in place by the table value */
+ PI0.sf[1] = hypot_lookup[PI0.si32[1]];
+ PI0.sf[2] = hypot_lookup[PI0.si32[2]];
+ PI0.sf[3] = hypot_lookup[PI0.si32[3]];
+ PI1.sf[0] = FLOOR1_fromdB_INV_LOOKUP[PI1.si32[0]];
+ PI1.sf[1] = FLOOR1_fromdB_INV_LOOKUP[PI1.si32[1]];
+ PI1.sf[2] = FLOOR1_fromdB_INV_LOOKUP[PI1.si32[2]];
+ PI1.sf[3] = FLOOR1_fromdB_INV_LOOKUP[PI1.si32[3]];
+#else
+ int test0 = (*(floorA )>*(floorB ))-1; /* 0 when floorA > floorB, otherwise -1 (all bits set) */
+ int test1 = (*(floorA+1)>*(floorB+1))-1;
+ int test2 = (*(floorA+2)>*(floorB+2))-1;
+ int test3 = (*(floorA+3)>*(floorB+3))-1;
+ int offset0 = 31-abs(*(floorA )-*(floorB ));
+ int offset1 = 31-abs(*(floorA+1)-*(floorB+1));
+ int offset2 = 31-abs(*(floorA+2)-*(floorB+2));
+ int offset3 = 31-abs(*(floorA+3)-*(floorB+3));
+ PI0.sf[0] = hypot_lookup[((offset0<0)-1)&offset0]; /* negative offsets index entry 0 */
+ PI0.sf[1] = hypot_lookup[((offset1<0)-1)&offset1];
+ PI0.sf[2] = hypot_lookup[((offset2<0)-1)&offset2];
+ PI0.sf[3] = hypot_lookup[((offset3<0)-1)&offset3];
+
+ PI1.sf[0] = FLOOR1_fromdB_INV_LOOKUP[(*(floorB )&test0)|(*(floorA )&(~test0))]; /* index is floorA when floorA > floorB, else floorB */
+ PI1.sf[1] = FLOOR1_fromdB_INV_LOOKUP[(*(floorB+1)&test1)|(*(floorA+1)&(~test1))];
+ PI1.sf[2] = FLOOR1_fromdB_INV_LOOKUP[(*(floorB+2)&test2)|(*(floorA+2)&(~test2))];
+ PI1.sf[3] = FLOOR1_fromdB_INV_LOOKUP[(*(floorB+3)&test3)|(*(floorA+3)&(~test3))];
+#endif
+
+ XMM0 = _mm_add_ps(PI0.ps, PFV_1.ps); /* hypot_lookup value + PFV_1 (defined elsewhere; presumably 1.0f per lane) */
+ XMM0 = _mm_mul_ps(XMM0, PI1.ps); /* times the FLOOR1_fromdB_INV_LOOKUP value */
+ XMM0 = _mm_mul_ps(XMM0, PM128(premag)); /* times premag */
+ _mm_store_ps(mag, XMM0);
+ _mm_store_ps(ang, _mm_setzero_ps()); /* ang is always written as 0.f */
+}
+#endif /* SSE Optimize */
+
/* just like below, this is currently set up to only do
single-step-depth coupling. Otherwise, we'd have to do more
copying (which will be inevitable later) */
@@ -1237,6 +5540,56 @@
if(-a>b)return -sqrt(a*a-b*b);
return sqrt(b*b-a*a);
}
+#ifdef __SSE__ /* SSE Optimize */
+/*
+ a>0 b>0 sqrt(a*a+b*b)
+ a>0 b<=0 a>abs(b) sqrt(a*a-b*b)
+ a>0 b<=0 a<=abs(b) -sqrt(b*b-a*a)
+ a<=0 b<0 -sqrt(a*a+b*b)
+ a<=0 b>=0 abs(a)>abs(b) -sqrt(a*a-b*b)
+ a<=0 b>=0 abs(a)<=abs(b) sqrt(b*b-a*a)
+
+ sa sb fa<=fb rs s(a*b) s(a*b)&(fa<=fb) s(a*b)&(fa<=fb)^sa
+ 0 0 * 0 0 0 0
+ 0 1 0 0 1 0 0
+ 0 1 1 1 1 1 1
+ 1 1 * 1 0 0 1
+ 1 0 0 1 1 0 1
+ 1 0 1 0 1 1 0
+
+ sa sb fa<=fb (a&~(fa<=fb))|(b&(fa<=fb)) (a&(fa<=fb))|(b&~(fa<=fb))
+ 0 0 * * *
+ 0 1 0 a b
+ 0 1 1 b a
+ 1 1 * * *
+ 1 0 0 a b
+ 1 0 1 b a
+*/
+static inline __m128 dipole_hypot_ps(float* a, float *b) /* four-lane form of the sign/magnitude table in the comment above; returns the 4 results */
+{
+ __m128 XMM0, XMM1, XMM2, XMM3;
+ __m128 A = _mm_load_ps(a); /* _mm_load_ps requires 16-byte aligned pointers */
+ __m128 B = _mm_load_ps(b);
+ __m128 PMASK = _mm_cmple_ps(_mm_and_ps(A, PABSMASK.ps), _mm_and_ps(B, PABSMASK.ps)); /* mask: fabs(a) <= fabs(b) */
+ XMM2 = _mm_cmplt_ps(_mm_mul_ps(A, B), PFV_0.ps); /* XMM2 = MASK(S(A*B)), i.e. lanes where a*b < 0 */
+ XMM0 = _mm_and_ps(A, PCS_RRRR.ps); /* XMM0 = SA */
+ XMM1 = _mm_xor_ps(
+ _mm_and_ps(
+ _mm_and_ps(XMM2, PCS_RRRR.ps),
+ PMASK
+ ),
+ XMM0
+ ); /* XMM1 = Sign of result */
+ A = _mm_mul_ps(A, A); /* a*a */
+ B = _mm_mul_ps(B, B); /* b*b */
+ XMM2 = _mm_and_ps(XMM2, PCS_RRRR.ps); /* sign bit only, in lanes where a*b < 0 */
+ XMM3 = _mm_min_ps(A, B); /* smaller square */
+ XMM0 = _mm_max_ps(A, B); /* larger square */
+ XMM3 = _mm_or_ps(XMM3, XMM2); /* negate the smaller square where a*b < 0 */
+ B = _mm_or_ps(_mm_sqrt_ps(_mm_add_ps(XMM0, XMM3)), XMM1); /* sqrt(larger +/- smaller) with the result sign OR-ed in */
+ return B;
+}
+#endif /* SSE Optimize */
static float round_hypot(float a, float b){
if(a>0.){
if(b>0.)return sqrt(a*a+b*b);
@@ -1247,8 +5600,121 @@
if(-a>b)return -sqrt(a*a+b*b);
return sqrt(b*b+a*a);
}
+#ifdef __SSE__ /* SSE Optimize */
+#define round_hypot_ps(d, PA, PB) \
+{ /* stores to d the 4 values +/-sqrt(a*a+b*b) for the 4 floats at PA (a) and PB (b) */ \
+ __m128 R0, SA; \
+ { \
+ __m128 SAMB; \
+ { \
+ __m128 FASB; \
+ { \
+ __m128 P2A, P2B; \
+ { \
+ __m128 FA, FB; \
+ { \
+ __m128 A = _mm_load_ps(PA); /* aligned load of a */ \
+ __m128 B = _mm_load_ps(PB); /* aligned load of b */ \
+ SA = _mm_and_ps(A, PCS_RRRR.ps); /* sign of a */ \
+ FA = _mm_and_ps(A, PABSMASK.ps); /* FA = fabs(a) */ \
+ FB = _mm_and_ps(B, PABSMASK.ps); /* FB = fabs(b) */ \
+ P2A = _mm_mul_ps(A, A); /* a*a */ \
+ P2B = _mm_mul_ps(B, B); /* b*b */ \
+ SAMB = _mm_mul_ps(A, B); /* a*b */ \
+ } \
+ FASB = _mm_cmple_ps(FA, FB); /* mask of fa<=fb */ \
+ } \
+ R0 = _mm_add_ps(P2A, P2B); /* a*a+b*b */ \
+ } \
+ FASB = _mm_and_ps(FASB, PCS_RRRR.ps); /* sign bit only, in lanes where fa<=fb */ \
+ R0 = _mm_sqrt_ps(R0); /* sqrt(a*a+b*b) */ \
+ SAMB = _mm_and_ps(SAMB, FASB); /* sign bit of a*b, kept only where fa<=fb */ \
+ } \
+ SA = _mm_xor_ps(SA, SAMB); /* flip the sign of a where fa<=fb and a*b is negative */ \
+ } \
+ R0 = _mm_xor_ps(R0, SA); /* set sign to result */ \
+ _mm_store_ps(d, R0); /* aligned store of the 4 results */ \
+}
+#endif /* SSE Optimize */
/* modified hypot by aoyumi
better method should be found. */
+#ifdef __SSE__ /* SSE Optimize */
+#if 0
+/*
+ a>0 b>0 sqrt(a*a+b*b*0.92)
+ a>0 b<=0 a>abs(b) sqrt(a*a-b*b*0.16)
+ a>0 b<=0 a<=abs(b) -sqrt(b*b-a*a*0.16)
+ a<=0 b<0 -sqrt(a*a+b*b*0.92)
+ a<=0 b>=0 abs(a)>b -sqrt(a*a-b*b*0.16)
+ a<=0 b>=0 abs(a)<=b sqrt(b*b-a*a*0.16)
+
+ sa sb fa<=fb rs s(a*b) s(a*b)&(fa<=fb) s(a*b)&(fa<=fb)^sa
+ 0 0 * 0 0 0 0
+ 0 1 0 0 1 0 0
+ 0 1 1 1 1 1 1
+ 1 1 * 1 0 0 1
+ 1 0 0 1 1 0 1
+ 1 0 1 0 1 1 0
+*/
+static inline __m128 min_indemnity_dipole_hypot_ps(float* a, float *b) /* four-lane form of the table above; this function sits inside #if 0 */
+{
+ static _MM_ALIGN16 const __m128x PFV_p92 =
+ { .sf = {0.92f, 0.92f, 0.92f, 0.92f} }; /* weight used when a*b is not negative */
+ static _MM_ALIGN16 const __m128x PFV_mp16 =
+ { .sf = {-0.16f, -0.16f, -0.16f, -0.16f} }; /* weight used when a*b is negative */
+ static _MM_ALIGN16 const __m128x PFV_mp5 =
+ { .sf = {-0.5f, -0.5f, -0.5f, -0.5f} }; /* -0.5 term of the rsqrt refinement */
+ static _MM_ALIGN16 const __m128x PFV_1p5 =
+ { .sf = {1.5f, 1.5f, 1.5f, 1.5f} }; /* 1.5 term of the rsqrt refinement */
+ __m128 XMM0, XMM1, XMM2, XMM3;
+ __m128 A = _mm_load_ps(a);
+ __m128 B = _mm_load_ps(b);
+ __m128 PMASK = _mm_cmple_ps(_mm_and_ps(A, PABSMASK.ps), _mm_and_ps(B, PABSMASK.ps)); /* mask: fabs(a) <= fabs(b) */
+ XMM2 = _mm_cmplt_ps(_mm_mul_ps(A, B), PFV_0.ps); /* XMM2 = MASK(S(A*B)), i.e. lanes where a*b < 0 */
+ XMM0 = _mm_and_ps(A, PCS_RRRR.ps); /* XMM0 = SA */
+ XMM3 = XMM2;
+ XMM3 = _mm_and_ps(XMM3, PMASK); /* mask: a*b < 0 and fabs(a) <= fabs(b) */
+ XMM1 = XMM3;
+ XMM1 = _mm_and_ps(XMM1, PCS_RRRR.ps);
+ XMM1 = _mm_xor_ps(XMM1, XMM0); /* XMM1 = sign of result */
+ A = _mm_mul_ps(A, A); /* a*a */
+ B = _mm_mul_ps(B, B); /* b*b */
+ XMM0 = _mm_or_ps(
+ _mm_and_ps(PFV_mp16.ps, XMM2),
+ _mm_andnot_ps(XMM2, PFV_p92.ps)
+ ); /* XMM0 = Packed Multi Value */
+ XMM2 = XMM3;
+ PMASK = B; /* PMASK is reused as a scratch copy of b*b */
+ B = _mm_or_ps(
+ _mm_and_ps(B, XMM2),
+ _mm_andnot_ps(XMM2, A)
+ ); /* unweighted term: b*b where the XMM3 mask is set, else a*a */
+ A = _mm_or_ps(
+ _mm_and_ps(A, XMM3),
+ _mm_andnot_ps(XMM3, PMASK)
+ ); /* weighted term: a*a where the XMM3 mask is set, else b*b */
+ A = _mm_mul_ps(A, XMM0);
+ B = _mm_add_ps(B, A); /* x = unweighted + weight*weighted */
+#if 1
+ XMM0 = _mm_rsqrt_ps(B); /* y = approximate 1/sqrt(x) */
+ XMM2 = XMM0;
+ XMM3 = B;
+ XMM3 = _mm_mul_ps(XMM3, XMM0);
+ XMM3 = _mm_mul_ps(XMM3, XMM0);
+ XMM3 = _mm_mul_ps(XMM3, XMM0); /* x*y*y*y */
+ XMM3 = _mm_mul_ps(XMM3, PFV_mp5.ps);
+ XMM2 = _mm_mul_ps(XMM2, PFV_1p5.ps);
+ XMM2 = _mm_add_ps(XMM2, XMM3); /* y*(1.5 - 0.5*x*y*y): one Newton-Raphson refinement of y */
+ B = _mm_mul_ps(B, XMM2); /* x * refined y ~= sqrt(x). NOTE(review): for x == 0, y is infinite and this yields NaN - confirm before enabling */
+#else
+ B = _mm_sqrt_ps(B);
+#endif
+ B = _mm_or_ps(B, XMM1); /* OR the result sign in */
+ return B;
+}
+#endif
+#endif /* SSE Optimize */
+#if !defined(__SSE__) /* SSE Optimize */
static float min_indemnity_dipole_hypot(float a, float b){
float thnor=0.92;
float threv=0.84;
@@ -1263,6 +5729,8 @@
if(-a>b)return -sqrt(a2-b2+b2*threv);
return sqrt(b2-a2+a2*threv);
}
+#endif /* SSE Optimize */
+
/* revert to round hypot for now */
float **_vp_quantize_couple_memo(vorbis_block *vb,
@@ -1281,8 +5749,228 @@
float *mdctA=mdct[vi->coupling_ang[i]];
ret[i]=_vorbis_block_alloc(vb,n*sizeof(**ret));
+#ifdef __SSE__ /* SSE Optimize */
+ for(j=0;j<n;j+=16) /* 16 floats per pass, four interleaved groups of 4; assumes n is a multiple of 16 and the buffers are 16-byte aligned (TODO confirm) */
+ {
+ static _MM_ALIGN16 const float PFV_p92[4] = {0.92f, 0.92f, 0.92f, 0.92f}; /* weight when M*A is not negative */
+ static _MM_ALIGN16 const float PFV_mp16[4] = {-0.16f, -0.16f, -0.16f, -0.16f}; /* weight when M*A is negative */
+ static _MM_ALIGN16 const float PFV_mp5[4] = {-0.5f, -0.5f, -0.5f, -0.5f}; /* rsqrt refinement: -0.5 term */
+ static _MM_ALIGN16 const float PFV_1p5[4] = {1.5f, 1.5f, 1.5f, 1.5f}; /* rsqrt refinement: 1.5 term */
+ __m128 XMM0, XMM1, XMM2, XMM3;
+ __m128 XMM4, XMM5, XMM6, XMM7;
+ XMM0 = _mm_load_ps(mdctM+j ); /* group 0: M */
+ XMM1 = _mm_load_ps(mdctA+j ); /* group 0: A */
+ XMM2 = _mm_load_ps(PABSMASK.sf);
+ XMM3 = _mm_load_ps(PFV_0.sf);
+ XMM4 = XMM0;
+ XMM5 = XMM0;
+ XMM6 = XMM0;
+ XMM7 = XMM1;
+ XMM5 = _mm_mul_ps(XMM5, XMM1);
+ XMM4 = _mm_and_ps(XMM4, XMM2); /* fabs(M) */
+ XMM7 = _mm_and_ps(XMM7, XMM2); /* fabs(A) */
+ XMM2 = _mm_load_ps(PCS_RRRR.sf);
+ XMM5 = _mm_cmplt_ps(XMM5, XMM3); /* mask: M*A < 0 */
+ XMM4 = _mm_cmple_ps(XMM4, XMM7); /* mask: fabs(M) <= fabs(A) */
+ XMM6 = _mm_and_ps(XMM6, XMM2); /* sign of M */
+ XMM3 = XMM5;
+ XMM3 = _mm_and_ps(XMM3, XMM4); /* mask: M*A < 0 and fabs(M) <= fabs(A) */
+ XMM7 = XMM3;
+ XMM7 = _mm_and_ps(XMM7, XMM2);
+ XMM2 = _mm_load_ps(PFV_p92);
+ XMM7 = _mm_xor_ps(XMM7, XMM6); /* sign of the result */
+ XMM6 = _mm_load_ps(PFV_mp16);
+ XMM0 = _mm_mul_ps(XMM0, XMM0); /* M*M */
+ XMM6 = _mm_and_ps(XMM6, XMM5);
+ XMM1 = _mm_mul_ps(XMM1, XMM1); /* A*A */
+ XMM5 = _mm_andnot_ps(XMM5, XMM2);
+ XMM4 = XMM1;
+ XMM6 = _mm_or_ps(XMM6, XMM5); /* per-lane weight: -0.16 or 0.92 */
+ XMM5 = XMM3;
+ XMM2 = XMM0;
+ XMM1 = _mm_and_ps(XMM1, XMM5);
+ XMM0 = _mm_and_ps(XMM0, XMM3);
+ XMM5 = _mm_andnot_ps(XMM5, XMM2);
+ XMM3 = _mm_andnot_ps(XMM3, XMM4);
+ XMM2 = _mm_load_ps(PFV_mp5);
+ XMM4 = _mm_load_ps(PFV_1p5);
+ XMM1 = _mm_or_ps(XMM1, XMM5); /* unweighted square */
+ XMM0 = _mm_or_ps(XMM0, XMM3); /* weighted square */
+ XMM0 = _mm_mul_ps(XMM0, XMM6);
+ XMM1 = _mm_add_ps(XMM1, XMM0); /* x = unweighted + weight*weighted */
+ XMM6 = _mm_rsqrt_ps(XMM1); /* y = approximate 1/sqrt(x) */
+ XMM5 = XMM6;
+ XMM3 = XMM1;
+ XMM3 = _mm_mul_ps(XMM3, XMM6);
+ XMM3 = _mm_mul_ps(XMM3, XMM6);
+ XMM0 = _mm_load_ps(mdctM+j+ 4); /* group 1: M (loads are interleaved with group 0's tail) */
+ XMM3 = _mm_mul_ps(XMM3, XMM6);
+ XMM6 = _mm_load_ps(mdctA+j+ 4); /* group 1: A */
+ XMM3 = _mm_mul_ps(XMM3, XMM2);
+ XMM2 = _mm_load_ps(PABSMASK.sf);
+ XMM5 = _mm_mul_ps(XMM5, XMM4);
+ XMM4 = _mm_load_ps(PFV_0.sf);
+ XMM5 = _mm_add_ps(XMM5, XMM3); /* y*(1.5 - 0.5*x*y*y) */
+ XMM3 = XMM0;
+ XMM1 = _mm_and_ps(_mm_mul_ps(XMM1, XMM5), _mm_cmpneq_ps(XMM1, _mm_setzero_ps())); /* sqrt(x); forced to 0 where x==0, since rsqrt(0)=inf would give 0*inf=NaN */
+ XMM5 = XMM0;
+ XMM1 = _mm_or_ps(XMM1, XMM7);
+ XMM7 = XMM0;
+ _mm_store_ps(ret[i]+j , XMM1); /* group 0 result */
+ XMM1 = XMM6;
+ XMM5 = _mm_mul_ps(XMM5, XMM6);
+ XMM3 = _mm_and_ps(XMM3, XMM2);
+ XMM1 = _mm_and_ps(XMM1, XMM2);
+ XMM2 = _mm_load_ps(PCS_RRRR.sf);
+ XMM5 = _mm_cmplt_ps(XMM5, XMM4);
+ XMM3 = _mm_cmple_ps(XMM3, XMM1);
+ XMM7 = _mm_and_ps(XMM7, XMM2);
+ XMM4 = XMM5;
+ XMM4 = _mm_and_ps(XMM4, XMM3);
+ XMM1 = XMM4;
+ XMM1 = _mm_and_ps(XMM1, XMM2);
+ XMM2 = _mm_load_ps(PFV_p92);
+ XMM1 = _mm_xor_ps(XMM1, XMM7);
+ XMM7 = _mm_load_ps(PFV_mp16);
+ XMM0 = _mm_mul_ps(XMM0, XMM0);
+ XMM7 = _mm_and_ps(XMM7, XMM5);
+ XMM6 = _mm_mul_ps(XMM6, XMM6);
+ XMM5 = _mm_andnot_ps(XMM5, XMM2);
+ XMM3 = XMM6;
+ XMM7 = _mm_or_ps(XMM7, XMM5);
+ XMM5 = XMM4;
+ XMM2 = XMM0;
+ XMM6 = _mm_and_ps(XMM6, XMM5);
+ XMM0 = _mm_and_ps(XMM0, XMM4);
+ XMM5 = _mm_andnot_ps(XMM5, XMM2);
+ XMM4 = _mm_andnot_ps(XMM4, XMM3);
+ XMM2 = _mm_load_ps(PFV_mp5);
+ XMM3 = _mm_load_ps(PFV_1p5);
+ XMM6 = _mm_or_ps(XMM6, XMM5);
+ XMM0 = _mm_or_ps(XMM0, XMM4);
+ XMM0 = _mm_mul_ps(XMM0, XMM7);
+ XMM6 = _mm_add_ps(XMM6, XMM0);
+ XMM7 = _mm_rsqrt_ps(XMM6);
+ XMM5 = XMM7;
+ XMM4 = XMM6;
+ XMM4 = _mm_mul_ps(XMM4, XMM7);
+ XMM4 = _mm_mul_ps(XMM4, XMM7);
+ XMM0 = _mm_load_ps(mdctM+j+ 8); /* group 2: M */
+ XMM4 = _mm_mul_ps(XMM4, XMM7);
+ XMM7 = _mm_load_ps(mdctA+j+ 8); /* group 2: A */
+ XMM4 = _mm_mul_ps(XMM4, XMM2);
+ XMM2 = _mm_load_ps(PABSMASK.sf);
+ XMM5 = _mm_mul_ps(XMM5, XMM3);
+ XMM3 = _mm_load_ps(PFV_0.sf);
+ XMM5 = _mm_add_ps(XMM5, XMM4);
+ XMM4 = XMM0;
+ XMM6 = _mm_and_ps(_mm_mul_ps(XMM6, XMM5), _mm_cmpneq_ps(XMM6, _mm_setzero_ps())); /* sqrt(x); forced to 0 where x==0 (avoids NaN) */
+ XMM5 = XMM0;
+ XMM6 = _mm_or_ps(XMM6, XMM1);
+ XMM1 = XMM0;
+ _mm_store_ps(ret[i]+j+ 4, XMM6); /* group 1 result */
+ XMM6 = XMM7;
+ XMM5 = _mm_mul_ps(XMM5, XMM7);
+ XMM4 = _mm_and_ps(XMM4, XMM2);
+ XMM6 = _mm_and_ps(XMM6, XMM2);
+ XMM2 = _mm_load_ps(PCS_RRRR.sf);
+ XMM5 = _mm_cmplt_ps(XMM5, XMM3);
+ XMM4 = _mm_cmple_ps(XMM4, XMM6);
+ XMM1 = _mm_and_ps(XMM1, XMM2);
+ XMM3 = XMM5;
+ XMM3 = _mm_and_ps(XMM3, XMM4);
+ XMM6 = XMM3;
+ XMM6 = _mm_and_ps(XMM6, XMM2);
+ XMM2 = _mm_load_ps(PFV_p92);
+ XMM6 = _mm_xor_ps(XMM6, XMM1);
+ XMM1 = _mm_load_ps(PFV_mp16);
+ XMM0 = _mm_mul_ps(XMM0, XMM0);
+ XMM1 = _mm_and_ps(XMM1, XMM5);
+ XMM7 = _mm_mul_ps(XMM7, XMM7);
+ XMM5 = _mm_andnot_ps(XMM5, XMM2);
+ XMM4 = XMM7;
+ XMM1 = _mm_or_ps(XMM1, XMM5);
+ XMM5 = XMM3;
+ XMM2 = XMM0;
+ XMM7 = _mm_and_ps(XMM7, XMM5);
+ XMM0 = _mm_and_ps(XMM0, XMM3);
+ XMM5 = _mm_andnot_ps(XMM5, XMM2);
+ XMM3 = _mm_andnot_ps(XMM3, XMM4);
+ XMM2 = _mm_load_ps(PFV_mp5);
+ XMM4 = _mm_load_ps(PFV_1p5);
+ XMM7 = _mm_or_ps(XMM7, XMM5);
+ XMM0 = _mm_or_ps(XMM0, XMM3);
+ XMM0 = _mm_mul_ps(XMM0, XMM1);
+ XMM7 = _mm_add_ps(XMM7, XMM0);
+ XMM1 = _mm_rsqrt_ps(XMM7);
+ XMM5 = XMM1;
+ XMM3 = XMM7;
+ XMM3 = _mm_mul_ps(XMM3, XMM1);
+ XMM3 = _mm_mul_ps(XMM3, XMM1);
+ XMM0 = _mm_load_ps(mdctM+j+12); /* group 3: M */
+ XMM3 = _mm_mul_ps(XMM3, XMM1);
+ XMM1 = _mm_load_ps(mdctA+j+12); /* group 3: A */
+ XMM3 = _mm_mul_ps(XMM3, XMM2);
+ XMM2 = _mm_load_ps(PABSMASK.sf);
+ XMM5 = _mm_mul_ps(XMM5, XMM4);
+ XMM4 = _mm_load_ps(PFV_0.sf);
+ XMM5 = _mm_add_ps(XMM5, XMM3);
+ XMM3 = XMM0;
+ XMM7 = _mm_and_ps(_mm_mul_ps(XMM7, XMM5), _mm_cmpneq_ps(XMM7, _mm_setzero_ps())); /* sqrt(x); forced to 0 where x==0 (avoids NaN) */
+ XMM5 = XMM0;
+ XMM7 = _mm_or_ps(XMM7, XMM6);
+ XMM6 = XMM0;
+ _mm_store_ps(ret[i]+j+ 8, XMM7); /* group 2 result */
+ XMM7 = XMM1;
+ XMM5 = _mm_mul_ps(XMM5, XMM1);
+ XMM3 = _mm_and_ps(XMM3, XMM2);
+ XMM7 = _mm_and_ps(XMM7, XMM2);
+ XMM2 = _mm_load_ps(PCS_RRRR.sf);
+ XMM5 = _mm_cmplt_ps(XMM5, XMM4);
+ XMM3 = _mm_cmple_ps(XMM3, XMM7);
+ XMM6 = _mm_and_ps(XMM6, XMM2);
+ XMM4 = XMM5;
+ XMM4 = _mm_and_ps(XMM4, XMM3);
+ XMM7 = XMM4;
+ XMM7 = _mm_and_ps(XMM7, XMM2);
+ XMM2 = _mm_load_ps(PFV_p92);
+ XMM7 = _mm_xor_ps(XMM7, XMM6);
+ XMM6 = _mm_load_ps(PFV_mp16);
+ XMM0 = _mm_mul_ps(XMM0, XMM0);
+ XMM6 = _mm_and_ps(XMM6, XMM5);
+ XMM1 = _mm_mul_ps(XMM1, XMM1);
+ XMM5 = _mm_andnot_ps(XMM5, XMM2);
+ XMM3 = XMM1;
+ XMM6 = _mm_or_ps(XMM6, XMM5);
+ XMM5 = XMM4;
+ XMM2 = XMM0;
+ XMM1 = _mm_and_ps(XMM1, XMM5);
+ XMM0 = _mm_and_ps(XMM0, XMM4);
+ XMM5 = _mm_andnot_ps(XMM5, XMM2);
+ XMM4 = _mm_andnot_ps(XMM4, XMM3);
+ XMM2 = _mm_load_ps(PFV_mp5);
+ XMM3 = _mm_load_ps(PFV_1p5);
+ XMM1 = _mm_or_ps(XMM1, XMM5);
+ XMM0 = _mm_or_ps(XMM0, XMM4);
+ XMM0 = _mm_mul_ps(XMM0, XMM6);
+ XMM1 = _mm_add_ps(XMM1, XMM0);
+ XMM6 = _mm_rsqrt_ps(XMM1);
+ XMM5 = XMM6;
+ XMM4 = XMM1;
+ XMM4 = _mm_mul_ps(XMM4, XMM6);
+ XMM4 = _mm_mul_ps(XMM4, XMM6);
+ XMM4 = _mm_mul_ps(XMM4, XMM6);
+ XMM4 = _mm_mul_ps(XMM4, XMM2);
+ XMM5 = _mm_mul_ps(XMM5, XMM3);
+ XMM5 = _mm_add_ps(XMM5, XMM4);
+ XMM1 = _mm_and_ps(_mm_mul_ps(XMM1, XMM5), _mm_cmpneq_ps(XMM1, _mm_setzero_ps())); /* sqrt(x); forced to 0 where x==0 (avoids NaN) */
+ XMM1 = _mm_or_ps(XMM1, XMM7);
+ _mm_store_ps(ret[i]+j+12, XMM1); /* group 3 result */
+ }
+#else /* SSE Optimize */
for(j=0;j<n;j++)
ret[i][j]=min_indemnity_dipole_hypot(mdctM[j],mdctA[j]);
+#endif /* SSE Optimize */
}
}else{
for(i=0;i<vi->coupling_steps;i++){
@@ -1290,24 +5978,3308 @@
float *mdctA=mdct[vi->coupling_ang[i]];
ret[i]=_vorbis_block_alloc(vb,n*sizeof(**ret));
+#ifdef __SSE__ /* SSE Optimize */
+ {
+ float *p = ret[i];
+ int limit4 = limit&(~7);
+ for(j=0;j<limit4;j+=8)
+ {
+ _mm_store_ps(p+j , dipole_hypot_ps(mdctM+j , mdctA+j ));
+ _mm_store_ps(p+j+4, dipole_hypot_ps(mdctM+j+4, mdctA+j+4));
+ }
+ limit4 = limit&(~3);
+ for(;j<limit4;j+=4)
+ {
+ _mm_store_ps(p+j , dipole_hypot_ps(mdctM+j , mdctA+j ));
+ }
+ for(;j<limit;j++)
+ p[j] = dipole_hypot(mdctM[j],mdctA[j]);
+ limit4 = (limit+3)&(~3);
+ limit4 = (limit4>=n)?n:limit4;
+ for(;j<limit4;j++)
+ p[j] = round_hypot(mdctM[j],mdctA[j]);
+ limit4 = (limit+7)&(~7);
+ limit4 = (limit4>=n)?n:limit4;
+ for(;j<limit4;j+=4)
+ {
+ round_hypot_ps(&p[j ], &mdctM[j ], &mdctA[j ]);
+ }
+ for(;j<n;j+=8)
+ {
+ round_hypot_ps(&p[j ], &mdctM[j ], &mdctA[j ]);
+ round_hypot_ps(&p[j+4], &mdctM[j+4], &mdctA[j+4]);
+ }
+ }
+#else /* SSE Optimize */
for(j=0;j<limit;j++)
ret[i][j]=dipole_hypot(mdctM[j],mdctA[j]);
for(;j<n;j++)
ret[i][j]=round_hypot(mdctM[j],mdctA[j]);
+#endif /* SSE Optimize */
}
}
return(ret);
}
/* this is for per-channel noise normalization */
-static int apsort(const void *a, const void *b){
- float f1=fabs(**(float**)a);
- float f2=fabs(**(float**)b);
- return (f1<f2)-(f1>f2);
+#ifdef __SSE__ /* SSE Optimize */
+#define C(a,b)\
+ (data[a]>=data[b])
+/*
+0 ACBA
+1 DDCB
+2 ACDC
+
+0<1 D>A D>C C>B B>A
+0<2 000 000 D>B C>A
+Cond. (0<2<<4)|(0<1) SCODE
+
+D>C>B>A 111111 63 3210
+C>D>B>A 111011 59 2310
+D>B>C>A 111101 61 3120
+B>D>C>A 011101 29 1320
+C>B>D>A 011011 27 2130
+B>C>D>A 011001 25 1230
+D>C>A>B 111110 62 3201
+C>D>A>B 111010 58 2301
+D>A>C>B 101110 46 3021
+A>D>C>B 100110 38 0321
+C>A>D>B 110010 50 2031
+A>C>D>B 100010 18 0231
+D>B>A>C 101101 45 3102
+B>D>A>C 001101 13 1302
+D>A>B>C 101100 44 3012
+A>D>B>C 100100 36 0312
+B>A>D>C 000101 5 1032
+A>B>D>C 000100 4 0132
+C>B>A>D 010011 19 2103
+B>C>A>D 010001 17 1203
+C>A>B>D 010010 18 2013
+A>C>B>D 000010 2 0213
+B>A>C>D 000001 1 1023
+A>B>C>D 000000 0 0123
+
+A>B>C>D 000000 0 0123
+B>A>C>D 000001 1 1023
+A>C>B>D 000010 2 0213
+A>B>D>C 000100 4 0132
+B>A>D>C 000101 5 1032
+B>D>A>C 001101 13 1302
+B>C>A>D 010001 17 1203
+C>A>B>D 010010 18 2013
+C>B>A>D 010011 19 2103
+B>C>D>A 011001 25 1230
+C>B>D>A 011011 27 2130
+B>D>C>A 011101 29 1320
+A>C>D>B 100010 34 0231
+A>D>B>C 100100 36 0312
+A>D>C>B 100110 38 0321
+D>A>B>C 101100 44 3012
+D>B>A>C 101101 45 3102
+D>A>C>B 101110 46 3021
+C>A>D>B 110010 50 2031
+C>D>A>B 111010 58 2301
+C>D>B>A 111011 59 2310
+D>B>C>A 111101 61 3120
+D>C>A>B 111110 62 3201
+D>C>B>A 111111 63 3210
+
+*/
+
+static inline void SORT4x2(float *i, int *n) /* for i[0..3] and i[4..7] separately: build a comparison code, then write the 4 indices the table gives for it to n[0..3] and n[4..7] */
+{
+ int c0, c1; /* comparison codes for the first and second group of four */
+#if defined(__SSE2__)
+ __m128i XMM0, XMM1;
+ static _MM_ALIGN16 const __m128x PI4 =
+ { .si32 = {4, 4, 4, 4} }; /* added to the second group's indices so they refer to i[4..7] */
+#endif
+ {
+ __m128 P0, P1, P2, P3, P4, P5;
+ P0 = _mm_load_ps(i ); /* first group of four (aligned load) */
+ P3 = _mm_load_ps(i+4); /* second group of four */
+ P1 = P0;
+ P2 = P0;
+ P4 = P3;
+ P5 = P3;
+ P0 = _mm_shuffle_ps(P0, P0, _MM_SHUFFLE(0,2,1,0)); /* lanes, low to high: elements 0,1,2,0 */
+ P1 = _mm_shuffle_ps(P1, P1, _MM_SHUFFLE(3,3,2,1)); /* lanes, low to high: elements 1,2,3,3 */
+ P2 = _mm_shuffle_ps(P2, P2, _MM_SHUFFLE(0,2,3,2)); /* lanes, low to high: elements 2,3,2,0 */
+ P3 = _mm_shuffle_ps(P3, P3, _MM_SHUFFLE(0,2,1,0)); /* same three arrangements for the second group */
+ P4 = _mm_shuffle_ps(P4, P4, _MM_SHUFFLE(3,3,2,1));
+ P5 = _mm_shuffle_ps(P5, P5, _MM_SHUFFLE(0,2,3,2));
+
+ P1 = _mm_cmplt_ps(P1, P0); /* four adjacent/wrap-around pair comparisons */
+ P2 = _mm_cmplt_ps(P2, P0); /* the two remaining pairs in the low lanes; the upper two lanes compare an element with itself */
+ P4 = _mm_cmplt_ps(P4, P3);
+ P5 = _mm_cmplt_ps(P5, P3);
+ c0 = _mm_movemask_ps(P2); /* one bit per lane */
+ c1 = _mm_movemask_ps(P5);
+ c0 = c0 << 4;
+ c1 = c1 << 4;
+ c0 = c0|_mm_movemask_ps(P1); /* code = (second comparison mask << 4) | first comparison mask */
+ c1 = c1|_mm_movemask_ps(P4);
+ }
+#if defined(__SSE2__)
+ {
+ __m128i *mx = (__m128i*)n;
+ XMM1 = Sort4IndexConvTable[c1].pi; /* Sort4IndexConvTable is defined elsewhere; each entry holds 4 indices */
+ XMM0 = Sort4IndexConvTable[c0].pi;
+ XMM1 = _mm_add_epi32(XMM1, PI4.pi); /* rebase the second group's indices by 4 */
+ _mm_storeu_si128(mx, XMM0); /* n[0..3] (unaligned store) */
+ _mm_storeu_si128(mx + 1, XMM1); /* n[4..7] */
+ }
+#else
+ n[0] =Sort4IndexConvTable[c0].si32[0]; /* scalar copy of the same table lookup */
+ n[1] =Sort4IndexConvTable[c0].si32[1];
+ n[2] =Sort4IndexConvTable[c0].si32[2];
+ n[3] =Sort4IndexConvTable[c0].si32[3];
+ n[4] =Sort4IndexConvTable[c1].si32[0]+4;
+ n[5] =Sort4IndexConvTable[c1].si32[1]+4;
+ n[6] =Sort4IndexConvTable[c1].si32[2]+4;
+ n[7] =Sort4IndexConvTable[c1].si32[3]+4;
+#endif
}
-/*** optimization of sort (for 8 or 32 element) ***/
-#ifdef OPT_SORT
+static inline void sortindex_fix8(int *index, /* receives 8 indices at index[offset..offset+7], each already including offset */
+ float *data, /* values; the 8 at data[offset..offset+7] are ordered */
+ int offset){
+ _MM_ALIGN16 int n[8]; /* n[0..3] and n[4..7]: the two 4-element index runs produced by SORT4x2 */
+ index += offset;
+ data += offset;
+ SORT4x2(data, n);
+ { /* merge of the two runs: an element of run n[0..3] is emitted unless it compares less than the current head of run n[4..7] */
+ __m128 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7;
+ XMM0 = _mm_load_ss(data+n[0]); /* XMMk holds data[n[k]] for k = 0..7 */
+ XMM4 = _mm_load_ss(data+n[4]);
+ XMM1 = _mm_load_ss(data+n[1]);
+ XMM5 = _mm_load_ss(data+n[5]);
+ XMM2 = _mm_load_ss(data+n[2]);
+ XMM6 = _mm_load_ss(data+n[6]);
+ XMM3 = _mm_load_ss(data+n[3]);
+ XMM7 = _mm_load_ss(data+n[7]);
+ if(!_mm_comilt_ss(XMM0, XMM4)){
+ index[0] = n[0]+offset;
+ if(!_mm_comilt_ss(XMM1, XMM4)){
+ index[1] = n[1]+offset;
+ if(!_mm_comilt_ss(XMM2, XMM4)){
+ index[2] = n[2]+offset;
+ if(!_mm_comilt_ss(XMM3, XMM4)){
+ index[3] = n[3]+offset; /* first run fully emitted: copy the second run unchanged */
+ index[4] = n[4]+offset;
+ index[5] = n[5]+offset;
+ index[6] = n[6]+offset;
+ index[7] = n[7]+offset;
+ }else{
+ index[3] = n[4]+offset;
+SORT8_4_35: /* label SORT8_p_ab: the next slot to fill is index[p], by comparing data[n[a]] with data[n[b]] */
+ if(!_mm_comilt_ss(XMM3, XMM5)){
+ index[4] = n[3]+offset;
+ index[5] = n[5]+offset;
+ index[6] = n[6]+offset;
+ index[7] = n[7]+offset;
+ }else{
+ index[4] = n[5]+offset;
+SORT8_5_36:
+ if(!_mm_comilt_ss(XMM3, XMM6)){
+ index[5] = n[3]+offset;
+ index[6] = n[6]+offset;
+ index[7] = n[7]+offset;
+ }else{
+ index[5] = n[6]+offset;
+SORT8_6_37:
+ if(!_mm_comilt_ss(XMM3, XMM7)){
+ index[6] = n[3]+offset;
+ index[7] = n[7]+offset;
+ }else{
+ index[6] = n[7]+offset;
+ index[7] = n[3]+offset;
+ }
+ }
+ }
+ }
+ }else{
+ index[2] = n[4]+offset;
+SORT8_3_25:
+ if(!_mm_comilt_ss(XMM2, XMM5)){
+ index[3] = n[2]+offset;
+ goto SORT8_4_35;
+ }else{
+ index[3] = n[5]+offset;
+SORT8_4_26:
+ if(!_mm_comilt_ss(XMM2, XMM6)){
+ index[4] = n[2]+offset;
+ goto SORT8_5_36;
+ }else{
+ index[4] = n[6]+offset;
+SORT8_5_27:
+ if(!_mm_comilt_ss(XMM2, XMM7)){
+ index[5] = n[2]+offset;
+ goto SORT8_6_37;
+ }else{
+ index[5] = n[7]+offset; /* second run exhausted: copy the rest of the first run */
+ index[6] = n[2]+offset;
+ index[7] = n[3]+offset;
+ }
+ }
+ }
+ }
+ }else{
+ index[1] = n[4]+offset;
+SORT8_2_15:
+ if(!_mm_comilt_ss(XMM1, XMM5)){
+ index[2] = n[1]+offset;
+ goto SORT8_3_25;
+ }else{
+ index[2] = n[5]+offset;
+SORT8_3_16:
+ if(!_mm_comilt_ss(XMM1, XMM6)){
+ index[3] = n[1]+offset;
+ goto SORT8_4_26;
+ }else{
+ index[3] = n[6]+offset;
+SORT8_4_17:
+ if(!_mm_comilt_ss(XMM1, XMM7)){
+ index[4] = n[1]+offset;
+ goto SORT8_5_27;
+ }else{
+ index[4] = n[7]+offset; /* second run exhausted: copy the rest of the first run */
+ index[5] = n[1]+offset;
+ index[6] = n[2]+offset;
+ index[7] = n[3]+offset;
+ }
+ }
+ }
+ }
+ }else{
+ index[0] = n[4]+offset;
+ if(!_mm_comilt_ss(XMM0, XMM5)){
+ index[1] = n[0]+offset;
+ goto SORT8_2_15;
+ }else{
+ index[1] = n[5]+offset;
+ if(!_mm_comilt_ss(XMM0, XMM6)){
+ index[2] = n[0]+offset;
+ goto SORT8_3_16;
+ }else{
+ index[2] = n[6]+offset;
+ if(!_mm_comilt_ss(XMM0, XMM7)){
+ index[3] = n[0]+offset;
+ goto SORT8_4_17;
+ }else{
+ index[3] = n[7]+offset; /* the whole second run precedes the first run */
+ index[4] = n[0]+offset;
+ index[5] = n[1]+offset;
+ index[6] = n[2]+offset;
+ index[7] = n[3]+offset;
+ }
+ }
+ }
+ }
+ }
+}
+static inline void sortindex_fix16(int *index, /* Two-way merge: combines two 8-entry groups of n[] into 16 entries of index[]; at every step the head whose data[] key is not less than the other's is emitted (left group wins ties). */
+ int *n, /* source entries: left group n[j..j+7], right group n[j+8..j+15]; presumably each group is already ordered by the caller -- TODO confirm */
+ float *data, /* key array: an entry value k is ranked by data[k] */
+ int j){ /* element offset added to both index and n before merging */
+ __m128 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7; /* XMM0-3 cache keys of the left group, XMM4-7 cache keys of the right group */
+ index += j;
+ n += j; /* from here on index[0..15] and n[0..15] are relative to j */
+ XMM0 = _mm_load_ss(data+n[0]); /* preload keys of left entries 0..3 (XMM0-3) and right entries 8..11 (XMM4-7) */
+ XMM4 = _mm_load_ss(data+n[8]);
+ XMM1 = _mm_load_ss(data+n[1]);
+ XMM5 = _mm_load_ss(data+n[9]);
+ XMM2 = _mm_load_ss(data+n[2]);
+ XMM6 = _mm_load_ss(data+n[10]);
+ XMM3 = _mm_load_ss(data+n[3]);
+ XMM7 = _mm_load_ss(data+n[11]);
+ if(!_mm_comilt_ss(XMM0, XMM4)){ /* key of n[0] is not less than key of n[8]: emit the left entry */
+ index[0] = n[0];
+ if(!_mm_comilt_ss(XMM1, XMM4)){
+ index[1] = n[1];
+ if(!_mm_comilt_ss(XMM2, XMM4)){
+ index[2] = n[2];
+ if(!_mm_comilt_ss(XMM3, XMM4)){
+ index[3] = n[3];
+ XMM0 = _mm_load_ss(data+n[4]); /* left entries 0..3 consumed: reload XMM0-3 with the keys of n[4..7] */
+ XMM1 = _mm_load_ss(data+n[5]);
+ XMM2 = _mm_load_ss(data+n[6]);
+ XMM3 = _mm_load_ss(data+n[7]);
+ if(!_mm_comilt_ss(XMM0, XMM4)){
+ index[4] = n[4];
+ if(!_mm_comilt_ss(XMM1, XMM4)){
+ index[5] = n[5];
+ if(!_mm_comilt_ss(XMM2, XMM4)){
+ index[6] = n[6];
+ if(!_mm_comilt_ss(XMM3, XMM4)){
+ index[7] = n[7]; /* the whole left group has been emitted ahead of n[8] ... */
+ index[8] = n[8]; /* ... so the right group n[8..15] is copied through unchanged */
+ index[9] = n[9];
+ index[10] = n[10];
+ index[11] = n[11];
+ index[12] = n[12];
+ index[13] = n[13];
+ index[14] = n[14];
+ index[15] = n[15];
+ }else{
+ index[7] = n[8]; /* right head has the larger key: emit it and advance the right group */
+SORT16_080709: /* label scheme SORT16_PPLLRR (hex): PP = next index[] slot to fill, LL = current left entry, RR = current right entry */
+ if(!_mm_comilt_ss(XMM3, XMM5)){
+ index[8] = n[7]; /* last left entry placed: copy the remaining right entries */
+ index[9] = n[9];
+ index[10] = n[10];
+ index[11] = n[11];
+ index[12] = n[12];
+ index[13] = n[13];
+ index[14] = n[14];
+ index[15] = n[15];
+ }else{
+ index[8] = n[9];
+SORT16_09070A:
+ if(!_mm_comilt_ss(XMM3, XMM6)){
+ index[9] = n[7];
+ index[10] = n[10];
+ index[11] = n[11];
+ index[12] = n[12];
+ index[13] = n[13];
+ index[14] = n[14];
+ index[15] = n[15];
+ }else{
+ index[9] = n[10];
+SORT16_0A070B:
+ if(!_mm_comilt_ss(XMM3, XMM7)){
+ index[10] = n[7];
+ index[11] = n[11];
+ index[12] = n[12];
+ index[13] = n[13];
+ index[14] = n[14];
+ index[15] = n[15];
+ }else{
+ index[10] = n[11];
+ XMM4 = _mm_load_ss(data+n[12]); /* right entries 8..11 consumed: reload XMM4-7 with the keys of n[12..15] */
+ XMM5 = _mm_load_ss(data+n[13]);
+ XMM6 = _mm_load_ss(data+n[14]);
+ XMM7 = _mm_load_ss(data+n[15]);
+SORT16_0B070C:
+ if(!_mm_comilt_ss(XMM3, XMM4)){
+ index[11] = n[7];
+ index[12] = n[12];
+ index[13] = n[13];
+ index[14] = n[14];
+ index[15] = n[15];
+ }else{
+ index[11] = n[12];
+SORT16_0C070D:
+ if(!_mm_comilt_ss(XMM3, XMM5)){
+ index[12] = n[7];
+ index[13] = n[13];
+ index[14] = n[14];
+ index[15] = n[15];
+ }else{
+ index[12] = n[13];
+SORT16_0D070E:
+ if(!_mm_comilt_ss(XMM3, XMM6)){
+ index[13] = n[7];
+ index[14] = n[14];
+ index[15] = n[15];
+ }else{
+ index[13] = n[14];
+SORT16_0E070F:
+ if(!_mm_comilt_ss(XMM3, XMM7)){
+ index[14] = n[7];
+ index[15] = n[15];
+ }else{
+ index[14] = n[15];
+ index[15] = n[7]; /* right group exhausted: the last left entry fills the final slot */
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }else{
+ index[6] = n[8]; /* left entry 6 lost against right entry 8; keep comparing n[6] with the following right entries */
+SORT16_070609:
+ if(!_mm_comilt_ss(XMM2, XMM5)){
+ index[7] = n[6];
+ goto SORT16_080709; /* left entry 6 placed: continue in the chain that compares left entry 7 */
+ }else{
+ index[7] = n[9];
+SORT16_08060A:
+ if(!_mm_comilt_ss(XMM2, XMM6)){
+ index[8] = n[6];
+ goto SORT16_09070A;
+ }else{
+ index[8] = n[10];
+SORT16_09060B:
+ if(!_mm_comilt_ss(XMM2, XMM7)){
+ index[9] = n[6];
+ goto SORT16_0A070B;
+ }else{
+ index[9] = n[11];
+ XMM4 = _mm_load_ss(data+n[12]); /* switch XMM4-7 to the keys of right entries 12..15 */
+ XMM5 = _mm_load_ss(data+n[13]);
+ XMM6 = _mm_load_ss(data+n[14]);
+ XMM7 = _mm_load_ss(data+n[15]);
+SORT16_0A060C:
+ if(!_mm_comilt_ss(XMM2, XMM4)){
+ index[10] = n[6];
+ goto SORT16_0B070C;
+ }else{
+ index[10] = n[12];
+SORT16_0B060D:
+ if(!_mm_comilt_ss(XMM2, XMM5)){
+ index[11] = n[6];
+ goto SORT16_0C070D;
+ }else{
+ index[11] = n[13];
+SORT16_0C060E:
+ if(!_mm_comilt_ss(XMM2, XMM6)){
+ index[12] = n[6];
+ goto SORT16_0D070E;
+ }else{
+ index[12] = n[14];
+SORT16_0D060F:
+ if(!_mm_comilt_ss(XMM2, XMM7)){
+ index[13] = n[6];
+ goto SORT16_0E070F;
+ }else{
+ index[13] = n[15];
+ index[14] = n[6]; /* right group exhausted: append the remaining left entries n[6..7] */
+ index[15] = n[7];
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }else{
+ index[5] = n[8]; /* left entry 5 lost against right entry 8; keep comparing n[5] with the following right entries */
+SORT16_060509:
+ if(!_mm_comilt_ss(XMM1, XMM5)){
+ index[6] = n[5];
+ goto SORT16_070609; /* left entry 5 placed: continue in the chain that compares left entry 6 */
+ }else{
+ index[6] = n[9];
+SORT16_07050A:
+ if(!_mm_comilt_ss(XMM1, XMM6)){
+ index[7] = n[5];
+ goto SORT16_08060A;
+ }else{
+ index[7] = n[10];
+SORT16_08050B:
+ if(!_mm_comilt_ss(XMM1, XMM7)){
+ index[8] = n[5];
+ goto SORT16_09060B;
+ }else{
+ index[8] = n[11];
+ XMM4 = _mm_load_ss(data+n[12]); /* switch XMM4-7 to the keys of right entries 12..15 */
+ XMM5 = _mm_load_ss(data+n[13]);
+ XMM6 = _mm_load_ss(data+n[14]);
+ XMM7 = _mm_load_ss(data+n[15]);
+SORT16_09050C:
+ if(!_mm_comilt_ss(XMM1, XMM4)){
+ index[9] = n[5];
+ goto SORT16_0A060C;
+ }else{
+ index[9] = n[12];
+SORT16_0A050D:
+ if(!_mm_comilt_ss(XMM1, XMM5)){
+ index[10] = n[5];
+ goto SORT16_0B060D;
+ }else{
+ index[10] = n[13];
+SORT16_0B050E:
+ if(!_mm_comilt_ss(XMM1, XMM6)){
+ index[11] = n[5];
+ goto SORT16_0C060E;
+ }else{
+ index[11] = n[14];
+SORT16_0C050F:
+ if(!_mm_comilt_ss(XMM1, XMM7)){
+ index[12] = n[5];
+ goto SORT16_0D060F;
+ }else{
+ index[12] = n[15];
+ index[13] = n[5]; /* right group exhausted: append the remaining left entries n[5..7] */
+ index[14] = n[6];
+ index[15] = n[7];
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }else{
+ index[4] = n[8]; /* left entry 4 lost against right entry 8; keep comparing n[4] with the following right entries */
+SORT16_050409:
+ if(!_mm_comilt_ss(XMM0, XMM5)){
+ index[5] = n[4];
+ goto SORT16_060509; /* left entry 4 placed: continue in the chain that compares left entry 5 */
+ }else{
+ index[5] = n[9];
+SORT16_06040A:
+ if(!_mm_comilt_ss(XMM0, XMM6)){
+ index[6] = n[4];
+ goto SORT16_07050A;
+ }else{
+ index[6] = n[10];
+SORT16_07040B:
+ if(!_mm_comilt_ss(XMM0, XMM7)){
+ index[7] = n[4];
+ goto SORT16_08050B;
+ }else{
+ index[7] = n[11];
+ XMM4 = _mm_load_ss(data+n[12]); /* switch XMM4-7 to the keys of right entries 12..15 */
+ XMM5 = _mm_load_ss(data+n[13]);
+ XMM6 = _mm_load_ss(data+n[14]);
+ XMM7 = _mm_load_ss(data+n[15]);
+SORT16_08040C:
+ if(!_mm_comilt_ss(XMM0, XMM4)){
+ index[8] = n[4];
+ goto SORT16_09050C;
+ }else{
+ index[8] = n[12];
+SORT16_09040D:
+ if(!_mm_comilt_ss(XMM0, XMM5)){
+ index[9] = n[4];
+ goto SORT16_0A050D;
+ }else{
+ index[9] = n[13];
+SORT16_0A040E:
+ if(!_mm_comilt_ss(XMM0, XMM6)){
+ index[10] = n[4];
+ goto SORT16_0B050E;
+ }else{
+ index[10] = n[14];
+SORT16_0B040F:
+ if(!_mm_comilt_ss(XMM0, XMM7)){
+ index[11] = n[4];
+ goto SORT16_0C050F;
+ }else{
+ index[11] = n[15];
+ index[12] = n[4]; /* right group exhausted: append the remaining left entries n[4..7] */
+ index[13] = n[5];
+ index[14] = n[6];
+ index[15] = n[7];
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }else{
+ index[3] = n[8]; /* left entry 3 lost against right entry 8; XMM3 still holds the key of n[3] in this chain */
+SORT16_040309:
+ if(!_mm_comilt_ss(XMM3, XMM5)){
+ index[4] = n[3];
+ XMM0 = _mm_load_ss(data+n[4]); /* left entry 3 placed: the target chain compares n[4..7], so reload XMM0-3 before jumping */
+ XMM1 = _mm_load_ss(data+n[5]);
+ XMM2 = _mm_load_ss(data+n[6]);
+ XMM3 = _mm_load_ss(data+n[7]);
+ goto SORT16_050409;
+ }else{
+ index[4] = n[9];
+SORT16_05030A:
+ if(!_mm_comilt_ss(XMM3, XMM6)){
+ index[5] = n[3];
+ XMM0 = _mm_load_ss(data+n[4]);
+ XMM1 = _mm_load_ss(data+n[5]);
+ XMM2 = _mm_load_ss(data+n[6]);
+ XMM3 = _mm_load_ss(data+n[7]);
+ goto SORT16_06040A;
+ }else{
+ index[5] = n[10];
+SORT16_06030B:
+ if(!_mm_comilt_ss(XMM3, XMM7)){
+ index[6] = n[3];
+ XMM0 = _mm_load_ss(data+n[4]);
+ XMM1 = _mm_load_ss(data+n[5]);
+ XMM2 = _mm_load_ss(data+n[6]);
+ XMM3 = _mm_load_ss(data+n[7]);
+ goto SORT16_07040B;
+ }else{
+ index[6] = n[11];
+ XMM4 = _mm_load_ss(data+n[12]); /* switch XMM4-7 to the keys of right entries 12..15 */
+ XMM5 = _mm_load_ss(data+n[13]);
+ XMM6 = _mm_load_ss(data+n[14]);
+ XMM7 = _mm_load_ss(data+n[15]);
+SORT16_07030C:
+ if(!_mm_comilt_ss(XMM3, XMM4)){
+ index[7] = n[3];
+ XMM0 = _mm_load_ss(data+n[4]);
+ XMM1 = _mm_load_ss(data+n[5]);
+ XMM2 = _mm_load_ss(data+n[6]);
+ XMM3 = _mm_load_ss(data+n[7]);
+ goto SORT16_08040C;
+ }else{
+ index[7] = n[12];
+SORT16_08030D:
+ if(!_mm_comilt_ss(XMM3, XMM5)){
+ index[8] = n[3];
+ XMM0 = _mm_load_ss(data+n[4]);
+ XMM1 = _mm_load_ss(data+n[5]);
+ XMM2 = _mm_load_ss(data+n[6]);
+ XMM3 = _mm_load_ss(data+n[7]);
+ goto SORT16_09040D;
+ }else{
+ index[8] = n[13];
+SORT16_09030E:
+ if(!_mm_comilt_ss(XMM3, XMM6)){
+ index[9] = n[3];
+ XMM0 = _mm_load_ss(data+n[4]);
+ XMM1 = _mm_load_ss(data+n[5]);
+ XMM2 = _mm_load_ss(data+n[6]);
+ XMM3 = _mm_load_ss(data+n[7]);
+ goto SORT16_0A040E;
+ }else{
+ index[9] = n[14];
+SORT16_0A030F:
+ if(!_mm_comilt_ss(XMM3, XMM7)){
+ index[10] = n[3];
+ XMM0 = _mm_load_ss(data+n[4]);
+ XMM1 = _mm_load_ss(data+n[5]);
+ XMM2 = _mm_load_ss(data+n[6]);
+ XMM3 = _mm_load_ss(data+n[7]);
+ goto SORT16_0B040F;
+ }else{
+ index[10] = n[15];
+ index[11] = n[3]; /* right group exhausted: append the remaining left entries n[3..7] */
+ index[12] = n[4];
+ index[13] = n[5];
+ index[14] = n[6];
+ index[15] = n[7];
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }else{
+ index[2] = n[8]; /* left entry 2 lost against right entry 8; keep comparing n[2] with the following right entries */
+SORT16_030209:
+ if(!_mm_comilt_ss(XMM2, XMM5)){
+ index[3] = n[2];
+ goto SORT16_040309; /* left entry 2 placed: continue in the chain that compares left entry 3 */
+ }else{
+ index[3] = n[9];
+SORT16_04020A:
+ if(!_mm_comilt_ss(XMM2, XMM6)){
+ index[4] = n[2];
+ goto SORT16_05030A;
+ }else{
+ index[4] = n[10];
+SORT16_05020B:
+ if(!_mm_comilt_ss(XMM2, XMM7)){
+ index[5] = n[2];
+ goto SORT16_06030B;
+ }else{
+ index[5] = n[11];
+ XMM4 = _mm_load_ss(data+n[12]); /* switch XMM4-7 to the keys of right entries 12..15 */
+ XMM5 = _mm_load_ss(data+n[13]);
+ XMM6 = _mm_load_ss(data+n[14]);
+ XMM7 = _mm_load_ss(data+n[15]);
+SORT16_06020C:
+ if(!_mm_comilt_ss(XMM2, XMM4)){
+ index[6] = n[2];
+ goto SORT16_07030C;
+ }else{
+ index[6] = n[12];
+SORT16_07020D:
+ if(!_mm_comilt_ss(XMM2, XMM5)){
+ index[7] = n[2];
+ goto SORT16_08030D;
+ }else{
+ index[7] = n[13];
+SORT16_08020E:
+ if(!_mm_comilt_ss(XMM2, XMM6)){
+ index[8] = n[2];
+ goto SORT16_09030E;
+ }else{
+ index[8] = n[14];
+SORT16_09020F:
+ if(!_mm_comilt_ss(XMM2, XMM7)){
+ index[9] = n[2];
+ goto SORT16_0A030F;
+ }else{
+ index[9] = n[15];
+ index[10] = n[2]; /* right group exhausted: append the remaining left entries n[2..7] */
+ index[11] = n[3];
+ index[12] = n[4];
+ index[13] = n[5];
+ index[14] = n[6];
+ index[15] = n[7];
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }else{
+ index[1] = n[8]; /* left entry 1 lost against right entry 8; keep comparing n[1] with the following right entries */
+SORT16_020109:
+ if(!_mm_comilt_ss(XMM1, XMM5)){
+ index[2] = n[1];
+ goto SORT16_030209; /* left entry 1 placed: continue in the chain that compares left entry 2 */
+ }else{
+ index[2] = n[9];
+SORT16_03010A:
+ if(!_mm_comilt_ss(XMM1, XMM6)){
+ index[3] = n[1];
+ goto SORT16_04020A;
+ }else{
+ index[3] = n[10];
+SORT16_04010B:
+ if(!_mm_comilt_ss(XMM1, XMM7)){
+ index[4] = n[1];
+ goto SORT16_05020B;
+ }else{
+ index[4] = n[11];
+ XMM4 = _mm_load_ss(data+n[12]); /* switch XMM4-7 to the keys of right entries 12..15 */
+ XMM5 = _mm_load_ss(data+n[13]);
+ XMM6 = _mm_load_ss(data+n[14]);
+ XMM7 = _mm_load_ss(data+n[15]);
+SORT16_05010C:
+ if(!_mm_comilt_ss(XMM1, XMM4)){
+ index[5] = n[1];
+ goto SORT16_06020C;
+ }else{
+ index[5] = n[12];
+SORT16_06010D:
+ if(!_mm_comilt_ss(XMM1, XMM5)){
+ index[6] = n[1];
+ goto SORT16_07020D;
+ }else{
+ index[6] = n[13];
+SORT16_07010E:
+ if(!_mm_comilt_ss(XMM1, XMM6)){
+ index[7] = n[1];
+ goto SORT16_08020E;
+ }else{
+ index[7] = n[14];
+SORT16_08010F:
+ if(!_mm_comilt_ss(XMM1, XMM7)){
+ index[8] = n[1];
+ goto SORT16_09020F;
+ }else{
+ index[8] = n[15];
+ index[9] = n[1]; /* right group exhausted: append the remaining left entries n[1..7] */
+ index[10] = n[2];
+ index[11] = n[3];
+ index[12] = n[4];
+ index[13] = n[5];
+ index[14] = n[6];
+ index[15] = n[7];
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }else{
+ index[0] = n[8]; /* key of n[0] is less than key of n[8]: the right head is emitted first */
+ if(!_mm_comilt_ss(XMM0, XMM5)){ /* this chain keeps comparing left entry 0 (XMM0) against successive right entries */
+ index[1] = n[0];
+ goto SORT16_020109; /* left entry 0 placed: continue in the chain that compares left entry 1 */
+ }else{
+ index[1] = n[9];
+ if(!_mm_comilt_ss(XMM0, XMM6)){
+ index[2] = n[0];
+ goto SORT16_03010A;
+ }else{
+ index[2] = n[10];
+ if(!_mm_comilt_ss(XMM0, XMM7)){
+ index[3] = n[0];
+ goto SORT16_04010B;
+ }else{
+ index[3] = n[11];
+ XMM4 = _mm_load_ss(data+n[12]); /* switch XMM4-7 to the keys of right entries 12..15 */
+ XMM5 = _mm_load_ss(data+n[13]);
+ XMM6 = _mm_load_ss(data+n[14]);
+ XMM7 = _mm_load_ss(data+n[15]);
+ if(!_mm_comilt_ss(XMM0, XMM4)){
+ index[4] = n[0];
+ goto SORT16_05010C;
+ }else{
+ index[4] = n[12];
+ if(!_mm_comilt_ss(XMM0, XMM5)){
+ index[5] = n[0];
+ goto SORT16_06010D;
+ }else{
+ index[5] = n[13];
+ if(!_mm_comilt_ss(XMM0, XMM6)){
+ index[6] = n[0];
+ goto SORT16_07010E;
+ }else{
+ index[6] = n[14];
+ if(!_mm_comilt_ss(XMM0, XMM7)){
+ index[7] = n[0];
+ goto SORT16_08010F;
+ }else{
+ index[7] = n[15]; /* every right entry was emitted before any left entry ... */
+ index[8] = n[0]; /* ... so the left group n[0..7] follows unchanged */
+ index[9] = n[1];
+ index[10] = n[2];
+ index[11] = n[3];
+ index[12] = n[4];
+ index[13] = n[5];
+ index[14] = n[6];
+ index[15] = n[7];
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+}
+static inline void sortindex_fix32(int *index,
+ float *data,
+ int offset){
+ _MM_ALIGN16 int n[32];
+ __m128 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7;
+ sortindex_fix8(index,data,offset );
+ sortindex_fix8(index,data,offset+ 8);
+ sortindex_fix8(index,data,offset+16);
+ sortindex_fix8(index,data,offset+24);
+ index+=offset;
+ sortindex_fix16(n,index,data, 0);
+ sortindex_fix16(n,index,data,16);
+ XMM0 = _mm_load_ss(data+n[0]);
+ XMM4 = _mm_load_ss(data+n[16]);
+ XMM1 = _mm_load_ss(data+n[1]);
+ XMM5 = _mm_load_ss(data+n[17]);
+ XMM2 = _mm_load_ss(data+n[2]);
+ XMM6 = _mm_load_ss(data+n[18]);
+ XMM3 = _mm_load_ss(data+n[3]);
+ XMM7 = _mm_load_ss(data+n[19]);
+ if(!_mm_comilt_ss(XMM0, XMM4)){
+ index[0] = n[0];
+ if(!_mm_comilt_ss(XMM1, XMM4)){
+ index[1] = n[1];
+ if(!_mm_comilt_ss(XMM2, XMM4)){
+ index[2] = n[2];
+ if(!_mm_comilt_ss(XMM3, XMM4)){
+ index[3] = n[3];
+ XMM0 = _mm_load_ss(data+n[4]);
+ XMM1 = _mm_load_ss(data+n[5]);
+ XMM2 = _mm_load_ss(data+n[6]);
+ XMM3 = _mm_load_ss(data+n[7]);
+ if(!_mm_comilt_ss(XMM0, XMM4)){
+ index[4] = n[4];
+ if(!_mm_comilt_ss(XMM1, XMM4)){
+ index[5] = n[5];
+ if(!_mm_comilt_ss(XMM2, XMM4)){
+ index[6] = n[6];
+ if(!_mm_comilt_ss(XMM3, XMM4)){
+ index[7] = n[7];
+ XMM0 = _mm_load_ss(data+n[8]);
+ XMM1 = _mm_load_ss(data+n[9]);
+ XMM2 = _mm_load_ss(data+n[10]);
+ XMM3 = _mm_load_ss(data+n[11]);
+ if(!_mm_comilt_ss(XMM0, XMM4)){
+ index[8] = n[8];
+ if(!_mm_comilt_ss(XMM1, XMM4)){
+ index[9] = n[9];
+ if(!_mm_comilt_ss(XMM2, XMM4)){
+ index[10] = n[10];
+ if(!_mm_comilt_ss(XMM3, XMM4)){
+ index[11] = n[11];
+ XMM0 = _mm_load_ss(data+n[12]);
+ XMM1 = _mm_load_ss(data+n[13]);
+ XMM2 = _mm_load_ss(data+n[14]);
+ XMM3 = _mm_load_ss(data+n[15]);
+ if(!_mm_comilt_ss(XMM0, XMM4)){
+ index[12] = n[12];
+ if(!_mm_comilt_ss(XMM1, XMM4)){
+ index[13] = n[13];
+ if(!_mm_comilt_ss(XMM2, XMM4)){
+ index[14] = n[14];
+ if(!_mm_comilt_ss(XMM3, XMM4)){
+ index[15] = n[15];
+ index[16] = n[16];
+ index[17] = n[17];
+ index[18] = n[18];
+ index[19] = n[19];
+ index[20] = n[20];
+ index[21] = n[21];
+ index[22] = n[22];
+ index[23] = n[23];
+ index[24] = n[24];
+ index[25] = n[25];
+ index[26] = n[26];
+ index[27] = n[27];
+ index[28] = n[28];
+ index[29] = n[29];
+ index[30] = n[30];
+ index[31] = n[31];
+ }else{
+ index[15] = n[16];
+SORT32_100F11:
+ if(!_mm_comilt_ss(XMM3, XMM5)){
+ index[16] = n[15];
+ index[17] = n[17];
+ index[18] = n[18];
+ index[19] = n[19];
+ index[20] = n[20];
+ index[21] = n[21];
+ index[22] = n[22];
+ index[23] = n[23];
+ index[24] = n[24];
+ index[25] = n[25];
+ index[26] = n[26];
+ index[27] = n[27];
+ index[28] = n[28];
+ index[29] = n[29];
+ index[30] = n[30];
+ index[31] = n[31];
+ }else{
+ index[16] = n[17];
+SORT32_110F12:
+ if(!_mm_comilt_ss(XMM3, XMM6)){
+ index[17] = n[15];
+ index[18] = n[18];
+ index[19] = n[19];
+ index[20] = n[20];
+ index[21] = n[21];
+ index[22] = n[22];
+ index[23] = n[23];
+ index[24] = n[24];
+ index[25] = n[25];
+ index[26] = n[26];
+ index[27] = n[27];
+ index[28] = n[28];
+ index[29] = n[29];
+ index[30] = n[30];
+ index[31] = n[31];
+ }else{
+ index[17] = n[18];
+SORT32_120F13:
+ if(!_mm_comilt_ss(XMM3, XMM7)){
+ index[18] = n[15];
+ index[19] = n[19];
+ index[20] = n[20];
+ index[21] = n[21];
+ index[22] = n[22];
+ index[23] = n[23];
+ index[24] = n[24];
+ index[25] = n[25];
+ index[26] = n[26];
+ index[27] = n[27];
+ index[28] = n[28];
+ index[29] = n[29];
+ index[30] = n[30];
+ index[31] = n[31];
+ }else{
+ index[18] = n[19];
+ XMM4 = _mm_load_ss(data+n[20]);
+ XMM5 = _mm_load_ss(data+n[21]);
+ XMM6 = _mm_load_ss(data+n[22]);
+ XMM7 = _mm_load_ss(data+n[23]);
+SORT32_130F14:
+ if(!_mm_comilt_ss(XMM3, XMM4)){
+ index[19] = n[15];
+ index[20] = n[20];
+ index[21] = n[21];
+ index[22] = n[22];
+ index[23] = n[23];
+ index[24] = n[24];
+ index[25] = n[25];
+ index[26] = n[26];
+ index[27] = n[27];
+ index[28] = n[28];
+ index[29] = n[29];
+ index[30] = n[30];
+ index[31] = n[31];
+ }else{
+ index[19] = n[20];
+SORT32_140F15:
+ if(!_mm_comilt_ss(XMM3, XMM5)){
+ index[20] = n[15];
+ index[21] = n[21];
+ index[22] = n[22];
+ index[23] = n[23];
+ index[24] = n[24];
+ index[25] = n[25];
+ index[26] = n[26];
+ index[27] = n[27];
+ index[28] = n[28];
+ index[29] = n[29];
+ index[30] = n[30];
+ index[31] = n[31];
+ }else{
+ index[20] = n[21];
+SORT32_150F16:
+ if(!_mm_comilt_ss(XMM3, XMM6)){
+ index[21] = n[15];
+ index[22] = n[22];
+ index[23] = n[23];
+ index[24] = n[24];
+ index[25] = n[25];
+ index[26] = n[26];
+ index[27] = n[27];
+ index[28] = n[28];
+ index[29] = n[29];
+ index[30] = n[30];
+ index[31] = n[31];
+ }else{
+ index[21] = n[22];
+SORT32_160F17:
+ if(!_mm_comilt_ss(XMM3, XMM7)){
+ index[22] = n[15];
+ index[23] = n[23];
+ index[24] = n[24];
+ index[25] = n[25];
+ index[26] = n[26];
+ index[27] = n[27];
+ index[28] = n[28];
+ index[29] = n[29];
+ index[30] = n[30];
+ index[31] = n[31];
+ }else{
+ index[22] = n[23];
+ XMM4 = _mm_load_ss(data+n[24]);
+ XMM5 = _mm_load_ss(data+n[25]);
+ XMM6 = _mm_load_ss(data+n[26]);
+ XMM7 = _mm_load_ss(data+n[27]);
+SORT32_170F18:
+ if(!_mm_comilt_ss(XMM3, XMM4)){
+ index[23] = n[15];
+ index[24] = n[24];
+ index[25] = n[25];
+ index[26] = n[26];
+ index[27] = n[27];
+ index[28] = n[28];
+ index[29] = n[29];
+ index[30] = n[30];
+ index[31] = n[31];
+ }else{
+ index[23] = n[24];
+SORT32_180F19:
+ if(!_mm_comilt_ss(XMM3, XMM5)){
+ index[24] = n[15];
+ index[25] = n[25];
+ index[26] = n[26];
+ index[27] = n[27];
+ index[28] = n[28];
+ index[29] = n[29];
+ index[30] = n[30];
+ index[31] = n[31];
+ }else{
+ index[24] = n[25];
+SORT32_190F1A:
+ if(!_mm_comilt_ss(XMM3, XMM6)){
+ index[25] = n[15];
+ index[26] = n[26];
+ index[27] = n[27];
+ index[28] = n[28];
+ index[29] = n[29];
+ index[30] = n[30];
+ index[31] = n[31];
+ }else{
+ index[25] = n[26];
+SORT32_1A0F1B:
+ if(!_mm_comilt_ss(XMM3, XMM7)){
+ index[26] = n[15];
+ index[27] = n[27];
+ index[28] = n[28];
+ index[29] = n[29];
+ index[30] = n[30];
+ index[31] = n[31];
+ }else{
+ index[26] = n[27];
+ XMM4 = _mm_load_ss(data+n[28]);
+ XMM5 = _mm_load_ss(data+n[29]);
+ XMM6 = _mm_load_ss(data+n[30]);
+ XMM7 = _mm_load_ss(data+n[31]);
+SORT32_1B0F1C:
+ if(!_mm_comilt_ss(XMM3, XMM4)){
+ index[27] = n[15];
+ index[28] = n[28];
+ index[29] = n[29];
+ index[30] = n[30];
+ index[31] = n[31];
+ }else{
+ index[27] = n[28];
+SORT32_1C0F1D:
+ if(!_mm_comilt_ss(XMM3, XMM5)){
+ index[28] = n[15];
+ index[29] = n[29];
+ index[30] = n[30];
+ index[31] = n[31];
+ }else{
+ index[28] = n[29];
+SORT32_1D0F1E:
+ if(!_mm_comilt_ss(XMM3, XMM6)){
+ index[29] = n[15];
+ index[30] = n[30];
+ index[31] = n[31];
+ }else{
+ index[29] = n[30];
+SORT32_1E0F1F:
+ if(!_mm_comilt_ss(XMM3, XMM7)){
+ index[30] = n[15];
+ index[31] = n[31];
+ }else{
+ index[30] = n[31];
+ index[31] = n[15];
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }else{
+ index[14] = n[16];
+SORT32_0F0E11:
+ if(!_mm_comilt_ss(XMM2, XMM5)){
+ index[15] = n[14];
+ goto SORT32_100F11;
+ }else{
+ index[15] = n[17];
+SORT32_100E12:
+ if(!_mm_comilt_ss(XMM2, XMM6)){
+ index[16] = n[14];
+ goto SORT32_110F12;
+ }else{
+ index[16] = n[18];
+SORT32_110E13:
+ if(!_mm_comilt_ss(XMM2, XMM7)){
+ index[17] = n[14];
+ goto SORT32_120F13;
+ }else{
+ index[17] = n[19];
+ XMM4 = _mm_load_ss(data+n[20]);
+ XMM5 = _mm_load_ss(data+n[21]);
+ XMM6 = _mm_load_ss(data+n[22]);
+ XMM7 = _mm_load_ss(data+n[23]);
+SORT32_120E14:
+ if(!_mm_comilt_ss(XMM2, XMM4)){
+ index[18] = n[14];
+ goto SORT32_130F14;
+ }else{
+ index[18] = n[20];
+SORT32_130E15:
+ if(!_mm_comilt_ss(XMM2, XMM5)){
+ index[19] = n[14];
+ goto SORT32_140F15;
+ }else{
+ index[19] = n[21];
+SORT32_140E16:
+ if(!_mm_comilt_ss(XMM2, XMM6)){
+ index[20] = n[14];
+ goto SORT32_150F16;
+ }else{
+ index[20] = n[22];
+SORT32_150E17:
+ if(!_mm_comilt_ss(XMM2, XMM7)){
+ index[21] = n[14];
+ goto SORT32_160F17;
+ }else{
+ index[21] = n[23];
+ XMM4 = _mm_load_ss(data+n[24]);
+ XMM5 = _mm_load_ss(data+n[25]);
+ XMM6 = _mm_load_ss(data+n[26]);
+ XMM7 = _mm_load_ss(data+n[27]);
+SORT32_160E18:
+ if(!_mm_comilt_ss(XMM2, XMM4)){
+ index[22] = n[14];
+ goto SORT32_170F18;
+ }else{
+ index[22] = n[24];
+SORT32_170E19:
+ if(!_mm_comilt_ss(XMM2, XMM5)){
+ index[23] = n[14];
+ goto SORT32_180F19;
+ }else{
+ index[23] = n[25];
+SORT32_180E1A:
+ if(!_mm_comilt_ss(XMM2, XMM6)){
+ index[24] = n[14];
+ goto SORT32_190F1A;
+ }else{
+ index[24] = n[26];
+SORT32_190E1B:
+ if(!_mm_comilt_ss(XMM2, XMM7)){
+ index[25] = n[14];
+ goto SORT32_1A0F1B;
+ }else{
+ index[25] = n[27];
+ XMM4 = _mm_load_ss(data+n[28]);
+ XMM5 = _mm_load_ss(data+n[29]);
+ XMM6 = _mm_load_ss(data+n[30]);
+ XMM7 = _mm_load_ss(data+n[31]);
+SORT32_1A0E1C:
+ if(!_mm_comilt_ss(XMM2, XMM4)){
+ index[26] = n[14];
+ goto SORT32_1B0F1C;
+ }else{
+ index[26] = n[28];
+SORT32_1B0E1D:
+ if(!_mm_comilt_ss(XMM2, XMM5)){
+ index[27] = n[14];
+ goto SORT32_1C0F1D;
+ }else{
+ index[27] = n[29];
+SORT32_1C0E1E:
+ if(!_mm_comilt_ss(XMM2, XMM6)){
+ index[28] = n[14];
+ goto SORT32_1D0F1E;
+ }else{
+ index[28] = n[30];
+SORT32_1D0E1F:
+ if(!_mm_comilt_ss(XMM2, XMM7)){
+ index[29] = n[14];
+ goto SORT32_1E0F1F;
+ }else{
+ index[29] = n[31];
+ index[30] = n[14];
+ index[31] = n[15];
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }else{
+ index[13] = n[16];
+SORT32_0E0D11:
+ if(!_mm_comilt_ss(XMM1, XMM5)){
+ index[14] = n[13];
+ goto SORT32_0F0E11;
+ }else{
+ index[14] = n[17];
+SORT32_0F0D12:
+ if(!_mm_comilt_ss(XMM1, XMM6)){
+ index[15] = n[13];
+ goto SORT32_100E12;
+ }else{
+ index[15] = n[18];
+SORT32_100D13:
+ if(!_mm_comilt_ss(XMM1, XMM7)){
+ index[16] = n[13];
+ goto SORT32_110E13;
+ }else{
+ index[16] = n[19];
+ XMM4 = _mm_load_ss(data+n[20]);
+ XMM5 = _mm_load_ss(data+n[21]);
+ XMM6 = _mm_load_ss(data+n[22]);
+ XMM7 = _mm_load_ss(data+n[23]);
+SORT32_110D14:
+ if(!_mm_comilt_ss(XMM1, XMM4)){
+ index[17] = n[13];
+ goto SORT32_120E14;
+ }else{
+ index[17] = n[20];
+SORT32_120D15:
+ if(!_mm_comilt_ss(XMM1, XMM5)){
+ index[18] = n[13];
+ goto SORT32_130E15;
+ }else{
+ index[18] = n[21];
+SORT32_130D16:
+ if(!_mm_comilt_ss(XMM1, XMM6)){
+ index[19] = n[13];
+ goto SORT32_140E16;
+ }else{
+ index[19] = n[22];
+SORT32_140D17:
+ if(!_mm_comilt_ss(XMM1, XMM7)){
+ index[20] = n[13];
+ goto SORT32_150E17;
+ }else{
+ index[20] = n[23];
+ XMM4 = _mm_load_ss(data+n[24]);
+ XMM5 = _mm_load_ss(data+n[25]);
+ XMM6 = _mm_load_ss(data+n[26]);
+ XMM7 = _mm_load_ss(data+n[27]);
+SORT32_150D18:
+ if(!_mm_comilt_ss(XMM1, XMM4)){
+ index[21] = n[13];
+ goto SORT32_160E18;
+ }else{
+ index[21] = n[24];
+SORT32_160D19:
+ if(!_mm_comilt_ss(XMM1, XMM5)){
+ index[22] = n[13];
+ goto SORT32_170E19;
+ }else{
+ index[22] = n[25];
+SORT32_170D1A:
+ if(!_mm_comilt_ss(XMM1, XMM6)){
+ index[23] = n[13];
+ goto SORT32_180E1A;
+ }else{
+ index[23] = n[26];
+SORT32_180D1B:
+ if(!_mm_comilt_ss(XMM1, XMM7)){
+ index[24] = n[13];
+ goto SORT32_190E1B;
+ }else{
+ index[24] = n[27];
+ XMM4 = _mm_load_ss(data+n[28]);
+ XMM5 = _mm_load_ss(data+n[29]);
+ XMM6 = _mm_load_ss(data+n[30]);
+ XMM7 = _mm_load_ss(data+n[31]);
+SORT32_190D1C:
+ if(!_mm_comilt_ss(XMM1, XMM4)){
+ index[25] = n[13];
+ goto SORT32_1A0E1C;
+ }else{
+ index[25] = n[28];
+SORT32_1A0D1D:
+ if(!_mm_comilt_ss(XMM1, XMM5)){
+ index[26] = n[13];
+ goto SORT32_1B0E1D;
+ }else{
+ index[26] = n[29];
+SORT32_1B0D1E:
+ if(!_mm_comilt_ss(XMM1, XMM6)){
+ index[27] = n[13];
+ goto SORT32_1C0E1E;
+ }else{
+ index[27] = n[30];
+SORT32_1C0D1F:
+ if(!_mm_comilt_ss(XMM1, XMM7)){
+ index[28] = n[13];
+ goto SORT32_1D0E1F;
+ }else{
+ index[28] = n[31];
+ index[29] = n[13];
+ index[30] = n[14];
+ index[31] = n[15];
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }else{
+ index[12] = n[16];
+SORT32_0D0C11:
+ if(!_mm_comilt_ss(XMM0, XMM5)){
+ index[13] = n[12];
+ goto SORT32_0E0D11;
+ }else{
+ index[13] = n[17];
+SORT32_0E0C12:
+ if(!_mm_comilt_ss(XMM0, XMM6)){
+ index[14] = n[12];
+ goto SORT32_0F0D12;
+ }else{
+ index[14] = n[18];
+SORT32_0F0C13:
+ if(!_mm_comilt_ss(XMM0, XMM7)){
+ index[15] = n[12];
+ goto SORT32_100D13;
+ }else{
+ index[15] = n[19];
+ XMM4 = _mm_load_ss(data+n[20]);
+ XMM5 = _mm_load_ss(data+n[21]);
+ XMM6 = _mm_load_ss(data+n[22]);
+ XMM7 = _mm_load_ss(data+n[23]);
+SORT32_100C14:
+ if(!_mm_comilt_ss(XMM0, XMM4)){
+ index[16] = n[12];
+ goto SORT32_110D14;
+ }else{
+ index[16] = n[20];
+SORT32_110C15:
+ if(!_mm_comilt_ss(XMM0, XMM5)){
+ index[17] = n[12];
+ goto SORT32_120D15;
+ }else{
+ index[17] = n[21];
+SORT32_120C16:
+ if(!_mm_comilt_ss(XMM0, XMM6)){
+ index[18] = n[12];
+ goto SORT32_130D16;
+ }else{
+ index[18] = n[22];
+SORT32_130C17:
+ if(!_mm_comilt_ss(XMM0, XMM7)){
+ index[19] = n[12];
+ goto SORT32_140D17;
+ }else{
+ index[19] = n[23];
+ XMM4 = _mm_load_ss(data+n[24]);
+ XMM5 = _mm_load_ss(data+n[25]);
+ XMM6 = _mm_load_ss(data+n[26]);
+ XMM7 = _mm_load_ss(data+n[27]);
+SORT32_140C18:
+ if(!_mm_comilt_ss(XMM0, XMM4)){
+ index[20] = n[12];
+ goto SORT32_150D18;
+ }else{
+ index[20] = n[24];
+SORT32_150C19:
+ if(!_mm_comilt_ss(XMM0, XMM5)){
+ index[21] = n[12];
+ goto SORT32_160D19;
+ }else{
+ index[21] = n[25];
+SORT32_160C1A:
+ if(!_mm_comilt_ss(XMM0, XMM6)){
+ index[22] = n[12];
+ goto SORT32_170D1A;
+ }else{
+ index[22] = n[26];
+SORT32_170C1B:
+ if(!_mm_comilt_ss(XMM0, XMM7)){
+ index[23] = n[12];
+ goto SORT32_180D1B;
+ }else{
+ index[23] = n[27];
+ XMM4 = _mm_load_ss(data+n[28]);
+ XMM5 = _mm_load_ss(data+n[29]);
+ XMM6 = _mm_load_ss(data+n[30]);
+ XMM7 = _mm_load_ss(data+n[31]);
+SORT32_180C1C:
+ if(!_mm_comilt_ss(XMM0, XMM4)){
+ index[24] = n[12];
+ goto SORT32_190D1C;
+ }else{
+ index[24] = n[28];
+SORT32_190C1D:
+ if(!_mm_comilt_ss(XMM0, XMM5)){
+ index[25] = n[12];
+ goto SORT32_1A0D1D;
+ }else{
+ index[25] = n[29];
+SORT32_1A0C1E:
+ if(!_mm_comilt_ss(XMM0, XMM6)){
+ index[26] = n[12];
+ goto SORT32_1B0D1E;
+ }else{
+ index[26] = n[30];
+SORT32_1B0C1F:
+ if(!_mm_comilt_ss(XMM0, XMM7)){
+ index[27] = n[12];
+ goto SORT32_1C0D1F;
+ }else{
+ index[27] = n[31];
+ index[28] = n[12];
+ index[29] = n[13];
+ index[30] = n[14];
+ index[31] = n[15];
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }else{
+ index[11] = n[16];
+SORT32_0C0B11:
+ if(!_mm_comilt_ss(XMM3, XMM5)){
+ index[12] = n[11];
+ XMM0 = _mm_load_ss(data+n[12]);
+ XMM1 = _mm_load_ss(data+n[13]);
+ XMM2 = _mm_load_ss(data+n[14]);
+ XMM3 = _mm_load_ss(data+n[15]);
+ goto SORT32_0D0C11;
+ }else{
+ index[12] = n[17];
+SORT32_0D0B12:
+ if(!_mm_comilt_ss(XMM3, XMM6)){
+ index[13] = n[11];
+ XMM0 = _mm_load_ss(data+n[12]);
+ XMM1 = _mm_load_ss(data+n[13]);
+ XMM2 = _mm_load_ss(data+n[14]);
+ XMM3 = _mm_load_ss(data+n[15]);
+ goto SORT32_0E0C12;
+ }else{
+ index[13] = n[18];
+SORT32_0E0B13:
+ if(!_mm_comilt_ss(XMM3, XMM7)){
+ index[14] = n[11];
+ XMM0 = _mm_load_ss(data+n[12]);
+ XMM1 = _mm_load_ss(data+n[13]);
+ XMM2 = _mm_load_ss(data+n[14]);
+ XMM3 = _mm_load_ss(data+n[15]);
+ goto SORT32_0F0C13;
+ }else{
+ index[14] = n[19];
+ XMM4 = _mm_load_ss(data+n[20]);
+ XMM5 = _mm_load_ss(data+n[21]);
+ XMM6 = _mm_load_ss(data+n[22]);
+ XMM7 = _mm_load_ss(data+n[23]);
+SORT32_0F0B14:
+ if(!_mm_comilt_ss(XMM3, XMM4)){
+ index[15] = n[11];
+ XMM0 = _mm_load_ss(data+n[12]);
+ XMM1 = _mm_load_ss(data+n[13]);
+ XMM2 = _mm_load_ss(data+n[14]);
+ XMM3 = _mm_load_ss(data+n[15]);
+ goto SORT32_100C14;
+ }else{
+ index[15] = n[20];
+SORT32_100B15:
+ if(!_mm_comilt_ss(XMM3, XMM5)){
+ index[16] = n[11];
+ XMM0 = _mm_load_ss(data+n[12]);
+ XMM1 = _mm_load_ss(data+n[13]);
+ XMM2 = _mm_load_ss(data+n[14]);
+ XMM3 = _mm_load_ss(data+n[15]);
+ goto SORT32_110C15;
+ }else{
+ index[16] = n[21];
+SORT32_110B16:
+ if(!_mm_comilt_ss(XMM3, XMM6)){
+ index[17] = n[11];
+ XMM0 = _mm_load_ss(data+n[12]);
+ XMM1 = _mm_load_ss(data+n[13]);
+ XMM2 = _mm_load_ss(data+n[14]);
+ XMM3 = _mm_load_ss(data+n[15]);
+ goto SORT32_120C16;
+ }else{
+ index[17] = n[22];
+SORT32_120B17:
+ if(!_mm_comilt_ss(XMM3, XMM7)){
+ index[18] = n[11];
+ XMM0 = _mm_load_ss(data+n[12]);
+ XMM1 = _mm_load_ss(data+n[13]);
+ XMM2 = _mm_load_ss(data+n[14]);
+ XMM3 = _mm_load_ss(data+n[15]);
+ goto SORT32_130C17;
+ }else{
+ index[18] = n[23];
+ XMM4 = _mm_load_ss(data+n[24]);
+ XMM5 = _mm_load_ss(data+n[25]);
+ XMM6 = _mm_load_ss(data+n[26]);
+ XMM7 = _mm_load_ss(data+n[27]);
+SORT32_130B18:
+ if(!_mm_comilt_ss(XMM3, XMM4)){
+ index[19] = n[11];
+ XMM0 = _mm_load_ss(data+n[12]);
+ XMM1 = _mm_load_ss(data+n[13]);
+ XMM2 = _mm_load_ss(data+n[14]);
+ XMM3 = _mm_load_ss(data+n[15]);
+ goto SORT32_140C18;
+ }else{
+ index[19] = n[24];
+SORT32_140B19:
+ if(!_mm_comilt_ss(XMM3, XMM5)){
+ index[20] = n[11];
+ XMM0 = _mm_load_ss(data+n[12]);
+ XMM1 = _mm_load_ss(data+n[13]);
+ XMM2 = _mm_load_ss(data+n[14]);
+ XMM3 = _mm_load_ss(data+n[15]);
+ goto SORT32_150C19;
+ }else{
+ index[20] = n[25];
+SORT32_150B1A:
+ if(!_mm_comilt_ss(XMM3, XMM6)){
+ index[21] = n[11];
+ XMM0 = _mm_load_ss(data+n[12]);
+ XMM1 = _mm_load_ss(data+n[13]);
+ XMM2 = _mm_load_ss(data+n[14]);
+ XMM3 = _mm_load_ss(data+n[15]);
+ goto SORT32_160C1A;
+ }else{
+ index[21] = n[26];
+SORT32_160B1B:
+ if(!_mm_comilt_ss(XMM3, XMM7)){
+ index[22] = n[11];
+ XMM0 = _mm_load_ss(data+n[12]);
+ XMM1 = _mm_load_ss(data+n[13]);
+ XMM2 = _mm_load_ss(data+n[14]);
+ XMM3 = _mm_load_ss(data+n[15]);
+ goto SORT32_170C1B;
+ }else{
+ index[22] = n[27];
+ XMM4 = _mm_load_ss(data+n[28]);
+ XMM5 = _mm_load_ss(data+n[29]);
+ XMM6 = _mm_load_ss(data+n[30]);
+ XMM7 = _mm_load_ss(data+n[31]);
+SORT32_170B1C:
+ if(!_mm_comilt_ss(XMM3, XMM4)){
+ index[23] = n[11];
+ XMM0 = _mm_load_ss(data+n[12]);
+ XMM1 = _mm_load_ss(data+n[13]);
+ XMM2 = _mm_load_ss(data+n[14]);
+ XMM3 = _mm_load_ss(data+n[15]);
+ goto SORT32_180C1C;
+ }else{
+ index[23] = n[28];
+SORT32_180B1D:
+ if(!_mm_comilt_ss(XMM3, XMM5)){
+ index[24] = n[11];
+ XMM0 = _mm_load_ss(data+n[12]);
+ XMM1 = _mm_load_ss(data+n[13]);
+ XMM2 = _mm_load_ss(data+n[14]);
+ XMM3 = _mm_load_ss(data+n[15]);
+ goto SORT32_190C1D;
+ }else{
+ index[24] = n[29];
+SORT32_190B1E:
+ if(!_mm_comilt_ss(XMM3, XMM6)){
+ index[25] = n[11];
+ XMM0 = _mm_load_ss(data+n[12]);
+ XMM1 = _mm_load_ss(data+n[13]);
+ XMM2 = _mm_load_ss(data+n[14]);
+ XMM3 = _mm_load_ss(data+n[15]);
+ goto SORT32_1A0C1E;
+ }else{
+ index[25] = n[30];
+SORT32_1A0B1F:
+ if(!_mm_comilt_ss(XMM3, XMM7)){
+ index[26] = n[11];
+ XMM0 = _mm_load_ss(data+n[12]);
+ XMM1 = _mm_load_ss(data+n[13]);
+ XMM2 = _mm_load_ss(data+n[14]);
+ XMM3 = _mm_load_ss(data+n[15]);
+ goto SORT32_1B0C1F;
+ }else{
+ index[26] = n[31];
+ index[27] = n[11];
+ index[28] = n[12];
+ index[29] = n[13];
+ index[30] = n[14];
+ index[31] = n[15];
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }else{
+ index[10] = n[16];
+SORT32_0B0A11:
+ if(!_mm_comilt_ss(XMM2, XMM5)){
+ index[11] = n[10];
+ goto SORT32_0C0B11;
+ }else{
+ index[11] = n[17];
+SORT32_0C0A12:
+ if(!_mm_comilt_ss(XMM2, XMM6)){
+ index[12] = n[10];
+ goto SORT32_0D0B12;
+ }else{
+ index[12] = n[18];
+SORT32_0D0A13:
+ if(!_mm_comilt_ss(XMM2, XMM7)){
+ index[13] = n[10];
+ goto SORT32_0E0B13;
+ }else{
+ index[13] = n[19];
+ XMM4 = _mm_load_ss(data+n[20]);
+ XMM5 = _mm_load_ss(data+n[21]);
+ XMM6 = _mm_load_ss(data+n[22]);
+ XMM7 = _mm_load_ss(data+n[23]);
+SORT32_0E0A14:
+ if(!_mm_comilt_ss(XMM2, XMM4)){
+ index[14] = n[10];
+ goto SORT32_0F0B14;
+ }else{
+ index[14] = n[20];
+SORT32_0F0A15:
+ if(!_mm_comilt_ss(XMM2, XMM5)){
+ index[15] = n[10];
+ goto SORT32_100B15;
+ }else{
+ index[15] = n[21];
+SORT32_100A16:
+ if(!_mm_comilt_ss(XMM2, XMM6)){
+ index[16] = n[10];
+ goto SORT32_110B16;
+ }else{
+ index[16] = n[22];
+SORT32_110A17:
+ if(!_mm_comilt_ss(XMM2, XMM7)){
+ index[17] = n[10];
+ goto SORT32_120B17;
+ }else{
+ index[17] = n[23];
+ XMM4 = _mm_load_ss(data+n[24]);
+ XMM5 = _mm_load_ss(data+n[25]);
+ XMM6 = _mm_load_ss(data+n[26]);
+ XMM7 = _mm_load_ss(data+n[27]);
+SORT32_120A18:
+ if(!_mm_comilt_ss(XMM2, XMM4)){
+ index[18] = n[10];
+ goto SORT32_130B18;
+ }else{
+ index[18] = n[24];
+SORT32_130A19:
+ if(!_mm_comilt_ss(XMM2, XMM5)){
+ index[19] = n[10];
+ goto SORT32_140B19;
+ }else{
+ index[19] = n[25];
+SORT32_140A1A:
+ if(!_mm_comilt_ss(XMM2, XMM6)){
+ index[20] = n[10];
+ goto SORT32_150B1A;
+ }else{
+ index[20] = n[26];
+SORT32_150A1B:
+ if(!_mm_comilt_ss(XMM2, XMM7)){
+ index[21] = n[10];
+ goto SORT32_160B1B;
+ }else{
+ index[21] = n[27];
+ XMM4 = _mm_load_ss(data+n[28]);
+ XMM5 = _mm_load_ss(data+n[29]);
+ XMM6 = _mm_load_ss(data+n[30]);
+ XMM7 = _mm_load_ss(data+n[31]);
+SORT32_160A1C:
+ if(!_mm_comilt_ss(XMM2, XMM4)){
+ index[22] = n[10];
+ goto SORT32_170B1C;
+ }else{
+ index[22] = n[28];
+SORT32_170A1D:
+ if(!_mm_comilt_ss(XMM2, XMM5)){
+ index[23] = n[10];
+ goto SORT32_180B1D;
+ }else{
+ index[23] = n[29];
+SORT32_180A1E:
+ if(!_mm_comilt_ss(XMM2, XMM6)){
+ index[24] = n[10];
+ goto SORT32_190B1E;
+ }else{
+ index[24] = n[30];
+SORT32_190A1F:
+ if(!_mm_comilt_ss(XMM2, XMM7)){
+ index[25] = n[10];
+ goto SORT32_1A0B1F;
+ }else{
+ index[25] = n[31];
+ index[26] = n[10];
+ index[27] = n[11];
+ index[28] = n[12];
+ index[29] = n[13];
+ index[30] = n[14];
+ index[31] = n[15];
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }else{
+ index[9] = n[16];
+SORT32_0A0911:
+ if(!_mm_comilt_ss(XMM1, XMM5)){
+ index[10] = n[9];
+ goto SORT32_0B0A11;
+ }else{
+ index[10] = n[17];
+SORT32_0B0912:
+ if(!_mm_comilt_ss(XMM1, XMM6)){
+ index[11] = n[9];
+ goto SORT32_0C0A12;
+ }else{
+ index[11] = n[18];
+SORT32_0C0913:
+ if(!_mm_comilt_ss(XMM1, XMM7)){
+ index[12] = n[9];
+ goto SORT32_0D0A13;
+ }else{
+ index[12] = n[19];
+ XMM4 = _mm_load_ss(data+n[20]);
+ XMM5 = _mm_load_ss(data+n[21]);
+ XMM6 = _mm_load_ss(data+n[22]);
+ XMM7 = _mm_load_ss(data+n[23]);
+SORT32_0D0914:
+ if(!_mm_comilt_ss(XMM1, XMM4)){
+ index[13] = n[9];
+ goto SORT32_0E0A14;
+ }else{
+ index[13] = n[20];
+SORT32_0E0915:
+ if(!_mm_comilt_ss(XMM1, XMM5)){
+ index[14] = n[9];
+ goto SORT32_0F0A15;
+ }else{
+ index[14] = n[21];
+SORT32_0F0916:
+ if(!_mm_comilt_ss(XMM1, XMM6)){
+ index[15] = n[9];
+ goto SORT32_100A16;
+ }else{
+ index[15] = n[22];
+SORT32_100917:
+ if(!_mm_comilt_ss(XMM1, XMM7)){
+ index[16] = n[9];
+ goto SORT32_110A17;
+ }else{
+ index[16] = n[23];
+ XMM4 = _mm_load_ss(data+n[24]);
+ XMM5 = _mm_load_ss(data+n[25]);
+ XMM6 = _mm_load_ss(data+n[26]);
+ XMM7 = _mm_load_ss(data+n[27]);
+SORT32_110918:
+ if(!_mm_comilt_ss(XMM1, XMM4)){
+ index[17] = n[9];
+ goto SORT32_120A18;
+ }else{
+ index[17] = n[24];
+SORT32_120919:
+ if(!_mm_comilt_ss(XMM1, XMM5)){
+ index[18] = n[9];
+ goto SORT32_130A19;
+ }else{
+ index[18] = n[25];
+SORT32_13091A:
+ if(!_mm_comilt_ss(XMM1, XMM6)){
+ index[19] = n[9];
+ goto SORT32_140A1A;
+ }else{
+ index[19] = n[26];
+SORT32_14091B:
+ if(!_mm_comilt_ss(XMM1, XMM7)){
+ index[20] = n[9];
+ goto SORT32_150A1B;
+ }else{
+ index[20] = n[27];
+ XMM4 = _mm_load_ss(data+n[28]);
+ XMM5 = _mm_load_ss(data+n[29]);
+ XMM6 = _mm_load_ss(data+n[30]);
+ XMM7 = _mm_load_ss(data+n[31]);
+SORT32_15091C:
+ if(!_mm_comilt_ss(XMM1, XMM4)){
+ index[21] = n[9];
+ goto SORT32_160A1C;
+ }else{
+ index[21] = n[28];
+SORT32_16091D:
+ if(!_mm_comilt_ss(XMM1, XMM5)){
+ index[22] = n[9];
+ goto SORT32_170A1D;
+ }else{
+ index[22] = n[29];
+SORT32_17091E:
+ if(!_mm_comilt_ss(XMM1, XMM6)){
+ index[23] = n[9];
+ goto SORT32_180A1E;
+ }else{
+ index[23] = n[30];
+SORT32_18091F:
+ if(!_mm_comilt_ss(XMM1, XMM7)){
+ index[24] = n[9];
+ goto SORT32_190A1F;
+ }else{
+ index[24] = n[31];
+ index[25] = n[9];
+ index[26] = n[10];
+ index[27] = n[11];
+ index[28] = n[12];
+ index[29] = n[13];
+ index[30] = n[14];
+ index[31] = n[15];
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }else{
+ index[8] = n[16];
+SORT32_090811:
+ if(!_mm_comilt_ss(XMM0, XMM5)){
+ index[9] = n[8];
+ goto SORT32_0A0911;
+ }else{
+ index[9] = n[17];
+SORT32_0A0812:
+ if(!_mm_comilt_ss(XMM0, XMM6)){
+ index[10] = n[8];
+ goto SORT32_0B0912;
+ }else{
+ index[10] = n[18];
+SORT32_0B0813:
+ if(!_mm_comilt_ss(XMM0, XMM7)){
+ index[11] = n[8];
+ goto SORT32_0C0913;
+ }else{
+ index[11] = n[19];
+ XMM4 = _mm_load_ss(data+n[20]);
+ XMM5 = _mm_load_ss(data+n[21]);
+ XMM6 = _mm_load_ss(data+n[22]);
+ XMM7 = _mm_load_ss(data+n[23]);
+SORT32_0C0814:
+ if(!_mm_comilt_ss(XMM0, XMM4)){
+ index[12] = n[8];
+ goto SORT32_0D0914;
+ }else{
+ index[12] = n[20];
+SORT32_0D0815:
+ if(!_mm_comilt_ss(XMM0, XMM5)){
+ index[13] = n[8];
+ goto SORT32_0E0915;
+ }else{
+ index[13] = n[21];
+SORT32_0E0816:
+ if(!_mm_comilt_ss(XMM0, XMM6)){
+ index[14] = n[8];
+ goto SORT32_0F0916;
+ }else{
+ index[14] = n[22];
+SORT32_0F0817:
+ if(!_mm_comilt_ss(XMM0, XMM7)){
+ index[15] = n[8];
+ goto SORT32_100917;
+ }else{
+ index[15] = n[23];
+ XMM4 = _mm_load_ss(data+n[24]);
+ XMM5 = _mm_load_ss(data+n[25]);
+ XMM6 = _mm_load_ss(data+n[26]);
+ XMM7 = _mm_load_ss(data+n[27]);
+SORT32_100818:
+ if(!_mm_comilt_ss(XMM0, XMM4)){
+ index[16] = n[8];
+ goto SORT32_110918;
+ }else{
+ index[16] = n[24];
+SORT32_110819:
+ if(!_mm_comilt_ss(XMM0, XMM5)){
+ index[17] = n[8];
+ goto SORT32_120919;
+ }else{
+ index[17] = n[25];
+SORT32_12081A:
+ if(!_mm_comilt_ss(XMM0, XMM6)){
+ index[18] = n[8];
+ goto SORT32_13091A;
+ }else{
+ index[18] = n[26];
+SORT32_13081B:
+ if(!_mm_comilt_ss(XMM0, XMM7)){
+ index[19] = n[8];
+ goto SORT32_14091B;
+ }else{
+ index[19] = n[27];
+ XMM4 = _mm_load_ss(data+n[28]);
+ XMM5 = _mm_load_ss(data+n[29]);
+ XMM6 = _mm_load_ss(data+n[30]);
+ XMM7 = _mm_load_ss(data+n[31]);
+SORT32_14081C:
+ if(!_mm_comilt_ss(XMM0, XMM4)){
+ index[20] = n[8];
+ goto SORT32_15091C;
+ }else{
+ index[20] = n[28];
+SORT32_15081D:
+ if(!_mm_comilt_ss(XMM0, XMM5)){
+ index[21] = n[8];
+ goto SORT32_16091D;
+ }else{
+ index[21] = n[29];
+SORT32_16081E:
+ if(!_mm_comilt_ss(XMM0, XMM6)){
+ index[22] = n[8];
+ goto SORT32_17091E;
+ }else{
+ index[22] = n[30];
+SORT32_17081F:
+ if(!_mm_comilt_ss(XMM0, XMM7)){
+ index[23] = n[8];
+ goto SORT32_18091F;
+ }else{
+ index[23] = n[31];
+ index[24] = n[8];
+ index[25] = n[9];
+ index[26] = n[10];
+ index[27] = n[11];
+ index[28] = n[12];
+ index[29] = n[13];
+ index[30] = n[14];
+ index[31] = n[15];
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }else{
+ index[7] = n[16];
+SORT32_080711:
+ if(!_mm_comilt_ss(XMM3, XMM5)){
+ index[8] = n[7];
+ XMM0 = _mm_load_ss(data+n[8]);
+ XMM1 = _mm_load_ss(data+n[9]);
+ XMM2 = _mm_load_ss(data+n[10]);
+ XMM3 = _mm_load_ss(data+n[11]);
+ goto SORT32_090811;
+ }else{
+ index[8] = n[17];
+SORT32_090712:
+ if(!_mm_comilt_ss(XMM3, XMM6)){
+ index[9] = n[7];
+ XMM0 = _mm_load_ss(data+n[8]);
+ XMM1 = _mm_load_ss(data+n[9]);
+ XMM2 = _mm_load_ss(data+n[10]);
+ XMM3 = _mm_load_ss(data+n[11]);
+ goto SORT32_0A0812;
+ }else{
+ index[9] = n[18];
+SORT32_0A0713:
+ if(!_mm_comilt_ss(XMM3, XMM7)){
+ index[10] = n[7];
+ XMM0 = _mm_load_ss(data+n[8]);
+ XMM1 = _mm_load_ss(data+n[9]);
+ XMM2 = _mm_load_ss(data+n[10]);
+ XMM3 = _mm_load_ss(data+n[11]);
+ goto SORT32_0B0813;
+ }else{
+ index[10] = n[19];
+ XMM4 = _mm_load_ss(data+n[20]);
+ XMM5 = _mm_load_ss(data+n[21]);
+ XMM6 = _mm_load_ss(data+n[22]);
+ XMM7 = _mm_load_ss(data+n[23]);
+SORT32_0B0714:
+ if(!_mm_comilt_ss(XMM3, XMM4)){
+ index[11] = n[7];
+ XMM0 = _mm_load_ss(data+n[8]);
+ XMM1 = _mm_load_ss(data+n[9]);
+ XMM2 = _mm_load_ss(data+n[10]);
+ XMM3 = _mm_load_ss(data+n[11]);
+ goto SORT32_0C0814;
+ }else{
+ index[11] = n[20];
+SORT32_0C0715:
+ if(!_mm_comilt_ss(XMM3, XMM5)){
+ index[12] = n[7];
+ XMM0 = _mm_load_ss(data+n[8]);
+ XMM1 = _mm_load_ss(data+n[9]);
+ XMM2 = _mm_load_ss(data+n[10]);
+ XMM3 = _mm_load_ss(data+n[11]);
+ goto SORT32_0D0815;
+ }else{
+ index[12] = n[21];
+SORT32_0D0716:
+ if(!_mm_comilt_ss(XMM3, XMM6)){
+ index[13] = n[7];
+ XMM0 = _mm_load_ss(data+n[8]);
+ XMM1 = _mm_load_ss(data+n[9]);
+ XMM2 = _mm_load_ss(data+n[10]);
+ XMM3 = _mm_load_ss(data+n[11]);
+ goto SORT32_0E0816;
+ }else{
+ index[13] = n[22];
+SORT32_0E0717:
+ if(!_mm_comilt_ss(XMM3, XMM7)){
+ index[14] = n[7];
+ XMM0 = _mm_load_ss(data+n[8]);
+ XMM1 = _mm_load_ss(data+n[9]);
+ XMM2 = _mm_load_ss(data+n[10]);
+ XMM3 = _mm_load_ss(data+n[11]);
+ goto SORT32_0F0817;
+ }else{
+ index[14] = n[23];
+ XMM4 = _mm_load_ss(data+n[24]);
+ XMM5 = _mm_load_ss(data+n[25]);
+ XMM6 = _mm_load_ss(data+n[26]);
+ XMM7 = _mm_load_ss(data+n[27]);
+SORT32_0F0718:
+ if(!_mm_comilt_ss(XMM3, XMM4)){
+ index[15] = n[7];
+ XMM0 = _mm_load_ss(data+n[8]);
+ XMM1 = _mm_load_ss(data+n[9]);
+ XMM2 = _mm_load_ss(data+n[10]);
+ XMM3 = _mm_load_ss(data+n[11]);
+ goto SORT32_100818;
+ }else{
+ index[15] = n[24];
+SORT32_100719:
+ if(!_mm_comilt_ss(XMM3, XMM5)){
+ index[16] = n[7];
+ XMM0 = _mm_load_ss(data+n[8]);
+ XMM1 = _mm_load_ss(data+n[9]);
+ XMM2 = _mm_load_ss(data+n[10]);
+ XMM3 = _mm_load_ss(data+n[11]);
+ goto SORT32_110819;
+ }else{
+ index[16] = n[25];
+SORT32_11071A:
+ if(!_mm_comilt_ss(XMM3, XMM6)){
+ index[17] = n[7];
+ XMM0 = _mm_load_ss(data+n[8]);
+ XMM1 = _mm_load_ss(data+n[9]);
+ XMM2 = _mm_load_ss(data+n[10]);
+ XMM3 = _mm_load_ss(data+n[11]);
+ goto SORT32_12081A;
+ }else{
+ index[17] = n[26];
+SORT32_12071B:
+ if(!_mm_comilt_ss(XMM3, XMM7)){
+ index[18] = n[7];
+ XMM0 = _mm_load_ss(data+n[8]);
+ XMM1 = _mm_load_ss(data+n[9]);
+ XMM2 = _mm_load_ss(data+n[10]);
+ XMM3 = _mm_load_ss(data+n[11]);
+ goto SORT32_13081B;
+ }else{
+ index[18] = n[27];
+ XMM4 = _mm_load_ss(data+n[28]);
+ XMM5 = _mm_load_ss(data+n[29]);
+ XMM6 = _mm_load_ss(data+n[30]);
+ XMM7 = _mm_load_ss(data+n[31]);
+SORT32_13071C:
+ if(!_mm_comilt_ss(XMM3, XMM4)){
+ index[19] = n[7];
+ XMM0 = _mm_load_ss(data+n[8]);
+ XMM1 = _mm_load_ss(data+n[9]);
+ XMM2 = _mm_load_ss(data+n[10]);
+ XMM3 = _mm_load_ss(data+n[11]);
+ goto SORT32_14081C;
+ }else{
+ index[19] = n[28];
+SORT32_14071D:
+ if(!_mm_comilt_ss(XMM3, XMM5)){
+ index[20] = n[7];
+ XMM0 = _mm_load_ss(data+n[8]);
+ XMM1 = _mm_load_ss(data+n[9]);
+ XMM2 = _mm_load_ss(data+n[10]);
+ XMM3 = _mm_load_ss(data+n[11]);
+ goto SORT32_15081D;
+ }else{
+ index[20] = n[29];
+SORT32_15071E:
+ if(!_mm_comilt_ss(XMM3, XMM6)){
+ index[21] = n[7];
+ XMM0 = _mm_load_ss(data+n[8]);
+ XMM1 = _mm_load_ss(data+n[9]);
+ XMM2 = _mm_load_ss(data+n[10]);
+ XMM3 = _mm_load_ss(data+n[11]);
+ goto SORT32_16081E;
+ }else{
+ index[21] = n[30];
+SORT32_16071F:
+ if(!_mm_comilt_ss(XMM3, XMM7)){
+ index[22] = n[7];
+ XMM0 = _mm_load_ss(data+n[8]);
+ XMM1 = _mm_load_ss(data+n[9]);
+ XMM2 = _mm_load_ss(data+n[10]);
+ XMM3 = _mm_load_ss(data+n[11]);
+ goto SORT32_17081F;
+ }else{
+ index[22] = n[31];
+ index[23] = n[7];
+ index[24] = n[8];
+ index[25] = n[9];
+ index[26] = n[10];
+ index[27] = n[11];
+ index[28] = n[12];
+ index[29] = n[13];
+ index[30] = n[14];
+ index[31] = n[15];
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }else{
+ index[6] = n[16];
+SORT32_070611:
+ if(!_mm_comilt_ss(XMM2, XMM5)){
+ index[7] = n[6];
+ goto SORT32_080711;
+ }else{
+ index[7] = n[17];
+SORT32_080612:
+ if(!_mm_comilt_ss(XMM2, XMM6)){
+ index[8] = n[6];
+ goto SORT32_090712;
+ }else{
+ index[8] = n[18];
+SORT32_090613:
+ if(!_mm_comilt_ss(XMM2, XMM7)){
+ index[9] = n[6];
+ goto SORT32_0A0713;
+ }else{
+ index[9] = n[19];
+ XMM4 = _mm_load_ss(data+n[20]);
+ XMM5 = _mm_load_ss(data+n[21]);
+ XMM6 = _mm_load_ss(data+n[22]);
+ XMM7 = _mm_load_ss(data+n[23]);
+SORT32_0A0614:
+ if(!_mm_comilt_ss(XMM2, XMM4)){
+ index[10] = n[6];
+ goto SORT32_0B0714;
+ }else{
+ index[10] = n[20];
+SORT32_0B0615:
+ if(!_mm_comilt_ss(XMM2, XMM5)){
+ index[11] = n[6];
+ goto SORT32_0C0715;
+ }else{
+ index[11] = n[21];
+SORT32_0C0616:
+ if(!_mm_comilt_ss(XMM2, XMM6)){
+ index[12] = n[6];
+ goto SORT32_0D0716;
+ }else{
+ index[12] = n[22];
+SORT32_0D0617:
+ if(!_mm_comilt_ss(XMM2, XMM7)){
+ index[13] = n[6];
+ goto SORT32_0E0717;
+ }else{
+ index[13] = n[23];
+ XMM4 = _mm_load_ss(data+n[24]);
+ XMM5 = _mm_load_ss(data+n[25]);
+ XMM6 = _mm_load_ss(data+n[26]);
+ XMM7 = _mm_load_ss(data+n[27]);
+SORT32_0E0618:
+ if(!_mm_comilt_ss(XMM2, XMM4)){
+ index[14] = n[6];
+ goto SORT32_0F0718;
+ }else{
+ index[14] = n[24];
+SORT32_0F0619:
+ if(!_mm_comilt_ss(XMM2, XMM5)){
+ index[15] = n[6];
+ goto SORT32_100719;
+ }else{
+ index[15] = n[25];
+SORT32_10061A:
+ if(!_mm_comilt_ss(XMM2, XMM6)){
+ index[16] = n[6];
+ goto SORT32_11071A;
+ }else{
+ index[16] = n[26];
+SORT32_11061B:
+ if(!_mm_comilt_ss(XMM2, XMM7)){
+ index[17] = n[6];
+ goto SORT32_12071B;
+ }else{
+ index[17] = n[27];
+ XMM4 = _mm_load_ss(data+n[28]);
+ XMM5 = _mm_load_ss(data+n[29]);
+ XMM6 = _mm_load_ss(data+n[30]);
+ XMM7 = _mm_load_ss(data+n[31]);
+SORT32_12061C:
+ if(!_mm_comilt_ss(XMM2, XMM4)){
+ index[18] = n[6];
+ goto SORT32_13071C;
+ }else{
+ index[18] = n[28];
+SORT32_13061D:
+ if(!_mm_comilt_ss(XMM2, XMM5)){
+ index[19] = n[6];
+ goto SORT32_14071D;
+ }else{
+ index[19] = n[29];
+SORT32_14061E:
+ if(!_mm_comilt_ss(XMM2, XMM6)){
+ index[20] = n[6];
+ goto SORT32_15071E;
+ }else{
+ index[20] = n[30];
+SORT32_15061F:
+ if(!_mm_comilt_ss(XMM2, XMM7)){
+ index[21] = n[6];
+ goto SORT32_16071F;
+ }else{
+ index[21] = n[31];
+ index[22] = n[6];
+ index[23] = n[7];
+ index[24] = n[8];
+ index[25] = n[9];
+ index[26] = n[10];
+ index[27] = n[11];
+ index[28] = n[12];
+ index[29] = n[13];
+ index[30] = n[14];
+ index[31] = n[15];
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }else{
+ index[5] = n[16];
+SORT32_060511:
+ if(!_mm_comilt_ss(XMM1, XMM5)){
+ index[6] = n[5];
+ goto SORT32_070611;
+ }else{
+ index[6] = n[17];
+SORT32_070512:
+ if(!_mm_comilt_ss(XMM1, XMM6)){
+ index[7] = n[5];
+ goto SORT32_080612;
+ }else{
+ index[7] = n[18];
+SORT32_080513:
+ if(!_mm_comilt_ss(XMM1, XMM7)){
+ index[8] = n[5];
+ goto SORT32_090613;
+ }else{
+ index[8] = n[19];
+ XMM4 = _mm_load_ss(data+n[20]);
+ XMM5 = _mm_load_ss(data+n[21]);
+ XMM6 = _mm_load_ss(data+n[22]);
+ XMM7 = _mm_load_ss(data+n[23]);
+SORT32_090514:
+ if(!_mm_comilt_ss(XMM1, XMM4)){
+ index[9] = n[5];
+ goto SORT32_0A0614;
+ }else{
+ index[9] = n[20];
+SORT32_0A0515:
+ if(!_mm_comilt_ss(XMM1, XMM5)){
+ index[10] = n[5];
+ goto SORT32_0B0615;
+ }else{
+ index[10] = n[21];
+SORT32_0B0516:
+ if(!_mm_comilt_ss(XMM1, XMM6)){
+ index[11] = n[5];
+ goto SORT32_0C0616;
+ }else{
+ index[11] = n[22];
+SORT32_0C0517:
+ if(!_mm_comilt_ss(XMM1, XMM7)){
+ index[12] = n[5];
+ goto SORT32_0D0617;
+ }else{
+ index[12] = n[23];
+ XMM4 = _mm_load_ss(data+n[24]);
+ XMM5 = _mm_load_ss(data+n[25]);
+ XMM6 = _mm_load_ss(data+n[26]);
+ XMM7 = _mm_load_ss(data+n[27]);
+SORT32_0D0518:
+ if(!_mm_comilt_ss(XMM1, XMM4)){
+ index[13] = n[5];
+ goto SORT32_0E0618;
+ }else{
+ index[13] = n[24];
+SORT32_0E0519:
+ if(!_mm_comilt_ss(XMM1, XMM5)){
+ index[14] = n[5];
+ goto SORT32_0F0619;
+ }else{
+ index[14] = n[25];
+SORT32_0F051A:
+ if(!_mm_comilt_ss(XMM1, XMM6)){
+ index[15] = n[5];
+ goto SORT32_10061A;
+ }else{
+ index[15] = n[26];
+SORT32_10051B:
+ if(!_mm_comilt_ss(XMM1, XMM7)){
+ index[16] = n[5];
+ goto SORT32_11061B;
+ }else{
+ index[16] = n[27];
+ XMM4 = _mm_load_ss(data+n[28]);
+ XMM5 = _mm_load_ss(data+n[29]);
+ XMM6 = _mm_load_ss(data+n[30]);
+ XMM7 = _mm_load_ss(data+n[31]);
+SORT32_11051C:
+ if(!_mm_comilt_ss(XMM1, XMM4)){
+ index[17] = n[5];
+ goto SORT32_12061C;
+ }else{
+ index[17] = n[28];
+SORT32_12051D:
+ if(!_mm_comilt_ss(XMM1, XMM5)){
+ index[18] = n[5];
+ goto SORT32_13061D;
+ }else{
+ index[18] = n[29];
+SORT32_13051E:
+ if(!_mm_comilt_ss(XMM1, XMM6)){
+ index[19] = n[5];
+ goto SORT32_14061E;
+ }else{
+ index[19] = n[30];
+SORT32_14051F:
+ if(!_mm_comilt_ss(XMM1, XMM7)){
+ index[20] = n[5];
+ goto SORT32_15061F;
+ }else{
+ index[20] = n[31];
+ index[21] = n[5];
+ index[22] = n[6];
+ index[23] = n[7];
+ index[24] = n[8];
+ index[25] = n[9];
+ index[26] = n[10];
+ index[27] = n[11];
+ index[28] = n[12];
+ index[29] = n[13];
+ index[30] = n[14];
+ index[31] = n[15];
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }else{
+ index[4] = n[16];
+SORT32_050411:
+ if(!_mm_comilt_ss(XMM0, XMM5)){
+ index[5] = n[4];
+ goto SORT32_060511;
+ }else{
+ index[5] = n[17];
+SORT32_060412:
+ if(!_mm_comilt_ss(XMM0, XMM6)){
+ index[6] = n[4];
+ goto SORT32_070512;
+ }else{
+ index[6] = n[18];
+SORT32_070413:
+ if(!_mm_comilt_ss(XMM0, XMM7)){
+ index[7] = n[4];
+ goto SORT32_080513;
+ }else{
+ index[7] = n[19];
+ XMM4 = _mm_load_ss(data+n[20]);
+ XMM5 = _mm_load_ss(data+n[21]);
+ XMM6 = _mm_load_ss(data+n[22]);
+ XMM7 = _mm_load_ss(data+n[23]);
+SORT32_080414:
+ if(!_mm_comilt_ss(XMM0, XMM4)){
+ index[8] = n[4];
+ goto SORT32_090514;
+ }else{
+ index[8] = n[20];
+SORT32_090415:
+ if(!_mm_comilt_ss(XMM0, XMM5)){
+ index[9] = n[4];
+ goto SORT32_0A0515;
+ }else{
+ index[9] = n[21];
+SORT32_0A0416:
+ if(!_mm_comilt_ss(XMM0, XMM6)){
+ index[10] = n[4];
+ goto SORT32_0B0516;
+ }else{
+ index[10] = n[22];
+SORT32_0B0417:
+ if(!_mm_comilt_ss(XMM0, XMM7)){
+ index[11] = n[4];
+ goto SORT32_0C0517;
+ }else{
+ index[11] = n[23];
+ XMM4 = _mm_load_ss(data+n[24]);
+ XMM5 = _mm_load_ss(data+n[25]);
+ XMM6 = _mm_load_ss(data+n[26]);
+ XMM7 = _mm_load_ss(data+n[27]);
+SORT32_0C0418:
+ if(!_mm_comilt_ss(XMM0, XMM4)){
+ index[12] = n[4];
+ goto SORT32_0D0518;
+ }else{
+ index[12] = n[24];
+SORT32_0D0419:
+ if(!_mm_comilt_ss(XMM0, XMM5)){
+ index[13] = n[4];
+ goto SORT32_0E0519;
+ }else{
+ index[13] = n[25];
+SORT32_0E041A:
+ if(!_mm_comilt_ss(XMM0, XMM6)){
+ index[14] = n[4];
+ goto SORT32_0F051A;
+ }else{
+ index[14] = n[26];
+SORT32_0F041B:
+ if(!_mm_comilt_ss(XMM0, XMM7)){
+ index[15] = n[4];
+ goto SORT32_10051B;
+ }else{
+ index[15] = n[27];
+ XMM4 = _mm_load_ss(data+n[28]);
+ XMM5 = _mm_load_ss(data+n[29]);
+ XMM6 = _mm_load_ss(data+n[30]);
+ XMM7 = _mm_load_ss(data+n[31]);
+SORT32_10041C:
+ if(!_mm_comilt_ss(XMM0, XMM4)){
+ index[16] = n[4];
+ goto SORT32_11051C;
+ }else{
+ index[16] = n[28];
+SORT32_11041D:
+ if(!_mm_comilt_ss(XMM0, XMM5)){
+ index[17] = n[4];
+ goto SORT32_12051D;
+ }else{
+ index[17] = n[29];
+SORT32_12041E:
+ if(!_mm_comilt_ss(XMM0, XMM6)){
+ index[18] = n[4];
+ goto SORT32_13051E;
+ }else{
+ index[18] = n[30];
+SORT32_13041F:
+ if(!_mm_comilt_ss(XMM0, XMM7)){
+ index[19] = n[4];
+ goto SORT32_14051F;
+ }else{
+ index[19] = n[31];
+ index[20] = n[4];
+ index[21] = n[5];
+ index[22] = n[6];
+ index[23] = n[7];
+ index[24] = n[8];
+ index[25] = n[9];
+ index[26] = n[10];
+ index[27] = n[11];
+ index[28] = n[12];
+ index[29] = n[13];
+ index[30] = n[14];
+ index[31] = n[15];
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }else{
+ index[3] = n[16];
+SORT32_040311:
+ if(!_mm_comilt_ss(XMM3, XMM5)){
+ index[4] = n[3];
+ XMM0 = _mm_load_ss(data+n[4]);
+ XMM1 = _mm_load_ss(data+n[5]);
+ XMM2 = _mm_load_ss(data+n[6]);
+ XMM3 = _mm_load_ss(data+n[7]);
+ goto SORT32_050411;
+ }else{
+ index[4] = n[17];
+SORT32_050312:
+ if(!_mm_comilt_ss(XMM3, XMM6)){
+ index[5] = n[3];
+ XMM0 = _mm_load_ss(data+n[4]);
+ XMM1 = _mm_load_ss(data+n[5]);
+ XMM2 = _mm_load_ss(data+n[6]);
+ XMM3 = _mm_load_ss(data+n[7]);
+ goto SORT32_060412;
+ }else{
+ index[5] = n[18];
+SORT32_060313:
+ if(!_mm_comilt_ss(XMM3, XMM7)){
+ index[6] = n[3];
+ XMM0 = _mm_load_ss(data+n[4]);
+ XMM1 = _mm_load_ss(data+n[5]);
+ XMM2 = _mm_load_ss(data+n[6]);
+ XMM3 = _mm_load_ss(data+n[7]);
+ goto SORT32_070413;
+ }else{
+ index[6] = n[19];
+ XMM4 = _mm_load_ss(data+n[20]);
+ XMM5 = _mm_load_ss(data+n[21]);
+ XMM6 = _mm_load_ss(data+n[22]);
+ XMM7 = _mm_load_ss(data+n[23]);
+SORT32_070314:
+ if(!_mm_comilt_ss(XMM3, XMM4)){
+ index[7] = n[3];
+ XMM0 = _mm_load_ss(data+n[4]);
+ XMM1 = _mm_load_ss(data+n[5]);
+ XMM2 = _mm_load_ss(data+n[6]);
+ XMM3 = _mm_load_ss(data+n[7]);
+ goto SORT32_080414;
+ }else{
+ index[7] = n[20];
+SORT32_080315:
+ if(!_mm_comilt_ss(XMM3, XMM5)){
+ index[8] = n[3];
+ XMM0 = _mm_load_ss(data+n[4]);
+ XMM1 = _mm_load_ss(data+n[5]);
+ XMM2 = _mm_load_ss(data+n[6]);
+ XMM3 = _mm_load_ss(data+n[7]);
+ goto SORT32_090415;
+ }else{
+ index[8] = n[21];
+SORT32_090316:
+ if(!_mm_comilt_ss(XMM3, XMM6)){
+ index[9] = n[3];
+ XMM0 = _mm_load_ss(data+n[4]);
+ XMM1 = _mm_load_ss(data+n[5]);
+ XMM2 = _mm_load_ss(data+n[6]);
+ XMM3 = _mm_load_ss(data+n[7]);
+ goto SORT32_0A0416;
+ }else{
+ index[9] = n[22];
+SORT32_0A0317:
+ if(!_mm_comilt_ss(XMM3, XMM7)){
+ index[10] = n[3];
+ XMM0 = _mm_load_ss(data+n[4]);
+ XMM1 = _mm_load_ss(data+n[5]);
+ XMM2 = _mm_load_ss(data+n[6]);
+ XMM3 = _mm_load_ss(data+n[7]);
+ goto SORT32_0B0417;
+ }else{
+ index[10] = n[23];
+ XMM4 = _mm_load_ss(data+n[24]);
+ XMM5 = _mm_load_ss(data+n[25]);
+ XMM6 = _mm_load_ss(data+n[26]);
+ XMM7 = _mm_load_ss(data+n[27]);
+SORT32_0B0318:
+ if(!_mm_comilt_ss(XMM3, XMM4)){
+ index[11] = n[3];
+ XMM0 = _mm_load_ss(data+n[4]);
+ XMM1 = _mm_load_ss(data+n[5]);
+ XMM2 = _mm_load_ss(data+n[6]);
+ XMM3 = _mm_load_ss(data+n[7]);
+ goto SORT32_0C0418;
+ }else{
+ index[11] = n[24];
+SORT32_0C0319:
+ if(!_mm_comilt_ss(XMM3, XMM5)){
+ index[12] = n[3];
+ XMM0 = _mm_load_ss(data+n[4]);
+ XMM1 = _mm_load_ss(data+n[5]);
+ XMM2 = _mm_load_ss(data+n[6]);
+ XMM3 = _mm_load_ss(data+n[7]);
+ goto SORT32_0D0419;
+ }else{
+ index[12] = n[25];
+SORT32_0D031A:
+ if(!_mm_comilt_ss(XMM3, XMM6)){
+ index[13] = n[3];
+ XMM0 = _mm_load_ss(data+n[4]);
+ XMM1 = _mm_load_ss(data+n[5]);
+ XMM2 = _mm_load_ss(data+n[6]);
+ XMM3 = _mm_load_ss(data+n[7]);
+ goto SORT32_0E041A;
+ }else{
+ index[13] = n[26];
+SORT32_0E031B:
+ if(!_mm_comilt_ss(XMM3, XMM7)){
+ index[14] = n[3];
+ XMM0 = _mm_load_ss(data+n[4]);
+ XMM1 = _mm_load_ss(data+n[5]);
+ XMM2 = _mm_load_ss(data+n[6]);
+ XMM3 = _mm_load_ss(data+n[7]);
+ goto SORT32_0F041B;
+ }else{
+ index[14] = n[27];
+ XMM4 = _mm_load_ss(data+n[28]);
+ XMM5 = _mm_load_ss(data+n[29]);
+ XMM6 = _mm_load_ss(data+n[30]);
+ XMM7 = _mm_load_ss(data+n[31]);
+SORT32_0F031C:
+ if(!_mm_comilt_ss(XMM3, XMM4)){
+ index[15] = n[3];
+ XMM0 = _mm_load_ss(data+n[4]);
+ XMM1 = _mm_load_ss(data+n[5]);
+ XMM2 = _mm_load_ss(data+n[6]);
+ XMM3 = _mm_load_ss(data+n[7]);
+ goto SORT32_10041C;
+ }else{
+ index[15] = n[28];
+SORT32_10031D:
+ if(!_mm_comilt_ss(XMM3, XMM5)){
+ index[16] = n[3];
+ XMM0 = _mm_load_ss(data+n[4]);
+ XMM1 = _mm_load_ss(data+n[5]);
+ XMM2 = _mm_load_ss(data+n[6]);
+ XMM3 = _mm_load_ss(data+n[7]);
+ goto SORT32_11041D;
+ }else{
+ index[16] = n[29];
+SORT32_11031E:
+ if(!_mm_comilt_ss(XMM3, XMM6)){
+ index[17] = n[3];
+ XMM0 = _mm_load_ss(data+n[4]);
+ XMM1 = _mm_load_ss(data+n[5]);
+ XMM2 = _mm_load_ss(data+n[6]);
+ XMM3 = _mm_load_ss(data+n[7]);
+ goto SORT32_12041E;
+ }else{
+ index[17] = n[30];
+SORT32_12031F:
+ if(!_mm_comilt_ss(XMM3, XMM7)){
+ index[18] = n[3];
+ XMM0 = _mm_load_ss(data+n[4]);
+ XMM1 = _mm_load_ss(data+n[5]);
+ XMM2 = _mm_load_ss(data+n[6]);
+ XMM3 = _mm_load_ss(data+n[7]);
+ goto SORT32_13041F;
+ }else{
+ index[18] = n[31];
+ index[19] = n[3];
+ index[20] = n[4];
+ index[21] = n[5];
+ index[22] = n[6];
+ index[23] = n[7];
+ index[24] = n[8];
+ index[25] = n[9];
+ index[26] = n[10];
+ index[27] = n[11];
+ index[28] = n[12];
+ index[29] = n[13];
+ index[30] = n[14];
+ index[31] = n[15];
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }else{
+ index[2] = n[16];
+SORT32_030211:
+ if(!_mm_comilt_ss(XMM2, XMM5)){
+ index[3] = n[2];
+ goto SORT32_040311;
+ }else{
+ index[3] = n[17];
+SORT32_040212:
+ if(!_mm_comilt_ss(XMM2, XMM6)){
+ index[4] = n[2];
+ goto SORT32_050312;
+ }else{
+ index[4] = n[18];
+SORT32_050213:
+ if(!_mm_comilt_ss(XMM2, XMM7)){
+ index[5] = n[2];
+ goto SORT32_060313;
+ }else{
+ index[5] = n[19];
+ XMM4 = _mm_load_ss(data+n[20]);
+ XMM5 = _mm_load_ss(data+n[21]);
+ XMM6 = _mm_load_ss(data+n[22]);
+ XMM7 = _mm_load_ss(data+n[23]);
+SORT32_060214:
+ if(!_mm_comilt_ss(XMM2, XMM4)){
+ index[6] = n[2];
+ goto SORT32_070314;
+ }else{
+ index[6] = n[20];
+SORT32_070215:
+ if(!_mm_comilt_ss(XMM2, XMM5)){
+ index[7] = n[2];
+ goto SORT32_080315;
+ }else{
+ index[7] = n[21];
+SORT32_080216:
+ if(!_mm_comilt_ss(XMM2, XMM6)){
+ index[8] = n[2];
+ goto SORT32_090316;
+ }else{
+ index[8] = n[22];
+SORT32_090217:
+ if(!_mm_comilt_ss(XMM2, XMM7)){
+ index[9] = n[2];
+ goto SORT32_0A0317;
+ }else{
+ index[9] = n[23];
+ XMM4 = _mm_load_ss(data+n[24]);
+ XMM5 = _mm_load_ss(data+n[25]);
+ XMM6 = _mm_load_ss(data+n[26]);
+ XMM7 = _mm_load_ss(data+n[27]);
+SORT32_0A0218:
+ if(!_mm_comilt_ss(XMM2, XMM4)){
+ index[10] = n[2];
+ goto SORT32_0B0318;
+ }else{
+ index[10] = n[24];
+SORT32_0B0219:
+ if(!_mm_comilt_ss(XMM2, XMM5)){
+ index[11] = n[2];
+ goto SORT32_0C0319;
+ }else{
+ index[11] = n[25];
+SORT32_0C021A:
+ if(!_mm_comilt_ss(XMM2, XMM6)){
+ index[12] = n[2];
+ goto SORT32_0D031A;
+ }else{
+ index[12] = n[26];
+SORT32_0D021B:
+ if(!_mm_comilt_ss(XMM2, XMM7)){
+ index[13] = n[2];
+ goto SORT32_0E031B;
+ }else{
+ index[13] = n[27];
+ XMM4 = _mm_load_ss(data+n[28]);
+ XMM5 = _mm_load_ss(data+n[29]);
+ XMM6 = _mm_load_ss(data+n[30]);
+ XMM7 = _mm_load_ss(data+n[31]);
+SORT32_0E021C:
+ if(!_mm_comilt_ss(XMM2, XMM4)){
+ index[14] = n[2];
+ goto SORT32_0F031C;
+ }else{
+ index[14] = n[28];
+SORT32_0F021D:
+ if(!_mm_comilt_ss(XMM2, XMM5)){
+ index[15] = n[2];
+ goto SORT32_10031D;
+ }else{
+ index[15] = n[29];
+SORT32_10021E:
+ if(!_mm_comilt_ss(XMM2, XMM6)){
+ index[16] = n[2];
+ goto SORT32_11031E;
+ }else{
+ index[16] = n[30];
+SORT32_11021F:
+ if(!_mm_comilt_ss(XMM2, XMM7)){
+ index[17] = n[2];
+ goto SORT32_12031F;
+ }else{
+ index[17] = n[31];
+ index[18] = n[2];
+ index[19] = n[3];
+ index[20] = n[4];
+ index[21] = n[5];
+ index[22] = n[6];
+ index[23] = n[7];
+ index[24] = n[8];
+ index[25] = n[9];
+ index[26] = n[10];
+ index[27] = n[11];
+ index[28] = n[12];
+ index[29] = n[13];
+ index[30] = n[14];
+ index[31] = n[15];
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }else{
+ index[1] = n[16];
+SORT32_020111:
+ if(!_mm_comilt_ss(XMM1, XMM5)){
+ index[2] = n[1];
+ goto SORT32_030211;
+ }else{
+ index[2] = n[17];
+SORT32_030112:
+ if(!_mm_comilt_ss(XMM1, XMM6)){
+ index[3] = n[1];
+ goto SORT32_040212;
+ }else{
+ index[3] = n[18];
+SORT32_040113:
+ if(!_mm_comilt_ss(XMM1, XMM7)){
+ index[4] = n[1];
+ goto SORT32_050213;
+ }else{
+ index[4] = n[19];
+ XMM4 = _mm_load_ss(data+n[20]);
+ XMM5 = _mm_load_ss(data+n[21]);
+ XMM6 = _mm_load_ss(data+n[22]);
+ XMM7 = _mm_load_ss(data+n[23]);
+SORT32_050114:
+ if(!_mm_comilt_ss(XMM1, XMM4)){
+ index[5] = n[1];
+ goto SORT32_060214;
+ }else{
+ index[5] = n[20];
+SORT32_060115:
+ if(!_mm_comilt_ss(XMM1, XMM5)){
+ index[6] = n[1];
+ goto SORT32_070215;
+ }else{
+ index[6] = n[21];
+SORT32_070116:
+ if(!_mm_comilt_ss(XMM1, XMM6)){
+ index[7] = n[1];
+ goto SORT32_080216;
+ }else{
+ index[7] = n[22];
+SORT32_080117:
+ if(!_mm_comilt_ss(XMM1, XMM7)){
+ index[8] = n[1];
+ goto SORT32_090217;
+ }else{
+ index[8] = n[23];
+ XMM4 = _mm_load_ss(data+n[24]);
+ XMM5 = _mm_load_ss(data+n[25]);
+ XMM6 = _mm_load_ss(data+n[26]);
+ XMM7 = _mm_load_ss(data+n[27]);
+SORT32_090118:
+ if(!_mm_comilt_ss(XMM1, XMM4)){
+ index[9] = n[1];
+ goto SORT32_0A0218;
+ }else{
+ index[9] = n[24];
+SORT32_0A0119:
+ if(!_mm_comilt_ss(XMM1, XMM5)){
+ index[10] = n[1];
+ goto SORT32_0B0219;
+ }else{
+ index[10] = n[25];
+SORT32_0B011A:
+ if(!_mm_comilt_ss(XMM1, XMM6)){
+ index[11] = n[1];
+ goto SORT32_0C021A;
+ }else{
+ index[11] = n[26];
+SORT32_0C011B:
+ if(!_mm_comilt_ss(XMM1, XMM7)){
+ index[12] = n[1];
+ goto SORT32_0D021B;
+ }else{
+ index[12] = n[27];
+ XMM4 = _mm_load_ss(data+n[28]);
+ XMM5 = _mm_load_ss(data+n[29]);
+ XMM6 = _mm_load_ss(data+n[30]);
+ XMM7 = _mm_load_ss(data+n[31]);
+SORT32_0D011C:
+ if(!_mm_comilt_ss(XMM1, XMM4)){
+ index[13] = n[1];
+ goto SORT32_0E021C;
+ }else{
+ index[13] = n[28];
+SORT32_0E011D:
+ if(!_mm_comilt_ss(XMM1, XMM5)){
+ index[14] = n[1];
+ goto SORT32_0F021D;
+ }else{
+ index[14] = n[29];
+SORT32_0F011E:
+ if(!_mm_comilt_ss(XMM1, XMM6)){
+ index[15] = n[1];
+ goto SORT32_10021E;
+ }else{
+ index[15] = n[30];
+SORT32_10011F:
+ if(!_mm_comilt_ss(XMM1, XMM7)){
+ index[16] = n[1];
+ goto SORT32_11021F;
+ }else{
+ index[16] = n[31];
+ index[17] = n[1];
+ index[18] = n[2];
+ index[19] = n[3];
+ index[20] = n[4];
+ index[21] = n[5];
+ index[22] = n[6];
+ index[23] = n[7];
+ index[24] = n[8];
+ index[25] = n[9];
+ index[26] = n[10];
+ index[27] = n[11];
+ index[28] = n[12];
+ index[29] = n[13];
+ index[30] = n[14];
+ index[31] = n[15];
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }else{
+ index[0] = n[16];
+ if(!_mm_comilt_ss(XMM0, XMM5)){
+ index[1] = n[0];
+ goto SORT32_020111;
+ }else{
+ index[1] = n[17];
+ if(!_mm_comilt_ss(XMM0, XMM6)){
+ index[2] = n[0];
+ goto SORT32_030112;
+ }else{
+ index[2] = n[18];
+ if(!_mm_comilt_ss(XMM0, XMM7)){
+ index[3] = n[0];
+ goto SORT32_040113;
+ }else{
+ index[3] = n[19];
+ XMM4 = _mm_load_ss(data+n[20]);
+ XMM5 = _mm_load_ss(data+n[21]);
+ XMM6 = _mm_load_ss(data+n[22]);
+ XMM7 = _mm_load_ss(data+n[23]);
+ if(!_mm_comilt_ss(XMM0, XMM4)){
+ index[4] = n[0];
+ goto SORT32_050114;
+ }else{
+ index[4] = n[20];
+ if(!_mm_comilt_ss(XMM0, XMM5)){
+ index[5] = n[0];
+ goto SORT32_060115;
+ }else{
+ index[5] = n[21];
+ if(!_mm_comilt_ss(XMM0, XMM6)){
+ index[6] = n[0];
+ goto SORT32_070116;
+ }else{
+ index[6] = n[22];
+ if(!_mm_comilt_ss(XMM0, XMM7)){
+ index[7] = n[0];
+ goto SORT32_080117;
+ }else{
+ index[7] = n[23];
+ XMM4 = _mm_load_ss(data+n[24]);
+ XMM5 = _mm_load_ss(data+n[25]);
+ XMM6 = _mm_load_ss(data+n[26]);
+ XMM7 = _mm_load_ss(data+n[27]);
+ if(!_mm_comilt_ss(XMM0, XMM4)){
+ index[8] = n[0];
+ goto SORT32_090118;
+ }else{
+ index[8] = n[24];
+ if(!_mm_comilt_ss(XMM0, XMM5)){
+ index[9] = n[0];
+ goto SORT32_0A0119;
+ }else{
+ index[9] = n[25];
+ if(!_mm_comilt_ss(XMM0, XMM6)){
+ index[10] = n[0];
+ goto SORT32_0B011A;
+ }else{
+ index[10] = n[26];
+ if(!_mm_comilt_ss(XMM0, XMM7)){
+ index[11] = n[0];
+ goto SORT32_0C011B;
+ }else{
+ index[11] = n[27];
+ XMM4 = _mm_load_ss(data+n[28]);
+ XMM5 = _mm_load_ss(data+n[29]);
+ XMM6 = _mm_load_ss(data+n[30]);
+ XMM7 = _mm_load_ss(data+n[31]);
+ if(!_mm_comilt_ss(XMM0, XMM4)){
+ index[12] = n[0];
+ goto SORT32_0D011C;
+ }else{
+ index[12] = n[28];
+ if(!_mm_comilt_ss(XMM0, XMM5)){
+ index[13] = n[0];
+ goto SORT32_0E011D;
+ }else{
+ index[13] = n[29];
+ if(!_mm_comilt_ss(XMM0, XMM6)){
+ index[14] = n[0];
+ goto SORT32_0F011E;
+ }else{
+ index[14] = n[30];
+ if(!_mm_comilt_ss(XMM0, XMM7)){
+ index[15] = n[0];
+ goto SORT32_10011F;
+ }else{
+ index[15] = n[31];
+ index[16] = n[0];
+ index[17] = n[1];
+ index[18] = n[2];
+ index[19] = n[3];
+ index[20] = n[4];
+ index[21] = n[5];
+ index[22] = n[6];
+ index[23] = n[7];
+ index[24] = n[8];
+ index[25] = n[9];
+ index[26] = n[10];
+ index[27] = n[11];
+ index[28] = n[12];
+ index[29] = n[13];
+ index[30] = n[14];
+ index[31] = n[15];
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+}
+static void sortindex_shellsort(int *index, /* Shell sort of the positions offset..offset+count-1, written to index[offset..] */
+ float *data, /* values compared through the C() macro, which is defined outside this function */
+ int offset, /* first position of the partition being sorted */
+ int count){ /* number of positions in the partition */
+ int gap,pos,left,i,j;
+ index+=offset; /* work relative to the start of this partition */
+ for(i=0;i<count;i++)index[i]=i+offset; /* start from the identity order offset, offset+1, ... */
+ gap=1;
+ while (gap<=count)gap=gap*3+1; /* grow through 1,4,13,40,... until gap exceeds count */
+ gap/=3; /* integer division steps back to the previous value of that sequence */
+ if(gap>=4)gap/=3; /* step back once more so the first pass uses a smaller gap */
+ while(gap>0){
+ for(pos=gap;pos<count;pos++){
+ for(left=pos-gap;left>=0;left-=gap){ /* walk back along the gap-spaced chain that ends at pos */
+ i=index[left];j=index[left+gap];
+ if(!C(i,j)){ /* pair fails the C() ordering test: swap the two index entries */
+ index[left]=j;
+ index[left+gap]=i;
+ }else break; /* stop walking this chain once a pair is already in order */
+ }
+ }
+ gap/=3; /* next smaller gap; after the gap==1 pass this yields 0 and ends the loop */
+ }
+}
+#else /* SSE Optimize */
#define C(o,a,b)\
(fabs(data[o+a])>=fabs(data[o+b]))
#define O(o,a,b,c,d)\
@@ -1390,6 +9362,7 @@
gap/=3;
}
}
+#endif /* SSE Optimize */
static void sortindex(int *index,
float *data,
@@ -1401,18 +9374,62 @@
}
#undef C
+#ifndef __SSE__ /* SSE Optimize */
+/* this is for per-channel noise normalization */
+static int apsort(const void *a, const void *b){ /* comparator: a and b each point at a float* element */
+ float f1=fabs(**(float**)a); /* magnitude of the float reached through a */
+ float f2=fabs(**(float**)b); /* magnitude of the float reached through b */
+ return (f1<f2)-(f1>f2); /* 1 if f1<f2, -1 if f1>f2, 0 otherwise; presumably used for descending-magnitude order (caller not visible) */
+}
#undef O
#undef SORT4
-
-#endif
-/*** OPT_SORT End ***/
-
+#endif /* SSE Optimize */
int **_vp_quantize_couple_sort(vorbis_block *vb,
vorbis_look_psy *p,
vorbis_info_mapping0 *vi,
- float **mags){
+#ifdef __SSE__ /* SSE Optimize */
+ float **mags,
+ float *temp){ /* SSE build only: extra scratch buffer that receives the masked copy of mags[i][0..n) */
+#else /* SSE Optimize */
+ float **mags){
+#endif /* SSE Optimize */
+
+#ifdef __SSE__ /* SSE Optimize */
+ if(p->vi->normal_point_p){ /* index arrays are only built when this flag is set */
+ int i,j,n=p->n;
+ int **ret=_vorbis_block_alloc(vb,vi->coupling_steps*sizeof(*ret)); /* one index-array pointer per coupling step */
+ int partition=p->vi->normal_partition;
+
+ for(i=0;i<vi->coupling_steps;i++)
+ {
+ for(j=0;j<n;j+=16) /* 16 floats per pass; NOTE(review): touches elements past n unless n is a multiple of 16 -- confirm */
+ {
+ __m128 XMM0 = _mm_load_ps(mags[i]+j ); /* _mm_load_ps is an aligned load: mags[i]+j must be 16-byte aligned */
+ __m128 XMM1 = _mm_load_ps(mags[i]+j+ 4);
+ __m128 XMM2 = _mm_load_ps(mags[i]+j+ 8);
+ __m128 XMM3 = _mm_load_ps(mags[i]+j+12);
+ XMM0 = _mm_and_ps(XMM0, PABSMASK.ps); /* presumably clears the sign bits (absolute value); PABSMASK is defined elsewhere -- verify */
+ XMM1 = _mm_and_ps(XMM1, PABSMASK.ps);
+ XMM2 = _mm_and_ps(XMM2, PABSMASK.ps);
+ XMM3 = _mm_and_ps(XMM3, PABSMASK.ps);
+ _mm_store_ps(temp+j , XMM0); /* _mm_store_ps is an aligned store: temp+j must be 16-byte aligned */
+ _mm_store_ps(temp+j+ 4, XMM1);
+ _mm_store_ps(temp+j+ 8, XMM2);
+ _mm_store_ps(temp+j+12, XMM3);
+ }
+ ret[i]=_vorbis_block_alloc(vb,n*sizeof(**ret)); /* n ints holding this step's index order */
+
+ for(j=0;j<n;j+=partition)
+ {
+ sortindex(ret[i], temp, j, partition); /* one call per partition-sized slice, reading the masked values in temp */
+ }
+ }
+ return(ret);
+ }
+ return(NULL); /* flag clear: nothing is allocated or sorted */
+#else /* SSE Optimize */
#ifdef OPT_SORT
if(p->vi->normal_point_p){
int i,j,n=p->n;
@@ -1449,21 +9466,51 @@
}
return(NULL);
#endif
+#endif /* SSE Optimize */
}
+#ifdef __SSE__ /* SSE Optimize */
+void _vp_noise_normalize_sort(vorbis_look_psy *p,
+ float *magnitudes,int *sortedindex,float *temp){ /* SSE build: temp is scratch for the masked copy of magnitudes[start..n) */
+ int j, n=p->n;
+ vorbis_info_psy *vi=p->vi;
+ int partition=vi->normal_partition;
+ int start=vi->normal_start;
+
+ int k;
+ j = start;
+ k = (j+15)&(~15); /* round start up to the next multiple of 16 */
+ k = (k>=n)?n:k; /* but never past n */
+ for(;j<k;j++) /* scalar head: one float at a time up to the 16-element boundary */
+ {
+ __m128 XMM0 = _mm_load_ss(magnitudes+j);
+ XMM0 = _mm_and_ps(XMM0, PABSMASK.ps); /* presumably clears the sign bit (absolute value); PABSMASK is defined elsewhere -- verify */
+ _mm_store_ss(temp+j,XMM0);
+ }
+ for(;j<n;j+=16) /* vector body, 16 floats per pass; NOTE(review): touches elements past n unless n is a multiple of 16 -- confirm */
+ {
+ __m128 XMM0 = _mm_load_ps(magnitudes+j ); /* aligned load: magnitudes+j must be 16-byte aligned */
+ __m128 XMM1 = _mm_load_ps(magnitudes+j+ 4);
+ __m128 XMM2 = _mm_load_ps(magnitudes+j+ 8);
+ __m128 XMM3 = _mm_load_ps(magnitudes+j+12);
+ XMM0 = _mm_and_ps(XMM0, PABSMASK.ps);
+ XMM1 = _mm_and_ps(XMM1, PABSMASK.ps);
+ XMM2 = _mm_and_ps(XMM2, PABSMASK.ps);
+ XMM3 = _mm_and_ps(XMM3, PABSMASK.ps);
+ _mm_store_ps(temp+j , XMM0); /* aligned store: temp+j must be 16-byte aligned */
+ _mm_store_ps(temp+j+ 4, XMM1);
+ _mm_store_ps(temp+j+ 8, XMM2);
+ _mm_store_ps(temp+j+12, XMM3);
+ }
+ for(j=start;j<n;j+=partition)
+ {
+ if(j+partition>n) /* final slice may be shorter than a full partition */
+ partition = n-j;
+ sortindex(sortedindex-start, temp, j, partition); /* index base is shifted by -start; presumably so position j lands in sortedindex[j-start] -- verify against sortindex */
+ }
+#else /* SSE Optimize */
void _vp_noise_normalize_sort(vorbis_look_psy *p,
float *magnitudes,int *sortedindex){
-#ifdef OPT_SORT
- int j,n=p->n;
- vorbis_info_psy *vi=p->vi;
- int partition=vi->normal_partition;
- int start=vi->normal_start;
-
- for(j=start;j<n;j+=partition){
- if(j+partition>n)partition=n-j;
- sortindex(sortedindex-start,magnitudes,j,partition);
- }
-#else
int i,j,n=p->n;
vorbis_info_psy *vi=p->vi;
int partition=vi->normal_partition;
@@ -1478,12 +9525,12 @@
sortedindex[i+j-start]=work[i]-magnitudes;
}
}
-#endif
+#endif /* SSE Optimize */
}
void _vp_noise_normalize(vorbis_look_psy *p,
float *in,float *out,int *sortedindex){
- int i,j=0,n=p->n,min_energy;
+ int i,j=0,n=p->n/*,min_energy*/;
vorbis_info_psy *vi=p->vi;
int partition=vi->normal_partition;
int start=vi->normal_start;
@@ -1491,23 +9538,299 @@
if(start>n)start=n;
if(vi->normal_channel_p){
+#ifdef __SSE__ /* SSE Optimize */
+ {
+ int k;
+ k = start&(~15);
+ for(;j<k;j+=16)
+ {
+ __m128 XMM0, XMM1, XMM2, XMM3;
+#if !defined(__SSE2__)
+ __m64 MM0, MM1, MM2, MM3;
+ __m64 MM4, MM5, MM6, MM7;
+#endif
+ XMM0 = _mm_load_ps(in+j );
+ XMM1 = _mm_load_ps(in+j+ 4);
+ XMM2 = _mm_load_ps(in+j+ 8);
+ XMM3 = _mm_load_ps(in+j+12);
+#if defined(__SSE2__)
+ XMM0 = _mm_cvtepi32_ps(_mm_cvtps_epi32(XMM0));
+ XMM1 = _mm_cvtepi32_ps(_mm_cvtps_epi32(XMM1));
+ XMM2 = _mm_cvtepi32_ps(_mm_cvtps_epi32(XMM2));
+ XMM3 = _mm_cvtepi32_ps(_mm_cvtps_epi32(XMM3));
+#else
+ MM0 = _mm_cvtps_pi32(XMM0);
+ MM2 = _mm_cvtps_pi32(XMM1);
+ MM4 = _mm_cvtps_pi32(XMM2);
+ MM6 = _mm_cvtps_pi32(XMM3);
+ XMM0 = _mm_movehl_ps(XMM0, XMM0);
+ XMM1 = _mm_movehl_ps(XMM1, XMM1);
+ XMM2 = _mm_movehl_ps(XMM2, XMM2);
+ XMM3 = _mm_movehl_ps(XMM3, XMM3);
+ MM1 = _mm_cvtps_pi32(XMM0);
+ MM3 = _mm_cvtps_pi32(XMM1);
+ MM5 = _mm_cvtps_pi32(XMM2);
+ MM7 = _mm_cvtps_pi32(XMM3);
+ XMM0 = _mm_cvtpi32_ps(XMM0, MM1);
+ XMM1 = _mm_cvtpi32_ps(XMM1, MM3);
+ XMM2 = _mm_cvtpi32_ps(XMM2, MM5);
+ XMM3 = _mm_cvtpi32_ps(XMM3, MM7);
+ XMM0 = _mm_movelh_ps(XMM0, XMM0);
+ XMM1 = _mm_movelh_ps(XMM1, XMM1);
+ XMM2 = _mm_movelh_ps(XMM2, XMM2);
+ XMM3 = _mm_movelh_ps(XMM3, XMM3);
+ XMM0 = _mm_cvtpi32_ps(XMM0, MM0);
+ XMM1 = _mm_cvtpi32_ps(XMM1, MM2);
+ XMM2 = _mm_cvtpi32_ps(XMM2, MM4);
+ XMM3 = _mm_cvtpi32_ps(XMM3, MM6);
+#endif
+ _mm_store_ps(out+j , XMM0);
+ _mm_store_ps(out+j+ 4, XMM1);
+ _mm_store_ps(out+j+ 8, XMM2);
+ _mm_store_ps(out+j+12, XMM3);
+ }
+#if !defined(__SSE2__)
+ _mm_empty();
+#endif
+ for(;j<start;j++)
+ out[j] = rint(in[j]);
+ }
+#else /* SSE Optimize */
for(;j<start;j++)
out[j]=rint(in[j]);
+#endif /* SSE Optimize */
for(;j+partition<=n;j+=partition){
+#ifdef __SSE__ /* SSE Optimize */
+ float acc;
+ int k;
+ int energy_loss;
+#else
float acc=0.;
int k;
int energy_loss=0;
+#endif
int nn_num=0;
int freqband_mid=j+16;
int freqband_flag=0;
+#ifdef __SSE__ /* SSE Optimize */
+ {
+ if(partition==8)
+ {
+ int c0, c1;
+#if defined(__SSE2__)
+ __m128 XMM0, XMM1, XMM2, XMM3;
+ XMM0 = _mm_load_ps(in+j );
+ XMM1 = _mm_load_ps(in+j+4);
+ XMM2 = _mm_castsi128_ps(_mm_cvtps_epi32(XMM0));
+ XMM3 = _mm_castsi128_ps(_mm_cvtps_epi32(XMM1));
+ XMM2 = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(XMM2), PFV_0.pi));
+ XMM3 = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(XMM3), PFV_0.pi));
+ XMM0 = _mm_and_ps(XMM0, XMM2);
+ XMM1 = _mm_and_ps(XMM1, XMM3);
+ XMM0 = _mm_mul_ps(XMM0, XMM0);
+ XMM1 = _mm_mul_ps(XMM1, XMM1);
+ c0 = _mm_movemask_ps(XMM2);
+ XMM0 = _mm_add_ps(XMM0, XMM1);
+ c1 = _mm_movemask_ps(XMM3);
+#else
+ __m128 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5;
+ XMM0 = _mm_load_ps(in+j );
+ XMM1 = _mm_load_ps(in+j+4);
+ XMM4 = XMM0;
+ XMM5 = XMM1;
+ XMM2 = _mm_cmplt_ps(PFV_M0P5.ps, XMM0);
+ XMM3 = _mm_cmplt_ps(PFV_M0P5.ps, XMM1);
+ XMM4 = _mm_cmplt_ps(XMM4, PFV_0P5.ps);
+ XMM5 = _mm_cmplt_ps(XMM5, PFV_0P5.ps);
+ XMM2 = _mm_and_ps(XMM2, XMM4);
+ XMM3 = _mm_and_ps(XMM3, XMM5);
+ XMM0 = _mm_and_ps(XMM0, XMM2);
+ XMM1 = _mm_and_ps(XMM1, XMM3);
+ XMM0 = _mm_mul_ps(XMM0, XMM0);
+ XMM1 = _mm_mul_ps(XMM1, XMM1);
+ c0 = _mm_movemask_ps(XMM2);
+ XMM0 = _mm_add_ps(XMM0, XMM1);
+ c1 = _mm_movemask_ps(XMM3);
+#endif
+ acc = _mm_add_horz(XMM0);
+ energy_loss = bitCountTable[c0];
+ energy_loss += bitCountTable[c1];
+ }
+ else if(partition==32)
+ {
+ int c0, c1;
+#if defined(__SSE2__)
+ __m128 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6;
+ XMM0 = _mm_load_ps(in+j );
+ XMM1 = _mm_load_ps(in+j+ 4);
+ XMM4 = _mm_load_ps(in+j+ 8);
+ XMM5 = _mm_load_ps(in+j+12);
+ XMM2 = _mm_castsi128_ps(_mm_cvtps_epi32(XMM0));
+ XMM3 = _mm_castsi128_ps(_mm_cvtps_epi32(XMM1));
+ XMM6 = _mm_castsi128_ps(_mm_cvtps_epi32(XMM4));
+ XMM2 = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(XMM2), PFV_0.pi));
+ XMM3 = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(XMM3), PFV_0.pi));
+ XMM0 = _mm_and_ps(XMM0, XMM2);
+ XMM1 = _mm_and_ps(XMM1, XMM3);
+ XMM0 = _mm_mul_ps(XMM0, XMM0);
+ XMM1 = _mm_mul_ps(XMM1, XMM1);
+ c0 = _mm_movemask_ps(XMM2);
+ XMM2 = _mm_castsi128_ps(_mm_cvtps_epi32(XMM5));
+ XMM6 = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(XMM6), PFV_0.pi));
+ XMM0 = _mm_add_ps(XMM0, XMM1);
+ XMM1 = _mm_load_ps(in+j+16);
+ c1 = _mm_movemask_ps(XMM3);
+ XMM3 = _mm_load_ps(in+j+20);
+ XMM2 = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(XMM2), PFV_0.pi));
+ XMM4 = _mm_and_ps(XMM4, XMM6);
+ XMM5 = _mm_and_ps(XMM5, XMM2);
+ energy_loss = bitCountTable[c0];
+ energy_loss += bitCountTable[c1];
+ XMM4 = _mm_mul_ps(XMM4, XMM4);
+ XMM5 = _mm_mul_ps(XMM5, XMM5);
+ c0 = _mm_movemask_ps(XMM6);
+ XMM6 = _mm_castsi128_ps(_mm_cvtps_epi32(XMM1));
+ XMM4 = _mm_add_ps(XMM4, XMM5);
+ XMM5 = _mm_castsi128_ps(_mm_cvtps_epi32(XMM3));
+ c1 = _mm_movemask_ps(XMM2);
+ XMM2 = _mm_load_ps(in+j+24);
+ energy_loss += bitCountTable[c0];
+ XMM6 = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(XMM6), PFV_0.pi));
+ XMM5 = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(XMM5), PFV_0.pi));
+ XMM0 = _mm_add_ps(XMM0, XMM4);
+ XMM4 = _mm_load_ps(in+j+28);
+ energy_loss += bitCountTable[c1];
+ XMM1 = _mm_and_ps(XMM1, XMM6);
+ XMM3 = _mm_and_ps(XMM3, XMM5);
+ XMM1 = _mm_mul_ps(XMM1, XMM1);
+ XMM3 = _mm_mul_ps(XMM3, XMM3);
+ c0 = _mm_movemask_ps(XMM6);
+ XMM6 = _mm_castsi128_ps(_mm_cvtps_epi32(XMM2));
+ XMM1 = _mm_add_ps(XMM1, XMM3);
+ XMM3 = _mm_castsi128_ps(_mm_cvtps_epi32(XMM4));
+ c1 = _mm_movemask_ps(XMM5);
+ energy_loss += bitCountTable[c0];
+ XMM6 = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(XMM6), PFV_0.pi));
+ XMM3 = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(XMM3), PFV_0.pi));
+ XMM0 = _mm_add_ps(XMM0, XMM1);
+ energy_loss += bitCountTable[c1];
+ XMM2 = _mm_and_ps(XMM2, XMM6);
+ XMM4 = _mm_and_ps(XMM4, XMM3);
+ XMM2 = _mm_mul_ps(XMM2, XMM2);
+ XMM4 = _mm_mul_ps(XMM4, XMM4);
+ c0 = _mm_movemask_ps(XMM6);
+ XMM2 = _mm_add_ps(XMM2, XMM4);
+ c1 = _mm_movemask_ps(XMM3);
+ energy_loss += bitCountTable[c0];
+ XMM0 = _mm_add_ps(XMM0, XMM2);
+ energy_loss += bitCountTable[c1];
+ acc = _mm_add_horz(XMM0);
+#else
+ __m128 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6; /* SSE1-only path: "rounds to zero" is tested as -0.5 < x < 0.5 */
+ XMM0 = _mm_load_ps(in+j );
+ XMM1 = _mm_load_ps(in+j+ 4);
+ XMM6 = _mm_load_ps(in+j+ 8);
+ XMM4 = XMM0;
+ XMM5 = XMM1;
+ XMM2 = _mm_cmplt_ps(PFV_M0P5.ps, XMM0);
+ XMM3 = _mm_cmplt_ps(PFV_M0P5.ps, XMM1);
+ XMM4 = _mm_cmplt_ps(XMM4, PFV_0P5.ps);
+ XMM5 = _mm_cmplt_ps(XMM5, PFV_0P5.ps);
+ XMM2 = _mm_and_ps(XMM2, XMM4); /* lane mask for in[j..j+3] */
+ XMM4 = _mm_load_ps(in+j+12);
+ XMM3 = _mm_and_ps(XMM3, XMM5); /* lane mask for in[j+4..j+7] */
+ XMM5 = XMM6;
+ XMM0 = _mm_and_ps(XMM0, XMM2);
+ c0 = _mm_movemask_ps(XMM2); /* read the mask bits before XMM2 is reused for data below */
+ XMM1 = _mm_and_ps(XMM1, XMM3);
+ XMM0 = _mm_mul_ps(XMM0, XMM0);
+ XMM1 = _mm_mul_ps(XMM1, XMM1);
+ XMM2 = XMM4; /* XMM2 now carries a copy of in[j+12..j+15] */
+ XMM0 = _mm_add_ps(XMM0, XMM1);
+ XMM1 = _mm_cmplt_ps(PFV_M0P5.ps, XMM6);
+ c1 = _mm_movemask_ps(XMM3);
+ XMM3 = _mm_cmplt_ps(PFV_M0P5.ps, XMM4);
+ XMM5 = _mm_cmplt_ps(XMM5, PFV_0P5.ps);
+ XMM2 = _mm_cmplt_ps(XMM2, PFV_0P5.ps);
+ energy_loss = bitCountTable[c0]; /* first write: energy_loss has no initialiser in the SSE build */
+ energy_loss += bitCountTable[c1];
+ XMM1 = _mm_and_ps(XMM1, XMM5);
+ XMM5 = _mm_load_ps(in+j+16);
+ XMM3 = _mm_and_ps(XMM3, XMM2);
+ XMM2 = _mm_load_ps(in+j+20);
+ XMM6 = _mm_and_ps(XMM6, XMM1);
+ XMM4 = _mm_and_ps(XMM4, XMM3);
+ XMM6 = _mm_mul_ps(XMM6, XMM6);
+ XMM4 = _mm_mul_ps(XMM4, XMM4);
+ c0 = _mm_movemask_ps(XMM1);
+ XMM1 = XMM5;
+ XMM6 = _mm_add_ps(XMM6, XMM4);
+ XMM4 = XMM2;
+ c1 = _mm_movemask_ps(XMM3);
+ XMM3 = _mm_cmplt_ps(PFV_M0P5.ps, XMM5);
+ XMM0 = _mm_add_ps(XMM0, XMM6);
+ XMM6 = _mm_cmplt_ps(PFV_M0P5.ps, XMM2);
+ XMM1 = _mm_cmplt_ps(XMM1, PFV_0P5.ps);
+ XMM4 = _mm_cmplt_ps(XMM4, PFV_0P5.ps);
+ energy_loss += bitCountTable[c0];
+ energy_loss += bitCountTable[c1];
+ XMM3 = _mm_and_ps(XMM3, XMM1);
+ XMM1 = _mm_load_ps(in+j+24);
+ XMM6 = _mm_and_ps(XMM6, XMM4);
+ XMM4 = _mm_load_ps(in+j+28);
+ XMM5 = _mm_and_ps(XMM5, XMM3);
+ XMM2 = _mm_and_ps(XMM2, XMM6);
+ XMM5 = _mm_mul_ps(XMM5, XMM5);
+ XMM2 = _mm_mul_ps(XMM2, XMM2);
+ c0 = _mm_movemask_ps(XMM3);
+ XMM3 = XMM1;
+ XMM5 = _mm_add_ps(XMM5, XMM2);
+ XMM2 = XMM4;
+ c1 = _mm_movemask_ps(XMM6);
+ XMM6 = _mm_cmplt_ps(PFV_M0P5.ps, XMM1);
+ XMM0 = _mm_add_ps(XMM0, XMM5);
+ XMM5 = _mm_cmplt_ps(PFV_M0P5.ps, XMM4);
+ XMM3 = _mm_cmplt_ps(XMM3, PFV_0P5.ps);
+ XMM2 = _mm_cmplt_ps(XMM2, PFV_0P5.ps);
+ energy_loss += bitCountTable[c0];
+ energy_loss += bitCountTable[c1];
+ XMM6 = _mm_and_ps(XMM6, XMM3);
+ XMM5 = _mm_and_ps(XMM5, XMM2);
+ XMM1 = _mm_and_ps(XMM1, XMM6);
+ XMM4 = _mm_and_ps(XMM4, XMM5);
+ XMM1 = _mm_mul_ps(XMM1, XMM1);
+ XMM4 = _mm_mul_ps(XMM4, XMM4);
+ c0 = _mm_movemask_ps(XMM6);
+ XMM1 = _mm_add_ps(XMM1, XMM4);
+ c1 = _mm_movemask_ps(XMM5);
+ energy_loss += bitCountTable[c0];
+ XMM0 = _mm_add_ps(XMM0, XMM1);
+ energy_loss += bitCountTable[c1];
+ acc = _mm_add_horz(XMM0); /* horizontal sum of the four partial accumulators */
+#endif
+ }
+ else
+ {
+ acc = 0.f;
+ energy_loss = 0;
+ for(i=j;i<j+partition;i++){
+ if(rint(in[i])==0.f){
+ acc+=in[i]*in[i];
+ energy_loss++;
+ }
+ }
+ }
+ }
+#else /* SSE Optimize */
for(i=j;i<j+partition;i++){
if(rint(in[i])==0.f){
acc+=in[i]*in[i];
energy_loss++;
}
}
+#endif /* SSE Optimize */
/* When an energy loss is large, NN processing is carried out in the middle of partition. */
/*if(energy_loss==32 && fabs(in[freqband_mid])>nnmid_th){
if(in[freqband_mid]*in[freqband_mid]<.25f){
@@ -1616,8 +9939,19 @@
int limit=g->coupling_pointlimit[p->vi->blockflag][blobno];
int pointlimit=limit;
int freqlimit=p->st_freqlimit;
+#ifdef __SSE__ /* SSE Optimize */
+ _MM_ALIGN16 unsigned int Mc_treshp[2048];
+ _MM_ALIGN16 unsigned int Ac_treshp[2048];
+ _MM_ALIGN16 float rMs[2048];
+ _MM_ALIGN16 float rAs[2048];
+ _MM_ALIGN16 unsigned int mdctMA[2048];
+ int midpoint0 = (limit/partition)*partition;
+ int midpoint1 = ((limit+partition-1)/partition)*partition;
+#else /* SSE Optimize */
unsigned char Mc_treshp[2048];
unsigned char Ac_treshp[2048];
+#endif /* SSE Optimize */
+ int s, e;
int lof_st;
int hif_st;
int hif_stcopy;
@@ -1629,11 +9963,1067 @@
nonzero[vi->coupling_mag[i]]=1;
nonzero[vi->coupling_ang[i]]=1;
+ s = 0;
+ e = p->n;
postpoint_backup=postpoint;
/** @ M6 PRE **/
// lossless only?
+#ifdef __SSE__ /* SSE Optimize */
+ for(j=0;j<e;j+=16)
+ {
+ __m128 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5;
+ XMM0 = _mm_load_ps(mdctM+j );
+ XMM2 = _mm_load_ps(mdctA+j );
+ XMM1 = _mm_load_ps(mdctM+j+ 4);
+ XMM3 = _mm_load_ps(mdctA+j+ 4);
+ XMM4 = _mm_load_ps(mdctM+j+ 8);
+ XMM5 = _mm_load_ps(mdctA+j+ 8);
+ XMM0 = _mm_mul_ps(XMM0, XMM2);
+ XMM1 = _mm_mul_ps(XMM1, XMM3);
+ XMM3 = _mm_load_ps(mdctA+j+12);
+ XMM2 = _mm_load_ps(mdctM+j+12);
+ XMM4 = _mm_mul_ps(XMM4, XMM5);
+ XMM3 = _mm_mul_ps(XMM3, XMM2);
+ XMM5 = _mm_load_ps(rMo+j );
+ XMM2 = _mm_load_ps(rMo+j+ 4);
+ XMM0 = _mm_cmplt_ps(XMM0, PFV_0.ps);
+ XMM1 = _mm_cmplt_ps(XMM1, PFV_0.ps);
+ XMM4 = _mm_cmplt_ps(XMM4, PFV_0.ps);
+ XMM3 = _mm_cmplt_ps(XMM3, PFV_0.ps);
+ _mm_store_ps(mdctMA+j , XMM0);
+ XMM0 = _mm_load_ps(rMo+j+ 8);
+ _mm_store_ps(mdctMA+j+ 4, XMM1);
+ XMM1 = _mm_load_ps(rMo+j+12);
+ _mm_store_ps(mdctMA+j+ 8, XMM4);
+ _mm_store_ps(mdctMA+j+12, XMM3);
+ XMM5 = _mm_and_ps(XMM5, PABSMASK.ps);
+ XMM2 = _mm_and_ps(XMM2, PABSMASK.ps);
+ XMM0 = _mm_and_ps(XMM0, PABSMASK.ps);
+ XMM1 = _mm_and_ps(XMM1, PABSMASK.ps);
+ _mm_store_ps(rMs+j , XMM5);
+ _mm_store_ps(rMs+j+ 4, XMM2);
+ _mm_store_ps(rMs+j+ 8, XMM0);
+ _mm_store_ps(rMs+j+12, XMM1);
+ XMM5 = _mm_load_ps(rAo+j );
+ XMM2 = _mm_load_ps(rAo+j+ 4);
+ XMM0 = _mm_load_ps(rAo+j+ 8);
+ XMM1 = _mm_load_ps(rAo+j+12);
+ XMM5 = _mm_and_ps(XMM5, PABSMASK.ps);
+ XMM2 = _mm_and_ps(XMM2, PABSMASK.ps);
+ XMM0 = _mm_and_ps(XMM0, PABSMASK.ps);
+ XMM1 = _mm_and_ps(XMM1, PABSMASK.ps);
+ _mm_store_ps(rAs+j , XMM5);
+ _mm_store_ps(rAs+j+ 4, XMM2);
+ _mm_store_ps(rAs+j+ 8, XMM0);
+ _mm_store_ps(rAs+j+12, XMM1);
+ }
+ for(;j<n;j+=16)
+ {
+ __m128 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5;
+ XMM0 = _mm_load_ps(mdctM+j );
+ XMM2 = _mm_load_ps(mdctA+j );
+ XMM1 = _mm_load_ps(mdctM+j+ 4);
+ XMM3 = _mm_load_ps(mdctA+j+ 4);
+ XMM4 = _mm_load_ps(mdctM+j+ 8);
+ XMM5 = _mm_load_ps(mdctA+j+ 8);
+ XMM0 = _mm_mul_ps(XMM0, XMM2);
+ XMM1 = _mm_mul_ps(XMM1, XMM3);
+ XMM3 = _mm_load_ps(mdctA+j+12);
+ XMM2 = _mm_load_ps(mdctM+j+12);
+ XMM4 = _mm_mul_ps(XMM4, XMM5);
+ XMM3 = _mm_mul_ps(XMM3, XMM2);
+ XMM0 = _mm_cmplt_ps(XMM0, PFV_0.ps);
+ XMM1 = _mm_cmplt_ps(XMM1, PFV_0.ps);
+ XMM4 = _mm_cmplt_ps(XMM4, PFV_0.ps);
+ XMM3 = _mm_cmplt_ps(XMM3, PFV_0.ps);
+ _mm_store_ps(mdctMA+j , XMM0);
+ _mm_store_ps(mdctMA+j+ 4, XMM1);
+ _mm_store_ps(mdctMA+j+ 8, XMM4);
+ _mm_store_ps(mdctMA+j+12, XMM3);
+ }
+ if(!stereo_threshholds[g->coupling_postpointamp[blobno]])stcont_start=n;
+ else{
+ static _MM_ALIGN16 __m128x PUI1 =
+ { .si32 = { 1, 1, 1, 1} };
+ int freqlimit16 = freqlimit&(~15);
+ __m128 PST_THRESH;
+ // exception handling
+ if((postpoint-sth_high)<prepoint)sth_high=postpoint-prepoint;
+ // start point setup
+ for(j=0;j<n;j++){
+ stcont_start=j;
+ if(p->noiseoffset[1][j]>=-2)break;
+ }
+ // start point correction & threshold setup
+ st_thresh=.1;
+ if(p->m_val<.5){
+ // low frequency limit
+ if(stcont_start<limit)stcont_start=limit;
+ }else if(p->vi->normal_thresh>1.)st_thresh=.5;
+ PST_THRESH = _mm_set_ps1(st_thresh);
+ for(j=0;j<freqlimit16;j+=16){
+ __m128 XMM0, XMM1, XMM2, XMM3;
+ XMM0 = _mm_load_ps(rM+j );
+ XMM1 = _mm_load_ps(rM+j+ 4);
+ XMM2 = _mm_load_ps(rM+j+ 8);
+ XMM3 = _mm_load_ps(rM+j+12);
+ XMM0 = _mm_and_ps(XMM0, PABSMASK.ps);
+ XMM1 = _mm_and_ps(XMM1, PABSMASK.ps);
+ XMM2 = _mm_and_ps(XMM2, PABSMASK.ps);
+ XMM3 = _mm_and_ps(XMM3, PABSMASK.ps);
+ XMM0 = _mm_cmplt_ps(XMM0, PST_THRESH);
+ XMM1 = _mm_cmplt_ps(XMM1, PST_THRESH);
+ XMM2 = _mm_cmplt_ps(XMM2, PST_THRESH);
+ XMM3 = _mm_cmplt_ps(XMM3, PST_THRESH);
+ XMM0 = _mm_and_ps(XMM0, PUI1.ps);
+ XMM1 = _mm_and_ps(XMM1, PUI1.ps);
+ XMM2 = _mm_and_ps(XMM2, PUI1.ps);
+ XMM3 = _mm_and_ps(XMM3, PUI1.ps);
+ _mm_store_ps(Mc_treshp+j , XMM0);
+ _mm_store_ps(Mc_treshp+j+ 4, XMM1);
+ _mm_store_ps(Mc_treshp+j+ 8, XMM2);
+ _mm_store_ps(Mc_treshp+j+12, XMM3);
+ XMM0 = _mm_load_ps(rA+j );
+ XMM1 = _mm_load_ps(rA+j+ 4);
+ XMM2 = _mm_load_ps(rA+j+ 8);
+ XMM3 = _mm_load_ps(rA+j+12);
+ XMM0 = _mm_and_ps(XMM0, PABSMASK.ps);
+ XMM1 = _mm_and_ps(XMM1, PABSMASK.ps);
+ XMM2 = _mm_and_ps(XMM2, PABSMASK.ps);
+ XMM3 = _mm_and_ps(XMM3, PABSMASK.ps);
+ XMM0 = _mm_cmplt_ps(XMM0, PST_THRESH);
+ XMM1 = _mm_cmplt_ps(XMM1, PST_THRESH);
+ XMM2 = _mm_cmplt_ps(XMM2, PST_THRESH);
+ XMM3 = _mm_cmplt_ps(XMM3, PST_THRESH);
+ XMM0 = _mm_and_ps(XMM0, PUI1.ps);
+ XMM1 = _mm_and_ps(XMM1, PUI1.ps);
+ XMM2 = _mm_and_ps(XMM2, PUI1.ps);
+ XMM3 = _mm_and_ps(XMM3, PUI1.ps);
+ _mm_store_ps(Ac_treshp+j , XMM0);
+ _mm_store_ps(Ac_treshp+j+ 4, XMM1);
+ _mm_store_ps(Ac_treshp+j+ 8, XMM2);
+ _mm_store_ps(Ac_treshp+j+12, XMM3);
+ }
+ for(;j<=freqlimit;j++){ // or j<n
+ if(fabs(rM[j])<st_thresh)Mc_treshp[j]=1;
+ else Mc_treshp[j]=0;
+ if(fabs(rA[j])<st_thresh)Ac_treshp[j]=1;
+ else Ac_treshp[j]=0;
+ }
+ }
+ if(n<=sliding_lowpass&&p->vi->normal_point_p&&partition%8==0)
+ {
+ static _MM_ALIGN16 const __m128x PP001 =
+ { .sf = {0.001f, 0.001f, 0.001f, 0.001f} };
+ static _MM_ALIGN16 const __m128x P1000 =
+ { .sf = {1000.f, 1000.f, 1000.f, 1000.f} };
+ __m128 PPOSTPOINT_BACKUP = _mm_set_ps1(postpoint_backup);
+ __m128 PDUMMYPOINT =
+ _mm_set_ps1(stereo_threshholds_rephase[g->coupling_postpointamp[blobno]]);
+ _MM_ALIGN16 float slowM[2048];
+ _MM_ALIGN16 float slowA[2048];
+ _MM_ALIGN16 float shigh[2048];
+ int midpoint0 = (limit/partition)*partition;
+ int midpoint1 = ((limit+partition-1)/partition)*partition;
+ for(j=0;j<e;j+=partition){
+ float rpacc;
+ int energy_loss=0;
+ int nn_num=0;
+
+ for(k=0;k<partition;k++){
+ int l=k+j;
+ float slow=0.f;
+ float shighM=0.f;
+ float shighA=0.f;
+
+ slowM[l] = prepoint;
+ slowA[l] = prepoint;
+ shigh[l] = 0.f;
+
+ postpoint=postpoint_backup;
+
+ /* AoTuV */
+ /** @ M6 MAIN **
+ The threshold of a stereo is changed dynamically.
+ by Aoyumi @ 2006/06/04
+ */
+ if(l>=stcont_start){
+ int m;
+ int lof_num;
+ int hif_num;
+
+ // (It may be better to calculate this in advance)
+ lof_st=l-(l/2)*.167;
+ hif_st=l+l*.167;
+
+ hif_stcopy=hif_st;
+
+ // limit setting
+ if(hif_st>freqlimit)hif_st=freqlimit;
+
+ if(old_lof_st || old_hif_st){
+ if(hif_st>l){
+ // hif_st, lof_st ...absolute value
+ // lof_num, hif_num ...relative value
+
+ // low freq.(lower)
+ lof_num=lof_st-old_lof_st;
+ if(lof_num==0){
+ Afreq_num+=Ac_treshp[l-1];
+ Mfreq_num+=Mc_treshp[l-1];
+ }else if(lof_num==1){
+ Afreq_num+=Ac_treshp[l-1];
+ Mfreq_num+=Mc_treshp[l-1];
+ Afreq_num-=Ac_treshp[old_lof_st];
+ Mfreq_num-=Mc_treshp[old_lof_st];
+ }//else puts("err. low");
+
+ // high freq.(higher)
+ hif_num=hif_st-old_hif_st;
+ if(hif_num==0){
+ Afreq_num-=Ac_treshp[l];
+ Mfreq_num-=Mc_treshp[l];
+ }else if(hif_num==1){
+ Afreq_num-=Ac_treshp[l];
+ Mfreq_num-=Mc_treshp[l];
+ Afreq_num+=Ac_treshp[hif_st];
+ Mfreq_num+=Mc_treshp[hif_st];
+ }else if(hif_num==2){
+ Afreq_num-=Ac_treshp[l];
+ Mfreq_num-=Mc_treshp[l];
+ Afreq_num+=Ac_treshp[hif_st];
+ Mfreq_num+=Mc_treshp[hif_st];
+ Afreq_num+=Ac_treshp[hif_st-1];
+ Mfreq_num+=Mc_treshp[hif_st-1];
+ }//else puts("err. high");
+ }
+ }else{
+ for(m=lof_st; m<=hif_st; m++){
+ if(m==l)continue;
+ if(Ac_treshp[m]) Afreq_num++;
+ if(Mc_treshp[m]) Mfreq_num++;
+ }
+ }
+ if(l>=limit){
+ shigh[l]=sth_high/(hif_stcopy-lof_st);
+ shighA=shigh[l]*Afreq_num;
+ shighM=shigh[l]*Mfreq_num;
+ if((shighA+rAs[l])>(shighM+rMs[l]))shigh[l]=shighA;
+ else shigh[l]=shighM;
+ }else{
+ slow=sth_low/(hif_stcopy-lof_st);
+ slowA[l]=slow*Afreq_num;
+ slowM[l]=slow*Mfreq_num;
+ if(p->noiseoffset[1][l]<-1){
+ slowA[l]*=(p->noiseoffset[1][l]+2);
+ slowM[l]*=(p->noiseoffset[1][l]+2);
+ }
+ slowA[l] = prepoint - slowA[l];
+ slowM[l] = prepoint - slowM[l];
+ }
+ old_lof_st=lof_st;
+ old_hif_st=hif_st;
+ }
+ }
+ }
+
+ /* Phase 0 */
+ if(s<midpoint0)
+ {
+ int te;
+ if(e>=midpoint0)
+ te = midpoint0;
+ else
+ te = e;
+ for(j=s;j<te;j+=partition){
+ int energy_loss=0;
+ for(k=0;k<partition;k+=4)
+ {
+ int l = k+j;
+ int ifc0, m, o;
+ __m128 XMM0, XMM1, XMM2, XMM3;
+ XMM0 = _mm_load_ps(rMs+l );
+ XMM2 = _mm_load_ps(slowM+l );
+ XMM1 = _mm_load_ps(rAs+l );
+ XMM3 = _mm_load_ps(slowA+l );
+ XMM0 = _mm_cmplt_ps(XMM0, XMM2);
+ XMM1 = _mm_cmplt_ps(XMM1, XMM3);
+ XMM1 = _mm_and_ps(XMM1, XMM0);
+ ifc0 = _mm_movemask_ps(XMM1);
+ if(ifc0==0)
+ {
+ couple_lossless_ps(rM+l, rA+l, qM+l, qA+l);
+ l += 4;
+ }
+ else if(ifc0==0xF)
+ {
+ precomputed_couple_point_ps(&mag_memo[i][l],
+ floorM+l,floorA+l,
+ qM+l,qA+l);
+ XMM0 = _mm_load_ps(qM+l);
+#if defined(__SSE2__)
+ XMM0 = _mm_castsi128_ps(_mm_cvtps_epi32(XMM0));
+ XMM0 = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(XMM0), PFV_0.pi));
+#else
+ XMM1 = _mm_cmplt_ps(PFV_M0P5.ps, XMM0);
+ XMM0 = _mm_cmplt_ps(XMM0, PFV_0P5.ps);
+ XMM0 = _mm_and_ps(XMM0, XMM1);
+#endif
+ energy_loss += bitCountTable[_mm_movemask_ps(XMM0)];
+ l += 4;
+ }
+ else
+ {
+ for(m=0,o=1;m<4;m++)
+ {
+ if(ifc0&o)
+ precomputed_couple_point(mag_memo[i][l],
+ floorM[l],floorA[l],
+ qM+l,qA+l);
+ else
+ couple_lossless(rM[l],rA[l],qM+l,qA+l);
+ l ++;
+ o = o << 1;
+ }
+ XMM0 = _mm_load_ps(qM+l-4);
+#if defined(__SSE2__)
+ XMM0 = _mm_castsi128_ps(_mm_cvtps_epi32(XMM0));
+ XMM0 = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(XMM0), PFV_0.pi));
+#else
+ XMM2 = _mm_cmplt_ps(PFV_M0P5.ps, XMM0);
+ XMM0 = _mm_cmplt_ps(XMM0, PFV_0P5.ps);
+ XMM0 = _mm_and_ps(XMM0, XMM2);
+#endif
+ XMM0 = _mm_and_ps(XMM0, XMM1);
+ energy_loss += bitCountTable[_mm_movemask_ps(XMM0)];
+ }
+ }
+ {
+ int min_energy = 32-energy_loss;
+ if(min_energy<2 || (j<=p->min_nn_lp && min_energy==2)){
+ int l;
+ float ab;
+ for(;k<partition;k++){
+ l=mag_sort[i][j+k];
+ ab=fabs(qM[l]);
+ if(ab<0.04)break;
+#if 1
+ if( ((mdctM[l]>0. && mdctA[l]<0.) || (mdctA[l]>0. && mdctM[l]<0.))
+ && ab<0.11)break; // 0.11
+#else
+ if(mdctMA[l] && ab < 0.11)break;
+#endif
+ if( l>=pointlimit){
+ __m128 XMM0 = _mm_load_ss(qM+l);
+ if(_mm_cvtss_si32(XMM0)==0){
+ qM[l]=unitnorm(qM[l]);
+ break;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ /* Phase 1 */
+ if(s<=midpoint0&&e>=midpoint1)
+ {
+ for(j=midpoint0;j<midpoint1;j+=partition)
+ {
+ __m128 PACC;
+ int midpoint033 = (limit-midpoint0)&(~3);
+ int midpoint066 = (limit-midpoint0+3)&(~3);
+ float acc=0.f;
+ float rpacc;
+ int energy_loss=0;
+ int nn_num=0;
+
+ for(k=0;k<midpoint033;k+=4)
+ {
+ int l = k+j;
+ int ifc0, m, o;
+ __m128 XMM0, XMM1, XMM2, XMM3;
+ XMM0 = _mm_load_ps(rMs+l );
+ XMM2 = _mm_load_ps(slowM+l );
+ XMM1 = _mm_load_ps(rAs+l );
+ XMM3 = _mm_load_ps(slowA+l );
+ XMM0 = _mm_cmplt_ps(XMM0, XMM2);
+ XMM1 = _mm_cmplt_ps(XMM1, XMM3);
+ XMM1 = _mm_and_ps(XMM1, XMM0);
+ ifc0 = _mm_movemask_ps(XMM1);
+ if(ifc0==0)
+ {
+ couple_lossless_ps(rM+l, rA+l, qM+l, qA+l);
+ l += 4;
+ }
+ else if(ifc0==0xF)
+ {
+ precomputed_couple_point_ps(&mag_memo[i][l],
+ floorM+l,floorA+l,
+ qM+l,qA+l);
+ XMM0 = _mm_load_ps(qM+l);
+#if defined(__SSE2__)
+ XMM0 = _mm_castsi128_ps(_mm_cvtps_epi32(XMM0));
+ XMM0 = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(XMM0), PFV_0.pi));
+#else
+ XMM2 = _mm_cmplt_ps(PFV_M0P5.ps, XMM0);
+ XMM0 = _mm_cmplt_ps(XMM0, PFV_0P5.ps);
+ XMM0 = _mm_and_ps(XMM0, XMM2);
+#endif
+ energy_loss += bitCountTable[_mm_movemask_ps(XMM0)];
+ l += 4;
+ }
+ else
+ {
+ for(m=0,o=1;m<4;m++)
+ {
+ if(ifc0&o)
+ precomputed_couple_point(mag_memo[i][l],
+ floorM[l],floorA[l],
+ qM+l,qA+l);
+ else
+ couple_lossless(rM[l],rA[l],qM+l,qA+l);
+ l ++;
+ o = o << 1;
+ }
+ XMM0 = _mm_load_ps(qM+l-4);
+#if defined(__SSE2__)
+ XMM0 = _mm_castsi128_ps(_mm_cvtps_epi32(XMM0));
+ XMM0 = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(XMM0), PFV_0.pi));
+#else
+ XMM2 = _mm_cmplt_ps(PFV_M0P5.ps, XMM0);
+ XMM0 = _mm_cmplt_ps(XMM0, PFV_0P5.ps);
+ XMM0 = _mm_and_ps(XMM0, XMM2);
+#endif
+ XMM0 = _mm_and_ps(XMM0, XMM1);
+ energy_loss += bitCountTable[_mm_movemask_ps(XMM0)];
+ }
+ }
+ for(;k<midpoint066;k++)
+ {
+ int l=k+j;
+ float a=mdctM[l];
+ float b=mdctA[l];
+ float dummypoint;
+ float hypot_reserve;
+
+ postpoint=postpoint_backup;
+
+ if(l>=limit){
+ postpoint-=shigh[l];
+ /* The following prevents an extreme reduction of residue. (2ch stereo only) */
+ if(mdctMA[l]){
+ hypot_reserve = fabs(fabs(a)-fabs(b));
+ if(hypot_reserve < 0.001){ // 0~0.000999-
+ dummypoint = stereo_threshholds_rephase[g->coupling_postpointamp[blobno]];
+ dummypoint = dummypoint+((postpoint-dummypoint)*(hypot_reserve*1000));
+ if(postpoint > dummypoint) postpoint = dummypoint;
+ }
+ }
+ }
+
+ if((l>=limit && rMs[l]<postpoint && rAs[l]<postpoint) ||
+ (rMs[l]<slowM[l] && rAs[l]<slowA[l])){
+
+ __m128 XMM0;
+ if(l>=0&&l<=n)
+ {
+ precomputed_couple_point(mag_memo[i][l],
+ floorM[l],floorA[l],
+ qM+l,qA+l);
+ }
+ //if(rint(qM[l])==0.f)acc+=qM[l]*qM[l];
+ XMM0 = _mm_load_ss(qM+l);
+ if(_mm_cvtss_si32(XMM0)==0){
+ energy_loss++;
+ if(l>=limit)acc+=qM[l]*qM[l];
+ }
+ }else{
+ couple_lossless(rM[l],rA[l],qM+l,qA+l);
+ }
+ }
+ PACC = _mm_set_ss(acc);
+ for(;k<partition;k+=4)
+ {
+ int l = k+j;
+ int ifc0, m, o;
+ __m128 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5;
+ XMM3 = PPOSTPOINT_BACKUP;
+ XMM4 = _mm_load_ps(shigh+l );
+ XMM0 = _mm_load_ps(mdctM+l );
+ XMM1 = _mm_load_ps(mdctA+l );
+ XMM2 = _mm_load_ps(mdctMA+l );
+ XMM3 = _mm_sub_ps(XMM3, XMM4); /* postpoint */
+ if(_mm_movemask_ps(XMM2)!=0)
+ {
+ XMM5 = XMM3; /* copy of postpoint */
+ XMM0 = _mm_and_ps(XMM0, PABSMASK.ps);
+ XMM1 = _mm_and_ps(XMM1, PABSMASK.ps);
+ XMM0 = _mm_sub_ps(XMM0, XMM1);
+ XMM0 = _mm_and_ps(XMM0, PABSMASK.ps); /* hypot_reserve */
+ XMM1 = _mm_cmplt_ps(XMM0, PP001.ps); /* Mask of hypot_reserve */
+ XMM0 = _mm_mul_ps(XMM0, P1000.ps);
+ XMM5 = _mm_sub_ps(XMM5, PDUMMYPOINT);
+ XMM0 = _mm_mul_ps(XMM0, XMM5);
+ XMM0 = _mm_add_ps(XMM0, PDUMMYPOINT); /* dummypoint */
+ XMM1 = _mm_and_ps(XMM1, XMM2);
+ XMM0 = _mm_min_ps(XMM0, XMM3);
+ XMM0 = _mm_or_ps(
+ _mm_and_ps(XMM0, XMM1),
+ _mm_andnot_ps(XMM1, XMM3)
+ ); /* postpoint */
+ }
+ else
+ XMM0 = XMM3;
+ XMM3 = _mm_load_ps(slowM+l );
+ XMM4 = _mm_load_ps(slowA+l );
+ XMM1 = _mm_load_ps(rMs+l );
+ XMM2 = _mm_load_ps(rAs+l );
+ XMM3 = _mm_max_ps(XMM3, XMM0);
+ XMM4 = _mm_max_ps(XMM4, XMM0);
+ XMM1 = _mm_cmplt_ps(XMM1, XMM3);
+ XMM2 = _mm_cmplt_ps(XMM2, XMM4);
+ XMM1 = _mm_and_ps(XMM1, XMM2);
+ ifc0 = _mm_movemask_ps(XMM1);
+ if(ifc0==0)
+ {
+ couple_lossless_ps(rM+l, rA+l, qM+l, qA+l);
+ l += 4;
+ }
+ else if(ifc0==0xF)
+ {
+ precomputed_couple_point_ps(&mag_memo[i][l],
+ floorM+l,floorA+l,
+ qM+l,qA+l);
+ XMM0 = _mm_load_ps(qM+l);
+ XMM2 = XMM0;
+#if defined(__SSE2__)
+ XMM0 = _mm_castsi128_ps(_mm_cvtps_epi32(XMM0));
+ XMM0 = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(XMM0), PFV_0.pi));
+#else
+ XMM3 = _mm_cmplt_ps(PFV_M0P5.ps, XMM0);
+ XMM0 = _mm_cmplt_ps(XMM0, PFV_0P5.ps);
+ XMM0 = _mm_and_ps(XMM0, XMM3);
+#endif
+ XMM2 = _mm_and_ps(XMM2, XMM0);
+ energy_loss += bitCountTable[_mm_movemask_ps(XMM0)];
+ XMM2 = _mm_mul_ps(XMM2, XMM2);
+ PACC = _mm_add_ps(PACC, XMM2);
+ l += 4;
+ }
+ else /* mixed group: some of the four lanes are point-coupled, the rest lossless */
+ {
+ for(m=0,o=1;m<4;m++)
+ {
+ if(ifc0&o)
+ precomputed_couple_point(mag_memo[i][l],
+ floorM[l],floorA[l],
+ qM+l,qA+l);
+ else
+ couple_lossless(rM[l],rA[l],qM+l,qA+l);
+ l ++;
+ o = o << 1;
+ }
+ XMM0 = _mm_load_ps(qM+l-4); /* l has advanced by 4; reload the group just written */
+ XMM2 = XMM0;
+#if defined(__SSE2__)
+ XMM0 = _mm_castsi128_ps(_mm_cvtps_epi32(XMM0));
+ XMM0 = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(XMM0), PFV_0.pi));
+#else
+ XMM3 = _mm_cmplt_ps(PFV_M0P5.ps, XMM0);
+ XMM0 = _mm_cmplt_ps(XMM0, PFV_0P5.ps);
+ XMM0 = _mm_and_ps(XMM0, XMM3);
+#endif
+ XMM0 = _mm_and_ps(XMM0, XMM1); /* keep only point-coupled lanes, as the scalar loop counts them */
+ XMM2 = _mm_and_ps(XMM2, XMM0); /* qM values of lanes that are both coupled and round to zero */
+ energy_loss += bitCountTable[_mm_movemask_ps(XMM0)]; /* lossless lanes no longer inflate the count */
+ XMM2 = _mm_mul_ps(XMM2, XMM2);
+ PACC = _mm_add_ps(PACC, XMM2);
+ }
+ }
+ acc = _mm_add_horz(PACC);
+ {
+ int freqband_mid=j+16;
+ int freqband_flag=0;
+ int min_energy;
+
+ rpacc=acc;
+ /* When the energy loss of a partition is large, NN is performed in the middle of partition.
+ for 48/44.1/32kHz */
+ if(energy_loss==32 && fabs(qM[freqband_mid])>nnmid_th && acc>=p->vi->normal_thresh
+ && freqband_mid>=pointlimit){
+ __m128 XMM0;
+ XMM0 = _mm_load_ss(qM+freqband_mid);
+ if(_mm_cvtss_si32(XMM0)==0){
+ if(mdctMA[freqband_mid]){
+ acc-=1.f;
+ rpacc-=1.32;
+ }else{
+ acc-=1.f;
+ rpacc-=1.f;
+ }
+ qM[freqband_mid]=unitnorm(qM[freqband_mid]);
+ freqband_flag=1;
+ nn_num++;
+ }
+ }
+ /* NN main (point stereo) */
+ for(k=0;k<partition && acc>=p->vi->normal_thresh;k++){
+ int l;
+ l=mag_sort[i][j+k];
+ if(freqband_mid==l && freqband_flag)continue;
+ if(l>=pointlimit){
+ __m128 XMM0 = _mm_load_ss(qM+l);
+ if(_mm_cvtss_si32(XMM0)==0){
+ if(mdctMA[l]){
+ if(rpacc<p->vi->normal_thresh)continue;
+ acc-=1.f;
+ rpacc-=1.32;
+ }else{
+ acc-=1.f;
+ rpacc-=1.f;
+ }
+ qM[l]=unitnorm(qM[l]);
+ nn_num++;
+ }
+ }
+ }
+ /* The minimum energy complement.
+ for 48/44.1/32kHz */
+ min_energy=32-energy_loss+nn_num;
+ if(min_energy<2 || (j<=p->min_nn_lp && min_energy==2)){
+ int l;
+ float ab;
+ for(;k<partition;k++){
+ l=mag_sort[i][j+k];
+ ab=fabs(qM[l]);
+ if(ab<0.04)break;
+#if 1
+ if( ((mdctM[l]>0. && mdctA[l]<0.) || (mdctA[l]>0. && mdctM[l]<0.))
+ && ab<0.11)break; // 0.11
+#else
+ if(mdctMA[l] && ab < 0.11)break;
+#endif
+ if(l>=pointlimit){
+ __m128 XMM0 = _mm_load_ss(qM+l);
+ if(_mm_cvtss_si32(XMM0)==0){
+ qM[l]=unitnorm(qM[l]);
+ break;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ /* Phase 2 */
+ if(e>midpoint1)
+ {
+ int ts;
+ if(s<midpoint1)
+ ts = midpoint1;
+ else
+ ts = s;
+ for(j=ts;j<e;j+=partition){
+ float acc=0.f;
+ float rpacc;
+ int energy_loss=0;
+ int nn_num=0;
+ __m128 PACC = _mm_setzero_ps();
+
+ for(k=0;k<partition;k+=4)
+ {
+ int l = k+j;
+ int ifc0, m, o;
+ __m128 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5;
+ XMM3 = PPOSTPOINT_BACKUP;
+ XMM4 = _mm_load_ps(shigh+l );
+ XMM0 = _mm_load_ps(mdctM+l );
+ XMM1 = _mm_load_ps(mdctA+l );
+ XMM2 = _mm_load_ps(mdctMA+l );
+ XMM3 = _mm_sub_ps(XMM3, XMM4); /* postpoint */
+ if(_mm_movemask_ps(XMM2)!=0)
+ {
+ XMM5 = XMM3; /* copy of postpoint */
+ XMM0 = _mm_and_ps(XMM0, PABSMASK.ps);
+ XMM1 = _mm_and_ps(XMM1, PABSMASK.ps);
+ XMM0 = _mm_sub_ps(XMM0, XMM1);
+ XMM0 = _mm_and_ps(XMM0, PABSMASK.ps); /* hypot_reserve */
+ XMM1 = _mm_cmplt_ps(XMM0, PP001.ps); /* Mask of hypot_reserve */
+ XMM0 = _mm_mul_ps(XMM0, P1000.ps);
+ XMM5 = _mm_sub_ps(XMM5, PDUMMYPOINT);
+ XMM0 = _mm_mul_ps(XMM0, XMM5);
+ XMM0 = _mm_add_ps(XMM0, PDUMMYPOINT); /* dummypoint */
+ XMM1 = _mm_and_ps(XMM1, XMM2);
+ XMM0 = _mm_min_ps(XMM0, XMM3);
+ XMM0 = _mm_or_ps(
+ _mm_and_ps(XMM0, XMM1),
+ _mm_andnot_ps(XMM1, XMM3)
+ ); /* postpoint */
+ }
+ else
+ XMM0 = XMM3;
+ XMM3 = _mm_load_ps(slowM+l );
+ XMM4 = _mm_load_ps(slowA+l );
+ XMM1 = _mm_load_ps(rMs+l );
+ XMM2 = _mm_load_ps(rAs+l );
+ XMM3 = _mm_max_ps(XMM3, XMM0);
+ XMM4 = _mm_max_ps(XMM4, XMM0);
+ XMM1 = _mm_cmplt_ps(XMM1, XMM3);
+ XMM2 = _mm_cmplt_ps(XMM2, XMM4);
+ XMM1 = _mm_and_ps(XMM1, XMM2);
+ ifc0 = _mm_movemask_ps(XMM1);
+ if(ifc0==0)
+ {
+ couple_lossless_ps(rM+l, rA+l, qM+l, qA+l);
+ l += 4;
+ }
+ else if(ifc0==0xF)
+ {
+ precomputed_couple_point_ps(&mag_memo[i][l],
+ floorM+l,floorA+l,
+ qM+l,qA+l);
+ XMM0 = _mm_load_ps(qM+l);
+ XMM2 = XMM0;
+#if defined(__SSE2__)
+ XMM0 = _mm_castsi128_ps(_mm_cvtps_epi32(XMM0));
+ XMM0 = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(XMM0), PFV_0.pi));
+#else
+ XMM3 = _mm_cmplt_ps(PFV_M0P5.ps, XMM0);
+ XMM0 = _mm_cmplt_ps(XMM0, PFV_0P5.ps);
+ XMM0 = _mm_and_ps(XMM0, XMM3);
+#endif
+ XMM2 = _mm_and_ps(XMM2, XMM0);
+ energy_loss += bitCountTable[_mm_movemask_ps(XMM0)];
+ XMM2 = _mm_mul_ps(XMM2, XMM2);
+ PACC = _mm_add_ps(PACC, XMM2);
+ l += 4;
+ }
+ else /* mixed group: some of the four lanes are point-coupled, the rest lossless */
+ {
+ for(m=0,o=1;m<4;m++)
+ {
+ if(ifc0&o)
+ precomputed_couple_point(mag_memo[i][l],
+ floorM[l],floorA[l],
+ qM+l,qA+l);
+ else
+ couple_lossless(rM[l],rA[l],qM+l,qA+l);
+ l ++;
+ o = o << 1;
+ }
+ XMM0 = _mm_load_ps(qM+l-4); /* l has advanced by 4; reload the group just written */
+ XMM2 = XMM0;
+#if defined(__SSE2__)
+ XMM0 = _mm_castsi128_ps(_mm_cvtps_epi32(XMM0));
+ XMM0 = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(XMM0), PFV_0.pi));
+#else
+ XMM3 = _mm_cmplt_ps(PFV_M0P5.ps, XMM0);
+ XMM0 = _mm_cmplt_ps(XMM0, PFV_0P5.ps);
+ XMM0 = _mm_and_ps(XMM0, XMM3);
+#endif
+ XMM0 = _mm_and_ps(XMM0, XMM1); /* keep only point-coupled lanes, as the scalar loop counts them */
+ XMM2 = _mm_and_ps(XMM2, XMM0); /* qM values of lanes that are both coupled and round to zero */
+ energy_loss += bitCountTable[_mm_movemask_ps(XMM0)]; /* lossless lanes no longer inflate the count */
+ XMM2 = _mm_mul_ps(XMM2, XMM2);
+ PACC = _mm_add_ps(PACC, XMM2);
+ }
+ }
+ acc = _mm_add_horz(PACC);
+ {
+ int freqband_mid=j+16;
+ int freqband_flag=0;
+ int min_energy;
+
+ rpacc=acc;
+ /* When the energy loss of a partition is large, NN is performed in the middle of partition.
+ for 48/44.1/32kHz */
+ if(energy_loss==32 && fabs(qM[freqband_mid])>nnmid_th && acc>=p->vi->normal_thresh
+ && freqband_mid>=pointlimit){
+ __m128 XMM0;
+ XMM0 = _mm_load_ss(qM+freqband_mid);
+ if(_mm_cvtss_si32(XMM0)==0){
+ if(mdctMA[freqband_mid]){
+ acc-=1.f;
+ rpacc-=1.32;
+ }else{
+ acc-=1.f;
+ rpacc-=1.f;
+ }
+ qM[freqband_mid]=unitnorm(qM[freqband_mid]);
+ freqband_flag=1;
+ nn_num++;
+ }
+ }
+ /* NN main (point stereo) */
+ for(k=0;k<partition && acc>=p->vi->normal_thresh;k++){
+ int l;
+ l=mag_sort[i][j+k];
+ if(freqband_mid==l && freqband_flag)continue;
+ if(l>=pointlimit && rint(qM[l])==0.f){
+ if(mdctMA[l]){
+ if(rpacc<p->vi->normal_thresh)continue;
+ acc-=1.f;
+ rpacc-=1.32;
+ }else{
+ acc-=1.f;
+ rpacc-=1.f;
+ }
+ qM[l]=unitnorm(qM[l]);
+ nn_num++;
+ }
+ }
+ /* The minimum energy complement.
+ for 48/44.1/32kHz */
+ min_energy=32-energy_loss+nn_num;
+ if(min_energy<2 || (j<=p->min_nn_lp && min_energy==2)){
+ int l;
+ float ab;
+ for(;k<partition;k++){
+ __m128 XMM0;
+ l=mag_sort[i][j+k];
+ ab=fabs(qM[l]);
+ if(ab<0.04)break;
+#if 1
+ if( ((mdctM[l]>0. && mdctA[l]<0.) || (mdctA[l]>0. && mdctM[l]<0.))
+ && ab<0.11)break; // 0.11
+#else
+ if(mdctMA[l] && ab < 0.11)break;
+#endif
+ if(l>=pointlimit){
+ __m128 XMM0 = _mm_load_ss(qM+l);
+ if(_mm_cvtss_si32(XMM0)==0){
+ qM[l]=unitnorm(qM[l]);
+ break;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ else
+ {
+ _MM_ALIGN16 float slowM[2048];
+ _MM_ALIGN16 float slowA[2048];
+ _MM_ALIGN16 float shigh[2048];
+ int midpoint0 = (limit/partition)*partition;
+ int midpoint1 = ((limit+partition-1)/partition)*partition;
+ for(j=0;j<e;j+=partition){
+ float rpacc;
+ int energy_loss=0;
+ int nn_num=0;
+
+ for(k=0;k<partition;k++){
+ int l=k+j;
+ float slow=0.f;
+ float shighM=0.f;
+ float shighA=0.f;
+
+ slowM[l] = prepoint;
+ slowA[l] = prepoint;
+ shigh[l] = 0.f;
+
+ postpoint=postpoint_backup;
+
+ /* AoTuV */
+ /** @ M6 MAIN **
+ The threshold of a stereo is changed dynamically.
+ by Aoyumi @ 2006/06/04
+ */
+ if(l>=stcont_start){
+ int m;
+ int lof_num;
+ int hif_num;
+
+ // (It may be better to calculate this in advance)
+ lof_st=l-(l/2)*.167;
+ hif_st=l+l*.167;
+
+ hif_stcopy=hif_st;
+
+ // limit setting
+ if(hif_st>freqlimit)hif_st=freqlimit;
+
+ if(old_lof_st || old_hif_st){
+ if(hif_st>l){
+ // hif_st, lof_st ...absolute value
+ // lof_num, hif_num ...relative value
+
+ // low freq.(lower)
+ lof_num=lof_st-old_lof_st;
+ if(lof_num==0){
+ Afreq_num+=Ac_treshp[l-1];
+ Mfreq_num+=Mc_treshp[l-1];
+ }else if(lof_num==1){
+ Afreq_num+=Ac_treshp[l-1];
+ Mfreq_num+=Mc_treshp[l-1];
+ Afreq_num-=Ac_treshp[old_lof_st];
+ Mfreq_num-=Mc_treshp[old_lof_st];
+ }//else puts("err. low");
+
+ // high freq.(higher)
+ hif_num=hif_st-old_hif_st;
+ if(hif_num==0){
+ Afreq_num-=Ac_treshp[l];
+ Mfreq_num-=Mc_treshp[l];
+ }else if(hif_num==1){
+ Afreq_num-=Ac_treshp[l];
+ Mfreq_num-=Mc_treshp[l];
+ Afreq_num+=Ac_treshp[hif_st];
+ Mfreq_num+=Mc_treshp[hif_st];
+ }else if(hif_num==2){
+ Afreq_num-=Ac_treshp[l];
+ Mfreq_num-=Mc_treshp[l];
+ Afreq_num+=Ac_treshp[hif_st];
+ Mfreq_num+=Mc_treshp[hif_st];
+ Afreq_num+=Ac_treshp[hif_st-1];
+ Mfreq_num+=Mc_treshp[hif_st-1];
+ }//else puts("err. high");
+ }
+ }else{
+ for(m=lof_st; m<=hif_st; m++){
+ if(m==l)continue;
+ if(Ac_treshp[m]) Afreq_num++;
+ if(Mc_treshp[m]) Mfreq_num++;
+ }
+ }
+ if(l>=limit){
+ shigh[l]=sth_high/(hif_stcopy-lof_st);
+ shighA=shigh[l]*Afreq_num;
+ shighM=shigh[l]*Mfreq_num;
+ if((shighA+rAs[l])>(shighM+rMs[l]))shigh[l]=shighA;
+ else shigh[l]=shighM;
+ }else{
+ slow=sth_low/(hif_stcopy-lof_st);
+ slowA[l]=slow*Afreq_num;
+ slowM[l]=slow*Mfreq_num;
+ if(p->noiseoffset[1][l]<-1){
+ slowA[l]*=(p->noiseoffset[1][l]+2);
+ slowM[l]*=(p->noiseoffset[1][l]+2);
+ }
+ slowA[l] = prepoint - slowA[l];
+ slowM[l] = prepoint - slowM[l];
+ }
+ old_lof_st=lof_st;
+ old_hif_st=hif_st;
+ }
+ }
+ }
+
+ for(j=0;j<n;j+=partition){
+ float acc=0.f;
+ float rpacc;
+ int energy_loss=0;
+ int nn_num=0;
+
+ for(k=0;k<partition;k++){
+ int l=k+j;
+ float a=mdctM[l];
+ float b=mdctA[l];
+ float dummypoint;
+ float hypot_reserve;
+
+ postpoint=postpoint_backup;
+
+ if(l>=limit){
+ postpoint-=shigh[l];
+ if(mdctMA[l]){
+ hypot_reserve = fabs(fabs(a)-fabs(b));
+ if(hypot_reserve < 0.001){ // 0~0.000999-
+ dummypoint = stereo_threshholds_rephase[g->coupling_postpointamp[blobno]];
+ dummypoint = dummypoint+((postpoint-dummypoint)*(hypot_reserve*1000));
+ if(postpoint > dummypoint) postpoint = dummypoint;
+ }
+ }
+ }
+
+ if((l>=limit && rMs[l]<postpoint && rAs[l]<postpoint) ||
+ (rMs[l]<slowM[l] && rAs[l]<slowA[l])){
+
+ if(l>=0&&l<=n)
+ {
+ precomputed_couple_point(mag_memo[i][l],
+ floorM[l],floorA[l],
+ qM+l,qA+l);
+ }
+ if(rint(qM[l])==0.f){
+ energy_loss++;
+ if(l>=limit)acc+=qM[l]*qM[l];
+ }
+ }else{
+ couple_lossless(rM[l],rA[l],qM+l,qA+l);
+ }
+ }
+
+ {
+ int freqband_mid=j+16;
+ int freqband_flag=0;
+ int min_energy;
+
+ rpacc=acc;
+ /* When the energy loss of a partition is large, NN is performed in the middle of partition.
+ for 48/44.1/32kHz */
+ if(energy_loss==32 && fabs(qM[freqband_mid])>nnmid_th && acc>=p->vi->normal_thresh
+ && freqband_mid>=pointlimit && rint(qM[freqband_mid])==0.f){
+ if(mdctMA[freqband_mid]){
+ acc-=1.f;
+ rpacc-=1.32;
+ }else{
+ acc-=1.f;
+ rpacc-=1.f;
+ }
+ qM[freqband_mid]=unitnorm(qM[freqband_mid]);
+ freqband_flag=1;
+ nn_num++;
+ }
+ /* NN main (point stereo) */
+ for(k=0;k<partition && acc>=p->vi->normal_thresh;k++){
+ int l;
+ l=mag_sort[i][j+k];
+ if(freqband_mid==l && freqband_flag)continue;
+ if(l>=pointlimit && rint(qM[l])==0.f){
+ if(mdctMA[l]){
+ if(rpacc<p->vi->normal_thresh)continue;
+ acc-=1.f;
+ rpacc-=1.32;
+ }else{
+ acc-=1.f;
+ rpacc-=1.f;
+ }
+ qM[l]=unitnorm(qM[l]);
+ nn_num++;
+ }
+ }
+ /* The minimum energy complement.
+ for 48/44.1/32kHz */
+ min_energy=32-energy_loss+nn_num;
+ if(min_energy<2 || (j<=p->min_nn_lp && min_energy==2)){
+ int l;
+ float ab;
+ for(;k<partition;k++){
+ l=mag_sort[i][j+k];
+ ab=fabs(qM[l]);
+ if(ab<0.04)break;
+#if 1
+ if( ((mdctM[l]>0. && mdctA[l]<0.) || (mdctA[l]>0. && mdctM[l]<0.))
+ && ab<0.11)break; // 0.11
+#else
+ if(mdctMA[l] && ab < 0.11)break;
+#endif
+ if(rint(qM[l])==0.f && l>=pointlimit){
+ qM[l]=unitnorm(qM[l]);
+ break;
+ }
+ }
+ }
+ }
+ }
+ }
+#else /* SSE Optimize */
if(!stereo_threshholds[g->coupling_postpointamp[blobno]])stcont_start=n;
else{
// exception handling
@@ -1760,7 +11150,6 @@
old_lof_st=lof_st;
old_hif_st=hif_st;
}
-
if(l>=limit){
postpoint-=shigh;
/* The following prevents an extreme reduction of residue. (2ch stereo only) */
@@ -1778,11 +11167,12 @@
if((l>=limit && rMs<postpoint && rAs<postpoint) ||
(rMs<(prepoint-slowM) && rAs<(prepoint-slowA))){
-
+ if(l>=0&&l<=n)
+ {
precomputed_couple_point(mag_memo[i][l],
floorM[l],floorA[l],
qM+l,qA+l);
-
+ }
//if(rint(qM[l])==0.f)acc+=qM[l]*qM[l];
if(rint(qM[l])==0.f){
energy_loss++;
@@ -1806,7 +11196,7 @@
/* When the energy loss of a partition is large, NN is performed in the middle of partition.
for 48/44.1/32kHz */
if(energy_loss==32 && fabs(qM[freqband_mid])>nnmid_th && acc>=p->vi->normal_thresh
- && freqband_mid<sliding_lowpass && freqband_mid>=pointlimit && rint(qM[freqband_mid])==0.f){
+ && freqband_mid>=pointlimit && rint(qM[freqband_mid])==0.f){
if( ((mdctM[freqband_mid]>0.) && (mdctA[freqband_mid]<0.)) ||
((mdctA[freqband_mid]>0.) && (mdctM[freqband_mid]<0.)) ){
acc-=1.f;
@@ -1857,6 +11247,7 @@
}
}
}
+#endif /* SSE Optimize */
}
}
}
diff -ruN --exclude '*.in' libvorbis-1.2.0/lib/psy.h libvorbis-1.2.0-sse/lib/psy.h
--- libvorbis-1.2.0/lib/psy.h 2007-08-02 12:42:08.000000000 +0200
+++ libvorbis-1.2.0-sse/lib/psy.h 2007-08-02 12:43:10.000000000 +0200
@@ -126,6 +126,24 @@
int n33p;
int n75p;
+#ifdef __SSE__
+ int midpoint1; /* for bark_noise_hybridmp */
+ int midpoint1_4;
+ int midpoint1_8;
+ int midpoint1_16;
+ int midpoint2;
+ int midpoint2_4;
+ int midpoint2_8;
+ int midpoint2_16;
+
+ long *octsft; /* shifted octave */
+ long *octend; /* for seed_loop */
+ long *octpos; /* for max_seeds */
+#if defined(_OPENMP)
+ int _vp_couple_spoint0;
+ int _vp_couple_spoint1;
+#endif
+#endif
} vorbis_look_psy;
extern void _vp_psy_init(vorbis_look_psy *p,vorbis_info_psy *vi,
@@ -165,7 +183,12 @@
int end_block,
int blocktype, int modenumber,
int nW_modenumber,
+#ifdef __SSE__ /* SSE Optimize */
+ int lW_blocktype, int lW_modenumber, int lW_no,
+ float *tlogmdct);
+#else /* SSE Optimize */
int lW_blocktype, int lW_modenumber, int lW_no);
+#endif /* SSE Optimize */
extern float _vp_ampmax_decay(float amp,vorbis_dsp_state *vd);
@@ -173,7 +196,14 @@
vorbis_info_psy_global *g,
vorbis_look_psy *p,
vorbis_info_mapping0 *vi,
+#if defined(_OPENMP)
+ float **mdct,
+ float **ret,
+ int thnum,
+ int thmax);
+#else
float **mdct);
+#endif
extern void _vp_couple(int blobno,
vorbis_info_psy_global *g,
@@ -185,18 +215,39 @@
int **ifloor,
int *nonzero,
int sliding_lowpass,
+#if defined(_OPENMP)
+ float **mdct, float **res_org,
+ int thnum, int thmax);
+#else
float **mdct, float **res_org);
+#endif
extern void _vp_noise_normalize(vorbis_look_psy *p,
float *in,float *out,int *sortedindex);
extern void _vp_noise_normalize_sort(vorbis_look_psy *p,
+#ifdef __SSE__ /* SSE Optimize */
+ float *magnitudes,int *sortedindex,float *temp);
+#else /* SSE Optimize */
float *magnitudes,int *sortedindex);
+#endif /* SSE Optimize */
extern int **_vp_quantize_couple_sort(vorbis_block *vb,
vorbis_look_psy *p,
vorbis_info_mapping0 *vi,
+#ifdef __SSE__ /* SSE Optimize */
+ float **mags,
+#if defined(_OPENMP)
+ float *temp,
+ int **ret,
+ int thnum,
+ int thmax);
+#else
+ float *temp);
+#endif
+#else /* SSE Optimize */
float **mags);
+#endif /* SSE Optimize */
extern float lb_loudnoise_fix(vorbis_look_psy *p,
float noise_compand_level,
diff -ruN --exclude '*.in' libvorbis-1.2.0/lib/registry.h libvorbis-1.2.0-sse/lib/registry.h
--- libvorbis-1.2.0/lib/registry.h 2007-07-24 02:09:47.000000000 +0200
+++ libvorbis-1.2.0-sse/lib/registry.h 2007-08-02 12:43:10.000000000 +0200
@@ -27,6 +27,9 @@
extern vorbis_func_floor *_floor_P[];
extern vorbis_func_residue *_residue_P[];
+#if defined(_OPENMP)&&defined(__SSE__)
+extern vorbis_func_residue_mt *_residue_mt_P[];
+#endif
extern vorbis_func_mapping *_mapping_P[];
#endif
diff -ruN --exclude '*.in' libvorbis-1.2.0/lib/res0.c libvorbis-1.2.0-sse/lib/res0.c
--- libvorbis-1.2.0/lib/res0.c 2007-08-02 12:42:12.000000000 +0200
+++ libvorbis-1.2.0-sse/lib/res0.c 2007-08-02 13:01:02.000000000 +0200
@@ -30,6 +30,9 @@
#include "codebook.h"
#include "misc.h"
#include "os.h"
+#ifdef __SSE__ /* SSE Optimize */
+#include "xmmlib.h"
+#endif /* SSE Optimize */
#if defined(TRAIN_RES) || defined (TRAIN_RESAUX)
#include <stdio.h>
@@ -337,18 +340,582 @@
}
if(best>-1){
+#ifdef __SSE__ /* SSE Optimize */
+ /* Subtract the selected codebook entry (book->valuelist+best*dim) from
+ the vector at a, in place, with dedicated code for dim 2, 4 and 8.
+ NOTE(review): the dim 4 and dim 8 cases use _mm_load_ps/_mm_store_ps
+ on a, which require a 16-byte aligned address -- confirm that every
+ caller guarantees it. PM128 comes from xmmlib.h and is not visible
+ here; presumably it reinterprets the pointer as a __m128 operand. */
+ switch(dim)
+ {
+ case 2 :
+ {
+ float *ptr = book->valuelist+best*2;
+ __m128 XMM0 = _mm_load_ss(a );
+ __m128 XMM1 = _mm_load_ss(a+1);
+ XMM0 = _mm_sub_ss(XMM0, PM128(ptr ));
+ XMM1 = _mm_sub_ss(XMM1, PM128(ptr+1));
+ _mm_store_ss(a , XMM0);
+ _mm_store_ss(a+1, XMM1);
+ }
+ break;
+ case 4 :
+ {
+ float *ptr = book->valuelist+best*4;
+ __m128 XMM0;
+ XMM0 = _mm_load_ps(a );
+ XMM0 = _mm_sub_ps(XMM0, PM128(ptr ));
+ _mm_store_ps(a , XMM0);
+ }
+ break;
+ case 8 :
+ {
+ float *ptr = book->valuelist+best*8;
+ __m128 XMM0, XMM1;
+ XMM0 = _mm_load_ps(a );
+ XMM1 = _mm_load_ps(a+4);
+ XMM0 = _mm_sub_ps(XMM0, PM128(ptr ));
+ XMM1 = _mm_sub_ps(XMM1, PM128(ptr+4));
+ _mm_store_ps(a , XMM0);
+ _mm_store_ps(a+4, XMM1);
+ }
+ break;
+ default :
+ {
+ /* any other dimension: element-wise scalar subtraction */
+ float *ptr = book->valuelist+best*dim;
+ for(i=0;i<dim;i++)
+ *a++ -= *ptr++;
+ }
+ break;
+ }
+#else /* SSE Optimize */
+ /* Scalar fallback: the unmodified upstream loop. It must appear exactly
+ once here -- a second copy would redeclare ptr and subtract the entry
+ twice. */
float *ptr=book->valuelist+best*dim;
for(i=0;i<dim;i++)
*a++ -= *ptr++;
+#endif /* SSE Optimize */
}
return(best);
}
+#ifdef __SSE__ /* SSE Optimize */
+/* Exhaustive fallback for one-dimensional books: walk every entry that
+   owns a codeword (lengthlist > 0) and return the index whose value has
+   the smallest squared distance to target.  Returns -1 when no entry is
+   usable. */
+static inline int local_book_nearest_dim1(codebook *book,float target)
+{
+  const static_codebook *sc = book->c;
+  const float *value = book->valuelist;
+  float least = 0.f;
+  int pick = -1;
+  int idx;
+  for(idx=0;idx<book->entries;idx++,value++)
+  {
+    float diff, dist;
+    if(sc->lengthlist[idx]<=0) continue;
+    diff = value[0]-target;
+    dist = diff*diff;
+    if(pick==-1 || dist<least)
+    {
+      least = dist;
+      pick = idx;
+    }
+  }
+  return(pick);
+}
+
+/* Encode four consecutive scalars of a with a dim-1 book.
+   ia[0..3] hold the pre-quantised threshold indices of a[0..3]; each is
+   mapped through the thresh tree's quantmap to a book entry.  An entry
+   without a codeword is replaced by the exhaustive nearest match.  The
+   chosen values are subtracted from a in place and the four entries are
+   written to opb in order.  Returns the sum of the vorbis_book_encode
+   results. */
+static inline int local_book_besterror_dim1x4(codebook *book,float *a,oggpack_buffer *opb, int* ia)
+{
+  encode_aux_threshmatch *thresh = book->c->thresh_tree;
+  __m128 sample[4];
+  int entry[4];
+  int lane;
+  int bits = 0;
+
+  /* fetch the inputs and the table-driven guesses up front */
+  for(lane=0;lane<4;lane++)
+  {
+    sample[lane] = _mm_load_ss(a+lane);
+    entry[lane] = thresh->quantmap[ia[lane]];
+  }
+
+  /* a guess that has no codeword falls back to the full search */
+  for(lane=0;lane<4;lane++)
+  {
+    if(book->c->lengthlist[entry[lane]]<=0)
+      entry[lane] = local_book_nearest_dim1(book, a[lane]);
+  }
+
+  /* leave the quantisation error behind in a[] */
+  for(lane=0;lane<4;lane++)
+  {
+    __m128 chosen = _mm_load_ss(book->valuelist+entry[lane]);
+    _mm_store_ss(a+lane, _mm_sub_ss(sample[lane], chosen));
+  }
+
+  for(lane=0;lane<4;lane++)
+    bits += vorbis_book_encode(book, entry[lane], opb);
+  return(bits);
+}
+
+/* Exhaustive fallback for two-dimensional books: walk every entry that
+   owns a codeword (lengthlist > 0) and return the index whose value pair
+   has the smallest squared distance to target[0..1].  Returns -1 when no
+   entry is usable. */
+static inline int local_book_nearest_dim2(codebook *book,const float *target)
+{
+  const static_codebook *sc = book->c;
+  const float *value = book->valuelist;
+  float least = 0.f;
+  int pick = -1;
+  int idx;
+  for(idx=0;idx<book->entries;idx++,value+=2)
+  {
+    float d0, d1, dist;
+    if(sc->lengthlist[idx]<=0) continue;
+    d0 = value[0]-target[0];
+    d1 = value[1]-target[1];
+    dist = 0.f;
+    dist += d0*d0;
+    dist += d1*d1;
+    if(pick==-1 || dist<least)
+    {
+      least = dist;
+      pick = idx;
+    }
+  }
+  return(pick);
+}
+
+/* Encode two consecutive 2-vectors, a[0..1] and a[2..3], with a dim-2 book.
+   ia[0..3] hold the pre-quantised threshold indices of a[0..3].  Within
+   each pair the second element is the most significant digit of the entry
+   index (base tt->quantvals).  An entry without a codeword is replaced by
+   the exhaustive nearest match.  The chosen vectors are subtracted from a
+   in place (aligned load/store, so a must be 16-byte aligned) and both
+   entries are written to opb.  Returns the sum of the vorbis_book_encode
+   results. */
+static inline int local_book_besterror_dim2x2(codebook *book,float *a,oggpack_buffer *opb, int* ia)
+{
+  encode_aux_threshmatch *tt = book->c->thresh_tree;
+  __m128 residue = _mm_load_ps(a);
+  /* _mm_loadl_pi/_mm_loadh_pi take the previous register contents as an
+     input, so start from a defined value instead of an uninitialised one */
+  __m128 chosen = _mm_setzero_ps();
+  int q0 = tt->quantmap[ia[0]];
+  int q1 = tt->quantmap[ia[1]];
+  int q2 = tt->quantmap[ia[2]];
+  int q3 = tt->quantmap[ia[3]];
+  int first = q1*tt->quantvals+q0;
+  int second = q3*tt->quantvals+q2;
+  int bits;
+
+  if(book->c->lengthlist[first]<=0)
+    first = local_book_nearest_dim2(book, a);
+  if(book->c->lengthlist[second]<=0)
+    second = local_book_nearest_dim2(book, a+2);
+
+  /* low half <- first entry, high half <- second entry */
+  chosen = _mm_loadl_pi(chosen, (__m64*)(book->valuelist+first*2));
+  chosen = _mm_loadh_pi(chosen, (__m64*)(book->valuelist+second*2));
+  residue = _mm_sub_ps(residue, chosen);
+  _mm_store_ps(a, residue);
+
+  bits = vorbis_book_encode(book, first, opb);
+  bits += vorbis_book_encode(book, second, opb);
+  return(bits);
+}
+
+/* Pick the entry of a dim-4 book for the vector a[0..3] and subtract it
+   from a in place; the entry index is returned (the caller encodes it).
+   ia[0..3] hold the pre-quantised threshold indices of a[0..3]; ia[3] is
+   the most significant digit of the index (base quantvals).  When the
+   table-driven entry has no codeword, every entry with lengthlist > 0 is
+   tried and the one with the smallest squared error wins.  Uses aligned
+   loads and stores on a and on the book's value list. */
+static inline int local_book_besterror_dim4(codebook *book,float *a, int* ia)
+{
+  encode_aux_threshmatch *thresh = book->c->thresh_tree;
+  __m128 residue = _mm_load_ps(a);
+  int entry = 0;
+  int pos;
+
+  for(pos=3;pos>=0;pos--)
+  {
+    int digit = thresh->quantmap[ia[pos]];
+    entry = entry*thresh->quantvals+digit;
+  }
+
+  if(book->c->lengthlist[entry]<=0)
+  {
+    const static_codebook *sc = book->c;
+    float *value = book->valuelist;
+    float least = 0.f;
+    int idx;
+    entry = -1;
+    for(idx=0;idx<book->entries;idx++,value+=4)
+    {
+      __m128 diff;
+      float dist;
+      if(sc->lengthlist[idx]<=0) continue;
+      diff = _mm_load_ps(value);
+      diff = _mm_sub_ps(diff, residue);
+      diff = _mm_mul_ps(diff, diff);
+      dist = _mm_add_horz(diff);
+      if(entry==-1 || dist<least)
+      {
+        least = dist;
+        entry = idx;
+      }
+    }
+  }
+
+  residue = _mm_sub_ps(residue, PM128(book->valuelist+entry*4));
+  _mm_store_ps(a, residue);
+  return(entry);
+}
+
+/* Pick the entry of a dim-8 book for the vector a[0..7] and subtract it
+   from a in place; the entry index is returned (the caller encodes it).
+   ia[0..7] hold the pre-quantised threshold indices of a[0..7]; ia[7] is
+   the most significant digit of the index (base quantvals).  When the
+   table-driven entry has no codeword, every entry with lengthlist > 0 is
+   tried and the one with the smallest squared error wins.  Uses aligned
+   loads and stores on a and on the book's value list. */
+static inline int local_book_besterror_dim8(codebook *book,float *a, int* ia)
+{
+  encode_aux_threshmatch *thresh = book->c->thresh_tree;
+  __m128 lo = _mm_load_ps(a  );
+  __m128 hi = _mm_load_ps(a+4);
+  int entry = 0;
+  int pos;
+
+  for(pos=7;pos>=0;pos--)
+  {
+    int digit = thresh->quantmap[ia[pos]];
+    entry = entry*thresh->quantvals+digit;
+  }
+
+  if(book->c->lengthlist[entry]<=0)
+  {
+    const static_codebook *sc = book->c;
+    float *value = book->valuelist;
+    float least = 0.f;
+    int idx;
+    entry = -1;
+    for(idx=0;idx<book->entries;idx++,value+=8)
+    {
+      __m128 diff_lo, diff_hi;
+      float dist;
+      if(sc->lengthlist[idx]<=0) continue;
+      diff_lo = _mm_load_ps(value  );
+      diff_hi = _mm_load_ps(value+4);
+      diff_lo = _mm_sub_ps(diff_lo, lo);
+      diff_hi = _mm_sub_ps(diff_hi, hi);
+      diff_lo = _mm_mul_ps(diff_lo, diff_lo);
+      diff_hi = _mm_mul_ps(diff_hi, diff_hi);
+      diff_lo = _mm_add_ps(diff_lo, diff_hi);
+      dist = _mm_add_horz(diff_lo);
+      if(entry==-1 || dist<least)
+      {
+        least = dist;
+        entry = idx;
+      }
+    }
+  }
+
+  lo = _mm_sub_ps(lo, PM128(book->valuelist+entry*8  ));
+  hi = _mm_sub_ps(hi, PM128(book->valuelist+entry*8+4));
+  _mm_store_ps(a  , lo);
+  _mm_store_ps(a+4, hi);
+  return(entry);
+}
+#endif /* SSE Optimize */
+
+/* Encode the n residue values at vec with book, appending the codewords
+ to opb; the return value is the accumulated vorbis_book_encode result.
+ SSE build: for book dimensions 1, 2, 4 and 8 the whole run is first
+ quantised to threshold indices in TEMP (clamp to +/-(threshvals>>1),
+ shift to non-negative, round to int), then the per-dimension helpers
+ turn those indices into book entries. Every other dimension takes the
+ generic scalar path that follows.
+ NOTE(review): the quantiser loops read vec sixteen floats at a time with
+ aligned loads, so vec is assumed 16-byte aligned and readable up to the
+ next multiple of 16 -- confirm against the callers. */
static int _encodepart(oggpack_buffer *opb,float *vec, int n,
codebook *book,long *acc){
int i,bits=0;
int dim=book->dim;
+#ifdef __SSE__ /* SSE Optimize */
+ /* TEMP is filled sixteen ints per pass with 16-byte aligned stores, so
+ round its length up to a multiple of 16 and align the base address by
+ hand rather than relying on what alloca happens to return. */
+ char *temp_raw = (char*)_ogg_alloca(sizeof(int)*((n+15)&~15)+15);
+ int* TEMP = (int*)(temp_raw+((16-((size_t)temp_raw&15))&15));
+ __m128 PMIN = _mm_set1_ps(-(float)(book->c->thresh_tree->threshvals>>1));
+ __m128 PMAX = _mm_set1_ps( (float)(book->c->thresh_tree->threshvals>>1));
+
+ /* Only the dimensions the switch below implements may come this way;
+ anything else would fall into its empty default and encode nothing. */
+ if(dim==1 || dim==2 || dim==4 || dim==8)
+ {
+ /* unit-spaced thresholds: no rescaling needed before rounding */
+ if(book->c->thresh_tree->quantthresh[0]==-(float)(book->c->thresh_tree->threshvals>>1)+.5f)
+ {
+#if defined(__SSE2__)
+ for(i=0;i<n;i+=16)
+ {
+ __m128 XMM0 = _mm_load_ps(vec+i );
+ __m128 XMM1 = _mm_load_ps(vec+i+ 4);
+ __m128 XMM2 = _mm_load_ps(vec+i+ 8);
+ __m128 XMM3 = _mm_load_ps(vec+i+12);
+ XMM0 = _mm_min_ps(XMM0, PMAX);
+ XMM1 = _mm_min_ps(XMM1, PMAX);
+ XMM2 = _mm_min_ps(XMM2, PMAX);
+ XMM3 = _mm_min_ps(XMM3, PMAX);
+ XMM0 = _mm_max_ps(XMM0, PMIN);
+ XMM1 = _mm_max_ps(XMM1, PMIN);
+ XMM2 = _mm_max_ps(XMM2, PMIN);
+ XMM3 = _mm_max_ps(XMM3, PMIN);
+ XMM0 = _mm_add_ps(XMM0, PMAX);
+ XMM1 = _mm_add_ps(XMM1, PMAX);
+ XMM2 = _mm_add_ps(XMM2, PMAX);
+ XMM3 = _mm_add_ps(XMM3, PMAX);
+ XMM0 = _mm_castsi128_ps(_mm_cvtps_epi32(XMM0));
+ XMM1 = _mm_castsi128_ps(_mm_cvtps_epi32(XMM1));
+ XMM2 = _mm_castsi128_ps(_mm_cvtps_epi32(XMM2));
+ XMM3 = _mm_castsi128_ps(_mm_cvtps_epi32(XMM3));
+ /* _mm_store_ps takes a float*, not a __m128* */
+ _mm_store_ps((float*)(TEMP+i ), XMM0);
+ _mm_store_ps((float*)(TEMP+i+ 4), XMM1);
+ _mm_store_ps((float*)(TEMP+i+ 8), XMM2);
+ _mm_store_ps((float*)(TEMP+i+12), XMM3);
+ }
+#else
+ for(i=0;i<n;i+=16)
+ {
+ __m64 MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7;
+ __m128 XMM0 = _mm_load_ps(vec+i );
+ __m128 XMM1 = _mm_load_ps(vec+i+ 4);
+ __m128 XMM2 = _mm_load_ps(vec+i+ 8);
+ __m128 XMM3 = _mm_load_ps(vec+i+12);
+ XMM0 = _mm_min_ps(XMM0, PMAX);
+ XMM1 = _mm_min_ps(XMM1, PMAX);
+ XMM2 = _mm_min_ps(XMM2, PMAX);
+ XMM3 = _mm_min_ps(XMM3, PMAX);
+ XMM0 = _mm_max_ps(XMM0, PMIN);
+ XMM1 = _mm_max_ps(XMM1, PMIN);
+ XMM2 = _mm_max_ps(XMM2, PMIN);
+ XMM3 = _mm_max_ps(XMM3, PMIN);
+ XMM0 = _mm_add_ps(XMM0, PMAX);
+ XMM1 = _mm_add_ps(XMM1, PMAX);
+ XMM2 = _mm_add_ps(XMM2, PMAX);
+ XMM3 = _mm_add_ps(XMM3, PMAX);
+ MM0 = _mm_cvtps_pi32(XMM0);
+ MM2 = _mm_cvtps_pi32(XMM1);
+ MM4 = _mm_cvtps_pi32(XMM2);
+ MM6 = _mm_cvtps_pi32(XMM3);
+ XMM0 = _mm_movehl_ps(XMM0, XMM0);
+ XMM1 = _mm_movehl_ps(XMM1, XMM1);
+ XMM2 = _mm_movehl_ps(XMM2, XMM2);
+ XMM3 = _mm_movehl_ps(XMM3, XMM3);
+ MM1 = _mm_cvtps_pi32(XMM0);
+ MM3 = _mm_cvtps_pi32(XMM1);
+ MM5 = _mm_cvtps_pi32(XMM2);
+ MM7 = _mm_cvtps_pi32(XMM3);
+ PM64(TEMP+i ) = MM0;
+ PM64(TEMP+i+ 4) = MM2;
+ PM64(TEMP+i+ 8) = MM4;
+ PM64(TEMP+i+12) = MM6;
+ PM64(TEMP+i+ 2) = MM1;
+ PM64(TEMP+i+ 6) = MM3;
+ PM64(TEMP+i+10) = MM5;
+ PM64(TEMP+i+14) = MM7;
+ }
+ _mm_empty();
+#endif
+ }
+ else
+ {
+ /* other spacing: scale by the reciprocal of the first threshold step
+ before clamping and rounding */
+ __m128 PM = _mm_set1_ps(1.f/(
+ (float)(book->c->thresh_tree->quantthresh[1]-book->c->thresh_tree->quantthresh[0])
+ -1.0e-04)); /* for control of round */
+ for(i=0;i<n;i+=16)
+ {
+#if !defined(__SSE2__)
+ __m64 MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7;
+#endif
+ __m128 XMM0, XMM1, XMM2, XMM3;
+ XMM0 = _mm_load_ps(vec+i );
+ XMM1 = _mm_load_ps(vec+i+ 4);
+ XMM2 = _mm_load_ps(vec+i+ 8);
+ XMM3 = _mm_load_ps(vec+i+12);
+ XMM0 = _mm_mul_ps(XMM0, PM);
+ XMM1 = _mm_mul_ps(XMM1, PM);
+ XMM2 = _mm_mul_ps(XMM2, PM);
+ XMM3 = _mm_mul_ps(XMM3, PM);
+ XMM0 = _mm_max_ps(XMM0, PMIN);
+ XMM1 = _mm_max_ps(XMM1, PMIN);
+ XMM2 = _mm_max_ps(XMM2, PMIN);
+ XMM3 = _mm_max_ps(XMM3, PMIN);
+ XMM0 = _mm_min_ps(XMM0, PMAX);
+ XMM1 = _mm_min_ps(XMM1, PMAX);
+ XMM2 = _mm_min_ps(XMM2, PMAX);
+ XMM3 = _mm_min_ps(XMM3, PMAX);
+ XMM0 = _mm_add_ps(XMM0, PMAX);
+ XMM1 = _mm_add_ps(XMM1, PMAX);
+ XMM2 = _mm_add_ps(XMM2, PMAX);
+ XMM3 = _mm_add_ps(XMM3, PMAX);
+#if defined(__SSE2__)
+ XMM0 = _mm_castsi128_ps(_mm_cvtps_epi32(XMM0));
+ XMM1 = _mm_castsi128_ps(_mm_cvtps_epi32(XMM1));
+ XMM2 = _mm_castsi128_ps(_mm_cvtps_epi32(XMM2));
+ XMM3 = _mm_castsi128_ps(_mm_cvtps_epi32(XMM3));
+ /* _mm_store_ps takes a float*, not a __m128* */
+ _mm_store_ps((float*)(TEMP+i ), XMM0);
+ _mm_store_ps((float*)(TEMP+i+ 4), XMM1);
+ _mm_store_ps((float*)(TEMP+i+ 8), XMM2);
+ _mm_store_ps((float*)(TEMP+i+12), XMM3);
+ }
+#else
+ MM0 = _mm_cvtps_pi32(XMM0);
+ MM2 = _mm_cvtps_pi32(XMM1);
+ MM4 = _mm_cvtps_pi32(XMM2);
+ MM6 = _mm_cvtps_pi32(XMM3);
+ XMM0 = _mm_movehl_ps(XMM0, XMM0);
+ XMM1 = _mm_movehl_ps(XMM1, XMM1);
+ XMM2 = _mm_movehl_ps(XMM2, XMM2);
+ XMM3 = _mm_movehl_ps(XMM3, XMM3);
+ MM1 = _mm_cvtps_pi32(XMM0);
+ MM3 = _mm_cvtps_pi32(XMM1);
+ MM5 = _mm_cvtps_pi32(XMM2);
+ MM7 = _mm_cvtps_pi32(XMM3);
+ PM64(TEMP+i ) = MM0;
+ PM64(TEMP+i+ 4) = MM2;
+ PM64(TEMP+i+ 8) = MM4;
+ PM64(TEMP+i+12) = MM6;
+ PM64(TEMP+i+ 2) = MM1;
+ PM64(TEMP+i+ 6) = MM3;
+ PM64(TEMP+i+10) = MM5;
+ PM64(TEMP+i+14) = MM7;
+ }
+ _mm_empty();
+#endif
+ }
+ /* hand the threshold indices to the per-dimension entry pickers */
+ switch(dim)
+ {
+ case 1:
+ for(i=0;i<n;i+=4)
+ {
+ bits += local_book_besterror_dim1x4(book,vec+i, opb, TEMP+i);
+ }
+ break;
+ case 2:
+ for(i=0;i<n;i+=4)
+ {
+ bits += local_book_besterror_dim2x2(book,vec+i, opb, TEMP+i);
+ }
+ break;
+ case 4:
+ for(i=0;i<n;i+=4)
+ {
+ int entry = local_book_besterror_dim4(book,vec+i, TEMP+i);
+ bits += vorbis_book_encode(book,entry,opb);
+ }
+ break;
+ case 8:
+ for(i=0;i<n;i+=8)
+ {
+ int entry = local_book_besterror_dim8(book,vec+i, TEMP+i);
+ bits += vorbis_book_encode(book,entry,opb);
+ }
+ break;
+ default:
+ /* not reached: the enclosing test admits only 1, 2, 4 and 8 */
+ break;
+ }
+ }
+ else
+ {
+#endif /* SSE Optimize */
int step=n/dim;
for(i=0;i<step;i++){
@@ -362,6 +929,9 @@
bits+=vorbis_book_encode(book,entry,opb);
}
+#if defined(__SSE__) /* SSE Optimize */
+ }
+#endif /* SSE Optimize */
return(bits);
}
@@ -455,11 +1025,140 @@
#endif
partword[0]=_vorbis_block_alloc(vb,n*ch/samples_per_partition*sizeof(*partword[0]));
+#ifdef __SSE__ /* SSE Optimize */
+ {
+ /* Zero partword[0] with float-sized stores. The length therefore has to
+ be counted in floats, not in partword elements: take the same byte
+ count the allocation above uses and divide by sizeof(float), so the
+ whole buffer is cleared whatever the element width is.
+ NOTE(review): the aligned stores assume _vorbis_block_alloc hands back
+ a 16-byte aligned pointer in SSE builds -- confirm. */
+ int pn = (int)(n*ch/samples_per_partition*sizeof(*partword[0])/sizeof(float));
+ __m128 XMM0 = _mm_setzero_ps();
+ int tn;
+ float *d = (float*)(partword[0]);
+ /* 32, then 16, 8 and 4 floats per pass; scalar stores finish the tail */
+ tn = pn&(~31);
+ for(i=0;i<tn;i+=32)
+ {
+ _mm_store_ps(d+i , XMM0);
+ _mm_store_ps(d+i+ 4, XMM0);
+ _mm_store_ps(d+i+ 8, XMM0);
+ _mm_store_ps(d+i+12, XMM0);
+ _mm_store_ps(d+i+16, XMM0);
+ _mm_store_ps(d+i+20, XMM0);
+ _mm_store_ps(d+i+24, XMM0);
+ _mm_store_ps(d+i+28, XMM0);
+ }
+ tn = pn&(~15);
+ for(;i<tn;i+=16)
+ {
+ _mm_store_ps(d+i , XMM0);
+ _mm_store_ps(d+i+ 4, XMM0);
+ _mm_store_ps(d+i+ 8, XMM0);
+ _mm_store_ps(d+i+12, XMM0);
+ }
+ tn = pn&(~7);
+ for(;i<tn;i+=8)
+ {
+ _mm_store_ps(d+i , XMM0);
+ _mm_store_ps(d+i+ 4, XMM0);
+ }
+ tn = pn&(~3);
+ for(;i<tn;i+=4)
+ {
+ _mm_store_ps(d+i , XMM0);
+ }
+ for(;i<pn;i++)
+ {
+ *(d+i ) = 0;
+ }
+ }
+#else /* SSE Optimize */
memset(partword[0],0,n*ch/samples_per_partition*sizeof(*partword[0]));
+#endif /* SSE Optimize */
for(i=0,l=info->begin/ch;i<partvals;i++){
float magmax=0.f;
float angmax=0.f;
+#ifdef __SSE__ /* SSE Optimize */
+ if(ch==2&&possible_partitions==10)
+ {
+ register __m128 PMAGMAX = _mm_setzero_ps();
+ register __m128 PANGMAX = _mm_setzero_ps();
+ float *pin0 = in[0];
+ float *pin1 = in[1];
+
+ for(j=0;j<samples_per_partition;j+=16)
+ {
+ __m128 XMM0 = _mm_load_ps(pin0+l );
+ __m128 XMM1 = _mm_load_ps(pin1+l );
+ __m128 XMM2 = _mm_load_ps(pin0+l+4);
+ __m128 XMM3 = _mm_load_ps(pin1+l+4);
+ XMM0 = _mm_and_ps(XMM0, PABSMASK.ps);
+ XMM1 = _mm_and_ps(XMM1, PABSMASK.ps);
+ XMM2 = _mm_and_ps(XMM2, PABSMASK.ps);
+ XMM3 = _mm_and_ps(XMM3, PABSMASK.ps);
+ PMAGMAX = _mm_max_ps(PMAGMAX, XMM0);
+ PANGMAX = _mm_max_ps(PANGMAX, XMM1);
+ PMAGMAX = _mm_max_ps(PMAGMAX, XMM2);
+ PANGMAX = _mm_max_ps(PANGMAX, XMM3);
+ l += 8;
+ }
+ magmax = _mm_max_horz(PMAGMAX);
+ angmax = _mm_max_horz(PANGMAX);
+ PMAGMAX = _mm_set1_ps(magmax);
+ PANGMAX = _mm_set1_ps(angmax);
+ {
+ /* Lookup from a 4-bit compare mask to a partition class. Bit k of the
+ mask is set when class k (jtable0) or class 4+k (jtable1) is rejected,
+ so each slot holds the position of the lowest clear bit, offset by 4
+ in jtable1. Slot 15 (all four rejected) is never read: the callers
+ test j!=15 before indexing. */
+ static int jtable0[16] =
+ {
+ 0, 1, 0, 2, 0, 1, 0, 3,
+ 0, 1, 0, 2, 0, 1, 0,-1,
+ };
+ static int jtable1[16] =
+ {
+ 4, 5, 4, 6, 4, 5, 4, 7,
+ 4, 5, 4, 6, 4, 5, 4,-1,
+ };
+ __m128 XMM0 = _mm_lddqu_ps(info->classmetric1);
+ __m128 XMM1 = _mm_lddqu_ps(info->classmetric2);
+ XMM0 = _mm_cmplt_ps(XMM0, PMAGMAX);
+ XMM1 = _mm_cmplt_ps(XMM1, PANGMAX);
+ XMM0 = _mm_or_ps(XMM0, XMM1);
+ j = _mm_movemask_ps(XMM0);
+ if(j!=15)
+ j = jtable0[j];
+ else
+ {
+ __m128 XMM0 = _mm_lddqu_ps(info->classmetric1+4);
+ __m128 XMM1 = _mm_lddqu_ps(info->classmetric2+4);
+ XMM0 = _mm_cmplt_ps(XMM0, PMAGMAX);
+ XMM1 = _mm_cmplt_ps(XMM1, PANGMAX);
+ XMM0 = _mm_or_ps(XMM0, XMM1);
+ j = _mm_movemask_ps(XMM0);
+ if(j!=15)
+ j = jtable1[j];
+ else
+ {
+ if(magmax<=info->classmetric1[8] &&
+ angmax<=info->classmetric2[8])
+ j = 8;
+ else
+ j = 9;
+ }
+ }
+ }
+ }
+ else
+ {
+ for(j=0;j<samples_per_partition;j+=ch)
+ {
+ if(fabs(in[0][l])>magmax)
+ magmax=fabs(in[0][l]);
+ for(k=1;k<ch;k++)
+ if(fabs(in[k][l])>angmax)
+ angmax=fabs(in[k][l]);
+ l++;
+ }
+ for(j=0;j<possible_partitions-1;j++)
+ if(magmax<=info->classmetric1[j] &&
+ angmax<=info->classmetric2[j])
+ break;
+ }
+#else /* SSE Optimize */
for(j=0;j<samples_per_partition;j+=ch){
if(fabs(in[0][l])>magmax)magmax=fabs(in[0][l]);
for(k=1;k<ch;k++)
@@ -471,6 +1170,7 @@
if(magmax<=info->classmetric1[j] &&
angmax<=info->classmetric2[j])
break;
+#endif /* SSE Optimize */
partword[0][i]=j;
@@ -735,8 +1435,27 @@
for(i=0;i<ch;i++)
if(nonzero[i]){
if(out)
+#ifdef __SSE__ /* SSE Optimize */
+ {
+ float *pin = in[i];
+ float *pout = out[i];
+ for(j=0;j<n;j+=8)
+ {
+ __m128 XMM0 = _mm_load_ps(pout+j );
+ __m128 XMM2 = _mm_load_ps(pin+j );
+ __m128 XMM1 = _mm_load_ps(pout+j+4);
+ __m128 XMM3 = _mm_load_ps(pin+j+4);
+ XMM0 = _mm_add_ps(XMM0, XMM2);
+ XMM1 = _mm_add_ps(XMM1, XMM3);
+ _mm_store_ps(pout+j , XMM0);
+ _mm_store_ps(pout+j+4, XMM1);
+
+ }
+ }
+#else /* SSE Optimize */
for(j=0;j<n;j++)
out[i][j]+=in[i][j];
+#endif /* SSE Optimize */
in[used++]=in[i];
}
@@ -746,8 +1465,26 @@
used=0;
for(i=0;i<ch;i++)
if(nonzero[i]){
+#ifdef __SSE__ /* SSE Optimize */
+ {
+ /* out[i] -= in[used], eight floats per pass. in[] was compacted above
+ (in[used++]=in[i]), so the data belonging to channel i now lives at
+ in[used] while its accumulator is still out[i] -- the same indexing
+ as the scalar loop in the #else branch. */
+ float *pin = in[used];
+ float *pout = out[i];
+ for(j=0;j<n;j+=8)
+ {
+ __m128 XMM0 = _mm_load_ps(pout+j );
+ __m128 XMM2 = _mm_load_ps(pin+j );
+ __m128 XMM1 = _mm_load_ps(pout+j+4);
+ __m128 XMM3 = _mm_load_ps(pin+j+4);
+ XMM0 = _mm_sub_ps(XMM0, XMM2);
+ XMM1 = _mm_sub_ps(XMM1, XMM3);
+ _mm_store_ps(pout+j , XMM0);
+ _mm_store_ps(pout+j+4, XMM1);
+ }
+ }
+#else /* SSE Optimize */
for(j=0;j<n;j++)
out[i][j]-=in[used][j];
+#endif /* SSE Optimize */
used++;
}
}
@@ -805,24 +1542,124 @@
reshape ourselves into a single channel res1 */
/* ugly; reallocs for each coupling pass :-( */
float *work=_vorbis_block_alloc(vb,ch*n*sizeof(*work));
+#ifdef __SSE__ /* SSE Optimize */
+ for(i=0;i<ch;i++){
+ if(nonzero[i])used++;
+ }
+ if(ch==2)
+ {
+ float *pcm0=in[0];
+ float *pcm1=in[1];
+ for(j=0;j<n;j+=16)
+ {
+ // ABCD ABEF AEBF
+ // EFGH -> CDGH -> CGDH
+ __m128 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7;
+ XMM0 = _mm_load_ps(pcm0+j );
+ XMM2 = _mm_load_ps(pcm0+j+ 4);
+ XMM4 = _mm_load_ps(pcm1+j );
+ XMM5 = _mm_load_ps(pcm1+j+ 4);
+ XMM1 = XMM0;
+ XMM3 = XMM2;
+ XMM0 = _mm_unpacklo_ps(XMM0, XMM4);
+ XMM1 = _mm_unpackhi_ps(XMM1, XMM4);
+ XMM6 = _mm_load_ps(pcm0+j+ 8);
+ XMM7 = _mm_load_ps(pcm0+j+12);
+ XMM2 = _mm_unpacklo_ps(XMM2, XMM5);
+ XMM3 = _mm_unpackhi_ps(XMM3, XMM5);
+ XMM4 = _mm_load_ps(pcm1+j+ 8);
+ XMM5 = _mm_load_ps(pcm1+j+12);
+ _mm_store_ps(work+j*2 , XMM0);
+ _mm_store_ps(work+j*2+ 4, XMM1);
+ XMM1 = XMM6;
+ _mm_store_ps(work+j*2+ 8, XMM2);
+ _mm_store_ps(work+j*2+12, XMM3);
+ XMM3 = XMM7;
+ XMM6 = _mm_unpacklo_ps(XMM6, XMM4);
+ XMM1 = _mm_unpackhi_ps(XMM1, XMM4);
+ XMM7 = _mm_unpacklo_ps(XMM7, XMM5);
+ XMM3 = _mm_unpackhi_ps(XMM3, XMM5);
+ _mm_store_ps(work+j*2+16, XMM6);
+ _mm_store_ps(work+j*2+20, XMM1);
+ _mm_store_ps(work+j*2+24, XMM7);
+ _mm_store_ps(work+j*2+28, XMM3);
+ }
+ }
+ else
+ {
+ for(i=0;i<ch;i++){
+ float *pcm=in[i];
+ for(j=0,k=i;j<n;j++,k+=ch)
+ work[k]=pcm[j];
+ }
+ }
+#else /* SSE Optimize */
for(i=0;i<ch;i++){
float *pcm=in[i];
if(nonzero[i])used++;
for(j=0,k=i;j<n;j++,k+=ch)
work[k]=pcm[j];
}
+#endif /* SSE Optimize */
if(used){
int ret=_01forward(opb,vb,vl,&work,1,partword,_encodepart);
/* update the sofar vector */
if(out){
+#ifdef __SSE__ /* SSE Optimize */
+ if(ch==2)
+ {
+ /* Two-channel form of sofar[j]+=pcm[j]-work[k]: work holds the two
+ channels interleaved, so each pass de-interleaves sixteen floats
+ into four per-channel vectors and updates eight samples of both
+ accumulators. */
+ float *pcm0 = in[0];
+ float *pcm1 = in[1];
+ float *sofar0 = out[0];
+ float *sofar1 = out[1];
+ for(j=0;j<n;j+=8)
+ {
+ __m128 XMM0, XMM1, XMM2, XMM3;
+ __m128 XMM4, XMM5, XMM6, XMM7;
+ XMM0 = _mm_load_ps(work+j*2 );
+ XMM4 = _mm_load_ps(work+j*2+ 8);
+ XMM6 = _mm_load_ps(work+j*2+ 4);
+ XMM7 = _mm_load_ps(work+j*2+12);
+ /* keep copies: the even-lane shuffles overwrite XMM0 and XMM4 before
+ the odd lanes are extracted */
+ XMM1 = XMM0;
+ XMM5 = XMM4;
+ XMM0 = _mm_shuffle_ps(XMM0, XMM6, _MM_SHUFFLE(2,0,2,0));
+ XMM1 = _mm_shuffle_ps(XMM1, XMM6, _MM_SHUFFLE(3,1,3,1));
+ XMM4 = _mm_shuffle_ps(XMM4, XMM7, _MM_SHUFFLE(2,0,2,0));
+ XMM5 = _mm_shuffle_ps(XMM5, XMM7, _MM_SHUFFLE(3,1,3,1));
+ XMM2 = _mm_load_ps(pcm0+j );
+ XMM3 = _mm_load_ps(pcm1+j );
+ XMM6 = _mm_load_ps(pcm0+j + 4);
+ XMM7 = _mm_load_ps(pcm1+j + 4);
+ XMM2 = _mm_sub_ps(XMM2, XMM0);
+ XMM3 = _mm_sub_ps(XMM3, XMM1);
+ XMM6 = _mm_sub_ps(XMM6, XMM4);
+ XMM7 = _mm_sub_ps(XMM7, XMM5);
+ /* each channel accumulates into its own sofar vector */
+ XMM0 = _mm_load_ps(sofar0+j );
+ XMM1 = _mm_load_ps(sofar1+j );
+ XMM4 = _mm_load_ps(sofar0+j+4);
+ XMM5 = _mm_load_ps(sofar1+j+4);
+ XMM2 = _mm_add_ps(XMM2, XMM0);
+ XMM3 = _mm_add_ps(XMM3, XMM1);
+ XMM6 = _mm_add_ps(XMM6, XMM4);
+ XMM7 = _mm_add_ps(XMM7, XMM5);
+ _mm_store_ps(sofar0+j , XMM2);
+ _mm_store_ps(sofar1+j , XMM3);
+ _mm_store_ps(sofar0+j+4, XMM6);
+ _mm_store_ps(sofar1+j+4, XMM7);
+ }
+ }
+ else
+ {
+#endif /* SSE Optimize */
for(i=0;i<ch;i++){
float *pcm=in[i];
float *sofar=out[i];
for(j=0,k=i;j<n;j++,k+=ch)
sofar[j]+=pcm[j]-work[k];
-
+
}
+#ifdef __SSE__ /* SSE Optimize */
+ }
+#endif /* SSE Optimize */
}
return(ret);
}else{
@@ -883,7 +1720,6 @@
return(0);
}
-
vorbis_func_residue residue0_exportbundle={
NULL,
&res0_unpack,
@@ -916,4 +1752,3 @@
&res2_forward,
&res2_inverse
};
-
diff -ruN --exclude '*.in' libvorbis-1.2.0/lib/sharedbook.c libvorbis-1.2.0-sse/lib/sharedbook.c
--- libvorbis-1.2.0/lib/sharedbook.c 2007-07-24 02:09:47.000000000 +0200
+++ libvorbis-1.2.0-sse/lib/sharedbook.c 2007-08-02 12:43:15.000000000 +0200
@@ -24,6 +24,9 @@
#include "vorbis/codec.h"
#include "codebook.h"
#include "scales.h"
+#ifdef __SSE__ /* SSE Optimize */
+#include "xmmlib.h"
+#endif /* SSE Optimize */
/**** pack/unpack helpers ******************************************/
int _ilog(unsigned int v){
diff -ruN --exclude '*.in' libvorbis-1.2.0/lib/smallft.c libvorbis-1.2.0-sse/lib/smallft.c
--- libvorbis-1.2.0/lib/smallft.c 2007-07-24 02:09:47.000000000 +0200
+++ libvorbis-1.2.0-sse/lib/smallft.c 2007-08-02 12:43:15.000000000 +0200
@@ -34,6 +34,5529 @@
#include "smallft.h"
#include "os.h"
#include "misc.h"
+#ifdef __SSE__ /* SSE Optimize */
+#include "xmmlib.h"
+
+static _MM_ALIGN16 int IP256[16] = { 64, 64, 0, 128, 64, 192, 32, 160, 96, 224}; /* Index table for the 256-point case: 16 slots but only 10 initialisers, so the last 6 are zero. Leading pair is 256/4; the next 8 entries are the 3-bit bit-reversal of 0..7 scaled by 32. NOTE(review): presumably a bit-reversal work area for the SSE FFT, and _MM_ALIGN16 presumably requests 16-byte alignment (defined in xmmlib.h) - confirm. */
+static _MM_ALIGN16 int IP512[32] = { 128, 128, 0, 256, 128, 384, 64, 320, 192, 448}; /* Index table for the 512-point case: 32 slots but only 10 initialisers, so the last 22 are zero. Leading pair is 512/4; the next 8 entries are the 3-bit bit-reversal of 0..7 scaled by 64. NOTE(review): presumably a bit-reversal work area for the SSE FFT - confirm against its users. */
+static _MM_ALIGN16 int IP1024[32] = { /* Index table for the 1024-point case: 32 slots, 18 initialisers, remaining 14 are zero. NOTE(review): presumably a bit-reversal work area for the SSE FFT - confirm. */
+ 256, 256, /* leading pair: 1024/4, stored twice */
+ 0, 512, 256, 768, 128, 640, 384, 896, /* 4-bit bit-reversal of 0..7, scaled by 64 */
+ 64, 576, 320, 832, 192, 704, 448, 960 /* 4-bit bit-reversal of 8..15, scaled by 64 */
+};
+static _MM_ALIGN16 int IP2048[32] = { /* Index table for the 2048-point case: 32 slots, 18 initialisers, remaining 14 are zero. NOTE(review): presumably a bit-reversal work area for the SSE FFT - confirm. */
+ 512, 512, /* leading pair: 2048/4, stored twice */
+ 0, 1024, 512, 1536, 256, 1280, 768, 1792, /* 4-bit bit-reversal of 0..7, scaled by 128 */
+ 128, 1152, 640, 1664, 384, 1408, 896, 1920 /* 4-bit bit-reversal of 8..15, scaled by 128 */
+};
+static _MM_ALIGN16 int IP4096[64] = { /* Index table for the 4096-point case: 64 slots, 34 initialisers, remaining 30 are zero. NOTE(review): presumably a bit-reversal work area for the SSE FFT - confirm. */
+ 1024, 1024, /* leading pair: 4096/4, stored twice */
+ 0, 2048, 1024, 3072, 512, 2560, 1536, 3584, /* 5-bit bit-reversal of 0..7, scaled by 128 */
+ 256, 2304, 1280, 3328, 768, 2816, 1792, 3840, /* 5-bit bit-reversal of 8..15, scaled by 128 */
+ 128, 2176, 1152, 3200, 640, 2688, 1664, 3712, /* 5-bit bit-reversal of 16..23, scaled by 128 */
+ 384, 2432, 1408, 3456, 896, 2944, 1920, 3968 /* 5-bit bit-reversal of 24..31, scaled by 128 */
+};
+
+static _MM_ALIGN16 float W2[4] = { /* One constant replicated across four floats. NOTE(review): presumably so a single aligned 128-bit load fills every SSE lane - confirm at the use sites. */
+ 7.0710676908493e-001, 7.0710676908493e-001, 7.0710676908493e-001, 7.0710676908493e-001 /* approximately sqrt(1/2) at single-float precision */
+};
+
+static _MM_ALIGN16 float W256[] = { /* Precomputed weight table for the 256-point case; sized by its initialiser: 63 rows x 4 floats = 252 values. Spot checks match 0.5-0.5*sin(2*pi*k/256) and 0.5*cos(2*pi*k/256) for k = 1..63. NOTE(review): presumably the real-FFT post-processing weights - confirm against the code that reads it. */
+ 4.8772937059402e-001, 4.9984940886497e-001, 0.0000000000000e+000, 0.0000000000000e+000, /* k=1 only: {0.5-0.5*sin, 0.5*cos, 0, 0}; the two zeros pad the row to four floats */
+ 4.7546616196632e-001, 4.7546616196632e-001, 4.6321770548820e-001, 4.6321770548820e-001, /* "sine" row: 0.5-0.5*sin for k=2 and k=3, each value stored twice */
+ -4.9939772486687e-001, 4.9939772486687e-001, -4.9864521622658e-001, 4.9864521622658e-001, /* "cosine" row: {-c, +c} for k=2 and k=3 with c = 0.5*cos; this two-row pattern repeats for each following pair of k */
+ 4.5099142193794e-001, 4.5099142193794e-001, 4.3879467248917e-001, 4.3879467248917e-001,
+ -4.9759235978127e-001, 4.9759235978127e-001, -4.9623978137970e-001, 4.9623978137970e-001,
+ 4.2663475871086e-001, 4.2663475871086e-001, 4.1451907157898e-001, 4.1451907157898e-001,
+ -4.9458825588226e-001, 4.9458825588226e-001, -4.9263882637024e-001, 4.9263882637024e-001,
+ 4.0245485305786e-001, 4.0245485305786e-001, 3.9044937491417e-001, 3.9044937491417e-001,
+ -4.9039262533188e-001, 4.9039262533188e-001, -4.8785105347633e-001, 4.8785105347633e-001,
+ 3.7850990891457e-001, 3.7850990891457e-001, 3.6664360761642e-001, 3.6664360761642e-001,
+ -4.8501563072205e-001, 4.8501563072205e-001, -4.8188802599907e-001, 4.8188802599907e-001,
+ 3.5485765337944e-001, 3.5485765337944e-001, 3.4315913915634e-001, 3.4315913915634e-001,
+ -4.7847017645836e-001, 4.7847017645836e-001, -4.7476407885551e-001, 4.7476407885551e-001,
+ 3.3155506849289e-001, 3.3155506849289e-001, 3.2005247473717e-001, 3.2005247473717e-001,
+ -4.7077202796936e-001, 4.7077202796936e-001, -4.6649640798569e-001, 4.6649640798569e-001,
+ 3.0865827202797e-001, 3.0865827202797e-001, 2.9737934470177e-001, 2.9737934470177e-001,
+ -4.6193975210190e-001, 4.6193975210190e-001, -4.5710486173630e-001, 4.5710486173630e-001,
+ 2.8622245788574e-001, 2.8622245788574e-001, 2.7519434690475e-001, 2.7519434690475e-001,
+ -4.5199465751648e-001, 4.5199465751648e-001, -4.4661214947701e-001, 4.4661214947701e-001,
+ 2.6430162787437e-001, 2.6430162787437e-001, 2.5355088710785e-001, 2.5355088710785e-001,
+ -4.4096061587334e-001, 4.4096061587334e-001, -4.3504348397255e-001, 4.3504348397255e-001,
+ 2.4294862151146e-001, 2.4294862151146e-001, 2.3250117897987e-001, 2.3250117897987e-001,
+ -4.2886430025101e-001, 4.2886430025101e-001, -4.2242678999901e-001, 4.2242678999901e-001,
+ 2.2221487760544e-001, 2.2221487760544e-001, 2.1209588646889e-001, 2.1209588646889e-001,
+ -4.1573479771614e-001, 4.1573479771614e-001, -4.0879240632057e-001, 4.0879240632057e-001,
+ 2.0215034484863e-001, 2.0215034484863e-001, 1.9238418340683e-001, 1.9238418340683e-001,
+ -4.0160375833511e-001, 4.0160375833511e-001, -3.9417320489883e-001, 3.9417320489883e-001,
+ 1.8280336260796e-001, 1.8280336260796e-001, 1.7341357469559e-001, 1.7341357469559e-001,
+ -3.8650521636009e-001, 3.8650521636009e-001, -3.7860441207886e-001, 3.7860441207886e-001,
+ 1.6422051191330e-001, 1.6422051191330e-001, 1.5522971749306e-001, 1.5522971749306e-001,
+ -3.7047556042671e-001, 3.7047556042671e-001, -3.6212354898453e-001, 3.6212354898453e-001,
+ 1.4644661545753e-001, 1.4644661545753e-001, 1.3787645101547e-001, 1.3787645101547e-001,
+ -3.5355338454247e-001, 3.5355338454247e-001, -3.4477028250694e-001, 3.4477028250694e-001,
+ 1.2952443957329e-001, 1.2952443957329e-001, 1.2139558792114e-001, 1.2139558792114e-001,
+ -3.3577948808670e-001, 3.3577948808670e-001, -3.2658642530441e-001, 3.2658642530441e-001,
+ 1.1349478363991e-001, 1.1349478363991e-001, 1.0582679510117e-001, 1.0582679510117e-001,
+ -3.1719663739204e-001, 3.1719663739204e-001, -3.0761581659317e-001, 3.0761581659317e-001,
+ 9.8396241664886e-002, 9.8396241664886e-002, 9.1207593679428e-002, 9.1207593679428e-002,
+ -2.9784965515137e-001, 2.9784965515137e-001, -2.8790411353111e-001, 2.8790411353111e-001,
+ 8.4265202283859e-002, 8.4265202283859e-002, 7.7573210000992e-002, 7.7573210000992e-002,
+ -2.7778512239456e-001, 2.7778512239456e-001, -2.6749882102013e-001, 2.6749882102013e-001,
+ 7.1135699748993e-002, 7.1135699748993e-002, 6.4956516027451e-002, 6.4956516027451e-002,
+ -2.5705137848854e-001, 2.5705137848854e-001, -2.4644909799099e-001, 2.4644909799099e-001,
+ 5.9039384126663e-002, 5.9039384126663e-002, 5.3387850522995e-002, 5.3387850522995e-002,
+ -2.3569837212563e-001, 2.3569837212563e-001, -2.2480566799641e-001, 2.2480566799641e-001,
+ 4.8005342483521e-002, 4.8005342483521e-002, 4.2895138263702e-002, 4.2895138263702e-002,
+ -2.1377755701542e-001, 2.1377755701542e-001, -2.0262065529823e-001, 2.0262065529823e-001,
+ 3.8060247898102e-002, 3.8060247898102e-002, 3.3503592014313e-002, 3.3503592014313e-002,
+ -1.9134172797203e-001, 1.9134172797203e-001, -1.7994752526283e-001, 1.7994752526283e-001,
+ 2.9227972030640e-002, 2.9227972030640e-002, 2.5235921144485e-002, 2.5235921144485e-002,
+ -1.6844493150711e-001, 1.6844493150711e-001, -1.5684087574482e-001, 1.5684087574482e-001,
+ 2.1529823541641e-002, 2.1529823541641e-002, 1.8111974000931e-002, 1.8111974000931e-002,
+ -1.4514234662056e-001, 1.4514234662056e-001, -1.3335637748241e-001, 1.3335637748241e-001,
+ 1.4984369277954e-002, 1.4984369277954e-002, 1.2148946523666e-002, 1.2148946523666e-002,
+ -1.2149009108543e-001, 1.2149009108543e-001, -1.0955062508583e-001, 1.0955062508583e-001,
+ 9.6073746681213e-003, 9.6073746681213e-003, 7.3611736297607e-003, 7.3611736297607e-003,
+ -9.7545161843300e-002, 9.7545161843300e-002, -8.5480943322182e-002, 8.5480943322182e-002,
+ 5.4117441177368e-003, 5.4117441177368e-003, 3.7602186203003e-003, 3.7602186203003e-003,
+ -7.3365241289139e-002, 7.3365241289139e-002, -6.1205338686705e-002, 6.1205338686705e-002,
+ 2.4076402187347e-003, 2.4076402187347e-003, 1.3547837734222e-003, 1.3547837734222e-003,
+ -4.9008570611477e-002, 4.9008570611477e-002, -3.6782283335924e-002, 3.6782283335924e-002,
+ 6.0227513313293e-004, 6.0227513313293e-004, 1.5059113502502e-004, 1.5059113502502e-004,
+ -2.4533838033676e-002, 2.4533838033676e-002, -1.2270614504814e-002, 1.2270614504814e-002 /* final pair: k=62 and k=63 */
+};
+static _MM_ALIGN16 float W512[] = { /* Precomputed weight table for the 512-point case; sized by its initialiser: 127 rows x 4 floats = 508 values. Spot checks match 0.5-0.5*sin(2*pi*k/512) and 0.5*cos(2*pi*k/512) for k = 1..127. NOTE(review): presumably the real-FFT post-processing weights - confirm against the code that reads it. */
+ 4.9386423826218e-001, 4.9996235966682e-001, 0.0000000000000e+000, 0.0000000000000e+000, /* k=1 only: {0.5-0.5*sin, 0.5*cos, 0, 0}; the two zeros pad the row to four floats */
+ 4.8772937059402e-001, 4.8772937059402e-001, 4.8159638047218e-001, 4.8159638047218e-001, /* "sine" row: 0.5-0.5*sin for k=2 and k=3, each value stored twice */
+ -4.9984940886497e-001, 4.9984940886497e-001, -4.9966117739677e-001, 4.9966117739677e-001, /* "cosine" row: {-c, +c} for k=2 and k=3 with c = 0.5*cos; this two-row pattern repeats for each following pair of k */
+ 4.7546616196632e-001, 4.7546616196632e-001, 4.6933963894844e-001, 4.6933963894844e-001,
+ -4.9939772486687e-001, 4.9939772486687e-001, -4.9905905127525e-001, 4.9905905127525e-001,
+ 4.6321770548820e-001, 4.6321770548820e-001, 4.5710134506226e-001, 4.5710134506226e-001,
+ -4.9864521622658e-001, 4.9864521622658e-001, -4.9815630912781e-001, 4.9815630912781e-001,
+ 4.5099142193794e-001, 4.5099142193794e-001, 4.4488888978958e-001, 4.4488888978958e-001,
+ -4.9759235978127e-001, 4.9759235978127e-001, -4.9695348739624e-001, 4.9695348739624e-001,
+ 4.3879467248917e-001, 4.3879467248917e-001, 4.3270963430405e-001, 4.3270963430405e-001,
+ -4.9623978137970e-001, 4.9623978137970e-001, -4.9545133113861e-001, 4.9545133113861e-001,
+ 4.2663475871086e-001, 4.2663475871086e-001, 4.2057090997696e-001, 4.2057090997696e-001,
+ -4.9458825588226e-001, 4.9458825588226e-001, -4.9365070462227e-001, 4.9365070462227e-001,
+ 4.1451907157898e-001, 4.1451907157898e-001, 4.0848004817963e-001, 4.0848004817963e-001,
+ -4.9263882637024e-001, 4.9263882637024e-001, -4.9155274033546e-001, 4.9155274033546e-001,
+ 4.0245485305786e-001, 4.0245485305786e-001, 3.9644432067871e-001, 3.9644432067871e-001,
+ -4.9039262533188e-001, 4.9039262533188e-001, -4.8915868997574e-001, 4.8915868997574e-001,
+ 3.9044937491417e-001, 3.9044937491417e-001, 3.8447093963623e-001, 3.8447093963623e-001,
+ -4.8785105347633e-001, 4.8785105347633e-001, -4.8646998405457e-001, 4.8646998405457e-001,
+ 3.7850990891457e-001, 3.7850990891457e-001, 3.7256717681885e-001, 3.7256717681885e-001,
+ -4.8501563072205e-001, 4.8501563072205e-001, -4.8348823189735e-001, 4.8348823189735e-001,
+ 3.6664360761642e-001, 3.6664360761642e-001, 3.6074015498161e-001, 3.6074015498161e-001,
+ -4.8188802599907e-001, 4.8188802599907e-001, -4.8021525144577e-001, 4.8021525144577e-001,
+ 3.5485765337944e-001, 3.5485765337944e-001, 3.4899702668190e-001, 3.4899702668190e-001,
+ -4.7847017645836e-001, 4.7847017645836e-001, -4.7665300965309e-001, 4.7665300965309e-001,
+ 3.4315913915634e-001, 3.4315913915634e-001, 3.3734485507011e-001, 3.3734485507011e-001,
+ -4.7476407885551e-001, 4.7476407885551e-001, -4.7280365228653e-001, 4.7280365228653e-001,
+ 3.3155506849289e-001, 3.3155506849289e-001, 3.2579064369202e-001, 3.2579064369202e-001,
+ -4.7077202796936e-001, 4.7077202796936e-001, -4.6866950392723e-001, 4.6866950392723e-001,
+ 3.2005247473717e-001, 3.2005247473717e-001, 3.1434139609337e-001, 3.1434139609337e-001,
+ -4.6649640798569e-001, 4.6649640798569e-001, -4.6425303816795e-001, 4.6425303816795e-001,
+ 3.0865827202797e-001, 3.0865827202797e-001, 3.0300396680832e-001, 3.0300396680832e-001,
+ -4.6193975210190e-001, 4.6193975210190e-001, -4.5955693721771e-001, 4.5955693721771e-001,
+ 2.9737934470177e-001, 2.9737934470177e-001, 2.9178521037102e-001, 2.9178521037102e-001,
+ -4.5710486173630e-001, 4.5710486173630e-001, -4.5458400249481e-001, 4.5458400249481e-001,
+ 2.8622245788574e-001, 2.8622245788574e-001, 2.8069186210632e-001, 2.8069186210632e-001,
+ -4.5199465751648e-001, 4.5199465751648e-001, -4.4933724403381e-001, 4.4933724403381e-001,
+ 2.7519434690475e-001, 2.7519434690475e-001, 2.6973062753677e-001, 2.6973062753677e-001,
+ -4.4661214947701e-001, 4.4661214947701e-001, -4.4381982088089e-001, 4.4381982088089e-001,
+ 2.6430162787437e-001, 2.6430162787437e-001, 2.5890809297562e-001, 2.5890809297562e-001,
+ -4.4096061587334e-001, 4.4096061587334e-001, -4.3803504109383e-001, 4.3803504109383e-001,
+ 2.5355088710785e-001, 2.5355088710785e-001, 2.4823081493378e-001, 2.4823081493378e-001,
+ -4.3504348397255e-001, 4.3504348397255e-001, -4.3198642134666e-001, 4.3198642134666e-001,
+ 2.4294862151146e-001, 2.4294862151146e-001, 2.3770514130592e-001, 2.3770514130592e-001,
+ -4.2886430025101e-001, 4.2886430025101e-001, -4.2567759752274e-001, 4.2567759752274e-001,
+ 2.3250117897987e-001, 2.3250117897987e-001, 2.2733750939369e-001, 2.2733750939369e-001,
+ -4.2242678999901e-001, 4.2242678999901e-001, -4.1911235451698e-001, 4.1911235451698e-001,
+ 2.2221487760544e-001, 2.2221487760544e-001, 2.1713408827782e-001, 2.1713408827782e-001,
+ -4.1573479771614e-001, 4.1573479771614e-001, -4.1229465603828e-001, 4.1229465603828e-001,
+ 2.1209588646889e-001, 2.1209588646889e-001, 2.0710107684135e-001, 2.0710107684135e-001,
+ -4.0879240632057e-001, 4.0879240632057e-001, -4.0522858500481e-001, 4.0522858500481e-001,
+ 2.0215034484863e-001, 2.0215034484863e-001, 1.9724446535110e-001, 1.9724446535110e-001,
+ -4.0160375833511e-001, 4.0160375833511e-001, -3.9791843295097e-001, 3.9791843295097e-001,
+ 1.9238418340683e-001, 1.9238418340683e-001, 1.8757024407387e-001, 1.8757024407387e-001,
+ -3.9417320489883e-001, 3.9417320489883e-001, -3.9036861062050e-001, 3.9036861062050e-001,
+ 1.8280336260796e-001, 1.8280336260796e-001, 1.7808422446251e-001, 1.7808422446251e-001,
+ -3.8650521636009e-001, 3.8650521636009e-001, -3.8258361816406e-001, 3.8258361816406e-001,
+ 1.7341357469559e-001, 1.7341357469559e-001, 1.6879209876060e-001, 1.6879209876060e-001,
+ -3.7860441207886e-001, 3.7860441207886e-001, -3.7456819415092e-001, 3.7456819415092e-001,
+ 1.6422051191330e-001, 1.6422051191330e-001, 1.5969949960709e-001, 1.5969949960709e-001,
+ -3.7047556042671e-001, 3.7047556042671e-001, -3.6632713675499e-001, 3.6632713675499e-001,
+ 1.5522971749306e-001, 1.5522971749306e-001, 1.5081188082695e-001, 1.5081188082695e-001,
+ -3.6212354898453e-001, 3.6212354898453e-001, -3.5786539316177e-001, 3.5786539316177e-001,
+ 1.4644661545753e-001, 1.4644661545753e-001, 1.4213460683823e-001, 1.4213460683823e-001,
+ -3.5355338454247e-001, 3.5355338454247e-001, -3.4918811917305e-001, 3.4918811917305e-001,
+ 1.3787645101547e-001, 1.3787645101547e-001, 1.3367286324501e-001, 1.3367286324501e-001,
+ -3.4477028250694e-001, 3.4477028250694e-001, -3.4030050039291e-001, 3.4030050039291e-001,
+ 1.2952443957329e-001, 1.2952443957329e-001, 1.2543180584908e-001, 1.2543180584908e-001,
+ -3.3577948808670e-001, 3.3577948808670e-001, -3.3120790123940e-001, 3.3120790123940e-001,
+ 1.2139558792114e-001, 1.2139558792114e-001, 1.1741638183594e-001, 1.1741638183594e-001,
+ -3.2658642530441e-001, 3.2658642530441e-001, -3.2191577553749e-001, 3.2191577553749e-001,
+ 1.1349478363991e-001, 1.1349478363991e-001, 1.0963138937950e-001, 1.0963138937950e-001,
+ -3.1719663739204e-001, 3.1719663739204e-001, -3.1242975592613e-001, 3.1242975592613e-001,
+ 1.0582679510117e-001, 1.0582679510117e-001, 1.0208156704903e-001, 1.0208156704903e-001,
+ -3.0761581659317e-001, 3.0761581659317e-001, -3.0275553464890e-001, 3.0275553464890e-001,
+ 9.8396241664886e-002, 9.8396241664886e-002, 9.4771414995193e-002, 9.4771414995193e-002,
+ -2.9784965515137e-001, 2.9784965515137e-001, -2.9289892315865e-001, 2.9289892315865e-001,
+ 9.1207593679428e-002, 9.1207593679428e-002, 8.7705343961716e-002, 8.7705343961716e-002,
+ -2.8790411353111e-001, 2.8790411353111e-001, -2.8286591172218e-001, 2.8286591172218e-001,
+ 8.4265202283859e-002, 8.4265202283859e-002, 8.0887645483017e-002, 8.0887645483017e-002,
+ -2.7778512239456e-001, 2.7778512239456e-001, -2.7266249060631e-001, 2.7266249060631e-001,
+ 7.7573210000992e-002, 7.7573210000992e-002, 7.4322402477264e-002, 7.4322402477264e-002,
+ -2.6749882102013e-001, 2.6749882102013e-001, -2.6229485869408e-001, 2.6229485869408e-001,
+ 7.1135699748993e-002, 7.1135699748993e-002, 6.8013578653336e-002, 6.8013578653336e-002,
+ -2.5705137848854e-001, 2.5705137848854e-001, -2.5176918506622e-001, 2.5176918506622e-001,
+ 6.4956516027451e-002, 6.4956516027451e-002, 6.1964958906174e-002, 6.1964958906174e-002,
+ -2.4644909799099e-001, 2.4644909799099e-001, -2.4109189212322e-001, 2.4109189212322e-001,
+ 5.9039384126663e-002, 5.9039384126663e-002, 5.6180179119110e-002, 5.6180179119110e-002,
+ -2.3569837212563e-001, 2.3569837212563e-001, -2.3026935756207e-001, 2.3026935756207e-001,
+ 5.3387850522995e-002, 5.3387850522995e-002, 5.0662755966187e-002, 5.0662755966187e-002,
+ -2.2480566799641e-001, 2.2480566799641e-001, -2.1930812299252e-001, 2.1930812299252e-001,
+ 4.8005342483521e-002, 4.8005342483521e-002, 4.5415997505188e-002, 4.5415997505188e-002,
+ -2.1377755701542e-001, 2.1377755701542e-001, -2.0821478962898e-001, 2.0821478962898e-001,
+ 4.2895138263702e-002, 4.2895138263702e-002, 4.0443062782288e-002, 4.0443062782288e-002,
+ -2.0262065529823e-001, 2.0262065529823e-001, -1.9699601829052e-001, 1.9699601829052e-001,
+ 3.8060247898102e-002, 3.8060247898102e-002, 3.5746961832047e-002, 3.5746961832047e-002,
+ -1.9134172797203e-001, 1.9134172797203e-001, -1.8565860390663e-001, 1.8565860390663e-001,
+ 3.3503592014313e-002, 3.3503592014313e-002, 3.1330496072769e-002, 3.1330496072769e-002,
+ -1.7994752526283e-001, 1.7994752526283e-001, -1.7420934140682e-001, 1.7420934140682e-001,
+ 2.9227972030640e-002, 2.9227972030640e-002, 2.7196347713470e-002, 2.7196347713470e-002,
+ -1.6844493150711e-001, 1.6844493150711e-001, -1.6265514492989e-001, 1.6265514492989e-001,
+ 2.5235921144485e-002, 2.5235921144485e-002, 2.3346990346909e-002, 2.3346990346909e-002,
+ -1.5684087574482e-001, 1.5684087574482e-001, -1.5100297331810e-001, 1.5100297331810e-001,
+ 2.1529823541641e-002, 2.1529823541641e-002, 1.9784748554230e-002, 1.9784748554230e-002,
+ -1.4514234662056e-001, 1.4514234662056e-001, -1.3925984501839e-001, 1.3925984501839e-001,
+ 1.8111974000931e-002, 1.8111974000931e-002, 1.6511768102646e-002, 1.6511768102646e-002,
+ -1.3335637748241e-001, 1.3335637748241e-001, -1.2743283808231e-001, 1.2743283808231e-001,
+ 1.4984369277954e-002, 1.4984369277954e-002, 1.3530015945435e-002, 1.3530015945435e-002,
+ -1.2149009108543e-001, 1.2149009108543e-001, -1.1552906036377e-001, 1.1552906036377e-001,
+ 1.2148946523666e-002, 1.2148946523666e-002, 1.0841310024261e-002, 1.0841310024261e-002,
+ -1.0955062508583e-001, 1.0955062508583e-001, -1.0355569422245e-001, 1.0355569422245e-001,
+ 9.6073746681213e-003, 9.6073746681213e-003, 8.4472596645355e-003, 8.4472596645355e-003,
+ -9.7545161843300e-002, 9.7545161843300e-002, -9.1519944369793e-002, 9.1519944369793e-002,
+ 7.3611736297607e-003, 7.3611736297607e-003, 6.3492953777313e-003, 6.3492953777313e-003,
+ -8.5480943322182e-002, 8.5480943322182e-002, -7.9429075121880e-002, 7.9429075121880e-002,
+ 5.4117441177368e-003, 5.4117441177368e-003, 4.5486688613892e-003, 4.5486688613892e-003,
+ -7.3365241289139e-002, 7.3365241289139e-002, -6.7290358245373e-002, 6.7290358245373e-002,
+ 3.7602186203003e-003, 3.7602186203003e-003, 3.0465126037598e-003, 3.0465126037598e-003,
+ -6.1205338686705e-002, 6.1205338686705e-002, -5.5111106485128e-002, 5.5111106485128e-002,
+ 2.4076402187347e-003, 2.4076402187347e-003, 1.8436908721924e-003, 1.8436908721924e-003,
+ -4.9008570611477e-002, 4.9008570611477e-002, -4.2898658663034e-002, 4.2898658663034e-002,
+ 1.3547837734222e-003, 1.3547837734222e-003, 9.4094872474670e-004, 9.4094872474670e-004,
+ -3.6782283335924e-002, 3.6782283335924e-002, -3.0660368502140e-002, 3.0660368502140e-002,
+ 6.0227513313293e-004, 6.0227513313293e-004, 3.3882260322571e-004, 3.3882260322571e-004,
+ -2.4533838033676e-002, 2.4533838033676e-002, -1.8403612077236e-002, 1.8403612077236e-002,
+ 1.5059113502502e-004, 1.5059113502502e-004, 3.7640333175659e-005, 3.7640333175659e-005,
+ -1.2270614504814e-002, 1.2270614504814e-002, -6.1357691884041e-003, 6.1357691884041e-003, /* final pair: k=126 and k=127; the trailing comma before the closing brace is valid C */
+};
+static _MM_ALIGN16 float W1024[] = {
+ 4.9693205952644e-001, 4.9999058246613e-001, 0.0000000000000e+000, 0.0000000000000e+000,
+ 4.9386423826218e-001, 4.9386423826218e-001, 4.9079662561417e-001, 4.9079662561417e-001,
+ -4.9996235966682e-001, 4.9996235966682e-001, -4.9991530179977e-001, 4.9991530179977e-001,
+ 4.8772937059402e-001, 4.8772937059402e-001, 4.8466259241104e-001, 4.8466259241104e-001,
+ -4.9984940886497e-001, 4.9984940886497e-001, -4.9976471066475e-001, 4.9976471066475e-001,
+ 4.8159638047218e-001, 4.8159638047218e-001, 4.7853088378906e-001, 4.7853088378906e-001,
+ -4.9966117739677e-001, 4.9966117739677e-001, -4.9953886866570e-001, 4.9953886866570e-001,
+ 4.7546616196632e-001, 4.7546616196632e-001, 4.7240236401558e-001, 4.7240236401558e-001,
+ -4.9939772486687e-001, 4.9939772486687e-001, -4.9923777580261e-001, 4.9923777580261e-001,
+ 4.6933963894844e-001, 4.6933963894844e-001, 4.6627804636955e-001, 4.6627804636955e-001,
+ -4.9905905127525e-001, 4.9905905127525e-001, -4.9886152148247e-001, 4.9886152148247e-001,
+ 4.6321770548820e-001, 4.6321770548820e-001, 4.6015876531601e-001, 4.6015876531601e-001,
+ -4.9864521622658e-001, 4.9864521622658e-001, -4.9841013550758e-001, 4.9841013550758e-001,
+ 4.5710134506226e-001, 4.5710134506226e-001, 4.5404553413391e-001, 4.5404553413391e-001,
+ -4.9815630912781e-001, 4.9815630912781e-001, -4.9788370728493e-001, 4.9788370728493e-001,
+ 4.5099142193794e-001, 4.5099142193794e-001, 4.4793918728828e-001, 4.4793918728828e-001,
+ -4.9759235978127e-001, 4.9759235978127e-001, -4.9728229641914e-001, 4.9728229641914e-001,
+ 4.4488888978958e-001, 4.4488888978958e-001, 4.4184067845345e-001, 4.4184067845345e-001,
+ -4.9695348739624e-001, 4.9695348739624e-001, -4.9660596251488e-001, 4.9660596251488e-001,
+ 4.3879467248917e-001, 4.3879467248917e-001, 4.3575096130371e-001, 4.3575096130371e-001,
+ -4.9623978137970e-001, 4.9623978137970e-001, -4.9585488438606e-001, 4.9585488438606e-001,
+ 4.3270963430405e-001, 4.3270963430405e-001, 4.2967087030411e-001, 4.2967087030411e-001,
+ -4.9545133113861e-001, 4.9545133113861e-001, -4.9502909183502e-001, 4.9502909183502e-001,
+ 4.2663475871086e-001, 4.2663475871086e-001, 4.2360138893127e-001, 4.2360138893127e-001,
+ -4.9458825588226e-001, 4.9458825588226e-001, -4.9412879347801e-001, 4.9412879347801e-001,
+ 4.2057090997696e-001, 4.2057090997696e-001, 4.1754344105721e-001, 4.1754344105721e-001,
+ -4.9365070462227e-001, 4.9365070462227e-001, -4.9315404891968e-001, 4.9315404891968e-001,
+ 4.1451907157898e-001, 4.1451907157898e-001, 4.1149789094925e-001, 4.1149789094925e-001,
+ -4.9263882637024e-001, 4.9263882637024e-001, -4.9210503697395e-001, 4.9210503697395e-001,
+ 4.0848004817963e-001, 4.0848004817963e-001, 4.0546566247940e-001, 4.0546566247940e-001,
+ -4.9155274033546e-001, 4.9155274033546e-001, -4.9098193645477e-001, 4.9098193645477e-001,
+ 4.0245485305786e-001, 4.0245485305786e-001, 3.9944767951965e-001, 3.9944767951965e-001,
+ -4.9039262533188e-001, 4.9039262533188e-001, -4.8978489637375e-001, 4.8978489637375e-001,
+ 3.9644432067871e-001, 3.9644432067871e-001, 3.9344483613968e-001, 3.9344483613968e-001,
+ -4.8915868997574e-001, 4.8915868997574e-001, -4.8851406574249e-001, 4.8851406574249e-001,
+ 3.9044937491417e-001, 3.9044937491417e-001, 3.8745802640915e-001, 3.8745802640915e-001,
+ -4.8785105347633e-001, 4.8785105347633e-001, -4.8716968297958e-001, 4.8716968297958e-001,
+ 3.8447093963623e-001, 3.8447093963623e-001, 3.8148820400238e-001, 3.8148820400238e-001,
+ -4.8646998405457e-001, 4.8646998405457e-001, -4.8575195670128e-001, 4.8575195670128e-001,
+ 3.7850990891457e-001, 3.7850990891457e-001, 3.7553620338440e-001, 3.7553620338440e-001,
+ -4.8501563072205e-001, 4.8501563072205e-001, -4.8426103591919e-001, 4.8426103591919e-001,
+ 3.7256717681885e-001, 3.7256717681885e-001, 3.6960291862488e-001, 3.6960291862488e-001,
+ -4.8348823189735e-001, 4.8348823189735e-001, -4.8269721865654e-001, 4.8269721865654e-001,
+ 3.6664360761642e-001, 3.6664360761642e-001, 3.6368930339813e-001, 3.6368930339813e-001,
+ -4.8188802599907e-001, 4.8188802599907e-001, -4.8106071352959e-001, 4.8106071352959e-001,
+ 3.6074015498161e-001, 3.6074015498161e-001, 3.5779622197151e-001, 3.5779622197151e-001,
+ -4.8021525144577e-001, 4.8021525144577e-001, -4.7935172915459e-001, 4.7935172915459e-001,
+ 3.5485765337944e-001, 3.5485765337944e-001, 3.5192453861237e-001, 3.5192453861237e-001,
+ -4.7847017645836e-001, 4.7847017645836e-001, -4.7757059335709e-001, 4.7757059335709e-001,
+ 3.4899702668190e-001, 3.4899702668190e-001, 3.4607517719269e-001, 3.4607517719269e-001,
+ -4.7665300965309e-001, 4.7665300965309e-001, -4.7571751475334e-001, 4.7571751475334e-001,
+ 3.4315913915634e-001, 3.4315913915634e-001, 3.4024900197983e-001, 3.4024900197983e-001,
+ -4.7476407885551e-001, 4.7476407885551e-001, -4.7379279136658e-001, 4.7379279136658e-001,
+ 3.3734485507011e-001, 3.3734485507011e-001, 3.3444684743881e-001, 3.3444684743881e-001,
+ -4.7280365228653e-001, 4.7280365228653e-001, -4.7179672122002e-001, 4.7179672122002e-001,
+ 3.3155506849289e-001, 3.3155506849289e-001, 3.2866963744164e-001, 3.2866963744164e-001,
+ -4.7077202796936e-001, 4.7077202796936e-001, -4.6972960233688e-001, 4.6972960233688e-001,
+ 3.2579064369202e-001, 3.2579064369202e-001, 3.2291823625565e-001, 3.2291823625565e-001,
+ -4.6866950392723e-001, 4.6866950392723e-001, -4.6759176254272e-001, 4.6759176254272e-001,
+ 3.2005247473717e-001, 3.2005247473717e-001, 3.1719350814819e-001, 3.1719350814819e-001,
+ -4.6649640798569e-001, 4.6649640798569e-001, -4.6538347005844e-001, 4.6538347005844e-001,
+ 3.1434139609337e-001, 3.1434139609337e-001, 3.1149628758430e-001, 3.1149628758430e-001,
+ -4.6425303816795e-001, 4.6425303816795e-001, -4.6310511231422e-001, 4.6310511231422e-001,
+ 3.0865827202797e-001, 3.0865827202797e-001, 3.0582746863365e-001, 3.0582746863365e-001,
+ -4.6193975210190e-001, 4.6193975210190e-001, -4.6075701713562e-001, 4.6075701713562e-001,
+ 3.0300396680832e-001, 3.0300396680832e-001, 3.0018788576126e-001, 3.0018788576126e-001,
+ -4.5955693721771e-001, 4.5955693721771e-001, -4.5833954215050e-001, 4.5833954215050e-001,
+ 2.9737934470177e-001, 2.9737934470177e-001, 2.9457840323448e-001, 2.9457840323448e-001,
+ -4.5710486173630e-001, 4.5710486173630e-001, -4.5585301518440e-001, 4.5585301518440e-001,
+ 2.9178521037102e-001, 2.9178521037102e-001, 2.8899985551834e-001, 2.8899985551834e-001,
+ -4.5458400249481e-001, 4.5458400249481e-001, -4.5329785346985e-001, 4.5329785346985e-001,
+ 2.8622245788574e-001, 2.8622245788574e-001, 2.8345310688019e-001, 2.8345310688019e-001,
+ -4.5199465751648e-001, 4.5199465751648e-001, -4.5067441463470e-001, 4.5067441463470e-001,
+ 2.8069186210632e-001, 2.8069186210632e-001, 2.7793890237808e-001, 2.7793890237808e-001,
+ -4.4933724403381e-001, 4.4933724403381e-001, -4.4798311591148e-001, 4.4798311591148e-001,
+ 2.7519434690475e-001, 2.7519434690475e-001, 2.7245819568634e-001, 2.7245819568634e-001,
+ -4.4661214947701e-001, 4.4661214947701e-001, -4.4522434473038e-001, 4.4522434473038e-001,
+ 2.6973062753677e-001, 2.6973062753677e-001, 2.6701176166534e-001, 2.6701176166534e-001,
+ -4.4381982088089e-001, 4.4381982088089e-001, -4.4239854812622e-001, 4.4239854812622e-001,
+ 2.6430162787437e-001, 2.6430162787437e-001, 2.6160037517548e-001, 2.6160037517548e-001,
+ -4.4096061587334e-001, 4.4096061587334e-001, -4.3950611352921e-001, 4.3950611352921e-001,
+ 2.5890809297562e-001, 2.5890809297562e-001, 2.5622493028641e-001, 2.5622493028641e-001,
+ -4.3803504109383e-001, 4.3803504109383e-001, -4.3654748797417e-001, 4.3654748797417e-001,
+ 2.5355088710785e-001, 2.5355088710785e-001, 2.5088614225388e-001, 2.5088614225388e-001,
+ -4.3504348397255e-001, 4.3504348397255e-001, -4.3352311849594e-001, 4.3352311849594e-001,
+ 2.4823081493378e-001, 2.4823081493378e-001, 2.4558493494987e-001, 2.4558493494987e-001,
+ -4.3198642134666e-001, 4.3198642134666e-001, -4.3043345212936e-001, 4.3043345212936e-001,
+ 2.4294862151146e-001, 2.4294862151146e-001, 2.4032199382782e-001, 2.4032199382782e-001,
+ -4.2886430025101e-001, 4.2886430025101e-001, -4.2727899551392e-001, 4.2727899551392e-001,
+ 2.3770514130592e-001, 2.3770514130592e-001, 2.3509818315506e-001, 2.3509818315506e-001,
+ -4.2567759752274e-001, 4.2567759752274e-001, -4.2406016588211e-001, 4.2406016588211e-001,
+ 2.3250117897987e-001, 2.3250117897987e-001, 2.2991424798965e-001, 2.2991424798965e-001,
+ -4.2242678999901e-001, 4.2242678999901e-001, -4.2077746987343e-001, 4.2077746987343e-001,
+ 2.2733750939369e-001, 2.2733750939369e-001, 2.2477099299431e-001, 2.2477099299431e-001,
+ -4.1911235451698e-001, 4.1911235451698e-001, -4.1743144392967e-001, 4.1743144392967e-001,
+ 2.2221487760544e-001, 2.2221487760544e-001, 2.1966919302940e-001, 2.1966919302940e-001,
+ -4.1573479771614e-001, 4.1573479771614e-001, -4.1402250528336e-001, 4.1402250528336e-001,
+ 2.1713408827782e-001, 2.1713408827782e-001, 2.1460962295532e-001, 2.1460962295532e-001,
+ -4.1229465603828e-001, 4.1229465603828e-001, -4.1055124998093e-001, 4.1055124998093e-001,
+ 2.1209588646889e-001, 2.1209588646889e-001, 2.0959302783012e-001, 2.0959302783012e-001,
+ -4.0879240632057e-001, 4.0879240632057e-001, -4.0701815485954e-001, 4.0701815485954e-001,
+ 2.0710107684135e-001, 2.0710107684135e-001, 2.0462015271187e-001, 2.0462015271187e-001,
+ -4.0522858500481e-001, 4.0522858500481e-001, -4.0342378616333e-001, 4.0342378616333e-001,
+ 2.0215034484863e-001, 2.0215034484863e-001, 1.9969174265862e-001, 1.9969174265862e-001,
+ -4.0160375833511e-001, 4.0160375833511e-001, -3.9976862072945e-001, 3.9976862072945e-001,
+ 1.9724446535110e-001, 1.9724446535110e-001, 1.9480860233307e-001, 1.9480860233307e-001,
+ -3.9791843295097e-001, 3.9791843295097e-001, -3.9605328440666e-001, 3.9605328440666e-001,
+ 1.9238418340683e-001, 1.9238418340683e-001, 1.8997138738632e-001, 1.8997138738632e-001,
+ -3.9417320489883e-001, 3.9417320489883e-001, -3.9227828383446e-001, 3.9227828383446e-001,
+ 1.8757024407387e-001, 1.8757024407387e-001, 1.8518087267876e-001, 1.8518087267876e-001,
+ -3.9036861062050e-001, 3.9036861062050e-001, -3.8844421505928e-001, 3.8844421505928e-001,
+ 1.8280336260796e-001, 1.8280336260796e-001, 1.8043777346611e-001, 1.8043777346611e-001,
+ -3.8650521636009e-001, 3.8650521636009e-001, -3.8455167412758e-001, 3.8455167412758e-001,
+ 1.7808422446251e-001, 1.7808422446251e-001, 1.7574280500412e-001, 1.7574280500412e-001,
+ -3.8258361816406e-001, 3.8258361816406e-001, -3.8060119748116e-001, 3.8060119748116e-001,
+ 1.7341357469559e-001, 1.7341357469559e-001, 1.7109665274620e-001, 1.7109665274620e-001,
+ -3.7860441207886e-001, 3.7860441207886e-001, -3.7659338116646e-001, 3.7659338116646e-001,
+ 1.6879209876060e-001, 1.6879209876060e-001, 1.6650003194809e-001, 1.6650003194809e-001,
+ -3.7456819415092e-001, 3.7456819415092e-001, -3.7252888083458e-001, 3.7252888083458e-001,
+ 1.6422051191330e-001, 1.6422051191330e-001, 1.6195362806320e-001, 1.6195362806320e-001,
+ -3.7047556042671e-001, 3.7047556042671e-001, -3.6840826272964e-001, 3.6840826272964e-001,
+ 1.5969949960709e-001, 1.5969949960709e-001, 1.5745815634727e-001, 1.5745815634727e-001,
+ -3.6632713675499e-001, 3.6632713675499e-001, -3.6423218250275e-001, 3.6423218250275e-001,
+ 1.5522971749306e-001, 1.5522971749306e-001, 1.5301427245140e-001, 1.5301427245140e-001,
+ -3.6212354898453e-001, 3.6212354898453e-001, -3.6000123620033e-001, 3.6000123620033e-001,
+ 1.5081188082695e-001, 1.5081188082695e-001, 1.4862263202667e-001, 1.4862263202667e-001,
+ -3.5786539316177e-001, 3.5786539316177e-001, -3.5571607947350e-001, 3.5571607947350e-001,
+ 1.4644661545753e-001, 1.4644661545753e-001, 1.4428392052650e-001, 1.4428392052650e-001,
+ -3.5355338454247e-001, 3.5355338454247e-001, -3.5137736797333e-001, 3.5137736797333e-001,
+ 1.4213460683823e-001, 1.4213460683823e-001, 1.3999876379967e-001, 1.3999876379967e-001,
+ -3.4918811917305e-001, 3.4918811917305e-001, -3.4698572754860e-001, 3.4698572754860e-001,
+ 1.3787645101547e-001, 1.3787645101547e-001, 1.3576781749725e-001, 1.3576781749725e-001,
+ -3.4477028250694e-001, 3.4477028250694e-001, -3.4254184365273e-001, 3.4254184365273e-001,
+ 1.3367286324501e-001, 1.3367286324501e-001, 1.3159173727036e-001, 1.3159173727036e-001,
+ -3.4030050039291e-001, 3.4030050039291e-001, -3.3804637193680e-001, 3.3804637193680e-001,
+ 1.2952443957329e-001, 1.2952443957329e-001, 1.2747111916542e-001, 1.2747111916542e-001,
+ -3.3577948808670e-001, 3.3577948808670e-001, -3.3349996805191e-001, 3.3349996805191e-001,
+ 1.2543180584908e-001, 1.2543180584908e-001, 1.2340661883354e-001, 1.2340661883354e-001,
+ -3.3120790123940e-001, 3.3120790123940e-001, -3.2890334725380e-001, 3.2890334725380e-001,
+ 1.2139558792114e-001, 1.2139558792114e-001, 1.1939880251884e-001, 1.1939880251884e-001,
+ -3.2658642530441e-001, 3.2658642530441e-001, -3.2425719499588e-001, 3.2425719499588e-001,
+ 1.1741638183594e-001, 1.1741638183594e-001, 1.1544832587242e-001, 1.1544832587242e-001,
+ -3.2191577553749e-001, 3.2191577553749e-001, -3.1956222653389e-001, 3.1956222653389e-001,
+ 1.1349478363991e-001, 1.1349478363991e-001, 1.1155578494072e-001, 1.1155578494072e-001,
+ -3.1719663739204e-001, 3.1719663739204e-001, -3.1481912732124e-001, 3.1481912732124e-001,
+ 1.0963138937950e-001, 1.0963138937950e-001, 1.0772171616554e-001, 1.0772171616554e-001,
+ -3.1242975592613e-001, 3.1242975592613e-001, -3.1002861261368e-001, 3.1002861261368e-001,
+ 1.0582679510117e-001, 1.0582679510117e-001, 1.0394671559334e-001, 1.0394671559334e-001,
+ -3.0761581659317e-001, 3.0761581659317e-001, -3.0519139766693e-001, 3.0519139766693e-001,
+ 1.0208156704903e-001, 1.0208156704903e-001, 1.0023137927055e-001, 1.0023137927055e-001,
+ -3.0275553464890e-001, 3.0275553464890e-001, -3.0030825734138e-001, 3.0030825734138e-001,
+ 9.8396241664886e-002, 9.8396241664886e-002, 9.6576213836670e-002, 9.6576213836670e-002,
+ -2.9784965515137e-001, 2.9784965515137e-001, -2.9537984728813e-001, 2.9537984728813e-001,
+ 9.4771414995193e-002, 9.4771414995193e-002, 9.2981845140457e-002, 9.2981845140457e-002,
+ -2.9289892315865e-001, 2.9289892315865e-001, -2.9040697216988e-001, 2.9040697216988e-001,
+ 9.1207593679428e-002, 9.1207593679428e-002, 8.9448750019073e-002, 8.9448750019073e-002,
+ -2.8790411353111e-001, 2.8790411353111e-001, -2.8539037704468e-001, 2.8539037704468e-001,
+ 8.7705343961716e-002, 8.7705343961716e-002, 8.5977494716644e-002, 8.5977494716644e-002,
+ -2.8286591172218e-001, 2.8286591172218e-001, -2.8033080697060e-001, 2.8033080697060e-001,
+ 8.4265202283859e-002, 8.4265202283859e-002, 8.2568556070328e-002, 8.2568556070328e-002,
+ -2.7778512239456e-001, 2.7778512239456e-001, -2.7522900700569e-001, 2.7522900700569e-001,
+ 8.0887645483017e-002, 8.0887645483017e-002, 7.9222530126572e-002, 7.9222530126572e-002,
+ -2.7266249060631e-001, 2.7266249060631e-001, -2.7008575201035e-001, 2.7008575201035e-001,
+ 7.7573210000992e-002, 7.7573210000992e-002, 7.5939834117889e-002, 7.5939834117889e-002,
+ -2.6749882102013e-001, 2.6749882102013e-001, -2.6490181684494e-001, 2.6490181684494e-001,
+ 7.4322402477264e-002, 7.4322402477264e-002, 7.2721004486084e-002, 7.2721004486084e-002,
+ -2.6229485869408e-001, 2.6229485869408e-001, -2.5967800617218e-001, 2.5967800617218e-001,
+ 7.1135699748993e-002, 7.1135699748993e-002, 6.9566547870636e-002, 6.9566547870636e-002,
+ -2.5705137848854e-001, 2.5705137848854e-001, -2.5441506505013e-001, 2.5441506505013e-001,
+ 6.8013578653336e-002, 6.8013578653336e-002, 6.6476881504059e-002, 6.6476881504059e-002,
+ -2.5176918506622e-001, 2.5176918506622e-001, -2.4911384284496e-001, 2.4911384284496e-001,
+ 6.4956516027451e-002, 6.4956516027451e-002, 6.3452512025833e-002, 6.3452512025833e-002,
+ -2.4644909799099e-001, 2.4644909799099e-001, -2.4377508461475e-001, 2.4377508461475e-001,
+ 6.1964958906174e-002, 6.1964958906174e-002, 6.0493886470795e-002, 6.0493886470795e-002,
+ -2.4109189212322e-001, 2.4109189212322e-001, -2.3839962482452e-001, 2.3839962482452e-001,
+ 5.9039384126663e-002, 5.9039384126663e-002, 5.7601451873779e-002, 5.7601451873779e-002,
+ -2.3569837212563e-001, 2.3569837212563e-001, -2.3298825323582e-001, 2.3298825323582e-001,
+ 5.6180179119110e-002, 5.6180179119110e-002, 5.4775655269623e-002, 5.4775655269623e-002,
+ -2.3026935756207e-001, 2.3026935756207e-001, -2.2754180431366e-001, 2.2754180431366e-001,
+ 5.3387850522995e-002, 5.3387850522995e-002, 5.2016884088516e-002, 5.2016884088516e-002,
+ -2.2480566799641e-001, 2.2480566799641e-001, -2.2206108272076e-001, 2.2206108272076e-001,
+ 5.0662755966187e-002, 5.0662755966187e-002, 4.9325585365295e-002, 4.9325585365295e-002,
+ -2.1930812299252e-001, 2.1930812299252e-001, -2.1654690802097e-001, 2.1654690802097e-001,
+ 4.8005342483521e-002, 4.8005342483521e-002, 4.6702146530151e-002, 4.6702146530151e-002,
+ -2.1377755701542e-001, 2.1377755701542e-001, -2.1100014448166e-001, 2.1100014448166e-001,
+ 4.5415997505188e-002, 4.5415997505188e-002, 4.4146984815598e-002, 4.4146984815598e-002,
+ -2.0821478962898e-001, 2.0821478962898e-001, -2.0542159676552e-001, 2.0542159676552e-001,
+ 4.2895138263702e-002, 4.2895138263702e-002, 4.1660457849503e-002, 4.1660457849503e-002,
+ -2.0262065529823e-001, 2.0262065529823e-001, -1.9981209933758e-001, 1.9981209933758e-001,
+ 4.0443062782288e-002, 4.0443062782288e-002, 3.9242982864380e-002, 3.9242982864380e-002,
+ -1.9699601829052e-001, 1.9699601829052e-001, -1.9417253136635e-001, 1.9417253136635e-001,
+ 3.8060247898102e-002, 3.8060247898102e-002, 3.6894887685776e-002, 3.6894887685776e-002,
+ -1.9134172797203e-001, 1.9134172797203e-001, -1.8850371241570e-001, 1.8850371241570e-001,
+ 3.5746961832047e-002, 3.5746961832047e-002, 3.4616529941559e-002, 3.4616529941559e-002,
+ -1.8565860390663e-001, 1.8565860390663e-001, -1.8280650675297e-001, 1.8280650675297e-001,
+ 3.3503592014313e-002, 3.3503592014313e-002, 3.2408237457275e-002, 3.2408237457275e-002,
+ -1.7994752526283e-001, 1.7994752526283e-001, -1.7708176374435e-001, 1.7708176374435e-001,
+ 3.1330496072769e-002, 3.1330496072769e-002, 3.0270397663116e-002, 3.0270397663116e-002,
+ -1.7420934140682e-001, 1.7420934140682e-001, -1.7133036255836e-001, 1.7133036255836e-001,
+ 2.9227972030640e-002, 2.9227972030640e-002, 2.8203278779984e-002, 2.8203278779984e-002,
+ -1.6844493150711e-001, 1.6844493150711e-001, -1.6555315256119e-001, 1.6555315256119e-001,
+ 2.7196347713470e-002, 2.7196347713470e-002, 2.6207208633423e-002, 2.6207208633423e-002,
+ -1.6265514492989e-001, 1.6265514492989e-001, -1.5975101292133e-001, 1.5975101292133e-001,
+ 2.5235921144485e-002, 2.5235921144485e-002, 2.4282485246658e-002, 2.4282485246658e-002,
+ -1.5684087574482e-001, 1.5684087574482e-001, -1.5392482280731e-001, 1.5392482280731e-001,
+ 2.3346990346909e-002, 2.3346990346909e-002, 2.2429406642914e-002, 2.2429406642914e-002,
+ -1.5100297331810e-001, 1.5100297331810e-001, -1.4807544648647e-001, 1.4807544648647e-001,
+ 2.1529823541641e-002, 2.1529823541641e-002, 2.0648270845413e-002, 2.0648270845413e-002,
+ -1.4514234662056e-001, 1.4514234662056e-001, -1.4220377802849e-001, 1.4220377802849e-001,
+ 1.9784748554230e-002, 1.9784748554230e-002, 1.8939286470413e-002, 1.8939286470413e-002,
+ -1.3925984501839e-001, 1.3925984501839e-001, -1.3631068170071e-001, 1.3631068170071e-001,
+ 1.8111974000931e-002, 1.8111974000931e-002, 1.7302781343460e-002, 1.7302781343460e-002,
+ -1.3335637748241e-001, 1.3335637748241e-001, -1.3039706647396e-001, 1.3039706647396e-001,
+ 1.6511768102646e-002, 1.6511768102646e-002, 1.5738964080811e-002, 1.5738964080811e-002,
+ -1.2743283808231e-001, 1.2743283808231e-001, -1.2446380406618e-001, 1.2446380406618e-001,
+ 1.4984369277954e-002, 1.4984369277954e-002, 1.4248043298721e-002, 1.4248043298721e-002,
+ -1.2149009108543e-001, 1.2149009108543e-001, -1.1851180344820e-001, 1.1851180344820e-001,
+ 1.3530015945435e-002, 1.3530015945435e-002, 1.2830317020416e-002, 1.2830317020416e-002,
+ -1.1552906036377e-001, 1.1552906036377e-001, -1.1254195868969e-001, 1.1254195868969e-001,
+ 1.2148946523666e-002, 1.2148946523666e-002, 1.1485934257507e-002, 1.1485934257507e-002,
+ -1.0955062508583e-001, 1.0955062508583e-001, -1.0655516386032e-001, 1.0655516386032e-001,
+ 1.0841310024261e-002, 1.0841310024261e-002, 1.0215103626251e-002, 1.0215103626251e-002,
+ -1.0355569422245e-001, 1.0355569422245e-001, -1.0055232048035e-001, 1.0055232048035e-001,
+ 9.6073746681213e-003, 9.6073746681213e-003, 9.0180635452271e-003, 9.0180635452271e-003,
+ -9.7545161843300e-002, 9.7545161843300e-002, -9.4534337520599e-002, 9.4534337520599e-002,
+ 8.4472596645355e-003, 8.4472596645355e-003, 7.8949630260468e-003, 7.8949630260468e-003,
+ -9.1519944369793e-002, 9.1519944369793e-002, -8.8502109050751e-002, 8.8502109050751e-002,
+ 7.3611736297607e-003, 7.3611736297607e-003, 6.8459510803223e-003, 6.8459510803223e-003,
+ -8.5480943322182e-002, 8.5480943322182e-002, -8.2456558942795e-002, 8.2456558942795e-002,
+ 6.3492953777313e-003, 6.3492953777313e-003, 5.8712065219879e-003, 5.8712065219879e-003,
+ -7.9429075121880e-002, 7.9429075121880e-002, -7.6398596167564e-002, 7.6398596167564e-002,
+ 5.4117441177368e-003, 5.4117441177368e-003, 4.9709081649780e-003, 4.9709081649780e-003,
+ -7.3365241289139e-002, 7.3365241289139e-002, -7.0329122245312e-002, 7.0329122245312e-002,
+ 4.5486688613892e-003, 4.5486688613892e-003, 4.1451156139374e-003, 4.1451156139374e-003,
+ -6.7290358245373e-002, 6.7290358245373e-002, -6.4249053597450e-002, 6.4249053597450e-002,
+ 3.7602186203003e-003, 3.7602186203003e-003, 3.3940374851227e-003, 3.3940374851227e-003,
+ -6.1205338686705e-002, 6.1205338686705e-002, -5.8159317821264e-002, 5.8159317821264e-002,
+ 3.0465126037598e-003, 3.0465126037598e-003, 2.7177035808563e-003, 2.7177035808563e-003,
+ -5.5111106485128e-002, 5.5111106485128e-002, -5.2060820162296e-002, 5.2060820162296e-002,
+ 2.4076402187347e-003, 2.4076402187347e-003, 2.1162927150726e-003, 2.1162927150726e-003,
+ -4.9008570611477e-002, 4.9008570611477e-002, -4.5954480767250e-002, 4.5954480767250e-002,
+ 1.8436908721924e-003, 1.8436908721924e-003, 1.5898644924164e-003, 1.5898644924164e-003,
+ -4.2898658663034e-002, 4.2898658663034e-002, -3.9841219782829e-002, 3.9841219782829e-002,
+ 1.3547837734222e-003, 1.3547837734222e-003, 1.1384785175323e-003, 1.1384785175323e-003,
+ -3.6782283335924e-002, 3.6782283335924e-002, -3.3721961081028e-002, 3.3721961081028e-002,
+ 9.4094872474670e-004, 9.4094872474670e-004, 7.6222419738770e-004, 7.6222419738770e-004,
+ -3.0660368502140e-002, 3.0660368502140e-002, -2.7597622945905e-002, 2.7597622945905e-002,
+ 6.0227513313293e-004, 6.0227513313293e-004, 4.6113133430481e-004, 4.6113133430481e-004,
+ -2.4533838033676e-002, 2.4533838033676e-002, -2.1469129249454e-002, 2.1469129249454e-002,
+ 3.3882260322571e-004, 3.3882260322571e-004, 2.3528933525085e-004, 2.3528933525085e-004,
+ -1.8403612077236e-002, 1.8403612077236e-002, -1.5337402001023e-002, 1.5337402001023e-002,
+ 1.5059113502502e-004, 1.5059113502502e-004, 8.4698200225830e-005, 8.4698200225830e-005,
+ -1.2270614504814e-002, 1.2270614504814e-002, -9.2033650726080e-003, 9.2033650726080e-003,
+ 3.7640333175659e-005, 3.7640333175659e-005, 9.4175338745117e-006, 9.4175338745117e-006,
+ -6.1357691884041e-003, 6.1357691884041e-003, -3.0679423362017e-003, 3.0679423362017e-003
+};
+static _MM_ALIGN16 float W2048[] = {
+ 4.9846601486206e-001, 4.9999764561653e-001, 0.0000000000000e+000, 0.0000000000000e+000,
+ 4.9693205952644e-001, 4.9693205952644e-001, 4.9539813399315e-001, 4.9539813399315e-001,
+ -4.9999058246613e-001, 4.9999058246613e-001, -4.9997881054878e-001, 4.9997881054878e-001,
+ 4.9386423826218e-001, 4.9386423826218e-001, 4.9233040213585e-001, 4.9233040213585e-001,
+ -4.9996235966682e-001, 4.9996235966682e-001, -4.9994117021561e-001, 4.9994117021561e-001,
+ 4.9079662561417e-001, 4.9079662561417e-001, 4.8926296830177e-001, 4.8926296830177e-001,
+ -4.9991530179977e-001, 4.9991530179977e-001, -4.9988469481468e-001, 4.9988469481468e-001,
+ 4.8772937059402e-001, 4.8772937059402e-001, 4.8619592189789e-001, 4.8619592189789e-001,
+ -4.9984940886497e-001, 4.9984940886497e-001, -4.9980941414833e-001, 4.9980941414833e-001,
+ 4.8466259241104e-001, 4.8466259241104e-001, 4.8312941193581e-001, 4.8312941193581e-001,
+ -4.9976471066475e-001, 4.9976471066475e-001, -4.9971529841423e-001, 4.9971529841423e-001,
+ 4.8159638047218e-001, 4.8159638047218e-001, 4.8006352782249e-001, 4.8006352782249e-001,
+ -4.9966117739677e-001, 4.9966117739677e-001, -4.9960237741470e-001, 4.9960237741470e-001,
+ 4.7853088378906e-001, 4.7853088378906e-001, 4.7699841856956e-001, 4.7699841856956e-001,
+ -4.9953886866570e-001, 4.9953886866570e-001, -4.9947065114975e-001, 4.9947065114975e-001,
+ 4.7546616196632e-001, 4.7546616196632e-001, 4.7393414378166e-001, 4.7393414378166e-001,
+ -4.9939772486687e-001, 4.9939772486687e-001, -4.9932011961937e-001, 4.9932011961937e-001,
+ 4.7240236401558e-001, 4.7240236401558e-001, 4.7087085247040e-001, 4.7087085247040e-001,
+ -4.9923777580261e-001, 4.9923777580261e-001, -4.9915078282356e-001, 4.9915078282356e-001,
+ 4.6933963894844e-001, 4.6933963894844e-001, 4.6780869364738e-001, 4.6780869364738e-001,
+ -4.9905905127525e-001, 4.9905905127525e-001, -4.9896264076233e-001, 4.9896264076233e-001,
+ 4.6627804636955e-001, 4.6627804636955e-001, 4.6474772691727e-001, 4.6474772691727e-001,
+ -4.9886152148247e-001, 4.9886152148247e-001, -4.9875572323799e-001, 4.9875572323799e-001,
+ 4.6321770548820e-001, 4.6321770548820e-001, 4.6168807148933e-001, 4.6168807148933e-001,
+ -4.9864521622658e-001, 4.9864521622658e-001, -4.9853003025055e-001, 4.9853003025055e-001,
+ 4.6015876531601e-001, 4.6015876531601e-001, 4.5862987637520e-001, 4.5862987637520e-001,
+ -4.9841013550758e-001, 4.9841013550758e-001, -4.9828556180000e-001, 4.9828556180000e-001,
+ 4.5710134506226e-001, 4.5710134506226e-001, 4.5557323098183e-001, 4.5557323098183e-001,
+ -4.9815630912781e-001, 4.9815630912781e-001, -4.9802234768867e-001, 4.9802234768867e-001,
+ 4.5404553413391e-001, 4.5404553413391e-001, 4.5251825451851e-001, 4.5251825451851e-001,
+ -4.9788370728493e-001, 4.9788370728493e-001, -4.9774038791656e-001, 4.9774038791656e-001,
+ 4.5099142193794e-001, 4.5099142193794e-001, 4.4946506619453e-001, 4.4946506619453e-001,
+ -4.9759235978127e-001, 4.9759235978127e-001, -4.9743965268135e-001, 4.9743965268135e-001,
+ 4.4793918728828e-001, 4.4793918728828e-001, 4.4641378521919e-001, 4.4641378521919e-001,
+ -4.9728229641914e-001, 4.9728229641914e-001, -4.9712023139000e-001, 4.9712023139000e-001,
+ 4.4488888978958e-001, 4.4488888978958e-001, 4.4336453080177e-001, 4.4336453080177e-001,
+ -4.9695348739624e-001, 4.9695348739624e-001, -4.9678206443787e-001, 4.9678206443787e-001,
+ 4.4184067845345e-001, 4.4184067845345e-001, 4.4031739234924e-001, 4.4031739234924e-001,
+ -4.9660596251488e-001, 4.9660596251488e-001, -4.9642521142960e-001, 4.9642521142960e-001,
+ 4.3879467248917e-001, 4.3879467248917e-001, 4.3727248907089e-001, 4.3727248907089e-001,
+ -4.9623978137970e-001, 4.9623978137970e-001, -4.9604964256287e-001, 4.9604964256287e-001,
+ 4.3575096130371e-001, 4.3575096130371e-001, 4.3422996997833e-001, 4.3422996997833e-001,
+ -4.9585488438606e-001, 4.9585488438606e-001, -4.9565541744232e-001, 4.9565541744232e-001,
+ 4.3270963430405e-001, 4.3270963430405e-001, 4.3118995428085e-001, 4.3118995428085e-001,
+ -4.9545133113861e-001, 4.9545133113861e-001, -4.9524253606796e-001, 4.9524253606796e-001,
+ 4.2967087030411e-001, 4.2967087030411e-001, 4.2815247178078e-001, 4.2815247178078e-001,
+ -4.9502909183502e-001, 4.9502909183502e-001, -4.9481099843979e-001, 4.9481099843979e-001,
+ 4.2663475871086e-001, 4.2663475871086e-001, 4.2511773109436e-001, 4.2511773109436e-001,
+ -4.9458825588226e-001, 4.9458825588226e-001, -4.9436083436012e-001, 4.9436083436012e-001,
+ 4.2360138893127e-001, 4.2360138893127e-001, 4.2208579182625e-001, 4.2208579182625e-001,
+ -4.9412879347801e-001, 4.9412879347801e-001, -4.9389207363129e-001, 4.9389207363129e-001,
+ 4.2057090997696e-001, 4.2057090997696e-001, 4.1905680298805e-001, 4.1905680298805e-001,
+ -4.9365070462227e-001, 4.9365070462227e-001, -4.9340468645096e-001, 4.9340468645096e-001,
+ 4.1754344105721e-001, 4.1754344105721e-001, 4.1603085398674e-001, 4.1603085398674e-001,
+ -4.9315404891968e-001, 4.9315404891968e-001, -4.9289876222610e-001, 4.9289876222610e-001,
+ 4.1451907157898e-001, 4.1451907157898e-001, 4.1300806403160e-001, 4.1300806403160e-001,
+ -4.9263882637024e-001, 4.9263882637024e-001, -4.9237424135208e-001, 4.9237424135208e-001,
+ 4.1149789094925e-001, 4.1149789094925e-001, 4.0998855233192e-001, 4.0998855233192e-001,
+ -4.9210503697395e-001, 4.9210503697395e-001, -4.9183121323586e-001, 4.9183121323586e-001,
+ 4.0848004817963e-001, 4.0848004817963e-001, 4.0697240829468e-001, 4.0697240829468e-001,
+ -4.9155274033546e-001, 4.9155274033546e-001, -4.9126964807510e-001, 4.9126964807510e-001,
+ 4.0546566247940e-001, 4.0546566247940e-001, 4.0395981073380e-001, 4.0395981073380e-001,
+ -4.9098193645477e-001, 4.9098193645477e-001, -4.9068960547447e-001, 4.9068960547447e-001,
+ 4.0245485305786e-001, 4.0245485305786e-001, 4.0095078945160e-001, 4.0095078945160e-001,
+ -4.9039262533188e-001, 4.9039262533188e-001, -4.9009105563164e-001, 4.9009105563164e-001,
+ 3.9944767951965e-001, 3.9944767951965e-001, 3.9794552326202e-001, 3.9794552326202e-001,
+ -4.8978489637375e-001, 4.8978489637375e-001, -4.8947408795357e-001, 4.8947408795357e-001,
+ 3.9644432067871e-001, 3.9644432067871e-001, 3.9494407176971e-001, 3.9494407176971e-001,
+ -4.8915868997574e-001, 4.8915868997574e-001, -4.8883867263794e-001, 4.8883867263794e-001,
+ 3.9344483613968e-001, 3.9344483613968e-001, 3.9194661378860e-001, 3.9194661378860e-001,
+ -4.8851406574249e-001, 4.8851406574249e-001, -4.8818486928940e-001, 4.8818486928940e-001,
+ 3.9044937491417e-001, 3.9044937491417e-001, 3.8895317912102e-001, 3.8895317912102e-001,
+ -4.8785105347633e-001, 4.8785105347633e-001, -4.8751267790794e-001, 4.8751267790794e-001,
+ 3.8745802640915e-001, 3.8745802640915e-001, 3.8596394658089e-001, 3.8596394658089e-001,
+ -4.8716968297958e-001, 4.8716968297958e-001, -4.8682212829590e-001, 4.8682212829590e-001,
+ 3.8447093963623e-001, 3.8447093963623e-001, 3.8297903537750e-001, 3.8297903537750e-001,
+ -4.8646998405457e-001, 4.8646998405457e-001, -4.8611325025558e-001, 4.8611325025558e-001,
+ 3.8148820400238e-001, 3.8148820400238e-001, 3.7999847531319e-001, 3.7999847531319e-001,
+ -4.8575195670128e-001, 4.8575195670128e-001, -4.8538607358932e-001, 4.8538607358932e-001,
+ 3.7850990891457e-001, 3.7850990891457e-001, 3.7702247500420e-001, 3.7702247500420e-001,
+ -4.8501563072205e-001, 4.8501563072205e-001, -4.8464062809944e-001, 4.8464062809944e-001,
+ 3.7553620338440e-001, 3.7553620338440e-001, 3.7405109405518e-001, 3.7405109405518e-001,
+ -4.8426103591919e-001, 4.8426103591919e-001, -4.8387691378593e-001, 4.8387691378593e-001,
+ 3.7256717681885e-001, 3.7256717681885e-001, 3.7108445167542e-001, 3.7108445167542e-001,
+ -4.8348823189735e-001, 4.8348823189735e-001, -4.8309499025345e-001, 4.8309499025345e-001,
+ 3.6960291862488e-001, 3.6960291862488e-001, 3.6812263727188e-001, 3.6812263727188e-001,
+ -4.8269721865654e-001, 4.8269721865654e-001, -4.8229488730431e-001, 4.8229488730431e-001,
+ 3.6664360761642e-001, 3.6664360761642e-001, 3.6516582965851e-001, 3.6516582965851e-001,
+ -4.8188802599907e-001, 4.8188802599907e-001, -4.8147663474083e-001, 4.8147663474083e-001,
+ 3.6368930339813e-001, 3.6368930339813e-001, 3.6221408843994e-001, 3.6221408843994e-001,
+ -4.8106071352959e-001, 4.8106071352959e-001, -4.8064023256302e-001, 4.8064023256302e-001,
+ 3.6074015498161e-001, 3.6074015498161e-001, 3.5926753282547e-001, 3.5926753282547e-001,
+ -4.8021525144577e-001, 4.8021525144577e-001, -4.7978577017784e-001, 4.7978577017784e-001,
+ 3.5779622197151e-001, 3.5779622197151e-001, 3.5632628202438e-001, 3.5632628202438e-001,
+ -4.7935172915459e-001, 4.7935172915459e-001, -4.7891321778297e-001, 4.7891321778297e-001,
+ 3.5485765337944e-001, 3.5485765337944e-001, 3.5339039564133e-001, 3.5339039564133e-001,
+ -4.7847017645836e-001, 4.7847017645836e-001, -4.7802263498306e-001, 4.7802263498306e-001,
+ 3.5192453861237e-001, 3.5192453861237e-001, 3.5046008229256e-001, 3.5046008229256e-001,
+ -4.7757059335709e-001, 4.7757059335709e-001, -4.7711405158043e-001, 4.7711405158043e-001,
+ 3.4899702668190e-001, 3.4899702668190e-001, 3.4753537178040e-001, 3.4753537178040e-001,
+ -4.7665300965309e-001, 4.7665300965309e-001, -4.7618749737740e-001, 4.7618749737740e-001,
+ 3.4607517719269e-001, 3.4607517719269e-001, 3.4461641311646e-001, 3.4461641311646e-001,
+ -4.7571751475334e-001, 4.7571751475334e-001, -4.7524303197861e-001, 4.7524303197861e-001,
+ 3.4315913915634e-001, 3.4315913915634e-001, 3.4170329570770e-001, 3.4170329570770e-001,
+ -4.7476407885551e-001, 4.7476407885551e-001, -4.7428068518639e-001, 4.7428068518639e-001,
+ 3.4024900197983e-001, 3.4024900197983e-001, 3.3879613876343e-001, 3.3879613876343e-001,
+ -4.7379279136658e-001, 4.7379279136658e-001, -4.7330045700073e-001, 4.7330045700073e-001,
+ 3.3734485507011e-001, 3.3734485507011e-001, 3.3589506149292e-001, 3.3589506149292e-001,
+ -4.7280365228653e-001, 4.7280365228653e-001, -4.7230240702629e-001, 4.7230240702629e-001,
+ 3.3444684743881e-001, 3.3444684743881e-001, 3.3300018310547e-001, 3.3300018310547e-001,
+ -4.7179672122002e-001, 4.7179672122002e-001, -4.7128659486771e-001, 4.7128659486771e-001,
+ 3.3155506849289e-001, 3.3155506849289e-001, 3.3011156320572e-001, 3.3011156320572e-001,
+ -4.7077202796936e-001, 4.7077202796936e-001, -4.7025302052498e-001, 4.7025302052498e-001,
+ 3.2866963744164e-001, 3.2866963744164e-001, 3.2722932100296e-001, 3.2722932100296e-001,
+ -4.6972960233688e-001, 4.6972960233688e-001, -4.6920177340508e-001, 4.6920177340508e-001,
+ 3.2579064369202e-001, 3.2579064369202e-001, 3.2435363531113e-001, 3.2435363531113e-001,
+ -4.6866950392723e-001, 4.6866950392723e-001, -4.6813282370567e-001, 4.6813282370567e-001,
+ 3.2291823625565e-001, 3.2291823625565e-001, 3.2148450613022e-001, 3.2148450613022e-001,
+ -4.6759176254272e-001, 4.6759176254272e-001, -4.6704626083374e-001, 4.6704626083374e-001,
+ 3.2005247473717e-001, 3.2005247473717e-001, 3.1862211227417e-001, 3.1862211227417e-001,
+ -4.6649640798569e-001, 4.6649640798569e-001, -4.6594214439392e-001, 4.6594214439392e-001,
+ 3.1719350814819e-001, 3.1719350814819e-001, 3.1576657295227e-001, 3.1576657295227e-001,
+ -4.6538347005844e-001, 4.6538347005844e-001, -4.6482044458389e-001, 4.6482044458389e-001,
+ 3.1434139609337e-001, 3.1434139609337e-001, 3.1291794776917e-001, 3.1291794776917e-001,
+ -4.6425303816795e-001, 4.6425303816795e-001, -4.6368125081062e-001, 4.6368125081062e-001,
+ 3.1149628758430e-001, 3.1149628758430e-001, 3.1007638573647e-001, 3.1007638573647e-001,
+ -4.6310511231422e-001, 4.6310511231422e-001, -4.6252462267876e-001, 4.6252462267876e-001,
+ 3.0865827202797e-001, 3.0865827202797e-001, 3.0724197626114e-001, 3.0724197626114e-001,
+ -4.6193975210190e-001, 4.6193975210190e-001, -4.6135056018829e-001, 4.6135056018829e-001,
+ 3.0582746863365e-001, 3.0582746863365e-001, 3.0441480875015e-001, 3.0441480875015e-001,
+ -4.6075701713562e-001, 4.6075701713562e-001, -4.6015912294388e-001, 4.6015912294388e-001,
+ 3.0300396680832e-001, 3.0300396680832e-001, 3.0159500241280e-001, 3.0159500241280e-001,
+ -4.5955693721771e-001, 4.5955693721771e-001, -4.5895040035248e-001, 4.5895040035248e-001,
+ 3.0018788576126e-001, 3.0018788576126e-001, 2.9878267645836e-001, 2.9878267645836e-001,
+ -4.5833954215050e-001, 4.5833954215050e-001, -4.5772436261177e-001, 4.5772436261177e-001,
+ 2.9737934470177e-001, 2.9737934470177e-001, 2.9597792029381e-001, 2.9597792029381e-001,
+ -4.5710486173630e-001, 4.5710486173630e-001, -4.5648109912872e-001, 4.5648109912872e-001,
+ 2.9457840323448e-001, 2.9457840323448e-001, 2.9318082332611e-001, 2.9318082332611e-001,
+ -4.5585301518440e-001, 4.5585301518440e-001, -4.5522063970566e-001, 4.5522063970566e-001,
+ 2.9178521037102e-001, 2.9178521037102e-001, 2.9039156436920e-001, 2.9039156436920e-001,
+ -4.5458400249481e-001, 4.5458400249481e-001, -4.5394304394722e-001, 4.5394304394722e-001,
+ 2.8899985551834e-001, 2.8899985551834e-001, 2.8761017322540e-001, 2.8761017322540e-001,
+ -4.5329785346985e-001, 4.5329785346985e-001, -4.5264837145805e-001, 4.5264837145805e-001,
+ 2.8622245788574e-001, 2.8622245788574e-001, 2.8483676910400e-001, 2.8483676910400e-001,
+ -4.5199465751648e-001, 4.5199465751648e-001, -4.5133665204048e-001, 4.5133665204048e-001,
+ 2.8345310688019e-001, 2.8345310688019e-001, 2.8207147121429e-001, 2.8207147121429e-001,
+ -4.5067441463470e-001, 4.5067441463470e-001, -4.5000794529915e-001, 4.5000794529915e-001,
+ 2.8069186210632e-001, 2.8069186210632e-001, 2.7931433916092e-001, 2.7931433916092e-001,
+ -4.4933724403381e-001, 4.4933724403381e-001, -4.4866228103638e-001, 4.4866228103638e-001,
+ 2.7793890237808e-001, 2.7793890237808e-001, 2.7656558156013e-001, 2.7656558156013e-001,
+ -4.4798311591148e-001, 4.4798311591148e-001, -4.4729974865913e-001, 4.4729974865913e-001,
+ 2.7519434690475e-001, 2.7519434690475e-001, 2.7382519841194e-001, 2.7382519841194e-001,
+ -4.4661214947701e-001, 4.4661214947701e-001, -4.4592034816742e-001, 4.4592034816742e-001,
+ 2.7245819568634e-001, 2.7245819568634e-001, 2.7109333872795e-001, 2.7109333872795e-001,
+ -4.4522434473038e-001, 4.4522434473038e-001, -4.4452416896820e-001, 4.4452416896820e-001,
+ 2.6973062753677e-001, 2.6973062753677e-001, 2.6837009191513e-001, 2.6837009191513e-001,
+ -4.4381982088089e-001, 4.4381982088089e-001, -4.4311127066612e-001, 4.4311127066612e-001,
+ 2.6701176166534e-001, 2.6701176166534e-001, 2.6565557718277e-001, 2.6565557718277e-001,
+ -4.4239854812622e-001, 4.4239854812622e-001, -4.4168165326118e-001, 4.4168165326118e-001,
+ 2.6430162787437e-001, 2.6430162787437e-001, 2.6294988393784e-001, 2.6294988393784e-001,
+ -4.4096061587334e-001, 4.4096061587334e-001, -4.4023543596268e-001, 4.4023543596268e-001,
+ 2.6160037517548e-001, 2.6160037517548e-001, 2.6025313138962e-001, 2.6025313138962e-001,
+ -4.3950611352921e-001, 4.3950611352921e-001, -4.3877264857292e-001, 4.3877264857292e-001,
+ 2.5890809297562e-001, 2.5890809297562e-001, 2.5756537914276e-001, 2.5756537914276e-001,
+ -4.3803504109383e-001, 4.3803504109383e-001, -4.3729332089424e-001, 4.3729332089424e-001,
+ 2.5622493028641e-001, 2.5622493028641e-001, 2.5488674640656e-001, 2.5488674640656e-001,
+ -4.3654748797417e-001, 4.3654748797417e-001, -4.3579754233360e-001, 4.3579754233360e-001,
+ 2.5355088710785e-001, 2.5355088710785e-001, 2.5221735239029e-001, 2.5221735239029e-001,
+ -4.3504348397255e-001, 4.3504348397255e-001, -4.3428534269333e-001, 4.3428534269333e-001,
+ 2.5088614225388e-001, 2.5088614225388e-001, 2.4955731630325e-001, 2.4955731630325e-001,
+ -4.3352311849594e-001, 4.3352311849594e-001, -4.3275681138039e-001, 4.3275681138039e-001,
+ 2.4823081493378e-001, 2.4823081493378e-001, 2.4690666794777e-001, 2.4690666794777e-001,
+ -4.3198642134666e-001, 4.3198642134666e-001, -4.3121197819710e-001, 4.3121197819710e-001,
+ 2.4558493494987e-001, 2.4558493494987e-001, 2.4426555633545e-001, 2.4426555633545e-001,
+ -4.3043345212936e-001, 4.3043345212936e-001, -4.2965090274811e-001, 4.2965090274811e-001,
+ 2.4294862151146e-001, 2.4294862151146e-001, 2.4163410067558e-001, 2.4163410067558e-001,
+ -4.2886430025101e-001, 4.2886430025101e-001, -4.2807367444038e-001, 4.2807367444038e-001,
+ 2.4032199382782e-001, 2.4032199382782e-001, 2.3901236057281e-001, 2.3901236057281e-001,
+ -4.2727899551392e-001, 4.2727899551392e-001, -4.2648029327393e-001, 4.2648029327393e-001,
+ 2.3770514130592e-001, 2.3770514130592e-001, 2.3640042543411e-001, 2.3640042543411e-001,
+ -4.2567759752274e-001, 4.2567759752274e-001, -4.2487087845802e-001, 4.2487087845802e-001,
+ 2.3509818315506e-001, 2.3509818315506e-001, 2.3379844427109e-001, 2.3379844427109e-001,
+ -4.2406016588211e-001, 4.2406016588211e-001, -4.2324545979500e-001, 4.2324545979500e-001,
+ 2.3250117897987e-001, 2.3250117897987e-001, 2.3120644688606e-001, 2.3120644688606e-001,
+ -4.2242678999901e-001, 4.2242678999901e-001, -4.2160412669182e-001, 4.2160412669182e-001,
+ 2.2991424798965e-001, 2.2991424798965e-001, 2.2862461209297e-001, 2.2862461209297e-001,
+ -4.2077746987343e-001, 4.2077746987343e-001, -4.1994687914848e-001, 4.1994687914848e-001,
+ 2.2733750939369e-001, 2.2733750939369e-001, 2.2605296969414e-001, 2.2605296969414e-001,
+ -4.1911235451698e-001, 4.1911235451698e-001, -4.1827386617661e-001, 4.1827386617661e-001,
+ 2.2477099299431e-001, 2.2477099299431e-001, 2.2349163889885e-001, 2.2349163889885e-001,
+ -4.1743144392967e-001, 4.1743144392967e-001, -4.1658508777618e-001, 4.1658508777618e-001,
+ 2.2221487760544e-001, 2.2221487760544e-001, 2.2094073891640e-001, 2.2094073891640e-001,
+ -4.1573479771614e-001, 4.1573479771614e-001, -4.1488060355186e-001, 4.1488060355186e-001,
+ 2.1966919302940e-001, 2.1966919302940e-001, 2.1840032935143e-001, 2.1840032935143e-001,
+ -4.1402250528336e-001, 4.1402250528336e-001, -4.1316053271294e-001, 4.1316053271294e-001,
+ 2.1713408827782e-001, 2.1713408827782e-001, 2.1587052941322e-001, 2.1587052941322e-001,
+ -4.1229465603828e-001, 4.1229465603828e-001, -4.1142487525940e-001, 4.1142487525940e-001,
+ 2.1460962295532e-001, 2.1460962295532e-001, 2.1335139870644e-001, 2.1335139870644e-001,
+ -4.1055124998093e-001, 4.1055124998093e-001, -4.0967375040054e-001, 4.0967375040054e-001,
+ 2.1209588646889e-001, 2.1209588646889e-001, 2.1084308624268e-001, 2.1084308624268e-001,
+ -4.0879240632057e-001, 4.0879240632057e-001, -4.0790718793869e-001, 4.0790718793869e-001,
+ 2.0959302783012e-001, 2.0959302783012e-001, 2.0834565162659e-001, 2.0834565162659e-001,
+ -4.0701815485954e-001, 4.0701815485954e-001, -4.0612527728081e-001, 4.0612527728081e-001,
+ 2.0710107684135e-001, 2.0710107684135e-001, 2.0585921406746e-001, 2.0585921406746e-001,
+ -4.0522858500481e-001, 4.0522858500481e-001, -4.0432807803154e-001, 4.0432807803154e-001,
+ 2.0462015271187e-001, 2.0462015271187e-001, 2.0338383316994e-001, 2.0338383316994e-001,
+ -4.0342378616333e-001, 4.0342378616333e-001, -4.0251564979553e-001, 4.0251564979553e-001,
+ 2.0215034484863e-001, 2.0215034484863e-001, 2.0091962814331e-001, 2.0091962814331e-001,
+ -4.0160375833511e-001, 4.0160375833511e-001, -4.0068808197975e-001, 4.0068808197975e-001,
+ 1.9969174265862e-001, 1.9969174265862e-001, 1.9846668839455e-001, 1.9846668839455e-001,
+ -3.9976862072945e-001, 3.9976862072945e-001, -3.9884540438652e-001, 3.9884540438652e-001,
+ 1.9724446535110e-001, 1.9724446535110e-001, 1.9602510333061e-001, 1.9602510333061e-001,
+ -3.9791843295097e-001, 3.9791843295097e-001, -3.9698773622513e-001, 3.9698773622513e-001,
+ 1.9480860233307e-001, 1.9480860233307e-001, 1.9359496235847e-001, 1.9359496235847e-001,
+ -3.9605328440666e-001, 3.9605328440666e-001, -3.9511510729790e-001, 3.9511510729790e-001,
+ 1.9238418340683e-001, 1.9238418340683e-001, 1.9117632508278e-001, 1.9117632508278e-001,
+ -3.9417320489883e-001, 3.9417320489883e-001, -3.9322760701180e-001, 3.9322760701180e-001,
+ 1.8997138738632e-001, 1.8997138738632e-001, 1.8876934051514e-001, 1.8876934051514e-001,
+ -3.9227828383446e-001, 3.9227828383446e-001, -3.9132529497147e-001, 3.9132529497147e-001,
+ 1.8757024407387e-001, 1.8757024407387e-001, 1.8637409806252e-001, 1.8637409806252e-001,
+ -3.9036861062050e-001, 3.9036861062050e-001, -3.8940826058388e-001, 3.8940826058388e-001,
+ 1.8518087267876e-001, 1.8518087267876e-001, 1.8399062752724e-001, 1.8399062752724e-001,
+ -3.8844421505928e-001, 3.8844421505928e-001, -3.8747653365135e-001, 3.8747653365135e-001,
+ 1.8280336260796e-001, 1.8280336260796e-001, 1.8161904811859e-001, 1.8161904811859e-001,
+ -3.8650521636009e-001, 3.8650521636009e-001, -3.8553026318550e-001, 3.8553026318550e-001,
+ 1.8043777346611e-001, 1.8043777346611e-001, 1.7925947904587e-001, 1.7925947904587e-001,
+ -3.8455167412758e-001, 3.8455167412758e-001, -3.8356944918633e-001, 3.8356944918633e-001,
+ 1.7808422446251e-001, 1.7808422446251e-001, 1.7691197991371e-001, 1.7691197991371e-001,
+ -3.8258361816406e-001, 3.8258361816406e-001, -3.8159421086311e-001, 3.8159421086311e-001,
+ 1.7574280500412e-001, 1.7574280500412e-001, 1.7457664012909e-001, 1.7457664012909e-001,
+ -3.8060119748116e-001, 3.8060119748116e-001, -3.7960457801819e-001, 3.7960457801819e-001,
+ 1.7341357469559e-001, 1.7341357469559e-001, 1.7225357890129e-001, 1.7225357890129e-001,
+ -3.7860441207886e-001, 3.7860441207886e-001, -3.7760066986084e-001, 3.7760066986084e-001,
+ 1.7109665274620e-001, 1.7109665274620e-001, 1.6994282603264e-001, 1.6994282603264e-001,
+ -3.7659338116646e-001, 3.7659338116646e-001, -3.7558254599571e-001, 3.7558254599571e-001,
+ 1.6879209876060e-001, 1.6879209876060e-001, 1.6764450073242e-001, 1.6764450073242e-001,
+ -3.7456819415092e-001, 3.7456819415092e-001, -3.7355029582977e-001, 3.7355029582977e-001,
+ 1.6650003194809e-001, 1.6650003194809e-001, 1.6535869240761e-001, 1.6535869240761e-001,
+ -3.7252888083458e-001, 3.7252888083458e-001, -3.7150397896767e-001, 3.7150397896767e-001,
+ 1.6422051191330e-001, 1.6422051191330e-001, 1.6308549046516e-001, 1.6308549046516e-001,
+ -3.7047556042671e-001, 3.7047556042671e-001, -3.6944365501404e-001, 3.6944365501404e-001,
+ 1.6195362806320e-001, 1.6195362806320e-001, 1.6082498431206e-001, 1.6082498431206e-001,
+ -3.6840826272964e-001, 3.6840826272964e-001, -3.6736944317818e-001, 3.6736944317818e-001,
+ 1.5969949960709e-001, 1.5969949960709e-001, 1.5857723355293e-001, 1.5857723355293e-001,
+ -3.6632713675499e-001, 3.6632713675499e-001, -3.6528137326241e-001, 3.6528137326241e-001,
+ 1.5745815634727e-001, 1.5745815634727e-001, 1.5634232759476e-001, 1.5634232759476e-001,
+ -3.6423218250275e-001, 3.6423218250275e-001, -3.6317956447601e-001, 3.6317956447601e-001,
+ 1.5522971749306e-001, 1.5522971749306e-001, 1.5412035584450e-001, 1.5412035584450e-001,
+ -3.6212354898453e-001, 3.6212354898453e-001, -3.6106407642365e-001, 3.6106407642365e-001,
+ 1.5301427245140e-001, 1.5301427245140e-001, 1.5191143751144e-001, 1.5191143751144e-001,
+ -3.6000123620033e-001, 3.6000123620033e-001, -3.5893502831459e-001, 3.5893502831459e-001,
+ 1.5081188082695e-001, 1.5081188082695e-001, 1.4971560239792e-001, 1.4971560239792e-001,
+ -3.5786539316177e-001, 3.5786539316177e-001, -3.5679242014885e-001, 3.5679242014885e-001,
+ 1.4862263202667e-001, 1.4862263202667e-001, 1.4753293991089e-001, 1.4753293991089e-001,
+ -3.5571607947350e-001, 3.5571607947350e-001, -3.5463640093803e-001, 3.5463640093803e-001,
+ 1.4644661545753e-001, 1.4644661545753e-001, 1.4536359906197e-001, 1.4536359906197e-001,
+ -3.5355338454247e-001, 3.5355338454247e-001, -3.5246706008911e-001, 3.5246706008911e-001,
+ 1.4428392052650e-001, 1.4428392052650e-001, 1.4320757985115e-001, 1.4320757985115e-001,
+ -3.5137736797333e-001, 3.5137736797333e-001, -3.5028439760208e-001, 3.5028439760208e-001,
+ 1.4213460683823e-001, 1.4213460683823e-001, 1.4106497168541e-001, 1.4106497168541e-001,
+ -3.4918811917305e-001, 3.4918811917305e-001, -3.4808856248856e-001, 3.4808856248856e-001,
+ 1.3999876379967e-001, 1.3999876379967e-001, 1.3893592357635e-001, 1.3893592357635e-001,
+ -3.4698572754860e-001, 3.4698572754860e-001, -3.4587964415550e-001, 3.4587964415550e-001,
+ 1.3787645101547e-001, 1.3787645101547e-001, 1.3682043552399e-001, 1.3682043552399e-001,
+ -3.4477028250694e-001, 3.4477028250694e-001, -3.4365767240524e-001, 3.4365767240524e-001,
+ 1.3576781749725e-001, 1.3576781749725e-001, 1.3471862673759e-001, 1.3471862673759e-001,
+ -3.4254184365273e-001, 3.4254184365273e-001, -3.4142276644707e-001, 3.4142276644707e-001,
+ 1.3367286324501e-001, 1.3367286324501e-001, 1.3263055682182e-001, 1.3263055682182e-001,
+ -3.4030050039291e-001, 3.4030050039291e-001, -3.3917501568794e-001, 3.3917501568794e-001,
+ 1.3159173727036e-001, 1.3159173727036e-001, 1.3055634498596e-001, 1.3055634498596e-001,
+ -3.3804637193680e-001, 3.3804637193680e-001, -3.3691450953484e-001, 3.3691450953484e-001,
+ 1.2952443957329e-001, 1.2952443957329e-001, 1.2849602103233e-001, 1.2849602103233e-001,
+ -3.3577948808670e-001, 3.3577948808670e-001, -3.3464130759239e-001, 3.3464130759239e-001,
+ 1.2747111916542e-001, 1.2747111916542e-001, 1.2644970417023e-001, 1.2644970417023e-001,
+ -3.3349996805191e-001, 3.3349996805191e-001, -3.3235549926758e-001, 3.3235549926758e-001,
+ 1.2543180584908e-001, 1.2543180584908e-001, 1.2441745400429e-001, 1.2441745400429e-001,
+ -3.3120790123940e-001, 3.3120790123940e-001, -3.3005717396736e-001, 3.3005717396736e-001,
+ 1.2340661883354e-001, 1.2340661883354e-001, 1.2239933013916e-001, 1.2239933013916e-001,
+ -3.2890334725380e-001, 3.2890334725380e-001, -3.2774642109871e-001, 3.2774642109871e-001,
+ 1.2139558792114e-001, 1.2139558792114e-001, 1.2039542198181e-001, 1.2039542198181e-001,
+ -3.2658642530441e-001, 3.2658642530441e-001, -3.2542335987091e-001, 3.2542335987091e-001,
+ 1.1939880251884e-001, 1.1939880251884e-001, 1.1840578913689e-001, 1.1840578913689e-001,
+ -3.2425719499588e-001, 3.2425719499588e-001, -3.2308802008629e-001, 3.2308802008629e-001,
+ 1.1741638183594e-001, 1.1741638183594e-001, 1.1643055081367e-001, 1.1643055081367e-001,
+ -3.2191577553749e-001, 3.2191577553749e-001, -3.2074052095413e-001, 3.2074052095413e-001,
+ 1.1544832587242e-001, 1.1544832587242e-001, 1.1446973681450e-001, 1.1446973681450e-001,
+ -3.1956222653389e-001, 3.1956222653389e-001, -3.1838095188141e-001, 3.1838095188141e-001,
+ 1.1349478363991e-001, 1.1349478363991e-001, 1.1252346634865e-001, 1.1252346634865e-001,
+ -3.1719663739204e-001, 3.1719663739204e-001, -3.1600937247276e-001, 3.1600937247276e-001,
+ 1.1155578494072e-001, 1.1155578494072e-001, 1.1059173941612e-001, 1.1059173941612e-001,
+ -3.1481912732124e-001, 3.1481912732124e-001, -3.1362590193748e-001, 3.1362590193748e-001,
+ 1.0963138937950e-001, 1.0963138937950e-001, 1.0867470502853e-001, 1.0867470502853e-001,
+ -3.1242975592613e-001, 3.1242975592613e-001, -3.1123065948486e-001, 3.1123065948486e-001,
+ 1.0772171616554e-001, 1.0772171616554e-001, 1.0677239298820e-001, 1.0677239298820e-001,
+ -3.1002861261368e-001, 3.1002861261368e-001, -3.0882367491722e-001, 3.0882367491722e-001,
+ 1.0582679510117e-001, 1.0582679510117e-001, 1.0488489270210e-001, 1.0488489270210e-001,
+ -3.0761581659317e-001, 3.0761581659317e-001, -3.0640503764153e-001, 3.0640503764153e-001,
+ 1.0394671559334e-001, 1.0394671559334e-001, 1.0301226377487e-001, 1.0301226377487e-001,
+ -3.0519139766693e-001, 3.0519139766693e-001, -3.0397489666939e-001, 3.0397489666939e-001,
+ 1.0208156704903e-001, 1.0208156704903e-001, 1.0115459561348e-001, 1.0115459561348e-001,
+ -3.0275553464890e-001, 3.0275553464890e-001, -3.0153331160545e-001, 3.0153331160545e-001,
+ 1.0023137927055e-001, 1.0023137927055e-001, 9.9311918020248e-002, 9.9311918020248e-002,
+ -3.0030825734138e-001, 3.0030825734138e-001, -2.9908037185669e-001, 2.9908037185669e-001,
+ 9.8396241664886e-002, 9.8396241664886e-002, 9.7484350204468e-002, 9.7484350204468e-002,
+ -2.9784965515137e-001, 2.9784965515137e-001, -2.9661616683006e-001, 2.9661616683006e-001,
+ 9.6576213836670e-002, 9.6576213836670e-002, 9.5671921968460e-002, 9.5671921968460e-002,
+ -2.9537984728813e-001, 2.9537984728813e-001, -2.9414078593254e-001, 2.9414078593254e-001,
+ 9.4771414995193e-002, 9.4771414995193e-002, 9.3874722719193e-002, 9.3874722719193e-002,
+ -2.9289892315865e-001, 2.9289892315865e-001, -2.9165434837341e-001, 2.9165434837341e-001,
+ 9.2981845140457e-002, 9.2981845140457e-002, 9.2092812061310e-002, 9.2092812061310e-002,
+ -2.9040697216988e-001, 2.9040697216988e-001, -2.8915691375732e-001, 2.8915691375732e-001,
+ 9.1207593679428e-002, 9.1207593679428e-002, 9.0326249599457e-002, 9.0326249599457e-002,
+ -2.8790411353111e-001, 2.8790411353111e-001, -2.8664860129356e-001, 2.8664860129356e-001,
+ 8.9448750019073e-002, 8.9448750019073e-002, 8.8575124740601e-002, 8.8575124740601e-002,
+ -2.8539037704468e-001, 2.8539037704468e-001, -2.8412947058678e-001, 2.8412947058678e-001,
+ 8.7705343961716e-002, 8.7705343961716e-002, 8.6839467287064e-002, 8.6839467287064e-002,
+ -2.8286591172218e-001, 2.8286591172218e-001, -2.8159967064857e-001, 2.8159967064857e-001,
+ 8.5977494716644e-002, 8.5977494716644e-002, 8.5119396448135e-002, 8.5119396448135e-002,
+ -2.8033080697060e-001, 2.8033080697060e-001, -2.7905926108360e-001, 2.7905926108360e-001,
+ 8.4265202283859e-002, 8.4265202283859e-002, 8.3414912223816e-002, 8.3414912223816e-002,
+ -2.7778512239456e-001, 2.7778512239456e-001, -2.7650836110115e-001, 2.7650836110115e-001,
+ 8.2568556070328e-002, 8.2568556070328e-002, 8.1726133823395e-002, 8.1726133823395e-002,
+ -2.7522900700569e-001, 2.7522900700569e-001, -2.7394703030586e-001, 2.7394703030586e-001,
+ 8.0887645483017e-002, 8.0887645483017e-002, 8.0053120851517e-002, 8.0053120851517e-002,
+ -2.7266249060631e-001, 2.7266249060631e-001, -2.7137538790703e-001, 2.7137538790703e-001,
+ 7.9222530126572e-002, 7.9222530126572e-002, 7.8395873308182e-002, 7.8395873308182e-002,
+ -2.7008575201035e-001, 2.7008575201035e-001, -2.6879355311394e-001, 2.6879355311394e-001,
+ 7.7573210000992e-002, 7.7573210000992e-002, 7.6754540205002e-002, 7.6754540205002e-002,
+ -2.6749882102013e-001, 2.6749882102013e-001, -2.6620155572891e-001, 2.6620155572891e-001,
+ 7.5939834117889e-002, 7.5939834117889e-002, 7.5129121541977e-002, 7.5129121541977e-002,
+ -2.6490181684494e-001, 2.6490181684494e-001, -2.6359957456589e-001, 2.6359957456589e-001,
+ 7.4322402477264e-002, 7.4322402477264e-002, 7.3519706726074e-002, 7.3519706726074e-002,
+ -2.6229485869408e-001, 2.6229485869408e-001, -2.6098763942719e-001, 2.6098763942719e-001,
+ 7.2721004486084e-002, 7.2721004486084e-002, 7.1926325559616e-002, 7.1926325559616e-002,
+ -2.5967800617218e-001, 2.5967800617218e-001, -2.5836589932442e-001, 2.5836589932442e-001,
+ 7.1135699748993e-002, 7.1135699748993e-002, 7.0349097251892e-002, 7.0349097251892e-002,
+ -2.5705137848854e-001, 2.5705137848854e-001, -2.5573444366455e-001, 2.5573444366455e-001,
+ 6.9566547870636e-002, 6.9566547870636e-002, 6.8788021802902e-002, 6.8788021802902e-002,
+ -2.5441506505013e-001, 2.5441506505013e-001, -2.5309333205223e-001, 2.5309333205223e-001,
+ 6.8013578653336e-002, 6.8013578653336e-002, 6.7243188619614e-002, 6.7243188619614e-002,
+ -2.5176918506622e-001, 2.5176918506622e-001, -2.5044268369675e-001, 2.5044268369675e-001,
+ 6.6476881504059e-002, 6.6476881504059e-002, 6.5714657306671e-002, 6.5714657306671e-002,
+ -2.4911384284496e-001, 2.4911384284496e-001, -2.4778263270855e-001, 2.4778263270855e-001,
+ 6.4956516027451e-002, 6.4956516027451e-002, 6.4202457666397e-002, 6.4202457666397e-002,
+ -2.4644909799099e-001, 2.4644909799099e-001, -2.4511325359344e-001, 2.4511325359344e-001,
+ 6.3452512025833e-002, 6.3452512025833e-002, 6.2706679105759e-002, 6.2706679105759e-002,
+ -2.4377508461475e-001, 2.4377508461475e-001, -2.4243463575840e-001, 2.4243463575840e-001,
+ 6.1964958906174e-002, 6.1964958906174e-002, 6.1227351427078e-002, 6.1227351427078e-002,
+ -2.4109189212322e-001, 2.4109189212322e-001, -2.3974688351154e-001, 2.3974688351154e-001,
+ 6.0493886470795e-002, 6.0493886470795e-002, 5.9764564037323e-002, 5.9764564037323e-002,
+ -2.3839962482452e-001, 2.3839962482452e-001, -2.3705011606216e-001, 2.3705011606216e-001,
+ 5.9039384126663e-002, 5.9039384126663e-002, 5.8318346738815e-002, 5.8318346738815e-002,
+ -2.3569837212563e-001, 2.3569837212563e-001, -2.3434442281723e-001, 2.3434442281723e-001,
+ 5.7601451873779e-002, 5.7601451873779e-002, 5.6888729333878e-002, 5.6888729333878e-002,
+ -2.3298825323582e-001, 2.3298825323582e-001, -2.3162989318371e-001, 2.3162989318371e-001,
+ 5.6180179119110e-002, 5.6180179119110e-002, 5.5475831031799e-002, 5.5475831031799e-002,
+ -2.3026935756207e-001, 2.3026935756207e-001, -2.2890666127205e-001, 2.2890666127205e-001,
+ 5.4775655269623e-002, 5.4775655269623e-002, 5.4079651832581e-002, 5.4079651832581e-002,
+ -2.2754180431366e-001, 2.2754180431366e-001, -2.2617480158806e-001, 2.2617480158806e-001,
+ 5.3387850522995e-002, 5.3387850522995e-002, 5.2700251340866e-002, 5.2700251340866e-002,
+ -2.2480566799641e-001, 2.2480566799641e-001, -2.2343441843987e-001, 2.2343441843987e-001,
+ 5.2016884088516e-002, 5.2016884088516e-002, 5.1337718963623e-002, 5.1337718963623e-002,
+ -2.2206108272076e-001, 2.2206108272076e-001, -2.2068564593792e-001, 2.2068564593792e-001,
+ 5.0662755966187e-002, 5.0662755966187e-002, 4.9992054700851e-002, 4.9992054700851e-002,
+ -2.1930812299252e-001, 2.1930812299252e-001, -2.1792854368687e-001, 2.1792854368687e-001,
+ 4.9325585365295e-002, 4.9325585365295e-002, 4.8663347959518e-002, 4.8663347959518e-002,
+ -2.1654690802097e-001, 2.1654690802097e-001, -2.1516324579716e-001, 2.1516324579716e-001,
+ 4.8005342483521e-002, 4.8005342483521e-002, 4.7351628541946e-002, 4.7351628541946e-002,
+ -2.1377755701542e-001, 2.1377755701542e-001, -2.1238984167576e-001, 2.1238984167576e-001,
+ 4.6702146530151e-002, 4.6702146530151e-002, 4.6056956052780e-002, 4.6056956052780e-002,
+ -2.1100014448166e-001, 2.1100014448166e-001, -2.0960845053196e-001, 2.0960845053196e-001,
+ 4.5415997505188e-002, 4.5415997505188e-002, 4.4779360294342e-002, 4.4779360294342e-002,
+ -2.0821478962898e-001, 2.0821478962898e-001, -2.0681916177273e-001, 2.0681916177273e-001,
+ 4.4146984815598e-002, 4.4146984815598e-002, 4.3518900871277e-002, 4.3518900871277e-002,
+ -2.0542159676552e-001, 2.0542159676552e-001, -2.0402207970619e-001, 2.0402207970619e-001,
+ 4.2895138263702e-002, 4.2895138263702e-002, 4.2275637388229e-002, 4.2275637388229e-002,
+ -2.0262065529823e-001, 2.0262065529823e-001, -2.0121732354164e-001, 2.0121732354164e-001,
+ 4.1660457849503e-002, 4.1660457849503e-002, 4.1049599647522e-002, 4.1049599647522e-002,
+ -1.9981209933758e-001, 1.9981209933758e-001, -1.9840499758720e-001, 1.9840499758720e-001,
+ 4.0443062782288e-002, 4.0443062782288e-002, 3.9840877056122e-002, 3.9840877056122e-002,
+ -1.9699601829052e-001, 1.9699601829052e-001, -1.9558519124985e-001, 1.9558519124985e-001,
+ 3.9242982864380e-002, 3.9242982864380e-002, 3.8649439811707e-002, 3.8649439811707e-002,
+ -1.9417253136635e-001, 1.9417253136635e-001, -1.9275803864002e-001, 1.9275803864002e-001,
+ 3.8060247898102e-002, 3.8060247898102e-002, 3.7475377321243e-002, 3.7475377321243e-002,
+ -1.9134172797203e-001, 1.9134172797203e-001, -1.8992361426353e-001, 1.8992361426353e-001,
+ 3.6894887685776e-002, 3.6894887685776e-002, 3.6318749189377e-002, 3.6318749189377e-002,
+ -1.8850371241570e-001, 1.8850371241570e-001, -1.8708203732967e-001, 1.8708203732967e-001,
+ 3.5746961832047e-002, 3.5746961832047e-002, 3.5179555416107e-002, 3.5179555416107e-002,
+ -1.8565860390663e-001, 1.8565860390663e-001, -1.8423342704773e-001, 1.8423342704773e-001,
+ 3.4616529941559e-002, 3.4616529941559e-002, 3.4057855606079e-002, 3.4057855606079e-002,
+ -1.8280650675297e-001, 1.8280650675297e-001, -1.8137787282467e-001, 1.8137787282467e-001,
+ 3.3503592014313e-002, 3.3503592014313e-002, 3.2953739166260e-002, 3.2953739166260e-002,
+ -1.7994752526283e-001, 1.7994752526283e-001, -1.7851547896862e-001, 1.7851547896862e-001,
+ 3.2408237457275e-002, 3.2408237457275e-002, 3.1867176294327e-002, 3.1867176294327e-002,
+ -1.7708176374435e-001, 1.7708176374435e-001, -1.7564637959003e-001, 1.7564637959003e-001,
+ 3.1330496072769e-002, 3.1330496072769e-002, 3.0798226594925e-002, 3.0798226594925e-002,
+ -1.7420934140682e-001, 1.7420934140682e-001, -1.7277066409588e-001, 1.7277066409588e-001,
+ 3.0270397663116e-002, 3.0270397663116e-002, 2.9746979475021e-002, 2.9746979475021e-002,
+ -1.7133036255836e-001, 1.7133036255836e-001, -1.6988845169544e-001, 1.6988845169544e-001,
+ 2.9227972030640e-002, 2.9227972030640e-002, 2.8713405132294e-002, 2.8713405132294e-002,
+ -1.6844493150711e-001, 1.6844493150711e-001, -1.6699983179569e-001, 1.6699983179569e-001,
+ 2.8203278779984e-002, 2.8203278779984e-002, 2.7697592973709e-002, 2.7697592973709e-002,
+ -1.6555315256119e-001, 1.6555315256119e-001, -1.6410492360592e-001, 1.6410492360592e-001,
+ 2.7196347713470e-002, 2.7196347713470e-002, 2.6699542999268e-002, 2.6699542999268e-002,
+ -1.6265514492989e-001, 1.6265514492989e-001, -1.6120384633541e-001, 1.6120384633541e-001,
+ 2.6207208633423e-002, 2.6207208633423e-002, 2.5719314813614e-002, 2.5719314813614e-002,
+ -1.5975101292133e-001, 1.5975101292133e-001, -1.5829668939114e-001, 1.5829668939114e-001,
+ 2.5235921144485e-002, 2.5235921144485e-002, 2.4756968021393e-002, 2.4756968021393e-002,
+ -1.5684087574482e-001, 1.5684087574482e-001, -1.5538358688354e-001, 1.5538358688354e-001,
+ 2.4282485246658e-002, 2.4282485246658e-002, 2.3812502622604e-002, 2.3812502622604e-002,
+ -1.5392482280731e-001, 1.5392482280731e-001, -1.5246461331844e-001, 1.5246461331844e-001,
+ 2.3346990346909e-002, 2.3346990346909e-002, 2.2885948419571e-002, 2.2885948419571e-002,
+ -1.5100297331810e-001, 1.5100297331810e-001, -1.4953991770744e-001, 1.4953991770744e-001,
+ 2.2429406642914e-002, 2.2429406642914e-002, 2.1977365016937e-002, 2.1977365016937e-002,
+ -1.4807544648647e-001, 1.4807544648647e-001, -1.4660958945751e-001, 1.4660958945751e-001,
+ 2.1529823541641e-002, 2.1529823541641e-002, 2.1086782217026e-002, 2.1086782217026e-002,
+ -1.4514234662056e-001, 1.4514234662056e-001, -1.4367373287678e-001, 1.4367373287678e-001,
+ 2.0648270845413e-002, 2.0648270845413e-002, 2.0214229822159e-002, 2.0214229822159e-002,
+ -1.4220377802849e-001, 1.4220377802849e-001, -1.4073246717453e-001, 1.4073246717453e-001,
+ 1.9784748554230e-002, 1.9784748554230e-002, 1.9359767436981e-002, 1.9359767436981e-002,
+ -1.3925984501839e-001, 1.3925984501839e-001, -1.3778591156006e-001, 1.3778591156006e-001,
+ 1.8939286470413e-002, 1.8939286470413e-002, 1.8523365259171e-002, 1.8523365259171e-002,
+ -1.3631068170071e-001, 1.3631068170071e-001, -1.3483417034149e-001, 1.3483417034149e-001,
+ 1.8111974000931e-002, 1.8111974000931e-002, 1.7705112695694e-002, 1.7705112695694e-002,
+ -1.3335637748241e-001, 1.3335637748241e-001, -1.3187734782696e-001, 1.3187734782696e-001,
+ 1.7302781343460e-002, 1.7302781343460e-002, 1.6905009746552e-002, 1.6905009746552e-002,
+ -1.3039706647396e-001, 1.3039706647396e-001, -1.2891554832458e-001, 1.2891554832458e-001,
+ 1.6511768102646e-002, 1.6511768102646e-002, 1.6123086214066e-002, 1.6123086214066e-002,
+ -1.2743283808231e-001, 1.2743283808231e-001, -1.2594890594482e-001, 1.2594890594482e-001,
+ 1.5738964080811e-002, 1.5738964080811e-002, 1.5359371900558e-002, 1.5359371900558e-002,
+ -1.2446380406618e-001, 1.2446380406618e-001, -1.2297752499580e-001, 1.2297752499580e-001,
+ 1.4984369277954e-002, 1.4984369277954e-002, 1.4613926410675e-002, 1.4613926410675e-002,
+ -1.2149009108543e-001, 1.2149009108543e-001, -1.2000151723623e-001, 1.2000151723623e-001,
+ 1.4248043298721e-002, 1.4248043298721e-002, 1.3886749744415e-002, 1.3886749744415e-002,
+ -1.1851180344820e-001, 1.1851180344820e-001, -1.1702097952366e-001, 1.1702097952366e-001,
+ 1.3530015945435e-002, 1.3530015945435e-002, 1.3177871704102e-002, 1.3177871704102e-002,
+ -1.1552906036377e-001, 1.1552906036377e-001, -1.1403604596853e-001, 1.1403604596853e-001,
+ 1.2830317020416e-002, 1.2830317020416e-002, 1.2487322092056e-002, 1.2487322092056e-002,
+ -1.1254195868969e-001, 1.1254195868969e-001, -1.1104681342840e-001, 1.1104681342840e-001,
+ 1.2148946523666e-002, 1.2148946523666e-002, 1.1815130710602e-002, 1.1815130710602e-002,
+ -1.0955062508583e-001, 1.0955062508583e-001, -1.0805340111256e-001, 1.0805340111256e-001,
+ 1.1485934257507e-002, 1.1485934257507e-002, 1.1161327362061e-002, 1.1161327362061e-002,
+ -1.0655516386032e-001, 1.0655516386032e-001, -1.0505592077971e-001, 1.0505592077971e-001,
+ 1.0841310024261e-002, 1.0841310024261e-002, 1.0525912046432e-002, 1.0525912046432e-002,
+ -1.0355569422245e-001, 1.0355569422245e-001, -1.0205448418856e-001, 1.0205448418856e-001,
+ 1.0215103626251e-002, 1.0215103626251e-002, 9.9089443683624e-003, 9.9089443683624e-003,
+ -1.0055232048035e-001, 1.0055232048035e-001, -9.9049210548401e-002, 9.9049210548401e-002,
+ 9.6073746681213e-003, 9.6073746681213e-003, 9.3103945255280e-003, 9.3103945255280e-003,
+ -9.7545161843300e-002, 9.7545161843300e-002, -9.6040204167366e-002, 9.6040204167366e-002,
+ 9.0180635452271e-003, 9.0180635452271e-003, 8.7303519248962e-003, 8.7303519248962e-003,
+ -9.4534337520599e-002, 9.4534337520599e-002, -9.3027576804161e-002, 9.3027576804161e-002,
+ 8.4472596645355e-003, 8.4472596645355e-003, 8.1687867641449e-003, 8.1687867641449e-003,
+ -9.1519944369793e-002, 9.1519944369793e-002, -9.0011455118656e-002, 9.0011455118656e-002,
+ 7.8949630260468e-003, 7.8949630260468e-003, 7.6257586479187e-003, 7.6257586479187e-003,
+ -8.8502109050751e-002, 8.8502109050751e-002, -8.6991935968399e-002, 8.6991935968399e-002,
+ 7.3611736297607e-003, 7.3611736297607e-003, 7.1012377738953e-003, 7.1012377738953e-003,
+ -8.5480943322182e-002, 8.5480943322182e-002, -8.3969153463840e-002, 8.3969153463840e-002,
+ 6.8459510803223e-003, 6.8459510803223e-003, 6.5953135490417e-003, 6.5953135490417e-003,
+ -8.2456558942795e-002, 8.2456558942795e-002, -8.0943197011948e-002, 8.0943197011948e-002,
+ 6.3492953777313e-003, 6.3492953777313e-003, 6.1079263687134e-003, 6.1079263687134e-003,
+ -7.9429075121880e-002, 7.9429075121880e-002, -7.7914200723171e-002, 7.7914200723171e-002,
+ 5.8712065219879e-003, 5.8712065219879e-003, 5.6391656398773e-003, 5.6391656398773e-003,
+ -7.6398596167564e-002, 7.6398596167564e-002, -7.4882268905640e-002, 7.4882268905640e-002,
+ 5.4117441177368e-003, 5.4117441177368e-003, 5.1890015602112e-003, 5.1890015602112e-003,
+ -7.3365241289139e-002, 7.3365241289139e-002, -7.1847520768642e-002, 7.1847520768642e-002,
+ 4.9709081649780e-003, 4.9709081649780e-003, 4.7574639320374e-003, 4.7574639320374e-003,
+ -7.0329122245312e-002, 7.0329122245312e-002, -6.8810060620308e-002, 6.8810060620308e-002,
+ 4.5486688613892e-003, 4.5486688613892e-003, 4.3445825576782e-003, 4.3445825576782e-003,
+ -6.7290358245373e-002, 6.7290358245373e-002, -6.5770015120506e-002, 6.5770015120506e-002,
+ 4.1451156139374e-003, 4.1451156139374e-003, 3.9503574371338e-003, 3.9503574371338e-003,
+ -6.4249053597450e-002, 6.4249053597450e-002, -6.2727496027946e-002, 6.2727496027946e-002,
+ 3.7602186203003e-003, 3.7602186203003e-003, 3.5747885704041e-003, 3.5747885704041e-003,
+ -6.1205338686705e-002, 6.1205338686705e-002, -5.9682607650757e-002, 5.9682607650757e-002,
+ 3.3940374851227e-003, 3.3940374851227e-003, 3.2179355621338e-003, 3.2179355621338e-003,
+ -5.8159317821264e-002, 5.8159317821264e-002, -5.6635476648808e-002, 5.6635476648808e-002,
+ 3.0465126037598e-003, 3.0465126037598e-003, 2.8797686100006e-003, 2.8797686100006e-003,
+ -5.5111106485128e-002, 5.5111106485128e-002, -5.3586214780807e-002, 5.3586214780807e-002,
+ 2.7177035808563e-003, 2.7177035808563e-003, 2.5603473186493e-003, 2.5603473186493e-003,
+ -5.2060820162296e-002, 5.2060820162296e-002, -5.0534933805466e-002, 5.0534933805466e-002,
+ 2.4076402187347e-003, 2.4076402187347e-003, 2.2596120834351e-003, 2.2596120834351e-003,
+ -4.9008570611477e-002, 4.9008570611477e-002, -4.7481749206781e-002, 4.7481749206781e-002,
+ 2.1162927150726e-003, 2.1162927150726e-003, 1.9776523113251e-003, 1.9776523113251e-003,
+ -4.5954480767250e-002, 4.5954480767250e-002, -4.4426776468754e-002, 4.4426776468754e-002,
+ 1.8436908721924e-003, 1.8436908721924e-003, 1.7144381999969e-003, 1.7144381999969e-003,
+ -4.2898658663034e-002, 4.2898658663034e-002, -4.1370134800673e-002, 4.1370134800673e-002,
+ 1.5898644924164e-003, 1.5898644924164e-003, 1.4699697494507e-003, 1.4699697494507e-003,
+ -3.9841219782829e-002, 3.9841219782829e-002, -3.8311932235956e-002, 3.8311932235956e-002,
+ 1.3547837734222e-003, 1.3547837734222e-003, 1.2442767620087e-003, 1.2442767620087e-003,
+ -3.6782283335924e-002, 3.6782283335924e-002, -3.5252287983894e-002, 3.5252287983894e-002,
+ 1.1384785175323e-003, 1.1384785175323e-003, 1.0373592376709e-003, 1.0373592376709e-003,
+ -3.3721961081028e-002, 3.3721961081028e-002, -3.2191317528486e-002, 3.2191317528486e-002,
+ 9.4094872474670e-004, 9.4094872474670e-004, 8.4921717643738e-004, 8.4921717643738e-004,
+ -3.0660368502140e-002, 3.0660368502140e-002, -2.9129132628441e-002, 2.9129132628441e-002,
+ 7.6222419738770e-004, 7.6222419738770e-004, 6.7988038063049e-004, 6.7988038063049e-004,
+ -2.7597622945905e-002, 2.7597622945905e-002, -2.6065852493048e-002, 2.6065852493048e-002,
+ 6.0227513313293e-004, 6.0227513313293e-004, 5.2934885025024e-004, 5.2934885025024e-004,
+ -2.4533838033676e-002, 2.4533838033676e-002, -2.3001592606306e-002, 2.3001592606306e-002,
+ 4.6113133430481e-004, 4.6113133430481e-004, 3.9762258529663e-004, 3.9762258529663e-004,
+ -2.1469129249454e-002, 2.1469129249454e-002, -1.9936464726925e-002, 1.9936464726925e-002,
+ 3.3882260322571e-004, 3.3882260322571e-004, 2.8470158576965e-004, 2.8470158576965e-004,
+ -1.8403612077236e-002, 1.8403612077236e-002, -1.6870586201549e-002, 1.6870586201549e-002,
+ 2.3528933525085e-004, 2.3528933525085e-004, 1.9058585166931e-004, 1.9058585166931e-004,
+ -1.5337402001023e-002, 1.5337402001023e-002, -1.3804073445499e-002, 1.3804073445499e-002,
+ 1.5059113502502e-004, 1.5059113502502e-004, 1.1530518531799e-004, 1.1530518531799e-004,
+ -1.2270614504814e-002, 1.2270614504814e-002, -1.0737040080130e-002, 1.0737040080130e-002,
+ 8.4698200225830e-005, 8.4698200225830e-005, 5.8829784393311e-005, 5.8829784393311e-005,
+ -9.2033650726080e-003, 9.2033650726080e-003, -7.6696034520864e-003, 7.6696034520864e-003,
+ 3.7640333175659e-005, 3.7640333175659e-005, 2.1189451217651e-005, 2.1189451217651e-005,
+ -6.1357691884041e-003, 6.1357691884041e-003, -4.6018776483834e-003, 4.6018776483834e-003,
+ 9.4175338745117e-006, 9.4175338745117e-006, 2.3543834686279e-006, 2.3543834686279e-006,
+ -3.0679423362017e-003, 3.0679423362017e-003, -1.5339783858508e-003, 1.5339783858508e-003
+};
+static _MM_ALIGN16 float W4096[] = {
+ 4.9923300743103e-001, 4.9999940395355e-001, 0.0000000000000e+000, 0.0000000000000e+000,
+ 4.9846601486206e-001, 4.9846601486206e-001, 4.9769902229309e-001, 4.9769902229309e-001,
+ -4.9999764561653e-001, 4.9999764561653e-001, -4.9999469518661e-001, 4.9999469518661e-001,
+ 4.9693205952644e-001, 4.9693205952644e-001, 4.9616509675980e-001, 4.9616509675980e-001,
+ -4.9999058246613e-001, 4.9999058246613e-001, -4.9998530745506e-001, 4.9998530745506e-001,
+ 4.9539813399315e-001, 4.9539813399315e-001, 4.9463117122650e-001, 4.9463117122650e-001,
+ -4.9997881054878e-001, 4.9997881054878e-001, -4.9997118115425e-001, 4.9997118115425e-001,
+ 4.9386423826218e-001, 4.9386423826218e-001, 4.9309730529785e-001, 4.9309730529785e-001,
+ -4.9996235966682e-001, 4.9996235966682e-001, -4.9995234608650e-001, 4.9995234608650e-001,
+ 4.9233040213585e-001, 4.9233040213585e-001, 4.9156349897385e-001, 4.9156349897385e-001,
+ -4.9994117021561e-001, 4.9994117021561e-001, -4.9992883205414e-001, 4.9992883205414e-001,
+ 4.9079662561417e-001, 4.9079662561417e-001, 4.9002978205681e-001, 4.9002978205681e-001,
+ -4.9991530179977e-001, 4.9991530179977e-001, -4.9990057945251e-001, 4.9990057945251e-001,
+ 4.8926296830177e-001, 4.8926296830177e-001, 4.8849615454674e-001, 4.8849615454674e-001,
+ -4.9988469481468e-001, 4.9988469481468e-001, -4.9986764788628e-001, 4.9986764788628e-001,
+ 4.8772937059402e-001, 4.8772937059402e-001, 4.8696264624596e-001, 4.8696264624596e-001,
+ -4.9984940886497e-001, 4.9984940886497e-001, -4.9983000755310e-001, 4.9983000755310e-001,
+ 4.8619592189789e-001, 4.8619592189789e-001, 4.8542925715446e-001, 4.8542925715446e-001,
+ -4.9980941414833e-001, 4.9980941414833e-001, -4.9978765845299e-001, 4.9978765845299e-001,
+ 4.8466259241104e-001, 4.8466259241104e-001, 4.8389598727226e-001, 4.8389598727226e-001,
+ -4.9976471066475e-001, 4.9976471066475e-001, -4.9974060058594e-001, 4.9974060058594e-001,
+ 4.8312941193581e-001, 4.8312941193581e-001, 4.8236286640167e-001, 4.8236286640167e-001,
+ -4.9971529841423e-001, 4.9971529841423e-001, -4.9968883395195e-001, 4.9968883395195e-001,
+ 4.8159638047218e-001, 4.8159638047218e-001, 4.8082995414734e-001, 4.8082995414734e-001,
+ -4.9966117739677e-001, 4.9966117739677e-001, -4.9963238835335e-001, 4.9963238835335e-001,
+ 4.8006352782249e-001, 4.8006352782249e-001, 4.7929719090462e-001, 4.7929719090462e-001,
+ -4.9960237741470e-001, 4.9960237741470e-001, -4.9957120418549e-001, 4.9957120418549e-001,
+ 4.7853088378906e-001, 4.7853088378906e-001, 4.7776460647583e-001, 4.7776460647583e-001,
+ -4.9953886866570e-001, 4.9953886866570e-001, -4.9950534105301e-001, 4.9950534105301e-001,
+ 4.7699841856956e-001, 4.7699841856956e-001, 4.7623226046562e-001, 4.7623226046562e-001,
+ -4.9947065114975e-001, 4.9947065114975e-001, -4.9943476915359e-001, 4.9943476915359e-001,
+ 4.7546616196632e-001, 4.7546616196632e-001, 4.7470012307167e-001, 4.7470012307167e-001,
+ -4.9939772486687e-001, 4.9939772486687e-001, -4.9935951828957e-001, 4.9935951828957e-001,
+ 4.7393414378166e-001, 4.7393414378166e-001, 4.7316822409630e-001, 4.7316822409630e-001,
+ -4.9932011961937e-001, 4.9932011961937e-001, -4.9927952885628e-001, 4.9927952885628e-001,
+ 4.7240236401558e-001, 4.7240236401558e-001, 4.7163659334183e-001, 4.7163659334183e-001,
+ -4.9923777580261e-001, 4.9923777580261e-001, -4.9919486045837e-001, 4.9919486045837e-001,
+ 4.7087085247040e-001, 4.7087085247040e-001, 4.7010520100594e-001, 4.7010520100594e-001,
+ -4.9915078282356e-001, 4.9915078282356e-001, -4.9910551309586e-001, 4.9910551309586e-001,
+ 4.6933963894844e-001, 4.6933963894844e-001, 4.6857410669327e-001, 4.6857410669327e-001,
+ -4.9905905127525e-001, 4.9905905127525e-001, -4.9901142716408e-001, 4.9901142716408e-001,
+ 4.6780869364738e-001, 4.6780869364738e-001, 4.6704331040382e-001, 4.6704331040382e-001,
+ -4.9896264076233e-001, 4.9896264076233e-001, -4.9891266226768e-001, 4.9891266226768e-001,
+ 4.6627804636955e-001, 4.6627804636955e-001, 4.6551284193993e-001, 4.6551284193993e-001,
+ -4.9886152148247e-001, 4.9886152148247e-001, -4.9880921840668e-001, 4.9880921840668e-001,
+ 4.6474772691727e-001, 4.6474772691727e-001, 4.6398267149925e-001, 4.6398267149925e-001,
+ -4.9875572323799e-001, 4.9875572323799e-001, -4.9870106577873e-001, 4.9870106577873e-001,
+ 4.6321770548820e-001, 4.6321770548820e-001, 4.6245285868645e-001, 4.6245285868645e-001,
+ -4.9864521622658e-001, 4.9864521622658e-001, -4.9858820438385e-001, 4.9858820438385e-001,
+ 4.6168807148933e-001, 4.6168807148933e-001, 4.6092337369919e-001, 4.6092337369919e-001,
+ -4.9853003025055e-001, 4.9853003025055e-001, -4.9847066402435e-001, 4.9847066402435e-001,
+ 4.6015876531601e-001, 4.6015876531601e-001, 4.5939427614212e-001, 4.5939427614212e-001,
+ -4.9841013550758e-001, 4.9841013550758e-001, -4.9834844470024e-001, 4.9834844470024e-001,
+ 4.5862987637520e-001, 4.5862987637520e-001, 4.5786556601524e-001, 4.5786556601524e-001,
+ -4.9828556180000e-001, 4.9828556180000e-001, -4.9822151660919e-001, 4.9822151660919e-001,
+ 4.5710134506226e-001, 4.5710134506226e-001, 4.5633724331856e-001, 4.5633724331856e-001,
+ -4.9815630912781e-001, 4.9815630912781e-001, -4.9808990955353e-001, 4.9808990955353e-001,
+ 4.5557323098183e-001, 4.5557323098183e-001, 4.5480930805206e-001, 4.5480930805206e-001,
+ -4.9802234768867e-001, 4.9802234768867e-001, -4.9795362353325e-001, 4.9795362353325e-001,
+ 4.5404553413391e-001, 4.5404553413391e-001, 4.5328181982040e-001, 4.5328181982040e-001,
+ -4.9788370728493e-001, 4.9788370728493e-001, -4.9781262874603e-001, 4.9781262874603e-001,
+ 4.5251825451851e-001, 4.5251825451851e-001, 4.5175477862358e-001, 4.5175477862358e-001,
+ -4.9774038791656e-001, 4.9774038791656e-001, -4.9766695499420e-001, 4.9766695499420e-001,
+ 4.5099142193794e-001, 4.5099142193794e-001, 4.5022818446159e-001, 4.5022818446159e-001,
+ -4.9759235978127e-001, 4.9759235978127e-001, -4.9751660227776e-001, 4.9751660227776e-001,
+ 4.4946506619453e-001, 4.4946506619453e-001, 4.4870206713676e-001, 4.4870206713676e-001,
+ -4.9743965268135e-001, 4.9743965268135e-001, -4.9736157059669e-001, 4.9736157059669e-001,
+ 4.4793918728828e-001, 4.4793918728828e-001, 4.4717642664909e-001, 4.4717642664909e-001,
+ -4.9728229641914e-001, 4.9728229641914e-001, -4.9720183014870e-001, 4.9720183014870e-001,
+ 4.4641378521919e-001, 4.4641378521919e-001, 4.4565129280090e-001, 4.4565129280090e-001,
+ -4.9712023139000e-001, 4.9712023139000e-001, -4.9703744053841e-001, 4.9703744053841e-001,
+ 4.4488888978958e-001, 4.4488888978958e-001, 4.4412663578987e-001, 4.4412663578987e-001,
+ -4.9695348739624e-001, 4.9695348739624e-001, -4.9686837196350e-001, 4.9686837196350e-001,
+ 4.4336453080177e-001, 4.4336453080177e-001, 4.4260254502296e-001, 4.4260254502296e-001,
+ -4.9678206443787e-001, 4.9678206443787e-001, -4.9669459462166e-001, 4.9669459462166e-001,
+ 4.4184067845345e-001, 4.4184067845345e-001, 4.4107896089554e-001, 4.4107896089554e-001,
+ -4.9660596251488e-001, 4.9660596251488e-001, -4.9651616811752e-001, 4.9651616811752e-001,
+ 4.4031739234924e-001, 4.4031739234924e-001, 4.3955594301224e-001, 4.3955594301224e-001,
+ -4.9642521142960e-001, 4.9642521142960e-001, -4.9633306264877e-001, 4.9633306264877e-001,
+ 4.3879467248917e-001, 4.3879467248917e-001, 4.3803352117538e-001, 4.3803352117538e-001,
+ -4.9623978137970e-001, 4.9623978137970e-001, -4.9614530801773e-001, 4.9614530801773e-001,
+ 4.3727248907089e-001, 4.3727248907089e-001, 4.3651163578033e-001, 4.3651163578033e-001,
+ -4.9604964256287e-001, 4.9604964256287e-001, -4.9595284461975e-001, 4.9595284461975e-001,
+ 4.3575096130371e-001, 4.3575096130371e-001, 4.3499037623405e-001, 4.3499037623405e-001,
+ -4.9585488438606e-001, 4.9585488438606e-001, -4.9575573205948e-001, 4.9575573205948e-001,
+ 4.3422996997833e-001, 4.3422996997833e-001, 4.3346974253654e-001, 4.3346974253654e-001,
+ -4.9565541744232e-001, 4.9565541744232e-001, -4.9555397033691e-001, 4.9555397033691e-001,
+ 4.3270963430405e-001, 4.3270963430405e-001, 4.3194970488548e-001, 4.3194970488548e-001,
+ -4.9545133113861e-001, 4.9545133113861e-001, -4.9534749984741e-001, 4.9534749984741e-001,
+ 4.3118995428085e-001, 4.3118995428085e-001, 4.3043032288551e-001, 4.3043032288551e-001,
+ -4.9524253606796e-001, 4.9524253606796e-001, -4.9513640999794e-001, 4.9513640999794e-001,
+ 4.2967087030411e-001, 4.2967087030411e-001, 4.2891159653664e-001, 4.2891159653664e-001,
+ -4.9502909183502e-001, 4.9502909183502e-001, -4.9492064118385e-001, 4.9492064118385e-001,
+ 4.2815247178078e-001, 4.2815247178078e-001, 4.2739352583885e-001, 4.2739352583885e-001,
+ -4.9481099843979e-001, 4.9481099843979e-001, -4.9470022320747e-001, 4.9470022320747e-001,
+ 4.2663475871086e-001, 4.2663475871086e-001, 4.2587614059448e-001, 4.2587614059448e-001,
+ -4.9458825588226e-001, 4.9458825588226e-001, -4.9447512626648e-001, 4.9447512626648e-001,
+ 4.2511773109436e-001, 4.2511773109436e-001, 4.2435947060585e-001, 4.2435947060585e-001,
+ -4.9436083436012e-001, 4.9436083436012e-001, -4.9424540996552e-001, 4.9424540996552e-001,
+ 4.2360138893127e-001, 4.2360138893127e-001, 4.2284351587296e-001, 4.2284351587296e-001,
+ -4.9412879347801e-001, 4.9412879347801e-001, -4.9401101469994e-001, 4.9401101469994e-001,
+ 4.2208579182625e-001, 4.2208579182625e-001, 4.2132827639580e-001, 4.2132827639580e-001,
+ -4.9389207363129e-001, 4.9389207363129e-001, -4.9377197027206e-001, 4.9377197027206e-001,
+ 4.2057090997696e-001, 4.2057090997696e-001, 4.1981375217438e-001, 4.1981375217438e-001,
+ -4.9365070462227e-001, 4.9365070462227e-001, -4.9352827668190e-001, 4.9352827668190e-001,
+ 4.1905680298805e-001, 4.1905680298805e-001, 4.1830003261566e-001, 4.1830003261566e-001,
+ -4.9340468645096e-001, 4.9340468645096e-001, -4.9327996373177e-001, 4.9327996373177e-001,
+ 4.1754344105721e-001, 4.1754344105721e-001, 4.1678702831268e-001, 4.1678702831268e-001,
+ -4.9315404891968e-001, 4.9315404891968e-001, -4.9302697181702e-001, 4.9302697181702e-001,
+ 4.1603085398674e-001, 4.1603085398674e-001, 4.1527485847473e-001, 4.1527485847473e-001,
+ -4.9289876222610e-001, 4.9289876222610e-001, -4.9276936054230e-001, 4.9276936054230e-001,
+ 4.1451907157898e-001, 4.1451907157898e-001, 4.1376346349716e-001, 4.1376346349716e-001,
+ -4.9263882637024e-001, 4.9263882637024e-001, -4.9250712990761e-001, 4.9250712990761e-001,
+ 4.1300806403160e-001, 4.1300806403160e-001, 4.1225287318230e-001, 4.1225287318230e-001,
+ -4.9237424135208e-001, 4.9237424135208e-001, -4.9224022030830e-001, 4.9224022030830e-001,
+ 4.1149789094925e-001, 4.1149789094925e-001, 4.1074311733246e-001, 4.1074311733246e-001,
+ -4.9210503697395e-001, 4.9210503697395e-001, -4.9196872115135e-001, 4.9196872115135e-001,
+ 4.0998855233192e-001, 4.0998855233192e-001, 4.0923419594765e-001, 4.0923419594765e-001,
+ -4.9183121323586e-001, 4.9183121323586e-001, -4.9169254302979e-001, 4.9169254302979e-001,
+ 4.0848004817963e-001, 4.0848004817963e-001, 4.0772613883018e-001, 4.0772613883018e-001,
+ -4.9155274033546e-001, 4.9155274033546e-001, -4.9141177535057e-001, 4.9141177535057e-001,
+ 4.0697240829468e-001, 4.0697240829468e-001, 4.0621894598007e-001, 4.0621894598007e-001,
+ -4.9126964807510e-001, 4.9126964807510e-001, -4.9112635850906e-001, 4.9112635850906e-001,
+ 4.0546566247940e-001, 4.0546566247940e-001, 4.0471261739731e-001, 4.0471261739731e-001,
+ -4.9098193645477e-001, 4.9098193645477e-001, -4.9083635210991e-001, 4.9083635210991e-001,
+ 4.0395981073380e-001, 4.0395981073380e-001, 4.0320721268654e-001, 4.0320721268654e-001,
+ -4.9068960547447e-001, 4.9068960547447e-001, -4.9054169654846e-001, 4.9054169654846e-001,
+ 4.0245485305786e-001, 4.0245485305786e-001, 4.0170270204544e-001, 4.0170270204544e-001,
+ -4.9039262533188e-001, 4.9039262533188e-001, -4.9024242162704e-001, 4.9024242162704e-001,
+ 4.0095078945160e-001, 4.0095078945160e-001, 4.0019911527634e-001, 4.0019911527634e-001,
+ -4.9009105563164e-001, 4.9009105563164e-001, -4.8993855714798e-001, 4.8993855714798e-001,
+ 3.9944767951965e-001, 3.9944767951965e-001, 3.9869648218155e-001, 3.9869648218155e-001,
+ -4.8978489637375e-001, 4.8978489637375e-001, -4.8963007330894e-001, 4.8963007330894e-001,
+ 3.9794552326202e-001, 3.9794552326202e-001, 3.9719480276108e-001, 3.9719480276108e-001,
+ -4.8947408795357e-001, 4.8947408795357e-001, -4.8931697010994e-001, 4.8931697010994e-001,
+ 3.9644432067871e-001, 3.9644432067871e-001, 3.9569407701492e-001, 3.9569407701492e-001,
+ -4.8915868997574e-001, 4.8915868997574e-001, -4.8899924755096e-001, 4.8899924755096e-001,
+ 3.9494407176971e-001, 3.9494407176971e-001, 3.9419433474541e-001, 3.9419433474541e-001,
+ -4.8883867263794e-001, 4.8883867263794e-001, -4.8867693543434e-001, 4.8867693543434e-001,
+ 3.9344483613968e-001, 3.9344483613968e-001, 3.9269560575485e-001, 3.9269560575485e-001,
+ -4.8851406574249e-001, 4.8851406574249e-001, -4.8835003376007e-001, 4.8835003376007e-001,
+ 3.9194661378860e-001, 3.9194661378860e-001, 3.9119786024094e-001, 3.9119786024094e-001,
+ -4.8818486928940e-001, 4.8818486928940e-001, -4.8801854252815e-001, 4.8801854252815e-001,
+ 3.9044937491417e-001, 3.9044937491417e-001, 3.8970115780830e-001, 3.8970115780830e-001,
+ -4.8785105347633e-001, 4.8785105347633e-001, -4.8768243193626e-001, 4.8768243193626e-001,
+ 3.8895317912102e-001, 3.8895317912102e-001, 3.8820546865463e-001, 3.8820546865463e-001,
+ -4.8751267790794e-001, 4.8751267790794e-001, -4.8734176158905e-001, 4.8734176158905e-001,
+ 3.8745802640915e-001, 3.8745802640915e-001, 3.8671088218689e-001, 3.8671088218689e-001,
+ -4.8716968297958e-001, 4.8716968297958e-001, -4.8699647188187e-001, 4.8699647188187e-001,
+ 3.8596394658089e-001, 3.8596394658089e-001, 3.8521730899811e-001, 3.8521730899811e-001,
+ -4.8682212829590e-001, 4.8682212829590e-001, -4.8664662241936e-001, 4.8664662241936e-001,
+ 3.8447093963623e-001, 3.8447093963623e-001, 3.8372483849525e-001, 3.8372483849525e-001,
+ -4.8646998405457e-001, 4.8646998405457e-001, -4.8629218339920e-001, 4.8629218339920e-001,
+ 3.8297903537750e-001, 3.8297903537750e-001, 3.8223347067833e-001, 3.8223347067833e-001,
+ -4.8611325025558e-001, 4.8611325025558e-001, -4.8593315482140e-001, 4.8593315482140e-001,
+ 3.8148820400238e-001, 3.8148820400238e-001, 3.8074320554733e-001, 3.8074320554733e-001,
+ -4.8575195670128e-001, 4.8575195670128e-001, -4.8556956648827e-001, 4.8556956648827e-001,
+ 3.7999847531319e-001, 3.7999847531319e-001, 3.7925404310226e-001, 3.7925404310226e-001,
+ -4.8538607358932e-001, 4.8538607358932e-001, -4.8520141839981e-001, 4.8520141839981e-001,
+ 3.7850990891457e-001, 3.7850990891457e-001, 3.7776604294777e-001, 3.7776604294777e-001,
+ -4.8501563072205e-001, 4.8501563072205e-001, -4.8482868075371e-001, 4.8482868075371e-001,
+ 3.7702247500420e-001, 3.7702247500420e-001, 3.7627917528152e-001, 3.7627917528152e-001,
+ -4.8464062809944e-001, 4.8464062809944e-001, -4.8445141315460e-001, 4.8445141315460e-001,
+ 3.7553620338440e-001, 3.7553620338440e-001, 3.7479349970818e-001, 3.7479349970818e-001,
+ -4.8426103591919e-001, 4.8426103591919e-001, -4.8406955599785e-001, 4.8406955599785e-001,
+ 3.7405109405518e-001, 3.7405109405518e-001, 3.7330895662308e-001, 3.7330895662308e-001,
+ -4.8387691378593e-001, 4.8387691378593e-001, -4.8368313908577e-001, 4.8368313908577e-001,
+ 3.7256717681885e-001, 3.7256717681885e-001, 3.7182563543320e-001, 3.7182563543320e-001,
+ -4.8348823189735e-001, 4.8348823189735e-001, -4.8329219222069e-001, 4.8329219222069e-001,
+ 3.7108445167542e-001, 3.7108445167542e-001, 3.7034353613853e-001, 3.7034353613853e-001,
+ -4.8309499025345e-001, 4.8309499025345e-001, -4.8289668560028e-001, 4.8289668560028e-001,
+ 3.6960291862488e-001, 3.6960291862488e-001, 3.6886262893677e-001, 3.6886262893677e-001,
+ -4.8269721865654e-001, 4.8269721865654e-001, -4.8249661922455e-001, 4.8249661922455e-001,
+ 3.6812263727188e-001, 3.6812263727188e-001, 3.6738300323486e-001, 3.6738300323486e-001,
+ -4.8229488730431e-001, 4.8229488730431e-001, -4.8209202289581e-001, 4.8209202289581e-001,
+ 3.6664360761642e-001, 3.6664360761642e-001, 3.6590456962585e-001, 3.6590456962585e-001,
+ -4.8188802599907e-001, 4.8188802599907e-001, -4.8168289661407e-001, 4.8168289661407e-001,
+ 3.6516582965851e-001, 3.6516582965851e-001, 3.6442741751671e-001, 3.6442741751671e-001,
+ -4.8147663474083e-001, 4.8147663474083e-001, -4.8126924037933e-001, 4.8126924037933e-001,
+ 3.6368930339813e-001, 3.6368930339813e-001, 3.6295154690742e-001, 3.6295154690742e-001,
+ -4.8106071352959e-001, 4.8106071352959e-001, -4.8085102438927e-001, 4.8085102438927e-001,
+ 3.6221408843994e-001, 3.6221408843994e-001, 3.6147695779800e-001, 3.6147695779800e-001,
+ -4.8064023256302e-001, 4.8064023256302e-001, -4.8042830824852e-001, 4.8042830824852e-001,
+ 3.6074015498161e-001, 3.6074015498161e-001, 3.6000367999077e-001, 3.6000367999077e-001,
+ -4.8021525144577e-001, 4.8021525144577e-001, -4.8000106215477e-001, 4.8000106215477e-001,
+ 3.5926753282547e-001, 3.5926753282547e-001, 3.5853171348572e-001, 3.5853171348572e-001,
+ -4.7978577017784e-001, 4.7978577017784e-001, -4.7956931591034e-001, 4.7956931591034e-001,
+ 3.5779622197151e-001, 3.5779622197151e-001, 3.5706108808517e-001, 3.5706108808517e-001,
+ -4.7935172915459e-001, 4.7935172915459e-001, -4.7913303971291e-001, 4.7913303971291e-001,
+ 3.5632628202438e-001, 3.5632628202438e-001, 3.5559177398682e-001, 3.5559177398682e-001,
+ -4.7891321778297e-001, 4.7891321778297e-001, -4.7869226336479e-001, 4.7869226336479e-001,
+ 3.5485765337944e-001, 3.5485765337944e-001, 3.5412386059761e-001, 3.5412386059761e-001,
+ -4.7847017645836e-001, 4.7847017645836e-001, -4.7824695706367e-001, 4.7824695706367e-001,
+ 3.5339039564133e-001, 3.5339039564133e-001, 3.5265731811523e-001, 3.5265731811523e-001,
+ -4.7802263498306e-001, 4.7802263498306e-001, -4.7779718041420e-001, 4.7779718041420e-001,
+ 3.5192453861237e-001, 3.5192453861237e-001, 3.5119214653969e-001, 3.5119214653969e-001,
+ -4.7757059335709e-001, 4.7757059335709e-001, -4.7734287381172e-001, 4.7734287381172e-001,
+ 3.5046008229256e-001, 3.5046008229256e-001, 3.4972837567329e-001, 3.4972837567329e-001,
+ -4.7711405158043e-001, 4.7711405158043e-001, -4.7688409686089e-001, 4.7688409686089e-001,
+ 3.4899702668190e-001, 3.4899702668190e-001, 3.4826600551605e-001, 3.4826600551605e-001,
+ -4.7665300965309e-001, 4.7665300965309e-001, -4.7642081975937e-001, 4.7642081975937e-001,
+ 3.4753537178040e-001, 3.4753537178040e-001, 3.4680509567261e-001, 3.4680509567261e-001,
+ -4.7618749737740e-001, 4.7618749737740e-001, -4.7595307230949e-001, 4.7595307230949e-001,
+ 3.4607517719269e-001, 3.4607517719269e-001, 3.4534561634064e-001, 3.4534561634064e-001,
+ -4.7571751475334e-001, 4.7571751475334e-001, -4.7548082470894e-001, 4.7548082470894e-001,
+ 3.4461641311646e-001, 3.4461641311646e-001, 3.4388756752014e-001, 3.4388756752014e-001,
+ -4.7524303197861e-001, 4.7524303197861e-001, -4.7500410676003e-001, 4.7500410676003e-001,
+ 3.4315913915634e-001, 3.4315913915634e-001, 3.4243103861809e-001, 3.4243103861809e-001,
+ -4.7476407885551e-001, 4.7476407885551e-001, -4.7452294826508e-001, 4.7452294826508e-001,
+ 3.4170329570770e-001, 3.4170329570770e-001, 3.4097594022751e-001, 3.4097594022751e-001,
+ -4.7428068518639e-001, 4.7428068518639e-001, -4.7403728961945e-001, 4.7403728961945e-001,
+ 3.4024900197983e-001, 3.4024900197983e-001, 3.3952236175537e-001, 3.3952236175537e-001,
+ -4.7379279136658e-001, 4.7379279136658e-001, -4.7354719042778e-001, 4.7354719042778e-001,
+ 3.3879613876343e-001, 3.3879613876343e-001, 3.3807033300400e-001, 3.3807033300400e-001,
+ -4.7330045700073e-001, 4.7330045700073e-001, -4.7305262088776e-001, 4.7305262088776e-001,
+ 3.3734485507011e-001, 3.3734485507011e-001, 3.3661976456642e-001, 3.3661976456642e-001,
+ -4.7280365228653e-001, 4.7280365228653e-001, -4.7255358099937e-001, 4.7255358099937e-001,
+ 3.3589506149292e-001, 3.3589506149292e-001, 3.3517074584961e-001, 3.3517074584961e-001,
+ -4.7230240702629e-001, 4.7230240702629e-001, -4.7205013036728e-001, 4.7205013036728e-001,
+ 3.3444684743881e-001, 3.3444684743881e-001, 3.3372330665588e-001, 3.3372330665588e-001,
+ -4.7179672122002e-001, 4.7179672122002e-001, -4.7154220938683e-001, 4.7154220938683e-001,
+ 3.3300018310547e-001, 3.3300018310547e-001, 3.3227741718292e-001, 3.3227741718292e-001,
+ -4.7128659486771e-001, 4.7128659486771e-001, -4.7102987766266e-001, 4.7102987766266e-001,
+ 3.3155506849289e-001, 3.3155506849289e-001, 3.3083310723305e-001, 3.3083310723305e-001,
+ -4.7077202796936e-001, 4.7077202796936e-001, -4.7051307559013e-001, 4.7051307559013e-001,
+ 3.3011156320572e-001, 3.3011156320572e-001, 3.2939040660858e-001, 3.2939040660858e-001,
+ -4.7025302052498e-001, 4.7025302052498e-001, -4.6999186277390e-001, 4.6999186277390e-001,
+ 3.2866963744164e-001, 3.2866963744164e-001, 3.2794928550720e-001, 3.2794928550720e-001,
+ -4.6972960233688e-001, 4.6972960233688e-001, -4.6946623921394e-001, 4.6946623921394e-001,
+ 3.2722932100296e-001, 3.2722932100296e-001, 3.2650977373123e-001, 3.2650977373123e-001,
+ -4.6920177340508e-001, 4.6920177340508e-001, -4.6893617510796e-001, 4.6893617510796e-001,
+ 3.2579064369202e-001, 3.2579064369202e-001, 3.2507193088531e-001, 3.2507193088531e-001,
+ -4.6866950392723e-001, 4.6866950392723e-001, -4.6840173006058e-001, 4.6840173006058e-001,
+ 3.2435363531113e-001, 3.2435363531113e-001, 3.2363569736481e-001, 3.2363569736481e-001,
+ -4.6813282370567e-001, 4.6813282370567e-001, -4.6786284446716e-001, 4.6786284446716e-001,
+ 3.2291823625565e-001, 3.2291823625565e-001, 3.2220116257668e-001, 3.2220116257668e-001,
+ -4.6759176254272e-001, 4.6759176254272e-001, -4.6731957793236e-001, 4.6731957793236e-001,
+ 3.2148450613022e-001, 3.2148450613022e-001, 3.2076829671860e-001, 3.2076829671860e-001,
+ -4.6704626083374e-001, 4.6704626083374e-001, -4.6677187085152e-001, 4.6677187085152e-001,
+ 3.2005247473717e-001, 3.2005247473717e-001, 3.1933709979057e-001, 3.1933709979057e-001,
+ -4.6649640798569e-001, 4.6649640798569e-001, -4.6621981263161e-001, 4.6621981263161e-001,
+ 3.1862211227417e-001, 3.1862211227417e-001, 3.1790760159492e-001, 3.1790760159492e-001,
+ -4.6594214439392e-001, 4.6594214439392e-001, -4.6566334366798e-001, 4.6566334366798e-001,
+ 3.1719350814819e-001, 3.1719350814819e-001, 3.1647980213165e-001, 3.1647980213165e-001,
+ -4.6538347005844e-001, 4.6538347005844e-001, -4.6510252356529e-001, 4.6510252356529e-001,
+ 3.1576657295227e-001, 3.1576657295227e-001, 3.1505376100540e-001, 3.1505376100540e-001,
+ -4.6482044458389e-001, 4.6482044458389e-001, -4.6453729271889e-001, 4.6453729271889e-001,
+ 3.1434139609337e-001, 3.1434139609337e-001, 3.1362944841385e-001, 3.1362944841385e-001,
+ -4.6425303816795e-001, 4.6425303816795e-001, -4.6396768093109e-001, 4.6396768093109e-001,
+ 3.1291794776917e-001, 3.1291794776917e-001, 3.1220692396164e-001, 3.1220692396164e-001,
+ -4.6368125081062e-001, 4.6368125081062e-001, -4.6339374780655e-001, 4.6339374780655e-001,
+ 3.1149628758430e-001, 3.1149628758430e-001, 3.1078612804413e-001, 3.1078612804413e-001,
+ -4.6310511231422e-001, 4.6310511231422e-001, -4.6281540393829e-001, 4.6281540393829e-001,
+ 3.1007638573647e-001, 3.1007638573647e-001, 3.0936712026596e-001, 3.0936712026596e-001,
+ -4.6252462267876e-001, 4.6252462267876e-001, -4.6223273873329e-001, 4.6223273873329e-001,
+ 3.0865827202797e-001, 3.0865827202797e-001, 3.0794990062714e-001, 3.0794990062714e-001,
+ -4.6193975210190e-001, 4.6193975210190e-001, -4.6164569258690e-001, 4.6164569258690e-001,
+ 3.0724197626114e-001, 3.0724197626114e-001, 3.0653449892998e-001, 3.0653449892998e-001,
+ -4.6135056018829e-001, 4.6135056018829e-001, -4.6105432510376e-001, 4.6105432510376e-001,
+ 3.0582746863365e-001, 3.0582746863365e-001, 3.0512091517448e-001, 3.0512091517448e-001,
+ -4.6075701713562e-001, 4.6075701713562e-001, -4.6045860648155e-001, 4.6045860648155e-001,
+ 3.0441480875015e-001, 3.0441480875015e-001, 3.0370914936066e-001, 3.0370914936066e-001,
+ -4.6015912294388e-001, 4.6015912294388e-001, -4.5985856652260e-001, 4.5985856652260e-001,
+ 3.0300396680832e-001, 3.0300396680832e-001, 3.0229926109314e-001, 3.0229926109314e-001,
+ -4.5955693721771e-001, 4.5955693721771e-001, -4.5925420522690e-001, 4.5925420522690e-001,
+ 3.0159500241280e-001, 3.0159500241280e-001, 3.0089122056961e-001, 3.0089122056961e-001,
+ -4.5895040035248e-001, 4.5895040035248e-001, -4.5864549279213e-001, 4.5864549279213e-001,
+ 3.0018788576126e-001, 3.0018788576126e-001, 2.9948502779007e-001, 2.9948502779007e-001,
+ -4.5833954215050e-001, 4.5833954215050e-001, -4.5803248882294e-001, 4.5803248882294e-001,
+ 2.9878267645836e-001, 2.9878267645836e-001, 2.9808077216148e-001, 2.9808077216148e-001,
+ -4.5772436261177e-001, 4.5772436261177e-001, -4.5741516351700e-001, 4.5741516351700e-001,
+ 2.9737934470177e-001, 2.9737934470177e-001, 2.9667836427689e-001, 2.9667836427689e-001,
+ -4.5710486173630e-001, 4.5710486173630e-001, -4.5679351687431e-001, 4.5679351687431e-001,
+ 2.9597792029381e-001, 2.9597792029381e-001, 2.9527792334557e-001, 2.9527792334557e-001,
+ -4.5648109912872e-001, 4.5648109912872e-001, -4.5616757869720e-001, 4.5616757869720e-001,
+ 2.9457840323448e-001, 2.9457840323448e-001, 2.9387938976288e-001, 2.9387938976288e-001,
+ -4.5585301518440e-001, 4.5585301518440e-001, -4.5553737878799e-001, 4.5553737878799e-001,
+ 2.9318082332611e-001, 2.9318082332611e-001, 2.9248279333115e-001, 2.9248279333115e-001,
+ -4.5522063970566e-001, 4.5522063970566e-001, -4.5490285754204e-001, 4.5490285754204e-001,
+ 2.9178521037102e-001, 2.9178521037102e-001, 2.9108813405037e-001, 2.9108813405037e-001,
+ -4.5458400249481e-001, 4.5458400249481e-001, -4.5426404476166e-001, 4.5426404476166e-001,
+ 2.9039156436920e-001, 2.9039156436920e-001, 2.8969544172287e-001, 2.8969544172287e-001,
+ -4.5394304394722e-001, 4.5394304394722e-001, -4.5362100005150e-001, 4.5362100005150e-001,
+ 2.8899985551834e-001, 2.8899985551834e-001, 2.8830474615097e-001, 2.8830474615097e-001,
+ -4.5329785346985e-001, 4.5329785346985e-001, -4.5297363400459e-001, 4.5297363400459e-001,
+ 2.8761017322540e-001, 2.8761017322540e-001, 2.8691604733467e-001, 2.8691604733467e-001,
+ -4.5264837145805e-001, 4.5264837145805e-001, -4.5232203602791e-001, 4.5232203602791e-001,
+ 2.8622245788574e-001, 2.8622245788574e-001, 2.8552934527397e-001, 2.8552934527397e-001,
+ -4.5199465751648e-001, 4.5199465751648e-001, -4.5166617631912e-001, 4.5166617631912e-001,
+ 2.8483676910400e-001, 2.8483676910400e-001, 2.8414466977119e-001, 2.8414466977119e-001,
+ -4.5133665204048e-001, 4.5133665204048e-001, -4.5100605487823e-001, 4.5100605487823e-001,
+ 2.8345310688019e-001, 2.8345310688019e-001, 2.8276202082634e-001, 2.8276202082634e-001,
+ -4.5067441463470e-001, 4.5067441463470e-001, -4.5034170150757e-001, 4.5034170150757e-001,
+ 2.8207147121429e-001, 2.8207147121429e-001, 2.8138142824173e-001, 2.8138142824173e-001,
+ -4.5000794529915e-001, 4.5000794529915e-001, -4.4967311620712e-001, 4.4967311620712e-001,
+ 2.8069186210632e-001, 2.8069186210632e-001, 2.8000286221504e-001, 2.8000286221504e-001,
+ -4.4933724403381e-001, 4.4933724403381e-001, -4.4900029897690e-001, 4.4900029897690e-001,
+ 2.7931433916092e-001, 2.7931433916092e-001, 2.7862638235092e-001, 2.7862638235092e-001,
+ -4.4866228103638e-001, 4.4866228103638e-001, -4.4832322001457e-001, 4.4832322001457e-001,
+ 2.7793890237808e-001, 2.7793890237808e-001, 2.7725198864937e-001, 2.7725198864937e-001,
+ -4.4798311591148e-001, 4.4798311591148e-001, -4.4764196872711e-001, 4.4764196872711e-001,
+ 2.7656558156013e-001, 2.7656558156013e-001, 2.7587968111038e-001, 2.7587968111038e-001,
+ -4.4729974865913e-001, 4.4729974865913e-001, -4.4695645570755e-001, 4.4695645570755e-001,
+ 2.7519434690475e-001, 2.7519434690475e-001, 2.7450948953629e-001, 2.7450948953629e-001,
+ -4.4661214947701e-001, 4.4661214947701e-001, -4.4626677036285e-001, 4.4626677036285e-001,
+ 2.7382519841194e-001, 2.7382519841194e-001, 2.7314144372940e-001, 2.7314144372940e-001,
+ -4.4592034816742e-001, 4.4592034816742e-001, -4.4557288289070e-001, 4.4557288289070e-001,
+ 2.7245819568634e-001, 2.7245819568634e-001, 2.7177548408508e-001, 2.7177548408508e-001,
+ -4.4522434473038e-001, 4.4522434473038e-001, -4.4487479329109e-001, 4.4487479329109e-001,
+ 2.7109333872795e-001, 2.7109333872795e-001, 2.7041172981262e-001, 2.7041172981262e-001,
+ -4.4452416896820e-001, 4.4452416896820e-001, -4.4417250156403e-001, 4.4417250156403e-001,
+ 2.6973062753677e-001, 2.6973062753677e-001, 2.6905009150505e-001, 2.6905009150505e-001,
+ -4.4381982088089e-001, 4.4381982088089e-001, -4.4346606731415e-001, 4.4346606731415e-001,
+ 2.6837009191513e-001, 2.6837009191513e-001, 2.6769065856934e-001, 2.6769065856934e-001,
+ -4.4311127066612e-001, 4.4311127066612e-001, -4.4275543093681e-001, 4.4275543093681e-001,
+ 2.6701176166534e-001, 2.6701176166534e-001, 2.6633340120316e-001, 2.6633340120316e-001,
+ -4.4239854812622e-001, 4.4239854812622e-001, -4.4204062223434e-001, 4.4204062223434e-001,
+ 2.6565557718277e-001, 2.6565557718277e-001, 2.6497834920883e-001, 2.6497834920883e-001,
+ -4.4168165326118e-001, 4.4168165326118e-001, -4.4132167100906e-001, 4.4132167100906e-001,
+ 2.6430162787437e-001, 2.6430162787437e-001, 2.6362547278404e-001, 2.6362547278404e-001,
+ -4.4096061587334e-001, 4.4096061587334e-001, -4.4059854745865e-001, 4.4059854745865e-001,
+ 2.6294988393784e-001, 2.6294988393784e-001, 2.6227486133575e-001, 2.6227486133575e-001,
+ -4.4023543596268e-001, 4.4023543596268e-001, -4.3987128138542e-001, 4.3987128138542e-001,
+ 2.6160037517548e-001, 2.6160037517548e-001, 2.6092648506165e-001, 2.6092648506165e-001,
+ -4.3950611352921e-001, 4.3950611352921e-001, -4.3913990259171e-001, 4.3913990259171e-001,
+ 2.6025313138962e-001, 2.6025313138962e-001, 2.5958031415939e-001, 2.5958031415939e-001,
+ -4.3877264857292e-001, 4.3877264857292e-001, -4.3840435147285e-001, 4.3840435147285e-001,
+ 2.5890809297562e-001, 2.5890809297562e-001, 2.5823646783829e-001, 2.5823646783829e-001,
+ -4.3803504109383e-001, 4.3803504109383e-001, -4.3766468763351e-001, 4.3766468763351e-001,
+ 2.5756537914276e-001, 2.5756537914276e-001, 2.5689485669136e-001, 2.5689485669136e-001,
+ -4.3729332089424e-001, 4.3729332089424e-001, -4.3692091107368e-001, 4.3692091107368e-001,
+ 2.5622493028641e-001, 2.5622493028641e-001, 2.5555554032326e-001, 2.5555554032326e-001,
+ -4.3654748797417e-001, 4.3654748797417e-001, -4.3617302179337e-001, 4.3617302179337e-001,
+ 2.5488674640656e-001, 2.5488674640656e-001, 2.5421851873398e-001, 2.5421851873398e-001,
+ -4.3579754233360e-001, 4.3579754233360e-001, -4.3542101979256e-001, 4.3542101979256e-001,
+ 2.5355088710785e-001, 2.5355088710785e-001, 2.5288385152817e-001, 2.5288385152817e-001,
+ -4.3504348397255e-001, 4.3504348397255e-001, -4.3466493487358e-001, 4.3466493487358e-001,
+ 2.5221735239029e-001, 2.5221735239029e-001, 2.5155144929886e-001, 2.5155144929886e-001,
+ -4.3428534269333e-001, 4.3428534269333e-001, -4.3390473723412e-001, 4.3390473723412e-001,
+ 2.5088614225388e-001, 2.5088614225388e-001, 2.5022143125534e-001, 2.5022143125534e-001,
+ -4.3352311849594e-001, 4.3352311849594e-001, -4.3314048647881e-001, 4.3314048647881e-001,
+ 2.4955731630325e-001, 2.4955731630325e-001, 2.4889376759529e-001, 2.4889376759529e-001,
+ -4.3275681138039e-001, 4.3275681138039e-001, -4.3237212300301e-001, 4.3237212300301e-001,
+ 2.4823081493378e-001, 2.4823081493378e-001, 2.4756842851639e-001, 2.4756842851639e-001,
+ -4.3198642134666e-001, 4.3198642134666e-001, -4.3159970641136e-001, 4.3159970641136e-001,
+ 2.4690666794777e-001, 2.4690666794777e-001, 2.4624550342560e-001, 2.4624550342560e-001,
+ -4.3121197819710e-001, 4.3121197819710e-001, -4.3082323670387e-001, 4.3082323670387e-001,
+ 2.4558493494987e-001, 2.4558493494987e-001, 2.4492493271828e-001, 2.4492493271828e-001,
+ -4.3043345212936e-001, 4.3043345212936e-001, -4.3004268407822e-001, 4.3004268407822e-001,
+ 2.4426555633545e-001, 2.4426555633545e-001, 2.4360680580139e-001, 2.4360680580139e-001,
+ -4.2965090274811e-001, 4.2965090274811e-001, -4.2925810813904e-001, 4.2925810813904e-001,
+ 2.4294862151146e-001, 2.4294862151146e-001, 2.4229106307030e-001, 2.4229106307030e-001,
+ -4.2886430025101e-001, 4.2886430025101e-001, -4.2846947908401e-001, 4.2846947908401e-001,
+ 2.4163410067558e-001, 2.4163410067558e-001, 2.4097773432732e-001, 2.4097773432732e-001,
+ -4.2807367444038e-001, 4.2807367444038e-001, -4.2767682671547e-001, 4.2767682671547e-001,
+ 2.4032199382782e-001, 2.4032199382782e-001, 2.3966687917709e-001, 2.3966687917709e-001,
+ -4.2727899551392e-001, 4.2727899551392e-001, -4.2688015103340e-001, 4.2688015103340e-001,
+ 2.3901236057281e-001, 2.3901236057281e-001, 2.3835843801498e-001, 2.3835843801498e-001,
+ -4.2648029327393e-001, 4.2648029327393e-001, -4.2607945203781e-001, 4.2607945203781e-001,
+ 2.3770514130592e-001, 2.3770514130592e-001, 2.3705247044563e-001, 2.3705247044563e-001,
+ -4.2567759752274e-001, 4.2567759752274e-001, -4.2527472972870e-001, 4.2527472972870e-001,
+ 2.3640042543411e-001, 2.3640042543411e-001, 2.3574900627136e-001, 2.3574900627136e-001,
+ -4.2487087845802e-001, 4.2487087845802e-001, -4.2446601390839e-001, 4.2446601390839e-001,
+ 2.3509818315506e-001, 2.3509818315506e-001, 2.3444798588753e-001, 2.3444798588753e-001,
+ -4.2406016588211e-001, 4.2406016588211e-001, -4.2365330457687e-001, 4.2365330457687e-001,
+ 2.3379844427109e-001, 2.3379844427109e-001, 2.3314949870110e-001, 2.3314949870110e-001,
+ -4.2324545979500e-001, 4.2324545979500e-001, -4.2283663153648e-001, 4.2283663153648e-001,
+ 2.3250117897987e-001, 2.3250117897987e-001, 2.3185351490974e-001, 2.3185351490974e-001,
+ -4.2242678999901e-001, 4.2242678999901e-001, -4.2201593518257e-001, 4.2201593518257e-001,
+ 2.3120644688606e-001, 2.3120644688606e-001, 2.3056003451347e-001, 2.3056003451347e-001,
+ -4.2160412669182e-001, 4.2160412669182e-001, -4.2119130492210e-001, 4.2119130492210e-001,
+ 2.2991424798965e-001, 2.2991424798965e-001, 2.2926911711693e-001, 2.2926911711693e-001,
+ -4.2077746987343e-001, 4.2077746987343e-001, -4.2036268115044e-001, 4.2036268115044e-001,
+ 2.2862461209297e-001, 2.2862461209297e-001, 2.2798073291779e-001, 2.2798073291779e-001,
+ -4.1994687914848e-001, 4.1994687914848e-001, -4.1953012347221e-001, 4.1953012347221e-001,
+ 2.2733750939369e-001, 2.2733750939369e-001, 2.2669491171837e-001, 2.2669491171837e-001,
+ -4.1911235451698e-001, 4.1911235451698e-001, -4.1869360208511e-001, 4.1869360208511e-001,
+ 2.2605296969414e-001, 2.2605296969414e-001, 2.2541165351868e-001, 2.2541165351868e-001,
+ -4.1827386617661e-001, 4.1827386617661e-001, -4.1785314679146e-001, 4.1785314679146e-001,
+ 2.2477099299431e-001, 2.2477099299431e-001, 2.2413098812103e-001, 2.2413098812103e-001,
+ -4.1743144392967e-001, 4.1743144392967e-001, -4.1700875759125e-001, 4.1700875759125e-001,
+ 2.2349163889885e-001, 2.2349163889885e-001, 2.2285294532776e-001, 2.2285294532776e-001,
+ -4.1658508777618e-001, 4.1658508777618e-001, -4.1616043448448e-001, 4.1616043448448e-001,
+ 2.2221487760544e-001, 2.2221487760544e-001, 2.2157746553421e-001, 2.2157746553421e-001,
+ -4.1573479771614e-001, 4.1573479771614e-001, -4.1530820727348e-001, 4.1530820727348e-001,
+ 2.2094073891640e-001, 2.2094073891640e-001, 2.2030463814735e-001, 2.2030463814735e-001,
+ -4.1488060355186e-001, 4.1488060355186e-001, -4.1445204615593e-001, 4.1445204615593e-001,
+ 2.1966919302940e-001, 2.1966919302940e-001, 2.1903443336487e-001, 2.1903443336487e-001,
+ -4.1402250528336e-001, 4.1402250528336e-001, -4.1359201073647e-001, 4.1359201073647e-001,
+ 2.1840032935143e-001, 2.1840032935143e-001, 2.1776688098907e-001, 2.1776688098907e-001,
+ -4.1316053271294e-001, 4.1316053271294e-001, -4.1272807121277e-001, 4.1272807121277e-001,
+ 2.1713408827782e-001, 2.1713408827782e-001, 2.1650198101997e-001, 2.1650198101997e-001,
+ -4.1229465603828e-001, 4.1229465603828e-001, -4.1186025738716e-001, 4.1186025738716e-001,
+ 2.1587052941322e-001, 2.1587052941322e-001, 2.1523973345757e-001, 2.1523973345757e-001,
+ -4.1142487525940e-001, 4.1142487525940e-001, -4.1098853945732e-001, 4.1098853945732e-001,
+ 2.1460962295532e-001, 2.1460962295532e-001, 2.1398016810417e-001, 2.1398016810417e-001,
+ -4.1055124998093e-001, 4.1055124998093e-001, -4.1011297702789e-001, 4.1011297702789e-001,
+ 2.1335139870644e-001, 2.1335139870644e-001, 2.1272331476212e-001, 2.1272331476212e-001,
+ -4.0967375040054e-001, 4.0967375040054e-001, -4.0923357009888e-001, 4.0923357009888e-001,
+ 2.1209588646889e-001, 2.1209588646889e-001, 2.1146914362907e-001, 2.1146914362907e-001,
+ -4.0879240632057e-001, 4.0879240632057e-001, -4.0835028886795e-001, 4.0835028886795e-001,
+ 2.1084308624268e-001, 2.1084308624268e-001, 2.1021771430969e-001, 2.1021771430969e-001,
+ -4.0790718793869e-001, 4.0790718793869e-001, -4.0746316313744e-001, 4.0746316313744e-001,
+ 2.0959302783012e-001, 2.0959302783012e-001, 2.0896899700165e-001, 2.0896899700165e-001,
+ -4.0701815485954e-001, 4.0701815485954e-001, -4.0657219290733e-001, 4.0657219290733e-001,
+ 2.0834565162659e-001, 2.0834565162659e-001, 2.0772302150726e-001, 2.0772302150726e-001,
+ -4.0612527728081e-001, 4.0612527728081e-001, -4.0567740797997e-001, 4.0567740797997e-001,
+ 2.0710107684135e-001, 2.0710107684135e-001, 2.0647978782654e-001, 2.0647978782654e-001,
+ -4.0522858500481e-001, 4.0522858500481e-001, -4.0477880835533e-001, 4.0477880835533e-001,
+ 2.0585921406746e-001, 2.0585921406746e-001, 2.0523932576180e-001, 2.0523932576180e-001,
+ -4.0432807803154e-001, 4.0432807803154e-001, -4.0387639403343e-001, 4.0387639403343e-001,
+ 2.0462015271187e-001, 2.0462015271187e-001, 2.0400163531303e-001, 2.0400163531303e-001,
+ -4.0342378616333e-001, 4.0342378616333e-001, -4.0297019481659e-001, 4.0297019481659e-001,
+ 2.0338383316994e-001, 2.0338383316994e-001, 2.0276674628258e-001, 2.0276674628258e-001,
+ -4.0251564979553e-001, 4.0251564979553e-001, -4.0206018090248e-001, 4.0206018090248e-001,
+ 2.0215034484863e-001, 2.0215034484863e-001, 2.0153462886810e-001, 2.0153462886810e-001,
+ -4.0160375833511e-001, 4.0160375833511e-001, -4.0114638209343e-001, 4.0114638209343e-001,
+ 2.0091962814331e-001, 2.0091962814331e-001, 2.0030534267426e-001, 2.0030534267426e-001,
+ -4.0068808197975e-001, 4.0068808197975e-001, -4.0022882819176e-001, 4.0022882819176e-001,
+ 1.9969174265862e-001, 1.9969174265862e-001, 1.9907885789871e-001, 1.9907885789871e-001,
+ -3.9976862072945e-001, 3.9976862072945e-001, -3.9930748939514e-001, 3.9930748939514e-001,
+ 1.9846668839455e-001, 1.9846668839455e-001, 1.9785523414612e-001, 1.9785523414612e-001,
+ -3.9884540438652e-001, 3.9884540438652e-001, -3.9838239550591e-001, 3.9838239550591e-001,
+ 1.9724446535110e-001, 1.9724446535110e-001, 1.9663444161415e-001, 1.9663444161415e-001,
+ -3.9791843295097e-001, 3.9791843295097e-001, -3.9745354652405e-001, 3.9745354652405e-001,
+ 1.9602510333061e-001, 1.9602510333061e-001, 1.9541648030281e-001, 1.9541648030281e-001,
+ -3.9698773622513e-001, 3.9698773622513e-001, -3.9652097225189e-001, 3.9652097225189e-001,
+ 1.9480860233307e-001, 1.9480860233307e-001, 1.9420140981674e-001, 1.9420140981674e-001,
+ -3.9605328440666e-001, 3.9605328440666e-001, -3.9558467268944e-001, 3.9558467268944e-001,
+ 1.9359496235847e-001, 1.9359496235847e-001, 1.9298920035362e-001, 1.9298920035362e-001,
+ -3.9511510729790e-001, 3.9511510729790e-001, -3.9464461803436e-001, 3.9464461803436e-001,
+ 1.9238418340683e-001, 1.9238418340683e-001, 1.9177991151810e-001, 1.9177991151810e-001,
+ -3.9417320489883e-001, 3.9417320489883e-001, -3.9370086789131e-001, 3.9370086789131e-001,
+ 1.9117632508278e-001, 1.9117632508278e-001, 1.9057351350784e-001, 1.9057351350784e-001,
+ -3.9322760701180e-001, 3.9322760701180e-001, -3.9275342226028e-001, 3.9275342226028e-001,
+ 1.8997138738632e-001, 1.8997138738632e-001, 1.8937000632286e-001, 1.8937000632286e-001,
+ -3.9227828383446e-001, 3.9227828383446e-001, -3.9180225133896e-001, 3.9180225133896e-001,
+ 1.8876934051514e-001, 1.8876934051514e-001, 1.8816941976547e-001, 1.8816941976547e-001,
+ -3.9132529497147e-001, 3.9132529497147e-001, -3.9084741473198e-001, 3.9084741473198e-001,
+ 1.8757024407387e-001, 1.8757024407387e-001, 1.8697178363800e-001, 1.8697178363800e-001,
+ -3.9036861062050e-001, 3.9036861062050e-001, -3.8988888263702e-001, 3.8988888263702e-001,
+ 1.8637409806252e-001, 1.8637409806252e-001, 1.8577709794044e-001, 1.8577709794044e-001,
+ -3.8940826058388e-001, 3.8940826058388e-001, -3.8892668485641e-001, 3.8892668485641e-001,
+ 1.8518087267876e-001, 1.8518087267876e-001, 1.8458539247513e-001, 1.8458539247513e-001,
+ -3.8844421505928e-001, 3.8844421505928e-001, -3.8796085119247e-001, 3.8796085119247e-001,
+ 1.8399062752724e-001, 1.8399062752724e-001, 1.8339660763741e-001, 1.8339660763741e-001,
+ -3.8747653365135e-001, 3.8747653365135e-001, -3.8699135184288e-001, 3.8699135184288e-001,
+ 1.8280336260796e-001, 1.8280336260796e-001, 1.8221083283424e-001, 1.8221083283424e-001,
+ -3.8650521636009e-001, 3.8650521636009e-001, -3.8601818680763e-001, 3.8601818680763e-001,
+ 1.8161904811859e-001, 1.8161904811859e-001, 1.8102803826332e-001, 1.8102803826332e-001,
+ -3.8553026318550e-001, 3.8553026318550e-001, -3.8504141569138e-001, 3.8504141569138e-001,
+ 1.8043777346611e-001, 1.8043777346611e-001, 1.7984825372696e-001, 1.7984825372696e-001,
+ -3.8455167412758e-001, 3.8455167412758e-001, -3.8406100869179e-001, 3.8406100869179e-001,
+ 1.7925947904587e-001, 1.7925947904587e-001, 1.7867147922516e-001, 1.7867147922516e-001,
+ -3.8356944918633e-001, 3.8356944918633e-001, -3.8307699561119e-001, 3.8307699561119e-001,
+ 1.7808422446251e-001, 1.7808422446251e-001, 1.7749771475792e-001, 1.7749771475792e-001,
+ -3.8258361816406e-001, 3.8258361816406e-001, -3.8208937644958e-001, 3.8208937644958e-001,
+ 1.7691197991371e-001, 1.7691197991371e-001, 1.7632701992989e-001, 1.7632701992989e-001,
+ -3.8159421086311e-001, 3.8159421086311e-001, -3.8109815120697e-001, 3.8109815120697e-001,
+ 1.7574280500412e-001, 1.7574280500412e-001, 1.7515933513641e-001, 1.7515933513641e-001,
+ -3.8060119748116e-001, 3.8060119748116e-001, -3.8010331988335e-001, 3.8010331988335e-001,
+ 1.7457664012909e-001, 1.7457664012909e-001, 1.7399471998215e-001, 1.7399471998215e-001,
+ -3.7960457801819e-001, 3.7960457801819e-001, -3.7910494208336e-001, 3.7910494208336e-001,
+ 1.7341357469559e-001, 1.7341357469559e-001, 1.7283317446709e-001, 1.7283317446709e-001,
+ -3.7860441207886e-001, 3.7860441207886e-001, -3.7810298800468e-001, 3.7810298800468e-001,
+ 1.7225357890129e-001, 1.7225357890129e-001, 1.7167472839355e-001, 1.7167472839355e-001,
+ -3.7760066986084e-001, 3.7760066986084e-001, -3.7709748744965e-001, 3.7709748744965e-001,
+ 1.7109665274620e-001, 1.7109665274620e-001, 1.7051935195923e-001, 1.7051935195923e-001,
+ -3.7659338116646e-001, 3.7659338116646e-001, -3.7608841061592e-001, 3.7608841061592e-001,
+ 1.6994282603264e-001, 1.6994282603264e-001, 1.6936707496643e-001, 1.6936707496643e-001,
+ -3.7558254599571e-001, 3.7558254599571e-001, -3.7507581710815e-001, 3.7507581710815e-001,
+ 1.6879209876060e-001, 1.6879209876060e-001, 1.6821792721748e-001, 1.6821792721748e-001,
+ -3.7456819415092e-001, 3.7456819415092e-001, -3.7405967712402e-001, 3.7405967712402e-001,
+ 1.6764450073242e-001, 1.6764450073242e-001, 1.6707187891006e-001, 1.6707187891006e-001,
+ -3.7355029582977e-001, 3.7355029582977e-001, -3.7304002046585e-001, 3.7304002046585e-001,
+ 1.6650003194809e-001, 1.6650003194809e-001, 1.6592895984650e-001, 1.6592895984650e-001,
+ -3.7252888083458e-001, 3.7252888083458e-001, -3.7201687693596e-001, 3.7201687693596e-001,
+ 1.6535869240761e-001, 1.6535869240761e-001, 1.6478919982910e-001, 1.6478919982910e-001,
+ -3.7150397896767e-001, 3.7150397896767e-001, -3.7099018692970e-001, 3.7099018692970e-001,
+ 1.6422051191330e-001, 1.6422051191330e-001, 1.6365259885788e-001, 1.6365259885788e-001,
+ -3.7047556042671e-001, 3.7047556042671e-001, -3.6996003985405e-001, 3.6996003985405e-001,
+ 1.6308549046516e-001, 1.6308549046516e-001, 1.6251915693283e-001, 1.6251915693283e-001,
+ -3.6944365501404e-001, 3.6944365501404e-001, -3.6892640590668e-001, 3.6892640590668e-001,
+ 1.6195362806320e-001, 1.6195362806320e-001, 1.6138890385628e-001, 1.6138890385628e-001,
+ -3.6840826272964e-001, 3.6840826272964e-001, -3.6788928508759e-001, 3.6788928508759e-001,
+ 1.6082498431206e-001, 1.6082498431206e-001, 1.6026183962822e-001, 1.6026183962822e-001,
+ -3.6736944317818e-001, 3.6736944317818e-001, -3.6684870719910e-001, 3.6684870719910e-001,
+ 1.5969949960709e-001, 1.5969949960709e-001, 1.5913796424866e-001, 1.5913796424866e-001,
+ -3.6632713675499e-001, 3.6632713675499e-001, -3.6580467224121e-001, 3.6580467224121e-001,
+ 1.5857723355293e-001, 1.5857723355293e-001, 1.5801727771759e-001, 1.5801727771759e-001,
+ -3.6528137326241e-001, 3.6528137326241e-001, -3.6475721001625e-001, 3.6475721001625e-001,
+ 1.5745815634727e-001, 1.5745815634727e-001, 1.5689983963966e-001, 1.5689983963966e-001,
+ -3.6423218250275e-001, 3.6423218250275e-001, -3.6370632052422e-001, 3.6370632052422e-001,
+ 1.5634232759476e-001, 1.5634232759476e-001, 1.5578562021255e-001, 1.5578562021255e-001,
+ -3.6317956447601e-001, 3.6317956447601e-001, -3.6265197396278e-001, 3.6265197396278e-001,
+ 1.5522971749306e-001, 1.5522971749306e-001, 1.5467464923859e-001, 1.5467464923859e-001,
+ -3.6212354898453e-001, 3.6212354898453e-001, -3.6159422993660e-001, 3.6159422993660e-001,
+ 1.5412035584450e-001, 1.5412035584450e-001, 1.5356689691544e-001, 1.5356689691544e-001,
+ -3.6106407642365e-001, 3.6106407642365e-001, -3.6053308844566e-001, 3.6053308844566e-001,
+ 1.5301427245140e-001, 1.5301427245140e-001, 1.5246242284775e-001, 1.5246242284775e-001,
+ -3.6000123620033e-001, 3.6000123620033e-001, -3.5946854948997e-001, 3.5946854948997e-001,
+ 1.5191143751144e-001, 1.5191143751144e-001, 1.5136122703552e-001, 1.5136122703552e-001,
+ -3.5893502831459e-001, 3.5893502831459e-001, -3.5840064287186e-001, 3.5840064287186e-001,
+ 1.5081188082695e-001, 1.5081188082695e-001, 1.5026330947876e-001, 1.5026330947876e-001,
+ -3.5786539316177e-001, 3.5786539316177e-001, -3.5732933878899e-001, 3.5732933878899e-001,
+ 1.4971560239792e-001, 1.4971560239792e-001, 1.4916869997978e-001, 1.4916869997978e-001,
+ -3.5679242014885e-001, 3.5679242014885e-001, -3.5625466704369e-001, 3.5625466704369e-001,
+ 1.4862263202667e-001, 1.4862263202667e-001, 1.4807736873627e-001, 1.4807736873627e-001,
+ -3.5571607947350e-001, 3.5571607947350e-001, -3.5517665743828e-001, 3.5517665743828e-001,
+ 1.4753293991089e-001, 1.4753293991089e-001, 1.4698937535286e-001, 1.4698937535286e-001,
+ -3.5463640093803e-001, 3.5463640093803e-001, -3.5409530997276e-001, 3.5409530997276e-001,
+ 1.4644661545753e-001, 1.4644661545753e-001, 1.4590469002724e-001, 1.4590469002724e-001,
+ -3.5355338454247e-001, 3.5355338454247e-001, -3.5301062464714e-001, 3.5301062464714e-001,
+ 1.4536359906197e-001, 1.4536359906197e-001, 1.4482334256172e-001, 1.4482334256172e-001,
+ -3.5246706008911e-001, 3.5246706008911e-001, -3.5192263126373e-001, 3.5192263126373e-001,
+ 1.4428392052650e-001, 1.4428392052650e-001, 1.4374533295631e-001, 1.4374533295631e-001,
+ -3.5137736797333e-001, 3.5137736797333e-001, -3.5083130002022e-001, 3.5083130002022e-001,
+ 1.4320757985115e-001, 1.4320757985115e-001, 1.4267066121101e-001, 1.4267066121101e-001,
+ -3.5028439760208e-001, 3.5028439760208e-001, -3.4973669052124e-001, 3.4973669052124e-001,
+ 1.4213460683823e-001, 1.4213460683823e-001, 1.4159935712814e-001, 1.4159935712814e-001,
+ -3.4918811917305e-001, 3.4918811917305e-001, -3.4863877296448e-001, 3.4863877296448e-001,
+ 1.4106497168541e-001, 1.4106497168541e-001, 1.4053145051003e-001, 1.4053145051003e-001,
+ -3.4808856248856e-001, 3.4808856248856e-001, -3.4753757715225e-001, 3.4753757715225e-001,
+ 1.3999876379967e-001, 1.3999876379967e-001, 1.3946691155434e-001, 1.3946691155434e-001,
+ -3.4698572754860e-001, 3.4698572754860e-001, -3.4643310308456e-001, 3.4643310308456e-001,
+ 1.3893592357635e-001, 1.3893592357635e-001, 1.3840577006340e-001, 1.3840577006340e-001,
+ -3.4587964415550e-001, 3.4587964415550e-001, -3.4532535076141e-001, 3.4532535076141e-001,
+ 1.3787645101547e-001, 1.3787645101547e-001, 1.3734802603722e-001, 1.3734802603722e-001,
+ -3.4477028250694e-001, 3.4477028250694e-001, -3.4421437978745e-001, 3.4421437978745e-001,
+ 1.3682043552399e-001, 1.3682043552399e-001, 1.3629367947578e-001, 1.3629367947578e-001,
+ -3.4365767240524e-001, 3.4365767240524e-001, -3.4310016036034e-001, 3.4310016036034e-001,
+ 1.3576781749725e-001, 1.3576781749725e-001, 1.3524278998375e-001, 1.3524278998375e-001,
+ -3.4254184365273e-001, 3.4254184365273e-001, -3.4198272228241e-001, 3.4198272228241e-001,
+ 1.3471862673759e-001, 1.3471862673759e-001, 1.3419532775879e-001, 1.3419532775879e-001,
+ -3.4142276644707e-001, 3.4142276644707e-001, -3.4086203575134e-001, 3.4086203575134e-001,
+ 1.3367286324501e-001, 1.3367286324501e-001, 1.3315129280090e-001, 1.3315129280090e-001,
+ -3.4030050039291e-001, 3.4030050039291e-001, -3.3973816037178e-001, 3.3973816037178e-001,
+ 1.3263055682182e-001, 1.3263055682182e-001, 1.3211071491241e-001, 1.3211071491241e-001,
+ -3.3917501568794e-001, 3.3917501568794e-001, -3.3861109614372e-001, 3.3861109614372e-001,
+ 1.3159173727036e-001, 1.3159173727036e-001, 1.3107359409332e-001, 1.3107359409332e-001,
+ -3.3804637193680e-001, 3.3804637193680e-001, -3.3748084306717e-001, 3.3748084306717e-001,
+ 1.3055634498596e-001, 1.3055634498596e-001, 1.3003996014595e-001, 1.3003996014595e-001,
+ -3.3691450953484e-001, 3.3691450953484e-001, -3.3634740114212e-001, 3.3634740114212e-001,
+ 1.2952443957329e-001, 1.2952443957329e-001, 1.2900981307030e-001, 1.2900981307030e-001,
+ -3.3577948808670e-001, 3.3577948808670e-001, -3.3521080017090e-001, 3.3521080017090e-001,
+ 1.2849602103233e-001, 1.2849602103233e-001, 1.2798312306404e-001, 1.2798312306404e-001,
+ -3.3464130759239e-001, 3.3464130759239e-001, -3.3407104015350e-001, 3.3407104015350e-001,
+ 1.2747111916542e-001, 1.2747111916542e-001, 1.2695997953415e-001, 1.2695997953415e-001,
+ -3.3349996805191e-001, 3.3349996805191e-001, -3.3292812108994e-001, 3.3292812108994e-001,
+ 1.2644970417023e-001, 1.2644970417023e-001, 1.2594032287598e-001, 1.2594032287598e-001,
+ -3.3235549926758e-001, 3.3235549926758e-001, -3.3178207278252e-001, 3.3178207278252e-001,
+ 1.2543180584908e-001, 1.2543180584908e-001, 1.2492418289185e-001, 1.2492418289185e-001,
+ -3.3120790123940e-001, 3.3120790123940e-001, -3.3063292503357e-001, 3.3063292503357e-001,
+ 1.2441745400429e-001, 1.2441745400429e-001, 1.2391158938408e-001, 1.2391158938408e-001,
+ -3.3005717396736e-001, 3.3005717396736e-001, -3.2948064804077e-001, 3.2948064804077e-001,
+ 1.2340661883354e-001, 1.2340661883354e-001, 1.2290251255035e-001, 1.2290251255035e-001,
+ -3.2890334725380e-001, 3.2890334725380e-001, -3.2832527160645e-001, 3.2832527160645e-001,
+ 1.2239933013916e-001, 1.2239933013916e-001, 1.2189701199532e-001, 1.2189701199532e-001,
+ -3.2774642109871e-001, 3.2774642109871e-001, -3.2716682553291e-001, 3.2716682553291e-001,
+ 1.2139558792114e-001, 1.2139558792114e-001, 1.2089505791664e-001, 1.2089505791664e-001,
+ -3.2658642530441e-001, 3.2658642530441e-001, -3.2600528001785e-001, 3.2600528001785e-001,
+ 1.2039542198181e-001, 1.2039542198181e-001, 1.1989668011665e-001, 1.1989668011665e-001,
+ -3.2542335987091e-001, 3.2542335987091e-001, -3.2484066486359e-001, 3.2484066486359e-001,
+ 1.1939880251884e-001, 1.1939880251884e-001, 1.1890184879303e-001, 1.1890184879303e-001,
+ -3.2425719499588e-001, 3.2425719499588e-001, -3.2367298007011e-001, 3.2367298007011e-001,
+ 1.1840578913689e-001, 1.1840578913689e-001, 1.1791062355042e-001, 1.1791062355042e-001,
+ -3.2308802008629e-001, 3.2308802008629e-001, -3.2250228524208e-001, 3.2250228524208e-001,
+ 1.1741638183594e-001, 1.1741638183594e-001, 1.1692300438881e-001, 1.1692300438881e-001,
+ -3.2191577553749e-001, 3.2191577553749e-001, -3.2132852077484e-001, 3.2132852077484e-001,
+ 1.1643055081367e-001, 1.1643055081367e-001, 1.1593899130821e-001, 1.1593899130821e-001,
+ -3.2074052095413e-001, 3.2074052095413e-001, -3.2015174627304e-001, 3.2015174627304e-001,
+ 1.1544832587242e-001, 1.1544832587242e-001, 1.1495858430862e-001, 1.1495858430862e-001,
+ -3.1956222653389e-001, 3.1956222653389e-001, -3.1897196173668e-001, 3.1897196173668e-001,
+ 1.1446973681450e-001, 1.1446973681450e-001, 1.1398181319237e-001, 1.1398181319237e-001,
+ -3.1838095188141e-001, 3.1838095188141e-001, -3.1778916716576e-001, 3.1778916716576e-001,
+ 1.1349478363991e-001, 1.1349478363991e-001, 1.1300864815712e-001, 1.1300864815712e-001,
+ -3.1719663739204e-001, 3.1719663739204e-001, -3.1660339236259e-001, 3.1660339236259e-001,
+ 1.1252346634865e-001, 1.1252346634865e-001, 1.1203914880753e-001, 1.1203914880753e-001,
+ -3.1600937247276e-001, 3.1600937247276e-001, -3.1541460752487e-001, 3.1541460752487e-001,
+ 1.1155578494072e-001, 1.1155578494072e-001, 1.1107331514359e-001, 1.1107331514359e-001,
+ -3.1481912732124e-001, 3.1481912732124e-001, -3.1422290205956e-001, 3.1422290205956e-001,
+ 1.1059173941612e-001, 1.1059173941612e-001, 1.1011111736298e-001, 1.1011111736298e-001,
+ -3.1362590193748e-001, 3.1362590193748e-001, -3.1302821636200e-001, 3.1302821636200e-001,
+ 1.0963138937950e-001, 1.0963138937950e-001, 1.0915258526802e-001, 1.0915258526802e-001,
+ -3.1242975592613e-001, 3.1242975592613e-001, -3.1183058023453e-001, 3.1183058023453e-001,
+ 1.0867470502853e-001, 1.0867470502853e-001, 1.0819774866104e-001, 1.0819774866104e-001,
+ -3.1123065948486e-001, 3.1123065948486e-001, -3.1062999367714e-001, 3.1062999367714e-001,
+ 1.0772171616554e-001, 1.0772171616554e-001, 1.0724657773972e-001, 1.0724657773972e-001,
+ -3.1002861261368e-001, 3.1002861261368e-001, -3.0942648649216e-001, 3.0942648649216e-001,
+ 1.0677239298820e-001, 1.0677239298820e-001, 1.0629913210869e-001, 1.0629913210869e-001,
+ -3.0882367491722e-001, 3.0882367491722e-001, -3.0822008848190e-001, 3.0822008848190e-001,
+ 1.0582679510117e-001, 1.0582679510117e-001, 1.0535538196564e-001, 1.0535538196564e-001,
+ -3.0761581659317e-001, 3.0761581659317e-001, -3.0701079964638e-001, 3.0701079964638e-001,
+ 1.0488489270210e-001, 1.0488489270210e-001, 1.0441532731056e-001, 1.0441532731056e-001,
+ -3.0640503764153e-001, 3.0640503764153e-001, -3.0579859018326e-001, 3.0579859018326e-001,
+ 1.0394671559334e-001, 1.0394671559334e-001, 1.0347902774811e-001, 1.0347902774811e-001,
+ -3.0519139766693e-001, 3.0519139766693e-001, -3.0458351969719e-001, 3.0458351969719e-001,
+ 1.0301226377487e-001, 1.0301226377487e-001, 1.0254645347595e-001, 1.0254645347595e-001,
+ -3.0397489666939e-001, 3.0397489666939e-001, -3.0336555838585e-001, 3.0336555838585e-001,
+ 1.0208156704903e-001, 1.0208156704903e-001, 1.0161760449409e-001, 1.0161760449409e-001,
+ -3.0275553464890e-001, 3.0275553464890e-001, -3.0214476585388e-001, 3.0214476585388e-001,
+ 1.0115459561348e-001, 1.0115459561348e-001, 1.0069251060486e-001, 1.0069251060486e-001,
+ -3.0153331160545e-001, 3.0153331160545e-001, -3.0092114210129e-001, 3.0092114210129e-001,
+ 1.0023137927055e-001, 1.0023137927055e-001, 9.9771171808243e-002, 9.9771171808243e-002,
+ -3.0030825734138e-001, 3.0030825734138e-001, -2.9969465732574e-001, 2.9969465732574e-001,
+ 9.9311918020248e-002, 9.9311918020248e-002, 9.8853617906570e-002, 9.8853617906570e-002,
+ -2.9908037185669e-001, 2.9908037185669e-001, -2.9846537113190e-001, 2.9846537113190e-001,
+ 9.8396241664886e-002, 9.8396241664886e-002, 9.7939819097519e-002, 9.7939819097519e-002,
+ -2.9784965515137e-001, 2.9784965515137e-001, -2.9723325371742e-001, 2.9723325371742e-001,
+ 9.7484350204468e-002, 9.7484350204468e-002, 9.7029805183411e-002, 9.7029805183411e-002,
+ -2.9661616683006e-001, 2.9661616683006e-001, -2.9599836468697e-001, 2.9599836468697e-001,
+ 9.6576213836670e-002, 9.6576213836670e-002, 9.6123605966568e-002, 9.6123605966568e-002,
+ -2.9537984728813e-001, 2.9537984728813e-001, -2.9476067423820e-001, 2.9476067423820e-001,
+ 9.5671921968460e-002, 9.5671921968460e-002, 9.5221191644669e-002, 9.5221191644669e-002,
+ -2.9414078593254e-001, 2.9414078593254e-001, -2.9352021217346e-001, 2.9352021217346e-001,
+ 9.4771414995193e-002, 9.4771414995193e-002, 9.4322592020035e-002, 9.4322592020035e-002,
+ -2.9289892315865e-001, 2.9289892315865e-001, -2.9227697849274e-001, 2.9227697849274e-001,
+ 9.3874722719193e-002, 9.3874722719193e-002, 9.3427807092667e-002, 9.3427807092667e-002,
+ -2.9165434837341e-001, 2.9165434837341e-001, -2.9103100299835e-001, 2.9103100299835e-001,
+ 9.2981845140457e-002, 9.2981845140457e-002, 9.2536836862564e-002, 9.2536836862564e-002,
+ -2.9040697216988e-001, 2.9040697216988e-001, -2.8978228569031e-001, 2.8978228569031e-001,
+ 9.2092812061310e-002, 9.2092812061310e-002, 9.1649711132050e-002, 9.1649711132050e-002,
+ -2.8915691375732e-001, 2.8915691375732e-001, -2.8853085637093e-001, 2.8853085637093e-001,
+ 9.1207593679428e-002, 9.1207593679428e-002, 9.0766429901123e-002, 9.0766429901123e-002,
+ -2.8790411353111e-001, 2.8790411353111e-001, -2.8727668523788e-001, 2.8727668523788e-001,
+ 9.0326249599457e-002, 9.0326249599457e-002, 8.9887022972107e-002, 8.9887022972107e-002,
+ -2.8664860129356e-001, 2.8664860129356e-001, -2.8601983189583e-001, 2.8601983189583e-001,
+ 8.9448750019073e-002, 8.9448750019073e-002, 8.9011460542679e-002, 8.9011460542679e-002,
+ -2.8539037704468e-001, 2.8539037704468e-001, -2.8476026654243e-001, 2.8476026654243e-001,
+ 8.8575124740601e-002, 8.8575124740601e-002, 8.8139742612839e-002, 8.8139742612839e-002,
+ -2.8412947058678e-001, 2.8412947058678e-001, -2.8349801898003e-001, 2.8349801898003e-001,
+ 8.7705343961716e-002, 8.7705343961716e-002, 8.7271928787231e-002, 8.7271928787231e-002,
+ -2.8286591172218e-001, 2.8286591172218e-001, -2.8223311901093e-001, 2.8223311901093e-001,
+ 8.6839467287064e-002, 8.6839467287064e-002, 8.6407989263535e-002, 8.6407989263535e-002,
+ -2.8159967064857e-001, 2.8159967064857e-001, -2.8096556663513e-001, 2.8096556663513e-001,
+ 8.5977494716644e-002, 8.5977494716644e-002, 8.5547953844070e-002, 8.5547953844070e-002,
+ -2.8033080697060e-001, 2.8033080697060e-001, -2.7969536185265e-001, 2.7969536185265e-001,
+ 8.5119396448135e-002, 8.5119396448135e-002, 8.4691792726517e-002, 8.4691792726517e-002,
+ -2.7905926108360e-001, 2.7905926108360e-001, -2.7842253446579e-001, 2.7842253446579e-001,
+ 8.4265202283859e-002, 8.4265202283859e-002, 8.3839565515518e-002, 8.3839565515518e-002,
+ -2.7778512239456e-001, 2.7778512239456e-001, -2.7714705467224e-001, 2.7714705467224e-001,
+ 8.3414912223816e-002, 8.3414912223816e-002, 8.2991242408752e-002, 8.2991242408752e-002,
+ -2.7650836110115e-001, 2.7650836110115e-001, -2.7586901187897e-001, 2.7586901187897e-001,
+ 8.2568556070328e-002, 8.2568556070328e-002, 8.2146853208542e-002, 8.2146853208542e-002,
+ -2.7522900700569e-001, 2.7522900700569e-001, -2.7458834648132e-001, 2.7458834648132e-001,
+ 8.1726133823395e-002, 8.1726133823395e-002, 8.1306397914886e-002, 8.1306397914886e-002,
+ -2.7394703030586e-001, 2.7394703030586e-001, -2.7330508828163e-001, 2.7330508828163e-001,
+ 8.0887645483017e-002, 8.0887645483017e-002, 8.0469876527786e-002, 8.0469876527786e-002,
+ -2.7266249060631e-001, 2.7266249060631e-001, -2.7201926708221e-001, 2.7201926708221e-001,
+ 8.0053120851517e-002, 8.0053120851517e-002, 7.9637318849564e-002, 7.9637318849564e-002,
+ -2.7137538790703e-001, 2.7137538790703e-001, -2.7073088288307e-001, 2.7073088288307e-001,
+ 7.9222530126572e-002, 7.9222530126572e-002, 7.8808695077896e-002, 7.8808695077896e-002,
+ -2.7008575201035e-001, 2.7008575201035e-001, -2.6943996548653e-001, 2.6943996548653e-001,
+ 7.8395873308182e-002, 7.8395873308182e-002, 7.7984064817429e-002, 7.7984064817429e-002,
+ -2.6879355311394e-001, 2.6879355311394e-001, -2.6814648509026e-001, 2.6814648509026e-001,
+ 7.7573210000992e-002, 7.7573210000992e-002, 7.7163368463516e-002, 7.7163368463516e-002,
+ -2.6749882102013e-001, 2.6749882102013e-001, -2.6685050129890e-001, 2.6685050129890e-001,
+ 7.6754540205002e-002, 7.6754540205002e-002, 7.6346695423126e-002, 7.6346695423126e-002,
+ -2.6620155572891e-001, 2.6620155572891e-001, -2.6555201411247e-001, 2.6555201411247e-001,
+ 7.5939834117889e-002, 7.5939834117889e-002, 7.5533986091614e-002, 7.5533986091614e-002,
+ -2.6490181684494e-001, 2.6490181684494e-001, -2.6425099372864e-001, 2.6425099372864e-001,
+ 7.5129121541977e-002, 7.5129121541977e-002, 7.4725270271301e-002, 7.4725270271301e-002,
+ -2.6359957456589e-001, 2.6359957456589e-001, -2.6294752955437e-001, 2.6294752955437e-001,
+ 7.4322402477264e-002, 7.4322402477264e-002, 7.3920547962189e-002, 7.3920547962189e-002,
+ -2.6229485869408e-001, 2.6229485869408e-001, -2.6164156198502e-001, 2.6164156198502e-001,
+ 7.3519706726074e-002, 7.3519706726074e-002, 7.3119848966599e-002, 7.3119848966599e-002,
+ -2.6098763942719e-001, 2.6098763942719e-001, -2.6033312082291e-001, 2.6033312082291e-001,
+ 7.2721004486084e-002, 7.2721004486084e-002, 7.2323173284531e-002, 7.2323173284531e-002,
+ -2.5967800617218e-001, 2.5967800617218e-001, -2.5902226567268e-001, 2.5902226567268e-001,
+ 7.1926325559616e-002, 7.1926325559616e-002, 7.1530520915985e-002, 7.1530520915985e-002,
+ -2.5836589932442e-001, 2.5836589932442e-001, -2.5770893692970e-001, 2.5770893692970e-001,
+ 7.1135699748993e-002, 7.1135699748993e-002, 7.0741891860962e-002, 7.0741891860962e-002,
+ -2.5705137848854e-001, 2.5705137848854e-001, -2.5639319419861e-001, 2.5639319419861e-001,
+ 7.0349097251892e-002, 7.0349097251892e-002, 6.9957315921783e-002, 6.9957315921783e-002,
+ -2.5573444366455e-001, 2.5573444366455e-001, -2.5507506728172e-001, 2.5507506728172e-001,
+ 6.9566547870636e-002, 6.9566547870636e-002, 6.9176763296127e-002, 6.9176763296127e-002,
+ -2.5441506505013e-001, 2.5441506505013e-001, -2.5375449657440e-001, 2.5375449657440e-001,
+ 6.8788021802902e-002, 6.8788021802902e-002, 6.8400293588638e-002, 6.8400293588638e-002,
+ -2.5309333205223e-001, 2.5309333205223e-001, -2.5243157148361e-001, 2.5243157148361e-001,
+ 6.8013578653336e-002, 6.8013578653336e-002, 6.7627876996994e-002, 6.7627876996994e-002,
+ -2.5176918506622e-001, 2.5176918506622e-001, -2.5110623240471e-001, 2.5110623240471e-001,
+ 6.7243188619614e-002, 6.7243188619614e-002, 6.6859513521194e-002, 6.6859513521194e-002,
+ -2.5044268369675e-001, 2.5044268369675e-001, -2.4977856874466e-001, 2.4977856874466e-001,
+ 6.6476881504059e-002, 6.6476881504059e-002, 6.6095262765884e-002, 6.6095262765884e-002,
+ -2.4911384284496e-001, 2.4911384284496e-001, -2.4844853579998e-001, 2.4844853579998e-001,
+ 6.5714657306671e-002, 6.5714657306671e-002, 6.5335065126419e-002, 6.5335065126419e-002,
+ -2.4778263270855e-001, 2.4778263270855e-001, -2.4711616337299e-001, 2.4711616337299e-001,
+ 6.4956516027451e-002, 6.4956516027451e-002, 6.4578980207443e-002, 6.4578980207443e-002,
+ -2.4644909799099e-001, 2.4644909799099e-001, -2.4578146636486e-001, 2.4578146636486e-001,
+ 6.4202457666397e-002, 6.4202457666397e-002, 6.3826978206635e-002, 6.3826978206635e-002,
+ -2.4511325359344e-001, 2.4511325359344e-001, -2.4444445967674e-001, 2.4444445967674e-001,
+ 6.3452512025833e-002, 6.3452512025833e-002, 6.3079088926315e-002, 6.3079088926315e-002,
+ -2.4377508461475e-001, 2.4377508461475e-001, -2.4310514330864e-001, 2.4310514330864e-001,
+ 6.2706679105759e-002, 6.2706679105759e-002, 6.2335312366486e-002, 6.2335312366486e-002,
+ -2.4243463575840e-001, 2.4243463575840e-001, -2.4176354706287e-001, 2.4176354706287e-001,
+ 6.1964958906174e-002, 6.1964958906174e-002, 6.1595648527145e-002, 6.1595648527145e-002,
+ -2.4109189212322e-001, 2.4109189212322e-001, -2.4041967093945e-001, 2.4041967093945e-001,
+ 6.1227351427078e-002, 6.1227351427078e-002, 6.0860097408295e-002, 6.0860097408295e-002,
+ -2.3974688351154e-001, 2.3974688351154e-001, -2.3907352983952e-001, 2.3907352983952e-001,
+ 6.0493886470795e-002, 6.0493886470795e-002, 6.0128718614578e-002, 6.0128718614578e-002,
+ -2.3839962482452e-001, 2.3839962482452e-001, -2.3772515356541e-001, 2.3772515356541e-001,
+ 5.9764564037323e-002, 5.9764564037323e-002, 5.9401452541351e-002, 5.9401452541351e-002,
+ -2.3705011606216e-001, 2.3705011606216e-001, -2.3637452721596e-001, 2.3637452721596e-001,
+ 5.9039384126663e-002, 5.9039384126663e-002, 5.8678328990936e-002, 5.8678328990936e-002,
+ -2.3569837212563e-001, 2.3569837212563e-001, -2.3502166569233e-001, 2.3502166569233e-001,
+ 5.8318346738815e-002, 5.8318346738815e-002, 5.7959377765656e-002, 5.7959377765656e-002,
+ -2.3434442281723e-001, 2.3434442281723e-001, -2.3366661369801e-001, 2.3366661369801e-001,
+ 5.7601451873779e-002, 5.7601451873779e-002, 5.7244569063187e-002, 5.7244569063187e-002,
+ -2.3298825323582e-001, 2.3298825323582e-001, -2.3230935633183e-001, 2.3230935633183e-001,
+ 5.6888729333878e-002, 5.6888729333878e-002, 5.6533932685852e-002, 5.6533932685852e-002,
+ -2.3162989318371e-001, 2.3162989318371e-001, -2.3094990849495e-001, 2.3094990849495e-001,
+ 5.6180179119110e-002, 5.6180179119110e-002, 5.5827498435974e-002, 5.5827498435974e-002,
+ -2.3026935756207e-001, 2.3026935756207e-001, -2.2958828508854e-001, 2.2958828508854e-001,
+ 5.5475831031799e-002, 5.5475831031799e-002, 5.5125206708908e-002, 5.5125206708908e-002,
+ -2.2890666127205e-001, 2.2890666127205e-001, -2.2822450101376e-001, 2.2822450101376e-001,
+ 5.4775655269623e-002, 5.4775655269623e-002, 5.4427117109299e-002, 5.4427117109299e-002,
+ -2.2754180431366e-001, 2.2754180431366e-001, -2.2685857117176e-001, 2.2685857117176e-001,
+ 5.4079651832581e-002, 5.4079651832581e-002, 5.3733229637146e-002, 5.3733229637146e-002,
+ -2.2617480158806e-001, 2.2617480158806e-001, -2.2549049556255e-001, 2.2549049556255e-001,
+ 5.3387850522995e-002, 5.3387850522995e-002, 5.3043544292450e-002, 5.3043544292450e-002,
+ -2.2480566799641e-001, 2.2480566799641e-001, -2.2412031888962e-001, 2.2412031888962e-001,
+ 5.2700251340866e-002, 5.2700251340866e-002, 5.2358031272888e-002, 5.2358031272888e-002,
+ -2.2343441843987e-001, 2.2343441843987e-001, -2.2274801135063e-001, 2.2274801135063e-001,
+ 5.2016884088516e-002, 5.2016884088516e-002, 5.1676779985428e-002, 5.1676779985428e-002,
+ -2.2206108272076e-001, 2.2206108272076e-001, -2.2137361764908e-001, 2.2137361764908e-001,
+ 5.1337718963623e-002, 5.1337718963623e-002, 5.0999701023102e-002, 5.0999701023102e-002,
+ -2.2068564593792e-001, 2.2068564593792e-001, -2.1999713778496e-001, 2.1999713778496e-001,
+ 5.0662755966187e-002, 5.0662755966187e-002, 5.0326883792877e-002, 5.0326883792877e-002,
+ -2.1930812299252e-001, 2.1930812299252e-001, -2.1861858665943e-001, 2.1861858665943e-001,
+ 4.9992054700851e-002, 4.9992054700851e-002, 4.9658298492432e-002, 4.9658298492432e-002,
+ -2.1792854368687e-001, 2.1792854368687e-001, -2.1723797917366e-001, 2.1723797917366e-001,
+ 4.9325585365295e-002, 4.9325585365295e-002, 4.8993945121765e-002, 4.8993945121765e-002,
+ -2.1654690802097e-001, 2.1654690802097e-001, -2.1585533022881e-001, 2.1585533022881e-001,
+ 4.8663347959518e-002, 4.8663347959518e-002, 4.8333823680878e-002, 4.8333823680878e-002,
+ -2.1516324579716e-001, 2.1516324579716e-001, -2.1447065472603e-001, 2.1447065472603e-001,
+ 4.8005342483521e-002, 4.8005342483521e-002, 4.7677963972092e-002, 4.7677963972092e-002,
+ -2.1377755701542e-001, 2.1377755701542e-001, -2.1308395266533e-001, 2.1308395266533e-001,
+ 4.7351628541946e-002, 4.7351628541946e-002, 4.7026365995407e-002, 4.7026365995407e-002,
+ -2.1238984167576e-001, 2.1238984167576e-001, -2.1169523894787e-001, 2.1169523894787e-001,
+ 4.6702146530151e-002, 4.6702146530151e-002, 4.6378999948502e-002, 4.6378999948502e-002,
+ -2.1100014448166e-001, 2.1100014448166e-001, -2.1030454337597e-001, 2.1030454337597e-001,
+ 4.6056956052780e-002, 4.6056956052780e-002, 4.5735955238342e-002, 4.5735955238342e-002,
+ -2.0960845053196e-001, 2.0960845053196e-001, -2.0891186594963e-001, 2.0891186594963e-001,
+ 4.5415997505188e-002, 4.5415997505188e-002, 4.5097142457962e-002, 4.5097142457962e-002,
+ -2.0821478962898e-001, 2.0821478962898e-001, -2.0751722157001e-001, 2.0751722157001e-001,
+ 4.4779360294342e-002, 4.4779360294342e-002, 4.4462621212006e-002, 4.4462621212006e-002,
+ -2.0681916177273e-001, 2.0681916177273e-001, -2.0612062513828e-001, 2.0612062513828e-001,
+ 4.4146984815598e-002, 4.4146984815598e-002, 4.3832421302795e-002, 4.3832421302795e-002,
+ -2.0542159676552e-001, 2.0542159676552e-001, -2.0472207665443e-001, 2.0472207665443e-001,
+ 4.3518900871277e-002, 4.3518900871277e-002, 4.3206483125687e-002, 4.3206483125687e-002,
+ -2.0402207970619e-001, 2.0402207970619e-001, -2.0332162082195e-001, 2.0332162082195e-001,
+ 4.2895138263702e-002, 4.2895138263702e-002, 4.2584836483002e-002, 4.2584836483002e-002,
+ -2.0262065529823e-001, 2.0262065529823e-001, -2.0191922783852e-001, 2.0191922783852e-001,
+ 4.2275637388229e-002, 4.2275637388229e-002, 4.1967511177063e-002, 4.1967511177063e-002,
+ -2.0121732354164e-001, 2.0121732354164e-001, -2.0051495730877e-001, 2.0051495730877e-001,
+ 4.1660457849503e-002, 4.1660457849503e-002, 4.1354507207870e-002, 4.1354507207870e-002,
+ -1.9981209933758e-001, 1.9981209933758e-001, -1.9910877943039e-001, 1.9910877943039e-001,
+ 4.1049599647522e-002, 4.1049599647522e-002, 4.0745794773102e-002, 4.0745794773102e-002,
+ -1.9840499758720e-001, 1.9840499758720e-001, -1.9770073890686e-001, 1.9770073890686e-001,
+ 4.0443062782288e-002, 4.0443062782288e-002, 4.0141433477402e-002, 4.0141433477402e-002,
+ -1.9699601829052e-001, 1.9699601829052e-001, -1.9629083573818e-001, 1.9629083573818e-001,
+ 3.9840877056122e-002, 3.9840877056122e-002, 3.9541393518448e-002, 3.9541393518448e-002,
+ -1.9558519124985e-001, 1.9558519124985e-001, -1.9487908482552e-001, 1.9487908482552e-001,
+ 3.9242982864380e-002, 3.9242982864380e-002, 3.8945674896240e-002, 3.8945674896240e-002,
+ -1.9417253136635e-001, 1.9417253136635e-001, -1.9346550107002e-001, 1.9346550107002e-001,
+ 3.8649439811707e-002, 3.8649439811707e-002, 3.8354307413101e-002, 3.8354307413101e-002,
+ -1.9275803864002e-001, 1.9275803864002e-001, -1.9205009937286e-001, 1.9205009937286e-001,
+ 3.8060247898102e-002, 3.8060247898102e-002, 3.7767261266708e-002, 3.7767261266708e-002,
+ -1.9134172797203e-001, 1.9134172797203e-001, -1.9063289463520e-001, 1.9063289463520e-001,
+ 3.7475377321243e-002, 3.7475377321243e-002, 3.7184596061707e-002, 3.7184596061707e-002,
+ -1.8992361426353e-001, 1.8992361426353e-001, -1.8921388685703e-001, 1.8921388685703e-001,
+ 3.6894887685776e-002, 3.6894887685776e-002, 3.6606252193451e-002, 3.6606252193451e-002,
+ -1.8850371241570e-001, 1.8850371241570e-001, -1.8779309093952e-001, 1.8779309093952e-001,
+ 3.6318749189377e-002, 3.6318749189377e-002, 3.6032319068909e-002, 3.6032319068909e-002,
+ -1.8708203732967e-001, 1.8708203732967e-001, -1.8637053668499e-001, 1.8637053668499e-001,
+ 3.5746961832047e-002, 3.5746961832047e-002, 3.5462707281113e-002, 3.5462707281113e-002,
+ -1.8565860390663e-001, 1.8565860390663e-001, -1.8494622409344e-001, 1.8494622409344e-001,
+ 3.5179555416107e-002, 3.5179555416107e-002, 3.4897476434708e-002, 3.4897476434708e-002,
+ -1.8423342704773e-001, 1.8423342704773e-001, -1.8352018296719e-001, 1.8352018296719e-001,
+ 3.4616529941559e-002, 3.4616529941559e-002, 3.4336656332016e-002, 3.4336656332016e-002,
+ -1.8280650675297e-001, 1.8280650675297e-001, -1.8209239840508e-001, 1.8209239840508e-001,
+ 3.4057855606079e-002, 3.4057855606079e-002, 3.3780187368393e-002, 3.3780187368393e-002,
+ -1.8137787282467e-001, 1.8137787282467e-001, -1.8066290020943e-001, 1.8066290020943e-001,
+ 3.3503592014313e-002, 3.3503592014313e-002, 3.3228129148483e-002, 3.3228129148483e-002,
+ -1.7994752526283e-001, 1.7994752526283e-001, -1.7923171818256e-001, 1.7923171818256e-001,
+ 3.2953739166260e-002, 3.2953739166260e-002, 3.2680422067642e-002, 3.2680422067642e-002,
+ -1.7851547896862e-001, 1.7851547896862e-001, -1.7779883742332e-001, 1.7779883742332e-001,
+ 3.2408237457275e-002, 3.2408237457275e-002, 3.2137155532837e-002, 3.2137155532837e-002,
+ -1.7708176374435e-001, 1.7708176374435e-001, -1.7636428773403e-001, 1.7636428773403e-001,
+ 3.1867176294327e-002, 3.1867176294327e-002, 3.1598269939423e-002, 3.1598269939423e-002,
+ -1.7564637959003e-001, 1.7564637959003e-001, -1.7492806911469e-001, 1.7492806911469e-001,
+ 3.1330496072769e-002, 3.1330496072769e-002, 3.1063824892044e-002, 3.1063824892044e-002,
+ -1.7420934140682e-001, 1.7420934140682e-001, -1.7349021136761e-001, 1.7349021136761e-001,
+ 3.0798226594925e-002, 3.0798226594925e-002, 3.0533760786057e-002, 3.0533760786057e-002,
+ -1.7277066409588e-001, 1.7277066409588e-001, -1.7205071449280e-001, 1.7205071449280e-001,
+ 3.0270397663116e-002, 3.0270397663116e-002, 3.0008137226105e-002, 3.0008137226105e-002,
+ -1.7133036255836e-001, 1.7133036255836e-001, -1.7060960829258e-001, 1.7060960829258e-001,
+ 2.9746979475021e-002, 2.9746979475021e-002, 2.9486924409866e-002, 2.9486924409866e-002,
+ -1.6988845169544e-001, 1.6988845169544e-001, -1.6916689276695e-001, 1.6916689276695e-001,
+ 2.9227972030640e-002, 2.9227972030640e-002, 2.8970122337341e-002, 2.8970122337341e-002,
+ -1.6844493150711e-001, 1.6844493150711e-001, -1.6772258281708e-001, 1.6772258281708e-001,
+ 2.8713405132294e-002, 2.8713405132294e-002, 2.8457790613174e-002, 2.8457790613174e-002,
+ -1.6699983179569e-001, 1.6699983179569e-001, -1.6627669334412e-001, 1.6627669334412e-001,
+ 2.8203278779984e-002, 2.8203278779984e-002, 2.7949869632721e-002, 2.7949869632721e-002,
+ -1.6555315256119e-001, 1.6555315256119e-001, -1.6482923924923e-001, 1.6482923924923e-001,
+ 2.7697592973709e-002, 2.7697592973709e-002, 2.7446419000626e-002, 2.7446419000626e-002,
+ -1.6410492360592e-001, 1.6410492360592e-001, -1.6338023543358e-001, 1.6338023543358e-001,
+ 2.7196347713470e-002, 2.7196347713470e-002, 2.6947379112244e-002, 2.6947379112244e-002,
+ -1.6265514492989e-001, 1.6265514492989e-001, -1.6192968189716e-001, 1.6192968189716e-001,
+ 2.6699542999268e-002, 2.6699542999268e-002, 2.6452809572220e-002, 2.6452809572220e-002,
+ -1.6120384633541e-001, 1.6120384633541e-001, -1.6047762334347e-001, 1.6047762334347e-001,
+ 2.6207208633423e-002, 2.6207208633423e-002, 2.5962710380554e-002, 2.5962710380554e-002,
+ -1.5975101292133e-001, 1.5975101292133e-001, -1.5902404487133e-001, 1.5902404487133e-001,
+ 2.5719314813614e-002, 2.5719314813614e-002, 2.5477051734924e-002, 2.5477051734924e-002,
+ -1.5829668939114e-001, 1.5829668939114e-001, -1.5756896138191e-001, 1.5756896138191e-001,
+ 2.5235921144485e-002, 2.5235921144485e-002, 2.4995893239975e-002, 2.4995893239975e-002,
+ -1.5684087574482e-001, 1.5684087574482e-001, -1.5611241757870e-001, 1.5611241757870e-001,
+ 2.4756968021393e-002, 2.4756968021393e-002, 2.4519175291061e-002, 2.4519175291061e-002,
+ -1.5538358688354e-001, 1.5538358688354e-001, -1.5465438365936e-001, 1.5465438365936e-001,
+ 2.4282485246658e-002, 2.4282485246658e-002, 2.4046927690506e-002, 2.4046927690506e-002,
+ -1.5392482280731e-001, 1.5392482280731e-001, -1.5319490432739e-001, 1.5319490432739e-001,
+ 2.3812502622604e-002, 2.3812502622604e-002, 2.3579180240631e-002, 2.3579180240631e-002,
+ -1.5246461331844e-001, 1.5246461331844e-001, -1.5173397958279e-001, 1.5173397958279e-001,
+ 2.3346990346909e-002, 2.3346990346909e-002, 2.3115903139114e-002, 2.3115903139114e-002,
+ -1.5100297331810e-001, 1.5100297331810e-001, -1.5027162432671e-001, 1.5027162432671e-001,
+ 2.2885948419571e-002, 2.2885948419571e-002, 2.2657126188278e-002, 2.2657126188278e-002,
+ -1.4953991770744e-001, 1.4953991770744e-001, -1.4880785346031e-001, 1.4880785346031e-001,
+ 2.2429406642914e-002, 2.2429406642914e-002, 2.2202819585800e-002, 2.2202819585800e-002,
+ -1.4807544648647e-001, 1.4807544648647e-001, -1.4734269678593e-001, 1.4734269678593e-001,
+ 2.1977365016937e-002, 2.1977365016937e-002, 2.1753042936325e-002, 2.1753042936325e-002,
+ -1.4660958945751e-001, 1.4660958945751e-001, -1.4587613940239e-001, 1.4587613940239e-001,
+ 2.1529823541641e-002, 2.1529823541641e-002, 2.1307736635208e-002, 2.1307736635208e-002,
+ -1.4514234662056e-001, 1.4514234662056e-001, -1.4440821111202e-001, 1.4440821111202e-001,
+ 2.1086782217026e-002, 2.1086782217026e-002, 2.0866960287094e-002, 2.0866960287094e-002,
+ -1.4367373287678e-001, 1.4367373287678e-001, -1.4293892681599e-001, 1.4293892681599e-001,
+ 2.0648270845413e-002, 2.0648270845413e-002, 2.0430684089661e-002, 2.0430684089661e-002,
+ -1.4220377802849e-001, 1.4220377802849e-001, -1.4146828651428e-001, 1.4146828651428e-001,
+ 2.0214229822159e-002, 2.0214229822159e-002, 1.9998937845230e-002, 1.9998937845230e-002,
+ -1.4073246717453e-001, 1.4073246717453e-001, -1.3999632000923e-001, 1.3999632000923e-001,
+ 1.9784748554230e-002, 1.9784748554230e-002, 1.9571691751480e-002, 1.9571691751480e-002,
+ -1.3925984501839e-001, 1.3925984501839e-001, -1.3852304220200e-001, 1.3852304220200e-001,
+ 1.9359767436981e-002, 1.9359767436981e-002, 1.9148975610733e-002, 1.9148975610733e-002,
+ -1.3778591156006e-001, 1.3778591156006e-001, -1.3704845309258e-001, 1.3704845309258e-001,
+ 1.8939286470413e-002, 1.8939286470413e-002, 1.8730759620667e-002, 1.8730759620667e-002,
+ -1.3631068170071e-001, 1.3631068170071e-001, -1.3557258248329e-001, 1.3557258248329e-001,
+ 1.8523365259171e-002, 1.8523365259171e-002, 1.8317103385925e-002, 1.8317103385925e-002,
+ -1.3483417034149e-001, 1.3483417034149e-001, -1.3409543037415e-001, 1.3409543037415e-001,
+ 1.8111974000931e-002, 1.8111974000931e-002, 1.7907977104187e-002, 1.7907977104187e-002,
+ -1.3335637748241e-001, 1.3335637748241e-001, -1.3261701166630e-001, 1.3261701166630e-001,
+ 1.7705112695694e-002, 1.7705112695694e-002, 1.7503380775452e-002, 1.7503380775452e-002,
+ -1.3187734782696e-001, 1.3187734782696e-001, -1.3113735616207e-001, 1.3113735616207e-001,
+ 1.7302781343460e-002, 1.7302781343460e-002, 1.7103314399719e-002, 1.7103314399719e-002,
+ -1.3039706647396e-001, 1.3039706647396e-001, -1.2965646386147e-001, 1.2965646386147e-001,
+ 1.6905009746552e-002, 1.6905009746552e-002, 1.6707807779312e-002, 1.6707807779312e-002,
+ -1.2891554832458e-001, 1.2891554832458e-001, -1.2817434966564e-001, 1.2817434966564e-001,
+ 1.6511768102646e-002, 1.6511768102646e-002, 1.6316860914230e-002, 1.6316860914230e-002,
+ -1.2743283808231e-001, 1.2743283808231e-001, -1.2669102847576e-001, 1.2669102847576e-001,
+ 1.6123086214066e-002, 1.6123086214066e-002, 1.5930444002151e-002, 1.5930444002151e-002,
+ -1.2594890594482e-001, 1.2594890594482e-001, -1.2520650029182e-001, 1.2520650029182e-001,
+ 1.5738964080811e-002, 1.5738964080811e-002, 1.5548586845398e-002, 1.5548586845398e-002,
+ -1.2446380406618e-001, 1.2446380406618e-001, -1.2372080981731e-001, 1.2372080981731e-001,
+ 1.5359371900558e-002, 1.5359371900558e-002, 1.5171319246292e-002, 1.5171319246292e-002,
+ -1.2297752499580e-001, 1.2297752499580e-001, -1.2223395705223e-001, 1.2223395705223e-001,
+ 1.4984369277954e-002, 1.4984369277954e-002, 1.4798581600189e-002, 1.4798581600189e-002,
+ -1.2149009108543e-001, 1.2149009108543e-001, -1.2074594944716e-001, 1.2074594944716e-001,
+ 1.4613926410675e-002, 1.4613926410675e-002, 1.4430433511734e-002, 1.4430433511734e-002,
+ -1.2000151723623e-001, 1.2000151723623e-001, -1.1925680190325e-001, 1.1925680190325e-001,
+ 1.4248043298721e-002, 1.4248043298721e-002, 1.4066845178604e-002, 1.4066845178604e-002,
+ -1.1851180344820e-001, 1.1851180344820e-001, -1.1776652932167e-001, 1.1776652932167e-001,
+ 1.3886749744415e-002, 1.3886749744415e-002, 1.3707816600800e-002, 1.3707816600800e-002,
+ -1.1702097952366e-001, 1.1702097952366e-001, -1.1627515405416e-001, 1.1627515405416e-001,
+ 1.3530015945435e-002, 1.3530015945435e-002, 1.3353377580643e-002, 1.3353377580643e-002,
+ -1.1552906036377e-001, 1.1552906036377e-001, -1.1478268355131e-001, 1.1478268355131e-001,
+ 1.3177871704102e-002, 1.3177871704102e-002, 1.3003528118134e-002, 1.3003528118134e-002,
+ -1.1403604596853e-001, 1.1403604596853e-001, -1.1328913271427e-001, 1.1328913271427e-001,
+ 1.2830317020416e-002, 1.2830317020416e-002, 1.2658238410950e-002, 1.2658238410950e-002,
+ -1.1254195868969e-001, 1.1254195868969e-001, -1.1179451644421e-001, 1.1179451644421e-001,
+ 1.2487322092056e-002, 1.2487322092056e-002, 1.2317568063736e-002, 1.2317568063736e-002,
+ -1.1104681342840e-001, 1.1104681342840e-001, -1.1029884964228e-001, 1.1029884964228e-001,
+ 1.2148946523666e-002, 1.2148946523666e-002, 1.1981457471848e-002, 1.1981457471848e-002,
+ -1.0955062508583e-001, 1.0955062508583e-001, -1.0880213975906e-001, 1.0880213975906e-001,
+ 1.1815130710602e-002, 1.1815130710602e-002, 1.1649966239929e-002, 1.1649966239929e-002,
+ -1.0805340111256e-001, 1.0805340111256e-001, -1.0730440914631e-001, 1.0730440914631e-001,
+ 1.1485934257507e-002, 1.1485934257507e-002, 1.1323064565659e-002, 1.1323064565659e-002,
+ -1.0655516386032e-001, 1.0655516386032e-001, -1.0580566525459e-001, 1.0580566525459e-001,
+ 1.1161327362061e-002, 1.1161327362061e-002, 1.1000752449036e-002, 1.1000752449036e-002,
+ -1.0505592077971e-001, 1.0505592077971e-001, -1.0430593043566e-001, 1.0430593043566e-001,
+ 1.0841310024261e-002, 1.0841310024261e-002, 1.0683029890060e-002, 1.0683029890060e-002,
+ -1.0355569422245e-001, 1.0355569422245e-001, -1.0280521214008e-001, 1.0280521214008e-001,
+ 1.0525912046432e-002, 1.0525912046432e-002, 1.0369926691055e-002, 1.0369926691055e-002,
+ -1.0205448418856e-001, 1.0205448418856e-001, -1.0130352526903e-001, 1.0130352526903e-001,
+ 1.0215103626251e-002, 1.0215103626251e-002, 1.0061442852020e-002, 1.0061442852020e-002,
+ -1.0055232048035e-001, 1.0055232048035e-001, -9.9800884723663e-002, 9.9800884723663e-002,
+ 9.9089443683624e-003, 9.9089443683624e-003, 9.7575783729553e-003, 9.7575783729553e-003,
+ -9.9049210548401e-002, 9.9049210548401e-002, -9.8297297954559e-002, 9.8297297954559e-002,
+ 9.6073746681213e-003, 9.6073746681213e-003, 9.4583034515381e-003, 9.4583034515381e-003,
+ -9.7545161843300e-002, 9.7545161843300e-002, -9.6792794764042e-002, 9.6792794764042e-002,
+ 9.3103945255280e-003, 9.3103945255280e-003, 9.1636478900909e-003, 9.1636478900909e-003,
+ -9.6040204167366e-002, 9.6040204167366e-002, -9.5287382602692e-002, 9.5287382602692e-002,
+ 9.0180635452271e-003, 9.0180635452271e-003, 8.8736414909363e-003, 8.8736414909363e-003,
+ -9.4534337520599e-002, 9.4534337520599e-002, -9.3781068921089e-002, 9.3781068921089e-002,
+ 8.7303519248962e-003, 8.7303519248962e-003, 8.5882246494293e-003, 8.5882246494293e-003,
+ -9.3027576804161e-002, 9.3027576804161e-002, -9.2273868620396e-002, 9.2273868620396e-002,
+ 8.4472596645355e-003, 8.4472596645355e-003, 8.3074569702148e-003, 8.3074569702148e-003,
+ -9.1519944369793e-002, 9.1519944369793e-002, -9.0765804052353e-002, 9.0765804052353e-002,
+ 8.1687867641449e-003, 8.1687867641449e-003, 8.0312788486481e-003, 8.0312788486481e-003,
+ -9.0011455118656e-002, 9.0011455118656e-002, -8.9256890118122e-002, 8.9256890118122e-002,
+ 7.8949630260468e-003, 7.8949630260468e-003, 7.7597796916962e-003, 7.7597796916962e-003,
+ -8.8502109050751e-002, 8.8502109050751e-002, -8.7747126817703e-002, 8.7747126817703e-002,
+ 7.6257586479187e-003, 7.6257586479187e-003, 7.4928700923920e-003, 7.4928700923920e-003,
+ -8.6991935968399e-002, 8.6991935968399e-002, -8.6236543953419e-002, 8.6236543953419e-002,
+ 7.3611736297607e-003, 7.3611736297607e-003, 7.2306394577026e-003, 7.2306394577026e-003,
+ -8.5480943322182e-002, 8.5480943322182e-002, -8.4725148975849e-002, 8.4725148975849e-002,
+ 7.1012377738953e-003, 7.1012377738953e-003, 6.9730281829834e-003, 6.9730281829834e-003,
+ -8.3969153463840e-002, 8.3969153463840e-002, -8.3212956786156e-002, 8.3212956786156e-002,
+ 6.8459510803223e-003, 6.8459510803223e-003, 6.7200362682343e-003, 6.7200362682343e-003,
+ -8.2456558942795e-002, 8.2456558942795e-002, -8.1699974834919e-002, 8.1699974834919e-002,
+ 6.5953135490417e-003, 6.5953135490417e-003, 6.4717233181000e-003, 6.4717233181000e-003,
+ -8.0943197011948e-002, 8.0943197011948e-002, -8.0186232924461e-002, 8.0186232924461e-002,
+ 6.3492953777313e-003, 6.3492953777313e-003, 6.2280297279358e-003, 6.2280297279358e-003,
+ -7.9429075121880e-002, 7.9429075121880e-002, -7.8671731054783e-002, 7.8671731054783e-002,
+ 6.1079263687134e-003, 6.1079263687134e-003, 5.9889853000641e-003, 5.9889853000641e-003,
+ -7.7914200723171e-002, 7.7914200723171e-002, -7.7156491577625e-002, 7.7156491577625e-002,
+ 5.8712065219879e-003, 5.8712065219879e-003, 5.7545900344849e-003, 5.7545900344849e-003,
+ -7.6398596167564e-002, 7.6398596167564e-002, -7.5640521943569e-002, 7.5640521943569e-002,
+ 5.6391656398773e-003, 5.6391656398773e-003, 5.5248737335205e-003, 5.5248737335205e-003,
+ -7.4882268905640e-002, 7.4882268905640e-002, -7.4123844504356e-002, 7.4123844504356e-002,
+ 5.4117441177368e-003, 5.4117441177368e-003, 5.2997767925262e-003, 5.2997767925262e-003,
+ -7.3365241289139e-002, 7.3365241289139e-002, -7.2606466710567e-002, 7.2606466710567e-002,
+ 5.1890015602112e-003, 5.1890015602112e-003, 5.0793588161469e-003, 5.0793588161469e-003,
+ -7.1847520768642e-002, 7.1847520768642e-002, -7.1088403463364e-002, 7.1088403463364e-002,
+ 4.9709081649780e-003, 4.9709081649780e-003, 4.8635900020599e-003, 4.8635900020599e-003,
+ -7.0329122245312e-002, 7.0329122245312e-002, -6.9569677114487e-002, 6.9569677114487e-002,
+ 4.7574639320374e-003, 4.7574639320374e-003, 4.6525001525879e-003, 4.6525001525879e-003,
+ -6.8810060620308e-002, 6.8810060620308e-002, -6.8050287663937e-002, 6.8050287663937e-002,
+ 4.5486688613892e-003, 4.5486688613892e-003, 4.4460296630859e-003, 4.4460296630859e-003,
+ -6.7290358245373e-002, 6.7290358245373e-002, -6.6530264914036e-002, 6.6530264914036e-002,
+ 4.3445825576782e-003, 4.3445825576782e-003, 4.2442679405212e-003, 4.2442679405212e-003,
+ -6.5770015120506e-002, 6.5770015120506e-002, -6.5009616315365e-002, 6.5009616315365e-002,
+ 4.1451156139374e-003, 4.1451156139374e-003, 4.0471553802490e-003, 4.0471553802490e-003,
+ -6.4249053597450e-002, 6.4249053597450e-002, -6.3488349318504e-002, 6.3488349318504e-002,
+ 3.9503574371338e-003, 3.9503574371338e-003, 3.8546919822693e-003, 3.8546919822693e-003,
+ -6.2727496027946e-002, 6.2727496027946e-002, -6.1966490000486e-002, 6.1966490000486e-002,
+ 3.7602186203003e-003, 3.7602186203003e-003, 3.6669373512268e-003, 3.6669373512268e-003,
+ -6.1205338686705e-002, 6.1205338686705e-002, -6.0444045811892e-002, 6.0444045811892e-002,
+ 3.5747885704041e-003, 3.5747885704041e-003, 3.4838318824768e-003, 3.4838318824768e-003,
+ -5.9682607650757e-002, 5.9682607650757e-002, -5.8921031653881e-002, 5.8921031653881e-002,
+ 3.3940374851227e-003, 3.3940374851227e-003, 3.3054053783417e-003, 3.3054053783417e-003,
+ -5.8159317821264e-002, 5.8159317821264e-002, -5.7397466152906e-002, 5.7397466152906e-002,
+ 3.2179355621338e-003, 3.2179355621338e-003, 3.1316280364990e-003, 3.1316280364990e-003,
+ -5.6635476648808e-002, 5.6635476648808e-002, -5.5873356759548e-002, 5.5873356759548e-002,
+ 3.0465126037598e-003, 3.0465126037598e-003, 2.9625594615936e-003, 2.9625594615936e-003,
+ -5.5111106485128e-002, 5.5111106485128e-002, -5.4348722100258e-002, 5.4348722100258e-002,
+ 2.8797686100006e-003, 2.8797686100006e-003, 2.7981698513031e-003, 2.7981698513031e-003,
+ -5.3586214780807e-002, 5.3586214780807e-002, -5.2823577076197e-002, 5.2823577076197e-002,
+ 2.7177035808563e-003, 2.7177035808563e-003, 2.6384294033051e-003, 2.6384294033051e-003,
+ -5.2060820162296e-002, 5.2060820162296e-002, -5.1297936588526e-002, 5.1297936588526e-002,
+ 2.5603473186493e-003, 2.5603473186493e-003, 2.4833977222443e-003, 2.4833977222443e-003,
+ -5.0534933805466e-002, 5.0534933805466e-002, -4.9771811813116e-002, 4.9771811813116e-002,
+ 2.4076402187347e-003, 2.4076402187347e-003, 2.3330450057983e-003, 2.3330450057983e-003,
+ -4.9008570611477e-002, 4.9008570611477e-002, -4.8245217651129e-002, 4.8245217651129e-002,
+ 2.2596120834351e-003, 2.2596120834351e-003, 2.1873712539673e-003, 2.1873712539673e-003,
+ -4.7481749206781e-002, 4.7481749206781e-002, -4.6718169003725e-002, 4.6718169003725e-002,
+ 2.1162927150726e-003, 2.1162927150726e-003, 2.0463764667511e-003, 2.0463764667511e-003,
+ -4.5954480767250e-002, 4.5954480767250e-002, -4.5190680772066e-002, 4.5190680772066e-002,
+ 1.9776523113251e-003, 1.9776523113251e-003, 1.9100904464722e-003, 1.9100904464722e-003,
+ -4.4426776468754e-002, 4.4426776468754e-002, -4.3662767857313e-002, 4.3662767857313e-002,
+ 1.8436908721924e-003, 1.8436908721924e-003, 1.7784833908081e-003, 1.7784833908081e-003,
+ -4.2898658663034e-002, 4.2898658663034e-002, -4.2134445160627e-002, 4.2134445160627e-002,
+ 1.7144381999969e-003, 1.7144381999969e-003, 1.6515552997589e-003, 1.6515552997589e-003,
+ -4.1370134800673e-002, 4.1370134800673e-002, -4.0605723857880e-002, 4.0605723857880e-002,
+ 1.5898644924164e-003, 1.5898644924164e-003, 1.5293359756470e-003, 1.5293359756470e-003,
+ -3.9841219782829e-002, 3.9841219782829e-002, -3.9076622575521e-002, 3.9076622575521e-002,
+ 1.4699697494507e-003, 1.4699697494507e-003, 1.4117956161499e-003, 1.4117956161499e-003,
+ -3.8311932235956e-002, 3.8311932235956e-002, -3.7547152489424e-002, 3.7547152489424e-002,
+ 1.3547837734222e-003, 1.3547837734222e-003, 1.2989342212677e-003, 1.2989342212677e-003,
+ -3.6782283335924e-002, 3.6782283335924e-002, -3.6017328500748e-002, 3.6017328500748e-002,
+ 1.2442767620087e-003, 1.2442767620087e-003, 1.1907815933228e-003, 1.1907815933228e-003,
+ -3.5252287983894e-002, 3.5252287983894e-002, -3.4487165510654e-002, 3.4487165510654e-002,
+ 1.1384785175323e-003, 1.1384785175323e-003, 1.0873377323151e-003, 1.0873377323151e-003,
+ -3.3721961081028e-002, 3.3721961081028e-002, -3.2956678420305e-002, 3.2956678420305e-002,
+ 1.0373592376709e-003, 1.0373592376709e-003, 9.8857283592224e-004, 9.8857283592224e-004,
+ -3.2191317528486e-002, 3.2191317528486e-002, -3.1425878405571e-002, 3.1425878405571e-002,
+ 9.4094872474670e-004, 9.4094872474670e-004, 8.9448690414429e-004, 8.9448690414429e-004,
+ -3.0660368502140e-002, 3.0660368502140e-002, -2.9894785955548e-002, 2.9894785955548e-002,
+ 8.4921717643738e-004, 8.4921717643738e-004, 8.0513954162598e-004, 8.0513954162598e-004,
+ -2.9129132628441e-002, 2.9129132628441e-002, -2.8363412246108e-002, 2.8363412246108e-002,
+ 7.6222419738770e-004, 7.6222419738770e-004, 7.2047114372253e-004, 7.2047114372253e-004,
+ -2.7597622945905e-002, 2.7597622945905e-002, -2.6831770315766e-002, 2.6831770315766e-002,
+ 6.7988038063049e-004, 6.7988038063049e-004, 6.4048171043396e-004, 6.4048171043396e-004,
+ -2.6065852493048e-002, 2.6065852493048e-002, -2.5299875065684e-002, 2.5299875065684e-002,
+ 6.0227513313293e-004, 6.0227513313293e-004, 5.6523084640503e-004, 5.6523084640503e-004,
+ -2.4533838033676e-002, 2.4533838033676e-002, -2.3767743259668e-002, 2.3767743259668e-002,
+ 5.2934885025024e-004, 5.2934885025024e-004, 4.9465894699097e-004, 4.9465894699097e-004,
+ -2.3001592606306e-002, 2.3001592606306e-002, -2.2235386073589e-002, 2.2235386073589e-002,
+ 4.6113133430481e-004, 4.6113133430481e-004, 4.2879581451416e-004, 4.2879581451416e-004,
+ -2.1469129249454e-002, 2.1469129249454e-002, -2.0702820271254e-002, 2.0702820271254e-002,
+ 3.9762258529663e-004, 3.9762258529663e-004, 3.6761164665222e-004, 3.6761164665222e-004,
+ -1.9936464726925e-002, 1.9936464726925e-002, -1.9170060753822e-002, 1.9170060753822e-002,
+ 3.3882260322571e-004, 3.3882260322571e-004, 3.1116604804993e-004, 3.1116604804993e-004,
+ -1.8403612077236e-002, 1.8403612077236e-002, -1.7637120559812e-002, 1.7637120559812e-002,
+ 2.8470158576965e-004, 2.8470158576965e-004, 2.5939941406250e-004, 2.5939941406250e-004,
+ -1.6870586201549e-002, 1.6870586201549e-002, -1.6104012727737e-002, 1.6104012727737e-002,
+ 2.3528933525085e-004, 2.3528933525085e-004, 2.1234154701233e-004, 2.1234154701233e-004,
+ -1.5337402001023e-002, 1.5337402001023e-002, -1.4570754952729e-002, 1.4570754952729e-002,
+ 1.9058585166931e-004, 1.9058585166931e-004, 1.6999244689941e-004, 1.6999244689941e-004,
+ -1.3804073445499e-002, 1.3804073445499e-002, -1.3037359341979e-002, 1.3037359341979e-002,
+ 1.5059113502502e-004, 1.5059113502502e-004, 1.3235211372375e-004, 1.3235211372375e-004,
+ -1.2270614504814e-002, 1.2270614504814e-002, -1.1503840796649e-002, 1.1503840796649e-002,
+ 1.1530518531799e-004, 1.1530518531799e-004, 9.9420547485352e-005, 9.9420547485352e-005,
+ -1.0737040080130e-002, 1.0737040080130e-002, -9.9702142179012e-003, 9.9702142179012e-003,
+ 8.4698200225830e-005, 8.4698200225830e-005, 7.1167945861816e-005, 7.1167945861816e-005,
+ -9.2033650726080e-003, 9.2033650726080e-003, -8.4364945068955e-003, 8.4364945068955e-003,
+ 5.8829784393311e-005, 5.8829784393311e-005, 4.7653913497925e-005, 4.7653913497925e-005,
+ -7.6696034520864e-003, 7.6696034520864e-003, -6.9026942364872e-003, 6.9026942364872e-003,
+ 3.7640333175659e-005, 3.7640333175659e-005, 2.8818845748901e-005, 2.8818845748901e-005,
+ -6.1357691884041e-003, 6.1357691884041e-003, -5.3688297048211e-003, 5.3688297048211e-003,
+ 2.1189451217651e-005, 2.1189451217651e-005, 1.4692544937134e-005, 1.4692544937134e-005,
+ -4.6018776483834e-003, 4.6018776483834e-003, -3.8349144160748e-003, 3.8349144160748e-003,
+ 9.4175338745117e-006, 9.4175338745117e-006, 5.3048133850098e-006, 5.3048133850098e-006,
+ -3.0679423362017e-003, 3.0679423362017e-003, -2.3009630385786e-003, 2.3009630385786e-003,
+ 2.3543834686279e-006, 2.3543834686279e-006, 5.9604644775391e-007, 5.9604644775391e-007,
+ -1.5339783858508e-003, 1.5339783858508e-003, -7.6699012424797e-004, 7.6699012424797e-004
+};
+
+static _MM_ALIGN16 float CT1STP[] = {
+ 7.0710676908493e-001, 7.0710676908493e-001, 7.0710676908493e-001, 7.0710676908493e-001,
+ -7.0710676908493e-001, 7.0710676908493e-001, -7.0710676908493e-001, 7.0710676908493e-001,
+ 9.2387950420380e-001, 9.2387950420380e-001, 3.8268339633942e-001, 3.8268339633942e-001,
+ -3.8268345594406e-001, 3.8268345594406e-001, -9.2387944459915e-001, 9.2387944459915e-001,
+ -7.0710676908493e-001, 7.0710676908493e-001, -7.0710676908493e-001, 7.0710676908493e-001,
+ 7.0710676908493e-001, 7.0710676908493e-001, 7.0710676908493e-001, 7.0710676908493e-001,
+ 3.8268345594406e-001, 3.8268345594406e-001, -9.2387944459915e-001, -9.2387944459915e-001,
+ -9.2387950420380e-001, 9.2387950420380e-001, 3.8268339633942e-001, -3.8268339633942e-001,
+ 9.2387950420380e-001, 9.2387950420380e-001, 9.2387950420380e-001, 9.2387950420380e-001,
+ -3.8268345594406e-001, 3.8268345594406e-001, -3.8268345594406e-001, 3.8268345594406e-001,
+ 9.8078525066376e-001, 9.8078525066376e-001, 8.3146953582764e-001, 8.3146953582764e-001,
+ -1.9509032368660e-001, 1.9509032368660e-001, -5.5557024478912e-001, 5.5557024478912e-001,
+ -9.2387950420380e-001, 9.2387950420380e-001, -9.2387950420380e-001, 9.2387950420380e-001,
+ 3.8268345594406e-001, 3.8268345594406e-001, 3.8268345594406e-001, 3.8268345594406e-001,
+ 5.5557024478912e-001, 5.5557024478912e-001, -9.8078513145447e-001, -9.8078513145447e-001,
+ -8.3146959543228e-001, 8.3146959543228e-001, -1.9509035348892e-001, 1.9509035348892e-001,
+ 3.8268345594406e-001, 3.8268345594406e-001, 3.8268345594406e-001, 3.8268345594406e-001,
+ -9.2387950420380e-001, 9.2387950420380e-001, -9.2387950420380e-001, 9.2387950420380e-001,
+ 8.3146959543228e-001, 8.3146959543228e-001, -1.9509035348892e-001, -1.9509035348892e-001,
+ -5.5557024478912e-001, 5.5557024478912e-001, -9.8078513145447e-001, 9.8078513145447e-001,
+ -3.8268345594406e-001, 3.8268345594406e-001, -3.8268345594406e-001, 3.8268345594406e-001,
+ 9.2387950420380e-001, 9.2387950420380e-001, 9.2387950420380e-001, 9.2387950420380e-001,
+ 1.9509032368660e-001, 1.9509032368660e-001, -5.5557024478912e-001, -5.5557024478912e-001,
+ -9.8078525066376e-001, 9.8078525066376e-001, 8.3146953582764e-001, -8.3146953582764e-001,
+ 9.8078525066376e-001, 9.8078525066376e-001, 9.8078525066376e-001, 9.8078525066376e-001,
+ -1.9509032368660e-001, 1.9509032368660e-001, -1.9509032368660e-001, 1.9509032368660e-001,
+ 9.9518471956253e-001, 9.9518471956253e-001, 9.5694035291672e-001, 9.5694035291672e-001,
+ -9.8017141222954e-002, 9.8017141222954e-002, -2.9028469324112e-001, 2.9028469324112e-001,
+ -9.8078525066376e-001, 9.8078525066376e-001, -9.8078525066376e-001, 9.8078525066376e-001,
+ 1.9509032368660e-001, 1.9509032368660e-001, 1.9509032368660e-001, 1.9509032368660e-001,
+ 6.3439327478409e-001, 6.3439327478409e-001, -8.8192123174667e-001, -8.8192123174667e-001,
+ -7.7301043272018e-001, 7.7301043272018e-001, -4.7139674425125e-001, 4.7139674425125e-001,
+ 5.5557024478912e-001, 5.5557024478912e-001, 5.5557024478912e-001, 5.5557024478912e-001,
+ -8.3146959543228e-001, 8.3146959543228e-001, -8.3146959543228e-001, 8.3146959543228e-001,
+ 8.8192123174667e-001, 8.8192123174667e-001, 9.8017096519470e-002, 9.8017096519470e-002,
+ -4.7139674425125e-001, 4.7139674425125e-001, -9.9518460035324e-001, 9.9518460035324e-001,
+ -5.5557024478912e-001, 5.5557024478912e-001, -5.5557024478912e-001, 5.5557024478912e-001,
+ 8.3146959543228e-001, 8.3146959543228e-001, 8.3146959543228e-001, 8.3146959543228e-001,
+ 2.9028469324112e-001, 2.9028469324112e-001, -7.7301043272018e-001, -7.7301043272018e-001,
+ -9.5694035291672e-001, 9.5694035291672e-001, 6.3439327478409e-001, -6.3439327478409e-001,
+ 8.3146959543228e-001, 8.3146959543228e-001, 8.3146959543228e-001, 8.3146959543228e-001,
+ -5.5557024478912e-001, 5.5557024478912e-001, -5.5557024478912e-001, 5.5557024478912e-001,
+ 9.5694035291672e-001, 9.5694035291672e-001, 6.3439327478409e-001, 6.3439327478409e-001,
+ -2.9028469324112e-001, 2.9028469324112e-001, -7.7301043272018e-001, 7.7301043272018e-001,
+ -8.3146959543228e-001, 8.3146959543228e-001, -8.3146959543228e-001, 8.3146959543228e-001,
+ 5.5557024478912e-001, 5.5557024478912e-001, 5.5557024478912e-001, 5.5557024478912e-001,
+ 4.7139674425125e-001, 4.7139674425125e-001, -9.9518460035324e-001, -9.9518460035324e-001,
+ -8.8192123174667e-001, 8.8192123174667e-001, 9.8017096519470e-002, -9.8017096519470e-002,
+ 1.9509032368660e-001, 1.9509032368660e-001, 1.9509032368660e-001, 1.9509032368660e-001,
+ -9.8078525066376e-001, 9.8078525066376e-001, -9.8078525066376e-001, 9.8078525066376e-001,
+ 7.7301043272018e-001, 7.7301043272018e-001, -4.7139674425125e-001, -4.7139674425125e-001,
+ -6.3439327478409e-001, 6.3439327478409e-001, -8.8192123174667e-001, 8.8192123174667e-001,
+ -1.9509032368660e-001, 1.9509032368660e-001, -1.9509032368660e-001, 1.9509032368660e-001,
+ 9.8078525066376e-001, 9.8078525066376e-001, 9.8078525066376e-001, 9.8078525066376e-001,
+ 9.8017141222954e-002, 9.8017141222954e-002, -2.9028469324112e-001, -2.9028469324112e-001,
+ -9.9518471956253e-001, 9.9518471956253e-001, 9.5694035291672e-001, -9.5694035291672e-001,
+ 9.9518471956253e-001, 9.9518471956253e-001, 9.9518471956253e-001, 9.9518471956253e-001,
+ -9.8017141222954e-002, 9.8017141222954e-002, -9.8017141222954e-002, 9.8017141222954e-002,
+ 9.9879544973373e-001, 9.9879544973373e-001, 9.8917651176453e-001, 9.8917651176453e-001,
+ -4.9067676067352e-002, 4.9067676067352e-002, -1.4673046767712e-001, 1.4673046767712e-001,
+ -9.9518471956253e-001, 9.9518471956253e-001, -9.9518471956253e-001, 9.9518471956253e-001,
+ 9.8017141222954e-002, 9.8017141222954e-002, 9.8017141222954e-002, 9.8017141222954e-002,
+ 6.7155897617340e-001, 6.7155897617340e-001, -8.0320751667023e-001, -8.0320751667023e-001,
+ -7.4095112085342e-001, 7.4095112085342e-001, -5.9569936990738e-001, 5.9569936990738e-001,
+ 6.3439327478409e-001, 6.3439327478409e-001, 6.3439327478409e-001, 6.3439327478409e-001,
+ -7.7301043272018e-001, 7.7301043272018e-001, -7.7301043272018e-001, 7.7301043272018e-001,
+ 9.0398931503296e-001, 9.0398931503296e-001, 2.4298018217087e-001, 2.4298018217087e-001,
+ -4.2755511403084e-001, 4.2755511403084e-001, -9.7003126144409e-001, 9.7003126144409e-001,
+ -6.3439327478409e-001, 6.3439327478409e-001, -6.3439327478409e-001, 6.3439327478409e-001,
+ 7.7301043272018e-001, 7.7301043272018e-001, 7.7301043272018e-001, 7.7301043272018e-001,
+ 3.3688986301422e-001, 3.3688986301422e-001, -8.5772860050201e-001, -8.5772860050201e-001,
+ -9.4154405593872e-001, 9.4154405593872e-001, 5.1410269737244e-001, -5.1410269737244e-001,
+ 8.8192123174667e-001, 8.8192123174667e-001, 8.8192123174667e-001, 8.8192123174667e-001,
+ -4.7139674425125e-001, 4.7139674425125e-001, -4.7139674425125e-001, 4.7139674425125e-001,
+ 9.7003126144409e-001, 9.7003126144409e-001, 7.4095112085342e-001, 7.4095112085342e-001,
+ -2.4298018217087e-001, 2.4298018217087e-001, -6.7155897617340e-001, 6.7155897617340e-001,
+ -8.8192123174667e-001, 8.8192123174667e-001, -8.8192123174667e-001, 8.8192123174667e-001,
+ 4.7139674425125e-001, 4.7139674425125e-001, 4.7139674425125e-001, 4.7139674425125e-001,
+ 5.1410275697708e-001, 5.1410275697708e-001, -9.9879533052444e-001, -9.9879533052444e-001,
+ -8.5772860050201e-001, 8.5772860050201e-001, -4.9067676067352e-002, 4.9067676067352e-002,
+ 2.9028469324112e-001, 2.9028469324112e-001, 2.9028469324112e-001, 2.9028469324112e-001,
+ -9.5694035291672e-001, 9.5694035291672e-001, -9.5694035291672e-001, 9.5694035291672e-001,
+ 8.0320751667023e-001, 8.0320751667023e-001, -3.3688986301422e-001, -3.3688986301422e-001,
+ -5.9569931030273e-001, 5.9569931030273e-001, -9.4154405593872e-001, 9.4154405593872e-001,
+ -2.9028469324112e-001, 2.9028469324112e-001, -2.9028469324112e-001, 2.9028469324112e-001,
+ 9.5694035291672e-001, 9.5694035291672e-001, 9.5694035291672e-001, 9.5694035291672e-001,
+ 1.4673048257828e-001, 1.4673048257828e-001, -4.2755514383316e-001, -4.2755514383316e-001,
+ -9.8917651176453e-001, 9.8917651176453e-001, 9.0398931503296e-001, -9.0398931503296e-001,
+ 9.5694035291672e-001, 9.5694035291672e-001, 9.5694035291672e-001, 9.5694035291672e-001,
+ -2.9028469324112e-001, 2.9028469324112e-001, -2.9028469324112e-001, 2.9028469324112e-001,
+ 9.8917651176453e-001, 9.8917651176453e-001, 9.0398931503296e-001, 9.0398931503296e-001,
+ -1.4673048257828e-001, 1.4673048257828e-001, -4.2755514383316e-001, 4.2755514383316e-001,
+ -9.5694035291672e-001, 9.5694035291672e-001, -9.5694035291672e-001, 9.5694035291672e-001,
+ 2.9028469324112e-001, 2.9028469324112e-001, 2.9028469324112e-001, 2.9028469324112e-001,
+ 5.9569931030273e-001, 5.9569931030273e-001, -9.4154405593872e-001, -9.4154405593872e-001,
+ -8.0320751667023e-001, 8.0320751667023e-001, -3.3688986301422e-001, 3.3688986301422e-001,
+ 4.7139674425125e-001, 4.7139674425125e-001, 4.7139674425125e-001, 4.7139674425125e-001,
+ -8.8192123174667e-001, 8.8192123174667e-001, -8.8192123174667e-001, 8.8192123174667e-001,
+ 8.5772860050201e-001, 8.5772860050201e-001, -4.9067676067352e-002, -4.9067676067352e-002,
+ -5.1410275697708e-001, 5.1410275697708e-001, -9.9879533052444e-001, 9.9879533052444e-001,
+ -4.7139674425125e-001, 4.7139674425125e-001, -4.7139674425125e-001, 4.7139674425125e-001,
+ 8.8192123174667e-001, 8.8192123174667e-001, 8.8192123174667e-001, 8.8192123174667e-001,
+ 2.4298018217087e-001, 2.4298018217087e-001, -6.7155897617340e-001, -6.7155897617340e-001,
+ -9.7003126144409e-001, 9.7003126144409e-001, 7.4095112085342e-001, -7.4095112085342e-001,
+ 7.7301043272018e-001, 7.7301043272018e-001, 7.7301043272018e-001, 7.7301043272018e-001,
+ -6.3439327478409e-001, 6.3439327478409e-001, -6.3439327478409e-001, 6.3439327478409e-001,
+ 9.4154405593872e-001, 9.4154405593872e-001, 5.1410269737244e-001, 5.1410269737244e-001,
+ -3.3688986301422e-001, 3.3688986301422e-001, -8.5772860050201e-001, 8.5772860050201e-001,
+ -7.7301043272018e-001, 7.7301043272018e-001, -7.7301043272018e-001, 7.7301043272018e-001,
+ 6.3439327478409e-001, 6.3439327478409e-001, 6.3439327478409e-001, 6.3439327478409e-001,
+ 4.2755511403084e-001, 4.2755511403084e-001, -9.7003126144409e-001, -9.7003126144409e-001,
+ -9.0398931503296e-001, 9.0398931503296e-001, 2.4298018217087e-001, -2.4298018217087e-001,
+ 9.8017141222954e-002, 9.8017141222954e-002, 9.8017141222954e-002, 9.8017141222954e-002,
+ -9.9518471956253e-001, 9.9518471956253e-001, -9.9518471956253e-001, 9.9518471956253e-001,
+ 7.4095112085342e-001, 7.4095112085342e-001, -5.9569936990738e-001, -5.9569936990738e-001,
+ -6.7155897617340e-001, 6.7155897617340e-001, -8.0320751667023e-001, 8.0320751667023e-001,
+ -9.8017141222954e-002, 9.8017141222954e-002, -9.8017141222954e-002, 9.8017141222954e-002,
+ 9.9518471956253e-001, 9.9518471956253e-001, 9.9518471956253e-001, 9.9518471956253e-001,
+ 4.9067676067352e-002, 4.9067676067352e-002, -1.4673046767712e-001, -1.4673046767712e-001,
+ -9.9879544973373e-001, 9.9879544973373e-001, 9.8917651176453e-001, -9.8917651176453e-001,
+ 9.9879544973373e-001, 9.9879544973373e-001, 9.9879544973373e-001, 9.9879544973373e-001,
+ -4.9067676067352e-002, 4.9067676067352e-002, -4.9067676067352e-002, 4.9067676067352e-002,
+ 9.9969881772995e-001, 9.9969881772995e-001, 9.9729043245316e-001, 9.9729043245316e-001,
+ -2.4541229009628e-002, 2.4541229009628e-002, -7.3564566671848e-002, 7.3564566671848e-002,
+ -9.9879544973373e-001, 9.9879544973373e-001, -9.9879544973373e-001, 9.9879544973373e-001,
+ 4.9067676067352e-002, 4.9067676067352e-002, 4.9067676067352e-002, 4.9067676067352e-002,
+ 6.8954056501389e-001, 6.8954056501389e-001, -7.5720888376236e-001, -7.5720888376236e-001,
+ -7.2424709796906e-001, 7.2424709796906e-001, -6.5317285060883e-001, 6.5317285060883e-001,
+ 6.7155897617340e-001, 6.7155897617340e-001, 6.7155897617340e-001, 6.7155897617340e-001,
+ -7.4095112085342e-001, 7.4095112085342e-001, -7.4095112085342e-001, 7.4095112085342e-001,
+ 9.1420972347260e-001, 9.1420972347260e-001, 3.1368172168732e-001, 3.1368172168732e-001,
+ -4.0524131059647e-001, 4.0524131059647e-001, -9.4952815771103e-001, 9.4952815771103e-001,
+ -6.7155897617340e-001, 6.7155897617340e-001, -6.7155897617340e-001, 6.7155897617340e-001,
+ 7.4095112085342e-001, 7.4095112085342e-001, 7.4095112085342e-001, 7.4095112085342e-001,
+ 3.5989505052567e-001, 3.5989505052567e-001, -8.9322429895401e-001, -8.9322429895401e-001,
+ -9.3299281597137e-001, 9.3299281597137e-001, 4.4961130619049e-001, -4.4961130619049e-001,
+ 9.0398931503296e-001, 9.0398931503296e-001, 9.0398931503296e-001, 9.0398931503296e-001,
+ -4.2755511403084e-001, 4.2755511403084e-001, -4.2755511403084e-001, 4.2755511403084e-001,
+ 9.7570210695267e-001, 9.7570210695267e-001, 7.8834640979767e-001, 7.8834640979767e-001,
+ -2.1910125017166e-001, 2.1910125017166e-001, -6.1523157358170e-001, 6.1523157358170e-001,
+ -9.0398931503296e-001, 9.0398931503296e-001, -9.0398931503296e-001, 9.0398931503296e-001,
+ 4.2755511403084e-001, 4.2755511403084e-001, 4.2755511403084e-001, 4.2755511403084e-001,
+ 5.3499764204025e-001, 5.3499764204025e-001, -9.9247962236404e-001, -9.9247962236404e-001,
+ -8.4485357999802e-001, 8.4485357999802e-001, -1.2241071462631e-001, 1.2241071462631e-001,
+ 3.3688986301422e-001, 3.3688986301422e-001, 3.3688986301422e-001, 3.3688986301422e-001,
+ -9.4154405593872e-001, 9.4154405593872e-001, -9.4154405593872e-001, 9.4154405593872e-001,
+ 8.1758481264114e-001, 8.1758481264114e-001, -2.6671284437180e-001, -2.6671284437180e-001,
+ -5.7580822706223e-001, 5.7580822706223e-001, -9.6377605199814e-001, 9.6377605199814e-001,
+ -3.3688986301422e-001, 3.3688986301422e-001, -3.3688986301422e-001, 3.3688986301422e-001,
+ 9.4154405593872e-001, 9.4154405593872e-001, 9.4154405593872e-001, 9.4154405593872e-001,
+ 1.7096188664436e-001, 1.7096188664436e-001, -4.9289819598198e-001, -4.9289819598198e-001,
+ -9.8527765274048e-001, 9.8527765274048e-001, 8.7008702754974e-001, -8.7008702754974e-001,
+ 9.7003126144409e-001, 9.7003126144409e-001, 9.7003126144409e-001, 9.7003126144409e-001,
+ -2.4298018217087e-001, 2.4298018217087e-001, -2.4298018217087e-001, 2.4298018217087e-001,
+ 9.9247956275940e-001, 9.9247956275940e-001, 9.3299281597137e-001, 9.3299281597137e-001,
+ -1.2241067737341e-001, 1.2241067737341e-001, -3.5989505052567e-001, 3.5989505052567e-001,
+ -9.7003126144409e-001, 9.7003126144409e-001, -9.7003126144409e-001, 9.7003126144409e-001,
+ 2.4298018217087e-001, 2.4298018217087e-001, 2.4298018217087e-001, 2.4298018217087e-001,
+ 6.1523163318634e-001, 6.1523163318634e-001, -9.1420972347260e-001, -9.1420972347260e-001,
+ -7.8834640979767e-001, 7.8834640979767e-001, -4.0524137020111e-001, 4.0524137020111e-001,
+ 5.1410275697708e-001, 5.1410275697708e-001, 5.1410275697708e-001, 5.1410275697708e-001,
+ -8.5772860050201e-001, 8.5772860050201e-001, -8.5772860050201e-001, 8.5772860050201e-001,
+ 8.7008696794510e-001, 8.7008696794510e-001, 2.4541199207306e-002, 2.4541199207306e-002,
+ -4.9289819598198e-001, 4.9289819598198e-001, -9.9969875812531e-001, 9.9969875812531e-001,
+ -5.1410275697708e-001, 5.1410275697708e-001, -5.1410275697708e-001, 5.1410275697708e-001,
+ 8.5772860050201e-001, 8.5772860050201e-001, 8.5772860050201e-001, 8.5772860050201e-001,
+ 2.6671275496483e-001, 2.6671275496483e-001, -7.2424709796906e-001, -7.2424709796906e-001,
+ -9.6377605199814e-001, 9.6377605199814e-001, 6.8954050540924e-001, -6.8954050540924e-001,
+ 8.0320751667023e-001, 8.0320751667023e-001, 8.0320751667023e-001, 8.0320751667023e-001,
+ -5.9569931030273e-001, 5.9569931030273e-001, -5.9569931030273e-001, 5.9569931030273e-001,
+ 9.4952815771103e-001, 9.4952815771103e-001, 5.7580816745758e-001, 5.7580816745758e-001,
+ -3.1368175148964e-001, 3.1368175148964e-001, -8.1758487224579e-001, 8.1758487224579e-001,
+ -8.0320751667023e-001, 8.0320751667023e-001, -8.0320751667023e-001, 8.0320751667023e-001,
+ 5.9569931030273e-001, 5.9569931030273e-001, 5.9569931030273e-001, 5.9569931030273e-001,
+ 4.4961133599281e-001, 4.4961133599281e-001, -9.8527765274048e-001, -9.8527765274048e-001,
+ -8.9322429895401e-001, 8.9322429895401e-001, 1.7096191644669e-001, -1.7096191644669e-001,
+ 1.4673048257828e-001, 1.4673048257828e-001, 1.4673048257828e-001, 1.4673048257828e-001,
+ -9.8917651176453e-001, 9.8917651176453e-001, -9.8917651176453e-001, 9.8917651176453e-001,
+ 7.5720882415771e-001, 7.5720882415771e-001, -5.3499770164490e-001, -5.3499770164490e-001,
+ -6.5317285060883e-001, 6.5317285060883e-001, -8.4485352039337e-001, 8.4485352039337e-001,
+ -1.4673048257828e-001, 1.4673048257828e-001, -1.4673048257828e-001, 1.4673048257828e-001,
+ 9.8917651176453e-001, 9.8917651176453e-001, 9.8917651176453e-001, 9.8917651176453e-001,
+ 7.3564566671848e-002, 7.3564566671848e-002, -2.1910125017166e-001, -2.1910125017166e-001,
+ -9.9729043245316e-001, 9.9729043245316e-001, 9.7570210695267e-001, -9.7570210695267e-001,
+ 9.8917651176453e-001, 9.8917651176453e-001, 9.8917651176453e-001, 9.8917651176453e-001,
+ -1.4673048257828e-001, 1.4673048257828e-001, -1.4673048257828e-001, 1.4673048257828e-001,
+ 9.9729043245316e-001, 9.9729043245316e-001, 9.7570210695267e-001, 9.7570210695267e-001,
+ -7.3564566671848e-002, 7.3564566671848e-002, -2.1910125017166e-001, 2.1910125017166e-001,
+ -9.8917651176453e-001, 9.8917651176453e-001, -9.8917651176453e-001, 9.8917651176453e-001,
+ 1.4673048257828e-001, 1.4673048257828e-001, 1.4673048257828e-001, 1.4673048257828e-001,
+ 6.5317285060883e-001, 6.5317285060883e-001, -8.4485352039337e-001, -8.4485352039337e-001,
+ -7.5720882415771e-001, 7.5720882415771e-001, -5.3499770164490e-001, 5.3499770164490e-001,
+ 5.9569931030273e-001, 5.9569931030273e-001, 5.9569931030273e-001, 5.9569931030273e-001,
+ -8.0320751667023e-001, 8.0320751667023e-001, -8.0320751667023e-001, 8.0320751667023e-001,
+ 8.9322429895401e-001, 8.9322429895401e-001, 1.7096191644669e-001, 1.7096191644669e-001,
+ -4.4961133599281e-001, 4.4961133599281e-001, -9.8527765274048e-001, 9.8527765274048e-001,
+ -5.9569931030273e-001, 5.9569931030273e-001, -5.9569931030273e-001, 5.9569931030273e-001,
+ 8.0320751667023e-001, 8.0320751667023e-001, 8.0320751667023e-001, 8.0320751667023e-001,
+ 3.1368175148964e-001, 3.1368175148964e-001, -8.1758487224579e-001, -8.1758487224579e-001,
+ -9.4952815771103e-001, 9.4952815771103e-001, 5.7580816745758e-001, -5.7580816745758e-001,
+ 8.5772860050201e-001, 8.5772860050201e-001, 8.5772860050201e-001, 8.5772860050201e-001,
+ -5.1410275697708e-001, 5.1410275697708e-001, -5.1410275697708e-001, 5.1410275697708e-001,
+ 9.6377605199814e-001, 9.6377605199814e-001, 6.8954050540924e-001, 6.8954050540924e-001,
+ -2.6671275496483e-001, 2.6671275496483e-001, -7.2424709796906e-001, 7.2424709796906e-001,
+ -8.5772860050201e-001, 8.5772860050201e-001, -8.5772860050201e-001, 8.5772860050201e-001,
+ 5.1410275697708e-001, 5.1410275697708e-001, 5.1410275697708e-001, 5.1410275697708e-001,
+ 4.9289819598198e-001, 4.9289819598198e-001, -9.9969875812531e-001, -9.9969875812531e-001,
+ -8.7008696794510e-001, 8.7008696794510e-001, 2.4541199207306e-002, -2.4541199207306e-002,
+ 2.4298018217087e-001, 2.4298018217087e-001, 2.4298018217087e-001, 2.4298018217087e-001,
+ -9.7003126144409e-001, 9.7003126144409e-001, -9.7003126144409e-001, 9.7003126144409e-001,
+ 7.8834640979767e-001, 7.8834640979767e-001, -4.0524137020111e-001, -4.0524137020111e-001,
+ -6.1523163318634e-001, 6.1523163318634e-001, -9.1420972347260e-001, 9.1420972347260e-001,
+ -2.4298018217087e-001, 2.4298018217087e-001, -2.4298018217087e-001, 2.4298018217087e-001,
+ 9.7003126144409e-001, 9.7003126144409e-001, 9.7003126144409e-001, 9.7003126144409e-001,
+ 1.2241067737341e-001, 1.2241067737341e-001, -3.5989505052567e-001, -3.5989505052567e-001,
+ -9.9247956275940e-001, 9.9247956275940e-001, 9.3299281597137e-001, -9.3299281597137e-001,
+ 9.4154405593872e-001, 9.4154405593872e-001, 9.4154405593872e-001, 9.4154405593872e-001,
+ -3.3688986301422e-001, 3.3688986301422e-001, -3.3688986301422e-001, 3.3688986301422e-001,
+ 9.8527765274048e-001, 9.8527765274048e-001, 8.7008702754974e-001, 8.7008702754974e-001,
+ -1.7096188664436e-001, 1.7096188664436e-001, -4.9289819598198e-001, 4.9289819598198e-001,
+ -9.4154405593872e-001, 9.4154405593872e-001, -9.4154405593872e-001, 9.4154405593872e-001,
+ 3.3688986301422e-001, 3.3688986301422e-001, 3.3688986301422e-001, 3.3688986301422e-001,
+ 5.7580822706223e-001, 5.7580822706223e-001, -9.6377605199814e-001, -9.6377605199814e-001,
+ -8.1758481264114e-001, 8.1758481264114e-001, -2.6671284437180e-001, 2.6671284437180e-001,
+ 4.2755511403084e-001, 4.2755511403084e-001, 4.2755511403084e-001, 4.2755511403084e-001,
+ -9.0398931503296e-001, 9.0398931503296e-001, -9.0398931503296e-001, 9.0398931503296e-001,
+ 8.4485357999802e-001, 8.4485357999802e-001, -1.2241071462631e-001, -1.2241071462631e-001,
+ -5.3499764204025e-001, 5.3499764204025e-001, -9.9247962236404e-001, 9.9247962236404e-001,
+ -4.2755511403084e-001, 4.2755511403084e-001, -4.2755511403084e-001, 4.2755511403084e-001,
+ 9.0398931503296e-001, 9.0398931503296e-001, 9.0398931503296e-001, 9.0398931503296e-001,
+ 2.1910125017166e-001, 2.1910125017166e-001, -6.1523157358170e-001, -6.1523157358170e-001,
+ -9.7570210695267e-001, 9.7570210695267e-001, 7.8834640979767e-001, -7.8834640979767e-001,
+ 7.4095112085342e-001, 7.4095112085342e-001, 7.4095112085342e-001, 7.4095112085342e-001,
+ -6.7155897617340e-001, 6.7155897617340e-001, -6.7155897617340e-001, 6.7155897617340e-001,
+ 9.3299281597137e-001, 9.3299281597137e-001, 4.4961130619049e-001, 4.4961130619049e-001,
+ -3.5989505052567e-001, 3.5989505052567e-001, -8.9322429895401e-001, 8.9322429895401e-001,
+ -7.4095112085342e-001, 7.4095112085342e-001, -7.4095112085342e-001, 7.4095112085342e-001,
+ 6.7155897617340e-001, 6.7155897617340e-001, 6.7155897617340e-001, 6.7155897617340e-001,
+ 4.0524131059647e-001, 4.0524131059647e-001, -9.4952815771103e-001, -9.4952815771103e-001,
+ -9.1420972347260e-001, 9.1420972347260e-001, 3.1368172168732e-001, -3.1368172168732e-001,
+ 4.9067676067352e-002, 4.9067676067352e-002, 4.9067676067352e-002, 4.9067676067352e-002,
+ -9.9879544973373e-001, 9.9879544973373e-001, -9.9879544973373e-001, 9.9879544973373e-001,
+ 7.2424709796906e-001, 7.2424709796906e-001, -6.5317285060883e-001, -6.5317285060883e-001,
+ -6.8954056501389e-001, 6.8954056501389e-001, -7.5720888376236e-001, 7.5720888376236e-001,
+ -4.9067676067352e-002, 4.9067676067352e-002, -4.9067676067352e-002, 4.9067676067352e-002,
+ 9.9879544973373e-001, 9.9879544973373e-001, 9.9879544973373e-001, 9.9879544973373e-001,
+ 2.4541229009628e-002, 2.4541229009628e-002, -7.3564566671848e-002, -7.3564566671848e-002,
+ -9.9969881772995e-001, 9.9969881772995e-001, 9.9729043245316e-001, -9.9729043245316e-001,
+ 9.9969881772995e-001, 9.9969881772995e-001, 9.9969881772995e-001, 9.9969881772995e-001,
+ -2.4541229009628e-002, 2.4541229009628e-002, -2.4541229009628e-002, 2.4541229009628e-002,
+ 9.9992471933365e-001, 9.9992471933365e-001, 9.9932241439819e-001, 9.9932241439819e-001,
+ -1.2271538376808e-002, 1.2271538376808e-002, -3.6807224154472e-002, 3.6807224154472e-002,
+ -9.9969881772995e-001, 9.9969881772995e-001, -9.9969881772995e-001, 9.9969881772995e-001,
+ 2.4541229009628e-002, 2.4541229009628e-002, 2.4541229009628e-002, 2.4541229009628e-002,
+ 6.9837623834610e-001, 6.9837623834610e-001, -7.3265415430069e-001, -7.3265415430069e-001,
+ -7.1573078632355e-001, 7.1573078632355e-001, -6.8060100078583e-001, 6.8060100078583e-001,
+ 6.8954056501389e-001, 6.8954056501389e-001, 6.8954056501389e-001, 6.8954056501389e-001,
+ -7.2424709796906e-001, 7.2424709796906e-001, -7.2424709796906e-001, 7.2424709796906e-001,
+ 9.1911387443542e-001, 9.1911387443542e-001, 3.4841871261597e-001, 3.4841871261597e-001,
+ -3.9399203658104e-001, 3.9399203658104e-001, -9.3733906745911e-001, 9.3733906745911e-001,
+ -6.8954056501389e-001, 6.8954056501389e-001, -6.8954056501389e-001, 6.8954056501389e-001,
+ 7.2424709796906e-001, 7.2424709796906e-001, 7.2424709796906e-001, 7.2424709796906e-001,
+ 3.7131720781326e-001, 3.7131720781326e-001, -9.0916794538498e-001, -9.0916794538498e-001,
+ -9.2850607633591e-001, 9.2850607633591e-001, 4.1642951965332e-001, -4.1642951965332e-001,
+ 9.1420972347260e-001, 9.1420972347260e-001, 9.1420972347260e-001, 9.1420972347260e-001,
+ -4.0524131059647e-001, 4.0524131059647e-001, -4.0524131059647e-001, 4.0524131059647e-001,
+ 9.7831737995148e-001, 9.7831737995148e-001, 8.1045722961426e-001, 8.1045722961426e-001,
+ -2.0711138844490e-001, 2.0711138844490e-001, -5.8579778671265e-001, 5.8579778671265e-001,
+ -9.1420972347260e-001, 9.1420972347260e-001, -9.1420972347260e-001, 9.1420972347260e-001,
+ 4.0524131059647e-001, 4.0524131059647e-001, 4.0524131059647e-001, 4.0524131059647e-001,
+ 5.4532498121262e-001, 5.4532498121262e-001, -9.8730140924454e-001, -9.8730140924454e-001,
+ -8.3822470903397e-001, 8.3822470903397e-001, -1.5885812044144e-001, 1.5885812044144e-001,
+ 3.5989505052567e-001, 3.5989505052567e-001, 3.5989505052567e-001, 3.5989505052567e-001,
+ -9.3299281597137e-001, 9.3299281597137e-001, -9.3299281597137e-001, 9.3299281597137e-001,
+ 8.2458931207657e-001, 8.2458931207657e-001, -2.3105818033218e-001, -2.3105818033218e-001,
+ -5.6573182344437e-001, 5.6573182344437e-001, -9.7294002771378e-001, 9.7294002771378e-001,
+ -3.5989505052567e-001, 3.5989505052567e-001, -3.5989505052567e-001, 3.5989505052567e-001,
+ 9.3299281597137e-001, 9.3299281597137e-001, 9.3299281597137e-001, 9.3299281597137e-001,
+ 1.8303988873959e-001, 1.8303988873959e-001, -5.2458971738815e-001, -5.2458971738815e-001,
+ -9.8310548067093e-001, 9.8310548067093e-001, 8.5135519504547e-001, -8.5135519504547e-001,
+ 9.7570210695267e-001, 9.7570210695267e-001, 9.7570210695267e-001, 9.7570210695267e-001,
+ -2.1910125017166e-001, 2.1910125017166e-001, -2.1910125017166e-001, 2.1910125017166e-001,
+ 9.9390697479248e-001, 9.9390697479248e-001, 9.4560730457306e-001, 9.4560730457306e-001,
+ -1.1022221297026e-001, 1.1022221297026e-001, -3.2531028985977e-001, 3.2531028985977e-001,
+ -9.7570210695267e-001, 9.7570210695267e-001, -9.7570210695267e-001, 9.7570210695267e-001,
+ 2.1910125017166e-001, 2.1910125017166e-001, 2.1910125017166e-001, 2.1910125017166e-001,
+ 6.2485951185226e-001, 6.2485951185226e-001, -8.9867442846298e-001, -8.9867442846298e-001,
+ -7.8073722124100e-001, 7.8073722124100e-001, -4.3861621618271e-001, 4.3861621618271e-001,
+ 5.3499764204025e-001, 5.3499764204025e-001, 5.3499764204025e-001, 5.3499764204025e-001,
+ -8.4485357999802e-001, 8.4485357999802e-001, -8.4485357999802e-001, 8.4485357999802e-001,
+ 8.7607008218765e-001, 8.7607008218765e-001, 6.1320662498474e-002, 6.1320662498474e-002,
+ -4.8218378424644e-001, 4.8218378424644e-001, -9.9811804294586e-001, 9.9811804294586e-001,
+ -5.3499764204025e-001, 5.3499764204025e-001, -5.3499764204025e-001, 5.3499764204025e-001,
+ 8.4485357999802e-001, 8.4485357999802e-001, 8.4485357999802e-001, 8.4485357999802e-001,
+ 2.7851969003677e-001, 2.7851969003677e-001, -7.4913638830185e-001, -7.4913638830185e-001,
+ -9.6043050289154e-001, 9.6043050289154e-001, 6.6241574287415e-001, -6.6241574287415e-001,
+ 8.1758481264114e-001, 8.1758481264114e-001, 8.1758481264114e-001, 8.1758481264114e-001,
+ -5.7580822706223e-001, 5.7580822706223e-001, -5.7580822706223e-001, 5.7580822706223e-001,
+ 9.5330601930618e-001, 9.5330601930618e-001, 6.0551100969315e-001, 6.0551100969315e-001,
+ -3.0200594663620e-001, 3.0200594663620e-001, -7.9583698511124e-001, 7.9583698511124e-001,
+ -8.1758481264114e-001, 8.1758481264114e-001, -8.1758481264114e-001, 8.1758481264114e-001,
+ 5.7580822706223e-001, 5.7580822706223e-001, 5.7580822706223e-001, 5.7580822706223e-001,
+ 4.6053871512413e-001, 4.6053871512413e-001, -9.9090266227722e-001, -9.9090266227722e-001,
+ -8.8763964176178e-001, 8.8763964176178e-001, 1.3458073139191e-001, -1.3458073139191e-001,
+ 1.7096188664436e-001, 1.7096188664436e-001, 1.7096188664436e-001, 1.7096188664436e-001,
+ -9.8527765274048e-001, 9.8527765274048e-001, -9.8527765274048e-001, 9.8527765274048e-001,
+ 7.6516723632813e-001, 7.6516723632813e-001, -5.0353848934174e-001, -5.0353848934174e-001,
+ -6.4383155107498e-001, 6.4383155107498e-001, -8.6397284269333e-001, 8.6397284269333e-001,
+ -1.7096188664436e-001, 1.7096188664436e-001, -1.7096188664436e-001, 1.7096188664436e-001,
+ 9.8527765274048e-001, 9.8527765274048e-001, 9.8527765274048e-001, 9.8527765274048e-001,
+ 8.5797317326069e-002, 8.5797317326069e-002, -2.5486564636230e-001, -2.5486564636230e-001,
+ -9.9631261825562e-001, 9.9631261825562e-001, 9.6697646379471e-001, -9.6697646379471e-001,
+ 9.9247956275940e-001, 9.9247956275940e-001, 9.9247956275940e-001, 9.9247956275940e-001,
+ -1.2241067737341e-001, 1.2241067737341e-001, -1.2241067737341e-001, 1.2241067737341e-001,
+ 9.9811810255051e-001, 9.9811810255051e-001, 9.8310548067093e-001, 9.8310548067093e-001,
+ -6.1320737004280e-002, 6.1320737004280e-002, -1.8303988873959e-001, 1.8303988873959e-001,
+ -9.9247956275940e-001, 9.9247956275940e-001, -9.9247956275940e-001, 9.9247956275940e-001,
+ 1.2241067737341e-001, 1.2241067737341e-001, 1.2241067737341e-001, 1.2241067737341e-001,
+ 6.6241580247879e-001, 6.6241580247879e-001, -8.2458931207657e-001, -8.2458931207657e-001,
+ -7.4913638830185e-001, 7.4913638830185e-001, -5.6573194265366e-001, 5.6573194265366e-001,
+ 6.1523163318634e-001, 6.1523163318634e-001, 6.1523163318634e-001, 6.1523163318634e-001,
+ -7.8834640979767e-001, 7.8834640979767e-001, -7.8834640979767e-001, 7.8834640979767e-001,
+ 8.9867448806763e-001, 8.9867448806763e-001, 2.0711141824722e-001, 2.0711141824722e-001,
+ -4.3861624598503e-001, 4.3861624598503e-001, -9.7831737995148e-001, 9.7831737995148e-001,
+ -6.1523163318634e-001, 6.1523163318634e-001, -6.1523163318634e-001, 6.1523163318634e-001,
+ 7.8834640979767e-001, 7.8834640979767e-001, 7.8834640979767e-001, 7.8834640979767e-001,
+ 3.2531028985977e-001, 3.2531028985977e-001, -8.3822470903397e-001, -8.3822470903397e-001,
+ -9.4560730457306e-001, 9.4560730457306e-001, 5.4532492160797e-001, -5.4532492160797e-001,
+ 8.7008696794510e-001, 8.7008696794510e-001, 8.7008696794510e-001, 8.7008696794510e-001,
+ -4.9289819598198e-001, 4.9289819598198e-001, -4.9289819598198e-001, 4.9289819598198e-001,
+ 9.6697646379471e-001, 9.6697646379471e-001, 7.1573078632355e-001, 7.1573078632355e-001,
+ -2.5486567616463e-001, 2.5486567616463e-001, -6.9837617874146e-001, 6.9837617874146e-001,
+ -8.7008696794510e-001, 8.7008696794510e-001, -8.7008696794510e-001, 8.7008696794510e-001,
+ 4.9289819598198e-001, 4.9289819598198e-001, 4.9289819598198e-001, 4.9289819598198e-001,
+ 5.0353837013245e-001, 5.0353837013245e-001, -9.9992465972900e-001, -9.9992465972900e-001,
+ -8.6397284269333e-001, 8.6397284269333e-001, -1.2271523475647e-002, 1.2271523475647e-002,
+ 2.6671275496483e-001, 2.6671275496483e-001, 2.6671275496483e-001, 2.6671275496483e-001,
+ -9.6377605199814e-001, 9.6377605199814e-001, -9.6377605199814e-001, 9.6377605199814e-001,
+ 7.9583686590195e-001, 7.9583686590195e-001, -3.7131732702255e-001, -3.7131732702255e-001,
+ -6.0551106929779e-001, 6.0551106929779e-001, -9.2850589752197e-001, 9.2850589752197e-001,
+ -2.6671275496483e-001, 2.6671275496483e-001, -2.6671275496483e-001, 2.6671275496483e-001,
+ 9.6377605199814e-001, 9.6377605199814e-001, 9.6377605199814e-001, 9.6377605199814e-001,
+ 1.3458071649075e-001, 1.3458071649075e-001, -3.9399200677872e-001, -3.9399200677872e-001,
+ -9.9090266227722e-001, 9.9090266227722e-001, 9.1911387443542e-001, -9.1911387443542e-001,
+ 9.4952815771103e-001, 9.4952815771103e-001, 9.4952815771103e-001, 9.4952815771103e-001,
+ -3.1368175148964e-001, 3.1368175148964e-001, -3.1368175148964e-001, 3.1368175148964e-001,
+ 9.8730140924454e-001, 9.8730140924454e-001, 8.8763958215714e-001, 8.8763958215714e-001,
+ -1.5885815024376e-001, 1.5885815024376e-001, -4.6053871512413e-001, 4.6053871512413e-001,
+ -9.4952815771103e-001, 9.4952815771103e-001, -9.4952815771103e-001, 9.4952815771103e-001,
+ 3.1368175148964e-001, 3.1368175148964e-001, 3.1368175148964e-001, 3.1368175148964e-001,
+ 5.8579784631729e-001, 5.8579784631729e-001, -9.5330601930618e-001, -9.5330601930618e-001,
+ -8.1045717000961e-001, 8.1045717000961e-001, -3.0200594663620e-001, 3.0200594663620e-001,
+ 4.4961133599281e-001, 4.4961133599281e-001, 4.4961133599281e-001, 4.4961133599281e-001,
+ -8.9322429895401e-001, 8.9322429895401e-001, -8.9322429895401e-001, 8.9322429895401e-001,
+ 8.5135519504547e-001, 8.5135519504547e-001, -8.5797369480133e-002, -8.5797369480133e-002,
+ -5.2458971738815e-001, 5.2458971738815e-001, -9.9631255865097e-001, 9.9631255865097e-001,
+ -4.4961133599281e-001, 4.4961133599281e-001, -4.4961133599281e-001, 4.4961133599281e-001,
+ 8.9322429895401e-001, 8.9322429895401e-001, 8.9322429895401e-001, 8.9322429895401e-001,
+ 2.3105812072754e-001, 2.3105812072754e-001, -6.4383155107498e-001, -6.4383155107498e-001,
+ -9.7293996810913e-001, 9.7293996810913e-001, 7.6516723632813e-001, -7.6516723632813e-001,
+ 7.5720882415771e-001, 7.5720882415771e-001, 7.5720882415771e-001, 7.5720882415771e-001,
+ -6.5317285060883e-001, 6.5317285060883e-001, -6.5317285060883e-001, 6.5317285060883e-001,
+ 9.3733900785446e-001, 9.3733900785446e-001, 4.8218375444412e-001, 4.8218375444412e-001,
+ -3.4841868281364e-001, 3.4841868281364e-001, -8.7607002258301e-001, 8.7607002258301e-001,
+ -7.5720882415771e-001, 7.5720882415771e-001, -7.5720882415771e-001, 7.5720882415771e-001,
+ 6.5317285060883e-001, 6.5317285060883e-001, 6.5317285060883e-001, 6.5317285060883e-001,
+ 4.1642957925797e-001, 4.1642957925797e-001, -9.6043044328690e-001, -9.6043044328690e-001,
+ -9.0916800498962e-001, 9.0916800498962e-001, 2.7851969003677e-001, -2.7851969003677e-001,
+ 7.3564566671848e-002, 7.3564566671848e-002, 7.3564566671848e-002, 7.3564566671848e-002,
+ -9.9729043245316e-001, 9.9729043245316e-001, -9.9729043245316e-001, 9.9729043245316e-001,
+ 7.3265427350998e-001, 7.3265427350998e-001, -6.2485951185226e-001, -6.2485951185226e-001,
+ -6.8060100078583e-001, 6.8060100078583e-001, -7.8073716163635e-001, 7.8073716163635e-001,
+ -7.3564566671848e-002, 7.3564566671848e-002, -7.3564566671848e-002, 7.3564566671848e-002,
+ 9.9729043245316e-001, 9.9729043245316e-001, 9.9729043245316e-001, 9.9729043245316e-001,
+ 3.6807224154472e-002, 3.6807224154472e-002, -1.1022220551968e-001, -1.1022220551968e-001,
+ -9.9932235479355e-001, 9.9932235479355e-001, 9.9390691518784e-001, -9.9390691518784e-001,
+ 9.9729043245316e-001, 9.9729043245316e-001, 9.9729043245316e-001, 9.9729043245316e-001,
+ -7.3564566671848e-002, 7.3564566671848e-002, -7.3564566671848e-002, 7.3564566671848e-002,
+ 9.9932235479355e-001, 9.9932235479355e-001, 9.9390691518784e-001, 9.9390691518784e-001,
+ -3.6807224154472e-002, 3.6807224154472e-002, -1.1022220551968e-001, 1.1022220551968e-001,
+ -9.9729043245316e-001, 9.9729043245316e-001, -9.9729043245316e-001, 9.9729043245316e-001,
+ 7.3564566671848e-002, 7.3564566671848e-002, 7.3564566671848e-002, 7.3564566671848e-002,
+ 6.8060100078583e-001, 6.8060100078583e-001, -7.8073716163635e-001, -7.8073716163635e-001,
+ -7.3265427350998e-001, 7.3265427350998e-001, -6.2485951185226e-001, 6.2485951185226e-001,
+ 6.5317285060883e-001, 6.5317285060883e-001, 6.5317285060883e-001, 6.5317285060883e-001,
+ -7.5720882415771e-001, 7.5720882415771e-001, -7.5720882415771e-001, 7.5720882415771e-001,
+ 9.0916800498962e-001, 9.0916800498962e-001, 2.7851969003677e-001, 2.7851969003677e-001,
+ -4.1642957925797e-001, 4.1642957925797e-001, -9.6043044328690e-001, 9.6043044328690e-001,
+ -6.5317285060883e-001, 6.5317285060883e-001, -6.5317285060883e-001, 6.5317285060883e-001,
+ 7.5720882415771e-001, 7.5720882415771e-001, 7.5720882415771e-001, 7.5720882415771e-001,
+ 3.4841868281364e-001, 3.4841868281364e-001, -8.7607002258301e-001, -8.7607002258301e-001,
+ -9.3733900785446e-001, 9.3733900785446e-001, 4.8218375444412e-001, -4.8218375444412e-001,
+ 8.9322429895401e-001, 8.9322429895401e-001, 8.9322429895401e-001, 8.9322429895401e-001,
+ -4.4961133599281e-001, 4.4961133599281e-001, -4.4961133599281e-001, 4.4961133599281e-001,
+ 9.7293996810913e-001, 9.7293996810913e-001, 7.6516723632813e-001, 7.6516723632813e-001,
+ -2.3105812072754e-001, 2.3105812072754e-001, -6.4383155107498e-001, 6.4383155107498e-001,
+ -8.9322429895401e-001, 8.9322429895401e-001, -8.9322429895401e-001, 8.9322429895401e-001,
+ 4.4961133599281e-001, 4.4961133599281e-001, 4.4961133599281e-001, 4.4961133599281e-001,
+ 5.2458971738815e-001, 5.2458971738815e-001, -9.9631255865097e-001, -9.9631255865097e-001,
+ -8.5135519504547e-001, 8.5135519504547e-001, -8.5797369480133e-002, 8.5797369480133e-002,
+ 3.1368175148964e-001, 3.1368175148964e-001, 3.1368175148964e-001, 3.1368175148964e-001,
+ -9.4952815771103e-001, 9.4952815771103e-001, -9.4952815771103e-001, 9.4952815771103e-001,
+ 8.1045717000961e-001, 8.1045717000961e-001, -3.0200594663620e-001, -3.0200594663620e-001,
+ -5.8579784631729e-001, 5.8579784631729e-001, -9.5330601930618e-001, 9.5330601930618e-001,
+ -3.1368175148964e-001, 3.1368175148964e-001, -3.1368175148964e-001, 3.1368175148964e-001,
+ 9.4952815771103e-001, 9.4952815771103e-001, 9.4952815771103e-001, 9.4952815771103e-001,
+ 1.5885815024376e-001, 1.5885815024376e-001, -4.6053871512413e-001, -4.6053871512413e-001,
+ -9.8730140924454e-001, 9.8730140924454e-001, 8.8763958215714e-001, -8.8763958215714e-001,
+ 9.6377605199814e-001, 9.6377605199814e-001, 9.6377605199814e-001, 9.6377605199814e-001,
+ -2.6671275496483e-001, 2.6671275496483e-001, -2.6671275496483e-001, 2.6671275496483e-001,
+ 9.9090266227722e-001, 9.9090266227722e-001, 9.1911387443542e-001, 9.1911387443542e-001,
+ -1.3458071649075e-001, 1.3458071649075e-001, -3.9399200677872e-001, 3.9399200677872e-001,
+ -9.6377605199814e-001, 9.6377605199814e-001, -9.6377605199814e-001, 9.6377605199814e-001,
+ 2.6671275496483e-001, 2.6671275496483e-001, 2.6671275496483e-001, 2.6671275496483e-001,
+ 6.0551106929779e-001, 6.0551106929779e-001, -9.2850589752197e-001, -9.2850589752197e-001,
+ -7.9583686590195e-001, 7.9583686590195e-001, -3.7131732702255e-001, 3.7131732702255e-001,
+ 4.9289819598198e-001, 4.9289819598198e-001, 4.9289819598198e-001, 4.9289819598198e-001,
+ -8.7008696794510e-001, 8.7008696794510e-001, -8.7008696794510e-001, 8.7008696794510e-001,
+ 8.6397284269333e-001, 8.6397284269333e-001, -1.2271523475647e-002, -1.2271523475647e-002,
+ -5.0353837013245e-001, 5.0353837013245e-001, -9.9992465972900e-001, 9.9992465972900e-001,
+ -4.9289819598198e-001, 4.9289819598198e-001, -4.9289819598198e-001, 4.9289819598198e-001,
+ 8.7008696794510e-001, 8.7008696794510e-001, 8.7008696794510e-001, 8.7008696794510e-001,
+ 2.5486567616463e-001, 2.5486567616463e-001, -6.9837617874146e-001, -6.9837617874146e-001,
+ -9.6697646379471e-001, 9.6697646379471e-001, 7.1573078632355e-001, -7.1573078632355e-001,
+ 7.8834640979767e-001, 7.8834640979767e-001, 7.8834640979767e-001, 7.8834640979767e-001,
+ -6.1523163318634e-001, 6.1523163318634e-001, -6.1523163318634e-001, 6.1523163318634e-001,
+ 9.4560730457306e-001, 9.4560730457306e-001, 5.4532492160797e-001, 5.4532492160797e-001,
+ -3.2531028985977e-001, 3.2531028985977e-001, -8.3822470903397e-001, 8.3822470903397e-001,
+ -7.8834640979767e-001, 7.8834640979767e-001, -7.8834640979767e-001, 7.8834640979767e-001,
+ 6.1523163318634e-001, 6.1523163318634e-001, 6.1523163318634e-001, 6.1523163318634e-001,
+ 4.3861624598503e-001, 4.3861624598503e-001, -9.7831737995148e-001, -9.7831737995148e-001,
+ -8.9867448806763e-001, 8.9867448806763e-001, 2.0711141824722e-001, -2.0711141824722e-001,
+ 1.2241067737341e-001, 1.2241067737341e-001, 1.2241067737341e-001, 1.2241067737341e-001,
+ -9.9247956275940e-001, 9.9247956275940e-001, -9.9247956275940e-001, 9.9247956275940e-001,
+ 7.4913638830185e-001, 7.4913638830185e-001, -5.6573194265366e-001, -5.6573194265366e-001,
+ -6.6241580247879e-001, 6.6241580247879e-001, -8.2458931207657e-001, 8.2458931207657e-001,
+ -1.2241067737341e-001, 1.2241067737341e-001, -1.2241067737341e-001, 1.2241067737341e-001,
+ 9.9247956275940e-001, 9.9247956275940e-001, 9.9247956275940e-001, 9.9247956275940e-001,
+ 6.1320737004280e-002, 6.1320737004280e-002, -1.8303988873959e-001, -1.8303988873959e-001,
+ -9.9811810255051e-001, 9.9811810255051e-001, 9.8310548067093e-001, -9.8310548067093e-001,
+ 9.8527765274048e-001, 9.8527765274048e-001, 9.8527765274048e-001, 9.8527765274048e-001,
+ -1.7096188664436e-001, 1.7096188664436e-001, -1.7096188664436e-001, 1.7096188664436e-001,
+ 9.9631261825562e-001, 9.9631261825562e-001, 9.6697646379471e-001, 9.6697646379471e-001,
+ -8.5797317326069e-002, 8.5797317326069e-002, -2.5486564636230e-001, 2.5486564636230e-001,
+ -9.8527765274048e-001, 9.8527765274048e-001, -9.8527765274048e-001, 9.8527765274048e-001,
+ 1.7096188664436e-001, 1.7096188664436e-001, 1.7096188664436e-001, 1.7096188664436e-001,
+ 6.4383155107498e-001, 6.4383155107498e-001, -8.6397284269333e-001, -8.6397284269333e-001,
+ -7.6516723632813e-001, 7.6516723632813e-001, -5.0353848934174e-001, 5.0353848934174e-001,
+ 5.7580822706223e-001, 5.7580822706223e-001, 5.7580822706223e-001, 5.7580822706223e-001,
+ -8.1758481264114e-001, 8.1758481264114e-001, -8.1758481264114e-001, 8.1758481264114e-001,
+ 8.8763964176178e-001, 8.8763964176178e-001, 1.3458073139191e-001, 1.3458073139191e-001,
+ -4.6053871512413e-001, 4.6053871512413e-001, -9.9090266227722e-001, 9.9090266227722e-001,
+ -5.7580822706223e-001, 5.7580822706223e-001, -5.7580822706223e-001, 5.7580822706223e-001,
+ 8.1758481264114e-001, 8.1758481264114e-001, 8.1758481264114e-001, 8.1758481264114e-001,
+ 3.0200594663620e-001, 3.0200594663620e-001, -7.9583698511124e-001, -7.9583698511124e-001,
+ -9.5330601930618e-001, 9.5330601930618e-001, 6.0551100969315e-001, -6.0551100969315e-001,
+ 8.4485357999802e-001, 8.4485357999802e-001, 8.4485357999802e-001, 8.4485357999802e-001,
+ -5.3499764204025e-001, 5.3499764204025e-001, -5.3499764204025e-001, 5.3499764204025e-001,
+ 9.6043050289154e-001, 9.6043050289154e-001, 6.6241574287415e-001, 6.6241574287415e-001,
+ -2.7851969003677e-001, 2.7851969003677e-001, -7.4913638830185e-001, 7.4913638830185e-001,
+ -8.4485357999802e-001, 8.4485357999802e-001, -8.4485357999802e-001, 8.4485357999802e-001,
+ 5.3499764204025e-001, 5.3499764204025e-001, 5.3499764204025e-001, 5.3499764204025e-001,
+ 4.8218378424644e-001, 4.8218378424644e-001, -9.9811804294586e-001, -9.9811804294586e-001,
+ -8.7607008218765e-001, 8.7607008218765e-001, 6.1320662498474e-002, -6.1320662498474e-002,
+ 2.1910125017166e-001, 2.1910125017166e-001, 2.1910125017166e-001, 2.1910125017166e-001,
+ -9.7570210695267e-001, 9.7570210695267e-001, -9.7570210695267e-001, 9.7570210695267e-001,
+ 7.8073722124100e-001, 7.8073722124100e-001, -4.3861621618271e-001, -4.3861621618271e-001,
+ -6.2485951185226e-001, 6.2485951185226e-001, -8.9867442846298e-001, 8.9867442846298e-001,
+ -2.1910125017166e-001, 2.1910125017166e-001, -2.1910125017166e-001, 2.1910125017166e-001,
+ 9.7570210695267e-001, 9.7570210695267e-001, 9.7570210695267e-001, 9.7570210695267e-001,
+ 1.1022221297026e-001, 1.1022221297026e-001, -3.2531028985977e-001, -3.2531028985977e-001,
+ -9.9390697479248e-001, 9.9390697479248e-001, 9.4560730457306e-001, -9.4560730457306e-001,
+ 9.3299281597137e-001, 9.3299281597137e-001, 9.3299281597137e-001, 9.3299281597137e-001,
+ -3.5989505052567e-001, 3.5989505052567e-001, -3.5989505052567e-001, 3.5989505052567e-001,
+ 9.8310548067093e-001, 9.8310548067093e-001, 8.5135519504547e-001, 8.5135519504547e-001,
+ -1.8303988873959e-001, 1.8303988873959e-001, -5.2458971738815e-001, 5.2458971738815e-001,
+ -9.3299281597137e-001, 9.3299281597137e-001, -9.3299281597137e-001, 9.3299281597137e-001,
+ 3.5989505052567e-001, 3.5989505052567e-001, 3.5989505052567e-001, 3.5989505052567e-001,
+ 5.6573182344437e-001, 5.6573182344437e-001, -9.7294002771378e-001, -9.7294002771378e-001,
+ -8.2458931207657e-001, 8.2458931207657e-001, -2.3105818033218e-001, 2.3105818033218e-001,
+ 4.0524131059647e-001, 4.0524131059647e-001, 4.0524131059647e-001, 4.0524131059647e-001,
+ -9.1420972347260e-001, 9.1420972347260e-001, -9.1420972347260e-001, 9.1420972347260e-001,
+ 8.3822470903397e-001, 8.3822470903397e-001, -1.5885812044144e-001, -1.5885812044144e-001,
+ -5.4532498121262e-001, 5.4532498121262e-001, -9.8730140924454e-001, 9.8730140924454e-001,
+ -4.0524131059647e-001, 4.0524131059647e-001, -4.0524131059647e-001, 4.0524131059647e-001,
+ 9.1420972347260e-001, 9.1420972347260e-001, 9.1420972347260e-001, 9.1420972347260e-001,
+ 2.0711138844490e-001, 2.0711138844490e-001, -5.8579778671265e-001, -5.8579778671265e-001,
+ -9.7831737995148e-001, 9.7831737995148e-001, 8.1045722961426e-001, -8.1045722961426e-001,
+ 7.2424709796906e-001, 7.2424709796906e-001, 7.2424709796906e-001, 7.2424709796906e-001,
+ -6.8954056501389e-001, 6.8954056501389e-001, -6.8954056501389e-001, 6.8954056501389e-001,
+ 9.2850607633591e-001, 9.2850607633591e-001, 4.1642951965332e-001, 4.1642951965332e-001,
+ -3.7131720781326e-001, 3.7131720781326e-001, -9.0916794538498e-001, 9.0916794538498e-001,
+ -7.2424709796906e-001, 7.2424709796906e-001, -7.2424709796906e-001, 7.2424709796906e-001,
+ 6.8954056501389e-001, 6.8954056501389e-001, 6.8954056501389e-001, 6.8954056501389e-001,
+ 3.9399203658104e-001, 3.9399203658104e-001, -9.3733906745911e-001, -9.3733906745911e-001,
+ -9.1911387443542e-001, 9.1911387443542e-001, 3.4841871261597e-001, -3.4841871261597e-001,
+ 2.4541229009628e-002, 2.4541229009628e-002, 2.4541229009628e-002, 2.4541229009628e-002,
+ -9.9969881772995e-001, 9.9969881772995e-001, -9.9969881772995e-001, 9.9969881772995e-001,
+ 7.1573078632355e-001, 7.1573078632355e-001, -6.8060100078583e-001, -6.8060100078583e-001,
+ -6.9837623834610e-001, 6.9837623834610e-001, -7.3265415430069e-001, 7.3265415430069e-001,
+ -2.4541229009628e-002, 2.4541229009628e-002, -2.4541229009628e-002, 2.4541229009628e-002,
+ 9.9969881772995e-001, 9.9969881772995e-001, 9.9969881772995e-001, 9.9969881772995e-001,
+ 1.2271538376808e-002, 1.2271538376808e-002, -3.6807224154472e-002, -3.6807224154472e-002,
+ -9.9992471933365e-001, 9.9992471933365e-001, 9.9932241439819e-001, -9.9932241439819e-001,
+ 9.9992471933365e-001, 9.9992471933365e-001, 9.9992471933365e-001, 9.9992471933365e-001,
+ -1.2271538376808e-002, 1.2271538376808e-002, -1.2271538376808e-002, 1.2271538376808e-002,
+ 9.9998116493225e-001, 9.9998116493225e-001, 9.9983054399490e-001, 9.9983054399490e-001,
+ -6.1358846724033e-003, 6.1358846724033e-003, -1.8406730145216e-002, 1.8406730145216e-002,
+ -9.9992471933365e-001, 9.9992471933365e-001, -9.9992471933365e-001, 9.9992471933365e-001,
+ 1.2271538376808e-002, 1.2271538376808e-002, 1.2271538376808e-002, 1.2271538376808e-002,
+ 7.0275473594666e-001, 7.0275473594666e-001, -7.2000241279602e-001, -7.2000241279602e-001,
+ -7.1143215894699e-001, 7.1143215894699e-001, -6.9397145509720e-001, 6.9397145509720e-001,
+ 6.9837623834610e-001, 6.9837623834610e-001, 6.9837623834610e-001, 6.9837623834610e-001,
+ -7.1573078632355e-001, 7.1573078632355e-001, -7.1573078632355e-001, 7.1573078632355e-001,
+ 9.2151403427124e-001, 9.2151403427124e-001, 3.6561298370361e-001, 3.6561298370361e-001,
+ -3.8834506273270e-001, 3.8834506273270e-001, -9.3076688051224e-001, 9.3076688051224e-001,
+ -6.9837623834610e-001, 6.9837623834610e-001, -6.9837623834610e-001, 6.9837623834610e-001,
+ 7.1573078632355e-001, 7.1573078632355e-001, 7.1573078632355e-001, 7.1573078632355e-001,
+ 3.7700742483139e-001, 3.7700742483139e-001, -9.1667896509171e-001, -9.1667896509171e-001,
+ -9.2621022462845e-001, 9.2621022462845e-001, 3.9962416887283e-001, -3.9962416887283e-001,
+ 9.1911387443542e-001, 9.1911387443542e-001, 9.1911387443542e-001, 9.1911387443542e-001,
+ -3.9399203658104e-001, 3.9399203658104e-001, -3.9399203658104e-001, 3.9399203658104e-001,
+ 9.7956979274750e-001, 9.7956979274750e-001, 8.2110255956650e-001, 8.2110255956650e-001,
+ -2.0110464096069e-001, 2.0110464096069e-001, -5.7078075408936e-001, 5.7078075408936e-001,
+ -9.1911387443542e-001, 9.1911387443542e-001, -9.1911387443542e-001, 9.1911387443542e-001,
+ 3.9399203658104e-001, 3.9399203658104e-001, 3.9399203658104e-001, 3.9399203658104e-001,
+ 5.5045801401138e-001, 5.5045801401138e-001, -9.8421007394791e-001, -9.8421007394791e-001,
+ -8.3486288785934e-001, 8.3486288785934e-001, -1.7700427770615e-001, 1.7700427770615e-001,
+ 3.7131720781326e-001, 3.7131720781326e-001, 3.7131720781326e-001, 3.7131720781326e-001,
+ -9.2850607633591e-001, 9.2850607633591e-001, -9.2850607633591e-001, 9.2850607633591e-001,
+ 8.2804501056671e-001, 8.2804501056671e-001, -2.1311044692993e-001, -2.1311044692993e-001,
+ -5.6066161394119e-001, 5.6066161394119e-001, -9.7702807188034e-001, 9.7702807188034e-001,
+ -3.7131720781326e-001, 3.7131720781326e-001, -3.7131720781326e-001, 3.7131720781326e-001,
+ 9.2850607633591e-001, 9.2850607633591e-001, 9.2850607633591e-001, 9.2850607633591e-001,
+ 1.8906867504120e-001, 1.8906867504120e-001, -5.4017150402069e-001, -5.4017150402069e-001,
+ -9.8196387290955e-001, 9.8196387290955e-001, 8.4155499935150e-001, -8.4155499935150e-001,
+ 9.7831737995148e-001, 9.7831737995148e-001, 9.7831737995148e-001, 9.7831737995148e-001,
+ -2.0711138844490e-001, 2.0711138844490e-001, -2.0711138844490e-001, 2.0711138844490e-001,
+ 9.9456459283829e-001, 9.9456459283829e-001, 9.5143502950668e-001, 9.5143502950668e-001,
+ -1.0412164032459e-001, 1.0412164032459e-001, -3.0784964561462e-001, 3.0784964561462e-001,
+ -9.7831737995148e-001, 9.7831737995148e-001, -9.7831737995148e-001, 9.7831737995148e-001,
+ 2.0711138844490e-001, 2.0711138844490e-001, 2.0711138844490e-001, 2.0711138844490e-001,
+ 6.2963825464249e-001, 6.2963825464249e-001, -8.9044862985611e-001, -8.9044862985611e-001,
+ -7.7688843011856e-001, 7.7688843011856e-001, -4.5508366823196e-001, 4.5508366823196e-001,
+ 5.4532498121262e-001, 5.4532498121262e-001, 5.4532498121262e-001, 5.4532498121262e-001,
+ -8.3822470903397e-001, 8.3822470903397e-001, -8.3822470903397e-001, 8.3822470903397e-001,
+ 8.7901222705841e-001, 8.7901222705841e-001, 7.9682409763336e-002, 7.9682409763336e-002,
+ -4.7679924964905e-001, 4.7679924964905e-001, -9.9682033061981e-001, 9.9682033061981e-001,
+ -5.4532498121262e-001, 5.4532498121262e-001, -5.4532498121262e-001, 5.4532498121262e-001,
+ 8.3822470903397e-001, 8.3822470903397e-001, 8.3822470903397e-001, 8.3822470903397e-001,
+ 2.8440755605698e-001, 2.8440755605698e-001, -7.6120227575302e-001, -7.6120227575302e-001,
+ -9.5870345830917e-001, 9.5870345830917e-001, 6.4851438999176e-001, -6.4851438999176e-001,
+ 8.2458931207657e-001, 8.2458931207657e-001, 8.2458931207657e-001, 8.2458931207657e-001,
+ -5.6573182344437e-001, 5.6573182344437e-001, -5.6573182344437e-001, 5.6573182344437e-001,
+ 9.5514118671417e-001, 9.5514118671417e-001, 6.2005722522736e-001, 6.2005722522736e-001,
+ -2.9615089297295e-001, 2.9615089297295e-001, -7.8455662727356e-001, 7.8455662727356e-001,
+ -8.2458931207657e-001, 8.2458931207657e-001, -8.2458931207657e-001, 8.2458931207657e-001,
+ 5.6573182344437e-001, 5.6573182344437e-001, 5.6573182344437e-001, 5.6573182344437e-001,
+ 4.6597650647163e-001, 4.6597650647163e-001, -9.9321198463440e-001, -9.9321198463440e-001,
+ -8.8479709625244e-001, 8.8479709625244e-001, 1.1631858348846e-001, -1.1631858348846e-001,
+ 1.8303988873959e-001, 1.8303988873959e-001, 1.8303988873959e-001, 1.8303988873959e-001,
+ -9.8310548067093e-001, 9.8310548067093e-001, -9.8310548067093e-001, 9.8310548067093e-001,
+ 7.6910334825516e-001, 7.6910334825516e-001, -4.8755019903183e-001, -4.8755019903183e-001,
+ -6.3912445306778e-001, 6.3912445306778e-001, -8.7309497594833e-001, 8.7309497594833e-001,
+ -1.8303988873959e-001, 1.8303988873959e-001, -1.8303988873959e-001, 1.8303988873959e-001,
+ 9.8310548067093e-001, 9.8310548067093e-001, 9.8310548067093e-001, 9.8310548067093e-001,
+ 9.1908961534500e-002, 9.1908961534500e-002, -2.7262136340141e-001, -2.7262136340141e-001,
+ -9.9576741456985e-001, 9.9576741456985e-001, 9.6212142705917e-001, -9.6212142705917e-001,
+ 9.9390697479248e-001, 9.9390697479248e-001, 9.9390697479248e-001, 9.9390697479248e-001,
+ -1.1022221297026e-001, 1.1022221297026e-001, -1.1022221297026e-001, 1.1022221297026e-001,
+ 9.9847555160522e-001, 9.9847555160522e-001, 9.8630803823471e-001, 9.8630803823471e-001,
+ -5.5195245891809e-002, 5.5195245891809e-002, -1.6491313278675e-001, 1.6491313278675e-001,
+ -9.9390697479248e-001, 9.9390697479248e-001, -9.9390697479248e-001, 9.9390697479248e-001,
+ 1.1022221297026e-001, 1.1022221297026e-001, 1.1022221297026e-001, 1.1022221297026e-001,
+ 6.6699993610382e-001, 6.6699993610382e-001, -8.1403625011444e-001, -8.1403625011444e-001,
+ -7.4505776166916e-001, 7.4505776166916e-001, -5.8081406354904e-001, 5.8081406354904e-001,
+ 6.2485951185226e-001, 6.2485951185226e-001, 6.2485951185226e-001, 6.2485951185226e-001,
+ -7.8073722124100e-001, 7.8073722124100e-001, -7.8073722124100e-001, 7.8073722124100e-001,
+ 9.0134882926941e-001, 9.0134882926941e-001, 2.2508388757706e-001, 2.2508388757706e-001,
+ -4.3309381604195e-001, 4.3309381604195e-001, -9.7433936595917e-001, 9.7433936595917e-001,
+ -6.2485951185226e-001, 6.2485951185226e-001, -6.2485951185226e-001, 6.2485951185226e-001,
+ 7.8073722124100e-001, 7.8073722124100e-001, 7.8073722124100e-001, 7.8073722124100e-001,
+ 3.3110630512238e-001, 3.3110630512238e-001, -8.4812033176422e-001, -8.4812033176422e-001,
+ -9.4359344244003e-001, 9.4359344244003e-001, 5.2980363368988e-001, -5.2980363368988e-001,
+ 8.7607008218765e-001, 8.7607008218765e-001, 8.7607008218765e-001, 8.7607008218765e-001,
+ -4.8218378424644e-001, 4.8218378424644e-001, -4.8218378424644e-001, 4.8218378424644e-001,
+ 9.6852207183838e-001, 9.6852207183838e-001, 7.2846436500549e-001, 7.2846436500549e-001,
+ -2.4892760813236e-001, 2.4892760813236e-001, -6.8508368730545e-001, 6.8508368730545e-001,
+ -8.7607008218765e-001, 8.7607008218765e-001, -8.7607008218765e-001, 8.7607008218765e-001,
+ 4.8218378424644e-001, 4.8218378424644e-001, 4.8218378424644e-001, 4.8218378424644e-001,
+ 5.0883013010025e-001, 5.0883013010025e-001, -9.9952930212021e-001, -9.9952930212021e-001,
+ -8.6086690425873e-001, 8.6086690425873e-001, -3.0674815177917e-002, 3.0674815177917e-002,
+ 2.7851969003677e-001, 2.7851969003677e-001, 2.7851969003677e-001, 2.7851969003677e-001,
+ -9.6043050289154e-001, 9.6043050289154e-001, -9.6043050289154e-001, 9.6043050289154e-001,
+ 7.9953724145889e-001, 7.9953724145889e-001, -3.5416358709335e-001, -3.5416358709335e-001,
+ -6.0061651468277e-001, 6.0061651468277e-001, -9.3518334627151e-001, 9.3518334627151e-001,
+ -2.7851969003677e-001, 2.7851969003677e-001, -2.7851969003677e-001, 2.7851969003677e-001,
+ 9.6043050289154e-001, 9.6043050289154e-001, 9.6043050289154e-001, 9.6043050289154e-001,
+ 1.4065824449062e-001, 1.4065824449062e-001, -4.1084313392639e-001, -4.1084313392639e-001,
+ -9.9005818367004e-001, 9.9005818367004e-001, 9.1170597076416e-001, -9.1170597076416e-001,
+ 9.5330601930618e-001, 9.5330601930618e-001, 9.5330601930618e-001, 9.5330601930618e-001,
+ -3.0200594663620e-001, 3.0200594663620e-001, -3.0200594663620e-001, 3.0200594663620e-001,
+ 9.8825758695602e-001, 9.8825758695602e-001, 8.9596629142761e-001, 8.9596629142761e-001,
+ -1.5279719233513e-001, 1.5279719233513e-001, -4.4412216544151e-001, 4.4412216544151e-001,
+ -9.5330601930618e-001, 9.5330601930618e-001, -9.5330601930618e-001, 9.5330601930618e-001,
+ 3.0200594663620e-001, 3.0200594663620e-001, 3.0200594663620e-001, 3.0200594663620e-001,
+ 5.9075969457626e-001, 5.9075969457626e-001, -9.4758564233780e-001, -9.4758564233780e-001,
+ -8.0684757232666e-001, 8.0684757232666e-001, -3.1950199604034e-001, 3.1950199604034e-001,
+ 4.6053871512413e-001, 4.6053871512413e-001, 4.6053871512413e-001, 4.6053871512413e-001,
+ -8.8763964176178e-001, 8.8763964176178e-001, -8.8763964176178e-001, 8.8763964176178e-001,
+ 8.5455799102783e-001, 8.5455799102783e-001, -6.7443966865540e-002, -6.7443966865540e-002,
+ -5.1935601234436e-001, 5.1935601234436e-001, -9.9772310256958e-001, 9.9772310256958e-001,
+ -4.6053871512413e-001, 4.6053871512413e-001, -4.6053871512413e-001, 4.6053871512413e-001,
+ 8.8763964176178e-001, 8.8763964176178e-001, 8.8763964176178e-001, 8.8763964176178e-001,
+ 2.3702360689640e-001, 2.3702360689640e-001, -6.5780675411224e-001, -6.5780675411224e-001,
+ -9.7150391340256e-001, 9.7150391340256e-001, 7.5318682193756e-001, -7.5318682193756e-001,
+ 7.6516723632813e-001, 7.6516723632813e-001, 7.6516723632813e-001, 7.6516723632813e-001,
+ -6.4383155107498e-001, 6.4383155107498e-001, -6.4383155107498e-001, 6.4383155107498e-001,
+ 9.3945920467377e-001, 9.3945920467377e-001, 4.9822762608528e-001, 4.9822762608528e-001,
+ -3.4266072511673e-001, 3.4266072511673e-001, -8.6704617738724e-001, 8.6704617738724e-001,
+ -7.6516723632813e-001, 7.6516723632813e-001, -7.6516723632813e-001, 7.6516723632813e-001,
+ 6.4383155107498e-001, 6.4383155107498e-001, 6.4383155107498e-001, 6.4383155107498e-001,
+ 4.2200028896332e-001, 4.2200028896332e-001, -9.6539437770844e-001, -9.6539437770844e-001,
+ -9.0659570693970e-001, 9.0659570693970e-001, 2.6079410314560e-001, -2.6079410314560e-001,
+ 8.5797317326069e-002, 8.5797317326069e-002, 8.5797317326069e-002, 8.5797317326069e-002,
+ -9.9631261825562e-001, 9.9631261825562e-001, -9.9631261825562e-001, 9.9631261825562e-001,
+ 7.3681652545929e-001, 7.3681652545929e-001, -6.1038291454315e-001, -6.1038291454315e-001,
+ -6.7609274387360e-001, 6.7609274387360e-001, -7.9210650920868e-001, 7.9210650920868e-001,
+ -8.5797317326069e-002, 8.5797317326069e-002, -8.5797317326069e-002, 8.5797317326069e-002,
+ 9.9631261825562e-001, 9.9631261825562e-001, 9.9631261825562e-001, 9.9631261825562e-001,
+ 4.2938258498907e-002, 4.2938258498907e-002, -1.2849812209606e-001, -1.2849812209606e-001,
+ -9.9907773733139e-001, 9.9907773733139e-001, 9.9170976877213e-001, -9.9170976877213e-001,
+ 9.9811810255051e-001, 9.9811810255051e-001, 9.9811810255051e-001, 9.9811810255051e-001,
+ -6.1320737004280e-002, 6.1320737004280e-002, -6.1320737004280e-002, 6.1320737004280e-002,
+ 9.9952942132950e-001, 9.9952942132950e-001, 9.9576741456985e-001, 9.9576741456985e-001,
+ -3.0674804002047e-002, 3.0674804002047e-002, -9.1908961534500e-002, 9.1908961534500e-002,
+ -9.9811810255051e-001, 9.9811810255051e-001, -9.9811810255051e-001, 9.9811810255051e-001,
+ 6.1320737004280e-002, 6.1320737004280e-002, 6.1320737004280e-002, 6.1320737004280e-002,
+ 6.8508368730545e-001, 6.8508368730545e-001, -7.6910322904587e-001, -7.6910322904587e-001,
+ -7.2846436500549e-001, 7.2846436500549e-001, -6.3912451267242e-001, 6.3912451267242e-001,
+ 6.6241580247879e-001, 6.6241580247879e-001, 6.6241580247879e-001, 6.6241580247879e-001,
+ -7.4913638830185e-001, 7.4913638830185e-001, -7.4913638830185e-001, 7.4913638830185e-001,
+ 9.1170603036880e-001, 9.1170603036880e-001, 2.9615086317062e-001, 2.9615086317062e-001,
+ -4.1084319353104e-001, 4.1084319353104e-001, -9.5514112710953e-001, 9.5514112710953e-001,
+ -6.6241580247879e-001, 6.6241580247879e-001, -6.6241580247879e-001, 6.6241580247879e-001,
+ 7.4913638830185e-001, 7.4913638830185e-001, 7.4913638830185e-001, 7.4913638830185e-001,
+ 3.5416352748871e-001, 3.5416352748871e-001, -8.8479721546173e-001, -8.8479721546173e-001,
+ -9.3518352508545e-001, 9.3518352508545e-001, 4.6597647666931e-001, -4.6597647666931e-001,
+ 8.9867448806763e-001, 8.9867448806763e-001, 8.9867448806763e-001, 8.9867448806763e-001,
+ -4.3861624598503e-001, 4.3861624598503e-001, -4.3861624598503e-001, 4.3861624598503e-001,
+ 9.7433936595917e-001, 9.7433936595917e-001, 7.7688843011856e-001, 7.7688843011856e-001,
+ -2.2508391737938e-001, 2.2508391737938e-001, -6.2963819503784e-001, 6.2963819503784e-001,
+ -8.9867448806763e-001, 8.9867448806763e-001, -8.9867448806763e-001, 8.9867448806763e-001,
+ 4.3861624598503e-001, 4.3861624598503e-001, 4.3861624598503e-001, 4.3861624598503e-001,
+ 5.2980363368988e-001, 5.2980363368988e-001, -9.9456453323364e-001, -9.9456453323364e-001,
+ -8.4812033176422e-001, 8.4812033176422e-001, -1.0412168502808e-001, 1.0412168502808e-001,
+ 3.2531028985977e-001, 3.2531028985977e-001, 3.2531028985977e-001, 3.2531028985977e-001,
+ -9.4560730457306e-001, 9.4560730457306e-001, -9.4560730457306e-001, 9.4560730457306e-001,
+ 8.1403630971909e-001, 8.1403630971909e-001, -2.8440755605698e-001, -2.8440755605698e-001,
+ -5.8081394433975e-001, 5.8081394433975e-001, -9.5870345830917e-001, 9.5870345830917e-001,
+ -3.2531028985977e-001, 3.2531028985977e-001, -3.2531028985977e-001, 3.2531028985977e-001,
+ 9.4560730457306e-001, 9.4560730457306e-001, 9.4560730457306e-001, 9.4560730457306e-001,
+ 1.6491311788559e-001, 1.6491311788559e-001, -4.7679924964905e-001, -4.7679924964905e-001,
+ -9.8630809783936e-001, 9.8630809783936e-001, 8.7901222705841e-001, -8.7901222705841e-001,
+ 9.6697646379471e-001, 9.6697646379471e-001, 9.6697646379471e-001, 9.6697646379471e-001,
+ -2.5486567616463e-001, 2.5486567616463e-001, -2.5486567616463e-001, 2.5486567616463e-001,
+ 9.9170976877213e-001, 9.9170976877213e-001, 9.2621028423309e-001, 9.2621028423309e-001,
+ -1.2849810719490e-001, 1.2849810719490e-001, -3.7700745463371e-001, 3.7700745463371e-001,
+ -9.6697646379471e-001, 9.6697646379471e-001, -9.6697646379471e-001, 9.6697646379471e-001,
+ 2.5486567616463e-001, 2.5486567616463e-001, 2.5486567616463e-001, 2.5486567616463e-001,
+ 6.1038279533386e-001, 6.1038279533386e-001, -9.2151403427124e-001, -9.2151403427124e-001,
+ -7.9210656881332e-001, 7.9210656881332e-001, -3.8834506273270e-001, 3.8834506273270e-001,
+ 5.0353837013245e-001, 5.0353837013245e-001, 5.0353837013245e-001, 5.0353837013245e-001,
+ -8.6397284269333e-001, 8.6397284269333e-001, -8.6397284269333e-001, 8.6397284269333e-001,
+ 8.6704623699188e-001, 8.6704623699188e-001, 6.1358809471130e-003, 6.1358809471130e-003,
+ -4.9822768568993e-001, 4.9822768568993e-001, -9.9998104572296e-001, 9.9998104572296e-001,
+ -5.0353837013245e-001, 5.0353837013245e-001, -5.0353837013245e-001, 5.0353837013245e-001,
+ 8.6397284269333e-001, 8.6397284269333e-001, 8.6397284269333e-001, 8.6397284269333e-001,
+ 2.6079413294792e-001, 2.6079413294792e-001, -7.1143209934235e-001, -7.1143209934235e-001,
+ -9.6539443731308e-001, 9.6539443731308e-001, 7.0275473594666e-001, -7.0275473594666e-001,
+ 7.9583686590195e-001, 7.9583686590195e-001, 7.9583686590195e-001, 7.9583686590195e-001,
+ -6.0551106929779e-001, 6.0551106929779e-001, -6.0551106929779e-001, 6.0551106929779e-001,
+ 9.4758558273315e-001, 9.4758558273315e-001, 5.6066155433655e-001, 5.6066155433655e-001,
+ -3.1950202584267e-001, 3.1950202584267e-001, -8.2804512977600e-001, 8.2804512977600e-001,
+ -7.9583686590195e-001, 7.9583686590195e-001, -7.9583686590195e-001, 7.9583686590195e-001,
+ 6.0551106929779e-001, 6.0551106929779e-001, 6.0551106929779e-001, 6.0551106929779e-001,
+ 4.4412216544151e-001, 4.4412216544151e-001, -9.8196375370026e-001, -9.8196375370026e-001,
+ -8.9596623182297e-001, 8.9596623182297e-001, 1.8906867504120e-001, -1.8906867504120e-001,
+ 1.3458071649075e-001, 1.3458071649075e-001, 1.3458071649075e-001, 1.3458071649075e-001,
+ -9.9090266227722e-001, 9.9090266227722e-001, -9.9090266227722e-001, 9.9090266227722e-001,
+ 7.5318676233292e-001, 7.5318676233292e-001, -5.5045801401138e-001, -5.5045801401138e-001,
+ -6.5780669450760e-001, 6.5780669450760e-001, -8.3486288785934e-001, 8.3486288785934e-001,
+ -1.3458071649075e-001, 1.3458071649075e-001, -1.3458071649075e-001, 1.3458071649075e-001,
+ 9.9090266227722e-001, 9.9090266227722e-001, 9.9090266227722e-001, 9.9090266227722e-001,
+ 6.7443922162056e-002, 6.7443922162056e-002, -2.0110465586185e-001, -2.0110465586185e-001,
+ -9.9772304296494e-001, 9.9772304296494e-001, 9.7956973314285e-001, -9.7956973314285e-001,
+ 9.8730140924454e-001, 9.8730140924454e-001, 9.8730140924454e-001, 9.8730140924454e-001,
+ -1.5885815024376e-001, 1.5885815024376e-001, -1.5885815024376e-001, 1.5885815024376e-001,
+ 9.9682027101517e-001, 9.9682027101517e-001, 9.7150385379791e-001, 9.7150385379791e-001,
+ -7.9682439565659e-002, 7.9682439565659e-002, -2.3702362179756e-001, 2.3702362179756e-001,
+ -9.8730140924454e-001, 9.8730140924454e-001, -9.8730140924454e-001, 9.8730140924454e-001,
+ 1.5885815024376e-001, 1.5885815024376e-001, 1.5885815024376e-001, 1.5885815024376e-001,
+ 6.4851438999176e-001, 6.4851438999176e-001, -8.5455799102783e-001, -8.5455799102783e-001,
+ -7.6120239496231e-001, 7.6120239496231e-001, -5.1935595273972e-001, 5.1935595273972e-001,
+ 5.8579784631729e-001, 5.8579784631729e-001, 5.8579784631729e-001, 5.8579784631729e-001,
+ -8.1045717000961e-001, 8.1045717000961e-001, -8.1045717000961e-001, 8.1045717000961e-001,
+ 8.9044868946075e-001, 8.9044868946075e-001, 1.5279716253281e-001, 1.5279716253281e-001,
+ -4.5508360862732e-001, 4.5508360862732e-001, -9.8825740814209e-001, 9.8825740814209e-001,
+ -5.8579784631729e-001, 5.8579784631729e-001, -5.8579784631729e-001, 5.8579784631729e-001,
+ 8.1045717000961e-001, 8.1045717000961e-001, 8.1045717000961e-001, 8.1045717000961e-001,
+ 3.0784964561462e-001, 3.0784964561462e-001, -8.0684757232666e-001, -8.0684757232666e-001,
+ -9.5143502950668e-001, 9.5143502950668e-001, 5.9075975418091e-001, -5.9075975418091e-001,
+ 8.5135519504547e-001, 8.5135519504547e-001, 8.5135519504547e-001, 8.5135519504547e-001,
+ -5.2458971738815e-001, 5.2458971738815e-001, -5.2458971738815e-001, 5.2458971738815e-001,
+ 9.6212142705917e-001, 9.6212142705917e-001, 6.7609268426895e-001, 6.7609268426895e-001,
+ -2.7262136340141e-001, 2.7262136340141e-001, -7.3681664466858e-001, 7.3681664466858e-001,
+ -8.5135519504547e-001, 8.5135519504547e-001, -8.5135519504547e-001, 8.5135519504547e-001,
+ 5.2458971738815e-001, 5.2458971738815e-001, 5.2458971738815e-001, 5.2458971738815e-001,
+ 4.8755016922951e-001, 4.8755016922951e-001, -9.9907779693604e-001, -9.9907779693604e-001,
+ -8.7309497594833e-001, 8.7309497594833e-001, 4.2938232421875e-002, -4.2938232421875e-002,
+ 2.3105812072754e-001, 2.3105812072754e-001, 2.3105812072754e-001, 2.3105812072754e-001,
+ -9.7293996810913e-001, 9.7293996810913e-001, -9.7293996810913e-001, 9.7293996810913e-001,
+ 7.8455656766891e-001, 7.8455656766891e-001, -4.2200034856796e-001, -4.2200034856796e-001,
+ -6.2005722522736e-001, 6.2005722522736e-001, -9.0659570693970e-001, 9.0659570693970e-001,
+ -2.3105812072754e-001, 2.3105812072754e-001, -2.3105812072754e-001, 2.3105812072754e-001,
+ 9.7293996810913e-001, 9.7293996810913e-001, 9.7293996810913e-001, 9.7293996810913e-001,
+ 1.1631863564253e-001, 1.1631863564253e-001, -3.4266072511673e-001, -3.4266072511673e-001,
+ -9.9321192502975e-001, 9.9321192502975e-001, 9.3945920467377e-001, -9.3945920467377e-001,
+ 9.3733900785446e-001, 9.3733900785446e-001, 9.3733900785446e-001, 9.3733900785446e-001,
+ -3.4841868281364e-001, 3.4841868281364e-001, -3.4841868281364e-001, 3.4841868281364e-001,
+ 9.8421007394791e-001, 9.8421007394791e-001, 8.6086690425873e-001, 8.6086690425873e-001,
+ -1.7700421810150e-001, 1.7700421810150e-001, -5.0883013010025e-001, 5.0883013010025e-001,
+ -9.3733900785446e-001, 9.3733900785446e-001, -9.3733900785446e-001, 9.3733900785446e-001,
+ 3.4841868281364e-001, 3.4841868281364e-001, 3.4841868281364e-001, 3.4841868281364e-001,
+ 5.7078075408936e-001, 5.7078075408936e-001, -9.6852207183838e-001, -9.6852207183838e-001,
+ -8.2110249996185e-001, 8.2110249996185e-001, -2.4892759323120e-001, 2.4892759323120e-001,
+ 4.1642957925797e-001, 4.1642957925797e-001, 4.1642957925797e-001, 4.1642957925797e-001,
+ -9.0916800498962e-001, 9.0916800498962e-001, -9.0916800498962e-001, 9.0916800498962e-001,
+ 8.4155493974686e-001, 8.4155493974686e-001, -1.4065837860107e-001, -1.4065837860107e-001,
+ -5.4017150402069e-001, 5.4017150402069e-001, -9.9005818367004e-001, 9.9005818367004e-001,
+ -4.1642957925797e-001, 4.1642957925797e-001, -4.1642957925797e-001, 4.1642957925797e-001,
+ 9.0916800498962e-001, 9.0916800498962e-001, 9.0916800498962e-001, 9.0916800498962e-001,
+ 2.1311032772064e-001, 2.1311032772064e-001, -6.0061651468277e-001, -6.0061651468277e-001,
+ -9.7702813148499e-001, 9.7702813148499e-001, 7.9953724145889e-001, -7.9953724145889e-001,
+ 7.3265427350998e-001, 7.3265427350998e-001, 7.3265427350998e-001, 7.3265427350998e-001,
+ -6.8060100078583e-001, 6.8060100078583e-001, -6.8060100078583e-001, 6.8060100078583e-001,
+ 9.3076694011688e-001, 9.3076694011688e-001, 4.3309378623962e-001, 4.3309378623962e-001,
+ -3.6561301350594e-001, 3.6561301350594e-001, -9.0134882926941e-001, 9.0134882926941e-001,
+ -7.3265427350998e-001, 7.3265427350998e-001, -7.3265427350998e-001, 7.3265427350998e-001,
+ 6.8060100078583e-001, 6.8060100078583e-001, 6.8060100078583e-001, 6.8060100078583e-001,
+ 3.9962419867516e-001, 3.9962419867516e-001, -9.4359350204468e-001, -9.4359350204468e-001,
+ -9.1667908430099e-001, 9.1667908430099e-001, 3.3110630512238e-001, -3.3110630512238e-001,
+ 3.6807224154472e-002, 3.6807224154472e-002, 3.6807224154472e-002, 3.6807224154472e-002,
+ -9.9932235479355e-001, 9.9932235479355e-001, -9.9932235479355e-001, 9.9932235479355e-001,
+ 7.2000247240067e-001, 7.2000247240067e-001, -6.6699987649918e-001, -6.6699987649918e-001,
+ -6.9397145509720e-001, 6.9397145509720e-001, -7.4505764245987e-001, 7.4505764245987e-001,
+ -3.6807224154472e-002, 3.6807224154472e-002, -3.6807224154472e-002, 3.6807224154472e-002,
+ 9.9932235479355e-001, 9.9932235479355e-001, 9.9932235479355e-001, 9.9932235479355e-001,
+ 1.8406730145216e-002, 1.8406730145216e-002, -5.5195245891809e-002, -5.5195245891809e-002,
+ -9.9983060359955e-001, 9.9983060359955e-001, 9.9847561120987e-001, -9.9847561120987e-001,
+ 9.9932235479355e-001, 9.9932235479355e-001, 9.9932235479355e-001, 9.9932235479355e-001,
+ -3.6807224154472e-002, 3.6807224154472e-002, -3.6807224154472e-002, 3.6807224154472e-002,
+ 9.9983060359955e-001, 9.9983060359955e-001, 9.9847561120987e-001, 9.9847561120987e-001,
+ -1.8406730145216e-002, 1.8406730145216e-002, -5.5195245891809e-002, 5.5195245891809e-002,
+ -9.9932235479355e-001, 9.9932235479355e-001, -9.9932235479355e-001, 9.9932235479355e-001,
+ 3.6807224154472e-002, 3.6807224154472e-002, 3.6807224154472e-002, 3.6807224154472e-002,
+ 6.9397145509720e-001, 6.9397145509720e-001, -7.4505764245987e-001, -7.4505764245987e-001,
+ -7.2000247240067e-001, 7.2000247240067e-001, -6.6699987649918e-001, 6.6699987649918e-001,
+ 6.8060100078583e-001, 6.8060100078583e-001, 6.8060100078583e-001, 6.8060100078583e-001,
+ -7.3265427350998e-001, 7.3265427350998e-001, -7.3265427350998e-001, 7.3265427350998e-001,
+ 9.1667908430099e-001, 9.1667908430099e-001, 3.3110630512238e-001, 3.3110630512238e-001,
+ -3.9962419867516e-001, 3.9962419867516e-001, -9.4359350204468e-001, 9.4359350204468e-001,
+ -6.8060100078583e-001, 6.8060100078583e-001, -6.8060100078583e-001, 6.8060100078583e-001,
+ 7.3265427350998e-001, 7.3265427350998e-001, 7.3265427350998e-001, 7.3265427350998e-001,
+ 3.6561301350594e-001, 3.6561301350594e-001, -9.0134882926941e-001, -9.0134882926941e-001,
+ -9.3076694011688e-001, 9.3076694011688e-001, 4.3309378623962e-001, -4.3309378623962e-001,
+ 9.0916800498962e-001, 9.0916800498962e-001, 9.0916800498962e-001, 9.0916800498962e-001,
+ -4.1642957925797e-001, 4.1642957925797e-001, -4.1642957925797e-001, 4.1642957925797e-001,
+ 9.7702813148499e-001, 9.7702813148499e-001, 7.9953724145889e-001, 7.9953724145889e-001,
+ -2.1311032772064e-001, 2.1311032772064e-001, -6.0061651468277e-001, 6.0061651468277e-001,
+ -9.0916800498962e-001, 9.0916800498962e-001, -9.0916800498962e-001, 9.0916800498962e-001,
+ 4.1642957925797e-001, 4.1642957925797e-001, 4.1642957925797e-001, 4.1642957925797e-001,
+ 5.4017150402069e-001, 5.4017150402069e-001, -9.9005818367004e-001, -9.9005818367004e-001,
+ -8.4155493974686e-001, 8.4155493974686e-001, -1.4065837860107e-001, 1.4065837860107e-001,
+ 3.4841868281364e-001, 3.4841868281364e-001, 3.4841868281364e-001, 3.4841868281364e-001,
+ -9.3733900785446e-001, 9.3733900785446e-001, -9.3733900785446e-001, 9.3733900785446e-001,
+ 8.2110249996185e-001, 8.2110249996185e-001, -2.4892759323120e-001, -2.4892759323120e-001,
+ -5.7078075408936e-001, 5.7078075408936e-001, -9.6852207183838e-001, 9.6852207183838e-001,
+ -3.4841868281364e-001, 3.4841868281364e-001, -3.4841868281364e-001, 3.4841868281364e-001,
+ 9.3733900785446e-001, 9.3733900785446e-001, 9.3733900785446e-001, 9.3733900785446e-001,
+ 1.7700421810150e-001, 1.7700421810150e-001, -5.0883013010025e-001, -5.0883013010025e-001,
+ -9.8421007394791e-001, 9.8421007394791e-001, 8.6086690425873e-001, -8.6086690425873e-001,
+ 9.7293996810913e-001, 9.7293996810913e-001, 9.7293996810913e-001, 9.7293996810913e-001,
+ -2.3105812072754e-001, 2.3105812072754e-001, -2.3105812072754e-001, 2.3105812072754e-001,
+ 9.9321192502975e-001, 9.9321192502975e-001, 9.3945920467377e-001, 9.3945920467377e-001,
+ -1.1631863564253e-001, 1.1631863564253e-001, -3.4266072511673e-001, 3.4266072511673e-001,
+ -9.7293996810913e-001, 9.7293996810913e-001, -9.7293996810913e-001, 9.7293996810913e-001,
+ 2.3105812072754e-001, 2.3105812072754e-001, 2.3105812072754e-001, 2.3105812072754e-001,
+ 6.2005722522736e-001, 6.2005722522736e-001, -9.0659570693970e-001, -9.0659570693970e-001,
+ -7.8455656766891e-001, 7.8455656766891e-001, -4.2200034856796e-001, 4.2200034856796e-001,
+ 5.2458971738815e-001, 5.2458971738815e-001, 5.2458971738815e-001, 5.2458971738815e-001,
+ -8.5135519504547e-001, 8.5135519504547e-001, -8.5135519504547e-001, 8.5135519504547e-001,
+ 8.7309497594833e-001, 8.7309497594833e-001, 4.2938232421875e-002, 4.2938232421875e-002,
+ -4.8755016922951e-001, 4.8755016922951e-001, -9.9907779693604e-001, 9.9907779693604e-001,
+ -5.2458971738815e-001, 5.2458971738815e-001, -5.2458971738815e-001, 5.2458971738815e-001,
+ 8.5135519504547e-001, 8.5135519504547e-001, 8.5135519504547e-001, 8.5135519504547e-001,
+ 2.7262136340141e-001, 2.7262136340141e-001, -7.3681664466858e-001, -7.3681664466858e-001,
+ -9.6212142705917e-001, 9.6212142705917e-001, 6.7609268426895e-001, -6.7609268426895e-001,
+ 8.1045717000961e-001, 8.1045717000961e-001, 8.1045717000961e-001, 8.1045717000961e-001,
+ -5.8579784631729e-001, 5.8579784631729e-001, -5.8579784631729e-001, 5.8579784631729e-001,
+ 9.5143502950668e-001, 9.5143502950668e-001, 5.9075975418091e-001, 5.9075975418091e-001,
+ -3.0784964561462e-001, 3.0784964561462e-001, -8.0684757232666e-001, 8.0684757232666e-001,
+ -8.1045717000961e-001, 8.1045717000961e-001, -8.1045717000961e-001, 8.1045717000961e-001,
+ 5.8579784631729e-001, 5.8579784631729e-001, 5.8579784631729e-001, 5.8579784631729e-001,
+ 4.5508360862732e-001, 4.5508360862732e-001, -9.8825740814209e-001, -9.8825740814209e-001,
+ -8.9044868946075e-001, 8.9044868946075e-001, 1.5279716253281e-001, -1.5279716253281e-001,
+ 1.5885815024376e-001, 1.5885815024376e-001, 1.5885815024376e-001, 1.5885815024376e-001,
+ -9.8730140924454e-001, 9.8730140924454e-001, -9.8730140924454e-001, 9.8730140924454e-001,
+ 7.6120239496231e-001, 7.6120239496231e-001, -5.1935595273972e-001, -5.1935595273972e-001,
+ -6.4851438999176e-001, 6.4851438999176e-001, -8.5455799102783e-001, 8.5455799102783e-001,
+ -1.5885815024376e-001, 1.5885815024376e-001, -1.5885815024376e-001, 1.5885815024376e-001,
+ 9.8730140924454e-001, 9.8730140924454e-001, 9.8730140924454e-001, 9.8730140924454e-001,
+ 7.9682439565659e-002, 7.9682439565659e-002, -2.3702362179756e-001, -2.3702362179756e-001,
+ -9.9682027101517e-001, 9.9682027101517e-001, 9.7150385379791e-001, -9.7150385379791e-001,
+ 9.9090266227722e-001, 9.9090266227722e-001, 9.9090266227722e-001, 9.9090266227722e-001,
+ -1.3458071649075e-001, 1.3458071649075e-001, -1.3458071649075e-001, 1.3458071649075e-001,
+ 9.9772304296494e-001, 9.9772304296494e-001, 9.7956973314285e-001, 9.7956973314285e-001,
+ -6.7443922162056e-002, 6.7443922162056e-002, -2.0110465586185e-001, 2.0110465586185e-001,
+ -9.9090266227722e-001, 9.9090266227722e-001, -9.9090266227722e-001, 9.9090266227722e-001,
+ 1.3458071649075e-001, 1.3458071649075e-001, 1.3458071649075e-001, 1.3458071649075e-001,
+ 6.5780669450760e-001, 6.5780669450760e-001, -8.3486288785934e-001, -8.3486288785934e-001,
+ -7.5318676233292e-001, 7.5318676233292e-001, -5.5045801401138e-001, 5.5045801401138e-001,
+ 6.0551106929779e-001, 6.0551106929779e-001, 6.0551106929779e-001, 6.0551106929779e-001,
+ -7.9583686590195e-001, 7.9583686590195e-001, -7.9583686590195e-001, 7.9583686590195e-001,
+ 8.9596623182297e-001, 8.9596623182297e-001, 1.8906867504120e-001, 1.8906867504120e-001,
+ -4.4412216544151e-001, 4.4412216544151e-001, -9.8196375370026e-001, 9.8196375370026e-001,
+ -6.0551106929779e-001, 6.0551106929779e-001, -6.0551106929779e-001, 6.0551106929779e-001,
+ 7.9583686590195e-001, 7.9583686590195e-001, 7.9583686590195e-001, 7.9583686590195e-001,
+ 3.1950202584267e-001, 3.1950202584267e-001, -8.2804512977600e-001, -8.2804512977600e-001,
+ -9.4758558273315e-001, 9.4758558273315e-001, 5.6066155433655e-001, -5.6066155433655e-001,
+ 8.6397284269333e-001, 8.6397284269333e-001, 8.6397284269333e-001, 8.6397284269333e-001,
+ -5.0353837013245e-001, 5.0353837013245e-001, -5.0353837013245e-001, 5.0353837013245e-001,
+ 9.6539443731308e-001, 9.6539443731308e-001, 7.0275473594666e-001, 7.0275473594666e-001,
+ -2.6079413294792e-001, 2.6079413294792e-001, -7.1143209934235e-001, 7.1143209934235e-001,
+ -8.6397284269333e-001, 8.6397284269333e-001, -8.6397284269333e-001, 8.6397284269333e-001,
+ 5.0353837013245e-001, 5.0353837013245e-001, 5.0353837013245e-001, 5.0353837013245e-001,
+ 4.9822768568993e-001, 4.9822768568993e-001, -9.9998104572296e-001, -9.9998104572296e-001,
+ -8.6704623699188e-001, 8.6704623699188e-001, 6.1358809471130e-003, -6.1358809471130e-003,
+ 2.5486567616463e-001, 2.5486567616463e-001, 2.5486567616463e-001, 2.5486567616463e-001,
+ -9.6697646379471e-001, 9.6697646379471e-001, -9.6697646379471e-001, 9.6697646379471e-001,
+ 7.9210656881332e-001, 7.9210656881332e-001, -3.8834506273270e-001, -3.8834506273270e-001,
+ -6.1038279533386e-001, 6.1038279533386e-001, -9.2151403427124e-001, 9.2151403427124e-001,
+ -2.5486567616463e-001, 2.5486567616463e-001, -2.5486567616463e-001, 2.5486567616463e-001,
+ 9.6697646379471e-001, 9.6697646379471e-001, 9.6697646379471e-001, 9.6697646379471e-001,
+ 1.2849810719490e-001, 1.2849810719490e-001, -3.7700745463371e-001, -3.7700745463371e-001,
+ -9.9170976877213e-001, 9.9170976877213e-001, 9.2621028423309e-001, -9.2621028423309e-001,
+ 9.4560730457306e-001, 9.4560730457306e-001, 9.4560730457306e-001, 9.4560730457306e-001,
+ -3.2531028985977e-001, 3.2531028985977e-001, -3.2531028985977e-001, 3.2531028985977e-001,
+ 9.8630809783936e-001, 9.8630809783936e-001, 8.7901222705841e-001, 8.7901222705841e-001,
+ -1.6491311788559e-001, 1.6491311788559e-001, -4.7679924964905e-001, 4.7679924964905e-001,
+ -9.4560730457306e-001, 9.4560730457306e-001, -9.4560730457306e-001, 9.4560730457306e-001,
+ 3.2531028985977e-001, 3.2531028985977e-001, 3.2531028985977e-001, 3.2531028985977e-001,
+ 5.8081394433975e-001, 5.8081394433975e-001, -9.5870345830917e-001, -9.5870345830917e-001,
+ -8.1403630971909e-001, 8.1403630971909e-001, -2.8440755605698e-001, 2.8440755605698e-001,
+ 4.3861624598503e-001, 4.3861624598503e-001, 4.3861624598503e-001, 4.3861624598503e-001,
+ -8.9867448806763e-001, 8.9867448806763e-001, -8.9867448806763e-001, 8.9867448806763e-001,
+ 8.4812033176422e-001, 8.4812033176422e-001, -1.0412168502808e-001, -1.0412168502808e-001,
+ -5.2980363368988e-001, 5.2980363368988e-001, -9.9456453323364e-001, 9.9456453323364e-001,
+ -4.3861624598503e-001, 4.3861624598503e-001, -4.3861624598503e-001, 4.3861624598503e-001,
+ 8.9867448806763e-001, 8.9867448806763e-001, 8.9867448806763e-001, 8.9867448806763e-001,
+ 2.2508391737938e-001, 2.2508391737938e-001, -6.2963819503784e-001, -6.2963819503784e-001,
+ -9.7433936595917e-001, 9.7433936595917e-001, 7.7688843011856e-001, -7.7688843011856e-001,
+ 7.4913638830185e-001, 7.4913638830185e-001, 7.4913638830185e-001, 7.4913638830185e-001,
+ -6.6241580247879e-001, 6.6241580247879e-001, -6.6241580247879e-001, 6.6241580247879e-001,
+ 9.3518352508545e-001, 9.3518352508545e-001, 4.6597647666931e-001, 4.6597647666931e-001,
+ -3.5416352748871e-001, 3.5416352748871e-001, -8.8479721546173e-001, 8.8479721546173e-001,
+ -7.4913638830185e-001, 7.4913638830185e-001, -7.4913638830185e-001, 7.4913638830185e-001,
+ 6.6241580247879e-001, 6.6241580247879e-001, 6.6241580247879e-001, 6.6241580247879e-001,
+ 4.1084319353104e-001, 4.1084319353104e-001, -9.5514112710953e-001, -9.5514112710953e-001,
+ -9.1170603036880e-001, 9.1170603036880e-001, 2.9615086317062e-001, -2.9615086317062e-001,
+ 6.1320737004280e-002, 6.1320737004280e-002, 6.1320737004280e-002, 6.1320737004280e-002,
+ -9.9811810255051e-001, 9.9811810255051e-001, -9.9811810255051e-001, 9.9811810255051e-001,
+ 7.2846436500549e-001, 7.2846436500549e-001, -6.3912451267242e-001, -6.3912451267242e-001,
+ -6.8508368730545e-001, 6.8508368730545e-001, -7.6910322904587e-001, 7.6910322904587e-001,
+ -6.1320737004280e-002, 6.1320737004280e-002, -6.1320737004280e-002, 6.1320737004280e-002,
+ 9.9811810255051e-001, 9.9811810255051e-001, 9.9811810255051e-001, 9.9811810255051e-001,
+ 3.0674804002047e-002, 3.0674804002047e-002, -9.1908961534500e-002, -9.1908961534500e-002,
+ -9.9952942132950e-001, 9.9952942132950e-001, 9.9576741456985e-001, -9.9576741456985e-001,
+ 9.9631261825562e-001, 9.9631261825562e-001, 9.9631261825562e-001, 9.9631261825562e-001,
+ -8.5797317326069e-002, 8.5797317326069e-002, -8.5797317326069e-002, 8.5797317326069e-002,
+ 9.9907773733139e-001, 9.9907773733139e-001, 9.9170976877213e-001, 9.9170976877213e-001,
+ -4.2938258498907e-002, 4.2938258498907e-002, -1.2849812209606e-001, 1.2849812209606e-001,
+ -9.9631261825562e-001, 9.9631261825562e-001, -9.9631261825562e-001, 9.9631261825562e-001,
+ 8.5797317326069e-002, 8.5797317326069e-002, 8.5797317326069e-002, 8.5797317326069e-002,
+ 6.7609274387360e-001, 6.7609274387360e-001, -7.9210650920868e-001, -7.9210650920868e-001,
+ -7.3681652545929e-001, 7.3681652545929e-001, -6.1038291454315e-001, 6.1038291454315e-001,
+ 6.4383155107498e-001, 6.4383155107498e-001, 6.4383155107498e-001, 6.4383155107498e-001,
+ -7.6516723632813e-001, 7.6516723632813e-001, -7.6516723632813e-001, 7.6516723632813e-001,
+ 9.0659570693970e-001, 9.0659570693970e-001, 2.6079410314560e-001, 2.6079410314560e-001,
+ -4.2200028896332e-001, 4.2200028896332e-001, -9.6539437770844e-001, 9.6539437770844e-001,
+ -6.4383155107498e-001, 6.4383155107498e-001, -6.4383155107498e-001, 6.4383155107498e-001,
+ 7.6516723632813e-001, 7.6516723632813e-001, 7.6516723632813e-001, 7.6516723632813e-001,
+ 3.4266072511673e-001, 3.4266072511673e-001, -8.6704617738724e-001, -8.6704617738724e-001,
+ -9.3945920467377e-001, 9.3945920467377e-001, 4.9822762608528e-001, -4.9822762608528e-001,
+ 8.8763964176178e-001, 8.8763964176178e-001, 8.8763964176178e-001, 8.8763964176178e-001,
+ -4.6053871512413e-001, 4.6053871512413e-001, -4.6053871512413e-001, 4.6053871512413e-001,
+ 9.7150391340256e-001, 9.7150391340256e-001, 7.5318682193756e-001, 7.5318682193756e-001,
+ -2.3702360689640e-001, 2.3702360689640e-001, -6.5780675411224e-001, 6.5780675411224e-001,
+ -8.8763964176178e-001, 8.8763964176178e-001, -8.8763964176178e-001, 8.8763964176178e-001,
+ 4.6053871512413e-001, 4.6053871512413e-001, 4.6053871512413e-001, 4.6053871512413e-001,
+ 5.1935601234436e-001, 5.1935601234436e-001, -9.9772310256958e-001, -9.9772310256958e-001,
+ -8.5455799102783e-001, 8.5455799102783e-001, -6.7443966865540e-002, 6.7443966865540e-002,
+ 3.0200594663620e-001, 3.0200594663620e-001, 3.0200594663620e-001, 3.0200594663620e-001,
+ -9.5330601930618e-001, 9.5330601930618e-001, -9.5330601930618e-001, 9.5330601930618e-001,
+ 8.0684757232666e-001, 8.0684757232666e-001, -3.1950199604034e-001, -3.1950199604034e-001,
+ -5.9075969457626e-001, 5.9075969457626e-001, -9.4758564233780e-001, 9.4758564233780e-001,
+ -3.0200594663620e-001, 3.0200594663620e-001, -3.0200594663620e-001, 3.0200594663620e-001,
+ 9.5330601930618e-001, 9.5330601930618e-001, 9.5330601930618e-001, 9.5330601930618e-001,
+ 1.5279719233513e-001, 1.5279719233513e-001, -4.4412216544151e-001, -4.4412216544151e-001,
+ -9.8825758695602e-001, 9.8825758695602e-001, 8.9596629142761e-001, -8.9596629142761e-001,
+ 9.6043050289154e-001, 9.6043050289154e-001, 9.6043050289154e-001, 9.6043050289154e-001,
+ -2.7851969003677e-001, 2.7851969003677e-001, -2.7851969003677e-001, 2.7851969003677e-001,
+ 9.9005818367004e-001, 9.9005818367004e-001, 9.1170597076416e-001, 9.1170597076416e-001,
+ -1.4065824449062e-001, 1.4065824449062e-001, -4.1084313392639e-001, 4.1084313392639e-001,
+ -9.6043050289154e-001, 9.6043050289154e-001, -9.6043050289154e-001, 9.6043050289154e-001,
+ 2.7851969003677e-001, 2.7851969003677e-001, 2.7851969003677e-001, 2.7851969003677e-001,
+ 6.0061651468277e-001, 6.0061651468277e-001, -9.3518334627151e-001, -9.3518334627151e-001,
+ -7.9953724145889e-001, 7.9953724145889e-001, -3.5416358709335e-001, 3.5416358709335e-001,
+ 4.8218378424644e-001, 4.8218378424644e-001, 4.8218378424644e-001, 4.8218378424644e-001,
+ -8.7607008218765e-001, 8.7607008218765e-001, -8.7607008218765e-001, 8.7607008218765e-001,
+ 8.6086690425873e-001, 8.6086690425873e-001, -3.0674815177917e-002, -3.0674815177917e-002,
+ -5.0883013010025e-001, 5.0883013010025e-001, -9.9952930212021e-001, 9.9952930212021e-001,
+ -4.8218378424644e-001, 4.8218378424644e-001, -4.8218378424644e-001, 4.8218378424644e-001,
+ 8.7607008218765e-001, 8.7607008218765e-001, 8.7607008218765e-001, 8.7607008218765e-001,
+ 2.4892760813236e-001, 2.4892760813236e-001, -6.8508368730545e-001, -6.8508368730545e-001,
+ -9.6852207183838e-001, 9.6852207183838e-001, 7.2846436500549e-001, -7.2846436500549e-001,
+ 7.8073722124100e-001, 7.8073722124100e-001, 7.8073722124100e-001, 7.8073722124100e-001,
+ -6.2485951185226e-001, 6.2485951185226e-001, -6.2485951185226e-001, 6.2485951185226e-001,
+ 9.4359344244003e-001, 9.4359344244003e-001, 5.2980363368988e-001, 5.2980363368988e-001,
+ -3.3110630512238e-001, 3.3110630512238e-001, -8.4812033176422e-001, 8.4812033176422e-001,
+ -7.8073722124100e-001, 7.8073722124100e-001, -7.8073722124100e-001, 7.8073722124100e-001,
+ 6.2485951185226e-001, 6.2485951185226e-001, 6.2485951185226e-001, 6.2485951185226e-001,
+ 4.3309381604195e-001, 4.3309381604195e-001, -9.7433936595917e-001, -9.7433936595917e-001,
+ -9.0134882926941e-001, 9.0134882926941e-001, 2.2508388757706e-001, -2.2508388757706e-001,
+ 1.1022221297026e-001, 1.1022221297026e-001, 1.1022221297026e-001, 1.1022221297026e-001,
+ -9.9390697479248e-001, 9.9390697479248e-001, -9.9390697479248e-001, 9.9390697479248e-001,
+ 7.4505776166916e-001, 7.4505776166916e-001, -5.8081406354904e-001, -5.8081406354904e-001,
+ -6.6699993610382e-001, 6.6699993610382e-001, -8.1403625011444e-001, 8.1403625011444e-001,
+ -1.1022221297026e-001, 1.1022221297026e-001, -1.1022221297026e-001, 1.1022221297026e-001,
+ 9.9390697479248e-001, 9.9390697479248e-001, 9.9390697479248e-001, 9.9390697479248e-001,
+ 5.5195245891809e-002, 5.5195245891809e-002, -1.6491313278675e-001, -1.6491313278675e-001,
+ -9.9847555160522e-001, 9.9847555160522e-001, 9.8630803823471e-001, -9.8630803823471e-001,
+ 9.8310548067093e-001, 9.8310548067093e-001, 9.8310548067093e-001, 9.8310548067093e-001,
+ -1.8303988873959e-001, 1.8303988873959e-001, -1.8303988873959e-001, 1.8303988873959e-001,
+ 9.9576741456985e-001, 9.9576741456985e-001, 9.6212142705917e-001, 9.6212142705917e-001,
+ -9.1908961534500e-002, 9.1908961534500e-002, -2.7262136340141e-001, 2.7262136340141e-001,
+ -9.8310548067093e-001, 9.8310548067093e-001, -9.8310548067093e-001, 9.8310548067093e-001,
+ 1.8303988873959e-001, 1.8303988873959e-001, 1.8303988873959e-001, 1.8303988873959e-001,
+ 6.3912445306778e-001, 6.3912445306778e-001, -8.7309497594833e-001, -8.7309497594833e-001,
+ -7.6910334825516e-001, 7.6910334825516e-001, -4.8755019903183e-001, 4.8755019903183e-001,
+ 5.6573182344437e-001, 5.6573182344437e-001, 5.6573182344437e-001, 5.6573182344437e-001,
+ -8.2458931207657e-001, 8.2458931207657e-001, -8.2458931207657e-001, 8.2458931207657e-001,
+ 8.8479709625244e-001, 8.8479709625244e-001, 1.1631858348846e-001, 1.1631858348846e-001,
+ -4.6597650647163e-001, 4.6597650647163e-001, -9.9321198463440e-001, 9.9321198463440e-001,
+ -5.6573182344437e-001, 5.6573182344437e-001, -5.6573182344437e-001, 5.6573182344437e-001,
+ 8.2458931207657e-001, 8.2458931207657e-001, 8.2458931207657e-001, 8.2458931207657e-001,
+ 2.9615089297295e-001, 2.9615089297295e-001, -7.8455662727356e-001, -7.8455662727356e-001,
+ -9.5514118671417e-001, 9.5514118671417e-001, 6.2005722522736e-001, -6.2005722522736e-001,
+ 8.3822470903397e-001, 8.3822470903397e-001, 8.3822470903397e-001, 8.3822470903397e-001,
+ -5.4532498121262e-001, 5.4532498121262e-001, -5.4532498121262e-001, 5.4532498121262e-001,
+ 9.5870345830917e-001, 9.5870345830917e-001, 6.4851438999176e-001, 6.4851438999176e-001,
+ -2.8440755605698e-001, 2.8440755605698e-001, -7.6120227575302e-001, 7.6120227575302e-001,
+ -8.3822470903397e-001, 8.3822470903397e-001, -8.3822470903397e-001, 8.3822470903397e-001,
+ 5.4532498121262e-001, 5.4532498121262e-001, 5.4532498121262e-001, 5.4532498121262e-001,
+ 4.7679924964905e-001, 4.7679924964905e-001, -9.9682033061981e-001, -9.9682033061981e-001,
+ -8.7901222705841e-001, 8.7901222705841e-001, 7.9682409763336e-002, -7.9682409763336e-002,
+ 2.0711138844490e-001, 2.0711138844490e-001, 2.0711138844490e-001, 2.0711138844490e-001,
+ -9.7831737995148e-001, 9.7831737995148e-001, -9.7831737995148e-001, 9.7831737995148e-001,
+ 7.7688843011856e-001, 7.7688843011856e-001, -4.5508366823196e-001, -4.5508366823196e-001,
+ -6.2963825464249e-001, 6.2963825464249e-001, -8.9044862985611e-001, 8.9044862985611e-001,
+ -2.0711138844490e-001, 2.0711138844490e-001, -2.0711138844490e-001, 2.0711138844490e-001,
+ 9.7831737995148e-001, 9.7831737995148e-001, 9.7831737995148e-001, 9.7831737995148e-001,
+ 1.0412164032459e-001, 1.0412164032459e-001, -3.0784964561462e-001, -3.0784964561462e-001,
+ -9.9456459283829e-001, 9.9456459283829e-001, 9.5143502950668e-001, -9.5143502950668e-001,
+ 9.2850607633591e-001, 9.2850607633591e-001, 9.2850607633591e-001, 9.2850607633591e-001,
+ -3.7131720781326e-001, 3.7131720781326e-001, -3.7131720781326e-001, 3.7131720781326e-001,
+ 9.8196387290955e-001, 9.8196387290955e-001, 8.4155499935150e-001, 8.4155499935150e-001,
+ -1.8906867504120e-001, 1.8906867504120e-001, -5.4017150402069e-001, 5.4017150402069e-001,
+ -9.2850607633591e-001, 9.2850607633591e-001, -9.2850607633591e-001, 9.2850607633591e-001,
+ 3.7131720781326e-001, 3.7131720781326e-001, 3.7131720781326e-001, 3.7131720781326e-001,
+ 5.6066161394119e-001, 5.6066161394119e-001, -9.7702807188034e-001, -9.7702807188034e-001,
+ -8.2804501056671e-001, 8.2804501056671e-001, -2.1311044692993e-001, 2.1311044692993e-001,
+ 3.9399203658104e-001, 3.9399203658104e-001, 3.9399203658104e-001, 3.9399203658104e-001,
+ -9.1911387443542e-001, 9.1911387443542e-001, -9.1911387443542e-001, 9.1911387443542e-001,
+ 8.3486288785934e-001, 8.3486288785934e-001, -1.7700427770615e-001, -1.7700427770615e-001,
+ -5.5045801401138e-001, 5.5045801401138e-001, -9.8421007394791e-001, 9.8421007394791e-001,
+ -3.9399203658104e-001, 3.9399203658104e-001, -3.9399203658104e-001, 3.9399203658104e-001,
+ 9.1911387443542e-001, 9.1911387443542e-001, 9.1911387443542e-001, 9.1911387443542e-001,
+ 2.0110464096069e-001, 2.0110464096069e-001, -5.7078075408936e-001, -5.7078075408936e-001,
+ -9.7956979274750e-001, 9.7956979274750e-001, 8.2110255956650e-001, -8.2110255956650e-001,
+ 7.1573078632355e-001, 7.1573078632355e-001, 7.1573078632355e-001, 7.1573078632355e-001,
+ -6.9837623834610e-001, 6.9837623834610e-001, -6.9837623834610e-001, 6.9837623834610e-001,
+ 9.2621022462845e-001, 9.2621022462845e-001, 3.9962416887283e-001, 3.9962416887283e-001,
+ -3.7700742483139e-001, 3.7700742483139e-001, -9.1667896509171e-001, 9.1667896509171e-001,
+ -7.1573078632355e-001, 7.1573078632355e-001, -7.1573078632355e-001, 7.1573078632355e-001,
+ 6.9837623834610e-001, 6.9837623834610e-001, 6.9837623834610e-001, 6.9837623834610e-001,
+ 3.8834506273270e-001, 3.8834506273270e-001, -9.3076688051224e-001, -9.3076688051224e-001,
+ -9.2151403427124e-001, 9.2151403427124e-001, 3.6561298370361e-001, -3.6561298370361e-001,
+ 1.2271538376808e-002, 1.2271538376808e-002, 1.2271538376808e-002, 1.2271538376808e-002,
+ -9.9992471933365e-001, 9.9992471933365e-001, -9.9992471933365e-001, 9.9992471933365e-001,
+ 7.1143215894699e-001, 7.1143215894699e-001, -6.9397145509720e-001, -6.9397145509720e-001,
+ -7.0275473594666e-001, 7.0275473594666e-001, -7.2000241279602e-001, 7.2000241279602e-001,
+ -1.2271538376808e-002, 1.2271538376808e-002, -1.2271538376808e-002, 1.2271538376808e-002,
+ 9.9992471933365e-001, 9.9992471933365e-001, 9.9992471933365e-001, 9.9992471933365e-001,
+ 6.1358846724033e-003, 6.1358846724033e-003, -1.8406730145216e-002, -1.8406730145216e-002,
+ -9.9998116493225e-001, 9.9998116493225e-001, 9.9983054399490e-001, -9.9983054399490e-001,
+ 9.9998116493225e-001, 9.9998116493225e-001, 9.9998116493225e-001, 9.9998116493225e-001,
+ -6.1358846724033e-003, 6.1358846724033e-003, -6.1358846724033e-003, 6.1358846724033e-003,
+ 9.9999529123306e-001, 9.9999529123306e-001, 9.9995762109756e-001, 9.9995762109756e-001,
+ -3.0679567717016e-003, 3.0679567717016e-003, -9.2037543654442e-003, 9.2037543654442e-003,
+ -9.9998116493225e-001, 9.9998116493225e-001, -9.9998116493225e-001, 9.9998116493225e-001,
+ 6.1358846724033e-003, 6.1358846724033e-003, 6.1358846724033e-003, 6.1358846724033e-003,
+ 7.0493412017822e-001, 7.0493412017822e-001, -7.1358478069305e-001, -7.1358478069305e-001,
+ -7.0927280187607e-001, 7.0927280187607e-001, -7.0056885480881e-001, 7.0056885480881e-001,
+ 7.0275473594666e-001, 7.0275473594666e-001, 7.0275473594666e-001, 7.0275473594666e-001,
+ -7.1143215894699e-001, 7.1143215894699e-001, -7.1143215894699e-001, 7.1143215894699e-001,
+ 9.2270112037659e-001, 9.2270112037659e-001, 3.7416404485703e-001, 3.7416404485703e-001,
+ -3.8551607728004e-001, 3.8551607728004e-001, -9.2736244201660e-001, 9.2736244201660e-001,
+ -7.0275473594666e-001, 7.0275473594666e-001, -7.0275473594666e-001, 7.0275473594666e-001,
+ 7.1143215894699e-001, 7.1143215894699e-001, 7.1143215894699e-001, 7.1143215894699e-001,
+ 3.7984722852707e-001, 3.7984722852707e-001, -9.2031830549240e-001, -9.2031830549240e-001,
+ -9.2504924535751e-001, 9.2504924535751e-001, 3.9117038249969e-001, -3.9117038249969e-001,
+ 9.2151403427124e-001, 9.2151403427124e-001, 9.2151403427124e-001, 9.2151403427124e-001,
+ -3.8834506273270e-001, 3.8834506273270e-001, -3.8834506273270e-001, 3.8834506273270e-001,
+ 9.8018211126328e-001, 9.8018211126328e-001, 8.2632100582123e-001, 8.2632100582123e-001,
+ -1.9809842109680e-001, 1.9809842109680e-001, -5.6319934129715e-001, 5.6319934129715e-001,
+ -9.2151403427124e-001, 9.2151403427124e-001, -9.2151403427124e-001, 9.2151403427124e-001,
+ 3.8834506273270e-001, 3.8834506273270e-001, 3.8834506273270e-001, 3.8834506273270e-001,
+ 5.5301672220230e-001, 5.5301672220230e-001, -9.8253935575485e-001, -9.8253935575485e-001,
+ -8.3317017555237e-001, 8.3317017555237e-001, -1.8605518341064e-001, 1.8605518341064e-001,
+ 3.7700742483139e-001, 3.7700742483139e-001, 3.7700742483139e-001, 3.7700742483139e-001,
+ -9.2621022462845e-001, 9.2621022462845e-001, -9.2621022462845e-001, 9.2621022462845e-001,
+ 8.2976120710373e-001, 8.2976120710373e-001, -2.0410901308060e-001, -2.0410901308060e-001,
+ -5.5811852216721e-001, 5.5811852216721e-001, -9.7894805669785e-001, 9.7894805669785e-001,
+ -3.7700742483139e-001, 3.7700742483139e-001, -3.7700742483139e-001, 3.7700742483139e-001,
+ 9.2621022462845e-001, 9.2621022462845e-001, 9.2621022462845e-001, 9.2621022462845e-001,
+ 1.9208040833473e-001, 1.9208040833473e-001, -5.4789412021637e-001, -5.4789412021637e-001,
+ -9.8137921094894e-001, 9.8137921094894e-001, 8.3654773235321e-001, -8.3654773235321e-001,
+ 9.7956979274750e-001, 9.7956979274750e-001, 9.7956979274750e-001, 9.7956979274750e-001,
+ -2.0110464096069e-001, 2.0110464096069e-001, -2.0110464096069e-001, 2.0110464096069e-001,
+ 9.9487930536270e-001, 9.9487930536270e-001, 9.5422804355621e-001, 9.5422804355621e-001,
+ -1.0106986761093e-001, 1.0106986761093e-001, -2.9907983541489e-001, 2.9907983541489e-001,
+ -9.7956979274750e-001, 9.7956979274750e-001, -9.7956979274750e-001, 9.7956979274750e-001,
+ 2.0110464096069e-001, 2.0110464096069e-001, 2.0110464096069e-001, 2.0110464096069e-001,
+ 6.3201874494553e-001, 6.3201874494553e-001, -8.8622254133224e-001, -8.8622254133224e-001,
+ -7.7495306730270e-001, 7.7495306730270e-001, -4.6325987577438e-001, 4.6325987577438e-001,
+ 5.5045801401138e-001, 5.5045801401138e-001, 5.5045801401138e-001, 5.5045801401138e-001,
+ -8.3486288785934e-001, 8.3486288785934e-001, -8.3486288785934e-001, 8.3486288785934e-001,
+ 8.8047087192535e-001, 8.8047087192535e-001, 8.8853478431702e-002, 8.8853478431702e-002,
+ -4.7410023212433e-001, 4.7410023212433e-001, -9.9604463577271e-001, 9.9604463577271e-001,
+ -5.5045801401138e-001, 5.5045801401138e-001, -5.5045801401138e-001, 5.5045801401138e-001,
+ 8.3486288785934e-001, 8.3486288785934e-001, 8.3486288785934e-001, 8.3486288785934e-001,
+ 2.8734746575356e-001, 2.8734746575356e-001, -7.6713907718658e-001, -7.6713907718658e-001,
+ -9.5782643556595e-001, 9.5782643556595e-001, 6.4148104190826e-001, -6.4148104190826e-001,
+ 8.2804501056671e-001, 8.2804501056671e-001, 8.2804501056671e-001, 8.2804501056671e-001,
+ -5.6066161394119e-001, 5.6066161394119e-001, -5.6066161394119e-001, 5.6066161394119e-001,
+ 9.5604526996613e-001, 9.5604526996613e-001, 6.2725180387497e-001, 6.2725180387497e-001,
+ -2.9321917891502e-001, 2.9321917891502e-001, -7.7881658077240e-001, 7.7881658077240e-001,
+ -8.2804501056671e-001, 8.2804501056671e-001, -8.2804501056671e-001, 8.2804501056671e-001,
+ 5.6066161394119e-001, 5.6066161394119e-001, 5.6066161394119e-001, 5.6066161394119e-001,
+ 4.6868884563446e-001, 4.6868884563446e-001, -9.9424028396606e-001, -9.9424028396606e-001,
+ -8.8336330652237e-001, 8.8336330652237e-001, 1.0717236995697e-001, -1.0717236995697e-001,
+ 1.8906867504120e-001, 1.8906867504120e-001, 1.8906867504120e-001, 1.8906867504120e-001,
+ -9.8196387290955e-001, 9.8196387290955e-001, -9.8196387290955e-001, 9.8196387290955e-001,
+ 7.7106052637100e-001, 7.7106052637100e-001, -4.7949379682541e-001, -4.7949379682541e-001,
+ -6.3676190376282e-001, 6.3676190376282e-001, -8.7754523754120e-001, 8.7754523754120e-001,
+ -1.8906867504120e-001, 1.8906867504120e-001, -1.8906867504120e-001, 1.8906867504120e-001,
+ 9.8196387290955e-001, 9.8196387290955e-001, 9.8196387290955e-001, 9.8196387290955e-001,
+ 9.4963498413563e-002, 9.4963498413563e-002, -2.8146496415138e-001, -2.8146496415138e-001,
+ -9.9548077583313e-001, 9.9548077583313e-001, 9.5957154035568e-001, -9.5957154035568e-001,
+ 9.9456459283829e-001, 9.9456459283829e-001, 9.9456459283829e-001, 9.9456459283829e-001,
+ -1.0412164032459e-001, 1.0412164032459e-001, -1.0412164032459e-001, 1.0412164032459e-001,
+ 9.9864023923874e-001, 9.9864023923874e-001, 9.8778414726257e-001, 9.8778414726257e-001,
+ -5.2131704986095e-002, 5.2131704986095e-002, -1.5582841634750e-001, 1.5582841634750e-001,
+ -9.9456459283829e-001, 9.9456459283829e-001, -9.9456459283829e-001, 9.9456459283829e-001,
+ 1.0412164032459e-001, 1.0412164032459e-001, 1.0412164032459e-001, 1.0412164032459e-001,
+ 6.6928261518478e-001, 6.6928261518478e-001, -8.0865615606308e-001, -8.0865615606308e-001,
+ -7.4300795793533e-001, 7.4300795793533e-001, -5.8828157186508e-001, 5.8828157186508e-001,
+ 6.2963825464249e-001, 6.2963825464249e-001, 6.2963825464249e-001, 6.2963825464249e-001,
+ -7.7688843011856e-001, 7.7688843011856e-001, -7.7688843011856e-001, 7.7688843011856e-001,
+ 9.0267330408096e-001, 9.0267330408096e-001, 2.3404198884964e-001, 2.3404198884964e-001,
+ -4.3032649159431e-001, 4.3032649159431e-001, -9.7222638130188e-001, 9.7222638130188e-001,
+ -6.2963825464249e-001, 6.2963825464249e-001, -6.2963825464249e-001, 6.2963825464249e-001,
+ 7.7688843011856e-001, 7.7688843011856e-001, 7.7688843011856e-001, 7.7688843011856e-001,
+ 3.3399966359138e-001, 3.3399966359138e-001, -8.5296058654785e-001, -8.5296058654785e-001,
+ -9.4257318973541e-001, 9.4257318973541e-001, 5.2197527885437e-001, -5.2197527885437e-001,
+ 8.7901222705841e-001, 8.7901222705841e-001, 8.7901222705841e-001, 8.7901222705841e-001,
+ -4.7679924964905e-001, 4.7679924964905e-001, -4.7679924964905e-001, 4.7679924964905e-001,
+ 9.6928125619888e-001, 9.6928125619888e-001, 7.3473888635635e-001, 7.3473888635635e-001,
+ -2.4595504999161e-001, 2.4595504999161e-001, -6.7835009098053e-001, 6.7835009098053e-001,
+ -8.7901222705841e-001, 8.7901222705841e-001, -8.7901222705841e-001, 8.7901222705841e-001,
+ 4.7679924964905e-001, 4.7679924964905e-001, 4.7679924964905e-001, 4.7679924964905e-001,
+ 5.1146888732910e-001, 5.1146888732910e-001, -9.9920475482941e-001, -9.9920475482941e-001,
+ -8.5930180549622e-001, 8.5930180549622e-001, -3.9873003959656e-002, 3.9873003959656e-002,
+ 2.8440755605698e-001, 2.8440755605698e-001, 2.8440755605698e-001, 2.8440755605698e-001,
+ -9.5870345830917e-001, 9.5870345830917e-001, -9.5870345830917e-001, 9.5870345830917e-001,
+ 8.0137616395950e-001, 8.0137616395950e-001, -3.4554141759872e-001, -3.4554141759872e-001,
+ -5.9816074371338e-001, 5.9816074371338e-001, -9.3840348720551e-001, 9.3840348720551e-001,
+ -2.8440755605698e-001, 2.8440755605698e-001, -2.8440755605698e-001, 2.8440755605698e-001,
+ 9.5870345830917e-001, 9.5870345830917e-001, 9.5870345830917e-001, 9.5870345830917e-001,
+ 1.4369504153728e-001, 1.4369504153728e-001, -4.1921687126160e-001, -4.1921687126160e-001,
+ -9.8962199687958e-001, 9.8962199687958e-001, 9.0788608789444e-001, -9.0788608789444e-001,
+ 9.5514118671417e-001, 9.5514118671417e-001, 9.5514118671417e-001, 9.5514118671417e-001,
+ -2.9615089297295e-001, 2.9615089297295e-001, -2.9615089297295e-001, 2.9615089297295e-001,
+ 9.8872166872025e-001, 9.8872166872025e-001, 9.0001589059830e-001, 9.0001589059830e-001,
+ -1.4976453781128e-001, 1.4976453781128e-001, -4.3585705757141e-001, 4.3585705757141e-001,
+ -9.5514118671417e-001, 9.5514118671417e-001, -9.5514118671417e-001, 9.5514118671417e-001,
+ 2.9615089297295e-001, 2.9615089297295e-001, 2.9615089297295e-001, 2.9615089297295e-001,
+ 5.9323233366013e-001, 5.9323233366013e-001, -9.4460481405258e-001, -9.4460481405258e-001,
+ -8.0503129959106e-001, 8.0503129959106e-001, -3.2820999622345e-001, 3.2820999622345e-001,
+ 4.6597650647163e-001, 4.6597650647163e-001, 4.6597650647163e-001, 4.6597650647163e-001,
+ -8.8479709625244e-001, 8.8479709625244e-001, -8.8479709625244e-001, 8.8479709625244e-001,
+ 8.5614734888077e-001, 8.5614734888077e-001, -5.8258235454559e-002, -5.8258235454559e-002,
+ -5.1673179864883e-001, 5.1673179864883e-001, -9.9830156564713e-001, 9.9830156564713e-001,
+ -4.6597650647163e-001, 4.6597650647163e-001, -4.6597650647163e-001, 4.6597650647163e-001,
+ 8.8479709625244e-001, 8.8479709625244e-001, 8.8479709625244e-001, 8.8479709625244e-001,
+ 2.4000303447247e-001, 2.4000303447247e-001, -6.6471099853516e-001, -6.6471099853516e-001,
+ -9.7077214717865e-001, 9.7077214717865e-001, 7.4710059165955e-001, -7.4710059165955e-001,
+ 7.6910334825516e-001, 7.6910334825516e-001, 7.6910334825516e-001, 7.6910334825516e-001,
+ -6.3912445306778e-001, 6.3912445306778e-001, -6.3912445306778e-001, 6.3912445306778e-001,
+ 9.4050604104996e-001, 9.4050604104996e-001, 5.0618660449982e-001, 5.0618660449982e-001,
+ -3.3977690339088e-001, 3.3977690339088e-001, -8.6242389678955e-001, 8.6242389678955e-001,
+ -7.6910334825516e-001, 7.6910334825516e-001, -7.6910334825516e-001, 7.6910334825516e-001,
+ 6.3912445306778e-001, 6.3912445306778e-001, 6.3912445306778e-001, 6.3912445306778e-001,
+ 4.2477968335152e-001, 4.2477968335152e-001, -9.6775388717651e-001, -9.6775388717651e-001,
+ -9.0529674291611e-001, 9.0529674291611e-001, 2.5189781188965e-001, -2.5189781188965e-001,
+ 9.1908961534500e-002, 9.1908961534500e-002, 9.1908961534500e-002, 9.1908961534500e-002,
+ -9.9576741456985e-001, 9.9576741456985e-001, -9.9576741456985e-001, 9.9576741456985e-001,
+ 7.3888731002808e-001, 7.3888731002808e-001, -6.0306668281555e-001, -6.0306668281555e-001,
+ -6.7382901906967e-001, 6.7382901906967e-001, -7.9769080877304e-001, 7.9769080877304e-001,
+ -9.1908961534500e-002, 9.1908961534500e-002, -9.1908961534500e-002, 9.1908961534500e-002,
+ 9.9576741456985e-001, 9.9576741456985e-001, 9.9576741456985e-001, 9.9576741456985e-001,
+ 4.6003185212612e-002, 4.6003185212612e-002, -1.3762012124062e-001, -1.3762012124062e-001,
+ -9.9894130229950e-001, 9.9894130229950e-001, 9.9048507213593e-001, -9.9048507213593e-001,
+ 9.9847555160522e-001, 9.9847555160522e-001, 9.9847555160522e-001, 9.9847555160522e-001,
+ -5.5195245891809e-002, 5.5195245891809e-002, -5.5195245891809e-002, 5.5195245891809e-002,
+ 9.9961882829666e-001, 9.9961882829666e-001, 9.9657112360001e-001, 9.9657112360001e-001,
+ -2.7608146890998e-002, 2.7608146890998e-002, -8.2740262150764e-002, 8.2740262150764e-002,
+ -9.9847555160522e-001, 9.9847555160522e-001, -9.9847555160522e-001, 9.9847555160522e-001,
+ 5.5195245891809e-002, 5.5195245891809e-002, 5.5195245891809e-002, 5.5195245891809e-002,
+ 6.8731534481049e-001, 6.8731534481049e-001, -7.6318836212158e-001, -7.6318836212158e-001,
+ -7.2635912895203e-001, 7.2635912895203e-001, -6.4617598056793e-001, 6.4617598056793e-001,
+ 6.6699993610382e-001, 6.6699993610382e-001, 6.6699993610382e-001, 6.6699993610382e-001,
+ -7.4505776166916e-001, 7.4505776166916e-001, -7.4505776166916e-001, 7.4505776166916e-001,
+ 9.1296219825745e-001, 9.1296219825745e-001, 3.0492925643921e-001, 3.0492925643921e-001,
+ -4.0804415941238e-001, 4.0804415941238e-001, -9.5237499475479e-001, 9.5237499475479e-001,
+ -6.6699993610382e-001, 6.6699993610382e-001, -6.6699993610382e-001, 6.6699993610382e-001,
+ 7.4505776166916e-001, 7.4505776166916e-001, 7.4505776166916e-001, 7.4505776166916e-001,
+ 3.5703095793724e-001, 3.5703095793724e-001, -8.8904833793640e-001, -8.8904833793640e-001,
+ -9.3409252166748e-001, 9.3409252166748e-001, 4.5781326293945e-001, -4.5781326293945e-001,
+ 9.0134882926941e-001, 9.0134882926941e-001, 9.0134882926941e-001, 9.0134882926941e-001,
+ -4.3309381604195e-001, 4.3309381604195e-001, -4.3309381604195e-001, 4.3309381604195e-001,
+ 9.7502535581589e-001, 9.7502535581589e-001, 7.8265058994293e-001, 7.8265058994293e-001,
+ -2.2209362685680e-001, 2.2209362685680e-001, -6.2246125936508e-001, 6.2246125936508e-001,
+ -9.0134882926941e-001, 9.0134882926941e-001, -9.0134882926941e-001, 9.0134882926941e-001,
+ 4.3309381604195e-001, 4.3309381604195e-001, 4.3309381604195e-001, 4.3309381604195e-001,
+ 5.3240311145782e-001, 5.3240311145782e-001, -9.9356412887573e-001, -9.9356412887573e-001,
+ -8.4649091959000e-001, 8.4649091959000e-001, -1.1327093839645e-001, 1.1327093839645e-001,
+ 3.3110630512238e-001, 3.3110630512238e-001, 3.3110630512238e-001, 3.3110630512238e-001,
+ -9.4359344244003e-001, 9.4359344244003e-001, -9.4359344244003e-001, 9.4359344244003e-001,
+ 8.1581437587738e-001, 8.1581437587738e-001, -2.7557194232941e-001, -2.7557194232941e-001,
+ -5.7831382751465e-001, 5.7831382751465e-001, -9.6128034591675e-001, 9.6128034591675e-001,
+ -3.3110630512238e-001, 3.3110630512238e-001, -3.3110630512238e-001, 3.3110630512238e-001,
+ 9.4359344244003e-001, 9.4359344244003e-001, 9.4359344244003e-001, 9.4359344244003e-001,
+ 1.6793830692768e-001, 1.6793830692768e-001, -4.8486924171448e-001, -4.8486924171448e-001,
+ -9.8579752445221e-001, 9.8579752445221e-001, 8.7458664178848e-001, -8.7458664178848e-001,
+ 9.6852207183838e-001, 9.6852207183838e-001, 9.6852207183838e-001, 9.6852207183838e-001,
+ -2.4892760813236e-001, 2.4892760813236e-001, -2.4892760813236e-001, 2.4892760813236e-001,
+ 9.9209928512573e-001, 9.9209928512573e-001, 9.2964088916779e-001, 9.2964088916779e-001,
+ -1.2545499205589e-001, 1.2545499205589e-001, -3.6846682429314e-001, 3.6846682429314e-001,
+ -9.6852207183838e-001, 9.6852207183838e-001, -9.6852207183838e-001, 9.6852207183838e-001,
+ 2.4892760813236e-001, 2.4892760813236e-001, 2.4892760813236e-001, 2.4892760813236e-001,
+ 6.1281007528305e-001, 6.1281007528305e-001, -9.1790074110031e-001, -9.1790074110031e-001,
+ -7.9023021459579e-001, 7.9023021459579e-001, -3.9680999517441e-001, 3.9680999517441e-001,
+ 5.0883013010025e-001, 5.0883013010025e-001, 5.0883013010025e-001, 5.0883013010025e-001,
+ -8.6086690425873e-001, 8.6086690425873e-001, -8.6086690425873e-001, 8.6086690425873e-001,
+ 8.6857068538666e-001, 8.6857068538666e-001, 1.5339195728302e-002, 1.5339195728302e-002,
+ -4.9556526541710e-001, 4.9556526541710e-001, -9.9988222122192e-001, 9.9988222122192e-001,
+ -5.0883013010025e-001, 5.0883013010025e-001, -5.0883013010025e-001, 5.0883013010025e-001,
+ 8.6086690425873e-001, 8.6086690425873e-001, 8.6086690425873e-001, 8.6086690425873e-001,
+ 2.6375469565392e-001, 2.6375469565392e-001, -7.1786999702454e-001, -7.1786999702454e-001,
+ -9.6458977460861e-001, 9.6458977460861e-001, 6.9617712497711e-001, -6.9617712497711e-001,
+ 7.9953724145889e-001, 7.9953724145889e-001, 7.9953724145889e-001, 7.9953724145889e-001,
+ -6.0061651468277e-001, 6.0061651468277e-001, -6.0061651468277e-001, 6.0061651468277e-001,
+ 9.4856137037277e-001, 9.4856137037277e-001, 5.6825894117355e-001, 5.6825894117355e-001,
+ -3.1659337878227e-001, 3.1659337878227e-001, -8.2284986972809e-001, 8.2284986972809e-001,
+ -7.9953724145889e-001, 7.9953724145889e-001, -7.9953724145889e-001, 7.9953724145889e-001,
+ 6.0061651468277e-001, 6.0061651468277e-001, 6.0061651468277e-001, 6.0061651468277e-001,
+ 4.4686883687973e-001, 4.4686883687973e-001, -9.8366242647171e-001, -9.8366242647171e-001,
+ -8.9459949731827e-001, 8.9459949731827e-001, 1.8002295494080e-001, -1.8002295494080e-001,
+ 1.4065824449062e-001, 1.4065824449062e-001, 1.4065824449062e-001, 1.4065824449062e-001,
+ -9.9005818367004e-001, 9.9005818367004e-001, -9.9005818367004e-001, 9.9005818367004e-001,
+ 7.5520133972168e-001, 7.5520133972168e-001, -5.4275071620941e-001, -5.4275071620941e-001,
+ -6.5549284219742e-001, 6.5549284219742e-001, -8.3989363908768e-001, 8.3989363908768e-001,
+ -1.4065824449062e-001, 1.4065824449062e-001, -1.4065824449062e-001, 1.4065824449062e-001,
+ 9.9005818367004e-001, 9.9005818367004e-001, 9.9005818367004e-001, 9.9005818367004e-001,
+ 7.0504575967789e-002, 7.0504575967789e-002, -2.1011185646057e-001, -2.1011185646057e-001,
+ -9.9751144647598e-001, 9.9751144647598e-001, 9.7767734527588e-001, -9.7767734527588e-001,
+ 9.8825758695602e-001, 9.8825758695602e-001, 9.8825758695602e-001, 9.8825758695602e-001,
+ -1.5279719233513e-001, 1.5279719233513e-001, -1.5279719233513e-001, 1.5279719233513e-001,
+ 9.9706006050110e-001, 9.9706006050110e-001, 9.7364425659180e-001, 9.7364425659180e-001,
+ -7.6623864471912e-002, 7.6623864471912e-002, -2.2807210683823e-001, 2.2807210683823e-001,
+ -9.8825758695602e-001, 9.8825758695602e-001, -9.8825758695602e-001, 9.8825758695602e-001,
+ 1.5279719233513e-001, 1.5279719233513e-001, 1.5279719233513e-001, 1.5279719233513e-001,
+ 6.5084671974182e-001, 6.5084671974182e-001, -8.4974169731140e-001, -8.4974169731140e-001,
+ -7.5920915603638e-001, 7.5920915603638e-001, -5.2719926834106e-001, 5.2719926834106e-001,
+ 5.9075969457626e-001, 5.9075969457626e-001, 5.9075969457626e-001, 5.9075969457626e-001,
+ -8.0684757232666e-001, 8.0684757232666e-001, -8.0684757232666e-001, 8.0684757232666e-001,
+ 8.9184069633484e-001, 8.9184069633484e-001, 1.6188633441925e-001, 1.6188633441925e-001,
+ -4.5234960317612e-001, 4.5234960317612e-001, -9.8680943250656e-001, 9.8680943250656e-001,
+ -5.9075969457626e-001, 5.9075969457626e-001, -5.9075969457626e-001, 5.9075969457626e-001,
+ 8.0684757232666e-001, 8.0684757232666e-001, 8.0684757232666e-001, 8.0684757232666e-001,
+ 3.1076717376709e-001, 3.1076717376709e-001, -8.1225049495697e-001, -8.1225049495697e-001,
+ -9.5048606395721e-001, 9.5048606395721e-001, 5.8330863714218e-001, -5.8330863714218e-001,
+ 8.5455799102783e-001, 8.5455799102783e-001, 8.5455799102783e-001, 8.5455799102783e-001,
+ -5.1935601234436e-001, 5.1935601234436e-001, -5.1935601234436e-001, 5.1935601234436e-001,
+ 9.6295326948166e-001, 9.6295326948166e-001, 6.8284553289413e-001, 6.8284553289413e-001,
+ -2.6966834068298e-001, 2.6966834068298e-001, -7.3056280612946e-001, 7.3056280612946e-001,
+ -8.5455799102783e-001, 8.5455799102783e-001, -8.5455799102783e-001, 8.5455799102783e-001,
+ 5.1935601234436e-001, 5.1935601234436e-001, 5.1935601234436e-001, 5.1935601234436e-001,
+ 4.9022650718689e-001, 4.9022650718689e-001, -9.9943053722382e-001, -9.9943053722382e-001,
+ -8.7159508466721e-001, 8.7159508466721e-001, 3.3741116523743e-002, -3.3741116523743e-002,
+ 2.3702360689640e-001, 2.3702360689640e-001, 2.3702360689640e-001, 2.3702360689640e-001,
+ -9.7150391340256e-001, 9.7150391340256e-001, -9.7150391340256e-001, 9.7150391340256e-001,
+ 7.8645521402359e-001, 7.8645521402359e-001, -4.1363841295242e-001, -4.1363841295242e-001,
+ -6.1764734983444e-001, 6.1764734983444e-001, -9.1044133901596e-001, 9.1044133901596e-001,
+ -2.3702360689640e-001, 2.3702360689640e-001, -2.3702360689640e-001, 2.3702360689640e-001,
+ 9.7150391340256e-001, 9.7150391340256e-001, 9.7150391340256e-001, 9.7150391340256e-001,
+ 1.1936521530151e-001, 1.1936521530151e-001, -3.5129275918007e-001, -3.5129275918007e-001,
+ -9.9285042285919e-001, 9.9285042285919e-001, 9.3626564741135e-001, -9.3626564741135e-001,
+ 9.3945920467377e-001, 9.3945920467377e-001, 9.3945920467377e-001, 9.3945920467377e-001,
+ -3.4266072511673e-001, 3.4266072511673e-001, -3.4266072511673e-001, 3.4266072511673e-001,
+ 9.8474848270416e-001, 9.8474848270416e-001, 8.6551362276077e-001, 8.6551362276077e-001,
+ -1.7398387193680e-001, 1.7398387193680e-001, -5.0088536739349e-001, 5.0088536739349e-001,
+ -9.3945920467377e-001, 9.3945920467377e-001, -9.3945920467377e-001, 9.3945920467377e-001,
+ 3.4266072511673e-001, 3.4266072511673e-001, 3.4266072511673e-001, 3.4266072511673e-001,
+ 5.7329720258713e-001, 5.7329720258713e-001, -9.6618992090225e-001, -9.6618992090225e-001,
+ -8.1934750080109e-001, 8.1934750080109e-001, -2.5783121585846e-001, 2.5783121585846e-001,
+ 4.2200028896332e-001, 4.2200028896332e-001, 4.2200028896332e-001, 4.2200028896332e-001,
+ -9.0659570693970e-001, 9.0659570693970e-001, -9.0659570693970e-001, 9.0659570693970e-001,
+ 8.4320825338364e-001, 8.4320825338364e-001, -1.3154006004333e-001, -1.3154006004333e-001,
+ -5.3758710622787e-001, 5.3758710622787e-001, -9.9131089448929e-001, 9.9131089448929e-001,
+ -4.2200028896332e-001, 4.2200028896332e-001, -4.2200028896332e-001, 4.2200028896332e-001,
+ 9.0659570693970e-001, 9.0659570693970e-001, 9.0659570693970e-001, 9.0659570693970e-001,
+ 2.1610680222511e-001, 2.1610680222511e-001, -6.0794985294342e-001, -6.0794985294342e-001,
+ -9.7636973857880e-001, 9.7636973857880e-001, 7.9397547245026e-001, -7.9397547245026e-001,
+ 7.3681652545929e-001, 7.3681652545929e-001, 7.3681652545929e-001, 7.3681652545929e-001,
+ -6.7609274387360e-001, 6.7609274387360e-001, -6.7609274387360e-001, 6.7609274387360e-001,
+ 9.3188428878784e-001, 9.3188428878784e-001, 4.4137123227119e-001, 4.4137123227119e-001,
+ -3.6275574564934e-001, 3.6275574564934e-001, -8.9732468128204e-001, 8.9732468128204e-001,
+ -7.3681652545929e-001, 7.3681652545929e-001, -7.3681652545929e-001, 7.3681652545929e-001,
+ 6.7609274387360e-001, 6.7609274387360e-001, 6.7609274387360e-001, 6.7609274387360e-001,
+ 4.0243464708328e-001, 4.0243464708328e-001, -9.4660085439682e-001, -9.4660085439682e-001,
+ -9.1544872522354e-001, 9.1544872522354e-001, 3.2240772247314e-001, -3.2240772247314e-001,
+ 4.2938258498907e-002, 4.2938258498907e-002, 4.2938258498907e-002, 4.2938258498907e-002,
+ -9.9907773733139e-001, 9.9907773733139e-001, -9.9907773733139e-001, 9.9907773733139e-001,
+ 7.2212815284729e-001, 7.2212815284729e-001, -6.6011440753937e-001, -6.6011440753937e-001,
+ -6.9175928831100e-001, 6.9175928831100e-001, -7.5116509199142e-001, 7.5116509199142e-001,
+ -4.2938258498907e-002, 4.2938258498907e-002, -4.2938258498907e-002, 4.2938258498907e-002,
+ 9.9907773733139e-001, 9.9907773733139e-001, 9.9907773733139e-001, 9.9907773733139e-001,
+ 2.1474080160260e-002, 2.1474080160260e-002, -6.4382635056973e-002, -6.4382635056973e-002,
+ -9.9976938962936e-001, 9.9976938962936e-001, 9.9792528152466e-001, -9.9792528152466e-001,
+ 9.9952942132950e-001, 9.9952942132950e-001, 9.9952942132950e-001, 9.9952942132950e-001,
+ -3.0674804002047e-002, 3.0674804002047e-002, -3.0674804002047e-002, 3.0674804002047e-002,
+ 9.9988234043121e-001, 9.9988234043121e-001, 9.9894130229950e-001, 9.9894130229950e-001,
+ -1.5339206904173e-002, 1.5339206904173e-002, -4.6003181487322e-002, 4.6003181487322e-002,
+ -9.9952942132950e-001, 9.9952942132950e-001, -9.9952942132950e-001, 9.9952942132950e-001,
+ 3.0674804002047e-002, 3.0674804002047e-002, 3.0674804002047e-002, 3.0674804002047e-002,
+ 6.9617712497711e-001, 6.9617712497711e-001, -7.3888731002808e-001, -7.3888731002808e-001,
+ -7.1787005662918e-001, 7.1787005662918e-001, -6.7382901906967e-001, 6.7382901906967e-001,
+ 6.8508368730545e-001, 6.8508368730545e-001, 6.8508368730545e-001, 6.8508368730545e-001,
+ -7.2846436500549e-001, 7.2846436500549e-001, -7.2846436500549e-001, 7.2846436500549e-001,
+ 9.1790080070496e-001, 9.1790080070496e-001, 3.3977693319321e-001, 3.3977693319321e-001,
+ -3.9680999517441e-001, 3.9680999517441e-001, -9.4050604104996e-001, 9.4050604104996e-001,
+ -6.8508368730545e-001, 6.8508368730545e-001, -6.8508368730545e-001, 6.8508368730545e-001,
+ 7.2846436500549e-001, 7.2846436500549e-001, 7.2846436500549e-001, 7.2846436500549e-001,
+ 3.6846685409546e-001, 3.6846685409546e-001, -9.0529680252075e-001, -9.0529680252075e-001,
+ -9.2964088916779e-001, 9.2964088916779e-001, 4.2477965354919e-001, -4.2477965354919e-001,
+ 9.1170603036880e-001, 9.1170603036880e-001, 9.1170603036880e-001, 9.1170603036880e-001,
+ -4.1084319353104e-001, 4.1084319353104e-001, -4.1084319353104e-001, 4.1084319353104e-001,
+ 9.7767734527588e-001, 9.7767734527588e-001, 8.0503129959106e-001, 8.0503129959106e-001,
+ -2.1011184155941e-001, 2.1011184155941e-001, -5.9323233366013e-001, 5.9323233366013e-001,
+ -9.1170603036880e-001, 9.1170603036880e-001, -9.1170603036880e-001, 9.1170603036880e-001,
+ 4.1084319353104e-001, 4.1084319353104e-001, 4.1084319353104e-001, 4.1084319353104e-001,
+ 5.4275077581406e-001, 5.4275077581406e-001, -9.8872166872025e-001, -9.8872166872025e-001,
+ -8.3989375829697e-001, 8.3989375829697e-001, -1.4976453781128e-001, 1.4976453781128e-001,
+ 3.5416352748871e-001, 3.5416352748871e-001, 3.5416352748871e-001, 3.5416352748871e-001,
+ -9.3518352508545e-001, 9.3518352508545e-001, -9.3518352508545e-001, 9.3518352508545e-001,
+ 8.2284975051880e-001, 8.2284975051880e-001, -2.4000298976898e-001, -2.4000298976898e-001,
+ -5.6825894117355e-001, 5.6825894117355e-001, -9.7077208757401e-001, 9.7077208757401e-001,
+ -3.5416352748871e-001, 3.5416352748871e-001, -3.5416352748871e-001, 3.5416352748871e-001,
+ 9.3518352508545e-001, 9.3518352508545e-001, 9.3518352508545e-001, 9.3518352508545e-001,
+ 1.8002291023731e-001, 1.8002291023731e-001, -5.1673179864883e-001, -5.1673179864883e-001,
+ -9.8366242647171e-001, 9.8366242647171e-001, 8.5614734888077e-001, -8.5614734888077e-001,
+ 9.7433936595917e-001, 9.7433936595917e-001, 9.7433936595917e-001, 9.7433936595917e-001,
+ -2.2508391737938e-001, 2.2508391737938e-001, -2.2508391737938e-001, 2.2508391737938e-001,
+ 9.9356412887573e-001, 9.9356412887573e-001, 9.4257318973541e-001, 9.4257318973541e-001,
+ -1.1327095329762e-001, 1.1327095329762e-001, -3.3399963378906e-001, 3.3399963378906e-001,
+ -9.7433936595917e-001, 9.7433936595917e-001, -9.7433936595917e-001, 9.7433936595917e-001,
+ 2.2508391737938e-001, 2.2508391737938e-001, 2.2508391737938e-001, 2.2508391737938e-001,
+ 6.2246131896973e-001, 6.2246131896973e-001, -9.0267324447632e-001, -9.0267324447632e-001,
+ -7.8265058994293e-001, 7.8265058994293e-001, -4.3032658100128e-001, 4.3032658100128e-001,
+ 5.2980363368988e-001, 5.2980363368988e-001, 5.2980363368988e-001, 5.2980363368988e-001,
+ -8.4812033176422e-001, 8.4812033176422e-001, -8.4812033176422e-001, 8.4812033176422e-001,
+ 8.7458664178848e-001, 8.7458664178848e-001, 5.2131652832031e-002, 5.2131652832031e-002,
+ -4.8486927151680e-001, 4.8486927151680e-001, -9.9864017963409e-001, 9.9864017963409e-001,
+ -5.2980363368988e-001, 5.2980363368988e-001, -5.2980363368988e-001, 5.2980363368988e-001,
+ 8.4812033176422e-001, 8.4812033176422e-001, 8.4812033176422e-001, 8.4812033176422e-001,
+ 2.7557182312012e-001, 2.7557182312012e-001, -7.4300789833069e-001, -7.4300789833069e-001,
+ -9.6128046512604e-001, 9.6128046512604e-001, 6.6928255558014e-001, -6.6928255558014e-001,
+ 8.1403630971909e-001, 8.1403630971909e-001, 8.1403630971909e-001, 8.1403630971909e-001,
+ -5.8081394433975e-001, 5.8081394433975e-001, -5.8081394433975e-001, 5.8081394433975e-001,
+ 9.5237499475479e-001, 9.5237499475479e-001, 5.9816074371338e-001, 5.9816074371338e-001,
+ -3.0492922663689e-001, 3.0492922663689e-001, -8.0137610435486e-001, 8.0137610435486e-001,
+ -8.1403630971909e-001, 8.1403630971909e-001, -8.1403630971909e-001, 8.1403630971909e-001,
+ 5.8081394433975e-001, 5.8081394433975e-001, 5.8081394433975e-001, 5.8081394433975e-001,
+ 4.5781332254410e-001, 4.5781332254410e-001, -9.8962193727493e-001, -9.8962193727493e-001,
+ -8.8904833793640e-001, 8.8904833793640e-001, 1.4369499683380e-001, -1.4369499683380e-001,
+ 1.6491311788559e-001, 1.6491311788559e-001, 1.6491311788559e-001, 1.6491311788559e-001,
+ -9.8630809783936e-001, 9.8630809783936e-001, -9.8630809783936e-001, 9.8630809783936e-001,
+ 7.6318842172623e-001, 7.6318842172623e-001, -5.1146894693375e-001, -5.1146894693375e-001,
+ -6.4617604017258e-001, 6.4617604017258e-001, -8.5930174589157e-001, 8.5930174589157e-001,
+ -1.6491311788559e-001, 1.6491311788559e-001, -1.6491311788559e-001, 1.6491311788559e-001,
+ 9.8630809783936e-001, 9.8630809783936e-001, 9.8630809783936e-001, 9.8630809783936e-001,
+ 8.2740269601345e-002, 8.2740269601345e-002, -2.4595502018929e-001, -2.4595502018929e-001,
+ -9.9657112360001e-001, 9.9657112360001e-001, 9.6928119659424e-001, -9.6928119659424e-001,
+ 9.9170976877213e-001, 9.9170976877213e-001, 9.9170976877213e-001, 9.9170976877213e-001,
+ -1.2849810719490e-001, 1.2849810719490e-001, -1.2849810719490e-001, 1.2849810719490e-001,
+ 9.9792528152466e-001, 9.9792528152466e-001, 9.8137921094894e-001, 9.8137921094894e-001,
+ -6.4382635056973e-002, 6.4382635056973e-002, -1.9208037853241e-001, 1.9208037853241e-001,
+ -9.9170976877213e-001, 9.9170976877213e-001, -9.9170976877213e-001, 9.9170976877213e-001,
+ 1.2849810719490e-001, 1.2849810719490e-001, 1.2849810719490e-001, 1.2849810719490e-001,
+ 6.6011434793472e-001, 6.6011434793472e-001, -8.2976120710373e-001, -8.2976120710373e-001,
+ -7.5116509199142e-001, 7.5116509199142e-001, -5.5811864137650e-001, 5.5811864137650e-001,
+ 6.1038279533386e-001, 6.1038279533386e-001, 6.1038279533386e-001, 6.1038279533386e-001,
+ -7.9210656881332e-001, 7.9210656881332e-001, -7.9210656881332e-001, 7.9210656881332e-001,
+ 8.9732456207275e-001, 8.9732456207275e-001, 1.9809836149216e-001, 1.9809836149216e-001,
+ -4.4137129187584e-001, 4.4137129187584e-001, -9.8018205165863e-001, 9.8018205165863e-001,
+ -6.1038279533386e-001, 6.1038279533386e-001, -6.1038279533386e-001, 6.1038279533386e-001,
+ 7.9210656881332e-001, 7.9210656881332e-001, 7.9210656881332e-001, 7.9210656881332e-001,
+ 3.2240769267082e-001, 3.2240769267082e-001, -8.3317005634308e-001, -8.3317005634308e-001,
+ -9.4660091400146e-001, 9.4660091400146e-001, 5.5301666259766e-001, -5.5301666259766e-001,
+ 8.6704623699188e-001, 8.6704623699188e-001, 8.6704623699188e-001, 8.6704623699188e-001,
+ -4.9822768568993e-001, 4.9822768568993e-001, -4.9822768568993e-001, 4.9822768568993e-001,
+ 9.6618998050690e-001, 9.6618998050690e-001, 7.0927280187607e-001, 7.0927280187607e-001,
+ -2.5783109664917e-001, 2.5783109664917e-001, -7.0493412017822e-001, 7.0493412017822e-001,
+ -8.6704623699188e-001, 8.6704623699188e-001, -8.6704623699188e-001, 8.6704623699188e-001,
+ 4.9822768568993e-001, 4.9822768568993e-001, 4.9822768568993e-001, 4.9822768568993e-001,
+ 5.0088536739349e-001, 5.0088536739349e-001, -9.9999535083771e-001, -9.9999535083771e-001,
+ -8.6551362276077e-001, 8.6551362276077e-001, -3.0679106712341e-003, 3.0679106712341e-003,
+ 2.6079413294792e-001, 2.6079413294792e-001, 2.6079413294792e-001, 2.6079413294792e-001,
+ -9.6539443731308e-001, 9.6539443731308e-001, -9.6539443731308e-001, 9.6539443731308e-001,
+ 7.9397547245026e-001, 7.9397547245026e-001, -3.7984716892242e-001, -3.7984716892242e-001,
+ -6.0794979333878e-001, 6.0794979333878e-001, -9.2504924535751e-001, 9.2504924535751e-001,
+ -2.6079413294792e-001, 2.6079413294792e-001, -2.6079413294792e-001, 2.6079413294792e-001,
+ 9.6539443731308e-001, 9.6539443731308e-001, 9.6539443731308e-001, 9.6539443731308e-001,
+ 1.3154003024101e-001, 1.3154003024101e-001, -3.8551607728004e-001, -3.8551607728004e-001,
+ -9.9131083488464e-001, 9.9131083488464e-001, 9.2270112037659e-001, -9.2270112037659e-001,
+ 9.4758558273315e-001, 9.4758558273315e-001, 9.4758558273315e-001, 9.4758558273315e-001,
+ -3.1950202584267e-001, 3.1950202584267e-001, -3.1950202584267e-001, 3.1950202584267e-001,
+ 9.8680937290192e-001, 9.8680937290192e-001, 8.8336330652237e-001, 8.8336330652237e-001,
+ -1.6188639402390e-001, 1.6188639402390e-001, -4.6868878602982e-001, 4.6868878602982e-001,
+ -9.4758558273315e-001, 9.4758558273315e-001, -9.4758558273315e-001, 9.4758558273315e-001,
+ 3.1950202584267e-001, 3.1950202584267e-001, 3.1950202584267e-001, 3.1950202584267e-001,
+ 5.8330869674683e-001, 5.8330869674683e-001, -9.5604515075684e-001, -9.5604515075684e-001,
+ -8.1225055456161e-001, 8.1225055456161e-001, -2.9321926832199e-001, 2.9321926832199e-001,
+ 4.4412216544151e-001, 4.4412216544151e-001, 4.4412216544151e-001, 4.4412216544151e-001,
+ -8.9596623182297e-001, 8.9596623182297e-001, -8.9596623182297e-001, 8.9596623182297e-001,
+ 8.4974175691605e-001, 8.4974175691605e-001, -9.4963490962982e-002, -9.4963490962982e-002,
+ -5.2719914913177e-001, 5.2719914913177e-001, -9.9548065662384e-001, 9.9548065662384e-001,
+ -4.4412216544151e-001, 4.4412216544151e-001, -4.4412216544151e-001, 4.4412216544151e-001,
+ 8.9596623182297e-001, 8.9596623182297e-001, 8.9596623182297e-001, 8.9596623182297e-001,
+ 2.2807209193707e-001, 2.2807209193707e-001, -6.3676190376282e-001, -6.3676190376282e-001,
+ -9.7364425659180e-001, 9.7364425659180e-001, 7.7106052637100e-001, -7.7106052637100e-001,
+ 7.5318676233292e-001, 7.5318676233292e-001, 7.5318676233292e-001, 7.5318676233292e-001,
+ -6.5780669450760e-001, 6.5780669450760e-001, -6.5780669450760e-001, 6.5780669450760e-001,
+ 9.3626564741135e-001, 9.3626564741135e-001, 4.7410020232201e-001, 4.7410020232201e-001,
+ -3.5129275918007e-001, 3.5129275918007e-001, -8.8047087192535e-001, 8.8047087192535e-001,
+ -7.5318676233292e-001, 7.5318676233292e-001, -7.5318676233292e-001, 7.5318676233292e-001,
+ 6.5780669450760e-001, 6.5780669450760e-001, 6.5780669450760e-001, 6.5780669450760e-001,
+ 4.1363832354546e-001, 4.1363832354546e-001, -9.5782625675201e-001, -9.5782625675201e-001,
+ -9.1044127941132e-001, 9.1044127941132e-001, 2.8734743595123e-001, -2.8734743595123e-001,
+ 6.7443922162056e-002, 6.7443922162056e-002, 6.7443922162056e-002, 6.7443922162056e-002,
+ -9.9772304296494e-001, 9.9772304296494e-001, -9.9772304296494e-001, 9.9772304296494e-001,
+ 7.3056274652481e-001, 7.3056274652481e-001, -6.3201874494553e-001, -6.3201874494553e-001,
+ -6.8284553289413e-001, 6.8284553289413e-001, -7.7495306730270e-001, 7.7495306730270e-001,
+ -6.7443922162056e-002, 6.7443922162056e-002, -6.7443922162056e-002, 6.7443922162056e-002,
+ 9.9772304296494e-001, 9.9772304296494e-001, 9.9772304296494e-001, 9.9772304296494e-001,
+ 3.3741172403097e-002, 3.3741172403097e-002, -1.0106986761093e-001, -1.0106986761093e-001,
+ -9.9943059682846e-001, 9.9943059682846e-001, 9.9487930536270e-001, -9.9487930536270e-001,
+ 9.9682027101517e-001, 9.9682027101517e-001, 9.9682027101517e-001, 9.9682027101517e-001,
+ -7.9682439565659e-002, 7.9682439565659e-002, -7.9682439565659e-002, 7.9682439565659e-002,
+ 9.9920475482941e-001, 9.9920475482941e-001, 9.9285042285919e-001, 9.9285042285919e-001,
+ -3.9872929453850e-002, 3.9872929453850e-002, -1.1936521530151e-001, 1.1936521530151e-001,
+ -9.9682027101517e-001, 9.9682027101517e-001, -9.9682027101517e-001, 9.9682027101517e-001,
+ 7.9682439565659e-002, 7.9682439565659e-002, 7.9682439565659e-002, 7.9682439565659e-002,
+ 6.7835003137589e-001, 6.7835003137589e-001, -7.8645521402359e-001, -7.8645521402359e-001,
+ -7.3473888635635e-001, 7.3473888635635e-001, -6.1764723062515e-001, 6.1764723062515e-001,
+ 6.4851438999176e-001, 6.4851438999176e-001, 6.4851438999176e-001, 6.4851438999176e-001,
+ -7.6120239496231e-001, 7.6120239496231e-001, -7.6120239496231e-001, 7.6120239496231e-001,
+ 9.0788608789444e-001, 9.0788608789444e-001, 2.6966828107834e-001, 2.6966828107834e-001,
+ -4.1921690106392e-001, 4.1921690106392e-001, -9.6295320987701e-001, 9.6295320987701e-001,
+ -6.4851438999176e-001, 6.4851438999176e-001, -6.4851438999176e-001, 6.4851438999176e-001,
+ 7.6120239496231e-001, 7.6120239496231e-001, 7.6120239496231e-001, 7.6120239496231e-001,
+ 3.4554132819176e-001, 3.4554132819176e-001, -8.7159502506256e-001, -8.7159502506256e-001,
+ -9.3840354681015e-001, 9.3840354681015e-001, 4.9022650718689e-001, -4.9022650718689e-001,
+ 8.9044868946075e-001, 8.9044868946075e-001, 8.9044868946075e-001, 8.9044868946075e-001,
+ -4.5508360862732e-001, 4.5508360862732e-001, -4.5508360862732e-001, 4.5508360862732e-001,
+ 9.7222650051117e-001, 9.7222650051117e-001, 7.5920915603638e-001, 7.5920915603638e-001,
+ -2.3404195904732e-001, 2.3404195904732e-001, -6.5084671974182e-001, 6.5084671974182e-001,
+ -8.9044868946075e-001, 8.9044868946075e-001, -8.9044868946075e-001, 8.9044868946075e-001,
+ 4.5508360862732e-001, 4.5508360862732e-001, 4.5508360862732e-001, 4.5508360862732e-001,
+ 5.2197527885437e-001, 5.2197527885437e-001, -9.9705994129181e-001, -9.9705994129181e-001,
+ -8.5296058654785e-001, 8.5296058654785e-001, -7.6623797416687e-002, 7.6623797416687e-002,
+ 3.0784964561462e-001, 3.0784964561462e-001, 3.0784964561462e-001, 3.0784964561462e-001,
+ -9.5143502950668e-001, 9.5143502950668e-001, -9.5143502950668e-001, 9.5143502950668e-001,
+ 8.0865615606308e-001, 8.0865615606308e-001, -3.1076723337173e-001, -3.1076723337173e-001,
+ -5.8828157186508e-001, 5.8828157186508e-001, -9.5048600435257e-001, 9.5048600435257e-001,
+ -3.0784964561462e-001, 3.0784964561462e-001, -3.0784964561462e-001, 3.0784964561462e-001,
+ 9.5143502950668e-001, 9.5143502950668e-001, 9.5143502950668e-001, 9.5143502950668e-001,
+ 1.5582840144634e-001, 1.5582840144634e-001, -4.5234960317612e-001, -4.5234960317612e-001,
+ -9.8778414726257e-001, 9.8778414726257e-001, 8.9184069633484e-001, -8.9184069633484e-001,
+ 9.6212142705917e-001, 9.6212142705917e-001, 9.6212142705917e-001, 9.6212142705917e-001,
+ -2.7262136340141e-001, 2.7262136340141e-001, -2.7262136340141e-001, 2.7262136340141e-001,
+ 9.9048507213593e-001, 9.9048507213593e-001, 9.1544872522354e-001, 9.1544872522354e-001,
+ -1.3762012124062e-001, 1.3762012124062e-001, -4.0243467688560e-001, 4.0243467688560e-001,
+ -9.6212142705917e-001, 9.6212142705917e-001, -9.6212142705917e-001, 9.6212142705917e-001,
+ 2.7262136340141e-001, 2.7262136340141e-001, 2.7262136340141e-001, 2.7262136340141e-001,
+ 6.0306662321091e-001, 6.0306662321091e-001, -9.3188422918320e-001, -9.3188422918320e-001,
+ -7.9769080877304e-001, 7.9769080877304e-001, -3.6275583505630e-001, 3.6275583505630e-001,
+ 4.8755016922951e-001, 4.8755016922951e-001, 4.8755016922951e-001, 4.8755016922951e-001,
+ -8.7309497594833e-001, 8.7309497594833e-001, -8.7309497594833e-001, 8.7309497594833e-001,
+ 8.6242395639420e-001, 8.6242395639420e-001, -2.1474123001099e-002, -2.1474123001099e-002,
+ -5.0618666410446e-001, 5.0618666410446e-001, -9.9976938962936e-001, 9.9976938962936e-001,
+ -4.8755016922951e-001, 4.8755016922951e-001, -4.8755016922951e-001, 4.8755016922951e-001,
+ 8.7309497594833e-001, 8.7309497594833e-001, 8.7309497594833e-001, 8.7309497594833e-001,
+ 2.5189781188965e-001, 2.5189781188965e-001, -6.9175928831100e-001, -6.9175928831100e-001,
+ -9.6775382757187e-001, 9.6775382757187e-001, 7.2212815284729e-001, -7.2212815284729e-001,
+ 7.8455656766891e-001, 7.8455656766891e-001, 7.8455656766891e-001, 7.8455656766891e-001,
+ -6.2005722522736e-001, 6.2005722522736e-001, -6.2005722522736e-001, 6.2005722522736e-001,
+ 9.4460481405258e-001, 9.4460481405258e-001, 5.3758704662323e-001, 5.3758704662323e-001,
+ -3.2820984721184e-001, 3.2820984721184e-001, -8.4320819377899e-001, 8.4320819377899e-001,
+ -7.8455656766891e-001, 7.8455656766891e-001, -7.8455656766891e-001, 7.8455656766891e-001,
+ 6.2005722522736e-001, 6.2005722522736e-001, 6.2005722522736e-001, 6.2005722522736e-001,
+ 4.3585708737373e-001, 4.3585708737373e-001, -9.7636973857880e-001, -9.7636973857880e-001,
+ -9.0001589059830e-001, 9.0001589059830e-001, 2.1610683202744e-001, -2.1610683202744e-001,
+ 1.1631863564253e-001, 1.1631863564253e-001, 1.1631863564253e-001, 1.1631863564253e-001,
+ -9.9321192502975e-001, 9.9321192502975e-001, -9.9321192502975e-001, 9.9321192502975e-001,
+ 7.4710059165955e-001, 7.4710059165955e-001, -5.7329714298248e-001, -5.7329714298248e-001,
+ -6.6471099853516e-001, 6.6471099853516e-001, -8.1934738159180e-001, 8.1934738159180e-001,
+ -1.1631863564253e-001, 1.1631863564253e-001, -1.1631863564253e-001, 1.1631863564253e-001,
+ 9.9321192502975e-001, 9.9321192502975e-001, 9.9321192502975e-001, 9.9321192502975e-001,
+ 5.8258265256882e-002, 5.8258265256882e-002, -1.7398388683796e-001, -1.7398388683796e-001,
+ -9.9830156564713e-001, 9.9830156564713e-001, 9.8474854230881e-001, -9.8474854230881e-001,
+ 9.8421007394791e-001, 9.8421007394791e-001, 9.8421007394791e-001, 9.8421007394791e-001,
+ -1.7700421810150e-001, 1.7700421810150e-001, -1.7700421810150e-001, 1.7700421810150e-001,
+ 9.9604469537735e-001, 9.9604469537735e-001, 9.6458977460861e-001, 9.6458977460861e-001,
+ -8.8853552937508e-002, 8.8853552937508e-002, -2.6375466585159e-001, 2.6375466585159e-001,
+ -9.8421007394791e-001, 9.8421007394791e-001, -9.8421007394791e-001, 9.8421007394791e-001,
+ 1.7700421810150e-001, 1.7700421810150e-001, 1.7700421810150e-001, 1.7700421810150e-001,
+ 6.4148104190826e-001, 6.4148104190826e-001, -8.6857056617737e-001, -8.6857056617737e-001,
+ -7.6713889837265e-001, 7.6713889837265e-001, -4.9556535482407e-001, 4.9556535482407e-001,
+ 5.7078075408936e-001, 5.7078075408936e-001, 5.7078075408936e-001, 5.7078075408936e-001,
+ -8.2110249996185e-001, 8.2110249996185e-001, -8.2110249996185e-001, 8.2110249996185e-001,
+ 8.8622254133224e-001, 8.8622254133224e-001, 1.2545502185822e-001, 1.2545502185822e-001,
+ -4.6325978636742e-001, 4.6325978636742e-001, -9.9209928512573e-001, 9.9209928512573e-001,
+ -5.7078075408936e-001, 5.7078075408936e-001, -5.7078075408936e-001, 5.7078075408936e-001,
+ 8.2110249996185e-001, 8.2110249996185e-001, 8.2110249996185e-001, 8.2110249996185e-001,
+ 2.9907983541489e-001, 2.9907983541489e-001, -7.9023021459579e-001, -7.9023021459579e-001,
+ -9.5422810316086e-001, 9.5422810316086e-001, 6.1281007528305e-001, -6.1281007528305e-001,
+ 8.4155493974686e-001, 8.4155493974686e-001, 8.4155493974686e-001, 8.4155493974686e-001,
+ -5.4017150402069e-001, 5.4017150402069e-001, -5.4017150402069e-001, 5.4017150402069e-001,
+ 9.5957154035568e-001, 9.5957154035568e-001, 6.5549290180206e-001, 6.5549290180206e-001,
+ -2.8146493434906e-001, 2.8146493434906e-001, -7.5520145893097e-001, 7.5520145893097e-001,
+ -8.4155493974686e-001, 8.4155493974686e-001, -8.4155493974686e-001, 8.4155493974686e-001,
+ 5.4017150402069e-001, 5.4017150402069e-001, 5.4017150402069e-001, 5.4017150402069e-001,
+ 4.7949376702309e-001, 4.7949376702309e-001, -9.9751138687134e-001, -9.9751138687134e-001,
+ -8.7754529714584e-001, 8.7754529714584e-001, 7.0504605770111e-002, -7.0504605770111e-002,
+ 2.1311032772064e-001, 2.1311032772064e-001, 2.1311032772064e-001, 2.1311032772064e-001,
+ -9.7702813148499e-001, 9.7702813148499e-001, -9.7702813148499e-001, 9.7702813148499e-001,
+ 7.7881652116776e-001, 7.7881652116776e-001, -4.4686883687973e-001, -4.4686883687973e-001,
+ -6.2725180387497e-001, 6.2725180387497e-001, -8.9459949731827e-001, 8.9459949731827e-001,
+ -2.1311032772064e-001, 2.1311032772064e-001, -2.1311032772064e-001, 2.1311032772064e-001,
+ 9.7702813148499e-001, 9.7702813148499e-001, 9.7702813148499e-001, 9.7702813148499e-001,
+ 1.0717242956161e-001, 1.0717242956161e-001, -3.1659337878227e-001, -3.1659337878227e-001,
+ -9.9424046278000e-001, 9.9424046278000e-001, 9.4856137037277e-001, -9.4856137037277e-001,
+ 9.3076694011688e-001, 9.3076694011688e-001, 9.3076694011688e-001, 9.3076694011688e-001,
+ -3.6561301350594e-001, 3.6561301350594e-001, -3.6561301350594e-001, 3.6561301350594e-001,
+ 9.8253929615021e-001, 9.8253929615021e-001, 8.4649091959000e-001, 8.4649091959000e-001,
+ -1.8605515360832e-001, 1.8605515360832e-001, -5.3240311145782e-001, 5.3240311145782e-001,
+ -9.3076694011688e-001, 9.3076694011688e-001, -9.3076694011688e-001, 9.3076694011688e-001,
+ 3.6561301350594e-001, 3.6561301350594e-001, 3.6561301350594e-001, 3.6561301350594e-001,
+ 5.6319934129715e-001, 5.6319934129715e-001, -9.7502535581589e-001, -9.7502535581589e-001,
+ -8.2632106542587e-001, 8.2632106542587e-001, -2.2209364175797e-001, 2.2209364175797e-001,
+ 3.9962419867516e-001, 3.9962419867516e-001, 3.9962419867516e-001, 3.9962419867516e-001,
+ -9.1667908430099e-001, 9.1667908430099e-001, -9.1667908430099e-001, 9.1667908430099e-001,
+ 8.3654773235321e-001, 8.3654773235321e-001, -1.6793835163116e-001, -1.6793835163116e-001,
+ -5.4789406061172e-001, 5.4789406061172e-001, -9.8579758405685e-001, 9.8579758405685e-001,
+ -3.9962419867516e-001, 3.9962419867516e-001, -3.9962419867516e-001, 3.9962419867516e-001,
+ 9.1667908430099e-001, 9.1667908430099e-001, 9.1667908430099e-001, 9.1667908430099e-001,
+ 2.0410896837711e-001, 2.0410896837711e-001, -5.7831382751465e-001, -5.7831382751465e-001,
+ -9.7894817590714e-001, 9.7894817590714e-001, 8.1581437587738e-001, -8.1581437587738e-001,
+ 7.2000247240067e-001, 7.2000247240067e-001, 7.2000247240067e-001, 7.2000247240067e-001,
+ -6.9397145509720e-001, 6.9397145509720e-001, -6.9397145509720e-001, 6.9397145509720e-001,
+ 9.2736250162125e-001, 9.2736250162125e-001, 4.0804409980774e-001, 4.0804409980774e-001,
+ -3.7416407465935e-001, 3.7416407465935e-001, -9.1296207904816e-001, 9.1296207904816e-001,
+ -7.2000247240067e-001, 7.2000247240067e-001, -7.2000247240067e-001, 7.2000247240067e-001,
+ 6.9397145509720e-001, 6.9397145509720e-001, 6.9397145509720e-001, 6.9397145509720e-001,
+ 3.9117038249969e-001, 3.9117038249969e-001, -9.3409240245819e-001, -9.3409240245819e-001,
+ -9.2031824588776e-001, 9.2031824588776e-001, 3.5703098773956e-001, -3.5703098773956e-001,
+ 1.8406730145216e-002, 1.8406730145216e-002, 1.8406730145216e-002, 1.8406730145216e-002,
+ -9.9983060359955e-001, 9.9983060359955e-001, -9.9983060359955e-001, 9.9983060359955e-001,
+ 7.1358484029770e-001, 7.1358484029770e-001, -6.8731540441513e-001, -6.8731540441513e-001,
+ -7.0056879520416e-001, 7.0056879520416e-001, -7.2635912895203e-001, 7.2635912895203e-001,
+ -1.8406730145216e-002, 1.8406730145216e-002, -1.8406730145216e-002, 1.8406730145216e-002,
+ 9.9983060359955e-001, 9.9983060359955e-001, 9.9983060359955e-001, 9.9983060359955e-001,
+ 9.2037552967668e-003, 9.2037552967668e-003, -2.7608145028353e-002, -2.7608145028353e-002,
+ -9.9995762109756e-001, 9.9995762109756e-001, 9.9961882829666e-001, -9.9961882829666e-001,
+ 9.9983060359955e-001, 9.9983060359955e-001, 9.9983060359955e-001, 9.9983060359955e-001,
+ -1.8406730145216e-002, 1.8406730145216e-002, -1.8406730145216e-002, 1.8406730145216e-002,
+ 9.9995762109756e-001, 9.9995762109756e-001, 9.9961882829666e-001, 9.9961882829666e-001,
+ -9.2037552967668e-003, 9.2037552967668e-003, -2.7608145028353e-002, 2.7608145028353e-002,
+ -9.9983060359955e-001, 9.9983060359955e-001, -9.9983060359955e-001, 9.9983060359955e-001,
+ 1.8406730145216e-002, 1.8406730145216e-002, 1.8406730145216e-002, 1.8406730145216e-002,
+ 7.0056879520416e-001, 7.0056879520416e-001, -7.2635912895203e-001, -7.2635912895203e-001,
+ -7.1358484029770e-001, 7.1358484029770e-001, -6.8731540441513e-001, 6.8731540441513e-001,
+ 6.9397145509720e-001, 6.9397145509720e-001, 6.9397145509720e-001, 6.9397145509720e-001,
+ -7.2000247240067e-001, 7.2000247240067e-001, -7.2000247240067e-001, 7.2000247240067e-001,
+ 9.2031824588776e-001, 9.2031824588776e-001, 3.5703098773956e-001, 3.5703098773956e-001,
+ -3.9117038249969e-001, 3.9117038249969e-001, -9.3409240245819e-001, 9.3409240245819e-001,
+ -6.9397145509720e-001, 6.9397145509720e-001, -6.9397145509720e-001, 6.9397145509720e-001,
+ 7.2000247240067e-001, 7.2000247240067e-001, 7.2000247240067e-001, 7.2000247240067e-001,
+ 3.7416407465935e-001, 3.7416407465935e-001, -9.1296207904816e-001, -9.1296207904816e-001,
+ -9.2736250162125e-001, 9.2736250162125e-001, 4.0804409980774e-001, -4.0804409980774e-001,
+ 9.1667908430099e-001, 9.1667908430099e-001, 9.1667908430099e-001, 9.1667908430099e-001,
+ -3.9962419867516e-001, 3.9962419867516e-001, -3.9962419867516e-001, 3.9962419867516e-001,
+ 9.7894817590714e-001, 9.7894817590714e-001, 8.1581437587738e-001, 8.1581437587738e-001,
+ -2.0410896837711e-001, 2.0410896837711e-001, -5.7831382751465e-001, 5.7831382751465e-001,
+ -9.1667908430099e-001, 9.1667908430099e-001, -9.1667908430099e-001, 9.1667908430099e-001,
+ 3.9962419867516e-001, 3.9962419867516e-001, 3.9962419867516e-001, 3.9962419867516e-001,
+ 5.4789406061172e-001, 5.4789406061172e-001, -9.8579758405685e-001, -9.8579758405685e-001,
+ -8.3654773235321e-001, 8.3654773235321e-001, -1.6793835163116e-001, 1.6793835163116e-001,
+ 3.6561301350594e-001, 3.6561301350594e-001, 3.6561301350594e-001, 3.6561301350594e-001,
+ -9.3076694011688e-001, 9.3076694011688e-001, -9.3076694011688e-001, 9.3076694011688e-001,
+ 8.2632106542587e-001, 8.2632106542587e-001, -2.2209364175797e-001, -2.2209364175797e-001,
+ -5.6319934129715e-001, 5.6319934129715e-001, -9.7502535581589e-001, 9.7502535581589e-001,
+ -3.6561301350594e-001, 3.6561301350594e-001, -3.6561301350594e-001, 3.6561301350594e-001,
+ 9.3076694011688e-001, 9.3076694011688e-001, 9.3076694011688e-001, 9.3076694011688e-001,
+ 1.8605515360832e-001, 1.8605515360832e-001, -5.3240311145782e-001, -5.3240311145782e-001,
+ -9.8253929615021e-001, 9.8253929615021e-001, 8.4649091959000e-001, -8.4649091959000e-001,
+ 9.7702813148499e-001, 9.7702813148499e-001, 9.7702813148499e-001, 9.7702813148499e-001,
+ -2.1311032772064e-001, 2.1311032772064e-001, -2.1311032772064e-001, 2.1311032772064e-001,
+ 9.9424046278000e-001, 9.9424046278000e-001, 9.4856137037277e-001, 9.4856137037277e-001,
+ -1.0717242956161e-001, 1.0717242956161e-001, -3.1659337878227e-001, 3.1659337878227e-001,
+ -9.7702813148499e-001, 9.7702813148499e-001, -9.7702813148499e-001, 9.7702813148499e-001,
+ 2.1311032772064e-001, 2.1311032772064e-001, 2.1311032772064e-001, 2.1311032772064e-001,
+ 6.2725180387497e-001, 6.2725180387497e-001, -8.9459949731827e-001, -8.9459949731827e-001,
+ -7.7881652116776e-001, 7.7881652116776e-001, -4.4686883687973e-001, 4.4686883687973e-001,
+ 5.4017150402069e-001, 5.4017150402069e-001, 5.4017150402069e-001, 5.4017150402069e-001,
+ -8.4155493974686e-001, 8.4155493974686e-001, -8.4155493974686e-001, 8.4155493974686e-001,
+ 8.7754529714584e-001, 8.7754529714584e-001, 7.0504605770111e-002, 7.0504605770111e-002,
+ -4.7949376702309e-001, 4.7949376702309e-001, -9.9751138687134e-001, 9.9751138687134e-001,
+ -5.4017150402069e-001, 5.4017150402069e-001, -5.4017150402069e-001, 5.4017150402069e-001,
+ 8.4155493974686e-001, 8.4155493974686e-001, 8.4155493974686e-001, 8.4155493974686e-001,
+ 2.8146493434906e-001, 2.8146493434906e-001, -7.5520145893097e-001, -7.5520145893097e-001,
+ -9.5957154035568e-001, 9.5957154035568e-001, 6.5549290180206e-001, -6.5549290180206e-001,
+ 8.2110249996185e-001, 8.2110249996185e-001, 8.2110249996185e-001, 8.2110249996185e-001,
+ -5.7078075408936e-001, 5.7078075408936e-001, -5.7078075408936e-001, 5.7078075408936e-001,
+ 9.5422810316086e-001, 9.5422810316086e-001, 6.1281007528305e-001, 6.1281007528305e-001,
+ -2.9907983541489e-001, 2.9907983541489e-001, -7.9023021459579e-001, 7.9023021459579e-001,
+ -8.2110249996185e-001, 8.2110249996185e-001, -8.2110249996185e-001, 8.2110249996185e-001,
+ 5.7078075408936e-001, 5.7078075408936e-001, 5.7078075408936e-001, 5.7078075408936e-001,
+ 4.6325978636742e-001, 4.6325978636742e-001, -9.9209928512573e-001, -9.9209928512573e-001,
+ -8.8622254133224e-001, 8.8622254133224e-001, 1.2545502185822e-001, -1.2545502185822e-001,
+ 1.7700421810150e-001, 1.7700421810150e-001, 1.7700421810150e-001, 1.7700421810150e-001,
+ -9.8421007394791e-001, 9.8421007394791e-001, -9.8421007394791e-001, 9.8421007394791e-001,
+ 7.6713889837265e-001, 7.6713889837265e-001, -4.9556535482407e-001, -4.9556535482407e-001,
+ -6.4148104190826e-001, 6.4148104190826e-001, -8.6857056617737e-001, 8.6857056617737e-001,
+ -1.7700421810150e-001, 1.7700421810150e-001, -1.7700421810150e-001, 1.7700421810150e-001,
+ 9.8421007394791e-001, 9.8421007394791e-001, 9.8421007394791e-001, 9.8421007394791e-001,
+ 8.8853552937508e-002, 8.8853552937508e-002, -2.6375466585159e-001, -2.6375466585159e-001,
+ -9.9604469537735e-001, 9.9604469537735e-001, 9.6458977460861e-001, -9.6458977460861e-001,
+ 9.9321192502975e-001, 9.9321192502975e-001, 9.9321192502975e-001, 9.9321192502975e-001,
+ -1.1631863564253e-001, 1.1631863564253e-001, -1.1631863564253e-001, 1.1631863564253e-001,
+ 9.9830156564713e-001, 9.9830156564713e-001, 9.8474854230881e-001, 9.8474854230881e-001,
+ -5.8258265256882e-002, 5.8258265256882e-002, -1.7398388683796e-001, 1.7398388683796e-001,
+ -9.9321192502975e-001, 9.9321192502975e-001, -9.9321192502975e-001, 9.9321192502975e-001,
+ 1.1631863564253e-001, 1.1631863564253e-001, 1.1631863564253e-001, 1.1631863564253e-001,
+ 6.6471099853516e-001, 6.6471099853516e-001, -8.1934738159180e-001, -8.1934738159180e-001,
+ -7.4710059165955e-001, 7.4710059165955e-001, -5.7329714298248e-001, 5.7329714298248e-001,
+ 6.2005722522736e-001, 6.2005722522736e-001, 6.2005722522736e-001, 6.2005722522736e-001,
+ -7.8455656766891e-001, 7.8455656766891e-001, -7.8455656766891e-001, 7.8455656766891e-001,
+ 9.0001589059830e-001, 9.0001589059830e-001, 2.1610683202744e-001, 2.1610683202744e-001,
+ -4.3585708737373e-001, 4.3585708737373e-001, -9.7636973857880e-001, 9.7636973857880e-001,
+ -6.2005722522736e-001, 6.2005722522736e-001, -6.2005722522736e-001, 6.2005722522736e-001,
+ 7.8455656766891e-001, 7.8455656766891e-001, 7.8455656766891e-001, 7.8455656766891e-001,
+ 3.2820984721184e-001, 3.2820984721184e-001, -8.4320819377899e-001, -8.4320819377899e-001,
+ -9.4460481405258e-001, 9.4460481405258e-001, 5.3758704662323e-001, -5.3758704662323e-001,
+ 8.7309497594833e-001, 8.7309497594833e-001, 8.7309497594833e-001, 8.7309497594833e-001,
+ -4.8755016922951e-001, 4.8755016922951e-001, -4.8755016922951e-001, 4.8755016922951e-001,
+ 9.6775382757187e-001, 9.6775382757187e-001, 7.2212815284729e-001, 7.2212815284729e-001,
+ -2.5189781188965e-001, 2.5189781188965e-001, -6.9175928831100e-001, 6.9175928831100e-001,
+ -8.7309497594833e-001, 8.7309497594833e-001, -8.7309497594833e-001, 8.7309497594833e-001,
+ 4.8755016922951e-001, 4.8755016922951e-001, 4.8755016922951e-001, 4.8755016922951e-001,
+ 5.0618666410446e-001, 5.0618666410446e-001, -9.9976938962936e-001, -9.9976938962936e-001,
+ -8.6242395639420e-001, 8.6242395639420e-001, -2.1474123001099e-002, 2.1474123001099e-002,
+ 2.7262136340141e-001, 2.7262136340141e-001, 2.7262136340141e-001, 2.7262136340141e-001,
+ -9.6212142705917e-001, 9.6212142705917e-001, -9.6212142705917e-001, 9.6212142705917e-001,
+ 7.9769080877304e-001, 7.9769080877304e-001, -3.6275583505630e-001, -3.6275583505630e-001,
+ -6.0306662321091e-001, 6.0306662321091e-001, -9.3188422918320e-001, 9.3188422918320e-001,
+ -2.7262136340141e-001, 2.7262136340141e-001, -2.7262136340141e-001, 2.7262136340141e-001,
+ 9.6212142705917e-001, 9.6212142705917e-001, 9.6212142705917e-001, 9.6212142705917e-001,
+ 1.3762012124062e-001, 1.3762012124062e-001, -4.0243467688560e-001, -4.0243467688560e-001,
+ -9.9048507213593e-001, 9.9048507213593e-001, 9.1544872522354e-001, -9.1544872522354e-001,
+ 9.5143502950668e-001, 9.5143502950668e-001, 9.5143502950668e-001, 9.5143502950668e-001,
+ -3.0784964561462e-001, 3.0784964561462e-001, -3.0784964561462e-001, 3.0784964561462e-001,
+ 9.8778414726257e-001, 9.8778414726257e-001, 8.9184069633484e-001, 8.9184069633484e-001,
+ -1.5582840144634e-001, 1.5582840144634e-001, -4.5234960317612e-001, 4.5234960317612e-001,
+ -9.5143502950668e-001, 9.5143502950668e-001, -9.5143502950668e-001, 9.5143502950668e-001,
+ 3.0784964561462e-001, 3.0784964561462e-001, 3.0784964561462e-001, 3.0784964561462e-001,
+ 5.8828157186508e-001, 5.8828157186508e-001, -9.5048600435257e-001, -9.5048600435257e-001,
+ -8.0865615606308e-001, 8.0865615606308e-001, -3.1076723337173e-001, 3.1076723337173e-001,
+ 4.5508360862732e-001, 4.5508360862732e-001, 4.5508360862732e-001, 4.5508360862732e-001,
+ -8.9044868946075e-001, 8.9044868946075e-001, -8.9044868946075e-001, 8.9044868946075e-001,
+ 8.5296058654785e-001, 8.5296058654785e-001, -7.6623797416687e-002, -7.6623797416687e-002,
+ -5.2197527885437e-001, 5.2197527885437e-001, -9.9705994129181e-001, 9.9705994129181e-001,
+ -4.5508360862732e-001, 4.5508360862732e-001, -4.5508360862732e-001, 4.5508360862732e-001,
+ 8.9044868946075e-001, 8.9044868946075e-001, 8.9044868946075e-001, 8.9044868946075e-001,
+ 2.3404195904732e-001, 2.3404195904732e-001, -6.5084671974182e-001, -6.5084671974182e-001,
+ -9.7222650051117e-001, 9.7222650051117e-001, 7.5920915603638e-001, -7.5920915603638e-001,
+ 7.6120239496231e-001, 7.6120239496231e-001, 7.6120239496231e-001, 7.6120239496231e-001,
+ -6.4851438999176e-001, 6.4851438999176e-001, -6.4851438999176e-001, 6.4851438999176e-001,
+ 9.3840354681015e-001, 9.3840354681015e-001, 4.9022650718689e-001, 4.9022650718689e-001,
+ -3.4554132819176e-001, 3.4554132819176e-001, -8.7159502506256e-001, 8.7159502506256e-001,
+ -7.6120239496231e-001, 7.6120239496231e-001, -7.6120239496231e-001, 7.6120239496231e-001,
+ 6.4851438999176e-001, 6.4851438999176e-001, 6.4851438999176e-001, 6.4851438999176e-001,
+ 4.1921690106392e-001, 4.1921690106392e-001, -9.6295320987701e-001, -9.6295320987701e-001,
+ -9.0788608789444e-001, 9.0788608789444e-001, 2.6966828107834e-001, -2.6966828107834e-001,
+ 7.9682439565659e-002, 7.9682439565659e-002, 7.9682439565659e-002, 7.9682439565659e-002,
+ -9.9682027101517e-001, 9.9682027101517e-001, -9.9682027101517e-001, 9.9682027101517e-001,
+ 7.3473888635635e-001, 7.3473888635635e-001, -6.1764723062515e-001, -6.1764723062515e-001,
+ -6.7835003137589e-001, 6.7835003137589e-001, -7.8645521402359e-001, 7.8645521402359e-001,
+ -7.9682439565659e-002, 7.9682439565659e-002, -7.9682439565659e-002, 7.9682439565659e-002,
+ 9.9682027101517e-001, 9.9682027101517e-001, 9.9682027101517e-001, 9.9682027101517e-001,
+ 3.9872929453850e-002, 3.9872929453850e-002, -1.1936521530151e-001, -1.1936521530151e-001,
+ -9.9920475482941e-001, 9.9920475482941e-001, 9.9285042285919e-001, -9.9285042285919e-001,
+ 9.9772304296494e-001, 9.9772304296494e-001, 9.9772304296494e-001, 9.9772304296494e-001,
+ -6.7443922162056e-002, 6.7443922162056e-002, -6.7443922162056e-002, 6.7443922162056e-002,
+ 9.9943059682846e-001, 9.9943059682846e-001, 9.9487930536270e-001, 9.9487930536270e-001,
+ -3.3741172403097e-002, 3.3741172403097e-002, -1.0106986761093e-001, 1.0106986761093e-001,
+ -9.9772304296494e-001, 9.9772304296494e-001, -9.9772304296494e-001, 9.9772304296494e-001,
+ 6.7443922162056e-002, 6.7443922162056e-002, 6.7443922162056e-002, 6.7443922162056e-002,
+ 6.8284553289413e-001, 6.8284553289413e-001, -7.7495306730270e-001, -7.7495306730270e-001,
+ -7.3056274652481e-001, 7.3056274652481e-001, -6.3201874494553e-001, 6.3201874494553e-001,
+ 6.5780669450760e-001, 6.5780669450760e-001, 6.5780669450760e-001, 6.5780669450760e-001,
+ -7.5318676233292e-001, 7.5318676233292e-001, -7.5318676233292e-001, 7.5318676233292e-001,
+ 9.1044127941132e-001, 9.1044127941132e-001, 2.8734743595123e-001, 2.8734743595123e-001,
+ -4.1363832354546e-001, 4.1363832354546e-001, -9.5782625675201e-001, 9.5782625675201e-001,
+ -6.5780669450760e-001, 6.5780669450760e-001, -6.5780669450760e-001, 6.5780669450760e-001,
+ 7.5318676233292e-001, 7.5318676233292e-001, 7.5318676233292e-001, 7.5318676233292e-001,
+ 3.5129275918007e-001, 3.5129275918007e-001, -8.8047087192535e-001, -8.8047087192535e-001,
+ -9.3626564741135e-001, 9.3626564741135e-001, 4.7410020232201e-001, -4.7410020232201e-001,
+ 8.9596623182297e-001, 8.9596623182297e-001, 8.9596623182297e-001, 8.9596623182297e-001,
+ -4.4412216544151e-001, 4.4412216544151e-001, -4.4412216544151e-001, 4.4412216544151e-001,
+ 9.7364425659180e-001, 9.7364425659180e-001, 7.7106052637100e-001, 7.7106052637100e-001,
+ -2.2807209193707e-001, 2.2807209193707e-001, -6.3676190376282e-001, 6.3676190376282e-001,
+ -8.9596623182297e-001, 8.9596623182297e-001, -8.9596623182297e-001, 8.9596623182297e-001,
+ 4.4412216544151e-001, 4.4412216544151e-001, 4.4412216544151e-001, 4.4412216544151e-001,
+ 5.2719914913177e-001, 5.2719914913177e-001, -9.9548065662384e-001, -9.9548065662384e-001,
+ -8.4974175691605e-001, 8.4974175691605e-001, -9.4963490962982e-002, 9.4963490962982e-002,
+ 3.1950202584267e-001, 3.1950202584267e-001, 3.1950202584267e-001, 3.1950202584267e-001,
+ -9.4758558273315e-001, 9.4758558273315e-001, -9.4758558273315e-001, 9.4758558273315e-001,
+ 8.1225055456161e-001, 8.1225055456161e-001, -2.9321926832199e-001, -2.9321926832199e-001,
+ -5.8330869674683e-001, 5.8330869674683e-001, -9.5604515075684e-001, 9.5604515075684e-001,
+ -3.1950202584267e-001, 3.1950202584267e-001, -3.1950202584267e-001, 3.1950202584267e-001,
+ 9.4758558273315e-001, 9.4758558273315e-001, 9.4758558273315e-001, 9.4758558273315e-001,
+ 1.6188639402390e-001, 1.6188639402390e-001, -4.6868878602982e-001, -4.6868878602982e-001,
+ -9.8680937290192e-001, 9.8680937290192e-001, 8.8336330652237e-001, -8.8336330652237e-001,
+ 9.6539443731308e-001, 9.6539443731308e-001, 9.6539443731308e-001, 9.6539443731308e-001,
+ -2.6079413294792e-001, 2.6079413294792e-001, -2.6079413294792e-001, 2.6079413294792e-001,
+ 9.9131083488464e-001, 9.9131083488464e-001, 9.2270112037659e-001, 9.2270112037659e-001,
+ -1.3154003024101e-001, 1.3154003024101e-001, -3.8551607728004e-001, 3.8551607728004e-001,
+ -9.6539443731308e-001, 9.6539443731308e-001, -9.6539443731308e-001, 9.6539443731308e-001,
+ 2.6079413294792e-001, 2.6079413294792e-001, 2.6079413294792e-001, 2.6079413294792e-001,
+ 6.0794979333878e-001, 6.0794979333878e-001, -9.2504924535751e-001, -9.2504924535751e-001,
+ -7.9397547245026e-001, 7.9397547245026e-001, -3.7984716892242e-001, 3.7984716892242e-001,
+ 4.9822768568993e-001, 4.9822768568993e-001, 4.9822768568993e-001, 4.9822768568993e-001,
+ -8.6704623699188e-001, 8.6704623699188e-001, -8.6704623699188e-001, 8.6704623699188e-001,
+ 8.6551362276077e-001, 8.6551362276077e-001, -3.0679106712341e-003, -3.0679106712341e-003,
+ -5.0088536739349e-001, 5.0088536739349e-001, -9.9999535083771e-001, 9.9999535083771e-001,
+ -4.9822768568993e-001, 4.9822768568993e-001, -4.9822768568993e-001, 4.9822768568993e-001,
+ 8.6704623699188e-001, 8.6704623699188e-001, 8.6704623699188e-001, 8.6704623699188e-001,
+ 2.5783109664917e-001, 2.5783109664917e-001, -7.0493412017822e-001, -7.0493412017822e-001,
+ -9.6618998050690e-001, 9.6618998050690e-001, 7.0927280187607e-001, -7.0927280187607e-001,
+ 7.9210656881332e-001, 7.9210656881332e-001, 7.9210656881332e-001, 7.9210656881332e-001,
+ -6.1038279533386e-001, 6.1038279533386e-001, -6.1038279533386e-001, 6.1038279533386e-001,
+ 9.4660091400146e-001, 9.4660091400146e-001, 5.5301666259766e-001, 5.5301666259766e-001,
+ -3.2240769267082e-001, 3.2240769267082e-001, -8.3317005634308e-001, 8.3317005634308e-001,
+ -7.9210656881332e-001, 7.9210656881332e-001, -7.9210656881332e-001, 7.9210656881332e-001,
+ 6.1038279533386e-001, 6.1038279533386e-001, 6.1038279533386e-001, 6.1038279533386e-001,
+ 4.4137129187584e-001, 4.4137129187584e-001, -9.8018205165863e-001, -9.8018205165863e-001,
+ -8.9732456207275e-001, 8.9732456207275e-001, 1.9809836149216e-001, -1.9809836149216e-001,
+ 1.2849810719490e-001, 1.2849810719490e-001, 1.2849810719490e-001, 1.2849810719490e-001,
+ -9.9170976877213e-001, 9.9170976877213e-001, -9.9170976877213e-001, 9.9170976877213e-001,
+ 7.5116509199142e-001, 7.5116509199142e-001, -5.5811864137650e-001, -5.5811864137650e-001,
+ -6.6011434793472e-001, 6.6011434793472e-001, -8.2976120710373e-001, 8.2976120710373e-001,
+ -1.2849810719490e-001, 1.2849810719490e-001, -1.2849810719490e-001, 1.2849810719490e-001,
+ 9.9170976877213e-001, 9.9170976877213e-001, 9.9170976877213e-001, 9.9170976877213e-001,
+ 6.4382635056973e-002, 6.4382635056973e-002, -1.9208037853241e-001, -1.9208037853241e-001,
+ -9.9792528152466e-001, 9.9792528152466e-001, 9.8137921094894e-001, -9.8137921094894e-001,
+ 9.8630809783936e-001, 9.8630809783936e-001, 9.8630809783936e-001, 9.8630809783936e-001,
+ -1.6491311788559e-001, 1.6491311788559e-001, -1.6491311788559e-001, 1.6491311788559e-001,
+ 9.9657112360001e-001, 9.9657112360001e-001, 9.6928119659424e-001, 9.6928119659424e-001,
+ -8.2740269601345e-002, 8.2740269601345e-002, -2.4595502018929e-001, 2.4595502018929e-001,
+ -9.8630809783936e-001, 9.8630809783936e-001, -9.8630809783936e-001, 9.8630809783936e-001,
+ 1.6491311788559e-001, 1.6491311788559e-001, 1.6491311788559e-001, 1.6491311788559e-001,
+ 6.4617604017258e-001, 6.4617604017258e-001, -8.5930174589157e-001, -8.5930174589157e-001,
+ -7.6318842172623e-001, 7.6318842172623e-001, -5.1146894693375e-001, 5.1146894693375e-001,
+ 5.8081394433975e-001, 5.8081394433975e-001, 5.8081394433975e-001, 5.8081394433975e-001,
+ -8.1403630971909e-001, 8.1403630971909e-001, -8.1403630971909e-001, 8.1403630971909e-001,
+ 8.8904833793640e-001, 8.8904833793640e-001, 1.4369499683380e-001, 1.4369499683380e-001,
+ -4.5781332254410e-001, 4.5781332254410e-001, -9.8962193727493e-001, 9.8962193727493e-001,
+ -5.8081394433975e-001, 5.8081394433975e-001, -5.8081394433975e-001, 5.8081394433975e-001,
+ 8.1403630971909e-001, 8.1403630971909e-001, 8.1403630971909e-001, 8.1403630971909e-001,
+ 3.0492922663689e-001, 3.0492922663689e-001, -8.0137610435486e-001, -8.0137610435486e-001,
+ -9.5237499475479e-001, 9.5237499475479e-001, 5.9816074371338e-001, -5.9816074371338e-001,
+ 8.4812033176422e-001, 8.4812033176422e-001, 8.4812033176422e-001, 8.4812033176422e-001,
+ -5.2980363368988e-001, 5.2980363368988e-001, -5.2980363368988e-001, 5.2980363368988e-001,
+ 9.6128046512604e-001, 9.6128046512604e-001, 6.6928255558014e-001, 6.6928255558014e-001,
+ -2.7557182312012e-001, 2.7557182312012e-001, -7.4300789833069e-001, 7.4300789833069e-001,
+ -8.4812033176422e-001, 8.4812033176422e-001, -8.4812033176422e-001, 8.4812033176422e-001,
+ 5.2980363368988e-001, 5.2980363368988e-001, 5.2980363368988e-001, 5.2980363368988e-001,
+ 4.8486927151680e-001, 4.8486927151680e-001, -9.9864017963409e-001, -9.9864017963409e-001,
+ -8.7458664178848e-001, 8.7458664178848e-001, 5.2131652832031e-002, -5.2131652832031e-002,
+ 2.2508391737938e-001, 2.2508391737938e-001, 2.2508391737938e-001, 2.2508391737938e-001,
+ -9.7433936595917e-001, 9.7433936595917e-001, -9.7433936595917e-001, 9.7433936595917e-001,
+ 7.8265058994293e-001, 7.8265058994293e-001, -4.3032658100128e-001, -4.3032658100128e-001,
+ -6.2246131896973e-001, 6.2246131896973e-001, -9.0267324447632e-001, 9.0267324447632e-001,
+ -2.2508391737938e-001, 2.2508391737938e-001, -2.2508391737938e-001, 2.2508391737938e-001,
+ 9.7433936595917e-001, 9.7433936595917e-001, 9.7433936595917e-001, 9.7433936595917e-001,
+ 1.1327095329762e-001, 1.1327095329762e-001, -3.3399963378906e-001, -3.3399963378906e-001,
+ -9.9356412887573e-001, 9.9356412887573e-001, 9.4257318973541e-001, -9.4257318973541e-001,
+ 9.3518352508545e-001, 9.3518352508545e-001, 9.3518352508545e-001, 9.3518352508545e-001,
+ -3.5416352748871e-001, 3.5416352748871e-001, -3.5416352748871e-001, 3.5416352748871e-001,
+ 9.8366242647171e-001, 9.8366242647171e-001, 8.5614734888077e-001, 8.5614734888077e-001,
+ -1.8002291023731e-001, 1.8002291023731e-001, -5.1673179864883e-001, 5.1673179864883e-001,
+ -9.3518352508545e-001, 9.3518352508545e-001, -9.3518352508545e-001, 9.3518352508545e-001,
+ 3.5416352748871e-001, 3.5416352748871e-001, 3.5416352748871e-001, 3.5416352748871e-001,
+ 5.6825894117355e-001, 5.6825894117355e-001, -9.7077208757401e-001, -9.7077208757401e-001,
+ -8.2284975051880e-001, 8.2284975051880e-001, -2.4000298976898e-001, 2.4000298976898e-001,
+ 4.1084319353104e-001, 4.1084319353104e-001, 4.1084319353104e-001, 4.1084319353104e-001,
+ -9.1170603036880e-001, 9.1170603036880e-001, -9.1170603036880e-001, 9.1170603036880e-001,
+ 8.3989375829697e-001, 8.3989375829697e-001, -1.4976453781128e-001, -1.4976453781128e-001,
+ -5.4275077581406e-001, 5.4275077581406e-001, -9.8872166872025e-001, 9.8872166872025e-001,
+ -4.1084319353104e-001, 4.1084319353104e-001, -4.1084319353104e-001, 4.1084319353104e-001,
+ 9.1170603036880e-001, 9.1170603036880e-001, 9.1170603036880e-001, 9.1170603036880e-001,
+ 2.1011184155941e-001, 2.1011184155941e-001, -5.9323233366013e-001, -5.9323233366013e-001,
+ -9.7767734527588e-001, 9.7767734527588e-001, 8.0503129959106e-001, -8.0503129959106e-001,
+ 7.2846436500549e-001, 7.2846436500549e-001, 7.2846436500549e-001, 7.2846436500549e-001,
+ -6.8508368730545e-001, 6.8508368730545e-001, -6.8508368730545e-001, 6.8508368730545e-001,
+ 9.2964088916779e-001, 9.2964088916779e-001, 4.2477965354919e-001, 4.2477965354919e-001,
+ -3.6846685409546e-001, 3.6846685409546e-001, -9.0529680252075e-001, 9.0529680252075e-001,
+ -7.2846436500549e-001, 7.2846436500549e-001, -7.2846436500549e-001, 7.2846436500549e-001,
+ 6.8508368730545e-001, 6.8508368730545e-001, 6.8508368730545e-001, 6.8508368730545e-001,
+ 3.9680999517441e-001, 3.9680999517441e-001, -9.4050604104996e-001, -9.4050604104996e-001,
+ -9.1790080070496e-001, 9.1790080070496e-001, 3.3977693319321e-001, -3.3977693319321e-001,
+ 3.0674804002047e-002, 3.0674804002047e-002, 3.0674804002047e-002, 3.0674804002047e-002,
+ -9.9952942132950e-001, 9.9952942132950e-001, -9.9952942132950e-001, 9.9952942132950e-001,
+ 7.1787005662918e-001, 7.1787005662918e-001, -6.7382901906967e-001, -6.7382901906967e-001,
+ -6.9617712497711e-001, 6.9617712497711e-001, -7.3888731002808e-001, 7.3888731002808e-001,
+ -3.0674804002047e-002, 3.0674804002047e-002, -3.0674804002047e-002, 3.0674804002047e-002,
+ 9.9952942132950e-001, 9.9952942132950e-001, 9.9952942132950e-001, 9.9952942132950e-001,
+ 1.5339206904173e-002, 1.5339206904173e-002, -4.6003181487322e-002, -4.6003181487322e-002,
+ -9.9988234043121e-001, 9.9988234043121e-001, 9.9894130229950e-001, -9.9894130229950e-001,
+ 9.9907773733139e-001, 9.9907773733139e-001, 9.9907773733139e-001, 9.9907773733139e-001,
+ -4.2938258498907e-002, 4.2938258498907e-002, -4.2938258498907e-002, 4.2938258498907e-002,
+ 9.9976938962936e-001, 9.9976938962936e-001, 9.9792528152466e-001, 9.9792528152466e-001,
+ -2.1474080160260e-002, 2.1474080160260e-002, -6.4382635056973e-002, 6.4382635056973e-002,
+ -9.9907773733139e-001, 9.9907773733139e-001, -9.9907773733139e-001, 9.9907773733139e-001,
+ 4.2938258498907e-002, 4.2938258498907e-002, 4.2938258498907e-002, 4.2938258498907e-002,
+ 6.9175928831100e-001, 6.9175928831100e-001, -7.5116509199142e-001, -7.5116509199142e-001,
+ -7.2212815284729e-001, 7.2212815284729e-001, -6.6011440753937e-001, 6.6011440753937e-001,
+ 6.7609274387360e-001, 6.7609274387360e-001, 6.7609274387360e-001, 6.7609274387360e-001,
+ -7.3681652545929e-001, 7.3681652545929e-001, -7.3681652545929e-001, 7.3681652545929e-001,
+ 9.1544872522354e-001, 9.1544872522354e-001, 3.2240772247314e-001, 3.2240772247314e-001,
+ -4.0243464708328e-001, 4.0243464708328e-001, -9.4660085439682e-001, 9.4660085439682e-001,
+ -6.7609274387360e-001, 6.7609274387360e-001, -6.7609274387360e-001, 6.7609274387360e-001,
+ 7.3681652545929e-001, 7.3681652545929e-001, 7.3681652545929e-001, 7.3681652545929e-001,
+ 3.6275574564934e-001, 3.6275574564934e-001, -8.9732468128204e-001, -8.9732468128204e-001,
+ -9.3188428878784e-001, 9.3188428878784e-001, 4.4137123227119e-001, -4.4137123227119e-001,
+ 9.0659570693970e-001, 9.0659570693970e-001, 9.0659570693970e-001, 9.0659570693970e-001,
+ -4.2200028896332e-001, 4.2200028896332e-001, -4.2200028896332e-001, 4.2200028896332e-001,
+ 9.7636973857880e-001, 9.7636973857880e-001, 7.9397547245026e-001, 7.9397547245026e-001,
+ -2.1610680222511e-001, 2.1610680222511e-001, -6.0794985294342e-001, 6.0794985294342e-001,
+ -9.0659570693970e-001, 9.0659570693970e-001, -9.0659570693970e-001, 9.0659570693970e-001,
+ 4.2200028896332e-001, 4.2200028896332e-001, 4.2200028896332e-001, 4.2200028896332e-001,
+ 5.3758710622787e-001, 5.3758710622787e-001, -9.9131089448929e-001, -9.9131089448929e-001,
+ -8.4320825338364e-001, 8.4320825338364e-001, -1.3154006004333e-001, 1.3154006004333e-001,
+ 3.4266072511673e-001, 3.4266072511673e-001, 3.4266072511673e-001, 3.4266072511673e-001,
+ -9.3945920467377e-001, 9.3945920467377e-001, -9.3945920467377e-001, 9.3945920467377e-001,
+ 8.1934750080109e-001, 8.1934750080109e-001, -2.5783121585846e-001, -2.5783121585846e-001,
+ -5.7329720258713e-001, 5.7329720258713e-001, -9.6618992090225e-001, 9.6618992090225e-001,
+ -3.4266072511673e-001, 3.4266072511673e-001, -3.4266072511673e-001, 3.4266072511673e-001,
+ 9.3945920467377e-001, 9.3945920467377e-001, 9.3945920467377e-001, 9.3945920467377e-001,
+ 1.7398387193680e-001, 1.7398387193680e-001, -5.0088536739349e-001, -5.0088536739349e-001,
+ -9.8474848270416e-001, 9.8474848270416e-001, 8.6551362276077e-001, -8.6551362276077e-001,
+ 9.7150391340256e-001, 9.7150391340256e-001, 9.7150391340256e-001, 9.7150391340256e-001,
+ -2.3702360689640e-001, 2.3702360689640e-001, -2.3702360689640e-001, 2.3702360689640e-001,
+ 9.9285042285919e-001, 9.9285042285919e-001, 9.3626564741135e-001, 9.3626564741135e-001,
+ -1.1936521530151e-001, 1.1936521530151e-001, -3.5129275918007e-001, 3.5129275918007e-001,
+ -9.7150391340256e-001, 9.7150391340256e-001, -9.7150391340256e-001, 9.7150391340256e-001,
+ 2.3702360689640e-001, 2.3702360689640e-001, 2.3702360689640e-001, 2.3702360689640e-001,
+ 6.1764734983444e-001, 6.1764734983444e-001, -9.1044133901596e-001, -9.1044133901596e-001,
+ -7.8645521402359e-001, 7.8645521402359e-001, -4.1363841295242e-001, 4.1363841295242e-001,
+ 5.1935601234436e-001, 5.1935601234436e-001, 5.1935601234436e-001, 5.1935601234436e-001,
+ -8.5455799102783e-001, 8.5455799102783e-001, -8.5455799102783e-001, 8.5455799102783e-001,
+ 8.7159508466721e-001, 8.7159508466721e-001, 3.3741116523743e-002, 3.3741116523743e-002,
+ -4.9022650718689e-001, 4.9022650718689e-001, -9.9943053722382e-001, 9.9943053722382e-001,
+ -5.1935601234436e-001, 5.1935601234436e-001, -5.1935601234436e-001, 5.1935601234436e-001,
+ 8.5455799102783e-001, 8.5455799102783e-001, 8.5455799102783e-001, 8.5455799102783e-001,
+ 2.6966834068298e-001, 2.6966834068298e-001, -7.3056280612946e-001, -7.3056280612946e-001,
+ -9.6295326948166e-001, 9.6295326948166e-001, 6.8284553289413e-001, -6.8284553289413e-001,
+ 8.0684757232666e-001, 8.0684757232666e-001, 8.0684757232666e-001, 8.0684757232666e-001,
+ -5.9075969457626e-001, 5.9075969457626e-001, -5.9075969457626e-001, 5.9075969457626e-001,
+ 9.5048606395721e-001, 9.5048606395721e-001, 5.8330863714218e-001, 5.8330863714218e-001,
+ -3.1076717376709e-001, 3.1076717376709e-001, -8.1225049495697e-001, 8.1225049495697e-001,
+ -8.0684757232666e-001, 8.0684757232666e-001, -8.0684757232666e-001, 8.0684757232666e-001,
+ 5.9075969457626e-001, 5.9075969457626e-001, 5.9075969457626e-001, 5.9075969457626e-001,
+ 4.5234960317612e-001, 4.5234960317612e-001, -9.8680943250656e-001, -9.8680943250656e-001,
+ -8.9184069633484e-001, 8.9184069633484e-001, 1.6188633441925e-001, -1.6188633441925e-001,
+ 1.5279719233513e-001, 1.5279719233513e-001, 1.5279719233513e-001, 1.5279719233513e-001,
+ -9.8825758695602e-001, 9.8825758695602e-001, -9.8825758695602e-001, 9.8825758695602e-001,
+ 7.5920915603638e-001, 7.5920915603638e-001, -5.2719926834106e-001, -5.2719926834106e-001,
+ -6.5084671974182e-001, 6.5084671974182e-001, -8.4974169731140e-001, 8.4974169731140e-001,
+ -1.5279719233513e-001, 1.5279719233513e-001, -1.5279719233513e-001, 1.5279719233513e-001,
+ 9.8825758695602e-001, 9.8825758695602e-001, 9.8825758695602e-001, 9.8825758695602e-001,
+ 7.6623864471912e-002, 7.6623864471912e-002, -2.2807210683823e-001, -2.2807210683823e-001,
+ -9.9706006050110e-001, 9.9706006050110e-001, 9.7364425659180e-001, -9.7364425659180e-001,
+ 9.9005818367004e-001, 9.9005818367004e-001, 9.9005818367004e-001, 9.9005818367004e-001,
+ -1.4065824449062e-001, 1.4065824449062e-001, -1.4065824449062e-001, 1.4065824449062e-001,
+ 9.9751144647598e-001, 9.9751144647598e-001, 9.7767734527588e-001, 9.7767734527588e-001,
+ -7.0504575967789e-002, 7.0504575967789e-002, -2.1011185646057e-001, 2.1011185646057e-001,
+ -9.9005818367004e-001, 9.9005818367004e-001, -9.9005818367004e-001, 9.9005818367004e-001,
+ 1.4065824449062e-001, 1.4065824449062e-001, 1.4065824449062e-001, 1.4065824449062e-001,
+ 6.5549284219742e-001, 6.5549284219742e-001, -8.3989363908768e-001, -8.3989363908768e-001,
+ -7.5520133972168e-001, 7.5520133972168e-001, -5.4275071620941e-001, 5.4275071620941e-001,
+ 6.0061651468277e-001, 6.0061651468277e-001, 6.0061651468277e-001, 6.0061651468277e-001,
+ -7.9953724145889e-001, 7.9953724145889e-001, -7.9953724145889e-001, 7.9953724145889e-001,
+ 8.9459949731827e-001, 8.9459949731827e-001, 1.8002295494080e-001, 1.8002295494080e-001,
+ -4.4686883687973e-001, 4.4686883687973e-001, -9.8366242647171e-001, 9.8366242647171e-001,
+ -6.0061651468277e-001, 6.0061651468277e-001, -6.0061651468277e-001, 6.0061651468277e-001,
+ 7.9953724145889e-001, 7.9953724145889e-001, 7.9953724145889e-001, 7.9953724145889e-001,
+ 3.1659337878227e-001, 3.1659337878227e-001, -8.2284986972809e-001, -8.2284986972809e-001,
+ -9.4856137037277e-001, 9.4856137037277e-001, 5.6825894117355e-001, -5.6825894117355e-001,
+ 8.6086690425873e-001, 8.6086690425873e-001, 8.6086690425873e-001, 8.6086690425873e-001,
+ -5.0883013010025e-001, 5.0883013010025e-001, -5.0883013010025e-001, 5.0883013010025e-001,
+ 9.6458977460861e-001, 9.6458977460861e-001, 6.9617712497711e-001, 6.9617712497711e-001,
+ -2.6375469565392e-001, 2.6375469565392e-001, -7.1786999702454e-001, 7.1786999702454e-001,
+ -8.6086690425873e-001, 8.6086690425873e-001, -8.6086690425873e-001, 8.6086690425873e-001,
+ 5.0883013010025e-001, 5.0883013010025e-001, 5.0883013010025e-001, 5.0883013010025e-001,
+ 4.9556526541710e-001, 4.9556526541710e-001, -9.9988222122192e-001, -9.9988222122192e-001,
+ -8.6857068538666e-001, 8.6857068538666e-001, 1.5339195728302e-002, -1.5339195728302e-002,
+ 2.4892760813236e-001, 2.4892760813236e-001, 2.4892760813236e-001, 2.4892760813236e-001,
+ -9.6852207183838e-001, 9.6852207183838e-001, -9.6852207183838e-001, 9.6852207183838e-001,
+ 7.9023021459579e-001, 7.9023021459579e-001, -3.9680999517441e-001, -3.9680999517441e-001,
+ -6.1281007528305e-001, 6.1281007528305e-001, -9.1790074110031e-001, 9.1790074110031e-001,
+ -2.4892760813236e-001, 2.4892760813236e-001, -2.4892760813236e-001, 2.4892760813236e-001,
+ 9.6852207183838e-001, 9.6852207183838e-001, 9.6852207183838e-001, 9.6852207183838e-001,
+ 1.2545499205589e-001, 1.2545499205589e-001, -3.6846682429314e-001, -3.6846682429314e-001,
+ -9.9209928512573e-001, 9.9209928512573e-001, 9.2964088916779e-001, -9.2964088916779e-001,
+ 9.4359344244003e-001, 9.4359344244003e-001, 9.4359344244003e-001, 9.4359344244003e-001,
+ -3.3110630512238e-001, 3.3110630512238e-001, -3.3110630512238e-001, 3.3110630512238e-001,
+ 9.8579752445221e-001, 9.8579752445221e-001, 8.7458664178848e-001, 8.7458664178848e-001,
+ -1.6793830692768e-001, 1.6793830692768e-001, -4.8486924171448e-001, 4.8486924171448e-001,
+ -9.4359344244003e-001, 9.4359344244003e-001, -9.4359344244003e-001, 9.4359344244003e-001,
+ 3.3110630512238e-001, 3.3110630512238e-001, 3.3110630512238e-001, 3.3110630512238e-001,
+ 5.7831382751465e-001, 5.7831382751465e-001, -9.6128034591675e-001, -9.6128034591675e-001,
+ -8.1581437587738e-001, 8.1581437587738e-001, -2.7557194232941e-001, 2.7557194232941e-001,
+ 4.3309381604195e-001, 4.3309381604195e-001, 4.3309381604195e-001, 4.3309381604195e-001,
+ -9.0134882926941e-001, 9.0134882926941e-001, -9.0134882926941e-001, 9.0134882926941e-001,
+ 8.4649091959000e-001, 8.4649091959000e-001, -1.1327093839645e-001, -1.1327093839645e-001,
+ -5.3240311145782e-001, 5.3240311145782e-001, -9.9356412887573e-001, 9.9356412887573e-001,
+ -4.3309381604195e-001, 4.3309381604195e-001, -4.3309381604195e-001, 4.3309381604195e-001,
+ 9.0134882926941e-001, 9.0134882926941e-001, 9.0134882926941e-001, 9.0134882926941e-001,
+ 2.2209362685680e-001, 2.2209362685680e-001, -6.2246125936508e-001, -6.2246125936508e-001,
+ -9.7502535581589e-001, 9.7502535581589e-001, 7.8265058994293e-001, -7.8265058994293e-001,
+ 7.4505776166916e-001, 7.4505776166916e-001, 7.4505776166916e-001, 7.4505776166916e-001,
+ -6.6699993610382e-001, 6.6699993610382e-001, -6.6699993610382e-001, 6.6699993610382e-001,
+ 9.3409252166748e-001, 9.3409252166748e-001, 4.5781326293945e-001, 4.5781326293945e-001,
+ -3.5703095793724e-001, 3.5703095793724e-001, -8.8904833793640e-001, 8.8904833793640e-001,
+ -7.4505776166916e-001, 7.4505776166916e-001, -7.4505776166916e-001, 7.4505776166916e-001,
+ 6.6699993610382e-001, 6.6699993610382e-001, 6.6699993610382e-001, 6.6699993610382e-001,
+ 4.0804415941238e-001, 4.0804415941238e-001, -9.5237499475479e-001, -9.5237499475479e-001,
+ -9.1296219825745e-001, 9.1296219825745e-001, 3.0492925643921e-001, -3.0492925643921e-001,
+ 5.5195245891809e-002, 5.5195245891809e-002, 5.5195245891809e-002, 5.5195245891809e-002,
+ -9.9847555160522e-001, 9.9847555160522e-001, -9.9847555160522e-001, 9.9847555160522e-001,
+ 7.2635912895203e-001, 7.2635912895203e-001, -6.4617598056793e-001, -6.4617598056793e-001,
+ -6.8731534481049e-001, 6.8731534481049e-001, -7.6318836212158e-001, 7.6318836212158e-001,
+ -5.5195245891809e-002, 5.5195245891809e-002, -5.5195245891809e-002, 5.5195245891809e-002,
+ 9.9847555160522e-001, 9.9847555160522e-001, 9.9847555160522e-001, 9.9847555160522e-001,
+ 2.7608146890998e-002, 2.7608146890998e-002, -8.2740262150764e-002, -8.2740262150764e-002,
+ -9.9961882829666e-001, 9.9961882829666e-001, 9.9657112360001e-001, -9.9657112360001e-001,
+ 9.9576741456985e-001, 9.9576741456985e-001, 9.9576741456985e-001, 9.9576741456985e-001,
+ -9.1908961534500e-002, 9.1908961534500e-002, -9.1908961534500e-002, 9.1908961534500e-002,
+ 9.9894130229950e-001, 9.9894130229950e-001, 9.9048507213593e-001, 9.9048507213593e-001,
+ -4.6003185212612e-002, 4.6003185212612e-002, -1.3762012124062e-001, 1.3762012124062e-001,
+ -9.9576741456985e-001, 9.9576741456985e-001, -9.9576741456985e-001, 9.9576741456985e-001,
+ 9.1908961534500e-002, 9.1908961534500e-002, 9.1908961534500e-002, 9.1908961534500e-002,
+ 6.7382901906967e-001, 6.7382901906967e-001, -7.9769080877304e-001, -7.9769080877304e-001,
+ -7.3888731002808e-001, 7.3888731002808e-001, -6.0306668281555e-001, 6.0306668281555e-001,
+ 6.3912445306778e-001, 6.3912445306778e-001, 6.3912445306778e-001, 6.3912445306778e-001,
+ -7.6910334825516e-001, 7.6910334825516e-001, -7.6910334825516e-001, 7.6910334825516e-001,
+ 9.0529674291611e-001, 9.0529674291611e-001, 2.5189781188965e-001, 2.5189781188965e-001,
+ -4.2477968335152e-001, 4.2477968335152e-001, -9.6775388717651e-001, 9.6775388717651e-001,
+ -6.3912445306778e-001, 6.3912445306778e-001, -6.3912445306778e-001, 6.3912445306778e-001,
+ 7.6910334825516e-001, 7.6910334825516e-001, 7.6910334825516e-001, 7.6910334825516e-001,
+ 3.3977690339088e-001, 3.3977690339088e-001, -8.6242389678955e-001, -8.6242389678955e-001,
+ -9.4050604104996e-001, 9.4050604104996e-001, 5.0618660449982e-001, -5.0618660449982e-001,
+ 8.8479709625244e-001, 8.8479709625244e-001, 8.8479709625244e-001, 8.8479709625244e-001,
+ -4.6597650647163e-001, 4.6597650647163e-001, -4.6597650647163e-001, 4.6597650647163e-001,
+ 9.7077214717865e-001, 9.7077214717865e-001, 7.4710059165955e-001, 7.4710059165955e-001,
+ -2.4000303447247e-001, 2.4000303447247e-001, -6.6471099853516e-001, 6.6471099853516e-001,
+ -8.8479709625244e-001, 8.8479709625244e-001, -8.8479709625244e-001, 8.8479709625244e-001,
+ 4.6597650647163e-001, 4.6597650647163e-001, 4.6597650647163e-001, 4.6597650647163e-001,
+ 5.1673179864883e-001, 5.1673179864883e-001, -9.9830156564713e-001, -9.9830156564713e-001,
+ -8.5614734888077e-001, 8.5614734888077e-001, -5.8258235454559e-002, 5.8258235454559e-002,
+ 2.9615089297295e-001, 2.9615089297295e-001, 2.9615089297295e-001, 2.9615089297295e-001,
+ -9.5514118671417e-001, 9.5514118671417e-001, -9.5514118671417e-001, 9.5514118671417e-001,
+ 8.0503129959106e-001, 8.0503129959106e-001, -3.2820999622345e-001, -3.2820999622345e-001,
+ -5.9323233366013e-001, 5.9323233366013e-001, -9.4460481405258e-001, 9.4460481405258e-001,
+ -2.9615089297295e-001, 2.9615089297295e-001, -2.9615089297295e-001, 2.9615089297295e-001,
+ 9.5514118671417e-001, 9.5514118671417e-001, 9.5514118671417e-001, 9.5514118671417e-001,
+ 1.4976453781128e-001, 1.4976453781128e-001, -4.3585705757141e-001, -4.3585705757141e-001,
+ -9.8872166872025e-001, 9.8872166872025e-001, 9.0001589059830e-001, -9.0001589059830e-001,
+ 9.5870345830917e-001, 9.5870345830917e-001, 9.5870345830917e-001, 9.5870345830917e-001,
+ -2.8440755605698e-001, 2.8440755605698e-001, -2.8440755605698e-001, 2.8440755605698e-001,
+ 9.8962199687958e-001, 9.8962199687958e-001, 9.0788608789444e-001, 9.0788608789444e-001,
+ -1.4369504153728e-001, 1.4369504153728e-001, -4.1921687126160e-001, 4.1921687126160e-001,
+ -9.5870345830917e-001, 9.5870345830917e-001, -9.5870345830917e-001, 9.5870345830917e-001,
+ 2.8440755605698e-001, 2.8440755605698e-001, 2.8440755605698e-001, 2.8440755605698e-001,
+ 5.9816074371338e-001, 5.9816074371338e-001, -9.3840348720551e-001, -9.3840348720551e-001,
+ -8.0137616395950e-001, 8.0137616395950e-001, -3.4554141759872e-001, 3.4554141759872e-001,
+ 4.7679924964905e-001, 4.7679924964905e-001, 4.7679924964905e-001, 4.7679924964905e-001,
+ -8.7901222705841e-001, 8.7901222705841e-001, -8.7901222705841e-001, 8.7901222705841e-001,
+ 8.5930180549622e-001, 8.5930180549622e-001, -3.9873003959656e-002, -3.9873003959656e-002,
+ -5.1146888732910e-001, 5.1146888732910e-001, -9.9920475482941e-001, 9.9920475482941e-001,
+ -4.7679924964905e-001, 4.7679924964905e-001, -4.7679924964905e-001, 4.7679924964905e-001,
+ 8.7901222705841e-001, 8.7901222705841e-001, 8.7901222705841e-001, 8.7901222705841e-001,
+ 2.4595504999161e-001, 2.4595504999161e-001, -6.7835009098053e-001, -6.7835009098053e-001,
+ -9.6928125619888e-001, 9.6928125619888e-001, 7.3473888635635e-001, -7.3473888635635e-001,
+ 7.7688843011856e-001, 7.7688843011856e-001, 7.7688843011856e-001, 7.7688843011856e-001,
+ -6.2963825464249e-001, 6.2963825464249e-001, -6.2963825464249e-001, 6.2963825464249e-001,
+ 9.4257318973541e-001, 9.4257318973541e-001, 5.2197527885437e-001, 5.2197527885437e-001,
+ -3.3399966359138e-001, 3.3399966359138e-001, -8.5296058654785e-001, 8.5296058654785e-001,
+ -7.7688843011856e-001, 7.7688843011856e-001, -7.7688843011856e-001, 7.7688843011856e-001,
+ 6.2963825464249e-001, 6.2963825464249e-001, 6.2963825464249e-001, 6.2963825464249e-001,
+ 4.3032649159431e-001, 4.3032649159431e-001, -9.7222638130188e-001, -9.7222638130188e-001,
+ -9.0267330408096e-001, 9.0267330408096e-001, 2.3404198884964e-001, -2.3404198884964e-001,
+ 1.0412164032459e-001, 1.0412164032459e-001, 1.0412164032459e-001, 1.0412164032459e-001,
+ -9.9456459283829e-001, 9.9456459283829e-001, -9.9456459283829e-001, 9.9456459283829e-001,
+ 7.4300795793533e-001, 7.4300795793533e-001, -5.8828157186508e-001, -5.8828157186508e-001,
+ -6.6928261518478e-001, 6.6928261518478e-001, -8.0865615606308e-001, 8.0865615606308e-001,
+ -1.0412164032459e-001, 1.0412164032459e-001, -1.0412164032459e-001, 1.0412164032459e-001,
+ 9.9456459283829e-001, 9.9456459283829e-001, 9.9456459283829e-001, 9.9456459283829e-001,
+ 5.2131704986095e-002, 5.2131704986095e-002, -1.5582841634750e-001, -1.5582841634750e-001,
+ -9.9864023923874e-001, 9.9864023923874e-001, 9.8778414726257e-001, -9.8778414726257e-001,
+ 9.8196387290955e-001, 9.8196387290955e-001, 9.8196387290955e-001, 9.8196387290955e-001,
+ -1.8906867504120e-001, 1.8906867504120e-001, -1.8906867504120e-001, 1.8906867504120e-001,
+ 9.9548077583313e-001, 9.9548077583313e-001, 9.5957154035568e-001, 9.5957154035568e-001,
+ -9.4963498413563e-002, 9.4963498413563e-002, -2.8146496415138e-001, 2.8146496415138e-001,
+ -9.8196387290955e-001, 9.8196387290955e-001, -9.8196387290955e-001, 9.8196387290955e-001,
+ 1.8906867504120e-001, 1.8906867504120e-001, 1.8906867504120e-001, 1.8906867504120e-001,
+ 6.3676190376282e-001, 6.3676190376282e-001, -8.7754523754120e-001, -8.7754523754120e-001,
+ -7.7106052637100e-001, 7.7106052637100e-001, -4.7949379682541e-001, 4.7949379682541e-001,
+ 5.6066161394119e-001, 5.6066161394119e-001, 5.6066161394119e-001, 5.6066161394119e-001,
+ -8.2804501056671e-001, 8.2804501056671e-001, -8.2804501056671e-001, 8.2804501056671e-001,
+ 8.8336330652237e-001, 8.8336330652237e-001, 1.0717236995697e-001, 1.0717236995697e-001,
+ -4.6868884563446e-001, 4.6868884563446e-001, -9.9424028396606e-001, 9.9424028396606e-001,
+ -5.6066161394119e-001, 5.6066161394119e-001, -5.6066161394119e-001, 5.6066161394119e-001,
+ 8.2804501056671e-001, 8.2804501056671e-001, 8.2804501056671e-001, 8.2804501056671e-001,
+ 2.9321917891502e-001, 2.9321917891502e-001, -7.7881658077240e-001, -7.7881658077240e-001,
+ -9.5604526996613e-001, 9.5604526996613e-001, 6.2725180387497e-001, -6.2725180387497e-001,
+ 8.3486288785934e-001, 8.3486288785934e-001, 8.3486288785934e-001, 8.3486288785934e-001,
+ -5.5045801401138e-001, 5.5045801401138e-001, -5.5045801401138e-001, 5.5045801401138e-001,
+ 9.5782643556595e-001, 9.5782643556595e-001, 6.4148104190826e-001, 6.4148104190826e-001,
+ -2.8734746575356e-001, 2.8734746575356e-001, -7.6713907718658e-001, 7.6713907718658e-001,
+ -8.3486288785934e-001, 8.3486288785934e-001, -8.3486288785934e-001, 8.3486288785934e-001,
+ 5.5045801401138e-001, 5.5045801401138e-001, 5.5045801401138e-001, 5.5045801401138e-001,
+ 4.7410023212433e-001, 4.7410023212433e-001, -9.9604463577271e-001, -9.9604463577271e-001,
+ -8.8047087192535e-001, 8.8047087192535e-001, 8.8853478431702e-002, -8.8853478431702e-002,
+ 2.0110464096069e-001, 2.0110464096069e-001, 2.0110464096069e-001, 2.0110464096069e-001,
+ -9.7956979274750e-001, 9.7956979274750e-001, -9.7956979274750e-001, 9.7956979274750e-001,
+ 7.7495306730270e-001, 7.7495306730270e-001, -4.6325987577438e-001, -4.6325987577438e-001,
+ -6.3201874494553e-001, 6.3201874494553e-001, -8.8622254133224e-001, 8.8622254133224e-001,
+ -2.0110464096069e-001, 2.0110464096069e-001, -2.0110464096069e-001, 2.0110464096069e-001,
+ 9.7956979274750e-001, 9.7956979274750e-001, 9.7956979274750e-001, 9.7956979274750e-001,
+ 1.0106986761093e-001, 1.0106986761093e-001, -2.9907983541489e-001, -2.9907983541489e-001,
+ -9.9487930536270e-001, 9.9487930536270e-001, 9.5422804355621e-001, -9.5422804355621e-001,
+ 9.2621022462845e-001, 9.2621022462845e-001, 9.2621022462845e-001, 9.2621022462845e-001,
+ -3.7700742483139e-001, 3.7700742483139e-001, -3.7700742483139e-001, 3.7700742483139e-001,
+ 9.8137921094894e-001, 9.8137921094894e-001, 8.3654773235321e-001, 8.3654773235321e-001,
+ -1.9208040833473e-001, 1.9208040833473e-001, -5.4789412021637e-001, 5.4789412021637e-001,
+ -9.2621022462845e-001, 9.2621022462845e-001, -9.2621022462845e-001, 9.2621022462845e-001,
+ 3.7700742483139e-001, 3.7700742483139e-001, 3.7700742483139e-001, 3.7700742483139e-001,
+ 5.5811852216721e-001, 5.5811852216721e-001, -9.7894805669785e-001, -9.7894805669785e-001,
+ -8.2976120710373e-001, 8.2976120710373e-001, -2.0410901308060e-001, 2.0410901308060e-001,
+ 3.8834506273270e-001, 3.8834506273270e-001, 3.8834506273270e-001, 3.8834506273270e-001,
+ -9.2151403427124e-001, 9.2151403427124e-001, -9.2151403427124e-001, 9.2151403427124e-001,
+ 8.3317017555237e-001, 8.3317017555237e-001, -1.8605518341064e-001, -1.8605518341064e-001,
+ -5.5301672220230e-001, 5.5301672220230e-001, -9.8253935575485e-001, 9.8253935575485e-001,
+ -3.8834506273270e-001, 3.8834506273270e-001, -3.8834506273270e-001, 3.8834506273270e-001,
+ 9.2151403427124e-001, 9.2151403427124e-001, 9.2151403427124e-001, 9.2151403427124e-001,
+ 1.9809842109680e-001, 1.9809842109680e-001, -5.6319934129715e-001, -5.6319934129715e-001,
+ -9.8018211126328e-001, 9.8018211126328e-001, 8.2632100582123e-001, -8.2632100582123e-001,
+ 7.1143215894699e-001, 7.1143215894699e-001, 7.1143215894699e-001, 7.1143215894699e-001,
+ -7.0275473594666e-001, 7.0275473594666e-001, -7.0275473594666e-001, 7.0275473594666e-001,
+ 9.2504924535751e-001, 9.2504924535751e-001, 3.9117038249969e-001, 3.9117038249969e-001,
+ -3.7984722852707e-001, 3.7984722852707e-001, -9.2031830549240e-001, 9.2031830549240e-001,
+ -7.1143215894699e-001, 7.1143215894699e-001, -7.1143215894699e-001, 7.1143215894699e-001,
+ 7.0275473594666e-001, 7.0275473594666e-001, 7.0275473594666e-001, 7.0275473594666e-001,
+ 3.8551607728004e-001, 3.8551607728004e-001, -9.2736244201660e-001, -9.2736244201660e-001,
+ -9.2270112037659e-001, 9.2270112037659e-001, 3.7416404485703e-001, -3.7416404485703e-001,
+ 6.1358846724033e-003, 6.1358846724033e-003, 6.1358846724033e-003, 6.1358846724033e-003,
+ -9.9998116493225e-001, 9.9998116493225e-001, -9.9998116493225e-001, 9.9998116493225e-001,
+ 7.0927280187607e-001, 7.0927280187607e-001, -7.0056885480881e-001, -7.0056885480881e-001,
+ -7.0493412017822e-001, 7.0493412017822e-001, -7.1358478069305e-001, 7.1358478069305e-001,
+ -6.1358846724033e-003, 6.1358846724033e-003, -6.1358846724033e-003, 6.1358846724033e-003,
+ 9.9998116493225e-001, 9.9998116493225e-001, 9.9998116493225e-001, 9.9998116493225e-001,
+ 3.0679567717016e-003, 3.0679567717016e-003, -9.2037543654442e-003, -9.2037543654442e-003,
+ -9.9999529123306e-001, 9.9999529123306e-001, 9.9995762109756e-001, -9.9995762109756e-001
+};
+
+static _MM_ALIGN16 float CTMDLP[] = {
+ 9.2387950420380e-001, 9.2387950420380e-001, 9.2387950420380e-001, 9.2387950420380e-001,
+ -3.8268345594406e-001, 3.8268345594406e-001, -3.8268345594406e-001, 3.8268345594406e-001,
+ 7.0710676908493e-001, 7.0710676908493e-001, 7.0710676908493e-001, 7.0710676908493e-001,
+ -7.0710676908493e-001, 7.0710676908493e-001, -7.0710676908493e-001, 7.0710676908493e-001,
+ 3.8268336653709e-001, 3.8268336653709e-001, 3.8268336653709e-001, 3.8268336653709e-001,
+ -9.2387944459915e-001, 9.2387944459915e-001, -9.2387944459915e-001, 9.2387944459915e-001,
+ 3.8268345594406e-001, 3.8268345594406e-001, 3.8268345594406e-001, 3.8268345594406e-001,
+ -9.2387950420380e-001, 9.2387950420380e-001, -9.2387950420380e-001, 9.2387950420380e-001,
+ -7.0710676908493e-001, 7.0710676908493e-001, -7.0710676908493e-001, 7.0710676908493e-001,
+ 7.0710676908493e-001, 7.0710676908493e-001, 7.0710676908493e-001, 7.0710676908493e-001,
+ -9.2387944459915e-001, -9.2387944459915e-001, -9.2387944459915e-001, -9.2387944459915e-001,
+ 3.8268336653709e-001, -3.8268336653709e-001, 3.8268336653709e-001, -3.8268336653709e-001,
+ 9.8078525066376e-001, 9.8078525066376e-001, 9.8078525066376e-001, 9.8078525066376e-001,
+ -1.9509032368660e-001, 1.9509032368660e-001, -1.9509032368660e-001, 1.9509032368660e-001,
+ 9.2387950420380e-001, 9.2387950420380e-001, 9.2387950420380e-001, 9.2387950420380e-001,
+ -3.8268345594406e-001, 3.8268345594406e-001, -3.8268345594406e-001, 3.8268345594406e-001,
+ 8.3146959543228e-001, 8.3146959543228e-001, 8.3146959543228e-001, 8.3146959543228e-001,
+ -5.5557024478912e-001, 5.5557024478912e-001, -5.5557024478912e-001, 5.5557024478912e-001,
+ 5.5557024478912e-001, 5.5557024478912e-001, 5.5557024478912e-001, 5.5557024478912e-001,
+ -8.3146959543228e-001, 8.3146959543228e-001, -8.3146959543228e-001, 8.3146959543228e-001,
+ -9.2387950420380e-001, 9.2387950420380e-001, -9.2387950420380e-001, 9.2387950420380e-001,
+ 3.8268345594406e-001, 3.8268345594406e-001, 3.8268345594406e-001, 3.8268345594406e-001,
+ -9.8078519105911e-001, -9.8078519105911e-001, -9.8078519105911e-001, -9.8078519105911e-001,
+ -1.9509032368660e-001, 1.9509032368660e-001, -1.9509032368660e-001, 1.9509032368660e-001,
+ 8.3146959543228e-001, 8.3146959543228e-001, 8.3146959543228e-001, 8.3146959543228e-001,
+ -5.5557024478912e-001, 5.5557024478912e-001, -5.5557024478912e-001, 5.5557024478912e-001,
+ 3.8268345594406e-001, 3.8268345594406e-001, 3.8268345594406e-001, 3.8268345594406e-001,
+ -9.2387950420380e-001, 9.2387950420380e-001, -9.2387950420380e-001, 9.2387950420380e-001,
+ -1.9509032368660e-001, -1.9509032368660e-001, -1.9509032368660e-001, -1.9509032368660e-001,
+ -9.8078519105911e-001, 9.8078519105911e-001, -9.8078519105911e-001, 9.8078519105911e-001,
+ 1.9509032368660e-001, 1.9509032368660e-001, 1.9509032368660e-001, 1.9509032368660e-001,
+ -9.8078525066376e-001, 9.8078525066376e-001, -9.8078525066376e-001, 9.8078525066376e-001,
+ -3.8268345594406e-001, 3.8268345594406e-001, -3.8268345594406e-001, 3.8268345594406e-001,
+ 9.2387950420380e-001, 9.2387950420380e-001, 9.2387950420380e-001, 9.2387950420380e-001,
+ -5.5557024478912e-001, -5.5557024478912e-001, -5.5557024478912e-001, -5.5557024478912e-001,
+ 8.3146959543228e-001, -8.3146959543228e-001, 8.3146959543228e-001, -8.3146959543228e-001,
+ 9.9518471956253e-001, 9.9518471956253e-001, 9.9518471956253e-001, 9.9518471956253e-001,
+ -9.8017141222954e-002, 9.8017141222954e-002, -9.8017141222954e-002, 9.8017141222954e-002,
+ 9.8078525066376e-001, 9.8078525066376e-001, 9.8078525066376e-001, 9.8078525066376e-001,
+ -1.9509032368660e-001, 1.9509032368660e-001, -1.9509032368660e-001, 1.9509032368660e-001,
+ 9.5694035291672e-001, 9.5694035291672e-001, 9.5694035291672e-001, 9.5694035291672e-001,
+ -2.9028466343880e-001, 2.9028466343880e-001, -2.9028466343880e-001, 2.9028466343880e-001,
+ 6.3439327478409e-001, 6.3439327478409e-001, 6.3439327478409e-001, 6.3439327478409e-001,
+ -7.7301043272018e-001, 7.7301043272018e-001, -7.7301043272018e-001, 7.7301043272018e-001,
+ -9.8078525066376e-001, 9.8078525066376e-001, -9.8078525066376e-001, 9.8078525066376e-001,
+ 1.9509032368660e-001, 1.9509032368660e-001, 1.9509032368660e-001, 1.9509032368660e-001,
+ -8.8192117214203e-001, -8.8192117214203e-001, -8.8192117214203e-001, -8.8192117214203e-001,
+ -4.7139671444893e-001, 4.7139671444893e-001, -4.7139671444893e-001, 4.7139671444893e-001,
+ 8.8192123174667e-001, 8.8192123174667e-001, 8.8192123174667e-001, 8.8192123174667e-001,
+ -4.7139674425125e-001, 4.7139674425125e-001, -4.7139674425125e-001, 4.7139674425125e-001,
+ 5.5557024478912e-001, 5.5557024478912e-001, 5.5557024478912e-001, 5.5557024478912e-001,
+ -8.3146959543228e-001, 8.3146959543228e-001, -8.3146959543228e-001, 8.3146959543228e-001,
+ 9.8017111420631e-002, 9.8017111420631e-002, 9.8017111420631e-002, 9.8017111420631e-002,
+ -9.9518465995789e-001, 9.9518465995789e-001, -9.9518465995789e-001, 9.9518465995789e-001,
+ 2.9028469324112e-001, 2.9028469324112e-001, 2.9028469324112e-001, 2.9028469324112e-001,
+ -9.5694035291672e-001, 9.5694035291672e-001, -9.5694035291672e-001, 9.5694035291672e-001,
+ -5.5557024478912e-001, 5.5557024478912e-001, -5.5557024478912e-001, 5.5557024478912e-001,
+ 8.3146959543228e-001, 8.3146959543228e-001, 8.3146959543228e-001, 8.3146959543228e-001,
+ -7.7301049232483e-001, -7.7301049232483e-001, -7.7301049232483e-001, -7.7301049232483e-001,
+ 6.3439327478409e-001, -6.3439327478409e-001, 6.3439327478409e-001, -6.3439327478409e-001,
+ 9.5694035291672e-001, 9.5694035291672e-001, 9.5694035291672e-001, 9.5694035291672e-001,
+ -2.9028469324112e-001, 2.9028469324112e-001, -2.9028469324112e-001, 2.9028469324112e-001,
+ 8.3146959543228e-001, 8.3146959543228e-001, 8.3146959543228e-001, 8.3146959543228e-001,
+ -5.5557024478912e-001, 5.5557024478912e-001, -5.5557024478912e-001, 5.5557024478912e-001,
+ 6.3439327478409e-001, 6.3439327478409e-001, 6.3439327478409e-001, 6.3439327478409e-001,
+ -7.7301049232483e-001, 7.7301049232483e-001, -7.7301049232483e-001, 7.7301049232483e-001,
+ 4.7139674425125e-001, 4.7139674425125e-001, 4.7139674425125e-001, 4.7139674425125e-001,
+ -8.8192123174667e-001, 8.8192123174667e-001, -8.8192123174667e-001, 8.8192123174667e-001,
+ -8.3146959543228e-001, 8.3146959543228e-001, -8.3146959543228e-001, 8.3146959543228e-001,
+ 5.5557024478912e-001, 5.5557024478912e-001, 5.5557024478912e-001, 5.5557024478912e-001,
+ -9.9518465995789e-001, -9.9518465995789e-001, -9.9518465995789e-001, -9.9518465995789e-001,
+ 9.8017111420631e-002, -9.8017111420631e-002, 9.8017111420631e-002, -9.8017111420631e-002,
+ 7.7301043272018e-001, 7.7301043272018e-001, 7.7301043272018e-001, 7.7301043272018e-001,
+ -6.3439327478409e-001, 6.3439327478409e-001, -6.3439327478409e-001, 6.3439327478409e-001,
+ 1.9509032368660e-001, 1.9509032368660e-001, 1.9509032368660e-001, 1.9509032368660e-001,
+ -9.8078525066376e-001, 9.8078525066376e-001, -9.8078525066376e-001, 9.8078525066376e-001,
+ -4.7139671444893e-001, -4.7139671444893e-001, -4.7139671444893e-001, -4.7139671444893e-001,
+ -8.8192117214203e-001, 8.8192117214203e-001, -8.8192117214203e-001, 8.8192117214203e-001,
+ 9.8017141222954e-002, 9.8017141222954e-002, 9.8017141222954e-002, 9.8017141222954e-002,
+ -9.9518471956253e-001, 9.9518471956253e-001, -9.9518471956253e-001, 9.9518471956253e-001,
+ -1.9509032368660e-001, 1.9509032368660e-001, -1.9509032368660e-001, 1.9509032368660e-001,
+ 9.8078525066376e-001, 9.8078525066376e-001, 9.8078525066376e-001, 9.8078525066376e-001,
+ -2.9028466343880e-001, -2.9028466343880e-001, -2.9028466343880e-001, -2.9028466343880e-001,
+ 9.5694035291672e-001, -9.5694035291672e-001, 9.5694035291672e-001, -9.5694035291672e-001,
+ 9.9879544973373e-001, 9.9879544973373e-001, 9.9879544973373e-001, 9.9879544973373e-001,
+ -4.9067676067352e-002, 4.9067676067352e-002, -4.9067676067352e-002, 4.9067676067352e-002,
+ 9.9518471956253e-001, 9.9518471956253e-001, 9.9518471956253e-001, 9.9518471956253e-001,
+ -9.8017141222954e-002, 9.8017141222954e-002, -9.8017141222954e-002, 9.8017141222954e-002,
+ 9.8917651176453e-001, 9.8917651176453e-001, 9.8917651176453e-001, 9.8917651176453e-001,
+ -1.4673046767712e-001, 1.4673046767712e-001, -1.4673046767712e-001, 1.4673046767712e-001,
+ 6.7155897617340e-001, 6.7155897617340e-001, 6.7155897617340e-001, 6.7155897617340e-001,
+ -7.4095112085342e-001, 7.4095112085342e-001, -7.4095112085342e-001, 7.4095112085342e-001,
+ -9.9518471956253e-001, 9.9518471956253e-001, -9.9518471956253e-001, 9.9518471956253e-001,
+ 9.8017141222954e-002, 9.8017141222954e-002, 9.8017141222954e-002, 9.8017141222954e-002,
+ -8.0320751667023e-001, -8.0320751667023e-001, -8.0320751667023e-001, -8.0320751667023e-001,
+ -5.9569936990738e-001, 5.9569936990738e-001, -5.9569936990738e-001, 5.9569936990738e-001,
+ 9.0398931503296e-001, 9.0398931503296e-001, 9.0398931503296e-001, 9.0398931503296e-001,
+ -4.2755511403084e-001, 4.2755511403084e-001, -4.2755511403084e-001, 4.2755511403084e-001,
+ 6.3439327478409e-001, 6.3439327478409e-001, 6.3439327478409e-001, 6.3439327478409e-001,
+ -7.7301043272018e-001, 7.7301043272018e-001, -7.7301043272018e-001, 7.7301043272018e-001,
+ 2.4298018217087e-001, 2.4298018217087e-001, 2.4298018217087e-001, 2.4298018217087e-001,
+ -9.7003120183945e-001, 9.7003120183945e-001, -9.7003120183945e-001, 9.7003120183945e-001,
+ 3.3688986301422e-001, 3.3688986301422e-001, 3.3688986301422e-001, 3.3688986301422e-001,
+ -9.4154405593872e-001, 9.4154405593872e-001, -9.4154405593872e-001, 9.4154405593872e-001,
+ -6.3439327478409e-001, 6.3439327478409e-001, -6.3439327478409e-001, 6.3439327478409e-001,
+ 7.7301043272018e-001, 7.7301043272018e-001, 7.7301043272018e-001, 7.7301043272018e-001,
+ -8.5772860050201e-001, -8.5772860050201e-001, -8.5772860050201e-001, -8.5772860050201e-001,
+ 5.1410275697708e-001, -5.1410275697708e-001, 5.1410275697708e-001, -5.1410275697708e-001,
+ 9.7003126144409e-001, 9.7003126144409e-001, 9.7003126144409e-001, 9.7003126144409e-001,
+ -2.4298018217087e-001, 2.4298018217087e-001, -2.4298018217087e-001, 2.4298018217087e-001,
+ 8.8192123174667e-001, 8.8192123174667e-001, 8.8192123174667e-001, 8.8192123174667e-001,
+ -4.7139674425125e-001, 4.7139674425125e-001, -4.7139674425125e-001, 4.7139674425125e-001,
+ 7.4095112085342e-001, 7.4095112085342e-001, 7.4095112085342e-001, 7.4095112085342e-001,
+ -6.7155897617340e-001, 6.7155897617340e-001, -6.7155897617340e-001, 6.7155897617340e-001,
+ 5.1410275697708e-001, 5.1410275697708e-001, 5.1410275697708e-001, 5.1410275697708e-001,
+ -8.5772860050201e-001, 8.5772860050201e-001, -8.5772860050201e-001, 8.5772860050201e-001,
+ -8.8192123174667e-001, 8.8192123174667e-001, -8.8192123174667e-001, 8.8192123174667e-001,
+ 4.7139674425125e-001, 4.7139674425125e-001, 4.7139674425125e-001, 4.7139674425125e-001,
+ -9.9879539012909e-001, -9.9879539012909e-001, -9.9879539012909e-001, -9.9879539012909e-001,
+ -4.9067672342062e-002, 4.9067672342062e-002, -4.9067672342062e-002, 4.9067672342062e-002,
+ 8.0320751667023e-001, 8.0320751667023e-001, 8.0320751667023e-001, 8.0320751667023e-001,
+ -5.9569931030273e-001, 5.9569931030273e-001, -5.9569931030273e-001, 5.9569931030273e-001,
+ 2.9028469324112e-001, 2.9028469324112e-001, 2.9028469324112e-001, 2.9028469324112e-001,
+ -9.5694035291672e-001, 9.5694035291672e-001, -9.5694035291672e-001, 9.5694035291672e-001,
+ -3.3688989281654e-001, -3.3688989281654e-001, -3.3688989281654e-001, -3.3688989281654e-001,
+ -9.4154405593872e-001, 9.4154405593872e-001, -9.4154405593872e-001, 9.4154405593872e-001,
+ 1.4673048257828e-001, 1.4673048257828e-001, 1.4673048257828e-001, 1.4673048257828e-001,
+ -9.8917651176453e-001, 9.8917651176453e-001, -9.8917651176453e-001, 9.8917651176453e-001,
+ -2.9028469324112e-001, 2.9028469324112e-001, -2.9028469324112e-001, 2.9028469324112e-001,
+ 9.5694035291672e-001, 9.5694035291672e-001, 9.5694035291672e-001, 9.5694035291672e-001,
+ -4.2755511403084e-001, -4.2755511403084e-001, -4.2755511403084e-001, -4.2755511403084e-001,
+ 9.0398931503296e-001, -9.0398931503296e-001, 9.0398931503296e-001, -9.0398931503296e-001,
+ 9.8917651176453e-001, 9.8917651176453e-001, 9.8917651176453e-001, 9.8917651176453e-001,
+ -1.4673048257828e-001, 1.4673048257828e-001, -1.4673048257828e-001, 1.4673048257828e-001,
+ 9.5694035291672e-001, 9.5694035291672e-001, 9.5694035291672e-001, 9.5694035291672e-001,
+ -2.9028469324112e-001, 2.9028469324112e-001, -2.9028469324112e-001, 2.9028469324112e-001,
+ 9.0398931503296e-001, 9.0398931503296e-001, 9.0398931503296e-001, 9.0398931503296e-001,
+ -4.2755511403084e-001, 4.2755511403084e-001, -4.2755511403084e-001, 4.2755511403084e-001,
+ 5.9569931030273e-001, 5.9569931030273e-001, 5.9569931030273e-001, 5.9569931030273e-001,
+ -8.0320751667023e-001, 8.0320751667023e-001, -8.0320751667023e-001, 8.0320751667023e-001,
+ -9.5694035291672e-001, 9.5694035291672e-001, -9.5694035291672e-001, 9.5694035291672e-001,
+ 2.9028469324112e-001, 2.9028469324112e-001, 2.9028469324112e-001, 2.9028469324112e-001,
+ -9.4154405593872e-001, -9.4154405593872e-001, -9.4154405593872e-001, -9.4154405593872e-001,
+ -3.3688989281654e-001, 3.3688989281654e-001, -3.3688989281654e-001, 3.3688989281654e-001,
+ 8.5772860050201e-001, 8.5772860050201e-001, 8.5772860050201e-001, 8.5772860050201e-001,
+ -5.1410275697708e-001, 5.1410275697708e-001, -5.1410275697708e-001, 5.1410275697708e-001,
+ 4.7139674425125e-001, 4.7139674425125e-001, 4.7139674425125e-001, 4.7139674425125e-001,
+ -8.8192123174667e-001, 8.8192123174667e-001, -8.8192123174667e-001, 8.8192123174667e-001,
+ -4.9067672342062e-002, -4.9067672342062e-002, -4.9067672342062e-002, -4.9067672342062e-002,
+ -9.9879539012909e-001, 9.9879539012909e-001, -9.9879539012909e-001, 9.9879539012909e-001,
+ 2.4298018217087e-001, 2.4298018217087e-001, 2.4298018217087e-001, 2.4298018217087e-001,
+ -9.7003126144409e-001, 9.7003126144409e-001, -9.7003126144409e-001, 9.7003126144409e-001,
+ -4.7139674425125e-001, 4.7139674425125e-001, -4.7139674425125e-001, 4.7139674425125e-001,
+ 8.8192123174667e-001, 8.8192123174667e-001, 8.8192123174667e-001, 8.8192123174667e-001,
+ -6.7155897617340e-001, -6.7155897617340e-001, -6.7155897617340e-001, -6.7155897617340e-001,
+ 7.4095112085342e-001, -7.4095112085342e-001, 7.4095112085342e-001, -7.4095112085342e-001,
+ 9.4154405593872e-001, 9.4154405593872e-001, 9.4154405593872e-001, 9.4154405593872e-001,
+ -3.3688986301422e-001, 3.3688986301422e-001, -3.3688986301422e-001, 3.3688986301422e-001,
+ 7.7301043272018e-001, 7.7301043272018e-001, 7.7301043272018e-001, 7.7301043272018e-001,
+ -6.3439327478409e-001, 6.3439327478409e-001, -6.3439327478409e-001, 6.3439327478409e-001,
+ 5.1410275697708e-001, 5.1410275697708e-001, 5.1410275697708e-001, 5.1410275697708e-001,
+ -8.5772860050201e-001, 8.5772860050201e-001, -8.5772860050201e-001, 8.5772860050201e-001,
+ 4.2755511403084e-001, 4.2755511403084e-001, 4.2755511403084e-001, 4.2755511403084e-001,
+ -9.0398931503296e-001, 9.0398931503296e-001, -9.0398931503296e-001, 9.0398931503296e-001,
+ -7.7301043272018e-001, 7.7301043272018e-001, -7.7301043272018e-001, 7.7301043272018e-001,
+ 6.3439327478409e-001, 6.3439327478409e-001, 6.3439327478409e-001, 6.3439327478409e-001,
+ -9.7003120183945e-001, -9.7003120183945e-001, -9.7003120183945e-001, -9.7003120183945e-001,
+ 2.4298018217087e-001, -2.4298018217087e-001, 2.4298018217087e-001, -2.4298018217087e-001,
+ 7.4095112085342e-001, 7.4095112085342e-001, 7.4095112085342e-001, 7.4095112085342e-001,
+ -6.7155897617340e-001, 6.7155897617340e-001, -6.7155897617340e-001, 6.7155897617340e-001,
+ 9.8017141222954e-002, 9.8017141222954e-002, 9.8017141222954e-002, 9.8017141222954e-002,
+ -9.9518471956253e-001, 9.9518471956253e-001, -9.9518471956253e-001, 9.9518471956253e-001,
+ -5.9569936990738e-001, -5.9569936990738e-001, -5.9569936990738e-001, -5.9569936990738e-001,
+ -8.0320751667023e-001, 8.0320751667023e-001, -8.0320751667023e-001, 8.0320751667023e-001,
+ 4.9067676067352e-002, 4.9067676067352e-002, 4.9067676067352e-002, 4.9067676067352e-002,
+ -9.9879544973373e-001, 9.9879544973373e-001, -9.9879544973373e-001, 9.9879544973373e-001,
+ -9.8017141222954e-002, 9.8017141222954e-002, -9.8017141222954e-002, 9.8017141222954e-002,
+ 9.9518471956253e-001, 9.9518471956253e-001, 9.9518471956253e-001, 9.9518471956253e-001,
+ -1.4673046767712e-001, -1.4673046767712e-001, -1.4673046767712e-001, -1.4673046767712e-001,
+ 9.8917651176453e-001, -9.8917651176453e-001, 9.8917651176453e-001, -9.8917651176453e-001,
+ 9.9969881772995e-001, 9.9969881772995e-001, 9.9969881772995e-001, 9.9969881772995e-001,
+ -2.4541229009628e-002, 2.4541229009628e-002, -2.4541229009628e-002, 2.4541229009628e-002,
+ 9.9879544973373e-001, 9.9879544973373e-001, 9.9879544973373e-001, 9.9879544973373e-001,
+ -4.9067676067352e-002, 4.9067676067352e-002, -4.9067676067352e-002, 4.9067676067352e-002,
+ 9.9729043245316e-001, 9.9729043245316e-001, 9.9729043245316e-001, 9.9729043245316e-001,
+ -7.3564566671848e-002, 7.3564566671848e-002, -7.3564566671848e-002, 7.3564566671848e-002,
+ 6.8954056501389e-001, 6.8954056501389e-001, 6.8954056501389e-001, 6.8954056501389e-001,
+ -7.2424709796906e-001, 7.2424709796906e-001, -7.2424709796906e-001, 7.2424709796906e-001,
+ -9.9879544973373e-001, 9.9879544973373e-001, -9.9879544973373e-001, 9.9879544973373e-001,
+ 4.9067676067352e-002, 4.9067676067352e-002, 4.9067676067352e-002, 4.9067676067352e-002,
+ -7.5720882415771e-001, -7.5720882415771e-001, -7.5720882415771e-001, -7.5720882415771e-001,
+ -6.5317285060883e-001, 6.5317285060883e-001, -6.5317285060883e-001, 6.5317285060883e-001,
+ 9.1420972347260e-001, 9.1420972347260e-001, 9.1420972347260e-001, 9.1420972347260e-001,
+ -4.0524131059647e-001, 4.0524131059647e-001, -4.0524131059647e-001, 4.0524131059647e-001,
+ 6.7155897617340e-001, 6.7155897617340e-001, 6.7155897617340e-001, 6.7155897617340e-001,
+ -7.4095112085342e-001, 7.4095112085342e-001, -7.4095112085342e-001, 7.4095112085342e-001,
+ 3.1368172168732e-001, 3.1368172168732e-001, 3.1368172168732e-001, 3.1368172168732e-001,
+ -9.4952815771103e-001, 9.4952815771103e-001, -9.4952815771103e-001, 9.4952815771103e-001,
+ 3.5989505052567e-001, 3.5989505052567e-001, 3.5989505052567e-001, 3.5989505052567e-001,
+ -9.3299281597137e-001, 9.3299281597137e-001, -9.3299281597137e-001, 9.3299281597137e-001,
+ -6.7155897617340e-001, 6.7155897617340e-001, -6.7155897617340e-001, 6.7155897617340e-001,
+ 7.4095112085342e-001, 7.4095112085342e-001, 7.4095112085342e-001, 7.4095112085342e-001,
+ -8.9322435855865e-001, -8.9322435855865e-001, -8.9322435855865e-001, -8.9322435855865e-001,
+ 4.4961130619049e-001, -4.4961130619049e-001, 4.4961130619049e-001, -4.4961130619049e-001,
+ 9.7570210695267e-001, 9.7570210695267e-001, 9.7570210695267e-001, 9.7570210695267e-001,
+ -2.1910125017166e-001, 2.1910125017166e-001, -2.1910125017166e-001, 2.1910125017166e-001,
+ 9.0398931503296e-001, 9.0398931503296e-001, 9.0398931503296e-001, 9.0398931503296e-001,
+ -4.2755511403084e-001, 4.2755511403084e-001, -4.2755511403084e-001, 4.2755511403084e-001,
+ 7.8834640979767e-001, 7.8834640979767e-001, 7.8834640979767e-001, 7.8834640979767e-001,
+ -6.1523157358170e-001, 6.1523157358170e-001, -6.1523157358170e-001, 6.1523157358170e-001,
+ 5.3499764204025e-001, 5.3499764204025e-001, 5.3499764204025e-001, 5.3499764204025e-001,
+ -8.4485357999802e-001, 8.4485357999802e-001, -8.4485357999802e-001, 8.4485357999802e-001,
+ -9.0398931503296e-001, 9.0398931503296e-001, -9.0398931503296e-001, 9.0398931503296e-001,
+ 4.2755511403084e-001, 4.2755511403084e-001, 4.2755511403084e-001, 4.2755511403084e-001,
+ -9.9247956275940e-001, -9.9247956275940e-001, -9.9247956275940e-001, -9.9247956275940e-001,
+ -1.2241072207689e-001, 1.2241072207689e-001, -1.2241072207689e-001, 1.2241072207689e-001,
+ 8.1758481264114e-001, 8.1758481264114e-001, 8.1758481264114e-001, 8.1758481264114e-001,
+ -5.7580822706223e-001, 5.7580822706223e-001, -5.7580822706223e-001, 5.7580822706223e-001,
+ 3.3688986301422e-001, 3.3688986301422e-001, 3.3688986301422e-001, 3.3688986301422e-001,
+ -9.4154405593872e-001, 9.4154405593872e-001, -9.4154405593872e-001, 9.4154405593872e-001,
+ -2.6671281456947e-001, -2.6671281456947e-001, -2.6671281456947e-001, -2.6671281456947e-001,
+ -9.6377599239349e-001, 9.6377599239349e-001, -9.6377599239349e-001, 9.6377599239349e-001,
+ 1.7096188664436e-001, 1.7096188664436e-001, 1.7096188664436e-001, 1.7096188664436e-001,
+ -9.8527765274048e-001, 9.8527765274048e-001, -9.8527765274048e-001, 9.8527765274048e-001,
+ -3.3688986301422e-001, 3.3688986301422e-001, -3.3688986301422e-001, 3.3688986301422e-001,
+ 9.4154405593872e-001, 9.4154405593872e-001, 9.4154405593872e-001, 9.4154405593872e-001,
+ -4.9289822578430e-001, -4.9289822578430e-001, -4.9289822578430e-001, -4.9289822578430e-001,
+ 8.7008702754974e-001, -8.7008702754974e-001, 8.7008702754974e-001, -8.7008702754974e-001,
+ 9.9247956275940e-001, 9.9247956275940e-001, 9.9247956275940e-001, 9.9247956275940e-001,
+ -1.2241067737341e-001, 1.2241067737341e-001, -1.2241067737341e-001, 1.2241067737341e-001,
+ 9.7003126144409e-001, 9.7003126144409e-001, 9.7003126144409e-001, 9.7003126144409e-001,
+ -2.4298018217087e-001, 2.4298018217087e-001, -2.4298018217087e-001, 2.4298018217087e-001,
+ 9.3299281597137e-001, 9.3299281597137e-001, 9.3299281597137e-001, 9.3299281597137e-001,
+ -3.5989505052567e-001, 3.5989505052567e-001, -3.5989505052567e-001, 3.5989505052567e-001,
+ 6.1523163318634e-001, 6.1523163318634e-001, 6.1523163318634e-001, 6.1523163318634e-001,
+ -7.8834640979767e-001, 7.8834640979767e-001, -7.8834640979767e-001, 7.8834640979767e-001,
+ -9.7003126144409e-001, 9.7003126144409e-001, -9.7003126144409e-001, 9.7003126144409e-001,
+ 2.4298018217087e-001, 2.4298018217087e-001, 2.4298018217087e-001, 2.4298018217087e-001,
+ -9.1420966386795e-001, -9.1420966386795e-001, -9.1420966386795e-001, -9.1420966386795e-001,
+ -4.0524142980576e-001, 4.0524142980576e-001, -4.0524142980576e-001, 4.0524142980576e-001,
+ 8.7008696794510e-001, 8.7008696794510e-001, 8.7008696794510e-001, 8.7008696794510e-001,
+ -4.9289819598198e-001, 4.9289819598198e-001, -4.9289819598198e-001, 4.9289819598198e-001,
+ 5.1410275697708e-001, 5.1410275697708e-001, 5.1410275697708e-001, 5.1410275697708e-001,
+ -8.5772860050201e-001, 8.5772860050201e-001, -8.5772860050201e-001, 8.5772860050201e-001,
+ 2.4541208520532e-002, 2.4541208520532e-002, 2.4541208520532e-002, 2.4541208520532e-002,
+ -9.9969875812531e-001, 9.9969875812531e-001, -9.9969875812531e-001, 9.9969875812531e-001,
+ 2.6671275496483e-001, 2.6671275496483e-001, 2.6671275496483e-001, 2.6671275496483e-001,
+ -9.6377605199814e-001, 9.6377605199814e-001, -9.6377605199814e-001, 9.6377605199814e-001,
+ -5.1410275697708e-001, 5.1410275697708e-001, -5.1410275697708e-001, 5.1410275697708e-001,
+ 8.5772860050201e-001, 8.5772860050201e-001, 8.5772860050201e-001, 8.5772860050201e-001,
+ -7.2424709796906e-001, -7.2424709796906e-001, -7.2424709796906e-001, -7.2424709796906e-001,
+ 6.8954050540924e-001, -6.8954050540924e-001, 6.8954050540924e-001, -6.8954050540924e-001,
+ 9.4952815771103e-001, 9.4952815771103e-001, 9.4952815771103e-001, 9.4952815771103e-001,
+ -3.1368175148964e-001, 3.1368175148964e-001, -3.1368175148964e-001, 3.1368175148964e-001,
+ 8.0320751667023e-001, 8.0320751667023e-001, 8.0320751667023e-001, 8.0320751667023e-001,
+ -5.9569931030273e-001, 5.9569931030273e-001, -5.9569931030273e-001, 5.9569931030273e-001,
+ 5.7580816745758e-001, 5.7580816745758e-001, 5.7580816745758e-001, 5.7580816745758e-001,
+ -8.1758481264114e-001, 8.1758481264114e-001, -8.1758481264114e-001, 8.1758481264114e-001,
+ 4.4961133599281e-001, 4.4961133599281e-001, 4.4961133599281e-001, 4.4961133599281e-001,
+ -8.9322429895401e-001, 8.9322429895401e-001, -8.9322429895401e-001, 8.9322429895401e-001,
+ -8.0320751667023e-001, 8.0320751667023e-001, -8.0320751667023e-001, 8.0320751667023e-001,
+ 5.9569931030273e-001, 5.9569931030273e-001, 5.9569931030273e-001, 5.9569931030273e-001,
+ -9.8527759313583e-001, -9.8527759313583e-001, -9.8527759313583e-001, -9.8527759313583e-001,
+ 1.7096188664436e-001, -1.7096188664436e-001, 1.7096188664436e-001, -1.7096188664436e-001,
+ 7.5720882415771e-001, 7.5720882415771e-001, 7.5720882415771e-001, 7.5720882415771e-001,
+ -6.5317285060883e-001, 6.5317285060883e-001, -6.5317285060883e-001, 6.5317285060883e-001,
+ 1.4673048257828e-001, 1.4673048257828e-001, 1.4673048257828e-001, 1.4673048257828e-001,
+ -9.8917651176453e-001, 9.8917651176453e-001, -9.8917651176453e-001, 9.8917651176453e-001,
+ -5.3499764204025e-001, -5.3499764204025e-001, -5.3499764204025e-001, -5.3499764204025e-001,
+ -8.4485352039337e-001, 8.4485352039337e-001, -8.4485352039337e-001, 8.4485352039337e-001,
+ 7.3564566671848e-002, 7.3564566671848e-002, 7.3564566671848e-002, 7.3564566671848e-002,
+ -9.9729043245316e-001, 9.9729043245316e-001, -9.9729043245316e-001, 9.9729043245316e-001,
+ -1.4673048257828e-001, 1.4673048257828e-001, -1.4673048257828e-001, 1.4673048257828e-001,
+ 9.8917651176453e-001, 9.8917651176453e-001, 9.8917651176453e-001, 9.8917651176453e-001,
+ -2.1910125017166e-001, -2.1910125017166e-001, -2.1910125017166e-001, -2.1910125017166e-001,
+ 9.7570210695267e-001, -9.7570210695267e-001, 9.7570210695267e-001, -9.7570210695267e-001,
+ 9.9729043245316e-001, 9.9729043245316e-001, 9.9729043245316e-001, 9.9729043245316e-001,
+ -7.3564566671848e-002, 7.3564566671848e-002, -7.3564566671848e-002, 7.3564566671848e-002,
+ 9.8917651176453e-001, 9.8917651176453e-001, 9.8917651176453e-001, 9.8917651176453e-001,
+ -1.4673048257828e-001, 1.4673048257828e-001, -1.4673048257828e-001, 1.4673048257828e-001,
+ 9.7570210695267e-001, 9.7570210695267e-001, 9.7570210695267e-001, 9.7570210695267e-001,
+ -2.1910125017166e-001, 2.1910125017166e-001, -2.1910125017166e-001, 2.1910125017166e-001,
+ 6.5317285060883e-001, 6.5317285060883e-001, 6.5317285060883e-001, 6.5317285060883e-001,
+ -7.5720882415771e-001, 7.5720882415771e-001, -7.5720882415771e-001, 7.5720882415771e-001,
+ -9.8917651176453e-001, 9.8917651176453e-001, -9.8917651176453e-001, 9.8917651176453e-001,
+ 1.4673048257828e-001, 1.4673048257828e-001, 1.4673048257828e-001, 1.4673048257828e-001,
+ -8.4485352039337e-001, -8.4485352039337e-001, -8.4485352039337e-001, -8.4485352039337e-001,
+ -5.3499764204025e-001, 5.3499764204025e-001, -5.3499764204025e-001, 5.3499764204025e-001,
+ 8.9322429895401e-001, 8.9322429895401e-001, 8.9322429895401e-001, 8.9322429895401e-001,
+ -4.4961133599281e-001, 4.4961133599281e-001, -4.4961133599281e-001, 4.4961133599281e-001,
+ 5.9569931030273e-001, 5.9569931030273e-001, 5.9569931030273e-001, 5.9569931030273e-001,
+ -8.0320751667023e-001, 8.0320751667023e-001, -8.0320751667023e-001, 8.0320751667023e-001,
+ 1.7096188664436e-001, 1.7096188664436e-001, 1.7096188664436e-001, 1.7096188664436e-001,
+ -9.8527759313583e-001, 9.8527759313583e-001, -9.8527759313583e-001, 9.8527759313583e-001,
+ 3.1368175148964e-001, 3.1368175148964e-001, 3.1368175148964e-001, 3.1368175148964e-001,
+ -9.4952815771103e-001, 9.4952815771103e-001, -9.4952815771103e-001, 9.4952815771103e-001,
+ -5.9569931030273e-001, 5.9569931030273e-001, -5.9569931030273e-001, 5.9569931030273e-001,
+ 8.0320751667023e-001, 8.0320751667023e-001, 8.0320751667023e-001, 8.0320751667023e-001,
+ -8.1758481264114e-001, -8.1758481264114e-001, -8.1758481264114e-001, -8.1758481264114e-001,
+ 5.7580816745758e-001, -5.7580816745758e-001, 5.7580816745758e-001, -5.7580816745758e-001,
+ 9.6377605199814e-001, 9.6377605199814e-001, 9.6377605199814e-001, 9.6377605199814e-001,
+ -2.6671275496483e-001, 2.6671275496483e-001, -2.6671275496483e-001, 2.6671275496483e-001,
+ 8.5772860050201e-001, 8.5772860050201e-001, 8.5772860050201e-001, 8.5772860050201e-001,
+ -5.1410275697708e-001, 5.1410275697708e-001, -5.1410275697708e-001, 5.1410275697708e-001,
+ 6.8954050540924e-001, 6.8954050540924e-001, 6.8954050540924e-001, 6.8954050540924e-001,
+ -7.2424709796906e-001, 7.2424709796906e-001, -7.2424709796906e-001, 7.2424709796906e-001,
+ 4.9289819598198e-001, 4.9289819598198e-001, 4.9289819598198e-001, 4.9289819598198e-001,
+ -8.7008696794510e-001, 8.7008696794510e-001, -8.7008696794510e-001, 8.7008696794510e-001,
+ -8.5772860050201e-001, 8.5772860050201e-001, -8.5772860050201e-001, 8.5772860050201e-001,
+ 5.1410275697708e-001, 5.1410275697708e-001, 5.1410275697708e-001, 5.1410275697708e-001,
+ -9.9969875812531e-001, -9.9969875812531e-001, -9.9969875812531e-001, -9.9969875812531e-001,
+ 2.4541208520532e-002, -2.4541208520532e-002, 2.4541208520532e-002, -2.4541208520532e-002,
+ 7.8834640979767e-001, 7.8834640979767e-001, 7.8834640979767e-001, 7.8834640979767e-001,
+ -6.1523163318634e-001, 6.1523163318634e-001, -6.1523163318634e-001, 6.1523163318634e-001,
+ 2.4298018217087e-001, 2.4298018217087e-001, 2.4298018217087e-001, 2.4298018217087e-001,
+ -9.7003126144409e-001, 9.7003126144409e-001, -9.7003126144409e-001, 9.7003126144409e-001,
+ -4.0524142980576e-001, -4.0524142980576e-001, -4.0524142980576e-001, -4.0524142980576e-001,
+ -9.1420966386795e-001, 9.1420966386795e-001, -9.1420966386795e-001, 9.1420966386795e-001,
+ 1.2241067737341e-001, 1.2241067737341e-001, 1.2241067737341e-001, 1.2241067737341e-001,
+ -9.9247956275940e-001, 9.9247956275940e-001, -9.9247956275940e-001, 9.9247956275940e-001,
+ -2.4298018217087e-001, 2.4298018217087e-001, -2.4298018217087e-001, 2.4298018217087e-001,
+ 9.7003126144409e-001, 9.7003126144409e-001, 9.7003126144409e-001, 9.7003126144409e-001,
+ -3.5989505052567e-001, -3.5989505052567e-001, -3.5989505052567e-001, -3.5989505052567e-001,
+ 9.3299281597137e-001, -9.3299281597137e-001, 9.3299281597137e-001, -9.3299281597137e-001,
+ 9.8527765274048e-001, 9.8527765274048e-001, 9.8527765274048e-001, 9.8527765274048e-001,
+ -1.7096188664436e-001, 1.7096188664436e-001, -1.7096188664436e-001, 1.7096188664436e-001,
+ 9.4154405593872e-001, 9.4154405593872e-001, 9.4154405593872e-001, 9.4154405593872e-001,
+ -3.3688986301422e-001, 3.3688986301422e-001, -3.3688986301422e-001, 3.3688986301422e-001,
+ 8.7008702754974e-001, 8.7008702754974e-001, 8.7008702754974e-001, 8.7008702754974e-001,
+ -4.9289822578430e-001, 4.9289822578430e-001, -4.9289822578430e-001, 4.9289822578430e-001,
+ 5.7580822706223e-001, 5.7580822706223e-001, 5.7580822706223e-001, 5.7580822706223e-001,
+ -8.1758481264114e-001, 8.1758481264114e-001, -8.1758481264114e-001, 8.1758481264114e-001,
+ -9.4154405593872e-001, 9.4154405593872e-001, -9.4154405593872e-001, 9.4154405593872e-001,
+ 3.3688986301422e-001, 3.3688986301422e-001, 3.3688986301422e-001, 3.3688986301422e-001,
+ -9.6377599239349e-001, -9.6377599239349e-001, -9.6377599239349e-001, -9.6377599239349e-001,
+ -2.6671281456947e-001, 2.6671281456947e-001, -2.6671281456947e-001, 2.6671281456947e-001,
+ 8.4485357999802e-001, 8.4485357999802e-001, 8.4485357999802e-001, 8.4485357999802e-001,
+ -5.3499764204025e-001, 5.3499764204025e-001, -5.3499764204025e-001, 5.3499764204025e-001,
+ 4.2755511403084e-001, 4.2755511403084e-001, 4.2755511403084e-001, 4.2755511403084e-001,
+ -9.0398931503296e-001, 9.0398931503296e-001, -9.0398931503296e-001, 9.0398931503296e-001,
+ -1.2241072207689e-001, -1.2241072207689e-001, -1.2241072207689e-001, -1.2241072207689e-001,
+ -9.9247956275940e-001, 9.9247956275940e-001, -9.9247956275940e-001, 9.9247956275940e-001,
+ 2.1910125017166e-001, 2.1910125017166e-001, 2.1910125017166e-001, 2.1910125017166e-001,
+ -9.7570210695267e-001, 9.7570210695267e-001, -9.7570210695267e-001, 9.7570210695267e-001,
+ -4.2755511403084e-001, 4.2755511403084e-001, -4.2755511403084e-001, 4.2755511403084e-001,
+ 9.0398931503296e-001, 9.0398931503296e-001, 9.0398931503296e-001, 9.0398931503296e-001,
+ -6.1523157358170e-001, -6.1523157358170e-001, -6.1523157358170e-001, -6.1523157358170e-001,
+ 7.8834640979767e-001, -7.8834640979767e-001, 7.8834640979767e-001, -7.8834640979767e-001,
+ 9.3299281597137e-001, 9.3299281597137e-001, 9.3299281597137e-001, 9.3299281597137e-001,
+ -3.5989505052567e-001, 3.5989505052567e-001, -3.5989505052567e-001, 3.5989505052567e-001,
+ 7.4095112085342e-001, 7.4095112085342e-001, 7.4095112085342e-001, 7.4095112085342e-001,
+ -6.7155897617340e-001, 6.7155897617340e-001, -6.7155897617340e-001, 6.7155897617340e-001,
+ 4.4961130619049e-001, 4.4961130619049e-001, 4.4961130619049e-001, 4.4961130619049e-001,
+ -8.9322435855865e-001, 8.9322435855865e-001, -8.9322435855865e-001, 8.9322435855865e-001,
+ 4.0524131059647e-001, 4.0524131059647e-001, 4.0524131059647e-001, 4.0524131059647e-001,
+ -9.1420972347260e-001, 9.1420972347260e-001, -9.1420972347260e-001, 9.1420972347260e-001,
+ -7.4095112085342e-001, 7.4095112085342e-001, -7.4095112085342e-001, 7.4095112085342e-001,
+ 6.7155897617340e-001, 6.7155897617340e-001, 6.7155897617340e-001, 6.7155897617340e-001,
+ -9.4952815771103e-001, -9.4952815771103e-001, -9.4952815771103e-001, -9.4952815771103e-001,
+ 3.1368172168732e-001, -3.1368172168732e-001, 3.1368172168732e-001, -3.1368172168732e-001,
+ 7.2424709796906e-001, 7.2424709796906e-001, 7.2424709796906e-001, 7.2424709796906e-001,
+ -6.8954056501389e-001, 6.8954056501389e-001, -6.8954056501389e-001, 6.8954056501389e-001,
+ 4.9067676067352e-002, 4.9067676067352e-002, 4.9067676067352e-002, 4.9067676067352e-002,
+ -9.9879544973373e-001, 9.9879544973373e-001, -9.9879544973373e-001, 9.9879544973373e-001,
+ -6.5317285060883e-001, -6.5317285060883e-001, -6.5317285060883e-001, -6.5317285060883e-001,
+ -7.5720882415771e-001, 7.5720882415771e-001, -7.5720882415771e-001, 7.5720882415771e-001,
+ 2.4541229009628e-002, 2.4541229009628e-002, 2.4541229009628e-002, 2.4541229009628e-002,
+ -9.9969881772995e-001, 9.9969881772995e-001, -9.9969881772995e-001, 9.9969881772995e-001,
+ -4.9067676067352e-002, 4.9067676067352e-002, -4.9067676067352e-002, 4.9067676067352e-002,
+ 9.9879544973373e-001, 9.9879544973373e-001, 9.9879544973373e-001, 9.9879544973373e-001,
+ -7.3564566671848e-002, -7.3564566671848e-002, -7.3564566671848e-002, -7.3564566671848e-002,
+ 9.9729043245316e-001, -9.9729043245316e-001, 9.9729043245316e-001, -9.9729043245316e-001,
+ 9.9992471933365e-001, 9.9992471933365e-001, 9.9992471933365e-001, 9.9992471933365e-001,
+ -1.2271538376808e-002, 1.2271538376808e-002, -1.2271538376808e-002, 1.2271538376808e-002,
+ 9.9969881772995e-001, 9.9969881772995e-001, 9.9969881772995e-001, 9.9969881772995e-001,
+ -2.4541229009628e-002, 2.4541229009628e-002, -2.4541229009628e-002, 2.4541229009628e-002,
+ 9.9932241439819e-001, 9.9932241439819e-001, 9.9932241439819e-001, 9.9932241439819e-001,
+ -3.6807224154472e-002, 3.6807224154472e-002, -3.6807224154472e-002, 3.6807224154472e-002,
+ 6.9837623834610e-001, 6.9837623834610e-001, 6.9837623834610e-001, 6.9837623834610e-001,
+ -7.1573078632355e-001, 7.1573078632355e-001, -7.1573078632355e-001, 7.1573078632355e-001,
+ -9.9969881772995e-001, 9.9969881772995e-001, -9.9969881772995e-001, 9.9969881772995e-001,
+ 2.4541229009628e-002, 2.4541229009628e-002, 2.4541229009628e-002, 2.4541229009628e-002,
+ -7.3265421390533e-001, -7.3265421390533e-001, -7.3265421390533e-001, -7.3265421390533e-001,
+ -6.8060100078583e-001, 6.8060100078583e-001, -6.8060100078583e-001, 6.8060100078583e-001,
+ 9.1911387443542e-001, 9.1911387443542e-001, 9.1911387443542e-001, 9.1911387443542e-001,
+ -3.9399203658104e-001, 3.9399203658104e-001, -3.9399203658104e-001, 3.9399203658104e-001,
+ 6.8954056501389e-001, 6.8954056501389e-001, 6.8954056501389e-001, 6.8954056501389e-001,
+ -7.2424709796906e-001, 7.2424709796906e-001, -7.2424709796906e-001, 7.2424709796906e-001,
+ 3.4841868281364e-001, 3.4841868281364e-001, 3.4841868281364e-001, 3.4841868281364e-001,
+ -9.3733906745911e-001, 9.3733906745911e-001, -9.3733906745911e-001, 9.3733906745911e-001,
+ 3.7131720781326e-001, 3.7131720781326e-001, 3.7131720781326e-001, 3.7131720781326e-001,
+ -9.2850607633591e-001, 9.2850607633591e-001, -9.2850607633591e-001, 9.2850607633591e-001,
+ -6.8954056501389e-001, 6.8954056501389e-001, -6.8954056501389e-001, 6.8954056501389e-001,
+ 7.2424709796906e-001, 7.2424709796906e-001, 7.2424709796906e-001, 7.2424709796906e-001,
+ -9.0916800498962e-001, -9.0916800498962e-001, -9.0916800498962e-001, -9.0916800498962e-001,
+ 4.1642951965332e-001, -4.1642951965332e-001, 4.1642951965332e-001, -4.1642951965332e-001,
+ 9.7831737995148e-001, 9.7831737995148e-001, 9.7831737995148e-001, 9.7831737995148e-001,
+ -2.0711138844490e-001, 2.0711138844490e-001, -2.0711138844490e-001, 2.0711138844490e-001,
+ 9.1420972347260e-001, 9.1420972347260e-001, 9.1420972347260e-001, 9.1420972347260e-001,
+ -4.0524131059647e-001, 4.0524131059647e-001, -4.0524131059647e-001, 4.0524131059647e-001,
+ 8.1045717000961e-001, 8.1045717000961e-001, 8.1045717000961e-001, 8.1045717000961e-001,
+ -5.8579784631729e-001, 5.8579784631729e-001, -5.8579784631729e-001, 5.8579784631729e-001,
+ 5.4532498121262e-001, 5.4532498121262e-001, 5.4532498121262e-001, 5.4532498121262e-001,
+ -8.3822470903397e-001, 8.3822470903397e-001, -8.3822470903397e-001, 8.3822470903397e-001,
+ -9.1420972347260e-001, 9.1420972347260e-001, -9.1420972347260e-001, 9.1420972347260e-001,
+ 4.0524131059647e-001, 4.0524131059647e-001, 4.0524131059647e-001, 4.0524131059647e-001,
+ -9.8730134963989e-001, -9.8730134963989e-001, -9.8730134963989e-001, -9.8730134963989e-001,
+ -1.5885809063911e-001, 1.5885809063911e-001, -1.5885809063911e-001, 1.5885809063911e-001,
+ 8.2458931207657e-001, 8.2458931207657e-001, 8.2458931207657e-001, 8.2458931207657e-001,
+ -5.6573182344437e-001, 5.6573182344437e-001, -5.6573182344437e-001, 5.6573182344437e-001,
+ 3.5989505052567e-001, 3.5989505052567e-001, 3.5989505052567e-001, 3.5989505052567e-001,
+ -9.3299281597137e-001, 9.3299281597137e-001, -9.3299281597137e-001, 9.3299281597137e-001,
+ -2.3105813562870e-001, -2.3105813562870e-001, -2.3105813562870e-001, -2.3105813562870e-001,
+ -9.7293996810913e-001, 9.7293996810913e-001, -9.7293996810913e-001, 9.7293996810913e-001,
+ 1.8303988873959e-001, 1.8303988873959e-001, 1.8303988873959e-001, 1.8303988873959e-001,
+ -9.8310548067093e-001, 9.8310548067093e-001, -9.8310548067093e-001, 9.8310548067093e-001,
+ -3.5989505052567e-001, 3.5989505052567e-001, -3.5989505052567e-001, 3.5989505052567e-001,
+ 9.3299281597137e-001, 9.3299281597137e-001, 9.3299281597137e-001, 9.3299281597137e-001,
+ -5.2458971738815e-001, -5.2458971738815e-001, -5.2458971738815e-001, -5.2458971738815e-001,
+ 8.5135519504547e-001, -8.5135519504547e-001, 8.5135519504547e-001, -8.5135519504547e-001,
+ 9.9390697479248e-001, 9.9390697479248e-001, 9.9390697479248e-001, 9.9390697479248e-001,
+ -1.1022221297026e-001, 1.1022221297026e-001, -1.1022221297026e-001, 1.1022221297026e-001,
+ 9.7570210695267e-001, 9.7570210695267e-001, 9.7570210695267e-001, 9.7570210695267e-001,
+ -2.1910125017166e-001, 2.1910125017166e-001, -2.1910125017166e-001, 2.1910125017166e-001,
+ 9.4560730457306e-001, 9.4560730457306e-001, 9.4560730457306e-001, 9.4560730457306e-001,
+ -3.2531031966209e-001, 3.2531031966209e-001, -3.2531031966209e-001, 3.2531031966209e-001,
+ 6.2485951185226e-001, 6.2485951185226e-001, 6.2485951185226e-001, 6.2485951185226e-001,
+ -7.8073722124100e-001, 7.8073722124100e-001, -7.8073722124100e-001, 7.8073722124100e-001,
+ -9.7570210695267e-001, 9.7570210695267e-001, -9.7570210695267e-001, 9.7570210695267e-001,
+ 2.1910125017166e-001, 2.1910125017166e-001, 2.1910125017166e-001, 2.1910125017166e-001,
+ -8.9867436885834e-001, -8.9867436885834e-001, -8.9867436885834e-001, -8.9867436885834e-001,
+ -4.3861627578735e-001, 4.3861627578735e-001, -4.3861627578735e-001, 4.3861627578735e-001,
+ 8.7607008218765e-001, 8.7607008218765e-001, 8.7607008218765e-001, 8.7607008218765e-001,
+ -4.8218378424644e-001, 4.8218378424644e-001, -4.8218378424644e-001, 4.8218378424644e-001,
+ 5.3499764204025e-001, 5.3499764204025e-001, 5.3499764204025e-001, 5.3499764204025e-001,
+ -8.4485357999802e-001, 8.4485357999802e-001, -8.4485357999802e-001, 8.4485357999802e-001,
+ 6.1320688575506e-002, 6.1320688575506e-002, 6.1320688575506e-002, 6.1320688575506e-002,
+ -9.9811810255051e-001, 9.9811810255051e-001, -9.9811810255051e-001, 9.9811810255051e-001,
+ 2.7851969003677e-001, 2.7851969003677e-001, 2.7851969003677e-001, 2.7851969003677e-001,
+ -9.6043050289154e-001, 9.6043050289154e-001, -9.6043050289154e-001, 9.6043050289154e-001,
+ -5.3499764204025e-001, 5.3499764204025e-001, -5.3499764204025e-001, 5.3499764204025e-001,
+ 8.4485357999802e-001, 8.4485357999802e-001, 8.4485357999802e-001, 8.4485357999802e-001,
+ -7.4913644790649e-001, -7.4913644790649e-001, -7.4913644790649e-001, -7.4913644790649e-001,
+ 6.6241574287415e-001, -6.6241574287415e-001, 6.6241574287415e-001, -6.6241574287415e-001,
+ 9.5330601930618e-001, 9.5330601930618e-001, 9.5330601930618e-001, 9.5330601930618e-001,
+ -3.0200594663620e-001, 3.0200594663620e-001, -3.0200594663620e-001, 3.0200594663620e-001,
+ 8.1758481264114e-001, 8.1758481264114e-001, 8.1758481264114e-001, 8.1758481264114e-001,
+ -5.7580822706223e-001, 5.7580822706223e-001, -5.7580822706223e-001, 5.7580822706223e-001,
+ 6.0551100969315e-001, 6.0551100969315e-001, 6.0551100969315e-001, 6.0551100969315e-001,
+ -7.9583692550659e-001, 7.9583692550659e-001, -7.9583692550659e-001, 7.9583692550659e-001,
+ 4.6053871512413e-001, 4.6053871512413e-001, 4.6053871512413e-001, 4.6053871512413e-001,
+ -8.8763964176178e-001, 8.8763964176178e-001, -8.8763964176178e-001, 8.8763964176178e-001,
+ -8.1758481264114e-001, 8.1758481264114e-001, -8.1758481264114e-001, 8.1758481264114e-001,
+ 5.7580822706223e-001, 5.7580822706223e-001, 5.7580822706223e-001, 5.7580822706223e-001,
+ -9.9090266227722e-001, -9.9090266227722e-001, -9.9090266227722e-001, -9.9090266227722e-001,
+ 1.3458071649075e-001, -1.3458071649075e-001, 1.3458071649075e-001, -1.3458071649075e-001,
+ 7.6516723632813e-001, 7.6516723632813e-001, 7.6516723632813e-001, 7.6516723632813e-001,
+ -6.4383155107498e-001, 6.4383155107498e-001, -6.4383155107498e-001, 6.4383155107498e-001,
+ 1.7096188664436e-001, 1.7096188664436e-001, 1.7096188664436e-001, 1.7096188664436e-001,
+ -9.8527765274048e-001, 9.8527765274048e-001, -9.8527765274048e-001, 9.8527765274048e-001,
+ -5.0353842973709e-001, -5.0353842973709e-001, -5.0353842973709e-001, -5.0353842973709e-001,
+ -8.6397278308868e-001, 8.6397278308868e-001, -8.6397278308868e-001, 8.6397278308868e-001,
+ 8.5797317326069e-002, 8.5797317326069e-002, 8.5797317326069e-002, 8.5797317326069e-002,
+ -9.9631261825562e-001, 9.9631261825562e-001, -9.9631261825562e-001, 9.9631261825562e-001,
+ -1.7096188664436e-001, 1.7096188664436e-001, -1.7096188664436e-001, 1.7096188664436e-001,
+ 9.8527765274048e-001, 9.8527765274048e-001, 9.8527765274048e-001, 9.8527765274048e-001,
+ -2.5486564636230e-001, -2.5486564636230e-001, -2.5486564636230e-001, -2.5486564636230e-001,
+ 9.6697646379471e-001, -9.6697646379471e-001, 9.6697646379471e-001, -9.6697646379471e-001,
+ 9.9811810255051e-001, 9.9811810255051e-001, 9.9811810255051e-001, 9.9811810255051e-001,
+ -6.1320737004280e-002, 6.1320737004280e-002, -6.1320737004280e-002, 6.1320737004280e-002,
+ 9.9247956275940e-001, 9.9247956275940e-001, 9.9247956275940e-001, 9.9247956275940e-001,
+ -1.2241067737341e-001, 1.2241067737341e-001, -1.2241067737341e-001, 1.2241067737341e-001,
+ 9.8310548067093e-001, 9.8310548067093e-001, 9.8310548067093e-001, 9.8310548067093e-001,
+ -1.8303988873959e-001, 1.8303988873959e-001, -1.8303988873959e-001, 1.8303988873959e-001,
+ 6.6241580247879e-001, 6.6241580247879e-001, 6.6241580247879e-001, 6.6241580247879e-001,
+ -7.4913638830185e-001, 7.4913638830185e-001, -7.4913638830185e-001, 7.4913638830185e-001,
+ -9.9247956275940e-001, 9.9247956275940e-001, -9.9247956275940e-001, 9.9247956275940e-001,
+ 1.2241067737341e-001, 1.2241067737341e-001, 1.2241067737341e-001, 1.2241067737341e-001,
+ -8.2458931207657e-001, -8.2458931207657e-001, -8.2458931207657e-001, -8.2458931207657e-001,
+ -5.6573188304901e-001, 5.6573188304901e-001, -5.6573188304901e-001, 5.6573188304901e-001,
+ 8.9867448806763e-001, 8.9867448806763e-001, 8.9867448806763e-001, 8.9867448806763e-001,
+ -4.3861624598503e-001, 4.3861624598503e-001, -4.3861624598503e-001, 4.3861624598503e-001,
+ 6.1523163318634e-001, 6.1523163318634e-001, 6.1523163318634e-001, 6.1523163318634e-001,
+ -7.8834640979767e-001, 7.8834640979767e-001, -7.8834640979767e-001, 7.8834640979767e-001,
+ 2.0711140334606e-001, 2.0711140334606e-001, 2.0711140334606e-001, 2.0711140334606e-001,
+ -9.7831737995148e-001, 9.7831737995148e-001, -9.7831737995148e-001, 9.7831737995148e-001,
+ 3.2531028985977e-001, 3.2531028985977e-001, 3.2531028985977e-001, 3.2531028985977e-001,
+ -9.4560730457306e-001, 9.4560730457306e-001, -9.4560730457306e-001, 9.4560730457306e-001,
+ -6.1523163318634e-001, 6.1523163318634e-001, -6.1523163318634e-001, 6.1523163318634e-001,
+ 7.8834640979767e-001, 7.8834640979767e-001, 7.8834640979767e-001, 7.8834640979767e-001,
+ -8.3822476863861e-001, -8.3822476863861e-001, -8.3822476863861e-001, -8.3822476863861e-001,
+ 5.4532492160797e-001, -5.4532492160797e-001, 5.4532492160797e-001, -5.4532492160797e-001,
+ 9.6697646379471e-001, 9.6697646379471e-001, 9.6697646379471e-001, 9.6697646379471e-001,
+ -2.5486567616463e-001, 2.5486567616463e-001, -2.5486567616463e-001, 2.5486567616463e-001,
+ 8.7008696794510e-001, 8.7008696794510e-001, 8.7008696794510e-001, 8.7008696794510e-001,
+ -4.9289819598198e-001, 4.9289819598198e-001, -4.9289819598198e-001, 4.9289819598198e-001,
+ 7.1573078632355e-001, 7.1573078632355e-001, 7.1573078632355e-001, 7.1573078632355e-001,
+ -6.9837623834610e-001, 6.9837623834610e-001, -6.9837623834610e-001, 6.9837623834610e-001,
+ 5.0353837013245e-001, 5.0353837013245e-001, 5.0353837013245e-001, 5.0353837013245e-001,
+ -8.6397284269333e-001, 8.6397284269333e-001, -8.6397284269333e-001, 8.6397284269333e-001,
+ -8.7008696794510e-001, 8.7008696794510e-001, -8.7008696794510e-001, 8.7008696794510e-001,
+ 4.9289819598198e-001, 4.9289819598198e-001, 4.9289819598198e-001, 4.9289819598198e-001,
+ -9.9992465972900e-001, -9.9992465972900e-001, -9.9992465972900e-001, -9.9992465972900e-001,
+ -1.2271504849195e-002, 1.2271504849195e-002, -1.2271504849195e-002, 1.2271504849195e-002,
+ 7.9583686590195e-001, 7.9583686590195e-001, 7.9583686590195e-001, 7.9583686590195e-001,
+ -6.0551106929779e-001, 6.0551106929779e-001, -6.0551106929779e-001, 6.0551106929779e-001,
+ 2.6671275496483e-001, 2.6671275496483e-001, 2.6671275496483e-001, 2.6671275496483e-001,
+ -9.6377605199814e-001, 9.6377605199814e-001, -9.6377605199814e-001, 9.6377605199814e-001,
+ -3.7131726741791e-001, -3.7131726741791e-001, -3.7131726741791e-001, -3.7131726741791e-001,
+ -9.2850595712662e-001, 9.2850595712662e-001, -9.2850595712662e-001, 9.2850595712662e-001,
+ 1.3458071649075e-001, 1.3458071649075e-001, 1.3458071649075e-001, 1.3458071649075e-001,
+ -9.9090266227722e-001, 9.9090266227722e-001, -9.9090266227722e-001, 9.9090266227722e-001,
+ -2.6671275496483e-001, 2.6671275496483e-001, -2.6671275496483e-001, 2.6671275496483e-001,
+ 9.6377605199814e-001, 9.6377605199814e-001, 9.6377605199814e-001, 9.6377605199814e-001,
+ -3.9399203658104e-001, -3.9399203658104e-001, -3.9399203658104e-001, -3.9399203658104e-001,
+ 9.1911387443542e-001, -9.1911387443542e-001, 9.1911387443542e-001, -9.1911387443542e-001,
+ 9.8730140924454e-001, 9.8730140924454e-001, 9.8730140924454e-001, 9.8730140924454e-001,
+ -1.5885815024376e-001, 1.5885815024376e-001, -1.5885815024376e-001, 1.5885815024376e-001,
+ 9.4952815771103e-001, 9.4952815771103e-001, 9.4952815771103e-001, 9.4952815771103e-001,
+ -3.1368175148964e-001, 3.1368175148964e-001, -3.1368175148964e-001, 3.1368175148964e-001,
+ 8.8763958215714e-001, 8.8763958215714e-001, 8.8763958215714e-001, 8.8763958215714e-001,
+ -4.6053871512413e-001, 4.6053871512413e-001, -4.6053871512413e-001, 4.6053871512413e-001,
+ 5.8579784631729e-001, 5.8579784631729e-001, 5.8579784631729e-001, 5.8579784631729e-001,
+ -8.1045717000961e-001, 8.1045717000961e-001, -8.1045717000961e-001, 8.1045717000961e-001,
+ -9.4952815771103e-001, 9.4952815771103e-001, -9.4952815771103e-001, 9.4952815771103e-001,
+ 3.1368175148964e-001, 3.1368175148964e-001, 3.1368175148964e-001, 3.1368175148964e-001,
+ -9.5330595970154e-001, -9.5330595970154e-001, -9.5330595970154e-001, -9.5330595970154e-001,
+ -3.0200591683388e-001, 3.0200591683388e-001, -3.0200591683388e-001, 3.0200591683388e-001,
+ 8.5135519504547e-001, 8.5135519504547e-001, 8.5135519504547e-001, 8.5135519504547e-001,
+ -5.2458971738815e-001, 5.2458971738815e-001, -5.2458971738815e-001, 5.2458971738815e-001,
+ 4.4961133599281e-001, 4.4961133599281e-001, 4.4961133599281e-001, 4.4961133599281e-001,
+ -8.9322429895401e-001, 8.9322429895401e-001, -8.9322429895401e-001, 8.9322429895401e-001,
+ -8.5797369480133e-002, -8.5797369480133e-002, -8.5797369480133e-002, -8.5797369480133e-002,
+ -9.9631255865097e-001, 9.9631255865097e-001, -9.9631255865097e-001, 9.9631255865097e-001,
+ 2.3105812072754e-001, 2.3105812072754e-001, 2.3105812072754e-001, 2.3105812072754e-001,
+ -9.7293996810913e-001, 9.7293996810913e-001, -9.7293996810913e-001, 9.7293996810913e-001,
+ -4.4961133599281e-001, 4.4961133599281e-001, -4.4961133599281e-001, 4.4961133599281e-001,
+ 8.9322429895401e-001, 8.9322429895401e-001, 8.9322429895401e-001, 8.9322429895401e-001,
+ -6.4383155107498e-001, -6.4383155107498e-001, -6.4383155107498e-001, -6.4383155107498e-001,
+ 7.6516729593277e-001, -7.6516729593277e-001, 7.6516729593277e-001, -7.6516729593277e-001,
+ 9.3733900785446e-001, 9.3733900785446e-001, 9.3733900785446e-001, 9.3733900785446e-001,
+ -3.4841868281364e-001, 3.4841868281364e-001, -3.4841868281364e-001, 3.4841868281364e-001,
+ 7.5720882415771e-001, 7.5720882415771e-001, 7.5720882415771e-001, 7.5720882415771e-001,
+ -6.5317285060883e-001, 6.5317285060883e-001, -6.5317285060883e-001, 6.5317285060883e-001,
+ 4.8218375444412e-001, 4.8218375444412e-001, 4.8218375444412e-001, 4.8218375444412e-001,
+ -8.7607008218765e-001, 8.7607008218765e-001, -8.7607008218765e-001, 8.7607008218765e-001,
+ 4.1642957925797e-001, 4.1642957925797e-001, 4.1642957925797e-001, 4.1642957925797e-001,
+ -9.0916800498962e-001, 9.0916800498962e-001, -9.0916800498962e-001, 9.0916800498962e-001,
+ -7.5720882415771e-001, 7.5720882415771e-001, -7.5720882415771e-001, 7.5720882415771e-001,
+ 6.5317285060883e-001, 6.5317285060883e-001, 6.5317285060883e-001, 6.5317285060883e-001,
+ -9.6043050289154e-001, -9.6043050289154e-001, -9.6043050289154e-001, -9.6043050289154e-001,
+ 2.7851969003677e-001, -2.7851969003677e-001, 2.7851969003677e-001, -2.7851969003677e-001,
+ 7.3265427350998e-001, 7.3265427350998e-001, 7.3265427350998e-001, 7.3265427350998e-001,
+ -6.8060100078583e-001, 6.8060100078583e-001, -6.8060100078583e-001, 6.8060100078583e-001,
+ 7.3564566671848e-002, 7.3564566671848e-002, 7.3564566671848e-002, 7.3564566671848e-002,
+ -9.9729043245316e-001, 9.9729043245316e-001, -9.9729043245316e-001, 9.9729043245316e-001,
+ -6.2485945224762e-001, -6.2485945224762e-001, -6.2485945224762e-001, -6.2485945224762e-001,
+ -7.8073722124100e-001, 7.8073722124100e-001, -7.8073722124100e-001, 7.8073722124100e-001,
+ 3.6807224154472e-002, 3.6807224154472e-002, 3.6807224154472e-002, 3.6807224154472e-002,
+ -9.9932235479355e-001, 9.9932235479355e-001, -9.9932235479355e-001, 9.9932235479355e-001,
+ -7.3564566671848e-002, 7.3564566671848e-002, -7.3564566671848e-002, 7.3564566671848e-002,
+ 9.9729043245316e-001, 9.9729043245316e-001, 9.9729043245316e-001, 9.9729043245316e-001,
+ -1.1022220551968e-001, -1.1022220551968e-001, -1.1022220551968e-001, -1.1022220551968e-001,
+ 9.9390691518784e-001, -9.9390691518784e-001, 9.9390691518784e-001, -9.9390691518784e-001,
+ 9.9932235479355e-001, 9.9932235479355e-001, 9.9932235479355e-001, 9.9932235479355e-001,
+ -3.6807224154472e-002, 3.6807224154472e-002, -3.6807224154472e-002, 3.6807224154472e-002,
+ 9.9729043245316e-001, 9.9729043245316e-001, 9.9729043245316e-001, 9.9729043245316e-001,
+ -7.3564566671848e-002, 7.3564566671848e-002, -7.3564566671848e-002, 7.3564566671848e-002,
+ 9.9390691518784e-001, 9.9390691518784e-001, 9.9390691518784e-001, 9.9390691518784e-001,
+ -1.1022220551968e-001, 1.1022220551968e-001, -1.1022220551968e-001, 1.1022220551968e-001,
+ 6.8060100078583e-001, 6.8060100078583e-001, 6.8060100078583e-001, 6.8060100078583e-001,
+ -7.3265427350998e-001, 7.3265427350998e-001, -7.3265427350998e-001, 7.3265427350998e-001,
+ -9.9729043245316e-001, 9.9729043245316e-001, -9.9729043245316e-001, 9.9729043245316e-001,
+ 7.3564566671848e-002, 7.3564566671848e-002, 7.3564566671848e-002, 7.3564566671848e-002,
+ -7.8073722124100e-001, -7.8073722124100e-001, -7.8073722124100e-001, -7.8073722124100e-001,
+ -6.2485945224762e-001, 6.2485945224762e-001, -6.2485945224762e-001, 6.2485945224762e-001,
+ 9.0916800498962e-001, 9.0916800498962e-001, 9.0916800498962e-001, 9.0916800498962e-001,
+ -4.1642957925797e-001, 4.1642957925797e-001, -4.1642957925797e-001, 4.1642957925797e-001,
+ 6.5317285060883e-001, 6.5317285060883e-001, 6.5317285060883e-001, 6.5317285060883e-001,
+ -7.5720882415771e-001, 7.5720882415771e-001, -7.5720882415771e-001, 7.5720882415771e-001,
+ 2.7851969003677e-001, 2.7851969003677e-001, 2.7851969003677e-001, 2.7851969003677e-001,
+ -9.6043050289154e-001, 9.6043050289154e-001, -9.6043050289154e-001, 9.6043050289154e-001,
+ 3.4841868281364e-001, 3.4841868281364e-001, 3.4841868281364e-001, 3.4841868281364e-001,
+ -9.3733900785446e-001, 9.3733900785446e-001, -9.3733900785446e-001, 9.3733900785446e-001,
+ -6.5317285060883e-001, 6.5317285060883e-001, -6.5317285060883e-001, 6.5317285060883e-001,
+ 7.5720882415771e-001, 7.5720882415771e-001, 7.5720882415771e-001, 7.5720882415771e-001,
+ -8.7607008218765e-001, -8.7607008218765e-001, -8.7607008218765e-001, -8.7607008218765e-001,
+ 4.8218375444412e-001, -4.8218375444412e-001, 4.8218375444412e-001, -4.8218375444412e-001,
+ 9.7293996810913e-001, 9.7293996810913e-001, 9.7293996810913e-001, 9.7293996810913e-001,
+ -2.3105812072754e-001, 2.3105812072754e-001, -2.3105812072754e-001, 2.3105812072754e-001,
+ 8.9322429895401e-001, 8.9322429895401e-001, 8.9322429895401e-001, 8.9322429895401e-001,
+ -4.4961133599281e-001, 4.4961133599281e-001, -4.4961133599281e-001, 4.4961133599281e-001,
+ 7.6516729593277e-001, 7.6516729593277e-001, 7.6516729593277e-001, 7.6516729593277e-001,
+ -6.4383155107498e-001, 6.4383155107498e-001, -6.4383155107498e-001, 6.4383155107498e-001,
+ 5.2458971738815e-001, 5.2458971738815e-001, 5.2458971738815e-001, 5.2458971738815e-001,
+ -8.5135519504547e-001, 8.5135519504547e-001, -8.5135519504547e-001, 8.5135519504547e-001,
+ -8.9322429895401e-001, 8.9322429895401e-001, -8.9322429895401e-001, 8.9322429895401e-001,
+ 4.4961133599281e-001, 4.4961133599281e-001, 4.4961133599281e-001, 4.4961133599281e-001,
+ -9.9631255865097e-001, -9.9631255865097e-001, -9.9631255865097e-001, -9.9631255865097e-001,
+ -8.5797369480133e-002, 8.5797369480133e-002, -8.5797369480133e-002, 8.5797369480133e-002,
+ 8.1045717000961e-001, 8.1045717000961e-001, 8.1045717000961e-001, 8.1045717000961e-001,
+ -5.8579784631729e-001, 5.8579784631729e-001, -5.8579784631729e-001, 5.8579784631729e-001,
+ 3.1368175148964e-001, 3.1368175148964e-001, 3.1368175148964e-001, 3.1368175148964e-001,
+ -9.4952815771103e-001, 9.4952815771103e-001, -9.4952815771103e-001, 9.4952815771103e-001,
+ -3.0200591683388e-001, -3.0200591683388e-001, -3.0200591683388e-001, -3.0200591683388e-001,
+ -9.5330595970154e-001, 9.5330595970154e-001, -9.5330595970154e-001, 9.5330595970154e-001,
+ 1.5885815024376e-001, 1.5885815024376e-001, 1.5885815024376e-001, 1.5885815024376e-001,
+ -9.8730140924454e-001, 9.8730140924454e-001, -9.8730140924454e-001, 9.8730140924454e-001,
+ -3.1368175148964e-001, 3.1368175148964e-001, -3.1368175148964e-001, 3.1368175148964e-001,
+ 9.4952815771103e-001, 9.4952815771103e-001, 9.4952815771103e-001, 9.4952815771103e-001,
+ -4.6053871512413e-001, -4.6053871512413e-001, -4.6053871512413e-001, -4.6053871512413e-001,
+ 8.8763958215714e-001, -8.8763958215714e-001, 8.8763958215714e-001, -8.8763958215714e-001,
+ 9.9090266227722e-001, 9.9090266227722e-001, 9.9090266227722e-001, 9.9090266227722e-001,
+ -1.3458071649075e-001, 1.3458071649075e-001, -1.3458071649075e-001, 1.3458071649075e-001,
+ 9.6377605199814e-001, 9.6377605199814e-001, 9.6377605199814e-001, 9.6377605199814e-001,
+ -2.6671275496483e-001, 2.6671275496483e-001, -2.6671275496483e-001, 2.6671275496483e-001,
+ 9.1911387443542e-001, 9.1911387443542e-001, 9.1911387443542e-001, 9.1911387443542e-001,
+ -3.9399203658104e-001, 3.9399203658104e-001, -3.9399203658104e-001, 3.9399203658104e-001,
+ 6.0551106929779e-001, 6.0551106929779e-001, 6.0551106929779e-001, 6.0551106929779e-001,
+ -7.9583686590195e-001, 7.9583686590195e-001, -7.9583686590195e-001, 7.9583686590195e-001,
+ -9.6377605199814e-001, 9.6377605199814e-001, -9.6377605199814e-001, 9.6377605199814e-001,
+ 2.6671275496483e-001, 2.6671275496483e-001, 2.6671275496483e-001, 2.6671275496483e-001,
+ -9.2850595712662e-001, -9.2850595712662e-001, -9.2850595712662e-001, -9.2850595712662e-001,
+ -3.7131726741791e-001, 3.7131726741791e-001, -3.7131726741791e-001, 3.7131726741791e-001,
+ 8.6397284269333e-001, 8.6397284269333e-001, 8.6397284269333e-001, 8.6397284269333e-001,
+ -5.0353837013245e-001, 5.0353837013245e-001, -5.0353837013245e-001, 5.0353837013245e-001,
+ 4.9289819598198e-001, 4.9289819598198e-001, 4.9289819598198e-001, 4.9289819598198e-001,
+ -8.7008696794510e-001, 8.7008696794510e-001, -8.7008696794510e-001, 8.7008696794510e-001,
+ -1.2271504849195e-002, -1.2271504849195e-002, -1.2271504849195e-002, -1.2271504849195e-002,
+ -9.9992465972900e-001, 9.9992465972900e-001, -9.9992465972900e-001, 9.9992465972900e-001,
+ 2.5486567616463e-001, 2.5486567616463e-001, 2.5486567616463e-001, 2.5486567616463e-001,
+ -9.6697646379471e-001, 9.6697646379471e-001, -9.6697646379471e-001, 9.6697646379471e-001,
+ -4.9289819598198e-001, 4.9289819598198e-001, -4.9289819598198e-001, 4.9289819598198e-001,
+ 8.7008696794510e-001, 8.7008696794510e-001, 8.7008696794510e-001, 8.7008696794510e-001,
+ -6.9837623834610e-001, -6.9837623834610e-001, -6.9837623834610e-001, -6.9837623834610e-001,
+ 7.1573078632355e-001, -7.1573078632355e-001, 7.1573078632355e-001, -7.1573078632355e-001,
+ 9.4560730457306e-001, 9.4560730457306e-001, 9.4560730457306e-001, 9.4560730457306e-001,
+ -3.2531028985977e-001, 3.2531028985977e-001, -3.2531028985977e-001, 3.2531028985977e-001,
+ 7.8834640979767e-001, 7.8834640979767e-001, 7.8834640979767e-001, 7.8834640979767e-001,
+ -6.1523163318634e-001, 6.1523163318634e-001, -6.1523163318634e-001, 6.1523163318634e-001,
+ 5.4532492160797e-001, 5.4532492160797e-001, 5.4532492160797e-001, 5.4532492160797e-001,
+ -8.3822476863861e-001, 8.3822476863861e-001, -8.3822476863861e-001, 8.3822476863861e-001,
+ 4.3861624598503e-001, 4.3861624598503e-001, 4.3861624598503e-001, 4.3861624598503e-001,
+ -8.9867448806763e-001, 8.9867448806763e-001, -8.9867448806763e-001, 8.9867448806763e-001,
+ -7.8834640979767e-001, 7.8834640979767e-001, -7.8834640979767e-001, 7.8834640979767e-001,
+ 6.1523163318634e-001, 6.1523163318634e-001, 6.1523163318634e-001, 6.1523163318634e-001,
+ -9.7831737995148e-001, -9.7831737995148e-001, -9.7831737995148e-001, -9.7831737995148e-001,
+ 2.0711140334606e-001, -2.0711140334606e-001, 2.0711140334606e-001, -2.0711140334606e-001,
+ 7.4913638830185e-001, 7.4913638830185e-001, 7.4913638830185e-001, 7.4913638830185e-001,
+ -6.6241580247879e-001, 6.6241580247879e-001, -6.6241580247879e-001, 6.6241580247879e-001,
+ 1.2241067737341e-001, 1.2241067737341e-001, 1.2241067737341e-001, 1.2241067737341e-001,
+ -9.9247956275940e-001, 9.9247956275940e-001, -9.9247956275940e-001, 9.9247956275940e-001,
+ -5.6573188304901e-001, -5.6573188304901e-001, -5.6573188304901e-001, -5.6573188304901e-001,
+ -8.2458931207657e-001, 8.2458931207657e-001, -8.2458931207657e-001, 8.2458931207657e-001,
+ 6.1320737004280e-002, 6.1320737004280e-002, 6.1320737004280e-002, 6.1320737004280e-002,
+ -9.9811810255051e-001, 9.9811810255051e-001, -9.9811810255051e-001, 9.9811810255051e-001,
+ -1.2241067737341e-001, 1.2241067737341e-001, -1.2241067737341e-001, 1.2241067737341e-001,
+ 9.9247956275940e-001, 9.9247956275940e-001, 9.9247956275940e-001, 9.9247956275940e-001,
+ -1.8303988873959e-001, -1.8303988873959e-001, -1.8303988873959e-001, -1.8303988873959e-001,
+ 9.8310548067093e-001, -9.8310548067093e-001, 9.8310548067093e-001, -9.8310548067093e-001,
+ 9.9631261825562e-001, 9.9631261825562e-001, 9.9631261825562e-001, 9.9631261825562e-001,
+ -8.5797317326069e-002, 8.5797317326069e-002, -8.5797317326069e-002, 8.5797317326069e-002,
+ 9.8527765274048e-001, 9.8527765274048e-001, 9.8527765274048e-001, 9.8527765274048e-001,
+ -1.7096188664436e-001, 1.7096188664436e-001, -1.7096188664436e-001, 1.7096188664436e-001,
+ 9.6697646379471e-001, 9.6697646379471e-001, 9.6697646379471e-001, 9.6697646379471e-001,
+ -2.5486564636230e-001, 2.5486564636230e-001, -2.5486564636230e-001, 2.5486564636230e-001,
+ 6.4383155107498e-001, 6.4383155107498e-001, 6.4383155107498e-001, 6.4383155107498e-001,
+ -7.6516723632813e-001, 7.6516723632813e-001, -7.6516723632813e-001, 7.6516723632813e-001,
+ -9.8527765274048e-001, 9.8527765274048e-001, -9.8527765274048e-001, 9.8527765274048e-001,
+ 1.7096188664436e-001, 1.7096188664436e-001, 1.7096188664436e-001, 1.7096188664436e-001,
+ -8.6397278308868e-001, -8.6397278308868e-001, -8.6397278308868e-001, -8.6397278308868e-001,
+ -5.0353842973709e-001, 5.0353842973709e-001, -5.0353842973709e-001, 5.0353842973709e-001,
+ 8.8763964176178e-001, 8.8763964176178e-001, 8.8763964176178e-001, 8.8763964176178e-001,
+ -4.6053871512413e-001, 4.6053871512413e-001, -4.6053871512413e-001, 4.6053871512413e-001,
+ 5.7580822706223e-001, 5.7580822706223e-001, 5.7580822706223e-001, 5.7580822706223e-001,
+ -8.1758481264114e-001, 8.1758481264114e-001, -8.1758481264114e-001, 8.1758481264114e-001,
+ 1.3458071649075e-001, 1.3458071649075e-001, 1.3458071649075e-001, 1.3458071649075e-001,
+ -9.9090266227722e-001, 9.9090266227722e-001, -9.9090266227722e-001, 9.9090266227722e-001,
+ 3.0200594663620e-001, 3.0200594663620e-001, 3.0200594663620e-001, 3.0200594663620e-001,
+ -9.5330601930618e-001, 9.5330601930618e-001, -9.5330601930618e-001, 9.5330601930618e-001,
+ -5.7580822706223e-001, 5.7580822706223e-001, -5.7580822706223e-001, 5.7580822706223e-001,
+ 8.1758481264114e-001, 8.1758481264114e-001, 8.1758481264114e-001, 8.1758481264114e-001,
+ -7.9583692550659e-001, -7.9583692550659e-001, -7.9583692550659e-001, -7.9583692550659e-001,
+ 6.0551100969315e-001, -6.0551100969315e-001, 6.0551100969315e-001, -6.0551100969315e-001,
+ 9.6043050289154e-001, 9.6043050289154e-001, 9.6043050289154e-001, 9.6043050289154e-001,
+ -2.7851969003677e-001, 2.7851969003677e-001, -2.7851969003677e-001, 2.7851969003677e-001,
+ 8.4485357999802e-001, 8.4485357999802e-001, 8.4485357999802e-001, 8.4485357999802e-001,
+ -5.3499764204025e-001, 5.3499764204025e-001, -5.3499764204025e-001, 5.3499764204025e-001,
+ 6.6241574287415e-001, 6.6241574287415e-001, 6.6241574287415e-001, 6.6241574287415e-001,
+ -7.4913644790649e-001, 7.4913644790649e-001, -7.4913644790649e-001, 7.4913644790649e-001,
+ 4.8218378424644e-001, 4.8218378424644e-001, 4.8218378424644e-001, 4.8218378424644e-001,
+ -8.7607008218765e-001, 8.7607008218765e-001, -8.7607008218765e-001, 8.7607008218765e-001,
+ -8.4485357999802e-001, 8.4485357999802e-001, -8.4485357999802e-001, 8.4485357999802e-001,
+ 5.3499764204025e-001, 5.3499764204025e-001, 5.3499764204025e-001, 5.3499764204025e-001,
+ -9.9811810255051e-001, -9.9811810255051e-001, -9.9811810255051e-001, -9.9811810255051e-001,
+ 6.1320688575506e-002, -6.1320688575506e-002, 6.1320688575506e-002, -6.1320688575506e-002,
+ 7.8073722124100e-001, 7.8073722124100e-001, 7.8073722124100e-001, 7.8073722124100e-001,
+ -6.2485951185226e-001, 6.2485951185226e-001, -6.2485951185226e-001, 6.2485951185226e-001,
+ 2.1910125017166e-001, 2.1910125017166e-001, 2.1910125017166e-001, 2.1910125017166e-001,
+ -9.7570210695267e-001, 9.7570210695267e-001, -9.7570210695267e-001, 9.7570210695267e-001,
+ -4.3861627578735e-001, -4.3861627578735e-001, -4.3861627578735e-001, -4.3861627578735e-001,
+ -8.9867436885834e-001, 8.9867436885834e-001, -8.9867436885834e-001, 8.9867436885834e-001,
+ 1.1022221297026e-001, 1.1022221297026e-001, 1.1022221297026e-001, 1.1022221297026e-001,
+ -9.9390697479248e-001, 9.9390697479248e-001, -9.9390697479248e-001, 9.9390697479248e-001,
+ -2.1910125017166e-001, 2.1910125017166e-001, -2.1910125017166e-001, 2.1910125017166e-001,
+ 9.7570210695267e-001, 9.7570210695267e-001, 9.7570210695267e-001, 9.7570210695267e-001,
+ -3.2531031966209e-001, -3.2531031966209e-001, -3.2531031966209e-001, -3.2531031966209e-001,
+ 9.4560730457306e-001, -9.4560730457306e-001, 9.4560730457306e-001, -9.4560730457306e-001,
+ 9.8310548067093e-001, 9.8310548067093e-001, 9.8310548067093e-001, 9.8310548067093e-001,
+ -1.8303988873959e-001, 1.8303988873959e-001, -1.8303988873959e-001, 1.8303988873959e-001,
+ 9.3299281597137e-001, 9.3299281597137e-001, 9.3299281597137e-001, 9.3299281597137e-001,
+ -3.5989505052567e-001, 3.5989505052567e-001, -3.5989505052567e-001, 3.5989505052567e-001,
+ 8.5135519504547e-001, 8.5135519504547e-001, 8.5135519504547e-001, 8.5135519504547e-001,
+ -5.2458971738815e-001, 5.2458971738815e-001, -5.2458971738815e-001, 5.2458971738815e-001,
+ 5.6573182344437e-001, 5.6573182344437e-001, 5.6573182344437e-001, 5.6573182344437e-001,
+ -8.2458931207657e-001, 8.2458931207657e-001, -8.2458931207657e-001, 8.2458931207657e-001,
+ -9.3299281597137e-001, 9.3299281597137e-001, -9.3299281597137e-001, 9.3299281597137e-001,
+ 3.5989505052567e-001, 3.5989505052567e-001, 3.5989505052567e-001, 3.5989505052567e-001,
+ -9.7293996810913e-001, -9.7293996810913e-001, -9.7293996810913e-001, -9.7293996810913e-001,
+ -2.3105813562870e-001, 2.3105813562870e-001, -2.3105813562870e-001, 2.3105813562870e-001,
+ 8.3822470903397e-001, 8.3822470903397e-001, 8.3822470903397e-001, 8.3822470903397e-001,
+ -5.4532498121262e-001, 5.4532498121262e-001, -5.4532498121262e-001, 5.4532498121262e-001,
+ 4.0524131059647e-001, 4.0524131059647e-001, 4.0524131059647e-001, 4.0524131059647e-001,
+ -9.1420972347260e-001, 9.1420972347260e-001, -9.1420972347260e-001, 9.1420972347260e-001,
+ -1.5885809063911e-001, -1.5885809063911e-001, -1.5885809063911e-001, -1.5885809063911e-001,
+ -9.8730134963989e-001, 9.8730134963989e-001, -9.8730134963989e-001, 9.8730134963989e-001,
+ 2.0711138844490e-001, 2.0711138844490e-001, 2.0711138844490e-001, 2.0711138844490e-001,
+ -9.7831737995148e-001, 9.7831737995148e-001, -9.7831737995148e-001, 9.7831737995148e-001,
+ -4.0524131059647e-001, 4.0524131059647e-001, -4.0524131059647e-001, 4.0524131059647e-001,
+ 9.1420972347260e-001, 9.1420972347260e-001, 9.1420972347260e-001, 9.1420972347260e-001,
+ -5.8579784631729e-001, -5.8579784631729e-001, -5.8579784631729e-001, -5.8579784631729e-001,
+ 8.1045717000961e-001, -8.1045717000961e-001, 8.1045717000961e-001, -8.1045717000961e-001,
+ 9.2850607633591e-001, 9.2850607633591e-001, 9.2850607633591e-001, 9.2850607633591e-001,
+ -3.7131720781326e-001, 3.7131720781326e-001, -3.7131720781326e-001, 3.7131720781326e-001,
+ 7.2424709796906e-001, 7.2424709796906e-001, 7.2424709796906e-001, 7.2424709796906e-001,
+ -6.8954056501389e-001, 6.8954056501389e-001, -6.8954056501389e-001, 6.8954056501389e-001,
+ 4.1642951965332e-001, 4.1642951965332e-001, 4.1642951965332e-001, 4.1642951965332e-001,
+ -9.0916800498962e-001, 9.0916800498962e-001, -9.0916800498962e-001, 9.0916800498962e-001,
+ 3.9399203658104e-001, 3.9399203658104e-001, 3.9399203658104e-001, 3.9399203658104e-001,
+ -9.1911387443542e-001, 9.1911387443542e-001, -9.1911387443542e-001, 9.1911387443542e-001,
+ -7.2424709796906e-001, 7.2424709796906e-001, -7.2424709796906e-001, 7.2424709796906e-001,
+ 6.8954056501389e-001, 6.8954056501389e-001, 6.8954056501389e-001, 6.8954056501389e-001,
+ -9.3733906745911e-001, -9.3733906745911e-001, -9.3733906745911e-001, -9.3733906745911e-001,
+ 3.4841868281364e-001, -3.4841868281364e-001, 3.4841868281364e-001, -3.4841868281364e-001,
+ 7.1573078632355e-001, 7.1573078632355e-001, 7.1573078632355e-001, 7.1573078632355e-001,
+ -6.9837623834610e-001, 6.9837623834610e-001, -6.9837623834610e-001, 6.9837623834610e-001,
+ 2.4541229009628e-002, 2.4541229009628e-002, 2.4541229009628e-002, 2.4541229009628e-002,
+ -9.9969881772995e-001, 9.9969881772995e-001, -9.9969881772995e-001, 9.9969881772995e-001,
+ -6.8060100078583e-001, -6.8060100078583e-001, -6.8060100078583e-001, -6.8060100078583e-001,
+ -7.3265421390533e-001, 7.3265421390533e-001, -7.3265421390533e-001, 7.3265421390533e-001,
+ 1.2271538376808e-002, 1.2271538376808e-002, 1.2271538376808e-002, 1.2271538376808e-002,
+ -9.9992471933365e-001, 9.9992471933365e-001, -9.9992471933365e-001, 9.9992471933365e-001,
+ -2.4541229009628e-002, 2.4541229009628e-002, -2.4541229009628e-002, 2.4541229009628e-002,
+ 9.9969881772995e-001, 9.9969881772995e-001, 9.9969881772995e-001, 9.9969881772995e-001,
+ -3.6807224154472e-002, -3.6807224154472e-002, -3.6807224154472e-002, -3.6807224154472e-002,
+ 9.9932241439819e-001, -9.9932241439819e-001, 9.9932241439819e-001, -9.9932241439819e-001
+};
+
+static inline void cft1st(int n, float *a) /* in-place butterfly pass over a[0..n), 16 floats (8 re/im pairs) per group */
+{ /* NOTE(review): looks like an SSE port of cft1st() from Ooura's fft4g - confirm against the scalar original */
+ int j;
+ float *w = CT1STP; /* coefficient table defined elsewhere; the loop consumes 32 floats of it per group */
+ /* Prologue: a[0..15] is always processed, whatever n is; only w[2] is used as a multiplier here. */
+ __m128 XMM1, XMM3, XMM0 = _mm_setzero_ps(), XMM2 = XMM0, XMM4 = XMM0, XMM5 = XMM0; /* zeroed: _mm_loadl_pi reads its first operand, which must not be indeterminate */
+ XMM0 = _mm_loadl_pi(XMM0, (__m64*)(a )); /* XMM0 = (a0,a1,a4,a5) once the loadh below completes */
+ XMM2 = _mm_loadl_pi(XMM2, (__m64*)(a+ 2)); /* XMM2 = (a2,a3,a6,a7) once the loadh below completes */
+ XMM0 = _mm_loadh_pi(XMM0, (__m64*)(a+ 4));
+ XMM2 = _mm_loadh_pi(XMM2, (__m64*)(a+ 6));
+ XMM1 = XMM0;
+ XMM0 = _mm_add_ps(XMM0, XMM2); /* pairwise sums */
+ XMM1 = _mm_sub_ps(XMM1, XMM2); /* pairwise differences */
+ XMM2 = XMM0;
+ XMM3 = XMM1;
+ XMM0 = _mm_movelh_ps(XMM0, XMM0); /* duplicate low pair into both halves */
+ XMM2 = _mm_movehl_ps(XMM2, XMM2); /* duplicate high pair into both halves */
+ XMM1 = _mm_movelh_ps(XMM1, XMM1);
+ XMM3 = _mm_shuffle_ps(XMM3, XMM3, _MM_SHUFFLE(2,3,2,3)); /* high pair, re/im swapped, in both halves */
+ XMM2 = _mm_xor_ps(XMM2, PCS_RRNN.ps); /* PCS_* are defined elsewhere; presumably per-lane sign-bit masks - confirm */
+ XMM3 = _mm_xor_ps(XMM3, PCS_RNNR.ps);
+ XMM4 = _mm_loadl_pi(XMM4, (__m64*)(a+ 8)); /* start loading a[8..15] for the second half of the prologue */
+ XMM5 = _mm_loadl_pi(XMM5, (__m64*)(a+10));
+ XMM0 = _mm_add_ps(XMM0, XMM2);
+ XMM1 = _mm_add_ps(XMM1, XMM3);
+ XMM4 = _mm_loadh_pi(XMM4, (__m64*)(a+12)); /* XMM4 = (a8,a9,a12,a13) */
+ XMM5 = _mm_loadh_pi(XMM5, (__m64*)(a+14)); /* XMM5 = (a10,a11,a14,a15) */
+ XMM2 = XMM4;
+ _mm_storel_pi((__m64*)(a ), XMM0); /* results for a[0..1] */
+ _mm_storel_pi((__m64*)(a+ 2), XMM1); /* results for a[2..3] */
+ XMM4 = _mm_add_ps(XMM4, XMM5);
+ XMM2 = _mm_sub_ps(XMM2, XMM5);
+ _mm_storeh_pi((__m64*)(a+ 4), XMM0); /* results for a[4..5] */
+ _mm_storeh_pi((__m64*)(a+ 6), XMM1); /* results for a[6..7] */
+ XMM5 = XMM4;
+ XMM3 = XMM2;
+ XMM4 = _mm_shuffle_ps(XMM4, XMM4, _MM_SHUFFLE(0,3,1,0));
+ XMM5 = _mm_shuffle_ps(XMM5, XMM5, _MM_SHUFFLE(2,1,3,2));
+ XMM2 = _mm_shuffle_ps(XMM2, XMM2, _MM_SHUFFLE(2,3,1,0));
+ XMM3 = _mm_shuffle_ps(XMM3, XMM3, _MM_SHUFFLE(1,0,2,3));
+ XMM5 = _mm_xor_ps(XMM5, PCS_RRNN.ps);
+ XMM3 = _mm_xor_ps(XMM3, PCS_RNNR.ps);
+ XMM4 = _mm_add_ps(XMM4, XMM5);
+ XMM2 = _mm_add_ps(XMM2, XMM3);
+ _mm_storel_pi((__m64*)(a+ 8), XMM4); /* results for a[8..9] */
+ _mm_storeh_pi((__m64*)(a+12), XMM4); /* results for a[12..13] */
+ XMM5 = XMM2;
+ XMM3 = _mm_load_ss(w+2); /* the single scalar coefficient used by the prologue */
+ XMM2 = _mm_shuffle_ps(XMM2, XMM2, _MM_SHUFFLE(3,3,0,0));
+ XMM5 = _mm_shuffle_ps(XMM5, XMM5, _MM_SHUFFLE(2,2,1,1));
+ XMM3 = _mm_shuffle_ps(XMM3, XMM3, _MM_SHUFFLE(0,0,0,0)); /* broadcast w[2] to all four lanes */
+#if defined(__SSE3__)
+ XMM2 = _mm_addsub_ps(XMM2, XMM5); /* subtract in even lanes, add in odd lanes */
+#else
+ XMM5 = _mm_xor_ps(XMM5, PCS_NRNR.ps); /* non-SSE3 path: XOR with a mask, then a plain add, in place of addsub */
+ XMM2 = _mm_add_ps(XMM2, XMM5);
+#endif
+ XMM2 = _mm_mul_ps(XMM2, XMM3);
+ _mm_storel_pi((__m64*)(a+10), XMM2); /* results for a[10..11] */
+ _mm_storeh_pi((__m64*)(a+14), XMM2); /* results for a[14..15] */
+ for (j = 16; j < n; j += 16) /* remaining groups: a[j..j+15] with coefficients w[0..31] */
+ {
+ __m128 XMM1, XMM3, XMM0 = _mm_setzero_ps(), XMM2 = XMM0, XMM4 = XMM0, XMM5 = XMM0; /* shadow the outer registers; zeroed for the same reason as above */
+ XMM0 = _mm_loadl_pi(XMM0, (__m64*)(a+j ));
+ XMM2 = _mm_loadl_pi(XMM2, (__m64*)(a+j+ 2));
+ XMM0 = _mm_loadh_pi(XMM0, (__m64*)(a+j+ 4));
+ XMM2 = _mm_loadh_pi(XMM2, (__m64*)(a+j+ 6));
+ XMM1 = XMM0;
+ XMM0 = _mm_add_ps(XMM0, XMM2);
+ XMM1 = _mm_sub_ps(XMM1, XMM2);
+ XMM2 = XMM0;
+ XMM3 = XMM1;
+ XMM0 = _mm_movelh_ps(XMM0, XMM0);
+ XMM2 = _mm_movehl_ps(XMM2, XMM2);
+ XMM1 = _mm_movelh_ps(XMM1, XMM1);
+ XMM3 = _mm_shuffle_ps(XMM3, XMM3, _MM_SHUFFLE(2,3,2,3));
+ XMM2 = _mm_xor_ps(XMM2, PCS_RRNN.ps);
+ XMM3 = _mm_xor_ps(XMM3, PCS_RNNR.ps);
+ XMM0 = _mm_add_ps(XMM0, XMM2);
+ XMM1 = _mm_add_ps(XMM1, XMM3);
+ _mm_storel_pi((__m64*)(a+j ), XMM0); /* a[j..j+1] is stored without any coefficient multiply */
+ XMM2 = XMM0;
+ XMM4 = _mm_loadl_pi(XMM4, (__m64*)(a+j+ 8)); /* begin loading the second half of the group */
+ XMM5 = _mm_loadl_pi(XMM5, (__m64*)(a+j+10));
+ XMM2 = _mm_shuffle_ps(XMM2, XMM2, _MM_SHUFFLE(2,3,2,3));
+ XMM0 = _mm_mul_ps(XMM0, PM128(w )); /* PM128 is defined elsewhere; presumably reads four floats at the pointer as __m128 - confirm */
+ XMM2 = _mm_mul_ps(XMM2, PM128(w+ 4));
+ XMM3 = XMM1;
+ XMM0 = _mm_add_ps(XMM0, XMM2);
+ XMM3 = _mm_shuffle_ps(XMM3, XMM3, _MM_SHUFFLE(2,3,0,1)); /* swap re/im within each pair */
+ XMM4 = _mm_loadh_pi(XMM4, (__m64*)(a+j+12));
+ XMM5 = _mm_loadh_pi(XMM5, (__m64*)(a+j+14));
+ _mm_storeh_pi((__m64*)(a+j+ 4), XMM0);
+ XMM1 = _mm_mul_ps(XMM1, PM128(w+ 8));
+ XMM2 = XMM4;
+ XMM3 = _mm_mul_ps(XMM3, PM128(w+12));
+ XMM4 = _mm_add_ps(XMM4, XMM5);
+ XMM2 = _mm_sub_ps(XMM2, XMM5);
+ XMM1 = _mm_add_ps(XMM1, XMM3);
+ XMM5 = XMM4;
+ XMM0 = XMM2;
+ _mm_storel_pi((__m64*)(a+j+ 2), XMM1);
+ XMM4 = _mm_movelh_ps(XMM4, XMM4);
+ XMM5 = _mm_movehl_ps(XMM5, XMM5);
+ _mm_storeh_pi((__m64*)(a+j+ 6), XMM1);
+ XMM2 = _mm_movelh_ps(XMM2, XMM2);
+ XMM0 = _mm_shuffle_ps(XMM0, XMM0, _MM_SHUFFLE(2,3,2,3));
+ XMM5 = _mm_xor_ps(XMM5, PCS_RRNN.ps);
+ XMM0 = _mm_xor_ps(XMM0, PCS_RNNR.ps);
+ XMM4 = _mm_add_ps(XMM4, XMM5);
+ XMM2 = _mm_add_ps(XMM2, XMM0);
+ _mm_storel_pi((__m64*)(a+j+ 8), XMM4); /* a[j+8..j+9] is stored without any coefficient multiply */
+ XMM5 = XMM4;
+ XMM4 = _mm_shuffle_ps(XMM4, XMM4, _MM_SHUFFLE(2,3,2,3));
+ XMM4 = _mm_mul_ps(XMM4, PM128(w+16));
+ XMM5 = _mm_mul_ps(XMM5, PM128(w+20));
+ XMM0 = XMM2;
+ XMM4 = _mm_sub_ps(XMM4, XMM5); /* note: a subtraction here, where the first half of the group adds */
+ XMM0 = _mm_shuffle_ps(XMM0, XMM0, _MM_SHUFFLE(2,3,0,1));
+ _mm_storeh_pi((__m64*)(a+j+12), XMM4);
+ XMM2 = _mm_mul_ps(XMM2, PM128(w+24));
+ XMM0 = _mm_mul_ps(XMM0, PM128(w+28));
+ XMM2 = _mm_add_ps(XMM2, XMM0);
+ _mm_storel_pi((__m64*)(a+j+10), XMM2);
+ _mm_storeh_pi((__m64*)(a+j+14), XMM2);
+ w += 32; /* advance to the next group's coefficients */
+ }
+}
+
+
+STIN void cftmdl(int n, int l, float *a) /* in-place butterfly pass over a[0..n), combining four sub-blocks spaced l floats apart */
+{ /* NOTE(review): looks like an SSE port of cftmdl() from Ooura's fft4g - confirm against the scalar original */
+ int j, j1, j2, j3, k, m, m2;
+ __m128 XMM6; /* function scope: holds W2 for the second loop, then is reused as scratch in the k loops */
+ __m128 *ctmdl = (__m128*)CTMDLP; /* coefficient vectors defined elsewhere; 12 are consumed per k step */
+ /* Only aligned _mm_load_ps/_mm_store_ps are used below, so a+j, a+j1, a+j2 and a+j3 must all be 16-byte aligned. */
+ m = l << 2; /* size in floats of one group of four l-sized sub-blocks */
+ for (j = 0; j < l; j += 8) { /* first group: adds, subs and sign flips only, no coefficient multiplies */
+ __m128 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6; /* this XMM6 shadows the function-scope one */
+ j1 = j + l;
+ j2 = j1 + l;
+ j3 = j2 + l;
+ XMM0 = _mm_load_ps(a+j );
+ XMM4 = _mm_load_ps(a+j1 );
+ XMM2 = _mm_load_ps(a+j2 );
+ XMM5 = _mm_load_ps(a+j3 );
+ XMM1 = XMM0;
+ XMM3 = XMM2;
+ XMM0 = _mm_add_ps(XMM0, XMM4); /* a[j]  + a[j1] */
+ XMM2 = _mm_add_ps(XMM2, XMM5); /* a[j2] + a[j3] */
+ XMM1 = _mm_sub_ps(XMM1, XMM4); /* a[j]  - a[j1] */
+ XMM3 = _mm_sub_ps(XMM3, XMM5); /* a[j2] - a[j3] */
+ XMM4 = XMM0;
+ XMM5 = XMM1;
+ XMM3 = _mm_shuffle_ps(XMM3, XMM3, _MM_SHUFFLE(2,3,0,1)); /* swap re/im within each pair */
+ XMM6 = _mm_load_ps(a+j +4); /* loads for the second 4-float half are interleaved with the first half's arithmetic */
+ XMM0 = _mm_add_ps(XMM0, XMM2);
+ XMM3 = _mm_xor_ps(XMM3, PCS_NRNR.ps); /* PCS_NRNR is defined elsewhere; presumably a per-lane sign-bit mask - confirm */
+ XMM4 = _mm_sub_ps(XMM4, XMM2);
+ XMM2 = _mm_load_ps(a+j1+4);
+ XMM1 = _mm_add_ps(XMM1, XMM3);
+ XMM5 = _mm_sub_ps(XMM5, XMM3);
+ XMM3 = _mm_load_ps(a+j2+4);
+ _mm_store_ps(a+j , XMM0);
+ XMM0 = _mm_load_ps(a+j3+4);
+ _mm_store_ps(a+j1 , XMM1);
+ XMM1 = XMM6;
+ _mm_store_ps(a+j2 , XMM4);
+ XMM4 = XMM3;
+ XMM6 = _mm_add_ps(XMM6, XMM2);
+ XMM3 = _mm_add_ps(XMM3, XMM0);
+ XMM1 = _mm_sub_ps(XMM1, XMM2);
+ XMM4 = _mm_sub_ps(XMM4, XMM0);
+ XMM2 = XMM6;
+ XMM0 = XMM1;
+ XMM4 = _mm_shuffle_ps(XMM4, XMM4, _MM_SHUFFLE(2,3,0,1));
+ _mm_store_ps(a+j3 , XMM5);
+ XMM6 = _mm_add_ps(XMM6, XMM3);
+ XMM4 = _mm_xor_ps(XMM4, PCS_NRNR.ps);
+ XMM2 = _mm_sub_ps(XMM2, XMM3);
+ XMM1 = _mm_add_ps(XMM1, XMM4);
+ XMM0 = _mm_sub_ps(XMM0, XMM4);
+ _mm_store_ps(a+j +4, XMM6);
+ _mm_store_ps(a+j1+4, XMM1);
+ _mm_store_ps(a+j2+4, XMM2);
+ _mm_store_ps(a+j3+4, XMM0);
+ }
+ XMM6 = _mm_load_ps(W2); /* four floats from W2 (defined elsewhere); the only multiplier used by the next loop */
+ for (j = m; j < l + m; j += 8) { /* second group: a[m..2m), results for j1 and j3 are scaled by XMM6 */
+ __m128 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5;
+ j1 = j + l;
+ j2 = j1 + l;
+ j3 = j2 + l;
+ XMM0 = _mm_load_ps(a+j );
+ XMM4 = _mm_load_ps(a+j1 );
+ XMM2 = _mm_load_ps(a+j2 );
+ XMM5 = _mm_load_ps(a+j3 );
+ XMM1 = XMM0;
+ XMM3 = XMM2;
+ XMM0 = _mm_add_ps(XMM0, XMM4);
+ XMM2 = _mm_add_ps(XMM2, XMM5);
+ XMM1 = _mm_sub_ps(XMM1, XMM4);
+ XMM3 = _mm_sub_ps(XMM3, XMM5);
+ XMM4 = XMM0;
+ XMM5 = XMM0;
+ XMM4 = _mm_shuffle_ps(XMM4, XMM2, _MM_SHUFFLE(3,1,2,0)); /* (x2i_1,x2i_0,x0r_1,x0r_0) */
+ XMM5 = _mm_shuffle_ps(XMM5, XMM2, _MM_SHUFFLE(2,0,3,1)); /* (x2r_1,x2r_0,x0i_1,x0i_0) */
+ XMM4 = _mm_shuffle_ps(XMM4, XMM4, _MM_SHUFFLE(1,3,0,2)); /* (x0r_1,x2i_1,x0r_0,x2i_0) */
+ XMM5 = _mm_shuffle_ps(XMM5, XMM5, _MM_SHUFFLE(3,1,2,0)); /* (x2r_1,x0i_1,x2r_0,x0i_0) */
+ XMM0 = _mm_add_ps(XMM0, XMM2);
+ XMM2 = XMM1; /* x1 */
+ XMM4 = _mm_sub_ps(XMM4, XMM5);
+ XMM5 = XMM3; /* x3 */
+#if defined(__SSE3__)
+ XMM2 = _mm_moveldup_ps(XMM2);
+ XMM1 = _mm_movehdup_ps(XMM1);
+#else
+ XMM2 = _mm_shuffle_ps(XMM2, XMM2, _MM_SHUFFLE(2,2,0,0)); /* x1r */
+ XMM1 = _mm_shuffle_ps(XMM1, XMM1, _MM_SHUFFLE(3,3,1,1)); /* x1i */
+#endif
+ _mm_store_ps(a+j , XMM0);
+ _mm_store_ps(a+j2 , XMM4);
+#if defined(__SSE3__)
+ XMM5 = _mm_moveldup_ps(XMM5);
+ XMM3 = _mm_movehdup_ps(XMM3);
+#else
+ XMM5 = _mm_shuffle_ps(XMM5, XMM5, _MM_SHUFFLE(2,2,0,0)); /* x3r */
+ XMM3 = _mm_shuffle_ps(XMM3, XMM3, _MM_SHUFFLE(3,3,1,1)); /* x3i */
+#endif
+ XMM0 = XMM2; /* x1r */
+ XMM4 = XMM1; /* x1i */
+ XMM2 = _mm_sub_ps(XMM2, XMM3); /* x1r - x3i */
+ XMM1 = _mm_add_ps(XMM1, XMM5); /* x1i + x3r */
+ XMM5 = _mm_sub_ps(XMM5, XMM4); /* x3r - x1i */
+ XMM4 = _mm_load_ps(a+j +4); /* begin loading the second 4-float half */
+ XMM3 = _mm_add_ps(XMM3, XMM0); /* x3i + x1r */
+ XMM0 = _mm_load_ps(a+j1+4);
+#if defined(__SSE3__)
+ XMM2 = _mm_addsub_ps(XMM2, XMM1);
+ XMM1 = _mm_load_ps(a+j2+4);
+ XMM5 = _mm_addsub_ps(XMM5, XMM3);
+#else
+ XMM1 = _mm_xor_ps(XMM1, PCS_NRNR.ps); /* non-SSE3 path: XOR with a mask, then a plain add, in place of addsub */
+ XMM3 = _mm_xor_ps(XMM3, PCS_NRNR.ps);
+ XMM2 = _mm_add_ps(XMM2, XMM1);
+ XMM1 = _mm_load_ps(a+j2+4);
+ XMM5 = _mm_add_ps(XMM5, XMM3);
+#endif
+ XMM3 = _mm_load_ps(a+j3+4);
+ XMM2 = _mm_mul_ps(XMM2, XMM6); /* scale by the W2 vector */
+ XMM5 = _mm_mul_ps(XMM5, XMM6);
+ _mm_store_ps(a+j1 , XMM2);
+ XMM2 = XMM4;
+ _mm_store_ps(a+j3 , XMM5);
+ XMM5 = XMM1;
+ XMM4 = _mm_add_ps(XMM4, XMM0);
+ XMM1 = _mm_add_ps(XMM1, XMM3);
+ XMM2 = _mm_sub_ps(XMM2, XMM0);
+ XMM5 = _mm_sub_ps(XMM5, XMM3);
+ XMM0 = XMM4;
+ XMM3 = XMM4;
+ XMM0 = _mm_shuffle_ps(XMM0, XMM1, _MM_SHUFFLE(3,1,2,0)); /* (x2i_1,x2i_0,x0r_1,x0r_0) */
+ XMM3 = _mm_shuffle_ps(XMM3, XMM1, _MM_SHUFFLE(2,0,3,1)); /* (x2r_1,x2r_0,x0i_1,x0i_0) */
+ XMM0 = _mm_shuffle_ps(XMM0, XMM0, _MM_SHUFFLE(1,3,0,2)); /* (x0r_1,x2i_1,x0r_0,x2i_0) */
+ XMM3 = _mm_shuffle_ps(XMM3, XMM3, _MM_SHUFFLE(3,1,2,0)); /* (x2r_1,x0i_1,x2r_0,x0i_0) */
+ XMM4 = _mm_add_ps(XMM4, XMM1);
+ XMM1 = XMM2; /* x1 */
+ XMM0 = _mm_sub_ps(XMM0, XMM3);
+ XMM3 = XMM5; /* x3 */
+#if defined(__SSE3__)
+ XMM1 = _mm_moveldup_ps(XMM1);
+ XMM2 = _mm_movehdup_ps(XMM2);
+#else
+ XMM1 = _mm_shuffle_ps(XMM1, XMM1, _MM_SHUFFLE(2,2,0,0)); /* x1r */
+ XMM2 = _mm_shuffle_ps(XMM2, XMM2, _MM_SHUFFLE(3,3,1,1)); /* x1i */
+#endif
+ _mm_store_ps(a+j +4, XMM4);
+ _mm_store_ps(a+j2+4, XMM0);
+#if defined(__SSE3__)
+ XMM3 = _mm_moveldup_ps(XMM3);
+ XMM5 = _mm_movehdup_ps(XMM5);
+#else
+ XMM3 = _mm_shuffle_ps(XMM3, XMM3, _MM_SHUFFLE(2,2,0,0)); /* x3r */
+ XMM5 = _mm_shuffle_ps(XMM5, XMM5, _MM_SHUFFLE(3,3,1,1)); /* x3i */
+#endif
+ XMM4 = XMM1; /* x1r */
+ XMM0 = XMM2; /* x1i */
+ XMM1 = _mm_sub_ps(XMM1, XMM5); /* x1r - x3i */
+ XMM2 = _mm_add_ps(XMM2, XMM3); /* x1i + x3r */
+ XMM3 = _mm_sub_ps(XMM3, XMM0); /* x3r - x1i */
+ XMM5 = _mm_add_ps(XMM5, XMM4); /* x3i + x1r */
+#if defined(__SSE3__)
+ XMM1 = _mm_addsub_ps(XMM1, XMM2);
+ XMM3 = _mm_addsub_ps(XMM3, XMM5);
+#else
+ XMM2 = _mm_xor_ps(XMM2, PCS_NRNR.ps);
+ XMM5 = _mm_xor_ps(XMM5, PCS_NRNR.ps);
+ XMM1 = _mm_add_ps(XMM1, XMM2);
+ XMM3 = _mm_add_ps(XMM3, XMM5);
+#endif
+ XMM1 = _mm_mul_ps(XMM1, XMM6);
+ XMM3 = _mm_mul_ps(XMM3, XMM6);
+ _mm_store_ps(a+j1+4, XMM1);
+ _mm_store_ps(a+j3+4, XMM3);
+ }
+ m2 = 2 * m;
+ for (k = m2; k < n; k += m2) { /* remaining groups, two per k step, each with its own six coefficient vectors */
+ for (j = k; j < l + k; j += 4) { /* group at a[k..k+m): uses ctmdl[0..5] */
+ __m128 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5; /* XMM6 below is the function-scope variable, used as scratch */
+ j1 = j + l;
+ j2 = j1 + l;
+ j3 = j2 + l;
+ XMM0 = _mm_load_ps(a+j );
+ XMM4 = _mm_load_ps(a+j1);
+ XMM2 = _mm_load_ps(a+j2);
+ XMM5 = _mm_load_ps(a+j3);
+ XMM1 = XMM0;
+ XMM3 = XMM2;
+ XMM0 = _mm_add_ps(XMM0, XMM4); /* a[j]  + a[j1] */
+ XMM2 = _mm_add_ps(XMM2, XMM5); /* a[j2] + a[j3] */
+ XMM1 = _mm_sub_ps(XMM1, XMM4); /* a[j]  - a[j1] */
+ XMM3 = _mm_sub_ps(XMM3, XMM5); /* a[j2] - a[j3] */
+ /* The a[j2] result is built from the difference of the two sums and its re/im-swapped copy. */
+ XMM4 = XMM0;
+ XMM5 = XMM0;
+ XMM6 = XMM2;
+ XMM5 = _mm_shuffle_ps(XMM5, XMM5, _MM_SHUFFLE(2,3,0,1)); /* swap re/im within each pair */
+ XMM6 = _mm_shuffle_ps(XMM6, XMM6, _MM_SHUFFLE(2,3,0,1));
+ XMM4 = _mm_sub_ps(XMM4, XMM2);
+ XMM5 = _mm_sub_ps(XMM5, XMM6);
+ XMM4 = _mm_mul_ps(XMM4, *(ctmdl+ 2));
+ XMM5 = _mm_mul_ps(XMM5, *(ctmdl+ 3));
+ XMM0 = _mm_add_ps(XMM0, XMM2);
+ XMM2 = XMM1;
+ XMM4 = _mm_add_ps(XMM4, XMM5);
+ XMM5 = XMM3;
+ _mm_store_ps(a+j , XMM0); /* a[j] gets the plain sum, no coefficient */
+ XMM0 = XMM1;
+ _mm_store_ps(a+j2, XMM4);
+ XMM5 = _mm_shuffle_ps(XMM5, XMM5, _MM_SHUFFLE(2,3,0,1));
+ XMM0 = _mm_shuffle_ps(XMM0, XMM0, _MM_SHUFFLE(2,3,0,1));
+ XMM4 = XMM0;
+ XMM5 = _mm_xor_ps(XMM5, PCS_NRNR.ps);
+ XMM3 = _mm_xor_ps(XMM3, PCS_NRNR.ps);
+ XMM1 = _mm_add_ps(XMM1, XMM5);
+ XMM0 = _mm_sub_ps(XMM0, XMM3);
+ XMM2 = _mm_sub_ps(XMM2, XMM5);
+ XMM4 = _mm_add_ps(XMM4, XMM3);
+ XMM1 = _mm_mul_ps(XMM1, *(ctmdl ));
+ XMM0 = _mm_mul_ps(XMM0, *(ctmdl+ 1));
+ XMM2 = _mm_mul_ps(XMM2, *(ctmdl+ 4));
+ XMM4 = _mm_mul_ps(XMM4, *(ctmdl+ 5));
+ XMM1 = _mm_add_ps(XMM1, XMM0);
+ XMM2 = _mm_add_ps(XMM2, XMM4);
+ _mm_store_ps(a+j1, XMM1);
+ _mm_store_ps(a+j3, XMM2);
+ }
+ for (j = k + m; j < l + (k + m); j += 4) { /* group at a[k+m..k+2m): uses ctmdl[6..11] */
+ __m128 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5;
+ j1 = j + l;
+ j2 = j1 + l;
+ j3 = j2 + l;
+ XMM0 = _mm_load_ps(a+j );
+ XMM4 = _mm_load_ps(a+j1);
+ XMM2 = _mm_load_ps(a+j2);
+ XMM5 = _mm_load_ps(a+j3);
+ XMM1 = XMM0;
+ XMM3 = XMM2;
+ XMM0 = _mm_add_ps(XMM0, XMM4);
+ XMM2 = _mm_add_ps(XMM2, XMM5);
+ XMM1 = _mm_sub_ps(XMM1, XMM4);
+ XMM3 = _mm_sub_ps(XMM3, XMM5);
+ XMM4 = XMM0;
+ XMM5 = XMM0;
+ XMM6 = XMM2;
+ XMM5 = _mm_shuffle_ps(XMM5, XMM5, _MM_SHUFFLE(2,3,0,1));
+ XMM6 = _mm_shuffle_ps(XMM6, XMM6, _MM_SHUFFLE(2,3,0,1));
+ XMM4 = _mm_sub_ps(XMM4, XMM2);
+ XMM5 = _mm_sub_ps(XMM5, XMM6);
+ XMM4 = _mm_mul_ps(XMM4, *(ctmdl+ 9)); /* note the coefficient order (9 then 8) and the subtraction below, unlike the first inner loop */
+ XMM5 = _mm_mul_ps(XMM5, *(ctmdl+ 8));
+ XMM0 = _mm_add_ps(XMM0, XMM2);
+ XMM2 = XMM1;
+ XMM5 = _mm_sub_ps(XMM5, XMM4);
+ XMM4 = XMM3;
+ _mm_store_ps(a+j , XMM0);
+ XMM0 = XMM1;
+ XMM4 = _mm_shuffle_ps(XMM4, XMM4, _MM_SHUFFLE(2,3,0,1));
+ XMM0 = _mm_shuffle_ps(XMM0, XMM0, _MM_SHUFFLE(2,3,0,1));
+ _mm_store_ps(a+j2, XMM5);
+ XMM5 = XMM0;
+ XMM4 = _mm_xor_ps(XMM4, PCS_NRNR.ps);
+ XMM3 = _mm_xor_ps(XMM3, PCS_NRNR.ps);
+ XMM1 = _mm_add_ps(XMM1, XMM4);
+ XMM0 = _mm_sub_ps(XMM0, XMM3);
+ XMM2 = _mm_sub_ps(XMM2, XMM4);
+ XMM5 = _mm_add_ps(XMM5, XMM3);
+ XMM1 = _mm_mul_ps(XMM1, *(ctmdl+ 6));
+ XMM0 = _mm_mul_ps(XMM0, *(ctmdl+ 7));
+ XMM2 = _mm_mul_ps(XMM2, *(ctmdl+10));
+ XMM5 = _mm_mul_ps(XMM5, *(ctmdl+11));
+ XMM1 = _mm_add_ps(XMM1, XMM0);
+ XMM2 = _mm_add_ps(XMM2, XMM5);
+ _mm_store_ps(a+j1, XMM1);
+ _mm_store_ps(a+j3, XMM2);
+ }
+ ctmdl += 12; /* advance to the next k step's coefficient vectors */
+ }
+}
+
+
+static inline void bitrv2(int n, int *ip, float *a)
+{ /* In-place reordering of the (re,im) float pairs of a[], driven by the offset table ip[]. */
+ int j, j1, k, k1, l, m = 0, m2;
+ float xr, xi, yr, yi;
+ /* (l, m) are fixed per supported n; for any other n, m stays 0 and nothing is swapped. */
+ ip[0] = 0;
+ l = n;
+ if(n==256)
+ {
+ l = 32;
+ m = 8;
+ }
+ else if(n==512)
+ {
+ l = 64;
+ m = 8;
+ }
+ else if(n==1024)
+ {
+ l = 64;
+ m = 16;
+ }
+ else if(n==2048)
+ {
+ l = 128;
+ m = 16;
+ }
+ else if(n==4096)
+ {
+ l = 128;
+ m = 32;
+ }
+ m2 = 2 * m;
+ if ((m << 3) == l) { /* true for n = 512 and n = 2048: four sub-blocks of stride m2 per (j,k) */
+ for (k = 0; k < m; k++) {
+ for (j = 0; j < k; j++) {
+ __m128 X0 = _mm_setzero_ps(), Y0 = X0, X1 = X0, Y1 = X0; /* defined start value: _mm_loadl_pi keeps the upper half of its first operand */
+ j1 = 2 * j + ip[k];
+ k1 = 2 * k + ip[j];
+ X0 = _mm_loadl_pi(X0, (__m64*)(a+j1 ));
+ Y0 = _mm_loadl_pi(Y0, (__m64*)(a+k1 ));
+ X1 = _mm_loadl_pi(X1, (__m64*)(a+j1+m2*2));
+ Y1 = _mm_loadl_pi(Y1, (__m64*)(a+k1+m2 ));
+ X0 = _mm_loadh_pi(X0, (__m64*)(a+j1+m2 ));
+ Y0 = _mm_loadh_pi(Y0, (__m64*)(a+k1+m2*2));
+ X1 = _mm_loadh_pi(X1, (__m64*)(a+j1+m2*3));
+ Y1 = _mm_loadh_pi(Y1, (__m64*)(a+k1+m2*3));
+ _mm_storel_pi((__m64*)(a+k1 ), X0); /* net effect: j1<->k1, j1+m2<->k1+2*m2, j1+2*m2<->k1+m2, j1+3*m2<->k1+3*m2 */
+ _mm_storel_pi((__m64*)(a+j1 ), Y0);
+ _mm_storel_pi((__m64*)(a+k1+m2 ), X1);
+ _mm_storel_pi((__m64*)(a+j1+m2*2), Y1);
+ _mm_storeh_pi((__m64*)(a+k1+m2*2), X0);
+ _mm_storeh_pi((__m64*)(a+j1+m2 ), Y0);
+ _mm_storeh_pi((__m64*)(a+k1+m2*3), X1);
+ _mm_storeh_pi((__m64*)(a+j1+m2*3), Y1);
+ }
+ j1 = 2 * k + m2 + ip[k]; /* j == k case: one scalar swap of the pairs at j1 and j1 + m2 */
+ k1 = j1 + m2;
+ xr = a[j1];
+ xi = a[j1 + 1];
+ yr = a[k1];
+ yi = a[k1 + 1];
+ a[j1] = yr;
+ a[j1 + 1] = yi;
+ a[k1] = xr;
+ a[k1 + 1] = xi;
+ }
+ } else { /* remaining sizes: two sub-blocks of stride m2 per (j,k), no j == k swap */
+ for (k = 1; k < m; k++) {
+ for (j = 0; j < k; j++) {
+ __m128 X = _mm_setzero_ps(), Y = X; /* defined start value, see above */
+ j1 = 2 * j + ip[k];
+ k1 = 2 * k + ip[j];
+ X = _mm_loadl_pi(X, (__m64*)(a+j1 ));
+ Y = _mm_loadl_pi(Y, (__m64*)(a+k1 ));
+ X = _mm_loadh_pi(X, (__m64*)(a+j1+m2));
+ Y = _mm_loadh_pi(Y, (__m64*)(a+k1+m2));
+ _mm_storel_pi((__m64*)(a+k1 ), X);
+ _mm_storel_pi((__m64*)(a+j1 ), Y);
+ _mm_storeh_pi((__m64*)(a+k1+m2), X);
+ _mm_storeh_pi((__m64*)(a+j1+m2), Y);
+ }
+ }
+ }
+}
+
+
+STIN void cftfsub(int n, float *a)
+{
+ int j, j1, j2, j3, l;
+ /* Runs cft1st, then cftmdl with l = 8, 32, ... while 4*l < n, then one closing pass over a[]. */
+ l = 2;
+ if (n > 8) {
+ cft1st(n, a);
+ l = 8;
+ while ((l << 2) < n) {
+ cftmdl(n, l, a);
+ l <<= 2;
+ }
+ }
+ if ((l << 2) == n) { /* closing pass combines four runs of l floats: a+j, a+j1, a+j2, a+j3 */
+ for (j = 0; j < l; j += 4) {
+ __m128 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5;
+
+ j1 = j + l;
+ j2 = j1 + l;
+ j3 = j2 + l;
+
+ XMM0 = _mm_load_ps(a+j );
+ XMM4 = _mm_load_ps(a+j1);
+ XMM2 = _mm_load_ps(a+j2);
+ XMM5 = _mm_load_ps(a+j3);
+ XMM1 = XMM0;
+ XMM3 = XMM2;
+ XMM0 = _mm_add_ps(XMM0, XMM4); /* XMM0 = a[j]+a[j1], XMM2 = a[j2]+a[j3] */
+ XMM2 = _mm_add_ps(XMM2, XMM5);
+ XMM1 = _mm_sub_ps(XMM1, XMM4); /* XMM1 = a[j]-a[j1], XMM3 = a[j2]-a[j3] */
+ XMM3 = _mm_sub_ps(XMM3, XMM5);
+ XMM4 = XMM0;
+ XMM5 = XMM1;
+ XMM3 = _mm_shuffle_ps(XMM3, XMM3, _MM_SHUFFLE(2,3,0,1)); /* swap the two floats of each pair */
+ XMM0 = _mm_add_ps(XMM0, XMM2);
+ XMM4 = _mm_sub_ps(XMM4, XMM2);
+ XMM3 = _mm_xor_ps(XMM3, PCS_NRNR.ps); /* flip the sign bit of lanes 0 and 2 */
+ _mm_store_ps(a+j , XMM0);
+ _mm_store_ps(a+j2, XMM4);
+ XMM1 = _mm_add_ps(XMM1, XMM3);
+ XMM5 = _mm_sub_ps(XMM5, XMM3);
+ _mm_store_ps(a+j1, XMM1);
+ _mm_store_ps(a+j3, XMM5);
+ }
+ } else { /* closing pass on two runs only: sum goes to a+j, difference to a+j1 (j2, j3 unused here) */
+ for (j = 0; j < l; j += 8)
+ {
+ __m128 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5;
+ j1 = j + l;
+
+ XMM0 = _mm_load_ps(a+j );
+ XMM4 = _mm_load_ps(a+j1 );
+ XMM1 = _mm_load_ps(a+j+ 4);
+ XMM5 = _mm_load_ps(a+j1+4);
+ XMM2 = XMM0;
+ XMM3 = XMM1;
+ XMM0 = _mm_add_ps(XMM0, XMM4);
+ XMM1 = _mm_add_ps(XMM1, XMM5);
+ XMM2 = _mm_sub_ps(XMM2, XMM4);
+ XMM3 = _mm_sub_ps(XMM3, XMM5);
+ _mm_store_ps(a+j , XMM0);
+ _mm_store_ps(a+j +4, XMM1);
+ _mm_store_ps(a+j1 , XMM2);
+ _mm_store_ps(a+j1+4, XMM3);
+ }
+ }
+}
+
+STIN void rftfsub(int n, float *a, int nc, float *w)
+{ /* Pass over a[0..n) that pairs each (re,im) entry j with its mirror n-j, using coefficients from w[]; nc is not used. */
+ int j, k, m, o;
+
+ m = n >> 1;
+ j = 2;
+ { /* pair j = 2 is done in scalar code with w[0], w[1] */
+ float wkr, wki, xr, xi, yr, yi;
+ k = n - j;
+ wkr = w[0];
+ wki = w[1];
+ xr = a[j ] - a[k ];
+ xi = a[j+1] + a[k+1];
+ yr = wkr * xr - wki * xi;
+ yi = wkr * xi + wki * xr;
+ a[j ] -= yr;
+ a[j+1] -= yi;
+ a[k ] += yr;
+ a[k+1] -= yi;
+ j += 2;
+ }
+ n -= 2; /* from here k = n - j: a+k+2 and a+k are the mirror partners of a+j and a+j+2 */
+ w -= 4; /* rebased so that w+j*2 addresses the coefficients for j (first used with j = 4) */
+ o = ((m-j)&(~7))+j; /* end of the part handled 8 floats of a+j per iteration */
+ for(;j<o;j+=8)
+ {
+ __m128 XMM0 = _mm_setzero_ps(), XMM1, XMM2, XMM3, XMM4, XMM5, XMM6; /* XMM0 starts defined: _mm_loadl_pi merges into its first operand */
+ k = n - j;
+ XMM0 = _mm_loadl_pi(XMM0, (__m64*)(a+k+2));
+ XMM5 = _mm_load_ps(w+j*2 );
+ XMM0 = _mm_loadh_pi(XMM0, (__m64*)(a+k ));
+ XMM6 = _mm_load_ps(w+j*2+ 4);
+ XMM1 = XMM0;
+ XMM0 = _mm_xor_ps(XMM0, PCS_NRNR.ps);
+ XMM2 = _mm_load_ps(a+j );
+ XMM0 = _mm_add_ps(XMM0, XMM2);
+ XMM3 = XMM0;
+ XMM3 = _mm_shuffle_ps(XMM3, XMM3, _MM_SHUFFLE(2,3,0,1));
+ XMM0 = _mm_mul_ps(XMM0, XMM5);
+ XMM5 = _mm_loadl_pi(XMM5, (__m64*)(a+k-2)); /* second group of four: mirrors of a+j+4 and a+j+6 */
+ XMM3 = _mm_mul_ps(XMM3, XMM6);
+ XMM6 = _mm_load_ps(w+j*2+ 8);
+ XMM0 = _mm_add_ps(XMM0, XMM3);
+ XMM5 = _mm_loadh_pi(XMM5, (__m64*)(a+k-4));
+ XMM4 = XMM0;
+ XMM2 = _mm_sub_ps(XMM2, XMM0);
+ XMM4 = _mm_xor_ps(XMM4, PCS_NRNR.ps);
+ _mm_store_ps(a+j , XMM2);
+ XMM3 = _mm_load_ps(w+j*2+12);
+ XMM1 = _mm_sub_ps(XMM1, XMM4);
+ XMM0 = XMM5;
+ _mm_storel_pi((__m64*)(a+k+2), XMM1);
+ XMM2 = _mm_load_ps(a+j+4);
+ XMM5 = _mm_xor_ps(XMM5, PCS_NRNR.ps);
+ _mm_storeh_pi((__m64*)(a+k ), XMM1);
+ XMM5 = _mm_add_ps(XMM5, XMM2);
+ XMM4 = XMM5;
+ XMM4 = _mm_shuffle_ps(XMM4, XMM4, _MM_SHUFFLE(2,3,0,1));
+ XMM5 = _mm_mul_ps(XMM5, XMM6);
+ XMM4 = _mm_mul_ps(XMM4, XMM3);
+ XMM5 = _mm_add_ps(XMM5, XMM4);
+ XMM1 = XMM5;
+ XMM2 = _mm_sub_ps(XMM2, XMM5);
+ XMM1 = _mm_xor_ps(XMM1, PCS_NRNR.ps);
+ _mm_store_ps(a+j+4, XMM2);
+ XMM0 = _mm_sub_ps(XMM0, XMM1);
+ _mm_storel_pi((__m64*)(a+k-2), XMM0);
+ _mm_storeh_pi((__m64*)(a+k-4), XMM0);
+ }
+ for(;j<m;j+=4) /* remainder: same computation, 4 floats of a+j per iteration */
+ {
+ __m128 XMM0 = _mm_setzero_ps(), XMM1, XMM2, XMM3, XMM4, XMM5, XMM6; /* defined start value, see above */
+ k = n - j;
+ XMM0 = _mm_loadl_pi(XMM0, (__m64*)(a+k+2));
+ XMM5 = _mm_load_ps(w+j*2 );
+ XMM0 = _mm_loadh_pi(XMM0, (__m64*)(a+k ));
+ XMM6 = _mm_load_ps(w+j*2+4);
+ XMM1 = XMM0;
+ XMM0 = _mm_xor_ps(XMM0, PCS_NRNR.ps);
+ XMM2 = _mm_load_ps(a+j );
+ XMM0 = _mm_add_ps(XMM0, XMM2);
+ XMM3 = XMM0;
+ XMM3 = _mm_shuffle_ps(XMM3, XMM3, _MM_SHUFFLE(2,3,0,1));
+ XMM0 = _mm_mul_ps(XMM0, XMM5);
+ XMM3 = _mm_mul_ps(XMM3, XMM6);
+ XMM0 = _mm_add_ps(XMM0, XMM3);
+ XMM4 = XMM0;
+ XMM2 = _mm_sub_ps(XMM2, XMM0);
+ XMM4 = _mm_xor_ps(XMM4, PCS_NRNR.ps);
+ _mm_store_ps(a+j , XMM2);
+ XMM1 = _mm_sub_ps(XMM1, XMM4);
+ _mm_storel_pi((__m64*)(a+k+2), XMM1);
+ _mm_storeh_pi((__m64*)(a+k ), XMM1);
+ }
+}
+
+STIN void rdft(int n, float *a, int *ip, float *w)
+{
+ int nw, nc;
+ float xi;
+ /* Transform a[0..n) in place: reorder (bitrv2), butterfly passes (cftfsub), then rftfsub with table w. */
+ nw = ip[0]; /* read but not used afterwards */
+ nc = ip[1];
+ if (n > 4) {
+ bitrv2(n, ip + 2, a); /* the reordering table starts two ints into ip */
+ cftfsub(n, a);
+ rftfsub(n, a, nc, w);
+ } else if (n == 4) {
+ cftfsub(n, a);
+ }
+ xi = a[0] - a[1]; /* finally a[0] becomes a[0]+a[1] and a[1] becomes a[0]-a[1] */
+ a[0] += a[1];
+ a[1] = xi;
+}
+
+/*------ Interface ------*/
+
+static void drftf256(float* a)
+{ /* rdft over 256 floats in a[], with the IP256 / W256 tables; used by drft_forward for n == 256 */
+ rdft(256, a, IP256, W256);
+}
+static void drftf512(float* a)
+{ /* rdft over 512 floats in a[], with the IP512 / W512 tables; used by drft_forward for n == 512 */
+ rdft(512, a, IP512, W512);
+}
+static void drftf1024(float* a)
+{ /* rdft over 1024 floats in a[], with the IP1024 / W1024 tables; used by drft_forward for n == 1024 */
+ rdft(1024, a, IP1024, W1024);
+}
+static void drftf2048(float* a)
+{ /* rdft over 2048 floats in a[], with the IP2048 / W2048 tables; used by drft_forward for n == 2048 */
+ rdft(2048, a, IP2048, W2048);
+}
+static void drftf4096(float* a)
+{ /* rdft over 4096 floats in a[], with the IP4096 / W4096 tables; used by drft_forward for n == 4096 */
+ rdft(4096, a, IP4096, W4096);
+}
+#endif /* SSE Optimize */
static void drfti1(int n, float *wa, int *ifac){
static int ntryh[4] = { 4,2,3,5 };
@@ -1231,16 +6754,52 @@
void drft_forward(drft_lookup *l,float *data){
if(l->n==1)return;
+#ifdef __SSE__ /* SSE Optimize: n = 256, 512, 1024, 2048, 4096 go to the table-driven drftfN routines */
+ if(l->n==256)
+ {
+ drftf256(data);
+ return;
+ }
+ if(l->n==512)
+ {
+ drftf512(data);
+ return;
+ }
+ if(l->n==1024)
+ {
+ drftf1024(data);
+ return;
+ }
+ if(l->n==2048)
+ {
+ drftf2048(data);
+ return;
+ }
+ if(l->n==4096)
+ {
+ drftf4096(data);
+ return;
+ }
+#endif /* SSE Optimize */
+ { /* every other n: generic routine fed from l->trigcache and l->splitcache */
drftf1(l->n,data,l->trigcache,l->trigcache+l->n,l->splitcache);
+ }
}
void drft_backward(drft_lookup *l,float *data){
+#ifdef __SSE__ /* SSE Optimize */
+ if (l->n==1||(l->n>=256&&l->n<=4096))return; /* NOTE(review): for every n in 256..4096 this returns with data untouched (no inverse transform runs) - confirm no caller depends on it */
+#else /* SSE Optimize */
if (l->n==1)return;
+#endif /* SSE Optimize */
drftb1(l->n,data,l->trigcache,l->trigcache+l->n,l->splitcache);
}
void drft_init(drft_lookup *l,int n){
l->n=n;
+#ifdef __SSE__ /* SSE Optimize */
+ if (l->n>=256&&l->n<=4096)return;
+#endif /* SSE Optimize */
l->trigcache=_ogg_calloc(3*n,sizeof(*l->trigcache));
l->splitcache=_ogg_calloc(32,sizeof(*l->splitcache));
fdrffti(n, l->trigcache, l->splitcache);
@@ -1248,8 +6807,12 @@
void drft_clear(drft_lookup *l){
if(l){
+#ifdef __SSE__ /* SSE Optimize */
+ if (l->n>=256&&l->n<=4096)return; /* returns before the frees and the memset below, so *l is left as it was for these sizes */
+#endif /* SSE Optimize */
if(l->trigcache)_ogg_free(l->trigcache);
if(l->splitcache)_ogg_free(l->splitcache);
memset(l,0,sizeof(*l));
}
}
+
diff -ruN --exclude '*.in' libvorbis-1.2.0/lib/vorbisenc.c libvorbis-1.2.0-sse/lib/vorbisenc.c
--- libvorbis-1.2.0/lib/vorbisenc.c 2007-08-02 12:42:08.000000000 +0200
+++ libvorbis-1.2.0-sse/lib/vorbisenc.c 2007-08-02 12:43:15.000000000 +0200
@@ -23,6 +23,9 @@
#include "vorbis/vorbisenc.h"
#include "codec_internal.h"
+#ifdef __SSE__ /* SSE Optimize */
+#include "xmmlib.h"
+#endif /* SSE Optimize */
#include "os.h"
#include "misc.h"
diff -ruN --exclude '*.in' libvorbis-1.2.0/lib/vorbisfile.c libvorbis-1.2.0-sse/lib/vorbisfile.c
--- libvorbis-1.2.0/lib/vorbisfile.c 2007-07-24 03:08:23.000000000 +0200
+++ libvorbis-1.2.0-sse/lib/vorbisfile.c 2007-08-02 12:43:15.000000000 +0200
@@ -26,6 +26,9 @@
#include "os.h"
#include "misc.h"
+#ifdef __SSE__ /* SSE Optimize */
+#include "xmmlib.h"
+#endif /* SSE Optimize */
/* A 'chained bitstream' is a Vorbis bitstream that contains more than
one logical bitstream arranged end to end (the only form of Ogg
@@ -1706,6 +1709,135 @@
return 0;
}
+#ifdef __SSE__ /* SSE Optimize */
+STIN void ov_read_float2pcm(float *src1, float *src2, short *dest, long samples)
+{ /* Scale two float channels by 32768 and interleave them as 16-bit values: dest[2*i] from src1, dest[2*i+1] from src2. */
+ register long i;
+#if 0 // defined(__SSE2__)
+ int samples8 = samples&(~15);
+ static _MM_ALIGN16 const float parm[4] = {
+ 32768.f, 32768.f, 32768.f, 32768.f
+ };
+ for(i=0;i<samples8;i+=8)
+ {
+ __m128i XMM0 = _mm_castps_si128(_mm_load_ps(src1+i ));
+ __m128i XMM2 = _mm_castps_si128(_mm_load_ps(src1+i+4));
+ __m128i XMM1 = _mm_castps_si128(_mm_load_ps(src2+i ));
+ __m128i XMM3 = _mm_castps_si128(_mm_load_ps(src2+i+4));
+ XMM0 = _mm_castps_si128(_mm_mul_ps(_mm_castsi128_ps(XMM0), PM128(parm)));
+ XMM2 = _mm_castps_si128(_mm_mul_ps(_mm_castsi128_ps(XMM2), PM128(parm)));
+ XMM1 = _mm_castps_si128(_mm_mul_ps(_mm_castsi128_ps(XMM1), PM128(parm)));
+ XMM3 = _mm_castps_si128(_mm_mul_ps(_mm_castsi128_ps(XMM3), PM128(parm)));
+ XMM0 = _mm_cvtps_epi32(_mm_castsi128_ps(XMM0));
+ XMM2 = _mm_cvtps_epi32(_mm_castsi128_ps(XMM2));
+ XMM1 = _mm_cvtps_epi32(_mm_castsi128_ps(XMM1));
+ XMM3 = _mm_cvtps_epi32(_mm_castsi128_ps(XMM3));
+ XMM0 = _mm_packs_epi32(XMM0, XMM2);
+ XMM1 = _mm_packs_epi32(XMM1, XMM3);
+ XMM2 = XMM0;
+ XMM0 = _mm_unpacklo_epi16(XMM0, XMM1);
+ XMM2 = _mm_unpackhi_epi16(XMM2, XMM1);
+ _mm_store_si128((__m128i*)(dest+i*2 ), XMM0);
+ _mm_store_si128((__m128i*)(dest+i*2+ 8), XMM2);
+ }
+#else
+ int samples4 = samples&(~7); /* despite the name: sample count rounded down to a multiple of 8 for the first loop */
+ static _MM_ALIGN16 const float parm[4] = {
+ 32768.f, 32768.f, 32768.f, 32768.f
+ };
+ register __m128 XMM0, XMM1, XMM2, XMM3;
+ register __m64 MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7;
+ for(i=0;i<samples4;i+=8){ /* 8 samples per channel per iteration; assumes src1/src2 are 16-byte aligned (aligned loads) - TODO confirm at the call site */
+ XMM0 = _mm_load_ps(src1+i );
+ XMM1 = _mm_load_ps(src2+i );
+ XMM2 = _mm_load_ps(src1+i+4);
+ XMM3 = _mm_load_ps(src2+i+4);
+ XMM0 = _mm_mul_ps(XMM0, PM128(parm));
+ XMM1 = _mm_mul_ps(XMM1, PM128(parm));
+ XMM2 = _mm_mul_ps(XMM2, PM128(parm));
+ XMM3 = _mm_mul_ps(XMM3, PM128(parm));
+ MM0 = _mm_cvtps_pi32(XMM0);
+ MM2 = _mm_cvtps_pi32(XMM1);
+ MM4 = _mm_cvtps_pi32(XMM2);
+ MM6 = _mm_cvtps_pi32(XMM3);
+ /* _mm_cvtps_pi32 converts only the two low floats, so bring the high pair down and convert again */
+ XMM0 = _mm_movehl_ps(XMM0, XMM0);
+ XMM1 = _mm_movehl_ps(XMM1, XMM1);
+ XMM2 = _mm_movehl_ps(XMM2, XMM2);
+ XMM3 = _mm_movehl_ps(XMM3, XMM3);
+
+ MM1 = _mm_cvtps_pi32(XMM0);
+ MM3 = _mm_cvtps_pi32(XMM1);
+ MM5 = _mm_cvtps_pi32(XMM2);
+ MM7 = _mm_cvtps_pi32(XMM3);
+ /* narrow each group of four 32-bit values to four 16-bit values (pack with signed saturation) */
+ MM0 = _mm_packs_pi32(MM0, MM1);
+ MM2 = _mm_packs_pi32(MM2, MM3);
+ MM4 = _mm_packs_pi32(MM4, MM5);
+ MM6 = _mm_packs_pi32(MM6, MM7);
+
+ MM1 = MM0;
+ MM5 = MM4;
+ /* interleave the src1 words with the src2 words */
+ MM0 = _mm_unpacklo_pi16(MM0, MM2);
+ MM1 = _mm_unpackhi_pi16(MM1, MM2);
+ MM4 = _mm_unpacklo_pi16(MM4, MM6);
+ MM5 = _mm_unpackhi_pi16(MM5, MM6);
+
+ SETPM64(dest+i*2, MM0);
+ SETPM64(dest+i*2+ 4, MM1);
+ SETPM64(dest+i*2+ 8, MM4);
+ SETPM64(dest+i*2+12, MM5);
+ }
+ samples4 = samples&(~3);
+ for(;i<samples4;i+=4){ /* at most one further group of 4 samples per channel, same steps */
+ XMM0 = _mm_load_ps(src1+i );
+ XMM1 = _mm_load_ps(src2+i );
+ XMM0 = _mm_mul_ps(XMM0, PM128(parm));
+ XMM1 = _mm_mul_ps(XMM1, PM128(parm));
+
+ MM0 = _mm_cvtps_pi32(XMM0);
+ MM2 = _mm_cvtps_pi32(XMM1);
+
+ XMM0 = _mm_movehl_ps(XMM0, XMM0);
+ XMM1 = _mm_movehl_ps(XMM1, XMM1);
+
+ MM1 = _mm_cvtps_pi32(XMM0);
+ MM3 = _mm_cvtps_pi32(XMM1);
+
+ MM0 = _mm_packs_pi32(MM0, MM1);
+ MM2 = _mm_packs_pi32(MM2, MM3);
+
+ MM1 = MM0;
+
+ MM0 = _mm_unpacklo_pi16(MM0, MM2);
+ MM1 = _mm_unpackhi_pi16(MM1, MM2);
+
+ SETPM64(dest+i*2, MM0);
+ SETPM64(dest+i*2+ 4, MM1);
+ }
+ _mm_empty(); /* leave MMX state before any later floating-point code */
+#endif
+ for(;i<samples;i++) /* scalar tail for the last samples%4 entries */
+ {
+ float f1 = src1[i];
+ float f2 = src2[i];
+ f1 *= 32768.f;
+ f2 *= 32768.f;
+ if(f1>32767.f)
+ f1 = 32767.f;
+ if(f1<-32768.f)
+ f1 =-32768.f;
+ if(f2>32767.f)
+ f2 = 32767.f;
+ if(f2<-32768.f)
+ f2 =-32768.f;
+ dest[i*2 ] = (short)f1; /* NOTE(review): a cast here vs. _mm_cvtps_pi32 in the loops above - the two may round differently, confirm */
+ dest[i*2+1] = (short)f2;
+ }
+}
+#endif /* SSE Optimize */
+
/* up to this point, everything could more or less hide the multiple
logical bitstream nature of chaining from the toplevel application
if the toplevel application didn't particularly care. However, at
@@ -1795,7 +1927,26 @@
if(host_endian==bigendianp){
if(sgned){
-
+#ifdef __SSE__ /* SSE Optimize */
+ if(channels==2){
+ ov_read_float2pcm(pcm[0], pcm[1], ((short *)buffer), samples);
+ }else{
+ for(i=0;i<channels;i++){ /* It's faster in this order */
+ float *src = pcm[i];
+ short *dest = ((short *)buffer)+i;
+ for(j=0;j<samples;j++){
+ val = vorbis_ftoi(src[j]*32768.f);
+ if(val>32767)
+ val = 32767;
+ else
+ if(val<-32768)
+ val = -32768;
+ *dest=val;
+ dest+=channels;
+ }
+ }
+ }
+#else /* SSE Optimize */
vorbis_fpu_setround(&fpu);
for(i=0;i<channels;i++) { /* It's faster in this order */
float *src=pcm[i];
@@ -1810,6 +1961,8 @@
}
vorbis_fpu_restore(fpu);
+#endif /* SSE Optimize */
+
}else{
vorbis_fpu_setround(&fpu);
diff -ruN --exclude '*.in' libvorbis-1.2.0/lib/window.c libvorbis-1.2.0-sse/lib/window.c
--- libvorbis-1.2.0/lib/window.c 2007-07-24 02:09:47.000000000 +0200
+++ libvorbis-1.2.0-sse/lib/window.c 2007-08-02 12:43:15.000000000 +0200
@@ -19,8 +19,15 @@
#include <math.h>
#include "os.h"
#include "misc.h"
+#ifdef __SSE__ /* SSE Optimize */
+#include "xmmlib.h"
+#endif /* SSE Optimize */
+#ifdef __SSE__ /* SSE Optimize */
+static _MM_ALIGN16 const float vwin64[32] = {
+#else /* SSE Optimize */
static float vwin64[32] = {
+#endif /* SSE Optimize */
0.0009460463F, 0.0085006468F, 0.0235352254F, 0.0458950567F,
0.0753351908F, 0.1115073077F, 0.1539457973F, 0.2020557475F,
0.2551056759F, 0.3122276645F, 0.3724270287F, 0.4346027792F,
@@ -31,7 +38,11 @@
0.9989462667F, 0.9997230082F, 0.9999638688F, 0.9999995525F,
};
+#ifdef __SSE__ /* SSE Optimize */
+static _MM_ALIGN16 const float vwin128[64] = {
+#else /* SSE Optimize */
static float vwin128[64] = {
+#endif /* SSE Optimize */
0.0002365472F, 0.0021280687F, 0.0059065254F, 0.0115626550F,
0.0190823442F, 0.0284463735F, 0.0396300935F, 0.0526030430F,
0.0673285281F, 0.0837631763F, 0.1018564887F, 0.1215504095F,
@@ -50,7 +61,11 @@
0.9999331503F, 0.9999825563F, 0.9999977357F, 0.9999999720F,
};
+#ifdef __SSE__ /* SSE Optimize */
+static _MM_ALIGN16 const float vwin256[128] = {
+#else /* SSE Optimize */
static float vwin256[128] = {
+#endif /* SSE Optimize */
0.0000591390F, 0.0005321979F, 0.0014780301F, 0.0028960636F,
0.0047854363F, 0.0071449926F, 0.0099732775F, 0.0132685298F,
0.0170286741F, 0.0212513119F, 0.0259337111F, 0.0310727950F,
@@ -85,7 +100,11 @@
0.9999958064F, 0.9999989077F, 0.9999998584F, 0.9999999983F,
};
+#ifdef __SSE__ /* SSE Optimize */
+static _MM_ALIGN16 const float vwin512[256] = {
+#else /* SSE Optimize */
static float vwin512[256] = {
+#endif /* SSE Optimize */
0.0000147849F, 0.0001330607F, 0.0003695946F, 0.0007243509F,
0.0011972759F, 0.0017882983F, 0.0024973285F, 0.0033242588F,
0.0042689632F, 0.0053312973F, 0.0065110982F, 0.0078081841F,
@@ -152,7 +171,11 @@
0.9999997377F, 0.9999999317F, 0.9999999911F, 0.9999999999F,
};
+#ifdef __SSE__ /* SSE Optimize */
+static _MM_ALIGN16 const float vwin1024[512] = {
+#else /* SSE Optimize */
static float vwin1024[512] = {
+#endif /* SSE Optimize */
0.0000036962F, 0.0000332659F, 0.0000924041F, 0.0001811086F,
0.0002993761F, 0.0004472021F, 0.0006245811F, 0.0008315063F,
0.0010679699F, 0.0013339631F, 0.0016294757F, 0.0019544965F,
@@ -283,7 +306,11 @@
0.9999999836F, 0.9999999957F, 0.9999999994F, 1.0000000000F,
};
+#ifdef __SSE__ /* SSE Optimize */
+static _MM_ALIGN16 const float vwin2048[1024] = {
+#else /* SSE Optimize */
static float vwin2048[1024] = {
+#endif /* SSE Optimize */
0.0000009241F, 0.0000083165F, 0.0000231014F, 0.0000452785F,
0.0000748476F, 0.0001118085F, 0.0001561608F, 0.0002079041F,
0.0002670379F, 0.0003335617F, 0.0004074748F, 0.0004887765F,
@@ -542,7 +569,11 @@
0.9999999990F, 0.9999999997F, 1.0000000000F, 1.0000000000F,
};
+#ifdef __SSE__ /* SSE Optimize */
+static _MM_ALIGN16 const float vwin4096[2048] = {
+#else /* SSE Optimize */
static float vwin4096[2048] = {
+#endif /* SSE Optimize */
0.0000002310F, 0.0000020791F, 0.0000057754F, 0.0000113197F,
0.0000187121F, 0.0000279526F, 0.0000390412F, 0.0000519777F,
0.0000667623F, 0.0000833949F, 0.0001018753F, 0.0001222036F,
@@ -1057,7 +1088,11 @@
0.9999999999F, 1.0000000000F, 1.0000000000F, 1.0000000000F,
};
+#ifdef __SSE__ /* SSE Optimize */
+static _MM_ALIGN16 const float vwin8192[4096] = {
+#else /* SSE Optimize */
static float vwin8192[4096] = {
+#endif /* SSE Optimize */
0.0000000578F, 0.0000005198F, 0.0000014438F, 0.0000028299F,
0.0000046780F, 0.0000069882F, 0.0000097604F, 0.0000129945F,
0.0000166908F, 0.0000208490F, 0.0000254692F, 0.0000305515F,
@@ -2084,7 +2119,7 @@
1.0000000000F, 1.0000000000F, 1.0000000000F, 1.0000000000F,
};
-static float *vwin[8] = {
+static const float *vwin[8] = {
vwin64,
vwin128,
vwin256,
@@ -2095,7 +2130,7 @@
vwin8192,
};
-float *_vorbis_window_get(int n){
+const float *_vorbis_window_get(int n){
return vwin[n];
}
@@ -2105,8 +2140,8 @@
nW=(W?nW:0);
{
- float *windowLW=vwin[winno[lW]];
- float *windowNW=vwin[winno[nW]];
+ const float *windowLW=vwin[winno[lW]];
+ const float *windowNW=vwin[winno[nW]];
long n=blocksizes[W];
long ln=blocksizes[lW];
@@ -2120,6 +2155,96 @@
int i,p;
+#ifdef __SSE__ /* SSE Optimize */
+ if(leftbegin>0)
+ {
+ __m128 XMM0 = _mm_setzero_ps();
+ __m128 XMM1 = _mm_setzero_ps();
+ __m128 XMM2 = _mm_setzero_ps();
+ __m128 XMM3 = _mm_setzero_ps();
+ for(i=0;i<leftbegin;i+=32)
+ {
+ _mm_store_ps(d+i ,XMM0);
+ _mm_store_ps(d+i+ 4,XMM1);
+ _mm_store_ps(d+i+ 8,XMM2);
+ _mm_store_ps(d+i+12,XMM3);
+ _mm_store_ps(d+i+16,XMM0);
+ _mm_store_ps(d+i+20,XMM1);
+ _mm_store_ps(d+i+24,XMM2);
+ _mm_store_ps(d+i+28,XMM3);
+ }
+ }
+ _mm_prefetch(windowLW , _MM_HINT_NTA);
+ _mm_prefetch(windowLW+32, _MM_HINT_NTA);
+ for(i=leftbegin,p=0;i<leftend;i+=16,p+=16)
+ {
+ __m128 XMM0, XMM1, XMM2, XMM3;
+ __m128 XMM4, XMM5, XMM6, XMM7;
+ _mm_prefetch(windowLW+p+64, _MM_HINT_NTA);
+ XMM0 = _mm_load_ps(d+i );
+ XMM4 = _mm_load_ps(windowLW+p );
+ XMM1 = _mm_load_ps(d+i+ 4);
+ XMM5 = _mm_load_ps(windowLW+p+ 4);
+ XMM2 = _mm_load_ps(d+i+ 8);
+ XMM6 = _mm_load_ps(windowLW+p+ 8);
+ XMM3 = _mm_load_ps(d+i+12);
+ XMM7 = _mm_load_ps(windowLW+p+12);
+ XMM0 = _mm_mul_ps(XMM0, XMM4);
+ XMM1 = _mm_mul_ps(XMM1, XMM5);
+ XMM2 = _mm_mul_ps(XMM2, XMM6);
+ XMM3 = _mm_mul_ps(XMM3, XMM7);
+ _mm_store_ps(d+i ,XMM0);
+ _mm_store_ps(d+i+ 4,XMM1);
+ _mm_store_ps(d+i+ 8,XMM2);
+ _mm_store_ps(d+i+12,XMM3);
+ }
+ p = rn/2-16;
+ _mm_prefetch(windowLW+p-16, _MM_HINT_NTA);
+ _mm_prefetch(windowLW+p-48, _MM_HINT_NTA);
+ for(i=rightbegin;i<rightend;i+=16,p-=16)
+ {
+ __m128 XMM0, XMM1, XMM2, XMM3,XMM4, XMM5, XMM6, XMM7;
+ _mm_prefetch(windowLW+p-80, _MM_HINT_NTA);
+ XMM0 = _mm_load_ps(windowNW+p+12);
+ XMM1 = _mm_load_ps(windowNW+p+ 8);
+ XMM2 = _mm_load_ps(windowNW+p+ 4);
+ XMM3 = _mm_load_ps(windowNW+p );
+ XMM4 = _mm_load_ps(d+i );
+ XMM5 = _mm_load_ps(d+i+ 4);
+ XMM6 = _mm_load_ps(d+i+ 8);
+ XMM7 = _mm_load_ps(d+i+12);
+ XMM0 = _mm_shuffle_ps(XMM0, XMM0, _MM_SHUFFLE(0,1,2,3));
+ XMM1 = _mm_shuffle_ps(XMM1, XMM1, _MM_SHUFFLE(0,1,2,3));
+ XMM2 = _mm_shuffle_ps(XMM2, XMM2, _MM_SHUFFLE(0,1,2,3));
+ XMM3 = _mm_shuffle_ps(XMM3, XMM3, _MM_SHUFFLE(0,1,2,3));
+ XMM4 = _mm_mul_ps(XMM4, XMM0);
+ XMM5 = _mm_mul_ps(XMM5, XMM1);
+ XMM6 = _mm_mul_ps(XMM6, XMM2);
+ XMM7 = _mm_mul_ps(XMM7, XMM3);
+ _mm_store_ps(d+i ,XMM4);
+ _mm_store_ps(d+i+ 4,XMM5);
+ _mm_store_ps(d+i+ 8,XMM6);
+ _mm_store_ps(d+i+12,XMM7);
+ }
+ if(i<n)
+ {
+ __m128 XMM0 = _mm_setzero_ps();
+ __m128 XMM1 = _mm_setzero_ps();
+ __m128 XMM2 = _mm_setzero_ps();
+ __m128 XMM3 = _mm_setzero_ps();
+ for(;i<n;i+=32)
+ {
+ _mm_store_ps(d+i ,XMM0);
+ _mm_store_ps(d+i+ 4,XMM1);
+ _mm_store_ps(d+i+ 8,XMM2);
+ _mm_store_ps(d+i+12,XMM3);
+ _mm_store_ps(d+i+16,XMM0);
+ _mm_store_ps(d+i+20,XMM1);
+ _mm_store_ps(d+i+24,XMM2);
+ _mm_store_ps(d+i+28,XMM3);
+ }
+ }
+#else /* SSE Optimize */
for(i=0;i<leftbegin;i++)
d[i]=0.f;
@@ -2131,6 +2256,7 @@
for(;i<n;i++)
d[i]=0.f;
+#endif /* SSE Optimize */
}
}
diff -ruN --exclude '*.in' libvorbis-1.2.0/lib/xmmlib.c libvorbis-1.2.0-sse/lib/xmmlib.c
--- libvorbis-1.2.0/lib/xmmlib.c 1970-01-01 01:00:00.000000000 +0100
+++ libvorbis-1.2.0-sse/lib/xmmlib.c 2007-08-02 12:43:15.000000000 +0200
@@ -0,0 +1,277 @@
+/********************************************************************
+ * *
+ * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE. *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
+ * *
+ * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2003 *
+ * by the XIPHOPHORUS Company http://www.xiph.org/ *
+ * *
+ ********************************************************************
+
+ function: SSE Function Library
+ last mod: $Id: xmmlib.c,v 1.4 2005-07-08 15:00:00+09 blacksword Exp $
+
+ ********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <malloc.h>
+#include "xmmlib.h"
+
+#if defined(__SSE__)
+_MM_ALIGN16 const __m128x PCS_NNRN = {.si32 = {0x00000000, 0x80000000, 0x00000000, 0x00000000}};
+_MM_ALIGN16 const __m128x PCS_NNRR = {.si32 = {0x80000000, 0x80000000, 0x00000000, 0x00000000}};
+_MM_ALIGN16 const __m128x PCS_NRNN = {.si32 = {0x00000000, 0x00000000, 0x80000000, 0x00000000}};
+_MM_ALIGN16 const __m128x PCS_NRNR = {.si32 = {0x80000000, 0x00000000, 0x80000000, 0x00000000}};
+_MM_ALIGN16 const __m128x PCS_NRRN = {.si32 = {0x00000000, 0x80000000, 0x80000000, 0x00000000}};
+_MM_ALIGN16 const __m128x PCS_NRRR = {.si32 = {0x80000000, 0x80000000, 0x80000000, 0x00000000}};
+_MM_ALIGN16 const __m128x PCS_RNNN = {.si32 = {0x00000000, 0x00000000, 0x00000000, 0x80000000}};
+_MM_ALIGN16 const __m128x PCS_RNRN = {.si32 = {0x00000000, 0x80000000, 0x00000000, 0x80000000}};
+_MM_ALIGN16 const __m128x PCS_RNRR = {.si32 = {0x80000000, 0x80000000, 0x00000000, 0x80000000}};
+_MM_ALIGN16 const __m128x PCS_RRNN = {.si32 = {0x00000000, 0x00000000, 0x80000000, 0x80000000}};
+_MM_ALIGN16 const __m128x PCS_RNNR = {.si32 = {0x80000000, 0x00000000, 0x00000000, 0x80000000}};
+_MM_ALIGN16 const __m128x PCS_RRRR = {.si32 = {0x80000000, 0x80000000, 0x80000000, 0x80000000}};
+_MM_ALIGN16 const __m128x PCS_NNNR = {.si32 = {0x80000000, 0x00000000, 0x00000000, 0x00000000}};
+_MM_ALIGN16 const __m128x PABSMASK = {.si32 = {0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF}};
+_MM_ALIGN16 const __m128x PSTARTEDGEM1 = {.si32 = {0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF}};
+_MM_ALIGN16 const __m128x PSTARTEDGEM2 = {.si32 = {0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF}};
+_MM_ALIGN16 const __m128x PSTARTEDGEM3 = {.si32 = {0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF}};
+_MM_ALIGN16 const __m128x PENDEDGEM1 = {.si32 = {0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000}};
+_MM_ALIGN16 const __m128x PENDEDGEM2 = {.si32 = {0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000}};
+_MM_ALIGN16 const __m128x PENDEDGEM3 = {.si32 = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000}};
+
+_MM_ALIGN16 const __m128x PMASKTABLE[16] = {
+ { .si32 = {0x00000000, 0x00000000, 0x00000000, 0x00000000} },
+ { .si32 = {0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000} },
+ { .si32 = {0x00000000, 0xFFFFFFFF, 0x00000000, 0x00000000} },
+ { .si32 = {0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000} },
+ { .si32 = {0x00000000, 0x00000000, 0xFFFFFFFF, 0x00000000} },
+ { .si32 = {0xFFFFFFFF, 0x00000000, 0xFFFFFFFF, 0x00000000} },
+ { .si32 = {0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000} },
+ { .si32 = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000} },
+ { .si32 = {0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF} },
+ { .si32 = {0xFFFFFFFF, 0x00000000, 0x00000000, 0xFFFFFFFF} },
+ { .si32 = {0x00000000, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF} },
+ { .si32 = {0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF} },
+ { .si32 = {0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF} },
+ { .si32 = {0xFFFFFFFF, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF} },
+ { .si32 = {0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF} },
+ { .si32 = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF} }
+};
+
+_MM_ALIGN16 const __m128x PFV_0 = { .sf = { 0.0f, 0.0f, 0.0f, 0.0f} };
+_MM_ALIGN16 const __m128x PFV_1 = { .sf = { 1.0f, 1.0f, 1.0f, 1.0f} };
+_MM_ALIGN16 const __m128x PFV_2 = { .sf = { 2.0f, 2.0f, 2.0f, 2.0f} };
+_MM_ALIGN16 const __m128x PFV_4 = { .sf = { 4.0f, 4.0f, 4.0f, 4.0f} };
+_MM_ALIGN16 const __m128x PFV_8 = { .sf = { 8.0f, 8.0f, 8.0f, 8.0f} };
+_MM_ALIGN16 const __m128x PFV_INIT = { .sf = { 0.0f, 1.0f, 2.0f, 3.0f} };
+_MM_ALIGN16 const __m128x PFV_0P5 = { .sf = { 0.5f, 0.5f, 0.5f, 0.5f} };
+_MM_ALIGN16 const __m128x PFV_M0P5 = { .sf = {-0.5f,-0.5f,-0.5f,-0.5f} };
+
+#endif /* defined(__SSE__) */
+
+const int bitCountTable[16] = {
+ 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4
+};
+
+#if 0
+/*---------------------------------------------------------------------------
+// for measuring performance
+//-------------------------------------------------------------------------*/
+static double perfsum[16];
+static unsigned long perfcount[16];
+
+unsigned __int64* _perf_start(void)
+{
+ unsigned __int64* stime;
+ {
+ stime = malloc(sizeof(*stime));
+ *stime = _rdtsc();
+ }
+ return stime;
+}
+
+void _perf_end(unsigned __int64 *stime, int index)
+{
+ *stime = _rdtsc() - *stime;
+ {
+ perfsum[index] += (double)(*stime);
+ perfcount[index] ++;
+ }
+ free(stime);
+}
+
+void _perf_result(int index)
+{
+ printf("\nPerfSum = %f\n", perfsum[index]);
+ printf("PerfCount = %d\n", perfcount[index]);
+ printf("PerfAvg = %f\n", perfsum[index]/(double)perfcount[index]);
+}
+#endif
+
+/*
+ * aligned malloc wrapper; assume blindly align = 16
+ */
+#ifndef __INTEL_COMPILER
+static void *_aligned_malloc(size_t size, unsigned int align)
+{ /* malloc wrapper returning a 16-byte aligned address; the align argument is ignored */
+ unsigned int *p;
+ unsigned long addr, align_addr;
+ /* NOTE(review): addresses are held in unsigned long - assumes a pointer fits in one; confirm for the target ABI */
+ size = (size + 15) & ~15UL; /* round the request up to a multiple of 16 */
+ p = malloc(size + 16);
+ if (!p)
+ return NULL;
+ addr = (unsigned long)p;
+ align_addr = (addr + 15 + 2*4) & ~15UL; /* next multiple of 16 that leaves at least 8 bytes of header room */
+ p = (unsigned int *)align_addr;
+ p[-1] = align_addr - addr; /* header word: distance back to the malloc pointer (read by _aligned_free) */
+ p[-2] = size; /* header word: rounded size (read by _aligned_realloc) */
+ return p;
+}
+
+static void _aligned_free(void *ptr)
+{ /* Release a block from _aligned_malloc/_aligned_realloc; NULL is accepted and ignored. */
+ if (ptr) {
+ unsigned int offset = ((unsigned int*)ptr)[-1]; /* distance back to the pointer malloc returned */
+ free((char *)ptr - offset); /* byte arithmetic via char *: arithmetic on void * is not ISO C */
+ }
+}
+
+static void *_aligned_realloc(void *orig, size_t size, unsigned int align)
+{ /* Grow a block from _aligned_malloc, keeping 16-byte alignment; returns NULL (orig still valid) on failure. */
+ unsigned int *p = orig;
+ unsigned int offset, new_offset, orig_len;
+ unsigned long orig_addr, addr, align_addr;
+ if (!orig)
+ return _aligned_malloc(size, align);
+ offset = p[-1]; /* header written by _aligned_malloc: offset back to the raw pointer ... */
+ orig_len = p[-2]; /* ... and the rounded length of the block */
+ orig_addr = (unsigned long)p - offset;
+ if (orig_len >= size) /* already large enough: blocks are never shrunk */
+ return orig;
+ size = (size + 15) & ~15UL;
+ p = realloc((void *)orig_addr, size + 16);
+ if (!p)
+ return NULL;
+ addr = (unsigned long)p;
+ /* No early return when the block grew in place: the code below then finds */
+ /* new_offset == offset, skips the move and still records the new length. */
+ align_addr = (addr + 15 + 2*4) & ~15UL;
+ new_offset = align_addr - addr;
+ if (offset != new_offset) /* realloc kept the data at the old offset; shift it to the new aligned position */
+ memmove((char*)p + new_offset, (char*)p + offset, orig_len);
+ p = (unsigned int*)align_addr;
+ p[-1] = new_offset;
+ p[-2] = size;
+ return p;
+}
+#endif
+
+/*---------------------------------------------------------------------------
+// 16-byte aligned malloc
+//-------------------------------------------------------------------------*/
+void* xmm_malloc(size_t size)
+{ /* forwards to _aligned_malloc with an alignment of 16; release the result with xmm_free */
+ return (void*)_aligned_malloc(size, 16);
+}
+/*---------------------------------------------------------------------------
+// 16-byte aligned calloc
+//-------------------------------------------------------------------------*/
+void* xmm_calloc(size_t nitems, size_t size)
+{ /* Zero-filled xmm_malloc of nitems*size bytes. */
+ unsigned char* t_RetPtr = (size && nitems > (size_t)-1 / size) ? NULL : xmm_malloc(nitems * size);
+ /* NULL when nitems*size would wrap around or when the allocation fails. */
+ if(t_RetPtr)
+ {
+#ifdef __SSE__
+ size_t i,j, k;
+ __m128 XMM0, XMM1, XMM2, XMM3;
+ XMM0 =
+ XMM1 =
+ XMM2 =
+ XMM3 = _mm_setzero_ps();
+ k = nitems*size; /* total byte count; j is the end of each successively finer pass */
+ j = k&(~127);
+ for(i=0;i<j;i+=128)
+ {
+ _mm_stream_ps((float*)(t_RetPtr+i ), XMM0);
+ _mm_stream_ps((float*)(t_RetPtr+i+ 16), XMM1);
+ _mm_stream_ps((float*)(t_RetPtr+i+ 32), XMM2);
+ _mm_stream_ps((float*)(t_RetPtr+i+ 48), XMM3);
+ _mm_stream_ps((float*)(t_RetPtr+i+ 64), XMM0);
+ _mm_stream_ps((float*)(t_RetPtr+i+ 80), XMM1);
+ _mm_stream_ps((float*)(t_RetPtr+i+ 96), XMM2);
+ _mm_stream_ps((float*)(t_RetPtr+i+112), XMM3);
+ }
+ j = k&(~63);
+ for(;i<j;i+=64)
+ {
+ _mm_stream_ps((float*)(t_RetPtr+i ), XMM0);
+ _mm_stream_ps((float*)(t_RetPtr+i+ 16), XMM1);
+ _mm_stream_ps((float*)(t_RetPtr+i+ 32), XMM2);
+ _mm_stream_ps((float*)(t_RetPtr+i+ 48), XMM3);
+ }
+ j = k&(~31);
+ for(;i<j;i+=32)
+ {
+ _mm_stream_ps((float*)(t_RetPtr+i ), XMM0);
+ _mm_stream_ps((float*)(t_RetPtr+i+ 16), XMM1);
+ }
+ j = k&(~15);
+ for(;i<j;i+=16)
+ {
+ _mm_stream_ps((float*)(t_RetPtr+i ), XMM0);
+ }
+ j = k&(~7); /* the 8-byte pass is disabled, so 4-byte stores run up to this bound and bytes finish the rest */
+#if 0 /* XXX */
+ for(;i<j;i+=8)
+ {
+ _mm_storel_pi((__m64*)(t_RetPtr+i ), XMM0);
+ }
+ j = k&(~3);
+#endif
+ for(;i<j;i+=4)
+ {
+ _mm_store_ss((float*)(t_RetPtr+i) , XMM0);
+ }
+ for(;i<k;i++)
+ *(t_RetPtr+i ) = 0;
+ _mm_sfence(); /* fence after the streaming stores above */
+#else
+ memset(t_RetPtr, 0, nitems*size);
+#endif
+ }
+ return (void*)t_RetPtr;
+}
+/*---------------------------------------------------------------------------
+// 16-byte aligned realloc
+//-------------------------------------------------------------------------*/
+void* xmm_realloc(void *block, size_t size)
+{ /* forwards to _aligned_realloc with an alignment of 16 */
+ return (void*)_aligned_realloc(block, size, 16);
+}
+/*---------------------------------------------------------------------------
+// 16-byte aligned free
+//-------------------------------------------------------------------------*/
+void xmm_free(void* a_AlignedPtr)
+{ /* releases a pointer obtained from the xmm_* allocators above; NULL is ignored */
+ if(a_AlignedPtr)
+ _aligned_free(a_AlignedPtr);
+}
+#if 0
+/*---------------------------------------------------------------------------
+// 16-byte alignment of an existing pointer (alloca-style)
+//-------------------------------------------------------------------------*/
+void* xmm_align(void *t_Ptr)
+{
+ unsigned char* t_RetPtr = 0;
+ if(t_Ptr){
+ t_RetPtr = (unsigned char*)(( ((unsigned long)t_Ptr+15)/16)*16);
+ }
+ return (void*)t_RetPtr;
+}
+#endif
diff -ruN --exclude '*.in' libvorbis-1.2.0/lib/xmmlib.h libvorbis-1.2.0-sse/lib/xmmlib.h
--- libvorbis-1.2.0/lib/xmmlib.h 1970-01-01 01:00:00.000000000 +0100
+++ libvorbis-1.2.0-sse/lib/xmmlib.h 2007-08-02 12:43:15.000000000 +0200
@@ -0,0 +1,266 @@
+/********************************************************************
+ * *
+ * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE. *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
+ * *
+ * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2003 *
+ * by the XIPHOPHORUS Company http://www.xiph.org/ *
+ * *
+ ********************************************************************
+
+ function: Header of SSE Function Library
+ last mod: $Id: xmmlib.h,v 1.3 2005-07-08 15:00:00+09 blacksword Exp $
+
+ ********************************************************************/
+
+#ifndef _XMMLIB_H_INCLUDED
+#define _XMMLIB_H_INCLUDED
+
+#if !defined(STIN)
+#define STIN static __inline
+#endif
+
+#if defined(__SSE__)
+#ifdef __INTEL_COMPILER
+#include <ia32intrin.h>
+#include <tmmintrin.h>
+#else
+/* GCC */
+#include <mmintrin.h>
+#include <xmmintrin.h>
+#ifdef __SSE2__
+ #include <emmintrin.h>
+#endif
+#ifdef __SSE3__
+ #include <pmmintrin.h>
+#endif
+#define _MM_ALIGN16 __attribute__((aligned(16)))
+#define __declspec(x)
+#endif
+
+/* PM64(x): read the 8 bytes at x as an __m64 (function form of the former PM64 macro) */
+static __inline __m64 __attribute__((__always_inline__))
+ PM64(const float *x)
+{
+  union {
+    const float *as_float;
+    __m64 *as_m64;
+  } alias = { .as_float = x }; /* one address, seen through two pointer types */
+  return *alias.as_m64;
+}
+
+static __inline void __attribute__((__always_inline__))
+ SETPM64(int *x, __m64 y) /* write y, as one __m64, to the memory x points at */
+{
+  union {
+    int *as_int;
+    __m64 *as_m64;
+  } alias = { .as_int = x };
+  *alias.as_m64 = y;
+}
+
+/* PM128(x): read the 16 bytes at x as an __m128 (function form of the former PM128 macro) */
+static __inline __m128 __attribute__((__always_inline__))
+ PM128(const float *x)
+{
+  union {
+    const float *as_float;
+    __m128 *as_m128;
+  } alias = { .as_float = x }; /* one address, seen through two pointer types */
+  return *alias.as_m128;
+}
+#ifdef __SSE2__
+/* PM128I(x): read the 16 bytes at x as an __m128i (function form of the former PM128I macro) */
+static __inline __m128i __attribute__((__always_inline__))
+ PM128I(const int *x)
+{
+  union {
+    const int *as_int;
+    __m128i *as_m128i;
+  } alias = { .as_int = x }; /* one address, seen through two pointer types */
+  return *alias.as_m128i;
+}
+#endif
+
+#include <stdint.h>
+
+typedef union { /* one 64-bit value, viewable as integer lanes or as a single __m64 */
+ uint8_t si8[8]; /* 8 x unsigned 8-bit */
+ uint16_t si16[4]; /* 4 x unsigned 16-bit */
+ uint32_t si32[2]; /* 2 x unsigned 32-bit */
+ int8_t ssi8[8]; /* 8 x signed 8-bit */
+ int16_t ssi16[4]; /* 4 x signed 16-bit */
+ int32_t ssi32[2]; /* 2 x signed 32-bit */
+ __m64 pi64; /* the whole value as an __m64 */
+} __m64x;
+
+typedef union __declspec(intrin_type) _MM_ALIGN16 __m128x{ /* one 128-bit value with several lane views */
+ uint32_t si32[4]; /* 4 x unsigned 32-bit; lets constants be written as bit patterns */
+ float sf[4]; /* 4 x float */
+ __m64 pi64[2]; /* two 64-bit halves as __m64 */
+ __m128 ps; /* packed-single view */
+#ifdef __SSE2__
+ __m128i pi; /* packed-integer view, only when SSE2 is enabled */
+ __m128d pd; /* packed-double view, only when SSE2 is enabled */
+#endif
+} __m128x;
+
+#if defined(__SSE3__) /* _mm_lddqu_ps(x): load four floats from x; SSE3 builds go through _mm_lddqu_si128 */
+#define _mm_lddqu_ps(x) _mm_castsi128_ps(_mm_lddqu_si128((__m128i*)(x)))
+#else /* without SSE3 the same name maps to the plain unaligned load */
+#define _mm_lddqu_ps(x) _mm_loadu_ps(x)
+#endif
+
+extern const __m128x PCS_NNRN; /* PCS_* family. NOTE(review): the meaning of the N/R letters is set where the constants are defined, not in this header -- confirm there */
+extern const __m128x PCS_NNRR;
+extern const __m128x PCS_NRNN;
+extern const __m128x PCS_NRNR;
+extern const __m128x PCS_NRRN;
+extern const __m128x PCS_NRRR;
+extern const __m128x PCS_RNNN;
+extern const __m128x PCS_RNRN;
+extern const __m128x PCS_RNRR;
+extern const __m128x PCS_RRNN;
+extern const __m128x PCS_RNNR;
+extern const __m128x PCS_RRRR; /* ANDed with the input by _mm_untnorm_ps() */
+extern const __m128x PCS_NNNR;
+extern const __m128x PABSMASK; /* ANDed with the input by _mm_todB_ps() */
+extern const __m128x PSTARTEDGEM1;
+extern const __m128x PSTARTEDGEM2;
+extern const __m128x PSTARTEDGEM3;
+extern const __m128x PENDEDGEM1;
+extern const __m128x PENDEDGEM2;
+extern const __m128x PENDEDGEM3;
+extern const __m128x PMASKTABLE[16];
+
+extern const __m128x PFV_0; /* PFV_* family: presumably packed-float constants named after their value -- confirm at the definitions */
+extern const __m128x PFV_1;
+extern const __m128x PFV_2;
+extern const __m128x PFV_4;
+extern const __m128x PFV_8;
+extern const __m128x PFV_INIT;
+extern const __m128x PFV_0P5;
+extern const __m128x PFV_M0P5;
+
+extern const int bitCountTable[16]; /* 16-entry integer lookup table, defined outside this header */
+
+extern void *xmm_malloc(size_t); /* allocation helpers of the SSE library; definitions live in the accompanying .c file */
+extern void *xmm_calloc(size_t, size_t);
+extern void *xmm_realloc(void*, size_t); /* forwards to _aligned_realloc with alignment 16 */
+extern void xmm_free(void*); /* does nothing for NULL, otherwise calls _aligned_free */
+
+static inline void* xmm_align(void *t_Ptr)
+{ /* Round t_Ptr up to the next 16-byte boundary; NULL is returned unchanged. */
+  /* uintptr_t rather than unsigned long: long is narrower than a pointer on LLP64 targets. */
+  return t_Ptr ? (void*)(((uintptr_t)t_Ptr+15) & ~(uintptr_t)15) : (void*)0;
+}
+
+/* Per lane: the bits of x, ANDed with PABSMASK, are read as a signed 32-bit integer, */
+/* converted to float, multiplied by 7.17711438e-7 and offset by -764.6161886. */
+static inline __m128 _mm_todB_ps(__m128 x)
+{
+  static _MM_ALIGN16 const float mparm[4] = { 7.17711438e-7f, 7.17711438e-7f, 7.17711438e-7f, 7.17711438e-7f };
+  static _MM_ALIGN16 const float aparm[4] = { -764.6161886f, -764.6161886f, -764.6161886f, -764.6161886f };
+#ifdef __SSE2__
+  __m128x U;
+  U.ps = _mm_and_ps(x, PABSMASK.ps);
+  U.ps = _mm_cvtepi32_ps(U.pi);
+  U.ps = _mm_mul_ps(U.ps, _mm_load_ps(mparm));
+  U.ps = _mm_add_ps(U.ps, _mm_load_ps(aparm));
+  return U.ps;
+#else
+  /* SSE1-only path: convert two int32 lanes at a time (high pair first, then moved up). */
+  /* NOTE(review): no _mm_empty() is issued after the __m64 conversions -- confirm callers cope. */
+  __m128x U;
+  __m128 RESULT = _mm_setzero_ps(); /* defined start value; every lane is overwritten below */
+  U.ps = _mm_and_ps(x, PABSMASK.ps);
+  RESULT = _mm_cvtpi32_ps(RESULT, U.pi64[1]);
+  RESULT = _mm_movelh_ps(RESULT, RESULT);
+  RESULT = _mm_cvtpi32_ps(RESULT, U.pi64[0]);
+  RESULT = _mm_mul_ps(RESULT, _mm_load_ps(mparm)); /* arrays must be loaded into __m128 first */
+  RESULT = _mm_add_ps(RESULT, _mm_load_ps(aparm));
+  return RESULT;
+#endif
+}
+
+/* Per lane: keep only the bits of x selected by PCS_RRRR, then OR in 0x3f800000 (the bits of 1.0f). */
+/* Presumably PCS_RRRR is the sign-bit mask, which makes the result +/-1.0 -- confirm at its definition. */
+static inline __m128 _mm_untnorm_ps(__m128 x)
+{
+  static _MM_ALIGN16 const __m128x PIV0 = {
+    .si32 = { 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 }
+  };
+  /* The OR has to consume the masked value; ORing x itself threw the AND result away. */
+  __m128 r = _mm_and_ps(x, PCS_RRRR.ps);
+  r = _mm_or_ps(r, PIV0.ps);
+  return r;
+}
+
+static inline float _mm_add_horz(__m128 x) /* returns x0+x1+x2+x3 */
+{
+#if defined(__SSE3__)
+  x = _mm_hadd_ps(x, x);
+  x = _mm_hadd_ps(x, x);
+#else
+  /* movehl(x, x) replaces an uninitialized first operand; only result lanes 0-1 */
+  /* (x2, x3) are consumed below, so the returned sum is unchanged. */
+  __m128 y = _mm_movehl_ps(x, x);
+  x = _mm_add_ps(x, y);
+  y = _mm_shuffle_ps(x, x, _MM_SHUFFLE(1,1,1,1));
+  x = _mm_add_ss(x, y);
+#endif
+  return _mm_cvtss_f32(x);
+}
+
+static inline __m128 _mm_add_horz_ss(__m128 x) /* lane 0 of the result = x0+x1+x2+x3 */
+{
+#if defined(__SSE3__)
+  x = _mm_hadd_ps(x, x);
+  x = _mm_hadd_ps(x, x);
+#else
+  /* movehl(x, x) replaces an uninitialized first operand. Lanes 1-3 of the */
+  /* result still differ from the SSE3 path; callers should rely on lane 0 only. */
+  __m128 y = _mm_movehl_ps(x, x);
+  x = _mm_add_ps(x, y);
+  y = _mm_shuffle_ps(x, x, _MM_SHUFFLE(1,1,1,1));
+  x = _mm_add_ss(x, y);
+#endif
+  return x;
+}
+
+static inline float _mm_max_horz(__m128 x) /* maximum over the four lanes, per _mm_max_ps semantics */
+{
+  /* Fold the high pair onto the low pair. movehl(x, x) replaces the former */
+  /* uninitialized first operand; only result lanes 0-1 are consumed. */
+  __m128 y = _mm_movehl_ps(x, x);
+  x = _mm_max_ps(x, y);
+  y = _mm_shuffle_ps(x, x, _MM_SHUFFLE(1,1,1,1));
+  x = _mm_max_ss(x, y);
+  return _mm_cvtss_f32(x);
+}
+
+static inline float _mm_min_horz(__m128 x) /* minimum over the four lanes, per _mm_min_ps semantics */
+{
+  /* Fold the high pair onto the low pair. movehl(x, x) replaces the former */
+  /* uninitialized first operand; only result lanes 0-1 are consumed. */
+  __m128 y = _mm_movehl_ps(x, x);
+  x = _mm_min_ps(x, y);
+  y = _mm_shuffle_ps(x, x, _MM_SHUFFLE(1,1,1,1));
+  x = _mm_min_ss(x, y);
+  return _mm_cvtss_f32(x);
+}
+
+#endif /* defined(__SSE__) */
+
+#if 0
+/*---------------------------------------------------------------------------
+// for measuring performance
+//-------------------------------------------------------------------------*/
+extern unsigned __int64* _perf_start(void);
+extern void _perf_end(unsigned __int64 *stime, int index);
+extern void _perf_result(int index);
+#endif
+
+#endif /* _XMMLIB_H_INCLUDED */