File libvorbis-sse-optimize.diff of Package libvorbis-sse

diff -ruN --exclude '*.in' libvorbis-1.2.0/lib/block.c libvorbis-1.2.0-sse/lib/block.c
--- libvorbis-1.2.0/lib/block.c	2007-08-02 12:42:08.000000000 +0200
+++ libvorbis-1.2.0-sse/lib/block.c	2007-08-02 12:43:10.000000000 +0200
@@ -30,6 +30,9 @@
 #include "lpc.h"
 #include "registry.h"
 #include "misc.h"
+#ifdef __SSE__												/* SSE Optimize */
+#include "xmmlib.h"
+#endif														/* SSE Optimize */
 
 static int ilog2(unsigned int v){
   int ret=0;
@@ -81,6 +84,10 @@
 
 /* block abstraction setup *********************************************/
 
+#ifdef	__SSE__											/* SSE Optimize */
+#undef DWORD_ALIGN
+#define DWORD_ALIGN 16
+#endif													/* SSE Optimize */
 #ifndef WORD_ALIGN
 #define WORD_ALIGN 8
 #endif
@@ -111,7 +118,12 @@
 }
 
 void *_vorbis_block_alloc(vorbis_block *vb,long bytes){
+   void *ret = NULL;
+#ifdef	__SSE__
+  bytes=(bytes+(DWORD_ALIGN-1)) & ~(DWORD_ALIGN-1);
+#else	// for __SSE__
   bytes=(bytes+(WORD_ALIGN-1)) & ~(WORD_ALIGN-1);
+#endif	// for __SSE__
   if(bytes+vb->localtop>vb->localalloc){
     /* can't just _ogg_realloc... there are outstanding pointers */
     if(vb->localstore){
@@ -127,10 +139,10 @@
     vb->localtop=0;
   }
   {
-    void *ret=(void *)(((char *)vb->localstore)+vb->localtop);
+    ret=(void *)(((char *)vb->localstore)+vb->localtop);
     vb->localtop+=bytes;
-    return ret;
   }
+  return ret;
 }
 
 /* reap the chain, pull the ripcord */
@@ -609,7 +621,39 @@
   for(i=0;i<vi->channels;i++){
     vbi->pcmdelay[i]=
       _vorbis_block_alloc(vb,(vb->pcmend+beginW)*sizeof(*vbi->pcmdelay[i]));
+#ifdef __SSE__											/* SSE Optimize */
+	{
+		int j;
+		float	*d	 = (float*)(vbi->pcmdelay[i]);
+		float	*s	 = (float*)(v->pcm[i]);
+		for(j=0;j<vb->pcmend+beginW;)
+		{
+			__m128	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7;
+			XMM0	 = _mm_load_ps(s   );
+			XMM1	 = _mm_load_ps(s+ 4);
+			XMM2	 = _mm_load_ps(s+ 8);
+			XMM3	 = _mm_load_ps(s+12);
+			XMM4	 = _mm_load_ps(s+16);
+			XMM5	 = _mm_load_ps(s+20);
+			XMM6	 = _mm_load_ps(s+24);
+			XMM7	 = _mm_load_ps(s+28);
+			_mm_store_ps(d   , XMM0);
+			_mm_store_ps(d+ 4, XMM1);
+			_mm_store_ps(d+ 8, XMM2);
+			_mm_store_ps(d+12, XMM3);
+			_mm_store_ps(d+16, XMM4);
+			_mm_store_ps(d+20, XMM5);
+			_mm_store_ps(d+24, XMM6);
+			_mm_store_ps(d+28, XMM7);
+			_mm_prefetch((const char*)(s+64), _MM_HINT_T0);
+			s	+= 32;
+			d	+= 32;
+			j	+= 32;
+		}
+	}
+#else													/* SSE Optimize */
     memcpy(vbi->pcmdelay[i],v->pcm[i],(vb->pcmend+beginW)*sizeof(*vbi->pcmdelay[i]));
+#endif													/* SSE Optimize */
     vb->pcm[i]=vbi->pcmdelay[i]+beginW;
     
     /* before we added the delay 
@@ -642,8 +686,72 @@
       v->pcm_current-=movementW;
       
       for(i=0;i<vi->channels;i++)
+#ifdef __SSE__											/* SSE Optimize */
+	{
+		int j;
+		float	*d	 = (float*)(v->pcm[i]);
+		float	*s	 = (float*)(v->pcm[i]+movementW);
+		if(s>=d)
+		{
+			for(j=0;j<v->pcm_current;)
+			{
+				__m128	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7;
+				XMM0	 = _mm_load_ps(s   );
+				XMM1	 = _mm_load_ps(s+ 4);
+				XMM2	 = _mm_load_ps(s+ 8);
+				XMM3	 = _mm_load_ps(s+12);
+				XMM4	 = _mm_load_ps(s+16);
+				XMM5	 = _mm_load_ps(s+20);
+				XMM6	 = _mm_load_ps(s+24);
+				XMM7	 = _mm_load_ps(s+28);
+				_mm_store_ps(d   , XMM0);
+				_mm_store_ps(d+ 4, XMM1);
+				_mm_store_ps(d+ 8, XMM2);
+				_mm_store_ps(d+12, XMM3);
+				_mm_store_ps(d+16, XMM4);
+				_mm_store_ps(d+20, XMM5);
+				_mm_store_ps(d+24, XMM6);
+				_mm_store_ps(d+28, XMM7);
+				s	+= 32;
+				d	+= 32;
+				j	+= 32;
+				_mm_prefetch((const char*)(s+64), _MM_HINT_NTA);
+			}
+		}
+		else
+		{
+			d	+= v->pcm_current;
+			s	+= v->pcm_current;
+			for(j=0;j<v->pcm_current;)
+			{
+				__m128	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7;
+				XMM0	 = _mm_load_ps(s-32);
+				XMM1	 = _mm_load_ps(s-28);
+				XMM2	 = _mm_load_ps(s-24);
+				XMM3	 = _mm_load_ps(s-20);
+				XMM4	 = _mm_load_ps(s-16);
+				XMM5	 = _mm_load_ps(s-12);
+				XMM6	 = _mm_load_ps(s- 8);
+				XMM7	 = _mm_load_ps(s- 4);
+				_mm_store_ps(d-32, XMM0);
+				_mm_store_ps(d-28, XMM1);
+				_mm_store_ps(d-24, XMM2);
+				_mm_store_ps(d-20, XMM3);
+				_mm_store_ps(d-16, XMM4);
+				_mm_store_ps(d-12, XMM5);
+				_mm_store_ps(d- 8, XMM6);
+				_mm_store_ps(d- 4, XMM7);
+				s	-= 32;
+				d	-= 32;
+				j	+= 32;
+				_mm_prefetch((const char*)(s-64), _MM_HINT_NTA);
+			}
+		}
+	}
+#else													/* SSE Optimize */
 	memmove(v->pcm[i],v->pcm[i]+movementW,
 		v->pcm_current*sizeof(*v->pcm[i]));
+#endif													/* SSE Optimize */
       
       
       v->lW=v->W;
@@ -699,6 +807,53 @@
   return 0;
 }
 
+#ifdef	__SSE__											/* SSE Optimize */
+static inline void vorbis_synthesis_blockin_pmadd(float *pcm, float *w, float *p, int count)	/* in place: pcm[i] = pcm[i]*w[count-1-i] + p[i]*w[i] for i in [0,count) */
+{	/* 16 floats are consumed per pass and every access is _mm_load_ps/_mm_store_ps (16-byte aligned addresses required) */
+	int	i;
+	for(i=0;i<count;i+=16)
+	{
+		__m128	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7;
+		XMM1	 = _mm_load_ps(w+count-i- 4);	/* tail of the window, still in memory order */
+		XMM4	 = _mm_load_ps(w+count-i- 8);
+		XMM2	 = _mm_load_ps(p+i   );
+		XMM6	 = _mm_load_ps(w+i   );
+		XMM5	 = _mm_load_ps(p+i+ 4);
+		XMM7	 = _mm_load_ps(w+i+ 4);
+		XMM0	 = _mm_load_ps(pcm+i   );
+		XMM3	 = _mm_load_ps(pcm+i+ 4);
+		XMM1	 = _mm_shuffle_ps(XMM1, XMM1, _MM_SHUFFLE(0,1,2,3));	/* reverse lanes so lane 0 holds w[count-1-i] */
+		XMM4	 = _mm_shuffle_ps(XMM4, XMM4, _MM_SHUFFLE(0,1,2,3));
+		XMM2	 = _mm_mul_ps(XMM2, XMM6);	/* p[i]*w[i] */
+		XMM5	 = _mm_mul_ps(XMM5, XMM7);
+		XMM0	 = _mm_mul_ps(XMM0, XMM1);	/* pcm[i]*w[count-1-i] */
+		XMM3	 = _mm_mul_ps(XMM3, XMM4);
+		XMM1	 = _mm_load_ps(pcm+i+ 8);	/* loads for the second half of the pass start here */
+		XMM4	 = _mm_load_ps(pcm+i+12);
+		XMM0	 = _mm_add_ps(XMM0, XMM2);
+		XMM3	 = _mm_add_ps(XMM3, XMM5);
+		XMM2	 = _mm_load_ps(w+count-i-12);
+		XMM6	 = _mm_load_ps(w+i+ 8);
+		XMM5	 = _mm_load_ps(w+count-i-16);
+		XMM7	 = _mm_load_ps(w+i+12);
+		_mm_store_ps(pcm+i   , XMM0);
+		_mm_store_ps(pcm+i+ 4, XMM3);
+		XMM0	 = _mm_load_ps(p+i+ 8);
+		XMM3	 = _mm_load_ps(p+i+12);
+		XMM2	 = _mm_shuffle_ps(XMM2, XMM2, _MM_SHUFFLE(0,1,2,3));	/* same lane reversal for elements i+8..i+15 */
+		XMM5	 = _mm_shuffle_ps(XMM5, XMM5, _MM_SHUFFLE(0,1,2,3));
+		XMM0	 = _mm_mul_ps(XMM0, XMM6);
+		XMM3	 = _mm_mul_ps(XMM3, XMM7);
+		XMM1	 = _mm_mul_ps(XMM1, XMM2);
+		XMM4	 = _mm_mul_ps(XMM4, XMM5);
+		XMM1	 = _mm_add_ps(XMM1, XMM0);
+		XMM4	 = _mm_add_ps(XMM4, XMM3);
+		_mm_store_ps(pcm+i+ 8, XMM1);
+		_mm_store_ps(pcm+i+12, XMM4);
+	}
+}
+#endif													/* SSE Optimize */
+
 /* Unlike in analysis, the window is only partially applied for each
    block.  The time domain envelope is not yet handled at the point of
    calling (as it relies on the previous block). */
@@ -754,6 +909,36 @@
     for(j=0;j<vi->channels;j++){
       /* the overlap/add section */
       if(v->lW){
+#ifdef	__SSE__											/* SSE Optimize */
+	if(v->W){
+		/* large/large */
+		float	*w		 = _vorbis_window_get(b->window[1]-hs);
+		float	*pcm	 = v->pcm[j]+prevCenter;
+		float	*p		 = vb->pcm[j];
+		vorbis_synthesis_blockin_pmadd(pcm, w, p, n1);
+	}else{
+		/* large/small */
+		float *w		 = _vorbis_window_get(b->window[0]-hs);
+		float *pcm		 = v->pcm[j]+prevCenter+n1/2-n0/2;
+		float *p		 = vb->pcm[j];
+		vorbis_synthesis_blockin_pmadd(pcm, w, p, n0);
+	}
+	  }else{
+	if(v->W){
+		/* small/large */
+		float	*w		 = _vorbis_window_get(b->window[0]-hs);
+		float	*pcm	 = v->pcm[j]+prevCenter;
+		float	*p		 = vb->pcm[j]+n1/2-n0/2;
+		vorbis_synthesis_blockin_pmadd(pcm, w, p, n0);
+		memcpy(pcm+n0, p+n0, (n1/2-n0/2)*sizeof(float));
+	}else{
+		/* small/small */
+		float	*w		 = _vorbis_window_get(b->window[0]-hs);
+		float	*pcm	 = v->pcm[j]+prevCenter;
+		float	*p		 = vb->pcm[j];
+		vorbis_synthesis_blockin_pmadd(pcm, w, p, n0);
+	}
+#else														/* SSE Optimize */
 	if(v->W){
 	  /* large/large */
 	  float *w=_vorbis_window_get(b->window[1]-hs);
@@ -787,14 +972,38 @@
 	  for(i=0;i<n0;i++)
 	    pcm[i]=pcm[i]*w[n0-i-1] +p[i]*w[i];
 	}
+#endif														/* SSE Optimize */
       }
       
       /* the copy section */
       {
 	float *pcm=v->pcm[j]+thisCenter;
 	float *p=vb->pcm[j]+n;
+#ifdef	__SSE__											/* SSE Optimize */
+	for(i=0;i<n;i+=32)
+	{
+		__m128	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7;
+		XMM0	 = _mm_load_ps(p+i   );
+		XMM1	 = _mm_load_ps(p+i+ 4);
+		XMM2	 = _mm_load_ps(p+i+ 8);
+		XMM3	 = _mm_load_ps(p+i+12);
+		XMM4	 = _mm_load_ps(p+i+16);
+		XMM5	 = _mm_load_ps(p+i+20);
+		XMM6	 = _mm_load_ps(p+i+24);
+		XMM7	 = _mm_load_ps(p+i+28);
+		_mm_store_ps(pcm+i   , XMM0);
+		_mm_store_ps(pcm+i+ 4, XMM1);
+		_mm_store_ps(pcm+i+ 8, XMM2);
+		_mm_store_ps(pcm+i+12, XMM3);
+		_mm_store_ps(pcm+i+16, XMM4);
+		_mm_store_ps(pcm+i+20, XMM5);
+		_mm_store_ps(pcm+i+24, XMM6);
+		_mm_store_ps(pcm+i+28, XMM7);
+	}
+#else														/* SSE Optimize */
 	for(i=0;i<n;i++)
 	  pcm[i]=p[i];
+#endif														/* SSE Optimize */
       }
     }
     
diff -ruN --exclude '*.in' libvorbis-1.2.0/lib/codebook.c libvorbis-1.2.0-sse/lib/codebook.c
--- libvorbis-1.2.0/lib/codebook.c	2007-07-24 02:09:47.000000000 +0200
+++ libvorbis-1.2.0-sse/lib/codebook.c	2007-08-02 12:52:26.000000000 +0200
@@ -24,6 +24,148 @@
 #include "scales.h"
 #include "misc.h"
 #include "os.h"
+#ifdef __SSE__												/* SSE Optimize */
+#include "xmmlib.h"
+#endif														/* SSE Optimize */
+
+#define BUFFER_INCREMENT 256
+
+#if 1
+static const unsigned char bitrev8[256] = {	/* bitrev8[v] is v with its 8 bits in reverse order (e.g. 0x01 -> 0x80) */
+	0x00,	0x80,	0x40,	0xC0,	0x20,	0xA0,	0x60,	0xE0,
+	0x10,	0x90,	0x50,	0xD0,	0x30,	0xB0,	0x70,	0xF0,
+	0x08,	0x88,	0x48,	0xC8,	0x28,	0xA8,	0x68,	0xE8,
+	0x18,	0x98,	0x58,	0xD8,	0x38,	0xB8,	0x78,	0xF8,
+	0x04,	0x84,	0x44,	0xC4,	0x24,	0xA4,	0x64,	0xE4,
+	0x14,	0x94,	0x54,	0xD4,	0x34,	0xB4,	0x74,	0xF4,
+	0x0C,	0x8C,	0x4C,	0xCC,	0x2C,	0xAC,	0x6C,	0xEC,
+	0x1C,	0x9C,	0x5C,	0xDC,	0x3C,	0xBC,	0x7C,	0xFC,
+	0x02,	0x82,	0x42,	0xC2,	0x22,	0xA2,	0x62,	0xE2,
+	0x12,	0x92,	0x52,	0xD2,	0x32,	0xB2,	0x72,	0xF2,
+	0x0A,	0x8A,	0x4A,	0xCA,	0x2A,	0xAA,	0x6A,	0xEA,
+	0x1A,	0x9A,	0x5A,	0xDA,	0x3A,	0xBA,	0x7A,	0xFA,
+	0x06,	0x86,	0x46,	0xC6,	0x26,	0xA6,	0x66,	0xE6,
+	0x16,	0x96,	0x56,	0xD6,	0x36,	0xB6,	0x76,	0xF6,
+	0x0E,	0x8E,	0x4E,	0xCE,	0x2E,	0xAE,	0x6E,	0xEE,
+	0x1E,	0x9E,	0x5E,	0xDE,	0x3E,	0xBE,	0x7E,	0xFE,
+	0x01,	0x81,	0x41,	0xC1,	0x21,	0xA1,	0x61,	0xE1,
+	0x11,	0x91,	0x51,	0xD1,	0x31,	0xB1,	0x71,	0xF1,
+	0x09,	0x89,	0x49,	0xC9,	0x29,	0xA9,	0x69,	0xE9,
+	0x19,	0x99,	0x59,	0xD9,	0x39,	0xB9,	0x79,	0xF9,
+	0x05,	0x85,	0x45,	0xC5,	0x25,	0xA5,	0x65,	0xE5,
+	0x15,	0x95,	0x55,	0xD5,	0x35,	0xB5,	0x75,	0xF5,
+	0x0D,	0x8D,	0x4D,	0xCD,	0x2D,	0xAD,	0x6D,	0xED,
+	0x1D,	0x9D,	0x5D,	0xDD,	0x3D,	0xBD,	0x7D,	0xFD,
+	0x03,	0x83,	0x43,	0xC3,	0x23,	0xA3,	0x63,	0xE3,
+	0x13,	0x93,	0x53,	0xD3,	0x33,	0xB3,	0x73,	0xF3,
+	0x0B,	0x8B,	0x4B,	0xCB,	0x2B,	0xAB,	0x6B,	0xEB,
+	0x1B,	0x9B,	0x5B,	0xDB,	0x3B,	0xBB,	0x7B,	0xFB,
+	0x07,	0x87,	0x47,	0xC7,	0x27,	0xA7,	0x67,	0xE7,
+	0x17,	0x97,	0x57,	0xD7,	0x37,	0xB7,	0x77,	0xF7,
+	0x0F,	0x8F,	0x4F,	0xCF,	0x2F,	0xAF,	0x6F,	0xEF,
+	0x1F,	0x9F,	0x5F,	0xDF,	0x3F,	0xBF,	0x7F,	0xFF
+};
+#endif
+
+static const uint32_t mask[]=	/* mask[n] has the n lowest bits set, for n = 0..32 */
+{0x00000000,0x00000001,0x00000003,0x00000007,0x0000000f,
+ 0x0000001f,0x0000003f,0x0000007f,0x000000ff,0x000001ff,
+ 0x000003ff,0x000007ff,0x00000fff,0x00001fff,0x00003fff,
+ 0x00007fff,0x0000ffff,0x0001ffff,0x0003ffff,0x0007ffff,
+ 0x000fffff,0x001fffff,0x003fffff,0x007fffff,0x00ffffff,
+ 0x01ffffff,0x03ffffff,0x07ffffff,0x0fffffff,0x1fffffff,
+ 0x3fffffff,0x7fffffff,0xffffffff };
+
+#if	!defined(_USRDLL)
+/* Takes only up to 32 bits. */
+static void vorbis_oggpack_write(oggpack_buffer *b, unsigned long value, int bits)	/* append the low `bits` bits of value at the current bit position */
+{
+	uint32_t lvalue, hvalue;
+	if(b->endbyte+4>=b->storage){	/* the stores below touch ptr[0..3] and possibly ptr[4] */
+		b->buffer=realloc(b->buffer,b->storage+BUFFER_INCREMENT+4);	/* NOTE(review): a failed realloc is not handled here */
+		b->storage+=BUFFER_INCREMENT;
+		b->ptr=b->buffer+b->endbyte;
+	}
+
+	value&=mask[bits]; 
+	bits+=b->endbit;	/* from here on, bits is measured from bit 0 of ptr[0] */
+
+	if(bits<24)
+	{
+		lvalue = value<<b->endbit;
+		lvalue |= (b->ptr[0]&mask[b->endbit]);	/* keep the bits already written into ptr[0] */
+		*(uint32_t*)(b->ptr) = lvalue;	/* NOTE(review): host-order 32-bit store; presumably little-endian targets only */
+	}
+	else
+	{
+		lvalue = value<<b->endbit;
+		hvalue = b->endbit ? (uint32_t)(value>>(32-b->endbit)) : 0;	/* endbit==0: nothing spills, and a shift by 32 is undefined for 32-bit long */
+		lvalue |= (b->ptr[0]&mask[b->endbit]);
+		b->ptr[4] = hvalue;	/* bits that did not fit in the 32-bit word */
+		*(uint32_t*)(b->ptr) = lvalue;
+	}
+
+	b->endbyte+=bits/8;
+	b->ptr+=bits/8;
+	b->endbit=bits&7;
+}
+#endif
+
+#if defined(_OPENMP)
+void vorbis_oggpack_writecache(oggpack_writecache *c, unsigned long value, int bits)	/* queue one (value, bits) pair for a later flush */
+{
+	c->data[c->count].size  = bits;	/* NOTE(review): c->count is not checked against the size of c->data here */
+	c->data[c->count].value = value;
+	c->count ++;
+}
+
+int vorbis_book_encode_cache(codebook *book, int a, oggpack_writecache *c){	/* queues the codeword of entry a; returns its length in bits */
+  vorbis_oggpack_writecache(c,book->codelist[a],book->c->lengthlist[a]);	/* NOTE(review): a is not range-checked here, unlike vorbis_book_encode */
+  return(book->c->lengthlist[a]);
+}
+
+void vorbis_oggpack_cacheflush(oggpack_writecache *c, oggpack_buffer *b)	/* write every queued pair to b in queue order, then empty the queue */
+{
+	int i;
+	for(i=0;i<c->count;i++)
+		oggpack_write(b, c->data[i].value, c->data[i].size);
+	c->count = 0;	/* the cache can be reused for the next batch */
+}
+#endif
+
+/* Read in bits without advancing the bitptr; bits <= 32.  Returns (uint32_t)-1 when the request runs past the stored data. */
+static inline uint32_t vorbis_oggpack_look(oggpack_buffer *b,int bits){
+  uint32_t ret;
+  uint32_t m=mask[bits];
+
+  bits+=b->endbit;
+
+  if(b->endbyte+4>=b->storage){
+    /* not the main path */
+    if(b->endbyte*8+bits>b->storage*8)return((uint32_t)-1);
+  }
+  
+  ret=b->ptr[0]>>b->endbit;
+  if(bits>8){
+    ret|=(uint32_t)b->ptr[1]<<(8-b->endbit);  
+    if(bits>16){
+      ret|=(uint32_t)b->ptr[2]<<(16-b->endbit);  
+      if(bits>24){
+	ret|=(uint32_t)b->ptr[3]<<(24-b->endbit);  /* unsigned shift: the result may reach bit 31 */
+	if(bits>32 && b->endbit)
+	  ret|=(uint32_t)b->ptr[4]<<(32-b->endbit);  /* unsigned shift: high bits fall off instead of overflowing int */
+      }
+    }
+  }
+  return(m&ret);
+}
+
+static inline void vorbis_oggpack_adv(oggpack_buffer *b,int bits){	/* skip `bits` bits of the read buffer */
+  const int span=b->endbit+bits;	/* distance measured from bit 0 of the current byte */
+  b->endbyte+=span/8;
+  b->ptr+=span/8;
+  b->endbit=span&7;
+}
 
 /* packs the given codebook into the bitstream **************************/
 
@@ -256,7 +398,11 @@
 /* returns the number of bits ************************************************/
 int vorbis_book_encode(codebook *book, int a, oggpack_buffer *b){
   if(a<0 || a>=book->c->entries)return(0);
+#if	!defined(_USRDLL)	/* vorbis_oggpack_write is only compiled under this same condition */
+  vorbis_oggpack_write(b,book->codelist[a],book->c->lengthlist[a]);
+#else	/* _USRDLL builds keep calling oggpack_write */
   oggpack_write(b,book->codelist[a],book->c->lengthlist[a]);
+#endif
   return(book->c->lengthlist[a]);
 }
 
@@ -300,25 +446,47 @@
    bitreverse is not in the main execution path. */
 
 static ogg_uint32_t bitreverse(ogg_uint32_t x){
+#if	0	/* shift-and-mask version, compiled out */
   x=    ((x>>16)&0x0000ffff) | ((x<<16)&0xffff0000);
   x=    ((x>> 8)&0x00ff00ff) | ((x<< 8)&0xff00ff00);
   x=    ((x>> 4)&0x0f0f0f0f) | ((x<< 4)&0xf0f0f0f0);
   x=    ((x>> 2)&0x33333333) | ((x<< 2)&0xcccccccc);
   return((x>> 1)&0x55555555) | ((x<< 1)&0xaaaaaaaa);
+#else	/* table version: reverse the bits of each byte with bitrev8, then reverse the byte order */
+	ogg_uint32_t x1, x2, x3;
+	x3 = x;
+	x2 = x;
+	x1 = x;
+	x3 = (x3 >> 24);	/* x3, x2, x1, x = bytes 3, 2, 1, 0 of the input */
+	x2 = (x2 >> 16)&0xFF;
+	x1 = (x1 >>  8)&0xFF;
+	x  = x&0xFF;
+	x3 = bitrev8[x3];
+	x2 = bitrev8[x2];
+	x1 = bitrev8[x1];
+	x  = bitrev8[x ];
+	x2 = x2 <<  8;	/* input byte 2 lands in output byte 1 */
+	x1 = x1 << 16;	/* input byte 1 lands in output byte 2 */
+	x  = x  << 24;	/* input byte 0 lands in output byte 3; x3 (input byte 3) stays in byte 0 */
+	x  = x  | x1;
+	x2 = x2 | x3;
+	x  = x  | x2;
+	return x;
+#endif
 }
 
 STIN long decode_packed_entry_number(codebook *book, oggpack_buffer *b){
   int  read=book->dec_maxlength;
-  long lo,hi;
-  long lok = oggpack_look(b,book->dec_firsttablen);
+  uint32_t lo,hi;
+  ogg_int32_t lok = (ogg_int32_t)vorbis_oggpack_look(b,book->dec_firsttablen); /* signed: the look's -1 end-of-data marker must compare <0 */
   
   if (lok >= 0) {
-    long entry = book->dec_firsttable[lok];
+    uint32_t entry = book->dec_firsttable[lok];
     if(entry&0x80000000UL){
       lo=(entry>>15)&0x7fff;
       hi=book->used_entries-(entry&0x7fff);
     }else{
-      oggpack_adv(b, book->dec_codelengths[entry-1]);
+      vorbis_oggpack_adv(b, book->dec_codelengths[entry-1]);
       return(entry-1);
     }
   }else{
@@ -326,10 +494,10 @@
     hi=book->used_entries;
   }
   
-  lok = oggpack_look(b, read);
+  lok = vorbis_oggpack_look(b, read);
   
   while(lok<0 && read>1)
-    lok = oggpack_look(b, --read);
+    lok = vorbis_oggpack_look(b, --read);
   if(lok<0)return -1;
   
   /* bisect search for the codeword in the ordered list */
@@ -344,12 +512,12 @@
       }
     
     if(book->dec_codelengths[lo]<=read){
-      oggpack_adv(b, book->dec_codelengths[lo]);
+      vorbis_oggpack_adv(b, book->dec_codelengths[lo]);
       return(lo);
     }
   }
   
-  oggpack_adv(b, read);
+  vorbis_oggpack_adv(b, read);
 
   return(-1);
 }
@@ -470,7 +638,341 @@
 
 long vorbis_book_decodevv_add(codebook *book,float **a,long offset,int ch,
 			      oggpack_buffer *b,int n){
-
+#ifdef __SSE__												/* SSE Optimize */
+	long i,j;
+	int chptr=0;
+	if(book->used_entries<=0)return(0);	/* empty book: nothing to add, same result as the scalar path */
+	if(ch==2)
+	{
+		int mid0 = ((offset/2+3)&(~3))<(offset+n)/2 ? (int)((offset/2+3)&(~3)) : (int)((offset+n)/2);	/* first multiple of 4 at or after the start, never beyond the end of the range */
+		int mid1 = ((offset+n)/2)&(~3);	/* last multiple of 4 at or before the end */
+		float *bvl = book->valuelist;
+		float *a0 = a[0];
+		float *a1 = a[1];
+		switch(book->dim)
+		{
+			default :
+				for(i=offset/2;i<(offset+n)/2;)
+				{
+					long entry = decode_packed_entry_number(book,b);
+					if(entry==-1)
+						return(-1);
+					{
+						const float *t	 = bvl+entry*book->dim;
+						for (j=0;j<book->dim;j++)
+						{
+							a[chptr++][i]	+= t[j];
+							if(chptr==2)
+							{
+								chptr	 = 0;
+								i	++;
+							}
+						}
+					}
+				}
+				break;
+			case 2:	/* dim 2: each entry holds one value for a[0] (t[0]) and one for a[1] (t[1]) */
+				for(i=offset/2;i<mid0;)
+				{
+					long entry = decode_packed_entry_number(book,b);
+					if(entry==-1)
+						return(-1);
+					{
+						const float *t	 = bvl+entry*2;
+						__m128	XMM0 = _mm_load_ss(t  );
+						__m128	XMM1 = _mm_load_ss(a0+i);
+						__m128	XMM2 = _mm_load_ss(t+1);
+						__m128	XMM3 = _mm_load_ss(a1+i);
+						XMM0	 = _mm_add_ss(XMM0, XMM1);
+						XMM2	 = _mm_add_ss(XMM2, XMM3);
+						_mm_store_ss(a0+i  , XMM0);
+						_mm_store_ss(a1+i++, XMM2);
+					}
+				}
+				for(;i<mid1;)
+				{
+					/*
+						after the four half loads below:
+						XMM0	(T11 T10 T01 T00)
+						XMM2	(T31 T30 T21 T20)
+					*/
+					__m128	XMM0 = _mm_setzero_ps(), XMM1, XMM2 = _mm_setzero_ps(), XMM3, XMM4;
+					const float *t0, *t1,*t2, *t3;
+					long entry = decode_packed_entry_number(book,b);
+					if(entry==-1)
+						return(-1);
+					t0	 = bvl+entry*2;
+					entry = decode_packed_entry_number(book,b);
+					if(entry==-1)
+						return(-1);
+					t1	 = bvl+entry*2;
+					entry = decode_packed_entry_number(book,b);
+					if(entry==-1)
+						return(-1);
+					t2	 = bvl+entry*2;
+					entry = decode_packed_entry_number(book,b);
+					if(entry==-1)
+						return(-1);
+					t3	 = bvl+entry*2;
+					XMM0	 = _mm_loadl_pi(XMM0, (__m64*)t0);
+					XMM2	 = _mm_loadl_pi(XMM2, (__m64*)t2);
+					XMM3	 = _mm_load_ps(a0+i);
+					XMM0	 = _mm_loadh_pi(XMM0, (__m64*)t1);
+					XMM2	 = _mm_loadh_pi(XMM2, (__m64*)t3);
+					/*
+						after the two shuffles below:
+						XMM0	(T30 T20 T10 T00)
+						XMM1	(T31 T21 T11 T01)
+					*/
+					XMM4	 = _mm_load_ps(a1+i);
+					XMM1	 = XMM0;
+					XMM0	 = _mm_shuffle_ps(XMM0, XMM2, _MM_SHUFFLE(2,0,2,0));
+					XMM1	 = _mm_shuffle_ps(XMM1, XMM2, _MM_SHUFFLE(3,1,3,1));
+					XMM0	 = _mm_add_ps(XMM0, XMM3);
+					XMM1	 = _mm_add_ps(XMM1, XMM4);
+					_mm_store_ps(a0+i, XMM0);
+					_mm_store_ps(a1+i, XMM1);
+					i	+= 4;
+				}
+				for(;i<(offset+n)/2;)
+				{
+					long entry = decode_packed_entry_number(book,b);
+					if(entry==-1)
+						return(-1);
+					{
+						const float *t	 = bvl+entry*2;
+						__m128	XMM0 = _mm_load_ss(t  );
+						__m128	XMM1 = _mm_load_ss(a0+i);
+						__m128	XMM2 = _mm_load_ss(t+1);
+						__m128	XMM3 = _mm_load_ss(a1+i);
+						XMM0	 = _mm_add_ss(XMM0, XMM1);
+						XMM2	 = _mm_add_ss(XMM2, XMM3);
+						_mm_store_ss(a0+i  , XMM0);
+						_mm_store_ss(a1+i++, XMM2);
+					}
+				}
+				break;
+			case 4:
+				for(i=offset/2;i<mid0;)
+				{
+					long entry = decode_packed_entry_number(book,b);
+					if(entry==-1)
+						return(-1);
+					{
+						const float *t	 = bvl+entry*4;
+						__m128	XMM0 = _mm_load_ss(t  );
+						__m128	XMM1 = _mm_load_ss(a0+i  );
+						__m128	XMM2 = _mm_load_ss(t+1);
+						__m128	XMM3 = _mm_load_ss(a1+i  );
+						__m128	XMM4 = _mm_load_ss(t+2);
+						__m128	XMM5 = _mm_load_ss(a0+i+1);
+						__m128	XMM6 = _mm_load_ss(t+3);
+						__m128	XMM7 = _mm_load_ss(a1+i+1);
+						XMM0	 = _mm_add_ss(XMM0, XMM1);
+						XMM2	 = _mm_add_ss(XMM2, XMM3);
+						XMM4	 = _mm_add_ss(XMM4, XMM5);
+						XMM6	 = _mm_add_ss(XMM6, XMM7);
+						_mm_store_ss(a0+i  , XMM0);
+						_mm_store_ss(a1+i  , XMM2);
+						_mm_store_ss(a0+i+1, XMM4);
+						_mm_store_ss(a1+i+1, XMM6);
+						i	+= 2;
+					}
+				}
+				for(;i<mid1;)
+				{
+					/*
+						XMM0	(T03 T02 T01 T00)
+						XMM1	(T13 T12 T11 T10)
+						XMM2	(T23 T22 T21 T20)
+						XMM3	(T33 T32 T31 T30)
+					*/
+					__m128	XMM0, XMM1, XMM2, XMM3;
+					__m128	XMM4, XMM5, XMM6, XMM7;
+					long entry = decode_packed_entry_number(book,b);
+					if(entry==-1)
+						return(-1);
+					XMM0	 = _mm_lddqu_ps(bvl+entry*4);
+					entry = decode_packed_entry_number(book,b);
+					if(entry==-1)
+						return(-1);
+					XMM1	 = _mm_lddqu_ps(bvl+entry*4);
+					entry = decode_packed_entry_number(book,b);
+					if(entry==-1)
+						return(-1);
+					XMM2	 = _mm_lddqu_ps(bvl+entry*4);
+					entry = decode_packed_entry_number(book,b);
+					if(entry==-1)
+						return(-1);
+					XMM3	 = _mm_lddqu_ps(bvl+entry*4);
+					/*
+						XMM0	(T12 T10 T02 T00)
+						XMM4	(T13 T11 T03 T01)
+						XMM2	(T32 T20 T12 T10)
+						XMM5	(T33 T21 T13 T11)
+					*/
+					XMM4	 = XMM0;
+					XMM5	 = XMM2;
+					XMM0	 = _mm_shuffle_ps(XMM0, XMM1, _MM_SHUFFLE(2,0,2,0));
+					XMM4	 = _mm_shuffle_ps(XMM4, XMM1, _MM_SHUFFLE(3,1,3,1));
+					XMM1	 = _mm_load_ps(a0+i  );
+					XMM2	 = _mm_shuffle_ps(XMM2, XMM3, _MM_SHUFFLE(2,0,2,0));
+					XMM5	 = _mm_shuffle_ps(XMM5, XMM3, _MM_SHUFFLE(3,1,3,1));
+					XMM3	 = _mm_load_ps(a1+i  );
+					XMM6	 = _mm_load_ps(a0+i+4);
+					XMM7	 = _mm_load_ps(a1+i+4);
+					XMM0	 = _mm_add_ps(XMM0, XMM1);
+					XMM4	 = _mm_add_ps(XMM4, XMM3);
+					XMM2	 = _mm_add_ps(XMM2, XMM6);
+					XMM5	 = _mm_add_ps(XMM5, XMM7);
+					_mm_store_ps(a0+i  , XMM0);
+					_mm_store_ps(a1+i  , XMM4);
+					_mm_store_ps(a0+i+4, XMM2);
+					_mm_store_ps(a1+i+4, XMM5);
+					i	+= 8;
+				}
+				for(;i<(offset+n)/2;)
+				{
+					long entry = decode_packed_entry_number(book,b);
+					if(entry==-1)
+						return(-1);
+					{
+						const float *t	 = bvl+entry*4;
+						__m128	XMM0 = _mm_load_ss(t  );
+						__m128	XMM1 = _mm_load_ss(a0+i  );
+						__m128	XMM2 = _mm_load_ss(t+1);
+						__m128	XMM3 = _mm_load_ss(a1+i  );
+						__m128	XMM4 = _mm_load_ss(t+2);
+						__m128	XMM5 = _mm_load_ss(a0+i+1);
+						__m128	XMM6 = _mm_load_ss(t+3);
+						__m128	XMM7 = _mm_load_ss(a1+i+1);
+						XMM0	 = _mm_add_ss(XMM0, XMM1);
+						XMM2	 = _mm_add_ss(XMM2, XMM3);
+						XMM4	 = _mm_add_ss(XMM4, XMM5);
+						XMM6	 = _mm_add_ss(XMM6, XMM7);
+						_mm_store_ss(a0+i  , XMM0);
+						_mm_store_ss(a1+i  , XMM2);
+						_mm_store_ss(a0+i+1, XMM4);
+						_mm_store_ss(a1+i+1, XMM6);
+						i	+= 2;
+					}
+				}
+				break;
+			case 8:
+				for(i=offset/2;i<mid0;)
+				{
+					long entry = decode_packed_entry_number(book,b);
+					if(entry==-1)
+						return(-1);
+					{
+						const float *t	 = bvl+entry*8;
+						__m128	XMM0	 = _mm_lddqu_ps(t  );
+						__m128	XMM1	 = _mm_lddqu_ps(t+4);
+						__m128	XMM2	 = _mm_load_ps(a0+i);
+						__m128	XMM3	 = _mm_load_ps(a1+i);
+						__m128	XMM4	 = XMM0;
+						XMM0	 = _mm_shuffle_ps(XMM0, XMM1, _MM_SHUFFLE(2,0,2,0));
+						XMM4	 = _mm_shuffle_ps(XMM4, XMM1, _MM_SHUFFLE(3,1,3,1));
+						XMM0	 = _mm_add_ps(XMM0, XMM2);
+						XMM4	 = _mm_add_ps(XMM4, XMM3);
+						_mm_store_ps(a0+i  , XMM0);
+						_mm_store_ps(a1+i  , XMM4);
+						i	+= 4;
+					}
+				}
+				for(;i<mid1;)
+				{
+					/*
+						XMM0	(T03 T02 T01 T00)
+						XMM1	(T13 T12 T11 T10)
+						XMM2	(T07 T06 T05 T04)
+						XMM2	(T17 T16 T15 T14)
+					*/
+					__m128	XMM0, XMM1, XMM2, XMM3;
+					__m128	XMM4, XMM5, XMM6, XMM7;
+					const float *t;
+					long entry = decode_packed_entry_number(book,b);
+					if(entry==-1)
+						return(-1);
+					t	 = bvl+entry*8;
+					XMM0	 = _mm_lddqu_ps(t  );
+					XMM1	 = _mm_lddqu_ps(t+4);
+					entry = decode_packed_entry_number(book,b);
+					if(entry==-1)
+						return(-1);
+					t	 = bvl+entry*8;
+					XMM2	 = _mm_lddqu_ps(t  );
+					XMM3	 = _mm_lddqu_ps(t+4);
+					/*
+						XMM0	(T12 T10 T02 T00)
+						XMM4	(T13 T11 T03 T01)
+						XMM2	(T16 T14 T06 T04)
+						XMM5	(T17 T15 T07 T05)
+					*/
+					XMM4	 = XMM0;
+					XMM5	 = XMM2;
+					XMM0	 = _mm_shuffle_ps(XMM0, XMM1, _MM_SHUFFLE(2,0,2,0));
+					XMM4	 = _mm_shuffle_ps(XMM4, XMM1, _MM_SHUFFLE(3,1,3,1));
+					XMM2	 = _mm_shuffle_ps(XMM2, XMM3, _MM_SHUFFLE(2,0,2,0));
+					XMM5	 = _mm_shuffle_ps(XMM5, XMM3, _MM_SHUFFLE(3,1,3,1));
+					XMM1	 = _mm_load_ps(a0+i  );
+					XMM3	 = _mm_load_ps(a1+i  );
+					XMM6	 = _mm_load_ps(a0+i+4);
+					XMM7	 = _mm_load_ps(a1+i+4);
+					XMM0	 = _mm_add_ps(XMM0, XMM1);
+					XMM4	 = _mm_add_ps(XMM4, XMM3);
+					XMM2	 = _mm_add_ps(XMM2, XMM6);
+					XMM5	 = _mm_add_ps(XMM5, XMM7);
+					_mm_store_ps(a0+i  , XMM0);
+					_mm_store_ps(a1+i  , XMM4);
+					_mm_store_ps(a0+i+4, XMM2);
+					_mm_store_ps(a1+i+4, XMM5);
+					i	+= 8;
+				}
+				for(;i<(offset+n)/2;)
+				{
+					long entry = decode_packed_entry_number(book,b);
+					if(entry==-1)
+						return(-1);
+					{
+						const float *t	 = bvl+entry*8;
+						__m128	XMM0	 = _mm_lddqu_ps(t  );
+						__m128	XMM1	 = _mm_lddqu_ps(t+4);
+						__m128	XMM4	 = XMM0;
+						__m128	XMM2	 = _mm_load_ps(a0+i);
+						__m128	XMM3	 = _mm_load_ps(a1+i);
+						XMM0	 = _mm_shuffle_ps(XMM0, XMM1, _MM_SHUFFLE(2,0,2,0));
+						XMM4	 = _mm_shuffle_ps(XMM4, XMM1, _MM_SHUFFLE(3,1,3,1));
+						XMM0	 = _mm_add_ps(XMM0, XMM2);
+						XMM4	 = _mm_add_ps(XMM4, XMM3);
+						_mm_store_ps(a0+i  , XMM0);
+						_mm_store_ps(a1+i  , XMM4);
+						i	+= 4;
+					}
+				}
+				break;
+		}
+	}
+	else
+	{
+		for(i=offset/ch;i<(offset+n)/ch;)
+		{
+			long entry = decode_packed_entry_number(book,b);
+			if(entry==-1)
+				return(-1);
+			{
+				const float *t	 = book->valuelist+entry*book->dim;
+				for (j=0;j<book->dim;j++)
+				{
+					a[chptr++][i]	+= t[j];
+					if(chptr==ch)
+					{
+						chptr	 = 0;
+						i	++;
+					}
+				}
+			}
+		}
+	}
+#else														/* SSE Optimize */
   long i,j,entry;
   int chptr=0;
   if(book->used_entries>0){
@@ -489,6 +991,7 @@
       }
     }
   }
+#endif														/* SSE Optimize */
   return(0);
 }
 
diff -ruN --exclude '*.in' libvorbis-1.2.0/lib/codebook.h libvorbis-1.2.0-sse/lib/codebook.h
--- libvorbis-1.2.0/lib/codebook.h	2007-07-24 02:09:47.000000000 +0200
+++ libvorbis-1.2.0-sse/lib/codebook.h	2007-08-02 12:43:10.000000000 +0200
@@ -155,6 +155,4 @@
 				     long off,int ch, 
 				    oggpack_buffer *b,int n);
 
-
-
 #endif
diff -ruN --exclude '*.in' libvorbis-1.2.0/lib/cpu.c libvorbis-1.2.0-sse/lib/cpu.c
--- libvorbis-1.2.0/lib/cpu.c	1970-01-01 01:00:00.000000000 +0100
+++ libvorbis-1.2.0-sse/lib/cpu.c	2007-08-02 12:43:10.000000000 +0200
@@ -0,0 +1,50 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2003             *
+ * by the XIPHOPHORUS Company http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+ function: CPU ID Check
+ last mod: $Id: cpu.c,v 1.1 2006-06-09 00:00:00+09 blacksword Exp $
+
+ ********************************************************************/
+
+#if defined(__INTEL_COMPILER)&&defined(_WIN32)&&defined(_USRDLL)
+extern int __intel_cpu_indicator;
+
+void __intel_cpu_indicator_init(void)	/* sets __intel_cpu_indicator from the CPUID function 1 feature flags */
+{
+	unsigned int t, u;	/* t receives EDX, u receives ECX */
+	_asm { 
+		mov	eax,1 
+		cpuid 
+		mov	t, edx
+		mov	u, ecx
+	}
+	/* SSE3 Check: ECX bit 0.  Tested first, so the highest level found wins. */
+	if(u&0x0000001)
+	{
+		__intel_cpu_indicator = 0x800;
+		return;
+	}
+	/* SSE2 Check: EDX bit 26 */
+	if(t&0x4000000)
+	{
+		__intel_cpu_indicator = 0x200;
+		return;
+	}
+	/* SSE Check: EDX bit 25 */
+	if(t&0x2000000)
+	{
+		__intel_cpu_indicator = 0x100;
+		return;
+	}
+	__intel_cpu_indicator = 1;	/* none of the three feature bits is set */
+}
+#endif
diff -ruN --exclude '*.in' libvorbis-1.2.0/lib/envelope.c libvorbis-1.2.0-sse/lib/envelope.c
--- libvorbis-1.2.0/lib/envelope.c	2007-07-24 02:09:47.000000000 +0200
+++ libvorbis-1.2.0-sse/lib/envelope.c	2007-08-02 12:43:10.000000000 +0200
@@ -28,6 +28,9 @@
 #include "envelope.h"
 #include "mdct.h"
 #include "misc.h"
+#ifdef __SSE__												/* SSE Optimize */
+#include "xmmlib.h"
+#endif														/* SSE Optimize */
 
 void _ve_envelope_init(envelope_lookup *e,vorbis_info *vi){
   codec_setup_info *ci=vi->codec_setup;
@@ -103,7 +106,11 @@
      itself (for low power signals) */
 
   float minV=ve->minenergy;
+#ifdef	__SSE__												/* SSE Optimize */
+  float *vec	 = (float*)_ogg_alloca(n*sizeof(*vec));
+#else														/* SSE Optimize */
   float *vec=alloca(n*sizeof(*vec));
+#endif														/* SSE Optimize */
 
   /* stretch is used to gradually lengthen the number of windows
      considered prevoius-to-potential-trigger */
@@ -116,9 +123,50 @@
     totalshift+pos*ve->searchstep);*/
   
  /* window and transform */
+#ifdef	__SSE__												/* SSE Optimize */
+	for(i=0;i<n;i+=32)
+	{
+		__m128	XMM0, XMM1, XMM2, XMM3;
+		__m128	XMM4, XMM5, XMM6, XMM7;
+		XMM0	 = _mm_load_ps(data+i   );
+		XMM4	 = _mm_load_ps(ve->mdct_win+i   );
+		XMM1	 = _mm_load_ps(data+i+ 4);
+		XMM5	 = _mm_load_ps(ve->mdct_win+i+ 4);
+		XMM2	 = _mm_load_ps(data+i+ 8);
+		XMM6	 = _mm_load_ps(ve->mdct_win+i+ 8);
+		XMM3	 = _mm_load_ps(data+i+12);
+		XMM7	 = _mm_load_ps(ve->mdct_win+i+12);
+		XMM0	 = _mm_mul_ps(XMM0, XMM4);
+		XMM4	 = _mm_load_ps(data+i+16);
+		XMM1	 = _mm_mul_ps(XMM1, XMM5);
+		XMM5	 = _mm_load_ps(ve->mdct_win+i+16);
+		XMM2	 = _mm_mul_ps(XMM2, XMM6);
+		XMM6	 = _mm_load_ps(data+i+20);
+		XMM3	 = _mm_mul_ps(XMM3, XMM7);
+		XMM7	 = _mm_load_ps(ve->mdct_win+i+20);
+		_mm_store_ps(vec+i   , XMM0);
+		XMM0	 = _mm_load_ps(data+i+24);
+		_mm_store_ps(vec+i+ 4, XMM1);
+		XMM1	 = _mm_load_ps(ve->mdct_win+i+24);
+		_mm_store_ps(vec+i+ 8, XMM2);
+		XMM2	 = _mm_load_ps(data+i+28);
+		_mm_store_ps(vec+i+12, XMM3);
+		XMM3	 = _mm_load_ps(ve->mdct_win+i+28);
+		XMM4	 = _mm_mul_ps(XMM4, XMM5);
+		XMM6	 = _mm_mul_ps(XMM6, XMM7);
+		XMM0	 = _mm_mul_ps(XMM0, XMM1);
+		XMM2	 = _mm_mul_ps(XMM2, XMM3);
+		_mm_store_ps(vec+i+16, XMM4);
+		_mm_store_ps(vec+i+20, XMM6);
+		_mm_store_ps(vec+i+24, XMM0);
+		_mm_store_ps(vec+i+28, XMM2);
+	}
+	mdct_forward(&ve->mdct, vec, vec, NULL);
+#else														/* SSE Optimize */
   for(i=0;i<n;i++)
     vec[i]=data[i]*ve->mdct_win[i];
   mdct_forward(&ve->mdct,vec,vec);
+#endif														/* SSE Optimize */
   
   /*_analysis_output_always("mdct",seq2,vec,n/2,0,1,0); */
 
@@ -149,7 +197,231 @@
   /* perform spreading and limiting, also smooth the spectrum.  yes,
      the MDCT results in all real coefficients, but it still *behaves*
      like real/imaginary pairs */
-  for(i=0;i<n/2;i+=2){
+#ifdef	__SSE__												/* SSE Optimize */
+	{	/* vector form of the scalar spread/limit loop: each (vec[i],vec[i+1]) pair becomes one limited dB value */
+		static _MM_ALIGN16 const float mparm[4]	 = {	/* bits-to-dB scale, already halved like the scalar `todB(&val)*.5f` (presumably todB()'s constant -- confirm in scales.h) */
+			7.17711438e-7f/2.f, 7.17711438e-7f/2.f, 7.17711438e-7f/2.f, 7.17711438e-7f/2.f
+		};
+		static _MM_ALIGN16 const float aparm[4]	 = {	/* matching dB offset, halved for the same reason */
+			-764.6161886f/2.f, -764.6161886f/2.f, -764.6161886f/2.f, -764.6161886f/2.f
+		};
+		static _MM_ALIGN16 const float decayinit0[4]	 = {	/* per-lane decay offsets for outputs 0..3 (scalar path: decay-=8. per output) */
+			0.f, 8.f,	16.f,	24.f
+		};
+		static _MM_ALIGN16 const float decayinit1[4]	 = {	/* per-lane decay offsets for outputs 4..7 */
+			32.f, 40.f,	48.f,	56.f
+		};
+		static _MM_ALIGN16 const float p16[4]	 = {	/* decay step per loop pass: 8 outputs * 8 */
+			64.f,	64.f,	64.f,	64.f
+		};
+		__m128 MINV		 = _mm_set_ps1(minV);
+		float	*p	 = vec;	/* output cursor; advances 8 floats per 16 inputs */
+		int last_n		 = n/2;	/* same input bound as the scalar loop (i<n/2) */
+		int midpoint	 = ((int)(-(minV-decay)/4.f)+15)&(~15);	/* first input index (rounded up to 16) where decay has dropped below minV */
+		__m128 DECAY0	 = _mm_sub_ps(_mm_set_ps1(decay), PM128(decayinit0));
+		__m128 DECAY1	 = _mm_sub_ps(_mm_set_ps1(decay), PM128(decayinit1));
+		if(midpoint>last_n)	/* the decay-limited loop below is bounded by midpoint only; never let it run past the scalar bound */
+			midpoint	 = last_n;
+#if	defined(__SSE2__)
+		for(i=0;i<midpoint;i+=16,p+=8)
+		{
+			__m128	XMM0, XMM2;
+			__m128	XMM1, XMM3;
+#if	defined(__SSE3__)
+			XMM0	 = _mm_load_ps(vec+i   );
+			XMM1	 = _mm_load_ps(vec+i+ 4);
+			XMM2	 = _mm_load_ps(vec+i+ 8);
+			XMM3	 = _mm_load_ps(vec+i+12);
+			XMM0	 = _mm_mul_ps(XMM0, XMM0);
+			XMM1	 = _mm_mul_ps(XMM1, XMM1);
+			XMM2	 = _mm_mul_ps(XMM2, XMM2);
+			XMM3	 = _mm_mul_ps(XMM3, XMM3);
+			XMM0	 = _mm_hadd_ps(XMM0, XMM1);
+			XMM2	 = _mm_hadd_ps(XMM2, XMM3);
+#else
+			__m128	XMM4, XMM5;
+			XMM0	 = _mm_load_ps(vec+i   );
+			XMM2	 = _mm_load_ps(vec+i+ 8);
+			XMM4	 = _mm_load_ps(vec+i+ 4);
+			XMM5	 = _mm_load_ps(vec+i+12);
+			XMM1	 = XMM0;
+			XMM3	 = XMM2;
+			XMM0	 = _mm_shuffle_ps(XMM0, XMM4,_MM_SHUFFLE(2,0,2,0));
+			XMM1	 = _mm_shuffle_ps(XMM1, XMM4,_MM_SHUFFLE(3,1,3,1));
+			XMM2	 = _mm_shuffle_ps(XMM2, XMM5,_MM_SHUFFLE(2,0,2,0));
+			XMM3	 = _mm_shuffle_ps(XMM3, XMM5,_MM_SHUFFLE(3,1,3,1));
+			XMM0	 = _mm_mul_ps(XMM0, XMM0);
+			XMM1	 = _mm_mul_ps(XMM1, XMM1);
+			XMM2	 = _mm_mul_ps(XMM2, XMM2);
+			XMM3	 = _mm_mul_ps(XMM3, XMM3);
+			XMM0	 = _mm_add_ps(XMM0, XMM1);
+			XMM2	 = _mm_add_ps(XMM2, XMM3);
+#endif
+			XMM0	 = _mm_cvtepi32_ps(_mm_castps_si128(XMM0));
+			XMM2	 = _mm_cvtepi32_ps(_mm_castps_si128(XMM2));
+			XMM0	 = _mm_mul_ps(XMM0, PM128(mparm));
+			XMM2	 = _mm_mul_ps(XMM2, PM128(mparm));
+			XMM0	 = _mm_add_ps(XMM0, PM128(aparm));
+			XMM2	 = _mm_add_ps(XMM2, PM128(aparm));
+			XMM0	 = _mm_max_ps(XMM0, DECAY0);
+			XMM2	 = _mm_max_ps(XMM2, DECAY1);
+			XMM0	 = _mm_max_ps(XMM0, MINV);
+			XMM2	 = _mm_max_ps(XMM2, MINV);
+			_mm_store_ps(p  , XMM0);
+			_mm_store_ps(p+4, XMM2);
+			DECAY0	 = _mm_sub_ps(DECAY0, PM128(p16));
+			DECAY1	 = _mm_sub_ps(DECAY1, PM128(p16));
+		}
+		for(;i<last_n;i+=16,p+=8)
+		{
+			__m128	XMM0, XMM2;
+			__m128	XMM1, XMM3;
+#if	defined(__SSE3__)
+			XMM0	 = _mm_load_ps(vec+i   );
+			XMM1	 = _mm_load_ps(vec+i+ 4);
+			XMM2	 = _mm_load_ps(vec+i+ 8);
+			XMM3	 = _mm_load_ps(vec+i+12);
+			XMM0	 = _mm_mul_ps(XMM0, XMM0);
+			XMM1	 = _mm_mul_ps(XMM1, XMM1);
+			XMM2	 = _mm_mul_ps(XMM2, XMM2);
+			XMM3	 = _mm_mul_ps(XMM3, XMM3);
+			XMM0	 = _mm_hadd_ps(XMM0, XMM1);
+			XMM2	 = _mm_hadd_ps(XMM2, XMM3);
+#else
+			__m128	XMM4, XMM5;
+			XMM0	 = _mm_load_ps(vec+i   );
+			XMM2	 = _mm_load_ps(vec+i+ 8);
+			XMM4	 = _mm_load_ps(vec+i+ 4);
+			XMM5	 = _mm_load_ps(vec+i+12);
+			XMM1	 = XMM0;
+			XMM3	 = XMM2;
+			XMM0	 = _mm_shuffle_ps(XMM0, XMM4,_MM_SHUFFLE(2,0,2,0));
+			XMM1	 = _mm_shuffle_ps(XMM1, XMM4,_MM_SHUFFLE(3,1,3,1));
+			XMM2	 = _mm_shuffle_ps(XMM2, XMM5,_MM_SHUFFLE(2,0,2,0));
+			XMM3	 = _mm_shuffle_ps(XMM3, XMM5,_MM_SHUFFLE(3,1,3,1));
+			XMM0	 = _mm_mul_ps(XMM0, XMM0);
+			XMM1	 = _mm_mul_ps(XMM1, XMM1);
+			XMM2	 = _mm_mul_ps(XMM2, XMM2);
+			XMM3	 = _mm_mul_ps(XMM3, XMM3);
+			XMM0	 = _mm_add_ps(XMM0, XMM1);
+			XMM2	 = _mm_add_ps(XMM2, XMM3);
+#endif
+			XMM0	 = _mm_cvtepi32_ps(_mm_castps_si128(XMM0));
+			XMM2	 = _mm_cvtepi32_ps(_mm_castps_si128(XMM2));
+			XMM0	 = _mm_mul_ps(XMM0, PM128(mparm));
+			XMM2	 = _mm_mul_ps(XMM2, PM128(mparm));
+			XMM0	 = _mm_add_ps(XMM0, PM128(aparm));
+			XMM2	 = _mm_add_ps(XMM2, PM128(aparm));
+			XMM0	 = _mm_max_ps(XMM0, MINV);
+			XMM2	 = _mm_max_ps(XMM2, MINV);
+			_mm_store_ps(p  , XMM0);
+			_mm_store_ps(p+4, XMM2);
+		}
+#else	/* for __SSE2__ */
+/*
+		SSE optimized code
+*/
+		for(i=0;i<midpoint;i+=16,p+=8)
+		{
+			__m64	MM0, MM1, MM2, MM3;
+			__m128x	U0, U1;
+			{
+				__m128	XMM0, XMM2;
+				__m128	XMM1, XMM3;
+				__m128	XMM4, XMM5;
+				XMM0	 = _mm_load_ps(vec+i   );
+				XMM2	 = _mm_load_ps(vec+i+ 8);
+				XMM4	 = _mm_load_ps(vec+i+ 4);
+				XMM5	 = _mm_load_ps(vec+i+12);
+				XMM1	 = XMM0;
+				XMM3	 = XMM2;
+				XMM0	 = _mm_shuffle_ps(XMM0, XMM4,_MM_SHUFFLE(2,0,2,0));
+				XMM1	 = _mm_shuffle_ps(XMM1, XMM4,_MM_SHUFFLE(3,1,3,1));
+				XMM2	 = _mm_shuffle_ps(XMM2, XMM5,_MM_SHUFFLE(2,0,2,0));
+				XMM3	 = _mm_shuffle_ps(XMM3, XMM5,_MM_SHUFFLE(3,1,3,1));
+				XMM0	 = _mm_mul_ps(XMM0, XMM0);
+				XMM1	 = _mm_mul_ps(XMM1, XMM1);
+				XMM2	 = _mm_mul_ps(XMM2, XMM2);
+				XMM3	 = _mm_mul_ps(XMM3, XMM3);
+				XMM0	 = _mm_add_ps(XMM0, XMM1);
+				XMM2	 = _mm_add_ps(XMM2, XMM3);
+				U0.ps	 = XMM0;
+				U1.ps	 = XMM2;
+				MM0		 = U0.pi64[1];
+				MM1		 = U1.pi64[1];
+				MM2		 = U0.pi64[0];
+				MM3		 = U1.pi64[0];
+				XMM0	 = _mm_cvtpi32_ps(XMM0, MM0);
+				XMM2	 = _mm_cvtpi32_ps(XMM2, MM1);
+				XMM0	 = _mm_movelh_ps(XMM0, XMM0);
+				XMM2	 = _mm_movelh_ps(XMM2, XMM2);
+				XMM0	 = _mm_cvtpi32_ps(XMM0, MM2);
+				XMM2	 = _mm_cvtpi32_ps(XMM2, MM3);
+				XMM0	 = _mm_mul_ps(XMM0, PM128(mparm));
+				XMM2	 = _mm_mul_ps(XMM2, PM128(mparm));
+				XMM0	 = _mm_add_ps(XMM0, PM128(aparm));
+				XMM2	 = _mm_add_ps(XMM2, PM128(aparm));
+				XMM0	 = _mm_max_ps(XMM0, DECAY0);
+				XMM2	 = _mm_max_ps(XMM2, DECAY1);
+				XMM0	 = _mm_max_ps(XMM0, MINV);
+				XMM2	 = _mm_max_ps(XMM2, MINV);
+				_mm_store_ps(p  , XMM0);
+				_mm_store_ps(p+4, XMM2);
+			}
+			DECAY0	 = _mm_sub_ps(DECAY0, PM128(p16));
+			DECAY1	 = _mm_sub_ps(DECAY1, PM128(p16));
+		}
+		for(;i<last_n;i+=16,p+=8)
+		{
+			__m64	MM0, MM1, MM2, MM3;
+			__m128x	U0, U1;
+			{
+				__m128	XMM0, XMM2;
+				__m128	XMM1, XMM3;
+				__m128	XMM4, XMM5;
+				XMM0	 = _mm_load_ps(vec+i   );
+				XMM2	 = _mm_load_ps(vec+i+ 8);
+				XMM4	 = _mm_load_ps(vec+i+ 4);
+				XMM5	 = _mm_load_ps(vec+i+12);
+				XMM1	 = XMM0;
+				XMM3	 = XMM2;
+				XMM0	 = _mm_shuffle_ps(XMM0, XMM4,_MM_SHUFFLE(2,0,2,0));
+				XMM1	 = _mm_shuffle_ps(XMM1, XMM4,_MM_SHUFFLE(3,1,3,1));
+				XMM2	 = _mm_shuffle_ps(XMM2, XMM5,_MM_SHUFFLE(2,0,2,0));
+				XMM3	 = _mm_shuffle_ps(XMM3, XMM5,_MM_SHUFFLE(3,1,3,1));
+				XMM0	 = _mm_mul_ps(XMM0, XMM0);
+				XMM1	 = _mm_mul_ps(XMM1, XMM1);
+				XMM2	 = _mm_mul_ps(XMM2, XMM2);
+				XMM3	 = _mm_mul_ps(XMM3, XMM3);
+				XMM0	 = _mm_add_ps(XMM0, XMM1);
+				XMM2	 = _mm_add_ps(XMM2, XMM3);
+				U0.ps	 = XMM0;
+				U1.ps	 = XMM2;
+				MM0		 = U0.pi64[1];
+				MM1		 = U1.pi64[1];
+				MM2		 = U0.pi64[0];
+				MM3		 = U1.pi64[0];
+				XMM0	 = _mm_cvtpi32_ps(XMM0, MM0);
+				XMM2	 = _mm_cvtpi32_ps(XMM2, MM1);
+				XMM0	 = _mm_movelh_ps(XMM0, XMM0);
+				XMM2	 = _mm_movelh_ps(XMM2, XMM2);
+				XMM0	 = _mm_cvtpi32_ps(XMM0, MM2);
+				XMM2	 = _mm_cvtpi32_ps(XMM2, MM3);
+				XMM0	 = _mm_mul_ps(XMM0, PM128(mparm));
+				XMM2	 = _mm_mul_ps(XMM2, PM128(mparm));
+				XMM0	 = _mm_add_ps(XMM0, PM128(aparm));
+				XMM2	 = _mm_add_ps(XMM2, PM128(aparm));
+				XMM0	 = _mm_max_ps(XMM0, MINV);
+				XMM2	 = _mm_max_ps(XMM2, MINV);
+				_mm_store_ps(p  , XMM0);
+				_mm_store_ps(p+4, XMM2);
+			}
+		}
+		_mm_empty();
+#endif	/* for __SSE2__ */
+	}
+#else														/* SSE Optimize */
+    for(i=0;i<n/2;i+=2){
     float val=vec[i]*vec[i]+vec[i+1]*vec[i+1];
     val=todB(&val)*.5f;
     if(val<decay)val=decay;
@@ -157,17 +429,70 @@
     vec[i>>1]=val;
     decay-=8.;
   }
+#endif														/* SSE Optimize */
 
   /*_analysis_output_always("spread",seq2++,vec,n/4,0,0,0);*/
   
   /* perform preecho/postecho triggering by band */
   for(j=0;j<VE_BANDS;j++){
-    float acc=0.;
-    float valmax,valmin;
 
     /* accumulate amplitude */
+#ifdef	__SSE__												/* SSE Optimize */
+	float acc;	/* dot product of vec[begin..begin+end) with bands[j].window, as in the scalar branch */
+	float valmax,valmin;
+	{
+		__m128	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5;
+		if(bands[j].end!=8)
+		{
+			switch(bands[j].end)	/* NOTE(review): no default case -- XMM0 stays unset for any end other than 4, 5, 6 or 8 */
+			{
+				case 4 :	/* bands[j].end==4(14.286%) */
+					XMM0	 = _mm_lddqu_ps(vec+bands[j].begin);	/* _mm_lddqu_ps comes from xmmlib.h; presumably an unaligned load -- confirm */
+					XMM1	 = _mm_load_ps(bands[j].window  );	/* aligned load: window must be 16-byte aligned */
+					XMM0	 = _mm_mul_ps(XMM0, XMM1);
+					break;
+				case 5 :	/* bands[j].end==5(14.286%) */
+					XMM0	 = _mm_lddqu_ps(vec+bands[j].begin);
+					XMM2	 = _mm_load_ss(vec+bands[j].begin+4);	/* fifth element handled as a scalar */
+					XMM1	 = _mm_load_ps(bands[j].window  );
+					XMM3	 = _mm_load_ss(bands[j].window+4);
+					XMM0	 = _mm_mul_ps(XMM0, XMM1);
+					XMM2	 = _mm_mul_ss(XMM2, XMM3);
+					XMM0	 = _mm_add_ss(XMM0, XMM2);	/* fold the scalar product into lane 0 */
+					break;
+				case 6 :	/* bands[j].end==6(14.286%) */
+					XMM0	 = _mm_lddqu_ps(vec+bands[j].begin);
+					XMM2	 = _mm_load_ss(vec+bands[j].begin+4);
+					XMM4	 = _mm_load_ss(vec+bands[j].begin+5);
+					XMM1	 = _mm_load_ps(bands[j].window  );
+					XMM3	 = _mm_load_ss(bands[j].window+4);
+					XMM5	 = _mm_load_ss(bands[j].window+5);
+					XMM0	 = _mm_mul_ps(XMM0, XMM1);
+					XMM2	 = _mm_mul_ss(XMM2, XMM3);
+					XMM4	 = _mm_mul_ss(XMM4, XMM5);
+					XMM2	 = _mm_add_ss(XMM2, XMM4);
+					XMM0	 = _mm_add_ss(XMM0, XMM2);	/* fold both scalar products into lane 0 */
+					break;
+			}
+		}
+		else	/* bands[j].end==8(57.143%) */
+		{
+			XMM0	 = _mm_lddqu_ps(vec+bands[j].begin  );
+			XMM1	 = _mm_load_ps(bands[j].window  );
+			XMM2	 = _mm_lddqu_ps(vec+bands[j].begin+4);
+			XMM3	 = _mm_load_ps(bands[j].window+4);
+			XMM0	 = _mm_mul_ps(XMM0, XMM1);
+			XMM2	 = _mm_mul_ps(XMM2, XMM3);
+			XMM0	 = _mm_add_ps(XMM0, XMM2);
+		}
+		acc		 = _mm_add_horz(XMM0);	/* _mm_add_horz is an xmmlib.h helper; presumably the sum of the four lanes -- confirm */
+	}
+#else														/* scalar fallback */
+    float acc=0.;
+    float valmax,valmin;
     for(i=0;i<bands[j].end;i++)
       acc+=vec[i+bands[j].begin]*bands[j].window[i];
+#endif														/* SSE Optimize */
    
     acc*=bands[j].total;
 
@@ -278,7 +603,7 @@
       if(ve->mark[j/ve->searchstep]){
 	if(j>centerW){
 
-#if 0
+	  #if 0
 	  if(j>ve->curmark){
 	    float *marker=alloca(v->pcm_current*sizeof(*marker));
 	    int l,m;
@@ -361,13 +686,13 @@
 
   memmove(e->mark,e->mark+smallshift,(smallsize-smallshift)*sizeof(*e->mark));
   
-#if 0
+  #if 0
   for(i=0;i<VE_BANDS*e->ch;i++)
     memmove(e->filter[i].markers,
 	    e->filter[i].markers+smallshift,
 	    (1024-smallshift)*sizeof(*(*e->filter).markers));
   totalshift+=shift;
-#endif 
+  #endif 
 
   e->current-=shift;
   if(e->curmark>=0)
diff -ruN --exclude '*.in' libvorbis-1.2.0/lib/floor0.c libvorbis-1.2.0-sse/lib/floor0.c
--- libvorbis-1.2.0/lib/floor0.c	2007-07-24 02:09:47.000000000 +0200
+++ libvorbis-1.2.0-sse/lib/floor0.c	2007-08-02 12:43:10.000000000 +0200
@@ -28,6 +28,9 @@
 #include "scales.h"
 #include "misc.h"
 #include "os.h"
+#ifdef __SSE__												/* SSE Optimize */
+#include "xmmlib.h"
+#endif														/* SSE Optimize */
 
 #include "misc.h"
 #include <stdio.h>
diff -ruN --exclude '*.in' libvorbis-1.2.0/lib/floor1.c libvorbis-1.2.0-sse/lib/floor1.c
--- libvorbis-1.2.0/lib/floor1.c	2007-08-02 12:42:12.000000000 +0200
+++ libvorbis-1.2.0-sse/lib/floor1.c	2007-08-02 13:50:49.000000000 +0200
@@ -25,6 +25,12 @@
 #include "codebook.h"
 #include "misc.h"
 #include "scales.h"
+#ifdef __SSE__												/* SSE Optimize */
+#include "xmmlib.h"
+#endif														/* SSE Optimize */
+#if	defined(__INTEL_COMPILER)
+#include <ia32intrin.h>
+#endif
 
 #include <stdio.h>
 
@@ -49,15 +55,15 @@
 } vorbis_look_floor1;
 
 typedef struct lsfit_acc{
-  long x0;
-  long x1;
+  int32_t x0;	/* NOTE(review): int32_t needs <stdint.h>; confirm it is pulled in for non-SSE builds too */
+  int32_t x1;	/* (x0,x1) is the first 64-bit pair; fit_line's MMX loop skips it */
 
-  long xa;
-  long ya;
-  long x2a;
-  long y2a;
-  long xya; 
-  long an;
+  int32_t xa;	/* fit_line reads this struct as four __m64 values, so the fields must stay 32-bit and in this order */
+  int32_t ya;	/* (xa,ya) is summed as one __m64 pair */
+  int32_t x2a;
+  int32_t y2a;	/* (x2a,y2a) is summed as one __m64 pair */
+  int32_t xya; 	/* (xya,an) is summed as one __m64 pair */
+  int32_t an;
 } lsfit_acc;
 
 /***********************************************/
@@ -83,6 +89,16 @@
   }
 }
 
+#if	defined(__INTEL_COMPILER)
+static int ilog(unsigned int v){	/* position of the highest set bit plus one; 0 when v==0, like the portable loop below */
+	return(v ? _bit_scan_reverse(v) + 1 : 0);	/* _bit_scan_reverse(0) is undefined, so zero is handled explicitly */
+}
+
+static int ilog2(unsigned int v){	/* same as ilog(v-1) for v>0, and 0 for v==0 */
+	if(v)--v;
+	return(v ? _bit_scan_reverse(v) + 1 : 0);	/* v is 0 here for inputs 0 and 1; avoid the undefined bit scan */
+}
+#else
 static int ilog(unsigned int v){
   int ret=0;
   while(v){
@@ -101,6 +117,7 @@
   }
   return(ret);
 }
+#endif
 
 static void floor1_pack (vorbis_info_floor *i,oggpack_buffer *opb){
   vorbis_info_floor1 *info=(vorbis_info_floor1 *)i;
@@ -283,15 +300,64 @@
     return(y0+off);
   }
 }
+#if	defined(__SSE__)										/* SSE Optimize */
+static _MM_ALIGN16 const __m128x pfv0 = 
+	{ .sf = {7.3142857f, 7.3142857f, 7.3142857f, 7.3142857f} };
+static _MM_ALIGN16 const __m128x pfv1 = 
+	{ .sf = {1023.5f, 1023.5f, 1023.5f, 1023.5f} };
+static _MM_ALIGN16 const __m128x pfv2 = 
+	{ .sf = {1023.f, 1023.f, 1023.f, 1023.f} };
+#endif														/* SSE Optimize */
 
 static int vorbis_dBquant(const float *x){
+#if	defined(__SSE__)										/* SSE Optimize */
+	__m128	XMM0	 = _mm_load_ss(x);	/* scalar SSE version of the expression in the #else branch */
+	XMM0	 = _mm_mul_ss(XMM0, pfv0.ps);	/* *x * 7.3142857f */
+	XMM0	 = _mm_add_ss(XMM0, pfv1.ps);	/* + 1023.5f */
+	XMM0	 = _mm_max_ss(XMM0, PFV_0.ps);	/* lower clamp; PFV_0 is declared outside this hunk, presumably all zeros -- confirm */
+	XMM0	 = _mm_min_ss(XMM0, pfv2.ps);	/* upper clamp at 1023.f */
+	return	_mm_cvttss_si32(XMM0);	/* truncating conversion, like the float-to-int assignment below */
+#else														/* scalar fallback */
   int i= *x*7.3142857f+1023.5f;
   if(i>1023)return(1023);
   if(i<0)return(0);
   return i;
+#endif														/* SSE Optimize */
 }
 
-static float FLOOR1_fromdB_LOOKUP[256]={
+#if 0
+#if	defined(__SSE__)										/* SSE Optimize */
+static __m128 vorbis_dBquant_ps(float *x)
+{
+#if	defined(__SSE2__)
+	register __m128	pi;
+	pi	 = _mm_load_ps(x);
+	pi	 = _mm_mul_ps(pi, pfv0.ps);
+	pi	 = _mm_add_ps(pi, pfv1.ps);
+	pi	 = _mm_max_ps(pi, PFV_0.ps);
+	pi	 = _mm_min_ps(pi, pfv2.ps);
+	pi	 = _mm_cvtepi32_ps(_mm_cvttps_epi32(pi));
+	return	pi;
+#else
+	register __m128	pi	 = PM128(x);
+	register __m64	MM0, MM1;
+	pi	 = _mm_mul_ps(pi, pfv0.ps);
+	pi	 = _mm_add_ps(pi, pfv1.ps);
+	pi	 = _mm_max_ps(pi, PFV_0.ps);
+	pi	 = _mm_min_ps(pi, pfv2.ps);
+	MM0	 = _mm_cvttps_pi32(pi);
+	pi	 = _mm_movehl_ps(pi, pi);
+	MM1	 = _mm_cvttps_pi32(pi);
+	pi	 = _mm_cvtpi32_ps(pi, MM1);
+	pi	 = _mm_movelh_ps(pi, pi);
+	pi	 = _mm_cvtpi32_ps(pi, MM0);
+	return	pi;
+#endif
+}
+#endif														/* SSE Optimize */
+#endif
+
+static const float FLOOR1_fromdB_LOOKUP[256]={
   1.0649863e-07F, 1.1341951e-07F, 1.2079015e-07F, 1.2863978e-07F, 
   1.3699951e-07F, 1.4590251e-07F, 1.5538408e-07F, 1.6548181e-07F, 
   1.7623575e-07F, 1.8768855e-07F, 1.9988561e-07F, 2.128753e-07F, 
@@ -358,74 +424,420 @@
   0.82788260F, 0.88168307F, 0.9389798F, 1.F, 
 };
 
-static void render_line(int n, int x0,int x1,int y0,int y1,float *d){
-  int dy=y1-y0;
-  int adx=x1-x0;
-  int ady=abs(dy);
-  int base=dy/adx;
-  int sy=(dy<0?base-1:base+1);
-  int x=x0;
-  int y=y0;
-  int err=0;
-
-  ady-=abs(base*adx);
+static void render_line(int n, int x, int x2, int y, int y2,float *d)
+{	/* scale d[x..min(n,x2)) by FLOOR1_fromdB_LOOKUP[] sampled along the line (x,y)-(x2,y2) */
+	int shortLen = y2-y;	/* signed rise of the line */
+	int longLen;	/* number of samples to render after clipping to n */
+	int decInc;	/* flat line: end index of the 8-wide loop; sloped line: y step per sample, 21 fractional bits */
+	int j;	/* flat line: end index of the 4-wide loop; sloped line: fixed-point y accumulator */
+
+	if(n>x2) n=x2;
+
+	longLen = n - x;
+	if (longLen <= 0)	/* nothing to render; also keeps x2-x > 0 for the division below */
+		return;
 
-  if(n>x1)n=x1;
-
-  if(x<n)
-    d[x]*=FLOOR1_fromdB_LOOKUP[y];
-
-  while(++x<n){
-    err=err+ady;
-    if(err>=adx){
-      err-=adx;
-      y+=sy;
-    }else{
-      y+=base;
-    }
-    d[x]*=FLOOR1_fromdB_LOOKUP[y];
-  }
+	if(shortLen==0)
+	{
+#if defined(__SSE__)										/* SSE Optimize */
+		__m128	XMM0	 = _mm_set1_ps(FLOOR1_fromdB_LOOKUP[y]);	/* one constant factor for the whole run */
+		decInc	 = x+(longLen&(~7));	/* absolute end index: x need not start at 0 */
+		j		 = x+(longLen&(~3));	/* absolute end index for the 4-wide step */
+		for(;x<decInc;x+=8)
+		{
+			__m128	XMM1	 = _mm_lddqu_ps(d+x  );
+			__m128	XMM2	 = _mm_lddqu_ps(d+x+4);
+			XMM1	 = _mm_mul_ps(XMM1, XMM0);
+			XMM2	 = _mm_mul_ps(XMM2, XMM0);
+			_mm_storeu_ps(d+x  , XMM1);	/* unaligned store: d+x has no alignment guarantee */
+			_mm_storeu_ps(d+x+4, XMM2);
+		}
+		for(;x<j;x+=4)
+		{
+			__m128	XMM1	 = _mm_lddqu_ps(d+x  );
+			XMM1	 = _mm_mul_ps(XMM1, XMM0);
+			_mm_storeu_ps(d+x  , XMM1);
+		}
+#endif														/* SSE Optimize */
+		for(;x<n;x++)	/* scalar tail (entire run when SSE is unavailable) */
+			d[x] *= FLOOR1_fromdB_LOOKUP[y];
+	}
+	else
+	{
+		decInc = (shortLen << 21) / (x2-x);	/* slope over the full, unclipped run, as the removed code did with adx=x1-x0 */
+		if(shortLen>=0)
+			j = 0x200   + (y<<21);	/* small rounding bias for rising lines */
+		else
+			j = 0x1FF800 + (y<<21);	/* bias just below y+1 for falling lines */
+	
+		for (;x<n;x++)	/* NOTE(review): not verified to match the removed integer stepping sample-for-sample on long runs */
+		{
+			d[x] *= FLOOR1_fromdB_LOOKUP[j >> 21];
+			j += decInc;
+		}
+	}
+	return;
 }
 
-static void render_line0(int x0,int x1,int y0,int y1,int *d){
-  int dy=y1-y0;
-  int adx=x1-x0;
-  int ady=abs(dy);
-  int base=dy/adx;
-  int sy=(dy<0?base-1:base+1);
-  int x=x0;
-  int y=y0;
-  int err=0;
-
-  ady-=abs(base*adx);
+static void render_line0(int x, int x2,int y, int y2, int *d)
+{	/* write the integer y values of the line (x,y)-(x2,y2) into d[x..x2) */
+	int shortLen = y2-y;	/* signed rise of the line */
+	int longLen = x2-x;	/* run length; must be non-zero (divisor below) */
+	int decInc = (shortLen << 21) / longLen;	/* y step per sample, 21 fractional bits */
+	int j;	/* fixed-point y accumulator */
+	if(shortLen>=0)
+		j = 0x200   + (y<<21);	/* small rounding bias for rising lines */
+	else
+		j = 0x1FF800 + (y<<21);	/* bias just below y+1 for falling lines */
 
-  d[x]=y;
-  while(++x<x1){
-    err=err+ady;
-    if(err>=adx){
-      err-=adx;
-      y+=sy;
-    }else{
-      y+=base;
-    }
-    d[x]=y;
-  }
+#if defined(__SSE2__)
+	if(longLen>=4)
+	{
+		__m128i PJ0 = _mm_set_epi32(	/* accumulators for samples x..x+3 */
+			j+decInc*3, j+decInc*2, j+decInc  , j
+		);
+		__m128i PJ1 = _mm_set_epi32(	/* accumulators for samples x+4..x+7 */
+			j+decInc*7, j+decInc*6, j+decInc*5, j+decInc*4
+		);
+		__m128i	PDECINC = _mm_set1_epi32(decInc*8);
+		int x1 = x+(longLen&(~7));
+		for(;x<x1;x+=8)
+		{
+			__m128i XMM0 = PJ0;
+			__m128i XMM1 = PJ1;
+			XMM0 = _mm_srai_epi32(XMM0, 21);	/* arithmetic shift drops the fraction */
+			XMM1 = _mm_srai_epi32(XMM1, 21);
+			_mm_storeu_si128((__m128i*)(d+x  ), XMM0);
+			_mm_storeu_si128((__m128i*)(d+x+4), XMM1);
+			PJ0 = _mm_add_epi32(PJ0, PDECINC);
+			PJ1 = _mm_add_epi32(PJ1, PDECINC);
+		}
+		if(x2-x>=4)
+		{
+			__m128i XMM0 = PJ0;
+			XMM0 = _mm_srai_epi32(XMM0, 21);
+			_mm_storeu_si128((__m128i*)(d+x  ), XMM0);
+			PJ0 = PJ1;	/* PJ1 already holds the accumulators for the next four samples */
+			x += 4;
+		}
+		j = _mm_cvtsi128_si32(PJ0);	/* lane 0 is the accumulator for the scalar tail */
+	}
+#elif defined(__SSE__)
+	if(longLen>=4)
+	{
+		__m64 PJ0 = _mm_set_pi32(j+decInc  , j         );
+		__m64 PJ1 = _mm_set_pi32(j+decInc*3, j+decInc*2);
+		__m64 PDECINC = _mm_set1_pi32(decInc*4);
+		int x1 = x+(longLen&(~3));
+		for(;x<x1;x+=4)
+		{
+			__m64 MM0 = _mm_srai_pi32(PJ0, 21);
+			__m64 MM1 = _mm_srai_pi32(PJ1, 21);
+			PM64(d+x  ) = MM0;
+			PM64(d+x+2) = MM1;
+			PJ0 = _mm_add_pi32(PJ0, PDECINC);
+			PJ1 = _mm_add_pi32(PJ1, PDECINC);
+		}
+		j = _mm_cvtsi64_si32(PJ0);
+		/* clear MMX state before any later x87 code, as the other MMX sections of this patch do */
+		_mm_empty();
+	}
+#endif
+	for (;x<x2;x++)	/* scalar tail (entire run without SSE) */
+	{
+		d[x] = j >> 21;
+		j += decInc;
+	}
+	return;
 }
 
 /* the floor has already been filtered to only include relevant sections */
 static int accumulate_fit(const float *flr,const float *mdct,
 			  int x0, int x1,lsfit_acc *a,
-			  int n,vorbis_info_floor1 *info){
+#if	defined(__SSE__)										/* SSE Optimize */
+			  int n,vorbis_info_floor1 *info, const float *tflr,
+			  const float *tmask, const int *tcres)
+#else														/* SSE Optimize */
+			  int n,vorbis_info_floor1 *info)
+#endif														/* SSE Optimize */
+{
   long i;
-  /*int quantized=vorbis_dBquant(flr+x0);*/
-
-  long xa=0,ya=0,x2a=0,y2a=0,xya=0,na=0, xb=0,yb=0,x2b=0,y2b=0,xyb=0,nb=0;
+  int xa=0,ya=0,x2a=0,y2a=0,xya=0,na=0, xb=0,yb=0,x2b=0,y2b=0,xyb=0,nb=0;
+#ifdef __SSE__												/* SSE Optimize */
+	int	x05;
+	int j;
+
+	extern float findex[2048];
+	extern float findex2[2048];
+#endif														/* SSE Optimize */
 
   memset(a,0,sizeof(*a));
   a->x0=x0;
   a->x1=x1;
   if(x1>=n)x1=n-1;
 
+#ifdef __SSE__												/* SSE Optimize */
+	x1	++;
+	{
+		static const int parm0[16] = {
+			0, 0, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6
+		};
+		static const int parm3[16] = {
+			6, 6, 5, 5, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 0, 0
+		};
+		__m128	PYA;
+		__m128	PY2A;
+		__m128	PX2A;
+		__m128	PXYA;
+		__m128	PYB;
+		__m128	PY2B;
+		__m128	PX2B;
+		__m128	PXYB;
+		x05	 = (x0+3)&(~3);
+		x05	 = (x05>x1)?x1:x05;
+		if(x1-x05<4)
+		{
+			for(i=x0;i<x1;i++)
+			{
+				int quantized	 = (int)tflr[i];
+				if(quantized)
+				{
+					if(mdct[i]+info->twofitatten>=flr[i])
+					{
+						xa	+= i;
+						ya	+= quantized;
+						x2a	+= i*i;
+						y2a	+= quantized*quantized;
+						xya	+= i*quantized;
+						na	++;
+					}
+					else
+					{
+						xb	+= i;
+						yb	+= quantized;
+						x2b	+= i*i;
+						y2b	+= quantized*quantized;
+						xyb	+= i*quantized;
+						nb	++;
+					}
+				}
+			}
+		}
+		else
+		{
+			_mm_prefetch((const float*)(findex +x0)  , _MM_HINT_NTA);
+			_mm_prefetch((const float*)(findex2+x0)  , _MM_HINT_NTA);
+			PYA		 = _mm_setzero_ps();
+			PY2A	 = _mm_setzero_ps();
+			PX2A	 = _mm_setzero_ps();
+			PXYA	 = _mm_setzero_ps();
+			PYB		 = _mm_setzero_ps();
+			PY2B	 = _mm_setzero_ps();
+			PX2B	 = _mm_setzero_ps();
+			PXYB	 = _mm_setzero_ps();
+#if	1
+			j = 16>>(x05-x0);
+			for(i=x0;i<x05;i++)
+			{
+				__m128	XMM0,  XMM1, XMM2, XMM3;
+				XMM0	 = _mm_load_ss(tflr+i);
+				XMM3	 = _mm_load_ss(findex+i);
+				XMM1	 = XMM0;
+				XMM2	 = XMM0;
+				XMM1	 = _mm_mul_ss(XMM1, XMM1);
+				XMM2	 = _mm_mul_ss(XMM2, XMM3);
+				if((tcres[x05-4]&j)!=0)	/*	Type-A 1 unit burst mode */
+				{
+					xa	+= i;
+					PYA		 = _mm_add_ss(PYA,  XMM0);
+					x2a += i*i;
+					PY2A	 = _mm_add_ss(PY2A, XMM1);
+					PXYA	 = _mm_add_ss(PXYA, XMM2);
+					na	++;
+				}
+				else					/*	Type-B 1 unit burst mode */
+				{
+					xb	+= i;
+					PYB		 = _mm_add_ss(PYB,  XMM0);
+					x2b += i*i;
+					PY2B	 = _mm_add_ss(PY2B, XMM1);
+					PXYB	 = _mm_add_ss(PXYB, XMM2);
+					nb	++;
+				}
+				j = j << 1;
+			}
+#else
+			for(i=x0;i<x05;i++)
+			{
+				int quantized	 = (int)tflr[i];
+				if(quantized)
+				{
+					if(mdct[i]+info->twofitatten>=flr[i])
+					{
+						xa	+= i;
+						ya	+= quantized;
+						x2a	+= i*i;
+						y2a	+= quantized*quantized;
+						xya	+= i*quantized;
+						na	++;
+					}
+					else
+					{
+						xb	+= i;
+						yb	+= quantized;
+						x2b	+= i*i;
+						y2b	+= quantized*quantized;
+						xyb	+= i*quantized;
+						nb	++;
+					}
+				}
+			}
+#endif
+			x05	 = ((x1-i)&(~3))+i;
+			for(;i<x05;i+=4)
+			{
+				__m128	XMM0,  XMM1, XMM2;
+				XMM0	 = _mm_load_ps(tflr+i);
+				_mm_prefetch((const float*)(findex +i+16)  , _MM_HINT_NTA);
+				_mm_prefetch((const float*)(findex2+i+16)  , _MM_HINT_NTA);
+				XMM1	 = XMM0;
+				XMM2	 = XMM0;
+				XMM1	 = _mm_mul_ps(XMM1, XMM1);
+				XMM2	 = _mm_mul_ps(XMM2, PM128(findex+i));
+				if(tcres[i]==0xF)	/*	Type-A 4 unit burst mode */
+				{
+					xa	+= (i*4+6);
+					PYA		 = _mm_add_ps(PYA,  XMM0);
+					x2a += 4*i*(i+3)+14;
+					PY2A	 = _mm_add_ps(PY2A, XMM1);
+					PXYA	 = _mm_add_ps(PXYA, XMM2);
+					na	+= 4;
+				}
+				else if(tcres[i]==0x0)	/*	Type-B 4 unit burst mode */
+				{
+					xb	+= (i*4+6);
+					PYB		 = _mm_add_ps(PYB,  XMM0);
+					x2b += 4*i*(i+3)+14;
+					PY2B	 = _mm_add_ps(PY2B, XMM1);
+					PXYB	 = _mm_add_ps(PXYB, XMM2);
+					nb	+= 4;
+				}
+				else
+				{
+					int p = bitCountTable[tcres[i]];
+					int q = 4 - p;
+					__m128	PMASKA	 = _mm_load_ps(tmask+i);
+					__m128	PMASKB	 = _mm_xor_ps(PMASKA, PMASKTABLE[15].ps);
+					xa		+= i*p+parm0[tcres[i]];
+					PYA		 = _mm_add_ps(PYA , _mm_and_ps(XMM0, PMASKA));
+					PX2A	 = _mm_add_ps(PX2A, _mm_and_ps(PM128(findex2+i), PMASKA));
+					PY2A	 = _mm_add_ps(PY2A, _mm_and_ps(XMM1, PMASKA));
+					PXYA	 = _mm_add_ps(PXYA, _mm_and_ps(XMM2, PMASKA));
+					na		+= p;
+					xb		+= i*q+parm3[tcres[i]];
+					PYB		 = _mm_add_ps(PYB , _mm_and_ps(XMM0, PMASKB));
+					PX2B	 = _mm_add_ps(PX2B, _mm_and_ps(PM128(findex2+i), PMASKB));
+					PY2B	 = _mm_add_ps(PY2B, _mm_and_ps(XMM1, PMASKB));
+					PXYB	 = _mm_add_ps(PXYB, _mm_and_ps(XMM2, PMASKB));
+					nb		+= q;
+				}
+			}
+			{
+				__m128	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5;
+				__m128x	TMA, TMB;
+				XMM0	 = XMM1	 = PYA;
+				XMM3	 = XMM2	 = PY2A;
+				XMM0	 = _mm_shuffle_ps(XMM0, PX2A, _MM_SHUFFLE(1,0,1,0));
+				XMM1	 = _mm_shuffle_ps(XMM1, PX2A, _MM_SHUFFLE(3,2,3,2));
+				XMM2	 = _mm_shuffle_ps(XMM2, PXYA, _MM_SHUFFLE(1,0,1,0));
+				XMM3	 = _mm_shuffle_ps(XMM3, PXYA, _MM_SHUFFLE(3,2,3,2));
+				XMM4	 = XMM0;
+				XMM5	 = XMM1;
+				XMM0	 = _mm_shuffle_ps(XMM0, XMM2, _MM_SHUFFLE(2,0,2,0));
+				XMM4	 = _mm_shuffle_ps(XMM4, XMM2, _MM_SHUFFLE(3,1,3,1));
+				XMM1	 = _mm_shuffle_ps(XMM1, XMM3, _MM_SHUFFLE(2,0,2,0));
+				XMM5	 = _mm_shuffle_ps(XMM5, XMM3, _MM_SHUFFLE(3,1,3,1));
+				XMM0	 = _mm_add_ps(XMM0, XMM4);
+				XMM1	 = _mm_add_ps(XMM1, XMM5);
+				XMM0	 = _mm_add_ps(XMM0, XMM1);
+	
+				TMA.ps	 = XMM0;
+	
+				XMM0	 = XMM1	 = PYB;
+				XMM3	 = XMM2	 = PY2B;
+				XMM0	 = _mm_shuffle_ps(XMM0, PX2B, _MM_SHUFFLE(1,0,1,0));
+				XMM1	 = _mm_shuffle_ps(XMM1, PX2B, _MM_SHUFFLE(3,2,3,2));
+				XMM2	 = _mm_shuffle_ps(XMM2, PXYB, _MM_SHUFFLE(1,0,1,0));
+				XMM3	 = _mm_shuffle_ps(XMM3, PXYB, _MM_SHUFFLE(3,2,3,2));
+				XMM4	 = XMM0;
+				XMM5	 = XMM1;
+				XMM0	 = _mm_shuffle_ps(XMM0, XMM2, _MM_SHUFFLE(2,0,2,0));
+				XMM4	 = _mm_shuffle_ps(XMM4, XMM2, _MM_SHUFFLE(3,1,3,1));
+				XMM1	 = _mm_shuffle_ps(XMM1, XMM3, _MM_SHUFFLE(2,0,2,0));
+				XMM5	 = _mm_shuffle_ps(XMM5, XMM3, _MM_SHUFFLE(3,1,3,1));
+				XMM0	 = _mm_add_ps(XMM0, XMM4);
+				XMM1	 = _mm_add_ps(XMM1, XMM5);
+				XMM0	 = _mm_add_ps(XMM0, XMM1);
+	
+				TMB.ps	 = XMM0;
+	
+#if	defined(__SSE2__)
+				TMA.pi	 = _mm_cvttps_epi32(TMA.ps);
+				TMB.pi	 = _mm_cvttps_epi32(TMB.ps);
+#else
+				{
+					__m64	MM0, MM1, MM2, MM3;
+					MM0	 = _mm_cvttps_pi32(TMA.ps);
+					MM2	 = _mm_cvttps_pi32(TMB.ps);
+					TMA.ps	 = _mm_movehl_ps(TMA.ps, TMA.ps);
+					TMB.ps	 = _mm_movehl_ps(TMB.ps, TMB.ps);
+					MM1	 = _mm_cvttps_pi32(TMA.ps);
+					MM3	 = _mm_cvttps_pi32(TMB.ps);
+					TMA.pi64[0]	 = MM0;
+					TMB.pi64[0]	 = MM2;
+					TMA.pi64[1]	 = MM1;
+					TMB.pi64[1]	 = MM3;
+				}
+				_mm_empty();
+#endif
+				
+				ya	+= TMA.si32[0];
+				x2a	+= TMA.si32[1];
+				y2a	+= TMA.si32[2];
+				xya	+= TMA.si32[3];
+				yb	+= TMB.si32[0];
+				x2b	+= TMB.si32[1];
+				y2b	+= TMB.si32[2];
+				xyb	+= TMB.si32[3];
+			}
+			for(;i<x1;i++)
+			{
+				int quantized	 = (int)tflr[i];
+				if(quantized)
+				{
+					if(mdct[i]+info->twofitatten>=flr[i])
+					{
+						xa	+= i;
+						ya	+= quantized;
+						x2a	+= i*i;
+						y2a	+= quantized*quantized;
+						xya	+= i*quantized;
+						na	++;
+					}
+					else
+					{
+						xb	+= i;
+						yb	+= quantized;
+						x2b	+= i*i;
+						y2b	+= quantized*quantized;
+						xyb	+= i*quantized;
+						nb	++;
+					}
+				}
+			}
+		}
+	}
+#else														/* SSE Optimize */
   for(i=x0;i<=x1;i++){
     int quantized=vorbis_dBquant(flr+i);
     if(quantized){
@@ -446,6 +858,7 @@
       }
     }
   }
+#endif														/* SSE Optimize */
 
   xb+=xa;
   yb+=ya;
@@ -470,10 +883,93 @@
 }
 
 static void fit_line(lsfit_acc *a,int fits,int *y0,int *y1){
-  long x=0,y=0,x2=0,y2=0,xy=0,an=0,i;
-  long x0=a[0].x0;
-  long x1=a[fits-1].x1;
+	int32_t	x, y, x2,y2 ,xy ,an ,i;
+	int32_t	x0	 = a[0].x0;
+	int32_t	x1	 = a[fits-1].x1;
+#ifdef __SSE__												/* SSE Optimize */
+//#if	defined(__SSE2__)&&!defined(__PROF__)
+#if 0 //	defined(__SSE2__)
+	__m128i	XMM0, XMM1, XMM2, XMM3;
+	__m128x T;
+	__m128i *PA = (__m128i*)a;
 
+	XMM0 = XMM1 = XMM2 = XMM3 = _mm_setzero_si128();
+	for(i=0;i<(fits&(~1));i+=2)
+	{
+		__m128i XMM4 = _mm_load_si128(PA+i*2  );
+		__m128i XMM5 = _mm_load_si128(PA+i*2+1);
+		__m128i XMM6 = _mm_load_si128(PA+i*2+2);
+		__m128i XMM7 = _mm_load_si128(PA+i*2+3);
+		XMM0 = _mm_add_epi32(XMM0, XMM4);
+		XMM1 = _mm_add_epi32(XMM1, XMM5);
+		XMM2 = _mm_add_epi32(XMM2, XMM6);
+		XMM3 = _mm_add_epi32(XMM3, XMM7);
+	}
+	for(;i<fits;i++)
+	{
+		__m128i XMM4 = _mm_load_si128(PA+i*2  );
+		__m128i XMM5 = _mm_load_si128(PA+i*2+1);
+		XMM0 = _mm_add_epi32(XMM0, XMM4);
+		XMM1 = _mm_add_epi32(XMM1, XMM5);
+	}
+	XMM0 = _mm_add_epi32(XMM0, XMM2);
+	XMM1 = _mm_add_epi32(XMM1, XMM3);
+	T.pi = XMM0;
+	x	 = T.si32[2];
+	y	 = T.si32[3];
+	T.pi = XMM1;
+	x2	 = T.si32[0];
+	y2	 = T.si32[1];
+	xy	 = T.si32[2];
+	an	 = T.si32[3];
+#else
+	__m64	XY, X2Y2, XYAN;
+	__m64	*PA	 = (__m64*)a;
+	XY		 = 
+	X2Y2	 = 
+	XYAN	 = _mm_setzero_si64();
+
+	for(i=0;i<(fits&~1);i+=2)
+	{
+		__m64	MM0	 = *(PA+1);
+		__m64	MM1	 = *(PA+2);
+		__m64	MM2	 = *(PA+3);
+		XY		 = _mm_add_pi32(XY,   MM0);
+		X2Y2	 = _mm_add_pi32(X2Y2, MM1);
+		XYAN	 = _mm_add_pi32(XYAN, MM2);
+		MM0	 = *(PA+5);
+		MM1	 = *(PA+6);
+		MM2	 = *(PA+7);
+		XY		 = _mm_add_pi32(XY,   MM0);
+		X2Y2	 = _mm_add_pi32(X2Y2, MM1);
+		XYAN	 = _mm_add_pi32(XYAN, MM2);
+		PA	+= 8;
+	}
+	for(;i<fits;i++)
+	{
+		__m64	MM0	 = *(PA+1);
+		__m64	MM1	 = *(PA+2);
+		__m64	MM2	 = *(PA+3);
+		XY		 = _mm_add_pi32(XY,   MM0);
+		X2Y2	 = _mm_add_pi32(X2Y2, MM1);
+		XYAN	 = _mm_add_pi32(XYAN, MM2);
+		PA	+= 4;
+	}
+	{
+		__m64x	M0X, M1X, M2X;
+		M0X.pi64	 = XY;
+		M1X.pi64	 = X2Y2;
+		M2X.pi64	 = XYAN;
+		x	 = M0X.ssi32[0];
+		y	 = M0X.ssi32[1];
+		x2	 = M1X.ssi32[0];
+		y2	 = M1X.ssi32[1];
+		xy	 = M2X.ssi32[0];
+		an	 = M2X.ssi32[1];
+	}
+	_mm_empty();
+#endif
+#else														/* SSE Optimize */
   for(i=0;i<fits;i++){
     x+=a[i].xa;
     y+=a[i].ya;
@@ -482,6 +978,7 @@
     xy+=a[i].xya;
     an+=a[i].an;
   }
+#endif														/* SSE Optimize */
 
   if(*y0>=0){
     x+=   x0;
@@ -537,7 +1034,138 @@
 
 static int inspect_error(int x0,int x1,int y0,int y1,const float *mask,
 			 const float *mdct,
+#if	defined(__SSE__)										/* SSE Optimize */
+			 vorbis_info_floor1 *info, const float *tflr,
+			  const float *tmask, const int *tcres){
+#else														/* SSE Optimize */
 			 vorbis_info_floor1 *info){
+#endif														/* SSE Optimize */
+#if	defined(__SSE__)										/* SSE Optimize */
+	int x = x0;
+	int y = y0;
+	int val = vorbis_dBquant(mask+x);
+	int mse = 0;
+	int n = 0;
+	int shortLen = y1-y;
+	int longLen = x1-x;
+	int decInc = (shortLen << 21) / longLen;
+	int j;
+
+	if(shortLen>=0)
+		j = 0x200   + (y<<21);
+	else
+		j = 0x1FF800 + (y<<21);
+	{
+		int	x05;
+		x05	 = (x+3)&(~3);
+		x05	 = (x05>x1)?x1:x05;
+		for(;x<x05;x++)
+		{
+			y = j >> 21;
+			val	 = tflr[x];
+			mse	+= ((y-val)*(y-val));
+			n++;
+			if(mdct[x]+info->twofitatten>=mask[x])
+			{
+				if(y+info->maxover<val)return(1);
+				if(y-info->maxunder>val)return(1);
+			}
+			j += decInc;
+		}
+	}
+	{
+		register __m128	PMSE;
+		__m128	PIMOVER		 = _mm_set1_ps(info->maxover);
+		__m128	PIMUNDER	 = _mm_set1_ps(info->maxunder);
+#if defined(__SSE2__)
+		__m128i PJ0 = _mm_set_epi32(
+			j+decInc*3, j+decInc*2, j+decInc  , j
+		);
+		__m128i	PDECINC = _mm_set1_epi32(decInc*4);
+#else
+		__m64 PJ0 = _mm_set_pi32(j+decInc  , j         );
+		__m64 PJ1 = _mm_set_pi32(j+decInc*3, j+decInc*2);
+		__m64 PDECINC = _mm_set1_pi32(decInc*4);
+#endif
+		int	x05	 = x1&(~3);
+
+		x05	 = (x05>x1)?x1:x05;
+		PMSE	 = _mm_setzero_ps();
+		for(;x<x05;x+=4)
+		{
+			__m128	PY;
+			register __m128	PVAL, PDMSE;
+#if defined(__SSE2__)
+			{
+				__m128i XMM0 = PJ0;
+				XMM0 = _mm_srai_epi32(XMM0, 21);
+				PY = _mm_cvtepi32_ps(XMM0);
+				PJ0 = _mm_add_epi32(PJ0, PDECINC);
+			}
+#else
+			{
+				__m64 MM1 = PJ1;
+				__m64 MM0 = PJ0;
+				MM1 = _mm_srai_pi32(MM1, 21);
+				MM0 = _mm_srai_pi32(MM0, 21);
+#pragma warning(disable : 592)
+				PY  = _mm_cvtpi32_ps(PY, MM1);
+#pragma warning(default : 592)
+				PJ1 = _mm_add_pi32(PJ1, PDECINC);
+				PY  = _mm_movelh_ps(PY, PY);
+				PJ0 = _mm_add_pi32(PJ0, PDECINC);
+				PY  = _mm_cvtpi32_ps(PY, MM0);
+			}
+#endif
+
+			PVAL	 = _mm_load_ps(tflr+x);
+			PDMSE	 = PY;
+			PDMSE	 = _mm_sub_ps(PDMSE, PVAL);
+			PDMSE	 = _mm_mul_ps(PDMSE, PDMSE);
+			PMSE	 = _mm_add_ps(PMSE, PDMSE);
+			n	+= 4;
+			if(tcres[x]){
+				register __m128	PMASK1, PMASK2;
+				PMASK1	 = PY;
+				PMASK2	 = PY;
+				PMASK1	 = _mm_add_ps(PMASK1, PIMOVER);
+				PMASK2	 = _mm_sub_ps(PMASK2, PIMUNDER);
+				PMASK1	 = _mm_cmplt_ps(PMASK1, PVAL);
+				PMASK2	 = _mm_cmpgt_ps(PMASK2, PVAL);
+				PMASK1	 = _mm_or_ps(PMASK1, PMASK2);
+				if(_mm_movemask_ps(PMASK1)&tcres[x])
+				{
+#if	!defined(__SSE2__)
+					_mm_empty();
+#endif
+					return(1);
+				}
+			}
+		}
+#if	defined(__SSE2__)
+		j = _mm_cvtsi128_si32(PJ0);
+#else
+		j = _mm_cvtsi64_si32(PJ0);
+		_mm_empty();
+#endif
+		mse	+= (int)_mm_add_horz(PMSE);
+	}
+	{
+		for(;x<x1;x++)
+		{
+			y = j >> 21;
+			val	 = tflr[x];
+			mse	+= ((y-val)*(y-val));
+			n++;
+			if(mdct[x]+info->twofitatten>=mask[x])
+			{
+				if(y+info->maxover<val)return(1);
+				if(y-info->maxunder>val)return(1);
+			}
+			j += decInc;
+		}
+	}
+#else														/* SSE Optimize */
   int dy=y1-y0;
   int adx=x1-x0;
   int ady=abs(dy);
@@ -579,6 +1207,7 @@
       }
     }
   }
+#endif														/* SSE Optimize */
   
   if(info->maxover*info->maxover/n>info->maxerr)return(0);
   if(info->maxunder*info->maxunder/n>info->maxerr)return(0);
@@ -614,6 +1243,185 @@
   int *output=NULL;
   int memo[VIF_POSIT+2];
 
+#if	defined(__SSE__)										/* SSE Optimize */
+	float *tflr = (float*)_ogg_alloca(sizeof(float)*n);
+	float *tmask = (float*)_ogg_alloca(sizeof(float)*n);
+	int *tcres = (int*)_ogg_alloca(sizeof(int)*n);
+	__m128	PIT	 = _mm_set1_ps(info->twofitatten);
+	
+	/*
+		preprocess (vorbis_dbQuant)
+	*/
+	for(i=0;i<n;i+=16)
+	{
+		__m128	XMM0, XMM1, XMM2, XMM3;
+#if	!defined(__SSE2__)
+		register __m64	MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7;
+#endif
+		XMM0	 = _mm_load_ps(logmask+i   );
+		XMM1	 = _mm_load_ps(logmask+i+ 4);
+		XMM2	 = _mm_load_ps(logmask+i+ 8);
+		XMM3	 = _mm_load_ps(logmask+i+12);
+		XMM0	 = _mm_mul_ps(XMM0, pfv0.ps);
+		XMM1	 = _mm_mul_ps(XMM1, pfv0.ps);
+		XMM2	 = _mm_mul_ps(XMM2, pfv0.ps);
+		XMM3	 = _mm_mul_ps(XMM3, pfv0.ps);
+		XMM0	 = _mm_add_ps(XMM0, pfv1.ps);
+		XMM1	 = _mm_add_ps(XMM1, pfv1.ps);
+		XMM2	 = _mm_add_ps(XMM2, pfv1.ps);
+		XMM3	 = _mm_add_ps(XMM3, pfv1.ps);
+		XMM0	 = _mm_max_ps(XMM0, PFV_0.ps);
+		XMM1	 = _mm_max_ps(XMM1, PFV_0.ps);
+		XMM2	 = _mm_max_ps(XMM2, PFV_0.ps);
+		XMM3	 = _mm_max_ps(XMM3, PFV_0.ps);
+		XMM0	 = _mm_min_ps(XMM0, pfv2.ps);
+		XMM1	 = _mm_min_ps(XMM1, pfv2.ps);
+		XMM2	 = _mm_min_ps(XMM2, pfv2.ps);
+		XMM3	 = _mm_min_ps(XMM3, pfv2.ps);
+#if	defined(__SSE2__)
+		XMM0	 = _mm_cvtepi32_ps(_mm_cvttps_epi32(XMM0));
+		XMM1	 = _mm_cvtepi32_ps(_mm_cvttps_epi32(XMM1));
+		XMM2	 = _mm_cvtepi32_ps(_mm_cvttps_epi32(XMM2));
+		XMM3	 = _mm_cvtepi32_ps(_mm_cvttps_epi32(XMM3));
+#else
+		MM0	 = _mm_cvttps_pi32(XMM0);
+		MM2	 = _mm_cvttps_pi32(XMM1);
+		MM4	 = _mm_cvttps_pi32(XMM2);
+		MM6	 = _mm_cvttps_pi32(XMM3);
+		XMM0	 = _mm_movehl_ps(XMM0, XMM0);
+		XMM1	 = _mm_movehl_ps(XMM1, XMM1);
+		XMM2	 = _mm_movehl_ps(XMM2, XMM2);
+		XMM3	 = _mm_movehl_ps(XMM3, XMM3);
+		MM1	 = _mm_cvttps_pi32(XMM0);
+		MM3	 = _mm_cvttps_pi32(XMM1);
+		MM5	 = _mm_cvttps_pi32(XMM2);
+		MM7	 = _mm_cvttps_pi32(XMM3);
+		XMM0	 = _mm_cvtpi32_ps(XMM0, MM1);
+		XMM1	 = _mm_cvtpi32_ps(XMM1, MM3);
+		XMM2	 = _mm_cvtpi32_ps(XMM2, MM5);
+		XMM3	 = _mm_cvtpi32_ps(XMM3, MM7);
+		XMM0	 = _mm_movelh_ps(XMM0, XMM0);
+		XMM1	 = _mm_movelh_ps(XMM1, XMM1);
+		XMM2	 = _mm_movelh_ps(XMM2, XMM2);
+		XMM3	 = _mm_movelh_ps(XMM3, XMM3);
+		XMM0	 = _mm_cvtpi32_ps(XMM0, MM0);
+		XMM1	 = _mm_cvtpi32_ps(XMM1, MM2);
+		XMM2	 = _mm_cvtpi32_ps(XMM2, MM4);
+		XMM3	 = _mm_cvtpi32_ps(XMM3, MM6);
+#endif
+		_mm_store_ps(tflr+i   , XMM0);
+		_mm_store_ps(tflr+i+ 4, XMM1);
+		_mm_store_ps(tflr+i+ 8, XMM2);
+		_mm_store_ps(tflr+i+12, XMM3);
+	}
+#if	!defined(__SSE2__)
+	_mm_empty();
+#endif
+	/*
+		preprocess (mdct+info->twofitatten>=flr)
+	*/
+	for(i=0;i<n;i+=64)
+	{
+		__m128	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6;
+		XMM0	 = _mm_load_ps(logmdct+i   );
+		XMM1	 = _mm_load_ps(logmdct+i+ 4);
+		XMM2	 = _mm_load_ps(logmask+i   );
+		XMM3	 = _mm_load_ps(logmask+i+ 4);
+		XMM0	 = _mm_add_ps(XMM0, PIT);
+		XMM1	 = _mm_add_ps(XMM1, PIT);
+		XMM4	 = _mm_load_ps(logmdct+i+ 8);
+		XMM5	 = _mm_load_ps(logmdct+i+12);
+		XMM2	 = _mm_cmplt_ps(XMM2, XMM0);
+		XMM3	 = _mm_cmplt_ps(XMM3, XMM1);
+		XMM0	 = _mm_load_ps(logmask+i+ 8);
+		XMM1	 = _mm_load_ps(logmask+i+12);
+		XMM4	 = _mm_add_ps(XMM4, PIT);
+		XMM5	 = _mm_add_ps(XMM5, PIT);
+		_mm_store_ps(tmask+i   , XMM2);
+		_mm_store_ps(tmask+i+ 4, XMM3);
+		XMM6	 = _mm_load_ps(logmdct+i+16);
+		XMM0	 = _mm_cmplt_ps(XMM0, XMM4);
+		XMM4	 = _mm_load_ps(logmdct+i+20);
+		XMM1	 = _mm_cmplt_ps(XMM1, XMM5);
+		XMM5	 = _mm_load_ps(logmask+i+16);
+		tcres[i   ]	 = _mm_movemask_ps(XMM2);
+		XMM6	 = _mm_add_ps(XMM6, PIT);
+		XMM2	 = _mm_load_ps(logmask+i+20);
+		tcres[i+ 4]	 = _mm_movemask_ps(XMM3);
+		XMM4	 = _mm_add_ps(XMM4, PIT);
+		XMM3	 = _mm_load_ps(logmdct+i+24);
+		_mm_store_ps(tmask+i+ 8, XMM0);
+		tcres[i+ 8]	 = _mm_movemask_ps(XMM0);
+		_mm_store_ps(tmask+i+12, XMM1);
+		XMM0	 = _mm_load_ps(logmdct+i+28);
+		tcres[i+12]	 = _mm_movemask_ps(XMM1);
+		XMM1	 = _mm_load_ps(logmdct+i+32);
+		XMM5	 = _mm_cmplt_ps(XMM5, XMM6);
+		XMM2	 = _mm_cmplt_ps(XMM2, XMM4);
+		XMM6	 = _mm_load_ps(logmask+i+24);
+		XMM4	 = _mm_load_ps(logmask+i+28);
+		XMM3	 = _mm_add_ps(XMM3, PIT);
+		XMM0	 = _mm_add_ps(XMM0, PIT);
+		_mm_store_ps(tmask+i+16, XMM5);
+		_mm_store_ps(tmask+i+20, XMM2);
+		XMM6	 = _mm_cmplt_ps(XMM6, XMM3);
+		XMM3	 = _mm_load_ps(logmdct+i+36);
+		XMM4	 = _mm_cmplt_ps(XMM4, XMM0);
+		XMM0	 = _mm_load_ps(logmask+i+32);
+		tcres[i+16]	 = _mm_movemask_ps(XMM5);
+		XMM5	 = _mm_load_ps(logmask+i+36);
+		XMM1	 = _mm_add_ps(XMM1, PIT);
+		XMM3	 = _mm_add_ps(XMM3, PIT);
+		tcres[i+20]	 = _mm_movemask_ps(XMM2);
+		XMM2	 = _mm_load_ps(logmdct+i+40);
+		_mm_store_ps(tmask+i+24, XMM6);
+		tcres[i+24]	 = _mm_movemask_ps(XMM6);
+		XMM6	 = _mm_load_ps(logmdct+i+44);
+		_mm_store_ps(tmask+i+28, XMM4);
+		XMM0	 = _mm_cmplt_ps(XMM0, XMM1);
+		tcres[i+28]	 = _mm_movemask_ps(XMM4);
+		XMM5	 = _mm_cmplt_ps(XMM5, XMM3);
+		XMM1	 = _mm_load_ps(logmask+i+40);
+		XMM3	 = _mm_load_ps(logmask+i+44);
+		XMM2	 = _mm_add_ps(XMM2, PIT);
+		XMM6	 = _mm_add_ps(XMM6, PIT);
+		_mm_store_ps(tmask+i+32, XMM0);
+		_mm_store_ps(tmask+i+36, XMM5);
+		XMM4	 = _mm_load_ps(logmdct+i+48);
+		XMM1	 = _mm_cmplt_ps(XMM1, XMM2);
+		XMM2	 = _mm_load_ps(logmdct+i+52);
+		XMM3	 = _mm_cmplt_ps(XMM3, XMM6);
+		XMM6	 = _mm_load_ps(logmask+i+48);
+		tcres[i+32]	 = _mm_movemask_ps(XMM0);
+		XMM4	 = _mm_add_ps(XMM4, PIT);
+		XMM0	 = _mm_load_ps(logmask+i+52);
+		tcres[i+36]	 = _mm_movemask_ps(XMM5);
+		XMM2	 = _mm_add_ps(XMM2, PIT);
+		XMM5	 = _mm_load_ps(logmdct+i+56);
+		_mm_store_ps(tmask+i+40, XMM1);
+		tcres[i+40]	 = _mm_movemask_ps(XMM1);
+		_mm_store_ps(tmask+i+44, XMM3);
+		XMM1	 = _mm_load_ps(logmdct+i+60);
+		tcres[i+44]	 = _mm_movemask_ps(XMM3);
+		XMM6	 = _mm_cmplt_ps(XMM6, XMM4);
+		XMM0	 = _mm_cmplt_ps(XMM0, XMM2);
+		XMM4	 = _mm_load_ps(logmask+i+56);
+		XMM2	 = _mm_load_ps(logmask+i+60);
+		XMM5	 = _mm_add_ps(XMM5, PIT);
+		XMM1	 = _mm_add_ps(XMM1, PIT);
+		_mm_store_ps(tmask+i+48, XMM6);
+		_mm_store_ps(tmask+i+52, XMM0);
+		XMM4	 = _mm_cmplt_ps(XMM4, XMM5);
+		XMM2	 = _mm_cmplt_ps(XMM2, XMM1);
+		tcres[i+48]	 = _mm_movemask_ps(XMM6);
+		tcres[i+52]	 = _mm_movemask_ps(XMM0);
+		_mm_store_ps(tmask+i+56, XMM4);
+		tcres[i+56]	 = _mm_movemask_ps(XMM4);
+		_mm_store_ps(tmask+i+60, XMM2);
+		tcres[i+60]	 = _mm_movemask_ps(XMM2);
+	}
+#endif														/* SSE Optimize */
+
   for(i=0;i<posts;i++)fit_valueA[i]=-200; /* mark all unused */
   for(i=0;i<posts;i++)fit_valueB[i]=-200; /* mark all unused */
   for(i=0;i<posts;i++)loneighbor[i]=0; /* 0 for the implicit 0 post */
@@ -622,6 +1430,19 @@
 
   /* quantize the relevant floor points and collect them into line fit
      structures (one per minimal division) at the same time */
+#if	defined(__SSE__)										/* SSE Optimize */
+	if(posts==0)
+	{
+		nonzero+=accumulate_fit(logmask,logmdct,0,n,fits,n,info, tflr, tmask, tcres);
+	}
+	else
+	{
+		for(i=0;i<posts-1;i++)
+			nonzero+=accumulate_fit(logmask,logmdct,look->sorted_index[i],
+				look->sorted_index[i+1],fits+i,
+				n,info, tflr, tmask, tcres);
+	}
+#else														/* SSE Optimize */
   if(posts==0){
     nonzero+=accumulate_fit(logmask,logmdct,0,n,fits,n,info);
   }else{
@@ -630,6 +1451,7 @@
 			      look->sorted_index[i+1],fits+i,
 			      n,info);
   }
+#endif														/* SSE Optimize */
   
   if(nonzero){
     /* start by fitting the implicit base case.... */
@@ -669,7 +1491,11 @@
 	    exit(1);
 	  }
 
+#if	defined(__SSE__)										/* SSE Optimize */
+	  if(inspect_error(lx,hx,ly,hy,logmask,logmdct,info, tflr, tmask, tcres)){
+#else														/* SSE Optimize */
 	  if(inspect_error(lx,hx,ly,hy,logmask,logmdct,info)){
+#endif														/* SSE Optimize */
 	    /* outside error bounds/begin search area.  Split it. */
 	    int ly0=-200;
 	    int ly1=-200;
@@ -763,7 +1589,7 @@
 
 int floor1_encode(oggpack_buffer *opb,vorbis_block *vb,
 		  vorbis_look_floor1 *look,
-		  int *post,int *ilogmask){
+		   int *post,int *ilogmask){
 
   long i,j;
   vorbis_info_floor1 *info=look->vi;
@@ -777,6 +1603,324 @@
 
   /* quantize values to multiplier spec */
   if(post){
+#if	defined(__SSE2__)
+	int	posts4	 = posts&(~3);
+	int	posts8	 = posts&(~7);
+	static _MM_ALIGN16 const __m128x PIV0 = 
+		{ .si32 = {0x00007FFF, 0x00007FFF, 0x00007FFF, 0x00007FFF} };
+	static _MM_ALIGN16 const __m128x PIV1 = 
+		{ .si32 = {0xFFFF8000, 0xFFFF8000, 0xFFFF8000, 0xFFFF8000} };
+	
+	i	 = 0;
+	switch(info->mult)
+	{
+		case 1:
+			for(;i<posts8;i+=8)
+			{
+				__m128i	PV0	 = PM128I(post+i  );
+				__m128i	PV1	 = PM128I(post+i+4);
+				__m128i	PV2	 = PV0;
+				__m128i	PV3	 = PV1;
+				PV0	 = _mm_and_si128(PV0, PIV0.pi);
+				PV1	 = _mm_and_si128(PV1, PIV0.pi);
+				PV2	 = _mm_and_si128(PV2, PIV1.pi);
+				PV3	 = _mm_and_si128(PV3, PIV1.pi);
+				PV0	 = _mm_srli_epi32(PV0, 2);
+				PV1	 = _mm_srli_epi32(PV1, 2);
+				PV0	 = _mm_or_si128(PV0, PV2);
+				PV1	 = _mm_or_si128(PV1, PV3);
+				_mm_store_si128(post+i, PV0);
+				_mm_store_si128(post+i+4, PV1);
+			}
+			for(;i<posts4;i+=4)
+			{
+				__m128i	PV0	 = PM128I(post+i  );
+				__m128i	PV2	 = PV0;
+				PV0	 = _mm_and_si128(PV0, PIV0.pi);
+				PV2	 = _mm_and_si128(PV2, PIV1.pi);
+				PV0	 = _mm_srli_epi32(PV0, 2);
+				PV0	 = _mm_or_si128(PV0, PV2);
+				_mm_store_si128(post+i, PV0);
+			}
+			break;
+		case 2:
+			for(;i<posts8;i+=8)
+			{
+				__m128i	PV0	 = PM128I(post+i  );
+				__m128i	PV1	 = PM128I(post+i+4);
+				__m128i	PV2	 = PV0;
+				__m128i	PV3	 = PV1;
+				PV0	 = _mm_and_si128(PV0, PIV0.pi);
+				PV1	 = _mm_and_si128(PV1, PIV0.pi);
+				PV2	 = _mm_and_si128(PV2, PIV1.pi);
+				PV3	 = _mm_and_si128(PV3, PIV1.pi);
+				PV0	 = _mm_srli_epi32(PV0, 3);
+				PV1	 = _mm_srli_epi32(PV1, 3);
+				PV0	 = _mm_or_si128(PV0, PV2);
+				PV1	 = _mm_or_si128(PV1, PV3);
+				_mm_store_si128(post+i, PV0);
+				_mm_store_si128(post+i+4, PV1);
+			}
+			for(;i<posts4;i+=4)
+			{
+				__m128i	PV0	 = PM128I(post+i  );
+				__m128i	PV2	 = PV0;
+				PV0	 = _mm_and_si128(PV0, PIV0.pi);
+				PV2	 = _mm_and_si128(PV2, PIV1.pi);
+				PV0	 = _mm_srli_epi32(PV0, 3);
+				PV0	 = _mm_or_si128(PV0, PV2);
+				_mm_store_si128(post+i, PV0);
+			}
+			break;
+		case 3:	/* 1024 -> 86: quantize by val/12, same as the scalar tail below */
+			for(;i<posts8;i+=8)
+			{
+				__m128i	PV0	 = PM128I(post+i  );
+				__m128i	PV1	 = PM128I(post+i+4);
+				__m128i	PV2	 = PV0;
+				__m128i	PV3	 = PV1;
+				const __m128i	PDIV3	 = _mm_set1_epi32(0x0000AAAB);
+				PV0	 = _mm_and_si128(PV0, PIV0.pi);
+				PV1	 = _mm_and_si128(PV1, PIV0.pi);
+				PV2	 = _mm_and_si128(PV2, PIV1.pi);
+				PV3	 = _mm_and_si128(PV3, PIV1.pi);
+				/* val/12 == ((val>>2)*0xAAAB)>>17, exact for val<=0x7FFF;    */
+				/* mulhi_epu16 supplies >>16 (upper 16 bits of each lane are 0) */
+				PV0	 = _mm_srli_epi32(PV0, 2);
+				PV1	 = _mm_srli_epi32(PV1, 2);
+				PV0	 = _mm_mulhi_epu16(PV0, PDIV3);
+				PV1	 = _mm_mulhi_epu16(PV1, PDIV3);
+				PV0	 = _mm_srli_epi32(PV0, 1);
+				PV1	 = _mm_srli_epi32(PV1, 1);
+				PV0	 = _mm_or_si128(PV0, PV2);
+				PV1	 = _mm_or_si128(PV1, PV3);
+				_mm_store_si128(post+i, PV0);
+				_mm_store_si128(post+i+4, PV1);
+			}
+			for(;i<posts4;i+=4)
+			{
+				__m128i	PV0	 = PM128I(post+i  );
+				__m128i	PV2	 = PV0;
+				const __m128i	PDIV3	 = _mm_set1_epi32(0x0000AAAB);
+				PV0	 = _mm_and_si128(PV0, PIV0.pi);
+				PV2	 = _mm_and_si128(PV2, PIV1.pi);
+				/* val/12 == ((val>>2)*0xAAAB)>>17 (see the 8-wide loop above) */
+				PV0	 = _mm_srli_epi32(PV0, 2);
+				PV0	 = _mm_mulhi_epu16(PV0, PDIV3);
+				PV0	 = _mm_srli_epi32(PV0, 1);
+				PV0	 = _mm_or_si128(PV0, PV2);
+				_mm_store_si128(post+i, PV0);
+			}
+			break;
+		case 4:
+			for(;i<posts8;i+=8)
+			{
+				__m128i	PV0	 = PM128I(post+i  );
+				__m128i	PV1	 = PM128I(post+i+4);
+				__m128i	PV2	 = PV0;
+				__m128i	PV3	 = PV1;
+				PV0	 = _mm_and_si128(PV0, PIV0.pi);
+				PV1	 = _mm_and_si128(PV1, PIV0.pi);
+				PV2	 = _mm_and_si128(PV2, PIV1.pi);
+				PV3	 = _mm_and_si128(PV3, PIV1.pi);
+				PV0	 = _mm_srli_epi32(PV0, 4);
+				PV1	 = _mm_srli_epi32(PV1, 4);
+				PV0	 = _mm_or_si128(PV0, PV2);
+				PV1	 = _mm_or_si128(PV1, PV3);
+				_mm_store_si128(post+i, PV0);
+				_mm_store_si128(post+i+4, PV1);
+			}
+			for(;i<posts4;i+=4)
+			{
+				__m128i	PV0	 = PM128I(post+i  );
+				__m128i	PV2	 = PV0;
+				PV0	 = _mm_and_si128(PV0, PIV0.pi);
+				PV2	 = _mm_and_si128(PV2, PIV1.pi);
+				PV0	 = _mm_srli_epi32(PV0, 4);
+				PV0	 = _mm_or_si128(PV0, PV2);
+				_mm_store_si128(post+i, PV0);
+			}
+			break;
+	}
+	for(;i<posts;i++)
+	{
+		int val	 = post[i]&0x7fff;
+		switch(info->mult)
+		{
+			case 1: /* 1024 -> 256 */
+				val>>=2;
+				break;
+			case 2: /* 1024 -> 128 */
+				val>>=3;
+				break;
+			case 3: /* 1024 -> 86 */
+				val/=12;
+				break;
+			case 4: /* 1024 -> 64 */
+				val>>=4;
+				break;
+		}
+		post[i]=val | (post[i]&0x8000);
+	}
+#elif	defined(__SSE__)
+	int	posts2	 = posts&(~1);
+	int	posts4	 = posts&(~3);
+	static uint32_t PIV0[2]	 = {0x00007FFF, 0x00007FFF};
+	static uint32_t PIV1[2]	 = {0xFFFF8000, 0xFFFF8000};
+	
+	i	 = 0;
+	switch(info->mult)
+	{
+		case 1:
+			for(;i<posts4;i+=4)
+			{
+				__m64	PV0	 = PM64(post+i  );
+				__m64	PV1	 = PM64(post+i+2);
+				__m64	PV2	 = PV0;
+				__m64	PV3	 = PV1;
+				PV0	 = _mm_and_si64(PV0, PM64(PIV0));
+				PV1	 = _mm_and_si64(PV1, PM64(PIV0));
+				PV2	 = _mm_and_si64(PV2, PM64(PIV1));
+				PV3	 = _mm_and_si64(PV3, PM64(PIV1));
+				PV0	 = _mm_srli_pi32(PV0, 2);
+				PV1	 = _mm_srli_pi32(PV1, 2);
+				PV0	 = _mm_or_si64(PV0, PV2);
+				PV1	 = _mm_or_si64(PV1, PV3);
+				PM64(post+i  )	 = PV0;
+				PM64(post+i+2)	 = PV1;
+			}
+			for(;i<posts2;i+=2)
+			{
+				__m64	PV0	 = PM64(post+i  );
+				__m64	PV2	 = PV0;
+				PV0	 = _mm_and_si64(PV0, PM64(PIV0));
+				PV2	 = _mm_and_si64(PV2, PM64(PIV1));
+				PV0	 = _mm_srli_pi32(PV0, 2);
+				PV0	 = _mm_or_si64(PV0, PV2);
+				PM64(post+i  )	 = PV0;
+			}
+			break;
+		case 2:
+			for(;i<posts4;i+=4)
+			{
+				__m64	PV0	 = PM64(post+i  );
+				__m64	PV1	 = PM64(post+i+2);
+				__m64	PV2	 = PV0;
+				__m64	PV3	 = PV1;
+				PV0	 = _mm_and_si64(PV0, PM64(PIV0));
+				PV1	 = _mm_and_si64(PV1, PM64(PIV0));
+				PV2	 = _mm_and_si64(PV2, PM64(PIV1));
+				PV3	 = _mm_and_si64(PV3, PM64(PIV1));
+				PV0	 = _mm_srli_pi32(PV0, 3);
+				PV1	 = _mm_srli_pi32(PV1, 3);
+				PV0	 = _mm_or_si64(PV0, PV2);
+				PV1	 = _mm_or_si64(PV1, PV3);
+				PM64(post+i  )	 = PV0;
+				PM64(post+i+2)	 = PV1;
+			}
+			for(;i<posts2;i+=2)
+			{
+				__m64	PV0	 = PM64(post+i  );
+				__m64	PV2	 = PV0;
+				PV0	 = _mm_and_si64(PV0, PM64(PIV0));
+				PV2	 = _mm_and_si64(PV2, PM64(PIV1));
+				PV0	 = _mm_srli_pi32(PV0, 3);
+				PV0	 = _mm_or_si64(PV0, PV2);
+				PM64(post+i  )	 = PV0;
+			}
+			break;
+		case 3:	/* 1024 -> 86: quantize by val/12, same as the scalar tail below */
+			for(;i<posts4;i+=4)
+			{
+				__m64	PV0	 = PM64(post+i  );
+				__m64	PV1	 = PM64(post+i+2);
+				__m64	PV2	 = PV0;
+				__m64	PV3	 = PV1;
+				const __m64	PDIV3	 = _mm_set1_pi32(0x0000AAAB);
+				PV0	 = _mm_and_si64(PV0, PM64(PIV0));
+				PV1	 = _mm_and_si64(PV1, PM64(PIV0));
+				PV2	 = _mm_and_si64(PV2, PM64(PIV1));
+				PV3	 = _mm_and_si64(PV3, PM64(PIV1));
+				/* val/12 == ((val>>2)*0xAAAB)>>17, exact for val<=0x7FFF;   */
+				/* mulhi_pu16 supplies >>16 (upper 16 bits of each lane are 0) */
+				PV0	 = _mm_srli_pi32(PV0, 2);
+				PV1	 = _mm_srli_pi32(PV1, 2);
+				PV0	 = _mm_mulhi_pu16(PV0, PDIV3);
+				PV1	 = _mm_mulhi_pu16(PV1, PDIV3);
+				PV0	 = _mm_srli_pi32(PV0, 1);
+				PV1	 = _mm_srli_pi32(PV1, 1);
+				PV0	 = _mm_or_si64(PV0, PV2);
+				PV1	 = _mm_or_si64(PV1, PV3);
+				PM64(post+i  )	 = PV0;
+				PM64(post+i+2)	 = PV1;
+			}
+			for(;i<posts2;i+=2)
+			{
+				__m64	PV0	 = PM64(post+i  );
+				__m64	PV2	 = PV0;
+				const __m64	PDIV3	 = _mm_set1_pi32(0x0000AAAB);
+				PV0	 = _mm_and_si64(PV0, PM64(PIV0));
+				PV2	 = _mm_and_si64(PV2, PM64(PIV1));
+				/* val/12 == ((val>>2)*0xAAAB)>>17 (see the 4-wide loop above) */
+				PV0	 = _mm_srli_pi32(PV0, 2);
+				PV0	 = _mm_mulhi_pu16(PV0, PDIV3);
+				PV0	 = _mm_srli_pi32(PV0, 1);
+				PV0	 = _mm_or_si64(PV0, PV2);
+				PM64(post+i  )	 = PV0;
+			}
+			break;
+		case 4:
+			for(;i<posts4;i+=4)
+			{
+				__m64	PV0	 = PM64(post+i  );
+				__m64	PV1	 = PM64(post+i+2);
+				__m64	PV2	 = PV0;
+				__m64	PV3	 = PV1;
+				PV0	 = _mm_and_si64(PV0, PM64(PIV0));
+				PV1	 = _mm_and_si64(PV1, PM64(PIV0));
+				PV2	 = _mm_and_si64(PV2, PM64(PIV1));
+				PV3	 = _mm_and_si64(PV3, PM64(PIV1));
+				PV0	 = _mm_srli_pi32(PV0, 4);
+				PV1	 = _mm_srli_pi32(PV1, 4);
+				PV0	 = _mm_or_si64(PV0, PV2);
+				PV1	 = _mm_or_si64(PV1, PV3);
+				PM64(post+i  )	 = PV0;
+				PM64(post+i+2)	 = PV1;
+			}
+			for(;i<posts2;i+=2)
+			{
+				__m64	PV0	 = PM64(post+i  );
+				__m64	PV2	 = PV0;
+				PV0	 = _mm_and_si64(PV0, PM64(PIV0));
+				PV2	 = _mm_and_si64(PV2, PM64(PIV1));
+				PV0	 = _mm_srli_pi32(PV0, 4);
+				PV0	 = _mm_or_si64(PV0, PV2);
+				PM64(post+i  )	 = PV0;
+			}
+			break;
+	}
+	_mm_empty();
+	for(;i<posts;i++)
+	{
+		int val	 = post[i]&0x7fff;
+		switch(info->mult)
+		{
+			case 1: /* 1024 -> 256 */
+				val>>=2;
+				break;
+			case 2: /* 1024 -> 128 */
+				val>>=3;
+				break;
+			case 3: /* 1024 -> 86 */
+				val/=12;
+				break;
+			case 4: /* 1024 -> 64 */
+				val>>=4;
+				break;
+		}
+		post[i]=val | (post[i]&0x8000);
+	}
+#else														/* SSE Optimize */
     for(i=0;i<posts;i++){
       int val=post[i]&0x7fff;
       switch(info->mult){
@@ -795,6 +1939,7 @@
       }
       post[i]=val | (post[i]&0x8000);
     }
+#endif														/* SSE Optimize */
 
     out[0]=post[0];
     out[1]=post[1];
@@ -853,7 +1998,6 @@
     oggpack_write(opb,out[0],ilog(look->quant_q-1));
     oggpack_write(opb,out[1],ilog(look->quant_q-1));
       
-      
     /* partition by partition */
     for(i=0,j=2;i<info->partitions;i++){
       int class=info->partitionclass[i];
@@ -951,6 +2095,9 @@
 	  ly=hy;
 	}
       }
+#if	defined(__SSE__)&&!defined(__SSE2__)					/* SSE Optimize */
+	_mm_empty();
+#endif														/* SSE Optimize */
       for(j=hx;j<vb->pcmend/2;j++)ilogmask[j]=ly; /* be certain */    
       seq++;
       return(1);
@@ -1081,7 +2228,21 @@
     for(j=hx;j<n;j++)out[j]*=FLOOR1_fromdB_LOOKUP[ly]; /* be certain */    
     return(1);
   }
+#if	defined(__SSE__)										/* SSE Optimize */
+  {
+	__m128 XMM0 = _mm_setzero_ps();
+	for(j=0;j<n;j+=16)
+	{
+	  _mm_store_ps(out+j   , XMM0);
+	  _mm_store_ps(out+j+ 4, XMM0);
+	  _mm_store_ps(out+j+ 8, XMM0);
+	  _mm_store_ps(out+j+12, XMM0);
+	}
+  }
+#else														/* SSE Optimize */
   memset(out,0,sizeof(*out)*n);
+#endif														/* SSE Optimize */
+
   return(0);
 }
 
diff -ruN --exclude '*.in' libvorbis-1.2.0/lib/info.c libvorbis-1.2.0-sse/lib/info.c
--- libvorbis-1.2.0/lib/info.c	2007-08-02 12:42:08.000000000 +0200
+++ libvorbis-1.2.0-sse/lib/info.c	2007-08-02 12:44:44.000000000 +0200
@@ -30,6 +30,12 @@
 #include "psy.h"
 #include "misc.h"
 #include "os.h"
+#ifdef __SSE__												/* SSE Optimize */
+#include "xmmlib.h"
+#if	!defined(CPDATE)
+#define CPDATE __DATE__
+#endif
+#endif														/* SSE Optimize */
 
 /* helpers */
 static int ilog2(unsigned int v){
@@ -60,12 +66,21 @@
 }
 
 void vorbis_comment_add(vorbis_comment *vc,char *comment){
+#ifdef	__SSE__												/* SSE Optimize */
+  vc->user_comments=realloc(vc->user_comments,
+			    (vc->comments+2)*sizeof(*vc->user_comments));
+  vc->comment_lengths=realloc(vc->comment_lengths,
+      			    (vc->comments+2)*sizeof(*vc->comment_lengths));
+  vc->comment_lengths[vc->comments]=strlen(comment);
+  vc->user_comments[vc->comments]=malloc(vc->comment_lengths[vc->comments]+1);
+#else														/* SSE Optimize */
   vc->user_comments=_ogg_realloc(vc->user_comments,
 			    (vc->comments+2)*sizeof(*vc->user_comments));
   vc->comment_lengths=_ogg_realloc(vc->comment_lengths,
       			    (vc->comments+2)*sizeof(*vc->comment_lengths));
   vc->comment_lengths[vc->comments]=strlen(comment);
   vc->user_comments[vc->comments]=_ogg_malloc(vc->comment_lengths[vc->comments]+1);
+#endif														/* SSE Optimize */
   strcpy(vc->user_comments[vc->comments], comment);
   vc->comments++;
   vc->user_comments[vc->comments]=NULL;
@@ -130,11 +145,19 @@
 void vorbis_comment_clear(vorbis_comment *vc){
   if(vc){
     long i;
+#ifdef	__SSE__												/* SSE Optimize */
+    for(i=0;i<vc->comments;i++)
+      if(vc->user_comments[i])free(vc->user_comments[i]);
+    if(vc->user_comments)free(vc->user_comments);
+	if(vc->comment_lengths)free(vc->comment_lengths);
+    if(vc->vendor)free(vc->vendor);
+#else														/* SSE Optimize */
     for(i=0;i<vc->comments;i++)
       if(vc->user_comments[i])_ogg_free(vc->user_comments[i]);
     if(vc->user_comments)_ogg_free(vc->user_comments);
 	if(vc->comment_lengths)_ogg_free(vc->comment_lengths);
     if(vc->vendor)_ogg_free(vc->vendor);
+#endif														/* SSE Optimize */
     memset(vc,0,sizeof(*vc));
   }
 }
@@ -236,10 +259,25 @@
   int i;
   int vendorlen=oggpack_read(opb,32);
   if(vendorlen<0)goto err_out;
+#ifdef	__SSE__												/* SSE Optimize */
+  vc->vendor=calloc(vendorlen+1,1);
+#else														/* SSE Optimize */
   vc->vendor=_ogg_calloc(vendorlen+1,1);
+#endif														/* SSE Optimize */
   _v_readstring(opb,vc->vendor,vendorlen);
   vc->comments=oggpack_read(opb,32);
   if(vc->comments<0)goto err_out;
+#ifdef	__SSE__												/* SSE Optimize */
+  vc->user_comments=calloc(vc->comments+1,sizeof(*vc->user_comments));
+  vc->comment_lengths=calloc(vc->comments+1, sizeof(*vc->comment_lengths));
+  for(i=0;i<vc->comments;i++){
+    int len=oggpack_read(opb,32);
+    if(len<0)goto err_out;
+	vc->comment_lengths[i]=len;
+    vc->user_comments[i]=calloc(len+1,1);
+    _v_readstring(opb,vc->user_comments[i],len);
+  }	  
+#else														/* SSE Optimize */
   vc->user_comments=_ogg_calloc(vc->comments+1,sizeof(*vc->user_comments));
   vc->comment_lengths=_ogg_calloc(vc->comments+1, sizeof(*vc->comment_lengths));
 	    
@@ -250,6 +288,7 @@
     vc->user_comments[i]=_ogg_calloc(len+1,1);
     _v_readstring(opb,vc->user_comments[i],len);
   }	  
+#endif														/* SSE Optimize */
   if(oggpack_read(opb,1)!=1)goto err_out; /* EOP check */
 
   return(0);
@@ -451,7 +490,15 @@
 }
 
 static int _vorbis_pack_comment(oggpack_buffer *opb,vorbis_comment *vc){
+#if defined(__SSE3__)
+  char temp[]="BS; Lancer(SSE3) [" CPDATE "] (based on aoTuV b5 [20061024])";
+#elif defined(__SSE2__)
+  char temp[]="BS; Lancer(SSE2) [" CPDATE "] (based on aoTuV b5 [20061024])";
+#elif defined(__SSE__)
+  char temp[]="BS; Lancer(SSE) [" CPDATE "] (based on aoTuV b5 [20061024])";
+#else
   char temp[]="AO; aoTuV b5 [20061024] (based on Xiph.Org's I 20070622)";
+#endif
   int bytes = strlen(temp);
 
   /* preamble */  
diff -ruN --exclude '*.in' libvorbis-1.2.0/lib/Makefile.am libvorbis-1.2.0-sse/lib/Makefile.am
--- libvorbis-1.2.0/lib/Makefile.am	2004-07-26 15:31:38.000000000 +0200
+++ libvorbis-1.2.0-sse/lib/Makefile.am	2007-08-02 12:43:10.000000000 +0200
@@ -10,16 +10,16 @@
 			lpc.c analysis.c synthesis.c psy.c info.c \
 			floor1.c floor0.c\
 			res0.c mapping0.c registry.c codebook.c sharedbook.c\
-			lookup.c bitrate.c\
+			lookup.c bitrate.c xmmlib.c \
 			envelope.h lpc.h lsp.h codebook.h misc.h psy.h\
 			masking.h os.h mdct.h smallft.h highlevel.h\
 			registry.h scales.h window.h lookup.h lookup_data.h\
-			codec_internal.h backends.h bitrate.h 
+			codec_internal.h backends.h bitrate.h xmmlib.h
 libvorbis_la_LDFLAGS = -no-undefined -version-info @V_LIB_CURRENT@:@V_LIB_REVISION@:@V_LIB_AGE@
 libvorbis_la_LIBADD = @OGG_LIBS@ @VORBIS_LIBS@
 
 libvorbisfile_la_SOURCES = vorbisfile.c
-libvorbisfile_la_LDFLAGS = -no-undefined -version-info @VF_LIB_CURRENT@:@VF_LIB_REVISION@:@VF_LIB_AGE@
+libvorbisfile_la_LDFLAGS = -no-undefined -version-info @VF_LIB_CURRENT@:@VF_LIB_REVISION@:@VF_LIB_AGE@ @OGG_LIBS@
 libvorbisfile_la_LIBADD = libvorbis.la
 
 libvorbisenc_la_SOURCES = vorbisenc.c 
diff -ruN --exclude '*.in' libvorbis-1.2.0/lib/mapping0.c libvorbis-1.2.0-sse/lib/mapping0.c
--- libvorbis-1.2.0/lib/mapping0.c	2007-08-02 12:42:12.000000000 +0200
+++ libvorbis-1.2.0-sse/lib/mapping0.c	2007-08-02 12:43:10.000000000 +0200
@@ -27,6 +27,10 @@
 #include "registry.h"
 #include "psy.h"
 #include "misc.h"
+#ifdef __SSE__												/* SSE Optimize */
+#include <float.h>
+#include "xmmlib.h"
+#endif														/* SSE Optimize */
 
 /* simplistic, wasteful way of doing this (unique lookup for each
    mode/submapping); there should be a central repository for
@@ -239,6 +243,508 @@
 			 vorbis_look_floor *look,
 			 int *post,int *ilogmask);
 
+#ifdef __SSE__												/* SSE Optimize */
+static void mapping_forward_sub0(float *pcm, float *logfft, float scale_dB,
+								 float *local_ampmax, int i, int n)
+{
+	_MM_ALIGN16 const float mparm[4] = {
+		7.17711438e-7f/2.f, 7.17711438e-7f/2.f, 7.17711438e-7f/2.f, 7.17711438e-7f/2.f,
+	};
+	__m128	SCALEdB;
+	__m128	LAM0;
+#if	!defined(__SSE2__)
+	__m128	LAM1;
+#endif
+	int	j, k;
+	SCALEdB	 = _mm_set_ps1(scale_dB+.345f-764.6161886f/2.f);
+	LAM0	 = _mm_set_ps1(local_ampmax[i]);
+#if	defined(__SSE2__)
+	if(n>=256&&n<=4096)
+	{
+		/*
+			Caution! This routine is for the SSE-optimized FFT only.
+		*/
+		float	rfv	 = logfft[0];
+		logfft[0]	 = 0.f;
+		logfft[1]	 = 0.f;
+#if	defined(__SSE3__)
+		/*
+			SSE3 optimized code
+		*/
+		for(j=0,k=0;j<n;j+=16,k+=8)
+		{
+			__m128	XMM0, XMM2;
+			__m128	XMM1, XMM3;
+			XMM0	 = _mm_load_ps(pcm+j   );
+			XMM1	 = _mm_load_ps(pcm+j+ 4);
+			XMM2	 = _mm_load_ps(pcm+j+ 8);
+			XMM3	 = _mm_load_ps(pcm+j+12);
+			XMM0	 = _mm_mul_ps(XMM0, XMM0);
+			XMM1	 = _mm_mul_ps(XMM1, XMM1);
+			XMM2	 = _mm_mul_ps(XMM2, XMM2);
+			XMM3	 = _mm_mul_ps(XMM3, XMM3);
+			XMM0	 = _mm_hadd_ps(XMM0, XMM1);
+			XMM2	 = _mm_hadd_ps(XMM2, XMM3);
+			XMM0	 = _mm_cvtepi32_ps(_mm_castps_si128(XMM0));
+			XMM2	 = _mm_cvtepi32_ps(_mm_castps_si128(XMM2));
+			XMM0	 = _mm_mul_ps(XMM0, PM128(mparm));
+			XMM2	 = _mm_mul_ps(XMM2, PM128(mparm));
+			XMM0	 = _mm_add_ps(XMM0, SCALEdB);
+			XMM2	 = _mm_add_ps(XMM2, SCALEdB);
+			_mm_store_ps(logfft+k   , XMM0);
+			_mm_store_ps(logfft+k+ 4, XMM2);
+			XMM0		 = _mm_max_ps(XMM0, XMM2);
+			LAM0		 = _mm_max_ps(LAM0, XMM0);
+		}
+#else	/* for SSE2 */
+		/*
+			SSE2 optimized code
+		*/
+		for(j=0,k=0;j<n;j+=16,k+=8)
+		{
+			__m128	XMM0, XMM2;
+			__m128	XMM1, XMM3;
+			__m128	XMM4, XMM5;
+			XMM0	 = _mm_load_ps(pcm+j   );
+			XMM2	 = _mm_load_ps(pcm+j+ 8);
+			XMM4	 = _mm_load_ps(pcm+j+ 4);
+			XMM5	 = _mm_load_ps(pcm+j+12);
+			XMM1	 = XMM0;
+			XMM3	 = XMM2;
+			XMM0	 = _mm_shuffle_ps(XMM0, XMM4,_MM_SHUFFLE(2,0,2,0));
+			XMM1	 = _mm_shuffle_ps(XMM1, XMM4,_MM_SHUFFLE(3,1,3,1));
+			XMM2	 = _mm_shuffle_ps(XMM2, XMM5,_MM_SHUFFLE(2,0,2,0));
+			XMM3	 = _mm_shuffle_ps(XMM3, XMM5,_MM_SHUFFLE(3,1,3,1));
+			XMM0	 = _mm_mul_ps(XMM0, XMM0);
+			XMM1	 = _mm_mul_ps(XMM1, XMM1);
+			XMM2	 = _mm_mul_ps(XMM2, XMM2);
+			XMM3	 = _mm_mul_ps(XMM3, XMM3);
+			XMM0	 = _mm_add_ps(XMM0, XMM1);
+			XMM2	 = _mm_add_ps(XMM2, XMM3);
+			XMM0	 = _mm_cvtepi32_ps(_mm_castps_si128(XMM0));
+			XMM2	 = _mm_cvtepi32_ps(_mm_castps_si128(XMM2));
+			XMM0	 = _mm_mul_ps(XMM0, PM128(mparm));
+			XMM2	 = _mm_mul_ps(XMM2, PM128(mparm));
+			XMM0	 = _mm_add_ps(XMM0, SCALEdB);
+			XMM2	 = _mm_add_ps(XMM2, SCALEdB);
+			_mm_store_ps(logfft+k   , XMM0);
+			_mm_store_ps(logfft+k+ 4, XMM2);
+			XMM0		 = _mm_max_ps(XMM0, XMM2);
+			LAM0		 = _mm_max_ps(LAM0, XMM0);
+		}
+#endif
+		local_ampmax[i]	 =  _mm_max_horz(LAM0);
+		logfft[0]	 = rfv;
+	}
+	else
+	{
+		/*
+			SSE2 optimized code
+		*/
+		int Cnt	 = ((n-2)&(~15))+1;
+		for(j=1;j<Cnt;j+=16){	/* 16 floats (8 re/im pairs) in, 8 log magnitudes out */
+		__m128	XMM0, XMM3;
+#if	defined(__SSE3__)
+			{
+				__m128	XMM2, XMM5;
+				XMM0	 = _mm_lddqu_ps(pcm+j   );
+				XMM2	 = _mm_lddqu_ps(pcm+j+ 4);
+				XMM3	 = _mm_lddqu_ps(pcm+j+ 8);
+				XMM5	 = _mm_lddqu_ps(pcm+j+12);
+				XMM0	 = _mm_mul_ps(XMM0, XMM0);
+				XMM2	 = _mm_mul_ps(XMM2, XMM2);
+				XMM3	 = _mm_mul_ps(XMM3, XMM3);
+				XMM5	 = _mm_mul_ps(XMM5, XMM5);
+				XMM0	 = _mm_hadd_ps(XMM0, XMM2);
+				XMM3	 = _mm_hadd_ps(XMM3, XMM5);
+			}
+#else
+			{
+				__m128	XMM2, XMM5;
+				{
+					__m128	XMM1, XMM4;
+					XMM0	 = _mm_loadu_ps(pcm+j   );
+					XMM1	 = _mm_loadu_ps(pcm+j+ 4);
+					XMM3	 = _mm_loadu_ps(pcm+j+ 8);
+					XMM4	 = _mm_loadu_ps(pcm+j+12);
+					XMM2	 = XMM0;
+					XMM5	 = XMM3;
+					XMM0	 = _mm_shuffle_ps(XMM0, XMM1,_MM_SHUFFLE(2,0,2,0));
+					XMM2	 = _mm_shuffle_ps(XMM2, XMM1,_MM_SHUFFLE(3,1,3,1));
+					XMM3	 = _mm_shuffle_ps(XMM3, XMM4,_MM_SHUFFLE(2,0,2,0));
+					XMM5	 = _mm_shuffle_ps(XMM5, XMM4,_MM_SHUFFLE(3,1,3,1));
+				}
+				XMM0	 = _mm_mul_ps(XMM0, XMM0);
+				XMM3	 = _mm_mul_ps(XMM3, XMM3);
+				XMM2	 = _mm_mul_ps(XMM2, XMM2);
+				XMM5	 = _mm_mul_ps(XMM5, XMM5);
+				XMM0	 = _mm_add_ps(XMM0, XMM2);
+				XMM3	 = _mm_add_ps(XMM3, XMM5);
+			}
+#endif
+			XMM0	 = _mm_cvtepi32_ps(_mm_castps_si128(XMM0));
+			XMM3	 = _mm_cvtepi32_ps(_mm_castps_si128(XMM3));
+			XMM0	 = _mm_mul_ps(XMM0, PM128(mparm));
+			XMM3	 = _mm_mul_ps(XMM3, PM128(mparm));	/* mparm[] has only 4 floats; mparm+4 read past its end */
+			XMM0	 = _mm_add_ps(XMM0, SCALEdB);
+			XMM3	 = _mm_add_ps(XMM3, SCALEdB);
+			_mm_storeu_ps(logfft+((j+1)>>1), XMM0);
+			_mm_storeu_ps(logfft+((j+9)>>1), XMM3);
+			XMM0		 = _mm_max_ps(XMM0, XMM3);
+			LAM0		 = _mm_max_ps(LAM0, XMM0);
+		}
+		Cnt	 = ((n-2)&(~7))+1;
+		for(;j<Cnt;j+=8){
+			__m128	XMM0;
+#if	defined(__SSE3__)
+			{
+				__m128	XMM1;
+				XMM0	 = _mm_lddqu_ps(pcm+j   );
+				XMM1	 = _mm_lddqu_ps(pcm+j+ 4);
+				XMM0	 = _mm_mul_ps(XMM0, XMM0);
+				XMM1	 = _mm_mul_ps(XMM1, XMM1);
+				XMM0	 = _mm_hadd_ps(XMM0, XMM1);
+			}
+#else
+			{
+				__m128	XMM2;
+				{
+					__m128	XMM1;
+					XMM0	 = _mm_loadu_ps(pcm+j   );
+					XMM1	 = _mm_loadu_ps(pcm+j+ 4);
+					XMM2	 = XMM0;
+					XMM0	 = _mm_shuffle_ps(XMM0, XMM1,_MM_SHUFFLE(2,0,2,0));
+					XMM2	 = _mm_shuffle_ps(XMM2, XMM1,_MM_SHUFFLE(3,1,3,1));
+				}
+				XMM0	 = _mm_mul_ps(XMM0, XMM0);
+				XMM2	 = _mm_mul_ps(XMM2, XMM2);
+				XMM0	 = _mm_add_ps(XMM0, XMM2);
+			}
+#endif
+			XMM0	 = _mm_cvtepi32_ps(_mm_castps_si128(XMM0));
+			XMM0	 = _mm_mul_ps(XMM0, PM128(mparm));
+			XMM0	 = _mm_add_ps(XMM0, SCALEdB);
+			_mm_storeu_ps(&logfft[(j+1)>>1], XMM0);
+			LAM0		 = _mm_max_ps(LAM0, XMM0);
+		}
+		local_ampmax[i]	 = _mm_max_horz(LAM0);
+		for(;j<n;j+=2){
+			float	temp	 = pcm[j]*pcm[j]+pcm[j+1]*pcm[j+1];
+			temp=logfft[(j+1)>>1]=scale_dB+.5f*todB(&temp)  + .345; /* +
+										.345 is a hack; the original todB
+										estimation used on IEEE 754
+										compliant machines had a bug that
+										returned dB values about a third
+										of a decibel too high.  The bug
+										was harmless because tunings
+										implicitly took that into
+										account.  However, fixing the bug
+										in the estimator requires
+										changing all the tunings as well.
+										For now, it's easier to sync
+										things back up here, and
+										recalibrate the tunings in the
+										next major model upgrade. */
+			if(temp>local_ampmax[i])
+				local_ampmax[i]	 = temp;
+		}
+	}
+#else	/* for __SSE2__ */
+	/*
+		SSE optimized code
+	*/
+	LAM1	 = LAM0;
+	if(n>=256&&n<=4096)
+	{
+		/*
+			Caution! This routine is for the SSE-optimized FFT only.
+		*/
+		float	rfv	 = logfft[0];
+		logfft[0]	 = 0.f;
+		logfft[1]	 = 0.f;
+		for(j=0,k=0;j<n;j+=32,k+=16)
+		{
+			__m64	MM0, MM1, MM2, MM3;
+			__m64	MM4, MM5, MM6, MM7;
+			__m128x	U0, U1, U2, U3;
+			{
+				__m128	XMM0, XMM1, XMM2, XMM3;
+				__m128	XMM4, XMM5;
+				XMM0	 = _mm_load_ps(pcm+j   );
+				XMM2	 = _mm_load_ps(pcm+j+ 8);
+				XMM4	 = _mm_load_ps(pcm+j+ 4);
+				XMM5	 = _mm_load_ps(pcm+j+12);
+				XMM1	 = XMM0;
+				XMM3	 = XMM2;
+				XMM0	 = _mm_shuffle_ps(XMM0, XMM4,_MM_SHUFFLE(2,0,2,0));
+				XMM1	 = _mm_shuffle_ps(XMM1, XMM4,_MM_SHUFFLE(3,1,3,1));
+				XMM2	 = _mm_shuffle_ps(XMM2, XMM5,_MM_SHUFFLE(2,0,2,0));
+				XMM3	 = _mm_shuffle_ps(XMM3, XMM5,_MM_SHUFFLE(3,1,3,1));
+				XMM0	 = _mm_mul_ps(XMM0, XMM0);
+				XMM1	 = _mm_mul_ps(XMM1, XMM1);
+				XMM2	 = _mm_mul_ps(XMM2, XMM2);
+				XMM3	 = _mm_mul_ps(XMM3, XMM3);
+				XMM0	 = _mm_add_ps(XMM0, XMM1);
+				XMM2	 = _mm_add_ps(XMM2, XMM3);
+				XMM4	 = _mm_load_ps(pcm+j+16);
+				U0.ps	 = XMM0;
+				U1.ps	 = XMM2;
+				XMM1	 = _mm_load_ps(pcm+j+24);
+				XMM0	 = _mm_load_ps(pcm+j+20);
+				XMM2	 = _mm_load_ps(pcm+j+28);
+				XMM5	 = XMM4;
+				XMM3	 = XMM1;
+				XMM4	 = _mm_shuffle_ps(XMM4, XMM0,_MM_SHUFFLE(2,0,2,0));
+				XMM5	 = _mm_shuffle_ps(XMM5, XMM0,_MM_SHUFFLE(3,1,3,1));
+				XMM1	 = _mm_shuffle_ps(XMM1, XMM2,_MM_SHUFFLE(2,0,2,0));
+				XMM3	 = _mm_shuffle_ps(XMM3, XMM2,_MM_SHUFFLE(3,1,3,1));
+				MM0		 = U0.pi64[1];
+				MM1		 = U1.pi64[1];
+				MM2		 = U0.pi64[0];
+				MM3		 = U1.pi64[0];
+				XMM4	 = _mm_mul_ps(XMM4, XMM4);
+				XMM5	 = _mm_mul_ps(XMM5, XMM5);
+				XMM1	 = _mm_mul_ps(XMM1, XMM1);
+				XMM3	 = _mm_mul_ps(XMM3, XMM3);
+				XMM4	 = _mm_add_ps(XMM4, XMM5);
+				XMM1	 = _mm_add_ps(XMM1, XMM3);
+				XMM0	 = _mm_cvtpi32_ps(XMM0, MM0);
+				XMM2	 = _mm_cvtpi32_ps(XMM2, MM1);
+				U2.ps	 = XMM4;
+				U3.ps	 = XMM1;
+				MM4		 = U2.pi64[1];
+				MM5		 = U3.pi64[1];
+				MM6		 = U2.pi64[0];
+				MM7		 = U3.pi64[0];
+				XMM5	 = _mm_cvtpi32_ps(XMM5, MM4);
+				XMM3	 = _mm_cvtpi32_ps(XMM3, MM5);
+				XMM0	 = _mm_movelh_ps(XMM0, XMM0);
+				XMM2	 = _mm_movelh_ps(XMM2, XMM2);
+				XMM5	 = _mm_movelh_ps(XMM5, XMM5);
+				XMM3	 = _mm_movelh_ps(XMM3, XMM3);
+				XMM0	 = _mm_cvtpi32_ps(XMM0, MM2);
+				XMM2	 = _mm_cvtpi32_ps(XMM2, MM3);
+				XMM5	 = _mm_cvtpi32_ps(XMM5, MM6);
+				XMM3	 = _mm_cvtpi32_ps(XMM3, MM7);
+				XMM0	 = _mm_mul_ps(XMM0, PM128(mparm));
+				XMM2	 = _mm_mul_ps(XMM2, PM128(mparm));
+				XMM5	 = _mm_mul_ps(XMM5, PM128(mparm));
+				XMM3	 = _mm_mul_ps(XMM3, PM128(mparm));
+				XMM0	 = _mm_add_ps(XMM0, SCALEdB);
+				XMM2	 = _mm_add_ps(XMM2, SCALEdB);
+				XMM5	 = _mm_add_ps(XMM5, SCALEdB);
+				XMM3	 = _mm_add_ps(XMM3, SCALEdB);
+				_mm_store_ps(logfft+k   , XMM0);
+				_mm_store_ps(logfft+k+ 4, XMM2);
+				_mm_store_ps(logfft+k+ 8, XMM5);
+				_mm_store_ps(logfft+k+12, XMM3);
+				XMM0		 = _mm_max_ps(XMM0, XMM2);
+				XMM5		 = _mm_max_ps(XMM5, XMM3);
+				LAM0		 = _mm_max_ps(LAM0, XMM0);
+				LAM1		 = _mm_max_ps(LAM1, XMM5);
+			}
+		}
+		_mm_empty();
+		logfft[0]	 = rfv;
+		LAM0		 = _mm_max_ps(LAM0, LAM1);
+		local_ampmax[i]	 = _mm_max_horz(LAM0);
+	}
+	else
+	{
+		__m64	MM0, MM1, MM2, MM3;
+		__m128x	U0, U1;
+		int Cnt	 = ((n-2)&(~15))+1;
+		for(j=1;j<Cnt;j+=16){
+			__m128	XMM0, XMM3;
+			{
+				__m128	XMM2, XMM5;
+				{
+					__m128	XMM1, XMM4;
+					XMM0	 = _mm_loadu_ps(pcm+j   );
+					XMM1	 = _mm_loadu_ps(pcm+j+ 4);
+					XMM3	 = _mm_loadu_ps(pcm+j+ 8);
+					XMM4	 = _mm_loadu_ps(pcm+j+12);
+					XMM2	 = XMM0;
+					XMM5	 = XMM3;
+					XMM0	 = _mm_shuffle_ps(XMM0, XMM1,_MM_SHUFFLE(2,0,2,0));
+					XMM2	 = _mm_shuffle_ps(XMM2, XMM1,_MM_SHUFFLE(3,1,3,1));
+					XMM3	 = _mm_shuffle_ps(XMM3, XMM4,_MM_SHUFFLE(2,0,2,0));
+					XMM5	 = _mm_shuffle_ps(XMM5, XMM4,_MM_SHUFFLE(3,1,3,1));
+				}
+				XMM0	 = _mm_mul_ps(XMM0, XMM0);
+				XMM3	 = _mm_mul_ps(XMM3, XMM3);
+				XMM2	 = _mm_mul_ps(XMM2, XMM2);
+				XMM5	 = _mm_mul_ps(XMM5, XMM5);
+				XMM0	 = _mm_add_ps(XMM0, XMM2);
+				XMM3	 = _mm_add_ps(XMM3, XMM5);
+			}
+			U0.ps	 = XMM0;
+			U1.ps	 = XMM3;
+			MM0		 = U0.pi64[1];
+			MM1		 = U1.pi64[1];
+			MM2		 = U0.pi64[0];
+			MM3		 = U1.pi64[0];
+			XMM0	 = _mm_cvtpi32_ps(XMM0, MM0);
+			XMM3	 = _mm_cvtpi32_ps(XMM3, MM1);
+			XMM0	 = _mm_movelh_ps(XMM0, XMM0);
+			XMM3	 = _mm_movelh_ps(XMM3, XMM3);
+			XMM0	 = _mm_cvtpi32_ps(XMM0, MM2);
+			XMM3	 = _mm_cvtpi32_ps(XMM3, MM3);
+			XMM0	 = _mm_mul_ps(XMM0, PM128(mparm));
+			XMM3	 = _mm_mul_ps(XMM3, PM128(mparm));
+			XMM0	 = _mm_add_ps(XMM0, SCALEdB);
+			XMM3	 = _mm_add_ps(XMM3, SCALEdB);
+			_mm_storeu_ps(logfft+((j+1)>>1), XMM0);
+			_mm_storeu_ps(logfft+((j+9)>>1), XMM3);
+			LAM0		 = _mm_max_ps(LAM0, XMM0);
+			LAM1		 = _mm_max_ps(LAM1, XMM3);
+		}
+		Cnt	 = ((n-2)&(~7))+1;
+		for(;j<Cnt;j+=8){
+			__m128	XMM0;
+			{
+				__m128	XMM2;
+				{
+					__m128	XMM1;
+					XMM0	 = _mm_loadu_ps(pcm+j   );
+					XMM1	 = _mm_loadu_ps(pcm+j+ 4);
+					XMM2	 = XMM0;
+					XMM0	 = _mm_shuffle_ps(XMM0, XMM1,_MM_SHUFFLE(2,0,2,0));
+					XMM2	 = _mm_shuffle_ps(XMM2, XMM1,_MM_SHUFFLE(3,1,3,1));
+				}
+				XMM0	 = _mm_mul_ps(XMM0, XMM0);
+				XMM2	 = _mm_mul_ps(XMM2, XMM2);
+				XMM0	 = _mm_add_ps(XMM0, XMM2);
+			}
+			U0.ps	 = XMM0;
+			MM0		 = U0.pi64[1];
+			MM1		 = U0.pi64[0];
+			XMM0	 = _mm_cvtpi32_ps(XMM0, MM0);
+			XMM0	 = _mm_movelh_ps(XMM0, XMM0);
+			XMM0	 = _mm_cvtpi32_ps(XMM0, MM1);
+			XMM0	 = _mm_mul_ps(XMM0, PM128(mparm));
+			XMM0	 = _mm_add_ps(XMM0, SCALEdB);
+			_mm_storeu_ps(logfft+((j+1)>>1), XMM0);
+			LAM0		 = _mm_max_ps(LAM0, XMM0);
+		}
+		LAM0		 = _mm_max_ps(LAM0, LAM1);
+		_mm_empty();
+		local_ampmax[i]	 = _mm_max_horz(LAM0);
+		for(;j<n;j+=2){
+			float	temp	 = pcm[j]*pcm[j]+pcm[j+1]*pcm[j+1];
+			temp=logfft[(j+1)>>1]=scale_dB+.5f*todB(&temp)  + .345; /* +
+										.345 is a hack; the original todB
+										estimation used on IEEE 754
+										compliant machines had a bug that
+										returned dB values about a third
+										of a decibel too high.  The bug
+										was harmless because tunings
+										implicitly took that into
+										account.  However, fixing the bug
+										in the estimator requires
+										changing all the tunings as well.
+										For now, it's easier to sync
+										things back up here, and
+										recalibrate the tunings in the
+										next major model upgrade. */
+			if(temp>local_ampmax[i])
+				local_ampmax[i]	 = temp;
+		}
+	}
+#endif
+}
+
+/*
+	SSE version of the scalar loop "logmdct[j]=todB(mdct+j) + .345";
+	it is called in place of that loop when __SSE__ is defined.
+
+	mdct    : source floats, read from index 0 upward
+	logmdct : destination floats, written at the same indices
+	n       : n/2 values are handled, in groups of 16 (there is no
+	          remainder loop, so a partial last group is processed in full)
+
+	Per value: mask with PABSMASK, convert the resulting bit pattern from
+	integer to float, multiply by mparm, add PFV0.  Aligned loads and
+	stores are used, so both pointers must be 16-byte aligned.
+
+	NOTE(review): PABSMASK and PM128 are defined outside this function
+	(presumably xmmlib.h); PABSMASK is assumed to clear the sign bit.
+*/
+static void mapping_forward_sub1(float *mdct, float *logmdct, int n)
+{
+	/* multiplier, one copy per lane */
+	static _MM_ALIGN16 const float mparm[4]	 = {
+		7.17711438e-7f, 7.17711438e-7f, 7.17711438e-7f, 7.17711438e-7f
+	};
+	/* offset, one copy per lane; the 0.345 term is folded in */
+	static _MM_ALIGN16 const float PFV0[4]	 = {
+		0.345f-764.6161886f,	0.345f-764.6161886f,
+		0.345f-764.6161886f,	0.345f-764.6161886f
+	};
+	const int	half	 = n/2;
+	int	pos;
+
+	for(pos=0;pos<half;pos+=16)
+	{
+		float	*src	 = mdct+pos;
+		float	*dst	 = logmdct+pos;
+		__m128	V0, V1, V2, V3;
+
+		/* load 16 inputs and apply the mask */
+		V0	 = _mm_and_ps(_mm_load_ps(src   ), PABSMASK.ps);
+		V1	 = _mm_and_ps(_mm_load_ps(src+ 4), PABSMASK.ps);
+		V2	 = _mm_and_ps(_mm_load_ps(src+ 8), PABSMASK.ps);
+		V3	 = _mm_and_ps(_mm_load_ps(src+12), PABSMASK.ps);
+
+#if	defined(__SSE2__)
+		/* SSE2: reinterpret as integers and convert all four lanes at once */
+		V0	 = _mm_cvtepi32_ps(_mm_castps_si128(V0));
+		V1	 = _mm_cvtepi32_ps(_mm_castps_si128(V1));
+		V2	 = _mm_cvtepi32_ps(_mm_castps_si128(V2));
+		V3	 = _mm_cvtepi32_ps(_mm_castps_si128(V3));
+#else	/* __SSE2__ */
+		/* SSE only: convert two integers at a time via the __m64 halves */
+		{
+			__m128x	W0, W1, W2, W3;
+			W0.ps	 = V0;
+			W1.ps	 = V1;
+			W2.ps	 = V2;
+			W3.ps	 = V3;
+			/* the upper pair of each vector is converted into lanes 0-1 ... */
+			V0	 = _mm_cvtpi32_ps(V0, W0.pi64[1]);
+			V1	 = _mm_cvtpi32_ps(V1, W1.pi64[1]);
+			V2	 = _mm_cvtpi32_ps(V2, W2.pi64[1]);
+			V3	 = _mm_cvtpi32_ps(V3, W3.pi64[1]);
+			/* ... copied up to lanes 2-3 ... */
+			V0	 = _mm_movelh_ps(V0, V0);
+			V1	 = _mm_movelh_ps(V1, V1);
+			V2	 = _mm_movelh_ps(V2, V2);
+			V3	 = _mm_movelh_ps(V3, V3);
+			/* ... and the lower pair then fills lanes 0-1 */
+			V0	 = _mm_cvtpi32_ps(V0, W0.pi64[0]);
+			V1	 = _mm_cvtpi32_ps(V1, W1.pi64[0]);
+			V2	 = _mm_cvtpi32_ps(V2, W2.pi64[0]);
+			V3	 = _mm_cvtpi32_ps(V3, W3.pi64[0]);
+		}
+#endif
+
+		/* scale, then offset */
+		V0	 = _mm_add_ps(_mm_mul_ps(V0, PM128(mparm)), PM128(PFV0));
+		V1	 = _mm_add_ps(_mm_mul_ps(V1, PM128(mparm)), PM128(PFV0));
+		V2	 = _mm_add_ps(_mm_mul_ps(V2, PM128(mparm)), PM128(PFV0));
+		V3	 = _mm_add_ps(_mm_mul_ps(V3, PM128(mparm)), PM128(PFV0));
+		/* write the 16 results */
+		_mm_store_ps(dst   , V0);
+		_mm_store_ps(dst+ 4, V1);
+		_mm_store_ps(dst+ 8, V2);
+		_mm_store_ps(dst+12, V3);
+	}
+#if	!defined(__SSE2__)
+	/* the SSE-only path went through MMX registers; reset that state */
+	_mm_empty();
+#endif
+}
+#endif														/* SSE Optimize */
 
 static int mapping0_forward(vorbis_block *vb){
   vorbis_dsp_state      *vd=vb->vd;
@@ -315,8 +821,12 @@
 
     /* transform the PCM data */
     /* only MDCT right now.... */
+#if	defined(__SSE__)										/* SSE Optimize */
+    mdct_forward(b->transform[vb->W][0],pcm,gmdct[i], gmdct_org[i]);
+#else														/* SSE Optimize */
     mdct_forward(b->transform[vb->W][0],pcm,gmdct[i]);
     memcpy(gmdct_org[i], gmdct[i], n/2*sizeof(**gmdct_org));
+#endif														/* SSE Optimize */
     
     /* FFT yields more accurate tonal estimation (not phase sensitive) */
     drft_forward(&b->fft_look[vb->W],pcm);
@@ -335,6 +845,9 @@
                                      recalibrate the tunings in the
                                      next major model upgrade. */
     local_ampmax[i]=logfft[0];
+#ifdef __SSE__												/* SSE Optimize */
+	mapping_forward_sub0(pcm, logfft, scale_dB, local_ampmax, i, n);
+#else														/* SSE Optimize */
     for(j=1;j<n-1;j+=2){
       float temp=pcm[j]*pcm[j]+pcm[j+1]*pcm[j+1];
       temp=logfft[(j+1)>>1]=scale_dB+.5f*todB(&temp)  + .345; /* +
@@ -354,6 +867,7 @@
                                      next major model upgrade. */
       if(temp>local_ampmax[i])local_ampmax[i]=temp;
     }
+#endif														/* SSE Optimize */
 
     if(local_ampmax[i]>0.f)local_ampmax[i]=0.f;
     if(local_ampmax[i]>global_ampmax)global_ampmax=local_ampmax[i];
@@ -397,6 +911,9 @@
       floor_posts[i]=_vorbis_block_alloc(vb,PACKETBLOBS*sizeof(**floor_posts));
       memset(floor_posts[i],0,sizeof(**floor_posts)*PACKETBLOBS);
       
+#ifdef __SSE__												/* SSE Optimize */
+	mapping_forward_sub1(mdct, logmdct, n);
+#else														/* SSE Optimize */
       for(j=0;j<n/2;j++)
 	logmdct[j]=todB(mdct+j)  + .345; /* + .345 is a hack; the original
                                      todB estimation used on IEEE 754
@@ -412,6 +929,7 @@
                                      things back up here, and
                                      recalibrate the tunings in the
                                      next major model upgrade. */
+#endif														/* SSE Optimize */
 
 #if 0
       if(vi->channels==2){
@@ -492,7 +1010,12 @@
 			   vif->n,
 			   blocktype, modenumber,
 			   vb->nW,
+#ifdef __SSE__												/* SSE Optimize */
+			   b->lW_blocktype, b->lW_modenumber, b->lW_no,
+			   res_org[i]);
+#else														/* SSE Optimize */
 			   b->lW_blocktype, b->lW_modenumber, b->lW_no);
+#endif														/* SSE Optimize */
 	
 #if 0
 	if(vi->channels==2){
@@ -541,7 +1064,12 @@
 			   vif->n,
 			   blocktype, modenumber,
 			   vb->nW,
+#ifdef __SSE__												/* SSE Optimize */
+			   b->lW_blocktype, b->lW_modenumber, b->lW_no,
+			   res_org[i]);
+#else														/* SSE Optimize */
 			   b->lW_blocktype, b->lW_modenumber, b->lW_no);
+#endif														/* SSE Optimize */
 
 #if 0
 	if(vi->channels==2){
@@ -570,7 +1098,12 @@
 			   vif->n,
 			   blocktype, modenumber,
 			   vb->nW,
+#ifdef __SSE__												/* SSE Optimize */
+			   b->lW_blocktype, b->lW_modenumber, b->lW_no,
+			   res_org[i]);
+#else														/* SSE Optimize */
 			   b->lW_blocktype, b->lW_modenumber, b->lW_no);
+#endif														/* SSE Optimize */
 
 #if 0
 	if(vi->channels==2)
@@ -636,7 +1169,12 @@
       mag_sort=_vp_quantize_couple_sort(vb,
 					psy_look,
 					info,
+#ifdef __SSE__												/* SSE Optimize */
+					mag_memo,
+					res_org[0]);    
+#else														/* SSE Optimize */
 					mag_memo);    
+#endif														/* SSE Optimize */
     }
 
     memset(sortindex,0,sizeof(*sortindex)*vi->channels);
@@ -644,7 +1182,11 @@
       for(i=0;i<vi->channels;i++){
 	float *mdct    =gmdct[i];
 	sortindex[i]=alloca(sizeof(**sortindex)*n/2);
+#ifdef __SSE__												/* SSE Optimize */
+	_vp_noise_normalize_sort(psy_look,mdct,sortindex[i],res_org[0]);
+#else														/* SSE Optimize */
 	_vp_noise_normalize_sort(psy_look,mdct,sortindex[i]);
+#endif														/* SSE Optimize */
       }
     }
 
@@ -835,6 +1377,77 @@
 
   /* channel coupling */
   for(i=info->coupling_steps-1;i>=0;i--){
+#ifdef	__SSE__												/* SSE Optimize */
+	{
+		float	*PCMM	 = vb->pcm[info->coupling_mag[i]];
+		float	*PCMA	 = vb->pcm[info->coupling_ang[i]];
+		int	Lim	 = (n/2)&(~7);
+		for(j=0;j<Lim;j+=8){
+			__m128	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5;
+			XMM0	 = _mm_load_ps(PCMA+j  );
+			XMM3	 = _mm_load_ps(PCMA+j+4);
+			XMM1	 = _mm_load_ps(PCMM+j  );
+			XMM4	 = _mm_load_ps(PCMM+j+4);
+			XMM2	 = XMM0;
+			XMM5	 = XMM3;
+			XMM0	 = _mm_cmpnle_ps(XMM0, PFV_0.ps);
+			XMM3	 = _mm_cmpnle_ps(XMM3, PFV_0.ps);
+			XMM1	 = _mm_xor_ps(XMM1, XMM2);
+			XMM4	 = _mm_xor_ps(XMM4, XMM5);
+			XMM1	 = _mm_andnot_ps(XMM1, PCS_RRRR.ps);
+			XMM4	 = _mm_andnot_ps(XMM4, PCS_RRRR.ps);
+			XMM1	 = _mm_xor_ps(XMM1, XMM2);
+			XMM4	 = _mm_xor_ps(XMM4, XMM5);
+			XMM2	 = XMM1;
+			XMM5	 = XMM4;
+			XMM1	 = _mm_and_ps(XMM1, XMM0);
+			XMM4	 = _mm_and_ps(XMM4, XMM3);
+			XMM0	 = _mm_andnot_ps(XMM0, XMM2);
+			XMM3	 = _mm_andnot_ps(XMM3, XMM5);
+			XMM2	 = _mm_load_ps(PCMM+j  );
+			XMM5	 = _mm_load_ps(PCMM+j+4);
+			XMM1	 = _mm_add_ps(XMM1, XMM2);
+			XMM4	 = _mm_add_ps(XMM4, XMM5);
+			XMM0	 = _mm_add_ps(XMM0, XMM2);
+			XMM3	 = _mm_add_ps(XMM3, XMM5);
+			_mm_store_ps(PCMA+j  , XMM1);
+			_mm_store_ps(PCMA+j+4, XMM4);
+			_mm_store_ps(PCMM+j  , XMM0);
+			_mm_store_ps(PCMM+j+4, XMM3);
+		}
+		Lim	 = (n/2)&(~3);
+		for(;j<Lim;j+=4){
+			__m128	XMM0, XMM1, XMM2;
+			XMM0	 = _mm_load_ps(PCMA+j  );
+			XMM1	 = _mm_load_ps(PCMM+j  );
+			XMM2	 = XMM0;
+			XMM0	 = _mm_cmpnle_ps(XMM0, PFV_0.ps);
+			XMM1	 = _mm_xor_ps(XMM1, XMM2);
+			XMM1	 = _mm_andnot_ps(XMM1, PCS_RRRR.ps);
+			XMM1	 = _mm_xor_ps(XMM1, XMM2);
+			XMM2	 = XMM1;
+			XMM1	 = _mm_and_ps(XMM1, XMM0);
+			XMM0	 = _mm_andnot_ps(XMM0, XMM2);
+			XMM2	 = _mm_load_ps(PCMM+j  );
+			XMM1	 = _mm_add_ps(XMM1, XMM2);
+			XMM0	 = _mm_add_ps(XMM0, XMM2);
+			_mm_store_ps(PCMA+j  , XMM1);
+			_mm_store_ps(PCMM+j  , XMM0);
+		}
+		for(;j<n/2;j++){
+			float mag=PCMM[j];
+			float ang=PCMA[j];
+
+			if(ang>0){
+				PCMM[j]=mag;
+				PCMA[j]=mag > 0 ? mag-ang : mag+ang;
+			}else{
+				PCMM[j]=mag > 0 ? mag+ang : mag-ang;
+				PCMA[j]=mag;
+			}
+		}
+	}
+#else														/* SSE Optimize */
     float *pcmM=vb->pcm[info->coupling_mag[i]];
     float *pcmA=vb->pcm[info->coupling_ang[i]];
 
@@ -859,6 +1472,7 @@
 	  pcmM[j]=mag-ang;
 	}
     }
+#endif														/* SSE Optimize */
   }
 
   /* compute and apply spectral envelope */
diff -ruN --exclude '*.in' libvorbis-1.2.0/lib/mdct.c libvorbis-1.2.0-sse/lib/mdct.c
--- libvorbis-1.2.0/lib/mdct.c	2007-07-24 02:09:47.000000000 +0200
+++ libvorbis-1.2.0-sse/lib/mdct.c	2007-08-02 12:43:10.000000000 +0200
@@ -45,6 +45,10 @@
 #include "mdct.h"
 #include "os.h"
 #include "misc.h"
+#ifdef __SSE__												/* SSE Optimize */
+#include "xmmlib.h"
+#endif														/* SSE Optimize */
+
 
 /* build lookups for trig functions; also pre-figure scaling and
    some window function algebra. */
@@ -88,10 +92,342 @@
     }
   }
   lookup->scale=FLOAT_CONV(4.f/n);
+#ifdef __SSE__												/* SSE Optimize */
+	{
+		__m128	pscalem	 = _mm_set_ps1(lookup->scale);
+		float *S, *U;
+		int n2	 = n>>1;
+		int n4	 = n>>2;
+		int n8	 = n>>3;
+		int j;
+		/*
+			for mdct_bitreverse
+		*/
+		T	 = _ogg_malloc(sizeof(*T)*n2);
+		lookup->trig_bitreverse	 = T;
+		S	 = lookup->trig+n;
+		for(i=0;i<n4;i+=8)
+		{
+			__m128	XMM0	 = _mm_load_ps(S+i   );
+			__m128	XMM1	 = _mm_load_ps(S+i+ 4);
+			__m128	XMM2	 = XMM0;
+			__m128	XMM3	 = XMM1;
+			XMM2	 = _mm_shuffle_ps(XMM2, XMM2, _MM_SHUFFLE(2,3,0,1));
+			XMM3	 = _mm_shuffle_ps(XMM3, XMM3, _MM_SHUFFLE(2,3,0,1));
+			XMM2	 = _mm_xor_ps(XMM2, PCS_RNRN.ps);
+			XMM3	 = _mm_xor_ps(XMM3, PCS_RNRN.ps);
+			_mm_store_ps(T+i*2   , XMM0);
+			_mm_store_ps(T+i*2+ 4, XMM2);
+			_mm_store_ps(T+i*2+ 8, XMM1);
+			_mm_store_ps(T+i*2+12, XMM3);
+		}
+		/*
+			for mdct_forward part 0
+		*/
+		T	 = _ogg_malloc(sizeof(*T)*(n*2));
+		lookup->trig_forward	 = T;
+		S	 = lookup->trig;
+		for(i=0,j=n2-4;i<n8;i+=4,j-=4)
+		{
+			__m128	XMM0, XMM1, XMM2, XMM3;
+			XMM0	 = _mm_loadl_pi(XMM0, (__m64*)(S+j+2));
+			XMM2	 = _mm_loadl_pi(XMM2, (__m64*)(S+j  ));
+			XMM0	 = _mm_loadh_pi(XMM0, (__m64*)(S+i  ));
+			XMM2	 = _mm_loadh_pi(XMM2, (__m64*)(S+i+2));
+			XMM1	 = XMM0;
+			XMM3	 = XMM2;
+			XMM0	 = _mm_shuffle_ps(XMM0, XMM0, _MM_SHUFFLE(2,3,0,1));
+			XMM2	 = _mm_shuffle_ps(XMM2, XMM2, _MM_SHUFFLE(2,3,0,1));
+			XMM0	 = _mm_xor_ps(XMM0, PCS_RRNN.ps);
+			XMM1	 = _mm_xor_ps(XMM1, PCS_RNNR.ps);
+			XMM2	 = _mm_xor_ps(XMM2, PCS_RRNN.ps);
+			XMM3	 = _mm_xor_ps(XMM3, PCS_RNNR.ps);
+			_mm_store_ps(T+i*4   , XMM0);
+			_mm_store_ps(T+i*4+ 4, XMM1);
+			_mm_store_ps(T+i*4+ 8, XMM2);
+			_mm_store_ps(T+i*4+12, XMM3);
+		}
+		for(;i<n4;i+=4,j-=4)
+		{
+			__m128	XMM0, XMM1, XMM2, XMM3;
+			XMM0	 = _mm_loadl_pi(XMM0, (__m64*)(S+j+2));
+			XMM2	 = _mm_loadl_pi(XMM2, (__m64*)(S+j  ));
+			XMM0	 = _mm_loadh_pi(XMM0, (__m64*)(S+i  ));
+			XMM2	 = _mm_loadh_pi(XMM2, (__m64*)(S+i+2));
+			XMM1	 = XMM0;
+			XMM3	 = XMM2;
+			XMM0	 = _mm_shuffle_ps(XMM0, XMM0, _MM_SHUFFLE(2,3,0,1));
+			XMM2	 = _mm_shuffle_ps(XMM2, XMM2, _MM_SHUFFLE(2,3,0,1));
+			XMM0	 = _mm_xor_ps(XMM0, PCS_NNRR.ps);
+			XMM2	 = _mm_xor_ps(XMM2, PCS_NNRR.ps);
+			XMM1	 = _mm_xor_ps(XMM1, PCS_RNNR.ps);
+			XMM3	 = _mm_xor_ps(XMM3, PCS_RNNR.ps);
+			_mm_store_ps(T+i*4   , XMM0);
+			_mm_store_ps(T+i*4+ 4, XMM1);
+			_mm_store_ps(T+i*4+ 8, XMM2);
+			_mm_store_ps(T+i*4+12, XMM3);
+		}
+		/*
+			for mdct_forward part 1
+		*/
+		T	 = lookup->trig_forward+n;
+		S	 = lookup->trig+n2;
+		for(i=0;i<n4;i+=4){
+			__m128	XMM0, XMM1, XMM2;
+			XMM0	 = _mm_load_ps(S+4);
+			XMM2	 = _mm_load_ps(S  );
+			XMM1	 = XMM0;
+			XMM0	 = _mm_shuffle_ps(XMM0, XMM2,_MM_SHUFFLE(1,3,1,3));
+			XMM1	 = _mm_shuffle_ps(XMM1, XMM2,_MM_SHUFFLE(0,2,0,2));
+			XMM0	 = _mm_mul_ps(XMM0, pscalem);
+			XMM1	 = _mm_mul_ps(XMM1, pscalem);
+			_mm_store_ps(T   , XMM0);
+			_mm_store_ps(T+ 4, XMM1);
+			XMM1	 = _mm_shuffle_ps(XMM1, XMM1, _MM_SHUFFLE(0,1,2,3));
+			XMM0	 = _mm_shuffle_ps(XMM0, XMM0, _MM_SHUFFLE(0,1,2,3));
+			_mm_store_ps(T+ 8, XMM1);
+			_mm_store_ps(T+12, XMM0);
+			S		+= 8;
+			T		+= 16;
+		}
+		/*
+			for mdct_backward part 0
+		*/
+		S	 = U	 = lookup->trig+n4;
+		T	 = _ogg_malloc(sizeof(*T)*(n+n2));
+		lookup->trig_backward	 = T;
+		for(i=0;i<n4;i+=4)
+		{
+			__m128	XMM0, XMM1, XMM2, XMM3;
+			U		-= 4;
+			XMM0	 = _mm_load_ps(S);
+			XMM2	 = _mm_load_ps(U);
+			XMM1	 = XMM0;
+			XMM3	 = XMM2;
+			XMM0	 = _mm_shuffle_ps(XMM0, XMM0, _MM_SHUFFLE(1,1,3,3));
+			XMM1	 = _mm_shuffle_ps(XMM1, XMM1, _MM_SHUFFLE(0,0,2,2));
+			XMM2	 = _mm_shuffle_ps(XMM2, XMM2, _MM_SHUFFLE(0,1,2,3));
+			XMM3	 = _mm_shuffle_ps(XMM3, XMM3, _MM_SHUFFLE(1,0,3,2));
+			XMM0	 = _mm_xor_ps(XMM0, PCS_NRNR.ps);
+			XMM3	 = _mm_xor_ps(XMM3, PCS_NRNR.ps);
+			_mm_store_ps(T   , XMM0);
+			_mm_store_ps(T+ 4, XMM1);
+			_mm_store_ps(T+ 8, XMM2);
+			_mm_store_ps(T+12, XMM3);
+			S		+= 4;
+			T		+= 16;
+		}
+		/*
+			for mdct_backward part 1
+		*/
+		S	 = lookup->trig+n2;
+		T	 = lookup->trig_backward+n;
+		for(i=0;i<n4;i+=4)
+		{
+			__m128	XMM0, XMM1, XMM2;
+			XMM0	 = _mm_load_ps(S  );
+			XMM2	 = _mm_load_ps(S+4);
+			XMM1	 = XMM0;
+			XMM0	 = _mm_shuffle_ps(XMM0, XMM2, _MM_SHUFFLE(3,1,3,1));
+			XMM1	 = _mm_shuffle_ps(XMM1, XMM2, _MM_SHUFFLE(2,0,2,0));
+			_mm_store_ps(T  , XMM0);
+			_mm_store_ps(T+4, XMM1);
+			S		+= 8;
+			T		+= 8;
+		}
+		/*
+			for mdct_butterfly_first
+		*/
+		S	 = lookup->trig;
+		T	 = _ogg_malloc(sizeof(*T)*n*2);
+		lookup->trig_butterfly_first	 = T;
+		for(i=0;i<n4;i+=4)
+		{
+			__m128	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5;
+			XMM2	 = _mm_load_ps(S   );
+			XMM0	 = _mm_load_ps(S+ 4);
+			XMM5	 = _mm_load_ps(S+ 8);
+			XMM3	 = _mm_load_ps(S+12);
+			XMM1	 = XMM0;
+			XMM4	 = XMM3;
+			XMM0	 = _mm_shuffle_ps(XMM0, XMM2, _MM_SHUFFLE(0,1,0,1));
+			XMM1	 = _mm_shuffle_ps(XMM1, XMM2, _MM_SHUFFLE(1,0,1,0));
+			XMM3	 = _mm_shuffle_ps(XMM3, XMM5, _MM_SHUFFLE(0,1,0,1));
+			XMM4	 = _mm_shuffle_ps(XMM4, XMM5, _MM_SHUFFLE(1,0,1,0));
+			XMM1	 = _mm_xor_ps(XMM1, PCS_RNRN.ps);
+			XMM4	 = _mm_xor_ps(XMM4, PCS_RNRN.ps);
+			_mm_store_ps(T   , XMM1);
+			_mm_store_ps(T+ 4, XMM4);
+			_mm_store_ps(T+ 8, XMM0);
+			_mm_store_ps(T+12, XMM3);
+			XMM1	 = _mm_xor_ps(XMM1, PCS_RRRR.ps);
+			XMM4	 = _mm_xor_ps(XMM4, PCS_RRRR.ps);
+			XMM0	 = _mm_xor_ps(XMM0, PCS_RRRR.ps);
+			XMM3	 = _mm_xor_ps(XMM3, PCS_RRRR.ps);
+			_mm_store_ps(T+n   , XMM1);
+			_mm_store_ps(T+n+ 4, XMM4);
+			_mm_store_ps(T+n+ 8, XMM0);
+			_mm_store_ps(T+n+12, XMM3);
+			S	+= 16;
+			T	+= 16;
+		}
+		/*
+			for mdct_butterfly_generic(trigint=8)
+		*/
+		S	 = lookup->trig;
+		T	 = _ogg_malloc(sizeof(*T)*n2);
+		lookup->trig_butterfly_generic8	 = T;
+		for(i=0;i<n;i+=32)
+		{
+			__m128	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5;
+
+			XMM0	 = _mm_load_ps(S+ 24);
+			XMM2	 = _mm_load_ps(S+ 16);
+			XMM3	 = _mm_load_ps(S+  8);
+			XMM5	 = _mm_load_ps(S    );
+			XMM1	 = XMM0;
+			XMM4	 = XMM3;
+			XMM0	 = _mm_shuffle_ps(XMM0, XMM2, _MM_SHUFFLE(0,1,0,1));
+			XMM1	 = _mm_shuffle_ps(XMM1, XMM2, _MM_SHUFFLE(1,0,1,0));
+			XMM3	 = _mm_shuffle_ps(XMM3, XMM5, _MM_SHUFFLE(0,1,0,1));
+			XMM4	 = _mm_shuffle_ps(XMM4, XMM5, _MM_SHUFFLE(1,0,1,0));
+			XMM1	 = _mm_xor_ps(XMM1, PCS_RNRN.ps);
+			XMM4	 = _mm_xor_ps(XMM4, PCS_RNRN.ps);
+			_mm_store_ps(T   , XMM0);
+			_mm_store_ps(T+ 4, XMM1);
+			_mm_store_ps(T+ 8, XMM3);
+			_mm_store_ps(T+12, XMM4);
+			S	+= 32;
+			T	+= 16;
+		}
+		/*
+			for mdct_butterfly_generic(trigint=16)
+		*/
+		S	 = lookup->trig;
+		T	 = _ogg_malloc(sizeof(*T)*n4);
+		lookup->trig_butterfly_generic16	 = T;
+		for(i=0;i<n;i+=64)
+		{
+			__m128	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5;
+
+			XMM0	 = _mm_load_ps(S+ 48);
+			XMM2	 = _mm_load_ps(S+ 32);
+			XMM3	 = _mm_load_ps(S+ 16);
+			XMM5	 = _mm_load_ps(S    );
+			XMM1	 = XMM0;
+			XMM4	 = XMM3;
+			XMM0	 = _mm_shuffle_ps(XMM0, XMM2, _MM_SHUFFLE(0,1,0,1));
+			XMM1	 = _mm_shuffle_ps(XMM1, XMM2, _MM_SHUFFLE(1,0,1,0));
+			XMM3	 = _mm_shuffle_ps(XMM3, XMM5, _MM_SHUFFLE(0,1,0,1));
+			XMM4	 = _mm_shuffle_ps(XMM4, XMM5, _MM_SHUFFLE(1,0,1,0));
+			XMM1	 = _mm_xor_ps(XMM1, PCS_RNRN.ps);
+			XMM4	 = _mm_xor_ps(XMM4, PCS_RNRN.ps);
+			_mm_store_ps(T   , XMM0);
+			_mm_store_ps(T+ 4, XMM1);
+			_mm_store_ps(T+ 8, XMM3);
+			_mm_store_ps(T+12, XMM4);
+			S	+= 64;
+			T	+= 16;
+		}
+		/*
+			for mdct_butterfly_generic(trigint=32)
+		*/
+		if(n<128)
+			lookup->trig_butterfly_generic32	 = NULL;
+		else
+		{
+			S	 = lookup->trig;
+			T	 = _ogg_malloc(sizeof(*T)*n8);
+			lookup->trig_butterfly_generic32	 = T;
+			for(i=0;i<n;i+=128)
+			{
+				__m128	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5;
+	
+				XMM0	 = _mm_load_ps(S+ 96);
+				XMM2	 = _mm_load_ps(S+ 64);
+				XMM3	 = _mm_load_ps(S+ 32);
+				XMM5	 = _mm_load_ps(S    );
+				XMM1	 = XMM0;
+				XMM4	 = XMM3;
+				XMM0	 = _mm_shuffle_ps(XMM0, XMM2, _MM_SHUFFLE(0,1,0,1));
+				XMM1	 = _mm_shuffle_ps(XMM1, XMM2, _MM_SHUFFLE(1,0,1,0));
+				XMM3	 = _mm_shuffle_ps(XMM3, XMM5, _MM_SHUFFLE(0,1,0,1));
+				XMM4	 = _mm_shuffle_ps(XMM4, XMM5, _MM_SHUFFLE(1,0,1,0));
+				XMM1	 = _mm_xor_ps(XMM1, PCS_RNRN.ps);
+				XMM4	 = _mm_xor_ps(XMM4, PCS_RNRN.ps);
+				_mm_store_ps(T   , XMM0);
+				_mm_store_ps(T+ 4, XMM1);
+				_mm_store_ps(T+ 8, XMM3);
+				_mm_store_ps(T+12, XMM4);
+				S	+= 128;
+				T	+= 16;
+			}
+		}
+		/*
+			for mdct_butterfly_generic(trigint=64)
+		*/
+		if(n<256)
+			lookup->trig_butterfly_generic64	 = NULL;
+		else
+		{
+			S	 = lookup->trig;
+			T	 = _ogg_malloc(sizeof(*T)*(n8>>1));
+			lookup->trig_butterfly_generic64	 = T;
+			for(i=0;i<n;i+=256)
+			{
+				__m128	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5;
+	
+				XMM0	 = _mm_load_ps(S+192);
+				XMM2	 = _mm_load_ps(S+128);
+				XMM3	 = _mm_load_ps(S+ 64);
+				XMM5	 = _mm_load_ps(S    );
+				XMM1	 = XMM0;
+				XMM4	 = XMM3;
+				XMM0	 = _mm_shuffle_ps(XMM0, XMM2, _MM_SHUFFLE(0,1,0,1));
+				XMM1	 = _mm_shuffle_ps(XMM1, XMM2, _MM_SHUFFLE(1,0,1,0));
+				XMM3	 = _mm_shuffle_ps(XMM3, XMM5, _MM_SHUFFLE(0,1,0,1));
+				XMM4	 = _mm_shuffle_ps(XMM4, XMM5, _MM_SHUFFLE(1,0,1,0));
+				XMM1	 = _mm_xor_ps(XMM1, PCS_RNRN.ps);
+				XMM4	 = _mm_xor_ps(XMM4, PCS_RNRN.ps);
+				_mm_store_ps(T   , XMM0);
+				_mm_store_ps(T+ 4, XMM1);
+				_mm_store_ps(T+ 8, XMM3);
+				_mm_store_ps(T+12, XMM4);
+				S	+= 256;
+				T	+= 16;
+			}
+		}
+	}
+#endif														/* SSE Optimize */
 }
 
 /* 8 point butterfly (in place, 4 register) */
 STIN void mdct_butterfly_8(DATA_TYPE *x){
+#ifdef __SSE__												/* SSE Optimize */
+	__m128	XMM0, XMM1, XMM2, XMM3;
+	XMM0	 = _mm_load_ps(x+4);
+	XMM1	 = _mm_load_ps(x  );
+	XMM2	 = XMM0;
+	XMM0	 = _mm_sub_ps(XMM0, XMM1);
+	XMM2	 = _mm_add_ps(XMM2, XMM1);
+
+	XMM1	 = XMM0;
+	XMM3	 = XMM2;
+
+	XMM0	 = _mm_shuffle_ps(XMM0, XMM0, _MM_SHUFFLE(3,2,3,2));
+	XMM1	 = _mm_shuffle_ps(XMM1, XMM1, _MM_SHUFFLE(0,1,0,1));
+	XMM2	 = _mm_shuffle_ps(XMM2, XMM2, _MM_SHUFFLE(3,2,3,2));
+	XMM3	 = _mm_shuffle_ps(XMM3, XMM3, _MM_SHUFFLE(1,0,1,0));
+
+	XMM1	 = _mm_xor_ps(XMM1, PCS_NRRN.ps);
+	XMM3	 = _mm_xor_ps(XMM3, PCS_NNRR.ps);
+
+	XMM0	 = _mm_add_ps(XMM0, XMM1);
+	XMM2	 = _mm_add_ps(XMM2, XMM3);
+
+	_mm_store_ps(x  , XMM0);
+	_mm_store_ps(x+4, XMM2);
+#else														/* SSE Optimize */
   REG_TYPE r0   = x[6] + x[2];
   REG_TYPE r1   = x[6] - x[2];
   REG_TYPE r2   = x[4] + x[0];
@@ -112,10 +448,49 @@
 	   x[7] = r1   + r0;
 	   x[5] = r1   - r0;
 	   
+#endif														/* SSE Optimize */
 }
 
 /* 16 point butterfly (in place, 4 register) */
 STIN void mdct_butterfly_16(DATA_TYPE *x){
+#ifdef __SSE__												/* SSE Optimize */
+	static _MM_ALIGN16 const float PFV0[4] = { cPI2_8,  cPI2_8,     1.f,    -1.f};
+	static _MM_ALIGN16 const float PFV1[4] = { cPI2_8, -cPI2_8,     0.f,     0.f};
+	static _MM_ALIGN16 const float PFV2[4] = { cPI2_8,  cPI2_8,     1.f,     1.f};
+	static _MM_ALIGN16 const float PFV3[4] = {-cPI2_8,  cPI2_8,     0.f,     0.f};
+	__m128	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7;
+
+	XMM3	 = _mm_load_ps(x+12);
+	XMM0	 = _mm_load_ps(x   );
+	XMM1	 = _mm_load_ps(x+ 4);
+	XMM2	 = _mm_load_ps(x+ 8);
+	XMM4	 = XMM3;
+	XMM5	 = XMM0;
+	XMM0	 = _mm_sub_ps(XMM0, XMM2);
+	XMM3	 = _mm_sub_ps(XMM3, XMM1);
+	XMM2	 = _mm_add_ps(XMM2, XMM5);
+	XMM4	 = _mm_add_ps(XMM4, XMM1);
+	XMM1	 = XMM0;
+	XMM5	 = XMM3;
+	_mm_store_ps(x+ 8, XMM2);
+	_mm_store_ps(x+12, XMM4);
+	XMM0	 = _mm_shuffle_ps(XMM0, XMM0, _MM_SHUFFLE(2,3,1,1));
+	XMM2	 = _mm_load_ps(PFV0);
+	XMM1	 = _mm_shuffle_ps(XMM1, XMM1, _MM_SHUFFLE(2,3,0,0));
+	XMM4	 = _mm_load_ps(PFV1);
+	XMM3	 = _mm_shuffle_ps(XMM3, XMM3, _MM_SHUFFLE(3,2,0,0));
+	XMM6	 = _mm_load_ps(PFV2);
+	XMM5	 = _mm_shuffle_ps(XMM5, XMM5, _MM_SHUFFLE(3,2,1,1));
+	XMM7	 = _mm_load_ps(PFV3);
+	XMM0	 = _mm_mul_ps(XMM0, XMM2);
+	XMM1	 = _mm_mul_ps(XMM1, XMM4);
+	XMM3	 = _mm_mul_ps(XMM3, XMM6);
+	XMM5	 = _mm_mul_ps(XMM5, XMM7);
+	XMM0	 = _mm_add_ps(XMM0, XMM1);
+	XMM3	 = _mm_add_ps(XMM3, XMM5);
+	_mm_store_ps(x   , XMM0);
+	_mm_store_ps(x+ 4, XMM3);
+#else														/* SSE Optimize */
   REG_TYPE r0     = x[1]  - x[9];
   REG_TYPE r1     = x[0]  - x[8];
 
@@ -144,6 +519,7 @@
            x[15] += x[7];
            x[6]  = r0;
            x[7]  = r1;
+#endif														/* SSE Optimize */
 
 	   mdct_butterfly_8(x);
 	   mdct_butterfly_8(x+8);
@@ -151,6 +527,85 @@
 
 /* 32 point butterfly (in place, 4 register) */
 STIN void mdct_butterfly_32(DATA_TYPE *x){
+#ifdef __SSE__												/* SSE Optimize */
+	static _MM_ALIGN16 const __m128x PFV0	 =
+		{ .sf = {-cPI3_8, -cPI1_8, -cPI2_8, -cPI2_8} };
+	static _MM_ALIGN16 const __m128x PFV1	 =
+		{ .sf = {-cPI1_8,  cPI3_8, -cPI2_8,  cPI2_8} };
+	static _MM_ALIGN16 const __m128x PFV2	 =
+		{ .sf = {-cPI1_8, -cPI3_8,    -1.f,     1.f} };
+	static _MM_ALIGN16 const __m128x PFV3	 =
+		{ .sf = {-cPI3_8,  cPI1_8,     0.f,     0.f} };
+	static _MM_ALIGN16 const __m128x PFV4	 =
+		{ .sf = { cPI3_8,  cPI3_8,  cPI2_8,  cPI2_8} };
+	static _MM_ALIGN16 const __m128x PFV5	 =
+		{ .sf = {-cPI1_8,  cPI1_8, -cPI2_8,  cPI2_8} };
+	static _MM_ALIGN16 const __m128x PFV6	 =
+		{ .sf = { cPI1_8,  cPI3_8,     1.f,     1.f} };
+	static _MM_ALIGN16 const __m128x PFV7	 =
+		{ .sf = {-cPI3_8,  cPI1_8,     0.f,     0.f} };
+	__m128	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7;
+
+	XMM0	 = _mm_load_ps(x+16);
+	XMM1	 = _mm_load_ps(x+20);
+	XMM2	 = _mm_load_ps(x+24);
+	XMM3	 = _mm_load_ps(x+28);
+	XMM4	 = XMM0;
+	XMM5	 = XMM1;
+	XMM6	 = XMM2;
+	XMM7	 = XMM3;
+
+	XMM0	 = _mm_sub_ps(XMM0, PM128(x   ));
+	XMM1	 = _mm_sub_ps(XMM1, PM128(x+ 4));
+	XMM2	 = _mm_sub_ps(XMM2, PM128(x+ 8));
+	XMM3	 = _mm_sub_ps(XMM3, PM128(x+12));
+	XMM4	 = _mm_add_ps(XMM4, PM128(x   ));
+	XMM5	 = _mm_add_ps(XMM5, PM128(x+ 4));
+	XMM6	 = _mm_add_ps(XMM6, PM128(x+ 8));
+	XMM7	 = _mm_add_ps(XMM7, PM128(x+12));
+	_mm_store_ps(x+16, XMM4);
+	_mm_store_ps(x+20, XMM5);
+	_mm_store_ps(x+24, XMM6);
+	_mm_store_ps(x+28, XMM7);
+
+#if	defined(__SSE3__)
+	XMM4	 = _mm_moveldup_ps(XMM0);
+	XMM5	 = XMM1;
+	XMM0	 = _mm_movehdup_ps(XMM0);
+	XMM6	 = XMM2;
+	XMM7	 = XMM3;
+#else
+	XMM4	 = XMM0;
+	XMM5	 = XMM1;
+	XMM6	 = XMM2;
+	XMM7	 = XMM3;
+
+	XMM0	 = _mm_shuffle_ps(XMM0, XMM0, _MM_SHUFFLE(3,3,1,1));
+	XMM4	 = _mm_shuffle_ps(XMM4, XMM4, _MM_SHUFFLE(2,2,0,0));
+#endif
+	XMM1	 = _mm_shuffle_ps(XMM1, XMM1, _MM_SHUFFLE(2,3,1,1));
+	XMM5	 = _mm_shuffle_ps(XMM5, XMM5, _MM_SHUFFLE(2,3,0,0));
+	XMM2	 = _mm_shuffle_ps(XMM2, XMM2, _MM_SHUFFLE(2,2,1,0));
+	XMM6	 = _mm_shuffle_ps(XMM6, XMM6, _MM_SHUFFLE(3,3,0,1));
+	XMM3	 = _mm_shuffle_ps(XMM3, XMM3, _MM_SHUFFLE(3,2,0,0));
+	XMM7	 = _mm_shuffle_ps(XMM7, XMM7, _MM_SHUFFLE(3,2,1,1));
+	XMM0	 = _mm_mul_ps(XMM0, PFV0.ps);
+	XMM4	 = _mm_mul_ps(XMM4, PFV1.ps);
+	XMM1	 = _mm_mul_ps(XMM1, PFV2.ps);
+	XMM5	 = _mm_mul_ps(XMM5, PFV3.ps);
+	XMM2	 = _mm_mul_ps(XMM2, PFV4.ps);
+	XMM6	 = _mm_mul_ps(XMM6, PFV5.ps);
+	XMM3	 = _mm_mul_ps(XMM3, PFV6.ps);
+	XMM7	 = _mm_mul_ps(XMM7, PFV7.ps);
+	XMM0	 = _mm_add_ps(XMM0, XMM4);
+	XMM1	 = _mm_add_ps(XMM1, XMM5);
+	XMM2	 = _mm_add_ps(XMM2, XMM6);
+	XMM3	 = _mm_add_ps(XMM3, XMM7);
+	_mm_store_ps(x   , XMM0);
+	_mm_store_ps(x+ 4, XMM1);
+	_mm_store_ps(x+ 8, XMM2);
+	_mm_store_ps(x+12, XMM3);
+#else														/* SSE Optimize */
   REG_TYPE r0     = x[30] - x[14];
   REG_TYPE r1     = x[31] - x[15];
 
@@ -207,6 +662,7 @@
 	   x[17] += x[1];
 	   x[0]   = MULT_NORM( r1 * cPI3_8  +  r0 * cPI1_8 );
 	   x[1]   = MULT_NORM( r1 * cPI1_8  -  r0 * cPI3_8 );
+#endif														/* SSE Optimize */
 
 	   mdct_butterfly_16(x);
 	   mdct_butterfly_16(x+16);
@@ -214,10 +670,194 @@
 }
 
 /* N point first stage butterfly (in place, 2 register) */
+#ifdef __SSE__												/* SSE Optimize */
+STIN void mdct_butterfly_first_backward(int n,float *T,	/* n: float offset added to T before stage 3; T: coefficient table, 16 floats read per iteration */
+					float *x,	/* x: buffer transformed in place; x and T must be 16-byte aligned (_mm_load_ps/_mm_store_ps) */
+					int points, float *zX0, float *zX1)	/* points: length of x in floats; zX0/zX1: lower bounds that end stage 1 (tested on X2) and stage 2 (tested on X1) */
+{
+	float	*X1	 = x +  points - 8;	/* cursor in the upper half, stepping down 8 floats per iteration */
+	float	*X2	 = x + (points>>1) - 8;	/* cursor in the lower half, moving in lockstep with X1 */
+
+	/* Stage 1, while X2 >= zX0: X2 is never read here (original note: X2[*]=0.f),
+		so X1 is left untouched and X2 receives the rotation of X1 itself.
+	*/
+	while(X2>=zX0){
+		__m128	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7;
+		XMM0	 = _mm_load_ps(X1+4);
+		XMM1	 = _mm_load_ps(X1  );
+#if	defined(__SSE3__)
+		XMM2	 = _mm_moveldup_ps(XMM0);	/* even lanes duplicated: (a0,a0,a2,a2) */
+		XMM3	 = _mm_moveldup_ps(XMM1);
+		XMM0	 = _mm_movehdup_ps(XMM0);	/* odd lanes duplicated: (a1,a1,a3,a3) */
+		XMM1	 = _mm_movehdup_ps(XMM1);
+#else
+		XMM2	 = XMM0;
+		XMM3	 = XMM1;
+		XMM0	 = _mm_shuffle_ps(XMM0 , XMM0 , _MM_SHUFFLE(3,3,1,1));	/* same lane pattern as movehdup */
+		XMM1	 = _mm_shuffle_ps(XMM1 , XMM1 , _MM_SHUFFLE(3,3,1,1));
+		XMM2	 = _mm_shuffle_ps(XMM2 , XMM2 , _MM_SHUFFLE(2,2,0,0));	/* same lane pattern as moveldup */
+		XMM3	 = _mm_shuffle_ps(XMM3 , XMM3 , _MM_SHUFFLE(2,2,0,0));
+#endif
+		XMM4	 = _mm_load_ps(T   );
+		XMM5	 = _mm_load_ps(T+ 4);
+		XMM6	 = _mm_load_ps(T+ 8);
+		XMM7	 = _mm_load_ps(T+12);
+		XMM2	 = _mm_mul_ps(XMM2, XMM4);
+		XMM3	 = _mm_mul_ps(XMM3, XMM5);
+		XMM0	 = _mm_mul_ps(XMM0, XMM6);
+		XMM1	 = _mm_mul_ps(XMM1, XMM7);
+		XMM0	 = _mm_add_ps(XMM0, XMM2);	/* odd-dup*T[8..11] + even-dup*T[0..3], for the X1+4 quad */
+		XMM1	 = _mm_add_ps(XMM1, XMM3);	/* odd-dup*T[12..15] + even-dup*T[4..7], for the X1 quad */
+		_mm_store_ps(X2+4, XMM0);
+		_mm_store_ps(X2  , XMM1);
+		X1	-= 8;
+		X2	-= 8;
+		T	+= 16;
+	}
+	/* Stage 2, while X1 >= zX1: full butterfly. X1 receives X1+X2, X2 receives the rotated X1-X2.
+		X2 stays points-(points>>1) floats below X1, so zX1 must be at least x plus that gap or X2 would run below x.
+	*/
+	while(X1>=zX1){
+		__m128	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7;
+		XMM0	 = _mm_load_ps(X1+4);
+		XMM1	 = _mm_load_ps(X1  );
+		XMM2	 = _mm_load_ps(X2+4);
+		XMM3	 = _mm_load_ps(X2  );
+		XMM4	 = XMM0;
+		XMM5	 = XMM1;
+		XMM0	 = _mm_sub_ps(XMM0, XMM2);	/* difference X1-X2, rotated below */
+		XMM1	 = _mm_sub_ps(XMM1, XMM3);
+		XMM4	 = _mm_add_ps(XMM4, XMM2);	/* sum X1+X2, stored back to X1 */
+		XMM5	 = _mm_add_ps(XMM5, XMM3);
+#if	defined(__SSE3__)
+		XMM2	 = _mm_moveldup_ps(XMM0);
+		XMM3	 = _mm_moveldup_ps(XMM1);
+		_mm_store_ps(X1+4, XMM4);
+		_mm_store_ps(X1  , XMM5);
+		XMM0	 = _mm_movehdup_ps(XMM0);
+		XMM1	 = _mm_movehdup_ps(XMM1);
+#else
+		XMM2	 = XMM0;
+		XMM3	 = XMM1;
+		_mm_store_ps(X1+4, XMM4);
+		_mm_store_ps(X1  , XMM5);
+		XMM0	 = _mm_shuffle_ps(XMM0 , XMM0 , _MM_SHUFFLE(3,3,1,1));
+		XMM1	 = _mm_shuffle_ps(XMM1 , XMM1 , _MM_SHUFFLE(3,3,1,1));
+		XMM2	 = _mm_shuffle_ps(XMM2 , XMM2 , _MM_SHUFFLE(2,2,0,0));
+		XMM3	 = _mm_shuffle_ps(XMM3 , XMM3 , _MM_SHUFFLE(2,2,0,0));
+#endif
+		XMM4	 = _mm_load_ps(T   );
+		XMM5	 = _mm_load_ps(T+ 4);
+		XMM6	 = _mm_load_ps(T+ 8);
+		XMM7	 = _mm_load_ps(T+12);
+		XMM2	 = _mm_mul_ps(XMM2, XMM4);
+		XMM3	 = _mm_mul_ps(XMM3, XMM5);
+		XMM0	 = _mm_mul_ps(XMM0, XMM6);
+		XMM1	 = _mm_mul_ps(XMM1, XMM7);
+		XMM0	 = _mm_add_ps(XMM0, XMM2);
+		XMM1	 = _mm_add_ps(XMM1, XMM3);
+		_mm_store_ps(X2+4, XMM0);
+		_mm_store_ps(X2  , XMM1);
+		X1	-= 8;
+		X2	-= 8;
+		T	+= 16;
+	}
+	/* Stage 3, while X2 >= x: X1 is never read here (original note: X1[*]=0.f).
+		X2 is copied to X1, then rotated with coefficients taken n floats further into T.
+	*/
+	T	+= n;	/* NOTE(review): presumably selects a table variant for this case -- confirm where trig_butterfly_first is built */
+	while(X2>=x){
+		__m128	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7;
+		XMM0	 = _mm_load_ps(X2+4);
+		XMM1	 = _mm_load_ps(X2  );
+		_mm_store_ps(X1+4, XMM0);	/* X1 = X2 (plain copy) */
+		_mm_store_ps(X1  , XMM1);
+#if	defined(__SSE3__)
+		XMM2	 = _mm_moveldup_ps(XMM0);
+		XMM3	 = _mm_moveldup_ps(XMM1);
+		XMM0	 = _mm_movehdup_ps(XMM0);
+		XMM1	 = _mm_movehdup_ps(XMM1);
+#else
+		XMM2	 = XMM0;
+		XMM3	 = XMM1;
+		XMM0	 = _mm_shuffle_ps(XMM0 , XMM0 , _MM_SHUFFLE(3,3,1,1));
+		XMM1	 = _mm_shuffle_ps(XMM1 , XMM1 , _MM_SHUFFLE(3,3,1,1));
+		XMM2	 = _mm_shuffle_ps(XMM2 , XMM2 , _MM_SHUFFLE(2,2,0,0));
+		XMM3	 = _mm_shuffle_ps(XMM3 , XMM3 , _MM_SHUFFLE(2,2,0,0));
+#endif
+		XMM4	 = _mm_load_ps(T   );
+		XMM5	 = _mm_load_ps(T+ 4);
+		XMM6	 = _mm_load_ps(T+ 8);
+		XMM7	 = _mm_load_ps(T+12);
+		XMM2	 = _mm_mul_ps(XMM2, XMM4);
+		XMM3	 = _mm_mul_ps(XMM3, XMM5);
+		XMM0	 = _mm_mul_ps(XMM0, XMM6);
+		XMM1	 = _mm_mul_ps(XMM1, XMM7);
+		XMM0	 = _mm_add_ps(XMM0, XMM2);
+		XMM1	 = _mm_add_ps(XMM1, XMM3);
+		_mm_store_ps(X2+4, XMM0);
+		_mm_store_ps(X2  , XMM1);
+		X1	-= 8;
+		X2	-= 8;
+		T	+= 16;
+	}
+}
+#endif														/* SSE Optimize */
+
 STIN void mdct_butterfly_first(DATA_TYPE *T,
 					DATA_TYPE *x,
 					int points){
   
+#ifdef __SSE__												/* SSE Optimize */
+	float	*X1	 = x +  points - 8;
+	float	*X2	 = x + (points>>1) - 8;
+
+	do{
+		__m128	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7;
+		XMM0	 = _mm_load_ps(X1+4);
+		XMM1	 = _mm_load_ps(X1  );
+		XMM2	 = _mm_load_ps(X2+4);
+		XMM3	 = _mm_load_ps(X2  );
+		XMM4	 = XMM0;
+		XMM5	 = XMM1;
+		XMM0	 = _mm_sub_ps(XMM0, XMM2);
+		XMM1	 = _mm_sub_ps(XMM1, XMM3);
+		XMM4	 = _mm_add_ps(XMM4, XMM2);
+		XMM5	 = _mm_add_ps(XMM5, XMM3);
+#if	defined(__SSE3__)
+		XMM2	 = _mm_moveldup_ps(XMM0);
+		XMM3	 = _mm_moveldup_ps(XMM1);
+		_mm_store_ps(X1+4, XMM4);
+		_mm_store_ps(X1  , XMM5);
+		XMM0	 = _mm_movehdup_ps(XMM0);
+		XMM1	 = _mm_movehdup_ps(XMM1);
+#else
+		XMM2	 = XMM0;
+		XMM3	 = XMM1;
+		_mm_store_ps(X1+4, XMM4);
+		_mm_store_ps(X1  , XMM5);
+		XMM0	 = _mm_shuffle_ps(XMM0 , XMM0 , _MM_SHUFFLE(3,3,1,1));
+		XMM1	 = _mm_shuffle_ps(XMM1 , XMM1 , _MM_SHUFFLE(3,3,1,1));
+		XMM2	 = _mm_shuffle_ps(XMM2 , XMM2 , _MM_SHUFFLE(2,2,0,0));
+		XMM3	 = _mm_shuffle_ps(XMM3 , XMM3 , _MM_SHUFFLE(2,2,0,0));
+#endif
+		XMM4	 = _mm_load_ps(T   );
+		XMM5	 = _mm_load_ps(T+ 4);
+		XMM6	 = _mm_load_ps(T+ 8);
+		XMM7	 = _mm_load_ps(T+12);
+		XMM2	 = _mm_mul_ps(XMM2, XMM4);
+		XMM3	 = _mm_mul_ps(XMM3, XMM5);
+		XMM0	 = _mm_mul_ps(XMM0, XMM6);
+		XMM1	 = _mm_mul_ps(XMM1, XMM7);
+		XMM0	 = _mm_add_ps(XMM0, XMM2);
+		XMM1	 = _mm_add_ps(XMM1, XMM3);
+		_mm_store_ps(X2+4, XMM0);
+		_mm_store_ps(X2  , XMM1);
+		X1	-= 8;
+		X2	-= 8;
+		T	+= 16;
+	}while(X2>=x);
+#else														/* SSE Optimize */
   DATA_TYPE *x1        = x          + points      - 8;
   DATA_TYPE *x2        = x          + (points>>1) - 8;
   REG_TYPE   r0;
@@ -258,14 +898,144 @@
     T+=16;
 
   }while(x2>=x);
+#endif														/* SSE Optimize */
 }
 
 /* N/stage point generic N stage butterfly (in place, 2 register) */
+#ifdef __SSE__												/* SSE Optimize */
+STIN void mdct_butterfly_generic(mdct_lookup *init,
+#else														/* SSE Optimize */
 STIN void mdct_butterfly_generic(DATA_TYPE *T,
+#endif														/* SSE Optimize */
 					  DATA_TYPE *x,
 					  int points,
 					  int trigint){
   
+#ifdef __SSE__												/* SSE Optimize */
+	float *T;
+	float *x1	 = x +  points     - 8;
+	float *x2	 = x + (points>>1) - 8;
+	switch(trigint)
+	{
+		default :
+			T	 = init->trig;
+			do
+			{
+				__m128	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6;
+				XMM0	 = _mm_load_ps(x1  );
+				XMM1	 = _mm_load_ps(x2  );
+				XMM2	 = _mm_load_ps(x1+4);
+				XMM3	 = _mm_load_ps(x2+4);
+				XMM4	 = XMM0;
+				XMM5	 = XMM2;
+				XMM0	 = _mm_sub_ps(XMM0, XMM1);
+				XMM2	 = _mm_sub_ps(XMM2, XMM3);
+				XMM4	 = _mm_add_ps(XMM4, XMM1);
+				XMM5	 = _mm_add_ps(XMM5, XMM3);
+				XMM1	 = XMM0;
+				XMM3	 = XMM2;
+				_mm_store_ps(x1  , XMM4);
+				_mm_store_ps(x1+4, XMM5);
+#if	defined(__SSE3__)
+				XMM0	 = _mm_movehdup_ps(XMM0);
+				XMM1	 = _mm_moveldup_ps(XMM1);
+				XMM2	 = _mm_movehdup_ps(XMM2);
+				XMM3	 = _mm_moveldup_ps(XMM3);
+#else
+				XMM0	 = _mm_shuffle_ps(XMM0, XMM0, _MM_SHUFFLE(3,3,1,1));
+				XMM1	 = _mm_shuffle_ps(XMM1, XMM1, _MM_SHUFFLE(2,2,0,0));
+				XMM2	 = _mm_shuffle_ps(XMM2, XMM2, _MM_SHUFFLE(3,3,1,1));
+				XMM3	 = _mm_shuffle_ps(XMM3, XMM3, _MM_SHUFFLE(2,2,0,0));
+#endif
+				XMM4	 = _mm_load_ps(T+trigint*3);
+				XMM5	 = _mm_load_ps(T+trigint*3);
+				XMM6	 = _mm_load_ps(T+trigint*2);
+				XMM1	 = _mm_xor_ps(XMM1, PCS_RNRN.ps);
+				XMM4	 = _mm_shuffle_ps(XMM4, XMM6, _MM_SHUFFLE(0,1,0,1));
+				XMM5	 = _mm_shuffle_ps(XMM5, XMM6, _MM_SHUFFLE(1,0,1,0));
+				XMM0	 = _mm_mul_ps(XMM0, XMM4);
+				XMM1	 = _mm_mul_ps(XMM1, XMM5);
+				XMM4	 = _mm_load_ps(T+trigint  );
+				XMM5	 = _mm_load_ps(T+trigint  );
+				XMM6	 = _mm_load_ps(T          );
+				XMM3	 = _mm_xor_ps(XMM3, PCS_RNRN.ps);
+				XMM4	 = _mm_shuffle_ps(XMM4, XMM6, _MM_SHUFFLE(0,1,0,1));
+				XMM5	 = _mm_shuffle_ps(XMM5, XMM6, _MM_SHUFFLE(1,0,1,0));
+				XMM2	 = _mm_mul_ps(XMM2, XMM4);
+				XMM3	 = _mm_mul_ps(XMM3, XMM5);
+				XMM0	 = _mm_add_ps(XMM0, XMM1);
+				XMM2	 = _mm_add_ps(XMM2, XMM3);
+				_mm_store_ps(x2  , XMM0);
+				_mm_store_ps(x2+4, XMM2);
+				T	+= trigint*4;
+				x1	-= 8;
+				x2	-= 8;
+			}
+			while(x2>=x);
+			return;
+		case  8:
+			T	 = init->trig_butterfly_generic8;
+			break;
+		case 16:
+			T	 = init->trig_butterfly_generic16;
+			break;
+		case 32:
+			T	 = init->trig_butterfly_generic32;
+			break;
+		case 64:
+			T	 = init->trig_butterfly_generic64;
+			break;
+	}
+	_mm_prefetch(T   , _MM_HINT_NTA);
+	do
+	{
+		__m128	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7;
+		_mm_prefetch(T+16, _MM_HINT_NTA);
+		XMM0	 = _mm_load_ps(x1  );
+		XMM1	 = _mm_load_ps(x2  );
+		XMM2	 = _mm_load_ps(x1+4);
+		XMM3	 = _mm_load_ps(x2+4);
+		XMM4	 = XMM0;
+		XMM5	 = XMM2;
+		XMM0	 = _mm_sub_ps(XMM0, XMM1);
+		XMM2	 = _mm_sub_ps(XMM2, XMM3);
+		XMM4	 = _mm_add_ps(XMM4, XMM1);
+		XMM5	 = _mm_add_ps(XMM5, XMM3);
+#if	defined(__SSE3__)
+		XMM1	 = _mm_moveldup_ps(XMM0);
+		XMM3	 = _mm_moveldup_ps(XMM2);
+		_mm_store_ps(x1  , XMM4);
+		_mm_store_ps(x1+4, XMM5);
+		XMM0	 = _mm_movehdup_ps(XMM0);
+		XMM2	 = _mm_movehdup_ps(XMM2);
+#else
+		XMM1	 = XMM0;
+		XMM3	 = XMM2;
+		XMM0	 = _mm_shuffle_ps(XMM0, XMM0, _MM_SHUFFLE(3,3,1,1));
+		XMM1	 = _mm_shuffle_ps(XMM1, XMM1, _MM_SHUFFLE(2,2,0,0));
+		_mm_store_ps(x1  , XMM4);
+		_mm_store_ps(x1+4, XMM5);
+		XMM2	 = _mm_shuffle_ps(XMM2, XMM2, _MM_SHUFFLE(3,3,1,1));
+		XMM3	 = _mm_shuffle_ps(XMM3, XMM3, _MM_SHUFFLE(2,2,0,0));
+#endif
+		XMM4	 = _mm_load_ps(T   );
+		XMM5	 = _mm_load_ps(T+ 4);
+		XMM6	 = _mm_load_ps(T+ 8);
+		XMM7	 = _mm_load_ps(T+12);
+		XMM0	 = _mm_mul_ps(XMM0, XMM4);
+		XMM1	 = _mm_mul_ps(XMM1, XMM5);
+		XMM2	 = _mm_mul_ps(XMM2, XMM6);
+		XMM3	 = _mm_mul_ps(XMM3, XMM7);
+		XMM0	 = _mm_add_ps(XMM0, XMM1);
+		XMM2	 = _mm_add_ps(XMM2, XMM3);
+		_mm_store_ps(x2  , XMM0);
+		_mm_store_ps(x2+4, XMM2);
+		T	+= 16;
+		x1	-= 8;
+		x2	-= 8;
+	}
+	while(x2>=x);
+#else														/* SSE Optimize */
   DATA_TYPE *x1        = x          + points      - 8;
   DATA_TYPE *x2        = x          + (points>>1) - 8;
   REG_TYPE   r0;
@@ -312,23 +1082,57 @@
     x2-=8;
 
   }while(x2>=x);
+#endif														/* SSE Optimize */
+}
+
+#ifdef __SSE__												/* SSE Optimize */
+STIN void mdct_butterflies_backward(mdct_lookup *init,	/* butterfly passes used by mdct_backward; init supplies log2n, n and the coefficient tables */
+			     float *x,	/* x: buffer of `points` floats, transformed in place */
+			     int points, float *x0, float *x1){	/* x0/x1: forwarded unchanged as zX0/zX1 to the first stage; unused when that stage is skipped */
+  
+  int stages=init->log2n-5;
+  int i,j;
+  
+  if(--stages>0){	/* the first stage runs only when log2n-5 exceeds 1 */
+    mdct_butterfly_first_backward(init->n,init->trig_butterfly_first,x,points,x0,x1);
+  }
+
+  for(i=1;--stages>0;i++){	/* pass i: 1<<i sub-blocks of points>>i floats each, coefficient stride 4<<i */
+    for(j=0;j<(1<<i);j++)
+      mdct_butterfly_generic(init,x+(points>>i)*j,points>>i,4<<i);
+  }
+
+  for(j=0;j<points;j+=32)	/* finish with a 32-point butterfly on every 32-float chunk */
+    mdct_butterfly_32(x+j);
+
 }
+#endif														/* SSE Optimize */
 
 STIN void mdct_butterflies(mdct_lookup *init,
 			     DATA_TYPE *x,
 			     int points){
   
+#ifndef __SSE__												/* SSE Optimize */
   DATA_TYPE *T=init->trig;
+#endif														/* SSE Optimize */
   int stages=init->log2n-5;
   int i,j;
   
   if(--stages>0){
+#ifdef __SSE__												/* SSE Optimize */
+    mdct_butterfly_first(init->trig_butterfly_first,x,points);
+#else														/* SSE Optimize */
     mdct_butterfly_first(T,x,points);
+#endif														/* SSE Optimize */
   }
 
   for(i=1;--stages>0;i++){
     for(j=0;j<(1<<i);j++)
+#ifdef __SSE__												/* SSE Optimize */
+      mdct_butterfly_generic(init,x+(points>>i)*j,points>>i,4<<i);
+#else														/* SSE Optimize */
       mdct_butterfly_generic(T,x+(points>>i)*j,points>>i,4<<i);
+#endif														/* SSE Optimize */
   }
 
   for(j=0;j<points;j+=32)
@@ -340,6 +1144,16 @@
   if(l){
     if(l->trig)_ogg_free(l->trig);
     if(l->bitrev)_ogg_free(l->bitrev);
+#ifdef __SSE__												/* SSE Optimize */
+    if(l->trig_bitreverse)_ogg_free(l->trig_bitreverse);
+    if(l->trig_forward)_ogg_free(l->trig_forward);
+    if(l->trig_backward)_ogg_free(l->trig_backward);
+    if(l->trig_butterfly_first)_ogg_free(l->trig_butterfly_first);
+    if(l->trig_butterfly_generic8)_ogg_free(l->trig_butterfly_generic8);
+    if(l->trig_butterfly_generic16)_ogg_free(l->trig_butterfly_generic16);
+    if(l->trig_butterfly_generic32)_ogg_free(l->trig_butterfly_generic32);
+    if(l->trig_butterfly_generic64)_ogg_free(l->trig_butterfly_generic64);
+#endif														/* SSE Optimize */
     memset(l,0,sizeof(*l));
   }
 }
@@ -348,6 +1162,76 @@
 			    DATA_TYPE *x){
   int        n       = init->n;
   int       *bit     = init->bitrev;
+#ifdef __SSE__												/* SSE Optimize */
+	float *w0      = x;
+	float *w1      = x = w0+(n>>1);
+	float *T       = init->trig_bitreverse;
+	
+	do
+	{
+		float *x0	 = x+bit[0];
+		float *x1	 = x+bit[1];
+		float *x2	 = x+bit[2];
+		float *x3	 = x+bit[3];
+		
+		__m128	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7;
+		w1		 -= 4;
+		
+		XMM0	 = _mm_lddqu_ps(x0);
+		XMM1	 = _mm_lddqu_ps(x1);
+		XMM4	 = _mm_lddqu_ps(x2);
+		XMM7	 = _mm_lddqu_ps(x3);
+		XMM2	 = XMM0;
+		XMM3	 = XMM1;
+		XMM5	 = XMM0;
+		XMM6	 = XMM1;
+		
+		XMM0	 = _mm_shuffle_ps(XMM0, XMM4, _MM_SHUFFLE(0,1,0,1));
+		XMM1	 = _mm_shuffle_ps(XMM1, XMM7, _MM_SHUFFLE(0,1,0,1));
+		XMM2	 = _mm_shuffle_ps(XMM2, XMM4, _MM_SHUFFLE(0,0,0,0));
+		XMM3	 = _mm_shuffle_ps(XMM3, XMM7, _MM_SHUFFLE(0,0,0,0));
+		XMM5	 = _mm_shuffle_ps(XMM5, XMM4, _MM_SHUFFLE(1,1,1,1));
+		XMM6	 = _mm_shuffle_ps(XMM6, XMM7, _MM_SHUFFLE(1,1,1,1));
+		XMM4	 = _mm_load_ps(T  );
+		XMM7	 = _mm_load_ps(T+4);
+
+		XMM1	 = _mm_xor_ps(XMM1, PCS_RNRN.ps);
+		XMM2	 = _mm_add_ps(XMM2, XMM3);
+		XMM5	 = _mm_sub_ps(XMM5, XMM6);
+
+		XMM0	 = _mm_add_ps(XMM0, XMM1);
+		XMM2	 = _mm_mul_ps(XMM2, XMM4);
+		XMM5	 = _mm_mul_ps(XMM5, XMM7);
+
+		XMM0	 = _mm_mul_ps(XMM0, PFV_0P5.ps);
+		XMM2	 = _mm_add_ps(XMM2, XMM5);
+
+		XMM1	 = XMM0;
+		XMM3	 = XMM2;
+
+#if	defined(__SSE3__)
+		XMM1	 = _mm_xor_ps(XMM1, PCS_RNRN.ps);
+
+		XMM0	 = _mm_add_ps(XMM0, XMM2);
+		XMM1	 = _mm_addsub_ps(XMM1, XMM3);
+#else
+		XMM1	 = _mm_xor_ps(XMM1, PCS_RNRN.ps);
+		XMM3	 = _mm_xor_ps(XMM3, PCS_RNRN.ps);
+
+		XMM0	 = _mm_add_ps(XMM0, XMM2);
+		XMM1	 = _mm_sub_ps(XMM1, XMM3);
+#endif
+		_mm_store_ps(w0, XMM0);
+		_mm_storeh_pi((__m64*)(w1  ), XMM1);
+		_mm_storel_pi((__m64*)(w1+2), XMM1);
+		
+		T		+= 8;
+		bit		+= 4;
+		w0		+= 4;
+		
+	}
+	while(w0<w1);
+#else														/* SSE Optimize */
   DATA_TYPE *w0      = x;
   DATA_TYPE *w1      = x = w0+(n>>1);
   DATA_TYPE *T       = init->trig+n;
@@ -392,6 +1276,7 @@
 	      w0    += 4;
 
   }while(w0<w1);
+#endif														/* SSE Optimize */
 }
 
 void mdct_backward(mdct_lookup *init, DATA_TYPE *in, DATA_TYPE *out){
@@ -399,6 +1284,263 @@
   int n2=n>>1;
   int n4=n>>2;
 
+#ifdef __SSE__												/* SSE Optimize */
+	/* rotate */
+	
+	float *iX	 = in+n2-8;
+	float *oX0	 = out+n2+n4;
+	float *T	 = init->trig_backward;
+	float *oX1   = oX0;
+	float *zX0, *zX1;
+	
+	if(n<1024)
+	{
+		do
+		{
+			int c0, c1;
+			__m128	XMM0, XMM1, XMM2, XMM3;
+			XMM0	 = _mm_load_ps(iX- 8);
+			XMM1	 = _mm_load_ps(iX- 4);
+			XMM2	 = _mm_load_ps(iX   );
+			XMM3	 = _mm_load_ps(iX+ 4);
+			XMM0	 = _mm_cmpneq_ps(XMM0, PFV_0.ps);
+			XMM1	 = _mm_cmpneq_ps(XMM1, PFV_0.ps);
+			XMM2	 = _mm_cmpneq_ps(XMM2, PFV_0.ps);
+			XMM3	 = _mm_cmpneq_ps(XMM3, PFV_0.ps);
+			XMM0	 = _mm_or_ps(XMM0, XMM1);
+			XMM2	 = _mm_or_ps(XMM2, XMM3);
+			c0		 = _mm_movemask_ps(XMM0);
+			c1		 = _mm_movemask_ps(XMM2);
+			c0		|= c1;
+			if(!c0)
+			{
+				oX0		-= 8;
+				_mm_store_ps(oX0   , PFV_0.ps);
+				_mm_store_ps(oX0+ 4, PFV_0.ps);
+				_mm_store_ps(oX1   , PFV_0.ps);
+				_mm_store_ps(oX1+ 4, PFV_0.ps);
+				iX		-= 16;
+				oX1		+= 8;
+				T		+= 32;
+			}
+			else
+				break;
+		}while(iX>=in);
+	}
+	else
+	{
+		do
+		{
+			int c0, c1;
+			__m128	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5;
+			XMM0	 = _mm_load_ps(iX-24);
+			XMM1	 = _mm_load_ps(iX-20);
+			XMM2	 = _mm_load_ps(iX-16);
+			XMM3	 = _mm_load_ps(iX-12);
+			XMM0	 = _mm_cmpneq_ps(XMM0, PFV_0.ps);
+			XMM1	 = _mm_cmpneq_ps(XMM1, PFV_0.ps);
+			XMM2	 = _mm_cmpneq_ps(XMM2, PFV_0.ps);
+			XMM3	 = _mm_cmpneq_ps(XMM3, PFV_0.ps);
+			XMM0	 = _mm_or_ps(XMM0, XMM1);
+			XMM2	 = _mm_or_ps(XMM2, XMM3);
+			XMM4	 = _mm_load_ps(iX- 8);
+			XMM5	 = _mm_load_ps(iX- 4);
+			XMM1	 = _mm_load_ps(iX   );
+			XMM3	 = _mm_load_ps(iX+ 4);
+			XMM4	 = _mm_cmpneq_ps(XMM4, PFV_0.ps);
+			XMM5	 = _mm_cmpneq_ps(XMM5, PFV_0.ps);
+			XMM1	 = _mm_cmpneq_ps(XMM1, PFV_0.ps);
+			XMM3	 = _mm_cmpneq_ps(XMM3, PFV_0.ps);
+			XMM4	 = _mm_or_ps(XMM4, XMM5);
+			XMM1	 = _mm_or_ps(XMM1, XMM3);
+			XMM0	 = _mm_or_ps(XMM0, XMM4);
+			XMM2	 = _mm_or_ps(XMM2, XMM1);
+			c0		 = _mm_movemask_ps(XMM0);
+			c1		 = _mm_movemask_ps(XMM2);
+			c0		|= c1;
+			if(!c0)
+			{
+				oX0		-= 16;
+				_mm_store_ps(oX0   , PFV_0.ps);
+				_mm_store_ps(oX0+ 4, PFV_0.ps);
+				_mm_store_ps(oX0+ 8, PFV_0.ps);
+				_mm_store_ps(oX0+12, PFV_0.ps);
+				_mm_store_ps(oX1   , PFV_0.ps);
+				_mm_store_ps(oX1+ 4, PFV_0.ps);
+				_mm_store_ps(oX1+ 8, PFV_0.ps);
+				_mm_store_ps(oX1+12, PFV_0.ps);
+				iX		-= 32;
+				oX1		+= 16;
+				T		+= 64;
+			}
+			else
+				break;
+		}while(iX>=in);
+	}
+	zX0	 = oX0;
+	zX1	 = oX1;
+	while(iX>=in)
+	{
+		__m128	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7;
+		oX0		-= 4;
+		XMM0	 = _mm_load_ps(iX  );
+		XMM4	 = _mm_load_ps(iX+4);
+		XMM2	 = _mm_load_ps(T   );
+		XMM3	 = _mm_load_ps(T+ 4);
+		XMM1	 = XMM0;
+		XMM5	 = XMM0;
+		XMM6	 = XMM4;
+		XMM7	 = XMM4;
+		XMM0	 = _mm_shuffle_ps(XMM0, XMM4, _MM_SHUFFLE(1,3,1,3));
+		XMM1	 = _mm_shuffle_ps(XMM1, XMM4, _MM_SHUFFLE(3,1,3,1));
+		XMM6	 = _mm_shuffle_ps(XMM6, XMM5, _MM_SHUFFLE(0,0,0,0));
+		XMM7	 = _mm_shuffle_ps(XMM7, XMM5, _MM_SHUFFLE(2,2,2,2));
+		XMM4	 = _mm_load_ps(T+ 8);
+		XMM5	 = _mm_load_ps(T+12);
+		XMM0	 = _mm_mul_ps(XMM0, XMM2);
+		XMM1	 = _mm_mul_ps(XMM1, XMM3);
+		XMM6	 = _mm_mul_ps(XMM6, XMM4);
+		XMM7	 = _mm_mul_ps(XMM7, XMM5);
+		XMM0	 = _mm_sub_ps(XMM0, XMM1);
+		XMM6	 = _mm_sub_ps(XMM6, XMM7);
+		_mm_store_ps(oX0, XMM0);
+		_mm_store_ps(oX1, XMM6);
+		iX		-= 8;
+		oX1		+= 4;
+		T		+= 16;
+	}
+	
+	mdct_butterflies_backward(init,out+n2,n2,zX0,zX1);
+	mdct_bitreverse(init,out);
+	
+	/* rotate + window */
+	
+	{
+		float *oX1	 = out+n2+n4;
+		float *oX2	 = out+n2+n4;
+		float *iX	 = out;
+		float *T	 = init->trig_backward+n;
+		
+		do
+		{
+			__m128	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5;
+			oX1		-=4;
+			XMM0	 = _mm_load_ps(iX  );
+			XMM4	 = _mm_load_ps(iX+4);
+			XMM2	 = _mm_load_ps(T  );
+			XMM3	 = _mm_load_ps(T+4);
+			XMM1	 = XMM0;
+			XMM0	 = _mm_shuffle_ps(XMM0, XMM4, _MM_SHUFFLE(2,0,2,0));
+			XMM1	 = _mm_shuffle_ps(XMM1, XMM4, _MM_SHUFFLE(3,1,3,1));
+			XMM4	 = XMM0;
+			XMM5	 = XMM1;
+			XMM0	 = _mm_mul_ps(XMM0, XMM2);
+			XMM1	 = _mm_mul_ps(XMM1, XMM3);
+			XMM4	 = _mm_mul_ps(XMM4, XMM3);
+			XMM5	 = _mm_mul_ps(XMM5, XMM2);
+			XMM0	 = _mm_sub_ps(XMM0, XMM1);
+			XMM4	 = _mm_add_ps(XMM4, XMM5);
+			XMM0	 = _mm_shuffle_ps(XMM0, XMM0, _MM_SHUFFLE(0,1,2,3));
+			XMM4	 = _mm_xor_ps(XMM4, PCS_RRRR.ps);
+			_mm_store_ps(oX1, XMM0);
+			_mm_store_ps(oX2, XMM4);
+			oX2		+= 4;
+			iX		+= 8;
+			T		+= 8;
+		}while(iX<oX1);
+		
+		iX	 = out+n2+n4;
+		oX1	 = out+n4;
+		oX2	 = oX1;
+		
+		do
+		{
+			__m128	XMM0, XMM1, XMM2, XMM3;
+			oX1		-= 16;
+			iX		-= 16;
+			XMM0	 = _mm_load_ps(iX+12);
+			XMM1	 = _mm_load_ps(iX+ 8);
+			XMM2	 = _mm_load_ps(iX+ 4);
+			XMM3	 = _mm_load_ps(iX   );
+			_mm_store_ps(oX1+12, XMM0);
+			_mm_store_ps(oX1+ 8, XMM1);
+			_mm_store_ps(oX1+ 4, XMM2);
+			_mm_store_ps(oX1   , XMM3);
+			XMM0	 = _mm_shuffle_ps(XMM0, XMM0, _MM_SHUFFLE(0,1,2,3));
+			XMM1	 = _mm_shuffle_ps(XMM1, XMM1, _MM_SHUFFLE(0,1,2,3));
+			XMM2	 = _mm_shuffle_ps(XMM2, XMM2, _MM_SHUFFLE(0,1,2,3));
+			XMM3	 = _mm_shuffle_ps(XMM3, XMM3, _MM_SHUFFLE(0,1,2,3));
+			XMM0	 = _mm_xor_ps(XMM0, PCS_RRRR.ps);
+			XMM1	 = _mm_xor_ps(XMM1, PCS_RRRR.ps);
+			XMM2	 = _mm_xor_ps(XMM2, PCS_RRRR.ps);
+			XMM3	 = _mm_xor_ps(XMM3, PCS_RRRR.ps);
+			_mm_store_ps(oX2   , XMM0);
+			_mm_store_ps(oX2+ 4, XMM1);
+			_mm_store_ps(oX2+ 8, XMM2);
+			_mm_store_ps(oX2+12, XMM3);
+			oX2		+= 16;
+		}while(oX2<iX);
+		
+		iX	 = out+n2+n4;
+		oX1	 = out+n2+n4;
+		oX2	 = out+n2;
+		
+		if(n4>16)
+		{
+			do
+			{
+				__m128	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7;
+				oX1		-= 32;
+				XMM0	 = _mm_load_ps(iX+28);
+				XMM1	 = _mm_load_ps(iX+24);
+				XMM2	 = _mm_load_ps(iX+20);
+				XMM3	 = _mm_load_ps(iX+16);
+				XMM4	 = _mm_load_ps(iX+12);
+				XMM5	 = _mm_load_ps(iX+ 8);
+				XMM6	 = _mm_load_ps(iX+ 4);
+				XMM7	 = _mm_load_ps(iX   );
+				XMM0	 = _mm_shuffle_ps(XMM0, XMM0, _MM_SHUFFLE(0,1,2,3));
+				XMM1	 = _mm_shuffle_ps(XMM1, XMM1, _MM_SHUFFLE(0,1,2,3));
+				XMM2	 = _mm_shuffle_ps(XMM2, XMM2, _MM_SHUFFLE(0,1,2,3));
+				XMM3	 = _mm_shuffle_ps(XMM3, XMM3, _MM_SHUFFLE(0,1,2,3));
+				XMM4	 = _mm_shuffle_ps(XMM4, XMM4, _MM_SHUFFLE(0,1,2,3));
+				XMM5	 = _mm_shuffle_ps(XMM5, XMM5, _MM_SHUFFLE(0,1,2,3));
+				XMM6	 = _mm_shuffle_ps(XMM6, XMM6, _MM_SHUFFLE(0,1,2,3));
+				XMM7	 = _mm_shuffle_ps(XMM7, XMM7, _MM_SHUFFLE(0,1,2,3));
+				_mm_store_ps(oX1   , XMM0);
+				_mm_store_ps(oX1+ 4, XMM1);
+				_mm_store_ps(oX1+ 8, XMM2);
+				_mm_store_ps(oX1+12, XMM3);
+				_mm_store_ps(oX1+16, XMM4);
+				_mm_store_ps(oX1+20, XMM5);
+				_mm_store_ps(oX1+24, XMM6);
+				_mm_store_ps(oX1+28, XMM7);
+				iX		+= 32;
+			}while(oX1>oX2);
+		}
+		else
+		{
+			do
+			{
+				__m128	XMM0, XMM1, XMM2, XMM3;
+				oX1		-= 16;
+				XMM0	 = _mm_load_ps(iX+12);
+				XMM1	 = _mm_load_ps(iX+ 8);
+				XMM2	 = _mm_load_ps(iX+ 4);
+				XMM3	 = _mm_load_ps(iX   );
+				XMM0	 = _mm_shuffle_ps(XMM0, XMM0, _MM_SHUFFLE(0,1,2,3));
+				XMM1	 = _mm_shuffle_ps(XMM1, XMM1, _MM_SHUFFLE(0,1,2,3));
+				XMM2	 = _mm_shuffle_ps(XMM2, XMM2, _MM_SHUFFLE(0,1,2,3));
+				XMM3	 = _mm_shuffle_ps(XMM3, XMM3, _MM_SHUFFLE(0,1,2,3));
+				_mm_store_ps(oX1   , XMM0);
+				_mm_store_ps(oX1+ 4, XMM1);
+				_mm_store_ps(oX1+ 8, XMM2);
+				_mm_store_ps(oX1+12, XMM3);
+				iX		+= 16;
+			}while(oX1>oX2);
+		}
+	}
+#else														/* SSE Optimize */
   /* rotate */
 
   DATA_TYPE *iX = in+n2-7;
@@ -488,8 +1630,175 @@
       iX+=4;
     }while(oX1>oX2);
   }
+#endif														/* SSE Optimize */
 }
 
+#ifdef __SSE__												/* SSE Optimize */
+void mdct_forward(mdct_lookup *init, DATA_TYPE *in, DATA_TYPE *out, DATA_TYPE *out1){
+	int n	 = init->n;
+	int n2	 = n>>1;
+	int n4	 = n>>2;
+	int n8	 = n>>3;
+	float *w	 = (float*)_ogg_alloca(n*sizeof(*w)); /* forward needs working space */
+	float *w2	 = w+n2;
+	
+	/* rotate */
+	
+	/* window + rotate + step 1 */
+	
+	int i, j;
+	
+	float *x0	 = in+n2+n4-8;
+	float *x1	 = in+n2+n4;
+	float *T	 = init->trig_forward;
+	
+	for(i=0,j=n2-2;i<n8;i+=4,j-=4)
+	{
+		__m128	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7;
+		XMM0	 = _mm_load_ps(x0    + 4);
+		XMM4	 = _mm_load_ps(x0       );
+		XMM1	 = _mm_load_ps(x0+i*4+ 8);
+		XMM5	 = _mm_load_ps(x0+i*4+12);
+		XMM2	 = _mm_load_ps(T   );
+		XMM3	 = _mm_load_ps(T+ 4);
+		XMM6	 = _mm_load_ps(T+ 8);
+		XMM7	 = _mm_load_ps(T+12);
+		XMM0	 = _mm_shuffle_ps(XMM0, XMM0, _MM_SHUFFLE(0,1,2,3));
+		XMM4	 = _mm_shuffle_ps(XMM4, XMM4, _MM_SHUFFLE(0,1,2,3));
+		XMM0	 = _mm_add_ps(XMM0, XMM1);
+		XMM4	 = _mm_add_ps(XMM4, XMM5);
+		XMM1	 = XMM0;
+		XMM5	 = XMM4;
+		XMM0	 = _mm_shuffle_ps(XMM0, XMM0, _MM_SHUFFLE(0,0,3,3));
+		XMM1	 = _mm_shuffle_ps(XMM1, XMM1, _MM_SHUFFLE(2,2,1,1));
+		XMM4	 = _mm_shuffle_ps(XMM4, XMM4, _MM_SHUFFLE(0,0,3,3));
+		XMM5	 = _mm_shuffle_ps(XMM5, XMM5, _MM_SHUFFLE(2,2,1,1));
+		XMM0	 = _mm_mul_ps(XMM0, XMM2);
+		XMM1	 = _mm_mul_ps(XMM1, XMM3);
+		XMM4	 = _mm_mul_ps(XMM4, XMM6);
+		XMM5	 = _mm_mul_ps(XMM5, XMM7);
+		XMM0	 = _mm_sub_ps(XMM0, XMM1);
+		XMM4	 = _mm_sub_ps(XMM4, XMM5);
+		_mm_storel_pi((__m64*)(w2+i  ), XMM0);
+		_mm_storeh_pi((__m64*)(w2+j  ), XMM0);
+		_mm_storel_pi((__m64*)(w2+i+2), XMM4);
+		_mm_storeh_pi((__m64*)(w2+j-2), XMM4);
+		x0	-= 8;
+		T	+= 16;
+	}
+
+	x0	 = in;
+	x1	 = in+n2-8;
+	
+	for(;i<n4;i+=4,j-=4)
+	{
+		__m128	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7;
+		XMM1	 = _mm_load_ps(x1+4);
+		XMM5	 = _mm_load_ps(x1  );
+		XMM0	 = _mm_load_ps(x0  );
+		XMM4	 = _mm_load_ps(x0+4);
+		XMM2	 = _mm_load_ps(T   );
+		XMM3	 = _mm_load_ps(T+ 4);
+		XMM6	 = _mm_load_ps(T+ 8);
+		XMM7	 = _mm_load_ps(T+12);
+		XMM1	 = _mm_shuffle_ps(XMM1, XMM1, _MM_SHUFFLE(0,1,2,3));
+		XMM5	 = _mm_shuffle_ps(XMM5, XMM5, _MM_SHUFFLE(0,1,2,3));
+		XMM0	 = _mm_sub_ps(XMM0, XMM1);
+		XMM4	 = _mm_sub_ps(XMM4, XMM5);
+		XMM1	 = XMM0;
+		XMM5	 = XMM4;
+		XMM0	 = _mm_shuffle_ps(XMM0, XMM0, _MM_SHUFFLE(0,0,3,3));
+		XMM1	 = _mm_shuffle_ps(XMM1, XMM1, _MM_SHUFFLE(2,2,1,1));
+		XMM4	 = _mm_shuffle_ps(XMM4, XMM4, _MM_SHUFFLE(0,0,3,3));
+		XMM5	 = _mm_shuffle_ps(XMM5, XMM5, _MM_SHUFFLE(2,2,1,1));
+		XMM0	 = _mm_mul_ps(XMM0, XMM2);
+		XMM1	 = _mm_mul_ps(XMM1, XMM3);
+		XMM4	 = _mm_mul_ps(XMM4, XMM6);
+		XMM5	 = _mm_mul_ps(XMM5, XMM7);
+		XMM0	 = _mm_add_ps(XMM0, XMM1);
+		XMM4	 = _mm_add_ps(XMM4, XMM5);
+		_mm_storel_pi((__m64*)(w2+i  ), XMM0);
+		_mm_storeh_pi((__m64*)(w2+j  ), XMM0);
+		_mm_storel_pi((__m64*)(w2+i+2), XMM4);
+		_mm_storeh_pi((__m64*)(w2+j-2), XMM4);
+		x0	+= 8;
+		x1	-= 8;
+		T	+= 16;
+	}
+
+	mdct_butterflies(init, w+n2, n2);
+	mdct_bitreverse(init, w);
+	
+	/* rotate + window */
+	
+	T	 = init->trig_forward+n;
+	x0	  =out +n2;
+
+	if(out1!=NULL)
+	{
+		x1	  =out1+n2;
+		for(i=0;i<n4;i+=4){
+			__m128	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7;
+			x0	-= 4;
+			x1	-= 4;
+			XMM0	 = _mm_load_ps(w+4);
+			XMM4	 = _mm_load_ps(w  );
+			XMM2	 = XMM0;
+			XMM1	 = _mm_load_ps(T   );
+			XMM3	 = _mm_load_ps(T+ 4);
+			XMM6	 = _mm_load_ps(T+ 8);
+			XMM7	 = _mm_load_ps(T+12);
+			XMM0	 = _mm_shuffle_ps(XMM0, XMM4,_MM_SHUFFLE(0,2,0,2));
+			XMM2	 = _mm_shuffle_ps(XMM2, XMM4,_MM_SHUFFLE(1,3,1,3));
+			XMM4	 = XMM0;
+			XMM5	 = XMM2;
+			XMM0	 = _mm_mul_ps(XMM0, XMM1);
+			XMM2	 = _mm_mul_ps(XMM2, XMM3);
+			XMM4	 = _mm_shuffle_ps(XMM4, XMM4, _MM_SHUFFLE(0,1,2,3));
+			XMM5	 = _mm_shuffle_ps(XMM5, XMM5, _MM_SHUFFLE(0,1,2,3));
+			XMM4	 = _mm_mul_ps(XMM4, XMM6);
+			XMM5	 = _mm_mul_ps(XMM5, XMM7);
+			XMM0	 = _mm_sub_ps(XMM0, XMM2);
+			XMM4	 = _mm_add_ps(XMM4, XMM5);
+			_mm_store_ps(x0    , XMM0);
+			_mm_store_ps(x1    , XMM0);
+			_mm_store_ps(out +i, XMM4);
+			_mm_store_ps(out1+i, XMM4);
+			w	+= 8;
+			T	+= 16;
+		}
+	}
+	else
+	{
+		for(i=0;i<n4;i+=4){
+			__m128	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7;
+			x0	-= 4;
+			XMM0	 = _mm_load_ps(w+4);
+			XMM4	 = _mm_load_ps(w  );
+			XMM2	 = XMM0;
+			XMM1	 = _mm_load_ps(T   );
+			XMM3	 = _mm_load_ps(T+ 4);
+			XMM6	 = _mm_load_ps(T+ 8);
+			XMM7	 = _mm_load_ps(T+12);
+			XMM0	 = _mm_shuffle_ps(XMM0, XMM4,_MM_SHUFFLE(0,2,0,2));
+			XMM2	 = _mm_shuffle_ps(XMM2, XMM4,_MM_SHUFFLE(1,3,1,3));
+			XMM4	 = XMM0;
+			XMM5	 = XMM2;
+			XMM0	 = _mm_mul_ps(XMM0, XMM1);
+			XMM2	 = _mm_mul_ps(XMM2, XMM3);
+			XMM4	 = _mm_shuffle_ps(XMM4, XMM4, _MM_SHUFFLE(0,1,2,3));
+			XMM5	 = _mm_shuffle_ps(XMM5, XMM5, _MM_SHUFFLE(0,1,2,3));
+			XMM4	 = _mm_mul_ps(XMM4, XMM6);
+			XMM5	 = _mm_mul_ps(XMM5, XMM7);
+			XMM0	 = _mm_sub_ps(XMM0, XMM2);
+			XMM4	 = _mm_add_ps(XMM4, XMM5);
+			_mm_store_ps(x0    , XMM0);
+			_mm_store_ps(out +i, XMM4);
+			w	+= 8;
+			T	+= 16;
+		}
+	}
+#else														/* SSE Optimize */
 void mdct_forward(mdct_lookup *init, DATA_TYPE *in, DATA_TYPE *out){
   int n=init->n;
   int n2=n>>1;
@@ -560,5 +1869,5 @@
     w+=2;
     T+=2;
   }
+#endif														/* SSE Optimize */
 }
-
diff -ruN --exclude '*.in' libvorbis-1.2.0/lib/mdct.h libvorbis-1.2.0-sse/lib/mdct.h
--- libvorbis-1.2.0/lib/mdct.h	2007-07-24 02:09:47.000000000 +0200
+++ libvorbis-1.2.0-sse/lib/mdct.h	2007-08-02 12:43:10.000000000 +0200
@@ -58,6 +58,16 @@
   int log2n;
   
   DATA_TYPE *trig;
+#ifdef __SSE__												/* SSE Optimize */
+  DATA_TYPE *trig_bitreverse;
+  DATA_TYPE *trig_forward;
+  DATA_TYPE *trig_backward;
+  DATA_TYPE *trig_butterfly_first;
+  DATA_TYPE *trig_butterfly_generic8;
+  DATA_TYPE *trig_butterfly_generic16;
+  DATA_TYPE *trig_butterfly_generic32;
+  DATA_TYPE *trig_butterfly_generic64;
+#endif														/* SSE Optimize */
   int       *bitrev;
 
   DATA_TYPE scale;
@@ -65,7 +75,11 @@
 
 extern void mdct_init(mdct_lookup *lookup,int n);
 extern void mdct_clear(mdct_lookup *l);
+#ifdef __SSE__												/* SSE Optimize */
+extern void mdct_forward(mdct_lookup *init, DATA_TYPE *in, DATA_TYPE *out, DATA_TYPE *out1);	/* SSE build only: out1 is an optional second destination that receives the same values as out; pass NULL to skip it */
+#else														/* SSE Optimize */
 extern void mdct_forward(mdct_lookup *init, DATA_TYPE *in, DATA_TYPE *out);
+#endif														/* SSE Optimize */
 extern void mdct_backward(mdct_lookup *init, DATA_TYPE *in, DATA_TYPE *out);
 
 #endif
diff -ruN --exclude '*.in' libvorbis-1.2.0/lib/misc.h libvorbis-1.2.0-sse/lib/misc.h
--- libvorbis-1.2.0/lib/misc.h	2007-07-24 02:09:47.000000000 +0200
+++ libvorbis-1.2.0-sse/lib/misc.h	2007-08-02 12:43:10.000000000 +0200
@@ -45,6 +45,20 @@
 #endif
 #endif
 
+#ifdef __SSE__												/* SSE Optimize */
+#undef _ogg_malloc	/* discard any earlier definitions of the allocator macros before redefining them below */
+#undef _ogg_calloc
+#undef _ogg_realloc
+#undef _ogg_free
+#undef _ogg_alloca
+
+#define _ogg_malloc(x) xmm_malloc(x)	/* route _ogg_* allocation through the xmm_* helpers; NOTE(review): these are not declared in the visible part of this header, so xmmlib.h must be in scope wherever the macros expand */
+#define _ogg_calloc(x,y) xmm_calloc((x), (y))
+#define _ogg_realloc(x,y) xmm_realloc((x), (y))
+#define _ogg_alloca(x) xmm_align(alloca((x)+16))	/* asks alloca for 16 spare bytes; presumably xmm_align rounds the pointer up to a 16-byte boundary -- confirm in xmmlib.h */
+#define _ogg_free(x) xmm_free(x)
+#endif														/* SSE Optimize */
+
 #endif
 
 
diff -ruN --exclude '*.in' libvorbis-1.2.0/lib/psy.c libvorbis-1.2.0-sse/lib/psy.c
--- libvorbis-1.2.0/lib/psy.c	2007-08-02 12:42:08.000000000 +0200
+++ libvorbis-1.2.0-sse/lib/psy.c	2007-08-02 12:43:10.000000000 +0200
@@ -21,6 +21,9 @@
 #include "vorbis/codec.h"
 #include "codec_internal.h"
 
+#ifdef __SSE__												/* SSE Optimize */
+#include "xmmlib.h"
+#endif														/* SSE Optimize */
 #include "masking.h"
 #include "psy.h"
 #include "os.h"
@@ -59,6 +62,186 @@
 static float nnmid_th=0.2;
 
 
+#ifdef __SSE__												/* SSE Optimize */
+static _MM_ALIGN16 const float PNEGINF[4] = {NEGINF, NEGINF, NEGINF, NEGINF};	/* NEGINF in all four lanes, 16-byte aligned */
+/* temp_bfn8: 128-entry table; values 1, 9, 17 in runs of 32, then 25 for 8 entries, then back down 17, 9, 1 in runs of 8 (consumer not in this hunk). */
+static const int temp_bfn8[128] = {
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,
+17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,
+25,25,25,25,25,25,25,25,17,17,17,17,17,17,17,17,
+ 9, 9, 9, 9, 9, 9, 9, 9, 1, 1, 1, 1, 1, 1, 1, 1,
+};
+/* temp_bfn4: same shape with step 4: values 1..21 in runs of 16, then 25 for 8 entries, then 21 down to 1 in runs of 4. */
+static const int temp_bfn4[128] = {
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,
+17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,
+21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,
+25,25,25,25,25,25,25,25,21,21,21,21,17,17,17,17,
+13,13,13,13, 9, 9, 9, 9, 5, 5, 5, 5, 1, 1, 1, 1,
+};
+
+static _MM_ALIGN16 const float PTEMP_BFN1[1] = {
+	-8.0000000e+001, 
+};
+static _MM_ALIGN16 const float PTEMP_BFN2[2] = {
+	-4.2000000e+001, -7.9000000e+001, 
+};
+static _MM_ALIGN16 const float PTEMP_BFN3[3] = {
+	-3.0000000e+001, -5.5000000e+001, -8.0000000e+001, 
+};
+static _MM_ALIGN16 const float PTEMP_BFN4[4] = {
+	-2.3000000e+001, -4.1000000e+001, -5.9000000e+001, -7.7000000e+001, 
+	
+};
+static _MM_ALIGN16 const float PTEMP_BFN5[5] = {
+	-2.0000000e+001, -3.5000000e+001, -5.0000000e+001, -6.5000000e+001, 
+	-8.0000000e+001, 
+};
+static _MM_ALIGN16 const float PTEMP_BFN6[6] = {
+	-1.7000000e+001, -2.9000000e+001, -4.1000000e+001, -5.3000000e+001, 
+	-6.5000000e+001, -7.7000000e+001, 
+};
+static _MM_ALIGN16 const float PTEMP_BFN7[7] = {
+	-1.5000000e+001, -2.5000000e+001, -3.5000000e+001, -4.5000000e+001, 
+	-5.5000000e+001, -6.5000000e+001, -7.5000000e+001, 
+};
+static _MM_ALIGN16 const float PTEMP_BFN8[8] = {
+	-1.4000000e+001, -2.3000000e+001, -3.2000000e+001, -4.1000000e+001, 
+	-5.0000000e+001, -5.9000000e+001, -6.8000000e+001, -7.7000000e+001, 
+	
+};
+static _MM_ALIGN16 const float PTEMP_BFN9[9] = {
+	-1.3000000e+001, -2.1000000e+001, -2.9000000e+001, -3.7000000e+001, 
+	-4.5000000e+001, -5.3000000e+001, -6.1000000e+001, -6.9000000e+001, 
+	-7.7000000e+001, 
+};
+static _MM_ALIGN16 const float PTEMP_BFN10[10] = {
+	-1.2000000e+001, -1.9000000e+001, -2.6000000e+001, -3.3000000e+001, 
+	-4.0000000e+001, -4.7000000e+001, -5.4000000e+001, -6.1000000e+001, 
+	-6.8000000e+001, -7.5000000e+001, 
+};
+static _MM_ALIGN16 const float PTEMP_BFN12[12] = {
+	-1.1000000e+001, -1.7000000e+001, -2.3000000e+001, -2.9000000e+001, 
+	-3.5000000e+001, -4.1000000e+001, -4.7000000e+001, -5.3000000e+001, 
+	-5.9000000e+001, -6.5000000e+001, -7.1000000e+001, -7.7000000e+001, 
+	
+};
+static _MM_ALIGN16 const float PTEMP_BFN15[15] = {
+	-1.0000000e+001, -1.5000000e+001, -2.0000000e+001, -2.5000000e+001, 
+	-3.0000000e+001, -3.5000000e+001, -4.0000000e+001, -4.5000000e+001, 
+	-5.0000000e+001, -5.5000000e+001, -6.0000000e+001, -6.5000000e+001, 
+	-7.0000000e+001, -7.5000000e+001, -8.0000000e+001, 
+};
+static _MM_ALIGN16 const float PTEMP_BFN18[18] = {
+	-9.0000000e+000, -1.3000000e+001, -1.7000000e+001, -2.1000000e+001, 
+	-2.5000000e+001, -2.9000000e+001, -3.3000000e+001, -3.7000000e+001, 
+	-4.1000000e+001, -4.5000000e+001, -4.9000000e+001, -5.3000000e+001, 
+	-5.7000000e+001, -6.1000000e+001, -6.5000000e+001, -6.9000000e+001, 
+	-7.3000000e+001, -7.7000000e+001, 
+};
+static _MM_ALIGN16 const float PTEMP_BFN25[25] = {
+	-8.0000000e+000, -1.1000000e+001, -1.4000000e+001, -1.7000000e+001, 
+	-2.0000000e+001, -2.3000000e+001, -2.6000000e+001, -2.9000000e+001, 
+	-3.2000000e+001, -3.5000000e+001, -3.8000000e+001, -4.1000000e+001, 
+	-4.4000000e+001, -4.7000000e+001, -5.0000000e+001, -5.3000000e+001, 
+	-5.6000000e+001, -5.9000000e+001, -6.2000000e+001, -6.5000000e+001, 
+	-6.8000000e+001, -7.1000000e+001, -7.4000000e+001, -7.7000000e+001, 
+	-8.0000000e+001, 
+};
+/* PTEMP_BFN[k], k=1..25: the smallest PTEMP_BFNm ramp above with m>=k entries (each ramp is an evenly stepped, descending run of negative values); slot 0 is NULL. */
+static const float *PTEMP_BFN[26]	 = {
+	NULL,
+	PTEMP_BFN1, PTEMP_BFN2, PTEMP_BFN3, PTEMP_BFN4, 
+	PTEMP_BFN5, PTEMP_BFN6, PTEMP_BFN7, PTEMP_BFN8, 
+	PTEMP_BFN9, PTEMP_BFN10, PTEMP_BFN12, PTEMP_BFN12, 
+	PTEMP_BFN15, PTEMP_BFN15, PTEMP_BFN15, PTEMP_BFN18, 
+	PTEMP_BFN18, PTEMP_BFN18, PTEMP_BFN25, PTEMP_BFN25, 
+	PTEMP_BFN25, PTEMP_BFN25, PTEMP_BFN25, PTEMP_BFN25, 
+	PTEMP_BFN25
+};
+
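+/*
+	For the 4-element sort step of the shellsort (SSE compares): index = 6-bit mask of the pairwise compares of four values A..D (bit0 B>A, bit1 C>B, bit2 D>C, bit3 D>A, bit4 C>A, bit5 D>B); each entry lists the lane indices (A=0..D=3) in ascending value order; the 40 masks that no ordering can produce hold the identity {0,1,2,3}.
+*/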
+static _MM_ALIGN16 const __m128x Sort4IndexConvTable[64] = {
+	{.si32 = {3,2,1,0}},	/* A>B>C>D		000000	00 */
+	{.si32 = {3,2,0,1}},	/* B>A>C>D		000001	01 */
+	{.si32 = {3,1,2,0}},	/* A>C>B>D		000010	02 */
+	{.si32 = {0,1,2,3}},	/*                      03 */
+	{.si32 = {2,3,1,0}},	/* A>B>D>C		000100	04 */
+	{.si32 = {2,3,0,1}},	/* B>A>D>C		000101	05 */
+	{.si32 = {0,1,2,3}},	/*                      06 */
+	{.si32 = {0,1,2,3}},	/*                      07 */
+	{.si32 = {0,1,2,3}},	/*                      08 */
+	{.si32 = {0,1,2,3}},	/*                      09 */
+	{.si32 = {0,1,2,3}},	/*                      10 */
+	{.si32 = {0,1,2,3}},	/*                      11 */
+	{.si32 = {0,1,2,3}},	/*                      12 */
+	{.si32 = {2,0,3,1}},	/* B>D>A>C		001101	13 */
+	{.si32 = {0,1,2,3}},	/*                      14 */
+	{.si32 = {0,1,2,3}},	/*                      15 */
+	{.si32 = {0,1,2,3}},	/*                      16 */
+	{.si32 = {3,0,2,1}},	/* B>C>A>D		010001	17 */
+	{.si32 = {3,1,0,2}},	/* C>A>B>D		010010	18 */
+	{.si32 = {3,0,1,2}},	/* C>B>A>D		010011	19 */
+	{.si32 = {0,1,2,3}},	/*                      20 */
+	{.si32 = {0,1,2,3}},	/*                      21 */
+	{.si32 = {0,1,2,3}},	/*                      22 */
+	{.si32 = {0,1,2,3}},	/*                      23 */
+	{.si32 = {0,1,2,3}},	/*                      24 */
+	{.si32 = {0,3,2,1}},	/* B>C>D>A		011001	25 */
+	{.si32 = {0,1,2,3}},	/*                      26 */
+	{.si32 = {0,3,1,2}},	/* C>B>D>A		011011	27 */
+	{.si32 = {0,1,2,3}},	/*                      28 */
+	{.si32 = {0,2,3,1}},	/* B>D>C>A		011101	29 */
+	{.si32 = {0,1,2,3}},	/*                      30 */
+	{.si32 = {0,1,2,3}},	/*                      31 */
+	{.si32 = {0,1,2,3}},	/*                      32 */
+	{.si32 = {0,1,2,3}},	/*                      33 */
+	{.si32 = {1,3,2,0}},	/* A>C>D>B		100010	34 */
+	{.si32 = {0,1,2,3}},	/*                      35 */
+	{.si32 = {2,1,3,0}},	/* A>D>B>C		100100	36 */
+	{.si32 = {0,1,2,3}},	/*                      37 */
+	{.si32 = {1,2,3,0}},	/* A>D>C>B		100110	38 */
+	{.si32 = {0,1,2,3}},	/*                      39 */
+	{.si32 = {0,1,2,3}},	/*                      40 */
+	{.si32 = {0,1,2,3}},	/*                      41 */
+	{.si32 = {0,1,2,3}},	/*                      42 */
+	{.si32 = {0,1,2,3}},	/*                      43 */
+	{.si32 = {2,1,0,3}},	/* D>A>B>C		101100	44 */
+	{.si32 = {2,0,1,3}},	/* D>B>A>C		101101	45 */
+	{.si32 = {1,2,0,3}},	/* D>A>C>B		101110	46 */
+	{.si32 = {0,1,2,3}},	/*                      47 */
+	{.si32 = {0,1,2,3}},	/*                      48 */
+	{.si32 = {0,1,2,3}},	/*                      49 */
+	{.si32 = {1,3,0,2}},	/* C>A>D>B		110010	50 */
+	{.si32 = {0,1,2,3}},	/*                      51 */
+	{.si32 = {0,1,2,3}},	/*                      52 */
+	{.si32 = {0,1,2,3}},	/*                      53 */
+	{.si32 = {0,1,2,3}},	/*                      54 */
+	{.si32 = {0,1,2,3}},	/*                      55 */
+	{.si32 = {0,1,2,3}},	/*                      56 */
+	{.si32 = {0,1,2,3}},	/*                      57 */
+	{.si32 = {1,0,3,2}},	/* C>D>A>B		111010	58 */
+	{.si32 = {0,1,3,2}},	/* C>D>B>A		111011	59 */
+	{.si32 = {0,1,2,3}},	/*                      60 */
+	{.si32 = {0,2,1,3}},	/* D>B>C>A		111101	61 */
+	{.si32 = {1,0,2,3}},	/* D>C>A>B		111110	62 */
+	{.si32 = {0,1,2,3}}	/* D>C>B>A		111111	63 */
+};
+
+_MM_ALIGN16 float findex[2048];
+_MM_ALIGN16 float findex2[2048];
+
+#endif														/* SSE Optimize */
+
 vorbis_look_psy_global *_vp_global_look(vorbis_info *vi){
   codec_setup_info *ci=vi->codec_setup;
   vorbis_info_psy_global *gi=&ci->psy_g_param;
@@ -422,6 +605,66 @@
     _analysis_output_always("noiseoff2",ls++,p->noiseoffset[2],n,1,0,0);
   }
 #endif
+#ifdef __SSE__												/* SSE Optimize */
+	if(findex[1]==0.f)	/* tables still zero: fill findex[i]=i and findex2[i]=i*i once (lazy, unsynchronized) */
+	{
+		for(i=0;i<2048;i++)
+		{
+			findex[i]	 = (float)(i);
+			findex2[i]	 = (float)(i*i);
+		}
+	}
+	{
+		/* bark[i] packs two 16-bit fields; decode them by value so this also holds where long is 64-bit (a short* view of the array does not, and breaks strict aliasing) */
+		for(i=0;i<n;i++)
+		{
+			if((p->bark[i]>>16)>=0)	/* high field (upstream packs lo-1 here) is no longer negative */
+				break;
+		}
+		p->midpoint1	 = i;
+		p->midpoint1_4	 = p->midpoint1&(~3);
+		p->midpoint1_8	 = p->midpoint1_4&(~7);
+		p->midpoint1_16	 = p->midpoint1_8&(~15);
+		for(;i<n;i++)
+		{
+			if((p->bark[i]&0xffff)>=n)	/* low field (upstream packs hi-1 here) reaches n */
+				break;
+		}
+		p->midpoint2	 = i;
+		i = (p->midpoint1+3)&(~3);	/* midpoint1 rounded up to a multiple of 4: base for the midpoint2_* limits */
+		p->midpoint2_4	 = (p->midpoint2-i)&(~3);
+		p->midpoint2_8	 = p->midpoint2_4&(~7);
+		p->midpoint2_16	 = p->midpoint2_8&(~15);
+		p->midpoint2_4	+= i;
+		p->midpoint2_8	+= i;
+		p->midpoint2_16	+= i;
+	}
+	p->octsft=_ogg_malloc(n*sizeof(*p->octsft));	/* per line: octave[i]>>shiftoc clamped to [0,P_BANDS-1] */
+	p->octend=_ogg_malloc(n*sizeof(*p->octend));	/* per line: last index of the run of equal octave[] values it belongs to */
+	p->octpos=_ogg_malloc(n*sizeof(*p->octpos));	/* per line: midpoint between octave[i] and its successor, minus firstoc */
+	for(i=0;i<n;i++)
+	{
+		long oc	 = p->octave[i];
+		long nx	 = p->octave[(i+1<n)?(i+1):i];	/* octave[] has n entries: the last line reuses itself instead of reading octave[n] */
+		oc	 = oc>>p->shiftoc;
+		if(oc>=P_BANDS)oc=P_BANDS-1;
+		if(oc<0)oc=0;
+		
+		p->octsft[i]	 = oc;
+		p->octpos[i]	 = ((p->octave[i]+nx)>>1)-p->firstoc;
+
+	}
+	for(i=0;i<n;i++)
+	{
+		long oc=p->octave[i];
+		long j = i, k;	/* j: first line of the current run of equal octave values */
+		while(i+1<n && p->octave[i+1]==oc){
+			i++;
+		}
+		for(k=j;k<=i;k++)	/* every line of the run records the run's last index */
+			p->octend[k] = i;
+	}
+#endif														/* SSE Optimize */
 }
 
 void _vp_psy_clear(vorbis_look_psy *p){
@@ -445,6 +688,11 @@
       }
       _ogg_free(p->noiseoffset);
     }
+#ifdef __SSE__												/* SSE Optimize */
+    if(p->octsft)_ogg_free(p->octsft);
+    if(p->octend)_ogg_free(p->octend);
+    if(p->octpos)_ogg_free(p->octpos);
+#endif														/* SSE Optimize */
     memset(p,0,sizeof(*p));
   }
 }
@@ -458,6 +706,9 @@
   int i,post1;
   int seedptr;
   const float *posts,*curve;
+#ifdef __SSE__												/* SSE Optimize */
+	__m128	SAMP	 = _mm_load_ss(&amp);
+#endif														/* SSE Optimize */
 
   int choice=(int)((amp+dBoffset-P_LEVEL_0)*.1f);
   choice=max(choice,0);
@@ -467,6 +718,42 @@
   post1=(int)posts[1];
   seedptr=oc+(posts[0]-EHMER_OFFSET)*linesper-(linesper>>1);
 
+#ifdef __SSE__												/* SSE Optimize */
+	i	 = posts[0];
+	if(seedptr<0)
+	{
+		int preseedptr	 = seedptr;
+		seedptr	 = (8-((-seedptr)&7));
+		i	+= ((seedptr-preseedptr)>>3);
+	}
+	if((post1-i)*8+seedptr>=n)
+		post1	 = (n-1-seedptr)/8+i+1;
+	{
+		int post05	 = ((post1-i)&(~1))+i;
+		for(;i<post05;i+=2)
+		{
+			__m128	XMM0	 = _mm_load_ss(curve+i  );
+			__m128	XMM1	 = _mm_load_ss(curve+i+1);
+			__m128	XMM2	 = _mm_load_ss(seed+seedptr   );
+			__m128	XMM3	 = _mm_load_ss(seed+seedptr+ 8);
+			XMM0	 = _mm_add_ss(XMM0, SAMP);
+			XMM1	 = _mm_add_ss(XMM1, SAMP);
+			XMM0	 = _mm_max_ss(XMM0, XMM2);
+			XMM1	 = _mm_max_ss(XMM1, XMM3);
+			_mm_store_ss(seed+seedptr   , XMM0);
+			_mm_store_ss(seed+seedptr+ 8, XMM1);
+			seedptr	+= 16;
+		}
+		if(post1!=i)
+		{
+			__m128	XMM0	 = _mm_load_ss(curve+i  );
+			__m128	XMM2	 = _mm_load_ss(seed+seedptr   );
+			XMM0	 = _mm_add_ss(XMM0, SAMP);
+			XMM0	 = _mm_max_ss(XMM0, XMM2);
+			_mm_store_ss(seed+seedptr   , XMM0);
+		}
+	}
+#else														/* SSE Optimize */
   for(i=posts[0];i<post1;i++){
     if(seedptr>0){
       float lin=amp+curve[i];
@@ -475,6 +762,7 @@
     seedptr+=linesper;
     if(seedptr>=n)break;
   }
+#endif														/* SSE Optimize */
 }
 
 static void seed_loop(vorbis_look_psy *p,
@@ -491,6 +779,22 @@
 
   for(i=0;i<n;i++){
     float max=f[i];
+#ifdef __SSE__												/* SSE Optimize */
+	long	oc;
+	long	ei=p->octend[i];
+	if(i>ei)
+		continue;
+	oc	 = p->octave[i];
+	while(i<ei)
+	{
+		i++;
+		if(f[i]>max)max	 = f[i];
+	}
+	
+	if(max+6.f>flr[i])
+	{
+		oc	 = p->octsft[i];
+#else
     long oc=p->octave[i];
     while(i+1<n && p->octave[i+1]==oc){
       i++;
@@ -502,6 +806,7 @@
 
       if(oc>=P_BANDS)oc=P_BANDS-1;
       if(oc<0)oc=0;
+#endif
 
       seed_curve(seed,
 		 curves[oc],
@@ -519,6 +824,80 @@
   float *ampstack=alloca(n*sizeof(*ampstack));
   long   stack=0;
   long   pos=0;
+#ifdef __SSE__												/* SSE Optimize */
+  long   i=0;
+
+	for(;i<n;i++)
+	{
+		if(stack<2)
+		{
+			posstack[stack]=i;
+			ampstack[stack++]=seeds[i];
+		}
+		else
+		{
+			while(1)
+			{
+				if(seeds[i]<ampstack[stack-1])
+				{
+					posstack[stack]=i;
+					ampstack[stack++]=seeds[i];
+					break;
+				}
+				else
+				{
+					if(i<posstack[stack-1]+linesper)
+					{
+						if(stack>1 && ampstack[stack-1]<=ampstack[stack-2] && i<posstack[stack-2]+linesper)
+						{
+							/* we completely overlap, making stack-1 irrelevant.  pop it */
+							stack--;
+LOOP_WITH_CHECK_STACK:
+							continue;
+						}
+					}
+					posstack[stack]=i;
+					ampstack[stack++]=seeds[i];
+					break;
+				}
+			}
+			i	++;
+			break;
+		}
+	}
+	for(;i<n;i++)
+	{
+		while(1)
+		{
+			if(seeds[i]<ampstack[stack-1])
+			{
+				posstack[stack]=i;
+				ampstack[stack++]=seeds[i];
+				break;
+			}
+			else
+			{
+				if(i<posstack[stack-1]+linesper)
+				{
+					if(ampstack[stack-1]<=ampstack[stack-2] && i<posstack[stack-2]+linesper)
+					{
+						/* we completely overlap, making stack-1 irrelevant.  pop it */
+						stack--;
+						if(stack<2)
+						{
+							goto LOOP_WITH_CHECK_STACK;
+						}
+						else
+							continue;
+					}
+				}
+				posstack[stack]=i;
+				ampstack[stack++]=seeds[i];
+				break;
+			}
+		}
+	}
+#else														/* SSE Optimize */
   long   i;
 
   for(i=0;i<n;i++){
@@ -548,10 +927,40 @@
       }
     }
   }
+#endif														/* SSE Optimize */
 
   /* the stack now contains only the positions that are relevant. Scan
      'em straight through */
 
+#ifdef __SSE__												/* SSE Optimize */
+	for(i=0;i<stack-1;i++)
+	{
+		long endpos;
+		if(ampstack[i+1]>ampstack[i])
+		{
+			endpos	 = posstack[i+1];
+		}
+		else
+		{
+			endpos	 = posstack[i]+linesper+1; /* +1 is important, else bin 0 is
+					discarded in short frames */
+		}
+		if(endpos>n)
+			endpos	 = n;
+		for(;pos<endpos;pos++)
+			seeds[pos]=ampstack[i];
+	}
+	if(i<stack)
+	{
+		long endpos;
+		endpos	 = posstack[i]+linesper+1; /* +1 is important, else bin 0 is
+				discarded in short frames */
+		if(endpos>n)
+			endpos	 = n;
+		for(;pos<endpos;pos++)
+			seeds[pos]=ampstack[i];
+	}
+#else														/* SSE Optimize */
   for(i=0;i<stack;i++){
     long endpos;
     if(i<stack-1 && ampstack[i+1]>ampstack[i]){
@@ -564,6 +973,7 @@
     for(;pos<endpos;pos++)
       seeds[pos]=ampstack[i];
   }
+#endif														/* SSE Optimize */
   
   /* there.  Linear time.  I now remember this was on a problem set I
      had in Grad Skool... I didn't solve it at the time ;-) */
@@ -575,6 +985,121 @@
 static void max_seeds(vorbis_look_psy *p,
 		      float *seed,
 		      float *flr){
+#ifdef __SSE__												/* SSE Optimize */
+	long	n	 = p->total_octave_lines;
+	int		linesper	 = p->eighth_octave_lines;
+	long	linpos	 = 0;
+	long	pos;
+	float	TEMP[p->n] __attribute__((aligned(16)));
+	
+	seed_chase(seed,linesper,n); /* for masking */
+	{
+		__m128	PVAL	 = _mm_set_ps1(p->vi->tone_abs_limit);
+		long ln	 = n&(~15);
+		for(pos=0;pos<ln;pos+=16)
+		{
+			__m128	XMM0	 = _mm_load_ps(seed+pos   );
+			__m128	XMM1	 = _mm_load_ps(seed+pos+ 4);
+			__m128	XMM2	 = _mm_load_ps(seed+pos+ 8);
+			__m128	XMM3	 = _mm_load_ps(seed+pos+12);
+			XMM0	 = _mm_min_ps(XMM0, PVAL);
+			XMM1	 = _mm_min_ps(XMM1, PVAL);
+			XMM2	 = _mm_min_ps(XMM2, PVAL);
+			XMM3	 = _mm_min_ps(XMM3, PVAL);
+			_mm_store_ps(seed+pos   , XMM0);
+			_mm_store_ps(seed+pos+ 4, XMM1);
+			_mm_store_ps(seed+pos+ 8, XMM2);
+			_mm_store_ps(seed+pos+12, XMM3);
+		}
+		ln	 = n&(~7);
+		for(;pos<ln;pos+=8)
+		{
+			__m128	XMM0	 = _mm_load_ps(seed+pos   );
+			__m128	XMM1	 = _mm_load_ps(seed+pos+ 4);
+			XMM0	 = _mm_min_ps(XMM0, PVAL);
+			XMM1	 = _mm_min_ps(XMM1, PVAL);
+			_mm_store_ps(seed+pos   , XMM0);
+			_mm_store_ps(seed+pos+ 4, XMM1);
+		}
+		ln	 = n&(~3);
+		for(;pos<ln;pos+=4)
+		{
+			__m128	XMM0	 = _mm_load_ps(seed+pos   );
+			XMM0	 = _mm_min_ps(XMM0, PVAL);
+			_mm_store_ps(seed+pos   , XMM0);
+		}
+		for(;pos<n;pos++)
+		{
+			__m128	XMM0	 = _mm_load_ss(seed+pos   );
+			XMM0	 = _mm_min_ss(XMM0, PVAL);
+			_mm_store_ss(seed+pos, XMM0);
+		}
+	}
+	pos	 = p->octave[0]-p->firstoc-(linesper>>1);
+	if(linpos+1<p->n)
+	{
+		float minV	 = seed[pos];
+		long end	 = p->octpos[linpos];
+		while(pos+1<=end)
+		{
+			pos	++;
+			if((seed[pos]>NEGINF && seed[pos]<minV) || minV==NEGINF)
+				minV	 = seed[pos];
+		}
+		end	 = pos+p->firstoc;
+		for(;linpos<p->n&&p->octave[linpos]<=end;)
+		{
+			int ep = p->octend[linpos];
+			for(;linpos<=ep;linpos++)
+				TEMP[linpos]	 = minV;
+		}
+	}
+	while(linpos+1<p->n)
+	{
+		float minV	 = seed[pos];
+		long end	 = p->octpos[linpos];
+		while(pos+1<=end)
+		{
+			pos	++;
+			if(seed[pos]<minV)
+				minV	 = seed[pos];
+		}
+		end	 = pos+p->firstoc;
+		for(;linpos<p->n&&p->octave[linpos]<=end;)
+		{
+			int ep = p->octend[linpos];
+			for(;linpos<=ep;linpos++)
+				TEMP[linpos]	 = minV;
+		}
+	}
+	
+	{
+		float minV	 = seed[p->total_octave_lines-1];
+		for(;linpos<p->n;linpos++)
+			TEMP[linpos]	 = minV;
+	}
+	{
+		for(pos=0;pos<p->n;pos+=16)
+		{
+			__m128	XMM0	 = _mm_load_ps(flr+pos    );
+			__m128	XMM4	 = _mm_load_ps(TEMP+pos   );
+			__m128	XMM1	 = _mm_load_ps(flr+pos+  4);
+			__m128	XMM5	 = _mm_load_ps(TEMP+pos+ 4);
+			__m128	XMM2	 = _mm_load_ps(flr+pos+  8);
+			__m128	XMM6	 = _mm_load_ps(TEMP+pos+ 8);
+			__m128	XMM3	 = _mm_load_ps(flr+pos+ 12);
+			__m128	XMM7	 = _mm_load_ps(TEMP+pos+12);
+			XMM0	 = _mm_max_ps(XMM0, XMM4);
+			XMM1	 = _mm_max_ps(XMM1, XMM5);
+			XMM2	 = _mm_max_ps(XMM2, XMM6);
+			XMM3	 = _mm_max_ps(XMM3, XMM7);
+			_mm_store_ps(flr+pos   , XMM0);
+			_mm_store_ps(flr+pos+ 4, XMM1);
+			_mm_store_ps(flr+pos+ 8, XMM2);
+			_mm_store_ps(flr+pos+12, XMM3);
+		}
+	}
+#else														/* SSE Optimize */
   long   n=p->total_octave_lines;
   int    linesper=p->eighth_octave_lines;
   long   linpos=0;
@@ -605,8 +1130,2399 @@
       if(flr[linpos]<minV)flr[linpos]=minV;
   }
   
+#endif														/* SSE Optimize */
+}
+
+#ifdef __SSE__												/* SSE Optimize */
+/*
+	A	 = tY * tXX - tX * tXY;
+	B	 = tN * tXY - tX * tY;
+	D	 = tN * tXX - tX * tX;
+	R	 = (A + x * B) / D;
+
+	Input
+	TN		(N3 ,N2 ,N1 ,N0 )
+	XMM0	 = (XY0,Y0 ,XX0,X0 )
+	XMM1	 = (XY1,Y1 ,XX1,X1 )
+	XMM4	 = (XY2,Y2 ,XX2,X2 )
+	XMM3	 = (XY3,Y3 ,XX3,X3 )
+
+	Phase 1.
+
+	Phase 2.
+	XMM0	 = 	(X3 ,X2 ,X1 ,X0 )
+	XMM1	 = 	(XX3,XX2,XX1,XX0)
+	XMM2	 = 	(Y3 ,Y2 ,Y1 ,Y0 )
+	XMM3	 = 	(XY3,XY2,XY1,XY0)
+
+	Phase 3.
+	XMM4	 = Y*XX
+	XMM5	 = X*XY
+	XMM6	 = XY*TN
+	XMM7	 = X*Y
+
+	Phase 4.
+	XMM4	 = Y*XX - X*XY	... A
+	XMM6	 = XY*TN - X*Y	... B
+	XMM1	 = XX*TN
+	XMM0	 = X*X
+	XMM1	 = XX*TN - X*X	... D
+	
+	Phase 5. (PX = findex[i..i+3]; 1/D = _mm_rcp_ps estimate refined as 2r - D*r*r)
+	XMM6	 = PX*B
+	XMM4	 = PX*B+A
+	XMM4	 = (A+PX*B)/D
+*/
+#define bark_noise_hybridmp_SSE_SUBC()										\
+{	/* expands in the caller's scope: reads XMM0,XMM1,XMM4,XMM3 (one XXYY row each), TN and i; clobbers XMM0,XMM1,XMM3; result in XMM4 */	\
+	__m128 XMM2, XMM5, XMM6, XMM7;											\
+	XMM2 = XMM0;															\
+	XMM5 = XMM4;															\
+	XMM0	 = _mm_shuffle_ps(XMM0, XMM1, _MM_SHUFFLE(1,0,1,0));	/* 4x4 transpose, first half */			\
+	XMM2	 = _mm_shuffle_ps(XMM2, XMM1, _MM_SHUFFLE(3,2,3,2));			\
+	XMM4	 = _mm_shuffle_ps(XMM4, XMM3, _MM_SHUFFLE(1,0,1,0));			\
+	XMM5	 = _mm_shuffle_ps(XMM5, XMM3, _MM_SHUFFLE(3,2,3,2));			\
+	XMM1 = XMM0;															\
+	XMM3 = XMM2;															\
+	XMM0	 = _mm_shuffle_ps(XMM0, XMM4, _MM_SHUFFLE(2,0,2,0));	/* X  for 4 lines */			\
+	XMM1	 = _mm_shuffle_ps(XMM1, XMM4, _MM_SHUFFLE(3,1,3,1));	/* XX for 4 lines */			\
+	XMM2	 = _mm_shuffle_ps(XMM2, XMM5, _MM_SHUFFLE(2,0,2,0));	/* Y  for 4 lines */			\
+	XMM3	 = _mm_shuffle_ps(XMM3, XMM5, _MM_SHUFFLE(3,1,3,1));	/* XY for 4 lines */			\
+	XMM4 = XMM2;															\
+	XMM5 = XMM0;															\
+	XMM6 = XMM3;															\
+	XMM7 = XMM0;															\
+	XMM4 = _mm_mul_ps(XMM4, XMM1);	/* Y*XX */											\
+	XMM5 = _mm_mul_ps(XMM5, XMM3);	/* X*XY */											\
+	XMM3 = _mm_load_ps(findex+i);	/* x = i..i+3 as floats */											\
+	XMM6 = _mm_mul_ps(XMM6, TN.ps);	/* XY*N */											\
+	XMM1 = _mm_mul_ps(XMM1, TN.ps);	/* XX*N */											\
+	XMM7 = _mm_mul_ps(XMM7, XMM2);	/* X*Y */											\
+	XMM0 = _mm_mul_ps(XMM0, XMM0);	/* X*X */											\
+	XMM4 = _mm_sub_ps(XMM4, XMM5);	/* A */											\
+	XMM6 = _mm_sub_ps(XMM6, XMM7);	/* B */											\
+	XMM1 = _mm_sub_ps(XMM1, XMM0);	/* D */											\
+	XMM6 = _mm_mul_ps(XMM6, XMM3);	/* x*B */											\
+	XMM3 = _mm_rcp_ps(XMM1);	/* r: approximate 1/D */												\
+	XMM4 = _mm_add_ps(XMM4, XMM6);	/* A + x*B */											\
+	XMM1 = _mm_mul_ps(XMM1, XMM3);											\
+	XMM1 = _mm_mul_ps(XMM1, XMM3);	/* D*r*r */											\
+	XMM3 = _mm_add_ps(XMM3, XMM3);											\
+	XMM3 = _mm_sub_ps(XMM3, XMM1);	/* 2r - D*r*r: refined 1/D */											\
+	XMM4 = _mm_mul_ps(XMM4, XMM3);	/* R = (A + x*B)/D */											\
 }
+#define bark_noise_hybridmp_SSE_SUBC2()										\
+{	/* same computation as bark_noise_hybridmp_SSE_SUBC, but also saves A, B and the refined 1/D into the caller's PA, PB, PD */	\
+	__m128 XMM2, XMM5, XMM6, XMM7;											\
+	XMM2 = XMM0;															\
+	XMM5 = XMM4;															\
+	XMM0	 = _mm_shuffle_ps(XMM0, XMM1, _MM_SHUFFLE(1,0,1,0));	/* 4x4 transpose, first half */			\
+	XMM2	 = _mm_shuffle_ps(XMM2, XMM1, _MM_SHUFFLE(3,2,3,2));			\
+	XMM4	 = _mm_shuffle_ps(XMM4, XMM3, _MM_SHUFFLE(1,0,1,0));			\
+	XMM5	 = _mm_shuffle_ps(XMM5, XMM3, _MM_SHUFFLE(3,2,3,2));			\
+	XMM1 = XMM0;															\
+	XMM3 = XMM2;															\
+	XMM0	 = _mm_shuffle_ps(XMM0, XMM4, _MM_SHUFFLE(2,0,2,0));	/* X  for 4 lines */			\
+	XMM1	 = _mm_shuffle_ps(XMM1, XMM4, _MM_SHUFFLE(3,1,3,1));	/* XX for 4 lines */			\
+	XMM2	 = _mm_shuffle_ps(XMM2, XMM5, _MM_SHUFFLE(2,0,2,0));	/* Y  for 4 lines */			\
+	XMM3	 = _mm_shuffle_ps(XMM3, XMM5, _MM_SHUFFLE(3,1,3,1));	/* XY for 4 lines */			\
+	XMM4 = XMM2;															\
+	XMM5 = XMM0;															\
+	XMM6 = XMM3;															\
+	XMM7 = XMM0;															\
+	XMM4 = _mm_mul_ps(XMM4, XMM1);	/* Y*XX */											\
+	XMM5 = _mm_mul_ps(XMM5, XMM3);	/* X*XY */											\
+	XMM3 = _mm_load_ps(findex+i);	/* x = i..i+3 as floats */											\
+	XMM6 = _mm_mul_ps(XMM6, TN.ps);	/* XY*N */											\
+	XMM1 = _mm_mul_ps(XMM1, TN.ps);	/* XX*N */											\
+	XMM7 = _mm_mul_ps(XMM7, XMM2);	/* X*Y */											\
+	XMM0 = _mm_mul_ps(XMM0, XMM0);	/* X*X */											\
+	XMM4 = _mm_sub_ps(XMM4, XMM5);	/* A */											\
+	XMM6 = _mm_sub_ps(XMM6, XMM7);	/* B */											\
+	XMM1 = _mm_sub_ps(XMM1, XMM0);	/* D */											\
+	PA	 = XMM4;	/* keep A */															\
+	PB	 = XMM6;	/* keep B */															\
+	XMM6 = _mm_mul_ps(XMM6, XMM3);	/* x*B */											\
+	XMM3 = _mm_rcp_ps(XMM1);	/* r: approximate 1/D */												\
+	XMM4 = _mm_add_ps(XMM4, XMM6);	/* A + x*B */											\
+	XMM1 = _mm_mul_ps(XMM1, XMM3);											\
+	XMM1 = _mm_mul_ps(XMM1, XMM3);	/* D*r*r */											\
+	XMM3 = _mm_add_ps(XMM3, XMM3);											\
+	XMM3 = _mm_sub_ps(XMM3, XMM1);	/* 2r - D*r*r: refined 1/D */											\
+	PD	 = XMM3;	/* note: PD holds the reciprocal of D, not D itself */															\
+	XMM4 = _mm_mul_ps(XMM4, XMM3);	/* R = (A + x*B)/D */											\
+}
+#endif														/* SSE Optimize */
+
+#ifdef __SSE__												/* SSE Optimize */
+static void bark_noise_hybridmp(vorbis_look_psy *p,
+								const float *f,
+								float *noise,
+								const float offset,
+								const int fixed,
+								float *work,
+								float *tf){
+	int		n = p->n;
+	float	*N		 = work;
+	__m128	*XXYY	 = (__m128*)(N+n);
+	float	*xxyy	 = N+n;
+	short	*sb	 = (short*)p->bark;
+	
+	int		i, j;
+	int		lo, hi;
+	int		midpoint1, midpoint2;
+	float	tN, tX, tXX, tY, tXY;
+	float	R, A, B, D;
+	float	x;
+	float	*TN = N;
+	__m128	*TXXYY = XXYY;
+	
+	__m128	OFFSET;
+	__m128	PXXYY	 = _mm_setzero_ps();
+	__m128	PA, PB, PD;
+	_MM_ALIGN16 __m128	TEMP[16];
+	int	p0, p1;
+	
+	// Phase 1
+	_mm_prefetch((const char*)(f     ), _MM_HINT_NTA);
+	_mm_prefetch((const char*)(findex2     ), _MM_HINT_NTA);
+	_mm_prefetch((const char*)(f  +16), _MM_HINT_NTA);
+	_mm_prefetch((const char*)(findex2  +16), _MM_HINT_NTA);
+	OFFSET	 = _mm_set_ps1(offset);
+	{
+		__m128	XMM0	 = _mm_load_ps(f   );
+		__m128	XMM1	 = _mm_load_ps(f+ 4);
+		__m128	XMM2	 = _mm_load_ps(f+ 8);
+		__m128	XMM3	 = _mm_load_ps(f+12);
+		__m128	XMM4, XMM5, XMM6, XMM7;
+		XMM4	 = OFFSET;
+		XMM5	 = _mm_load_ps(PFV_1.sf);
+		XMM0	 = _mm_add_ps(XMM0, XMM4);
+		XMM1	 = _mm_add_ps(XMM1, XMM4);
+		XMM2	 = _mm_add_ps(XMM2, XMM4);
+		XMM3	 = _mm_add_ps(XMM3, XMM4);
+		XMM0	 = _mm_max_ps(XMM0, XMM5);
+		XMM1	 = _mm_max_ps(XMM1, XMM5);
+		XMM2	 = _mm_max_ps(XMM2, XMM5);
+		XMM3	 = _mm_max_ps(XMM3, XMM5);
+		XMM4	 = XMM0;
+		XMM5	 = XMM1;
+		XMM6	 = XMM2;
+		XMM7	 = XMM3;
+		XMM0	 = _mm_mul_ps(XMM0, XMM0);
+		XMM1	 = _mm_mul_ps(XMM1, XMM1);
+		XMM2	 = _mm_mul_ps(XMM2, XMM2);
+		XMM3	 = _mm_mul_ps(XMM3, XMM3);
+		_mm_store_ps(TN   , XMM0);	/* N */
+		_mm_store_ps(TN+ 4, XMM1);
+		_mm_store_ps(TN+ 8, XMM2);
+		_mm_store_ps(TN+12, XMM3);
+		XMM0	 = _mm_mul_ps(XMM0, XMM4);
+		XMM1	 = _mm_mul_ps(XMM1, XMM5);
+		XMM2	 = _mm_mul_ps(XMM2, XMM6);
+		XMM3	 = _mm_mul_ps(XMM3, XMM7);
+		TEMP[ 1]	 = XMM0;	/* Y */
+		PXXYY	 = _mm_move_ss(PXXYY, TEMP[1]);
+		XMM4	 = _mm_load_ps(findex   );
+		TEMP[ 5]	 = XMM1;
+		XMM5	 = _mm_load_ps(findex+ 4);
+		TEMP[ 9]	 = XMM2;
+		XMM6	 = _mm_load_ps(findex+ 8);
+		TEMP[13]	 = XMM3;
+		XMM7	 = _mm_load_ps(findex+12);
+		XMM0	 = _mm_mul_ps(XMM0, XMM4);
+		XMM1	 = _mm_mul_ps(XMM1, XMM5);
+		XMM2	 = _mm_mul_ps(XMM2, XMM6);
+		XMM3	 = _mm_mul_ps(XMM3, XMM7);
+		TEMP[ 3]	 = XMM0;	/* XY */
+		TEMP[ 7]	 = XMM1;
+		TEMP[11]	 = XMM2;
+		TEMP[15]	 = XMM3;
+		XMM0	 = _mm_load_ps(TN   );	/* N */
+		XMM1	 = _mm_load_ps(TN+ 4);
+		XMM2	 = _mm_load_ps(TN+ 8);
+		XMM3	 = _mm_load_ps(TN+12);
+		XMM4	 = _mm_mul_ps(XMM4, XMM0);
+		XMM5	 = _mm_mul_ps(XMM5, XMM1);
+		XMM6	 = _mm_mul_ps(XMM6, XMM2);
+		XMM7	 = _mm_mul_ps(XMM7, XMM3);
+		TEMP[ 0]	 = XMM4;	/* X */
+		TEMP[ 4]	 = XMM5;
+		TEMP[ 8]	 = XMM6;
+		TEMP[12]	 = XMM7;
+		XMM4	 = _mm_load_ps(findex2   );
+		XMM5	 = _mm_load_ps(findex2+ 4);
+		XMM6	 = _mm_load_ps(findex2+ 8);
+		XMM7	 = _mm_load_ps(findex2+12);
+		XMM0	 = _mm_mul_ps(XMM0, XMM4);
+		XMM4	 = TEMP[0];	// X
+		XMM1	 = _mm_mul_ps(XMM1, XMM5);
+		XMM5	 = TEMP[1];	// Y
+		XMM2	 = _mm_mul_ps(XMM2, XMM6);
+		XMM6	 = XMM0;	// XX
+		XMM3	 = _mm_mul_ps(XMM3, XMM7);
+		XMM7	 = TEMP[3];	// XY
+		XMM0	 = XMM4;
+		TEMP[ 6]	 = XMM1;
+		XMM1	 = XMM5;
+		// i=0-3
+		// PXXYY	 = (0, 0, 0, Y^2)
+		XMM4	 = _mm_shuffle_ps(XMM4, XMM6, _MM_SHUFFLE(1,0,1,0));
+		XMM0	 = _mm_shuffle_ps(XMM0, XMM6, _MM_SHUFFLE(3,2,3,2));
+		TEMP[10]	 = XMM2;
+		XMM5	 = _mm_shuffle_ps(XMM5, XMM7, _MM_SHUFFLE(1,0,1,0));
+		XMM1	 = _mm_shuffle_ps(XMM1, XMM7, _MM_SHUFFLE(3,2,3,2));
+		TEMP[14]	 = XMM3;
+		XMM6	 = XMM4;
+		XMM7	 = XMM0;
+		XMM4	 = _mm_shuffle_ps(XMM4, XMM5, _MM_SHUFFLE(2,0,2,0));
+		XMM6	 = _mm_shuffle_ps(XMM6, XMM5, _MM_SHUFFLE(3,1,3,1));
+		XMM5	 = TEMP[ 4];	// X
+		XMM0	 = _mm_shuffle_ps(XMM0, XMM1, _MM_SHUFFLE(2,0,2,0));
+		XMM7	 = _mm_shuffle_ps(XMM7, XMM1, _MM_SHUFFLE(3,1,3,1));
+		XMM1	 = TEMP[ 5];	// Y
+		// XXYY[i+0]	 = (XY,  Y, XX,  X)	 = (0, Y^3, 0, 0)
+		// To Fix (0, Y^3*.5f, 0, Y^2*.5f)
+		XMM4	 = _mm_add_ps(XMM4, PXXYY);
+		TN[ 0]	*= 0.5;
+		XMM4	 = _mm_mul_ps(XMM4, PFV_0P5.ps);
+		TN[ 1]	+= TN[ 0];
+		XMM6	 = _mm_add_ps(XMM6, XMM4);
+		TN[ 2]	+= TN[ 1];
+		XMM0	 = _mm_add_ps(XMM0, XMM6);
+		TN[ 3]	+= TN[ 2];
+		XMM7	 = _mm_add_ps(XMM7, XMM0);
+		TXXYY[ 0]	 = XMM4;
+		XMM4	 = TEMP[ 6];	// XX
+		TXXYY[ 1]	 = XMM6;
+		XMM6	 = TEMP[ 7];	// XY
+		TXXYY[ 2]	 = XMM0;
+		XMM0	 = XMM5;
+		TXXYY[ 3]	 = XMM7;
+		XMM7	 = XMM1;
+		XMM5	 = _mm_shuffle_ps(XMM5, XMM4, _MM_SHUFFLE(1,0,1,0));
+		XMM0	 = _mm_shuffle_ps(XMM0, XMM4, _MM_SHUFFLE(3,2,3,2));
+		XMM1	 = _mm_shuffle_ps(XMM1, XMM6, _MM_SHUFFLE(1,0,1,0));
+		XMM7	 = _mm_shuffle_ps(XMM7, XMM6, _MM_SHUFFLE(3,2,3,2));
+		XMM4	 = XMM5;
+		XMM6	 = XMM0;
+		XMM5	 = _mm_shuffle_ps(XMM5, XMM1, _MM_SHUFFLE(2,0,2,0));
+		XMM4	 = _mm_shuffle_ps(XMM4, XMM1, _MM_SHUFFLE(3,1,3,1));
+		XMM1	 = TEMP[ 8];	// X
+		XMM0	 = _mm_shuffle_ps(XMM0, XMM7, _MM_SHUFFLE(2,0,2,0));
+		XMM6	 = _mm_shuffle_ps(XMM6, XMM7, _MM_SHUFFLE(3,1,3,1));
+		XMM7	 = TEMP[ 9];	// Y
+		XMM5	 = _mm_add_ps(XMM5, TXXYY[ 3]);
+		TN[ 4]	+= TN[ 3];
+		XMM4	 = _mm_add_ps(XMM4, XMM5);
+		TN[ 5]	+= TN[ 4];
+		XMM0	 = _mm_add_ps(XMM0, XMM4);
+		TN[ 6]	+= TN[ 5];
+		XMM6	 = _mm_add_ps(XMM6, XMM0);
+		TN[ 7]	+= TN[ 6];
+		TXXYY[ 4]	 = XMM5;
+		XMM5	 = TEMP[10];	// XX
+		TXXYY[ 5]	 = XMM4;
+		XMM4	 = TEMP[11];	// XY
+		TXXYY[ 6]	 = XMM0;
+		XMM0	 = XMM1;
+		TXXYY[ 7]	 = XMM6;
+		XMM6	 = XMM7;
+		XMM1	 = _mm_shuffle_ps(XMM1, XMM5, _MM_SHUFFLE(1,0,1,0));
+		XMM0	 = _mm_shuffle_ps(XMM0, XMM5, _MM_SHUFFLE(3,2,3,2));
+		XMM7	 = _mm_shuffle_ps(XMM7, XMM4, _MM_SHUFFLE(1,0,1,0));
+		XMM6	 = _mm_shuffle_ps(XMM6, XMM4, _MM_SHUFFLE(3,2,3,2));
+		XMM5	 = XMM1;
+		XMM4	 = XMM0;
+		XMM1	 = _mm_shuffle_ps(XMM1, XMM7, _MM_SHUFFLE(2,0,2,0));
+		XMM5	 = _mm_shuffle_ps(XMM5, XMM7, _MM_SHUFFLE(3,1,3,1));
+		XMM7	 = TEMP[12];	// X
+		XMM0	 = _mm_shuffle_ps(XMM0, XMM6, _MM_SHUFFLE(2,0,2,0));
+		XMM4	 = _mm_shuffle_ps(XMM4, XMM6, _MM_SHUFFLE(3,1,3,1));
+		XMM6	 = TEMP[13];	// Y
+		XMM1	 = _mm_add_ps(XMM1, TXXYY[ 7]);
+		TN[ 8]	+= TN[ 7];
+		XMM5	 = _mm_add_ps(XMM5, XMM1);
+		TN[ 9]	+= TN[ 8];
+		XMM0	 = _mm_add_ps(XMM0, XMM5);
+		TN[10]	+= TN[ 9];
+		XMM4	 = _mm_add_ps(XMM4, XMM0);
+		TN[11]	+= TN[10];
+		TXXYY[ 8]	 = XMM1;
+		XMM1	 = TEMP[14];	// XX
+		TXXYY[ 9]	 = XMM5;
+		XMM5	 = TEMP[15];	// XY
+		TXXYY[10]	 = XMM0;
+		XMM0	 = XMM7;
+		TXXYY[11]	 = XMM4;
+		XMM4	 = XMM6;
+		XMM7	 = _mm_shuffle_ps(XMM7, XMM1, _MM_SHUFFLE(1,0,1,0));
+		XMM0	 = _mm_shuffle_ps(XMM0, XMM1, _MM_SHUFFLE(3,2,3,2));
+		XMM6	 = _mm_shuffle_ps(XMM6, XMM5, _MM_SHUFFLE(1,0,1,0));
+		XMM4	 = _mm_shuffle_ps(XMM4, XMM5, _MM_SHUFFLE(3,2,3,2));
+		XMM1	 = XMM7;
+		XMM5	 = XMM0;
+		XMM7	 = _mm_shuffle_ps(XMM7, XMM6, _MM_SHUFFLE(2,0,2,0));
+		XMM1	 = _mm_shuffle_ps(XMM1, XMM6, _MM_SHUFFLE(3,1,3,1));
+		XMM0	 = _mm_shuffle_ps(XMM0, XMM4, _MM_SHUFFLE(2,0,2,0));
+		XMM5	 = _mm_shuffle_ps(XMM5, XMM4, _MM_SHUFFLE(3,1,3,1));
+		XMM7	 = _mm_add_ps(XMM7, TXXYY[11]);
+		TN[12]	+= TN[11];
+		XMM1	 = _mm_add_ps(XMM1, XMM7);
+		TN[13]	+= TN[12];
+		XMM0	 = _mm_add_ps(XMM0, XMM1);
+		TN[14]	+= TN[13];
+		XMM5	 = _mm_add_ps(XMM5, XMM0);
+		TN[15]	+= TN[14];
+		TXXYY[12]	 = XMM7;
+		TXXYY[13]	 = XMM1;
+		TXXYY[14]	 = XMM0;
+		TXXYY[15]	 = XMM5;
+		TN		+= 16;
+		TXXYY	+= 16;
+	}
+	for(i=16;i<n;i+=16)
+	{
+		__m128	XMM0, XMM1, XMM2, XMM3;
+		__m128	XMM4, XMM5, XMM6, XMM7;
+		_mm_prefetch((const char*)(f+i+16), _MM_HINT_NTA);
+		_mm_prefetch((const char*)(findex2+i+16), _MM_HINT_NTA);
+		XMM0	 = _mm_load_ps(f+i   );
+		XMM1	 = _mm_load_ps(f+i+ 4);
+		XMM2	 = _mm_load_ps(f+i+ 8);
+		XMM3	 = _mm_load_ps(f+i+12);
+		XMM4	 = OFFSET;
+		XMM5	 = _mm_load_ps(PFV_1.sf);
+		XMM0	 = _mm_add_ps(XMM0, XMM4);
+		XMM1	 = _mm_add_ps(XMM1, XMM4);
+		XMM2	 = _mm_add_ps(XMM2, XMM4);
+		XMM3	 = _mm_add_ps(XMM3, XMM4);
+		XMM0	 = _mm_max_ps(XMM0, XMM5);
+		XMM1	 = _mm_max_ps(XMM1, XMM5);
+		XMM2	 = _mm_max_ps(XMM2, XMM5);
+		XMM3	 = _mm_max_ps(XMM3, XMM5);
+		XMM4	 = XMM0;
+		XMM5	 = XMM1;
+		XMM6	 = XMM2;
+		XMM7	 = XMM3;
+		XMM0	 = _mm_mul_ps(XMM0, XMM0);
+		XMM1	 = _mm_mul_ps(XMM1, XMM1);
+		XMM2	 = _mm_mul_ps(XMM2, XMM2);
+		XMM3	 = _mm_mul_ps(XMM3, XMM3);
+		_mm_store_ps(TN   , XMM0);
+		_mm_store_ps(TN+ 4, XMM1);
+		_mm_store_ps(TN+ 8, XMM2);
+		_mm_store_ps(TN+12, XMM3);
+		XMM0	 = _mm_mul_ps(XMM0, XMM4);
+		XMM1	 = _mm_mul_ps(XMM1, XMM5);
+		XMM2	 = _mm_mul_ps(XMM2, XMM6);
+		XMM3	 = _mm_mul_ps(XMM3, XMM7);
+		TEMP[ 1]	 = XMM0;	/* Y */
+		XMM4	 = _mm_load_ps(findex+i   );
+		TEMP[ 5]	 = XMM1;
+		XMM5	 = _mm_load_ps(findex+i+ 4);
+		TEMP[ 9]	 = XMM2;
+		XMM6	 = _mm_load_ps(findex+i+ 8);
+		TEMP[13]	 = XMM3;
+		XMM7	 = _mm_load_ps(findex+i+12);
+		XMM0	 = _mm_mul_ps(XMM0, XMM4);
+		XMM1	 = _mm_mul_ps(XMM1, XMM5);
+		XMM2	 = _mm_mul_ps(XMM2, XMM6);
+		XMM3	 = _mm_mul_ps(XMM3, XMM7);
+		TEMP[ 3]	 = XMM0;	/* XY */
+		TEMP[ 7]	 = XMM1;
+		TEMP[11]	 = XMM2;
+		TEMP[15]	 = XMM3;
+		XMM0	 = _mm_load_ps(TN   );	/* N */
+		XMM1	 = _mm_load_ps(TN+ 4);
+		XMM2	 = _mm_load_ps(TN+ 8);
+		XMM3	 = _mm_load_ps(TN+12);
+		XMM4	 = _mm_mul_ps(XMM4, XMM0);
+		XMM5	 = _mm_mul_ps(XMM5, XMM1);
+		XMM6	 = _mm_mul_ps(XMM6, XMM2);
+		XMM7	 = _mm_mul_ps(XMM7, XMM3);
+		TEMP[ 0]	 = XMM4;	/* X */
+		TEMP[ 4]	 = XMM5;
+		TEMP[ 8]	 = XMM6;
+		TEMP[12]	 = XMM7;
+		XMM4	 = _mm_load_ps(findex2+i   );
+		XMM5	 = _mm_load_ps(findex2+i+ 4);
+		XMM6	 = _mm_load_ps(findex2+i+ 8);
+		XMM7	 = _mm_load_ps(findex2+i+12);
+		XMM0	 = _mm_mul_ps(XMM0, XMM4);
+		XMM4	 = TEMP[ 0];	// X
+		XMM1	 = _mm_mul_ps(XMM1, XMM5);
+		XMM5	 = TEMP[ 1];	// Y
+		XMM2	 = _mm_mul_ps(XMM2, XMM6);
+		XMM6	 = XMM0;	/* XX */
+		XMM0	 = XMM4;
+		XMM3	 = _mm_mul_ps(XMM3, XMM7);
+		XMM7	 = TEMP[ 3];	// XY
+		TEMP[ 6]	 = XMM1;
+		XMM1	 = XMM5;
+		XMM4	 = _mm_shuffle_ps(XMM4, XMM6, _MM_SHUFFLE(1,0,1,0));
+		XMM0	 = _mm_shuffle_ps(XMM0, XMM6, _MM_SHUFFLE(3,2,3,2));
+		TEMP[10]	 = XMM2;
+		XMM5	 = _mm_shuffle_ps(XMM5, XMM7, _MM_SHUFFLE(1,0,1,0));
+		XMM1	 = _mm_shuffle_ps(XMM1, XMM7, _MM_SHUFFLE(3,2,3,2));
+		TEMP[14]	 = XMM3;
+		XMM6	 = XMM4;
+		XMM7	 = XMM0;
+		XMM4	 = _mm_shuffle_ps(XMM4, XMM5, _MM_SHUFFLE(2,0,2,0));
+		XMM6	 = _mm_shuffle_ps(XMM6, XMM5, _MM_SHUFFLE(3,1,3,1));
+		XMM5	 = TEMP[ 4];	// X
+		XMM0	 = _mm_shuffle_ps(XMM0, XMM1, _MM_SHUFFLE(2,0,2,0));
+		XMM7	 = _mm_shuffle_ps(XMM7, XMM1, _MM_SHUFFLE(3,1,3,1));
+		XMM1	 = TEMP[ 5];	// Y
+		XMM4	 = _mm_add_ps(XMM4, TXXYY[-1]);
+		TN[ 0]	+= TN[-1];
+		XMM6	 = _mm_add_ps(XMM6, XMM4);
+		TN[ 1]	+= TN[ 0];
+		XMM0	 = _mm_add_ps(XMM0, XMM6);
+		TN[ 2]	+= TN[ 1];
+		XMM7	 = _mm_add_ps(XMM7, XMM0);
+		TN[ 3]	+= TN[ 2];
+		TXXYY[ 0]	 = XMM4;
+		XMM4	 = TEMP[ 6];	// XX
+		TXXYY[ 1]	 = XMM6;
+		XMM6	 = TEMP[ 7];	// XY
+		TXXYY[ 2]	 = XMM0;
+		XMM0	 = XMM5;
+		TXXYY[ 3]	 = XMM7;
+		XMM7	 = XMM1;
+		XMM5	 = _mm_shuffle_ps(XMM5, XMM4, _MM_SHUFFLE(1,0,1,0));
+		XMM0	 = _mm_shuffle_ps(XMM0, XMM4, _MM_SHUFFLE(3,2,3,2));
+		XMM1	 = _mm_shuffle_ps(XMM1, XMM6, _MM_SHUFFLE(1,0,1,0));
+		XMM7	 = _mm_shuffle_ps(XMM7, XMM6, _MM_SHUFFLE(3,2,3,2));
+		XMM4	 = XMM5;
+		XMM6	 = XMM0;
+		XMM5	 = _mm_shuffle_ps(XMM5, XMM1, _MM_SHUFFLE(2,0,2,0));
+		XMM4	 = _mm_shuffle_ps(XMM4, XMM1, _MM_SHUFFLE(3,1,3,1));
+		XMM1	 = TEMP[ 8];	// X
+		XMM0	 = _mm_shuffle_ps(XMM0, XMM7, _MM_SHUFFLE(2,0,2,0));
+		XMM6	 = _mm_shuffle_ps(XMM6, XMM7, _MM_SHUFFLE(3,1,3,1));
+		XMM7	 = TEMP[ 9];	// Y
+		XMM5	 = _mm_add_ps(XMM5, TXXYY[ 3]);
+		TN[ 4]	+= TN[ 3];
+		XMM4	 = _mm_add_ps(XMM4, XMM5);
+		TN[ 5]	+= TN[ 4];
+		XMM0	 = _mm_add_ps(XMM0, XMM4);
+		TN[ 6]	+= TN[ 5];
+		XMM6	 = _mm_add_ps(XMM6, XMM0);
+		TN[ 7]	+= TN[ 6];
+		TXXYY[ 4]	 = XMM5;
+		XMM5	 = TEMP[10];	// XX
+		TXXYY[ 5]	 = XMM4;
+		XMM4	 = TEMP[11];	// XY
+		TXXYY[ 6]	 = XMM0;
+		XMM0	 = XMM1;
+		TXXYY[ 7]	 = XMM6;
+		XMM6	 = XMM7;
+		XMM1	 = _mm_shuffle_ps(XMM1, XMM5, _MM_SHUFFLE(1,0,1,0));
+		XMM0	 = _mm_shuffle_ps(XMM0, XMM5, _MM_SHUFFLE(3,2,3,2));
+		XMM7	 = _mm_shuffle_ps(XMM7, XMM4, _MM_SHUFFLE(1,0,1,0));
+		XMM6	 = _mm_shuffle_ps(XMM6, XMM4, _MM_SHUFFLE(3,2,3,2));
+		XMM5	 = XMM1;
+		XMM4	 = XMM0;
+		XMM1	 = _mm_shuffle_ps(XMM1, XMM7, _MM_SHUFFLE(2,0,2,0));
+		XMM5	 = _mm_shuffle_ps(XMM5, XMM7, _MM_SHUFFLE(3,1,3,1));
+		XMM7	 = TEMP[12];	// X
+		XMM0	 = _mm_shuffle_ps(XMM0, XMM6, _MM_SHUFFLE(2,0,2,0));
+		XMM4	 = _mm_shuffle_ps(XMM4, XMM6, _MM_SHUFFLE(3,1,3,1));
+		XMM6	 = TEMP[13];	// Y
+		XMM1	 = _mm_add_ps(XMM1, TXXYY[ 7]);
+		TN[ 8]	+= TN[ 7];
+		XMM5	 = _mm_add_ps(XMM5, XMM1);
+		TN[ 9]	+= TN[ 8];
+		XMM0	 = _mm_add_ps(XMM0, XMM5);
+		TN[10]	+= TN[ 9];
+		XMM4	 = _mm_add_ps(XMM4, XMM0);
+		TN[11]	+= TN[10];
+		TXXYY[ 8]	 = XMM1;
+		XMM1	 = TEMP[14];	// XX
+		TXXYY[ 9]	 = XMM5;
+		XMM5	 = TEMP[15];	// XY
+		TXXYY[10]	 = XMM0;
+		XMM0	 = XMM7;
+		TXXYY[11]	 = XMM4;
+		XMM4	 = XMM6;
+		XMM7	 = _mm_shuffle_ps(XMM7, XMM1, _MM_SHUFFLE(1,0,1,0));
+		XMM0	 = _mm_shuffle_ps(XMM0, XMM1, _MM_SHUFFLE(3,2,3,2));
+		XMM6	 = _mm_shuffle_ps(XMM6, XMM5, _MM_SHUFFLE(1,0,1,0));
+		XMM4	 = _mm_shuffle_ps(XMM4, XMM5, _MM_SHUFFLE(3,2,3,2));
+		XMM1	 = XMM7;
+		XMM5	 = XMM0;
+		XMM7	 = _mm_shuffle_ps(XMM7, XMM6, _MM_SHUFFLE(2,0,2,0));
+		XMM1	 = _mm_shuffle_ps(XMM1, XMM6, _MM_SHUFFLE(3,1,3,1));
+		XMM0	 = _mm_shuffle_ps(XMM0, XMM4, _MM_SHUFFLE(2,0,2,0));
+		XMM5	 = _mm_shuffle_ps(XMM5, XMM4, _MM_SHUFFLE(3,1,3,1));
+		XMM7	 = _mm_add_ps(XMM7, TXXYY[11]);
+		TN[12]	+= TN[11];
+		XMM1	 = _mm_add_ps(XMM1, XMM7);
+		TN[13]	+= TN[12];
+		XMM0	 = _mm_add_ps(XMM0, XMM1);
+		TN[14]	+= TN[13];
+		XMM5	 = _mm_add_ps(XMM5, XMM0);
+		TN[15]	+= TN[14];
+		TXXYY[12]	 = XMM7;
+		TXXYY[13]	 = XMM1;
+		TXXYY[14]	 = XMM0;
+		TXXYY[15]	 = XMM5;
+		TN		+= 16;
+		TXXYY	+= 16;
+	}
+	for(i=0;i<p->midpoint1_4;i+=4)
+	{
+		__m128	XMM0, XMM1, XMM4, XMM3;
+		__m128x	TN, TN1;
+		int	p0, p1, p2, p3;
+		p0	 =-sb[i*2+1];
+		p1	 =-sb[i*2+3];
+		p2	 =-sb[i*2+5];
+		p3	 =-sb[i*2+7];
+		
+		XMM0	 = XXYY[p0];
+		XMM1	 = XXYY[p1];
+		XMM4	 = XXYY[p2];
+		XMM3	 = XXYY[p3];
+		
+		TN.sf[0]	 = N[p0];
+		TN.sf[1]	 = N[p1];
+		TN.sf[2]	 = N[p2];
+		TN.sf[3]	 = N[p3];
+		
+		XMM0	 = _mm_xor_ps(XMM0, PCS_RNNR.ps);
+		XMM1	 = _mm_xor_ps(XMM1, PCS_RNNR.ps);
+		XMM4	 = _mm_xor_ps(XMM4, PCS_RNNR.ps);
+		XMM3	 = _mm_xor_ps(XMM3, PCS_RNNR.ps);
+		
+		p0	 = sb[i*2  ];
+		p1	 = sb[i*2+2];
+		p2	 = sb[i*2+4];
+		p3	 = sb[i*2+6];
+		
+		XMM0	 = _mm_add_ps(XMM0, XXYY[p0]);
+		XMM1	 = _mm_add_ps(XMM1, XXYY[p1]);
+		XMM4	 = _mm_add_ps(XMM4, XXYY[p2]);
+		XMM3	 = _mm_add_ps(XMM3, XXYY[p3]);
+		
+		TN1.sf[0]	 = N[p0];
+		TN1.sf[1]	 = N[p1];
+		TN1.sf[2]	 = N[p2];
+		TN1.sf[3]	 = N[p3];
+		
+		TN.ps	 = _mm_add_ps(TN.ps, TN1.ps);
+		
+		bark_noise_hybridmp_SSE_SUBC();
+		XMM4	 = _mm_max_ps(XMM4, PFV_0.ps);
+		XMM4	 = _mm_sub_ps(XMM4, OFFSET);
+		_mm_store_ps(noise+i  , XMM4);
+	}
+	if(p->midpoint2-i<4)
+	{
+		x	 = (float)i;
+		for (;i<p->midpoint1;i++,x+=1.f)
+		{
+			lo	 = sb[i*2+1];
+			hi	 = sb[i*2];
+			
+			tN	 = N[hi] + N[-lo];
+			tX	 = xxyy[hi*4  ] - xxyy[-lo*4  ];
+			tXX	 = xxyy[hi*4+1] + xxyy[-lo*4+1];
+			tY	 = xxyy[hi*4+2] + xxyy[-lo*4+2];
+			tXY	 = xxyy[hi*4+3] - xxyy[-lo*4+3];
+			
+			A	 = tY * tXX - tX * tXY;
+			B	 = tN * tXY - tX * tY;
+			D	 = tN * tXX - tX * tX;
+			R	 = (A + x * B) / D;
+			if(R<0.f)
+				R	 = 0.f;
+			
+			noise[i]	 = R - offset;
+		}
+		for (;i<p->midpoint2;i++,x+=1.f)
+		{
+			lo	 = sb[i*2+1];
+			hi	 = sb[i*2];
+			
+			tN	 = N[hi] - N[lo];
+			tX	 = xxyy[hi*4  ] - xxyy[lo*4  ];
+			tXX	 = xxyy[hi*4+1] - xxyy[lo*4+1];
+			tY	 = xxyy[hi*4+2] - xxyy[lo*4+2];
+			tXY	 = xxyy[hi*4+3] - xxyy[lo*4+3];
+			
+			A	 = tY * tXX - tX * tXY;
+			B	 = tN * tXY - tX * tY;
+			D	 = tN * tXX - tX * tX;
+			R	 = (A + x * B) / D;
+			if(R<0.f)
+				R	 = 0.f;
+			noise[i]	 = R - offset;
+		}
+		j	 = (i+3)&(~3);
+		j	 = (j>=n)?n:j;
+		for (;i<j;i++,x+=1.f)
+		{
+			R	 = (A + x * B) / D;
+			if(R<0.f)
+				R	 = 0.f;
+			
+			noise[i]	 = R - offset;
+		}
+		PA	 = _mm_set_ps1(A);
+		PB	 = _mm_set_ps1(B);
+		PD	 = _mm_set_ps1(1.f/D);
+	}
+	else
+	{
+		switch(p->midpoint1%4)
+		{
+			case 0:
+				break;
+			case 1:
+				{
+					__m128	XMM0, XMM1, XMM4, XMM3;
+					__m128x	TN, TN1;
+					int	p0, p1, p2, p3;
+					p0	 =-sb[i*2+1];
+					p1	 = sb[i*2+2];
+					p2	 = sb[i*2+4];
+					p3	 = sb[i*2+6];
+					
+					XMM0	 = XXYY[p0];
+					XMM1	 = XXYY[p1];
+					XMM4	 = XXYY[p2];
+					XMM3	 = XXYY[p3];
+					
+					TN.sf[0]	 = N[p0];
+					TN.sf[1]	 = N[p1];
+					TN.sf[2]	 = N[p2];
+					TN.sf[3]	 = N[p3];
+					
+					XMM0	 = _mm_xor_ps(XMM0, PCS_RNNR.ps);
+					
+					p0	 = sb[i*2  ];
+					p1	 = sb[i*2+3];
+					p2	 = sb[i*2+5];
+					p3	 = sb[i*2+7];
+					
+					XMM0	 = _mm_add_ps(XMM0, XXYY[p0]);
+					XMM1	 = _mm_sub_ps(XMM1, XXYY[p1]);
+					XMM4	 = _mm_sub_ps(XMM4, XXYY[p2]);
+					XMM3	 = _mm_sub_ps(XMM3, XXYY[p3]);
+					
+					TN1.sf[0]	 = N[p0];
+					TN1.sf[1]	 = N[p1];
+					TN1.sf[2]	 = N[p2];
+					TN1.sf[3]	 = N[p3];
+					
+					TN.ps	 = _mm_sub_ps(TN.ps, _mm_xor_ps(TN1.ps, PCS_NNNR.ps));
+					
+					bark_noise_hybridmp_SSE_SUBC();
+					XMM4	 = _mm_max_ps(XMM4, PFV_0.ps);
+					XMM4	 = _mm_sub_ps(XMM4, OFFSET);
+					_mm_store_ps(noise+i  , XMM4);
+					i	+= 4;
+				}
+				break;
+			case 2:
+				{
+					__m128	XMM0, XMM1, XMM4, XMM3;
+					__m128x	TN, TN1;
+					int	p0, p1, p2, p3;
+					p0	 =-sb[i*2+1];
+					p1	 =-sb[i*2+3];
+					p2	 = sb[i*2+4];
+					p3	 = sb[i*2+6];
+					
+					XMM0	 = XXYY[p0];
+					XMM1	 = XXYY[p1];
+					XMM4	 = XXYY[p2];
+					XMM3	 = XXYY[p3];
+					
+					TN.sf[0]	 = N[p0];
+					TN.sf[1]	 = N[p1];
+					TN.sf[2]	 = N[p2];
+					TN.sf[3]	 = N[p3];
+					
+					XMM0	 = _mm_xor_ps(XMM0, PCS_RNNR.ps);
+					XMM1	 = _mm_xor_ps(XMM1, PCS_RNNR.ps);
+					
+					p0	 = sb[i*2  ];
+					p1	 = sb[i*2+2];
+					p2	 = sb[i*2+5];
+					p3	 = sb[i*2+7];
+					
+					XMM0	 = _mm_add_ps(XMM0, XXYY[p0]);
+					XMM1	 = _mm_add_ps(XMM1, XXYY[p1]);
+					XMM4	 = _mm_sub_ps(XMM4, XXYY[p2]);
+					XMM3	 = _mm_sub_ps(XMM3, XXYY[p3]);
+					
+					TN1.sf[0]	 = N[p0];
+					TN1.sf[1]	 = N[p1];
+					TN1.sf[2]	 = N[p2];
+					TN1.sf[3]	 = N[p3];
+					
+					TN.ps	 = _mm_sub_ps(TN.ps, _mm_xor_ps(TN1.ps, PCS_NNRR.ps));
+					
+					bark_noise_hybridmp_SSE_SUBC();
+					XMM4	 = _mm_max_ps(XMM4, PFV_0.ps);
+					XMM4	 = _mm_sub_ps(XMM4, OFFSET);
+					_mm_store_ps(noise+i  , XMM4);
+					i	+= 4;
+				}
+				break;
+			case 3:
+				{
+					__m128	XMM0, XMM1, XMM4, XMM3;
+					__m128x	TN, TN1;
+					int	p0, p1, p2, p3;
+					p0	 =-sb[i*2+1];
+					p1	 =-sb[i*2+3];
+					p2	 =-sb[i*2+5];
+					p3	 = sb[i*2+6];
+					
+					XMM0	 = XXYY[p0];
+					XMM1	 = XXYY[p1];
+					XMM4	 = XXYY[p2];
+					XMM3	 = XXYY[p3];
+					
+					TN.sf[0]	 = N[p0];
+					TN.sf[1]	 = N[p1];
+					TN.sf[2]	 = N[p2];
+					TN.sf[3]	 = N[p3];
+					
+					XMM0	 = _mm_xor_ps(XMM0, PCS_RNNR.ps);
+					XMM1	 = _mm_xor_ps(XMM1, PCS_RNNR.ps);
+					XMM4	 = _mm_xor_ps(XMM4, PCS_RNNR.ps);
+					
+					p0	 = sb[i*2  ];
+					p1	 = sb[i*2+2];
+					p2	 = sb[i*2+4];
+					p3	 = sb[i*2+7];
+					
+					XMM0	 = _mm_add_ps(XMM0, XXYY[p0]);
+					XMM1	 = _mm_add_ps(XMM1, XXYY[p1]);
+					XMM4	 = _mm_add_ps(XMM4, XXYY[p2]);
+					XMM3	 = _mm_sub_ps(XMM3, XXYY[p3]);
+					
+					TN1.sf[0]	 = N[p0];
+					TN1.sf[1]	 = N[p1];
+					TN1.sf[2]	 = N[p2];
+					TN1.sf[3]	 = N[p3];
+					
+					TN.ps	 = _mm_sub_ps(TN.ps, _mm_xor_ps(TN1.ps, PCS_NRRR.ps));
+					
+					bark_noise_hybridmp_SSE_SUBC();
+					XMM4	 = _mm_max_ps(XMM4, PFV_0.ps);
+					XMM4	 = _mm_sub_ps(XMM4, OFFSET);
+					_mm_store_ps(noise+i  , XMM4);
+					i	+= 4;
+				}
+				break;
+		}
+		for(;i<p->midpoint2_16;i+=16)
+		{
+			register __m128	XMM0, XMM1, XMM2, XMM3;
+			register __m128	XMM4, XMM5, XMM6, XMM7;
+			__m128x	TN0, TN1, TN2;
+			int	p0, p1, p2, p3;
+			p0	 = sb[i*2   ];
+			p1	 = sb[i*2+ 2];
+			p2	 = sb[i*2+ 4];
+			p3	 = sb[i*2+ 6];
+			XMM0	 = XXYY[p0];
+			XMM1	 = XXYY[p1];
+			XMM4	 = XXYY[p2];
+			XMM3	 = XXYY[p3];
+			TN0.sf[0]	 = N[p0];
+			TN0.sf[1]	 = N[p1];
+			TN0.sf[2]	 = N[p2];
+			TN0.sf[3]	 = N[p3];
+			p0	 = sb[i*2+ 1];
+			p1	 = sb[i*2+ 3];
+			p2	 = sb[i*2+ 5];
+			p3	 = sb[i*2+ 7];
+			XMM2	 = XXYY[p0];
+			XMM5	 = XXYY[p1];
+			XMM6	 = XXYY[p2];
+			XMM7	 = XXYY[p3];
+			XMM0	 = _mm_sub_ps(XMM0, XMM2);
+			XMM1	 = _mm_sub_ps(XMM1, XMM5);
+			XMM4	 = _mm_sub_ps(XMM4, XMM6);
+			XMM3	 = _mm_sub_ps(XMM3, XMM7);
+			TN1.sf[0]	 = N[p0];
+			TN1.sf[1]	 = N[p1];
+			TN1.sf[2]	 = N[p2];
+			TN1.sf[3]	 = N[p3];
+			XMM2	 = XMM0;
+			XMM5	 = XMM4;
+			XMM0	 = _mm_shuffle_ps(XMM0, XMM1, _MM_SHUFFLE(1,0,1,0));
+			XMM2	 = _mm_shuffle_ps(XMM2, XMM1, _MM_SHUFFLE(3,2,3,2));
+			XMM7	 = TN0.ps;
+			XMM6	 = TN1.ps;
+			XMM4	 = _mm_shuffle_ps(XMM4, XMM3, _MM_SHUFFLE(1,0,1,0));
+			XMM5	 = _mm_shuffle_ps(XMM5, XMM3, _MM_SHUFFLE(3,2,3,2));
+			p0	 = sb[i*2+ 8];
+			p1	 = sb[i*2+10];
+			XMM1	 = XMM0;
+			XMM3	 = XMM2;
+			XMM0	 = _mm_shuffle_ps(XMM0, XMM4, _MM_SHUFFLE(2,0,2,0));
+			XMM1	 = _mm_shuffle_ps(XMM1, XMM4, _MM_SHUFFLE(3,1,3,1));
+			XMM7	 = _mm_sub_ps(XMM7, XMM6);
+			p2	 = sb[i*2+12];
+			XMM2	 = _mm_shuffle_ps(XMM2, XMM5, _MM_SHUFFLE(2,0,2,0));
+			XMM3	 = _mm_shuffle_ps(XMM3, XMM5, _MM_SHUFFLE(3,1,3,1));
+			TN0.ps	 = XMM7;
+			p3	 = sb[i*2+14];
+			XMM4	 = XMM2;
+			XMM5	 = XMM0;
+			XMM6	 = XMM3;
+			XMM7	 = XMM0;
+			XMM4	 = _mm_mul_ps(XMM4, XMM1);
+			XMM5	 = _mm_mul_ps(XMM5, XMM3);
+			XMM3	 = TN0.ps;
+			XMM6	 = _mm_mul_ps(XMM6, XMM3);
+			XMM1	 = _mm_mul_ps(XMM1, XMM3);
+			XMM3	 = _mm_load_ps(findex+i   );
+			XMM7	 = _mm_mul_ps(XMM7, XMM2);
+			XMM2	 = XXYY[p0];
+			XMM0	 = _mm_mul_ps(XMM0, XMM0);
+			XMM4	 = _mm_sub_ps(XMM4, XMM5);
+			XMM5	 = XXYY[p1];
+			XMM6	 = _mm_sub_ps(XMM6, XMM7);
+			XMM7	 = XXYY[p2];
+			XMM1	 = _mm_sub_ps(XMM1, XMM0);
+			XMM0	 = XXYY[p3];
+			XMM6	 = _mm_mul_ps(XMM6, XMM3);
+			XMM3	 = _mm_rcp_ps(XMM1);
+			TN0.sf[0]	 = N[p0];
+			TN0.sf[1]	 = N[p1];
+			XMM4	 = _mm_add_ps(XMM4, XMM6);
+			XMM1	 = _mm_mul_ps(XMM1, XMM3);
+			TN0.sf[2]	 = N[p2];
+			TN0.sf[3]	 = N[p3];
+			XMM1	 = _mm_mul_ps(XMM1, XMM3);
+			p0	 = sb[i*2+ 9];
+			p1	 = sb[i*2+11];
+			XMM3	 = _mm_add_ps(XMM3, XMM3);
+			p2	 = sb[i*2+13];
+			p3	 = sb[i*2+15];
+			XMM3	 = _mm_sub_ps(XMM3, XMM1);
+			XMM1	 = _mm_load_ps(PFV_0.sf);
+			XMM6	 = XXYY[p0];
+			XMM4	 = _mm_mul_ps(XMM4, XMM3);
+			XMM3	 = OFFSET;
+			XMM4	 = _mm_max_ps(XMM4, XMM1);
+			XMM1	 = XXYY[p1];
+			XMM4	 = _mm_sub_ps(XMM4, XMM3);
+			XMM3	 = XXYY[p2];
+			_mm_store_ps(noise+i   , XMM4);
+			XMM4	 = XXYY[p3];
+			XMM2	 = _mm_sub_ps(XMM2, XMM6);
+			XMM5	 = _mm_sub_ps(XMM5, XMM1);
+			XMM7	 = _mm_sub_ps(XMM7, XMM3);
+			XMM0	 = _mm_sub_ps(XMM0, XMM4);
+			TN1.sf[0]	 = N[p0];
+			TN1.sf[1]	 = N[p1];
+			TN1.sf[2]	 = N[p2];
+			TN1.sf[3]	 = N[p3];
+			XMM6	 = XMM2;
+			XMM1	 = XMM7;
+			XMM2	 = _mm_shuffle_ps(XMM2, XMM5, _MM_SHUFFLE(1,0,1,0));
+			XMM6	 = _mm_shuffle_ps(XMM6, XMM5, _MM_SHUFFLE(3,2,3,2));
+			XMM4	 = TN0.ps;
+			XMM3	 = TN1.ps;
+			XMM7	 = _mm_shuffle_ps(XMM7, XMM0, _MM_SHUFFLE(1,0,1,0));
+			XMM1	 = _mm_shuffle_ps(XMM1, XMM0, _MM_SHUFFLE(3,2,3,2));
+			p0	 = sb[i*2+16];
+			p1	 = sb[i*2+18];
+			XMM4	 = _mm_sub_ps(XMM4, XMM3);
+			XMM5	 = XMM2;
+			XMM0	 = XMM6;
+			XMM2	 = _mm_shuffle_ps(XMM2, XMM7, _MM_SHUFFLE(2,0,2,0));
+			XMM5	 = _mm_shuffle_ps(XMM5, XMM7, _MM_SHUFFLE(3,1,3,1));
+			p2	 = sb[i*2+20];
+			p3	 = sb[i*2+22];
+			TN0.ps	 = XMM4;
+			XMM6	 = _mm_shuffle_ps(XMM6, XMM1, _MM_SHUFFLE(2,0,2,0));
+			XMM0	 = _mm_shuffle_ps(XMM0, XMM1, _MM_SHUFFLE(3,1,3,1));
+			TN2.sf[0]	 = N[p0];
+			TN2.sf[1]	 = N[p1];
+			TN2.sf[2]	 = N[p2];
+			TN2.sf[3]	 = N[p3];
+			XMM7	 = XMM6;
+			XMM1	 = XMM2;
+			XMM3	 = XMM0;
+			XMM4	 = XMM2;
+			XMM7	 = _mm_mul_ps(XMM7, XMM5);
+			XMM1	 = _mm_mul_ps(XMM1, XMM0);
+			XMM0	 = TN0.ps;
+			XMM3	 = _mm_mul_ps(XMM3, XMM0);
+			XMM5	 = _mm_mul_ps(XMM5, XMM0);
+			XMM0	 = _mm_load_ps(findex+i+ 4);
+			XMM4	 = _mm_mul_ps(XMM4, XMM6);
+			XMM6	 = XXYY[p0];
+			XMM2	 = _mm_mul_ps(XMM2, XMM2);
+			XMM7	 = _mm_sub_ps(XMM7, XMM1);
+			XMM1	 = XXYY[p1];
+			XMM3	 = _mm_sub_ps(XMM3, XMM4);
+			XMM4	 = XXYY[p2];
+			XMM5	 = _mm_sub_ps(XMM5, XMM2);
+			XMM2	 = XXYY[p3];
+			XMM3	 = _mm_mul_ps(XMM3, XMM0);
+			XMM0	 = _mm_rcp_ps(XMM5);
+			p0	 = sb[i*2+17];
+			p1	 = sb[i*2+19];
+			XMM7	 = _mm_add_ps(XMM7, XMM3);
+			XMM3	 = XXYY[p0];
+			XMM5	 = _mm_mul_ps(XMM5, XMM0);
+			p2	 = sb[i*2+21];
+			p3	 = sb[i*2+23];
+			XMM5	 = _mm_mul_ps(XMM5, XMM0);
+			XMM0	 = _mm_add_ps(XMM0, XMM0);
+			TN1.sf[0]	 = N[p0];
+			XMM0	 = _mm_sub_ps(XMM0, XMM5);
+			XMM5	 = _mm_load_ps(PFV_0.sf);
+			XMM7	 = _mm_mul_ps(XMM7, XMM0);
+			TN1.sf[1]	 = N[p1];
+			XMM0	 = OFFSET;
+			XMM7	 = _mm_max_ps(XMM7, XMM5);
+			TN1.sf[2]	 = N[p2];
+			XMM5	 = XXYY[p1];
+			XMM7	 = _mm_sub_ps(XMM7, XMM0);
+			TN1.sf[3]	 = N[p3];
+			XMM0	 = XXYY[p2];
+			_mm_store_ps(noise+i+ 4, XMM7);
+			XMM7	 = XXYY[p3];
+			XMM6	 = _mm_sub_ps(XMM6, XMM3);
+			XMM1	 = _mm_sub_ps(XMM1, XMM5);
+			XMM4	 = _mm_sub_ps(XMM4, XMM0);
+			XMM2	 = _mm_sub_ps(XMM2, XMM7);
+			XMM3	 = XMM6;
+			XMM5	 = XMM4;
+			XMM6	 = _mm_shuffle_ps(XMM6, XMM1, _MM_SHUFFLE(1,0,1,0));
+			XMM3	 = _mm_shuffle_ps(XMM3, XMM1, _MM_SHUFFLE(3,2,3,2));
+			XMM7	 = TN2.ps;
+			XMM0	 = TN1.ps;
+			XMM4	 = _mm_shuffle_ps(XMM4, XMM2, _MM_SHUFFLE(1,0,1,0));
+			XMM5	 = _mm_shuffle_ps(XMM5, XMM2, _MM_SHUFFLE(3,2,3,2));
+			p0	 = sb[i*2+24];
+			p1	 = sb[i*2+26];
+			XMM1	 = XMM6;
+			XMM2	 = XMM3;
+			XMM6	 = _mm_shuffle_ps(XMM6, XMM4, _MM_SHUFFLE(2,0,2,0));
+			XMM1	 = _mm_shuffle_ps(XMM1, XMM4, _MM_SHUFFLE(3,1,3,1));
+			XMM7	 = _mm_sub_ps(XMM7, XMM0);
+			p2	 = sb[i*2+28];
+			XMM3	 = _mm_shuffle_ps(XMM3, XMM5, _MM_SHUFFLE(2,0,2,0));
+			XMM2	 = _mm_shuffle_ps(XMM2, XMM5, _MM_SHUFFLE(3,1,3,1));
+			TN0.ps	 = XMM7;
+			p3	 = sb[i*2+30];
+			XMM4	 = XMM3;
+			XMM5	 = XMM6;
+			XMM0	 = XMM2;
+			XMM7	 = XMM6;
+			XMM4	 = _mm_mul_ps(XMM4, XMM1);
+			XMM5	 = _mm_mul_ps(XMM5, XMM2);
+			XMM2	 = TN0.ps;
+			XMM0	 = _mm_mul_ps(XMM0, XMM2);
+			XMM1	 = _mm_mul_ps(XMM1, XMM2);
+			XMM2	 = _mm_load_ps(findex+i+ 8);
+			XMM7	 = _mm_mul_ps(XMM7, XMM3);
+			XMM3	 = XXYY[p0];
+			XMM6	 = _mm_mul_ps(XMM6, XMM6);
+			XMM4	 = _mm_sub_ps(XMM4, XMM5);
+			XMM5	 = XXYY[p1];
+			XMM0	 = _mm_sub_ps(XMM0, XMM7);
+			XMM7	 = XXYY[p2];
+			XMM1	 = _mm_sub_ps(XMM1, XMM6);
+			XMM6	 = XXYY[p3];
+			XMM0	 = _mm_mul_ps(XMM0, XMM2);
+			XMM2	 = _mm_rcp_ps(XMM1);
+			TN0.sf[0]	 = N[p0];
+			TN0.sf[1]	 = N[p1];
+			XMM4	 = _mm_add_ps(XMM4, XMM0);
+			XMM1	 = _mm_mul_ps(XMM1, XMM2);
+			TN0.sf[2]	 = N[p2];
+			TN0.sf[3]	 = N[p3];
+			XMM1	 = _mm_mul_ps(XMM1, XMM2);
+			p0	 = sb[i*2+25];
+			p1	 = sb[i*2+27];
+			XMM2	 = _mm_add_ps(XMM2, XMM2);
+			p2	 = sb[i*2+29];
+			p3	 = sb[i*2+31];
+			XMM2	 = _mm_sub_ps(XMM2, XMM1);
+			XMM1	 = _mm_load_ps(PFV_0.sf);
+			XMM0	 = XXYY[p0];
+			XMM4	 = _mm_mul_ps(XMM4, XMM2);
+			XMM2	 = OFFSET;
+			XMM4	 = _mm_max_ps(XMM4, XMM1);
+			XMM1	 = XXYY[p1];
+			XMM4	 = _mm_sub_ps(XMM4, XMM2);
+			XMM2	 = XXYY[p2];
+			_mm_store_ps(noise+i+ 8, XMM4);
+			XMM4	 = XXYY[p3];
+			XMM3	 = _mm_sub_ps(XMM3, XMM0);
+			XMM5	 = _mm_sub_ps(XMM5, XMM1);
+			XMM7	 = _mm_sub_ps(XMM7, XMM2);
+			XMM6	 = _mm_sub_ps(XMM6, XMM4);
+			TN1.sf[0]	 = N[p0];
+			TN1.sf[1]	 = N[p1];
+			TN1.sf[2]	 = N[p2];
+			TN1.sf[3]	 = N[p3];
+			XMM0	 = XMM3;
+			XMM1	 = XMM7;
+			XMM3	 = _mm_shuffle_ps(XMM3, XMM5, _MM_SHUFFLE(1,0,1,0));
+			XMM0	 = _mm_shuffle_ps(XMM0, XMM5, _MM_SHUFFLE(3,2,3,2));
+			XMM4	 = TN0.ps;
+			XMM2	 = TN1.ps;
+			XMM7	 = _mm_shuffle_ps(XMM7, XMM6, _MM_SHUFFLE(1,0,1,0));
+			XMM1	 = _mm_shuffle_ps(XMM1, XMM6, _MM_SHUFFLE(3,2,3,2));
+			XMM4	 = _mm_sub_ps(XMM4, XMM2);
+			XMM5	 = XMM3;
+			XMM6	 = XMM0;
+			XMM3	 = _mm_shuffle_ps(XMM3, XMM7, _MM_SHUFFLE(2,0,2,0));
+			XMM5	 = _mm_shuffle_ps(XMM5, XMM7, _MM_SHUFFLE(3,1,3,1));
+			TN0.ps	 = XMM4;
+			XMM0	 = _mm_shuffle_ps(XMM0, XMM1, _MM_SHUFFLE(2,0,2,0));
+			XMM6	 = _mm_shuffle_ps(XMM6, XMM1, _MM_SHUFFLE(3,1,3,1));
+			XMM7	 = XMM0;
+			XMM1	 = XMM3;
+			XMM2	 = XMM6;
+			XMM4	 = XMM3;
+			XMM7	 = _mm_mul_ps(XMM7, XMM5);
+			XMM1	 = _mm_mul_ps(XMM1, XMM6);
+			XMM6	 = TN0.ps;
+			XMM2	 = _mm_mul_ps(XMM2, XMM6);
+			XMM5	 = _mm_mul_ps(XMM5, XMM6);
+			XMM6	 = _mm_load_ps(findex+i+12);
+			XMM4	 = _mm_mul_ps(XMM4, XMM0);
+			XMM3	 = _mm_mul_ps(XMM3, XMM3);
+			XMM7	 = _mm_sub_ps(XMM7, XMM1);
+			XMM2	 = _mm_sub_ps(XMM2, XMM4);
+			XMM5	 = _mm_sub_ps(XMM5, XMM3);
+			XMM2	 = _mm_mul_ps(XMM2, XMM6);
+			XMM6	 = _mm_rcp_ps(XMM5);
+			XMM7	 = _mm_add_ps(XMM7, XMM2);
+			XMM5	 = _mm_mul_ps(XMM5, XMM6);
+			XMM5	 = _mm_mul_ps(XMM5, XMM6);
+			XMM6	 = _mm_add_ps(XMM6, XMM6);
+			XMM6	 = _mm_sub_ps(XMM6, XMM5);
+			XMM5	 = _mm_load_ps(PFV_0.sf);
+			XMM7	 = _mm_mul_ps(XMM7, XMM6);
+			XMM6	 = OFFSET;
+			XMM7	 = _mm_max_ps(XMM7, XMM5);
+			XMM7	 = _mm_sub_ps(XMM7, XMM6);
+			_mm_store_ps(noise+i+12, XMM7);
+		}
+		for(;i<p->midpoint2_8;i+=8)
+		{
+			register __m128	XMM0, XMM1, XMM2, XMM3;
+			register __m128	XMM4, XMM5, XMM6, XMM7;
+			__m128x	TN0, TN1;
+			int	p0, p1, p2, p3;
+			p0	 = sb[i*2   ];
+			p1	 = sb[i*2+ 2];
+			p2	 = sb[i*2+ 4];
+			p3	 = sb[i*2+ 6];
+			XMM0	 = XXYY[p0];
+			XMM1	 = XXYY[p1];
+			XMM4	 = XXYY[p2];
+			XMM3	 = XXYY[p3];
+			TN0.sf[0]	 = N[p0];
+			TN0.sf[1]	 = N[p1];
+			TN0.sf[2]	 = N[p2];
+			TN0.sf[3]	 = N[p3];
+			p0	 = sb[i*2+ 1];
+			p1	 = sb[i*2+ 3];
+			p2	 = sb[i*2+ 5];
+			p3	 = sb[i*2+ 7];
+			XMM2	 = XXYY[p0];
+			XMM5	 = XXYY[p1];
+			XMM6	 = XXYY[p2];
+			XMM7	 = XXYY[p3];
+			XMM0	 = _mm_sub_ps(XMM0, XMM2);
+			XMM1	 = _mm_sub_ps(XMM1, XMM5);
+			XMM4	 = _mm_sub_ps(XMM4, XMM6);
+			XMM3	 = _mm_sub_ps(XMM3, XMM7);
+			TN1.sf[0]	 = N[p0];
+			TN1.sf[1]	 = N[p1];
+			TN1.sf[2]	 = N[p2];
+			TN1.sf[3]	 = N[p3];
+			XMM2	 = XMM0;
+			XMM5	 = XMM4;
+			XMM0	 = _mm_shuffle_ps(XMM0, XMM1, _MM_SHUFFLE(1,0,1,0));
+			XMM2	 = _mm_shuffle_ps(XMM2, XMM1, _MM_SHUFFLE(3,2,3,2));
+			XMM7	 = TN0.ps;
+			XMM6	 = TN1.ps;
+			XMM4	 = _mm_shuffle_ps(XMM4, XMM3, _MM_SHUFFLE(1,0,1,0));
+			XMM5	 = _mm_shuffle_ps(XMM5, XMM3, _MM_SHUFFLE(3,2,3,2));
+			p0	 = sb[i*2+ 8];
+			p1	 = sb[i*2+10];
+			XMM1	 = XMM0;
+			XMM3	 = XMM2;
+			XMM0	 = _mm_shuffle_ps(XMM0, XMM4, _MM_SHUFFLE(2,0,2,0));
+			XMM1	 = _mm_shuffle_ps(XMM1, XMM4, _MM_SHUFFLE(3,1,3,1));
+			XMM7	 = _mm_sub_ps(XMM7, XMM6);
+			p2	 = sb[i*2+12];
+			XMM2	 = _mm_shuffle_ps(XMM2, XMM5, _MM_SHUFFLE(2,0,2,0));
+			XMM3	 = _mm_shuffle_ps(XMM3, XMM5, _MM_SHUFFLE(3,1,3,1));
+			TN0.ps	 = XMM7;
+			p3	 = sb[i*2+14];
+			XMM4	 = XMM2;
+			XMM5	 = XMM0;
+			XMM6	 = XMM3;
+			XMM7	 = XMM0;
+			XMM4	 = _mm_mul_ps(XMM4, XMM1);
+			XMM5	 = _mm_mul_ps(XMM5, XMM3);
+			XMM3	 = TN0.ps;
+			XMM6	 = _mm_mul_ps(XMM6, XMM3);
+			XMM1	 = _mm_mul_ps(XMM1, XMM3);
+			XMM3	 = _mm_load_ps(findex+i   );
+			XMM7	 = _mm_mul_ps(XMM7, XMM2);
+			XMM2	 = XXYY[p0];
+			XMM0	 = _mm_mul_ps(XMM0, XMM0);
+			XMM4	 = _mm_sub_ps(XMM4, XMM5);
+			XMM5	 = XXYY[p1];
+			XMM6	 = _mm_sub_ps(XMM6, XMM7);
+			XMM7	 = XXYY[p2];
+			XMM1	 = _mm_sub_ps(XMM1, XMM0);
+			XMM0	 = XXYY[p3];
+			XMM6	 = _mm_mul_ps(XMM6, XMM3);
+			XMM3	 = _mm_rcp_ps(XMM1);
+			TN0.sf[0]	 = N[p0];
+			TN0.sf[1]	 = N[p1];
+			XMM4	 = _mm_add_ps(XMM4, XMM6);
+			XMM1	 = _mm_mul_ps(XMM1, XMM3);
+			TN0.sf[2]	 = N[p2];
+			TN0.sf[3]	 = N[p3];
+			XMM1	 = _mm_mul_ps(XMM1, XMM3);
+			p0	 = sb[i*2+ 9];
+			p1	 = sb[i*2+11];
+			XMM3	 = _mm_add_ps(XMM3, XMM3);
+			p2	 = sb[i*2+13];
+			p3	 = sb[i*2+15];
+			XMM3	 = _mm_sub_ps(XMM3, XMM1);
+			XMM1	 = _mm_load_ps(PFV_0.sf);
+			XMM6	 = XXYY[p0];
+			XMM4	 = _mm_mul_ps(XMM4, XMM3);
+			XMM3	 = OFFSET;
+			XMM4	 = _mm_max_ps(XMM4, XMM1);
+			XMM1	 = XXYY[p1];
+			XMM4	 = _mm_sub_ps(XMM4, XMM3);
+			XMM3	 = XXYY[p2];
+			_mm_store_ps(noise+i   , XMM4);
+			XMM4	 = XXYY[p3];
+			XMM2	 = _mm_sub_ps(XMM2, XMM6);
+			XMM5	 = _mm_sub_ps(XMM5, XMM1);
+			XMM7	 = _mm_sub_ps(XMM7, XMM3);
+			XMM0	 = _mm_sub_ps(XMM0, XMM4);
+			TN1.sf[0]	 = N[p0];
+			TN1.sf[1]	 = N[p1];
+			TN1.sf[2]	 = N[p2];
+			TN1.sf[3]	 = N[p3];
+			XMM6	 = XMM2;
+			XMM1	 = XMM7;
+			XMM2	 = _mm_shuffle_ps(XMM2, XMM5, _MM_SHUFFLE(1,0,1,0));
+			XMM6	 = _mm_shuffle_ps(XMM6, XMM5, _MM_SHUFFLE(3,2,3,2));
+			XMM4	 = TN0.ps;
+			XMM3	 = TN1.ps;
+			XMM7	 = _mm_shuffle_ps(XMM7, XMM0, _MM_SHUFFLE(1,0,1,0));
+			XMM1	 = _mm_shuffle_ps(XMM1, XMM0, _MM_SHUFFLE(3,2,3,2));
+			XMM4	 = _mm_sub_ps(XMM4, XMM3);
+			XMM5	 = XMM2;
+			XMM0	 = XMM6;
+			XMM2	 = _mm_shuffle_ps(XMM2, XMM7, _MM_SHUFFLE(2,0,2,0));
+			XMM5	 = _mm_shuffle_ps(XMM5, XMM7, _MM_SHUFFLE(3,1,3,1));
+			TN0.ps	 = XMM4;
+			XMM6	 = _mm_shuffle_ps(XMM6, XMM1, _MM_SHUFFLE(2,0,2,0));
+			XMM0	 = _mm_shuffle_ps(XMM0, XMM1, _MM_SHUFFLE(3,1,3,1));
+			XMM7	 = XMM6;
+			XMM1	 = XMM2;
+			XMM3	 = XMM0;
+			XMM4	 = XMM2;
+			XMM7	 = _mm_mul_ps(XMM7, XMM5);
+			XMM1	 = _mm_mul_ps(XMM1, XMM0);
+			XMM0	 = TN0.ps;
+			XMM3	 = _mm_mul_ps(XMM3, XMM0);
+			XMM5	 = _mm_mul_ps(XMM5, XMM0);
+			XMM0	 = _mm_load_ps(findex+i+ 4);
+			XMM4	 = _mm_mul_ps(XMM4, XMM6);
+			XMM2	 = _mm_mul_ps(XMM2, XMM2);
+			XMM7	 = _mm_sub_ps(XMM7, XMM1);
+			XMM3	 = _mm_sub_ps(XMM3, XMM4);
+			XMM5	 = _mm_sub_ps(XMM5, XMM2);
+			XMM3	 = _mm_mul_ps(XMM3, XMM0);
+			XMM0	 = _mm_rcp_ps(XMM5);
+			XMM7	 = _mm_add_ps(XMM7, XMM3);
+			XMM5	 = _mm_mul_ps(XMM5, XMM0);
+			XMM5	 = _mm_mul_ps(XMM5, XMM0);
+			XMM0	 = _mm_add_ps(XMM0, XMM0);
+			XMM0	 = _mm_sub_ps(XMM0, XMM5);
+			XMM5	 = _mm_load_ps(PFV_0.sf);
+			XMM7	 = _mm_mul_ps(XMM7, XMM0);
+			XMM0	 = OFFSET;
+			XMM7	 = _mm_max_ps(XMM7, XMM5);
+			XMM7	 = _mm_sub_ps(XMM7, XMM0);
+			_mm_store_ps(noise+i+ 4, XMM7);
+		}
+		for(;i<p->midpoint2_4;i+=4)
+		{
+			register __m128	XMM0, XMM1, XMM2, XMM3;
+			register __m128	XMM4, XMM5, XMM6, XMM7;
+			__m128x	TN0, TN1;
+			int	p0, p1, p2, p3;
+			p0	 = sb[i*2   ];
+			p1	 = sb[i*2+ 2];
+			p2	 = sb[i*2+ 4];
+			p3	 = sb[i*2+ 6];
+			
+			XMM0	 = XXYY[p0];
+			XMM1	 = XXYY[p1];
+			XMM4	 = XXYY[p2];
+			XMM3	 = XXYY[p3];
+			
+			TN0.sf[0]	 = N[p0];
+			TN0.sf[1]	 = N[p1];
+			TN0.sf[2]	 = N[p2];
+			TN0.sf[3]	 = N[p3];
+			
+			p0	 = sb[i*2+ 1];
+			p1	 = sb[i*2+ 3];
+			p2	 = sb[i*2+ 5];
+			p3	 = sb[i*2+ 7];
+			
+			XMM2	 = XXYY[p0];
+			XMM5	 = XXYY[p1];
+			XMM6	 = XXYY[p2];
+			XMM7	 = XXYY[p3];
+
+			XMM0	 = _mm_sub_ps(XMM0, XMM2);
+			XMM1	 = _mm_sub_ps(XMM1, XMM5);
+			XMM4	 = _mm_sub_ps(XMM4, XMM6);
+			XMM3	 = _mm_sub_ps(XMM3, XMM7);
+			
+			XMM2	 = XMM0;
+			XMM5	 = XMM4;
+			XMM0	 = _mm_shuffle_ps(XMM0, XMM1, _MM_SHUFFLE(1,0,1,0));
+			XMM2	 = _mm_shuffle_ps(XMM2, XMM1, _MM_SHUFFLE(3,2,3,2));
+			TN1.sf[0]	 = N[p0];
+			TN1.sf[1]	 = N[p1];
+			XMM7	 = TN0.ps;
+			XMM4	 = _mm_shuffle_ps(XMM4, XMM3, _MM_SHUFFLE(1,0,1,0));
+			XMM5	 = _mm_shuffle_ps(XMM5, XMM3, _MM_SHUFFLE(3,2,3,2));
+			TN1.sf[2]	 = N[p2];
+			TN1.sf[3]	 = N[p3];
+			XMM1	 = XMM0;
+			XMM3	 = XMM2;
+			XMM6	 = TN1.ps;
+			XMM0	 = _mm_shuffle_ps(XMM0, XMM4, _MM_SHUFFLE(2,0,2,0));
+			XMM1	 = _mm_shuffle_ps(XMM1, XMM4, _MM_SHUFFLE(3,1,3,1));
+			XMM7	 = _mm_sub_ps(XMM7, XMM6);
+			XMM2	 = _mm_shuffle_ps(XMM2, XMM5, _MM_SHUFFLE(2,0,2,0));
+			XMM3	 = _mm_shuffle_ps(XMM3, XMM5, _MM_SHUFFLE(3,1,3,1));
+			TN0.ps	 = XMM7;
+			XMM4	 = XMM2;
+			XMM5	 = XMM0;
+			XMM6	 = XMM3;
+			XMM7	 = XMM0;
+			XMM4	 = _mm_mul_ps(XMM4, XMM1);
+			XMM5	 = _mm_mul_ps(XMM5, XMM3);
+			XMM3	 = TN0.ps;
+			XMM6	 = _mm_mul_ps(XMM6, XMM3);
+			XMM1	 = _mm_mul_ps(XMM1, XMM3);
+			XMM3	 = _mm_load_ps(findex+i   );
+			XMM7	 = _mm_mul_ps(XMM7, XMM2);
+			XMM0	 = _mm_mul_ps(XMM0, XMM0);
+			XMM4	 = _mm_sub_ps(XMM4, XMM5);
+			XMM6	 = _mm_sub_ps(XMM6, XMM7);
+			XMM1	 = _mm_sub_ps(XMM1, XMM0);
+			XMM6	 = _mm_mul_ps(XMM6, XMM3);
+			XMM3	 = _mm_rcp_ps(XMM1);
+			XMM4	 = _mm_add_ps(XMM4, XMM6);
+			XMM1	 = _mm_mul_ps(XMM1, XMM3);
+			XMM1	 = _mm_mul_ps(XMM1, XMM3);
+			XMM3	 = _mm_add_ps(XMM3, XMM3);
+			XMM3	 = _mm_sub_ps(XMM3, XMM1);
+			XMM1	 = _mm_load_ps(PFV_0.sf);
+			XMM4	 = _mm_mul_ps(XMM4, XMM3);
+			XMM3	 = OFFSET;
+			XMM4	 = _mm_max_ps(XMM4, XMM1);
+			XMM4	 = _mm_sub_ps(XMM4, XMM3);
+			_mm_store_ps(noise+i   , XMM4);
+		}
+		if(i!=n)
+		{
+			__m128	XMM0, XMM1, XMM4, XMM3;
+			__m128x	TN, TN1;
+			int	p0, p1, p2;
+			switch(p->midpoint2%4)
+			{
+				case 0:
+					{
+						lo	 = sb[i*2-1];
+						hi	 = sb[i*2-2];
+						
+						tN	 = N[hi] - N[lo];
+						tX	 = xxyy[hi*4  ] - xxyy[lo*4  ];
+						tXX	 = xxyy[hi*4+1] - xxyy[lo*4+1];
+						tY	 = xxyy[hi*4+2] - xxyy[lo*4+2];
+						tXY	 = xxyy[hi*4+3] - xxyy[lo*4+3];
+						
+						A	 = tY * tXX - tX * tXY;
+						B	 = tN * tXY - tX * tY;
+						D	 = tN * tXX - tX * tX;
+						PA	 = _mm_set_ps1(A);
+						PB	 = _mm_set_ps1(B);
+						PD	 = _mm_set_ps1(1.f/D);
+					}
+					break;
+				case 1:
+					{
+						p0	 = sb[i*2  ];
+						
+						XMM0	 = XXYY[p0];
+						
+						TN.ps	 = _mm_set_ps1(N[p0]);
+						
+						p0	 = sb[i*2+1];
+						
+						XMM1	 =
+						XMM4	 =
+						XMM3	 =
+						XMM0	 = _mm_sub_ps(XMM0, XXYY[p0]);
+						
+						TN1.ps	 = _mm_set_ps1(N[p0]);
+						
+						TN.ps	 = _mm_sub_ps(TN.ps, TN1.ps);
+						
+						bark_noise_hybridmp_SSE_SUBC2();
+						XMM4	 = _mm_max_ps(XMM4, PFV_0.ps);
+						XMM4	 = _mm_sub_ps(XMM4, OFFSET);
+						_mm_store_ps(noise+i  , XMM4);
+						i	+= 4;
+						PA		 = _mm_shuffle_ps(PA, PA, _MM_SHUFFLE(0,0,0,0));
+						PB		 = _mm_shuffle_ps(PB, PB, _MM_SHUFFLE(0,0,0,0));
+						PD		 = _mm_shuffle_ps(PD, PD, _MM_SHUFFLE(0,0,0,0));
+					}
+					break;
+				case 2:
+					{
+						p0	 = sb[i*2  ];
+						p1	 = sb[i*2+2];
+						
+						XMM0	 = XXYY[p0];
+						XMM1	 = XXYY[p1];
+						
+						TN.sf[0]	 = N[p0];
+						TN.sf[1]	 =
+						TN.sf[2]	 =
+						TN.sf[3]	 = N[p1];
+						
+						p0	 = sb[i*2+1];
+						p1	 = sb[i*2+3];
+						
+						XMM0	 = _mm_sub_ps(XMM0, XXYY[p0]);
+						XMM4	 =
+						XMM3	 =
+						XMM1	 = _mm_sub_ps(XMM1, XXYY[p1]);
+						
+						TN1.sf[0]	 = N[p0];
+						TN1.sf[1]	 =
+						TN1.sf[2]	 =
+						TN1.sf[3]	 = N[p1];
+						
+						TN.ps	 = _mm_sub_ps(TN.ps, TN1.ps);
+						
+						bark_noise_hybridmp_SSE_SUBC2();
+						XMM4	 = _mm_max_ps(XMM4, PFV_0.ps);
+						XMM4	 = _mm_sub_ps(XMM4, OFFSET);
+						_mm_store_ps(noise+i  , XMM4);
+						i	+= 4;
+						PA		 = _mm_shuffle_ps(PA, PA, _MM_SHUFFLE(1,1,1,1));
+						PB		 = _mm_shuffle_ps(PB, PB, _MM_SHUFFLE(1,1,1,1));
+						PD		 = _mm_shuffle_ps(PD, PD, _MM_SHUFFLE(1,1,1,1));
+					}
+					break;
+				case 3:
+					{
+						p0	 = sb[i*2  ];
+						p1	 = sb[i*2+2];
+						p2	 = sb[i*2+4];
+						
+						XMM0	 = XXYY[p0];
+						XMM1	 = XXYY[p1];
+						XMM4	 = XXYY[p2];
+						
+						TN.sf[0]	 = N[p0];
+						TN.sf[1]	 = N[p1];
+						TN.sf[2]	 =
+						TN.sf[3]	 = N[p2];
+						
+						p0	 = sb[i*2+1];
+						p1	 = sb[i*2+3];
+						p2	 = sb[i*2+5];
+						
+						XMM0	 = _mm_sub_ps(XMM0, XXYY[p0]);
+						XMM1	 = _mm_sub_ps(XMM1, XXYY[p1]);
+						XMM3	 =
+						XMM4	 = _mm_sub_ps(XMM4, XXYY[p2]);
+						
+						TN1.sf[0]	 = N[p0];
+						TN1.sf[1]	 = N[p1];
+						TN1.sf[2]	 =
+						TN1.sf[3]	 = N[p2];
+						
+						TN.ps	 = _mm_sub_ps(TN.ps, TN1.ps);
+						
+						bark_noise_hybridmp_SSE_SUBC2();
+						XMM4	 = _mm_max_ps(XMM4, PFV_0.ps);
+						XMM4	 = _mm_sub_ps(XMM4, OFFSET);
+						_mm_store_ps(noise+i  , XMM4);
+						i	+= 4;
+						PA		 = _mm_shuffle_ps(PA, PA, _MM_SHUFFLE(2,2,2,2));
+						PB		 = _mm_shuffle_ps(PB, PB, _MM_SHUFFLE(2,2,2,2));
+						PD		 = _mm_shuffle_ps(PD, PD, _MM_SHUFFLE(2,2,2,2));
+					}
+					break;
+			}
+		}
+	}
+	if(i<n)
+	{
+		__m128	XMM0	 = PA;
+		__m128	XMM1	 = PB;
+		__m128	XMM2	 = _mm_set_ps1(-offset);
+		XMM0	 = _mm_mul_ps(XMM0, PD);
+		XMM1	 = _mm_mul_ps(XMM1, PD);
+		XMM0	 = _mm_sub_ps(XMM0, OFFSET);
+		if(i%8!=0)
+		{
+			__m128	XMM4	 = _mm_load_ps(findex+i   );
+			XMM4	 = _mm_mul_ps(XMM4, XMM1);
+			XMM4	 = _mm_add_ps(XMM4, XMM0);
+			XMM4	 = _mm_max_ps(XMM4, XMM2);
+			_mm_store_ps(noise+i  , XMM4);
+			i	+= 4;
+		}
+		if(i%16!=0)
+		{
+			__m128	XMM4	 = _mm_load_ps(findex+i   );
+			__m128	XMM5	 = _mm_load_ps(findex+i+ 4);
+			XMM4	 = _mm_mul_ps(XMM4, XMM1);
+			XMM5	 = _mm_mul_ps(XMM5, XMM1);
+			XMM4	 = _mm_add_ps(XMM4, XMM0);
+			XMM5	 = _mm_add_ps(XMM5, XMM0);
+			XMM4	 = _mm_max_ps(XMM4, XMM2);
+			XMM5	 = _mm_max_ps(XMM5, XMM2);
+			_mm_store_ps(noise+i  , XMM4);
+			_mm_store_ps(noise+i+4, XMM5);
+			i	+= 8;
+		}
+		for(;i<n;i+=16)
+		{
+			__m128	XMM4	 = _mm_load_ps(findex+i   );
+			__m128	XMM5	 = _mm_load_ps(findex+i+ 4);
+			__m128	XMM6	 = _mm_load_ps(findex+i+ 8);
+			__m128	XMM7	 = _mm_load_ps(findex+i+12);
+			XMM4	 = _mm_mul_ps(XMM4, XMM1);
+			XMM5	 = _mm_mul_ps(XMM5, XMM1);
+			XMM6	 = _mm_mul_ps(XMM6, XMM1);
+			XMM7	 = _mm_mul_ps(XMM7, XMM1);
+			XMM4	 = _mm_add_ps(XMM4, XMM0);
+			XMM5	 = _mm_add_ps(XMM5, XMM0);
+			XMM6	 = _mm_add_ps(XMM6, XMM0);
+			XMM7	 = _mm_add_ps(XMM7, XMM0);
+			XMM4	 = _mm_max_ps(XMM4, XMM2);
+			XMM5	 = _mm_max_ps(XMM5, XMM2);
+			XMM6	 = _mm_max_ps(XMM6, XMM2);
+			XMM7	 = _mm_max_ps(XMM7, XMM2);
+			_mm_store_ps(noise+i   , XMM4);
+			_mm_store_ps(noise+i+ 4, XMM5);
+			_mm_store_ps(noise+i+ 8, XMM6);
+			_mm_store_ps(noise+i+12, XMM7);
+		}
+	}
+
+	if (fixed <= 0) return;
+
+	midpoint1	 = (fixed+1)/2;
+	midpoint2	 = n-fixed/2;
+	
+	j	 = midpoint1&(~7);
+	p1	 = fixed / 2;
+	p0	 = p1 - 3;
+	
+	for(i=0;i<j;i+=8)
+	{
+		__m128	XMM0, XMM1, XMM2, XMM3;
+		__m128	XMM4, XMM5, XMM6, XMM7;
+		__m128x	TN, TN1;
+
+		XMM5	 = _mm_lddqu_ps(N+p0);
+		XMM0	 = XXYY[p0+3];
+		XMM1	 = XXYY[p0+2];
+		XMM4	 = XXYY[p0+1];
+		XMM3	 = XXYY[p0  ];
+		TN.ps	 = _mm_shuffle_ps(XMM5, XMM5, _MM_SHUFFLE(0,1,2,3));
+		XMM0	 = _mm_xor_ps(XMM0, PCS_RNNR.ps);
+		XMM1	 = _mm_xor_ps(XMM1, PCS_RNNR.ps);
+		XMM4	 = _mm_xor_ps(XMM4, PCS_RNNR.ps);
+		XMM3	 = _mm_xor_ps(XMM3, PCS_RNNR.ps);
+		XMM5	 = _mm_lddqu_ps(N+p1);
+		XMM0	 = _mm_add_ps(XMM0, XXYY[p1  ]);
+		XMM1	 = _mm_add_ps(XMM1, XXYY[p1+1]);
+		XMM4	 = _mm_add_ps(XMM4, XXYY[p1+2]);
+		XMM3	 = _mm_add_ps(XMM3, XXYY[p1+3]);
+		TN.ps	 = _mm_add_ps(TN.ps, XMM5);
+		XMM2 = XMM0;
+		XMM5 = XMM4;
+		XMM0	 = _mm_shuffle_ps(XMM0, XMM1, _MM_SHUFFLE(1,0,1,0));
+		XMM2	 = _mm_shuffle_ps(XMM2, XMM1, _MM_SHUFFLE(3,2,3,2));
+		XMM4	 = _mm_shuffle_ps(XMM4, XMM3, _MM_SHUFFLE(1,0,1,0));
+		XMM5	 = _mm_shuffle_ps(XMM5, XMM3, _MM_SHUFFLE(3,2,3,2));
+		XMM1 = XMM0;
+		XMM3 = XMM2;
+		XMM0	 = _mm_shuffle_ps(XMM0, XMM4, _MM_SHUFFLE(2,0,2,0));
+		XMM1	 = _mm_shuffle_ps(XMM1, XMM4, _MM_SHUFFLE(3,1,3,1));
+		XMM2	 = _mm_shuffle_ps(XMM2, XMM5, _MM_SHUFFLE(2,0,2,0));
+		XMM3	 = _mm_shuffle_ps(XMM3, XMM5, _MM_SHUFFLE(3,1,3,1));
+		XMM4 = XMM2;
+		XMM5 = XMM0;
+		XMM6 = XMM3;
+		XMM7 = XMM0;
+		XMM4 = _mm_mul_ps(XMM4, XMM1);
+		XMM5 = _mm_mul_ps(XMM5, XMM3);
+		XMM3 = _mm_load_ps(findex+i);
+		XMM6 = _mm_mul_ps(XMM6, TN.ps);
+		XMM1 = _mm_mul_ps(XMM1, TN.ps);
+		XMM7 = _mm_mul_ps(XMM7, XMM2);
+		XMM2	 = _mm_lddqu_ps(N+p0-4);
+		XMM0 = _mm_mul_ps(XMM0, XMM0);
+		XMM4 = _mm_sub_ps(XMM4, XMM5);
+		XMM5	 = XXYY[p0-1];
+		XMM6 = _mm_sub_ps(XMM6, XMM7);
+		XMM7	 = XXYY[p0-2];
+		XMM1 = _mm_sub_ps(XMM1, XMM0);
+		XMM0	 = XXYY[p0-3];
+		XMM6 = _mm_mul_ps(XMM6, XMM3);
+		XMM3 = _mm_rcp_ps(XMM1);
+		XMM4 = _mm_add_ps(XMM4, XMM6);
+		XMM6	 = XXYY[p0-4];
+		XMM1 = _mm_mul_ps(XMM1, XMM3);
+		TN1.ps	 = _mm_shuffle_ps(XMM2, XMM2, _MM_SHUFFLE(0,1,2,3));
+		XMM1 = _mm_mul_ps(XMM1, XMM3);
+		XMM5	 = _mm_xor_ps(XMM5, PCS_RNNR.ps);
+		XMM3 = _mm_add_ps(XMM3, XMM3);
+		XMM7	 = _mm_xor_ps(XMM7, PCS_RNNR.ps);
+		XMM3 = _mm_sub_ps(XMM3, XMM1);
+		XMM0	 = _mm_xor_ps(XMM0, PCS_RNNR.ps);
+		XMM4 = _mm_mul_ps(XMM4, XMM3);
+		XMM6	 = _mm_xor_ps(XMM6, PCS_RNNR.ps);
+		XMM2	 = _mm_lddqu_ps(N+p1+4);
+		XMM4	 = _mm_sub_ps(XMM4, OFFSET);
+		XMM5	 = _mm_add_ps(XMM5, XXYY[p1+4]);
+		XMM4	 = _mm_min_ps(XMM4, PM128(noise+i  ));
+		XMM7	 = _mm_add_ps(XMM7, XXYY[p1+5]);
+		XMM0	 = _mm_add_ps(XMM0, XXYY[p1+6]);
+		_mm_store_ps(noise+i  , XMM4);
+		XMM6	 = _mm_add_ps(XMM6, XXYY[p1+7]);
+		TN1.ps	 = _mm_add_ps(TN1.ps, XMM2);
+		XMM1 = XMM5;
+		XMM2 = XMM0;
+		XMM5	 = _mm_shuffle_ps(XMM5, XMM7, _MM_SHUFFLE(1,0,1,0));
+		XMM1	 = _mm_shuffle_ps(XMM1, XMM7, _MM_SHUFFLE(3,2,3,2));
+		XMM0	 = _mm_shuffle_ps(XMM0, XMM6, _MM_SHUFFLE(1,0,1,0));
+		XMM2	 = _mm_shuffle_ps(XMM2, XMM6, _MM_SHUFFLE(3,2,3,2));
+		XMM7 = XMM5;
+		XMM6 = XMM1;
+		XMM5	 = _mm_shuffle_ps(XMM5, XMM0, _MM_SHUFFLE(2,0,2,0));
+		XMM7	 = _mm_shuffle_ps(XMM7, XMM0, _MM_SHUFFLE(3,1,3,1));
+		XMM1	 = _mm_shuffle_ps(XMM1, XMM2, _MM_SHUFFLE(2,0,2,0));
+		XMM6	 = _mm_shuffle_ps(XMM6, XMM2, _MM_SHUFFLE(3,1,3,1));
+		XMM0 = XMM1;
+		XMM2 = XMM5;
+		XMM3 = XMM6;
+		XMM4 = XMM5;
+		XMM0 = _mm_mul_ps(XMM0, XMM7);
+		XMM2 = _mm_mul_ps(XMM2, XMM6);
+		XMM6 = _mm_load_ps(findex+i+4);
+		XMM3 = _mm_mul_ps(XMM3, TN1.ps);
+		XMM7 = _mm_mul_ps(XMM7, TN1.ps);
+		XMM4 = _mm_mul_ps(XMM4, XMM1);
+		XMM5 = _mm_mul_ps(XMM5, XMM5);
+		XMM0 = _mm_sub_ps(XMM0, XMM2);
+		XMM3 = _mm_sub_ps(XMM3, XMM4);
+		XMM7 = _mm_sub_ps(XMM7, XMM5);
+		XMM3 = _mm_mul_ps(XMM3, XMM6);
+		XMM6 = _mm_rcp_ps(XMM7);
+		XMM0 = _mm_add_ps(XMM0, XMM3);
+		XMM7 = _mm_mul_ps(XMM7, XMM6);
+		XMM7 = _mm_mul_ps(XMM7, XMM6);
+		XMM6 = _mm_add_ps(XMM6, XMM6);
+		XMM6 = _mm_sub_ps(XMM6, XMM7);
+		XMM6 = _mm_mul_ps(XMM6, XMM0);
+		XMM6	 = _mm_sub_ps(XMM6, OFFSET);
+		XMM6	 = _mm_min_ps(XMM6, PM128(noise+i+4));
+		_mm_store_ps(noise+i+4, XMM6);
+		p0 -= 8;
+		p1 += 8;
+	}
+	j	 = midpoint1&(~3);
+	for(;i<j;i+=4)
+	{
+		__m128	XMM0, XMM1, XMM4, XMM3, XMM5;
+		__m128x	TN;
+		
+		XMM5	 = _mm_lddqu_ps(N+p0);
+		XMM0	 = XXYY[p0+3];
+		XMM1	 = XXYY[p0+2];
+		XMM4	 = XXYY[p0+1];
+		XMM3	 = XXYY[p0  ];
+		TN.ps	 = _mm_shuffle_ps(XMM5, XMM5, _MM_SHUFFLE(0,1,2,3));
+		
+		XMM0	 = _mm_xor_ps(XMM0, PCS_RNNR.ps);
+		XMM1	 = _mm_xor_ps(XMM1, PCS_RNNR.ps);
+		XMM4	 = _mm_xor_ps(XMM4, PCS_RNNR.ps);
+		XMM3	 = _mm_xor_ps(XMM3, PCS_RNNR.ps);
+		
+		XMM5	 = _mm_lddqu_ps(N+p1);
+		XMM0	 = _mm_add_ps(XMM0, XXYY[p1  ]);
+		XMM1	 = _mm_add_ps(XMM1, XXYY[p1+1]);
+		XMM4	 = _mm_add_ps(XMM4, XXYY[p1+2]);
+		XMM3	 = _mm_add_ps(XMM3, XXYY[p1+3]);
+		
+		TN.ps	 = _mm_add_ps(TN.ps, XMM5);
+		
+		bark_noise_hybridmp_SSE_SUBC();
+		XMM4	 = _mm_sub_ps(XMM4, OFFSET);
+		XMM4	 = _mm_min_ps(XMM4, PM128(noise+i  ));
+		_mm_store_ps(noise+i  , XMM4);
+		p0 -= 4;
+		p1 += 4;
+	}
+	if(midpoint2-i<4)	/* fewer than four bins remain below midpoint2: finish them in scalar code */
+	{
+		x	 = (float)i;
+		for (;i<midpoint1;i++,x+=1.f)
+		{
+			hi	 = i + fixed / 2;
+			lo	 = hi - fixed;
+			/* i<midpoint1 makes lo negative, so the lower window edge is read at the mirrored index -lo */
+			tN	 = N[hi] + N[-lo];
+			tX	 = xxyy[hi*4  ] - xxyy[-lo*4  ];
+			tXX	 = xxyy[hi*4+1] + xxyy[-lo*4+1];
+			tY	 = xxyy[hi*4+2] + xxyy[-lo*4+2];
+			tXY	 = xxyy[hi*4+3] - xxyy[-lo*4+3];
+			/* xxyy carries four floats per index; A, B and D combine them and R = (A + x*B)/D */
+			A	 = tY * tXX - tX * tXY;
+			B	 = tN * tXY - tX * tY;
+			D	 = tN * tXX - tX * tX;
+			R	 = (A + x * B) / D;
+			/* noise[i] is only ever lowered, never raised */
+			if(R - offset < noise[i])
+				noise[i]	 = R - offset;
+		}
+		for (;i<midpoint2;i++,x+=1.f)
+		{
+			hi	 = i + fixed / 2;
+			lo	 = hi - fixed;
+			/* from midpoint1 on, both window edges are read directly and differenced */
+			tN	 = N[hi] - N[lo];
+			tX	 = xxyy[hi*4  ] - xxyy[lo*4  ];
+			tXX	 = xxyy[hi*4+1] - xxyy[lo*4+1];
+			tY	 = xxyy[hi*4+2] - xxyy[lo*4+2];
+			tXY	 = xxyy[hi*4+3] - xxyy[lo*4+3];
+			/* same A/B/D/R formulas as the loop above */
+			A	 = tY * tXX - tX * tXY;
+			B	 = tN * tXY - tX * tY;
+			D	 = tN * tXX - tX * tX;
+			R	 = (A + x * B) / D;
+			if(R - offset < noise[i])
+				noise[i]	 = R - offset;
+		}
+		j	 = (i+3)&(~3);	/* next multiple of four ... */
+		j	 = (j>=n)?n:j;	/* ... clamped to n */
+		for (;i<j;i++,x+=1.f)	/* reuse the last A, B, D until i is vector aligned */
+		{
+			R	 = (A + x * B) / D;
+			if(R - offset < noise[i])
+				noise[i]	 = R - offset;
+		}
+		PA	 = _mm_set_ps1(A);
+		PB	 = _mm_set_ps1(B);
+		PD	 = _mm_set_ps1(1.f/D);	/* the if(i<n) tail multiplies by PD, so it must carry 1/D, not D */
+	}
+	else
+	{
+		switch(midpoint1%4)
+		{
+			case 0:
+				break;
+			case 1:
+				{
+					__m128	XMM0, XMM1, XMM4, XMM3;
+					__m128x	TN, TN1;
+					int	p0, p1;
+					p0	 = -((i  ) + fixed / 2 - fixed);
+					p1	 = (i+1) + fixed / 2;
+					
+					XMM0	 = XXYY[p0  ];
+					XMM1	 = XXYY[p1  ];
+					XMM4	 = XXYY[p1+1];
+					XMM3	 = XXYY[p1+2];
+					
+					TN.sf[0]	 = N[p0  ];
+					TN.sf[1]	 = N[p1  ];
+					TN.sf[2]	 = N[p1+1];
+					TN.sf[3]	 = N[p1+2];
+					
+					XMM0	 = _mm_xor_ps(XMM0, PCS_RNNR.ps);
+					
+					p0	 = (i  ) + fixed / 2;
+					p1	-= fixed;
+					
+					XMM0	 = _mm_add_ps(XMM0, XXYY[p0  ]);
+					XMM1	 = _mm_sub_ps(XMM1, XXYY[p1  ]);
+					XMM4	 = _mm_sub_ps(XMM4, XXYY[p1+1]);
+					XMM3	 = _mm_sub_ps(XMM3, XXYY[p1+2]);
+					
+					TN1.sf[0]	 = N[p0  ];
+					TN1.sf[1]	 = N[p1  ];
+					TN1.sf[2]	 = N[p1+1];
+					TN1.sf[3]	 = N[p1+2];
+					
+					TN.ps	 = _mm_sub_ps(TN.ps, _mm_xor_ps(TN1.ps, PCS_NNNR.ps));
+					
+					bark_noise_hybridmp_SSE_SUBC();
+					XMM4	 = _mm_sub_ps(XMM4, OFFSET);
+					XMM4	 = _mm_min_ps(XMM4, PM128(noise+i  ));
+					_mm_store_ps(noise+i  , XMM4);
+					i	+= 4;
+				}
+				break;
+			case 2:
+				{
+					__m128	XMM0, XMM1, XMM4, XMM3;
+					__m128x	TN, TN1;
+					int	p0, p1;
+					p0	 = -((i  ) + fixed / 2 - fixed);
+					p1	 = (i+2) + fixed / 2;
+					
+					XMM0	 = XXYY[p0  ];
+					XMM1	 = XXYY[p0-1];
+					XMM4	 = XXYY[p1  ];
+					XMM3	 = XXYY[p1+1];
+					
+					TN.sf[0]	 = N[p0  ];
+					TN.sf[1]	 = N[p0-1];
+					TN.sf[2]	 = N[p1  ];
+					TN.sf[3]	 = N[p1+1];
+					
+					XMM0	 = _mm_xor_ps(XMM0, PCS_RNNR.ps);
+					XMM1	 = _mm_xor_ps(XMM1, PCS_RNNR.ps);
+					
+					p0	 = (i  ) + fixed / 2;
+					p1	-= fixed;
+					
+					XMM0	 = _mm_add_ps(XMM0, XXYY[p0  ]);
+					XMM1	 = _mm_add_ps(XMM1, XXYY[p0+1]);
+					XMM4	 = _mm_sub_ps(XMM4, XXYY[p1  ]);
+					XMM3	 = _mm_sub_ps(XMM3, XXYY[p1+1]);
+					
+					TN1.sf[0]	 = N[p0  ];
+					TN1.sf[1]	 = N[p0+1];
+					TN1.sf[2]	 = N[p1  ];
+					TN1.sf[3]	 = N[p1+1];
+					
+					TN.ps	 = _mm_sub_ps(TN.ps, _mm_xor_ps(TN1.ps, PCS_NNRR.ps));
+					
+					bark_noise_hybridmp_SSE_SUBC();
+					XMM4	 = _mm_sub_ps(XMM4, OFFSET);
+					XMM4	 = _mm_min_ps(XMM4, PM128(noise+i  ));
+					_mm_store_ps(noise+i  , XMM4);
+					i	+= 4;
+				}
+				break;
+			case 3:	/* bins i, i+1, i+2 lie below midpoint1 (mirrored lower edge); bin i+3 is the first regular bin */
+				{
+					__m128	XMM0, XMM1, XMM4, XMM3;
+					__m128x	TN, TN1;
+					int	p0, p1;
+					p0	 = -((i  ) + fixed / 2 - fixed);
+					p1	 = (i+3) + fixed / 2;
+					/* p0 is the mirrored lower index (-lo) of bin i; p1 is the upper index (hi) of bin i+3 */
+					XMM0	 = XXYY[p0  ];
+					XMM1	 = XXYY[p0-1];
+					XMM4	 = XXYY[p0-2];
+					XMM3	 = XXYY[p1  ];
+					/* lanes 0..2 of TN: N at the mirrored indices; lane 3: N at the upper index of bin i+3 */
+					TN.sf[0]	 = N[p0  ];
+					TN.sf[1]	 = N[p0-1];
+					TN.sf[2]	 = N[p0-2];
+					TN.sf[3]	 = N[p1  ];
+					/* PCS_RNNR is defined outside this view; presumably it flips the sign of lanes 0 and 3 */
+					XMM0	 = _mm_xor_ps(XMM0, PCS_RNNR.ps);
+					XMM1	 = _mm_xor_ps(XMM1, PCS_RNNR.ps);
+					XMM4	 = _mm_xor_ps(XMM4, PCS_RNNR.ps);
+					/* now p0 is the upper index of bin i and p1 the lower index of bin i+3 */
+					p0	 = (i  ) + fixed / 2;
+					p1	-= fixed;
+					/* the three mirrored rows are summed with their upper edge; only the regular row is differenced */
+					XMM0	 = _mm_add_ps(XMM0, XXYY[p0  ]);
+					XMM1	 = _mm_add_ps(XMM1, XXYY[p0+1]);
+					XMM4	 = _mm_add_ps(XMM4, XXYY[p0+2]);	/* was _mm_sub_ps: row of bin i+2 is mirrored like XMM0/XMM1 */
+					XMM3	 = _mm_sub_ps(XMM3, XXYY[p1  ]);
+					
+					TN1.sf[0]	 = N[p0  ];
+					TN1.sf[1]	 = N[p0+1];
+					TN1.sf[2]	 = N[p0+2];
+					TN1.sf[3]	 = N[p1  ];
+					/* PCS_NRRR (defined elsewhere) presumably negates lanes 0..2, turning their subtraction into a sum */
+					TN.ps	 = _mm_sub_ps(TN.ps, _mm_xor_ps(TN1.ps, PCS_NRRR.ps));
+					
+					bark_noise_hybridmp_SSE_SUBC();
+					XMM4	 = _mm_sub_ps(XMM4, OFFSET);
+					XMM4	 = _mm_min_ps(XMM4, PM128(noise+i  ));
+					_mm_store_ps(noise+i  , XMM4);
+					i	+= 4;
+				}
+				break;
+		}
+		p0	 = i  + fixed / 2;
+		p1	 = p0 - fixed;
+		j	 = ((midpoint2-i)&(~15))+i;
+		for(;i<j;i+=16)
+		{
+			__m128	XMM0, XMM1, XMM2, XMM3;
+			__m128	XMM4, XMM5, XMM6, XMM7;
+			__m128x	TN, TN1;
+			
+			XMM0	 = XXYY[p0   ];
+			XMM1	 = XXYY[p0+ 1];
+			XMM4	 = XXYY[p0+ 2];
+			XMM3	 = XXYY[p0+ 3];
+			TN.ps	 = _mm_lddqu_ps(N+p0   );
+			XMM5	 = _mm_lddqu_ps(N+p1   );
+			XMM0	 = _mm_sub_ps(XMM0, XXYY[p1   ]);
+			XMM1	 = _mm_sub_ps(XMM1, XXYY[p1+ 1]);
+			XMM4	 = _mm_sub_ps(XMM4, XXYY[p1+ 2]);
+			XMM3	 = _mm_sub_ps(XMM3, XXYY[p1+ 3]);
+			TN.ps	 = _mm_sub_ps(TN.ps, XMM5);
+			XMM2 = XMM0;
+			XMM5 = XMM4;
+			XMM0	 = _mm_shuffle_ps(XMM0, XMM1, _MM_SHUFFLE(1,0,1,0));
+			XMM2	 = _mm_shuffle_ps(XMM2, XMM1, _MM_SHUFFLE(3,2,3,2));
+			XMM4	 = _mm_shuffle_ps(XMM4, XMM3, _MM_SHUFFLE(1,0,1,0));
+			XMM5	 = _mm_shuffle_ps(XMM5, XMM3, _MM_SHUFFLE(3,2,3,2));
+			XMM1 = XMM0;
+			XMM3 = XMM2;
+			XMM0	 = _mm_shuffle_ps(XMM0, XMM4, _MM_SHUFFLE(2,0,2,0));
+			XMM1	 = _mm_shuffle_ps(XMM1, XMM4, _MM_SHUFFLE(3,1,3,1));
+			XMM2	 = _mm_shuffle_ps(XMM2, XMM5, _MM_SHUFFLE(2,0,2,0));
+			XMM3	 = _mm_shuffle_ps(XMM3, XMM5, _MM_SHUFFLE(3,1,3,1));
+			XMM4 = XMM2;
+			XMM5 = XMM0;
+			XMM6 = XMM3;
+			XMM7 = XMM0;
+			XMM4 = _mm_mul_ps(XMM4, XMM1);
+			XMM5 = _mm_mul_ps(XMM5, XMM3);
+			XMM3 = _mm_load_ps(findex+i   );
+			XMM6 = _mm_mul_ps(XMM6, TN.ps);
+			XMM1 = _mm_mul_ps(XMM1, TN.ps);
+			XMM7 = _mm_mul_ps(XMM7, XMM2);
+			XMM2	 = XXYY[p0+ 4];
+			XMM0 = _mm_mul_ps(XMM0, XMM0);
+			XMM4 = _mm_sub_ps(XMM4, XMM5);
+			XMM5	 = XXYY[p0+ 5];
+			XMM6 = _mm_sub_ps(XMM6, XMM7);
+			XMM7	 = XXYY[p0+ 6];
+			XMM1 = _mm_sub_ps(XMM1, XMM0);
+			XMM0	 = XXYY[p0+ 7];
+			XMM6 = _mm_mul_ps(XMM6, XMM3);
+
+			TN1.ps	 = _mm_lddqu_ps(N+p0+ 4);
+			XMM3 = _mm_rcp_ps(XMM1);
+			XMM4 = _mm_add_ps(XMM4, XMM6);
+			XMM6	 = _mm_lddqu_ps(N+p1+ 4);
+			XMM1 = _mm_mul_ps(XMM1, XMM3);
+			XMM2	 = _mm_sub_ps(XMM2, XXYY[p1+ 4]);
+			XMM1 = _mm_mul_ps(XMM1, XMM3);
+			XMM5	 = _mm_sub_ps(XMM5, XXYY[p1+ 5]);
+			XMM3 = _mm_add_ps(XMM3, XMM3);
+			XMM7	 = _mm_sub_ps(XMM7, XXYY[p1+ 6]);
+			XMM3 = _mm_sub_ps(XMM3, XMM1);
+			XMM0	 = _mm_sub_ps(XMM0, XXYY[p1+ 7]);
+			XMM4 = _mm_mul_ps(XMM4, XMM3);
+			TN1.ps	 = _mm_sub_ps(TN1.ps, XMM6);
+			XMM4	 = _mm_sub_ps(XMM4, OFFSET);
+			XMM1 = XMM2;
+			XMM4	 = _mm_min_ps(XMM4, PM128(noise+i   ));
+			XMM6 = XMM7;
+			_mm_store_ps(noise+i   , XMM4);
+			XMM2	 = _mm_shuffle_ps(XMM2, XMM5, _MM_SHUFFLE(1,0,1,0));
+			XMM1	 = _mm_shuffle_ps(XMM1, XMM5, _MM_SHUFFLE(3,2,3,2));
+			XMM7	 = _mm_shuffle_ps(XMM7, XMM0, _MM_SHUFFLE(1,0,1,0));
+			XMM6	 = _mm_shuffle_ps(XMM6, XMM0, _MM_SHUFFLE(3,2,3,2));
+			XMM5 = XMM2;
+			XMM0 = XMM1;
+			XMM2	 = _mm_shuffle_ps(XMM2, XMM7, _MM_SHUFFLE(2,0,2,0));
+			XMM5	 = _mm_shuffle_ps(XMM5, XMM7, _MM_SHUFFLE(3,1,3,1));
+			XMM1	 = _mm_shuffle_ps(XMM1, XMM6, _MM_SHUFFLE(2,0,2,0));
+			XMM0	 = _mm_shuffle_ps(XMM0, XMM6, _MM_SHUFFLE(3,1,3,1));
+			XMM7 = XMM1;
+			XMM6 = XMM2;
+			XMM3 = XMM0;
+			XMM4 = XMM2;
+			XMM7 = _mm_mul_ps(XMM7, XMM5);
+			XMM6 = _mm_mul_ps(XMM6, XMM0);
+			XMM0 = _mm_load_ps(findex+i+ 4);
+			XMM3 = _mm_mul_ps(XMM3, TN1.ps);
+			XMM5 = _mm_mul_ps(XMM5, TN1.ps);
+			XMM4 = _mm_mul_ps(XMM4, XMM1);
+			XMM1	 = XXYY[p0+ 8];
+			XMM2 = _mm_mul_ps(XMM2, XMM2);
+			XMM7 = _mm_sub_ps(XMM7, XMM6);
+			XMM6	 = XXYY[p0+ 9];
+			XMM3 = _mm_sub_ps(XMM3, XMM4);
+			XMM4	 = XXYY[p0+10];
+			XMM5 = _mm_sub_ps(XMM5, XMM2);
+			XMM2	 = XXYY[p0+11];
+			XMM3 = _mm_mul_ps(XMM3, XMM0);
+			TN.ps	 = _mm_lddqu_ps(N+p0+ 8);
+			XMM0 = _mm_rcp_ps(XMM5);
+			XMM7 = _mm_add_ps(XMM7, XMM3);
+			XMM3	 = _mm_lddqu_ps(N+p1+ 8);
+			XMM5 = _mm_mul_ps(XMM5, XMM0);
+			XMM1	 = _mm_sub_ps(XMM1, XXYY[p1+ 8]);
+			XMM5 = _mm_mul_ps(XMM5, XMM0);
+			XMM6	 = _mm_sub_ps(XMM6, XXYY[p1+ 9]);
+			XMM0 = _mm_add_ps(XMM0, XMM0);
+			XMM4	 = _mm_sub_ps(XMM4, XXYY[p1+10]);
+			XMM0 = _mm_sub_ps(XMM0, XMM5);
+			XMM2	 = _mm_sub_ps(XMM2, XXYY[p1+11]);
+			XMM7 = _mm_mul_ps(XMM7, XMM0);
+			TN.ps	 = _mm_sub_ps(TN.ps, XMM3);
+			XMM7	 = _mm_sub_ps(XMM7, OFFSET);
+			XMM5 = XMM1;
+			XMM7	 = _mm_min_ps(XMM7, PM128(noise+i+ 4));
+			XMM3 = XMM4;
+			_mm_store_ps(noise+i+ 4, XMM7);
+			XMM1	 = _mm_shuffle_ps(XMM1, XMM6, _MM_SHUFFLE(1,0,1,0));
+			XMM5	 = _mm_shuffle_ps(XMM5, XMM6, _MM_SHUFFLE(3,2,3,2));
+			XMM4	 = _mm_shuffle_ps(XMM4, XMM2, _MM_SHUFFLE(1,0,1,0));
+			XMM3	 = _mm_shuffle_ps(XMM3, XMM2, _MM_SHUFFLE(3,2,3,2));
+			XMM6 = XMM1;
+			XMM2 = XMM5;
+			XMM1	 = _mm_shuffle_ps(XMM1, XMM4, _MM_SHUFFLE(2,0,2,0));
+			XMM6	 = _mm_shuffle_ps(XMM6, XMM4, _MM_SHUFFLE(3,1,3,1));
+			XMM5	 = _mm_shuffle_ps(XMM5, XMM3, _MM_SHUFFLE(2,0,2,0));
+			XMM2	 = _mm_shuffle_ps(XMM2, XMM3, _MM_SHUFFLE(3,1,3,1));
+			XMM4 = XMM5;
+			XMM3 = XMM1;
+			XMM0 = XMM2;
+			XMM7 = XMM1;
+			XMM4 = _mm_mul_ps(XMM4, XMM6);
+			XMM3 = _mm_mul_ps(XMM3, XMM2);
+			XMM2 = _mm_load_ps(findex+i+ 8);
+			XMM0 = _mm_mul_ps(XMM0, TN.ps);
+			XMM6 = _mm_mul_ps(XMM6, TN.ps);
+			XMM7 = _mm_mul_ps(XMM7, XMM5);
+			XMM5	 = XXYY[p0+12];
+			XMM1 = _mm_mul_ps(XMM1, XMM1);
+			XMM4 = _mm_sub_ps(XMM4, XMM3);
+			XMM3	 = XXYY[p0+13];
+			XMM0 = _mm_sub_ps(XMM0, XMM7);
+			XMM7	 = XXYY[p0+14];
+			XMM6 = _mm_sub_ps(XMM6, XMM1);
+			XMM1	 = XXYY[p0+15];
+			XMM0 = _mm_mul_ps(XMM0, XMM2);
+			TN1.ps	 = _mm_lddqu_ps(N+p0+12);
+			XMM2 = _mm_rcp_ps(XMM6);
+			XMM4 = _mm_add_ps(XMM4, XMM0);
+			XMM0	 = _mm_lddqu_ps(N+p1+12);
+			XMM6 = _mm_mul_ps(XMM6, XMM2);
+			XMM5	 = _mm_sub_ps(XMM5, XXYY[p1+12]);
+			XMM6 = _mm_mul_ps(XMM6, XMM2);
+			XMM3	 = _mm_sub_ps(XMM3, XXYY[p1+13]);
+			XMM2 = _mm_add_ps(XMM2, XMM2);
+			XMM7	 = _mm_sub_ps(XMM7, XXYY[p1+14]);
+			XMM2 = _mm_sub_ps(XMM2, XMM6);
+			XMM1	 = _mm_sub_ps(XMM1, XXYY[p1+15]);
+			XMM4 = _mm_mul_ps(XMM4, XMM2);
+			TN1.ps	 = _mm_sub_ps(TN1.ps, XMM0);
+			XMM4	 = _mm_sub_ps(XMM4, OFFSET);
+			XMM6 = XMM5;
+			XMM4	 = _mm_min_ps(XMM4, PM128(noise+i+ 8));
+			XMM0 = XMM7;
+			_mm_store_ps(noise+i+ 8, XMM4);
+			XMM5	 = _mm_shuffle_ps(XMM5, XMM3, _MM_SHUFFLE(1,0,1,0));
+			XMM6	 = _mm_shuffle_ps(XMM6, XMM3, _MM_SHUFFLE(3,2,3,2));
+			XMM7	 = _mm_shuffle_ps(XMM7, XMM1, _MM_SHUFFLE(1,0,1,0));
+			XMM0	 = _mm_shuffle_ps(XMM0, XMM1, _MM_SHUFFLE(3,2,3,2));
+			XMM3 = XMM5;
+			XMM1 = XMM6;
+			XMM5	 = _mm_shuffle_ps(XMM5, XMM7, _MM_SHUFFLE(2,0,2,0));
+			XMM3	 = _mm_shuffle_ps(XMM3, XMM7, _MM_SHUFFLE(3,1,3,1));
+			XMM6	 = _mm_shuffle_ps(XMM6, XMM0, _MM_SHUFFLE(2,0,2,0));
+			XMM1	 = _mm_shuffle_ps(XMM1, XMM0, _MM_SHUFFLE(3,1,3,1));
+			XMM7 = XMM6;
+			XMM0 = XMM5;
+			XMM2 = XMM1;
+			XMM4 = XMM5;
+			XMM7 = _mm_mul_ps(XMM7, XMM3);
+			XMM0 = _mm_mul_ps(XMM0, XMM1);
+			XMM1 = _mm_load_ps(findex+i+12);
+			XMM2 = _mm_mul_ps(XMM2, TN1.ps);
+			XMM3 = _mm_mul_ps(XMM3, TN1.ps);
+			XMM4 = _mm_mul_ps(XMM4, XMM6);
+			XMM5 = _mm_mul_ps(XMM5, XMM5);
+			XMM7 = _mm_sub_ps(XMM7, XMM0);
+			XMM2 = _mm_sub_ps(XMM2, XMM4);
+			XMM3 = _mm_sub_ps(XMM3, XMM5);
+			XMM2 = _mm_mul_ps(XMM2, XMM1);
+			XMM1 = _mm_rcp_ps(XMM3);
+			XMM7 = _mm_add_ps(XMM7, XMM2);
+			XMM3 = _mm_mul_ps(XMM3, XMM1);
+			XMM3 = _mm_mul_ps(XMM3, XMM1);
+			XMM1 = _mm_add_ps(XMM1, XMM1);
+			XMM1 = _mm_sub_ps(XMM1, XMM3);
+			XMM7 = _mm_mul_ps(XMM7, XMM1);
+			XMM7	 = _mm_sub_ps(XMM7, OFFSET);
+			XMM7	 = _mm_min_ps(XMM7, PM128(noise+i+12));
+			_mm_store_ps(noise+i+12, XMM7);
 
+			p0 += 16;
+			p1 += 16;
+		}
+		j	 = ((midpoint2-i)&(~7))+i;
+		for(;i<j;i+=8)
+		{
+			__m128	XMM0, XMM1, XMM2, XMM3;
+			__m128	XMM4, XMM5, XMM6, XMM7;
+			__m128x	TN, TN1;
+			
+			XMM0	 = XXYY[p0  ];
+			XMM1	 = XXYY[p0+1];
+			XMM4	 = XXYY[p0+2];
+			XMM3	 = XXYY[p0+3];
+			TN.ps	 = _mm_lddqu_ps(N+p0   );
+			XMM5	 = _mm_lddqu_ps(N+p1   );
+			XMM0	 = _mm_sub_ps(XMM0, XXYY[p1  ]);
+			XMM1	 = _mm_sub_ps(XMM1, XXYY[p1+1]);
+			XMM4	 = _mm_sub_ps(XMM4, XXYY[p1+2]);
+			XMM3	 = _mm_sub_ps(XMM3, XXYY[p1+3]);
+			TN.ps	 = _mm_sub_ps(TN.ps, XMM5);
+			XMM2 = XMM0;
+			XMM5 = XMM4;
+			XMM0	 = _mm_shuffle_ps(XMM0, XMM1, _MM_SHUFFLE(1,0,1,0));
+			XMM2	 = _mm_shuffle_ps(XMM2, XMM1, _MM_SHUFFLE(3,2,3,2));
+			XMM4	 = _mm_shuffle_ps(XMM4, XMM3, _MM_SHUFFLE(1,0,1,0));
+			XMM5	 = _mm_shuffle_ps(XMM5, XMM3, _MM_SHUFFLE(3,2,3,2));
+			XMM1 = XMM0;
+			XMM3 = XMM2;
+			XMM0	 = _mm_shuffle_ps(XMM0, XMM4, _MM_SHUFFLE(2,0,2,0));
+			XMM1	 = _mm_shuffle_ps(XMM1, XMM4, _MM_SHUFFLE(3,1,3,1));
+			XMM2	 = _mm_shuffle_ps(XMM2, XMM5, _MM_SHUFFLE(2,0,2,0));
+			XMM3	 = _mm_shuffle_ps(XMM3, XMM5, _MM_SHUFFLE(3,1,3,1));
+			XMM4 = XMM2;
+			XMM5 = XMM0;
+			XMM6 = XMM3;
+			XMM7 = XMM0;
+			XMM4 = _mm_mul_ps(XMM4, XMM1);
+			XMM5 = _mm_mul_ps(XMM5, XMM3);
+			XMM3 = _mm_load_ps(findex+i  );
+			XMM6 = _mm_mul_ps(XMM6, TN.ps);
+			XMM1 = _mm_mul_ps(XMM1, TN.ps);
+			XMM7 = _mm_mul_ps(XMM7, XMM2);
+			XMM2	 = XXYY[p0+4];
+			XMM0 = _mm_mul_ps(XMM0, XMM0);
+			XMM4 = _mm_sub_ps(XMM4, XMM5);
+			XMM5	 = XXYY[p0+5];
+			XMM6 = _mm_sub_ps(XMM6, XMM7);
+			XMM7	 = XXYY[p0+6];
+			XMM1 = _mm_sub_ps(XMM1, XMM0);
+			XMM0	 = XXYY[p0+7];
+			XMM6 = _mm_mul_ps(XMM6, XMM3);
+			TN1.ps	 = _mm_lddqu_ps(N+p0+ 4);
+			XMM3 = _mm_rcp_ps(XMM1);
+			XMM4 = _mm_add_ps(XMM4, XMM6);
+			XMM6	 = _mm_lddqu_ps(N+p1+ 4);
+			XMM1 = _mm_mul_ps(XMM1, XMM3);
+			XMM2	 = _mm_sub_ps(XMM2, XXYY[p1+4]);
+			XMM1 = _mm_mul_ps(XMM1, XMM3);
+			XMM5	 = _mm_sub_ps(XMM5, XXYY[p1+5]);
+			XMM3 = _mm_add_ps(XMM3, XMM3);
+			XMM7	 = _mm_sub_ps(XMM7, XXYY[p1+6]);
+			XMM3 = _mm_sub_ps(XMM3, XMM1);
+			XMM0	 = _mm_sub_ps(XMM0, XXYY[p1+7]);
+			XMM4 = _mm_mul_ps(XMM4, XMM3);
+			TN1.ps	 = _mm_sub_ps(TN1.ps, XMM6);
+			XMM4	 = _mm_sub_ps(XMM4, OFFSET);
+			XMM1 = XMM2;
+			XMM4	 = _mm_min_ps(XMM4, PM128(noise+i  ));
+			XMM6 = XMM7;
+			_mm_store_ps(noise+i  , XMM4);
+			XMM2	 = _mm_shuffle_ps(XMM2, XMM5, _MM_SHUFFLE(1,0,1,0));
+			XMM1	 = _mm_shuffle_ps(XMM1, XMM5, _MM_SHUFFLE(3,2,3,2));
+			XMM7	 = _mm_shuffle_ps(XMM7, XMM0, _MM_SHUFFLE(1,0,1,0));
+			XMM6	 = _mm_shuffle_ps(XMM6, XMM0, _MM_SHUFFLE(3,2,3,2));
+			XMM5 = XMM2;
+			XMM0 = XMM1;
+			XMM2	 = _mm_shuffle_ps(XMM2, XMM7, _MM_SHUFFLE(2,0,2,0));
+			XMM5	 = _mm_shuffle_ps(XMM5, XMM7, _MM_SHUFFLE(3,1,3,1));
+			XMM1	 = _mm_shuffle_ps(XMM1, XMM6, _MM_SHUFFLE(2,0,2,0));
+			XMM0	 = _mm_shuffle_ps(XMM0, XMM6, _MM_SHUFFLE(3,1,3,1));
+			XMM7 = XMM1;
+			XMM6 = XMM2;
+			XMM3 = XMM0;
+			XMM4 = XMM2;
+			XMM7 = _mm_mul_ps(XMM7, XMM5);
+			XMM6 = _mm_mul_ps(XMM6, XMM0);
+			XMM0 = _mm_load_ps(findex+i+4);
+			XMM3 = _mm_mul_ps(XMM3, TN1.ps);
+			XMM5 = _mm_mul_ps(XMM5, TN1.ps);
+			XMM4 = _mm_mul_ps(XMM4, XMM1);
+			XMM2 = _mm_mul_ps(XMM2, XMM2);
+			XMM7 = _mm_sub_ps(XMM7, XMM6);
+			XMM3 = _mm_sub_ps(XMM3, XMM4);
+			XMM5 = _mm_sub_ps(XMM5, XMM2);
+			XMM3 = _mm_mul_ps(XMM3, XMM0);
+			XMM0 = _mm_rcp_ps(XMM5);
+			XMM7 = _mm_add_ps(XMM7, XMM3);
+			XMM5 = _mm_mul_ps(XMM5, XMM0);
+			XMM5 = _mm_mul_ps(XMM5, XMM0);
+			XMM0 = _mm_add_ps(XMM0, XMM0);
+			XMM0 = _mm_sub_ps(XMM0, XMM5);
+			XMM7 = _mm_mul_ps(XMM7, XMM0);
+			XMM7	 = _mm_sub_ps(XMM7, OFFSET);
+			XMM7	 = _mm_min_ps(XMM7, PM128(noise+i+4));
+			_mm_store_ps(noise+i+4, XMM7);
+
+			p0 += 8;
+			p1 += 8;
+		}
+		j	 = midpoint2&(~3);
+		for(;i<j;i+=4)
+		{
+			__m128	XMM0, XMM1, XMM4, XMM3;
+			__m128x	TN;
+			__m128	XMM5;
+			
+			XMM0	 = XXYY[p0  ];
+			XMM1	 = XXYY[p0+1];
+			XMM4	 = XXYY[p0+2];
+			XMM3	 = XXYY[p0+3];
+			TN.ps	 = _mm_lddqu_ps(N+p0   );
+			XMM5	 = _mm_lddqu_ps(N+p1   );
+			XMM0	 = _mm_sub_ps(XMM0, XXYY[p1  ]);
+			XMM1	 = _mm_sub_ps(XMM1, XXYY[p1+1]);
+			XMM4	 = _mm_sub_ps(XMM4, XXYY[p1+2]);
+			XMM3	 = _mm_sub_ps(XMM3, XXYY[p1+3]);
+			
+			TN.ps	 = _mm_sub_ps(TN.ps, XMM5);
+			
+			bark_noise_hybridmp_SSE_SUBC();
+			XMM4	 = _mm_sub_ps(XMM4, OFFSET);
+			XMM4	 = _mm_min_ps(XMM4, PM128(noise+i  ));
+			_mm_store_ps(noise+i  , XMM4);
+			p0 += 4;
+			p1 += 4;
+		}
+		if(i!=n)
+		{
+			switch(midpoint2%4)
+			{
+				case 0:
+					{
+						hi	 = (i-1) + fixed / 2;
+						lo	 = hi - fixed;
+						
+						tN	 = N[hi] - N[lo];
+						tX	 = xxyy[hi*4  ] - xxyy[lo*4  ];
+						tXX	 = xxyy[hi*4+1] - xxyy[lo*4+1];
+						tY	 = xxyy[hi*4+2] - xxyy[lo*4+2];
+						tXY	 = xxyy[hi*4+3] - xxyy[lo*4+3];
+						
+						A	 = tY * tXX - tX * tXY;
+						B	 = tN * tXY - tX * tY;
+						D	 = tN * tXX - tX * tX;
+						PA	 = _mm_set_ps1(A);
+						PB	 = _mm_set_ps1(B);
+						PD	 = _mm_set_ps1(1.f/D);
+					}
+					break;
+				case 1:
+					{
+						__m128	XMM0, XMM1, XMM4, XMM3;
+						__m128x	TN, TN1;
+						int p0	 = (i  ) + fixed / 2;
+						
+						XMM0	 =
+						XMM1	 =
+						XMM4	 =
+						XMM3	 = XXYY[p0];
+						
+						TN.ps	 = _mm_set_ps1(N[p0]);
+						
+						p0	-= fixed;
+						
+						XMM0	 =
+						XMM4	 =
+						XMM3	 =
+						XMM1	 = _mm_sub_ps(XMM3, XXYY[p0]);
+						
+						TN1.ps	 = _mm_set_ps1(N[p0]);
+						
+						TN.ps	 = _mm_sub_ps(TN.ps, TN1.ps);
+						
+						bark_noise_hybridmp_SSE_SUBC2();
+						XMM4	 = _mm_sub_ps(XMM4, OFFSET);
+						XMM4	 = _mm_min_ps(XMM4, PM128(noise+i  ));
+						_mm_store_ps(noise+i  , XMM4);
+						i	+= 4;
+						PA		 = _mm_shuffle_ps(PA, PA, _MM_SHUFFLE(0,0,0,0));
+						PB		 = _mm_shuffle_ps(PB, PB, _MM_SHUFFLE(0,0,0,0));
+						PD		 = _mm_shuffle_ps(PD, PD, _MM_SHUFFLE(0,0,0,0));
+					}
+					break;
+				case 2:
+					{
+						__m128	XMM0, XMM1, XMM4, XMM3;
+						__m128x	TN;
+						__m128	XMM5;
+						int p0	 = (i  ) + fixed / 2;
+						
+						XMM5	 = _mm_lddqu_ps(N+p0);
+						XMM0	 = XXYY[p0  ];
+						XMM1	 =
+						XMM4	 =
+						XMM3	 = XXYY[p0+1];
+						TN.ps	 = _mm_shuffle_ps(XMM5, XMM5, _MM_SHUFFLE(1,1,1,0));
+						
+						p0	-= fixed;
+						
+						XMM5	 = _mm_lddqu_ps(N+p0);
+						XMM0	 = _mm_sub_ps(XMM0, XXYY[p0  ]);
+						XMM4	 =
+						XMM3	 =
+						XMM1	 = _mm_sub_ps(XMM3, XXYY[p0+1]);
+						XMM5	 = _mm_shuffle_ps(XMM5, XMM5, _MM_SHUFFLE(1,1,1,0));
+						
+						TN.ps	 = _mm_sub_ps(TN.ps, XMM5);
+						
+						bark_noise_hybridmp_SSE_SUBC2();
+						XMM4	 = _mm_sub_ps(XMM4, OFFSET);
+						XMM4	 = _mm_min_ps(XMM4, PM128(noise+i  ));
+						_mm_store_ps(noise+i  , XMM4);
+						i	+= 4;
+						PA		 = _mm_shuffle_ps(PA, PA, _MM_SHUFFLE(1,1,1,1));
+						PB		 = _mm_shuffle_ps(PB, PB, _MM_SHUFFLE(1,1,1,1));
+						PD		 = _mm_shuffle_ps(PD, PD, _MM_SHUFFLE(1,1,1,1));
+					}
+					break;
+				case 3:
+					{
+						__m128	XMM0, XMM1, XMM4, XMM3;
+						__m128x	TN;
+						__m128	XMM5;
+						int p0	 = (i  ) + fixed / 2;
+						
+						XMM5	 = _mm_lddqu_ps(N+p0);
+						XMM0	 = XXYY[p0  ];
+						XMM1	 = XXYY[p0+1];
+						XMM4	 =
+						XMM3	 = XXYY[p0+2];
+						TN.ps	 = _mm_shuffle_ps(XMM5, XMM5, _MM_SHUFFLE(2,2,1,0));
+						
+						p0	-=  fixed;
+						
+						XMM5	 = _mm_lddqu_ps(N+p0);
+						XMM0	 = _mm_sub_ps(XMM0, XXYY[p0  ]);
+						XMM1	 = _mm_sub_ps(XMM1, XXYY[p0+1]);
+						XMM4	 = 
+						XMM3	 = _mm_sub_ps(XMM3, XXYY[p0+2]);
+						XMM5	 = _mm_shuffle_ps(XMM5, XMM5, _MM_SHUFFLE(2,2,1,0));
+						
+						TN.ps	 = _mm_sub_ps(TN.ps, XMM5);
+						
+						bark_noise_hybridmp_SSE_SUBC2();
+						XMM4	 = _mm_sub_ps(XMM4, OFFSET);
+						XMM4	 = _mm_min_ps(XMM4, PM128(noise+i  ));
+						_mm_store_ps(noise+i  , XMM4);
+						i	+= 4;
+						PA		 = _mm_shuffle_ps(PA, PA, _MM_SHUFFLE(2,2,2,2));
+						PB		 = _mm_shuffle_ps(PB, PB, _MM_SHUFFLE(2,2,2,2));
+						PD		 = _mm_shuffle_ps(PD, PD, _MM_SHUFFLE(2,2,2,2));
+					}
+					break;
+			}
+		}
+	}
+	if(i<n)	/* bins from i up to n: min() each against the line PA*PD - OFFSET + findex*PB*PD */
+	{
+		__m128	XMM0	 = PA;
+		__m128	XMM1	 = PB;
+		XMM0	 = _mm_mul_ps(XMM0, PD);	/* PD is used as a multiplier here, never as a divisor */
+		XMM1	 = _mm_mul_ps(XMM1, PD);	/* XMM1: per-bin slope, multiplied by findex below */
+		XMM0	 = _mm_sub_ps(XMM0, OFFSET);	/* XMM0: constant term with OFFSET already removed */
+		if(i%8!=0)	/* one 4-float vector brings i up to a multiple of 8 */
+		{
+			__m128	XMM4	 = _mm_load_ps(findex+i);
+			XMM4	 = _mm_mul_ps(XMM4, XMM1);
+			XMM4	 = _mm_add_ps(XMM4, XMM0);
+			XMM4	 = _mm_min_ps(XMM4, PM128(noise+i   ));
+			_mm_store_ps(noise+i   , XMM4);
+			i	+= 4;
+		}
+		if(i%16!=0)	/* two more vectors bring i up to a multiple of 16 */
+		{
+			__m128	XMM4	 = _mm_load_ps(findex+i  );
+			__m128	XMM5	 = _mm_load_ps(findex+i+4);
+			__m128	XMM6	 = _mm_load_ps(noise+i   );
+			__m128	XMM7	 = _mm_load_ps(noise+i+ 4);
+			XMM4	 = _mm_mul_ps(XMM4, XMM1);
+			XMM5	 = _mm_mul_ps(XMM5, XMM1);
+			XMM4	 = _mm_add_ps(XMM4, XMM0);
+			XMM5	 = _mm_add_ps(XMM5, XMM0);
+			XMM6	 = _mm_min_ps(XMM6, XMM4);
+			XMM7	 = _mm_min_ps(XMM7, XMM5);
+			_mm_store_ps(noise+i   , XMM6);
+			_mm_store_ps(noise+i+ 4, XMM7);
+			i	+= 8;
+		}
+		for(;i<n;i+=16)	/* was i+=32: a pass covers only noise[i..i+15], so every other group of 16 was skipped */
+		{
+			__m128	XMM2, XMM3, XMM4, XMM5, XMM6, XMM7;
+			XMM4	 = _mm_load_ps(findex+i   );
+			XMM5	 = _mm_load_ps(findex+i+ 4);
+			XMM6	 = _mm_load_ps(noise+i    );
+			XMM7	 = _mm_load_ps(noise+i+  4);
+			XMM2	 = _mm_load_ps(findex+i+ 8);
+			XMM3	 = _mm_load_ps(findex+i+12);
+			XMM4	 = _mm_mul_ps(XMM4, XMM1);
+			XMM5	 = _mm_mul_ps(XMM5, XMM1);
+			XMM4	 = _mm_add_ps(XMM4, XMM0);
+			XMM5	 = _mm_add_ps(XMM5, XMM0);
+			XMM6	 = _mm_min_ps(XMM6, XMM4);
+			XMM7	 = _mm_min_ps(XMM7, XMM5);
+			XMM4	 = _mm_load_ps(noise+i+  8);	/* XMM4/XMM5 are reused for the second pair of noise vectors */
+			XMM5	 = _mm_load_ps(noise+i+ 12);
+			_mm_store_ps(noise+i   , XMM6);
+			_mm_store_ps(noise+i+ 4, XMM7);
+			XMM2	 = _mm_mul_ps(XMM2, XMM1);
+			XMM3	 = _mm_mul_ps(XMM3, XMM1);
+			XMM2	 = _mm_add_ps(XMM2, XMM0);
+			XMM3	 = _mm_add_ps(XMM3, XMM0);
+			XMM2	 = _mm_min_ps(XMM2, XMM4);
+			XMM3	 = _mm_min_ps(XMM3, XMM5);
+			_mm_store_ps(noise+i+ 8, XMM2);
+			_mm_store_ps(noise+i+12, XMM3);
+		}
+	}
+#else														/* SSE Optimize */
 static void bark_noise_hybridmp(int n,const long *b,
                                 const float *f,
                                 float *noise,
@@ -760,6 +3676,7 @@
     R = (A + x * B) / D;
     if (R - offset < noise[i]) noise[i] = R - offset;
   }
+#endif														/* SSE Optimize */
 }
 
 static float FLOOR1_fromdB_INV_LOOKUP[256]={
@@ -839,10 +3756,145 @@
  
   if(sliding_lowpass>n)sliding_lowpass=n;
   
+#ifdef __SSE__												/* SSE Optimize */
+{
+#if	defined(_MSC_VER)
+	int j;
+	for(j=0;j<256;j+=16)
+	{
+		_mm_prefetch((const char*)(FLOOR1_fromdB_INV_LOOKUP+j  ), _MM_HINT_NTA);
+		_mm_prefetch((const char*)(FLOOR1_fromdB_INV_LOOKUP+j+8), _MM_HINT_NTA);
+	}
+	_asm{
+		push	ebp
+		push	ebx
+		mov		ecx, sliding_lowpass
+		mov		edi, mdct
+		mov		esi, codedflr
+		mov		ebx, residue
+		lea		ecx, [esi+ecx*4]
+		align	4
+	_vp_remove_floor_0:
+		mov		eax, [esi   ]
+		mov		edx, [esi+ 4]
+		movss	xmm0, FLOOR1_fromdB_INV_LOOKUP[eax*4]
+		movss	xmm1, FLOOR1_fromdB_INV_LOOKUP[edx*4]
+		mov		eax, [esi+ 8]
+		mov		edx, [esi+12]
+		mulss	xmm0, [edi   ]
+		mulss	xmm1, [edi+ 4]
+		movss	xmm2, FLOOR1_fromdB_INV_LOOKUP[eax*4]
+		movss	xmm3, FLOOR1_fromdB_INV_LOOKUP[edx*4]
+		mov		eax, [esi+16]
+		mov		edx, [esi+20]
+		mulss	xmm2, [edi+ 8]
+		mulss	xmm3, [edi+12]
+		movss	xmm4, FLOOR1_fromdB_INV_LOOKUP[eax*4]
+		movss	xmm5, FLOOR1_fromdB_INV_LOOKUP[edx*4]
+		mov		eax, [esi+24]
+		mov		edx, [esi+28]
+		movss	[ebx   ], xmm0
+		movss	[ebx+ 4], xmm1
+		movss	xmm6, FLOOR1_fromdB_INV_LOOKUP[eax*4]
+		movss	xmm7, FLOOR1_fromdB_INV_LOOKUP[edx*4]
+		mulss	xmm4, [edi+16]
+		mulss	xmm5, [edi+20]
+		mov		eax, [esi+32]
+		mov		edx, [esi+36]
+		movss	[ebx+ 8], xmm2
+		movss	[ebx+12], xmm3
+		movss	xmm0, FLOOR1_fromdB_INV_LOOKUP[eax*4]
+		movss	xmm1, FLOOR1_fromdB_INV_LOOKUP[edx*4]
+		mulss	xmm6, [edi+24]
+		mulss	xmm7, [edi+28]
+		mov		eax, [esi+40]
+		mov		edx, [esi+44]
+		movss	[ebx+16], xmm4
+		movss	[ebx+20], xmm5
+		mulss	xmm0, [edi+32]
+		mulss	xmm1, [edi+36]
+		movss	[ebx+24], xmm6
+		movss	[ebx+28], xmm7
+		movss	xmm2, FLOOR1_fromdB_INV_LOOKUP[eax*4]
+		movss	xmm3, FLOOR1_fromdB_INV_LOOKUP[edx*4]
+		mov		eax, [esi+48]
+		mov		edx, [esi+52]
+		mulss	xmm2, [edi+40]
+		mulss	xmm3, [edi+44]
+		movss	xmm4, FLOOR1_fromdB_INV_LOOKUP[eax*4]
+		movss	xmm5, FLOOR1_fromdB_INV_LOOKUP[edx*4]
+		mov		eax, [esi+56]
+		mov		edx, [esi+60]
+		movss	[ebx+32], xmm0
+		movss	[ebx+36], xmm1
+		movss	xmm6, FLOOR1_fromdB_INV_LOOKUP[eax*4]
+		movss	xmm7, FLOOR1_fromdB_INV_LOOKUP[edx*4]
+		mulss	xmm4, [edi+48]
+		mulss	xmm5, [edi+52]
+		movss	[ebx+40], xmm2
+		movss	[ebx+44], xmm3
+		mulss	xmm6, [edi+56]
+		mulss	xmm7, [edi+60]
+		movss	[ebx+48], xmm4
+		movss	[ebx+52], xmm5
+		lea		ebx, [ebx+64]
+		lea		esi, [esi+64]
+		lea		edi, [edi+64]
+		movss	[ebx+56-64], xmm6
+		movss	[ebx+60-64], xmm7
+
+		cmp		esi, ecx
+		jl		_vp_remove_floor_0
+		pop		ebx
+		pop		ebp
+	};
+  for(i=sliding_lowpass;i<n;i++)
+    residue[i]=0.;
+#else
+	int j;
+	const int lp16 = (sliding_lowpass+15)&(~15);	/* lowpass rounded up to the 16-float stride used below */
+	float *work = (float*)_ogg_alloca(lp16*sizeof(float));	/* padded so the strided loop stays inside work[] */
+	for(j=0;j<256;j+=16)	/* prefetch the whole 256-entry lookup table */
+	{
+		_mm_prefetch((const char*)(FLOOR1_fromdB_INV_LOOKUP+j  ), _MM_HINT_NTA);
+		_mm_prefetch((const char*)(FLOOR1_fromdB_INV_LOOKUP+j+8), _MM_HINT_NTA);
+	}
+	for(i=0;i<sliding_lowpass;i++)	/* gather the table value for each coded floor entry */
+		work[i]	 = FLOOR1_fromdB_INV_LOOKUP[codedflr[i]];
+	for(;i<lp16;i++)	/* zero the padding so no uninitialized float is multiplied */
+		work[i]	 = 0.f;
+	for(i=0;i<lp16;i+=16)	/* residue = mdct * work, sixteen floats per pass */
+	{
+		__m128	XMM0, XMM1, XMM2, XMM3;
+		__m128	XMM4, XMM5, XMM6, XMM7;
+		XMM0	 = _mm_load_ps(mdct+i   );
+		XMM4	 = _mm_load_ps(work+i   );
+		XMM1	 = _mm_load_ps(mdct+i+ 4);
+		XMM5	 = _mm_load_ps(work+i+ 4);
+		XMM2	 = _mm_load_ps(mdct+i+ 8);
+		XMM6	 = _mm_load_ps(work+i+ 8);
+		XMM3	 = _mm_load_ps(mdct+i+12);
+		XMM7	 = _mm_load_ps(work+i+12);
+		XMM0	 = _mm_mul_ps(XMM0, XMM4);
+		XMM1	 = _mm_mul_ps(XMM1, XMM5);
+		XMM2	 = _mm_mul_ps(XMM2, XMM6);
+		XMM3	 = _mm_mul_ps(XMM3, XMM7);
+		_mm_store_ps(residue+i   , XMM0);
+		_mm_store_ps(residue+i+ 4, XMM1);
+		_mm_store_ps(residue+i+ 8, XMM2);
+		_mm_store_ps(residue+i+12, XMM3);
+	}
+	/* The strided loop leaves i rounded up to a multiple of 16; rewind it so the
+	   zero-fill after the #endif clears every bin from sliding_lowpass upward. */
+	i	 = sliding_lowpass;
+#endif
+}
+#else														/* SSE Optimize */
   for(i=0;i<sliding_lowpass;i++){
     residue[i]=
       mdct[i]*FLOOR1_fromdB_INV_LOOKUP[codedflr[i]];
   }
+#endif														/* SSE Optimize */
 
   for(;i<n;i++)
     residue[i]=0.;
@@ -854,8 +3906,43 @@
 		   float *logmask){
 
   int i,n=p->n;
+#ifdef __SSE__												/* SSE Optimize */
+	float *work		 = (float*)_ogg_alloca(n*sizeof(*work)*2);
+	float *bwork	 = (float*)_ogg_alloca(n*sizeof(float)*5);
+
+#else														/* SSE Optimize */
   float *work=alloca(n*sizeof(*work));
+#endif														/* SSE Optimize */
+
+#ifdef __SSE__												/* SSE Optimize */
+	bark_noise_hybridmp(p,logmdct,logmask,
+		      140.,-1, bwork, work+n);
+
+	/* work[k] = logmdct[k] - logmask[k] for every k below n, taken sixteen
+	   floats (four SSE vectors) per pass.  Loads and stores are the
+	   aligned forms, and there is no remainder loop: the counter simply
+	   advances in whole groups of 16 until it reaches n. */
+	for(i=0;i<n;i+=16)
+	{
+		const float	*minuend	 = logmdct+i;
+		const float	*subtrahend	 = logmask+i;
+		float		*diff		 = work+i;
+		/* all eight loads and four subtractions complete before any store */
+		__m128	D0	 = _mm_sub_ps(_mm_load_ps(minuend   ), _mm_load_ps(subtrahend   ));
+		__m128	D1	 = _mm_sub_ps(_mm_load_ps(minuend+ 4), _mm_load_ps(subtrahend+ 4));
+		__m128	D2	 = _mm_sub_ps(_mm_load_ps(minuend+ 8), _mm_load_ps(subtrahend+ 8));
+		__m128	D3	 = _mm_sub_ps(_mm_load_ps(minuend+12), _mm_load_ps(subtrahend+12));
+
+		_mm_store_ps(diff   , D0);
+		_mm_store_ps(diff+ 4, D1);
+		_mm_store_ps(diff+ 8, D2);
+		_mm_store_ps(diff+12, D3);
+	}
+
 
+	bark_noise_hybridmp(p,work,logmask,0.,
+		      p->vi->noisewindowfixed, bwork, work+n);
+#else														/* SSE Optimize */
   bark_noise_hybridmp(n,p->bark,logmdct,logmask,
 		      140.,-1);
 
@@ -863,8 +3950,33 @@
 
   bark_noise_hybridmp(n,p->bark,work,logmask,0.,
 		      p->vi->noisewindowfixed);
+#endif														/* SSE Optimize */
 
+#ifdef __SSE__												/* SSE Optimize */
+	for(i=0;i<n;i+=16)
+	{
+		__m128	XMM0, XMM1, XMM2, XMM3;
+		__m128	XMM4, XMM5, XMM6, XMM7;
+		XMM0	 = _mm_load_ps(logmdct+i   );
+		XMM4	 = _mm_load_ps(work+i   );
+		XMM1	 = _mm_load_ps(logmdct+i+ 4);
+		XMM5	 = _mm_load_ps(work+i+ 4);
+		XMM2	 = _mm_load_ps(logmdct+i+ 8);
+		XMM6	 = _mm_load_ps(work+i+ 8);
+		XMM3	 = _mm_load_ps(logmdct+i+12);
+		XMM7	 = _mm_load_ps(work+i+12);
+		XMM0	 = _mm_sub_ps(XMM0, XMM4);
+		XMM1	 = _mm_sub_ps(XMM1, XMM5);
+		XMM2	 = _mm_sub_ps(XMM2, XMM6);
+		XMM3	 = _mm_sub_ps(XMM3, XMM7);
+		_mm_store_ps(work+i   , XMM0);
+		_mm_store_ps(work+i+ 4, XMM1);
+		_mm_store_ps(work+i+ 8, XMM2);
+		_mm_store_ps(work+i+12, XMM3);
+	}
+#else														/* SSE Optimize */
   for(i=0;i<n;i++)work[i]=logmdct[i]-work[i];
+#endif														/* SSE Optimize */
   
 #if 0
   {
@@ -900,12 +4012,109 @@
     	  ((p->vi->noisecompand[dB]-p->vi->noisecompand_high[dB])*noise_compand_level);
   	}
   }
+#ifdef __SSE__												/* SSE Optimize */
+	{
+		static _MM_ALIGN16 const __m128x NCLMAX	 = {
+			.sf = {
+				NOISE_COMPAND_LEVELS-1, NOISE_COMPAND_LEVELS-1,
+				NOISE_COMPAND_LEVELS-1, NOISE_COMPAND_LEVELS-1
+			}
+		};
+		int spm4 = (i+15)&(~15);
+		for(;i<spm4;i++){
+			int dB	 = logmask[i]+.5;
+			if(dB>=NOISE_COMPAND_LEVELS)
+				dB	 = NOISE_COMPAND_LEVELS-1;
+			if(dB<0)
+				dB	 = 0;
+			logmask[i]	 =  work[i]+p->vi->noisecompand[dB];
+		}
+		{
+			register float* fwork2	 = (float*)(work+n);
+			for(i=spm4;i<n;i+=16)
+			{
+#if	!defined(__SSE2__)
+				__m64	MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7;
+#endif
+				__m128	XMM0, XMM1, XMM2, XMM3;
+				XMM0	 = _mm_load_ps(logmask+i   );
+				XMM1	 = _mm_load_ps(logmask+i+ 4);
+				XMM2	 = _mm_load_ps(logmask+i+ 8);
+				XMM3	 = _mm_load_ps(logmask+i+12);
+				XMM0	 = _mm_min_ps(XMM0, NCLMAX.ps);
+				XMM1	 = _mm_min_ps(XMM1, NCLMAX.ps);
+				XMM2	 = _mm_min_ps(XMM2, NCLMAX.ps);
+				XMM3	 = _mm_min_ps(XMM3, NCLMAX.ps);
+				XMM0	 = _mm_max_ps(XMM0, PFV_0.ps);
+				XMM1	 = _mm_max_ps(XMM1, PFV_0.ps);
+				XMM2	 = _mm_max_ps(XMM2, PFV_0.ps);
+				XMM3	 = _mm_max_ps(XMM3, PFV_0.ps);
+#if	defined(__SSE2__)
+				_mm_store_si128((__m128i*)(fwork2+i   ), _mm_cvtps_epi32(XMM0));
+				_mm_store_si128((__m128i*)(fwork2+i+ 4), _mm_cvtps_epi32(XMM1));
+				_mm_store_si128((__m128i*)(fwork2+i+ 8), _mm_cvtps_epi32(XMM2));
+				_mm_store_si128((__m128i*)(fwork2+i+12), _mm_cvtps_epi32(XMM3));
+			}
+#else
+				MM0		 = _mm_cvtps_pi32(XMM0);
+				MM2		 = _mm_cvtps_pi32(XMM1);
+				MM4		 = _mm_cvtps_pi32(XMM2);
+				MM6 	 = _mm_cvtps_pi32(XMM3);
+				XMM0	 = _mm_movehl_ps(XMM0, XMM0);
+				XMM1	 = _mm_movehl_ps(XMM1, XMM1);
+				XMM2	 = _mm_movehl_ps(XMM2, XMM2);
+				XMM3	 = _mm_movehl_ps(XMM3, XMM3);
+				MM1		 = _mm_cvtps_pi32(XMM0);
+				MM3		 = _mm_cvtps_pi32(XMM1);
+				MM5		 = _mm_cvtps_pi32(XMM2);
+				MM7		 = _mm_cvtps_pi32(XMM3);
+				PM64(fwork2+i   )	 = MM0;
+				PM64(fwork2+i+ 4)	 = MM2;
+				PM64(fwork2+i+ 8)	 = MM4;
+				PM64(fwork2+i+ 2)	 = MM1;
+				PM64(fwork2+i+12)	 = MM6;
+				PM64(fwork2+i+ 6)	 = MM3;
+				PM64(fwork2+i+10)	 = MM5;
+				PM64(fwork2+i+14)	 = MM7;
+			}
+			_mm_empty();
+#endif
+			for(i=spm4;i<n;i+=4)
+			{
+				fwork2[i  ]	 = p->vi->noisecompand[*((int*)(fwork2+i  ))];
+				fwork2[i+1]	 = p->vi->noisecompand[*((int*)(fwork2+i+1))];
+				fwork2[i+2]	 = p->vi->noisecompand[*((int*)(fwork2+i+2))];
+				fwork2[i+3]	 = p->vi->noisecompand[*((int*)(fwork2+i+3))];
+			}
+			for(i=spm4;i<n;i+=16)
+			{
+				__m128	XMM0	 = _mm_load_ps(fwork2+i   );
+				__m128	XMM4	 = _mm_load_ps(work+i   );
+				__m128	XMM1	 = _mm_load_ps(fwork2+i+ 4);
+				__m128	XMM5	 = _mm_load_ps(work+i+ 4);
+				__m128	XMM2	 = _mm_load_ps(fwork2+i+ 8);
+				__m128	XMM6	 = _mm_load_ps(work+i+ 8);
+				__m128	XMM3	 = _mm_load_ps(fwork2+i+12);
+				__m128	XMM7	 = _mm_load_ps(work+i+12);
+				XMM0	 = _mm_add_ps(XMM0, XMM4);
+				XMM1	 = _mm_add_ps(XMM1, XMM5);
+				XMM2	 = _mm_add_ps(XMM2, XMM6);
+				XMM3	 = _mm_add_ps(XMM3, XMM7);
+				_mm_store_ps(logmask+i   , XMM0);
+				_mm_store_ps(logmask+i+ 4, XMM1);
+				_mm_store_ps(logmask+i+ 8, XMM2);
+				_mm_store_ps(logmask+i+12, XMM3);
+			}
+		}
+	}
+#else														/* SSE Optimize */
   for(;i<n;i++){
     int dB=logmask[i]+.5;
     if(dB>=NOISE_COMPAND_LEVELS)dB=NOISE_COMPAND_LEVELS-1;
     if(dB<0)dB=0;
     logmask[i]= work[i]+p->vi->noisecompand[dB];
   }
+#endif														/* SSE Optimize */
 
 }
 
@@ -917,6 +4126,48 @@
 
   int i,n=p->n;
 
+#ifdef __SSE__												/* SSE Optimize */
+	int seedsize = (p->total_octave_lines+31)&(~31);	/* total_octave_lines rounded up to a multiple of 32, the stride of the fill loop below */
+	float *seed = (float*)_ogg_alloca(sizeof(*seed)*seedsize);	/* NOTE(review): the aligned stores below need this to be 16-byte aligned - confirm _ogg_alloca guarantees it */
+	float att=local_specmax+p->vi->ath_adjatt;
+	{
+		__m128	XMM0	 = _mm_load_ps(PNEGINF);	/* presumably NEGINF in all four lanes (PNEGINF is defined outside this hunk) */
+		for(i=0;i<seedsize;i+=32)	/* fill seed[0..seedsize) with that vector, 32 floats per pass */
+		{
+			_mm_store_ps(seed+i   , XMM0);
+			_mm_store_ps(seed+i+ 4, XMM0);
+			_mm_store_ps(seed+i+ 8, XMM0);
+			_mm_store_ps(seed+i+12, XMM0);
+			_mm_store_ps(seed+i+16, XMM0);
+			_mm_store_ps(seed+i+20, XMM0);
+			_mm_store_ps(seed+i+24, XMM0);
+			_mm_store_ps(seed+i+28, XMM0);
+		}
+	}
+	/* set the ATH (floating below localmax, not global max by a
+	   specified att) */
+	if(att<p->vi->ath_maxatt)att=p->vi->ath_maxatt;
+	
+	{
+		__m128	pm = _mm_set_ps1(att);	/* att broadcast to all four lanes */
+		for(i=0;i<n;i+=16)	/* logmask[i] = p->ath[i] + att, 16 floats per pass; assumes n is a multiple of 16 - TODO confirm */
+		{
+			__m128	XMM0, XMM1, XMM2, XMM3;
+			XMM0	 = _mm_load_ps(p->ath+i   );
+			XMM1	 = _mm_load_ps(p->ath+i+ 4);
+			XMM2	 = _mm_load_ps(p->ath+i+ 8);
+			XMM3	 = _mm_load_ps(p->ath+i+12);
+			XMM0	 = _mm_add_ps(XMM0, pm);
+			XMM1	 = _mm_add_ps(XMM1, pm);
+			XMM2	 = _mm_add_ps(XMM2, pm);
+			XMM3	 = _mm_add_ps(XMM3, pm);
+			_mm_store_ps(logmask+i   , XMM0);
+			_mm_store_ps(logmask+i+ 4, XMM1);
+			_mm_store_ps(logmask+i+ 8, XMM2);
+			_mm_store_ps(logmask+i+12, XMM3);
+		}
+	}
+#else														/* SSE Optimize */
   float *seed=alloca(sizeof(*seed)*p->total_octave_lines);
   float att=local_specmax+p->vi->ath_adjatt;
   for(i=0;i<p->total_octave_lines;i++)seed[i]=NEGINF;
@@ -927,6 +4178,7 @@
   
   for(i=0;i<n;i++)
     logmask[i]=p->ath[i]+att;
+#endif														/* SSE Optimize */
 
   /* tone masking */
   seed_loop(p,(const float ***)p->tonecurves,logfft,logmask,seed,global_specmax);
@@ -946,17 +4198,52 @@
 			int end_block,
 			int blocktype, int modenumber,
 			int nW_modenumber,
+#ifdef	__SSE__												/* SSE Optimize */
+			int lW_blocktype, int lW_modenumber, int lW_no,
+			float *tlogmdct){
+#else														/* SSE Optimize */
 			int lW_blocktype, int lW_modenumber, int lW_no){
+#endif														/* SSE Optimize */
 
   int i,j,n=p->n;
   int m2_sw=0,  padth; /* aoTuV for M2 */
   int it_sw, *m3n, m3_count; /* aoTuV for M3 */
   int m4_end, lp_pos, m4_start; /* aoTuV for M4 */
   float de, coeffi, cx; /* aoTuV for M1 */
-  float toneth; /* aoTuV for M2 */
+  /*float toneth;*/ /* aoTuV for M2 */
   float noise_rate, noise_rate_low, noise_center, rate_mod; /* aoTuV for M3 */
   float m4_thres; /* aoTuV for M4 */
   float toneatt=p->vi->tone_masteratt[offset_select];
+#ifdef __SSE__												/* SSE Optimize */
+  static _MM_ALIGN16 const __m128x PCOEFFI =	/* M1 knee (coeffi) in all four lanes */
+	  { .sf = {-17.2f, -17.2f, -17.2f, -17.2f} };
+  __m128 PCX0;	/* -0.005*cx; rewritten on every call, so automatic (not static) to keep the function reentrant */
+  __m128 PCX1;	/* -0.0003*cx; rewritten on every call, so automatic (not static) to keep the function reentrant */
+  static _MM_ALIGN16 const __m128x PM160 =
+	  { .sf = {-160.f, -160.f, -160.f, -160.f} };
+  static _MM_ALIGN16 const __m128x PM140 =
+	  { .sf = {-140.f, -140.f, -140.f, -140.f} };
+  static _MM_ALIGN16 const __m128x PP0001 =
+	  { .sf = {0.0001f, 0.0001f, 0.0001f, 0.0001f} };
+  static _MM_ALIGN16 const __m128x PP1 =
+	  { .sf = {0.1f, 0.1f, 0.1f, 0.1f} };
+  static _MM_ALIGN16 const __m128x P5 =
+	  { .sf = {5.f, 5.f, 5.f, 5.f} };
+  static _MM_ALIGN16 const __m128x P20 =
+	  { .sf = {20.f, 20.f, 20.f, 20.f} };
+  static _MM_ALIGN16 const __m128x P30 =
+	  { .sf = {30.f, 30.f, 30.f, 30.f} };
+  __m128 PTONEATT;	/* this and the vectors below are per-call broadcasts, filled in only when offset_select==1 */
+  __m128 PNOISEMAXSUPP;
+  __m128 PLOW_COMPAND;
+  __m128 PPADTH;	/* holds 1/padth so the division becomes a multiply */
+  __m128 PNOISE_CENTER;
+  __m128 PNOISE_RATE;
+  __m128 PNOISE_RATE_LOW;
+  __m128 PFV_C0, PFV_C1;	/* constant terms of the two M1 "de" lines: 1-17.2*cx*0.005 and 1-17.2*cx*0.0003 */
+  __m128 PM4_THRES;
+  int midpoint;	/* end of the scalar head loop; later reused as a scratch loop bound */
+#endif														/* SSE Optimize */
 
   cx = p->m_val;
   m3n = p->m3n;
@@ -997,13 +4284,49 @@
   			if((lW_no*m3_count) < 24) noise_center = lW_no*m3_count;
   		}
   		if(offset_select == 1){
+#ifdef __SSE__												/* SSE Optimize */
+		  for(i=0; i<128; i+=16)
+		  {
+			__m128 XMM0 = _mm_load_ps(tempmdct+i   );
+			__m128 XMM1 = _mm_load_ps(tempmdct+i+ 4);
+			__m128 XMM2 = _mm_load_ps(tempmdct+i+ 8);
+			__m128 XMM3 = _mm_load_ps(tempmdct+i+12);
+			XMM0 = _mm_sub_ps(XMM0, P5.ps);
+			XMM1 = _mm_sub_ps(XMM1, P5.ps);
+			XMM2 = _mm_sub_ps(XMM2, P5.ps);
+			XMM3 = _mm_sub_ps(XMM3, P5.ps);
+			_mm_store_ps(tempmdct+i   , XMM0);
+			_mm_store_ps(tempmdct+i+ 4, XMM1);
+			_mm_store_ps(tempmdct+i+ 8, XMM2);
+			_mm_store_ps(tempmdct+i+12, XMM3);
+		  }
+#else														/* SSE Optimize */
   			for(i=0; i<128; i++) tempmdct[i] -= 5;
+#endif														/* SSE Optimize */
   		}
   	}else{ /* non_impulse - @Short(impulse) case */
   		noise_rate = 0.7;
   		noise_center = 0;
   		if(offset_select == 1){
+#ifdef __SSE__												/* SSE Optimize */
+		  for(i=0; i<128; i+=16)
+		  {
+			__m128 XMM0 = _mm_load_ps(lastmdct+i   );
+			__m128 XMM1 = _mm_load_ps(lastmdct+i+ 4);
+			__m128 XMM2 = _mm_load_ps(lastmdct+i+ 8);
+			__m128 XMM3 = _mm_load_ps(lastmdct+i+12);
+			XMM0 = _mm_sub_ps(XMM0, P5.ps);
+			XMM1 = _mm_sub_ps(XMM1, P5.ps);
+			XMM2 = _mm_sub_ps(XMM2, P5.ps);
+			XMM3 = _mm_sub_ps(XMM3, P5.ps);
+			_mm_store_ps(tempmdct+i   , XMM0);
+			_mm_store_ps(tempmdct+i+ 4, XMM1);
+			_mm_store_ps(tempmdct+i+ 8, XMM2);
+			_mm_store_ps(tempmdct+i+12, XMM3);
+		  }
+#else														/* SSE Optimize */
   			for(i=0; i<128; i++) tempmdct[i] = lastmdct[i] - 5;
+#endif														/* SSE Optimize */
   		}
   	}
   	noise_rate_low = 0;
@@ -1023,6 +4346,686 @@
   	else lp_pos=end_block;
   }
 
+#ifdef __SSE__												/* SSE Optimize */
+/*
+  printf("M4S = %d\n", m4_start);
+  printf("M4E = %d\n", m4_end);
+  printf("LP  = %d\n\n", lp_pos);
+*/
+  if(offset_select==1)
+  {
+	PTONEATT        = _mm_set_ps1(toneatt);
+	PNOISEMAXSUPP   = _mm_set_ps1(p->vi->noisemaxsupp);
+	PLOW_COMPAND    = _mm_set_ps1(low_compand);
+	PPADTH          = _mm_set_ps1(1.0f/padth);
+	PNOISE_CENTER   = _mm_set_ps1(noise_center);
+	PNOISE_RATE     = _mm_set_ps1(noise_rate);
+	PNOISE_RATE_LOW = _mm_set_ps1(noise_rate_low);
+	PCX0            = _mm_set_ps1(-0.005 *cx);
+	PCX1            = _mm_set_ps1(-0.0003*cx);
+	PFV_C0          = _mm_set_ps1(1.0f-17.2f*cx*0.005f);
+	PFV_C1          = _mm_set_ps1(1.0f-17.2f*cx*0.0003f);
+	PM4_THRES       = _mm_set_ps1(m4_thres);
+	if(it_sw){
+	  for(i=0;i<n;i+=16)
+	  {
+		__m128	XMM0	 = _mm_load_ps(logmdct+i   );
+		__m128	XMM1	 = _mm_load_ps(logmdct+i+ 4);
+		__m128	XMM2	 = _mm_load_ps(logmdct+i+ 8);
+		__m128	XMM3	 = _mm_load_ps(logmdct+i+12);
+		XMM0	 = _mm_sub_ps(XMM0, P5.ps);
+		XMM1	 = _mm_sub_ps(XMM1, P5.ps);
+		XMM2	 = _mm_sub_ps(XMM2, P5.ps);
+		XMM3	 = _mm_sub_ps(XMM3, P5.ps);
+		_mm_store_ps(tlogmdct+i   , XMM0);
+		_mm_store_ps(tlogmdct+i+ 4, XMM1);
+		_mm_store_ps(tlogmdct+i+ 8, XMM2);
+		_mm_store_ps(tlogmdct+i+12, XMM3);
+	  }
+	}
+	midpoint	 = ((n<=m4_start&&n<=lp_pos)||(lp_pos>=m4_end&&n>lp_pos)) ? ((m3n[1]+4)&(~3)) : 0;	/* 0 when neither vector branch applies: SAFE_MODE redoes every bin from 0, so the scalar head must not run first (it would scale mdct twice) */
+	for(i=0;i<midpoint;i++)
+	{
+	  float val= noise[i]+p->noiseoffset[1][i];
+	  float tval= tone[i]+toneatt;
+	  tval-=low_compand;
+	  if(val>p->vi->noisemaxsupp)val=p->vi->noisemaxsupp;
+
+	  if(m2_sw){
+		if((logmdct[i]-lastmdct[i]) > 20){
+		  if(i > m3n[3]) val -= (logmdct[i]-lastmdct[i]-20)/padth;
+		  else val -= (logmdct[i]-lastmdct[i]-20)/(padth+padth);
+		}
+	  }
+
+	  if(it_sw){
+		const float* ptempbuf = PTEMP_BFN[temp_bfn[i]];
+		for(j=1; j<=temp_bfn[i]; j++,ptempbuf++){
+		  float tempbuf = logmdct[i]+(*ptempbuf);
+		  if( (tempmdct[i+j] < tempbuf) && (tempmdct[i+j] < tlogmdct[i+j]) )
+			tempmdct[i+j] = tlogmdct[i+j];
+		}
+		if(val > tval){
+		  if( (val>lastmdct[i]) && (logmdct[i]>(tempmdct[i]+noise_center)) ){
+			float valmask=0;
+			tempmdct[i] = logmdct[i];
+
+			if(logmdct[i]>lastmdct[i]){
+			  rate_mod = noise_rate;
+			}else{
+			  rate_mod = noise_rate_low;
+			}
+			if(i > m3n[1]){
+			  if((val-tval)>30) valmask=((val-tval-30)/10+30)*rate_mod;
+			  else valmask=(val-tval)*rate_mod;
+			}else if(i > m3n[2]){
+			  if((val-tval)>20) valmask=((val-tval-20)/10+20)*rate_mod;
+			  else valmask=(val-tval)*rate_mod;
+			}else if(i > m3n[3]){
+			  if((val-tval)>10) valmask=((val-tval-10)/10+10)*rate_mod*0.5;
+			  else valmask=(val-tval)*rate_mod*0.5;
+			}else{
+			  if((val-tval)>10) valmask=((val-tval-10)/10+10)*rate_mod*0.3;
+			  else valmask=(val-tval)*rate_mod*0.3;
+			}
+			if((val-valmask)>lastmdct[i])val-=valmask;
+			else val=lastmdct[i];
+		  }
+		}
+	  }
+
+	  if(val>tval){
+		logmask[i]=val;
+	  }else logmask[i]=tval;
+
+	  coeffi = -17.2;
+	  val = val - logmdct[i];
+
+	  if(val > coeffi){
+		de = 1.0-((val-coeffi)*0.005*cx);
+		if(de < 0) de = 0.0001;
+	  }else
+		de = 1.0-((val-coeffi)*0.0003*cx);
+	  mdct[i] *= de;
+	}
+	if(n<=m4_start&&n<=lp_pos)
+	{
+	  for(;i<n;i+=4)
+	  {
+		__m128 PVAL  = _mm_load_ps(noise+i);
+		__m128 PTVAL = _mm_load_ps(tone+i);
+		PVAL  = _mm_add_ps(PVAL, PM128(p->noiseoffset[1]+i));
+		PTVAL = _mm_add_ps(PTVAL, PTONEATT);
+		PVAL  = _mm_min_ps(PVAL, PNOISEMAXSUPP);
+		PTVAL = _mm_sub_ps(PTVAL, PLOW_COMPAND);
+		if(m2_sw)
+		{
+		  __m128 XMM0 = _mm_load_ps(logmdct+i);
+		  __m128 XMM1 = _mm_load_ps(lastmdct+i);
+		  __m128 XMM2 = _mm_load_ps(P20.sf);
+		  XMM0 = _mm_sub_ps(XMM0, XMM1);
+		  XMM0 = _mm_sub_ps(XMM0, XMM2);
+		  XMM1 = XMM0;
+		  XMM0 = _mm_mul_ps(XMM0, PPADTH);
+		  XMM1 = _mm_cmplt_ps(XMM1, PFV_0.ps);
+		  XMM1 = _mm_andnot_ps(XMM1, XMM0);
+		  PVAL = _mm_sub_ps(PVAL, XMM1);
+		}
+		if(it_sw){
+		  int k;
+		  for(k=0;k<4;k++)
+		  {
+			const float* ptempbuf = PTEMP_BFN[temp_bfn[i+k]];
+			__m128 PLOGMDCT = _mm_set_ps1(logmdct[i+k]);
+			if(((i+k)&3)==3)
+			{
+			  for(j=1; j<temp_bfn8[i+k]; j+=8, ptempbuf+=8)
+			  {
+				__m128 XMM0, XMM1, XMM2;
+				__m128 XMM3, XMM4, XMM5;
+				XMM0 = _mm_load_ps(ptempbuf  );
+				XMM3 = _mm_load_ps(ptempbuf+4);
+				XMM1 = _mm_load_ps(tempmdct+i+j+k  );
+				XMM4 = _mm_load_ps(tempmdct+i+j+k+4);
+				XMM0 = _mm_add_ps(XMM0, PLOGMDCT);
+				XMM3 = _mm_add_ps(XMM3, PLOGMDCT);
+				XMM2 = _mm_load_ps(tlogmdct+i+j+k  );
+				XMM5 = _mm_load_ps(tlogmdct+i+j+k+4);
+				XMM0 = _mm_min_ps(XMM0, XMM2);
+				XMM3 = _mm_min_ps(XMM3, XMM5);
+				XMM0 = _mm_cmple_ps(XMM0, XMM1);
+				XMM3 = _mm_cmple_ps(XMM3, XMM4);
+				XMM1 = _mm_and_ps(XMM1, XMM0);
+				XMM4 = _mm_and_ps(XMM4, XMM3);
+				XMM0 = _mm_andnot_ps(XMM0, XMM2);
+				XMM3 = _mm_andnot_ps(XMM3, XMM5);
+				XMM1 = _mm_or_ps(XMM1, XMM0);
+				XMM4 = _mm_or_ps(XMM4, XMM3);
+				_mm_store_ps(tempmdct+i+j+k  , XMM1);
+				_mm_store_ps(tempmdct+i+j+k+4, XMM4);
+			  }
+			  for(; j<temp_bfn4[i+k]; j+=4, ptempbuf+=4)
+			  {
+				__m128 XMM0, XMM1, XMM2;
+				XMM0 = _mm_load_ps(ptempbuf);
+				XMM1 = _mm_load_ps(tempmdct+i+j+k);
+				XMM0 = _mm_add_ps(XMM0, PLOGMDCT);
+				XMM2 = _mm_load_ps(tlogmdct+i+j+k);
+				XMM0 = _mm_min_ps(XMM0, XMM2);
+				XMM0 = _mm_cmple_ps(XMM0, XMM1);
+				XMM1 = _mm_and_ps(XMM1, XMM0);
+				XMM0 = _mm_andnot_ps(XMM0, XMM2);
+				XMM1 = _mm_or_ps(XMM1, XMM0);
+				_mm_store_ps(tempmdct+i+j+k, XMM1);
+			  }
+			}
+			else
+			{
+			  for(j=1; j<temp_bfn8[i+k]; j+=8, ptempbuf+=8)
+			  {
+				__m128 XMM0, XMM1, XMM2;
+				__m128 XMM3, XMM4, XMM5;
+				XMM0 = _mm_load_ps(ptempbuf  );
+				XMM3 = _mm_load_ps(ptempbuf+4);
+				XMM1 = _mm_lddqu_ps(tempmdct+i+j+k  );
+				XMM4 = _mm_lddqu_ps(tempmdct+i+j+k+4);
+				XMM0 = _mm_add_ps(XMM0, PLOGMDCT);
+				XMM3 = _mm_add_ps(XMM3, PLOGMDCT);
+				XMM2 = _mm_lddqu_ps(tlogmdct+i+j+k  );
+				XMM5 = _mm_lddqu_ps(tlogmdct+i+j+k+4);
+				XMM0 = _mm_min_ps(XMM0, XMM2);
+				XMM3 = _mm_min_ps(XMM3, XMM5);
+				XMM0 = _mm_cmple_ps(XMM0, XMM1);
+				XMM3 = _mm_cmple_ps(XMM3, XMM4);
+				XMM1 = _mm_and_ps(XMM1, XMM0);
+				XMM4 = _mm_and_ps(XMM4, XMM3);
+				XMM0 = _mm_andnot_ps(XMM0, XMM2);
+				XMM3 = _mm_andnot_ps(XMM3, XMM5);
+				XMM1 = _mm_or_ps(XMM1, XMM0);
+				XMM4 = _mm_or_ps(XMM4, XMM3);
+				_mm_storeu_ps(tempmdct+i+j+k  , XMM1);
+				_mm_storeu_ps(tempmdct+i+j+k+4, XMM4);
+			  }
+			  for(; j<temp_bfn4[i+k]; j+=4, ptempbuf+=4)
+			  {
+				__m128 XMM0, XMM1, XMM2;
+				XMM0 = _mm_load_ps(ptempbuf);
+				XMM1 = _mm_lddqu_ps(tempmdct+i+j+k  );
+				XMM0 = _mm_add_ps(XMM0, PLOGMDCT);
+				XMM2 = _mm_lddqu_ps(tlogmdct+i+j+k  );
+				XMM0 = _mm_min_ps(XMM0, XMM2);
+				XMM0 = _mm_cmple_ps(XMM0, XMM1);
+				XMM1 = _mm_and_ps(XMM1, XMM0);
+				XMM0 = _mm_andnot_ps(XMM0, XMM2);
+				XMM1 = _mm_or_ps(XMM1, XMM0);
+				_mm_storeu_ps(tempmdct+i+j+k, XMM1);
+			  }
+			}
+			switch(temp_bfn[i+k]-j)
+			{
+			case 0 :
+			  {
+				__m128 XMM0, XMM1, XMM2;
+				XMM0 = _mm_load_ss(ptempbuf);
+				XMM1 = _mm_load_ss(tempmdct+i+j+k);
+				XMM0 = _mm_add_ss(XMM0, PLOGMDCT);
+				XMM2 = _mm_load_ss(tlogmdct+i+j+k);
+				XMM0 = _mm_min_ss(XMM0, XMM2);
+				XMM0 = _mm_cmple_ss(XMM0, XMM1);
+				XMM1 = _mm_and_ps(XMM1, XMM0);
+				XMM0 = _mm_andnot_ps(XMM0, XMM2);
+				XMM1 = _mm_or_ps(XMM1, XMM0);
+				_mm_store_ss(tempmdct+i+j+k, XMM1);
+			  }
+			  break;
+			case 1 :	/* two bins left: j and j+1 */
+			  {
+				__m128 XMM0 = _mm_setzero_ps(), XMM1 = XMM0, XMM2 = XMM0;	/* _mm_loadl_pi passes its first operand's upper lanes through, so they must hold defined values */
+				XMM0 = _mm_loadl_pi(XMM0, (__m64*)ptempbuf);
+				XMM1 = _mm_loadl_pi(XMM1, (__m64*)(tempmdct+i+j+k));
+				XMM0 = _mm_add_ps(XMM0, PLOGMDCT);
+				XMM2 = _mm_loadl_pi(XMM2, (__m64*)(tlogmdct+i+j+k));
+				XMM0 = _mm_min_ps(XMM0, XMM2);
+				XMM0 = _mm_cmple_ps(XMM0, XMM1);	/* mask: keep tempmdct where min(tempbuf,tlogmdct) <= tempmdct */
+				XMM1 = _mm_and_ps(XMM1, XMM0);
+				XMM0 = _mm_andnot_ps(XMM0, XMM2);	/* elsewhere take tlogmdct */
+				XMM1 = _mm_or_ps(XMM1, XMM0);
+				_mm_storel_pi((__m64*)(tempmdct+i+j+k), XMM1);	/* only the two low lanes are written back */
+			  }
+			  break;
+			case 2 :
+			  {
+				__m128 XMM0, XMM1, XMM2;
+				XMM0 = _mm_load_ps(ptempbuf);
+				XMM1 = _mm_lddqu_ps(tempmdct+i+j+k);
+				XMM0 = _mm_add_ps(XMM0, PLOGMDCT);
+				XMM2 = _mm_lddqu_ps(tlogmdct+i+j+k);
+				XMM0 = _mm_min_ps(XMM0, XMM2);
+				XMM0 = _mm_cmple_ps(XMM0, XMM1);
+				XMM1 = _mm_and_ps(XMM1, XMM0);
+				XMM0 = _mm_andnot_ps(XMM0, XMM2);
+				XMM1 = _mm_or_ps(XMM1, XMM0);
+				_mm_storel_pi((__m64*)(tempmdct+i+j+k), XMM1);
+				XMM1 = _mm_movehl_ps(XMM1, XMM1);
+				_mm_store_ss(tempmdct+i+j+k+2, XMM1);
+			  }
+			  break;
+			case 3 :
+			  break;
+			}
+		  }
+		  {
+			__m128 XMM0 = _mm_cmpgt_ps(PVAL, _mm_max_ps(PTVAL, PM128(lastmdct+i)));
+			if(_mm_movemask_ps(XMM0))
+			{
+			  __m128 XMM1 = _mm_cmpgt_ps(PM128(logmdct+i), _mm_add_ps(PM128(tempmdct+i), PNOISE_CENTER));
+			  __m128 XMM2, XMM3, XMM4;
+			  XMM0 = _mm_and_ps(XMM0, XMM1);
+			  if(_mm_movemask_ps(XMM0))
+			  {
+				XMM1 = _mm_load_ps(logmdct+i);
+				XMM2 = XMM0;
+				XMM3 = XMM1;
+				XMM3 = _mm_or_ps(
+				  _mm_and_ps(XMM3, XMM2),
+				  _mm_andnot_ps(XMM2, PM128(tempmdct+i))
+				);
+				_mm_store_ps(tempmdct+i, XMM3);
+				XMM1 = _mm_cmpgt_ps(XMM1, PM128(lastmdct+i));
+				XMM2 = _mm_or_ps(
+				  _mm_and_ps(PNOISE_RATE, XMM1),
+				  _mm_andnot_ps(XMM1, PNOISE_RATE_LOW)
+				);	/* rate_mod */
+				XMM1 = _mm_sub_ps(PVAL, PTVAL);
+				XMM3 = XMM1;
+				XMM1 = _mm_sub_ps(XMM1, P30.ps);
+				XMM4 = _mm_cmpgt_ps(XMM1, PFV_0.ps);
+				XMM1 = _mm_mul_ps(XMM1, PP1.ps);
+				XMM1 = _mm_add_ps(XMM1, P30.ps);
+				XMM1 = _mm_and_ps(XMM1, XMM4);
+				XMM4 = _mm_andnot_ps(XMM4, XMM3);
+				XMM1 = _mm_or_ps(XMM1, XMM4);
+				XMM1 = _mm_mul_ps(XMM1, XMM2);
+				XMM3 = PVAL;
+				XMM3 = _mm_sub_ps(XMM3, XMM1);
+				XMM3 = _mm_max_ps(XMM3, PM128(lastmdct+i));
+				XMM3 = _mm_and_ps(XMM3, XMM0);
+				XMM0 = _mm_andnot_ps(XMM0, PVAL);
+				PVAL = _mm_or_ps(XMM3, XMM0);
+			  }
+			}
+		  }
+		}
+		_mm_store_ps(logmask+i, _mm_max_ps(PVAL, PTVAL));
+		{
+		  __m128 XMM0, XMM1, XMM2;
+		  PVAL = _mm_sub_ps(PVAL, PM128(logmdct+i));
+		  XMM0 = PVAL;
+		  XMM1 = PVAL;
+		  XMM2 = PVAL;
+		  XMM0 = _mm_cmpgt_ps(XMM0, PCOEFFI.ps);
+		  XMM1 = _mm_mul_ps(XMM1, PCX0);
+		  XMM2 = _mm_mul_ps(XMM2, PCX1);
+		  XMM1 = _mm_add_ps(XMM1, PFV_C0);
+		  XMM2 = _mm_add_ps(XMM2, PFV_C1);
+		  XMM1 = _mm_max_ps(XMM1, PP0001.ps);
+		  XMM1 = _mm_and_ps(XMM1, XMM0);
+		  XMM0 = _mm_andnot_ps(XMM0, XMM2);
+		  XMM1 = _mm_or_ps(XMM1, XMM0);
+		  XMM1 = _mm_mul_ps(XMM1, PM128(mdct+i));
+		  _mm_store_ps(mdct+i, XMM1);
+		}
+	  }
+	}
+	else if(lp_pos>=m4_end&&n>lp_pos)
+	{
+	  char RunMode[2048];	/* dispatch code per group of 4 bins; only indices that are multiples of 4 are written and read. NOTE(review): indexed up to n-4, so this assumes n <= 2048 - confirm */
+	  j	 = (m3n[1]+4)&(~3);	/* same value as the scalar head loop's end, i.e. the first bin the vector loop handles */
+	  midpoint	 = m4_start&(~3);
+	  for(;j<midpoint;j+=4)
+		RunMode[j] = 1;	/* SSE-1: groups entirely below m4_start, plain max(val,tval) */
+	  midpoint	 = (m4_start+4)&(~3);	/* the scalar test is i>m4_start (strict), so the group holding m4_start itself stays scalar */
+	  for(;j<midpoint;j+=4)
+		RunMode[j] = 0;	/* Normal: per-bin scalar path */
+	  midpoint	 = m4_end&(~3);
+	  for(;j<midpoint;j+=4)
+		RunMode[j] = 2;	/* SSE-2: groups entirely inside (m4_start, m4_end) */
+	  midpoint	 = (m4_end+3)&(~3);
+	  for(;j<midpoint;j+=4)
+		RunMode[j] = 0;	/* Normal: group straddling m4_end */
+	  midpoint	 = lp_pos&(~3);
+	  for(;j<midpoint;j+=4)
+		RunMode[j] = 1;	/* SSE-1: groups between m4_end and lp_pos */
+	  midpoint	 = (lp_pos+3)&(~3);
+	  for(;j<midpoint;j+=4)
+		RunMode[j] = 3;	/* SSE-3: group straddling lp_pos */
+	  for(;j<n;j+=4)
+		RunMode[j] = 4;	/* SSE-4: groups entirely at or above lp_pos */
+	  for(;i<n;i+=4)
+	  {
+		__m128 PVAL  = _mm_load_ps(noise+i);
+		__m128 PTVAL = _mm_load_ps(tone+i);
+		PVAL  = _mm_add_ps(PVAL, PM128(p->noiseoffset[1]+i));
+		PTVAL = _mm_add_ps(PTVAL, PTONEATT);
+		PVAL  = _mm_min_ps(PVAL, PNOISEMAXSUPP);
+		PTVAL = _mm_sub_ps(PTVAL, PLOW_COMPAND);
+		if(m2_sw)
+		{
+		  __m128 XMM0 = _mm_load_ps(logmdct+i);
+		  __m128 XMM1 = _mm_load_ps(lastmdct+i);
+		  __m128 XMM2 = _mm_load_ps(P20.sf);
+		  XMM0 = _mm_sub_ps(XMM0, XMM1);
+		  XMM0 = _mm_sub_ps(XMM0, XMM2);
+		  XMM1 = XMM0;
+		  XMM0 = _mm_mul_ps(XMM0, PPADTH);
+		  XMM1 = _mm_cmplt_ps(XMM1, PFV_0.ps);
+		  XMM1 = _mm_andnot_ps(XMM1, XMM0);
+		  PVAL = _mm_sub_ps(PVAL, XMM1);
+		}
+		if(it_sw){
+		  int k;
+		  for(k=0;k<4;k++)
+		  {
+			const float* ptempbuf = PTEMP_BFN[temp_bfn[i+k]];
+			__m128 PLOGMDCT = _mm_set_ps1(logmdct[i+k]);
+			if(((i+k)&3)==3)
+			{
+			  for(j=1; j<temp_bfn8[i+k]; j+=8, ptempbuf+=8)
+			  {
+				__m128 XMM0, XMM1, XMM2;
+				__m128 XMM3, XMM4, XMM5;
+				XMM0 = _mm_load_ps(ptempbuf  );
+				XMM3 = _mm_load_ps(ptempbuf+4);
+				XMM1 = _mm_load_ps(tempmdct+i+j+k  );
+				XMM4 = _mm_load_ps(tempmdct+i+j+k+4);
+				XMM0 = _mm_add_ps(XMM0, PLOGMDCT);
+				XMM3 = _mm_add_ps(XMM3, PLOGMDCT);
+				XMM2 = _mm_load_ps(tlogmdct+i+j+k  );
+				XMM5 = _mm_load_ps(tlogmdct+i+j+k+4);
+				XMM0 = _mm_min_ps(XMM0, XMM2);
+				XMM3 = _mm_min_ps(XMM3, XMM5);
+				XMM0 = _mm_cmple_ps(XMM0, XMM1);
+				XMM3 = _mm_cmple_ps(XMM3, XMM4);
+				XMM1 = _mm_and_ps(XMM1, XMM0);
+				XMM4 = _mm_and_ps(XMM4, XMM3);
+				XMM0 = _mm_andnot_ps(XMM0, XMM2);
+				XMM3 = _mm_andnot_ps(XMM3, XMM5);
+				XMM1 = _mm_or_ps(XMM1, XMM0);
+				XMM4 = _mm_or_ps(XMM4, XMM3);
+				_mm_store_ps(tempmdct+i+j+k  , XMM1);
+				_mm_store_ps(tempmdct+i+j+k+4, XMM4);
+			  }
+			  for(; j<temp_bfn4[i+k]; j+=4, ptempbuf+=4)
+			  {
+				__m128 XMM0, XMM1, XMM2;
+				XMM0 = _mm_load_ps(ptempbuf);
+				XMM1 = _mm_load_ps(tempmdct+i+j+k);
+				XMM0 = _mm_add_ps(XMM0, PLOGMDCT);
+				XMM2 = _mm_load_ps(tlogmdct+i+j+k);
+				XMM0 = _mm_min_ps(XMM0, XMM2);
+				XMM0 = _mm_cmple_ps(XMM0, XMM1);
+				XMM1 = _mm_and_ps(XMM1, XMM0);
+				XMM0 = _mm_andnot_ps(XMM0, XMM2);
+				XMM1 = _mm_or_ps(XMM1, XMM0);
+				_mm_store_ps(tempmdct+i+j+k, XMM1);
+			  }
+			}
+			else
+			{
+			  for(j=1; j<temp_bfn8[i+k]; j+=8, ptempbuf+=8)
+			  {
+				__m128 XMM0, XMM1, XMM2;
+				__m128 XMM3, XMM4, XMM5;
+				XMM0 = _mm_load_ps(ptempbuf  );
+				XMM3 = _mm_load_ps(ptempbuf+4);
+				XMM1 = _mm_lddqu_ps(tempmdct+i+j+k  );
+				XMM4 = _mm_lddqu_ps(tempmdct+i+j+k+4);
+				XMM0 = _mm_add_ps(XMM0, PLOGMDCT);
+				XMM3 = _mm_add_ps(XMM3, PLOGMDCT);
+				XMM2 = _mm_lddqu_ps(tlogmdct+i+j+k  );
+				XMM5 = _mm_lddqu_ps(tlogmdct+i+j+k+4);
+				XMM0 = _mm_min_ps(XMM0, XMM2);
+				XMM3 = _mm_min_ps(XMM3, XMM5);
+				XMM0 = _mm_cmple_ps(XMM0, XMM1);
+				XMM3 = _mm_cmple_ps(XMM3, XMM4);
+				XMM1 = _mm_and_ps(XMM1, XMM0);
+				XMM4 = _mm_and_ps(XMM4, XMM3);
+				XMM0 = _mm_andnot_ps(XMM0, XMM2);
+				XMM3 = _mm_andnot_ps(XMM3, XMM5);
+				XMM1 = _mm_or_ps(XMM1, XMM0);
+				XMM4 = _mm_or_ps(XMM4, XMM3);
+				_mm_storeu_ps(tempmdct+i+j+k  , XMM1);
+				_mm_storeu_ps(tempmdct+i+j+k+4, XMM4);
+			  }
+			  for(; j<temp_bfn4[i+k]; j+=4, ptempbuf+=4)
+			  {
+				__m128 XMM0, XMM1, XMM2;
+				XMM0 = _mm_load_ps(ptempbuf);
+				XMM1 = _mm_lddqu_ps(tempmdct+i+j+k  );
+				XMM0 = _mm_add_ps(XMM0, PLOGMDCT);
+				XMM2 = _mm_lddqu_ps(tlogmdct+i+j+k  );
+				XMM0 = _mm_min_ps(XMM0, XMM2);
+				XMM0 = _mm_cmple_ps(XMM0, XMM1);
+				XMM1 = _mm_and_ps(XMM1, XMM0);
+				XMM0 = _mm_andnot_ps(XMM0, XMM2);
+				XMM1 = _mm_or_ps(XMM1, XMM0);
+				_mm_storeu_ps(tempmdct+i+j+k, XMM1);
+			  }
+			}
+			switch(temp_bfn[i+k]-j)
+			{
+			case 0 :
+			  {
+				__m128 XMM0, XMM1, XMM2;
+				XMM0 = _mm_load_ss(ptempbuf);
+				XMM1 = _mm_load_ss(tempmdct+i+j+k);
+				XMM0 = _mm_add_ss(XMM0, PLOGMDCT);
+				XMM2 = _mm_load_ss(tlogmdct+i+j+k);
+				XMM0 = _mm_min_ss(XMM0, XMM2);
+				XMM0 = _mm_cmple_ss(XMM0, XMM1);
+				XMM1 = _mm_and_ps(XMM1, XMM0);
+				XMM0 = _mm_andnot_ps(XMM0, XMM2);
+				XMM1 = _mm_or_ps(XMM1, XMM0);
+				_mm_store_ss(tempmdct+i+j+k, XMM1);
+			  }
+			  break;
+			case 1 :	/* two bins left: j and j+1 */
+			  {
+				__m128 XMM0 = _mm_setzero_ps(), XMM1 = XMM0, XMM2 = XMM0;	/* _mm_loadl_pi passes its first operand's upper lanes through, so they must hold defined values */
+				XMM0 = _mm_loadl_pi(XMM0, (__m64*)ptempbuf);
+				XMM1 = _mm_loadl_pi(XMM1, (__m64*)(tempmdct+i+j+k));
+				XMM0 = _mm_add_ps(XMM0, PLOGMDCT);
+				XMM2 = _mm_loadl_pi(XMM2, (__m64*)(tlogmdct+i+j+k));
+				XMM0 = _mm_min_ps(XMM0, XMM2);
+				XMM0 = _mm_cmple_ps(XMM0, XMM1);	/* mask: keep tempmdct where min(tempbuf,tlogmdct) <= tempmdct */
+				XMM1 = _mm_and_ps(XMM1, XMM0);
+				XMM0 = _mm_andnot_ps(XMM0, XMM2);	/* elsewhere take tlogmdct */
+				XMM1 = _mm_or_ps(XMM1, XMM0);
+				_mm_storel_pi((__m64*)(tempmdct+i+j+k), XMM1);	/* only the two low lanes are written back */
+			  }
+			  break;
+			case 2 :
+			  {
+				__m128 XMM0, XMM1, XMM2;
+				XMM0 = _mm_load_ps(ptempbuf);
+				XMM1 = _mm_lddqu_ps(tempmdct+i+j+k);
+				XMM0 = _mm_add_ps(XMM0, PLOGMDCT);
+				XMM2 = _mm_lddqu_ps(tlogmdct+i+j+k);
+				XMM0 = _mm_min_ps(XMM0, XMM2);
+				XMM0 = _mm_cmple_ps(XMM0, XMM1);
+				XMM1 = _mm_and_ps(XMM1, XMM0);
+				XMM0 = _mm_andnot_ps(XMM0, XMM2);
+				XMM1 = _mm_or_ps(XMM1, XMM0);
+				_mm_storel_pi((__m64*)(tempmdct+i+j+k), XMM1);
+				XMM1 = _mm_movehl_ps(XMM1, XMM1);
+				_mm_store_ss(tempmdct+i+j+k+2, XMM1);
+			  }
+			  break;
+			case 3 :
+			  break;
+			}
+		  }
+		  {
+			__m128 XMM0 = _mm_cmpgt_ps(PVAL, _mm_max_ps(PTVAL, PM128(lastmdct+i)));
+			if(_mm_movemask_ps(XMM0))
+			{
+			  __m128 XMM1 = _mm_cmpgt_ps(PM128(logmdct+i), _mm_add_ps(PM128(tempmdct+i), PNOISE_CENTER));
+			  __m128 XMM2, XMM3, XMM4;
+			  XMM0 = _mm_and_ps(XMM0, XMM1);
+			  if(_mm_movemask_ps(XMM0))
+			  {
+				XMM1 = _mm_load_ps(logmdct+i);
+				XMM2 = XMM0;
+				XMM3 = XMM1;
+				XMM3 = _mm_or_ps(
+				  _mm_and_ps(XMM3, XMM2),
+				  _mm_andnot_ps(XMM2, PM128(tempmdct+i))
+				);
+				_mm_store_ps(tempmdct+i, XMM3);
+				XMM1 = _mm_cmpgt_ps(XMM1, PM128(lastmdct+i));
+				XMM2 = _mm_or_ps(
+				  _mm_and_ps(PNOISE_RATE, XMM1),
+				  _mm_andnot_ps(XMM1, PNOISE_RATE_LOW)
+				);	/* rate_mod */
+				XMM1 = _mm_sub_ps(PVAL, PTVAL);
+				XMM3 = XMM1;
+				XMM1 = _mm_sub_ps(XMM1, P30.ps);
+				XMM4 = _mm_cmpgt_ps(XMM1, PFV_0.ps);
+				XMM1 = _mm_mul_ps(XMM1, PP1.ps);
+				XMM1 = _mm_add_ps(XMM1, P30.ps);
+				XMM1 = _mm_and_ps(XMM1, XMM4);
+				XMM4 = _mm_andnot_ps(XMM4, XMM3);
+				XMM1 = _mm_or_ps(XMM1, XMM4);
+				XMM1 = _mm_mul_ps(XMM1, XMM2);
+				XMM3 = PVAL;
+				XMM3 = _mm_sub_ps(XMM3, XMM1);
+				XMM3 = _mm_max_ps(XMM3, PM128(lastmdct+i));
+				XMM3 = _mm_and_ps(XMM3, XMM0);
+				XMM0 = _mm_andnot_ps(XMM0, PVAL);
+				PVAL = _mm_or_ps(XMM3, XMM0);
+			  }
+			}
+		  }
+		}
+		switch(RunMode[i])
+		{
+			default:
+			case 0: /* Default */
+			  {
+				int k;
+				__m128x T0, T1;
+				T0.ps = PVAL;
+				T1.ps = PTVAL;
+				for(k=0;k<4;k++){
+				  float val  = T0.sf[k];
+				  float tval = T1.sf[k];
+				  if(i+k>=lp_pos)logmdct[i+k]=-160;
+				  if(val>tval){
+					logmask[i+k]=val;
+				  }else if((i+k>m4_start) && (i+k<m4_end) && (logmdct[i+k]>-140)){
+					if(logmdct[i+k]>val){
+					  if(logmdct[i+k]<tval)tval-=(tval-val)*m4_thres;
+					}else{
+					  if(val<tval)tval-=(tval-val)*m4_thres;
+					}
+					logmask[i+k]=tval;
+				  }else logmask[i+k]=tval;
+				  T1.sf[k] = tval;
+				}
+				PTVAL = T1.ps;
+			  }
+			  break;
+			case 1: /* SSE-1 */
+			  _mm_store_ps(logmask+i, _mm_max_ps(PVAL, PTVAL));
+			  break;
+			case 2: /* SSE-2(m4_start - m4_end) */
+			  {
+				/*
+				  A: val>tval
+				  B: logmdct>-140
+				  C: logmdct>val
+				  D: logmdct<tval
+				  E: val<tval
+				  T0 = A for val
+				  T1 = a(b|B(Cd|ce)) for logmdct
+				  T2 = T0|T1 for tval
+				  T3 = t2 for tval*
+				  logmask = val&T0 | tval&T1 | tval*&T3
+				  tval = tval&T2 | tval*&T3
+				*/
+				__m128 XMM0, XMM1, XMM2, XMM3, XMM4;
+				XMM4 = _mm_cmpgt_ps(PVAL, PTVAL);				/* T0:A */
+				if(_mm_movemask_ps(XMM4)==15)
+				  _mm_store_ps(logmask+i, PVAL);
+				else
+				{
+				  XMM2 = _mm_cmple_ps(PM128(logmdct+i), PVAL);	/* c */
+				  XMM0 = _mm_cmple_ps(PTVAL, PM128(logmdct+i));	/* d */
+				  XMM1 = _mm_cmple_ps(PTVAL, PVAL);				/* e */
+				  XMM1 = _mm_and_ps(XMM1, XMM2);				/* ce */
+				  XMM2 = _mm_andnot_ps(XMM2, XMM0);				/* Cd */
+				  XMM1 = _mm_or_ps(XMM1, XMM2);					/* Cd|ce */
+				  XMM3 = _mm_cmple_ps(PM128(logmdct+i),PM140.ps);	/* b */
+				  XMM2 = XMM3;
+				  XMM2 = _mm_andnot_ps(XMM2, XMM1);				/* B(Cd|ce) */
+				  XMM3 = _mm_or_ps(XMM3, XMM2);					/* b|B(Cd|ce) */
+				  XMM1 = XMM4;
+				  XMM1 = _mm_andnot_ps(XMM1, XMM3);				/* T1:a(b|B(Cd|ce)) */
+				  XMM2 = _mm_or_ps(XMM4, XMM1);					/* T2:T0|T1 */
+				  XMM4 = _mm_and_ps(XMM4, PVAL);				/* val&T0 */
+				  XMM1 = _mm_and_ps(XMM1, PTVAL);				/* tval&T1 */
+				  XMM3 = _mm_sub_ps(PVAL, PTVAL);
+				  XMM3 = _mm_mul_ps(XMM3, PM4_THRES);
+				  XMM3 = _mm_add_ps(XMM3, PTVAL);				/* tval* */
+				  PTVAL = _mm_and_ps(PTVAL, XMM2);				/* tval&T2 */
+				  XMM2 = _mm_andnot_ps(XMM2, XMM3);				/* tval*&T3 */
+				  PTVAL = _mm_or_ps(PTVAL, XMM2);				/* tval = tval&T2 | tval*&T3 */
+				  XMM1 = _mm_or_ps(XMM1, XMM2);					/* tval&T1 | tval*&T3 */
+				  XMM4 = _mm_or_ps(XMM4, XMM1);					/* val&T0 | tval&T1 | tval*&T3 */
+				  _mm_store_ps(logmask+i, XMM4);
+				}
+			  }
+			  break;
+			case 3: /* SSE-3(block include lp_pos) */
+			  {
+				int k;
+				for(k=0;k<4;k++)
+				  if(i+k>=lp_pos)logmdct[i+k]=-160;
+			  }
+			  _mm_store_ps(logmask+i, _mm_max_ps(PVAL, PTVAL));
+			  break;
+			case 4: /* SSE-4(i>=lp_pos) */
+			  _mm_store_ps(logmdct+i, PM160.ps);
+			  _mm_store_ps(logmask+i, _mm_max_ps(PVAL, PTVAL));
+			  break;
+		}
+		{
+		  __m128 XMM0, XMM1, XMM2;
+		  PVAL = _mm_sub_ps(PVAL, PM128(logmdct+i));
+		  XMM0 = PVAL;
+		  XMM1 = PVAL;
+		  XMM2 = PVAL;
+		  XMM0 = _mm_cmpgt_ps(XMM0, PCOEFFI.ps);
+		  XMM1 = _mm_mul_ps(XMM1, PCX0);
+		  XMM2 = _mm_mul_ps(XMM2, PCX1);
+		  XMM1 = _mm_add_ps(XMM1, PFV_C0);
+		  XMM2 = _mm_add_ps(XMM2, PFV_C1);
+		  XMM1 = _mm_max_ps(XMM1, PP0001.ps);
+		  XMM1 = _mm_and_ps(XMM1, XMM0);
+		  XMM0 = _mm_andnot_ps(XMM0, XMM2);
+		  XMM1 = _mm_or_ps(XMM1, XMM0);
+		  XMM1 = _mm_mul_ps(XMM1, PM128(mdct+i));
+		  _mm_store_ps(mdct+i, XMM1);
+		}
+	  }
+	}
+	else
+	  goto SAFE_MODE;
+  }
+  else
+  {
+SAFE_MODE:
+#endif														/* SSE Optimize */
   for(i=0;i<n;i++){
     float val= noise[i]+p->noiseoffset[offset_select][i];
     float tval= tone[i]+toneatt;
@@ -1144,9 +5147,160 @@
       
     }
   }
+#ifdef __SSE__												/* SSE Optimize */
+  }
+#endif														/* SSE Optimize */
 
   /** @ M3 SET lastmdct **/
   if(offset_select == 1){
+#ifdef __SSE__												/* SSE Optimize */
+	if(n == 1024)
+	{
+	  if(!nW_modenumber)
+	  {
+		for(i=0; i<128; i+=16)
+		{
+		  __m128	XMM0, XMM1, XMM2, XMM3;
+		  __m128	XMM4, XMM5, XMM6, XMM7;
+		  XMM0	 = _mm_load_ps(logmdct+i*8    );
+		  XMM1	 = _mm_load_ps(logmdct+i*8+  4);
+		  XMM2	 = _mm_load_ps(logmdct+i*8+  8);
+		  XMM3	 = _mm_load_ps(logmdct+i*8+ 12);
+		  XMM4	 = _mm_load_ps(logmdct+i*8+ 16);
+		  XMM5	 = _mm_load_ps(logmdct+i*8+ 20);
+		  XMM6	 = _mm_load_ps(logmdct+i*8+ 24);
+		  XMM7	 = _mm_load_ps(logmdct+i*8+ 28);
+		  XMM0	 = _mm_min_ps(XMM0, XMM1);
+		  XMM2	 = _mm_min_ps(XMM2, XMM3);
+		  XMM4	 = _mm_min_ps(XMM4, XMM5);
+		  XMM6	 = _mm_min_ps(XMM6, XMM7);
+		  XMM1	 = XMM0;
+		  XMM5	 = XMM4;
+		  XMM0	 = _mm_shuffle_ps(XMM0, XMM2, _MM_SHUFFLE(2,0,2,0));
+		  XMM4	 = _mm_shuffle_ps(XMM4, XMM6, _MM_SHUFFLE(2,0,2,0));
+		  XMM3	 = _mm_load_ps(logmdct+i*8+ 32);
+		  XMM7	 = _mm_load_ps(logmdct+i*8+ 36);
+		  XMM1	 = _mm_shuffle_ps(XMM1, XMM2, _MM_SHUFFLE(3,1,3,1));
+		  XMM5	 = _mm_shuffle_ps(XMM5, XMM6, _MM_SHUFFLE(3,1,3,1));
+		  XMM2	 = XMM0;
+		  XMM6	 = XMM1;
+		  XMM0	 = _mm_shuffle_ps(XMM0, XMM4, _MM_SHUFFLE(2,0,2,0));
+		  XMM1	 = _mm_shuffle_ps(XMM1, XMM5, _MM_SHUFFLE(2,0,2,0));
+		  XMM2	 = _mm_shuffle_ps(XMM2, XMM4, _MM_SHUFFLE(3,1,3,1));
+		  XMM6	 = _mm_shuffle_ps(XMM6, XMM5, _MM_SHUFFLE(3,1,3,1));
+		  XMM4	 = _mm_load_ps(logmdct+i*8+ 40);
+		  XMM5	 = _mm_load_ps(logmdct+i*8+ 44);
+		  XMM1	 = _mm_min_ps(XMM1, XMM0);
+		  XMM0	 = _mm_load_ps(logmdct+i*8+ 48);
+		  XMM6	 = _mm_min_ps(XMM6, XMM2);
+		  XMM2	 = _mm_load_ps(logmdct+i*8+ 52);
+		  XMM6	 = _mm_min_ps(XMM6, XMM1);
+		  XMM1	 = _mm_load_ps(logmdct+i*8+ 56);
+		  _mm_store_ps(lastmdct+i   , XMM6);
+		  XMM6	 = _mm_load_ps(logmdct+i*8+ 60);
+		  XMM3	 = _mm_min_ps(XMM3, XMM7);
+		  XMM4	 = _mm_min_ps(XMM4, XMM5);
+		  XMM0	 = _mm_min_ps(XMM0, XMM2);
+		  XMM1	 = _mm_min_ps(XMM1, XMM6);
+		  XMM7	 = XMM3;
+		  XMM2	 = XMM0;
+		  XMM3	 = _mm_shuffle_ps(XMM3, XMM4, _MM_SHUFFLE(2,0,2,0));
+		  XMM0	 = _mm_shuffle_ps(XMM0, XMM1, _MM_SHUFFLE(2,0,2,0));
+		  XMM5	 = _mm_load_ps(logmdct+i*8+ 64);
+		  XMM6	 = _mm_load_ps(logmdct+i*8+ 68);
+		  XMM7	 = _mm_shuffle_ps(XMM7, XMM4, _MM_SHUFFLE(3,1,3,1));
+		  XMM2	 = _mm_shuffle_ps(XMM2, XMM1, _MM_SHUFFLE(3,1,3,1));
+		  XMM4	 = XMM3;
+		  XMM1	 = XMM7;
+		  XMM3	 = _mm_shuffle_ps(XMM3, XMM0, _MM_SHUFFLE(2,0,2,0));
+		  XMM7	 = _mm_shuffle_ps(XMM7, XMM2, _MM_SHUFFLE(2,0,2,0));
+		  XMM4	 = _mm_shuffle_ps(XMM4, XMM0, _MM_SHUFFLE(3,1,3,1));
+		  XMM1	 = _mm_shuffle_ps(XMM1, XMM2, _MM_SHUFFLE(3,1,3,1));
+		  XMM0	 = _mm_load_ps(logmdct+i*8+ 72);
+		  XMM2	 = _mm_load_ps(logmdct+i*8+ 76);
+		  XMM7	 = _mm_min_ps(XMM7, XMM3);
+		  XMM3	 = _mm_load_ps(logmdct+i*8+ 80);
+		  XMM1	 = _mm_min_ps(XMM1, XMM4);
+		  XMM4	 = _mm_load_ps(logmdct+i*8+ 84);
+		  XMM1	 = _mm_min_ps(XMM1, XMM7);
+		  XMM7	 = _mm_load_ps(logmdct+i*8+ 88);
+		  _mm_store_ps(lastmdct+i+ 4, XMM1);
+		  XMM1	 = _mm_load_ps(logmdct+i*8+ 92);
+		  XMM5	 = _mm_min_ps(XMM5, XMM6);
+		  XMM0	 = _mm_min_ps(XMM0, XMM2);
+		  XMM3	 = _mm_min_ps(XMM3, XMM4);
+		  XMM7	 = _mm_min_ps(XMM7, XMM1);
+		  XMM6	 = XMM5;
+		  XMM4	 = XMM3;
+		  XMM5	 = _mm_shuffle_ps(XMM5, XMM0, _MM_SHUFFLE(2,0,2,0));
+		  XMM3	 = _mm_shuffle_ps(XMM3, XMM7, _MM_SHUFFLE(2,0,2,0));
+		  XMM2	 = _mm_load_ps(logmdct+i*8+ 96);
+		  XMM1	 = _mm_load_ps(logmdct+i*8+100);
+		  XMM6	 = _mm_shuffle_ps(XMM6, XMM0, _MM_SHUFFLE(3,1,3,1));
+		  XMM4	 = _mm_shuffle_ps(XMM4, XMM7, _MM_SHUFFLE(3,1,3,1));
+		  XMM0	 = XMM5;
+		  XMM7	 = XMM6;
+		  XMM5	 = _mm_shuffle_ps(XMM5, XMM3, _MM_SHUFFLE(2,0,2,0));
+		  XMM6	 = _mm_shuffle_ps(XMM6, XMM4, _MM_SHUFFLE(2,0,2,0));
+		  XMM0	 = _mm_shuffle_ps(XMM0, XMM3, _MM_SHUFFLE(3,1,3,1));
+		  XMM7	 = _mm_shuffle_ps(XMM7, XMM4, _MM_SHUFFLE(3,1,3,1));
+		  XMM3	 = _mm_load_ps(logmdct+i*8+104);
+		  XMM4	 = _mm_load_ps(logmdct+i*8+108);
+		  XMM6	 = _mm_min_ps(XMM6, XMM5);
+		  XMM5	 = _mm_load_ps(logmdct+i*8+112);
+		  XMM7	 = _mm_min_ps(XMM7, XMM0);
+		  XMM0	 = _mm_load_ps(logmdct+i*8+116);
+		  XMM7	 = _mm_min_ps(XMM7, XMM6);
+		  XMM6	 = _mm_load_ps(logmdct+i*8+120);
+		  _mm_store_ps(lastmdct+i+ 8, XMM7);
+		  XMM7	 = _mm_load_ps(logmdct+i*8+124);
+		  XMM2	 = _mm_min_ps(XMM2, XMM1);
+		  XMM3	 = _mm_min_ps(XMM3, XMM4);
+		  XMM5	 = _mm_min_ps(XMM5, XMM0);
+		  XMM6	 = _mm_min_ps(XMM6, XMM7);
+		  XMM1	 = XMM2;
+		  XMM0	 = XMM5;
+		  XMM2	 = _mm_shuffle_ps(XMM2, XMM3, _MM_SHUFFLE(2,0,2,0));
+		  XMM5	 = _mm_shuffle_ps(XMM5, XMM6, _MM_SHUFFLE(2,0,2,0));
+		  XMM1	 = _mm_shuffle_ps(XMM1, XMM3, _MM_SHUFFLE(3,1,3,1));
+		  XMM0	 = _mm_shuffle_ps(XMM0, XMM6, _MM_SHUFFLE(3,1,3,1));
+		  XMM3	 = XMM2;
+		  XMM6	 = XMM1;
+		  XMM2	 = _mm_shuffle_ps(XMM2, XMM5, _MM_SHUFFLE(2,0,2,0));
+		  XMM1	 = _mm_shuffle_ps(XMM1, XMM0, _MM_SHUFFLE(2,0,2,0));
+		  XMM3	 = _mm_shuffle_ps(XMM3, XMM5, _MM_SHUFFLE(3,1,3,1));
+		  XMM6	 = _mm_shuffle_ps(XMM6, XMM0, _MM_SHUFFLE(3,1,3,1));
+		  XMM1	 = _mm_min_ps(XMM1, XMM2);
+		  XMM6	 = _mm_min_ps(XMM6, XMM3);
+		  XMM6	 = _mm_min_ps(XMM6, XMM1);
+		  _mm_store_ps(lastmdct+i+12, XMM6);
+		}
+	  }
+	}
+	else
+	  if(n == 128)
+	  {
+		for(i=0;i<128;i+=32)
+		{
+		  __m128	XMM0	 = _mm_load_ps(logmdct+i   );
+		  __m128	XMM1	 = _mm_load_ps(logmdct+i+ 4);
+		  __m128	XMM2	 = _mm_load_ps(logmdct+i+ 8);
+		  __m128	XMM3	 = _mm_load_ps(logmdct+i+12);
+		  __m128	XMM4	 = _mm_load_ps(logmdct+i+16);
+		  __m128	XMM5	 = _mm_load_ps(logmdct+i+20);
+		  __m128	XMM6	 = _mm_load_ps(logmdct+i+24);
+		  __m128	XMM7	 = _mm_load_ps(logmdct+i+28);
+		  _mm_store_ps(lastmdct+i   , XMM0);
+		  _mm_store_ps(lastmdct+i+ 4, XMM1);
+		  _mm_store_ps(lastmdct+i+ 8, XMM2);
+		  _mm_store_ps(lastmdct+i+12, XMM3);
+		  _mm_store_ps(lastmdct+i+16, XMM4);
+		  _mm_store_ps(lastmdct+i+20, XMM5);
+		  _mm_store_ps(lastmdct+i+24, XMM6);
+		  _mm_store_ps(lastmdct+i+28, XMM7);
+		}
+	  }
+#else														/* SSE Optimize */
 	if(n == 1024){
 		if(!nW_modenumber){
 			for(i=0; i<128; i++){
@@ -1161,6 +5315,7 @@
 	}else if(n == 128){
 		for(i=0; i<128; i++) lastmdct[i] = logmdct[i];
 	}
+#endif														/* SSE Optimize */
   }
 }
 
@@ -1177,7 +5332,11 @@
   return(amp);
 }
 
+#ifdef	__SSE__												/* SSE Optimize */
+static inline void couple_lossless(float A, float B, 
+#else														/* SSE Optimize */
 static void couple_lossless(float A, float B, 
+#endif														/* SSE Optimize */
 			    float *qA, float *qB){
   int test1=fabs(*qA)>fabs(*qB);
   test1-= fabs(*qA)<fabs(*qB);
@@ -1197,7 +5356,80 @@
   }
 }
 
-static float hypot_lookup[32]={
+#ifdef	__SSE__												/* SSE Optimize */
+	/*
+		Phase 1.
+			fabs(*qA)>fabs(*qB)	test1 =  1
+			fabs(*qA)<fabs(*qB)	test1 = -1
+			fabs(*qA)=fabs(*qB)	fabs(A)> fabs(B)	test1 =  1
+			fabs(*qA)=fabs(*qB)	fabs(A)<=fabs(B)	test1 = -1
+		
+		Phase 2.
+			*qB	 = S(*qA)^(*qA-*qB)	(test1==1)
+			*qB	 = S(*qB)^(*qA-*qB)	(test1!=1)
+			*qA= Old *qA			(test1==1)
+			*qA= Old *qB			(test1!=1)
+		
+		Phase 3.
+			*qB	 = -fabs(*qA)*2.f	(*qB >fabs(*qA)*1.9999f)
+			*qB	 = *qB				(*qB<=fabs(*qA)*1.9999f)
+			*qA	 = -*qA				(*qB >fabs(*qA)*1.9999f)
+			*qA	 =  *qA				(*qB<=fabs(*qA)*1.9999f)
+	*/
+static inline void couple_lossless_ps(float *A, float *B, float *qA, float *qB)	/* packed form: handles 4 floats per pointer; qA[0..3] and qB[0..3] are rewritten in place */
+{
+	/*
+		Phase 1: build PTEST1, an all-ones lane mask where |qA|>|qB|, or where |qA|==|qB| and |A|>|B|
+	*/
+	__m128	PQA	 = _mm_load_ps(qA);	/* _mm_load_ps: qA must be 16-byte aligned */
+	__m128	PQB	 = _mm_load_ps(qB);	/* _mm_load_ps: qB must be 16-byte aligned */
+	__m128	FQA	 = _mm_and_ps(PQA, PABSMASK.ps);	/* presumably fabs(qA): PABSMASK is defined elsewhere - confirm it clears the sign bit */
+	__m128	FQB	 = _mm_and_ps(PQB, PABSMASK.ps);	/* presumably fabs(qB) */
+	__m128	XMM0	 = _mm_and_ps(PM128(A), PABSMASK.ps);	/* presumably fabs(A); PM128 is a project macro */
+	__m128	XMM1	 = _mm_and_ps(PM128(B), PABSMASK.ps);	/* presumably fabs(B) */
+	__m128	PTEST1;
+	__m128	PTEST2;
+	__m128	PFQA2M;
+	
+	XMM0	 = _mm_cmpgt_ps(XMM0, XMM1);	/* tie-break mask: |A| > |B| */
+	XMM1	 = _mm_cmpneq_ps(FQA, FQB);	/* mask: |qA| != |qB| */
+	PTEST1	 = _mm_or_ps(
+					_mm_and_ps(_mm_cmpgt_ps(FQA, FQB), XMM1),
+					_mm_andnot_ps(XMM1, XMM0)
+				);
+	PTEST2	 = PTEST1;
+	
+	/*
+		Phase 2: new qB = (qA-qB) with the selected sign XORed in; new qA = PTEST1 ? qA : qB
+	*/
+	XMM0	 = _mm_and_ps(PQA, PCS_RRRR.ps);	/* Sign of PQA */
+	XMM1	 = _mm_and_ps(PQB, PCS_RRRR.ps);	/* Sign of PQB */
+	XMM0	 = _mm_and_ps(XMM0, PTEST2);
+	XMM1	 = _mm_andnot_ps(PTEST2, XMM1);
+	XMM0	 = _mm_or_ps(XMM0, XMM1);				/* Sign of new *qB */
+	XMM1	 = _mm_sub_ps(PQA, PQB);				/* New *qB Body */
+	XMM1	 = _mm_xor_ps(XMM1, XMM0);				/* New qB */
+	PQA		 = _mm_and_ps(PQA, PTEST1);
+	PQB		 = _mm_andnot_ps(PTEST1, PQB);
+	XMM0	 = _mm_or_ps(PQA, PQB);					/* New qA */
+	
+	/*
+		Phase 3: where new qB >= 2*FQA, store -(2*FQA) to qB and the negated new qA to qA
+	*/
+	PFQA2M	 = _mm_mul_ps(FQA, PFV_2.ps);	/* NOTE(review): FQA comes from qA as loaded at entry, not from the Phase 2 result; PFV_2 presumably holds 2.0f */
+	
+	PTEST1	 = _mm_cmpge_ps(XMM1, PFQA2M);		/* Mask of *qB >= fabs(*qA)*2.f; NOTE(review): the header comment states > with 1.9999f */
+	PTEST2	 = PTEST1;
+	PQB		 = _mm_xor_ps(PFQA2M, PCS_RRRR.ps);	/* -fabs(qA)*2.f */
+	PQA		 = _mm_xor_ps(XMM0   , PCS_RRRR.ps);	/* -qA */
+	PQB		 = _mm_or_ps(_mm_and_ps(PQB, PTEST1), _mm_andnot_ps(PTEST1, XMM1));
+	PQA		 = _mm_or_ps(_mm_and_ps(PQA, PTEST2), _mm_andnot_ps(PTEST2, XMM0));
+	_mm_store_ps(qB, PQB);
+	_mm_store_ps(qA, PQA);
+}
+#endif														/* SSE Optimize */
+
+static const float hypot_lookup[32]={
   -0.009935, -0.011245, -0.012726, -0.014397, 
   -0.016282, -0.018407, -0.020800, -0.023494, 
   -0.026522, -0.029923, -0.033737, -0.038010, 
@@ -1207,7 +5439,11 @@
   -0.159093, -0.175146, -0.192286, -0.210490, 
   -0.229718, -0.249913, -0.271001, -0.292893};
 
+#ifdef	__SSE__												/* SSE Optimize */
+static inline void precomputed_couple_point(float premag,
+#else														/* SSE Optimize */
 static void precomputed_couple_point(float premag,
+#endif														/* SSE Optimize */
 				     int floorA,int floorB,
 				     float *mag, float *ang){
   
@@ -1221,6 +5457,73 @@
   *ang=0.f;
 }
 
+#ifdef	__SSE__												/* SSE Optimize */
+static inline void precomputed_couple_point_ps(float *premag,	/* packed form: mag[k] = premag[k]*(hypot_lookup[..]+1)*FLOOR1_fromdB_INV_LOOKUP[..], ang[k] = 0 for k=0..3 */
+				     int *floorA,int *floorB,
+				     float *mag, float *ang){
+	__m128	XMM0;
+	__m128x	PI0, PI1;	/* accessed below through .pi / .si32 / .sf / .ps members; PI0 = hypot index/value, PI1 = floor index/value */
+#ifdef	__SSE2__
+	{
+		__m128i	PFA	 = PM128I(floorA);
+		__m128i	PFB	 = PM128I(floorB);
+		__m128i	XMM0	 = PFA;	/* shadows the outer __m128 XMM0 inside this scope */
+		__m128i	XMM1	 = PFA;
+		__m128i	XMM2	 = _mm_set_epi32(31, 31, 31, 31);
+		__m128i	PFI0	 = _mm_setzero_si128();
+		__m128i XMM3 = PFI0;
+		XMM0	 = _mm_cmpgt_epi32(XMM0, PFB);	/* mask: floorA > floorB */
+		PFA		 = _mm_and_si128(PFA, XMM0);
+		XMM0	 = _mm_andnot_si128(XMM0, PFB);
+		PFA		 = _mm_or_si128(PFA, XMM0);	/* floorA where floorA>floorB, else floorB */
+		PI1.pi	 = PFA;
+
+		XMM1	 = _mm_sub_epi32(XMM1, PFB);	/* floorA - floorB */
+		XMM3	 = _mm_cmpgt_epi32(XMM3, XMM1);	/* mask: difference is negative */
+		XMM1	 = _mm_xor_si128(XMM1, XMM3);
+		XMM1	 = _mm_sub_epi32(XMM1, XMM3);	/* xor/sub with the mask negates the negative lanes: |floorA - floorB| */
+		XMM2	 = _mm_sub_epi32(XMM2, XMM1);	/* 31 - |floorA - floorB| */
+		XMM3	 = XMM2;
+		XMM3	 = _mm_cmpgt_epi32(XMM3, PFI0);
+		XMM2	 = _mm_and_si128(XMM2, XMM3);	/* lanes that are not > 0 become 0 */
+		PI0.pi	 = XMM2;
+	}
+	PI0.sf[0]	 = hypot_lookup[PI0.si32[0]];	/* per-lane scalar table lookups; each integer index is overwritten by its float value */
+	PI0.sf[1]	 = hypot_lookup[PI0.si32[1]];
+	PI0.sf[2]	 = hypot_lookup[PI0.si32[2]];
+	PI0.sf[3]	 = hypot_lookup[PI0.si32[3]];
+	PI1.sf[0]	 = FLOOR1_fromdB_INV_LOOKUP[PI1.si32[0]];
+	PI1.sf[1]	 = FLOOR1_fromdB_INV_LOOKUP[PI1.si32[1]];
+	PI1.sf[2]	 = FLOOR1_fromdB_INV_LOOKUP[PI1.si32[2]];
+	PI1.sf[3]	 = FLOOR1_fromdB_INV_LOOKUP[PI1.si32[3]];
+#else
+	int test0	 = (*(floorA  )>*(floorB  ))-1;	/* 0 when floorA>floorB, otherwise -1 (all bits set) */
+	int test1	 = (*(floorA+1)>*(floorB+1))-1;
+	int test2	 = (*(floorA+2)>*(floorB+2))-1;
+	int test3	 = (*(floorA+3)>*(floorB+3))-1;
+	int offset0	 = 31-abs(*(floorA  )-*(floorB  ));
+	int offset1	 = 31-abs(*(floorA+1)-*(floorB+1));
+	int offset2	 = 31-abs(*(floorA+2)-*(floorB+2));
+	int offset3	 = 31-abs(*(floorA+3)-*(floorB+3));
+	PI0.sf[0]	 = hypot_lookup[((offset0<0)-1)&offset0];	/* ((offset<0)-1)&offset: negative offsets index entry 0 */
+	PI0.sf[1]	 = hypot_lookup[((offset1<0)-1)&offset1];
+	PI0.sf[2]	 = hypot_lookup[((offset2<0)-1)&offset2];
+	PI0.sf[3]	 = hypot_lookup[((offset3<0)-1)&offset3];
+
+	PI1.sf[0]	 = FLOOR1_fromdB_INV_LOOKUP[(*(floorB  )&test0)|(*(floorA  )&(~test0))];	/* selects floorA when floorA>floorB, else floorB */
+	PI1.sf[1]	 = FLOOR1_fromdB_INV_LOOKUP[(*(floorB+1)&test1)|(*(floorA+1)&(~test1))];
+	PI1.sf[2]	 = FLOOR1_fromdB_INV_LOOKUP[(*(floorB+2)&test2)|(*(floorA+2)&(~test2))];
+	PI1.sf[3]	 = FLOOR1_fromdB_INV_LOOKUP[(*(floorB+3)&test3)|(*(floorA+3)&(~test3))];
+#endif
+
+	XMM0	 = _mm_add_ps(PI0.ps, PFV_1.ps);	/* hypot_lookup value + PFV_1 (presumably 1.0f in every lane - confirm) */
+	XMM0	 = _mm_mul_ps(XMM0, PI1.ps);
+	XMM0	 = _mm_mul_ps(XMM0, PM128(premag));
+	_mm_store_ps(mag, XMM0);	/* _mm_store_ps: mag must be 16-byte aligned */
+	_mm_store_ps(ang, _mm_setzero_ps());	/* _mm_store_ps: ang must be 16-byte aligned */
+}
+#endif														/* SSE Optimize */
+
 /* just like below, this is currently set up to only do
    single-step-depth coupling.  Otherwise, we'd have to do more
    copying (which will be inevitable later) */
@@ -1237,6 +5540,56 @@
   if(-a>b)return -sqrt(a*a-b*b);
   return sqrt(b*b-a*a);
 }
+#ifdef	__SSE__												/* SSE Optimize */
+/*
+	a>0 b>0						 sqrt(a*a+b*b)
+	a>0 b<=0 a>abs(b)			 sqrt(a*a-b*b)
+	a>0 b<=0 a<=abs(b)			-sqrt(b*b-a*a)
+	a<=0 b<0					-sqrt(a*a+b*b)
+	a<=0 b>=0 abs(a)>abs(b)		-sqrt(a*a-b*b)
+	a<=0 b>=0 abs(a)<=abs(b)	 sqrt(b*b-a*a)
+
+	sa	sb	fa<=fb	rs	s(a*b)	s(a*b)&(fa<=fb)	s(a*b)&(fa<=fb)^sa
+	0	0	*		0	0		0				0
+	0	1	0		0	1		0				0
+	0	1	1		1	1		1				1
+	1	1	*		1	0		0				1
+	1	0	0		1	1		0				1
+	1	0	1		0	1		1				0
+
+	sa	sb	fa<=fb	(a&~(fa<=fb))|(b&(fa<=fb))	(a&(fa<=fb))|(b&~(fa<=fb))
+	0	0	*		*							*
+	0	1	0		a							b
+	0	1	1		b							a
+	1	1	*		*							*
+	1	0	0		a							b
+	1	0	1		b							a
+*/
+static inline __m128 dipole_hypot_ps(float* a, float *b)	/* packed form for a[0..3], b[0..3]: returns sqrt(max(a*a,b*b) +/- min(a*a,b*b)) with the sign from the table above */
+{
+	__m128	XMM0, XMM1, XMM2, XMM3;
+	__m128	A	 = _mm_load_ps(a);	/* _mm_load_ps: a must be 16-byte aligned */
+	__m128	B	 = _mm_load_ps(b);	/* _mm_load_ps: b must be 16-byte aligned */
+	__m128	PMASK	 = _mm_cmple_ps(_mm_and_ps(A, PABSMASK.ps), _mm_and_ps(B, PABSMASK.ps));	/* mask of fa<=fb (assuming PABSMASK clears the sign bit) */
+	XMM2	 = _mm_cmplt_ps(_mm_mul_ps(A, B), PFV_0.ps);	/* XMM2 = mask of lanes where A*B < PFV_0 (presumably 0.0f) */
+	XMM0	 = _mm_and_ps(A, PCS_RRRR.ps);					/* XMM0 = SA */
+	XMM1	 = _mm_xor_ps(
+					_mm_and_ps(
+						_mm_and_ps(XMM2, PCS_RRRR.ps),
+						PMASK
+					),
+					XMM0
+				);												/* XMM1 = Sign of result */
+	A		 = _mm_mul_ps(A, A);
+	B		 = _mm_mul_ps(B, B);
+	XMM2	 = _mm_and_ps(XMM2, PCS_RRRR.ps);	/* sign bit set only in lanes where A*B was negative */
+	XMM3	 = _mm_min_ps(A, B);
+	XMM0	 = _mm_max_ps(A, B);
+	XMM3	 = _mm_or_ps(XMM3, XMM2);	/* negate the smaller square in those lanes, so the add below becomes a subtraction */
+	B		 = _mm_or_ps(_mm_sqrt_ps(_mm_add_ps(XMM0, XMM3)), XMM1);	/* OR the result sign onto the square root */
+	return	B;
+}
+#endif														/* SSE Optimize */
 static float round_hypot(float a, float b){
   if(a>0.){
     if(b>0.)return sqrt(a*a+b*b);
@@ -1247,8 +5600,121 @@
   if(-a>b)return -sqrt(a*a+b*b);
   return sqrt(b*b+a*a);
 }
+#ifdef	__SSE__												/* SSE Optimize */
+#define round_hypot_ps(d, PA, PB)	/* stores to d[0..3] the signed sqrt(a*a+b*b) for a=PA[0..3], b=PB[0..3]; d, PA, PB go through _mm_store_ps/_mm_load_ps (16-byte aligned) */	\
+{																										\
+	__m128	R0, SA;																						\
+	{																									\
+		__m128	SAMB;																					\
+		{																								\
+			__m128	FASB;																				\
+			{																							\
+				__m128	P2A, P2B;																		\
+				{																						\
+					__m128	FA, FB;																		\
+					{																					\
+						__m128	A	 = _mm_load_ps(PA);													\
+						__m128	B	 = _mm_load_ps(PB);													\
+						SA		 = _mm_and_ps(A, PCS_RRRR.ps);		/* sign of a */					\
+						FA		 = _mm_and_ps(A, PABSMASK.ps);		/* FA = fabs(a) */				\
+						FB		 = _mm_and_ps(B, PABSMASK.ps);		/* FB = fabs(b) */				\
+						P2A		 = _mm_mul_ps(A, A);					/* a*a */						\
+						P2B		 = _mm_mul_ps(B, B);					/* b*b */						\
+						SAMB	 = _mm_mul_ps(A, B);					/* a*b */						\
+					}																					\
+					FASB	 = _mm_cmple_ps(FA, FB);					/* mask of fa<=fb */			\
+				}																						\
+				R0		 = _mm_add_ps(P2A, P2B);						/* a*a+b*b */					\
+			}																							\
+			FASB	 = _mm_and_ps(FASB, PCS_RRRR.ps);				/* sign-bit position only, where fa<=fb */	\
+			R0		 = _mm_sqrt_ps(R0);									/* sqrt(a*a+b*b) */				\
+			SAMB	 = _mm_and_ps(SAMB, FASB);								/* sign bit of a*b, kept only where fa<=fb */	\
+		}																								\
+		SA		 = _mm_xor_ps(SA, SAMB);								/* sign(a), flipped where a*b<0 and fa<=fb */	\
+	}																									\
+	R0		 = _mm_xor_ps(R0, SA);										/* set sign to result */		\
+	_mm_store_ps(d, R0);																				\
+}
+#endif														/* SSE Optimize */
 /* modified hypot by aoyumi 
     better method should be found. */
+#ifdef	__SSE__												/* SSE Optimize */
+#if	0
+/*
+	a>0 b>0						 sqrt(a*a+b*b*0.92)
+	a>0 b<=0 a>abs(b)			 sqrt(a*a-b*b*0.16)
+	a>0 b<=0 a<=abs(b)			-sqrt(b*b-a*a*0.16)
+	a<=0 b<0					-sqrt(a*a+b*b*0.92)
+	a<=0 b>=0 abs(a)>b			-sqrt(a*a-b*b*0.16)
+	a<=0 b>=0 abs(a)<=b			 sqrt(b*b-a*a*0.16)
+
+	sa	sb	fa<=fb	rs	s(a*b)	s(a*b)&(fa<=fb)	s(a*b)&(fa<=fb)^sa
+	0	0	*		0	0		0				0
+	0	1	0		0	1		0				0
+	0	1	1		1	1		1				1
+	1	1	*		1	0		0				1
+	1	0	0		1	1		0				1
+	1	0	1		0	1		1				0
+*/
+static inline __m128 min_indemnity_dipole_hypot_ps(float* a, float *b)	/* packed form for a[0..3], b[0..3] of the table above; currently compiled out by the enclosing #if 0 */
+{
+	static _MM_ALIGN16 const __m128x PFV_p92 =
+		{ .sf = {0.92f, 0.92f, 0.92f, 0.92f} };
+	static _MM_ALIGN16 const __m128x PFV_mp16 =
+		{ .sf = {-0.16f, -0.16f, -0.16f, -0.16f} };
+	static _MM_ALIGN16 const __m128x PFV_mp5 =
+		{ .sf = {-0.5f, -0.5f, -0.5f, -0.5f} };
+	static _MM_ALIGN16 const __m128x PFV_1p5 =
+		{ .sf = {1.5f, 1.5f, 1.5f, 1.5f} };
+	__m128	XMM0, XMM1, XMM2, XMM3;
+	__m128	A	 = _mm_load_ps(a);	/* _mm_load_ps: a must be 16-byte aligned */
+	__m128	B	 = _mm_load_ps(b);	/* _mm_load_ps: b must be 16-byte aligned */
+	__m128	PMASK	 = _mm_cmple_ps(_mm_and_ps(A, PABSMASK.ps), _mm_and_ps(B, PABSMASK.ps));	/* mask of fa<=fb */
+	XMM2	 = _mm_cmplt_ps(_mm_mul_ps(A, B), PFV_0.ps);	/* XMM2 = mask of lanes where A*B < 0 */
+	XMM0	 = _mm_and_ps(A, PCS_RRRR.ps);					/* XMM0 = SA */
+	XMM3	 = XMM2;
+	XMM3	 = _mm_and_ps(XMM3, PMASK);	/* XMM3 = mask: A*B<0 and fa<=fb */
+	XMM1	 = XMM3;
+	XMM1	 = _mm_and_ps(XMM1, PCS_RRRR.ps);
+	XMM1	 = _mm_xor_ps(XMM1, XMM0);	/* XMM1 = sign of result */
+	A		 = _mm_mul_ps(A, A);
+	B		 = _mm_mul_ps(B, B);
+	XMM0	 = _mm_or_ps(
+					_mm_and_ps(PFV_mp16.ps, XMM2),
+					_mm_andnot_ps(XMM2, PFV_p92.ps)
+				);												/* XMM0 = Packed Multi Value */
+	XMM2	 = XMM3;
+	PMASK	 = B;	/* PMASK is reused as a scratch copy of b*b */
+	B		 = _mm_or_ps(
+					_mm_and_ps(B, XMM2),
+					_mm_andnot_ps(XMM2, A)
+				);	/* B = b*b where the XMM3 mask is set, else a*a */
+	A		 = _mm_or_ps(
+					_mm_and_ps(A, XMM3),
+					_mm_andnot_ps(XMM3, PMASK)
+				);	/* A = the other square */
+	A		 = _mm_mul_ps(A, XMM0);
+	B		 = _mm_add_ps(B, A);	/* B = x, the value whose square root is wanted */
+#if	1
+	XMM0	 = _mm_rsqrt_ps(_mm_max_ps(B, _mm_set1_ps(1.17549435e-38f)));	/* clamp to FLT_MIN: rsqrt(0) is +inf and 0*inf below would yield NaN */
+	XMM2	 = XMM0;
+	XMM3	 = B;
+	XMM3	 = _mm_mul_ps(XMM3, XMM0);
+	XMM3	 = _mm_mul_ps(XMM3, XMM0);
+	XMM3	 = _mm_mul_ps(XMM3, XMM0);
+	XMM3	 = _mm_mul_ps(XMM3, PFV_mp5.ps);
+	XMM2	 = _mm_mul_ps(XMM2, PFV_1p5.ps);
+	XMM2	 = _mm_add_ps(XMM2, XMM3);	/* one Newton-Raphson step: r*(1.5 - 0.5*x*r*r) */
+	B		 = _mm_mul_ps(B, XMM2);	/* x * refined rsqrt(x) ~= sqrt(x) */
+#else
+	B		 = _mm_sqrt_ps(B);
+#endif
+	B		 = _mm_or_ps(B, XMM1);
+	return	B;
+}
+#endif
+#endif														/* SSE Optimize */
+#if !defined(__SSE__)										/* SSE Optimize */
 static float min_indemnity_dipole_hypot(float a, float b){
   float thnor=0.92;
   float threv=0.84;
@@ -1263,6 +5729,8 @@
   if(-a>b)return -sqrt(a2-b2+b2*threv);
   return sqrt(b2-a2+a2*threv);
 }
+#endif														/* SSE Optimize */
+
 
 /* revert to round hypot for now */
 float **_vp_quantize_couple_memo(vorbis_block *vb,
@@ -1281,8 +5749,228 @@
     	float *mdctA=mdct[vi->coupling_ang[i]];
     	
     	ret[i]=_vorbis_block_alloc(vb,n*sizeof(**ret));
+#ifdef	__SSE__												/* SSE Optimize */
+		for(j=0;j<n;j+=16)	/* SSE form of ret[i][j]=min_indemnity_dipole_hypot(mdctM[j],mdctA[j]); 16 floats per pass as four interleaved 4-float stanzas */
+		{
+			static _MM_ALIGN16 const float PFV_p92[4]	 = {0.92f, 0.92f, 0.92f, 0.92f};
+			static _MM_ALIGN16 const float PFV_mp16[4]	 = {-0.16f, -0.16f, -0.16f, -0.16f};
+			static _MM_ALIGN16 const float PFV_mp5[4]	 = {-0.5f, -0.5f, -0.5f, -0.5f};
+			static _MM_ALIGN16 const float PFV_1p5[4]	 = {1.5f, 1.5f, 1.5f, 1.5f};
+			__m128	XMM0, XMM1, XMM2, XMM3;
+			__m128	XMM4, XMM5, XMM6, XMM7;
+			XMM0	 = _mm_load_ps(mdctM+j   );	/* stanza 1: a = mdctM[j..j+3] */
+			XMM1	 = _mm_load_ps(mdctA+j   );	/* b = mdctA[j..j+3] */
+			XMM2	 = _mm_load_ps(PABSMASK.sf);
+			XMM3	 = _mm_load_ps(PFV_0.sf);
+			XMM4	 = XMM0;
+			XMM5	 = XMM0;
+			XMM6	 = XMM0;
+			XMM7	 = XMM1;
+			XMM5	 = _mm_mul_ps(XMM5, XMM1);
+			XMM4	 = _mm_and_ps(XMM4, XMM2);
+			XMM7	 = _mm_and_ps(XMM7, XMM2);
+			XMM2	 = _mm_load_ps(PCS_RRRR.sf);
+			XMM5	 = _mm_cmplt_ps(XMM5, XMM3);	/* mask: a*b < 0 */
+			XMM4	 = _mm_cmple_ps(XMM4, XMM7);	/* mask: fa <= fb */
+			XMM6	 = _mm_and_ps(XMM6, XMM2);	/* sign of a */
+			XMM3	 = XMM5;
+			XMM3	 = _mm_and_ps(XMM3, XMM4);	/* mask: a*b<0 and fa<=fb */
+			XMM7	 = XMM3;
+			XMM7	 = _mm_and_ps(XMM7, XMM2);
+			XMM2	 = _mm_load_ps(PFV_p92);
+			XMM7	 = _mm_xor_ps(XMM7, XMM6);	/* sign of result */
+			XMM6	 = _mm_load_ps(PFV_mp16);
+			XMM0	 = _mm_mul_ps(XMM0, XMM0);	/* a*a */
+			XMM6	 = _mm_and_ps(XMM6, XMM5);
+			XMM1	 = _mm_mul_ps(XMM1, XMM1);	/* b*b */
+			XMM5	 = _mm_andnot_ps(XMM5, XMM2);
+			XMM4	 = XMM1;
+			XMM6	 = _mm_or_ps(XMM6, XMM5);	/* weight: -0.16 where a*b<0, else 0.92 */
+			XMM5	 = XMM3;
+			XMM2	 = XMM0;
+			XMM1	 = _mm_and_ps(XMM1, XMM5);
+			XMM0	 = _mm_and_ps(XMM0, XMM3);
+			XMM5	 = _mm_andnot_ps(XMM5, XMM2);
+			XMM3	 = _mm_andnot_ps(XMM3, XMM4);
+			XMM2	 = _mm_load_ps(PFV_mp5);
+			XMM4	 = _mm_load_ps(PFV_1p5);
+			XMM1	 = _mm_or_ps(XMM1, XMM5);	/* b*b where the combined mask is set, else a*a */
+			XMM0	 = _mm_or_ps(XMM0, XMM3);	/* the other square */
+			XMM0	 = _mm_mul_ps(XMM0, XMM6);
+			XMM1	 = _mm_add_ps(XMM1, XMM0);	/* x = value whose square root is wanted */
+			XMM6	 = _mm_rsqrt_ps(_mm_max_ps(XMM1, _mm_set1_ps(1.17549435e-38f)));	/* clamp to FLT_MIN: rsqrt(0) is +inf and 0*inf below would yield NaN */
+			XMM5	 = XMM6;
+			XMM3	 = XMM1;
+			XMM3	 = _mm_mul_ps(XMM3, XMM6);
+			XMM3	 = _mm_mul_ps(XMM3, XMM6);
+			XMM0	 = _mm_load_ps(mdctM+j+ 4);	/* stanza 2 loads are interleaved with the end of stanza 1 */
+			XMM3	 = _mm_mul_ps(XMM3, XMM6);
+			XMM6	 = _mm_load_ps(mdctA+j+ 4);
+			XMM3	 = _mm_mul_ps(XMM3, XMM2);
+			XMM2	 = _mm_load_ps(PABSMASK.sf);
+			XMM5	 = _mm_mul_ps(XMM5, XMM4);
+			XMM4	 = _mm_load_ps(PFV_0.sf);
+			XMM5	 = _mm_add_ps(XMM5, XMM3);	/* one Newton-Raphson step: r*(1.5 - 0.5*x*r*r) */
+			XMM3	 = XMM0;
+			XMM1		 = _mm_mul_ps(XMM1, XMM5);	/* x * refined rsqrt(x) ~= sqrt(x) */
+			XMM5	 = XMM0;
+			XMM1		 = _mm_or_ps(XMM1, XMM7);
+			XMM7	 = XMM0;
+			_mm_store_ps(ret[i]+j   , XMM1);
+			XMM1	 = XMM6;
+			XMM5	 = _mm_mul_ps(XMM5, XMM6);
+			XMM3	 = _mm_and_ps(XMM3, XMM2);
+			XMM1	 = _mm_and_ps(XMM1, XMM2);
+			XMM2	 = _mm_load_ps(PCS_RRRR.sf);
+			XMM5	 = _mm_cmplt_ps(XMM5, XMM4);
+			XMM3	 = _mm_cmple_ps(XMM3, XMM1);
+			XMM7	 = _mm_and_ps(XMM7, XMM2);
+			XMM4	 = XMM5;
+			XMM4	 = _mm_and_ps(XMM4, XMM3);
+			XMM1	 = XMM4;
+			XMM1	 = _mm_and_ps(XMM1, XMM2);
+			XMM2	 = _mm_load_ps(PFV_p92);
+			XMM1	 = _mm_xor_ps(XMM1, XMM7);
+			XMM7	 = _mm_load_ps(PFV_mp16);
+			XMM0	 = _mm_mul_ps(XMM0, XMM0);
+			XMM7	 = _mm_and_ps(XMM7, XMM5);
+			XMM6	 = _mm_mul_ps(XMM6, XMM6);
+			XMM5	 = _mm_andnot_ps(XMM5, XMM2);
+			XMM3	 = XMM6;
+			XMM7	 = _mm_or_ps(XMM7, XMM5);
+			XMM5	 = XMM4;
+			XMM2	 = XMM0;
+			XMM6	 = _mm_and_ps(XMM6, XMM5);
+			XMM0	 = _mm_and_ps(XMM0, XMM4);
+			XMM5	 = _mm_andnot_ps(XMM5, XMM2);
+			XMM4	 = _mm_andnot_ps(XMM4, XMM3);
+			XMM2	 = _mm_load_ps(PFV_mp5);
+			XMM3	 = _mm_load_ps(PFV_1p5);
+			XMM6	 = _mm_or_ps(XMM6, XMM5);
+			XMM0	 = _mm_or_ps(XMM0, XMM4);
+			XMM0	 = _mm_mul_ps(XMM0, XMM7);
+			XMM6	 = _mm_add_ps(XMM6, XMM0);
+			XMM7	 = _mm_rsqrt_ps(_mm_max_ps(XMM6, _mm_set1_ps(1.17549435e-38f)));	/* same FLT_MIN clamp as stanza 1 */
+			XMM5	 = XMM7;
+			XMM4	 = XMM6;
+			XMM4	 = _mm_mul_ps(XMM4, XMM7);
+			XMM4	 = _mm_mul_ps(XMM4, XMM7);
+			XMM0	 = _mm_load_ps(mdctM+j+ 8);	/* stanza 3 loads */
+			XMM4	 = _mm_mul_ps(XMM4, XMM7);
+			XMM7	 = _mm_load_ps(mdctA+j+ 8);
+			XMM4	 = _mm_mul_ps(XMM4, XMM2);
+			XMM2	 = _mm_load_ps(PABSMASK.sf);
+			XMM5	 = _mm_mul_ps(XMM5, XMM3);
+			XMM3	 = _mm_load_ps(PFV_0.sf);
+			XMM5	 = _mm_add_ps(XMM5, XMM4);
+			XMM4	 = XMM0;
+			XMM6		 = _mm_mul_ps(XMM6, XMM5);
+			XMM5	 = XMM0;
+			XMM6		 = _mm_or_ps(XMM6, XMM1);
+			XMM1	 = XMM0;
+			_mm_store_ps(ret[i]+j+ 4, XMM6);
+			XMM6	 = XMM7;
+			XMM5	 = _mm_mul_ps(XMM5, XMM7);
+			XMM4	 = _mm_and_ps(XMM4, XMM2);
+			XMM6	 = _mm_and_ps(XMM6, XMM2);
+			XMM2	 = _mm_load_ps(PCS_RRRR.sf);
+			XMM5	 = _mm_cmplt_ps(XMM5, XMM3);
+			XMM4	 = _mm_cmple_ps(XMM4, XMM6);
+			XMM1	 = _mm_and_ps(XMM1, XMM2);
+			XMM3	 = XMM5;
+			XMM3	 = _mm_and_ps(XMM3, XMM4);
+			XMM6	 = XMM3;
+			XMM6	 = _mm_and_ps(XMM6, XMM2);
+			XMM2	 = _mm_load_ps(PFV_p92);
+			XMM6	 = _mm_xor_ps(XMM6, XMM1);
+			XMM1	 = _mm_load_ps(PFV_mp16);
+			XMM0	 = _mm_mul_ps(XMM0, XMM0);
+			XMM1	 = _mm_and_ps(XMM1, XMM5);
+			XMM7	 = _mm_mul_ps(XMM7, XMM7);
+			XMM5	 = _mm_andnot_ps(XMM5, XMM2);
+			XMM4	 = XMM7;
+			XMM1	 = _mm_or_ps(XMM1, XMM5);
+			XMM5	 = XMM3;
+			XMM2	 = XMM0;
+			XMM7	 = _mm_and_ps(XMM7, XMM5);
+			XMM0	 = _mm_and_ps(XMM0, XMM3);
+			XMM5	 = _mm_andnot_ps(XMM5, XMM2);
+			XMM3	 = _mm_andnot_ps(XMM3, XMM4);
+			XMM2	 = _mm_load_ps(PFV_mp5);
+			XMM4	 = _mm_load_ps(PFV_1p5);
+			XMM7	 = _mm_or_ps(XMM7, XMM5);
+			XMM0	 = _mm_or_ps(XMM0, XMM3);
+			XMM0	 = _mm_mul_ps(XMM0, XMM1);
+			XMM7	 = _mm_add_ps(XMM7, XMM0);
+			XMM1	 = _mm_rsqrt_ps(_mm_max_ps(XMM7, _mm_set1_ps(1.17549435e-38f)));	/* same FLT_MIN clamp as stanza 1 */
+			XMM5	 = XMM1;
+			XMM3	 = XMM7;
+			XMM3	 = _mm_mul_ps(XMM3, XMM1);
+			XMM3	 = _mm_mul_ps(XMM3, XMM1);
+			XMM0	 = _mm_load_ps(mdctM+j+12);	/* stanza 4 loads */
+			XMM3	 = _mm_mul_ps(XMM3, XMM1);
+			XMM1	 = _mm_load_ps(mdctA+j+12);
+			XMM3	 = _mm_mul_ps(XMM3, XMM2);
+			XMM2	 = _mm_load_ps(PABSMASK.sf);
+			XMM5	 = _mm_mul_ps(XMM5, XMM4);
+			XMM4	 = _mm_load_ps(PFV_0.sf);
+			XMM5	 = _mm_add_ps(XMM5, XMM3);
+			XMM3	 = XMM0;
+			XMM7		 = _mm_mul_ps(XMM7, XMM5);
+			XMM5	 = XMM0;
+			XMM7		 = _mm_or_ps(XMM7, XMM6);
+			XMM6	 = XMM0;
+			_mm_store_ps(ret[i]+j+ 8, XMM7);
+			XMM7	 = XMM1;
+			XMM5	 = _mm_mul_ps(XMM5, XMM1);
+			XMM3	 = _mm_and_ps(XMM3, XMM2);
+			XMM7	 = _mm_and_ps(XMM7, XMM2);
+			XMM2	 = _mm_load_ps(PCS_RRRR.sf);
+			XMM5	 = _mm_cmplt_ps(XMM5, XMM4);
+			XMM3	 = _mm_cmple_ps(XMM3, XMM7);
+			XMM6	 = _mm_and_ps(XMM6, XMM2);
+			XMM4	 = XMM5;
+			XMM4	 = _mm_and_ps(XMM4, XMM3);
+			XMM7	 = XMM4;
+			XMM7	 = _mm_and_ps(XMM7, XMM2);
+			XMM2	 = _mm_load_ps(PFV_p92);
+			XMM7	 = _mm_xor_ps(XMM7, XMM6);
+			XMM6	 = _mm_load_ps(PFV_mp16);
+			XMM0	 = _mm_mul_ps(XMM0, XMM0);
+			XMM6	 = _mm_and_ps(XMM6, XMM5);
+			XMM1	 = _mm_mul_ps(XMM1, XMM1);
+			XMM5	 = _mm_andnot_ps(XMM5, XMM2);
+			XMM3	 = XMM1;
+			XMM6	 = _mm_or_ps(XMM6, XMM5);
+			XMM5	 = XMM4;
+			XMM2	 = XMM0;
+			XMM1	 = _mm_and_ps(XMM1, XMM5);
+			XMM0	 = _mm_and_ps(XMM0, XMM4);
+			XMM5	 = _mm_andnot_ps(XMM5, XMM2);
+			XMM4	 = _mm_andnot_ps(XMM4, XMM3);
+			XMM2	 = _mm_load_ps(PFV_mp5);
+			XMM3	 = _mm_load_ps(PFV_1p5);
+			XMM1	 = _mm_or_ps(XMM1, XMM5);
+			XMM0	 = _mm_or_ps(XMM0, XMM4);
+			XMM0	 = _mm_mul_ps(XMM0, XMM6);
+			XMM1	 = _mm_add_ps(XMM1, XMM0);
+			XMM6	 = _mm_rsqrt_ps(_mm_max_ps(XMM1, _mm_set1_ps(1.17549435e-38f)));	/* same FLT_MIN clamp as stanza 1 */
+			XMM5	 = XMM6;
+			XMM4	 = XMM1;
+			XMM4	 = _mm_mul_ps(XMM4, XMM6);
+			XMM4	 = _mm_mul_ps(XMM4, XMM6);
+			XMM4	 = _mm_mul_ps(XMM4, XMM6);
+			XMM4	 = _mm_mul_ps(XMM4, XMM2);
+			XMM5	 = _mm_mul_ps(XMM5, XMM3);
+			XMM5	 = _mm_add_ps(XMM5, XMM4);
+			XMM1		 = _mm_mul_ps(XMM1, XMM5);
+			XMM1		 = _mm_or_ps(XMM1, XMM7);
+			_mm_store_ps(ret[i]+j+12, XMM1);
+		}
+#else														/* SSE Optimize */
     	for(j=0;j<n;j++)
     	 ret[i][j]=min_indemnity_dipole_hypot(mdctM[j],mdctA[j]);
+#endif														/* SSE Optimize */
   	}
   }else{
     for(i=0;i<vi->coupling_steps;i++){
@@ -1290,24 +5978,3308 @@
     	float *mdctA=mdct[vi->coupling_ang[i]];
     	
     	ret[i]=_vorbis_block_alloc(vb,n*sizeof(**ret));
+#ifdef	__SSE__												/* SSE Optimize */
+		{	/* SSE form of the scalar pair of loops: dipole_hypot for j<limit, round_hypot for limit<=j<n */
+			float	*p	 = ret[i];
+			int limit4	 = limit&(~7);	/* limit rounded down to a multiple of 8 */
+			for(j=0;j<limit4;j+=8)
+			{
+				_mm_store_ps(p+j  , dipole_hypot_ps(mdctM+j  , mdctA+j  ));
+				_mm_store_ps(p+j+4, dipole_hypot_ps(mdctM+j+4, mdctA+j+4));
+			}
+			limit4	 = limit&(~3);	/* limit rounded down to a multiple of 4 */
+			for(;j<limit4;j+=4)
+			{
+				_mm_store_ps(p+j  , dipole_hypot_ps(mdctM+j  , mdctA+j  ));
+			}
+			for(;j<limit;j++)	/* scalar remainder of the dipole region */
+				p[j]	 = dipole_hypot(mdctM[j],mdctA[j]);
+			limit4	 = (limit+3)&(~3);	/* limit rounded up to a multiple of 4 ... */
+			limit4	 = (limit4>=n)?n:limit4;	/* ... but never past n */
+			for(;j<limit4;j++)	/* scalar round_hypot until j reaches that bound */
+				p[j]	 = round_hypot(mdctM[j],mdctA[j]);
+			limit4	 = (limit+7)&(~7);	/* limit rounded up to a multiple of 8, again clamped to n */
+			limit4	 = (limit4>=n)?n:limit4;
+			for(;j<limit4;j+=4)
+			{
+				round_hypot_ps(&p[j  ], &mdctM[j  ], &mdctA[j  ]);
+			}
+			for(;j<n;j+=8)	/* NOTE(review): 8 floats per pass with no tail handling - assumes n-j is a multiple of 8 here; confirm n is always a multiple of 8 */
+			{
+				round_hypot_ps(&p[j  ], &mdctM[j  ], &mdctA[j  ]);
+				round_hypot_ps(&p[j+4], &mdctM[j+4], &mdctA[j+4]);
+			}
+		}
+#else														/* SSE Optimize */
     	for(j=0;j<limit;j++)
     	 ret[i][j]=dipole_hypot(mdctM[j],mdctA[j]);
     	for(;j<n;j++)
       	 ret[i][j]=round_hypot(mdctM[j],mdctA[j]);
+#endif														/* SSE Optimize */
   	}
   }
   return(ret);
 }
 
 /* this is for per-channel noise normalization */
-static int apsort(const void *a, const void *b){
-  float f1=fabs(**(float**)a);
-  float f2=fabs(**(float**)b);
-  return (f1<f2)-(f1>f2);
+#ifdef	__SSE__												/* SSE Optimize */
+#define C(a,b)\
+  (data[a]>=data[b])	/* nonzero when data[a] >= data[b]; relies on an array named `data` being in scope at the point of use */
+/*
+0	ACBA
+1	DDCB
+2	ACDC
+
+0<1	D>A D>C C>B B>A
+0<2	000 000 D>B C>A
+Cond.		(0<2<<4)|(0<1)	SCODE
+
+D>C>B>A		111111	63		3210
+C>D>B>A		111011	59		2310
+D>B>C>A		111101	61		3120
+B>D>C>A		011101	29		1320
+C>B>D>A		011011	27		2130
+B>C>D>A		011001	25		1230
+D>C>A>B		111110	62		3201
+C>D>A>B		111010	58		2301
+D>A>C>B		101110	46		3021
+A>D>C>B		100110	38		0321
+C>A>D>B		110010	50		2031
+A>C>D>B		100010	34		0231
+D>B>A>C		101101	45		3102
+B>D>A>C		001101	13		1302
+D>A>B>C		101100	44		3012
+A>D>B>C		100100	36		0312
+B>A>D>C		000101	 5		1032
+A>B>D>C		000100	 4		0132
+C>B>A>D		010011	19		2103
+B>C>A>D		010001	17		1203
+C>A>B>D		010010	18		2013
+A>C>B>D		000010	 2		0213
+B>A>C>D		000001	 1		1023
+A>B>C>D		000000	 0		0123
+
+A>B>C>D		000000	 0		0123
+B>A>C>D		000001	 1		1023
+A>C>B>D		000010	 2		0213
+A>B>D>C		000100	 4		0132
+B>A>D>C		000101	 5		1032
+B>D>A>C		001101	13		1302
+B>C>A>D		010001	17		1203
+C>A>B>D		010010	18		2013
+C>B>A>D		010011	19		2103
+B>C>D>A		011001	25		1230
+C>B>D>A		011011	27		2130
+B>D>C>A		011101	29		1320
+A>C>D>B		100010	34		0231
+A>D>B>C		100100	36		0312
+A>D>C>B		100110	38		0321
+D>A>B>C		101100	44		3012
+D>B>A>C		101101	45		3102
+D>A>C>B		101110	46		3021
+C>A>D>B		110010	50		2031
+C>D>A>B		111010	58		2301
+C>D>B>A		111011	59		2310
+D>B>C>A		111101	61		3120
+D>C>A>B		111110	62		3201
+D>C>B>A		111111	63		3210
+
+*/
+
+static inline void SORT4x2(float *i, int *n)	/* for i[0..3] and i[4..7] separately: builds a 6-bit comparison code and writes the matching Sort4IndexConvTable entry to n[0..3] and (with 4 added) n[4..7] */
+{
+	int	c0, c1;	/* comparison codes for the first and the second group of four */
+#if	defined(__SSE2__)
+	__m128i	XMM0, XMM1;
+	static _MM_ALIGN16 const __m128x PI4 =
+		{ .si32 = {4, 4, 4, 4} };
+#endif
+	{
+		__m128	P0, P1, P2, P3, P4, P5;
+		P0	 = _mm_load_ps(i  );	/* _mm_load_ps: i must be 16-byte aligned */
+		P3	 = _mm_load_ps(i+4);
+		P1	 = P0;
+		P2	 = P0;
+		P4	 = P3;
+		P5	 = P3;
+		P0	 = _mm_shuffle_ps(P0, P0, _MM_SHUFFLE(0,2,1,0));	/* lanes 0..3 = i0,i1,i2,i0 */
+		P1	 = _mm_shuffle_ps(P1, P1, _MM_SHUFFLE(3,3,2,1));	/* lanes 0..3 = i1,i2,i3,i3 */
+		P2	 = _mm_shuffle_ps(P2, P2, _MM_SHUFFLE(0,2,3,2));	/* lanes 0..3 = i2,i3,i2,i0 */
+		P3	 = _mm_shuffle_ps(P3, P3, _MM_SHUFFLE(0,2,1,0));	/* same three shuffles for i4..i7 */
+		P4	 = _mm_shuffle_ps(P4, P4, _MM_SHUFFLE(3,3,2,1));
+		P5	 = _mm_shuffle_ps(P5, P5, _MM_SHUFFLE(0,2,3,2));
+	
+		P1	 = _mm_cmplt_ps(P1, P0);	/* lanes 0..3: i1<i0, i2<i1, i3<i2, i3<i0 */
+		P2	 = _mm_cmplt_ps(P2, P0);	/* lanes 0..3: i2<i0, i3<i1, then two self-comparisons that are always false */
+		P4	 = _mm_cmplt_ps(P4, P3);
+		P5	 = _mm_cmplt_ps(P5, P3);
+		c0	 = _mm_movemask_ps(P2);
+		c1	 = _mm_movemask_ps(P5);
+		c0	 = c0 << 4;
+		c1	 = c1 << 4;
+		c0	 = c0|_mm_movemask_ps(P1);	/* NOTE(review): bits are set on "x<y" here while the table above is written with ">" relations - confirm against Sort4IndexConvTable */
+		c1	 = c1|_mm_movemask_ps(P4);
+	}
+#if	defined(__SSE2__)
+	{
+		__m128i *mx = (__m128i*)n;
+		XMM1	 = Sort4IndexConvTable[c1].pi;
+		XMM0	 = Sort4IndexConvTable[c0].pi;
+		XMM1	 = _mm_add_epi32(XMM1, PI4.pi);	/* second group: add 4 so the indices refer to i[4..7] */
+		_mm_storeu_si128(mx, XMM0);	/* unaligned stores: n needs no particular alignment on this path */
+		_mm_storeu_si128(mx + 1, XMM1);
+	}
+#else
+	n[0]	 =Sort4IndexConvTable[c0].si32[0];	/* scalar copy of the same two table entries */
+	n[1]	 =Sort4IndexConvTable[c0].si32[1];
+	n[2]	 =Sort4IndexConvTable[c0].si32[2];
+	n[3]	 =Sort4IndexConvTable[c0].si32[3];
+	n[4]	 =Sort4IndexConvTable[c1].si32[0]+4;
+	n[5]	 =Sort4IndexConvTable[c1].si32[1]+4;
+	n[6]	 =Sort4IndexConvTable[c1].si32[2]+4;
+	n[7]	 =Sort4IndexConvTable[c1].si32[3]+4;
+#endif
 }
 
-/*** optimization of sort (for 8 or 32 element) ***/
-#ifdef OPT_SORT
+static inline void sortindex_fix8(int *index,	/* out: index[offset..offset+7] receive the merged ordering of one 8-element group */
+                         float *data,	/* in: values to rank; the group starts at data[offset] */
+                         int offset){	/* start of the group; also added to every index that is stored */
+	_MM_ALIGN16 int n[8];	/* group-relative indices filled in by SORT4x2 */
+	index	+= offset;
+	data	+= offset;
+	SORT4x2(data, n);	/* fills n[0..3] and n[4..7]; presumably each half is ordered largest-first - confirm against SORT4x2 */
+	{
+		__m128	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7;	/* XMM0-3: values of the first half, XMM4-7: values of the second half */
+		XMM0	 = _mm_load_ss(data+n[0]);
+		XMM4	 = _mm_load_ss(data+n[4]);
+		XMM1	 = _mm_load_ss(data+n[1]);
+		XMM5	 = _mm_load_ss(data+n[5]);
+		XMM2	 = _mm_load_ss(data+n[2]);
+		XMM6	 = _mm_load_ss(data+n[6]);
+		XMM3	 = _mm_load_ss(data+n[3]);
+		XMM7	 = _mm_load_ss(data+n[7]);
+		if(!_mm_comilt_ss(XMM0, XMM4)){	/* every test keeps the first-half element unless it compares less than the second-half one */
+			index[0]	 = n[0]+offset;
+			if(!_mm_comilt_ss(XMM1, XMM4)){
+				index[1]	 = n[1]+offset;
+				if(!_mm_comilt_ss(XMM2, XMM4)){
+					index[2]	 = n[2]+offset;
+					if(!_mm_comilt_ss(XMM3, XMM4)){
+						index[3]	 = n[3]+offset;
+						index[4]	 = n[4]+offset;	/* first half used up: remaining slots take the second half in its existing order */
+						index[5]	 = n[5]+offset;
+						index[6]	 = n[6]+offset;
+						index[7]	 = n[7]+offset;
+					}else{
+						index[3]	 = n[4]+offset;
+SORT8_4_35:	/* label SORT8_<k>_<a><b>: fill index[k] by comparing data[n[a]] with data[n[b]]; entered by fall-through and by goto */
+						if(!_mm_comilt_ss(XMM3, XMM5)){
+							index[4]	 = n[3]+offset;
+							index[5]	 = n[5]+offset;
+							index[6]	 = n[6]+offset;
+							index[7]	 = n[7]+offset;
+						}else{
+							index[4]	 = n[5]+offset;
+SORT8_5_36:
+							if(!_mm_comilt_ss(XMM3, XMM6)){
+								index[5]	 = n[3]+offset;
+								index[6]	 = n[6]+offset;
+								index[7]	 = n[7]+offset;
+							}else{
+								index[5]	 = n[6]+offset;
+SORT8_6_37:
+								if(!_mm_comilt_ss(XMM3, XMM7)){
+									index[6]	 = n[3]+offset;
+									index[7]	 = n[7]+offset;
+								}else{
+									index[6]	 = n[7]+offset;
+									index[7]	 = n[3]+offset;
+								}
+							}
+						}
+					}
+				}else{
+					index[2]	 = n[4]+offset;
+SORT8_3_25:
+					if(!_mm_comilt_ss(XMM2, XMM5)){
+						index[3]	 = n[2]+offset;
+						goto SORT8_4_35;	/* first-half cursor advances to n[3]; second-half cursor stays at n[5] */
+					}else{
+						index[3]	 = n[5]+offset;
+SORT8_4_26:
+						if(!_mm_comilt_ss(XMM2, XMM6)){
+							index[4]	 = n[2]+offset;
+							goto SORT8_5_36;
+						}else{
+							index[4]	 = n[6]+offset;
+SORT8_5_27:
+							if(!_mm_comilt_ss(XMM2, XMM7)){
+								index[5]	 = n[2]+offset;
+								goto SORT8_6_37;
+							}else{
+								index[5]	 = n[7]+offset;
+								index[6]	 = n[2]+offset;	/* second half used up: append what is left of the first half */
+								index[7]	 = n[3]+offset;
+							}
+						}
+					}
+				}
+			}else{
+				index[1]	 = n[4]+offset;
+SORT8_2_15:
+				if(!_mm_comilt_ss(XMM1, XMM5)){
+					index[2]	 = n[1]+offset;
+					goto SORT8_3_25;
+				}else{
+					index[2]	 = n[5]+offset;
+SORT8_3_16:
+					if(!_mm_comilt_ss(XMM1, XMM6)){
+						index[3]	 = n[1]+offset;
+						goto SORT8_4_26;
+					}else{
+						index[3]	 = n[6]+offset;
+SORT8_4_17:
+						if(!_mm_comilt_ss(XMM1, XMM7)){
+							index[4]	 = n[1]+offset;
+							goto SORT8_5_27;
+						}else{
+							index[4]	 = n[7]+offset;
+							index[5]	 = n[1]+offset;
+							index[6]	 = n[2]+offset;
+							index[7]	 = n[3]+offset;
+						}
+					}
+				}
+			}
+		}else{
+			index[0]	 = n[4]+offset;	/* the second half supplies the first output entry */
+			if(!_mm_comilt_ss(XMM0, XMM5)){
+				index[1]	 = n[0]+offset;
+				goto SORT8_2_15;
+			}else{
+				index[1]	 = n[5]+offset;
+				if(!_mm_comilt_ss(XMM0, XMM6)){
+					index[2]	 = n[0]+offset;
+					goto SORT8_3_16;
+				}else{
+					index[2]	 = n[6]+offset;
+					if(!_mm_comilt_ss(XMM0, XMM7)){
+						index[3]	 = n[0]+offset;
+						goto SORT8_4_17;
+					}else{
+						index[3]	 = n[7]+offset;
+						index[4]	 = n[0]+offset;	/* whole second half came first: first half follows unchanged */
+						index[5]	 = n[1]+offset;
+						index[6]	 = n[2]+offset;
+						index[7]	 = n[3]+offset;
+					}
+				}
+			}
+		}
+	}
+}
+static inline void sortindex_fix16(int *index,	/* out: index[j..j+15] receive the merge of the two runs */
+						  int *n,	/* in: two adjacent 8-entry index runs, n[j..j+7] and n[j+8..j+15] */
+						  float *data,	/* in: values; subscripted directly by the entries of n (not shifted by j) */
+						  int j){	/* start of the 16-entry window in both index and n */
+	__m128	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7;	/* XMM0-3: four-value window on the first run, XMM4-7: four-value window on the second run */
+	index	+= j;
+	n		+= j;	/* NOTE(review): n is re-read after index[] stores, so the two presumably must not overlap - confirm callers */
+	XMM0	 = _mm_load_ss(data+n[0]);	/* assumes each run is already ordered largest-first by data value - TODO confirm at call sites */
+	XMM4	 = _mm_load_ss(data+n[8]);
+	XMM1	 = _mm_load_ss(data+n[1]);
+	XMM5	 = _mm_load_ss(data+n[9]);
+	XMM2	 = _mm_load_ss(data+n[2]);
+	XMM6	 = _mm_load_ss(data+n[10]);
+	XMM3	 = _mm_load_ss(data+n[3]);
+	XMM7	 = _mm_load_ss(data+n[11]);
+	if(!_mm_comilt_ss(XMM0, XMM4)){	/* every test keeps the first-run element unless it compares less than the second-run one */
+	  index[0]	 = n[0];
+	  if(!_mm_comilt_ss(XMM1, XMM4)){
+		index[1]	 = n[1];
+		if(!_mm_comilt_ss(XMM2, XMM4)){
+		  index[2]	 = n[2];
+		  if(!_mm_comilt_ss(XMM3, XMM4)){
+			index[3]	 = n[3];
+			XMM0	 = _mm_load_ss(data+n[4]);	/* first-run window used up: slide XMM0-3 to n[4..7] */
+			XMM1	 = _mm_load_ss(data+n[5]);
+			XMM2	 = _mm_load_ss(data+n[6]);
+			XMM3	 = _mm_load_ss(data+n[7]);
+			if(!_mm_comilt_ss(XMM0, XMM4)){
+			  index[4]	 = n[4];
+			  if(!_mm_comilt_ss(XMM1, XMM4)){
+				index[5]	 = n[5];
+				if(!_mm_comilt_ss(XMM2, XMM4)){
+				  index[6]	 = n[6];
+				  if(!_mm_comilt_ss(XMM3, XMM4)){
+					index[7]	 = n[7];
+					index[8]	 = n[8];	/* first run used up: copy the second run through in its existing order */
+					index[9]	 = n[9];
+					index[10]	 = n[10];
+					index[11]	 = n[11];
+					index[12]	 = n[12];
+					index[13]	 = n[13];
+					index[14]	 = n[14];
+					index[15]	 = n[15];
+				  }else{
+					index[7]	 = n[8];
+SORT16_080709:	/* label SORT16_<oo><aa><bb> (hex pairs): fill index[oo] by comparing data[n[aa]] with data[n[bb]] */
+					if(!_mm_comilt_ss(XMM3, XMM5)){
+					  index[8]	 = n[7];
+					  index[9]	 = n[9];
+					  index[10]	 = n[10];
+					  index[11]	 = n[11];
+					  index[12]	 = n[12];
+					  index[13]	 = n[13];
+					  index[14]	 = n[14];
+					  index[15]	 = n[15];
+					}else{
+					  index[8]	 = n[9];
+SORT16_09070A:
+					  if(!_mm_comilt_ss(XMM3, XMM6)){
+						index[9]	 = n[7];
+						index[10]	 = n[10];
+						index[11]	 = n[11];
+						index[12]	 = n[12];
+						index[13]	 = n[13];
+						index[14]	 = n[14];
+						index[15]	 = n[15];
+					  }else{
+						index[9]	 = n[10];
+SORT16_0A070B:
+						if(!_mm_comilt_ss(XMM3, XMM7)){
+						  index[10]	 = n[7];
+						  index[11]	 = n[11];
+						  index[12]	 = n[12];
+						  index[13]	 = n[13];
+						  index[14]	 = n[14];
+						  index[15]	 = n[15];
+						}else{
+						  index[10]	 = n[11];
+						  XMM4	 = _mm_load_ss(data+n[12]);	/* second-run window used up: slide XMM4-7 to n[12..15] */
+						  XMM5	 = _mm_load_ss(data+n[13]);
+						  XMM6	 = _mm_load_ss(data+n[14]);
+						  XMM7	 = _mm_load_ss(data+n[15]);
+SORT16_0B070C:
+						  if(!_mm_comilt_ss(XMM3, XMM4)){
+							index[11]	 = n[7];
+							index[12]	 = n[12];
+							index[13]	 = n[13];
+							index[14]	 = n[14];
+							index[15]	 = n[15];
+						  }else{
+							index[11]	 = n[12];
+SORT16_0C070D:
+							if(!_mm_comilt_ss(XMM3, XMM5)){
+							  index[12]	 = n[7];
+							  index[13]	 = n[13];
+							  index[14]	 = n[14];
+							  index[15]	 = n[15];
+							}else{
+							  index[12]	 = n[13];
+SORT16_0D070E:
+							  if(!_mm_comilt_ss(XMM3, XMM6)){
+								index[13]	 = n[7];
+								index[14]	 = n[14];
+								index[15]	 = n[15];
+							  }else{
+								index[13]	 = n[14];
+SORT16_0E070F:
+								if(!_mm_comilt_ss(XMM3, XMM7)){
+								  index[14]	 = n[7];
+								  index[15]	 = n[15];
+								}else{
+								  index[14]	 = n[15];
+								  index[15]	 = n[7];
+								}
+							  }
+							}
+						  }
+						}
+					  }
+					}
+				  }
+				}else{
+				  index[6]	 = n[8];
+SORT16_070609:
+				  if(!_mm_comilt_ss(XMM2, XMM5)){
+					index[7]	 = n[6];
+					goto SORT16_080709;
+				  }else{
+					index[7]	 = n[9];
+SORT16_08060A:
+					if(!_mm_comilt_ss(XMM2, XMM6)){
+					  index[8]	 = n[6];
+					  goto SORT16_09070A;
+					}else{
+					  index[8]	 = n[10];
+SORT16_09060B:
+					  if(!_mm_comilt_ss(XMM2, XMM7)){
+						index[9]	 = n[6];
+						goto SORT16_0A070B;
+					  }else{
+						index[9]	 = n[11];
+						XMM4	 = _mm_load_ss(data+n[12]);	/* slide second-run window to n[12..15] */
+						XMM5	 = _mm_load_ss(data+n[13]);
+						XMM6	 = _mm_load_ss(data+n[14]);
+						XMM7	 = _mm_load_ss(data+n[15]);
+SORT16_0A060C:
+						if(!_mm_comilt_ss(XMM2, XMM4)){
+						  index[10]	 = n[6];
+						  goto SORT16_0B070C;
+						}else{
+						  index[10]	 = n[12];
+SORT16_0B060D:
+						  if(!_mm_comilt_ss(XMM2, XMM5)){
+							index[11]	 = n[6];
+							goto SORT16_0C070D;
+						  }else{
+							index[11]	 = n[13];
+SORT16_0C060E:
+							if(!_mm_comilt_ss(XMM2, XMM6)){
+							  index[12]	 = n[6];
+							  goto SORT16_0D070E;
+							}else{
+							  index[12]	 = n[14];
+SORT16_0D060F:
+							  if(!_mm_comilt_ss(XMM2, XMM7)){
+								index[13]	 = n[6];
+								goto SORT16_0E070F;
+							  }else{
+								index[13]	 = n[15];
+								index[14]	 = n[6];	/* second run used up: append what is left of the first run */
+								index[15]	 = n[7];
+							  }
+							}
+						  }
+						}
+					  }
+					}
+				  }
+				}
+			  }else{
+				index[5]	 = n[8];
+SORT16_060509:
+				if(!_mm_comilt_ss(XMM1, XMM5)){
+				  index[6]	 = n[5];
+				  goto SORT16_070609;
+				}else{
+				  index[6]	 = n[9];
+SORT16_07050A:
+				  if(!_mm_comilt_ss(XMM1, XMM6)){
+					index[7]	 = n[5];
+					goto SORT16_08060A;
+				  }else{
+					index[7]	 = n[10];
+SORT16_08050B:
+					if(!_mm_comilt_ss(XMM1, XMM7)){
+					  index[8]	 = n[5];
+					  goto SORT16_09060B;
+					}else{
+					  index[8]	 = n[11];
+					  XMM4	 = _mm_load_ss(data+n[12]);	/* slide second-run window to n[12..15] */
+					  XMM5	 = _mm_load_ss(data+n[13]);
+					  XMM6	 = _mm_load_ss(data+n[14]);
+					  XMM7	 = _mm_load_ss(data+n[15]);
+SORT16_09050C:
+					  if(!_mm_comilt_ss(XMM1, XMM4)){
+						index[9]	 = n[5];
+						goto SORT16_0A060C;
+					  }else{
+						index[9]	 = n[12];
+SORT16_0A050D:
+						if(!_mm_comilt_ss(XMM1, XMM5)){
+						  index[10]	 = n[5];
+						  goto SORT16_0B060D;
+						}else{
+						  index[10]	 = n[13];
+SORT16_0B050E:
+						  if(!_mm_comilt_ss(XMM1, XMM6)){
+							index[11]	 = n[5];
+							goto SORT16_0C060E;
+						  }else{
+							index[11]	 = n[14];
+SORT16_0C050F:
+							if(!_mm_comilt_ss(XMM1, XMM7)){
+							  index[12]	 = n[5];
+							  goto SORT16_0D060F;
+							}else{
+							  index[12]	 = n[15];
+							  index[13]	 = n[5];
+							  index[14]	 = n[6];
+							  index[15]	 = n[7];
+							}
+						  }
+						}
+					  }
+					}
+				  }
+				}
+			  }
+			}else{
+			  index[4]	 = n[8];
+SORT16_050409:
+			  if(!_mm_comilt_ss(XMM0, XMM5)){
+				index[5]	 = n[4];
+				goto SORT16_060509;
+			  }else{
+				index[5]	 = n[9];
+SORT16_06040A:
+				if(!_mm_comilt_ss(XMM0, XMM6)){
+				  index[6]	 = n[4];
+				  goto SORT16_07050A;
+				}else{
+				  index[6]	 = n[10];
+SORT16_07040B:
+				  if(!_mm_comilt_ss(XMM0, XMM7)){
+					index[7]	 = n[4];
+					goto SORT16_08050B;
+				  }else{
+					index[7]	 = n[11];
+					XMM4	 = _mm_load_ss(data+n[12]);	/* slide second-run window to n[12..15] */
+					XMM5	 = _mm_load_ss(data+n[13]);
+					XMM6	 = _mm_load_ss(data+n[14]);
+					XMM7	 = _mm_load_ss(data+n[15]);
+SORT16_08040C:
+					if(!_mm_comilt_ss(XMM0, XMM4)){
+					  index[8]	 = n[4];
+					  goto SORT16_09050C;
+					}else{
+					  index[8]	 = n[12];
+SORT16_09040D:
+					  if(!_mm_comilt_ss(XMM0, XMM5)){
+						index[9]	 = n[4];
+						goto SORT16_0A050D;
+					  }else{
+						index[9]	 = n[13];
+SORT16_0A040E:
+						if(!_mm_comilt_ss(XMM0, XMM6)){
+						  index[10]	 = n[4];
+						  goto SORT16_0B050E;
+						}else{
+						  index[10]	 = n[14];
+SORT16_0B040F:
+						  if(!_mm_comilt_ss(XMM0, XMM7)){
+							index[11]	 = n[4];
+							goto SORT16_0C050F;
+						  }else{
+							index[11]	 = n[15];
+							index[12]	 = n[4];
+							index[13]	 = n[5];
+							index[14]	 = n[6];
+							index[15]	 = n[7];
+						  }
+						}
+					  }
+					}
+				  }
+				}
+			  }
+			}
+		  }else{
+			index[3]	 = n[8];
+SORT16_040309:
+			if(!_mm_comilt_ss(XMM3, XMM5)){
+			  index[4]	 = n[3];
+			  XMM0	 = _mm_load_ss(data+n[4]);	/* n[3] was the last of the first window: slide XMM0-3 to n[4..7] before jumping */
+			  XMM1	 = _mm_load_ss(data+n[5]);
+			  XMM2	 = _mm_load_ss(data+n[6]);
+			  XMM3	 = _mm_load_ss(data+n[7]);
+			  goto SORT16_050409;
+			}else{
+			  index[4]	 = n[9];
+SORT16_05030A:
+			  if(!_mm_comilt_ss(XMM3, XMM6)){
+				index[5]	 = n[3];
+				XMM0	 = _mm_load_ss(data+n[4]);
+				XMM1	 = _mm_load_ss(data+n[5]);
+				XMM2	 = _mm_load_ss(data+n[6]);
+				XMM3	 = _mm_load_ss(data+n[7]);
+				goto SORT16_06040A;
+			  }else{
+				index[5]	 = n[10];
+SORT16_06030B:
+				if(!_mm_comilt_ss(XMM3, XMM7)){
+				  index[6]	 = n[3];
+				  XMM0	 = _mm_load_ss(data+n[4]);
+				  XMM1	 = _mm_load_ss(data+n[5]);
+				  XMM2	 = _mm_load_ss(data+n[6]);
+				  XMM3	 = _mm_load_ss(data+n[7]);
+				  goto SORT16_07040B;
+				}else{
+				  index[6]	 = n[11];
+				  XMM4	 = _mm_load_ss(data+n[12]);	/* slide second-run window to n[12..15] */
+				  XMM5	 = _mm_load_ss(data+n[13]);
+				  XMM6	 = _mm_load_ss(data+n[14]);
+				  XMM7	 = _mm_load_ss(data+n[15]);
+SORT16_07030C:
+				  if(!_mm_comilt_ss(XMM3, XMM4)){
+					index[7]	 = n[3];
+					XMM0	 = _mm_load_ss(data+n[4]);
+					XMM1	 = _mm_load_ss(data+n[5]);
+					XMM2	 = _mm_load_ss(data+n[6]);
+					XMM3	 = _mm_load_ss(data+n[7]);
+					goto SORT16_08040C;
+				  }else{
+					index[7]	 = n[12];
+SORT16_08030D:
+					if(!_mm_comilt_ss(XMM3, XMM5)){
+					  index[8]	 = n[3];
+					  XMM0	 = _mm_load_ss(data+n[4]);
+					  XMM1	 = _mm_load_ss(data+n[5]);
+					  XMM2	 = _mm_load_ss(data+n[6]);
+					  XMM3	 = _mm_load_ss(data+n[7]);
+					  goto SORT16_09040D;
+					}else{
+					  index[8]	 = n[13];
+SORT16_09030E:
+					  if(!_mm_comilt_ss(XMM3, XMM6)){
+						index[9]	 = n[3];
+						XMM0	 = _mm_load_ss(data+n[4]);
+						XMM1	 = _mm_load_ss(data+n[5]);
+						XMM2	 = _mm_load_ss(data+n[6]);
+						XMM3	 = _mm_load_ss(data+n[7]);
+						goto SORT16_0A040E;
+					  }else{
+						index[9]	 = n[14];
+SORT16_0A030F:
+						if(!_mm_comilt_ss(XMM3, XMM7)){
+						  index[10]	 = n[3];
+						  XMM0	 = _mm_load_ss(data+n[4]);
+						  XMM1	 = _mm_load_ss(data+n[5]);
+						  XMM2	 = _mm_load_ss(data+n[6]);
+						  XMM3	 = _mm_load_ss(data+n[7]);
+						  goto SORT16_0B040F;
+						}else{
+						  index[10]	 = n[15];
+						  index[11]	 = n[3];	/* second run used up: n[3..7] of the first run follow unchanged */
+						  index[12]	 = n[4];
+						  index[13]	 = n[5];
+						  index[14]	 = n[6];
+						  index[15]	 = n[7];
+						}
+					  }
+					}
+				  }
+				}
+			  }
+			}
+		  }
+		}else{
+		  index[2]	 = n[8];
+SORT16_030209:
+		  if(!_mm_comilt_ss(XMM2, XMM5)){
+			index[3]	 = n[2];
+			goto SORT16_040309;
+		  }else{
+			index[3]	 = n[9];
+SORT16_04020A:
+			if(!_mm_comilt_ss(XMM2, XMM6)){
+			  index[4]	 = n[2];
+			  goto SORT16_05030A;
+			}else{
+			  index[4]	 = n[10];
+SORT16_05020B:
+			  if(!_mm_comilt_ss(XMM2, XMM7)){
+				index[5]	 = n[2];
+				goto SORT16_06030B;
+			  }else{
+				index[5]	 = n[11];
+				XMM4	 = _mm_load_ss(data+n[12]);	/* slide second-run window to n[12..15] */
+				XMM5	 = _mm_load_ss(data+n[13]);
+				XMM6	 = _mm_load_ss(data+n[14]);
+				XMM7	 = _mm_load_ss(data+n[15]);
+SORT16_06020C:
+				if(!_mm_comilt_ss(XMM2, XMM4)){
+				  index[6]	 = n[2];
+				  goto SORT16_07030C;
+				}else{
+				  index[6]	 = n[12];
+SORT16_07020D:
+				  if(!_mm_comilt_ss(XMM2, XMM5)){
+					index[7]	 = n[2];
+					goto SORT16_08030D;
+				  }else{
+					index[7]	 = n[13];
+SORT16_08020E:
+					if(!_mm_comilt_ss(XMM2, XMM6)){
+					  index[8]	 = n[2];
+					  goto SORT16_09030E;
+					}else{
+					  index[8]	 = n[14];
+SORT16_09020F:
+					  if(!_mm_comilt_ss(XMM2, XMM7)){
+						index[9]	 = n[2];
+						goto SORT16_0A030F;
+					  }else{
+						index[9]	 = n[15];
+						index[10]	 = n[2];
+						index[11]	 = n[3];
+						index[12]	 = n[4];
+						index[13]	 = n[5];
+						index[14]	 = n[6];
+						index[15]	 = n[7];
+					  }
+					}
+				  }
+				}
+			  }
+			}
+		  }
+		}
+	  }else{
+		index[1]	 = n[8];
+SORT16_020109:
+		if(!_mm_comilt_ss(XMM1, XMM5)){
+		  index[2]	 = n[1];
+		  goto SORT16_030209;
+		}else{
+		  index[2]	 = n[9];
+SORT16_03010A:
+		  if(!_mm_comilt_ss(XMM1, XMM6)){
+			index[3]	 = n[1];
+			goto SORT16_04020A;
+		  }else{
+			index[3]	 = n[10];
+SORT16_04010B:
+			if(!_mm_comilt_ss(XMM1, XMM7)){
+			  index[4]	 = n[1];
+			  goto SORT16_05020B;
+			}else{
+			  index[4]	 = n[11];
+			  XMM4	 = _mm_load_ss(data+n[12]);	/* slide second-run window to n[12..15] */
+			  XMM5	 = _mm_load_ss(data+n[13]);
+			  XMM6	 = _mm_load_ss(data+n[14]);
+			  XMM7	 = _mm_load_ss(data+n[15]);
+SORT16_05010C:
+			  if(!_mm_comilt_ss(XMM1, XMM4)){
+				index[5]	 = n[1];
+				goto SORT16_06020C;
+			  }else{
+				index[5]	 = n[12];
+SORT16_06010D:
+				if(!_mm_comilt_ss(XMM1, XMM5)){
+				  index[6]	 = n[1];
+				  goto SORT16_07020D;
+				}else{
+				  index[6]	 = n[13];
+SORT16_07010E:
+				  if(!_mm_comilt_ss(XMM1, XMM6)){
+					index[7]	 = n[1];
+					goto SORT16_08020E;
+				  }else{
+					index[7]	 = n[14];
+SORT16_08010F:
+					if(!_mm_comilt_ss(XMM1, XMM7)){
+					  index[8]	 = n[1];
+					  goto SORT16_09020F;
+					}else{
+					  index[8]	 = n[15];
+					  index[9]	 = n[1];
+					  index[10]	 = n[2];
+					  index[11]	 = n[3];
+					  index[12]	 = n[4];
+					  index[13]	 = n[5];
+					  index[14]	 = n[6];
+					  index[15]	 = n[7];
+					}
+				  }
+				}
+			  }
+			}
+		  }
+		}
+	  }
+	}else{
+	  index[0]	 = n[8];	/* the second run supplies the first output entry */
+	  if(!_mm_comilt_ss(XMM0, XMM5)){
+		index[1]	 = n[0];
+		goto SORT16_020109;
+	  }else{
+		index[1]	 = n[9];
+		if(!_mm_comilt_ss(XMM0, XMM6)){
+		  index[2]	 = n[0];
+		  goto SORT16_03010A;
+		}else{
+		  index[2]	 = n[10];
+		  if(!_mm_comilt_ss(XMM0, XMM7)){
+			index[3]	 = n[0];
+			goto SORT16_04010B;
+		  }else{
+			index[3]	 = n[11];
+			XMM4	 = _mm_load_ss(data+n[12]);	/* slide second-run window to n[12..15] */
+			XMM5	 = _mm_load_ss(data+n[13]);
+			XMM6	 = _mm_load_ss(data+n[14]);
+			XMM7	 = _mm_load_ss(data+n[15]);
+			if(!_mm_comilt_ss(XMM0, XMM4)){
+			  index[4]	 = n[0];
+			  goto SORT16_05010C;
+			}else{
+			  index[4]	 = n[12];
+			  if(!_mm_comilt_ss(XMM0, XMM5)){
+				index[5]	 = n[0];
+				goto SORT16_06010D;
+			  }else{
+				index[5]	 = n[13];
+				if(!_mm_comilt_ss(XMM0, XMM6)){
+				  index[6]	 = n[0];
+				  goto SORT16_07010E;
+				}else{
+				  index[6]	 = n[14];
+				  if(!_mm_comilt_ss(XMM0, XMM7)){
+					index[7]	 = n[0];
+					goto SORT16_08010F;
+				  }else{
+					index[7]	 = n[15];
+					index[8]	 = n[0];	/* whole second run came first: first run follows unchanged */
+					index[9]	 = n[1];
+					index[10]	 = n[2];
+					index[11]	 = n[3];
+					index[12]	 = n[4];
+					index[13]	 = n[5];
+					index[14]	 = n[6];
+					index[15]	 = n[7];
+				  }
+				}
+			  }
+			}
+		  }
+		}
+	  }
+	}
+}
+static inline void sortindex_fix32(int *index,
+						  float *data,
+						  int offset){
+	_MM_ALIGN16 int n[32];
+	__m128	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7;
+	sortindex_fix8(index,data,offset   );
+	sortindex_fix8(index,data,offset+ 8);
+	sortindex_fix8(index,data,offset+16);
+	sortindex_fix8(index,data,offset+24);
+	index+=offset;
+	sortindex_fix16(n,index,data, 0);
+	sortindex_fix16(n,index,data,16);
+	XMM0	 = _mm_load_ss(data+n[0]);
+	XMM4	 = _mm_load_ss(data+n[16]);
+	XMM1	 = _mm_load_ss(data+n[1]);
+	XMM5	 = _mm_load_ss(data+n[17]);
+	XMM2	 = _mm_load_ss(data+n[2]);
+	XMM6	 = _mm_load_ss(data+n[18]);
+	XMM3	 = _mm_load_ss(data+n[3]);
+	XMM7	 = _mm_load_ss(data+n[19]);
+	if(!_mm_comilt_ss(XMM0, XMM4)){
+	  index[0]	 = n[0];
+	  if(!_mm_comilt_ss(XMM1, XMM4)){
+		index[1]	 = n[1];
+		if(!_mm_comilt_ss(XMM2, XMM4)){
+		  index[2]	 = n[2];
+		  if(!_mm_comilt_ss(XMM3, XMM4)){
+			index[3]	 = n[3];
+			XMM0	 = _mm_load_ss(data+n[4]);
+			XMM1	 = _mm_load_ss(data+n[5]);
+			XMM2	 = _mm_load_ss(data+n[6]);
+			XMM3	 = _mm_load_ss(data+n[7]);
+			if(!_mm_comilt_ss(XMM0, XMM4)){
+			  index[4]	 = n[4];
+			  if(!_mm_comilt_ss(XMM1, XMM4)){
+				index[5]	 = n[5];
+				if(!_mm_comilt_ss(XMM2, XMM4)){
+				  index[6]	 = n[6];
+				  if(!_mm_comilt_ss(XMM3, XMM4)){
+					index[7]	 = n[7];
+					XMM0	 = _mm_load_ss(data+n[8]);
+					XMM1	 = _mm_load_ss(data+n[9]);
+					XMM2	 = _mm_load_ss(data+n[10]);
+					XMM3	 = _mm_load_ss(data+n[11]);
+					if(!_mm_comilt_ss(XMM0, XMM4)){
+					  index[8]	 = n[8];
+					  if(!_mm_comilt_ss(XMM1, XMM4)){
+						index[9]	 = n[9];
+						if(!_mm_comilt_ss(XMM2, XMM4)){
+						  index[10]	 = n[10];
+						  if(!_mm_comilt_ss(XMM3, XMM4)){
+							index[11]	 = n[11];
+							XMM0	 = _mm_load_ss(data+n[12]);
+							XMM1	 = _mm_load_ss(data+n[13]);
+							XMM2	 = _mm_load_ss(data+n[14]);
+							XMM3	 = _mm_load_ss(data+n[15]);
+							if(!_mm_comilt_ss(XMM0, XMM4)){
+							  index[12]	 = n[12];
+							  if(!_mm_comilt_ss(XMM1, XMM4)){
+								index[13]	 = n[13];
+								if(!_mm_comilt_ss(XMM2, XMM4)){
+								  index[14]	 = n[14];
+								  if(!_mm_comilt_ss(XMM3, XMM4)){
+									index[15]	 = n[15];
+									index[16]	 = n[16];
+									index[17]	 = n[17];
+									index[18]	 = n[18];
+									index[19]	 = n[19];
+									index[20]	 = n[20];
+									index[21]	 = n[21];
+									index[22]	 = n[22];
+									index[23]	 = n[23];
+									index[24]	 = n[24];
+									index[25]	 = n[25];
+									index[26]	 = n[26];
+									index[27]	 = n[27];
+									index[28]	 = n[28];
+									index[29]	 = n[29];
+									index[30]	 = n[30];
+									index[31]	 = n[31];
+								  }else{
+									index[15]	 = n[16];
+SORT32_100F11:
+									if(!_mm_comilt_ss(XMM3, XMM5)){
+									  index[16]	 = n[15];
+									  index[17]	 = n[17];
+									  index[18]	 = n[18];
+									  index[19]	 = n[19];
+									  index[20]	 = n[20];
+									  index[21]	 = n[21];
+									  index[22]	 = n[22];
+									  index[23]	 = n[23];
+									  index[24]	 = n[24];
+									  index[25]	 = n[25];
+									  index[26]	 = n[26];
+									  index[27]	 = n[27];
+									  index[28]	 = n[28];
+									  index[29]	 = n[29];
+									  index[30]	 = n[30];
+									  index[31]	 = n[31];
+									}else{
+									  index[16]	 = n[17];
+SORT32_110F12:
+									  if(!_mm_comilt_ss(XMM3, XMM6)){
+										index[17]	 = n[15];
+										index[18]	 = n[18];
+										index[19]	 = n[19];
+										index[20]	 = n[20];
+										index[21]	 = n[21];
+										index[22]	 = n[22];
+										index[23]	 = n[23];
+										index[24]	 = n[24];
+										index[25]	 = n[25];
+										index[26]	 = n[26];
+										index[27]	 = n[27];
+										index[28]	 = n[28];
+										index[29]	 = n[29];
+										index[30]	 = n[30];
+										index[31]	 = n[31];
+									  }else{
+										index[17]	 = n[18];
+SORT32_120F13:
+										if(!_mm_comilt_ss(XMM3, XMM7)){
+										  index[18]	 = n[15];
+										  index[19]	 = n[19];
+										  index[20]	 = n[20];
+										  index[21]	 = n[21];
+										  index[22]	 = n[22];
+										  index[23]	 = n[23];
+										  index[24]	 = n[24];
+										  index[25]	 = n[25];
+										  index[26]	 = n[26];
+										  index[27]	 = n[27];
+										  index[28]	 = n[28];
+										  index[29]	 = n[29];
+										  index[30]	 = n[30];
+										  index[31]	 = n[31];
+										}else{
+										  index[18]	 = n[19];
+										  XMM4	 = _mm_load_ss(data+n[20]);
+										  XMM5	 = _mm_load_ss(data+n[21]);
+										  XMM6	 = _mm_load_ss(data+n[22]);
+										  XMM7	 = _mm_load_ss(data+n[23]);
+SORT32_130F14:
+										  if(!_mm_comilt_ss(XMM3, XMM4)){
+											index[19]	 = n[15];
+											index[20]	 = n[20];
+											index[21]	 = n[21];
+											index[22]	 = n[22];
+											index[23]	 = n[23];
+											index[24]	 = n[24];
+											index[25]	 = n[25];
+											index[26]	 = n[26];
+											index[27]	 = n[27];
+											index[28]	 = n[28];
+											index[29]	 = n[29];
+											index[30]	 = n[30];
+											index[31]	 = n[31];
+										  }else{
+											index[19]	 = n[20];
+SORT32_140F15:
+											if(!_mm_comilt_ss(XMM3, XMM5)){
+											  index[20]	 = n[15];
+											  index[21]	 = n[21];
+											  index[22]	 = n[22];
+											  index[23]	 = n[23];
+											  index[24]	 = n[24];
+											  index[25]	 = n[25];
+											  index[26]	 = n[26];
+											  index[27]	 = n[27];
+											  index[28]	 = n[28];
+											  index[29]	 = n[29];
+											  index[30]	 = n[30];
+											  index[31]	 = n[31];
+											}else{
+											  index[20]	 = n[21];
+SORT32_150F16:
+											  if(!_mm_comilt_ss(XMM3, XMM6)){
+												index[21]	 = n[15];
+												index[22]	 = n[22];
+												index[23]	 = n[23];
+												index[24]	 = n[24];
+												index[25]	 = n[25];
+												index[26]	 = n[26];
+												index[27]	 = n[27];
+												index[28]	 = n[28];
+												index[29]	 = n[29];
+												index[30]	 = n[30];
+												index[31]	 = n[31];
+											  }else{
+												index[21]	 = n[22];
+SORT32_160F17:
+												if(!_mm_comilt_ss(XMM3, XMM7)){
+												  index[22]	 = n[15];
+												  index[23]	 = n[23];
+												  index[24]	 = n[24];
+												  index[25]	 = n[25];
+												  index[26]	 = n[26];
+												  index[27]	 = n[27];
+												  index[28]	 = n[28];
+												  index[29]	 = n[29];
+												  index[30]	 = n[30];
+												  index[31]	 = n[31];
+												}else{
+												  index[22]	 = n[23];
+												  XMM4	 = _mm_load_ss(data+n[24]);
+												  XMM5	 = _mm_load_ss(data+n[25]);
+												  XMM6	 = _mm_load_ss(data+n[26]);
+												  XMM7	 = _mm_load_ss(data+n[27]);
+SORT32_170F18:
+												  if(!_mm_comilt_ss(XMM3, XMM4)){
+													index[23]	 = n[15];
+													index[24]	 = n[24];
+													index[25]	 = n[25];
+													index[26]	 = n[26];
+													index[27]	 = n[27];
+													index[28]	 = n[28];
+													index[29]	 = n[29];
+													index[30]	 = n[30];
+													index[31]	 = n[31];
+												  }else{
+													index[23]	 = n[24];
+SORT32_180F19:
+													if(!_mm_comilt_ss(XMM3, XMM5)){
+													  index[24]	 = n[15];
+													  index[25]	 = n[25];
+													  index[26]	 = n[26];
+													  index[27]	 = n[27];
+													  index[28]	 = n[28];
+													  index[29]	 = n[29];
+													  index[30]	 = n[30];
+													  index[31]	 = n[31];
+													}else{
+													  index[24]	 = n[25];
+SORT32_190F1A:
+													  if(!_mm_comilt_ss(XMM3, XMM6)){
+														index[25]	 = n[15];
+														index[26]	 = n[26];
+														index[27]	 = n[27];
+														index[28]	 = n[28];
+														index[29]	 = n[29];
+														index[30]	 = n[30];
+														index[31]	 = n[31];
+													  }else{
+														index[25]	 = n[26];
+SORT32_1A0F1B:
+														if(!_mm_comilt_ss(XMM3, XMM7)){
+														  index[26]	 = n[15];
+														  index[27]	 = n[27];
+														  index[28]	 = n[28];
+														  index[29]	 = n[29];
+														  index[30]	 = n[30];
+														  index[31]	 = n[31];
+														}else{
+														  index[26]	 = n[27];
+														  XMM4	 = _mm_load_ss(data+n[28]);
+														  XMM5	 = _mm_load_ss(data+n[29]);
+														  XMM6	 = _mm_load_ss(data+n[30]);
+														  XMM7	 = _mm_load_ss(data+n[31]);
+SORT32_1B0F1C:
+														  if(!_mm_comilt_ss(XMM3, XMM4)){
+															index[27]	 = n[15];
+															index[28]	 = n[28];
+															index[29]	 = n[29];
+															index[30]	 = n[30];
+															index[31]	 = n[31];
+														  }else{
+															index[27]	 = n[28];
+SORT32_1C0F1D:
+															if(!_mm_comilt_ss(XMM3, XMM5)){
+															  index[28]	 = n[15];
+															  index[29]	 = n[29];
+															  index[30]	 = n[30];
+															  index[31]	 = n[31];
+															}else{
+															  index[28]	 = n[29];
+SORT32_1D0F1E:
+															  if(!_mm_comilt_ss(XMM3, XMM6)){
+																index[29]	 = n[15];
+																index[30]	 = n[30];
+																index[31]	 = n[31];
+															  }else{
+																index[29]	 = n[30];
+SORT32_1E0F1F:
+																if(!_mm_comilt_ss(XMM3, XMM7)){
+																  index[30]	 = n[15];
+																  index[31]	 = n[31];
+																}else{
+																  index[30]	 = n[31];
+																  index[31]	 = n[15];
+																}
+															  }
+															}
+														  }
+														}
+													  }
+													}
+												  }
+												}
+											  }
+											}
+										  }
+										}
+									  }
+									}
+								  }
+								}else{
+								  index[14]	 = n[16];
+SORT32_0F0E11:
+								  if(!_mm_comilt_ss(XMM2, XMM5)){
+									index[15]	 = n[14];
+									goto SORT32_100F11;
+								  }else{
+									index[15]	 = n[17];
+SORT32_100E12:
+									if(!_mm_comilt_ss(XMM2, XMM6)){
+									  index[16]	 = n[14];
+									  goto SORT32_110F12;
+									}else{
+									  index[16]	 = n[18];
+SORT32_110E13:
+									  if(!_mm_comilt_ss(XMM2, XMM7)){
+										index[17]	 = n[14];
+										goto SORT32_120F13;
+									  }else{
+										index[17]	 = n[19];
+										XMM4	 = _mm_load_ss(data+n[20]);
+										XMM5	 = _mm_load_ss(data+n[21]);
+										XMM6	 = _mm_load_ss(data+n[22]);
+										XMM7	 = _mm_load_ss(data+n[23]);
+SORT32_120E14:
+										if(!_mm_comilt_ss(XMM2, XMM4)){
+										  index[18]	 = n[14];
+										  goto SORT32_130F14;
+										}else{
+										  index[18]	 = n[20];
+SORT32_130E15:
+										  if(!_mm_comilt_ss(XMM2, XMM5)){
+											index[19]	 = n[14];
+											goto SORT32_140F15;
+										  }else{
+											index[19]	 = n[21];
+SORT32_140E16:
+											if(!_mm_comilt_ss(XMM2, XMM6)){
+											  index[20]	 = n[14];
+											  goto SORT32_150F16;
+											}else{
+											  index[20]	 = n[22];
+SORT32_150E17:
+											  if(!_mm_comilt_ss(XMM2, XMM7)){
+												index[21]	 = n[14];
+												goto SORT32_160F17;
+											  }else{
+												index[21]	 = n[23];
+												XMM4	 = _mm_load_ss(data+n[24]);
+												XMM5	 = _mm_load_ss(data+n[25]);
+												XMM6	 = _mm_load_ss(data+n[26]);
+												XMM7	 = _mm_load_ss(data+n[27]);
+SORT32_160E18:
+												if(!_mm_comilt_ss(XMM2, XMM4)){
+												  index[22]	 = n[14];
+												  goto SORT32_170F18;
+												}else{
+												  index[22]	 = n[24];
+SORT32_170E19:
+												  if(!_mm_comilt_ss(XMM2, XMM5)){
+													index[23]	 = n[14];
+													goto SORT32_180F19;
+												  }else{
+													index[23]	 = n[25];
+SORT32_180E1A:
+													if(!_mm_comilt_ss(XMM2, XMM6)){
+													  index[24]	 = n[14];
+													  goto SORT32_190F1A;
+													}else{
+													  index[24]	 = n[26];
+SORT32_190E1B:
+													  if(!_mm_comilt_ss(XMM2, XMM7)){
+														index[25]	 = n[14];
+														goto SORT32_1A0F1B;
+													  }else{
+														index[25]	 = n[27];
+														XMM4	 = _mm_load_ss(data+n[28]);
+														XMM5	 = _mm_load_ss(data+n[29]);
+														XMM6	 = _mm_load_ss(data+n[30]);
+														XMM7	 = _mm_load_ss(data+n[31]);
+SORT32_1A0E1C:
+														if(!_mm_comilt_ss(XMM2, XMM4)){
+														  index[26]	 = n[14];
+														  goto SORT32_1B0F1C;
+														}else{
+														  index[26]	 = n[28];
+SORT32_1B0E1D:
+														  if(!_mm_comilt_ss(XMM2, XMM5)){
+															index[27]	 = n[14];
+															goto SORT32_1C0F1D;
+														  }else{
+															index[27]	 = n[29];
+SORT32_1C0E1E:
+															if(!_mm_comilt_ss(XMM2, XMM6)){
+															  index[28]	 = n[14];
+															  goto SORT32_1D0F1E;
+															}else{
+															  index[28]	 = n[30];
+SORT32_1D0E1F:
+															  if(!_mm_comilt_ss(XMM2, XMM7)){
+																index[29]	 = n[14];
+																goto SORT32_1E0F1F;
+															  }else{
+																index[29]	 = n[31];
+																index[30]	 = n[14];
+																index[31]	 = n[15];
+															  }
+															}
+														  }
+														}
+													  }
+													}
+												  }
+												}
+											  }
+											}
+										  }
+										}
+									  }
+									}
+								  }
+								}
+							  }else{
+								index[13]	 = n[16];
+SORT32_0E0D11:
+								if(!_mm_comilt_ss(XMM1, XMM5)){
+								  index[14]	 = n[13];
+								  goto SORT32_0F0E11;
+								}else{
+								  index[14]	 = n[17];
+SORT32_0F0D12:
+								  if(!_mm_comilt_ss(XMM1, XMM6)){
+									index[15]	 = n[13];
+									goto SORT32_100E12;
+								  }else{
+									index[15]	 = n[18];
+SORT32_100D13:
+									if(!_mm_comilt_ss(XMM1, XMM7)){
+									  index[16]	 = n[13];
+									  goto SORT32_110E13;
+									}else{
+									  index[16]	 = n[19];
+									  XMM4	 = _mm_load_ss(data+n[20]);
+									  XMM5	 = _mm_load_ss(data+n[21]);
+									  XMM6	 = _mm_load_ss(data+n[22]);
+									  XMM7	 = _mm_load_ss(data+n[23]);
+SORT32_110D14:
+									  if(!_mm_comilt_ss(XMM1, XMM4)){
+										index[17]	 = n[13];
+										goto SORT32_120E14;
+									  }else{
+										index[17]	 = n[20];
+SORT32_120D15:
+										if(!_mm_comilt_ss(XMM1, XMM5)){
+										  index[18]	 = n[13];
+										  goto SORT32_130E15;
+										}else{
+										  index[18]	 = n[21];
+SORT32_130D16:
+										  if(!_mm_comilt_ss(XMM1, XMM6)){
+											index[19]	 = n[13];
+											goto SORT32_140E16;
+										  }else{
+											index[19]	 = n[22];
+SORT32_140D17:
+											if(!_mm_comilt_ss(XMM1, XMM7)){
+											  index[20]	 = n[13];
+											  goto SORT32_150E17;
+											}else{
+											  index[20]	 = n[23];
+											  XMM4	 = _mm_load_ss(data+n[24]);
+											  XMM5	 = _mm_load_ss(data+n[25]);
+											  XMM6	 = _mm_load_ss(data+n[26]);
+											  XMM7	 = _mm_load_ss(data+n[27]);
+SORT32_150D18:
+											  if(!_mm_comilt_ss(XMM1, XMM4)){
+												index[21]	 = n[13];
+												goto SORT32_160E18;
+											  }else{
+												index[21]	 = n[24];
+SORT32_160D19:
+												if(!_mm_comilt_ss(XMM1, XMM5)){
+												  index[22]	 = n[13];
+												  goto SORT32_170E19;
+												}else{
+												  index[22]	 = n[25];
+SORT32_170D1A:
+												  if(!_mm_comilt_ss(XMM1, XMM6)){
+													index[23]	 = n[13];
+													goto SORT32_180E1A;
+												  }else{
+													index[23]	 = n[26];
+SORT32_180D1B:
+													if(!_mm_comilt_ss(XMM1, XMM7)){
+													  index[24]	 = n[13];
+													  goto SORT32_190E1B;
+													}else{
+													  index[24]	 = n[27];
+													  XMM4	 = _mm_load_ss(data+n[28]);
+													  XMM5	 = _mm_load_ss(data+n[29]);
+													  XMM6	 = _mm_load_ss(data+n[30]);
+													  XMM7	 = _mm_load_ss(data+n[31]);
+SORT32_190D1C:
+													  if(!_mm_comilt_ss(XMM1, XMM4)){
+														index[25]	 = n[13];
+														goto SORT32_1A0E1C;
+													  }else{
+														index[25]	 = n[28];
+SORT32_1A0D1D:
+														if(!_mm_comilt_ss(XMM1, XMM5)){
+														  index[26]	 = n[13];
+														  goto SORT32_1B0E1D;
+														}else{
+														  index[26]	 = n[29];
+SORT32_1B0D1E:
+														  if(!_mm_comilt_ss(XMM1, XMM6)){
+															index[27]	 = n[13];
+															goto SORT32_1C0E1E;
+														  }else{
+															index[27]	 = n[30];
+SORT32_1C0D1F:
+															if(!_mm_comilt_ss(XMM1, XMM7)){
+															  index[28]	 = n[13];
+															  goto SORT32_1D0E1F;
+															}else{
+															  index[28]	 = n[31];
+															  index[29]	 = n[13];
+															  index[30]	 = n[14];
+															  index[31]	 = n[15];
+															}
+														  }
+														}
+													  }
+													}
+												  }
+												}
+											  }
+											}
+										  }
+										}
+									  }
+									}
+								  }
+								}
+							  }
+							}else{
+							  index[12]	 = n[16];
+SORT32_0D0C11:
+							  if(!_mm_comilt_ss(XMM0, XMM5)){
+								index[13]	 = n[12];
+								goto SORT32_0E0D11;
+							  }else{
+								index[13]	 = n[17];
+SORT32_0E0C12:
+								if(!_mm_comilt_ss(XMM0, XMM6)){
+								  index[14]	 = n[12];
+								  goto SORT32_0F0D12;
+								}else{
+								  index[14]	 = n[18];
+SORT32_0F0C13:
+								  if(!_mm_comilt_ss(XMM0, XMM7)){
+									index[15]	 = n[12];
+									goto SORT32_100D13;
+								  }else{
+									index[15]	 = n[19];
+									XMM4	 = _mm_load_ss(data+n[20]);
+									XMM5	 = _mm_load_ss(data+n[21]);
+									XMM6	 = _mm_load_ss(data+n[22]);
+									XMM7	 = _mm_load_ss(data+n[23]);
+SORT32_100C14:
+									if(!_mm_comilt_ss(XMM0, XMM4)){
+									  index[16]	 = n[12];
+									  goto SORT32_110D14;
+									}else{
+									  index[16]	 = n[20];
+SORT32_110C15:
+									  if(!_mm_comilt_ss(XMM0, XMM5)){
+										index[17]	 = n[12];
+										goto SORT32_120D15;
+									  }else{
+										index[17]	 = n[21];
+SORT32_120C16:
+										if(!_mm_comilt_ss(XMM0, XMM6)){
+										  index[18]	 = n[12];
+										  goto SORT32_130D16;
+										}else{
+										  index[18]	 = n[22];
+SORT32_130C17:
+										  if(!_mm_comilt_ss(XMM0, XMM7)){
+											index[19]	 = n[12];
+											goto SORT32_140D17;
+										  }else{
+											index[19]	 = n[23];
+											XMM4	 = _mm_load_ss(data+n[24]);
+											XMM5	 = _mm_load_ss(data+n[25]);
+											XMM6	 = _mm_load_ss(data+n[26]);
+											XMM7	 = _mm_load_ss(data+n[27]);
+SORT32_140C18:
+											if(!_mm_comilt_ss(XMM0, XMM4)){
+											  index[20]	 = n[12];
+											  goto SORT32_150D18;
+											}else{
+											  index[20]	 = n[24];
+SORT32_150C19:
+											  if(!_mm_comilt_ss(XMM0, XMM5)){
+												index[21]	 = n[12];
+												goto SORT32_160D19;
+											  }else{
+												index[21]	 = n[25];
+SORT32_160C1A:
+												if(!_mm_comilt_ss(XMM0, XMM6)){
+												  index[22]	 = n[12];
+												  goto SORT32_170D1A;
+												}else{
+												  index[22]	 = n[26];
+SORT32_170C1B:
+												  if(!_mm_comilt_ss(XMM0, XMM7)){
+													index[23]	 = n[12];
+													goto SORT32_180D1B;
+												  }else{
+													index[23]	 = n[27];
+													XMM4	 = _mm_load_ss(data+n[28]);
+													XMM5	 = _mm_load_ss(data+n[29]);
+													XMM6	 = _mm_load_ss(data+n[30]);
+													XMM7	 = _mm_load_ss(data+n[31]);
+SORT32_180C1C:
+													if(!_mm_comilt_ss(XMM0, XMM4)){
+													  index[24]	 = n[12];
+													  goto SORT32_190D1C;
+													}else{
+													  index[24]	 = n[28];
+SORT32_190C1D:
+													  if(!_mm_comilt_ss(XMM0, XMM5)){
+														index[25]	 = n[12];
+														goto SORT32_1A0D1D;
+													  }else{
+														index[25]	 = n[29];
+SORT32_1A0C1E:
+														if(!_mm_comilt_ss(XMM0, XMM6)){
+														  index[26]	 = n[12];
+														  goto SORT32_1B0D1E;
+														}else{
+														  index[26]	 = n[30];
+SORT32_1B0C1F:
+														  if(!_mm_comilt_ss(XMM0, XMM7)){
+															index[27]	 = n[12];
+															goto SORT32_1C0D1F;
+														  }else{
+															index[27]	 = n[31];
+															index[28]	 = n[12];
+															index[29]	 = n[13];
+															index[30]	 = n[14];
+															index[31]	 = n[15];
+														  }
+														}
+													  }
+													}
+												  }
+												}
+											  }
+											}
+										  }
+										}
+									  }
+									}
+								  }
+								}
+							  }
+							}
+						  }else{
+							index[11]	 = n[16];
+SORT32_0C0B11:
+							if(!_mm_comilt_ss(XMM3, XMM5)){
+							  index[12]	 = n[11];
+							  XMM0	 = _mm_load_ss(data+n[12]);
+							  XMM1	 = _mm_load_ss(data+n[13]);
+							  XMM2	 = _mm_load_ss(data+n[14]);
+							  XMM3	 = _mm_load_ss(data+n[15]);
+							  goto SORT32_0D0C11;
+							}else{
+							  index[12]	 = n[17];
+SORT32_0D0B12:
+							  if(!_mm_comilt_ss(XMM3, XMM6)){
+								index[13]	 = n[11];
+								XMM0	 = _mm_load_ss(data+n[12]);
+								XMM1	 = _mm_load_ss(data+n[13]);
+								XMM2	 = _mm_load_ss(data+n[14]);
+								XMM3	 = _mm_load_ss(data+n[15]);
+								goto SORT32_0E0C12;
+							  }else{
+								index[13]	 = n[18];
+SORT32_0E0B13:
+								if(!_mm_comilt_ss(XMM3, XMM7)){
+								  index[14]	 = n[11];
+								  XMM0	 = _mm_load_ss(data+n[12]);
+								  XMM1	 = _mm_load_ss(data+n[13]);
+								  XMM2	 = _mm_load_ss(data+n[14]);
+								  XMM3	 = _mm_load_ss(data+n[15]);
+								  goto SORT32_0F0C13;
+								}else{
+								  index[14]	 = n[19];
+								  XMM4	 = _mm_load_ss(data+n[20]);
+								  XMM5	 = _mm_load_ss(data+n[21]);
+								  XMM6	 = _mm_load_ss(data+n[22]);
+								  XMM7	 = _mm_load_ss(data+n[23]);
+SORT32_0F0B14:
+								  if(!_mm_comilt_ss(XMM3, XMM4)){
+									index[15]	 = n[11];
+									XMM0	 = _mm_load_ss(data+n[12]);
+									XMM1	 = _mm_load_ss(data+n[13]);
+									XMM2	 = _mm_load_ss(data+n[14]);
+									XMM3	 = _mm_load_ss(data+n[15]);
+									goto SORT32_100C14;
+								  }else{
+									index[15]	 = n[20];
+SORT32_100B15:
+									if(!_mm_comilt_ss(XMM3, XMM5)){
+									  index[16]	 = n[11];
+									  XMM0	 = _mm_load_ss(data+n[12]);
+									  XMM1	 = _mm_load_ss(data+n[13]);
+									  XMM2	 = _mm_load_ss(data+n[14]);
+									  XMM3	 = _mm_load_ss(data+n[15]);
+									  goto SORT32_110C15;
+									}else{
+									  index[16]	 = n[21];
+SORT32_110B16:
+									  if(!_mm_comilt_ss(XMM3, XMM6)){
+										index[17]	 = n[11];
+										XMM0	 = _mm_load_ss(data+n[12]);
+										XMM1	 = _mm_load_ss(data+n[13]);
+										XMM2	 = _mm_load_ss(data+n[14]);
+										XMM3	 = _mm_load_ss(data+n[15]);
+										goto SORT32_120C16;
+									  }else{
+										index[17]	 = n[22];
+SORT32_120B17:
+										if(!_mm_comilt_ss(XMM3, XMM7)){
+										  index[18]	 = n[11];
+										  XMM0	 = _mm_load_ss(data+n[12]);
+										  XMM1	 = _mm_load_ss(data+n[13]);
+										  XMM2	 = _mm_load_ss(data+n[14]);
+										  XMM3	 = _mm_load_ss(data+n[15]);
+										  goto SORT32_130C17;
+										}else{
+										  index[18]	 = n[23];
+										  XMM4	 = _mm_load_ss(data+n[24]);
+										  XMM5	 = _mm_load_ss(data+n[25]);
+										  XMM6	 = _mm_load_ss(data+n[26]);
+										  XMM7	 = _mm_load_ss(data+n[27]);
+SORT32_130B18:
+										  if(!_mm_comilt_ss(XMM3, XMM4)){
+											index[19]	 = n[11];
+											XMM0	 = _mm_load_ss(data+n[12]);
+											XMM1	 = _mm_load_ss(data+n[13]);
+											XMM2	 = _mm_load_ss(data+n[14]);
+											XMM3	 = _mm_load_ss(data+n[15]);
+											goto SORT32_140C18;
+										  }else{
+											index[19]	 = n[24];
+SORT32_140B19:
+											if(!_mm_comilt_ss(XMM3, XMM5)){
+											  index[20]	 = n[11];
+											  XMM0	 = _mm_load_ss(data+n[12]);
+											  XMM1	 = _mm_load_ss(data+n[13]);
+											  XMM2	 = _mm_load_ss(data+n[14]);
+											  XMM3	 = _mm_load_ss(data+n[15]);
+											  goto SORT32_150C19;
+											}else{
+											  index[20]	 = n[25];
+SORT32_150B1A:
+											  if(!_mm_comilt_ss(XMM3, XMM6)){
+												index[21]	 = n[11];
+												XMM0	 = _mm_load_ss(data+n[12]);
+												XMM1	 = _mm_load_ss(data+n[13]);
+												XMM2	 = _mm_load_ss(data+n[14]);
+												XMM3	 = _mm_load_ss(data+n[15]);
+												goto SORT32_160C1A;
+											  }else{
+												index[21]	 = n[26];
+SORT32_160B1B:
+												if(!_mm_comilt_ss(XMM3, XMM7)){
+												  index[22]	 = n[11];
+												  XMM0	 = _mm_load_ss(data+n[12]);
+												  XMM1	 = _mm_load_ss(data+n[13]);
+												  XMM2	 = _mm_load_ss(data+n[14]);
+												  XMM3	 = _mm_load_ss(data+n[15]);
+												  goto SORT32_170C1B;
+												}else{
+												  index[22]	 = n[27];
+												  XMM4	 = _mm_load_ss(data+n[28]);
+												  XMM5	 = _mm_load_ss(data+n[29]);
+												  XMM6	 = _mm_load_ss(data+n[30]);
+												  XMM7	 = _mm_load_ss(data+n[31]);
+SORT32_170B1C:
+												  if(!_mm_comilt_ss(XMM3, XMM4)){
+													index[23]	 = n[11];
+													XMM0	 = _mm_load_ss(data+n[12]);
+													XMM1	 = _mm_load_ss(data+n[13]);
+													XMM2	 = _mm_load_ss(data+n[14]);
+													XMM3	 = _mm_load_ss(data+n[15]);
+													goto SORT32_180C1C;
+												  }else{
+													index[23]	 = n[28];
+SORT32_180B1D:
+													if(!_mm_comilt_ss(XMM3, XMM5)){
+													  index[24]	 = n[11];
+													  XMM0	 = _mm_load_ss(data+n[12]);
+													  XMM1	 = _mm_load_ss(data+n[13]);
+													  XMM2	 = _mm_load_ss(data+n[14]);
+													  XMM3	 = _mm_load_ss(data+n[15]);
+													  goto SORT32_190C1D;
+													}else{
+													  index[24]	 = n[29];
+SORT32_190B1E:
+													  if(!_mm_comilt_ss(XMM3, XMM6)){
+														index[25]	 = n[11];
+														XMM0	 = _mm_load_ss(data+n[12]);
+														XMM1	 = _mm_load_ss(data+n[13]);
+														XMM2	 = _mm_load_ss(data+n[14]);
+														XMM3	 = _mm_load_ss(data+n[15]);
+														goto SORT32_1A0C1E;
+													  }else{
+														index[25]	 = n[30];
+SORT32_1A0B1F:
+														if(!_mm_comilt_ss(XMM3, XMM7)){
+														  index[26]	 = n[11];
+														  XMM0	 = _mm_load_ss(data+n[12]);
+														  XMM1	 = _mm_load_ss(data+n[13]);
+														  XMM2	 = _mm_load_ss(data+n[14]);
+														  XMM3	 = _mm_load_ss(data+n[15]);
+														  goto SORT32_1B0C1F;
+														}else{
+														  index[26]	 = n[31];
+														  index[27]	 = n[11];
+														  index[28]	 = n[12];
+														  index[29]	 = n[13];
+														  index[30]	 = n[14];
+														  index[31]	 = n[15];
+														}
+													  }
+													}
+												  }
+												}
+											  }
+											}
+										  }
+										}
+									  }
+									}
+								  }
+								}
+							  }
+							}
+						  }
+						}else{
+						  index[10]	 = n[16];
+SORT32_0B0A11:
+						  if(!_mm_comilt_ss(XMM2, XMM5)){
+							index[11]	 = n[10];
+							goto SORT32_0C0B11;
+						  }else{
+							index[11]	 = n[17];
+SORT32_0C0A12:
+							if(!_mm_comilt_ss(XMM2, XMM6)){
+							  index[12]	 = n[10];
+							  goto SORT32_0D0B12;
+							}else{
+							  index[12]	 = n[18];
+SORT32_0D0A13:
+							  if(!_mm_comilt_ss(XMM2, XMM7)){
+								index[13]	 = n[10];
+								goto SORT32_0E0B13;
+							  }else{
+								index[13]	 = n[19];
+								XMM4	 = _mm_load_ss(data+n[20]);
+								XMM5	 = _mm_load_ss(data+n[21]);
+								XMM6	 = _mm_load_ss(data+n[22]);
+								XMM7	 = _mm_load_ss(data+n[23]);
+SORT32_0E0A14:
+								if(!_mm_comilt_ss(XMM2, XMM4)){
+								  index[14]	 = n[10];
+								  goto SORT32_0F0B14;
+								}else{
+								  index[14]	 = n[20];
+SORT32_0F0A15:
+								  if(!_mm_comilt_ss(XMM2, XMM5)){
+									index[15]	 = n[10];
+									goto SORT32_100B15;
+								  }else{
+									index[15]	 = n[21];
+SORT32_100A16:
+									if(!_mm_comilt_ss(XMM2, XMM6)){
+									  index[16]	 = n[10];
+									  goto SORT32_110B16;
+									}else{
+									  index[16]	 = n[22];
+SORT32_110A17:
+									  if(!_mm_comilt_ss(XMM2, XMM7)){
+										index[17]	 = n[10];
+										goto SORT32_120B17;
+									  }else{
+										index[17]	 = n[23];
+										XMM4	 = _mm_load_ss(data+n[24]);
+										XMM5	 = _mm_load_ss(data+n[25]);
+										XMM6	 = _mm_load_ss(data+n[26]);
+										XMM7	 = _mm_load_ss(data+n[27]);
+SORT32_120A18:
+										if(!_mm_comilt_ss(XMM2, XMM4)){
+										  index[18]	 = n[10];
+										  goto SORT32_130B18;
+										}else{
+										  index[18]	 = n[24];
+SORT32_130A19:
+										  if(!_mm_comilt_ss(XMM2, XMM5)){
+											index[19]	 = n[10];
+											goto SORT32_140B19;
+										  }else{
+											index[19]	 = n[25];
+SORT32_140A1A:
+											if(!_mm_comilt_ss(XMM2, XMM6)){
+											  index[20]	 = n[10];
+											  goto SORT32_150B1A;
+											}else{
+											  index[20]	 = n[26];
+SORT32_150A1B:
+											  if(!_mm_comilt_ss(XMM2, XMM7)){
+												index[21]	 = n[10];
+												goto SORT32_160B1B;
+											  }else{
+												index[21]	 = n[27];
+												XMM4	 = _mm_load_ss(data+n[28]);
+												XMM5	 = _mm_load_ss(data+n[29]);
+												XMM6	 = _mm_load_ss(data+n[30]);
+												XMM7	 = _mm_load_ss(data+n[31]);
+SORT32_160A1C:
+												if(!_mm_comilt_ss(XMM2, XMM4)){
+												  index[22]	 = n[10];
+												  goto SORT32_170B1C;
+												}else{
+												  index[22]	 = n[28];
+SORT32_170A1D:
+												  if(!_mm_comilt_ss(XMM2, XMM5)){
+													index[23]	 = n[10];
+													goto SORT32_180B1D;
+												  }else{
+													index[23]	 = n[29];
+SORT32_180A1E:
+													if(!_mm_comilt_ss(XMM2, XMM6)){
+													  index[24]	 = n[10];
+													  goto SORT32_190B1E;
+													}else{
+													  index[24]	 = n[30];
+SORT32_190A1F:
+													  if(!_mm_comilt_ss(XMM2, XMM7)){
+														index[25]	 = n[10];
+														goto SORT32_1A0B1F;
+													  }else{
+														index[25]	 = n[31];
+														index[26]	 = n[10];
+														index[27]	 = n[11];
+														index[28]	 = n[12];
+														index[29]	 = n[13];
+														index[30]	 = n[14];
+														index[31]	 = n[15];
+													  }
+													}
+												  }
+												}
+											  }
+											}
+										  }
+										}
+									  }
+									}
+								  }
+								}
+							  }
+							}
+						  }
+						}
+					  }else{
+						index[9]	 = n[16];
+SORT32_0A0911:
+						if(!_mm_comilt_ss(XMM1, XMM5)){
+						  index[10]	 = n[9];
+						  goto SORT32_0B0A11;
+						}else{
+						  index[10]	 = n[17];
+SORT32_0B0912:
+						  if(!_mm_comilt_ss(XMM1, XMM6)){
+							index[11]	 = n[9];
+							goto SORT32_0C0A12;
+						  }else{
+							index[11]	 = n[18];
+SORT32_0C0913:
+							if(!_mm_comilt_ss(XMM1, XMM7)){
+							  index[12]	 = n[9];
+							  goto SORT32_0D0A13;
+							}else{
+							  index[12]	 = n[19];
+							  XMM4	 = _mm_load_ss(data+n[20]);
+							  XMM5	 = _mm_load_ss(data+n[21]);
+							  XMM6	 = _mm_load_ss(data+n[22]);
+							  XMM7	 = _mm_load_ss(data+n[23]);
+SORT32_0D0914:
+							  if(!_mm_comilt_ss(XMM1, XMM4)){
+								index[13]	 = n[9];
+								goto SORT32_0E0A14;
+							  }else{
+								index[13]	 = n[20];
+SORT32_0E0915:
+								if(!_mm_comilt_ss(XMM1, XMM5)){
+								  index[14]	 = n[9];
+								  goto SORT32_0F0A15;
+								}else{
+								  index[14]	 = n[21];
+SORT32_0F0916:
+								  if(!_mm_comilt_ss(XMM1, XMM6)){
+									index[15]	 = n[9];
+									goto SORT32_100A16;
+								  }else{
+									index[15]	 = n[22];
+SORT32_100917:
+									if(!_mm_comilt_ss(XMM1, XMM7)){
+									  index[16]	 = n[9];
+									  goto SORT32_110A17;
+									}else{
+									  index[16]	 = n[23];
+									  XMM4	 = _mm_load_ss(data+n[24]);
+									  XMM5	 = _mm_load_ss(data+n[25]);
+									  XMM6	 = _mm_load_ss(data+n[26]);
+									  XMM7	 = _mm_load_ss(data+n[27]);
+SORT32_110918:
+									  if(!_mm_comilt_ss(XMM1, XMM4)){
+										index[17]	 = n[9];
+										goto SORT32_120A18;
+									  }else{
+										index[17]	 = n[24];
+SORT32_120919:
+										if(!_mm_comilt_ss(XMM1, XMM5)){
+										  index[18]	 = n[9];
+										  goto SORT32_130A19;
+										}else{
+										  index[18]	 = n[25];
+SORT32_13091A:
+										  if(!_mm_comilt_ss(XMM1, XMM6)){
+											index[19]	 = n[9];
+											goto SORT32_140A1A;
+										  }else{
+											index[19]	 = n[26];
+SORT32_14091B:
+											if(!_mm_comilt_ss(XMM1, XMM7)){
+											  index[20]	 = n[9];
+											  goto SORT32_150A1B;
+											}else{
+											  index[20]	 = n[27];
+											  XMM4	 = _mm_load_ss(data+n[28]);
+											  XMM5	 = _mm_load_ss(data+n[29]);
+											  XMM6	 = _mm_load_ss(data+n[30]);
+											  XMM7	 = _mm_load_ss(data+n[31]);
+SORT32_15091C:
+											  if(!_mm_comilt_ss(XMM1, XMM4)){
+												index[21]	 = n[9];
+												goto SORT32_160A1C;
+											  }else{
+												index[21]	 = n[28];
+SORT32_16091D:
+												if(!_mm_comilt_ss(XMM1, XMM5)){
+												  index[22]	 = n[9];
+												  goto SORT32_170A1D;
+												}else{
+												  index[22]	 = n[29];
+SORT32_17091E:
+												  if(!_mm_comilt_ss(XMM1, XMM6)){
+													index[23]	 = n[9];
+													goto SORT32_180A1E;
+												  }else{
+													index[23]	 = n[30];
+SORT32_18091F:
+													if(!_mm_comilt_ss(XMM1, XMM7)){
+													  index[24]	 = n[9];
+													  goto SORT32_190A1F;
+													}else{
+													  index[24]	 = n[31];
+													  index[25]	 = n[9];
+													  index[26]	 = n[10];
+													  index[27]	 = n[11];
+													  index[28]	 = n[12];
+													  index[29]	 = n[13];
+													  index[30]	 = n[14];
+													  index[31]	 = n[15];
+													}
+												  }
+												}
+											  }
+											}
+										  }
+										}
+									  }
+									}
+								  }
+								}
+							  }
+							}
+						  }
+						}
+					  }
+					}else{
+					  index[8]	 = n[16];
+SORT32_090811:
+					  if(!_mm_comilt_ss(XMM0, XMM5)){
+						index[9]	 = n[8];
+						goto SORT32_0A0911;
+					  }else{
+						index[9]	 = n[17];
+SORT32_0A0812:
+						if(!_mm_comilt_ss(XMM0, XMM6)){
+						  index[10]	 = n[8];
+						  goto SORT32_0B0912;
+						}else{
+						  index[10]	 = n[18];
+SORT32_0B0813:
+						  if(!_mm_comilt_ss(XMM0, XMM7)){
+							index[11]	 = n[8];
+							goto SORT32_0C0913;
+						  }else{
+							index[11]	 = n[19];
+							XMM4	 = _mm_load_ss(data+n[20]);
+							XMM5	 = _mm_load_ss(data+n[21]);
+							XMM6	 = _mm_load_ss(data+n[22]);
+							XMM7	 = _mm_load_ss(data+n[23]);
+SORT32_0C0814:
+							if(!_mm_comilt_ss(XMM0, XMM4)){
+							  index[12]	 = n[8];
+							  goto SORT32_0D0914;
+							}else{
+							  index[12]	 = n[20];
+SORT32_0D0815:
+							  if(!_mm_comilt_ss(XMM0, XMM5)){
+								index[13]	 = n[8];
+								goto SORT32_0E0915;
+							  }else{
+								index[13]	 = n[21];
+SORT32_0E0816:
+								if(!_mm_comilt_ss(XMM0, XMM6)){
+								  index[14]	 = n[8];
+								  goto SORT32_0F0916;
+								}else{
+								  index[14]	 = n[22];
+SORT32_0F0817:
+								  if(!_mm_comilt_ss(XMM0, XMM7)){
+									index[15]	 = n[8];
+									goto SORT32_100917;
+								  }else{
+									index[15]	 = n[23];
+									XMM4	 = _mm_load_ss(data+n[24]);
+									XMM5	 = _mm_load_ss(data+n[25]);
+									XMM6	 = _mm_load_ss(data+n[26]);
+									XMM7	 = _mm_load_ss(data+n[27]);
+SORT32_100818:
+									if(!_mm_comilt_ss(XMM0, XMM4)){
+									  index[16]	 = n[8];
+									  goto SORT32_110918;
+									}else{
+									  index[16]	 = n[24];
+SORT32_110819:
+									  if(!_mm_comilt_ss(XMM0, XMM5)){
+										index[17]	 = n[8];
+										goto SORT32_120919;
+									  }else{
+										index[17]	 = n[25];
+SORT32_12081A:
+										if(!_mm_comilt_ss(XMM0, XMM6)){
+										  index[18]	 = n[8];
+										  goto SORT32_13091A;
+										}else{
+										  index[18]	 = n[26];
+SORT32_13081B:
+										  if(!_mm_comilt_ss(XMM0, XMM7)){
+											index[19]	 = n[8];
+											goto SORT32_14091B;
+										  }else{
+											index[19]	 = n[27];
+											XMM4	 = _mm_load_ss(data+n[28]);
+											XMM5	 = _mm_load_ss(data+n[29]);
+											XMM6	 = _mm_load_ss(data+n[30]);
+											XMM7	 = _mm_load_ss(data+n[31]);
+SORT32_14081C:
+											if(!_mm_comilt_ss(XMM0, XMM4)){
+											  index[20]	 = n[8];
+											  goto SORT32_15091C;
+											}else{
+											  index[20]	 = n[28];
+SORT32_15081D:
+											  if(!_mm_comilt_ss(XMM0, XMM5)){
+												index[21]	 = n[8];
+												goto SORT32_16091D;
+											  }else{
+												index[21]	 = n[29];
+SORT32_16081E:
+												if(!_mm_comilt_ss(XMM0, XMM6)){
+												  index[22]	 = n[8];
+												  goto SORT32_17091E;
+												}else{
+												  index[22]	 = n[30];
+SORT32_17081F:
+												  if(!_mm_comilt_ss(XMM0, XMM7)){
+													index[23]	 = n[8];
+													goto SORT32_18091F;
+												  }else{
+													index[23]	 = n[31];
+													index[24]	 = n[8];
+													index[25]	 = n[9];
+													index[26]	 = n[10];
+													index[27]	 = n[11];
+													index[28]	 = n[12];
+													index[29]	 = n[13];
+													index[30]	 = n[14];
+													index[31]	 = n[15];
+												  }
+												}
+											  }
+											}
+										  }
+										}
+									  }
+									}
+								  }
+								}
+							  }
+							}
+						  }
+						}
+					  }
+					}
+				  }else{
+					index[7]	 = n[16];
+SORT32_080711:
+					if(!_mm_comilt_ss(XMM3, XMM5)){
+					  index[8]	 = n[7];
+					  XMM0	 = _mm_load_ss(data+n[8]);
+					  XMM1	 = _mm_load_ss(data+n[9]);
+					  XMM2	 = _mm_load_ss(data+n[10]);
+					  XMM3	 = _mm_load_ss(data+n[11]);
+					  goto SORT32_090811;
+					}else{
+					  index[8]	 = n[17];
+SORT32_090712:
+					  if(!_mm_comilt_ss(XMM3, XMM6)){
+						index[9]	 = n[7];
+						XMM0	 = _mm_load_ss(data+n[8]);
+						XMM1	 = _mm_load_ss(data+n[9]);
+						XMM2	 = _mm_load_ss(data+n[10]);
+						XMM3	 = _mm_load_ss(data+n[11]);
+						goto SORT32_0A0812;
+					  }else{
+						index[9]	 = n[18];
+SORT32_0A0713:
+						if(!_mm_comilt_ss(XMM3, XMM7)){
+						  index[10]	 = n[7];
+						  XMM0	 = _mm_load_ss(data+n[8]);
+						  XMM1	 = _mm_load_ss(data+n[9]);
+						  XMM2	 = _mm_load_ss(data+n[10]);
+						  XMM3	 = _mm_load_ss(data+n[11]);
+						  goto SORT32_0B0813;
+						}else{
+						  index[10]	 = n[19];
+						  XMM4	 = _mm_load_ss(data+n[20]);
+						  XMM5	 = _mm_load_ss(data+n[21]);
+						  XMM6	 = _mm_load_ss(data+n[22]);
+						  XMM7	 = _mm_load_ss(data+n[23]);
+SORT32_0B0714:
+						  if(!_mm_comilt_ss(XMM3, XMM4)){
+							index[11]	 = n[7];
+							XMM0	 = _mm_load_ss(data+n[8]);
+							XMM1	 = _mm_load_ss(data+n[9]);
+							XMM2	 = _mm_load_ss(data+n[10]);
+							XMM3	 = _mm_load_ss(data+n[11]);
+							goto SORT32_0C0814;
+						  }else{
+							index[11]	 = n[20];
+SORT32_0C0715:
+							if(!_mm_comilt_ss(XMM3, XMM5)){
+							  index[12]	 = n[7];
+							  XMM0	 = _mm_load_ss(data+n[8]);
+							  XMM1	 = _mm_load_ss(data+n[9]);
+							  XMM2	 = _mm_load_ss(data+n[10]);
+							  XMM3	 = _mm_load_ss(data+n[11]);
+							  goto SORT32_0D0815;
+							}else{
+							  index[12]	 = n[21];
+SORT32_0D0716:
+							  if(!_mm_comilt_ss(XMM3, XMM6)){
+								index[13]	 = n[7];
+								XMM0	 = _mm_load_ss(data+n[8]);
+								XMM1	 = _mm_load_ss(data+n[9]);
+								XMM2	 = _mm_load_ss(data+n[10]);
+								XMM3	 = _mm_load_ss(data+n[11]);
+								goto SORT32_0E0816;
+							  }else{
+								index[13]	 = n[22];
+SORT32_0E0717:
+								if(!_mm_comilt_ss(XMM3, XMM7)){
+								  index[14]	 = n[7];
+								  XMM0	 = _mm_load_ss(data+n[8]);
+								  XMM1	 = _mm_load_ss(data+n[9]);
+								  XMM2	 = _mm_load_ss(data+n[10]);
+								  XMM3	 = _mm_load_ss(data+n[11]);
+								  goto SORT32_0F0817;
+								}else{
+								  index[14]	 = n[23];
+								  XMM4	 = _mm_load_ss(data+n[24]);
+								  XMM5	 = _mm_load_ss(data+n[25]);
+								  XMM6	 = _mm_load_ss(data+n[26]);
+								  XMM7	 = _mm_load_ss(data+n[27]);
+SORT32_0F0718:
+								  if(!_mm_comilt_ss(XMM3, XMM4)){
+									index[15]	 = n[7];
+									XMM0	 = _mm_load_ss(data+n[8]);
+									XMM1	 = _mm_load_ss(data+n[9]);
+									XMM2	 = _mm_load_ss(data+n[10]);
+									XMM3	 = _mm_load_ss(data+n[11]);
+									goto SORT32_100818;
+								  }else{
+									index[15]	 = n[24];
+SORT32_100719:
+									if(!_mm_comilt_ss(XMM3, XMM5)){
+									  index[16]	 = n[7];
+									  XMM0	 = _mm_load_ss(data+n[8]);
+									  XMM1	 = _mm_load_ss(data+n[9]);
+									  XMM2	 = _mm_load_ss(data+n[10]);
+									  XMM3	 = _mm_load_ss(data+n[11]);
+									  goto SORT32_110819;
+									}else{
+									  index[16]	 = n[25];
+SORT32_11071A:
+									  if(!_mm_comilt_ss(XMM3, XMM6)){
+										index[17]	 = n[7];
+										XMM0	 = _mm_load_ss(data+n[8]);
+										XMM1	 = _mm_load_ss(data+n[9]);
+										XMM2	 = _mm_load_ss(data+n[10]);
+										XMM3	 = _mm_load_ss(data+n[11]);
+										goto SORT32_12081A;
+									  }else{
+										index[17]	 = n[26];
+SORT32_12071B:
+										if(!_mm_comilt_ss(XMM3, XMM7)){
+										  index[18]	 = n[7];
+										  XMM0	 = _mm_load_ss(data+n[8]);
+										  XMM1	 = _mm_load_ss(data+n[9]);
+										  XMM2	 = _mm_load_ss(data+n[10]);
+										  XMM3	 = _mm_load_ss(data+n[11]);
+										  goto SORT32_13081B;
+										}else{
+										  index[18]	 = n[27];
+										  XMM4	 = _mm_load_ss(data+n[28]);
+										  XMM5	 = _mm_load_ss(data+n[29]);
+										  XMM6	 = _mm_load_ss(data+n[30]);
+										  XMM7	 = _mm_load_ss(data+n[31]);
+SORT32_13071C:
+										  if(!_mm_comilt_ss(XMM3, XMM4)){
+											index[19]	 = n[7];
+											XMM0	 = _mm_load_ss(data+n[8]);
+											XMM1	 = _mm_load_ss(data+n[9]);
+											XMM2	 = _mm_load_ss(data+n[10]);
+											XMM3	 = _mm_load_ss(data+n[11]);
+											goto SORT32_14081C;
+										  }else{
+											index[19]	 = n[28];
+SORT32_14071D:
+											if(!_mm_comilt_ss(XMM3, XMM5)){
+											  index[20]	 = n[7];
+											  XMM0	 = _mm_load_ss(data+n[8]);
+											  XMM1	 = _mm_load_ss(data+n[9]);
+											  XMM2	 = _mm_load_ss(data+n[10]);
+											  XMM3	 = _mm_load_ss(data+n[11]);
+											  goto SORT32_15081D;
+											}else{
+											  index[20]	 = n[29];
+SORT32_15071E:
+											  if(!_mm_comilt_ss(XMM3, XMM6)){
+												index[21]	 = n[7];
+												XMM0	 = _mm_load_ss(data+n[8]);
+												XMM1	 = _mm_load_ss(data+n[9]);
+												XMM2	 = _mm_load_ss(data+n[10]);
+												XMM3	 = _mm_load_ss(data+n[11]);
+												goto SORT32_16081E;
+											  }else{
+												index[21]	 = n[30];
+SORT32_16071F:
+												if(!_mm_comilt_ss(XMM3, XMM7)){
+												  index[22]	 = n[7];
+												  XMM0	 = _mm_load_ss(data+n[8]);
+												  XMM1	 = _mm_load_ss(data+n[9]);
+												  XMM2	 = _mm_load_ss(data+n[10]);
+												  XMM3	 = _mm_load_ss(data+n[11]);
+												  goto SORT32_17081F;
+												}else{
+												  index[22]	 = n[31];
+												  index[23]	 = n[7];
+												  index[24]	 = n[8];
+												  index[25]	 = n[9];
+												  index[26]	 = n[10];
+												  index[27]	 = n[11];
+												  index[28]	 = n[12];
+												  index[29]	 = n[13];
+												  index[30]	 = n[14];
+												  index[31]	 = n[15];
+												}
+											  }
+											}
+										  }
+										}
+									  }
+									}
+								  }
+								}
+							  }
+							}
+						  }
+						}
+					  }
+					}
+				  }
+				}else{
+				  index[6]	 = n[16];
+SORT32_070611:
+				  if(!_mm_comilt_ss(XMM2, XMM5)){
+					index[7]	 = n[6];
+					goto SORT32_080711;
+				  }else{
+					index[7]	 = n[17];
+SORT32_080612:
+					if(!_mm_comilt_ss(XMM2, XMM6)){
+					  index[8]	 = n[6];
+					  goto SORT32_090712;
+					}else{
+					  index[8]	 = n[18];
+SORT32_090613:
+					  if(!_mm_comilt_ss(XMM2, XMM7)){
+						index[9]	 = n[6];
+						goto SORT32_0A0713;
+					  }else{
+						index[9]	 = n[19];
+						XMM4	 = _mm_load_ss(data+n[20]);
+						XMM5	 = _mm_load_ss(data+n[21]);
+						XMM6	 = _mm_load_ss(data+n[22]);
+						XMM7	 = _mm_load_ss(data+n[23]);
+SORT32_0A0614:
+						if(!_mm_comilt_ss(XMM2, XMM4)){
+						  index[10]	 = n[6];
+						  goto SORT32_0B0714;
+						}else{
+						  index[10]	 = n[20];
+SORT32_0B0615:
+						  if(!_mm_comilt_ss(XMM2, XMM5)){
+							index[11]	 = n[6];
+							goto SORT32_0C0715;
+						  }else{
+							index[11]	 = n[21];
+SORT32_0C0616:
+							if(!_mm_comilt_ss(XMM2, XMM6)){
+							  index[12]	 = n[6];
+							  goto SORT32_0D0716;
+							}else{
+							  index[12]	 = n[22];
+SORT32_0D0617:
+							  if(!_mm_comilt_ss(XMM2, XMM7)){
+								index[13]	 = n[6];
+								goto SORT32_0E0717;
+							  }else{
+								index[13]	 = n[23];
+								XMM4	 = _mm_load_ss(data+n[24]);
+								XMM5	 = _mm_load_ss(data+n[25]);
+								XMM6	 = _mm_load_ss(data+n[26]);
+								XMM7	 = _mm_load_ss(data+n[27]);
+SORT32_0E0618:
+								if(!_mm_comilt_ss(XMM2, XMM4)){
+								  index[14]	 = n[6];
+								  goto SORT32_0F0718;
+								}else{
+								  index[14]	 = n[24];
+SORT32_0F0619:
+								  if(!_mm_comilt_ss(XMM2, XMM5)){
+									index[15]	 = n[6];
+									goto SORT32_100719;
+								  }else{
+									index[15]	 = n[25];
+SORT32_10061A:
+									if(!_mm_comilt_ss(XMM2, XMM6)){
+									  index[16]	 = n[6];
+									  goto SORT32_11071A;
+									}else{
+									  index[16]	 = n[26];
+SORT32_11061B:
+									  if(!_mm_comilt_ss(XMM2, XMM7)){
+										index[17]	 = n[6];
+										goto SORT32_12071B;
+									  }else{
+										index[17]	 = n[27];
+										XMM4	 = _mm_load_ss(data+n[28]);
+										XMM5	 = _mm_load_ss(data+n[29]);
+										XMM6	 = _mm_load_ss(data+n[30]);
+										XMM7	 = _mm_load_ss(data+n[31]);
+SORT32_12061C:
+										if(!_mm_comilt_ss(XMM2, XMM4)){
+										  index[18]	 = n[6];
+										  goto SORT32_13071C;
+										}else{
+										  index[18]	 = n[28];
+SORT32_13061D:
+										  if(!_mm_comilt_ss(XMM2, XMM5)){
+											index[19]	 = n[6];
+											goto SORT32_14071D;
+										  }else{
+											index[19]	 = n[29];
+SORT32_14061E:
+											if(!_mm_comilt_ss(XMM2, XMM6)){
+											  index[20]	 = n[6];
+											  goto SORT32_15071E;
+											}else{
+											  index[20]	 = n[30];
+SORT32_15061F:
+											  if(!_mm_comilt_ss(XMM2, XMM7)){
+												index[21]	 = n[6];
+												goto SORT32_16071F;
+											  }else{
+												index[21]	 = n[31];
+												index[22]	 = n[6];
+												index[23]	 = n[7];
+												index[24]	 = n[8];
+												index[25]	 = n[9];
+												index[26]	 = n[10];
+												index[27]	 = n[11];
+												index[28]	 = n[12];
+												index[29]	 = n[13];
+												index[30]	 = n[14];
+												index[31]	 = n[15];
+											  }
+											}
+										  }
+										}
+									  }
+									}
+								  }
+								}
+							  }
+							}
+						  }
+						}
+					  }
+					}
+				  }
+				}
+			  }else{
+				index[5]	 = n[16];
+SORT32_060511:
+				if(!_mm_comilt_ss(XMM1, XMM5)){
+				  index[6]	 = n[5];
+				  goto SORT32_070611;
+				}else{
+				  index[6]	 = n[17];
+SORT32_070512:
+				  if(!_mm_comilt_ss(XMM1, XMM6)){
+					index[7]	 = n[5];
+					goto SORT32_080612;
+				  }else{
+					index[7]	 = n[18];
+SORT32_080513:
+					if(!_mm_comilt_ss(XMM1, XMM7)){
+					  index[8]	 = n[5];
+					  goto SORT32_090613;
+					}else{
+					  index[8]	 = n[19];
+					  XMM4	 = _mm_load_ss(data+n[20]);
+					  XMM5	 = _mm_load_ss(data+n[21]);
+					  XMM6	 = _mm_load_ss(data+n[22]);
+					  XMM7	 = _mm_load_ss(data+n[23]);
+SORT32_090514:
+					  if(!_mm_comilt_ss(XMM1, XMM4)){
+						index[9]	 = n[5];
+						goto SORT32_0A0614;
+					  }else{
+						index[9]	 = n[20];
+SORT32_0A0515:
+						if(!_mm_comilt_ss(XMM1, XMM5)){
+						  index[10]	 = n[5];
+						  goto SORT32_0B0615;
+						}else{
+						  index[10]	 = n[21];
+SORT32_0B0516:
+						  if(!_mm_comilt_ss(XMM1, XMM6)){
+							index[11]	 = n[5];
+							goto SORT32_0C0616;
+						  }else{
+							index[11]	 = n[22];
+SORT32_0C0517:
+							if(!_mm_comilt_ss(XMM1, XMM7)){
+							  index[12]	 = n[5];
+							  goto SORT32_0D0617;
+							}else{
+							  index[12]	 = n[23];
+							  XMM4	 = _mm_load_ss(data+n[24]);
+							  XMM5	 = _mm_load_ss(data+n[25]);
+							  XMM6	 = _mm_load_ss(data+n[26]);
+							  XMM7	 = _mm_load_ss(data+n[27]);
+SORT32_0D0518:
+							  if(!_mm_comilt_ss(XMM1, XMM4)){
+								index[13]	 = n[5];
+								goto SORT32_0E0618;
+							  }else{
+								index[13]	 = n[24];
+SORT32_0E0519:
+								if(!_mm_comilt_ss(XMM1, XMM5)){
+								  index[14]	 = n[5];
+								  goto SORT32_0F0619;
+								}else{
+								  index[14]	 = n[25];
+SORT32_0F051A:
+								  if(!_mm_comilt_ss(XMM1, XMM6)){
+									index[15]	 = n[5];
+									goto SORT32_10061A;
+								  }else{
+									index[15]	 = n[26];
+SORT32_10051B:
+									if(!_mm_comilt_ss(XMM1, XMM7)){
+									  index[16]	 = n[5];
+									  goto SORT32_11061B;
+									}else{
+									  index[16]	 = n[27];
+									  XMM4	 = _mm_load_ss(data+n[28]);
+									  XMM5	 = _mm_load_ss(data+n[29]);
+									  XMM6	 = _mm_load_ss(data+n[30]);
+									  XMM7	 = _mm_load_ss(data+n[31]);
+SORT32_11051C:
+									  if(!_mm_comilt_ss(XMM1, XMM4)){
+										index[17]	 = n[5];
+										goto SORT32_12061C;
+									  }else{
+										index[17]	 = n[28];
+SORT32_12051D:
+										if(!_mm_comilt_ss(XMM1, XMM5)){
+										  index[18]	 = n[5];
+										  goto SORT32_13061D;
+										}else{
+										  index[18]	 = n[29];
+SORT32_13051E:
+										  if(!_mm_comilt_ss(XMM1, XMM6)){
+											index[19]	 = n[5];
+											goto SORT32_14061E;
+										  }else{
+											index[19]	 = n[30];
+SORT32_14051F:
+											if(!_mm_comilt_ss(XMM1, XMM7)){
+											  index[20]	 = n[5];
+											  goto SORT32_15061F;
+											}else{
+											  index[20]	 = n[31];
+											  index[21]	 = n[5];
+											  index[22]	 = n[6];
+											  index[23]	 = n[7];
+											  index[24]	 = n[8];
+											  index[25]	 = n[9];
+											  index[26]	 = n[10];
+											  index[27]	 = n[11];
+											  index[28]	 = n[12];
+											  index[29]	 = n[13];
+											  index[30]	 = n[14];
+											  index[31]	 = n[15];
+											}
+										  }
+										}
+									  }
+									}
+								  }
+								}
+							  }
+							}
+						  }
+						}
+					  }
+					}
+				  }
+				}
+			  }
+			}else{
+			  index[4]	 = n[16];
+SORT32_050411:
+			  if(!_mm_comilt_ss(XMM0, XMM5)){
+				index[5]	 = n[4];
+				goto SORT32_060511;
+			  }else{
+				index[5]	 = n[17];
+SORT32_060412:
+				if(!_mm_comilt_ss(XMM0, XMM6)){
+				  index[6]	 = n[4];
+				  goto SORT32_070512;
+				}else{
+				  index[6]	 = n[18];
+SORT32_070413:
+				  if(!_mm_comilt_ss(XMM0, XMM7)){
+					index[7]	 = n[4];
+					goto SORT32_080513;
+				  }else{
+					index[7]	 = n[19];
+					XMM4	 = _mm_load_ss(data+n[20]);
+					XMM5	 = _mm_load_ss(data+n[21]);
+					XMM6	 = _mm_load_ss(data+n[22]);
+					XMM7	 = _mm_load_ss(data+n[23]);
+SORT32_080414:
+					if(!_mm_comilt_ss(XMM0, XMM4)){
+					  index[8]	 = n[4];
+					  goto SORT32_090514;
+					}else{
+					  index[8]	 = n[20];
+SORT32_090415:
+					  if(!_mm_comilt_ss(XMM0, XMM5)){
+						index[9]	 = n[4];
+						goto SORT32_0A0515;
+					  }else{
+						index[9]	 = n[21];
+SORT32_0A0416:
+						if(!_mm_comilt_ss(XMM0, XMM6)){
+						  index[10]	 = n[4];
+						  goto SORT32_0B0516;
+						}else{
+						  index[10]	 = n[22];
+SORT32_0B0417:
+						  if(!_mm_comilt_ss(XMM0, XMM7)){
+							index[11]	 = n[4];
+							goto SORT32_0C0517;
+						  }else{
+							index[11]	 = n[23];
+							XMM4	 = _mm_load_ss(data+n[24]);
+							XMM5	 = _mm_load_ss(data+n[25]);
+							XMM6	 = _mm_load_ss(data+n[26]);
+							XMM7	 = _mm_load_ss(data+n[27]);
+SORT32_0C0418:
+							if(!_mm_comilt_ss(XMM0, XMM4)){
+							  index[12]	 = n[4];
+							  goto SORT32_0D0518;
+							}else{
+							  index[12]	 = n[24];
+SORT32_0D0419:
+							  if(!_mm_comilt_ss(XMM0, XMM5)){
+								index[13]	 = n[4];
+								goto SORT32_0E0519;
+							  }else{
+								index[13]	 = n[25];
+SORT32_0E041A:
+								if(!_mm_comilt_ss(XMM0, XMM6)){
+								  index[14]	 = n[4];
+								  goto SORT32_0F051A;
+								}else{
+								  index[14]	 = n[26];
+SORT32_0F041B:
+								  if(!_mm_comilt_ss(XMM0, XMM7)){
+									index[15]	 = n[4];
+									goto SORT32_10051B;
+								  }else{
+									index[15]	 = n[27];
+									XMM4	 = _mm_load_ss(data+n[28]);
+									XMM5	 = _mm_load_ss(data+n[29]);
+									XMM6	 = _mm_load_ss(data+n[30]);
+									XMM7	 = _mm_load_ss(data+n[31]);
+SORT32_10041C:
+									if(!_mm_comilt_ss(XMM0, XMM4)){
+									  index[16]	 = n[4];
+									  goto SORT32_11051C;
+									}else{
+									  index[16]	 = n[28];
+SORT32_11041D:
+									  if(!_mm_comilt_ss(XMM0, XMM5)){
+										index[17]	 = n[4];
+										goto SORT32_12051D;
+									  }else{
+										index[17]	 = n[29];
+SORT32_12041E:
+										if(!_mm_comilt_ss(XMM0, XMM6)){
+										  index[18]	 = n[4];
+										  goto SORT32_13051E;
+										}else{
+										  index[18]	 = n[30];
+SORT32_13041F:
+										  if(!_mm_comilt_ss(XMM0, XMM7)){
+											index[19]	 = n[4];
+											goto SORT32_14051F;
+										  }else{
+											index[19]	 = n[31];
+											index[20]	 = n[4];
+											index[21]	 = n[5];
+											index[22]	 = n[6];
+											index[23]	 = n[7];
+											index[24]	 = n[8];
+											index[25]	 = n[9];
+											index[26]	 = n[10];
+											index[27]	 = n[11];
+											index[28]	 = n[12];
+											index[29]	 = n[13];
+											index[30]	 = n[14];
+											index[31]	 = n[15];
+										  }
+										}
+									  }
+									}
+								  }
+								}
+							  }
+							}
+						  }
+						}
+					  }
+					}
+				  }
+				}
+			  }
+			}
+		  }else{
+			index[3]	 = n[16];
+SORT32_040311:
+			if(!_mm_comilt_ss(XMM3, XMM5)){
+			  index[4]	 = n[3];
+			  XMM0	 = _mm_load_ss(data+n[4]);
+			  XMM1	 = _mm_load_ss(data+n[5]);
+			  XMM2	 = _mm_load_ss(data+n[6]);
+			  XMM3	 = _mm_load_ss(data+n[7]);
+			  goto SORT32_050411;
+			}else{
+			  index[4]	 = n[17];
+SORT32_050312:
+			  if(!_mm_comilt_ss(XMM3, XMM6)){
+				index[5]	 = n[3];
+				XMM0	 = _mm_load_ss(data+n[4]);
+				XMM1	 = _mm_load_ss(data+n[5]);
+				XMM2	 = _mm_load_ss(data+n[6]);
+				XMM3	 = _mm_load_ss(data+n[7]);
+				goto SORT32_060412;
+			  }else{
+				index[5]	 = n[18];
+SORT32_060313:
+				if(!_mm_comilt_ss(XMM3, XMM7)){
+				  index[6]	 = n[3];
+				  XMM0	 = _mm_load_ss(data+n[4]);
+				  XMM1	 = _mm_load_ss(data+n[5]);
+				  XMM2	 = _mm_load_ss(data+n[6]);
+				  XMM3	 = _mm_load_ss(data+n[7]);
+				  goto SORT32_070413;
+				}else{
+				  index[6]	 = n[19];
+				  XMM4	 = _mm_load_ss(data+n[20]);
+				  XMM5	 = _mm_load_ss(data+n[21]);
+				  XMM6	 = _mm_load_ss(data+n[22]);
+				  XMM7	 = _mm_load_ss(data+n[23]);
+SORT32_070314:
+				  if(!_mm_comilt_ss(XMM3, XMM4)){
+					index[7]	 = n[3];
+					XMM0	 = _mm_load_ss(data+n[4]);
+					XMM1	 = _mm_load_ss(data+n[5]);
+					XMM2	 = _mm_load_ss(data+n[6]);
+					XMM3	 = _mm_load_ss(data+n[7]);
+					goto SORT32_080414;
+				  }else{
+					index[7]	 = n[20];
+SORT32_080315:
+					if(!_mm_comilt_ss(XMM3, XMM5)){
+					  index[8]	 = n[3];
+					  XMM0	 = _mm_load_ss(data+n[4]);
+					  XMM1	 = _mm_load_ss(data+n[5]);
+					  XMM2	 = _mm_load_ss(data+n[6]);
+					  XMM3	 = _mm_load_ss(data+n[7]);
+					  goto SORT32_090415;
+					}else{
+					  index[8]	 = n[21];
+SORT32_090316:
+					  if(!_mm_comilt_ss(XMM3, XMM6)){
+						index[9]	 = n[3];
+						XMM0	 = _mm_load_ss(data+n[4]);
+						XMM1	 = _mm_load_ss(data+n[5]);
+						XMM2	 = _mm_load_ss(data+n[6]);
+						XMM3	 = _mm_load_ss(data+n[7]);
+						goto SORT32_0A0416;
+					  }else{
+						index[9]	 = n[22];
+SORT32_0A0317:
+						if(!_mm_comilt_ss(XMM3, XMM7)){
+						  index[10]	 = n[3];
+						  XMM0	 = _mm_load_ss(data+n[4]);
+						  XMM1	 = _mm_load_ss(data+n[5]);
+						  XMM2	 = _mm_load_ss(data+n[6]);
+						  XMM3	 = _mm_load_ss(data+n[7]);
+						  goto SORT32_0B0417;
+						}else{
+						  index[10]	 = n[23];
+						  XMM4	 = _mm_load_ss(data+n[24]);
+						  XMM5	 = _mm_load_ss(data+n[25]);
+						  XMM6	 = _mm_load_ss(data+n[26]);
+						  XMM7	 = _mm_load_ss(data+n[27]);
+SORT32_0B0318:
+						  if(!_mm_comilt_ss(XMM3, XMM4)){
+							index[11]	 = n[3];
+							XMM0	 = _mm_load_ss(data+n[4]);
+							XMM1	 = _mm_load_ss(data+n[5]);
+							XMM2	 = _mm_load_ss(data+n[6]);
+							XMM3	 = _mm_load_ss(data+n[7]);
+							goto SORT32_0C0418;
+						  }else{
+							index[11]	 = n[24];
+SORT32_0C0319:
+							if(!_mm_comilt_ss(XMM3, XMM5)){
+							  index[12]	 = n[3];
+							  XMM0	 = _mm_load_ss(data+n[4]);
+							  XMM1	 = _mm_load_ss(data+n[5]);
+							  XMM2	 = _mm_load_ss(data+n[6]);
+							  XMM3	 = _mm_load_ss(data+n[7]);
+							  goto SORT32_0D0419;
+							}else{
+							  index[12]	 = n[25];
+SORT32_0D031A:
+							  if(!_mm_comilt_ss(XMM3, XMM6)){
+								index[13]	 = n[3];
+								XMM0	 = _mm_load_ss(data+n[4]);
+								XMM1	 = _mm_load_ss(data+n[5]);
+								XMM2	 = _mm_load_ss(data+n[6]);
+								XMM3	 = _mm_load_ss(data+n[7]);
+								goto SORT32_0E041A;
+							  }else{
+								index[13]	 = n[26];
+SORT32_0E031B:
+								if(!_mm_comilt_ss(XMM3, XMM7)){
+								  index[14]	 = n[3];
+								  XMM0	 = _mm_load_ss(data+n[4]);
+								  XMM1	 = _mm_load_ss(data+n[5]);
+								  XMM2	 = _mm_load_ss(data+n[6]);
+								  XMM3	 = _mm_load_ss(data+n[7]);
+								  goto SORT32_0F041B;
+								}else{
+								  index[14]	 = n[27];
+								  XMM4	 = _mm_load_ss(data+n[28]);
+								  XMM5	 = _mm_load_ss(data+n[29]);
+								  XMM6	 = _mm_load_ss(data+n[30]);
+								  XMM7	 = _mm_load_ss(data+n[31]);
+SORT32_0F031C:
+								  if(!_mm_comilt_ss(XMM3, XMM4)){
+									index[15]	 = n[3];
+									XMM0	 = _mm_load_ss(data+n[4]);
+									XMM1	 = _mm_load_ss(data+n[5]);
+									XMM2	 = _mm_load_ss(data+n[6]);
+									XMM3	 = _mm_load_ss(data+n[7]);
+									goto SORT32_10041C;
+								  }else{
+									index[15]	 = n[28];
+SORT32_10031D:
+									if(!_mm_comilt_ss(XMM3, XMM5)){
+									  index[16]	 = n[3];
+									  XMM0	 = _mm_load_ss(data+n[4]);
+									  XMM1	 = _mm_load_ss(data+n[5]);
+									  XMM2	 = _mm_load_ss(data+n[6]);
+									  XMM3	 = _mm_load_ss(data+n[7]);
+									  goto SORT32_11041D;
+									}else{
+									  index[16]	 = n[29];
+SORT32_11031E:
+									  if(!_mm_comilt_ss(XMM3, XMM6)){
+										index[17]	 = n[3];
+										XMM0	 = _mm_load_ss(data+n[4]);
+										XMM1	 = _mm_load_ss(data+n[5]);
+										XMM2	 = _mm_load_ss(data+n[6]);
+										XMM3	 = _mm_load_ss(data+n[7]);
+										goto SORT32_12041E;
+									  }else{
+										index[17]	 = n[30];
+SORT32_12031F:
+										if(!_mm_comilt_ss(XMM3, XMM7)){
+										  index[18]	 = n[3];
+										  XMM0	 = _mm_load_ss(data+n[4]);
+										  XMM1	 = _mm_load_ss(data+n[5]);
+										  XMM2	 = _mm_load_ss(data+n[6]);
+										  XMM3	 = _mm_load_ss(data+n[7]);
+										  goto SORT32_13041F;
+										}else{
+										  index[18]	 = n[31];
+										  index[19]	 = n[3];
+										  index[20]	 = n[4];
+										  index[21]	 = n[5];
+										  index[22]	 = n[6];
+										  index[23]	 = n[7];
+										  index[24]	 = n[8];
+										  index[25]	 = n[9];
+										  index[26]	 = n[10];
+										  index[27]	 = n[11];
+										  index[28]	 = n[12];
+										  index[29]	 = n[13];
+										  index[30]	 = n[14];
+										  index[31]	 = n[15];
+										}
+									  }
+									}
+								  }
+								}
+							  }
+							}
+						  }
+						}
+					  }
+					}
+				  }
+				}
+			  }
+			}
+		  }
+		}else{
+		  index[2]	 = n[16];
+SORT32_030211:
+		  if(!_mm_comilt_ss(XMM2, XMM5)){
+			index[3]	 = n[2];
+			goto SORT32_040311;
+		  }else{
+			index[3]	 = n[17];
+SORT32_040212:
+			if(!_mm_comilt_ss(XMM2, XMM6)){
+			  index[4]	 = n[2];
+			  goto SORT32_050312;
+			}else{
+			  index[4]	 = n[18];
+SORT32_050213:
+			  if(!_mm_comilt_ss(XMM2, XMM7)){
+				index[5]	 = n[2];
+				goto SORT32_060313;
+			  }else{
+				index[5]	 = n[19];
+				XMM4	 = _mm_load_ss(data+n[20]);
+				XMM5	 = _mm_load_ss(data+n[21]);
+				XMM6	 = _mm_load_ss(data+n[22]);
+				XMM7	 = _mm_load_ss(data+n[23]);
+SORT32_060214:
+				if(!_mm_comilt_ss(XMM2, XMM4)){
+				  index[6]	 = n[2];
+				  goto SORT32_070314;
+				}else{
+				  index[6]	 = n[20];
+SORT32_070215:
+				  if(!_mm_comilt_ss(XMM2, XMM5)){
+					index[7]	 = n[2];
+					goto SORT32_080315;
+				  }else{
+					index[7]	 = n[21];
+SORT32_080216:
+					if(!_mm_comilt_ss(XMM2, XMM6)){
+					  index[8]	 = n[2];
+					  goto SORT32_090316;
+					}else{
+					  index[8]	 = n[22];
+SORT32_090217:
+					  if(!_mm_comilt_ss(XMM2, XMM7)){
+						index[9]	 = n[2];
+						goto SORT32_0A0317;
+					  }else{
+						index[9]	 = n[23];
+						XMM4	 = _mm_load_ss(data+n[24]);
+						XMM5	 = _mm_load_ss(data+n[25]);
+						XMM6	 = _mm_load_ss(data+n[26]);
+						XMM7	 = _mm_load_ss(data+n[27]);
+SORT32_0A0218:
+						if(!_mm_comilt_ss(XMM2, XMM4)){
+						  index[10]	 = n[2];
+						  goto SORT32_0B0318;
+						}else{
+						  index[10]	 = n[24];
+SORT32_0B0219:
+						  if(!_mm_comilt_ss(XMM2, XMM5)){
+							index[11]	 = n[2];
+							goto SORT32_0C0319;
+						  }else{
+							index[11]	 = n[25];
+SORT32_0C021A:
+							if(!_mm_comilt_ss(XMM2, XMM6)){
+							  index[12]	 = n[2];
+							  goto SORT32_0D031A;
+							}else{
+							  index[12]	 = n[26];
+SORT32_0D021B:
+							  if(!_mm_comilt_ss(XMM2, XMM7)){
+								index[13]	 = n[2];
+								goto SORT32_0E031B;
+							  }else{
+								index[13]	 = n[27];
+								XMM4	 = _mm_load_ss(data+n[28]);
+								XMM5	 = _mm_load_ss(data+n[29]);
+								XMM6	 = _mm_load_ss(data+n[30]);
+								XMM7	 = _mm_load_ss(data+n[31]);
+SORT32_0E021C:
+								if(!_mm_comilt_ss(XMM2, XMM4)){
+								  index[14]	 = n[2];
+								  goto SORT32_0F031C;
+								}else{
+								  index[14]	 = n[28];
+SORT32_0F021D:
+								  if(!_mm_comilt_ss(XMM2, XMM5)){
+									index[15]	 = n[2];
+									goto SORT32_10031D;
+								  }else{
+									index[15]	 = n[29];
+SORT32_10021E:
+									if(!_mm_comilt_ss(XMM2, XMM6)){
+									  index[16]	 = n[2];
+									  goto SORT32_11031E;
+									}else{
+									  index[16]	 = n[30];
+SORT32_11021F:
+									  if(!_mm_comilt_ss(XMM2, XMM7)){
+										index[17]	 = n[2];
+										goto SORT32_12031F;
+									  }else{
+										index[17]	 = n[31];
+										index[18]	 = n[2];
+										index[19]	 = n[3];
+										index[20]	 = n[4];
+										index[21]	 = n[5];
+										index[22]	 = n[6];
+										index[23]	 = n[7];
+										index[24]	 = n[8];
+										index[25]	 = n[9];
+										index[26]	 = n[10];
+										index[27]	 = n[11];
+										index[28]	 = n[12];
+										index[29]	 = n[13];
+										index[30]	 = n[14];
+										index[31]	 = n[15];
+									  }
+									}
+								  }
+								}
+							  }
+							}
+						  }
+						}
+					  }
+					}
+				  }
+				}
+			  }
+			}
+		  }
+		}
+	  }else{
+		index[1]	 = n[16];
+SORT32_020111:
+		if(!_mm_comilt_ss(XMM1, XMM5)){
+		  index[2]	 = n[1];
+		  goto SORT32_030211;
+		}else{
+		  index[2]	 = n[17];
+SORT32_030112:
+		  if(!_mm_comilt_ss(XMM1, XMM6)){
+			index[3]	 = n[1];
+			goto SORT32_040212;
+		  }else{
+			index[3]	 = n[18];
+SORT32_040113:
+			if(!_mm_comilt_ss(XMM1, XMM7)){
+			  index[4]	 = n[1];
+			  goto SORT32_050213;
+			}else{
+			  index[4]	 = n[19];
+			  XMM4	 = _mm_load_ss(data+n[20]);
+			  XMM5	 = _mm_load_ss(data+n[21]);
+			  XMM6	 = _mm_load_ss(data+n[22]);
+			  XMM7	 = _mm_load_ss(data+n[23]);
+SORT32_050114:
+			  if(!_mm_comilt_ss(XMM1, XMM4)){
+				index[5]	 = n[1];
+				goto SORT32_060214;
+			  }else{
+				index[5]	 = n[20];
+SORT32_060115:
+				if(!_mm_comilt_ss(XMM1, XMM5)){
+				  index[6]	 = n[1];
+				  goto SORT32_070215;
+				}else{
+				  index[6]	 = n[21];
+SORT32_070116:
+				  if(!_mm_comilt_ss(XMM1, XMM6)){
+					index[7]	 = n[1];
+					goto SORT32_080216;
+				  }else{
+					index[7]	 = n[22];
+SORT32_080117:
+					if(!_mm_comilt_ss(XMM1, XMM7)){
+					  index[8]	 = n[1];
+					  goto SORT32_090217;
+					}else{
+					  index[8]	 = n[23];
+					  XMM4	 = _mm_load_ss(data+n[24]);
+					  XMM5	 = _mm_load_ss(data+n[25]);
+					  XMM6	 = _mm_load_ss(data+n[26]);
+					  XMM7	 = _mm_load_ss(data+n[27]);
+SORT32_090118:
+					  if(!_mm_comilt_ss(XMM1, XMM4)){
+						index[9]	 = n[1];
+						goto SORT32_0A0218;
+					  }else{
+						index[9]	 = n[24];
+SORT32_0A0119:
+						if(!_mm_comilt_ss(XMM1, XMM5)){
+						  index[10]	 = n[1];
+						  goto SORT32_0B0219;
+						}else{
+						  index[10]	 = n[25];
+SORT32_0B011A:
+						  if(!_mm_comilt_ss(XMM1, XMM6)){
+							index[11]	 = n[1];
+							goto SORT32_0C021A;
+						  }else{
+							index[11]	 = n[26];
+SORT32_0C011B:
+							if(!_mm_comilt_ss(XMM1, XMM7)){
+							  index[12]	 = n[1];
+							  goto SORT32_0D021B;
+							}else{
+							  index[12]	 = n[27];
+							  XMM4	 = _mm_load_ss(data+n[28]);
+							  XMM5	 = _mm_load_ss(data+n[29]);
+							  XMM6	 = _mm_load_ss(data+n[30]);
+							  XMM7	 = _mm_load_ss(data+n[31]);
+SORT32_0D011C:
+							  if(!_mm_comilt_ss(XMM1, XMM4)){
+								index[13]	 = n[1];
+								goto SORT32_0E021C;
+							  }else{
+								index[13]	 = n[28];
+SORT32_0E011D:
+								if(!_mm_comilt_ss(XMM1, XMM5)){
+								  index[14]	 = n[1];
+								  goto SORT32_0F021D;
+								}else{
+								  index[14]	 = n[29];
+SORT32_0F011E:
+								  if(!_mm_comilt_ss(XMM1, XMM6)){
+									index[15]	 = n[1];
+									goto SORT32_10021E;
+								  }else{
+									index[15]	 = n[30];
+SORT32_10011F:
+									if(!_mm_comilt_ss(XMM1, XMM7)){
+									  index[16]	 = n[1];
+									  goto SORT32_11021F;
+									}else{
+									  index[16]	 = n[31];
+									  index[17]	 = n[1];
+									  index[18]	 = n[2];
+									  index[19]	 = n[3];
+									  index[20]	 = n[4];
+									  index[21]	 = n[5];
+									  index[22]	 = n[6];
+									  index[23]	 = n[7];
+									  index[24]	 = n[8];
+									  index[25]	 = n[9];
+									  index[26]	 = n[10];
+									  index[27]	 = n[11];
+									  index[28]	 = n[12];
+									  index[29]	 = n[13];
+									  index[30]	 = n[14];
+									  index[31]	 = n[15];
+									}
+								  }
+								}
+							  }
+							}
+						  }
+						}
+					  }
+					}
+				  }
+				}
+			  }
+			}
+		  }
+		}
+	  }
+	}else{
+	  index[0]	 = n[16];
+	  if(!_mm_comilt_ss(XMM0, XMM5)){
+		index[1]	 = n[0];
+		goto SORT32_020111;
+	  }else{
+		index[1]	 = n[17];
+		if(!_mm_comilt_ss(XMM0, XMM6)){
+		  index[2]	 = n[0];
+		  goto SORT32_030112;
+		}else{
+		  index[2]	 = n[18];
+		  if(!_mm_comilt_ss(XMM0, XMM7)){
+			index[3]	 = n[0];
+			goto SORT32_040113;
+		  }else{
+			index[3]	 = n[19];
+			XMM4	 = _mm_load_ss(data+n[20]);
+			XMM5	 = _mm_load_ss(data+n[21]);
+			XMM6	 = _mm_load_ss(data+n[22]);
+			XMM7	 = _mm_load_ss(data+n[23]);
+			if(!_mm_comilt_ss(XMM0, XMM4)){
+			  index[4]	 = n[0];
+			  goto SORT32_050114;
+			}else{
+			  index[4]	 = n[20];
+			  if(!_mm_comilt_ss(XMM0, XMM5)){
+				index[5]	 = n[0];
+				goto SORT32_060115;
+			  }else{
+				index[5]	 = n[21];
+				if(!_mm_comilt_ss(XMM0, XMM6)){
+				  index[6]	 = n[0];
+				  goto SORT32_070116;
+				}else{
+				  index[6]	 = n[22];
+				  if(!_mm_comilt_ss(XMM0, XMM7)){
+					index[7]	 = n[0];
+					goto SORT32_080117;
+				  }else{
+					index[7]	 = n[23];
+					XMM4	 = _mm_load_ss(data+n[24]);
+					XMM5	 = _mm_load_ss(data+n[25]);
+					XMM6	 = _mm_load_ss(data+n[26]);
+					XMM7	 = _mm_load_ss(data+n[27]);
+					if(!_mm_comilt_ss(XMM0, XMM4)){
+					  index[8]	 = n[0];
+					  goto SORT32_090118;
+					}else{
+					  index[8]	 = n[24];
+					  if(!_mm_comilt_ss(XMM0, XMM5)){
+						index[9]	 = n[0];
+						goto SORT32_0A0119;
+					  }else{
+						index[9]	 = n[25];
+						if(!_mm_comilt_ss(XMM0, XMM6)){
+						  index[10]	 = n[0];
+						  goto SORT32_0B011A;
+						}else{
+						  index[10]	 = n[26];
+						  if(!_mm_comilt_ss(XMM0, XMM7)){
+							index[11]	 = n[0];
+							goto SORT32_0C011B;
+						  }else{
+							index[11]	 = n[27];
+							XMM4	 = _mm_load_ss(data+n[28]);
+							XMM5	 = _mm_load_ss(data+n[29]);
+							XMM6	 = _mm_load_ss(data+n[30]);
+							XMM7	 = _mm_load_ss(data+n[31]);
+							if(!_mm_comilt_ss(XMM0, XMM4)){
+							  index[12]	 = n[0];
+							  goto SORT32_0D011C;
+							}else{
+							  index[12]	 = n[28];
+							  if(!_mm_comilt_ss(XMM0, XMM5)){
+								index[13]	 = n[0];
+								goto SORT32_0E011D;
+							  }else{
+								index[13]	 = n[29];
+								if(!_mm_comilt_ss(XMM0, XMM6)){
+								  index[14]	 = n[0];
+								  goto SORT32_0F011E;
+								}else{
+								  index[14]	 = n[30];
+								  if(!_mm_comilt_ss(XMM0, XMM7)){
+									index[15]	 = n[0];
+									goto SORT32_10011F;
+								  }else{
+									index[15]	 = n[31];
+									index[16]	 = n[0];
+									index[17]	 = n[1];
+									index[18]	 = n[2];
+									index[19]	 = n[3];
+									index[20]	 = n[4];
+									index[21]	 = n[5];
+									index[22]	 = n[6];
+									index[23]	 = n[7];
+									index[24]	 = n[8];
+									index[25]	 = n[9];
+									index[26]	 = n[10];
+									index[27]	 = n[11];
+									index[28]	 = n[12];
+									index[29]	 = n[13];
+									index[30]	 = n[14];
+									index[31]	 = n[15];
+								  }
+								}
+							  }
+							}
+						  }
+						}
+					  }
+					}
+				  }
+				}
+			  }
+			}
+		  }
+		}
+	  }
+	}
+}
+static void sortindex_shellsort(int *index,	/* Shell sort of the count index entries starting at index[offset], ordered by the C() comparison macro (defined outside this excerpt) */
+								float *data,	/* not referenced directly here; presumably read by the C() macro - confirm against its definition */
+								int offset,	/* first slot of index[] to fill, and the value stored in that slot initially */
+								int count){	/* number of consecutive entries to initialise and order */
+  int gap,pos,left,i,j;
+  index+=offset;	/* from here on index[0] is the first slot of the range */
+  for(i=0;i<count;i++)index[i]=i+offset;	/* start from the identity permutation offset..offset+count-1 */
+  gap=1;
+  while (gap<=count)gap=gap*3+1;	/* grow the gap through 1,4,13,40,... until it exceeds count */
+  gap/=3;	/* integer division steps back to the largest sequence term <= count */
+  if(gap>=4)gap/=3;	/* unless already at 1, start one sequence term lower still */
+  while(gap>0){
+	for(pos=gap;pos<count;pos++){
+	  for(left=pos-gap;left>=0;left-=gap){	/* walk backwards from pos in strides of gap */
+		i=index[left];j=index[left+gap];
+		if(!C(i,j)){	/* C() rejects this order: swap the two index entries */
+		  index[left]=j;
+		  index[left+gap]=i;
+		}else break;	/* pair accepted by C(): stop walking back for this pos */
+	  }
+	}
+	gap/=3;	/* next smaller gap; 1/3 == 0 terminates the outer loop */
+  }
+}
+#else														/* SSE Optimize */
 #define C(o,a,b)\
   (fabs(data[o+a])>=fabs(data[o+b]))
 #define O(o,a,b,c,d)\
@@ -1390,6 +9362,7 @@
     gap/=3;
   }
 }
+#endif														/* SSE Optimize */
 
 static void sortindex(int *index,
                       float *data,
@@ -1401,18 +9374,62 @@
 }
 
 #undef C
+#ifndef	__SSE__												/* SSE Optimize */
+/* this is for per-channel noise normalization: comparator with the qsort() callback signature; a and b each point to a float*, and the entry with the larger absolute value orders first */
+static int apsort(const void *a, const void *b){
+  float f1=fabs(**(float**)a);	/* magnitude of the float that *a points to */
+  float f2=fabs(**(float**)b);	/* magnitude of the float that *b points to */
+  return (f1<f2)-(f1>f2);	/* -1 when f1>f2, 1 when f1<f2, 0 otherwise */
+}
 #undef O
 #undef SORT4
-
-#endif
-/*** OPT_SORT End ***/
-
+#endif														/* SSE Optimize */
 
 int **_vp_quantize_couple_sort(vorbis_block *vb,
 			       vorbis_look_psy *p,
 			       vorbis_info_mapping0 *vi,
-			       float **mags){
+#ifdef	__SSE__												/* SSE Optimize */
+			       float **mags,
+				   float *temp){	/* SSE build only: extra scratch buffer; temp[0..p->n) is overwritten with masked copies of mags[i] for each coupling step */
+#else														/* SSE Optimize */
+				   float **mags){
+#endif														/* SSE Optimize */
+
 
+#ifdef	__SSE__												/* SSE Optimize */
+  if(p->vi->normal_point_p){	/* index arrays are only built when normal_point_p is set; otherwise NULL is returned below */
+    int i,j,n=p->n;
+    int **ret=_vorbis_block_alloc(vb,vi->coupling_steps*sizeof(*ret));	/* one int* per coupling step, taken from vb via _vorbis_block_alloc */
+    int partition=p->vi->normal_partition;
+    
+	for(i=0;i<vi->coupling_steps;i++)
+	{
+		for(j=0;j<n;j+=16)	/* 16 floats per iteration; NOTE(review): assumes n is a multiple of 16 - confirm */
+		{
+			__m128	XMM0	 = _mm_load_ps(mags[i]+j   );	/* _mm_load_ps is an aligned load: mags[i]+j must be 16-byte aligned */
+			__m128	XMM1	 = _mm_load_ps(mags[i]+j+ 4);
+			__m128	XMM2	 = _mm_load_ps(mags[i]+j+ 8);
+			__m128	XMM3	 = _mm_load_ps(mags[i]+j+12);
+			XMM0	 = _mm_and_ps(XMM0, PABSMASK.ps);	/* bitwise AND with PABSMASK (defined outside this excerpt); presumably clears the sign bit, i.e. |x| - confirm */
+			XMM1	 = _mm_and_ps(XMM1, PABSMASK.ps);
+			XMM2	 = _mm_and_ps(XMM2, PABSMASK.ps);
+			XMM3	 = _mm_and_ps(XMM3, PABSMASK.ps);
+			_mm_store_ps(temp+j   , XMM0);	/* aligned store: temp+j must be 16-byte aligned */
+			_mm_store_ps(temp+j+ 4, XMM1);
+			_mm_store_ps(temp+j+ 8, XMM2);
+			_mm_store_ps(temp+j+12, XMM3);
+		}
+		ret[i]=_vorbis_block_alloc(vb,n*sizeof(**ret));	/* n ints of index storage for this coupling step */
+	
+		for(j=0;j<n;j+=partition)	/* each partition-sized slice is handed to sortindex separately */
+		{
+			sortindex(ret[i], temp, j, partition);	/* keys come from temp, not from mags[i] directly */
+		}
+	}
+    return(ret);
+  }
+  return(NULL);
+#else														/* SSE Optimize */
 #ifdef OPT_SORT
   if(p->vi->normal_point_p){
     int i,j,n=p->n;
@@ -1449,21 +9466,51 @@
   }
   return(NULL);
 #endif
+#endif														/* SSE Optimize */
 }
 
+#ifdef	__SSE__												/* SSE Optimize */
+void _vp_noise_normalize_sort(vorbis_look_psy *p,	/* SSE variant: fills temp[start..n) with masked copies of magnitudes, then calls sortindex once per partition */
+			      float *magnitudes,int *sortedindex,float *temp){	/* temp is an extra scratch buffer that exists only in the SSE signature */
+	int j, n=p->n;
+	vorbis_info_psy	*vi=p->vi;
+	int	partition=vi->normal_partition;
+	int	start=vi->normal_start;
+
+	int k;
+	j	 = start;
+	k	 = (j+15)&(~15);	/* start rounded up to the next multiple of 16 */
+	k	 = (k>=n)?n:k;	/* but never beyond n */
+	for(;j<k;j++)	/* scalar head: one float at a time until j reaches k */
+	{
+		__m128	XMM0	 = _mm_load_ss(magnitudes+j);
+		XMM0	 = _mm_and_ps(XMM0, PABSMASK.ps);	/* bitwise AND with PABSMASK (defined outside this excerpt); presumably clears the sign bit, i.e. |x| - confirm */
+		_mm_store_ss(temp+j,XMM0);
+	}
+	for(;j<n;j+=16)	/* 16 floats per iteration from k onward; NOTE(review): assumes n is a multiple of 16 - confirm */
+	{
+		__m128	XMM0	 = _mm_load_ps(magnitudes+j   );	/* _mm_load_ps is an aligned load: magnitudes+j must be 16-byte aligned */
+		__m128	XMM1	 = _mm_load_ps(magnitudes+j+ 4);
+		__m128	XMM2	 = _mm_load_ps(magnitudes+j+ 8);
+		__m128	XMM3	 = _mm_load_ps(magnitudes+j+12);
+		XMM0	 = _mm_and_ps(XMM0, PABSMASK.ps);
+		XMM1	 = _mm_and_ps(XMM1, PABSMASK.ps);
+		XMM2	 = _mm_and_ps(XMM2, PABSMASK.ps);
+		XMM3	 = _mm_and_ps(XMM3, PABSMASK.ps);
+		_mm_store_ps(temp+j   , XMM0);	/* aligned store: temp+j must be 16-byte aligned */
+		_mm_store_ps(temp+j+ 4, XMM1);
+		_mm_store_ps(temp+j+ 8, XMM2);
+		_mm_store_ps(temp+j+12, XMM3);
+	}
+	for(j=start;j<n;j+=partition)
+	{
+		if(j+partition>n)
+			partition	 = n-j;	/* shorten the final partition so it ends exactly at n */
+		sortindex(sortedindex-start, temp, j, partition);	/* base pointer is biased by -start, so slot j of that argument is sortedindex[j-start] */
+	}
+#else														/* SSE Optimize */
 void _vp_noise_normalize_sort(vorbis_look_psy *p,
 			      float *magnitudes,int *sortedindex){
-#ifdef OPT_SORT
-  int j,n=p->n;
-  vorbis_info_psy *vi=p->vi;
-  int partition=vi->normal_partition;
-  int start=vi->normal_start;
-
-  for(j=start;j<n;j+=partition){
-    if(j+partition>n)partition=n-j;
-    sortindex(sortedindex-start,magnitudes,j,partition);
-  }
-#else
   int i,j,n=p->n;
   vorbis_info_psy *vi=p->vi;
   int partition=vi->normal_partition;
@@ -1478,12 +9525,12 @@
       sortedindex[i+j-start]=work[i]-magnitudes;
     }
   }
-#endif
+#endif														/* SSE Optimize */
 }
 
 void _vp_noise_normalize(vorbis_look_psy *p,
 			 float *in,float *out,int *sortedindex){
-  int i,j=0,n=p->n,min_energy;
+  int i,j=0,n=p->n/*,min_energy*/;
   vorbis_info_psy *vi=p->vi;
   int partition=vi->normal_partition;
   int start=vi->normal_start;
@@ -1491,23 +9538,299 @@
   if(start>n)start=n;
 
   if(vi->normal_channel_p){
+#ifdef	__SSE__												/* SSE Optimize */
+	{
+		int k;
+		k	 = start&(~15);
+		for(;j<k;j+=16)
+		{
+			__m128	XMM0, XMM1, XMM2, XMM3;
+#if	!defined(__SSE2__)
+			__m64	MM0, MM1, MM2, MM3;
+			__m64	MM4, MM5, MM6, MM7;
+#endif
+			XMM0	 = _mm_load_ps(in+j   );
+			XMM1	 = _mm_load_ps(in+j+ 4);
+			XMM2	 = _mm_load_ps(in+j+ 8);
+			XMM3	 = _mm_load_ps(in+j+12);
+#if	defined(__SSE2__)
+			XMM0	 = _mm_cvtepi32_ps(_mm_cvtps_epi32(XMM0));
+			XMM1	 = _mm_cvtepi32_ps(_mm_cvtps_epi32(XMM1));
+			XMM2	 = _mm_cvtepi32_ps(_mm_cvtps_epi32(XMM2));
+			XMM3	 = _mm_cvtepi32_ps(_mm_cvtps_epi32(XMM3));
+#else
+			MM0		 = _mm_cvtps_pi32(XMM0);
+			MM2		 = _mm_cvtps_pi32(XMM1);
+			MM4		 = _mm_cvtps_pi32(XMM2);
+			MM6		 = _mm_cvtps_pi32(XMM3);
+			XMM0	 = _mm_movehl_ps(XMM0, XMM0);
+			XMM1	 = _mm_movehl_ps(XMM1, XMM1);
+			XMM2	 = _mm_movehl_ps(XMM2, XMM2);
+			XMM3	 = _mm_movehl_ps(XMM3, XMM3);
+			MM1		 = _mm_cvtps_pi32(XMM0);
+			MM3		 = _mm_cvtps_pi32(XMM1);
+			MM5		 = _mm_cvtps_pi32(XMM2);
+			MM7		 = _mm_cvtps_pi32(XMM3);
+			XMM0	 = _mm_cvtpi32_ps(XMM0, MM1);
+			XMM1	 = _mm_cvtpi32_ps(XMM1, MM3);
+			XMM2	 = _mm_cvtpi32_ps(XMM2, MM5);
+			XMM3	 = _mm_cvtpi32_ps(XMM3, MM7);
+			XMM0	 = _mm_movelh_ps(XMM0, XMM0);
+			XMM1	 = _mm_movelh_ps(XMM1, XMM1);
+			XMM2	 = _mm_movelh_ps(XMM2, XMM2);
+			XMM3	 = _mm_movelh_ps(XMM3, XMM3);
+			XMM0	 = _mm_cvtpi32_ps(XMM0, MM0);
+			XMM1	 = _mm_cvtpi32_ps(XMM1, MM2);
+			XMM2	 = _mm_cvtpi32_ps(XMM2, MM4);
+			XMM3	 = _mm_cvtpi32_ps(XMM3, MM6);
+#endif
+			_mm_store_ps(out+j   , XMM0);
+			_mm_store_ps(out+j+ 4, XMM1);
+			_mm_store_ps(out+j+ 8, XMM2);
+			_mm_store_ps(out+j+12, XMM3);
+		}
+#if	!defined(__SSE2__)
+		_mm_empty();
+#endif
+		for(;j<start;j++)
+			out[j]	 = rint(in[j]);
+	}
+#else														/* SSE Optimize */
     for(;j<start;j++)
       out[j]=rint(in[j]);
+#endif														/* SSE Optimize */
     
     for(;j+partition<=n;j+=partition){
+#ifdef	__SSE__												/* SSE Optimize */
+      float acc;
+      int k;
+      int energy_loss;
+#else
       float acc=0.;
       int k;
       int energy_loss=0;
+#endif
       int nn_num=0;
       int freqband_mid=j+16;
       int freqband_flag=0;
       
+#ifdef	__SSE__												/* SSE Optimize */
+	  {
+		if(partition==8)
+		{
+		  int c0, c1;
+#if defined(__SSE2__)
+		  __m128 XMM0, XMM1, XMM2, XMM3;
+		  XMM0 = _mm_load_ps(in+j  );
+		  XMM1 = _mm_load_ps(in+j+4);
+		  XMM2 = _mm_castsi128_ps(_mm_cvtps_epi32(XMM0));
+		  XMM3 = _mm_castsi128_ps(_mm_cvtps_epi32(XMM1));
+		  XMM2 = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(XMM2), PFV_0.pi));
+		  XMM3 = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(XMM3), PFV_0.pi));
+		  XMM0 = _mm_and_ps(XMM0, XMM2);
+		  XMM1 = _mm_and_ps(XMM1, XMM3);
+		  XMM0 = _mm_mul_ps(XMM0, XMM0);
+		  XMM1 = _mm_mul_ps(XMM1, XMM1);
+		  c0   = _mm_movemask_ps(XMM2);
+		  XMM0 = _mm_add_ps(XMM0, XMM1);
+		  c1   = _mm_movemask_ps(XMM3);
+#else
+		  __m128 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5;
+		  XMM0 = _mm_load_ps(in+j  );
+		  XMM1 = _mm_load_ps(in+j+4);
+		  XMM4 = XMM0;
+		  XMM5 = XMM1;
+		  XMM2 = _mm_cmplt_ps(PFV_M0P5.ps, XMM0);
+		  XMM3 = _mm_cmplt_ps(PFV_M0P5.ps, XMM1);
+		  XMM4 = _mm_cmplt_ps(XMM4, PFV_0P5.ps);
+		  XMM5 = _mm_cmplt_ps(XMM5, PFV_0P5.ps);
+		  XMM2 = _mm_and_ps(XMM2, XMM4);
+		  XMM3 = _mm_and_ps(XMM3, XMM5);
+		  XMM0 = _mm_and_ps(XMM0, XMM2);
+		  XMM1 = _mm_and_ps(XMM1, XMM3);
+		  XMM0 = _mm_mul_ps(XMM0, XMM0);
+		  XMM1 = _mm_mul_ps(XMM1, XMM1);
+		  c0   = _mm_movemask_ps(XMM2);
+		  XMM0 = _mm_add_ps(XMM0, XMM1);
+		  c1   = _mm_movemask_ps(XMM3);
+#endif
+		  acc = _mm_add_horz(XMM0);
+		  energy_loss  = bitCountTable[c0];
+		  energy_loss += bitCountTable[c1];
+		}
+		else if(partition==32)
+		{
+		  int c0, c1;
+#if defined(__SSE2__)
+		  __m128 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6;
+		  XMM0 = _mm_load_ps(in+j   );
+		  XMM1 = _mm_load_ps(in+j+ 4);
+		  XMM4 = _mm_load_ps(in+j+ 8);
+		  XMM5 = _mm_load_ps(in+j+12);
+		  XMM2 = _mm_castsi128_ps(_mm_cvtps_epi32(XMM0));
+		  XMM3 = _mm_castsi128_ps(_mm_cvtps_epi32(XMM1));
+		  XMM6 = _mm_castsi128_ps(_mm_cvtps_epi32(XMM4));
+		  XMM2 = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(XMM2), PFV_0.pi));
+		  XMM3 = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(XMM3), PFV_0.pi));
+		  XMM0 = _mm_and_ps(XMM0, XMM2);
+		  XMM1 = _mm_and_ps(XMM1, XMM3);
+		  XMM0 = _mm_mul_ps(XMM0, XMM0);
+		  XMM1 = _mm_mul_ps(XMM1, XMM1);
+		  c0   = _mm_movemask_ps(XMM2);
+		  XMM2 = _mm_castsi128_ps(_mm_cvtps_epi32(XMM5));
+		  XMM6 = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(XMM6), PFV_0.pi));
+		  XMM0 = _mm_add_ps(XMM0, XMM1);
+		  XMM1 = _mm_load_ps(in+j+16);
+		  c1   = _mm_movemask_ps(XMM3);
+		  XMM3 = _mm_load_ps(in+j+20);
+		  XMM2 = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(XMM2), PFV_0.pi));
+		  XMM4 = _mm_and_ps(XMM4, XMM6);
+		  XMM5 = _mm_and_ps(XMM5, XMM2);
+		  energy_loss  = bitCountTable[c0];
+		  energy_loss += bitCountTable[c1];
+		  XMM4 = _mm_mul_ps(XMM4, XMM4);
+		  XMM5 = _mm_mul_ps(XMM5, XMM5);
+		  c0   = _mm_movemask_ps(XMM6);
+		  XMM6 = _mm_castsi128_ps(_mm_cvtps_epi32(XMM1));
+		  XMM4 = _mm_add_ps(XMM4, XMM5);
+		  XMM5 = _mm_castsi128_ps(_mm_cvtps_epi32(XMM3));
+		  c1   = _mm_movemask_ps(XMM2);
+		  XMM2 = _mm_load_ps(in+j+24);
+		  energy_loss += bitCountTable[c0];
+		  XMM6 = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(XMM6), PFV_0.pi));
+		  XMM5 = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(XMM5), PFV_0.pi));
+		  XMM0 = _mm_add_ps(XMM0, XMM4);
+		  XMM4 = _mm_load_ps(in+j+28);
+		  energy_loss += bitCountTable[c1];
+		  XMM1 = _mm_and_ps(XMM1, XMM6);
+		  XMM3 = _mm_and_ps(XMM3, XMM5);
+		  XMM1 = _mm_mul_ps(XMM1, XMM1);
+		  XMM3 = _mm_mul_ps(XMM3, XMM3);
+		  c0   = _mm_movemask_ps(XMM6);
+		  XMM6 = _mm_castsi128_ps(_mm_cvtps_epi32(XMM2));
+		  XMM1 = _mm_add_ps(XMM1, XMM3);
+		  XMM3 = _mm_castsi128_ps(_mm_cvtps_epi32(XMM4));
+		  c1   = _mm_movemask_ps(XMM5);
+		  energy_loss += bitCountTable[c0];
+		  XMM6 = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(XMM6), PFV_0.pi));
+		  XMM3 = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(XMM3), PFV_0.pi));
+		  XMM0 = _mm_add_ps(XMM0, XMM1);
+		  energy_loss += bitCountTable[c1];
+		  XMM2 = _mm_and_ps(XMM2, XMM6);
+		  XMM4 = _mm_and_ps(XMM4, XMM3);
+		  XMM2 = _mm_mul_ps(XMM2, XMM2);
+		  XMM4 = _mm_mul_ps(XMM4, XMM4);
+		  c0   = _mm_movemask_ps(XMM6);
+		  XMM2 = _mm_add_ps(XMM2, XMM4);
+		  c1   = _mm_movemask_ps(XMM3);
+		  energy_loss += bitCountTable[c0];
+		  XMM0 = _mm_add_ps(XMM0, XMM2);
+		  energy_loss += bitCountTable[c1];
+		  acc = _mm_add_horz(XMM0);
+#else
+		  __m128 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6;
+		  XMM0 = _mm_load_ps(in+j   );
+		  XMM1 = _mm_load_ps(in+j+ 4);
+		  XMM6 = _mm_load_ps(in+j+ 8);
+		  XMM4 = XMM0;
+		  XMM5 = XMM1;
+		  XMM2 = _mm_cmplt_ps(PFV_M0P5.ps, XMM0);
+		  XMM3 = _mm_cmplt_ps(PFV_M0P5.ps, XMM1);
+		  XMM4 = _mm_cmplt_ps(XMM4, PFV_0P5.ps);
+		  XMM5 = _mm_cmplt_ps(XMM5, PFV_0P5.ps);
+		  XMM2 = _mm_and_ps(XMM2, XMM4);
+		  XMM4 = _mm_load_ps(in+j+12);
+		  XMM3 = _mm_and_ps(XMM3, XMM5);
+		  XMM5 = XMM6;
+		  XMM0 = _mm_and_ps(XMM0, XMM2);	/* keep in[j..j+3] only where XMM2 (the -0.5<x<0.5 mask) is set */
+		  c0   = _mm_movemask_ps(XMM2);	/* read the mask bits NOW: XMM2 is recycled on the next line */
+		  XMM2 = XMM4;	/* copy of in[j+12..j+15] for the upper-bound compare below */
+		  XMM1 = _mm_and_ps(XMM1, XMM3);	/* keep in[j+4..j+7] only where XMM3 (its mask) is set */
+		  XMM0 = _mm_mul_ps(XMM0, XMM0);
+		  XMM1 = _mm_mul_ps(XMM1, XMM1);
+		  XMM0 = _mm_add_ps(XMM0, XMM1);	/* running sum of squares of the masked lanes */
+		  XMM1 = _mm_cmplt_ps(PFV_M0P5.ps, XMM6);
+		  c1   = _mm_movemask_ps(XMM3);	/* XMM3 still holds the in[j+4..j+7] mask here */
+		  XMM3 = _mm_cmplt_ps(PFV_M0P5.ps, XMM4);
+		  XMM5 = _mm_cmplt_ps(XMM5, PFV_0P5.ps);
+		  XMM2 = _mm_cmplt_ps(XMM2, PFV_0P5.ps);
+		  energy_loss  = bitCountTable[c0];	/* first use in this branch: assign, energy_loss has no initializer in the SSE build */
+		  energy_loss += bitCountTable[c1];
+		  XMM1 = _mm_and_ps(XMM1, XMM5);
+		  XMM5 = _mm_load_ps(in+j+16);
+		  XMM3 = _mm_and_ps(XMM3, XMM2);
+		  XMM2 = _mm_load_ps(in+j+20);
+		  XMM6 = _mm_and_ps(XMM6, XMM1);
+		  XMM4 = _mm_and_ps(XMM4, XMM3);
+		  XMM6 = _mm_mul_ps(XMM6, XMM6);
+		  XMM4 = _mm_mul_ps(XMM4, XMM4);
+		  c0   = _mm_movemask_ps(XMM1);
+		  XMM1 = XMM5;
+		  XMM6 = _mm_add_ps(XMM6, XMM4);
+		  XMM4 = XMM2;
+		  c1   = _mm_movemask_ps(XMM3);
+		  XMM3 = _mm_cmplt_ps(PFV_M0P5.ps, XMM5);
+		  XMM0 = _mm_add_ps(XMM0, XMM6);
+		  XMM6 = _mm_cmplt_ps(PFV_M0P5.ps, XMM2);
+		  XMM1 = _mm_cmplt_ps(XMM1, PFV_0P5.ps);
+		  XMM4 = _mm_cmplt_ps(XMM4, PFV_0P5.ps);
+		  energy_loss += bitCountTable[c0];
+		  energy_loss += bitCountTable[c1];
+		  XMM3 = _mm_and_ps(XMM3, XMM1);
+		  XMM1 = _mm_load_ps(in+j+24);
+		  XMM6 = _mm_and_ps(XMM6, XMM4);
+		  XMM4 = _mm_load_ps(in+j+28);
+		  XMM5 = _mm_and_ps(XMM5, XMM3);
+		  XMM2 = _mm_and_ps(XMM2, XMM6);
+		  XMM5 = _mm_mul_ps(XMM5, XMM5);
+		  XMM2 = _mm_mul_ps(XMM2, XMM2);
+		  c0   = _mm_movemask_ps(XMM3);
+		  XMM3 = XMM1;
+		  XMM5 = _mm_add_ps(XMM5, XMM2);
+		  XMM2 = XMM4;
+		  c1   = _mm_movemask_ps(XMM6);
+		  XMM6 = _mm_cmplt_ps(PFV_M0P5.ps, XMM1);
+		  XMM0 = _mm_add_ps(XMM0, XMM5);
+		  XMM5 = _mm_cmplt_ps(PFV_M0P5.ps, XMM4);
+		  XMM3 = _mm_cmplt_ps(XMM3, PFV_0P5.ps);
+		  XMM2 = _mm_cmplt_ps(XMM2, PFV_0P5.ps);
+		  energy_loss += bitCountTable[c0];
+		  energy_loss += bitCountTable[c1];
+		  XMM6 = _mm_and_ps(XMM6, XMM3);
+		  XMM5 = _mm_and_ps(XMM5, XMM2);
+		  XMM1 = _mm_and_ps(XMM1, XMM6);
+		  XMM4 = _mm_and_ps(XMM4, XMM5);
+		  XMM1 = _mm_mul_ps(XMM1, XMM1);
+		  XMM4 = _mm_mul_ps(XMM4, XMM4);
+		  c0   = _mm_movemask_ps(XMM6);
+		  XMM1 = _mm_add_ps(XMM1, XMM4);
+		  c1   = _mm_movemask_ps(XMM5);
+		  energy_loss += bitCountTable[c0];
+		  XMM0 = _mm_add_ps(XMM0, XMM1);
+		  energy_loss += bitCountTable[c1];
+		  acc = _mm_add_horz(XMM0);
+#endif
+		}
+		else
+		{
+		  acc = 0.f;
+		  energy_loss = 0;
+		  for(i=j;i<j+partition;i++){
+			if(rint(in[i])==0.f){
+			  acc+=in[i]*in[i];
+			  energy_loss++;
+			}
+		  }
+		}
+	  }
+#else														/* SSE Optimize */
       for(i=j;i<j+partition;i++){
         if(rint(in[i])==0.f){
         	acc+=in[i]*in[i];
         	energy_loss++;
         }
       }
+#endif														/* SSE Optimize */
       /* When an energy loss is large, NN processing is carried out in the middle of partition. */
       /*if(energy_loss==32 && fabs(in[freqband_mid])>nnmid_th){
       	if(in[freqband_mid]*in[freqband_mid]<.25f){
@@ -1616,8 +9939,19 @@
       int limit=g->coupling_pointlimit[p->vi->blockflag][blobno];
       int pointlimit=limit;
       int freqlimit=p->st_freqlimit;
+#ifdef	__SSE__												/* SSE Optimize */
+      _MM_ALIGN16 unsigned int Mc_treshp[2048];
+      _MM_ALIGN16 unsigned int Ac_treshp[2048];
+      _MM_ALIGN16 float rMs[2048];
+      _MM_ALIGN16 float rAs[2048];
+      _MM_ALIGN16 unsigned int mdctMA[2048];
+      int midpoint0	 = (limit/partition)*partition;
+      int midpoint1	 = ((limit+partition-1)/partition)*partition;
+#else														/* SSE Optimize */
       unsigned char Mc_treshp[2048];
       unsigned char Ac_treshp[2048];
+#endif														/* SSE Optimize */
+      int s, e;
       int lof_st;
       int hif_st;
       int hif_stcopy;
@@ -1629,11 +9963,1067 @@
       
       nonzero[vi->coupling_mag[i]]=1; 
       nonzero[vi->coupling_ang[i]]=1; 
+	  s = 0;
+	  e = p->n;
        
       postpoint_backup=postpoint;
       
       /** @ M6 PRE **/
       // lossless only?
+#ifdef	__SSE__												/* SSE Optimize */
+	  for(j=0;j<e;j+=16)
+	  {
+		__m128 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5;
+		XMM0 = _mm_load_ps(mdctM+j   );
+		XMM2 = _mm_load_ps(mdctA+j   );
+		XMM1 = _mm_load_ps(mdctM+j+ 4);
+		XMM3 = _mm_load_ps(mdctA+j+ 4);
+		XMM4 = _mm_load_ps(mdctM+j+ 8);
+		XMM5 = _mm_load_ps(mdctA+j+ 8);
+		XMM0 = _mm_mul_ps(XMM0, XMM2);
+		XMM1 = _mm_mul_ps(XMM1, XMM3);
+		XMM3 = _mm_load_ps(mdctA+j+12);
+		XMM2 = _mm_load_ps(mdctM+j+12);
+		XMM4 = _mm_mul_ps(XMM4, XMM5);
+		XMM3 = _mm_mul_ps(XMM3, XMM2);
+		XMM5 = _mm_load_ps(rMo+j   );
+		XMM2 = _mm_load_ps(rMo+j+ 4);
+		XMM0 = _mm_cmplt_ps(XMM0, PFV_0.ps);
+		XMM1 = _mm_cmplt_ps(XMM1, PFV_0.ps);
+		XMM4 = _mm_cmplt_ps(XMM4, PFV_0.ps);
+		XMM3 = _mm_cmplt_ps(XMM3, PFV_0.ps);
+		_mm_store_ps(mdctMA+j   , XMM0);
+		XMM0 = _mm_load_ps(rMo+j+ 8);
+		_mm_store_ps(mdctMA+j+ 4, XMM1);
+		XMM1 = _mm_load_ps(rMo+j+12);
+		_mm_store_ps(mdctMA+j+ 8, XMM4);
+		_mm_store_ps(mdctMA+j+12, XMM3);
+		XMM5 = _mm_and_ps(XMM5, PABSMASK.ps);
+		XMM2 = _mm_and_ps(XMM2, PABSMASK.ps);
+		XMM0 = _mm_and_ps(XMM0, PABSMASK.ps);
+		XMM1 = _mm_and_ps(XMM1, PABSMASK.ps);
+		_mm_store_ps(rMs+j   , XMM5);
+		_mm_store_ps(rMs+j+ 4, XMM2);
+		_mm_store_ps(rMs+j+ 8, XMM0);
+		_mm_store_ps(rMs+j+12, XMM1);
+		XMM5 = _mm_load_ps(rAo+j   );
+		XMM2 = _mm_load_ps(rAo+j+ 4);
+		XMM0 = _mm_load_ps(rAo+j+ 8);
+		XMM1 = _mm_load_ps(rAo+j+12);
+		XMM5 = _mm_and_ps(XMM5, PABSMASK.ps);
+		XMM2 = _mm_and_ps(XMM2, PABSMASK.ps);
+		XMM0 = _mm_and_ps(XMM0, PABSMASK.ps);
+		XMM1 = _mm_and_ps(XMM1, PABSMASK.ps);
+		_mm_store_ps(rAs+j   , XMM5);
+		_mm_store_ps(rAs+j+ 4, XMM2);
+		_mm_store_ps(rAs+j+ 8, XMM0);
+		_mm_store_ps(rAs+j+12, XMM1);
+	  }
+	  for(;j<n;j+=16)
+	  {
+		__m128 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5;
+		XMM0 = _mm_load_ps(mdctM+j   );
+		XMM2 = _mm_load_ps(mdctA+j   );
+		XMM1 = _mm_load_ps(mdctM+j+ 4);
+		XMM3 = _mm_load_ps(mdctA+j+ 4);
+		XMM4 = _mm_load_ps(mdctM+j+ 8);
+		XMM5 = _mm_load_ps(mdctA+j+ 8);
+		XMM0 = _mm_mul_ps(XMM0, XMM2);
+		XMM1 = _mm_mul_ps(XMM1, XMM3);
+		XMM3 = _mm_load_ps(mdctA+j+12);
+		XMM2 = _mm_load_ps(mdctM+j+12);
+		XMM4 = _mm_mul_ps(XMM4, XMM5);
+		XMM3 = _mm_mul_ps(XMM3, XMM2);
+		XMM0 = _mm_cmplt_ps(XMM0, PFV_0.ps);
+		XMM1 = _mm_cmplt_ps(XMM1, PFV_0.ps);
+		XMM4 = _mm_cmplt_ps(XMM4, PFV_0.ps);
+		XMM3 = _mm_cmplt_ps(XMM3, PFV_0.ps);
+		_mm_store_ps(mdctMA+j   , XMM0);
+		_mm_store_ps(mdctMA+j+ 4, XMM1);
+		_mm_store_ps(mdctMA+j+ 8, XMM4);
+		_mm_store_ps(mdctMA+j+12, XMM3);
+	  }
+	  if(!stereo_threshholds[g->coupling_postpointamp[blobno]])stcont_start=n;
+	  else{
+		static _MM_ALIGN16 __m128x PUI1 =
+			{ .si32 = { 1, 1, 1, 1} };
+		int freqlimit16 = freqlimit&(~15);
+		__m128 PST_THRESH;
+		// exception handling
+		if((postpoint-sth_high)<prepoint)sth_high=postpoint-prepoint;
+		// start point setup
+		for(j=0;j<n;j++){
+		  stcont_start=j;
+		  if(p->noiseoffset[1][j]>=-2)break;
+		}
+		// start point correction & threshold setup 
+		st_thresh=.1;
+		if(p->m_val<.5){
+		  // low frequency limit
+		  if(stcont_start<limit)stcont_start=limit;
+		}else if(p->vi->normal_thresh>1.)st_thresh=.5;
+		PST_THRESH = _mm_set_ps1(st_thresh);
+		for(j=0;j<freqlimit16;j+=16){
+		  __m128 XMM0, XMM1, XMM2, XMM3;
+		  XMM0 = _mm_load_ps(rM+j   );
+		  XMM1 = _mm_load_ps(rM+j+ 4);
+		  XMM2 = _mm_load_ps(rM+j+ 8);
+		  XMM3 = _mm_load_ps(rM+j+12);
+		  XMM0 = _mm_and_ps(XMM0, PABSMASK.ps);
+		  XMM1 = _mm_and_ps(XMM1, PABSMASK.ps);
+		  XMM2 = _mm_and_ps(XMM2, PABSMASK.ps);
+		  XMM3 = _mm_and_ps(XMM3, PABSMASK.ps);
+		  XMM0 = _mm_cmplt_ps(XMM0, PST_THRESH);
+		  XMM1 = _mm_cmplt_ps(XMM1, PST_THRESH);
+		  XMM2 = _mm_cmplt_ps(XMM2, PST_THRESH);
+		  XMM3 = _mm_cmplt_ps(XMM3, PST_THRESH);
+		  XMM0 = _mm_and_ps(XMM0, PUI1.ps);
+		  XMM1 = _mm_and_ps(XMM1, PUI1.ps);
+		  XMM2 = _mm_and_ps(XMM2, PUI1.ps);
+		  XMM3 = _mm_and_ps(XMM3, PUI1.ps);
+		  _mm_store_ps(Mc_treshp+j   , XMM0);
+		  _mm_store_ps(Mc_treshp+j+ 4, XMM1);
+		  _mm_store_ps(Mc_treshp+j+ 8, XMM2);
+		  _mm_store_ps(Mc_treshp+j+12, XMM3);
+		  XMM0 = _mm_load_ps(rA+j   );
+		  XMM1 = _mm_load_ps(rA+j+ 4);
+		  XMM2 = _mm_load_ps(rA+j+ 8);
+		  XMM3 = _mm_load_ps(rA+j+12);
+		  XMM0 = _mm_and_ps(XMM0, PABSMASK.ps);
+		  XMM1 = _mm_and_ps(XMM1, PABSMASK.ps);
+		  XMM2 = _mm_and_ps(XMM2, PABSMASK.ps);
+		  XMM3 = _mm_and_ps(XMM3, PABSMASK.ps);
+		  XMM0 = _mm_cmplt_ps(XMM0, PST_THRESH);
+		  XMM1 = _mm_cmplt_ps(XMM1, PST_THRESH);
+		  XMM2 = _mm_cmplt_ps(XMM2, PST_THRESH);
+		  XMM3 = _mm_cmplt_ps(XMM3, PST_THRESH);
+		  XMM0 = _mm_and_ps(XMM0, PUI1.ps);
+		  XMM1 = _mm_and_ps(XMM1, PUI1.ps);
+		  XMM2 = _mm_and_ps(XMM2, PUI1.ps);
+		  XMM3 = _mm_and_ps(XMM3, PUI1.ps);
+		  _mm_store_ps(Ac_treshp+j   , XMM0);
+		  _mm_store_ps(Ac_treshp+j+ 4, XMM1);
+		  _mm_store_ps(Ac_treshp+j+ 8, XMM2);
+		  _mm_store_ps(Ac_treshp+j+12, XMM3);
+		}
+		for(;j<=freqlimit;j++){ // or j<n
+		  if(fabs(rM[j])<st_thresh)Mc_treshp[j]=1;
+		  else Mc_treshp[j]=0;
+		  if(fabs(rA[j])<st_thresh)Ac_treshp[j]=1;
+		  else Ac_treshp[j]=0;
+		}
+	  }
+	  if(n<=sliding_lowpass&&p->vi->normal_point_p&&partition%8==0)
+	  {
+		static _MM_ALIGN16 const __m128x PP001 =
+			{ .sf = {0.001f, 0.001f, 0.001f, 0.001f} };
+		static _MM_ALIGN16 const __m128x P1000 =
+			{ .sf = {1000.f, 1000.f, 1000.f, 1000.f} };
+		__m128	PPOSTPOINT_BACKUP	 = _mm_set_ps1(postpoint_backup);
+		__m128	PDUMMYPOINT		 = 
+			_mm_set_ps1(stereo_threshholds_rephase[g->coupling_postpointamp[blobno]]);
+		_MM_ALIGN16 float slowM[2048];
+		_MM_ALIGN16 float slowA[2048];
+		_MM_ALIGN16 float shigh[2048];
+		int	midpoint0	 = (limit/partition)*partition;
+		int	midpoint1	 = ((limit+partition-1)/partition)*partition;
+		for(j=0;j<e;j+=partition){
+		  float rpacc;
+		  int energy_loss=0;
+		  int nn_num=0;
+
+		  for(k=0;k<partition;k++){
+			int l=k+j;
+			float slow=0.f;
+			float shighM=0.f;
+			float shighA=0.f;
+
+			slowM[l] = prepoint;
+			slowA[l] = prepoint;
+			shigh[l] = 0.f;
+
+			postpoint=postpoint_backup;
+
+			/* AoTuV */
+			/** @ M6 MAIN **
+			The threshold of a stereo is changed dynamically. 
+			by Aoyumi @ 2006/06/04
+			*/
+			if(l>=stcont_start){
+			  int m;
+			  int lof_num;
+			  int hif_num;
+
+			  // (It may be better to calculate this in advance) 
+			  lof_st=l-(l/2)*.167;
+			  hif_st=l+l*.167;
+
+			  hif_stcopy=hif_st;
+
+			  // limit setting
+			  if(hif_st>freqlimit)hif_st=freqlimit;
+
+			  if(old_lof_st || old_hif_st){
+				if(hif_st>l){
+				  // hif_st, lof_st ...absolute value
+				  // lof_num, hif_num ...relative value
+
+				  // low freq.(lower)
+				  lof_num=lof_st-old_lof_st;
+				  if(lof_num==0){
+					Afreq_num+=Ac_treshp[l-1];
+					Mfreq_num+=Mc_treshp[l-1];
+				  }else if(lof_num==1){
+					Afreq_num+=Ac_treshp[l-1];
+					Mfreq_num+=Mc_treshp[l-1];
+					Afreq_num-=Ac_treshp[old_lof_st];
+					Mfreq_num-=Mc_treshp[old_lof_st];
+				  }//else puts("err. low");
+
+				  // high freq.(higher)
+				  hif_num=hif_st-old_hif_st;
+				  if(hif_num==0){
+					Afreq_num-=Ac_treshp[l];
+					Mfreq_num-=Mc_treshp[l];
+				  }else if(hif_num==1){
+					Afreq_num-=Ac_treshp[l];
+					Mfreq_num-=Mc_treshp[l];
+					Afreq_num+=Ac_treshp[hif_st];
+					Mfreq_num+=Mc_treshp[hif_st];
+				  }else if(hif_num==2){
+					Afreq_num-=Ac_treshp[l];
+					Mfreq_num-=Mc_treshp[l];
+					Afreq_num+=Ac_treshp[hif_st];
+					Mfreq_num+=Mc_treshp[hif_st];
+					Afreq_num+=Ac_treshp[hif_st-1];
+					Mfreq_num+=Mc_treshp[hif_st-1];
+				  }//else puts("err. high");
+				}
+			  }else{
+				for(m=lof_st; m<=hif_st; m++){
+				  if(m==l)continue;
+				  if(Ac_treshp[m]) Afreq_num++;
+				  if(Mc_treshp[m]) Mfreq_num++;
+				}
+			  }
+			  if(l>=limit){
+				shigh[l]=sth_high/(hif_stcopy-lof_st);
+				shighA=shigh[l]*Afreq_num;
+				shighM=shigh[l]*Mfreq_num;
+				if((shighA+rAs[l])>(shighM+rMs[l]))shigh[l]=shighA;
+				else shigh[l]=shighM;
+			  }else{
+				slow=sth_low/(hif_stcopy-lof_st);
+				slowA[l]=slow*Afreq_num;
+				slowM[l]=slow*Mfreq_num;
+				if(p->noiseoffset[1][l]<-1){
+				  slowA[l]*=(p->noiseoffset[1][l]+2);
+				  slowM[l]*=(p->noiseoffset[1][l]+2);
+				}
+				slowA[l] = prepoint - slowA[l];
+				slowM[l] = prepoint - slowM[l];
+			  }
+			  old_lof_st=lof_st;
+			  old_hif_st=hif_st;
+			}
+		  }
+		}
+
+		/* Phase 0 */
+		if(s<midpoint0)
+		{
+		  int te;
+		  if(e>=midpoint0)
+			te = midpoint0;
+		  else
+			te = e;
+		  for(j=s;j<te;j+=partition){
+			int energy_loss=0;
+			for(k=0;k<partition;k+=4)
+			{
+			  int l	 = k+j;
+			  int ifc0, m, o;
+			  __m128 XMM0, XMM1, XMM2, XMM3;
+			  XMM0 = _mm_load_ps(rMs+l  );
+			  XMM2 = _mm_load_ps(slowM+l  );
+			  XMM1 = _mm_load_ps(rAs+l  );
+			  XMM3 = _mm_load_ps(slowA+l  );
+			  XMM0 = _mm_cmplt_ps(XMM0, XMM2);
+			  XMM1 = _mm_cmplt_ps(XMM1, XMM3);
+			  XMM1 = _mm_and_ps(XMM1, XMM0);
+			  ifc0 = _mm_movemask_ps(XMM1);
+			  if(ifc0==0)
+			  {
+				couple_lossless_ps(rM+l, rA+l, qM+l, qA+l);
+				l += 4;
+			  }
+			  else if(ifc0==0xF)
+			  {
+				precomputed_couple_point_ps(&mag_memo[i][l],
+				  floorM+l,floorA+l,
+				  qM+l,qA+l);
+				XMM0 = _mm_load_ps(qM+l);
+#if defined(__SSE2__)
+				XMM0 = _mm_castsi128_ps(_mm_cvtps_epi32(XMM0));
+				XMM0 = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(XMM0), PFV_0.pi));
+#else
+				XMM1 = _mm_cmplt_ps(PFV_M0P5.ps, XMM0);
+				XMM0 = _mm_cmplt_ps(XMM0, PFV_0P5.ps);
+				XMM0 = _mm_and_ps(XMM0, XMM1);
+#endif
+				energy_loss += bitCountTable[_mm_movemask_ps(XMM0)];
+				l += 4;
+			  }
+			  else
+			  {
+				for(m=0,o=1;m<4;m++)
+				{
+				  if(ifc0&o)
+					precomputed_couple_point(mag_memo[i][l],
+					  floorM[l],floorA[l],
+					  qM+l,qA+l);
+				  else
+					couple_lossless(rM[l],rA[l],qM+l,qA+l);
+				  l ++;
+				  o = o << 1;
+				}
+				XMM0 = _mm_load_ps(qM+l-4);
+#if defined(__SSE2__)
+				XMM0 = _mm_castsi128_ps(_mm_cvtps_epi32(XMM0));
+				XMM0 = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(XMM0), PFV_0.pi));
+#else
+				XMM2 = _mm_cmplt_ps(PFV_M0P5.ps, XMM0);
+				XMM0 = _mm_cmplt_ps(XMM0, PFV_0P5.ps);
+				XMM0 = _mm_and_ps(XMM0, XMM2);
+#endif
+				XMM0 = _mm_and_ps(XMM0, XMM1);
+				energy_loss += bitCountTable[_mm_movemask_ps(XMM0)];
+			  }
+			}
+			{
+			  int min_energy = 32-energy_loss;
+			  if(min_energy<2 || (j<=p->min_nn_lp && min_energy==2)){
+				int l;
+				float ab;
+				for(;k<partition;k++){
+				  l=mag_sort[i][j+k];
+				  ab=fabs(qM[l]);
+				  if(ab<0.04)break;
+#if	1
+				  if( ((mdctM[l]>0. && mdctA[l]<0.) || (mdctA[l]>0. && mdctM[l]<0.))
+					&& ab<0.11)break; // 0.11
+#else
+				  if(mdctMA[l] && ab < 0.11)break;
+#endif
+				  if( l>=pointlimit){
+					__m128 XMM0 = _mm_load_ss(qM+l);
+					if(_mm_cvtss_si32(XMM0)==0){
+					  qM[l]=unitnorm(qM[l]);
+					  break;
+					}
+				  }
+				}
+			  }
+			}
+		  }
+		}
+		/* Phase 1 */
+		if(s<=midpoint0&&e>=midpoint1)
+		{
+		  for(j=midpoint0;j<midpoint1;j+=partition)
+		  {
+			__m128	PACC;
+			int midpoint033 = (limit-midpoint0)&(~3);
+			int midpoint066 = (limit-midpoint0+3)&(~3);
+			float acc=0.f;
+			float rpacc;
+			int energy_loss=0;
+			int nn_num=0;
+
+			for(k=0;k<midpoint033;k+=4)
+			{
+			  int l	 = k+j;
+			  int ifc0, m, o;
+			  __m128 XMM0, XMM1, XMM2, XMM3;
+			  XMM0 = _mm_load_ps(rMs+l  );
+			  XMM2 = _mm_load_ps(slowM+l  );
+			  XMM1 = _mm_load_ps(rAs+l  );
+			  XMM3 = _mm_load_ps(slowA+l  );
+			  XMM0 = _mm_cmplt_ps(XMM0, XMM2);
+			  XMM1 = _mm_cmplt_ps(XMM1, XMM3);
+			  XMM1 = _mm_and_ps(XMM1, XMM0);
+			  ifc0 = _mm_movemask_ps(XMM1);
+			  if(ifc0==0)
+			  {
+				couple_lossless_ps(rM+l, rA+l, qM+l, qA+l);
+				l += 4;
+			  }
+			  else if(ifc0==0xF)
+			  {
+				precomputed_couple_point_ps(&mag_memo[i][l],
+				  floorM+l,floorA+l,
+				  qM+l,qA+l);
+				XMM0 = _mm_load_ps(qM+l);
+#if defined(__SSE2__)
+				XMM0 = _mm_castsi128_ps(_mm_cvtps_epi32(XMM0));
+				XMM0 = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(XMM0), PFV_0.pi));
+#else
+				XMM2 = _mm_cmplt_ps(PFV_M0P5.ps, XMM0);
+				XMM0 = _mm_cmplt_ps(XMM0, PFV_0P5.ps);
+				XMM0 = _mm_and_ps(XMM0, XMM2);
+#endif
+				energy_loss += bitCountTable[_mm_movemask_ps(XMM0)];
+				l += 4;
+			  }
+			  else
+			  {
+				for(m=0,o=1;m<4;m++)
+				{
+				  if(ifc0&o)
+					precomputed_couple_point(mag_memo[i][l],
+					  floorM[l],floorA[l],
+					  qM+l,qA+l);
+				  else
+					couple_lossless(rM[l],rA[l],qM+l,qA+l);
+				  l ++;
+				  o = o << 1;
+				}
+				XMM0 = _mm_load_ps(qM+l-4);
+#if defined(__SSE2__)
+				XMM0 = _mm_castsi128_ps(_mm_cvtps_epi32(XMM0));
+				XMM0 = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(XMM0), PFV_0.pi));
+#else
+				XMM2 = _mm_cmplt_ps(PFV_M0P5.ps, XMM0);
+				XMM0 = _mm_cmplt_ps(XMM0, PFV_0P5.ps);
+				XMM0 = _mm_and_ps(XMM0, XMM2);
+#endif
+				XMM0 = _mm_and_ps(XMM0, XMM1);
+				energy_loss += bitCountTable[_mm_movemask_ps(XMM0)];
+			  }
+			}
+			for(;k<midpoint066;k++)
+			{
+			  int l=k+j;
+			  float a=mdctM[l];
+			  float b=mdctA[l];
+			  float dummypoint;
+			  float hypot_reserve;
+
+			  postpoint=postpoint_backup;
+
+			  if(l>=limit){
+				postpoint-=shigh[l];
+				/* The following prevents an extreme reduction of residue. (2ch stereo only) */
+				if(mdctMA[l]){
+				  hypot_reserve = fabs(fabs(a)-fabs(b));
+				  if(hypot_reserve < 0.001){ // 0~0.000999-
+					dummypoint = stereo_threshholds_rephase[g->coupling_postpointamp[blobno]];
+					dummypoint = dummypoint+((postpoint-dummypoint)*(hypot_reserve*1000));
+					if(postpoint > dummypoint) postpoint = dummypoint;
+				  }
+				}
+			  }
+
+			  if((l>=limit && rMs[l]<postpoint && rAs[l]<postpoint) ||
+				(rMs[l]<slowM[l] && rAs[l]<slowA[l])){
+
+				  __m128 XMM0;
+				  if(l>=0&&l<=n)
+				  {
+					precomputed_couple_point(mag_memo[i][l],
+					  floorM[l],floorA[l],
+					  qM+l,qA+l);
+				  }
+				  //if(rint(qM[l])==0.f)acc+=qM[l]*qM[l];
+				  XMM0 = _mm_load_ss(qM+l);
+				  if(_mm_cvtss_si32(XMM0)==0){
+					energy_loss++;
+					if(l>=limit)acc+=qM[l]*qM[l];
+				  }
+			  }else{
+				couple_lossless(rM[l],rA[l],qM+l,qA+l);
+			  }
+			}
+			PACC	 = _mm_set_ss(acc);
+			for(;k<partition;k+=4)
+			{
+			  int l	 = k+j;
+			  int ifc0, m, o;
+			  __m128 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5;
+			  XMM3 = PPOSTPOINT_BACKUP;
+			  XMM4 = _mm_load_ps(shigh+l  );
+			  XMM0 = _mm_load_ps(mdctM+l  );
+			  XMM1 = _mm_load_ps(mdctA+l  );
+			  XMM2 = _mm_load_ps(mdctMA+l  );
+			  XMM3 = _mm_sub_ps(XMM3, XMM4);	/* postpoint */
+			  if(_mm_movemask_ps(XMM2)!=0)
+					{
+					  XMM5 = XMM3;								/* copy of postpoint */
+					  XMM0 = _mm_and_ps(XMM0, PABSMASK.ps);
+					  XMM1 = _mm_and_ps(XMM1, PABSMASK.ps);
+					  XMM0 = _mm_sub_ps(XMM0, XMM1);
+					  XMM0 = _mm_and_ps(XMM0, PABSMASK.ps);	/* hypot_reserve */
+					  XMM1 = _mm_cmplt_ps(XMM0, PP001.ps);	/* Mask of hypot_reserve */
+					  XMM0 = _mm_mul_ps(XMM0, P1000.ps);
+					  XMM5 = _mm_sub_ps(XMM5, PDUMMYPOINT);
+					  XMM0 = _mm_mul_ps(XMM0, XMM5);
+					  XMM0 = _mm_add_ps(XMM0, PDUMMYPOINT);		/* dummypoint */
+					  XMM1 = _mm_and_ps(XMM1, XMM2);
+					  XMM0 = _mm_min_ps(XMM0, XMM3);
+					  XMM0 = _mm_or_ps(
+						_mm_and_ps(XMM0, XMM1),
+						_mm_andnot_ps(XMM1, XMM3)
+						);											/* postpoint */
+					}
+			  else
+				XMM0 = XMM3;
+			  XMM3 = _mm_load_ps(slowM+l  );
+			  XMM4 = _mm_load_ps(slowA+l  );
+			  XMM1 = _mm_load_ps(rMs+l  );
+			  XMM2 = _mm_load_ps(rAs+l  );
+			  XMM3 = _mm_max_ps(XMM3, XMM0);
+			  XMM4 = _mm_max_ps(XMM4, XMM0);
+			  XMM1 = _mm_cmplt_ps(XMM1, XMM3);
+			  XMM2 = _mm_cmplt_ps(XMM2, XMM4);
+			  XMM1 = _mm_and_ps(XMM1, XMM2);
+			  ifc0 = _mm_movemask_ps(XMM1);
+			  if(ifc0==0)
+			  {
+				couple_lossless_ps(rM+l, rA+l, qM+l, qA+l);
+				l += 4;
+			  }
+			  else if(ifc0==0xF)
+			  {
+				precomputed_couple_point_ps(&mag_memo[i][l],
+				  floorM+l,floorA+l,
+				  qM+l,qA+l);
+				XMM0 = _mm_load_ps(qM+l);
+				XMM2 = XMM0;
+#if defined(__SSE2__)
+				XMM0 = _mm_castsi128_ps(_mm_cvtps_epi32(XMM0));
+				XMM0 = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(XMM0), PFV_0.pi));
+#else
+				XMM3 = _mm_cmplt_ps(PFV_M0P5.ps, XMM0);
+				XMM0 = _mm_cmplt_ps(XMM0, PFV_0P5.ps);
+				XMM0 = _mm_and_ps(XMM0, XMM3);
+#endif
+				XMM2 = _mm_and_ps(XMM2, XMM0);
+				energy_loss += bitCountTable[_mm_movemask_ps(XMM0)];
+				XMM2 = _mm_mul_ps(XMM2, XMM2);
+				PACC = _mm_add_ps(PACC, XMM2);
+				l += 4;
+			  }
+			  else
+			  {
+				for(m=0,o=1;m<4;m++)
+				{
+				  if(ifc0&o)
+					precomputed_couple_point(mag_memo[i][l],
+					  floorM[l],floorA[l],
+					  qM+l,qA+l);
+				  else
+					couple_lossless(rM[l],rA[l],qM+l,qA+l);
+				  l ++;
+				  o = o << 1;
+				}
+				XMM0 = _mm_load_ps(qM+l-4);
+				XMM2 = XMM0;
+#if defined(__SSE2__)
+				XMM0 = _mm_castsi128_ps(_mm_cvtps_epi32(XMM0));
+				XMM0 = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(XMM0), PFV_0.pi));
+#else
+				XMM3 = _mm_cmplt_ps(PFV_M0P5.ps, XMM0);
+				XMM0 = _mm_cmplt_ps(XMM0, PFV_0P5.ps);
+				XMM0 = _mm_and_ps(XMM0, XMM3);
+#endif
+				XMM0 = _mm_and_ps(XMM0, XMM1);	/* lanes that were point-coupled (XMM1, the ifc0 mask) AND whose qM passed the zero test */
+				XMM2 = _mm_and_ps(XMM2, XMM0);	/* qM in those lanes, 0 elsewhere */
+				energy_loss += bitCountTable[_mm_movemask_ps(XMM0)];	/* lossless lanes are not counted, matching the scalar loop of this phase */
+				XMM2 = _mm_mul_ps(XMM2, XMM2);
+				PACC = _mm_add_ps(PACC, XMM2);
+			  }
+			}
+			acc = _mm_add_horz(PACC);
+			{
+			  int freqband_mid=j+16;
+			  int freqband_flag=0;
+			  int min_energy;
+
+			  rpacc=acc;
+			  /* When the energy loss of a partition is large, NN is performed in the middle of partition.
+			  for 48/44.1/32kHz */
+			  if(energy_loss==32 && fabs(qM[freqband_mid])>nnmid_th && acc>=p->vi->normal_thresh
+				&& freqband_mid>=pointlimit){
+				  __m128 XMM0;
+				  XMM0 = _mm_load_ss(qM+freqband_mid);
+				  if(_mm_cvtss_si32(XMM0)==0){
+					if(mdctMA[freqband_mid]){
+					  acc-=1.f;
+					  rpacc-=1.32;
+					}else{
+					  acc-=1.f;
+					  rpacc-=1.f;
+					}
+					qM[freqband_mid]=unitnorm(qM[freqband_mid]);
+					freqband_flag=1;
+					nn_num++;
+				  }
+			  }
+			  /* NN main (point stereo) */
+			  for(k=0;k<partition && acc>=p->vi->normal_thresh;k++){
+				int l;
+				l=mag_sort[i][j+k];
+				if(freqband_mid==l && freqband_flag)continue;
+				if(l>=pointlimit){
+				  __m128 XMM0 = _mm_load_ss(qM+l);
+				  if(_mm_cvtss_si32(XMM0)==0){
+					if(mdctMA[l]){
+					  if(rpacc<p->vi->normal_thresh)continue;
+					  acc-=1.f;
+					  rpacc-=1.32;
+					}else{
+					  acc-=1.f;
+					  rpacc-=1.f;
+					}
+					qM[l]=unitnorm(qM[l]);
+					nn_num++;
+				  }
+				}
+			  }
+			  /* The minimum energy complement.
+			  for 48/44.1/32kHz */
+			  min_energy=32-energy_loss+nn_num;
+			  if(min_energy<2 || (j<=p->min_nn_lp && min_energy==2)){
+				int l;
+				float ab;
+				for(;k<partition;k++){
+				  l=mag_sort[i][j+k];
+				  ab=fabs(qM[l]);
+				  if(ab<0.04)break;
+#if	1
+				  if( ((mdctM[l]>0. && mdctA[l]<0.) || (mdctA[l]>0. && mdctM[l]<0.))
+					&& ab<0.11)break; // 0.11
+#else
+				  if(mdctMA[l] && ab < 0.11)break;
+#endif
+				  if(l>=pointlimit){
+					__m128 XMM0 = _mm_load_ss(qM+l);
+					if(_mm_cvtss_si32(XMM0)==0){
+					  qM[l]=unitnorm(qM[l]);
+					  break;
+					}
+				  }
+				}
+			  }
+			}
+		  }
+		}
+		/* Phase 2 */
+		if(e>midpoint1)
+		{
+		  int ts;
+		  if(s<midpoint1)
+			ts = midpoint1;
+		  else
+			ts = s;
+		  for(j=ts;j<e;j+=partition){
+			float acc=0.f;
+			float rpacc;
+			int energy_loss=0;
+			int nn_num=0;
+			__m128	PACC	 = _mm_setzero_ps();
+
+			for(k=0;k<partition;k+=4)
+			{
+			  int l	 = k+j;
+			  int ifc0, m, o;
+			  __m128 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5;
+			  XMM3 = PPOSTPOINT_BACKUP;
+			  XMM4 = _mm_load_ps(shigh+l  );
+			  XMM0 = _mm_load_ps(mdctM+l  );
+			  XMM1 = _mm_load_ps(mdctA+l  );
+			  XMM2 = _mm_load_ps(mdctMA+l  );
+			  XMM3 = _mm_sub_ps(XMM3, XMM4);	/* postpoint */
+			  if(_mm_movemask_ps(XMM2)!=0)
+					{
+					  XMM5 = XMM3;								/* copy of postpoint */
+					  XMM0 = _mm_and_ps(XMM0, PABSMASK.ps);
+					  XMM1 = _mm_and_ps(XMM1, PABSMASK.ps);
+					  XMM0 = _mm_sub_ps(XMM0, XMM1);
+					  XMM0 = _mm_and_ps(XMM0, PABSMASK.ps);	/* hypot_reserve */
+					  XMM1 = _mm_cmplt_ps(XMM0, PP001.ps);	/* Mask of hypot_reserve */
+					  XMM0 = _mm_mul_ps(XMM0, P1000.ps);
+					  XMM5 = _mm_sub_ps(XMM5, PDUMMYPOINT);
+					  XMM0 = _mm_mul_ps(XMM0, XMM5);
+					  XMM0 = _mm_add_ps(XMM0, PDUMMYPOINT);		/* dummypoint */
+					  XMM1 = _mm_and_ps(XMM1, XMM2);
+					  XMM0 = _mm_min_ps(XMM0, XMM3);
+					  XMM0 = _mm_or_ps(
+						_mm_and_ps(XMM0, XMM1),
+						_mm_andnot_ps(XMM1, XMM3)
+						);											/* postpoint */
+					}
+			  else
+				XMM0 = XMM3;
+			  XMM3 = _mm_load_ps(slowM+l  );
+			  XMM4 = _mm_load_ps(slowA+l  );
+			  XMM1 = _mm_load_ps(rMs+l  );
+			  XMM2 = _mm_load_ps(rAs+l  );
+			  XMM3 = _mm_max_ps(XMM3, XMM0);
+			  XMM4 = _mm_max_ps(XMM4, XMM0);
+			  XMM1 = _mm_cmplt_ps(XMM1, XMM3);
+			  XMM2 = _mm_cmplt_ps(XMM2, XMM4);
+			  XMM1 = _mm_and_ps(XMM1, XMM2);
+			  ifc0 = _mm_movemask_ps(XMM1);
+			  if(ifc0==0)
+			  {
+				couple_lossless_ps(rM+l, rA+l, qM+l, qA+l);
+				l += 4;
+			  }
+			  else if(ifc0==0xF)
+			  {
+				precomputed_couple_point_ps(&mag_memo[i][l],
+				  floorM+l,floorA+l,
+				  qM+l,qA+l);
+				XMM0 = _mm_load_ps(qM+l);
+				XMM2 = XMM0;
+#if defined(__SSE2__)
+				XMM0 = _mm_castsi128_ps(_mm_cvtps_epi32(XMM0));
+				XMM0 = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(XMM0), PFV_0.pi));
+#else
+				XMM3 = _mm_cmplt_ps(PFV_M0P5.ps, XMM0);
+				XMM0 = _mm_cmplt_ps(XMM0, PFV_0P5.ps);
+				XMM0 = _mm_and_ps(XMM0, XMM3);
+#endif
+				XMM2 = _mm_and_ps(XMM2, XMM0);
+				energy_loss += bitCountTable[_mm_movemask_ps(XMM0)];
+				XMM2 = _mm_mul_ps(XMM2, XMM2);
+				PACC = _mm_add_ps(PACC, XMM2);
+				l += 4;
+			  }
+			  else
+			  {
+				for(m=0,o=1;m<4;m++)
+				{
+				  if(ifc0&o)
+					precomputed_couple_point(mag_memo[i][l],
+					  floorM[l],floorA[l],
+					  qM+l,qA+l);
+				  else
+					couple_lossless(rM[l],rA[l],qM+l,qA+l);
+				  l ++;
+				  o = o << 1;
+				}
+				XMM0 = _mm_load_ps(qM+l-4);
+				XMM2 = XMM0;
+#if defined(__SSE2__)
+				XMM0 = _mm_castsi128_ps(_mm_cvtps_epi32(XMM0));
+				XMM0 = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(XMM0), PFV_0.pi));
+#else
+				XMM3 = _mm_cmplt_ps(PFV_M0P5.ps, XMM0);
+				XMM0 = _mm_cmplt_ps(XMM0, PFV_0P5.ps);
+				XMM0 = _mm_and_ps(XMM0, XMM3);
+#endif
+				XMM0 = _mm_and_ps(XMM0, XMM1);	/* XMM1 = point-coupled lanes (ifc0 mask): ignore lossless lanes */
+				XMM2 = _mm_and_ps(XMM2, XMM0);	/* qM kept only where point-coupled AND rounding to 0 */
+				energy_loss += bitCountTable[_mm_movemask_ps(XMM0)];	/* same lanes the scalar path counts */
+				XMM2 = _mm_mul_ps(XMM2, XMM2);
+				PACC = _mm_add_ps(PACC, XMM2);
+			  }
+			}
+			acc = _mm_add_horz(PACC);
+			{
+			  int freqband_mid=j+16;
+			  int freqband_flag=0;
+			  int min_energy;
+
+			  rpacc=acc;
+			  /* When the energy loss of a partition is large, NN is performed in the middle of partition.
+			  for 48/44.1/32kHz */
+			  if(energy_loss==32 && fabs(qM[freqband_mid])>nnmid_th && acc>=p->vi->normal_thresh
+				&& freqband_mid>=pointlimit){
+				  __m128 XMM0;
+				  XMM0 = _mm_load_ss(qM+freqband_mid);
+				  if(_mm_cvtss_si32(XMM0)==0){
+					if(mdctMA[freqband_mid]){
+					  acc-=1.f;
+					  rpacc-=1.32;
+					}else{
+					  acc-=1.f;
+					  rpacc-=1.f;
+					}
+					qM[freqband_mid]=unitnorm(qM[freqband_mid]);
+					freqband_flag=1;
+					nn_num++;
+				  }
+			  }
+			  /* NN main (point stereo) */
+			  for(k=0;k<partition && acc>=p->vi->normal_thresh;k++){
+				int l;
+				l=mag_sort[i][j+k];
+				if(freqband_mid==l && freqband_flag)continue;
+				if(l>=pointlimit && rint(qM[l])==0.f){
+				  if(mdctMA[l]){
+					if(rpacc<p->vi->normal_thresh)continue;
+					acc-=1.f;
+					rpacc-=1.32;
+				  }else{
+					acc-=1.f;
+					rpacc-=1.f;
+				  }
+				  qM[l]=unitnorm(qM[l]);
+				  nn_num++;
+				}
+			  }
+			  /* The minimum energy complement.
+			  for 48/44.1/32kHz */
+			  min_energy=32-energy_loss+nn_num;
+			  if(min_energy<2 || (j<=p->min_nn_lp && min_energy==2)){
+				int l;
+				float ab;
+				for(;k<partition;k++){
+				  __m128 XMM0;
+				  l=mag_sort[i][j+k];
+				  ab=fabs(qM[l]);
+				  if(ab<0.04)break;
+#if	1
+				  if( ((mdctM[l]>0. && mdctA[l]<0.) || (mdctA[l]>0. && mdctM[l]<0.))
+					&& ab<0.11)break; // 0.11
+#else
+				  if(mdctMA[l] && ab < 0.11)break;
+#endif
+				  if(l>=pointlimit){
+					__m128 XMM0 = _mm_load_ss(qM+l);
+					if(_mm_cvtss_si32(XMM0)==0){
+					  qM[l]=unitnorm(qM[l]);
+					  break;
+					}
+				  }
+				}
+			  }
+			}
+		  }
+		}
+	  }
+	  else
+	  {
+		_MM_ALIGN16 float slowM[2048];
+		_MM_ALIGN16 float slowA[2048];
+		_MM_ALIGN16 float shigh[2048];
+		int	midpoint0	 = (limit/partition)*partition;
+		int	midpoint1	 = ((limit+partition-1)/partition)*partition;
+		for(j=0;j<e;j+=partition){
+		  float rpacc;
+		  int energy_loss=0;
+		  int nn_num=0;
+
+		  for(k=0;k<partition;k++){
+			int l=k+j;
+			float slow=0.f;
+			float shighM=0.f;
+			float shighA=0.f;
+
+			slowM[l] = prepoint;
+			slowA[l] = prepoint;
+			shigh[l] = 0.f;
+
+			postpoint=postpoint_backup;
+
+			/* AoTuV */
+			/** @ M6 MAIN **
+			The threshold of a stereo is changed dynamically. 
+			by Aoyumi @ 2006/06/04
+			*/
+			if(l>=stcont_start){
+			  int m;
+			  int lof_num;
+			  int hif_num;
+
+			  // (It may be better to calculate this in advance) 
+			  lof_st=l-(l/2)*.167;
+			  hif_st=l+l*.167;
+
+			  hif_stcopy=hif_st;
+
+			  // limit setting
+			  if(hif_st>freqlimit)hif_st=freqlimit;
+
+			  if(old_lof_st || old_hif_st){
+				if(hif_st>l){
+				  // hif_st, lof_st ...absolute value
+				  // lof_num, hif_num ...relative value
+
+				  // low freq.(lower)
+				  lof_num=lof_st-old_lof_st;
+				  if(lof_num==0){
+					Afreq_num+=Ac_treshp[l-1];
+					Mfreq_num+=Mc_treshp[l-1];
+				  }else if(lof_num==1){
+					Afreq_num+=Ac_treshp[l-1];
+					Mfreq_num+=Mc_treshp[l-1];
+					Afreq_num-=Ac_treshp[old_lof_st];
+					Mfreq_num-=Mc_treshp[old_lof_st];
+				  }//else puts("err. low");
+
+				  // high freq.(higher)
+				  hif_num=hif_st-old_hif_st;
+				  if(hif_num==0){
+					Afreq_num-=Ac_treshp[l];
+					Mfreq_num-=Mc_treshp[l];
+				  }else if(hif_num==1){
+					Afreq_num-=Ac_treshp[l];
+					Mfreq_num-=Mc_treshp[l];
+					Afreq_num+=Ac_treshp[hif_st];
+					Mfreq_num+=Mc_treshp[hif_st];
+				  }else if(hif_num==2){
+					Afreq_num-=Ac_treshp[l];
+					Mfreq_num-=Mc_treshp[l];
+					Afreq_num+=Ac_treshp[hif_st];
+					Mfreq_num+=Mc_treshp[hif_st];
+					Afreq_num+=Ac_treshp[hif_st-1];
+					Mfreq_num+=Mc_treshp[hif_st-1];
+				  }//else puts("err. high");
+				}
+			  }else{
+				for(m=lof_st; m<=hif_st; m++){
+				  if(m==l)continue;
+				  if(Ac_treshp[m]) Afreq_num++;
+				  if(Mc_treshp[m]) Mfreq_num++;
+				}
+			  }
+			  if(l>=limit){
+				shigh[l]=sth_high/(hif_stcopy-lof_st);
+				shighA=shigh[l]*Afreq_num;
+				shighM=shigh[l]*Mfreq_num;
+				if((shighA+rAs[l])>(shighM+rMs[l]))shigh[l]=shighA;
+				else shigh[l]=shighM;
+			  }else{
+				slow=sth_low/(hif_stcopy-lof_st);
+				slowA[l]=slow*Afreq_num;
+				slowM[l]=slow*Mfreq_num;
+				if(p->noiseoffset[1][l]<-1){
+				  slowA[l]*=(p->noiseoffset[1][l]+2);
+				  slowM[l]*=(p->noiseoffset[1][l]+2);
+				}
+				slowA[l] = prepoint - slowA[l];
+				slowM[l] = prepoint - slowM[l];
+			  }
+			  old_lof_st=lof_st;
+			  old_hif_st=hif_st;
+			}
+		  }
+		}
+
+		for(j=0;j<n;j+=partition){
+		  float acc=0.f;
+		  float rpacc;
+		  int energy_loss=0;
+		  int nn_num=0;
+
+		  for(k=0;k<partition;k++){
+			int l=k+j;
+			float a=mdctM[l];
+			float b=mdctA[l];
+			float dummypoint;
+			float hypot_reserve;
+
+			postpoint=postpoint_backup;
+
+			if(l>=limit){
+			  postpoint-=shigh[l];
+			  if(mdctMA[l]){
+				hypot_reserve = fabs(fabs(a)-fabs(b));
+				if(hypot_reserve < 0.001){ // 0~0.000999-
+				  dummypoint = stereo_threshholds_rephase[g->coupling_postpointamp[blobno]];
+				  dummypoint = dummypoint+((postpoint-dummypoint)*(hypot_reserve*1000));
+				  if(postpoint > dummypoint) postpoint = dummypoint;
+				}
+			  }
+			}
+
+			if((l>=limit && rMs[l]<postpoint && rAs[l]<postpoint) ||
+			  (rMs[l]<slowM[l] && rAs[l]<slowA[l])){
+
+				if(l>=0&&l<=n)
+				{
+				  precomputed_couple_point(mag_memo[i][l],
+					floorM[l],floorA[l],
+					qM+l,qA+l);
+				}
+				if(rint(qM[l])==0.f){
+				  energy_loss++;
+				  if(l>=limit)acc+=qM[l]*qM[l];
+				}
+			}else{
+			  couple_lossless(rM[l],rA[l],qM+l,qA+l);
+			}
+		  }
+
+		  {
+			int freqband_mid=j+16;
+			int freqband_flag=0;
+			int min_energy;
+
+			rpacc=acc;
+			/* When the energy loss of a partition is large, NN is performed in the middle of partition.
+			for 48/44.1/32kHz */
+			if(energy_loss==32 && fabs(qM[freqband_mid])>nnmid_th && acc>=p->vi->normal_thresh
+			  && freqband_mid>=pointlimit && rint(qM[freqband_mid])==0.f){
+				if(mdctMA[freqband_mid]){
+					acc-=1.f;
+					rpacc-=1.32;
+				}else{
+				  acc-=1.f;
+				  rpacc-=1.f;
+				}
+				qM[freqband_mid]=unitnorm(qM[freqband_mid]);
+				freqband_flag=1;
+				nn_num++;
+			}
+			/* NN main (point stereo) */
+			for(k=0;k<partition && acc>=p->vi->normal_thresh;k++){
+			  int l;
+			  l=mag_sort[i][j+k];
+			  if(freqband_mid==l && freqband_flag)continue;
+			  if(l>=pointlimit && rint(qM[l])==0.f){
+				if(mdctMA[l]){
+				  if(rpacc<p->vi->normal_thresh)continue;
+				  acc-=1.f;
+				  rpacc-=1.32;
+				}else{
+				  acc-=1.f;
+				  rpacc-=1.f;
+				}
+				qM[l]=unitnorm(qM[l]);
+				nn_num++;
+			  }
+			}
+			/* The minimum energy complement.
+			for 48/44.1/32kHz */
+			min_energy=32-energy_loss+nn_num;
+			if(min_energy<2 || (j<=p->min_nn_lp && min_energy==2)){
+			  int l;
+			  float ab;
+			  for(;k<partition;k++){
+				l=mag_sort[i][j+k];
+				ab=fabs(qM[l]);
+				if(ab<0.04)break;
+#if	1
+				if( ((mdctM[l]>0. && mdctA[l]<0.) || (mdctA[l]>0. && mdctM[l]<0.))
+				 && ab<0.11)break; // 0.11
+#else
+				if(mdctMA[l] && ab < 0.11)break;
+#endif
+				if(rint(qM[l])==0.f && l>=pointlimit){
+				  qM[l]=unitnorm(qM[l]);
+				  break;
+				}
+			  }
+			}
+		  }
+		}
+	  }
+#else														/* SSE Optimize */
       if(!stereo_threshholds[g->coupling_postpointamp[blobno]])stcont_start=n;
       else{
       	// exception handling
@@ -1760,7 +11150,6 @@
 		old_lof_st=lof_st;
 	  	old_hif_st=hif_st;
 	  }
-
 	  if(l>=limit){
 	    postpoint-=shigh;
 	    /* The following prevents an extreme reduction of residue. (2ch stereo only) */
@@ -1778,11 +11167,12 @@
 	    if((l>=limit && rMs<postpoint && rAs<postpoint) ||
 	       (rMs<(prepoint-slowM) && rAs<(prepoint-slowA))){
 
-
+		  if(l>=0&&l<=n)
+		  {
 	      precomputed_couple_point(mag_memo[i][l],
 				       floorM[l],floorA[l],
 				       qM+l,qA+l);
-
+		  }
 	      //if(rint(qM[l])==0.f)acc+=qM[l]*qM[l];
 	      if(rint(qM[l])==0.f){
 	      	energy_loss++;
@@ -1806,7 +11196,7 @@
 	  /* When the energy loss of a partition is large, NN is performed in the middle of partition.
 	      for 48/44.1/32kHz */
 	  if(energy_loss==32 && fabs(qM[freqband_mid])>nnmid_th && acc>=p->vi->normal_thresh
-	   && freqband_mid<sliding_lowpass && freqband_mid>=pointlimit && rint(qM[freqband_mid])==0.f){
+	   && freqband_mid>=pointlimit && rint(qM[freqband_mid])==0.f){
 	  	if( ((mdctM[freqband_mid]>0.) && (mdctA[freqband_mid]<0.)) ||
 	  	 ((mdctA[freqband_mid]>0.) && (mdctM[freqband_mid]<0.)) ){
 	  	 acc-=1.f;
@@ -1857,6 +11247,7 @@
 	  }
 	}
       }
+#endif														/* SSE Optimize */
     }
   }
 }
diff -ruN --exclude '*.in' libvorbis-1.2.0/lib/psy.h libvorbis-1.2.0-sse/lib/psy.h
--- libvorbis-1.2.0/lib/psy.h	2007-08-02 12:42:08.000000000 +0200
+++ libvorbis-1.2.0-sse/lib/psy.h	2007-08-02 12:43:10.000000000 +0200
@@ -126,6 +126,24 @@
   int n33p;
   int n75p;
 
+#ifdef __SSE__
+  int   midpoint1;	/* for bark_noise_hybridmp */
+  int   midpoint1_4;
+  int   midpoint1_8;
+  int   midpoint1_16;
+  int   midpoint2;
+  int   midpoint2_4;
+  int   midpoint2_8;
+  int   midpoint2_16;
+
+  long  *octsft; /* shifted octave */
+  long  *octend; /* for seed_loop */
+  long  *octpos; /* for max_seeds */
+#if	defined(_OPENMP)
+  int   _vp_couple_spoint0;
+  int   _vp_couple_spoint1;
+#endif
+#endif
 } vorbis_look_psy;
 
 extern void   _vp_psy_init(vorbis_look_psy *p,vorbis_info_psy *vi,
@@ -165,7 +183,12 @@
 			       int end_block,
 			       int blocktype, int modenumber,
 			       int nW_modenumber,
+#ifdef __SSE__												/* SSE Optimize */
+			       int lW_blocktype, int lW_modenumber, int lW_no,
+			       float *tlogmdct);
+#else														/* SSE Optimize */
 			       int lW_blocktype, int lW_modenumber, int lW_no);
+#endif														/* SSE Optimize */
 
 extern float _vp_ampmax_decay(float amp,vorbis_dsp_state *vd);
 
@@ -173,7 +196,14 @@
 					vorbis_info_psy_global *g,
 					vorbis_look_psy *p,
 					vorbis_info_mapping0 *vi,
+#if	defined(_OPENMP)
+					float **mdct,
+					float **ret,
+					int thnum,
+					int thmax);
+#else
 					float **mdct);
+#endif
 
 extern void _vp_couple(int blobno,
 		       vorbis_info_psy_global *g,
@@ -185,18 +215,39 @@
 		       int   **ifloor,
 		       int   *nonzero,
 		       int   sliding_lowpass,
+#if defined(_OPENMP)
+		       float **mdct, float **res_org,
+			   int thnum, int thmax);
+#else
 		       float **mdct, float **res_org);
+#endif
 
 extern void _vp_noise_normalize(vorbis_look_psy *p,
 				float *in,float *out,int *sortedindex);
 
 extern void _vp_noise_normalize_sort(vorbis_look_psy *p,
+#ifdef __SSE__												/* SSE Optimize */
+				     float *magnitudes,int *sortedindex,float *temp);
+#else														/* SSE Optimize */
 				     float *magnitudes,int *sortedindex);
+#endif														/* SSE Optimize */
 
 extern int **_vp_quantize_couple_sort(vorbis_block *vb,
 				      vorbis_look_psy *p,
 				      vorbis_info_mapping0 *vi,
+#ifdef __SSE__												/* SSE Optimize */
+				      float **mags,
+#if defined(_OPENMP)
+					  float *temp,
+					  int **ret,
+					  int thnum,
+					  int thmax);
+#else
+					  float *temp);
+#endif
+#else														/* SSE Optimize */
 				      float **mags);
+#endif														/* SSE Optimize */
 
 extern float lb_loudnoise_fix(vorbis_look_psy *p,
 		float noise_compand_level,
diff -ruN --exclude '*.in' libvorbis-1.2.0/lib/registry.h libvorbis-1.2.0-sse/lib/registry.h
--- libvorbis-1.2.0/lib/registry.h	2007-07-24 02:09:47.000000000 +0200
+++ libvorbis-1.2.0-sse/lib/registry.h	2007-08-02 12:43:10.000000000 +0200
@@ -27,6 +27,9 @@
 
 extern vorbis_func_floor     *_floor_P[];
 extern vorbis_func_residue   *_residue_P[];
+#if defined(_OPENMP)&&defined(__SSE__)
+extern vorbis_func_residue_mt *_residue_mt_P[];
+#endif
 extern vorbis_func_mapping   *_mapping_P[];
 
 #endif
diff -ruN --exclude '*.in' libvorbis-1.2.0/lib/res0.c libvorbis-1.2.0-sse/lib/res0.c
--- libvorbis-1.2.0/lib/res0.c	2007-08-02 12:42:12.000000000 +0200
+++ libvorbis-1.2.0-sse/lib/res0.c	2007-08-02 13:01:02.000000000 +0200
@@ -30,6 +30,9 @@
 #include "codebook.h"
 #include "misc.h"
 #include "os.h"
+#ifdef __SSE__												/* SSE Optimize */
+#include "xmmlib.h"
+#endif														/* SSE Optimize */
 
 #if defined(TRAIN_RES) || defined (TRAIN_RESAUX)
 #include <stdio.h>
@@ -337,18 +340,582 @@
   }
 
   if(best>-1){
+#ifdef __SSE__												/* SSE Optimize */
+	switch(dim)
+	{
+		case 2 :
+			{
+				float *ptr	 = book->valuelist+best*2;
+				__m128	XMM0	 = _mm_load_ss(a  );
+				__m128	XMM1	 = _mm_load_ss(a+1);
+				XMM0	 = _mm_sub_ss(XMM0, PM128(ptr  ));
+				XMM1	 = _mm_sub_ss(XMM1, PM128(ptr+1));
+				_mm_store_ss(a  , XMM0);
+				_mm_store_ss(a+1, XMM1);
+			}
+			break;
+		case 4 :
+			{
+				float *ptr	 = book->valuelist+best*4;
+				__m128	XMM0;
+				XMM0	 = _mm_load_ps(a  );
+				XMM0	 = _mm_sub_ps(XMM0, PM128(ptr  ));
+				_mm_store_ps(a  , XMM0);
+			}
+			break;
+		case 8 :
+			{
+				float *ptr	 = book->valuelist+best*8;
+				__m128	XMM0, XMM1;
+				XMM0	 = _mm_load_ps(a  );
+				XMM1	 = _mm_load_ps(a+4);
+				XMM0	 = _mm_sub_ps(XMM0, PM128(ptr  ));
+				XMM1	 = _mm_sub_ps(XMM1, PM128(ptr+4));
+				_mm_store_ps(a  , XMM0);
+				_mm_store_ps(a+4, XMM1);
+			}
+			break;
+		default :
+			{
+				float *ptr	 = book->valuelist+best*dim;
+				for(i=0;i<dim;i++)
+					*a++	-= *ptr++;
+			}
+			break;
+	}
+#else														/* SSE Optimize */
+    /* Scalar fallback for builds without SSE.  The three unchanged upstream
+       lines that follow subtract the chosen codebook entry from a[]; they
+       must not be repeated here, or ptr is declared (and subtracted) twice. */
     float *ptr=book->valuelist+best*dim;
     for(i=0;i<dim;i++)
       *a++ -= *ptr++;
+#endif														/* SSE Optimize */
   }
 
   return(best);
 }
 
+#ifdef __SSE__												/* SSE Optimize */
+static inline int local_book_besterror_dim1x4(codebook *book,float *a,oggpack_buffer *opb, int* ia)
+{	/* dim==1 book, four scalars per call: pick an entry for each of a[0..3], subtract it in place, encode it */
+	int bits;	/* sum of the four vorbis_book_encode() return values */
+	encode_aux_threshmatch *tt	 = book->c->thresh_tree;
+	__m128	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7;
+	int ctrl0, ctrl1, ctrl2, ctrl3;	/* one candidate entry index per input scalar */
+	ctrl0	 = ia[0];	/* ia[] holds the caller's indices into tt->quantmap */
+	ctrl1	 = ia[1];
+	ctrl2	 = ia[2];
+	ctrl3	 = ia[3];
+	XMM0	 = _mm_load_ss(a  );
+	XMM1	 = _mm_load_ss(a+1);
+	XMM2	 = _mm_load_ss(a+2);
+	XMM3	 = _mm_load_ss(a+3);
+	ctrl0	 = tt->quantmap[ctrl0];
+	ctrl1	 = tt->quantmap[ctrl1];
+	ctrl2	 = tt->quantmap[ctrl2];
+	ctrl3	 = tt->quantmap[ctrl3];
+	if(book->c->lengthlist[ctrl0]<=0)	/* mapped entry has no positive length: search all entries for a[0] */
+	{
+		const static_codebook *c	 = book->c;
+		int i;
+		float bestf	 = 0.f;
+		float *e	 = book->valuelist;
+		ctrl0	 = -1;	/* NOTE(review): stays -1 if no entry has lengthlist>0, then indexes valuelist below */
+		for(i=0;i<book->entries;i++)
+		{
+			if(c->lengthlist[i]>0)
+			{
+				float this;	/* squared error of entry i against a[0] */
+				{
+					float val	 = (e[0]-a[0]);
+					this		 = val*val;
+				}
+				if(ctrl0==-1 || this<bestf)	/* first usable entry, or strictly better */
+				{
+					bestf	 = this;
+					ctrl0	 = i;
+				}
+			}
+			e	++;
+		}
+	}
+	if(book->c->lengthlist[ctrl1]<=0)	/* same fallback search for a[1] */
+	{
+		const static_codebook *c	 = book->c;
+		int i;
+		float bestf	 = 0.f;
+		float *e	 = book->valuelist;
+		ctrl1	 = -1;
+		for(i=0;i<book->entries;i++)
+		{
+			if(c->lengthlist[i]>0)
+			{
+				float this;
+				{
+					float val	 = (e[0]-a[1]);
+					this		 = val*val;
+				}
+				if(ctrl1==-1 || this<bestf)
+				{
+					bestf	 = this;
+					ctrl1	 = i;
+				}
+			}
+			e	++;
+		}
+	}
+	if(book->c->lengthlist[ctrl2]<=0)	/* same fallback search for a[2] */
+	{
+		const static_codebook *c	 = book->c;
+		int i;
+		float bestf	 = 0.f;
+		float *e	 = book->valuelist;
+		ctrl2	 = -1;
+		for(i=0;i<book->entries;i++)
+		{
+			if(c->lengthlist[i]>0)
+			{
+				float this;
+				{
+					float val	 = (e[0]-a[2]);
+					this		 = val*val;
+				}
+				if(ctrl2==-1 || this<bestf)
+				{
+					bestf	 = this;
+					ctrl2	 = i;
+				}
+			}
+			e	++;
+		}
+	}
+	if(book->c->lengthlist[ctrl3]<=0)	/* same fallback search for a[3] */
+	{
+		const static_codebook *c	 = book->c;
+		int i;
+		float bestf	 = 0.f;
+		float *e	 = book->valuelist;
+		ctrl3	 = -1;
+		for(i=0;i<book->entries;i++)
+		{
+			if(c->lengthlist[i]>0)
+			{
+				float this;
+				{
+					float val	 = (e[0]-a[3]);
+					this		 = val*val;
+				}
+				if(ctrl3==-1 || this<bestf)
+				{
+					bestf	 = this;
+					ctrl3	 = i;
+				}
+			}
+			e	++;
+		}
+	}
+	XMM4	 = _mm_load_ss(book->valuelist+ctrl0);	/* value of the entry chosen for a[0] */
+	XMM5	 = _mm_load_ss(book->valuelist+ctrl1);
+	XMM6	 = _mm_load_ss(book->valuelist+ctrl2);
+	XMM7	 = _mm_load_ss(book->valuelist+ctrl3);
+	XMM0	 = _mm_sub_ss(XMM0, XMM4);	/* a[k] minus its chosen entry value */
+	XMM1	 = _mm_sub_ss(XMM1, XMM5);
+	XMM2	 = _mm_sub_ss(XMM2, XMM6);
+	XMM3	 = _mm_sub_ss(XMM3, XMM7);
+	_mm_store_ss(a  , XMM0);	/* differences are written back over the input */
+	_mm_store_ss(a+1, XMM1);
+	_mm_store_ss(a+2, XMM2);
+	_mm_store_ss(a+3, XMM3);
+	bits		 = vorbis_book_encode(book, ctrl0, opb);	/* entries are encoded in input order 0..3 */
+	bits		+= vorbis_book_encode(book, ctrl1, opb);
+	bits		+= vorbis_book_encode(book, ctrl2, opb);
+	bits		+= vorbis_book_encode(book, ctrl3, opb);
+	return(bits);
+}
+
+static inline int local_book_besterror_dim2x2(codebook *book,float *a,oggpack_buffer *opb, int* ia)
+{	/* dim==2 book, two vectors per call: a[0..1] and a[2..3] each get an entry, subtracted in place and encoded */
+	int bits;	/* sum of the two vorbis_book_encode() return values */
+	encode_aux_threshmatch *tt	 = book->c->thresh_tree;
+	__m128	XMM0, XMM1 = _mm_setzero_ps();	/* XMM1 is an input of _mm_loadl_pi below, so it must be initialized */
+	int ctrl0, ctrl1, ctrl2, ctrl3;
+	ctrl0	 = ia[1];	/* within each pair the second element's index becomes the high digit */
+	ctrl1	 = ia[0];
+	ctrl2	 = ia[3];
+	ctrl3	 = ia[2];
+	XMM0	 = _mm_load_ps(a);
+	ctrl0	 = tt->quantmap[ctrl0];
+	ctrl1	 = tt->quantmap[ctrl1];
+	ctrl2	 = tt->quantmap[ctrl2];
+	ctrl3	 = tt->quantmap[ctrl3];
+	ctrl0	 = ctrl0*tt->quantvals+ctrl1;	/* candidate entry for a[0..1] */
+	ctrl2	 = ctrl2*tt->quantvals+ctrl3;	/* candidate entry for a[2..3] */
+
+	if(book->c->lengthlist[ctrl0]<=0)	/* candidate has no positive length: search all entries for a[0..1] */
+	{
+		const static_codebook *c	 = book->c;
+		int i,j;
+		float bestf	 = 0.f;
+		float *e	 = book->valuelist;
+		ctrl0	 = -1;	/* NOTE(review): stays -1 if no entry has lengthlist>0, then indexes valuelist below */
+		for(i=0;i<book->entries;i++)
+		{
+			if(c->lengthlist[i]>0)
+			{
+				float this	 = 0.f;	/* squared error of entry i against a[0..1] */
+				for(j=0;j<2;j++)
+				{
+					float val	 = (e[j]-a[j]);
+					this		+= val*val;
+				}
+				if(ctrl0==-1 || this<bestf)	/* first usable entry, or strictly better */
+				{
+					bestf	 = this;
+					ctrl0	 = i;
+				}
+			}
+			e	+= 2;
+		}
+	}
+	if(book->c->lengthlist[ctrl2]<=0)	/* same fallback search for a[2..3] */
+	{
+		const static_codebook *c	 = book->c;
+		int i,j;
+		float bestf	 = 0.f;
+		float *e	 = book->valuelist;
+		ctrl2	 = -1;
+		for(i=0;i<book->entries;i++)
+		{
+			if(c->lengthlist[i]>0)
+			{
+				float this	 = 0.f;
+				for(j=0;j<2;j++)
+				{
+					float val	 = (e[j]-a[j+2]);
+					this		+= val*val;
+				}
+				if(ctrl2==-1 || this<bestf)
+				{
+					bestf	 = this;
+					ctrl2	 = i;
+				}
+			}
+			e	+= 2;
+		}
+	}
+	XMM1	 = _mm_loadl_pi(XMM1, (__m64*)(book->valuelist+ctrl0*2));	/* low half: entry for a[0..1] */
+	XMM1	 = _mm_loadh_pi(XMM1, (__m64*)(book->valuelist+ctrl2*2));	/* high half: entry for a[2..3] */
+	XMM0	 = _mm_sub_ps(XMM0, XMM1);
+	_mm_store_ps(a, XMM0);	/* differences are written back over the input */
+	bits		 = vorbis_book_encode(book, ctrl0, opb);
+	bits		+= vorbis_book_encode(book, ctrl2, opb);
+	return(bits);
+}
+
+static inline int local_book_besterror_dim4(codebook *book,float *a, int* ia)
+{	/* dim==4 book: choose an entry for a[0..3], subtract it in place, return the entry index */
+	encode_aux_threshmatch *tt	 = book->c->thresh_tree;
+	__m128	XMM0;	/* the input vector a[0..3] */
+	int ctrl0, ctrl1, ctrl2, ctrl3;
+	ctrl0	 = ia[3];	/* ia[] is consumed last-to-first: ia[3] becomes the most significant digit */
+	ctrl1	 = ia[2];
+	ctrl2	 = ia[1];
+	ctrl3	 = ia[0];
+	XMM0	 = _mm_load_ps(a  );
+	ctrl0	 = tt->quantmap[ctrl0];
+	ctrl1	 = tt->quantmap[ctrl1];
+	ctrl2	 = tt->quantmap[ctrl2];
+	ctrl3	 = tt->quantmap[ctrl3];
+	ctrl0	 = ctrl0 *tt->quantvals+ctrl1;	/* combine the four digits in base tt->quantvals */
+	ctrl0	 = ctrl0 *tt->quantvals+ctrl2;
+	ctrl0	 = ctrl0 *tt->quantvals+ctrl3;
+
+	if(book->c->lengthlist[ctrl0]<=0)	/* candidate has no positive length: search all entries */
+	{
+		const static_codebook *c	 = book->c;
+		int i;
+		float bestf	 = 0.f;
+		float *e	 = book->valuelist;
+		ctrl0	 = -1;	/* NOTE(review): stays -1 if no entry has lengthlist>0, then indexes valuelist below */
+		for(i=0;i<book->entries;i++)
+		{
+			if(c->lengthlist[i]>0)
+			{
+				float this;	/* squared error of entry i against the input */
+				__m128	PVAL	 = _mm_load_ps(e);
+				PVAL	 = _mm_sub_ps(PVAL, XMM0);
+				PVAL	 = _mm_mul_ps(PVAL, PVAL);
+				this	 = _mm_add_horz(PVAL);
+				if(ctrl0==-1 || this<bestf)	/* first usable entry, or strictly better */
+				{
+					bestf	 = this;
+					ctrl0	 = i;
+				}
+			}
+			e	+= 4;
+		}
+	}
+	XMM0	 = _mm_sub_ps(XMM0, PM128(book->valuelist+ctrl0*4));
+	_mm_store_ps(a, XMM0);	/* difference is written back over the input */
+	return(ctrl0);
+}
+
+static inline int local_book_besterror_dim8(codebook *book,float *a, int* ia)
+{	/* dim==8 book: choose an entry for a[0..7], subtract it in place, return the entry index */
+	int best;	/* entry index, built up digit by digit below */
+	encode_aux_threshmatch *tt	 = book->c->thresh_tree;
+	__m128	XMM0, XMM1;	/* a[0..3] and a[4..7] */
+	int ctrl0, ctrl1, ctrl2, ctrl3;
+	XMM0	 = _mm_load_ps(a  );
+	ctrl0	 = ia[7];	/* ia[] is consumed last-to-first: ia[7] becomes the most significant digit */
+	ctrl1	 = ia[6];
+	ctrl2	 = ia[5];
+	ctrl3	 = ia[4];
+	ctrl0	 = tt->quantmap[ctrl0];
+	ctrl1	 = tt->quantmap[ctrl1];
+	ctrl2	 = tt->quantmap[ctrl2];
+	ctrl3	 = tt->quantmap[ctrl3];
+	best	 = ctrl0*tt->quantvals+ctrl1;	/* combine digits in base tt->quantvals */
+	best	 = best *tt->quantvals+ctrl2;
+	best	 = best *tt->quantvals+ctrl3;
+	XMM1	 = _mm_load_ps(a+4);
+	ctrl0	 = ia[3];	/* second group of four digits: ia[3] down to ia[0] */
+	ctrl1	 = ia[2];
+	ctrl2	 = ia[1];
+	ctrl3	 = ia[0];
+	ctrl0	 = tt->quantmap[ctrl0];
+	ctrl1	 = tt->quantmap[ctrl1];
+	ctrl2	 = tt->quantmap[ctrl2];
+	ctrl3	 = tt->quantmap[ctrl3];
+	best	 = best *tt->quantvals+ctrl0;
+	best	 = best *tt->quantvals+ctrl1;
+	best	 = best *tt->quantvals+ctrl2;
+	best	 = best *tt->quantvals+ctrl3;
+
+	if(book->c->lengthlist[best]<=0)	/* candidate has no positive length: search all entries */
+	{
+		const static_codebook *c	 = book->c;
+		int i;
+		float bestf	 = 0.f;
+		float *e	 = book->valuelist;
+		best	 = -1;	/* NOTE(review): stays -1 if no entry has lengthlist>0, then indexes valuelist below */
+		for(i=0;i<book->entries;i++)
+		{
+			if(c->lengthlist[i]>0)
+			{
+				float this;	/* squared error of entry i against a[0..7] */
+				__m128	PVAL0	 = _mm_load_ps(e  );
+				__m128	PVAL1	 = _mm_load_ps(e+4);
+				PVAL0	 = _mm_sub_ps(PVAL0, PM128(a  ));
+				PVAL1	 = _mm_sub_ps(PVAL1, PM128(a+4));
+				PVAL0	 = _mm_mul_ps(PVAL0, PVAL0);
+				PVAL1	 = _mm_mul_ps(PVAL1, PVAL1);
+				PVAL0	 = _mm_add_ps(PVAL0, PVAL1);
+				this	 = _mm_add_horz(PVAL0);
+				if(best==-1 || this<bestf)	/* first usable entry, or strictly better */
+				{
+					bestf	 = this;
+					best	 = i;
+				}
+			}
+			e	+= 8;
+		}
+	}
+	XMM0	 = _mm_sub_ps(XMM0, PM128(book->valuelist+best*8  ));
+	XMM1	 = _mm_sub_ps(XMM1, PM128(book->valuelist+best*8+4));
+	_mm_store_ps(a  , XMM0);	/* differences are written back over the input */
+	_mm_store_ps(a+4, XMM1);
+	return(best);
+}
+#endif														/* SSE Optimize */
+
 static int _encodepart(oggpack_buffer *opb,float *vec, int n,
 		       codebook *book,long *acc){
   int i,bits=0;
   int dim=book->dim;
+#ifdef __SSE__												/* SSE Optimize */
+	int*	TEMP	 = (int*)_ogg_alloca(sizeof(int)*n);
+	__m128	PMIN	 = _mm_set1_ps(-(float)(book->c->thresh_tree->threshvals>>1));
+	__m128	PMAX	 = _mm_set1_ps( (float)(book->c->thresh_tree->threshvals>>1));
+
+	if(dim==1 || dim==2 || dim==4 || dim==8)	/* only dims with an SSE helper; any other dim takes the generic loop instead of the empty default: case */
+	{
+		if(book->c->thresh_tree->quantthresh[0]==-(float)(book->c->thresh_tree->threshvals>>1)+.5f)
+		{
+#if	defined(__SSE2__)
+			for(i=0;i<n;i+=16)
+			{
+				__m128	XMM0	 = _mm_load_ps(vec+i   );
+				__m128	XMM1	 = _mm_load_ps(vec+i+ 4);
+				__m128	XMM2	 = _mm_load_ps(vec+i+ 8);
+				__m128	XMM3	 = _mm_load_ps(vec+i+12);
+				XMM0	 = _mm_min_ps(XMM0, PMAX);
+				XMM1	 = _mm_min_ps(XMM1, PMAX);
+				XMM2	 = _mm_min_ps(XMM2, PMAX);
+				XMM3	 = _mm_min_ps(XMM3, PMAX);
+				XMM0	 = _mm_max_ps(XMM0, PMIN);
+				XMM1	 = _mm_max_ps(XMM1, PMIN);
+				XMM2	 = _mm_max_ps(XMM2, PMIN);
+				XMM3	 = _mm_max_ps(XMM3, PMIN);
+				XMM0	 = _mm_add_ps(XMM0, PMAX);
+				XMM1	 = _mm_add_ps(XMM1, PMAX);
+				XMM2	 = _mm_add_ps(XMM2, PMAX);
+				XMM3	 = _mm_add_ps(XMM3, PMAX);
+				XMM0	 = _mm_castsi128_ps(_mm_cvtps_epi32(XMM0));
+				XMM1	 = _mm_castsi128_ps(_mm_cvtps_epi32(XMM1));
+				XMM2	 = _mm_castsi128_ps(_mm_cvtps_epi32(XMM2));
+				XMM3	 = _mm_castsi128_ps(_mm_cvtps_epi32(XMM3));
+				_mm_store_ps((float*)(TEMP+i   ), XMM0);	/* _mm_store_ps takes float*; the lanes hold the converted ints */
+				_mm_store_ps((float*)(TEMP+i+ 4), XMM1);
+				_mm_store_ps((float*)(TEMP+i+ 8), XMM2);
+				_mm_store_ps((float*)(TEMP+i+12), XMM3);
+			}
+#else
+			for(i=0;i<n;i+=16)
+			{
+				__m64	MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7;
+				__m128	XMM0	 = _mm_load_ps(vec+i   );
+				__m128	XMM1	 = _mm_load_ps(vec+i+ 4);
+				__m128	XMM2	 = _mm_load_ps(vec+i+ 8);
+				__m128	XMM3	 = _mm_load_ps(vec+i+12);
+				XMM0	 = _mm_min_ps(XMM0, PMAX);
+				XMM1	 = _mm_min_ps(XMM1, PMAX);
+				XMM2	 = _mm_min_ps(XMM2, PMAX);
+				XMM3	 = _mm_min_ps(XMM3, PMAX);
+				XMM0	 = _mm_max_ps(XMM0, PMIN);
+				XMM1	 = _mm_max_ps(XMM1, PMIN);
+				XMM2	 = _mm_max_ps(XMM2, PMIN);
+				XMM3	 = _mm_max_ps(XMM3, PMIN);
+				XMM0	 = _mm_add_ps(XMM0, PMAX);
+				XMM1	 = _mm_add_ps(XMM1, PMAX);
+				XMM2	 = _mm_add_ps(XMM2, PMAX);
+				XMM3	 = _mm_add_ps(XMM3, PMAX);
+				MM0		 = _mm_cvtps_pi32(XMM0);
+				MM2		 = _mm_cvtps_pi32(XMM1);
+				MM4		 = _mm_cvtps_pi32(XMM2);
+				MM6 	 = _mm_cvtps_pi32(XMM3);
+				XMM0	 = _mm_movehl_ps(XMM0, XMM0);
+				XMM1	 = _mm_movehl_ps(XMM1, XMM1);
+				XMM2	 = _mm_movehl_ps(XMM2, XMM2);
+				XMM3	 = _mm_movehl_ps(XMM3, XMM3);
+				MM1		 = _mm_cvtps_pi32(XMM0);
+				MM3		 = _mm_cvtps_pi32(XMM1);
+				MM5		 = _mm_cvtps_pi32(XMM2);
+				MM7		 = _mm_cvtps_pi32(XMM3);
+				PM64(TEMP+i   )	 = MM0;
+				PM64(TEMP+i+ 4)	 = MM2;
+				PM64(TEMP+i+ 8)	 = MM4;
+				PM64(TEMP+i+12)	 = MM6;
+				PM64(TEMP+i+ 2)	 = MM1;
+				PM64(TEMP+i+ 6)	 = MM3;
+				PM64(TEMP+i+10)	 = MM5;
+				PM64(TEMP+i+14)	 = MM7;
+			}
+			_mm_empty();
+#endif
+		}
+		else
+		{
+			__m128	PM	 = _mm_set1_ps(1.f/(
+				(float)(book->c->thresh_tree->quantthresh[1]-book->c->thresh_tree->quantthresh[0])
+					-1.0e-04));	/* for control of round */
+			for(i=0;i<n;i+=16)
+			{
+#if	!defined(__SSE2__)
+				__m64	MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7;
+#endif
+				__m128	XMM0, XMM1, XMM2, XMM3;
+				XMM0	 = _mm_load_ps(vec+i   );
+				XMM1	 = _mm_load_ps(vec+i+ 4);
+				XMM2	 = _mm_load_ps(vec+i+ 8);
+				XMM3	 = _mm_load_ps(vec+i+12);
+				XMM0	 = _mm_mul_ps(XMM0, PM);
+				XMM1	 = _mm_mul_ps(XMM1, PM);
+				XMM2	 = _mm_mul_ps(XMM2, PM);
+				XMM3	 = _mm_mul_ps(XMM3, PM);
+				XMM0	 = _mm_max_ps(XMM0, PMIN);
+				XMM1	 = _mm_max_ps(XMM1, PMIN);
+				XMM2	 = _mm_max_ps(XMM2, PMIN);
+				XMM3	 = _mm_max_ps(XMM3, PMIN);
+				XMM0	 = _mm_min_ps(XMM0, PMAX);
+				XMM1	 = _mm_min_ps(XMM1, PMAX);
+				XMM2	 = _mm_min_ps(XMM2, PMAX);
+				XMM3	 = _mm_min_ps(XMM3, PMAX);
+				XMM0	 = _mm_add_ps(XMM0, PMAX);
+				XMM1	 = _mm_add_ps(XMM1, PMAX);
+				XMM2	 = _mm_add_ps(XMM2, PMAX);
+				XMM3	 = _mm_add_ps(XMM3, PMAX);
+#if	defined(__SSE2__)
+				XMM0	 = _mm_castsi128_ps(_mm_cvtps_epi32(XMM0));
+				XMM1	 = _mm_castsi128_ps(_mm_cvtps_epi32(XMM1));
+				XMM2	 = _mm_castsi128_ps(_mm_cvtps_epi32(XMM2));
+				XMM3	 = _mm_castsi128_ps(_mm_cvtps_epi32(XMM3));
+				_mm_store_ps((float*)(TEMP+i   ), XMM0);	/* _mm_store_ps takes float*; the lanes hold the converted ints */
+				_mm_store_ps((float*)(TEMP+i+ 4), XMM1);
+				_mm_store_ps((float*)(TEMP+i+ 8), XMM2);
+				_mm_store_ps((float*)(TEMP+i+12), XMM3);
+			}
+#else
+				MM0		 = _mm_cvtps_pi32(XMM0);
+				MM2		 = _mm_cvtps_pi32(XMM1);
+				MM4		 = _mm_cvtps_pi32(XMM2);
+				MM6 	 = _mm_cvtps_pi32(XMM3);
+				XMM0	 = _mm_movehl_ps(XMM0, XMM0);
+				XMM1	 = _mm_movehl_ps(XMM1, XMM1);
+				XMM2	 = _mm_movehl_ps(XMM2, XMM2);
+				XMM3	 = _mm_movehl_ps(XMM3, XMM3);
+				MM1		 = _mm_cvtps_pi32(XMM0);
+				MM3		 = _mm_cvtps_pi32(XMM1);
+				MM5		 = _mm_cvtps_pi32(XMM2);
+				MM7		 = _mm_cvtps_pi32(XMM3);
+				PM64(TEMP+i   )	 = MM0;
+				PM64(TEMP+i+ 4)	 = MM2;
+				PM64(TEMP+i+ 8)	 = MM4;
+				PM64(TEMP+i+12)	 = MM6;
+				PM64(TEMP+i+ 2)	 = MM1;
+				PM64(TEMP+i+ 6)	 = MM3;
+				PM64(TEMP+i+10)	 = MM5;
+				PM64(TEMP+i+14)	 = MM7;
+			}
+			_mm_empty();
+#endif
+		}
+		switch(dim)
+		{
+			case 1:
+				for(i=0;i<n;i+=4)
+				{
+					bits	+= local_book_besterror_dim1x4(book,vec+i, opb, TEMP+i);
+				}
+				break;
+			case 2:
+				for(i=0;i<n;i+=4)
+				{
+					bits	+= local_book_besterror_dim2x2(book,vec+i, opb, TEMP+i);
+				}
+				break;
+			case 4:
+				for(i=0;i<n;i+=4)
+				{
+					int entry	 = local_book_besterror_dim4(book,vec+i, TEMP+i);
+					bits		+= vorbis_book_encode(book,entry,opb);
+				}
+				break;
+			case 8:
+				for(i=0;i<n;i+=8)
+				{
+					int entry	 = local_book_besterror_dim8(book,vec+i, TEMP+i);
+					bits		+= vorbis_book_encode(book,entry,opb);
+				}
+				break;
+			default:
+				break;
+		}
+	}
+	else
+	{
+#endif														/* SSE Optimize */
   int step=n/dim;
 
   for(i=0;i<step;i++){
@@ -362,6 +929,9 @@
     bits+=vorbis_book_encode(book,entry,opb);
   
   }
+#if	defined(__SSE__)										/* SSE Optimize */
+	}
+#endif														/* SSE Optimize */
 
   return(bits);
 }
@@ -455,11 +1025,140 @@
 #endif
   
   partword[0]=_vorbis_block_alloc(vb,n*ch/samples_per_partition*sizeof(*partword[0]));
+#ifdef __SSE__												/* SSE Optimize */
+	{	/* zero the whole partword[0] allocation, matching the memset in the non-SSE branch */
+		int	pn	 = (int)(n*ch/samples_per_partition*sizeof(*partword[0])/sizeof(float));	/* size in float-sized words, not in elements */
+		__m128	XMM0	 = _mm_setzero_ps();
+		int tn;
+		float *d = (float*)(partword[0]);
+		tn	 = pn&(~31);	/* 32 words per iteration */
+		for(i=0;i<tn;i+=32)
+		{
+			_mm_store_ps(d+i   , XMM0);
+			_mm_store_ps(d+i+ 4, XMM0);
+			_mm_store_ps(d+i+ 8, XMM0);
+			_mm_store_ps(d+i+12, XMM0);
+			_mm_store_ps(d+i+16, XMM0);
+			_mm_store_ps(d+i+20, XMM0);
+			_mm_store_ps(d+i+24, XMM0);
+			_mm_store_ps(d+i+28, XMM0);
+		}
+		tn	 = pn&(~15);	/* then 16, 8 and 4 words per iteration */
+		for(;i<tn;i+=16)
+		{
+			_mm_store_ps(d+i   , XMM0);
+			_mm_store_ps(d+i+ 4, XMM0);
+			_mm_store_ps(d+i+ 8, XMM0);
+			_mm_store_ps(d+i+12, XMM0);
+		}
+		tn	 = pn&(~7);
+		for(;i<tn;i+=8)
+		{
+			_mm_store_ps(d+i   , XMM0);
+			_mm_store_ps(d+i+ 4, XMM0);
+		}
+		tn	 = pn&(~3);
+		for(;i<tn;i+=4)
+		{
+			_mm_store_ps(d+i   , XMM0);
+		}
+		for(;i<pn;i++)	/* remaining 0..3 words */
+		{
+			*(d+i   )	 = 0;
+		}
+	}
+#else														/* SSE Optimize */
   memset(partword[0],0,n*ch/samples_per_partition*sizeof(*partword[0]));
+#endif														/* SSE Optimize */
   
   for(i=0,l=info->begin/ch;i<partvals;i++){
     float magmax=0.f;
     float angmax=0.f;
+#ifdef __SSE__												/* SSE Optimize */
+	if(ch==2&&possible_partitions==10)
+	{
+		register __m128 PMAGMAX	 = _mm_setzero_ps();
+		register __m128 PANGMAX	 = _mm_setzero_ps();
+		float	*pin0	 = in[0];
+		float	*pin1	 = in[1];
+		
+		for(j=0;j<samples_per_partition;j+=16)
+		{
+			__m128	XMM0	 = _mm_load_ps(pin0+l  );
+			__m128	XMM1	 = _mm_load_ps(pin1+l  );
+			__m128	XMM2	 = _mm_load_ps(pin0+l+4);
+			__m128	XMM3	 = _mm_load_ps(pin1+l+4);
+			XMM0	 = _mm_and_ps(XMM0, PABSMASK.ps);
+			XMM1	 = _mm_and_ps(XMM1, PABSMASK.ps);
+			XMM2	 = _mm_and_ps(XMM2, PABSMASK.ps);
+			XMM3	 = _mm_and_ps(XMM3, PABSMASK.ps);
+			PMAGMAX	 = _mm_max_ps(PMAGMAX, XMM0);
+			PANGMAX	 = _mm_max_ps(PANGMAX, XMM1);
+			PMAGMAX	 = _mm_max_ps(PMAGMAX, XMM2);
+			PANGMAX	 = _mm_max_ps(PANGMAX, XMM3);
+			l	+= 8;
+		}
+		magmax	 = _mm_max_horz(PMAGMAX);
+		angmax	 = _mm_max_horz(PANGMAX);
+		PMAGMAX	 = _mm_set1_ps(magmax);
+		PANGMAX	 = _mm_set1_ps(angmax);
+		{
+			static int jtable0[16]	 = 
+			{
+				 0, 1, 0, 2, 0, 1, 0, 3,
+				 0, 1, 0, 2, 0, 1, 0,-1,
+			};
+			static int jtable1[16]	 = 	/* 4 + index of the lowest clear bit of the 4-bit reject mask for classes 4..7 */
+			{
+				 4, 5, 4, 6, 4, 5, 4, 7,
+				 4, 5, 4, 6, 4, 5, 4,-1,	/* entry 12 (mask 1100b) is 4: bit 0 is clear, as in jtable0[12]==0 */
+			};
+			__m128	XMM0	 = _mm_lddqu_ps(info->classmetric1);
+			__m128	XMM1	 = _mm_lddqu_ps(info->classmetric2);
+			XMM0	 = _mm_cmplt_ps(XMM0, PMAGMAX);
+			XMM1	 = _mm_cmplt_ps(XMM1, PANGMAX);
+			XMM0	 = _mm_or_ps(XMM0, XMM1);
+			j	 = _mm_movemask_ps(XMM0);
+			if(j!=15)
+				j	 = jtable0[j];
+			else
+			{
+				__m128	XMM0	 = _mm_lddqu_ps(info->classmetric1+4);
+				__m128	XMM1	 = _mm_lddqu_ps(info->classmetric2+4);
+				XMM0	 = _mm_cmplt_ps(XMM0, PMAGMAX);
+				XMM1	 = _mm_cmplt_ps(XMM1, PANGMAX);
+				XMM0	 = _mm_or_ps(XMM0, XMM1);
+				j	 = _mm_movemask_ps(XMM0);
+				if(j!=15)
+					j	 = jtable1[j];
+				else
+				{
+					if(magmax<=info->classmetric1[8] &&
+						   angmax<=info->classmetric2[8])
+						j	 = 8;
+					else
+						j	 = 9;
+				}
+			}
+		}
+	}
+	else
+	{
+		for(j=0;j<samples_per_partition;j+=ch)
+		{
+			if(fabs(in[0][l])>magmax)
+				magmax=fabs(in[0][l]);
+			for(k=1;k<ch;k++)
+				if(fabs(in[k][l])>angmax)
+					angmax=fabs(in[k][l]);
+				l++;
+		}
+		for(j=0;j<possible_partitions-1;j++)
+			if(magmax<=info->classmetric1[j] &&
+			   angmax<=info->classmetric2[j])
+				break;
+	}
+#else														/* SSE Optimize */
     for(j=0;j<samples_per_partition;j+=ch){
       if(fabs(in[0][l])>magmax)magmax=fabs(in[0][l]);
       for(k=1;k<ch;k++)
@@ -471,6 +1170,7 @@
       if(magmax<=info->classmetric1[j] &&
 	 angmax<=info->classmetric2[j])
 	break;
+#endif														/* SSE Optimize */
     
     partword[0][i]=j;
     
@@ -735,8 +1435,27 @@
   for(i=0;i<ch;i++)
     if(nonzero[i]){
       if(out)
+#ifdef __SSE__												/* SSE Optimize */
+	{	/* SSE form of the scalar loop out[i][j]+=in[i][j], eight floats per pass. */
+		float	*pin	 = in[i];
+		float	*pout	 = out[i];
+		for(j=0;j<n;j+=8)	/* no tail handling: touches j..j+7 even when n is not a multiple of 8 */
+		{
+			__m128	XMM0	 = _mm_load_ps(pout+j  );	/* _mm_load_ps/_mm_store_ps need 16-byte aligned addresses */
+			__m128	XMM2	 = _mm_load_ps(pin+j  );
+			__m128	XMM1	 = _mm_load_ps(pout+j+4);
+			__m128	XMM3	 = _mm_load_ps(pin+j+4);
+			XMM0	 = _mm_add_ps(XMM0, XMM2);
+			XMM1	 = _mm_add_ps(XMM1, XMM3);
+			_mm_store_ps(pout+j  , XMM0);
+			_mm_store_ps(pout+j+4, XMM1);
+				
+		}
+	}
+#else														/* SSE Optimize */
 	for(j=0;j<n;j++)
 	  out[i][j]+=in[i][j];
+#endif														/* SSE Optimize */
       in[used++]=in[i];
     }
 
@@ -746,8 +1465,26 @@
       used=0;
       for(i=0;i<ch;i++)
 	if(nonzero[i]){
+#ifdef __SSE__												/* SSE Optimize */
+		{	/* SSE form of the scalar loop out[i][j]-=in[used][j], eight floats per pass. */
+			float	*pin	 = in[used];	/* fixed: in[] was packed by in[used++]=in[i], so read it by the packed index */
+			float	*pout	 = out[i];	/* fixed: out[] is still indexed by the original channel number */
+			for(j=0;j<n;j+=8)	/* no tail handling: touches j..j+7 even when n is not a multiple of 8 */
+			{
+				__m128	XMM0	 = _mm_load_ps(pout+j  );	/* aligned loads/stores: both rows must be 16-byte aligned */
+				__m128	XMM2	 = _mm_load_ps(pin+j  );
+				__m128	XMM1	 = _mm_load_ps(pout+j+4);
+				__m128	XMM3	 = _mm_load_ps(pin+j+4);
+				XMM0	 = _mm_sub_ps(XMM0, XMM2);
+				XMM1	 = _mm_sub_ps(XMM1, XMM3);
+				_mm_store_ps(pout+j  , XMM0);
+				_mm_store_ps(pout+j+4, XMM1);
+			}
+		}
+#else														/* SSE Optimize */
 	  for(j=0;j<n;j++)
 	    out[i][j]-=in[used][j];
+#endif														/* SSE Optimize */
 	  used++;
 	}
     }
@@ -805,24 +1542,124 @@
      reshape ourselves into a single channel res1 */
   /* ugly; reallocs for each coupling pass :-( */
   float *work=_vorbis_block_alloc(vb,ch*n*sizeof(*work));
+#ifdef __SSE__												/* SSE Optimize */
+  for(i=0;i<ch;i++){	/* count the channels flagged nonzero; used gates the encode step below */
+    if(nonzero[i])used++;
+  }
+  if(ch==2)
+  {	/* Stereo: interleave in[0] and in[1] into work as p0[0],p1[0],p0[1],p1[1],... 16 samples per channel per pass. */
+	float *pcm0=in[0];
+	float *pcm1=in[1];
+	for(j=0;j<n;j+=16)	/* no tail handling: touches j..j+15 of each channel */
+	{
+		// ABCD    ABEF    AEBF
+		// EFGH -> CDGH -> CGDH
+		__m128	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7;
+		XMM0	 = _mm_load_ps(pcm0+j   );
+		XMM2	 = _mm_load_ps(pcm0+j+ 4);
+		XMM4	 = _mm_load_ps(pcm1+j   );
+		XMM5	 = _mm_load_ps(pcm1+j+ 4);
+		XMM1	 = XMM0;	/* keep a copy: unpacklo/unpackhi each consume the same source */
+		XMM3	 = XMM2;
+		XMM0	 = _mm_unpacklo_ps(XMM0, XMM4);	/* p0[j],p1[j],p0[j+1],p1[j+1] */
+		XMM1	 = _mm_unpackhi_ps(XMM1, XMM4);	/* p0[j+2],p1[j+2],p0[j+3],p1[j+3] */
+		XMM6	 = _mm_load_ps(pcm0+j+ 8);
+		XMM7	 = _mm_load_ps(pcm0+j+12);
+		XMM2	 = _mm_unpacklo_ps(XMM2, XMM5);
+		XMM3	 = _mm_unpackhi_ps(XMM3, XMM5);
+		XMM4	 = _mm_load_ps(pcm1+j+ 8);	/* XMM4/XMM5 are reused for the second half of pcm1 */
+		XMM5	 = _mm_load_ps(pcm1+j+12);
+		_mm_store_ps(work+j*2   , XMM0);	/* output index is 2*j because two channels are merged */
+		_mm_store_ps(work+j*2+ 4, XMM1);
+		XMM1	 = XMM6;
+		_mm_store_ps(work+j*2+ 8, XMM2);
+		_mm_store_ps(work+j*2+12, XMM3);
+		XMM3	 = XMM7;
+		XMM6	 = _mm_unpacklo_ps(XMM6, XMM4);
+		XMM1	 = _mm_unpackhi_ps(XMM1, XMM4);
+		XMM7	 = _mm_unpacklo_ps(XMM7, XMM5);
+		XMM3	 = _mm_unpackhi_ps(XMM3, XMM5);
+		_mm_store_ps(work+j*2+16, XMM6);
+		_mm_store_ps(work+j*2+20, XMM1);
+		_mm_store_ps(work+j*2+24, XMM7);
+		_mm_store_ps(work+j*2+28, XMM3);
+	}
+  }
+  else
+  {	/* Any other channel count: plain strided copy, work[i + j*ch] = in[i][j]. */
+	for(i=0;i<ch;i++){
+		float *pcm=in[i];
+		for(j=0,k=i;j<n;j++,k+=ch)
+			work[k]=pcm[j];
+	}
+  }
+#else														/* SSE Optimize */
   for(i=0;i<ch;i++){
     float *pcm=in[i];
     if(nonzero[i])used++;
     for(j=0,k=i;j<n;j++,k+=ch)
       work[k]=pcm[j];
   }
+#endif														/* SSE Optimize */
   
   if(used){
     int ret=_01forward(opb,vb,vl,&work,1,partword,_encodepart);
     /* update the sofar vector */
     if(out){
+#ifdef __SSE__												/* SSE Optimize */
+	if(ch==2)	/* Stereo SSE form of sofar[j]+=pcm[j]-work[k] for both channels, eight samples per pass. */
+	{
+		float *pcm0		 = in[0];
+		float *pcm1		 = in[1];
+		float *sofar0	 = out[0];
+		float *sofar1	 = out[1];
+		for(j=0;j<n;j+=8)	/* no tail handling: touches j..j+7 of each channel */
+		{
+			__m128	XMM0, XMM1, XMM2, XMM3;
+			__m128	XMM4, XMM5, XMM6, XMM7;
+			XMM0	 = _mm_load_ps(work+j*2   );	/* work holds the two channels interleaved */
+			XMM4	 = _mm_load_ps(work+j*2+ 8);
+			XMM6	 = _mm_load_ps(work+j*2+ 4);
+			XMM7	 = _mm_load_ps(work+j*2+12);
+			XMM1	 = XMM0;
+			XMM5	 = XMM4;	/* fixed: was XMM5 = XMM5, which read an uninitialised register */
+			XMM0	 = _mm_shuffle_ps(XMM0, XMM6, _MM_SHUFFLE(2,0,2,0));	/* even lanes: channel 0, samples j..j+3 */
+			XMM1	 = _mm_shuffle_ps(XMM1, XMM6, _MM_SHUFFLE(3,1,3,1));	/* odd lanes: channel 1, samples j..j+3 */
+			XMM4	 = _mm_shuffle_ps(XMM4, XMM7, _MM_SHUFFLE(2,0,2,0));	/* channel 0, samples j+4..j+7 */
+			XMM5	 = _mm_shuffle_ps(XMM5, XMM7, _MM_SHUFFLE(3,1,3,1));	/* channel 1, samples j+4..j+7 */
+			XMM2	 = _mm_load_ps(pcm0+j     );
+			XMM3	 = _mm_load_ps(pcm1+j     );
+			XMM6	 = _mm_load_ps(pcm0+j  + 4);
+			XMM7	 = _mm_load_ps(pcm1+j  + 4);
+			XMM2	 = _mm_sub_ps(XMM2, XMM0);	/* pcm - work, per channel */
+			XMM3	 = _mm_sub_ps(XMM3, XMM1);
+			XMM6	 = _mm_sub_ps(XMM6, XMM4);
+			XMM7	 = _mm_sub_ps(XMM7, XMM5);
+			XMM0	 = _mm_load_ps(sofar0+j  );
+			XMM1	 = _mm_load_ps(sofar1+j  );	/* fixed: channel 1 must accumulate onto sofar1, not sofar0 */
+			XMM2	 = _mm_add_ps(XMM2, XMM0);
+			XMM3	 = _mm_add_ps(XMM3, XMM1);
+			XMM6	 = _mm_add_ps(XMM6, _mm_load_ps(sofar0+j+4));
+			XMM7	 = _mm_add_ps(XMM7, _mm_load_ps(sofar1+j+4));	/* fixed: previously added sofar0+j+4 */
+			_mm_store_ps(sofar0+j  , XMM2);
+			_mm_store_ps(sofar1+j  , XMM3);
+			_mm_store_ps(sofar0+j+4, XMM6);
+			_mm_store_ps(sofar1+j+4, XMM7);
+		}
+	}
+	else
+	{
+#endif														/* SSE Optimize */
       for(i=0;i<ch;i++){
 	float *pcm=in[i];
 	float *sofar=out[i];
 	for(j=0,k=i;j<n;j++,k+=ch)
 	  sofar[j]+=pcm[j]-work[k];
-	
+
       }
+#ifdef __SSE__												/* SSE Optimize */
+	}
+#endif														/* SSE Optimize */
     }
     return(ret);
   }else{
@@ -883,7 +1720,6 @@
   return(0);
 }
 
-
 vorbis_func_residue residue0_exportbundle={
   NULL,
   &res0_unpack,
@@ -916,4 +1752,3 @@
   &res2_forward,
   &res2_inverse
 };
-
diff -ruN --exclude '*.in' libvorbis-1.2.0/lib/sharedbook.c libvorbis-1.2.0-sse/lib/sharedbook.c
--- libvorbis-1.2.0/lib/sharedbook.c	2007-07-24 02:09:47.000000000 +0200
+++ libvorbis-1.2.0-sse/lib/sharedbook.c	2007-08-02 12:43:15.000000000 +0200
@@ -24,6 +24,9 @@
 #include "vorbis/codec.h"
 #include "codebook.h"
 #include "scales.h"
+#ifdef __SSE__												/* SSE Optimize */
+#include "xmmlib.h"
+#endif														/* SSE Optimize */
 
 /**** pack/unpack helpers ******************************************/
 int _ilog(unsigned int v){
diff -ruN --exclude '*.in' libvorbis-1.2.0/lib/smallft.c libvorbis-1.2.0-sse/lib/smallft.c
--- libvorbis-1.2.0/lib/smallft.c	2007-07-24 02:09:47.000000000 +0200
+++ libvorbis-1.2.0-sse/lib/smallft.c	2007-08-02 12:43:15.000000000 +0200
@@ -34,6 +34,5529 @@
 #include "smallft.h"
 #include "os.h"
 #include "misc.h"
+#ifdef __SSE__												/* SSE Optimize */
+#include "xmmlib.h"
+
+static _MM_ALIGN16 int		IP256[16]	 = {  64,   64,   0, 128,  64, 192,  32, 160,  96, 224};	/* 10 of 16 entries listed; the rest are zero-initialised. Presumably an index-permutation table for the 256-point transform - TODO confirm */
+static _MM_ALIGN16 int		IP512[32]	 = { 128,  128,   0, 256, 128, 384,  64, 320, 192, 448};	/* NOTE(review): only 10 of 32 entries listed, the rest are zero - confirm the declared size is intended */
+static _MM_ALIGN16 int		IP1024[32]	 = {	/* 18 of 32 entries listed; the rest are zero-initialised */
+	  256,  256,
+	    0,  512,  256,  768,  128,  640,  384,  896,
+	   64,  576,  320,  832,  192,  704,  448,  960
+};
+static _MM_ALIGN16 int		IP2048[32]	 = {	/* 18 of 32 entries listed; every value is twice the matching IP1024 entry */
+	  512,  512,
+	    0, 1024,  512, 1536,  256, 1280,  768, 1792,
+	  128, 1152,  640, 1664,  384, 1408,  896, 1920
+};
+static _MM_ALIGN16 int		IP4096[64]	 = {	/* 34 of 64 entries listed; the rest are zero-initialised */
+	 1024, 1024,
+	    0, 2048, 1024, 3072,  512, 2560, 1536, 3584,
+	  256, 2304, 1280, 3328,  768, 2816, 1792, 3840,
+	  128, 2176, 1152, 3200,  640, 2688, 1664, 3712,
+	  384, 2432, 1408, 3456,  896, 2944, 1920, 3968
+};
+
+static _MM_ALIGN16 float	W2[4]	 = {	/* 1/sqrt(2) at single precision, repeated in all four lanes */
+	7.0710676908493e-001, 7.0710676908493e-001, 7.0710676908493e-001, 7.0710676908493e-001
+};
+
+static _MM_ALIGN16 float	W256[]	 = {
+	 4.8772937059402e-001,  4.9984940886497e-001,  0.0000000000000e+000,  0.0000000000000e+000,
+	 4.7546616196632e-001,  4.7546616196632e-001,  4.6321770548820e-001,  4.6321770548820e-001,
+	-4.9939772486687e-001,  4.9939772486687e-001, -4.9864521622658e-001,  4.9864521622658e-001,
+	 4.5099142193794e-001,  4.5099142193794e-001,  4.3879467248917e-001,  4.3879467248917e-001,
+	-4.9759235978127e-001,  4.9759235978127e-001, -4.9623978137970e-001,  4.9623978137970e-001,
+	 4.2663475871086e-001,  4.2663475871086e-001,  4.1451907157898e-001,  4.1451907157898e-001,
+	-4.9458825588226e-001,  4.9458825588226e-001, -4.9263882637024e-001,  4.9263882637024e-001,
+	 4.0245485305786e-001,  4.0245485305786e-001,  3.9044937491417e-001,  3.9044937491417e-001,
+	-4.9039262533188e-001,  4.9039262533188e-001, -4.8785105347633e-001,  4.8785105347633e-001,
+	 3.7850990891457e-001,  3.7850990891457e-001,  3.6664360761642e-001,  3.6664360761642e-001,
+	-4.8501563072205e-001,  4.8501563072205e-001, -4.8188802599907e-001,  4.8188802599907e-001,
+	 3.5485765337944e-001,  3.5485765337944e-001,  3.4315913915634e-001,  3.4315913915634e-001,
+	-4.7847017645836e-001,  4.7847017645836e-001, -4.7476407885551e-001,  4.7476407885551e-001,
+	 3.3155506849289e-001,  3.3155506849289e-001,  3.2005247473717e-001,  3.2005247473717e-001,
+	-4.7077202796936e-001,  4.7077202796936e-001, -4.6649640798569e-001,  4.6649640798569e-001,
+	 3.0865827202797e-001,  3.0865827202797e-001,  2.9737934470177e-001,  2.9737934470177e-001,
+	-4.6193975210190e-001,  4.6193975210190e-001, -4.5710486173630e-001,  4.5710486173630e-001,
+	 2.8622245788574e-001,  2.8622245788574e-001,  2.7519434690475e-001,  2.7519434690475e-001,
+	-4.5199465751648e-001,  4.5199465751648e-001, -4.4661214947701e-001,  4.4661214947701e-001,
+	 2.6430162787437e-001,  2.6430162787437e-001,  2.5355088710785e-001,  2.5355088710785e-001,
+	-4.4096061587334e-001,  4.4096061587334e-001, -4.3504348397255e-001,  4.3504348397255e-001,
+	 2.4294862151146e-001,  2.4294862151146e-001,  2.3250117897987e-001,  2.3250117897987e-001,
+	-4.2886430025101e-001,  4.2886430025101e-001, -4.2242678999901e-001,  4.2242678999901e-001,
+	 2.2221487760544e-001,  2.2221487760544e-001,  2.1209588646889e-001,  2.1209588646889e-001,
+	-4.1573479771614e-001,  4.1573479771614e-001, -4.0879240632057e-001,  4.0879240632057e-001,
+	 2.0215034484863e-001,  2.0215034484863e-001,  1.9238418340683e-001,  1.9238418340683e-001,
+	-4.0160375833511e-001,  4.0160375833511e-001, -3.9417320489883e-001,  3.9417320489883e-001,
+	 1.8280336260796e-001,  1.8280336260796e-001,  1.7341357469559e-001,  1.7341357469559e-001,
+	-3.8650521636009e-001,  3.8650521636009e-001, -3.7860441207886e-001,  3.7860441207886e-001,
+	 1.6422051191330e-001,  1.6422051191330e-001,  1.5522971749306e-001,  1.5522971749306e-001,
+	-3.7047556042671e-001,  3.7047556042671e-001, -3.6212354898453e-001,  3.6212354898453e-001,
+	 1.4644661545753e-001,  1.4644661545753e-001,  1.3787645101547e-001,  1.3787645101547e-001,
+	-3.5355338454247e-001,  3.5355338454247e-001, -3.4477028250694e-001,  3.4477028250694e-001,
+	 1.2952443957329e-001,  1.2952443957329e-001,  1.2139558792114e-001,  1.2139558792114e-001,
+	-3.3577948808670e-001,  3.3577948808670e-001, -3.2658642530441e-001,  3.2658642530441e-001,
+	 1.1349478363991e-001,  1.1349478363991e-001,  1.0582679510117e-001,  1.0582679510117e-001,
+	-3.1719663739204e-001,  3.1719663739204e-001, -3.0761581659317e-001,  3.0761581659317e-001,
+	 9.8396241664886e-002,  9.8396241664886e-002,  9.1207593679428e-002,  9.1207593679428e-002,
+	-2.9784965515137e-001,  2.9784965515137e-001, -2.8790411353111e-001,  2.8790411353111e-001,
+	 8.4265202283859e-002,  8.4265202283859e-002,  7.7573210000992e-002,  7.7573210000992e-002,
+	-2.7778512239456e-001,  2.7778512239456e-001, -2.6749882102013e-001,  2.6749882102013e-001,
+	 7.1135699748993e-002,  7.1135699748993e-002,  6.4956516027451e-002,  6.4956516027451e-002,
+	-2.5705137848854e-001,  2.5705137848854e-001, -2.4644909799099e-001,  2.4644909799099e-001,
+	 5.9039384126663e-002,  5.9039384126663e-002,  5.3387850522995e-002,  5.3387850522995e-002,
+	-2.3569837212563e-001,  2.3569837212563e-001, -2.2480566799641e-001,  2.2480566799641e-001,
+	 4.8005342483521e-002,  4.8005342483521e-002,  4.2895138263702e-002,  4.2895138263702e-002,
+	-2.1377755701542e-001,  2.1377755701542e-001, -2.0262065529823e-001,  2.0262065529823e-001,
+	 3.8060247898102e-002,  3.8060247898102e-002,  3.3503592014313e-002,  3.3503592014313e-002,
+	-1.9134172797203e-001,  1.9134172797203e-001, -1.7994752526283e-001,  1.7994752526283e-001,
+	 2.9227972030640e-002,  2.9227972030640e-002,  2.5235921144485e-002,  2.5235921144485e-002,
+	-1.6844493150711e-001,  1.6844493150711e-001, -1.5684087574482e-001,  1.5684087574482e-001,
+	 2.1529823541641e-002,  2.1529823541641e-002,  1.8111974000931e-002,  1.8111974000931e-002,
+	-1.4514234662056e-001,  1.4514234662056e-001, -1.3335637748241e-001,  1.3335637748241e-001,
+	 1.4984369277954e-002,  1.4984369277954e-002,  1.2148946523666e-002,  1.2148946523666e-002,
+	-1.2149009108543e-001,  1.2149009108543e-001, -1.0955062508583e-001,  1.0955062508583e-001,
+	 9.6073746681213e-003,  9.6073746681213e-003,  7.3611736297607e-003,  7.3611736297607e-003,
+	-9.7545161843300e-002,  9.7545161843300e-002, -8.5480943322182e-002,  8.5480943322182e-002,
+	 5.4117441177368e-003,  5.4117441177368e-003,  3.7602186203003e-003,  3.7602186203003e-003,
+	-7.3365241289139e-002,  7.3365241289139e-002, -6.1205338686705e-002,  6.1205338686705e-002,
+	 2.4076402187347e-003,  2.4076402187347e-003,  1.3547837734222e-003,  1.3547837734222e-003,
+	-4.9008570611477e-002,  4.9008570611477e-002, -3.6782283335924e-002,  3.6782283335924e-002,
+	 6.0227513313293e-004,  6.0227513313293e-004,  1.5059113502502e-004,  1.5059113502502e-004,
+	-2.4533838033676e-002,  2.4533838033676e-002, -1.2270614504814e-002,  1.2270614504814e-002
+};
+static _MM_ALIGN16 float	W512[]	 = {
+	 4.9386423826218e-001,  4.9996235966682e-001,  0.0000000000000e+000,  0.0000000000000e+000,
+	 4.8772937059402e-001,  4.8772937059402e-001,  4.8159638047218e-001,  4.8159638047218e-001,
+	-4.9984940886497e-001,  4.9984940886497e-001, -4.9966117739677e-001,  4.9966117739677e-001,
+	 4.7546616196632e-001,  4.7546616196632e-001,  4.6933963894844e-001,  4.6933963894844e-001,
+	-4.9939772486687e-001,  4.9939772486687e-001, -4.9905905127525e-001,  4.9905905127525e-001,
+	 4.6321770548820e-001,  4.6321770548820e-001,  4.5710134506226e-001,  4.5710134506226e-001,
+	-4.9864521622658e-001,  4.9864521622658e-001, -4.9815630912781e-001,  4.9815630912781e-001,
+	 4.5099142193794e-001,  4.5099142193794e-001,  4.4488888978958e-001,  4.4488888978958e-001,
+	-4.9759235978127e-001,  4.9759235978127e-001, -4.9695348739624e-001,  4.9695348739624e-001,
+	 4.3879467248917e-001,  4.3879467248917e-001,  4.3270963430405e-001,  4.3270963430405e-001,
+	-4.9623978137970e-001,  4.9623978137970e-001, -4.9545133113861e-001,  4.9545133113861e-001,
+	 4.2663475871086e-001,  4.2663475871086e-001,  4.2057090997696e-001,  4.2057090997696e-001,
+	-4.9458825588226e-001,  4.9458825588226e-001, -4.9365070462227e-001,  4.9365070462227e-001,
+	 4.1451907157898e-001,  4.1451907157898e-001,  4.0848004817963e-001,  4.0848004817963e-001,
+	-4.9263882637024e-001,  4.9263882637024e-001, -4.9155274033546e-001,  4.9155274033546e-001,
+	 4.0245485305786e-001,  4.0245485305786e-001,  3.9644432067871e-001,  3.9644432067871e-001,
+	-4.9039262533188e-001,  4.9039262533188e-001, -4.8915868997574e-001,  4.8915868997574e-001,
+	 3.9044937491417e-001,  3.9044937491417e-001,  3.8447093963623e-001,  3.8447093963623e-001,
+	-4.8785105347633e-001,  4.8785105347633e-001, -4.8646998405457e-001,  4.8646998405457e-001,
+	 3.7850990891457e-001,  3.7850990891457e-001,  3.7256717681885e-001,  3.7256717681885e-001,
+	-4.8501563072205e-001,  4.8501563072205e-001, -4.8348823189735e-001,  4.8348823189735e-001,
+	 3.6664360761642e-001,  3.6664360761642e-001,  3.6074015498161e-001,  3.6074015498161e-001,
+	-4.8188802599907e-001,  4.8188802599907e-001, -4.8021525144577e-001,  4.8021525144577e-001,
+	 3.5485765337944e-001,  3.5485765337944e-001,  3.4899702668190e-001,  3.4899702668190e-001,
+	-4.7847017645836e-001,  4.7847017645836e-001, -4.7665300965309e-001,  4.7665300965309e-001,
+	 3.4315913915634e-001,  3.4315913915634e-001,  3.3734485507011e-001,  3.3734485507011e-001,
+	-4.7476407885551e-001,  4.7476407885551e-001, -4.7280365228653e-001,  4.7280365228653e-001,
+	 3.3155506849289e-001,  3.3155506849289e-001,  3.2579064369202e-001,  3.2579064369202e-001,
+	-4.7077202796936e-001,  4.7077202796936e-001, -4.6866950392723e-001,  4.6866950392723e-001,
+	 3.2005247473717e-001,  3.2005247473717e-001,  3.1434139609337e-001,  3.1434139609337e-001,
+	-4.6649640798569e-001,  4.6649640798569e-001, -4.6425303816795e-001,  4.6425303816795e-001,
+	 3.0865827202797e-001,  3.0865827202797e-001,  3.0300396680832e-001,  3.0300396680832e-001,
+	-4.6193975210190e-001,  4.6193975210190e-001, -4.5955693721771e-001,  4.5955693721771e-001,
+	 2.9737934470177e-001,  2.9737934470177e-001,  2.9178521037102e-001,  2.9178521037102e-001,
+	-4.5710486173630e-001,  4.5710486173630e-001, -4.5458400249481e-001,  4.5458400249481e-001,
+	 2.8622245788574e-001,  2.8622245788574e-001,  2.8069186210632e-001,  2.8069186210632e-001,
+	-4.5199465751648e-001,  4.5199465751648e-001, -4.4933724403381e-001,  4.4933724403381e-001,
+	 2.7519434690475e-001,  2.7519434690475e-001,  2.6973062753677e-001,  2.6973062753677e-001,
+	-4.4661214947701e-001,  4.4661214947701e-001, -4.4381982088089e-001,  4.4381982088089e-001,
+	 2.6430162787437e-001,  2.6430162787437e-001,  2.5890809297562e-001,  2.5890809297562e-001,
+	-4.4096061587334e-001,  4.4096061587334e-001, -4.3803504109383e-001,  4.3803504109383e-001,
+	 2.5355088710785e-001,  2.5355088710785e-001,  2.4823081493378e-001,  2.4823081493378e-001,
+	-4.3504348397255e-001,  4.3504348397255e-001, -4.3198642134666e-001,  4.3198642134666e-001,
+	 2.4294862151146e-001,  2.4294862151146e-001,  2.3770514130592e-001,  2.3770514130592e-001,
+	-4.2886430025101e-001,  4.2886430025101e-001, -4.2567759752274e-001,  4.2567759752274e-001,
+	 2.3250117897987e-001,  2.3250117897987e-001,  2.2733750939369e-001,  2.2733750939369e-001,
+	-4.2242678999901e-001,  4.2242678999901e-001, -4.1911235451698e-001,  4.1911235451698e-001,
+	 2.2221487760544e-001,  2.2221487760544e-001,  2.1713408827782e-001,  2.1713408827782e-001,
+	-4.1573479771614e-001,  4.1573479771614e-001, -4.1229465603828e-001,  4.1229465603828e-001,
+	 2.1209588646889e-001,  2.1209588646889e-001,  2.0710107684135e-001,  2.0710107684135e-001,
+	-4.0879240632057e-001,  4.0879240632057e-001, -4.0522858500481e-001,  4.0522858500481e-001,
+	 2.0215034484863e-001,  2.0215034484863e-001,  1.9724446535110e-001,  1.9724446535110e-001,
+	-4.0160375833511e-001,  4.0160375833511e-001, -3.9791843295097e-001,  3.9791843295097e-001,
+	 1.9238418340683e-001,  1.9238418340683e-001,  1.8757024407387e-001,  1.8757024407387e-001,
+	-3.9417320489883e-001,  3.9417320489883e-001, -3.9036861062050e-001,  3.9036861062050e-001,
+	 1.8280336260796e-001,  1.8280336260796e-001,  1.7808422446251e-001,  1.7808422446251e-001,
+	-3.8650521636009e-001,  3.8650521636009e-001, -3.8258361816406e-001,  3.8258361816406e-001,
+	 1.7341357469559e-001,  1.7341357469559e-001,  1.6879209876060e-001,  1.6879209876060e-001,
+	-3.7860441207886e-001,  3.7860441207886e-001, -3.7456819415092e-001,  3.7456819415092e-001,
+	 1.6422051191330e-001,  1.6422051191330e-001,  1.5969949960709e-001,  1.5969949960709e-001,
+	-3.7047556042671e-001,  3.7047556042671e-001, -3.6632713675499e-001,  3.6632713675499e-001,
+	 1.5522971749306e-001,  1.5522971749306e-001,  1.5081188082695e-001,  1.5081188082695e-001,
+	-3.6212354898453e-001,  3.6212354898453e-001, -3.5786539316177e-001,  3.5786539316177e-001,
+	 1.4644661545753e-001,  1.4644661545753e-001,  1.4213460683823e-001,  1.4213460683823e-001,
+	-3.5355338454247e-001,  3.5355338454247e-001, -3.4918811917305e-001,  3.4918811917305e-001,
+	 1.3787645101547e-001,  1.3787645101547e-001,  1.3367286324501e-001,  1.3367286324501e-001,
+	-3.4477028250694e-001,  3.4477028250694e-001, -3.4030050039291e-001,  3.4030050039291e-001,
+	 1.2952443957329e-001,  1.2952443957329e-001,  1.2543180584908e-001,  1.2543180584908e-001,
+	-3.3577948808670e-001,  3.3577948808670e-001, -3.3120790123940e-001,  3.3120790123940e-001,
+	 1.2139558792114e-001,  1.2139558792114e-001,  1.1741638183594e-001,  1.1741638183594e-001,
+	-3.2658642530441e-001,  3.2658642530441e-001, -3.2191577553749e-001,  3.2191577553749e-001,
+	 1.1349478363991e-001,  1.1349478363991e-001,  1.0963138937950e-001,  1.0963138937950e-001,
+	-3.1719663739204e-001,  3.1719663739204e-001, -3.1242975592613e-001,  3.1242975592613e-001,
+	 1.0582679510117e-001,  1.0582679510117e-001,  1.0208156704903e-001,  1.0208156704903e-001,
+	-3.0761581659317e-001,  3.0761581659317e-001, -3.0275553464890e-001,  3.0275553464890e-001,
+	 9.8396241664886e-002,  9.8396241664886e-002,  9.4771414995193e-002,  9.4771414995193e-002,
+	-2.9784965515137e-001,  2.9784965515137e-001, -2.9289892315865e-001,  2.9289892315865e-001,
+	 9.1207593679428e-002,  9.1207593679428e-002,  8.7705343961716e-002,  8.7705343961716e-002,
+	-2.8790411353111e-001,  2.8790411353111e-001, -2.8286591172218e-001,  2.8286591172218e-001,
+	 8.4265202283859e-002,  8.4265202283859e-002,  8.0887645483017e-002,  8.0887645483017e-002,
+	-2.7778512239456e-001,  2.7778512239456e-001, -2.7266249060631e-001,  2.7266249060631e-001,
+	 7.7573210000992e-002,  7.7573210000992e-002,  7.4322402477264e-002,  7.4322402477264e-002,
+	-2.6749882102013e-001,  2.6749882102013e-001, -2.6229485869408e-001,  2.6229485869408e-001,
+	 7.1135699748993e-002,  7.1135699748993e-002,  6.8013578653336e-002,  6.8013578653336e-002,
+	-2.5705137848854e-001,  2.5705137848854e-001, -2.5176918506622e-001,  2.5176918506622e-001,
+	 6.4956516027451e-002,  6.4956516027451e-002,  6.1964958906174e-002,  6.1964958906174e-002,
+	-2.4644909799099e-001,  2.4644909799099e-001, -2.4109189212322e-001,  2.4109189212322e-001,
+	 5.9039384126663e-002,  5.9039384126663e-002,  5.6180179119110e-002,  5.6180179119110e-002,
+	-2.3569837212563e-001,  2.3569837212563e-001, -2.3026935756207e-001,  2.3026935756207e-001,
+	 5.3387850522995e-002,  5.3387850522995e-002,  5.0662755966187e-002,  5.0662755966187e-002,
+	-2.2480566799641e-001,  2.2480566799641e-001, -2.1930812299252e-001,  2.1930812299252e-001,
+	 4.8005342483521e-002,  4.8005342483521e-002,  4.5415997505188e-002,  4.5415997505188e-002,
+	-2.1377755701542e-001,  2.1377755701542e-001, -2.0821478962898e-001,  2.0821478962898e-001,
+	 4.2895138263702e-002,  4.2895138263702e-002,  4.0443062782288e-002,  4.0443062782288e-002,
+	-2.0262065529823e-001,  2.0262065529823e-001, -1.9699601829052e-001,  1.9699601829052e-001,
+	 3.8060247898102e-002,  3.8060247898102e-002,  3.5746961832047e-002,  3.5746961832047e-002,
+	-1.9134172797203e-001,  1.9134172797203e-001, -1.8565860390663e-001,  1.8565860390663e-001,
+	 3.3503592014313e-002,  3.3503592014313e-002,  3.1330496072769e-002,  3.1330496072769e-002,
+	-1.7994752526283e-001,  1.7994752526283e-001, -1.7420934140682e-001,  1.7420934140682e-001,
+	 2.9227972030640e-002,  2.9227972030640e-002,  2.7196347713470e-002,  2.7196347713470e-002,
+	-1.6844493150711e-001,  1.6844493150711e-001, -1.6265514492989e-001,  1.6265514492989e-001,
+	 2.5235921144485e-002,  2.5235921144485e-002,  2.3346990346909e-002,  2.3346990346909e-002,
+	-1.5684087574482e-001,  1.5684087574482e-001, -1.5100297331810e-001,  1.5100297331810e-001,
+	 2.1529823541641e-002,  2.1529823541641e-002,  1.9784748554230e-002,  1.9784748554230e-002,
+	-1.4514234662056e-001,  1.4514234662056e-001, -1.3925984501839e-001,  1.3925984501839e-001,
+	 1.8111974000931e-002,  1.8111974000931e-002,  1.6511768102646e-002,  1.6511768102646e-002,
+	-1.3335637748241e-001,  1.3335637748241e-001, -1.2743283808231e-001,  1.2743283808231e-001,
+	 1.4984369277954e-002,  1.4984369277954e-002,  1.3530015945435e-002,  1.3530015945435e-002,
+	-1.2149009108543e-001,  1.2149009108543e-001, -1.1552906036377e-001,  1.1552906036377e-001,
+	 1.2148946523666e-002,  1.2148946523666e-002,  1.0841310024261e-002,  1.0841310024261e-002,
+	-1.0955062508583e-001,  1.0955062508583e-001, -1.0355569422245e-001,  1.0355569422245e-001,
+	 9.6073746681213e-003,  9.6073746681213e-003,  8.4472596645355e-003,  8.4472596645355e-003,
+	-9.7545161843300e-002,  9.7545161843300e-002, -9.1519944369793e-002,  9.1519944369793e-002,
+	 7.3611736297607e-003,  7.3611736297607e-003,  6.3492953777313e-003,  6.3492953777313e-003,
+	-8.5480943322182e-002,  8.5480943322182e-002, -7.9429075121880e-002,  7.9429075121880e-002,
+	 5.4117441177368e-003,  5.4117441177368e-003,  4.5486688613892e-003,  4.5486688613892e-003,
+	-7.3365241289139e-002,  7.3365241289139e-002, -6.7290358245373e-002,  6.7290358245373e-002,
+	 3.7602186203003e-003,  3.7602186203003e-003,  3.0465126037598e-003,  3.0465126037598e-003,
+	-6.1205338686705e-002,  6.1205338686705e-002, -5.5111106485128e-002,  5.5111106485128e-002,
+	 2.4076402187347e-003,  2.4076402187347e-003,  1.8436908721924e-003,  1.8436908721924e-003,
+	-4.9008570611477e-002,  4.9008570611477e-002, -4.2898658663034e-002,  4.2898658663034e-002,
+	 1.3547837734222e-003,  1.3547837734222e-003,  9.4094872474670e-004,  9.4094872474670e-004,
+	-3.6782283335924e-002,  3.6782283335924e-002, -3.0660368502140e-002,  3.0660368502140e-002,
+	 6.0227513313293e-004,  6.0227513313293e-004,  3.3882260322571e-004,  3.3882260322571e-004,
+	-2.4533838033676e-002,  2.4533838033676e-002, -1.8403612077236e-002,  1.8403612077236e-002,
+	 1.5059113502502e-004,  1.5059113502502e-004,  3.7640333175659e-005,  3.7640333175659e-005,
+	-1.2270614504814e-002,  1.2270614504814e-002, -6.1357691884041e-003,  6.1357691884041e-003,
+};
+static _MM_ALIGN16 float	W1024[]	 = {
+	 4.9693205952644e-001,  4.9999058246613e-001,  0.0000000000000e+000,  0.0000000000000e+000,
+	 4.9386423826218e-001,  4.9386423826218e-001,  4.9079662561417e-001,  4.9079662561417e-001,
+	-4.9996235966682e-001,  4.9996235966682e-001, -4.9991530179977e-001,  4.9991530179977e-001,
+	 4.8772937059402e-001,  4.8772937059402e-001,  4.8466259241104e-001,  4.8466259241104e-001,
+	-4.9984940886497e-001,  4.9984940886497e-001, -4.9976471066475e-001,  4.9976471066475e-001,
+	 4.8159638047218e-001,  4.8159638047218e-001,  4.7853088378906e-001,  4.7853088378906e-001,
+	-4.9966117739677e-001,  4.9966117739677e-001, -4.9953886866570e-001,  4.9953886866570e-001,
+	 4.7546616196632e-001,  4.7546616196632e-001,  4.7240236401558e-001,  4.7240236401558e-001,
+	-4.9939772486687e-001,  4.9939772486687e-001, -4.9923777580261e-001,  4.9923777580261e-001,
+	 4.6933963894844e-001,  4.6933963894844e-001,  4.6627804636955e-001,  4.6627804636955e-001,
+	-4.9905905127525e-001,  4.9905905127525e-001, -4.9886152148247e-001,  4.9886152148247e-001,
+	 4.6321770548820e-001,  4.6321770548820e-001,  4.6015876531601e-001,  4.6015876531601e-001,
+	-4.9864521622658e-001,  4.9864521622658e-001, -4.9841013550758e-001,  4.9841013550758e-001,
+	 4.5710134506226e-001,  4.5710134506226e-001,  4.5404553413391e-001,  4.5404553413391e-001,
+	-4.9815630912781e-001,  4.9815630912781e-001, -4.9788370728493e-001,  4.9788370728493e-001,
+	 4.5099142193794e-001,  4.5099142193794e-001,  4.4793918728828e-001,  4.4793918728828e-001,
+	-4.9759235978127e-001,  4.9759235978127e-001, -4.9728229641914e-001,  4.9728229641914e-001,
+	 4.4488888978958e-001,  4.4488888978958e-001,  4.4184067845345e-001,  4.4184067845345e-001,
+	-4.9695348739624e-001,  4.9695348739624e-001, -4.9660596251488e-001,  4.9660596251488e-001,
+	 4.3879467248917e-001,  4.3879467248917e-001,  4.3575096130371e-001,  4.3575096130371e-001,
+	-4.9623978137970e-001,  4.9623978137970e-001, -4.9585488438606e-001,  4.9585488438606e-001,
+	 4.3270963430405e-001,  4.3270963430405e-001,  4.2967087030411e-001,  4.2967087030411e-001,
+	-4.9545133113861e-001,  4.9545133113861e-001, -4.9502909183502e-001,  4.9502909183502e-001,
+	 4.2663475871086e-001,  4.2663475871086e-001,  4.2360138893127e-001,  4.2360138893127e-001,
+	-4.9458825588226e-001,  4.9458825588226e-001, -4.9412879347801e-001,  4.9412879347801e-001,
+	 4.2057090997696e-001,  4.2057090997696e-001,  4.1754344105721e-001,  4.1754344105721e-001,
+	-4.9365070462227e-001,  4.9365070462227e-001, -4.9315404891968e-001,  4.9315404891968e-001,
+	 4.1451907157898e-001,  4.1451907157898e-001,  4.1149789094925e-001,  4.1149789094925e-001,
+	-4.9263882637024e-001,  4.9263882637024e-001, -4.9210503697395e-001,  4.9210503697395e-001,
+	 4.0848004817963e-001,  4.0848004817963e-001,  4.0546566247940e-001,  4.0546566247940e-001,
+	-4.9155274033546e-001,  4.9155274033546e-001, -4.9098193645477e-001,  4.9098193645477e-001,
+	 4.0245485305786e-001,  4.0245485305786e-001,  3.9944767951965e-001,  3.9944767951965e-001,
+	-4.9039262533188e-001,  4.9039262533188e-001, -4.8978489637375e-001,  4.8978489637375e-001,
+	 3.9644432067871e-001,  3.9644432067871e-001,  3.9344483613968e-001,  3.9344483613968e-001,
+	-4.8915868997574e-001,  4.8915868997574e-001, -4.8851406574249e-001,  4.8851406574249e-001,
+	 3.9044937491417e-001,  3.9044937491417e-001,  3.8745802640915e-001,  3.8745802640915e-001,
+	-4.8785105347633e-001,  4.8785105347633e-001, -4.8716968297958e-001,  4.8716968297958e-001,
+	 3.8447093963623e-001,  3.8447093963623e-001,  3.8148820400238e-001,  3.8148820400238e-001,
+	-4.8646998405457e-001,  4.8646998405457e-001, -4.8575195670128e-001,  4.8575195670128e-001,
+	 3.7850990891457e-001,  3.7850990891457e-001,  3.7553620338440e-001,  3.7553620338440e-001,
+	-4.8501563072205e-001,  4.8501563072205e-001, -4.8426103591919e-001,  4.8426103591919e-001,
+	 3.7256717681885e-001,  3.7256717681885e-001,  3.6960291862488e-001,  3.6960291862488e-001,
+	-4.8348823189735e-001,  4.8348823189735e-001, -4.8269721865654e-001,  4.8269721865654e-001,
+	 3.6664360761642e-001,  3.6664360761642e-001,  3.6368930339813e-001,  3.6368930339813e-001,
+	-4.8188802599907e-001,  4.8188802599907e-001, -4.8106071352959e-001,  4.8106071352959e-001,
+	 3.6074015498161e-001,  3.6074015498161e-001,  3.5779622197151e-001,  3.5779622197151e-001,
+	-4.8021525144577e-001,  4.8021525144577e-001, -4.7935172915459e-001,  4.7935172915459e-001,
+	 3.5485765337944e-001,  3.5485765337944e-001,  3.5192453861237e-001,  3.5192453861237e-001,
+	-4.7847017645836e-001,  4.7847017645836e-001, -4.7757059335709e-001,  4.7757059335709e-001,
+	 3.4899702668190e-001,  3.4899702668190e-001,  3.4607517719269e-001,  3.4607517719269e-001,
+	-4.7665300965309e-001,  4.7665300965309e-001, -4.7571751475334e-001,  4.7571751475334e-001,
+	 3.4315913915634e-001,  3.4315913915634e-001,  3.4024900197983e-001,  3.4024900197983e-001,
+	-4.7476407885551e-001,  4.7476407885551e-001, -4.7379279136658e-001,  4.7379279136658e-001,
+	 3.3734485507011e-001,  3.3734485507011e-001,  3.3444684743881e-001,  3.3444684743881e-001,
+	-4.7280365228653e-001,  4.7280365228653e-001, -4.7179672122002e-001,  4.7179672122002e-001,
+	 3.3155506849289e-001,  3.3155506849289e-001,  3.2866963744164e-001,  3.2866963744164e-001,
+	-4.7077202796936e-001,  4.7077202796936e-001, -4.6972960233688e-001,  4.6972960233688e-001,
+	 3.2579064369202e-001,  3.2579064369202e-001,  3.2291823625565e-001,  3.2291823625565e-001,
+	-4.6866950392723e-001,  4.6866950392723e-001, -4.6759176254272e-001,  4.6759176254272e-001,
+	 3.2005247473717e-001,  3.2005247473717e-001,  3.1719350814819e-001,  3.1719350814819e-001,
+	-4.6649640798569e-001,  4.6649640798569e-001, -4.6538347005844e-001,  4.6538347005844e-001,
+	 3.1434139609337e-001,  3.1434139609337e-001,  3.1149628758430e-001,  3.1149628758430e-001,
+	-4.6425303816795e-001,  4.6425303816795e-001, -4.6310511231422e-001,  4.6310511231422e-001,
+	 3.0865827202797e-001,  3.0865827202797e-001,  3.0582746863365e-001,  3.0582746863365e-001,
+	-4.6193975210190e-001,  4.6193975210190e-001, -4.6075701713562e-001,  4.6075701713562e-001,
+	 3.0300396680832e-001,  3.0300396680832e-001,  3.0018788576126e-001,  3.0018788576126e-001,
+	-4.5955693721771e-001,  4.5955693721771e-001, -4.5833954215050e-001,  4.5833954215050e-001,
+	 2.9737934470177e-001,  2.9737934470177e-001,  2.9457840323448e-001,  2.9457840323448e-001,
+	-4.5710486173630e-001,  4.5710486173630e-001, -4.5585301518440e-001,  4.5585301518440e-001,
+	 2.9178521037102e-001,  2.9178521037102e-001,  2.8899985551834e-001,  2.8899985551834e-001,
+	-4.5458400249481e-001,  4.5458400249481e-001, -4.5329785346985e-001,  4.5329785346985e-001,
+	 2.8622245788574e-001,  2.8622245788574e-001,  2.8345310688019e-001,  2.8345310688019e-001,
+	-4.5199465751648e-001,  4.5199465751648e-001, -4.5067441463470e-001,  4.5067441463470e-001,
+	 2.8069186210632e-001,  2.8069186210632e-001,  2.7793890237808e-001,  2.7793890237808e-001,
+	-4.4933724403381e-001,  4.4933724403381e-001, -4.4798311591148e-001,  4.4798311591148e-001,
+	 2.7519434690475e-001,  2.7519434690475e-001,  2.7245819568634e-001,  2.7245819568634e-001,
+	-4.4661214947701e-001,  4.4661214947701e-001, -4.4522434473038e-001,  4.4522434473038e-001,
+	 2.6973062753677e-001,  2.6973062753677e-001,  2.6701176166534e-001,  2.6701176166534e-001,
+	-4.4381982088089e-001,  4.4381982088089e-001, -4.4239854812622e-001,  4.4239854812622e-001,
+	 2.6430162787437e-001,  2.6430162787437e-001,  2.6160037517548e-001,  2.6160037517548e-001,
+	-4.4096061587334e-001,  4.4096061587334e-001, -4.3950611352921e-001,  4.3950611352921e-001,
+	 2.5890809297562e-001,  2.5890809297562e-001,  2.5622493028641e-001,  2.5622493028641e-001,
+	-4.3803504109383e-001,  4.3803504109383e-001, -4.3654748797417e-001,  4.3654748797417e-001,
+	 2.5355088710785e-001,  2.5355088710785e-001,  2.5088614225388e-001,  2.5088614225388e-001,
+	-4.3504348397255e-001,  4.3504348397255e-001, -4.3352311849594e-001,  4.3352311849594e-001,
+	 2.4823081493378e-001,  2.4823081493378e-001,  2.4558493494987e-001,  2.4558493494987e-001,
+	-4.3198642134666e-001,  4.3198642134666e-001, -4.3043345212936e-001,  4.3043345212936e-001,
+	 2.4294862151146e-001,  2.4294862151146e-001,  2.4032199382782e-001,  2.4032199382782e-001,
+	-4.2886430025101e-001,  4.2886430025101e-001, -4.2727899551392e-001,  4.2727899551392e-001,
+	 2.3770514130592e-001,  2.3770514130592e-001,  2.3509818315506e-001,  2.3509818315506e-001,
+	-4.2567759752274e-001,  4.2567759752274e-001, -4.2406016588211e-001,  4.2406016588211e-001,
+	 2.3250117897987e-001,  2.3250117897987e-001,  2.2991424798965e-001,  2.2991424798965e-001,
+	-4.2242678999901e-001,  4.2242678999901e-001, -4.2077746987343e-001,  4.2077746987343e-001,
+	 2.2733750939369e-001,  2.2733750939369e-001,  2.2477099299431e-001,  2.2477099299431e-001,
+	-4.1911235451698e-001,  4.1911235451698e-001, -4.1743144392967e-001,  4.1743144392967e-001,
+	 2.2221487760544e-001,  2.2221487760544e-001,  2.1966919302940e-001,  2.1966919302940e-001,
+	-4.1573479771614e-001,  4.1573479771614e-001, -4.1402250528336e-001,  4.1402250528336e-001,
+	 2.1713408827782e-001,  2.1713408827782e-001,  2.1460962295532e-001,  2.1460962295532e-001,
+	-4.1229465603828e-001,  4.1229465603828e-001, -4.1055124998093e-001,  4.1055124998093e-001,
+	 2.1209588646889e-001,  2.1209588646889e-001,  2.0959302783012e-001,  2.0959302783012e-001,
+	-4.0879240632057e-001,  4.0879240632057e-001, -4.0701815485954e-001,  4.0701815485954e-001,
+	 2.0710107684135e-001,  2.0710107684135e-001,  2.0462015271187e-001,  2.0462015271187e-001,
+	-4.0522858500481e-001,  4.0522858500481e-001, -4.0342378616333e-001,  4.0342378616333e-001,
+	 2.0215034484863e-001,  2.0215034484863e-001,  1.9969174265862e-001,  1.9969174265862e-001,
+	-4.0160375833511e-001,  4.0160375833511e-001, -3.9976862072945e-001,  3.9976862072945e-001,
+	 1.9724446535110e-001,  1.9724446535110e-001,  1.9480860233307e-001,  1.9480860233307e-001,
+	-3.9791843295097e-001,  3.9791843295097e-001, -3.9605328440666e-001,  3.9605328440666e-001,
+	 1.9238418340683e-001,  1.9238418340683e-001,  1.8997138738632e-001,  1.8997138738632e-001,
+	-3.9417320489883e-001,  3.9417320489883e-001, -3.9227828383446e-001,  3.9227828383446e-001,
+	 1.8757024407387e-001,  1.8757024407387e-001,  1.8518087267876e-001,  1.8518087267876e-001,
+	-3.9036861062050e-001,  3.9036861062050e-001, -3.8844421505928e-001,  3.8844421505928e-001,
+	 1.8280336260796e-001,  1.8280336260796e-001,  1.8043777346611e-001,  1.8043777346611e-001,
+	-3.8650521636009e-001,  3.8650521636009e-001, -3.8455167412758e-001,  3.8455167412758e-001,
+	 1.7808422446251e-001,  1.7808422446251e-001,  1.7574280500412e-001,  1.7574280500412e-001,
+	-3.8258361816406e-001,  3.8258361816406e-001, -3.8060119748116e-001,  3.8060119748116e-001,
+	 1.7341357469559e-001,  1.7341357469559e-001,  1.7109665274620e-001,  1.7109665274620e-001,
+	-3.7860441207886e-001,  3.7860441207886e-001, -3.7659338116646e-001,  3.7659338116646e-001,
+	 1.6879209876060e-001,  1.6879209876060e-001,  1.6650003194809e-001,  1.6650003194809e-001,
+	-3.7456819415092e-001,  3.7456819415092e-001, -3.7252888083458e-001,  3.7252888083458e-001,
+	 1.6422051191330e-001,  1.6422051191330e-001,  1.6195362806320e-001,  1.6195362806320e-001,
+	-3.7047556042671e-001,  3.7047556042671e-001, -3.6840826272964e-001,  3.6840826272964e-001,
+	 1.5969949960709e-001,  1.5969949960709e-001,  1.5745815634727e-001,  1.5745815634727e-001,
+	-3.6632713675499e-001,  3.6632713675499e-001, -3.6423218250275e-001,  3.6423218250275e-001,
+	 1.5522971749306e-001,  1.5522971749306e-001,  1.5301427245140e-001,  1.5301427245140e-001,
+	-3.6212354898453e-001,  3.6212354898453e-001, -3.6000123620033e-001,  3.6000123620033e-001,
+	 1.5081188082695e-001,  1.5081188082695e-001,  1.4862263202667e-001,  1.4862263202667e-001,
+	-3.5786539316177e-001,  3.5786539316177e-001, -3.5571607947350e-001,  3.5571607947350e-001,
+	 1.4644661545753e-001,  1.4644661545753e-001,  1.4428392052650e-001,  1.4428392052650e-001,
+	-3.5355338454247e-001,  3.5355338454247e-001, -3.5137736797333e-001,  3.5137736797333e-001,
+	 1.4213460683823e-001,  1.4213460683823e-001,  1.3999876379967e-001,  1.3999876379967e-001,
+	-3.4918811917305e-001,  3.4918811917305e-001, -3.4698572754860e-001,  3.4698572754860e-001,
+	 1.3787645101547e-001,  1.3787645101547e-001,  1.3576781749725e-001,  1.3576781749725e-001,
+	-3.4477028250694e-001,  3.4477028250694e-001, -3.4254184365273e-001,  3.4254184365273e-001,
+	 1.3367286324501e-001,  1.3367286324501e-001,  1.3159173727036e-001,  1.3159173727036e-001,
+	-3.4030050039291e-001,  3.4030050039291e-001, -3.3804637193680e-001,  3.3804637193680e-001,
+	 1.2952443957329e-001,  1.2952443957329e-001,  1.2747111916542e-001,  1.2747111916542e-001,
+	-3.3577948808670e-001,  3.3577948808670e-001, -3.3349996805191e-001,  3.3349996805191e-001,
+	 1.2543180584908e-001,  1.2543180584908e-001,  1.2340661883354e-001,  1.2340661883354e-001,
+	-3.3120790123940e-001,  3.3120790123940e-001, -3.2890334725380e-001,  3.2890334725380e-001,
+	 1.2139558792114e-001,  1.2139558792114e-001,  1.1939880251884e-001,  1.1939880251884e-001,
+	-3.2658642530441e-001,  3.2658642530441e-001, -3.2425719499588e-001,  3.2425719499588e-001,
+	 1.1741638183594e-001,  1.1741638183594e-001,  1.1544832587242e-001,  1.1544832587242e-001,
+	-3.2191577553749e-001,  3.2191577553749e-001, -3.1956222653389e-001,  3.1956222653389e-001,
+	 1.1349478363991e-001,  1.1349478363991e-001,  1.1155578494072e-001,  1.1155578494072e-001,
+	-3.1719663739204e-001,  3.1719663739204e-001, -3.1481912732124e-001,  3.1481912732124e-001,
+	 1.0963138937950e-001,  1.0963138937950e-001,  1.0772171616554e-001,  1.0772171616554e-001,
+	-3.1242975592613e-001,  3.1242975592613e-001, -3.1002861261368e-001,  3.1002861261368e-001,
+	 1.0582679510117e-001,  1.0582679510117e-001,  1.0394671559334e-001,  1.0394671559334e-001,
+	-3.0761581659317e-001,  3.0761581659317e-001, -3.0519139766693e-001,  3.0519139766693e-001,
+	 1.0208156704903e-001,  1.0208156704903e-001,  1.0023137927055e-001,  1.0023137927055e-001,
+	-3.0275553464890e-001,  3.0275553464890e-001, -3.0030825734138e-001,  3.0030825734138e-001,
+	 9.8396241664886e-002,  9.8396241664886e-002,  9.6576213836670e-002,  9.6576213836670e-002,
+	-2.9784965515137e-001,  2.9784965515137e-001, -2.9537984728813e-001,  2.9537984728813e-001,
+	 9.4771414995193e-002,  9.4771414995193e-002,  9.2981845140457e-002,  9.2981845140457e-002,
+	-2.9289892315865e-001,  2.9289892315865e-001, -2.9040697216988e-001,  2.9040697216988e-001,
+	 9.1207593679428e-002,  9.1207593679428e-002,  8.9448750019073e-002,  8.9448750019073e-002,
+	-2.8790411353111e-001,  2.8790411353111e-001, -2.8539037704468e-001,  2.8539037704468e-001,
+	 8.7705343961716e-002,  8.7705343961716e-002,  8.5977494716644e-002,  8.5977494716644e-002,
+	-2.8286591172218e-001,  2.8286591172218e-001, -2.8033080697060e-001,  2.8033080697060e-001,
+	 8.4265202283859e-002,  8.4265202283859e-002,  8.2568556070328e-002,  8.2568556070328e-002,
+	-2.7778512239456e-001,  2.7778512239456e-001, -2.7522900700569e-001,  2.7522900700569e-001,
+	 8.0887645483017e-002,  8.0887645483017e-002,  7.9222530126572e-002,  7.9222530126572e-002,
+	-2.7266249060631e-001,  2.7266249060631e-001, -2.7008575201035e-001,  2.7008575201035e-001,
+	 7.7573210000992e-002,  7.7573210000992e-002,  7.5939834117889e-002,  7.5939834117889e-002,
+	-2.6749882102013e-001,  2.6749882102013e-001, -2.6490181684494e-001,  2.6490181684494e-001,
+	 7.4322402477264e-002,  7.4322402477264e-002,  7.2721004486084e-002,  7.2721004486084e-002,
+	-2.6229485869408e-001,  2.6229485869408e-001, -2.5967800617218e-001,  2.5967800617218e-001,
+	 7.1135699748993e-002,  7.1135699748993e-002,  6.9566547870636e-002,  6.9566547870636e-002,
+	-2.5705137848854e-001,  2.5705137848854e-001, -2.5441506505013e-001,  2.5441506505013e-001,
+	 6.8013578653336e-002,  6.8013578653336e-002,  6.6476881504059e-002,  6.6476881504059e-002,
+	-2.5176918506622e-001,  2.5176918506622e-001, -2.4911384284496e-001,  2.4911384284496e-001,
+	 6.4956516027451e-002,  6.4956516027451e-002,  6.3452512025833e-002,  6.3452512025833e-002,
+	-2.4644909799099e-001,  2.4644909799099e-001, -2.4377508461475e-001,  2.4377508461475e-001,
+	 6.1964958906174e-002,  6.1964958906174e-002,  6.0493886470795e-002,  6.0493886470795e-002,
+	-2.4109189212322e-001,  2.4109189212322e-001, -2.3839962482452e-001,  2.3839962482452e-001,
+	 5.9039384126663e-002,  5.9039384126663e-002,  5.7601451873779e-002,  5.7601451873779e-002,
+	-2.3569837212563e-001,  2.3569837212563e-001, -2.3298825323582e-001,  2.3298825323582e-001,
+	 5.6180179119110e-002,  5.6180179119110e-002,  5.4775655269623e-002,  5.4775655269623e-002,
+	-2.3026935756207e-001,  2.3026935756207e-001, -2.2754180431366e-001,  2.2754180431366e-001,
+	 5.3387850522995e-002,  5.3387850522995e-002,  5.2016884088516e-002,  5.2016884088516e-002,
+	-2.2480566799641e-001,  2.2480566799641e-001, -2.2206108272076e-001,  2.2206108272076e-001,
+	 5.0662755966187e-002,  5.0662755966187e-002,  4.9325585365295e-002,  4.9325585365295e-002,
+	-2.1930812299252e-001,  2.1930812299252e-001, -2.1654690802097e-001,  2.1654690802097e-001,
+	 4.8005342483521e-002,  4.8005342483521e-002,  4.6702146530151e-002,  4.6702146530151e-002,
+	-2.1377755701542e-001,  2.1377755701542e-001, -2.1100014448166e-001,  2.1100014448166e-001,
+	 4.5415997505188e-002,  4.5415997505188e-002,  4.4146984815598e-002,  4.4146984815598e-002,
+	-2.0821478962898e-001,  2.0821478962898e-001, -2.0542159676552e-001,  2.0542159676552e-001,
+	 4.2895138263702e-002,  4.2895138263702e-002,  4.1660457849503e-002,  4.1660457849503e-002,
+	-2.0262065529823e-001,  2.0262065529823e-001, -1.9981209933758e-001,  1.9981209933758e-001,
+	 4.0443062782288e-002,  4.0443062782288e-002,  3.9242982864380e-002,  3.9242982864380e-002,
+	-1.9699601829052e-001,  1.9699601829052e-001, -1.9417253136635e-001,  1.9417253136635e-001,
+	 3.8060247898102e-002,  3.8060247898102e-002,  3.6894887685776e-002,  3.6894887685776e-002,
+	-1.9134172797203e-001,  1.9134172797203e-001, -1.8850371241570e-001,  1.8850371241570e-001,
+	 3.5746961832047e-002,  3.5746961832047e-002,  3.4616529941559e-002,  3.4616529941559e-002,
+	-1.8565860390663e-001,  1.8565860390663e-001, -1.8280650675297e-001,  1.8280650675297e-001,
+	 3.3503592014313e-002,  3.3503592014313e-002,  3.2408237457275e-002,  3.2408237457275e-002,
+	-1.7994752526283e-001,  1.7994752526283e-001, -1.7708176374435e-001,  1.7708176374435e-001,
+	 3.1330496072769e-002,  3.1330496072769e-002,  3.0270397663116e-002,  3.0270397663116e-002,
+	-1.7420934140682e-001,  1.7420934140682e-001, -1.7133036255836e-001,  1.7133036255836e-001,
+	 2.9227972030640e-002,  2.9227972030640e-002,  2.8203278779984e-002,  2.8203278779984e-002,
+	-1.6844493150711e-001,  1.6844493150711e-001, -1.6555315256119e-001,  1.6555315256119e-001,
+	 2.7196347713470e-002,  2.7196347713470e-002,  2.6207208633423e-002,  2.6207208633423e-002,
+	-1.6265514492989e-001,  1.6265514492989e-001, -1.5975101292133e-001,  1.5975101292133e-001,
+	 2.5235921144485e-002,  2.5235921144485e-002,  2.4282485246658e-002,  2.4282485246658e-002,
+	-1.5684087574482e-001,  1.5684087574482e-001, -1.5392482280731e-001,  1.5392482280731e-001,
+	 2.3346990346909e-002,  2.3346990346909e-002,  2.2429406642914e-002,  2.2429406642914e-002,
+	-1.5100297331810e-001,  1.5100297331810e-001, -1.4807544648647e-001,  1.4807544648647e-001,
+	 2.1529823541641e-002,  2.1529823541641e-002,  2.0648270845413e-002,  2.0648270845413e-002,
+	-1.4514234662056e-001,  1.4514234662056e-001, -1.4220377802849e-001,  1.4220377802849e-001,
+	 1.9784748554230e-002,  1.9784748554230e-002,  1.8939286470413e-002,  1.8939286470413e-002,
+	-1.3925984501839e-001,  1.3925984501839e-001, -1.3631068170071e-001,  1.3631068170071e-001,
+	 1.8111974000931e-002,  1.8111974000931e-002,  1.7302781343460e-002,  1.7302781343460e-002,
+	-1.3335637748241e-001,  1.3335637748241e-001, -1.3039706647396e-001,  1.3039706647396e-001,
+	 1.6511768102646e-002,  1.6511768102646e-002,  1.5738964080811e-002,  1.5738964080811e-002,
+	-1.2743283808231e-001,  1.2743283808231e-001, -1.2446380406618e-001,  1.2446380406618e-001,
+	 1.4984369277954e-002,  1.4984369277954e-002,  1.4248043298721e-002,  1.4248043298721e-002,
+	-1.2149009108543e-001,  1.2149009108543e-001, -1.1851180344820e-001,  1.1851180344820e-001,
+	 1.3530015945435e-002,  1.3530015945435e-002,  1.2830317020416e-002,  1.2830317020416e-002,
+	-1.1552906036377e-001,  1.1552906036377e-001, -1.1254195868969e-001,  1.1254195868969e-001,
+	 1.2148946523666e-002,  1.2148946523666e-002,  1.1485934257507e-002,  1.1485934257507e-002,
+	-1.0955062508583e-001,  1.0955062508583e-001, -1.0655516386032e-001,  1.0655516386032e-001,
+	 1.0841310024261e-002,  1.0841310024261e-002,  1.0215103626251e-002,  1.0215103626251e-002,
+	-1.0355569422245e-001,  1.0355569422245e-001, -1.0055232048035e-001,  1.0055232048035e-001,
+	 9.6073746681213e-003,  9.6073746681213e-003,  9.0180635452271e-003,  9.0180635452271e-003,
+	-9.7545161843300e-002,  9.7545161843300e-002, -9.4534337520599e-002,  9.4534337520599e-002,
+	 8.4472596645355e-003,  8.4472596645355e-003,  7.8949630260468e-003,  7.8949630260468e-003,
+	-9.1519944369793e-002,  9.1519944369793e-002, -8.8502109050751e-002,  8.8502109050751e-002,
+	 7.3611736297607e-003,  7.3611736297607e-003,  6.8459510803223e-003,  6.8459510803223e-003,
+	-8.5480943322182e-002,  8.5480943322182e-002, -8.2456558942795e-002,  8.2456558942795e-002,
+	 6.3492953777313e-003,  6.3492953777313e-003,  5.8712065219879e-003,  5.8712065219879e-003,
+	-7.9429075121880e-002,  7.9429075121880e-002, -7.6398596167564e-002,  7.6398596167564e-002,
+	 5.4117441177368e-003,  5.4117441177368e-003,  4.9709081649780e-003,  4.9709081649780e-003,
+	-7.3365241289139e-002,  7.3365241289139e-002, -7.0329122245312e-002,  7.0329122245312e-002,
+	 4.5486688613892e-003,  4.5486688613892e-003,  4.1451156139374e-003,  4.1451156139374e-003,
+	-6.7290358245373e-002,  6.7290358245373e-002, -6.4249053597450e-002,  6.4249053597450e-002,
+	 3.7602186203003e-003,  3.7602186203003e-003,  3.3940374851227e-003,  3.3940374851227e-003,
+	-6.1205338686705e-002,  6.1205338686705e-002, -5.8159317821264e-002,  5.8159317821264e-002,
+	 3.0465126037598e-003,  3.0465126037598e-003,  2.7177035808563e-003,  2.7177035808563e-003,
+	-5.5111106485128e-002,  5.5111106485128e-002, -5.2060820162296e-002,  5.2060820162296e-002,
+	 2.4076402187347e-003,  2.4076402187347e-003,  2.1162927150726e-003,  2.1162927150726e-003,
+	-4.9008570611477e-002,  4.9008570611477e-002, -4.5954480767250e-002,  4.5954480767250e-002,
+	 1.8436908721924e-003,  1.8436908721924e-003,  1.5898644924164e-003,  1.5898644924164e-003,
+	-4.2898658663034e-002,  4.2898658663034e-002, -3.9841219782829e-002,  3.9841219782829e-002,
+	 1.3547837734222e-003,  1.3547837734222e-003,  1.1384785175323e-003,  1.1384785175323e-003,
+	-3.6782283335924e-002,  3.6782283335924e-002, -3.3721961081028e-002,  3.3721961081028e-002,
+	 9.4094872474670e-004,  9.4094872474670e-004,  7.6222419738770e-004,  7.6222419738770e-004,
+	-3.0660368502140e-002,  3.0660368502140e-002, -2.7597622945905e-002,  2.7597622945905e-002,
+	 6.0227513313293e-004,  6.0227513313293e-004,  4.6113133430481e-004,  4.6113133430481e-004,
+	-2.4533838033676e-002,  2.4533838033676e-002, -2.1469129249454e-002,  2.1469129249454e-002,
+	 3.3882260322571e-004,  3.3882260322571e-004,  2.3528933525085e-004,  2.3528933525085e-004,
+	-1.8403612077236e-002,  1.8403612077236e-002, -1.5337402001023e-002,  1.5337402001023e-002,
+	 1.5059113502502e-004,  1.5059113502502e-004,  8.4698200225830e-005,  8.4698200225830e-005,
+	-1.2270614504814e-002,  1.2270614504814e-002, -9.2033650726080e-003,  9.2033650726080e-003,
+	 3.7640333175659e-005,  3.7640333175659e-005,  9.4175338745117e-006,  9.4175338745117e-006,
+	-6.1357691884041e-003,  6.1357691884041e-003, -3.0679423362017e-003,  3.0679423362017e-003
+};
+static _MM_ALIGN16 float	W2048[]	 = {
+	 4.9846601486206e-001,  4.9999764561653e-001,  0.0000000000000e+000,  0.0000000000000e+000,
+	 4.9693205952644e-001,  4.9693205952644e-001,  4.9539813399315e-001,  4.9539813399315e-001,
+	-4.9999058246613e-001,  4.9999058246613e-001, -4.9997881054878e-001,  4.9997881054878e-001,
+	 4.9386423826218e-001,  4.9386423826218e-001,  4.9233040213585e-001,  4.9233040213585e-001,
+	-4.9996235966682e-001,  4.9996235966682e-001, -4.9994117021561e-001,  4.9994117021561e-001,
+	 4.9079662561417e-001,  4.9079662561417e-001,  4.8926296830177e-001,  4.8926296830177e-001,
+	-4.9991530179977e-001,  4.9991530179977e-001, -4.9988469481468e-001,  4.9988469481468e-001,
+	 4.8772937059402e-001,  4.8772937059402e-001,  4.8619592189789e-001,  4.8619592189789e-001,
+	-4.9984940886497e-001,  4.9984940886497e-001, -4.9980941414833e-001,  4.9980941414833e-001,
+	 4.8466259241104e-001,  4.8466259241104e-001,  4.8312941193581e-001,  4.8312941193581e-001,
+	-4.9976471066475e-001,  4.9976471066475e-001, -4.9971529841423e-001,  4.9971529841423e-001,
+	 4.8159638047218e-001,  4.8159638047218e-001,  4.8006352782249e-001,  4.8006352782249e-001,
+	-4.9966117739677e-001,  4.9966117739677e-001, -4.9960237741470e-001,  4.9960237741470e-001,
+	 4.7853088378906e-001,  4.7853088378906e-001,  4.7699841856956e-001,  4.7699841856956e-001,
+	-4.9953886866570e-001,  4.9953886866570e-001, -4.9947065114975e-001,  4.9947065114975e-001,
+	 4.7546616196632e-001,  4.7546616196632e-001,  4.7393414378166e-001,  4.7393414378166e-001,
+	-4.9939772486687e-001,  4.9939772486687e-001, -4.9932011961937e-001,  4.9932011961937e-001,
+	 4.7240236401558e-001,  4.7240236401558e-001,  4.7087085247040e-001,  4.7087085247040e-001,
+	-4.9923777580261e-001,  4.9923777580261e-001, -4.9915078282356e-001,  4.9915078282356e-001,
+	 4.6933963894844e-001,  4.6933963894844e-001,  4.6780869364738e-001,  4.6780869364738e-001,
+	-4.9905905127525e-001,  4.9905905127525e-001, -4.9896264076233e-001,  4.9896264076233e-001,
+	 4.6627804636955e-001,  4.6627804636955e-001,  4.6474772691727e-001,  4.6474772691727e-001,
+	-4.9886152148247e-001,  4.9886152148247e-001, -4.9875572323799e-001,  4.9875572323799e-001,
+	 4.6321770548820e-001,  4.6321770548820e-001,  4.6168807148933e-001,  4.6168807148933e-001,
+	-4.9864521622658e-001,  4.9864521622658e-001, -4.9853003025055e-001,  4.9853003025055e-001,
+	 4.6015876531601e-001,  4.6015876531601e-001,  4.5862987637520e-001,  4.5862987637520e-001,
+	-4.9841013550758e-001,  4.9841013550758e-001, -4.9828556180000e-001,  4.9828556180000e-001,
+	 4.5710134506226e-001,  4.5710134506226e-001,  4.5557323098183e-001,  4.5557323098183e-001,
+	-4.9815630912781e-001,  4.9815630912781e-001, -4.9802234768867e-001,  4.9802234768867e-001,
+	 4.5404553413391e-001,  4.5404553413391e-001,  4.5251825451851e-001,  4.5251825451851e-001,
+	-4.9788370728493e-001,  4.9788370728493e-001, -4.9774038791656e-001,  4.9774038791656e-001,
+	 4.5099142193794e-001,  4.5099142193794e-001,  4.4946506619453e-001,  4.4946506619453e-001,
+	-4.9759235978127e-001,  4.9759235978127e-001, -4.9743965268135e-001,  4.9743965268135e-001,
+	 4.4793918728828e-001,  4.4793918728828e-001,  4.4641378521919e-001,  4.4641378521919e-001,
+	-4.9728229641914e-001,  4.9728229641914e-001, -4.9712023139000e-001,  4.9712023139000e-001,
+	 4.4488888978958e-001,  4.4488888978958e-001,  4.4336453080177e-001,  4.4336453080177e-001,
+	-4.9695348739624e-001,  4.9695348739624e-001, -4.9678206443787e-001,  4.9678206443787e-001,
+	 4.4184067845345e-001,  4.4184067845345e-001,  4.4031739234924e-001,  4.4031739234924e-001,
+	-4.9660596251488e-001,  4.9660596251488e-001, -4.9642521142960e-001,  4.9642521142960e-001,
+	 4.3879467248917e-001,  4.3879467248917e-001,  4.3727248907089e-001,  4.3727248907089e-001,
+	-4.9623978137970e-001,  4.9623978137970e-001, -4.9604964256287e-001,  4.9604964256287e-001,
+	 4.3575096130371e-001,  4.3575096130371e-001,  4.3422996997833e-001,  4.3422996997833e-001,
+	-4.9585488438606e-001,  4.9585488438606e-001, -4.9565541744232e-001,  4.9565541744232e-001,
+	 4.3270963430405e-001,  4.3270963430405e-001,  4.3118995428085e-001,  4.3118995428085e-001,
+	-4.9545133113861e-001,  4.9545133113861e-001, -4.9524253606796e-001,  4.9524253606796e-001,
+	 4.2967087030411e-001,  4.2967087030411e-001,  4.2815247178078e-001,  4.2815247178078e-001,
+	-4.9502909183502e-001,  4.9502909183502e-001, -4.9481099843979e-001,  4.9481099843979e-001,
+	 4.2663475871086e-001,  4.2663475871086e-001,  4.2511773109436e-001,  4.2511773109436e-001,
+	-4.9458825588226e-001,  4.9458825588226e-001, -4.9436083436012e-001,  4.9436083436012e-001,
+	 4.2360138893127e-001,  4.2360138893127e-001,  4.2208579182625e-001,  4.2208579182625e-001,
+	-4.9412879347801e-001,  4.9412879347801e-001, -4.9389207363129e-001,  4.9389207363129e-001,
+	 4.2057090997696e-001,  4.2057090997696e-001,  4.1905680298805e-001,  4.1905680298805e-001,
+	-4.9365070462227e-001,  4.9365070462227e-001, -4.9340468645096e-001,  4.9340468645096e-001,
+	 4.1754344105721e-001,  4.1754344105721e-001,  4.1603085398674e-001,  4.1603085398674e-001,
+	-4.9315404891968e-001,  4.9315404891968e-001, -4.9289876222610e-001,  4.9289876222610e-001,
+	 4.1451907157898e-001,  4.1451907157898e-001,  4.1300806403160e-001,  4.1300806403160e-001,
+	-4.9263882637024e-001,  4.9263882637024e-001, -4.9237424135208e-001,  4.9237424135208e-001,
+	 4.1149789094925e-001,  4.1149789094925e-001,  4.0998855233192e-001,  4.0998855233192e-001,
+	-4.9210503697395e-001,  4.9210503697395e-001, -4.9183121323586e-001,  4.9183121323586e-001,
+	 4.0848004817963e-001,  4.0848004817963e-001,  4.0697240829468e-001,  4.0697240829468e-001,
+	-4.9155274033546e-001,  4.9155274033546e-001, -4.9126964807510e-001,  4.9126964807510e-001,
+	 4.0546566247940e-001,  4.0546566247940e-001,  4.0395981073380e-001,  4.0395981073380e-001,
+	-4.9098193645477e-001,  4.9098193645477e-001, -4.9068960547447e-001,  4.9068960547447e-001,
+	 4.0245485305786e-001,  4.0245485305786e-001,  4.0095078945160e-001,  4.0095078945160e-001,
+	-4.9039262533188e-001,  4.9039262533188e-001, -4.9009105563164e-001,  4.9009105563164e-001,
+	 3.9944767951965e-001,  3.9944767951965e-001,  3.9794552326202e-001,  3.9794552326202e-001,
+	-4.8978489637375e-001,  4.8978489637375e-001, -4.8947408795357e-001,  4.8947408795357e-001,
+	 3.9644432067871e-001,  3.9644432067871e-001,  3.9494407176971e-001,  3.9494407176971e-001,
+	-4.8915868997574e-001,  4.8915868997574e-001, -4.8883867263794e-001,  4.8883867263794e-001,
+	 3.9344483613968e-001,  3.9344483613968e-001,  3.9194661378860e-001,  3.9194661378860e-001,
+	-4.8851406574249e-001,  4.8851406574249e-001, -4.8818486928940e-001,  4.8818486928940e-001,
+	 3.9044937491417e-001,  3.9044937491417e-001,  3.8895317912102e-001,  3.8895317912102e-001,
+	-4.8785105347633e-001,  4.8785105347633e-001, -4.8751267790794e-001,  4.8751267790794e-001,
+	 3.8745802640915e-001,  3.8745802640915e-001,  3.8596394658089e-001,  3.8596394658089e-001,
+	-4.8716968297958e-001,  4.8716968297958e-001, -4.8682212829590e-001,  4.8682212829590e-001,
+	 3.8447093963623e-001,  3.8447093963623e-001,  3.8297903537750e-001,  3.8297903537750e-001,
+	-4.8646998405457e-001,  4.8646998405457e-001, -4.8611325025558e-001,  4.8611325025558e-001,
+	 3.8148820400238e-001,  3.8148820400238e-001,  3.7999847531319e-001,  3.7999847531319e-001,
+	-4.8575195670128e-001,  4.8575195670128e-001, -4.8538607358932e-001,  4.8538607358932e-001,
+	 3.7850990891457e-001,  3.7850990891457e-001,  3.7702247500420e-001,  3.7702247500420e-001,
+	-4.8501563072205e-001,  4.8501563072205e-001, -4.8464062809944e-001,  4.8464062809944e-001,
+	 3.7553620338440e-001,  3.7553620338440e-001,  3.7405109405518e-001,  3.7405109405518e-001,
+	-4.8426103591919e-001,  4.8426103591919e-001, -4.8387691378593e-001,  4.8387691378593e-001,
+	 3.7256717681885e-001,  3.7256717681885e-001,  3.7108445167542e-001,  3.7108445167542e-001,
+	-4.8348823189735e-001,  4.8348823189735e-001, -4.8309499025345e-001,  4.8309499025345e-001,
+	 3.6960291862488e-001,  3.6960291862488e-001,  3.6812263727188e-001,  3.6812263727188e-001,
+	-4.8269721865654e-001,  4.8269721865654e-001, -4.8229488730431e-001,  4.8229488730431e-001,
+	 3.6664360761642e-001,  3.6664360761642e-001,  3.6516582965851e-001,  3.6516582965851e-001,
+	-4.8188802599907e-001,  4.8188802599907e-001, -4.8147663474083e-001,  4.8147663474083e-001,
+	 3.6368930339813e-001,  3.6368930339813e-001,  3.6221408843994e-001,  3.6221408843994e-001,
+	-4.8106071352959e-001,  4.8106071352959e-001, -4.8064023256302e-001,  4.8064023256302e-001,
+	 3.6074015498161e-001,  3.6074015498161e-001,  3.5926753282547e-001,  3.5926753282547e-001,
+	-4.8021525144577e-001,  4.8021525144577e-001, -4.7978577017784e-001,  4.7978577017784e-001,
+	 3.5779622197151e-001,  3.5779622197151e-001,  3.5632628202438e-001,  3.5632628202438e-001,
+	-4.7935172915459e-001,  4.7935172915459e-001, -4.7891321778297e-001,  4.7891321778297e-001,
+	 3.5485765337944e-001,  3.5485765337944e-001,  3.5339039564133e-001,  3.5339039564133e-001,
+	-4.7847017645836e-001,  4.7847017645836e-001, -4.7802263498306e-001,  4.7802263498306e-001,
+	 3.5192453861237e-001,  3.5192453861237e-001,  3.5046008229256e-001,  3.5046008229256e-001,
+	-4.7757059335709e-001,  4.7757059335709e-001, -4.7711405158043e-001,  4.7711405158043e-001,
+	 3.4899702668190e-001,  3.4899702668190e-001,  3.4753537178040e-001,  3.4753537178040e-001,
+	-4.7665300965309e-001,  4.7665300965309e-001, -4.7618749737740e-001,  4.7618749737740e-001,
+	 3.4607517719269e-001,  3.4607517719269e-001,  3.4461641311646e-001,  3.4461641311646e-001,
+	-4.7571751475334e-001,  4.7571751475334e-001, -4.7524303197861e-001,  4.7524303197861e-001,
+	 3.4315913915634e-001,  3.4315913915634e-001,  3.4170329570770e-001,  3.4170329570770e-001,
+	-4.7476407885551e-001,  4.7476407885551e-001, -4.7428068518639e-001,  4.7428068518639e-001,
+	 3.4024900197983e-001,  3.4024900197983e-001,  3.3879613876343e-001,  3.3879613876343e-001,
+	-4.7379279136658e-001,  4.7379279136658e-001, -4.7330045700073e-001,  4.7330045700073e-001,
+	 3.3734485507011e-001,  3.3734485507011e-001,  3.3589506149292e-001,  3.3589506149292e-001,
+	-4.7280365228653e-001,  4.7280365228653e-001, -4.7230240702629e-001,  4.7230240702629e-001,
+	 3.3444684743881e-001,  3.3444684743881e-001,  3.3300018310547e-001,  3.3300018310547e-001,
+	-4.7179672122002e-001,  4.7179672122002e-001, -4.7128659486771e-001,  4.7128659486771e-001,
+	 3.3155506849289e-001,  3.3155506849289e-001,  3.3011156320572e-001,  3.3011156320572e-001,
+	-4.7077202796936e-001,  4.7077202796936e-001, -4.7025302052498e-001,  4.7025302052498e-001,
+	 3.2866963744164e-001,  3.2866963744164e-001,  3.2722932100296e-001,  3.2722932100296e-001,
+	-4.6972960233688e-001,  4.6972960233688e-001, -4.6920177340508e-001,  4.6920177340508e-001,
+	 3.2579064369202e-001,  3.2579064369202e-001,  3.2435363531113e-001,  3.2435363531113e-001,
+	-4.6866950392723e-001,  4.6866950392723e-001, -4.6813282370567e-001,  4.6813282370567e-001,
+	 3.2291823625565e-001,  3.2291823625565e-001,  3.2148450613022e-001,  3.2148450613022e-001,
+	-4.6759176254272e-001,  4.6759176254272e-001, -4.6704626083374e-001,  4.6704626083374e-001,
+	 3.2005247473717e-001,  3.2005247473717e-001,  3.1862211227417e-001,  3.1862211227417e-001,
+	-4.6649640798569e-001,  4.6649640798569e-001, -4.6594214439392e-001,  4.6594214439392e-001,
+	 3.1719350814819e-001,  3.1719350814819e-001,  3.1576657295227e-001,  3.1576657295227e-001,
+	-4.6538347005844e-001,  4.6538347005844e-001, -4.6482044458389e-001,  4.6482044458389e-001,
+	 3.1434139609337e-001,  3.1434139609337e-001,  3.1291794776917e-001,  3.1291794776917e-001,
+	-4.6425303816795e-001,  4.6425303816795e-001, -4.6368125081062e-001,  4.6368125081062e-001,
+	 3.1149628758430e-001,  3.1149628758430e-001,  3.1007638573647e-001,  3.1007638573647e-001,
+	-4.6310511231422e-001,  4.6310511231422e-001, -4.6252462267876e-001,  4.6252462267876e-001,
+	 3.0865827202797e-001,  3.0865827202797e-001,  3.0724197626114e-001,  3.0724197626114e-001,
+	-4.6193975210190e-001,  4.6193975210190e-001, -4.6135056018829e-001,  4.6135056018829e-001,
+	 3.0582746863365e-001,  3.0582746863365e-001,  3.0441480875015e-001,  3.0441480875015e-001,
+	-4.6075701713562e-001,  4.6075701713562e-001, -4.6015912294388e-001,  4.6015912294388e-001,
+	 3.0300396680832e-001,  3.0300396680832e-001,  3.0159500241280e-001,  3.0159500241280e-001,
+	-4.5955693721771e-001,  4.5955693721771e-001, -4.5895040035248e-001,  4.5895040035248e-001,
+	 3.0018788576126e-001,  3.0018788576126e-001,  2.9878267645836e-001,  2.9878267645836e-001,
+	-4.5833954215050e-001,  4.5833954215050e-001, -4.5772436261177e-001,  4.5772436261177e-001,
+	 2.9737934470177e-001,  2.9737934470177e-001,  2.9597792029381e-001,  2.9597792029381e-001,
+	-4.5710486173630e-001,  4.5710486173630e-001, -4.5648109912872e-001,  4.5648109912872e-001,
+	 2.9457840323448e-001,  2.9457840323448e-001,  2.9318082332611e-001,  2.9318082332611e-001,
+	-4.5585301518440e-001,  4.5585301518440e-001, -4.5522063970566e-001,  4.5522063970566e-001,
+	 2.9178521037102e-001,  2.9178521037102e-001,  2.9039156436920e-001,  2.9039156436920e-001,
+	-4.5458400249481e-001,  4.5458400249481e-001, -4.5394304394722e-001,  4.5394304394722e-001,
+	 2.8899985551834e-001,  2.8899985551834e-001,  2.8761017322540e-001,  2.8761017322540e-001,
+	-4.5329785346985e-001,  4.5329785346985e-001, -4.5264837145805e-001,  4.5264837145805e-001,
+	 2.8622245788574e-001,  2.8622245788574e-001,  2.8483676910400e-001,  2.8483676910400e-001,
+	-4.5199465751648e-001,  4.5199465751648e-001, -4.5133665204048e-001,  4.5133665204048e-001,
+	 2.8345310688019e-001,  2.8345310688019e-001,  2.8207147121429e-001,  2.8207147121429e-001,
+	-4.5067441463470e-001,  4.5067441463470e-001, -4.5000794529915e-001,  4.5000794529915e-001,
+	 2.8069186210632e-001,  2.8069186210632e-001,  2.7931433916092e-001,  2.7931433916092e-001,
+	-4.4933724403381e-001,  4.4933724403381e-001, -4.4866228103638e-001,  4.4866228103638e-001,
+	 2.7793890237808e-001,  2.7793890237808e-001,  2.7656558156013e-001,  2.7656558156013e-001,
+	-4.4798311591148e-001,  4.4798311591148e-001, -4.4729974865913e-001,  4.4729974865913e-001,
+	 2.7519434690475e-001,  2.7519434690475e-001,  2.7382519841194e-001,  2.7382519841194e-001,
+	-4.4661214947701e-001,  4.4661214947701e-001, -4.4592034816742e-001,  4.4592034816742e-001,
+	 2.7245819568634e-001,  2.7245819568634e-001,  2.7109333872795e-001,  2.7109333872795e-001,
+	-4.4522434473038e-001,  4.4522434473038e-001, -4.4452416896820e-001,  4.4452416896820e-001,
+	 2.6973062753677e-001,  2.6973062753677e-001,  2.6837009191513e-001,  2.6837009191513e-001,
+	-4.4381982088089e-001,  4.4381982088089e-001, -4.4311127066612e-001,  4.4311127066612e-001,
+	 2.6701176166534e-001,  2.6701176166534e-001,  2.6565557718277e-001,  2.6565557718277e-001,
+	-4.4239854812622e-001,  4.4239854812622e-001, -4.4168165326118e-001,  4.4168165326118e-001,
+	 2.6430162787437e-001,  2.6430162787437e-001,  2.6294988393784e-001,  2.6294988393784e-001,
+	-4.4096061587334e-001,  4.4096061587334e-001, -4.4023543596268e-001,  4.4023543596268e-001,
+	 2.6160037517548e-001,  2.6160037517548e-001,  2.6025313138962e-001,  2.6025313138962e-001,
+	-4.3950611352921e-001,  4.3950611352921e-001, -4.3877264857292e-001,  4.3877264857292e-001,
+	 2.5890809297562e-001,  2.5890809297562e-001,  2.5756537914276e-001,  2.5756537914276e-001,
+	-4.3803504109383e-001,  4.3803504109383e-001, -4.3729332089424e-001,  4.3729332089424e-001,
+	 2.5622493028641e-001,  2.5622493028641e-001,  2.5488674640656e-001,  2.5488674640656e-001,
+	-4.3654748797417e-001,  4.3654748797417e-001, -4.3579754233360e-001,  4.3579754233360e-001,
+	 2.5355088710785e-001,  2.5355088710785e-001,  2.5221735239029e-001,  2.5221735239029e-001,
+	-4.3504348397255e-001,  4.3504348397255e-001, -4.3428534269333e-001,  4.3428534269333e-001,
+	 2.5088614225388e-001,  2.5088614225388e-001,  2.4955731630325e-001,  2.4955731630325e-001,
+	-4.3352311849594e-001,  4.3352311849594e-001, -4.3275681138039e-001,  4.3275681138039e-001,
+	 2.4823081493378e-001,  2.4823081493378e-001,  2.4690666794777e-001,  2.4690666794777e-001,
+	-4.3198642134666e-001,  4.3198642134666e-001, -4.3121197819710e-001,  4.3121197819710e-001,
+	 2.4558493494987e-001,  2.4558493494987e-001,  2.4426555633545e-001,  2.4426555633545e-001,
+	-4.3043345212936e-001,  4.3043345212936e-001, -4.2965090274811e-001,  4.2965090274811e-001,
+	 2.4294862151146e-001,  2.4294862151146e-001,  2.4163410067558e-001,  2.4163410067558e-001,
+	-4.2886430025101e-001,  4.2886430025101e-001, -4.2807367444038e-001,  4.2807367444038e-001,
+	 2.4032199382782e-001,  2.4032199382782e-001,  2.3901236057281e-001,  2.3901236057281e-001,
+	-4.2727899551392e-001,  4.2727899551392e-001, -4.2648029327393e-001,  4.2648029327393e-001,
+	 2.3770514130592e-001,  2.3770514130592e-001,  2.3640042543411e-001,  2.3640042543411e-001,
+	-4.2567759752274e-001,  4.2567759752274e-001, -4.2487087845802e-001,  4.2487087845802e-001,
+	 2.3509818315506e-001,  2.3509818315506e-001,  2.3379844427109e-001,  2.3379844427109e-001,
+	-4.2406016588211e-001,  4.2406016588211e-001, -4.2324545979500e-001,  4.2324545979500e-001,
+	 2.3250117897987e-001,  2.3250117897987e-001,  2.3120644688606e-001,  2.3120644688606e-001,
+	-4.2242678999901e-001,  4.2242678999901e-001, -4.2160412669182e-001,  4.2160412669182e-001,
+	 2.2991424798965e-001,  2.2991424798965e-001,  2.2862461209297e-001,  2.2862461209297e-001,
+	-4.2077746987343e-001,  4.2077746987343e-001, -4.1994687914848e-001,  4.1994687914848e-001,
+	 2.2733750939369e-001,  2.2733750939369e-001,  2.2605296969414e-001,  2.2605296969414e-001,
+	-4.1911235451698e-001,  4.1911235451698e-001, -4.1827386617661e-001,  4.1827386617661e-001,
+	 2.2477099299431e-001,  2.2477099299431e-001,  2.2349163889885e-001,  2.2349163889885e-001,
+	-4.1743144392967e-001,  4.1743144392967e-001, -4.1658508777618e-001,  4.1658508777618e-001,
+	 2.2221487760544e-001,  2.2221487760544e-001,  2.2094073891640e-001,  2.2094073891640e-001,
+	-4.1573479771614e-001,  4.1573479771614e-001, -4.1488060355186e-001,  4.1488060355186e-001,
+	 2.1966919302940e-001,  2.1966919302940e-001,  2.1840032935143e-001,  2.1840032935143e-001,
+	-4.1402250528336e-001,  4.1402250528336e-001, -4.1316053271294e-001,  4.1316053271294e-001,
+	 2.1713408827782e-001,  2.1713408827782e-001,  2.1587052941322e-001,  2.1587052941322e-001,
+	-4.1229465603828e-001,  4.1229465603828e-001, -4.1142487525940e-001,  4.1142487525940e-001,
+	 2.1460962295532e-001,  2.1460962295532e-001,  2.1335139870644e-001,  2.1335139870644e-001,
+	-4.1055124998093e-001,  4.1055124998093e-001, -4.0967375040054e-001,  4.0967375040054e-001,
+	 2.1209588646889e-001,  2.1209588646889e-001,  2.1084308624268e-001,  2.1084308624268e-001,
+	-4.0879240632057e-001,  4.0879240632057e-001, -4.0790718793869e-001,  4.0790718793869e-001,
+	 2.0959302783012e-001,  2.0959302783012e-001,  2.0834565162659e-001,  2.0834565162659e-001,
+	-4.0701815485954e-001,  4.0701815485954e-001, -4.0612527728081e-001,  4.0612527728081e-001,
+	 2.0710107684135e-001,  2.0710107684135e-001,  2.0585921406746e-001,  2.0585921406746e-001,
+	-4.0522858500481e-001,  4.0522858500481e-001, -4.0432807803154e-001,  4.0432807803154e-001,
+	 2.0462015271187e-001,  2.0462015271187e-001,  2.0338383316994e-001,  2.0338383316994e-001,
+	-4.0342378616333e-001,  4.0342378616333e-001, -4.0251564979553e-001,  4.0251564979553e-001,
+	 2.0215034484863e-001,  2.0215034484863e-001,  2.0091962814331e-001,  2.0091962814331e-001,
+	-4.0160375833511e-001,  4.0160375833511e-001, -4.0068808197975e-001,  4.0068808197975e-001,
+	 1.9969174265862e-001,  1.9969174265862e-001,  1.9846668839455e-001,  1.9846668839455e-001,
+	-3.9976862072945e-001,  3.9976862072945e-001, -3.9884540438652e-001,  3.9884540438652e-001,
+	 1.9724446535110e-001,  1.9724446535110e-001,  1.9602510333061e-001,  1.9602510333061e-001,
+	-3.9791843295097e-001,  3.9791843295097e-001, -3.9698773622513e-001,  3.9698773622513e-001,
+	 1.9480860233307e-001,  1.9480860233307e-001,  1.9359496235847e-001,  1.9359496235847e-001,
+	-3.9605328440666e-001,  3.9605328440666e-001, -3.9511510729790e-001,  3.9511510729790e-001,
+	 1.9238418340683e-001,  1.9238418340683e-001,  1.9117632508278e-001,  1.9117632508278e-001,
+	-3.9417320489883e-001,  3.9417320489883e-001, -3.9322760701180e-001,  3.9322760701180e-001,
+	 1.8997138738632e-001,  1.8997138738632e-001,  1.8876934051514e-001,  1.8876934051514e-001,
+	-3.9227828383446e-001,  3.9227828383446e-001, -3.9132529497147e-001,  3.9132529497147e-001,
+	 1.8757024407387e-001,  1.8757024407387e-001,  1.8637409806252e-001,  1.8637409806252e-001,
+	-3.9036861062050e-001,  3.9036861062050e-001, -3.8940826058388e-001,  3.8940826058388e-001,
+	 1.8518087267876e-001,  1.8518087267876e-001,  1.8399062752724e-001,  1.8399062752724e-001,
+	-3.8844421505928e-001,  3.8844421505928e-001, -3.8747653365135e-001,  3.8747653365135e-001,
+	 1.8280336260796e-001,  1.8280336260796e-001,  1.8161904811859e-001,  1.8161904811859e-001,
+	-3.8650521636009e-001,  3.8650521636009e-001, -3.8553026318550e-001,  3.8553026318550e-001,
+	 1.8043777346611e-001,  1.8043777346611e-001,  1.7925947904587e-001,  1.7925947904587e-001,
+	-3.8455167412758e-001,  3.8455167412758e-001, -3.8356944918633e-001,  3.8356944918633e-001,
+	 1.7808422446251e-001,  1.7808422446251e-001,  1.7691197991371e-001,  1.7691197991371e-001,
+	-3.8258361816406e-001,  3.8258361816406e-001, -3.8159421086311e-001,  3.8159421086311e-001,
+	 1.7574280500412e-001,  1.7574280500412e-001,  1.7457664012909e-001,  1.7457664012909e-001,
+	-3.8060119748116e-001,  3.8060119748116e-001, -3.7960457801819e-001,  3.7960457801819e-001,
+	 1.7341357469559e-001,  1.7341357469559e-001,  1.7225357890129e-001,  1.7225357890129e-001,
+	-3.7860441207886e-001,  3.7860441207886e-001, -3.7760066986084e-001,  3.7760066986084e-001,
+	 1.7109665274620e-001,  1.7109665274620e-001,  1.6994282603264e-001,  1.6994282603264e-001,
+	-3.7659338116646e-001,  3.7659338116646e-001, -3.7558254599571e-001,  3.7558254599571e-001,
+	 1.6879209876060e-001,  1.6879209876060e-001,  1.6764450073242e-001,  1.6764450073242e-001,
+	-3.7456819415092e-001,  3.7456819415092e-001, -3.7355029582977e-001,  3.7355029582977e-001,
+	 1.6650003194809e-001,  1.6650003194809e-001,  1.6535869240761e-001,  1.6535869240761e-001,
+	-3.7252888083458e-001,  3.7252888083458e-001, -3.7150397896767e-001,  3.7150397896767e-001,
+	 1.6422051191330e-001,  1.6422051191330e-001,  1.6308549046516e-001,  1.6308549046516e-001,
+	-3.7047556042671e-001,  3.7047556042671e-001, -3.6944365501404e-001,  3.6944365501404e-001,
+	 1.6195362806320e-001,  1.6195362806320e-001,  1.6082498431206e-001,  1.6082498431206e-001,
+	-3.6840826272964e-001,  3.6840826272964e-001, -3.6736944317818e-001,  3.6736944317818e-001,
+	 1.5969949960709e-001,  1.5969949960709e-001,  1.5857723355293e-001,  1.5857723355293e-001,
+	-3.6632713675499e-001,  3.6632713675499e-001, -3.6528137326241e-001,  3.6528137326241e-001,
+	 1.5745815634727e-001,  1.5745815634727e-001,  1.5634232759476e-001,  1.5634232759476e-001,
+	-3.6423218250275e-001,  3.6423218250275e-001, -3.6317956447601e-001,  3.6317956447601e-001,
+	 1.5522971749306e-001,  1.5522971749306e-001,  1.5412035584450e-001,  1.5412035584450e-001,
+	-3.6212354898453e-001,  3.6212354898453e-001, -3.6106407642365e-001,  3.6106407642365e-001,
+	 1.5301427245140e-001,  1.5301427245140e-001,  1.5191143751144e-001,  1.5191143751144e-001,
+	-3.6000123620033e-001,  3.6000123620033e-001, -3.5893502831459e-001,  3.5893502831459e-001,
+	 1.5081188082695e-001,  1.5081188082695e-001,  1.4971560239792e-001,  1.4971560239792e-001,
+	-3.5786539316177e-001,  3.5786539316177e-001, -3.5679242014885e-001,  3.5679242014885e-001,
+	 1.4862263202667e-001,  1.4862263202667e-001,  1.4753293991089e-001,  1.4753293991089e-001,
+	-3.5571607947350e-001,  3.5571607947350e-001, -3.5463640093803e-001,  3.5463640093803e-001,
+	 1.4644661545753e-001,  1.4644661545753e-001,  1.4536359906197e-001,  1.4536359906197e-001,
+	-3.5355338454247e-001,  3.5355338454247e-001, -3.5246706008911e-001,  3.5246706008911e-001,
+	 1.4428392052650e-001,  1.4428392052650e-001,  1.4320757985115e-001,  1.4320757985115e-001,
+	-3.5137736797333e-001,  3.5137736797333e-001, -3.5028439760208e-001,  3.5028439760208e-001,
+	 1.4213460683823e-001,  1.4213460683823e-001,  1.4106497168541e-001,  1.4106497168541e-001,
+	-3.4918811917305e-001,  3.4918811917305e-001, -3.4808856248856e-001,  3.4808856248856e-001,
+	 1.3999876379967e-001,  1.3999876379967e-001,  1.3893592357635e-001,  1.3893592357635e-001,
+	-3.4698572754860e-001,  3.4698572754860e-001, -3.4587964415550e-001,  3.4587964415550e-001,
+	 1.3787645101547e-001,  1.3787645101547e-001,  1.3682043552399e-001,  1.3682043552399e-001,
+	-3.4477028250694e-001,  3.4477028250694e-001, -3.4365767240524e-001,  3.4365767240524e-001,
+	 1.3576781749725e-001,  1.3576781749725e-001,  1.3471862673759e-001,  1.3471862673759e-001,
+	-3.4254184365273e-001,  3.4254184365273e-001, -3.4142276644707e-001,  3.4142276644707e-001,
+	 1.3367286324501e-001,  1.3367286324501e-001,  1.3263055682182e-001,  1.3263055682182e-001,
+	-3.4030050039291e-001,  3.4030050039291e-001, -3.3917501568794e-001,  3.3917501568794e-001,
+	 1.3159173727036e-001,  1.3159173727036e-001,  1.3055634498596e-001,  1.3055634498596e-001,
+	-3.3804637193680e-001,  3.3804637193680e-001, -3.3691450953484e-001,  3.3691450953484e-001,
+	 1.2952443957329e-001,  1.2952443957329e-001,  1.2849602103233e-001,  1.2849602103233e-001,
+	-3.3577948808670e-001,  3.3577948808670e-001, -3.3464130759239e-001,  3.3464130759239e-001,
+	 1.2747111916542e-001,  1.2747111916542e-001,  1.2644970417023e-001,  1.2644970417023e-001,
+	-3.3349996805191e-001,  3.3349996805191e-001, -3.3235549926758e-001,  3.3235549926758e-001,
+	 1.2543180584908e-001,  1.2543180584908e-001,  1.2441745400429e-001,  1.2441745400429e-001,
+	-3.3120790123940e-001,  3.3120790123940e-001, -3.3005717396736e-001,  3.3005717396736e-001,
+	 1.2340661883354e-001,  1.2340661883354e-001,  1.2239933013916e-001,  1.2239933013916e-001,
+	-3.2890334725380e-001,  3.2890334725380e-001, -3.2774642109871e-001,  3.2774642109871e-001,
+	 1.2139558792114e-001,  1.2139558792114e-001,  1.2039542198181e-001,  1.2039542198181e-001,
+	-3.2658642530441e-001,  3.2658642530441e-001, -3.2542335987091e-001,  3.2542335987091e-001,
+	 1.1939880251884e-001,  1.1939880251884e-001,  1.1840578913689e-001,  1.1840578913689e-001,
+	-3.2425719499588e-001,  3.2425719499588e-001, -3.2308802008629e-001,  3.2308802008629e-001,
+	 1.1741638183594e-001,  1.1741638183594e-001,  1.1643055081367e-001,  1.1643055081367e-001,
+	-3.2191577553749e-001,  3.2191577553749e-001, -3.2074052095413e-001,  3.2074052095413e-001,
+	 1.1544832587242e-001,  1.1544832587242e-001,  1.1446973681450e-001,  1.1446973681450e-001,
+	-3.1956222653389e-001,  3.1956222653389e-001, -3.1838095188141e-001,  3.1838095188141e-001,
+	 1.1349478363991e-001,  1.1349478363991e-001,  1.1252346634865e-001,  1.1252346634865e-001,
+	-3.1719663739204e-001,  3.1719663739204e-001, -3.1600937247276e-001,  3.1600937247276e-001,
+	 1.1155578494072e-001,  1.1155578494072e-001,  1.1059173941612e-001,  1.1059173941612e-001,
+	-3.1481912732124e-001,  3.1481912732124e-001, -3.1362590193748e-001,  3.1362590193748e-001,
+	 1.0963138937950e-001,  1.0963138937950e-001,  1.0867470502853e-001,  1.0867470502853e-001,
+	-3.1242975592613e-001,  3.1242975592613e-001, -3.1123065948486e-001,  3.1123065948486e-001,
+	 1.0772171616554e-001,  1.0772171616554e-001,  1.0677239298820e-001,  1.0677239298820e-001,
+	-3.1002861261368e-001,  3.1002861261368e-001, -3.0882367491722e-001,  3.0882367491722e-001,
+	 1.0582679510117e-001,  1.0582679510117e-001,  1.0488489270210e-001,  1.0488489270210e-001,
+	-3.0761581659317e-001,  3.0761581659317e-001, -3.0640503764153e-001,  3.0640503764153e-001,
+	 1.0394671559334e-001,  1.0394671559334e-001,  1.0301226377487e-001,  1.0301226377487e-001,
+	-3.0519139766693e-001,  3.0519139766693e-001, -3.0397489666939e-001,  3.0397489666939e-001,
+	 1.0208156704903e-001,  1.0208156704903e-001,  1.0115459561348e-001,  1.0115459561348e-001,
+	-3.0275553464890e-001,  3.0275553464890e-001, -3.0153331160545e-001,  3.0153331160545e-001,
+	 1.0023137927055e-001,  1.0023137927055e-001,  9.9311918020248e-002,  9.9311918020248e-002,
+	-3.0030825734138e-001,  3.0030825734138e-001, -2.9908037185669e-001,  2.9908037185669e-001,
+	 9.8396241664886e-002,  9.8396241664886e-002,  9.7484350204468e-002,  9.7484350204468e-002,
+	-2.9784965515137e-001,  2.9784965515137e-001, -2.9661616683006e-001,  2.9661616683006e-001,
+	 9.6576213836670e-002,  9.6576213836670e-002,  9.5671921968460e-002,  9.5671921968460e-002,
+	-2.9537984728813e-001,  2.9537984728813e-001, -2.9414078593254e-001,  2.9414078593254e-001,
+	 9.4771414995193e-002,  9.4771414995193e-002,  9.3874722719193e-002,  9.3874722719193e-002,
+	-2.9289892315865e-001,  2.9289892315865e-001, -2.9165434837341e-001,  2.9165434837341e-001,
+	 9.2981845140457e-002,  9.2981845140457e-002,  9.2092812061310e-002,  9.2092812061310e-002,
+	-2.9040697216988e-001,  2.9040697216988e-001, -2.8915691375732e-001,  2.8915691375732e-001,
+	 9.1207593679428e-002,  9.1207593679428e-002,  9.0326249599457e-002,  9.0326249599457e-002,
+	-2.8790411353111e-001,  2.8790411353111e-001, -2.8664860129356e-001,  2.8664860129356e-001,
+	 8.9448750019073e-002,  8.9448750019073e-002,  8.8575124740601e-002,  8.8575124740601e-002,
+	-2.8539037704468e-001,  2.8539037704468e-001, -2.8412947058678e-001,  2.8412947058678e-001,
+	 8.7705343961716e-002,  8.7705343961716e-002,  8.6839467287064e-002,  8.6839467287064e-002,
+	-2.8286591172218e-001,  2.8286591172218e-001, -2.8159967064857e-001,  2.8159967064857e-001,
+	 8.5977494716644e-002,  8.5977494716644e-002,  8.5119396448135e-002,  8.5119396448135e-002,
+	-2.8033080697060e-001,  2.8033080697060e-001, -2.7905926108360e-001,  2.7905926108360e-001,
+	 8.4265202283859e-002,  8.4265202283859e-002,  8.3414912223816e-002,  8.3414912223816e-002,
+	-2.7778512239456e-001,  2.7778512239456e-001, -2.7650836110115e-001,  2.7650836110115e-001,
+	 8.2568556070328e-002,  8.2568556070328e-002,  8.1726133823395e-002,  8.1726133823395e-002,
+	-2.7522900700569e-001,  2.7522900700569e-001, -2.7394703030586e-001,  2.7394703030586e-001,
+	 8.0887645483017e-002,  8.0887645483017e-002,  8.0053120851517e-002,  8.0053120851517e-002,
+	-2.7266249060631e-001,  2.7266249060631e-001, -2.7137538790703e-001,  2.7137538790703e-001,
+	 7.9222530126572e-002,  7.9222530126572e-002,  7.8395873308182e-002,  7.8395873308182e-002,
+	-2.7008575201035e-001,  2.7008575201035e-001, -2.6879355311394e-001,  2.6879355311394e-001,
+	 7.7573210000992e-002,  7.7573210000992e-002,  7.6754540205002e-002,  7.6754540205002e-002,
+	-2.6749882102013e-001,  2.6749882102013e-001, -2.6620155572891e-001,  2.6620155572891e-001,
+	 7.5939834117889e-002,  7.5939834117889e-002,  7.5129121541977e-002,  7.5129121541977e-002,
+	-2.6490181684494e-001,  2.6490181684494e-001, -2.6359957456589e-001,  2.6359957456589e-001,
+	 7.4322402477264e-002,  7.4322402477264e-002,  7.3519706726074e-002,  7.3519706726074e-002,
+	-2.6229485869408e-001,  2.6229485869408e-001, -2.6098763942719e-001,  2.6098763942719e-001,
+	 7.2721004486084e-002,  7.2721004486084e-002,  7.1926325559616e-002,  7.1926325559616e-002,
+	-2.5967800617218e-001,  2.5967800617218e-001, -2.5836589932442e-001,  2.5836589932442e-001,
+	 7.1135699748993e-002,  7.1135699748993e-002,  7.0349097251892e-002,  7.0349097251892e-002,
+	-2.5705137848854e-001,  2.5705137848854e-001, -2.5573444366455e-001,  2.5573444366455e-001,
+	 6.9566547870636e-002,  6.9566547870636e-002,  6.8788021802902e-002,  6.8788021802902e-002,
+	-2.5441506505013e-001,  2.5441506505013e-001, -2.5309333205223e-001,  2.5309333205223e-001,
+	 6.8013578653336e-002,  6.8013578653336e-002,  6.7243188619614e-002,  6.7243188619614e-002,
+	-2.5176918506622e-001,  2.5176918506622e-001, -2.5044268369675e-001,  2.5044268369675e-001,
+	 6.6476881504059e-002,  6.6476881504059e-002,  6.5714657306671e-002,  6.5714657306671e-002,
+	-2.4911384284496e-001,  2.4911384284496e-001, -2.4778263270855e-001,  2.4778263270855e-001,
+	 6.4956516027451e-002,  6.4956516027451e-002,  6.4202457666397e-002,  6.4202457666397e-002,
+	-2.4644909799099e-001,  2.4644909799099e-001, -2.4511325359344e-001,  2.4511325359344e-001,
+	 6.3452512025833e-002,  6.3452512025833e-002,  6.2706679105759e-002,  6.2706679105759e-002,
+	-2.4377508461475e-001,  2.4377508461475e-001, -2.4243463575840e-001,  2.4243463575840e-001,
+	 6.1964958906174e-002,  6.1964958906174e-002,  6.1227351427078e-002,  6.1227351427078e-002,
+	-2.4109189212322e-001,  2.4109189212322e-001, -2.3974688351154e-001,  2.3974688351154e-001,
+	 6.0493886470795e-002,  6.0493886470795e-002,  5.9764564037323e-002,  5.9764564037323e-002,
+	-2.3839962482452e-001,  2.3839962482452e-001, -2.3705011606216e-001,  2.3705011606216e-001,
+	 5.9039384126663e-002,  5.9039384126663e-002,  5.8318346738815e-002,  5.8318346738815e-002,
+	-2.3569837212563e-001,  2.3569837212563e-001, -2.3434442281723e-001,  2.3434442281723e-001,
+	 5.7601451873779e-002,  5.7601451873779e-002,  5.6888729333878e-002,  5.6888729333878e-002,
+	-2.3298825323582e-001,  2.3298825323582e-001, -2.3162989318371e-001,  2.3162989318371e-001,
+	 5.6180179119110e-002,  5.6180179119110e-002,  5.5475831031799e-002,  5.5475831031799e-002,
+	-2.3026935756207e-001,  2.3026935756207e-001, -2.2890666127205e-001,  2.2890666127205e-001,
+	 5.4775655269623e-002,  5.4775655269623e-002,  5.4079651832581e-002,  5.4079651832581e-002,
+	-2.2754180431366e-001,  2.2754180431366e-001, -2.2617480158806e-001,  2.2617480158806e-001,
+	 5.3387850522995e-002,  5.3387850522995e-002,  5.2700251340866e-002,  5.2700251340866e-002,
+	-2.2480566799641e-001,  2.2480566799641e-001, -2.2343441843987e-001,  2.2343441843987e-001,
+	 5.2016884088516e-002,  5.2016884088516e-002,  5.1337718963623e-002,  5.1337718963623e-002,
+	-2.2206108272076e-001,  2.2206108272076e-001, -2.2068564593792e-001,  2.2068564593792e-001,
+	 5.0662755966187e-002,  5.0662755966187e-002,  4.9992054700851e-002,  4.9992054700851e-002,
+	-2.1930812299252e-001,  2.1930812299252e-001, -2.1792854368687e-001,  2.1792854368687e-001,
+	 4.9325585365295e-002,  4.9325585365295e-002,  4.8663347959518e-002,  4.8663347959518e-002,
+	-2.1654690802097e-001,  2.1654690802097e-001, -2.1516324579716e-001,  2.1516324579716e-001,
+	 4.8005342483521e-002,  4.8005342483521e-002,  4.7351628541946e-002,  4.7351628541946e-002,
+	-2.1377755701542e-001,  2.1377755701542e-001, -2.1238984167576e-001,  2.1238984167576e-001,
+	 4.6702146530151e-002,  4.6702146530151e-002,  4.6056956052780e-002,  4.6056956052780e-002,
+	-2.1100014448166e-001,  2.1100014448166e-001, -2.0960845053196e-001,  2.0960845053196e-001,
+	 4.5415997505188e-002,  4.5415997505188e-002,  4.4779360294342e-002,  4.4779360294342e-002,
+	-2.0821478962898e-001,  2.0821478962898e-001, -2.0681916177273e-001,  2.0681916177273e-001,
+	 4.4146984815598e-002,  4.4146984815598e-002,  4.3518900871277e-002,  4.3518900871277e-002,
+	-2.0542159676552e-001,  2.0542159676552e-001, -2.0402207970619e-001,  2.0402207970619e-001,
+	 4.2895138263702e-002,  4.2895138263702e-002,  4.2275637388229e-002,  4.2275637388229e-002,
+	-2.0262065529823e-001,  2.0262065529823e-001, -2.0121732354164e-001,  2.0121732354164e-001,
+	 4.1660457849503e-002,  4.1660457849503e-002,  4.1049599647522e-002,  4.1049599647522e-002,
+	-1.9981209933758e-001,  1.9981209933758e-001, -1.9840499758720e-001,  1.9840499758720e-001,
+	 4.0443062782288e-002,  4.0443062782288e-002,  3.9840877056122e-002,  3.9840877056122e-002,
+	-1.9699601829052e-001,  1.9699601829052e-001, -1.9558519124985e-001,  1.9558519124985e-001,
+	 3.9242982864380e-002,  3.9242982864380e-002,  3.8649439811707e-002,  3.8649439811707e-002,
+	-1.9417253136635e-001,  1.9417253136635e-001, -1.9275803864002e-001,  1.9275803864002e-001,
+	 3.8060247898102e-002,  3.8060247898102e-002,  3.7475377321243e-002,  3.7475377321243e-002,
+	-1.9134172797203e-001,  1.9134172797203e-001, -1.8992361426353e-001,  1.8992361426353e-001,
+	 3.6894887685776e-002,  3.6894887685776e-002,  3.6318749189377e-002,  3.6318749189377e-002,
+	-1.8850371241570e-001,  1.8850371241570e-001, -1.8708203732967e-001,  1.8708203732967e-001,
+	 3.5746961832047e-002,  3.5746961832047e-002,  3.5179555416107e-002,  3.5179555416107e-002,
+	-1.8565860390663e-001,  1.8565860390663e-001, -1.8423342704773e-001,  1.8423342704773e-001,
+	 3.4616529941559e-002,  3.4616529941559e-002,  3.4057855606079e-002,  3.4057855606079e-002,
+	-1.8280650675297e-001,  1.8280650675297e-001, -1.8137787282467e-001,  1.8137787282467e-001,
+	 3.3503592014313e-002,  3.3503592014313e-002,  3.2953739166260e-002,  3.2953739166260e-002,
+	-1.7994752526283e-001,  1.7994752526283e-001, -1.7851547896862e-001,  1.7851547896862e-001,
+	 3.2408237457275e-002,  3.2408237457275e-002,  3.1867176294327e-002,  3.1867176294327e-002,
+	-1.7708176374435e-001,  1.7708176374435e-001, -1.7564637959003e-001,  1.7564637959003e-001,
+	 3.1330496072769e-002,  3.1330496072769e-002,  3.0798226594925e-002,  3.0798226594925e-002,
+	-1.7420934140682e-001,  1.7420934140682e-001, -1.7277066409588e-001,  1.7277066409588e-001,
+	 3.0270397663116e-002,  3.0270397663116e-002,  2.9746979475021e-002,  2.9746979475021e-002,
+	-1.7133036255836e-001,  1.7133036255836e-001, -1.6988845169544e-001,  1.6988845169544e-001,
+	 2.9227972030640e-002,  2.9227972030640e-002,  2.8713405132294e-002,  2.8713405132294e-002,
+	-1.6844493150711e-001,  1.6844493150711e-001, -1.6699983179569e-001,  1.6699983179569e-001,
+	 2.8203278779984e-002,  2.8203278779984e-002,  2.7697592973709e-002,  2.7697592973709e-002,
+	-1.6555315256119e-001,  1.6555315256119e-001, -1.6410492360592e-001,  1.6410492360592e-001,
+	 2.7196347713470e-002,  2.7196347713470e-002,  2.6699542999268e-002,  2.6699542999268e-002,
+	-1.6265514492989e-001,  1.6265514492989e-001, -1.6120384633541e-001,  1.6120384633541e-001,
+	 2.6207208633423e-002,  2.6207208633423e-002,  2.5719314813614e-002,  2.5719314813614e-002,
+	-1.5975101292133e-001,  1.5975101292133e-001, -1.5829668939114e-001,  1.5829668939114e-001,
+	 2.5235921144485e-002,  2.5235921144485e-002,  2.4756968021393e-002,  2.4756968021393e-002,
+	-1.5684087574482e-001,  1.5684087574482e-001, -1.5538358688354e-001,  1.5538358688354e-001,
+	 2.4282485246658e-002,  2.4282485246658e-002,  2.3812502622604e-002,  2.3812502622604e-002,
+	-1.5392482280731e-001,  1.5392482280731e-001, -1.5246461331844e-001,  1.5246461331844e-001,
+	 2.3346990346909e-002,  2.3346990346909e-002,  2.2885948419571e-002,  2.2885948419571e-002,
+	-1.5100297331810e-001,  1.5100297331810e-001, -1.4953991770744e-001,  1.4953991770744e-001,
+	 2.2429406642914e-002,  2.2429406642914e-002,  2.1977365016937e-002,  2.1977365016937e-002,
+	-1.4807544648647e-001,  1.4807544648647e-001, -1.4660958945751e-001,  1.4660958945751e-001,
+	 2.1529823541641e-002,  2.1529823541641e-002,  2.1086782217026e-002,  2.1086782217026e-002,
+	-1.4514234662056e-001,  1.4514234662056e-001, -1.4367373287678e-001,  1.4367373287678e-001,
+	 2.0648270845413e-002,  2.0648270845413e-002,  2.0214229822159e-002,  2.0214229822159e-002,
+	-1.4220377802849e-001,  1.4220377802849e-001, -1.4073246717453e-001,  1.4073246717453e-001,
+	 1.9784748554230e-002,  1.9784748554230e-002,  1.9359767436981e-002,  1.9359767436981e-002,
+	-1.3925984501839e-001,  1.3925984501839e-001, -1.3778591156006e-001,  1.3778591156006e-001,
+	 1.8939286470413e-002,  1.8939286470413e-002,  1.8523365259171e-002,  1.8523365259171e-002,
+	-1.3631068170071e-001,  1.3631068170071e-001, -1.3483417034149e-001,  1.3483417034149e-001,
+	 1.8111974000931e-002,  1.8111974000931e-002,  1.7705112695694e-002,  1.7705112695694e-002,
+	-1.3335637748241e-001,  1.3335637748241e-001, -1.3187734782696e-001,  1.3187734782696e-001,
+	 1.7302781343460e-002,  1.7302781343460e-002,  1.6905009746552e-002,  1.6905009746552e-002,
+	-1.3039706647396e-001,  1.3039706647396e-001, -1.2891554832458e-001,  1.2891554832458e-001,
+	 1.6511768102646e-002,  1.6511768102646e-002,  1.6123086214066e-002,  1.6123086214066e-002,
+	-1.2743283808231e-001,  1.2743283808231e-001, -1.2594890594482e-001,  1.2594890594482e-001,
+	 1.5738964080811e-002,  1.5738964080811e-002,  1.5359371900558e-002,  1.5359371900558e-002,
+	-1.2446380406618e-001,  1.2446380406618e-001, -1.2297752499580e-001,  1.2297752499580e-001,
+	 1.4984369277954e-002,  1.4984369277954e-002,  1.4613926410675e-002,  1.4613926410675e-002,
+	-1.2149009108543e-001,  1.2149009108543e-001, -1.2000151723623e-001,  1.2000151723623e-001,
+	 1.4248043298721e-002,  1.4248043298721e-002,  1.3886749744415e-002,  1.3886749744415e-002,
+	-1.1851180344820e-001,  1.1851180344820e-001, -1.1702097952366e-001,  1.1702097952366e-001,
+	 1.3530015945435e-002,  1.3530015945435e-002,  1.3177871704102e-002,  1.3177871704102e-002,
+	-1.1552906036377e-001,  1.1552906036377e-001, -1.1403604596853e-001,  1.1403604596853e-001,
+	 1.2830317020416e-002,  1.2830317020416e-002,  1.2487322092056e-002,  1.2487322092056e-002,
+	-1.1254195868969e-001,  1.1254195868969e-001, -1.1104681342840e-001,  1.1104681342840e-001,
+	 1.2148946523666e-002,  1.2148946523666e-002,  1.1815130710602e-002,  1.1815130710602e-002,
+	-1.0955062508583e-001,  1.0955062508583e-001, -1.0805340111256e-001,  1.0805340111256e-001,
+	 1.1485934257507e-002,  1.1485934257507e-002,  1.1161327362061e-002,  1.1161327362061e-002,
+	-1.0655516386032e-001,  1.0655516386032e-001, -1.0505592077971e-001,  1.0505592077971e-001,
+	 1.0841310024261e-002,  1.0841310024261e-002,  1.0525912046432e-002,  1.0525912046432e-002,
+	-1.0355569422245e-001,  1.0355569422245e-001, -1.0205448418856e-001,  1.0205448418856e-001,
+	 1.0215103626251e-002,  1.0215103626251e-002,  9.9089443683624e-003,  9.9089443683624e-003,
+	-1.0055232048035e-001,  1.0055232048035e-001, -9.9049210548401e-002,  9.9049210548401e-002,
+	 9.6073746681213e-003,  9.6073746681213e-003,  9.3103945255280e-003,  9.3103945255280e-003,
+	-9.7545161843300e-002,  9.7545161843300e-002, -9.6040204167366e-002,  9.6040204167366e-002,
+	 9.0180635452271e-003,  9.0180635452271e-003,  8.7303519248962e-003,  8.7303519248962e-003,
+	-9.4534337520599e-002,  9.4534337520599e-002, -9.3027576804161e-002,  9.3027576804161e-002,
+	 8.4472596645355e-003,  8.4472596645355e-003,  8.1687867641449e-003,  8.1687867641449e-003,
+	-9.1519944369793e-002,  9.1519944369793e-002, -9.0011455118656e-002,  9.0011455118656e-002,
+	 7.8949630260468e-003,  7.8949630260468e-003,  7.6257586479187e-003,  7.6257586479187e-003,
+	-8.8502109050751e-002,  8.8502109050751e-002, -8.6991935968399e-002,  8.6991935968399e-002,
+	 7.3611736297607e-003,  7.3611736297607e-003,  7.1012377738953e-003,  7.1012377738953e-003,
+	-8.5480943322182e-002,  8.5480943322182e-002, -8.3969153463840e-002,  8.3969153463840e-002,
+	 6.8459510803223e-003,  6.8459510803223e-003,  6.5953135490417e-003,  6.5953135490417e-003,
+	-8.2456558942795e-002,  8.2456558942795e-002, -8.0943197011948e-002,  8.0943197011948e-002,
+	 6.3492953777313e-003,  6.3492953777313e-003,  6.1079263687134e-003,  6.1079263687134e-003,
+	-7.9429075121880e-002,  7.9429075121880e-002, -7.7914200723171e-002,  7.7914200723171e-002,
+	 5.8712065219879e-003,  5.8712065219879e-003,  5.6391656398773e-003,  5.6391656398773e-003,
+	-7.6398596167564e-002,  7.6398596167564e-002, -7.4882268905640e-002,  7.4882268905640e-002,
+	 5.4117441177368e-003,  5.4117441177368e-003,  5.1890015602112e-003,  5.1890015602112e-003,
+	-7.3365241289139e-002,  7.3365241289139e-002, -7.1847520768642e-002,  7.1847520768642e-002,
+	 4.9709081649780e-003,  4.9709081649780e-003,  4.7574639320374e-003,  4.7574639320374e-003,
+	-7.0329122245312e-002,  7.0329122245312e-002, -6.8810060620308e-002,  6.8810060620308e-002,
+	 4.5486688613892e-003,  4.5486688613892e-003,  4.3445825576782e-003,  4.3445825576782e-003,
+	-6.7290358245373e-002,  6.7290358245373e-002, -6.5770015120506e-002,  6.5770015120506e-002,
+	 4.1451156139374e-003,  4.1451156139374e-003,  3.9503574371338e-003,  3.9503574371338e-003,
+	-6.4249053597450e-002,  6.4249053597450e-002, -6.2727496027946e-002,  6.2727496027946e-002,
+	 3.7602186203003e-003,  3.7602186203003e-003,  3.5747885704041e-003,  3.5747885704041e-003,
+	-6.1205338686705e-002,  6.1205338686705e-002, -5.9682607650757e-002,  5.9682607650757e-002,
+	 3.3940374851227e-003,  3.3940374851227e-003,  3.2179355621338e-003,  3.2179355621338e-003,
+	-5.8159317821264e-002,  5.8159317821264e-002, -5.6635476648808e-002,  5.6635476648808e-002,
+	 3.0465126037598e-003,  3.0465126037598e-003,  2.8797686100006e-003,  2.8797686100006e-003,
+	-5.5111106485128e-002,  5.5111106485128e-002, -5.3586214780807e-002,  5.3586214780807e-002,
+	 2.7177035808563e-003,  2.7177035808563e-003,  2.5603473186493e-003,  2.5603473186493e-003,
+	-5.2060820162296e-002,  5.2060820162296e-002, -5.0534933805466e-002,  5.0534933805466e-002,
+	 2.4076402187347e-003,  2.4076402187347e-003,  2.2596120834351e-003,  2.2596120834351e-003,
+	-4.9008570611477e-002,  4.9008570611477e-002, -4.7481749206781e-002,  4.7481749206781e-002,
+	 2.1162927150726e-003,  2.1162927150726e-003,  1.9776523113251e-003,  1.9776523113251e-003,
+	-4.5954480767250e-002,  4.5954480767250e-002, -4.4426776468754e-002,  4.4426776468754e-002,
+	 1.8436908721924e-003,  1.8436908721924e-003,  1.7144381999969e-003,  1.7144381999969e-003,
+	-4.2898658663034e-002,  4.2898658663034e-002, -4.1370134800673e-002,  4.1370134800673e-002,
+	 1.5898644924164e-003,  1.5898644924164e-003,  1.4699697494507e-003,  1.4699697494507e-003,
+	-3.9841219782829e-002,  3.9841219782829e-002, -3.8311932235956e-002,  3.8311932235956e-002,
+	 1.3547837734222e-003,  1.3547837734222e-003,  1.2442767620087e-003,  1.2442767620087e-003,
+	-3.6782283335924e-002,  3.6782283335924e-002, -3.5252287983894e-002,  3.5252287983894e-002,
+	 1.1384785175323e-003,  1.1384785175323e-003,  1.0373592376709e-003,  1.0373592376709e-003,
+	-3.3721961081028e-002,  3.3721961081028e-002, -3.2191317528486e-002,  3.2191317528486e-002,
+	 9.4094872474670e-004,  9.4094872474670e-004,  8.4921717643738e-004,  8.4921717643738e-004,
+	-3.0660368502140e-002,  3.0660368502140e-002, -2.9129132628441e-002,  2.9129132628441e-002,
+	 7.6222419738770e-004,  7.6222419738770e-004,  6.7988038063049e-004,  6.7988038063049e-004,
+	-2.7597622945905e-002,  2.7597622945905e-002, -2.6065852493048e-002,  2.6065852493048e-002,
+	 6.0227513313293e-004,  6.0227513313293e-004,  5.2934885025024e-004,  5.2934885025024e-004,
+	-2.4533838033676e-002,  2.4533838033676e-002, -2.3001592606306e-002,  2.3001592606306e-002,
+	 4.6113133430481e-004,  4.6113133430481e-004,  3.9762258529663e-004,  3.9762258529663e-004,
+	-2.1469129249454e-002,  2.1469129249454e-002, -1.9936464726925e-002,  1.9936464726925e-002,
+	 3.3882260322571e-004,  3.3882260322571e-004,  2.8470158576965e-004,  2.8470158576965e-004,
+	-1.8403612077236e-002,  1.8403612077236e-002, -1.6870586201549e-002,  1.6870586201549e-002,
+	 2.3528933525085e-004,  2.3528933525085e-004,  1.9058585166931e-004,  1.9058585166931e-004,
+	-1.5337402001023e-002,  1.5337402001023e-002, -1.3804073445499e-002,  1.3804073445499e-002,
+	 1.5059113502502e-004,  1.5059113502502e-004,  1.1530518531799e-004,  1.1530518531799e-004,
+	-1.2270614504814e-002,  1.2270614504814e-002, -1.0737040080130e-002,  1.0737040080130e-002,
+	 8.4698200225830e-005,  8.4698200225830e-005,  5.8829784393311e-005,  5.8829784393311e-005,
+	-9.2033650726080e-003,  9.2033650726080e-003, -7.6696034520864e-003,  7.6696034520864e-003,
+	 3.7640333175659e-005,  3.7640333175659e-005,  2.1189451217651e-005,  2.1189451217651e-005,
+	-6.1357691884041e-003,  6.1357691884041e-003, -4.6018776483834e-003,  4.6018776483834e-003,
+	 9.4175338745117e-006,  9.4175338745117e-006,  2.3543834686279e-006,  2.3543834686279e-006,
+	-3.0679423362017e-003,  3.0679423362017e-003, -1.5339783858508e-003,  1.5339783858508e-003
+};
+static _MM_ALIGN16 float	W4096[]	 = {
+	 4.9923300743103e-001,  4.9999940395355e-001,  0.0000000000000e+000,  0.0000000000000e+000,
+	 4.9846601486206e-001,  4.9846601486206e-001,  4.9769902229309e-001,  4.9769902229309e-001,
+	-4.9999764561653e-001,  4.9999764561653e-001, -4.9999469518661e-001,  4.9999469518661e-001,
+	 4.9693205952644e-001,  4.9693205952644e-001,  4.9616509675980e-001,  4.9616509675980e-001,
+	-4.9999058246613e-001,  4.9999058246613e-001, -4.9998530745506e-001,  4.9998530745506e-001,
+	 4.9539813399315e-001,  4.9539813399315e-001,  4.9463117122650e-001,  4.9463117122650e-001,
+	-4.9997881054878e-001,  4.9997881054878e-001, -4.9997118115425e-001,  4.9997118115425e-001,
+	 4.9386423826218e-001,  4.9386423826218e-001,  4.9309730529785e-001,  4.9309730529785e-001,
+	-4.9996235966682e-001,  4.9996235966682e-001, -4.9995234608650e-001,  4.9995234608650e-001,
+	 4.9233040213585e-001,  4.9233040213585e-001,  4.9156349897385e-001,  4.9156349897385e-001,
+	-4.9994117021561e-001,  4.9994117021561e-001, -4.9992883205414e-001,  4.9992883205414e-001,
+	 4.9079662561417e-001,  4.9079662561417e-001,  4.9002978205681e-001,  4.9002978205681e-001,
+	-4.9991530179977e-001,  4.9991530179977e-001, -4.9990057945251e-001,  4.9990057945251e-001,
+	 4.8926296830177e-001,  4.8926296830177e-001,  4.8849615454674e-001,  4.8849615454674e-001,
+	-4.9988469481468e-001,  4.9988469481468e-001, -4.9986764788628e-001,  4.9986764788628e-001,
+	 4.8772937059402e-001,  4.8772937059402e-001,  4.8696264624596e-001,  4.8696264624596e-001,
+	-4.9984940886497e-001,  4.9984940886497e-001, -4.9983000755310e-001,  4.9983000755310e-001,
+	 4.8619592189789e-001,  4.8619592189789e-001,  4.8542925715446e-001,  4.8542925715446e-001,
+	-4.9980941414833e-001,  4.9980941414833e-001, -4.9978765845299e-001,  4.9978765845299e-001,
+	 4.8466259241104e-001,  4.8466259241104e-001,  4.8389598727226e-001,  4.8389598727226e-001,
+	-4.9976471066475e-001,  4.9976471066475e-001, -4.9974060058594e-001,  4.9974060058594e-001,
+	 4.8312941193581e-001,  4.8312941193581e-001,  4.8236286640167e-001,  4.8236286640167e-001,
+	-4.9971529841423e-001,  4.9971529841423e-001, -4.9968883395195e-001,  4.9968883395195e-001,
+	 4.8159638047218e-001,  4.8159638047218e-001,  4.8082995414734e-001,  4.8082995414734e-001,
+	-4.9966117739677e-001,  4.9966117739677e-001, -4.9963238835335e-001,  4.9963238835335e-001,
+	 4.8006352782249e-001,  4.8006352782249e-001,  4.7929719090462e-001,  4.7929719090462e-001,
+	-4.9960237741470e-001,  4.9960237741470e-001, -4.9957120418549e-001,  4.9957120418549e-001,
+	 4.7853088378906e-001,  4.7853088378906e-001,  4.7776460647583e-001,  4.7776460647583e-001,
+	-4.9953886866570e-001,  4.9953886866570e-001, -4.9950534105301e-001,  4.9950534105301e-001,
+	 4.7699841856956e-001,  4.7699841856956e-001,  4.7623226046562e-001,  4.7623226046562e-001,
+	-4.9947065114975e-001,  4.9947065114975e-001, -4.9943476915359e-001,  4.9943476915359e-001,
+	 4.7546616196632e-001,  4.7546616196632e-001,  4.7470012307167e-001,  4.7470012307167e-001,
+	-4.9939772486687e-001,  4.9939772486687e-001, -4.9935951828957e-001,  4.9935951828957e-001,
+	 4.7393414378166e-001,  4.7393414378166e-001,  4.7316822409630e-001,  4.7316822409630e-001,
+	-4.9932011961937e-001,  4.9932011961937e-001, -4.9927952885628e-001,  4.9927952885628e-001,
+	 4.7240236401558e-001,  4.7240236401558e-001,  4.7163659334183e-001,  4.7163659334183e-001,
+	-4.9923777580261e-001,  4.9923777580261e-001, -4.9919486045837e-001,  4.9919486045837e-001,
+	 4.7087085247040e-001,  4.7087085247040e-001,  4.7010520100594e-001,  4.7010520100594e-001,
+	-4.9915078282356e-001,  4.9915078282356e-001, -4.9910551309586e-001,  4.9910551309586e-001,
+	 4.6933963894844e-001,  4.6933963894844e-001,  4.6857410669327e-001,  4.6857410669327e-001,
+	-4.9905905127525e-001,  4.9905905127525e-001, -4.9901142716408e-001,  4.9901142716408e-001,
+	 4.6780869364738e-001,  4.6780869364738e-001,  4.6704331040382e-001,  4.6704331040382e-001,
+	-4.9896264076233e-001,  4.9896264076233e-001, -4.9891266226768e-001,  4.9891266226768e-001,
+	 4.6627804636955e-001,  4.6627804636955e-001,  4.6551284193993e-001,  4.6551284193993e-001,
+	-4.9886152148247e-001,  4.9886152148247e-001, -4.9880921840668e-001,  4.9880921840668e-001,
+	 4.6474772691727e-001,  4.6474772691727e-001,  4.6398267149925e-001,  4.6398267149925e-001,
+	-4.9875572323799e-001,  4.9875572323799e-001, -4.9870106577873e-001,  4.9870106577873e-001,
+	 4.6321770548820e-001,  4.6321770548820e-001,  4.6245285868645e-001,  4.6245285868645e-001,
+	-4.9864521622658e-001,  4.9864521622658e-001, -4.9858820438385e-001,  4.9858820438385e-001,
+	 4.6168807148933e-001,  4.6168807148933e-001,  4.6092337369919e-001,  4.6092337369919e-001,
+	-4.9853003025055e-001,  4.9853003025055e-001, -4.9847066402435e-001,  4.9847066402435e-001,
+	 4.6015876531601e-001,  4.6015876531601e-001,  4.5939427614212e-001,  4.5939427614212e-001,
+	-4.9841013550758e-001,  4.9841013550758e-001, -4.9834844470024e-001,  4.9834844470024e-001,
+	 4.5862987637520e-001,  4.5862987637520e-001,  4.5786556601524e-001,  4.5786556601524e-001,
+	-4.9828556180000e-001,  4.9828556180000e-001, -4.9822151660919e-001,  4.9822151660919e-001,
+	 4.5710134506226e-001,  4.5710134506226e-001,  4.5633724331856e-001,  4.5633724331856e-001,
+	-4.9815630912781e-001,  4.9815630912781e-001, -4.9808990955353e-001,  4.9808990955353e-001,
+	 4.5557323098183e-001,  4.5557323098183e-001,  4.5480930805206e-001,  4.5480930805206e-001,
+	-4.9802234768867e-001,  4.9802234768867e-001, -4.9795362353325e-001,  4.9795362353325e-001,
+	 4.5404553413391e-001,  4.5404553413391e-001,  4.5328181982040e-001,  4.5328181982040e-001,
+	-4.9788370728493e-001,  4.9788370728493e-001, -4.9781262874603e-001,  4.9781262874603e-001,
+	 4.5251825451851e-001,  4.5251825451851e-001,  4.5175477862358e-001,  4.5175477862358e-001,
+	-4.9774038791656e-001,  4.9774038791656e-001, -4.9766695499420e-001,  4.9766695499420e-001,
+	 4.5099142193794e-001,  4.5099142193794e-001,  4.5022818446159e-001,  4.5022818446159e-001,
+	-4.9759235978127e-001,  4.9759235978127e-001, -4.9751660227776e-001,  4.9751660227776e-001,
+	 4.4946506619453e-001,  4.4946506619453e-001,  4.4870206713676e-001,  4.4870206713676e-001,
+	-4.9743965268135e-001,  4.9743965268135e-001, -4.9736157059669e-001,  4.9736157059669e-001,
+	 4.4793918728828e-001,  4.4793918728828e-001,  4.4717642664909e-001,  4.4717642664909e-001,
+	-4.9728229641914e-001,  4.9728229641914e-001, -4.9720183014870e-001,  4.9720183014870e-001,
+	 4.4641378521919e-001,  4.4641378521919e-001,  4.4565129280090e-001,  4.4565129280090e-001,
+	-4.9712023139000e-001,  4.9712023139000e-001, -4.9703744053841e-001,  4.9703744053841e-001,
+	 4.4488888978958e-001,  4.4488888978958e-001,  4.4412663578987e-001,  4.4412663578987e-001,
+	-4.9695348739624e-001,  4.9695348739624e-001, -4.9686837196350e-001,  4.9686837196350e-001,
+	 4.4336453080177e-001,  4.4336453080177e-001,  4.4260254502296e-001,  4.4260254502296e-001,
+	-4.9678206443787e-001,  4.9678206443787e-001, -4.9669459462166e-001,  4.9669459462166e-001,
+	 4.4184067845345e-001,  4.4184067845345e-001,  4.4107896089554e-001,  4.4107896089554e-001,
+	-4.9660596251488e-001,  4.9660596251488e-001, -4.9651616811752e-001,  4.9651616811752e-001,
+	 4.4031739234924e-001,  4.4031739234924e-001,  4.3955594301224e-001,  4.3955594301224e-001,
+	-4.9642521142960e-001,  4.9642521142960e-001, -4.9633306264877e-001,  4.9633306264877e-001,
+	 4.3879467248917e-001,  4.3879467248917e-001,  4.3803352117538e-001,  4.3803352117538e-001,
+	-4.9623978137970e-001,  4.9623978137970e-001, -4.9614530801773e-001,  4.9614530801773e-001,
+	 4.3727248907089e-001,  4.3727248907089e-001,  4.3651163578033e-001,  4.3651163578033e-001,
+	-4.9604964256287e-001,  4.9604964256287e-001, -4.9595284461975e-001,  4.9595284461975e-001,
+	 4.3575096130371e-001,  4.3575096130371e-001,  4.3499037623405e-001,  4.3499037623405e-001,
+	-4.9585488438606e-001,  4.9585488438606e-001, -4.9575573205948e-001,  4.9575573205948e-001,
+	 4.3422996997833e-001,  4.3422996997833e-001,  4.3346974253654e-001,  4.3346974253654e-001,
+	-4.9565541744232e-001,  4.9565541744232e-001, -4.9555397033691e-001,  4.9555397033691e-001,
+	 4.3270963430405e-001,  4.3270963430405e-001,  4.3194970488548e-001,  4.3194970488548e-001,
+	-4.9545133113861e-001,  4.9545133113861e-001, -4.9534749984741e-001,  4.9534749984741e-001,
+	 4.3118995428085e-001,  4.3118995428085e-001,  4.3043032288551e-001,  4.3043032288551e-001,
+	-4.9524253606796e-001,  4.9524253606796e-001, -4.9513640999794e-001,  4.9513640999794e-001,
+	 4.2967087030411e-001,  4.2967087030411e-001,  4.2891159653664e-001,  4.2891159653664e-001,
+	-4.9502909183502e-001,  4.9502909183502e-001, -4.9492064118385e-001,  4.9492064118385e-001,
+	 4.2815247178078e-001,  4.2815247178078e-001,  4.2739352583885e-001,  4.2739352583885e-001,
+	-4.9481099843979e-001,  4.9481099843979e-001, -4.9470022320747e-001,  4.9470022320747e-001,
+	 4.2663475871086e-001,  4.2663475871086e-001,  4.2587614059448e-001,  4.2587614059448e-001,
+	-4.9458825588226e-001,  4.9458825588226e-001, -4.9447512626648e-001,  4.9447512626648e-001,
+	 4.2511773109436e-001,  4.2511773109436e-001,  4.2435947060585e-001,  4.2435947060585e-001,
+	-4.9436083436012e-001,  4.9436083436012e-001, -4.9424540996552e-001,  4.9424540996552e-001,
+	 4.2360138893127e-001,  4.2360138893127e-001,  4.2284351587296e-001,  4.2284351587296e-001,
+	-4.9412879347801e-001,  4.9412879347801e-001, -4.9401101469994e-001,  4.9401101469994e-001,
+	 4.2208579182625e-001,  4.2208579182625e-001,  4.2132827639580e-001,  4.2132827639580e-001,
+	-4.9389207363129e-001,  4.9389207363129e-001, -4.9377197027206e-001,  4.9377197027206e-001,
+	 4.2057090997696e-001,  4.2057090997696e-001,  4.1981375217438e-001,  4.1981375217438e-001,
+	-4.9365070462227e-001,  4.9365070462227e-001, -4.9352827668190e-001,  4.9352827668190e-001,
+	 4.1905680298805e-001,  4.1905680298805e-001,  4.1830003261566e-001,  4.1830003261566e-001,
+	-4.9340468645096e-001,  4.9340468645096e-001, -4.9327996373177e-001,  4.9327996373177e-001,
+	 4.1754344105721e-001,  4.1754344105721e-001,  4.1678702831268e-001,  4.1678702831268e-001,
+	-4.9315404891968e-001,  4.9315404891968e-001, -4.9302697181702e-001,  4.9302697181702e-001,
+	 4.1603085398674e-001,  4.1603085398674e-001,  4.1527485847473e-001,  4.1527485847473e-001,
+	-4.9289876222610e-001,  4.9289876222610e-001, -4.9276936054230e-001,  4.9276936054230e-001,
+	 4.1451907157898e-001,  4.1451907157898e-001,  4.1376346349716e-001,  4.1376346349716e-001,
+	-4.9263882637024e-001,  4.9263882637024e-001, -4.9250712990761e-001,  4.9250712990761e-001,
+	 4.1300806403160e-001,  4.1300806403160e-001,  4.1225287318230e-001,  4.1225287318230e-001,
+	-4.9237424135208e-001,  4.9237424135208e-001, -4.9224022030830e-001,  4.9224022030830e-001,
+	 4.1149789094925e-001,  4.1149789094925e-001,  4.1074311733246e-001,  4.1074311733246e-001,
+	-4.9210503697395e-001,  4.9210503697395e-001, -4.9196872115135e-001,  4.9196872115135e-001,
+	 4.0998855233192e-001,  4.0998855233192e-001,  4.0923419594765e-001,  4.0923419594765e-001,
+	-4.9183121323586e-001,  4.9183121323586e-001, -4.9169254302979e-001,  4.9169254302979e-001,
+	 4.0848004817963e-001,  4.0848004817963e-001,  4.0772613883018e-001,  4.0772613883018e-001,
+	-4.9155274033546e-001,  4.9155274033546e-001, -4.9141177535057e-001,  4.9141177535057e-001,
+	 4.0697240829468e-001,  4.0697240829468e-001,  4.0621894598007e-001,  4.0621894598007e-001,
+	-4.9126964807510e-001,  4.9126964807510e-001, -4.9112635850906e-001,  4.9112635850906e-001,
+	 4.0546566247940e-001,  4.0546566247940e-001,  4.0471261739731e-001,  4.0471261739731e-001,
+	-4.9098193645477e-001,  4.9098193645477e-001, -4.9083635210991e-001,  4.9083635210991e-001,
+	 4.0395981073380e-001,  4.0395981073380e-001,  4.0320721268654e-001,  4.0320721268654e-001,
+	-4.9068960547447e-001,  4.9068960547447e-001, -4.9054169654846e-001,  4.9054169654846e-001,
+	 4.0245485305786e-001,  4.0245485305786e-001,  4.0170270204544e-001,  4.0170270204544e-001,
+	-4.9039262533188e-001,  4.9039262533188e-001, -4.9024242162704e-001,  4.9024242162704e-001,
+	 4.0095078945160e-001,  4.0095078945160e-001,  4.0019911527634e-001,  4.0019911527634e-001,
+	-4.9009105563164e-001,  4.9009105563164e-001, -4.8993855714798e-001,  4.8993855714798e-001,
+	 3.9944767951965e-001,  3.9944767951965e-001,  3.9869648218155e-001,  3.9869648218155e-001,
+	-4.8978489637375e-001,  4.8978489637375e-001, -4.8963007330894e-001,  4.8963007330894e-001,
+	 3.9794552326202e-001,  3.9794552326202e-001,  3.9719480276108e-001,  3.9719480276108e-001,
+	-4.8947408795357e-001,  4.8947408795357e-001, -4.8931697010994e-001,  4.8931697010994e-001,
+	 3.9644432067871e-001,  3.9644432067871e-001,  3.9569407701492e-001,  3.9569407701492e-001,
+	-4.8915868997574e-001,  4.8915868997574e-001, -4.8899924755096e-001,  4.8899924755096e-001,
+	 3.9494407176971e-001,  3.9494407176971e-001,  3.9419433474541e-001,  3.9419433474541e-001,
+	-4.8883867263794e-001,  4.8883867263794e-001, -4.8867693543434e-001,  4.8867693543434e-001,
+	 3.9344483613968e-001,  3.9344483613968e-001,  3.9269560575485e-001,  3.9269560575485e-001,
+	-4.8851406574249e-001,  4.8851406574249e-001, -4.8835003376007e-001,  4.8835003376007e-001,
+	 3.9194661378860e-001,  3.9194661378860e-001,  3.9119786024094e-001,  3.9119786024094e-001,
+	-4.8818486928940e-001,  4.8818486928940e-001, -4.8801854252815e-001,  4.8801854252815e-001,
+	 3.9044937491417e-001,  3.9044937491417e-001,  3.8970115780830e-001,  3.8970115780830e-001,
+	-4.8785105347633e-001,  4.8785105347633e-001, -4.8768243193626e-001,  4.8768243193626e-001,
+	 3.8895317912102e-001,  3.8895317912102e-001,  3.8820546865463e-001,  3.8820546865463e-001,
+	-4.8751267790794e-001,  4.8751267790794e-001, -4.8734176158905e-001,  4.8734176158905e-001,
+	 3.8745802640915e-001,  3.8745802640915e-001,  3.8671088218689e-001,  3.8671088218689e-001,
+	-4.8716968297958e-001,  4.8716968297958e-001, -4.8699647188187e-001,  4.8699647188187e-001,
+	 3.8596394658089e-001,  3.8596394658089e-001,  3.8521730899811e-001,  3.8521730899811e-001,
+	-4.8682212829590e-001,  4.8682212829590e-001, -4.8664662241936e-001,  4.8664662241936e-001,
+	 3.8447093963623e-001,  3.8447093963623e-001,  3.8372483849525e-001,  3.8372483849525e-001,
+	-4.8646998405457e-001,  4.8646998405457e-001, -4.8629218339920e-001,  4.8629218339920e-001,
+	 3.8297903537750e-001,  3.8297903537750e-001,  3.8223347067833e-001,  3.8223347067833e-001,
+	-4.8611325025558e-001,  4.8611325025558e-001, -4.8593315482140e-001,  4.8593315482140e-001,
+	 3.8148820400238e-001,  3.8148820400238e-001,  3.8074320554733e-001,  3.8074320554733e-001,
+	-4.8575195670128e-001,  4.8575195670128e-001, -4.8556956648827e-001,  4.8556956648827e-001,
+	 3.7999847531319e-001,  3.7999847531319e-001,  3.7925404310226e-001,  3.7925404310226e-001,
+	-4.8538607358932e-001,  4.8538607358932e-001, -4.8520141839981e-001,  4.8520141839981e-001,
+	 3.7850990891457e-001,  3.7850990891457e-001,  3.7776604294777e-001,  3.7776604294777e-001,
+	-4.8501563072205e-001,  4.8501563072205e-001, -4.8482868075371e-001,  4.8482868075371e-001,
+	 3.7702247500420e-001,  3.7702247500420e-001,  3.7627917528152e-001,  3.7627917528152e-001,
+	-4.8464062809944e-001,  4.8464062809944e-001, -4.8445141315460e-001,  4.8445141315460e-001,
+	 3.7553620338440e-001,  3.7553620338440e-001,  3.7479349970818e-001,  3.7479349970818e-001,
+	-4.8426103591919e-001,  4.8426103591919e-001, -4.8406955599785e-001,  4.8406955599785e-001,
+	 3.7405109405518e-001,  3.7405109405518e-001,  3.7330895662308e-001,  3.7330895662308e-001,
+	-4.8387691378593e-001,  4.8387691378593e-001, -4.8368313908577e-001,  4.8368313908577e-001,
+	 3.7256717681885e-001,  3.7256717681885e-001,  3.7182563543320e-001,  3.7182563543320e-001,
+	-4.8348823189735e-001,  4.8348823189735e-001, -4.8329219222069e-001,  4.8329219222069e-001,
+	 3.7108445167542e-001,  3.7108445167542e-001,  3.7034353613853e-001,  3.7034353613853e-001,
+	-4.8309499025345e-001,  4.8309499025345e-001, -4.8289668560028e-001,  4.8289668560028e-001,
+	 3.6960291862488e-001,  3.6960291862488e-001,  3.6886262893677e-001,  3.6886262893677e-001,
+	-4.8269721865654e-001,  4.8269721865654e-001, -4.8249661922455e-001,  4.8249661922455e-001,
+	 3.6812263727188e-001,  3.6812263727188e-001,  3.6738300323486e-001,  3.6738300323486e-001,
+	-4.8229488730431e-001,  4.8229488730431e-001, -4.8209202289581e-001,  4.8209202289581e-001,
+	 3.6664360761642e-001,  3.6664360761642e-001,  3.6590456962585e-001,  3.6590456962585e-001,
+	-4.8188802599907e-001,  4.8188802599907e-001, -4.8168289661407e-001,  4.8168289661407e-001,
+	 3.6516582965851e-001,  3.6516582965851e-001,  3.6442741751671e-001,  3.6442741751671e-001,
+	-4.8147663474083e-001,  4.8147663474083e-001, -4.8126924037933e-001,  4.8126924037933e-001,
+	 3.6368930339813e-001,  3.6368930339813e-001,  3.6295154690742e-001,  3.6295154690742e-001,
+	-4.8106071352959e-001,  4.8106071352959e-001, -4.8085102438927e-001,  4.8085102438927e-001,
+	 3.6221408843994e-001,  3.6221408843994e-001,  3.6147695779800e-001,  3.6147695779800e-001,
+	-4.8064023256302e-001,  4.8064023256302e-001, -4.8042830824852e-001,  4.8042830824852e-001,
+	 3.6074015498161e-001,  3.6074015498161e-001,  3.6000367999077e-001,  3.6000367999077e-001,
+	-4.8021525144577e-001,  4.8021525144577e-001, -4.8000106215477e-001,  4.8000106215477e-001,
+	 3.5926753282547e-001,  3.5926753282547e-001,  3.5853171348572e-001,  3.5853171348572e-001,
+	-4.7978577017784e-001,  4.7978577017784e-001, -4.7956931591034e-001,  4.7956931591034e-001,
+	 3.5779622197151e-001,  3.5779622197151e-001,  3.5706108808517e-001,  3.5706108808517e-001,
+	-4.7935172915459e-001,  4.7935172915459e-001, -4.7913303971291e-001,  4.7913303971291e-001,
+	 3.5632628202438e-001,  3.5632628202438e-001,  3.5559177398682e-001,  3.5559177398682e-001,
+	-4.7891321778297e-001,  4.7891321778297e-001, -4.7869226336479e-001,  4.7869226336479e-001,
+	 3.5485765337944e-001,  3.5485765337944e-001,  3.5412386059761e-001,  3.5412386059761e-001,
+	-4.7847017645836e-001,  4.7847017645836e-001, -4.7824695706367e-001,  4.7824695706367e-001,
+	 3.5339039564133e-001,  3.5339039564133e-001,  3.5265731811523e-001,  3.5265731811523e-001,
+	-4.7802263498306e-001,  4.7802263498306e-001, -4.7779718041420e-001,  4.7779718041420e-001,
+	 3.5192453861237e-001,  3.5192453861237e-001,  3.5119214653969e-001,  3.5119214653969e-001,
+	-4.7757059335709e-001,  4.7757059335709e-001, -4.7734287381172e-001,  4.7734287381172e-001,
+	 3.5046008229256e-001,  3.5046008229256e-001,  3.4972837567329e-001,  3.4972837567329e-001,
+	-4.7711405158043e-001,  4.7711405158043e-001, -4.7688409686089e-001,  4.7688409686089e-001,
+	 3.4899702668190e-001,  3.4899702668190e-001,  3.4826600551605e-001,  3.4826600551605e-001,
+	-4.7665300965309e-001,  4.7665300965309e-001, -4.7642081975937e-001,  4.7642081975937e-001,
+	 3.4753537178040e-001,  3.4753537178040e-001,  3.4680509567261e-001,  3.4680509567261e-001,
+	-4.7618749737740e-001,  4.7618749737740e-001, -4.7595307230949e-001,  4.7595307230949e-001,
+	 3.4607517719269e-001,  3.4607517719269e-001,  3.4534561634064e-001,  3.4534561634064e-001,
+	-4.7571751475334e-001,  4.7571751475334e-001, -4.7548082470894e-001,  4.7548082470894e-001,
+	 3.4461641311646e-001,  3.4461641311646e-001,  3.4388756752014e-001,  3.4388756752014e-001,
+	-4.7524303197861e-001,  4.7524303197861e-001, -4.7500410676003e-001,  4.7500410676003e-001,
+	 3.4315913915634e-001,  3.4315913915634e-001,  3.4243103861809e-001,  3.4243103861809e-001,
+	-4.7476407885551e-001,  4.7476407885551e-001, -4.7452294826508e-001,  4.7452294826508e-001,
+	 3.4170329570770e-001,  3.4170329570770e-001,  3.4097594022751e-001,  3.4097594022751e-001,
+	-4.7428068518639e-001,  4.7428068518639e-001, -4.7403728961945e-001,  4.7403728961945e-001,
+	 3.4024900197983e-001,  3.4024900197983e-001,  3.3952236175537e-001,  3.3952236175537e-001,
+	-4.7379279136658e-001,  4.7379279136658e-001, -4.7354719042778e-001,  4.7354719042778e-001,
+	 3.3879613876343e-001,  3.3879613876343e-001,  3.3807033300400e-001,  3.3807033300400e-001,
+	-4.7330045700073e-001,  4.7330045700073e-001, -4.7305262088776e-001,  4.7305262088776e-001,
+	 3.3734485507011e-001,  3.3734485507011e-001,  3.3661976456642e-001,  3.3661976456642e-001,
+	-4.7280365228653e-001,  4.7280365228653e-001, -4.7255358099937e-001,  4.7255358099937e-001,
+	 3.3589506149292e-001,  3.3589506149292e-001,  3.3517074584961e-001,  3.3517074584961e-001,
+	-4.7230240702629e-001,  4.7230240702629e-001, -4.7205013036728e-001,  4.7205013036728e-001,
+	 3.3444684743881e-001,  3.3444684743881e-001,  3.3372330665588e-001,  3.3372330665588e-001,
+	-4.7179672122002e-001,  4.7179672122002e-001, -4.7154220938683e-001,  4.7154220938683e-001,
+	 3.3300018310547e-001,  3.3300018310547e-001,  3.3227741718292e-001,  3.3227741718292e-001,
+	-4.7128659486771e-001,  4.7128659486771e-001, -4.7102987766266e-001,  4.7102987766266e-001,
+	 3.3155506849289e-001,  3.3155506849289e-001,  3.3083310723305e-001,  3.3083310723305e-001,
+	-4.7077202796936e-001,  4.7077202796936e-001, -4.7051307559013e-001,  4.7051307559013e-001,
+	 3.3011156320572e-001,  3.3011156320572e-001,  3.2939040660858e-001,  3.2939040660858e-001,
+	-4.7025302052498e-001,  4.7025302052498e-001, -4.6999186277390e-001,  4.6999186277390e-001,
+	 3.2866963744164e-001,  3.2866963744164e-001,  3.2794928550720e-001,  3.2794928550720e-001,
+	-4.6972960233688e-001,  4.6972960233688e-001, -4.6946623921394e-001,  4.6946623921394e-001,
+	 3.2722932100296e-001,  3.2722932100296e-001,  3.2650977373123e-001,  3.2650977373123e-001,
+	-4.6920177340508e-001,  4.6920177340508e-001, -4.6893617510796e-001,  4.6893617510796e-001,
+	 3.2579064369202e-001,  3.2579064369202e-001,  3.2507193088531e-001,  3.2507193088531e-001,
+	-4.6866950392723e-001,  4.6866950392723e-001, -4.6840173006058e-001,  4.6840173006058e-001,
+	 3.2435363531113e-001,  3.2435363531113e-001,  3.2363569736481e-001,  3.2363569736481e-001,
+	-4.6813282370567e-001,  4.6813282370567e-001, -4.6786284446716e-001,  4.6786284446716e-001,
+	 3.2291823625565e-001,  3.2291823625565e-001,  3.2220116257668e-001,  3.2220116257668e-001,
+	-4.6759176254272e-001,  4.6759176254272e-001, -4.6731957793236e-001,  4.6731957793236e-001,
+	 3.2148450613022e-001,  3.2148450613022e-001,  3.2076829671860e-001,  3.2076829671860e-001,
+	-4.6704626083374e-001,  4.6704626083374e-001, -4.6677187085152e-001,  4.6677187085152e-001,
+	 3.2005247473717e-001,  3.2005247473717e-001,  3.1933709979057e-001,  3.1933709979057e-001,
+	-4.6649640798569e-001,  4.6649640798569e-001, -4.6621981263161e-001,  4.6621981263161e-001,
+	 3.1862211227417e-001,  3.1862211227417e-001,  3.1790760159492e-001,  3.1790760159492e-001,
+	-4.6594214439392e-001,  4.6594214439392e-001, -4.6566334366798e-001,  4.6566334366798e-001,
+	 3.1719350814819e-001,  3.1719350814819e-001,  3.1647980213165e-001,  3.1647980213165e-001,
+	-4.6538347005844e-001,  4.6538347005844e-001, -4.6510252356529e-001,  4.6510252356529e-001,
+	 3.1576657295227e-001,  3.1576657295227e-001,  3.1505376100540e-001,  3.1505376100540e-001,
+	-4.6482044458389e-001,  4.6482044458389e-001, -4.6453729271889e-001,  4.6453729271889e-001,
+	 3.1434139609337e-001,  3.1434139609337e-001,  3.1362944841385e-001,  3.1362944841385e-001,
+	-4.6425303816795e-001,  4.6425303816795e-001, -4.6396768093109e-001,  4.6396768093109e-001,
+	 3.1291794776917e-001,  3.1291794776917e-001,  3.1220692396164e-001,  3.1220692396164e-001,
+	-4.6368125081062e-001,  4.6368125081062e-001, -4.6339374780655e-001,  4.6339374780655e-001,
+	 3.1149628758430e-001,  3.1149628758430e-001,  3.1078612804413e-001,  3.1078612804413e-001,
+	-4.6310511231422e-001,  4.6310511231422e-001, -4.6281540393829e-001,  4.6281540393829e-001,
+	 3.1007638573647e-001,  3.1007638573647e-001,  3.0936712026596e-001,  3.0936712026596e-001,
+	-4.6252462267876e-001,  4.6252462267876e-001, -4.6223273873329e-001,  4.6223273873329e-001,
+	 3.0865827202797e-001,  3.0865827202797e-001,  3.0794990062714e-001,  3.0794990062714e-001,
+	-4.6193975210190e-001,  4.6193975210190e-001, -4.6164569258690e-001,  4.6164569258690e-001,
+	 3.0724197626114e-001,  3.0724197626114e-001,  3.0653449892998e-001,  3.0653449892998e-001,
+	-4.6135056018829e-001,  4.6135056018829e-001, -4.6105432510376e-001,  4.6105432510376e-001,
+	 3.0582746863365e-001,  3.0582746863365e-001,  3.0512091517448e-001,  3.0512091517448e-001,
+	-4.6075701713562e-001,  4.6075701713562e-001, -4.6045860648155e-001,  4.6045860648155e-001,
+	 3.0441480875015e-001,  3.0441480875015e-001,  3.0370914936066e-001,  3.0370914936066e-001,
+	-4.6015912294388e-001,  4.6015912294388e-001, -4.5985856652260e-001,  4.5985856652260e-001,
+	 3.0300396680832e-001,  3.0300396680832e-001,  3.0229926109314e-001,  3.0229926109314e-001,
+	-4.5955693721771e-001,  4.5955693721771e-001, -4.5925420522690e-001,  4.5925420522690e-001,
+	 3.0159500241280e-001,  3.0159500241280e-001,  3.0089122056961e-001,  3.0089122056961e-001,
+	-4.5895040035248e-001,  4.5895040035248e-001, -4.5864549279213e-001,  4.5864549279213e-001,
+	 3.0018788576126e-001,  3.0018788576126e-001,  2.9948502779007e-001,  2.9948502779007e-001,
+	-4.5833954215050e-001,  4.5833954215050e-001, -4.5803248882294e-001,  4.5803248882294e-001,
+	 2.9878267645836e-001,  2.9878267645836e-001,  2.9808077216148e-001,  2.9808077216148e-001,
+	-4.5772436261177e-001,  4.5772436261177e-001, -4.5741516351700e-001,  4.5741516351700e-001,
+	 2.9737934470177e-001,  2.9737934470177e-001,  2.9667836427689e-001,  2.9667836427689e-001,
+	-4.5710486173630e-001,  4.5710486173630e-001, -4.5679351687431e-001,  4.5679351687431e-001,
+	 2.9597792029381e-001,  2.9597792029381e-001,  2.9527792334557e-001,  2.9527792334557e-001,
+	-4.5648109912872e-001,  4.5648109912872e-001, -4.5616757869720e-001,  4.5616757869720e-001,
+	 2.9457840323448e-001,  2.9457840323448e-001,  2.9387938976288e-001,  2.9387938976288e-001,
+	-4.5585301518440e-001,  4.5585301518440e-001, -4.5553737878799e-001,  4.5553737878799e-001,
+	 2.9318082332611e-001,  2.9318082332611e-001,  2.9248279333115e-001,  2.9248279333115e-001,
+	-4.5522063970566e-001,  4.5522063970566e-001, -4.5490285754204e-001,  4.5490285754204e-001,
+	 2.9178521037102e-001,  2.9178521037102e-001,  2.9108813405037e-001,  2.9108813405037e-001,
+	-4.5458400249481e-001,  4.5458400249481e-001, -4.5426404476166e-001,  4.5426404476166e-001,
+	 2.9039156436920e-001,  2.9039156436920e-001,  2.8969544172287e-001,  2.8969544172287e-001,
+	-4.5394304394722e-001,  4.5394304394722e-001, -4.5362100005150e-001,  4.5362100005150e-001,
+	 2.8899985551834e-001,  2.8899985551834e-001,  2.8830474615097e-001,  2.8830474615097e-001,
+	-4.5329785346985e-001,  4.5329785346985e-001, -4.5297363400459e-001,  4.5297363400459e-001,
+	 2.8761017322540e-001,  2.8761017322540e-001,  2.8691604733467e-001,  2.8691604733467e-001,
+	-4.5264837145805e-001,  4.5264837145805e-001, -4.5232203602791e-001,  4.5232203602791e-001,
+	 2.8622245788574e-001,  2.8622245788574e-001,  2.8552934527397e-001,  2.8552934527397e-001,
+	-4.5199465751648e-001,  4.5199465751648e-001, -4.5166617631912e-001,  4.5166617631912e-001,
+	 2.8483676910400e-001,  2.8483676910400e-001,  2.8414466977119e-001,  2.8414466977119e-001,
+	-4.5133665204048e-001,  4.5133665204048e-001, -4.5100605487823e-001,  4.5100605487823e-001,
+	 2.8345310688019e-001,  2.8345310688019e-001,  2.8276202082634e-001,  2.8276202082634e-001,
+	-4.5067441463470e-001,  4.5067441463470e-001, -4.5034170150757e-001,  4.5034170150757e-001,
+	 2.8207147121429e-001,  2.8207147121429e-001,  2.8138142824173e-001,  2.8138142824173e-001,
+	-4.5000794529915e-001,  4.5000794529915e-001, -4.4967311620712e-001,  4.4967311620712e-001,
+	 2.8069186210632e-001,  2.8069186210632e-001,  2.8000286221504e-001,  2.8000286221504e-001,
+	-4.4933724403381e-001,  4.4933724403381e-001, -4.4900029897690e-001,  4.4900029897690e-001,
+	 2.7931433916092e-001,  2.7931433916092e-001,  2.7862638235092e-001,  2.7862638235092e-001,
+	-4.4866228103638e-001,  4.4866228103638e-001, -4.4832322001457e-001,  4.4832322001457e-001,
+	 2.7793890237808e-001,  2.7793890237808e-001,  2.7725198864937e-001,  2.7725198864937e-001,
+	-4.4798311591148e-001,  4.4798311591148e-001, -4.4764196872711e-001,  4.4764196872711e-001,
+	 2.7656558156013e-001,  2.7656558156013e-001,  2.7587968111038e-001,  2.7587968111038e-001,
+	-4.4729974865913e-001,  4.4729974865913e-001, -4.4695645570755e-001,  4.4695645570755e-001,
+	 2.7519434690475e-001,  2.7519434690475e-001,  2.7450948953629e-001,  2.7450948953629e-001,
+	-4.4661214947701e-001,  4.4661214947701e-001, -4.4626677036285e-001,  4.4626677036285e-001,
+	 2.7382519841194e-001,  2.7382519841194e-001,  2.7314144372940e-001,  2.7314144372940e-001,
+	-4.4592034816742e-001,  4.4592034816742e-001, -4.4557288289070e-001,  4.4557288289070e-001,
+	 2.7245819568634e-001,  2.7245819568634e-001,  2.7177548408508e-001,  2.7177548408508e-001,
+	-4.4522434473038e-001,  4.4522434473038e-001, -4.4487479329109e-001,  4.4487479329109e-001,
+	 2.7109333872795e-001,  2.7109333872795e-001,  2.7041172981262e-001,  2.7041172981262e-001,
+	-4.4452416896820e-001,  4.4452416896820e-001, -4.4417250156403e-001,  4.4417250156403e-001,
+	 2.6973062753677e-001,  2.6973062753677e-001,  2.6905009150505e-001,  2.6905009150505e-001,
+	-4.4381982088089e-001,  4.4381982088089e-001, -4.4346606731415e-001,  4.4346606731415e-001,
+	 2.6837009191513e-001,  2.6837009191513e-001,  2.6769065856934e-001,  2.6769065856934e-001,
+	-4.4311127066612e-001,  4.4311127066612e-001, -4.4275543093681e-001,  4.4275543093681e-001,
+	 2.6701176166534e-001,  2.6701176166534e-001,  2.6633340120316e-001,  2.6633340120316e-001,
+	-4.4239854812622e-001,  4.4239854812622e-001, -4.4204062223434e-001,  4.4204062223434e-001,
+	 2.6565557718277e-001,  2.6565557718277e-001,  2.6497834920883e-001,  2.6497834920883e-001,
+	-4.4168165326118e-001,  4.4168165326118e-001, -4.4132167100906e-001,  4.4132167100906e-001,
+	 2.6430162787437e-001,  2.6430162787437e-001,  2.6362547278404e-001,  2.6362547278404e-001,
+	-4.4096061587334e-001,  4.4096061587334e-001, -4.4059854745865e-001,  4.4059854745865e-001,
+	 2.6294988393784e-001,  2.6294988393784e-001,  2.6227486133575e-001,  2.6227486133575e-001,
+	-4.4023543596268e-001,  4.4023543596268e-001, -4.3987128138542e-001,  4.3987128138542e-001,
+	 2.6160037517548e-001,  2.6160037517548e-001,  2.6092648506165e-001,  2.6092648506165e-001,
+	-4.3950611352921e-001,  4.3950611352921e-001, -4.3913990259171e-001,  4.3913990259171e-001,
+	 2.6025313138962e-001,  2.6025313138962e-001,  2.5958031415939e-001,  2.5958031415939e-001,
+	-4.3877264857292e-001,  4.3877264857292e-001, -4.3840435147285e-001,  4.3840435147285e-001,
+	 2.5890809297562e-001,  2.5890809297562e-001,  2.5823646783829e-001,  2.5823646783829e-001,
+	-4.3803504109383e-001,  4.3803504109383e-001, -4.3766468763351e-001,  4.3766468763351e-001,
+	 2.5756537914276e-001,  2.5756537914276e-001,  2.5689485669136e-001,  2.5689485669136e-001,
+	-4.3729332089424e-001,  4.3729332089424e-001, -4.3692091107368e-001,  4.3692091107368e-001,
+	 2.5622493028641e-001,  2.5622493028641e-001,  2.5555554032326e-001,  2.5555554032326e-001,
+	-4.3654748797417e-001,  4.3654748797417e-001, -4.3617302179337e-001,  4.3617302179337e-001,
+	 2.5488674640656e-001,  2.5488674640656e-001,  2.5421851873398e-001,  2.5421851873398e-001,
+	-4.3579754233360e-001,  4.3579754233360e-001, -4.3542101979256e-001,  4.3542101979256e-001,
+	 2.5355088710785e-001,  2.5355088710785e-001,  2.5288385152817e-001,  2.5288385152817e-001,
+	-4.3504348397255e-001,  4.3504348397255e-001, -4.3466493487358e-001,  4.3466493487358e-001,
+	 2.5221735239029e-001,  2.5221735239029e-001,  2.5155144929886e-001,  2.5155144929886e-001,
+	-4.3428534269333e-001,  4.3428534269333e-001, -4.3390473723412e-001,  4.3390473723412e-001,
+	 2.5088614225388e-001,  2.5088614225388e-001,  2.5022143125534e-001,  2.5022143125534e-001,
+	-4.3352311849594e-001,  4.3352311849594e-001, -4.3314048647881e-001,  4.3314048647881e-001,
+	 2.4955731630325e-001,  2.4955731630325e-001,  2.4889376759529e-001,  2.4889376759529e-001,
+	-4.3275681138039e-001,  4.3275681138039e-001, -4.3237212300301e-001,  4.3237212300301e-001,
+	 2.4823081493378e-001,  2.4823081493378e-001,  2.4756842851639e-001,  2.4756842851639e-001,
+	-4.3198642134666e-001,  4.3198642134666e-001, -4.3159970641136e-001,  4.3159970641136e-001,
+	 2.4690666794777e-001,  2.4690666794777e-001,  2.4624550342560e-001,  2.4624550342560e-001,
+	-4.3121197819710e-001,  4.3121197819710e-001, -4.3082323670387e-001,  4.3082323670387e-001,
+	 2.4558493494987e-001,  2.4558493494987e-001,  2.4492493271828e-001,  2.4492493271828e-001,
+	-4.3043345212936e-001,  4.3043345212936e-001, -4.3004268407822e-001,  4.3004268407822e-001,
+	 2.4426555633545e-001,  2.4426555633545e-001,  2.4360680580139e-001,  2.4360680580139e-001,
+	-4.2965090274811e-001,  4.2965090274811e-001, -4.2925810813904e-001,  4.2925810813904e-001,
+	 2.4294862151146e-001,  2.4294862151146e-001,  2.4229106307030e-001,  2.4229106307030e-001,
+	-4.2886430025101e-001,  4.2886430025101e-001, -4.2846947908401e-001,  4.2846947908401e-001,
+	 2.4163410067558e-001,  2.4163410067558e-001,  2.4097773432732e-001,  2.4097773432732e-001,
+	-4.2807367444038e-001,  4.2807367444038e-001, -4.2767682671547e-001,  4.2767682671547e-001,
+	 2.4032199382782e-001,  2.4032199382782e-001,  2.3966687917709e-001,  2.3966687917709e-001,
+	-4.2727899551392e-001,  4.2727899551392e-001, -4.2688015103340e-001,  4.2688015103340e-001,
+	 2.3901236057281e-001,  2.3901236057281e-001,  2.3835843801498e-001,  2.3835843801498e-001,
+	-4.2648029327393e-001,  4.2648029327393e-001, -4.2607945203781e-001,  4.2607945203781e-001,
+	 2.3770514130592e-001,  2.3770514130592e-001,  2.3705247044563e-001,  2.3705247044563e-001,
+	-4.2567759752274e-001,  4.2567759752274e-001, -4.2527472972870e-001,  4.2527472972870e-001,
+	 2.3640042543411e-001,  2.3640042543411e-001,  2.3574900627136e-001,  2.3574900627136e-001,
+	-4.2487087845802e-001,  4.2487087845802e-001, -4.2446601390839e-001,  4.2446601390839e-001,
+	 2.3509818315506e-001,  2.3509818315506e-001,  2.3444798588753e-001,  2.3444798588753e-001,
+	-4.2406016588211e-001,  4.2406016588211e-001, -4.2365330457687e-001,  4.2365330457687e-001,
+	 2.3379844427109e-001,  2.3379844427109e-001,  2.3314949870110e-001,  2.3314949870110e-001,
+	-4.2324545979500e-001,  4.2324545979500e-001, -4.2283663153648e-001,  4.2283663153648e-001,
+	 2.3250117897987e-001,  2.3250117897987e-001,  2.3185351490974e-001,  2.3185351490974e-001,
+	-4.2242678999901e-001,  4.2242678999901e-001, -4.2201593518257e-001,  4.2201593518257e-001,
+	 2.3120644688606e-001,  2.3120644688606e-001,  2.3056003451347e-001,  2.3056003451347e-001,
+	-4.2160412669182e-001,  4.2160412669182e-001, -4.2119130492210e-001,  4.2119130492210e-001,
+	 2.2991424798965e-001,  2.2991424798965e-001,  2.2926911711693e-001,  2.2926911711693e-001,
+	-4.2077746987343e-001,  4.2077746987343e-001, -4.2036268115044e-001,  4.2036268115044e-001,
+	 2.2862461209297e-001,  2.2862461209297e-001,  2.2798073291779e-001,  2.2798073291779e-001,
+	-4.1994687914848e-001,  4.1994687914848e-001, -4.1953012347221e-001,  4.1953012347221e-001,
+	 2.2733750939369e-001,  2.2733750939369e-001,  2.2669491171837e-001,  2.2669491171837e-001,
+	-4.1911235451698e-001,  4.1911235451698e-001, -4.1869360208511e-001,  4.1869360208511e-001,
+	 2.2605296969414e-001,  2.2605296969414e-001,  2.2541165351868e-001,  2.2541165351868e-001,
+	-4.1827386617661e-001,  4.1827386617661e-001, -4.1785314679146e-001,  4.1785314679146e-001,
+	 2.2477099299431e-001,  2.2477099299431e-001,  2.2413098812103e-001,  2.2413098812103e-001,
+	-4.1743144392967e-001,  4.1743144392967e-001, -4.1700875759125e-001,  4.1700875759125e-001,
+	 2.2349163889885e-001,  2.2349163889885e-001,  2.2285294532776e-001,  2.2285294532776e-001,
+	-4.1658508777618e-001,  4.1658508777618e-001, -4.1616043448448e-001,  4.1616043448448e-001,
+	 2.2221487760544e-001,  2.2221487760544e-001,  2.2157746553421e-001,  2.2157746553421e-001,
+	-4.1573479771614e-001,  4.1573479771614e-001, -4.1530820727348e-001,  4.1530820727348e-001,
+	 2.2094073891640e-001,  2.2094073891640e-001,  2.2030463814735e-001,  2.2030463814735e-001,
+	-4.1488060355186e-001,  4.1488060355186e-001, -4.1445204615593e-001,  4.1445204615593e-001,
+	 2.1966919302940e-001,  2.1966919302940e-001,  2.1903443336487e-001,  2.1903443336487e-001,
+	-4.1402250528336e-001,  4.1402250528336e-001, -4.1359201073647e-001,  4.1359201073647e-001,
+	 2.1840032935143e-001,  2.1840032935143e-001,  2.1776688098907e-001,  2.1776688098907e-001,
+	-4.1316053271294e-001,  4.1316053271294e-001, -4.1272807121277e-001,  4.1272807121277e-001,
+	 2.1713408827782e-001,  2.1713408827782e-001,  2.1650198101997e-001,  2.1650198101997e-001,
+	-4.1229465603828e-001,  4.1229465603828e-001, -4.1186025738716e-001,  4.1186025738716e-001,
+	 2.1587052941322e-001,  2.1587052941322e-001,  2.1523973345757e-001,  2.1523973345757e-001,
+	-4.1142487525940e-001,  4.1142487525940e-001, -4.1098853945732e-001,  4.1098853945732e-001,
+	 2.1460962295532e-001,  2.1460962295532e-001,  2.1398016810417e-001,  2.1398016810417e-001,
+	-4.1055124998093e-001,  4.1055124998093e-001, -4.1011297702789e-001,  4.1011297702789e-001,
+	 2.1335139870644e-001,  2.1335139870644e-001,  2.1272331476212e-001,  2.1272331476212e-001,
+	-4.0967375040054e-001,  4.0967375040054e-001, -4.0923357009888e-001,  4.0923357009888e-001,
+	 2.1209588646889e-001,  2.1209588646889e-001,  2.1146914362907e-001,  2.1146914362907e-001,
+	-4.0879240632057e-001,  4.0879240632057e-001, -4.0835028886795e-001,  4.0835028886795e-001,
+	 2.1084308624268e-001,  2.1084308624268e-001,  2.1021771430969e-001,  2.1021771430969e-001,
+	-4.0790718793869e-001,  4.0790718793869e-001, -4.0746316313744e-001,  4.0746316313744e-001,
+	 2.0959302783012e-001,  2.0959302783012e-001,  2.0896899700165e-001,  2.0896899700165e-001,
+	-4.0701815485954e-001,  4.0701815485954e-001, -4.0657219290733e-001,  4.0657219290733e-001,
+	 2.0834565162659e-001,  2.0834565162659e-001,  2.0772302150726e-001,  2.0772302150726e-001,
+	-4.0612527728081e-001,  4.0612527728081e-001, -4.0567740797997e-001,  4.0567740797997e-001,
+	 2.0710107684135e-001,  2.0710107684135e-001,  2.0647978782654e-001,  2.0647978782654e-001,
+	-4.0522858500481e-001,  4.0522858500481e-001, -4.0477880835533e-001,  4.0477880835533e-001,
+	 2.0585921406746e-001,  2.0585921406746e-001,  2.0523932576180e-001,  2.0523932576180e-001,
+	-4.0432807803154e-001,  4.0432807803154e-001, -4.0387639403343e-001,  4.0387639403343e-001,
+	 2.0462015271187e-001,  2.0462015271187e-001,  2.0400163531303e-001,  2.0400163531303e-001,
+	-4.0342378616333e-001,  4.0342378616333e-001, -4.0297019481659e-001,  4.0297019481659e-001,
+	 2.0338383316994e-001,  2.0338383316994e-001,  2.0276674628258e-001,  2.0276674628258e-001,
+	-4.0251564979553e-001,  4.0251564979553e-001, -4.0206018090248e-001,  4.0206018090248e-001,
+	 2.0215034484863e-001,  2.0215034484863e-001,  2.0153462886810e-001,  2.0153462886810e-001,
+	-4.0160375833511e-001,  4.0160375833511e-001, -4.0114638209343e-001,  4.0114638209343e-001,
+	 2.0091962814331e-001,  2.0091962814331e-001,  2.0030534267426e-001,  2.0030534267426e-001,
+	-4.0068808197975e-001,  4.0068808197975e-001, -4.0022882819176e-001,  4.0022882819176e-001,
+	 1.9969174265862e-001,  1.9969174265862e-001,  1.9907885789871e-001,  1.9907885789871e-001,
+	-3.9976862072945e-001,  3.9976862072945e-001, -3.9930748939514e-001,  3.9930748939514e-001,
+	 1.9846668839455e-001,  1.9846668839455e-001,  1.9785523414612e-001,  1.9785523414612e-001,
+	-3.9884540438652e-001,  3.9884540438652e-001, -3.9838239550591e-001,  3.9838239550591e-001,
+	 1.9724446535110e-001,  1.9724446535110e-001,  1.9663444161415e-001,  1.9663444161415e-001,
+	-3.9791843295097e-001,  3.9791843295097e-001, -3.9745354652405e-001,  3.9745354652405e-001,
+	 1.9602510333061e-001,  1.9602510333061e-001,  1.9541648030281e-001,  1.9541648030281e-001,
+	-3.9698773622513e-001,  3.9698773622513e-001, -3.9652097225189e-001,  3.9652097225189e-001,
+	 1.9480860233307e-001,  1.9480860233307e-001,  1.9420140981674e-001,  1.9420140981674e-001,
+	-3.9605328440666e-001,  3.9605328440666e-001, -3.9558467268944e-001,  3.9558467268944e-001,
+	 1.9359496235847e-001,  1.9359496235847e-001,  1.9298920035362e-001,  1.9298920035362e-001,
+	-3.9511510729790e-001,  3.9511510729790e-001, -3.9464461803436e-001,  3.9464461803436e-001,
+	 1.9238418340683e-001,  1.9238418340683e-001,  1.9177991151810e-001,  1.9177991151810e-001,
+	-3.9417320489883e-001,  3.9417320489883e-001, -3.9370086789131e-001,  3.9370086789131e-001,
+	 1.9117632508278e-001,  1.9117632508278e-001,  1.9057351350784e-001,  1.9057351350784e-001,
+	-3.9322760701180e-001,  3.9322760701180e-001, -3.9275342226028e-001,  3.9275342226028e-001,
+	 1.8997138738632e-001,  1.8997138738632e-001,  1.8937000632286e-001,  1.8937000632286e-001,
+	-3.9227828383446e-001,  3.9227828383446e-001, -3.9180225133896e-001,  3.9180225133896e-001,
+	 1.8876934051514e-001,  1.8876934051514e-001,  1.8816941976547e-001,  1.8816941976547e-001,
+	-3.9132529497147e-001,  3.9132529497147e-001, -3.9084741473198e-001,  3.9084741473198e-001,
+	 1.8757024407387e-001,  1.8757024407387e-001,  1.8697178363800e-001,  1.8697178363800e-001,
+	-3.9036861062050e-001,  3.9036861062050e-001, -3.8988888263702e-001,  3.8988888263702e-001,
+	 1.8637409806252e-001,  1.8637409806252e-001,  1.8577709794044e-001,  1.8577709794044e-001,
+	-3.8940826058388e-001,  3.8940826058388e-001, -3.8892668485641e-001,  3.8892668485641e-001,
+	 1.8518087267876e-001,  1.8518087267876e-001,  1.8458539247513e-001,  1.8458539247513e-001,
+	-3.8844421505928e-001,  3.8844421505928e-001, -3.8796085119247e-001,  3.8796085119247e-001,
+	 1.8399062752724e-001,  1.8399062752724e-001,  1.8339660763741e-001,  1.8339660763741e-001,
+	-3.8747653365135e-001,  3.8747653365135e-001, -3.8699135184288e-001,  3.8699135184288e-001,
+	 1.8280336260796e-001,  1.8280336260796e-001,  1.8221083283424e-001,  1.8221083283424e-001,
+	-3.8650521636009e-001,  3.8650521636009e-001, -3.8601818680763e-001,  3.8601818680763e-001,
+	 1.8161904811859e-001,  1.8161904811859e-001,  1.8102803826332e-001,  1.8102803826332e-001,
+	-3.8553026318550e-001,  3.8553026318550e-001, -3.8504141569138e-001,  3.8504141569138e-001,
+	 1.8043777346611e-001,  1.8043777346611e-001,  1.7984825372696e-001,  1.7984825372696e-001,
+	-3.8455167412758e-001,  3.8455167412758e-001, -3.8406100869179e-001,  3.8406100869179e-001,
+	 1.7925947904587e-001,  1.7925947904587e-001,  1.7867147922516e-001,  1.7867147922516e-001,
+	-3.8356944918633e-001,  3.8356944918633e-001, -3.8307699561119e-001,  3.8307699561119e-001,
+	 1.7808422446251e-001,  1.7808422446251e-001,  1.7749771475792e-001,  1.7749771475792e-001,
+	-3.8258361816406e-001,  3.8258361816406e-001, -3.8208937644958e-001,  3.8208937644958e-001,
+	 1.7691197991371e-001,  1.7691197991371e-001,  1.7632701992989e-001,  1.7632701992989e-001,
+	-3.8159421086311e-001,  3.8159421086311e-001, -3.8109815120697e-001,  3.8109815120697e-001,
+	 1.7574280500412e-001,  1.7574280500412e-001,  1.7515933513641e-001,  1.7515933513641e-001,
+	-3.8060119748116e-001,  3.8060119748116e-001, -3.8010331988335e-001,  3.8010331988335e-001,
+	 1.7457664012909e-001,  1.7457664012909e-001,  1.7399471998215e-001,  1.7399471998215e-001,
+	-3.7960457801819e-001,  3.7960457801819e-001, -3.7910494208336e-001,  3.7910494208336e-001,
+	 1.7341357469559e-001,  1.7341357469559e-001,  1.7283317446709e-001,  1.7283317446709e-001,
+	-3.7860441207886e-001,  3.7860441207886e-001, -3.7810298800468e-001,  3.7810298800468e-001,
+	 1.7225357890129e-001,  1.7225357890129e-001,  1.7167472839355e-001,  1.7167472839355e-001,
+	-3.7760066986084e-001,  3.7760066986084e-001, -3.7709748744965e-001,  3.7709748744965e-001,
+	 1.7109665274620e-001,  1.7109665274620e-001,  1.7051935195923e-001,  1.7051935195923e-001,
+	-3.7659338116646e-001,  3.7659338116646e-001, -3.7608841061592e-001,  3.7608841061592e-001,
+	 1.6994282603264e-001,  1.6994282603264e-001,  1.6936707496643e-001,  1.6936707496643e-001,
+	-3.7558254599571e-001,  3.7558254599571e-001, -3.7507581710815e-001,  3.7507581710815e-001,
+	 1.6879209876060e-001,  1.6879209876060e-001,  1.6821792721748e-001,  1.6821792721748e-001,
+	-3.7456819415092e-001,  3.7456819415092e-001, -3.7405967712402e-001,  3.7405967712402e-001,
+	 1.6764450073242e-001,  1.6764450073242e-001,  1.6707187891006e-001,  1.6707187891006e-001,
+	-3.7355029582977e-001,  3.7355029582977e-001, -3.7304002046585e-001,  3.7304002046585e-001,
+	 1.6650003194809e-001,  1.6650003194809e-001,  1.6592895984650e-001,  1.6592895984650e-001,
+	-3.7252888083458e-001,  3.7252888083458e-001, -3.7201687693596e-001,  3.7201687693596e-001,
+	 1.6535869240761e-001,  1.6535869240761e-001,  1.6478919982910e-001,  1.6478919982910e-001,
+	-3.7150397896767e-001,  3.7150397896767e-001, -3.7099018692970e-001,  3.7099018692970e-001,
+	 1.6422051191330e-001,  1.6422051191330e-001,  1.6365259885788e-001,  1.6365259885788e-001,
+	-3.7047556042671e-001,  3.7047556042671e-001, -3.6996003985405e-001,  3.6996003985405e-001,
+	 1.6308549046516e-001,  1.6308549046516e-001,  1.6251915693283e-001,  1.6251915693283e-001,
+	-3.6944365501404e-001,  3.6944365501404e-001, -3.6892640590668e-001,  3.6892640590668e-001,
+	 1.6195362806320e-001,  1.6195362806320e-001,  1.6138890385628e-001,  1.6138890385628e-001,
+	-3.6840826272964e-001,  3.6840826272964e-001, -3.6788928508759e-001,  3.6788928508759e-001,
+	 1.6082498431206e-001,  1.6082498431206e-001,  1.6026183962822e-001,  1.6026183962822e-001,
+	-3.6736944317818e-001,  3.6736944317818e-001, -3.6684870719910e-001,  3.6684870719910e-001,
+	 1.5969949960709e-001,  1.5969949960709e-001,  1.5913796424866e-001,  1.5913796424866e-001,
+	-3.6632713675499e-001,  3.6632713675499e-001, -3.6580467224121e-001,  3.6580467224121e-001,
+	 1.5857723355293e-001,  1.5857723355293e-001,  1.5801727771759e-001,  1.5801727771759e-001,
+	-3.6528137326241e-001,  3.6528137326241e-001, -3.6475721001625e-001,  3.6475721001625e-001,
+	 1.5745815634727e-001,  1.5745815634727e-001,  1.5689983963966e-001,  1.5689983963966e-001,
+	-3.6423218250275e-001,  3.6423218250275e-001, -3.6370632052422e-001,  3.6370632052422e-001,
+	 1.5634232759476e-001,  1.5634232759476e-001,  1.5578562021255e-001,  1.5578562021255e-001,
+	-3.6317956447601e-001,  3.6317956447601e-001, -3.6265197396278e-001,  3.6265197396278e-001,
+	 1.5522971749306e-001,  1.5522971749306e-001,  1.5467464923859e-001,  1.5467464923859e-001,
+	-3.6212354898453e-001,  3.6212354898453e-001, -3.6159422993660e-001,  3.6159422993660e-001,
+	 1.5412035584450e-001,  1.5412035584450e-001,  1.5356689691544e-001,  1.5356689691544e-001,
+	-3.6106407642365e-001,  3.6106407642365e-001, -3.6053308844566e-001,  3.6053308844566e-001,
+	 1.5301427245140e-001,  1.5301427245140e-001,  1.5246242284775e-001,  1.5246242284775e-001,
+	-3.6000123620033e-001,  3.6000123620033e-001, -3.5946854948997e-001,  3.5946854948997e-001,
+	 1.5191143751144e-001,  1.5191143751144e-001,  1.5136122703552e-001,  1.5136122703552e-001,
+	-3.5893502831459e-001,  3.5893502831459e-001, -3.5840064287186e-001,  3.5840064287186e-001,
+	 1.5081188082695e-001,  1.5081188082695e-001,  1.5026330947876e-001,  1.5026330947876e-001,
+	-3.5786539316177e-001,  3.5786539316177e-001, -3.5732933878899e-001,  3.5732933878899e-001,
+	 1.4971560239792e-001,  1.4971560239792e-001,  1.4916869997978e-001,  1.4916869997978e-001,
+	-3.5679242014885e-001,  3.5679242014885e-001, -3.5625466704369e-001,  3.5625466704369e-001,
+	 1.4862263202667e-001,  1.4862263202667e-001,  1.4807736873627e-001,  1.4807736873627e-001,
+	-3.5571607947350e-001,  3.5571607947350e-001, -3.5517665743828e-001,  3.5517665743828e-001,
+	 1.4753293991089e-001,  1.4753293991089e-001,  1.4698937535286e-001,  1.4698937535286e-001,
+	-3.5463640093803e-001,  3.5463640093803e-001, -3.5409530997276e-001,  3.5409530997276e-001,
+	 1.4644661545753e-001,  1.4644661545753e-001,  1.4590469002724e-001,  1.4590469002724e-001,
+	-3.5355338454247e-001,  3.5355338454247e-001, -3.5301062464714e-001,  3.5301062464714e-001,
+	 1.4536359906197e-001,  1.4536359906197e-001,  1.4482334256172e-001,  1.4482334256172e-001,
+	-3.5246706008911e-001,  3.5246706008911e-001, -3.5192263126373e-001,  3.5192263126373e-001,
+	 1.4428392052650e-001,  1.4428392052650e-001,  1.4374533295631e-001,  1.4374533295631e-001,
+	-3.5137736797333e-001,  3.5137736797333e-001, -3.5083130002022e-001,  3.5083130002022e-001,
+	 1.4320757985115e-001,  1.4320757985115e-001,  1.4267066121101e-001,  1.4267066121101e-001,
+	-3.5028439760208e-001,  3.5028439760208e-001, -3.4973669052124e-001,  3.4973669052124e-001,
+	 1.4213460683823e-001,  1.4213460683823e-001,  1.4159935712814e-001,  1.4159935712814e-001,
+	-3.4918811917305e-001,  3.4918811917305e-001, -3.4863877296448e-001,  3.4863877296448e-001,
+	 1.4106497168541e-001,  1.4106497168541e-001,  1.4053145051003e-001,  1.4053145051003e-001,
+	-3.4808856248856e-001,  3.4808856248856e-001, -3.4753757715225e-001,  3.4753757715225e-001,
+	 1.3999876379967e-001,  1.3999876379967e-001,  1.3946691155434e-001,  1.3946691155434e-001,
+	-3.4698572754860e-001,  3.4698572754860e-001, -3.4643310308456e-001,  3.4643310308456e-001,
+	 1.3893592357635e-001,  1.3893592357635e-001,  1.3840577006340e-001,  1.3840577006340e-001,
+	-3.4587964415550e-001,  3.4587964415550e-001, -3.4532535076141e-001,  3.4532535076141e-001,
+	 1.3787645101547e-001,  1.3787645101547e-001,  1.3734802603722e-001,  1.3734802603722e-001,
+	-3.4477028250694e-001,  3.4477028250694e-001, -3.4421437978745e-001,  3.4421437978745e-001,
+	 1.3682043552399e-001,  1.3682043552399e-001,  1.3629367947578e-001,  1.3629367947578e-001,
+	-3.4365767240524e-001,  3.4365767240524e-001, -3.4310016036034e-001,  3.4310016036034e-001,
+	 1.3576781749725e-001,  1.3576781749725e-001,  1.3524278998375e-001,  1.3524278998375e-001,
+	-3.4254184365273e-001,  3.4254184365273e-001, -3.4198272228241e-001,  3.4198272228241e-001,
+	 1.3471862673759e-001,  1.3471862673759e-001,  1.3419532775879e-001,  1.3419532775879e-001,
+	-3.4142276644707e-001,  3.4142276644707e-001, -3.4086203575134e-001,  3.4086203575134e-001,
+	 1.3367286324501e-001,  1.3367286324501e-001,  1.3315129280090e-001,  1.3315129280090e-001,
+	-3.4030050039291e-001,  3.4030050039291e-001, -3.3973816037178e-001,  3.3973816037178e-001,
+	 1.3263055682182e-001,  1.3263055682182e-001,  1.3211071491241e-001,  1.3211071491241e-001,
+	-3.3917501568794e-001,  3.3917501568794e-001, -3.3861109614372e-001,  3.3861109614372e-001,
+	 1.3159173727036e-001,  1.3159173727036e-001,  1.3107359409332e-001,  1.3107359409332e-001,
+	-3.3804637193680e-001,  3.3804637193680e-001, -3.3748084306717e-001,  3.3748084306717e-001,
+	 1.3055634498596e-001,  1.3055634498596e-001,  1.3003996014595e-001,  1.3003996014595e-001,
+	-3.3691450953484e-001,  3.3691450953484e-001, -3.3634740114212e-001,  3.3634740114212e-001,
+	 1.2952443957329e-001,  1.2952443957329e-001,  1.2900981307030e-001,  1.2900981307030e-001,
+	-3.3577948808670e-001,  3.3577948808670e-001, -3.3521080017090e-001,  3.3521080017090e-001,
+	 1.2849602103233e-001,  1.2849602103233e-001,  1.2798312306404e-001,  1.2798312306404e-001,
+	-3.3464130759239e-001,  3.3464130759239e-001, -3.3407104015350e-001,  3.3407104015350e-001,
+	 1.2747111916542e-001,  1.2747111916542e-001,  1.2695997953415e-001,  1.2695997953415e-001,
+	-3.3349996805191e-001,  3.3349996805191e-001, -3.3292812108994e-001,  3.3292812108994e-001,
+	 1.2644970417023e-001,  1.2644970417023e-001,  1.2594032287598e-001,  1.2594032287598e-001,
+	-3.3235549926758e-001,  3.3235549926758e-001, -3.3178207278252e-001,  3.3178207278252e-001,
+	 1.2543180584908e-001,  1.2543180584908e-001,  1.2492418289185e-001,  1.2492418289185e-001,
+	-3.3120790123940e-001,  3.3120790123940e-001, -3.3063292503357e-001,  3.3063292503357e-001,
+	 1.2441745400429e-001,  1.2441745400429e-001,  1.2391158938408e-001,  1.2391158938408e-001,
+	-3.3005717396736e-001,  3.3005717396736e-001, -3.2948064804077e-001,  3.2948064804077e-001,
+	 1.2340661883354e-001,  1.2340661883354e-001,  1.2290251255035e-001,  1.2290251255035e-001,
+	-3.2890334725380e-001,  3.2890334725380e-001, -3.2832527160645e-001,  3.2832527160645e-001,
+	 1.2239933013916e-001,  1.2239933013916e-001,  1.2189701199532e-001,  1.2189701199532e-001,
+	-3.2774642109871e-001,  3.2774642109871e-001, -3.2716682553291e-001,  3.2716682553291e-001,
+	 1.2139558792114e-001,  1.2139558792114e-001,  1.2089505791664e-001,  1.2089505791664e-001,
+	-3.2658642530441e-001,  3.2658642530441e-001, -3.2600528001785e-001,  3.2600528001785e-001,
+	 1.2039542198181e-001,  1.2039542198181e-001,  1.1989668011665e-001,  1.1989668011665e-001,
+	-3.2542335987091e-001,  3.2542335987091e-001, -3.2484066486359e-001,  3.2484066486359e-001,
+	 1.1939880251884e-001,  1.1939880251884e-001,  1.1890184879303e-001,  1.1890184879303e-001,
+	-3.2425719499588e-001,  3.2425719499588e-001, -3.2367298007011e-001,  3.2367298007011e-001,
+	 1.1840578913689e-001,  1.1840578913689e-001,  1.1791062355042e-001,  1.1791062355042e-001,
+	-3.2308802008629e-001,  3.2308802008629e-001, -3.2250228524208e-001,  3.2250228524208e-001,
+	 1.1741638183594e-001,  1.1741638183594e-001,  1.1692300438881e-001,  1.1692300438881e-001,
+	-3.2191577553749e-001,  3.2191577553749e-001, -3.2132852077484e-001,  3.2132852077484e-001,
+	 1.1643055081367e-001,  1.1643055081367e-001,  1.1593899130821e-001,  1.1593899130821e-001,
+	-3.2074052095413e-001,  3.2074052095413e-001, -3.2015174627304e-001,  3.2015174627304e-001,
+	 1.1544832587242e-001,  1.1544832587242e-001,  1.1495858430862e-001,  1.1495858430862e-001,
+	-3.1956222653389e-001,  3.1956222653389e-001, -3.1897196173668e-001,  3.1897196173668e-001,
+	 1.1446973681450e-001,  1.1446973681450e-001,  1.1398181319237e-001,  1.1398181319237e-001,
+	-3.1838095188141e-001,  3.1838095188141e-001, -3.1778916716576e-001,  3.1778916716576e-001,
+	 1.1349478363991e-001,  1.1349478363991e-001,  1.1300864815712e-001,  1.1300864815712e-001,
+	-3.1719663739204e-001,  3.1719663739204e-001, -3.1660339236259e-001,  3.1660339236259e-001,
+	 1.1252346634865e-001,  1.1252346634865e-001,  1.1203914880753e-001,  1.1203914880753e-001,
+	-3.1600937247276e-001,  3.1600937247276e-001, -3.1541460752487e-001,  3.1541460752487e-001,
+	 1.1155578494072e-001,  1.1155578494072e-001,  1.1107331514359e-001,  1.1107331514359e-001,
+	-3.1481912732124e-001,  3.1481912732124e-001, -3.1422290205956e-001,  3.1422290205956e-001,
+	 1.1059173941612e-001,  1.1059173941612e-001,  1.1011111736298e-001,  1.1011111736298e-001,
+	-3.1362590193748e-001,  3.1362590193748e-001, -3.1302821636200e-001,  3.1302821636200e-001,
+	 1.0963138937950e-001,  1.0963138937950e-001,  1.0915258526802e-001,  1.0915258526802e-001,
+	-3.1242975592613e-001,  3.1242975592613e-001, -3.1183058023453e-001,  3.1183058023453e-001,
+	 1.0867470502853e-001,  1.0867470502853e-001,  1.0819774866104e-001,  1.0819774866104e-001,
+	-3.1123065948486e-001,  3.1123065948486e-001, -3.1062999367714e-001,  3.1062999367714e-001,
+	 1.0772171616554e-001,  1.0772171616554e-001,  1.0724657773972e-001,  1.0724657773972e-001,
+	-3.1002861261368e-001,  3.1002861261368e-001, -3.0942648649216e-001,  3.0942648649216e-001,
+	 1.0677239298820e-001,  1.0677239298820e-001,  1.0629913210869e-001,  1.0629913210869e-001,
+	-3.0882367491722e-001,  3.0882367491722e-001, -3.0822008848190e-001,  3.0822008848190e-001,
+	 1.0582679510117e-001,  1.0582679510117e-001,  1.0535538196564e-001,  1.0535538196564e-001,
+	-3.0761581659317e-001,  3.0761581659317e-001, -3.0701079964638e-001,  3.0701079964638e-001,
+	 1.0488489270210e-001,  1.0488489270210e-001,  1.0441532731056e-001,  1.0441532731056e-001,
+	-3.0640503764153e-001,  3.0640503764153e-001, -3.0579859018326e-001,  3.0579859018326e-001,
+	 1.0394671559334e-001,  1.0394671559334e-001,  1.0347902774811e-001,  1.0347902774811e-001,
+	-3.0519139766693e-001,  3.0519139766693e-001, -3.0458351969719e-001,  3.0458351969719e-001,
+	 1.0301226377487e-001,  1.0301226377487e-001,  1.0254645347595e-001,  1.0254645347595e-001,
+	-3.0397489666939e-001,  3.0397489666939e-001, -3.0336555838585e-001,  3.0336555838585e-001,
+	 1.0208156704903e-001,  1.0208156704903e-001,  1.0161760449409e-001,  1.0161760449409e-001,
+	-3.0275553464890e-001,  3.0275553464890e-001, -3.0214476585388e-001,  3.0214476585388e-001,
+	 1.0115459561348e-001,  1.0115459561348e-001,  1.0069251060486e-001,  1.0069251060486e-001,
+	-3.0153331160545e-001,  3.0153331160545e-001, -3.0092114210129e-001,  3.0092114210129e-001,
+	 1.0023137927055e-001,  1.0023137927055e-001,  9.9771171808243e-002,  9.9771171808243e-002,
+	-3.0030825734138e-001,  3.0030825734138e-001, -2.9969465732574e-001,  2.9969465732574e-001,
+	 9.9311918020248e-002,  9.9311918020248e-002,  9.8853617906570e-002,  9.8853617906570e-002,
+	-2.9908037185669e-001,  2.9908037185669e-001, -2.9846537113190e-001,  2.9846537113190e-001,
+	 9.8396241664886e-002,  9.8396241664886e-002,  9.7939819097519e-002,  9.7939819097519e-002,
+	-2.9784965515137e-001,  2.9784965515137e-001, -2.9723325371742e-001,  2.9723325371742e-001,
+	 9.7484350204468e-002,  9.7484350204468e-002,  9.7029805183411e-002,  9.7029805183411e-002,
+	-2.9661616683006e-001,  2.9661616683006e-001, -2.9599836468697e-001,  2.9599836468697e-001,
+	 9.6576213836670e-002,  9.6576213836670e-002,  9.6123605966568e-002,  9.6123605966568e-002,
+	-2.9537984728813e-001,  2.9537984728813e-001, -2.9476067423820e-001,  2.9476067423820e-001,
+	 9.5671921968460e-002,  9.5671921968460e-002,  9.5221191644669e-002,  9.5221191644669e-002,
+	-2.9414078593254e-001,  2.9414078593254e-001, -2.9352021217346e-001,  2.9352021217346e-001,
+	 9.4771414995193e-002,  9.4771414995193e-002,  9.4322592020035e-002,  9.4322592020035e-002,
+	-2.9289892315865e-001,  2.9289892315865e-001, -2.9227697849274e-001,  2.9227697849274e-001,
+	 9.3874722719193e-002,  9.3874722719193e-002,  9.3427807092667e-002,  9.3427807092667e-002,
+	-2.9165434837341e-001,  2.9165434837341e-001, -2.9103100299835e-001,  2.9103100299835e-001,
+	 9.2981845140457e-002,  9.2981845140457e-002,  9.2536836862564e-002,  9.2536836862564e-002,
+	-2.9040697216988e-001,  2.9040697216988e-001, -2.8978228569031e-001,  2.8978228569031e-001,
+	 9.2092812061310e-002,  9.2092812061310e-002,  9.1649711132050e-002,  9.1649711132050e-002,
+	-2.8915691375732e-001,  2.8915691375732e-001, -2.8853085637093e-001,  2.8853085637093e-001,
+	 9.1207593679428e-002,  9.1207593679428e-002,  9.0766429901123e-002,  9.0766429901123e-002,
+	-2.8790411353111e-001,  2.8790411353111e-001, -2.8727668523788e-001,  2.8727668523788e-001,
+	 9.0326249599457e-002,  9.0326249599457e-002,  8.9887022972107e-002,  8.9887022972107e-002,
+	-2.8664860129356e-001,  2.8664860129356e-001, -2.8601983189583e-001,  2.8601983189583e-001,
+	 8.9448750019073e-002,  8.9448750019073e-002,  8.9011460542679e-002,  8.9011460542679e-002,
+	-2.8539037704468e-001,  2.8539037704468e-001, -2.8476026654243e-001,  2.8476026654243e-001,
+	 8.8575124740601e-002,  8.8575124740601e-002,  8.8139742612839e-002,  8.8139742612839e-002,
+	-2.8412947058678e-001,  2.8412947058678e-001, -2.8349801898003e-001,  2.8349801898003e-001,
+	 8.7705343961716e-002,  8.7705343961716e-002,  8.7271928787231e-002,  8.7271928787231e-002,
+	-2.8286591172218e-001,  2.8286591172218e-001, -2.8223311901093e-001,  2.8223311901093e-001,
+	 8.6839467287064e-002,  8.6839467287064e-002,  8.6407989263535e-002,  8.6407989263535e-002,
+	-2.8159967064857e-001,  2.8159967064857e-001, -2.8096556663513e-001,  2.8096556663513e-001,
+	 8.5977494716644e-002,  8.5977494716644e-002,  8.5547953844070e-002,  8.5547953844070e-002,
+	-2.8033080697060e-001,  2.8033080697060e-001, -2.7969536185265e-001,  2.7969536185265e-001,
+	 8.5119396448135e-002,  8.5119396448135e-002,  8.4691792726517e-002,  8.4691792726517e-002,
+	-2.7905926108360e-001,  2.7905926108360e-001, -2.7842253446579e-001,  2.7842253446579e-001,
+	 8.4265202283859e-002,  8.4265202283859e-002,  8.3839565515518e-002,  8.3839565515518e-002,
+	-2.7778512239456e-001,  2.7778512239456e-001, -2.7714705467224e-001,  2.7714705467224e-001,
+	 8.3414912223816e-002,  8.3414912223816e-002,  8.2991242408752e-002,  8.2991242408752e-002,
+	-2.7650836110115e-001,  2.7650836110115e-001, -2.7586901187897e-001,  2.7586901187897e-001,
+	 8.2568556070328e-002,  8.2568556070328e-002,  8.2146853208542e-002,  8.2146853208542e-002,
+	-2.7522900700569e-001,  2.7522900700569e-001, -2.7458834648132e-001,  2.7458834648132e-001,
+	 8.1726133823395e-002,  8.1726133823395e-002,  8.1306397914886e-002,  8.1306397914886e-002,
+	-2.7394703030586e-001,  2.7394703030586e-001, -2.7330508828163e-001,  2.7330508828163e-001,
+	 8.0887645483017e-002,  8.0887645483017e-002,  8.0469876527786e-002,  8.0469876527786e-002,
+	-2.7266249060631e-001,  2.7266249060631e-001, -2.7201926708221e-001,  2.7201926708221e-001,
+	 8.0053120851517e-002,  8.0053120851517e-002,  7.9637318849564e-002,  7.9637318849564e-002,
+	-2.7137538790703e-001,  2.7137538790703e-001, -2.7073088288307e-001,  2.7073088288307e-001,
+	 7.9222530126572e-002,  7.9222530126572e-002,  7.8808695077896e-002,  7.8808695077896e-002,
+	-2.7008575201035e-001,  2.7008575201035e-001, -2.6943996548653e-001,  2.6943996548653e-001,
+	 7.8395873308182e-002,  7.8395873308182e-002,  7.7984064817429e-002,  7.7984064817429e-002,
+	-2.6879355311394e-001,  2.6879355311394e-001, -2.6814648509026e-001,  2.6814648509026e-001,
+	 7.7573210000992e-002,  7.7573210000992e-002,  7.7163368463516e-002,  7.7163368463516e-002,
+	-2.6749882102013e-001,  2.6749882102013e-001, -2.6685050129890e-001,  2.6685050129890e-001,
+	 7.6754540205002e-002,  7.6754540205002e-002,  7.6346695423126e-002,  7.6346695423126e-002,
+	-2.6620155572891e-001,  2.6620155572891e-001, -2.6555201411247e-001,  2.6555201411247e-001,
+	 7.5939834117889e-002,  7.5939834117889e-002,  7.5533986091614e-002,  7.5533986091614e-002,
+	-2.6490181684494e-001,  2.6490181684494e-001, -2.6425099372864e-001,  2.6425099372864e-001,
+	 7.5129121541977e-002,  7.5129121541977e-002,  7.4725270271301e-002,  7.4725270271301e-002,
+	-2.6359957456589e-001,  2.6359957456589e-001, -2.6294752955437e-001,  2.6294752955437e-001,
+	 7.4322402477264e-002,  7.4322402477264e-002,  7.3920547962189e-002,  7.3920547962189e-002,
+	-2.6229485869408e-001,  2.6229485869408e-001, -2.6164156198502e-001,  2.6164156198502e-001,
+	 7.3519706726074e-002,  7.3519706726074e-002,  7.3119848966599e-002,  7.3119848966599e-002,
+	-2.6098763942719e-001,  2.6098763942719e-001, -2.6033312082291e-001,  2.6033312082291e-001,
+	 7.2721004486084e-002,  7.2721004486084e-002,  7.2323173284531e-002,  7.2323173284531e-002,
+	-2.5967800617218e-001,  2.5967800617218e-001, -2.5902226567268e-001,  2.5902226567268e-001,
+	 7.1926325559616e-002,  7.1926325559616e-002,  7.1530520915985e-002,  7.1530520915985e-002,
+	-2.5836589932442e-001,  2.5836589932442e-001, -2.5770893692970e-001,  2.5770893692970e-001,
+	 7.1135699748993e-002,  7.1135699748993e-002,  7.0741891860962e-002,  7.0741891860962e-002,
+	-2.5705137848854e-001,  2.5705137848854e-001, -2.5639319419861e-001,  2.5639319419861e-001,
+	 7.0349097251892e-002,  7.0349097251892e-002,  6.9957315921783e-002,  6.9957315921783e-002,
+	-2.5573444366455e-001,  2.5573444366455e-001, -2.5507506728172e-001,  2.5507506728172e-001,
+	 6.9566547870636e-002,  6.9566547870636e-002,  6.9176763296127e-002,  6.9176763296127e-002,
+	-2.5441506505013e-001,  2.5441506505013e-001, -2.5375449657440e-001,  2.5375449657440e-001,
+	 6.8788021802902e-002,  6.8788021802902e-002,  6.8400293588638e-002,  6.8400293588638e-002,
+	-2.5309333205223e-001,  2.5309333205223e-001, -2.5243157148361e-001,  2.5243157148361e-001,
+	 6.8013578653336e-002,  6.8013578653336e-002,  6.7627876996994e-002,  6.7627876996994e-002,
+	-2.5176918506622e-001,  2.5176918506622e-001, -2.5110623240471e-001,  2.5110623240471e-001,
+	 6.7243188619614e-002,  6.7243188619614e-002,  6.6859513521194e-002,  6.6859513521194e-002,
+	-2.5044268369675e-001,  2.5044268369675e-001, -2.4977856874466e-001,  2.4977856874466e-001,
+	 6.6476881504059e-002,  6.6476881504059e-002,  6.6095262765884e-002,  6.6095262765884e-002,
+	-2.4911384284496e-001,  2.4911384284496e-001, -2.4844853579998e-001,  2.4844853579998e-001,
+	 6.5714657306671e-002,  6.5714657306671e-002,  6.5335065126419e-002,  6.5335065126419e-002,
+	-2.4778263270855e-001,  2.4778263270855e-001, -2.4711616337299e-001,  2.4711616337299e-001,
+	 6.4956516027451e-002,  6.4956516027451e-002,  6.4578980207443e-002,  6.4578980207443e-002,
+	-2.4644909799099e-001,  2.4644909799099e-001, -2.4578146636486e-001,  2.4578146636486e-001,
+	 6.4202457666397e-002,  6.4202457666397e-002,  6.3826978206635e-002,  6.3826978206635e-002,
+	-2.4511325359344e-001,  2.4511325359344e-001, -2.4444445967674e-001,  2.4444445967674e-001,
+	 6.3452512025833e-002,  6.3452512025833e-002,  6.3079088926315e-002,  6.3079088926315e-002,
+	-2.4377508461475e-001,  2.4377508461475e-001, -2.4310514330864e-001,  2.4310514330864e-001,
+	 6.2706679105759e-002,  6.2706679105759e-002,  6.2335312366486e-002,  6.2335312366486e-002,
+	-2.4243463575840e-001,  2.4243463575840e-001, -2.4176354706287e-001,  2.4176354706287e-001,
+	 6.1964958906174e-002,  6.1964958906174e-002,  6.1595648527145e-002,  6.1595648527145e-002,
+	-2.4109189212322e-001,  2.4109189212322e-001, -2.4041967093945e-001,  2.4041967093945e-001,
+	 6.1227351427078e-002,  6.1227351427078e-002,  6.0860097408295e-002,  6.0860097408295e-002,
+	-2.3974688351154e-001,  2.3974688351154e-001, -2.3907352983952e-001,  2.3907352983952e-001,
+	 6.0493886470795e-002,  6.0493886470795e-002,  6.0128718614578e-002,  6.0128718614578e-002,
+	-2.3839962482452e-001,  2.3839962482452e-001, -2.3772515356541e-001,  2.3772515356541e-001,
+	 5.9764564037323e-002,  5.9764564037323e-002,  5.9401452541351e-002,  5.9401452541351e-002,
+	-2.3705011606216e-001,  2.3705011606216e-001, -2.3637452721596e-001,  2.3637452721596e-001,
+	 5.9039384126663e-002,  5.9039384126663e-002,  5.8678328990936e-002,  5.8678328990936e-002,
+	-2.3569837212563e-001,  2.3569837212563e-001, -2.3502166569233e-001,  2.3502166569233e-001,
+	 5.8318346738815e-002,  5.8318346738815e-002,  5.7959377765656e-002,  5.7959377765656e-002,
+	-2.3434442281723e-001,  2.3434442281723e-001, -2.3366661369801e-001,  2.3366661369801e-001,
+	 5.7601451873779e-002,  5.7601451873779e-002,  5.7244569063187e-002,  5.7244569063187e-002,
+	-2.3298825323582e-001,  2.3298825323582e-001, -2.3230935633183e-001,  2.3230935633183e-001,
+	 5.6888729333878e-002,  5.6888729333878e-002,  5.6533932685852e-002,  5.6533932685852e-002,
+	-2.3162989318371e-001,  2.3162989318371e-001, -2.3094990849495e-001,  2.3094990849495e-001,
+	 5.6180179119110e-002,  5.6180179119110e-002,  5.5827498435974e-002,  5.5827498435974e-002,
+	-2.3026935756207e-001,  2.3026935756207e-001, -2.2958828508854e-001,  2.2958828508854e-001,
+	 5.5475831031799e-002,  5.5475831031799e-002,  5.5125206708908e-002,  5.5125206708908e-002,
+	-2.2890666127205e-001,  2.2890666127205e-001, -2.2822450101376e-001,  2.2822450101376e-001,
+	 5.4775655269623e-002,  5.4775655269623e-002,  5.4427117109299e-002,  5.4427117109299e-002,
+	-2.2754180431366e-001,  2.2754180431366e-001, -2.2685857117176e-001,  2.2685857117176e-001,
+	 5.4079651832581e-002,  5.4079651832581e-002,  5.3733229637146e-002,  5.3733229637146e-002,
+	-2.2617480158806e-001,  2.2617480158806e-001, -2.2549049556255e-001,  2.2549049556255e-001,
+	 5.3387850522995e-002,  5.3387850522995e-002,  5.3043544292450e-002,  5.3043544292450e-002,
+	-2.2480566799641e-001,  2.2480566799641e-001, -2.2412031888962e-001,  2.2412031888962e-001,
+	 5.2700251340866e-002,  5.2700251340866e-002,  5.2358031272888e-002,  5.2358031272888e-002,
+	-2.2343441843987e-001,  2.2343441843987e-001, -2.2274801135063e-001,  2.2274801135063e-001,
+	 5.2016884088516e-002,  5.2016884088516e-002,  5.1676779985428e-002,  5.1676779985428e-002,
+	-2.2206108272076e-001,  2.2206108272076e-001, -2.2137361764908e-001,  2.2137361764908e-001,
+	 5.1337718963623e-002,  5.1337718963623e-002,  5.0999701023102e-002,  5.0999701023102e-002,
+	-2.2068564593792e-001,  2.2068564593792e-001, -2.1999713778496e-001,  2.1999713778496e-001,
+	 5.0662755966187e-002,  5.0662755966187e-002,  5.0326883792877e-002,  5.0326883792877e-002,
+	-2.1930812299252e-001,  2.1930812299252e-001, -2.1861858665943e-001,  2.1861858665943e-001,
+	 4.9992054700851e-002,  4.9992054700851e-002,  4.9658298492432e-002,  4.9658298492432e-002,
+	-2.1792854368687e-001,  2.1792854368687e-001, -2.1723797917366e-001,  2.1723797917366e-001,
+	 4.9325585365295e-002,  4.9325585365295e-002,  4.8993945121765e-002,  4.8993945121765e-002,
+	-2.1654690802097e-001,  2.1654690802097e-001, -2.1585533022881e-001,  2.1585533022881e-001,
+	 4.8663347959518e-002,  4.8663347959518e-002,  4.8333823680878e-002,  4.8333823680878e-002,
+	-2.1516324579716e-001,  2.1516324579716e-001, -2.1447065472603e-001,  2.1447065472603e-001,
+	 4.8005342483521e-002,  4.8005342483521e-002,  4.7677963972092e-002,  4.7677963972092e-002,
+	-2.1377755701542e-001,  2.1377755701542e-001, -2.1308395266533e-001,  2.1308395266533e-001,
+	 4.7351628541946e-002,  4.7351628541946e-002,  4.7026365995407e-002,  4.7026365995407e-002,
+	-2.1238984167576e-001,  2.1238984167576e-001, -2.1169523894787e-001,  2.1169523894787e-001,
+	 4.6702146530151e-002,  4.6702146530151e-002,  4.6378999948502e-002,  4.6378999948502e-002,
+	-2.1100014448166e-001,  2.1100014448166e-001, -2.1030454337597e-001,  2.1030454337597e-001,
+	 4.6056956052780e-002,  4.6056956052780e-002,  4.5735955238342e-002,  4.5735955238342e-002,
+	-2.0960845053196e-001,  2.0960845053196e-001, -2.0891186594963e-001,  2.0891186594963e-001,
+	 4.5415997505188e-002,  4.5415997505188e-002,  4.5097142457962e-002,  4.5097142457962e-002,
+	-2.0821478962898e-001,  2.0821478962898e-001, -2.0751722157001e-001,  2.0751722157001e-001,
+	 4.4779360294342e-002,  4.4779360294342e-002,  4.4462621212006e-002,  4.4462621212006e-002,
+	-2.0681916177273e-001,  2.0681916177273e-001, -2.0612062513828e-001,  2.0612062513828e-001,
+	 4.4146984815598e-002,  4.4146984815598e-002,  4.3832421302795e-002,  4.3832421302795e-002,
+	-2.0542159676552e-001,  2.0542159676552e-001, -2.0472207665443e-001,  2.0472207665443e-001,
+	 4.3518900871277e-002,  4.3518900871277e-002,  4.3206483125687e-002,  4.3206483125687e-002,
+	-2.0402207970619e-001,  2.0402207970619e-001, -2.0332162082195e-001,  2.0332162082195e-001,
+	 4.2895138263702e-002,  4.2895138263702e-002,  4.2584836483002e-002,  4.2584836483002e-002,
+	-2.0262065529823e-001,  2.0262065529823e-001, -2.0191922783852e-001,  2.0191922783852e-001,
+	 4.2275637388229e-002,  4.2275637388229e-002,  4.1967511177063e-002,  4.1967511177063e-002,
+	-2.0121732354164e-001,  2.0121732354164e-001, -2.0051495730877e-001,  2.0051495730877e-001,
+	 4.1660457849503e-002,  4.1660457849503e-002,  4.1354507207870e-002,  4.1354507207870e-002,
+	-1.9981209933758e-001,  1.9981209933758e-001, -1.9910877943039e-001,  1.9910877943039e-001,
+	 4.1049599647522e-002,  4.1049599647522e-002,  4.0745794773102e-002,  4.0745794773102e-002,
+	-1.9840499758720e-001,  1.9840499758720e-001, -1.9770073890686e-001,  1.9770073890686e-001,
+	 4.0443062782288e-002,  4.0443062782288e-002,  4.0141433477402e-002,  4.0141433477402e-002,
+	-1.9699601829052e-001,  1.9699601829052e-001, -1.9629083573818e-001,  1.9629083573818e-001,
+	 3.9840877056122e-002,  3.9840877056122e-002,  3.9541393518448e-002,  3.9541393518448e-002,
+	-1.9558519124985e-001,  1.9558519124985e-001, -1.9487908482552e-001,  1.9487908482552e-001,
+	 3.9242982864380e-002,  3.9242982864380e-002,  3.8945674896240e-002,  3.8945674896240e-002,
+	-1.9417253136635e-001,  1.9417253136635e-001, -1.9346550107002e-001,  1.9346550107002e-001,
+	 3.8649439811707e-002,  3.8649439811707e-002,  3.8354307413101e-002,  3.8354307413101e-002,
+	-1.9275803864002e-001,  1.9275803864002e-001, -1.9205009937286e-001,  1.9205009937286e-001,
+	 3.8060247898102e-002,  3.8060247898102e-002,  3.7767261266708e-002,  3.7767261266708e-002,
+	-1.9134172797203e-001,  1.9134172797203e-001, -1.9063289463520e-001,  1.9063289463520e-001,
+	 3.7475377321243e-002,  3.7475377321243e-002,  3.7184596061707e-002,  3.7184596061707e-002,
+	-1.8992361426353e-001,  1.8992361426353e-001, -1.8921388685703e-001,  1.8921388685703e-001,
+	 3.6894887685776e-002,  3.6894887685776e-002,  3.6606252193451e-002,  3.6606252193451e-002,
+	-1.8850371241570e-001,  1.8850371241570e-001, -1.8779309093952e-001,  1.8779309093952e-001,
+	 3.6318749189377e-002,  3.6318749189377e-002,  3.6032319068909e-002,  3.6032319068909e-002,
+	-1.8708203732967e-001,  1.8708203732967e-001, -1.8637053668499e-001,  1.8637053668499e-001,
+	 3.5746961832047e-002,  3.5746961832047e-002,  3.5462707281113e-002,  3.5462707281113e-002,
+	-1.8565860390663e-001,  1.8565860390663e-001, -1.8494622409344e-001,  1.8494622409344e-001,
+	 3.5179555416107e-002,  3.5179555416107e-002,  3.4897476434708e-002,  3.4897476434708e-002,
+	-1.8423342704773e-001,  1.8423342704773e-001, -1.8352018296719e-001,  1.8352018296719e-001,
+	 3.4616529941559e-002,  3.4616529941559e-002,  3.4336656332016e-002,  3.4336656332016e-002,
+	-1.8280650675297e-001,  1.8280650675297e-001, -1.8209239840508e-001,  1.8209239840508e-001,
+	 3.4057855606079e-002,  3.4057855606079e-002,  3.3780187368393e-002,  3.3780187368393e-002,
+	-1.8137787282467e-001,  1.8137787282467e-001, -1.8066290020943e-001,  1.8066290020943e-001,
+	 3.3503592014313e-002,  3.3503592014313e-002,  3.3228129148483e-002,  3.3228129148483e-002,
+	-1.7994752526283e-001,  1.7994752526283e-001, -1.7923171818256e-001,  1.7923171818256e-001,
+	 3.2953739166260e-002,  3.2953739166260e-002,  3.2680422067642e-002,  3.2680422067642e-002,
+	-1.7851547896862e-001,  1.7851547896862e-001, -1.7779883742332e-001,  1.7779883742332e-001,
+	 3.2408237457275e-002,  3.2408237457275e-002,  3.2137155532837e-002,  3.2137155532837e-002,
+	-1.7708176374435e-001,  1.7708176374435e-001, -1.7636428773403e-001,  1.7636428773403e-001,
+	 3.1867176294327e-002,  3.1867176294327e-002,  3.1598269939423e-002,  3.1598269939423e-002,
+	-1.7564637959003e-001,  1.7564637959003e-001, -1.7492806911469e-001,  1.7492806911469e-001,
+	 3.1330496072769e-002,  3.1330496072769e-002,  3.1063824892044e-002,  3.1063824892044e-002,
+	-1.7420934140682e-001,  1.7420934140682e-001, -1.7349021136761e-001,  1.7349021136761e-001,
+	 3.0798226594925e-002,  3.0798226594925e-002,  3.0533760786057e-002,  3.0533760786057e-002,
+	-1.7277066409588e-001,  1.7277066409588e-001, -1.7205071449280e-001,  1.7205071449280e-001,
+	 3.0270397663116e-002,  3.0270397663116e-002,  3.0008137226105e-002,  3.0008137226105e-002,
+	-1.7133036255836e-001,  1.7133036255836e-001, -1.7060960829258e-001,  1.7060960829258e-001,
+	 2.9746979475021e-002,  2.9746979475021e-002,  2.9486924409866e-002,  2.9486924409866e-002,
+	-1.6988845169544e-001,  1.6988845169544e-001, -1.6916689276695e-001,  1.6916689276695e-001,
+	 2.9227972030640e-002,  2.9227972030640e-002,  2.8970122337341e-002,  2.8970122337341e-002,
+	-1.6844493150711e-001,  1.6844493150711e-001, -1.6772258281708e-001,  1.6772258281708e-001,
+	 2.8713405132294e-002,  2.8713405132294e-002,  2.8457790613174e-002,  2.8457790613174e-002,
+	-1.6699983179569e-001,  1.6699983179569e-001, -1.6627669334412e-001,  1.6627669334412e-001,
+	 2.8203278779984e-002,  2.8203278779984e-002,  2.7949869632721e-002,  2.7949869632721e-002,
+	-1.6555315256119e-001,  1.6555315256119e-001, -1.6482923924923e-001,  1.6482923924923e-001,
+	 2.7697592973709e-002,  2.7697592973709e-002,  2.7446419000626e-002,  2.7446419000626e-002,
+	-1.6410492360592e-001,  1.6410492360592e-001, -1.6338023543358e-001,  1.6338023543358e-001,
+	 2.7196347713470e-002,  2.7196347713470e-002,  2.6947379112244e-002,  2.6947379112244e-002,
+	-1.6265514492989e-001,  1.6265514492989e-001, -1.6192968189716e-001,  1.6192968189716e-001,
+	 2.6699542999268e-002,  2.6699542999268e-002,  2.6452809572220e-002,  2.6452809572220e-002,
+	-1.6120384633541e-001,  1.6120384633541e-001, -1.6047762334347e-001,  1.6047762334347e-001,
+	 2.6207208633423e-002,  2.6207208633423e-002,  2.5962710380554e-002,  2.5962710380554e-002,
+	-1.5975101292133e-001,  1.5975101292133e-001, -1.5902404487133e-001,  1.5902404487133e-001,
+	 2.5719314813614e-002,  2.5719314813614e-002,  2.5477051734924e-002,  2.5477051734924e-002,
+	-1.5829668939114e-001,  1.5829668939114e-001, -1.5756896138191e-001,  1.5756896138191e-001,
+	 2.5235921144485e-002,  2.5235921144485e-002,  2.4995893239975e-002,  2.4995893239975e-002,
+	-1.5684087574482e-001,  1.5684087574482e-001, -1.5611241757870e-001,  1.5611241757870e-001,
+	 2.4756968021393e-002,  2.4756968021393e-002,  2.4519175291061e-002,  2.4519175291061e-002,
+	-1.5538358688354e-001,  1.5538358688354e-001, -1.5465438365936e-001,  1.5465438365936e-001,
+	 2.4282485246658e-002,  2.4282485246658e-002,  2.4046927690506e-002,  2.4046927690506e-002,
+	-1.5392482280731e-001,  1.5392482280731e-001, -1.5319490432739e-001,  1.5319490432739e-001,
+	 2.3812502622604e-002,  2.3812502622604e-002,  2.3579180240631e-002,  2.3579180240631e-002,
+	-1.5246461331844e-001,  1.5246461331844e-001, -1.5173397958279e-001,  1.5173397958279e-001,
+	 2.3346990346909e-002,  2.3346990346909e-002,  2.3115903139114e-002,  2.3115903139114e-002,
+	-1.5100297331810e-001,  1.5100297331810e-001, -1.5027162432671e-001,  1.5027162432671e-001,
+	 2.2885948419571e-002,  2.2885948419571e-002,  2.2657126188278e-002,  2.2657126188278e-002,
+	-1.4953991770744e-001,  1.4953991770744e-001, -1.4880785346031e-001,  1.4880785346031e-001,
+	 2.2429406642914e-002,  2.2429406642914e-002,  2.2202819585800e-002,  2.2202819585800e-002,
+	-1.4807544648647e-001,  1.4807544648647e-001, -1.4734269678593e-001,  1.4734269678593e-001,
+	 2.1977365016937e-002,  2.1977365016937e-002,  2.1753042936325e-002,  2.1753042936325e-002,
+	-1.4660958945751e-001,  1.4660958945751e-001, -1.4587613940239e-001,  1.4587613940239e-001,
+	 2.1529823541641e-002,  2.1529823541641e-002,  2.1307736635208e-002,  2.1307736635208e-002,
+	-1.4514234662056e-001,  1.4514234662056e-001, -1.4440821111202e-001,  1.4440821111202e-001,
+	 2.1086782217026e-002,  2.1086782217026e-002,  2.0866960287094e-002,  2.0866960287094e-002,
+	-1.4367373287678e-001,  1.4367373287678e-001, -1.4293892681599e-001,  1.4293892681599e-001,
+	 2.0648270845413e-002,  2.0648270845413e-002,  2.0430684089661e-002,  2.0430684089661e-002,
+	-1.4220377802849e-001,  1.4220377802849e-001, -1.4146828651428e-001,  1.4146828651428e-001,
+	 2.0214229822159e-002,  2.0214229822159e-002,  1.9998937845230e-002,  1.9998937845230e-002,
+	-1.4073246717453e-001,  1.4073246717453e-001, -1.3999632000923e-001,  1.3999632000923e-001,
+	 1.9784748554230e-002,  1.9784748554230e-002,  1.9571691751480e-002,  1.9571691751480e-002,
+	-1.3925984501839e-001,  1.3925984501839e-001, -1.3852304220200e-001,  1.3852304220200e-001,
+	 1.9359767436981e-002,  1.9359767436981e-002,  1.9148975610733e-002,  1.9148975610733e-002,
+	-1.3778591156006e-001,  1.3778591156006e-001, -1.3704845309258e-001,  1.3704845309258e-001,
+	 1.8939286470413e-002,  1.8939286470413e-002,  1.8730759620667e-002,  1.8730759620667e-002,
+	-1.3631068170071e-001,  1.3631068170071e-001, -1.3557258248329e-001,  1.3557258248329e-001,
+	 1.8523365259171e-002,  1.8523365259171e-002,  1.8317103385925e-002,  1.8317103385925e-002,
+	-1.3483417034149e-001,  1.3483417034149e-001, -1.3409543037415e-001,  1.3409543037415e-001,
+	 1.8111974000931e-002,  1.8111974000931e-002,  1.7907977104187e-002,  1.7907977104187e-002,
+	-1.3335637748241e-001,  1.3335637748241e-001, -1.3261701166630e-001,  1.3261701166630e-001,
+	 1.7705112695694e-002,  1.7705112695694e-002,  1.7503380775452e-002,  1.7503380775452e-002,
+	-1.3187734782696e-001,  1.3187734782696e-001, -1.3113735616207e-001,  1.3113735616207e-001,
+	 1.7302781343460e-002,  1.7302781343460e-002,  1.7103314399719e-002,  1.7103314399719e-002,
+	-1.3039706647396e-001,  1.3039706647396e-001, -1.2965646386147e-001,  1.2965646386147e-001,
+	 1.6905009746552e-002,  1.6905009746552e-002,  1.6707807779312e-002,  1.6707807779312e-002,
+	-1.2891554832458e-001,  1.2891554832458e-001, -1.2817434966564e-001,  1.2817434966564e-001,
+	 1.6511768102646e-002,  1.6511768102646e-002,  1.6316860914230e-002,  1.6316860914230e-002,
+	-1.2743283808231e-001,  1.2743283808231e-001, -1.2669102847576e-001,  1.2669102847576e-001,
+	 1.6123086214066e-002,  1.6123086214066e-002,  1.5930444002151e-002,  1.5930444002151e-002,
+	-1.2594890594482e-001,  1.2594890594482e-001, -1.2520650029182e-001,  1.2520650029182e-001,
+	 1.5738964080811e-002,  1.5738964080811e-002,  1.5548586845398e-002,  1.5548586845398e-002,
+	-1.2446380406618e-001,  1.2446380406618e-001, -1.2372080981731e-001,  1.2372080981731e-001,
+	 1.5359371900558e-002,  1.5359371900558e-002,  1.5171319246292e-002,  1.5171319246292e-002,
+	-1.2297752499580e-001,  1.2297752499580e-001, -1.2223395705223e-001,  1.2223395705223e-001,
+	 1.4984369277954e-002,  1.4984369277954e-002,  1.4798581600189e-002,  1.4798581600189e-002,
+	-1.2149009108543e-001,  1.2149009108543e-001, -1.2074594944716e-001,  1.2074594944716e-001,
+	 1.4613926410675e-002,  1.4613926410675e-002,  1.4430433511734e-002,  1.4430433511734e-002,
+	-1.2000151723623e-001,  1.2000151723623e-001, -1.1925680190325e-001,  1.1925680190325e-001,
+	 1.4248043298721e-002,  1.4248043298721e-002,  1.4066845178604e-002,  1.4066845178604e-002,
+	-1.1851180344820e-001,  1.1851180344820e-001, -1.1776652932167e-001,  1.1776652932167e-001,
+	 1.3886749744415e-002,  1.3886749744415e-002,  1.3707816600800e-002,  1.3707816600800e-002,
+	-1.1702097952366e-001,  1.1702097952366e-001, -1.1627515405416e-001,  1.1627515405416e-001,
+	 1.3530015945435e-002,  1.3530015945435e-002,  1.3353377580643e-002,  1.3353377580643e-002,
+	-1.1552906036377e-001,  1.1552906036377e-001, -1.1478268355131e-001,  1.1478268355131e-001,
+	 1.3177871704102e-002,  1.3177871704102e-002,  1.3003528118134e-002,  1.3003528118134e-002,
+	-1.1403604596853e-001,  1.1403604596853e-001, -1.1328913271427e-001,  1.1328913271427e-001,
+	 1.2830317020416e-002,  1.2830317020416e-002,  1.2658238410950e-002,  1.2658238410950e-002,
+	-1.1254195868969e-001,  1.1254195868969e-001, -1.1179451644421e-001,  1.1179451644421e-001,
+	 1.2487322092056e-002,  1.2487322092056e-002,  1.2317568063736e-002,  1.2317568063736e-002,
+	-1.1104681342840e-001,  1.1104681342840e-001, -1.1029884964228e-001,  1.1029884964228e-001,
+	 1.2148946523666e-002,  1.2148946523666e-002,  1.1981457471848e-002,  1.1981457471848e-002,
+	-1.0955062508583e-001,  1.0955062508583e-001, -1.0880213975906e-001,  1.0880213975906e-001,
+	 1.1815130710602e-002,  1.1815130710602e-002,  1.1649966239929e-002,  1.1649966239929e-002,
+	-1.0805340111256e-001,  1.0805340111256e-001, -1.0730440914631e-001,  1.0730440914631e-001,
+	 1.1485934257507e-002,  1.1485934257507e-002,  1.1323064565659e-002,  1.1323064565659e-002,
+	-1.0655516386032e-001,  1.0655516386032e-001, -1.0580566525459e-001,  1.0580566525459e-001,
+	 1.1161327362061e-002,  1.1161327362061e-002,  1.1000752449036e-002,  1.1000752449036e-002,
+	-1.0505592077971e-001,  1.0505592077971e-001, -1.0430593043566e-001,  1.0430593043566e-001,
+	 1.0841310024261e-002,  1.0841310024261e-002,  1.0683029890060e-002,  1.0683029890060e-002,
+	-1.0355569422245e-001,  1.0355569422245e-001, -1.0280521214008e-001,  1.0280521214008e-001,
+	 1.0525912046432e-002,  1.0525912046432e-002,  1.0369926691055e-002,  1.0369926691055e-002,
+	-1.0205448418856e-001,  1.0205448418856e-001, -1.0130352526903e-001,  1.0130352526903e-001,
+	 1.0215103626251e-002,  1.0215103626251e-002,  1.0061442852020e-002,  1.0061442852020e-002,
+	-1.0055232048035e-001,  1.0055232048035e-001, -9.9800884723663e-002,  9.9800884723663e-002,
+	 9.9089443683624e-003,  9.9089443683624e-003,  9.7575783729553e-003,  9.7575783729553e-003,
+	-9.9049210548401e-002,  9.9049210548401e-002, -9.8297297954559e-002,  9.8297297954559e-002,
+	 9.6073746681213e-003,  9.6073746681213e-003,  9.4583034515381e-003,  9.4583034515381e-003,
+	-9.7545161843300e-002,  9.7545161843300e-002, -9.6792794764042e-002,  9.6792794764042e-002,
+	 9.3103945255280e-003,  9.3103945255280e-003,  9.1636478900909e-003,  9.1636478900909e-003,
+	-9.6040204167366e-002,  9.6040204167366e-002, -9.5287382602692e-002,  9.5287382602692e-002,
+	 9.0180635452271e-003,  9.0180635452271e-003,  8.8736414909363e-003,  8.8736414909363e-003,
+	-9.4534337520599e-002,  9.4534337520599e-002, -9.3781068921089e-002,  9.3781068921089e-002,
+	 8.7303519248962e-003,  8.7303519248962e-003,  8.5882246494293e-003,  8.5882246494293e-003,
+	-9.3027576804161e-002,  9.3027576804161e-002, -9.2273868620396e-002,  9.2273868620396e-002,
+	 8.4472596645355e-003,  8.4472596645355e-003,  8.3074569702148e-003,  8.3074569702148e-003,
+	-9.1519944369793e-002,  9.1519944369793e-002, -9.0765804052353e-002,  9.0765804052353e-002,
+	 8.1687867641449e-003,  8.1687867641449e-003,  8.0312788486481e-003,  8.0312788486481e-003,
+	-9.0011455118656e-002,  9.0011455118656e-002, -8.9256890118122e-002,  8.9256890118122e-002,
+	 7.8949630260468e-003,  7.8949630260468e-003,  7.7597796916962e-003,  7.7597796916962e-003,
+	-8.8502109050751e-002,  8.8502109050751e-002, -8.7747126817703e-002,  8.7747126817703e-002,
+	 7.6257586479187e-003,  7.6257586479187e-003,  7.4928700923920e-003,  7.4928700923920e-003,
+	-8.6991935968399e-002,  8.6991935968399e-002, -8.6236543953419e-002,  8.6236543953419e-002,
+	 7.3611736297607e-003,  7.3611736297607e-003,  7.2306394577026e-003,  7.2306394577026e-003,
+	-8.5480943322182e-002,  8.5480943322182e-002, -8.4725148975849e-002,  8.4725148975849e-002,
+	 7.1012377738953e-003,  7.1012377738953e-003,  6.9730281829834e-003,  6.9730281829834e-003,
+	-8.3969153463840e-002,  8.3969153463840e-002, -8.3212956786156e-002,  8.3212956786156e-002,
+	 6.8459510803223e-003,  6.8459510803223e-003,  6.7200362682343e-003,  6.7200362682343e-003,
+	-8.2456558942795e-002,  8.2456558942795e-002, -8.1699974834919e-002,  8.1699974834919e-002,
+	 6.5953135490417e-003,  6.5953135490417e-003,  6.4717233181000e-003,  6.4717233181000e-003,
+	-8.0943197011948e-002,  8.0943197011948e-002, -8.0186232924461e-002,  8.0186232924461e-002,
+	 6.3492953777313e-003,  6.3492953777313e-003,  6.2280297279358e-003,  6.2280297279358e-003,
+	-7.9429075121880e-002,  7.9429075121880e-002, -7.8671731054783e-002,  7.8671731054783e-002,
+	 6.1079263687134e-003,  6.1079263687134e-003,  5.9889853000641e-003,  5.9889853000641e-003,
+	-7.7914200723171e-002,  7.7914200723171e-002, -7.7156491577625e-002,  7.7156491577625e-002,
+	 5.8712065219879e-003,  5.8712065219879e-003,  5.7545900344849e-003,  5.7545900344849e-003,
+	-7.6398596167564e-002,  7.6398596167564e-002, -7.5640521943569e-002,  7.5640521943569e-002,
+	 5.6391656398773e-003,  5.6391656398773e-003,  5.5248737335205e-003,  5.5248737335205e-003,
+	-7.4882268905640e-002,  7.4882268905640e-002, -7.4123844504356e-002,  7.4123844504356e-002,
+	 5.4117441177368e-003,  5.4117441177368e-003,  5.2997767925262e-003,  5.2997767925262e-003,
+	-7.3365241289139e-002,  7.3365241289139e-002, -7.2606466710567e-002,  7.2606466710567e-002,
+	 5.1890015602112e-003,  5.1890015602112e-003,  5.0793588161469e-003,  5.0793588161469e-003,
+	-7.1847520768642e-002,  7.1847520768642e-002, -7.1088403463364e-002,  7.1088403463364e-002,
+	 4.9709081649780e-003,  4.9709081649780e-003,  4.8635900020599e-003,  4.8635900020599e-003,
+	-7.0329122245312e-002,  7.0329122245312e-002, -6.9569677114487e-002,  6.9569677114487e-002,
+	 4.7574639320374e-003,  4.7574639320374e-003,  4.6525001525879e-003,  4.6525001525879e-003,
+	-6.8810060620308e-002,  6.8810060620308e-002, -6.8050287663937e-002,  6.8050287663937e-002,
+	 4.5486688613892e-003,  4.5486688613892e-003,  4.4460296630859e-003,  4.4460296630859e-003,
+	-6.7290358245373e-002,  6.7290358245373e-002, -6.6530264914036e-002,  6.6530264914036e-002,
+	 4.3445825576782e-003,  4.3445825576782e-003,  4.2442679405212e-003,  4.2442679405212e-003,
+	-6.5770015120506e-002,  6.5770015120506e-002, -6.5009616315365e-002,  6.5009616315365e-002,
+	 4.1451156139374e-003,  4.1451156139374e-003,  4.0471553802490e-003,  4.0471553802490e-003,
+	-6.4249053597450e-002,  6.4249053597450e-002, -6.3488349318504e-002,  6.3488349318504e-002,
+	 3.9503574371338e-003,  3.9503574371338e-003,  3.8546919822693e-003,  3.8546919822693e-003,
+	-6.2727496027946e-002,  6.2727496027946e-002, -6.1966490000486e-002,  6.1966490000486e-002,
+	 3.7602186203003e-003,  3.7602186203003e-003,  3.6669373512268e-003,  3.6669373512268e-003,
+	-6.1205338686705e-002,  6.1205338686705e-002, -6.0444045811892e-002,  6.0444045811892e-002,
+	 3.5747885704041e-003,  3.5747885704041e-003,  3.4838318824768e-003,  3.4838318824768e-003,
+	-5.9682607650757e-002,  5.9682607650757e-002, -5.8921031653881e-002,  5.8921031653881e-002,
+	 3.3940374851227e-003,  3.3940374851227e-003,  3.3054053783417e-003,  3.3054053783417e-003,
+	-5.8159317821264e-002,  5.8159317821264e-002, -5.7397466152906e-002,  5.7397466152906e-002,
+	 3.2179355621338e-003,  3.2179355621338e-003,  3.1316280364990e-003,  3.1316280364990e-003,
+	-5.6635476648808e-002,  5.6635476648808e-002, -5.5873356759548e-002,  5.5873356759548e-002,
+	 3.0465126037598e-003,  3.0465126037598e-003,  2.9625594615936e-003,  2.9625594615936e-003,
+	-5.5111106485128e-002,  5.5111106485128e-002, -5.4348722100258e-002,  5.4348722100258e-002,
+	 2.8797686100006e-003,  2.8797686100006e-003,  2.7981698513031e-003,  2.7981698513031e-003,
+	-5.3586214780807e-002,  5.3586214780807e-002, -5.2823577076197e-002,  5.2823577076197e-002,
+	 2.7177035808563e-003,  2.7177035808563e-003,  2.6384294033051e-003,  2.6384294033051e-003,
+	-5.2060820162296e-002,  5.2060820162296e-002, -5.1297936588526e-002,  5.1297936588526e-002,
+	 2.5603473186493e-003,  2.5603473186493e-003,  2.4833977222443e-003,  2.4833977222443e-003,
+	-5.0534933805466e-002,  5.0534933805466e-002, -4.9771811813116e-002,  4.9771811813116e-002,
+	 2.4076402187347e-003,  2.4076402187347e-003,  2.3330450057983e-003,  2.3330450057983e-003,
+	-4.9008570611477e-002,  4.9008570611477e-002, -4.8245217651129e-002,  4.8245217651129e-002,
+	 2.2596120834351e-003,  2.2596120834351e-003,  2.1873712539673e-003,  2.1873712539673e-003,
+	-4.7481749206781e-002,  4.7481749206781e-002, -4.6718169003725e-002,  4.6718169003725e-002,
+	 2.1162927150726e-003,  2.1162927150726e-003,  2.0463764667511e-003,  2.0463764667511e-003,
+	-4.5954480767250e-002,  4.5954480767250e-002, -4.5190680772066e-002,  4.5190680772066e-002,
+	 1.9776523113251e-003,  1.9776523113251e-003,  1.9100904464722e-003,  1.9100904464722e-003,
+	-4.4426776468754e-002,  4.4426776468754e-002, -4.3662767857313e-002,  4.3662767857313e-002,
+	 1.8436908721924e-003,  1.8436908721924e-003,  1.7784833908081e-003,  1.7784833908081e-003,
+	-4.2898658663034e-002,  4.2898658663034e-002, -4.2134445160627e-002,  4.2134445160627e-002,
+	 1.7144381999969e-003,  1.7144381999969e-003,  1.6515552997589e-003,  1.6515552997589e-003,
+	-4.1370134800673e-002,  4.1370134800673e-002, -4.0605723857880e-002,  4.0605723857880e-002,
+	 1.5898644924164e-003,  1.5898644924164e-003,  1.5293359756470e-003,  1.5293359756470e-003,
+	-3.9841219782829e-002,  3.9841219782829e-002, -3.9076622575521e-002,  3.9076622575521e-002,
+	 1.4699697494507e-003,  1.4699697494507e-003,  1.4117956161499e-003,  1.4117956161499e-003,
+	-3.8311932235956e-002,  3.8311932235956e-002, -3.7547152489424e-002,  3.7547152489424e-002,
+	 1.3547837734222e-003,  1.3547837734222e-003,  1.2989342212677e-003,  1.2989342212677e-003,
+	-3.6782283335924e-002,  3.6782283335924e-002, -3.6017328500748e-002,  3.6017328500748e-002,
+	 1.2442767620087e-003,  1.2442767620087e-003,  1.1907815933228e-003,  1.1907815933228e-003,
+	-3.5252287983894e-002,  3.5252287983894e-002, -3.4487165510654e-002,  3.4487165510654e-002,
+	 1.1384785175323e-003,  1.1384785175323e-003,  1.0873377323151e-003,  1.0873377323151e-003,
+	-3.3721961081028e-002,  3.3721961081028e-002, -3.2956678420305e-002,  3.2956678420305e-002,
+	 1.0373592376709e-003,  1.0373592376709e-003,  9.8857283592224e-004,  9.8857283592224e-004,
+	-3.2191317528486e-002,  3.2191317528486e-002, -3.1425878405571e-002,  3.1425878405571e-002,
+	 9.4094872474670e-004,  9.4094872474670e-004,  8.9448690414429e-004,  8.9448690414429e-004,
+	-3.0660368502140e-002,  3.0660368502140e-002, -2.9894785955548e-002,  2.9894785955548e-002,
+	 8.4921717643738e-004,  8.4921717643738e-004,  8.0513954162598e-004,  8.0513954162598e-004,
+	-2.9129132628441e-002,  2.9129132628441e-002, -2.8363412246108e-002,  2.8363412246108e-002,
+	 7.6222419738770e-004,  7.6222419738770e-004,  7.2047114372253e-004,  7.2047114372253e-004,
+	-2.7597622945905e-002,  2.7597622945905e-002, -2.6831770315766e-002,  2.6831770315766e-002,
+	 6.7988038063049e-004,  6.7988038063049e-004,  6.4048171043396e-004,  6.4048171043396e-004,
+	-2.6065852493048e-002,  2.6065852493048e-002, -2.5299875065684e-002,  2.5299875065684e-002,
+	 6.0227513313293e-004,  6.0227513313293e-004,  5.6523084640503e-004,  5.6523084640503e-004,
+	-2.4533838033676e-002,  2.4533838033676e-002, -2.3767743259668e-002,  2.3767743259668e-002,
+	 5.2934885025024e-004,  5.2934885025024e-004,  4.9465894699097e-004,  4.9465894699097e-004,
+	-2.3001592606306e-002,  2.3001592606306e-002, -2.2235386073589e-002,  2.2235386073589e-002,
+	 4.6113133430481e-004,  4.6113133430481e-004,  4.2879581451416e-004,  4.2879581451416e-004,
+	-2.1469129249454e-002,  2.1469129249454e-002, -2.0702820271254e-002,  2.0702820271254e-002,
+	 3.9762258529663e-004,  3.9762258529663e-004,  3.6761164665222e-004,  3.6761164665222e-004,
+	-1.9936464726925e-002,  1.9936464726925e-002, -1.9170060753822e-002,  1.9170060753822e-002,
+	 3.3882260322571e-004,  3.3882260322571e-004,  3.1116604804993e-004,  3.1116604804993e-004,
+	-1.8403612077236e-002,  1.8403612077236e-002, -1.7637120559812e-002,  1.7637120559812e-002,
+	 2.8470158576965e-004,  2.8470158576965e-004,  2.5939941406250e-004,  2.5939941406250e-004,
+	-1.6870586201549e-002,  1.6870586201549e-002, -1.6104012727737e-002,  1.6104012727737e-002,
+	 2.3528933525085e-004,  2.3528933525085e-004,  2.1234154701233e-004,  2.1234154701233e-004,
+	-1.5337402001023e-002,  1.5337402001023e-002, -1.4570754952729e-002,  1.4570754952729e-002,
+	 1.9058585166931e-004,  1.9058585166931e-004,  1.6999244689941e-004,  1.6999244689941e-004,
+	-1.3804073445499e-002,  1.3804073445499e-002, -1.3037359341979e-002,  1.3037359341979e-002,
+	 1.5059113502502e-004,  1.5059113502502e-004,  1.3235211372375e-004,  1.3235211372375e-004,
+	-1.2270614504814e-002,  1.2270614504814e-002, -1.1503840796649e-002,  1.1503840796649e-002,
+	 1.1530518531799e-004,  1.1530518531799e-004,  9.9420547485352e-005,  9.9420547485352e-005,
+	-1.0737040080130e-002,  1.0737040080130e-002, -9.9702142179012e-003,  9.9702142179012e-003,
+	 8.4698200225830e-005,  8.4698200225830e-005,  7.1167945861816e-005,  7.1167945861816e-005,
+	-9.2033650726080e-003,  9.2033650726080e-003, -8.4364945068955e-003,  8.4364945068955e-003,
+	 5.8829784393311e-005,  5.8829784393311e-005,  4.7653913497925e-005,  4.7653913497925e-005,
+	-7.6696034520864e-003,  7.6696034520864e-003, -6.9026942364872e-003,  6.9026942364872e-003,
+	 3.7640333175659e-005,  3.7640333175659e-005,  2.8818845748901e-005,  2.8818845748901e-005,
+	-6.1357691884041e-003,  6.1357691884041e-003, -5.3688297048211e-003,  5.3688297048211e-003,
+	 2.1189451217651e-005,  2.1189451217651e-005,  1.4692544937134e-005,  1.4692544937134e-005,
+	-4.6018776483834e-003,  4.6018776483834e-003, -3.8349144160748e-003,  3.8349144160748e-003,
+	 9.4175338745117e-006,  9.4175338745117e-006,  5.3048133850098e-006,  5.3048133850098e-006,
+	-3.0679423362017e-003,  3.0679423362017e-003, -2.3009630385786e-003,  2.3009630385786e-003,
+	 2.3543834686279e-006,  2.3543834686279e-006,  5.9604644775391e-007,  5.9604644775391e-007,
+	-1.5339783858508e-003,  1.5339783858508e-003, -7.6699012424797e-004,  7.6699012424797e-004
+};
+
+static _MM_ALIGN16 float	CT1STP[]	 = {
+	 7.0710676908493e-001,  7.0710676908493e-001,  7.0710676908493e-001,  7.0710676908493e-001,
+	-7.0710676908493e-001,  7.0710676908493e-001, -7.0710676908493e-001,  7.0710676908493e-001,
+	 9.2387950420380e-001,  9.2387950420380e-001,  3.8268339633942e-001,  3.8268339633942e-001,
+	-3.8268345594406e-001,  3.8268345594406e-001, -9.2387944459915e-001,  9.2387944459915e-001,
+	-7.0710676908493e-001,  7.0710676908493e-001, -7.0710676908493e-001,  7.0710676908493e-001,
+	 7.0710676908493e-001,  7.0710676908493e-001,  7.0710676908493e-001,  7.0710676908493e-001,
+	 3.8268345594406e-001,  3.8268345594406e-001, -9.2387944459915e-001, -9.2387944459915e-001,
+	-9.2387950420380e-001,  9.2387950420380e-001,  3.8268339633942e-001, -3.8268339633942e-001,
+	 9.2387950420380e-001,  9.2387950420380e-001,  9.2387950420380e-001,  9.2387950420380e-001,
+	-3.8268345594406e-001,  3.8268345594406e-001, -3.8268345594406e-001,  3.8268345594406e-001,
+	 9.8078525066376e-001,  9.8078525066376e-001,  8.3146953582764e-001,  8.3146953582764e-001,
+	-1.9509032368660e-001,  1.9509032368660e-001, -5.5557024478912e-001,  5.5557024478912e-001,
+	-9.2387950420380e-001,  9.2387950420380e-001, -9.2387950420380e-001,  9.2387950420380e-001,
+	 3.8268345594406e-001,  3.8268345594406e-001,  3.8268345594406e-001,  3.8268345594406e-001,
+	 5.5557024478912e-001,  5.5557024478912e-001, -9.8078513145447e-001, -9.8078513145447e-001,
+	-8.3146959543228e-001,  8.3146959543228e-001, -1.9509035348892e-001,  1.9509035348892e-001,
+	 3.8268345594406e-001,  3.8268345594406e-001,  3.8268345594406e-001,  3.8268345594406e-001,
+	-9.2387950420380e-001,  9.2387950420380e-001, -9.2387950420380e-001,  9.2387950420380e-001,
+	 8.3146959543228e-001,  8.3146959543228e-001, -1.9509035348892e-001, -1.9509035348892e-001,
+	-5.5557024478912e-001,  5.5557024478912e-001, -9.8078513145447e-001,  9.8078513145447e-001,
+	-3.8268345594406e-001,  3.8268345594406e-001, -3.8268345594406e-001,  3.8268345594406e-001,
+	 9.2387950420380e-001,  9.2387950420380e-001,  9.2387950420380e-001,  9.2387950420380e-001,
+	 1.9509032368660e-001,  1.9509032368660e-001, -5.5557024478912e-001, -5.5557024478912e-001,
+	-9.8078525066376e-001,  9.8078525066376e-001,  8.3146953582764e-001, -8.3146953582764e-001,
+	 9.8078525066376e-001,  9.8078525066376e-001,  9.8078525066376e-001,  9.8078525066376e-001,
+	-1.9509032368660e-001,  1.9509032368660e-001, -1.9509032368660e-001,  1.9509032368660e-001,
+	 9.9518471956253e-001,  9.9518471956253e-001,  9.5694035291672e-001,  9.5694035291672e-001,
+	-9.8017141222954e-002,  9.8017141222954e-002, -2.9028469324112e-001,  2.9028469324112e-001,
+	-9.8078525066376e-001,  9.8078525066376e-001, -9.8078525066376e-001,  9.8078525066376e-001,
+	 1.9509032368660e-001,  1.9509032368660e-001,  1.9509032368660e-001,  1.9509032368660e-001,
+	 6.3439327478409e-001,  6.3439327478409e-001, -8.8192123174667e-001, -8.8192123174667e-001,
+	-7.7301043272018e-001,  7.7301043272018e-001, -4.7139674425125e-001,  4.7139674425125e-001,
+	 5.5557024478912e-001,  5.5557024478912e-001,  5.5557024478912e-001,  5.5557024478912e-001,
+	-8.3146959543228e-001,  8.3146959543228e-001, -8.3146959543228e-001,  8.3146959543228e-001,
+	 8.8192123174667e-001,  8.8192123174667e-001,  9.8017096519470e-002,  9.8017096519470e-002,
+	-4.7139674425125e-001,  4.7139674425125e-001, -9.9518460035324e-001,  9.9518460035324e-001,
+	-5.5557024478912e-001,  5.5557024478912e-001, -5.5557024478912e-001,  5.5557024478912e-001,
+	 8.3146959543228e-001,  8.3146959543228e-001,  8.3146959543228e-001,  8.3146959543228e-001,
+	 2.9028469324112e-001,  2.9028469324112e-001, -7.7301043272018e-001, -7.7301043272018e-001,
+	-9.5694035291672e-001,  9.5694035291672e-001,  6.3439327478409e-001, -6.3439327478409e-001,
+	 8.3146959543228e-001,  8.3146959543228e-001,  8.3146959543228e-001,  8.3146959543228e-001,
+	-5.5557024478912e-001,  5.5557024478912e-001, -5.5557024478912e-001,  5.5557024478912e-001,
+	 9.5694035291672e-001,  9.5694035291672e-001,  6.3439327478409e-001,  6.3439327478409e-001,
+	-2.9028469324112e-001,  2.9028469324112e-001, -7.7301043272018e-001,  7.7301043272018e-001,
+	-8.3146959543228e-001,  8.3146959543228e-001, -8.3146959543228e-001,  8.3146959543228e-001,
+	 5.5557024478912e-001,  5.5557024478912e-001,  5.5557024478912e-001,  5.5557024478912e-001,
+	 4.7139674425125e-001,  4.7139674425125e-001, -9.9518460035324e-001, -9.9518460035324e-001,
+	-8.8192123174667e-001,  8.8192123174667e-001,  9.8017096519470e-002, -9.8017096519470e-002,
+	 1.9509032368660e-001,  1.9509032368660e-001,  1.9509032368660e-001,  1.9509032368660e-001,
+	-9.8078525066376e-001,  9.8078525066376e-001, -9.8078525066376e-001,  9.8078525066376e-001,
+	 7.7301043272018e-001,  7.7301043272018e-001, -4.7139674425125e-001, -4.7139674425125e-001,
+	-6.3439327478409e-001,  6.3439327478409e-001, -8.8192123174667e-001,  8.8192123174667e-001,
+	-1.9509032368660e-001,  1.9509032368660e-001, -1.9509032368660e-001,  1.9509032368660e-001,
+	 9.8078525066376e-001,  9.8078525066376e-001,  9.8078525066376e-001,  9.8078525066376e-001,
+	 9.8017141222954e-002,  9.8017141222954e-002, -2.9028469324112e-001, -2.9028469324112e-001,
+	-9.9518471956253e-001,  9.9518471956253e-001,  9.5694035291672e-001, -9.5694035291672e-001,
+	 9.9518471956253e-001,  9.9518471956253e-001,  9.9518471956253e-001,  9.9518471956253e-001,
+	-9.8017141222954e-002,  9.8017141222954e-002, -9.8017141222954e-002,  9.8017141222954e-002,
+	 9.9879544973373e-001,  9.9879544973373e-001,  9.8917651176453e-001,  9.8917651176453e-001,
+	-4.9067676067352e-002,  4.9067676067352e-002, -1.4673046767712e-001,  1.4673046767712e-001,
+	-9.9518471956253e-001,  9.9518471956253e-001, -9.9518471956253e-001,  9.9518471956253e-001,
+	 9.8017141222954e-002,  9.8017141222954e-002,  9.8017141222954e-002,  9.8017141222954e-002,
+	 6.7155897617340e-001,  6.7155897617340e-001, -8.0320751667023e-001, -8.0320751667023e-001,
+	-7.4095112085342e-001,  7.4095112085342e-001, -5.9569936990738e-001,  5.9569936990738e-001,
+	 6.3439327478409e-001,  6.3439327478409e-001,  6.3439327478409e-001,  6.3439327478409e-001,
+	-7.7301043272018e-001,  7.7301043272018e-001, -7.7301043272018e-001,  7.7301043272018e-001,
+	 9.0398931503296e-001,  9.0398931503296e-001,  2.4298018217087e-001,  2.4298018217087e-001,
+	-4.2755511403084e-001,  4.2755511403084e-001, -9.7003126144409e-001,  9.7003126144409e-001,
+	-6.3439327478409e-001,  6.3439327478409e-001, -6.3439327478409e-001,  6.3439327478409e-001,
+	 7.7301043272018e-001,  7.7301043272018e-001,  7.7301043272018e-001,  7.7301043272018e-001,
+	 3.3688986301422e-001,  3.3688986301422e-001, -8.5772860050201e-001, -8.5772860050201e-001,
+	-9.4154405593872e-001,  9.4154405593872e-001,  5.1410269737244e-001, -5.1410269737244e-001,
+	 8.8192123174667e-001,  8.8192123174667e-001,  8.8192123174667e-001,  8.8192123174667e-001,
+	-4.7139674425125e-001,  4.7139674425125e-001, -4.7139674425125e-001,  4.7139674425125e-001,
+	 9.7003126144409e-001,  9.7003126144409e-001,  7.4095112085342e-001,  7.4095112085342e-001,
+	-2.4298018217087e-001,  2.4298018217087e-001, -6.7155897617340e-001,  6.7155897617340e-001,
+	-8.8192123174667e-001,  8.8192123174667e-001, -8.8192123174667e-001,  8.8192123174667e-001,
+	 4.7139674425125e-001,  4.7139674425125e-001,  4.7139674425125e-001,  4.7139674425125e-001,
+	 5.1410275697708e-001,  5.1410275697708e-001, -9.9879533052444e-001, -9.9879533052444e-001,
+	-8.5772860050201e-001,  8.5772860050201e-001, -4.9067676067352e-002,  4.9067676067352e-002,
+	 2.9028469324112e-001,  2.9028469324112e-001,  2.9028469324112e-001,  2.9028469324112e-001,
+	-9.5694035291672e-001,  9.5694035291672e-001, -9.5694035291672e-001,  9.5694035291672e-001,
+	 8.0320751667023e-001,  8.0320751667023e-001, -3.3688986301422e-001, -3.3688986301422e-001,
+	-5.9569931030273e-001,  5.9569931030273e-001, -9.4154405593872e-001,  9.4154405593872e-001,
+	-2.9028469324112e-001,  2.9028469324112e-001, -2.9028469324112e-001,  2.9028469324112e-001,
+	 9.5694035291672e-001,  9.5694035291672e-001,  9.5694035291672e-001,  9.5694035291672e-001,
+	 1.4673048257828e-001,  1.4673048257828e-001, -4.2755514383316e-001, -4.2755514383316e-001,
+	-9.8917651176453e-001,  9.8917651176453e-001,  9.0398931503296e-001, -9.0398931503296e-001,
+	 9.5694035291672e-001,  9.5694035291672e-001,  9.5694035291672e-001,  9.5694035291672e-001,
+	-2.9028469324112e-001,  2.9028469324112e-001, -2.9028469324112e-001,  2.9028469324112e-001,
+	 9.8917651176453e-001,  9.8917651176453e-001,  9.0398931503296e-001,  9.0398931503296e-001,
+	-1.4673048257828e-001,  1.4673048257828e-001, -4.2755514383316e-001,  4.2755514383316e-001,
+	-9.5694035291672e-001,  9.5694035291672e-001, -9.5694035291672e-001,  9.5694035291672e-001,
+	 2.9028469324112e-001,  2.9028469324112e-001,  2.9028469324112e-001,  2.9028469324112e-001,
+	 5.9569931030273e-001,  5.9569931030273e-001, -9.4154405593872e-001, -9.4154405593872e-001,
+	-8.0320751667023e-001,  8.0320751667023e-001, -3.3688986301422e-001,  3.3688986301422e-001,
+	 4.7139674425125e-001,  4.7139674425125e-001,  4.7139674425125e-001,  4.7139674425125e-001,
+	-8.8192123174667e-001,  8.8192123174667e-001, -8.8192123174667e-001,  8.8192123174667e-001,
+	 8.5772860050201e-001,  8.5772860050201e-001, -4.9067676067352e-002, -4.9067676067352e-002,
+	-5.1410275697708e-001,  5.1410275697708e-001, -9.9879533052444e-001,  9.9879533052444e-001,
+	-4.7139674425125e-001,  4.7139674425125e-001, -4.7139674425125e-001,  4.7139674425125e-001,
+	 8.8192123174667e-001,  8.8192123174667e-001,  8.8192123174667e-001,  8.8192123174667e-001,
+	 2.4298018217087e-001,  2.4298018217087e-001, -6.7155897617340e-001, -6.7155897617340e-001,
+	-9.7003126144409e-001,  9.7003126144409e-001,  7.4095112085342e-001, -7.4095112085342e-001,
+	 7.7301043272018e-001,  7.7301043272018e-001,  7.7301043272018e-001,  7.7301043272018e-001,
+	-6.3439327478409e-001,  6.3439327478409e-001, -6.3439327478409e-001,  6.3439327478409e-001,
+	 9.4154405593872e-001,  9.4154405593872e-001,  5.1410269737244e-001,  5.1410269737244e-001,
+	-3.3688986301422e-001,  3.3688986301422e-001, -8.5772860050201e-001,  8.5772860050201e-001,
+	-7.7301043272018e-001,  7.7301043272018e-001, -7.7301043272018e-001,  7.7301043272018e-001,
+	 6.3439327478409e-001,  6.3439327478409e-001,  6.3439327478409e-001,  6.3439327478409e-001,
+	 4.2755511403084e-001,  4.2755511403084e-001, -9.7003126144409e-001, -9.7003126144409e-001,
+	-9.0398931503296e-001,  9.0398931503296e-001,  2.4298018217087e-001, -2.4298018217087e-001,
+	 9.8017141222954e-002,  9.8017141222954e-002,  9.8017141222954e-002,  9.8017141222954e-002,
+	-9.9518471956253e-001,  9.9518471956253e-001, -9.9518471956253e-001,  9.9518471956253e-001,
+	 7.4095112085342e-001,  7.4095112085342e-001, -5.9569936990738e-001, -5.9569936990738e-001,
+	-6.7155897617340e-001,  6.7155897617340e-001, -8.0320751667023e-001,  8.0320751667023e-001,
+	-9.8017141222954e-002,  9.8017141222954e-002, -9.8017141222954e-002,  9.8017141222954e-002,
+	 9.9518471956253e-001,  9.9518471956253e-001,  9.9518471956253e-001,  9.9518471956253e-001,
+	 4.9067676067352e-002,  4.9067676067352e-002, -1.4673046767712e-001, -1.4673046767712e-001,
+	-9.9879544973373e-001,  9.9879544973373e-001,  9.8917651176453e-001, -9.8917651176453e-001,
+	 9.9879544973373e-001,  9.9879544973373e-001,  9.9879544973373e-001,  9.9879544973373e-001,
+	-4.9067676067352e-002,  4.9067676067352e-002, -4.9067676067352e-002,  4.9067676067352e-002,
+	 9.9969881772995e-001,  9.9969881772995e-001,  9.9729043245316e-001,  9.9729043245316e-001,
+	-2.4541229009628e-002,  2.4541229009628e-002, -7.3564566671848e-002,  7.3564566671848e-002,
+	-9.9879544973373e-001,  9.9879544973373e-001, -9.9879544973373e-001,  9.9879544973373e-001,
+	 4.9067676067352e-002,  4.9067676067352e-002,  4.9067676067352e-002,  4.9067676067352e-002,
+	 6.8954056501389e-001,  6.8954056501389e-001, -7.5720888376236e-001, -7.5720888376236e-001,
+	-7.2424709796906e-001,  7.2424709796906e-001, -6.5317285060883e-001,  6.5317285060883e-001,
+	 6.7155897617340e-001,  6.7155897617340e-001,  6.7155897617340e-001,  6.7155897617340e-001,
+	-7.4095112085342e-001,  7.4095112085342e-001, -7.4095112085342e-001,  7.4095112085342e-001,
+	 9.1420972347260e-001,  9.1420972347260e-001,  3.1368172168732e-001,  3.1368172168732e-001,
+	-4.0524131059647e-001,  4.0524131059647e-001, -9.4952815771103e-001,  9.4952815771103e-001,
+	-6.7155897617340e-001,  6.7155897617340e-001, -6.7155897617340e-001,  6.7155897617340e-001,
+	 7.4095112085342e-001,  7.4095112085342e-001,  7.4095112085342e-001,  7.4095112085342e-001,
+	 3.5989505052567e-001,  3.5989505052567e-001, -8.9322429895401e-001, -8.9322429895401e-001,
+	-9.3299281597137e-001,  9.3299281597137e-001,  4.4961130619049e-001, -4.4961130619049e-001,
+	 9.0398931503296e-001,  9.0398931503296e-001,  9.0398931503296e-001,  9.0398931503296e-001,
+	-4.2755511403084e-001,  4.2755511403084e-001, -4.2755511403084e-001,  4.2755511403084e-001,
+	 9.7570210695267e-001,  9.7570210695267e-001,  7.8834640979767e-001,  7.8834640979767e-001,
+	-2.1910125017166e-001,  2.1910125017166e-001, -6.1523157358170e-001,  6.1523157358170e-001,
+	-9.0398931503296e-001,  9.0398931503296e-001, -9.0398931503296e-001,  9.0398931503296e-001,
+	 4.2755511403084e-001,  4.2755511403084e-001,  4.2755511403084e-001,  4.2755511403084e-001,
+	 5.3499764204025e-001,  5.3499764204025e-001, -9.9247962236404e-001, -9.9247962236404e-001,
+	-8.4485357999802e-001,  8.4485357999802e-001, -1.2241071462631e-001,  1.2241071462631e-001,
+	 3.3688986301422e-001,  3.3688986301422e-001,  3.3688986301422e-001,  3.3688986301422e-001,
+	-9.4154405593872e-001,  9.4154405593872e-001, -9.4154405593872e-001,  9.4154405593872e-001,
+	 8.1758481264114e-001,  8.1758481264114e-001, -2.6671284437180e-001, -2.6671284437180e-001,
+	-5.7580822706223e-001,  5.7580822706223e-001, -9.6377605199814e-001,  9.6377605199814e-001,
+	-3.3688986301422e-001,  3.3688986301422e-001, -3.3688986301422e-001,  3.3688986301422e-001,
+	 9.4154405593872e-001,  9.4154405593872e-001,  9.4154405593872e-001,  9.4154405593872e-001,
+	 1.7096188664436e-001,  1.7096188664436e-001, -4.9289819598198e-001, -4.9289819598198e-001,
+	-9.8527765274048e-001,  9.8527765274048e-001,  8.7008702754974e-001, -8.7008702754974e-001,
+	 9.7003126144409e-001,  9.7003126144409e-001,  9.7003126144409e-001,  9.7003126144409e-001,
+	-2.4298018217087e-001,  2.4298018217087e-001, -2.4298018217087e-001,  2.4298018217087e-001,
+	 9.9247956275940e-001,  9.9247956275940e-001,  9.3299281597137e-001,  9.3299281597137e-001,
+	-1.2241067737341e-001,  1.2241067737341e-001, -3.5989505052567e-001,  3.5989505052567e-001,
+	-9.7003126144409e-001,  9.7003126144409e-001, -9.7003126144409e-001,  9.7003126144409e-001,
+	 2.4298018217087e-001,  2.4298018217087e-001,  2.4298018217087e-001,  2.4298018217087e-001,
+	 6.1523163318634e-001,  6.1523163318634e-001, -9.1420972347260e-001, -9.1420972347260e-001,
+	-7.8834640979767e-001,  7.8834640979767e-001, -4.0524137020111e-001,  4.0524137020111e-001,
+	 5.1410275697708e-001,  5.1410275697708e-001,  5.1410275697708e-001,  5.1410275697708e-001,
+	-8.5772860050201e-001,  8.5772860050201e-001, -8.5772860050201e-001,  8.5772860050201e-001,
+	 8.7008696794510e-001,  8.7008696794510e-001,  2.4541199207306e-002,  2.4541199207306e-002,
+	-4.9289819598198e-001,  4.9289819598198e-001, -9.9969875812531e-001,  9.9969875812531e-001,
+	-5.1410275697708e-001,  5.1410275697708e-001, -5.1410275697708e-001,  5.1410275697708e-001,
+	 8.5772860050201e-001,  8.5772860050201e-001,  8.5772860050201e-001,  8.5772860050201e-001,
+	 2.6671275496483e-001,  2.6671275496483e-001, -7.2424709796906e-001, -7.2424709796906e-001,
+	-9.6377605199814e-001,  9.6377605199814e-001,  6.8954050540924e-001, -6.8954050540924e-001,
+	 8.0320751667023e-001,  8.0320751667023e-001,  8.0320751667023e-001,  8.0320751667023e-001,
+	-5.9569931030273e-001,  5.9569931030273e-001, -5.9569931030273e-001,  5.9569931030273e-001,
+	 9.4952815771103e-001,  9.4952815771103e-001,  5.7580816745758e-001,  5.7580816745758e-001,
+	-3.1368175148964e-001,  3.1368175148964e-001, -8.1758487224579e-001,  8.1758487224579e-001,
+	-8.0320751667023e-001,  8.0320751667023e-001, -8.0320751667023e-001,  8.0320751667023e-001,
+	 5.9569931030273e-001,  5.9569931030273e-001,  5.9569931030273e-001,  5.9569931030273e-001,
+	 4.4961133599281e-001,  4.4961133599281e-001, -9.8527765274048e-001, -9.8527765274048e-001,
+	-8.9322429895401e-001,  8.9322429895401e-001,  1.7096191644669e-001, -1.7096191644669e-001,
+	 1.4673048257828e-001,  1.4673048257828e-001,  1.4673048257828e-001,  1.4673048257828e-001,
+	-9.8917651176453e-001,  9.8917651176453e-001, -9.8917651176453e-001,  9.8917651176453e-001,
+	 7.5720882415771e-001,  7.5720882415771e-001, -5.3499770164490e-001, -5.3499770164490e-001,
+	-6.5317285060883e-001,  6.5317285060883e-001, -8.4485352039337e-001,  8.4485352039337e-001,
+	-1.4673048257828e-001,  1.4673048257828e-001, -1.4673048257828e-001,  1.4673048257828e-001,
+	 9.8917651176453e-001,  9.8917651176453e-001,  9.8917651176453e-001,  9.8917651176453e-001,
+	 7.3564566671848e-002,  7.3564566671848e-002, -2.1910125017166e-001, -2.1910125017166e-001,
+	-9.9729043245316e-001,  9.9729043245316e-001,  9.7570210695267e-001, -9.7570210695267e-001,
+	 9.8917651176453e-001,  9.8917651176453e-001,  9.8917651176453e-001,  9.8917651176453e-001,
+	-1.4673048257828e-001,  1.4673048257828e-001, -1.4673048257828e-001,  1.4673048257828e-001,
+	 9.9729043245316e-001,  9.9729043245316e-001,  9.7570210695267e-001,  9.7570210695267e-001,
+	-7.3564566671848e-002,  7.3564566671848e-002, -2.1910125017166e-001,  2.1910125017166e-001,
+	-9.8917651176453e-001,  9.8917651176453e-001, -9.8917651176453e-001,  9.8917651176453e-001,
+	 1.4673048257828e-001,  1.4673048257828e-001,  1.4673048257828e-001,  1.4673048257828e-001,
+	 6.5317285060883e-001,  6.5317285060883e-001, -8.4485352039337e-001, -8.4485352039337e-001,
+	-7.5720882415771e-001,  7.5720882415771e-001, -5.3499770164490e-001,  5.3499770164490e-001,
+	 5.9569931030273e-001,  5.9569931030273e-001,  5.9569931030273e-001,  5.9569931030273e-001,
+	-8.0320751667023e-001,  8.0320751667023e-001, -8.0320751667023e-001,  8.0320751667023e-001,
+	 8.9322429895401e-001,  8.9322429895401e-001,  1.7096191644669e-001,  1.7096191644669e-001,
+	-4.4961133599281e-001,  4.4961133599281e-001, -9.8527765274048e-001,  9.8527765274048e-001,
+	-5.9569931030273e-001,  5.9569931030273e-001, -5.9569931030273e-001,  5.9569931030273e-001,
+	 8.0320751667023e-001,  8.0320751667023e-001,  8.0320751667023e-001,  8.0320751667023e-001,
+	 3.1368175148964e-001,  3.1368175148964e-001, -8.1758487224579e-001, -8.1758487224579e-001,
+	-9.4952815771103e-001,  9.4952815771103e-001,  5.7580816745758e-001, -5.7580816745758e-001,
+	 8.5772860050201e-001,  8.5772860050201e-001,  8.5772860050201e-001,  8.5772860050201e-001,
+	-5.1410275697708e-001,  5.1410275697708e-001, -5.1410275697708e-001,  5.1410275697708e-001,
+	 9.6377605199814e-001,  9.6377605199814e-001,  6.8954050540924e-001,  6.8954050540924e-001,
+	-2.6671275496483e-001,  2.6671275496483e-001, -7.2424709796906e-001,  7.2424709796906e-001,
+	-8.5772860050201e-001,  8.5772860050201e-001, -8.5772860050201e-001,  8.5772860050201e-001,
+	 5.1410275697708e-001,  5.1410275697708e-001,  5.1410275697708e-001,  5.1410275697708e-001,
+	 4.9289819598198e-001,  4.9289819598198e-001, -9.9969875812531e-001, -9.9969875812531e-001,
+	-8.7008696794510e-001,  8.7008696794510e-001,  2.4541199207306e-002, -2.4541199207306e-002,
+	 2.4298018217087e-001,  2.4298018217087e-001,  2.4298018217087e-001,  2.4298018217087e-001,
+	-9.7003126144409e-001,  9.7003126144409e-001, -9.7003126144409e-001,  9.7003126144409e-001,
+	 7.8834640979767e-001,  7.8834640979767e-001, -4.0524137020111e-001, -4.0524137020111e-001,
+	-6.1523163318634e-001,  6.1523163318634e-001, -9.1420972347260e-001,  9.1420972347260e-001,
+	-2.4298018217087e-001,  2.4298018217087e-001, -2.4298018217087e-001,  2.4298018217087e-001,
+	 9.7003126144409e-001,  9.7003126144409e-001,  9.7003126144409e-001,  9.7003126144409e-001,
+	 1.2241067737341e-001,  1.2241067737341e-001, -3.5989505052567e-001, -3.5989505052567e-001,
+	-9.9247956275940e-001,  9.9247956275940e-001,  9.3299281597137e-001, -9.3299281597137e-001,
+	 9.4154405593872e-001,  9.4154405593872e-001,  9.4154405593872e-001,  9.4154405593872e-001,
+	-3.3688986301422e-001,  3.3688986301422e-001, -3.3688986301422e-001,  3.3688986301422e-001,
+	 9.8527765274048e-001,  9.8527765274048e-001,  8.7008702754974e-001,  8.7008702754974e-001,
+	-1.7096188664436e-001,  1.7096188664436e-001, -4.9289819598198e-001,  4.9289819598198e-001,
+	-9.4154405593872e-001,  9.4154405593872e-001, -9.4154405593872e-001,  9.4154405593872e-001,
+	 3.3688986301422e-001,  3.3688986301422e-001,  3.3688986301422e-001,  3.3688986301422e-001,
+	 5.7580822706223e-001,  5.7580822706223e-001, -9.6377605199814e-001, -9.6377605199814e-001,
+	-8.1758481264114e-001,  8.1758481264114e-001, -2.6671284437180e-001,  2.6671284437180e-001,
+	 4.2755511403084e-001,  4.2755511403084e-001,  4.2755511403084e-001,  4.2755511403084e-001,
+	-9.0398931503296e-001,  9.0398931503296e-001, -9.0398931503296e-001,  9.0398931503296e-001,
+	 8.4485357999802e-001,  8.4485357999802e-001, -1.2241071462631e-001, -1.2241071462631e-001,
+	-5.3499764204025e-001,  5.3499764204025e-001, -9.9247962236404e-001,  9.9247962236404e-001,
+	-4.2755511403084e-001,  4.2755511403084e-001, -4.2755511403084e-001,  4.2755511403084e-001,
+	 9.0398931503296e-001,  9.0398931503296e-001,  9.0398931503296e-001,  9.0398931503296e-001,
+	 2.1910125017166e-001,  2.1910125017166e-001, -6.1523157358170e-001, -6.1523157358170e-001,
+	-9.7570210695267e-001,  9.7570210695267e-001,  7.8834640979767e-001, -7.8834640979767e-001,
+	 7.4095112085342e-001,  7.4095112085342e-001,  7.4095112085342e-001,  7.4095112085342e-001,
+	-6.7155897617340e-001,  6.7155897617340e-001, -6.7155897617340e-001,  6.7155897617340e-001,
+	 9.3299281597137e-001,  9.3299281597137e-001,  4.4961130619049e-001,  4.4961130619049e-001,
+	-3.5989505052567e-001,  3.5989505052567e-001, -8.9322429895401e-001,  8.9322429895401e-001,
+	-7.4095112085342e-001,  7.4095112085342e-001, -7.4095112085342e-001,  7.4095112085342e-001,
+	 6.7155897617340e-001,  6.7155897617340e-001,  6.7155897617340e-001,  6.7155897617340e-001,
+	 4.0524131059647e-001,  4.0524131059647e-001, -9.4952815771103e-001, -9.4952815771103e-001,
+	-9.1420972347260e-001,  9.1420972347260e-001,  3.1368172168732e-001, -3.1368172168732e-001,
+	 4.9067676067352e-002,  4.9067676067352e-002,  4.9067676067352e-002,  4.9067676067352e-002,
+	-9.9879544973373e-001,  9.9879544973373e-001, -9.9879544973373e-001,  9.9879544973373e-001,
+	 7.2424709796906e-001,  7.2424709796906e-001, -6.5317285060883e-001, -6.5317285060883e-001,
+	-6.8954056501389e-001,  6.8954056501389e-001, -7.5720888376236e-001,  7.5720888376236e-001,
+	-4.9067676067352e-002,  4.9067676067352e-002, -4.9067676067352e-002,  4.9067676067352e-002,
+	 9.9879544973373e-001,  9.9879544973373e-001,  9.9879544973373e-001,  9.9879544973373e-001,
+	 2.4541229009628e-002,  2.4541229009628e-002, -7.3564566671848e-002, -7.3564566671848e-002,
+	-9.9969881772995e-001,  9.9969881772995e-001,  9.9729043245316e-001, -9.9729043245316e-001,
+	 9.9969881772995e-001,  9.9969881772995e-001,  9.9969881772995e-001,  9.9969881772995e-001,
+	-2.4541229009628e-002,  2.4541229009628e-002, -2.4541229009628e-002,  2.4541229009628e-002,
+	 9.9992471933365e-001,  9.9992471933365e-001,  9.9932241439819e-001,  9.9932241439819e-001,
+	-1.2271538376808e-002,  1.2271538376808e-002, -3.6807224154472e-002,  3.6807224154472e-002,
+	-9.9969881772995e-001,  9.9969881772995e-001, -9.9969881772995e-001,  9.9969881772995e-001,
+	 2.4541229009628e-002,  2.4541229009628e-002,  2.4541229009628e-002,  2.4541229009628e-002,
+	 6.9837623834610e-001,  6.9837623834610e-001, -7.3265415430069e-001, -7.3265415430069e-001,
+	-7.1573078632355e-001,  7.1573078632355e-001, -6.8060100078583e-001,  6.8060100078583e-001,
+	 6.8954056501389e-001,  6.8954056501389e-001,  6.8954056501389e-001,  6.8954056501389e-001,
+	-7.2424709796906e-001,  7.2424709796906e-001, -7.2424709796906e-001,  7.2424709796906e-001,
+	 9.1911387443542e-001,  9.1911387443542e-001,  3.4841871261597e-001,  3.4841871261597e-001,
+	-3.9399203658104e-001,  3.9399203658104e-001, -9.3733906745911e-001,  9.3733906745911e-001,
+	-6.8954056501389e-001,  6.8954056501389e-001, -6.8954056501389e-001,  6.8954056501389e-001,
+	 7.2424709796906e-001,  7.2424709796906e-001,  7.2424709796906e-001,  7.2424709796906e-001,
+	 3.7131720781326e-001,  3.7131720781326e-001, -9.0916794538498e-001, -9.0916794538498e-001,
+	-9.2850607633591e-001,  9.2850607633591e-001,  4.1642951965332e-001, -4.1642951965332e-001,
+	 9.1420972347260e-001,  9.1420972347260e-001,  9.1420972347260e-001,  9.1420972347260e-001,
+	-4.0524131059647e-001,  4.0524131059647e-001, -4.0524131059647e-001,  4.0524131059647e-001,
+	 9.7831737995148e-001,  9.7831737995148e-001,  8.1045722961426e-001,  8.1045722961426e-001,
+	-2.0711138844490e-001,  2.0711138844490e-001, -5.8579778671265e-001,  5.8579778671265e-001,
+	-9.1420972347260e-001,  9.1420972347260e-001, -9.1420972347260e-001,  9.1420972347260e-001,
+	 4.0524131059647e-001,  4.0524131059647e-001,  4.0524131059647e-001,  4.0524131059647e-001,
+	 5.4532498121262e-001,  5.4532498121262e-001, -9.8730140924454e-001, -9.8730140924454e-001,
+	-8.3822470903397e-001,  8.3822470903397e-001, -1.5885812044144e-001,  1.5885812044144e-001,
+	 3.5989505052567e-001,  3.5989505052567e-001,  3.5989505052567e-001,  3.5989505052567e-001,
+	-9.3299281597137e-001,  9.3299281597137e-001, -9.3299281597137e-001,  9.3299281597137e-001,
+	 8.2458931207657e-001,  8.2458931207657e-001, -2.3105818033218e-001, -2.3105818033218e-001,
+	-5.6573182344437e-001,  5.6573182344437e-001, -9.7294002771378e-001,  9.7294002771378e-001,
+	-3.5989505052567e-001,  3.5989505052567e-001, -3.5989505052567e-001,  3.5989505052567e-001,
+	 9.3299281597137e-001,  9.3299281597137e-001,  9.3299281597137e-001,  9.3299281597137e-001,
+	 1.8303988873959e-001,  1.8303988873959e-001, -5.2458971738815e-001, -5.2458971738815e-001,
+	-9.8310548067093e-001,  9.8310548067093e-001,  8.5135519504547e-001, -8.5135519504547e-001,
+	 9.7570210695267e-001,  9.7570210695267e-001,  9.7570210695267e-001,  9.7570210695267e-001,
+	-2.1910125017166e-001,  2.1910125017166e-001, -2.1910125017166e-001,  2.1910125017166e-001,
+	 9.9390697479248e-001,  9.9390697479248e-001,  9.4560730457306e-001,  9.4560730457306e-001,
+	-1.1022221297026e-001,  1.1022221297026e-001, -3.2531028985977e-001,  3.2531028985977e-001,
+	-9.7570210695267e-001,  9.7570210695267e-001, -9.7570210695267e-001,  9.7570210695267e-001,
+	 2.1910125017166e-001,  2.1910125017166e-001,  2.1910125017166e-001,  2.1910125017166e-001,
+	 6.2485951185226e-001,  6.2485951185226e-001, -8.9867442846298e-001, -8.9867442846298e-001,
+	-7.8073722124100e-001,  7.8073722124100e-001, -4.3861621618271e-001,  4.3861621618271e-001,
+	 5.3499764204025e-001,  5.3499764204025e-001,  5.3499764204025e-001,  5.3499764204025e-001,
+	-8.4485357999802e-001,  8.4485357999802e-001, -8.4485357999802e-001,  8.4485357999802e-001,
+	 8.7607008218765e-001,  8.7607008218765e-001,  6.1320662498474e-002,  6.1320662498474e-002,
+	-4.8218378424644e-001,  4.8218378424644e-001, -9.9811804294586e-001,  9.9811804294586e-001,
+	-5.3499764204025e-001,  5.3499764204025e-001, -5.3499764204025e-001,  5.3499764204025e-001,
+	 8.4485357999802e-001,  8.4485357999802e-001,  8.4485357999802e-001,  8.4485357999802e-001,
+	 2.7851969003677e-001,  2.7851969003677e-001, -7.4913638830185e-001, -7.4913638830185e-001,
+	-9.6043050289154e-001,  9.6043050289154e-001,  6.6241574287415e-001, -6.6241574287415e-001,
+	 8.1758481264114e-001,  8.1758481264114e-001,  8.1758481264114e-001,  8.1758481264114e-001,
+	-5.7580822706223e-001,  5.7580822706223e-001, -5.7580822706223e-001,  5.7580822706223e-001,
+	 9.5330601930618e-001,  9.5330601930618e-001,  6.0551100969315e-001,  6.0551100969315e-001,
+	-3.0200594663620e-001,  3.0200594663620e-001, -7.9583698511124e-001,  7.9583698511124e-001,
+	-8.1758481264114e-001,  8.1758481264114e-001, -8.1758481264114e-001,  8.1758481264114e-001,
+	 5.7580822706223e-001,  5.7580822706223e-001,  5.7580822706223e-001,  5.7580822706223e-001,
+	 4.6053871512413e-001,  4.6053871512413e-001, -9.9090266227722e-001, -9.9090266227722e-001,
+	-8.8763964176178e-001,  8.8763964176178e-001,  1.3458073139191e-001, -1.3458073139191e-001,
+	 1.7096188664436e-001,  1.7096188664436e-001,  1.7096188664436e-001,  1.7096188664436e-001,
+	-9.8527765274048e-001,  9.8527765274048e-001, -9.8527765274048e-001,  9.8527765274048e-001,
+	 7.6516723632813e-001,  7.6516723632813e-001, -5.0353848934174e-001, -5.0353848934174e-001,
+	-6.4383155107498e-001,  6.4383155107498e-001, -8.6397284269333e-001,  8.6397284269333e-001,
+	-1.7096188664436e-001,  1.7096188664436e-001, -1.7096188664436e-001,  1.7096188664436e-001,
+	 9.8527765274048e-001,  9.8527765274048e-001,  9.8527765274048e-001,  9.8527765274048e-001,
+	 8.5797317326069e-002,  8.5797317326069e-002, -2.5486564636230e-001, -2.5486564636230e-001,
+	-9.9631261825562e-001,  9.9631261825562e-001,  9.6697646379471e-001, -9.6697646379471e-001,
+	 9.9247956275940e-001,  9.9247956275940e-001,  9.9247956275940e-001,  9.9247956275940e-001,
+	-1.2241067737341e-001,  1.2241067737341e-001, -1.2241067737341e-001,  1.2241067737341e-001,
+	 9.9811810255051e-001,  9.9811810255051e-001,  9.8310548067093e-001,  9.8310548067093e-001,
+	-6.1320737004280e-002,  6.1320737004280e-002, -1.8303988873959e-001,  1.8303988873959e-001,
+	-9.9247956275940e-001,  9.9247956275940e-001, -9.9247956275940e-001,  9.9247956275940e-001,
+	 1.2241067737341e-001,  1.2241067737341e-001,  1.2241067737341e-001,  1.2241067737341e-001,
+	 6.6241580247879e-001,  6.6241580247879e-001, -8.2458931207657e-001, -8.2458931207657e-001,
+	-7.4913638830185e-001,  7.4913638830185e-001, -5.6573194265366e-001,  5.6573194265366e-001,
+	 6.1523163318634e-001,  6.1523163318634e-001,  6.1523163318634e-001,  6.1523163318634e-001,
+	-7.8834640979767e-001,  7.8834640979767e-001, -7.8834640979767e-001,  7.8834640979767e-001,
+	 8.9867448806763e-001,  8.9867448806763e-001,  2.0711141824722e-001,  2.0711141824722e-001,
+	-4.3861624598503e-001,  4.3861624598503e-001, -9.7831737995148e-001,  9.7831737995148e-001,
+	-6.1523163318634e-001,  6.1523163318634e-001, -6.1523163318634e-001,  6.1523163318634e-001,
+	 7.8834640979767e-001,  7.8834640979767e-001,  7.8834640979767e-001,  7.8834640979767e-001,
+	 3.2531028985977e-001,  3.2531028985977e-001, -8.3822470903397e-001, -8.3822470903397e-001,
+	-9.4560730457306e-001,  9.4560730457306e-001,  5.4532492160797e-001, -5.4532492160797e-001,
+	 8.7008696794510e-001,  8.7008696794510e-001,  8.7008696794510e-001,  8.7008696794510e-001,
+	-4.9289819598198e-001,  4.9289819598198e-001, -4.9289819598198e-001,  4.9289819598198e-001,
+	 9.6697646379471e-001,  9.6697646379471e-001,  7.1573078632355e-001,  7.1573078632355e-001,
+	-2.5486567616463e-001,  2.5486567616463e-001, -6.9837617874146e-001,  6.9837617874146e-001,
+	-8.7008696794510e-001,  8.7008696794510e-001, -8.7008696794510e-001,  8.7008696794510e-001,
+	 4.9289819598198e-001,  4.9289819598198e-001,  4.9289819598198e-001,  4.9289819598198e-001,
+	 5.0353837013245e-001,  5.0353837013245e-001, -9.9992465972900e-001, -9.9992465972900e-001,
+	-8.6397284269333e-001,  8.6397284269333e-001, -1.2271523475647e-002,  1.2271523475647e-002,
+	 2.6671275496483e-001,  2.6671275496483e-001,  2.6671275496483e-001,  2.6671275496483e-001,
+	-9.6377605199814e-001,  9.6377605199814e-001, -9.6377605199814e-001,  9.6377605199814e-001,
+	 7.9583686590195e-001,  7.9583686590195e-001, -3.7131732702255e-001, -3.7131732702255e-001,
+	-6.0551106929779e-001,  6.0551106929779e-001, -9.2850589752197e-001,  9.2850589752197e-001,
+	-2.6671275496483e-001,  2.6671275496483e-001, -2.6671275496483e-001,  2.6671275496483e-001,
+	 9.6377605199814e-001,  9.6377605199814e-001,  9.6377605199814e-001,  9.6377605199814e-001,
+	 1.3458071649075e-001,  1.3458071649075e-001, -3.9399200677872e-001, -3.9399200677872e-001,
+	-9.9090266227722e-001,  9.9090266227722e-001,  9.1911387443542e-001, -9.1911387443542e-001,
+	 9.4952815771103e-001,  9.4952815771103e-001,  9.4952815771103e-001,  9.4952815771103e-001,
+	-3.1368175148964e-001,  3.1368175148964e-001, -3.1368175148964e-001,  3.1368175148964e-001,
+	 9.8730140924454e-001,  9.8730140924454e-001,  8.8763958215714e-001,  8.8763958215714e-001,
+	-1.5885815024376e-001,  1.5885815024376e-001, -4.6053871512413e-001,  4.6053871512413e-001,
+	-9.4952815771103e-001,  9.4952815771103e-001, -9.4952815771103e-001,  9.4952815771103e-001,
+	 3.1368175148964e-001,  3.1368175148964e-001,  3.1368175148964e-001,  3.1368175148964e-001,
+	 5.8579784631729e-001,  5.8579784631729e-001, -9.5330601930618e-001, -9.5330601930618e-001,
+	-8.1045717000961e-001,  8.1045717000961e-001, -3.0200594663620e-001,  3.0200594663620e-001,
+	 4.4961133599281e-001,  4.4961133599281e-001,  4.4961133599281e-001,  4.4961133599281e-001,
+	-8.9322429895401e-001,  8.9322429895401e-001, -8.9322429895401e-001,  8.9322429895401e-001,
+	 8.5135519504547e-001,  8.5135519504547e-001, -8.5797369480133e-002, -8.5797369480133e-002,
+	-5.2458971738815e-001,  5.2458971738815e-001, -9.9631255865097e-001,  9.9631255865097e-001,
+	-4.4961133599281e-001,  4.4961133599281e-001, -4.4961133599281e-001,  4.4961133599281e-001,
+	 8.9322429895401e-001,  8.9322429895401e-001,  8.9322429895401e-001,  8.9322429895401e-001,
+	 2.3105812072754e-001,  2.3105812072754e-001, -6.4383155107498e-001, -6.4383155107498e-001,
+	-9.7293996810913e-001,  9.7293996810913e-001,  7.6516723632813e-001, -7.6516723632813e-001,
+	 7.5720882415771e-001,  7.5720882415771e-001,  7.5720882415771e-001,  7.5720882415771e-001,
+	-6.5317285060883e-001,  6.5317285060883e-001, -6.5317285060883e-001,  6.5317285060883e-001,
+	 9.3733900785446e-001,  9.3733900785446e-001,  4.8218375444412e-001,  4.8218375444412e-001,
+	-3.4841868281364e-001,  3.4841868281364e-001, -8.7607002258301e-001,  8.7607002258301e-001,
+	-7.5720882415771e-001,  7.5720882415771e-001, -7.5720882415771e-001,  7.5720882415771e-001,
+	 6.5317285060883e-001,  6.5317285060883e-001,  6.5317285060883e-001,  6.5317285060883e-001,
+	 4.1642957925797e-001,  4.1642957925797e-001, -9.6043044328690e-001, -9.6043044328690e-001,
+	-9.0916800498962e-001,  9.0916800498962e-001,  2.7851969003677e-001, -2.7851969003677e-001,
+	 7.3564566671848e-002,  7.3564566671848e-002,  7.3564566671848e-002,  7.3564566671848e-002,
+	-9.9729043245316e-001,  9.9729043245316e-001, -9.9729043245316e-001,  9.9729043245316e-001,
+	 7.3265427350998e-001,  7.3265427350998e-001, -6.2485951185226e-001, -6.2485951185226e-001,
+	-6.8060100078583e-001,  6.8060100078583e-001, -7.8073716163635e-001,  7.8073716163635e-001,
+	-7.3564566671848e-002,  7.3564566671848e-002, -7.3564566671848e-002,  7.3564566671848e-002,
+	 9.9729043245316e-001,  9.9729043245316e-001,  9.9729043245316e-001,  9.9729043245316e-001,
+	 3.6807224154472e-002,  3.6807224154472e-002, -1.1022220551968e-001, -1.1022220551968e-001,
+	-9.9932235479355e-001,  9.9932235479355e-001,  9.9390691518784e-001, -9.9390691518784e-001,
+	 9.9729043245316e-001,  9.9729043245316e-001,  9.9729043245316e-001,  9.9729043245316e-001,
+	-7.3564566671848e-002,  7.3564566671848e-002, -7.3564566671848e-002,  7.3564566671848e-002,
+	 9.9932235479355e-001,  9.9932235479355e-001,  9.9390691518784e-001,  9.9390691518784e-001,
+	-3.6807224154472e-002,  3.6807224154472e-002, -1.1022220551968e-001,  1.1022220551968e-001,
+	-9.9729043245316e-001,  9.9729043245316e-001, -9.9729043245316e-001,  9.9729043245316e-001,
+	 7.3564566671848e-002,  7.3564566671848e-002,  7.3564566671848e-002,  7.3564566671848e-002,
+	 6.8060100078583e-001,  6.8060100078583e-001, -7.8073716163635e-001, -7.8073716163635e-001,
+	-7.3265427350998e-001,  7.3265427350998e-001, -6.2485951185226e-001,  6.2485951185226e-001,
+	 6.5317285060883e-001,  6.5317285060883e-001,  6.5317285060883e-001,  6.5317285060883e-001,
+	-7.5720882415771e-001,  7.5720882415771e-001, -7.5720882415771e-001,  7.5720882415771e-001,
+	 9.0916800498962e-001,  9.0916800498962e-001,  2.7851969003677e-001,  2.7851969003677e-001,
+	-4.1642957925797e-001,  4.1642957925797e-001, -9.6043044328690e-001,  9.6043044328690e-001,
+	-6.5317285060883e-001,  6.5317285060883e-001, -6.5317285060883e-001,  6.5317285060883e-001,
+	 7.5720882415771e-001,  7.5720882415771e-001,  7.5720882415771e-001,  7.5720882415771e-001,
+	 3.4841868281364e-001,  3.4841868281364e-001, -8.7607002258301e-001, -8.7607002258301e-001,
+	-9.3733900785446e-001,  9.3733900785446e-001,  4.8218375444412e-001, -4.8218375444412e-001,
+	 8.9322429895401e-001,  8.9322429895401e-001,  8.9322429895401e-001,  8.9322429895401e-001,
+	-4.4961133599281e-001,  4.4961133599281e-001, -4.4961133599281e-001,  4.4961133599281e-001,
+	 9.7293996810913e-001,  9.7293996810913e-001,  7.6516723632813e-001,  7.6516723632813e-001,
+	-2.3105812072754e-001,  2.3105812072754e-001, -6.4383155107498e-001,  6.4383155107498e-001,
+	-8.9322429895401e-001,  8.9322429895401e-001, -8.9322429895401e-001,  8.9322429895401e-001,
+	 4.4961133599281e-001,  4.4961133599281e-001,  4.4961133599281e-001,  4.4961133599281e-001,
+	 5.2458971738815e-001,  5.2458971738815e-001, -9.9631255865097e-001, -9.9631255865097e-001,
+	-8.5135519504547e-001,  8.5135519504547e-001, -8.5797369480133e-002,  8.5797369480133e-002,
+	 3.1368175148964e-001,  3.1368175148964e-001,  3.1368175148964e-001,  3.1368175148964e-001,
+	-9.4952815771103e-001,  9.4952815771103e-001, -9.4952815771103e-001,  9.4952815771103e-001,
+	 8.1045717000961e-001,  8.1045717000961e-001, -3.0200594663620e-001, -3.0200594663620e-001,
+	-5.8579784631729e-001,  5.8579784631729e-001, -9.5330601930618e-001,  9.5330601930618e-001,
+	-3.1368175148964e-001,  3.1368175148964e-001, -3.1368175148964e-001,  3.1368175148964e-001,
+	 9.4952815771103e-001,  9.4952815771103e-001,  9.4952815771103e-001,  9.4952815771103e-001,
+	 1.5885815024376e-001,  1.5885815024376e-001, -4.6053871512413e-001, -4.6053871512413e-001,
+	-9.8730140924454e-001,  9.8730140924454e-001,  8.8763958215714e-001, -8.8763958215714e-001,
+	 9.6377605199814e-001,  9.6377605199814e-001,  9.6377605199814e-001,  9.6377605199814e-001,
+	-2.6671275496483e-001,  2.6671275496483e-001, -2.6671275496483e-001,  2.6671275496483e-001,
+	 9.9090266227722e-001,  9.9090266227722e-001,  9.1911387443542e-001,  9.1911387443542e-001,
+	-1.3458071649075e-001,  1.3458071649075e-001, -3.9399200677872e-001,  3.9399200677872e-001,
+	-9.6377605199814e-001,  9.6377605199814e-001, -9.6377605199814e-001,  9.6377605199814e-001,
+	 2.6671275496483e-001,  2.6671275496483e-001,  2.6671275496483e-001,  2.6671275496483e-001,
+	 6.0551106929779e-001,  6.0551106929779e-001, -9.2850589752197e-001, -9.2850589752197e-001,
+	-7.9583686590195e-001,  7.9583686590195e-001, -3.7131732702255e-001,  3.7131732702255e-001,
+	 4.9289819598198e-001,  4.9289819598198e-001,  4.9289819598198e-001,  4.9289819598198e-001,
+	-8.7008696794510e-001,  8.7008696794510e-001, -8.7008696794510e-001,  8.7008696794510e-001,
+	 8.6397284269333e-001,  8.6397284269333e-001, -1.2271523475647e-002, -1.2271523475647e-002,
+	-5.0353837013245e-001,  5.0353837013245e-001, -9.9992465972900e-001,  9.9992465972900e-001,
+	-4.9289819598198e-001,  4.9289819598198e-001, -4.9289819598198e-001,  4.9289819598198e-001,
+	 8.7008696794510e-001,  8.7008696794510e-001,  8.7008696794510e-001,  8.7008696794510e-001,
+	 2.5486567616463e-001,  2.5486567616463e-001, -6.9837617874146e-001, -6.9837617874146e-001,
+	-9.6697646379471e-001,  9.6697646379471e-001,  7.1573078632355e-001, -7.1573078632355e-001,
+	 7.8834640979767e-001,  7.8834640979767e-001,  7.8834640979767e-001,  7.8834640979767e-001,
+	-6.1523163318634e-001,  6.1523163318634e-001, -6.1523163318634e-001,  6.1523163318634e-001,
+	 9.4560730457306e-001,  9.4560730457306e-001,  5.4532492160797e-001,  5.4532492160797e-001,
+	-3.2531028985977e-001,  3.2531028985977e-001, -8.3822470903397e-001,  8.3822470903397e-001,
+	-7.8834640979767e-001,  7.8834640979767e-001, -7.8834640979767e-001,  7.8834640979767e-001,
+	 6.1523163318634e-001,  6.1523163318634e-001,  6.1523163318634e-001,  6.1523163318634e-001,
+	 4.3861624598503e-001,  4.3861624598503e-001, -9.7831737995148e-001, -9.7831737995148e-001,
+	-8.9867448806763e-001,  8.9867448806763e-001,  2.0711141824722e-001, -2.0711141824722e-001,
+	 1.2241067737341e-001,  1.2241067737341e-001,  1.2241067737341e-001,  1.2241067737341e-001,
+	-9.9247956275940e-001,  9.9247956275940e-001, -9.9247956275940e-001,  9.9247956275940e-001,
+	 7.4913638830185e-001,  7.4913638830185e-001, -5.6573194265366e-001, -5.6573194265366e-001,
+	-6.6241580247879e-001,  6.6241580247879e-001, -8.2458931207657e-001,  8.2458931207657e-001,
+	-1.2241067737341e-001,  1.2241067737341e-001, -1.2241067737341e-001,  1.2241067737341e-001,
+	 9.9247956275940e-001,  9.9247956275940e-001,  9.9247956275940e-001,  9.9247956275940e-001,
+	 6.1320737004280e-002,  6.1320737004280e-002, -1.8303988873959e-001, -1.8303988873959e-001,
+	-9.9811810255051e-001,  9.9811810255051e-001,  9.8310548067093e-001, -9.8310548067093e-001,
+	 9.8527765274048e-001,  9.8527765274048e-001,  9.8527765274048e-001,  9.8527765274048e-001,
+	-1.7096188664436e-001,  1.7096188664436e-001, -1.7096188664436e-001,  1.7096188664436e-001,
+	 9.9631261825562e-001,  9.9631261825562e-001,  9.6697646379471e-001,  9.6697646379471e-001,
+	-8.5797317326069e-002,  8.5797317326069e-002, -2.5486564636230e-001,  2.5486564636230e-001,
+	-9.8527765274048e-001,  9.8527765274048e-001, -9.8527765274048e-001,  9.8527765274048e-001,
+	 1.7096188664436e-001,  1.7096188664436e-001,  1.7096188664436e-001,  1.7096188664436e-001,
+	 6.4383155107498e-001,  6.4383155107498e-001, -8.6397284269333e-001, -8.6397284269333e-001,
+	-7.6516723632813e-001,  7.6516723632813e-001, -5.0353848934174e-001,  5.0353848934174e-001,
+	 5.7580822706223e-001,  5.7580822706223e-001,  5.7580822706223e-001,  5.7580822706223e-001,
+	-8.1758481264114e-001,  8.1758481264114e-001, -8.1758481264114e-001,  8.1758481264114e-001,
+	 8.8763964176178e-001,  8.8763964176178e-001,  1.3458073139191e-001,  1.3458073139191e-001,
+	-4.6053871512413e-001,  4.6053871512413e-001, -9.9090266227722e-001,  9.9090266227722e-001,
+	-5.7580822706223e-001,  5.7580822706223e-001, -5.7580822706223e-001,  5.7580822706223e-001,
+	 8.1758481264114e-001,  8.1758481264114e-001,  8.1758481264114e-001,  8.1758481264114e-001,
+	 3.0200594663620e-001,  3.0200594663620e-001, -7.9583698511124e-001, -7.9583698511124e-001,
+	-9.5330601930618e-001,  9.5330601930618e-001,  6.0551100969315e-001, -6.0551100969315e-001,
+	 8.4485357999802e-001,  8.4485357999802e-001,  8.4485357999802e-001,  8.4485357999802e-001,
+	-5.3499764204025e-001,  5.3499764204025e-001, -5.3499764204025e-001,  5.3499764204025e-001,
+	 9.6043050289154e-001,  9.6043050289154e-001,  6.6241574287415e-001,  6.6241574287415e-001,
+	-2.7851969003677e-001,  2.7851969003677e-001, -7.4913638830185e-001,  7.4913638830185e-001,
+	-8.4485357999802e-001,  8.4485357999802e-001, -8.4485357999802e-001,  8.4485357999802e-001,
+	 5.3499764204025e-001,  5.3499764204025e-001,  5.3499764204025e-001,  5.3499764204025e-001,
+	 4.8218378424644e-001,  4.8218378424644e-001, -9.9811804294586e-001, -9.9811804294586e-001,
+	-8.7607008218765e-001,  8.7607008218765e-001,  6.1320662498474e-002, -6.1320662498474e-002,
+	 2.1910125017166e-001,  2.1910125017166e-001,  2.1910125017166e-001,  2.1910125017166e-001,
+	-9.7570210695267e-001,  9.7570210695267e-001, -9.7570210695267e-001,  9.7570210695267e-001,
+	 7.8073722124100e-001,  7.8073722124100e-001, -4.3861621618271e-001, -4.3861621618271e-001,
+	-6.2485951185226e-001,  6.2485951185226e-001, -8.9867442846298e-001,  8.9867442846298e-001,
+	-2.1910125017166e-001,  2.1910125017166e-001, -2.1910125017166e-001,  2.1910125017166e-001,
+	 9.7570210695267e-001,  9.7570210695267e-001,  9.7570210695267e-001,  9.7570210695267e-001,
+	 1.1022221297026e-001,  1.1022221297026e-001, -3.2531028985977e-001, -3.2531028985977e-001,
+	-9.9390697479248e-001,  9.9390697479248e-001,  9.4560730457306e-001, -9.4560730457306e-001,
+	 9.3299281597137e-001,  9.3299281597137e-001,  9.3299281597137e-001,  9.3299281597137e-001,
+	-3.5989505052567e-001,  3.5989505052567e-001, -3.5989505052567e-001,  3.5989505052567e-001,
+	 9.8310548067093e-001,  9.8310548067093e-001,  8.5135519504547e-001,  8.5135519504547e-001,
+	-1.8303988873959e-001,  1.8303988873959e-001, -5.2458971738815e-001,  5.2458971738815e-001,
+	-9.3299281597137e-001,  9.3299281597137e-001, -9.3299281597137e-001,  9.3299281597137e-001,
+	 3.5989505052567e-001,  3.5989505052567e-001,  3.5989505052567e-001,  3.5989505052567e-001,
+	 5.6573182344437e-001,  5.6573182344437e-001, -9.7294002771378e-001, -9.7294002771378e-001,
+	-8.2458931207657e-001,  8.2458931207657e-001, -2.3105818033218e-001,  2.3105818033218e-001,
+	 4.0524131059647e-001,  4.0524131059647e-001,  4.0524131059647e-001,  4.0524131059647e-001,
+	-9.1420972347260e-001,  9.1420972347260e-001, -9.1420972347260e-001,  9.1420972347260e-001,
+	 8.3822470903397e-001,  8.3822470903397e-001, -1.5885812044144e-001, -1.5885812044144e-001,
+	-5.4532498121262e-001,  5.4532498121262e-001, -9.8730140924454e-001,  9.8730140924454e-001,
+	-4.0524131059647e-001,  4.0524131059647e-001, -4.0524131059647e-001,  4.0524131059647e-001,
+	 9.1420972347260e-001,  9.1420972347260e-001,  9.1420972347260e-001,  9.1420972347260e-001,
+	 2.0711138844490e-001,  2.0711138844490e-001, -5.8579778671265e-001, -5.8579778671265e-001,
+	-9.7831737995148e-001,  9.7831737995148e-001,  8.1045722961426e-001, -8.1045722961426e-001,
+	 7.2424709796906e-001,  7.2424709796906e-001,  7.2424709796906e-001,  7.2424709796906e-001,
+	-6.8954056501389e-001,  6.8954056501389e-001, -6.8954056501389e-001,  6.8954056501389e-001,
+	 9.2850607633591e-001,  9.2850607633591e-001,  4.1642951965332e-001,  4.1642951965332e-001,
+	-3.7131720781326e-001,  3.7131720781326e-001, -9.0916794538498e-001,  9.0916794538498e-001,
+	-7.2424709796906e-001,  7.2424709796906e-001, -7.2424709796906e-001,  7.2424709796906e-001,
+	 6.8954056501389e-001,  6.8954056501389e-001,  6.8954056501389e-001,  6.8954056501389e-001,
+	 3.9399203658104e-001,  3.9399203658104e-001, -9.3733906745911e-001, -9.3733906745911e-001,
+	-9.1911387443542e-001,  9.1911387443542e-001,  3.4841871261597e-001, -3.4841871261597e-001,
+	 2.4541229009628e-002,  2.4541229009628e-002,  2.4541229009628e-002,  2.4541229009628e-002,
+	-9.9969881772995e-001,  9.9969881772995e-001, -9.9969881772995e-001,  9.9969881772995e-001,
+	 7.1573078632355e-001,  7.1573078632355e-001, -6.8060100078583e-001, -6.8060100078583e-001,
+	-6.9837623834610e-001,  6.9837623834610e-001, -7.3265415430069e-001,  7.3265415430069e-001,
+	-2.4541229009628e-002,  2.4541229009628e-002, -2.4541229009628e-002,  2.4541229009628e-002,
+	 9.9969881772995e-001,  9.9969881772995e-001,  9.9969881772995e-001,  9.9969881772995e-001,
+	 1.2271538376808e-002,  1.2271538376808e-002, -3.6807224154472e-002, -3.6807224154472e-002,
+	-9.9992471933365e-001,  9.9992471933365e-001,  9.9932241439819e-001, -9.9932241439819e-001,
+	 9.9992471933365e-001,  9.9992471933365e-001,  9.9992471933365e-001,  9.9992471933365e-001,
+	-1.2271538376808e-002,  1.2271538376808e-002, -1.2271538376808e-002,  1.2271538376808e-002,
+	 9.9998116493225e-001,  9.9998116493225e-001,  9.9983054399490e-001,  9.9983054399490e-001,
+	-6.1358846724033e-003,  6.1358846724033e-003, -1.8406730145216e-002,  1.8406730145216e-002,
+	-9.9992471933365e-001,  9.9992471933365e-001, -9.9992471933365e-001,  9.9992471933365e-001,
+	 1.2271538376808e-002,  1.2271538376808e-002,  1.2271538376808e-002,  1.2271538376808e-002,
+	 7.0275473594666e-001,  7.0275473594666e-001, -7.2000241279602e-001, -7.2000241279602e-001,
+	-7.1143215894699e-001,  7.1143215894699e-001, -6.9397145509720e-001,  6.9397145509720e-001,
+	 6.9837623834610e-001,  6.9837623834610e-001,  6.9837623834610e-001,  6.9837623834610e-001,
+	-7.1573078632355e-001,  7.1573078632355e-001, -7.1573078632355e-001,  7.1573078632355e-001,
+	 9.2151403427124e-001,  9.2151403427124e-001,  3.6561298370361e-001,  3.6561298370361e-001,
+	-3.8834506273270e-001,  3.8834506273270e-001, -9.3076688051224e-001,  9.3076688051224e-001,
+	-6.9837623834610e-001,  6.9837623834610e-001, -6.9837623834610e-001,  6.9837623834610e-001,
+	 7.1573078632355e-001,  7.1573078632355e-001,  7.1573078632355e-001,  7.1573078632355e-001,
+	 3.7700742483139e-001,  3.7700742483139e-001, -9.1667896509171e-001, -9.1667896509171e-001,
+	-9.2621022462845e-001,  9.2621022462845e-001,  3.9962416887283e-001, -3.9962416887283e-001,
+	 9.1911387443542e-001,  9.1911387443542e-001,  9.1911387443542e-001,  9.1911387443542e-001,
+	-3.9399203658104e-001,  3.9399203658104e-001, -3.9399203658104e-001,  3.9399203658104e-001,
+	 9.7956979274750e-001,  9.7956979274750e-001,  8.2110255956650e-001,  8.2110255956650e-001,
+	-2.0110464096069e-001,  2.0110464096069e-001, -5.7078075408936e-001,  5.7078075408936e-001,
+	-9.1911387443542e-001,  9.1911387443542e-001, -9.1911387443542e-001,  9.1911387443542e-001,
+	 3.9399203658104e-001,  3.9399203658104e-001,  3.9399203658104e-001,  3.9399203658104e-001,
+	 5.5045801401138e-001,  5.5045801401138e-001, -9.8421007394791e-001, -9.8421007394791e-001,
+	-8.3486288785934e-001,  8.3486288785934e-001, -1.7700427770615e-001,  1.7700427770615e-001,
+	 3.7131720781326e-001,  3.7131720781326e-001,  3.7131720781326e-001,  3.7131720781326e-001,
+	-9.2850607633591e-001,  9.2850607633591e-001, -9.2850607633591e-001,  9.2850607633591e-001,
+	 8.2804501056671e-001,  8.2804501056671e-001, -2.1311044692993e-001, -2.1311044692993e-001,
+	-5.6066161394119e-001,  5.6066161394119e-001, -9.7702807188034e-001,  9.7702807188034e-001,
+	-3.7131720781326e-001,  3.7131720781326e-001, -3.7131720781326e-001,  3.7131720781326e-001,
+	 9.2850607633591e-001,  9.2850607633591e-001,  9.2850607633591e-001,  9.2850607633591e-001,
+	 1.8906867504120e-001,  1.8906867504120e-001, -5.4017150402069e-001, -5.4017150402069e-001,
+	-9.8196387290955e-001,  9.8196387290955e-001,  8.4155499935150e-001, -8.4155499935150e-001,
+	 9.7831737995148e-001,  9.7831737995148e-001,  9.7831737995148e-001,  9.7831737995148e-001,
+	-2.0711138844490e-001,  2.0711138844490e-001, -2.0711138844490e-001,  2.0711138844490e-001,
+	 9.9456459283829e-001,  9.9456459283829e-001,  9.5143502950668e-001,  9.5143502950668e-001,
+	-1.0412164032459e-001,  1.0412164032459e-001, -3.0784964561462e-001,  3.0784964561462e-001,
+	-9.7831737995148e-001,  9.7831737995148e-001, -9.7831737995148e-001,  9.7831737995148e-001,
+	 2.0711138844490e-001,  2.0711138844490e-001,  2.0711138844490e-001,  2.0711138844490e-001,
+	 6.2963825464249e-001,  6.2963825464249e-001, -8.9044862985611e-001, -8.9044862985611e-001,
+	-7.7688843011856e-001,  7.7688843011856e-001, -4.5508366823196e-001,  4.5508366823196e-001,
+	 5.4532498121262e-001,  5.4532498121262e-001,  5.4532498121262e-001,  5.4532498121262e-001,
+	-8.3822470903397e-001,  8.3822470903397e-001, -8.3822470903397e-001,  8.3822470903397e-001,
+	 8.7901222705841e-001,  8.7901222705841e-001,  7.9682409763336e-002,  7.9682409763336e-002,
+	-4.7679924964905e-001,  4.7679924964905e-001, -9.9682033061981e-001,  9.9682033061981e-001,
+	-5.4532498121262e-001,  5.4532498121262e-001, -5.4532498121262e-001,  5.4532498121262e-001,
+	 8.3822470903397e-001,  8.3822470903397e-001,  8.3822470903397e-001,  8.3822470903397e-001,
+	 2.8440755605698e-001,  2.8440755605698e-001, -7.6120227575302e-001, -7.6120227575302e-001,
+	-9.5870345830917e-001,  9.5870345830917e-001,  6.4851438999176e-001, -6.4851438999176e-001,
+	 8.2458931207657e-001,  8.2458931207657e-001,  8.2458931207657e-001,  8.2458931207657e-001,
+	-5.6573182344437e-001,  5.6573182344437e-001, -5.6573182344437e-001,  5.6573182344437e-001,
+	 9.5514118671417e-001,  9.5514118671417e-001,  6.2005722522736e-001,  6.2005722522736e-001,
+	-2.9615089297295e-001,  2.9615089297295e-001, -7.8455662727356e-001,  7.8455662727356e-001,
+	-8.2458931207657e-001,  8.2458931207657e-001, -8.2458931207657e-001,  8.2458931207657e-001,
+	 5.6573182344437e-001,  5.6573182344437e-001,  5.6573182344437e-001,  5.6573182344437e-001,
+	 4.6597650647163e-001,  4.6597650647163e-001, -9.9321198463440e-001, -9.9321198463440e-001,
+	-8.8479709625244e-001,  8.8479709625244e-001,  1.1631858348846e-001, -1.1631858348846e-001,
+	 1.8303988873959e-001,  1.8303988873959e-001,  1.8303988873959e-001,  1.8303988873959e-001,
+	-9.8310548067093e-001,  9.8310548067093e-001, -9.8310548067093e-001,  9.8310548067093e-001,
+	 7.6910334825516e-001,  7.6910334825516e-001, -4.8755019903183e-001, -4.8755019903183e-001,
+	-6.3912445306778e-001,  6.3912445306778e-001, -8.7309497594833e-001,  8.7309497594833e-001,
+	-1.8303988873959e-001,  1.8303988873959e-001, -1.8303988873959e-001,  1.8303988873959e-001,
+	 9.8310548067093e-001,  9.8310548067093e-001,  9.8310548067093e-001,  9.8310548067093e-001,
+	 9.1908961534500e-002,  9.1908961534500e-002, -2.7262136340141e-001, -2.7262136340141e-001,
+	-9.9576741456985e-001,  9.9576741456985e-001,  9.6212142705917e-001, -9.6212142705917e-001,
+	 9.9390697479248e-001,  9.9390697479248e-001,  9.9390697479248e-001,  9.9390697479248e-001,
+	-1.1022221297026e-001,  1.1022221297026e-001, -1.1022221297026e-001,  1.1022221297026e-001,
+	 9.9847555160522e-001,  9.9847555160522e-001,  9.8630803823471e-001,  9.8630803823471e-001,
+	-5.5195245891809e-002,  5.5195245891809e-002, -1.6491313278675e-001,  1.6491313278675e-001,
+	-9.9390697479248e-001,  9.9390697479248e-001, -9.9390697479248e-001,  9.9390697479248e-001,
+	 1.1022221297026e-001,  1.1022221297026e-001,  1.1022221297026e-001,  1.1022221297026e-001,
+	 6.6699993610382e-001,  6.6699993610382e-001, -8.1403625011444e-001, -8.1403625011444e-001,
+	-7.4505776166916e-001,  7.4505776166916e-001, -5.8081406354904e-001,  5.8081406354904e-001,
+	 6.2485951185226e-001,  6.2485951185226e-001,  6.2485951185226e-001,  6.2485951185226e-001,
+	-7.8073722124100e-001,  7.8073722124100e-001, -7.8073722124100e-001,  7.8073722124100e-001,
+	 9.0134882926941e-001,  9.0134882926941e-001,  2.2508388757706e-001,  2.2508388757706e-001,
+	-4.3309381604195e-001,  4.3309381604195e-001, -9.7433936595917e-001,  9.7433936595917e-001,
+	-6.2485951185226e-001,  6.2485951185226e-001, -6.2485951185226e-001,  6.2485951185226e-001,
+	 7.8073722124100e-001,  7.8073722124100e-001,  7.8073722124100e-001,  7.8073722124100e-001,
+	 3.3110630512238e-001,  3.3110630512238e-001, -8.4812033176422e-001, -8.4812033176422e-001,
+	-9.4359344244003e-001,  9.4359344244003e-001,  5.2980363368988e-001, -5.2980363368988e-001,
+	 8.7607008218765e-001,  8.7607008218765e-001,  8.7607008218765e-001,  8.7607008218765e-001,
+	-4.8218378424644e-001,  4.8218378424644e-001, -4.8218378424644e-001,  4.8218378424644e-001,
+	 9.6852207183838e-001,  9.6852207183838e-001,  7.2846436500549e-001,  7.2846436500549e-001,
+	-2.4892760813236e-001,  2.4892760813236e-001, -6.8508368730545e-001,  6.8508368730545e-001,
+	-8.7607008218765e-001,  8.7607008218765e-001, -8.7607008218765e-001,  8.7607008218765e-001,
+	 4.8218378424644e-001,  4.8218378424644e-001,  4.8218378424644e-001,  4.8218378424644e-001,
+	 5.0883013010025e-001,  5.0883013010025e-001, -9.9952930212021e-001, -9.9952930212021e-001,
+	-8.6086690425873e-001,  8.6086690425873e-001, -3.0674815177917e-002,  3.0674815177917e-002,
+	 2.7851969003677e-001,  2.7851969003677e-001,  2.7851969003677e-001,  2.7851969003677e-001,
+	-9.6043050289154e-001,  9.6043050289154e-001, -9.6043050289154e-001,  9.6043050289154e-001,
+	 7.9953724145889e-001,  7.9953724145889e-001, -3.5416358709335e-001, -3.5416358709335e-001,
+	-6.0061651468277e-001,  6.0061651468277e-001, -9.3518334627151e-001,  9.3518334627151e-001,
+	-2.7851969003677e-001,  2.7851969003677e-001, -2.7851969003677e-001,  2.7851969003677e-001,
+	 9.6043050289154e-001,  9.6043050289154e-001,  9.6043050289154e-001,  9.6043050289154e-001,
+	 1.4065824449062e-001,  1.4065824449062e-001, -4.1084313392639e-001, -4.1084313392639e-001,
+	-9.9005818367004e-001,  9.9005818367004e-001,  9.1170597076416e-001, -9.1170597076416e-001,
+	 9.5330601930618e-001,  9.5330601930618e-001,  9.5330601930618e-001,  9.5330601930618e-001,
+	-3.0200594663620e-001,  3.0200594663620e-001, -3.0200594663620e-001,  3.0200594663620e-001,
+	 9.8825758695602e-001,  9.8825758695602e-001,  8.9596629142761e-001,  8.9596629142761e-001,
+	-1.5279719233513e-001,  1.5279719233513e-001, -4.4412216544151e-001,  4.4412216544151e-001,
+	-9.5330601930618e-001,  9.5330601930618e-001, -9.5330601930618e-001,  9.5330601930618e-001,
+	 3.0200594663620e-001,  3.0200594663620e-001,  3.0200594663620e-001,  3.0200594663620e-001,
+	 5.9075969457626e-001,  5.9075969457626e-001, -9.4758564233780e-001, -9.4758564233780e-001,
+	-8.0684757232666e-001,  8.0684757232666e-001, -3.1950199604034e-001,  3.1950199604034e-001,
+	 4.6053871512413e-001,  4.6053871512413e-001,  4.6053871512413e-001,  4.6053871512413e-001,
+	-8.8763964176178e-001,  8.8763964176178e-001, -8.8763964176178e-001,  8.8763964176178e-001,
+	 8.5455799102783e-001,  8.5455799102783e-001, -6.7443966865540e-002, -6.7443966865540e-002,
+	-5.1935601234436e-001,  5.1935601234436e-001, -9.9772310256958e-001,  9.9772310256958e-001,
+	-4.6053871512413e-001,  4.6053871512413e-001, -4.6053871512413e-001,  4.6053871512413e-001,
+	 8.8763964176178e-001,  8.8763964176178e-001,  8.8763964176178e-001,  8.8763964176178e-001,
+	 2.3702360689640e-001,  2.3702360689640e-001, -6.5780675411224e-001, -6.5780675411224e-001,
+	-9.7150391340256e-001,  9.7150391340256e-001,  7.5318682193756e-001, -7.5318682193756e-001,
+	 7.6516723632813e-001,  7.6516723632813e-001,  7.6516723632813e-001,  7.6516723632813e-001,
+	-6.4383155107498e-001,  6.4383155107498e-001, -6.4383155107498e-001,  6.4383155107498e-001,
+	 9.3945920467377e-001,  9.3945920467377e-001,  4.9822762608528e-001,  4.9822762608528e-001,
+	-3.4266072511673e-001,  3.4266072511673e-001, -8.6704617738724e-001,  8.6704617738724e-001,
+	-7.6516723632813e-001,  7.6516723632813e-001, -7.6516723632813e-001,  7.6516723632813e-001,
+	 6.4383155107498e-001,  6.4383155107498e-001,  6.4383155107498e-001,  6.4383155107498e-001,
+	 4.2200028896332e-001,  4.2200028896332e-001, -9.6539437770844e-001, -9.6539437770844e-001,
+	-9.0659570693970e-001,  9.0659570693970e-001,  2.6079410314560e-001, -2.6079410314560e-001,
+	 8.5797317326069e-002,  8.5797317326069e-002,  8.5797317326069e-002,  8.5797317326069e-002,
+	-9.9631261825562e-001,  9.9631261825562e-001, -9.9631261825562e-001,  9.9631261825562e-001,
+	 7.3681652545929e-001,  7.3681652545929e-001, -6.1038291454315e-001, -6.1038291454315e-001,
+	-6.7609274387360e-001,  6.7609274387360e-001, -7.9210650920868e-001,  7.9210650920868e-001,
+	-8.5797317326069e-002,  8.5797317326069e-002, -8.5797317326069e-002,  8.5797317326069e-002,
+	 9.9631261825562e-001,  9.9631261825562e-001,  9.9631261825562e-001,  9.9631261825562e-001,
+	 4.2938258498907e-002,  4.2938258498907e-002, -1.2849812209606e-001, -1.2849812209606e-001,
+	-9.9907773733139e-001,  9.9907773733139e-001,  9.9170976877213e-001, -9.9170976877213e-001,
+	 9.9811810255051e-001,  9.9811810255051e-001,  9.9811810255051e-001,  9.9811810255051e-001,
+	-6.1320737004280e-002,  6.1320737004280e-002, -6.1320737004280e-002,  6.1320737004280e-002,
+	 9.9952942132950e-001,  9.9952942132950e-001,  9.9576741456985e-001,  9.9576741456985e-001,
+	-3.0674804002047e-002,  3.0674804002047e-002, -9.1908961534500e-002,  9.1908961534500e-002,
+	-9.9811810255051e-001,  9.9811810255051e-001, -9.9811810255051e-001,  9.9811810255051e-001,
+	 6.1320737004280e-002,  6.1320737004280e-002,  6.1320737004280e-002,  6.1320737004280e-002,
+	 6.8508368730545e-001,  6.8508368730545e-001, -7.6910322904587e-001, -7.6910322904587e-001,
+	-7.2846436500549e-001,  7.2846436500549e-001, -6.3912451267242e-001,  6.3912451267242e-001,
+	 6.6241580247879e-001,  6.6241580247879e-001,  6.6241580247879e-001,  6.6241580247879e-001,
+	-7.4913638830185e-001,  7.4913638830185e-001, -7.4913638830185e-001,  7.4913638830185e-001,
+	 9.1170603036880e-001,  9.1170603036880e-001,  2.9615086317062e-001,  2.9615086317062e-001,
+	-4.1084319353104e-001,  4.1084319353104e-001, -9.5514112710953e-001,  9.5514112710953e-001,
+	-6.6241580247879e-001,  6.6241580247879e-001, -6.6241580247879e-001,  6.6241580247879e-001,
+	 7.4913638830185e-001,  7.4913638830185e-001,  7.4913638830185e-001,  7.4913638830185e-001,
+	 3.5416352748871e-001,  3.5416352748871e-001, -8.8479721546173e-001, -8.8479721546173e-001,
+	-9.3518352508545e-001,  9.3518352508545e-001,  4.6597647666931e-001, -4.6597647666931e-001,
+	 8.9867448806763e-001,  8.9867448806763e-001,  8.9867448806763e-001,  8.9867448806763e-001,
+	-4.3861624598503e-001,  4.3861624598503e-001, -4.3861624598503e-001,  4.3861624598503e-001,
+	 9.7433936595917e-001,  9.7433936595917e-001,  7.7688843011856e-001,  7.7688843011856e-001,
+	-2.2508391737938e-001,  2.2508391737938e-001, -6.2963819503784e-001,  6.2963819503784e-001,
+	-8.9867448806763e-001,  8.9867448806763e-001, -8.9867448806763e-001,  8.9867448806763e-001,
+	 4.3861624598503e-001,  4.3861624598503e-001,  4.3861624598503e-001,  4.3861624598503e-001,
+	 5.2980363368988e-001,  5.2980363368988e-001, -9.9456453323364e-001, -9.9456453323364e-001,
+	-8.4812033176422e-001,  8.4812033176422e-001, -1.0412168502808e-001,  1.0412168502808e-001,
+	 3.2531028985977e-001,  3.2531028985977e-001,  3.2531028985977e-001,  3.2531028985977e-001,
+	-9.4560730457306e-001,  9.4560730457306e-001, -9.4560730457306e-001,  9.4560730457306e-001,
+	 8.1403630971909e-001,  8.1403630971909e-001, -2.8440755605698e-001, -2.8440755605698e-001,
+	-5.8081394433975e-001,  5.8081394433975e-001, -9.5870345830917e-001,  9.5870345830917e-001,
+	-3.2531028985977e-001,  3.2531028985977e-001, -3.2531028985977e-001,  3.2531028985977e-001,
+	 9.4560730457306e-001,  9.4560730457306e-001,  9.4560730457306e-001,  9.4560730457306e-001,
+	 1.6491311788559e-001,  1.6491311788559e-001, -4.7679924964905e-001, -4.7679924964905e-001,
+	-9.8630809783936e-001,  9.8630809783936e-001,  8.7901222705841e-001, -8.7901222705841e-001,
+	 9.6697646379471e-001,  9.6697646379471e-001,  9.6697646379471e-001,  9.6697646379471e-001,
+	-2.5486567616463e-001,  2.5486567616463e-001, -2.5486567616463e-001,  2.5486567616463e-001,
+	 9.9170976877213e-001,  9.9170976877213e-001,  9.2621028423309e-001,  9.2621028423309e-001,
+	-1.2849810719490e-001,  1.2849810719490e-001, -3.7700745463371e-001,  3.7700745463371e-001,
+	-9.6697646379471e-001,  9.6697646379471e-001, -9.6697646379471e-001,  9.6697646379471e-001,
+	 2.5486567616463e-001,  2.5486567616463e-001,  2.5486567616463e-001,  2.5486567616463e-001,
+	 6.1038279533386e-001,  6.1038279533386e-001, -9.2151403427124e-001, -9.2151403427124e-001,
+	-7.9210656881332e-001,  7.9210656881332e-001, -3.8834506273270e-001,  3.8834506273270e-001,
+	 5.0353837013245e-001,  5.0353837013245e-001,  5.0353837013245e-001,  5.0353837013245e-001,
+	-8.6397284269333e-001,  8.6397284269333e-001, -8.6397284269333e-001,  8.6397284269333e-001,
+	 8.6704623699188e-001,  8.6704623699188e-001,  6.1358809471130e-003,  6.1358809471130e-003,
+	-4.9822768568993e-001,  4.9822768568993e-001, -9.9998104572296e-001,  9.9998104572296e-001,
+	-5.0353837013245e-001,  5.0353837013245e-001, -5.0353837013245e-001,  5.0353837013245e-001,
+	 8.6397284269333e-001,  8.6397284269333e-001,  8.6397284269333e-001,  8.6397284269333e-001,
+	 2.6079413294792e-001,  2.6079413294792e-001, -7.1143209934235e-001, -7.1143209934235e-001,
+	-9.6539443731308e-001,  9.6539443731308e-001,  7.0275473594666e-001, -7.0275473594666e-001,
+	 7.9583686590195e-001,  7.9583686590195e-001,  7.9583686590195e-001,  7.9583686590195e-001,
+	-6.0551106929779e-001,  6.0551106929779e-001, -6.0551106929779e-001,  6.0551106929779e-001,
+	 9.4758558273315e-001,  9.4758558273315e-001,  5.6066155433655e-001,  5.6066155433655e-001,
+	-3.1950202584267e-001,  3.1950202584267e-001, -8.2804512977600e-001,  8.2804512977600e-001,
+	-7.9583686590195e-001,  7.9583686590195e-001, -7.9583686590195e-001,  7.9583686590195e-001,
+	 6.0551106929779e-001,  6.0551106929779e-001,  6.0551106929779e-001,  6.0551106929779e-001,
+	 4.4412216544151e-001,  4.4412216544151e-001, -9.8196375370026e-001, -9.8196375370026e-001,
+	-8.9596623182297e-001,  8.9596623182297e-001,  1.8906867504120e-001, -1.8906867504120e-001,
+	 1.3458071649075e-001,  1.3458071649075e-001,  1.3458071649075e-001,  1.3458071649075e-001,
+	-9.9090266227722e-001,  9.9090266227722e-001, -9.9090266227722e-001,  9.9090266227722e-001,
+	 7.5318676233292e-001,  7.5318676233292e-001, -5.5045801401138e-001, -5.5045801401138e-001,
+	-6.5780669450760e-001,  6.5780669450760e-001, -8.3486288785934e-001,  8.3486288785934e-001,
+	-1.3458071649075e-001,  1.3458071649075e-001, -1.3458071649075e-001,  1.3458071649075e-001,
+	 9.9090266227722e-001,  9.9090266227722e-001,  9.9090266227722e-001,  9.9090266227722e-001,
+	 6.7443922162056e-002,  6.7443922162056e-002, -2.0110465586185e-001, -2.0110465586185e-001,
+	-9.9772304296494e-001,  9.9772304296494e-001,  9.7956973314285e-001, -9.7956973314285e-001,
+	 9.8730140924454e-001,  9.8730140924454e-001,  9.8730140924454e-001,  9.8730140924454e-001,
+	-1.5885815024376e-001,  1.5885815024376e-001, -1.5885815024376e-001,  1.5885815024376e-001,
+	 9.9682027101517e-001,  9.9682027101517e-001,  9.7150385379791e-001,  9.7150385379791e-001,
+	-7.9682439565659e-002,  7.9682439565659e-002, -2.3702362179756e-001,  2.3702362179756e-001,
+	-9.8730140924454e-001,  9.8730140924454e-001, -9.8730140924454e-001,  9.8730140924454e-001,
+	 1.5885815024376e-001,  1.5885815024376e-001,  1.5885815024376e-001,  1.5885815024376e-001,
+	 6.4851438999176e-001,  6.4851438999176e-001, -8.5455799102783e-001, -8.5455799102783e-001,
+	-7.6120239496231e-001,  7.6120239496231e-001, -5.1935595273972e-001,  5.1935595273972e-001,
+	 5.8579784631729e-001,  5.8579784631729e-001,  5.8579784631729e-001,  5.8579784631729e-001,
+	-8.1045717000961e-001,  8.1045717000961e-001, -8.1045717000961e-001,  8.1045717000961e-001,
+	 8.9044868946075e-001,  8.9044868946075e-001,  1.5279716253281e-001,  1.5279716253281e-001,
+	-4.5508360862732e-001,  4.5508360862732e-001, -9.8825740814209e-001,  9.8825740814209e-001,
+	-5.8579784631729e-001,  5.8579784631729e-001, -5.8579784631729e-001,  5.8579784631729e-001,
+	 8.1045717000961e-001,  8.1045717000961e-001,  8.1045717000961e-001,  8.1045717000961e-001,
+	 3.0784964561462e-001,  3.0784964561462e-001, -8.0684757232666e-001, -8.0684757232666e-001,
+	-9.5143502950668e-001,  9.5143502950668e-001,  5.9075975418091e-001, -5.9075975418091e-001,
+	 8.5135519504547e-001,  8.5135519504547e-001,  8.5135519504547e-001,  8.5135519504547e-001,
+	-5.2458971738815e-001,  5.2458971738815e-001, -5.2458971738815e-001,  5.2458971738815e-001,
+	 9.6212142705917e-001,  9.6212142705917e-001,  6.7609268426895e-001,  6.7609268426895e-001,
+	-2.7262136340141e-001,  2.7262136340141e-001, -7.3681664466858e-001,  7.3681664466858e-001,
+	-8.5135519504547e-001,  8.5135519504547e-001, -8.5135519504547e-001,  8.5135519504547e-001,
+	 5.2458971738815e-001,  5.2458971738815e-001,  5.2458971738815e-001,  5.2458971738815e-001,
+	 4.8755016922951e-001,  4.8755016922951e-001, -9.9907779693604e-001, -9.9907779693604e-001,
+	-8.7309497594833e-001,  8.7309497594833e-001,  4.2938232421875e-002, -4.2938232421875e-002,
+	 2.3105812072754e-001,  2.3105812072754e-001,  2.3105812072754e-001,  2.3105812072754e-001,
+	-9.7293996810913e-001,  9.7293996810913e-001, -9.7293996810913e-001,  9.7293996810913e-001,
+	 7.8455656766891e-001,  7.8455656766891e-001, -4.2200034856796e-001, -4.2200034856796e-001,
+	-6.2005722522736e-001,  6.2005722522736e-001, -9.0659570693970e-001,  9.0659570693970e-001,
+	-2.3105812072754e-001,  2.3105812072754e-001, -2.3105812072754e-001,  2.3105812072754e-001,
+	 9.7293996810913e-001,  9.7293996810913e-001,  9.7293996810913e-001,  9.7293996810913e-001,
+	 1.1631863564253e-001,  1.1631863564253e-001, -3.4266072511673e-001, -3.4266072511673e-001,
+	-9.9321192502975e-001,  9.9321192502975e-001,  9.3945920467377e-001, -9.3945920467377e-001,
+	 9.3733900785446e-001,  9.3733900785446e-001,  9.3733900785446e-001,  9.3733900785446e-001,
+	-3.4841868281364e-001,  3.4841868281364e-001, -3.4841868281364e-001,  3.4841868281364e-001,
+	 9.8421007394791e-001,  9.8421007394791e-001,  8.6086690425873e-001,  8.6086690425873e-001,
+	-1.7700421810150e-001,  1.7700421810150e-001, -5.0883013010025e-001,  5.0883013010025e-001,
+	-9.3733900785446e-001,  9.3733900785446e-001, -9.3733900785446e-001,  9.3733900785446e-001,
+	 3.4841868281364e-001,  3.4841868281364e-001,  3.4841868281364e-001,  3.4841868281364e-001,
+	 5.7078075408936e-001,  5.7078075408936e-001, -9.6852207183838e-001, -9.6852207183838e-001,
+	-8.2110249996185e-001,  8.2110249996185e-001, -2.4892759323120e-001,  2.4892759323120e-001,
+	 4.1642957925797e-001,  4.1642957925797e-001,  4.1642957925797e-001,  4.1642957925797e-001,
+	-9.0916800498962e-001,  9.0916800498962e-001, -9.0916800498962e-001,  9.0916800498962e-001,
+	 8.4155493974686e-001,  8.4155493974686e-001, -1.4065837860107e-001, -1.4065837860107e-001,
+	-5.4017150402069e-001,  5.4017150402069e-001, -9.9005818367004e-001,  9.9005818367004e-001,
+	-4.1642957925797e-001,  4.1642957925797e-001, -4.1642957925797e-001,  4.1642957925797e-001,
+	 9.0916800498962e-001,  9.0916800498962e-001,  9.0916800498962e-001,  9.0916800498962e-001,
+	 2.1311032772064e-001,  2.1311032772064e-001, -6.0061651468277e-001, -6.0061651468277e-001,
+	-9.7702813148499e-001,  9.7702813148499e-001,  7.9953724145889e-001, -7.9953724145889e-001,
+	 7.3265427350998e-001,  7.3265427350998e-001,  7.3265427350998e-001,  7.3265427350998e-001,
+	-6.8060100078583e-001,  6.8060100078583e-001, -6.8060100078583e-001,  6.8060100078583e-001,
+	 9.3076694011688e-001,  9.3076694011688e-001,  4.3309378623962e-001,  4.3309378623962e-001,
+	-3.6561301350594e-001,  3.6561301350594e-001, -9.0134882926941e-001,  9.0134882926941e-001,
+	-7.3265427350998e-001,  7.3265427350998e-001, -7.3265427350998e-001,  7.3265427350998e-001,
+	 6.8060100078583e-001,  6.8060100078583e-001,  6.8060100078583e-001,  6.8060100078583e-001,
+	 3.9962419867516e-001,  3.9962419867516e-001, -9.4359350204468e-001, -9.4359350204468e-001,
+	-9.1667908430099e-001,  9.1667908430099e-001,  3.3110630512238e-001, -3.3110630512238e-001,
+	 3.6807224154472e-002,  3.6807224154472e-002,  3.6807224154472e-002,  3.6807224154472e-002,
+	-9.9932235479355e-001,  9.9932235479355e-001, -9.9932235479355e-001,  9.9932235479355e-001,
+	 7.2000247240067e-001,  7.2000247240067e-001, -6.6699987649918e-001, -6.6699987649918e-001,
+	-6.9397145509720e-001,  6.9397145509720e-001, -7.4505764245987e-001,  7.4505764245987e-001,
+	-3.6807224154472e-002,  3.6807224154472e-002, -3.6807224154472e-002,  3.6807224154472e-002,
+	 9.9932235479355e-001,  9.9932235479355e-001,  9.9932235479355e-001,  9.9932235479355e-001,
+	 1.8406730145216e-002,  1.8406730145216e-002, -5.5195245891809e-002, -5.5195245891809e-002,
+	-9.9983060359955e-001,  9.9983060359955e-001,  9.9847561120987e-001, -9.9847561120987e-001,
+	 9.9932235479355e-001,  9.9932235479355e-001,  9.9932235479355e-001,  9.9932235479355e-001,
+	-3.6807224154472e-002,  3.6807224154472e-002, -3.6807224154472e-002,  3.6807224154472e-002,
+	 9.9983060359955e-001,  9.9983060359955e-001,  9.9847561120987e-001,  9.9847561120987e-001,
+	-1.8406730145216e-002,  1.8406730145216e-002, -5.5195245891809e-002,  5.5195245891809e-002,
+	-9.9932235479355e-001,  9.9932235479355e-001, -9.9932235479355e-001,  9.9932235479355e-001,
+	 3.6807224154472e-002,  3.6807224154472e-002,  3.6807224154472e-002,  3.6807224154472e-002,
+	 6.9397145509720e-001,  6.9397145509720e-001, -7.4505764245987e-001, -7.4505764245987e-001,
+	-7.2000247240067e-001,  7.2000247240067e-001, -6.6699987649918e-001,  6.6699987649918e-001,
+	 6.8060100078583e-001,  6.8060100078583e-001,  6.8060100078583e-001,  6.8060100078583e-001,
+	-7.3265427350998e-001,  7.3265427350998e-001, -7.3265427350998e-001,  7.3265427350998e-001,
+	 9.1667908430099e-001,  9.1667908430099e-001,  3.3110630512238e-001,  3.3110630512238e-001,
+	-3.9962419867516e-001,  3.9962419867516e-001, -9.4359350204468e-001,  9.4359350204468e-001,
+	-6.8060100078583e-001,  6.8060100078583e-001, -6.8060100078583e-001,  6.8060100078583e-001,
+	 7.3265427350998e-001,  7.3265427350998e-001,  7.3265427350998e-001,  7.3265427350998e-001,
+	 3.6561301350594e-001,  3.6561301350594e-001, -9.0134882926941e-001, -9.0134882926941e-001,
+	-9.3076694011688e-001,  9.3076694011688e-001,  4.3309378623962e-001, -4.3309378623962e-001,
+	 9.0916800498962e-001,  9.0916800498962e-001,  9.0916800498962e-001,  9.0916800498962e-001,
+	-4.1642957925797e-001,  4.1642957925797e-001, -4.1642957925797e-001,  4.1642957925797e-001,
+	 9.7702813148499e-001,  9.7702813148499e-001,  7.9953724145889e-001,  7.9953724145889e-001,
+	-2.1311032772064e-001,  2.1311032772064e-001, -6.0061651468277e-001,  6.0061651468277e-001,
+	-9.0916800498962e-001,  9.0916800498962e-001, -9.0916800498962e-001,  9.0916800498962e-001,
+	 4.1642957925797e-001,  4.1642957925797e-001,  4.1642957925797e-001,  4.1642957925797e-001,
+	 5.4017150402069e-001,  5.4017150402069e-001, -9.9005818367004e-001, -9.9005818367004e-001,
+	-8.4155493974686e-001,  8.4155493974686e-001, -1.4065837860107e-001,  1.4065837860107e-001,
+	 3.4841868281364e-001,  3.4841868281364e-001,  3.4841868281364e-001,  3.4841868281364e-001,
+	-9.3733900785446e-001,  9.3733900785446e-001, -9.3733900785446e-001,  9.3733900785446e-001,
+	 8.2110249996185e-001,  8.2110249996185e-001, -2.4892759323120e-001, -2.4892759323120e-001,
+	-5.7078075408936e-001,  5.7078075408936e-001, -9.6852207183838e-001,  9.6852207183838e-001,
+	-3.4841868281364e-001,  3.4841868281364e-001, -3.4841868281364e-001,  3.4841868281364e-001,
+	 9.3733900785446e-001,  9.3733900785446e-001,  9.3733900785446e-001,  9.3733900785446e-001,
+	 1.7700421810150e-001,  1.7700421810150e-001, -5.0883013010025e-001, -5.0883013010025e-001,
+	-9.8421007394791e-001,  9.8421007394791e-001,  8.6086690425873e-001, -8.6086690425873e-001,
+	 9.7293996810913e-001,  9.7293996810913e-001,  9.7293996810913e-001,  9.7293996810913e-001,
+	-2.3105812072754e-001,  2.3105812072754e-001, -2.3105812072754e-001,  2.3105812072754e-001,
+	 9.9321192502975e-001,  9.9321192502975e-001,  9.3945920467377e-001,  9.3945920467377e-001,
+	-1.1631863564253e-001,  1.1631863564253e-001, -3.4266072511673e-001,  3.4266072511673e-001,
+	-9.7293996810913e-001,  9.7293996810913e-001, -9.7293996810913e-001,  9.7293996810913e-001,
+	 2.3105812072754e-001,  2.3105812072754e-001,  2.3105812072754e-001,  2.3105812072754e-001,
+	 6.2005722522736e-001,  6.2005722522736e-001, -9.0659570693970e-001, -9.0659570693970e-001,
+	-7.8455656766891e-001,  7.8455656766891e-001, -4.2200034856796e-001,  4.2200034856796e-001,
+	 5.2458971738815e-001,  5.2458971738815e-001,  5.2458971738815e-001,  5.2458971738815e-001,
+	-8.5135519504547e-001,  8.5135519504547e-001, -8.5135519504547e-001,  8.5135519504547e-001,
+	 8.7309497594833e-001,  8.7309497594833e-001,  4.2938232421875e-002,  4.2938232421875e-002,
+	-4.8755016922951e-001,  4.8755016922951e-001, -9.9907779693604e-001,  9.9907779693604e-001,
+	-5.2458971738815e-001,  5.2458971738815e-001, -5.2458971738815e-001,  5.2458971738815e-001,
+	 8.5135519504547e-001,  8.5135519504547e-001,  8.5135519504547e-001,  8.5135519504547e-001,
+	 2.7262136340141e-001,  2.7262136340141e-001, -7.3681664466858e-001, -7.3681664466858e-001,
+	-9.6212142705917e-001,  9.6212142705917e-001,  6.7609268426895e-001, -6.7609268426895e-001,
+	 8.1045717000961e-001,  8.1045717000961e-001,  8.1045717000961e-001,  8.1045717000961e-001,
+	-5.8579784631729e-001,  5.8579784631729e-001, -5.8579784631729e-001,  5.8579784631729e-001,
+	 9.5143502950668e-001,  9.5143502950668e-001,  5.9075975418091e-001,  5.9075975418091e-001,
+	-3.0784964561462e-001,  3.0784964561462e-001, -8.0684757232666e-001,  8.0684757232666e-001,
+	-8.1045717000961e-001,  8.1045717000961e-001, -8.1045717000961e-001,  8.1045717000961e-001,
+	 5.8579784631729e-001,  5.8579784631729e-001,  5.8579784631729e-001,  5.8579784631729e-001,
+	 4.5508360862732e-001,  4.5508360862732e-001, -9.8825740814209e-001, -9.8825740814209e-001,
+	-8.9044868946075e-001,  8.9044868946075e-001,  1.5279716253281e-001, -1.5279716253281e-001,
+	 1.5885815024376e-001,  1.5885815024376e-001,  1.5885815024376e-001,  1.5885815024376e-001,
+	-9.8730140924454e-001,  9.8730140924454e-001, -9.8730140924454e-001,  9.8730140924454e-001,
+	 7.6120239496231e-001,  7.6120239496231e-001, -5.1935595273972e-001, -5.1935595273972e-001,
+	-6.4851438999176e-001,  6.4851438999176e-001, -8.5455799102783e-001,  8.5455799102783e-001,
+	-1.5885815024376e-001,  1.5885815024376e-001, -1.5885815024376e-001,  1.5885815024376e-001,
+	 9.8730140924454e-001,  9.8730140924454e-001,  9.8730140924454e-001,  9.8730140924454e-001,
+	 7.9682439565659e-002,  7.9682439565659e-002, -2.3702362179756e-001, -2.3702362179756e-001,
+	-9.9682027101517e-001,  9.9682027101517e-001,  9.7150385379791e-001, -9.7150385379791e-001,
+	 9.9090266227722e-001,  9.9090266227722e-001,  9.9090266227722e-001,  9.9090266227722e-001,
+	-1.3458071649075e-001,  1.3458071649075e-001, -1.3458071649075e-001,  1.3458071649075e-001,
+	 9.9772304296494e-001,  9.9772304296494e-001,  9.7956973314285e-001,  9.7956973314285e-001,
+	-6.7443922162056e-002,  6.7443922162056e-002, -2.0110465586185e-001,  2.0110465586185e-001,
+	-9.9090266227722e-001,  9.9090266227722e-001, -9.9090266227722e-001,  9.9090266227722e-001,
+	 1.3458071649075e-001,  1.3458071649075e-001,  1.3458071649075e-001,  1.3458071649075e-001,
+	 6.5780669450760e-001,  6.5780669450760e-001, -8.3486288785934e-001, -8.3486288785934e-001,
+	-7.5318676233292e-001,  7.5318676233292e-001, -5.5045801401138e-001,  5.5045801401138e-001,
+	 6.0551106929779e-001,  6.0551106929779e-001,  6.0551106929779e-001,  6.0551106929779e-001,
+	-7.9583686590195e-001,  7.9583686590195e-001, -7.9583686590195e-001,  7.9583686590195e-001,
+	 8.9596623182297e-001,  8.9596623182297e-001,  1.8906867504120e-001,  1.8906867504120e-001,
+	-4.4412216544151e-001,  4.4412216544151e-001, -9.8196375370026e-001,  9.8196375370026e-001,
+	-6.0551106929779e-001,  6.0551106929779e-001, -6.0551106929779e-001,  6.0551106929779e-001,
+	 7.9583686590195e-001,  7.9583686590195e-001,  7.9583686590195e-001,  7.9583686590195e-001,
+	 3.1950202584267e-001,  3.1950202584267e-001, -8.2804512977600e-001, -8.2804512977600e-001,
+	-9.4758558273315e-001,  9.4758558273315e-001,  5.6066155433655e-001, -5.6066155433655e-001,
+	 8.6397284269333e-001,  8.6397284269333e-001,  8.6397284269333e-001,  8.6397284269333e-001,
+	-5.0353837013245e-001,  5.0353837013245e-001, -5.0353837013245e-001,  5.0353837013245e-001,
+	 9.6539443731308e-001,  9.6539443731308e-001,  7.0275473594666e-001,  7.0275473594666e-001,
+	-2.6079413294792e-001,  2.6079413294792e-001, -7.1143209934235e-001,  7.1143209934235e-001,
+	-8.6397284269333e-001,  8.6397284269333e-001, -8.6397284269333e-001,  8.6397284269333e-001,
+	 5.0353837013245e-001,  5.0353837013245e-001,  5.0353837013245e-001,  5.0353837013245e-001,
+	 4.9822768568993e-001,  4.9822768568993e-001, -9.9998104572296e-001, -9.9998104572296e-001,
+	-8.6704623699188e-001,  8.6704623699188e-001,  6.1358809471130e-003, -6.1358809471130e-003,
+	 2.5486567616463e-001,  2.5486567616463e-001,  2.5486567616463e-001,  2.5486567616463e-001,
+	-9.6697646379471e-001,  9.6697646379471e-001, -9.6697646379471e-001,  9.6697646379471e-001,
+	 7.9210656881332e-001,  7.9210656881332e-001, -3.8834506273270e-001, -3.8834506273270e-001,
+	-6.1038279533386e-001,  6.1038279533386e-001, -9.2151403427124e-001,  9.2151403427124e-001,
+	-2.5486567616463e-001,  2.5486567616463e-001, -2.5486567616463e-001,  2.5486567616463e-001,
+	 9.6697646379471e-001,  9.6697646379471e-001,  9.6697646379471e-001,  9.6697646379471e-001,
+	 1.2849810719490e-001,  1.2849810719490e-001, -3.7700745463371e-001, -3.7700745463371e-001,
+	-9.9170976877213e-001,  9.9170976877213e-001,  9.2621028423309e-001, -9.2621028423309e-001,
+	 9.4560730457306e-001,  9.4560730457306e-001,  9.4560730457306e-001,  9.4560730457306e-001,
+	-3.2531028985977e-001,  3.2531028985977e-001, -3.2531028985977e-001,  3.2531028985977e-001,
+	 9.8630809783936e-001,  9.8630809783936e-001,  8.7901222705841e-001,  8.7901222705841e-001,
+	-1.6491311788559e-001,  1.6491311788559e-001, -4.7679924964905e-001,  4.7679924964905e-001,
+	-9.4560730457306e-001,  9.4560730457306e-001, -9.4560730457306e-001,  9.4560730457306e-001,
+	 3.2531028985977e-001,  3.2531028985977e-001,  3.2531028985977e-001,  3.2531028985977e-001,
+	 5.8081394433975e-001,  5.8081394433975e-001, -9.5870345830917e-001, -9.5870345830917e-001,
+	-8.1403630971909e-001,  8.1403630971909e-001, -2.8440755605698e-001,  2.8440755605698e-001,
+	 4.3861624598503e-001,  4.3861624598503e-001,  4.3861624598503e-001,  4.3861624598503e-001,
+	-8.9867448806763e-001,  8.9867448806763e-001, -8.9867448806763e-001,  8.9867448806763e-001,
+	 8.4812033176422e-001,  8.4812033176422e-001, -1.0412168502808e-001, -1.0412168502808e-001,
+	-5.2980363368988e-001,  5.2980363368988e-001, -9.9456453323364e-001,  9.9456453323364e-001,
+	-4.3861624598503e-001,  4.3861624598503e-001, -4.3861624598503e-001,  4.3861624598503e-001,
+	 8.9867448806763e-001,  8.9867448806763e-001,  8.9867448806763e-001,  8.9867448806763e-001,
+	 2.2508391737938e-001,  2.2508391737938e-001, -6.2963819503784e-001, -6.2963819503784e-001,
+	-9.7433936595917e-001,  9.7433936595917e-001,  7.7688843011856e-001, -7.7688843011856e-001,
+	 7.4913638830185e-001,  7.4913638830185e-001,  7.4913638830185e-001,  7.4913638830185e-001,
+	-6.6241580247879e-001,  6.6241580247879e-001, -6.6241580247879e-001,  6.6241580247879e-001,
+	 9.3518352508545e-001,  9.3518352508545e-001,  4.6597647666931e-001,  4.6597647666931e-001,
+	-3.5416352748871e-001,  3.5416352748871e-001, -8.8479721546173e-001,  8.8479721546173e-001,
+	-7.4913638830185e-001,  7.4913638830185e-001, -7.4913638830185e-001,  7.4913638830185e-001,
+	 6.6241580247879e-001,  6.6241580247879e-001,  6.6241580247879e-001,  6.6241580247879e-001,
+	 4.1084319353104e-001,  4.1084319353104e-001, -9.5514112710953e-001, -9.5514112710953e-001,
+	-9.1170603036880e-001,  9.1170603036880e-001,  2.9615086317062e-001, -2.9615086317062e-001,
+	 6.1320737004280e-002,  6.1320737004280e-002,  6.1320737004280e-002,  6.1320737004280e-002,
+	-9.9811810255051e-001,  9.9811810255051e-001, -9.9811810255051e-001,  9.9811810255051e-001,
+	 7.2846436500549e-001,  7.2846436500549e-001, -6.3912451267242e-001, -6.3912451267242e-001,
+	-6.8508368730545e-001,  6.8508368730545e-001, -7.6910322904587e-001,  7.6910322904587e-001,
+	-6.1320737004280e-002,  6.1320737004280e-002, -6.1320737004280e-002,  6.1320737004280e-002,
+	 9.9811810255051e-001,  9.9811810255051e-001,  9.9811810255051e-001,  9.9811810255051e-001,
+	 3.0674804002047e-002,  3.0674804002047e-002, -9.1908961534500e-002, -9.1908961534500e-002,
+	-9.9952942132950e-001,  9.9952942132950e-001,  9.9576741456985e-001, -9.9576741456985e-001,
+	 9.9631261825562e-001,  9.9631261825562e-001,  9.9631261825562e-001,  9.9631261825562e-001,
+	-8.5797317326069e-002,  8.5797317326069e-002, -8.5797317326069e-002,  8.5797317326069e-002,
+	 9.9907773733139e-001,  9.9907773733139e-001,  9.9170976877213e-001,  9.9170976877213e-001,
+	-4.2938258498907e-002,  4.2938258498907e-002, -1.2849812209606e-001,  1.2849812209606e-001,
+	-9.9631261825562e-001,  9.9631261825562e-001, -9.9631261825562e-001,  9.9631261825562e-001,
+	 8.5797317326069e-002,  8.5797317326069e-002,  8.5797317326069e-002,  8.5797317326069e-002,
+	 6.7609274387360e-001,  6.7609274387360e-001, -7.9210650920868e-001, -7.9210650920868e-001,
+	-7.3681652545929e-001,  7.3681652545929e-001, -6.1038291454315e-001,  6.1038291454315e-001,
+	 6.4383155107498e-001,  6.4383155107498e-001,  6.4383155107498e-001,  6.4383155107498e-001,
+	-7.6516723632813e-001,  7.6516723632813e-001, -7.6516723632813e-001,  7.6516723632813e-001,
+	 9.0659570693970e-001,  9.0659570693970e-001,  2.6079410314560e-001,  2.6079410314560e-001,
+	-4.2200028896332e-001,  4.2200028896332e-001, -9.6539437770844e-001,  9.6539437770844e-001,
+	-6.4383155107498e-001,  6.4383155107498e-001, -6.4383155107498e-001,  6.4383155107498e-001,
+	 7.6516723632813e-001,  7.6516723632813e-001,  7.6516723632813e-001,  7.6516723632813e-001,
+	 3.4266072511673e-001,  3.4266072511673e-001, -8.6704617738724e-001, -8.6704617738724e-001,
+	-9.3945920467377e-001,  9.3945920467377e-001,  4.9822762608528e-001, -4.9822762608528e-001,
+	 8.8763964176178e-001,  8.8763964176178e-001,  8.8763964176178e-001,  8.8763964176178e-001,
+	-4.6053871512413e-001,  4.6053871512413e-001, -4.6053871512413e-001,  4.6053871512413e-001,
+	 9.7150391340256e-001,  9.7150391340256e-001,  7.5318682193756e-001,  7.5318682193756e-001,
+	-2.3702360689640e-001,  2.3702360689640e-001, -6.5780675411224e-001,  6.5780675411224e-001,
+	-8.8763964176178e-001,  8.8763964176178e-001, -8.8763964176178e-001,  8.8763964176178e-001,
+	 4.6053871512413e-001,  4.6053871512413e-001,  4.6053871512413e-001,  4.6053871512413e-001,
+	 5.1935601234436e-001,  5.1935601234436e-001, -9.9772310256958e-001, -9.9772310256958e-001,
+	-8.5455799102783e-001,  8.5455799102783e-001, -6.7443966865540e-002,  6.7443966865540e-002,
+	 3.0200594663620e-001,  3.0200594663620e-001,  3.0200594663620e-001,  3.0200594663620e-001,
+	-9.5330601930618e-001,  9.5330601930618e-001, -9.5330601930618e-001,  9.5330601930618e-001,
+	 8.0684757232666e-001,  8.0684757232666e-001, -3.1950199604034e-001, -3.1950199604034e-001,
+	-5.9075969457626e-001,  5.9075969457626e-001, -9.4758564233780e-001,  9.4758564233780e-001,
+	-3.0200594663620e-001,  3.0200594663620e-001, -3.0200594663620e-001,  3.0200594663620e-001,
+	 9.5330601930618e-001,  9.5330601930618e-001,  9.5330601930618e-001,  9.5330601930618e-001,
+	 1.5279719233513e-001,  1.5279719233513e-001, -4.4412216544151e-001, -4.4412216544151e-001,
+	-9.8825758695602e-001,  9.8825758695602e-001,  8.9596629142761e-001, -8.9596629142761e-001,
+	 9.6043050289154e-001,  9.6043050289154e-001,  9.6043050289154e-001,  9.6043050289154e-001,
+	-2.7851969003677e-001,  2.7851969003677e-001, -2.7851969003677e-001,  2.7851969003677e-001,
+	 9.9005818367004e-001,  9.9005818367004e-001,  9.1170597076416e-001,  9.1170597076416e-001,
+	-1.4065824449062e-001,  1.4065824449062e-001, -4.1084313392639e-001,  4.1084313392639e-001,
+	-9.6043050289154e-001,  9.6043050289154e-001, -9.6043050289154e-001,  9.6043050289154e-001,
+	 2.7851969003677e-001,  2.7851969003677e-001,  2.7851969003677e-001,  2.7851969003677e-001,
+	 6.0061651468277e-001,  6.0061651468277e-001, -9.3518334627151e-001, -9.3518334627151e-001,
+	-7.9953724145889e-001,  7.9953724145889e-001, -3.5416358709335e-001,  3.5416358709335e-001,
+	 4.8218378424644e-001,  4.8218378424644e-001,  4.8218378424644e-001,  4.8218378424644e-001,
+	-8.7607008218765e-001,  8.7607008218765e-001, -8.7607008218765e-001,  8.7607008218765e-001,
+	 8.6086690425873e-001,  8.6086690425873e-001, -3.0674815177917e-002, -3.0674815177917e-002,
+	-5.0883013010025e-001,  5.0883013010025e-001, -9.9952930212021e-001,  9.9952930212021e-001,
+	-4.8218378424644e-001,  4.8218378424644e-001, -4.8218378424644e-001,  4.8218378424644e-001,
+	 8.7607008218765e-001,  8.7607008218765e-001,  8.7607008218765e-001,  8.7607008218765e-001,
+	 2.4892760813236e-001,  2.4892760813236e-001, -6.8508368730545e-001, -6.8508368730545e-001,
+	-9.6852207183838e-001,  9.6852207183838e-001,  7.2846436500549e-001, -7.2846436500549e-001,
+	 7.8073722124100e-001,  7.8073722124100e-001,  7.8073722124100e-001,  7.8073722124100e-001,
+	-6.2485951185226e-001,  6.2485951185226e-001, -6.2485951185226e-001,  6.2485951185226e-001,
+	 9.4359344244003e-001,  9.4359344244003e-001,  5.2980363368988e-001,  5.2980363368988e-001,
+	-3.3110630512238e-001,  3.3110630512238e-001, -8.4812033176422e-001,  8.4812033176422e-001,
+	-7.8073722124100e-001,  7.8073722124100e-001, -7.8073722124100e-001,  7.8073722124100e-001,
+	 6.2485951185226e-001,  6.2485951185226e-001,  6.2485951185226e-001,  6.2485951185226e-001,
+	 4.3309381604195e-001,  4.3309381604195e-001, -9.7433936595917e-001, -9.7433936595917e-001,
+	-9.0134882926941e-001,  9.0134882926941e-001,  2.2508388757706e-001, -2.2508388757706e-001,
+	 1.1022221297026e-001,  1.1022221297026e-001,  1.1022221297026e-001,  1.1022221297026e-001,
+	-9.9390697479248e-001,  9.9390697479248e-001, -9.9390697479248e-001,  9.9390697479248e-001,
+	 7.4505776166916e-001,  7.4505776166916e-001, -5.8081406354904e-001, -5.8081406354904e-001,
+	-6.6699993610382e-001,  6.6699993610382e-001, -8.1403625011444e-001,  8.1403625011444e-001,
+	-1.1022221297026e-001,  1.1022221297026e-001, -1.1022221297026e-001,  1.1022221297026e-001,
+	 9.9390697479248e-001,  9.9390697479248e-001,  9.9390697479248e-001,  9.9390697479248e-001,
+	 5.5195245891809e-002,  5.5195245891809e-002, -1.6491313278675e-001, -1.6491313278675e-001,
+	-9.9847555160522e-001,  9.9847555160522e-001,  9.8630803823471e-001, -9.8630803823471e-001,
+	 9.8310548067093e-001,  9.8310548067093e-001,  9.8310548067093e-001,  9.8310548067093e-001,
+	-1.8303988873959e-001,  1.8303988873959e-001, -1.8303988873959e-001,  1.8303988873959e-001,
+	 9.9576741456985e-001,  9.9576741456985e-001,  9.6212142705917e-001,  9.6212142705917e-001,
+	-9.1908961534500e-002,  9.1908961534500e-002, -2.7262136340141e-001,  2.7262136340141e-001,
+	-9.8310548067093e-001,  9.8310548067093e-001, -9.8310548067093e-001,  9.8310548067093e-001,
+	 1.8303988873959e-001,  1.8303988873959e-001,  1.8303988873959e-001,  1.8303988873959e-001,
+	 6.3912445306778e-001,  6.3912445306778e-001, -8.7309497594833e-001, -8.7309497594833e-001,
+	-7.6910334825516e-001,  7.6910334825516e-001, -4.8755019903183e-001,  4.8755019903183e-001,
+	 5.6573182344437e-001,  5.6573182344437e-001,  5.6573182344437e-001,  5.6573182344437e-001,
+	-8.2458931207657e-001,  8.2458931207657e-001, -8.2458931207657e-001,  8.2458931207657e-001,
+	 8.8479709625244e-001,  8.8479709625244e-001,  1.1631858348846e-001,  1.1631858348846e-001,
+	-4.6597650647163e-001,  4.6597650647163e-001, -9.9321198463440e-001,  9.9321198463440e-001,
+	-5.6573182344437e-001,  5.6573182344437e-001, -5.6573182344437e-001,  5.6573182344437e-001,
+	 8.2458931207657e-001,  8.2458931207657e-001,  8.2458931207657e-001,  8.2458931207657e-001,
+	 2.9615089297295e-001,  2.9615089297295e-001, -7.8455662727356e-001, -7.8455662727356e-001,
+	-9.5514118671417e-001,  9.5514118671417e-001,  6.2005722522736e-001, -6.2005722522736e-001,
+	 8.3822470903397e-001,  8.3822470903397e-001,  8.3822470903397e-001,  8.3822470903397e-001,
+	-5.4532498121262e-001,  5.4532498121262e-001, -5.4532498121262e-001,  5.4532498121262e-001,
+	 9.5870345830917e-001,  9.5870345830917e-001,  6.4851438999176e-001,  6.4851438999176e-001,
+	-2.8440755605698e-001,  2.8440755605698e-001, -7.6120227575302e-001,  7.6120227575302e-001,
+	-8.3822470903397e-001,  8.3822470903397e-001, -8.3822470903397e-001,  8.3822470903397e-001,
+	 5.4532498121262e-001,  5.4532498121262e-001,  5.4532498121262e-001,  5.4532498121262e-001,
+	 4.7679924964905e-001,  4.7679924964905e-001, -9.9682033061981e-001, -9.9682033061981e-001,
+	-8.7901222705841e-001,  8.7901222705841e-001,  7.9682409763336e-002, -7.9682409763336e-002,
+	 2.0711138844490e-001,  2.0711138844490e-001,  2.0711138844490e-001,  2.0711138844490e-001,
+	-9.7831737995148e-001,  9.7831737995148e-001, -9.7831737995148e-001,  9.7831737995148e-001,
+	 7.7688843011856e-001,  7.7688843011856e-001, -4.5508366823196e-001, -4.5508366823196e-001,
+	-6.2963825464249e-001,  6.2963825464249e-001, -8.9044862985611e-001,  8.9044862985611e-001,
+	-2.0711138844490e-001,  2.0711138844490e-001, -2.0711138844490e-001,  2.0711138844490e-001,
+	 9.7831737995148e-001,  9.7831737995148e-001,  9.7831737995148e-001,  9.7831737995148e-001,
+	 1.0412164032459e-001,  1.0412164032459e-001, -3.0784964561462e-001, -3.0784964561462e-001,
+	-9.9456459283829e-001,  9.9456459283829e-001,  9.5143502950668e-001, -9.5143502950668e-001,
+	 9.2850607633591e-001,  9.2850607633591e-001,  9.2850607633591e-001,  9.2850607633591e-001,
+	-3.7131720781326e-001,  3.7131720781326e-001, -3.7131720781326e-001,  3.7131720781326e-001,
+	 9.8196387290955e-001,  9.8196387290955e-001,  8.4155499935150e-001,  8.4155499935150e-001,
+	-1.8906867504120e-001,  1.8906867504120e-001, -5.4017150402069e-001,  5.4017150402069e-001,
+	-9.2850607633591e-001,  9.2850607633591e-001, -9.2850607633591e-001,  9.2850607633591e-001,
+	 3.7131720781326e-001,  3.7131720781326e-001,  3.7131720781326e-001,  3.7131720781326e-001,
+	 5.6066161394119e-001,  5.6066161394119e-001, -9.7702807188034e-001, -9.7702807188034e-001,
+	-8.2804501056671e-001,  8.2804501056671e-001, -2.1311044692993e-001,  2.1311044692993e-001,
+	 3.9399203658104e-001,  3.9399203658104e-001,  3.9399203658104e-001,  3.9399203658104e-001,
+	-9.1911387443542e-001,  9.1911387443542e-001, -9.1911387443542e-001,  9.1911387443542e-001,
+	 8.3486288785934e-001,  8.3486288785934e-001, -1.7700427770615e-001, -1.7700427770615e-001,
+	-5.5045801401138e-001,  5.5045801401138e-001, -9.8421007394791e-001,  9.8421007394791e-001,
+	-3.9399203658104e-001,  3.9399203658104e-001, -3.9399203658104e-001,  3.9399203658104e-001,
+	 9.1911387443542e-001,  9.1911387443542e-001,  9.1911387443542e-001,  9.1911387443542e-001,
+	 2.0110464096069e-001,  2.0110464096069e-001, -5.7078075408936e-001, -5.7078075408936e-001,
+	-9.7956979274750e-001,  9.7956979274750e-001,  8.2110255956650e-001, -8.2110255956650e-001,
+	 7.1573078632355e-001,  7.1573078632355e-001,  7.1573078632355e-001,  7.1573078632355e-001,
+	-6.9837623834610e-001,  6.9837623834610e-001, -6.9837623834610e-001,  6.9837623834610e-001,
+	 9.2621022462845e-001,  9.2621022462845e-001,  3.9962416887283e-001,  3.9962416887283e-001,
+	-3.7700742483139e-001,  3.7700742483139e-001, -9.1667896509171e-001,  9.1667896509171e-001,
+	-7.1573078632355e-001,  7.1573078632355e-001, -7.1573078632355e-001,  7.1573078632355e-001,
+	 6.9837623834610e-001,  6.9837623834610e-001,  6.9837623834610e-001,  6.9837623834610e-001,
+	 3.8834506273270e-001,  3.8834506273270e-001, -9.3076688051224e-001, -9.3076688051224e-001,
+	-9.2151403427124e-001,  9.2151403427124e-001,  3.6561298370361e-001, -3.6561298370361e-001,
+	 1.2271538376808e-002,  1.2271538376808e-002,  1.2271538376808e-002,  1.2271538376808e-002,
+	-9.9992471933365e-001,  9.9992471933365e-001, -9.9992471933365e-001,  9.9992471933365e-001,
+	 7.1143215894699e-001,  7.1143215894699e-001, -6.9397145509720e-001, -6.9397145509720e-001,
+	-7.0275473594666e-001,  7.0275473594666e-001, -7.2000241279602e-001,  7.2000241279602e-001,
+	-1.2271538376808e-002,  1.2271538376808e-002, -1.2271538376808e-002,  1.2271538376808e-002,
+	 9.9992471933365e-001,  9.9992471933365e-001,  9.9992471933365e-001,  9.9992471933365e-001,
+	 6.1358846724033e-003,  6.1358846724033e-003, -1.8406730145216e-002, -1.8406730145216e-002,
+	-9.9998116493225e-001,  9.9998116493225e-001,  9.9983054399490e-001, -9.9983054399490e-001,
+	 9.9998116493225e-001,  9.9998116493225e-001,  9.9998116493225e-001,  9.9998116493225e-001,
+	-6.1358846724033e-003,  6.1358846724033e-003, -6.1358846724033e-003,  6.1358846724033e-003,
+	 9.9999529123306e-001,  9.9999529123306e-001,  9.9995762109756e-001,  9.9995762109756e-001,
+	-3.0679567717016e-003,  3.0679567717016e-003, -9.2037543654442e-003,  9.2037543654442e-003,
+	-9.9998116493225e-001,  9.9998116493225e-001, -9.9998116493225e-001,  9.9998116493225e-001,
+	 6.1358846724033e-003,  6.1358846724033e-003,  6.1358846724033e-003,  6.1358846724033e-003,
+	 7.0493412017822e-001,  7.0493412017822e-001, -7.1358478069305e-001, -7.1358478069305e-001,
+	-7.0927280187607e-001,  7.0927280187607e-001, -7.0056885480881e-001,  7.0056885480881e-001,
+	 7.0275473594666e-001,  7.0275473594666e-001,  7.0275473594666e-001,  7.0275473594666e-001,
+	-7.1143215894699e-001,  7.1143215894699e-001, -7.1143215894699e-001,  7.1143215894699e-001,
+	 9.2270112037659e-001,  9.2270112037659e-001,  3.7416404485703e-001,  3.7416404485703e-001,
+	-3.8551607728004e-001,  3.8551607728004e-001, -9.2736244201660e-001,  9.2736244201660e-001,
+	-7.0275473594666e-001,  7.0275473594666e-001, -7.0275473594666e-001,  7.0275473594666e-001,
+	 7.1143215894699e-001,  7.1143215894699e-001,  7.1143215894699e-001,  7.1143215894699e-001,
+	 3.7984722852707e-001,  3.7984722852707e-001, -9.2031830549240e-001, -9.2031830549240e-001,
+	-9.2504924535751e-001,  9.2504924535751e-001,  3.9117038249969e-001, -3.9117038249969e-001,
+	 9.2151403427124e-001,  9.2151403427124e-001,  9.2151403427124e-001,  9.2151403427124e-001,
+	-3.8834506273270e-001,  3.8834506273270e-001, -3.8834506273270e-001,  3.8834506273270e-001,
+	 9.8018211126328e-001,  9.8018211126328e-001,  8.2632100582123e-001,  8.2632100582123e-001,
+	-1.9809842109680e-001,  1.9809842109680e-001, -5.6319934129715e-001,  5.6319934129715e-001,
+	-9.2151403427124e-001,  9.2151403427124e-001, -9.2151403427124e-001,  9.2151403427124e-001,
+	 3.8834506273270e-001,  3.8834506273270e-001,  3.8834506273270e-001,  3.8834506273270e-001,
+	 5.5301672220230e-001,  5.5301672220230e-001, -9.8253935575485e-001, -9.8253935575485e-001,
+	-8.3317017555237e-001,  8.3317017555237e-001, -1.8605518341064e-001,  1.8605518341064e-001,
+	 3.7700742483139e-001,  3.7700742483139e-001,  3.7700742483139e-001,  3.7700742483139e-001,
+	-9.2621022462845e-001,  9.2621022462845e-001, -9.2621022462845e-001,  9.2621022462845e-001,
+	 8.2976120710373e-001,  8.2976120710373e-001, -2.0410901308060e-001, -2.0410901308060e-001,
+	-5.5811852216721e-001,  5.5811852216721e-001, -9.7894805669785e-001,  9.7894805669785e-001,
+	-3.7700742483139e-001,  3.7700742483139e-001, -3.7700742483139e-001,  3.7700742483139e-001,
+	 9.2621022462845e-001,  9.2621022462845e-001,  9.2621022462845e-001,  9.2621022462845e-001,
+	 1.9208040833473e-001,  1.9208040833473e-001, -5.4789412021637e-001, -5.4789412021637e-001,
+	-9.8137921094894e-001,  9.8137921094894e-001,  8.3654773235321e-001, -8.3654773235321e-001,
+	 9.7956979274750e-001,  9.7956979274750e-001,  9.7956979274750e-001,  9.7956979274750e-001,
+	-2.0110464096069e-001,  2.0110464096069e-001, -2.0110464096069e-001,  2.0110464096069e-001,
+	 9.9487930536270e-001,  9.9487930536270e-001,  9.5422804355621e-001,  9.5422804355621e-001,
+	-1.0106986761093e-001,  1.0106986761093e-001, -2.9907983541489e-001,  2.9907983541489e-001,
+	-9.7956979274750e-001,  9.7956979274750e-001, -9.7956979274750e-001,  9.7956979274750e-001,
+	 2.0110464096069e-001,  2.0110464096069e-001,  2.0110464096069e-001,  2.0110464096069e-001,
+	 6.3201874494553e-001,  6.3201874494553e-001, -8.8622254133224e-001, -8.8622254133224e-001,
+	-7.7495306730270e-001,  7.7495306730270e-001, -4.6325987577438e-001,  4.6325987577438e-001,
+	 5.5045801401138e-001,  5.5045801401138e-001,  5.5045801401138e-001,  5.5045801401138e-001,
+	-8.3486288785934e-001,  8.3486288785934e-001, -8.3486288785934e-001,  8.3486288785934e-001,
+	 8.8047087192535e-001,  8.8047087192535e-001,  8.8853478431702e-002,  8.8853478431702e-002,
+	-4.7410023212433e-001,  4.7410023212433e-001, -9.9604463577271e-001,  9.9604463577271e-001,
+	-5.5045801401138e-001,  5.5045801401138e-001, -5.5045801401138e-001,  5.5045801401138e-001,
+	 8.3486288785934e-001,  8.3486288785934e-001,  8.3486288785934e-001,  8.3486288785934e-001,
+	 2.8734746575356e-001,  2.8734746575356e-001, -7.6713907718658e-001, -7.6713907718658e-001,
+	-9.5782643556595e-001,  9.5782643556595e-001,  6.4148104190826e-001, -6.4148104190826e-001,
+	 8.2804501056671e-001,  8.2804501056671e-001,  8.2804501056671e-001,  8.2804501056671e-001,
+	-5.6066161394119e-001,  5.6066161394119e-001, -5.6066161394119e-001,  5.6066161394119e-001,
+	 9.5604526996613e-001,  9.5604526996613e-001,  6.2725180387497e-001,  6.2725180387497e-001,
+	-2.9321917891502e-001,  2.9321917891502e-001, -7.7881658077240e-001,  7.7881658077240e-001,
+	-8.2804501056671e-001,  8.2804501056671e-001, -8.2804501056671e-001,  8.2804501056671e-001,
+	 5.6066161394119e-001,  5.6066161394119e-001,  5.6066161394119e-001,  5.6066161394119e-001,
+	 4.6868884563446e-001,  4.6868884563446e-001, -9.9424028396606e-001, -9.9424028396606e-001,
+	-8.8336330652237e-001,  8.8336330652237e-001,  1.0717236995697e-001, -1.0717236995697e-001,
+	 1.8906867504120e-001,  1.8906867504120e-001,  1.8906867504120e-001,  1.8906867504120e-001,
+	-9.8196387290955e-001,  9.8196387290955e-001, -9.8196387290955e-001,  9.8196387290955e-001,
+	 7.7106052637100e-001,  7.7106052637100e-001, -4.7949379682541e-001, -4.7949379682541e-001,
+	-6.3676190376282e-001,  6.3676190376282e-001, -8.7754523754120e-001,  8.7754523754120e-001,
+	-1.8906867504120e-001,  1.8906867504120e-001, -1.8906867504120e-001,  1.8906867504120e-001,
+	 9.8196387290955e-001,  9.8196387290955e-001,  9.8196387290955e-001,  9.8196387290955e-001,
+	 9.4963498413563e-002,  9.4963498413563e-002, -2.8146496415138e-001, -2.8146496415138e-001,
+	-9.9548077583313e-001,  9.9548077583313e-001,  9.5957154035568e-001, -9.5957154035568e-001,
+	 9.9456459283829e-001,  9.9456459283829e-001,  9.9456459283829e-001,  9.9456459283829e-001,
+	-1.0412164032459e-001,  1.0412164032459e-001, -1.0412164032459e-001,  1.0412164032459e-001,
+	 9.9864023923874e-001,  9.9864023923874e-001,  9.8778414726257e-001,  9.8778414726257e-001,
+	-5.2131704986095e-002,  5.2131704986095e-002, -1.5582841634750e-001,  1.5582841634750e-001,
+	-9.9456459283829e-001,  9.9456459283829e-001, -9.9456459283829e-001,  9.9456459283829e-001,
+	 1.0412164032459e-001,  1.0412164032459e-001,  1.0412164032459e-001,  1.0412164032459e-001,
+	 6.6928261518478e-001,  6.6928261518478e-001, -8.0865615606308e-001, -8.0865615606308e-001,
+	-7.4300795793533e-001,  7.4300795793533e-001, -5.8828157186508e-001,  5.8828157186508e-001,
+	 6.2963825464249e-001,  6.2963825464249e-001,  6.2963825464249e-001,  6.2963825464249e-001,
+	-7.7688843011856e-001,  7.7688843011856e-001, -7.7688843011856e-001,  7.7688843011856e-001,
+	 9.0267330408096e-001,  9.0267330408096e-001,  2.3404198884964e-001,  2.3404198884964e-001,
+	-4.3032649159431e-001,  4.3032649159431e-001, -9.7222638130188e-001,  9.7222638130188e-001,
+	-6.2963825464249e-001,  6.2963825464249e-001, -6.2963825464249e-001,  6.2963825464249e-001,
+	 7.7688843011856e-001,  7.7688843011856e-001,  7.7688843011856e-001,  7.7688843011856e-001,
+	 3.3399966359138e-001,  3.3399966359138e-001, -8.5296058654785e-001, -8.5296058654785e-001,
+	-9.4257318973541e-001,  9.4257318973541e-001,  5.2197527885437e-001, -5.2197527885437e-001,
+	 8.7901222705841e-001,  8.7901222705841e-001,  8.7901222705841e-001,  8.7901222705841e-001,
+	-4.7679924964905e-001,  4.7679924964905e-001, -4.7679924964905e-001,  4.7679924964905e-001,
+	 9.6928125619888e-001,  9.6928125619888e-001,  7.3473888635635e-001,  7.3473888635635e-001,
+	-2.4595504999161e-001,  2.4595504999161e-001, -6.7835009098053e-001,  6.7835009098053e-001,
+	-8.7901222705841e-001,  8.7901222705841e-001, -8.7901222705841e-001,  8.7901222705841e-001,
+	 4.7679924964905e-001,  4.7679924964905e-001,  4.7679924964905e-001,  4.7679924964905e-001,
+	 5.1146888732910e-001,  5.1146888732910e-001, -9.9920475482941e-001, -9.9920475482941e-001,
+	-8.5930180549622e-001,  8.5930180549622e-001, -3.9873003959656e-002,  3.9873003959656e-002,
+	 2.8440755605698e-001,  2.8440755605698e-001,  2.8440755605698e-001,  2.8440755605698e-001,
+	-9.5870345830917e-001,  9.5870345830917e-001, -9.5870345830917e-001,  9.5870345830917e-001,
+	 8.0137616395950e-001,  8.0137616395950e-001, -3.4554141759872e-001, -3.4554141759872e-001,
+	-5.9816074371338e-001,  5.9816074371338e-001, -9.3840348720551e-001,  9.3840348720551e-001,
+	-2.8440755605698e-001,  2.8440755605698e-001, -2.8440755605698e-001,  2.8440755605698e-001,
+	 9.5870345830917e-001,  9.5870345830917e-001,  9.5870345830917e-001,  9.5870345830917e-001,
+	 1.4369504153728e-001,  1.4369504153728e-001, -4.1921687126160e-001, -4.1921687126160e-001,
+	-9.8962199687958e-001,  9.8962199687958e-001,  9.0788608789444e-001, -9.0788608789444e-001,
+	 9.5514118671417e-001,  9.5514118671417e-001,  9.5514118671417e-001,  9.5514118671417e-001,
+	-2.9615089297295e-001,  2.9615089297295e-001, -2.9615089297295e-001,  2.9615089297295e-001,
+	 9.8872166872025e-001,  9.8872166872025e-001,  9.0001589059830e-001,  9.0001589059830e-001,
+	-1.4976453781128e-001,  1.4976453781128e-001, -4.3585705757141e-001,  4.3585705757141e-001,
+	-9.5514118671417e-001,  9.5514118671417e-001, -9.5514118671417e-001,  9.5514118671417e-001,
+	 2.9615089297295e-001,  2.9615089297295e-001,  2.9615089297295e-001,  2.9615089297295e-001,
+	 5.9323233366013e-001,  5.9323233366013e-001, -9.4460481405258e-001, -9.4460481405258e-001,
+	-8.0503129959106e-001,  8.0503129959106e-001, -3.2820999622345e-001,  3.2820999622345e-001,
+	 4.6597650647163e-001,  4.6597650647163e-001,  4.6597650647163e-001,  4.6597650647163e-001,
+	-8.8479709625244e-001,  8.8479709625244e-001, -8.8479709625244e-001,  8.8479709625244e-001,
+	 8.5614734888077e-001,  8.5614734888077e-001, -5.8258235454559e-002, -5.8258235454559e-002,
+	-5.1673179864883e-001,  5.1673179864883e-001, -9.9830156564713e-001,  9.9830156564713e-001,
+	-4.6597650647163e-001,  4.6597650647163e-001, -4.6597650647163e-001,  4.6597650647163e-001,
+	 8.8479709625244e-001,  8.8479709625244e-001,  8.8479709625244e-001,  8.8479709625244e-001,
+	 2.4000303447247e-001,  2.4000303447247e-001, -6.6471099853516e-001, -6.6471099853516e-001,
+	-9.7077214717865e-001,  9.7077214717865e-001,  7.4710059165955e-001, -7.4710059165955e-001,
+	 7.6910334825516e-001,  7.6910334825516e-001,  7.6910334825516e-001,  7.6910334825516e-001,
+	-6.3912445306778e-001,  6.3912445306778e-001, -6.3912445306778e-001,  6.3912445306778e-001,
+	 9.4050604104996e-001,  9.4050604104996e-001,  5.0618660449982e-001,  5.0618660449982e-001,
+	-3.3977690339088e-001,  3.3977690339088e-001, -8.6242389678955e-001,  8.6242389678955e-001,
+	-7.6910334825516e-001,  7.6910334825516e-001, -7.6910334825516e-001,  7.6910334825516e-001,
+	 6.3912445306778e-001,  6.3912445306778e-001,  6.3912445306778e-001,  6.3912445306778e-001,
+	 4.2477968335152e-001,  4.2477968335152e-001, -9.6775388717651e-001, -9.6775388717651e-001,
+	-9.0529674291611e-001,  9.0529674291611e-001,  2.5189781188965e-001, -2.5189781188965e-001,
+	 9.1908961534500e-002,  9.1908961534500e-002,  9.1908961534500e-002,  9.1908961534500e-002,
+	-9.9576741456985e-001,  9.9576741456985e-001, -9.9576741456985e-001,  9.9576741456985e-001,
+	 7.3888731002808e-001,  7.3888731002808e-001, -6.0306668281555e-001, -6.0306668281555e-001,
+	-6.7382901906967e-001,  6.7382901906967e-001, -7.9769080877304e-001,  7.9769080877304e-001,
+	-9.1908961534500e-002,  9.1908961534500e-002, -9.1908961534500e-002,  9.1908961534500e-002,
+	 9.9576741456985e-001,  9.9576741456985e-001,  9.9576741456985e-001,  9.9576741456985e-001,
+	 4.6003185212612e-002,  4.6003185212612e-002, -1.3762012124062e-001, -1.3762012124062e-001,
+	-9.9894130229950e-001,  9.9894130229950e-001,  9.9048507213593e-001, -9.9048507213593e-001,
+	 9.9847555160522e-001,  9.9847555160522e-001,  9.9847555160522e-001,  9.9847555160522e-001,
+	-5.5195245891809e-002,  5.5195245891809e-002, -5.5195245891809e-002,  5.5195245891809e-002,
+	 9.9961882829666e-001,  9.9961882829666e-001,  9.9657112360001e-001,  9.9657112360001e-001,
+	-2.7608146890998e-002,  2.7608146890998e-002, -8.2740262150764e-002,  8.2740262150764e-002,
+	-9.9847555160522e-001,  9.9847555160522e-001, -9.9847555160522e-001,  9.9847555160522e-001,
+	 5.5195245891809e-002,  5.5195245891809e-002,  5.5195245891809e-002,  5.5195245891809e-002,
+	 6.8731534481049e-001,  6.8731534481049e-001, -7.6318836212158e-001, -7.6318836212158e-001,
+	-7.2635912895203e-001,  7.2635912895203e-001, -6.4617598056793e-001,  6.4617598056793e-001,
+	 6.6699993610382e-001,  6.6699993610382e-001,  6.6699993610382e-001,  6.6699993610382e-001,
+	-7.4505776166916e-001,  7.4505776166916e-001, -7.4505776166916e-001,  7.4505776166916e-001,
+	 9.1296219825745e-001,  9.1296219825745e-001,  3.0492925643921e-001,  3.0492925643921e-001,
+	-4.0804415941238e-001,  4.0804415941238e-001, -9.5237499475479e-001,  9.5237499475479e-001,
+	-6.6699993610382e-001,  6.6699993610382e-001, -6.6699993610382e-001,  6.6699993610382e-001,
+	 7.4505776166916e-001,  7.4505776166916e-001,  7.4505776166916e-001,  7.4505776166916e-001,
+	 3.5703095793724e-001,  3.5703095793724e-001, -8.8904833793640e-001, -8.8904833793640e-001,
+	-9.3409252166748e-001,  9.3409252166748e-001,  4.5781326293945e-001, -4.5781326293945e-001,
+	 9.0134882926941e-001,  9.0134882926941e-001,  9.0134882926941e-001,  9.0134882926941e-001,
+	-4.3309381604195e-001,  4.3309381604195e-001, -4.3309381604195e-001,  4.3309381604195e-001,
+	 9.7502535581589e-001,  9.7502535581589e-001,  7.8265058994293e-001,  7.8265058994293e-001,
+	-2.2209362685680e-001,  2.2209362685680e-001, -6.2246125936508e-001,  6.2246125936508e-001,
+	-9.0134882926941e-001,  9.0134882926941e-001, -9.0134882926941e-001,  9.0134882926941e-001,
+	 4.3309381604195e-001,  4.3309381604195e-001,  4.3309381604195e-001,  4.3309381604195e-001,
+	 5.3240311145782e-001,  5.3240311145782e-001, -9.9356412887573e-001, -9.9356412887573e-001,
+	-8.4649091959000e-001,  8.4649091959000e-001, -1.1327093839645e-001,  1.1327093839645e-001,
+	 3.3110630512238e-001,  3.3110630512238e-001,  3.3110630512238e-001,  3.3110630512238e-001,
+	-9.4359344244003e-001,  9.4359344244003e-001, -9.4359344244003e-001,  9.4359344244003e-001,
+	 8.1581437587738e-001,  8.1581437587738e-001, -2.7557194232941e-001, -2.7557194232941e-001,
+	-5.7831382751465e-001,  5.7831382751465e-001, -9.6128034591675e-001,  9.6128034591675e-001,
+	-3.3110630512238e-001,  3.3110630512238e-001, -3.3110630512238e-001,  3.3110630512238e-001,
+	 9.4359344244003e-001,  9.4359344244003e-001,  9.4359344244003e-001,  9.4359344244003e-001,
+	 1.6793830692768e-001,  1.6793830692768e-001, -4.8486924171448e-001, -4.8486924171448e-001,
+	-9.8579752445221e-001,  9.8579752445221e-001,  8.7458664178848e-001, -8.7458664178848e-001,
+	 9.6852207183838e-001,  9.6852207183838e-001,  9.6852207183838e-001,  9.6852207183838e-001,
+	-2.4892760813236e-001,  2.4892760813236e-001, -2.4892760813236e-001,  2.4892760813236e-001,
+	 9.9209928512573e-001,  9.9209928512573e-001,  9.2964088916779e-001,  9.2964088916779e-001,
+	-1.2545499205589e-001,  1.2545499205589e-001, -3.6846682429314e-001,  3.6846682429314e-001,
+	-9.6852207183838e-001,  9.6852207183838e-001, -9.6852207183838e-001,  9.6852207183838e-001,
+	 2.4892760813236e-001,  2.4892760813236e-001,  2.4892760813236e-001,  2.4892760813236e-001,
+	 6.1281007528305e-001,  6.1281007528305e-001, -9.1790074110031e-001, -9.1790074110031e-001,
+	-7.9023021459579e-001,  7.9023021459579e-001, -3.9680999517441e-001,  3.9680999517441e-001,
+	 5.0883013010025e-001,  5.0883013010025e-001,  5.0883013010025e-001,  5.0883013010025e-001,
+	-8.6086690425873e-001,  8.6086690425873e-001, -8.6086690425873e-001,  8.6086690425873e-001,
+	 8.6857068538666e-001,  8.6857068538666e-001,  1.5339195728302e-002,  1.5339195728302e-002,
+	-4.9556526541710e-001,  4.9556526541710e-001, -9.9988222122192e-001,  9.9988222122192e-001,
+	-5.0883013010025e-001,  5.0883013010025e-001, -5.0883013010025e-001,  5.0883013010025e-001,
+	 8.6086690425873e-001,  8.6086690425873e-001,  8.6086690425873e-001,  8.6086690425873e-001,
+	 2.6375469565392e-001,  2.6375469565392e-001, -7.1786999702454e-001, -7.1786999702454e-001,
+	-9.6458977460861e-001,  9.6458977460861e-001,  6.9617712497711e-001, -6.9617712497711e-001,
+	 7.9953724145889e-001,  7.9953724145889e-001,  7.9953724145889e-001,  7.9953724145889e-001,
+	-6.0061651468277e-001,  6.0061651468277e-001, -6.0061651468277e-001,  6.0061651468277e-001,
+	 9.4856137037277e-001,  9.4856137037277e-001,  5.6825894117355e-001,  5.6825894117355e-001,
+	-3.1659337878227e-001,  3.1659337878227e-001, -8.2284986972809e-001,  8.2284986972809e-001,
+	-7.9953724145889e-001,  7.9953724145889e-001, -7.9953724145889e-001,  7.9953724145889e-001,
+	 6.0061651468277e-001,  6.0061651468277e-001,  6.0061651468277e-001,  6.0061651468277e-001,
+	 4.4686883687973e-001,  4.4686883687973e-001, -9.8366242647171e-001, -9.8366242647171e-001,
+	-8.9459949731827e-001,  8.9459949731827e-001,  1.8002295494080e-001, -1.8002295494080e-001,
+	 1.4065824449062e-001,  1.4065824449062e-001,  1.4065824449062e-001,  1.4065824449062e-001,
+	-9.9005818367004e-001,  9.9005818367004e-001, -9.9005818367004e-001,  9.9005818367004e-001,
+	 7.5520133972168e-001,  7.5520133972168e-001, -5.4275071620941e-001, -5.4275071620941e-001,
+	-6.5549284219742e-001,  6.5549284219742e-001, -8.3989363908768e-001,  8.3989363908768e-001,
+	-1.4065824449062e-001,  1.4065824449062e-001, -1.4065824449062e-001,  1.4065824449062e-001,
+	 9.9005818367004e-001,  9.9005818367004e-001,  9.9005818367004e-001,  9.9005818367004e-001,
+	 7.0504575967789e-002,  7.0504575967789e-002, -2.1011185646057e-001, -2.1011185646057e-001,
+	-9.9751144647598e-001,  9.9751144647598e-001,  9.7767734527588e-001, -9.7767734527588e-001,
+	 9.8825758695602e-001,  9.8825758695602e-001,  9.8825758695602e-001,  9.8825758695602e-001,
+	-1.5279719233513e-001,  1.5279719233513e-001, -1.5279719233513e-001,  1.5279719233513e-001,
+	 9.9706006050110e-001,  9.9706006050110e-001,  9.7364425659180e-001,  9.7364425659180e-001,
+	-7.6623864471912e-002,  7.6623864471912e-002, -2.2807210683823e-001,  2.2807210683823e-001,
+	-9.8825758695602e-001,  9.8825758695602e-001, -9.8825758695602e-001,  9.8825758695602e-001,
+	 1.5279719233513e-001,  1.5279719233513e-001,  1.5279719233513e-001,  1.5279719233513e-001,
+	 6.5084671974182e-001,  6.5084671974182e-001, -8.4974169731140e-001, -8.4974169731140e-001,
+	-7.5920915603638e-001,  7.5920915603638e-001, -5.2719926834106e-001,  5.2719926834106e-001,
+	 5.9075969457626e-001,  5.9075969457626e-001,  5.9075969457626e-001,  5.9075969457626e-001,
+	-8.0684757232666e-001,  8.0684757232666e-001, -8.0684757232666e-001,  8.0684757232666e-001,
+	 8.9184069633484e-001,  8.9184069633484e-001,  1.6188633441925e-001,  1.6188633441925e-001,
+	-4.5234960317612e-001,  4.5234960317612e-001, -9.8680943250656e-001,  9.8680943250656e-001,
+	-5.9075969457626e-001,  5.9075969457626e-001, -5.9075969457626e-001,  5.9075969457626e-001,
+	 8.0684757232666e-001,  8.0684757232666e-001,  8.0684757232666e-001,  8.0684757232666e-001,
+	 3.1076717376709e-001,  3.1076717376709e-001, -8.1225049495697e-001, -8.1225049495697e-001,
+	-9.5048606395721e-001,  9.5048606395721e-001,  5.8330863714218e-001, -5.8330863714218e-001,
+	 8.5455799102783e-001,  8.5455799102783e-001,  8.5455799102783e-001,  8.5455799102783e-001,
+	-5.1935601234436e-001,  5.1935601234436e-001, -5.1935601234436e-001,  5.1935601234436e-001,
+	 9.6295326948166e-001,  9.6295326948166e-001,  6.8284553289413e-001,  6.8284553289413e-001,
+	-2.6966834068298e-001,  2.6966834068298e-001, -7.3056280612946e-001,  7.3056280612946e-001,
+	-8.5455799102783e-001,  8.5455799102783e-001, -8.5455799102783e-001,  8.5455799102783e-001,
+	 5.1935601234436e-001,  5.1935601234436e-001,  5.1935601234436e-001,  5.1935601234436e-001,
+	 4.9022650718689e-001,  4.9022650718689e-001, -9.9943053722382e-001, -9.9943053722382e-001,
+	-8.7159508466721e-001,  8.7159508466721e-001,  3.3741116523743e-002, -3.3741116523743e-002,
+	 2.3702360689640e-001,  2.3702360689640e-001,  2.3702360689640e-001,  2.3702360689640e-001,
+	-9.7150391340256e-001,  9.7150391340256e-001, -9.7150391340256e-001,  9.7150391340256e-001,
+	 7.8645521402359e-001,  7.8645521402359e-001, -4.1363841295242e-001, -4.1363841295242e-001,
+	-6.1764734983444e-001,  6.1764734983444e-001, -9.1044133901596e-001,  9.1044133901596e-001,
+	-2.3702360689640e-001,  2.3702360689640e-001, -2.3702360689640e-001,  2.3702360689640e-001,
+	 9.7150391340256e-001,  9.7150391340256e-001,  9.7150391340256e-001,  9.7150391340256e-001,
+	 1.1936521530151e-001,  1.1936521530151e-001, -3.5129275918007e-001, -3.5129275918007e-001,
+	-9.9285042285919e-001,  9.9285042285919e-001,  9.3626564741135e-001, -9.3626564741135e-001,
+	 9.3945920467377e-001,  9.3945920467377e-001,  9.3945920467377e-001,  9.3945920467377e-001,
+	-3.4266072511673e-001,  3.4266072511673e-001, -3.4266072511673e-001,  3.4266072511673e-001,
+	 9.8474848270416e-001,  9.8474848270416e-001,  8.6551362276077e-001,  8.6551362276077e-001,
+	-1.7398387193680e-001,  1.7398387193680e-001, -5.0088536739349e-001,  5.0088536739349e-001,
+	-9.3945920467377e-001,  9.3945920467377e-001, -9.3945920467377e-001,  9.3945920467377e-001,
+	 3.4266072511673e-001,  3.4266072511673e-001,  3.4266072511673e-001,  3.4266072511673e-001,
+	 5.7329720258713e-001,  5.7329720258713e-001, -9.6618992090225e-001, -9.6618992090225e-001,
+	-8.1934750080109e-001,  8.1934750080109e-001, -2.5783121585846e-001,  2.5783121585846e-001,
+	 4.2200028896332e-001,  4.2200028896332e-001,  4.2200028896332e-001,  4.2200028896332e-001,
+	-9.0659570693970e-001,  9.0659570693970e-001, -9.0659570693970e-001,  9.0659570693970e-001,
+	 8.4320825338364e-001,  8.4320825338364e-001, -1.3154006004333e-001, -1.3154006004333e-001,
+	-5.3758710622787e-001,  5.3758710622787e-001, -9.9131089448929e-001,  9.9131089448929e-001,
+	-4.2200028896332e-001,  4.2200028896332e-001, -4.2200028896332e-001,  4.2200028896332e-001,
+	 9.0659570693970e-001,  9.0659570693970e-001,  9.0659570693970e-001,  9.0659570693970e-001,
+	 2.1610680222511e-001,  2.1610680222511e-001, -6.0794985294342e-001, -6.0794985294342e-001,
+	-9.7636973857880e-001,  9.7636973857880e-001,  7.9397547245026e-001, -7.9397547245026e-001,
+	 7.3681652545929e-001,  7.3681652545929e-001,  7.3681652545929e-001,  7.3681652545929e-001,
+	-6.7609274387360e-001,  6.7609274387360e-001, -6.7609274387360e-001,  6.7609274387360e-001,
+	 9.3188428878784e-001,  9.3188428878784e-001,  4.4137123227119e-001,  4.4137123227119e-001,
+	-3.6275574564934e-001,  3.6275574564934e-001, -8.9732468128204e-001,  8.9732468128204e-001,
+	-7.3681652545929e-001,  7.3681652545929e-001, -7.3681652545929e-001,  7.3681652545929e-001,
+	 6.7609274387360e-001,  6.7609274387360e-001,  6.7609274387360e-001,  6.7609274387360e-001,
+	 4.0243464708328e-001,  4.0243464708328e-001, -9.4660085439682e-001, -9.4660085439682e-001,
+	-9.1544872522354e-001,  9.1544872522354e-001,  3.2240772247314e-001, -3.2240772247314e-001,
+	 4.2938258498907e-002,  4.2938258498907e-002,  4.2938258498907e-002,  4.2938258498907e-002,
+	-9.9907773733139e-001,  9.9907773733139e-001, -9.9907773733139e-001,  9.9907773733139e-001,
+	 7.2212815284729e-001,  7.2212815284729e-001, -6.6011440753937e-001, -6.6011440753937e-001,
+	-6.9175928831100e-001,  6.9175928831100e-001, -7.5116509199142e-001,  7.5116509199142e-001,
+	-4.2938258498907e-002,  4.2938258498907e-002, -4.2938258498907e-002,  4.2938258498907e-002,
+	 9.9907773733139e-001,  9.9907773733139e-001,  9.9907773733139e-001,  9.9907773733139e-001,
+	 2.1474080160260e-002,  2.1474080160260e-002, -6.4382635056973e-002, -6.4382635056973e-002,
+	-9.9976938962936e-001,  9.9976938962936e-001,  9.9792528152466e-001, -9.9792528152466e-001,
+	 9.9952942132950e-001,  9.9952942132950e-001,  9.9952942132950e-001,  9.9952942132950e-001,
+	-3.0674804002047e-002,  3.0674804002047e-002, -3.0674804002047e-002,  3.0674804002047e-002,
+	 9.9988234043121e-001,  9.9988234043121e-001,  9.9894130229950e-001,  9.9894130229950e-001,
+	-1.5339206904173e-002,  1.5339206904173e-002, -4.6003181487322e-002,  4.6003181487322e-002,
+	-9.9952942132950e-001,  9.9952942132950e-001, -9.9952942132950e-001,  9.9952942132950e-001,
+	 3.0674804002047e-002,  3.0674804002047e-002,  3.0674804002047e-002,  3.0674804002047e-002,
+	 6.9617712497711e-001,  6.9617712497711e-001, -7.3888731002808e-001, -7.3888731002808e-001,
+	-7.1787005662918e-001,  7.1787005662918e-001, -6.7382901906967e-001,  6.7382901906967e-001,
+	 6.8508368730545e-001,  6.8508368730545e-001,  6.8508368730545e-001,  6.8508368730545e-001,
+	-7.2846436500549e-001,  7.2846436500549e-001, -7.2846436500549e-001,  7.2846436500549e-001,
+	 9.1790080070496e-001,  9.1790080070496e-001,  3.3977693319321e-001,  3.3977693319321e-001,
+	-3.9680999517441e-001,  3.9680999517441e-001, -9.4050604104996e-001,  9.4050604104996e-001,
+	-6.8508368730545e-001,  6.8508368730545e-001, -6.8508368730545e-001,  6.8508368730545e-001,
+	 7.2846436500549e-001,  7.2846436500549e-001,  7.2846436500549e-001,  7.2846436500549e-001,
+	 3.6846685409546e-001,  3.6846685409546e-001, -9.0529680252075e-001, -9.0529680252075e-001,
+	-9.2964088916779e-001,  9.2964088916779e-001,  4.2477965354919e-001, -4.2477965354919e-001,
+	 9.1170603036880e-001,  9.1170603036880e-001,  9.1170603036880e-001,  9.1170603036880e-001,
+	-4.1084319353104e-001,  4.1084319353104e-001, -4.1084319353104e-001,  4.1084319353104e-001,
+	 9.7767734527588e-001,  9.7767734527588e-001,  8.0503129959106e-001,  8.0503129959106e-001,
+	-2.1011184155941e-001,  2.1011184155941e-001, -5.9323233366013e-001,  5.9323233366013e-001,
+	-9.1170603036880e-001,  9.1170603036880e-001, -9.1170603036880e-001,  9.1170603036880e-001,
+	 4.1084319353104e-001,  4.1084319353104e-001,  4.1084319353104e-001,  4.1084319353104e-001,
+	 5.4275077581406e-001,  5.4275077581406e-001, -9.8872166872025e-001, -9.8872166872025e-001,
+	-8.3989375829697e-001,  8.3989375829697e-001, -1.4976453781128e-001,  1.4976453781128e-001,
+	 3.5416352748871e-001,  3.5416352748871e-001,  3.5416352748871e-001,  3.5416352748871e-001,
+	-9.3518352508545e-001,  9.3518352508545e-001, -9.3518352508545e-001,  9.3518352508545e-001,
+	 8.2284975051880e-001,  8.2284975051880e-001, -2.4000298976898e-001, -2.4000298976898e-001,
+	-5.6825894117355e-001,  5.6825894117355e-001, -9.7077208757401e-001,  9.7077208757401e-001,
+	-3.5416352748871e-001,  3.5416352748871e-001, -3.5416352748871e-001,  3.5416352748871e-001,
+	 9.3518352508545e-001,  9.3518352508545e-001,  9.3518352508545e-001,  9.3518352508545e-001,
+	 1.8002291023731e-001,  1.8002291023731e-001, -5.1673179864883e-001, -5.1673179864883e-001,
+	-9.8366242647171e-001,  9.8366242647171e-001,  8.5614734888077e-001, -8.5614734888077e-001,
+	 9.7433936595917e-001,  9.7433936595917e-001,  9.7433936595917e-001,  9.7433936595917e-001,
+	-2.2508391737938e-001,  2.2508391737938e-001, -2.2508391737938e-001,  2.2508391737938e-001,
+	 9.9356412887573e-001,  9.9356412887573e-001,  9.4257318973541e-001,  9.4257318973541e-001,
+	-1.1327095329762e-001,  1.1327095329762e-001, -3.3399963378906e-001,  3.3399963378906e-001,
+	-9.7433936595917e-001,  9.7433936595917e-001, -9.7433936595917e-001,  9.7433936595917e-001,
+	 2.2508391737938e-001,  2.2508391737938e-001,  2.2508391737938e-001,  2.2508391737938e-001,
+	 6.2246131896973e-001,  6.2246131896973e-001, -9.0267324447632e-001, -9.0267324447632e-001,
+	-7.8265058994293e-001,  7.8265058994293e-001, -4.3032658100128e-001,  4.3032658100128e-001,
+	 5.2980363368988e-001,  5.2980363368988e-001,  5.2980363368988e-001,  5.2980363368988e-001,
+	-8.4812033176422e-001,  8.4812033176422e-001, -8.4812033176422e-001,  8.4812033176422e-001,
+	 8.7458664178848e-001,  8.7458664178848e-001,  5.2131652832031e-002,  5.2131652832031e-002,
+	-4.8486927151680e-001,  4.8486927151680e-001, -9.9864017963409e-001,  9.9864017963409e-001,
+	-5.2980363368988e-001,  5.2980363368988e-001, -5.2980363368988e-001,  5.2980363368988e-001,
+	 8.4812033176422e-001,  8.4812033176422e-001,  8.4812033176422e-001,  8.4812033176422e-001,
+	 2.7557182312012e-001,  2.7557182312012e-001, -7.4300789833069e-001, -7.4300789833069e-001,
+	-9.6128046512604e-001,  9.6128046512604e-001,  6.6928255558014e-001, -6.6928255558014e-001,
+	 8.1403630971909e-001,  8.1403630971909e-001,  8.1403630971909e-001,  8.1403630971909e-001,
+	-5.8081394433975e-001,  5.8081394433975e-001, -5.8081394433975e-001,  5.8081394433975e-001,
+	 9.5237499475479e-001,  9.5237499475479e-001,  5.9816074371338e-001,  5.9816074371338e-001,
+	-3.0492922663689e-001,  3.0492922663689e-001, -8.0137610435486e-001,  8.0137610435486e-001,
+	-8.1403630971909e-001,  8.1403630971909e-001, -8.1403630971909e-001,  8.1403630971909e-001,
+	 5.8081394433975e-001,  5.8081394433975e-001,  5.8081394433975e-001,  5.8081394433975e-001,
+	 4.5781332254410e-001,  4.5781332254410e-001, -9.8962193727493e-001, -9.8962193727493e-001,
+	-8.8904833793640e-001,  8.8904833793640e-001,  1.4369499683380e-001, -1.4369499683380e-001,
+	 1.6491311788559e-001,  1.6491311788559e-001,  1.6491311788559e-001,  1.6491311788559e-001,
+	-9.8630809783936e-001,  9.8630809783936e-001, -9.8630809783936e-001,  9.8630809783936e-001,
+	 7.6318842172623e-001,  7.6318842172623e-001, -5.1146894693375e-001, -5.1146894693375e-001,
+	-6.4617604017258e-001,  6.4617604017258e-001, -8.5930174589157e-001,  8.5930174589157e-001,
+	-1.6491311788559e-001,  1.6491311788559e-001, -1.6491311788559e-001,  1.6491311788559e-001,
+	 9.8630809783936e-001,  9.8630809783936e-001,  9.8630809783936e-001,  9.8630809783936e-001,
+	 8.2740269601345e-002,  8.2740269601345e-002, -2.4595502018929e-001, -2.4595502018929e-001,
+	-9.9657112360001e-001,  9.9657112360001e-001,  9.6928119659424e-001, -9.6928119659424e-001,
+	 9.9170976877213e-001,  9.9170976877213e-001,  9.9170976877213e-001,  9.9170976877213e-001,
+	-1.2849810719490e-001,  1.2849810719490e-001, -1.2849810719490e-001,  1.2849810719490e-001,
+	 9.9792528152466e-001,  9.9792528152466e-001,  9.8137921094894e-001,  9.8137921094894e-001,
+	-6.4382635056973e-002,  6.4382635056973e-002, -1.9208037853241e-001,  1.9208037853241e-001,
+	-9.9170976877213e-001,  9.9170976877213e-001, -9.9170976877213e-001,  9.9170976877213e-001,
+	 1.2849810719490e-001,  1.2849810719490e-001,  1.2849810719490e-001,  1.2849810719490e-001,
+	 6.6011434793472e-001,  6.6011434793472e-001, -8.2976120710373e-001, -8.2976120710373e-001,
+	-7.5116509199142e-001,  7.5116509199142e-001, -5.5811864137650e-001,  5.5811864137650e-001,
+	 6.1038279533386e-001,  6.1038279533386e-001,  6.1038279533386e-001,  6.1038279533386e-001,
+	-7.9210656881332e-001,  7.9210656881332e-001, -7.9210656881332e-001,  7.9210656881332e-001,
+	 8.9732456207275e-001,  8.9732456207275e-001,  1.9809836149216e-001,  1.9809836149216e-001,
+	-4.4137129187584e-001,  4.4137129187584e-001, -9.8018205165863e-001,  9.8018205165863e-001,
+	-6.1038279533386e-001,  6.1038279533386e-001, -6.1038279533386e-001,  6.1038279533386e-001,
+	 7.9210656881332e-001,  7.9210656881332e-001,  7.9210656881332e-001,  7.9210656881332e-001,
+	 3.2240769267082e-001,  3.2240769267082e-001, -8.3317005634308e-001, -8.3317005634308e-001,
+	-9.4660091400146e-001,  9.4660091400146e-001,  5.5301666259766e-001, -5.5301666259766e-001,
+	 8.6704623699188e-001,  8.6704623699188e-001,  8.6704623699188e-001,  8.6704623699188e-001,
+	-4.9822768568993e-001,  4.9822768568993e-001, -4.9822768568993e-001,  4.9822768568993e-001,
+	 9.6618998050690e-001,  9.6618998050690e-001,  7.0927280187607e-001,  7.0927280187607e-001,
+	-2.5783109664917e-001,  2.5783109664917e-001, -7.0493412017822e-001,  7.0493412017822e-001,
+	-8.6704623699188e-001,  8.6704623699188e-001, -8.6704623699188e-001,  8.6704623699188e-001,
+	 4.9822768568993e-001,  4.9822768568993e-001,  4.9822768568993e-001,  4.9822768568993e-001,
+	 5.0088536739349e-001,  5.0088536739349e-001, -9.9999535083771e-001, -9.9999535083771e-001,
+	-8.6551362276077e-001,  8.6551362276077e-001, -3.0679106712341e-003,  3.0679106712341e-003,
+	 2.6079413294792e-001,  2.6079413294792e-001,  2.6079413294792e-001,  2.6079413294792e-001,
+	-9.6539443731308e-001,  9.6539443731308e-001, -9.6539443731308e-001,  9.6539443731308e-001,
+	 7.9397547245026e-001,  7.9397547245026e-001, -3.7984716892242e-001, -3.7984716892242e-001,
+	-6.0794979333878e-001,  6.0794979333878e-001, -9.2504924535751e-001,  9.2504924535751e-001,
+	-2.6079413294792e-001,  2.6079413294792e-001, -2.6079413294792e-001,  2.6079413294792e-001,
+	 9.6539443731308e-001,  9.6539443731308e-001,  9.6539443731308e-001,  9.6539443731308e-001,
+	 1.3154003024101e-001,  1.3154003024101e-001, -3.8551607728004e-001, -3.8551607728004e-001,
+	-9.9131083488464e-001,  9.9131083488464e-001,  9.2270112037659e-001, -9.2270112037659e-001,
+	 9.4758558273315e-001,  9.4758558273315e-001,  9.4758558273315e-001,  9.4758558273315e-001,
+	-3.1950202584267e-001,  3.1950202584267e-001, -3.1950202584267e-001,  3.1950202584267e-001,
+	 9.8680937290192e-001,  9.8680937290192e-001,  8.8336330652237e-001,  8.8336330652237e-001,
+	-1.6188639402390e-001,  1.6188639402390e-001, -4.6868878602982e-001,  4.6868878602982e-001,
+	-9.4758558273315e-001,  9.4758558273315e-001, -9.4758558273315e-001,  9.4758558273315e-001,
+	 3.1950202584267e-001,  3.1950202584267e-001,  3.1950202584267e-001,  3.1950202584267e-001,
+	 5.8330869674683e-001,  5.8330869674683e-001, -9.5604515075684e-001, -9.5604515075684e-001,
+	-8.1225055456161e-001,  8.1225055456161e-001, -2.9321926832199e-001,  2.9321926832199e-001,
+	 4.4412216544151e-001,  4.4412216544151e-001,  4.4412216544151e-001,  4.4412216544151e-001,
+	-8.9596623182297e-001,  8.9596623182297e-001, -8.9596623182297e-001,  8.9596623182297e-001,
+	 8.4974175691605e-001,  8.4974175691605e-001, -9.4963490962982e-002, -9.4963490962982e-002,
+	-5.2719914913177e-001,  5.2719914913177e-001, -9.9548065662384e-001,  9.9548065662384e-001,
+	-4.4412216544151e-001,  4.4412216544151e-001, -4.4412216544151e-001,  4.4412216544151e-001,
+	 8.9596623182297e-001,  8.9596623182297e-001,  8.9596623182297e-001,  8.9596623182297e-001,
+	 2.2807209193707e-001,  2.2807209193707e-001, -6.3676190376282e-001, -6.3676190376282e-001,
+	-9.7364425659180e-001,  9.7364425659180e-001,  7.7106052637100e-001, -7.7106052637100e-001,
+	 7.5318676233292e-001,  7.5318676233292e-001,  7.5318676233292e-001,  7.5318676233292e-001,
+	-6.5780669450760e-001,  6.5780669450760e-001, -6.5780669450760e-001,  6.5780669450760e-001,
+	 9.3626564741135e-001,  9.3626564741135e-001,  4.7410020232201e-001,  4.7410020232201e-001,
+	-3.5129275918007e-001,  3.5129275918007e-001, -8.8047087192535e-001,  8.8047087192535e-001,
+	-7.5318676233292e-001,  7.5318676233292e-001, -7.5318676233292e-001,  7.5318676233292e-001,
+	 6.5780669450760e-001,  6.5780669450760e-001,  6.5780669450760e-001,  6.5780669450760e-001,
+	 4.1363832354546e-001,  4.1363832354546e-001, -9.5782625675201e-001, -9.5782625675201e-001,
+	-9.1044127941132e-001,  9.1044127941132e-001,  2.8734743595123e-001, -2.8734743595123e-001,
+	 6.7443922162056e-002,  6.7443922162056e-002,  6.7443922162056e-002,  6.7443922162056e-002,
+	-9.9772304296494e-001,  9.9772304296494e-001, -9.9772304296494e-001,  9.9772304296494e-001,
+	 7.3056274652481e-001,  7.3056274652481e-001, -6.3201874494553e-001, -6.3201874494553e-001,
+	-6.8284553289413e-001,  6.8284553289413e-001, -7.7495306730270e-001,  7.7495306730270e-001,
+	-6.7443922162056e-002,  6.7443922162056e-002, -6.7443922162056e-002,  6.7443922162056e-002,
+	 9.9772304296494e-001,  9.9772304296494e-001,  9.9772304296494e-001,  9.9772304296494e-001,
+	 3.3741172403097e-002,  3.3741172403097e-002, -1.0106986761093e-001, -1.0106986761093e-001,
+	-9.9943059682846e-001,  9.9943059682846e-001,  9.9487930536270e-001, -9.9487930536270e-001,
+	 9.9682027101517e-001,  9.9682027101517e-001,  9.9682027101517e-001,  9.9682027101517e-001,
+	-7.9682439565659e-002,  7.9682439565659e-002, -7.9682439565659e-002,  7.9682439565659e-002,
+	 9.9920475482941e-001,  9.9920475482941e-001,  9.9285042285919e-001,  9.9285042285919e-001,
+	-3.9872929453850e-002,  3.9872929453850e-002, -1.1936521530151e-001,  1.1936521530151e-001,
+	-9.9682027101517e-001,  9.9682027101517e-001, -9.9682027101517e-001,  9.9682027101517e-001,
+	 7.9682439565659e-002,  7.9682439565659e-002,  7.9682439565659e-002,  7.9682439565659e-002,
+	 6.7835003137589e-001,  6.7835003137589e-001, -7.8645521402359e-001, -7.8645521402359e-001,
+	-7.3473888635635e-001,  7.3473888635635e-001, -6.1764723062515e-001,  6.1764723062515e-001,
+	 6.4851438999176e-001,  6.4851438999176e-001,  6.4851438999176e-001,  6.4851438999176e-001,
+	-7.6120239496231e-001,  7.6120239496231e-001, -7.6120239496231e-001,  7.6120239496231e-001,
+	 9.0788608789444e-001,  9.0788608789444e-001,  2.6966828107834e-001,  2.6966828107834e-001,
+	-4.1921690106392e-001,  4.1921690106392e-001, -9.6295320987701e-001,  9.6295320987701e-001,
+	-6.4851438999176e-001,  6.4851438999176e-001, -6.4851438999176e-001,  6.4851438999176e-001,
+	 7.6120239496231e-001,  7.6120239496231e-001,  7.6120239496231e-001,  7.6120239496231e-001,
+	 3.4554132819176e-001,  3.4554132819176e-001, -8.7159502506256e-001, -8.7159502506256e-001,
+	-9.3840354681015e-001,  9.3840354681015e-001,  4.9022650718689e-001, -4.9022650718689e-001,
+	 8.9044868946075e-001,  8.9044868946075e-001,  8.9044868946075e-001,  8.9044868946075e-001,
+	-4.5508360862732e-001,  4.5508360862732e-001, -4.5508360862732e-001,  4.5508360862732e-001,
+	 9.7222650051117e-001,  9.7222650051117e-001,  7.5920915603638e-001,  7.5920915603638e-001,
+	-2.3404195904732e-001,  2.3404195904732e-001, -6.5084671974182e-001,  6.5084671974182e-001,
+	-8.9044868946075e-001,  8.9044868946075e-001, -8.9044868946075e-001,  8.9044868946075e-001,
+	 4.5508360862732e-001,  4.5508360862732e-001,  4.5508360862732e-001,  4.5508360862732e-001,
+	 5.2197527885437e-001,  5.2197527885437e-001, -9.9705994129181e-001, -9.9705994129181e-001,
+	-8.5296058654785e-001,  8.5296058654785e-001, -7.6623797416687e-002,  7.6623797416687e-002,
+	 3.0784964561462e-001,  3.0784964561462e-001,  3.0784964561462e-001,  3.0784964561462e-001,
+	-9.5143502950668e-001,  9.5143502950668e-001, -9.5143502950668e-001,  9.5143502950668e-001,
+	 8.0865615606308e-001,  8.0865615606308e-001, -3.1076723337173e-001, -3.1076723337173e-001,
+	-5.8828157186508e-001,  5.8828157186508e-001, -9.5048600435257e-001,  9.5048600435257e-001,
+	-3.0784964561462e-001,  3.0784964561462e-001, -3.0784964561462e-001,  3.0784964561462e-001,
+	 9.5143502950668e-001,  9.5143502950668e-001,  9.5143502950668e-001,  9.5143502950668e-001,
+	 1.5582840144634e-001,  1.5582840144634e-001, -4.5234960317612e-001, -4.5234960317612e-001,
+	-9.8778414726257e-001,  9.8778414726257e-001,  8.9184069633484e-001, -8.9184069633484e-001,
+	 9.6212142705917e-001,  9.6212142705917e-001,  9.6212142705917e-001,  9.6212142705917e-001,
+	-2.7262136340141e-001,  2.7262136340141e-001, -2.7262136340141e-001,  2.7262136340141e-001,
+	 9.9048507213593e-001,  9.9048507213593e-001,  9.1544872522354e-001,  9.1544872522354e-001,
+	-1.3762012124062e-001,  1.3762012124062e-001, -4.0243467688560e-001,  4.0243467688560e-001,
+	-9.6212142705917e-001,  9.6212142705917e-001, -9.6212142705917e-001,  9.6212142705917e-001,
+	 2.7262136340141e-001,  2.7262136340141e-001,  2.7262136340141e-001,  2.7262136340141e-001,
+	 6.0306662321091e-001,  6.0306662321091e-001, -9.3188422918320e-001, -9.3188422918320e-001,
+	-7.9769080877304e-001,  7.9769080877304e-001, -3.6275583505630e-001,  3.6275583505630e-001,
+	 4.8755016922951e-001,  4.8755016922951e-001,  4.8755016922951e-001,  4.8755016922951e-001,
+	-8.7309497594833e-001,  8.7309497594833e-001, -8.7309497594833e-001,  8.7309497594833e-001,
+	 8.6242395639420e-001,  8.6242395639420e-001, -2.1474123001099e-002, -2.1474123001099e-002,
+	-5.0618666410446e-001,  5.0618666410446e-001, -9.9976938962936e-001,  9.9976938962936e-001,
+	-4.8755016922951e-001,  4.8755016922951e-001, -4.8755016922951e-001,  4.8755016922951e-001,
+	 8.7309497594833e-001,  8.7309497594833e-001,  8.7309497594833e-001,  8.7309497594833e-001,
+	 2.5189781188965e-001,  2.5189781188965e-001, -6.9175928831100e-001, -6.9175928831100e-001,
+	-9.6775382757187e-001,  9.6775382757187e-001,  7.2212815284729e-001, -7.2212815284729e-001,
+	 7.8455656766891e-001,  7.8455656766891e-001,  7.8455656766891e-001,  7.8455656766891e-001,
+	-6.2005722522736e-001,  6.2005722522736e-001, -6.2005722522736e-001,  6.2005722522736e-001,
+	 9.4460481405258e-001,  9.4460481405258e-001,  5.3758704662323e-001,  5.3758704662323e-001,
+	-3.2820984721184e-001,  3.2820984721184e-001, -8.4320819377899e-001,  8.4320819377899e-001,
+	-7.8455656766891e-001,  7.8455656766891e-001, -7.8455656766891e-001,  7.8455656766891e-001,
+	 6.2005722522736e-001,  6.2005722522736e-001,  6.2005722522736e-001,  6.2005722522736e-001,
+	 4.3585708737373e-001,  4.3585708737373e-001, -9.7636973857880e-001, -9.7636973857880e-001,
+	-9.0001589059830e-001,  9.0001589059830e-001,  2.1610683202744e-001, -2.1610683202744e-001,
+	 1.1631863564253e-001,  1.1631863564253e-001,  1.1631863564253e-001,  1.1631863564253e-001,
+	-9.9321192502975e-001,  9.9321192502975e-001, -9.9321192502975e-001,  9.9321192502975e-001,
+	 7.4710059165955e-001,  7.4710059165955e-001, -5.7329714298248e-001, -5.7329714298248e-001,
+	-6.6471099853516e-001,  6.6471099853516e-001, -8.1934738159180e-001,  8.1934738159180e-001,
+	-1.1631863564253e-001,  1.1631863564253e-001, -1.1631863564253e-001,  1.1631863564253e-001,
+	 9.9321192502975e-001,  9.9321192502975e-001,  9.9321192502975e-001,  9.9321192502975e-001,
+	 5.8258265256882e-002,  5.8258265256882e-002, -1.7398388683796e-001, -1.7398388683796e-001,
+	-9.9830156564713e-001,  9.9830156564713e-001,  9.8474854230881e-001, -9.8474854230881e-001,
+	 9.8421007394791e-001,  9.8421007394791e-001,  9.8421007394791e-001,  9.8421007394791e-001,
+	-1.7700421810150e-001,  1.7700421810150e-001, -1.7700421810150e-001,  1.7700421810150e-001,
+	 9.9604469537735e-001,  9.9604469537735e-001,  9.6458977460861e-001,  9.6458977460861e-001,
+	-8.8853552937508e-002,  8.8853552937508e-002, -2.6375466585159e-001,  2.6375466585159e-001,
+	-9.8421007394791e-001,  9.8421007394791e-001, -9.8421007394791e-001,  9.8421007394791e-001,
+	 1.7700421810150e-001,  1.7700421810150e-001,  1.7700421810150e-001,  1.7700421810150e-001,
+	 6.4148104190826e-001,  6.4148104190826e-001, -8.6857056617737e-001, -8.6857056617737e-001,
+	-7.6713889837265e-001,  7.6713889837265e-001, -4.9556535482407e-001,  4.9556535482407e-001,
+	 5.7078075408936e-001,  5.7078075408936e-001,  5.7078075408936e-001,  5.7078075408936e-001,
+	-8.2110249996185e-001,  8.2110249996185e-001, -8.2110249996185e-001,  8.2110249996185e-001,
+	 8.8622254133224e-001,  8.8622254133224e-001,  1.2545502185822e-001,  1.2545502185822e-001,
+	-4.6325978636742e-001,  4.6325978636742e-001, -9.9209928512573e-001,  9.9209928512573e-001,
+	-5.7078075408936e-001,  5.7078075408936e-001, -5.7078075408936e-001,  5.7078075408936e-001,
+	 8.2110249996185e-001,  8.2110249996185e-001,  8.2110249996185e-001,  8.2110249996185e-001,
+	 2.9907983541489e-001,  2.9907983541489e-001, -7.9023021459579e-001, -7.9023021459579e-001,
+	-9.5422810316086e-001,  9.5422810316086e-001,  6.1281007528305e-001, -6.1281007528305e-001,
+	 8.4155493974686e-001,  8.4155493974686e-001,  8.4155493974686e-001,  8.4155493974686e-001,
+	-5.4017150402069e-001,  5.4017150402069e-001, -5.4017150402069e-001,  5.4017150402069e-001,
+	 9.5957154035568e-001,  9.5957154035568e-001,  6.5549290180206e-001,  6.5549290180206e-001,
+	-2.8146493434906e-001,  2.8146493434906e-001, -7.5520145893097e-001,  7.5520145893097e-001,
+	-8.4155493974686e-001,  8.4155493974686e-001, -8.4155493974686e-001,  8.4155493974686e-001,
+	 5.4017150402069e-001,  5.4017150402069e-001,  5.4017150402069e-001,  5.4017150402069e-001,
+	 4.7949376702309e-001,  4.7949376702309e-001, -9.9751138687134e-001, -9.9751138687134e-001,
+	-8.7754529714584e-001,  8.7754529714584e-001,  7.0504605770111e-002, -7.0504605770111e-002,
+	 2.1311032772064e-001,  2.1311032772064e-001,  2.1311032772064e-001,  2.1311032772064e-001,
+	-9.7702813148499e-001,  9.7702813148499e-001, -9.7702813148499e-001,  9.7702813148499e-001,
+	 7.7881652116776e-001,  7.7881652116776e-001, -4.4686883687973e-001, -4.4686883687973e-001,
+	-6.2725180387497e-001,  6.2725180387497e-001, -8.9459949731827e-001,  8.9459949731827e-001,
+	-2.1311032772064e-001,  2.1311032772064e-001, -2.1311032772064e-001,  2.1311032772064e-001,
+	 9.7702813148499e-001,  9.7702813148499e-001,  9.7702813148499e-001,  9.7702813148499e-001,
+	 1.0717242956161e-001,  1.0717242956161e-001, -3.1659337878227e-001, -3.1659337878227e-001,
+	-9.9424046278000e-001,  9.9424046278000e-001,  9.4856137037277e-001, -9.4856137037277e-001,
+	 9.3076694011688e-001,  9.3076694011688e-001,  9.3076694011688e-001,  9.3076694011688e-001,
+	-3.6561301350594e-001,  3.6561301350594e-001, -3.6561301350594e-001,  3.6561301350594e-001,
+	 9.8253929615021e-001,  9.8253929615021e-001,  8.4649091959000e-001,  8.4649091959000e-001,
+	-1.8605515360832e-001,  1.8605515360832e-001, -5.3240311145782e-001,  5.3240311145782e-001,
+	-9.3076694011688e-001,  9.3076694011688e-001, -9.3076694011688e-001,  9.3076694011688e-001,
+	 3.6561301350594e-001,  3.6561301350594e-001,  3.6561301350594e-001,  3.6561301350594e-001,
+	 5.6319934129715e-001,  5.6319934129715e-001, -9.7502535581589e-001, -9.7502535581589e-001,
+	-8.2632106542587e-001,  8.2632106542587e-001, -2.2209364175797e-001,  2.2209364175797e-001,
+	 3.9962419867516e-001,  3.9962419867516e-001,  3.9962419867516e-001,  3.9962419867516e-001,
+	-9.1667908430099e-001,  9.1667908430099e-001, -9.1667908430099e-001,  9.1667908430099e-001,
+	 8.3654773235321e-001,  8.3654773235321e-001, -1.6793835163116e-001, -1.6793835163116e-001,
+	-5.4789406061172e-001,  5.4789406061172e-001, -9.8579758405685e-001,  9.8579758405685e-001,
+	-3.9962419867516e-001,  3.9962419867516e-001, -3.9962419867516e-001,  3.9962419867516e-001,
+	 9.1667908430099e-001,  9.1667908430099e-001,  9.1667908430099e-001,  9.1667908430099e-001,
+	 2.0410896837711e-001,  2.0410896837711e-001, -5.7831382751465e-001, -5.7831382751465e-001,
+	-9.7894817590714e-001,  9.7894817590714e-001,  8.1581437587738e-001, -8.1581437587738e-001,
+	 7.2000247240067e-001,  7.2000247240067e-001,  7.2000247240067e-001,  7.2000247240067e-001,
+	-6.9397145509720e-001,  6.9397145509720e-001, -6.9397145509720e-001,  6.9397145509720e-001,
+	 9.2736250162125e-001,  9.2736250162125e-001,  4.0804409980774e-001,  4.0804409980774e-001,
+	-3.7416407465935e-001,  3.7416407465935e-001, -9.1296207904816e-001,  9.1296207904816e-001,
+	-7.2000247240067e-001,  7.2000247240067e-001, -7.2000247240067e-001,  7.2000247240067e-001,
+	 6.9397145509720e-001,  6.9397145509720e-001,  6.9397145509720e-001,  6.9397145509720e-001,
+	 3.9117038249969e-001,  3.9117038249969e-001, -9.3409240245819e-001, -9.3409240245819e-001,
+	-9.2031824588776e-001,  9.2031824588776e-001,  3.5703098773956e-001, -3.5703098773956e-001,
+	 1.8406730145216e-002,  1.8406730145216e-002,  1.8406730145216e-002,  1.8406730145216e-002,
+	-9.9983060359955e-001,  9.9983060359955e-001, -9.9983060359955e-001,  9.9983060359955e-001,
+	 7.1358484029770e-001,  7.1358484029770e-001, -6.8731540441513e-001, -6.8731540441513e-001,
+	-7.0056879520416e-001,  7.0056879520416e-001, -7.2635912895203e-001,  7.2635912895203e-001,
+	-1.8406730145216e-002,  1.8406730145216e-002, -1.8406730145216e-002,  1.8406730145216e-002,
+	 9.9983060359955e-001,  9.9983060359955e-001,  9.9983060359955e-001,  9.9983060359955e-001,
+	 9.2037552967668e-003,  9.2037552967668e-003, -2.7608145028353e-002, -2.7608145028353e-002,
+	-9.9995762109756e-001,  9.9995762109756e-001,  9.9961882829666e-001, -9.9961882829666e-001,
+	 9.9983060359955e-001,  9.9983060359955e-001,  9.9983060359955e-001,  9.9983060359955e-001,
+	-1.8406730145216e-002,  1.8406730145216e-002, -1.8406730145216e-002,  1.8406730145216e-002,
+	 9.9995762109756e-001,  9.9995762109756e-001,  9.9961882829666e-001,  9.9961882829666e-001,
+	-9.2037552967668e-003,  9.2037552967668e-003, -2.7608145028353e-002,  2.7608145028353e-002,
+	-9.9983060359955e-001,  9.9983060359955e-001, -9.9983060359955e-001,  9.9983060359955e-001,
+	 1.8406730145216e-002,  1.8406730145216e-002,  1.8406730145216e-002,  1.8406730145216e-002,
+	 7.0056879520416e-001,  7.0056879520416e-001, -7.2635912895203e-001, -7.2635912895203e-001,
+	-7.1358484029770e-001,  7.1358484029770e-001, -6.8731540441513e-001,  6.8731540441513e-001,
+	 6.9397145509720e-001,  6.9397145509720e-001,  6.9397145509720e-001,  6.9397145509720e-001,
+	-7.2000247240067e-001,  7.2000247240067e-001, -7.2000247240067e-001,  7.2000247240067e-001,
+	 9.2031824588776e-001,  9.2031824588776e-001,  3.5703098773956e-001,  3.5703098773956e-001,
+	-3.9117038249969e-001,  3.9117038249969e-001, -9.3409240245819e-001,  9.3409240245819e-001,
+	-6.9397145509720e-001,  6.9397145509720e-001, -6.9397145509720e-001,  6.9397145509720e-001,
+	 7.2000247240067e-001,  7.2000247240067e-001,  7.2000247240067e-001,  7.2000247240067e-001,
+	 3.7416407465935e-001,  3.7416407465935e-001, -9.1296207904816e-001, -9.1296207904816e-001,
+	-9.2736250162125e-001,  9.2736250162125e-001,  4.0804409980774e-001, -4.0804409980774e-001,
+	 9.1667908430099e-001,  9.1667908430099e-001,  9.1667908430099e-001,  9.1667908430099e-001,
+	-3.9962419867516e-001,  3.9962419867516e-001, -3.9962419867516e-001,  3.9962419867516e-001,
+	 9.7894817590714e-001,  9.7894817590714e-001,  8.1581437587738e-001,  8.1581437587738e-001,
+	-2.0410896837711e-001,  2.0410896837711e-001, -5.7831382751465e-001,  5.7831382751465e-001,
+	-9.1667908430099e-001,  9.1667908430099e-001, -9.1667908430099e-001,  9.1667908430099e-001,
+	 3.9962419867516e-001,  3.9962419867516e-001,  3.9962419867516e-001,  3.9962419867516e-001,
+	 5.4789406061172e-001,  5.4789406061172e-001, -9.8579758405685e-001, -9.8579758405685e-001,
+	-8.3654773235321e-001,  8.3654773235321e-001, -1.6793835163116e-001,  1.6793835163116e-001,
+	 3.6561301350594e-001,  3.6561301350594e-001,  3.6561301350594e-001,  3.6561301350594e-001,
+	-9.3076694011688e-001,  9.3076694011688e-001, -9.3076694011688e-001,  9.3076694011688e-001,
+	 8.2632106542587e-001,  8.2632106542587e-001, -2.2209364175797e-001, -2.2209364175797e-001,
+	-5.6319934129715e-001,  5.6319934129715e-001, -9.7502535581589e-001,  9.7502535581589e-001,
+	-3.6561301350594e-001,  3.6561301350594e-001, -3.6561301350594e-001,  3.6561301350594e-001,
+	 9.3076694011688e-001,  9.3076694011688e-001,  9.3076694011688e-001,  9.3076694011688e-001,
+	 1.8605515360832e-001,  1.8605515360832e-001, -5.3240311145782e-001, -5.3240311145782e-001,
+	-9.8253929615021e-001,  9.8253929615021e-001,  8.4649091959000e-001, -8.4649091959000e-001,
+	 9.7702813148499e-001,  9.7702813148499e-001,  9.7702813148499e-001,  9.7702813148499e-001,
+	-2.1311032772064e-001,  2.1311032772064e-001, -2.1311032772064e-001,  2.1311032772064e-001,
+	 9.9424046278000e-001,  9.9424046278000e-001,  9.4856137037277e-001,  9.4856137037277e-001,
+	-1.0717242956161e-001,  1.0717242956161e-001, -3.1659337878227e-001,  3.1659337878227e-001,
+	-9.7702813148499e-001,  9.7702813148499e-001, -9.7702813148499e-001,  9.7702813148499e-001,
+	 2.1311032772064e-001,  2.1311032772064e-001,  2.1311032772064e-001,  2.1311032772064e-001,
+	 6.2725180387497e-001,  6.2725180387497e-001, -8.9459949731827e-001, -8.9459949731827e-001,
+	-7.7881652116776e-001,  7.7881652116776e-001, -4.4686883687973e-001,  4.4686883687973e-001,
+	 5.4017150402069e-001,  5.4017150402069e-001,  5.4017150402069e-001,  5.4017150402069e-001,
+	-8.4155493974686e-001,  8.4155493974686e-001, -8.4155493974686e-001,  8.4155493974686e-001,
+	 8.7754529714584e-001,  8.7754529714584e-001,  7.0504605770111e-002,  7.0504605770111e-002,
+	-4.7949376702309e-001,  4.7949376702309e-001, -9.9751138687134e-001,  9.9751138687134e-001,
+	-5.4017150402069e-001,  5.4017150402069e-001, -5.4017150402069e-001,  5.4017150402069e-001,
+	 8.4155493974686e-001,  8.4155493974686e-001,  8.4155493974686e-001,  8.4155493974686e-001,
+	 2.8146493434906e-001,  2.8146493434906e-001, -7.5520145893097e-001, -7.5520145893097e-001,
+	-9.5957154035568e-001,  9.5957154035568e-001,  6.5549290180206e-001, -6.5549290180206e-001,
+	 8.2110249996185e-001,  8.2110249996185e-001,  8.2110249996185e-001,  8.2110249996185e-001,
+	-5.7078075408936e-001,  5.7078075408936e-001, -5.7078075408936e-001,  5.7078075408936e-001,
+	 9.5422810316086e-001,  9.5422810316086e-001,  6.1281007528305e-001,  6.1281007528305e-001,
+	-2.9907983541489e-001,  2.9907983541489e-001, -7.9023021459579e-001,  7.9023021459579e-001,
+	-8.2110249996185e-001,  8.2110249996185e-001, -8.2110249996185e-001,  8.2110249996185e-001,
+	 5.7078075408936e-001,  5.7078075408936e-001,  5.7078075408936e-001,  5.7078075408936e-001,
+	 4.6325978636742e-001,  4.6325978636742e-001, -9.9209928512573e-001, -9.9209928512573e-001,
+	-8.8622254133224e-001,  8.8622254133224e-001,  1.2545502185822e-001, -1.2545502185822e-001,
+	 1.7700421810150e-001,  1.7700421810150e-001,  1.7700421810150e-001,  1.7700421810150e-001,
+	-9.8421007394791e-001,  9.8421007394791e-001, -9.8421007394791e-001,  9.8421007394791e-001,
+	 7.6713889837265e-001,  7.6713889837265e-001, -4.9556535482407e-001, -4.9556535482407e-001,
+	-6.4148104190826e-001,  6.4148104190826e-001, -8.6857056617737e-001,  8.6857056617737e-001,
+	-1.7700421810150e-001,  1.7700421810150e-001, -1.7700421810150e-001,  1.7700421810150e-001,
+	 9.8421007394791e-001,  9.8421007394791e-001,  9.8421007394791e-001,  9.8421007394791e-001,
+	 8.8853552937508e-002,  8.8853552937508e-002, -2.6375466585159e-001, -2.6375466585159e-001,
+	-9.9604469537735e-001,  9.9604469537735e-001,  9.6458977460861e-001, -9.6458977460861e-001,
+	 9.9321192502975e-001,  9.9321192502975e-001,  9.9321192502975e-001,  9.9321192502975e-001,
+	-1.1631863564253e-001,  1.1631863564253e-001, -1.1631863564253e-001,  1.1631863564253e-001,
+	 9.9830156564713e-001,  9.9830156564713e-001,  9.8474854230881e-001,  9.8474854230881e-001,
+	-5.8258265256882e-002,  5.8258265256882e-002, -1.7398388683796e-001,  1.7398388683796e-001,
+	-9.9321192502975e-001,  9.9321192502975e-001, -9.9321192502975e-001,  9.9321192502975e-001,
+	 1.1631863564253e-001,  1.1631863564253e-001,  1.1631863564253e-001,  1.1631863564253e-001,
+	 6.6471099853516e-001,  6.6471099853516e-001, -8.1934738159180e-001, -8.1934738159180e-001,
+	-7.4710059165955e-001,  7.4710059165955e-001, -5.7329714298248e-001,  5.7329714298248e-001,
+	 6.2005722522736e-001,  6.2005722522736e-001,  6.2005722522736e-001,  6.2005722522736e-001,
+	-7.8455656766891e-001,  7.8455656766891e-001, -7.8455656766891e-001,  7.8455656766891e-001,
+	 9.0001589059830e-001,  9.0001589059830e-001,  2.1610683202744e-001,  2.1610683202744e-001,
+	-4.3585708737373e-001,  4.3585708737373e-001, -9.7636973857880e-001,  9.7636973857880e-001,
+	-6.2005722522736e-001,  6.2005722522736e-001, -6.2005722522736e-001,  6.2005722522736e-001,
+	 7.8455656766891e-001,  7.8455656766891e-001,  7.8455656766891e-001,  7.8455656766891e-001,
+	 3.2820984721184e-001,  3.2820984721184e-001, -8.4320819377899e-001, -8.4320819377899e-001,
+	-9.4460481405258e-001,  9.4460481405258e-001,  5.3758704662323e-001, -5.3758704662323e-001,
+	 8.7309497594833e-001,  8.7309497594833e-001,  8.7309497594833e-001,  8.7309497594833e-001,
+	-4.8755016922951e-001,  4.8755016922951e-001, -4.8755016922951e-001,  4.8755016922951e-001,
+	 9.6775382757187e-001,  9.6775382757187e-001,  7.2212815284729e-001,  7.2212815284729e-001,
+	-2.5189781188965e-001,  2.5189781188965e-001, -6.9175928831100e-001,  6.9175928831100e-001,
+	-8.7309497594833e-001,  8.7309497594833e-001, -8.7309497594833e-001,  8.7309497594833e-001,
+	 4.8755016922951e-001,  4.8755016922951e-001,  4.8755016922951e-001,  4.8755016922951e-001,
+	 5.0618666410446e-001,  5.0618666410446e-001, -9.9976938962936e-001, -9.9976938962936e-001,
+	-8.6242395639420e-001,  8.6242395639420e-001, -2.1474123001099e-002,  2.1474123001099e-002,
+	 2.7262136340141e-001,  2.7262136340141e-001,  2.7262136340141e-001,  2.7262136340141e-001,
+	-9.6212142705917e-001,  9.6212142705917e-001, -9.6212142705917e-001,  9.6212142705917e-001,
+	 7.9769080877304e-001,  7.9769080877304e-001, -3.6275583505630e-001, -3.6275583505630e-001,
+	-6.0306662321091e-001,  6.0306662321091e-001, -9.3188422918320e-001,  9.3188422918320e-001,
+	-2.7262136340141e-001,  2.7262136340141e-001, -2.7262136340141e-001,  2.7262136340141e-001,
+	 9.6212142705917e-001,  9.6212142705917e-001,  9.6212142705917e-001,  9.6212142705917e-001,
+	 1.3762012124062e-001,  1.3762012124062e-001, -4.0243467688560e-001, -4.0243467688560e-001,
+	-9.9048507213593e-001,  9.9048507213593e-001,  9.1544872522354e-001, -9.1544872522354e-001,
+	 9.5143502950668e-001,  9.5143502950668e-001,  9.5143502950668e-001,  9.5143502950668e-001,
+	-3.0784964561462e-001,  3.0784964561462e-001, -3.0784964561462e-001,  3.0784964561462e-001,
+	 9.8778414726257e-001,  9.8778414726257e-001,  8.9184069633484e-001,  8.9184069633484e-001,
+	-1.5582840144634e-001,  1.5582840144634e-001, -4.5234960317612e-001,  4.5234960317612e-001,
+	-9.5143502950668e-001,  9.5143502950668e-001, -9.5143502950668e-001,  9.5143502950668e-001,
+	 3.0784964561462e-001,  3.0784964561462e-001,  3.0784964561462e-001,  3.0784964561462e-001,
+	 5.8828157186508e-001,  5.8828157186508e-001, -9.5048600435257e-001, -9.5048600435257e-001,
+	-8.0865615606308e-001,  8.0865615606308e-001, -3.1076723337173e-001,  3.1076723337173e-001,
+	 4.5508360862732e-001,  4.5508360862732e-001,  4.5508360862732e-001,  4.5508360862732e-001,
+	-8.9044868946075e-001,  8.9044868946075e-001, -8.9044868946075e-001,  8.9044868946075e-001,
+	 8.5296058654785e-001,  8.5296058654785e-001, -7.6623797416687e-002, -7.6623797416687e-002,
+	-5.2197527885437e-001,  5.2197527885437e-001, -9.9705994129181e-001,  9.9705994129181e-001,
+	-4.5508360862732e-001,  4.5508360862732e-001, -4.5508360862732e-001,  4.5508360862732e-001,
+	 8.9044868946075e-001,  8.9044868946075e-001,  8.9044868946075e-001,  8.9044868946075e-001,
+	 2.3404195904732e-001,  2.3404195904732e-001, -6.5084671974182e-001, -6.5084671974182e-001,
+	-9.7222650051117e-001,  9.7222650051117e-001,  7.5920915603638e-001, -7.5920915603638e-001,
+	 7.6120239496231e-001,  7.6120239496231e-001,  7.6120239496231e-001,  7.6120239496231e-001,
+	-6.4851438999176e-001,  6.4851438999176e-001, -6.4851438999176e-001,  6.4851438999176e-001,
+	 9.3840354681015e-001,  9.3840354681015e-001,  4.9022650718689e-001,  4.9022650718689e-001,
+	-3.4554132819176e-001,  3.4554132819176e-001, -8.7159502506256e-001,  8.7159502506256e-001,
+	-7.6120239496231e-001,  7.6120239496231e-001, -7.6120239496231e-001,  7.6120239496231e-001,
+	 6.4851438999176e-001,  6.4851438999176e-001,  6.4851438999176e-001,  6.4851438999176e-001,
+	 4.1921690106392e-001,  4.1921690106392e-001, -9.6295320987701e-001, -9.6295320987701e-001,
+	-9.0788608789444e-001,  9.0788608789444e-001,  2.6966828107834e-001, -2.6966828107834e-001,
+	 7.9682439565659e-002,  7.9682439565659e-002,  7.9682439565659e-002,  7.9682439565659e-002,
+	-9.9682027101517e-001,  9.9682027101517e-001, -9.9682027101517e-001,  9.9682027101517e-001,
+	 7.3473888635635e-001,  7.3473888635635e-001, -6.1764723062515e-001, -6.1764723062515e-001,
+	-6.7835003137589e-001,  6.7835003137589e-001, -7.8645521402359e-001,  7.8645521402359e-001,
+	-7.9682439565659e-002,  7.9682439565659e-002, -7.9682439565659e-002,  7.9682439565659e-002,
+	 9.9682027101517e-001,  9.9682027101517e-001,  9.9682027101517e-001,  9.9682027101517e-001,
+	 3.9872929453850e-002,  3.9872929453850e-002, -1.1936521530151e-001, -1.1936521530151e-001,
+	-9.9920475482941e-001,  9.9920475482941e-001,  9.9285042285919e-001, -9.9285042285919e-001,
+	 9.9772304296494e-001,  9.9772304296494e-001,  9.9772304296494e-001,  9.9772304296494e-001,
+	-6.7443922162056e-002,  6.7443922162056e-002, -6.7443922162056e-002,  6.7443922162056e-002,
+	 9.9943059682846e-001,  9.9943059682846e-001,  9.9487930536270e-001,  9.9487930536270e-001,
+	-3.3741172403097e-002,  3.3741172403097e-002, -1.0106986761093e-001,  1.0106986761093e-001,
+	-9.9772304296494e-001,  9.9772304296494e-001, -9.9772304296494e-001,  9.9772304296494e-001,
+	 6.7443922162056e-002,  6.7443922162056e-002,  6.7443922162056e-002,  6.7443922162056e-002,
+	 6.8284553289413e-001,  6.8284553289413e-001, -7.7495306730270e-001, -7.7495306730270e-001,
+	-7.3056274652481e-001,  7.3056274652481e-001, -6.3201874494553e-001,  6.3201874494553e-001,
+	 6.5780669450760e-001,  6.5780669450760e-001,  6.5780669450760e-001,  6.5780669450760e-001,
+	-7.5318676233292e-001,  7.5318676233292e-001, -7.5318676233292e-001,  7.5318676233292e-001,
+	 9.1044127941132e-001,  9.1044127941132e-001,  2.8734743595123e-001,  2.8734743595123e-001,
+	-4.1363832354546e-001,  4.1363832354546e-001, -9.5782625675201e-001,  9.5782625675201e-001,
+	-6.5780669450760e-001,  6.5780669450760e-001, -6.5780669450760e-001,  6.5780669450760e-001,
+	 7.5318676233292e-001,  7.5318676233292e-001,  7.5318676233292e-001,  7.5318676233292e-001,
+	 3.5129275918007e-001,  3.5129275918007e-001, -8.8047087192535e-001, -8.8047087192535e-001,
+	-9.3626564741135e-001,  9.3626564741135e-001,  4.7410020232201e-001, -4.7410020232201e-001,
+	 8.9596623182297e-001,  8.9596623182297e-001,  8.9596623182297e-001,  8.9596623182297e-001,
+	-4.4412216544151e-001,  4.4412216544151e-001, -4.4412216544151e-001,  4.4412216544151e-001,
+	 9.7364425659180e-001,  9.7364425659180e-001,  7.7106052637100e-001,  7.7106052637100e-001,
+	-2.2807209193707e-001,  2.2807209193707e-001, -6.3676190376282e-001,  6.3676190376282e-001,
+	-8.9596623182297e-001,  8.9596623182297e-001, -8.9596623182297e-001,  8.9596623182297e-001,
+	 4.4412216544151e-001,  4.4412216544151e-001,  4.4412216544151e-001,  4.4412216544151e-001,
+	 5.2719914913177e-001,  5.2719914913177e-001, -9.9548065662384e-001, -9.9548065662384e-001,
+	-8.4974175691605e-001,  8.4974175691605e-001, -9.4963490962982e-002,  9.4963490962982e-002,
+	 3.1950202584267e-001,  3.1950202584267e-001,  3.1950202584267e-001,  3.1950202584267e-001,
+	-9.4758558273315e-001,  9.4758558273315e-001, -9.4758558273315e-001,  9.4758558273315e-001,
+	 8.1225055456161e-001,  8.1225055456161e-001, -2.9321926832199e-001, -2.9321926832199e-001,
+	-5.8330869674683e-001,  5.8330869674683e-001, -9.5604515075684e-001,  9.5604515075684e-001,
+	-3.1950202584267e-001,  3.1950202584267e-001, -3.1950202584267e-001,  3.1950202584267e-001,
+	 9.4758558273315e-001,  9.4758558273315e-001,  9.4758558273315e-001,  9.4758558273315e-001,
+	 1.6188639402390e-001,  1.6188639402390e-001, -4.6868878602982e-001, -4.6868878602982e-001,
+	-9.8680937290192e-001,  9.8680937290192e-001,  8.8336330652237e-001, -8.8336330652237e-001,
+	 9.6539443731308e-001,  9.6539443731308e-001,  9.6539443731308e-001,  9.6539443731308e-001,
+	-2.6079413294792e-001,  2.6079413294792e-001, -2.6079413294792e-001,  2.6079413294792e-001,
+	 9.9131083488464e-001,  9.9131083488464e-001,  9.2270112037659e-001,  9.2270112037659e-001,
+	-1.3154003024101e-001,  1.3154003024101e-001, -3.8551607728004e-001,  3.8551607728004e-001,
+	-9.6539443731308e-001,  9.6539443731308e-001, -9.6539443731308e-001,  9.6539443731308e-001,
+	 2.6079413294792e-001,  2.6079413294792e-001,  2.6079413294792e-001,  2.6079413294792e-001,
+	 6.0794979333878e-001,  6.0794979333878e-001, -9.2504924535751e-001, -9.2504924535751e-001,
+	-7.9397547245026e-001,  7.9397547245026e-001, -3.7984716892242e-001,  3.7984716892242e-001,
+	 4.9822768568993e-001,  4.9822768568993e-001,  4.9822768568993e-001,  4.9822768568993e-001,
+	-8.6704623699188e-001,  8.6704623699188e-001, -8.6704623699188e-001,  8.6704623699188e-001,
+	 8.6551362276077e-001,  8.6551362276077e-001, -3.0679106712341e-003, -3.0679106712341e-003,
+	-5.0088536739349e-001,  5.0088536739349e-001, -9.9999535083771e-001,  9.9999535083771e-001,
+	-4.9822768568993e-001,  4.9822768568993e-001, -4.9822768568993e-001,  4.9822768568993e-001,
+	 8.6704623699188e-001,  8.6704623699188e-001,  8.6704623699188e-001,  8.6704623699188e-001,
+	 2.5783109664917e-001,  2.5783109664917e-001, -7.0493412017822e-001, -7.0493412017822e-001,
+	-9.6618998050690e-001,  9.6618998050690e-001,  7.0927280187607e-001, -7.0927280187607e-001,
+	 7.9210656881332e-001,  7.9210656881332e-001,  7.9210656881332e-001,  7.9210656881332e-001,
+	-6.1038279533386e-001,  6.1038279533386e-001, -6.1038279533386e-001,  6.1038279533386e-001,
+	 9.4660091400146e-001,  9.4660091400146e-001,  5.5301666259766e-001,  5.5301666259766e-001,
+	-3.2240769267082e-001,  3.2240769267082e-001, -8.3317005634308e-001,  8.3317005634308e-001,
+	-7.9210656881332e-001,  7.9210656881332e-001, -7.9210656881332e-001,  7.9210656881332e-001,
+	 6.1038279533386e-001,  6.1038279533386e-001,  6.1038279533386e-001,  6.1038279533386e-001,
+	 4.4137129187584e-001,  4.4137129187584e-001, -9.8018205165863e-001, -9.8018205165863e-001,
+	-8.9732456207275e-001,  8.9732456207275e-001,  1.9809836149216e-001, -1.9809836149216e-001,
+	 1.2849810719490e-001,  1.2849810719490e-001,  1.2849810719490e-001,  1.2849810719490e-001,
+	-9.9170976877213e-001,  9.9170976877213e-001, -9.9170976877213e-001,  9.9170976877213e-001,
+	 7.5116509199142e-001,  7.5116509199142e-001, -5.5811864137650e-001, -5.5811864137650e-001,
+	-6.6011434793472e-001,  6.6011434793472e-001, -8.2976120710373e-001,  8.2976120710373e-001,
+	-1.2849810719490e-001,  1.2849810719490e-001, -1.2849810719490e-001,  1.2849810719490e-001,
+	 9.9170976877213e-001,  9.9170976877213e-001,  9.9170976877213e-001,  9.9170976877213e-001,
+	 6.4382635056973e-002,  6.4382635056973e-002, -1.9208037853241e-001, -1.9208037853241e-001,
+	-9.9792528152466e-001,  9.9792528152466e-001,  9.8137921094894e-001, -9.8137921094894e-001,
+	 9.8630809783936e-001,  9.8630809783936e-001,  9.8630809783936e-001,  9.8630809783936e-001,
+	-1.6491311788559e-001,  1.6491311788559e-001, -1.6491311788559e-001,  1.6491311788559e-001,
+	 9.9657112360001e-001,  9.9657112360001e-001,  9.6928119659424e-001,  9.6928119659424e-001,
+	-8.2740269601345e-002,  8.2740269601345e-002, -2.4595502018929e-001,  2.4595502018929e-001,
+	-9.8630809783936e-001,  9.8630809783936e-001, -9.8630809783936e-001,  9.8630809783936e-001,
+	 1.6491311788559e-001,  1.6491311788559e-001,  1.6491311788559e-001,  1.6491311788559e-001,
+	 6.4617604017258e-001,  6.4617604017258e-001, -8.5930174589157e-001, -8.5930174589157e-001,
+	-7.6318842172623e-001,  7.6318842172623e-001, -5.1146894693375e-001,  5.1146894693375e-001,
+	 5.8081394433975e-001,  5.8081394433975e-001,  5.8081394433975e-001,  5.8081394433975e-001,
+	-8.1403630971909e-001,  8.1403630971909e-001, -8.1403630971909e-001,  8.1403630971909e-001,
+	 8.8904833793640e-001,  8.8904833793640e-001,  1.4369499683380e-001,  1.4369499683380e-001,
+	-4.5781332254410e-001,  4.5781332254410e-001, -9.8962193727493e-001,  9.8962193727493e-001,
+	-5.8081394433975e-001,  5.8081394433975e-001, -5.8081394433975e-001,  5.8081394433975e-001,
+	 8.1403630971909e-001,  8.1403630971909e-001,  8.1403630971909e-001,  8.1403630971909e-001,
+	 3.0492922663689e-001,  3.0492922663689e-001, -8.0137610435486e-001, -8.0137610435486e-001,
+	-9.5237499475479e-001,  9.5237499475479e-001,  5.9816074371338e-001, -5.9816074371338e-001,
+	 8.4812033176422e-001,  8.4812033176422e-001,  8.4812033176422e-001,  8.4812033176422e-001,
+	-5.2980363368988e-001,  5.2980363368988e-001, -5.2980363368988e-001,  5.2980363368988e-001,
+	 9.6128046512604e-001,  9.6128046512604e-001,  6.6928255558014e-001,  6.6928255558014e-001,
+	-2.7557182312012e-001,  2.7557182312012e-001, -7.4300789833069e-001,  7.4300789833069e-001,
+	-8.4812033176422e-001,  8.4812033176422e-001, -8.4812033176422e-001,  8.4812033176422e-001,
+	 5.2980363368988e-001,  5.2980363368988e-001,  5.2980363368988e-001,  5.2980363368988e-001,
+	 4.8486927151680e-001,  4.8486927151680e-001, -9.9864017963409e-001, -9.9864017963409e-001,
+	-8.7458664178848e-001,  8.7458664178848e-001,  5.2131652832031e-002, -5.2131652832031e-002,
+	 2.2508391737938e-001,  2.2508391737938e-001,  2.2508391737938e-001,  2.2508391737938e-001,
+	-9.7433936595917e-001,  9.7433936595917e-001, -9.7433936595917e-001,  9.7433936595917e-001,
+	 7.8265058994293e-001,  7.8265058994293e-001, -4.3032658100128e-001, -4.3032658100128e-001,
+	-6.2246131896973e-001,  6.2246131896973e-001, -9.0267324447632e-001,  9.0267324447632e-001,
+	-2.2508391737938e-001,  2.2508391737938e-001, -2.2508391737938e-001,  2.2508391737938e-001,
+	 9.7433936595917e-001,  9.7433936595917e-001,  9.7433936595917e-001,  9.7433936595917e-001,
+	 1.1327095329762e-001,  1.1327095329762e-001, -3.3399963378906e-001, -3.3399963378906e-001,
+	-9.9356412887573e-001,  9.9356412887573e-001,  9.4257318973541e-001, -9.4257318973541e-001,
+	 9.3518352508545e-001,  9.3518352508545e-001,  9.3518352508545e-001,  9.3518352508545e-001,
+	-3.5416352748871e-001,  3.5416352748871e-001, -3.5416352748871e-001,  3.5416352748871e-001,
+	 9.8366242647171e-001,  9.8366242647171e-001,  8.5614734888077e-001,  8.5614734888077e-001,
+	-1.8002291023731e-001,  1.8002291023731e-001, -5.1673179864883e-001,  5.1673179864883e-001,
+	-9.3518352508545e-001,  9.3518352508545e-001, -9.3518352508545e-001,  9.3518352508545e-001,
+	 3.5416352748871e-001,  3.5416352748871e-001,  3.5416352748871e-001,  3.5416352748871e-001,
+	 5.6825894117355e-001,  5.6825894117355e-001, -9.7077208757401e-001, -9.7077208757401e-001,
+	-8.2284975051880e-001,  8.2284975051880e-001, -2.4000298976898e-001,  2.4000298976898e-001,
+	 4.1084319353104e-001,  4.1084319353104e-001,  4.1084319353104e-001,  4.1084319353104e-001,
+	-9.1170603036880e-001,  9.1170603036880e-001, -9.1170603036880e-001,  9.1170603036880e-001,
+	 8.3989375829697e-001,  8.3989375829697e-001, -1.4976453781128e-001, -1.4976453781128e-001,
+	-5.4275077581406e-001,  5.4275077581406e-001, -9.8872166872025e-001,  9.8872166872025e-001,
+	-4.1084319353104e-001,  4.1084319353104e-001, -4.1084319353104e-001,  4.1084319353104e-001,
+	 9.1170603036880e-001,  9.1170603036880e-001,  9.1170603036880e-001,  9.1170603036880e-001,
+	 2.1011184155941e-001,  2.1011184155941e-001, -5.9323233366013e-001, -5.9323233366013e-001,
+	-9.7767734527588e-001,  9.7767734527588e-001,  8.0503129959106e-001, -8.0503129959106e-001,
+	 7.2846436500549e-001,  7.2846436500549e-001,  7.2846436500549e-001,  7.2846436500549e-001,
+	-6.8508368730545e-001,  6.8508368730545e-001, -6.8508368730545e-001,  6.8508368730545e-001,
+	 9.2964088916779e-001,  9.2964088916779e-001,  4.2477965354919e-001,  4.2477965354919e-001,
+	-3.6846685409546e-001,  3.6846685409546e-001, -9.0529680252075e-001,  9.0529680252075e-001,
+	-7.2846436500549e-001,  7.2846436500549e-001, -7.2846436500549e-001,  7.2846436500549e-001,
+	 6.8508368730545e-001,  6.8508368730545e-001,  6.8508368730545e-001,  6.8508368730545e-001,
+	 3.9680999517441e-001,  3.9680999517441e-001, -9.4050604104996e-001, -9.4050604104996e-001,
+	-9.1790080070496e-001,  9.1790080070496e-001,  3.3977693319321e-001, -3.3977693319321e-001,
+	 3.0674804002047e-002,  3.0674804002047e-002,  3.0674804002047e-002,  3.0674804002047e-002,
+	-9.9952942132950e-001,  9.9952942132950e-001, -9.9952942132950e-001,  9.9952942132950e-001,
+	 7.1787005662918e-001,  7.1787005662918e-001, -6.7382901906967e-001, -6.7382901906967e-001,
+	-6.9617712497711e-001,  6.9617712497711e-001, -7.3888731002808e-001,  7.3888731002808e-001,
+	-3.0674804002047e-002,  3.0674804002047e-002, -3.0674804002047e-002,  3.0674804002047e-002,
+	 9.9952942132950e-001,  9.9952942132950e-001,  9.9952942132950e-001,  9.9952942132950e-001,
+	 1.5339206904173e-002,  1.5339206904173e-002, -4.6003181487322e-002, -4.6003181487322e-002,
+	-9.9988234043121e-001,  9.9988234043121e-001,  9.9894130229950e-001, -9.9894130229950e-001,
+	 9.9907773733139e-001,  9.9907773733139e-001,  9.9907773733139e-001,  9.9907773733139e-001,
+	-4.2938258498907e-002,  4.2938258498907e-002, -4.2938258498907e-002,  4.2938258498907e-002,
+	 9.9976938962936e-001,  9.9976938962936e-001,  9.9792528152466e-001,  9.9792528152466e-001,
+	-2.1474080160260e-002,  2.1474080160260e-002, -6.4382635056973e-002,  6.4382635056973e-002,
+	-9.9907773733139e-001,  9.9907773733139e-001, -9.9907773733139e-001,  9.9907773733139e-001,
+	 4.2938258498907e-002,  4.2938258498907e-002,  4.2938258498907e-002,  4.2938258498907e-002,
+	 6.9175928831100e-001,  6.9175928831100e-001, -7.5116509199142e-001, -7.5116509199142e-001,
+	-7.2212815284729e-001,  7.2212815284729e-001, -6.6011440753937e-001,  6.6011440753937e-001,
+	 6.7609274387360e-001,  6.7609274387360e-001,  6.7609274387360e-001,  6.7609274387360e-001,
+	-7.3681652545929e-001,  7.3681652545929e-001, -7.3681652545929e-001,  7.3681652545929e-001,
+	 9.1544872522354e-001,  9.1544872522354e-001,  3.2240772247314e-001,  3.2240772247314e-001,
+	-4.0243464708328e-001,  4.0243464708328e-001, -9.4660085439682e-001,  9.4660085439682e-001,
+	-6.7609274387360e-001,  6.7609274387360e-001, -6.7609274387360e-001,  6.7609274387360e-001,
+	 7.3681652545929e-001,  7.3681652545929e-001,  7.3681652545929e-001,  7.3681652545929e-001,
+	 3.6275574564934e-001,  3.6275574564934e-001, -8.9732468128204e-001, -8.9732468128204e-001,
+	-9.3188428878784e-001,  9.3188428878784e-001,  4.4137123227119e-001, -4.4137123227119e-001,
+	 9.0659570693970e-001,  9.0659570693970e-001,  9.0659570693970e-001,  9.0659570693970e-001,
+	-4.2200028896332e-001,  4.2200028896332e-001, -4.2200028896332e-001,  4.2200028896332e-001,
+	 9.7636973857880e-001,  9.7636973857880e-001,  7.9397547245026e-001,  7.9397547245026e-001,
+	-2.1610680222511e-001,  2.1610680222511e-001, -6.0794985294342e-001,  6.0794985294342e-001,
+	-9.0659570693970e-001,  9.0659570693970e-001, -9.0659570693970e-001,  9.0659570693970e-001,
+	 4.2200028896332e-001,  4.2200028896332e-001,  4.2200028896332e-001,  4.2200028896332e-001,
+	 5.3758710622787e-001,  5.3758710622787e-001, -9.9131089448929e-001, -9.9131089448929e-001,
+	-8.4320825338364e-001,  8.4320825338364e-001, -1.3154006004333e-001,  1.3154006004333e-001,
+	 3.4266072511673e-001,  3.4266072511673e-001,  3.4266072511673e-001,  3.4266072511673e-001,
+	-9.3945920467377e-001,  9.3945920467377e-001, -9.3945920467377e-001,  9.3945920467377e-001,
+	 8.1934750080109e-001,  8.1934750080109e-001, -2.5783121585846e-001, -2.5783121585846e-001,
+	-5.7329720258713e-001,  5.7329720258713e-001, -9.6618992090225e-001,  9.6618992090225e-001,
+	-3.4266072511673e-001,  3.4266072511673e-001, -3.4266072511673e-001,  3.4266072511673e-001,
+	 9.3945920467377e-001,  9.3945920467377e-001,  9.3945920467377e-001,  9.3945920467377e-001,
+	 1.7398387193680e-001,  1.7398387193680e-001, -5.0088536739349e-001, -5.0088536739349e-001,
+	-9.8474848270416e-001,  9.8474848270416e-001,  8.6551362276077e-001, -8.6551362276077e-001,
+	 9.7150391340256e-001,  9.7150391340256e-001,  9.7150391340256e-001,  9.7150391340256e-001,
+	-2.3702360689640e-001,  2.3702360689640e-001, -2.3702360689640e-001,  2.3702360689640e-001,
+	 9.9285042285919e-001,  9.9285042285919e-001,  9.3626564741135e-001,  9.3626564741135e-001,
+	-1.1936521530151e-001,  1.1936521530151e-001, -3.5129275918007e-001,  3.5129275918007e-001,
+	-9.7150391340256e-001,  9.7150391340256e-001, -9.7150391340256e-001,  9.7150391340256e-001,
+	 2.3702360689640e-001,  2.3702360689640e-001,  2.3702360689640e-001,  2.3702360689640e-001,
+	 6.1764734983444e-001,  6.1764734983444e-001, -9.1044133901596e-001, -9.1044133901596e-001,
+	-7.8645521402359e-001,  7.8645521402359e-001, -4.1363841295242e-001,  4.1363841295242e-001,
+	 5.1935601234436e-001,  5.1935601234436e-001,  5.1935601234436e-001,  5.1935601234436e-001,
+	-8.5455799102783e-001,  8.5455799102783e-001, -8.5455799102783e-001,  8.5455799102783e-001,
+	 8.7159508466721e-001,  8.7159508466721e-001,  3.3741116523743e-002,  3.3741116523743e-002,
+	-4.9022650718689e-001,  4.9022650718689e-001, -9.9943053722382e-001,  9.9943053722382e-001,
+	-5.1935601234436e-001,  5.1935601234436e-001, -5.1935601234436e-001,  5.1935601234436e-001,
+	 8.5455799102783e-001,  8.5455799102783e-001,  8.5455799102783e-001,  8.5455799102783e-001,
+	 2.6966834068298e-001,  2.6966834068298e-001, -7.3056280612946e-001, -7.3056280612946e-001,
+	-9.6295326948166e-001,  9.6295326948166e-001,  6.8284553289413e-001, -6.8284553289413e-001,
+	 8.0684757232666e-001,  8.0684757232666e-001,  8.0684757232666e-001,  8.0684757232666e-001,
+	-5.9075969457626e-001,  5.9075969457626e-001, -5.9075969457626e-001,  5.9075969457626e-001,
+	 9.5048606395721e-001,  9.5048606395721e-001,  5.8330863714218e-001,  5.8330863714218e-001,
+	-3.1076717376709e-001,  3.1076717376709e-001, -8.1225049495697e-001,  8.1225049495697e-001,
+	-8.0684757232666e-001,  8.0684757232666e-001, -8.0684757232666e-001,  8.0684757232666e-001,
+	 5.9075969457626e-001,  5.9075969457626e-001,  5.9075969457626e-001,  5.9075969457626e-001,
+	 4.5234960317612e-001,  4.5234960317612e-001, -9.8680943250656e-001, -9.8680943250656e-001,
+	-8.9184069633484e-001,  8.9184069633484e-001,  1.6188633441925e-001, -1.6188633441925e-001,
+	 1.5279719233513e-001,  1.5279719233513e-001,  1.5279719233513e-001,  1.5279719233513e-001,
+	-9.8825758695602e-001,  9.8825758695602e-001, -9.8825758695602e-001,  9.8825758695602e-001,
+	 7.5920915603638e-001,  7.5920915603638e-001, -5.2719926834106e-001, -5.2719926834106e-001,
+	-6.5084671974182e-001,  6.5084671974182e-001, -8.4974169731140e-001,  8.4974169731140e-001,
+	-1.5279719233513e-001,  1.5279719233513e-001, -1.5279719233513e-001,  1.5279719233513e-001,
+	 9.8825758695602e-001,  9.8825758695602e-001,  9.8825758695602e-001,  9.8825758695602e-001,
+	 7.6623864471912e-002,  7.6623864471912e-002, -2.2807210683823e-001, -2.2807210683823e-001,
+	-9.9706006050110e-001,  9.9706006050110e-001,  9.7364425659180e-001, -9.7364425659180e-001,
+	 9.9005818367004e-001,  9.9005818367004e-001,  9.9005818367004e-001,  9.9005818367004e-001,
+	-1.4065824449062e-001,  1.4065824449062e-001, -1.4065824449062e-001,  1.4065824449062e-001,
+	 9.9751144647598e-001,  9.9751144647598e-001,  9.7767734527588e-001,  9.7767734527588e-001,
+	-7.0504575967789e-002,  7.0504575967789e-002, -2.1011185646057e-001,  2.1011185646057e-001,
+	-9.9005818367004e-001,  9.9005818367004e-001, -9.9005818367004e-001,  9.9005818367004e-001,
+	 1.4065824449062e-001,  1.4065824449062e-001,  1.4065824449062e-001,  1.4065824449062e-001,
+	 6.5549284219742e-001,  6.5549284219742e-001, -8.3989363908768e-001, -8.3989363908768e-001,
+	-7.5520133972168e-001,  7.5520133972168e-001, -5.4275071620941e-001,  5.4275071620941e-001,
+	 6.0061651468277e-001,  6.0061651468277e-001,  6.0061651468277e-001,  6.0061651468277e-001,
+	-7.9953724145889e-001,  7.9953724145889e-001, -7.9953724145889e-001,  7.9953724145889e-001,
+	 8.9459949731827e-001,  8.9459949731827e-001,  1.8002295494080e-001,  1.8002295494080e-001,
+	-4.4686883687973e-001,  4.4686883687973e-001, -9.8366242647171e-001,  9.8366242647171e-001,
+	-6.0061651468277e-001,  6.0061651468277e-001, -6.0061651468277e-001,  6.0061651468277e-001,
+	 7.9953724145889e-001,  7.9953724145889e-001,  7.9953724145889e-001,  7.9953724145889e-001,
+	 3.1659337878227e-001,  3.1659337878227e-001, -8.2284986972809e-001, -8.2284986972809e-001,
+	-9.4856137037277e-001,  9.4856137037277e-001,  5.6825894117355e-001, -5.6825894117355e-001,
+	 8.6086690425873e-001,  8.6086690425873e-001,  8.6086690425873e-001,  8.6086690425873e-001,
+	-5.0883013010025e-001,  5.0883013010025e-001, -5.0883013010025e-001,  5.0883013010025e-001,
+	 9.6458977460861e-001,  9.6458977460861e-001,  6.9617712497711e-001,  6.9617712497711e-001,
+	-2.6375469565392e-001,  2.6375469565392e-001, -7.1786999702454e-001,  7.1786999702454e-001,
+	-8.6086690425873e-001,  8.6086690425873e-001, -8.6086690425873e-001,  8.6086690425873e-001,
+	 5.0883013010025e-001,  5.0883013010025e-001,  5.0883013010025e-001,  5.0883013010025e-001,
+	 4.9556526541710e-001,  4.9556526541710e-001, -9.9988222122192e-001, -9.9988222122192e-001,
+	-8.6857068538666e-001,  8.6857068538666e-001,  1.5339195728302e-002, -1.5339195728302e-002,
+	 2.4892760813236e-001,  2.4892760813236e-001,  2.4892760813236e-001,  2.4892760813236e-001,
+	-9.6852207183838e-001,  9.6852207183838e-001, -9.6852207183838e-001,  9.6852207183838e-001,
+	 7.9023021459579e-001,  7.9023021459579e-001, -3.9680999517441e-001, -3.9680999517441e-001,
+	-6.1281007528305e-001,  6.1281007528305e-001, -9.1790074110031e-001,  9.1790074110031e-001,
+	-2.4892760813236e-001,  2.4892760813236e-001, -2.4892760813236e-001,  2.4892760813236e-001,
+	 9.6852207183838e-001,  9.6852207183838e-001,  9.6852207183838e-001,  9.6852207183838e-001,
+	 1.2545499205589e-001,  1.2545499205589e-001, -3.6846682429314e-001, -3.6846682429314e-001,
+	-9.9209928512573e-001,  9.9209928512573e-001,  9.2964088916779e-001, -9.2964088916779e-001,
+	 9.4359344244003e-001,  9.4359344244003e-001,  9.4359344244003e-001,  9.4359344244003e-001,
+	-3.3110630512238e-001,  3.3110630512238e-001, -3.3110630512238e-001,  3.3110630512238e-001,
+	 9.8579752445221e-001,  9.8579752445221e-001,  8.7458664178848e-001,  8.7458664178848e-001,
+	-1.6793830692768e-001,  1.6793830692768e-001, -4.8486924171448e-001,  4.8486924171448e-001,
+	-9.4359344244003e-001,  9.4359344244003e-001, -9.4359344244003e-001,  9.4359344244003e-001,
+	 3.3110630512238e-001,  3.3110630512238e-001,  3.3110630512238e-001,  3.3110630512238e-001,
+	 5.7831382751465e-001,  5.7831382751465e-001, -9.6128034591675e-001, -9.6128034591675e-001,
+	-8.1581437587738e-001,  8.1581437587738e-001, -2.7557194232941e-001,  2.7557194232941e-001,
+	 4.3309381604195e-001,  4.3309381604195e-001,  4.3309381604195e-001,  4.3309381604195e-001,
+	-9.0134882926941e-001,  9.0134882926941e-001, -9.0134882926941e-001,  9.0134882926941e-001,
+	 8.4649091959000e-001,  8.4649091959000e-001, -1.1327093839645e-001, -1.1327093839645e-001,
+	-5.3240311145782e-001,  5.3240311145782e-001, -9.9356412887573e-001,  9.9356412887573e-001,
+	-4.3309381604195e-001,  4.3309381604195e-001, -4.3309381604195e-001,  4.3309381604195e-001,
+	 9.0134882926941e-001,  9.0134882926941e-001,  9.0134882926941e-001,  9.0134882926941e-001,
+	 2.2209362685680e-001,  2.2209362685680e-001, -6.2246125936508e-001, -6.2246125936508e-001,
+	-9.7502535581589e-001,  9.7502535581589e-001,  7.8265058994293e-001, -7.8265058994293e-001,
+	 7.4505776166916e-001,  7.4505776166916e-001,  7.4505776166916e-001,  7.4505776166916e-001,
+	-6.6699993610382e-001,  6.6699993610382e-001, -6.6699993610382e-001,  6.6699993610382e-001,
+	 9.3409252166748e-001,  9.3409252166748e-001,  4.5781326293945e-001,  4.5781326293945e-001,
+	-3.5703095793724e-001,  3.5703095793724e-001, -8.8904833793640e-001,  8.8904833793640e-001,
+	-7.4505776166916e-001,  7.4505776166916e-001, -7.4505776166916e-001,  7.4505776166916e-001,
+	 6.6699993610382e-001,  6.6699993610382e-001,  6.6699993610382e-001,  6.6699993610382e-001,
+	 4.0804415941238e-001,  4.0804415941238e-001, -9.5237499475479e-001, -9.5237499475479e-001,
+	-9.1296219825745e-001,  9.1296219825745e-001,  3.0492925643921e-001, -3.0492925643921e-001,
+	 5.5195245891809e-002,  5.5195245891809e-002,  5.5195245891809e-002,  5.5195245891809e-002,
+	-9.9847555160522e-001,  9.9847555160522e-001, -9.9847555160522e-001,  9.9847555160522e-001,
+	 7.2635912895203e-001,  7.2635912895203e-001, -6.4617598056793e-001, -6.4617598056793e-001,
+	-6.8731534481049e-001,  6.8731534481049e-001, -7.6318836212158e-001,  7.6318836212158e-001,
+	-5.5195245891809e-002,  5.5195245891809e-002, -5.5195245891809e-002,  5.5195245891809e-002,
+	 9.9847555160522e-001,  9.9847555160522e-001,  9.9847555160522e-001,  9.9847555160522e-001,
+	 2.7608146890998e-002,  2.7608146890998e-002, -8.2740262150764e-002, -8.2740262150764e-002,
+	-9.9961882829666e-001,  9.9961882829666e-001,  9.9657112360001e-001, -9.9657112360001e-001,
+	 9.9576741456985e-001,  9.9576741456985e-001,  9.9576741456985e-001,  9.9576741456985e-001,
+	-9.1908961534500e-002,  9.1908961534500e-002, -9.1908961534500e-002,  9.1908961534500e-002,
+	 9.9894130229950e-001,  9.9894130229950e-001,  9.9048507213593e-001,  9.9048507213593e-001,
+	-4.6003185212612e-002,  4.6003185212612e-002, -1.3762012124062e-001,  1.3762012124062e-001,
+	-9.9576741456985e-001,  9.9576741456985e-001, -9.9576741456985e-001,  9.9576741456985e-001,
+	 9.1908961534500e-002,  9.1908961534500e-002,  9.1908961534500e-002,  9.1908961534500e-002,
+	 6.7382901906967e-001,  6.7382901906967e-001, -7.9769080877304e-001, -7.9769080877304e-001,
+	-7.3888731002808e-001,  7.3888731002808e-001, -6.0306668281555e-001,  6.0306668281555e-001,
+	 6.3912445306778e-001,  6.3912445306778e-001,  6.3912445306778e-001,  6.3912445306778e-001,
+	-7.6910334825516e-001,  7.6910334825516e-001, -7.6910334825516e-001,  7.6910334825516e-001,
+	 9.0529674291611e-001,  9.0529674291611e-001,  2.5189781188965e-001,  2.5189781188965e-001,
+	-4.2477968335152e-001,  4.2477968335152e-001, -9.6775388717651e-001,  9.6775388717651e-001,
+	-6.3912445306778e-001,  6.3912445306778e-001, -6.3912445306778e-001,  6.3912445306778e-001,
+	 7.6910334825516e-001,  7.6910334825516e-001,  7.6910334825516e-001,  7.6910334825516e-001,
+	 3.3977690339088e-001,  3.3977690339088e-001, -8.6242389678955e-001, -8.6242389678955e-001,
+	-9.4050604104996e-001,  9.4050604104996e-001,  5.0618660449982e-001, -5.0618660449982e-001,
+	 8.8479709625244e-001,  8.8479709625244e-001,  8.8479709625244e-001,  8.8479709625244e-001,
+	-4.6597650647163e-001,  4.6597650647163e-001, -4.6597650647163e-001,  4.6597650647163e-001,
+	 9.7077214717865e-001,  9.7077214717865e-001,  7.4710059165955e-001,  7.4710059165955e-001,
+	-2.4000303447247e-001,  2.4000303447247e-001, -6.6471099853516e-001,  6.6471099853516e-001,
+	-8.8479709625244e-001,  8.8479709625244e-001, -8.8479709625244e-001,  8.8479709625244e-001,
+	 4.6597650647163e-001,  4.6597650647163e-001,  4.6597650647163e-001,  4.6597650647163e-001,
+	 5.1673179864883e-001,  5.1673179864883e-001, -9.9830156564713e-001, -9.9830156564713e-001,
+	-8.5614734888077e-001,  8.5614734888077e-001, -5.8258235454559e-002,  5.8258235454559e-002,
+	 2.9615089297295e-001,  2.9615089297295e-001,  2.9615089297295e-001,  2.9615089297295e-001,
+	-9.5514118671417e-001,  9.5514118671417e-001, -9.5514118671417e-001,  9.5514118671417e-001,
+	 8.0503129959106e-001,  8.0503129959106e-001, -3.2820999622345e-001, -3.2820999622345e-001,
+	-5.9323233366013e-001,  5.9323233366013e-001, -9.4460481405258e-001,  9.4460481405258e-001,
+	-2.9615089297295e-001,  2.9615089297295e-001, -2.9615089297295e-001,  2.9615089297295e-001,
+	 9.5514118671417e-001,  9.5514118671417e-001,  9.5514118671417e-001,  9.5514118671417e-001,
+	 1.4976453781128e-001,  1.4976453781128e-001, -4.3585705757141e-001, -4.3585705757141e-001,
+	-9.8872166872025e-001,  9.8872166872025e-001,  9.0001589059830e-001, -9.0001589059830e-001,
+	 9.5870345830917e-001,  9.5870345830917e-001,  9.5870345830917e-001,  9.5870345830917e-001,
+	-2.8440755605698e-001,  2.8440755605698e-001, -2.8440755605698e-001,  2.8440755605698e-001,
+	 9.8962199687958e-001,  9.8962199687958e-001,  9.0788608789444e-001,  9.0788608789444e-001,
+	-1.4369504153728e-001,  1.4369504153728e-001, -4.1921687126160e-001,  4.1921687126160e-001,
+	-9.5870345830917e-001,  9.5870345830917e-001, -9.5870345830917e-001,  9.5870345830917e-001,
+	 2.8440755605698e-001,  2.8440755605698e-001,  2.8440755605698e-001,  2.8440755605698e-001,
+	 5.9816074371338e-001,  5.9816074371338e-001, -9.3840348720551e-001, -9.3840348720551e-001,
+	-8.0137616395950e-001,  8.0137616395950e-001, -3.4554141759872e-001,  3.4554141759872e-001,
+	 4.7679924964905e-001,  4.7679924964905e-001,  4.7679924964905e-001,  4.7679924964905e-001,
+	-8.7901222705841e-001,  8.7901222705841e-001, -8.7901222705841e-001,  8.7901222705841e-001,
+	 8.5930180549622e-001,  8.5930180549622e-001, -3.9873003959656e-002, -3.9873003959656e-002,
+	-5.1146888732910e-001,  5.1146888732910e-001, -9.9920475482941e-001,  9.9920475482941e-001,
+	-4.7679924964905e-001,  4.7679924964905e-001, -4.7679924964905e-001,  4.7679924964905e-001,
+	 8.7901222705841e-001,  8.7901222705841e-001,  8.7901222705841e-001,  8.7901222705841e-001,
+	 2.4595504999161e-001,  2.4595504999161e-001, -6.7835009098053e-001, -6.7835009098053e-001,
+	-9.6928125619888e-001,  9.6928125619888e-001,  7.3473888635635e-001, -7.3473888635635e-001,
+	 7.7688843011856e-001,  7.7688843011856e-001,  7.7688843011856e-001,  7.7688843011856e-001,
+	-6.2963825464249e-001,  6.2963825464249e-001, -6.2963825464249e-001,  6.2963825464249e-001,
+	 9.4257318973541e-001,  9.4257318973541e-001,  5.2197527885437e-001,  5.2197527885437e-001,
+	-3.3399966359138e-001,  3.3399966359138e-001, -8.5296058654785e-001,  8.5296058654785e-001,
+	-7.7688843011856e-001,  7.7688843011856e-001, -7.7688843011856e-001,  7.7688843011856e-001,
+	 6.2963825464249e-001,  6.2963825464249e-001,  6.2963825464249e-001,  6.2963825464249e-001,
+	 4.3032649159431e-001,  4.3032649159431e-001, -9.7222638130188e-001, -9.7222638130188e-001,
+	-9.0267330408096e-001,  9.0267330408096e-001,  2.3404198884964e-001, -2.3404198884964e-001,
+	 1.0412164032459e-001,  1.0412164032459e-001,  1.0412164032459e-001,  1.0412164032459e-001,
+	-9.9456459283829e-001,  9.9456459283829e-001, -9.9456459283829e-001,  9.9456459283829e-001,
+	 7.4300795793533e-001,  7.4300795793533e-001, -5.8828157186508e-001, -5.8828157186508e-001,
+	-6.6928261518478e-001,  6.6928261518478e-001, -8.0865615606308e-001,  8.0865615606308e-001,
+	-1.0412164032459e-001,  1.0412164032459e-001, -1.0412164032459e-001,  1.0412164032459e-001,
+	 9.9456459283829e-001,  9.9456459283829e-001,  9.9456459283829e-001,  9.9456459283829e-001,
+	 5.2131704986095e-002,  5.2131704986095e-002, -1.5582841634750e-001, -1.5582841634750e-001,
+	-9.9864023923874e-001,  9.9864023923874e-001,  9.8778414726257e-001, -9.8778414726257e-001,
+	 9.8196387290955e-001,  9.8196387290955e-001,  9.8196387290955e-001,  9.8196387290955e-001,
+	-1.8906867504120e-001,  1.8906867504120e-001, -1.8906867504120e-001,  1.8906867504120e-001,
+	 9.9548077583313e-001,  9.9548077583313e-001,  9.5957154035568e-001,  9.5957154035568e-001,
+	-9.4963498413563e-002,  9.4963498413563e-002, -2.8146496415138e-001,  2.8146496415138e-001,
+	-9.8196387290955e-001,  9.8196387290955e-001, -9.8196387290955e-001,  9.8196387290955e-001,
+	 1.8906867504120e-001,  1.8906867504120e-001,  1.8906867504120e-001,  1.8906867504120e-001,
+	 6.3676190376282e-001,  6.3676190376282e-001, -8.7754523754120e-001, -8.7754523754120e-001,
+	-7.7106052637100e-001,  7.7106052637100e-001, -4.7949379682541e-001,  4.7949379682541e-001,
+	 5.6066161394119e-001,  5.6066161394119e-001,  5.6066161394119e-001,  5.6066161394119e-001,
+	-8.2804501056671e-001,  8.2804501056671e-001, -8.2804501056671e-001,  8.2804501056671e-001,
+	 8.8336330652237e-001,  8.8336330652237e-001,  1.0717236995697e-001,  1.0717236995697e-001,
+	-4.6868884563446e-001,  4.6868884563446e-001, -9.9424028396606e-001,  9.9424028396606e-001,
+	-5.6066161394119e-001,  5.6066161394119e-001, -5.6066161394119e-001,  5.6066161394119e-001,
+	 8.2804501056671e-001,  8.2804501056671e-001,  8.2804501056671e-001,  8.2804501056671e-001,
+	 2.9321917891502e-001,  2.9321917891502e-001, -7.7881658077240e-001, -7.7881658077240e-001,
+	-9.5604526996613e-001,  9.5604526996613e-001,  6.2725180387497e-001, -6.2725180387497e-001,
+	 8.3486288785934e-001,  8.3486288785934e-001,  8.3486288785934e-001,  8.3486288785934e-001,
+	-5.5045801401138e-001,  5.5045801401138e-001, -5.5045801401138e-001,  5.5045801401138e-001,
+	 9.5782643556595e-001,  9.5782643556595e-001,  6.4148104190826e-001,  6.4148104190826e-001,
+	-2.8734746575356e-001,  2.8734746575356e-001, -7.6713907718658e-001,  7.6713907718658e-001,
+	-8.3486288785934e-001,  8.3486288785934e-001, -8.3486288785934e-001,  8.3486288785934e-001,
+	 5.5045801401138e-001,  5.5045801401138e-001,  5.5045801401138e-001,  5.5045801401138e-001,
+	 4.7410023212433e-001,  4.7410023212433e-001, -9.9604463577271e-001, -9.9604463577271e-001,
+	-8.8047087192535e-001,  8.8047087192535e-001,  8.8853478431702e-002, -8.8853478431702e-002,
+	 2.0110464096069e-001,  2.0110464096069e-001,  2.0110464096069e-001,  2.0110464096069e-001,
+	-9.7956979274750e-001,  9.7956979274750e-001, -9.7956979274750e-001,  9.7956979274750e-001,
+	 7.7495306730270e-001,  7.7495306730270e-001, -4.6325987577438e-001, -4.6325987577438e-001,
+	-6.3201874494553e-001,  6.3201874494553e-001, -8.8622254133224e-001,  8.8622254133224e-001,
+	-2.0110464096069e-001,  2.0110464096069e-001, -2.0110464096069e-001,  2.0110464096069e-001,
+	 9.7956979274750e-001,  9.7956979274750e-001,  9.7956979274750e-001,  9.7956979274750e-001,
+	 1.0106986761093e-001,  1.0106986761093e-001, -2.9907983541489e-001, -2.9907983541489e-001,
+	-9.9487930536270e-001,  9.9487930536270e-001,  9.5422804355621e-001, -9.5422804355621e-001,
+	 9.2621022462845e-001,  9.2621022462845e-001,  9.2621022462845e-001,  9.2621022462845e-001,
+	-3.7700742483139e-001,  3.7700742483139e-001, -3.7700742483139e-001,  3.7700742483139e-001,
+	 9.8137921094894e-001,  9.8137921094894e-001,  8.3654773235321e-001,  8.3654773235321e-001,
+	-1.9208040833473e-001,  1.9208040833473e-001, -5.4789412021637e-001,  5.4789412021637e-001,
+	-9.2621022462845e-001,  9.2621022462845e-001, -9.2621022462845e-001,  9.2621022462845e-001,
+	 3.7700742483139e-001,  3.7700742483139e-001,  3.7700742483139e-001,  3.7700742483139e-001,
+	 5.5811852216721e-001,  5.5811852216721e-001, -9.7894805669785e-001, -9.7894805669785e-001,
+	-8.2976120710373e-001,  8.2976120710373e-001, -2.0410901308060e-001,  2.0410901308060e-001,
+	 3.8834506273270e-001,  3.8834506273270e-001,  3.8834506273270e-001,  3.8834506273270e-001,
+	-9.2151403427124e-001,  9.2151403427124e-001, -9.2151403427124e-001,  9.2151403427124e-001,
+	 8.3317017555237e-001,  8.3317017555237e-001, -1.8605518341064e-001, -1.8605518341064e-001,
+	-5.5301672220230e-001,  5.5301672220230e-001, -9.8253935575485e-001,  9.8253935575485e-001,
+	-3.8834506273270e-001,  3.8834506273270e-001, -3.8834506273270e-001,  3.8834506273270e-001,
+	 9.2151403427124e-001,  9.2151403427124e-001,  9.2151403427124e-001,  9.2151403427124e-001,
+	 1.9809842109680e-001,  1.9809842109680e-001, -5.6319934129715e-001, -5.6319934129715e-001,
+	-9.8018211126328e-001,  9.8018211126328e-001,  8.2632100582123e-001, -8.2632100582123e-001,
+	 7.1143215894699e-001,  7.1143215894699e-001,  7.1143215894699e-001,  7.1143215894699e-001,
+	-7.0275473594666e-001,  7.0275473594666e-001, -7.0275473594666e-001,  7.0275473594666e-001,
+	 9.2504924535751e-001,  9.2504924535751e-001,  3.9117038249969e-001,  3.9117038249969e-001,
+	-3.7984722852707e-001,  3.7984722852707e-001, -9.2031830549240e-001,  9.2031830549240e-001,
+	-7.1143215894699e-001,  7.1143215894699e-001, -7.1143215894699e-001,  7.1143215894699e-001,
+	 7.0275473594666e-001,  7.0275473594666e-001,  7.0275473594666e-001,  7.0275473594666e-001,
+	 3.8551607728004e-001,  3.8551607728004e-001, -9.2736244201660e-001, -9.2736244201660e-001,
+	-9.2270112037659e-001,  9.2270112037659e-001,  3.7416404485703e-001, -3.7416404485703e-001,
+	 6.1358846724033e-003,  6.1358846724033e-003,  6.1358846724033e-003,  6.1358846724033e-003,
+	-9.9998116493225e-001,  9.9998116493225e-001, -9.9998116493225e-001,  9.9998116493225e-001,
+	 7.0927280187607e-001,  7.0927280187607e-001, -7.0056885480881e-001, -7.0056885480881e-001,
+	-7.0493412017822e-001,  7.0493412017822e-001, -7.1358478069305e-001,  7.1358478069305e-001,
+	-6.1358846724033e-003,  6.1358846724033e-003, -6.1358846724033e-003,  6.1358846724033e-003,
+	 9.9998116493225e-001,  9.9998116493225e-001,  9.9998116493225e-001,  9.9998116493225e-001,
+	 3.0679567717016e-003,  3.0679567717016e-003, -9.2037543654442e-003, -9.2037543654442e-003,
+	-9.9999529123306e-001,  9.9999529123306e-001,  9.9995762109756e-001, -9.9995762109756e-001
+};
+
+static _MM_ALIGN16 float	CTMDLP[]	 = {
+	 9.2387950420380e-001,  9.2387950420380e-001,  9.2387950420380e-001,  9.2387950420380e-001,
+	-3.8268345594406e-001,  3.8268345594406e-001, -3.8268345594406e-001,  3.8268345594406e-001,
+	 7.0710676908493e-001,  7.0710676908493e-001,  7.0710676908493e-001,  7.0710676908493e-001,
+	-7.0710676908493e-001,  7.0710676908493e-001, -7.0710676908493e-001,  7.0710676908493e-001,
+	 3.8268336653709e-001,  3.8268336653709e-001,  3.8268336653709e-001,  3.8268336653709e-001,
+	-9.2387944459915e-001,  9.2387944459915e-001, -9.2387944459915e-001,  9.2387944459915e-001,
+	 3.8268345594406e-001,  3.8268345594406e-001,  3.8268345594406e-001,  3.8268345594406e-001,
+	-9.2387950420380e-001,  9.2387950420380e-001, -9.2387950420380e-001,  9.2387950420380e-001,
+	-7.0710676908493e-001,  7.0710676908493e-001, -7.0710676908493e-001,  7.0710676908493e-001,
+	 7.0710676908493e-001,  7.0710676908493e-001,  7.0710676908493e-001,  7.0710676908493e-001,
+	-9.2387944459915e-001, -9.2387944459915e-001, -9.2387944459915e-001, -9.2387944459915e-001,
+	 3.8268336653709e-001, -3.8268336653709e-001,  3.8268336653709e-001, -3.8268336653709e-001,
+	 9.8078525066376e-001,  9.8078525066376e-001,  9.8078525066376e-001,  9.8078525066376e-001,
+	-1.9509032368660e-001,  1.9509032368660e-001, -1.9509032368660e-001,  1.9509032368660e-001,
+	 9.2387950420380e-001,  9.2387950420380e-001,  9.2387950420380e-001,  9.2387950420380e-001,
+	-3.8268345594406e-001,  3.8268345594406e-001, -3.8268345594406e-001,  3.8268345594406e-001,
+	 8.3146959543228e-001,  8.3146959543228e-001,  8.3146959543228e-001,  8.3146959543228e-001,
+	-5.5557024478912e-001,  5.5557024478912e-001, -5.5557024478912e-001,  5.5557024478912e-001,
+	 5.5557024478912e-001,  5.5557024478912e-001,  5.5557024478912e-001,  5.5557024478912e-001,
+	-8.3146959543228e-001,  8.3146959543228e-001, -8.3146959543228e-001,  8.3146959543228e-001,
+	-9.2387950420380e-001,  9.2387950420380e-001, -9.2387950420380e-001,  9.2387950420380e-001,
+	 3.8268345594406e-001,  3.8268345594406e-001,  3.8268345594406e-001,  3.8268345594406e-001,
+	-9.8078519105911e-001, -9.8078519105911e-001, -9.8078519105911e-001, -9.8078519105911e-001,
+	-1.9509032368660e-001,  1.9509032368660e-001, -1.9509032368660e-001,  1.9509032368660e-001,
+	 8.3146959543228e-001,  8.3146959543228e-001,  8.3146959543228e-001,  8.3146959543228e-001,
+	-5.5557024478912e-001,  5.5557024478912e-001, -5.5557024478912e-001,  5.5557024478912e-001,
+	 3.8268345594406e-001,  3.8268345594406e-001,  3.8268345594406e-001,  3.8268345594406e-001,
+	-9.2387950420380e-001,  9.2387950420380e-001, -9.2387950420380e-001,  9.2387950420380e-001,
+	-1.9509032368660e-001, -1.9509032368660e-001, -1.9509032368660e-001, -1.9509032368660e-001,
+	-9.8078519105911e-001,  9.8078519105911e-001, -9.8078519105911e-001,  9.8078519105911e-001,
+	 1.9509032368660e-001,  1.9509032368660e-001,  1.9509032368660e-001,  1.9509032368660e-001,
+	-9.8078525066376e-001,  9.8078525066376e-001, -9.8078525066376e-001,  9.8078525066376e-001,
+	-3.8268345594406e-001,  3.8268345594406e-001, -3.8268345594406e-001,  3.8268345594406e-001,
+	 9.2387950420380e-001,  9.2387950420380e-001,  9.2387950420380e-001,  9.2387950420380e-001,
+	-5.5557024478912e-001, -5.5557024478912e-001, -5.5557024478912e-001, -5.5557024478912e-001,
+	 8.3146959543228e-001, -8.3146959543228e-001,  8.3146959543228e-001, -8.3146959543228e-001,
+	 9.9518471956253e-001,  9.9518471956253e-001,  9.9518471956253e-001,  9.9518471956253e-001,
+	-9.8017141222954e-002,  9.8017141222954e-002, -9.8017141222954e-002,  9.8017141222954e-002,
+	 9.8078525066376e-001,  9.8078525066376e-001,  9.8078525066376e-001,  9.8078525066376e-001,
+	-1.9509032368660e-001,  1.9509032368660e-001, -1.9509032368660e-001,  1.9509032368660e-001,
+	 9.5694035291672e-001,  9.5694035291672e-001,  9.5694035291672e-001,  9.5694035291672e-001,
+	-2.9028466343880e-001,  2.9028466343880e-001, -2.9028466343880e-001,  2.9028466343880e-001,
+	 6.3439327478409e-001,  6.3439327478409e-001,  6.3439327478409e-001,  6.3439327478409e-001,
+	-7.7301043272018e-001,  7.7301043272018e-001, -7.7301043272018e-001,  7.7301043272018e-001,
+	-9.8078525066376e-001,  9.8078525066376e-001, -9.8078525066376e-001,  9.8078525066376e-001,
+	 1.9509032368660e-001,  1.9509032368660e-001,  1.9509032368660e-001,  1.9509032368660e-001,
+	-8.8192117214203e-001, -8.8192117214203e-001, -8.8192117214203e-001, -8.8192117214203e-001,
+	-4.7139671444893e-001,  4.7139671444893e-001, -4.7139671444893e-001,  4.7139671444893e-001,
+	 8.8192123174667e-001,  8.8192123174667e-001,  8.8192123174667e-001,  8.8192123174667e-001,
+	-4.7139674425125e-001,  4.7139674425125e-001, -4.7139674425125e-001,  4.7139674425125e-001,
+	 5.5557024478912e-001,  5.5557024478912e-001,  5.5557024478912e-001,  5.5557024478912e-001,
+	-8.3146959543228e-001,  8.3146959543228e-001, -8.3146959543228e-001,  8.3146959543228e-001,
+	 9.8017111420631e-002,  9.8017111420631e-002,  9.8017111420631e-002,  9.8017111420631e-002,
+	-9.9518465995789e-001,  9.9518465995789e-001, -9.9518465995789e-001,  9.9518465995789e-001,
+	 2.9028469324112e-001,  2.9028469324112e-001,  2.9028469324112e-001,  2.9028469324112e-001,
+	-9.5694035291672e-001,  9.5694035291672e-001, -9.5694035291672e-001,  9.5694035291672e-001,
+	-5.5557024478912e-001,  5.5557024478912e-001, -5.5557024478912e-001,  5.5557024478912e-001,
+	 8.3146959543228e-001,  8.3146959543228e-001,  8.3146959543228e-001,  8.3146959543228e-001,
+	-7.7301049232483e-001, -7.7301049232483e-001, -7.7301049232483e-001, -7.7301049232483e-001,
+	 6.3439327478409e-001, -6.3439327478409e-001,  6.3439327478409e-001, -6.3439327478409e-001,
+	 9.5694035291672e-001,  9.5694035291672e-001,  9.5694035291672e-001,  9.5694035291672e-001,
+	-2.9028469324112e-001,  2.9028469324112e-001, -2.9028469324112e-001,  2.9028469324112e-001,
+	 8.3146959543228e-001,  8.3146959543228e-001,  8.3146959543228e-001,  8.3146959543228e-001,
+	-5.5557024478912e-001,  5.5557024478912e-001, -5.5557024478912e-001,  5.5557024478912e-001,
+	 6.3439327478409e-001,  6.3439327478409e-001,  6.3439327478409e-001,  6.3439327478409e-001,
+	-7.7301049232483e-001,  7.7301049232483e-001, -7.7301049232483e-001,  7.7301049232483e-001,
+	 4.7139674425125e-001,  4.7139674425125e-001,  4.7139674425125e-001,  4.7139674425125e-001,
+	-8.8192123174667e-001,  8.8192123174667e-001, -8.8192123174667e-001,  8.8192123174667e-001,
+	-8.3146959543228e-001,  8.3146959543228e-001, -8.3146959543228e-001,  8.3146959543228e-001,
+	 5.5557024478912e-001,  5.5557024478912e-001,  5.5557024478912e-001,  5.5557024478912e-001,
+	-9.9518465995789e-001, -9.9518465995789e-001, -9.9518465995789e-001, -9.9518465995789e-001,
+	 9.8017111420631e-002, -9.8017111420631e-002,  9.8017111420631e-002, -9.8017111420631e-002,
+	 7.7301043272018e-001,  7.7301043272018e-001,  7.7301043272018e-001,  7.7301043272018e-001,
+	-6.3439327478409e-001,  6.3439327478409e-001, -6.3439327478409e-001,  6.3439327478409e-001,
+	 1.9509032368660e-001,  1.9509032368660e-001,  1.9509032368660e-001,  1.9509032368660e-001,
+	-9.8078525066376e-001,  9.8078525066376e-001, -9.8078525066376e-001,  9.8078525066376e-001,
+	-4.7139671444893e-001, -4.7139671444893e-001, -4.7139671444893e-001, -4.7139671444893e-001,
+	-8.8192117214203e-001,  8.8192117214203e-001, -8.8192117214203e-001,  8.8192117214203e-001,
+	 9.8017141222954e-002,  9.8017141222954e-002,  9.8017141222954e-002,  9.8017141222954e-002,
+	-9.9518471956253e-001,  9.9518471956253e-001, -9.9518471956253e-001,  9.9518471956253e-001,
+	-1.9509032368660e-001,  1.9509032368660e-001, -1.9509032368660e-001,  1.9509032368660e-001,
+	 9.8078525066376e-001,  9.8078525066376e-001,  9.8078525066376e-001,  9.8078525066376e-001,
+	-2.9028466343880e-001, -2.9028466343880e-001, -2.9028466343880e-001, -2.9028466343880e-001,
+	 9.5694035291672e-001, -9.5694035291672e-001,  9.5694035291672e-001, -9.5694035291672e-001,
+	 9.9879544973373e-001,  9.9879544973373e-001,  9.9879544973373e-001,  9.9879544973373e-001,
+	-4.9067676067352e-002,  4.9067676067352e-002, -4.9067676067352e-002,  4.9067676067352e-002,
+	 9.9518471956253e-001,  9.9518471956253e-001,  9.9518471956253e-001,  9.9518471956253e-001,
+	-9.8017141222954e-002,  9.8017141222954e-002, -9.8017141222954e-002,  9.8017141222954e-002,
+	 9.8917651176453e-001,  9.8917651176453e-001,  9.8917651176453e-001,  9.8917651176453e-001,
+	-1.4673046767712e-001,  1.4673046767712e-001, -1.4673046767712e-001,  1.4673046767712e-001,
+	 6.7155897617340e-001,  6.7155897617340e-001,  6.7155897617340e-001,  6.7155897617340e-001,
+	-7.4095112085342e-001,  7.4095112085342e-001, -7.4095112085342e-001,  7.4095112085342e-001,
+	-9.9518471956253e-001,  9.9518471956253e-001, -9.9518471956253e-001,  9.9518471956253e-001,
+	 9.8017141222954e-002,  9.8017141222954e-002,  9.8017141222954e-002,  9.8017141222954e-002,
+	-8.0320751667023e-001, -8.0320751667023e-001, -8.0320751667023e-001, -8.0320751667023e-001,
+	-5.9569936990738e-001,  5.9569936990738e-001, -5.9569936990738e-001,  5.9569936990738e-001,
+	 9.0398931503296e-001,  9.0398931503296e-001,  9.0398931503296e-001,  9.0398931503296e-001,
+	-4.2755511403084e-001,  4.2755511403084e-001, -4.2755511403084e-001,  4.2755511403084e-001,
+	 6.3439327478409e-001,  6.3439327478409e-001,  6.3439327478409e-001,  6.3439327478409e-001,
+	-7.7301043272018e-001,  7.7301043272018e-001, -7.7301043272018e-001,  7.7301043272018e-001,
+	 2.4298018217087e-001,  2.4298018217087e-001,  2.4298018217087e-001,  2.4298018217087e-001,
+	-9.7003120183945e-001,  9.7003120183945e-001, -9.7003120183945e-001,  9.7003120183945e-001,
+	 3.3688986301422e-001,  3.3688986301422e-001,  3.3688986301422e-001,  3.3688986301422e-001,
+	-9.4154405593872e-001,  9.4154405593872e-001, -9.4154405593872e-001,  9.4154405593872e-001,
+	-6.3439327478409e-001,  6.3439327478409e-001, -6.3439327478409e-001,  6.3439327478409e-001,
+	 7.7301043272018e-001,  7.7301043272018e-001,  7.7301043272018e-001,  7.7301043272018e-001,
+	-8.5772860050201e-001, -8.5772860050201e-001, -8.5772860050201e-001, -8.5772860050201e-001,
+	 5.1410275697708e-001, -5.1410275697708e-001,  5.1410275697708e-001, -5.1410275697708e-001,
+	 9.7003126144409e-001,  9.7003126144409e-001,  9.7003126144409e-001,  9.7003126144409e-001,
+	-2.4298018217087e-001,  2.4298018217087e-001, -2.4298018217087e-001,  2.4298018217087e-001,
+	 8.8192123174667e-001,  8.8192123174667e-001,  8.8192123174667e-001,  8.8192123174667e-001,
+	-4.7139674425125e-001,  4.7139674425125e-001, -4.7139674425125e-001,  4.7139674425125e-001,
+	 7.4095112085342e-001,  7.4095112085342e-001,  7.4095112085342e-001,  7.4095112085342e-001,
+	-6.7155897617340e-001,  6.7155897617340e-001, -6.7155897617340e-001,  6.7155897617340e-001,
+	 5.1410275697708e-001,  5.1410275697708e-001,  5.1410275697708e-001,  5.1410275697708e-001,
+	-8.5772860050201e-001,  8.5772860050201e-001, -8.5772860050201e-001,  8.5772860050201e-001,
+	-8.8192123174667e-001,  8.8192123174667e-001, -8.8192123174667e-001,  8.8192123174667e-001,
+	 4.7139674425125e-001,  4.7139674425125e-001,  4.7139674425125e-001,  4.7139674425125e-001,
+	-9.9879539012909e-001, -9.9879539012909e-001, -9.9879539012909e-001, -9.9879539012909e-001,
+	-4.9067672342062e-002,  4.9067672342062e-002, -4.9067672342062e-002,  4.9067672342062e-002,
+	 8.0320751667023e-001,  8.0320751667023e-001,  8.0320751667023e-001,  8.0320751667023e-001,
+	-5.9569931030273e-001,  5.9569931030273e-001, -5.9569931030273e-001,  5.9569931030273e-001,
+	 2.9028469324112e-001,  2.9028469324112e-001,  2.9028469324112e-001,  2.9028469324112e-001,
+	-9.5694035291672e-001,  9.5694035291672e-001, -9.5694035291672e-001,  9.5694035291672e-001,
+	-3.3688989281654e-001, -3.3688989281654e-001, -3.3688989281654e-001, -3.3688989281654e-001,
+	-9.4154405593872e-001,  9.4154405593872e-001, -9.4154405593872e-001,  9.4154405593872e-001,
+	 1.4673048257828e-001,  1.4673048257828e-001,  1.4673048257828e-001,  1.4673048257828e-001,
+	-9.8917651176453e-001,  9.8917651176453e-001, -9.8917651176453e-001,  9.8917651176453e-001,
+	-2.9028469324112e-001,  2.9028469324112e-001, -2.9028469324112e-001,  2.9028469324112e-001,
+	 9.5694035291672e-001,  9.5694035291672e-001,  9.5694035291672e-001,  9.5694035291672e-001,
+	-4.2755511403084e-001, -4.2755511403084e-001, -4.2755511403084e-001, -4.2755511403084e-001,
+	 9.0398931503296e-001, -9.0398931503296e-001,  9.0398931503296e-001, -9.0398931503296e-001,
+	 9.8917651176453e-001,  9.8917651176453e-001,  9.8917651176453e-001,  9.8917651176453e-001,
+	-1.4673048257828e-001,  1.4673048257828e-001, -1.4673048257828e-001,  1.4673048257828e-001,
+	 9.5694035291672e-001,  9.5694035291672e-001,  9.5694035291672e-001,  9.5694035291672e-001,
+	-2.9028469324112e-001,  2.9028469324112e-001, -2.9028469324112e-001,  2.9028469324112e-001,
+	 9.0398931503296e-001,  9.0398931503296e-001,  9.0398931503296e-001,  9.0398931503296e-001,
+	-4.2755511403084e-001,  4.2755511403084e-001, -4.2755511403084e-001,  4.2755511403084e-001,
+	 5.9569931030273e-001,  5.9569931030273e-001,  5.9569931030273e-001,  5.9569931030273e-001,
+	-8.0320751667023e-001,  8.0320751667023e-001, -8.0320751667023e-001,  8.0320751667023e-001,
+	-9.5694035291672e-001,  9.5694035291672e-001, -9.5694035291672e-001,  9.5694035291672e-001,
+	 2.9028469324112e-001,  2.9028469324112e-001,  2.9028469324112e-001,  2.9028469324112e-001,
+	-9.4154405593872e-001, -9.4154405593872e-001, -9.4154405593872e-001, -9.4154405593872e-001,
+	-3.3688989281654e-001,  3.3688989281654e-001, -3.3688989281654e-001,  3.3688989281654e-001,
+	 8.5772860050201e-001,  8.5772860050201e-001,  8.5772860050201e-001,  8.5772860050201e-001,
+	-5.1410275697708e-001,  5.1410275697708e-001, -5.1410275697708e-001,  5.1410275697708e-001,
+	 4.7139674425125e-001,  4.7139674425125e-001,  4.7139674425125e-001,  4.7139674425125e-001,
+	-8.8192123174667e-001,  8.8192123174667e-001, -8.8192123174667e-001,  8.8192123174667e-001,
+	-4.9067672342062e-002, -4.9067672342062e-002, -4.9067672342062e-002, -4.9067672342062e-002,
+	-9.9879539012909e-001,  9.9879539012909e-001, -9.9879539012909e-001,  9.9879539012909e-001,
+	 2.4298018217087e-001,  2.4298018217087e-001,  2.4298018217087e-001,  2.4298018217087e-001,
+	-9.7003126144409e-001,  9.7003126144409e-001, -9.7003126144409e-001,  9.7003126144409e-001,
+	-4.7139674425125e-001,  4.7139674425125e-001, -4.7139674425125e-001,  4.7139674425125e-001,
+	 8.8192123174667e-001,  8.8192123174667e-001,  8.8192123174667e-001,  8.8192123174667e-001,
+	-6.7155897617340e-001, -6.7155897617340e-001, -6.7155897617340e-001, -6.7155897617340e-001,
+	 7.4095112085342e-001, -7.4095112085342e-001,  7.4095112085342e-001, -7.4095112085342e-001,
+	 9.4154405593872e-001,  9.4154405593872e-001,  9.4154405593872e-001,  9.4154405593872e-001,
+	-3.3688986301422e-001,  3.3688986301422e-001, -3.3688986301422e-001,  3.3688986301422e-001,
+	 7.7301043272018e-001,  7.7301043272018e-001,  7.7301043272018e-001,  7.7301043272018e-001,
+	-6.3439327478409e-001,  6.3439327478409e-001, -6.3439327478409e-001,  6.3439327478409e-001,
+	 5.1410275697708e-001,  5.1410275697708e-001,  5.1410275697708e-001,  5.1410275697708e-001,
+	-8.5772860050201e-001,  8.5772860050201e-001, -8.5772860050201e-001,  8.5772860050201e-001,
+	 4.2755511403084e-001,  4.2755511403084e-001,  4.2755511403084e-001,  4.2755511403084e-001,
+	-9.0398931503296e-001,  9.0398931503296e-001, -9.0398931503296e-001,  9.0398931503296e-001,
+	-7.7301043272018e-001,  7.7301043272018e-001, -7.7301043272018e-001,  7.7301043272018e-001,
+	 6.3439327478409e-001,  6.3439327478409e-001,  6.3439327478409e-001,  6.3439327478409e-001,
+	-9.7003120183945e-001, -9.7003120183945e-001, -9.7003120183945e-001, -9.7003120183945e-001,
+	 2.4298018217087e-001, -2.4298018217087e-001,  2.4298018217087e-001, -2.4298018217087e-001,
+	 7.4095112085342e-001,  7.4095112085342e-001,  7.4095112085342e-001,  7.4095112085342e-001,
+	-6.7155897617340e-001,  6.7155897617340e-001, -6.7155897617340e-001,  6.7155897617340e-001,
+	 9.8017141222954e-002,  9.8017141222954e-002,  9.8017141222954e-002,  9.8017141222954e-002,
+	-9.9518471956253e-001,  9.9518471956253e-001, -9.9518471956253e-001,  9.9518471956253e-001,
+	-5.9569936990738e-001, -5.9569936990738e-001, -5.9569936990738e-001, -5.9569936990738e-001,
+	-8.0320751667023e-001,  8.0320751667023e-001, -8.0320751667023e-001,  8.0320751667023e-001,
+	 4.9067676067352e-002,  4.9067676067352e-002,  4.9067676067352e-002,  4.9067676067352e-002,
+	-9.9879544973373e-001,  9.9879544973373e-001, -9.9879544973373e-001,  9.9879544973373e-001,
+	-9.8017141222954e-002,  9.8017141222954e-002, -9.8017141222954e-002,  9.8017141222954e-002,
+	 9.9518471956253e-001,  9.9518471956253e-001,  9.9518471956253e-001,  9.9518471956253e-001,
+	-1.4673046767712e-001, -1.4673046767712e-001, -1.4673046767712e-001, -1.4673046767712e-001,
+	 9.8917651176453e-001, -9.8917651176453e-001,  9.8917651176453e-001, -9.8917651176453e-001,
+	 9.9969881772995e-001,  9.9969881772995e-001,  9.9969881772995e-001,  9.9969881772995e-001,
+	-2.4541229009628e-002,  2.4541229009628e-002, -2.4541229009628e-002,  2.4541229009628e-002,
+	 9.9879544973373e-001,  9.9879544973373e-001,  9.9879544973373e-001,  9.9879544973373e-001,
+	-4.9067676067352e-002,  4.9067676067352e-002, -4.9067676067352e-002,  4.9067676067352e-002,
+	 9.9729043245316e-001,  9.9729043245316e-001,  9.9729043245316e-001,  9.9729043245316e-001,
+	-7.3564566671848e-002,  7.3564566671848e-002, -7.3564566671848e-002,  7.3564566671848e-002,
+	 6.8954056501389e-001,  6.8954056501389e-001,  6.8954056501389e-001,  6.8954056501389e-001,
+	-7.2424709796906e-001,  7.2424709796906e-001, -7.2424709796906e-001,  7.2424709796906e-001,
+	-9.9879544973373e-001,  9.9879544973373e-001, -9.9879544973373e-001,  9.9879544973373e-001,
+	 4.9067676067352e-002,  4.9067676067352e-002,  4.9067676067352e-002,  4.9067676067352e-002,
+	-7.5720882415771e-001, -7.5720882415771e-001, -7.5720882415771e-001, -7.5720882415771e-001,
+	-6.5317285060883e-001,  6.5317285060883e-001, -6.5317285060883e-001,  6.5317285060883e-001,
+	 9.1420972347260e-001,  9.1420972347260e-001,  9.1420972347260e-001,  9.1420972347260e-001,
+	-4.0524131059647e-001,  4.0524131059647e-001, -4.0524131059647e-001,  4.0524131059647e-001,
+	 6.7155897617340e-001,  6.7155897617340e-001,  6.7155897617340e-001,  6.7155897617340e-001,
+	-7.4095112085342e-001,  7.4095112085342e-001, -7.4095112085342e-001,  7.4095112085342e-001,
+	 3.1368172168732e-001,  3.1368172168732e-001,  3.1368172168732e-001,  3.1368172168732e-001,
+	-9.4952815771103e-001,  9.4952815771103e-001, -9.4952815771103e-001,  9.4952815771103e-001,
+	 3.5989505052567e-001,  3.5989505052567e-001,  3.5989505052567e-001,  3.5989505052567e-001,
+	-9.3299281597137e-001,  9.3299281597137e-001, -9.3299281597137e-001,  9.3299281597137e-001,
+	-6.7155897617340e-001,  6.7155897617340e-001, -6.7155897617340e-001,  6.7155897617340e-001,
+	 7.4095112085342e-001,  7.4095112085342e-001,  7.4095112085342e-001,  7.4095112085342e-001,
+	-8.9322435855865e-001, -8.9322435855865e-001, -8.9322435855865e-001, -8.9322435855865e-001,
+	 4.4961130619049e-001, -4.4961130619049e-001,  4.4961130619049e-001, -4.4961130619049e-001,
+	 9.7570210695267e-001,  9.7570210695267e-001,  9.7570210695267e-001,  9.7570210695267e-001,
+	-2.1910125017166e-001,  2.1910125017166e-001, -2.1910125017166e-001,  2.1910125017166e-001,
+	 9.0398931503296e-001,  9.0398931503296e-001,  9.0398931503296e-001,  9.0398931503296e-001,
+	-4.2755511403084e-001,  4.2755511403084e-001, -4.2755511403084e-001,  4.2755511403084e-001,
+	 7.8834640979767e-001,  7.8834640979767e-001,  7.8834640979767e-001,  7.8834640979767e-001,
+	-6.1523157358170e-001,  6.1523157358170e-001, -6.1523157358170e-001,  6.1523157358170e-001,
+	 5.3499764204025e-001,  5.3499764204025e-001,  5.3499764204025e-001,  5.3499764204025e-001,
+	-8.4485357999802e-001,  8.4485357999802e-001, -8.4485357999802e-001,  8.4485357999802e-001,
+	-9.0398931503296e-001,  9.0398931503296e-001, -9.0398931503296e-001,  9.0398931503296e-001,
+	 4.2755511403084e-001,  4.2755511403084e-001,  4.2755511403084e-001,  4.2755511403084e-001,
+	-9.9247956275940e-001, -9.9247956275940e-001, -9.9247956275940e-001, -9.9247956275940e-001,
+	-1.2241072207689e-001,  1.2241072207689e-001, -1.2241072207689e-001,  1.2241072207689e-001,
+	 8.1758481264114e-001,  8.1758481264114e-001,  8.1758481264114e-001,  8.1758481264114e-001,
+	-5.7580822706223e-001,  5.7580822706223e-001, -5.7580822706223e-001,  5.7580822706223e-001,
+	 3.3688986301422e-001,  3.3688986301422e-001,  3.3688986301422e-001,  3.3688986301422e-001,
+	-9.4154405593872e-001,  9.4154405593872e-001, -9.4154405593872e-001,  9.4154405593872e-001,
+	-2.6671281456947e-001, -2.6671281456947e-001, -2.6671281456947e-001, -2.6671281456947e-001,
+	-9.6377599239349e-001,  9.6377599239349e-001, -9.6377599239349e-001,  9.6377599239349e-001,
+	 1.7096188664436e-001,  1.7096188664436e-001,  1.7096188664436e-001,  1.7096188664436e-001,
+	-9.8527765274048e-001,  9.8527765274048e-001, -9.8527765274048e-001,  9.8527765274048e-001,
+	-3.3688986301422e-001,  3.3688986301422e-001, -3.3688986301422e-001,  3.3688986301422e-001,
+	 9.4154405593872e-001,  9.4154405593872e-001,  9.4154405593872e-001,  9.4154405593872e-001,
+	-4.9289822578430e-001, -4.9289822578430e-001, -4.9289822578430e-001, -4.9289822578430e-001,
+	 8.7008702754974e-001, -8.7008702754974e-001,  8.7008702754974e-001, -8.7008702754974e-001,
+	 9.9247956275940e-001,  9.9247956275940e-001,  9.9247956275940e-001,  9.9247956275940e-001,
+	-1.2241067737341e-001,  1.2241067737341e-001, -1.2241067737341e-001,  1.2241067737341e-001,
+	 9.7003126144409e-001,  9.7003126144409e-001,  9.7003126144409e-001,  9.7003126144409e-001,
+	-2.4298018217087e-001,  2.4298018217087e-001, -2.4298018217087e-001,  2.4298018217087e-001,
+	 9.3299281597137e-001,  9.3299281597137e-001,  9.3299281597137e-001,  9.3299281597137e-001,
+	-3.5989505052567e-001,  3.5989505052567e-001, -3.5989505052567e-001,  3.5989505052567e-001,
+	 6.1523163318634e-001,  6.1523163318634e-001,  6.1523163318634e-001,  6.1523163318634e-001,
+	-7.8834640979767e-001,  7.8834640979767e-001, -7.8834640979767e-001,  7.8834640979767e-001,
+	-9.7003126144409e-001,  9.7003126144409e-001, -9.7003126144409e-001,  9.7003126144409e-001,
+	 2.4298018217087e-001,  2.4298018217087e-001,  2.4298018217087e-001,  2.4298018217087e-001,
+	-9.1420966386795e-001, -9.1420966386795e-001, -9.1420966386795e-001, -9.1420966386795e-001,
+	-4.0524142980576e-001,  4.0524142980576e-001, -4.0524142980576e-001,  4.0524142980576e-001,
+	 8.7008696794510e-001,  8.7008696794510e-001,  8.7008696794510e-001,  8.7008696794510e-001,
+	-4.9289819598198e-001,  4.9289819598198e-001, -4.9289819598198e-001,  4.9289819598198e-001,
+	 5.1410275697708e-001,  5.1410275697708e-001,  5.1410275697708e-001,  5.1410275697708e-001,
+	-8.5772860050201e-001,  8.5772860050201e-001, -8.5772860050201e-001,  8.5772860050201e-001,
+	 2.4541208520532e-002,  2.4541208520532e-002,  2.4541208520532e-002,  2.4541208520532e-002,
+	-9.9969875812531e-001,  9.9969875812531e-001, -9.9969875812531e-001,  9.9969875812531e-001,
+	 2.6671275496483e-001,  2.6671275496483e-001,  2.6671275496483e-001,  2.6671275496483e-001,
+	-9.6377605199814e-001,  9.6377605199814e-001, -9.6377605199814e-001,  9.6377605199814e-001,
+	-5.1410275697708e-001,  5.1410275697708e-001, -5.1410275697708e-001,  5.1410275697708e-001,
+	 8.5772860050201e-001,  8.5772860050201e-001,  8.5772860050201e-001,  8.5772860050201e-001,
+	-7.2424709796906e-001, -7.2424709796906e-001, -7.2424709796906e-001, -7.2424709796906e-001,
+	 6.8954050540924e-001, -6.8954050540924e-001,  6.8954050540924e-001, -6.8954050540924e-001,
+	 9.4952815771103e-001,  9.4952815771103e-001,  9.4952815771103e-001,  9.4952815771103e-001,
+	-3.1368175148964e-001,  3.1368175148964e-001, -3.1368175148964e-001,  3.1368175148964e-001,
+	 8.0320751667023e-001,  8.0320751667023e-001,  8.0320751667023e-001,  8.0320751667023e-001,
+	-5.9569931030273e-001,  5.9569931030273e-001, -5.9569931030273e-001,  5.9569931030273e-001,
+	 5.7580816745758e-001,  5.7580816745758e-001,  5.7580816745758e-001,  5.7580816745758e-001,
+	-8.1758481264114e-001,  8.1758481264114e-001, -8.1758481264114e-001,  8.1758481264114e-001,
+	 4.4961133599281e-001,  4.4961133599281e-001,  4.4961133599281e-001,  4.4961133599281e-001,
+	-8.9322429895401e-001,  8.9322429895401e-001, -8.9322429895401e-001,  8.9322429895401e-001,
+	-8.0320751667023e-001,  8.0320751667023e-001, -8.0320751667023e-001,  8.0320751667023e-001,
+	 5.9569931030273e-001,  5.9569931030273e-001,  5.9569931030273e-001,  5.9569931030273e-001,
+	-9.8527759313583e-001, -9.8527759313583e-001, -9.8527759313583e-001, -9.8527759313583e-001,
+	 1.7096188664436e-001, -1.7096188664436e-001,  1.7096188664436e-001, -1.7096188664436e-001,
+	 7.5720882415771e-001,  7.5720882415771e-001,  7.5720882415771e-001,  7.5720882415771e-001,
+	-6.5317285060883e-001,  6.5317285060883e-001, -6.5317285060883e-001,  6.5317285060883e-001,
+	 1.4673048257828e-001,  1.4673048257828e-001,  1.4673048257828e-001,  1.4673048257828e-001,
+	-9.8917651176453e-001,  9.8917651176453e-001, -9.8917651176453e-001,  9.8917651176453e-001,
+	-5.3499764204025e-001, -5.3499764204025e-001, -5.3499764204025e-001, -5.3499764204025e-001,
+	-8.4485352039337e-001,  8.4485352039337e-001, -8.4485352039337e-001,  8.4485352039337e-001,
+	 7.3564566671848e-002,  7.3564566671848e-002,  7.3564566671848e-002,  7.3564566671848e-002,
+	-9.9729043245316e-001,  9.9729043245316e-001, -9.9729043245316e-001,  9.9729043245316e-001,
+	-1.4673048257828e-001,  1.4673048257828e-001, -1.4673048257828e-001,  1.4673048257828e-001,
+	 9.8917651176453e-001,  9.8917651176453e-001,  9.8917651176453e-001,  9.8917651176453e-001,
+	-2.1910125017166e-001, -2.1910125017166e-001, -2.1910125017166e-001, -2.1910125017166e-001,
+	 9.7570210695267e-001, -9.7570210695267e-001,  9.7570210695267e-001, -9.7570210695267e-001,
+	 9.9729043245316e-001,  9.9729043245316e-001,  9.9729043245316e-001,  9.9729043245316e-001,
+	-7.3564566671848e-002,  7.3564566671848e-002, -7.3564566671848e-002,  7.3564566671848e-002,
+	 9.8917651176453e-001,  9.8917651176453e-001,  9.8917651176453e-001,  9.8917651176453e-001,
+	-1.4673048257828e-001,  1.4673048257828e-001, -1.4673048257828e-001,  1.4673048257828e-001,
+	 9.7570210695267e-001,  9.7570210695267e-001,  9.7570210695267e-001,  9.7570210695267e-001,
+	-2.1910125017166e-001,  2.1910125017166e-001, -2.1910125017166e-001,  2.1910125017166e-001,
+	 6.5317285060883e-001,  6.5317285060883e-001,  6.5317285060883e-001,  6.5317285060883e-001,
+	-7.5720882415771e-001,  7.5720882415771e-001, -7.5720882415771e-001,  7.5720882415771e-001,
+	-9.8917651176453e-001,  9.8917651176453e-001, -9.8917651176453e-001,  9.8917651176453e-001,
+	 1.4673048257828e-001,  1.4673048257828e-001,  1.4673048257828e-001,  1.4673048257828e-001,
+	-8.4485352039337e-001, -8.4485352039337e-001, -8.4485352039337e-001, -8.4485352039337e-001,
+	-5.3499764204025e-001,  5.3499764204025e-001, -5.3499764204025e-001,  5.3499764204025e-001,
+	 8.9322429895401e-001,  8.9322429895401e-001,  8.9322429895401e-001,  8.9322429895401e-001,
+	-4.4961133599281e-001,  4.4961133599281e-001, -4.4961133599281e-001,  4.4961133599281e-001,
+	 5.9569931030273e-001,  5.9569931030273e-001,  5.9569931030273e-001,  5.9569931030273e-001,
+	-8.0320751667023e-001,  8.0320751667023e-001, -8.0320751667023e-001,  8.0320751667023e-001,
+	 1.7096188664436e-001,  1.7096188664436e-001,  1.7096188664436e-001,  1.7096188664436e-001,
+	-9.8527759313583e-001,  9.8527759313583e-001, -9.8527759313583e-001,  9.8527759313583e-001,
+	 3.1368175148964e-001,  3.1368175148964e-001,  3.1368175148964e-001,  3.1368175148964e-001,
+	-9.4952815771103e-001,  9.4952815771103e-001, -9.4952815771103e-001,  9.4952815771103e-001,
+	-5.9569931030273e-001,  5.9569931030273e-001, -5.9569931030273e-001,  5.9569931030273e-001,
+	 8.0320751667023e-001,  8.0320751667023e-001,  8.0320751667023e-001,  8.0320751667023e-001,
+	-8.1758481264114e-001, -8.1758481264114e-001, -8.1758481264114e-001, -8.1758481264114e-001,
+	 5.7580816745758e-001, -5.7580816745758e-001,  5.7580816745758e-001, -5.7580816745758e-001,
+	 9.6377605199814e-001,  9.6377605199814e-001,  9.6377605199814e-001,  9.6377605199814e-001,
+	-2.6671275496483e-001,  2.6671275496483e-001, -2.6671275496483e-001,  2.6671275496483e-001,
+	 8.5772860050201e-001,  8.5772860050201e-001,  8.5772860050201e-001,  8.5772860050201e-001,
+	-5.1410275697708e-001,  5.1410275697708e-001, -5.1410275697708e-001,  5.1410275697708e-001,
+	 6.8954050540924e-001,  6.8954050540924e-001,  6.8954050540924e-001,  6.8954050540924e-001,
+	-7.2424709796906e-001,  7.2424709796906e-001, -7.2424709796906e-001,  7.2424709796906e-001,
+	 4.9289819598198e-001,  4.9289819598198e-001,  4.9289819598198e-001,  4.9289819598198e-001,
+	-8.7008696794510e-001,  8.7008696794510e-001, -8.7008696794510e-001,  8.7008696794510e-001,
+	-8.5772860050201e-001,  8.5772860050201e-001, -8.5772860050201e-001,  8.5772860050201e-001,
+	 5.1410275697708e-001,  5.1410275697708e-001,  5.1410275697708e-001,  5.1410275697708e-001,
+	-9.9969875812531e-001, -9.9969875812531e-001, -9.9969875812531e-001, -9.9969875812531e-001,
+	 2.4541208520532e-002, -2.4541208520532e-002,  2.4541208520532e-002, -2.4541208520532e-002,
+	 7.8834640979767e-001,  7.8834640979767e-001,  7.8834640979767e-001,  7.8834640979767e-001,
+	-6.1523163318634e-001,  6.1523163318634e-001, -6.1523163318634e-001,  6.1523163318634e-001,
+	 2.4298018217087e-001,  2.4298018217087e-001,  2.4298018217087e-001,  2.4298018217087e-001,
+	-9.7003126144409e-001,  9.7003126144409e-001, -9.7003126144409e-001,  9.7003126144409e-001,
+	-4.0524142980576e-001, -4.0524142980576e-001, -4.0524142980576e-001, -4.0524142980576e-001,
+	-9.1420966386795e-001,  9.1420966386795e-001, -9.1420966386795e-001,  9.1420966386795e-001,
+	 1.2241067737341e-001,  1.2241067737341e-001,  1.2241067737341e-001,  1.2241067737341e-001,
+	-9.9247956275940e-001,  9.9247956275940e-001, -9.9247956275940e-001,  9.9247956275940e-001,
+	-2.4298018217087e-001,  2.4298018217087e-001, -2.4298018217087e-001,  2.4298018217087e-001,
+	 9.7003126144409e-001,  9.7003126144409e-001,  9.7003126144409e-001,  9.7003126144409e-001,
+	-3.5989505052567e-001, -3.5989505052567e-001, -3.5989505052567e-001, -3.5989505052567e-001,
+	 9.3299281597137e-001, -9.3299281597137e-001,  9.3299281597137e-001, -9.3299281597137e-001,
+	 9.8527765274048e-001,  9.8527765274048e-001,  9.8527765274048e-001,  9.8527765274048e-001,
+	-1.7096188664436e-001,  1.7096188664436e-001, -1.7096188664436e-001,  1.7096188664436e-001,
+	 9.4154405593872e-001,  9.4154405593872e-001,  9.4154405593872e-001,  9.4154405593872e-001,
+	-3.3688986301422e-001,  3.3688986301422e-001, -3.3688986301422e-001,  3.3688986301422e-001,
+	 8.7008702754974e-001,  8.7008702754974e-001,  8.7008702754974e-001,  8.7008702754974e-001,
+	-4.9289822578430e-001,  4.9289822578430e-001, -4.9289822578430e-001,  4.9289822578430e-001,
+	 5.7580822706223e-001,  5.7580822706223e-001,  5.7580822706223e-001,  5.7580822706223e-001,
+	-8.1758481264114e-001,  8.1758481264114e-001, -8.1758481264114e-001,  8.1758481264114e-001,
+	-9.4154405593872e-001,  9.4154405593872e-001, -9.4154405593872e-001,  9.4154405593872e-001,
+	 3.3688986301422e-001,  3.3688986301422e-001,  3.3688986301422e-001,  3.3688986301422e-001,
+	-9.6377599239349e-001, -9.6377599239349e-001, -9.6377599239349e-001, -9.6377599239349e-001,
+	-2.6671281456947e-001,  2.6671281456947e-001, -2.6671281456947e-001,  2.6671281456947e-001,
+	 8.4485357999802e-001,  8.4485357999802e-001,  8.4485357999802e-001,  8.4485357999802e-001,
+	-5.3499764204025e-001,  5.3499764204025e-001, -5.3499764204025e-001,  5.3499764204025e-001,
+	 4.2755511403084e-001,  4.2755511403084e-001,  4.2755511403084e-001,  4.2755511403084e-001,
+	-9.0398931503296e-001,  9.0398931503296e-001, -9.0398931503296e-001,  9.0398931503296e-001,
+	-1.2241072207689e-001, -1.2241072207689e-001, -1.2241072207689e-001, -1.2241072207689e-001,
+	-9.9247956275940e-001,  9.9247956275940e-001, -9.9247956275940e-001,  9.9247956275940e-001,
+	 2.1910125017166e-001,  2.1910125017166e-001,  2.1910125017166e-001,  2.1910125017166e-001,
+	-9.7570210695267e-001,  9.7570210695267e-001, -9.7570210695267e-001,  9.7570210695267e-001,
+	-4.2755511403084e-001,  4.2755511403084e-001, -4.2755511403084e-001,  4.2755511403084e-001,
+	 9.0398931503296e-001,  9.0398931503296e-001,  9.0398931503296e-001,  9.0398931503296e-001,
+	-6.1523157358170e-001, -6.1523157358170e-001, -6.1523157358170e-001, -6.1523157358170e-001,
+	 7.8834640979767e-001, -7.8834640979767e-001,  7.8834640979767e-001, -7.8834640979767e-001,
+	 9.3299281597137e-001,  9.3299281597137e-001,  9.3299281597137e-001,  9.3299281597137e-001,
+	-3.5989505052567e-001,  3.5989505052567e-001, -3.5989505052567e-001,  3.5989505052567e-001,
+	 7.4095112085342e-001,  7.4095112085342e-001,  7.4095112085342e-001,  7.4095112085342e-001,
+	-6.7155897617340e-001,  6.7155897617340e-001, -6.7155897617340e-001,  6.7155897617340e-001,
+	 4.4961130619049e-001,  4.4961130619049e-001,  4.4961130619049e-001,  4.4961130619049e-001,
+	-8.9322435855865e-001,  8.9322435855865e-001, -8.9322435855865e-001,  8.9322435855865e-001,
+	 4.0524131059647e-001,  4.0524131059647e-001,  4.0524131059647e-001,  4.0524131059647e-001,
+	-9.1420972347260e-001,  9.1420972347260e-001, -9.1420972347260e-001,  9.1420972347260e-001,
+	-7.4095112085342e-001,  7.4095112085342e-001, -7.4095112085342e-001,  7.4095112085342e-001,
+	 6.7155897617340e-001,  6.7155897617340e-001,  6.7155897617340e-001,  6.7155897617340e-001,
+	-9.4952815771103e-001, -9.4952815771103e-001, -9.4952815771103e-001, -9.4952815771103e-001,
+	 3.1368172168732e-001, -3.1368172168732e-001,  3.1368172168732e-001, -3.1368172168732e-001,
+	 7.2424709796906e-001,  7.2424709796906e-001,  7.2424709796906e-001,  7.2424709796906e-001,
+	-6.8954056501389e-001,  6.8954056501389e-001, -6.8954056501389e-001,  6.8954056501389e-001,
+	 4.9067676067352e-002,  4.9067676067352e-002,  4.9067676067352e-002,  4.9067676067352e-002,
+	-9.9879544973373e-001,  9.9879544973373e-001, -9.9879544973373e-001,  9.9879544973373e-001,
+	-6.5317285060883e-001, -6.5317285060883e-001, -6.5317285060883e-001, -6.5317285060883e-001,
+	-7.5720882415771e-001,  7.5720882415771e-001, -7.5720882415771e-001,  7.5720882415771e-001,
+	 2.4541229009628e-002,  2.4541229009628e-002,  2.4541229009628e-002,  2.4541229009628e-002,
+	-9.9969881772995e-001,  9.9969881772995e-001, -9.9969881772995e-001,  9.9969881772995e-001,
+	-4.9067676067352e-002,  4.9067676067352e-002, -4.9067676067352e-002,  4.9067676067352e-002,
+	 9.9879544973373e-001,  9.9879544973373e-001,  9.9879544973373e-001,  9.9879544973373e-001,
+	-7.3564566671848e-002, -7.3564566671848e-002, -7.3564566671848e-002, -7.3564566671848e-002,
+	 9.9729043245316e-001, -9.9729043245316e-001,  9.9729043245316e-001, -9.9729043245316e-001,
+	 9.9992471933365e-001,  9.9992471933365e-001,  9.9992471933365e-001,  9.9992471933365e-001,
+	-1.2271538376808e-002,  1.2271538376808e-002, -1.2271538376808e-002,  1.2271538376808e-002,
+	 9.9969881772995e-001,  9.9969881772995e-001,  9.9969881772995e-001,  9.9969881772995e-001,
+	-2.4541229009628e-002,  2.4541229009628e-002, -2.4541229009628e-002,  2.4541229009628e-002,
+	 9.9932241439819e-001,  9.9932241439819e-001,  9.9932241439819e-001,  9.9932241439819e-001,
+	-3.6807224154472e-002,  3.6807224154472e-002, -3.6807224154472e-002,  3.6807224154472e-002,
+	 6.9837623834610e-001,  6.9837623834610e-001,  6.9837623834610e-001,  6.9837623834610e-001,
+	-7.1573078632355e-001,  7.1573078632355e-001, -7.1573078632355e-001,  7.1573078632355e-001,
+	-9.9969881772995e-001,  9.9969881772995e-001, -9.9969881772995e-001,  9.9969881772995e-001,
+	 2.4541229009628e-002,  2.4541229009628e-002,  2.4541229009628e-002,  2.4541229009628e-002,
+	-7.3265421390533e-001, -7.3265421390533e-001, -7.3265421390533e-001, -7.3265421390533e-001,
+	-6.8060100078583e-001,  6.8060100078583e-001, -6.8060100078583e-001,  6.8060100078583e-001,
+	 9.1911387443542e-001,  9.1911387443542e-001,  9.1911387443542e-001,  9.1911387443542e-001,
+	-3.9399203658104e-001,  3.9399203658104e-001, -3.9399203658104e-001,  3.9399203658104e-001,
+	 6.8954056501389e-001,  6.8954056501389e-001,  6.8954056501389e-001,  6.8954056501389e-001,
+	-7.2424709796906e-001,  7.2424709796906e-001, -7.2424709796906e-001,  7.2424709796906e-001,
+	 3.4841868281364e-001,  3.4841868281364e-001,  3.4841868281364e-001,  3.4841868281364e-001,
+	-9.3733906745911e-001,  9.3733906745911e-001, -9.3733906745911e-001,  9.3733906745911e-001,
+	 3.7131720781326e-001,  3.7131720781326e-001,  3.7131720781326e-001,  3.7131720781326e-001,
+	-9.2850607633591e-001,  9.2850607633591e-001, -9.2850607633591e-001,  9.2850607633591e-001,
+	-6.8954056501389e-001,  6.8954056501389e-001, -6.8954056501389e-001,  6.8954056501389e-001,
+	 7.2424709796906e-001,  7.2424709796906e-001,  7.2424709796906e-001,  7.2424709796906e-001,
+	-9.0916800498962e-001, -9.0916800498962e-001, -9.0916800498962e-001, -9.0916800498962e-001,
+	 4.1642951965332e-001, -4.1642951965332e-001,  4.1642951965332e-001, -4.1642951965332e-001,
+	 9.7831737995148e-001,  9.7831737995148e-001,  9.7831737995148e-001,  9.7831737995148e-001,
+	-2.0711138844490e-001,  2.0711138844490e-001, -2.0711138844490e-001,  2.0711138844490e-001,
+	 9.1420972347260e-001,  9.1420972347260e-001,  9.1420972347260e-001,  9.1420972347260e-001,
+	-4.0524131059647e-001,  4.0524131059647e-001, -4.0524131059647e-001,  4.0524131059647e-001,
+	 8.1045717000961e-001,  8.1045717000961e-001,  8.1045717000961e-001,  8.1045717000961e-001,
+	-5.8579784631729e-001,  5.8579784631729e-001, -5.8579784631729e-001,  5.8579784631729e-001,
+	 5.4532498121262e-001,  5.4532498121262e-001,  5.4532498121262e-001,  5.4532498121262e-001,
+	-8.3822470903397e-001,  8.3822470903397e-001, -8.3822470903397e-001,  8.3822470903397e-001,
+	-9.1420972347260e-001,  9.1420972347260e-001, -9.1420972347260e-001,  9.1420972347260e-001,
+	 4.0524131059647e-001,  4.0524131059647e-001,  4.0524131059647e-001,  4.0524131059647e-001,
+	-9.8730134963989e-001, -9.8730134963989e-001, -9.8730134963989e-001, -9.8730134963989e-001,
+	-1.5885809063911e-001,  1.5885809063911e-001, -1.5885809063911e-001,  1.5885809063911e-001,
+	 8.2458931207657e-001,  8.2458931207657e-001,  8.2458931207657e-001,  8.2458931207657e-001,
+	-5.6573182344437e-001,  5.6573182344437e-001, -5.6573182344437e-001,  5.6573182344437e-001,
+	 3.5989505052567e-001,  3.5989505052567e-001,  3.5989505052567e-001,  3.5989505052567e-001,
+	-9.3299281597137e-001,  9.3299281597137e-001, -9.3299281597137e-001,  9.3299281597137e-001,
+	-2.3105813562870e-001, -2.3105813562870e-001, -2.3105813562870e-001, -2.3105813562870e-001,
+	-9.7293996810913e-001,  9.7293996810913e-001, -9.7293996810913e-001,  9.7293996810913e-001,
+	 1.8303988873959e-001,  1.8303988873959e-001,  1.8303988873959e-001,  1.8303988873959e-001,
+	-9.8310548067093e-001,  9.8310548067093e-001, -9.8310548067093e-001,  9.8310548067093e-001,
+	-3.5989505052567e-001,  3.5989505052567e-001, -3.5989505052567e-001,  3.5989505052567e-001,
+	 9.3299281597137e-001,  9.3299281597137e-001,  9.3299281597137e-001,  9.3299281597137e-001,
+	-5.2458971738815e-001, -5.2458971738815e-001, -5.2458971738815e-001, -5.2458971738815e-001,
+	 8.5135519504547e-001, -8.5135519504547e-001,  8.5135519504547e-001, -8.5135519504547e-001,
+	 9.9390697479248e-001,  9.9390697479248e-001,  9.9390697479248e-001,  9.9390697479248e-001,
+	-1.1022221297026e-001,  1.1022221297026e-001, -1.1022221297026e-001,  1.1022221297026e-001,
+	 9.7570210695267e-001,  9.7570210695267e-001,  9.7570210695267e-001,  9.7570210695267e-001,
+	-2.1910125017166e-001,  2.1910125017166e-001, -2.1910125017166e-001,  2.1910125017166e-001,
+	 9.4560730457306e-001,  9.4560730457306e-001,  9.4560730457306e-001,  9.4560730457306e-001,
+	-3.2531031966209e-001,  3.2531031966209e-001, -3.2531031966209e-001,  3.2531031966209e-001,
+	 6.2485951185226e-001,  6.2485951185226e-001,  6.2485951185226e-001,  6.2485951185226e-001,
+	-7.8073722124100e-001,  7.8073722124100e-001, -7.8073722124100e-001,  7.8073722124100e-001,
+	-9.7570210695267e-001,  9.7570210695267e-001, -9.7570210695267e-001,  9.7570210695267e-001,
+	 2.1910125017166e-001,  2.1910125017166e-001,  2.1910125017166e-001,  2.1910125017166e-001,
+	-8.9867436885834e-001, -8.9867436885834e-001, -8.9867436885834e-001, -8.9867436885834e-001,
+	-4.3861627578735e-001,  4.3861627578735e-001, -4.3861627578735e-001,  4.3861627578735e-001,
+	 8.7607008218765e-001,  8.7607008218765e-001,  8.7607008218765e-001,  8.7607008218765e-001,
+	-4.8218378424644e-001,  4.8218378424644e-001, -4.8218378424644e-001,  4.8218378424644e-001,
+	 5.3499764204025e-001,  5.3499764204025e-001,  5.3499764204025e-001,  5.3499764204025e-001,
+	-8.4485357999802e-001,  8.4485357999802e-001, -8.4485357999802e-001,  8.4485357999802e-001,
+	 6.1320688575506e-002,  6.1320688575506e-002,  6.1320688575506e-002,  6.1320688575506e-002,
+	-9.9811810255051e-001,  9.9811810255051e-001, -9.9811810255051e-001,  9.9811810255051e-001,
+	 2.7851969003677e-001,  2.7851969003677e-001,  2.7851969003677e-001,  2.7851969003677e-001,
+	-9.6043050289154e-001,  9.6043050289154e-001, -9.6043050289154e-001,  9.6043050289154e-001,
+	-5.3499764204025e-001,  5.3499764204025e-001, -5.3499764204025e-001,  5.3499764204025e-001,
+	 8.4485357999802e-001,  8.4485357999802e-001,  8.4485357999802e-001,  8.4485357999802e-001,
+	-7.4913644790649e-001, -7.4913644790649e-001, -7.4913644790649e-001, -7.4913644790649e-001,
+	 6.6241574287415e-001, -6.6241574287415e-001,  6.6241574287415e-001, -6.6241574287415e-001,
+	 9.5330601930618e-001,  9.5330601930618e-001,  9.5330601930618e-001,  9.5330601930618e-001,
+	-3.0200594663620e-001,  3.0200594663620e-001, -3.0200594663620e-001,  3.0200594663620e-001,
+	 8.1758481264114e-001,  8.1758481264114e-001,  8.1758481264114e-001,  8.1758481264114e-001,
+	-5.7580822706223e-001,  5.7580822706223e-001, -5.7580822706223e-001,  5.7580822706223e-001,
+	 6.0551100969315e-001,  6.0551100969315e-001,  6.0551100969315e-001,  6.0551100969315e-001,
+	-7.9583692550659e-001,  7.9583692550659e-001, -7.9583692550659e-001,  7.9583692550659e-001,
+	 4.6053871512413e-001,  4.6053871512413e-001,  4.6053871512413e-001,  4.6053871512413e-001,
+	-8.8763964176178e-001,  8.8763964176178e-001, -8.8763964176178e-001,  8.8763964176178e-001,
+	-8.1758481264114e-001,  8.1758481264114e-001, -8.1758481264114e-001,  8.1758481264114e-001,
+	 5.7580822706223e-001,  5.7580822706223e-001,  5.7580822706223e-001,  5.7580822706223e-001,
+	-9.9090266227722e-001, -9.9090266227722e-001, -9.9090266227722e-001, -9.9090266227722e-001,
+	 1.3458071649075e-001, -1.3458071649075e-001,  1.3458071649075e-001, -1.3458071649075e-001,
+	 7.6516723632813e-001,  7.6516723632813e-001,  7.6516723632813e-001,  7.6516723632813e-001,
+	-6.4383155107498e-001,  6.4383155107498e-001, -6.4383155107498e-001,  6.4383155107498e-001,
+	 1.7096188664436e-001,  1.7096188664436e-001,  1.7096188664436e-001,  1.7096188664436e-001,
+	-9.8527765274048e-001,  9.8527765274048e-001, -9.8527765274048e-001,  9.8527765274048e-001,
+	-5.0353842973709e-001, -5.0353842973709e-001, -5.0353842973709e-001, -5.0353842973709e-001,
+	-8.6397278308868e-001,  8.6397278308868e-001, -8.6397278308868e-001,  8.6397278308868e-001,
+	 8.5797317326069e-002,  8.5797317326069e-002,  8.5797317326069e-002,  8.5797317326069e-002,
+	-9.9631261825562e-001,  9.9631261825562e-001, -9.9631261825562e-001,  9.9631261825562e-001,
+	-1.7096188664436e-001,  1.7096188664436e-001, -1.7096188664436e-001,  1.7096188664436e-001,
+	 9.8527765274048e-001,  9.8527765274048e-001,  9.8527765274048e-001,  9.8527765274048e-001,
+	-2.5486564636230e-001, -2.5486564636230e-001, -2.5486564636230e-001, -2.5486564636230e-001,
+	 9.6697646379471e-001, -9.6697646379471e-001,  9.6697646379471e-001, -9.6697646379471e-001,
+	 9.9811810255051e-001,  9.9811810255051e-001,  9.9811810255051e-001,  9.9811810255051e-001,
+	-6.1320737004280e-002,  6.1320737004280e-002, -6.1320737004280e-002,  6.1320737004280e-002,
+	 9.9247956275940e-001,  9.9247956275940e-001,  9.9247956275940e-001,  9.9247956275940e-001,
+	-1.2241067737341e-001,  1.2241067737341e-001, -1.2241067737341e-001,  1.2241067737341e-001,
+	 9.8310548067093e-001,  9.8310548067093e-001,  9.8310548067093e-001,  9.8310548067093e-001,
+	-1.8303988873959e-001,  1.8303988873959e-001, -1.8303988873959e-001,  1.8303988873959e-001,
+	 6.6241580247879e-001,  6.6241580247879e-001,  6.6241580247879e-001,  6.6241580247879e-001,
+	-7.4913638830185e-001,  7.4913638830185e-001, -7.4913638830185e-001,  7.4913638830185e-001,
+	-9.9247956275940e-001,  9.9247956275940e-001, -9.9247956275940e-001,  9.9247956275940e-001,
+	 1.2241067737341e-001,  1.2241067737341e-001,  1.2241067737341e-001,  1.2241067737341e-001,
+	-8.2458931207657e-001, -8.2458931207657e-001, -8.2458931207657e-001, -8.2458931207657e-001,
+	-5.6573188304901e-001,  5.6573188304901e-001, -5.6573188304901e-001,  5.6573188304901e-001,
+	 8.9867448806763e-001,  8.9867448806763e-001,  8.9867448806763e-001,  8.9867448806763e-001,
+	-4.3861624598503e-001,  4.3861624598503e-001, -4.3861624598503e-001,  4.3861624598503e-001,
+	 6.1523163318634e-001,  6.1523163318634e-001,  6.1523163318634e-001,  6.1523163318634e-001,
+	-7.8834640979767e-001,  7.8834640979767e-001, -7.8834640979767e-001,  7.8834640979767e-001,
+	 2.0711140334606e-001,  2.0711140334606e-001,  2.0711140334606e-001,  2.0711140334606e-001,
+	-9.7831737995148e-001,  9.7831737995148e-001, -9.7831737995148e-001,  9.7831737995148e-001,
+	 3.2531028985977e-001,  3.2531028985977e-001,  3.2531028985977e-001,  3.2531028985977e-001,
+	-9.4560730457306e-001,  9.4560730457306e-001, -9.4560730457306e-001,  9.4560730457306e-001,
+	-6.1523163318634e-001,  6.1523163318634e-001, -6.1523163318634e-001,  6.1523163318634e-001,
+	 7.8834640979767e-001,  7.8834640979767e-001,  7.8834640979767e-001,  7.8834640979767e-001,
+	-8.3822476863861e-001, -8.3822476863861e-001, -8.3822476863861e-001, -8.3822476863861e-001,
+	 5.4532492160797e-001, -5.4532492160797e-001,  5.4532492160797e-001, -5.4532492160797e-001,
+	 9.6697646379471e-001,  9.6697646379471e-001,  9.6697646379471e-001,  9.6697646379471e-001,
+	-2.5486567616463e-001,  2.5486567616463e-001, -2.5486567616463e-001,  2.5486567616463e-001,
+	 8.7008696794510e-001,  8.7008696794510e-001,  8.7008696794510e-001,  8.7008696794510e-001,
+	-4.9289819598198e-001,  4.9289819598198e-001, -4.9289819598198e-001,  4.9289819598198e-001,
+	 7.1573078632355e-001,  7.1573078632355e-001,  7.1573078632355e-001,  7.1573078632355e-001,
+	-6.9837623834610e-001,  6.9837623834610e-001, -6.9837623834610e-001,  6.9837623834610e-001,
+	 5.0353837013245e-001,  5.0353837013245e-001,  5.0353837013245e-001,  5.0353837013245e-001,
+	-8.6397284269333e-001,  8.6397284269333e-001, -8.6397284269333e-001,  8.6397284269333e-001,
+	-8.7008696794510e-001,  8.7008696794510e-001, -8.7008696794510e-001,  8.7008696794510e-001,
+	 4.9289819598198e-001,  4.9289819598198e-001,  4.9289819598198e-001,  4.9289819598198e-001,
+	-9.9992465972900e-001, -9.9992465972900e-001, -9.9992465972900e-001, -9.9992465972900e-001,
+	-1.2271504849195e-002,  1.2271504849195e-002, -1.2271504849195e-002,  1.2271504849195e-002,
+	 7.9583686590195e-001,  7.9583686590195e-001,  7.9583686590195e-001,  7.9583686590195e-001,
+	-6.0551106929779e-001,  6.0551106929779e-001, -6.0551106929779e-001,  6.0551106929779e-001,
+	 2.6671275496483e-001,  2.6671275496483e-001,  2.6671275496483e-001,  2.6671275496483e-001,
+	-9.6377605199814e-001,  9.6377605199814e-001, -9.6377605199814e-001,  9.6377605199814e-001,
+	-3.7131726741791e-001, -3.7131726741791e-001, -3.7131726741791e-001, -3.7131726741791e-001,
+	-9.2850595712662e-001,  9.2850595712662e-001, -9.2850595712662e-001,  9.2850595712662e-001,
+	 1.3458071649075e-001,  1.3458071649075e-001,  1.3458071649075e-001,  1.3458071649075e-001,
+	-9.9090266227722e-001,  9.9090266227722e-001, -9.9090266227722e-001,  9.9090266227722e-001,
+	-2.6671275496483e-001,  2.6671275496483e-001, -2.6671275496483e-001,  2.6671275496483e-001,
+	 9.6377605199814e-001,  9.6377605199814e-001,  9.6377605199814e-001,  9.6377605199814e-001,
+	-3.9399203658104e-001, -3.9399203658104e-001, -3.9399203658104e-001, -3.9399203658104e-001,
+	 9.1911387443542e-001, -9.1911387443542e-001,  9.1911387443542e-001, -9.1911387443542e-001,
+	 9.8730140924454e-001,  9.8730140924454e-001,  9.8730140924454e-001,  9.8730140924454e-001,
+	-1.5885815024376e-001,  1.5885815024376e-001, -1.5885815024376e-001,  1.5885815024376e-001,
+	 9.4952815771103e-001,  9.4952815771103e-001,  9.4952815771103e-001,  9.4952815771103e-001,
+	-3.1368175148964e-001,  3.1368175148964e-001, -3.1368175148964e-001,  3.1368175148964e-001,
+	 8.8763958215714e-001,  8.8763958215714e-001,  8.8763958215714e-001,  8.8763958215714e-001,
+	-4.6053871512413e-001,  4.6053871512413e-001, -4.6053871512413e-001,  4.6053871512413e-001,
+	 5.8579784631729e-001,  5.8579784631729e-001,  5.8579784631729e-001,  5.8579784631729e-001,
+	-8.1045717000961e-001,  8.1045717000961e-001, -8.1045717000961e-001,  8.1045717000961e-001,
+	-9.4952815771103e-001,  9.4952815771103e-001, -9.4952815771103e-001,  9.4952815771103e-001,
+	 3.1368175148964e-001,  3.1368175148964e-001,  3.1368175148964e-001,  3.1368175148964e-001,
+	-9.5330595970154e-001, -9.5330595970154e-001, -9.5330595970154e-001, -9.5330595970154e-001,
+	-3.0200591683388e-001,  3.0200591683388e-001, -3.0200591683388e-001,  3.0200591683388e-001,
+	 8.5135519504547e-001,  8.5135519504547e-001,  8.5135519504547e-001,  8.5135519504547e-001,
+	-5.2458971738815e-001,  5.2458971738815e-001, -5.2458971738815e-001,  5.2458971738815e-001,
+	 4.4961133599281e-001,  4.4961133599281e-001,  4.4961133599281e-001,  4.4961133599281e-001,
+	-8.9322429895401e-001,  8.9322429895401e-001, -8.9322429895401e-001,  8.9322429895401e-001,
+	-8.5797369480133e-002, -8.5797369480133e-002, -8.5797369480133e-002, -8.5797369480133e-002,
+	-9.9631255865097e-001,  9.9631255865097e-001, -9.9631255865097e-001,  9.9631255865097e-001,
+	 2.3105812072754e-001,  2.3105812072754e-001,  2.3105812072754e-001,  2.3105812072754e-001,
+	-9.7293996810913e-001,  9.7293996810913e-001, -9.7293996810913e-001,  9.7293996810913e-001,
+	-4.4961133599281e-001,  4.4961133599281e-001, -4.4961133599281e-001,  4.4961133599281e-001,
+	 8.9322429895401e-001,  8.9322429895401e-001,  8.9322429895401e-001,  8.9322429895401e-001,
+	-6.4383155107498e-001, -6.4383155107498e-001, -6.4383155107498e-001, -6.4383155107498e-001,
+	 7.6516729593277e-001, -7.6516729593277e-001,  7.6516729593277e-001, -7.6516729593277e-001,
+	 9.3733900785446e-001,  9.3733900785446e-001,  9.3733900785446e-001,  9.3733900785446e-001,
+	-3.4841868281364e-001,  3.4841868281364e-001, -3.4841868281364e-001,  3.4841868281364e-001,
+	 7.5720882415771e-001,  7.5720882415771e-001,  7.5720882415771e-001,  7.5720882415771e-001,
+	-6.5317285060883e-001,  6.5317285060883e-001, -6.5317285060883e-001,  6.5317285060883e-001,
+	 4.8218375444412e-001,  4.8218375444412e-001,  4.8218375444412e-001,  4.8218375444412e-001,
+	-8.7607008218765e-001,  8.7607008218765e-001, -8.7607008218765e-001,  8.7607008218765e-001,
+	 4.1642957925797e-001,  4.1642957925797e-001,  4.1642957925797e-001,  4.1642957925797e-001,
+	-9.0916800498962e-001,  9.0916800498962e-001, -9.0916800498962e-001,  9.0916800498962e-001,
+	-7.5720882415771e-001,  7.5720882415771e-001, -7.5720882415771e-001,  7.5720882415771e-001,
+	 6.5317285060883e-001,  6.5317285060883e-001,  6.5317285060883e-001,  6.5317285060883e-001,
+	-9.6043050289154e-001, -9.6043050289154e-001, -9.6043050289154e-001, -9.6043050289154e-001,
+	 2.7851969003677e-001, -2.7851969003677e-001,  2.7851969003677e-001, -2.7851969003677e-001,
+	 7.3265427350998e-001,  7.3265427350998e-001,  7.3265427350998e-001,  7.3265427350998e-001,
+	-6.8060100078583e-001,  6.8060100078583e-001, -6.8060100078583e-001,  6.8060100078583e-001,
+	 7.3564566671848e-002,  7.3564566671848e-002,  7.3564566671848e-002,  7.3564566671848e-002,
+	-9.9729043245316e-001,  9.9729043245316e-001, -9.9729043245316e-001,  9.9729043245316e-001,
+	-6.2485945224762e-001, -6.2485945224762e-001, -6.2485945224762e-001, -6.2485945224762e-001,
+	-7.8073722124100e-001,  7.8073722124100e-001, -7.8073722124100e-001,  7.8073722124100e-001,
+	 3.6807224154472e-002,  3.6807224154472e-002,  3.6807224154472e-002,  3.6807224154472e-002,
+	-9.9932235479355e-001,  9.9932235479355e-001, -9.9932235479355e-001,  9.9932235479355e-001,
+	-7.3564566671848e-002,  7.3564566671848e-002, -7.3564566671848e-002,  7.3564566671848e-002,
+	 9.9729043245316e-001,  9.9729043245316e-001,  9.9729043245316e-001,  9.9729043245316e-001,
+	-1.1022220551968e-001, -1.1022220551968e-001, -1.1022220551968e-001, -1.1022220551968e-001,
+	 9.9390691518784e-001, -9.9390691518784e-001,  9.9390691518784e-001, -9.9390691518784e-001,
+	 9.9932235479355e-001,  9.9932235479355e-001,  9.9932235479355e-001,  9.9932235479355e-001,
+	-3.6807224154472e-002,  3.6807224154472e-002, -3.6807224154472e-002,  3.6807224154472e-002,
+	 9.9729043245316e-001,  9.9729043245316e-001,  9.9729043245316e-001,  9.9729043245316e-001,
+	-7.3564566671848e-002,  7.3564566671848e-002, -7.3564566671848e-002,  7.3564566671848e-002,
+	 9.9390691518784e-001,  9.9390691518784e-001,  9.9390691518784e-001,  9.9390691518784e-001,
+	-1.1022220551968e-001,  1.1022220551968e-001, -1.1022220551968e-001,  1.1022220551968e-001,
+	 6.8060100078583e-001,  6.8060100078583e-001,  6.8060100078583e-001,  6.8060100078583e-001,
+	-7.3265427350998e-001,  7.3265427350998e-001, -7.3265427350998e-001,  7.3265427350998e-001,
+	-9.9729043245316e-001,  9.9729043245316e-001, -9.9729043245316e-001,  9.9729043245316e-001,
+	 7.3564566671848e-002,  7.3564566671848e-002,  7.3564566671848e-002,  7.3564566671848e-002,
+	-7.8073722124100e-001, -7.8073722124100e-001, -7.8073722124100e-001, -7.8073722124100e-001,
+	-6.2485945224762e-001,  6.2485945224762e-001, -6.2485945224762e-001,  6.2485945224762e-001,
+	 9.0916800498962e-001,  9.0916800498962e-001,  9.0916800498962e-001,  9.0916800498962e-001,
+	-4.1642957925797e-001,  4.1642957925797e-001, -4.1642957925797e-001,  4.1642957925797e-001,
+	 6.5317285060883e-001,  6.5317285060883e-001,  6.5317285060883e-001,  6.5317285060883e-001,
+	-7.5720882415771e-001,  7.5720882415771e-001, -7.5720882415771e-001,  7.5720882415771e-001,
+	 2.7851969003677e-001,  2.7851969003677e-001,  2.7851969003677e-001,  2.7851969003677e-001,
+	-9.6043050289154e-001,  9.6043050289154e-001, -9.6043050289154e-001,  9.6043050289154e-001,
+	 3.4841868281364e-001,  3.4841868281364e-001,  3.4841868281364e-001,  3.4841868281364e-001,
+	-9.3733900785446e-001,  9.3733900785446e-001, -9.3733900785446e-001,  9.3733900785446e-001,
+	-6.5317285060883e-001,  6.5317285060883e-001, -6.5317285060883e-001,  6.5317285060883e-001,
+	 7.5720882415771e-001,  7.5720882415771e-001,  7.5720882415771e-001,  7.5720882415771e-001,
+	-8.7607008218765e-001, -8.7607008218765e-001, -8.7607008218765e-001, -8.7607008218765e-001,
+	 4.8218375444412e-001, -4.8218375444412e-001,  4.8218375444412e-001, -4.8218375444412e-001,
+	 9.7293996810913e-001,  9.7293996810913e-001,  9.7293996810913e-001,  9.7293996810913e-001,
+	-2.3105812072754e-001,  2.3105812072754e-001, -2.3105812072754e-001,  2.3105812072754e-001,
+	 8.9322429895401e-001,  8.9322429895401e-001,  8.9322429895401e-001,  8.9322429895401e-001,
+	-4.4961133599281e-001,  4.4961133599281e-001, -4.4961133599281e-001,  4.4961133599281e-001,
+	 7.6516729593277e-001,  7.6516729593277e-001,  7.6516729593277e-001,  7.6516729593277e-001,
+	-6.4383155107498e-001,  6.4383155107498e-001, -6.4383155107498e-001,  6.4383155107498e-001,
+	 5.2458971738815e-001,  5.2458971738815e-001,  5.2458971738815e-001,  5.2458971738815e-001,
+	-8.5135519504547e-001,  8.5135519504547e-001, -8.5135519504547e-001,  8.5135519504547e-001,
+	-8.9322429895401e-001,  8.9322429895401e-001, -8.9322429895401e-001,  8.9322429895401e-001,
+	 4.4961133599281e-001,  4.4961133599281e-001,  4.4961133599281e-001,  4.4961133599281e-001,
+	-9.9631255865097e-001, -9.9631255865097e-001, -9.9631255865097e-001, -9.9631255865097e-001,
+	-8.5797369480133e-002,  8.5797369480133e-002, -8.5797369480133e-002,  8.5797369480133e-002,
+	 8.1045717000961e-001,  8.1045717000961e-001,  8.1045717000961e-001,  8.1045717000961e-001,
+	-5.8579784631729e-001,  5.8579784631729e-001, -5.8579784631729e-001,  5.8579784631729e-001,
+	 3.1368175148964e-001,  3.1368175148964e-001,  3.1368175148964e-001,  3.1368175148964e-001,
+	-9.4952815771103e-001,  9.4952815771103e-001, -9.4952815771103e-001,  9.4952815771103e-001,
+	-3.0200591683388e-001, -3.0200591683388e-001, -3.0200591683388e-001, -3.0200591683388e-001,
+	-9.5330595970154e-001,  9.5330595970154e-001, -9.5330595970154e-001,  9.5330595970154e-001,
+	 1.5885815024376e-001,  1.5885815024376e-001,  1.5885815024376e-001,  1.5885815024376e-001,
+	-9.8730140924454e-001,  9.8730140924454e-001, -9.8730140924454e-001,  9.8730140924454e-001,
+	-3.1368175148964e-001,  3.1368175148964e-001, -3.1368175148964e-001,  3.1368175148964e-001,
+	 9.4952815771103e-001,  9.4952815771103e-001,  9.4952815771103e-001,  9.4952815771103e-001,
+	-4.6053871512413e-001, -4.6053871512413e-001, -4.6053871512413e-001, -4.6053871512413e-001,
+	 8.8763958215714e-001, -8.8763958215714e-001,  8.8763958215714e-001, -8.8763958215714e-001,
+	 9.9090266227722e-001,  9.9090266227722e-001,  9.9090266227722e-001,  9.9090266227722e-001,
+	-1.3458071649075e-001,  1.3458071649075e-001, -1.3458071649075e-001,  1.3458071649075e-001,
+	 9.6377605199814e-001,  9.6377605199814e-001,  9.6377605199814e-001,  9.6377605199814e-001,
+	-2.6671275496483e-001,  2.6671275496483e-001, -2.6671275496483e-001,  2.6671275496483e-001,
+	 9.1911387443542e-001,  9.1911387443542e-001,  9.1911387443542e-001,  9.1911387443542e-001,
+	-3.9399203658104e-001,  3.9399203658104e-001, -3.9399203658104e-001,  3.9399203658104e-001,
+	 6.0551106929779e-001,  6.0551106929779e-001,  6.0551106929779e-001,  6.0551106929779e-001,
+	-7.9583686590195e-001,  7.9583686590195e-001, -7.9583686590195e-001,  7.9583686590195e-001,
+	-9.6377605199814e-001,  9.6377605199814e-001, -9.6377605199814e-001,  9.6377605199814e-001,
+	 2.6671275496483e-001,  2.6671275496483e-001,  2.6671275496483e-001,  2.6671275496483e-001,
+	-9.2850595712662e-001, -9.2850595712662e-001, -9.2850595712662e-001, -9.2850595712662e-001,
+	-3.7131726741791e-001,  3.7131726741791e-001, -3.7131726741791e-001,  3.7131726741791e-001,
+	 8.6397284269333e-001,  8.6397284269333e-001,  8.6397284269333e-001,  8.6397284269333e-001,
+	-5.0353837013245e-001,  5.0353837013245e-001, -5.0353837013245e-001,  5.0353837013245e-001,
+	 4.9289819598198e-001,  4.9289819598198e-001,  4.9289819598198e-001,  4.9289819598198e-001,
+	-8.7008696794510e-001,  8.7008696794510e-001, -8.7008696794510e-001,  8.7008696794510e-001,
+	-1.2271504849195e-002, -1.2271504849195e-002, -1.2271504849195e-002, -1.2271504849195e-002,
+	-9.9992465972900e-001,  9.9992465972900e-001, -9.9992465972900e-001,  9.9992465972900e-001,
+	 2.5486567616463e-001,  2.5486567616463e-001,  2.5486567616463e-001,  2.5486567616463e-001,
+	-9.6697646379471e-001,  9.6697646379471e-001, -9.6697646379471e-001,  9.6697646379471e-001,
+	-4.9289819598198e-001,  4.9289819598198e-001, -4.9289819598198e-001,  4.9289819598198e-001,
+	 8.7008696794510e-001,  8.7008696794510e-001,  8.7008696794510e-001,  8.7008696794510e-001,
+	-6.9837623834610e-001, -6.9837623834610e-001, -6.9837623834610e-001, -6.9837623834610e-001,
+	 7.1573078632355e-001, -7.1573078632355e-001,  7.1573078632355e-001, -7.1573078632355e-001,
+	 9.4560730457306e-001,  9.4560730457306e-001,  9.4560730457306e-001,  9.4560730457306e-001,
+	-3.2531028985977e-001,  3.2531028985977e-001, -3.2531028985977e-001,  3.2531028985977e-001,
+	 7.8834640979767e-001,  7.8834640979767e-001,  7.8834640979767e-001,  7.8834640979767e-001,
+	-6.1523163318634e-001,  6.1523163318634e-001, -6.1523163318634e-001,  6.1523163318634e-001,
+	 5.4532492160797e-001,  5.4532492160797e-001,  5.4532492160797e-001,  5.4532492160797e-001,
+	-8.3822476863861e-001,  8.3822476863861e-001, -8.3822476863861e-001,  8.3822476863861e-001,
+	 4.3861624598503e-001,  4.3861624598503e-001,  4.3861624598503e-001,  4.3861624598503e-001,
+	-8.9867448806763e-001,  8.9867448806763e-001, -8.9867448806763e-001,  8.9867448806763e-001,
+	-7.8834640979767e-001,  7.8834640979767e-001, -7.8834640979767e-001,  7.8834640979767e-001,
+	 6.1523163318634e-001,  6.1523163318634e-001,  6.1523163318634e-001,  6.1523163318634e-001,
+	-9.7831737995148e-001, -9.7831737995148e-001, -9.7831737995148e-001, -9.7831737995148e-001,
+	 2.0711140334606e-001, -2.0711140334606e-001,  2.0711140334606e-001, -2.0711140334606e-001,
+	 7.4913638830185e-001,  7.4913638830185e-001,  7.4913638830185e-001,  7.4913638830185e-001,
+	-6.6241580247879e-001,  6.6241580247879e-001, -6.6241580247879e-001,  6.6241580247879e-001,
+	 1.2241067737341e-001,  1.2241067737341e-001,  1.2241067737341e-001,  1.2241067737341e-001,
+	-9.9247956275940e-001,  9.9247956275940e-001, -9.9247956275940e-001,  9.9247956275940e-001,
+	-5.6573188304901e-001, -5.6573188304901e-001, -5.6573188304901e-001, -5.6573188304901e-001,
+	-8.2458931207657e-001,  8.2458931207657e-001, -8.2458931207657e-001,  8.2458931207657e-001,
+	 6.1320737004280e-002,  6.1320737004280e-002,  6.1320737004280e-002,  6.1320737004280e-002,
+	-9.9811810255051e-001,  9.9811810255051e-001, -9.9811810255051e-001,  9.9811810255051e-001,
+	-1.2241067737341e-001,  1.2241067737341e-001, -1.2241067737341e-001,  1.2241067737341e-001,
+	 9.9247956275940e-001,  9.9247956275940e-001,  9.9247956275940e-001,  9.9247956275940e-001,
+	-1.8303988873959e-001, -1.8303988873959e-001, -1.8303988873959e-001, -1.8303988873959e-001,
+	 9.8310548067093e-001, -9.8310548067093e-001,  9.8310548067093e-001, -9.8310548067093e-001,
+	 9.9631261825562e-001,  9.9631261825562e-001,  9.9631261825562e-001,  9.9631261825562e-001,
+	-8.5797317326069e-002,  8.5797317326069e-002, -8.5797317326069e-002,  8.5797317326069e-002,
+	 9.8527765274048e-001,  9.8527765274048e-001,  9.8527765274048e-001,  9.8527765274048e-001,
+	-1.7096188664436e-001,  1.7096188664436e-001, -1.7096188664436e-001,  1.7096188664436e-001,
+	 9.6697646379471e-001,  9.6697646379471e-001,  9.6697646379471e-001,  9.6697646379471e-001,
+	-2.5486564636230e-001,  2.5486564636230e-001, -2.5486564636230e-001,  2.5486564636230e-001,
+	 6.4383155107498e-001,  6.4383155107498e-001,  6.4383155107498e-001,  6.4383155107498e-001,
+	-7.6516723632813e-001,  7.6516723632813e-001, -7.6516723632813e-001,  7.6516723632813e-001,
+	-9.8527765274048e-001,  9.8527765274048e-001, -9.8527765274048e-001,  9.8527765274048e-001,
+	 1.7096188664436e-001,  1.7096188664436e-001,  1.7096188664436e-001,  1.7096188664436e-001,
+	-8.6397278308868e-001, -8.6397278308868e-001, -8.6397278308868e-001, -8.6397278308868e-001,
+	-5.0353842973709e-001,  5.0353842973709e-001, -5.0353842973709e-001,  5.0353842973709e-001,
+	 8.8763964176178e-001,  8.8763964176178e-001,  8.8763964176178e-001,  8.8763964176178e-001,
+	-4.6053871512413e-001,  4.6053871512413e-001, -4.6053871512413e-001,  4.6053871512413e-001,
+	 5.7580822706223e-001,  5.7580822706223e-001,  5.7580822706223e-001,  5.7580822706223e-001,
+	-8.1758481264114e-001,  8.1758481264114e-001, -8.1758481264114e-001,  8.1758481264114e-001,
+	 1.3458071649075e-001,  1.3458071649075e-001,  1.3458071649075e-001,  1.3458071649075e-001,
+	-9.9090266227722e-001,  9.9090266227722e-001, -9.9090266227722e-001,  9.9090266227722e-001,
+	 3.0200594663620e-001,  3.0200594663620e-001,  3.0200594663620e-001,  3.0200594663620e-001,
+	-9.5330601930618e-001,  9.5330601930618e-001, -9.5330601930618e-001,  9.5330601930618e-001,
+	-5.7580822706223e-001,  5.7580822706223e-001, -5.7580822706223e-001,  5.7580822706223e-001,
+	 8.1758481264114e-001,  8.1758481264114e-001,  8.1758481264114e-001,  8.1758481264114e-001,
+	-7.9583692550659e-001, -7.9583692550659e-001, -7.9583692550659e-001, -7.9583692550659e-001,
+	 6.0551100969315e-001, -6.0551100969315e-001,  6.0551100969315e-001, -6.0551100969315e-001,
+	 9.6043050289154e-001,  9.6043050289154e-001,  9.6043050289154e-001,  9.6043050289154e-001,
+	-2.7851969003677e-001,  2.7851969003677e-001, -2.7851969003677e-001,  2.7851969003677e-001,
+	 8.4485357999802e-001,  8.4485357999802e-001,  8.4485357999802e-001,  8.4485357999802e-001,
+	-5.3499764204025e-001,  5.3499764204025e-001, -5.3499764204025e-001,  5.3499764204025e-001,
+	 6.6241574287415e-001,  6.6241574287415e-001,  6.6241574287415e-001,  6.6241574287415e-001,
+	-7.4913644790649e-001,  7.4913644790649e-001, -7.4913644790649e-001,  7.4913644790649e-001,
+	 4.8218378424644e-001,  4.8218378424644e-001,  4.8218378424644e-001,  4.8218378424644e-001,
+	-8.7607008218765e-001,  8.7607008218765e-001, -8.7607008218765e-001,  8.7607008218765e-001,
+	-8.4485357999802e-001,  8.4485357999802e-001, -8.4485357999802e-001,  8.4485357999802e-001,
+	 5.3499764204025e-001,  5.3499764204025e-001,  5.3499764204025e-001,  5.3499764204025e-001,
+	-9.9811810255051e-001, -9.9811810255051e-001, -9.9811810255051e-001, -9.9811810255051e-001,
+	 6.1320688575506e-002, -6.1320688575506e-002,  6.1320688575506e-002, -6.1320688575506e-002,
+	 7.8073722124100e-001,  7.8073722124100e-001,  7.8073722124100e-001,  7.8073722124100e-001,
+	-6.2485951185226e-001,  6.2485951185226e-001, -6.2485951185226e-001,  6.2485951185226e-001,
+	 2.1910125017166e-001,  2.1910125017166e-001,  2.1910125017166e-001,  2.1910125017166e-001,
+	-9.7570210695267e-001,  9.7570210695267e-001, -9.7570210695267e-001,  9.7570210695267e-001,
+	-4.3861627578735e-001, -4.3861627578735e-001, -4.3861627578735e-001, -4.3861627578735e-001,
+	-8.9867436885834e-001,  8.9867436885834e-001, -8.9867436885834e-001,  8.9867436885834e-001,
+	 1.1022221297026e-001,  1.1022221297026e-001,  1.1022221297026e-001,  1.1022221297026e-001,
+	-9.9390697479248e-001,  9.9390697479248e-001, -9.9390697479248e-001,  9.9390697479248e-001,
+	-2.1910125017166e-001,  2.1910125017166e-001, -2.1910125017166e-001,  2.1910125017166e-001,
+	 9.7570210695267e-001,  9.7570210695267e-001,  9.7570210695267e-001,  9.7570210695267e-001,
+	-3.2531031966209e-001, -3.2531031966209e-001, -3.2531031966209e-001, -3.2531031966209e-001,
+	 9.4560730457306e-001, -9.4560730457306e-001,  9.4560730457306e-001, -9.4560730457306e-001,
+	 9.8310548067093e-001,  9.8310548067093e-001,  9.8310548067093e-001,  9.8310548067093e-001,
+	-1.8303988873959e-001,  1.8303988873959e-001, -1.8303988873959e-001,  1.8303988873959e-001,
+	 9.3299281597137e-001,  9.3299281597137e-001,  9.3299281597137e-001,  9.3299281597137e-001,
+	-3.5989505052567e-001,  3.5989505052567e-001, -3.5989505052567e-001,  3.5989505052567e-001,
+	 8.5135519504547e-001,  8.5135519504547e-001,  8.5135519504547e-001,  8.5135519504547e-001,
+	-5.2458971738815e-001,  5.2458971738815e-001, -5.2458971738815e-001,  5.2458971738815e-001,
+	 5.6573182344437e-001,  5.6573182344437e-001,  5.6573182344437e-001,  5.6573182344437e-001,
+	-8.2458931207657e-001,  8.2458931207657e-001, -8.2458931207657e-001,  8.2458931207657e-001,
+	-9.3299281597137e-001,  9.3299281597137e-001, -9.3299281597137e-001,  9.3299281597137e-001,
+	 3.5989505052567e-001,  3.5989505052567e-001,  3.5989505052567e-001,  3.5989505052567e-001,
+	-9.7293996810913e-001, -9.7293996810913e-001, -9.7293996810913e-001, -9.7293996810913e-001,
+	-2.3105813562870e-001,  2.3105813562870e-001, -2.3105813562870e-001,  2.3105813562870e-001,
+	 8.3822470903397e-001,  8.3822470903397e-001,  8.3822470903397e-001,  8.3822470903397e-001,
+	-5.4532498121262e-001,  5.4532498121262e-001, -5.4532498121262e-001,  5.4532498121262e-001,
+	 4.0524131059647e-001,  4.0524131059647e-001,  4.0524131059647e-001,  4.0524131059647e-001,
+	-9.1420972347260e-001,  9.1420972347260e-001, -9.1420972347260e-001,  9.1420972347260e-001,
+	-1.5885809063911e-001, -1.5885809063911e-001, -1.5885809063911e-001, -1.5885809063911e-001,
+	-9.8730134963989e-001,  9.8730134963989e-001, -9.8730134963989e-001,  9.8730134963989e-001,
+	 2.0711138844490e-001,  2.0711138844490e-001,  2.0711138844490e-001,  2.0711138844490e-001,
+	-9.7831737995148e-001,  9.7831737995148e-001, -9.7831737995148e-001,  9.7831737995148e-001,
+	-4.0524131059647e-001,  4.0524131059647e-001, -4.0524131059647e-001,  4.0524131059647e-001,
+	 9.1420972347260e-001,  9.1420972347260e-001,  9.1420972347260e-001,  9.1420972347260e-001,
+	-5.8579784631729e-001, -5.8579784631729e-001, -5.8579784631729e-001, -5.8579784631729e-001,
+	 8.1045717000961e-001, -8.1045717000961e-001,  8.1045717000961e-001, -8.1045717000961e-001,
+	 9.2850607633591e-001,  9.2850607633591e-001,  9.2850607633591e-001,  9.2850607633591e-001,
+	-3.7131720781326e-001,  3.7131720781326e-001, -3.7131720781326e-001,  3.7131720781326e-001,
+	 7.2424709796906e-001,  7.2424709796906e-001,  7.2424709796906e-001,  7.2424709796906e-001,
+	-6.8954056501389e-001,  6.8954056501389e-001, -6.8954056501389e-001,  6.8954056501389e-001,
+	 4.1642951965332e-001,  4.1642951965332e-001,  4.1642951965332e-001,  4.1642951965332e-001,
+	-9.0916800498962e-001,  9.0916800498962e-001, -9.0916800498962e-001,  9.0916800498962e-001,
+	 3.9399203658104e-001,  3.9399203658104e-001,  3.9399203658104e-001,  3.9399203658104e-001,
+	-9.1911387443542e-001,  9.1911387443542e-001, -9.1911387443542e-001,  9.1911387443542e-001,
+	-7.2424709796906e-001,  7.2424709796906e-001, -7.2424709796906e-001,  7.2424709796906e-001,
+	 6.8954056501389e-001,  6.8954056501389e-001,  6.8954056501389e-001,  6.8954056501389e-001,
+	-9.3733906745911e-001, -9.3733906745911e-001, -9.3733906745911e-001, -9.3733906745911e-001,
+	 3.4841868281364e-001, -3.4841868281364e-001,  3.4841868281364e-001, -3.4841868281364e-001,
+	 7.1573078632355e-001,  7.1573078632355e-001,  7.1573078632355e-001,  7.1573078632355e-001,
+	-6.9837623834610e-001,  6.9837623834610e-001, -6.9837623834610e-001,  6.9837623834610e-001,
+	 2.4541229009628e-002,  2.4541229009628e-002,  2.4541229009628e-002,  2.4541229009628e-002,
+	-9.9969881772995e-001,  9.9969881772995e-001, -9.9969881772995e-001,  9.9969881772995e-001,
+	-6.8060100078583e-001, -6.8060100078583e-001, -6.8060100078583e-001, -6.8060100078583e-001,
+	-7.3265421390533e-001,  7.3265421390533e-001, -7.3265421390533e-001,  7.3265421390533e-001,
+	 1.2271538376808e-002,  1.2271538376808e-002,  1.2271538376808e-002,  1.2271538376808e-002,
+	-9.9992471933365e-001,  9.9992471933365e-001, -9.9992471933365e-001,  9.9992471933365e-001,
+	-2.4541229009628e-002,  2.4541229009628e-002, -2.4541229009628e-002,  2.4541229009628e-002,
+	 9.9969881772995e-001,  9.9969881772995e-001,  9.9969881772995e-001,  9.9969881772995e-001,
+	-3.6807224154472e-002, -3.6807224154472e-002, -3.6807224154472e-002, -3.6807224154472e-002,
+	 9.9932241439819e-001, -9.9932241439819e-001,  9.9932241439819e-001, -9.9932241439819e-001
+};
+
+static inline void cft1st(int n, float *a)	/* First pass of cftfsub(): works through a[0..n) in place, 16 floats per group. */
+{
+	int		j;
+	float	*w	 = CT1STP;	/* coefficient table defined elsewhere in this file */
+	/* Group 0 (a[0..15]) is done before the loop and needs only w[2]; every later group reads eight vectors (32 floats) from w. */
+	__m128	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5;	/* NOTE(review): some are passed to _mm_loadl_pi before being initialised; the stale half is replaced by the matching _mm_loadh_pi */
+	XMM0	 = _mm_loadl_pi(XMM0, (__m64*)(a   ));
+	XMM2	 = _mm_loadl_pi(XMM2, (__m64*)(a+ 2));
+	XMM0	 = _mm_loadh_pi(XMM0, (__m64*)(a+ 4));	/* XMM0 = (a[0],a[1],a[4],a[5]) */
+	XMM2	 = _mm_loadh_pi(XMM2, (__m64*)(a+ 6));	/* XMM2 = (a[2],a[3],a[6],a[7]) */
+	XMM1	 = XMM0;
+	XMM0	 = _mm_add_ps(XMM0, XMM2);
+	XMM1	 = _mm_sub_ps(XMM1, XMM2);
+	XMM2	 = XMM0;
+	XMM3	 = XMM1;
+	XMM0	 = _mm_movelh_ps(XMM0, XMM0);
+	XMM2	 = _mm_movehl_ps(XMM2, XMM2);
+	XMM1	 = _mm_movelh_ps(XMM1, XMM1);
+	XMM3	 = _mm_shuffle_ps(XMM3, XMM3, _MM_SHUFFLE(2,3,2,3));
+	XMM2	 = _mm_xor_ps(XMM2, PCS_RRNN.ps);	/* PCS_* are bit masks defined elsewhere; presumably they flip the sign of selected lanes - confirm */
+	XMM3	 = _mm_xor_ps(XMM3, PCS_RNNR.ps);
+	XMM4	 = _mm_loadl_pi(XMM4, (__m64*)(a+ 8));
+	XMM5	 = _mm_loadl_pi(XMM5, (__m64*)(a+10));
+	XMM0	 = _mm_add_ps(XMM0, XMM2);
+	XMM1	 = _mm_add_ps(XMM1, XMM3);
+	XMM4	 = _mm_loadh_pi(XMM4, (__m64*)(a+12));	/* XMM4 = (a[8],a[9],a[12],a[13]) */
+	XMM5	 = _mm_loadh_pi(XMM5, (__m64*)(a+14));	/* XMM5 = (a[10],a[11],a[14],a[15]) */
+	XMM2	 = XMM4;
+	_mm_storel_pi((__m64*)(a   ), XMM0);
+	_mm_storel_pi((__m64*)(a+ 2), XMM1);
+	XMM4	 = _mm_add_ps(XMM4, XMM5);
+	XMM2	 = _mm_sub_ps(XMM2, XMM5);
+	_mm_storeh_pi((__m64*)(a+ 4), XMM0);
+	_mm_storeh_pi((__m64*)(a+ 6), XMM1);
+	XMM5	 = XMM4;
+	XMM3	 = XMM2;
+	XMM4	 = _mm_shuffle_ps(XMM4, XMM4, _MM_SHUFFLE(0,3,1,0));
+	XMM5	 = _mm_shuffle_ps(XMM5, XMM5, _MM_SHUFFLE(2,1,3,2));
+	XMM2	 = _mm_shuffle_ps(XMM2, XMM2, _MM_SHUFFLE(2,3,1,0));
+	XMM3	 = _mm_shuffle_ps(XMM3, XMM3, _MM_SHUFFLE(1,0,2,3));
+	XMM5	 = _mm_xor_ps(XMM5, PCS_RRNN.ps);
+	XMM3	 = _mm_xor_ps(XMM3, PCS_RNNR.ps);
+	XMM4	 = _mm_add_ps(XMM4, XMM5);
+	XMM2	 = _mm_add_ps(XMM2, XMM3);
+	_mm_storel_pi((__m64*)(a+ 8), XMM4);
+	_mm_storeh_pi((__m64*)(a+12), XMM4);
+	XMM5	 = XMM2;
+	XMM3	 = _mm_load_ss(w+2);	/* w[2]; copied to all four lanes by the shuffle three lines below */
+	XMM2	 = _mm_shuffle_ps(XMM2, XMM2, _MM_SHUFFLE(3,3,0,0));
+	XMM5	 = _mm_shuffle_ps(XMM5, XMM5, _MM_SHUFFLE(2,2,1,1));
+	XMM3	 = _mm_shuffle_ps(XMM3, XMM3, _MM_SHUFFLE(0,0,0,0));
+#if	defined(__SSE3__)	/* single addsub instruction when SSE3 is available */
+	XMM2	 = _mm_addsub_ps(XMM2, XMM5);
+#else	/* fallback: XOR with PCS_NRNR, then add - presumably equivalent to addsub; confirm the mask layout */
+	XMM5	 = _mm_xor_ps(XMM5, PCS_NRNR.ps);
+	XMM2	 = _mm_add_ps(XMM2, XMM5);
+#endif
+	XMM2	 = _mm_mul_ps(XMM2, XMM3);	/* scale by w[2] */
+	_mm_storel_pi((__m64*)(a+10), XMM2);
+	_mm_storeh_pi((__m64*)(a+14), XMM2);
+	for (j = 16; j < n; j += 16)	/* remaining groups: a[j..j+15] */
+	{
+		__m128	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5;	/* these shadow the function-scope variables of the same names */
+		XMM0	 = _mm_loadl_pi(XMM0, (__m64*)(a+j   ));
+		XMM2	 = _mm_loadl_pi(XMM2, (__m64*)(a+j+ 2));
+		XMM0	 = _mm_loadh_pi(XMM0, (__m64*)(a+j+ 4));
+		XMM2	 = _mm_loadh_pi(XMM2, (__m64*)(a+j+ 6));
+		XMM1	 = XMM0;
+		XMM0	 = _mm_add_ps(XMM0, XMM2);
+		XMM1	 = _mm_sub_ps(XMM1, XMM2);
+		XMM2	 = XMM0;
+		XMM3	 = XMM1;
+		XMM0	 = _mm_movelh_ps(XMM0, XMM0);
+		XMM2	 = _mm_movehl_ps(XMM2, XMM2);
+		XMM1	 = _mm_movelh_ps(XMM1, XMM1);
+		XMM3	 = _mm_shuffle_ps(XMM3, XMM3, _MM_SHUFFLE(2,3,2,3));
+		XMM2	 = _mm_xor_ps(XMM2, PCS_RRNN.ps);
+		XMM3	 = _mm_xor_ps(XMM3, PCS_RNNR.ps);
+		XMM0	 = _mm_add_ps(XMM0, XMM2);
+		XMM1	 = _mm_add_ps(XMM1, XMM3);
+		_mm_storel_pi((__m64*)(a+j   ), XMM0);
+		XMM2	 = XMM0;
+		XMM4	 = _mm_loadl_pi(XMM4, (__m64*)(a+j+ 8));
+		XMM5	 = _mm_loadl_pi(XMM5, (__m64*)(a+j+10));
+		XMM2	 = _mm_shuffle_ps(XMM2, XMM2, _MM_SHUFFLE(2,3,2,3));
+		XMM0	 = _mm_mul_ps(XMM0, PM128(w   ));	/* PM128 is defined elsewhere; presumably it views four floats at the pointer as one __m128 - confirm alignment requirement */
+		XMM2	 = _mm_mul_ps(XMM2, PM128(w+ 4));
+		XMM3	 = XMM1;
+		XMM0	 = _mm_add_ps(XMM0, XMM2);
+		XMM3	 = _mm_shuffle_ps(XMM3, XMM3, _MM_SHUFFLE(2,3,0,1));
+		XMM4	 = _mm_loadh_pi(XMM4, (__m64*)(a+j+12));
+		XMM5	 = _mm_loadh_pi(XMM5, (__m64*)(a+j+14));
+		_mm_storeh_pi((__m64*)(a+j+ 4), XMM0);
+		XMM1	 = _mm_mul_ps(XMM1, PM128(w+ 8));
+		XMM2	 = XMM4;
+		XMM3	 = _mm_mul_ps(XMM3, PM128(w+12));
+		XMM4	 = _mm_add_ps(XMM4, XMM5);
+		XMM2	 = _mm_sub_ps(XMM2, XMM5);
+		XMM1	 = _mm_add_ps(XMM1, XMM3);
+		XMM5	 = XMM4;
+		XMM0	 = XMM2;
+		_mm_storel_pi((__m64*)(a+j+ 2), XMM1);
+		XMM4	 = _mm_movelh_ps(XMM4, XMM4);
+		XMM5	 = _mm_movehl_ps(XMM5, XMM5);
+		_mm_storeh_pi((__m64*)(a+j+ 6), XMM1);
+		XMM2	 = _mm_movelh_ps(XMM2, XMM2);
+		XMM0	 = _mm_shuffle_ps(XMM0, XMM0, _MM_SHUFFLE(2,3,2,3));
+		XMM5	 = _mm_xor_ps(XMM5, PCS_RRNN.ps);
+		XMM0	 = _mm_xor_ps(XMM0, PCS_RNNR.ps);
+		XMM4	 = _mm_add_ps(XMM4, XMM5);
+		XMM2	 = _mm_add_ps(XMM2, XMM0);
+		_mm_storel_pi((__m64*)(a+j+ 8), XMM4);
+		XMM5	 = XMM4;
+		XMM4	 = _mm_shuffle_ps(XMM4, XMM4, _MM_SHUFFLE(2,3,2,3));
+		XMM4	 = _mm_mul_ps(XMM4, PM128(w+16));
+		XMM5	 = _mm_mul_ps(XMM5, PM128(w+20));
+		XMM0	 = XMM2;
+		XMM4	 = _mm_sub_ps(XMM4, XMM5);
+		XMM0	 = _mm_shuffle_ps(XMM0, XMM0, _MM_SHUFFLE(2,3,0,1));
+		_mm_storeh_pi((__m64*)(a+j+12), XMM4);
+		XMM2	 = _mm_mul_ps(XMM2, PM128(w+24));
+		XMM0	 = _mm_mul_ps(XMM0, PM128(w+28));
+		XMM2	 = _mm_add_ps(XMM2, XMM0);
+		_mm_storel_pi((__m64*)(a+j+10), XMM2);
+		_mm_storeh_pi((__m64*)(a+j+14), XMM2);
+		w	+= 32;	/* step to the next group's eight coefficient vectors */
+	}
+}
+
+
+STIN void cftmdl(int n, int l, float *a)	/* One in-place pass over a[0..n) that combines four sub-blocks of l floats each; cftfsub() calls it with l = 8, 32, 128, ... */
+{
+	int j, j1, j2, j3, k, m, m2;
+	__m128	XMM6;	/* holds W2 for the second loop, then serves as scratch in the k-loop */
+	__m128	*ctmdl = (__m128*)CTMDLP;	/* coefficient vectors for the k-loop, 12 consumed per k step; CTMDLP is defined elsewhere */
+	/* Every a[] access below is _mm_load_ps/_mm_store_ps, so a+j, a+j1, a+j2, a+j3 must be 16-byte aligned. */
+	m = l << 2;	/* 4*l: length of one group of four sub-blocks */
+	for (j = 0; j < l; j += 8) {	/* group 0: add/subtract only, no coefficient multiply, 8 floats per iteration */
+		__m128	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6;	/* this XMM6 shadows the function-scope XMM6 */
+		j1 = j  + l;
+		j2 = j1 + l;
+		j3 = j2 + l;
+		XMM0	 = _mm_load_ps(a+j   );
+		XMM4	 = _mm_load_ps(a+j1  );
+		XMM2	 = _mm_load_ps(a+j2  );
+		XMM5	 = _mm_load_ps(a+j3  );
+		XMM1	 = XMM0;
+		XMM3	 = XMM2;
+		XMM0	 = _mm_add_ps(XMM0, XMM4);
+		XMM2	 = _mm_add_ps(XMM2, XMM5);
+		XMM1	 = _mm_sub_ps(XMM1, XMM4);
+		XMM3	 = _mm_sub_ps(XMM3, XMM5);
+		XMM4	 = XMM0;
+		XMM5	 = XMM1;
+		XMM3	 = _mm_shuffle_ps(XMM3, XMM3, _MM_SHUFFLE(2,3,0,1));	/* swap the two floats inside each pair */
+		XMM6	 = _mm_load_ps(a+j +4);	/* start loading the second 4-float half early */
+		XMM0	 = _mm_add_ps(XMM0, XMM2);
+		XMM3	 = _mm_xor_ps(XMM3, PCS_NRNR.ps);	/* PCS_NRNR is a bit mask defined elsewhere; presumably flips the sign of alternate lanes - confirm */
+		XMM4	 = _mm_sub_ps(XMM4, XMM2);
+		XMM2	 = _mm_load_ps(a+j1+4);
+		XMM1	 = _mm_add_ps(XMM1, XMM3);
+		XMM5	 = _mm_sub_ps(XMM5, XMM3);
+		XMM3	 = _mm_load_ps(a+j2+4);
+		_mm_store_ps(a+j   , XMM0);
+		XMM0	 = _mm_load_ps(a+j3+4);
+		_mm_store_ps(a+j1  , XMM1);
+		XMM1	 = XMM6;
+		_mm_store_ps(a+j2  , XMM4);
+		XMM4	 = XMM3;
+		XMM6	 = _mm_add_ps(XMM6, XMM2);
+		XMM3	 = _mm_add_ps(XMM3, XMM0);
+		XMM1	 = _mm_sub_ps(XMM1, XMM2);
+		XMM4	 = _mm_sub_ps(XMM4, XMM0);
+		XMM2	 = XMM6;
+		XMM0	 = XMM1;
+		XMM4	 = _mm_shuffle_ps(XMM4, XMM4, _MM_SHUFFLE(2,3,0,1));
+		_mm_store_ps(a+j3  , XMM5);
+		XMM6	 = _mm_add_ps(XMM6, XMM3);
+		XMM4	 = _mm_xor_ps(XMM4, PCS_NRNR.ps);
+		XMM2	 = _mm_sub_ps(XMM2, XMM3);
+		XMM1	 = _mm_add_ps(XMM1, XMM4);
+		XMM0	 = _mm_sub_ps(XMM0, XMM4);
+		_mm_store_ps(a+j +4, XMM6);
+		_mm_store_ps(a+j1+4, XMM1);
+		_mm_store_ps(a+j2+4, XMM2);
+		_mm_store_ps(a+j3+4, XMM0);
+	}
+	XMM6	 = _mm_load_ps(W2);	/* four-float constant defined elsewhere; multiplies the a+j1 and a+j3 results in the next loop */
+	for (j = m; j < l + m; j += 8) {	/* group 1: a[m .. m+4*l), 8 floats per iteration */
+		__m128	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5;
+		j1 = j  + l;
+		j2 = j1 + l;
+		j3 = j2 + l;
+		XMM0	 = _mm_load_ps(a+j   );
+		XMM4	 = _mm_load_ps(a+j1  );
+		XMM2	 = _mm_load_ps(a+j2  );
+		XMM5	 = _mm_load_ps(a+j3  );
+		XMM1	 = XMM0;
+		XMM3	 = XMM2;
+		XMM0	 = _mm_add_ps(XMM0, XMM4);
+		XMM2	 = _mm_add_ps(XMM2, XMM5);
+		XMM1	 = _mm_sub_ps(XMM1, XMM4);
+		XMM3	 = _mm_sub_ps(XMM3, XMM5);
+		XMM4	 = XMM0;
+		XMM5	 = XMM0;
+		XMM4	 = _mm_shuffle_ps(XMM4, XMM2, _MM_SHUFFLE(3,1,2,0));	/* (x2i_1,x2i_0,x0r_1,x0r_0) */
+		XMM5	 = _mm_shuffle_ps(XMM5, XMM2, _MM_SHUFFLE(2,0,3,1));	/* (x2r_1,x2r_0,x0i_1,x0i_0) */
+		XMM4	 = _mm_shuffle_ps(XMM4, XMM4, _MM_SHUFFLE(1,3,0,2));	/* (x0r_1,x2i_1,x0r_0,x2i_0) */
+		XMM5	 = _mm_shuffle_ps(XMM5, XMM5, _MM_SHUFFLE(3,1,2,0));	/* (x2r_1,x0i_1,x2r_0,x0i_0) */
+		XMM0	 = _mm_add_ps(XMM0, XMM2);
+		XMM2	 = XMM1;												/* x1  */
+		XMM4	 = _mm_sub_ps(XMM4, XMM5);
+		XMM5	 = XMM3;												/* x3  */
+#if	defined(__SSE3__)
+		XMM2	 = _mm_moveldup_ps(XMM2);
+		XMM1	 = _mm_movehdup_ps(XMM1);
+#else
+		XMM2	 = _mm_shuffle_ps(XMM2, XMM2, _MM_SHUFFLE(2,2,0,0));	/* x1r */
+		XMM1	 = _mm_shuffle_ps(XMM1, XMM1, _MM_SHUFFLE(3,3,1,1));	/* x1i */
+#endif
+		_mm_store_ps(a+j   , XMM0);
+		_mm_store_ps(a+j2  , XMM4);
+#if	defined(__SSE3__)
+		XMM5	 = _mm_moveldup_ps(XMM5);
+		XMM3	 = _mm_movehdup_ps(XMM3);
+#else
+		XMM5	 = _mm_shuffle_ps(XMM5, XMM5, _MM_SHUFFLE(2,2,0,0));	/* x3r */
+		XMM3	 = _mm_shuffle_ps(XMM3, XMM3, _MM_SHUFFLE(3,3,1,1));	/* x3i */
+#endif
+		XMM0	 = XMM2;												/* x1r */
+		XMM4	 = XMM1;												/* x1i */
+		XMM2	 = _mm_sub_ps(XMM2, XMM3);								/* x1r - x3i */
+		XMM1	 = _mm_add_ps(XMM1, XMM5);								/* x1i + x3r */
+		XMM5	 = _mm_sub_ps(XMM5, XMM4);								/* x3r - x1i */
+		XMM4	 = _mm_load_ps(a+j +4);
+		XMM3	 = _mm_add_ps(XMM3, XMM0);								/* x3i + x1r */
+		XMM0	 = _mm_load_ps(a+j1+4);
+#if	defined(__SSE3__)
+		XMM2	 = _mm_addsub_ps(XMM2, XMM1);
+		XMM1	 = _mm_load_ps(a+j2+4);
+		XMM5	 = _mm_addsub_ps(XMM5, XMM3);
+#else
+		XMM1	 = _mm_xor_ps(XMM1, PCS_NRNR.ps);
+		XMM3	 = _mm_xor_ps(XMM3, PCS_NRNR.ps);
+		XMM2	 = _mm_add_ps(XMM2, XMM1);
+		XMM1	 = _mm_load_ps(a+j2+4);
+		XMM5	 = _mm_add_ps(XMM5, XMM3);
+#endif
+		XMM3	 = _mm_load_ps(a+j3+4);
+		XMM2	 = _mm_mul_ps(XMM2, XMM6);	/* scale by W2 */
+		XMM5	 = _mm_mul_ps(XMM5, XMM6);	/* scale by W2 */
+		_mm_store_ps(a+j1  , XMM2);
+		XMM2	 = XMM4;
+		_mm_store_ps(a+j3  , XMM5);
+		XMM5	 = XMM1;
+		XMM4	 = _mm_add_ps(XMM4, XMM0);
+		XMM1	 = _mm_add_ps(XMM1, XMM3);
+		XMM2	 = _mm_sub_ps(XMM2, XMM0);
+		XMM5	 = _mm_sub_ps(XMM5, XMM3);
+		XMM0	 = XMM4;
+		XMM3	 = XMM4;
+		XMM0	 = _mm_shuffle_ps(XMM0, XMM1, _MM_SHUFFLE(3,1,2,0));	/* (x2i_1,x2i_0,x0r_1,x0r_0) */
+		XMM3	 = _mm_shuffle_ps(XMM3, XMM1, _MM_SHUFFLE(2,0,3,1));	/* (x2r_1,x2r_0,x0i_1,x0i_0) */
+		XMM0	 = _mm_shuffle_ps(XMM0, XMM0, _MM_SHUFFLE(1,3,0,2));	/* (x0r_1,x2i_1,x0r_0,x2i_0) */
+		XMM3	 = _mm_shuffle_ps(XMM3, XMM3, _MM_SHUFFLE(3,1,2,0));	/* (x2r_1,x0i_1,x2r_0,x0i_0) */
+		XMM4	 = _mm_add_ps(XMM4, XMM1);
+		XMM1	 = XMM2;												/* x1  */
+		XMM0	 = _mm_sub_ps(XMM0, XMM3);
+		XMM3	 = XMM5;												/* x3  */
+#if	defined(__SSE3__)
+		XMM1	 = _mm_moveldup_ps(XMM1);
+		XMM2	 = _mm_movehdup_ps(XMM2);
+#else
+		XMM1	 = _mm_shuffle_ps(XMM1, XMM1, _MM_SHUFFLE(2,2,0,0));	/* x1r */
+		XMM2	 = _mm_shuffle_ps(XMM2, XMM2, _MM_SHUFFLE(3,3,1,1));	/* x1i */
+#endif
+		_mm_store_ps(a+j +4, XMM4);
+		_mm_store_ps(a+j2+4, XMM0);
+#if	defined(__SSE3__)
+		XMM3	 = _mm_moveldup_ps(XMM3);
+		XMM5	 = _mm_movehdup_ps(XMM5);
+#else
+		XMM3	 = _mm_shuffle_ps(XMM3, XMM3, _MM_SHUFFLE(2,2,0,0));	/* x3r */
+		XMM5	 = _mm_shuffle_ps(XMM5, XMM5, _MM_SHUFFLE(3,3,1,1));	/* x3i */
+#endif
+		XMM4	 = XMM1;												/* x1r */
+		XMM0	 = XMM2;												/* x1i */
+		XMM1	 = _mm_sub_ps(XMM1, XMM5);								/* x1r - x3i */
+		XMM2	 = _mm_add_ps(XMM2, XMM3);								/* x1i + x3r */
+		XMM3	 = _mm_sub_ps(XMM3, XMM0);								/* x3r - x1i */
+		XMM5	 = _mm_add_ps(XMM5, XMM4);								/* x3i + x1r */
+#if	defined(__SSE3__)
+		XMM1	 = _mm_addsub_ps(XMM1, XMM2);
+		XMM3	 = _mm_addsub_ps(XMM3, XMM5);
+#else
+		XMM2	 = _mm_xor_ps(XMM2, PCS_NRNR.ps);
+		XMM5	 = _mm_xor_ps(XMM5, PCS_NRNR.ps);
+		XMM1	 = _mm_add_ps(XMM1, XMM2);
+		XMM3	 = _mm_add_ps(XMM3, XMM5);
+#endif
+		XMM1	 = _mm_mul_ps(XMM1, XMM6);
+		XMM3	 = _mm_mul_ps(XMM3, XMM6);
+		_mm_store_ps(a+j1+4, XMM1);
+		_mm_store_ps(a+j3+4, XMM3);
+	}
+	m2 = 2 * m;	/* the k-loop handles two groups (2*m floats) per step */
+	for (k = m2; k < n; k += m2) {
+		for (j = k; j < l + k; j += 4) {	/* first group of the pair: multiplies by ctmdl[0..5] */
+			__m128	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5;
+			j1 = j  + l;
+			j2 = j1 + l;
+			j3 = j2 + l;
+			XMM0	 = _mm_load_ps(a+j );
+			XMM4	 = _mm_load_ps(a+j1);
+			XMM2	 = _mm_load_ps(a+j2);
+			XMM5	 = _mm_load_ps(a+j3);
+			XMM1	 = XMM0;
+			XMM3	 = XMM2;
+			XMM0	 = _mm_add_ps(XMM0, XMM4);
+			XMM2	 = _mm_add_ps(XMM2, XMM5);
+			XMM1	 = _mm_sub_ps(XMM1, XMM4);
+			XMM3	 = _mm_sub_ps(XMM3, XMM5);
+
+			XMM4	 = XMM0;
+			XMM5	 = XMM0;
+			XMM6	 = XMM2;	/* function-scope XMM6 used as scratch; W2 is no longer needed here */
+			XMM5	 = _mm_shuffle_ps(XMM5, XMM5, _MM_SHUFFLE(2,3,0,1));
+			XMM6	 = _mm_shuffle_ps(XMM6, XMM6, _MM_SHUFFLE(2,3,0,1));
+			XMM4	 = _mm_sub_ps(XMM4, XMM2);
+			XMM5	 = _mm_sub_ps(XMM5, XMM6);
+			XMM4	 = _mm_mul_ps(XMM4, *(ctmdl+ 2));
+			XMM5	 = _mm_mul_ps(XMM5, *(ctmdl+ 3));
+			XMM0	 = _mm_add_ps(XMM0, XMM2);
+			XMM2	 = XMM1;
+			XMM4	 = _mm_add_ps(XMM4, XMM5);
+			XMM5	 = XMM3;
+			_mm_store_ps(a+j , XMM0);
+			XMM0	 = XMM1;
+			_mm_store_ps(a+j2, XMM4);
+			XMM5	 = _mm_shuffle_ps(XMM5, XMM5, _MM_SHUFFLE(2,3,0,1));
+			XMM0	 = _mm_shuffle_ps(XMM0, XMM0, _MM_SHUFFLE(2,3,0,1));
+			XMM4	 = XMM0;
+			XMM5	 = _mm_xor_ps(XMM5, PCS_NRNR.ps);
+			XMM3	 = _mm_xor_ps(XMM3, PCS_NRNR.ps);
+			XMM1	 = _mm_add_ps(XMM1, XMM5);
+			XMM0	 = _mm_sub_ps(XMM0, XMM3);
+			XMM2	 = _mm_sub_ps(XMM2, XMM5);
+			XMM4	 = _mm_add_ps(XMM4, XMM3);
+			XMM1	 = _mm_mul_ps(XMM1, *(ctmdl   ));
+			XMM0	 = _mm_mul_ps(XMM0, *(ctmdl+ 1));
+			XMM2	 = _mm_mul_ps(XMM2, *(ctmdl+ 4));
+			XMM4	 = _mm_mul_ps(XMM4, *(ctmdl+ 5));
+			XMM1	 = _mm_add_ps(XMM1, XMM0);
+			XMM2	 = _mm_add_ps(XMM2, XMM4);
+			_mm_store_ps(a+j1, XMM1);
+			_mm_store_ps(a+j3, XMM2);
+		}
+		for (j = k + m; j < l + (k + m); j += 4) {	/* second group of the pair: multiplies by ctmdl[6..11] */
+			__m128	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5;
+			j1 = j  + l;
+			j2 = j1 + l;
+			j3 = j2 + l;
+			XMM0	 = _mm_load_ps(a+j );
+			XMM4	 = _mm_load_ps(a+j1);
+			XMM2	 = _mm_load_ps(a+j2);
+			XMM5	 = _mm_load_ps(a+j3);
+			XMM1	 = XMM0;
+			XMM3	 = XMM2;
+			XMM0	 = _mm_add_ps(XMM0, XMM4);
+			XMM2	 = _mm_add_ps(XMM2, XMM5);
+			XMM1	 = _mm_sub_ps(XMM1, XMM4);
+			XMM3	 = _mm_sub_ps(XMM3, XMM5);
+			XMM4	 = XMM0;
+			XMM5	 = XMM0;
+			XMM6	 = XMM2;
+			XMM5	 = _mm_shuffle_ps(XMM5, XMM5, _MM_SHUFFLE(2,3,0,1));
+			XMM6	 = _mm_shuffle_ps(XMM6, XMM6, _MM_SHUFFLE(2,3,0,1));
+			XMM4	 = _mm_sub_ps(XMM4, XMM2);
+			XMM5	 = _mm_sub_ps(XMM5, XMM6);
+			XMM4	 = _mm_mul_ps(XMM4, *(ctmdl+ 9));
+			XMM5	 = _mm_mul_ps(XMM5, *(ctmdl+ 8));
+			XMM0	 = _mm_add_ps(XMM0, XMM2);
+			XMM2	 = XMM1;
+			XMM5	 = _mm_sub_ps(XMM5, XMM4);	/* note: a subtraction here, where the first group adds */
+			XMM4	 = XMM3;
+			_mm_store_ps(a+j , XMM0);
+			XMM0	 = XMM1;
+			XMM4	 = _mm_shuffle_ps(XMM4, XMM4, _MM_SHUFFLE(2,3,0,1));
+			XMM0	 = _mm_shuffle_ps(XMM0, XMM0, _MM_SHUFFLE(2,3,0,1));
+			_mm_store_ps(a+j2, XMM5);
+			XMM5	 = XMM0;
+			XMM4	 = _mm_xor_ps(XMM4, PCS_NRNR.ps);
+			XMM3	 = _mm_xor_ps(XMM3, PCS_NRNR.ps);
+			XMM1	 = _mm_add_ps(XMM1, XMM4);
+			XMM0	 = _mm_sub_ps(XMM0, XMM3);
+			XMM2	 = _mm_sub_ps(XMM2, XMM4);
+			XMM5	 = _mm_add_ps(XMM5, XMM3);
+			XMM1	 = _mm_mul_ps(XMM1, *(ctmdl+ 6));
+			XMM0	 = _mm_mul_ps(XMM0, *(ctmdl+ 7));
+			XMM2	 = _mm_mul_ps(XMM2, *(ctmdl+10));
+			XMM5	 = _mm_mul_ps(XMM5, *(ctmdl+11));
+			XMM1	 = _mm_add_ps(XMM1, XMM0);
+			XMM2	 = _mm_add_ps(XMM2, XMM5);
+			_mm_store_ps(a+j1, XMM1);
+			_mm_store_ps(a+j3, XMM2);
+		}
+		ctmdl	+= 12;	/* next k step uses the following 12 coefficient vectors */
+	}
+}
+
+
+static inline void bitrv2(int n, int *ip, float *a)	/* Reorders a[] in place by swapping 64-bit (two-float) elements at offsets taken from the table ip[]. */
+{
+	int j, j1, k, k1, l, m = 0, m2;
+	float xr, xi, yr, yi;
+	/* Only n = 256, 512, 1024, 2048, 4096 select a non-zero m; for any other n both loop nests below execute zero times. */
+	ip[0] = 0;	/* the only write to ip[]; all other entries are just read below */
+	l = n;
+	if(n==256)
+	{
+		l	 = 32;
+		m	 = 8;
+	}
+	else if(n==512)
+	{
+		l	 = 64;
+		m	 = 8;
+	}
+	else if(n==1024)
+	{
+		l	 = 64;
+		m	 = 16;
+	}
+	else if(n==2048)
+	{
+		l	 = 128;
+		m	 = 16;
+	}
+	else if(n==4096)
+	{
+		l	 = 128;
+		m	 = 32;
+	}
+	m2 = 2 * m;
+	if ((m << 3) == l) {	/* true for n == 512 and n == 2048 */
+		for (k = 0; k < m; k++) {
+			for (j = 0; j < k; j++) {	/* four swaps per (j,k): j1<->k1, j1+m2<->k1+2*m2, j1+2*m2<->k1+m2, j1+3*m2<->k1+3*m2 */
+				__m128	X0, Y0, X1, Y1;	/* NOTE(review): passed to _mm_loadl_pi uninitialised; the other half is filled by _mm_loadh_pi */
+				j1 = 2 * j + ip[k];
+				k1 = 2 * k + ip[j];
+				X0	 = _mm_loadl_pi(X0, (__m64*)(a+j1     ));
+				Y0	 = _mm_loadl_pi(Y0, (__m64*)(a+k1     ));
+				X1	 = _mm_loadl_pi(X1, (__m64*)(a+j1+m2*2));
+				Y1	 = _mm_loadl_pi(Y1, (__m64*)(a+k1+m2  ));
+				X0	 = _mm_loadh_pi(X0, (__m64*)(a+j1+m2  ));
+				Y0	 = _mm_loadh_pi(Y0, (__m64*)(a+k1+m2*2));
+				X1	 = _mm_loadh_pi(X1, (__m64*)(a+j1+m2*3));
+				Y1	 = _mm_loadh_pi(Y1, (__m64*)(a+k1+m2*3));
+				_mm_storel_pi((__m64*)(a+k1     ), X0);
+				_mm_storel_pi((__m64*)(a+j1     ), Y0);
+				_mm_storel_pi((__m64*)(a+k1+m2  ), X1);
+				_mm_storel_pi((__m64*)(a+j1+m2*2), Y1);
+				_mm_storeh_pi((__m64*)(a+k1+m2*2), X0);
+				_mm_storeh_pi((__m64*)(a+j1+m2  ), Y0);
+				_mm_storeh_pi((__m64*)(a+k1+m2*3), X1);
+				_mm_storeh_pi((__m64*)(a+j1+m2*3), Y1);
+			}
+			j1 = 2 * k + m2 + ip[k];	/* one extra scalar swap per k: the pair at j1 with the pair m2 floats further on */
+			k1 = j1 + m2;
+			xr = a[j1];
+			xi = a[j1 + 1];
+			yr = a[k1];
+			yi = a[k1 + 1];
+			a[j1] = yr;
+			a[j1 + 1] = yi;
+			a[k1] = xr;
+			a[k1 + 1] = xi;
+		}
+	} else {	/* n == 256, 1024, 4096 (and, with m == 0, every unlisted n) */
+		for (k = 1; k < m; k++) {
+			for (j = 0; j < k; j++) {	/* two swaps per (j,k): j1<->k1 and j1+m2<->k1+m2 */
+				__m128	X, Y;
+				j1 = 2 * j + ip[k];
+				k1 = 2 * k + ip[j];
+				X	 = _mm_loadl_pi(X, (__m64*)(a+j1   ));
+				Y	 = _mm_loadl_pi(Y, (__m64*)(a+k1   ));
+				X	 = _mm_loadh_pi(X, (__m64*)(a+j1+m2));
+				Y	 = _mm_loadh_pi(Y, (__m64*)(a+k1+m2));
+				_mm_storel_pi((__m64*)(a+k1   ), X);
+				_mm_storel_pi((__m64*)(a+j1   ), Y);
+				_mm_storeh_pi((__m64*)(a+k1+m2), X);
+				_mm_storeh_pi((__m64*)(a+j1+m2), Y);
+			}
+		}
+	}
+}
+
+
+STIN void cftfsub(int n, float *a)	/* In-place multi-pass transform of a[0..n): cft1st(), then cftmdl() with l = 8, 32, ..., then one closing pass chosen by the final l. */
+{
+	int j, j1, j2, j3, l;
+	/* NOTE(review): for n <= 8, l stays 2 and a+j1 = a+2 reaches _mm_load_ps below - looks unsupported; the drftf* wrappers only pass n >= 256. */
+	l = 2;
+	if (n > 8) {
+		cft1st(n, a);
+		l = 8;
+		while ((l << 2) < n) {	/* each cftmdl pass multiplies the sub-block length by 4 */
+			cftmdl(n, l, a);
+			l <<= 2;
+		}
+	}
+	if ((l << 2) == n) {	/* four sub-blocks of l floats span a[0..n): closing 4-way pass, no coefficient multiply */
+		for (j = 0; j < l; j += 4) {
+			__m128	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5;
+
+			j1 = j  + l;
+			j2 = j1 + l;
+			j3 = j2 + l;
+
+			XMM0	 = _mm_load_ps(a+j );
+			XMM4	 = _mm_load_ps(a+j1);
+			XMM2	 = _mm_load_ps(a+j2);
+			XMM5	 = _mm_load_ps(a+j3);
+			XMM1	 = XMM0;
+			XMM3	 = XMM2;
+			XMM0	 = _mm_add_ps(XMM0, XMM4);
+			XMM2	 = _mm_add_ps(XMM2, XMM5);
+			XMM1	 = _mm_sub_ps(XMM1, XMM4);
+			XMM3	 = _mm_sub_ps(XMM3, XMM5);
+			XMM4	 = XMM0;
+			XMM5	 = XMM1;
+			XMM3	 = _mm_shuffle_ps(XMM3, XMM3, _MM_SHUFFLE(2,3,0,1));	/* swap the two floats inside each pair */
+			XMM0	 = _mm_add_ps(XMM0, XMM2);
+			XMM4	 = _mm_sub_ps(XMM4, XMM2);
+			XMM3	 = _mm_xor_ps(XMM3, PCS_NRNR.ps);	/* PCS_NRNR is a bit mask defined elsewhere; presumably flips the sign of alternate lanes - confirm */
+			_mm_store_ps(a+j , XMM0);
+			_mm_store_ps(a+j2, XMM4);
+			XMM1	 = _mm_add_ps(XMM1, XMM3);
+			XMM5	 = _mm_sub_ps(XMM5, XMM3);
+			_mm_store_ps(a+j1, XMM1);
+			_mm_store_ps(a+j3, XMM5);
+		}
+	} else {	/* otherwise a 2-way pass: a[j] and a[j+l] become their sum and difference (for the power-of-two sizes used here, 2*l == n) */
+		for (j = 0; j < l; j += 8)
+		{
+			__m128	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5;
+			j1 = j + l;
+
+			XMM0	 = _mm_load_ps(a+j   );
+			XMM4	 = _mm_load_ps(a+j1  );
+			XMM1	 = _mm_load_ps(a+j+ 4);
+			XMM5	 = _mm_load_ps(a+j1+4);
+			XMM2	 = XMM0;
+			XMM3	 = XMM1;
+			XMM0	 = _mm_add_ps(XMM0, XMM4);
+			XMM1	 = _mm_add_ps(XMM1, XMM5);
+			XMM2	 = _mm_sub_ps(XMM2, XMM4);
+			XMM3	 = _mm_sub_ps(XMM3, XMM5);
+			_mm_store_ps(a+j   , XMM0);
+			_mm_store_ps(a+j +4, XMM1);
+			_mm_store_ps(a+j1  , XMM2);
+			_mm_store_ps(a+j1+4, XMM3);
+		}
+	}
+}
+
+STIN void rftfsub(int n, float *a, int nc, float *w)	/* In-place pass over a[0..n) that combines each pair at j with its mirror pair at n-j, using coefficients from w; nc is not used. */
+{
+	int		j, k, m, o;
+	/* a+j and w+j*2 are read with _mm_load_ps (16-byte aligned); the mirrored side uses 64-bit half loads/stores. */
+	m	 = n >> 1;
+	j	 = 2;
+	{	/* the pair at j = 2 (mirror k = n-2) is done in scalar code with w[0], w[1] */
+		float	wkr, wki, xr, xi, yr, yi;
+		k	 = n - j;
+		wkr	 = w[0];
+		wki	 = w[1];
+		xr	 = a[j  ] - a[k  ];
+		xi	 = a[j+1] + a[k+1];
+		yr	 = wkr * xr - wki * xi;
+		yi	 = wkr * xi + wki * xr;
+		a[j  ]	-= yr;
+		a[j+1]	-= yi;
+		a[k  ]	+= yr;
+		a[k+1]	-= yi;
+		j	+= 2;
+	}
+	n	-= 2;	/* from here k = n - j, so a+k+2 mirrors a+j and a+k mirrors a+j+2 */
+	w	-= 4;	/* rebase: with j = 4, w+j*2 now addresses the caller's w+4 */
+	o	 = ((m-j)&(~7))+j;	/* end of the range that can be processed 8 floats at a time */
+	for(;j<o;j+=8)	/* two 4-float vectors per iteration */
+	{
+		__m128	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6;
+		k	 = n - j;
+		XMM0	 = _mm_loadl_pi(XMM0, (__m64*)(a+k+2));	/* NOTE(review): XMM0 is read uninitialised here; its high half is set two lines below */
+		XMM5	 = _mm_load_ps(w+j*2   );
+		XMM0	 = _mm_loadh_pi(XMM0, (__m64*)(a+k  ));
+		XMM6	 = _mm_load_ps(w+j*2+ 4);
+		XMM1	 = XMM0;
+		XMM0	 = _mm_xor_ps(XMM0, PCS_NRNR.ps);	/* PCS_NRNR is a bit mask defined elsewhere; presumably flips the sign of alternate lanes - confirm */
+		XMM2	 = _mm_load_ps(a+j  );
+		XMM0	 = _mm_add_ps(XMM0, XMM2);
+		XMM3	 = XMM0;
+		XMM3	 = _mm_shuffle_ps(XMM3, XMM3, _MM_SHUFFLE(2,3,0,1));	/* swap the two floats inside each pair */
+		XMM0	 = _mm_mul_ps(XMM0, XMM5);
+		XMM5	 = _mm_loadl_pi(XMM5, (__m64*)(a+k-2));	/* start loading the mirror of the second vector */
+		XMM3	 = _mm_mul_ps(XMM3, XMM6);
+		XMM6	 = _mm_load_ps(w+j*2+ 8);
+		XMM0	 = _mm_add_ps(XMM0, XMM3);
+		XMM5	 = _mm_loadh_pi(XMM5, (__m64*)(a+k-4));
+		XMM4	 = XMM0;
+		XMM2	 = _mm_sub_ps(XMM2, XMM0);
+		XMM4	 = _mm_xor_ps(XMM4, PCS_NRNR.ps);
+		_mm_store_ps(a+j  , XMM2);
+		XMM3	 = _mm_load_ps(w+j*2+12);
+		XMM1	 = _mm_sub_ps(XMM1, XMM4);
+		XMM0	 = XMM5;
+		_mm_storel_pi((__m64*)(a+k+2), XMM1);
+		XMM2	 = _mm_load_ps(a+j+4);
+		XMM5	 = _mm_xor_ps(XMM5, PCS_NRNR.ps);
+		_mm_storeh_pi((__m64*)(a+k  ), XMM1);
+		XMM5	 = _mm_add_ps(XMM5, XMM2);
+		XMM4	 = XMM5;
+		XMM4	 = _mm_shuffle_ps(XMM4, XMM4, _MM_SHUFFLE(2,3,0,1));
+		XMM5	 = _mm_mul_ps(XMM5, XMM6);
+		XMM4	 = _mm_mul_ps(XMM4, XMM3);
+		XMM5	 = _mm_add_ps(XMM5, XMM4);
+		XMM1	 = XMM5;
+		XMM2	 = _mm_sub_ps(XMM2, XMM5);
+		XMM1	 = _mm_xor_ps(XMM1, PCS_NRNR.ps);
+		_mm_store_ps(a+j+4, XMM2);
+		XMM0	 = _mm_sub_ps(XMM0, XMM1);
+		_mm_storel_pi((__m64*)(a+k-2), XMM0);
+		_mm_storeh_pi((__m64*)(a+k-4), XMM0);
+	}
+	for(;j<m;j+=4)	/* remainder up to m = n/2 (original n), one 4-float vector per iteration */
+	{
+		__m128	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6;
+		k	 = n - j;
+		XMM0	 = _mm_loadl_pi(XMM0, (__m64*)(a+k+2));
+		XMM5	 = _mm_load_ps(w+j*2  );
+		XMM0	 = _mm_loadh_pi(XMM0, (__m64*)(a+k  ));
+		XMM6	 = _mm_load_ps(w+j*2+4);
+		XMM1	 = XMM0;
+		XMM0	 = _mm_xor_ps(XMM0, PCS_NRNR.ps);
+		XMM2	 = _mm_load_ps(a+j  );
+		XMM0	 = _mm_add_ps(XMM0, XMM2);
+		XMM3	 = XMM0;
+		XMM3	 = _mm_shuffle_ps(XMM3, XMM3, _MM_SHUFFLE(2,3,0,1));
+		XMM0	 = _mm_mul_ps(XMM0, XMM5);
+		XMM3	 = _mm_mul_ps(XMM3, XMM6);
+		XMM0	 = _mm_add_ps(XMM0, XMM3);
+		XMM4	 = XMM0;
+		XMM2	 = _mm_sub_ps(XMM2, XMM0);
+		XMM4	 = _mm_xor_ps(XMM4, PCS_NRNR.ps);
+		_mm_store_ps(a+j  , XMM2);
+		XMM1	 = _mm_sub_ps(XMM1, XMM4);
+		_mm_storel_pi((__m64*)(a+k+2), XMM1);
+		_mm_storeh_pi((__m64*)(a+k  ), XMM1);
+	}
+}
+
+STIN void rdft(int n, float *a, int *ip, float *w)	/* Transforms a[0..n) in place: bitrv2(), cftfsub(), rftfsub(), then a final fix-up of a[0] and a[1]. */
+{
+	int nw, nc;
+	float xi;
+	/* ip[0] and ip[1] are header ints; the offset table handed to bitrv2() starts at ip+2. */
+	nw = ip[0];	/* read but not used below */
+	nc = ip[1];	/* forwarded to rftfsub(), which ignores it */
+	if (n > 4) {
+		bitrv2(n, ip + 2, a);
+		cftfsub(n, a);
+		rftfsub(n, a, nc, w);
+	} else if (n == 4) {	/* NOTE(review): not reached through the drftf* wrappers, which pass n >= 256 */
+		cftfsub(n, a);
+	}
+	xi = a[0] - a[1];	/* a[0] becomes old a[0]+a[1], a[1] becomes old a[0]-a[1] */
+	a[0] += a[1];
+	a[1] = xi;
+}
+
+/*------ Interface ------*/
+
+static void drftf256(float* a)	/* Runs rdft() in place on 256 floats with the IP256/W256 tables (defined elsewhere in this file). */
+{
+	rdft(256, a, IP256, W256);
+}
+static void drftf512(float* a)	/* Runs rdft() in place on 512 floats with the IP512/W512 tables (defined elsewhere in this file). */
+{
+	rdft(512, a, IP512, W512);
+}
+static void drftf1024(float* a)	/* Runs rdft() in place on 1024 floats with the IP1024/W1024 tables (defined elsewhere in this file). */
+{
+	rdft(1024, a, IP1024, W1024);
+}
+static void drftf2048(float* a)	/* Runs rdft() in place on 2048 floats with the IP2048/W2048 tables (defined elsewhere in this file). */
+{
+	rdft(2048, a, IP2048, W2048);
+}
+static void drftf4096(float* a)	/* Runs rdft() in place on 4096 floats with the IP4096/W4096 tables (defined elsewhere in this file). */
+{
+	rdft(4096, a, IP4096, W4096);
+}
+#endif														/* SSE Optimize */
 
 static void drfti1(int n, float *wa, int *ifac){
   static int ntryh[4] = { 4,2,3,5 };
@@ -1231,16 +6754,52 @@
 
 void drft_forward(drft_lookup *l,float *data){
   if(l->n==1)return;
+#ifdef __SSE__												/* SSE Optimize: the five power-of-two sizes below go to the table-driven drftf* wrappers and return early */
+	if(l->n==256)
+	{
+		drftf256(data);
+		return;
+	}
+	if(l->n==512)
+	{
+		drftf512(data);
+		return;
+	}
+	if(l->n==1024)
+	{
+		drftf1024(data);
+		return;
+	}
+	if(l->n==2048)
+	{
+		drftf2048(data);
+		return;
+	}
+	if(l->n==4096)
+	{
+		drftf4096(data);
+		return;
+	}
+#endif														/* SSE Optimize */
+  {	/* every other size: generic drftf1() path, which needs the trigcache/splitcache set up by drft_init() */
   drftf1(l->n,data,l->trigcache,l->trigcache+l->n,l->splitcache);
+  }
 }
 
 void drft_backward(drft_lookup *l,float *data){
+#ifdef __SSE__	/* SSE Optimize: sizes 256,512,1024,2048,4096 have no caches (see drft_init), so the inverse is a no-op for them; the power-of-two test keeps this in step with drft_forward's exact-size dispatch */
+  if (l->n==1||(l->n>=256&&l->n<=4096&&!(l->n&(l->n-1))))return;
+#else														/* SSE Optimize */
   if (l->n==1)return;
+#endif														/* SSE Optimize */
   drftb1(l->n,data,l->trigcache,l->trigcache+l->n,l->splitcache);
 }
 
 void drft_init(drft_lookup *l,int n){
   l->n=n;
+#ifdef __SSE__	/* SSE Optimize: only the power-of-two sizes that drft_forward hands to drftf256..drftf4096 may skip the cache allocation; any other n in 256..4096 still reaches drftf1 and needs trigcache/splitcache */
+  if (l->n>=256&&l->n<=4096&&!(l->n&(l->n-1)))return;
+#endif														/* SSE Optimize */
   l->trigcache=_ogg_calloc(3*n,sizeof(*l->trigcache));
   l->splitcache=_ogg_calloc(32,sizeof(*l->splitcache));
   fdrffti(n, l->trigcache, l->splitcache);
@@ -1248,8 +6807,12 @@
 
 void drft_clear(drft_lookup *l){
   if(l){
+#ifdef __SSE__	/* SSE Optimize: must mirror the test in drft_init - nothing was allocated for these sizes, and every other size must be freed */
+    if (l->n>=256&&l->n<=4096&&!(l->n&(l->n-1)))return;
+#endif														/* SSE Optimize */
     if(l->trigcache)_ogg_free(l->trigcache);
     if(l->splitcache)_ogg_free(l->splitcache);
     memset(l,0,sizeof(*l));
   }
 }
+
diff -ruN --exclude '*.in' libvorbis-1.2.0/lib/vorbisenc.c libvorbis-1.2.0-sse/lib/vorbisenc.c
--- libvorbis-1.2.0/lib/vorbisenc.c	2007-08-02 12:42:08.000000000 +0200
+++ libvorbis-1.2.0-sse/lib/vorbisenc.c	2007-08-02 12:43:15.000000000 +0200
@@ -23,6 +23,9 @@
 #include "vorbis/vorbisenc.h"
 
 #include "codec_internal.h"
+#ifdef __SSE__												/* SSE Optimize */
+#include "xmmlib.h"
+#endif														/* SSE Optimize */
 
 #include "os.h"
 #include "misc.h"
diff -ruN --exclude '*.in' libvorbis-1.2.0/lib/vorbisfile.c libvorbis-1.2.0-sse/lib/vorbisfile.c
--- libvorbis-1.2.0/lib/vorbisfile.c	2007-07-24 03:08:23.000000000 +0200
+++ libvorbis-1.2.0-sse/lib/vorbisfile.c	2007-08-02 12:43:15.000000000 +0200
@@ -26,6 +26,9 @@
 
 #include "os.h"
 #include "misc.h"
+#ifdef __SSE__												/* SSE Optimize */
+#include "xmmlib.h"
+#endif														/* SSE Optimize */
 
 /* A 'chained bitstream' is a Vorbis bitstream that contains more than
    one logical bitstream arranged end to end (the only form of Ogg
@@ -1706,6 +1709,135 @@
   return 0;
 }
 
+#ifdef	__SSE__											/* SSE Optimize */
+STIN void ov_read_float2pcm(float *src1, float *src2, short *dest, long samples)
+{	/* Scale src1/src2 by 32768, saturate to int16 and interleave into dest as src1[0],src2[0],src1[1],... (2*samples shorts). Sources may be unaligned. */
+	register long	i;
+#if 0 //	defined(__SSE2__)
+	int samples8	 = samples&(~15);
+	static _MM_ALIGN16 const float parm[4]	 = {
+		32768.f, 32768.f, 32768.f, 32768.f
+	};
+	for(i=0;i<samples8;i+=8)
+	{
+		__m128i	XMM0	 = _mm_castps_si128(_mm_load_ps(src1+i  ));
+		__m128i	XMM2	 = _mm_castps_si128(_mm_load_ps(src1+i+4));
+		__m128i	XMM1	 = _mm_castps_si128(_mm_load_ps(src2+i  ));
+		__m128i	XMM3	 = _mm_castps_si128(_mm_load_ps(src2+i+4));
+		XMM0	 = _mm_castps_si128(_mm_mul_ps(_mm_castsi128_ps(XMM0), PM128(parm)));
+		XMM2	 = _mm_castps_si128(_mm_mul_ps(_mm_castsi128_ps(XMM2), PM128(parm)));
+		XMM1	 = _mm_castps_si128(_mm_mul_ps(_mm_castsi128_ps(XMM1), PM128(parm)));
+		XMM3	 = _mm_castps_si128(_mm_mul_ps(_mm_castsi128_ps(XMM3), PM128(parm)));
+		XMM0	 = _mm_cvtps_epi32(_mm_castsi128_ps(XMM0));
+		XMM2	 = _mm_cvtps_epi32(_mm_castsi128_ps(XMM2));
+		XMM1	 = _mm_cvtps_epi32(_mm_castsi128_ps(XMM1));
+		XMM3	 = _mm_cvtps_epi32(_mm_castsi128_ps(XMM3));
+		XMM0	 = _mm_packs_epi32(XMM0, XMM2);
+		XMM1	 = _mm_packs_epi32(XMM1, XMM3);
+		XMM2	 = XMM0;
+		XMM0	 = _mm_unpacklo_epi16(XMM0, XMM1);
+		XMM2	 = _mm_unpackhi_epi16(XMM2, XMM1);
+		_mm_store_si128((__m128i*)(dest+i*2   ), XMM0);
+		_mm_store_si128((__m128i*)(dest+i*2+ 8), XMM2);
+	}
+#else
+	int samples4	 = samples&(~7);	/* sample count rounded down to a multiple of 8 for the first loop */
+	static _MM_ALIGN16 const float parm[4]	 = {
+		32768.f, 32768.f, 32768.f, 32768.f
+	};
+	register __m128	XMM0, XMM1, XMM2, XMM3;
+	register __m64	MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7;
+	for(i=0;i<samples4;i+=8){	/* 8 samples per channel per pass */
+		XMM0	 = _mm_loadu_ps(src1+i  );	/* unaligned loads: src1/src2 are not required to be 16-byte aligned */
+		XMM1	 = _mm_loadu_ps(src2+i  );
+		XMM2	 = _mm_loadu_ps(src1+i+4);
+		XMM3	 = _mm_loadu_ps(src2+i+4);
+		XMM0	 = _mm_mul_ps(XMM0, PM128(parm));
+		XMM1	 = _mm_mul_ps(XMM1, PM128(parm));
+		XMM2	 = _mm_mul_ps(XMM2, PM128(parm));
+		XMM3	 = _mm_mul_ps(XMM3, PM128(parm));
+		MM0		 = _mm_cvtps_pi32(XMM0);
+		MM2		 = _mm_cvtps_pi32(XMM1);
+		MM4		 = _mm_cvtps_pi32(XMM2);
+		MM6		 = _mm_cvtps_pi32(XMM3);
+		/* only the low two lanes were converted above; move the high two lanes down for the second conversion */
+		XMM0	 = _mm_movehl_ps(XMM0, XMM0);
+		XMM1	 = _mm_movehl_ps(XMM1, XMM1);
+		XMM2	 = _mm_movehl_ps(XMM2, XMM2);
+		XMM3	 = _mm_movehl_ps(XMM3, XMM3);
+
+		MM1		 = _mm_cvtps_pi32(XMM0);
+		MM3		 = _mm_cvtps_pi32(XMM1);
+		MM5		 = _mm_cvtps_pi32(XMM2);
+		MM7		 = _mm_cvtps_pi32(XMM3);
+		/* the signed-saturating pack is what clamps samples to the int16 range */
+		MM0		 = _mm_packs_pi32(MM0, MM1);
+		MM2		 = _mm_packs_pi32(MM2, MM3);
+		MM4		 = _mm_packs_pi32(MM4, MM5);
+		MM6		 = _mm_packs_pi32(MM6, MM7);
+
+		MM1		 = MM0;
+		MM5		 = MM4;
+		/* interleave the 16-bit samples of the two channels */
+		MM0	 = _mm_unpacklo_pi16(MM0, MM2);
+		MM1	 = _mm_unpackhi_pi16(MM1, MM2);
+		MM4	 = _mm_unpacklo_pi16(MM4, MM6);
+		MM5	 = _mm_unpackhi_pi16(MM5, MM6);
+
+		SETPM64((int *)(dest+i*2), MM0);	/* SETPM64 is declared with int*; dest is short* */
+		SETPM64((int *)(dest+i*2+ 4), MM1);
+		SETPM64((int *)(dest+i*2+ 8), MM4);
+		SETPM64((int *)(dest+i*2+12), MM5);
+	}
+	samples4	 = samples&(~3);
+	for(;i<samples4;i+=4){	/* at most one further pass of 4 samples per channel */
+		XMM0	 = _mm_loadu_ps(src1+i  );
+		XMM1	 = _mm_loadu_ps(src2+i  );
+		XMM0	 = _mm_mul_ps(XMM0, PM128(parm));
+		XMM1	 = _mm_mul_ps(XMM1, PM128(parm));
+
+		MM0		 = _mm_cvtps_pi32(XMM0);
+		MM2		 = _mm_cvtps_pi32(XMM1);
+
+		XMM0	 = _mm_movehl_ps(XMM0, XMM0);
+		XMM1	 = _mm_movehl_ps(XMM1, XMM1);
+
+		MM1		 = _mm_cvtps_pi32(XMM0);
+		MM3		 = _mm_cvtps_pi32(XMM1);
+
+		MM0		 = _mm_packs_pi32(MM0, MM1);
+		MM2		 = _mm_packs_pi32(MM2, MM3);
+
+		MM1		 = MM0;
+
+		MM0	 = _mm_unpacklo_pi16(MM0, MM2);
+		MM1	 = _mm_unpackhi_pi16(MM1, MM2);
+
+		SETPM64((int *)(dest+i*2), MM0);
+		SETPM64((int *)(dest+i*2+ 4), MM1);
+	}
+	_mm_empty();	/* leave MMX state before any later x87 floating-point code runs */
+#endif
+	for(;i<samples;i++)
+	{	/* scalar tail (fewer than 4 samples). NOTE(review): the (short) cast truncates toward zero, while the vector loops convert using the current SSE rounding mode */
+		float	f1	 = src1[i];
+		float	f2	 = src2[i];
+		f1	*= 32768.f;
+		f2	*= 32768.f;
+		if(f1>32767.f)
+			f1	 = 32767.f;
+		if(f1<-32768.f)
+			f1	 =-32768.f;
+		if(f2>32767.f)
+			f2	 = 32767.f;
+		if(f2<-32768.f)
+			f2	 =-32768.f;
+		dest[i*2  ]	 = (short)f1;
+		dest[i*2+1]	 = (short)f2;
+	}
+}
+#endif														/* SSE Optimize */
+
 /* up to this point, everything could more or less hide the multiple
    logical bitstream nature of chaining from the toplevel application
    if the toplevel application didn't particularly care.  However, at
@@ -1795,7 +1927,26 @@
 	
 	if(host_endian==bigendianp){
 	  if(sgned){
-	    
+#ifdef __SSE__												/* SSE Optimize */
+		if(channels==2){
+			ov_read_float2pcm(pcm[0], pcm[1], ((short *)buffer), samples);
+		}else{
+			for(i=0;i<channels;i++){ /* It's faster in this order */
+				float	*src	 = pcm[i];
+				short	*dest	 = ((short *)buffer)+i;
+				for(j=0;j<samples;j++){
+					val	 = vorbis_ftoi(src[j]*32768.f);
+					if(val>32767)
+						val	 = 32767;
+					else
+						if(val<-32768)
+							val	 = -32768;
+					*dest=val;
+					dest+=channels;
+				}
+			}
+		}
+#else														/* SSE Optimize */
 	    vorbis_fpu_setround(&fpu);
 	    for(i=0;i<channels;i++) { /* It's faster in this order */
 	      float *src=pcm[i];
@@ -1810,6 +1961,8 @@
 	    }
 	    vorbis_fpu_restore(fpu);
 	    
+#endif														/* SSE Optimize */
+	    
 	  }else{
 	    
 	    vorbis_fpu_setround(&fpu);
diff -ruN --exclude '*.in' libvorbis-1.2.0/lib/window.c libvorbis-1.2.0-sse/lib/window.c
--- libvorbis-1.2.0/lib/window.c	2007-07-24 02:09:47.000000000 +0200
+++ libvorbis-1.2.0-sse/lib/window.c	2007-08-02 12:43:15.000000000 +0200
@@ -19,8 +19,15 @@
 #include <math.h>
 #include "os.h"
 #include "misc.h"
+#ifdef __SSE__												/* SSE Optimize */
+#include "xmmlib.h"
+#endif														/* SSE Optimize */
 
+#ifdef __SSE__												/* SSE Optimize */
+static _MM_ALIGN16 const float vwin64[32] = {
+#else														/* SSE Optimize */
 static float vwin64[32] = {
+#endif														/* SSE Optimize */
   0.0009460463F, 0.0085006468F, 0.0235352254F, 0.0458950567F, 
   0.0753351908F, 0.1115073077F, 0.1539457973F, 0.2020557475F, 
   0.2551056759F, 0.3122276645F, 0.3724270287F, 0.4346027792F, 
@@ -31,7 +38,11 @@
   0.9989462667F, 0.9997230082F, 0.9999638688F, 0.9999995525F, 
 };
 
+#ifdef __SSE__												/* SSE Optimize */
+static _MM_ALIGN16 const float vwin128[64] = {
+#else														/* SSE Optimize */
 static float vwin128[64] = {
+#endif														/* SSE Optimize */
   0.0002365472F, 0.0021280687F, 0.0059065254F, 0.0115626550F, 
   0.0190823442F, 0.0284463735F, 0.0396300935F, 0.0526030430F, 
   0.0673285281F, 0.0837631763F, 0.1018564887F, 0.1215504095F, 
@@ -50,7 +61,11 @@
   0.9999331503F, 0.9999825563F, 0.9999977357F, 0.9999999720F, 
 };
 
+#ifdef __SSE__												/* SSE Optimize */
+static _MM_ALIGN16 const float vwin256[128] = {
+#else														/* SSE Optimize */
 static float vwin256[128] = {
+#endif														/* SSE Optimize */
   0.0000591390F, 0.0005321979F, 0.0014780301F, 0.0028960636F, 
   0.0047854363F, 0.0071449926F, 0.0099732775F, 0.0132685298F, 
   0.0170286741F, 0.0212513119F, 0.0259337111F, 0.0310727950F, 
@@ -85,7 +100,11 @@
   0.9999958064F, 0.9999989077F, 0.9999998584F, 0.9999999983F, 
 };
 
+#ifdef __SSE__												/* SSE Optimize */
+static _MM_ALIGN16 const float vwin512[256] = {
+#else														/* SSE Optimize */
 static float vwin512[256] = {
+#endif														/* SSE Optimize */
   0.0000147849F, 0.0001330607F, 0.0003695946F, 0.0007243509F, 
   0.0011972759F, 0.0017882983F, 0.0024973285F, 0.0033242588F, 
   0.0042689632F, 0.0053312973F, 0.0065110982F, 0.0078081841F, 
@@ -152,7 +171,11 @@
   0.9999997377F, 0.9999999317F, 0.9999999911F, 0.9999999999F, 
 };
 
+#ifdef __SSE__												/* SSE Optimize */
+static _MM_ALIGN16 const float vwin1024[512] = {
+#else														/* SSE Optimize */
 static float vwin1024[512] = {
+#endif														/* SSE Optimize */
   0.0000036962F, 0.0000332659F, 0.0000924041F, 0.0001811086F, 
   0.0002993761F, 0.0004472021F, 0.0006245811F, 0.0008315063F, 
   0.0010679699F, 0.0013339631F, 0.0016294757F, 0.0019544965F, 
@@ -283,7 +306,11 @@
   0.9999999836F, 0.9999999957F, 0.9999999994F, 1.0000000000F, 
 };
 
+#ifdef __SSE__												/* SSE Optimize */
+static _MM_ALIGN16 const float vwin2048[1024] = {
+#else														/* SSE Optimize */
 static float vwin2048[1024] = {
+#endif														/* SSE Optimize */
   0.0000009241F, 0.0000083165F, 0.0000231014F, 0.0000452785F, 
   0.0000748476F, 0.0001118085F, 0.0001561608F, 0.0002079041F, 
   0.0002670379F, 0.0003335617F, 0.0004074748F, 0.0004887765F, 
@@ -542,7 +569,11 @@
   0.9999999990F, 0.9999999997F, 1.0000000000F, 1.0000000000F, 
 };
 
+#ifdef __SSE__												/* SSE Optimize */
+static _MM_ALIGN16 const float vwin4096[2048] = {
+#else														/* SSE Optimize */
 static float vwin4096[2048] = {
+#endif														/* SSE Optimize */
   0.0000002310F, 0.0000020791F, 0.0000057754F, 0.0000113197F, 
   0.0000187121F, 0.0000279526F, 0.0000390412F, 0.0000519777F, 
   0.0000667623F, 0.0000833949F, 0.0001018753F, 0.0001222036F, 
@@ -1057,7 +1088,11 @@
   0.9999999999F, 1.0000000000F, 1.0000000000F, 1.0000000000F, 
 };
 
+#ifdef __SSE__												/* SSE Optimize */
+static _MM_ALIGN16 const float vwin8192[4096] = {
+#else														/* SSE Optimize */
 static float vwin8192[4096] = {
+#endif														/* SSE Optimize */
   0.0000000578F, 0.0000005198F, 0.0000014438F, 0.0000028299F, 
   0.0000046780F, 0.0000069882F, 0.0000097604F, 0.0000129945F, 
   0.0000166908F, 0.0000208490F, 0.0000254692F, 0.0000305515F, 
@@ -2084,7 +2119,7 @@
   1.0000000000F, 1.0000000000F, 1.0000000000F, 1.0000000000F, 
 };
 
-static float *vwin[8] = {
+static const float *vwin[8] = {
   vwin64,
   vwin128,
   vwin256,
@@ -2095,7 +2130,7 @@
   vwin8192,
 };
 
-float *_vorbis_window_get(int n){
+const float *_vorbis_window_get(int n){
   return vwin[n];
 }
 
@@ -2105,8 +2140,8 @@
   nW=(W?nW:0);
   
   {
-    float *windowLW=vwin[winno[lW]];
-    float *windowNW=vwin[winno[nW]];
+    const float *windowLW=vwin[winno[lW]];
+    const float *windowNW=vwin[winno[nW]];
 
     long n=blocksizes[W];
     long ln=blocksizes[lW];
@@ -2120,6 +2155,96 @@
     
     int i,p;
     
+#ifdef __SSE__												/* SSE Optimize */
+	if(leftbegin>0)
+	{
+		__m128	XMM0	 = _mm_setzero_ps();
+		__m128	XMM1	 = _mm_setzero_ps();
+		__m128	XMM2	 = _mm_setzero_ps();
+		__m128	XMM3	 = _mm_setzero_ps();
+		/* Zero d[0..leftbegin) 16 floats per pass.  A 32-float stride writes 16
+		 * floats past leftbegin whenever leftbegin is an odd multiple of 16,
+		 * wiping samples that the window loop below still has to scale.
+		 * NOTE(review): assumes leftbegin is a multiple of 16 -- confirm. */
+		for(i=0;i<leftbegin;i+=16)
+		{
+			_mm_store_ps(d+i   ,XMM0);
+			_mm_store_ps(d+i+ 4,XMM1);
+			_mm_store_ps(d+i+ 8,XMM2);
+			_mm_store_ps(d+i+12,XMM3);
+		}
+	}
+	_mm_prefetch(windowLW   , _MM_HINT_NTA);
+	_mm_prefetch(windowLW+32, _MM_HINT_NTA);
+	for(i=leftbegin,p=0;i<leftend;i+=16,p+=16)	/* rising slope: d[i] *= windowLW[p], 16 floats per pass */
+	{
+		__m128	XMM0, XMM1, XMM2, XMM3;
+		__m128	XMM4, XMM5, XMM6, XMM7;
+		_mm_prefetch(windowLW+p+64, _MM_HINT_NTA);
+		XMM0	 = _mm_load_ps(d+i   );
+		XMM4	 = _mm_load_ps(windowLW+p   );
+		XMM1	 = _mm_load_ps(d+i+ 4);
+		XMM5	 = _mm_load_ps(windowLW+p+ 4);
+		XMM2	 = _mm_load_ps(d+i+ 8);
+		XMM6	 = _mm_load_ps(windowLW+p+ 8);
+		XMM3	 = _mm_load_ps(d+i+12);
+		XMM7	 = _mm_load_ps(windowLW+p+12);
+		XMM0	 = _mm_mul_ps(XMM0, XMM4);
+		XMM1	 = _mm_mul_ps(XMM1, XMM5);
+		XMM2	 = _mm_mul_ps(XMM2, XMM6);
+		XMM3	 = _mm_mul_ps(XMM3, XMM7);
+		_mm_store_ps(d+i   ,XMM0);
+		_mm_store_ps(d+i+ 4,XMM1);
+		_mm_store_ps(d+i+ 8,XMM2);
+		_mm_store_ps(d+i+12,XMM3);
+	}
+	p	 = rn/2-16;
+	_mm_prefetch(windowNW+p-16, _MM_HINT_NTA);	/* this loop reads windowNW, so that is the table to prefetch */
+	_mm_prefetch(windowNW+p-48, _MM_HINT_NTA);
+	for(i=rightbegin;i<rightend;i+=16,p-=16)	/* falling slope: windowNW is walked backwards */
+	{
+		__m128	XMM0, XMM1, XMM2, XMM3,XMM4, XMM5, XMM6, XMM7;
+		_mm_prefetch(windowNW+p-80, _MM_HINT_NTA);
+		XMM0	 = _mm_load_ps(windowNW+p+12);
+		XMM1	 = _mm_load_ps(windowNW+p+ 8);
+		XMM2	 = _mm_load_ps(windowNW+p+ 4);
+		XMM3	 = _mm_load_ps(windowNW+p   );
+		XMM4	 = _mm_load_ps(d+i   );
+		XMM5	 = _mm_load_ps(d+i+ 4);
+		XMM6	 = _mm_load_ps(d+i+ 8);
+		XMM7	 = _mm_load_ps(d+i+12);
+		XMM0	 = _mm_shuffle_ps(XMM0, XMM0, _MM_SHUFFLE(0,1,2,3));	/* reverse the four lanes */
+		XMM1	 = _mm_shuffle_ps(XMM1, XMM1, _MM_SHUFFLE(0,1,2,3));
+		XMM2	 = _mm_shuffle_ps(XMM2, XMM2, _MM_SHUFFLE(0,1,2,3));
+		XMM3	 = _mm_shuffle_ps(XMM3, XMM3, _MM_SHUFFLE(0,1,2,3));
+		XMM4	 = _mm_mul_ps(XMM4, XMM0);
+		XMM5	 = _mm_mul_ps(XMM5, XMM1);
+		XMM6	 = _mm_mul_ps(XMM6, XMM2);
+		XMM7	 = _mm_mul_ps(XMM7, XMM3);
+		_mm_store_ps(d+i   ,XMM4);
+		_mm_store_ps(d+i+ 4,XMM5);
+		_mm_store_ps(d+i+ 8,XMM6);
+		_mm_store_ps(d+i+12,XMM7);
+	}
+	if(i<n)
+	{
+		__m128	XMM0	 = _mm_setzero_ps();
+		__m128	XMM1	 = _mm_setzero_ps();
+		__m128	XMM2	 = _mm_setzero_ps();
+		__m128	XMM3	 = _mm_setzero_ps();
+		/* Zero d[i..n) 16 floats per pass.  A 32-float stride stores 16 floats
+		 * beyond d[n-1] whenever n-i is an odd multiple of 16, i.e. it writes
+		 * past the end of the block being windowed.
+		 * NOTE(review): assumes n-i is a multiple of 16 -- confirm. */
+		for(;i<n;i+=16)
+		{
+			_mm_store_ps(d+i   ,XMM0);
+			_mm_store_ps(d+i+ 4,XMM1);
+			_mm_store_ps(d+i+ 8,XMM2);
+			_mm_store_ps(d+i+12,XMM3);
+		}
+	}
+#else														/* SSE Optimize */
     for(i=0;i<leftbegin;i++)
       d[i]=0.f;
     
@@ -2131,6 +2256,7 @@
     
     for(;i<n;i++)
       d[i]=0.f;
+#endif														/* SSE Optimize */
   }
 }
 
diff -ruN --exclude '*.in' libvorbis-1.2.0/lib/xmmlib.c libvorbis-1.2.0-sse/lib/xmmlib.c
--- libvorbis-1.2.0/lib/xmmlib.c	1970-01-01 01:00:00.000000000 +0100
+++ libvorbis-1.2.0-sse/lib/xmmlib.c	2007-08-02 12:43:15.000000000 +0200
@@ -0,0 +1,277 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2003             *
+ * by the XIPHOPHORUS Company http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+ function: SSE Function Library
+ last mod: $Id: xmmlib.c,v 1.4 2005-07-08 15:00:00+09 blacksword Exp $
+
+ ********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <malloc.h>
+#include "xmmlib.h"
+
+#if	defined(__SSE__)
+_MM_ALIGN16 const __m128x PCS_NNRN = {.si32 = {0x00000000, 0x80000000, 0x00000000, 0x00000000}};
+_MM_ALIGN16 const __m128x PCS_NNRR = {.si32 = {0x80000000, 0x80000000, 0x00000000, 0x00000000}};
+_MM_ALIGN16 const __m128x PCS_NRNN = {.si32 = {0x00000000, 0x00000000, 0x80000000, 0x00000000}};
+_MM_ALIGN16 const __m128x PCS_NRNR = {.si32 = {0x80000000, 0x00000000, 0x80000000, 0x00000000}};
+_MM_ALIGN16 const __m128x PCS_NRRN = {.si32 = {0x00000000, 0x80000000, 0x80000000, 0x00000000}};
+_MM_ALIGN16 const __m128x PCS_NRRR = {.si32 = {0x80000000, 0x80000000, 0x80000000, 0x00000000}};
+_MM_ALIGN16 const __m128x PCS_RNNN = {.si32 = {0x00000000, 0x00000000, 0x00000000, 0x80000000}};
+_MM_ALIGN16 const __m128x PCS_RNRN = {.si32 = {0x00000000, 0x80000000, 0x00000000, 0x80000000}};
+_MM_ALIGN16 const __m128x PCS_RNRR = {.si32 = {0x80000000, 0x80000000, 0x00000000, 0x80000000}};
+_MM_ALIGN16 const __m128x PCS_RRNN = {.si32 = {0x00000000, 0x00000000, 0x80000000, 0x80000000}};
+_MM_ALIGN16 const __m128x PCS_RNNR = {.si32 = {0x80000000, 0x00000000, 0x00000000, 0x80000000}};
+_MM_ALIGN16 const __m128x PCS_RRRR = {.si32 = {0x80000000, 0x80000000, 0x80000000, 0x80000000}};
+_MM_ALIGN16 const __m128x PCS_NNNR = {.si32 = {0x80000000, 0x00000000, 0x00000000, 0x00000000}};
+_MM_ALIGN16 const __m128x PABSMASK = {.si32 = {0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF}};
+_MM_ALIGN16 const __m128x PSTARTEDGEM1 = {.si32 = {0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF}};
+_MM_ALIGN16 const __m128x PSTARTEDGEM2 = {.si32 = {0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF}};
+_MM_ALIGN16 const __m128x PSTARTEDGEM3 = {.si32 = {0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF}};
+_MM_ALIGN16 const __m128x PENDEDGEM1 = {.si32 = {0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000}};
+_MM_ALIGN16 const __m128x PENDEDGEM2 = {.si32 = {0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000}};
+_MM_ALIGN16 const __m128x PENDEDGEM3 = {.si32 = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000}};
+
+_MM_ALIGN16 const __m128x PMASKTABLE[16] = {
+	{ .si32 = {0x00000000, 0x00000000, 0x00000000, 0x00000000} },
+	{ .si32 = {0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000} },
+	{ .si32 = {0x00000000, 0xFFFFFFFF, 0x00000000, 0x00000000} },
+	{ .si32 = {0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000} },
+	{ .si32 = {0x00000000, 0x00000000, 0xFFFFFFFF, 0x00000000} },
+	{ .si32 = {0xFFFFFFFF, 0x00000000, 0xFFFFFFFF, 0x00000000} },
+	{ .si32 = {0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000} },
+	{ .si32 = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000} },
+	{ .si32 = {0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF} },
+	{ .si32 = {0xFFFFFFFF, 0x00000000, 0x00000000, 0xFFFFFFFF} },
+	{ .si32 = {0x00000000, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF} },
+	{ .si32 = {0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF} },
+	{ .si32 = {0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF} },
+	{ .si32 = {0xFFFFFFFF, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF} },
+	{ .si32 = {0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF} },
+	{ .si32 = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF} }
+};
+
+_MM_ALIGN16 const __m128x PFV_0    = { .sf = { 0.0f, 0.0f, 0.0f, 0.0f} };
+_MM_ALIGN16 const __m128x PFV_1    = { .sf = { 1.0f, 1.0f, 1.0f, 1.0f} };
+_MM_ALIGN16 const __m128x PFV_2    = { .sf = { 2.0f, 2.0f, 2.0f, 2.0f} };
+_MM_ALIGN16 const __m128x PFV_4    = { .sf = { 4.0f, 4.0f, 4.0f, 4.0f} };
+_MM_ALIGN16 const __m128x PFV_8    = { .sf = { 8.0f, 8.0f, 8.0f, 8.0f} };
+_MM_ALIGN16 const __m128x PFV_INIT = { .sf = { 0.0f, 1.0f, 2.0f, 3.0f} };
+_MM_ALIGN16 const __m128x PFV_0P5  = { .sf = { 0.5f, 0.5f, 0.5f, 0.5f} };
+_MM_ALIGN16 const __m128x PFV_M0P5 = { .sf = {-0.5f,-0.5f,-0.5f,-0.5f} };
+
+#endif /* defined(__SSE__) */
+
+const int bitCountTable[16] = {
+	0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4
+};
+
+#if	0
+/*---------------------------------------------------------------------------
+// for measuring performance
+//-------------------------------------------------------------------------*/
+static double perfsum[16];
+static unsigned long perfcount[16];
+
+unsigned __int64* _perf_start(void)
+{
+    unsigned __int64* stime;
+  {
+    stime = malloc(sizeof(*stime));
+    *stime = _rdtsc();
+  }
+  return stime;
+}
+
+void _perf_end(unsigned __int64 *stime, int index)
+{
+  *stime = _rdtsc() - *stime;
+  {
+	perfsum[index] += (double)(*stime);
+	perfcount[index] ++;
+  }
+  free(stime);
+}
+
+void _perf_result(int index)
+{
+  printf("\nPerfSum   = %f\n", perfsum[index]);
+  printf("PerfCount = %d\n", perfcount[index]);
+  printf("PerfAvg   = %f\n", perfsum[index]/(double)perfcount[index]);
+}
+#endif
+
+/*
+ * Fallback 16-byte aligned malloc; `align` is ignored (always 16). Header words before the block: [-1] = offset back to the malloc pointer, [-2] = rounded size.
+ */
+#ifndef __INTEL_COMPILER
+static void *_aligned_malloc(size_t size, unsigned int align)
+{
+	unsigned int *p;
+	unsigned long addr, align_addr;
+	if (size > (size_t)-1 - 31) return NULL;	/* rounding up plus the 16 spare bytes would wrap around */
+	size = (size + 15) & ~15UL;	/* round the request up to a multiple of 16 */
+	p = malloc(size + 16);	/* 16 spare bytes for the header and the alignment shift */
+	if (!p)
+		return NULL;
+	addr = (unsigned long)p;
+	align_addr = (addr + 15 + 2*4) & ~15UL;	/* first 16-byte boundary that leaves at least 8 bytes for the header */
+	p = (unsigned int *)align_addr;
+	p[-1] = align_addr - addr;
+	p[-2] = size;	/* NOTE(review): stored as unsigned int, so sizes of 4 GiB or more are truncated here */
+	return p;
+}
+
+static void _aligned_free(void *ptr)	/* release a block obtained from _aligned_malloc/_aligned_realloc; NULL is a no-op */
+{
+	if (ptr) {
+		unsigned int offset = ((unsigned int*)ptr)[-1];	/* distance back to the pointer malloc returned */
+		free((char *)ptr - offset);	/* char* arithmetic: arithmetic on void* is a GNU extension, not standard C */
+	}
+}
+
+static void *_aligned_realloc(void *orig, size_t size, unsigned int align)
+{	/* Grow an aligned block keeping its contents; returns NULL (orig stays valid) if realloc fails. `align` is ignored. */
+	unsigned int *p = orig;
+	unsigned int offset, new_offset, orig_len;
+	unsigned long orig_addr, addr, align_addr;
+	if (!orig)
+		return _aligned_malloc(size, align);
+	offset = p[-1];
+	orig_len = p[-2];
+	orig_addr = (unsigned long)p - offset;	/* the pointer malloc/realloc originally returned */
+	if (orig_len >= size)	/* already large enough: blocks never shrink */
+		return orig;
+	size = (size + 15) & ~15UL;
+	p = realloc((void *)orig_addr, size + 16);
+	if (!p)
+		return NULL;
+	addr = (unsigned long)p;
+	/* No shortcut when the block grew in place: the header must still record the new length, otherwise a
+	 * later moving realloc would shift only the stale, shorter length.  Same address => same offset, so no memmove. */
+	align_addr = (addr + 15 + 2*4) & ~15UL;
+	new_offset = align_addr - addr;
+	if (offset != new_offset)	/* realloc copied the payload at the old offset; slide it to the new aligned position */
+		memmove((char*)p + new_offset, (char*)p + offset, orig_len);
+	p = (unsigned int*)align_addr;
+	p[-1] = new_offset;
+	p[-2] = size;
+	return p;
+}
+#endif
+
+/*---------------------------------------------------------------------------
+// 16-byte aligned malloc
+//-------------------------------------------------------------------------*/
+void* xmm_malloc(size_t size)
+{
+	return _aligned_malloc(size, 16);	/* result is already void*, no cast needed */
+}
+/*---------------------------------------------------------------------------
+// 16-byte aligned calloc; returns NULL if nitems*size overflows or allocation fails
+//-------------------------------------------------------------------------*/
+void* xmm_calloc(size_t nitems, size_t size)
+{
+	unsigned char*	t_RetPtr	 = (size && nitems > (size_t)-1 / size) ? NULL : xmm_malloc(nitems * size);
+	/* the product is only formed once the check above has shown it cannot wrap */
+	if(t_RetPtr)
+	{
+#ifdef	__SSE__
+		size_t	i,j, k;
+		__m128	XMM0, XMM1, XMM2, XMM3;
+		XMM0	 = 
+		XMM1	 = 
+		XMM2	 = 
+		XMM3	 = _mm_setzero_ps();
+		k	 = nitems*size;	/* total byte count to clear */
+		j	 = k&(~127);
+		for(i=0;i<j;i+=128)	/* clear 128 bytes per pass with non-temporal stores */
+		{
+			_mm_stream_ps((float*)(t_RetPtr+i    ), XMM0);
+			_mm_stream_ps((float*)(t_RetPtr+i+ 16), XMM1);
+			_mm_stream_ps((float*)(t_RetPtr+i+ 32), XMM2);
+			_mm_stream_ps((float*)(t_RetPtr+i+ 48), XMM3);
+			_mm_stream_ps((float*)(t_RetPtr+i+ 64), XMM0);
+			_mm_stream_ps((float*)(t_RetPtr+i+ 80), XMM1);
+			_mm_stream_ps((float*)(t_RetPtr+i+ 96), XMM2);
+			_mm_stream_ps((float*)(t_RetPtr+i+112), XMM3);
+		}
+		j	 = k&(~63);
+		for(;i<j;i+=64)	/* then progressively smaller chunks: 64, 32, 16 bytes */
+		{
+			_mm_stream_ps((float*)(t_RetPtr+i    ), XMM0);
+			_mm_stream_ps((float*)(t_RetPtr+i+ 16), XMM1);
+			_mm_stream_ps((float*)(t_RetPtr+i+ 32), XMM2);
+			_mm_stream_ps((float*)(t_RetPtr+i+ 48), XMM3);
+		}
+		j	 = k&(~31);
+		for(;i<j;i+=32)
+		{
+			_mm_stream_ps((float*)(t_RetPtr+i    ), XMM0);
+			_mm_stream_ps((float*)(t_RetPtr+i+ 16), XMM1);
+		}
+		j	 = k&(~15);
+		for(;i<j;i+=16)
+		{
+			_mm_stream_ps((float*)(t_RetPtr+i    ), XMM0);
+		}
+		j	 = k&(~7);
+#if 0 /* XXX */
+		for(;i<j;i+=8)
+		{
+			_mm_storel_pi((__m64*)(t_RetPtr+i   ), XMM0);
+		}
+		j	 = k&(~3);
+#endif
+		for(;i<j;i+=4)	/* with the 8-byte step disabled, 4-byte stores run up to the last multiple of 8 */
+		{
+			_mm_store_ss((float*)(t_RetPtr+i)   , XMM0);
+		}
+		for(;i<k;i++)	/* whatever is left is cleared byte by byte */
+			*(t_RetPtr+i    )	 = 0;
+		_mm_sfence();	/* order the non-temporal stores before the pointer is handed out */
+#else
+		memset(t_RetPtr, 0, nitems*size);
+#endif
+	}
+	return	(void*)t_RetPtr;
+}
+/*---------------------------------------------------------------------------
+// 16-byte aligned realloc
+//-------------------------------------------------------------------------*/
+void* xmm_realloc(void *block, size_t size)
+{
+	return _aligned_realloc(block, size, 16);	/* result is already void*, no cast needed */
+}
+/*---------------------------------------------------------------------------
+// 16-byte aligned free
+//-------------------------------------------------------------------------*/
+void xmm_free(void* a_AlignedPtr)
+{
+	if(!a_AlignedPtr) return;	/* nothing to release */
+	_aligned_free(a_AlignedPtr);
+}
+#if 0
+/*---------------------------------------------------------------------------
+// 16-byte alignment of an existing pointer
+//-------------------------------------------------------------------------*/
+void* xmm_align(void *t_Ptr)
+{
+	unsigned char*	t_RetPtr		 = 0;
+	if(t_Ptr){
+		t_RetPtr	 = (unsigned char*)(( ((unsigned long)t_Ptr+15)/16)*16);
+	}
+	return	(void*)t_RetPtr;
+}
+#endif
diff -ruN --exclude '*.in' libvorbis-1.2.0/lib/xmmlib.h libvorbis-1.2.0-sse/lib/xmmlib.h
--- libvorbis-1.2.0/lib/xmmlib.h	1970-01-01 01:00:00.000000000 +0100
+++ libvorbis-1.2.0-sse/lib/xmmlib.h	2007-08-02 12:43:15.000000000 +0200
@@ -0,0 +1,266 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2003             *
+ * by the XIPHOPHORUS Company http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+ function: Header of SSE Function Library
+ last mod: $Id: xmmlib.h,v 1.3 2005-07-08 15:00:00+09 blacksword Exp $
+
+ ********************************************************************/
+
+#ifndef _XMMLIB_H_INCLUDED
+#define _XMMLIB_H_INCLUDED
+
+#if !defined(STIN)
+#define STIN static __inline
+#endif
+
+#if defined(__SSE__)
+#ifdef __INTEL_COMPILER
+#include <ia32intrin.h>
+#include <tmmintrin.h>
+#else
+/* GCC */
+#include <mmintrin.h>
+#include <xmmintrin.h>
+#ifdef __SSE2__
+  #include <emmintrin.h>
+#endif  
+#ifdef __SSE3__
+  #include <pmmintrin.h>
+#endif
+#define _MM_ALIGN16  __attribute__((aligned(16)))
+#define __declspec(x)
+#endif
+
+/* PM64(x): read the 8 bytes at x as an __m64 (function form of the old macro *(__m64*)(x)) */
+static __inline __m64 __attribute__((__always_inline__))
+	PM64(const float *x)
+{
+	union {	/* reinterprets the pointer type without a cast */
+		const float *sf;
+		__m64 *m;
+	} v = { .sf = x };
+	return *v.m;
+}
+/* SETPM64(x, y): store the 8 bytes of y at x; x is void* so that int* and short* destinations are both accepted */
+static __inline void __attribute__((__always_inline__))
+	SETPM64(void *x, __m64 y)
+{
+	union {	/* reinterprets the pointer type without a cast */
+		void *i;
+		__m64 *m;
+	} v = { .i = x };
+	*v.m = y;
+}
+
+/* PM128(x): read the 16 bytes at x as an __m128 (function form of the old macro *(__m128*)(x)) */
+static __inline __m128 __attribute__((__always_inline__))
+	PM128(const float *x)
+{
+	union {	/* reinterprets the pointer type without a cast */
+		const float *sf;
+		__m128 *m;
+	} v = { .sf = x };
+	return *v.m;
+}
+#ifdef	__SSE2__
+/* PM128I(x): read the 16 bytes at x as an __m128i (function form of the old macro *(__m128i*)(x)) */
+static __inline __m128i __attribute__((__always_inline__))
+	PM128I(const int *x)
+{
+	union {	/* reinterprets the pointer type without a cast */
+		const int *si;
+		__m128i *m;
+	} v = { .si = x };
+	return *v.m;
+}
+#endif
+
+#include <stdint.h>
+
+typedef union {
+	uint8_t		si8[8];
+	uint16_t	si16[4];
+	uint32_t	si32[2];
+	int8_t		ssi8[8];
+	int16_t		ssi16[4];
+	int32_t		ssi32[2];
+	__m64		pi64;
+} __m64x;
+
+typedef union __declspec(intrin_type) _MM_ALIGN16 __m128x{
+	uint32_t	si32[4];
+	float		sf[4];
+	__m64		pi64[2];
+	__m128		ps;
+#ifdef	__SSE2__
+	__m128i		pi;
+	__m128d		pd;
+#endif
+} __m128x;
+
+#if defined(__SSE3__)
+#define	_mm_lddqu_ps(x)	_mm_castsi128_ps(_mm_lddqu_si128((__m128i*)(x)))
+#else
+#define	_mm_lddqu_ps(x)	_mm_loadu_ps(x)
+#endif
+
+extern const __m128x PCS_NNRN;
+extern const __m128x PCS_NNRR;
+extern const __m128x PCS_NRNN;
+extern const __m128x PCS_NRNR;
+extern const __m128x PCS_NRRN;
+extern const __m128x PCS_NRRR;
+extern const __m128x PCS_RNNN;
+extern const __m128x PCS_RNRN;
+extern const __m128x PCS_RNRR;
+extern const __m128x PCS_RRNN;
+extern const __m128x PCS_RNNR;
+extern const __m128x PCS_RRRR;
+extern const __m128x PCS_NNNR;
+extern const __m128x PABSMASK;
+extern const __m128x PSTARTEDGEM1;
+extern const __m128x PSTARTEDGEM2;
+extern const __m128x PSTARTEDGEM3;
+extern const __m128x PENDEDGEM1;
+extern const __m128x PENDEDGEM2;
+extern const __m128x PENDEDGEM3;
+extern const __m128x PMASKTABLE[16];
+
+extern const __m128x PFV_0;
+extern const __m128x PFV_1;
+extern const __m128x PFV_2;
+extern const __m128x PFV_4;
+extern const __m128x PFV_8;
+extern const __m128x PFV_INIT;
+extern const __m128x PFV_0P5;
+extern const __m128x PFV_M0P5;
+
+extern const int bitCountTable[16];
+
+extern void *xmm_malloc(size_t);
+extern void *xmm_calloc(size_t, size_t);
+extern void *xmm_realloc(void*, size_t);
+extern void xmm_free(void*);
+
+static inline void* xmm_align(void *t_Ptr)
+{
+	if(!t_Ptr) return (void*)0;	/* a null pointer stays null */
+	return (void*)(((unsigned long)t_Ptr+15) & ~15UL);	/* round the address up to the next multiple of 16 */
+}
+
+static inline __m128 _mm_todB_ps(__m128 x)	/* per lane: (float)(bits(x) & 0x7fffffff) * 7.17711438e-7f - 764.6161886f */
+{
+	static _MM_ALIGN16 float mparm[4] = {
+		7.17711438e-7f, 7.17711438e-7f, 7.17711438e-7f, 7.17711438e-7f
+	};
+	static _MM_ALIGN16 float aparm[4] = {
+		-764.6161886f, -764.6161886f, -764.6161886f, -764.6161886f
+	};
+#ifdef	__SSE2__
+	__m128x	U;
+	U.ps	 = _mm_and_ps(x, PABSMASK.ps);	/* clear the sign bit */
+	U.ps	 = _mm_cvtepi32_ps(U.pi);	/* treat the remaining bits as an integer and convert it to float */
+	U.ps	 = _mm_mul_ps(U.ps, _mm_load_ps(mparm));
+	U.ps	 = 	_mm_add_ps(U.ps, _mm_load_ps(aparm));
+	return	U.ps;
+#else	/* SSE1 only: convert through MMX halves. NOTE(review): no _mm_empty() here; callers presumably issue it -- confirm */
+	__m128	RESULT	 = _mm_setzero_ps();	/* initialised so the first _mm_cvtpi32_ps does not read an indeterminate value */
+	__m128x	U;
+	U.ps	 = _mm_and_ps(x, PABSMASK.ps);
+	RESULT	 = _mm_cvtpi32_ps(RESULT, U.pi64[1]);	/* lanes 2,3 converted into the low half ... */
+	RESULT	 = _mm_movelh_ps(RESULT, RESULT);	/* ... and moved up to the high half */
+	RESULT	 = _mm_cvtpi32_ps(RESULT, U.pi64[0]);	/* lanes 0,1 converted into the low half */
+	RESULT	 = _mm_mul_ps(RESULT, _mm_load_ps(mparm));	/* the float arrays must be loaded; they are not __m128 values */
+	RESULT	 = _mm_add_ps(RESULT, _mm_load_ps(aparm));
+	return	RESULT;
+#endif
+}
+
+static inline __m128 _mm_untnorm_ps(__m128 x)	/* per lane: 1.0f carrying the sign of x */
+{
+	static _MM_ALIGN16 const __m128x PIV0 = {
+		.si32 = {
+			0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000	/* bit pattern of 1.0f */
+		}
+	};
+	register __m128 r;
+	r	 = _mm_and_ps(x, PCS_RRRR.ps);	/* keep only the sign bits */
+	r	 = _mm_or_ps(r, PIV0.ps);	/* combine them with 1.0f; OR-ing x instead of r would discard the masking above */
+	return	r;
+}
+
+static inline float _mm_add_horz(__m128 x)	/* returns the sum of the four lanes of x */
+{
+#if	defined(__SSE3__)
+	x	 = _mm_hadd_ps(x, x);
+	x	 = _mm_hadd_ps(x, x);
+#else
+	__m128	y;
+	y	 = _mm_movehl_ps(x, x);	/* y = {x2,x3,x2,x3}; x is used as both operands so no uninitialised register is read */
+	x	 = _mm_add_ps(x, y);	/* lanes 0,1 now hold x0+x2 and x1+x3 */
+	y	 = x;
+	y	 = _mm_shuffle_ps(y, y, _MM_SHUFFLE(1,1,1,1));	/* broadcast lane 1 */
+	x	 = _mm_add_ss(x, y);	/* lane 0 = (x0+x2)+(x1+x3) */
+#endif
+	return _mm_cvtss_f32(x);
+}
+
+static inline __m128 _mm_add_horz_ss(__m128 x)	/* returns a vector whose lane 0 is the sum of the four lanes of x */
+{
+#if	defined(__SSE3__)
+	x	 = _mm_hadd_ps(x, x);
+	x	 = _mm_hadd_ps(x, x);
+#else
+	__m128	y;
+	y	 = _mm_movehl_ps(x, x);	/* y = {x2,x3,x2,x3}; x is used as both operands so no uninitialised register is read */
+	x	 = _mm_add_ps(x, y);	/* lanes 0,1 now hold x0+x2 and x1+x3 */
+	y	 = x;
+	y	 = _mm_shuffle_ps(y, y, _MM_SHUFFLE(1,1,1,1));	/* broadcast lane 1 */
+	x	 = _mm_add_ss(x, y);	/* only lane 0 carries the full sum on this path */
+#endif
+	return x;
+}
+
+static inline float _mm_max_horz(__m128 x)	/* returns the largest of the four lanes of x */
+{
+	__m128	y;
+	y	 = _mm_movehl_ps(x, x);	/* y = {x2,x3,x2,x3}; x is used as both operands so no uninitialised register is read */
+	x	 = _mm_max_ps(x, y);	/* lanes 0,1 now hold max(x0,x2) and max(x1,x3) */
+	y	 = x;
+	y	 = _mm_shuffle_ps(y, y, _MM_SHUFFLE(1,1,1,1));	/* broadcast lane 1 */
+	x	 = _mm_max_ss(x, y);
+	return _mm_cvtss_f32(x);
+}
+
+static inline float _mm_min_horz(__m128 x)	/* returns the smallest of the four lanes of x */
+{
+	__m128	y;
+	y	 = _mm_movehl_ps(x, x);	/* y = {x2,x3,x2,x3}; x is used as both operands so no uninitialised register is read */
+	x	 = _mm_min_ps(x, y);	/* lanes 0,1 now hold min(x0,x2) and min(x1,x3) */
+	y	 = x;
+	y	 = _mm_shuffle_ps(y, y, _MM_SHUFFLE(1,1,1,1));	/* broadcast lane 1 */
+	x	 = _mm_min_ss(x, y);
+	return _mm_cvtss_f32(x);
+}
+
+#endif /* defined(__SSE__) */
+
+#if	0
+/*---------------------------------------------------------------------------
+// for measuring performance
+//-------------------------------------------------------------------------*/
+extern unsigned __int64* _perf_start(void);
+extern void _perf_end(unsigned __int64 *stime, int index);
+extern void _perf_result(int index);
+#endif
+
+#endif /* _XMMLIB_H_INCLUDED */
openSUSE Build Service is sponsored by