File gcc41-ssse3.patch of Package gcc41

2006-10-22  H.J. Lu  <hongjiu.lu@intel.com>
 
 	* config/i386/tmmintrin.h: Remove the duplicated content.
 
2006-10-22  H.J. Lu  <hongjiu.lu@intel.com>
 
 	* config.gcc (i[34567]86-*-*): Add tmmintrin.h to extra_headers.
 	(x86_64-*-*): Likewise.
 
 	* config/i386/i386.c (pta_flags): Add PTA_SSSE3.
 	(override_options): Check SSSE3.
 	(ix86_builtins): Add IX86_BUILTIN_PHADDW, IX86_BUILTIN_PHADDD,
 	IX86_BUILTIN_PHADDSW, IX86_BUILTIN_PHSUBW, IX86_BUILTIN_PHSUBD,
 	IX86_BUILTIN_PHSUBSW, IX86_BUILTIN_PMADDUBSW,
 	IX86_BUILTIN_PMULHRSW, IX86_BUILTIN_PSHUFB,
 	IX86_BUILTIN_PSIGNB, IX86_BUILTIN_PSIGNW, IX86_BUILTIN_PSIGND,
 	IX86_BUILTIN_PALIGNR, IX86_BUILTIN_PABSB, IX86_BUILTIN_PABSW,
 	IX86_BUILTIN_PABSD, IX86_BUILTIN_PHADDW128,
 	IX86_BUILTIN_PHADDD128, IX86_BUILTIN_PHADDSW128,
 	IX86_BUILTIN_PHSUBW128, IX86_BUILTIN_PHSUBD128,
 	IX86_BUILTIN_PHSUBSW128, IX86_BUILTIN_PMADDUBSW128,
 	IX86_BUILTIN_PMULHRSW128, IX86_BUILTIN_PSHUFB128,
 	IX86_BUILTIN_PSIGNB128, IX86_BUILTIN_PSIGNW128,
 	IX86_BUILTIN_PSIGND128, IX86_BUILTIN_PALIGNR128,
 	IX86_BUILTIN_PABSB128, IX86_BUILTIN_PABSW128 and
 	IX86_BUILTIN_PABSD128.
 	(bdesc_2arg): Add SSSE3.
 	(bdesc_1arg): Likewise.
 	(ix86_init_mmx_sse_builtins): Support SSSE3.
 	(ix86_expand_builtin): Likewise.
 	* config/i386/i386.h (TARGET_CPU_CPP_BUILTINS): Likewise.
 
 	* config/i386/i386.md (UNSPEC_PSHUFB): New.
 	(UNSPEC_PSIGN): Likewise.
 	(UNSPEC_PALIGNR): Likewise.
 	Include mmx.md before sse.md.
 
 	* config/i386/i386.opt: Add -mssse3.
 
 	* config/i386/sse.md (ssse3_phaddwv8hi3): New pattern for SSSE3.
 	(ssse3_phaddwv4hi3): Likewise.
 	(ssse3_phadddv4si3): Likewise.
 	(ssse3_phadddv2si3): Likewise.
 	(ssse3_phaddswv8hi3): Likewise.
 	(ssse3_phaddswv4hi3): Likewise.
 	(ssse3_phsubwv8hi3): Likewise.
 	(ssse3_phsubwv4hi3): Likewise.
 	(ssse3_phsubdv4si3): Likewise.
 	(ssse3_phsubdv2si3): Likewise.
 	(ssse3_phsubswv8hi3): Likewise.
 	(ssse3_phsubswv4hi3): Likewise.
 	(ssse3_pmaddubswv8hi3): Likewise.
 	(ssse3_pmaddubswv4hi3): Likewise.
 	(ssse3_pmulhrswv8hi3): Likewise.
 	(ssse3_pmulhrswv4hi3): Likewise.
 	(ssse3_pshufbv16qi3): Likewise.
 	(ssse3_pshufbv8qi3): Likewise.
 	(ssse3_psign<mode>3): Likewise.
 	(ssse3_psign<mode>3): Likewise.
 	(ssse3_palignrti): Likewise.
 	(ssse3_palignrdi): Likewise.
 	(abs<mode>2): Likewise.
 	(abs<mode>2): Likewise.
 
 	* config/i386/tmmintrin.h: New file.
 
 	* doc/extend.texi: Document SSSE3 built-in functions.
 
 	* doc/invoke.texi: Document -mssse3/-mno-ssse3 switches.


Index: gcc/doc/extend.texi
===================================================================
*** gcc/doc/extend.texi	(revision 118001)
--- gcc/doc/extend.texi	(working copy)
*************** The following built-in functions are ava
*** 6860,6865 ****
--- 6860,6911 ----
  Generates the @code{movddup} machine instruction as a load from memory.
  @end table
  
+ The following built-in functions are available when @option{-mssse3} is used.
+ All of them generate the machine instruction that is part of the name
+ with MMX registers.
+ 
+ @smallexample
+ v2si __builtin_ia32_phaddd (v2si, v2si)
+ v4hi __builtin_ia32_phaddw (v4hi, v4hi)
+ v4hi __builtin_ia32_phaddsw (v4hi, v4hi)
+ v2si __builtin_ia32_phsubd (v2si, v2si)
+ v4hi __builtin_ia32_phsubw (v4hi, v4hi)
+ v4hi __builtin_ia32_phsubsw (v4hi, v4hi)
+ v8qi __builtin_ia32_pmaddubsw (v8qi, v8qi)
+ v4hi __builtin_ia32_pmulhrsw (v4hi, v4hi)
+ v8qi __builtin_ia32_pshufb (v8qi, v8qi)
+ v8qi __builtin_ia32_psignb (v8qi, v8qi)
+ v2si __builtin_ia32_psignd (v2si, v2si)
+ v4hi __builtin_ia32_psignw (v4hi, v4hi)
+ long long __builtin_ia32_palignr (long long, long long, int)
+ v8qi __builtin_ia32_pabsb (v8qi)
+ v2si __builtin_ia32_pabsd (v2si)
+ v4hi __builtin_ia32_pabsw (v4hi)
+ @end smallexample
+ 
+ The following built-in functions are available when @option{-mssse3} is used.
+ All of them generate the machine instruction that is part of the name
+ with SSE registers.
+ 
+ @smallexample
+ v4si __builtin_ia32_phaddd128 (v4si, v4si)
+ v8hi __builtin_ia32_phaddw128 (v8hi, v8hi)
+ v8hi __builtin_ia32_phaddsw128 (v8hi, v8hi)
+ v4si __builtin_ia32_phsubd128 (v4si, v4si)
+ v8hi __builtin_ia32_phsubw128 (v8hi, v8hi)
+ v8hi __builtin_ia32_phsubsw128 (v8hi, v8hi)
+ v16qi __builtin_ia32_pmaddubsw128 (v16qi, v16qi)
+ v8hi __builtin_ia32_pmulhrsw128 (v8hi, v8hi)
+ v16qi __builtin_ia32_pshufb128 (v16qi, v16qi)
+ v16qi __builtin_ia32_psignb128 (v16qi, v16qi)
+ v4si __builtin_ia32_psignd128 (v4si, v4si)
+ v8hi __builtin_ia32_psignw128 (v8hi, v8hi)
+ v2di __builtin_ia32_palignr128 (v2di, v2di, int)
+ v16qi __builtin_ia32_pabsb128 (v16qi)
+ v4si __builtin_ia32_pabsd128 (v4si)
+ v8hi __builtin_ia32_pabsw128 (v8hi)
+ @end smallexample
+ 
  The following built-in functions are available when @option{-m3dnow} is used.
  All of them generate the machine instruction that is part of the name.
  
Index: gcc/doc/invoke.texi
===================================================================
*** gcc/doc/invoke.texi	(revision 118001)
--- gcc/doc/invoke.texi	(working copy)
*************** Objective-C and Objective-C++ Dialects}.
*** 522,528 ****
  -mno-fp-ret-in-387  -msoft-float  -msvr3-shlib @gol
  -mno-wide-multiply  -mrtd  -malign-double @gol
  -mpreferred-stack-boundary=@var{num} @gol
! -mmmx  -msse  -msse2 -msse3 -m3dnow @gol
  -mthreads  -mno-align-stringops  -minline-all-stringops @gol
  -mpush-args  -maccumulate-outgoing-args  -m128bit-long-double @gol
  -m96bit-long-double  -mregparm=@var{num}  -msseregparm @gol
--- 522,528 ----
  -mno-fp-ret-in-387  -msoft-float  -msvr3-shlib @gol
  -mno-wide-multiply  -mrtd  -malign-double @gol
  -mpreferred-stack-boundary=@var{num} @gol
! -mmmx  -msse  -msse2 -msse3 -mssse3 -m3dnow @gol
  -mthreads  -mno-align-stringops  -minline-all-stringops @gol
  -mpush-args  -maccumulate-outgoing-args  -m128bit-long-double @gol
  -m96bit-long-double  -mregparm=@var{num}  -msseregparm @gol
*************** preferred alignment to @option{-mpreferr
*** 9313,9318 ****
--- 9313,9320 ----
  @itemx -mno-sse2
  @item -msse3
  @itemx -mno-sse3
+ @item -mssse3
+ @itemx -mno-ssse3
  @item -m3dnow
  @itemx -mno-3dnow
  @opindex mmmx
*************** preferred alignment to @option{-mpreferr
*** 9322,9330 ****
  @opindex m3dnow
  @opindex mno-3dnow
  These switches enable or disable the use of instructions in the MMX,
! SSE, SSE2 or 3DNow! extended instruction sets.  These extensions are
! also available as built-in functions: see @ref{X86 Built-in Functions},
! for details of the functions enabled and disabled by these switches.
  
  To have SSE/SSE2 instructions generated automatically from floating-point
  code (as opposed to 387 instructions), see @option{-mfpmath=sse}.
--- 9324,9333 ----
  @opindex m3dnow
  @opindex mno-3dnow
  These switches enable or disable the use of instructions in the MMX,
! SSE, SSE2, SSE3, SSSE3 or 3DNow! extended instruction sets.
! These extensions are also available as built-in functions: see
! @ref{X86 Built-in Functions}, for details of the functions enabled and
! disabled by these switches.
  
  To have SSE/SSE2 instructions generated automatically from floating-point
  code (as opposed to 387 instructions), see @option{-mfpmath=sse}.
Index: gcc/config.gcc
===================================================================
*** gcc/config.gcc	(revision 118001)
--- gcc/config.gcc	(working copy)
*************** xscale-*-*)
*** 263,273 ****
  	;;
  i[34567]86-*-*)
  	cpu_type=i386
! 	extra_headers="mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h pmmintrin.h"
  	;;
  x86_64-*-*)
  	cpu_type=i386
! 	extra_headers="mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h pmmintrin.h"
  	need_64bit_hwint=yes
  	;;
  ia64-*-*)
--- 263,275 ----
  	;;
  i[34567]86-*-*)
  	cpu_type=i386
! 	extra_headers="mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h
! 		       pmmintrin.h tmmintrin.h"
  	;;
  x86_64-*-*)
  	cpu_type=i386
! 	extra_headers="mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h
! 		       pmmintrin.h tmmintrin.h"
  	need_64bit_hwint=yes
  	;;
  ia64-*-*)
Index: gcc/config/i386/i386.h
===================================================================
*** gcc/config/i386/i386.h	(revision 118001)
--- gcc/config/i386/i386.h	(working copy)
*************** extern int x86_prefetch_sse;
*** 381,386 ****
--- 381,388 ----
  	builtin_define ("__SSE2__");				\
        if (TARGET_SSE3)						\
  	builtin_define ("__SSE3__");				\
+       if (TARGET_SSSE3)						\
+ 	builtin_define ("__SSSE3__");				\
        if (TARGET_SSE_MATH && TARGET_SSE)			\
  	builtin_define ("__SSE_MATH__");			\
        if (TARGET_SSE_MATH && TARGET_SSE2)			\
Index: gcc/config/i386/i386.md
===================================================================
*** gcc/config/i386/i386.md	(revision 118001)
--- gcc/config/i386/i386.md	(working copy)
*************** (define_constants
*** 146,151 ****
--- 146,156 ----
     (UNSPEC_SP_TEST		101)
     (UNSPEC_SP_TLS_SET		102)
     (UNSPEC_SP_TLS_TEST		103)
+ 
+    ; SSSE3
+    (UNSPEC_PSHUFB		120)
+    (UNSPEC_PSIGN		121)
+    (UNSPEC_PALIGNR		122)
    ])
  
  (define_constants
*************** (define_insn "stack_tls_protect_test_di"
*** 20644,20649 ****
    "mov{q}\t{%1, %3|%3, %1}\;xor{q}\t{%%fs:%P2, %3|%3, QWORD PTR %%fs:%P2}"
    [(set_attr "type" "multi")])
  
- (include "sse.md")
  (include "mmx.md")
  (include "sync.md")
--- 20649,20654 ----
    "mov{q}\t{%1, %3|%3, %1}\;xor{q}\t{%%fs:%P2, %3|%3, QWORD PTR %%fs:%P2}"
    [(set_attr "type" "multi")])
  
  (include "mmx.md")
+ (include "sse.md")
  (include "sync.md")
Index: gcc/config/i386/sse.md
===================================================================
*** gcc/config/i386/sse.md	(revision 118001)
--- gcc/config/i386/sse.md	(working copy)
*************** (define_insn "sse3_monitor"
*** 3902,3904 ****
--- 3902,4479 ----
    "TARGET_SSE3"
    "monitor\t%0, %1, %2"
    [(set_attr "length" "3")])
+ 
+ ;; SSSE3
+ (define_insn "ssse3_phaddwv8hi3"
+   [(set (match_operand:V8HI 0 "register_operand" "=x")
+ 	(vec_concat:V8HI
+ 	  (vec_concat:V4HI
+ 	    (vec_concat:V2HI
+ 	      (plus:HI
+ 		(vec_select:HI
+ 		  (match_operand:V8HI 1 "register_operand" "0")
+ 		  (parallel [(const_int 0)]))
+ 		(vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ 	      (plus:HI
+ 		(vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ 		(vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
+ 	    (vec_concat:V2HI
+ 	      (plus:HI
+ 		(vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
+ 		(vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
+ 	      (plus:HI
+ 		(vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
+ 		(vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
+ 	  (vec_concat:V4HI
+ 	    (vec_concat:V2HI
+ 	      (plus:HI
+ 		(vec_select:HI
+ 		  (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+ 		  (parallel [(const_int 0)]))
+ 		(vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
+ 	      (plus:HI
+ 		(vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
+ 		(vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
+ 	    (vec_concat:V2HI
+ 	      (plus:HI
+ 		(vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
+ 		(vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
+ 	      (plus:HI
+ 		(vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
+ 		(vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
+   "TARGET_SSSE3"
+   "phaddw\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sseiadd")
+    (set_attr "mode" "TI")])
+ 
+ (define_insn "ssse3_phaddwv4hi3"
+   [(set (match_operand:V4HI 0 "register_operand" "=y")
+ 	(vec_concat:V4HI
+ 	  (vec_concat:V2HI
+ 	    (plus:HI
+ 	      (vec_select:HI
+ 		(match_operand:V4HI 1 "register_operand" "0")
+ 		(parallel [(const_int 0)]))
+ 	      (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ 	    (plus:HI
+ 	      (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ 	      (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
+ 	  (vec_concat:V2HI
+ 	    (plus:HI
+ 	      (vec_select:HI
+ 		(match_operand:V4HI 2 "nonimmediate_operand" "ym")
+ 		(parallel [(const_int 0)]))
+ 	      (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
+ 	    (plus:HI
+ 	      (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
+ 	      (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
+   "TARGET_SSSE3"
+   "phaddw\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sseiadd")
+    (set_attr "mode" "DI")])
+ 
+ (define_insn "ssse3_phadddv4si3"
+   [(set (match_operand:V4SI 0 "register_operand" "=x")
+ 	(vec_concat:V4SI
+ 	  (vec_concat:V2SI
+ 	    (plus:SI
+ 	      (vec_select:SI
+ 		(match_operand:V4SI 1 "register_operand" "0")
+ 		(parallel [(const_int 0)]))
+ 	      (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
+ 	    (plus:SI
+ 	      (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
+ 	      (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
+ 	  (vec_concat:V2SI
+ 	    (plus:SI
+ 	      (vec_select:SI
+ 		(match_operand:V4SI 2 "nonimmediate_operand" "xm")
+ 		(parallel [(const_int 0)]))
+ 	      (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
+ 	    (plus:SI
+ 	      (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
+ 	      (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
+   "TARGET_SSSE3"
+   "phaddd\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sseiadd")
+    (set_attr "mode" "TI")])
+ 
+ (define_insn "ssse3_phadddv2si3"
+   [(set (match_operand:V2SI 0 "register_operand" "=y")
+ 	(vec_concat:V2SI
+ 	  (plus:SI
+ 	    (vec_select:SI
+ 	      (match_operand:V2SI 1 "register_operand" "0")
+ 	      (parallel [(const_int 0)]))
+ 	    (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
+ 	  (plus:SI
+ 	    (vec_select:SI
+ 	      (match_operand:V2SI 2 "nonimmediate_operand" "ym")
+ 	      (parallel [(const_int 0)]))
+ 	    (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
+   "TARGET_SSSE3"
+   "phaddd\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sseiadd")
+    (set_attr "mode" "DI")])
+ 
+ (define_insn "ssse3_phaddswv8hi3"
+   [(set (match_operand:V8HI 0 "register_operand" "=x")
+ 	(vec_concat:V8HI
+ 	  (vec_concat:V4HI
+ 	    (vec_concat:V2HI
+ 	      (ss_plus:HI
+ 		(vec_select:HI
+ 		  (match_operand:V8HI 1 "register_operand" "0")
+ 		  (parallel [(const_int 0)]))
+ 		(vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ 	      (ss_plus:HI
+ 		(vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ 		(vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
+ 	    (vec_concat:V2HI
+ 	      (ss_plus:HI
+ 		(vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
+ 		(vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
+ 	      (ss_plus:HI
+ 		(vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
+ 		(vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
+ 	  (vec_concat:V4HI
+ 	    (vec_concat:V2HI
+ 	      (ss_plus:HI
+ 		(vec_select:HI
+ 		  (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+ 		  (parallel [(const_int 0)]))
+ 		(vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
+ 	      (ss_plus:HI
+ 		(vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
+ 		(vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
+ 	    (vec_concat:V2HI
+ 	      (ss_plus:HI
+ 		(vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
+ 		(vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
+ 	      (ss_plus:HI
+ 		(vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
+ 		(vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
+   "TARGET_SSSE3"
+   "phaddsw\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sseiadd")
+    (set_attr "mode" "TI")])
+ 
+ (define_insn "ssse3_phaddswv4hi3"
+   [(set (match_operand:V4HI 0 "register_operand" "=y")
+ 	(vec_concat:V4HI
+ 	  (vec_concat:V2HI
+ 	    (ss_plus:HI
+ 	      (vec_select:HI
+ 		(match_operand:V4HI 1 "register_operand" "0")
+ 		(parallel [(const_int 0)]))
+ 	      (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ 	    (ss_plus:HI
+ 	      (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ 	      (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
+ 	  (vec_concat:V2HI
+ 	    (ss_plus:HI
+ 	      (vec_select:HI
+ 		(match_operand:V4HI 2 "nonimmediate_operand" "ym")
+ 		(parallel [(const_int 0)]))
+ 	      (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
+ 	    (ss_plus:HI
+ 	      (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
+ 	      (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
+   "TARGET_SSSE3"
+   "phaddsw\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sseiadd")
+    (set_attr "mode" "DI")])
+ 
+ (define_insn "ssse3_phsubwv8hi3"
+   [(set (match_operand:V8HI 0 "register_operand" "=x")
+ 	(vec_concat:V8HI
+ 	  (vec_concat:V4HI
+ 	    (vec_concat:V2HI
+ 	      (minus:HI
+ 		(vec_select:HI
+ 		  (match_operand:V8HI 1 "register_operand" "0")
+ 		  (parallel [(const_int 0)]))
+ 		(vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ 	      (minus:HI
+ 		(vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ 		(vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
+ 	    (vec_concat:V2HI
+ 	      (minus:HI
+ 		(vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
+ 		(vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
+ 	      (minus:HI
+ 		(vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
+ 		(vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
+ 	  (vec_concat:V4HI
+ 	    (vec_concat:V2HI
+ 	      (minus:HI
+ 		(vec_select:HI
+ 		  (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+ 		  (parallel [(const_int 0)]))
+ 		(vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
+ 	      (minus:HI
+ 		(vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
+ 		(vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
+ 	    (vec_concat:V2HI
+ 	      (minus:HI
+ 		(vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
+ 		(vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
+ 	      (minus:HI
+ 		(vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
+ 		(vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
+   "TARGET_SSSE3"
+   "phsubw\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sseiadd")
+    (set_attr "mode" "TI")])
+ 
+ (define_insn "ssse3_phsubwv4hi3"
+   [(set (match_operand:V4HI 0 "register_operand" "=y")
+ 	(vec_concat:V4HI
+ 	  (vec_concat:V2HI
+ 	    (minus:HI
+ 	      (vec_select:HI
+ 		(match_operand:V4HI 1 "register_operand" "0")
+ 		(parallel [(const_int 0)]))
+ 	      (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ 	    (minus:HI
+ 	      (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ 	      (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
+ 	  (vec_concat:V2HI
+ 	    (minus:HI
+ 	      (vec_select:HI
+ 		(match_operand:V4HI 2 "nonimmediate_operand" "ym")
+ 		(parallel [(const_int 0)]))
+ 	      (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
+ 	    (minus:HI
+ 	      (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
+ 	      (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
+   "TARGET_SSSE3"
+   "phsubw\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sseiadd")
+    (set_attr "mode" "DI")])
+ 
+ (define_insn "ssse3_phsubdv4si3"
+   [(set (match_operand:V4SI 0 "register_operand" "=x")
+ 	(vec_concat:V4SI
+ 	  (vec_concat:V2SI
+ 	    (minus:SI
+ 	      (vec_select:SI
+ 		(match_operand:V4SI 1 "register_operand" "0")
+ 		(parallel [(const_int 0)]))
+ 	      (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
+ 	    (minus:SI
+ 	      (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
+ 	      (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
+ 	  (vec_concat:V2SI
+ 	    (minus:SI
+ 	      (vec_select:SI
+ 		(match_operand:V4SI 2 "nonimmediate_operand" "xm")
+ 		(parallel [(const_int 0)]))
+ 	      (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
+ 	    (minus:SI
+ 	      (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
+ 	      (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
+   "TARGET_SSSE3"
+   "phsubd\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sseiadd")
+    (set_attr "mode" "TI")])
+ 
+ (define_insn "ssse3_phsubdv2si3"
+   [(set (match_operand:V2SI 0 "register_operand" "=y")
+ 	(vec_concat:V2SI
+ 	  (minus:SI
+ 	    (vec_select:SI
+ 	      (match_operand:V2SI 1 "register_operand" "0")
+ 	      (parallel [(const_int 0)]))
+ 	    (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
+ 	  (minus:SI
+ 	    (vec_select:SI
+ 	      (match_operand:V2SI 2 "nonimmediate_operand" "ym")
+ 	      (parallel [(const_int 0)]))
+ 	    (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
+   "TARGET_SSSE3"
+   "phsubd\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sseiadd")
+    (set_attr "mode" "DI")])
+ 
+ (define_insn "ssse3_phsubswv8hi3"
+   [(set (match_operand:V8HI 0 "register_operand" "=x")
+ 	(vec_concat:V8HI
+ 	  (vec_concat:V4HI
+ 	    (vec_concat:V2HI
+ 	      (ss_minus:HI
+ 		(vec_select:HI
+ 		  (match_operand:V8HI 1 "register_operand" "0")
+ 		  (parallel [(const_int 0)]))
+ 		(vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ 	      (ss_minus:HI
+ 		(vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ 		(vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
+ 	    (vec_concat:V2HI
+ 	      (ss_minus:HI
+ 		(vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
+ 		(vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
+ 	      (ss_minus:HI
+ 		(vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
+ 		(vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
+ 	  (vec_concat:V4HI
+ 	    (vec_concat:V2HI
+ 	      (ss_minus:HI
+ 		(vec_select:HI
+ 		  (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+ 		  (parallel [(const_int 0)]))
+ 		(vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
+ 	      (ss_minus:HI
+ 		(vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
+ 		(vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
+ 	    (vec_concat:V2HI
+ 	      (ss_minus:HI
+ 		(vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
+ 		(vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
+ 	      (ss_minus:HI
+ 		(vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
+ 		(vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
+   "TARGET_SSSE3"
+   "phsubsw\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sseiadd")
+    (set_attr "mode" "TI")])
+ 
+ (define_insn "ssse3_phsubswv4hi3"
+   [(set (match_operand:V4HI 0 "register_operand" "=y")
+ 	(vec_concat:V4HI
+ 	  (vec_concat:V2HI
+ 	    (ss_minus:HI
+ 	      (vec_select:HI
+ 		(match_operand:V4HI 1 "register_operand" "0")
+ 		(parallel [(const_int 0)]))
+ 	      (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ 	    (ss_minus:HI
+ 	      (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ 	      (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
+ 	  (vec_concat:V2HI
+ 	    (ss_minus:HI
+ 	      (vec_select:HI
+ 		(match_operand:V4HI 2 "nonimmediate_operand" "ym")
+ 		(parallel [(const_int 0)]))
+ 	      (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
+ 	    (ss_minus:HI
+ 	      (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
+ 	      (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
+   "TARGET_SSSE3"
+   "phsubsw\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sseiadd")
+    (set_attr "mode" "DI")])
+ 
+ (define_insn "ssse3_pmaddubswv8hi3"	; pmaddubsw, SSE-register form
+   [(set (match_operand:V8HI 0 "register_operand" "=x")
+ 	(ss_plus:V8HI			; saturating sum of the two products
+ 	  (mult:V8HI
+ 	    (zero_extend:V8HI		; operand 1 bytes are zero-extended
+ 	      (vec_select:V8QI		; eight even-indexed bytes
+ 		(match_operand:V16QI 1 "register_operand" "0") ; no "%": not commutative
+ 		(parallel [(const_int 0)
+ 			   (const_int 2)
+ 			   (const_int 4)
+ 			   (const_int 6)
+ 			   (const_int 8)
+ 			   (const_int 10)
+ 			   (const_int 12)
+ 			   (const_int 14)])))
+ 	    (sign_extend:V8HI		; operand 2 bytes are sign-extended
+ 	      (vec_select:V8QI
+ 		(match_operand:V16QI 2 "nonimmediate_operand" "xm")
+ 		(parallel [(const_int 0)
+ 			   (const_int 2)
+ 			   (const_int 4)
+ 			   (const_int 6)
+ 			   (const_int 8)
+ 			   (const_int 10)
+ 			   (const_int 12)
+ 			   (const_int 14)]))))
+ 	  (mult:V8HI
+ 	    (zero_extend:V8HI
+ 	      (vec_select:V8QI (match_dup 1)	; eight odd-indexed bytes
+ 		(parallel [(const_int 1)
+ 			   (const_int 3)
+ 			   (const_int 5)
+ 			   (const_int 7)
+ 			   (const_int 9)
+ 			   (const_int 11)
+ 			   (const_int 13)
+ 			   (const_int 15)])))
+ 	    (sign_extend:V8HI
+ 	      (vec_select:V8QI (match_dup 2)
+ 		(parallel [(const_int 1)
+ 			   (const_int 3)
+ 			   (const_int 5)
+ 			   (const_int 7)
+ 			   (const_int 9)
+ 			   (const_int 11)
+ 			   (const_int 13)
+ 			   (const_int 15)]))))))]
+   "TARGET_SSSE3"
+   "pmaddubsw\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sseiadd")
+    (set_attr "mode" "TI")])
+ 
+ (define_insn "ssse3_pmaddubswv4hi3"	; pmaddubsw, MMX-register form
+   [(set (match_operand:V4HI 0 "register_operand" "=y")
+ 	(ss_plus:V4HI			; saturating sum of the two products
+ 	  (mult:V4HI
+ 	    (zero_extend:V4HI		; operand 1 bytes are zero-extended
+ 	      (vec_select:V4QI		; four even-indexed bytes
+ 		(match_operand:V8QI 1 "register_operand" "0") ; no "%": not commutative
+ 		(parallel [(const_int 0)
+ 			   (const_int 2)
+ 			   (const_int 4)
+ 			   (const_int 6)])))
+ 	    (sign_extend:V4HI		; operand 2 bytes are sign-extended
+ 	      (vec_select:V4QI
+ 		(match_operand:V8QI 2 "nonimmediate_operand" "ym")
+ 		(parallel [(const_int 0)
+ 			   (const_int 2)
+ 			   (const_int 4)
+ 			   (const_int 6)]))))
+ 	  (mult:V4HI
+ 	    (zero_extend:V4HI
+ 	      (vec_select:V4QI (match_dup 1)	; four odd-indexed bytes
+ 		(parallel [(const_int 1)
+ 			   (const_int 3)
+ 			   (const_int 5)
+ 			   (const_int 7)])))
+ 	    (sign_extend:V4HI
+ 	      (vec_select:V4QI (match_dup 2)
+ 		(parallel [(const_int 1)
+ 			   (const_int 3)
+ 			   (const_int 5)
+ 			   (const_int 7)]))))))]
+   "TARGET_SSSE3"
+   "pmaddubsw\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sseiadd")
+    (set_attr "mode" "DI")])
+ 
+ (define_insn "ssse3_pmulhrswv8hi3"
+   [(set (match_operand:V8HI 0 "register_operand" "=x")
+ 	(truncate:V8HI
+ 	  (lshiftrt:V8SI
+ 	    (plus:V8SI
+ 	      (lshiftrt:V8SI
+ 		(mult:V8SI
+ 		  (sign_extend:V8SI
+ 		    (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
+ 		  (sign_extend:V8SI
+ 		    (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
+ 		(const_int 14))
+ 	      (const_vector:V8HI [(const_int 1) (const_int 1)
+ 				  (const_int 1) (const_int 1)
+ 				  (const_int 1) (const_int 1)
+ 				  (const_int 1) (const_int 1)]))
+ 	    (const_int 1))))]
+   "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
+   "pmulhrsw\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sseimul")
+    (set_attr "mode" "TI")])
+ 
+ (define_insn "ssse3_pmulhrswv4hi3"
+   [(set (match_operand:V4HI 0 "register_operand" "=y")
+ 	(truncate:V4HI
+ 	  (lshiftrt:V4SI
+ 	    (plus:V4SI
+ 	      (lshiftrt:V4SI
+ 		(mult:V4SI
+ 		  (sign_extend:V4SI
+ 		    (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
+ 		  (sign_extend:V4SI
+ 		    (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+ 		(const_int 14))
+ 	      (const_vector:V4HI [(const_int 1) (const_int 1)
+ 				  (const_int 1) (const_int 1)]))
+ 	    (const_int 1))))]
+   "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
+   "pmulhrsw\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sseimul")
+    (set_attr "mode" "DI")])
+ 
+ (define_insn "ssse3_pshufbv16qi3"	; pshufb, SSE-register form
+   [(set (match_operand:V16QI 0 "register_operand" "=x")
+ 	(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
+ 		       (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
+ 		       UNSPEC_PSHUFB))]	; represented as an opaque unspec
+   "TARGET_SSSE3"
+   "pshufb\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sselog1")
+    (set_attr "mode" "TI")])
+ 
+ (define_insn "ssse3_pshufbv8qi3"	; pshufb, MMX-register form
+   [(set (match_operand:V8QI 0 "register_operand" "=y")
+ 	(unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
+ 		      (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
+ 		      UNSPEC_PSHUFB))]	; represented as an opaque unspec
+   "TARGET_SSSE3"
+   "pshufb\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sselog1")
+    (set_attr "mode" "DI")])
+ 
+ (define_insn "ssse3_psign<mode>3"	; psignb/w/d, one pattern per SSEMODE124 mode
+   [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
+ 	(unspec:SSEMODE124 [(match_operand:SSEMODE124 1 "register_operand" "0")
+ 			    (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
+ 			    UNSPEC_PSIGN))]	; represented as an opaque unspec
+   "TARGET_SSSE3"
+   "psign<ssevecsize>\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sselog1")
+    (set_attr "mode" "TI")])
+ 
+ (define_insn "ssse3_psign<mode>3"	; psignb/w/d, one pattern per MMXMODEI mode
+   [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+ 	(unspec:MMXMODEI [(match_operand:MMXMODEI 1 "register_operand" "0")
+ 			  (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
+ 			  UNSPEC_PSIGN))]	; represented as an opaque unspec
+   "TARGET_SSSE3"
+   "psign<mmxvecsize>\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sselog1")
+    (set_attr "mode" "DI")])
+ 
+ (define_insn "ssse3_palignrti"
+   [(set (match_operand:TI 0 "register_operand" "=x")
+ 	(unspec:TI [(match_operand:TI 1 "register_operand" "0")
+ 		    (match_operand:TI 2 "nonimmediate_operand" "xm")
+ 		    (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
+ 		    UNSPEC_PALIGNR))]
+   "TARGET_SSSE3"
+ {
+   operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
+   return "palignr\t{%3, %2, %0|%0, %2, %3}";
+ }
+   [(set_attr "type" "sseishft")
+    (set_attr "mode" "TI")])
+ 
+ (define_insn "ssse3_palignrdi"
+   [(set (match_operand:DI 0 "register_operand" "=y")
+ 	(unspec:DI [(match_operand:DI 1 "register_operand" "0")
+ 		    (match_operand:DI 2 "nonimmediate_operand" "ym")
+ 		    (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
+ 		    UNSPEC_PALIGNR))]
+   "TARGET_SSSE3"
+ {
+   operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
+   return "palignr\t{%3, %2, %0|%0, %2, %3}";
+ }
+   [(set_attr "type" "sseishft")
+    (set_attr "mode" "DI")])
+ 
+ (define_insn "abs<mode>2"		; pabsb/w/d, one pattern per SSEMODE124 mode
+   [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
+ 	(abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
+   "TARGET_SSSE3"
+   "pabs<ssevecsize>\t{%1, %0|%0, %1}"
+   [(set_attr "type" "sselog1")
+    (set_attr "mode" "TI")])
+ 
+ (define_insn "abs<mode>2"		; pabsb/w/d, one pattern per MMXMODEI mode
+   [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+ 	(abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
+   "TARGET_SSSE3"
+   "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
+   [(set_attr "type" "sselog1")
+    (set_attr "mode" "DI")])
Index: gcc/config/i386/i386.opt
===================================================================
*** gcc/config/i386/i386.opt	(revision 118001)
--- gcc/config/i386/i386.opt	(working copy)
*************** msse3
*** 197,202 ****
--- 197,206 ----
  Target Report Mask(SSE3)
  Support MMX, SSE, SSE2 and SSE3 built-in functions and code generation
  
+ mssse3
+ Target Report Mask(SSSE3)
+ Support MMX, SSE, SSE2, SSE3 and SSSE3 built-in functions and code generation
+ 
  msseregparm
  Target RejectNegative Mask(SSEREGPARM)
  Use SSE register passing conventions for SF and DF mode
Index: gcc/config/i386/i386.c
===================================================================
*** gcc/config/i386/i386.c	(revision 118001)
--- gcc/config/i386/i386.c	(working copy)
*************** override_options (void)
*** 1222,1228 ****
  	  PTA_PREFETCH_SSE = 16,
  	  PTA_3DNOW = 32,
  	  PTA_3DNOW_A = 64,
! 	  PTA_64BIT = 128
  	} flags;
      }
    const processor_alias_table[] =
--- 1222,1229 ----
  	  PTA_PREFETCH_SSE = 16,
  	  PTA_3DNOW = 32,
  	  PTA_3DNOW_A = 64,
! 	  PTA_64BIT = 128,
! 	  PTA_SSSE3 = 256
  	} flags;
      }
    const processor_alias_table[] =
*************** override_options (void)
*** 1378,1383 ****
--- 1379,1387 ----
  	if (processor_alias_table[i].flags & PTA_SSE3
  	    && !(target_flags_explicit & MASK_SSE3))
  	  target_flags |= MASK_SSE3;
+ 	if (processor_alias_table[i].flags & PTA_SSSE3
+ 	    && !(target_flags_explicit & MASK_SSSE3))
+ 	  target_flags |= MASK_SSSE3;
  	if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
  	  x86_prefetch_sse = true;
  	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
*************** override_options (void)
*** 1567,1572 ****
--- 1571,1580 ----
    if (!TARGET_80387)
      target_flags |= MASK_NO_FANCY_MATH_387;
  
+   /* Turn on SSE3 builtins for -mssse3.  */
+   if (TARGET_SSSE3)
+     target_flags |= MASK_SSE3;
+ 
    /* Turn on SSE2 builtins for -msse3.  */
    if (TARGET_SSE3)
      target_flags |= MASK_SSE2;
*************** enum ix86_builtins
*** 13926,13931 ****
--- 13934,13974 ----
    IX86_BUILTIN_MONITOR,
    IX86_BUILTIN_MWAIT,
  
+   /* SSSE3.  */
+   IX86_BUILTIN_PHADDW,
+   IX86_BUILTIN_PHADDD,
+   IX86_BUILTIN_PHADDSW,
+   IX86_BUILTIN_PHSUBW,
+   IX86_BUILTIN_PHSUBD,
+   IX86_BUILTIN_PHSUBSW,
+   IX86_BUILTIN_PMADDUBSW,
+   IX86_BUILTIN_PMULHRSW,
+   IX86_BUILTIN_PSHUFB,
+   IX86_BUILTIN_PSIGNB,
+   IX86_BUILTIN_PSIGNW,
+   IX86_BUILTIN_PSIGND,
+   IX86_BUILTIN_PALIGNR,
+   IX86_BUILTIN_PABSB,
+   IX86_BUILTIN_PABSW,
+   IX86_BUILTIN_PABSD,
+ 
+   IX86_BUILTIN_PHADDW128,
+   IX86_BUILTIN_PHADDD128,
+   IX86_BUILTIN_PHADDSW128,
+   IX86_BUILTIN_PHSUBW128,
+   IX86_BUILTIN_PHSUBD128,
+   IX86_BUILTIN_PHSUBSW128,
+   IX86_BUILTIN_PMADDUBSW128,
+   IX86_BUILTIN_PMULHRSW128,
+   IX86_BUILTIN_PSHUFB128,
+   IX86_BUILTIN_PSIGNB128,
+   IX86_BUILTIN_PSIGNW128,
+   IX86_BUILTIN_PSIGND128,
+   IX86_BUILTIN_PALIGNR128,
+   IX86_BUILTIN_PABSB128,
+   IX86_BUILTIN_PABSW128,
+   IX86_BUILTIN_PABSD128,
+ 
    IX86_BUILTIN_VEC_INIT_V2SI,
    IX86_BUILTIN_VEC_INIT_V4HI,
    IX86_BUILTIN_VEC_INIT_V8QI,
*************** static const struct builtin_description 
*** 14266,14272 ****
    { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
    { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
    { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
!   { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
  };
  
  static const struct builtin_description bdesc_1arg[] =
--- 14309,14341 ----
    { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
    { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
    { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
!   { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 },
! 
!   /* SSSE3 */
!   { MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 },
!   { MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 },
!   { MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 },
!   { MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 },
!   { MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 },
!   { MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 },
!   { MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 },
!   { MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 },
!   { MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 },
!   { MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 },
!   { MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 },
!   { MASK_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 },
!   { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 },
!   { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 },
!   { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 },
!   { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 },
!   { MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 },
!   { MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 },
!   { MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 },
!   { MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 },
!   { MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
!   { MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
!   { MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
!   { MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 }
  };
  
  static const struct builtin_description bdesc_1arg[] =
*************** static const struct builtin_description 
*** 14313,14318 ****
--- 14382,14395 ----
    /* SSE3 */
    { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
    { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
+ 
+   /* SSSE3 */
+   { MASK_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
+   { MASK_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
+   { MASK_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
+   { MASK_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
+   { MASK_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
+   { MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },
  };
  
  static void
*************** ix86_init_mmx_sse_builtins (void)
*** 14447,14452 ****
--- 14524,14539 ----
    /* Normal vector unops.  */
    tree v4sf_ftype_v4sf
      = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
+   tree v16qi_ftype_v16qi
+     = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
+   tree v8hi_ftype_v8hi
+     = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
+   tree v4si_ftype_v4si
+     = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
+   tree v8qi_ftype_v8qi
+     = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
+   tree v4hi_ftype_v4hi
+     = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
  
    /* Normal vector binops.  */
    tree v4sf_ftype_v4sf_v4sf
*************** ix86_init_mmx_sse_builtins (void)
*** 14466,14471 ****
--- 14553,14564 ----
  				long_long_unsigned_type_node,
  				long_long_unsigned_type_node, NULL_TREE);
  
+   tree di_ftype_di_di_int
+     = build_function_type_list (long_long_unsigned_type_node,
+ 				long_long_unsigned_type_node,
+ 				long_long_unsigned_type_node,
+ 				integer_type_node, NULL_TREE);
+ 
    tree v2si_ftype_v2sf
      = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
    tree v2sf_ftype_v2si
*************** ix86_init_mmx_sse_builtins (void)
*** 14570,14575 ****
--- 14663,14671 ----
    tree v2di_ftype_v2di_int
      = build_function_type_list (V2DI_type_node,
  				V2DI_type_node, integer_type_node, NULL_TREE);
+   tree v2di_ftype_v2di_v2di_int
+     = build_function_type_list (V2DI_type_node, V2DI_type_node,
+ 				V2DI_type_node, integer_type_node, NULL_TREE);
    tree v4si_ftype_v4si_int
      = build_function_type_list (V4SI_type_node,
  				V4SI_type_node, integer_type_node, NULL_TREE);
*************** ix86_init_mmx_sse_builtins (void)
*** 14695,14700 ****
--- 14791,14840 ----
        def_builtin (d->mask, d->name, type, d->code);
      }
  
+   /* Add all builtins that are more or less simple operations on 1 operand.  */
+   for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
+     {
+       enum machine_mode mode;
+       tree type;	/* Function type chosen from MODE by the switch below.  */
+ 
+       if (d->name == 0)
+ 	continue;	/* Unnamed table entries are not registered by this loop.  */
+       mode = insn_data[d->icode].operand[1].mode;	/* Mode of insn operand 1.  */
+ 
+       switch (mode)	/* Pick the one-argument function type for that mode.  */
+ 	{
+ 	case V16QImode:
+ 	  type = v16qi_ftype_v16qi;
+ 	  break;
+ 	case V8HImode:
+ 	  type = v8hi_ftype_v8hi;
+ 	  break;
+ 	case V4SImode:
+ 	  type = v4si_ftype_v4si;
+ 	  break;
+ 	case V2DFmode:
+ 	  type = v2df_ftype_v2df;
+ 	  break;
+ 	case V4SFmode:
+ 	  type = v4sf_ftype_v4sf;
+ 	  break;
+ 	case V8QImode:
+ 	  type = v8qi_ftype_v8qi;
+ 	  break;
+ 	case V4HImode:
+ 	  type = v4hi_ftype_v4hi;
+ 	  break;
+ 	case V2SImode:
+ 	  type = v2si_ftype_v2si;
+ 	  break;
+ 
+ 	default:
+ 	  abort ();	/* No one-argument function type is handled for this mode.  */
+ 	}
+ 
+       def_builtin (d->mask, d->name, type, d->code);	/* Mask, name and code come from the table entry.  */
+     }
+ 
    /* Add the remaining MMX insns with somewhat more complicated types.  */
    def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
    def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
*************** ix86_init_mmx_sse_builtins (void)
*** 14894,14899 ****
--- 15034,15045 ----
    def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
  	       v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
  
+   /* SSSE3.  */
+   def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128",
+ 	       v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
+   def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
+ 	       IX86_BUILTIN_PALIGNR);
+ 
    /* Access to the vec_init patterns.  */
    ftype = build_function_type_list (V2SI_type_node, integer_type_node,
  				    integer_type_node, NULL_TREE);
*************** ix86_expand_builtin (tree exp, rtx targe
*** 15384,15390 ****
    tree arglist = TREE_OPERAND (exp, 1);
    tree arg0, arg1, arg2;
    rtx op0, op1, op2, pat;
!   enum machine_mode tmode, mode0, mode1, mode2;
    unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  
    switch (fcode)
--- 15530,15536 ----
    tree arglist = TREE_OPERAND (exp, 1);
    tree arg0, arg1, arg2;
    rtx op0, op1, op2, pat;
!   enum machine_mode tmode, mode0, mode1, mode2, mode3;
    unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  
    switch (fcode)
*************** ix86_expand_builtin (tree exp, rtx targe
*** 15753,15758 ****
--- 15899,15950 ----
        return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
  				       target, 1);
  
+     case IX86_BUILTIN_PALIGNR:
+     case IX86_BUILTIN_PALIGNR128:	/* Both variants share the expansion below.  */
+       if (fcode == IX86_BUILTIN_PALIGNR)
+ 	{
+ 	  icode = CODE_FOR_ssse3_palignrdi;
+ 	  mode = DImode;	/* Mode of the result register created below.  */
+ 	}
+       else
+ 	{
+ 	  icode = CODE_FOR_ssse3_palignrti;
+ 	  mode = V2DImode;	/* Mode of the result register created below.  */
+ 	}
+       arg0 = TREE_VALUE (arglist);
+       arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+       arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));	/* Third argument: the shift.  */
+       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+       op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
+       op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
+       tmode = insn_data[icode].operand[0].mode;	/* Operand 0 receives the result (see GEN_FCN call).  */
+       mode1 = insn_data[icode].operand[1].mode;
+       mode2 = insn_data[icode].operand[2].mode;
+       mode3 = insn_data[icode].operand[3].mode;
+ 
+       if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
+ 	{
+ 	  op0 = copy_to_reg (op0);
+ 	  op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);	/* Use the copy as a mode1 subreg.  */
+ 	}
+       if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
+ 	{
+ 	  op1 = copy_to_reg (op1);
+ 	  op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);	/* Use the copy as a mode2 subreg.  */
+ 	}
+       if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
+ 	{
+ 	  error ("shift must be an immediate");
+ 	  return const0_rtx;	/* Diagnosed above; no insn is emitted for the call.  */
+ 	}
+       target = gen_reg_rtx (mode);	/* The incoming TARGET is not reused.  */
+       pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
+ 			     op0, op1, op2);
+       if (! pat)
+ 	return 0;	/* GEN_FCN produced no pattern.  */
+       emit_insn (pat);
+       return target;
+ 
      case IX86_BUILTIN_VEC_INIT_V2SI:
      case IX86_BUILTIN_VEC_INIT_V4HI:
      case IX86_BUILTIN_VEC_INIT_V8QI:
--- /dev/null	2006-05-02 08:46:16.000000000 +0200
+++ gcc/config/i386/tmmintrin.h	2006-10-24 11:33:34.000000000 +0200
@@ -0,0 +1,233 @@
+/* Copyright (C) 2006 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   GCC is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING.  If not, write to
+   the Free Software Foundation, 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+/* As a special exception, if you include this header file into source
+   files compiled by GCC, this header file does not by itself cause
+   the resulting executable to be covered by the GNU General Public
+   License.  This exception does not however invalidate any other
+   reasons why the executable file might be covered by the GNU General
+   Public License.  */
+
+/* Implemented from the specification included in the Intel C++ Compiler
+   User Guide and Reference, version 9.1.  */
+
+#ifndef _TMMINTRIN_H_INCLUDED
+#define _TMMINTRIN_H_INCLUDED
+
+#ifdef __SSSE3__
+#include <pmmintrin.h>
+
+/* Horizontal add and subtract: wrappers for the phadd* and phsub* builtins.  */
+static __inline __m128i __attribute__((__always_inline__))
+_mm_hadd_epi16 (__m128i __X, __m128i __Y)
+{
+  return (__m128i) __builtin_ia32_phaddw128 ((__v8hi)__X, (__v8hi)__Y);
+}
+
+static __inline __m128i __attribute__((__always_inline__))
+_mm_hadd_epi32 (__m128i __X, __m128i __Y)
+{
+  return (__m128i) __builtin_ia32_phaddd128 ((__v4si)__X, (__v4si)__Y);
+}
+
+static __inline __m128i __attribute__((__always_inline__))
+_mm_hadds_epi16 (__m128i __X, __m128i __Y)
+{
+  return (__m128i) __builtin_ia32_phaddsw128 ((__v8hi)__X, (__v8hi)__Y);
+}
+
+static __inline __m64 __attribute__((__always_inline__))
+_mm_hadd_pi16 (__m64 __X, __m64 __Y)
+{
+  return (__m64) __builtin_ia32_phaddw ((__v4hi)__X, (__v4hi)__Y);
+}
+
+static __inline __m64 __attribute__((__always_inline__))
+_mm_hadd_pi32 (__m64 __X, __m64 __Y)
+{
+  return (__m64) __builtin_ia32_phaddd ((__v2si)__X, (__v2si)__Y);
+}
+
+static __inline __m64 __attribute__((__always_inline__))
+_mm_hadds_pi16 (__m64 __X, __m64 __Y)
+{
+  return (__m64) __builtin_ia32_phaddsw ((__v4hi)__X, (__v4hi)__Y);
+}
+
+static __inline __m128i __attribute__((__always_inline__))
+_mm_hsub_epi16 (__m128i __X, __m128i __Y)
+{
+  return (__m128i) __builtin_ia32_phsubw128 ((__v8hi)__X, (__v8hi)__Y);
+}
+
+static __inline __m128i __attribute__((__always_inline__))
+_mm_hsub_epi32 (__m128i __X, __m128i __Y)
+{
+  return (__m128i) __builtin_ia32_phsubd128 ((__v4si)__X, (__v4si)__Y);
+}
+
+static __inline __m128i __attribute__((__always_inline__))
+_mm_hsubs_epi16 (__m128i __X, __m128i __Y)
+{
+  return (__m128i) __builtin_ia32_phsubsw128 ((__v8hi)__X, (__v8hi)__Y);
+}
+
+static __inline __m64 __attribute__((__always_inline__))
+_mm_hsub_pi16 (__m64 __X, __m64 __Y)
+{
+  return (__m64) __builtin_ia32_phsubw ((__v4hi)__X, (__v4hi)__Y);
+}
+
+static __inline __m64 __attribute__((__always_inline__))
+_mm_hsub_pi32 (__m64 __X, __m64 __Y)
+{
+  return (__m64) __builtin_ia32_phsubd ((__v2si)__X, (__v2si)__Y);
+}
+
+static __inline __m64 __attribute__((__always_inline__))
+_mm_hsubs_pi16 (__m64 __X, __m64 __Y)
+{
+  return (__m64) __builtin_ia32_phsubsw ((__v4hi)__X, (__v4hi)__Y);
+}
+
+/* Wrappers for the pmaddubsw and pmulhrsw builtins.  */
+static __inline __m128i __attribute__((__always_inline__))
+_mm_maddubs_epi16 (__m128i __X, __m128i __Y)
+{
+  return (__m128i) __builtin_ia32_pmaddubsw128 ((__v16qi)__X, (__v16qi)__Y);
+}
+
+static __inline __m64 __attribute__((__always_inline__))
+_mm_maddubs_pi16 (__m64 __X, __m64 __Y)
+{
+  return (__m64) __builtin_ia32_pmaddubsw ((__v8qi)__X, (__v8qi)__Y);
+}
+
+static __inline __m128i __attribute__((__always_inline__))
+_mm_mulhrs_epi16 (__m128i __X, __m128i __Y)
+{
+  return (__m128i) __builtin_ia32_pmulhrsw128 ((__v8hi)__X, (__v8hi)__Y);
+}
+
+static __inline __m64 __attribute__((__always_inline__))
+_mm_mulhrs_pi16 (__m64 __X, __m64 __Y)
+{
+  return (__m64) __builtin_ia32_pmulhrsw ((__v4hi)__X, (__v4hi)__Y);
+}
+
+/* Wrappers for the pshufb builtins.  */
+static __inline __m128i __attribute__((__always_inline__))
+_mm_shuffle_epi8 (__m128i __X, __m128i __Y)
+{
+  return (__m128i) __builtin_ia32_pshufb128 ((__v16qi)__X, (__v16qi)__Y);
+}
+
+static __inline __m64 __attribute__((__always_inline__))
+_mm_shuffle_pi8 (__m64 __X, __m64 __Y)
+{
+  return (__m64) __builtin_ia32_pshufb ((__v8qi)__X, (__v8qi)__Y);
+}
+
+/* Wrappers for the psignb, psignw and psignd builtins.  */
+static __inline __m128i __attribute__((__always_inline__))
+_mm_sign_epi8 (__m128i __X, __m128i __Y)
+{
+  return (__m128i) __builtin_ia32_psignb128 ((__v16qi)__X, (__v16qi)__Y);
+}
+
+static __inline __m128i __attribute__((__always_inline__))
+_mm_sign_epi16 (__m128i __X, __m128i __Y)
+{
+  return (__m128i) __builtin_ia32_psignw128 ((__v8hi)__X, (__v8hi)__Y);
+}
+
+static __inline __m128i __attribute__((__always_inline__))
+_mm_sign_epi32 (__m128i __X, __m128i __Y)
+{
+  return (__m128i) __builtin_ia32_psignd128 ((__v4si)__X, (__v4si)__Y);
+}
+
+static __inline __m64 __attribute__((__always_inline__))
+_mm_sign_pi8 (__m64 __X, __m64 __Y)
+{
+  return (__m64) __builtin_ia32_psignb ((__v8qi)__X, (__v8qi)__Y);
+}
+
+static __inline __m64 __attribute__((__always_inline__))
+_mm_sign_pi16 (__m64 __X, __m64 __Y)
+{
+  return (__m64) __builtin_ia32_psignw ((__v4hi)__X, (__v4hi)__Y);
+}
+
+static __inline __m64 __attribute__((__always_inline__))
+_mm_sign_pi32 (__m64 __X, __m64 __Y)
+{
+  return (__m64) __builtin_ia32_psignd ((__v2si)__X, (__v2si)__Y);
+}
+
+/* Macros, not inline functions: the palignr builtins are rejected with
+   "shift must be an immediate" when the last operand is not accepted as
+   an immediate.  __N is scaled by 8 before being passed on.  Every
+   argument is parenthesized so that any expression may be passed.  */
+#define _mm_alignr_epi8(__X, __Y, __N) \
+  ((__m128i)__builtin_ia32_palignr128 ((__v2di) (__X), (__v2di) (__Y), (__N) * 8))
+
+#define _mm_alignr_pi8(__X, __Y, __N) \
+  ((__m64)__builtin_ia32_palignr ((long long) (__X), (long long) (__Y), (__N) * 8))
+
+/* Wrappers for the pabsb, pabsw and pabsd builtins.  */
+static __inline __m128i __attribute__((__always_inline__))
+_mm_abs_epi8 (__m128i __X)
+{
+  return (__m128i) __builtin_ia32_pabsb128 ((__v16qi)__X);
+}
+
+static __inline __m128i __attribute__((__always_inline__))
+_mm_abs_epi16 (__m128i __X)
+{
+  return (__m128i) __builtin_ia32_pabsw128 ((__v8hi)__X);
+}
+
+static __inline __m128i __attribute__((__always_inline__))
+_mm_abs_epi32 (__m128i __X)
+{
+  return (__m128i) __builtin_ia32_pabsd128 ((__v4si)__X);
+}
+
+static __inline __m64 __attribute__((__always_inline__))
+_mm_abs_pi8 (__m64 __X)
+{
+  return (__m64) __builtin_ia32_pabsb ((__v8qi)__X);
+}
+
+static __inline __m64 __attribute__((__always_inline__))
+_mm_abs_pi16 (__m64 __X)
+{
+  return (__m64) __builtin_ia32_pabsw ((__v4hi)__X);
+}
+
+static __inline __m64 __attribute__((__always_inline__))
+_mm_abs_pi32 (__m64 __X)
+{
+  return (__m64) __builtin_ia32_pabsd ((__v2si)__X);
+}
+
+#endif /* __SSSE3__ */
+
+#endif /* _TMMINTRIN_H_INCLUDED */
openSUSE Build Service is sponsored by