File amd-cvect-2.diff of Package gcc43
2008-08-02 Richard Guenther <rguenther@suse.de>
PR target/35252
* config/i386/sse.md (SSEMODE4S, SSEMODE2D): New mode iterators.
(ssedoublesizemode): New mode attribute.
(sse_shufps): Call gen_sse_shufps_v4sf.
(sse_shufps_1): Macroize.
(sse2_shufpd): Call gen_sse2_shufpd_v2df.
(sse2_shufpd_1): Macroize.
(vec_extract_odd, vec_extract_even): New expanders.
(vec_interleave_highv4sf, vec_interleave_lowv4sf,
vec_interleave_highv2df, vec_interleave_lowv2df): Likewise.
* config/i386/i386.c (ix86_expand_vector_init_one_nonzero): Call
gen_sse_shufps_v4sf instead of gen_sse_shufps_1.
(ix86_expand_vector_set): Likewise.
(ix86_expand_vector_extract): Likewise.
(ix86_expand_reduc_v4sf): Likewise.
* lib/target-supports.exp (vect_extract_even_odd_wide): Add.
(vect_strided_wide): Likewise.
* gcc.dg/vect/fast-math-pr35982.c: Enable for
vect_extract_even_odd_wide.
* gcc.dg/vect/fast-math-vect-complex-3.c: Likewise.
* gcc.dg/vect/vect-1.c: Likewise.
* gcc.dg/vect/vect-107.c: Likewise.
* gcc.dg/vect/vect-98.c: Likewise.
* gcc.dg/vect/vect-strided-float.c: Likewise.
* gcc.dg/vect/slp-11.c: Enable for vect_strided_wide.
* gcc.dg/vect/slp-12a.c: Likewise.
* gcc.dg/vect/slp-12b.c: Likewise.
* gcc.dg/vect/slp-19.c: Likewise.
* gcc.dg/vect/slp-23.c: Likewise.
* gcc.dg/vect/slp-5.c: Likewise.
Index: gcc/testsuite/gcc.dg/vect/vect-1.c
===================================================================
*** gcc/testsuite/gcc.dg/vect/vect-1.c.orig 2008-02-19 10:53:27.000000000 +0100
--- gcc/testsuite/gcc.dg/vect/vect-1.c 2008-08-12 23:29:12.000000000 +0200
*************** foo (int n)
*** 86,91 ****
fbar (a);
}
! /* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect" { target vect_extract_even_odd } } } */
! /* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail vect_extract_even_odd } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
--- 86,91 ----
fbar (a);
}
! /* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect" { target vect_extract_even_odd_wide } } } */
! /* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail vect_extract_even_odd_wide } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
Index: gcc/testsuite/gcc.dg/vect/vect-107.c
===================================================================
*** gcc/testsuite/gcc.dg/vect/vect-107.c.orig 2008-02-19 10:53:27.000000000 +0100
--- gcc/testsuite/gcc.dg/vect/vect-107.c 2008-08-12 23:29:12.000000000 +0200
*************** int main (void)
*** 39,44 ****
return main1 ();
}
! /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_extract_even_odd } } } */
! /* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail vect_extract_even_odd } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
--- 39,44 ----
return main1 ();
}
! /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_extract_even_odd_wide } } } */
! /* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail vect_extract_even_odd_wide } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
Index: gcc/testsuite/gcc.dg/vect/slp-11.c
===================================================================
*** gcc/testsuite/gcc.dg/vect/slp-11.c.orig 2008-02-19 10:53:27.000000000 +0100
--- gcc/testsuite/gcc.dg/vect/slp-11.c 2008-08-12 23:29:12.000000000 +0200
*************** int main (void)
*** 106,113 ****
return 0;
}
! /* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { target { vect_strided && vect_int_mult } } } } */
! /* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" {target { ! { vect_int_mult && vect_strided } } } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
--- 106,113 ----
return 0;
}
! /* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { target { vect_strided_wide && vect_int_mult } } } } */
! /* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" {target { ! { vect_int_mult && vect_strided_wide } } } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
Index: gcc/testsuite/gcc.dg/vect/slp-19.c
===================================================================
*** gcc/testsuite/gcc.dg/vect/slp-19.c.orig 2008-02-19 10:53:27.000000000 +0100
--- gcc/testsuite/gcc.dg/vect/slp-19.c 2008-08-12 23:29:12.000000000 +0200
*************** int main (void)
*** 147,155 ****
return 0;
}
! /* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { target vect_strided } } } */
! /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { ! { vect_strided } } } } } */
! /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" { target vect_strided } } } */
! /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { ! { vect_strided } } } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
--- 147,155 ----
return 0;
}
! /* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { target vect_strided_wide } } } */
! /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { ! { vect_strided_wide } } } } } */
! /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" { target vect_strided_wide } } } */
! /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { ! { vect_strided_wide } } } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
Index: gcc/testsuite/gcc.dg/vect/vect-98.c
===================================================================
*** gcc/testsuite/gcc.dg/vect/vect-98.c.orig 2008-02-19 10:53:27.000000000 +0100
--- gcc/testsuite/gcc.dg/vect/vect-98.c 2008-08-12 23:29:12.000000000 +0200
*************** int main (void)
*** 38,43 ****
}
/* Needs interleaving support. */
! /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */
! /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" { xfail { vect_interleave && vect_extract_even_odd } } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
--- 38,43 ----
}
/* Needs interleaving support. */
! /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd_wide } } } } */
! /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" { xfail { vect_interleave && vect_extract_even_odd_wide } } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
Index: gcc/testsuite/gcc.dg/vect/vect-strided-float.c
===================================================================
*** gcc/testsuite/gcc.dg/vect/vect-strided-float.c.orig 2008-02-19 10:53:27.000000000 +0100
--- gcc/testsuite/gcc.dg/vect/vect-strided-float.c 2008-08-12 23:29:12.000000000 +0200
*************** int main (void)
*** 38,44 ****
}
/* Needs interleaving support. */
! /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */
! /* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail { vect_interleave && vect_extract_even_odd } } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
--- 38,44 ----
}
/* Needs interleaving support. */
! /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd_wide } } } } */
! /* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail { vect_interleave && vect_extract_even_odd_wide } } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
Index: gcc/testsuite/gcc.dg/vect/fast-math-pr35982.c
===================================================================
*** gcc/testsuite/gcc.dg/vect/fast-math-pr35982.c.orig 2008-04-23 15:51:28.000000000 +0200
--- gcc/testsuite/gcc.dg/vect/fast-math-pr35982.c 2008-08-12 23:29:12.000000000 +0200
*************** float method2_int16 (struct mem *mem)
*** 19,25 ****
return avg;
}
! /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_extract_even_odd } } } */
! /* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail vect_extract_even_odd } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
--- 19,25 ----
return avg;
}
! /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_extract_even_odd_wide } } } */
! /* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail vect_extract_even_odd_wide } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
Index: gcc/testsuite/gcc.dg/vect/slp-12a.c
===================================================================
*** gcc/testsuite/gcc.dg/vect/slp-12a.c.orig 2008-02-19 10:53:27.000000000 +0100
--- gcc/testsuite/gcc.dg/vect/slp-12a.c 2008-08-12 23:29:12.000000000 +0200
*************** int main (void)
*** 95,105 ****
return 0;
}
! /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" {target { vect_strided && vect_int_mult} } } } */
! /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {target { {! {vect_strided}} && vect_int_mult } } } } */
/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" {target { ! vect_int_mult } } } } */
! /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" {target { vect_strided && vect_int_mult } } } } */
! /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" {target { {! {vect_strided}} && vect_int_mult } } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" {target { ! vect_int_mult } } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
--- 95,105 ----
return 0;
}
! /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" {target { vect_strided_wide && vect_int_mult} } } } */
! /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {target { {! {vect_strided_wide}} && vect_int_mult } } } } */
/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" {target { ! vect_int_mult } } } } */
! /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" {target { vect_strided_wide && vect_int_mult } } } } */
! /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" {target { {! {vect_strided_wide}} && vect_int_mult } } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" {target { ! vect_int_mult } } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
Index: gcc/testsuite/gcc.dg/vect/fast-math-vect-complex-3.c
===================================================================
*** gcc/testsuite/gcc.dg/vect/fast-math-vect-complex-3.c.orig 2008-08-12 23:29:03.000000000 +0200
--- gcc/testsuite/gcc.dg/vect/fast-math-vect-complex-3.c 2008-08-12 23:29:12.000000000 +0200
*************** main (void)
*** 57,61 ****
return 0;
}
! /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
--- 57,61 ----
return 0;
}
! /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd_wide } } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
Index: gcc/testsuite/gcc.dg/vect/slp-23.c
===================================================================
*** gcc/testsuite/gcc.dg/vect/slp-23.c.orig 2008-02-19 10:53:27.000000000 +0100
--- gcc/testsuite/gcc.dg/vect/slp-23.c 2008-08-12 23:29:12.000000000 +0200
*************** int main (void)
*** 106,113 ****
return 0;
}
! /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target { vect_strided } && {! { vect_no_align} } } } } */
! /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { ! { vect_strided || vect_no_align} } } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { xfail vect_no_align } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
--- 106,113 ----
return 0;
}
! /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target { vect_strided_wide } && {! { vect_no_align} } } } } */
! /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { ! { vect_strided_wide || vect_no_align} } } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { xfail vect_no_align } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
Index: gcc/testsuite/gcc.dg/vect/slp-5.c
===================================================================
*** gcc/testsuite/gcc.dg/vect/slp-5.c.orig 2008-02-19 10:53:27.000000000 +0100
--- gcc/testsuite/gcc.dg/vect/slp-5.c 2008-08-12 23:29:12.000000000 +0200
*************** int main (void)
*** 121,128 ****
return 0;
}
! /* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { target { vect_strided } } } } */
! /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target { ! { vect_strided } } } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
--- 121,128 ----
return 0;
}
! /* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { target { vect_strided_wide } } } } */
! /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target { ! { vect_strided_wide } } } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
Index: gcc/testsuite/gcc.dg/vect/slp-12b.c
===================================================================
*** gcc/testsuite/gcc.dg/vect/slp-12b.c.orig 2008-02-19 10:53:27.000000000 +0100
--- gcc/testsuite/gcc.dg/vect/slp-12b.c 2008-08-12 23:29:12.000000000 +0200
*************** int main (void)
*** 43,51 ****
return 0;
}
! /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {target { vect_strided && vect_int_mult } } } } */
! /* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" {target { { ! { vect_int_mult }} || { ! {vect_strided}}} } } } */
! /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" {target { vect_strided && vect_int_mult } } } } */
! /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" {target { { ! { vect_int_mult }} || { ! {vect_strided}}} } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
--- 43,51 ----
return 0;
}
! /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {target { vect_strided_wide && vect_int_mult } } } } */
! /* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" {target { { ! { vect_int_mult }} || { ! {vect_strided_wide}}} } } } */
! /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" {target { vect_strided_wide && vect_int_mult } } } } */
! /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" {target { { ! { vect_int_mult }} || { ! {vect_strided_wide}}} } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
Index: gcc/testsuite/lib/target-supports.exp
===================================================================
*** gcc/testsuite/lib/target-supports.exp.orig 2008-07-25 17:52:11.000000000 +0200
--- gcc/testsuite/lib/target-supports.exp 2008-08-12 23:29:12.000000000 +0200
*************** proc check_effective_target_vect_extract
*** 2000,2005 ****
--- 2000,2026 ----
return $et_vect_extract_even_odd_saved
}
+ # Return 1 if the target supports vector even/odd elements extraction of
+ # vectors with SImode elements or larger, 0 otherwise.
+
+ proc check_effective_target_vect_extract_even_odd_wide { } {
+ global et_vect_extract_even_odd_wide_saved
+
+ if [info exists et_vect_extract_even_odd_wide_saved] {
+ verbose "check_effective_target_vect_extract_even_odd_wide: using cached result" 2
+ } else {
+ set et_vect_extract_even_odd_wide_saved 0
+ if { [istarget powerpc*-*-*]
+ || [istarget i?86-*-*]
+ || [istarget x86_64-*-*] } {
+ set et_vect_extract_even_odd_wide_saved 1
+ }
+ }
+
+ verbose "check_effective_target_vect_extract_even_odd_wide: returning $et_vect_extract_even_odd_wide_saved" 2
+ return $et_vect_extract_even_odd_wide_saved
+ }
+
# Return 1 if the target supports vector interleaving, 0 otherwise.
proc check_effective_target_vect_interleave { } {
*************** proc check_effective_target_vect_strided
*** 2038,2043 ****
--- 2059,2083 ----
return $et_vect_strided_saved
}
+ # Return 1 if the target supports vector interleaving and extract even/odd
+ # for wide element types, 0 otherwise.
+ proc check_effective_target_vect_strided_wide { } {
+ global et_vect_strided_wide_saved
+
+ if [info exists et_vect_strided_wide_saved] {
+ verbose "check_effective_target_vect_strided_wide: using cached result" 2
+ } else {
+ set et_vect_strided_wide_saved 0
+ if { [check_effective_target_vect_interleave]
+ && [check_effective_target_vect_extract_even_odd_wide] } {
+ set et_vect_strided_wide_saved 1
+ }
+ }
+
+ verbose "check_effective_target_vect_strided_wide: returning $et_vect_strided_wide_saved" 2
+ return $et_vect_strided_wide_saved
+ }
+
# Return 1 if the target supports section-anchors
proc check_effective_target_section_anchors { } {
Index: gcc/config/i386/sse.md
===================================================================
*** gcc/config/i386/sse.md.orig 2008-08-12 23:29:02.000000000 +0200
--- gcc/config/i386/sse.md 2008-08-12 23:30:22.000000000 +0200
***************
*** 36,41 ****
--- 36,45 ----
(define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
(define_mode_iterator SSEMODEF2P [V4SF V2DF])
+ ;; Int-float size matches
+ (define_mode_iterator SSEMODE4S [V4SF V4SI])
+ (define_mode_iterator SSEMODE2D [V2DF V2DI])
+
;; Mapping from integer vector mode to mnemonic suffix
(define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
***************
*** 52,57 ****
--- 56,65 ----
(V16QI "QI") (V8HI "HI")
(V4SI "SI") (V2DI "DI")])
+ ;; Mapping of vector modes to a vector mode of double size
+ (define_mode_attr ssedoublesizemode [(V2DF "V4DF") (V2DI "V4DI")
+ (V4SF "V8SF") (V4SI "V8SI")])
+
;; Number of scalar elements in each vector type
(define_mode_attr ssescalarnum [(V4SF "4") (V2DF "2")
(V16QI "16") (V8HI "8")
***************
*** 1312,1318 ****
"TARGET_SSE"
{
int mask = INTVAL (operands[3]);
! emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
GEN_INT ((mask >> 0) & 3),
GEN_INT ((mask >> 2) & 3),
GEN_INT (((mask >> 4) & 3) + 4),
--- 1320,1326 ----
"TARGET_SSE"
{
int mask = INTVAL (operands[3]);
! emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
GEN_INT ((mask >> 0) & 3),
GEN_INT ((mask >> 2) & 3),
GEN_INT (((mask >> 4) & 3) + 4),
***************
*** 1320,1331 ****
DONE;
})
! (define_insn "sse_shufps_1"
! [(set (match_operand:V4SF 0 "register_operand" "=x")
! (vec_select:V4SF
! (vec_concat:V8SF
! (match_operand:V4SF 1 "register_operand" "0")
! (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
(parallel [(match_operand 3 "const_0_to_3_operand" "")
(match_operand 4 "const_0_to_3_operand" "")
(match_operand 5 "const_4_to_7_operand" "")
--- 1328,1339 ----
DONE;
})
! (define_insn "sse_shufps_<mode>"
! [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
! (vec_select:SSEMODE4S
! (vec_concat:<ssedoublesizemode>
! (match_operand:SSEMODE4S 1 "register_operand" "0")
! (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
(parallel [(match_operand 3 "const_0_to_3_operand" "")
(match_operand 4 "const_0_to_3_operand" "")
(match_operand 5 "const_4_to_7_operand" "")
***************
*** 3121,3138 ****
"TARGET_SSE2"
{
int mask = INTVAL (operands[3]);
! emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
GEN_INT (mask & 1),
GEN_INT (mask & 2 ? 3 : 2)));
DONE;
})
! (define_insn "sse2_shufpd_1"
! [(set (match_operand:V2DF 0 "register_operand" "=x")
! (vec_select:V2DF
! (vec_concat:V4DF
! (match_operand:V2DF 1 "register_operand" "0")
! (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
(parallel [(match_operand 3 "const_0_to_1_operand" "")
(match_operand 4 "const_2_to_3_operand" "")])))]
"TARGET_SSE2"
--- 3129,3190 ----
"TARGET_SSE2"
{
int mask = INTVAL (operands[3]);
! emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
GEN_INT (mask & 1),
GEN_INT (mask & 2 ? 3 : 2)));
DONE;
})
! (define_expand "vec_extract_even<mode>"
! [(set (match_operand:SSEMODE4S 0 "register_operand" "")
! (vec_select:SSEMODE4S
! (vec_concat:<ssedoublesizemode>
! (match_operand:SSEMODE4S 1 "register_operand" "")
! (match_operand:SSEMODE4S 2 "nonimmediate_operand" ""))
! (parallel [(const_int 0)
! (const_int 2)
! (const_int 4)
! (const_int 6)])))]
! "TARGET_SSE")
!
! (define_expand "vec_extract_odd<mode>"
! [(set (match_operand:SSEMODE4S 0 "register_operand" "")
! (vec_select:SSEMODE4S
! (vec_concat:<ssedoublesizemode>
! (match_operand:SSEMODE4S 1 "register_operand" "")
! (match_operand:SSEMODE4S 2 "nonimmediate_operand" ""))
! (parallel [(const_int 1)
! (const_int 3)
! (const_int 5)
! (const_int 7)])))]
! "TARGET_SSE")
!
! (define_expand "vec_extract_even<mode>"
! [(set (match_operand:SSEMODE2D 0 "register_operand" "")
! (vec_select:SSEMODE2D
! (vec_concat:<ssedoublesizemode>
! (match_operand:SSEMODE2D 1 "register_operand" "")
! (match_operand:SSEMODE2D 2 "nonimmediate_operand" ""))
! (parallel [(const_int 0)
! (const_int 2)])))]
! "TARGET_SSE2")
!
! (define_expand "vec_extract_odd<mode>"
! [(set (match_operand:SSEMODE2D 0 "register_operand" "")
! (vec_select:SSEMODE2D
! (vec_concat:<ssedoublesizemode>
! (match_operand:SSEMODE2D 1 "register_operand" "")
! (match_operand:SSEMODE2D 2 "nonimmediate_operand" ""))
! (parallel [(const_int 1)
! (const_int 3)])))]
! "TARGET_SSE2")
!
! (define_insn "sse2_shufpd_<mode>"
! [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
! (vec_select:SSEMODE2D
! (vec_concat:<ssedoublesizemode>
! (match_operand:SSEMODE2D 1 "register_operand" "0")
! (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
(parallel [(match_operand 3 "const_0_to_1_operand" "")
(match_operand 4 "const_2_to_3_operand" "")])))]
"TARGET_SSE2"
***************
*** 4895,4900 ****
--- 4947,4992 ----
DONE;
})
+ (define_expand "vec_interleave_highv4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "register_operand" "")
+ (match_operand:V4SF 2 "nonimmediate_operand" ""))
+ (parallel [(const_int 2) (const_int 6)
+ (const_int 3) (const_int 7)])))]
+ "TARGET_SSE")
+
+ (define_expand "vec_interleave_lowv4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "register_operand" "")
+ (match_operand:V4SF 2 "nonimmediate_operand" ""))
+ (parallel [(const_int 0) (const_int 4)
+ (const_int 1) (const_int 5)])))]
+ "TARGET_SSE")
+
+ (define_expand "vec_interleave_highv2df"
+ [(set (match_operand:V2DF 0 "register_operand" "")
+ (vec_select:V2DF
+ (vec_concat:V4DF
+ (match_operand:V2DF 1 "register_operand" "")
+ (match_operand:V2DF 2 "nonimmediate_operand" ""))
+ (parallel [(const_int 1)
+ (const_int 3)])))]
+ "TARGET_SSE2")
+
+ (define_expand "vec_interleave_lowv2df"
+ [(set (match_operand:V2DF 0 "register_operand" "")
+ (vec_select:V2DF
+ (vec_concat:V4DF
+ (match_operand:V2DF 1 "register_operand" "")
+ (match_operand:V2DF 2 "nonimmediate_operand" ""))
+ (parallel [(const_int 0)
+ (const_int 2)])))]
+ "TARGET_SSE2")
+
(define_insn "sse2_packsswb"
[(set (match_operand:V16QI 0 "register_operand" "=x")
(vec_concat:V16QI
Index: gcc/config/i386/i386.c
===================================================================
*** gcc/config/i386/i386.c.orig 2008-08-12 23:29:02.000000000 +0200
--- gcc/config/i386/i386.c 2008-08-12 23:29:13.000000000 +0200
*************** ix86_expand_vector_init_one_nonzero (boo
*** 23397,23403 ****
else
tmp = new_target;
! emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
GEN_INT (1),
GEN_INT (one_var == 1 ? 0 : 1),
GEN_INT (one_var == 2 ? 0+4 : 1+4),
--- 23397,23403 ----
else
tmp = new_target;
! emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
GEN_INT (1),
GEN_INT (one_var == 1 ? 0 : 1),
GEN_INT (one_var == 2 ? 0+4 : 1+4),
*************** ix86_expand_vector_set (bool mmx_ok, rtx
*** 23772,23778 ****
/* target = X A B B */
ix86_expand_vector_set (false, target, val, 0);
/* target = A X C D */
! emit_insn (gen_sse_shufps_1 (target, target, tmp,
GEN_INT (1), GEN_INT (0),
GEN_INT (2+4), GEN_INT (3+4)));
return;
--- 23772,23778 ----
/* target = X A B B */
ix86_expand_vector_set (false, target, val, 0);
/* target = A X C D */
! emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
GEN_INT (1), GEN_INT (0),
GEN_INT (2+4), GEN_INT (3+4)));
return;
*************** ix86_expand_vector_set (bool mmx_ok, rtx
*** 23783,23789 ****
/* tmp = X B C D */
ix86_expand_vector_set (false, tmp, val, 0);
/* target = A B X D */
! emit_insn (gen_sse_shufps_1 (target, target, tmp,
GEN_INT (0), GEN_INT (1),
GEN_INT (0+4), GEN_INT (3+4)));
return;
--- 23783,23789 ----
/* tmp = X B C D */
ix86_expand_vector_set (false, tmp, val, 0);
/* target = A B X D */
! emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
GEN_INT (0), GEN_INT (1),
GEN_INT (0+4), GEN_INT (3+4)));
return;
*************** ix86_expand_vector_set (bool mmx_ok, rtx
*** 23794,23800 ****
/* tmp = X B C D */
ix86_expand_vector_set (false, tmp, val, 0);
/* target = A B X D */
! emit_insn (gen_sse_shufps_1 (target, target, tmp,
GEN_INT (0), GEN_INT (1),
GEN_INT (2+4), GEN_INT (0+4)));
return;
--- 23794,23800 ----
/* tmp = X B C D */
ix86_expand_vector_set (false, tmp, val, 0);
/* target = A B X D */
! emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
GEN_INT (0), GEN_INT (1),
GEN_INT (2+4), GEN_INT (0+4)));
return;
*************** ix86_expand_vector_extract (bool mmx_ok,
*** 23915,23921 ****
case 1:
case 3:
tmp = gen_reg_rtx (mode);
! emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
GEN_INT (elt), GEN_INT (elt),
GEN_INT (elt+4), GEN_INT (elt+4)));
break;
--- 23915,23921 ----
case 1:
case 3:
tmp = gen_reg_rtx (mode);
! emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
GEN_INT (elt), GEN_INT (elt),
GEN_INT (elt+4), GEN_INT (elt+4)));
break;
*************** ix86_expand_reduc_v4sf (rtx (*fn) (rtx,
*** 24032,24038 ****
emit_insn (gen_sse_movhlps (tmp1, in, in));
emit_insn (fn (tmp2, tmp1, in));
! emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
GEN_INT (1), GEN_INT (1),
GEN_INT (1+4), GEN_INT (1+4)));
emit_insn (fn (dest, tmp2, tmp3));
--- 24032,24038 ----
emit_insn (gen_sse_movhlps (tmp1, in, in));
emit_insn (fn (tmp2, tmp1, in));
! emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
GEN_INT (1), GEN_INT (1),
GEN_INT (1+4), GEN_INT (1+4)));
emit_insn (fn (dest, tmp2, tmp3));