File nvl702947-power7-vsx-abi.patch of Package gcc43
gcc/
Backport from mainline
2010-06-22 Alan Modra <amodra@gmail.com>
PR target/44364
* config/rs6000/e500.h (HARD_REGNO_CALLER_SAVE_MODE): Define.
* caller-save.c (insert_restore, insert_save): Use non-validate
form of adjust_address.
Backport from mainline
2011-03-07 Pat Haugen <pthaugen@us.ibm.com>
PR target/47862
* config/rs6000/rs6000.h (HARD_REGNO_CALLER_SAVE_MODE): Define.
* config/rs6000/e500.h (HARD_REGNO_CALLER_SAVE_MODE): Undefine
before definition.
Backport from mainline
2011-05-10 Michael Meissner <meissner@linux.vnet.ibm.com>
PR target/48857, 48495
* config/rs6000/rs6000.h (VSX_SCALAR_MODE): Delete.
(VSX_MODE): Ditto.
(VSX_MOVE_MODE): Ditto.
(ALTIVEC_OR_VSX_VECTOR_MODE): New macro, combine all Altivec and
VSX vector types. Add V2DImode.
(HARD_REGNO_CALLER_SAVE_MODE): Use it instead of
ALTIVEC_VECTOR_MODE and VSX_VECTOR_MODE calls.
(MODES_TIEABLE_P): Ditto.
* config/rs6000/rs6000.c (rs6000_emit_move): Use
ALTIVEC_OR_VSX_VECTOR_MODE instead of ALTIVEC_VECTOR_MODE and
VSX_VECTOR_MODE.
(init_cumulative_args): Ditto.
(rs6000_function_arg_boundary): Ditto.
(rs6000_function_arg_advance_1): Ditto.
(rs6000_function_arg): Ditto.
(rs6000_function_ok_for_sibcall): Ditto.
(emit_frame_save): Ditto.
(rs6000_function_value): Ditto.
(rs6000_libcall_value): Ditto.
Backport from mainline
2011-04-01 Andrew Pinski <pinskia@gmail.com>
Michael Meissner <meissner@linux.vnet.ibm.com>
PR target/48262
* config/rs6000/vector.md (movmisalign<mode>): Allow for memory
operands, as per the specifications.
* config/rs6000/altivec.md (vec_extract_evenv4si): Correct modes.
(vec_extract_evenv4sf): Ditto.
(vec_extract_evenv8hi): Ditto.
(vec_extract_evenv16qi): Ditto.
(vec_extract_oddv4si): Ditto.
(vec_extract_oddv4sf): Ditto.
Backport from mainline
2011-04-26 Michael Meissner <meissner@linux.vnet.ibm.com>
PR target/48258
* config/rs6000/vector.md (UNSPEC_REDUC): New unspec for vector
reduction.
(VEC_reduc): New code iterator and splitters for vector reduction.
(VEC_reduc_name): Ditto.
(VEC_reduc_rtx): Ditto.
(reduc_<VEC_reduc_name>_v2df): Vector reduction expanders for VSX.
(reduc_<VEC_reduc_name>_v4sf): Ditto.
* config/rs6000/rs6000.c (rs6000_expand_vector_extract): Add
support for extracting SF on VSX.
* config/rs6000/vsx.md (vsx_xscvspdp_scalar2): New insn for
generating xscvspdp.
(vsx_extract_v4sf): New insn to extract SF from V4SF vector.
(vsx_reduc_<VEC_reduc_name>_v2df): New insns and splitters for
double add, minimum, maximum vector reduction.
(vsx_reduc_<VEC_reduc_name>_v4sf): Ditto.
(vsx_reduc_<VEC_reduc_name>_v2df_scalar): New combiner insn to
optimize double vector reduction.
(vsx_reduc_<VEC_reduc_name>_v4sf_scalar): Ditto.
gcc/testsuite/
Backport from mainline
2011-03-07 Pat Haugen <pthaugen@us.ibm.com>
PR target/47862
* gcc.target/powerpc/pr47862.c: New.
Backport from mainline
2011-05-10 Michael Meissner <meissner@linux.vnet.ibm.com>
PR target/48857
* gcc.target/powerpc/pr48857.c: New file, make sure V2DI arguments
are passed and returned in vector registers.
diff -urpN -X /home/bergner/cvs/dontdiff gcc-4.3.4-20091019-base//gcc/caller-save.c gcc-4.3.4-20091019//gcc/caller-save.c
--- gcc-4.3.4-20091019-base//gcc/caller-save.c 2008-02-19 03:55:59.000000000 -0600
+++ gcc-4.3.4-20091019//gcc/caller-save.c 2011-06-28 23:38:58.000000000 -0500
@@ -701,7 +701,7 @@ insert_restore (struct insn_chain *chain
if (save_mode [regno] != VOIDmode
&& save_mode [regno] != GET_MODE (mem)
&& numregs == (unsigned int) hard_regno_nregs[regno][save_mode [regno]])
- mem = adjust_address (mem, save_mode[regno], 0);
+ mem = adjust_address_nv (mem, save_mode[regno], 0);
else
mem = copy_rtx (mem);
pat = gen_rtx_SET (VOIDmode,
@@ -773,7 +773,7 @@ insert_save (struct insn_chain *chain, i
if (save_mode [regno] != VOIDmode
&& save_mode [regno] != GET_MODE (mem)
&& numregs == (unsigned int) hard_regno_nregs[regno][save_mode [regno]])
- mem = adjust_address (mem, save_mode[regno], 0);
+ mem = adjust_address_nv (mem, save_mode[regno], 0);
else
mem = copy_rtx (mem);
pat = gen_rtx_SET (VOIDmode, mem,
diff -urpN -X /home/bergner/cvs/dontdiff gcc-4.3.4-20091019-base//gcc/config/rs6000/altivec.md gcc-4.3.4-20091019//gcc/config/rs6000/altivec.md
--- gcc-4.3.4-20091019-base//gcc/config/rs6000/altivec.md 2011-06-07 17:08:00.000000000 -0500
+++ gcc-4.3.4-20091019//gcc/config/rs6000/altivec.md 2011-06-28 23:42:44.000000000 -0500
@@ -2435,7 +2435,7 @@
(define_expand "vec_extract_evenv4si"
[(set (match_operand:V4SI 0 "register_operand" "")
- (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "")
+ (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "")
(match_operand:V4SI 2 "register_operand" "")]
UNSPEC_EXTEVEN_V4SI))]
"TARGET_ALTIVEC"
@@ -2468,7 +2468,7 @@
(define_expand "vec_extract_evenv4sf"
[(set (match_operand:V4SF 0 "register_operand" "")
- (unspec:V8HI [(match_operand:V4SF 1 "register_operand" "")
+ (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
(match_operand:V4SF 2 "register_operand" "")]
UNSPEC_EXTEVEN_V4SF))]
"TARGET_ALTIVEC"
@@ -2500,7 +2500,7 @@
}")
(define_expand "vec_extract_evenv8hi"
- [(set (match_operand:V4SI 0 "register_operand" "")
+ [(set (match_operand:V8HI 0 "register_operand" "")
(unspec:V8HI [(match_operand:V8HI 1 "register_operand" "")
(match_operand:V8HI 2 "register_operand" "")]
UNSPEC_EXTEVEN_V8HI))]
@@ -2533,9 +2533,9 @@
}")
(define_expand "vec_extract_evenv16qi"
- [(set (match_operand:V4SI 0 "register_operand" "")
- (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "")
- (match_operand:V16QI 2 "register_operand" "")]
+ [(set (match_operand:V16QI 0 "register_operand" "")
+ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
+ (match_operand:V16QI 2 "register_operand" "")]
UNSPEC_EXTEVEN_V16QI))]
"TARGET_ALTIVEC"
"
@@ -2567,7 +2567,7 @@
(define_expand "vec_extract_oddv4si"
[(set (match_operand:V4SI 0 "register_operand" "")
- (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "")
+ (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "")
(match_operand:V4SI 2 "register_operand" "")]
UNSPEC_EXTODD_V4SI))]
"TARGET_ALTIVEC"
@@ -2600,7 +2600,7 @@
(define_expand "vec_extract_oddv4sf"
[(set (match_operand:V4SF 0 "register_operand" "")
- (unspec:V8HI [(match_operand:V4SF 1 "register_operand" "")
+ (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
(match_operand:V4SF 2 "register_operand" "")]
UNSPEC_EXTODD_V4SF))]
"TARGET_ALTIVEC"
diff -urpN -X /home/bergner/cvs/dontdiff gcc-4.3.4-20091019-base//gcc/config/rs6000/e500.h gcc-4.3.4-20091019//gcc/config/rs6000/e500.h
--- gcc-4.3.4-20091019-base//gcc/config/rs6000/e500.h 2008-02-19 03:55:53.000000000 -0600
+++ gcc-4.3.4-20091019//gcc/config/rs6000/e500.h 2011-06-29 09:09:51.000000000 -0500
@@ -45,3 +45,12 @@
error ("E500 and FPRs not supported"); \
} \
} while (0)
+
+/* Override rs6000.h definition. */
+#undef HARD_REGNO_CALLER_SAVE_MODE
+/* When setting up caller-save slots (MODE == VOIDmode) ensure we
+ allocate space for DFmode. Save gprs in the correct mode too. */
+#define HARD_REGNO_CALLER_SAVE_MODE(REGNO, NREGS, MODE) \
+ (TARGET_E500_DOUBLE && ((MODE) == VOIDmode || (MODE) == DFmode) \
+ ? DFmode \
+ : choose_hard_reg_mode ((REGNO), (NREGS), false))
diff -urpN -X /home/bergner/cvs/dontdiff gcc-4.3.4-20091019-base//gcc/config/rs6000/rs6000.c gcc-4.3.4-20091019//gcc/config/rs6000/rs6000.c
--- gcc-4.3.4-20091019-base//gcc/config/rs6000/rs6000.c 2011-06-07 17:08:00.000000000 -0500
+++ gcc-4.3.4-20091019//gcc/config/rs6000/rs6000.c 2011-06-28 23:42:44.000000000 -0500
@@ -4275,12 +4275,22 @@ rs6000_expand_vector_extract (rtx target
enum machine_mode inner_mode = GET_MODE_INNER (mode);
rtx mem, x;
- if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
+ if (VECTOR_MEM_VSX_P (mode))
{
- rtx (*extract_func) (rtx, rtx, rtx)
- = ((mode == V2DFmode) ? gen_vsx_extract_v2df : gen_vsx_extract_v2di);
- emit_insn (extract_func (target, vec, GEN_INT (elt)));
- return;
+ switch (mode)
+ {
+ default:
+ break;
+ case V2DFmode:
+ emit_insn (gen_vsx_extract_v2df (target, vec, GEN_INT (elt)));
+ return;
+ case V2DImode:
+ emit_insn (gen_vsx_extract_v2di (target, vec, GEN_INT (elt)));
+ return;
+ case V4SFmode:
+ emit_insn (gen_vsx_extract_v4sf (target, vec, GEN_INT (elt)));
+ return;
+ }
}
/* Allocate mode-sized buffer. */
@@ -6475,7 +6485,7 @@ rs6000_emit_move (rtx dest, rtx source,
/* Nonzero if we can use an AltiVec register to pass this arg. */
#define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,TYPE,NAMED) \
- ((ALTIVEC_VECTOR_MODE (MODE) || VSX_VECTOR_MODE (MODE)) \
+ (ALTIVEC_OR_VSX_VECTOR_MODE (MODE) \
&& (CUM)->vregno <= ALTIVEC_ARG_MAX_REG \
&& TARGET_ALTIVEC_ABI \
&& (NAMED))
@@ -6701,7 +6711,7 @@ function_arg_padding (enum machine_mode
existing library interfaces.
Doubleword align SPE vectors.
- Quadword align Altivec vectors.
+ Quadword align Altivec/VSX vectors.
Quadword align large synthetic vector types. */
int
@@ -6718,7 +6728,7 @@ function_arg_boundary (enum machine_mode
&& int_size_in_bytes (type) >= 8
&& int_size_in_bytes (type) < 16))
return 64;
- else if ((ALTIVEC_VECTOR_MODE (mode) || VSX_VECTOR_MODE (mode))
+ else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
|| (type && TREE_CODE (type) == VECTOR_TYPE
&& int_size_in_bytes (type) >= 16))
return 128;
@@ -6863,8 +6873,7 @@ function_arg_advance (CUMULATIVE_ARGS *c
cum->nargs_prototype--;
if (TARGET_ALTIVEC_ABI
- && (ALTIVEC_VECTOR_MODE (mode)
- || VSX_VECTOR_MODE (mode)
+ && (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
|| (type && TREE_CODE (type) == VECTOR_TYPE
&& int_size_in_bytes (type) == 16)))
{
@@ -7458,8 +7467,7 @@ function_arg (CUMULATIVE_ARGS *cum, enum
else
return gen_rtx_REG (mode, cum->vregno);
else if (TARGET_ALTIVEC_ABI
- && (ALTIVEC_VECTOR_MODE (mode)
- || VSX_VECTOR_MODE (mode)
+ && (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
|| (type && TREE_CODE (type) == VECTOR_TYPE
&& int_size_in_bytes (type) == 16)))
{
@@ -17887,7 +17895,7 @@ emit_frame_save (rtx frame_reg, rtx fram
/* Some cases that need register indexed addressing. */
if ((TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
- || (TARGET_VSX && VSX_VECTOR_MODE (mode))
+ || (TARGET_VSX && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
|| (TARGET_E500_DOUBLE && mode == DFmode)
|| (TARGET_SPE_ABI
&& SPE_VECTOR_MODE (mode)
@@ -24781,13 +24789,12 @@ rs6000_function_value (const_tree valtyp
else if (TREE_CODE (valtype) == COMPLEX_TYPE
&& targetm.calls.split_complex_arg)
return rs6000_complex_function_value (mode);
+ /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
+ return register is used in both cases, and we won't see V2DImode/V2DFmode
+ for pure altivec, combine the two cases. */
else if (TREE_CODE (valtype) == VECTOR_TYPE
&& TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
- && ALTIVEC_VECTOR_MODE (mode))
- regno = ALTIVEC_ARG_RETURN;
- else if (TREE_CODE (valtype) == VECTOR_TYPE
- && TARGET_VSX && TARGET_ALTIVEC_ABI
- && VSX_VECTOR_MODE (mode))
+ && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
regno = ALTIVEC_ARG_RETURN;
else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
&& (mode == DFmode || mode == DCmode
@@ -24827,12 +24834,12 @@ rs6000_libcall_value (enum machine_mode
&& TARGET_HARD_FLOAT && TARGET_FPRS
&& ((TARGET_SINGLE_FLOAT && mode == SFmode) || TARGET_DOUBLE_FLOAT))
regno = FP_ARG_RETURN;
- else if (ALTIVEC_VECTOR_MODE (mode)
+ /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
+ return register is used in both cases, and we won't see V2DImode/V2DFmode
+ for pure altivec, combine the two cases. */
+ else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
&& TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
regno = ALTIVEC_ARG_RETURN;
- else if (VSX_VECTOR_MODE (mode)
- && TARGET_VSX && TARGET_ALTIVEC_ABI)
- regno = ALTIVEC_ARG_RETURN;
else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
return rs6000_complex_function_value (mode);
else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
diff -urpN -X /home/bergner/cvs/dontdiff gcc-4.3.4-20091019-base//gcc/config/rs6000/rs6000-c.c gcc-4.3.4-20091019//gcc/config/rs6000/rs6000-c.c
--- gcc-4.3.4-20091019-base//gcc/config/rs6000/rs6000-c.c 2011-06-07 17:08:00.000000000 -0500
+++ gcc-4.3.4-20091019//gcc/config/rs6000/rs6000-c.c 2011-06-28 23:42:44.000000000 -0500
@@ -183,7 +183,10 @@ rs6000_macro_to_expand (cpp_reader *pfil
expand_this = C_CPP_HASHNODE (__vector_keyword);
expand_bool_pixel = __bool_keyword;
}
- else if (ident)
+ /* The boost libraries have code with Iterator::vector vector in it. If
+ we allow the normal handling, this module will be called recursively,
+ and the vector will be skipped. */
+ else if (ident && (ident != C_CPP_HASHNODE (__vector_keyword)))
{
enum rid rid_code = (enum rid)(ident->rid_code);
if (ident->type == NT_MACRO)
diff -urpN -X /home/bergner/cvs/dontdiff gcc-4.3.4-20091019-base//gcc/config/rs6000/rs6000.h gcc-4.3.4-20091019//gcc/config/rs6000/rs6000.h
--- gcc-4.3.4-20091019-base//gcc/config/rs6000/rs6000.h 2011-06-07 17:08:00.000000000 -0500
+++ gcc-4.3.4-20091019//gcc/config/rs6000/rs6000.h 2011-06-28 23:42:44.000000000 -0500
@@ -1027,6 +1027,15 @@ extern unsigned rs6000_pointer_size;
#define HARD_REGNO_NREGS(REGNO, MODE) rs6000_hard_regno_nregs[(MODE)][(REGNO)]
+/* When setting up caller-save slots (MODE == VOIDmode) ensure we allocate
+ enough space to account for vectors in FP regs. */
+#define HARD_REGNO_CALLER_SAVE_MODE(REGNO, NREGS, MODE) \
+ (TARGET_VSX \
+ && ((MODE) == VOIDmode || ALTIVEC_OR_VSX_VECTOR_MODE (MODE)) \
+ && FP_REGNO_P (REGNO) \
+ ? V2DFmode \
+ : choose_hard_reg_mode ((REGNO), (NREGS), false))
+
#define HARD_REGNO_CALL_PART_CLOBBERED(REGNO, MODE) \
((TARGET_32BIT && TARGET_POWERPC64 \
&& (GET_MODE_SIZE (MODE) > 4) \
@@ -1036,25 +1045,16 @@ extern unsigned rs6000_pointer_size;
((MODE) == V4SFmode \
|| (MODE) == V2DFmode) \
-#define VSX_SCALAR_MODE(MODE) \
- ((MODE) == DFmode)
-
-#define VSX_MODE(MODE) \
- (VSX_VECTOR_MODE (MODE) \
- || VSX_SCALAR_MODE (MODE))
-
-#define VSX_MOVE_MODE(MODE) \
- (VSX_VECTOR_MODE (MODE) \
- || VSX_SCALAR_MODE (MODE) \
- || ALTIVEC_VECTOR_MODE (MODE) \
- || (MODE) == TImode)
-
#define ALTIVEC_VECTOR_MODE(MODE) \
((MODE) == V16QImode \
|| (MODE) == V8HImode \
|| (MODE) == V4SFmode \
|| (MODE) == V4SImode)
+#define ALTIVEC_OR_VSX_VECTOR_MODE(MODE) \
+ (ALTIVEC_VECTOR_MODE (MODE) || VSX_VECTOR_MODE (MODE) \
+ || (MODE) == V2DImode)
+
#define SPE_VECTOR_MODE(MODE) \
((MODE) == V4HImode \
|| (MODE) == V2SFmode \
@@ -1097,10 +1097,10 @@ extern unsigned rs6000_pointer_size;
? ALTIVEC_VECTOR_MODE (MODE2) \
: ALTIVEC_VECTOR_MODE (MODE2) \
? ALTIVEC_VECTOR_MODE (MODE1) \
- : VSX_VECTOR_MODE (MODE1) \
- ? VSX_VECTOR_MODE (MODE2) \
- : VSX_VECTOR_MODE (MODE2) \
- ? VSX_VECTOR_MODE (MODE1) \
+ : ALTIVEC_OR_VSX_VECTOR_MODE (MODE1) \
+ ? ALTIVEC_OR_VSX_VECTOR_MODE (MODE2) \
+ : ALTIVEC_OR_VSX_VECTOR_MODE (MODE2) \
+ ? ALTIVEC_OR_VSX_VECTOR_MODE (MODE1) \
: 1)
/* Post-reload, we can't use any new AltiVec registers, as we already
diff -urpN -X /home/bergner/cvs/dontdiff gcc-4.3.4-20091019-base//gcc/config/rs6000/vector.md gcc-4.3.4-20091019//gcc/config/rs6000/vector.md
--- gcc-4.3.4-20091019-base//gcc/config/rs6000/vector.md 2011-06-07 17:08:00.000000000 -0500
+++ gcc-4.3.4-20091019//gcc/config/rs6000/vector.md 2011-06-28 23:47:19.000000000 -0500
@@ -72,7 +72,19 @@
;; constants for unspec
(define_constants
- [(UNSPEC_PREDICATE 400)])
+ [(UNSPEC_PREDICATE 400)
+ (UNSPEC_REDUC 401)])
+
+;; Vector reduction code iterators
+(define_code_iterator VEC_reduc [plus smin smax])
+
+(define_code_attr VEC_reduc_name [(plus "splus")
+ (smin "smin")
+ (smax "smax")])
+
+(define_code_attr VEC_reduc_rtx [(plus "add")
+ (smin "smin")
+ (smax "smax")])
;; Vector move instructions.
@@ -825,8 +837,8 @@
;; Under VSX, vectors of 4/8 byte alignments do not need to be aligned
;; since the load already handles it.
(define_expand "movmisalign<mode>"
- [(set (match_operand:VEC_N 0 "vfloat_operand" "")
- (match_operand:VEC_N 1 "vfloat_operand" ""))]
+ [(set (match_operand:VEC_N 0 "nonimmediate_operand" "")
+ (match_operand:VEC_N 1 "any_operand" ""))]
"VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_ALLOW_MOVMISALIGN"
"")
@@ -944,3 +956,38 @@
(match_operand:VEC_I 2 "vint_operand" "")))]
"TARGET_ALTIVEC"
"")
+
+;; Vector reduction expanders for VSX
+
+(define_expand "reduc_<VEC_reduc_name>_v2df"
+ [(parallel [(set (match_operand:V2DF 0 "vfloat_operand" "")
+ (VEC_reduc:V2DF
+ (vec_concat:V2DF
+ (vec_select:DF
+ (match_operand:V2DF 1 "vfloat_operand" "")
+ (parallel [(const_int 1)]))
+ (vec_select:DF
+ (match_dup 1)
+ (parallel [(const_int 0)])))
+ (match_dup 1)))
+ (clobber (match_scratch:V2DF 2 ""))])]
+ "VECTOR_UNIT_VSX_P (V2DFmode)"
+ "")
+
+; The (VEC_reduc:V4SF
+; (op1)
+; (unspec:V4SF [(const_int 0)] UNSPEC_REDUC))
+;
+; is to allow us to use a code iterator, but not completely list all of the
+; vector rotates, etc. to prevent canonicalization
+
+(define_expand "reduc_<VEC_reduc_name>_v4sf"
+ [(parallel [(set (match_operand:V4SF 0 "vfloat_operand" "")
+ (VEC_reduc:V4SF
+ (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
+ (match_operand:V4SF 1 "vfloat_operand" "")))
+ (clobber (match_scratch:V4SF 2 ""))
+ (clobber (match_scratch:V4SF 3 ""))])]
+ "VECTOR_UNIT_VSX_P (V4SFmode)"
+ "")
+
diff -urpN -X /home/bergner/cvs/dontdiff gcc-4.3.4-20091019-base//gcc/config/rs6000/vsx.md gcc-4.3.4-20091019//gcc/config/rs6000/vsx.md
--- gcc-4.3.4-20091019-base//gcc/config/rs6000/vsx.md 2011-06-07 17:08:00.000000000 -0500
+++ gcc-4.3.4-20091019//gcc/config/rs6000/vsx.md 2011-06-28 23:42:44.000000000 -0500
@@ -986,6 +986,15 @@
"xscvdpsp %x0,%x1"
[(set_attr "type" "fp")])
+;; Same as vsx_xscvspdp, but use SF as the type
+(define_insn "vsx_xscvspdp_scalar2"
+ [(set (match_operand:SF 0 "vsx_register_operand" "=f")
+ (unspec:SF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
+ UNSPEC_VSX_CVSPDP))]
+ "VECTOR_UNIT_VSX_P (DFmode)"
+ "xscvspdp %x0,%x1"
+ [(set_attr "type" "fp")])
+
;; Convert from 64-bit to 32-bit types
;; Note, favor the Altivec registers since the usual use of these instructions
;; is in vector converts and we need to use the Altivec vperm instruction.
@@ -1180,6 +1189,43 @@
[(set_attr "type" "fpload")
(set_attr "length" "4")])
+;; Extract a SF element from V4SF
+(define_insn_and_split "vsx_extract_v4sf"
+ [(set (match_operand:SF 0 "vsx_register_operand" "=f,f")
+ (vec_select:SF
+ (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
+ (parallel [(match_operand:QI 2 "u5bit_cint_operand" "O,i")])))
+ (clobber (match_scratch:V4SF 3 "=X,0"))]
+ "VECTOR_UNIT_VSX_P (V4SFmode)"
+ "@
+ xscvspdp %x0,%x1
+ #"
+ ""
+ [(const_int 0)]
+ "
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ rtx op2 = operands[2];
+ rtx op3 = operands[3];
+ rtx tmp;
+ HOST_WIDE_INT ele = INTVAL (op2);
+
+ if (ele == 0)
+ tmp = op1;
+ else
+ {
+ if (GET_CODE (op3) == SCRATCH)
+ op3 = gen_reg_rtx (V4SFmode);
+ emit_insn (gen_vsx_xxsldwi_v4sf (op3, op1, op1, op2));
+ tmp = op3;
+ }
+ emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp));
+ DONE;
+}"
+ [(set_attr "length" "4,8")
+ (set_attr "type" "fp")])
+
;; General double word oriented permute, allow the other vector types for
;; optimizing the permute instruction.
(define_insn "vsx_xxpermdi_<mode>"
@@ -1291,3 +1337,153 @@
"VECTOR_MEM_VSX_P (<MODE>mode)"
"xxsldwi %x0,%x1,%x2,%3"
[(set_attr "type" "vecperm")])
+
+
+;; Vector reduction insns and splitters
+
+(define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df"
+ [(set (match_operand:V2DF 0 "vfloat_operand" "=&wd,&?wa,wd,?wa")
+ (VEC_reduc:V2DF
+ (vec_concat:V2DF
+ (vec_select:DF
+ (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
+ (parallel [(const_int 1)]))
+ (vec_select:DF
+ (match_dup 1)
+ (parallel [(const_int 0)])))
+ (match_dup 1)))
+ (clobber (match_scratch:V2DF 2 "=0,0,&wd,&wa"))]
+ "VECTOR_UNIT_VSX_P (V2DFmode)"
+ "#"
+ ""
+ [(const_int 0)]
+ "
+{
+ rtx tmp = (GET_CODE (operands[2]) == SCRATCH)
+ ? gen_reg_rtx (V2DFmode)
+ : operands[2];
+ emit_insn (gen_vsx_xxsldwi_v2df (tmp, operands[1], operands[1], const2_rtx));
+ emit_insn (gen_<VEC_reduc_rtx>v2df3 (operands[0], tmp, operands[1]));
+ DONE;
+}"
+ [(set_attr "length" "8")
+ (set_attr "type" "veccomplex")])
+
+(define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf"
+ [(set (match_operand:V4SF 0 "vfloat_operand" "=wf,?wa")
+ (VEC_reduc:V4SF
+ (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
+ (match_operand:V4SF 1 "vfloat_operand" "wf,wa")))
+ (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
+ (clobber (match_scratch:V4SF 3 "=&wf,&wa"))]
+ "VECTOR_UNIT_VSX_P (V4SFmode)"
+ "#"
+ ""
+ [(const_int 0)]
+ "
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ rtx tmp2, tmp3, tmp4;
+
+ if (can_create_pseudo_p ())
+ {
+ tmp2 = gen_reg_rtx (V4SFmode);
+ tmp3 = gen_reg_rtx (V4SFmode);
+ tmp4 = gen_reg_rtx (V4SFmode);
+ }
+ else
+ {
+ tmp2 = operands[2];
+ tmp3 = operands[3];
+ tmp4 = tmp2;
+ }
+
+ emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
+ emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
+ emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
+ emit_insn (gen_<VEC_reduc_rtx>v4sf3 (op0, tmp4, tmp3));
+ DONE;
+}"
+ [(set_attr "length" "16")
+ (set_attr "type" "veccomplex")])
+
+;; Combiner patterns with the vector reduction patterns that know we can get
+;; to the top element of the V2DF array without doing an extract.
+
+(define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df_scalar"
+ [(set (match_operand:DF 0 "vfloat_operand" "=&ws,&?wa,ws,?wa")
+ (vec_select:DF
+ (VEC_reduc:V2DF
+ (vec_concat:V2DF
+ (vec_select:DF
+ (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
+ (parallel [(const_int 1)]))
+ (vec_select:DF
+ (match_dup 1)
+ (parallel [(const_int 0)])))
+ (match_dup 1))
+ (parallel [(const_int 1)])))
+ (clobber (match_scratch:DF 2 "=0,0,&wd,&wa"))]
+ "VECTOR_UNIT_VSX_P (V2DFmode)"
+ "#"
+ ""
+ [(const_int 0)]
+ "
+{
+ rtx hi = gen_highpart (DFmode, operands[1]);
+ rtx lo = (GET_CODE (operands[2]) == SCRATCH)
+ ? gen_reg_rtx (DFmode)
+ : operands[2];
+
+ emit_insn (gen_vsx_extract_v2df (lo, operands[1], const1_rtx));
+ emit_insn (gen_<VEC_reduc_rtx>df3 (operands[0], hi, lo));
+ DONE;
+}"
+ [(set_attr "length" "8")
+ (set_attr "type" "veccomplex")])
+
+(define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf_scalar"
+ [(set (match_operand:SF 0 "vfloat_operand" "=f,?f")
+ (vec_select:SF
+ (VEC_reduc:V4SF
+ (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
+ (match_operand:V4SF 1 "vfloat_operand" "wf,wa"))
+ (parallel [(const_int 3)])))
+ (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
+ (clobber (match_scratch:V4SF 3 "=&wf,&wa"))
+ (clobber (match_scratch:V4SF 4 "=0,0"))]
+ "VECTOR_UNIT_VSX_P (V4SFmode)"
+ "#"
+ ""
+ [(const_int 0)]
+ "
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ rtx tmp2, tmp3, tmp4, tmp5;
+
+ if (can_create_pseudo_p ())
+ {
+ tmp2 = gen_reg_rtx (V4SFmode);
+ tmp3 = gen_reg_rtx (V4SFmode);
+ tmp4 = gen_reg_rtx (V4SFmode);
+ tmp5 = gen_reg_rtx (V4SFmode);
+ }
+ else
+ {
+ tmp2 = operands[2];
+ tmp3 = operands[3];
+ tmp4 = tmp2;
+ tmp5 = operands[4];
+ }
+
+ emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
+ emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
+ emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
+ emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp5, tmp4, tmp3));
+ emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp5));
+ DONE;
+}"
+ [(set_attr "length" "20")
+ (set_attr "type" "veccomplex")])
diff -urpN -X /home/bergner/cvs/dontdiff gcc-4.3.4-20091019-base//gcc/testsuite/gcc.target/powerpc/pr47862.c gcc-4.3.4-20091019//gcc/testsuite/gcc.target/powerpc/pr47862.c
--- gcc-4.3.4-20091019-base//gcc/testsuite/gcc.target/powerpc/pr47862.c 1969-12-31 18:00:00.000000000 -0600
+++ gcc-4.3.4-20091019//gcc/testsuite/gcc.target/powerpc/pr47862.c 2011-06-28 17:46:43.000000000 -0500
@@ -0,0 +1,19 @@
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O2 -mcpu=power7" } */
+/* { dg-final { scan-assembler-not "stfd" } } */
+
+/* PR 47862: Verify caller-save spills of vectors in FP regs do not use
+ legacy FP insns, which spill only half the vector. */
+extern vector double dd[15];
+
+vector double foo() {
+ vector double a,b,c,d,e,f,g,h,i,j,k,l,m,n;
+
+ a=dd[1]; b=dd[2]; c=dd[3]; d=dd[4]; e=dd[5]; f=dd[6]; g=dd[7]; h=dd[8]; i=dd[9];
+ j=dd[10]; k=dd[11]; l=dd[12]; m=dd[13]; n=dd[14];
+ bar();
+ return (a+b+c+d+e+f+g+h+i+j+k+l+m+n);
+}
+
diff -urpN -X /home/bergner/cvs/dontdiff gcc-4.3.4-20091019-base//gcc/testsuite/gcc.target/powerpc/pr48857.c gcc-4.3.4-20091019//gcc/testsuite/gcc.target/powerpc/pr48857.c
--- gcc-4.3.4-20091019-base//gcc/testsuite/gcc.target/powerpc/pr48857.c 1969-12-31 18:00:00.000000000 -0600
+++ gcc-4.3.4-20091019//gcc/testsuite/gcc.target/powerpc/pr48857.c 2011-06-28 23:42:44.000000000 -0500
@@ -0,0 +1,25 @@
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O2 -mcpu=power7 -mabi=altivec" } */
+/* { dg-final { scan-assembler-times "lxvd2x" 1 } } */
+/* { dg-final { scan-assembler-times "stxvd2x" 1 } } */
+/* { dg-final { scan-assembler-not "ld" } } */
+/* { dg-final { scan-assembler-not "lwz" } } */
+/* { dg-final { scan-assembler-not "stw" } } */
+/* { dg-final { scan-assembler-not "addi" } } */
+
+typedef vector long long v2di_type;
+
+v2di_type
+return_v2di (v2di_type *ptr)
+{
+ return *ptr; /* should generate lxvd2x 34,0,3. */
+}
+
+void
+pass_v2di (v2di_type arg, v2di_type *ptr)
+{
+ *ptr = arg; /* should generate stxvd2x 34,0,{3,5}. */
+}
+