File ibm-cell-split-fixes of Package gcc43
ChangeLog:
* config/spu/spu.h (ADDRESSES_NEVER_TRAP): Define.
* rtlanal.c (rtx_addr_can_trap_p_1): Respect ADDRESSES_NEVER_TRAP macro.
* doc/tm.texi (ADDRESSES_NEVER_TRAP): Document.
* config/spu/spu.c (spu_split_load): Trust MEM_ALIGN. When not
optimizing, do not split load unless necessary.
* config/spu/spu.md ("_abs<mode>2"): Do not split in split0 pass.
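
Background (illustrative sketch, not part of the patch below): the following
plain-C fragment shows how a target-defined ADDRESSES_NEVER_TRAP macro is
meant to short-circuit the trap analysis touched in the rtlanal.c hunk.  The
function addr_can_trap_p and its parameters are invented stand-ins for
rtx_addr_can_trap_p_1, trimmed down so only the control flow remains.

  #include <stdio.h>

  /* A target header such as spu.h would provide this definition.  */
  #define ADDRESSES_NEVER_TRAP 1

  /* Simplified stand-in for rtx_addr_can_trap_p_1: nonzero means the
     access described by OFFSET and SIZE might trap at run time.  */
  static int
  addr_can_trap_p (long offset, long size)
  {
    /* The real analysis uses these; the sketch only needs them for the
       signature.  */
    (void) offset;
    (void) size;
  #ifdef ADDRESSES_NEVER_TRAP
    /* On targets like the SPU, loads and stores never fault, so the
       conservative analysis is skipped entirely.  */
    return 0;
  #else
    /* Generic fallback of this sketch: assume a trap is possible.  */
    return 1;
  #endif
  }

  int
  main (void)
  {
    printf ("may trap: %d\n", addr_can_trap_p (16, 4));
    return 0;
  }
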
Index: gcc-4.3.4-20090804/gcc/config/spu/spu.c
===================================================================
--- gcc-4.3.4-20090804.orig/gcc/config/spu/spu.c 2009-09-21 11:47:27.000000000 +0200
+++ gcc-4.3.4-20090804/gcc/config/spu/spu.c 2009-09-21 11:47:38.000000000 +0200
@@ -4269,7 +4269,10 @@ spu_split_load (rtx * ops)
rot = 0;
rot_amt = 0;
- if (GET_CODE (addr) == PLUS)
+
+ if (MEM_ALIGN (ops[1]) >= 128)
+ /* Address is already aligned; simply perform a TImode load. */;
+ else if (GET_CODE (addr) == PLUS)
{
/* 8 cases:
aligned reg + aligned reg => lqx
@@ -4380,6 +4383,14 @@ spu_split_load (rtx * ops)
rot_amt = 0;
}
+ /* If the source is properly aligned, we don't need to split this insn into
+ a TImode load plus a _spu_convert. We still perform the split when
+ optimizing, so that the MEMs look the same as those used for stores and
+ are more easily merged with them. When *not* optimizing, no such merging
+ takes place, so we prefer not to generate the _spu_convert. */
+ if (!rot && !rot_amt && !optimize)
+ return 0;
+
load = gen_reg_rtx (TImode);
mem = change_address (ops[1], TImode, copy_rtx (addr));
Index: gcc-4.3.4-20090804/gcc/config/spu/spu.h
===================================================================
--- gcc-4.3.4-20090804.orig/gcc/config/spu/spu.h 2009-09-21 11:47:27.000000000 +0200
+++ gcc-4.3.4-20090804/gcc/config/spu/spu.h 2009-09-21 11:47:38.000000000 +0200
@@ -641,6 +641,8 @@ extern GTY(()) rtx spu_compare_op1;
#define SPLIT_BEFORE_CSE2 1
+#define ADDRESSES_NEVER_TRAP 1
+
/* Builtins. */
Index: gcc-4.3.4-20090804/gcc/config/spu/spu.md
===================================================================
--- gcc-4.3.4-20090804.orig/gcc/config/spu/spu.md 2009-09-21 11:47:27.000000000 +0200
+++ gcc-4.3.4-20090804/gcc/config/spu/spu.md 2009-09-21 11:47:38.000000000 +0200
@@ -1249,7 +1249,7 @@
(use (match_operand:<F2I> 2 "spu_reg_operand" "r"))]
""
"#"
- ""
+ "split0_completed"
[(set (match_dup:<F2I> 3)
(and:<F2I> (match_dup:<F2I> 4)
(match_dup:<F2I> 2)))]
Index: gcc-4.3.4-20090804/gcc/doc/tm.texi
===================================================================
--- gcc-4.3.4-20090804.orig/gcc/doc/tm.texi 2009-09-21 11:47:27.000000000 +0200
+++ gcc-4.3.4-20090804/gcc/doc/tm.texi 2009-09-21 11:47:38.000000000 +0200
@@ -10395,3 +10395,9 @@ optimizations before this pass work bett
instructions, and the optimizations right after this pass (e.g., CSE and
combine) are able to optimize the split instructions.
@end defmac
+
+@defmac ADDRESSES_NEVER_TRAP
+Define this macro if memory accesses will never cause a trap.
+This is the case, for example, on the Cell SPU processor.
+@end defmac
+
Index: gcc-4.3.4-20090804/gcc/rtlanal.c
===================================================================
--- gcc-4.3.4-20090804.orig/gcc/rtlanal.c 2009-09-21 11:42:15.000000000 +0200
+++ gcc-4.3.4-20090804/gcc/rtlanal.c 2009-09-21 11:47:38.000000000 +0200
@@ -266,6 +266,10 @@ static int
rtx_addr_can_trap_p_1 (const_rtx x, HOST_WIDE_INT offset, HOST_WIDE_INT size,
enum machine_mode mode, bool unaligned_mems)
{
+#ifdef ADDRESSES_NEVER_TRAP
+ /* On some processors, like the SPU, memory accesses never trap. */
+ return 0;
+#else
enum rtx_code code = GET_CODE (x);
if (STRICT_ALIGNMENT
@@ -382,6 +386,7 @@ rtx_addr_can_trap_p_1 (const_rtx x, HOST
/* If it isn't one of the cases above, it can cause a trap. */
return 1;
+#endif
}
/* Return nonzero if the use of X as an address in a MEM can cause a trap. */