File ibm-cell-split-fixes of Package gcc43

ChangeLog:

	* config/spu/spu.h (ADDRESSES_NEVER_TRAP): Define.
	* rtlanal.c (rtx_addr_can_trap_p_1): Respect ADDRESSES_NEVER_TRAP macro.
	* doc/tm.texi (ADDRESSES_NEVER_TRAP): Document.

	* config/spu/spu.c (spu_split_load): Trust MEM_ALIGN.  When not
	optimizing, do not split load unless necessary.

	* config/spu/spu.md ("_abs<mode>2"): Do not split in split0 pass.


Index: gcc-4.3.4-20090804/gcc/config/spu/spu.c
===================================================================
--- gcc-4.3.4-20090804.orig/gcc/config/spu/spu.c	2009-09-21 11:47:27.000000000 +0200
+++ gcc-4.3.4-20090804/gcc/config/spu/spu.c	2009-09-21 11:47:38.000000000 +0200
@@ -4269,7 +4269,10 @@ spu_split_load (rtx * ops)
 
   rot = 0;
   rot_amt = 0;
-  if (GET_CODE (addr) == PLUS)
+
+  if (MEM_ALIGN (ops[1]) >= 128)
+    /* Address is already aligned; simply perform a TImode load.  */;
+  else if (GET_CODE (addr) == PLUS)
     {
       /* 8 cases:
          aligned reg   + aligned reg     => lqx
@@ -4380,6 +4383,14 @@ spu_split_load (rtx * ops)
       rot_amt = 0;
     }
 
+  /* If the source is properly aligned, we don't need to split this insn into
+     a TImode load plus a _spu_convert.  However, we want to perform the split
+     anyway when optimizing to make the MEMs look the same as those used for
+     stores so they are more easily merged.  When *not* optimizing, that will
+     not happen anyway, so we prefer to avoid generating the _spu_convert.  */
+  if (!rot && !rot_amt && !optimize)
+    return 0;
+
   load = gen_reg_rtx (TImode);
 
   mem = change_address (ops[1], TImode, copy_rtx (addr));
Index: gcc-4.3.4-20090804/gcc/config/spu/spu.h
===================================================================
--- gcc-4.3.4-20090804.orig/gcc/config/spu/spu.h	2009-09-21 11:47:27.000000000 +0200
+++ gcc-4.3.4-20090804/gcc/config/spu/spu.h	2009-09-21 11:47:38.000000000 +0200
@@ -641,6 +641,8 @@ extern GTY(()) rtx spu_compare_op1;
 
 #define SPLIT_BEFORE_CSE2 1
 
+#define ADDRESSES_NEVER_TRAP 1
+
 
 /* Builtins.  */
 
Index: gcc-4.3.4-20090804/gcc/config/spu/spu.md
===================================================================
--- gcc-4.3.4-20090804.orig/gcc/config/spu/spu.md	2009-09-21 11:47:27.000000000 +0200
+++ gcc-4.3.4-20090804/gcc/config/spu/spu.md	2009-09-21 11:47:38.000000000 +0200
@@ -1249,7 +1249,7 @@
    (use (match_operand:<F2I> 2 "spu_reg_operand" "r"))]
   ""
   "#"
-  ""
+  "split0_completed"
   [(set (match_dup:<F2I> 3)
 	(and:<F2I> (match_dup:<F2I> 4)
 		   (match_dup:<F2I> 2)))]
Index: gcc-4.3.4-20090804/gcc/doc/tm.texi
===================================================================
--- gcc-4.3.4-20090804.orig/gcc/doc/tm.texi	2009-09-21 11:47:27.000000000 +0200
+++ gcc-4.3.4-20090804/gcc/doc/tm.texi	2009-09-21 11:47:38.000000000 +0200
@@ -10395,3 +10395,9 @@ optimizations before this pass work bett
 instructions, and the optimizations right after this pass (e.g., CSE and
 combine) are be able to optimize the split instructions.
 @end defmac
+
+@defmac ADDRESSES_NEVER_TRAP
+Define this macro if memory accesses will never cause a trap.
+This is the case, for example, on the Cell SPU processor.
+@end defmac
+
Index: gcc-4.3.4-20090804/gcc/rtlanal.c
===================================================================
--- gcc-4.3.4-20090804.orig/gcc/rtlanal.c	2009-09-21 11:42:15.000000000 +0200
+++ gcc-4.3.4-20090804/gcc/rtlanal.c	2009-09-21 11:47:38.000000000 +0200
@@ -266,6 +266,10 @@ static int
 rtx_addr_can_trap_p_1 (const_rtx x, HOST_WIDE_INT offset, HOST_WIDE_INT size,
 		       enum machine_mode mode, bool unaligned_mems)
 {
+#ifdef ADDRESSES_NEVER_TRAP
+  /* On some processors, like the SPU, memory accesses never trap.  */
+  return 0;
+#else
   enum rtx_code code = GET_CODE (x);
 
   if (STRICT_ALIGNMENT
@@ -382,6 +386,7 @@ rtx_addr_can_trap_p_1 (const_rtx x, HOST
 
   /* If it isn't one of the case above, it can cause a trap.  */
   return 1;
+#endif
 }
 
 /* Return nonzero if the use of X as an address in a MEM can cause a trap.  */