File ibm-cell-split of Package gcc43

2008-08-27  Trevor Smigiel <Trevor_Smigiel@playstation.sony.com>
	
	Improve code generated for loads and stores on SPU.

	* doc/tm.texi (SPLIT_BEFORE_CSE2) : Document.
	* tree-pass.h (pass_split_before_cse2) : Declare.
	* final.c (rest_of_clean_state) : Initialize split0_completed.
	* recog.c (split0_completed) : Define.
	(gate_handle_split_before_cse2, rest_of_handle_split_before_cse2) :
	New functions.
	(pass_split_before_cse2) : New pass.
	* rtl.h (split0_completed) : Declare.
	* passes.c (init_optimization_passes) : Add pass_split_before_cse2
	before pass_cse2.
	* config/spu/spu-protos.h (spu_legitimate_address) : Add
	for_split argument.
	(aligned_mem_p, spu_valid_move) : Remove prototypes.
	(spu_split_load, spu_split_store) : Change return type to int.
	* config/spu/predicates.md (spu_mem_operand) : Remove.
	(spu_dest_operand) : Add.
	* config/spu/spu-builtins.md (spu_lqd, spu_lqx, spu_lqa,
	spu_lqr, spu_stqd, spu_stqx, spu_stqa, spu_stqr) : Remove AND
	operation.
	* config/spu/spu.c (regno_aligned_for_load) : Remove.
	(reg_aligned_for_addr, address_needs_split) : New functions.
	(spu_legitimate_address, spu_expand_mov, spu_split_load,
	spu_split_store) : Update.
	(spu_init_expanders) : Pregenerate a couple of pseudo-registers.
	* config/spu/spu.h (REG_ALIGN, SPLIT_BEFORE_CSE2) : Define.
	(GO_IF_LEGITIMATE_ADDRESS) : Update for spu_legitimate_address.
	* config/spu/spu.md ("_mov<mode>", "_movdi", "_movti") : Update
	predicates.
	("load", "store") : Change to define_split.

testsuite/
	* testsuite/gcc.target/spu/split0-1.c : Add test.

Index: gcc-4.3.4-20090804/gcc/config/spu/predicates.md
===================================================================
--- gcc-4.3.4-20090804.orig/gcc/config/spu/predicates.md	2009-09-21 11:42:15.000000000 +0200
+++ gcc-4.3.4-20090804/gcc/config/spu/predicates.md	2009-09-21 11:47:27.000000000 +0200
@@ -39,14 +39,14 @@
        (ior (not (match_code "subreg"))
             (match_test "valid_subreg (op)"))))
 
-(define_predicate "spu_mem_operand"
-  (and (match_operand 0 "memory_operand")
-       (match_test "reload_in_progress || reload_completed || aligned_mem_p (op)")))
-
 (define_predicate "spu_mov_operand"
-  (ior (match_operand 0 "spu_mem_operand")
+  (ior (match_operand 0 "memory_operand")
        (match_operand 0 "spu_nonmem_operand")))
 
+(define_predicate "spu_dest_operand"
+  (ior (match_operand 0 "memory_operand")
+       (match_operand 0 "spu_reg_operand")))
+
 (define_predicate "call_operand"
   (and (match_code "mem")
        (match_test "(!TARGET_LARGE_MEM && satisfies_constraint_S (op))
Index: gcc-4.3.4-20090804/gcc/config/spu/spu-builtins.md
===================================================================
--- gcc-4.3.4-20090804.orig/gcc/config/spu/spu-builtins.md	2009-09-21 11:42:15.000000000 +0200
+++ gcc-4.3.4-20090804/gcc/config/spu/spu-builtins.md	2009-09-21 11:47:27.000000000 +0200
@@ -23,9 +23,8 @@
 
 (define_expand "spu_lqd"
   [(set (match_operand:TI 0 "spu_reg_operand" "")
-        (mem:TI (and:SI (plus:SI (match_operand:SI 1 "spu_reg_operand" "")
-				 (match_operand:SI 2 "spu_nonmem_operand" ""))
-		        (const_int -16))))]
+        (mem:TI (plus:SI (match_operand:SI 1 "spu_reg_operand" "")
+			 (match_operand:SI 2 "spu_nonmem_operand" ""))))]
   ""
   {
     if (GET_CODE (operands[2]) == CONST_INT
@@ -42,16 +41,14 @@
 
 (define_expand "spu_lqx"
   [(set (match_operand:TI 0 "spu_reg_operand" "")
-        (mem:TI (and:SI (plus:SI (match_operand:SI 1 "spu_reg_operand" "")
-                                 (match_operand:SI 2 "spu_reg_operand" ""))
-                        (const_int -16))))]
+        (mem:TI (plus:SI (match_operand:SI 1 "spu_reg_operand" "")
+			 (match_operand:SI 2 "spu_reg_operand" ""))))]
   ""
   "")
 
 (define_expand "spu_lqa"
   [(set (match_operand:TI 0 "spu_reg_operand" "")
-        (mem:TI (and:SI (match_operand:SI 1 "immediate_operand" "")
-                        (const_int -16))))]
+        (mem:TI (match_operand:SI 1 "immediate_operand" "")))]
   ""
   {
     if (GET_CODE (operands[1]) == CONST_INT
@@ -61,15 +58,13 @@
 
 (define_expand "spu_lqr"
   [(set (match_operand:TI 0 "spu_reg_operand" "")
-	(mem:TI (and:SI (match_operand:SI 1 "address_operand" "")
-			(const_int -16))))]
+	(mem:TI (match_operand:SI 1 "address_operand" "")))]
   ""
   "")
 
 (define_expand "spu_stqd"
-  [(set (mem:TI (and:SI (plus:SI (match_operand:SI 1 "spu_reg_operand" "")
-				 (match_operand:SI 2 "spu_nonmem_operand" ""))
-		        (const_int -16)))
+  [(set (mem:TI (plus:SI (match_operand:SI 1 "spu_reg_operand" "")
+			 (match_operand:SI 2 "spu_nonmem_operand" "")))
         (match_operand:TI 0 "spu_reg_operand" "r,r"))]
   ""
   {
@@ -86,16 +81,14 @@
   })
 
 (define_expand "spu_stqx"
-  [(set (mem:TI (and:SI (plus:SI (match_operand:SI 1 "spu_reg_operand" "")
-				 (match_operand:SI 2 "spu_reg_operand" ""))
-		        (const_int -16)))
+  [(set (mem:TI (plus:SI (match_operand:SI 1 "spu_reg_operand" "")
+			 (match_operand:SI 2 "spu_reg_operand" "")))
         (match_operand:TI 0 "spu_reg_operand" "r"))]
   ""
   "")
 
 (define_expand "spu_stqa"
-  [(set (mem:TI (and:SI (match_operand:SI 1 "immediate_operand" "")
-			(const_int -16)))
+  [(set (mem:TI (match_operand:SI 1 "immediate_operand" ""))
         (match_operand:TI 0 "spu_reg_operand" "r"))]
   ""
   {
@@ -105,8 +98,7 @@
   })
 
 (define_expand "spu_stqr"
-    [(set (mem:TI (and:SI (match_operand:SI 1 "address_operand" "")
-			  (const_int -16)))
+    [(set (mem:TI (match_operand:SI 1 "address_operand" ""))
 	  (match_operand:TI 0 "spu_reg_operand" ""))]
   ""
   "")
Index: gcc-4.3.4-20090804/gcc/config/spu/spu.c
===================================================================
--- gcc-4.3.4-20090804.orig/gcc/config/spu/spu.c	2009-09-21 11:44:57.000000000 +0200
+++ gcc-4.3.4-20090804/gcc/config/spu/spu.c	2009-09-21 11:47:27.000000000 +0200
@@ -189,9 +189,9 @@ static tree spu_build_builtin_va_list (v
 static void spu_va_start (tree, rtx);
 static tree spu_gimplify_va_arg_expr (tree valist, tree type, tree * pre_p,
 				      tree * post_p);
-static int regno_aligned_for_load (int regno);
 static int store_with_one_insn_p (rtx mem);
 static int mem_is_padded_component_ref (rtx x);
+static int reg_aligned_for_addr (rtx x, int aligned);
 static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
 static void spu_asm_globalize_label (FILE * file, const char *name);
 static unsigned char spu_rtx_costs (rtx x, int code, int outer_code,
@@ -3530,24 +3530,52 @@ spu_legitimate_constant_p (rtx x)
 /* Valid address are:
    - symbol_ref, label_ref, const
    - reg
-   - reg + const, where either reg or const is 16 byte aligned
+   - reg + const, where const is 16 byte aligned
    - reg + reg, alignment doesn't matter
   The alignment matters in the reg+const case because lqd and stqd
-  ignore the 4 least significant bits of the const.  (TODO: It might be
-  preferable to allow any alignment and fix it up when splitting.) */
+  ignore the 4 least significant bits of the const.  
+
+  Addresses are handled in 4 phases. 
+  1) from the beginning of rtl expansion until the split0 pass.  Any
+     address is acceptable.  
+  2) The split0 pass. It is responsible for making every load and store
+     valid.  It calls legitimate_address with FOR_SPLIT set to 1.  This
+     is where non-16-byte aligned loads/stores are split into multiple
+     instructions to extract or insert just the part we care about.
+  3) From the split0 pass to the beginning of reload.  During this
+     phase the constant part of an address must be 16 byte aligned, and
+     we don't allow any loads/stores of less than 4 bytes.  We also
+     allow a mask of -16 to be part of the address as an optimization.
+  4) From reload until the end.  Reload can change the modes of loads
+     and stores to something smaller than 4-bytes which we need to allow
+     now, and it also adjusts the address to match.  So in this phase we
+     allow that special case.  Still allow addresses with a mask of -16.
+
+  FOR_SPLIT is only set to 1 for phase 2, otherwise it is 0.  */
 int
-spu_legitimate_address (enum machine_mode mode ATTRIBUTE_UNUSED,
-			rtx x, int reg_ok_strict)
+spu_legitimate_address (enum machine_mode mode, rtx x, int reg_ok_strict,
+			int for_split)
 {
-  if (mode == TImode && GET_CODE (x) == AND
-      && GET_CODE (XEXP (x, 1)) == CONST_INT
-      && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) -16)
+  int aligned = (split0_completed || for_split)
+    && !reload_in_progress && !reload_completed;
+  int const_aligned = split0_completed || for_split;
+  if (GET_MODE_SIZE (mode) >= 16)
+    aligned = 0;
+  else if (aligned && GET_MODE_SIZE (mode) < 4)
+    return 0;
+  if (split0_completed
+      && (GET_CODE (x) == AND
+	  && GET_CODE (XEXP (x, 1)) == CONST_INT
+	  && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16
+	  && !CONSTANT_P (XEXP (x, 0))))
     x = XEXP (x, 0);
   switch (GET_CODE (x))
     {
-    case SYMBOL_REF:
     case LABEL_REF:
-      return !TARGET_LARGE_MEM;
+      return !TARGET_LARGE_MEM && !aligned;
+
+    case SYMBOL_REF:
+      return !TARGET_LARGE_MEM && (!aligned || ALIGNED_SYMBOL_REF_P (x));
 
     case CONST:
       if (!TARGET_LARGE_MEM && GET_CODE (XEXP (x, 0)) == PLUS)
@@ -3555,22 +3583,30 @@ spu_legitimate_address (enum machine_mod
 	  rtx sym = XEXP (XEXP (x, 0), 0);
 	  rtx cst = XEXP (XEXP (x, 0), 1);
 
-	  /* Accept any symbol_ref + constant, assuming it does not
-	     wrap around the local store addressability limit.  */
 	  if (GET_CODE (sym) == SYMBOL_REF && GET_CODE (cst) == CONST_INT)
-	    return 1;
+	    {
+	      /* Check for alignment if required.  */
+	      if (!aligned)
+		return 1;
+	      if ((INTVAL (cst) & 15) == 0 && ALIGNED_SYMBOL_REF_P (sym))
+		return 1;
+	    }
 	}
       return 0;
 
     case CONST_INT:
+      /* We don't test alignment here.  For an absolute address we
+         assume the user knows what they are doing. */
       return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
 
     case SUBREG:
       x = XEXP (x, 0);
-      gcc_assert (GET_CODE (x) == REG);
+      if (GET_CODE (x) != REG)
+	return 0;
 
     case REG:
-      return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
+      return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict)
+	&& reg_aligned_for_addr (x, 0);
 
     case PLUS:
     case LO_SUM:
@@ -3581,21 +3617,29 @@ spu_legitimate_address (enum machine_mod
 	  op0 = XEXP (op0, 0);
 	if (GET_CODE (op1) == SUBREG)
 	  op1 = XEXP (op1, 0);
-	/* We can't just accept any aligned register because CSE can
-	   change it to a register that is not marked aligned and then
-	   recog will fail.   So we only accept frame registers because
-	   they will only be changed to other frame registers. */
 	if (GET_CODE (op0) == REG
 	    && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
 	    && GET_CODE (op1) == CONST_INT
 	    && INTVAL (op1) >= -0x2000
 	    && INTVAL (op1) <= 0x1fff
-	    && (regno_aligned_for_load (REGNO (op0)) || (INTVAL (op1) & 15) == 0))
+	    && reg_aligned_for_addr (op0, 0)
+	    && (!const_aligned
+		|| (INTVAL (op1) & 15) == 0
+		|| ((reload_in_progress || reload_completed)
+		    && GET_MODE_SIZE (mode) < 4
+		    && (INTVAL (op1) & 15) == 4 - GET_MODE_SIZE (mode))
+		/* Some passes create a fake register for testing valid
+		   addresses, be more lenient when we see those.  ivopts
+		   and reload do it. */
+		|| REGNO (op0) == LAST_VIRTUAL_REGISTER + 1
+		|| REGNO (op0) == LAST_VIRTUAL_REGISTER + 2))
 	  return 1;
 	if (GET_CODE (op0) == REG
 	    && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
+	    && reg_aligned_for_addr (op0, 0)
 	    && GET_CODE (op1) == REG
-	    && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
+	    && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict)
+	    && reg_aligned_for_addr (op1, 0))
 	  return 1;
       }
       break;
@@ -3633,7 +3677,7 @@ spu_legitimize_address (rtx x, rtx oldx
       else if (GET_CODE (op1) != REG)
 	op1 = force_reg (Pmode, op1);
       x = gen_rtx_PLUS (Pmode, op0, op1);
-      if (spu_legitimate_address (mode, x, 0))
+      if (spu_legitimate_address (mode, x, 0, 0))
 	return x;
     }
   return NULL_RTX;
@@ -4058,60 +4102,16 @@ spu_conditional_register_usage (void)
     }
 }
 
-/* This is called to decide when we can simplify a load instruction.  We
-   must only return true for registers which we know will always be
-   aligned.  Taking into account that CSE might replace this reg with
-   another one that has not been marked aligned.  
-   So this is really only true for frame, stack and virtual registers,
-   which we know are always aligned and should not be adversely effected
-   by CSE.  */
+/* This is called any time we inspect the alignment of a register for
+   addresses.  */
 static int
-regno_aligned_for_load (int regno)
-{
-  return regno == FRAME_POINTER_REGNUM
-    || (frame_pointer_needed && regno == HARD_FRAME_POINTER_REGNUM)
-    || regno == ARG_POINTER_REGNUM
-    || regno == STACK_POINTER_REGNUM
-    || (regno >= FIRST_VIRTUAL_REGISTER 
-	&& regno <= LAST_VIRTUAL_REGISTER);
-}
-
-/* Return TRUE when mem is known to be 16-byte aligned. */
-int
-aligned_mem_p (rtx mem)
+reg_aligned_for_addr (rtx x, int aligned)
 {
-  if (MEM_ALIGN (mem) >= 128)
+  int regno =
+    REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
+  if (!aligned)
     return 1;
-  if (GET_MODE_SIZE (GET_MODE (mem)) >= 16)
-    return 1;
-  if (GET_CODE (XEXP (mem, 0)) == PLUS)
-    {
-      rtx p0 = XEXP (XEXP (mem, 0), 0);
-      rtx p1 = XEXP (XEXP (mem, 0), 1);
-      if (regno_aligned_for_load (REGNO (p0)))
-	{
-	  if (GET_CODE (p1) == REG && regno_aligned_for_load (REGNO (p1)))
-	    return 1;
-	  if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
-	    return 1;
-	}
-    }
-  else if (GET_CODE (XEXP (mem, 0)) == REG)
-    {
-      if (regno_aligned_for_load (REGNO (XEXP (mem, 0))))
-	return 1;
-    }
-  else if (ALIGNED_SYMBOL_REF_P (XEXP (mem, 0)))
-    return 1;
-  else if (GET_CODE (XEXP (mem, 0)) == CONST)
-    {
-      rtx p0 = XEXP (XEXP (XEXP (mem, 0), 0), 0);
-      rtx p1 = XEXP (XEXP (XEXP (mem, 0), 0), 1);
-      if (GET_CODE (p0) == SYMBOL_REF
-	  && GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
-	return 1;
-    }
-  return 0;
+  return REGNO_POINTER_ALIGN (regno) >= 128;
 }
 
 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
@@ -4140,9 +4140,12 @@ spu_encode_section_info (tree decl, rtx
 static int
 store_with_one_insn_p (rtx mem)
 {
+  enum machine_mode mode = GET_MODE (mem);
   rtx addr = XEXP (mem, 0);
-  if (GET_MODE (mem) == BLKmode)
+  if (mode == BLKmode)
     return 0;
+  if (GET_MODE_SIZE (mode) >= 16)
+    return 1;
   /* Only static objects. */
   if (GET_CODE (addr) == SYMBOL_REF)
     {
@@ -4166,6 +4169,22 @@ store_with_one_insn_p (rtx mem)
   return 0;
 }
 
+/* Return 1 when the address is not valid for a simple load and store as
+   required by the '_mov*' patterns.   We could make this less strict
+   for loads, but we prefer mem's to look the same so they are more
+   likely to be merged.  */
+static int
+address_needs_split (rtx mem)
+{
+  if (GET_MODE_SIZE (GET_MODE (mem)) < 16
+      && (GET_MODE_SIZE (GET_MODE (mem)) < 4
+	  || !(store_with_one_insn_p (mem)
+	       || mem_is_padded_component_ref (mem))))
+    return 1;
+
+  return 0;
+}
+
 int
 spu_expand_mov (rtx * ops, enum machine_mode mode)
 {
@@ -4213,25 +4232,6 @@ spu_expand_mov (rtx * ops, enum machine_
     }
   else
     {
-      if (GET_CODE (ops[0]) == MEM)
-	{
-	  if (!spu_valid_move (ops))
-	    {
-	      emit_insn (gen_store (ops[0], ops[1], gen_reg_rtx (TImode),
-				    gen_reg_rtx (TImode)));
-	      return 1;
-	    }
-	}
-      else if (GET_CODE (ops[1]) == MEM)
-	{
-	  if (!spu_valid_move (ops))
-	    {
-	      emit_insn (gen_load
-			 (ops[0], ops[1], gen_reg_rtx (TImode),
-			  gen_reg_rtx (SImode)));
-	      return 1;
-	    }
-	}
       /* Catch the SImode immediates greater than 0x7fffffff, and sign
          extend them. */
       if (GET_CODE (ops[1]) == CONST_INT)
@@ -4247,7 +4247,7 @@ spu_expand_mov (rtx * ops, enum machine_
   return 0;
 }
 
-void
+int
 spu_split_load (rtx * ops)
 {
   enum machine_mode mode = GET_MODE (ops[0]);
@@ -4255,6 +4255,17 @@ spu_split_load (rtx * ops)
   int rot_amt;
 
   addr = XEXP (ops[1], 0);
+  gcc_assert (GET_CODE (addr) != AND);
+
+  if (!address_needs_split (ops[1]))
+    {
+      addr = XEXP (ops[1], 0);
+      if (spu_legitimate_address (mode, addr, 0, 1))
+	return 0;
+      ops[1] = change_address (ops[1], VOIDmode, force_reg (Pmode, addr));
+      emit_move_insn (ops[0], ops[1]);
+      return 1;
+    }
 
   rot = 0;
   rot_amt = 0;
@@ -4272,12 +4283,32 @@ spu_split_load (rtx * ops)
        */
       p0 = XEXP (addr, 0);
       p1 = XEXP (addr, 1);
-      if (REG_P (p0) && !regno_aligned_for_load (REGNO (p0)))
+      if (!reg_aligned_for_addr (p0, 1))
 	{
-	  if (REG_P (p1) && !regno_aligned_for_load (REGNO (p1)))
+	  if (GET_CODE (p1) == REG && !reg_aligned_for_addr (p1, 1))
 	    {
-	      emit_insn (gen_addsi3 (ops[3], p0, p1));
-	      rot = ops[3];
+	      rot = gen_reg_rtx (SImode);
+	      emit_insn (gen_addsi3 (rot, p0, p1));
+	    }
+	  else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
+	    {
+	      if (INTVAL (p1) > 0
+		  && INTVAL (p1) * BITS_PER_UNIT < REG_ALIGN (p0))
+		{
+		  rot = gen_reg_rtx (SImode);
+		  emit_insn (gen_addsi3 (rot, p0, p1));
+		  addr = p0;
+		}
+	      else
+		{
+		  rtx x = gen_reg_rtx (SImode);
+		  emit_move_insn (x, p1);
+		  if (!spu_arith_operand (p1, SImode))
+		    p1 = x;
+		  rot = gen_reg_rtx (SImode);
+		  emit_insn (gen_addsi3 (rot, p0, p1));
+		  addr = gen_rtx_PLUS (Pmode, p0, x);
+		}
 	    }
 	  else
 	    rot = p0;
@@ -4287,16 +4318,21 @@ spu_split_load (rtx * ops)
 	  if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
 	    {
 	      rot_amt = INTVAL (p1) & 15;
-	      p1 = GEN_INT (INTVAL (p1) & -16);
-	      addr = gen_rtx_PLUS (SImode, p0, p1);
+	      if (INTVAL (p1) & -16)
+		{
+		  p1 = GEN_INT (INTVAL (p1) & -16);
+		  addr = gen_rtx_PLUS (SImode, p0, p1);
+		}
+	      else
+		addr = p0;
 	    }
-	  else if (REG_P (p1) && !regno_aligned_for_load (REGNO (p1)))
+	  else if (GET_CODE (p1) == REG && !reg_aligned_for_addr (p1, 1))
 	    rot = p1;
 	}
     }
   else if (GET_CODE (addr) == REG)
     {
-      if (!regno_aligned_for_load (REGNO (addr)))
+      if (!reg_aligned_for_addr (addr, 1))
 	rot = addr;
     }
   else if (GET_CODE (addr) == CONST)
@@ -4315,7 +4351,10 @@ spu_split_load (rtx * ops)
 	    addr = XEXP (XEXP (addr, 0), 0);
 	}
       else
-	rot = addr;
+	{
+	  rot = gen_reg_rtx (Pmode);
+	  emit_move_insn (rot, addr);
+	}
     }
   else if (GET_CODE (addr) == CONST_INT)
     {
@@ -4323,7 +4362,10 @@ spu_split_load (rtx * ops)
       addr = GEN_INT (rot_amt & -16);
     }
   else if (!ALIGNED_SYMBOL_REF_P (addr))
-    rot = addr;
+    {
+      rot = gen_reg_rtx (Pmode);
+      emit_move_insn (rot, addr);
+    }
 
   if (GET_MODE_SIZE (mode) < 4)
     rot_amt += GET_MODE_SIZE (mode) - 4;
@@ -4332,15 +4374,15 @@ spu_split_load (rtx * ops)
 
   if (rot && rot_amt)
     {
-      emit_insn (gen_addsi3 (ops[3], rot, GEN_INT (rot_amt)));
-      rot = ops[3];
+      rtx x = gen_reg_rtx (SImode);
+      emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
+      rot = x;
       rot_amt = 0;
     }
 
-  load = ops[2];
+  load = gen_reg_rtx (TImode);
 
-  addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
-  mem = change_address (ops[1], TImode, addr);
+  mem = change_address (ops[1], TImode, copy_rtx (addr));
 
   emit_insn (gen_movti (load, mem));
 
@@ -4349,23 +4391,31 @@ spu_split_load (rtx * ops)
   else if (rot_amt)
     emit_insn (gen_rotlti3 (load, load, GEN_INT (rot_amt * 8)));
 
-  if (reload_completed)
-    emit_move_insn (ops[0], gen_rtx_REG (GET_MODE (ops[0]), REGNO (load)));
-  else
-    emit_insn (gen_spu_convert (ops[0], load));
+  emit_insn (gen_spu_convert (ops[0], load));
+  return 1;
 }
 
-void
+int
 spu_split_store (rtx * ops)
 {
   enum machine_mode mode = GET_MODE (ops[0]);
-  rtx pat = ops[2];
-  rtx reg = ops[3];
+  rtx reg;
   rtx addr, p0, p1, p1_lo, smem;
   int aform;
   int scalar;
 
+  if (!address_needs_split (ops[0]))
+    {
+      addr = XEXP (ops[0], 0);
+      if (spu_legitimate_address (mode, addr, 0, 1))
+	return 0;
+      ops[0] = change_address (ops[0], VOIDmode, force_reg (Pmode, addr));
+      emit_move_insn (ops[0], ops[1]);
+      return 1;
+    }
+
   addr = XEXP (ops[0], 0);
+  gcc_assert (GET_CODE (addr) != AND);
 
   if (GET_CODE (addr) == PLUS)
     {
@@ -4377,7 +4427,7 @@ spu_split_store (rtx * ops)
          unaligned reg + aligned reg     => lqx, c?x, shuf, stqx
          unaligned reg + unaligned reg   => lqx, c?x, shuf, stqx
          unaligned reg + aligned const   => lqd, c?d, shuf, stqx
-         unaligned reg + unaligned const -> not allowed by legitimate address
+         unaligned reg + unaligned const => lqx, c?d, shuf, stqx
        */
       aform = 0;
       p0 = XEXP (addr, 0);
@@ -4385,8 +4435,20 @@ spu_split_store (rtx * ops)
       if (GET_CODE (p0) == REG && GET_CODE (p1) == CONST_INT)
 	{
 	  p1_lo = GEN_INT (INTVAL (p1) & 15);
-	  p1 = GEN_INT (INTVAL (p1) & -16);
-	  addr = gen_rtx_PLUS (SImode, p0, p1);
+	  if (reg_aligned_for_addr (p0, 1))
+	    {
+	      p1 = GEN_INT (INTVAL (p1) & -16);
+	      if (p1 == const0_rtx)
+		addr = p0;
+	      else
+		addr = gen_rtx_PLUS (SImode, p0, p1);
+	    }
+	  else
+	    {
+	      rtx x = gen_reg_rtx (SImode);
+	      emit_move_insn (x, p1);
+	      addr = gen_rtx_PLUS (SImode, p0, x);
+	    }
 	}
     }
   else if (GET_CODE (addr) == REG)
@@ -4403,31 +4465,34 @@ spu_split_store (rtx * ops)
       p1_lo = addr;
       if (ALIGNED_SYMBOL_REF_P (addr))
 	p1_lo = const0_rtx;
-      else if (GET_CODE (addr) == CONST)
+      else if (GET_CODE (addr) == CONST
+	       && GET_CODE (XEXP (addr, 0)) == PLUS
+	       && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
+	       && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
 	{
-	  if (GET_CODE (XEXP (addr, 0)) == PLUS
-	      && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
-	      && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
-	    {
-	      HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
-	      if ((v & -16) != 0)
-		addr = gen_rtx_CONST (Pmode,
-				      gen_rtx_PLUS (Pmode,
-						    XEXP (XEXP (addr, 0), 0),
-						    GEN_INT (v & -16)));
-	      else
-		addr = XEXP (XEXP (addr, 0), 0);
-	      p1_lo = GEN_INT (v & 15);
-	    }
+	  HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
+	  if ((v & -16) != 0)
+	    addr = gen_rtx_CONST (Pmode,
+				  gen_rtx_PLUS (Pmode,
+						XEXP (XEXP (addr, 0), 0),
+						GEN_INT (v & -16)));
+	  else
+	    addr = XEXP (XEXP (addr, 0), 0);
+	  p1_lo = GEN_INT (v & 15);
 	}
       else if (GET_CODE (addr) == CONST_INT)
 	{
 	  p1_lo = GEN_INT (INTVAL (addr) & 15);
 	  addr = GEN_INT (INTVAL (addr) & -16);
 	}
+      else
+	{
+	  p1_lo = gen_reg_rtx (SImode);
+	  emit_move_insn (p1_lo, addr);
+	}
     }
 
-  addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
+  reg = gen_reg_rtx (TImode);
 
   scalar = store_with_one_insn_p (ops[0]);
   if (!scalar)
@@ -4437,11 +4502,12 @@ spu_split_store (rtx * ops)
          possible, and copying the flags will prevent that in certain
          cases, e.g. consider the volatile flag. */
 
+      rtx pat = gen_reg_rtx (TImode);
       rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
       set_mem_alias_set (lmem, 0);
       emit_insn (gen_movti (reg, lmem));
 
-      if (!p0 || regno_aligned_for_load (REGNO (p0)))
+      if (!p0 || reg_aligned_for_addr (p0, 1))
 	p0 = stack_pointer_rtx;
       if (!p1_lo)
 	p1_lo = const0_rtx;
@@ -4449,17 +4515,6 @@ spu_split_store (rtx * ops)
       emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
       emit_insn (gen_shufb (reg, ops[1], reg, pat));
     }
-  else if (reload_completed)
-    {
-      if (GET_CODE (ops[1]) == REG)
-	emit_move_insn (reg, gen_rtx_REG (GET_MODE (reg), REGNO (ops[1])));
-      else if (GET_CODE (ops[1]) == SUBREG)
-	emit_move_insn (reg,
-			gen_rtx_REG (GET_MODE (reg),
-				     REGNO (SUBREG_REG (ops[1]))));
-      else
-	abort ();
-    }
   else
     {
       if (GET_CODE (ops[1]) == REG)
@@ -4471,15 +4526,16 @@ spu_split_store (rtx * ops)
     }
 
   if (GET_MODE_SIZE (mode) < 4 && scalar)
-    emit_insn (gen_shlqby_ti
-	       (reg, reg, GEN_INT (4 - GET_MODE_SIZE (mode))));
+    emit_insn (gen_ashlti3
+	       (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
 
-  smem = change_address (ops[0], TImode, addr);
+  smem = change_address (ops[0], TImode, copy_rtx (addr));
   /* We can't use the previous alias set because the memory has changed
      size and can potentially overlap objects of other types.  */
   set_mem_alias_set (smem, 0);
 
   emit_insn (gen_movti (smem, reg));
+  return 1;
 }
 
 /* Return TRUE if X is MEM which is a struct member reference
@@ -4578,37 +4634,6 @@ fix_range (const char *const_str)
     }
 }
 
-int
-spu_valid_move (rtx * ops)
-{
-  enum machine_mode mode = GET_MODE (ops[0]);
-  if (!register_operand (ops[0], mode) && !register_operand (ops[1], mode))
-    return 0;
-
-  /* init_expr_once tries to recog against load and store insns to set
-     the direct_load[] and direct_store[] arrays.  We always want to
-     consider those loads and stores valid.  init_expr_once is called in
-     the context of a dummy function which does not have a decl. */
-  if (cfun->decl == 0)
-    return 1;
-
-  /* Don't allows loads/stores which would require more than 1 insn.
-     During and after reload we assume loads and stores only take 1
-     insn. */
-  if (GET_MODE_SIZE (mode) < 16 && !reload_in_progress && !reload_completed)
-    {
-      if (GET_CODE (ops[0]) == MEM
-	  && (GET_MODE_SIZE (mode) < 4
-	      || !(store_with_one_insn_p (ops[0])
-		   || mem_is_padded_component_ref (ops[0]))))
-	return 0;
-      if (GET_CODE (ops[1]) == MEM
-	  && (GET_MODE_SIZE (mode) < 4 || !aligned_mem_p (ops[1])))
-	return 0;
-    }
-  return 1;
-}
-
 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
    can be generated using the fsmbi instruction. */
 int
@@ -6260,12 +6285,26 @@ spu_sms_res_mii (struct ddg *g)
 
 void
 spu_init_expanders (void)
-{   
-  /* HARD_FRAME_REGISTER is only 128 bit aligned when
-   * frame_pointer_needed is true.  We don't know that until we're
-   * expanding the prologue. */
+{
   if (cfun)
-    REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
+    {
+      rtx r0, r1;
+      /* HARD_FRAME_POINTER_REGNUM is only 128 bit aligned when
+         frame_pointer_needed is true.  We don't know that until we're
+         expanding the prologue. */
+      REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
+
+      /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
+         LAST_VIRTUAL_REGISTER+2 to test the back-end.  We want to
+         handle those cases specially, so we reserve those two registers
+         here by generating them. */
+      r0 = gen_reg_rtx (SImode);
+      r1 = gen_reg_rtx (SImode);
+      mark_reg_pointer (r0, 128);
+      mark_reg_pointer (r1, 128);
+      gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
+		  && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
+    }
 }
 
 static enum machine_mode
Index: gcc-4.3.4-20090804/gcc/config/spu/spu.h
===================================================================
--- gcc-4.3.4-20090804.orig/gcc/config/spu/spu.h	2009-09-21 11:42:15.000000000 +0200
+++ gcc-4.3.4-20090804/gcc/config/spu/spu.h	2009-09-21 11:47:27.000000000 +0200
@@ -255,6 +255,11 @@ enum reg_class {
 #define INT_REG_OK_FOR_BASE_P(X,STRICT) \
 	((!(STRICT) || REGNO_OK_FOR_BASE_P (REGNO (X))))
 
+#define REG_ALIGN(X) \
+	(REG_POINTER(X) \
+	 	? REGNO_POINTER_ALIGN (ORIGINAL_REGNO (X)) \
+		: 0)
+
 #define PREFERRED_RELOAD_CLASS(X,CLASS)  (CLASS)
 
 #define CLASS_MAX_NREGS(CLASS, MODE)	\
@@ -440,7 +445,7 @@ targetm.resolve_overloaded_builtin = spu
 #endif
 
 #define GO_IF_LEGITIMATE_ADDRESS(MODE, X, ADDR)			\
-    { if (spu_legitimate_address (MODE, X, REG_OK_STRICT_FLAG))	\
+    { if (spu_legitimate_address (MODE, X, REG_OK_STRICT_FLAG, 0))	\
 	goto ADDR;						\
     }
 
@@ -634,6 +639,8 @@ targetm.resolve_overloaded_builtin = spu
 extern GTY(()) rtx spu_compare_op0;
 extern GTY(()) rtx spu_compare_op1;
 
+#define SPLIT_BEFORE_CSE2 1
+
 
 /* Builtins.  */
 
Index: gcc-4.3.4-20090804/gcc/config/spu/spu.md
===================================================================
--- gcc-4.3.4-20090804.orig/gcc/config/spu/spu.md	2009-09-21 11:44:57.000000000 +0200
+++ gcc-4.3.4-20090804/gcc/config/spu/spu.md	2009-09-21 11:47:27.000000000 +0200
@@ -276,8 +276,7 @@
 (define_split 
   [(set (match_operand 0 "spu_reg_operand")
 	(match_operand 1 "immediate_operand"))]
-
-  ""
+  "split0_completed"
   [(set (match_dup 0)
 	(high (match_dup 1)))
    (set (match_dup 0)
@@ -314,9 +313,10 @@
 ;; move internal
 
 (define_insn "_mov<mode>"
-  [(set (match_operand:MOV 0 "spu_nonimm_operand" "=r,r,r,r,r,m")
+  [(set (match_operand:MOV 0 "spu_dest_operand" "=r,r,r,r,r,m")
 	(match_operand:MOV 1 "spu_mov_operand" "r,A,f,j,m,r"))]
-  "spu_valid_move (operands)"
+  "register_operand(operands[0], <MODE>mode)
+   || register_operand(operands[1], <MODE>mode)"
   "@
    ori\t%0,%1,0
    il%s1\t%0,%S1
@@ -334,9 +334,10 @@
   "iohl\t%0,%2@l")
 
 (define_insn "_movdi"
-  [(set (match_operand:DI 0 "spu_nonimm_operand" "=r,r,r,r,r,m")
+  [(set (match_operand:DI 0 "spu_dest_operand" "=r,r,r,r,r,m")
 	(match_operand:DI 1 "spu_mov_operand" "r,a,f,k,m,r"))]
-  "spu_valid_move (operands)"
+  "register_operand(operands[0], DImode)
+   || register_operand(operands[1], DImode)"
   "@
    ori\t%0,%1,0
    il%d1\t%0,%D1
@@ -347,9 +348,10 @@
   [(set_attr "type" "fx2,fx2,shuf,shuf,load,store")])
 
 (define_insn "_movti"
-  [(set (match_operand:TI 0 "spu_nonimm_operand" "=r,r,r,r,r,m")
+  [(set (match_operand:TI 0 "spu_dest_operand" "=r,r,r,r,r,m")
 	(match_operand:TI 1 "spu_mov_operand" "r,U,f,l,m,r"))]
-  "spu_valid_move (operands)"
+  "register_operand(operands[0], TImode)
+   || register_operand(operands[1], TImode)"
   "@
    ori\t%0,%1,0
    il%t1\t%0,%T1
@@ -359,29 +361,25 @@
    stq%p0\t%1,%0"
   [(set_attr "type" "fx2,fx2,shuf,shuf,load,store")])
 
-(define_insn_and_split "load"
-  [(set (match_operand 0 "spu_reg_operand" "=r")
-	(match_operand 1 "memory_operand" "m"))
-   (clobber (match_operand:TI 2 "spu_reg_operand" "=&r"))
-   (clobber (match_operand:SI 3 "spu_reg_operand" "=&r"))]
-  "GET_MODE(operands[0]) == GET_MODE(operands[1])"
-  "#"
-  ""
+(define_split
+  [(set (match_operand 0 "spu_reg_operand")
+	(match_operand 1 "memory_operand"))]
+  "GET_MODE(operands[0]) == GET_MODE(operands[1]) && !split0_completed"
   [(set (match_dup 0)
 	(match_dup 1))]
-  { spu_split_load(operands); DONE; })
+  { if (spu_split_load(operands))
+      DONE;
+  })
 
-(define_insn_and_split "store"
-  [(set (match_operand 0 "memory_operand" "=m")
-	(match_operand 1 "spu_reg_operand" "r"))
-   (clobber (match_operand:TI 2 "spu_reg_operand" "=&r"))
-   (clobber (match_operand:TI 3 "spu_reg_operand" "=&r"))]
-  "GET_MODE(operands[0]) == GET_MODE(operands[1])"
-  "#"
-  ""
+(define_split
+  [(set (match_operand 0 "memory_operand")
+	(match_operand 1 "spu_reg_operand"))]
+  "GET_MODE(operands[0]) == GET_MODE(operands[1]) && !split0_completed"
   [(set (match_dup 0)
 	(match_dup 1))]
-  { spu_split_store(operands); DONE; })
+  { if (spu_split_store(operands))
+      DONE;
+  })
 
 ;; Operand 3 is the number of bytes. 1:b 2:h 4:w 8:d
 
Index: gcc-4.3.4-20090804/gcc/config/spu/spu-protos.h
===================================================================
--- gcc-4.3.4-20090804.orig/gcc/config/spu/spu-protos.h	2009-09-21 11:42:15.000000000 +0200
+++ gcc-4.3.4-20090804/gcc/config/spu/spu-protos.h	2009-09-21 11:47:27.000000000 +0200
@@ -54,7 +54,7 @@ extern int arith_immediate_p (rtx op, en
 extern int spu_constant_address_p (rtx x);
 extern int spu_legitimate_constant_p (rtx x);
 extern int spu_legitimate_address (enum machine_mode mode, rtx x,
-				   int reg_ok_strict);
+				   int reg_ok_strict, int for_split);
 extern rtx spu_legitimize_address (rtx x, rtx oldx, enum machine_mode mode);
 extern int spu_initial_elimination_offset (int from, int to);
 extern rtx spu_function_value (const_tree type, const_tree func);
@@ -64,11 +64,9 @@ extern void spu_setup_incoming_varargs (
 					tree type, int *pretend_size,
 					int no_rtl);
 extern void spu_conditional_register_usage (void);
-extern int aligned_mem_p (rtx mem);
 extern int spu_expand_mov (rtx * ops, enum machine_mode mode);
-extern void spu_split_load (rtx * ops);
-extern void spu_split_store (rtx * ops);
-extern int spu_valid_move (rtx * ops);
+extern int spu_split_load (rtx * ops);
+extern int spu_split_store (rtx * ops);
 extern int fsmbi_const_p (rtx x);
 extern int cpat_const_p (rtx x, enum machine_mode mode);
 extern rtx gen_cpat_const (rtx * ops);
Index: gcc-4.3.4-20090804/gcc/doc/tm.texi
===================================================================
--- gcc-4.3.4-20090804.orig/gcc/doc/tm.texi	2009-09-21 11:45:35.000000000 +0200
+++ gcc-4.3.4-20090804/gcc/doc/tm.texi	2009-09-21 11:47:27.000000000 +0200
@@ -10383,3 +10383,15 @@ to the functions in @file{libgcc} that p
 call stack unwinding.  It is used in declarations in @file{unwind-generic.h}
 and the associated definitions of those functions.
 @end defmac
+
+@defmac SPLIT_BEFORE_CSE2
+This macro determines whether to use an additional split pass before the
+second CSE pass.  @code{split0_completed} will be set after this pass is
+completed.
+
+For example, the Cell SPU target uses this for better optimization of
+the multiple instructions required to do simple loads and stores.  The
+optimizations before this pass work better on simple memory
+instructions, and the optimizations right after this pass (e.g., CSE and
+combine) are able to optimize the split instructions.
+@end defmac
Index: gcc-4.3.4-20090804/gcc/final.c
===================================================================
--- gcc-4.3.4-20090804.orig/gcc/final.c	2009-09-21 11:42:15.000000000 +0200
+++ gcc-4.3.4-20090804/gcc/final.c	2009-09-21 11:47:27.000000000 +0200
@@ -4242,6 +4242,9 @@ rest_of_clean_state (void)
 #ifdef STACK_REGS
   regstack_completed = 0;
 #endif
+#ifdef SPLIT_BEFORE_CSE2
+  split0_completed = 0;
+#endif
 
   /* Clear out the insn_length contents now that they are no
      longer valid.  */
Index: gcc-4.3.4-20090804/gcc/passes.c
===================================================================
--- gcc-4.3.4-20090804.orig/gcc/passes.c	2009-09-21 11:45:25.000000000 +0200
+++ gcc-4.3.4-20090804/gcc/passes.c	2009-09-21 11:47:27.000000000 +0200
@@ -716,6 +716,7 @@ init_optimization_passes (void)
 	}
       NEXT_PASS (pass_web);
       NEXT_PASS (pass_jump_bypass);
+      NEXT_PASS (pass_split_before_cse2);
       NEXT_PASS (pass_cse2);
       NEXT_PASS (pass_rtl_dse1);
       NEXT_PASS (pass_rtl_fwprop_addr);
Index: gcc-4.3.4-20090804/gcc/recog.c
===================================================================
--- gcc-4.3.4-20090804.orig/gcc/recog.c	2009-09-21 11:45:45.000000000 +0200
+++ gcc-4.3.4-20090804/gcc/recog.c	2009-09-21 11:47:27.000000000 +0200
@@ -102,6 +102,11 @@ int reload_completed;
 /* Nonzero after thread_prologue_and_epilogue_insns has run.  */
 int epilogue_completed;
 
+#ifdef SPLIT_BEFORE_CSE2
+/* Nonzero after the split0 pass has run.  */
+int split0_completed;
+#endif
+
 /* Initialize data used by the function `recog'.
    This must be called once in the compilation of a function
    before any insn recognition may be done in the function.  */
@@ -3577,4 +3582,40 @@ struct tree_opt_pass pass_split_for_shor
   0                                     /* letter */
 };
 
+static bool
+gate_handle_split_before_cse2 (void)
+{
+#ifdef SPLIT_BEFORE_CSE2
+  return SPLIT_BEFORE_CSE2;
+#else
+  return 0;
+#endif
+}
+
+static unsigned int
+rest_of_handle_split_before_cse2 (void)
+{
+#ifdef SPLIT_BEFORE_CSE2
+  split_all_insns_noflow ();
+  split0_completed = 1;
+#endif
+  return 0;
+}
+
+struct tree_opt_pass pass_split_before_cse2 =
+{
+  "split0",                             /* name */
+  gate_handle_split_before_cse2,        /* gate */
+  rest_of_handle_split_before_cse2,     /* execute */
+  NULL,                                 /* sub */
+  NULL,                                 /* next */
+  0,                                    /* static_pass_number */
+  0,                                    /* tv_id */
+  0,                                    /* properties_required */
+  0,                                    /* properties_provided */
+  0,                                    /* properties_destroyed */
+  0,                                    /* todo_flags_start */
+  TODO_dump_func,                       /* todo_flags_finish */
+  0                                     /* letter */
+};
 
Index: gcc-4.3.4-20090804/gcc/rtl.h
===================================================================
--- gcc-4.3.4-20090804.orig/gcc/rtl.h	2009-09-21 11:42:15.000000000 +0200
+++ gcc-4.3.4-20090804/gcc/rtl.h	2009-09-21 11:47:27.000000000 +0200
@@ -1996,6 +1996,11 @@ extern int reload_completed;
 /* Nonzero after thread_prologue_and_epilogue_insns has run.  */
 extern int epilogue_completed;
 
+#ifdef SPLIT_BEFORE_CSE2
+/* Nonzero after the split0 pass has completed.  */
+extern int split0_completed;
+#endif
+
 /* Set to 1 while reload_as_needed is operating.
    Required by some machines to handle any generated moves differently.  */
 
Index: gcc-4.3.4-20090804/gcc/testsuite/gcc.target/spu/split0-1.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ gcc-4.3.4-20090804/gcc/testsuite/gcc.target/spu/split0-1.c	2009-09-21 11:47:27.000000000 +0200
@@ -0,0 +1,17 @@
+/* Make sure there are only 2 loads. */
+/* { dg-do compile { target spu-*-* } } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-times "lqd	\\$\[0-9\]+,0\\(\\$\[0-9\]+\\)" 1 } } */
+/* { dg-final { scan-assembler-times "lqd	\\$\[0-9\]+,16\\(\\$\[0-9\]+\\)" 1 } } */
+/* { dg-final { scan-assembler-times "lq\[dx\]" 2 } } */
+  
+struct __attribute__ ((__aligned__(16))) S {
+  int a, b, c, d;
+  int e, f, g, h;
+};
+  
+int
+f(struct S *s)
+{ 
+  return s->a + s->b + s->c + s->d + s->e + s->f + s->g + s->h;
+} 
Index: gcc-4.3.4-20090804/gcc/tree-pass.h
===================================================================
--- gcc-4.3.4-20090804.orig/gcc/tree-pass.h	2009-09-21 11:45:25.000000000 +0200
+++ gcc-4.3.4-20090804/gcc/tree-pass.h	2009-09-21 11:47:27.000000000 +0200
@@ -386,6 +386,7 @@ extern struct tree_opt_pass pass_rtl_dol
 extern struct tree_opt_pass pass_rtl_loop_done;
 
 extern struct tree_opt_pass pass_web;
+extern struct tree_opt_pass pass_split_before_cse2;
 extern struct tree_opt_pass pass_cse2;
 extern struct tree_opt_pass pass_df_initialize_opt;
 extern struct tree_opt_pass pass_df_initialize_no_opt;