File s390-long-loop-prediction-1 of Package gcc43
Index: gcc/config/s390/s390.c
===================================================================
--- gcc/config/s390/s390.c.orig 2009-11-20 13:52:36.000000000 +0100
+++ gcc/config/s390/s390.c 2009-11-20 13:52:37.000000000 +0100
@@ -356,6 +356,10 @@ struct machine_function GTY(())
#define REGNO_PAIR_OK(REGNO, MODE) \
(HARD_REGNO_NREGS ((REGNO), (MODE)) == 1 || !((REGNO) & 1))
+/* That's the read ahead of the dynamic branch prediction unit in
+ bytes on a z10 CPU. */
+#define Z10_PREDICT_DISTANCE 384
+
static enum machine_mode
s390_libgcc_cmp_return_mode (void)
{
@@ -9599,6 +9603,66 @@ s390_optimize_prologue (void)
}
}
+/* On z10 the dynamic branch prediction must see the backward jump in
+ a window of 384 bytes. If not it falls back to the static
+ prediction. This function rearranges the loop backward branch in a
+ way which makes the static prediction always correct. The function
+ returns true if it added an instruction. */
+static bool
+s390_z10_fix_long_loop_prediction (rtx insn)
+{
+ rtx set = single_set (insn);
+ rtx code_label, label_ref, new_label;
+ rtx uncond_jump;
+ rtx cur_insn;
+ rtx tmp;
+ int distance;
+
+ /* This will exclude branch on count and branch on index patterns
+ since these are correctly statically predicted. */
+ if (!set
+ || SET_DEST (set) != pc_rtx
+ || GET_CODE (SET_SRC(set)) != IF_THEN_ELSE)
+ return false;
+
+ label_ref = (GET_CODE (XEXP (SET_SRC (set), 1)) == LABEL_REF ?
+ XEXP (SET_SRC (set), 1) : XEXP (SET_SRC (set), 2));
+
+ gcc_assert (GET_CODE (label_ref) == LABEL_REF);
+
+ code_label = XEXP (label_ref, 0);
+
+ if (INSN_ADDRESSES (INSN_UID (code_label)) == -1
+ || INSN_ADDRESSES (INSN_UID (insn)) == -1
+ || (INSN_ADDRESSES (INSN_UID (insn))
+ - INSN_ADDRESSES (INSN_UID (code_label)) < Z10_PREDICT_DISTANCE))
+ return false;
+
+ for (distance = 0, cur_insn = PREV_INSN (insn);
+ distance < Z10_PREDICT_DISTANCE - 6;
+ distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn))
+ if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn))
+ return false;
+
+ new_label = gen_label_rtx ();
+ uncond_jump = emit_jump_insn_after (
+ gen_rtx_SET (VOIDmode, pc_rtx,
+ gen_rtx_LABEL_REF (VOIDmode, code_label)),
+ insn);
+ emit_label_after (new_label, uncond_jump);
+
+ tmp = XEXP (SET_SRC (set), 1);
+ XEXP (SET_SRC (set), 1) = XEXP (SET_SRC (set), 2);
+ XEXP (SET_SRC (set), 2) = tmp;
+ INSN_CODE (insn) = -1;
+
+ XEXP (label_ref, 0) = new_label;
+ JUMP_LABEL (insn) = new_label;
+ JUMP_LABEL (uncond_jump) = code_label;
+
+ return true;
+}
+
/* Returns 1 if INSN reads the value of REG for purposes not related
to addressing of memory, and 0 otherwise. */
static int
@@ -9681,97 +9745,87 @@ s390_swap_cmp (rtx cond, rtx *op0, rtx *
if that register's value is delivered via a bypass, then the
pipeline recycles, thereby causing significant performance decline.
This function locates such situations and exchanges the two
- operands of the compare. */
-static void
-s390_z10_optimize_cmp (void)
+ operands of the compare. The function return true whenever it
+ added an insn. */
+static bool
+s390_z10_optimize_cmp (rtx insn)
{
- rtx insn, prev_insn, next_insn;
- int added_NOPs = 0;
+ rtx prev_insn, next_insn;
+ bool insn_added_p = false;
+ rtx cond, *op0, *op1;
- for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ if (GET_CODE (PATTERN (insn)) == PARALLEL)
{
- rtx cond, *op0, *op1;
-
- if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
- continue;
-
- if (GET_CODE (PATTERN (insn)) == PARALLEL)
- {
- /* Handle compare and branch and branch on count
- instructions. */
- rtx pattern = single_set (insn);
-
- if (!pattern
- || SET_DEST (pattern) != pc_rtx
- || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
- continue;
+ /* Handle compare and branch and branch on count
+ instructions. */
+ rtx pattern = single_set (insn);
+
+ if (!pattern
+ || SET_DEST (pattern) != pc_rtx
+ || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
+ return false;
- cond = XEXP (SET_SRC (pattern), 0);
- op0 = &XEXP (cond, 0);
- op1 = &XEXP (cond, 1);
- }
- else if (GET_CODE (PATTERN (insn)) == SET)
- {
- rtx src, dest;
+ cond = XEXP (SET_SRC (pattern), 0);
+ op0 = &XEXP (cond, 0);
+ op1 = &XEXP (cond, 1);
+ }
+ else if (GET_CODE (PATTERN (insn)) == SET)
+ {
+ rtx src, dest;
- /* Handle normal compare instructions. */
- src = SET_SRC (PATTERN (insn));
- dest = SET_DEST (PATTERN (insn));
+ /* Handle normal compare instructions. */
+ src = SET_SRC (PATTERN (insn));
+ dest = SET_DEST (PATTERN (insn));
- if (!REG_P (dest)
- || !CC_REGNO_P (REGNO (dest))
- || GET_CODE (src) != COMPARE)
- continue;
+ if (!REG_P (dest)
+ || !CC_REGNO_P (REGNO (dest))
+ || GET_CODE (src) != COMPARE)
+ return false;
- /* s390_swap_cmp will try to find the conditional
- jump when passing NULL_RTX as condition. */
- cond = NULL_RTX;
- op0 = &XEXP (src, 0);
- op1 = &XEXP (src, 1);
- }
- else
- continue;
+ /* s390_swap_cmp will try to find the conditional
+ jump when passing NULL_RTX as condition. */
+ cond = NULL_RTX;
+ op0 = &XEXP (src, 0);
+ op1 = &XEXP (src, 1);
+ }
+ else
+ return false;
- if (!REG_P (*op0) || !REG_P (*op1))
- continue;
+ if (!REG_P (*op0) || !REG_P (*op1))
+ return false;
- /* Swap the COMPARE arguments and its mask if there is a
- conflicting access in the previous insn. */
- prev_insn = PREV_INSN (insn);
+ /* Swap the COMPARE arguments and its mask if there is a
+ conflicting access in the previous insn. */
+ prev_insn = PREV_INSN (insn);
+ if (prev_insn != NULL_RTX && INSN_P (prev_insn)
+ && reg_referenced_p (*op1, PATTERN (prev_insn)))
+ s390_swap_cmp (cond, op0, op1, insn);
+
+ /* Check if there is a conflict with the next insn. If there
+ was no conflict with the previous insn, then swap the
+ COMPARE arguments and its mask. If we already swapped
+ the operands, or if swapping them would cause a conflict
+ with the previous insn, issue a NOP after the COMPARE in
+ order to separate the two instuctions. */
+ next_insn = NEXT_INSN (insn);
+ if (next_insn != NULL_RTX && INSN_P (next_insn)
+ && s390_non_addr_reg_read_p (*op1, next_insn))
+ {
if (prev_insn != NULL_RTX && INSN_P (prev_insn)
- && reg_referenced_p (*op1, PATTERN (prev_insn)))
- s390_swap_cmp (cond, op0, op1, insn);
-
- /* Check if there is a conflict with the next insn. If there
- was no conflict with the previous insn, then swap the
- COMPARE arguments and its mask. If we already swapped
- the operands, or if swapping them would cause a conflict
- with the previous insn, issue a NOP after the COMPARE in
- order to separate the two instuctions. */
- next_insn = NEXT_INSN (insn);
- if (next_insn != NULL_RTX && INSN_P (next_insn)
- && s390_non_addr_reg_read_p (*op1, next_insn))
+ && s390_non_addr_reg_read_p (*op0, prev_insn))
{
- if (prev_insn != NULL_RTX && INSN_P (prev_insn)
- && s390_non_addr_reg_read_p (*op0, prev_insn))
- {
- if (REGNO (*op1) == 0)
- emit_insn_after (gen_nop1 (), insn);
- else
- emit_insn_after (gen_nop (), insn);
- added_NOPs = 1;
- }
+ if (REGNO (*op1) == 0)
+ emit_insn_after (gen_nop1 (), insn);
else
- s390_swap_cmp (cond, op0, op1, insn);
+ emit_insn_after (gen_nop (), insn);
+ insn_added_p = true;
}
+ else
+ s390_swap_cmp (cond, op0, op1, insn);
}
-
- /* Adjust branches if we added new instructions. */
- if (added_NOPs)
- shorten_branches (get_insns ());
+ return insn_added_p;
}
-
/* Perform machine-dependent processing. */
static void
@@ -9885,10 +9939,33 @@ s390_reorg (void)
/* Try to optimize prologue and epilogue further. */
s390_optimize_prologue ();
- /* Eliminate z10-specific pipeline recycles related to some compare
- instructions. */
+ /* Walk over the insns and do some z10 specific changes. */
if (s390_tune == PROCESSOR_2097_Z10)
- s390_z10_optimize_cmp ();
+ {
+ rtx insn;
+ bool insn_added_p = false;
+
+ /* The insn lengths and addresses have to be up to date for the
+ following manipulations. */
+ shorten_branches (get_insns ());
+
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ {
+ if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
+ continue;
+
+ if (JUMP_P (insn))
+ insn_added_p |= s390_z10_fix_long_loop_prediction (insn);
+
+ if (GET_CODE (PATTERN (insn)) == PARALLEL
+ || GET_CODE (PATTERN (insn)) == SET)
+ insn_added_p |= s390_z10_optimize_cmp (insn);
+ }
+
+ /* Adjust branches if we added new instructions. */
+ if (insn_added_p)
+ shorten_branches (get_insns ());
+ }
}
Index: gcc/config/s390/s390.md
===================================================================
--- gcc/config/s390/s390.md.orig 2009-11-20 13:52:34.000000000 +0100
+++ gcc/config/s390/s390.md 2009-11-20 13:52:37.000000000 +0100
@@ -1073,6 +1073,64 @@
(const_int 6) (const_int 12)))]) ; 8 byte for clr/jg
; 10 byte for clgr/jg
+; And now the same two patterns as above but with a negated CC mask.
+
+; cij, cgij, crj, cgrj, cfi, cgfi, cr, cgr
+; The following instructions do a complementary access of their second
+; operand (z01 only): crj_c, cgrjc, cr, cgr
+(define_insn "*icmp_and_br_signed_<mode>"
+ [(set (pc)
+ (if_then_else (match_operator 0 "s390_signed_integer_comparison"
+ [(match_operand:GPR 1 "register_operand" "d,d")
+ (match_operand:GPR 2 "nonmemory_operand" "d,C")])
+ (pc)
+ (label_ref (match_operand 3 "" ""))))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_Z10"
+{
+ if (get_attr_length (insn) == 6)
+ return which_alternative ?
+ "c<g>ij%D0\t%1,%c2,%l3" : "c<g>rj%D0\t%1,%2,%l3";
+ else
+ return which_alternative ?
+ "c<g>fi\t%1,%c2\;jg%D0\t%l3" : "c<g>r\t%1,%2\;jg%D0\t%l3";
+}
+ [(set_attr "op_type" "RIE")
+ (set_attr "type" "branch")
+ (set_attr "z10prop" "z10_super_c,z10_super")
+ (set (attr "length")
+ (if_then_else (lt (abs (minus (pc) (match_dup 3))) (const_int 60000))
+ (const_int 6) (const_int 12)))]) ; 8 byte for cr/jg
+ ; 10 byte for cgr/jg
+
+; clij, clgij, clrj, clgrj, clfi, clgfi, clr, clgr
+; The following instructions do a complementary access of their second
+; operand (z10 only): clrj, clgrj, clr, clgr
+(define_insn "*icmp_and_br_unsigned_<mode>"
+ [(set (pc)
+ (if_then_else (match_operator 0 "s390_unsigned_integer_comparison"
+ [(match_operand:GPR 1 "register_operand" "d,d")
+ (match_operand:GPR 2 "nonmemory_operand" "d,I")])
+ (pc)
+ (label_ref (match_operand 3 "" ""))))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_Z10"
+{
+ if (get_attr_length (insn) == 6)
+ return which_alternative ?
+ "cl<g>ij%D0\t%1,%b2,%l3" : "cl<g>rj%D0\t%1,%2,%l3";
+ else
+ return which_alternative ?
+ "cl<g>fi\t%1,%b2\;jg%D0\t%l3" : "cl<g>r\t%1,%2\;jg%D0\t%l3";
+}
+ [(set_attr "op_type" "RIE")
+ (set_attr "type" "branch")
+ (set_attr "z10prop" "z10_super_c,z10_super")
+ (set (attr "length")
+ (if_then_else (lt (abs (minus (pc) (match_dup 3))) (const_int 60000))
+ (const_int 6) (const_int 12)))]) ; 8 byte for clr/jg
+ ; 10 byte for clgr/jg
+
;;
;;- Move instructions.
;;