File loop-unroll-adjust of Package gcc43
Index: gcc/config/s390/s390.c
===================================================================
*** gcc/config/s390/s390.c.orig
--- gcc/config/s390/s390.c
*************** along with GCC; see the file COPYING3.
*** 53,58 ****
--- 53,59 ----
#include "tree-gimple.h"
#include "df.h"
#include "params.h"
+ #include "cfgloop.h"
/* Define the specific costs for a given cpu. */
*************** s390_reorg (void)
*** 10030,10035 ****
--- 10031,10092 ----
}
+ /* Callback for for_each_rtx that counts memory references.  X points to
+    the sub-rtx being visited and DATA to the unsigned counter, which is
+    incremented if *X is a MEM.  Zero is always returned because any other
+    value would stop the traversal of the insn.  DATA is a void pointer so
+    that the function has the type for_each_rtx expects (rtx_function).  */
+ static int
+ check_dpu (rtx *x, void *data)
+ {
+   if (*x != NULL_RTX && MEM_P (*x))
+     (*(unsigned *) data)++;
+   return 0;
+ }
+
+ /* Implement TARGET_LOOP_UNROLL_ADJUST.  When tuning for z10, limit the
+    unroll factor NUNROLL of LOOP according to how many memory references
+    the loop body contains (counted with check_dpu) and how deeply LOOP is
+    nested, to improve the behaviour of the hardware prefetch unit.  The
+    result is never larger than NUNROLL and may be zero; NUNROLL is
+    returned unchanged for other CPUs or if no memory reference is found.  */
+ static unsigned
+ s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
+ {
+   basic_block *bbs;
+   rtx insn;
+   unsigned i;
+   unsigned mem_count = 0;
+
+   /* Only z10 needs special handling.  */
+   if (s390_tune != PROCESSOR_2097_Z10)
+     return nunroll;
+
+   /* Count the memory references in every insn of the loop body.  The walk
+      must include BB_END, so it stops at the insn following it.  */
+   bbs = get_loop_body (loop);
+   for (i = 0; i < loop->num_nodes; i++)
+     for (insn = BB_HEAD (bbs[i]); insn != NEXT_INSN (BB_END (bbs[i]));
+          insn = NEXT_INSN (insn))
+       if (INSN_P (insn) && INSN_CODE (insn) != -1)
+         for_each_rtx (&insn, (rtx_function) check_dpu, &mem_count);
+   free (bbs);
+
+   /* Avoid dividing by zero; without memory accesses nothing is adjusted.  */
+   if (mem_count == 0)
+     return nunroll;
+
+   switch (loop_depth (loop))
+     {
+     case 1:
+       return MIN (nunroll, 28 / mem_count);
+     case 2:
+       return MIN (nunroll, 22 / mem_count);
+     default:
+       return MIN (nunroll, 16 / mem_count);
+     }
+ }
+
/* Initialize GCC target structure. */
#undef TARGET_ASM_ALIGNED_HI_OP
*************** s390_reorg (void)
*** 10144,10149 ****
--- 10201,10209 ----
#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
#define TARGET_LIBGCC_SHIFT_COUNT_MODE s390_libgcc_shift_count_mode
+ #undef TARGET_LOOP_UNROLL_ADJUST
+ #define TARGET_LOOP_UNROLL_ADJUST s390_loop_unroll_adjust
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-s390.h"
Index: gcc/doc/tm.texi
===================================================================
*** gcc/doc/tm.texi.orig
--- gcc/doc/tm.texi
*************** to have to make special provisions in @c
*** 10296,10301 ****
--- 10296,10310 ----
to reserve space for caller-saved target registers.
@end deftypefn
+ @deftypefn {Target Hook} unsigned TARGET_LOOP_UNROLL_ADJUST (unsigned @var{nunroll}, struct loop *@var{loop})
+ This target hook returns a new value for the number of times @var{loop}
+ should be unrolled.  The parameter @var{nunroll} is the number of times
+ the loop would otherwise be unrolled.  The parameter @var{loop} is a
+ pointer to the loop that is being considered for unrolling.  This target
+ hook is required only when the target has special constraints such as a
+ maximum number of memory accesses.
+ @end deftypefn
+
@defmac POWI_MAX_MULTS
If defined, this macro is interpreted as a signed integer C expression
that specifies the maximum number of floating point multiplications
Index: gcc/loop-unroll.c
===================================================================
*** gcc/loop-unroll.c.orig
--- gcc/loop-unroll.c
*************** along with GCC; see the file COPYING3.
*** 32,37 ****
--- 32,38 ----
#include "expr.h"
#include "hashtab.h"
#include "recog.h"
+ #include "target.h"
/* This pass performs loop unrolling and peeling. We only perform these
optimizations on innermost loops (with single exception) because
*************** decide_unroll_runtime_iterations (struct
*** 818,823 ****
--- 819,827 ----
if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
+ if (targetm.loop_unroll_adjust)
+ nunroll = targetm.loop_unroll_adjust (nunroll, loop);
+
/* Skip big loops. */
if (nunroll <= 1)
{
*************** decide_unroll_stupid (struct loop *loop,
*** 1358,1363 ****
--- 1362,1370 ----
if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
+ if (targetm.loop_unroll_adjust)
+ nunroll = targetm.loop_unroll_adjust (nunroll, loop);
+
/* Skip big loops. */
if (nunroll <= 1)
{
Index: gcc/target-def.h
===================================================================
*** gcc/target-def.h.orig
--- gcc/target-def.h
***************
*** 464,469 ****
--- 464,470 ----
#define TARGET_CANNOT_MODIFY_JUMPS_P hook_bool_void_false
#define TARGET_BRANCH_TARGET_REGISTER_CLASS hook_int_void_no_regs
#define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED hook_bool_bool_false
+ #define TARGET_LOOP_UNROLL_ADJUST NULL
#define TARGET_CANNOT_FORCE_CONST_MEM hook_bool_rtx_false
#define TARGET_CANNOT_COPY_INSN_P NULL
#define TARGET_COMMUTATIVE_P hook_bool_const_rtx_commutative_p
***************
*** 734,739 ****
--- 735,741 ----
TARGET_CANNOT_MODIFY_JUMPS_P, \
TARGET_BRANCH_TARGET_REGISTER_CLASS, \
TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED, \
+ TARGET_LOOP_UNROLL_ADJUST, \
TARGET_CANNOT_FORCE_CONST_MEM, \
TARGET_CANNOT_COPY_INSN_P, \
TARGET_COMMUTATIVE_P, \
Index: gcc/target.h
===================================================================
*** gcc/target.h.orig
--- gcc/target.h
*************** struct _dep;
*** 91,96 ****
--- 91,99 ----
/* This is defined in ddg.h . */
struct ddg;
+ /* This is defined in cfgloop.h . */
+ struct loop;
+
struct gcc_target
{
/* Functions that output assembler for the target. */
*************** struct gcc_target
*** 560,565 ****
--- 563,571 ----
already been generated. */
bool (* branch_target_register_callee_saved) (bool after_pe_gen);
+ /* Return a new value for loop unroll size. */
+ unsigned (* loop_unroll_adjust) (unsigned nunroll, struct loop *loop);
+
/* True if the constant X cannot be placed in the constant pool. */
bool (* cannot_force_const_mem) (rtx);