File gcc-amdfam10-suse-12.patch of Package gcc41
Index: gcc/config/i386/i386.c
===================================================================
--- gcc/config/i386/i386.c.orig
+++ gcc/config/i386/i386.c
@@ -938,6 +938,9 @@ const int x86_cmpxchg = ~m_386;
const int x86_xadd = ~m_386;
const int x86_pad_returns = m_ATHLON_K8 | m_GENERIC | m_AMDFAM10;
+/* Use Vector Converts instead of Scalar Converts. Added for AMDFAM10 */
+const int x86_use_vector_converts = m_AMDFAM10;
+
/* In case the average insn count for single function invocation is
lower than this constant, emit fast (but longer) prologue and
epilogue code. */
Index: gcc/config/i386/i386.h
===================================================================
--- gcc/config/i386/i386.h.orig
+++ gcc/config/i386/i386.h
@@ -168,6 +168,7 @@ extern const int x86_use_incdec;
extern const int x86_pad_returns;
extern const int x86_partial_flag_reg_stall;
extern int x86_prefetch_sse;
+extern const int x86_use_vector_converts;
#define TARGET_USE_LEAVE (x86_use_leave & TUNEMASK)
#define TARGET_PUSH_MEMORY (x86_push_memory & TUNEMASK)
@@ -217,6 +218,7 @@ extern int x86_prefetch_sse;
#define TARGET_PROLOGUE_USING_MOVE (x86_prologue_using_move & TUNEMASK)
#define TARGET_EPILOGUE_USING_MOVE (x86_epilogue_using_move & TUNEMASK)
#define TARGET_PREFETCH_SSE (x86_prefetch_sse)
+#define TARGET_USE_VECTOR_CONVERTS (x86_use_vector_converts & TUNEMASK)
#define TARGET_SHIFT1 (x86_shift1 & TUNEMASK)
#define TARGET_USE_FFREEP (x86_use_ffreep & TUNEMASK)
#define TARGET_REP_MOVL_OPTIMAL (x86_rep_movl_optimal & TUNEMASK)
Index: gcc/config/i386/i386.md
===================================================================
--- gcc/config/i386/i386.md.orig
+++ gcc/config/i386/i386.md
@@ -162,6 +162,11 @@
(UNSPEC_INSERTQI 133)
(UNSPEC_INSERTQ 134)
+ ; Other AMDFAM10 Patterns
+ (UNSPEC_CVTSI2SS_AMDFAM10 140)
+ (UNSPEC_CVTSI2SD_AMDFAM10 141)
+ (UNSPEC_MOVDSI2SF_AMDFAM10 142)
+ (UNSPEC_MOVDSI2DF_AMDFAM10 143)
])
(define_constants
@@ -4474,7 +4479,46 @@
[(set (match_operand:SF 0 "register_operand" "")
(float:SF (match_operand:SI 1 "nonimmediate_operand" "")))]
"TARGET_80387 || TARGET_SSE_MATH"
- "")
+ "
+ {
+ /* For converting SI to SF, the following code is faster in AMDFAM10
+ mov mem32, reg32
+ movd xmm, mem32
+ cvtdq2ps xmm,xmm
+ */
+
+  if (TARGET_USE_VECTOR_CONVERTS && !optimize_size
+      && (GET_CODE (operands[1]) != MEM) && TARGET_SSE2
+      && TARGET_SSE_MATH && optimize)
+ {
+ rtx tmp;
+ tmp = assign_386_stack_local (SImode, SLOT_TEMP);
+ emit_move_insn (tmp, operands[1]);
+ emit_insn (gen_sse2_movdsi2sf_amdfam10 (operands[0], tmp));
+ emit_insn (gen_sse2_cvtdq2ps_amdfam10 (operands[0], operands[0]));
+ DONE;
+ }
+ }
+ ")
+
+(define_insn "sse2_cvtdq2ps_amdfam10"
+ [(set (match_operand:SF 0 "register_operand" "=x")
+ (unspec:SF [(match_operand:SF 1 "register_operand" "x")]
+ UNSPEC_CVTSI2SS_AMDFAM10))]
+ "TARGET_SSE2 && TARGET_USE_VECTOR_CONVERTS"
+ "cvtdq2ps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sse2_movdsi2sf_amdfam10"
+ [(set (match_operand:SF 0 "register_operand" "=x")
+ (unspec:SF [(match_operand:SI 1 "memory_operand" "m")]
+ UNSPEC_MOVDSI2SF_AMDFAM10))]
+ "TARGET_SSE2 && TARGET_USE_VECTOR_CONVERTS"
+ "movd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "mode" "SF")])
+
(define_insn "*floatsisf2_mixed"
[(set (match_operand:SF 0 "register_operand" "=f#x,?f#x,x#f,x#f")
@@ -4589,7 +4633,45 @@
[(set (match_operand:DF 0 "register_operand" "")
(float:DF (match_operand:SI 1 "nonimmediate_operand" "")))]
"TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
- "")
+ "
+ {
+ /* For converting SI to DF, the following code is faster in AMDFAM10
+ mov mem32, reg32
+ movd xmm, mem32
+ cvtdq2pd xmm,xmm
+ */
+
+  if (TARGET_USE_VECTOR_CONVERTS && !optimize_size
+      && (GET_CODE (operands[1]) != MEM) && TARGET_SSE2
+      && TARGET_SSE_MATH && optimize)
+ {
+ rtx tmp;
+ tmp = assign_386_stack_local (SImode, SLOT_TEMP);
+ emit_move_insn (tmp, operands[1]);
+ emit_insn (gen_sse2_movdsi2df_amdfam10 (operands[0], tmp));
+ emit_insn (gen_sse2_cvtdq2pd_amdfam10 (operands[0], operands[0]));
+ DONE;
+ }
+ }
+ ")
+
+(define_insn "sse2_cvtdq2pd_amdfam10"
+ [(set (match_operand:DF 0 "register_operand" "=Y")
+ (unspec:DF [(match_operand:DF 1 "register_operand" "Y")]
+ UNSPEC_CVTSI2SD_AMDFAM10))]
+ "TARGET_SSE2 && TARGET_USE_VECTOR_CONVERTS"
+ "cvtdq2pd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+   (set_attr "mode" "V2DF")])
+
+(define_insn "sse2_movdsi2df_amdfam10"
+ [(set (match_operand:DF 0 "register_operand" "=Y")
+ (unspec:DF [(match_operand:SI 1 "memory_operand" "m")]
+ UNSPEC_MOVDSI2DF_AMDFAM10))]
+ "TARGET_SSE2 && TARGET_USE_VECTOR_CONVERTS"
+ "movd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "mode" "DF")])
(define_insn "*floatsidf2_mixed"
[(set (match_operand:DF 0 "register_operand" "=f#Y,?f#Y,Y#f,Y#f")