File gcc-amdfam10-suse-12.patch of Package gcc41
Index: gcc/config/i386/i386.c
===================================================================
--- gcc/config/i386/i386.c.orig
+++ gcc/config/i386/i386.c
@@ -938,6 +938,9 @@ const int x86_cmpxchg = ~m_386;
const int x86_xadd = ~m_386;
const int x86_pad_returns = m_ATHLON_K8 | m_GENERIC | m_AMDFAM10;
+/* Use Vector Converts instead of Scalar Converts. Added for AMDFAM10 */
+const int x86_use_vector_converts = m_AMDFAM10;
+
/* In case the average insn count for single function invocation is
lower than this constant, emit fast (but longer) prologue and
epilogue code. */
Index: gcc/config/i386/i386.h
===================================================================
--- gcc/config/i386/i386.h.orig
+++ gcc/config/i386/i386.h
@@ -168,6 +168,7 @@ extern const int x86_use_incdec;
extern const int x86_pad_returns;
extern const int x86_partial_flag_reg_stall;
extern int x86_prefetch_sse;
+extern const int x86_use_vector_converts;
#define TARGET_USE_LEAVE (x86_use_leave & TUNEMASK)
#define TARGET_PUSH_MEMORY (x86_push_memory & TUNEMASK)
@@ -217,6 +218,7 @@ extern int x86_prefetch_sse;
#define TARGET_PROLOGUE_USING_MOVE (x86_prologue_using_move & TUNEMASK)
#define TARGET_EPILOGUE_USING_MOVE (x86_epilogue_using_move & TUNEMASK)
#define TARGET_PREFETCH_SSE (x86_prefetch_sse)
+#define TARGET_USE_VECTOR_CONVERTS (x86_use_vector_converts & TUNEMASK)
#define TARGET_SHIFT1 (x86_shift1 & TUNEMASK)
#define TARGET_USE_FFREEP (x86_use_ffreep & TUNEMASK)
#define TARGET_REP_MOVL_OPTIMAL (x86_rep_movl_optimal & TUNEMASK)
Index: gcc/config/i386/i386.md
===================================================================
--- gcc/config/i386/i386.md.orig
+++ gcc/config/i386/i386.md
@@ -162,6 +162,11 @@
(UNSPEC_INSERTQI 133)
(UNSPEC_INSERTQ 134)
+ ; Other AMDFAM10 Patterns
+ (UNSPEC_CVTSI2SS_AMDFAM10 140)
+ (UNSPEC_CVTSI2SD_AMDFAM10 141)
+ (UNSPEC_MOVDSI2SF_AMDFAM10 142)
+ (UNSPEC_MOVDSI2DF_AMDFAM10 143)
])
(define_constants
@@ -4474,7 +4479,46 @@
[(set (match_operand:SF 0 "register_operand" "")
(float:SF (match_operand:SI 1 "nonimmediate_operand" "")))]
"TARGET_80387 || TARGET_SSE_MATH"
- "")
+ "
+ {
+ /* For converting SI to SF, the following code is faster in AMDFAM10
+ mov mem32, reg32
+ movd xmm, mem32
+ cvtdq2ps xmm,xmm
+ */
+
+  if (TARGET_USE_VECTOR_CONVERTS && !optimize_size
+      && (GET_CODE (operands[1]) != MEM) && TARGET_SSE2
+      && TARGET_SSE_MATH && optimize)
+ {
+ rtx tmp;
+ tmp = assign_386_stack_local (SImode, SLOT_TEMP);
+ emit_move_insn (tmp, operands[1]);
+ emit_insn (gen_sse2_movdsi2sf_amdfam10 (operands[0], tmp));
+ emit_insn (gen_sse2_cvtdq2ps_amdfam10 (operands[0], operands[0]));
+ DONE;
+ }
+ }
+ ")
+
+(define_insn "sse2_cvtdq2ps_amdfam10"
+ [(set (match_operand:SF 0 "register_operand" "=x")
+ (unspec:SF [(match_operand:SF 1 "register_operand" "x")]
+ UNSPEC_CVTSI2SS_AMDFAM10))]
+ "TARGET_SSE2 && TARGET_USE_VECTOR_CONVERTS"
+ "cvtdq2ps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sse2_movdsi2sf_amdfam10"
+ [(set (match_operand:SF 0 "register_operand" "=x")
+ (unspec:SF [(match_operand:SI 1 "memory_operand" "m")]
+ UNSPEC_MOVDSI2SF_AMDFAM10))]
+ "TARGET_SSE2 && TARGET_USE_VECTOR_CONVERTS"
+ "movd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "mode" "SF")])
+
(define_insn "*floatsisf2_mixed"
[(set (match_operand:SF 0 "register_operand" "=f#x,?f#x,x#f,x#f")
@@ -4589,7 +4633,45 @@
[(set (match_operand:DF 0 "register_operand" "")
(float:DF (match_operand:SI 1 "nonimmediate_operand" "")))]
"TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
- "")
+ "
+ {
+ /* For converting SI to DF, the following code is faster in AMDFAM10
+ mov mem32, reg32
+ movd xmm, mem32
+ cvtdq2pd xmm,xmm
+ */
+
+  if (TARGET_USE_VECTOR_CONVERTS && !optimize_size
+      && (GET_CODE (operands[1]) != MEM) && TARGET_SSE2
+      && TARGET_SSE_MATH && optimize)
+ {
+ rtx tmp;
+ tmp = assign_386_stack_local (SImode, SLOT_TEMP);
+ emit_move_insn (tmp, operands[1]);
+ emit_insn (gen_sse2_movdsi2df_amdfam10 (operands[0], tmp));
+ emit_insn (gen_sse2_cvtdq2pd_amdfam10 (operands[0], operands[0]));
+ DONE;
+ }
+ }
+ ")
+
+(define_insn "sse2_cvtdq2pd_amdfam10"
+ [(set (match_operand:DF 0 "register_operand" "=Y")
+ (unspec:DF [(match_operand:DF 1 "register_operand" "Y")]
+ UNSPEC_CVTSI2SD_AMDFAM10))]
+ "TARGET_SSE2 && TARGET_USE_VECTOR_CONVERTS"
+ "cvtdq2pd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+   (set_attr "mode" "V2DF")])
+
+(define_insn "sse2_movdsi2df_amdfam10"
+ [(set (match_operand:DF 0 "register_operand" "=Y")
+ (unspec:DF [(match_operand:SI 1 "memory_operand" "m")]
+ UNSPEC_MOVDSI2DF_AMDFAM10))]
+ "TARGET_SSE2 && TARGET_USE_VECTOR_CONVERTS"
+ "movd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "mode" "DF")])
(define_insn "*floatsidf2_mixed"
[(set (match_operand:DF 0 "register_operand" "=f#Y,?f#Y,Y#f,Y#f")