File gcc-amdfam10-suse-1.patch of Package gcc41

Index: gcc/config/i386/ammintrin.h
===================================================================
--- /dev/null
+++ gcc/config/i386/ammintrin.h
@@ -0,0 +1,69 @@
+/* Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   GCC is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING.  If not, write to
+   the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+   Boston, MA 02110-1301, USA.  */
+
+/* As a special exception, if you include this header file into source
+   files compiled by GCC, this header file does not by itself cause
+   the resulting executable to be covered by the GNU General Public
+   License.  This exception does not however invalidate any other
+   reasons why the executable file might be covered by the GNU General
+   Public License.  */
+
+/* Implemented from the specification included in the AMD Programmers ManualIntel C++ Compiler
+   User Guide and Reference, version 8.0.  */
+
+#ifndef _AMMINTRIN_H_INCLUDED
+#define _AMMINTRIN_H_INCLUDED
+
+#ifdef __SSE4A__
+#include <pmmintrin.h>
+
+static __inline void __attribute__((__always_inline__))
+_mm_stream_sd (double * __P, __m128d __Y)
+{
+  __builtin_ia32_movntsd (__P, (__v2df) __Y);
+}
+
+static __inline void __attribute__((__always_inline__))
+_mm_stream_ss (float * __P, __m128 __Y)
+{
+  __builtin_ia32_movntss (__P, (__v4sf) __Y);
+}
+
+static __inline __m128i __attribute__((__always_inline__))
+_mm_extract_si64 (__m128i __X, __m128i __Y)
+{
+  return (__m128i) __builtin_ia32_extrq ((__v2di) __X, (__v16qi) __Y);
+}
+
+#define _mm_extracti_si64(X, I, L) \
+((__m128i) __builtin_ia32_extrqi ((__v2di)(X), I, L))
+
+static __inline __m128i __attribute__((__always_inline__))
+_mm_insert_si64 (__m128i __X,__m128i __Y)
+{
+  return (__m128i) __builtin_ia32_insertq ((__v2di)__X, (__v2di)__Y);
+}
+
+#define _mm_inserti_si64(X, Y, I, L) \
+((__m128i) __builtin_ia32_insertqi ((__v2di)(X), (__v2di)(Y), I, L))
+
+
+#endif /* __SSE4A__ */
+
+#endif /* _AMMINTRIN_H_INCLUDED */
Index: gcc/config/i386/i386.c
===================================================================
--- gcc/config/i386/i386.c.orig
+++ gcc/config/i386/i386.c
@@ -1371,16 +1371,24 @@ ix86_handle_option (size_t code, const c
     case OPT_msse:
       if (!value)
 	{
-	  target_flags &= ~(MASK_SSE2 | MASK_SSE3);
-	  target_flags_explicit |= MASK_SSE2 | MASK_SSE3;
+	  target_flags &= ~(MASK_SSE2 | MASK_SSE3 | MASK_SSE4A);
+	  target_flags_explicit |= MASK_SSE2 | MASK_SSE3 | MASK_SSE4A;
 	}
       return true;
 
     case OPT_msse2:
       if (!value)
 	{
-	  target_flags &= ~MASK_SSE3;
-	  target_flags_explicit |= MASK_SSE3;
+	  target_flags &= ~(MASK_SSE3 | MASK_SSE4A);
+	  target_flags_explicit |= MASK_SSE3 | MASK_SSE4A;
+	}
+      return true;
+
+    case OPT_msse3:
+      if (!value)
+	{
+	  target_flags &= ~MASK_SSE4A;
+	  target_flags_explicit |= MASK_SSE4A;
 	}
       return true;
 
@@ -1448,7 +1456,10 @@ override_options (void)
 	  PTA_3DNOW = 32,
 	  PTA_3DNOW_A = 64,
 	  PTA_64BIT = 128,
-	  PTA_SSSE3 = 256
+	  PTA_SSSE3 = 256,
+	  PTA_POPCNT= 512,
+	  PTA_ABM = 1024,
+	  PTA_SSE4A = 2048
 	} flags;
     }
   const processor_alias_table[] =
@@ -1501,6 +1512,9 @@ override_options (void)
 				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
       {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch.  */ },
       {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch.  */ },
+      {"amdfam10", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
+				         | PTA_3DNOW_A | PTA_SSE | PTA_SSE2| PTA_SSE3 | PTA_POPCNT
+                                         | PTA_ABM | PTA_SSE4A},
     };
 
   int const pta_size = ARRAY_SIZE (processor_alias_table);
@@ -1646,6 +1660,15 @@ override_options (void)
 	  target_flags |= MASK_SSSE3;
 	if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
 	  x86_prefetch_sse = true;
+	if (processor_alias_table[i].flags & PTA_POPCNT
+	    && !(target_flags_explicit & MASK_POPCNT))
+	  target_flags |= MASK_POPCNT;
+	if (processor_alias_table[i].flags & PTA_ABM
+	    && !(target_flags_explicit & MASK_ABM))
+	  target_flags |= MASK_ABM;
+	if (processor_alias_table[i].flags & PTA_SSE4A
+	    && !(target_flags_explicit & MASK_SSE4A))
+	  target_flags |= MASK_SSE4A;
 	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
 	  error ("CPU you selected does not support x86-64 "
 		 "instruction set");
@@ -1837,6 +1860,10 @@ override_options (void)
   if (TARGET_SSSE3)
     target_flags |= MASK_SSE3;
 
+  /* Turn on SSE3 builtins for -msse4a.  */
+  if (TARGET_SSE4A)
+    target_flags |= MASK_SSE3;
+
   /* Turn on SSE2 builtins for -msse3.  */
   if (TARGET_SSE3)
     target_flags |= MASK_SSE2;
@@ -1856,6 +1883,10 @@ override_options (void)
   if (TARGET_3DNOW)
     target_flags |= MASK_MMX;
 
+  /* Turn on POPCNT builtins for -mabm.  */
+  if (TARGET_ABM)
+    target_flags |= MASK_POPCNT;
+
   if (TARGET_64BIT)
     {
       if (TARGET_ALIGN_DOUBLE)
@@ -14294,6 +14325,14 @@ enum ix86_builtins
   IX86_BUILTIN_PABSW128,
   IX86_BUILTIN_PABSD128,
 
+  /* AMDFAM10 - SSE4A New Instructions.  */
+  IX86_BUILTIN_MOVNTSD,
+  IX86_BUILTIN_MOVNTSS,
+  IX86_BUILTIN_EXTRQI,
+  IX86_BUILTIN_EXTRQ,
+  IX86_BUILTIN_INSERTQI,
+  IX86_BUILTIN_INSERTQ,
+
   IX86_BUILTIN_VEC_INIT_V2SI,
   IX86_BUILTIN_VEC_INIT_V4HI,
   IX86_BUILTIN_VEC_INIT_V8QI,
@@ -15036,6 +15075,16 @@ ix86_init_mmx_sse_builtins (void)
     = build_function_type_list (void_type_node,
 			        pchar_type_node, V16QI_type_node, NULL_TREE);
 
+  tree v2di_ftype_v2di_unsigned_unsigned
+    = build_function_type_list (V2DI_type_node, V2DI_type_node,
+                                unsigned_type_node, unsigned_type_node, NULL_TREE);
+  tree v2di_ftype_v2di_v2di_unsigned_unsigned
+    = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
+                                unsigned_type_node, unsigned_type_node, NULL_TREE);
+  tree v2di_ftype_v2di_v16qi
+    = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
+                                NULL_TREE);
+
   tree float80_type;
   tree float128_type;
   tree ftype;
@@ -15387,6 +15436,14 @@ ix86_init_mmx_sse_builtins (void)
   def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
 	       IX86_BUILTIN_PALIGNR);
 
+  /* AMDFAM10 SSE4A New built-ins  */
+  def_builtin (MASK_SSE4A, "__builtin_ia32_movntsd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
+  def_builtin (MASK_SSE4A, "__builtin_ia32_movntss", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
+  def_builtin (MASK_SSE4A, "__builtin_ia32_extrqi", v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
+  def_builtin (MASK_SSE4A, "__builtin_ia32_extrq", v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
+  def_builtin (MASK_SSE4A, "__builtin_ia32_insertqi", v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
+  def_builtin (MASK_SSE4A, "__builtin_ia32_insertq", v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
+
   /* Access to the vec_init patterns.  */
   ftype = build_function_type_list (V2SI_type_node, integer_type_node,
 				    integer_type_node, NULL_TREE);
@@ -15875,9 +15932,9 @@ ix86_expand_builtin (tree exp, rtx targe
   enum insn_code icode;
   tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
   tree arglist = TREE_OPERAND (exp, 1);
-  tree arg0, arg1, arg2;
-  rtx op0, op1, op2, pat;
-  enum machine_mode tmode, mode0, mode1, mode2, mode3;
+  tree arg0, arg1, arg2, arg3;
+  rtx op0, op1, op2, op3, pat;
+  enum machine_mode tmode, mode0, mode1, mode2, mode3,mode4;
   unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
 
   switch (fcode)
@@ -16300,6 +16357,114 @@ ix86_expand_builtin (tree exp, rtx targe
       emit_insn (pat);
       return target;
 
+    case IX86_BUILTIN_MOVNTSD:
+      return ix86_expand_store_builtin (CODE_FOR_sse4a_movntsd, arglist);
+
+    case IX86_BUILTIN_MOVNTSS:
+      return ix86_expand_store_builtin (CODE_FOR_sse4a_movntss, arglist);
+
+    case IX86_BUILTIN_INSERTQ:
+    case IX86_BUILTIN_EXTRQ:
+      icode = (fcode == IX86_BUILTIN_EXTRQ
+               ? CODE_FOR_sse4a_extrq
+               : CODE_FOR_sse4a_insertq);
+      arg0 = TREE_VALUE (arglist);
+      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
+      tmode = insn_data[icode].operand[0].mode;
+      mode1 = insn_data[icode].operand[1].mode;
+      mode2 = insn_data[icode].operand[2].mode;
+      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
+        op0 = copy_to_mode_reg (mode1, op0);
+      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
+        op1 = copy_to_mode_reg (mode2, op1);
+      if (optimize || target == 0
+          || GET_MODE (target) != tmode
+          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+        target = gen_reg_rtx (tmode);
+      pat = GEN_FCN (icode) (target, op0, op1);
+      if (! pat)
+        return 0;
+      emit_insn (pat);
+      return target;
+
+    case IX86_BUILTIN_EXTRQI:
+      icode = CODE_FOR_sse4a_extrqi;
+      arg0 = TREE_VALUE (arglist);
+      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
+      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
+      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
+      tmode = insn_data[icode].operand[0].mode;
+      mode1 = insn_data[icode].operand[1].mode;
+      mode2 = insn_data[icode].operand[2].mode;
+      mode3 = insn_data[icode].operand[3].mode;
+      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
+        op0 = copy_to_mode_reg (mode1, op0);
+      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
+        {
+          error ("index mask must be an immediate");
+          return gen_reg_rtx (tmode);
+        }
+      if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
+        {
+          error ("length mask must be an immediate");
+          return gen_reg_rtx (tmode);
+        }
+      if (optimize || target == 0
+          || GET_MODE (target) != tmode
+          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+        target = gen_reg_rtx (tmode);
+      pat = GEN_FCN (icode) (target, op0, op1, op2);
+      if (! pat)
+        return 0;
+      emit_insn (pat);
+      return target;
+
+    case IX86_BUILTIN_INSERTQI:
+      icode = CODE_FOR_sse4a_insertqi;
+      arg0 = TREE_VALUE (arglist);
+      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
+      arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
+      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
+      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
+      op3 = expand_expr (arg3, NULL_RTX, VOIDmode, 0);
+      tmode = insn_data[icode].operand[0].mode;
+      mode1 = insn_data[icode].operand[1].mode;
+      mode2 = insn_data[icode].operand[2].mode;
+      mode3 = insn_data[icode].operand[3].mode;
+      mode4 = insn_data[icode].operand[4].mode;
+
+      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
+        op0 = copy_to_mode_reg (mode1, op0);
+
+      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
+        op1 = copy_to_mode_reg (mode2, op1);
+
+      if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
+        {
+          error ("index mask must be an immediate");
+          return gen_reg_rtx (tmode);
+        }
+      if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
+        {
+          error ("length mask must be an immediate");
+          return gen_reg_rtx (tmode);
+        }
+      if (optimize || target == 0
+          || GET_MODE (target) != tmode
+          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+        target = gen_reg_rtx (tmode);
+      pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
+      if (! pat)
+        return 0;
+      emit_insn (pat);
+      return target;
+
     case IX86_BUILTIN_VEC_INIT_V2SI:
     case IX86_BUILTIN_VEC_INIT_V4HI:
     case IX86_BUILTIN_VEC_INIT_V8QI:
Index: gcc/config/i386/i386.h
===================================================================
--- gcc/config/i386/i386.h.orig
+++ gcc/config/i386/i386.h
@@ -387,6 +387,8 @@ extern int x86_prefetch_sse;
 	builtin_define ("__SSE3__");				\
       if (TARGET_SSSE3)						\
 	builtin_define ("__SSSE3__");				\
+      if (TARGET_SSE4A)						\
+ 	builtin_define ("__SSE4A__");		                \
       if (TARGET_SSE_MATH && TARGET_SSE)			\
 	builtin_define ("__SSE_MATH__");			\
       if (TARGET_SSE_MATH && TARGET_SSE2)			\
Index: gcc/config/i386/i386.md
===================================================================
--- gcc/config/i386/i386.md.orig
+++ gcc/config/i386/i386.md
@@ -154,6 +154,14 @@
    (UNSPEC_PSHUFB		120)
    (UNSPEC_PSIGN		121)
    (UNSPEC_PALIGNR		122)
+
+   ; For SSE4A support
+   (UNSPEC_MOVNTS               130)
+   (UNSPEC_EXTRQI               131)
+   (UNSPEC_EXTRQ                132)   
+   (UNSPEC_INSERTQI             133)
+   (UNSPEC_INSERTQ              134)
+
   ])
 
 (define_constants
@@ -14546,7 +14554,31 @@
      [(set (match_dup 0) (xor:SI (match_dup 0) (const_int 31)))
       (clobber (reg:CC FLAGS_REG))])]
   ""
-  "")
+  "
+    {
+      if (TARGET_ABM)
+        {
+          emit_insn (gen_clzsi2_abm (operands[0], operands[1]));
+          DONE;
+        }
+    }
+  ")
+
+(define_insn "clzsi2_abm"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (clz:SI (match_operand:SI 1 "nonimmediate_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_ABM"
+  "lzcnt {%1, %0|%0, %1}"
+  [(set_attr "prefix_0f" "1")])
+
+(define_insn "popcountsi2"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(popcount:SI (match_operand:SI 1 "nonimmediate_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_POPCNT"
+  "popcnt {%1, %0|%0, %1}"
+  [(set_attr "prefix_0f" "1")])
 
 (define_insn "*bsr"
   [(set (match_operand:SI 0 "register_operand" "=r")
@@ -14566,8 +14598,36 @@
    (parallel
      [(set (match_dup 0) (xor:DI (match_dup 0) (const_int 63)))
       (clobber (reg:CC FLAGS_REG))])]
-  "TARGET_64BIT"
-  "")
+  ""
+  "
+    {
+      if (TARGET_ABM)
+        {
+          emit_insn (gen_clzdi2_abm (operands[0], operands[1]));
+          DONE;
+        }
+      else if (TARGET_64BIT)
+        ;
+      else
+        FAIL;
+    }
+  ")
+
+(define_insn "clzdi2_abm"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(clz:DI (match_operand:DI 1 "nonimmediate_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_ABM"
+  "lzcnt {%1, %0|%0, %1}"
+  [(set_attr "prefix_0f" "1")])
+
+(define_insn "popcountdi2"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(popcount:DI (match_operand:DI 1 "nonimmediate_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_POPCNT"
+  "popcnt {%1, %0|%0, %1}"
+  [(set_attr "prefix_0f" "1")])
 
 (define_insn "*bsr_rex64"
   [(set (match_operand:DI 0 "register_operand" "=r")
Index: gcc/config/i386/i386.opt
===================================================================
--- gcc/config/i386/i386.opt.orig
+++ gcc/config/i386/i386.opt
@@ -201,6 +201,18 @@ mssse3
 Target Report Mask(SSSE3)
 Support MMX, SSE, SSE2, SSE3 and SSSE3 built-in functions and code generation
 
+msse4a
+Target Report Mask(SSE4A)
+Support new AMDFAM10 SSE4A built-in functions and code generation
+
+mpopcnt
+Target Report Mask(POPCNT)
+Support new AMDFAM10 Advanced Bit Manipulation (ABM) popcount built-in functions and code generation
+
+mabm
+Target Report Mask(ABM)
+Support new AMDFAM10 Advanced Bit Manipulation (ABM) built-in functions and code generation
+
 msseregparm
 Target RejectNegative Mask(SSEREGPARM)
 Use SSE register passing conventions for SF and DF mode
Index: gcc/config/i386/sse.md
===================================================================
--- gcc/config/i386/sse.md.orig
+++ gcc/config/i386/sse.md
@@ -4498,3 +4498,83 @@
   "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
   [(set_attr "type" "sselog1")
    (set_attr "mode" "DI")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; AMD SSE4A instructions
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+(define_insn "sse4a_movntsd"
+  [(set (match_operand:V2DF 0 "memory_operand" "=m")
+	(vec_merge:V2DF
+	  (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
+		       UNSPEC_MOVNTS)
+	  (match_dup 1)
+	  (const_int 1)))]
+  "TARGET_SSE4A"
+  "movntsd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "mode" "DF")])
+
+(define_insn "sse4a_movntss"
+  [(set (match_operand:V4SF 0 "memory_operand" "=m")
+  	(vec_merge:V4SF
+          (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
+		       UNSPEC_MOVNTS)
+	  (match_dup 1)
+	  (const_int 1)))]                     
+  "TARGET_SSE4A"
+  "movntss\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "mode" "SF")])
+
+(define_insn "sse4a_extrqi"
+  [(set (match_operand:V2DI 0 "register_operand" "=x")
+        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
+                      (match_operand 2 "const_int_operand" "")
+                      (match_operand 3 "const_int_operand" "")]
+                     UNSPEC_EXTRQI))]
+  "TARGET_SSE4A"
+{
+  return "extrq\t{%3, %2, %0|%0, %2, %3}";
+}
+  [(set_attr "type" "sse")
+   (set_attr "mode" "TI")])
+
+(define_insn "sse4a_extrq"
+  [(set (match_operand:V2DI 0 "register_operand" "=x")
+        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
+                      (match_operand:V16QI 2 "register_operand" "x")]
+                     UNSPEC_EXTRQ))]
+  "TARGET_SSE4A"
+{
+  return "extrq\t{%2, %0|%0, %2}";
+}
+  [(set_attr "type" "sse")
+   (set_attr "mode" "TI")])
+
+(define_insn "sse4a_insertqi"
+  [(set (match_operand:V2DI 0 "register_operand" "=x")
+        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
+        	      (match_operand:V2DI 2 "register_operand" "x")
+                      (match_operand 3 "const_int_operand" "")
+                      (match_operand 4 "const_int_operand" "")]
+                     UNSPEC_INSERTQI))]
+  "TARGET_SSE4A"
+{
+  return "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}";
+}
+  [(set_attr "type" "sse")
+   (set_attr "mode" "TI")])
+
+(define_insn "sse4a_insertq"
+  [(set (match_operand:V2DI 0 "register_operand" "=x")
+        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
+        	      (match_operand:V2DI 2 "register_operand" "x")]
+        	     UNSPEC_INSERTQ))]
+  "TARGET_SSE4A"
+{
+  return "insertq\t{%2, %0|%0, %2}";
+}
+  [(set_attr "type" "sse")
+   (set_attr "mode" "TI")])
Index: gcc/config.gcc
===================================================================
--- gcc/config.gcc.orig
+++ gcc/config.gcc
@@ -264,12 +264,12 @@ xscale-*-*)
 i[34567]86-*-*)
 	cpu_type=i386
 	extra_headers="mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h
-		       pmmintrin.h tmmintrin.h"
+		       pmmintrin.h tmmintrin.h ammintrin.h"
 	;;
 x86_64-*-*)
 	cpu_type=i386
 	extra_headers="mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h
-		       pmmintrin.h tmmintrin.h"
+		       pmmintrin.h tmmintrin.h ammintrin.h"
 	need_64bit_hwint=yes
 	;;
 ia64-*-*)
Index: gcc/doc/extend.texi
===================================================================
--- gcc/doc/extend.texi.orig
+++ gcc/doc/extend.texi
@@ -6906,6 +6906,18 @@ v4si __builtin_ia32_pabsd128 (v4si)
 v8hi __builtin_ia32_pabsw128 (v8hi)
 @end smallexample
 
+The following built-in functions are available when @option{-msse4a} is used.
+All of them generate the machine instruction that is part of the name with XMM registers.
+@smallexample
+void             _mm_stream_sd (double*,__m128d);
+void             _mm_stream_ss (float*,__m128);
+__m128i          _mm_extract_si64 (__m128i, __m128i);
+__m128i          _mm_extracti_si64 (__m128i, int, int);
+__m128i          _mm_insert_si64 (__m128i, __m128i);
+__m128i          _mm_inserti_si64 (__m128i, __m128i, int, int);
+@end smallexample
+
+
 The following built-in functions are available when @option{-m3dnow} is used.
 All of them generate the machine instruction that is part of the name.
 
Index: gcc/doc/invoke.texi
===================================================================
--- gcc/doc/invoke.texi.orig
+++ gcc/doc/invoke.texi
@@ -522,7 +522,7 @@ Objective-C and Objective-C++ Dialects}.
 -mno-fp-ret-in-387  -msoft-float  -msvr3-shlib @gol
 -mno-wide-multiply  -mrtd  -malign-double @gol
 -mpreferred-stack-boundary=@var{num} @gol
--mmmx  -msse  -msse2 -msse3 -mssse3 -m3dnow @gol
+-mmmx  -msse  -msse2 -msse3 -mssse3 -msse4a -m3dnow -mpopcnt -mabm @gol
 -mthreads  -mno-align-stringops  -minline-all-stringops @gol
 -mpush-args  -maccumulate-outgoing-args  -m128bit-long-double @gol
 -m96bit-long-double  -mregparm=@var{num}  -msseregparm @gol
@@ -9073,6 +9073,10 @@ instruction set support.
 @item k8, opteron, athlon64, athlon-fx
 AMD K8 core based CPUs with x86-64 instruction set support.  (This supersets
 MMX, SSE, SSE2, 3dNOW!, enhanced 3dNOW! and 64-bit instruction set extensions.)
+@item amdfam10
+AMD Family 10 core based CPUs with x86-64 instruction set support.  (This supersets
+MMX, SSE, SSE2, SSE4A, 3dNOW!, enhanced 3dNOW!, ABM and 64-bit instruction set
+extensions.)
 @item winchip-c6
 IDT Winchip C6 CPU, dealt in same way as i486 with additional MMX instruction
 set support.
@@ -9350,6 +9354,8 @@ preferred alignment to @option{-mpreferr
 @itemx -mno-sse3
 @item -mssse3
 @itemx -mno-ssse3
+@item -msse4a
+@item -mno-sse4a
 @item -m3dnow
 @itemx -mno-3dnow
 @opindex mmmx
@@ -9358,11 +9364,15 @@ preferred alignment to @option{-mpreferr
 @opindex mno-sse
 @opindex m3dnow
 @opindex mno-3dnow
+@item -mpopcnt
+@itemx -mno-popcnt
+@item -mabm
+@itemx -mno-abm
 These switches enable or disable the use of instructions in the MMX,
-SSE, SSE2, SSE3, SSSE3 or 3DNow! extended instruction sets.
-These extensions are also available as built-in functions: see
-@ref{X86 Built-in Functions}, for details of the functions enabled and
-disabled by these switches.
+SSE, SSE2, SSE3, SSSE3, SSE4A, ABM or 3DNow! extended instruction sets.  
+These extensions are also available as built-in functions: see 
+@ref{X86 Built-in Functions}, for details of the functions enabled
+and disabled by these switches.
 
 To have SSE/SSE2 instructions generated automatically from floating-point
 code (as opposed to 387 instructions), see @option{-mfpmath=sse}.
openSUSE Build Service is sponsored by