File gcc-amdfam10-suse-1.patch of Package gcc41
Index: gcc/config/i386/ammintrin.h
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ gcc/config/i386/ammintrin.h 2009-11-20 13:42:41.000000000 +0100
@@ -0,0 +1,69 @@
+/* Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING. If not, write to
+ the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+ Boston, MA 02110-1301, USA. */
+
+/* As a special exception, if you include this header file into source
+ files compiled by GCC, this header file does not by itself cause
+ the resulting executable to be covered by the GNU General Public
+ License. This exception does not however invalidate any other
+ reasons why the executable file might be covered by the GNU General
+ Public License. */
+
+/* Implemented from the specification included in the AMD Programmers ManualIntel C++ Compiler
+ User Guide and Reference, version 8.0. */
+
+#ifndef _AMMINTRIN_H_INCLUDED
+#define _AMMINTRIN_H_INCLUDED
+
+#ifdef __SSE4A__
+#include <pmmintrin.h>
+
+static __inline void __attribute__((__always_inline__))
+_mm_stream_sd (double * __P, __m128d __Y)
+{
+ __builtin_ia32_movntsd (__P, (__v2df) __Y);
+}
+
+static __inline void __attribute__((__always_inline__))
+_mm_stream_ss (float * __P, __m128 __Y)
+{
+ __builtin_ia32_movntss (__P, (__v4sf) __Y);
+}
+
+static __inline __m128i __attribute__((__always_inline__))
+_mm_extract_si64 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_extrq ((__v2di) __X, (__v16qi) __Y);
+}
+
+#define _mm_extracti_si64(X, I, L) \
+((__m128i) __builtin_ia32_extrqi ((__v2di)(X), I, L))
+
+static __inline __m128i __attribute__((__always_inline__))
+_mm_insert_si64 (__m128i __X,__m128i __Y)
+{
+ return (__m128i) __builtin_ia32_insertq ((__v2di)__X, (__v2di)__Y);
+}
+
+#define _mm_inserti_si64(X, Y, I, L) \
+((__m128i) __builtin_ia32_insertqi ((__v2di)(X), (__v2di)(Y), I, L))
+
+
+#endif /* __SSE4A__ */
+
+#endif /* _AMMINTRIN_H_INCLUDED */
Index: gcc/config/i386/i386.c
===================================================================
--- gcc/config/i386/i386.c.orig 2009-11-20 13:42:21.000000000 +0100
+++ gcc/config/i386/i386.c 2009-11-20 13:42:41.000000000 +0100
@@ -1371,16 +1371,24 @@ ix86_handle_option (size_t code, const c
case OPT_msse:
if (!value)
{
- target_flags &= ~(MASK_SSE2 | MASK_SSE3);
- target_flags_explicit |= MASK_SSE2 | MASK_SSE3;
+ target_flags &= ~(MASK_SSE2 | MASK_SSE3 | MASK_SSE4A);
+ target_flags_explicit |= MASK_SSE2 | MASK_SSE3 | MASK_SSE4A;
}
return true;
case OPT_msse2:
if (!value)
{
- target_flags &= ~MASK_SSE3;
- target_flags_explicit |= MASK_SSE3;
+ target_flags &= ~(MASK_SSE3 | MASK_SSE4A);
+ target_flags_explicit |= MASK_SSE3 | MASK_SSE4A;
+ }
+ return true;
+
+ case OPT_msse3:
+ if (!value)
+ {
+ target_flags &= ~MASK_SSE4A;
+ target_flags_explicit |= MASK_SSE4A;
}
return true;
@@ -1448,7 +1456,10 @@ override_options (void)
PTA_3DNOW = 32,
PTA_3DNOW_A = 64,
PTA_64BIT = 128,
- PTA_SSSE3 = 256
+ PTA_SSSE3 = 256,
+ PTA_POPCNT= 512,
+ PTA_ABM = 1024,
+ PTA_SSE4A = 2048
} flags;
}
const processor_alias_table[] =
@@ -1501,6 +1512,9 @@ override_options (void)
| PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
{"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
{"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
+ {"amdfam10", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
+ | PTA_3DNOW_A | PTA_SSE | PTA_SSE2| PTA_SSE3 | PTA_POPCNT
+ | PTA_ABM | PTA_SSE4A},
};
int const pta_size = ARRAY_SIZE (processor_alias_table);
@@ -1646,6 +1660,15 @@ override_options (void)
target_flags |= MASK_SSSE3;
if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
x86_prefetch_sse = true;
+ if (processor_alias_table[i].flags & PTA_POPCNT
+ && !(target_flags_explicit & MASK_POPCNT))
+ target_flags |= MASK_POPCNT;
+ if (processor_alias_table[i].flags & PTA_ABM
+ && !(target_flags_explicit & MASK_ABM))
+ target_flags |= MASK_ABM;
+ if (processor_alias_table[i].flags & PTA_SSE4A
+ && !(target_flags_explicit & MASK_SSE4A))
+ target_flags |= MASK_SSE4A;
if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
error ("CPU you selected does not support x86-64 "
"instruction set");
@@ -1837,6 +1860,10 @@ override_options (void)
if (TARGET_SSSE3)
target_flags |= MASK_SSE3;
+ /* Turn on SSE3 builtins for -msse4a. */
+ if (TARGET_SSE4A)
+ target_flags |= MASK_SSE3;
+
/* Turn on SSE2 builtins for -msse3. */
if (TARGET_SSE3)
target_flags |= MASK_SSE2;
@@ -1856,6 +1883,10 @@ override_options (void)
if (TARGET_3DNOW)
target_flags |= MASK_MMX;
+ /* Turn on POPCNT builtins for -mabm. */
+ if (TARGET_ABM)
+ target_flags |= MASK_POPCNT;
+
if (TARGET_64BIT)
{
if (TARGET_ALIGN_DOUBLE)
@@ -14253,6 +14284,14 @@ enum ix86_builtins
IX86_BUILTIN_PABSW128,
IX86_BUILTIN_PABSD128,
+ /* AMDFAM10 - SSE4A New Instructions. */
+ IX86_BUILTIN_MOVNTSD,
+ IX86_BUILTIN_MOVNTSS,
+ IX86_BUILTIN_EXTRQI,
+ IX86_BUILTIN_EXTRQ,
+ IX86_BUILTIN_INSERTQI,
+ IX86_BUILTIN_INSERTQ,
+
IX86_BUILTIN_VEC_INIT_V2SI,
IX86_BUILTIN_VEC_INIT_V4HI,
IX86_BUILTIN_VEC_INIT_V8QI,
@@ -14995,6 +15034,16 @@ ix86_init_mmx_sse_builtins (void)
= build_function_type_list (void_type_node,
pchar_type_node, V16QI_type_node, NULL_TREE);
+ tree v2di_ftype_v2di_unsigned_unsigned
+ = build_function_type_list (V2DI_type_node, V2DI_type_node,
+ unsigned_type_node, unsigned_type_node, NULL_TREE);
+ tree v2di_ftype_v2di_v2di_unsigned_unsigned
+ = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
+ unsigned_type_node, unsigned_type_node, NULL_TREE);
+ tree v2di_ftype_v2di_v16qi
+ = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
+ NULL_TREE);
+
tree float80_type;
tree float128_type;
tree ftype;
@@ -15346,6 +15395,14 @@ ix86_init_mmx_sse_builtins (void)
def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
IX86_BUILTIN_PALIGNR);
+ /* AMDFAM10 SSE4A New built-ins */
+ def_builtin (MASK_SSE4A, "__builtin_ia32_movntsd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
+ def_builtin (MASK_SSE4A, "__builtin_ia32_movntss", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
+ def_builtin (MASK_SSE4A, "__builtin_ia32_extrqi", v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
+ def_builtin (MASK_SSE4A, "__builtin_ia32_extrq", v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
+ def_builtin (MASK_SSE4A, "__builtin_ia32_insertqi", v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
+ def_builtin (MASK_SSE4A, "__builtin_ia32_insertq", v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
+
/* Access to the vec_init patterns. */
ftype = build_function_type_list (V2SI_type_node, integer_type_node,
integer_type_node, NULL_TREE);
@@ -15834,9 +15891,9 @@ ix86_expand_builtin (tree exp, rtx targe
enum insn_code icode;
tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
tree arglist = TREE_OPERAND (exp, 1);
- tree arg0, arg1, arg2;
- rtx op0, op1, op2, pat;
- enum machine_mode tmode, mode0, mode1, mode2, mode3;
+ tree arg0, arg1, arg2, arg3;
+ rtx op0, op1, op2, op3, pat;
+ enum machine_mode tmode, mode0, mode1, mode2, mode3,mode4;
unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
switch (fcode)
@@ -16259,6 +16316,114 @@ ix86_expand_builtin (tree exp, rtx targe
emit_insn (pat);
return target;
+ case IX86_BUILTIN_MOVNTSD:
+ return ix86_expand_store_builtin (CODE_FOR_sse4a_movntsd, arglist);
+
+ case IX86_BUILTIN_MOVNTSS:
+ return ix86_expand_store_builtin (CODE_FOR_sse4a_movntss, arglist);
+
+ case IX86_BUILTIN_INSERTQ:
+ case IX86_BUILTIN_EXTRQ:
+ icode = (fcode == IX86_BUILTIN_EXTRQ
+ ? CODE_FOR_sse4a_extrq
+ : CODE_FOR_sse4a_insertq);
+ arg0 = TREE_VALUE (arglist);
+ arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+ op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+ op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
+ tmode = insn_data[icode].operand[0].mode;
+ mode1 = insn_data[icode].operand[1].mode;
+ mode2 = insn_data[icode].operand[2].mode;
+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
+ op0 = copy_to_mode_reg (mode1, op0);
+ if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
+ op1 = copy_to_mode_reg (mode2, op1);
+ if (optimize || target == 0
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+ pat = GEN_FCN (icode) (target, op0, op1);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ return target;
+
+ case IX86_BUILTIN_EXTRQI:
+ icode = CODE_FOR_sse4a_extrqi;
+ arg0 = TREE_VALUE (arglist);
+ arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+ arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
+ op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+ op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
+ op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
+ tmode = insn_data[icode].operand[0].mode;
+ mode1 = insn_data[icode].operand[1].mode;
+ mode2 = insn_data[icode].operand[2].mode;
+ mode3 = insn_data[icode].operand[3].mode;
+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
+ op0 = copy_to_mode_reg (mode1, op0);
+ if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
+ {
+ error ("index mask must be an immediate");
+ return gen_reg_rtx (tmode);
+ }
+ if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
+ {
+ error ("length mask must be an immediate");
+ return gen_reg_rtx (tmode);
+ }
+ if (optimize || target == 0
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+ pat = GEN_FCN (icode) (target, op0, op1, op2);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ return target;
+
+ case IX86_BUILTIN_INSERTQI:
+ icode = CODE_FOR_sse4a_insertqi;
+ arg0 = TREE_VALUE (arglist);
+ arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+ arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
+ arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
+ op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+ op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
+ op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
+ op3 = expand_expr (arg3, NULL_RTX, VOIDmode, 0);
+ tmode = insn_data[icode].operand[0].mode;
+ mode1 = insn_data[icode].operand[1].mode;
+ mode2 = insn_data[icode].operand[2].mode;
+ mode3 = insn_data[icode].operand[3].mode;
+ mode4 = insn_data[icode].operand[4].mode;
+
+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
+ op0 = copy_to_mode_reg (mode1, op0);
+
+ if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
+ op1 = copy_to_mode_reg (mode2, op1);
+
+ if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
+ {
+ error ("index mask must be an immediate");
+ return gen_reg_rtx (tmode);
+ }
+ if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
+ {
+ error ("length mask must be an immediate");
+ return gen_reg_rtx (tmode);
+ }
+ if (optimize || target == 0
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+ pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ return target;
+
case IX86_BUILTIN_VEC_INIT_V2SI:
case IX86_BUILTIN_VEC_INIT_V4HI:
case IX86_BUILTIN_VEC_INIT_V8QI:
Index: gcc/config/i386/i386.h
===================================================================
--- gcc/config/i386/i386.h.orig 2009-11-20 13:42:21.000000000 +0100
+++ gcc/config/i386/i386.h 2009-11-20 13:42:41.000000000 +0100
@@ -387,6 +387,8 @@ extern int x86_prefetch_sse;
builtin_define ("__SSE3__"); \
if (TARGET_SSSE3) \
builtin_define ("__SSSE3__"); \
+ if (TARGET_SSE4A) \
+ builtin_define ("__SSE4A__"); \
if (TARGET_SSE_MATH && TARGET_SSE) \
builtin_define ("__SSE_MATH__"); \
if (TARGET_SSE_MATH && TARGET_SSE2) \
Index: gcc/config/i386/i386.md
===================================================================
--- gcc/config/i386/i386.md.orig 2009-11-20 13:42:21.000000000 +0100
+++ gcc/config/i386/i386.md 2009-11-20 13:42:41.000000000 +0100
@@ -154,6 +154,14 @@
(UNSPEC_PSHUFB 120)
(UNSPEC_PSIGN 121)
(UNSPEC_PALIGNR 122)
+
+ ; For SSE4A support
+ (UNSPEC_MOVNTS 130)
+ (UNSPEC_EXTRQI 131)
+ (UNSPEC_EXTRQ 132)
+ (UNSPEC_INSERTQI 133)
+ (UNSPEC_INSERTQ 134)
+
])
(define_constants
@@ -14546,7 +14554,31 @@
[(set (match_dup 0) (xor:SI (match_dup 0) (const_int 31)))
(clobber (reg:CC FLAGS_REG))])]
""
- "")
+ "
+ {
+ if (TARGET_ABM)
+ {
+ emit_insn (gen_clzsi2_abm (operands[0], operands[1]));
+ DONE;
+ }
+ }
+ ")
+
+(define_insn "clzsi2_abm"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (clz:SI (match_operand:SI 1 "nonimmediate_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_ABM"
+ "lzcnt {%1, %0|%0, %1}"
+ [(set_attr "prefix_0f" "1")])
+
+(define_insn "popcountsi2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (popcount:SI (match_operand:SI 1 "nonimmediate_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_POPCNT"
+ "popcnt {%1, %0|%0, %1}"
+ [(set_attr "prefix_0f" "1")])
(define_insn "*bsr"
[(set (match_operand:SI 0 "register_operand" "=r")
@@ -14566,8 +14598,36 @@
(parallel
[(set (match_dup 0) (xor:DI (match_dup 0) (const_int 63)))
(clobber (reg:CC FLAGS_REG))])]
- "TARGET_64BIT"
- "")
+ ""
+ "
+ {
+ if (TARGET_ABM)
+ {
+ emit_insn (gen_clzdi2_abm (operands[0], operands[1]));
+ DONE;
+ }
+ else if (TARGET_64BIT)
+ ;
+ else
+ FAIL;
+ }
+ ")
+
+(define_insn "clzdi2_abm"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (clz:DI (match_operand:DI 1 "nonimmediate_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_ABM"
+ "lzcnt {%1, %0|%0, %1}"
+ [(set_attr "prefix_0f" "1")])
+
+(define_insn "popcountdi2"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (popcount:DI (match_operand:DI 1 "nonimmediate_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_POPCNT"
+ "popcnt {%1, %0|%0, %1}"
+ [(set_attr "prefix_0f" "1")])
(define_insn "*bsr_rex64"
[(set (match_operand:DI 0 "register_operand" "=r")
Index: gcc/config/i386/i386.opt
===================================================================
--- gcc/config/i386/i386.opt.orig 2009-11-20 13:42:21.000000000 +0100
+++ gcc/config/i386/i386.opt 2009-11-20 13:42:41.000000000 +0100
@@ -201,6 +201,18 @@ mssse3
Target Report Mask(SSSE3)
Support MMX, SSE, SSE2, SSE3 and SSSE3 built-in functions and code generation
+msse4a
+Target Report Mask(SSE4A)
+Support new AMDFAM10 SSE4A built-in functions and code generation
+
+mpopcnt
+Target Report Mask(POPCNT)
+Support new AMDFAM10 Advanced Bit Manipulation (ABM) popcount built-in functions and code generation
+
+mabm
+Target Report Mask(ABM)
+Support new AMDFAM10 Advanced Bit Manipulation (ABM) built-in functions and code generation
+
msseregparm
Target RejectNegative Mask(SSEREGPARM)
Use SSE register passing conventions for SF and DF mode
Index: gcc/config/i386/sse.md
===================================================================
--- gcc/config/i386/sse.md.orig 2009-11-20 13:42:21.000000000 +0100
+++ gcc/config/i386/sse.md 2009-11-20 13:42:41.000000000 +0100
@@ -4488,6 +4488,86 @@
[(set_attr "type" "sselog1")
(set_attr "mode" "DI")])
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; AMD SSE4A instructions
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+(define_insn "sse4a_movntsd"
+ [(set (match_operand:V2DF 0 "memory_operand" "=m")
+ (vec_merge:V2DF
+ (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
+ UNSPEC_MOVNTS)
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_SSE4A"
+ "movntsd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "DF")])
+
+(define_insn "sse4a_movntss"
+ [(set (match_operand:V4SF 0 "memory_operand" "=m")
+ (vec_merge:V4SF
+ (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
+ UNSPEC_MOVNTS)
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_SSE4A"
+ "movntss\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "SF")])
+
+(define_insn "sse4a_extrqi"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
+ (match_operand 2 "const_int_operand" "")
+ (match_operand 3 "const_int_operand" "")]
+ UNSPEC_EXTRQI))]
+ "TARGET_SSE4A"
+{
+ return "extrq\t{%3, %2, %0|%0, %2, %3}";
+}
+ [(set_attr "type" "sse")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4a_extrq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
+ (match_operand:V16QI 2 "register_operand" "x")]
+ UNSPEC_EXTRQ))]
+ "TARGET_SSE4A"
+{
+ return "extrq\t{%2, %0|%0, %2}";
+}
+ [(set_attr "type" "sse")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4a_insertqi"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
+ (match_operand:V2DI 2 "register_operand" "x")
+ (match_operand 3 "const_int_operand" "")
+ (match_operand 4 "const_int_operand" "")]
+ UNSPEC_INSERTQI))]
+ "TARGET_SSE4A"
+{
+ return "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}";
+}
+ [(set_attr "type" "sse")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4a_insertq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
+ (match_operand:V2DI 2 "register_operand" "x")]
+ UNSPEC_INSERTQ))]
+ "TARGET_SSE4A"
+{
+ return "insertq\t{%2, %0|%0, %2}";
+}
+ [(set_attr "type" "sse")
+ (set_attr "mode" "TI")])
+
(define_insn "sse3_monitor64"
[(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
(match_operand:DI 1 "register_operand" "c")
Index: gcc/config.gcc
===================================================================
--- gcc/config.gcc.orig 2009-11-20 13:42:21.000000000 +0100
+++ gcc/config.gcc 2009-11-20 13:42:41.000000000 +0100
@@ -264,12 +264,12 @@ xscale-*-*)
i[34567]86-*-*)
cpu_type=i386
extra_headers="mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h
- pmmintrin.h tmmintrin.h"
+ pmmintrin.h tmmintrin.h ammintrin.h"
;;
x86_64-*-*)
cpu_type=i386
extra_headers="mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h
- pmmintrin.h tmmintrin.h"
+ pmmintrin.h tmmintrin.h ammintrin.h"
need_64bit_hwint=yes
;;
ia64-*-*)
Index: gcc/doc/extend.texi
===================================================================
--- gcc/doc/extend.texi.orig 2009-11-20 13:42:21.000000000 +0100
+++ gcc/doc/extend.texi 2009-11-20 13:42:41.000000000 +0100
@@ -6906,6 +6906,18 @@ v4si __builtin_ia32_pabsd128 (v4si)
v8hi __builtin_ia32_pabsw128 (v8hi)
@end smallexample
+The following built-in functions are available when @option{-msse4a} is used.
+All of them generate the machine instruction that is part of the name with XMM registers.
+@smallexample
+void _mm_stream_sd (double*,__m128d);
+void _mm_stream_ss (float*,__m128);
+__m128i _mm_extract_si64 (__m128i, __m128i);
+__m128i _mm_extracti_si64 (__m128i, int, int);
+__m128i _mm_insert_si64 (__m128i, __m128i);
+__m128i _mm_inserti_si64 (__m128i, __m128i, int, int);
+@end smallexample
+
+
The following built-in functions are available when @option{-m3dnow} is used.
All of them generate the machine instruction that is part of the name.
Index: gcc/doc/invoke.texi
===================================================================
--- gcc/doc/invoke.texi.orig 2009-11-20 13:42:25.000000000 +0100
+++ gcc/doc/invoke.texi 2009-11-20 13:42:41.000000000 +0100
@@ -522,7 +522,7 @@ Objective-C and Objective-C++ Dialects}.
-mno-fp-ret-in-387 -msoft-float -msvr3-shlib @gol
-mno-wide-multiply -mrtd -malign-double @gol
-mpreferred-stack-boundary=@var{num} @gol
--mmmx -msse -msse2 -msse3 -mssse3 -m3dnow @gol
+-mmmx -msse -msse2 -msse3 -mssse3 -msse4a -m3dnow -mpopcnt -mabm @gol
-mthreads -mno-align-stringops -minline-all-stringops @gol
-mpush-args -maccumulate-outgoing-args -m128bit-long-double @gol
-m96bit-long-double -mregparm=@var{num} -msseregparm @gol
@@ -9073,6 +9073,10 @@ instruction set support.
@item k8, opteron, athlon64, athlon-fx
AMD K8 core based CPUs with x86-64 instruction set support. (This supersets
MMX, SSE, SSE2, 3dNOW!, enhanced 3dNOW! and 64-bit instruction set extensions.)
+@item amdfam10
+AMD Family 10 core based CPUs with x86-64 instruction set support. (This supersets
+MMX, SSE, SSE2, SSE4A, 3dNOW!, enhanced 3dNOW!, ABM and 64-bit instruction set
+extensions.)
@item winchip-c6
IDT Winchip C6 CPU, dealt in same way as i486 with additional MMX instruction
set support.
@@ -9350,6 +9354,8 @@ preferred alignment to @option{-mpreferr
@itemx -mno-sse3
@item -mssse3
@itemx -mno-ssse3
+@item -msse4a
+@item -mno-sse4a
@item -m3dnow
@itemx -mno-3dnow
@opindex mmmx
@@ -9358,11 +9364,15 @@ preferred alignment to @option{-mpreferr
@opindex mno-sse
@opindex m3dnow
@opindex mno-3dnow
+@item -mpopcnt
+@itemx -mno-popcnt
+@item -mabm
+@itemx -mno-abm
These switches enable or disable the use of instructions in the MMX,
-SSE, SSE2, SSE3, SSSE3 or 3DNow! extended instruction sets.
-These extensions are also available as built-in functions: see
-@ref{X86 Built-in Functions}, for details of the functions enabled and
-disabled by these switches.
+SSE, SSE2, SSE3, SSSE3, SSE4A, ABM or 3DNow! extended instruction sets.
+These extensions are also available as built-in functions: see
+@ref{X86 Built-in Functions}, for details of the functions enabled
+and disabled by these switches.
To have SSE/SSE2 instructions generated automatically from floating-point
code (as opposed to 387 instructions), see @option{-mfpmath=sse}.