File libatlas.ppc64le-abiv2.patch of Package libatlas3
From: Michel Normand <normand@linux.vnet.ibm.com>
Subject: atlas.ppc64le abiv2
Date: Mon, 14 Apr 2014 18:03:06 +0200
References: http://sourceforge.net/p/math-atlas/mailman/message/32471499/
atlas.ppc64le abiv2
* do not use opd section for ABI V2
* set up the TOC pointer in r2 at function entry (ABI V2 global entry point)
TODO: the TOC setup may not be required everywhere.
Based on work by Guy and Thierry.
TODO: the stack FSIZE still needs work.
TODO: for ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c, the change to the
`ld pC0` stack offsets still needs to be better understood.
Signed-off-by: Michel Normand <normand@linux.vnet.ibm.com>
---
ATLAS/CONFIG/src/backend/probe_AltiVec.S | 2 -
ATLAS/CONFIG/src/backend/probe_VSX.S | 2 -
ATLAS/src/threads/ATL_DecAtomicCount_ppc.S | 8 ++++++-
ATLAS/src/threads/ATL_ResetAtomicCount_ppc.S | 8 ++++++-
ATLAS/tune/blas/gemm/CASES/ATL_cmm4x4x128_av.c | 8 ++++++-
ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x2pf_av.c | 10 +++++++--
ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x32_ppc.c | 8 ++++++-
ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c | 26 +++++++++++++++++++++++--
ATLAS/tune/blas/gemm/CASES/ATL_smm4x4x128_av.c | 8 ++++++-
9 files changed, 69 insertions(+), 11 deletions(-)
Index: ATLAS/CONFIG/src/backend/probe_AltiVec.S
===================================================================
--- ATLAS.orig/CONFIG/src/backend/probe_AltiVec.S
+++ ATLAS/CONFIG/src/backend/probe_AltiVec.S
@@ -6,7 +6,7 @@
*
*/
.text
-#if defined(ATL_USE64BITS) && defined (ATL_OS_Linux)
+#if defined(ATL_USE64BITS) && defined (ATL_OS_Linux) && _CALL_ELF != 2
.align 2
.globl ATL_asmdecor(do_vsum)
.section ".opd","aw"
Index: ATLAS/CONFIG/src/backend/probe_VSX.S
===================================================================
--- ATLAS.orig/CONFIG/src/backend/probe_VSX.S
+++ ATLAS/CONFIG/src/backend/probe_VSX.S
@@ -6,7 +6,7 @@
*
*/
.text
-#if defined(ATL_USE64BITS) && defined (ATL_OS_Linux)
+#if defined(ATL_USE64BITS) && defined (ATL_OS_Linux) && _CALL_ELF != 2
.align 2
.globl ATL_asmdecor(do_vsum)
.section ".opd","aw"
Index: ATLAS/src/threads/ATL_DecAtomicCount_ppc.S
===================================================================
--- ATLAS.orig/src/threads/ATL_DecAtomicCount_ppc.S
+++ ATLAS/src/threads/ATL_DecAtomicCount_ppc.S
@@ -4,7 +4,7 @@
.globl _ATL_DecAtomicCount
_ATL_DecAtomicCount:
#else
- #if defined(ATL_USE64BITS)
+ #if defined(ATL_USE64BITS) && _CALL_ELF != 2
/*
* Official Program Descripter section, seg fault w/o it on Linux/PPC64
*/
@@ -22,6 +22,12 @@ ATL_DecAtomicCount:
#else
.globl ATL_DecAtomicCount
ATL_DecAtomicCount:
+ #if _CALL_ELF == 2
+ .type ATL_DecAtomicCount,@function
+0: addis 2,12,.TOC.-0b@ha
+ addi 2,2,.TOC.-0b@l
+ .localentry ATL_DecAtomicCount, .-ATL_DecAtomicCount
+ #endif
#endif
#endif
#error "Code is not reliable on PPC, don't know why"
Index: ATLAS/src/threads/ATL_ResetAtomicCount_ppc.S
===================================================================
--- ATLAS.orig/src/threads/ATL_ResetAtomicCount_ppc.S
+++ ATLAS/src/threads/ATL_ResetAtomicCount_ppc.S
@@ -4,7 +4,7 @@
.globl _ATL_ResetAtomicCount
_ATL_ResetAtomicCount:
#else
- #if defined(ATL_USE64BITS)
+ #if defined(ATL_USE64BITS) && _CALL_ELF != 2
/*
* Official Program Descripter section, seg fault w/o it on Linux/PPC64
*/
@@ -22,6 +22,12 @@ ATL_ResetAtomicCount:
#else
.globl ATL_ResetAtomicCount
ATL_ResetAtomicCount:
+ #if _CALL_ELF == 2
+ .type ATL_ResetAtomicCount,@function
+0: addis 2,12,.TOC.-0b@ha
+ addi 2,2,.TOC.-0b@l
+ .localentry ATL_ResetAtomicCount, .-ATL_ResetAtomicCount
+ #endif
#endif
#endif
/* r3 r3 r4 */
Index: ATLAS/tune/blas/gemm/CASES/ATL_cmm4x4x128_av.c
===================================================================
--- ATLAS.orig/tune/blas/gemm/CASES/ATL_cmm4x4x128_av.c
+++ ATLAS/tune/blas/gemm/CASES/ATL_cmm4x4x128_av.c
@@ -181,7 +181,7 @@ void ATL_USERMM(const int M, const int N
.globl Mjoin(_,ATL_USERMM)
Mjoin(_,ATL_USERMM):
#else
- #if defined(ATL_USE64BITS)
+ #if defined(ATL_USE64BITS) && _CALL_ELF != 2
/*
* Official Program Descripter section, seg fault w/o it on Linux/PPC64
*/
@@ -199,6 +199,12 @@ Mjoin(.,ATL_USERMM):
#else
.globl ATL_USERMM
ATL_USERMM:
+ #if _CALL_ELF == 2
+ .type ATL_USERMM,@function
+0: addis 2,12,.TOC.-0b@ha
+ addi 2,2,.TOC.-0b@l
+ .localentry ATL_USERMM, .-ATL_USERMM
+ #endif
#endif
#endif
/* Save regs */
Index: ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x2pf_av.c
===================================================================
--- ATLAS.orig/tune/blas/gemm/CASES/ATL_dmm4x4x2pf_av.c
+++ ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x2pf_av.c
@@ -279,7 +279,7 @@ void ATL_USERMM(const int M, const int N
#endif
.text
#ifdef ATL_GAS_LINUX_PPC
- #if defined(ATL_USE64BITS)
+ #if defined(ATL_USE64BITS) && _CALL_ELF != 2
/*
* No idea what this does, but seg fault without it (I think it is
* partially resp for making code callable from both static & PIC code)
@@ -296,8 +296,14 @@ ATL_USERMM:
.globl Mjoin(.,ATL_USERMM)
Mjoin(.,ATL_USERMM):
#else
-.globl ATL_USERMM
+ .globl ATL_USERMM
ATL_USERMM:
+ #if _CALL_ELF == 2
+ .type ATL_USERMM,@function
+0: addis 2,12,.TOC.-0b@ha
+ addi 2,2,.TOC.-0b@l
+ .localentry ATL_USERMM, .-ATL_USERMM
+ #endif
#define IROFF 8
#define FROFF IROFF+48
#define FSIZE (((IROFF+FROFF+144+15)/16)*16)
Index: ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x32_ppc.c
===================================================================
--- ATLAS.orig/tune/blas/gemm/CASES/ATL_dmm4x4x32_ppc.c
+++ ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x32_ppc.c
@@ -268,7 +268,7 @@ Mjoin(.,ATL_USERMM):
.globl Mjoin(_,ATL_USERMM)
Mjoin(_,ATL_USERMM):
#else
- #if defined(ATL_USE64BITS)
+ #if defined(ATL_USE64BITS) && _CALL_ELF != 2
/*
* Official Program Descripter section, seg fault w/o it on Linux/PPC64
*/
@@ -285,6 +285,12 @@ Mjoin(.,ATL_USERMM):
#else
.globl ATL_USERMM
ATL_USERMM:
+ #if _CALL_ELF == 2
+ .type ATL_USERMM,@function
+0: addis 2,12,.TOC.-0b@ha
+ addi 2,2,.TOC.-0b@l
+ .localentry ATL_USERMM, .-ATL_USERMM
+ #endif
#endif
#endif
#endif
Index: ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c
===================================================================
--- ATLAS.orig/tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c
+++ ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c
@@ -170,13 +170,21 @@ void ATL_USERMM(const int M, const int N
const TYPE beta, TYPE *C, const int ldc)
(r10) 8(r1)
*******************************************************************************
-64 bit ABIs:
+64 bit ABIv1s:
r3 r4 r5 r6/f1
void ATL_USERMM(const int M, const int N, const int K, const TYPE alpha,
r7 r8 r9 r10
const TYPE *A, const int lda, const TYPE *B, const int ldb,
f2 120(r1) 128(r1)
const TYPE beta, TYPE *C, const int ldc)
+
+64 bit ABIv2s:
+ r3 r4 r5 r6/f1
+void ATL_USERMM(const int M, const int N, const int K, const TYPE alpha,
+ r7 r8 r9 r10
+ const TYPE *A, const int lda, const TYPE *B, const int ldb,
+ f2 104(r1) 112(r1)
+ const TYPE beta, TYPE *C, const int ldc)
#endif
#ifdef ATL_AS_AIX_PPC
.csect .text[PR]
@@ -202,7 +210,7 @@ Mjoin(.,ATL_USERMM):
.globl Mjoin(_,ATL_USERMM)
Mjoin(_,ATL_USERMM):
#else
- #if defined(ATL_USE64BITS)
+ #if defined(ATL_USE64BITS) && _CALL_ELF != 2
/*
* Official Program Descripter section, seg fault w/o it on Linux/PPC64
*/
@@ -219,6 +227,12 @@ Mjoin(.,ATL_USERMM):
#else
.globl ATL_USERMM
ATL_USERMM:
+ #if _CALL_ELF == 2
+ .type ATL_USERMM,@function
+0: addis 2,12,.TOC.-0b@ha
+ addi 2,2,.TOC.-0b@l
+ .localentry ATL_USERMM, .-ATL_USERMM
+ #endif
#endif
#endif
#endif
@@ -257,9 +271,17 @@ ATL_USERMM:
#endif
#endif
+
#if defined (ATL_USE64BITS)
+#if _CALL_ELF == 2
+/* ABIv2 */
+ ld pC0, 104(r1)
+ ld incCn, 112(r1)
+#else
+/* ABIv1 */
ld pC0, 120(r1)
ld incCn, 128(r1)
+#endif
#elif defined(ATL_AS_OSX_PPC) || defined(ATL_AS_AIX_PPC)
lwz pC0, 68(r1)
lwz incCn, 72(r1)
Index: ATLAS/tune/blas/gemm/CASES/ATL_smm4x4x128_av.c
===================================================================
--- ATLAS.orig/tune/blas/gemm/CASES/ATL_smm4x4x128_av.c
+++ ATLAS/tune/blas/gemm/CASES/ATL_smm4x4x128_av.c
@@ -196,7 +196,7 @@ void ATL_USERMM(const int M, const int N
.globl Mjoin(_,ATL_USERMM)
Mjoin(_,ATL_USERMM):
#else
- #if defined(ATL_USE64BITS)
+ #if defined(ATL_USE64BITS) && _CALL_ELF != 2
/*
* Official Program Descripter section, seg fault w/o it on Linux/PPC64
*/
@@ -214,6 +214,12 @@ Mjoin(.,ATL_USERMM):
#else
.globl ATL_USERMM
ATL_USERMM:
+ #if _CALL_ELF == 2
+ .type ATL_USERMM,@function
+0: addis 2,12,.TOC.-0b@ha
+ addi 2,2,.TOC.-0b@l
+ .localentry ATL_USERMM, .-ATL_USERMM
+ #endif
#endif
#endif
/*