File 0001-Fix-GetTimeCounter-for-Aarch64-variants.patch of Package netgen
From 85e8c09ff6626b12480f4919a26a7086d4c20579 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Stefan=20Br=C3=BCns?= <stefan.bruens@rwth-aachen.de>
Date: Wed, 3 Mar 2021 17:03:29 +0100
Subject: [PATCH] Fix GetTimeCounter for Aarch64 variants
Neither GCC nor Clang define an __arm64__ preprocessor macro, but use
__aarch64__ (MSVC uses _M_ARM64). Add a "64" suffix to the define, i.e.
NETGEN_ARCH_ARM64, to make it more obvious that it only refers to aarch64,
and to be in line with NETGEN_ARCH_AMD64.
Replace the (Clang specific) __builtin_readcyclecounter with inline
asm, as the builtin has several drawbacks:
- The builtin returns cycles (i.e. varies with CPU frequency), not time
- It may return 0, depending on the PMU settings
- It may cause an illegal instruction, in case it is not trapped by the
kernel, e.g. on FreeBSD.
Reading the generic timer/counter CNTVCT_EL0 instead of PMCCNTR_EL0 avoids
these pitfalls. The inline asm works with both GCC and Clang, whereas the
builtin is Clang only.
---
libsrc/core/ngcore_api.hpp | 6 +++++-
libsrc/core/utils.hpp | 11 +++++++----
2 files changed, 12 insertions(+), 5 deletions(-)
diff --git a/libsrc/core/ngcore_api.hpp b/libsrc/core/ngcore_api.hpp
index 330e7e33..9c977c1c 100644
--- a/libsrc/core/ngcore_api.hpp
+++ b/libsrc/core/ngcore_api.hpp
@@ -71,7 +71,11 @@
#define NETGEN_ARCH_AMD64
#endif
-#if defined(__arm64__) || defined(_M_ARM64)
+#if defined(__aarch64__) || defined(_M_ARM64)
+#define NETGEN_ARCH_ARM64
+#endif
+
+#if defined(__arm__) || defined(_M_ARM)
#define NETGEN_ARCH_ARM
#endif
diff --git a/libsrc/core/utils.hpp b/libsrc/core/utils.hpp
index ca015ae3..102ff319 100644
--- a/libsrc/core/utils.hpp
+++ b/libsrc/core/utils.hpp
@@ -10,7 +10,7 @@
#include "ngcore_api.hpp" // for NGCORE_API and CPU arch macros
-#if defined(__APPLE__) && defined(NETGEN_ARCH_ARM)
+#if defined(__APPLE__) && defined(NETGEN_ARCH_ARM64)
#include <mach/mach_time.h>
#endif
@@ -58,12 +58,15 @@ namespace ngcore
inline TTimePoint GetTimeCounter() noexcept
{
-#if defined(__APPLE__) && defined(NETGEN_ARCH_ARM)
+#if defined(__APPLE__) && defined(NETGEN_ARCH_ARM64)
return mach_absolute_time();
#elif defined(NETGEN_ARCH_AMD64)
return __rdtsc();
-#elif defined(NETGEN_ARCH_ARM)
- return __builtin_readcyclecounter();
+#elif defined(NETGEN_ARCH_ARM64) && defined(__GNUC__)
+ // __GNUC__ is also defined by CLANG. Use inline asm to read Generic Timer
+ unsigned long long tics;
+ __asm __volatile("mrs %0, CNTVCT_EL0" : "=&r" (tics));
+ return tics;
#else
#warning "Unsupported CPU architecture"
return 0;
--
2.30.1