File 0325-erl_interface-Optimize-latin1_to_utf8-and-friend.patch of Package erlang

From 1ab478a7ce281f7cdce01df2fe04953c0770fdbc Mon Sep 17 00:00:00 2001
From: Sverker Eriksson <sverker@erlang.org>
Date: Mon, 19 Mar 2018 19:13:29 +0100
Subject: [PATCH 2/2] erl_interface: Optimize latin1_to_utf8 and friend

to do a word-wise check/copy of pure ASCII input
if ARCH allows unaligned word access (x86 and amd64).
---
 lib/erl_interface/configure.in             | 92 ++++++++++++++++++++++++++++++
 lib/erl_interface/src/decode/decode_atom.c | 62 ++++++++++++++++++++
 2 files changed, 154 insertions(+)

diff --git a/lib/erl_interface/configure.in b/lib/erl_interface/configure.in
index 0a8fbf513c..7cc1f1f89a 100644
--- a/lib/erl_interface/configure.in
+++ b/lib/erl_interface/configure.in
@@ -106,6 +106,98 @@ if test $ac_cv_sizeof_long = 8; then
   CFLAGS="$CFLAGS -DEI_64BIT"
 fi
 
+dnl Determine target hardware in ARCH. The CPU name is taken from host_cpu
+dnl when both host_alias and host_cpu are set, otherwise from uname -m.
+dnl CPU names that are not listed below map to ARCH=noarch.
+AC_MSG_CHECKING([target hardware architecture])
+if test "x$host_alias" != "x" && test "x$host_cpu" != "x"; then
+    chk_arch_=$host_cpu
+else
+    chk_arch_=`uname -m`
+fi
+
+case $chk_arch_ in
+    sun4u)	ARCH=ultrasparc;;
+    sparc64)	ARCH=sparc64;;
+    sun4v)	ARCH=ultrasparc;;
+    i86pc)	ARCH=x86;;
+    i386)	ARCH=x86;;
+    i486)	ARCH=x86;;
+    i586)	ARCH=x86;;
+    i686)	ARCH=x86;;
+    x86_64)	ARCH=amd64;;
+    amd64)	ARCH=amd64;;
+    macppc)	ARCH=ppc;;
+    powerpc)	ARCH=ppc;;
+    ppc)	ARCH=ppc;;
+    ppc64)	ARCH=ppc64;;
+    ppc64le)	ARCH=ppc64le;;
+    "Power Macintosh")	ARCH=ppc;;
+    armv5b)	ARCH=arm;;
+    armv5teb)	ARCH=arm;;
+    armv5tel)	ARCH=arm;;
+    armv5tejl)	ARCH=arm;;
+    armv6l)	ARCH=arm;;
+    armv6hl)	ARCH=arm;;
+    armv7l)	ARCH=arm;;
+    armv7hl)	ARCH=arm;;
+    tile)	ARCH=tile;;
+    *)	 	ARCH=noarch;;
+esac
+AC_MSG_RESULT($ARCH)
+
+dnl The CPU name found above may not match the compiler's mode, so ARCH is
+dnl switched between x86/amd64, ultrasparc/sparc64 and ppc/ppc64 according
+dnl to the pointer size in ac_cv_sizeof_void_p. An arm CPU with 8-byte
+dnl pointers becomes noarch. Any other combination leaves ARCH unchanged.
+AC_MSG_CHECKING(whether compilation mode forces ARCH adjustment)
+case "$ARCH-$ac_cv_sizeof_void_p" in
+    x86-8)
+	AC_MSG_RESULT(yes: adjusting ARCH=x86 to ARCH=amd64)
+	ARCH=amd64
+	;;
+    amd64-4)
+	AC_MSG_RESULT(yes: adjusting ARCH=amd64 to ARCH=x86)
+	ARCH=x86
+	;;
+    ultrasparc-8)
+	AC_MSG_RESULT(yes: adjusting ARCH=ultrasparc to ARCH=sparc64)
+	ARCH=sparc64
+	;;
+    sparc64-4)
+	AC_MSG_RESULT(yes: adjusting ARCH=sparc64 to ARCH=ultrasparc)
+	ARCH=ultrasparc
+	;;
+    ppc64-4)
+	AC_MSG_RESULT(yes: adjusting ARCH=ppc64 to ARCH=ppc)
+	ARCH=ppc
+	;;
+    ppc-8)
+	AC_MSG_RESULT(yes: adjusting ARCH=ppc to ARCH=ppc64)
+	ARCH=ppc64
+	;;
+    arm-8)
+	AC_MSG_RESULT(yes: adjusting ARCH=arm to ARCH=noarch)
+	ARCH=noarch
+	;;
+    *)
+	AC_MSG_RESULT(no: ARCH is $ARCH)
+	;;
+esac
+
+AC_SUBST(ARCH)
+dnl HAVE_UNALIGNED_WORD_ACCESS is defined only when ARCH is x86 or amd64.
+AC_MSG_CHECKING(for unaligned word access)
+case "$ARCH" in
+    x86|amd64)
+	AC_MSG_RESULT(yes: x86 or amd64)
+	AC_DEFINE(HAVE_UNALIGNED_WORD_ACCESS, 1, [Define if hw supports unaligned word access])
+	;;
+    *)
+	AC_MSG_RESULT(no)
+	;;
+esac
+
 AC_CHECK_TOOL(AR, ar, false)
 if test "$AR" = false; then
   AC_MSG_ERROR([No 'ar' command found in PATH])
diff --git a/lib/erl_interface/src/decode/decode_atom.c b/lib/erl_interface/src/decode/decode_atom.c
index b3bba82434..87cd75b1be 100644
--- a/lib/erl_interface/src/decode/decode_atom.c
+++ b/lib/erl_interface/src/decode/decode_atom.c
@@ -92,6 +92,51 @@ int ei_decode_atom_as(const char *buf, int *index, char* p, int destlen,
 }	
 
 
+
+#ifdef HAVE_UNALIGNED_WORD_ACCESS
+
+#if SIZEOF_VOID_P == SIZEOF_LONG
+typedef unsigned long AsciiWord;        /* unsigned, same size as a pointer */
+#elif SIZEOF_VOID_P == SIZEOF_LONG_LONG
+typedef unsigned long long AsciiWord;   /* unsigned, same size as a pointer */
+#else
+#  error "Unknown word type"
+#endif
+
+#if SIZEOF_VOID_P == 4   /* mask has the high bit of every byte in a word set */
+#  define ASCII_CHECK_MASK ((AsciiWord)0x80808080U)
+#elif SIZEOF_VOID_P == 8
+#  define ASCII_CHECK_MASK ((AsciiWord)0x8080808080808080U)
+#endif                   /* (word & mask) != 0 means a byte >= 0x80 is present */
+
+/* Fast path for a pure ASCII prefix: scan src one AsciiWord at a time and,
+ * unless dst is NULL, copy each word to dst. Stops at the first word that
+ * contains a byte with the high bit set. Returns the number of bytes handled
+ * (whole words only); 0 if slen is not positive or exceeds destlen. */
+static int ascii_fast_track(char* dst, const char* src, int slen, int destlen)
+{
+    const AsciiWord* src_word = (const AsciiWord*) src;
+    const AsciiWord* src_word_end;
+
+    /* Test the sign first; a negative slen would wrap in the division below. */
+    if (slen <= 0 || destlen < slen)
+        return 0;
+    src_word_end = src_word + (slen / sizeof(AsciiWord));
+
+    if (dst) {
+        AsciiWord* dst_word = (AsciiWord*) dst;
+
+        while (src_word < src_word_end && (*src_word & ASCII_CHECK_MASK) == 0)
+            *dst_word++ = *src_word++;
+    }
+    else {
+        while (src_word < src_word_end && (*src_word & ASCII_CHECK_MASK) == 0)
+            src_word++;
+    }
+    return (int) ((const char*)src_word - src);
+}
+#endif /* HAVE_UNALIGNED_WORD_ACCESS */
+
 int utf8_to_latin1(char* dst, const char* src, int slen, int destlen,
 		   erlang_char_encoding* res_encp)
 {
@@ -99,6 +144,15 @@ int utf8_to_latin1(char* dst, const char* src, int slen, int destlen,
     const char* const dst_end = dst + destlen;
     int found_non_ascii = 0;
 
+#ifdef HAVE_UNALIGNED_WORD_ACCESS
+    {
+        int aft = ascii_fast_track(dst, src, slen, destlen);
+        src += aft;
+        slen -= aft;
+        dst += aft;
+    }
+#endif
+
     while (slen > 0) {
 	if (dst >= dst_end) return -1;
 	if ((src[0] & 0x80) == 0) {
@@ -136,6 +190,14 @@ int latin1_to_utf8(char* dst, const char* src, int slen, int destlen,
     const char* const dst_end = dst + destlen;
     int found_non_ascii = 0;
 
+#ifdef HAVE_UNALIGNED_WORD_ACCESS
+    {
+        int aft = ascii_fast_track(dst, src, slen, destlen);
+        dst += aft;
+        src += aft;
+    }
+#endif
+
     while (src < src_end) {
 	if (dst >= dst_end) return -1;
 	if ((src[0] & 0x80) == 0) {
-- 
2.16.3

openSUSE Build Service is sponsored by