File VIA_padlock_support_on_64systems.patch of Package openssl

diff -rNU 30 ../openssl-1.0.1n-o/engines/e_padlock.c ./engines/e_padlock.c
--- ../openssl-1.0.1n-o/engines/e_padlock.c	2015-06-11 15:01:06.000000000 +0200
+++ ./engines/e_padlock.c	2015-06-12 04:30:50.000000000 +0200
@@ -74,61 +74,64 @@
 # include <openssl/aes.h>
 #endif
 #include <openssl/rand.h>
 #include <openssl/err.h>
 
 #ifndef OPENSSL_NO_HW
 # ifndef OPENSSL_NO_HW_PADLOCK
 
 /* Attempt to have a single source for both 0.9.7 and 0.9.8 :-) */
 #  if (OPENSSL_VERSION_NUMBER >= 0x00908000L)
 #   ifndef OPENSSL_NO_DYNAMIC_ENGINE
 #    define DYNAMIC_ENGINE
 #   endif
 #  elif (OPENSSL_VERSION_NUMBER >= 0x00907000L)
 #   ifdef ENGINE_DYNAMIC_SUPPORT
 #    define DYNAMIC_ENGINE
 #   endif
 #  else
 #   error "Only OpenSSL >= 0.9.7 is supported"
 #  endif
 
 /*
 * VIA PadLock AES is available *ONLY* on some x86 CPUs. Not only does it
 * not exist elsewhere, it cannot even be compiled on other platforms!
  *
  * In addition, because of the heavy use of inline assembler, compiler choice
  * is limited to GCC and Microsoft C.
  */
 #  undef COMPILE_HW_PADLOCK
 #  if !defined(I386_ONLY) && !defined(OPENSSL_NO_INLINE_ASM)
-#   if (defined(__GNUC__) && (defined(__i386__) || defined(__i386))) || \
+#   if (defined(__GNUC__) && __GNUC__>=2 && \
+        (defined(__i386__) || defined(__i386) || \
+         defined(__x86_64__) || defined(__x86_64))) || \
      (defined(_MSC_VER) && defined(_M_IX86))
 #    define COMPILE_HW_PADLOCK
 #   endif
 #  endif
 
 #  ifdef OPENSSL_NO_DYNAMIC_ENGINE
 #   ifdef COMPILE_HW_PADLOCK
 static ENGINE *ENGINE_padlock(void);
 #   endif
 
 void ENGINE_load_padlock(void)
 {
 /* On non-x86 CPUs it just returns. */
 #   ifdef COMPILE_HW_PADLOCK
     ENGINE *toadd = ENGINE_padlock();
     if (!toadd)
         return;
     ENGINE_add(toadd);
     ENGINE_free(toadd);
     ERR_clear_error();
 #   endif
 }
 
 #  endif
 
 #  ifdef COMPILE_HW_PADLOCK
 /*
  * We do these includes here to avoid header problems on platforms that do
  * not have the VIA padlock anyway...
  */
@@ -276,60 +279,61 @@
             int ciphr:1;        /* n/a in C3 */
             unsigned int keygen:1;
             int interm:1;
             unsigned int encdec:1;
             int ksize:2;
         } b;
     } cword;                    /* Control word */
     AES_KEY ks;                 /* Encryption key */
 };
 
 /*
  * Essentially this variable belongs in thread local storage.
  * Having this variable global on the other hand can only cause
 * a few bogus key reloads [if any at all on a single-CPU system],
 * so we accept the penalty...
  */
 static volatile struct padlock_cipher_data *padlock_saved_context;
 #   endif
 
 /*-
  * =======================================================
  * Inline assembler section(s).
  * =======================================================
  * Order of arguments is chosen to facilitate Windows port
  * using __fastcall calling convention. If you wish to add
  * more routines, keep in mind that first __fastcall
  * argument is passed in %ecx and second - in %edx.
  * =======================================================
  */
 #   if defined(__GNUC__) && __GNUC__>=2
+#if defined(__i386__) || defined(__i386)
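+/* 32-bit (i386) flavour of the PadLock helper routines */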
 /*
  * As for excessive "push %ebx"/"pop %ebx" found all over.
  * When generating position-independent code GCC won't let
  * us use "b" in assembler templates nor even respect "ebx"
  * in "clobber description." Therefore the trouble...
  */
 
 /*
  * Helper function - check if a CPUID instruction is available on this CPU
  */
 static int padlock_insn_cpuid_available(void)
 {
     int result = -1;
 
     /*
      * We're checking if the bit #21 of EFLAGS can be toggled. If yes =
      * CPUID is available.
      */
     asm volatile ("pushf\n"
                   "popl %%eax\n"
                   "xorl $0x200000, %%eax\n"
                   "movl %%eax, %%ecx\n"
                   "andl $0x200000, %%ecx\n"
                   "pushl %%eax\n"
                   "popf\n"
                   "pushf\n"
                   "popl %%eax\n"
                   "andl $0x200000, %%eax\n"
                   "xorl %%eax, %%ecx\n"
                   "movl %%ecx, %0\n":"=r" (result)::"eax", "ecx");
@@ -422,98 +426,228 @@
                   "       cmpl    %2,%1\n"
                   "       je      1f\n"
                   "       popfl\n"
                   "       subl    $4,%%esp\n"
                   "1:     addl    $4,%%esp\n"
                   "       movl    %2,%0":"+m" (padlock_saved_context)
                   :"r"(padlock_saved_context), "r"(cdata):"cc");
 }
 
 /* Template for padlock_xcrypt_* modes */
 /*
  * BIG FAT WARNING: The offsets used with 'leal' instructions describe items
  * of the 'padlock_cipher_data' structure.
  */
 #     define PADLOCK_XCRYPT_ASM(name,rep_xcrypt)     \
 static inline void *name(size_t cnt,            \
         struct padlock_cipher_data *cdata,      \
         void *out, const void *inp)             \
 {       void *iv;                               \
         asm volatile ( "pushl   %%ebx\n"        \
                 "       leal    16(%0),%%edx\n" \
                 "       leal    32(%0),%%ebx\n" \
                         rep_xcrypt "\n"         \
                 "       popl    %%ebx"          \
                 : "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp) \
                 : "0"(cdata), "1"(cnt), "2"(out), "3"(inp)  \
                 : "edx", "cc", "memory");       \
         return iv;                              \
 }
 
+
+
+#endif /* OPENSSL_NO_AES */
+
+#elif defined(__x86_64__) || defined(__x86_64)
+
+/* Load supported features of the CPU to see if
+   the PadLock is available. */
+static int padlock_available(void)
+{
+       char vendor_string[16];
+       unsigned int eax, edx;
+       size_t  scratch;
+
+       /* Are we running on the Centaur (VIA) CPU? */
+       eax = 0x00000000;
+       vendor_string[12] = 0;
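+       /*
+        * cpuid overwrites %rbx, so it is saved to 'scratch' and restored
+        * by hand instead of being listed as a clobber (mirroring the
+        * 32-bit code, where %ebx may be reserved for PIC).  The "memory"
+        * clobber covers the stores into vendor_string.
+        */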
+       asm volatile (
+                       "movq   %%rbx,%1\n"
+                       "cpuid\n"
+                       "movl   %%ebx,(%2)\n"
+                       "movl   %%edx,4(%2)\n"
+                       "movl   %%ecx,8(%2)\n"
+                       "movq   %1,%%rbx"
+                       : "+a"(eax), "=&r"(scratch) : "r"(vendor_string)
+                       : "rcx", "rdx", "memory");
+       if (strcmp(vendor_string, "CentaurHauls") != 0)
+               return 0;
+
+       /* Check for Centaur Extended Feature Flags presence */
+       eax = 0xC0000000;
+       asm volatile ("movq %%rbx,%1; cpuid; movq %1,%%rbx"
+               : "+a"(eax), "=&r"(scratch) : : "rcx", "rdx");
+       if (eax < 0xC0000001)
+               return 0;
+
+       /* Read the Centaur Extended Feature Flags */
+       eax = 0xC0000001;
+       asm volatile ("movq %%rbx,%2; cpuid; movq %2,%%rbx"
+               : "+a"(eax), "=d"(edx), "=&r"(scratch) : : "rcx");
+
+       /*
+        * Fill in the feature flags: for each unit both the "present" and
+        * "enabled" bits must be set (EDX bits 7:6 for ACE, 3:2 for RNG).
+        */
+       padlock_use_ace = ((edx & (0x3<<6)) == (0x3<<6));
+       padlock_use_rng = ((edx & (0x3<<2)) == (0x3<<2));
+
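+       /* Nonzero if at least one PadLock unit (ACE or RNG) is available */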
+       return padlock_use_ace + padlock_use_rng;
+}
+
+/* Force key reload from memory to the CPU microcode.
+   Loading EFLAGS from the stack clears EFLAGS[30] 
+   which does the trick. */
+static inline void padlock_reload_key(void)
+{
+       asm volatile ("pushfq; popfq");
+}
+
+#ifndef OPENSSL_NO_AES
+/*
+ * This is heuristic key context tracing. At first one
+ * believes that one should use atomic swap instructions,
+ * but it's not actually necessary. Point is that if
+ * padlock_saved_context was changed by another thread
+ * after we've read it and before we compare it with cdata,
+ * our key *shall* be reloaded upon thread context switch
+ * and we are therefore set in either case...
+ */
+static inline void padlock_verify_context(struct padlock_cipher_data *cdata)
+{
+       asm volatile (
+                       "pushfq\n"
+                       "       btl     $30,(%%rsp)\n"
+                       "       jnc     1f\n"
+                       "       cmpq    %2,%1\n"
+                       "       je      1f\n"
+                       "       popfq\n"
+                       "       subq    $8,%%rsp\n"
+                       "1:     addq    $8,%%rsp\n"
+                       "       movq    %2,%0"
+                       :"+m"(padlock_saved_context)
+                       : "r"(padlock_saved_context), "r"(cdata) : "cc");
+}
+
+/* Template for padlock_xcrypt_* modes */
+/*
+ * BIG FAT WARNING: The offsets used with 'leaq' instructions describe items
+ * of the 'padlock_cipher_data' structure.
+ */
+#define PADLOCK_XCRYPT_ASM(name,rep_xcrypt)    \
+       static inline void *name(size_t cnt,            \
+                       struct padlock_cipher_data *cdata,      \
+                       void *out, const void *inp)             \
+{      void *iv;                               \
+       size_t scratch;                         \
+       asm volatile ( "movq    %%rbx,%4\n"     \
+                       "       leaq    16(%0),%%rdx\n" \
+                       "       leaq    32(%0),%%rbx\n" \
+                       rep_xcrypt "\n"         \
+                       "       movq    %4,%%rbx"       \
+                       : "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp), "=&r"(scratch) \
+                       : "0"(cdata), "1"(cnt), "2"(out), "3"(inp)  \
+                       : "rdx", "cc", "memory");       \
+       return iv;                              \
+}
+#endif /* OPENSSL_NO_AES */
+
+#endif /* cpu */
+
+#ifndef OPENSSL_NO_AES
+
+
+
 /* Generate all functions with appropriate opcodes */
 /* rep xcryptecb */
 PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb, ".byte 0xf3,0x0f,0xa7,0xc8")
 /* rep xcryptcbc */
     PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc, ".byte 0xf3,0x0f,0xa7,0xd0")
 /* rep xcryptcfb */
     PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb, ".byte 0xf3,0x0f,0xa7,0xe0")
 /* rep xcryptofb */
     PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb, ".byte 0xf3,0x0f,0xa7,0xe8")
+
+
+/* Our own htonl()/ntohl() */
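+/* The C AES key-schedule routines store the expanded key byte-swapped
+   relative to what the xcrypt instructions expect, so the schedule is
+   swapped in place before it is handed to the hardware. */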
+static inline void padlock_bswapl(AES_KEY *ks)
+{
+       size_t i = sizeof(ks->rd_key)/sizeof(ks->rd_key[0]);
+       unsigned int *key = ks->rd_key;
+
+       while (i--) {
+               asm volatile ("bswapl %0" : "+r"(*key));
+               key++;
+       }
+}
+
+
 #    endif
 /* The RNG call itself */
 static inline unsigned int padlock_xstore(void *addr, unsigned int edx_in)
 {
     unsigned int eax_out;
 
     asm volatile (".byte 0x0f,0xa7,0xc0" /* xstore */
                   :"=a" (eax_out), "=m"(*(unsigned *)addr)
                   :"D"(addr), "d"(edx_in)
         );
 
     return eax_out;
 }
 
 /*
  * Why not inline 'rep movsd'? I failed to find information on what value in
  * Direction Flag one can expect and consequently have to apply
  * "better-safe-than-sorry" approach and assume "undefined." I could
  * explicitly clear it and restore the original value upon return from
  * padlock_aes_cipher, but it's presumably too much trouble for too little
  * gain... In case you wonder 'rep xcrypt*' instructions above are *not*
  * affected by the Direction Flag and pointers advance toward larger
  * addresses unconditionally.
  */
 static inline unsigned char *padlock_memcpy(void *dst, const void *src,
                                             size_t n)
 {
-    long *d = dst;
-    const long *s = src;
+    size_t *d = dst;
+    const size_t *s = src;
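+    /*
+     * The copy assumes n is a nonzero multiple of the machine word size;
+     * any remainder would simply be dropped by the division below.
+     */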
 
     n /= sizeof(*d);
     do {
         *d++ = *s++;
     } while (--n);
 
     return dst;
 }
 
 #   elif defined(_MSC_VER)
 /*
  * Unlike GCC these are real functions. In order to minimize impact
  * on performance we adhere to __fastcall calling convention in
  * order to get two first arguments passed through %ecx and %edx.
  * Which kind of suits very well, as instructions in question use
  * both %ecx and %edx as input:-)
  */
 #    define REP_XCRYPT(code)                \
         _asm _emit 0xf3                 \
         _asm _emit 0x0f _asm _emit 0xa7 \
         _asm _emit code
 
 /*
  * BIG FAT WARNING: The offsets used with 'lea' instructions describe items
  * of the 'padlock_cipher_data' structure.
  */
 #    define PADLOCK_XCRYPT_ASM(name,code)   \
 static void * __fastcall                \
         name (size_t cnt, void *cdata,  \
         void *outp, const void *inp)    \