File VIA_padlock_support_on_64systems.patch of Package openssl.1633

Index: openssl-1.0.1c/engines/e_padlock.c
===================================================================
--- openssl-1.0.1c.orig/engines/e_padlock.c
+++ openssl-1.0.1c/engines/e_padlock.c
@@ -101,7 +101,10 @@
    compiler choice is limited to GCC and Microsoft C. */
 #undef COMPILE_HW_PADLOCK
 #if !defined(I386_ONLY) && !defined(OPENSSL_NO_INLINE_ASM)
-# if (defined(__GNUC__) && (defined(__i386__) || defined(__i386))) || \
+# if (defined(__GNUC__) && __GNUC__>=2 && \
+	(defined(__i386__) || defined(__i386) || \
+	 defined(__x86_64__) || defined(__x86_64)) \
+     ) || \
      (defined(_MSC_VER) && defined(_M_IX86))
 #  define COMPILE_HW_PADLOCK
 # endif
@@ -304,6 +307,7 @@ static volatile struct padlock_cipher_da
  * =======================================================
  */
 #if defined(__GNUC__) && __GNUC__>=2
+#if defined(__i386__) || defined(__i386)
 /*
  * As for excessive "push %ebx"/"pop %ebx" found all over.
  * When generating position-independent code GCC won't let
@@ -383,21 +387,6 @@ padlock_available(void)
 	return padlock_use_ace + padlock_use_rng;
 }
 
-#ifndef OPENSSL_NO_AES
-/* Our own htonl()/ntohl() */
-static inline void
-padlock_bswapl(AES_KEY *ks)
-{
-	size_t i = sizeof(ks->rd_key)/sizeof(ks->rd_key[0]);
-	unsigned int *key = ks->rd_key;
-
-	while (i--) {
-		asm volatile ("bswapl %0" : "+r"(*key));
-		key++;
-	}
-}
-#endif
-
 /* Force key reload from memory to the CPU microcode.
    Loading EFLAGS from the stack clears EFLAGS[30] 
    which does the trick. */
@@ -456,11 +445,130 @@ static inline void *name(size_t cnt,		\
 	return iv;				\
 }
 
+
+#endif
+
+#elif defined(__x86_64__) || defined(__x86_64)
+
+/* Load supported features of the CPU to see if
+   the PadLock is available. */
+	static int
+padlock_available(void)
+{
+	char vendor_string[16];
+	unsigned int eax, edx;
+
+	/* Are we running on the Centaur (VIA) CPU? */
+	eax = 0x00000000;
+	vendor_string[12] = 0;
+	asm volatile (
+			"cpuid\n"
+			"movl	%%ebx,(%1)\n"
+			"movl	%%edx,4(%1)\n"
+			"movl	%%ecx,8(%1)\n"
+			: "+a"(eax) : "r"(vendor_string) : "rbx", "rcx", "rdx");
+	if (strcmp(vendor_string, "CentaurHauls") != 0)
+		return 0;
+
+	/* Check for Centaur Extended Feature Flags presence */
+	eax = 0xC0000000;
+	asm volatile ("cpuid"
+			: "+a"(eax) : : "rbx", "rcx", "rdx");
+	if (eax < 0xC0000001)
+		return 0;
+
+	/* Read the Centaur Extended Feature Flags */
+	eax = 0xC0000001;
+	asm volatile ("cpuid"
+			: "+a"(eax), "=d"(edx) : : "rbx", "rcx");
+
+	/* Fill up some flags */
+	padlock_use_ace = ((edx & (0x3<<6)) == (0x3<<6));
+	padlock_use_rng = ((edx & (0x3<<2)) == (0x3<<2));
+
+	return padlock_use_ace + padlock_use_rng;
+}
+
+/* Force key reload from memory to the CPU microcode.
+   Loading EFLAGS from the stack clears EFLAGS[30] 
+   which does the trick. */
+	static inline void
+padlock_reload_key(void)
+{
+	asm volatile ("pushfq; popfq");
+}
+
+#ifndef OPENSSL_NO_AES
+/*
+ * This is heuristic key context tracing. At first one
+ * believes that one should use atomic swap instructions,
+ * but it's not actually necessary. Point is that if
+ * padlock_saved_context was changed by another thread
+ * after we've read it and before we compare it with cdata,
+ * our key *shall* be reloaded upon thread context switch
+ * and we are therefore set in either case...
+ */
+	static inline void
+padlock_verify_context(struct padlock_cipher_data *cdata)
+{
+	asm volatile (
+			"pushfq\n"
+			"	btl	$30,(%%rsp)\n"
+			"	jnc	1f\n"
+			"	cmpq	%2,%1\n"
+			"	je	1f\n"
+			"	popfq\n"
+			"	subq	$8,%%rsp\n"
+			"1:	addq	$8,%%rsp\n"
+			"	movq	%2,%0"
+			:"+m"(padlock_saved_context)
+			: "r"(padlock_saved_context), "r"(cdata) : "cc");
+}
+
+/* Template for padlock_xcrypt_* modes */
+/* BIG FAT WARNING: 
+ * 	The offsets used with 'leaq' instructions
+ * 	describe items of the 'padlock_cipher_data'
+ * 	structure.
+ */
+#define PADLOCK_XCRYPT_ASM(name,rep_xcrypt)	\
+	static inline void *name(size_t cnt,		\
+			struct padlock_cipher_data *cdata,	\
+			void *out, const void *inp) 		\
+{	void *iv; 				\
+	asm volatile ( "leaq	16(%0),%%rdx\n"	\
+			"	leaq	32(%0),%%rbx\n"	\
+			rep_xcrypt "\n"		\
+			: "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp) \
+			: "0"(cdata), "1"(cnt), "2"(out), "3"(inp)  \
+			: "rbx", "rdx", "cc", "memory");	\
+	return iv;				\
+}
+#endif
+
+#endif	/* cpu */
+
+#ifndef OPENSSL_NO_AES
+
+
 /* Generate all functions with appropriate opcodes */
 PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb, ".byte 0xf3,0x0f,0xa7,0xc8")	/* rep xcryptecb */
 PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc, ".byte 0xf3,0x0f,0xa7,0xd0")	/* rep xcryptcbc */
 PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb, ".byte 0xf3,0x0f,0xa7,0xe0")	/* rep xcryptcfb */
 PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb, ".byte 0xf3,0x0f,0xa7,0xe8")	/* rep xcryptofb */
+
+/* Our own htonl()/ntohl() */
+static inline void
+padlock_bswapl(AES_KEY *ks)
+{
+	size_t i = sizeof(ks->rd_key)/sizeof(ks->rd_key[0]);
+	unsigned int *key = ks->rd_key;
+
+	while (i--) {
+		asm volatile ("bswapl %0" : "+r"(*key));
+		key++;
+	}
+}
 #endif
 
 /* The RNG call itself */
@@ -491,8 +599,8 @@ padlock_xstore(void *addr, unsigned int
 static inline unsigned char *
 padlock_memcpy(void *dst,const void *src,size_t n)
 {
-	long       *d=dst;
-	const long *s=src;
+	size_t       *d=dst;
+	const size_t *s=src;
 
 	n /= sizeof(*d);
 	do { *d++ = *s++; } while (--n);
Index: openssl-1.0.1c/engines/e_padlock.c
===================================================================
--- openssl-1.0.1c.orig/engines/e_padlock.c
+++ openssl-1.0.1c/engines/e_padlock.c
@@ -457,30 +457,33 @@ padlock_available(void)
 {
 	char vendor_string[16];
 	unsigned int eax, edx;
+	size_t	scratch;
 
 	/* Are we running on the Centaur (VIA) CPU? */
 	eax = 0x00000000;
 	vendor_string[12] = 0;
 	asm volatile (
+			"movq	%%rbx,%1\n"	/* save %rbx: cpuid overwrites it and it is not listed as clobbered */
 			"cpuid\n"
-			"movl	%%ebx,(%1)\n"
-			"movl	%%edx,4(%1)\n"
-			"movl	%%ecx,8(%1)\n"
-			: "+a"(eax) : "r"(vendor_string) : "rbx", "rcx", "rdx");
+			"movl	%%ebx,(%2)\n"	/* %2 is pinned to %rdi ("D") so it can never be a register cpuid writes */
+			"movl	%%edx,4(%2)\n"
+			"movl	%%ecx,8(%2)\n"	/* these stores fill vendor_string, hence the "memory" clobber */
+			"movq	%1,%%rbx"	/* restore %rbx */
+			: "+a"(eax), "=&r"(scratch) : "D"(vendor_string) : "rcx", "rdx", "memory");
 	if (strcmp(vendor_string, "CentaurHauls") != 0)
 		return 0;
 
 	/* Check for Centaur Extended Feature Flags presence */
 	eax = 0xC0000000;
-	asm volatile ("cpuid"
-			: "+a"(eax) : : "rbx", "rcx", "rdx");
+	asm volatile ("movq %%rbx,%1; cpuid; movq %1,%%rbx"
+		: "+a"(eax), "=&r"(scratch) : : "rcx", "rdx");
 	if (eax < 0xC0000001)
 		return 0;
 
 	/* Read the Centaur Extended Feature Flags */
 	eax = 0xC0000001;
-	asm volatile ("cpuid"
-			: "+a"(eax), "=d"(edx) : : "rbx", "rcx");
+	asm volatile ("movq %%rbx,%2; cpuid; movq %2,%%rbx"
+		: "+a"(eax), "=d"(edx), "=&r"(scratch) : : "rcx");
 
 	/* Fill up some flags */
 	padlock_use_ace = ((edx & (0x3<<6)) == (0x3<<6));
@@ -536,12 +539,15 @@ padlock_verify_context(struct padlock_ci
 			struct padlock_cipher_data *cdata,	\
 			void *out, const void *inp) 		\
 {	void *iv; 				\
-	asm volatile ( "leaq	16(%0),%%rdx\n"	\
+	size_t scratch;	/* receives the incoming %rbx value for the duration of the asm */	\
+	asm volatile ( "movq	%%rbx,%4\n"	/* save %rbx in scratch (%4); it is overwritten by the leaq below */	\
+			"	leaq	16(%0),%%rdx\n"	\
 			"	leaq	32(%0),%%rbx\n"	\
 			rep_xcrypt "\n"		\
-			: "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp) \
+			"	movq	%4,%%rbx"	/* restore %rbx, which is no longer in the clobber list */	\
+			: "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp), "=&r"(scratch) \
 			: "0"(cdata), "1"(cnt), "2"(out), "3"(inp)  \
-			: "rbx", "rdx", "cc", "memory");	\
+			: "rdx", "cc", "memory");	\
 	return iv;				\
 }
 #endif