Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
home:draht:parking
openssl
VIA_padlock_support_on_64systems.patch
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File VIA_padlock_support_on_64systems.patch of Package openssl
diff -rNU 30 ../openssl-1.0.1n-o/engines/e_padlock.c ./engines/e_padlock.c --- ../openssl-1.0.1n-o/engines/e_padlock.c 2015-06-11 15:01:06.000000000 +0200 +++ ./engines/e_padlock.c 2015-06-12 04:30:50.000000000 +0200 @@ -74,61 +74,64 @@ # include <openssl/aes.h> #endif #include <openssl/rand.h> #include <openssl/err.h> #ifndef OPENSSL_NO_HW # ifndef OPENSSL_NO_HW_PADLOCK /* Attempt to have a single source for both 0.9.7 and 0.9.8 :-) */ # if (OPENSSL_VERSION_NUMBER >= 0x00908000L) # ifndef OPENSSL_NO_DYNAMIC_ENGINE # define DYNAMIC_ENGINE # endif # elif (OPENSSL_VERSION_NUMBER >= 0x00907000L) # ifdef ENGINE_DYNAMIC_SUPPORT # define DYNAMIC_ENGINE # endif # else # error "Only OpenSSL >= 0.9.7 is supported" # endif /* * VIA PadLock AES is available *ONLY* on some x86 CPUs. Not only that it * doesn't exist elsewhere, but it even can't be compiled on other platforms! * * In addition, because of the heavy use of inline assembler, compiler choice * is limited to GCC and Microsoft C. */ # undef COMPILE_HW_PADLOCK # if !defined(I386_ONLY) && !defined(OPENSSL_NO_INLINE_ASM) -# if (defined(__GNUC__) && (defined(__i386__) || defined(__i386))) || \ +# if (defined(__GNUC__) && __GNUC__>=2 && \ + (defined(__i386__) || defined(__i386) || \ + defined(__x86_64__) || defined(__x86_64)) \ + ) || \ (defined(_MSC_VER) && defined(_M_IX86)) # define COMPILE_HW_PADLOCK # endif # endif # ifdef OPENSSL_NO_DYNAMIC_ENGINE # ifdef COMPILE_HW_PADLOCK static ENGINE *ENGINE_padlock(void); # endif void ENGINE_load_padlock(void) { /* On non-x86 CPUs it just returns. */ # ifdef COMPILE_HW_PADLOCK ENGINE *toadd = ENGINE_padlock(); if (!toadd) return; ENGINE_add(toadd); ENGINE_free(toadd); ERR_clear_error(); # endif } # endif # ifdef COMPILE_HW_PADLOCK /* * We do these includes here to avoid header problems on platforms that do * not have the VIA padlock anyway... */ @@ -276,60 +279,61 @@ int ciphr:1; /* n/a in C3 */ unsigned int keygen:1; int interm:1; unsigned int encdec:1; int ksize:2; } b; } cword; /* Control word */ AES_KEY ks; /* Encryption key */ }; /* * Essentially this variable belongs in thread local storage. * Having this variable global on the other hand can only cause * few bogus key reloads [if any at all on single-CPU system], * so we accept the penatly... */ static volatile struct padlock_cipher_data *padlock_saved_context; # endif /*- * ======================================================= * Inline assembler section(s). * ======================================================= * Order of arguments is chosen to facilitate Windows port * using __fastcall calling convention. If you wish to add * more routines, keep in mind that first __fastcall * argument is passed in %ecx and second - in %edx. * ======================================================= */ # if defined(__GNUC__) && __GNUC__>=2 +#if defined(__i386__) || defined(__i386) /* * As for excessive "push %ebx"/"pop %ebx" found all over. * When generating position-independent code GCC won't let * us use "b" in assembler templates nor even respect "ebx" * in "clobber description." Therefore the trouble... */ /* * Helper function - check if a CPUID instruction is available on this CPU */ static int padlock_insn_cpuid_available(void) { int result = -1; /* * We're checking if the bit #21 of EFLAGS can be toggled. If yes = * CPUID is available. */ asm volatile ("pushf\n" "popl %%eax\n" "xorl $0x200000, %%eax\n" "movl %%eax, %%ecx\n" "andl $0x200000, %%ecx\n" "pushl %%eax\n" "popf\n" "pushf\n" "popl %%eax\n" "andl $0x200000, %%eax\n" "xorl %%eax, %%ecx\n" "movl %%ecx, %0\n":"=r" (result)::"eax", "ecx"); @@ -422,98 +426,228 @@ " cmpl %2,%1\n" " je 1f\n" " popfl\n" " subl $4,%%esp\n" "1: addl $4,%%esp\n" " movl %2,%0":"+m" (padlock_saved_context) :"r"(padlock_saved_context), "r"(cdata):"cc"); } /* Template for padlock_xcrypt_* modes */ /* * BIG FAT WARNING: The offsets used with 'leal' instructions describe items * of the 'padlock_cipher_data' structure. */ # define PADLOCK_XCRYPT_ASM(name,rep_xcrypt) \ static inline void *name(size_t cnt, \ struct padlock_cipher_data *cdata, \ void *out, const void *inp) \ { void *iv; \ asm volatile ( "pushl %%ebx\n" \ " leal 16(%0),%%edx\n" \ " leal 32(%0),%%ebx\n" \ rep_xcrypt "\n" \ " popl %%ebx" \ : "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp) \ : "0"(cdata), "1"(cnt), "2"(out), "3"(inp) \ : "edx", "cc", "memory"); \ return iv; \ } + + +#endif + +#elif defined(__x86_64__) || defined(__x86_64) + +/* Load supported features of the CPU to see if + the PadLock is available. */ + static int +padlock_available(void) +{ + char vendor_string[16]; + unsigned int eax, edx; + size_t scratch; + + /* Are we running on the Centaur (VIA) CPU? */ + eax = 0x00000000; + vendor_string[12] = 0; + asm volatile ( + "movq %%rbx,%1\n" + "cpuid\n" + "movl %%ebx,(%2)\n" + "movl %%edx,4(%2)\n" + "movl %%ecx,8(%2)\n" + "movq %1,%%rbx" + : "+a"(eax), "=&r"(scratch) : "r"(vendor_string) : "rcx", "rdx"); + if (strcmp(vendor_string, "CentaurHauls") != 0) + return 0; + + /* Check for Centaur Extended Feature Flags presence */ + eax = 0xC0000000; + asm volatile ("movq %%rbx,%1; cpuid; movq %1,%%rbx" + : "+a"(eax), "=&r"(scratch) : : "rcx", "rdx"); + if (eax < 0xC0000001) + return 0; + + /* Read the Centaur Extended Feature Flags */ + eax = 0xC0000001; + asm volatile ("movq %%rbx,%2; cpuid; movq %2,%%rbx" + : "+a"(eax), "=d"(edx), "=&r"(scratch) : : "rcx"); + + /* Fill up some flags */ + padlock_use_ace = ((edx & (0x3<<6)) == (0x3<<6)); + padlock_use_rng = ((edx & (0x3<<2)) == (0x3<<2)); + + return padlock_use_ace + padlock_use_rng; +} + +/* Force key reload from memory to the CPU microcode. + Loading EFLAGS from the stack clears EFLAGS[30] + which does the trick. */ + static inline void +padlock_reload_key(void) +{ + asm volatile ("pushfq; popfq"); +} + +#ifndef OPENSSL_NO_AES +/* + * This is heuristic key context tracing. At first one + * believes that one should use atomic swap instructions, + * but it's not actually necessary. Point is that if + * padlock_saved_context was changed by another thread + * after we've read it and before we compare it with cdata, + * our key *shall* be reloaded upon thread context switch + * and we are therefore set in either case... + */ + static inline void +padlock_verify_context(struct padlock_cipher_data *cdata) +{ + asm volatile ( + "pushfq\n" + " btl $30,(%%rsp)\n" + " jnc 1f\n" + " cmpq %2,%1\n" + " je 1f\n" + " popfq\n" + " subq $8,%%rsp\n" + "1: addq $8,%%rsp\n" + " movq %2,%0" + :"+m"(padlock_saved_context) + : "r"(padlock_saved_context), "r"(cdata) : "cc"); +} + +/* Template for padlock_xcrypt_* modes */ +/* BIG FAT WARNING: + * The offsets used with 'leal' instructions + * describe items of the 'padlock_cipher_data' + * structure. + */ +#define PADLOCK_XCRYPT_ASM(name,rep_xcrypt) \ + static inline void *name(size_t cnt, \ + struct padlock_cipher_data *cdata, \ + void *out, const void *inp) \ +{ void *iv; \ + size_t scratch; \ + asm volatile ( "movq %%rbx,%4\n" \ + " leaq 16(%0),%%rdx\n" \ + " leaq 32(%0),%%rbx\n" \ + rep_xcrypt "\n" \ + " movq %4,%%rbx" \ + : "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp), "=&r"(scratch) \ + : "0"(cdata), "1"(cnt), "2"(out), "3"(inp) \ + : "rdx", "cc", "memory"); \ + return iv; \ +} +#endif + +#endif /* cpu */ + +#ifndef OPENSSL_NO_AES + + + /* Generate all functions with appropriate opcodes */ /* rep xcryptecb */ PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb, ".byte 0xf3,0x0f,0xa7,0xc8") /* rep xcryptcbc */ PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc, ".byte 0xf3,0x0f,0xa7,0xd0") /* rep xcryptcfb */ PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb, ".byte 0xf3,0x0f,0xa7,0xe0") /* rep xcryptofb */ PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb, ".byte 0xf3,0x0f,0xa7,0xe8") + + +/* Our own htonl()/ntohl() */ +static inline void +padlock_bswapl(AES_KEY *ks) +{ + size_t i = sizeof(ks->rd_key)/sizeof(ks->rd_key[0]); + unsigned int *key = ks->rd_key; + + while (i--) { + asm volatile ("bswapl %0" : "+r"(*key)); + key++; + } +} + + # endif /* The RNG call itself */ static inline unsigned int padlock_xstore(void *addr, unsigned int edx_in) { unsigned int eax_out; asm volatile (".byte 0x0f,0xa7,0xc0" /* xstore */ :"=a" (eax_out), "=m"(*(unsigned *)addr) :"D"(addr), "d"(edx_in) ); return eax_out; } /* * Why not inline 'rep movsd'? I failed to find information on what value in * Direction Flag one can expect and consequently have to apply * "better-safe-than-sorry" approach and assume "undefined." I could * explicitly clear it and restore the original value upon return from * padlock_aes_cipher, but it's presumably too much trouble for too little * gain... In case you wonder 'rep xcrypt*' instructions above are *not* * affected by the Direction Flag and pointers advance toward larger * addresses unconditionally. */ static inline unsigned char *padlock_memcpy(void *dst, const void *src, size_t n) { - long *d = dst; - const long *s = src; + size_t *d = dst; + const size_t *s = src; n /= sizeof(*d); do { *d++ = *s++; } while (--n); return dst; } # elif defined(_MSC_VER) /* * Unlike GCC these are real functions. In order to minimize impact * on performance we adhere to __fastcall calling convention in * order to get two first arguments passed through %ecx and %edx. * Which kind of suits very well, as instructions in question use * both %ecx and %edx as input:-) */ # define REP_XCRYPT(code) \ _asm _emit 0xf3 \ _asm _emit 0x0f _asm _emit 0xa7 \ _asm _emit code /* * BIG FAT WARNING: The offsets used with 'lea' instructions describe items * of the 'padlock_cipher_data' structure. */ # define PADLOCK_XCRYPT_ASM(name,code) \ static void * __fastcall \ name (size_t cnt, void *cdata, \ void *outp, const void *inp) \
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor