File glibc-fix-avx512-mempcpy.patch of Package glibc
bnc#1092877 and maybe bnc#1093291

mempcpy overwrites 128 bytes after the destination buffer when the copy
size is large enough to go into the non-temporal loop, i.e. when it is
larger than half the shared cache size per core.

After that loop, two 64-byte stores (%zmm4 and %zmm5) were addressed
through %rax.  For mempcpy, %rax has already been advanced to
destination + length (the return value), so both stores landed past the
end of the destination buffer.  Keep the original destination pointer in
%r11 and store through that instead.

Index: glibc-2.26/sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S
===================================================================
--- glibc-2.26.orig/sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S	2017-08-02 14:57:16.000000000 +0200
+++ glibc-2.26/sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S	2018-05-18 14:40:32.000000000 +0200
@@ -31,6 +31,7 @@ END (__mempcpy_chk_avx512_no_vzeroupper)
 
 ENTRY (__mempcpy_avx512_no_vzeroupper)
 	movq	%rdi, %rax
+	movq	%rdi, %r11
 	addq	%rdx, %rax
 	jmp	L(start)
 END (__mempcpy_avx512_no_vzeroupper)
@@ -45,6 +46,7 @@ END (__memmove_chk_avx512_no_vzeroupper)
 
 ENTRY (__memmove_avx512_no_vzeroupper)
 	mov	%rdi, %rax
+	mov	%rdi, %r11
 # ifdef USE_AS_MEMPCPY
 	add	%rdx, %rax
 # endif
@@ -370,8 +372,8 @@ L(gobble_256bytes_nt_loop):
 	cmp	$256, %rdx
 	ja	L(gobble_256bytes_nt_loop)
 	sfence
-	vmovups	%zmm4, (%rax)
-	vmovups	%zmm5, 0x40(%rax)
+	vmovups	%zmm4, (%r11)
+	vmovups	%zmm5, 0x40(%r11)
 	jmp	L(check)
 
 L(preloop_large_bkw):