File musl-1.1.19-git.patch of Package musl

diff --git a/Makefile b/Makefile
index 308ddaae..e23a8332 100644
--- a/Makefile
+++ b/Makefile
@@ -116,17 +116,14 @@ obj/crt/Scrt1.o obj/crt/rcrt1.o: CFLAGS_ALL += -fPIC
 OPTIMIZE_SRCS = $(wildcard $(OPTIMIZE_GLOBS:%=$(srcdir)/src/%))
 $(OPTIMIZE_SRCS:$(srcdir)/%.c=obj/%.o) $(OPTIMIZE_SRCS:$(srcdir)/%.c=obj/%.lo): CFLAGS += -O3
 
-MEMOPS_SRCS = src/string/memcpy.c src/string/memmove.c src/string/memcmp.c src/string/memset.c
-$(MEMOPS_SRCS:%.c=obj/%.o) $(MEMOPS_SRCS:%.c=obj/%.lo): CFLAGS_ALL += $(CFLAGS_MEMOPS)
-
-NOSSP_SRCS = $(wildcard crt/*.c) \
-	src/env/__libc_start_main.c src/env/__init_tls.c \
-	src/env/__stack_chk_fail.c \
-	src/thread/__set_thread_area.c src/thread/$(ARCH)/__set_thread_area.c \
-	src/string/memset.c src/string/$(ARCH)/memset.c \
-	src/string/memcpy.c src/string/$(ARCH)/memcpy.c \
-	ldso/dlstart.c ldso/dynlink.c
-$(NOSSP_SRCS:%.c=obj/%.o) $(NOSSP_SRCS:%.c=obj/%.lo): CFLAGS_ALL += $(CFLAGS_NOSSP)
+MEMOPS_OBJS = $(filter %/memcpy.o %/memmove.o %/memcmp.o %/memset.o, $(LIBC_OBJS))
+$(MEMOPS_OBJS) $(MEMOPS_OBJS:%.o=%.lo): CFLAGS_ALL += $(CFLAGS_MEMOPS)
+
+NOSSP_OBJS = $(CRT_OBJS) $(LDSO_OBJS) $(filter \
+	%/__libc_start_main.o %/__init_tls.o %/__stack_chk_fail.o \
+	%/__set_thread_area.o %/memset.o %/memcpy.o \
+	, $(LIBC_OBJS))
+$(NOSSP_OBJS) $(NOSSP_OBJS:%.o=%.lo): CFLAGS_ALL += $(CFLAGS_NOSSP)
 
 $(CRT_OBJS): CFLAGS_ALL += -DCRT
 
diff --git a/arch/aarch64/bits/hwcap.h b/arch/aarch64/bits/hwcap.h
index 11396d31..1727a387 100644
--- a/arch/aarch64/bits/hwcap.h
+++ b/arch/aarch64/bits/hwcap.h
@@ -14,3 +14,10 @@
 #define HWCAP_JSCVT		(1 << 13)
 #define HWCAP_FCMA		(1 << 14)
 #define HWCAP_LRCPC		(1 << 15)
+#define HWCAP_DCPOP		(1 << 16)
+#define HWCAP_SHA3		(1 << 17)
+#define HWCAP_SM3		(1 << 18)
+#define HWCAP_SM4		(1 << 19)
+#define HWCAP_ASIMDDP		(1 << 20)
+#define HWCAP_SHA512		(1 << 21)
+#define HWCAP_SVE		(1 << 22)
diff --git a/arch/aarch64/bits/signal.h b/arch/aarch64/bits/signal.h
index 1c67313d..b71261f5 100644
--- a/arch/aarch64/bits/signal.h
+++ b/arch/aarch64/bits/signal.h
@@ -25,6 +25,7 @@ typedef struct sigcontext {
 #define FPSIMD_MAGIC 0x46508001
 #define ESR_MAGIC 0x45535201
 #define EXTRA_MAGIC 0x45585401
+#define SVE_MAGIC 0x53564501
 struct _aarch64_ctx {
 	unsigned int magic;
 	unsigned int size;
@@ -45,6 +46,44 @@ struct extra_context {
 	unsigned int size;
 	unsigned int __reserved[3];
 };
+struct sve_context {
+	struct _aarch64_ctx head;
+	unsigned short vl;
+	unsigned short __reserved[3];
+};
+#define SVE_VQ_BYTES		16
+#define SVE_VQ_MIN		1
+#define SVE_VQ_MAX		512
+#define SVE_VL_MIN		(SVE_VQ_MIN * SVE_VQ_BYTES)
+#define SVE_VL_MAX		(SVE_VQ_MAX * SVE_VQ_BYTES)
+#define SVE_NUM_ZREGS		32
+#define SVE_NUM_PREGS		16
+#define sve_vl_valid(vl) \
+	((vl) % SVE_VQ_BYTES == 0 && (vl) >= SVE_VL_MIN && (vl) <= SVE_VL_MAX)
+#define sve_vq_from_vl(vl)	((vl) / SVE_VQ_BYTES)
+#define sve_vl_from_vq(vq)	((vq) * SVE_VQ_BYTES)
+#define SVE_SIG_ZREG_SIZE(vq)	((unsigned)(vq) * SVE_VQ_BYTES)
+#define SVE_SIG_PREG_SIZE(vq)	((unsigned)(vq) * (SVE_VQ_BYTES / 8))
+#define SVE_SIG_FFR_SIZE(vq)	SVE_SIG_PREG_SIZE(vq)
+#define SVE_SIG_REGS_OFFSET					\
+	((sizeof(struct sve_context) + (SVE_VQ_BYTES - 1))	\
+		/ SVE_VQ_BYTES * SVE_VQ_BYTES)
+#define SVE_SIG_ZREGS_OFFSET	SVE_SIG_REGS_OFFSET
+#define SVE_SIG_ZREG_OFFSET(vq, n) \
+	(SVE_SIG_ZREGS_OFFSET + SVE_SIG_ZREG_SIZE(vq) * (n))
+#define SVE_SIG_ZREGS_SIZE(vq) \
+	(SVE_SIG_ZREG_OFFSET(vq, SVE_NUM_ZREGS) - SVE_SIG_ZREGS_OFFSET)
+#define SVE_SIG_PREGS_OFFSET(vq) \
+	(SVE_SIG_ZREGS_OFFSET + SVE_SIG_ZREGS_SIZE(vq))
+#define SVE_SIG_PREG_OFFSET(vq, n) \
+	(SVE_SIG_PREGS_OFFSET(vq) + SVE_SIG_PREG_SIZE(vq) * (n))
+#define SVE_SIG_PREGS_SIZE(vq) \
+	(SVE_SIG_PREG_OFFSET(vq, SVE_NUM_PREGS) - SVE_SIG_PREGS_OFFSET(vq))
+#define SVE_SIG_FFR_OFFSET(vq) \
+	(SVE_SIG_PREGS_OFFSET(vq) + SVE_SIG_PREGS_SIZE(vq))
+#define SVE_SIG_REGS_SIZE(vq) \
+	(SVE_SIG_FFR_OFFSET(vq) + SVE_SIG_FFR_SIZE(vq) - SVE_SIG_REGS_OFFSET)
+#define SVE_SIG_CONTEXT_SIZE(vq) (SVE_SIG_REGS_OFFSET + SVE_SIG_REGS_SIZE(vq))
 #else
 typedef struct {
 	long double __regs[18+256];
diff --git a/arch/arm/atomic_arch.h b/arch/arm/atomic_arch.h
index c5c56f81..62458b45 100644
--- a/arch/arm/atomic_arch.h
+++ b/arch/arm/atomic_arch.h
@@ -7,8 +7,8 @@
 extern uintptr_t __attribute__((__visibility__("hidden")))
 	__a_cas_ptr, __a_barrier_ptr;
 
-#if ((__ARM_ARCH_6__ || __ARM_ARCH_6K__ || __ARM_ARCH_6ZK__) && !__thumb__) \
- || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ ||  __ARM_ARCH >= 7
+#if ((__ARM_ARCH_6__ || __ARM_ARCH_6K__ || __ARM_ARCH_6KZ__ || __ARM_ARCH_6ZK__) && !__thumb__) \
+ || __ARM_ARCH_6T2__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH >= 7
 
 #define a_ll a_ll
 static inline int a_ll(volatile int *p)
@@ -91,4 +91,16 @@ static inline int a_clz_32(uint32_t x)
 	return x;
 }
 
+#if __ARM_ARCH_6T2__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH >= 7
+
+#define a_ctz_32 a_ctz_32
+static inline int a_ctz_32(uint32_t x)
+{
+	uint32_t xr;
+	__asm__ ("rbit %0, %1" : "=r"(xr) : "r"(x));
+	return a_clz_32(xr);
+}
+
+#endif
+
 #endif
diff --git a/arch/arm/bits/syscall.h.in b/arch/arm/bits/syscall.h.in
index c594152e..1920516a 100644
--- a/arch/arm/bits/syscall.h.in
+++ b/arch/arm/bits/syscall.h.in
@@ -359,4 +359,5 @@
 #define __ARM_NR_usr26		0x0f0003
 #define __ARM_NR_usr32		0x0f0004
 #define __ARM_NR_set_tls	0x0f0005
+#define __ARM_NR_get_tls	0x0f0006
 
diff --git a/arch/arm/pthread_arch.h b/arch/arm/pthread_arch.h
index 197752ef..6657e198 100644
--- a/arch/arm/pthread_arch.h
+++ b/arch/arm/pthread_arch.h
@@ -1,4 +1,4 @@
-#if ((__ARM_ARCH_6K__ || __ARM_ARCH_6ZK__) && !__thumb__) \
+#if ((__ARM_ARCH_6K__ || __ARM_ARCH_6KZ__ || __ARM_ARCH_6ZK__) && !__thumb__) \
  || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH >= 7
 
 static inline pthread_t __pthread_self()
diff --git a/arch/arm/syscall_arch.h b/arch/arm/syscall_arch.h
index 6023303b..4db7d152 100644
--- a/arch/arm/syscall_arch.h
+++ b/arch/arm/syscall_arch.h
@@ -3,74 +3,99 @@
 ((union { long long ll; long l[2]; }){ .ll = x }).l[1]
 #define __SYSCALL_LL_O(x) 0, __SYSCALL_LL_E((x))
 
+#ifdef __thumb__
+
+/* Avoid use of r7 in asm constraints when producing thumb code,
+ * since it's reserved as frame pointer and might not be supported. */
+#define __ASM____R7__
+#define __asm_syscall(...) do { \
+	__asm__ __volatile__ ( "mov %1,r7 ; mov r7,%2 ; svc 0 ; mov r7,%1" \
+	: "=r"(r0), "=&r"((int){0}) : __VA_ARGS__ : "memory"); \
+	return r0; \
+	} while (0)
+
+#else
+
+#define __ASM____R7__ __asm__("r7")
 #define __asm_syscall(...) do { \
 	__asm__ __volatile__ ( "svc 0" \
 	: "=r"(r0) : __VA_ARGS__ : "memory"); \
 	return r0; \
 	} while (0)
+#endif
+
+/* For thumb2, we can allow 8-bit immediate syscall numbers, saving a
+ * register in the above dance around r7. Does not work for thumb1 where
+ * only movs, not mov, supports immediates, and we can't use movs because
+ * it doesn't support high regs. */
+#ifdef __thumb2__
+#define R7_OPERAND "rI"(r7)
+#else
+#define R7_OPERAND "r"(r7)
+#endif
 
 static inline long __syscall0(long n)
 {
-	register long r7 __asm__("r7") = n;
+	register long r7 __ASM____R7__ = n;
 	register long r0 __asm__("r0");
-	__asm_syscall("r"(r7));
+	__asm_syscall(R7_OPERAND);
 }
 
 static inline long __syscall1(long n, long a)
 {
-	register long r7 __asm__("r7") = n;
+	register long r7 __ASM____R7__ = n;
 	register long r0 __asm__("r0") = a;
-	__asm_syscall("r"(r7), "0"(r0));
+	__asm_syscall(R7_OPERAND, "0"(r0));
 }
 
 static inline long __syscall2(long n, long a, long b)
 {
-	register long r7 __asm__("r7") = n;
+	register long r7 __ASM____R7__ = n;
 	register long r0 __asm__("r0") = a;
 	register long r1 __asm__("r1") = b;
-	__asm_syscall("r"(r7), "0"(r0), "r"(r1));
+	__asm_syscall(R7_OPERAND, "0"(r0), "r"(r1));
 }
 
 static inline long __syscall3(long n, long a, long b, long c)
 {
-	register long r7 __asm__("r7") = n;
+	register long r7 __ASM____R7__ = n;
 	register long r0 __asm__("r0") = a;
 	register long r1 __asm__("r1") = b;
 	register long r2 __asm__("r2") = c;
-	__asm_syscall("r"(r7), "0"(r0), "r"(r1), "r"(r2));
+	__asm_syscall(R7_OPERAND, "0"(r0), "r"(r1), "r"(r2));
 }
 
 static inline long __syscall4(long n, long a, long b, long c, long d)
 {
-	register long r7 __asm__("r7") = n;
+	register long r7 __ASM____R7__ = n;
 	register long r0 __asm__("r0") = a;
 	register long r1 __asm__("r1") = b;
 	register long r2 __asm__("r2") = c;
 	register long r3 __asm__("r3") = d;
-	__asm_syscall("r"(r7), "0"(r0), "r"(r1), "r"(r2), "r"(r3));
+	__asm_syscall(R7_OPERAND, "0"(r0), "r"(r1), "r"(r2), "r"(r3));
 }
 
 static inline long __syscall5(long n, long a, long b, long c, long d, long e)
 {
-	register long r7 __asm__("r7") = n;
+	register long r7 __ASM____R7__ = n;
 	register long r0 __asm__("r0") = a;
 	register long r1 __asm__("r1") = b;
 	register long r2 __asm__("r2") = c;
 	register long r3 __asm__("r3") = d;
 	register long r4 __asm__("r4") = e;
-	__asm_syscall("r"(r7), "0"(r0), "r"(r1), "r"(r2), "r"(r3), "r"(r4));
+	__asm_syscall(R7_OPERAND, "0"(r0), "r"(r1), "r"(r2), "r"(r3), "r"(r4));
 }
 
 static inline long __syscall6(long n, long a, long b, long c, long d, long e, long f)
 {
-	register long r7 __asm__("r7") = n;
+	register long r7 __ASM____R7__ = n;
 	register long r0 __asm__("r0") = a;
 	register long r1 __asm__("r1") = b;
 	register long r2 __asm__("r2") = c;
 	register long r3 __asm__("r3") = d;
 	register long r4 __asm__("r4") = e;
 	register long r5 __asm__("r5") = f;
-	__asm_syscall("r"(r7), "0"(r0), "r"(r1), "r"(r2), "r"(r3), "r"(r4), "r"(r5));
+	__asm_syscall(R7_OPERAND, "0"(r0), "r"(r1), "r"(r2), "r"(r3), "r"(r4), "r"(r5));
 }
 
 #define VDSO_USEFUL
diff --git a/arch/generic/bits/termios.h b/arch/generic/bits/termios.h
index 434c02c8..124f71d2 100644
--- a/arch/generic/bits/termios.h
+++ b/arch/generic/bits/termios.h
@@ -51,6 +51,7 @@ struct termios {
 #define ONLRET 0000040
 #define OFILL  0000100
 #define OFDEL  0000200
+#if defined(_GNU_SOURCE) || defined(_BSD_SOURCE) || defined(_XOPEN_SOURCE)
 #define NLDLY  0000400
 #define NL0    0000000
 #define NL1    0000400
@@ -70,6 +71,7 @@ struct termios {
 #define FFDLY  0100000
 #define FF0    0000000
 #define FF1    0100000
+#endif
 
 #define VTDLY  0040000
 #define VT0    0000000
diff --git a/arch/i386/atomic_arch.h b/arch/i386/atomic_arch.h
index 7d2a48a5..047fb68d 100644
--- a/arch/i386/atomic_arch.h
+++ b/arch/i386/atomic_arch.h
@@ -92,10 +92,10 @@ static inline int a_ctz_64(uint64_t x)
 	return r;
 }
 
-#define a_ctz_l a_ctz_l
-static inline int a_ctz_l(unsigned long x)
+#define a_ctz_32 a_ctz_32
+static inline int a_ctz_32(uint32_t x)
 {
-	long r;
+	int r;
 	__asm__( "bsf %1,%0" : "=r"(r) : "r"(x) );
 	return r;
 }
diff --git a/arch/i386/bits/limits.h b/arch/i386/bits/limits.h
index 65a3dd64..c340ceb2 100644
--- a/arch/i386/bits/limits.h
+++ b/arch/i386/bits/limits.h
@@ -1,6 +1,6 @@
 #if defined(_POSIX_SOURCE) || defined(_POSIX_C_SOURCE) \
  || defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
-#define PAGE_SIZE 4096
+#define PAGESIZE 4096
 #define LONG_BIT 32
 #endif
 
diff --git a/arch/i386/bits/user.h b/arch/i386/bits/user.h
index 0e343930..33fea986 100644
--- a/arch/i386/bits/user.h
+++ b/arch/i386/bits/user.h
@@ -37,8 +37,8 @@ struct user {
 	int				u_debugreg[8];
 };
 
-#define PAGE_MASK		(~(PAGE_SIZE-1))
-#define NBPG			PAGE_SIZE
+#define PAGE_MASK		(~(PAGESIZE-1))
+#define NBPG			PAGESIZE
 #define UPAGES			1
 #define HOST_TEXT_START_ADDR	(u.start_code)
 #define HOST_STACK_END_ADDR	(u.start_stack + u.u_ssize * NBPG)
diff --git a/arch/mips/bits/mman.h b/arch/mips/bits/mman.h
index c68aea88..9027bb63 100644
--- a/arch/mips/bits/mman.h
+++ b/arch/mips/bits/mman.h
@@ -18,6 +18,7 @@
 #define MAP_STACK      0x40000
 #undef MAP_HUGETLB
 #define MAP_HUGETLB    0x80000
+#undef MAP_SYNC
 
 #if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
 #undef MADV_SOFT_OFFLINE
diff --git a/arch/mips/bits/termios.h b/arch/mips/bits/termios.h
index 6a1205d7..f7b9dd2e 100644
--- a/arch/mips/bits/termios.h
+++ b/arch/mips/bits/termios.h
@@ -52,6 +52,7 @@ struct termios {
 #define ONLRET 0000040
 #define OFILL  0000100
 #define OFDEL  0000200
+#if defined(_GNU_SOURCE) || defined(_BSD_SOURCE) || defined(_XOPEN_SOURCE)
 #define NLDLY  0000400
 #define NL0    0000000
 #define NL1    0000400
@@ -71,6 +72,7 @@ struct termios {
 #define FFDLY  0100000
 #define FF0    0000000
 #define FF1    0100000
+#endif
 
 #define VTDLY  0040000
 #define VT0    0000000
@@ -163,5 +165,5 @@ struct termios {
 #define EXTPROC 0200000
 
 #define XTABS  0014000
-#define TIOCSER_TEMT 1
+#define TIOCSER_TEMT 0x01
 #endif
diff --git a/arch/mips64/bits/mman.h b/arch/mips64/bits/mman.h
index c68aea88..9027bb63 100644
--- a/arch/mips64/bits/mman.h
+++ b/arch/mips64/bits/mman.h
@@ -18,6 +18,7 @@
 #define MAP_STACK      0x40000
 #undef MAP_HUGETLB
 #define MAP_HUGETLB    0x80000
+#undef MAP_SYNC
 
 #if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
 #undef MADV_SOFT_OFFLINE
diff --git a/arch/mips64/bits/termios.h b/arch/mips64/bits/termios.h
index 6a1205d7..f7b9dd2e 100644
--- a/arch/mips64/bits/termios.h
+++ b/arch/mips64/bits/termios.h
@@ -52,6 +52,7 @@ struct termios {
 #define ONLRET 0000040
 #define OFILL  0000100
 #define OFDEL  0000200
+#if defined(_GNU_SOURCE) || defined(_BSD_SOURCE) || defined(_XOPEN_SOURCE)
 #define NLDLY  0000400
 #define NL0    0000000
 #define NL1    0000400
@@ -71,6 +72,7 @@ struct termios {
 #define FFDLY  0100000
 #define FF0    0000000
 #define FF1    0100000
+#endif
 
 #define VTDLY  0040000
 #define VT0    0000000
@@ -163,5 +165,5 @@ struct termios {
 #define EXTPROC 0200000
 
 #define XTABS  0014000
-#define TIOCSER_TEMT 1
+#define TIOCSER_TEMT 0x01
 #endif
diff --git a/arch/mipsn32/bits/mman.h b/arch/mipsn32/bits/mman.h
index c68aea88..9027bb63 100644
--- a/arch/mipsn32/bits/mman.h
+++ b/arch/mipsn32/bits/mman.h
@@ -18,6 +18,7 @@
 #define MAP_STACK      0x40000
 #undef MAP_HUGETLB
 #define MAP_HUGETLB    0x80000
+#undef MAP_SYNC
 
 #if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
 #undef MADV_SOFT_OFFLINE
diff --git a/arch/mipsn32/bits/termios.h b/arch/mipsn32/bits/termios.h
index 6a1205d7..f7b9dd2e 100644
--- a/arch/mipsn32/bits/termios.h
+++ b/arch/mipsn32/bits/termios.h
@@ -52,6 +52,7 @@ struct termios {
 #define ONLRET 0000040
 #define OFILL  0000100
 #define OFDEL  0000200
+#if defined(_GNU_SOURCE) || defined(_BSD_SOURCE) || defined(_XOPEN_SOURCE)
 #define NLDLY  0000400
 #define NL0    0000000
 #define NL1    0000400
@@ -71,6 +72,7 @@ struct termios {
 #define FFDLY  0100000
 #define FF0    0000000
 #define FF1    0100000
+#endif
 
 #define VTDLY  0040000
 #define VT0    0000000
@@ -163,5 +165,5 @@ struct termios {
 #define EXTPROC 0200000
 
 #define XTABS  0014000
-#define TIOCSER_TEMT 1
+#define TIOCSER_TEMT 0x01
 #endif
diff --git a/arch/or1k/bits/limits.h b/arch/or1k/bits/limits.h
index 483b6749..3a811c99 100644
--- a/arch/or1k/bits/limits.h
+++ b/arch/or1k/bits/limits.h
@@ -1,6 +1,6 @@
 #if defined(_POSIX_SOURCE) || defined(_POSIX_C_SOURCE) \
  || defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
-#define PAGE_SIZE 8192
+#define PAGESIZE 8192
 #define LONG_BIT 32
 #endif
 
diff --git a/arch/powerpc/bits/hwcap.h b/arch/powerpc/bits/hwcap.h
index 82c92a93..803de9b5 100644
--- a/arch/powerpc/bits/hwcap.h
+++ b/arch/powerpc/bits/hwcap.h
@@ -38,3 +38,6 @@
 #define PPC_FEATURE2_HTM_NOSC		0x01000000
 #define PPC_FEATURE2_ARCH_3_00		0x00800000
 #define PPC_FEATURE2_HAS_IEEE128	0x00400000
+#define PPC_FEATURE2_DARN		0x00200000
+#define PPC_FEATURE2_SCV		0x00100000
+#define PPC_FEATURE2_HTM_NO_SUSPEND	0x00080000
diff --git a/arch/powerpc/bits/mman.h b/arch/powerpc/bits/mman.h
index 95ec4358..b3a675a8 100644
--- a/arch/powerpc/bits/mman.h
+++ b/arch/powerpc/bits/mman.h
@@ -4,6 +4,7 @@
 #define MAP_NORESERVE   0x40
 #undef MAP_LOCKED
 #define MAP_LOCKED	0x80
+#undef MAP_SYNC
 
 #undef MCL_CURRENT
 #define MCL_CURRENT     0x2000
diff --git a/arch/powerpc/bits/termios.h b/arch/powerpc/bits/termios.h
index 0b09630c..e3f22e86 100644
--- a/arch/powerpc/bits/termios.h
+++ b/arch/powerpc/bits/termios.h
@@ -53,6 +53,7 @@ struct termios {
 #define ONLRET 0000040
 #define OFILL  0000100
 #define OFDEL  0000200
+#if defined(_GNU_SOURCE) || defined(_BSD_SOURCE) || defined(_XOPEN_SOURCE)
 #define NLDLY  0001400
 #define NL0    0000000
 #define NL1    0000400
@@ -74,6 +75,7 @@ struct termios {
 #define BSDLY  0100000
 #define BS0    0000000
 #define BS1    0100000
+#endif
 
 #define VTDLY  0200000
 #define VT0    0000000
@@ -165,5 +167,5 @@ struct termios {
 #define EXTPROC 0x10000000
 
 #define XTABS   00006000
-#define TIOCSER_TEMT 1
+#define TIOCSER_TEMT 0x01
 #endif
diff --git a/arch/powerpc64/bits/hwcap.h b/arch/powerpc64/bits/hwcap.h
index 82c92a93..803de9b5 100644
--- a/arch/powerpc64/bits/hwcap.h
+++ b/arch/powerpc64/bits/hwcap.h
@@ -38,3 +38,6 @@
 #define PPC_FEATURE2_HTM_NOSC		0x01000000
 #define PPC_FEATURE2_ARCH_3_00		0x00800000
 #define PPC_FEATURE2_HAS_IEEE128	0x00400000
+#define PPC_FEATURE2_DARN		0x00200000
+#define PPC_FEATURE2_SCV		0x00100000
+#define PPC_FEATURE2_HTM_NO_SUSPEND	0x00080000
diff --git a/arch/powerpc64/bits/mman.h b/arch/powerpc64/bits/mman.h
index 95ec4358..b3a675a8 100644
--- a/arch/powerpc64/bits/mman.h
+++ b/arch/powerpc64/bits/mman.h
@@ -4,6 +4,7 @@
 #define MAP_NORESERVE   0x40
 #undef MAP_LOCKED
 #define MAP_LOCKED	0x80
+#undef MAP_SYNC
 
 #undef MCL_CURRENT
 #define MCL_CURRENT     0x2000
diff --git a/arch/powerpc64/bits/termios.h b/arch/powerpc64/bits/termios.h
index 0b09630c..e3f22e86 100644
--- a/arch/powerpc64/bits/termios.h
+++ b/arch/powerpc64/bits/termios.h
@@ -53,6 +53,7 @@ struct termios {
 #define ONLRET 0000040
 #define OFILL  0000100
 #define OFDEL  0000200
+#if defined(_GNU_SOURCE) || defined(_BSD_SOURCE) || defined(_XOPEN_SOURCE)
 #define NLDLY  0001400
 #define NL0    0000000
 #define NL1    0000400
@@ -74,6 +75,7 @@ struct termios {
 #define BSDLY  0100000
 #define BS0    0000000
 #define BS1    0100000
+#endif
 
 #define VTDLY  0200000
 #define VT0    0000000
@@ -165,5 +167,5 @@ struct termios {
 #define EXTPROC 0x10000000
 
 #define XTABS   00006000
-#define TIOCSER_TEMT 1
+#define TIOCSER_TEMT 0x01
 #endif
diff --git a/arch/s390x/bits/limits.h b/arch/s390x/bits/limits.h
index 792a30b9..86ef7663 100644
--- a/arch/s390x/bits/limits.h
+++ b/arch/s390x/bits/limits.h
@@ -1,6 +1,6 @@
 #if defined(_POSIX_SOURCE) || defined(_POSIX_C_SOURCE) \
  || defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
-#define PAGE_SIZE 4096
+#define PAGESIZE 4096
 #define LONG_BIT 64
 #endif
 
diff --git a/arch/s390x/bits/syscall.h.in b/arch/s390x/bits/syscall.h.in
index 4fe1a64f..c965664c 100644
--- a/arch/s390x/bits/syscall.h.in
+++ b/arch/s390x/bits/syscall.h.in
@@ -322,4 +322,5 @@
 #define __NR_pwritev2                   377
 #define __NR_s390_guarded_storage       378
 #define __NR_statx                      379
+#define __NR_s390_sthyi                 380
 
diff --git a/arch/s390x/bits/user.h b/arch/s390x/bits/user.h
index 17bce16f..ff3f0483 100644
--- a/arch/s390x/bits/user.h
+++ b/arch/s390x/bits/user.h
@@ -54,8 +54,8 @@ struct user {
 	char u_comm[32];
 };
 
-#define PAGE_MASK            (~(PAGE_SIZE-1))
-#define NBPG                 PAGE_SIZE
+#define PAGE_MASK            (~(PAGESIZE-1))
+#define NBPG                 PAGESIZE
 #define UPAGES               1
 #define HOST_TEXT_START_ADDR (u.start_code)
 #define HOST_STACK_END_ADDR  (u.start_stack + u.u_ssize * NBPG)
diff --git a/arch/sh/bits/limits.h b/arch/sh/bits/limits.h
index 65a3dd64..c340ceb2 100644
--- a/arch/sh/bits/limits.h
+++ b/arch/sh/bits/limits.h
@@ -1,6 +1,6 @@
 #if defined(_POSIX_SOURCE) || defined(_POSIX_C_SOURCE) \
  || defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
-#define PAGE_SIZE 4096
+#define PAGESIZE 4096
 #define LONG_BIT 32
 #endif
 
diff --git a/arch/x32/atomic_arch.h b/arch/x32/atomic_arch.h
index a744c299..918c2d4e 100644
--- a/arch/x32/atomic_arch.h
+++ b/arch/x32/atomic_arch.h
@@ -106,8 +106,8 @@ static inline int a_ctz_64(uint64_t x)
 	return x;
 }
 
-#define a_ctz_l a_ctz_l
-static inline int a_ctz_l(unsigned long x)
+#define a_ctz_32 a_ctz_32
+static inline int a_ctz_32(uint32_t x)
 {
 	__asm__( "bsf %1,%0" : "=r"(x) : "r"(x) );
 	return x;
diff --git a/arch/x32/bits/limits.h b/arch/x32/bits/limits.h
index 65a3dd64..c340ceb2 100644
--- a/arch/x32/bits/limits.h
+++ b/arch/x32/bits/limits.h
@@ -1,6 +1,6 @@
 #if defined(_POSIX_SOURCE) || defined(_POSIX_C_SOURCE) \
  || defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
-#define PAGE_SIZE 4096
+#define PAGESIZE 4096
 #define LONG_BIT 32
 #endif
 
diff --git a/arch/x32/bits/user.h b/arch/x32/bits/user.h
index 471bb19d..4073cc06 100644
--- a/arch/x32/bits/user.h
+++ b/arch/x32/bits/user.h
@@ -34,8 +34,8 @@ struct user {
 	unsigned long			u_debugreg[8];
 };
 
-#define PAGE_MASK		(~(PAGE_SIZE-1))
-#define NBPG			PAGE_SIZE
+#define PAGE_MASK		(~(PAGESIZE-1))
+#define NBPG			PAGESIZE
 #define UPAGES			1
 #define HOST_TEXT_START_ADDR	(u.start_code)
 #define HOST_STACK_END_ADDR	(u.start_stack + u.u_ssize * NBPG)
diff --git a/arch/x86_64/bits/limits.h b/arch/x86_64/bits/limits.h
index 792a30b9..86ef7663 100644
--- a/arch/x86_64/bits/limits.h
+++ b/arch/x86_64/bits/limits.h
@@ -1,6 +1,6 @@
 #if defined(_POSIX_SOURCE) || defined(_POSIX_C_SOURCE) \
  || defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
-#define PAGE_SIZE 4096
+#define PAGESIZE 4096
 #define LONG_BIT 64
 #endif
 
diff --git a/arch/x86_64/bits/user.h b/arch/x86_64/bits/user.h
index 471bb19d..4073cc06 100644
--- a/arch/x86_64/bits/user.h
+++ b/arch/x86_64/bits/user.h
@@ -34,8 +34,8 @@ struct user {
 	unsigned long			u_debugreg[8];
 };
 
-#define PAGE_MASK		(~(PAGE_SIZE-1))
-#define NBPG			PAGE_SIZE
+#define PAGE_MASK		(~(PAGESIZE-1))
+#define NBPG			PAGESIZE
 #define UPAGES			1
 #define HOST_TEXT_START_ADDR	(u.start_code)
 #define HOST_STACK_END_ADDR	(u.start_stack + u.u_ssize * NBPG)
diff --git a/configure b/configure
index 1e59c461..09a0c436 100755
--- a/configure
+++ b/configure
@@ -590,10 +590,12 @@ tryldflag LDFLAGS_AUTO -Wl,--no-undefined
 # versions built without shared library support and pcc are broken.
 tryldflag LDFLAGS_AUTO -Wl,--exclude-libs=ALL
 
-# Linking with -Bsymbolic-functions is no longer mandatory for
-# the dynamic linker to work, but enable it if it works as
-# a linking optimization.
-tryldflag LDFLAGS_AUTO -Wl,-Bsymbolic-functions
+# Public data symbols must be interposable to allow for copy
+# relocations, but otherwise we want to bind symbols at libc link
+# time to eliminate startup relocations and PLT overhead. Use
+# --dynamic-list rather than -Bsymbolic-functions for greater
+# control over what symbols are left unbound.
+tryldflag LDFLAGS_AUTO -Wl,--dynamic-list="$srcdir/dynamic.list"
 
 # Find compiler runtime library
 test -z "$LIBCC" && tryldflag LIBCC -lgcc && tryldflag LIBCC -lgcc_eh
diff --git a/dynamic.list b/dynamic.list
new file mode 100644
index 00000000..686f8eb4
--- /dev/null
+++ b/dynamic.list
@@ -0,0 +1,44 @@
+{
+environ;
+__environ;
+
+stdin;
+stdout;
+stderr;
+
+malloc;
+calloc;
+realloc;
+free;
+memalign;
+posix_memalign;
+aligned_alloc;
+malloc_usable_size;
+
+timezone;
+daylight;
+tzname;
+__timezone;
+__daylight;
+__tzname;
+
+signgam;
+__signgam;
+
+optarg;
+optind;
+opterr;
+optreset;
+__optreset;
+
+getdate_err;
+
+h_errno;
+
+program_invocation_name;
+program_invocation_short_name;
+__progname;
+__progname_full;
+
+__stack_chk_guard;
+};
diff --git a/include/alltypes.h.in b/include/alltypes.h.in
index 6a9c105f..622ca01d 100644
--- a/include/alltypes.h.in
+++ b/include/alltypes.h.in
@@ -7,10 +7,10 @@ TYPEDEF _Addr regoff_t;
 TYPEDEF _Reg register_t;
 
 TYPEDEF signed char     int8_t;
-TYPEDEF short           int16_t;
-TYPEDEF int             int32_t;
-TYPEDEF _Int64          int64_t;
-TYPEDEF _Int64          intmax_t;
+TYPEDEF signed short    int16_t;
+TYPEDEF signed int      int32_t;
+TYPEDEF signed _Int64   int64_t;
+TYPEDEF signed _Int64   intmax_t;
 TYPEDEF unsigned char   uint8_t;
 TYPEDEF unsigned short  uint16_t;
 TYPEDEF unsigned int    uint32_t;
diff --git a/include/elf.h b/include/elf.h
index e79915fe..78906f15 100644
--- a/include/elf.h
+++ b/include/elf.h
@@ -623,6 +623,7 @@ typedef struct {
 
 
 #define NT_PRSTATUS	1
+#define NT_PRFPREG	2
 #define NT_FPREGSET	2
 #define NT_PRPSINFO	3
 #define NT_PRXREG	4
@@ -644,6 +645,19 @@ typedef struct {
 #define NT_PPC_VMX	0x100
 #define NT_PPC_SPE	0x101
 #define NT_PPC_VSX	0x102
+#define NT_PPC_TAR	0x103
+#define NT_PPC_PPR	0x104
+#define NT_PPC_DSCR	0x105
+#define NT_PPC_EBB	0x106
+#define NT_PPC_PMU	0x107
+#define NT_PPC_TM_CGPR	0x108
+#define NT_PPC_TM_CFPR	0x109
+#define NT_PPC_TM_CVMX	0x10a
+#define NT_PPC_TM_CVSX	0x10b
+#define NT_PPC_TM_SPR	0x10c
+#define NT_PPC_TM_CTAR	0x10d
+#define NT_PPC_TM_CPPR	0x10e
+#define NT_PPC_TM_CDSCR	0x10f
 #define NT_386_TLS	0x200
 #define NT_386_IOPERM	0x201
 #define NT_X86_XSTATE	0x202
@@ -656,14 +670,21 @@ typedef struct {
 #define NT_S390_LAST_BREAK	0x306
 #define NT_S390_SYSTEM_CALL	0x307
 #define NT_S390_TDB	0x308
+#define NT_S390_VXRS_LOW	0x309
+#define NT_S390_VXRS_HIGH	0x30a
+#define NT_S390_GS_CB	0x30b
+#define NT_S390_GS_BC	0x30c
+#define NT_S390_RI_CB	0x30d
 #define NT_ARM_VFP	0x400
 #define NT_ARM_TLS	0x401
 #define NT_ARM_HW_BREAK	0x402
 #define NT_ARM_HW_WATCH	0x403
 #define NT_ARM_SYSTEM_CALL	0x404
+#define NT_ARM_SVE	0x405
 #define NT_METAG_CBUF	0x500
 #define NT_METAG_RPIPE	0x501
 #define NT_METAG_TLS	0x502
+#define NT_ARC_V2	0x600
 #define NT_VERSION	1
 
 
@@ -721,7 +742,8 @@ typedef struct {
 #define DT_ENCODING	32
 #define DT_PREINIT_ARRAY 32
 #define DT_PREINIT_ARRAYSZ 33
-#define	DT_NUM		34
+#define DT_SYMTAB_SHNDX	34
+#define	DT_NUM		35
 #define DT_LOOS		0x6000000d
 #define DT_HIOS		0x6ffff000
 #define DT_LOPROC	0x70000000
@@ -821,6 +843,8 @@ typedef struct {
 #define	DF_1_SYMINTPOSE	0x00800000
 #define	DF_1_GLOBAUDIT	0x01000000
 #define	DF_1_SINGLETON	0x02000000
+#define	DF_1_STUB	0x04000000
+#define	DF_1_PIE	0x08000000
 
 #define DTF_1_PARINIT	0x00000001
 #define DTF_1_CONFEXP	0x00000002
@@ -1002,6 +1026,14 @@ typedef struct {
 #define AT_L2_CACHESHAPE	36
 #define AT_L3_CACHESHAPE	37
 
+#define AT_L1I_CACHESIZE	40
+#define AT_L1I_CACHEGEOMETRY	41
+#define AT_L1D_CACHESIZE	42
+#define AT_L1D_CACHEGEOMETRY	43
+#define AT_L2_CACHESIZE		44
+#define AT_L2_CACHEGEOMETRY	45
+#define AT_L3_CACHESIZE		46
+#define AT_L3_CACHEGEOMETRY	47
 
 
 
@@ -2233,6 +2265,7 @@ enum
 
 #define PPC64_OPT_TLS		1
 #define PPC64_OPT_MULTI_TOC	2
+#define PPC64_OPT_LOCALENTRY	4
 
 #define STO_PPC64_LOCAL_BIT	5
 #define STO_PPC64_LOCAL_MASK	0xe0
diff --git a/include/limits.h b/include/limits.h
index f9805a1e..9cb5426f 100644
--- a/include/limits.h
+++ b/include/limits.h
@@ -40,14 +40,10 @@
  || defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
 
 #define PIPE_BUF 4096
-#ifdef PAGE_SIZE
-#define PAGESIZE PAGE_SIZE
-#endif
 #define FILESIZEBITS 64
 #define NAME_MAX 255
 #define SYMLINK_MAX 255
 #define PATH_MAX 4096
-#define NZERO 20
 #define NGROUPS_MAX 32
 #define ARG_MAX 131072
 #define IOV_MAX 1024
@@ -82,13 +78,22 @@
 #define RE_DUP_MAX 255
 
 #define NL_ARGMAX 9
-#define NL_LANGMAX 32
 #define NL_MSGMAX 32767
 #define NL_SETMAX 255
 #define NL_TEXTMAX 2048
 
 #endif
 
+#if defined(_GNU_SOURCE) || defined(_BSD_SOURCE) || defined(_XOPEN_SOURCE)
+
+#ifdef PAGESIZE
+#define PAGE_SIZE PAGESIZE
+#endif
+#define NZERO 20
+#define NL_LANGMAX 32
+
+#endif
+
 #if defined(_GNU_SOURCE) || defined(_BSD_SOURCE) \
  || (defined(_XOPEN_SOURCE) && _XOPEN_SOURCE+0 < 700)
 
diff --git a/include/net/if_arp.h b/include/net/if_arp.h
index c832ff95..27becc83 100644
--- a/include/net/if_arp.h
+++ b/include/net/if_arp.h
@@ -59,6 +59,7 @@ struct arphdr {
 #define ARPHRD_LAPB	516
 #define ARPHRD_DDCMP	517
 #define	ARPHRD_RAWHDLC	518
+#define ARPHRD_RAWIP	519
 
 #define ARPHRD_TUNNEL	768
 #define ARPHRD_TUNNEL6	769
diff --git a/include/netinet/if_ether.h b/include/netinet/if_ether.h
index 97134d75..9007d609 100644
--- a/include/netinet/if_ether.h
+++ b/include/netinet/if_ether.h
@@ -53,6 +53,7 @@
 #define ETH_P_AOE	0x88A2
 #define ETH_P_8021AD	0x88A8
 #define ETH_P_802_EX1	0x88B5
+#define ETH_P_ERSPAN	0x88BE
 #define ETH_P_TIPC	0x88CA
 #define ETH_P_MACSEC	0x88E5
 #define ETH_P_8021AH	0x88E7
@@ -66,11 +67,13 @@
 #define ETH_P_IBOE	0x8915
 #define ETH_P_80221	0x8917
 #define ETH_P_HSR	0x892F
+#define ETH_P_NSH	0x894F
 #define ETH_P_LOOPBACK	0x9000
 #define ETH_P_QINQ1	0x9100
 #define ETH_P_QINQ2	0x9200
 #define ETH_P_QINQ3	0x9300
 #define ETH_P_EDSA	0xDADA
+#define ETH_P_IFE	0xED3E
 #define ETH_P_AF_IUCV	0xFBFB
 
 #define ETH_P_802_3_MIN	0x0600
@@ -100,6 +103,7 @@
 #define ETH_P_IEEE802154 0x00F6
 #define ETH_P_CAIF	0x00F7
 #define ETH_P_XDSA	0x00F8
+#define ETH_P_MAP	0x00F9
 
 struct ethhdr {
 	uint8_t h_dest[ETH_ALEN];
diff --git a/include/netinet/in.h b/include/netinet/in.h
index f18b478d..192679a6 100644
--- a/include/netinet/in.h
+++ b/include/netinet/in.h
@@ -363,6 +363,7 @@ struct ip6_mtuinfo {
 #define IPV6_TRANSPARENT        75
 #define IPV6_UNICAST_IF         76
 #define IPV6_RECVFRAGSIZE       77
+#define IPV6_FREEBIND           78
 
 #define IPV6_ADD_MEMBERSHIP     IPV6_JOIN_GROUP
 #define IPV6_DROP_MEMBERSHIP    IPV6_LEAVE_GROUP
diff --git a/include/netinet/tcp.h b/include/netinet/tcp.h
index 4d20936a..2747f4ea 100644
--- a/include/netinet/tcp.h
+++ b/include/netinet/tcp.h
@@ -34,6 +34,8 @@
 #define TCP_FASTOPEN_CONNECT 30
 #define TCP_ULP          31
 #define TCP_MD5SIG_EXT   32
+#define TCP_FASTOPEN_KEY 33
+#define TCP_FASTOPEN_NO_COOKIE 34
 
 #define TCP_ESTABLISHED  1
 #define TCP_SYN_SENT     2
@@ -220,6 +222,14 @@ struct tcp_md5sig {
 	uint8_t tcpm_key[TCP_MD5SIG_MAXKEYLEN];
 };
 
+struct tcp_diag_md5sig {
+	uint8_t tcpm_family;
+	uint8_t tcpm_prefixlen;
+	uint16_t tcpm_keylen;
+	uint32_t tcpm_addr[4];
+	uint8_t tcpm_key[TCP_MD5SIG_MAXKEYLEN];
+};
+
 struct tcp_repair_window {
 	uint32_t snd_wl1;
 	uint32_t snd_wnd;
diff --git a/include/signal.h b/include/signal.h
index 2c8b3d55..a4f85cca 100644
--- a/include/signal.h
+++ b/include/signal.h
@@ -210,7 +210,7 @@ int sigpending(sigset_t *);
 int sigwait(const sigset_t *__restrict, int *__restrict);
 int sigwaitinfo(const sigset_t *__restrict, siginfo_t *__restrict);
 int sigtimedwait(const sigset_t *__restrict, siginfo_t *__restrict, const struct timespec *__restrict);
-int sigqueue(pid_t, int, const union sigval);
+int sigqueue(pid_t, int, union sigval);
 
 int pthread_sigmask(int, const sigset_t *__restrict, sigset_t *__restrict);
 int pthread_kill(pthread_t, int);
@@ -231,6 +231,8 @@ int sigrelse(int);
 void (*sigset(int, void (*)(int)))(int);
 #define TRAP_BRKPT 1
 #define TRAP_TRACE 2
+#define TRAP_BRANCH 3
+#define TRAP_HWBKPT 4
 #define POLL_IN 1
 #define POLL_OUT 2
 #define POLL_MSG 3
diff --git a/include/stdio.h b/include/stdio.h
index 7c4f9ee4..afadd912 100644
--- a/include/stdio.h
+++ b/include/stdio.h
@@ -49,6 +49,7 @@ extern "C" {
 
 typedef union _G_fpos64_t {
 	char __opaque[16];
+	long long __lldata;
 	double __align;
 } fpos_t;
 
diff --git a/include/sys/mman.h b/include/sys/mman.h
index 8a5149c9..302ad134 100644
--- a/include/sys/mman.h
+++ b/include/sys/mman.h
@@ -20,6 +20,7 @@ extern "C" {
 
 #define MAP_SHARED     0x01
 #define MAP_PRIVATE    0x02
+#define MAP_SHARED_VALIDATE 0x03
 #define MAP_TYPE       0x0f
 #define MAP_FIXED      0x10
 #define MAP_ANON       0x20
@@ -33,8 +34,22 @@ extern "C" {
 #define MAP_NONBLOCK   0x10000
 #define MAP_STACK      0x20000
 #define MAP_HUGETLB    0x40000
+#define MAP_SYNC       0x80000
 #define MAP_FILE       0
 
+#define MAP_HUGE_SHIFT 26
+#define MAP_HUGE_MASK  0x3f
+#define MAP_HUGE_64KB  (16 << 26)
+#define MAP_HUGE_512KB (19 << 26)
+#define MAP_HUGE_1MB   (20 << 26)
+#define MAP_HUGE_2MB   (21 << 26)
+#define MAP_HUGE_8MB   (23 << 26)
+#define MAP_HUGE_16MB  (24 << 26)
+#define MAP_HUGE_256MB (28 << 26)
+#define MAP_HUGE_1GB   (30 << 26)
+#define MAP_HUGE_2GB   (31 << 26)
+#define MAP_HUGE_16GB  (34U << 26)
+
 #define PROT_NONE      0
 #define PROT_READ      1
 #define PROT_WRITE     2
@@ -72,6 +87,8 @@ extern "C" {
 #define MADV_NOHUGEPAGE  15
 #define MADV_DONTDUMP    16
 #define MADV_DODUMP      17
+#define MADV_WIPEONFORK  18
+#define MADV_KEEPONFORK  19
 #define MADV_HWPOISON    100
 #define MADV_SOFT_OFFLINE 101
 #endif
diff --git a/include/sys/prctl.h b/include/sys/prctl.h
index 24f4f8bd..aa0c7a88 100644
--- a/include/sys/prctl.h
+++ b/include/sys/prctl.h
@@ -130,6 +130,12 @@ struct prctl_mm_map {
 #define PR_CAP_AMBIENT_LOWER    3
 #define PR_CAP_AMBIENT_CLEAR_ALL 4
 
+#define PR_SVE_SET_VL           50
+#define PR_SVE_SET_VL_ONEXEC (1 << 18)
+#define PR_SVE_GET_VL           51
+#define PR_SVE_VL_LEN_MASK 0xffff
+#define PR_SVE_VL_INHERIT (1 << 17)
+
 int prctl (int, ...);
 
 #ifdef __cplusplus
diff --git a/include/sys/random.h b/include/sys/random.h
new file mode 100644
index 00000000..4ee7bf2c
--- /dev/null
+++ b/include/sys/random.h
@@ -0,0 +1,19 @@
+#ifndef _SYS_RANDOM_H
+#define _SYS_RANDOM_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define __NEED_size_t
+#define __NEED_ssize_t
+#include <bits/alltypes.h>
+
+#define GRND_NONBLOCK	0x0001
+#define GRND_RANDOM	0x0002
+
+ssize_t getrandom(void *, size_t, unsigned);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/include/sys/shm.h b/include/sys/shm.h
index 67be822b..e7d39ff6 100644
--- a/include/sys/shm.h
+++ b/include/sys/shm.h
@@ -40,6 +40,19 @@ extern "C" {
 #define SHM_HUGETLB 04000
 #define SHM_NORESERVE 010000
 
+#define SHM_HUGE_SHIFT 26
+#define SHM_HUGE_MASK  0x3f
+#define SHM_HUGE_64KB  (16 << 26)
+#define SHM_HUGE_512KB (19 << 26)
+#define SHM_HUGE_1MB   (20 << 26)
+#define SHM_HUGE_2MB   (21 << 26)
+#define SHM_HUGE_8MB   (23 << 26)
+#define SHM_HUGE_16MB  (24 << 26)
+#define SHM_HUGE_256MB (28 << 26)
+#define SHM_HUGE_1GB   (30 << 26)
+#define SHM_HUGE_2GB   (31 << 26)
+#define SHM_HUGE_16GB  (34U << 26)
+
 typedef unsigned long shmatt_t;
 
 void *shmat(int, const void *, int);
diff --git a/include/sys/socket.h b/include/sys/socket.h
index 051d20e0..507da5cc 100644
--- a/include/sys/socket.h
+++ b/include/sys/socket.h
@@ -108,7 +108,8 @@ struct linger {
 #define PF_VSOCK        40
 #define PF_KCM          41
 #define PF_QIPCRTR      42
-#define PF_MAX          43
+#define PF_SMC          43
+#define PF_MAX          44
 
 #define AF_UNSPEC       PF_UNSPEC
 #define AF_LOCAL        PF_LOCAL
@@ -156,6 +157,7 @@ struct linger {
 #define AF_VSOCK        PF_VSOCK
 #define AF_KCM          PF_KCM
 #define AF_QIPCRTR      PF_QIPCRTR
+#define AF_SMC          PF_SMC
 #define AF_MAX          PF_MAX
 
 #ifndef SO_DEBUG
@@ -230,6 +232,7 @@ struct linger {
 #define SO_COOKIE               57
 #define SCM_TIMESTAMPING_PKTINFO 58
 #define SO_PEERGROUPS           59
+#define SO_ZEROCOPY             60
 
 #ifndef SOL_SOCKET
 #define SOL_SOCKET      1
@@ -261,6 +264,7 @@ struct linger {
 #define SOL_ALG         279
 #define SOL_NFC         280
 #define SOL_KCM         281
+#define SOL_TLS         282
 
 #define SOMAXCONN       128
 
@@ -282,6 +286,7 @@ struct linger {
 #define MSG_MORE      0x8000
 #define MSG_WAITFORONE 0x10000
 #define MSG_BATCH     0x40000
+#define MSG_ZEROCOPY  0x4000000
 #define MSG_FASTOPEN  0x20000000
 #define MSG_CMSG_CLOEXEC 0x40000000
 
diff --git a/include/tar.h b/include/tar.h
index be589842..2eba66ec 100644
--- a/include/tar.h
+++ b/include/tar.h
@@ -1,9 +1,13 @@
 #ifndef	_TAR_H
 #define	_TAR_H
 
+#include <features.h>
+
 #define TSUID   04000
 #define TSGID   02000
+#if defined(_GNU_SOURCE) || defined(_BSD_SOURCE) || defined(_XOPEN_SOURCE)
 #define TSVTX   01000
+#endif
 #define TUREAD  00400
 #define TUWRITE 00200
 #define TUEXEC  00100
diff --git a/include/unistd.h b/include/unistd.h
index 09190af4..9485da7a 100644
--- a/include/unistd.h
+++ b/include/unistd.h
@@ -128,12 +128,11 @@ long fpathconf(int, int);
 long sysconf(int);
 size_t confstr(int, char *, size_t);
 
+#if defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
 #define F_ULOCK 0
 #define F_LOCK  1
 #define F_TLOCK 2
 #define F_TEST  3
-
-#if defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
 int setreuid(uid_t, uid_t);
 int setregid(gid_t, gid_t);
 int lockf(int, int, off_t);
@@ -176,6 +175,7 @@ int acct(const char *);
 long syscall(long, ...);
 int execvpe(const char *, char *const [], char *const []);
 int issetugid(void);
+int getentropy(void *, size_t);
 #endif
 
 #ifdef _GNU_SOURCE
diff --git a/ldso/dynlink.c b/ldso/dynlink.c
index 9bf6924b..cea5f452 100644
--- a/ldso/dynlink.c
+++ b/ldso/dynlink.c
@@ -133,6 +133,9 @@ static struct dso *const nodeps_dummy;
 
 struct debug *_dl_debug_addr = &debug;
 
+__attribute__((__visibility__("hidden")))
+extern int __malloc_replaced;
+
 __attribute__((__visibility__("hidden")))
 void (*const __init_array_start)(void)=0, (*const __fini_array_start)(void)=0;
 
@@ -158,10 +161,26 @@ static void *laddr(const struct dso *p, size_t v)
 	for (j=0; v-p->loadmap->segs[j].p_vaddr >= p->loadmap->segs[j].p_memsz; j++);
 	return (void *)(v - p->loadmap->segs[j].p_vaddr + p->loadmap->segs[j].addr);
 }
+static void *laddr_pg(const struct dso *p, size_t v)
+{
+	size_t j=0;
+	size_t pgsz = PAGE_SIZE;
+	if (!p->loadmap) return p->base + v; /* no loadmap: plain base+offset */
+	for (j=0; ; j++) { /* NOTE(review): unbounded; relies on some segment matching v */
+		size_t a = p->loadmap->segs[j].p_vaddr;
+		size_t b = a + p->loadmap->segs[j].p_memsz;
+		a &= -pgsz; /* round segment start down to a page boundary */
+		b += pgsz-1;
+		b &= -pgsz; /* round segment end up to a page boundary */
+		if (v-a<b-a) break; /* unsigned compare: true iff a <= v < b */
+	}
+	return (void *)(v - p->loadmap->segs[j].p_vaddr + p->loadmap->segs[j].addr); /* same mapping formula as laddr() */
+}
 #define fpaddr(p, v) ((void (*)())&(struct funcdesc){ \
 	laddr(p, v), (p)->got })
 #else
 #define laddr(p, v) (void *)((p)->base + (v))
+#define laddr_pg(p, v) laddr(p, v)
 #define fpaddr(p, v) ((void (*)())laddr(p, v))
 #endif
 
@@ -476,23 +495,16 @@ static void redo_lazy_relocs()
 /* A huge hack: to make up for the wastefulness of shared libraries
  * needing at least a page of dirty memory even if they have no global
  * data, we reclaim the gaps at the beginning and end of writable maps
- * and "donate" them to the heap by setting up minimal malloc
- * structures and then freeing them. */
+ * and "donate" them to the heap. */
 
 static void reclaim(struct dso *dso, size_t start, size_t end)
 {
-	size_t *a, *z;
+	void __malloc_donate(char *, char *);
 	if (start >= dso->relro_start && start < dso->relro_end) start = dso->relro_end;
 	if (end   >= dso->relro_start && end   < dso->relro_end) end = dso->relro_start;
-	start = start + 6*sizeof(size_t)-1 & -4*sizeof(size_t);
-	end = (end & -4*sizeof(size_t)) - 2*sizeof(size_t);
-	if (start>end || end-start < 4*sizeof(size_t)) return;
-	a = laddr(dso, start);
-	z = laddr(dso, end);
-	a[-2] = 1;
-	a[-1] = z[0] = end-start + 2*sizeof(size_t) | 1;
-	z[1] = 1;
-	free(a);
+	if (start >= end) return;
+	char *base = laddr_pg(dso, start);
+	__malloc_donate(base, base+(end-start));
 }
 
 static void reclaim_gaps(struct dso *dso)
@@ -500,7 +512,6 @@ static void reclaim_gaps(struct dso *dso)
 	Phdr *ph = dso->phdr;
 	size_t phcnt = dso->phnum;
 
-	if (DL_FDPIC) return; // FIXME
 	for (; phcnt--; ph=(void *)((char *)ph+dso->phentsize)) {
 		if (ph->p_type!=PT_LOAD) continue;
 		if ((ph->p_flags&(PF_R|PF_W))!=(PF_R|PF_W)) continue;
@@ -1683,6 +1694,12 @@ _Noreturn void __dls3(size_t *sp)
 	if (ldso_fail) _exit(127);
 	if (ldd_mode) _exit(0);
 
+	/* Determine if malloc was interposed by a replacement implementation
+	 * so that calloc and the memalign family can harden against the
+	 * possibility of incomplete replacement. */
+	if (find_sym(head, "malloc", 1).dso != &ldso)
+		__malloc_replaced = 1;
+
 	/* Switch to runtime mode: any further failures in the dynamic
 	 * linker are a reportable failure rather than a fatal startup
 	 * error. */
diff --git a/src/complex/casin.c b/src/complex/casin.c
index dfdda988..01ed6184 100644
--- a/src/complex/casin.c
+++ b/src/complex/casin.c
@@ -12,5 +12,6 @@ double complex casin(double complex z)
 	x = creal(z);
 	y = cimag(z);
 	w = CMPLX(1.0 - (x - y)*(x + y), -2.0*x*y);
-	return clog(CMPLX(-y, x) + csqrt(w));
+	double complex r = clog(CMPLX(-y, x) + csqrt(w));
+	return CMPLX(cimag(r), -creal(r));
 }
diff --git a/src/complex/casinf.c b/src/complex/casinf.c
index 93f0e335..4fcb76fc 100644
--- a/src/complex/casinf.c
+++ b/src/complex/casinf.c
@@ -10,5 +10,6 @@ float complex casinf(float complex z)
 	x = crealf(z);
 	y = cimagf(z);
 	w = CMPLXF(1.0 - (x - y)*(x + y), -2.0*x*y);
-	return clogf(CMPLXF(-y, x) + csqrtf(w));
+	float complex r = clogf(CMPLXF(-y, x) + csqrtf(w));
+	return CMPLXF(cimagf(r), -crealf(r));
 }
diff --git a/src/complex/casinl.c b/src/complex/casinl.c
index 0916c60f..3b7ceba7 100644
--- a/src/complex/casinl.c
+++ b/src/complex/casinl.c
@@ -15,6 +15,7 @@ long double complex casinl(long double complex z)
 	x = creall(z);
 	y = cimagl(z);
 	w = CMPLXL(1.0 - (x - y)*(x + y), -2.0*x*y);
-	return clogl(CMPLXL(-y, x) + csqrtl(w));
+	long double complex r = clogl(CMPLXL(-y, x) + csqrtl(w));
+	return CMPLXL(cimagl(r), -creall(r));
 }
 #endif
diff --git a/src/complex/catan.c b/src/complex/catan.c
index 39ce6cf2..7dc2afeb 100644
--- a/src/complex/catan.c
+++ b/src/complex/catan.c
@@ -91,29 +91,17 @@ double complex catan(double complex z)
 	x = creal(z);
 	y = cimag(z);
 
-	if (x == 0.0 && y > 1.0)
-		goto ovrf;
-
 	x2 = x * x;
 	a = 1.0 - x2 - (y * y);
-	if (a == 0.0)
-		goto ovrf;
 
 	t = 0.5 * atan2(2.0 * x, a);
 	w = _redupi(t);
 
 	t = y - 1.0;
 	a = x2 + (t * t);
-	if (a == 0.0)
-		goto ovrf;
 
 	t = y + 1.0;
 	a = (x2 + t * t)/a;
-	w = w + (0.25 * log(a)) * I;
-	return w;
-
-ovrf:
-	// FIXME
-	w = MAXNUM + MAXNUM * I;
+	w = CMPLX(w, 0.25 * log(a));
 	return w;
 }
diff --git a/src/env/__init_tls.c b/src/env/__init_tls.c
index b125eb1f..1c5d98a0 100644
--- a/src/env/__init_tls.c
+++ b/src/env/__init_tls.c
@@ -15,7 +15,8 @@ int __init_tp(void *p)
 	int r = __set_thread_area(TP_ADJ(p));
 	if (r < 0) return -1;
 	if (!r) libc.can_do_threads = 1;
-	td->tid = __syscall(SYS_set_tid_address, &td->tid);
+	td->detach_state = DT_JOINABLE;
+	td->tid = __syscall(SYS_set_tid_address, &td->detach_state);
 	td->locale = &libc.global_locale;
 	td->robust_list.head = &td->robust_list.head;
 	return 0;
diff --git a/src/env/__libc_start_main.c b/src/env/__libc_start_main.c
index 2d758af7..0583f686 100644
--- a/src/env/__libc_start_main.c
+++ b/src/env/__libc_start_main.c
@@ -42,11 +42,13 @@ void __init_libc(char **envp, char *pn)
 		&& !aux[AT_SECURE]) return;
 
 	struct pollfd pfd[3] = { {.fd=0}, {.fd=1}, {.fd=2} };
+	int r =
 #ifdef SYS_poll
 	__syscall(SYS_poll, pfd, 3, 0);
 #else
 	__syscall(SYS_ppoll, pfd, 3, &(struct timespec){0}, 0, _NSIG/8);
 #endif
+	if (r<0) a_crash();
 	for (i=0; i<3; i++) if (pfd[i].revents&POLLNVAL)
 		if (__sys_open("/dev/null", O_RDWR)<0)
 			a_crash();
diff --git a/src/errno/__errno_location.c b/src/errno/__errno_location.c
index 7172a1be..ad9f9241 100644
--- a/src/errno/__errno_location.c
+++ b/src/errno/__errno_location.c
@@ -1,3 +1,4 @@
+#include <errno.h>
 #include "pthread_impl.h"
 
 int *__errno_location(void)
diff --git a/src/internal/atomic.h b/src/internal/atomic.h
index ab473dd7..f938879b 100644
--- a/src/internal/atomic.h
+++ b/src/internal/atomic.h
@@ -251,6 +251,22 @@ static inline void a_crash()
 }
 #endif
 
+#ifndef a_ctz_32
+#define a_ctz_32 a_ctz_32
+static inline int a_ctz_32(uint32_t x)
+{
+#ifdef a_clz_32
+	return 31-a_clz_32(x&-x); /* x&-x keeps only the lowest set bit */
+#else
+	static const char debruijn32[32] = { /* indexed by the top 5 bits of the product below */
+		0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13,
+		31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14
+	};
+	return debruijn32[(x&-x)*0x076be629 >> 27]; /* lowest set bit times constant; >>27 leaves a 5-bit index */
+#endif
+}
+#endif
+
 #ifndef a_ctz_64
 #define a_ctz_64 a_ctz_64
 static inline int a_ctz_64(uint64_t x)
@@ -261,22 +277,23 @@ static inline int a_ctz_64(uint64_t x)
 		63, 52, 6, 26, 37, 40, 33, 47, 61, 45, 43, 21, 23, 58, 17, 10,
 		51, 25, 36, 32, 60, 20, 57, 16, 50, 31, 19, 15, 30, 14, 13, 12
 	};
-	static const char debruijn32[32] = {
-		0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13,
-		31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14
-	};
 	if (sizeof(long) < 8) {
 		uint32_t y = x;
 		if (!y) {
 			y = x>>32;
-			return 32 + debruijn32[(y&-y)*0x076be629 >> 27];
+			return 32 + a_ctz_32(y);
 		}
-		return debruijn32[(y&-y)*0x076be629 >> 27];
+		return a_ctz_32(y);
 	}
 	return debruijn64[(x&-x)*0x022fdd63cc95386dull >> 58];
 }
 #endif
 
+static inline int a_ctz_l(unsigned long x)
+{
+	return (sizeof(long) < 8) ? a_ctz_32(x) : a_ctz_64(x);
+}
+
 #ifndef a_clz_64
 #define a_clz_64 a_clz_64
 static inline int a_clz_64(uint64_t x)
@@ -298,17 +315,4 @@ static inline int a_clz_64(uint64_t x)
 }
 #endif
 
-#ifndef a_ctz_l
-#define a_ctz_l a_ctz_l
-static inline int a_ctz_l(unsigned long x)
-{
-	static const char debruijn32[32] = {
-		0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13,
-		31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14
-	};
-	if (sizeof(long) == 8) return a_ctz_64(x);
-	return debruijn32[(x&-x)*0x076be629 >> 27];
-}
-#endif
-
 #endif
diff --git a/src/internal/malloc_impl.h b/src/internal/malloc_impl.h
new file mode 100644
index 00000000..5d025b06
--- /dev/null
+++ b/src/internal/malloc_impl.h
@@ -0,0 +1,45 @@
+#ifndef MALLOC_IMPL_H
+#define MALLOC_IMPL_H
+
+void *__mmap(void *, size_t, int, int, int, off_t);
+int __munmap(void *, size_t);
+void *__mremap(void *, size_t, size_t, int, ...);
+int __madvise(void *, size_t, int);
+
+struct chunk {
+	size_t psize, csize;
+	struct chunk *next, *prev;
+};
+
+struct bin {
+	volatile int lock[2];
+	struct chunk *head;
+	struct chunk *tail;
+};
+
+#define SIZE_ALIGN (4*sizeof(size_t))
+#define SIZE_MASK (-SIZE_ALIGN)
+#define OVERHEAD (2*sizeof(size_t))
+#define MMAP_THRESHOLD (0x1c00*SIZE_ALIGN)
+#define DONTCARE 16
+#define RECLAIM 163840
+
+#define CHUNK_SIZE(c) ((c)->csize & -2)
+#define CHUNK_PSIZE(c) ((c)->psize & -2)
+#define PREV_CHUNK(c) ((struct chunk *)((char *)(c) - CHUNK_PSIZE(c)))
+#define NEXT_CHUNK(c) ((struct chunk *)((char *)(c) + CHUNK_SIZE(c)))
+#define MEM_TO_CHUNK(p) (struct chunk *)((char *)(p) - OVERHEAD)
+#define CHUNK_TO_MEM(c) (void *)((char *)(c) + OVERHEAD)
+#define BIN_TO_CHUNK(i) (MEM_TO_CHUNK(&mal.bins[i].head))
+
+#define C_INUSE  ((size_t)1)
+
+#define IS_MMAPPED(c) !((c)->csize & (C_INUSE))
+
+__attribute__((__visibility__("hidden")))
+void __bin_chunk(struct chunk *);
+
+__attribute__((__visibility__("hidden")))
+extern int __malloc_replaced;
+
+#endif
diff --git a/src/internal/pthread_impl.h b/src/internal/pthread_impl.h
index f6a4f2c2..fc2def63 100644
--- a/src/internal/pthread_impl.h
+++ b/src/internal/pthread_impl.h
@@ -19,38 +19,38 @@ struct pthread {
 	void **dtv, *unused1, *unused2;
 	uintptr_t sysinfo;
 	uintptr_t canary, canary2;
-	pid_t tid, pid;
 
 	/* Part 2 -- implementation details, non-ABI. */
-	int tsd_used, errno_val;
-	volatile int cancel, canceldisable, cancelasync;
-	int detached;
+	int tid;
+	int errno_val;
+	volatile int detach_state;
+	volatile int cancel;
+	volatile unsigned char canceldisable, cancelasync;
+	unsigned char tsd_used:1;
+	unsigned char unblock_cancel:1;
+	unsigned char dlerror_flag:1;
 	unsigned char *map_base;
 	size_t map_size;
 	void *stack;
 	size_t stack_size;
+	size_t guard_size;
 	void *start_arg;
 	void *(*start)(void *);
 	void *result;
 	struct __ptcb *cancelbuf;
 	void **tsd;
-	volatile int dead;
 	struct {
 		volatile void *volatile head;
 		long off;
 		volatile void *volatile pending;
 	} robust_list;
-	int unblock_cancel;
 	volatile int timer_id;
 	locale_t locale;
 	volatile int killlock[1];
-	volatile int exitlock[1];
-	volatile int startlock[2];
+	volatile int startlock[1];
 	unsigned long sigmask[_NSIG/8/sizeof(long)];
 	char *dlerror_buf;
-	int dlerror_flag;
 	void *stdio_locks;
-	size_t guard_size;
 
 	/* Part 3 -- the positions of these fields relative to
 	 * the end of the structure is external and internal ABI. */
@@ -58,6 +58,14 @@ struct pthread {
 	void **dtv_copy;
 };
 
+enum {
+	DT_EXITED = 0,
+	DT_EXITING,
+	DT_JOINABLE,
+	DT_DETACHED,
+	DT_DYNAMIC,
+};
+
 struct __timer {
 	int timerid;
 	pthread_t thread;
diff --git a/src/internal/stdio_impl.h b/src/internal/stdio_impl.h
index 7cdf729d..1127a492 100644
--- a/src/internal/stdio_impl.h
+++ b/src/internal/stdio_impl.h
@@ -9,7 +9,7 @@
 
 #define FFINALLOCK(f) ((f)->lock>=0 ? __lockfile((f)) : 0)
 #define FLOCK(f) int __need_unlock = ((f)->lock>=0 ? __lockfile((f)) : 0)
-#define FUNLOCK(f) if (__need_unlock) __unlockfile((f)); else
+#define FUNLOCK(f) do { if (__need_unlock) __unlockfile((f)); } while (0)
 
 #define F_PERM 1
 #define F_NORD 4
diff --git a/src/linux/getrandom.c b/src/linux/getrandom.c
new file mode 100644
index 00000000..6cc6f6b0
--- /dev/null
+++ b/src/linux/getrandom.c
@@ -0,0 +1,7 @@
+#include <sys/random.h>
+#include "syscall.h"
+
+ssize_t getrandom(void *buf, size_t buflen, unsigned flags)
+{
+	return syscall_cp(SYS_getrandom, buf, buflen, flags);
+}
diff --git a/src/locale/langinfo.c b/src/locale/langinfo.c
index b16caf44..83be6433 100644
--- a/src/locale/langinfo.c
+++ b/src/locale/langinfo.c
@@ -33,7 +33,7 @@ char *__nl_langinfo_l(nl_item item, locale_t loc)
 	int idx = item & 65535;
 	const char *str;
 
-	if (item == CODESET) return MB_CUR_MAX==1 ? "ASCII" : "UTF-8";
+	if (item == CODESET) return loc->cat[LC_CTYPE] ? "UTF-8" : "ASCII";
 
 	/* _NL_LOCALE_NAME extension */
 	if (idx == 65535 && cat < LC_ALL)
diff --git a/src/malloc/__brk.c b/src/malloc/__brk.c
deleted file mode 100644
index 4c9119b4..00000000
--- a/src/malloc/__brk.c
+++ /dev/null
@@ -1,7 +0,0 @@
-#include <stdint.h>
-#include "syscall.h"
-
-uintptr_t __brk(uintptr_t newbrk)
-{
-	return __syscall(SYS_brk, newbrk);
-}
diff --git a/src/malloc/calloc.c b/src/malloc/calloc.c
deleted file mode 100644
index 436c0b03..00000000
--- a/src/malloc/calloc.c
+++ /dev/null
@@ -1,13 +0,0 @@
-#include <stdlib.h>
-#include <errno.h>
-
-void *__malloc0(size_t);
-
-void *calloc(size_t m, size_t n)
-{
-	if (n && m > (size_t)-1/n) {
-		errno = ENOMEM;
-		return 0;
-	}
-	return __malloc0(n * m);
-}
diff --git a/src/malloc/lite_malloc.c b/src/malloc/lite_malloc.c
index 701f60b4..96c4feac 100644
--- a/src/malloc/lite_malloc.c
+++ b/src/malloc/lite_malloc.c
@@ -47,4 +47,14 @@ static void *__simple_malloc(size_t n)
 }
 
 weak_alias(__simple_malloc, malloc);
-weak_alias(__simple_malloc, __malloc0);
+
+static void *__simple_calloc(size_t m, size_t n)
+{
+	if (n && m > (size_t)-1/n) {
+		errno = ENOMEM;
+		return 0;
+	}
+	return __simple_malloc(n * m);
+}
+
+weak_alias(__simple_calloc, calloc);
diff --git a/src/malloc/malloc.c b/src/malloc/malloc.c
index 9e05e1d6..d72883e1 100644
--- a/src/malloc/malloc.c
+++ b/src/malloc/malloc.c
@@ -8,53 +8,19 @@
 #include "libc.h"
 #include "atomic.h"
 #include "pthread_impl.h"
+#include "malloc_impl.h"
 
 #if defined(__GNUC__) && defined(__PIC__)
 #define inline inline __attribute__((always_inline))
 #endif
 
-void *__mmap(void *, size_t, int, int, int, off_t);
-int __munmap(void *, size_t);
-void *__mremap(void *, size_t, size_t, int, ...);
-int __madvise(void *, size_t, int);
-
-struct chunk {
-	size_t psize, csize;
-	struct chunk *next, *prev;
-};
-
-struct bin {
-	volatile int lock[2];
-	struct chunk *head;
-	struct chunk *tail;
-};
-
 static struct {
 	volatile uint64_t binmap;
 	struct bin bins[64];
 	volatile int free_lock[2];
 } mal;
 
-
-#define SIZE_ALIGN (4*sizeof(size_t))
-#define SIZE_MASK (-SIZE_ALIGN)
-#define OVERHEAD (2*sizeof(size_t))
-#define MMAP_THRESHOLD (0x1c00*SIZE_ALIGN)
-#define DONTCARE 16
-#define RECLAIM 163840
-
-#define CHUNK_SIZE(c) ((c)->csize & -2)
-#define CHUNK_PSIZE(c) ((c)->psize & -2)
-#define PREV_CHUNK(c) ((struct chunk *)((char *)(c) - CHUNK_PSIZE(c)))
-#define NEXT_CHUNK(c) ((struct chunk *)((char *)(c) + CHUNK_SIZE(c)))
-#define MEM_TO_CHUNK(p) (struct chunk *)((char *)(p) - OVERHEAD)
-#define CHUNK_TO_MEM(c) (void *)((char *)(c) + OVERHEAD)
-#define BIN_TO_CHUNK(i) (MEM_TO_CHUNK(&mal.bins[i].head))
-
-#define C_INUSE  ((size_t)1)
-
-#define IS_MMAPPED(c) !((c)->csize & (C_INUSE))
-
+int __malloc_replaced;
 
 /* Synchronization tools */
 
@@ -314,7 +280,7 @@ static void trim(struct chunk *self, size_t n)
 	next->psize = n1-n | C_INUSE;
 	self->csize = n | C_INUSE;
 
-	free(CHUNK_TO_MEM(split));
+	__bin_chunk(split);
 }
 
 void *malloc(size_t n)
@@ -366,15 +332,40 @@ void *malloc(size_t n)
 	return CHUNK_TO_MEM(c);
 }
 
-void *__malloc0(size_t n)
+static size_t mal0_clear(char *p, size_t pagesz, size_t n)
 {
+#ifdef __GNUC__
+	typedef uint64_t __attribute__((__may_alias__)) T;
+#else
+	typedef unsigned char T;
+#endif
+	char *pp = p + n;
+	size_t i = (uintptr_t)pp & (pagesz - 1);
+	for (;;) {
+		pp = memset(pp - i, 0, i);
+		if (pp - p < pagesz) return pp - p;
+		for (i = pagesz; i; i -= 2*sizeof(T), pp -= 2*sizeof(T))
+		        if (((T *)pp)[-1] | ((T *)pp)[-2])
+				break;
+	}
+}
+
+void *calloc(size_t m, size_t n)
+{
+	if (n && m > (size_t)-1/n) {
+		errno = ENOMEM;
+		return 0;
+	}
+	n *= m;
 	void *p = malloc(n);
-	if (p && !IS_MMAPPED(MEM_TO_CHUNK(p))) {
-		size_t *z;
-		n = (n + sizeof *z - 1)/sizeof *z;
-		for (z=p; n; n--, z++) if (*z) *z=0;
+	if (!p) return p;
+	if (!__malloc_replaced) {
+		if (IS_MMAPPED(MEM_TO_CHUNK(p)))
+			return p;
+		if (n >= PAGE_SIZE)
+			n = mal0_clear(p, PAGE_SIZE, n);
 	}
-	return p;
+	return memset(p, 0, n);
 }
 
 void *realloc(void *p, size_t n)
@@ -397,10 +388,9 @@ void *realloc(void *p, size_t n)
 		size_t newlen = n + extra;
 		/* Crash on realloc of freed chunk */
 		if (extra & 1) a_crash();
-		if (newlen < PAGE_SIZE && (new = malloc(n))) {
-			memcpy(new, p, n-OVERHEAD);
-			free(p);
-			return new;
+		if (newlen < PAGE_SIZE && (new = malloc(n-OVERHEAD))) {
+			n0 = n;
+			goto copy_free_ret;
 		}
 		newlen = (newlen + PAGE_SIZE-1) & -PAGE_SIZE;
 		if (oldlen == newlen) return p;
@@ -443,34 +433,20 @@ copy_realloc:
 	/* As a last resort, allocate a new chunk and copy to it. */
 	new = malloc(n-OVERHEAD);
 	if (!new) return 0;
+copy_free_ret:
 	memcpy(new, p, n0-OVERHEAD);
 	free(CHUNK_TO_MEM(self));
 	return new;
 }
 
-void free(void *p)
+void __bin_chunk(struct chunk *self)
 {
-	struct chunk *self, *next;
+	struct chunk *next = NEXT_CHUNK(self);
 	size_t final_size, new_size, size;
 	int reclaim=0;
 	int i;
 
-	if (!p) return;
-
-	self = MEM_TO_CHUNK(p);
-
-	if (IS_MMAPPED(self)) {
-		size_t extra = self->psize;
-		char *base = (char *)self - extra;
-		size_t len = CHUNK_SIZE(self) + extra;
-		/* Crash on double free */
-		if (extra & 1) a_crash();
-		__munmap(base, len);
-		return;
-	}
-
 	final_size = new_size = CHUNK_SIZE(self);
-	next = NEXT_CHUNK(self);
 
 	/* Crash on corrupted footer (likely from buffer overflow) */
 	if (next->psize != self->csize) a_crash();
@@ -531,3 +507,44 @@ void free(void *p)
 
 	unlock_bin(i);
 }
+
+static void unmap_chunk(struct chunk *self)
+{
+	size_t extra = self->psize;
+	char *base = (char *)self - extra;
+	size_t len = CHUNK_SIZE(self) + extra;
+	/* Crash on double free */
+	if (extra & 1) a_crash();
+	__munmap(base, len);
+}
+
+void free(void *p)
+{
+	if (!p) return;
+
+	struct chunk *self = MEM_TO_CHUNK(p);
+
+	if (IS_MMAPPED(self))
+		unmap_chunk(self);
+	else
+		__bin_chunk(self);
+}
+
+void __malloc_donate(char *start, char *end)
+{
+	size_t align_start_up = (SIZE_ALIGN-1) & (-(uintptr_t)start - OVERHEAD); /* padding so start+OVERHEAD lands on a SIZE_ALIGN boundary */
+	size_t align_end_down = (SIZE_ALIGN-1) & (uintptr_t)end; /* bytes by which end exceeds a SIZE_ALIGN boundary */
+
+	/* Getting past this condition ensures that the padding for alignment
+	 * and header overhead will not overflow and will leave a nonzero
+	 * multiple of SIZE_ALIGN bytes between start and end. */
+	if (end - start <= OVERHEAD + align_start_up + align_end_down)
+		return;
+	start += align_start_up + OVERHEAD; /* start now points at the chunk's user memory */
+	end   -= align_end_down;
+
+	struct chunk *c = MEM_TO_CHUNK(start), *n = MEM_TO_CHUNK(end); /* c: donated chunk, n: header slot at its end */
+	c->psize = n->csize = C_INUSE; /* zero size with the in-use bit set on both outer edges */
+	c->csize = n->psize = C_INUSE | (end-start); /* header and footer carry the same size, marked in use */
+	__bin_chunk(c); /* hand the chunk to the bins, as free() does for non-mmapped chunks */
+}
diff --git a/src/malloc/memalign.c b/src/malloc/memalign.c
index 006bd21c..8a6152f4 100644
--- a/src/malloc/memalign.c
+++ b/src/malloc/memalign.c
@@ -2,55 +2,53 @@
 #include <stdint.h>
 #include <errno.h>
 #include "libc.h"
-
-/* This function should work with most dlmalloc-like chunk bookkeeping
- * systems, but it's only guaranteed to work with the native implementation
- * used in this library. */
+#include "malloc_impl.h"
 
 void *__memalign(size_t align, size_t len)
 {
-	unsigned char *mem, *new, *end;
-	size_t header, footer;
+	unsigned char *mem, *new;
 
 	if ((align & -align) != align) {
 		errno = EINVAL;
-		return NULL;
+		return 0;
 	}
 
-	if (len > SIZE_MAX - align) {
+	if (len > SIZE_MAX - align || __malloc_replaced) {
 		errno = ENOMEM;
-		return NULL;
+		return 0;
 	}
 
-	if (align <= 4*sizeof(size_t)) {
-		if (!(mem = malloc(len)))
-			return NULL;
-		return mem;
-	}
+	if (align <= SIZE_ALIGN)
+		return malloc(len);
 
 	if (!(mem = malloc(len + align-1)))
-		return NULL;
+		return 0;
 
 	new = (void *)((uintptr_t)mem + align-1 & -align);
 	if (new == mem) return mem;
 
-	header = ((size_t *)mem)[-1];
+	struct chunk *c = MEM_TO_CHUNK(mem);
+	struct chunk *n = MEM_TO_CHUNK(new);
 
-	if (!(header & 7)) {
-		((size_t *)new)[-2] = ((size_t *)mem)[-2] + (new-mem);
-		((size_t *)new)[-1] = ((size_t *)mem)[-1] - (new-mem);
+	if (IS_MMAPPED(c)) {
+		/* Apply difference between aligned and original
+		 * address to the "extra" field of mmapped chunk. */
+		n->psize = c->psize + (new-mem);
+		n->csize = c->csize - (new-mem);
 		return new;
 	}
 
-	end = mem + (header & -8);
-	footer = ((size_t *)end)[-2];
+	struct chunk *t = NEXT_CHUNK(c);
 
-	((size_t *)mem)[-1] = header&7 | new-mem;
-	((size_t *)new)[-2] = footer&7 | new-mem;
-	((size_t *)new)[-1] = header&7 | end-new;
-	((size_t *)end)[-2] = footer&7 | end-new;
+	/* Split the allocated chunk into two chunks. The aligned part
+	 * that will be used has the size in its footer reduced by the
+	 * difference between the aligned and original addresses, and
+	 * the resulting size copied to its header. A new header and
+	 * footer are written for the split-off part to be freed. */
+	n->psize = c->csize = C_INUSE | (new-mem);
+	n->csize = t->psize -= new-mem;
 
-	free(mem);
+	__bin_chunk(c);
 	return new;
 }
 
diff --git a/src/math/fmaf.c b/src/math/fmaf.c
index aa57feb6..80f5cd8a 100644
--- a/src/math/fmaf.c
+++ b/src/math/fmaf.c
@@ -50,7 +50,7 @@ float fmaf(float x, float y, float z)
 	/* Common case: The double precision result is fine. */
 	if ((u.i & 0x1fffffff) != 0x10000000 || /* not a halfway case */
 		e == 0x7ff ||                   /* NaN */
-		result - xy == z ||                 /* exact */
+		(result - xy == z && result - z == xy) || /* exact */
 		fegetround() != FE_TONEAREST)       /* not round-to-nearest */
 	{
 		/*
diff --git a/src/misc/getentropy.c b/src/misc/getentropy.c
new file mode 100644
index 00000000..4c61ae26
--- /dev/null
+++ b/src/misc/getentropy.c
@@ -0,0 +1,31 @@
+#include <sys/random.h>
+#include <pthread.h>
+#include <errno.h>
+
+int getentropy(void *buffer, size_t len)
+{
+	int cs, ret = 0; /* must start at 0: with len==0 the loop never assigns it */
+	char *pos = buffer;
+	/* Fills buffer with len bytes from getrandom(); over 256 bytes is EIO. */
+	if (len > 256) {
+		errno = EIO;
+		return -1;
+	}
+
+	pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs);
+
+	while (len) {
+		ret = getrandom(pos, len, 0);
+		if (ret < 0) {
+			if (errno == EINTR) continue; /* interrupted: retry */
+			else break; /* any other error: return the negative result */
+		}
+		pos += ret; /* short read: advance and ask for the remainder */
+		len -= ret;
+		ret = 0;
+	}
+
+	pthread_setcancelstate(cs, 0); /* restore the caller's cancel state */
+
+	return ret;
+}
diff --git a/src/misc/gethostid.c b/src/misc/gethostid.c
index ea65611a..25bb35db 100644
--- a/src/misc/gethostid.c
+++ b/src/misc/gethostid.c
@@ -1,3 +1,5 @@
+#include <unistd.h>
+
 long gethostid()
 {
 	return 0;
diff --git a/src/misc/getopt.c b/src/misc/getopt.c
index e9bab41c..e921a60e 100644
--- a/src/misc/getopt.c
+++ b/src/misc/getopt.c
@@ -77,7 +77,7 @@ int getopt(int argc, char * const argv[], const char *optstring)
 		if (l>0) i+=l; else i++;
 	} while (l && d != c);
 
-	if (d != c) {
+	if (d != c || c == ':') {
 		optopt = c;
 		if (optstring[0] != ':' && opterr)
 			__getopt_msg(argv[0], ": unrecognized option: ", optchar, k);
diff --git a/src/misc/getopt_long.c b/src/misc/getopt_long.c
index 008b747c..ddcef949 100644
--- a/src/misc/getopt_long.c
+++ b/src/misc/getopt_long.c
@@ -1,5 +1,7 @@
 #define _GNU_SOURCE
 #include <stddef.h>
+#include <stdlib.h>
+#include <limits.h>
 #include <getopt.h>
 #include <stdio.h>
 #include <string.h>
@@ -58,10 +60,10 @@ static int __getopt_long_core(int argc, char *const *argv, const char *optstring
 	{
 		int colon = optstring[optstring[0]=='+'||optstring[0]=='-']==':';
 		int i, cnt, match;
-		char *arg, *opt;
+		char *arg, *opt, *start = argv[optind]+1;
 		for (cnt=i=0; longopts[i].name; i++) {
 			const char *name = longopts[i].name;
-			opt = argv[optind]+1;
+			opt = start;
 			if (*opt == '-') opt++;
 			while (*opt && *opt != '=' && *opt == *name)
 				name++, opt++;
@@ -74,6 +76,17 @@ static int __getopt_long_core(int argc, char *const *argv, const char *optstring
 			}
 			cnt++;
 		}
+		if (cnt==1 && longonly && arg-start == mblen(start, MB_LEN_MAX)) {
+			int l = arg-start;
+			for (i=0; optstring[i]; i++) {
+				int j;
+				for (j=0; j<l && start[j]==optstring[i+j]; j++);
+				if (j==l) {
+					cnt++;
+					break;
+				}
+			}
+		}
 		if (cnt==1) {
 			i = match;
 			opt = arg;
diff --git a/src/signal/sigisemptyset.c b/src/signal/sigisemptyset.c
index 312c66cf..68b86624 100644
--- a/src/signal/sigisemptyset.c
+++ b/src/signal/sigisemptyset.c
@@ -4,6 +4,7 @@
 
 int sigisemptyset(const sigset_t *set)
 {
-	static const unsigned long zeroset[_NSIG/8/sizeof(long)];
-	return !memcmp(set, &zeroset, _NSIG/8);
+	for (size_t i=0; i<_NSIG/8/sizeof *set->__bits; i++)
+		if (set->__bits[i]) return 0;
+	return 1;
 }
diff --git a/src/signal/sigrtmin.c b/src/signal/sigrtmin.c
index d0e769bb..c5a1fd92 100644
--- a/src/signal/sigrtmin.c
+++ b/src/signal/sigrtmin.c
@@ -1,3 +1,5 @@
+#include <signal.h>
+
 int __libc_current_sigrtmin()
 {
 	return 35;
diff --git a/src/stdio/__lockfile.c b/src/stdio/__lockfile.c
index 9d967d6e..2ff75d8a 100644
--- a/src/stdio/__lockfile.c
+++ b/src/stdio/__lockfile.c
@@ -1,28 +1,25 @@
 #include "stdio_impl.h"
 #include "pthread_impl.h"
 
+#define MAYBE_WAITERS 0x40000000
+
 int __lockfile(FILE *f)
 {
-	int owner, tid = __pthread_self()->tid;
-	if (f->lock == tid)
+	int owner = f->lock, tid = __pthread_self()->tid;
+	if ((owner & ~MAYBE_WAITERS) == tid)
 		return 0;
-	while ((owner = a_cas(&f->lock, 0, tid)))
-		__wait(&f->lock, &f->waiters, owner, 1);
+	for (;;) {
+		owner = a_cas(&f->lock, 0, tid);
+		if (!owner) return 1;
+		if (a_cas(&f->lock, owner, owner|MAYBE_WAITERS)==owner) break;
+	}
+	while ((owner = a_cas(&f->lock, 0, tid|MAYBE_WAITERS)))
+		__futexwait(&f->lock, owner, 1);
 	return 1;
 }
 
 void __unlockfile(FILE *f)
 {
-	a_store(&f->lock, 0);
-
-	/* The following read is technically invalid under situations
-	 * of self-synchronized destruction. Another thread may have
-	 * called fclose as soon as the above store has completed.
-	 * Nonetheless, since FILE objects always live in memory
-	 * obtained by malloc from the heap, it's safe to assume
-	 * the dereferences below will not fault. In the worst case,
-	 * a spurious syscall will be made. If the implementation of
-	 * malloc changes, this assumption needs revisiting. */
-
-	if (f->waiters) __wake(&f->lock, 1, 1);
+	if (a_swap(&f->lock, 0) & MAYBE_WAITERS)
+		__wake(&f->lock, 1, 1);
 }
diff --git a/src/stdio/__stdio_read.c b/src/stdio/__stdio_read.c
index f8fa6d3b..ea675da3 100644
--- a/src/stdio/__stdio_read.c
+++ b/src/stdio/__stdio_read.c
@@ -9,10 +9,11 @@ size_t __stdio_read(FILE *f, unsigned char *buf, size_t len)
 	};
 	ssize_t cnt;
 
-	cnt = syscall(SYS_readv, f->fd, iov, 2);
+	cnt = iov[0].iov_len ? syscall(SYS_readv, f->fd, iov, 2)
+		: syscall(SYS_read, f->fd, iov[1].iov_base, iov[1].iov_len);
 	if (cnt <= 0) {
-		f->flags |= F_EOF ^ ((F_ERR^F_EOF) & cnt);
-		return cnt;
+		f->flags |= cnt ? F_ERR : F_EOF;
+		return 0;
 	}
 	if (cnt <= iov[0].iov_len) return cnt;
 	cnt -= iov[0].iov_len;
diff --git a/src/stdio/__towrite.c b/src/stdio/__towrite.c
index 0a69d926..b022cbca 100644
--- a/src/stdio/__towrite.c
+++ b/src/stdio/__towrite.c
@@ -3,7 +3,7 @@
 int __towrite(FILE *f)
 {
 	f->mode |= f->mode-1;
-	if (f->flags & (F_NOWR)) {
+	if (f->flags & F_NOWR) {
 		f->flags |= F_ERR;
 		return EOF;
 	}
diff --git a/src/stdio/fclose.c b/src/stdio/fclose.c
index d687a877..c675413d 100644
--- a/src/stdio/fclose.c
+++ b/src/stdio/fclose.c
@@ -24,7 +24,7 @@ int fclose(FILE *f)
 	r = fflush(f);
 	r |= f->close(f);
 
-	if (f->getln_buf) free(f->getln_buf);
+	free(f->getln_buf);
 	if (!perm) free(f);
 	else FUNLOCK(f);
 
diff --git a/src/stdio/fgetpos.c b/src/stdio/fgetpos.c
index c3fa0eb0..6eb361e1 100644
--- a/src/stdio/fgetpos.c
+++ b/src/stdio/fgetpos.c
@@ -4,7 +4,7 @@ int fgetpos(FILE *restrict f, fpos_t *restrict pos)
 {
 	off_t off = __ftello(f);
 	if (off < 0) return -1;
-	*(off_t *)pos = off;
+	*(long long *)pos = off;
 	return 0;
 }
 
diff --git a/src/stdio/flockfile.c b/src/stdio/flockfile.c
index a196c1ef..6806cf8b 100644
--- a/src/stdio/flockfile.c
+++ b/src/stdio/flockfile.c
@@ -1,10 +1,11 @@
 #include "stdio_impl.h"
 #include "pthread_impl.h"
 
+void __register_locked_file(FILE *, pthread_t);
+
 void flockfile(FILE *f)
 {
-	while (ftrylockfile(f)) {
-		int owner = f->lock;
-		if (owner) __wait(&f->lock, &f->waiters, owner, 1);
-	}
+	if (!ftrylockfile(f)) return;
+	__lockfile(f);
+	__register_locked_file(f, __pthread_self());
 }
diff --git a/src/stdio/fmemopen.c b/src/stdio/fmemopen.c
index 2ce43d32..fb2656e3 100644
--- a/src/stdio/fmemopen.c
+++ b/src/stdio/fmemopen.c
@@ -9,6 +9,12 @@ struct cookie {
 	int mode;
 };
 
+struct mem_FILE {
+	FILE f;
+	struct cookie c;
+	unsigned char buf[UNGET+BUFSIZ], buf2[];
+};
+
 static off_t mseek(FILE *f, off_t off, int whence)
 {
 	ssize_t base;
@@ -72,8 +78,7 @@ static int mclose(FILE *m)
 
 FILE *fmemopen(void *restrict buf, size_t size, const char *restrict mode)
 {
-	FILE *f;
-	struct cookie *c;
+	struct mem_FILE *f;
 	int plus = !!strchr(mode, '+');
 	
 	if (!size || !strchr("rwa", *mode)) {
@@ -86,29 +91,34 @@ FILE *fmemopen(void *restrict buf, size_t size, const char *restrict mode)
 		return 0;
 	}
 
-	f = calloc(sizeof *f + sizeof *c + UNGET + BUFSIZ + (buf?0:size), 1);
+	f = malloc(sizeof *f + (buf?0:size));
 	if (!f) return 0;
-	f->cookie = c = (void *)(f+1);
-	f->fd = -1;
-	f->lbf = EOF;
-	f->buf = (unsigned char *)(c+1) + UNGET;
-	f->buf_size = BUFSIZ;
-	if (!buf) buf = f->buf + BUFSIZ;
+	memset(&f->f, 0, sizeof f->f);
+	f->f.cookie = &f->c;
+	f->f.fd = -1;
+	f->f.lbf = EOF;
+	f->f.buf = f->buf + UNGET;
+	f->f.buf_size = sizeof f->buf - UNGET;
+	if (!buf) {
+		buf = f->buf2;
+		memset(buf, 0, size);
+	}
 
-	c->buf = buf;
-	c->size = size;
-	c->mode = *mode;
+	memset(&f->c, 0, sizeof f->c);
+	f->c.buf = buf;
+	f->c.size = size;
+	f->c.mode = *mode;
 	
-	if (!plus) f->flags = (*mode == 'r') ? F_NOWR : F_NORD;
-	if (*mode == 'r') c->len = size;
-	else if (*mode == 'a') c->len = c->pos = strnlen(buf, size);
+	if (!plus) f->f.flags = (*mode == 'r') ? F_NOWR : F_NORD;
+	if (*mode == 'r') f->c.len = size;
+	else if (*mode == 'a') f->c.len = f->c.pos = strnlen(buf, size);
 
-	f->read = mread;
-	f->write = mwrite;
-	f->seek = mseek;
-	f->close = mclose;
+	f->f.read = mread;
+	f->f.write = mwrite;
+	f->f.seek = mseek;
+	f->f.close = mclose;
 
-	if (!libc.threaded) f->lock = -1;
+	if (!libc.threaded) f->f.lock = -1;
 
-	return __ofl_add(f);
+	return __ofl_add(&f->f);
 }
diff --git a/src/stdio/fopencookie.c b/src/stdio/fopencookie.c
index 2f46dd53..da042fe8 100644
--- a/src/stdio/fopencookie.c
+++ b/src/stdio/fopencookie.c
@@ -116,15 +116,12 @@ FILE *fopencookie(void *cookie, const char *mode, cookie_io_functions_t iofuncs)
 
 	/* Set up our fcookie */
 	f->fc.cookie = cookie;
-	f->fc.iofuncs.read = iofuncs.read;
-	f->fc.iofuncs.write = iofuncs.write;
-	f->fc.iofuncs.seek = iofuncs.seek;
-	f->fc.iofuncs.close = iofuncs.close;
+	f->fc.iofuncs = iofuncs;
 
 	f->f.fd = -1;
 	f->f.cookie = &f->fc;
 	f->f.buf = f->buf + UNGET;
-	f->f.buf_size = BUFSIZ;
+	f->f.buf_size = sizeof f->buf - UNGET;
 	f->f.lbf = EOF;
 
 	/* Initialize op ptrs. No problem if some are unneeded. */
diff --git a/src/stdio/fread.c b/src/stdio/fread.c
index aef75f73..733d3716 100644
--- a/src/stdio/fread.c
+++ b/src/stdio/fread.c
@@ -25,7 +25,7 @@ size_t fread(void *restrict destv, size_t size, size_t nmemb, FILE *restrict f)
 	/* Read the remainder directly */
 	for (; l; l-=k, dest+=k) {
 		k = __toread(f) ? 0 : f->read(f, dest, l);
-		if (k+1<=1) {
+		if (!k) {
 			FUNLOCK(f);
 			return (len-l)/size;
 		}
diff --git a/src/stdio/fsetpos.c b/src/stdio/fsetpos.c
index 5d76c8cd..6310424e 100644
--- a/src/stdio/fsetpos.c
+++ b/src/stdio/fsetpos.c
@@ -2,7 +2,7 @@
 
 int fsetpos(FILE *f, const fpos_t *pos)
 {
-	return __fseeko(f, *(const off_t *)pos, SEEK_SET);
+	return __fseeko(f, *(const long long *)pos, SEEK_SET);
 }
 
 LFS64(fsetpos);
diff --git a/src/stdio/ftrylockfile.c b/src/stdio/ftrylockfile.c
index eb13c839..3b97807a 100644
--- a/src/stdio/ftrylockfile.c
+++ b/src/stdio/ftrylockfile.c
@@ -2,6 +2,8 @@
 #include "pthread_impl.h"
 #include <limits.h>
 
+#define MAYBE_WAITERS 0x40000000
+
 void __do_orphaned_stdio_locks()
 {
 	FILE *f;
@@ -18,23 +20,29 @@ void __unlist_locked_file(FILE *f)
 	}
 }
 
+void __register_locked_file(FILE *f, pthread_t self)
+{
+	f->lockcount = 1;
+	f->prev_locked = 0;
+	f->next_locked = self->stdio_locks;
+	if (f->next_locked) f->next_locked->prev_locked = f;
+	self->stdio_locks = f;
+}
+
 int ftrylockfile(FILE *f)
 {
 	pthread_t self = __pthread_self();
 	int tid = self->tid;
-	if (f->lock == tid) {
+	int owner = f->lock;
+	if ((owner & ~MAYBE_WAITERS) == tid) {
 		if (f->lockcount == LONG_MAX)
 			return -1;
 		f->lockcount++;
 		return 0;
 	}
-	if (f->lock < 0) f->lock = 0;
-	if (f->lock || a_cas(&f->lock, 0, tid))
+	if (owner < 0) f->lock = owner = 0;
+	if (owner || a_cas(&f->lock, 0, tid))
 		return -1;
-	f->lockcount = 1;
-	f->prev_locked = 0;
-	f->next_locked = self->stdio_locks;
-	if (f->next_locked) f->next_locked->prev_locked = f;
-	self->stdio_locks = f;
+	__register_locked_file(f, self);
 	return 0;
 }
diff --git a/src/stdio/getdelim.c b/src/stdio/getdelim.c
index 1ccd8029..d4b23882 100644
--- a/src/stdio/getdelim.c
+++ b/src/stdio/getdelim.c
@@ -3,8 +3,6 @@
 #include <inttypes.h>
 #include <errno.h>
 
-#define MIN(a,b) ((a)<(b) ? (a) : (b))
-
 ssize_t getdelim(char **restrict s, size_t *restrict n, int delim, FILE *restrict f)
 {
 	char *tmp;
diff --git a/src/stdio/open_memstream.c b/src/stdio/open_memstream.c
index eab024da..ee834234 100644
--- a/src/stdio/open_memstream.c
+++ b/src/stdio/open_memstream.c
@@ -12,6 +12,12 @@ struct cookie {
 	size_t space;
 };
 
+struct ms_FILE {
+	FILE f;
+	struct cookie c;
+	unsigned char buf[BUFSIZ];
+};
+
 static off_t ms_seek(FILE *f, off_t off, int whence)
 {
 	ssize_t base;
@@ -57,34 +63,34 @@ static int ms_close(FILE *f)
 
 FILE *open_memstream(char **bufp, size_t *sizep)
 {
-	FILE *f;
-	struct cookie *c;
+	struct ms_FILE *f;
 	char *buf;
 
-	if (!(f=malloc(sizeof *f + sizeof *c + BUFSIZ))) return 0;
+	if (!(f=malloc(sizeof *f))) return 0;
 	if (!(buf=malloc(sizeof *buf))) {
 		free(f);
 		return 0;
 	}
-	memset(f, 0, sizeof *f + sizeof *c);
-	f->cookie = c = (void *)(f+1);
+	memset(&f->f, 0, sizeof f->f);
+	memset(&f->c, 0, sizeof f->c);
+	f->f.cookie = &f->c;
 
-	c->bufp = bufp;
-	c->sizep = sizep;
-	c->pos = c->len = c->space = *sizep = 0;
-	c->buf = *bufp = buf;
+	f->c.bufp = bufp;
+	f->c.sizep = sizep;
+	f->c.pos = f->c.len = f->c.space = *sizep = 0;
+	f->c.buf = *bufp = buf;
 	*buf = 0;
 
-	f->flags = F_NORD;
-	f->fd = -1;
-	f->buf = (void *)(c+1);
-	f->buf_size = BUFSIZ;
-	f->lbf = EOF;
-	f->write = ms_write;
-	f->seek = ms_seek;
-	f->close = ms_close;
+	f->f.flags = F_NORD;
+	f->f.fd = -1;
+	f->f.buf = f->buf;
+	f->f.buf_size = sizeof f->buf;
+	f->f.lbf = EOF;
+	f->f.write = ms_write;
+	f->f.seek = ms_seek;
+	f->f.close = ms_close;
 
-	if (!libc.threaded) f->lock = -1;
+	if (!libc.threaded) f->f.lock = -1;
 
-	return __ofl_add(f);
+	return __ofl_add(&f->f);
 }
diff --git a/src/stdio/open_wmemstream.c b/src/stdio/open_wmemstream.c
index 4d90cd97..cb693ea7 100644
--- a/src/stdio/open_wmemstream.c
+++ b/src/stdio/open_wmemstream.c
@@ -14,6 +14,12 @@ struct cookie {
 	mbstate_t mbs;
 };
 
+struct wms_FILE {
+	FILE f;
+	struct cookie c;
+	unsigned char buf[1];
+};
+
 static off_t wms_seek(FILE *f, off_t off, int whence)
 {
 	ssize_t base;
@@ -59,34 +65,34 @@ static int wms_close(FILE *f)
 
 FILE *open_wmemstream(wchar_t **bufp, size_t *sizep)
 {
-	FILE *f;
-	struct cookie *c;
+	struct wms_FILE *f;
 	wchar_t *buf;
 
-	if (!(f=malloc(sizeof *f + sizeof *c))) return 0;
+	if (!(f=malloc(sizeof *f))) return 0;
 	if (!(buf=malloc(sizeof *buf))) {
 		free(f);
 		return 0;
 	}
-	memset(f, 0, sizeof *f + sizeof *c);
-	f->cookie = c = (void *)(f+1);
+	memset(&f->f, 0, sizeof f->f);
+	memset(&f->c, 0, sizeof f->c);
+	f->f.cookie = &f->c;
 
-	c->bufp = bufp;
-	c->sizep = sizep;
-	c->pos = c->len = c->space = *sizep = 0;
-	c->buf = *bufp = buf;
+	f->c.bufp = bufp;
+	f->c.sizep = sizep;
+	f->c.pos = f->c.len = f->c.space = *sizep = 0;
+	f->c.buf = *bufp = buf;
 	*buf = 0;
 
-	f->flags = F_NORD;
-	f->fd = -1;
-	f->buf = (void *)(c+1);
-	f->buf_size = 0;
-	f->lbf = EOF;
-	f->write = wms_write;
-	f->seek = wms_seek;
-	f->close = wms_close;
+	f->f.flags = F_NORD;
+	f->f.fd = -1;
+	f->f.buf = f->buf;
+	f->f.buf_size = 0;
+	f->f.lbf = EOF;
+	f->f.write = wms_write;
+	f->f.seek = wms_seek;
+	f->f.close = wms_close;
 
-	if (!libc.threaded) f->lock = -1;
+	if (!libc.threaded) f->f.lock = -1;
 
-	return __ofl_add(f);
+	return __ofl_add(&f->f);
 }
diff --git a/src/stdio/setvbuf.c b/src/stdio/setvbuf.c
index 541a125f..b6b9b018 100644
--- a/src/stdio/setvbuf.c
+++ b/src/stdio/setvbuf.c
@@ -1,22 +1,25 @@
 #include "stdio_impl.h"
 
-/* This function makes no attempt to protect the user from his/her own
- * stupidity. If called any time but when then ISO C standard specifically
- * allows it, all hell can and will break loose, especially with threads!
- *
- * This implementation ignores all arguments except the buffering type,
- * and uses the existing buffer allocated alongside the FILE object.
- * In the case of stderr where the preexisting buffer is length 1, it
- * is not possible to set line buffering or full buffering. */
+/* The behavior of this function is undefined except when it is the first
+ * operation on the stream, so the presence or absence of locking is not
+ * observable in a program whose behavior is defined. Thus no locking is
+ * performed here. No allocation of buffers is performed, but a buffer
+ * provided by the caller is used as long as it is suitably sized. */
 
 int setvbuf(FILE *restrict f, char *restrict buf, int type, size_t size)
 {
 	f->lbf = EOF;
 
-	if (type == _IONBF)
+	if (type == _IONBF) {
 		f->buf_size = 0;
-	else if (type == _IOLBF)
-		f->lbf = '\n';
+	} else {
+		if (buf && size >= UNGET) {
+			f->buf = (void *)buf;
+			f->buf_size = size - UNGET;
+		}
+		if (type == _IOLBF && f->buf_size)
+			f->lbf = '\n';
+	}
 
 	f->flags |= F_SVB;
 
diff --git a/src/stdio/vswprintf.c b/src/stdio/vswprintf.c
index 6eb2f6ac..38efed65 100644
--- a/src/stdio/vswprintf.c
+++ b/src/stdio/vswprintf.c
@@ -1,6 +1,5 @@
 #include "stdio_impl.h"
 #include <limits.h>
-#include <string.h>
 #include <errno.h>
 #include <stdint.h>
 #include <wchar.h>
@@ -37,17 +36,17 @@ static size_t sw_write(FILE *f, const unsigned char *s, size_t l)
 int vswprintf(wchar_t *restrict s, size_t n, const wchar_t *restrict fmt, va_list ap)
 {
 	int r;
-	FILE f;
 	unsigned char buf[256];
 	struct cookie c = { s, n-1 };
+	FILE f = {
+		.lbf = EOF,
+		.write = sw_write,
+		.lock = -1,
+		.buf = buf,
+		.buf_size = sizeof buf,
+		.cookie = &c,
+	};
 
-	memset(&f, 0, sizeof(FILE));
-	f.lbf = EOF;
-	f.write = sw_write;
-	f.buf_size = sizeof buf;
-	f.buf = buf;
-	f.lock = -1;
-	f.cookie = &c;
 	if (!n) {
 		return -1;
 	} else if (n > INT_MAX) {
diff --git a/src/stdlib/abs.c b/src/stdlib/abs.c
index 4806d629..e721fdc2 100644
--- a/src/stdlib/abs.c
+++ b/src/stdlib/abs.c
@@ -1,3 +1,5 @@
+#include <stdlib.h>
+
 int abs(int a)
 {
 	return a>0 ? a : -a;
diff --git a/src/stdlib/labs.c b/src/stdlib/labs.c
index 675b95b8..83ddb147 100644
--- a/src/stdlib/labs.c
+++ b/src/stdlib/labs.c
@@ -1,3 +1,5 @@
+#include <stdlib.h>
+
 long labs(long a)
 {
 	return a>0 ? a : -a;
diff --git a/src/stdlib/llabs.c b/src/stdlib/llabs.c
index bec4a03d..9dfaf5cf 100644
--- a/src/stdlib/llabs.c
+++ b/src/stdlib/llabs.c
@@ -1,3 +1,5 @@
+#include <stdlib.h>
+
 long long llabs(long long a)
 {
 	return a>0 ? a : -a;
diff --git a/src/thread/pthread_create.c b/src/thread/pthread_create.c
index 439ee363..e07d29e3 100644
--- a/src/thread/pthread_create.c
+++ b/src/thread/pthread_create.c
@@ -37,11 +37,11 @@ _Noreturn void __pthread_exit(void *result)
 
 	__pthread_tsd_run_dtors();
 
-	LOCK(self->exitlock);
-
-	/* Mark this thread dead before decrementing count */
+	/* Access to target the exiting thread with syscalls that use
+	 * its kernel tid is controlled by killlock. For detached threads,
+	 * any use past this point would have undefined behavior, but for
+	 * joinable threads it's a valid usage that must be handled. */
 	LOCK(self->killlock);
-	self->dead = 1;
 
 	/* Block all signals before decrementing the live thread count.
 	 * This is important to ensure that dynamically allocated TLS
@@ -49,20 +49,14 @@ _Noreturn void __pthread_exit(void *result)
 	 * reasons as well. */
 	__block_all_sigs(&set);
 
-	/* Wait to unlock the kill lock, which governs functions like
-	 * pthread_kill which target a thread id, until signals have
-	 * been blocked. This precludes observation of the thread id
-	 * as a live thread (with application code running in it) after
-	 * the thread was reported dead by ESRCH being returned. */
-	UNLOCK(self->killlock);
-
 	/* It's impossible to determine whether this is "the last thread"
 	 * until performing the atomic decrement, since multiple threads
 	 * could exit at the same time. For the last thread, revert the
-	 * decrement and unblock signals to give the atexit handlers and
-	 * stdio cleanup code a consistent state. */
+	 * decrement, release killlock, and unblock signals to give the
+	 * atexit handlers and stdio cleanup code a consistent state. */
 	if (a_fetch_add(&libc.threads_minus_1, -1)==0) {
 		libc.threads_minus_1 = 0;
+		UNLOCK(self->killlock);
 		__restore_sigs(&set);
 		exit(0);
 	}
@@ -89,15 +83,19 @@ _Noreturn void __pthread_exit(void *result)
 	__do_orphaned_stdio_locks();
 	__dl_thread_cleanup();
 
-	if (self->detached && self->map_base) {
+	/* This atomic potentially competes with a concurrent pthread_detach
+	 * call; the loser is responsible for freeing thread resources. */
+	int state = a_cas(&self->detach_state, DT_JOINABLE, DT_EXITING);
+
+	if (state>=DT_DETACHED && self->map_base) {
 		/* Detached threads must avoid the kernel clear_child_tid
 		 * feature, since the virtual address will have been
 		 * unmapped and possibly already reused by a new mapping
 		 * at the time the kernel would perform the write. In
 		 * the case of threads that started out detached, the
 		 * initial clone flags are correct, but if the thread was
-		 * detached later (== 2), we need to clear it here. */
-		if (self->detached == 2) __syscall(SYS_set_tid_address, 0);
+		 * detached later, we need to clear it here. */
+		if (state == DT_DYNAMIC) __syscall(SYS_set_tid_address, 0);
 
 		/* Robust list will no longer be valid, and was already
 		 * processed above, so unregister it with the kernel. */
@@ -113,6 +111,12 @@ _Noreturn void __pthread_exit(void *result)
 		__unmapself(self->map_base, self->map_size);
 	}
 
+	/* After the kernel thread exits, its tid may be reused. Clear it
+	 * to prevent inadvertent use and inform functions that would use
+	 * it that it's no longer available. */
+	self->tid = 0;
+	UNLOCK(self->killlock);
+
 	for (;;) __syscall(SYS_exit, 0);
 }
 
@@ -139,7 +143,7 @@ static int start(void *p)
 	if (self->startlock[0]) {
 		__wait(self->startlock, 0, 1, 1);
 		if (self->startlock[0] == 2) {
-			self->detached = 2;
+			self->detach_state = DT_DYNAMIC;
 			pthread_exit(0);
 		}
 		__restore_sigs(self->sigmask);
@@ -272,8 +276,10 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att
 	new->tsd = (void *)tsd;
 	new->locale = &libc.global_locale;
 	if (attr._a_detach) {
-		new->detached = 1;
+		new->detach_state = DT_DETACHED;
 		flags -= CLONE_CHILD_CLEARTID;
+	} else {
+		new->detach_state = DT_JOINABLE;
 	}
 	if (attr._a_sched) {
 		do_sched = new->startlock[0] = 1;
@@ -284,7 +290,7 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att
 	new->CANARY = self->CANARY;
 
 	a_inc(&libc.threads_minus_1);
-	ret = __clone((c11 ? start_c11 : start), stack, flags, new, &new->tid, TP_ADJ(new), &new->tid);
+	ret = __clone((c11 ? start_c11 : start), stack, flags, new, &new->tid, TP_ADJ(new), &new->detach_state);
 
 	__release_ptc();
 
diff --git a/src/thread/pthread_detach.c b/src/thread/pthread_detach.c
index 692bbaf9..9cee7a89 100644
--- a/src/thread/pthread_detach.c
+++ b/src/thread/pthread_detach.c
@@ -5,11 +5,10 @@ int __pthread_join(pthread_t, void **);
 
 static int __pthread_detach(pthread_t t)
 {
-	/* Cannot detach a thread that's already exiting */
-	if (a_cas(t->exitlock, 0, INT_MIN + 1))
+	/* If the cas fails, detach state is either already-detached
+	 * or exiting/exited, and pthread_join will trap or cleanup. */
+	if (a_cas(&t->detach_state, DT_JOINABLE, DT_DYNAMIC) != DT_JOINABLE)
 		return __pthread_join(t, 0);
-	t->detached = 2;
-	UNLOCK(t->exitlock);
 	return 0;
 }
 
diff --git a/src/thread/pthread_getattr_np.c b/src/thread/pthread_getattr_np.c
index 29a209bd..2881831f 100644
--- a/src/thread/pthread_getattr_np.c
+++ b/src/thread/pthread_getattr_np.c
@@ -6,7 +6,7 @@
 int pthread_getattr_np(pthread_t t, pthread_attr_t *a)
 {
 	*a = (pthread_attr_t){0};
-	a->_a_detach = !!t->detached;
+	a->_a_detach = t->detach_state>=DT_DETACHED;
 	a->_a_guardsize = t->guard_size;
 	if (t->stack) {
 		a->_a_stackaddr = (uintptr_t)t->stack;
diff --git a/src/thread/pthread_getschedparam.c b/src/thread/pthread_getschedparam.c
index a994b637..05be4242 100644
--- a/src/thread/pthread_getschedparam.c
+++ b/src/thread/pthread_getschedparam.c
@@ -4,7 +4,7 @@ int pthread_getschedparam(pthread_t t, int *restrict policy, struct sched_param
 {
 	int r;
 	LOCK(t->killlock);
-	if (t->dead) {
+	if (!t->tid) {
 		r = ESRCH;
 	} else {
 		r = -__syscall(SYS_sched_getparam, t->tid, param);
diff --git a/src/thread/pthread_join.c b/src/thread/pthread_join.c
index b7175c09..18264da6 100644
--- a/src/thread/pthread_join.c
+++ b/src/thread/pthread_join.c
@@ -7,13 +7,14 @@ int __pthread_setcancelstate(int, int *);
 
 int __pthread_timedjoin_np(pthread_t t, void **res, const struct timespec *at)
 {
-	int tmp, cs, r = 0;
+	int state, cs, r = 0;
 	__pthread_testcancel();
 	__pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs);
 	if (cs == PTHREAD_CANCEL_ENABLE) __pthread_setcancelstate(cs, 0);
-	if (t->detached) a_crash();
-	while ((tmp = t->tid) && r != ETIMEDOUT && r != EINVAL)
-		r = __timedwait_cp(&t->tid, tmp, CLOCK_REALTIME, at, 0);
+	while ((state = t->detach_state) && r != ETIMEDOUT && r != EINVAL) {
+		if (state >= DT_DETACHED) a_crash();
+		r = __timedwait_cp(&t->detach_state, state, CLOCK_REALTIME, at, 0);
+	}
 	__pthread_setcancelstate(cs, 0);
 	if (r == ETIMEDOUT || r == EINVAL) return r;
 	a_barrier();
@@ -29,7 +30,7 @@ int __pthread_join(pthread_t t, void **res)
 
 int __pthread_tryjoin_np(pthread_t t, void **res)
 {
-	return t->tid ? EBUSY : __pthread_join(t, res);
+	return t->detach_state==DT_JOINABLE ? EBUSY : __pthread_join(t, res);
 }
 
 weak_alias(__pthread_tryjoin_np, pthread_tryjoin_np);
diff --git a/src/thread/pthread_kill.c b/src/thread/pthread_kill.c
index f0903420..6d70e626 100644
--- a/src/thread/pthread_kill.c
+++ b/src/thread/pthread_kill.c
@@ -4,7 +4,8 @@ int pthread_kill(pthread_t t, int sig)
 {
 	int r;
 	LOCK(t->killlock);
-	r = t->dead ? ESRCH : -__syscall(SYS_tkill, t->tid, sig);
+	r = t->tid ? -__syscall(SYS_tkill, t->tid, sig)
+		: (sig+0U >= _NSIG ? EINVAL : 0);
 	UNLOCK(t->killlock);
 	return r;
 }
diff --git a/src/thread/pthread_setschedparam.c b/src/thread/pthread_setschedparam.c
index 9e2fa456..ab45f2ff 100644
--- a/src/thread/pthread_setschedparam.c
+++ b/src/thread/pthread_setschedparam.c
@@ -4,7 +4,7 @@ int pthread_setschedparam(pthread_t t, int policy, const struct sched_param *par
 {
 	int r;
 	LOCK(t->killlock);
-	r = t->dead ? ESRCH : -__syscall(SYS_sched_setscheduler, t->tid, policy, param);
+	r = !t->tid ? ESRCH : -__syscall(SYS_sched_setscheduler, t->tid, policy, param);
 	UNLOCK(t->killlock);
 	return r;
 }
diff --git a/src/thread/pthread_setschedprio.c b/src/thread/pthread_setschedprio.c
index dc745b42..c353f6b5 100644
--- a/src/thread/pthread_setschedprio.c
+++ b/src/thread/pthread_setschedprio.c
@@ -4,7 +4,7 @@ int pthread_setschedprio(pthread_t t, int prio)
 {
 	int r;
 	LOCK(t->killlock);
-	r = t->dead ? ESRCH : -__syscall(SYS_sched_setparam, t->tid, &prio);
+	r = !t->tid ? ESRCH : -__syscall(SYS_sched_setparam, t->tid, &prio);
 	UNLOCK(t->killlock);
 	return r;
 }
diff --git a/src/time/wcsftime.c b/src/time/wcsftime.c
index 638e64f6..23500cc8 100644
--- a/src/time/wcsftime.c
+++ b/src/time/wcsftime.c
@@ -4,7 +4,7 @@
 #include "locale_impl.h"
 #include "libc.h"
 
-const char *__strftime_fmt_1(char (*s)[100], size_t *l, int f, const struct tm *tm, locale_t loc);
+const char *__strftime_fmt_1(char (*s)[100], size_t *l, int f, const struct tm *tm, locale_t loc, int pad);
 
 size_t __wcsftime_l(wchar_t *restrict s, size_t n, const wchar_t *restrict f, const struct tm *restrict tm, locale_t loc)
 {
@@ -14,7 +14,7 @@ size_t __wcsftime_l(wchar_t *restrict s, size_t n, const wchar_t *restrict f, co
 	wchar_t *p;
 	const char *t_mb;
 	const wchar_t *t;
-	int plus;
+	int pad, plus;
 	unsigned long width;
 	for (l=0; l<n; f++) {
 		if (!*f) {
@@ -26,6 +26,8 @@ size_t __wcsftime_l(wchar_t *restrict s, size_t n, const wchar_t *restrict f, co
 			continue;
 		}
 		f++;
+		pad = 0;
+		if (*f == '-' || *f == '_' || *f == '0') pad = *f++;
 		if ((plus = (*f == '+'))) f++;
 		width = wcstoul(f, &p, 10);
 		if (*p == 'C' || *p == 'F' || *p == 'G' || *p == 'Y') {
@@ -35,7 +37,7 @@ size_t __wcsftime_l(wchar_t *restrict s, size_t n, const wchar_t *restrict f, co
 		}
 		f = p;
 		if (*f == 'E' || *f == 'O') f++;
-		t_mb = __strftime_fmt_1(&buf, &k, *f, tm, loc);
+		t_mb = __strftime_fmt_1(&buf, &k, *f, tm, loc, pad);
 		if (!t_mb) break;
 		k = mbstowcs(wbuf, t_mb, sizeof wbuf / sizeof *wbuf);
 		if (k == (size_t)-1) return 0;
diff --git a/src/unistd/getcwd.c b/src/unistd/getcwd.c
index 103fbbb5..f407ffe0 100644
--- a/src/unistd/getcwd.c
+++ b/src/unistd/getcwd.c
@@ -6,10 +6,10 @@
 
 char *getcwd(char *buf, size_t size)
 {
-	char tmp[PATH_MAX];
+	char tmp[buf ? 1 : PATH_MAX];
 	if (!buf) {
 		buf = tmp;
-		size = PATH_MAX;
+		size = sizeof tmp;
 	} else if (!size) {
 		errno = EINVAL;
 		return 0;
diff --git a/src/unistd/gethostname.c b/src/unistd/gethostname.c
index f984b7dd..633ef571 100644
--- a/src/unistd/gethostname.c
+++ b/src/unistd/gethostname.c
@@ -8,6 +8,6 @@ int gethostname(char *name, size_t len)
 	if (uname(&uts)) return -1;
 	if (len > sizeof uts.nodename) len = sizeof uts.nodename;
 	for (i=0; i<len && (name[i] = uts.nodename[i]); i++);
-	if (i==len) name[i-1] = 0;
+	if (i && i==len) name[i-1] = 0;
 	return 0;
 }
diff --git a/src/unistd/nice.c b/src/unistd/nice.c
index da569967..6c25c8c3 100644
--- a/src/unistd/nice.c
+++ b/src/unistd/nice.c
@@ -1,12 +1,23 @@
 #include <unistd.h>
 #include <sys/resource.h>
+#include <limits.h>
+#include <errno.h>
 #include "syscall.h"
 
 int nice(int inc)
 {
-#ifdef SYS_nice
-	return syscall(SYS_nice, inc);
-#else
-	return setpriority(PRIO_PROCESS, 0, getpriority(PRIO_PROCESS, 0)+inc);
-#endif
+	int prio = inc;
+	// Only query old priority if it can affect the result.
+	// This also avoids issues with integer overflow.
+	if (inc > -2*NZERO && inc < 2*NZERO)
+		prio += getpriority(PRIO_PROCESS, 0);
+	if (prio > NZERO-1) prio = NZERO-1;
+	if (prio < -NZERO) prio = -NZERO;
+	if (setpriority(PRIO_PROCESS, 0, prio)) {
+		// POSIX nice() reports lack of privilege as EPERM,
+		// whereas setpriority() reports it as EACCES.
+		if (errno == EACCES) errno = EPERM;
+		return -1;
+	}
+	return prio;
 }
openSUSE Build Service is sponsored by