File openssl-CVE-2014-3570.patch of Package compat-openssl098.1339

commit 4b4c0a19211bf73d81de52de697a1a9dc60aed82
Author: Andy Polyakov <appro@openssl.org>
Date:   Mon Jan 5 14:52:56 2015 +0100

    Fix for CVE-2014-3570.
    
    Reviewed-by: Emilia Kasper <emilia@openssl.org>
    (cherry picked from commit e793809ba50c1e90ab592fb640a856168e50f3de)

diff --git a/crypto/bn/asm/mips3.s b/crypto/bn/asm/mips3.s
index dca4105..8ced51b 100644
--- a/crypto/bn/asm/mips3.s
+++ b/crypto/bn/asm/mips3.s
@@ -1584,17 +1584,17 @@ LEAF(bn_sqr_comba8)
 	dmultu	a_2,a_0		/* mul_add_c2(a[2],b[0],c3,c1,c2); */
 	mflo	t_1
 	mfhi	t_2
-	slt	c_2,t_2,zero
-	dsll	t_2,1
-	slt	a2,t_1,zero
-	daddu	t_2,a2
-	dsll	t_1,1
 	daddu	c_3,t_1
 	sltu	AT,c_3,t_1
-	daddu	t_2,AT
+	daddu	c_3,t_1
+	daddu	AT,t_2
+	sltu	t_1,c_3,t_1
+	daddu	c_1,AT
+	daddu	t_2,t_1
+	sltu	c_2,c_1,AT
 	daddu	c_1,t_2
-	sltu	AT,c_1,t_2
-	daddu	c_2,AT
+	sltu	t_2,c_1,t_2
+	daddu	c_2,t_2
 	dmultu	a_1,a_1		/* mul_add_c(a[1],b[1],c3,c1,c2); */
 	mflo	t_1
 	mfhi	t_2
@@ -1609,63 +1609,63 @@ LEAF(bn_sqr_comba8)
 	dmultu	a_0,a_3		/* mul_add_c2(a[0],b[3],c1,c2,c3); */
 	mflo	t_1
 	mfhi	t_2
-	slt	c_3,t_2,zero
-	dsll	t_2,1
-	slt	a2,t_1,zero
-	daddu	t_2,a2
-	dsll	t_1,1
 	daddu	c_1,t_1
 	sltu	AT,c_1,t_1
-	daddu	t_2,AT
+	daddu	c_1,t_1
+	daddu	AT,t_2
+	sltu	t_1,c_1,t_1
+	daddu	c_2,AT
+	daddu	t_2,t_1
+	sltu	c_3,c_2,AT
 	daddu	c_2,t_2
-	sltu	AT,c_2,t_2
-	daddu	c_3,AT
+	sltu	t_2,c_2,t_2
+	daddu	c_3,t_2
 	dmultu	a_1,a_2		/* mul_add_c2(a[1],b[2],c1,c2,c3); */
 	mflo	t_1
 	mfhi	t_2
-	slt	AT,t_2,zero
-	daddu	c_3,AT
-	dsll	t_2,1
-	slt	a2,t_1,zero
-	daddu	t_2,a2
-	dsll	t_1,1
 	daddu	c_1,t_1
 	sltu	AT,c_1,t_1
-	daddu	t_2,AT
+	daddu	c_1,t_1
+	daddu	AT,t_2
+	sltu	t_1,c_1,t_1
+	daddu	c_2,AT
+	daddu	t_2,t_1
+	sltu	AT,c_2,AT
 	daddu	c_2,t_2
-	sltu	AT,c_2,t_2
 	daddu	c_3,AT
+	sltu	t_2,c_2,t_2
+	daddu	c_3,t_2
 	sd	c_1,24(a0)
 
 	dmultu	a_4,a_0		/* mul_add_c2(a[4],b[0],c2,c3,c1); */
 	mflo	t_1
 	mfhi	t_2
-	slt	c_1,t_2,zero
-	dsll	t_2,1
-	slt	a2,t_1,zero
-	daddu	t_2,a2
-	dsll	t_1,1
 	daddu	c_2,t_1
 	sltu	AT,c_2,t_1
-	daddu	t_2,AT
+	daddu	c_2,t_1
+	daddu	AT,t_2
+	sltu	t_1,c_2,t_1
+	daddu	c_3,AT
+	daddu	t_2,t_1
+	sltu	c_1,c_3,AT
 	daddu	c_3,t_2
-	sltu	AT,c_3,t_2
-	daddu	c_1,AT
+	sltu	t_2,c_3,t_2
+	daddu	c_1,t_2
 	dmultu	a_3,a_1		/* mul_add_c2(a[3],b[1],c2,c3,c1); */
 	mflo	t_1
 	mfhi	t_2
-	slt	AT,t_2,zero
-	daddu	c_1,AT
-	dsll	t_2,1
-	slt	a2,t_1,zero
-	daddu	t_2,a2
-	dsll	t_1,1
 	daddu	c_2,t_1
 	sltu	AT,c_2,t_1
-	daddu	t_2,AT
+	daddu	c_2,t_1
+	daddu	AT,t_2
+	sltu	t_1,c_2,t_1
+	daddu	c_3,AT
+	daddu	t_2,t_1
+	sltu	AT,c_3,AT
 	daddu	c_3,t_2
-	sltu	AT,c_3,t_2
 	daddu	c_1,AT
+	sltu	t_2,c_3,t_2
+	daddu	c_1,t_2
 	dmultu	a_2,a_2		/* mul_add_c(a[2],b[2],c2,c3,c1); */
 	mflo	t_1
 	mfhi	t_2
@@ -1680,93 +1680,93 @@ LEAF(bn_sqr_comba8)
 	dmultu	a_0,a_5		/* mul_add_c2(a[0],b[5],c3,c1,c2); */
 	mflo	t_1
 	mfhi	t_2
-	slt	c_2,t_2,zero
-	dsll	t_2,1
-	slt	a2,t_1,zero
-	daddu	t_2,a2
-	dsll	t_1,1
 	daddu	c_3,t_1
 	sltu	AT,c_3,t_1
-	daddu	t_2,AT
+	daddu	c_3,t_1
+	daddu	AT,t_2
+	sltu	t_1,c_3,t_1
+	daddu	c_1,AT
+	daddu	t_2,t_1
+	sltu	c_2,c_1,AT
 	daddu	c_1,t_2
-	sltu	AT,c_1,t_2
-	daddu	c_2,AT
+	sltu	t_2,c_1,t_2
+	daddu	c_2,t_2
 	dmultu	a_1,a_4		/* mul_add_c2(a[1],b[4],c3,c1,c2); */
 	mflo	t_1
 	mfhi	t_2
-	slt	AT,t_2,zero
-	daddu	c_2,AT
-	dsll	t_2,1
-	slt	a2,t_1,zero
-	daddu	t_2,a2
-	dsll	t_1,1
 	daddu	c_3,t_1
 	sltu	AT,c_3,t_1
-	daddu	t_2,AT
+	daddu	c_3,t_1
+	daddu	AT,t_2
+	sltu	t_1,c_3,t_1
+	daddu	c_1,AT
+	daddu	t_2,t_1
+	sltu	AT,c_1,AT
 	daddu	c_1,t_2
-	sltu	AT,c_1,t_2
 	daddu	c_2,AT
+	sltu	t_2,c_1,t_2
+	daddu	c_2,t_2
 	dmultu	a_2,a_3		/* mul_add_c2(a[2],b[3],c3,c1,c2); */
 	mflo	t_1
 	mfhi	t_2
-	slt	AT,t_2,zero
-	daddu	c_2,AT
-	dsll	t_2,1
-	slt	a2,t_1,zero
-	daddu	t_2,a2
-	dsll	t_1,1
 	daddu	c_3,t_1
 	sltu	AT,c_3,t_1
-	daddu	t_2,AT
+	daddu	c_3,t_1
+	daddu	AT,t_2
+	sltu	t_1,c_3,t_1
+	daddu	c_1,AT
+	daddu	t_2,t_1
+	sltu	AT,c_1,AT
 	daddu	c_1,t_2
-	sltu	AT,c_1,t_2
 	daddu	c_2,AT
+	sltu	t_2,c_1,t_2
+	daddu	c_2,t_2
 	sd	c_3,40(a0)
 
 	dmultu	a_6,a_0		/* mul_add_c2(a[6],b[0],c1,c2,c3); */
 	mflo	t_1
 	mfhi	t_2
-	slt	c_3,t_2,zero
-	dsll	t_2,1
-	slt	a2,t_1,zero
-	daddu	t_2,a2
-	dsll	t_1,1
 	daddu	c_1,t_1
 	sltu	AT,c_1,t_1
-	daddu	t_2,AT
+	daddu	c_1,t_1
+	daddu	AT,t_2
+	sltu	t_1,c_1,t_1
+	daddu	c_2,AT
+	daddu	t_2,t_1
+	sltu	c_3,c_2,AT
 	daddu	c_2,t_2
-	sltu	AT,c_2,t_2
-	daddu	c_3,AT
+	sltu	t_2,c_2,t_2
+	daddu	c_3,t_2
 	dmultu	a_5,a_1		/* mul_add_c2(a[5],b[1],c1,c2,c3); */
 	mflo	t_1
 	mfhi	t_2
-	slt	AT,t_2,zero
-	daddu	c_3,AT
-	dsll	t_2,1
-	slt	a2,t_1,zero
-	daddu	t_2,a2
-	dsll	t_1,1
 	daddu	c_1,t_1
 	sltu	AT,c_1,t_1
-	daddu	t_2,AT
+	daddu	c_1,t_1
+	daddu	AT,t_2
+	sltu	t_1,c_1,t_1
+	daddu	c_2,AT
+	daddu	t_2,t_1
+	sltu	AT,c_2,AT
 	daddu	c_2,t_2
-	sltu	AT,c_2,t_2
 	daddu	c_3,AT
+	sltu	t_2,c_2,t_2
+	daddu	c_3,t_2
 	dmultu	a_4,a_2		/* mul_add_c2(a[4],b[2],c1,c2,c3); */
 	mflo	t_1
 	mfhi	t_2
-	slt	AT,t_2,zero
-	daddu	c_3,AT
-	dsll	t_2,1
-	slt	a2,t_1,zero
-	daddu	t_2,a2
-	dsll	t_1,1
 	daddu	c_1,t_1
 	sltu	AT,c_1,t_1
-	daddu	t_2,AT
+	daddu	c_1,t_1
+	daddu	AT,t_2
+	sltu	t_1,c_1,t_1
+	daddu	c_2,AT
+	daddu	t_2,t_1
+	sltu	AT,c_2,AT
 	daddu	c_2,t_2
-	sltu	AT,c_2,t_2
 	daddu	c_3,AT
+	sltu	t_2,c_2,t_2
+	daddu	c_3,t_2
 	dmultu	a_3,a_3		/* mul_add_c(a[3],b[3],c1,c2,c3); */
 	mflo	t_1
 	mfhi	t_2
@@ -1781,108 +1781,108 @@ LEAF(bn_sqr_comba8)
 	dmultu	a_0,a_7		/* mul_add_c2(a[0],b[7],c2,c3,c1); */
 	mflo	t_1
 	mfhi	t_2
-	slt	c_1,t_2,zero
-	dsll	t_2,1
-	slt	a2,t_1,zero
-	daddu	t_2,a2
-	dsll	t_1,1
 	daddu	c_2,t_1
 	sltu	AT,c_2,t_1
-	daddu	t_2,AT
+	daddu	c_2,t_1
+	daddu	AT,t_2
+	sltu	t_1,c_2,t_1
+	daddu	c_3,AT
+	daddu	t_2,t_1
+	sltu	c_1,c_3,AT
 	daddu	c_3,t_2
-	sltu	AT,c_3,t_2
-	daddu	c_1,AT
+	sltu	t_2,c_3,t_2
+	daddu	c_1,t_2
 	dmultu	a_1,a_6		/* mul_add_c2(a[1],b[6],c2,c3,c1); */
 	mflo	t_1
 	mfhi	t_2
-	slt	AT,t_2,zero
-	daddu	c_1,AT
-	dsll	t_2,1
-	slt	a2,t_1,zero
-	daddu	t_2,a2
-	dsll	t_1,1
 	daddu	c_2,t_1
 	sltu	AT,c_2,t_1
-	daddu	t_2,AT
+	daddu	c_2,t_1
+	daddu	AT,t_2
+	sltu	t_1,c_2,t_1
+	daddu	c_3,AT
+	daddu	t_2,t_1
+	sltu	AT,c_3,AT
 	daddu	c_3,t_2
-	sltu	AT,c_3,t_2
 	daddu	c_1,AT
+	sltu	t_2,c_3,t_2
+	daddu	c_1,t_2
 	dmultu	a_2,a_5		/* mul_add_c2(a[2],b[5],c2,c3,c1); */
 	mflo	t_1
 	mfhi	t_2
-	slt	AT,t_2,zero
-	daddu	c_1,AT
-	dsll	t_2,1
-	slt	a2,t_1,zero
-	daddu	t_2,a2
-	dsll	t_1,1
 	daddu	c_2,t_1
 	sltu	AT,c_2,t_1
-	daddu	t_2,AT
+	daddu	c_2,t_1
+	daddu	AT,t_2
+	sltu	t_1,c_2,t_1
+	daddu	c_3,AT
+	daddu	t_2,t_1
+	sltu	AT,c_3,AT
 	daddu	c_3,t_2
-	sltu	AT,c_3,t_2
 	daddu	c_1,AT
+	sltu	t_2,c_3,t_2
+	daddu	c_1,t_2
 	dmultu	a_3,a_4		/* mul_add_c2(a[3],b[4],c2,c3,c1); */
 	mflo	t_1
 	mfhi	t_2
-	slt	AT,t_2,zero
-	daddu	c_1,AT
-	dsll	t_2,1
-	slt	a2,t_1,zero
-	daddu	t_2,a2
-	dsll	t_1,1
 	daddu	c_2,t_1
 	sltu	AT,c_2,t_1
-	daddu	t_2,AT
+	daddu	c_2,t_1
+	daddu	AT,t_2
+	sltu	t_1,c_2,t_1
+	daddu	c_3,AT
+	daddu	t_2,t_1
+	sltu	AT,c_3,AT
 	daddu	c_3,t_2
-	sltu	AT,c_3,t_2
 	daddu	c_1,AT
+	sltu	t_2,c_3,t_2
+	daddu	c_1,t_2
 	sd	c_2,56(a0)
 
 	dmultu	a_7,a_1		/* mul_add_c2(a[7],b[1],c3,c1,c2); */
 	mflo	t_1
 	mfhi	t_2
-	slt	c_2,t_2,zero
-	dsll	t_2,1
-	slt	a2,t_1,zero
-	daddu	t_2,a2
-	dsll	t_1,1
 	daddu	c_3,t_1
 	sltu	AT,c_3,t_1
-	daddu	t_2,AT
+	daddu	c_3,t_1
+	daddu	AT,t_2
+	sltu	t_1,c_3,t_1
+	daddu	c_1,AT
+	daddu	t_2,t_1
+	sltu	c_2,c_1,AT
 	daddu	c_1,t_2
-	sltu	AT,c_1,t_2
-	daddu	c_2,AT
+	sltu	t_2,c_1,t_2
+	daddu	c_2,t_2
 	dmultu	a_6,a_2		/* mul_add_c2(a[6],b[2],c3,c1,c2); */
 	mflo	t_1
 	mfhi	t_2
-	slt	AT,t_2,zero
-	daddu	c_2,AT
-	dsll	t_2,1
-	slt	a2,t_1,zero
-	daddu	t_2,a2
-	dsll	t_1,1
 	daddu	c_3,t_1
 	sltu	AT,c_3,t_1
-	daddu	t_2,AT
+	daddu	c_3,t_1
+	daddu	AT,t_2
+	sltu	t_1,c_3,t_1
+	daddu	c_1,AT
+	daddu	t_2,t_1
+	sltu	AT,c_1,AT
 	daddu	c_1,t_2
-	sltu	AT,c_1,t_2
 	daddu	c_2,AT
+	sltu	t_2,c_1,t_2
+	daddu	c_2,t_2
 	dmultu	a_5,a_3		/* mul_add_c2(a[5],b[3],c3,c1,c2); */
 	mflo	t_1
 	mfhi	t_2
-	slt	AT,t_2,zero
-	daddu	c_2,AT
-	dsll	t_2,1
-	slt	a2,t_1,zero
-	daddu	t_2,a2
-	dsll	t_1,1
 	daddu	c_3,t_1
 	sltu	AT,c_3,t_1
-	daddu	t_2,AT
+	daddu	c_3,t_1
+	daddu	AT,t_2
+	sltu	t_1,c_3,t_1
+	daddu	c_1,AT
+	daddu	t_2,t_1
+	sltu	AT,c_1,AT
 	daddu	c_1,t_2
-	sltu	AT,c_1,t_2
 	daddu	c_2,AT
+	sltu	t_2,c_1,t_2
+	daddu	c_2,t_2
 	dmultu	a_4,a_4		/* mul_add_c(a[4],b[4],c3,c1,c2); */
 	mflo	t_1
 	mfhi	t_2
@@ -1897,78 +1897,78 @@ LEAF(bn_sqr_comba8)
 	dmultu	a_2,a_7		/* mul_add_c2(a[2],b[7],c1,c2,c3); */
 	mflo	t_1
 	mfhi	t_2
-	slt	c_3,t_2,zero
-	dsll	t_2,1
-	slt	a2,t_1,zero
-	daddu	t_2,a2
-	dsll	t_1,1
 	daddu	c_1,t_1
 	sltu	AT,c_1,t_1
-	daddu	t_2,AT
+	daddu	c_1,t_1
+	daddu	AT,t_2
+	sltu	t_1,c_1,t_1
+	daddu	c_2,AT
+	daddu	t_2,t_1
+	sltu	c_3,c_2,AT
 	daddu	c_2,t_2
-	sltu	AT,c_2,t_2
-	daddu	c_3,AT
+	sltu	t_2,c_2,t_2
+	daddu	c_3,t_2
 	dmultu	a_3,a_6		/* mul_add_c2(a[3],b[6],c1,c2,c3); */
 	mflo	t_1
 	mfhi	t_2
-	slt	AT,t_2,zero
-	daddu	c_3,AT
-	dsll	t_2,1
-	slt	a2,t_1,zero
-	daddu	t_2,a2
-	dsll	t_1,1
 	daddu	c_1,t_1
 	sltu	AT,c_1,t_1
-	daddu	t_2,AT
+	daddu	c_1,t_1
+	daddu	AT,t_2
+	sltu	t_1,c_1,t_1
+	daddu	c_2,AT
+	daddu	t_2,t_1
+	sltu	AT,c_2,AT
 	daddu	c_2,t_2
-	sltu	AT,c_2,t_2
 	daddu	c_3,AT
+	sltu	t_2,c_2,t_2
+	daddu	c_3,t_2
 	dmultu	a_4,a_5		/* mul_add_c2(a[4],b[5],c1,c2,c3); */
 	mflo	t_1
 	mfhi	t_2
-	slt	AT,t_2,zero
-	daddu	c_3,AT
-	dsll	t_2,1
-	slt	a2,t_1,zero
-	daddu	t_2,a2
-	dsll	t_1,1
 	daddu	c_1,t_1
 	sltu	AT,c_1,t_1
-	daddu	t_2,AT
+	daddu	c_1,t_1
+	daddu	AT,t_2
+	sltu	t_1,c_1,t_1
+	daddu	c_2,AT
+	daddu	t_2,t_1
+	sltu	AT,c_2,AT
 	daddu	c_2,t_2
-	sltu	AT,c_2,t_2
 	daddu	c_3,AT
+	sltu	t_2,c_2,t_2
+	daddu	c_3,t_2
 	sd	c_1,72(a0)
 
 	dmultu	a_7,a_3		/* mul_add_c2(a[7],b[3],c2,c3,c1); */
 	mflo	t_1
 	mfhi	t_2
-	slt	c_1,t_2,zero
-	dsll	t_2,1
-	slt	a2,t_1,zero
-	daddu	t_2,a2
-	dsll	t_1,1
 	daddu	c_2,t_1
 	sltu	AT,c_2,t_1
-	daddu	t_2,AT
+	daddu	c_2,t_1
+	daddu	AT,t_2
+	sltu	t_1,c_2,t_1
+	daddu	c_3,AT
+	daddu	t_2,t_1
+	sltu	c_1,c_3,AT
 	daddu	c_3,t_2
-	sltu	AT,c_3,t_2
-	daddu	c_1,AT
+	sltu	t_2,c_3,t_2
+	daddu	c_1,t_2
 	dmultu	a_6,a_4		/* mul_add_c2(a[6],b[4],c2,c3,c1); */
 	mflo	t_1
 	mfhi	t_2
-	slt	AT,t_2,zero
-	daddu	c_1,AT
-	dsll	t_2,1
-	slt	a2,t_1,zero
-	daddu	t_2,a2
-	dsll	t_1,1
 	daddu	c_2,t_1
 	sltu	AT,c_2,t_1
-	daddu	t_2,AT
+	daddu	c_2,t_1
+	daddu	AT,t_2
+	sltu	t_1,c_2,t_1
+	daddu	c_3,AT
+	daddu	t_2,t_1
+	sltu	AT,c_3,AT
 	daddu	c_3,t_2
-	sltu	AT,c_3,t_2
 	daddu	c_1,AT
+	sltu	t_2,c_3,t_2
+	daddu	c_1,t_2
 	dmultu	a_5,a_5		/* mul_add_c(a[5],b[5],c2,c3,c1); */
 	mflo	t_1
 	mfhi	t_2
@@ -1983,48 +1983,48 @@ LEAF(bn_sqr_comba8)
 	dmultu	a_4,a_7		/* mul_add_c2(a[4],b[7],c3,c1,c2); */
 	mflo	t_1
 	mfhi	t_2
-	slt	c_2,t_2,zero
-	dsll	t_2,1
-	slt	a2,t_1,zero
-	daddu	t_2,a2
-	dsll	t_1,1
 	daddu	c_3,t_1
 	sltu	AT,c_3,t_1
-	daddu	t_2,AT
+	daddu	c_3,t_1
+	daddu	AT,t_2
+	sltu	t_1,c_3,t_1
+	daddu	c_1,AT
+	daddu	t_2,t_1
+	sltu	c_2,c_1,AT
 	daddu	c_1,t_2
-	sltu	AT,c_1,t_2
-	daddu	c_2,AT
+	sltu	t_2,c_1,t_2
+	daddu	c_2,t_2
 	dmultu	a_5,a_6		/* mul_add_c2(a[5],b[6],c3,c1,c2); */
 	mflo	t_1
 	mfhi	t_2
-	slt	AT,t_2,zero
-	daddu	c_2,AT
-	dsll	t_2,1
-	slt	a2,t_1,zero
-	daddu	t_2,a2
-	dsll	t_1,1
 	daddu	c_3,t_1
 	sltu	AT,c_3,t_1
-	daddu	t_2,AT
+	daddu	c_3,t_1
+	daddu	AT,t_2
+	sltu	t_1,c_3,t_1
+	daddu	c_1,AT
+	daddu	t_2,t_1
+	sltu	AT,c_1,AT
 	daddu	c_1,t_2
-	sltu	AT,c_1,t_2
 	daddu	c_2,AT
+	sltu	t_2,c_1,t_2
+	daddu	c_2,t_2
 	sd	c_3,88(a0)
 
 	dmultu	a_7,a_5		/* mul_add_c2(a[7],b[5],c1,c2,c3); */
 	mflo	t_1
 	mfhi	t_2
-	slt	c_3,t_2,zero
-	dsll	t_2,1
-	slt	a2,t_1,zero
-	daddu	t_2,a2
-	dsll	t_1,1
 	daddu	c_1,t_1
 	sltu	AT,c_1,t_1
-	daddu	t_2,AT
+	daddu	c_1,t_1
+	daddu	AT,t_2
+	sltu	t_1,c_1,t_1
+	daddu	c_2,AT
+	daddu	t_2,t_1
+	sltu	c_3,c_2,AT
 	daddu	c_2,t_2
-	sltu	AT,c_2,t_2
-	daddu	c_3,AT
+	sltu	t_2,c_2,t_2
+	daddu	c_3,t_2
 	dmultu	a_6,a_6		/* mul_add_c(a[6],b[6],c1,c2,c3); */
 	mflo	t_1
 	mfhi	t_2
@@ -2039,17 +2039,17 @@ LEAF(bn_sqr_comba8)
 	dmultu	a_6,a_7		/* mul_add_c2(a[6],b[7],c2,c3,c1); */
 	mflo	t_1
 	mfhi	t_2
-	slt	c_1,t_2,zero
-	dsll	t_2,1
-	slt	a2,t_1,zero
-	daddu	t_2,a2
-	dsll	t_1,1
 	daddu	c_2,t_1
 	sltu	AT,c_2,t_1
-	daddu	t_2,AT
+	daddu	c_2,t_1
+	daddu	AT,t_2
+	sltu	t_1,c_2,t_1
+	daddu	c_3,AT
+	daddu	t_2,t_1
+	sltu	c_1,c_3,AT
 	daddu	c_3,t_2
-	sltu	AT,c_3,t_2
-	daddu	c_1,AT
+	sltu	t_2,c_3,t_2
+	daddu	c_1,t_2
 	sd	c_2,104(a0)
 
 	dmultu	a_7,a_7		/* mul_add_c(a[7],b[7],c3,c1,c2); */
@@ -2070,9 +2070,9 @@ LEAF(bn_sqr_comba4)
 	.set	reorder
 	ld	a_0,0(a1)
 	ld	a_1,8(a1)
+	dmultu	a_0,a_0		/* mul_add_c(a[0],b[0],c1,c2,c3); */
 	ld	a_2,16(a1)
 	ld	a_3,24(a1)
-	dmultu	a_0,a_0		/* mul_add_c(a[0],b[0],c1,c2,c3); */
 	mflo	c_1
 	mfhi	c_2
 	sd	c_1,0(a0)
@@ -2093,17 +2093,17 @@ LEAF(bn_sqr_comba4)
 	dmultu	a_2,a_0		/* mul_add_c2(a[2],b[0],c3,c1,c2); */
 	mflo	t_1
 	mfhi	t_2
-	slt	c_2,t_2,zero
-	dsll	t_2,1
-	slt	a2,t_1,zero
-	daddu	t_2,a2
-	dsll	t_1,1
 	daddu	c_3,t_1
 	sltu	AT,c_3,t_1
-	daddu	t_2,AT
+	daddu	c_3,t_1
+	daddu	AT,t_2
+	sltu	t_1,c_3,t_1
+	daddu	c_1,AT
+	daddu	t_2,t_1
+	sltu	c_2,c_1,AT
 	daddu	c_1,t_2
-	sltu	AT,c_1,t_2
-	daddu	c_2,AT
+	sltu	t_2,c_1,t_2
+	daddu	c_2,t_2
 	dmultu	a_1,a_1		/* mul_add_c(a[1],b[1],c3,c1,c2); */
 	mflo	t_1
 	mfhi	t_2
@@ -2118,48 +2118,48 @@ LEAF(bn_sqr_comba4)
 	dmultu	a_0,a_3		/* mul_add_c2(a[0],b[3],c1,c2,c3); */
 	mflo	t_1
 	mfhi	t_2
-	slt	c_3,t_2,zero
-	dsll	t_2,1
-	slt	a2,t_1,zero
-	daddu	t_2,a2
-	dsll	t_1,1
 	daddu	c_1,t_1
 	sltu	AT,c_1,t_1
-	daddu	t_2,AT
+	daddu	c_1,t_1
+	daddu	AT,t_2
+	sltu	t_1,c_1,t_1
+	daddu	c_2,AT
+	daddu	t_2,t_1
+	sltu	c_3,c_2,AT
 	daddu	c_2,t_2
-	sltu	AT,c_2,t_2
-	daddu	c_3,AT
+	sltu	t_2,c_2,t_2
+	daddu	c_3,t_2
 	dmultu	a_1,a_2		/* mul_add_c(a2[1],b[2],c1,c2,c3); */
 	mflo	t_1
 	mfhi	t_2
-	slt	AT,t_2,zero
-	daddu	c_3,AT
-	dsll	t_2,1
-	slt	a2,t_1,zero
-	daddu	t_2,a2
-	dsll	t_1,1
 	daddu	c_1,t_1
 	sltu	AT,c_1,t_1
-	daddu	t_2,AT
+	daddu	c_1,t_1
+	daddu	AT,t_2
+	sltu	t_1,c_1,t_1
+	daddu	c_2,AT
+	daddu	t_2,t_1
+	sltu	AT,c_2,AT
 	daddu	c_2,t_2
-	sltu	AT,c_2,t_2
 	daddu	c_3,AT
+	sltu	t_2,c_2,t_2
+	daddu	c_3,t_2
 	sd	c_1,24(a0)
 
 	dmultu	a_3,a_1		/* mul_add_c2(a[3],b[1],c2,c3,c1); */
 	mflo	t_1
 	mfhi	t_2
-	slt	c_1,t_2,zero
-	dsll	t_2,1
-	slt	a2,t_1,zero
-	daddu	t_2,a2
-	dsll	t_1,1
 	daddu	c_2,t_1
 	sltu	AT,c_2,t_1
-	daddu	t_2,AT
+	daddu	c_2,t_1
+	daddu	AT,t_2
+	sltu	t_1,c_2,t_1
+	daddu	c_3,AT
+	daddu	t_2,t_1
+	sltu	c_1,c_3,AT
 	daddu	c_3,t_2
-	sltu	AT,c_3,t_2
-	daddu	c_1,AT
+	sltu	t_2,c_3,t_2
+	daddu	c_1,t_2
 	dmultu	a_2,a_2		/* mul_add_c(a[2],b[2],c2,c3,c1); */
 	mflo	t_1
 	mfhi	t_2
@@ -2174,17 +2174,17 @@ LEAF(bn_sqr_comba4)
 	dmultu	a_2,a_3		/* mul_add_c2(a[2],b[3],c3,c1,c2); */
 	mflo	t_1
 	mfhi	t_2
-	slt	c_2,t_2,zero
-	dsll	t_2,1
-	slt	a2,t_1,zero
-	daddu	t_2,a2
-	dsll	t_1,1
 	daddu	c_3,t_1
 	sltu	AT,c_3,t_1
-	daddu	t_2,AT
+	daddu	c_3,t_1
+	daddu	AT,t_2
+	sltu	t_1,c_3,t_1
+	daddu	c_1,AT
+	daddu	t_2,t_1
+	sltu	c_2,c_1,AT
 	daddu	c_1,t_2
-	sltu	AT,c_1,t_2
-	daddu	c_2,AT
+	sltu	t_2,c_1,t_2
+	daddu	c_2,t_2
 	sd	c_3,40(a0)
 
 	dmultu	a_3,a_3		/* mul_add_c(a[3],b[3],c1,c2,c3); */
diff --git a/crypto/bn/asm/x86_64-gcc.c b/crypto/bn/asm/x86_64-gcc.c
index 2d80f19..eba8304 100644
--- a/crypto/bn/asm/x86_64-gcc.c
+++ b/crypto/bn/asm/x86_64-gcc.c
@@ -269,6 +269,10 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
 /* sqr_add_c(a,i,c0,c1,c2)  -- c+=a[i]^2 for three word number c=(c2,c1,c0) */
 /* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */
 
+/*
+ * Keep in mind that adding a carry to the high word of a multiplication
+ * result cannot overflow, because that high word can never be all-ones.
+ */
 #if 0
 /* original macros are kept for reference purposes */
 #define mul_add_c(a,b,c0,c1,c2) {	\
@@ -283,10 +287,10 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
 	BN_ULONG ta=(a),tb=(b),t0;	\
 	t1 = BN_UMULT_HIGH(ta,tb);	\
 	t0 = ta * tb;			\
-	t2 = t1+t1; c2 += (t2<t1)?1:0;	\
-	t1 = t0+t0; t2 += (t1<t0)?1:0;	\
-	c0 += t1; t2 += (c0<t1)?1:0;	\
+	c0 += t0; t2 = t1+((c0<t0)?1:0);\
 	c1 += t2; c2 += (c1<t2)?1:0;	\
+	c0 += t0; t1 += (c0<t0)?1:0;	\
+	c1 += t1; c2 += (c1<t1)?1:0;	\
 	}
 #else
 #define mul_add_c(a,b,c0,c1,c2)	do {	\
@@ -324,22 +328,14 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
 		: "=a"(t1),"=d"(t2)	\
 		: "a"(a),"m"(b)		\
 		: "cc");		\
-	asm ("addq %0,%0; adcq %2,%1"	\
-		: "+d"(t2),"+r"(c2)	\
-		: "g"(0)		\
-		: "cc");		\
-	asm ("addq %0,%0; adcq %2,%1"	\
-		: "+a"(t1),"+d"(t2)	\
-		: "g"(0)		\
-		: "cc");		\
-	asm ("addq %2,%0; adcq %3,%1"	\
-		: "+r"(c0),"+d"(t2)	\
-		: "a"(t1),"g"(0)	\
-		: "cc");		\
-	asm ("addq %2,%0; adcq %3,%1"	\
-		: "+r"(c1),"+r"(c2)	\
-		: "d"(t2),"g"(0)	\
-		: "cc");		\
+	asm ("addq %3,%0; adcq %4,%1; adcq %5,%2"	\
+		: "+r"(c0),"+r"(c1),"+r"(c2)		\
+		: "r"(t1),"r"(t2),"g"(0)		\
+		: "cc");				\
+	asm ("addq %3,%0; adcq %4,%1; adcq %5,%2"	\
+		: "+r"(c0),"+r"(c1),"+r"(c2)		\
+		: "r"(t1),"r"(t2),"g"(0)		\
+		: "cc");				\
 	} while (0)
 #endif
 
diff --git a/crypto/bn/bn_asm.c b/crypto/bn/bn_asm.c
index 99bc2de..b95b003 100644
--- a/crypto/bn/bn_asm.c
+++ b/crypto/bn/bn_asm.c
@@ -431,6 +431,10 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
 /* sqr_add_c(a,i,c0,c1,c2)  -- c+=a[i]^2 for three word number c=(c2,c1,c0) */
 /* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */
 
+/*
+ * Keep in mind that adding a carry to the high word of a multiplication
+ * result cannot overflow, because that high word can never be all-ones.
+ */
 #ifdef BN_LLONG
 #define mul_add_c(a,b,c0,c1,c2) \
 	t=(BN_ULLONG)a*b; \
@@ -471,10 +475,10 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
 #define mul_add_c2(a,b,c0,c1,c2) {	\
 	BN_ULONG ta=(a),tb=(b),t0;	\
 	BN_UMULT_LOHI(t0,t1,ta,tb);	\
-	t2 = t1+t1; c2 += (t2<t1)?1:0;	\
-	t1 = t0+t0; t2 += (t1<t0)?1:0;	\
-	c0 += t1; t2 += (c0<t1)?1:0;	\
+	c0 += t0; t2 = t1+((c0<t0)?1:0);\
 	c1 += t2; c2 += (c1<t2)?1:0;	\
+	c0 += t0; t1 += (c0<t0)?1:0;	\
+	c1 += t1; c2 += (c1<t1)?1:0;	\
 	}
 
 #define sqr_add_c(a,i,c0,c1,c2)	{	\
@@ -501,10 +505,10 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
 	BN_ULONG ta=(a),tb=(b),t0;	\
 	t1 = BN_UMULT_HIGH(ta,tb);	\
 	t0 = ta * tb;			\
-	t2 = t1+t1; c2 += (t2<t1)?1:0;	\
-	t1 = t0+t0; t2 += (t1<t0)?1:0;	\
-	c0 += t1; t2 += (c0<t1)?1:0;	\
+	c0 += t0; t2 = t1+((c0<t0)?1:0);\
 	c1 += t2; c2 += (c1<t2)?1:0;	\
+	c0 += t0; t1 += (c0<t0)?1:0;	\
+	c1 += t1; c2 += (c1<t1)?1:0;	\
 	}
 
 #define sqr_add_c(a,i,c0,c1,c2)	{	\
diff --git a/crypto/bn/bntest.c b/crypto/bn/bntest.c
index d41daac..9138c2f 100644
--- a/crypto/bn/bntest.c
+++ b/crypto/bn/bntest.c
@@ -676,44 +676,98 @@ int test_mul(BIO *bp)
 
 int test_sqr(BIO *bp, BN_CTX *ctx)
 	{
-	BIGNUM a,c,d,e;
-	int i;
+	BIGNUM *a,*c,*d,*e;
+	int i, ret = 0;
 
-	BN_init(&a);
-	BN_init(&c);
-	BN_init(&d);
-	BN_init(&e);
+	a = BN_new();
+	c = BN_new();
+	d = BN_new();
+	e = BN_new();
+	if (a == NULL || c == NULL || d == NULL || e == NULL)
+		{
+		goto err;
+		}
 
 	for (i=0; i<num0; i++)
 		{
-		BN_bntest_rand(&a,40+i*10,0,0);
-		a.neg=rand_neg();
-		BN_sqr(&c,&a,ctx);
+		BN_bntest_rand(a,40+i*10,0,0);
+		a->neg=rand_neg();
+		BN_sqr(c,a,ctx);
 		if (bp != NULL)
 			{
 			if (!results)
 				{
-				BN_print(bp,&a);
+				BN_print(bp,a);
 				BIO_puts(bp," * ");
-				BN_print(bp,&a);
+				BN_print(bp,a);
 				BIO_puts(bp," - ");
 				}
-			BN_print(bp,&c);
+			BN_print(bp,c);
 			BIO_puts(bp,"\n");
 			}
-		BN_div(&d,&e,&c,&a,ctx);
-		BN_sub(&d,&d,&a);
-		if(!BN_is_zero(&d) || !BN_is_zero(&e))
-		    {
-		    fprintf(stderr,"Square test failed!\n");
-		    return 0;
-		    }
+		BN_div(d,e,c,a,ctx);
+		BN_sub(d,d,a);
+		if(!BN_is_zero(d) || !BN_is_zero(e))
+			{
+			fprintf(stderr,"Square test failed!\n");
+			goto err;
+			}
 		}
-	BN_free(&a);
-	BN_free(&c);
-	BN_free(&d);
-	BN_free(&e);
-	return(1);
+
+	/* BN_sqr overflow regression test; operand patterned on 64-bit words. */
+	BN_hex2bn(&a,
+		"80000000000000008000000000000001FFFFFFFFFFFFFFFE0000000000000000");
+	BN_sqr(c, a, ctx);
+	if (bp != NULL)
+		{
+		if (!results)
+			{
+			BN_print(bp,a);
+			BIO_puts(bp," * ");
+			BN_print(bp,a);
+			BIO_puts(bp," - ");
+			}
+		BN_print(bp,c);
+		BIO_puts(bp,"\n");
+		}
+	BN_mul(d, a, a, ctx);
+	if (BN_cmp(c, d))
+		{
+		fprintf(stderr, "Square test failed: BN_sqr and BN_mul produce "
+			"different results!\n");
+		goto err;
+		}
+
+	/* BN_sqr overflow regression test; operand patterned on 32-bit words. */
+	BN_hex2bn(&a,
+		"80000000000000000000000080000001FFFFFFFE000000000000000000000000");
+	BN_sqr(c, a, ctx);
+	if (bp != NULL)
+		{
+		if (!results)
+			{
+			BN_print(bp,a);
+			BIO_puts(bp," * ");
+			BN_print(bp,a);
+			BIO_puts(bp," - ");
+			}
+		BN_print(bp,c);
+		BIO_puts(bp,"\n");
+		}
+	BN_mul(d, a, a, ctx);
+	if (BN_cmp(c, d))
+		{
+		fprintf(stderr, "Square test failed: BN_sqr and BN_mul produce "
+			"different results!\n");
+		goto err;
+		}
+	ret = 1;
+err:
+	if (a != NULL) BN_free(a);
+	if (c != NULL) BN_free(c);
+	if (d != NULL) BN_free(d);
+	if (e != NULL) BN_free(e);
+	return ret;
 	}
 
 int test_mont(BIO *bp, BN_CTX *ctx)