[openssl-dev] [openssl.org #4126] [PATCH] Re-arrange code to improve performance for z systems

Leonidas Da Silva Barbosa via RT rt at openssl.org
Sun Nov 8 11:37:56 UTC 2015


This patch re-arranges the instructions in the unrolled
bn_mul_add_words loop (.Loop4_madd) in order to circumvent
a performance degradation of more than 20%.
Measurements with the fix applied showed performance
improvements of the required magnitude on zEC12 and z13.

Signed-off-by: Leonidas Da Silva Barbosa <leosilva at linux.vnet.ibm.com>
---
 crypto/bn/asm/s390x.S | 35 ++++++++++++++++++++---------------
 1 file changed, 20 insertions(+), 15 deletions(-)

diff --git a/crypto/bn/asm/s390x.S b/crypto/bn/asm/s390x.S
index 43fcb79..c0e1fe9 100755
--- a/crypto/bn/asm/s390x.S
+++ b/crypto/bn/asm/s390x.S
@@ -33,35 +33,40 @@ bn_mul_add_words:
 
 .Loop4_madd:
 	lg	%r7,0(%r2,%r3)	// ap[i]
+	lg      %r9,8(%r2,%r3)
+
 	mlgr	%r6,%r5		// *=w
 	alcgr	%r7,%r8		// +=carry
-	alcgr	%r6,zero
+	alcgr	%r6,%r0
+	mlgr    %r8,%r5
+
 	alg	%r7,0(%r2,%r1)	// +=rp[i]
 	stg	%r7,0(%r2,%r1)	// rp[i]=
+	lg	%r7,0x10(%r2,%r3)
 
-	lg	%r9,8(%r2,%r3)
-	mlgr	%r8,%r5
 	alcgr	%r9,%r6
-	alcgr	%r8,zero
+	alcgr	%r8,%r0
+
 	alg	%r9,8(%r2,%r1)
 	stg	%r9,8(%r2,%r1)
+	lg	%r9,0x18(%r2,%r3)
 
-	lg	%r7,16(%r2,%r3)
 	mlgr	%r6,%r5
 	alcgr	%r7,%r8
-	alcgr	%r6,zero
-	alg	%r7,16(%r2,%r1)
-	stg	%r7,16(%r2,%r1)
+	alcgr	%r6,%r0
+	mlgr    %r8,%r5
+
+	alg	%r7,0x10(%r2,%r1)
+	stg	%r7,0x10(%r2,%r1)
 
-	lg	%r9,24(%r2,%r3)
-	mlgr	%r8,%r5
 	alcgr	%r9,%r6
-	alcgr	%r8,zero
-	alg	%r9,24(%r2,%r1)
-	stg	%r9,24(%r2,%r1)
+	alcgr	%r8,%r0
 
-	la	%r2,32(%r2)	// i+=4
-	brct	%r4,.Loop4_madd
+	alg	%r9,0x18(%r2,%r1)
+	stg	%r9,0x18(%r2,%r1)
+	la	%r2,0x20(%r0,%r2)	// i+=4
+
+	brct    %r4,.Loop4_madd
 
 	la	%r10,1(%r10)		// see if len%4 is zero ...
 	brct	%r10,.Loop1_madd	// without touching condition code:-)
-- 
1.8.3.1

_______________________________________________
openssl-bugs-mod mailing list
openssl-bugs-mod at openssl.org
https://mta.openssl.org/mailman/listinfo/openssl-bugs-mod



More information about the openssl-dev mailing list