[openssl-dev] [openssl.org #4126] [PATCH] Re-arrange code to improve performance for z systems
Leonidas Da Silva Barbosa via RT
rt at openssl.org
Sun Nov 8 11:37:56 UTC 2015
This patch re-arranges the instructions in the unrolled loop of
bn_mul_add_words (crypto/bn/asm/s390x.S) in order to circumvent
a performance degradation of more than 20%.
Measurements with the fix included showed that performance
improved by the required amount on zEC12 and z13.
Signed-off-by: Leonidas Da Silva Barbosa <leosilva at linux.vnet.ibm.com>
---
crypto/bn/asm/s390x.S | 35 ++++++++++++++++++++---------------
1 file changed, 20 insertions(+), 15 deletions(-)
diff --git a/crypto/bn/asm/s390x.S b/crypto/bn/asm/s390x.S
index 43fcb79..c0e1fe9 100755
--- a/crypto/bn/asm/s390x.S
+++ b/crypto/bn/asm/s390x.S
@@ -33,35 +33,40 @@ bn_mul_add_words:
.Loop4_madd:
lg %r7,0(%r2,%r3) // ap[i]
+ lg %r9,8(%r2,%r3)
+
mlgr %r6,%r5 // *=w
alcgr %r7,%r8 // +=carry
- alcgr %r6,zero
+ alcgr %r6,%r0
+ mlgr %r8,%r5
+
alg %r7,0(%r2,%r1) // +=rp[i]
stg %r7,0(%r2,%r1) // rp[i]=
+ lg %r7,0x10(%r2,%r3)
- lg %r9,8(%r2,%r3)
- mlgr %r8,%r5
alcgr %r9,%r6
- alcgr %r8,zero
+ alcgr %r8,%r0
+
alg %r9,8(%r2,%r1)
stg %r9,8(%r2,%r1)
+ lg %r9,0x18(%r2,%r3)
- lg %r7,16(%r2,%r3)
mlgr %r6,%r5
alcgr %r7,%r8
- alcgr %r6,zero
- alg %r7,16(%r2,%r1)
- stg %r7,16(%r2,%r1)
+ alcgr %r6,%r0
+ mlgr %r8,%r5
+
+ alg %r7,0x10(%r2,%r1)
+ stg %r7,0x10(%r2,%r1)
- lg %r9,24(%r2,%r3)
- mlgr %r8,%r5
alcgr %r9,%r6
- alcgr %r8,zero
- alg %r9,24(%r2,%r1)
- stg %r9,24(%r2,%r1)
+ alcgr %r8,%r0
- la %r2,32(%r2) // i+=4
- brct %r4,.Loop4_madd
+ alg %r9,0x18(%r2,%r1)
+ stg %r9,0x18(%r2,%r1)
+ la %r2,0x20(%r0,%r2) // i+=4
+
+ brct %r4,.Loop4_madd
la %r10,1(%r10) // see if len%4 is zero ...
brct %r10,.Loop1_madd // without touching condition code:-)
--
1.8.3.1
_______________________________________________
openssl-bugs-mod mailing list
openssl-bugs-mod at openssl.org
https://mta.openssl.org/mailman/listinfo/openssl-bugs-mod
More information about the openssl-dev mailing list