[openssl-commits] [openssl] OpenSSL_1_1_0-stable update

Andy Polyakov appro at openssl.org
Mon Jul 24 21:31:13 UTC 2017


The branch OpenSSL_1_1_0-stable has been updated
       via  6fb428f76c2fb039734694de9f7f8ab8f4d9f59c (commit)
       via  5c6317e86d5f44291bb2b028607b8bad858b4f74 (commit)
      from  a701db76e27ffce4c88c2f621269ca5589c984ea (commit)


- Log -----------------------------------------------------------------
commit 6fb428f76c2fb039734694de9f7f8ab8f4d9f59c
Author: Andy Polyakov <appro at openssl.org>
Date:   Mon Jul 10 15:21:00 2017 +0200

    aes/asm/aesni-sha*-x86_64.pl: add SHAEXT performance results.
    
    Reviewed-by: Kurt Roeckx <kurt at roeckx.be>
    (Merged from https://github.com/openssl/openssl/pull/3898)
    
    (cherry picked from commit 1843787173da9b07029d0863e236107b1dd4fdd7)

commit 5c6317e86d5f44291bb2b028607b8bad858b4f74
Author: Andy Polyakov <appro at openssl.org>
Date:   Mon Jul 10 15:19:45 2017 +0200

    evp/e_aes_cbc_hmac_sha256.c: give SHAEXT right priority.
    
    Reviewed-by: Kurt Roeckx <kurt at roeckx.be>
    (Merged from https://github.com/openssl/openssl/pull/3898)
    
    (cherry picked from commit d0f6eb1d8c84165c383a677266cfae9c0b162781)

-----------------------------------------------------------------------

Summary of changes:
 crypto/aes/asm/aesni-sha1-x86_64.pl   |  7 ++++++-
 crypto/aes/asm/aesni-sha256-x86_64.pl | 15 +++++++++------
 crypto/evp/e_aes_cbc_hmac_sha256.c    |  8 +++++---
 3 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/crypto/aes/asm/aesni-sha1-x86_64.pl b/crypto/aes/asm/aesni-sha1-x86_64.pl
index adff3a3..33a7f0c 100644
--- a/crypto/aes/asm/aesni-sha1-x86_64.pl
+++ b/crypto/aes/asm/aesni-sha1-x86_64.pl
@@ -34,6 +34,8 @@
 # Haswell	4.43[+3.6(4.2)]	8.00(8.58)	4.55(5.21)  +75%(+65%)
 # Skylake	2.63[+3.5(4.1)]	6.17(6.69)	4.23(4.44)  +46%(+51%)
 # Bulldozer	5.77[+6.0]	11.72		6.37        +84%
+# Ryzen(**)	2.71[+1.93]	4.64		2.74        +69%
+# Goldmont(**)	3.82[+1.70]	5.52		4.20        +31%
 #
 #		AES-192-CBC
 # Westmere	4.51		9.81		6.80	    +44%
@@ -47,13 +49,16 @@
 # Sandy Bridge	7.05		12.06(13.15)	7.12(7.72)  +69%(+70%)
 # Ivy Bridge	7.05		11.65		7.12        +64%
 # Haswell	6.19		9.76(10.34)	6.21(6.25)  +57%(+65%)
-# Skylake	3.62		7.16(7.68)	4.56(4.76)  +57%(+61$)
+# Skylake	3.62		7.16(7.68)	4.56(4.76)  +57%(+61%)
 # Bulldozer	8.00		13.95		8.25        +69%
+# Ryzen(**)	3.71		5.64		3.72        +52%
+# Goldmont(**)	5.35		7.05		5.76        +22%
 #
 # (*)	There are two code paths: SSSE3 and AVX. See sha1-568.pl for
 #	background information. Above numbers in parentheses are SSSE3
 #	results collected on AVX-capable CPU, i.e. apply on OSes that
 #	don't support AVX.
+# (**)	SHAEXT results.
 #
 # Needless to mention that it makes no sense to implement "stitched"
 # *decrypt* subroutine. Because *both* AESNI-CBC decrypt and SHA1
diff --git a/crypto/aes/asm/aesni-sha256-x86_64.pl b/crypto/aes/asm/aesni-sha256-x86_64.pl
index 3b03328..0e49f26 100644
--- a/crypto/aes/asm/aesni-sha256-x86_64.pl
+++ b/crypto/aes/asm/aesni-sha256-x86_64.pl
@@ -28,18 +28,21 @@
 # for standalone AESNI-CBC encrypt, standalone SHA256, and stitched
 # subroutine:
 #
-#		 AES-128/-192/-256+SHA256	this(**)gain
-# Sandy Bridge	    5.05/6.05/7.05+11.6		13.0	+28%/36%/43%
-# Ivy Bridge	    5.05/6.05/7.05+10.3		11.6	+32%/41%/50%
-# Haswell	    4.43/5.29/6.19+7.80		8.79	+39%/49%/59%
-# Skylake	    2.62/3.14/3.62+7.70		8.10	+27%/34%/40%
-# Bulldozer	    5.77/6.89/8.00+13.7		13.7	+42%/50%/58%
+#		 AES-128/-192/-256+SHA256   this(**)	gain
+# Sandy Bridge	    5.05/6.05/7.05+11.6	    13.0	+28%/36%/43%
+# Ivy Bridge	    5.05/6.05/7.05+10.3	    11.6	+32%/41%/50%
+# Haswell	    4.43/5.29/6.19+7.80	    8.79	+39%/49%/59%
+# Skylake	    2.62/3.14/3.62+7.70	    8.10	+27%/34%/40%
+# Bulldozer	    5.77/6.89/8.00+13.7	    13.7	+42%/50%/58%
+# Ryzen(***)	    2.71/-/3.71+2.05	    2.74/-/3.73	+74%/-/54%
+# Goldmont(***)	    3.82/-/5.35+4.16	    4.73/-/5.94	+69%/-/60%
 #
 # (*)	there are XOP, AVX1 and AVX2 code paths, meaning that
 #	Westmere is omitted from loop, this is because gain was not
 #	estimated high enough to justify the effort;
 # (**)	these are EVP-free results, results obtained with 'speed
 #	-evp aes-256-cbc-hmac-sha256' will vary by percent or two;
+# (***)	these are SHAEXT results;
 
 $flavour = shift;
 $output  = shift;
diff --git a/crypto/evp/e_aes_cbc_hmac_sha256.c b/crypto/evp/e_aes_cbc_hmac_sha256.c
index 652ace4..13973f1 100644
--- a/crypto/evp/e_aes_cbc_hmac_sha256.c
+++ b/crypto/evp/e_aes_cbc_hmac_sha256.c
@@ -453,10 +453,12 @@ static int aesni_cbc_hmac_sha256_cipher(EVP_CIPHER_CTX *ctx,
          * to identify it and avoid stitch invocation. So that after we
          * establish that current CPU supports AVX, we even see if it's
          * either even XOP-capable Bulldozer-based or GenuineIntel one.
+         * But SHAEXT-capable go ahead...
          */
-        if (OPENSSL_ia32cap_P[1] & (1 << (60 - 32)) && /* AVX? */
-            ((OPENSSL_ia32cap_P[1] & (1 << (43 - 32))) /* XOP? */
-             | (OPENSSL_ia32cap_P[0] & (1<<30))) &&    /* "Intel CPU"? */
+        if (((OPENSSL_ia32cap_P[2] & (1 << 29)) ||         /* SHAEXT? */
+             ((OPENSSL_ia32cap_P[1] & (1 << (60 - 32))) && /* AVX? */
+              ((OPENSSL_ia32cap_P[1] & (1 << (43 - 32)))   /* XOP? */
+               | (OPENSSL_ia32cap_P[0] & (1 << 30))))) &&  /* "Intel CPU"? */
             plen > (sha_off + iv) &&
             (blocks = (plen - (sha_off + iv)) / SHA256_CBLOCK)) {
             SHA256_Update(&key->md, in + iv, sha_off);


More information about the openssl-commits mailing list