[openssl-commits] [openssl] OpenSSL_1_0_2-stable update

Andy Polyakov appro at openssl.org
Tue Jul 25 19:34:58 UTC 2017


The branch OpenSSL_1_0_2-stable has been updated
       via  777cf0fbd47e12a0ff3eadf2ee8af773b8138c12 (commit)
      from  f794476093342d021176ddac27270bd43ff2c804 (commit)


- Log -----------------------------------------------------------------
commit 777cf0fbd47e12a0ff3eadf2ee8af773b8138c12
Author: Andy Polyakov <appro at openssl.org>
Date:   Mon Jul 24 21:50:52 2017 +0200

    x86_64 assembly pack: "optimize" for Knights Landing.
    
    "Optimize" is in quotes because it's rather a "salvage operation"
    for now. The idea is to identify processor capability flags that
    drive Knights Landing to suboptimal code paths and mask them.
    Two flags were identified, XSAVE and ADCX/ADOX. Former affects
    choice of AES-NI code path specific for Silvermont (Knights Landing
    is of Silvermont "ancestry"). And 64-bit ADCX/ADOX instructions are
    effectively mishandled at decode time. In both cases we are looking
    at ~2x improvement.
    
    Hardware used for benchmarking courtesy of Atos, experiments run by
    Romain Dolbeau <romain.dolbeau at atos.net>. Kudos!
    
    This is a minimalistic backport of 64d92d74985ebb3d0be58a9718f9e080a14a8e7f
    
    Thanks to David Benjamin for spotting a typo in Knights Landing detection!
    
    Reviewed-by: Rich Salz <rsalz at openssl.org>
    (Merged from https://github.com/openssl/openssl/pull/4006)
    
    (cherry picked from commit 738a9dd53cacce593cd7d67e18e1273549640a79)

-----------------------------------------------------------------------

Summary of changes:
 crypto/x86_64cpuid.pl | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/crypto/x86_64cpuid.pl b/crypto/x86_64cpuid.pl
index a3d6f43..240d361 100644
--- a/crypto/x86_64cpuid.pl
+++ b/crypto/x86_64cpuid.pl
@@ -136,8 +136,19 @@ OPENSSL_ia32_cpuid:
 	or	\$0x40000000,%edx	# set reserved bit#30 on Intel CPUs
 	and	\$15,%ah
 	cmp	\$15,%ah		# examine Family ID
-	jne	.Lnotintel
+	jne	.LnotP4
 	or	\$0x00100000,%edx	# set reserved bit#20 to engage RC4_CHAR
+.LnotP4:
+	cmp	\$6,%ah
+	jne	.Lnotintel
+	and	\$0x0fff0ff0,%eax
+	cmp	\$0x00050670,%eax	# Knights Landing
+	je	.Lknights
+	cmp	\$0x00080650,%eax	# Knights Mill (according to sde)
+	jne	.Lnotintel
+.Lknights:
+	and	\$0xfbffffff,%ecx	# clear XSAVE flag to mimic Silvermont
+
 .Lnotintel:
 	bt	\$28,%edx		# test hyper-threading bit
 	jnc	.Lgeneric
@@ -162,6 +173,10 @@ OPENSSL_ia32_cpuid:
 	mov	\$7,%eax
 	xor	%ecx,%ecx
 	cpuid
+	bt	\$26,%r9d		# check XSAVE bit, cleared on Knights
+	jc	.Lnotknights
+	and	\$0xfff7ffff,%ebx	# clear ADCX/ADOX flag
+.Lnotknights:
 	mov	%ebx,8(%rdi)		# save extended feature flags
 .Lno_extended_info:
 


More information about the openssl-commits mailing list