[openssl-commits] [openssl] OpenSSL_1_0_2-stable update

Andy Polyakov appro at openssl.org
Mon Mar 13 17:43:42 UTC 2017


The branch OpenSSL_1_0_2-stable has been updated
       via  7eba15f274f6e241bc3c9077b0427ec39a630e3d (commit)
      from  7b64c79bd5d3c1183b989de70170b0052b1d6c9e (commit)


- Log -----------------------------------------------------------------
commit 7eba15f274f6e241bc3c9077b0427ec39a630e3d
Author: Andy Polyakov <appro at openssl.org>
Date:   Sun Mar 12 14:45:06 2017 +0100

    crypto/x86*cpuid.pl: move extended feature detection.
    
    Exteneded feature flags were not pulled on AMD processors, as result
    a number of extensions were effectively masked on Ryzen. Original fix
    for x86_64cpuid.pl addressed this problem, but messed up processor
    vendor detection. This fix moves extended feature detection past
    basic feature detection where it belongs. 32-bit counterpart is
    harmonized too.
    
    Reviewed-by: Rich Salz <rsalz at openssl.org>
    Reviewed-by: Richard Levitte <levitte at openssl.org>
    (cherry picked from commit 1aed5e1ac28790cc915ad03e86e2d5e896a4ea13)

-----------------------------------------------------------------------

Summary of changes:
 crypto/x86_64cpuid.pl | 21 ++++++++++-----------
 crypto/x86cpuid.pl    | 38 ++++++++++++++++++--------------------
 2 files changed, 28 insertions(+), 31 deletions(-)

diff --git a/crypto/x86_64cpuid.pl b/crypto/x86_64cpuid.pl
index a430ab9..a3d6f43 100644
--- a/crypto/x86_64cpuid.pl
+++ b/crypto/x86_64cpuid.pl
@@ -59,20 +59,10 @@ OPENSSL_ia32_cpuid:
 	mov	%rbx,%r8		# save %rbx
 
 	xor	%eax,%eax
-	mov	%eax,8(%rdi)		# clear 3rd word
+	mov	%eax,8(%rdi)		# clear extended feature flags
 	cpuid
 	mov	%eax,%r11d		# max value for standard query level
 
-	cmp	\$7,%eax
-	jb	.Lno_extended_info
-
-	mov	\$7,%eax
-	xor	%ecx,%ecx
-	cpuid
-	mov	%ebx,8(%rdi)
-
-.Lno_extended_info:
-
 	xor	%eax,%eax
 	cmp	\$0x756e6547,%ebx	# "Genu"
 	setne	%al
@@ -166,6 +156,15 @@ OPENSSL_ia32_cpuid:
 	or	%ecx,%r9d		# merge AMD XOP flag
 
 	mov	%edx,%r10d		# %r9d:%r10d is copy of %ecx:%edx
+
+	cmp	\$7,%r11d
+	jb	.Lno_extended_info
+	mov	\$7,%eax
+	xor	%ecx,%ecx
+	cpuid
+	mov	%ebx,8(%rdi)		# save extended feature flags
+.Lno_extended_info:
+
 	bt	\$27,%r9d		# check OSXSAVE bit
 	jnc	.Lclear_avx
 	xor	%ecx,%ecx		# XCR0
diff --git a/crypto/x86cpuid.pl b/crypto/x86cpuid.pl
index e95f627..90ed196 100644
--- a/crypto/x86cpuid.pl
+++ b/crypto/x86cpuid.pl
@@ -20,10 +20,10 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
 	&pop	("eax");
 	&xor	("ecx","eax");
 	&xor	("eax","eax");
+	&mov	("esi",&wparam(0));
+	&mov	(&DWP(8,"esi"),"eax");	# clear extended feature flags
 	&bt	("ecx",21);
 	&jnc	(&label("nocpuid"));
-	&mov	("esi",&wparam(0));
-	&mov	(&DWP(8,"esi"),"eax");	# clear 3rd word
 	&cpuid	();
 	&mov	("edi","eax");		# max value for standard query level
 
@@ -81,26 +81,16 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
 	&jmp	(&label("generic"));
 	
 &set_label("intel");
-	&cmp	("edi",7);
-	&jb	(&label("cacheinfo"));
-
-	&mov	("esi",&wparam(0));
-	&mov	("eax",7);
-	&xor	("ecx","ecx");
-	&cpuid	();
-	&mov	(&DWP(8,"esi"),"ebx");
-
-&set_label("cacheinfo");
 	&cmp	("edi",4);
-	&mov	("edi",-1);
+	&mov	("esi",-1);
 	&jb	(&label("nocacheinfo"));
 
 	&mov	("eax",4);
 	&mov	("ecx",0);		# query L1D
 	&cpuid	();
-	&mov	("edi","eax");
-	&shr	("edi",14);
-	&and	("edi",0xfff);		# number of cores -1 per L1D
+	&mov	("esi","eax");
+	&shr	("esi",14);
+	&and	("esi",0xfff);		# number of cores -1 per L1D
 
 &set_label("nocacheinfo");
 	&mov	("eax",1);
@@ -118,7 +108,7 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
 	&bt	("edx",28);		# test hyper-threading bit
 	&jnc	(&label("generic"));
 	&and	("edx",0xefffffff);
-	&cmp	("edi",0);
+	&cmp	("esi",0);
 	&je	(&label("generic"));
 
 	&or	("edx",0x10000000);
@@ -130,10 +120,19 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
 &set_label("generic");
 	&and	("ebp",1<<11);		# isolate AMD XOP flag
 	&and	("ecx",0xfffff7ff);	# force 11th bit to 0
-	&mov	("esi","edx");
+	&mov	("esi","edx");		# %ebp:%esi is copy of %ecx:%edx
 	&or	("ebp","ecx");		# merge AMD XOP flag
 
-	&bt	("ecx",27);		# check OSXSAVE bit
+	&cmp	("edi",7);
+	&mov	("edi",&wparam(0));
+	&jb	(&label("no_extended_info"));
+	&mov	("eax",7);
+	&xor	("ecx","ecx");
+	&cpuid	();
+	&mov	(&DWP(8,"edi"),"ebx");	# save extended feature flag
+&set_label("no_extended_info");
+
+	&bt	("ebp",27);		# check OSXSAVE bit
 	&jnc	(&label("clear_avx"));
 	&xor	("ecx","ecx");
 	&data_byte(0x0f,0x01,0xd0);	# xgetbv
@@ -147,7 +146,6 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
 	&and	("esi",0xfeffffff);	# clear FXSR
 &set_label("clear_avx");
 	&and	("ebp",0xefffe7ff);	# clear AVX, FMA and AMD XOP bits
-	&mov	("edi",&wparam(0));
 	&and	(&DWP(8,"edi"),0xffffffdf);	# clear AVX2
 &set_label("done");
 	&mov	("eax","esi");


More information about the openssl-commits mailing list