[openssl-commits] [openssl] master update

Andy Polyakov appro at openssl.org
Mon Mar 13 17:42:42 UTC 2017


The branch master has been updated
       via  1aed5e1ac28790cc915ad03e86e2d5e896a4ea13 (commit)
      from  b1fa4031feb94e1406f3bd2ba0f771e48d0407b5 (commit)


- Log -----------------------------------------------------------------
commit 1aed5e1ac28790cc915ad03e86e2d5e896a4ea13
Author: Andy Polyakov <appro at openssl.org>
Date:   Sun Mar 12 14:45:06 2017 +0100

    crypto/x86*cpuid.pl: move extended feature detection.
    
    Exteneded feature flags were not pulled on AMD processors, as result
    a number of extensions were effectively masked on Ryzen. Original fix
    for x86_64cpuid.pl addressed this problem, but messed up processor
    vendor detection. This fix moves extended feature detection past
    basic feature detection where it belongs. 32-bit counterpart is
    harmonized too.
    
    Reviewed-by: Rich Salz <rsalz at openssl.org>
    Reviewed-by: Richard Levitte <levitte at openssl.org>

-----------------------------------------------------------------------

Summary of changes:
 crypto/x86_64cpuid.pl | 21 ++++++++++-----------
 crypto/x86cpuid.pl    | 38 ++++++++++++++++++--------------------
 2 files changed, 28 insertions(+), 31 deletions(-)

diff --git a/crypto/x86_64cpuid.pl b/crypto/x86_64cpuid.pl
index c2a7d72..2467af7 100644
--- a/crypto/x86_64cpuid.pl
+++ b/crypto/x86_64cpuid.pl
@@ -68,20 +68,10 @@ OPENSSL_ia32_cpuid:
 .cfi_register	%rbx,%r8
 
 	xor	%eax,%eax
-	mov	%eax,8(%rdi)		# clear 3rd word
+	mov	%eax,8(%rdi)		# clear extended feature flags
 	cpuid
 	mov	%eax,%r11d		# max value for standard query level
 
-	cmp	\$7,%eax
-	jb	.Lno_extended_info
-
-	mov	\$7,%eax
-	xor	%ecx,%ecx
-	cpuid
-	mov	%ebx,8(%rdi)
-
-.Lno_extended_info:
-
 	xor	%eax,%eax
 	cmp	\$0x756e6547,%ebx	# "Genu"
 	setne	%al
@@ -175,6 +165,15 @@ OPENSSL_ia32_cpuid:
 	or	%ecx,%r9d		# merge AMD XOP flag
 
 	mov	%edx,%r10d		# %r9d:%r10d is copy of %ecx:%edx
+
+	cmp	\$7,%r11d
+	jb	.Lno_extended_info
+	mov	\$7,%eax
+	xor	%ecx,%ecx
+	cpuid
+	mov	%ebx,8(%rdi)		# save extended feature flags
+.Lno_extended_info:
+
 	bt	\$27,%r9d		# check OSXSAVE bit
 	jnc	.Lclear_avx
 	xor	%ecx,%ecx		# XCR0
diff --git a/crypto/x86cpuid.pl b/crypto/x86cpuid.pl
index d3e7ade..1d57f36 100644
--- a/crypto/x86cpuid.pl
+++ b/crypto/x86cpuid.pl
@@ -30,10 +30,10 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
 	&pop	("eax");
 	&xor	("ecx","eax");
 	&xor	("eax","eax");
+	&mov	("esi",&wparam(0));
+	&mov	(&DWP(8,"esi"),"eax");	# clear extended feature flags
 	&bt	("ecx",21);
 	&jnc	(&label("nocpuid"));
-	&mov	("esi",&wparam(0));
-	&mov	(&DWP(8,"esi"),"eax");	# clear 3rd word
 	&cpuid	();
 	&mov	("edi","eax");		# max value for standard query level
 
@@ -91,26 +91,16 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
 	&jmp	(&label("generic"));
 
 &set_label("intel");
-	&cmp	("edi",7);
-	&jb	(&label("cacheinfo"));
-
-	&mov	("esi",&wparam(0));
-	&mov	("eax",7);
-	&xor	("ecx","ecx");
-	&cpuid	();
-	&mov	(&DWP(8,"esi"),"ebx");
-
-&set_label("cacheinfo");
 	&cmp	("edi",4);
-	&mov	("edi",-1);
+	&mov	("esi",-1);
 	&jb	(&label("nocacheinfo"));
 
 	&mov	("eax",4);
 	&mov	("ecx",0);		# query L1D
 	&cpuid	();
-	&mov	("edi","eax");
-	&shr	("edi",14);
-	&and	("edi",0xfff);		# number of cores -1 per L1D
+	&mov	("esi","eax");
+	&shr	("esi",14);
+	&and	("esi",0xfff);		# number of cores -1 per L1D
 
 &set_label("nocacheinfo");
 	&mov	("eax",1);
@@ -128,7 +118,7 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
 	&bt	("edx",28);		# test hyper-threading bit
 	&jnc	(&label("generic"));
 	&and	("edx",0xefffffff);
-	&cmp	("edi",0);
+	&cmp	("esi",0);
 	&je	(&label("generic"));
 
 	&or	("edx",0x10000000);
@@ -140,10 +130,19 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
 &set_label("generic");
 	&and	("ebp",1<<11);		# isolate AMD XOP flag
 	&and	("ecx",0xfffff7ff);	# force 11th bit to 0
-	&mov	("esi","edx");
+	&mov	("esi","edx");		# %ebp:%esi is copy of %ecx:%edx
 	&or	("ebp","ecx");		# merge AMD XOP flag
 
-	&bt	("ecx",27);		# check OSXSAVE bit
+	&cmp	("edi",7);
+	&mov	("edi",&wparam(0));
+	&jb	(&label("no_extended_info"));
+	&mov	("eax",7);
+	&xor	("ecx","ecx");
+	&cpuid	();
+	&mov	(&DWP(8,"edi"),"ebx");	# save extended feature flag
+&set_label("no_extended_info");
+
+	&bt	("ebp",27);		# check OSXSAVE bit
 	&jnc	(&label("clear_avx"));
 	&xor	("ecx","ecx");
 	&data_byte(0x0f,0x01,0xd0);	# xgetbv
@@ -157,7 +156,6 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
 	&and	("esi",0xfeffffff);	# clear FXSR
 &set_label("clear_avx");
 	&and	("ebp",0xefffe7ff);	# clear AVX, FMA and AMD XOP bits
-	&mov	("edi",&wparam(0));
 	&and	(&DWP(8,"edi"),0xffffffdf);	# clear AVX2
 &set_label("done");
 	&mov	("eax","esi");


More information about the openssl-commits mailing list