[openssl-commits] [openssl] OpenSSL_1_0_1-stable update

Andy Polyakov appro at openssl.org
Mon Mar 7 21:17:03 UTC 2016


The branch OpenSSL_1_0_1-stable has been updated
       via  a15971944091fa01d959566b17ce86225346c83c (commit)
      from  6e7a1f35b71f35296257ce634fce933f8fe41c8c (commit)


- Log -----------------------------------------------------------------
commit a15971944091fa01d959566b17ce86225346c83c
Author: Andy Polyakov <appro at openssl.org>
Date:   Fri Mar 4 11:39:11 2016 +0100

    bn/asm/x86[_64]-mont*.pl: complement alloca with page-walking.
    
    Some OSes, *cough*-dows, insist on stack being "wired" to
    physical memory in strictly sequential manner, i.e. if stack
    allocation spans two pages, then reference to farmost one can
    be punishable by SEGV. But page walking can do good even on
    other OSes, because it guarantees that villain thread hits
    the guard page before it can make damage to innocent one...
    
    Reviewed-by: Rich Salz <rsalz at openssl.org>
    (cherry picked from commit adc4f1fc25b2cac90076f1e1695b05b7aeeae501)
    
    Resolved conflicts:
    	crypto/bn/asm/x86_64-mont.pl
    	crypto/bn/asm/x86_64-mont5.pl
    
    Reviewed-by: Richard Levitte <levitte at openssl.org>

-----------------------------------------------------------------------

Summary of changes:
 crypto/bn/asm/x86-mont.pl     | 15 +++++++++++++++
 crypto/bn/asm/x86_64-mont.pl  | 40 +++++++++++++++++++++++++++++++++++++---
 crypto/bn/asm/x86_64-mont5.pl | 22 ++++++++++++++++++++++
 3 files changed, 74 insertions(+), 3 deletions(-)

diff --git a/crypto/bn/asm/x86-mont.pl b/crypto/bn/asm/x86-mont.pl
index e8f6b05..89f4de6 100755
--- a/crypto/bn/asm/x86-mont.pl
+++ b/crypto/bn/asm/x86-mont.pl
@@ -85,6 +85,21 @@ $frame=32;				# size of above frame rounded up to 16n
 
 	&and	("esp",-64);		# align to cache line
 
+	# Some OSes, *cough*-dows, insist on stack being "wired" to
+	# physical memory in strictly sequential manner, i.e. if stack
+	# allocation spans two pages, then reference to farmost one can
+	# be punishable by SEGV. But page walking can do good even on
+	# other OSes, because it guarantees that villain thread hits
+	# the guard page before it can make damage to innocent one...
+	&mov	("eax","ebp");
+	&sub	("eax","esp");
+	&and	("eax",-4096);
+&set_label("page_walk");
+	&mov	("edx",&DWP(0,"esp","eax"));
+	&sub	("eax",4096);
+	&data_byte(0x2e);
+	&jnc	(&label("page_walk"));
+
 	################################# load argument block...
 	&mov	("eax",&DWP(0*4,"esi"));# BN_ULONG *rp
 	&mov	("ebx",&DWP(1*4,"esi"));# const BN_ULONG *ap
diff --git a/crypto/bn/asm/x86_64-mont.pl b/crypto/bn/asm/x86_64-mont.pl
index 17fb94c..c8ae019 100755
--- a/crypto/bn/asm/x86_64-mont.pl
+++ b/crypto/bn/asm/x86_64-mont.pl
@@ -91,6 +91,20 @@ bn_mul_mont:
 
 	mov	%r11,8(%rsp,$num,8)	# tp[num+1]=%rsp
 .Lmul_body:
+	# Some OSes, *cough*-dows, insist on stack being "wired" to
+	# physical memory in strictly sequential manner, i.e. if stack
+	# allocation spans two pages, then reference to farmost one can
+	# be punishable by SEGV. But page walking can do good even on
+	# other OSes, because it guarantees that villain thread hits
+	# the guard page before it can make damage to innocent one...
+	sub	%rsp,%r11
+	and	\$-4096,%r11
+.Lmul_page_walk:
+	mov	(%rsp,%r11),%r10
+	sub	\$4096,%r11
+	.byte	0x66,0x2e		# predict non-taken
+	jnc	.Lmul_page_walk
+
 	mov	$bp,%r12		# reassign $bp
 ___
 		$bp="%r12";
@@ -296,6 +310,14 @@ bn_mul4x_mont:
 
 	mov	%r11,8(%rsp,$num,8)	# tp[num+1]=%rsp
 .Lmul4x_body:
+	sub	%rsp,%r11
+	and	\$-4096,%r11
+.Lmul4x_page_walk:
+	mov	(%rsp,%r11),%r10
+	sub	\$4096,%r11
+	.byte	0x2e			# predict non-taken
+	jnc	.Lmul4x_page_walk
+
 	mov	$rp,16(%rsp,$num,8)	# tp[num+2]=$rp
 	mov	%rdx,%r12		# reassign $bp
 ___
@@ -707,6 +729,7 @@ $code.=<<___;
 .align	16
 bn_sqr4x_mont:
 .Lsqr4x_enter:
+	mov	%rsp,%rax
 	push	%rbx
 	push	%rbp
 	push	%r12
@@ -715,12 +738,23 @@ bn_sqr4x_mont:
 	push	%r15
 
 	shl	\$3,${num}d		# convert $num to bytes
-	xor	%r10,%r10
 	mov	%rsp,%r11		# put aside %rsp
-	sub	$num,%r10		# -$num
+	neg	$num			# -$num
 	mov	($n0),$n0		# *n0
-	lea	-72(%rsp,%r10,2),%rsp	# alloca(frame+2*$num)
+	lea	-72(%rsp,$num,2),%rsp	# alloca(frame+2*$num)
 	and	\$-1024,%rsp		# minimize TLB usage
+
+	sub	%rsp,%r11
+	and	\$-4096,%r11
+.Lsqr4x_page_walk:
+	mov	(%rsp,%r11),%r10
+	sub	\$4096,%r11
+	.byte	0x2e			# predict non-taken
+	jnc	.Lsqr4x_page_walk
+
+	mov	$num,%r10
+	neg	$num			# restore $num
+	lea	-48(%rax),%r11		# restore saved %rsp
 	##############################################################
 	# Stack layout
 	#
diff --git a/crypto/bn/asm/x86_64-mont5.pl b/crypto/bn/asm/x86_64-mont5.pl
index 2359791..99243ae 100755
--- a/crypto/bn/asm/x86_64-mont5.pl
+++ b/crypto/bn/asm/x86_64-mont5.pl
@@ -84,6 +84,20 @@ bn_mul_mont_gather5:
 
 	mov	%rax,8(%rsp,$num,8)	# tp[num+1]=%rsp
 .Lmul_body:
+	# Some OSes, *cough*-dows, insist on stack being "wired" to
+	# physical memory in strictly sequential manner, i.e. if stack
+	# allocation spans two pages, then reference to farmost one can
+	# be punishable by SEGV. But page walking can do good even on
+	# other OSes, because it guarantees that villain thread hits
+	# the guard page before it can make damage to innocent one...
+	sub	%rsp,%rax
+	and	\$-4096,%rax
+.Lmul_page_walk:
+	mov	(%rsp,%rax),%r11
+	sub	\$4096,%rax
+	.byte	0x2e			# predict non-taken
+	jnc	.Lmul_page_walk
+
 	lea	128($bp),%r12		# reassign $bp (+size optimization)
 ___
 		$bp="%r12";
@@ -407,6 +421,14 @@ bn_mul4x_mont_gather5:
 
 	mov	%rax,8(%rsp,$num,8)	# tp[num+1]=%rsp
 .Lmul4x_body:
+	sub	%rsp,%rax
+	and	\$-4096,%rax
+.Lmul4x_page_walk:
+	mov	(%rsp,%rax),%r11
+	sub	\$4096,%rax
+	.byte	0x2e			# predict non-taken
+	jnc	.Lmul4x_page_walk
+
 	mov	$rp,16(%rsp,$num,8)	# tp[num+2]=$rp
 	lea	128(%rdx),%r12		# reassign $bp (+size optimization)
 ___


More information about the openssl-commits mailing list