[openssl-commits] [openssl] OpenSSL_1_0_2-stable update

Andy Polyakov appro at openssl.org
Wed Nov 18 10:57:59 UTC 2015


The branch OpenSSL_1_0_2-stable has been updated
       via  bc2ab6ed2f07d56dd0afee2a2dfb631a27548934 (commit)
      from  bef7693860b4869613a6a4e82265ba82f9c0d460 (commit)


- Log -----------------------------------------------------------------
commit bc2ab6ed2f07d56dd0afee2a2dfb631a27548934
Author: Andy Polyakov <appro at openssl.org>
Date:   Fri Nov 13 23:52:13 2015 +0100

    aes/asm/vpaes-ppc.pl: eliminate overhung stores in misaligned cases.
    
    Reviewed-by: Richard Levitte <levitte at openssl.org>
    (cherry picked from commit ce24d2ed232a095349886aecc8cb959d5dda8342)

-----------------------------------------------------------------------

Summary of changes:
 crypto/aes/asm/vpaes-ppc.pl | 198 ++++++++++++++++++++++++++++++--------------
 1 file changed, 136 insertions(+), 62 deletions(-)
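
Background on the "overhung stores" the commit title refers to, as visible in the
diff below: the old misaligned-output path loaded the destination with lvx, blended
the new bytes in with vsel under a mask, and wrote a full aligned 16-byte block back
with stvx, which touches memory just outside the caller's buffer. The new code keeps
the single stvx store when the destination is 16-byte aligned, and otherwise falls
back to byte-wise stvebx stores (and word-wise stvewx stores for the IV, which the
code notes is at least 32-bit aligned), so nothing outside the buffer is written.
A minimal C sketch of that idea, not part of the patch (store_block16 and vec16 are
illustrative names only):

    #include <stdint.h>
    #include <string.h>

    /* stands in for a 16-byte AltiVec register value */
    typedef struct { uint8_t b[16]; } vec16;

    static void store_block16(uint8_t *out, const vec16 *v)
    {
        if (((uintptr_t)out & 15) == 0) {
            /* aligned destination: one full-width store, like stvx */
            memcpy(out, v->b, 16);
        } else {
            /* misaligned destination: byte-wise stores, like the stvebx
             * loop -- only out[0..15] is ever written, no overhang */
            for (int i = 0; i < 16; i++)
                out[i] = v->b[i];
        }
    }
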

diff --git a/crypto/aes/asm/vpaes-ppc.pl b/crypto/aes/asm/vpaes-ppc.pl
index 7fda60e..1759ae9 100644
--- a/crypto/aes/asm/vpaes-ppc.pl
+++ b/crypto/aes/asm/vpaes-ppc.pl
@@ -337,24 +337,27 @@ Lenc_entry:
 	addi	$inp, $inp, 15		# 15 is not a typo
 	 ?lvsr	$outperm, 0, $out
 	?lvsl	$keyperm, 0, $key	# prepare for unaligned access
-	 vnor	$outmask, v7, v7	# 0xff..ff
 	lvx	$inptail, 0, $inp	# redundant in aligned case
-	 ?vperm	$outmask, v7, $outmask, $outperm
-	 lvx	$outhead, 0, $out
 	?vperm	v0, v0, $inptail, $inpperm
 
 	bl	_vpaes_encrypt_core
 
+	andi.	r8, $out, 15
+	li	r9, 16
+	beq	Lenc_out_aligned
+
 	vperm	v0, v0, v0, $outperm	# rotate right/left
-	vsel	v1, $outhead, v0, $outmask
-	vmr	$outhead, v0
-	stvx	v1, 0, $out
-	addi	$out, $out, 15		# 15 is not a typo
-	########
+	mtctr	r9
+Lenc_out_unaligned:
+	stvebx	v0, 0, $out
+	addi	$out, $out, 1
+	bdnz	Lenc_out_unaligned
+	b	Lenc_done
 
-	lvx	v1, 0, $out		# redundant in aligned case
-	vsel	v1, $outhead, v1, $outmask
-	stvx	v1, 0, $out
+.align	4
+Lenc_out_aligned:
+	stvx	v0, 0, $out
+Lenc_done:
 
 	li	r10,`15+6*$SIZE_T`
 	li	r11,`31+6*$SIZE_T`
@@ -566,24 +569,27 @@ Ldec_entry:
 	addi	$inp, $inp, 15		# 15 is not a typo
 	 ?lvsr	$outperm, 0, $out
 	?lvsl	$keyperm, 0, $key
-	 vnor	$outmask, v7, v7	# 0xff..ff
 	lvx	$inptail, 0, $inp	# redundant in aligned case
-	 ?vperm	$outmask, v7, $outmask, $outperm
-	 lvx	$outhead, 0, $out
 	?vperm	v0, v0, $inptail, $inpperm
 
 	bl	_vpaes_decrypt_core
 
+	andi.	r8, $out, 15
+	li	r9, 16
+	beq	Ldec_out_aligned
+
 	vperm	v0, v0, v0, $outperm	# rotate right/left
-	vsel	v1, $outhead, v0, $outmask
-	vmr	$outhead, v0
-	stvx	v1, 0, $out
-	addi	$out, $out, 15		# 15 is not a typo
-	########
+	mtctr	r9
+Ldec_out_unaligned:
+	stvebx	v0, 0, $out
+	addi	$out, $out, 1
+	bdnz	Ldec_out_unaligned
+	b	Ldec_done
 
-	lvx	v1, 0, $out		# redundant in aligned case
-	vsel	v1, $outhead, v1, $outmask
-	stvx	v1, 0, $out
+.align	4
+Ldec_out_aligned:
+	stvx	v0, 0, $out
+Ldec_done:
 
 	li	r10,`15+6*$SIZE_T`
 	li	r11,`31+6*$SIZE_T`
@@ -658,11 +664,11 @@ Ldec_entry:
 	$PUSH	r0, `$FRAME+$SIZE_T*2+$LRSAVE`($sp)
 
 	and	r30, r5, r9		# copy length&-16
+	andi.	r9, $out, 15		# is $out aligned?
 	mr	r5, r6			# copy pointer to key
 	mr	r31, r7			# copy pointer to iv
-	blt	Lcbc_abort
-	cmpwi	r8, 0			# test direction
 	li	r6, -1
+	mcrf	cr1, cr0		# put aside $out alignment flag
 	mr	r7, r12			# copy vrsave
 	mtspr	256, r6			# preserve all AltiVec registers
 
@@ -672,6 +678,7 @@ Ldec_entry:
 	lvx	v25, r9, r31
 	?vperm	v24, v24, v25, $inpperm
 
+	cmpwi	r8, 0			# test direction
 	neg	r8, $inp		# prepare for unaligned access
 	 vxor	v7, v7, v7
 	?lvsl	$keyperm, 0, $key
@@ -681,13 +688,37 @@ Ldec_entry:
 	lvx	$inptail, 0, $inp
 	 ?vperm	$outmask, v7, $outmask, $outperm
 	addi	$inp, $inp, 15		# 15 is not a typo
-	 lvx	$outhead, 0, $out
 
 	beq	Lcbc_decrypt
 
 	bl	_vpaes_encrypt_preheat
 	li	r0, 16
 
+	beq	cr1, Lcbc_enc_loop	# $out is aligned
+
+	vmr	v0, $inptail
+	lvx	$inptail, 0, $inp
+	addi	$inp, $inp, 16
+	?vperm	v0, v0, $inptail, $inpperm
+	vxor	v0, v0, v24		# ^= iv
+
+	bl	_vpaes_encrypt_core
+
+	andi.	r8, $out, 15
+	vmr	v24, v0			# put aside iv
+	sub	r9, $out, r8
+	vperm	$outhead, v0, v0, $outperm	# rotate right/left
+
+Lcbc_enc_head:
+	stvebx	$outhead, r8, r9
+	cmpwi	r8, 15
+	addi	r8, r8, 1
+	bne	Lcbc_enc_head
+
+	sub.	r30, r30, r0		# len -= 16
+	addi	$out, $out, 16
+	beq	Lcbc_unaligned_done
+
 Lcbc_enc_loop:
 	vmr	v0, $inptail
 	lvx	$inptail, 0, $inp
@@ -713,6 +744,32 @@ Lcbc_decrypt:
 	bl	_vpaes_decrypt_preheat
 	li	r0, 16
 
+	beq	cr1, Lcbc_dec_loop	# $out is aligned
+
+	vmr	v0, $inptail
+	lvx	$inptail, 0, $inp
+	addi	$inp, $inp, 16
+	?vperm	v0, v0, $inptail, $inpperm
+	vmr	v25, v0			# put aside input
+
+	bl	_vpaes_decrypt_core
+
+	andi.	r8, $out, 15
+	vxor	v0, v0, v24		# ^= iv
+	vmr	v24, v25
+	sub	r9, $out, r8
+	vperm	$outhead, v0, v0, $outperm	# rotate right/left
+
+Lcbc_dec_head:
+	stvebx	$outhead, r8, r9
+	cmpwi	r8, 15
+	addi	r8, r8, 1
+	bne	Lcbc_dec_head
+
+	sub.	r30, r30, r0		# len -= 16
+	addi	$out, $out, 16
+	beq	Lcbc_unaligned_done
+
 Lcbc_dec_loop:
 	vmr	v0, $inptail
 	lvx	$inptail, 0, $inp
@@ -733,23 +790,29 @@ Lcbc_dec_loop:
 	bne	Lcbc_dec_loop
 
 Lcbc_done:
-	addi	$out, $out, -1
-	lvx	v1, 0, $out		# redundant in aligned case
-	vsel	v1, $outhead, v1, $outmask
-	stvx	v1, 0, $out
-
+	beq	cr1, Lcbc_write_iv	# $out is aligned
+
+Lcbc_unaligned_done:
+	andi.	r8, $out, 15
+	sub	$out, $out, r8
+	li	r9, 0
+Lcbc_tail:
+	stvebx	$outhead, r9, $out
+	addi	r9, r9, 1
+	cmpw	r9, r8
+	bne	Lcbc_tail
+
+Lcbc_write_iv:
 	neg	r8, r31			# write [potentially unaligned] iv
+	li	r10, 4
 	?lvsl	$outperm, 0, r8
-	li	r6, 15
-	vnor	$outmask, v7, v7	# 0xff..ff
-	?vperm	$outmask, v7, $outmask, $outperm
-	lvx	$outhead, 0, r31
+	li	r11, 8
+	li	r12, 12
 	vperm	v24, v24, v24, $outperm	# rotate right/left
-	vsel	v0, $outhead, v24, $outmask
-	lvx	v1, r6, r31
-	stvx	v0, 0, r31
-	vsel	v1, v24, v1, $outmask
-	stvx	v1, r6, r31
+	stvewx	v24, 0, r31		# ivp is at least 32-bit aligned
+	stvewx	v24, r10, r31
+	stvewx	v24, r11, r31
+	stvewx	v24, r12, r31
 
 	mtspr	256, r7			# restore vrsave
 	li	r10,`15+6*$SIZE_T`
@@ -872,18 +935,21 @@ _vpaes_schedule_core:
 
 	# encrypting, output zeroth round key after transform
 	li	r8, 0x30		# mov	\$0x30,%r8d
-	addi	r10, r12, 0x80		# lea	.Lk_sr(%rip),%r10
+	li	r9, 4
+	li	r10, 8
+	li	r11, 12
 
 	?lvsr	$outperm, 0, $out	# prepare for unaligned access
 	vnor	$outmask, v9, v9	# 0xff..ff
-	lvx	$outhead, 0, $out
 	?vperm	$outmask, v9, $outmask, $outperm
 
 	#stvx	v0, 0, $out		# vmovdqu	%xmm0,	(%rdx)
-	vperm	v1, v0, v0, $outperm	# rotate right/left
-	vsel	v2, $outhead, v1, $outmask
-	vmr	$outhead, v1
-	stvx	v2, 0, $out
+	vperm	$outhead, v0, v0, $outperm	# rotate right/left
+	stvewx	$outhead, 0, $out	# some are superfluous
+	stvewx	$outhead, r9, $out
+	stvewx	$outhead, r10, $out
+	addi	r10, r12, 0x80		# lea	.Lk_sr(%rip),%r10
+	stvewx	$outhead, r11, $out
 	b	Lschedule_go
 
 Lschedule_am_decrypting:
@@ -893,20 +959,24 @@ Lschedule_am_decrypting:
 	addi	r10, r12, 0x80		# lea	.Lk_sr(%rip),%r10
 	# decrypting, output zeroth round key after shiftrows
 	lvx	v1, r8, r10		# vmovdqa	(%r8,%r10),	%xmm1
+	li	r9, 4
+	li	r10, 8
+	li	r11, 12
 	vperm	v4, v3, v3, v1		# vpshufb	%xmm1,	%xmm3,	%xmm3
 
 	neg	r0, $out		# prepare for unaligned access
 	?lvsl	$outperm, 0, r0
-	addi	$out, $out, 15		# 15 is not typo
 	vnor	$outmask, v9, v9	# 0xff..ff
-	lvx	$outhead, 0, $out
 	?vperm	$outmask, $outmask, v9, $outperm
 
 	#stvx	v4, 0, $out		# vmovdqu	%xmm3,	(%rdx)
-	vperm	v4, v4, v4, $outperm	# rotate right/left
-	vsel	v2, $outhead, v4, $outmask
-	vmr	$outhead, v4
-	stvx	v2, 0, $out
+	vperm	$outhead, v4, v4, $outperm	# rotate right/left
+	stvewx	$outhead, 0, $out	# some are superfluous
+	stvewx	$outhead, r9, $out
+	stvewx	$outhead, r10, $out
+	addi	r10, r12, 0x80		# lea	.Lk_sr(%rip),%r10
+	stvewx	$outhead, r11, $out
+	addi	$out, $out, 15		# 15 is not typo
 	xori	r8, r8, 0x30		# xor	\$0x30, %r8
 
 Lschedule_go:
@@ -1038,14 +1108,15 @@ Lschedule_mangle_last:
 
 	#stvx	v0, r0, $out		# vmovdqu	%xmm0,	(%rdx)		# save last key
 	vperm	v0, v0, v0, $outperm	# rotate right/left
+	li	r10, 4
 	vsel	v2, $outhead, v0, $outmask
-	vmr	$outhead, v0
+	li	r11, 8
 	stvx	v2, 0, $out
-
-	addi	$out, $out, 15		# 15 is not typo
-	lvx	v1, 0, $out		# redundant in aligned case
-	vsel	v1, $outhead, v1, $outmask
-	stvx	v1, 0, $out
+	li	r12, 12
+	stvewx	v0, 0, $out		# some (or all) are redundant
+	stvewx	v0, r10, $out
+	stvewx	v0, r11, $out
+	stvewx	v0, r12, $out
 	b	Lschedule_mangle_done
 
 .align	4
@@ -1057,15 +1128,18 @@ Lschedule_mangle_last_dec:
 	bl	_vpaes_schedule_transform	# output transform
 
 	#stvx	v0, r0, $out		# vmovdqu	%xmm0,	(%rdx)		# save last key
+	addi	r9, $out, -15		# -15 is not typo
 	vperm	v0, v0, v0, $outperm	# rotate right/left
+	li	r10, 4
 	vsel	v2, $outhead, v0, $outmask
-	vmr	$outhead, v0
+	li	r11, 8
 	stvx	v2, 0, $out
+	li	r12, 12
+	stvewx	v0, 0, r9		# some (or all) are redundant
+	stvewx	v0, r10, r9
+	stvewx	v0, r11, r9
+	stvewx	v0, r12, r9
 
-	addi	$out, $out, -15		# -15 is not typo
-	lvx	v1, 0, $out		# redundant in aligned case
-	vsel	v1, $outhead, v1, $outmask
-	stvx	v1, 0, $out
 
 Lschedule_mangle_done:
 	mtlr	r7
