[openssl-dev] [openssl.org #4491] [PATCH] VMX-crypto: Add XTS support

Andy Polyakov via RT rt at openssl.org
Wed May 25 21:11:55 UTC 2016


> Author: Leonidas Da Silva Barbosa <leosilva at linux.vnet.ibm.com>
> ASM implementation
> 
> Signed-off-by: Paulo Flabiano Smorigo <pfsmorigo at linux.vnet.ibm.com>
> Signed-off-by: Leonidas Da Silva Barbosa <leosilva at linux.vnet.ibm.com>
> ---
> +my ($inp,$out,$len,$key,$tweak,$enc,$rounds,$idx)=map("r$_",(3..10));

There seems to be a misunderstanding. The EVP layer expects the XTS
"stream" subroutine to accept a cleartext initial tweak (which is why both
keys are passed) and to handle lengths not divisible by 16. The suggested
code does neither. I also fail to follow the tweak calculations; they seem
more complicated than necessary. I managed to do it in 5 instructions:

        vsrab           $tmp,$tweak,$seven
        vaddubm         $tweak,$tweak,$tweak
        vsldoi          $tmp,$tmp,$tmp,15
        vand            $tmp,$tmp,$eighty7
        vxor            $tweak,$tweak,$tmp
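
For comparison, here is the same update as byte-wise C: multiplication of
the tweak by x in GF(2^128) per IEEE P1619, with the tweak treated as a
128-bit little-endian value (a minimal reference sketch, not code from the
patch):

        /* Multiply the XTS tweak by x in GF(2^128); P1619 convention,
         * tweak held as a 128-bit little-endian value. */
        static void xts_mul_x(unsigned char t[16])
        {
            unsigned int carry = 0, c;
            int i;

            for (i = 0; i < 16; i++) {
                c = t[i] >> 7;             /* bit shifted out of this byte */
                t[i] = (unsigned char)((t[i] << 1) | carry);
                carry = c;
            }
            if (carry)
                t[0] ^= 0x87;              /* x^128 = x^7 + x^2 + x + 1 */
        }

The vector sequence computes the same thing without a serial carry chain:
vsrab broadcasts each byte's top bit across the byte, vsldoi rotates those
masks into the neighbouring byte, and the vand with 0x870101..01 turns them
into the correct carry-ins, 0x87 for the bit falling off the top into the
low byte and 0x01 between adjacent bytes.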

Please review the attached code. Even though it is already almost 4x
faster, it will be improved even further shortly.
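
To make that contract concrete, this is the prototype the EVP layer binds
to xctx->stream (it matches the e_aes.c hunk in the attached patch): key2
encrypts the cleartext iv[] into the initial tweak, key1 drives the data
en-/decryption, and len need not be a multiple of 16, the tail being
handled with ciphertext stealing.

        void aes_p8_xts_encrypt(const unsigned char *inp, unsigned char *out,
                                size_t len, const AES_KEY *key1,
                                const AES_KEY *key2,
                                const unsigned char iv[16]);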


-- 
Ticket here: http://rt.openssl.org/Ticket/Display.html?id=4491
Please log in as guest with password guest if prompted

-------------- next part --------------
diff --git a/crypto/aes/asm/aesp8-ppc.pl b/crypto/aes/asm/aesp8-ppc.pl
index a1891cc..8ea52cd 100755
--- a/crypto/aes/asm/aesp8-ppc.pl
+++ b/crypto/aes/asm/aesp8-ppc.pl
@@ -1887,6 +1887,415 @@ Lctr32_enc8x_done:
 ___
 }}	}}}
 
+#########################################################################
+{{{	# XTS procedure[s]						#
+my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) =	map("r$_",(3..10));
+my ($rndkey0,$rndkey1,$inout,$tmp) =			map("v$_",(0..3));
+my ($inptail,$inpperm,$output,$leperm,$keyperm) =	map("v$_",(4..8));
+my ($tweak,$seven,$eighty7,$tweak1) =			map("v$_",(9..12));
+
+   ($inp,$idx) = ($idx,$inp);				# reassign
+
+$code.=<<___;
+.globl	.${prefix}_xts_encrypt
+.align	5
+.${prefix}_xts_encrypt:
+	mr		$inp,r3				# reassign
+	li		r3,-1
+	${UCMP}i	$len,16
+	bltlr-
+
+	lis		r0,0xfff0
+	mfspr		r12,256				# save vrsave
+	li		r11,0
+	mtspr		256,r0
+
+	vspltisb	$seven,0x07			# 0x070707..07
+	le?lvsl		$leperm,r11,r11
+	le?vspltisb	$tmp,0x0f
+	le?vxor		$leperm,$leperm,$seven
+
+	li		$idx,15
+	lvx		$tweak,0,$ivp			# load [unaligned] iv
+	lvsl		$inpperm,0,$ivp
+	lvx		$inptail,$idx,$ivp
+	le?vxor		$inpperm,$inpperm,$tmp
+	vperm		$tweak,$tweak,$inptail,$inpperm
+
+	?lvsl		$keyperm,0,$key2		# prepare for unaligned key
+	lwz		$rounds,240($key2)
+	srwi		$rounds,$rounds,1
+	subi		$rounds,$rounds,1
+	li		$idx,16
+
+	neg		r11,$inp
+	lvsr		$inpperm,0,r11			# prepare for unaligned load
+	lvx		$inout,0,$inp
+	addi		$inp,$inp,15			# 15 is not a typo
+	le?vxor		$inpperm,$inpperm,$tmp
+
+	lvx		$rndkey0,0,$key2
+	lvx		$rndkey1,$idx,$key2
+	addi		$idx,$idx,16
+	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
+	vxor		$tweak,$tweak,$rndkey0
+	lvx		$rndkey0,$idx,$key2
+	addi		$idx,$idx,16
+	mtctr		$rounds
+
+Ltweak_xts_enc:
+	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
+	vcipher		$tweak,$tweak,$rndkey1
+	lvx		$rndkey1,$idx,$key2
+	addi		$idx,$idx,16
+	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
+	vcipher		$tweak,$tweak,$rndkey0
+	lvx		$rndkey0,$idx,$key2
+	addi		$idx,$idx,16
+	bdnz		Ltweak_xts_enc
+
+	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
+	vcipher		$tweak,$tweak,$rndkey1
+	lvx		$rndkey1,$idx,$key2
+	li		$idx,16
+	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
+	vcipherlast	$tweak,$tweak,$rndkey0
+
+	lvx		$inptail,0,$inp
+	addi		$inp,$inp,16
+
+	?lvsl		$keyperm,0,$key1		# prepare for unaligned key
+	lwz		$rounds,240($key1)
+	srwi		$rounds,$rounds,1
+	subi		$rounds,$rounds,1
+	li		$idx,16
+
+	vslb		$eighty7,$seven,$seven		# 0x808080..80
+	vor		$eighty7,$eighty7,$seven	# 0x878787..87
+	vspltisb	$tmp,1				# 0x010101..01
+	vsldoi		$eighty7,$eighty7,$tmp,15	# 0x870101..01
+
+	lvx		$rndkey0,0,$key1
+	lvx		$rndkey1,$idx,$key1
+	addi		$idx,$idx,16
+	vperm		$inout,$inout,$inptail,$inpperm
+	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
+	vxor		$inout,$inout,$tweak
+	vxor		$inout,$inout,$rndkey0
+	lvx		$rndkey0,$idx,$key1
+	addi		$idx,$idx,16
+	mtctr		$rounds
+	be?b		Loop_xts_enc
+
+.align	5
+Loop_xts_enc:
+	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
+	vcipher		$inout,$inout,$rndkey1
+	lvx		$rndkey1,$idx,$key1
+	addi		$idx,$idx,16
+	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
+	vcipher		$inout,$inout,$rndkey0
+	lvx		$rndkey0,$idx,$key1
+	addi		$idx,$idx,16
+	bdnz		Loop_xts_enc
+
+	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
+	vcipher		$inout,$inout,$rndkey1
+	lvx		$rndkey1,$idx,$key1
+	li		$idx,16
+	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
+	vxor		$rndkey0,$rndkey0,$tweak
+	vcipherlast	$output,$inout,$rndkey0
+
+	le?vperm	$tmp,$output,$output,$leperm
+	be?nop
+	le?stvx_u	$tmp,0,$out
+	be?stvx_u	$output,0,$out
+	addi		$out,$out,16
+
+	subic.		$len,$len,16
+	beq		Lxts_enc_done
+
+	vmr		$inout,$inptail
+	lvx		$inptail,0,$inp
+	addi		$inp,$inp,16
+	lvx		$rndkey0,0,$key1
+	lvx		$rndkey1,$idx,$key1
+	addi		$idx,$idx,16
+
+	vsrab		$tmp,$tweak,$seven		# next tweak value
+	vaddubm		$tweak,$tweak,$tweak
+	vsldoi		$tmp,$tmp,$tmp,15
+	vand		$tmp,$tmp,$eighty7
+	vxor		$tweak,$tweak,$tmp
+
+	vperm		$inout,$inout,$inptail,$inpperm
+	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
+	vxor		$inout,$inout,$tweak
+	vxor		$output,$output,$rndkey0	# just in case $len<16
+	vxor		$inout,$inout,$rndkey0
+	lvx		$rndkey0,$idx,$key1
+	addi		$idx,$idx,16
+
+	mtctr		$rounds
+	${UCMP}i	$len,16
+	bge		Loop_xts_enc
+
+	vxor		$output,$output,$tweak
+	lvsr		$inpperm,0,$len			# $inpperm is no longer needed
+	vxor		$inptail,$inptail,$inptail	# $inptail is no longer needed
+	vspltisb	$tmp,-1
+	vperm		$inptail,$inptail,$tmp,$inpperm
+	vsel		$inout,$inout,$output,$inptail
+
+	subi		r11,$out,17
+	subi		$out,$out,16
+	mtctr		$len
+	li		$len,16
+Loop_xts_enc_steal:
+	lbzu		r0,1(r11)
+	stb		r0,16(r11)
+	bdnz		Loop_xts_enc_steal
+
+	mtctr		$rounds
+	b		Loop_xts_enc			# one more time...
+
+Lxts_enc_done:
+	mtspr		256,r12				# restore vrsave
+	li		r3,0
+	blr
+	.long		0
+	.byte		0,12,0x04,0,0x80,6,6,0
+	.long		0
+.size	.${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt
+
+.globl	.${prefix}_xts_decrypt
+.align	5
+.${prefix}_xts_decrypt:
+	mr		$inp,r3				# reassign
+	li		r3,-1
+	${UCMP}i	$len,16
+	bltlr-
+
+	lis		r0,0xfff8
+	mfspr		r12,256				# save vrsave
+	li		r11,0
+	mtspr		256,r0
+
+	andi.		r0,$len,15
+	neg		r0,r0
+	andi.		r0,r0,16
+	sub		$len,$len,r0
+
+	vspltisb	$seven,0x07			# 0x070707..07
+	le?lvsl		$leperm,r11,r11
+	le?vspltisb	$tmp,0x0f
+	le?vxor		$leperm,$leperm,$seven
+
+	li		$idx,15
+	lvx		$tweak,0,$ivp			# load [unaligned] iv
+	lvsl		$inpperm,0,$ivp
+	lvx		$inptail,$idx,$ivp
+	le?vxor		$inpperm,$inpperm,$tmp
+	vperm		$tweak,$tweak,$inptail,$inpperm
+
+	?lvsl		$keyperm,0,$key2		# prepare for unaligned key
+	lwz		$rounds,240($key2)
+	srwi		$rounds,$rounds,1
+	subi		$rounds,$rounds,1
+	li		$idx,16
+
+	neg		r11,$inp
+	lvsr		$inpperm,0,r11			# prepare for unaligned load
+	lvx		$inout,0,$inp
+	addi		$inp,$inp,15			# 15 is not a typo
+	le?vxor		$inpperm,$inpperm,$tmp
+
+	lvx		$rndkey0,0,$key2
+	lvx		$rndkey1,$idx,$key2
+	addi		$idx,$idx,16
+	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
+	vxor		$tweak,$tweak,$rndkey0
+	lvx		$rndkey0,$idx,$key2
+	addi		$idx,$idx,16
+	mtctr		$rounds
+
+Ltweak_xts_dec:
+	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
+	vcipher		$tweak,$tweak,$rndkey1
+	lvx		$rndkey1,$idx,$key2
+	addi		$idx,$idx,16
+	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
+	vcipher		$tweak,$tweak,$rndkey0
+	lvx		$rndkey0,$idx,$key2
+	addi		$idx,$idx,16
+	bdnz		Ltweak_xts_dec
+
+	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
+	vcipher		$tweak,$tweak,$rndkey1
+	lvx		$rndkey1,$idx,$key2
+	li		$idx,16
+	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
+	vcipherlast	$tweak,$tweak,$rndkey0
+
+	lvx		$inptail,0,$inp
+	addi		$inp,$inp,16
+
+	?lvsl		$keyperm,0,$key1		# prepare for unaligned key
+	lwz		$rounds,240($key1)
+	srwi		$rounds,$rounds,1
+	subi		$rounds,$rounds,1
+	li		$idx,16
+
+	vslb		$eighty7,$seven,$seven		# 0x808080..80
+	vor		$eighty7,$eighty7,$seven	# 0x878787..87
+	vspltisb	$tmp,1				# 0x010101..01
+	vsldoi		$eighty7,$eighty7,$tmp,15	# 0x870101..01
+
+	lvx		$rndkey0,0,$key1
+	lvx		$rndkey1,$idx,$key1
+	addi		$idx,$idx,16
+	vperm		$inout,$inout,$inptail,$inpperm
+	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
+	vxor		$inout,$inout,$tweak
+	vxor		$inout,$inout,$rndkey0
+	lvx		$rndkey0,$idx,$key1
+	addi		$idx,$idx,16
+	mtctr		$rounds
+
+	${UCMP}i	$len,16
+	blt		Ltail_xts_dec
+	b		Loop_xts_dec
+
+.align	5
+Loop_xts_dec:
+	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
+	vncipher	$inout,$inout,$rndkey1
+	lvx		$rndkey1,$idx,$key1
+	addi		$idx,$idx,16
+	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
+	vncipher	$inout,$inout,$rndkey0
+	lvx		$rndkey0,$idx,$key1
+	addi		$idx,$idx,16
+	bdnz		Loop_xts_dec
+
+	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
+	vncipher	$inout,$inout,$rndkey1
+	lvx		$rndkey1,$idx,$key1
+	li		$idx,16
+	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
+	vxor		$rndkey0,$rndkey0,$tweak
+	vncipherlast	$output,$inout,$rndkey0
+
+	le?vperm	$tmp,$output,$output,$leperm
+	be?nop
+	le?stvx_u	$tmp,0,$out
+	be?stvx_u	$output,0,$out
+	addi		$out,$out,16
+
+	subic.		$len,$len,16
+	beq		Lxts_dec_done
+
+	vmr		$inout,$inptail
+	lvx		$inptail,0,$inp
+	addi		$inp,$inp,16
+	lvx		$rndkey0,0,$key1
+	lvx		$rndkey1,$idx,$key1
+	addi		$idx,$idx,16
+
+	vsrab		$tmp,$tweak,$seven		# next tweak value
+	vaddubm		$tweak,$tweak,$tweak
+	vsldoi		$tmp,$tmp,$tmp,15
+	vand		$tmp,$tmp,$eighty7
+	vxor		$tweak,$tweak,$tmp
+
+	vperm		$inout,$inout,$inptail,$inpperm
+	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
+	vxor		$inout,$inout,$tweak
+	vxor		$inout,$inout,$rndkey0
+	lvx		$rndkey0,$idx,$key1
+	addi		$idx,$idx,16
+
+	mtctr		$rounds
+	${UCMP}i	$len,16
+	bge		Loop_xts_dec
+
+Ltail_xts_dec:
+	vsrab		$tmp,$tweak,$seven		# next tweak value
+	vaddubm		$tweak1,$tweak,$tweak
+	vsldoi		$tmp,$tmp,$tmp,15
+	vand		$tmp,$tmp,$eighty7
+	vxor		$tweak1,$tweak1,$tmp
+
+	vxor		$inout,$inout,$tweak		# undo the tweak applied above
+	vxor		$inout,$inout,$tweak1		# CTS: last full block uses next tweak
+
+Loop_xts_dec_short:
+	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
+	vncipher	$inout,$inout,$rndkey1
+	lvx		$rndkey1,$idx,$key1
+	addi		$idx,$idx,16
+	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
+	vncipher	$inout,$inout,$rndkey0
+	lvx		$rndkey0,$idx,$key1
+	addi		$idx,$idx,16
+	bdnz		Loop_xts_dec_short
+
+	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
+	vncipher	$inout,$inout,$rndkey1
+	lvx		$rndkey1,$idx,$key1
+	li		$idx,16
+	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
+	vxor		$rndkey0,$rndkey0,$tweak1
+	vncipherlast	$output,$inout,$rndkey0
+
+	le?vperm	$tmp,$output,$output,$leperm
+	be?nop
+	le?stvx_u	$tmp,0,$out
+	be?stvx_u	$output,0,$out
+
+	vmr		$inout,$inptail
+	lvx		$inptail,0,$inp
+	#addi		$inp,$inp,16
+	lvx		$rndkey0,0,$key1
+	lvx		$rndkey1,$idx,$key1
+	addi		$idx,$idx,16
+	vperm		$inout,$inout,$inptail,$inpperm
+	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
+
+	lvsr		$inpperm,0,$len			# $inpperm is no longer needed
+	vxor		$inptail,$inptail,$inptail	# $inptail is no longer needed
+	vspltisb	$tmp,-1
+	vperm		$inptail,$inptail,$tmp,$inpperm
+	vsel		$inout,$inout,$output,$inptail
+
+	vxor		$rndkey0,$rndkey0,$tweak
+	vxor		$inout,$inout,$rndkey0
+	lvx		$rndkey0,$idx,$key1
+	addi		$idx,$idx,16
+
+	subi		r11,$out,1
+	mtctr		$len
+	li		$len,16
+Loop_xts_dec_steal:
+	lbzu		r0,1(r11)
+	stb		r0,16(r11)
+	bdnz		Loop_xts_dec_steal
+
+	mtctr		$rounds
+	b		Loop_xts_dec			# one more time...
+
+Lxts_dec_done:
+	mtspr		256,r12				# restore vrsave
+	li		r3,0
+	blr
+	.long		0
+	.byte		0,12,0x04,0,0x80,6,6,0
+	.long		0
+.size	.${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
+___
+}}}
+
 my $consts=1;
 foreach(split("\n",$code)) {
         s/\`([^\`]*)\`/eval($1)/geo;
diff --git a/crypto/evp/e_aes.c b/crypto/evp/e_aes.c
index ae39992..4dd791e 100644
--- a/crypto/evp/e_aes.c
+++ b/crypto/evp/e_aes.c
@@ -155,6 +155,8 @@ void AES_xts_decrypt(const char *inp, char *out, size_t len,
 # define HWAES_decrypt aes_p8_decrypt
 # define HWAES_cbc_encrypt aes_p8_cbc_encrypt
 # define HWAES_ctr32_encrypt_blocks aes_p8_ctr32_encrypt_blocks
+# define HWAES_xts_encrypt aes_p8_xts_encrypt
+# define HWAES_xts_decrypt aes_p8_xts_decrypt
 #endif
 
 #if     defined(AES_ASM) && !defined(I386_ONLY) &&      (  \
@@ -1008,6 +1010,12 @@ void HWAES_cbc_encrypt(const unsigned char *in, unsigned char *out,
 void HWAES_ctr32_encrypt_blocks(const unsigned char *in, unsigned char *out,
                                 size_t len, const AES_KEY *key,
                                 const unsigned char ivec[16]);
+void HWAES_xts_encrypt(const unsigned char *inp, unsigned char *out,
+                       size_t len, const AES_KEY *key1,
+                       const AES_KEY *key2, const unsigned char iv[16]);
+void HWAES_xts_decrypt(const unsigned char *inp, unsigned char *out,
+                       size_t len, const AES_KEY *key1,
+                       const AES_KEY *key2, const unsigned char iv[16]);
 #endif
 
 #define BLOCK_CIPHER_generic_pack(nid,keylen,flags)             \
@@ -1803,11 +1811,17 @@ static int aes_xts_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
                                           EVP_CIPHER_CTX_key_length(ctx) * 4,
                                           &xctx->ks1.ks);
                     xctx->xts.block1 = (block128_f) HWAES_encrypt;
+# ifdef HWAES_xts_encrypt
+                    xctx->stream = HWAES_xts_encrypt;
+# endif
                 } else {
                     HWAES_set_decrypt_key(key,
                                           EVP_CIPHER_CTX_key_length(ctx) * 4,
                                           &xctx->ks1.ks);
                     xctx->xts.block1 = (block128_f) HWAES_decrypt;
+# ifdef HWAES_xts_decrypt
+                    xctx->stream = HWAES_xts_decrypt;
+# endif
                 }
 
                 HWAES_set_encrypt_key(key + EVP_CIPHER_CTX_key_length(ctx) / 2,
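
For context, aes_xts_cipher() in crypto/evp/e_aes.c dispatches to that
stream pointer roughly as sketched below (paraphrased, not part of this
patch; the exact guards and IV accessors may differ), falling back to the
generic CRYPTO_xts128_encrypt() otherwise, which is why the assembly has to
implement the same cleartext-tweak, arbitrary-length semantics:

        /* Paraphrased from aes_xts_cipher(); not part of this patch. */
        if (xctx->stream)
            (*xctx->stream) (in, out, len,
                             xctx->xts.key1, xctx->xts.key2,
                             EVP_CIPHER_CTX_iv_noconst(ctx));
        else if (CRYPTO_xts128_encrypt(&xctx->xts,
                                       EVP_CIPHER_CTX_iv(ctx),
                                       in, out, len,
                                       EVP_CIPHER_CTX_encrypting(ctx)))
            return 0;                   /* generic path failed */
        return 1;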

