[openssl-commits] [openssl] master update
Andy Polyakov
appro at openssl.org
Wed Jun 6 20:14:48 UTC 2018
The branch master has been updated
via 316d527ff9b6597105df399fc222ea328cd827bf (commit)
via 791cc3029bd2aee7fc6b766b9841ce1e0a14484a (commit)
via 3f9c3b3c48252f24043689ad6b0e78b4a3005910 (commit)
via f0c77d66b49c3ca7378035f233f86ec0336866eb (commit)
from 0336df2fa316a3e08b8f0d2d0e8d4bc175e46634 (commit)
- Log -----------------------------------------------------------------
commit 316d527ff9b6597105df399fc222ea328cd827bf
Author: Andy Polyakov <appro at openssl.org>
Date: Tue Jun 5 20:00:46 2018 +0200
crypto/ppccap.c: wire new ChaCha20_ctr32_vsx.
Reviewed-by: Rich Salz <rsalz at openssl.org>
(Merged from https://github.com/openssl/openssl/pull/6419)
commit 791cc3029bd2aee7fc6b766b9841ce1e0a14484a
Author: Andy Polyakov <appro at openssl.org>
Date: Tue Jun 5 19:59:19 2018 +0200
chacha/asm/chacha-ppc.pl: improve performance by 40/80% on POWER8/9.
Reviewed-by: Rich Salz <rsalz at openssl.org>
(Merged from https://github.com/openssl/openssl/pull/6419)
commit 3f9c3b3c48252f24043689ad6b0e78b4a3005910
Author: Andy Polyakov <appro at openssl.org>
Date: Tue Jun 5 19:57:42 2018 +0200
perlasm/ppc-xlate.pl: add vmrg[eo]w instructions.
Reviewed-by: Rich Salz <rsalz at openssl.org>
(Merged from https://github.com/openssl/openssl/pull/6419)
commit f0c77d66b49c3ca7378035f233f86ec0336866eb
Author: Andy Polyakov <appro at openssl.org>
Date: Tue Jun 5 19:55:55 2018 +0200
sha/asm/sha512p8-ppc.pl: fix build on Mac OS X.
Reviewed-by: Rich Salz <rsalz at openssl.org>
(Merged from https://github.com/openssl/openssl/pull/6419)
-----------------------------------------------------------------------
Summary of changes:
crypto/chacha/asm/chacha-ppc.pl | 415 +++++++++++++++++++++++++++++++++++++++-
crypto/perlasm/ppc-xlate.pl | 2 +
crypto/ppccap.c | 11 +-
crypto/sha/asm/sha512p8-ppc.pl | 3 +-
4 files changed, 418 insertions(+), 13 deletions(-)
diff --git a/crypto/chacha/asm/chacha-ppc.pl b/crypto/chacha/asm/chacha-ppc.pl
index 88746fe..96cdfeb 100755
--- a/crypto/chacha/asm/chacha-ppc.pl
+++ b/crypto/chacha/asm/chacha-ppc.pl
@@ -18,19 +18,31 @@
#
# ChaCha20 for PowerPC/AltiVec.
#
+# June 2018
+#
+# Add VSX 2.07 code path. Original 3xAltiVec+1xIALU is well-suited for
+# processors that can't issue more than one vector instruction per
+# cycle. But POWER8 (and POWER9) can issue a pair, and vector-only 4x
+# interleave would perform better. Incidentally PowerISA 2.07 (first
+# implemented by POWER8) defined new usable instructions, hence 4xVSX
+# code path...
+#
# Performance in cycles per byte out of large buffer.
#
-# IALU/gcc-4.x 3xAltiVec+1xIALU
+# IALU/gcc-4.x 3xAltiVec+1xIALU 4xVSX
#
-# Freescale e300 13.6/+115% -
-# PPC74x0/G4e 6.81/+310% 3.81
-# PPC970/G5 9.29/+160% ?
-# POWER7 8.62/+61% 3.35
-# POWER8 8.70/+51% 2.91
-# POWER9 8.80/+29% 4.44(*)
+# Freescale e300 13.6/+115% - -
+# PPC74x0/G4e 6.81/+310% 3.81 -
+# PPC970/G5 9.29/+160% ? -
+# POWER7 8.62/+61% 3.35 -
+# POWER8 8.70/+51% 2.91 2.09
+# POWER9 8.80/+29% 4.44(*) 2.45(**)
#
# (*) this is trade-off result, it's possible to improve it, but
# then it would negatively affect all others;
+# (**) POWER9 seems to be "allergic" to mixing vector and integer
+# instructions, which is why switch to vector-only code pays
+# off that much;
$flavour = shift;
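
The 4x interleave described in the comment above works on four 64-byte blocks at a time: each vector register carries one state word for all four blocks, one 32-bit lane per block, and the four counter lanes hold c+0..c+3. A rough C sketch of that data layout follows; it is purely illustrative and not the generated assembly, and the struct/function names are made up for the example:

    #include <stdint.h>

    /* One "vector": lane[j] belongs to block j.  The real code keeps these in
     * VSX registers v0..v15, loading replicated sigma words from Lconsts and
     * splatting key/nonce words with vspltw; this only shows the arrangement. */
    typedef struct { uint32_t lane[4]; } v4x32;

    static void smash_state(v4x32 x[16], const uint32_t key[8],
                            const uint32_t counter[4])
    {
        static const uint32_t sigma[4] =    /* "expand 32-byte k" */
            { 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574 };
        int i, j;

        for (i = 0; i < 4; i++)             /* words 0..3: constants */
            for (j = 0; j < 4; j++)
                x[i].lane[j] = sigma[i];
        for (i = 0; i < 8; i++)             /* words 4..11: key */
            for (j = 0; j < 4; j++)
                x[4 + i].lane[j] = key[i];
        for (j = 0; j < 4; j++) {
            x[12].lane[j] = counter[0] + j; /* word 12: block counter per lane */
            x[13].lane[j] = counter[1];     /* words 13..15: nonce */
            x[14].lane[j] = counter[2];
            x[15].lane[j] = counter[3];
        }
    }
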
@@ -893,7 +905,390 @@ Ldone_vmx:
.byte 0,12,0x04,1,0x80,18,5,0
.long 0
.size .ChaCha20_ctr32_vmx,.-.ChaCha20_ctr32_vmx
+___
+}}}
+{{{
+my ($xa0,$xa1,$xa2,$xa3, $xb0,$xb1,$xb2,$xb3,
+ $xc0,$xc1,$xc2,$xc3, $xd0,$xd1,$xd2,$xd3) = map("v$_",(0..15));
+my @K = map("v$_",(16..19));
+my $CTR = "v26";
+my ($xt0,$xt1,$xt2,$xt3) = map("v$_",(27..30));
+my ($sixteen,$twelve,$eight,$seven) = ($xt0,$xt1,$xt2,$xt3);
+my $beperm = "v31";
+
+my ($x00,$x10,$x20,$x30) = (0, map("r$_",(8..10)));
+
+my $FRAME=$LOCALS+64+7*16; # 7*16 is for v26-v31 offload
+
+sub VSX_lane_ROUND {
+my ($a0,$b0,$c0,$d0)=@_;
+my ($a1,$b1,$c1,$d1)=map(($_&~3)+(($_+1)&3),($a0,$b0,$c0,$d0));
+my ($a2,$b2,$c2,$d2)=map(($_&~3)+(($_+1)&3),($a1,$b1,$c1,$d1));
+my ($a3,$b3,$c3,$d3)=map(($_&~3)+(($_+1)&3),($a2,$b2,$c2,$d2));
+my @x=map("\"v$_\"",(0..15));
+
+ (
+ "&vadduwm (@x[$a0], at x[$a0], at x[$b0])", # Q1
+ "&vadduwm (@x[$a1], at x[$a1], at x[$b1])", # Q2
+ "&vadduwm (@x[$a2], at x[$a2], at x[$b2])", # Q3
+ "&vadduwm (@x[$a3], at x[$a3], at x[$b3])", # Q4
+ "&vxor (@x[$d0], at x[$d0], at x[$a0])",
+ "&vxor (@x[$d1], at x[$d1], at x[$a1])",
+ "&vxor (@x[$d2], at x[$d2], at x[$a2])",
+ "&vxor (@x[$d3], at x[$d3], at x[$a3])",
+ "&vrlw (@x[$d0], at x[$d0],'$sixteen')",
+ "&vrlw (@x[$d1], at x[$d1],'$sixteen')",
+ "&vrlw (@x[$d2], at x[$d2],'$sixteen')",
+ "&vrlw (@x[$d3], at x[$d3],'$sixteen')",
+
+ "&vadduwm (@x[$c0], at x[$c0], at x[$d0])",
+ "&vadduwm (@x[$c1], at x[$c1], at x[$d1])",
+ "&vadduwm (@x[$c2], at x[$c2], at x[$d2])",
+ "&vadduwm (@x[$c3], at x[$c3], at x[$d3])",
+ "&vxor (@x[$b0], at x[$b0], at x[$c0])",
+ "&vxor (@x[$b1], at x[$b1], at x[$c1])",
+ "&vxor (@x[$b2], at x[$b2], at x[$c2])",
+ "&vxor (@x[$b3], at x[$b3], at x[$c3])",
+ "&vrlw (@x[$b0], at x[$b0],'$twelve')",
+ "&vrlw (@x[$b1], at x[$b1],'$twelve')",
+ "&vrlw (@x[$b2], at x[$b2],'$twelve')",
+ "&vrlw (@x[$b3], at x[$b3],'$twelve')",
+
+ "&vadduwm (@x[$a0], at x[$a0], at x[$b0])",
+ "&vadduwm (@x[$a1], at x[$a1], at x[$b1])",
+ "&vadduwm (@x[$a2], at x[$a2], at x[$b2])",
+ "&vadduwm (@x[$a3], at x[$a3], at x[$b3])",
+ "&vxor (@x[$d0], at x[$d0], at x[$a0])",
+ "&vxor (@x[$d1], at x[$d1], at x[$a1])",
+ "&vxor (@x[$d2], at x[$d2], at x[$a2])",
+ "&vxor (@x[$d3], at x[$d3], at x[$a3])",
+ "&vrlw (@x[$d0], at x[$d0],'$eight')",
+ "&vrlw (@x[$d1], at x[$d1],'$eight')",
+ "&vrlw (@x[$d2], at x[$d2],'$eight')",
+ "&vrlw (@x[$d3], at x[$d3],'$eight')",
+
+ "&vadduwm (@x[$c0], at x[$c0], at x[$d0])",
+ "&vadduwm (@x[$c1], at x[$c1], at x[$d1])",
+ "&vadduwm (@x[$c2], at x[$c2], at x[$d2])",
+ "&vadduwm (@x[$c3], at x[$c3], at x[$d3])",
+ "&vxor (@x[$b0], at x[$b0], at x[$c0])",
+ "&vxor (@x[$b1], at x[$b1], at x[$c1])",
+ "&vxor (@x[$b2], at x[$b2], at x[$c2])",
+ "&vxor (@x[$b3], at x[$b3], at x[$c3])",
+ "&vrlw (@x[$b0], at x[$b0],'$seven')",
+ "&vrlw (@x[$b1], at x[$b1],'$seven')",
+ "&vrlw (@x[$b2], at x[$b2],'$seven')",
+ "&vrlw (@x[$b3], at x[$b3],'$seven')"
+ );
+}
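
Per lane, each of the four interleaved quarter-rounds above (Q1..Q4) is the standard ChaCha20 quarter round with rotations 16, 12, 8 and 7; that is what the vadduwm/vxor/vrlw triplets implement four blocks wide. For reference, the scalar version (generic RFC 7539 definition, not code taken from this file):

    #include <stdint.h>

    static inline uint32_t rotl32(uint32_t v, int n)
    {
        return (v << n) | (v >> (32 - n));
    }

    /* Scalar ChaCha20 quarter round; VSX_lane_ROUND performs four of these
     * in parallel, on four blocks at once (one block per vector lane). */
    static void quarter_round(uint32_t x[16], int a, int b, int c, int d)
    {
        x[a] += x[b]; x[d] ^= x[a]; x[d] = rotl32(x[d], 16);
        x[c] += x[d]; x[b] ^= x[c]; x[b] = rotl32(x[b], 12);
        x[a] += x[b]; x[d] ^= x[a]; x[d] = rotl32(x[d],  8);
        x[c] += x[d]; x[b] ^= x[c]; x[b] = rotl32(x[b],  7);
    }

The two invocations further down, VSX_lane_ROUND(0,4,8,12) and VSX_lane_ROUND(0,5,10,15), correspond to the column and diagonal rounds; the index arithmetic at the top of the sub derives the other three column/diagonal quartets, and the counter value of 10 set before Loop_vsx gives the usual 20 rounds.
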
+
+$code.=<<___;
+.globl .ChaCha20_ctr32_vsx
+.align 5
+.ChaCha20_ctr32_vsx:
+ $STU $sp,-$FRAME($sp)
+ mflr r0
+ li r10,`15+$LOCALS+64`
+ li r11,`31+$LOCALS+64`
+ mfspr r12,256
+ stvx v26,r10,$sp
+ addi r10,r10,32
+ stvx v27,r11,$sp
+ addi r11,r11,32
+ stvx v28,r10,$sp
+ addi r10,r10,32
+ stvx v29,r11,$sp
+ addi r11,r11,32
+ stvx v30,r10,$sp
+ stvx v31,r11,$sp
+ stw r12,`$FRAME-4`($sp) # save vrsave
+ li r12,-4096+63
+ $PUSH r0, `$FRAME+$LRSAVE`($sp)
+ mtspr 256,r12 # preserve 29 AltiVec registers
+
+ bl Lconsts # returns pointer Lsigma in r12
+ lvx_4w @K[0],0,r12 # load sigma
+ addi r12,r12,0x50
+ li $x10,16
+ li $x20,32
+ li $x30,48
+ li r11,64
+
+ lvx_4w @K[1],0,$key # load key
+ lvx_4w @K[2],$x10,$key
+ lvx_4w @K[3],0,$ctr # load counter
+
+ vxor $xt0,$xt0,$xt0
+ lvx_4w $xt1,r11,r12
+ vspltw $CTR,@K[3],0
+ vsldoi @K[3],@K[3],$xt0,4
+ vsldoi @K[3],$xt0,@K[3],12 # clear @K[3].word[0]
+ vadduwm $CTR,$CTR,$xt1
+
+ be?lvsl $beperm,0,$x10 # 0x00..0f
+ be?vspltisb $xt0,3 # 0x03..03
+ be?vxor $beperm,$beperm,$xt0 # swap bytes within words
+
+ li r0,10 # inner loop counter
+ mtctr r0
+ b Loop_outer_vsx
+
+.align 5
+Loop_outer_vsx:
+ lvx $xa0,$x00,r12 # load [smashed] sigma
+ lvx $xa1,$x10,r12
+ lvx $xa2,$x20,r12
+ lvx $xa3,$x30,r12
+
+ vspltw $xb0,@K[1],0 # smash the key
+ vspltw $xb1,@K[1],1
+ vspltw $xb2,@K[1],2
+ vspltw $xb3,@K[1],3
+
+ vspltw $xc0,@K[2],0
+ vspltw $xc1,@K[2],1
+ vspltw $xc2,@K[2],2
+ vspltw $xc3,@K[2],3
+
+ vmr $xd0,$CTR # smash the counter
+ vspltw $xd1,@K[3],1
+ vspltw $xd2,@K[3],2
+ vspltw $xd3,@K[3],3
+
+ vspltisw $sixteen,-16 # synthesize constants
+ vspltisw $twelve,12
+ vspltisw $eight,8
+ vspltisw $seven,7
+
+Loop_vsx:
+___
+ foreach (&VSX_lane_ROUND(0, 4, 8,12)) { eval; }
+ foreach (&VSX_lane_ROUND(0, 5,10,15)) { eval; }
+$code.=<<___;
+ bdnz Loop_vsx
+
+ vadduwm $xd0,$xd0,$CTR
+
+ vmrgew $xt0,$xa0,$xa1 # transpose data
+ vmrgew $xt1,$xa2,$xa3
+ vmrgow $xa0,$xa0,$xa1
+ vmrgow $xa2,$xa2,$xa3
+ vmrgew $xt2,$xb0,$xb1
+ vmrgew $xt3,$xb2,$xb3
+ vpermdi $xa1,$xa0,$xa2,0b00
+ vpermdi $xa3,$xa0,$xa2,0b11
+ vpermdi $xa0,$xt0,$xt1,0b00
+ vpermdi $xa2,$xt0,$xt1,0b11
+
+ vmrgow $xb0,$xb0,$xb1
+ vmrgow $xb2,$xb2,$xb3
+ vmrgew $xt0,$xc0,$xc1
+ vmrgew $xt1,$xc2,$xc3
+ vpermdi $xb1,$xb0,$xb2,0b00
+ vpermdi $xb3,$xb0,$xb2,0b11
+ vpermdi $xb0,$xt2,$xt3,0b00
+ vpermdi $xb2,$xt2,$xt3,0b11
+
+ vmrgow $xc0,$xc0,$xc1
+ vmrgow $xc2,$xc2,$xc3
+ vmrgew $xt2,$xd0,$xd1
+ vmrgew $xt3,$xd2,$xd3
+ vpermdi $xc1,$xc0,$xc2,0b00
+ vpermdi $xc3,$xc0,$xc2,0b11
+ vpermdi $xc0,$xt0,$xt1,0b00
+ vpermdi $xc2,$xt0,$xt1,0b11
+
+ vmrgow $xd0,$xd0,$xd1
+ vmrgow $xd2,$xd2,$xd3
+ vspltisw $xt0,4
+ vadduwm $CTR,$CTR,$xt0 # next counter value
+ vpermdi $xd1,$xd0,$xd2,0b00
+ vpermdi $xd3,$xd0,$xd2,0b11
+ vpermdi $xd0,$xt2,$xt3,0b00
+ vpermdi $xd2,$xt2,$xt3,0b11
+
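
The vmrgew/vmrgow/vpermdi sequence above is, in effect, a 4x4 transpose of 32-bit words: after the rounds, vector i holds state word i of blocks 0..3 (one block per lane), whereas the output needs each block's words contiguous in one vector. A plain scalar sketch of that effect for one group of four vectors (illustrative only, not the instruction-level detail):

    #include <stdint.h>

    /* Before: row i holds group-word i of blocks 0..3.
     * After:  row i holds the four group-words of block i. */
    static void transpose_4x4(uint32_t m[4][4])
    {
        int i, j;

        for (i = 0; i < 4; i++)
            for (j = i + 1; j < 4; j++) {
                uint32_t t = m[i][j];
                m[i][j] = m[j][i];
                m[j][i] = t;
            }
    }
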
+ vadduwm $xa0,$xa0,@K[0]
+ vadduwm $xb0,$xb0,@K[1]
+ vadduwm $xc0,$xc0,@K[2]
+ vadduwm $xd0,$xd0,@K[3]
+
+ be?vperm $xa0,$xa0,$xa0,$beperm
+ be?vperm $xb0,$xb0,$xb0,$beperm
+ be?vperm $xc0,$xc0,$xc0,$beperm
+ be?vperm $xd0,$xd0,$xd0,$beperm
+
+ ${UCMP}i $len,0x40
+ blt Ltail_vsx
+
+ lvx_4w $xt0,$x00,$inp
+ lvx_4w $xt1,$x10,$inp
+ lvx_4w $xt2,$x20,$inp
+ lvx_4w $xt3,$x30,$inp
+
+ vxor $xt0,$xt0,$xa0
+ vxor $xt1,$xt1,$xb0
+ vxor $xt2,$xt2,$xc0
+ vxor $xt3,$xt3,$xd0
+
+ stvx_4w $xt0,$x00,$out
+ stvx_4w $xt1,$x10,$out
+ addi $inp,$inp,0x40
+ stvx_4w $xt2,$x20,$out
+ subi $len,$len,0x40
+ stvx_4w $xt3,$x30,$out
+ addi $out,$out,0x40
+ beq Ldone_vsx
+
+ vadduwm $xa0,$xa1,@K[0]
+ vadduwm $xb0,$xb1,@K[1]
+ vadduwm $xc0,$xc1,@K[2]
+ vadduwm $xd0,$xd1,@K[3]
+
+ be?vperm $xa0,$xa0,$xa0,$beperm
+ be?vperm $xb0,$xb0,$xb0,$beperm
+ be?vperm $xc0,$xc0,$xc0,$beperm
+ be?vperm $xd0,$xd0,$xd0,$beperm
+
+ ${UCMP}i $len,0x40
+ blt Ltail_vsx
+
+ lvx_4w $xt0,$x00,$inp
+ lvx_4w $xt1,$x10,$inp
+ lvx_4w $xt2,$x20,$inp
+ lvx_4w $xt3,$x30,$inp
+
+ vxor $xt0,$xt0,$xa0
+ vxor $xt1,$xt1,$xb0
+ vxor $xt2,$xt2,$xc0
+ vxor $xt3,$xt3,$xd0
+
+ stvx_4w $xt0,$x00,$out
+ stvx_4w $xt1,$x10,$out
+ addi $inp,$inp,0x40
+ stvx_4w $xt2,$x20,$out
+ subi $len,$len,0x40
+ stvx_4w $xt3,$x30,$out
+ addi $out,$out,0x40
+ beq Ldone_vsx
+
+ vadduwm $xa0,$xa2,@K[0]
+ vadduwm $xb0,$xb2,@K[1]
+ vadduwm $xc0,$xc2,@K[2]
+ vadduwm $xd0,$xd2,@K[3]
+
+ be?vperm $xa0,$xa0,$xa0,$beperm
+ be?vperm $xb0,$xb0,$xb0,$beperm
+ be?vperm $xc0,$xc0,$xc0,$beperm
+ be?vperm $xd0,$xd0,$xd0,$beperm
+
+ ${UCMP}i $len,0x40
+ blt Ltail_vsx
+
+ lvx_4w $xt0,$x00,$inp
+ lvx_4w $xt1,$x10,$inp
+ lvx_4w $xt2,$x20,$inp
+ lvx_4w $xt3,$x30,$inp
+
+ vxor $xt0,$xt0,$xa0
+ vxor $xt1,$xt1,$xb0
+ vxor $xt2,$xt2,$xc0
+ vxor $xt3,$xt3,$xd0
+
+ stvx_4w $xt0,$x00,$out
+ stvx_4w $xt1,$x10,$out
+ addi $inp,$inp,0x40
+ stvx_4w $xt2,$x20,$out
+ subi $len,$len,0x40
+ stvx_4w $xt3,$x30,$out
+ addi $out,$out,0x40
+ beq Ldone_vsx
+
+ vadduwm $xa0,$xa3,@K[0]
+ vadduwm $xb0,$xb3,@K[1]
+ vadduwm $xc0,$xc3,@K[2]
+ vadduwm $xd0,$xd3,@K[3]
+
+ be?vperm $xa0,$xa0,$xa0,$beperm
+ be?vperm $xb0,$xb0,$xb0,$beperm
+ be?vperm $xc0,$xc0,$xc0,$beperm
+ be?vperm $xd0,$xd0,$xd0,$beperm
+
+ ${UCMP}i $len,0x40
+ blt Ltail_vsx
+
+ lvx_4w $xt0,$x00,$inp
+ lvx_4w $xt1,$x10,$inp
+ lvx_4w $xt2,$x20,$inp
+ lvx_4w $xt3,$x30,$inp
+
+ vxor $xt0,$xt0,$xa0
+ vxor $xt1,$xt1,$xb0
+ vxor $xt2,$xt2,$xc0
+ vxor $xt3,$xt3,$xd0
+
+ stvx_4w $xt0,$x00,$out
+ stvx_4w $xt1,$x10,$out
+ addi $inp,$inp,0x40
+ stvx_4w $xt2,$x20,$out
+ subi $len,$len,0x40
+ stvx_4w $xt3,$x30,$out
+ addi $out,$out,0x40
+ mtctr r0
+ bne Loop_outer_vsx
+
+Ldone_vsx:
+ lwz r12,`$FRAME-4`($sp) # pull vrsave
+ li r10,`15+$LOCALS+64`
+ li r11,`31+$LOCALS+64`
+ $POP r0, `$FRAME+$LRSAVE`($sp)
+ mtspr 256,r12 # restore vrsave
+ lvx v26,r10,$sp
+ addi r10,r10,32
+ lvx v27,r11,$sp
+ addi r11,r11,32
+ lvx v28,r10,$sp
+ addi r10,r10,32
+ lvx v29,r11,$sp
+ addi r11,r11,32
+ lvx v30,r10,$sp
+ lvx v31,r11,$sp
+ mtlr r0
+ addi $sp,$sp,$FRAME
+ blr
+
+.align 4
+Ltail_vsx:
+ addi r11,$sp,$LOCALS
+ mtctr $len
+ stvx_4w $xa0,$x00,r11 # offload block to stack
+ stvx_4w $xb0,$x10,r11
+ stvx_4w $xc0,$x20,r11
+ stvx_4w $xd0,$x30,r11
+ subi r12,r11,1 # prepare for *++ptr
+ subi $inp,$inp,1
+ subi $out,$out,1
+
+Loop_tail_vsx:
+ lbzu r6,1(r12)
+ lbzu r7,1($inp)
+ xor r6,r6,r7
+ stbu r6,1($out)
+ bdnz Loop_tail_vsx
+
+ stvx_4w $K[0],$x00,r11 # wipe copy of the block
+ stvx_4w $K[0],$x10,r11
+ stvx_4w $K[0],$x20,r11
+ stvx_4w $K[0],$x30,r11
+
+ b Ldone_vsx
+ .long 0
+ .byte 0,12,0x04,1,0x80,0,5,0
+ .long 0
+.size .ChaCha20_ctr32_vsx,.-.ChaCha20_ctr32_vsx
+___
+}}}
+$code.=<<___;
.align 5
Lconsts:
mflr r0
@@ -919,10 +1314,14 @@ $code.=<<___ if (!$LITTLE_ENDIAN); # flipped words
.long 0x01020300,0x05060704,0x090a0b08,0x0d0e0f0c
___
$code.=<<___;
+ .long 0x61707865,0x61707865,0x61707865,0x61707865
+ .long 0x3320646e,0x3320646e,0x3320646e,0x3320646e
+ .long 0x79622d32,0x79622d32,0x79622d32,0x79622d32
+ .long 0x6b206574,0x6b206574,0x6b206574,0x6b206574
+ .long 0,1,2,3
.asciz "ChaCha20 for PowerPC/AltiVec, CRYPTOGAMS by <appro\@openssl.org>"
.align 2
___
-}}}
foreach (split("\n",$code)) {
s/\`([^\`]*)\`/eval $1/ge;
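
One more point worth spelling out from the new VSX path: Ltail_vsx handles a final block shorter than 64 bytes by spilling the keystream block to the stack, XORing the remaining bytes one at a time, and then overwriting the stack copy. In rough C, assuming a 64-byte keystream block has already been computed (names are illustrative, not OpenSSL API):

    #include <stddef.h>
    #include <string.h>

    /* Tail handling in the spirit of Ltail_vsx: spill the last keystream
     * block to scratch storage, XOR only the len (< 64) bytes that remain,
     * then wipe the scratch copy so no keystream is left on the stack. */
    static void chacha_tail(unsigned char *out, const unsigned char *inp,
                            size_t len, const unsigned char keystream[64])
    {
        unsigned char scratch[64];
        size_t i;

        memcpy(scratch, keystream, sizeof(scratch));  /* stvx_4w to stack   */
        for (i = 0; i < len; i++)                     /* lbzu/xor/stbu loop */
            out[i] = inp[i] ^ scratch[i];
        memset(scratch, 0, sizeof(scratch));          /* wipe the copy      */
    }

The assembly wipes by storing @K[0] over the buffer rather than zeroing it, and real C code would use OPENSSL_cleanse() so the wipe cannot be optimised away.
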
diff --git a/crypto/perlasm/ppc-xlate.pl b/crypto/perlasm/ppc-xlate.pl
index 1a22f7a..d220c62 100755
--- a/crypto/perlasm/ppc-xlate.pl
+++ b/crypto/perlasm/ppc-xlate.pl
@@ -256,6 +256,8 @@ my $vsubudm = sub { vcrypto_op(@_, 1216); };
my $vaddcuq = sub { vcrypto_op(@_, 320); };
my $vaddeuqm = sub { vfour(@_,60); };
my $vaddecuq = sub { vfour(@_,61); };
+my $vmrgew = sub { vfour(@_,0,1932); };
+my $vmrgow = sub { vfour(@_,0,1676); };
my $mtsle = sub {
my ($f, $arg) = @_;
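
The two-line perlasm change teaches ppc-xlate.pl the ISA 2.07 vmrgew/vmrgow (vector merge even/odd word) mnemonics so they can be emitted as raw opcode words even when the toolchain's assembler does not know them. The vfour helper it relies on lives elsewhere in ppc-xlate.pl and is not shown in this hunk; as a hedged sketch, a Power ISA VX-form word for these instructions would be assembled roughly like this (the extended opcodes 1932 and 1676 are the values from the diff):

    #include <stdint.h>

    /* Sketch of a VX-form encoding: primary opcode 4 in the top six bits,
     * then VRT, VRA, VRB (five bits each) and an 11-bit extended opcode.
     * Example: vmrgew v0,v1,v2 -> vx_encode(1932, 0, 1, 2)
     *          vmrgow v0,v1,v2 -> vx_encode(1676, 0, 1, 2) */
    static uint32_t vx_encode(uint32_t xo, uint32_t vrt,
                              uint32_t vra, uint32_t vrb)
    {
        return (4u << 26) | (vrt << 21) | (vra << 16) | (vrb << 11) | (xo & 0x7ff);
    }
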
diff --git a/crypto/ppccap.c b/crypto/ppccap.c
index f8b7c00..8b7d765 100644
--- a/crypto/ppccap.c
+++ b/crypto/ppccap.c
@@ -90,13 +90,18 @@ void ChaCha20_ctr32_int(unsigned char *out, const unsigned char *inp,
void ChaCha20_ctr32_vmx(unsigned char *out, const unsigned char *inp,
size_t len, const unsigned int key[8],
const unsigned int counter[4]);
+void ChaCha20_ctr32_vsx(unsigned char *out, const unsigned char *inp,
+ size_t len, const unsigned int key[8],
+ const unsigned int counter[4]);
void ChaCha20_ctr32(unsigned char *out, const unsigned char *inp,
size_t len, const unsigned int key[8],
const unsigned int counter[4])
{
- OPENSSL_ppccap_P & PPC_ALTIVEC
- ? ChaCha20_ctr32_vmx(out, inp, len, key, counter)
- : ChaCha20_ctr32_int(out, inp, len, key, counter);
+ OPENSSL_ppccap_P & PPC_CRYPTO207
+ ? ChaCha20_ctr32_vsx(out, inp, len, key, counter)
+ : OPENSSL_ppccap_P & PPC_ALTIVEC
+ ? ChaCha20_ctr32_vmx(out, inp, len, key, counter)
+ : ChaCha20_ctr32_int(out, inp, len, key, counter);
}
#endif
diff --git a/crypto/sha/asm/sha512p8-ppc.pl b/crypto/sha/asm/sha512p8-ppc.pl
index e3f522c..2792800 100755
--- a/crypto/sha/asm/sha512p8-ppc.pl
+++ b/crypto/sha/asm/sha512p8-ppc.pl
@@ -92,8 +92,7 @@ $idx="r7";
$lrsave="r8";
$offload="r11";
$vrsave="r12";
-@I = ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,10,26..31));
- $x00=0 if ($flavour =~ /osx/);
+@I = ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70) = (0,map("r$_",(10,26..31)));
@V=($A,$B,$C,$D,$E,$F,$G,$H)=map("v$_",(0..7));
@X=map("v$_",(8..19,24..27));