[openssl-commits] [openssl] master update
Andy Polyakov
appro at openssl.org
Fri Sep 2 11:34:28 UTC 2016
The branch master has been updated
via c1ec40ef1d9b487061f4f9e2843d4a1894f96660 (commit)
via 947716c1872d210828122212d076d503ae68b928 (commit)
from a43249122baabb9ebaee0822d683f1fdb60f72a7 (commit)
- Log -----------------------------------------------------------------
commit c1ec40ef1d9b487061f4f9e2843d4a1894f96660
Author: Andy Polyakov <appro at openssl.org>
Date: Thu Sep 1 10:46:08 2016 +0200
Configurations/10-main.conf: add android64-mips64 target.
Reviewed-by: Richard Levitte <levitte at openssl.org>
commit 947716c1872d210828122212d076d503ae68b928
Author: Andy Polyakov <appro at openssl.org>
Date: Thu Sep 1 10:39:15 2016 +0200
MIPS assembly pack: adapt it for MIPS[32|64]R6.
MIPS[32|64]R6 is binary- and source-incompatible with previous MIPS ISA
specifications. Fortunately it's still possible to resolve the differences
in source code with the standard pre-processor and by switching to the
trap-free versions of the addition and subtraction instructions.
Reviewed-by: Richard Levitte <levitte at openssl.org>
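Both mechanisms are visible in the patch body: the HI/LO multiply and
divide interface is hidden behind pre-processor macros in the new
crypto/mips_arch.h, and two-operand add/sub pseudo-instructions are
replaced throughout by their trap-free u-suffixed twins, e.g.:

    -	sub	$cnt,1
    +	subu	$cnt,1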
-----------------------------------------------------------------------
Summary of changes:
Configurations/00-base-templates.conf | 2 +-
Configurations/10-main.conf | 13 +
crypto/aes/asm/aes-mips.pl | 87 +++-
crypto/aes/build.info | 1 +
crypto/bn/asm/mips-mont.pl | 102 ++---
crypto/bn/asm/mips.pl | 742 +++++++++++++++++-----------------
crypto/bn/build.info | 16 +-
crypto/mips_arch.h | 40 ++
crypto/poly1305/asm/poly1305-mips.pl | 50 ++-
crypto/poly1305/build.info | 1 +
crypto/sha/asm/sha1-mips.pl | 26 +-
crypto/sha/asm/sha512-mips.pl | 22 +-
crypto/sha/build.info | 3 +
13 files changed, 622 insertions(+), 483 deletions(-)
create mode 100644 crypto/mips_arch.h
diff --git a/Configurations/00-base-templates.conf b/Configurations/00-base-templates.conf
index 1c63a50..8bb4de7 100644
--- a/Configurations/00-base-templates.conf
+++ b/Configurations/00-base-templates.conf
@@ -216,7 +216,7 @@
},
mips32_asm => {
template => 1,
- bn_asm_src => "bn-mips.s mips-mont.s",
+ bn_asm_src => "bn-mips.S mips-mont.S",
aes_asm_src => "aes_cbc.c aes-mips.S",
sha1_asm_src => "sha1-mips.S sha256-mips.S",
},
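The lowercase-to-uppercase rename is load-bearing: .S sources are fed
through the C pre-processor before assembly, which is what lets the
generated files #include "mips_arch.h". Illustratively (compiler-driver
behaviour, not part of the patch):

    $ gcc -c bn-mips.S   # cpp runs first (#include, #if), then the assembler
    $ gcc -c bn-mips.s   # assembler only; cpp directives are never seen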
diff --git a/Configurations/10-main.conf b/Configurations/10-main.conf
index a1d566d..f32ccbb 100644
--- a/Configurations/10-main.conf
+++ b/Configurations/10-main.conf
@@ -938,6 +938,19 @@ sub vms_info {
inherit_from => [ "android64", asm("x86_64_asm") ],
perlasm_scheme => "elf",
},
+ "android64-mips64" => {
+ ############################################################
+ # You will more than likely have to specify the target processor
+ # on the ./Configure command line. The trouble is that the
+ # toolchain's default is MIPS64r6 (at least in NDK r10d), but
+ # there are no such processors around (or they are too rare to
+ # spot one). The actual problem is that MIPS64r6 is binary
+ # incompatible with previous MIPS ISA versions, in the sense
+ # that, unlike with prior versions, original MIPS binary code
+ # will fail.
+ #
+ inherit_from => [ "android64", asm("mips64_asm") ],
+ perlasm_scheme => "64",
+ },
#### *BSD
"BSD-generic32" => {
diff --git a/crypto/aes/asm/aes-mips.pl b/crypto/aes/asm/aes-mips.pl
index 439578d..0eb1474 100644
--- a/crypto/aes/asm/aes-mips.pl
+++ b/crypto/aes/asm/aes-mips.pl
@@ -65,8 +65,8 @@ $flavour = shift || "o32"; # supported flavours are o32,n32,64,nubi32,nubi64
if ($flavour =~ /64|n32/i) {
$PTR_LA="dla";
- $PTR_ADD="dadd"; # incidentally works even on n32
- $PTR_SUB="dsub"; # incidentally works even on n32
+ $PTR_ADD="daddu"; # incidentally works even on n32
+ $PTR_SUB="dsubu"; # incidentally works even on n32
$PTR_INS="dins";
$REG_S="sd";
$REG_L="ld";
@@ -74,8 +74,8 @@ if ($flavour =~ /64|n32/i) {
$SZREG=8;
} else {
$PTR_LA="la";
- $PTR_ADD="add";
- $PTR_SUB="sub";
+ $PTR_ADD="addu";
+ $PTR_SUB="subu";
$PTR_INS="ins";
$REG_S="sw";
$REG_L="lw";
@@ -102,15 +102,13 @@ open STDOUT,">$output";
my ($MSB,$LSB)=(0,3); # automatically converted to little-endian
$code.=<<___;
+#include "mips_arch.h"
+
.text
#ifdef OPENSSL_FIPSCANISTER
# include <openssl/fipssyms.h>
#endif
-#if defined(__mips_smartmips) && !defined(_MIPS_ARCH_MIPS32R2)
-#define _MIPS_ARCH_MIPS32R2
-#endif
-
#if !defined(__mips_eabi) && (!defined(__vxworks) || defined(__pic__))
.option pic2
#endif
@@ -146,7 +144,7 @@ _mips_AES_encrypt:
xor $s2,$t2
xor $s3,$t3
- sub $cnt,1
+ subu $cnt,1
#if defined(__mips_smartmips)
ext $i0,$s1,16,8
.Loop_enc:
@@ -218,7 +216,7 @@ _mips_AES_encrypt:
xor $t2,$t6
xor $t3,$t7
- sub $cnt,1
+ subu $cnt,1
$PTR_ADD $key0,16
xor $s0,$t0
xor $s1,$t1
@@ -409,7 +407,7 @@ _mips_AES_encrypt:
xor $t2,$t6
xor $t3,$t7
- sub $cnt,1
+ subu $cnt,1
$PTR_ADD $key0,16
xor $s0,$t0
xor $s1,$t1
@@ -657,6 +655,12 @@ $code.=<<___;
.set reorder
$PTR_LA $Tbl,AES_Te # PIC-ified 'load address'
+#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
+ lw $s0,0($inp)
+ lw $s1,4($inp)
+ lw $s2,8($inp)
+ lw $s3,12($inp)
+#else
lwl $s0,0+$MSB($inp)
lwl $s1,4+$MSB($inp)
lwl $s2,8+$MSB($inp)
@@ -665,9 +669,16 @@ $code.=<<___;
lwr $s1,4+$LSB($inp)
lwr $s2,8+$LSB($inp)
lwr $s3,12+$LSB($inp)
+#endif
bal _mips_AES_encrypt
+#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
+ sw $s0,0($out)
+ sw $s1,4($out)
+ sw $s2,8($out)
+ sw $s3,12($out)
+#else
swr $s0,0+$LSB($out)
swr $s1,4+$LSB($out)
swr $s2,8+$LSB($out)
@@ -676,6 +687,7 @@ $code.=<<___;
swl $s1,4+$MSB($out)
swl $s2,8+$MSB($out)
swl $s3,12+$MSB($out)
+#endif
.set noreorder
$REG_L $ra,$FRAMESIZE-1*$SZREG($sp)
@@ -720,7 +732,7 @@ _mips_AES_decrypt:
xor $s2,$t2
xor $s3,$t3
- sub $cnt,1
+ subu $cnt,1
#if defined(__mips_smartmips)
ext $i0,$s3,16,8
.Loop_dec:
@@ -792,7 +804,7 @@ _mips_AES_decrypt:
xor $t2,$t6
xor $t3,$t7
- sub $cnt,1
+ subu $cnt,1
$PTR_ADD $key0,16
xor $s0,$t0
xor $s1,$t1
@@ -985,7 +997,7 @@ _mips_AES_decrypt:
xor $t2,$t6
xor $t3,$t7
- sub $cnt,1
+ subu $cnt,1
$PTR_ADD $key0,16
xor $s0,$t0
xor $s1,$t1
@@ -1228,6 +1240,12 @@ $code.=<<___;
.set reorder
$PTR_LA $Tbl,AES_Td # PIC-ified 'load address'
+#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
+ lw $s0,0($inp)
+ lw $s1,4($inp)
+ lw $s2,8($inp)
+ lw $s3,12($inp)
+#else
lwl $s0,0+$MSB($inp)
lwl $s1,4+$MSB($inp)
lwl $s2,8+$MSB($inp)
@@ -1236,9 +1254,16 @@ $code.=<<___;
lwr $s1,4+$LSB($inp)
lwr $s2,8+$LSB($inp)
lwr $s3,12+$LSB($inp)
+#endif
bal _mips_AES_decrypt
+#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
+ sw $s0,0($out)
+ sw $s1,4($out)
+ sw $s2,8($out)
+ sw $s3,12($out)
+#else
swr $s0,0+$LSB($out)
swr $s1,4+$LSB($out)
swr $s2,8+$LSB($out)
@@ -1247,6 +1272,7 @@ $code.=<<___;
swl $s1,4+$MSB($out)
swl $s2,8+$MSB($out)
swl $s3,12+$MSB($out)
+#endif
.set noreorder
$REG_L $ra,$FRAMESIZE-1*$SZREG($sp)
@@ -1295,35 +1321,52 @@ _mips_AES_set_encrypt_key:
$PTR_ADD $rcon,$Tbl,256
.set reorder
+#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
+ lw $rk0,0($inp) # load 128 bits
+ lw $rk1,4($inp)
+ lw $rk2,8($inp)
+ lw $rk3,12($inp)
+#else
lwl $rk0,0+$MSB($inp) # load 128 bits
lwl $rk1,4+$MSB($inp)
lwl $rk2,8+$MSB($inp)
lwl $rk3,12+$MSB($inp)
- li $at,128
lwr $rk0,0+$LSB($inp)
lwr $rk1,4+$LSB($inp)
lwr $rk2,8+$LSB($inp)
lwr $rk3,12+$LSB($inp)
+#endif
+ li $at,128
.set noreorder
beq $bits,$at,.L128bits
li $cnt,10
.set reorder
+#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
+ lw $rk4,16($inp) # load 192 bits
+ lw $rk5,20($inp)
+#else
lwl $rk4,16+$MSB($inp) # load 192 bits
lwl $rk5,20+$MSB($inp)
- li $at,192
lwr $rk4,16+$LSB($inp)
lwr $rk5,20+$LSB($inp)
+#endif
+ li $at,192
.set noreorder
beq $bits,$at,.L192bits
li $cnt,8
.set reorder
+#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
+ lw $rk6,24($inp) # load 256 bits
+ lw $rk7,28($inp)
+#else
lwl $rk6,24+$MSB($inp) # load 256 bits
lwl $rk7,28+$MSB($inp)
- li $at,256
lwr $rk6,24+$LSB($inp)
lwr $rk7,28+$LSB($inp)
+#endif
+ li $at,256
.set noreorder
beq $bits,$at,.L256bits
li $cnt,7
@@ -1353,7 +1396,7 @@ _mips_AES_set_encrypt_key:
sw $rk1,4($key)
sw $rk2,8($key)
sw $rk3,12($key)
- sub $cnt,1
+ subu $cnt,1
$PTR_ADD $key,16
_bias $i0,24
@@ -1410,7 +1453,7 @@ _mips_AES_set_encrypt_key:
sw $rk3,12($key)
sw $rk4,16($key)
sw $rk5,20($key)
- sub $cnt,1
+ subu $cnt,1
$PTR_ADD $key,24
_bias $i0,24
@@ -1471,7 +1514,7 @@ _mips_AES_set_encrypt_key:
sw $rk5,20($key)
sw $rk6,24($key)
sw $rk7,28($key)
- sub $cnt,1
+ subu $cnt,1
_bias $i0,24
_bias $i1,16
@@ -1653,7 +1696,7 @@ $code.=<<___;
lw $tp1,16($key) # modulo-scheduled
lui $x80808080,0x8080
- sub $cnt,1
+ subu $cnt,1
or $x80808080,0x8080
sll $cnt,2
$PTR_ADD $key,16
@@ -1716,7 +1759,7 @@ $code.=<<___;
lw $tp1,4($key) # modulo-scheduled
xor $tpe,$tp2
#endif
- sub $cnt,1
+ subu $cnt,1
sw $tpe,0($key)
$PTR_ADD $key,4
bnez $cnt,.Lmix
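The lw/sw-versus-lwl/lwr split above is the recurring R6 theme in this
file: R6 removed the left/right partial-word loads and stores and instead
relies on plain lw/sw coping with misaligned addresses (in hardware or
via kernel fix-up). Pre-R6, one possibly-unaligned word is assembled from
two accesses:

    lwl $s0,0+$MSB($inp)   # bytes from the most-significant end
    lwr $s0,0+$LSB($inp)   # merged with bytes from the least-significant end

while on R6 the same load is a single:

    lw  $s0,0($inp)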
diff --git a/crypto/aes/build.info b/crypto/aes/build.info
index cf6cb5e..bcc71ab 100644
--- a/crypto/aes/build.info
+++ b/crypto/aes/build.info
@@ -35,6 +35,7 @@ GENERATE[aesp8-ppc.s]=asm/aesp8-ppc.pl $(PERLASM_SCHEME)
GENERATE[aes-parisc.s]=asm/aes-parisc.pl $(PERLASM_SCHEME)
GENERATE[aes-mips.S]=asm/aes-mips.pl $(PERLASM_SCHEME)
+INCLUDE[aes-mips.o]=..
GENERATE[aesv8-armx.S]=asm/aesv8-armx.pl $(PERLASM_SCHEME)
INCLUDE[aesv8-armx.o]=..
diff --git a/crypto/bn/asm/mips-mont.pl b/crypto/bn/asm/mips-mont.pl
index a907571..56d4202 100644
--- a/crypto/bn/asm/mips-mont.pl
+++ b/crypto/bn/asm/mips-mont.pl
@@ -56,14 +56,14 @@
$flavour = shift || "o32"; # supported flavours are o32,n32,64,nubi32,nubi64
if ($flavour =~ /64|n32/i) {
- $PTR_ADD="dadd"; # incidentally works even on n32
- $PTR_SUB="dsub"; # incidentally works even on n32
+ $PTR_ADD="daddu"; # incidentally works even on n32
+ $PTR_SUB="dsubu"; # incidentally works even on n32
$REG_S="sd";
$REG_L="ld";
$SZREG=8;
} else {
- $PTR_ADD="add";
- $PTR_SUB="sub";
+ $PTR_ADD="addu";
+ $PTR_SUB="subu";
$REG_S="sw";
$REG_L="lw";
$SZREG=4;
@@ -121,6 +121,8 @@ $m1=$s11;
$FRAMESIZE=14;
$code=<<___;
+#include "mips_arch.h"
+
.text
.set noat
@@ -183,27 +185,27 @@ $code.=<<___;
$PTR_SUB $sp,$num
and $sp,$at
- $MULTU $aj,$bi
- $LD $alo,$BNSZ($ap)
- $LD $nlo,$BNSZ($np)
- mflo $lo0
- mfhi $hi0
- $MULTU $lo0,$n0
- mflo $m1
-
- $MULTU $alo,$bi
- mflo $alo
- mfhi $ahi
-
- $MULTU $nj,$m1
- mflo $lo1
- mfhi $hi1
- $MULTU $nlo,$m1
+ $MULTU ($aj,$bi)
+ $LD $ahi,$BNSZ($ap)
+ $LD $nhi,$BNSZ($np)
+ mflo ($lo0,$aj,$bi)
+ mfhi ($hi0,$aj,$bi)
+ $MULTU ($lo0,$n0)
+ mflo ($m1,$lo0,$n0)
+
+ $MULTU ($ahi,$bi)
+ mflo ($alo,$ahi,$bi)
+ mfhi ($ahi,$ahi,$bi)
+
+ $MULTU ($nj,$m1)
+ mflo ($lo1,$nj,$m1)
+ mfhi ($hi1,$nj,$m1)
+ $MULTU ($nhi,$m1)
$ADDU $lo1,$lo0
sltu $at,$lo1,$lo0
$ADDU $hi1,$at
- mflo $nlo
- mfhi $nhi
+ mflo ($nlo,$nhi,$m1)
+ mfhi ($nhi,$nhi,$m1)
move $tp,$sp
li $j,2*$BNSZ
@@ -215,25 +217,25 @@ $code.=<<___;
$LD $aj,($aj)
$LD $nj,($nj)
- $MULTU $aj,$bi
+ $MULTU ($aj,$bi)
$ADDU $lo0,$alo,$hi0
$ADDU $lo1,$nlo,$hi1
sltu $at,$lo0,$hi0
sltu $t0,$lo1,$hi1
$ADDU $hi0,$ahi,$at
$ADDU $hi1,$nhi,$t0
- mflo $alo
- mfhi $ahi
+ mflo ($alo,$aj,$bi)
+ mfhi ($ahi,$aj,$bi)
$ADDU $lo1,$lo0
sltu $at,$lo1,$lo0
- $MULTU $nj,$m1
+ $MULTU ($nj,$m1)
$ADDU $hi1,$at
addu $j,$BNSZ
$ST $lo1,($tp)
sltu $t0,$j,$num
- mflo $nlo
- mfhi $nhi
+ mflo ($nlo,$nj,$m1)
+ mfhi ($nhi,$nj,$m1)
bnez $t0,.L1st
$PTR_ADD $tp,$BNSZ
@@ -263,34 +265,34 @@ $code.=<<___;
$PTR_ADD $bi,$bp,$i
$LD $bi,($bi)
$LD $aj,($ap)
- $LD $alo,$BNSZ($ap)
+ $LD $ahi,$BNSZ($ap)
$LD $tj,($sp)
- $MULTU $aj,$bi
+ $MULTU ($aj,$bi)
$LD $nj,($np)
- $LD $nlo,$BNSZ($np)
- mflo $lo0
- mfhi $hi0
+ $LD $nhi,$BNSZ($np)
+ mflo ($lo0,$aj,$bi)
+ mfhi ($hi0,$aj,$bi)
$ADDU $lo0,$tj
- $MULTU $lo0,$n0
+ $MULTU ($lo0,$n0)
sltu $at,$lo0,$tj
$ADDU $hi0,$at
- mflo $m1
+ mflo ($m1,$lo0,$n0)
- $MULTU $alo,$bi
- mflo $alo
- mfhi $ahi
+ $MULTU ($ahi,$bi)
+ mflo ($alo,$ahi,$bi)
+ mfhi ($ahi,$ahi,$bi)
- $MULTU $nj,$m1
- mflo $lo1
- mfhi $hi1
+ $MULTU ($nj,$m1)
+ mflo ($lo1,$nj,$m1)
+ mfhi ($hi1,$nj,$m1)
- $MULTU $nlo,$m1
+ $MULTU ($nhi,$m1)
$ADDU $lo1,$lo0
sltu $at,$lo1,$lo0
$ADDU $hi1,$at
- mflo $nlo
- mfhi $nhi
+ mflo ($nlo,$nhi,$m1)
+ mfhi ($nhi,$nhi,$m1)
move $tp,$sp
li $j,2*$BNSZ
@@ -303,19 +305,19 @@ $code.=<<___;
$LD $aj,($aj)
$LD $nj,($nj)
- $MULTU $aj,$bi
+ $MULTU ($aj,$bi)
$ADDU $lo0,$alo,$hi0
$ADDU $lo1,$nlo,$hi1
sltu $at,$lo0,$hi0
sltu $t0,$lo1,$hi1
$ADDU $hi0,$ahi,$at
$ADDU $hi1,$nhi,$t0
- mflo $alo
- mfhi $ahi
+ mflo ($alo,$aj,$bi)
+ mfhi ($ahi,$aj,$bi)
$ADDU $lo0,$tj
addu $j,$BNSZ
- $MULTU $nj,$m1
+ $MULTU ($nj,$m1)
sltu $at,$lo0,$tj
$ADDU $lo1,$lo0
$ADDU $hi0,$at
@@ -323,8 +325,8 @@ $code.=<<___;
$LD $tj,2*$BNSZ($tp)
$ADDU $hi1,$t0
sltu $at,$j,$num
- mflo $nlo
- mfhi $nhi
+ mflo ($nlo,$nj,$m1)
+ mfhi ($nhi,$nj,$m1)
$ST $lo1,($tp)
bnez $at,.Linner
$PTR_ADD $tp,$BNSZ
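The parenthesized operands are the heart of the compatibility scheme:
mips_arch.h (added later in this patch) expands them either to the
classic HI/LO sequence or to the new R6 three-operand multiplies. For a
32-bit build:

    $MULTU ($aj,$bi)       # pre-R6: multu $aj,$bi     R6: expands to nothing
    mflo   ($lo0,$aj,$bi)  # pre-R6: mflo  $lo0        R6: mulu $lo0,$aj,$bi
    mfhi   ($hi0,$aj,$bi)  # pre-R6: mfhi  $hi0        R6: muhu $hi0,$aj,$bi

The extra source operands are simply ignored pre-R6, so the two
expansions remain instruction-for-instruction equivalent.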
diff --git a/crypto/bn/asm/mips.pl b/crypto/bn/asm/mips.pl
index 420f01f..102b656 100644
--- a/crypto/bn/asm/mips.pl
+++ b/crypto/bn/asm/mips.pl
@@ -109,6 +109,22 @@ $gp=$v1 if ($flavour =~ /nubi/i);
$minus4=$v1;
$code.=<<___;
+#include "mips_arch.h"
+
+#if defined(_MIPS_ARCH_MIPS64R6)
+# define ddivu(rs,rt)
+# define mfqt(rd,rs,rt) ddivu rd,rs,rt
+# define mfrm(rd,rs,rt) dmodu rd,rs,rt
+#elif defined(_MIPS_ARCH_MIPS32R6)
+# define divu(rs,rt)
+# define mfqt(rd,rs,rt) divu rd,rs,rt
+# define mfrm(rd,rs,rt) modu rd,rs,rt
+#else
+# define $DIVU(rs,rt) $DIVU $zero,rs,rt
+# define mfqt(rd,rs,rt) mflo rd
+# define mfrm(rd,rs,rt) mfhi rd
+#endif
+
.rdata
.asciiz "mips3.s, Version 1.2"
.asciiz "MIPS II/III/IV ISA artwork by Andy Polyakov <appro\@fy.chalmers.se>"
@@ -151,7 +167,7 @@ $code.=<<___;
.L_bn_mul_add_words_loop:
$LD $t0,0($a1)
- $MULTU $t0,$a3
+ $MULTU ($t0,$a3)
$LD $t1,0($a0)
$LD $t2,$BNSZ($a1)
$LD $t3,$BNSZ($a0)
@@ -161,11 +177,11 @@ $code.=<<___;
sltu $v0,$t1,$v0 # All manuals say it "compares 32-bit
# values", but it seems to work fine
# even on 64-bit registers.
- mflo $at
- mfhi $t0
+ mflo ($at,$t0,$a3)
+ mfhi ($t0,$t0,$a3)
$ADDU $t1,$at
$ADDU $v0,$t0
- $MULTU $t2,$a3
+ $MULTU ($t2,$a3)
sltu $at,$t1,$at
$ST $t1,0($a0)
$ADDU $v0,$at
@@ -174,11 +190,11 @@ $code.=<<___;
$LD $ta3,3*$BNSZ($a0)
$ADDU $t3,$v0
sltu $v0,$t3,$v0
- mflo $at
- mfhi $t2
+ mflo ($at,$t2,$a3)
+ mfhi ($t2,$t2,$a3)
$ADDU $t3,$at
$ADDU $v0,$t2
- $MULTU $ta0,$a3
+ $MULTU ($ta0,$a3)
sltu $at,$t3,$at
$ST $t3,$BNSZ($a0)
$ADDU $v0,$at
@@ -188,11 +204,11 @@ $code.=<<___;
$PTR_ADD $a1,4*$BNSZ
$ADDU $ta1,$v0
sltu $v0,$ta1,$v0
- mflo $at
- mfhi $ta0
+ mflo ($at,$ta0,$a3)
+ mfhi ($ta0,$ta0,$a3)
$ADDU $ta1,$at
$ADDU $v0,$ta0
- $MULTU $ta2,$a3
+ $MULTU ($ta2,$a3)
sltu $at,$ta1,$at
$ST $ta1,-2*$BNSZ($a0)
$ADDU $v0,$at
@@ -201,8 +217,8 @@ $code.=<<___;
and $ta0,$a2,$minus4
$ADDU $ta3,$v0
sltu $v0,$ta3,$v0
- mflo $at
- mfhi $ta2
+ mflo ($at,$ta2,$a3)
+ mfhi ($ta2,$ta2,$a3)
$ADDU $ta3,$at
$ADDU $v0,$ta2
sltu $at,$ta3,$at
@@ -217,13 +233,13 @@ $code.=<<___;
.L_bn_mul_add_words_tail:
.set reorder
$LD $t0,0($a1)
- $MULTU $t0,$a3
+ $MULTU ($t0,$a3)
$LD $t1,0($a0)
subu $a2,1
$ADDU $t1,$v0
sltu $v0,$t1,$v0
- mflo $at
- mfhi $t0
+ mflo ($at,$t0,$a3)
+ mfhi ($t0,$t0,$a3)
$ADDU $t1,$at
$ADDU $v0,$t0
sltu $at,$t1,$at
@@ -232,13 +248,13 @@ $code.=<<___;
beqz $a2,.L_bn_mul_add_words_return
$LD $t0,$BNSZ($a1)
- $MULTU $t0,$a3
+ $MULTU ($t0,$a3)
$LD $t1,$BNSZ($a0)
subu $a2,1
$ADDU $t1,$v0
sltu $v0,$t1,$v0
- mflo $at
- mfhi $t0
+ mflo ($at,$t0,$a3)
+ mfhi ($t0,$t0,$a3)
$ADDU $t1,$at
$ADDU $v0,$t0
sltu $at,$t1,$at
@@ -247,12 +263,12 @@ $code.=<<___;
beqz $a2,.L_bn_mul_add_words_return
$LD $t0,2*$BNSZ($a1)
- $MULTU $t0,$a3
+ $MULTU ($t0,$a3)
$LD $t1,2*$BNSZ($a0)
$ADDU $t1,$v0
sltu $v0,$t1,$v0
- mflo $at
- mfhi $t0
+ mflo ($at,$t0,$a3)
+ mfhi ($t0,$t0,$a3)
$ADDU $t1,$at
$ADDU $v0,$t0
sltu $at,$t1,$at
@@ -310,40 +326,40 @@ $code.=<<___;
.L_bn_mul_words_loop:
$LD $t0,0($a1)
- $MULTU $t0,$a3
+ $MULTU ($t0,$a3)
$LD $t2,$BNSZ($a1)
$LD $ta0,2*$BNSZ($a1)
$LD $ta2,3*$BNSZ($a1)
- mflo $at
- mfhi $t0
+ mflo ($at,$t0,$a3)
+ mfhi ($t0,$t0,$a3)
$ADDU $v0,$at
sltu $t1,$v0,$at
- $MULTU $t2,$a3
+ $MULTU ($t2,$a3)
$ST $v0,0($a0)
$ADDU $v0,$t1,$t0
subu $a2,4
$PTR_ADD $a0,4*$BNSZ
$PTR_ADD $a1,4*$BNSZ
- mflo $at
- mfhi $t2
+ mflo ($at,$t2,$a3)
+ mfhi ($t2,$t2,$a3)
$ADDU $v0,$at
sltu $t3,$v0,$at
- $MULTU $ta0,$a3
+ $MULTU ($ta0,$a3)
$ST $v0,-3*$BNSZ($a0)
$ADDU $v0,$t3,$t2
- mflo $at
- mfhi $ta0
+ mflo ($at,$ta0,$a3)
+ mfhi ($ta0,$ta0,$a3)
$ADDU $v0,$at
sltu $ta1,$v0,$at
- $MULTU $ta2,$a3
+ $MULTU ($ta2,$a3)
$ST $v0,-2*$BNSZ($a0)
$ADDU $v0,$ta1,$ta0
and $ta0,$a2,$minus4
- mflo $at
- mfhi $ta2
+ mflo ($at,$ta2,$a3)
+ mfhi ($ta2,$ta2,$a3)
$ADDU $v0,$at
sltu $ta3,$v0,$at
$ST $v0,-$BNSZ($a0)
@@ -357,10 +373,10 @@ $code.=<<___;
.L_bn_mul_words_tail:
.set reorder
$LD $t0,0($a1)
- $MULTU $t0,$a3
+ $MULTU ($t0,$a3)
subu $a2,1
- mflo $at
- mfhi $t0
+ mflo ($at,$t0,$a3)
+ mfhi ($t0,$t0,$a3)
$ADDU $v0,$at
sltu $t1,$v0,$at
$ST $v0,0($a0)
@@ -368,10 +384,10 @@ $code.=<<___;
beqz $a2,.L_bn_mul_words_return
$LD $t0,$BNSZ($a1)
- $MULTU $t0,$a3
+ $MULTU ($t0,$a3)
subu $a2,1
- mflo $at
- mfhi $t0
+ mflo ($at,$t0,$a3)
+ mfhi ($t0,$t0,$a3)
$ADDU $v0,$at
sltu $t1,$v0,$at
$ST $v0,$BNSZ($a0)
@@ -379,9 +395,9 @@ $code.=<<___;
beqz $a2,.L_bn_mul_words_return
$LD $t0,2*$BNSZ($a1)
- $MULTU $t0,$a3
- mflo $at
- mfhi $t0
+ $MULTU ($t0,$a3)
+ mflo ($at,$t0,$a3)
+ mfhi ($t0,$t0,$a3)
$ADDU $v0,$at
sltu $t1,$v0,$at
$ST $v0,2*$BNSZ($a0)
@@ -438,35 +454,35 @@ $code.=<<___;
.L_bn_sqr_words_loop:
$LD $t0,0($a1)
- $MULTU $t0,$t0
+ $MULTU ($t0,$t0)
$LD $t2,$BNSZ($a1)
$LD $ta0,2*$BNSZ($a1)
$LD $ta2,3*$BNSZ($a1)
- mflo $t1
- mfhi $t0
+ mflo ($t1,$t0,$t0)
+ mfhi ($t0,$t0,$t0)
$ST $t1,0($a0)
$ST $t0,$BNSZ($a0)
- $MULTU $t2,$t2
+ $MULTU ($t2,$t2)
subu $a2,4
$PTR_ADD $a0,8*$BNSZ
$PTR_ADD $a1,4*$BNSZ
- mflo $t3
- mfhi $t2
+ mflo ($t3,$t2,$t2)
+ mfhi ($t2,$t2,$t2)
$ST $t3,-6*$BNSZ($a0)
$ST $t2,-5*$BNSZ($a0)
- $MULTU $ta0,$ta0
- mflo $ta1
- mfhi $ta0
+ $MULTU ($ta0,$ta0)
+ mflo ($ta1,$ta0,$ta0)
+ mfhi ($ta0,$ta0,$ta0)
$ST $ta1,-4*$BNSZ($a0)
$ST $ta0,-3*$BNSZ($a0)
- $MULTU $ta2,$ta2
+ $MULTU ($ta2,$ta2)
and $ta0,$a2,$minus4
- mflo $ta3
- mfhi $ta2
+ mflo ($ta3,$ta2,$ta2)
+ mfhi ($ta2,$ta2,$ta2)
$ST $ta3,-2*$BNSZ($a0)
.set noreorder
@@ -479,27 +495,27 @@ $code.=<<___;
.L_bn_sqr_words_tail:
.set reorder
$LD $t0,0($a1)
- $MULTU $t0,$t0
+ $MULTU ($t0,$t0)
subu $a2,1
- mflo $t1
- mfhi $t0
+ mflo ($t1,$t0,$t0)
+ mfhi ($t0,$t0,$t0)
$ST $t1,0($a0)
$ST $t0,$BNSZ($a0)
beqz $a2,.L_bn_sqr_words_return
$LD $t0,$BNSZ($a1)
- $MULTU $t0,$t0
+ $MULTU ($t0,$t0)
subu $a2,1
- mflo $t1
- mfhi $t0
+ mflo ($t1,$t0,$t0)
+ mfhi ($t0,$t0,$t0)
$ST $t1,2*$BNSZ($a0)
$ST $t0,3*$BNSZ($a0)
beqz $a2,.L_bn_sqr_words_return
$LD $t0,2*$BNSZ($a1)
- $MULTU $t0,$t0
- mflo $t1
- mfhi $t0
+ $MULTU ($t0,$t0)
+ mflo ($t1,$t0,$t0)
+ mfhi ($t0,$t0,$t0)
$ST $t1,4*$BNSZ($a0)
$ST $t0,5*$BNSZ($a0)
@@ -823,11 +839,11 @@ $code.=<<___;
move $ta3,$ra
bal bn_div_words_internal
move $ra,$ta3
- $MULTU $ta2,$v0
+ $MULTU ($ta2,$v0)
$LD $t2,-2*$BNSZ($a3)
move $ta0,$zero
- mfhi $t1
- mflo $t0
+ mfhi ($t1,$ta2,$v0)
+ mflo ($t0,$ta2,$v0)
sltu $t8,$t1,$a1
.L_bn_div_3_words_inner_loop:
bnez $t8,.L_bn_div_3_words_inner_loop_done
@@ -930,15 +946,15 @@ $code.=<<___;
$SRL $HH,$a0,4*$BNSZ # bits
$SRL $QT,4*$BNSZ # q=0xffffffff
beq $DH,$HH,.L_bn_div_words_skip_div1
- $DIVU $zero,$a0,$DH
- mflo $QT
+ $DIVU ($a0,$DH)
+ mfqt ($QT,$a0,$DH)
.L_bn_div_words_skip_div1:
- $MULTU $a2,$QT
+ $MULTU ($a2,$QT)
$SLL $t3,$a0,4*$BNSZ # bits
$SRL $at,$a1,4*$BNSZ # bits
or $t3,$at
- mflo $t0
- mfhi $t1
+ mflo ($t0,$a2,$QT)
+ mfhi ($t1,$a2,$QT)
.L_bn_div_words_inner_loop1:
sltu $t2,$t3,$t0
seq $t8,$HH,$t1
@@ -963,15 +979,15 @@ $code.=<<___;
$SRL $HH,$a0,4*$BNSZ # bits
$SRL $QT,4*$BNSZ # q=0xffffffff
beq $DH,$HH,.L_bn_div_words_skip_div2
- $DIVU $zero,$a0,$DH
- mflo $QT
+ $DIVU ($a0,$DH)
+ mfqt ($QT,$a0,$DH)
.L_bn_div_words_skip_div2:
- $MULTU $a2,$QT
+ $MULTU ($a2,$QT)
$SLL $t3,$a0,4*$BNSZ # bits
$SRL $at,$a1,4*$BNSZ # bits
or $t3,$at
- mflo $t0
- mfhi $t1
+ mflo ($t0,$a2,$QT)
+ mfhi ($t1,$a2,$QT)
.L_bn_div_words_inner_loop2:
sltu $t2,$t3,$t0
seq $t8,$HH,$t1
@@ -1070,592 +1086,592 @@ $code.=<<___;
$LD $b_0,0($a2)
$LD $a_1,$BNSZ($a1)
$LD $a_2,2*$BNSZ($a1)
- $MULTU $a_0,$b_0 # mul_add_c(a[0],b[0],c1,c2,c3);
+ $MULTU ($a_0,$b_0) # mul_add_c(a[0],b[0],c1,c2,c3);
$LD $a_3,3*$BNSZ($a1)
$LD $b_1,$BNSZ($a2)
$LD $b_2,2*$BNSZ($a2)
$LD $b_3,3*$BNSZ($a2)
- mflo $c_1
- mfhi $c_2
+ mflo ($c_1,$a_0,$b_0)
+ mfhi ($c_2,$a_0,$b_0)
$LD $a_4,4*$BNSZ($a1)
$LD $a_5,5*$BNSZ($a1)
- $MULTU $a_0,$b_1 # mul_add_c(a[0],b[1],c2,c3,c1);
+ $MULTU ($a_0,$b_1) # mul_add_c(a[0],b[1],c2,c3,c1);
$LD $a_6,6*$BNSZ($a1)
$LD $a_7,7*$BNSZ($a1)
$LD $b_4,4*$BNSZ($a2)
$LD $b_5,5*$BNSZ($a2)
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_0,$b_1)
+ mfhi ($t_2,$a_0,$b_1)
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_1,$b_0 # mul_add_c(a[1],b[0],c2,c3,c1);
+ $MULTU ($a_1,$b_0) # mul_add_c(a[1],b[0],c2,c3,c1);
$ADDU $c_3,$t_2,$at
$LD $b_6,6*$BNSZ($a2)
$LD $b_7,7*$BNSZ($a2)
$ST $c_1,0($a0) # r[0]=c1;
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_1,$b_0)
+ mfhi ($t_2,$a_1,$b_0)
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_2,$b_0 # mul_add_c(a[2],b[0],c3,c1,c2);
+ $MULTU ($a_2,$b_0) # mul_add_c(a[2],b[0],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_3,$t_2
sltu $c_1,$c_3,$t_2
$ST $c_2,$BNSZ($a0) # r[1]=c2;
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_2,$b_0)
+ mfhi ($t_2,$a_2,$b_0)
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
- $MULTU $a_1,$b_1 # mul_add_c(a[1],b[1],c3,c1,c2);
+ $MULTU ($a_1,$b_1) # mul_add_c(a[1],b[1],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_1,$t_2
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_1,$b_1)
+ mfhi ($t_2,$a_1,$b_1)
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
- $MULTU $a_0,$b_2 # mul_add_c(a[0],b[2],c3,c1,c2);
+ $MULTU ($a_0,$b_2) # mul_add_c(a[0],b[2],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_1,$t_2
sltu $c_2,$c_1,$t_2
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_0,$b_2)
+ mfhi ($t_2,$a_0,$b_2)
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
- $MULTU $a_0,$b_3 # mul_add_c(a[0],b[3],c1,c2,c3);
+ $MULTU ($a_0,$b_3) # mul_add_c(a[0],b[3],c1,c2,c3);
$ADDU $t_2,$at
$ADDU $c_1,$t_2
sltu $at,$c_1,$t_2
$ADDU $c_2,$at
$ST $c_3,2*$BNSZ($a0) # r[2]=c3;
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_0,$b_3)
+ mfhi ($t_2,$a_0,$b_3)
$ADDU $c_1,$t_1
sltu $at,$c_1,$t_1
- $MULTU $a_1,$b_2 # mul_add_c(a[1],b[2],c1,c2,c3);
+ $MULTU ($a_1,$b_2) # mul_add_c(a[1],b[2],c1,c2,c3);
$ADDU $t_2,$at
$ADDU $c_2,$t_2
sltu $c_3,$c_2,$t_2
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_1,$b_2)
+ mfhi ($t_2,$a_1,$b_2)
$ADDU $c_1,$t_1
sltu $at,$c_1,$t_1
- $MULTU $a_2,$b_1 # mul_add_c(a[2],b[1],c1,c2,c3);
+ $MULTU ($a_2,$b_1) # mul_add_c(a[2],b[1],c1,c2,c3);
$ADDU $t_2,$at
$ADDU $c_2,$t_2
sltu $at,$c_2,$t_2
$ADDU $c_3,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_2,$b_1)
+ mfhi ($t_2,$a_2,$b_1)
$ADDU $c_1,$t_1
sltu $at,$c_1,$t_1
- $MULTU $a_3,$b_0 # mul_add_c(a[3],b[0],c1,c2,c3);
+ $MULTU ($a_3,$b_0) # mul_add_c(a[3],b[0],c1,c2,c3);
$ADDU $t_2,$at
$ADDU $c_2,$t_2
sltu $at,$c_2,$t_2
$ADDU $c_3,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_3,$b_0)
+ mfhi ($t_2,$a_3,$b_0)
$ADDU $c_1,$t_1
sltu $at,$c_1,$t_1
- $MULTU $a_4,$b_0 # mul_add_c(a[4],b[0],c2,c3,c1);
+ $MULTU ($a_4,$b_0) # mul_add_c(a[4],b[0],c2,c3,c1);
$ADDU $t_2,$at
$ADDU $c_2,$t_2
sltu $at,$c_2,$t_2
$ADDU $c_3,$at
$ST $c_1,3*$BNSZ($a0) # r[3]=c1;
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_4,$b_0)
+ mfhi ($t_2,$a_4,$b_0)
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_3,$b_1 # mul_add_c(a[3],b[1],c2,c3,c1);
+ $MULTU ($a_3,$b_1) # mul_add_c(a[3],b[1],c2,c3,c1);
$ADDU $t_2,$at
$ADDU $c_3,$t_2
sltu $c_1,$c_3,$t_2
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_3,$b_1)
+ mfhi ($t_2,$a_3,$b_1)
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_2,$b_2 # mul_add_c(a[2],b[2],c2,c3,c1);
+ $MULTU ($a_2,$b_2) # mul_add_c(a[2],b[2],c2,c3,c1);
$ADDU $t_2,$at
$ADDU $c_3,$t_2
sltu $at,$c_3,$t_2
$ADDU $c_1,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_2,$b_2)
+ mfhi ($t_2,$a_2,$b_2)
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_1,$b_3 # mul_add_c(a[1],b[3],c2,c3,c1);
+ $MULTU ($a_1,$b_3) # mul_add_c(a[1],b[3],c2,c3,c1);
$ADDU $t_2,$at
$ADDU $c_3,$t_2
sltu $at,$c_3,$t_2
$ADDU $c_1,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_1,$b_3)
+ mfhi ($t_2,$a_1,$b_3)
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_0,$b_4 # mul_add_c(a[0],b[4],c2,c3,c1);
+ $MULTU ($a_0,$b_4) # mul_add_c(a[0],b[4],c2,c3,c1);
$ADDU $t_2,$at
$ADDU $c_3,$t_2
sltu $at,$c_3,$t_2
$ADDU $c_1,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_0,$b_4)
+ mfhi ($t_2,$a_0,$b_4)
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_0,$b_5 # mul_add_c(a[0],b[5],c3,c1,c2);
+ $MULTU ($a_0,$b_5) # mul_add_c(a[0],b[5],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_3,$t_2
sltu $at,$c_3,$t_2
$ADDU $c_1,$at
$ST $c_2,4*$BNSZ($a0) # r[4]=c2;
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_0,$b_5)
+ mfhi ($t_2,$a_0,$b_5)
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
- $MULTU $a_1,$b_4 # mul_add_c(a[1],b[4],c3,c1,c2);
+ $MULTU ($a_1,$b_4) # mul_add_c(a[1],b[4],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_1,$t_2
sltu $c_2,$c_1,$t_2
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_1,$b_4)
+ mfhi ($t_2,$a_1,$b_4)
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
- $MULTU $a_2,$b_3 # mul_add_c(a[2],b[3],c3,c1,c2);
+ $MULTU ($a_2,$b_3) # mul_add_c(a[2],b[3],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_1,$t_2
sltu $at,$c_1,$t_2
$ADDU $c_2,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_2,$b_3)
+ mfhi ($t_2,$a_2,$b_3)
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
- $MULTU $a_3,$b_2 # mul_add_c(a[3],b[2],c3,c1,c2);
+ $MULTU ($a_3,$b_2) # mul_add_c(a[3],b[2],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_1,$t_2
sltu $at,$c_1,$t_2
$ADDU $c_2,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_3,$b_2)
+ mfhi ($t_2,$a_3,$b_2)
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
- $MULTU $a_4,$b_1 # mul_add_c(a[4],b[1],c3,c1,c2);
+ $MULTU ($a_4,$b_1) # mul_add_c(a[4],b[1],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_1,$t_2
sltu $at,$c_1,$t_2
$ADDU $c_2,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_4,$b_1)
+ mfhi ($t_2,$a_4,$b_1)
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
- $MULTU $a_5,$b_0 # mul_add_c(a[5],b[0],c3,c1,c2);
+ $MULTU ($a_5,$b_0) # mul_add_c(a[5],b[0],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_1,$t_2
sltu $at,$c_1,$t_2
$ADDU $c_2,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_5,$b_0)
+ mfhi ($t_2,$a_5,$b_0)
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
- $MULTU $a_6,$b_0 # mul_add_c(a[6],b[0],c1,c2,c3);
+ $MULTU ($a_6,$b_0) # mul_add_c(a[6],b[0],c1,c2,c3);
$ADDU $t_2,$at
$ADDU $c_1,$t_2
sltu $at,$c_1,$t_2
$ADDU $c_2,$at
$ST $c_3,5*$BNSZ($a0) # r[5]=c3;
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_6,$b_0)
+ mfhi ($t_2,$a_6,$b_0)
$ADDU $c_1,$t_1
sltu $at,$c_1,$t_1
- $MULTU $a_5,$b_1 # mul_add_c(a[5],b[1],c1,c2,c3);
+ $MULTU ($a_5,$b_1) # mul_add_c(a[5],b[1],c1,c2,c3);
$ADDU $t_2,$at
$ADDU $c_2,$t_2
sltu $c_3,$c_2,$t_2
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_5,$b_1)
+ mfhi ($t_2,$a_5,$b_1)
$ADDU $c_1,$t_1
sltu $at,$c_1,$t_1
- $MULTU $a_4,$b_2 # mul_add_c(a[4],b[2],c1,c2,c3);
+ $MULTU ($a_4,$b_2) # mul_add_c(a[4],b[2],c1,c2,c3);
$ADDU $t_2,$at
$ADDU $c_2,$t_2
sltu $at,$c_2,$t_2
$ADDU $c_3,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_4,$b_2)
+ mfhi ($t_2,$a_4,$b_2)
$ADDU $c_1,$t_1
sltu $at,$c_1,$t_1
- $MULTU $a_3,$b_3 # mul_add_c(a[3],b[3],c1,c2,c3);
+ $MULTU ($a_3,$b_3) # mul_add_c(a[3],b[3],c1,c2,c3);
$ADDU $t_2,$at
$ADDU $c_2,$t_2
sltu $at,$c_2,$t_2
$ADDU $c_3,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_3,$b_3)
+ mfhi ($t_2,$a_3,$b_3)
$ADDU $c_1,$t_1
sltu $at,$c_1,$t_1
- $MULTU $a_2,$b_4 # mul_add_c(a[2],b[4],c1,c2,c3);
+ $MULTU ($a_2,$b_4) # mul_add_c(a[2],b[4],c1,c2,c3);
$ADDU $t_2,$at
$ADDU $c_2,$t_2
sltu $at,$c_2,$t_2
$ADDU $c_3,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_2,$b_4)
+ mfhi ($t_2,$a_2,$b_4)
$ADDU $c_1,$t_1
sltu $at,$c_1,$t_1
- $MULTU $a_1,$b_5 # mul_add_c(a[1],b[5],c1,c2,c3);
+ $MULTU ($a_1,$b_5) # mul_add_c(a[1],b[5],c1,c2,c3);
$ADDU $t_2,$at
$ADDU $c_2,$t_2
sltu $at,$c_2,$t_2
$ADDU $c_3,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_1,$b_5)
+ mfhi ($t_2,$a_1,$b_5)
$ADDU $c_1,$t_1
sltu $at,$c_1,$t_1
- $MULTU $a_0,$b_6 # mul_add_c(a[0],b[6],c1,c2,c3);
+ $MULTU ($a_0,$b_6) # mul_add_c(a[0],b[6],c1,c2,c3);
$ADDU $t_2,$at
$ADDU $c_2,$t_2
sltu $at,$c_2,$t_2
$ADDU $c_3,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_0,$b_6)
+ mfhi ($t_2,$a_0,$b_6)
$ADDU $c_1,$t_1
sltu $at,$c_1,$t_1
- $MULTU $a_0,$b_7 # mul_add_c(a[0],b[7],c2,c3,c1);
+ $MULTU ($a_0,$b_7) # mul_add_c(a[0],b[7],c2,c3,c1);
$ADDU $t_2,$at
$ADDU $c_2,$t_2
sltu $at,$c_2,$t_2
$ADDU $c_3,$at
$ST $c_1,6*$BNSZ($a0) # r[6]=c1;
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_0,$b_7)
+ mfhi ($t_2,$a_0,$b_7)
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_1,$b_6 # mul_add_c(a[1],b[6],c2,c3,c1);
+ $MULTU ($a_1,$b_6) # mul_add_c(a[1],b[6],c2,c3,c1);
$ADDU $t_2,$at
$ADDU $c_3,$t_2
sltu $c_1,$c_3,$t_2
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_1,$b_6)
+ mfhi ($t_2,$a_1,$b_6)
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_2,$b_5 # mul_add_c(a[2],b[5],c2,c3,c1);
+ $MULTU ($a_2,$b_5) # mul_add_c(a[2],b[5],c2,c3,c1);
$ADDU $t_2,$at
$ADDU $c_3,$t_2
sltu $at,$c_3,$t_2
$ADDU $c_1,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_2,$b_5)
+ mfhi ($t_2,$a_2,$b_5)
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_3,$b_4 # mul_add_c(a[3],b[4],c2,c3,c1);
+ $MULTU ($a_3,$b_4) # mul_add_c(a[3],b[4],c2,c3,c1);
$ADDU $t_2,$at
$ADDU $c_3,$t_2
sltu $at,$c_3,$t_2
$ADDU $c_1,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_3,$b_4)
+ mfhi ($t_2,$a_3,$b_4)
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_4,$b_3 # mul_add_c(a[4],b[3],c2,c3,c1);
+ $MULTU ($a_4,$b_3) # mul_add_c(a[4],b[3],c2,c3,c1);
$ADDU $t_2,$at
$ADDU $c_3,$t_2
sltu $at,$c_3,$t_2
$ADDU $c_1,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_4,$b_3)
+ mfhi ($t_2,$a_4,$b_3)
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_5,$b_2 # mul_add_c(a[5],b[2],c2,c3,c1);
+ $MULTU ($a_5,$b_2) # mul_add_c(a[5],b[2],c2,c3,c1);
$ADDU $t_2,$at
$ADDU $c_3,$t_2
sltu $at,$c_3,$t_2
$ADDU $c_1,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_5,$b_2)
+ mfhi ($t_2,$a_5,$b_2)
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_6,$b_1 # mul_add_c(a[6],b[1],c2,c3,c1);
+ $MULTU ($a_6,$b_1) # mul_add_c(a[6],b[1],c2,c3,c1);
$ADDU $t_2,$at
$ADDU $c_3,$t_2
sltu $at,$c_3,$t_2
$ADDU $c_1,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_6,$b_1)
+ mfhi ($t_2,$a_6,$b_1)
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_7,$b_0 # mul_add_c(a[7],b[0],c2,c3,c1);
+ $MULTU ($a_7,$b_0) # mul_add_c(a[7],b[0],c2,c3,c1);
$ADDU $t_2,$at
$ADDU $c_3,$t_2
sltu $at,$c_3,$t_2
$ADDU $c_1,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_7,$b_0)
+ mfhi ($t_2,$a_7,$b_0)
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_7,$b_1 # mul_add_c(a[7],b[1],c3,c1,c2);
+ $MULTU ($a_7,$b_1) # mul_add_c(a[7],b[1],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_3,$t_2
sltu $at,$c_3,$t_2
$ADDU $c_1,$at
$ST $c_2,7*$BNSZ($a0) # r[7]=c2;
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_7,$b_1)
+ mfhi ($t_2,$a_7,$b_1)
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
- $MULTU $a_6,$b_2 # mul_add_c(a[6],b[2],c3,c1,c2);
+ $MULTU ($a_6,$b_2) # mul_add_c(a[6],b[2],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_1,$t_2
sltu $c_2,$c_1,$t_2
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_6,$b_2)
+ mfhi ($t_2,$a_6,$b_2)
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
- $MULTU $a_5,$b_3 # mul_add_c(a[5],b[3],c3,c1,c2);
+ $MULTU ($a_5,$b_3) # mul_add_c(a[5],b[3],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_1,$t_2
sltu $at,$c_1,$t_2
$ADDU $c_2,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_5,$b_3)
+ mfhi ($t_2,$a_5,$b_3)
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
- $MULTU $a_4,$b_4 # mul_add_c(a[4],b[4],c3,c1,c2);
+ $MULTU ($a_4,$b_4) # mul_add_c(a[4],b[4],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_1,$t_2
sltu $at,$c_1,$t_2
$ADDU $c_2,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_4,$b_4)
+ mfhi ($t_2,$a_4,$b_4)
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
- $MULTU $a_3,$b_5 # mul_add_c(a[3],b[5],c3,c1,c2);
+ $MULTU ($a_3,$b_5) # mul_add_c(a[3],b[5],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_1,$t_2
sltu $at,$c_1,$t_2
$ADDU $c_2,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_3,$b_5)
+ mfhi ($t_2,$a_3,$b_5)
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
- $MULTU $a_2,$b_6 # mul_add_c(a[2],b[6],c3,c1,c2);
+ $MULTU ($a_2,$b_6) # mul_add_c(a[2],b[6],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_1,$t_2
sltu $at,$c_1,$t_2
$ADDU $c_2,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_2,$b_6)
+ mfhi ($t_2,$a_2,$b_6)
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
- $MULTU $a_1,$b_7 # mul_add_c(a[1],b[7],c3,c1,c2);
+ $MULTU ($a_1,$b_7) # mul_add_c(a[1],b[7],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_1,$t_2
sltu $at,$c_1,$t_2
$ADDU $c_2,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_1,$b_7)
+ mfhi ($t_2,$a_1,$b_7)
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
- $MULTU $a_2,$b_7 # mul_add_c(a[2],b[7],c1,c2,c3);
+ $MULTU ($a_2,$b_7) # mul_add_c(a[2],b[7],c1,c2,c3);
$ADDU $t_2,$at
$ADDU $c_1,$t_2
sltu $at,$c_1,$t_2
$ADDU $c_2,$at
$ST $c_3,8*$BNSZ($a0) # r[8]=c3;
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_2,$b_7)
+ mfhi ($t_2,$a_2,$b_7)
$ADDU $c_1,$t_1
sltu $at,$c_1,$t_1
- $MULTU $a_3,$b_6 # mul_add_c(a[3],b[6],c1,c2,c3);
+ $MULTU ($a_3,$b_6) # mul_add_c(a[3],b[6],c1,c2,c3);
$ADDU $t_2,$at
$ADDU $c_2,$t_2
sltu $c_3,$c_2,$t_2
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_3,$b_6)
+ mfhi ($t_2,$a_3,$b_6)
$ADDU $c_1,$t_1
sltu $at,$c_1,$t_1
- $MULTU $a_4,$b_5 # mul_add_c(a[4],b[5],c1,c2,c3);
+ $MULTU ($a_4,$b_5) # mul_add_c(a[4],b[5],c1,c2,c3);
$ADDU $t_2,$at
$ADDU $c_2,$t_2
sltu $at,$c_2,$t_2
$ADDU $c_3,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_4,$b_5)
+ mfhi ($t_2,$a_4,$b_5)
$ADDU $c_1,$t_1
sltu $at,$c_1,$t_1
- $MULTU $a_5,$b_4 # mul_add_c(a[5],b[4],c1,c2,c3);
+ $MULTU ($a_5,$b_4) # mul_add_c(a[5],b[4],c1,c2,c3);
$ADDU $t_2,$at
$ADDU $c_2,$t_2
sltu $at,$c_2,$t_2
$ADDU $c_3,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_5,$b_4)
+ mfhi ($t_2,$a_5,$b_4)
$ADDU $c_1,$t_1
sltu $at,$c_1,$t_1
- $MULTU $a_6,$b_3 # mul_add_c(a[6],b[3],c1,c2,c3);
+ $MULTU ($a_6,$b_3) # mul_add_c(a[6],b[3],c1,c2,c3);
$ADDU $t_2,$at
$ADDU $c_2,$t_2
sltu $at,$c_2,$t_2
$ADDU $c_3,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_6,$b_3)
+ mfhi ($t_2,$a_6,$b_3)
$ADDU $c_1,$t_1
sltu $at,$c_1,$t_1
- $MULTU $a_7,$b_2 # mul_add_c(a[7],b[2],c1,c2,c3);
+ $MULTU ($a_7,$b_2) # mul_add_c(a[7],b[2],c1,c2,c3);
$ADDU $t_2,$at
$ADDU $c_2,$t_2
sltu $at,$c_2,$t_2
$ADDU $c_3,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_7,$b_2)
+ mfhi ($t_2,$a_7,$b_2)
$ADDU $c_1,$t_1
sltu $at,$c_1,$t_1
- $MULTU $a_7,$b_3 # mul_add_c(a[7],b[3],c2,c3,c1);
+ $MULTU ($a_7,$b_3) # mul_add_c(a[7],b[3],c2,c3,c1);
$ADDU $t_2,$at
$ADDU $c_2,$t_2
sltu $at,$c_2,$t_2
$ADDU $c_3,$at
$ST $c_1,9*$BNSZ($a0) # r[9]=c1;
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_7,$b_3)
+ mfhi ($t_2,$a_7,$b_3)
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_6,$b_4 # mul_add_c(a[6],b[4],c2,c3,c1);
+ $MULTU ($a_6,$b_4) # mul_add_c(a[6],b[4],c2,c3,c1);
$ADDU $t_2,$at
$ADDU $c_3,$t_2
sltu $c_1,$c_3,$t_2
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_6,$b_4)
+ mfhi ($t_2,$a_6,$b_4)
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_5,$b_5 # mul_add_c(a[5],b[5],c2,c3,c1);
+ $MULTU ($a_5,$b_5) # mul_add_c(a[5],b[5],c2,c3,c1);
$ADDU $t_2,$at
$ADDU $c_3,$t_2
sltu $at,$c_3,$t_2
$ADDU $c_1,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_5,$b_5)
+ mfhi ($t_2,$a_5,$b_5)
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_4,$b_6 # mul_add_c(a[4],b[6],c2,c3,c1);
+ $MULTU ($a_4,$b_6) # mul_add_c(a[4],b[6],c2,c3,c1);
$ADDU $t_2,$at
$ADDU $c_3,$t_2
sltu $at,$c_3,$t_2
$ADDU $c_1,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_4,$b_6)
+ mfhi ($t_2,$a_4,$b_6)
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_3,$b_7 # mul_add_c(a[3],b[7],c2,c3,c1);
+ $MULTU ($a_3,$b_7) # mul_add_c(a[3],b[7],c2,c3,c1);
$ADDU $t_2,$at
$ADDU $c_3,$t_2
sltu $at,$c_3,$t_2
$ADDU $c_1,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_3,$b_7)
+ mfhi ($t_2,$a_3,$b_7)
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_4,$b_7 # mul_add_c(a[4],b[7],c3,c1,c2);
+ $MULTU ($a_4,$b_7) # mul_add_c(a[4],b[7],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_3,$t_2
sltu $at,$c_3,$t_2
$ADDU $c_1,$at
$ST $c_2,10*$BNSZ($a0) # r[10]=c2;
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_4,$b_7)
+ mfhi ($t_2,$a_4,$b_7)
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
- $MULTU $a_5,$b_6 # mul_add_c(a[5],b[6],c3,c1,c2);
+ $MULTU ($a_5,$b_6) # mul_add_c(a[5],b[6],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_1,$t_2
sltu $c_2,$c_1,$t_2
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_5,$b_6)
+ mfhi ($t_2,$a_5,$b_6)
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
- $MULTU $a_6,$b_5 # mul_add_c(a[6],b[5],c3,c1,c2);
+ $MULTU ($a_6,$b_5) # mul_add_c(a[6],b[5],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_1,$t_2
sltu $at,$c_1,$t_2
$ADDU $c_2,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_6,$b_5)
+ mfhi ($t_2,$a_6,$b_5)
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
- $MULTU $a_7,$b_4 # mul_add_c(a[7],b[4],c3,c1,c2);
+ $MULTU ($a_7,$b_4) # mul_add_c(a[7],b[4],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_1,$t_2
sltu $at,$c_1,$t_2
$ADDU $c_2,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_7,$b_4)
+ mfhi ($t_2,$a_7,$b_4)
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
- $MULTU $a_7,$b_5 # mul_add_c(a[7],b[5],c1,c2,c3);
+ $MULTU ($a_7,$b_5) # mul_add_c(a[7],b[5],c1,c2,c3);
$ADDU $t_2,$at
$ADDU $c_1,$t_2
sltu $at,$c_1,$t_2
$ADDU $c_2,$at
$ST $c_3,11*$BNSZ($a0) # r[11]=c3;
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_7,$b_5)
+ mfhi ($t_2,$a_7,$b_5)
$ADDU $c_1,$t_1
sltu $at,$c_1,$t_1
- $MULTU $a_6,$b_6 # mul_add_c(a[6],b[6],c1,c2,c3);
+ $MULTU ($a_6,$b_6) # mul_add_c(a[6],b[6],c1,c2,c3);
$ADDU $t_2,$at
$ADDU $c_2,$t_2
sltu $c_3,$c_2,$t_2
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_6,$b_6)
+ mfhi ($t_2,$a_6,$b_6)
$ADDU $c_1,$t_1
sltu $at,$c_1,$t_1
- $MULTU $a_5,$b_7 # mul_add_c(a[5],b[7],c1,c2,c3);
+ $MULTU ($a_5,$b_7) # mul_add_c(a[5],b[7],c1,c2,c3);
$ADDU $t_2,$at
$ADDU $c_2,$t_2
sltu $at,$c_2,$t_2
$ADDU $c_3,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_5,$b_7)
+ mfhi ($t_2,$a_5,$b_7)
$ADDU $c_1,$t_1
sltu $at,$c_1,$t_1
- $MULTU $a_6,$b_7 # mul_add_c(a[6],b[7],c2,c3,c1);
+ $MULTU ($a_6,$b_7) # mul_add_c(a[6],b[7],c2,c3,c1);
$ADDU $t_2,$at
$ADDU $c_2,$t_2
sltu $at,$c_2,$t_2
$ADDU $c_3,$at
$ST $c_1,12*$BNSZ($a0) # r[12]=c1;
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_6,$b_7)
+ mfhi ($t_2,$a_6,$b_7)
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_7,$b_6 # mul_add_c(a[7],b[6],c2,c3,c1);
+ $MULTU ($a_7,$b_6) # mul_add_c(a[7],b[6],c2,c3,c1);
$ADDU $t_2,$at
$ADDU $c_3,$t_2
sltu $c_1,$c_3,$t_2
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_7,$b_6)
+ mfhi ($t_2,$a_7,$b_6)
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_7,$b_7 # mul_add_c(a[7],b[7],c3,c1,c2);
+ $MULTU ($a_7,$b_7) # mul_add_c(a[7],b[7],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_3,$t_2
sltu $at,$c_3,$t_2
$ADDU $c_1,$at
$ST $c_2,13*$BNSZ($a0) # r[13]=c2;
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_7,$b_7)
+ mfhi ($t_2,$a_7,$b_7)
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
$ADDU $t_2,$at
@@ -1716,144 +1732,144 @@ $code.=<<___;
$LD $b_0,0($a2)
$LD $a_1,$BNSZ($a1)
$LD $a_2,2*$BNSZ($a1)
- $MULTU $a_0,$b_0 # mul_add_c(a[0],b[0],c1,c2,c3);
+ $MULTU ($a_0,$b_0) # mul_add_c(a[0],b[0],c1,c2,c3);
$LD $a_3,3*$BNSZ($a1)
$LD $b_1,$BNSZ($a2)
$LD $b_2,2*$BNSZ($a2)
$LD $b_3,3*$BNSZ($a2)
- mflo $c_1
- mfhi $c_2
+ mflo ($c_1,$a_0,$b_0)
+ mfhi ($c_2,$a_0,$b_0)
$ST $c_1,0($a0)
- $MULTU $a_0,$b_1 # mul_add_c(a[0],b[1],c2,c3,c1);
- mflo $t_1
- mfhi $t_2
+ $MULTU ($a_0,$b_1) # mul_add_c(a[0],b[1],c2,c3,c1);
+ mflo ($t_1,$a_0,$b_1)
+ mfhi ($t_2,$a_0,$b_1)
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_1,$b_0 # mul_add_c(a[1],b[0],c2,c3,c1);
+ $MULTU ($a_1,$b_0) # mul_add_c(a[1],b[0],c2,c3,c1);
$ADDU $c_3,$t_2,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_1,$b_0)
+ mfhi ($t_2,$a_1,$b_0)
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_2,$b_0 # mul_add_c(a[2],b[0],c3,c1,c2);
+ $MULTU ($a_2,$b_0) # mul_add_c(a[2],b[0],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_3,$t_2
sltu $c_1,$c_3,$t_2
$ST $c_2,$BNSZ($a0)
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_2,$b_0)
+ mfhi ($t_2,$a_2,$b_0)
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
- $MULTU $a_1,$b_1 # mul_add_c(a[1],b[1],c3,c1,c2);
+ $MULTU ($a_1,$b_1) # mul_add_c(a[1],b[1],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_1,$t_2
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_1,$b_1)
+ mfhi ($t_2,$a_1,$b_1)
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
- $MULTU $a_0,$b_2 # mul_add_c(a[0],b[2],c3,c1,c2);
+ $MULTU ($a_0,$b_2) # mul_add_c(a[0],b[2],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_1,$t_2
sltu $c_2,$c_1,$t_2
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_0,$b_2)
+ mfhi ($t_2,$a_0,$b_2)
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
- $MULTU $a_0,$b_3 # mul_add_c(a[0],b[3],c1,c2,c3);
+ $MULTU ($a_0,$b_3) # mul_add_c(a[0],b[3],c1,c2,c3);
$ADDU $t_2,$at
$ADDU $c_1,$t_2
sltu $at,$c_1,$t_2
$ADDU $c_2,$at
$ST $c_3,2*$BNSZ($a0)
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_0,$b_3)
+ mfhi ($t_2,$a_0,$b_3)
$ADDU $c_1,$t_1
sltu $at,$c_1,$t_1
- $MULTU $a_1,$b_2 # mul_add_c(a[1],b[2],c1,c2,c3);
+ $MULTU ($a_1,$b_2) # mul_add_c(a[1],b[2],c1,c2,c3);
$ADDU $t_2,$at
$ADDU $c_2,$t_2
sltu $c_3,$c_2,$t_2
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_1,$b_2)
+ mfhi ($t_2,$a_1,$b_2)
$ADDU $c_1,$t_1
sltu $at,$c_1,$t_1
- $MULTU $a_2,$b_1 # mul_add_c(a[2],b[1],c1,c2,c3);
+ $MULTU ($a_2,$b_1) # mul_add_c(a[2],b[1],c1,c2,c3);
$ADDU $t_2,$at
$ADDU $c_2,$t_2
sltu $at,$c_2,$t_2
$ADDU $c_3,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_2,$b_1)
+ mfhi ($t_2,$a_2,$b_1)
$ADDU $c_1,$t_1
sltu $at,$c_1,$t_1
- $MULTU $a_3,$b_0 # mul_add_c(a[3],b[0],c1,c2,c3);
+ $MULTU ($a_3,$b_0) # mul_add_c(a[3],b[0],c1,c2,c3);
$ADDU $t_2,$at
$ADDU $c_2,$t_2
sltu $at,$c_2,$t_2
$ADDU $c_3,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_3,$b_0)
+ mfhi ($t_2,$a_3,$b_0)
$ADDU $c_1,$t_1
sltu $at,$c_1,$t_1
- $MULTU $a_3,$b_1 # mul_add_c(a[3],b[1],c2,c3,c1);
+ $MULTU ($a_3,$b_1) # mul_add_c(a[3],b[1],c2,c3,c1);
$ADDU $t_2,$at
$ADDU $c_2,$t_2
sltu $at,$c_2,$t_2
$ADDU $c_3,$at
$ST $c_1,3*$BNSZ($a0)
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_3,$b_1)
+ mfhi ($t_2,$a_3,$b_1)
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_2,$b_2 # mul_add_c(a[2],b[2],c2,c3,c1);
+ $MULTU ($a_2,$b_2) # mul_add_c(a[2],b[2],c2,c3,c1);
$ADDU $t_2,$at
$ADDU $c_3,$t_2
sltu $c_1,$c_3,$t_2
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_2,$b_2)
+ mfhi ($t_2,$a_2,$b_2)
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_1,$b_3 # mul_add_c(a[1],b[3],c2,c3,c1);
+ $MULTU ($a_1,$b_3) # mul_add_c(a[1],b[3],c2,c3,c1);
$ADDU $t_2,$at
$ADDU $c_3,$t_2
sltu $at,$c_3,$t_2
$ADDU $c_1,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_1,$b_3)
+ mfhi ($t_2,$a_1,$b_3)
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_2,$b_3 # mul_add_c(a[2],b[3],c3,c1,c2);
+ $MULTU ($a_2,$b_3) # mul_add_c(a[2],b[3],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_3,$t_2
sltu $at,$c_3,$t_2
$ADDU $c_1,$at
$ST $c_2,4*$BNSZ($a0)
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_2,$b_3)
+ mfhi ($t_2,$a_2,$b_3)
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
- $MULTU $a_3,$b_2 # mul_add_c(a[3],b[2],c3,c1,c2);
+ $MULTU ($a_3,$b_2) # mul_add_c(a[3],b[2],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_1,$t_2
sltu $c_2,$c_1,$t_2
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_3,$b_2)
+ mfhi ($t_2,$a_3,$b_2)
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
- $MULTU $a_3,$b_3 # mul_add_c(a[3],b[3],c1,c2,c3);
+ $MULTU ($a_3,$b_3) # mul_add_c(a[3],b[3],c1,c2,c3);
$ADDU $t_2,$at
$ADDU $c_1,$t_2
sltu $at,$c_1,$t_2
$ADDU $c_2,$at
$ST $c_3,5*$BNSZ($a0)
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_3,$b_3)
+ mfhi ($t_2,$a_3,$b_3)
$ADDU $c_1,$t_1
sltu $at,$c_1,$t_1
$ADDU $t_2,$at
@@ -1888,11 +1904,9 @@ my ($hi,$lo,$c0,$c1,$c2,
# commented as "forward multiplication" below];
)=@_;
$code.=<<___;
- mflo $lo
- mfhi $hi
$ADDU $c0,$lo
sltu $at,$c0,$lo
- $MULTU $an,$bn # forward multiplication
+ $MULTU ($an,$bn) # forward multiplication
$ADDU $c0,$lo
$ADDU $at,$hi
sltu $lo,$c0,$lo
@@ -1902,15 +1916,17 @@ ___
$code.=<<___ if (!$warm);
sltu $c2,$c1,$at
$ADDU $c1,$hi
- sltu $hi,$c1,$hi
- $ADDU $c2,$hi
___
$code.=<<___ if ($warm);
sltu $at,$c1,$at
$ADDU $c1,$hi
$ADDU $c2,$at
+___
+$code.=<<___;
sltu $hi,$c1,$hi
$ADDU $c2,$hi
+ mflo ($lo,$an,$bn)
+ mfhi ($hi,$an,$bn)
___
}
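Note the scheduling change in add_c2: the mflo/mfhi pair that used to
open the emitted block now closes it. Each product started by $MULTU
therefore has the previous product's carry propagation to hide its
latency before being read — software pipelining, in effect:

    $MULTU ($an,$bn)       # start the next product early
    ...carry-propagation adds for the previous product...
    mflo ($lo,$an,$bn)     # consume the new product only here
    mfhi ($hi,$an,$bn)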
@@ -1940,21 +1956,21 @@ $code.=<<___;
$LD $a_2,2*$BNSZ($a1)
$LD $a_3,3*$BNSZ($a1)
- $MULTU $a_0,$a_0 # mul_add_c(a[0],b[0],c1,c2,c3);
+ $MULTU ($a_0,$a_0) # mul_add_c(a[0],b[0],c1,c2,c3);
$LD $a_4,4*$BNSZ($a1)
$LD $a_5,5*$BNSZ($a1)
$LD $a_6,6*$BNSZ($a1)
$LD $a_7,7*$BNSZ($a1)
- mflo $c_1
- mfhi $c_2
+ mflo ($c_1,$a_0,$a_0)
+ mfhi ($c_2,$a_0,$a_0)
$ST $c_1,0($a0)
- $MULTU $a_0,$a_1 # mul_add_c2(a[0],b[1],c2,c3,c1);
- mflo $t_1
- mfhi $t_2
+ $MULTU ($a_0,$a_1) # mul_add_c2(a[0],b[1],c2,c3,c1);
+ mflo ($t_1,$a_0,$a_1)
+ mfhi ($t_2,$a_0,$a_1)
slt $c_1,$t_2,$zero
$SLL $t_2,1
- $MULTU $a_2,$a_0 # mul_add_c2(a[2],b[0],c3,c1,c2);
+ $MULTU ($a_2,$a_0) # mul_add_c2(a[2],b[0],c3,c1,c2);
slt $a2,$t_1,$zero
$ADDU $t_2,$a2
$SLL $t_1,1
@@ -1962,20 +1978,22 @@ $code.=<<___;
sltu $at,$c_2,$t_1
$ADDU $c_3,$t_2,$at
$ST $c_2,$BNSZ($a0)
+ mflo ($t_1,$a_2,$a_0)
+ mfhi ($t_2,$a_2,$a_0)
___
&add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0,
$a_1,$a_1); # mul_add_c(a[1],b[1],c3,c1,c2);
$code.=<<___;
- mflo $t_1
- mfhi $t_2
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
- $MULTU $a_0,$a_3 # mul_add_c2(a[0],b[3],c1,c2,c3);
+ $MULTU ($a_0,$a_3) # mul_add_c2(a[0],b[3],c1,c2,c3);
$ADDU $t_2,$at
$ADDU $c_1,$t_2
sltu $at,$c_1,$t_2
$ADDU $c_2,$at
$ST $c_3,2*$BNSZ($a0)
+ mflo ($t_1,$a_0,$a_3)
+ mfhi ($t_2,$a_0,$a_3)
___
&add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0,
$a_1,$a_2); # mul_add_c2(a[1],b[2],c1,c2,c3);
@@ -1989,16 +2007,16 @@ ___
&add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1,
$a_2,$a_2); # mul_add_c(a[2],b[2],c2,c3,c1);
$code.=<<___;
- mflo $t_1
- mfhi $t_2
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_0,$a_5 # mul_add_c2(a[0],b[5],c3,c1,c2);
+ $MULTU ($a_0,$a_5) # mul_add_c2(a[0],b[5],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_3,$t_2
sltu $at,$c_3,$t_2
$ADDU $c_1,$at
$ST $c_2,4*$BNSZ($a0)
+ mflo ($t_1,$a_0,$a_5)
+ mfhi ($t_2,$a_0,$a_5)
___
&add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0,
$a_1,$a_4); # mul_add_c2(a[1],b[4],c3,c1,c2);
@@ -2016,16 +2034,16 @@ ___
&add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1,
$a_3,$a_3); # mul_add_c(a[3],b[3],c1,c2,c3);
$code.=<<___;
- mflo $t_1
- mfhi $t_2
$ADDU $c_1,$t_1
sltu $at,$c_1,$t_1
- $MULTU $a_0,$a_7 # mul_add_c2(a[0],b[7],c2,c3,c1);
+ $MULTU ($a_0,$a_7) # mul_add_c2(a[0],b[7],c2,c3,c1);
$ADDU $t_2,$at
$ADDU $c_2,$t_2
sltu $at,$c_2,$t_2
$ADDU $c_3,$at
$ST $c_1,6*$BNSZ($a0)
+ mflo ($t_1,$a_0,$a_7)
+ mfhi ($t_2,$a_0,$a_7)
___
&add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0,
$a_1,$a_6); # mul_add_c2(a[1],b[6],c2,c3,c1);
@@ -2045,16 +2063,16 @@ ___
&add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1,
$a_4,$a_4); # mul_add_c(a[4],b[4],c3,c1,c2);
$code.=<<___;
- mflo $t_1
- mfhi $t_2
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
- $MULTU $a_2,$a_7 # mul_add_c2(a[2],b[7],c1,c2,c3);
+ $MULTU ($a_2,$a_7) # mul_add_c2(a[2],b[7],c1,c2,c3);
$ADDU $t_2,$at
$ADDU $c_1,$t_2
sltu $at,$c_1,$t_2
$ADDU $c_2,$at
$ST $c_3,8*$BNSZ($a0)
+ mflo ($t_1,$a_2,$a_7)
+ mfhi ($t_2,$a_2,$a_7)
___
&add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0,
$a_3,$a_6); # mul_add_c2(a[3],b[6],c1,c2,c3);
@@ -2070,16 +2088,16 @@ ___
&add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1,
$a_5,$a_5); # mul_add_c(a[5],b[5],c2,c3,c1);
$code.=<<___;
- mflo $t_1
- mfhi $t_2
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_4,$a_7 # mul_add_c2(a[4],b[7],c3,c1,c2);
+ $MULTU ($a_4,$a_7) # mul_add_c2(a[4],b[7],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_3,$t_2
sltu $at,$c_3,$t_2
$ADDU $c_1,$at
$ST $c_2,10*$BNSZ($a0)
+ mflo ($t_1,$a_4,$a_7)
+ mfhi ($t_2,$a_4,$a_7)
___
&add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0,
$a_5,$a_6); # mul_add_c2(a[5],b[6],c3,c1,c2);
@@ -2091,24 +2109,22 @@ ___
&add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0,
$a_6,$a_6); # mul_add_c(a[6],b[6],c1,c2,c3);
$code.=<<___;
- mflo $t_1
- mfhi $t_2
$ADDU $c_1,$t_1
sltu $at,$c_1,$t_1
- $MULTU $a_6,$a_7 # mul_add_c2(a[6],b[7],c2,c3,c1);
+ $MULTU ($a_6,$a_7) # mul_add_c2(a[6],b[7],c2,c3,c1);
$ADDU $t_2,$at
$ADDU $c_2,$t_2
sltu $at,$c_2,$t_2
$ADDU $c_3,$at
$ST $c_1,12*$BNSZ($a0)
+ mflo ($t_1,$a_6,$a_7)
+ mfhi ($t_2,$a_6,$a_7)
___
&add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0,
$a_7,$a_7); # mul_add_c(a[7],b[7],c3,c1,c2);
$code.=<<___;
$ST $c_2,13*$BNSZ($a0)
- mflo $t_1
- mfhi $t_2
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
$ADDU $t_2,$at
@@ -2152,19 +2168,19 @@ $code.=<<___;
.set reorder
$LD $a_0,0($a1)
$LD $a_1,$BNSZ($a1)
- $MULTU $a_0,$a_0 # mul_add_c(a[0],b[0],c1,c2,c3);
+ $MULTU ($a_0,$a_0) # mul_add_c(a[0],b[0],c1,c2,c3);
$LD $a_2,2*$BNSZ($a1)
$LD $a_3,3*$BNSZ($a1)
- mflo $c_1
- mfhi $c_2
+ mflo ($c_1,$a_0,$a_0)
+ mfhi ($c_2,$a_0,$a_0)
$ST $c_1,0($a0)
- $MULTU $a_0,$a_1 # mul_add_c2(a[0],b[1],c2,c3,c1);
- mflo $t_1
- mfhi $t_2
+ $MULTU ($a_0,$a_1) # mul_add_c2(a[0],b[1],c2,c3,c1);
+ mflo ($t_1,$a_0,$a_1)
+ mfhi ($t_2,$a_0,$a_1)
slt $c_1,$t_2,$zero
$SLL $t_2,1
- $MULTU $a_2,$a_0 # mul_add_c2(a[2],b[0],c3,c1,c2);
+ $MULTU ($a_2,$a_0) # mul_add_c2(a[2],b[0],c3,c1,c2);
slt $a2,$t_1,$zero
$ADDU $t_2,$a2
$SLL $t_1,1
@@ -2172,20 +2188,22 @@ $code.=<<___;
sltu $at,$c_2,$t_1
$ADDU $c_3,$t_2,$at
$ST $c_2,$BNSZ($a0)
+ mflo ($t_1,$a_2,$a_0)
+ mfhi ($t_2,$a_2,$a_0)
___
&add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0,
$a_1,$a_1); # mul_add_c(a[1],b[1],c3,c1,c2);
$code.=<<___;
- mflo $t_1
- mfhi $t_2
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
- $MULTU $a_0,$a_3 # mul_add_c2(a[0],b[3],c1,c2,c3);
+ $MULTU ($a_0,$a_3) # mul_add_c2(a[0],b[3],c1,c2,c3);
$ADDU $t_2,$at
$ADDU $c_1,$t_2
sltu $at,$c_1,$t_2
$ADDU $c_2,$at
$ST $c_3,2*$BNSZ($a0)
+ mflo ($t_1,$a_0,$a_3)
+ mfhi ($t_2,$a_0,$a_3)
___
&add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0,
$a_1,$a_2); # mul_add_c2(a[1],b[2],c1,c2,c3);
@@ -2197,24 +2215,22 @@ ___
&add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0,
$a_2,$a_2); # mul_add_c(a[2],b[2],c2,c3,c1);
$code.=<<___;
- mflo $t_1
- mfhi $t_2
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_2,$a_3 # mul_add_c2(a[2],b[3],c3,c1,c2);
+ $MULTU ($a_2,$a_3) # mul_add_c2(a[2],b[3],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_3,$t_2
sltu $at,$c_3,$t_2
$ADDU $c_1,$at
$ST $c_2,4*$BNSZ($a0)
+ mflo ($t_1,$a_2,$a_3)
+ mfhi ($t_2,$a_2,$a_3)
___
&add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0,
$a_3,$a_3); # mul_add_c(a[3],b[3],c1,c2,c3);
$code.=<<___;
$ST $c_3,5*$BNSZ($a0)
- mflo $t_1
- mfhi $t_2
$ADDU $c_1,$t_1
sltu $at,$c_1,$t_1
$ADDU $t_2,$at
diff --git a/crypto/bn/build.info b/crypto/bn/build.info
index c608ecc..3544106 100644
--- a/crypto/bn/build.info
+++ b/crypto/bn/build.info
@@ -34,8 +34,10 @@ INCLUDE[sparct4-mont.o]=..
GENERATE[sparcv9-gf2m.S]=asm/sparcv9-gf2m.pl $(PERLASM_SCHEME)
INCLUDE[sparcv9-gf2m.o]=..
-GENERATE[bn-mips.s]=asm/mips.pl $(PERLASM_SCHEME)
-GENERATE[mips-mont.s]=asm/mips-mont.pl $(PERLASM_SCHEME)
+GENERATE[bn-mips.S]=asm/mips.pl $(PERLASM_SCHEME)
+INCLUDE[bn-mips.o]=..
+GENERATE[mips-mont.S]=asm/mips-mont.pl $(PERLASM_SCHEME)
+INCLUDE[mips-mont.o]=..
GENERATE[s390x-mont.S]=asm/s390x-mont.pl $(PERLASM_SCHEME)
GENERATE[s390x-gf2m.s]=asm/s390x-gf2m.pl $(PERLASM_SCHEME)
@@ -64,16 +66,8 @@ GENERATE[armv4-gf2m.S]=asm/armv4-gf2m.pl $(PERLASM_SCHEME)
INCLUDE[armv4-gf2m.o]=..
GENERATE[armv8-mont.S]=asm/armv8-mont.pl $(PERLASM_SCHEME)
-OVERRIDES=bn-mips3.o pa-risc2W.o pa-risc2.c
+OVERRIDES=pa-risc2W.o pa-risc2.c
BEGINRAW[Makefile]
-##### BN assembler implementations
-
-{- $builddir -}/bn-mips3.o: {- $sourcedir -}/asm/mips3.s
- @if [ "$(CC)" = "gcc" ]; then \
- ABI=`expr "$(CFLAGS)" : ".*-mabi=\([n3264]*\)"` && \
- as -$$ABI -O -o $@ {- $sourcedir -}/asm/mips3.s; \
- else $(CC) -c $(CFLAGS) $(LIB_CFLAGS) -o $@ {- $sourcedir -}/asm/mips3.s; fi
-
# GNU assembler fails to compile PA-RISC2 modules, insist on calling
# vendor assembler...
{- $builddir -}/pa-risc2W.o: {- $sourcedir -}/asm/pa-risc2W.s
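With bn-mips.S now generated and pre-processed like any other source, the
ABI-sniffing raw make rule for the hand-written mips3.s is retired; the
generic pipeline earlier in this hunk covers the MIPS case:

    GENERATE[bn-mips.S]=asm/mips.pl $(PERLASM_SCHEME)
    INCLUDE[bn-mips.o]=..   # so that #include "mips_arch.h" resolves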
diff --git a/crypto/mips_arch.h b/crypto/mips_arch.h
new file mode 100644
index 0000000..e0cd905
--- /dev/null
+++ b/crypto/mips_arch.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the OpenSSL license (the "License"). You may not use
+ * this file except in compliance with the License. You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+#ifndef __MIPS_ARCH_H__
+# define __MIPS_ARCH_H__
+
+# if (defined(__mips_smartmips) || defined(_MIPS_ARCH_MIPS32R3) || \
+     defined(_MIPS_ARCH_MIPS32R5) || defined(_MIPS_ARCH_MIPS32R6)) \
+ && !defined(_MIPS_ARCH_MIPS32R2)
+# define _MIPS_ARCH_MIPS32R2
+# endif
+
+# if (defined(_MIPS_ARCH_MIPS64R3) || defined(_MIPS_ARCH_MIPS64R5) || \
+ defined(_MIPS_ARCH_MIPS64R6)) \
+ && !defined(_MIPS_ARCH_MIPS64R2)
+# define _MIPS_ARCH_MIPS64R2
+# endif
+
+# if defined(_MIPS_ARCH_MIPS64R6)
+# define dmultu(rs,rt)
+# define mflo(rd,rs,rt) dmulu rd,rs,rt
+# define mfhi(rd,rs,rt) dmuhu rd,rs,rt
+# elif defined(_MIPS_ARCH_MIPS32R6)
+# define multu(rs,rt)
+# define mflo(rd,rs,rt) mulu rd,rs,rt
+# define mfhi(rd,rs,rt) muhu rd,rs,rt
+# else
+# define dmultu(rs,rt) dmultu rs,rt
+# define multu(rs,rt) multu rs,rt
+# define mflo(rd,rs,rt) mflo rd
+# define mfhi(rd,rs,rt) mfhi rd
+# endif
+
+#endif
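This header also centralises the ISA fix-ups: smartmips, R3, R5 and R6
are all supersets of R2, so defining _MIPS_ARCH_MIPS32R2 (and its 64-bit
twin) here lets each perlasm file drop its private copy of the old guard:

    #if defined(__mips_smartmips) && !defined(_MIPS_ARCH_MIPS32R2)
    #define _MIPS_ARCH_MIPS32R2
    #endif

as seen removed from aes-mips.pl above and sha1/sha512-mips.pl below.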
diff --git a/crypto/poly1305/asm/poly1305-mips.pl b/crypto/poly1305/asm/poly1305-mips.pl
index d2b3e90..28b6772 100755
--- a/crypto/poly1305/asm/poly1305-mips.pl
+++ b/crypto/poly1305/asm/poly1305-mips.pl
@@ -67,6 +67,8 @@ $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0x0003f000" : "0x00030000";
($in0,$in1,$tmp0,$tmp1,$tmp2,$tmp3,$tmp4) = ($a4,$a5,$a6,$a7,$at,$t0,$t1);
$code.=<<___;
+#include "mips_arch.h"
+
#ifdef MIPSEB
# define MSB 0
# define LSB 7
@@ -92,10 +94,15 @@ poly1305_init:
beqz $inp,.Lno_key
+#if defined(_MIPS_ARCH_MIPS64R6)
+ ld $in0,0($inp)
+ ld $in1,8($inp)
+#else
ldl $in0,0+MSB($inp)
ldl $in1,8+MSB($inp)
ldr $in0,0+LSB($inp)
ldr $in1,8+LSB($inp)
+#endif
#ifdef MIPSEB
# if defined(_MIPS_ARCH_MIPS64R2)
dsbh $in0,$in0 # byte swap
@@ -182,7 +189,7 @@ poly1305_blocks_internal:
.frame $sp,6*8,$ra
.mask $SAVED_REGS_MASK,-8
.set noreorder
- dsub $sp,6*8
+ dsubu $sp,6*8
sd $s5,40($sp)
sd $s4,32($sp)
___
@@ -204,11 +211,16 @@ $code.=<<___;
ld $s1,40($ctx)
.Loop:
+#if defined(_MIPS_ARCH_MIPS64R6)
+ ld $in0,0($inp) # load input
+ ld $in1,8($inp)
+#else
ldl $in0,0+MSB($inp) # load input
ldl $in1,8+MSB($inp)
ldr $in0,0+LSB($inp)
- daddiu $len,-1
ldr $in1,8+LSB($inp)
+#endif
+ daddiu $len,-1
daddiu $inp,16
#ifdef MIPSEB
# if defined(_MIPS_ARCH_MIPS64R2)
@@ -258,42 +270,42 @@ $code.=<<___;
sltu $tmp1,$h1,$in1
daddu $h1,$tmp0
- dmultu $r0,$h0 # h0*r0
+ dmultu ($r0,$h0) # h0*r0
daddu $h2,$padbit
sltu $tmp0,$h1,$tmp0
- mflo $d0
- mfhi $d1
+ mflo ($d0,$r0,$h0)
+ mfhi ($d1,$r0,$h0)
- dmultu $s1,$h1 # h1*5*r1
+ dmultu ($s1,$h1) # h1*5*r1
daddu $tmp0,$tmp1
daddu $h2,$tmp0
- mflo $tmp0
- mfhi $tmp1
+ mflo ($tmp0,$s1,$h1)
+ mfhi ($tmp1,$s1,$h1)
- dmultu $r1,$h0 # h0*r1
+ dmultu ($r1,$h0) # h0*r1
daddu $d0,$tmp0
daddu $d1,$tmp1
- mflo $tmp2
- mfhi $d2
+ mflo ($tmp2,$r1,$h0)
+ mfhi ($d2,$r1,$h0)
sltu $tmp0,$d0,$tmp0
daddu $d1,$tmp0
- dmultu $r0,$h1 # h1*r0
+ dmultu ($r0,$h1) # h1*r0
daddu $d1,$tmp2
sltu $tmp2,$d1,$tmp2
- mflo $tmp0
- mfhi $tmp1
+ mflo ($tmp0,$r0,$h1)
+ mfhi ($tmp1,$r0,$h1)
daddu $d2,$tmp2
- dmultu $s1,$h2 # h2*5*r1
+ dmultu ($s1,$h2) # h2*5*r1
daddu $d1,$tmp0
daddu $d2,$tmp1
- mflo $tmp2
+ mflo ($tmp2,$s1,$h2)
- dmultu $r0,$h2 # h2*r0
+ dmultu ($r0,$h2) # h2*r0
sltu $tmp0,$d1,$tmp0
daddu $d2,$tmp0
- mflo $tmp3
+ mflo ($tmp3,$r0,$h2)
daddu $d1,$tmp2
daddu $d2,$tmp3
@@ -329,7 +341,7 @@ $code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi epilogue
___
$code.=<<___;
jr $ra
- dadd $sp,6*8
+ daddu $sp,6*8
.end poly1305_blocks_internal
___
}
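The dsub->dsubu and dadd->daddu changes framing poly1305_blocks_internal
are the trap-free switch from the commit message: with an immediate
operand the non-u forms expand through daddi, which R6 removed, whereas
daddiu exists in every revision — and stack arithmetic must not trap on
overflow in any case:

    dsubu $sp,6*8   # allocate the frame
    ...
    daddu $sp,6*8   # release it on return; no overflow trap possible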
diff --git a/crypto/poly1305/build.info b/crypto/poly1305/build.info
index d575f5a..358fa82 100644
--- a/crypto/poly1305/build.info
+++ b/crypto/poly1305/build.info
@@ -13,6 +13,7 @@ INCLUDE[poly1305-armv4.o]=..
GENERATE[poly1305-armv8.S]=asm/poly1305-armv8.pl $(PERLASM_SCHEME)
INCLUDE[poly1305-armv8.o]=..
GENERATE[poly1305-mips.S]=asm/poly1305-mips.pl $(PERLASM_SCHEME)
+INCLUDE[poly1305-mips.o]=..
BEGINRAW[Makefile(unix)]
{- $builddir -}/poly1305-%.S: {- $sourcedir -}/asm/poly1305-%.pl
diff --git a/crypto/sha/asm/sha1-mips.pl b/crypto/sha/asm/sha1-mips.pl
index 882f973..d9911c8 100644
--- a/crypto/sha/asm/sha1-mips.pl
+++ b/crypto/sha/asm/sha1-mips.pl
@@ -56,15 +56,15 @@
$flavour = shift || "o32"; # supported flavours are o32,n32,64,nubi32,nubi64
if ($flavour =~ /64|n32/i) {
- $PTR_ADD="dadd"; # incidentally works even on n32
- $PTR_SUB="dsub"; # incidentally works even on n32
+ $PTR_ADD="daddu"; # incidentally works even on n32
+ $PTR_SUB="dsubu"; # incidentally works even on n32
$REG_S="sd";
$REG_L="ld";
$PTR_SLL="dsll"; # incidentally works even on n32
$SZREG=8;
} else {
- $PTR_ADD="add";
- $PTR_SUB="sub";
+ $PTR_ADD="addu";
+ $PTR_SUB="subu";
$REG_S="sw";
$REG_L="lw";
$PTR_SLL="sll";
@@ -126,10 +126,14 @@ $code.=<<___;
addu $e,$K # $i
xor $t0,$c,$d
rotr $t1,$a,27
- lwl @X[$j],$j*4+$MSB($inp)
and $t0,$b
addu $e,$t1
+#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
+ lw @X[$j],$j*4($inp)
+#else
+ lwl @X[$j],$j*4+$MSB($inp)
lwr @X[$j],$j*4+$LSB($inp)
+#endif
xor $t0,$d
addu $e, at X[$i]
rotr $b,$b,2
@@ -336,14 +340,12 @@ $FRAMESIZE=16; # large enough to accommodate NUBI saved registers
$SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0xc0fff008" : "0xc0ff0000";
$code=<<___;
+#include "mips_arch.h"
+
#ifdef OPENSSL_FIPSCANISTER
# include <openssl/fipssyms.h>
#endif
-#if defined(__mips_smartmips) && !defined(_MIPS_ARCH_MIPS32R2)
-#define _MIPS_ARCH_MIPS32R2
-#endif
-
.text
.set noat
@@ -387,10 +389,16 @@ $code.=<<___;
.align 4
.Loop:
.set reorder
+#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
+ lui $K,0x5a82
+ lw @X[0],($inp)
+ ori $K,0x7999 # K_00_19
+#else
lwl @X[0],$MSB($inp)
lui $K,0x5a82
lwr @X[0],$LSB($inp)
ori $K,0x7999 # K_00_19
+#endif
___
for ($i=0;$i<15;$i++) { &BODY_00_14($i, at V); unshift(@V,pop(@V)); }
for (;$i<20;$i++) { &BODY_15_19($i, at V); unshift(@V,pop(@V)); }
diff --git a/crypto/sha/asm/sha512-mips.pl b/crypto/sha/asm/sha512-mips.pl
index 5c2d23f..5464543 100644
--- a/crypto/sha/asm/sha512-mips.pl
+++ b/crypto/sha/asm/sha512-mips.pl
@@ -60,16 +60,16 @@ $flavour = shift || "o32"; # supported flavours are o32,n32,64,nubi32,nubi64
if ($flavour =~ /64|n32/i) {
$PTR_LA="dla";
- $PTR_ADD="dadd"; # incidentally works even on n32
- $PTR_SUB="dsub"; # incidentally works even on n32
+ $PTR_ADD="daddu"; # incidentally works even on n32
+ $PTR_SUB="dsubu"; # incidentally works even on n32
$REG_S="sd";
$REG_L="ld";
$PTR_SLL="dsll"; # incidentally works even on n32
$SZREG=8;
} else {
$PTR_LA="la";
- $PTR_ADD="add";
- $PTR_SUB="sub";
+ $PTR_ADD="addu";
+ $PTR_SUB="subu";
$REG_S="sw";
$REG_L="lw";
$PTR_SLL="sll";
@@ -135,8 +135,12 @@ my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
my ($T1,$tmp0,$tmp1,$tmp2)=(@X[4], at X[5], at X[6], at X[7]);
$code.=<<___ if ($i<15);
+#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
+ ${LD} @X[1],`($i+1)*$SZ`($inp)
+#else
${LD}l @X[1],`($i+1)*$SZ+$MSB`($inp)
${LD}r @X[1],`($i+1)*$SZ+$LSB`($inp)
+#endif
___
$code.=<<___ if (!$big_endian && $i<16 && $SZ==4);
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
@@ -298,14 +302,12 @@ $FRAMESIZE=16*$SZ+16*$SZREG;
$SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0xc0fff008" : "0xc0ff0000";
$code.=<<___;
+#include "mips_arch.h"
+
#ifdef OPENSSL_FIPSCANISTER
# include <openssl/fipssyms.h>
#endif
-#if defined(__mips_smartmips) && !defined(_MIPS_ARCH_MIPS32R2)
-#define _MIPS_ARCH_MIPS32R2
-#endif
-
.text
.set noat
#if !defined(__mips_eabi) && (!defined(__vxworks) || defined(__pic__))
@@ -369,8 +371,12 @@ $code.=<<___;
.align 5
.Loop:
+#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
+ ${LD} @X[0],($inp)
+#else
${LD}l @X[0],$MSB($inp)
${LD}r @X[0],$LSB($inp)
+#endif
___
for ($i=0;$i<16;$i++)
{ &BODY_00_15($i, at V); unshift(@V,pop(@V)); push(@X,shift(@X)); }
diff --git a/crypto/sha/build.info b/crypto/sha/build.info
index 5843e50..4b3225b 100644
--- a/crypto/sha/build.info
+++ b/crypto/sha/build.info
@@ -39,8 +39,11 @@ GENERATE[sha256-parisc.s]=asm/sha512-parisc.pl $(PERLASM_SCHEME)
GENERATE[sha512-parisc.s]=asm/sha512-parisc.pl $(PERLASM_SCHEME)
GENERATE[sha1-mips.S]=asm/sha1-mips.pl $(PERLASM_SCHEME)
+INCLUDE[sha1-mips.o]=..
GENERATE[sha256-mips.S]=asm/sha512-mips.pl $(PERLASM_SCHEME)
+INCLUDE[sha256-mips.o]=..
GENERATE[sha512-mips.S]=asm/sha512-mips.pl $(PERLASM_SCHEME)
+INCLUDE[sha512-mips.o]=..
GENERATE[sha1-armv4-large.S]=asm/sha1-armv4-large.pl $(PERLASM_SCHEME)
INCLUDE[sha1-armv4-large.o]=..