diff --git a/meta/recipes-connectivity/openssl/openssl/CVE-2025-27587-1.patch b/meta/recipes-connectivity/openssl/openssl/CVE-2025-27587-1.patch new file mode 100644 index 0000000000..eb3fc52dca --- /dev/null +++ b/meta/recipes-connectivity/openssl/openssl/CVE-2025-27587-1.patch @@ -0,0 +1,1918 @@ +From 14ac0f0e4e1f36793d09b41ffd5e482575289ab2 Mon Sep 17 00:00:00 2001 +From: Danny Tsen +Date: Tue, 11 Feb 2025 13:48:01 -0500 +Subject: [PATCH] Fix Minerva timing side-channel signal for P-384 curve on PPC + +1. bn_ppc.c: Used bn_mul_mont_int() instead of bn_mul_mont_300_fixed_n6() + for Montgomery multiplication. +2. ecp_nistp384-ppc64.pl: + - Re-wrote p384_felem_mul and p384_felem_square for easier maintenance with + minumum perl wrapper. + - Implemented p384_felem_reduce, p384_felem_mul_reduce and p384_felem_square_reduce. + - Implemented p384_felem_diff64, felem_diff_128_64 and felem_diff128 in assembly. +3. ecp_nistp384.c: + - Added wrapper function for p384_felem_mul_reduce and p384_felem_square_reduce. 
+ +Signed-off-by: Danny Tsen + +Reviewed-by: Dmitry Belyavskiy +Reviewed-by: Tomas Mraz +(Merged from https://github.com/openssl/openssl/pull/26709) + +(cherry picked from commit 85cabd94958303859b1551364a609d4ff40b67a5) + +CVE: CVE-2025-27587 +Upstream-Status: Backport [https://github.com/openssl/openssl/commit/14ac0f0e4e1f36793d09b41ffd5e482575289ab2] +Signed-off-by: Peter Marko +--- + crypto/bn/bn_ppc.c | 3 + + crypto/ec/asm/ecp_nistp384-ppc64.pl | 1724 +++++++++++++++++++++++---- + crypto/ec/ecp_nistp384.c | 28 +- + 3 files changed, 1504 insertions(+), 251 deletions(-) + +diff --git a/crypto/bn/bn_ppc.c b/crypto/bn/bn_ppc.c +index 1e9421bee2..29293bad55 100644 +--- a/crypto/bn/bn_ppc.c ++++ b/crypto/bn/bn_ppc.c +@@ -41,12 +41,15 @@ int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, + */ + + #if defined(_ARCH_PPC64) && !defined(__ILP32__) ++ /* Minerva side-channel fix danny */ ++# if defined(USE_FIXED_N6) + if (num == 6) { + if (OPENSSL_ppccap_P & PPC_MADD300) + return bn_mul_mont_300_fixed_n6(rp, ap, bp, np, n0, num); + else + return bn_mul_mont_fixed_n6(rp, ap, bp, np, n0, num); + } ++# endif + #endif + + return bn_mul_mont_int(rp, ap, bp, np, n0, num); +diff --git a/crypto/ec/asm/ecp_nistp384-ppc64.pl b/crypto/ec/asm/ecp_nistp384-ppc64.pl +index 28f4168e52..b663bddfc6 100755 +--- a/crypto/ec/asm/ecp_nistp384-ppc64.pl ++++ b/crypto/ec/asm/ecp_nistp384-ppc64.pl +@@ -7,13 +7,15 @@ + # https://www.openssl.org/source/license.html + # + # ==================================================================== +-# Written by Rohan McLure for the OpenSSL +-# project. ++# Written by Danny Tsen # for the OpenSSL project. ++# ++# Copyright 2025- IBM Corp. + # ==================================================================== + # +-# p384 lower-level primitives for PPC64 using vector instructions. ++# p384 lower-level primitives for PPC64. 
+ # + ++ + use strict; + use warnings; + +@@ -21,7 +23,7 @@ my $flavour = shift; + my $output = ""; + while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} + if (!$output) { +- $output = "-"; ++ $output = "-"; + } + + my ($xlate, $dir); +@@ -35,271 +37,1495 @@ open OUT,"| \"$^X\" $xlate $flavour $output"; + + my $code = ""; + +-my ($sp, $outp, $savelr, $savesp) = ("r1", "r3", "r10", "r12"); +- +-my $vzero = "v32"; +- +-sub startproc($) +-{ +- my ($name) = @_; +- +- $code.=<<___; +- .globl ${name} +- .align 5 +-${name}: +- +-___ +-} +- +-sub endproc($) +-{ +- my ($name) = @_; +- +- $code.=<<___; +- blr +- .size ${name},.-${name} +- +-___ +-} +- +-sub load_vrs($$) +-{ +- my ($pointer, $reg_list) = @_; +- +- for (my $i = 0; $i <= 6; $i++) { +- my $offset = $i * 8; +- $code.=<<___; +- lxsd $reg_list->[$i],$offset($pointer) +-___ +- } +- +- $code.=<<___; +- +-___ +-} +- +-sub store_vrs($$) +-{ +- my ($pointer, $reg_list) = @_; +- +- for (my $i = 0; $i <= 12; $i++) { +- my $offset = $i * 16; +- $code.=<<___; +- stxv $reg_list->[$i],$offset($pointer) +-___ +- } +- +- $code.=<<___; +- +-___ +-} +- + $code.=<<___; +-.machine "any" ++.machine "any" + .text + +-___ ++.globl p384_felem_mul ++.type p384_felem_mul,\@function ++.align 4 ++p384_felem_mul: + +-{ +- # mul/square common +- my ($t1, $t2, $t3, $t4) = ("v33", "v34", "v42", "v43"); +- my ($zero, $one) = ("r8", "r9"); +- my $out = "v51"; ++ stdu 1, -176(1) ++ mflr 0 ++ std 14, 56(1) ++ std 15, 64(1) ++ std 16, 72(1) ++ std 17, 80(1) ++ std 18, 88(1) ++ std 19, 96(1) ++ std 20, 104(1) ++ std 21, 112(1) ++ std 22, 120(1) + +- { +- # +- # p384_felem_mul +- # ++ bl _p384_felem_mul_core + +- my ($in1p, $in2p) = ("r4", "r5"); +- my @in1 = map("v$_",(44..50)); +- my @in2 = map("v$_",(35..41)); ++ mtlr 0 ++ ld 14, 56(1) ++ ld 15, 64(1) ++ ld 16, 72(1) ++ ld 17, 80(1) ++ ld 18, 88(1) ++ ld 19, 96(1) ++ ld 20, 104(1) ++ ld 21, 112(1) ++ ld 22, 120(1) ++ addi 1, 1, 176 ++ blr ++.size p384_felem_mul,.-p384_felem_mul + +- 
startproc("p384_felem_mul"); ++.globl p384_felem_square ++.type p384_felem_square,\@function ++.align 4 ++p384_felem_square: + +- $code.=<<___; +- vspltisw $vzero,0 ++ stdu 1, -176(1) ++ mflr 0 ++ std 14, 56(1) ++ std 15, 64(1) ++ std 16, 72(1) ++ std 17, 80(1) + +-___ ++ bl _p384_felem_square_core + +- load_vrs($in1p, \@in1); +- load_vrs($in2p, \@in2); +- +- $code.=<<___; +- vmsumudm $out,$in1[0],$in2[0],$vzero +- stxv $out,0($outp) +- +- xxpermdi $t1,$in1[0],$in1[1],0b00 +- xxpermdi $t2,$in2[1],$in2[0],0b00 +- vmsumudm $out,$t1,$t2,$vzero +- stxv $out,16($outp) +- +- xxpermdi $t2,$in2[2],$in2[1],0b00 +- vmsumudm $out,$t1,$t2,$vzero +- vmsumudm $out,$in1[2],$in2[0],$out +- stxv $out,32($outp) +- +- xxpermdi $t2,$in2[1],$in2[0],0b00 +- xxpermdi $t3,$in1[2],$in1[3],0b00 +- xxpermdi $t4,$in2[3],$in2[2],0b00 +- vmsumudm $out,$t1,$t4,$vzero +- vmsumudm $out,$t3,$t2,$out +- stxv $out,48($outp) +- +- xxpermdi $t2,$in2[4],$in2[3],0b00 +- xxpermdi $t4,$in2[2],$in2[1],0b00 +- vmsumudm $out,$t1,$t2,$vzero +- vmsumudm $out,$t3,$t4,$out +- vmsumudm $out,$in1[4],$in2[0],$out +- stxv $out,64($outp) +- +- xxpermdi $t2,$in2[5],$in2[4],0b00 +- xxpermdi $t4,$in2[3],$in2[2],0b00 +- vmsumudm $out,$t1,$t2,$vzero +- vmsumudm $out,$t3,$t4,$out +- xxpermdi $t4,$in2[1],$in2[0],0b00 +- xxpermdi $t1,$in1[4],$in1[5],0b00 +- vmsumudm $out,$t1,$t4,$out +- stxv $out,80($outp) +- +- xxpermdi $t1,$in1[0],$in1[1],0b00 +- xxpermdi $t2,$in2[6],$in2[5],0b00 +- xxpermdi $t4,$in2[4],$in2[3],0b00 +- vmsumudm $out,$t1,$t2,$vzero +- vmsumudm $out,$t3,$t4,$out +- xxpermdi $t2,$in2[2],$in2[1],0b00 +- xxpermdi $t1,$in1[4],$in1[5],0b00 +- vmsumudm $out,$t1,$t2,$out +- vmsumudm $out,$in1[6],$in2[0],$out +- stxv $out,96($outp) +- +- xxpermdi $t1,$in1[1],$in1[2],0b00 +- xxpermdi $t2,$in2[6],$in2[5],0b00 +- xxpermdi $t3,$in1[3],$in1[4],0b00 +- vmsumudm $out,$t1,$t2,$vzero +- vmsumudm $out,$t3,$t4,$out +- xxpermdi $t3,$in2[2],$in2[1],0b00 +- xxpermdi $t1,$in1[5],$in1[6],0b00 +- vmsumudm $out,$t1,$t3,$out +- stxv 
$out,112($outp) +- +- xxpermdi $t1,$in1[2],$in1[3],0b00 +- xxpermdi $t3,$in1[4],$in1[5],0b00 +- vmsumudm $out,$t1,$t2,$vzero +- vmsumudm $out,$t3,$t4,$out +- vmsumudm $out,$in1[6],$in2[2],$out +- stxv $out,128($outp) +- +- xxpermdi $t1,$in1[3],$in1[4],0b00 +- vmsumudm $out,$t1,$t2,$vzero +- xxpermdi $t1,$in1[5],$in1[6],0b00 +- vmsumudm $out,$t1,$t4,$out +- stxv $out,144($outp) +- +- vmsumudm $out,$t3,$t2,$vzero +- vmsumudm $out,$in1[6],$in2[4],$out +- stxv $out,160($outp) +- +- vmsumudm $out,$t1,$t2,$vzero +- stxv $out,176($outp) +- +- vmsumudm $out,$in1[6],$in2[6],$vzero +- stxv $out,192($outp) +-___ ++ mtlr 0 ++ ld 14, 56(1) ++ ld 15, 64(1) ++ ld 16, 72(1) ++ ld 17, 80(1) ++ addi 1, 1, 176 ++ blr ++.size p384_felem_square,.-p384_felem_square + +- endproc("p384_felem_mul"); +- } ++# ++# Felem mul core function - ++# r3, r4 and r5 need to pre-loaded. ++# ++.type _p384_felem_mul_core,\@function ++.align 4 ++_p384_felem_mul_core: + +- { +- # +- # p384_felem_square +- # ++ ld 6,0(4) ++ ld 14,0(5) ++ ld 7,8(4) ++ ld 15,8(5) ++ ld 8,16(4) ++ ld 16,16(5) ++ ld 9,24(4) ++ ld 17,24(5) ++ ld 10,32(4) ++ ld 18,32(5) ++ ld 11,40(4) ++ ld 19,40(5) ++ ld 12,48(4) ++ ld 20,48(5) + +- my ($inp) = ("r4"); +- my @in = map("v$_",(44..50)); +- my @inx2 = map("v$_",(35..41)); ++ # out0 ++ mulld 21, 14, 6 ++ mulhdu 22, 14, 6 ++ std 21, 0(3) ++ std 22, 8(3) + +- startproc("p384_felem_square"); ++ vxor 0, 0, 0 + +- $code.=<<___; +- vspltisw $vzero,0 ++ # out1 ++ mtvsrdd 32+13, 14, 6 ++ mtvsrdd 32+14, 7, 15 ++ vmsumudm 1, 13, 14, 0 + +-___ ++ # out2 ++ mtvsrdd 32+15, 15, 6 ++ mtvsrdd 32+16, 7, 16 ++ mtvsrdd 32+17, 0, 8 ++ mtvsrdd 32+18, 0, 14 ++ vmsumudm 19, 15, 16, 0 ++ vmsumudm 2, 17, 18, 19 + +- load_vrs($inp, \@in); ++ # out3 ++ mtvsrdd 32+13, 16, 6 ++ mtvsrdd 32+14, 7, 17 ++ mtvsrdd 32+15, 14, 8 ++ mtvsrdd 32+16, 9, 15 ++ vmsumudm 19, 13, 14, 0 ++ vmsumudm 3, 15, 16, 19 + +- $code.=<<___; +- li $zero,0 +- li $one,1 +- mtvsrdd $t1,$one,$zero +-___ ++ # out4 ++ mtvsrdd 32+13, 17, 6 ++ 
mtvsrdd 32+14, 7, 18 ++ mtvsrdd 32+15, 15, 8 ++ mtvsrdd 32+16, 9, 16 ++ mtvsrdd 32+17, 0, 10 ++ mtvsrdd 32+18, 0, 14 ++ vmsumudm 19, 13, 14, 0 ++ vmsumudm 4, 15, 16, 19 ++ vmsumudm 4, 17, 18, 4 + +- for (my $i = 0; $i <= 6; $i++) { +- $code.=<<___; +- vsld $inx2[$i],$in[$i],$t1 +-___ +- } +- +- $code.=<<___; +- vmsumudm $out,$in[0],$in[0],$vzero +- stxv $out,0($outp) +- +- vmsumudm $out,$in[0],$inx2[1],$vzero +- stxv $out,16($outp) +- +- vmsumudm $out,$in[0],$inx2[2],$vzero +- vmsumudm $out,$in[1],$in[1],$out +- stxv $out,32($outp) +- +- xxpermdi $t1,$in[0],$in[1],0b00 +- xxpermdi $t2,$inx2[3],$inx2[2],0b00 +- vmsumudm $out,$t1,$t2,$vzero +- stxv $out,48($outp) +- +- xxpermdi $t4,$inx2[4],$inx2[3],0b00 +- vmsumudm $out,$t1,$t4,$vzero +- vmsumudm $out,$in[2],$in[2],$out +- stxv $out,64($outp) +- +- xxpermdi $t2,$inx2[5],$inx2[4],0b00 +- vmsumudm $out,$t1,$t2,$vzero +- vmsumudm $out,$in[2],$inx2[3],$out +- stxv $out,80($outp) +- +- xxpermdi $t2,$inx2[6],$inx2[5],0b00 +- vmsumudm $out,$t1,$t2,$vzero +- vmsumudm $out,$in[2],$inx2[4],$out +- vmsumudm $out,$in[3],$in[3],$out +- stxv $out,96($outp) +- +- xxpermdi $t3,$in[1],$in[2],0b00 +- vmsumudm $out,$t3,$t2,$vzero +- vmsumudm $out,$in[3],$inx2[4],$out +- stxv $out,112($outp) +- +- xxpermdi $t1,$in[2],$in[3],0b00 +- vmsumudm $out,$t1,$t2,$vzero +- vmsumudm $out,$in[4],$in[4],$out +- stxv $out,128($outp) +- +- xxpermdi $t1,$in[3],$in[4],0b00 +- vmsumudm $out,$t1,$t2,$vzero +- stxv $out,144($outp) +- +- vmsumudm $out,$in[4],$inx2[6],$vzero +- vmsumudm $out,$in[5],$in[5],$out +- stxv $out,160($outp) +- +- vmsumudm $out,$in[5],$inx2[6],$vzero +- stxv $out,176($outp) +- +- vmsumudm $out,$in[6],$in[6],$vzero +- stxv $out,192($outp) +-___ ++ # out5 ++ mtvsrdd 32+13, 18, 6 ++ mtvsrdd 32+14, 7, 19 ++ mtvsrdd 32+15, 16, 8 ++ mtvsrdd 32+16, 9, 17 ++ mtvsrdd 32+17, 14, 10 ++ mtvsrdd 32+18, 11, 15 ++ vmsumudm 19, 13, 14, 0 ++ vmsumudm 5, 15, 16, 19 ++ vmsumudm 5, 17, 18, 5 ++ ++ stxv 32+1, 16(3) ++ stxv 32+2, 32(3) ++ stxv 32+3, 
48(3) ++ stxv 32+4, 64(3) ++ stxv 32+5, 80(3) ++ ++ # out6 ++ mtvsrdd 32+13, 19, 6 ++ mtvsrdd 32+14, 7, 20 ++ mtvsrdd 32+15, 17, 8 ++ mtvsrdd 32+16, 9, 18 ++ mtvsrdd 32+17, 15, 10 ++ mtvsrdd 32+18, 11, 16 ++ vmsumudm 19, 13, 14, 0 ++ vmsumudm 6, 15, 16, 19 ++ mtvsrdd 32+13, 0, 12 ++ mtvsrdd 32+14, 0, 14 ++ vmsumudm 19, 17, 18, 6 ++ vmsumudm 6, 13, 14, 19 ++ ++ # out7 ++ mtvsrdd 32+13, 19, 7 ++ mtvsrdd 32+14, 8, 20 ++ mtvsrdd 32+15, 17, 9 ++ mtvsrdd 32+16, 10, 18 ++ mtvsrdd 32+17, 15, 11 ++ mtvsrdd 32+18, 12, 16 ++ vmsumudm 19, 13, 14, 0 ++ vmsumudm 7, 15, 16, 19 ++ vmsumudm 7, 17, 18, 7 ++ ++ # out8 ++ mtvsrdd 32+13, 19, 8 ++ mtvsrdd 32+14, 9, 20 ++ mtvsrdd 32+15, 17, 10 ++ mtvsrdd 32+16, 11, 18 ++ mtvsrdd 32+17, 0, 12 ++ mtvsrdd 32+18, 0, 16 ++ vmsumudm 19, 13, 14, 0 ++ vmsumudm 8, 15, 16, 19 ++ vmsumudm 8, 17, 18, 8 ++ ++ # out9 ++ mtvsrdd 32+13, 19, 9 ++ mtvsrdd 32+14, 10, 20 ++ mtvsrdd 32+15, 17, 11 ++ mtvsrdd 32+16, 12, 18 ++ vmsumudm 19, 13, 14, 0 ++ vmsumudm 9, 15, 16, 19 ++ ++ # out10 ++ mtvsrdd 32+13, 19, 10 ++ mtvsrdd 32+14, 11, 20 ++ mtvsrdd 32+15, 0, 12 ++ mtvsrdd 32+16, 0, 18 ++ vmsumudm 19, 13, 14, 0 ++ vmsumudm 10, 15, 16, 19 ++ ++ # out11 ++ mtvsrdd 32+17, 19, 11 ++ mtvsrdd 32+18, 12, 20 ++ vmsumudm 11, 17, 18, 0 ++ ++ stxv 32+6, 96(3) ++ stxv 32+7, 112(3) ++ stxv 32+8, 128(3) ++ stxv 32+9, 144(3) ++ stxv 32+10, 160(3) ++ stxv 32+11, 176(3) ++ ++ # out12 ++ mulld 21, 20, 12 ++ mulhdu 22, 20, 12 # out12 ++ ++ std 21, 192(3) ++ std 22, 200(3) ++ ++ blr ++.size _p384_felem_mul_core,.-_p384_felem_mul_core ++ ++# ++# Felem square core function - ++# r3 and r4 need to be pre-loaded.
++# ++.type _p384_felem_square_core,\@function ++.align 4 ++_p384_felem_square_core: ++ ++ ld 6, 0(4) ++ ld 7, 8(4) ++ ld 8, 16(4) ++ ld 9, 24(4) ++ ld 10, 32(4) ++ ld 11, 40(4) ++ ld 12, 48(4) ++ ++ vxor 0, 0, 0 ++ ++ # out0 ++ mulld 14, 6, 6 ++ mulhdu 15, 6, 6 ++ std 14, 0(3) ++ std 15, 8(3) ++ ++ # out1 ++ add 14, 6, 6 ++ mtvsrdd 32+13, 0, 14 ++ mtvsrdd 32+14, 0, 7 ++ vmsumudm 1, 13, 14, 0 ++ ++ # out2 ++ mtvsrdd 32+15, 7, 14 ++ mtvsrdd 32+16, 7, 8 ++ vmsumudm 2, 15, 16, 0 ++ ++ # out3 ++ add 15, 7, 7 ++ mtvsrdd 32+13, 8, 14 ++ mtvsrdd 32+14, 15, 9 ++ vmsumudm 3, 13, 14, 0 ++ ++ # out4 ++ mtvsrdd 32+13, 9, 14 ++ mtvsrdd 32+14, 15, 10 ++ mtvsrdd 32+15, 0, 8 ++ vmsumudm 4, 13, 14, 0 ++ vmsumudm 4, 15, 15, 4 ++ ++ # out5 ++ mtvsrdd 32+13, 10, 14 ++ mtvsrdd 32+14, 15, 11 ++ add 16, 8, 8 ++ mtvsrdd 32+15, 0, 16 ++ mtvsrdd 32+16, 0, 9 ++ vmsumudm 5, 13, 14, 0 ++ vmsumudm 5, 15, 16, 5 ++ ++ stxv 32+1, 16(3) ++ stxv 32+2, 32(3) ++ stxv 32+3, 48(3) ++ stxv 32+4, 64(3) ++ ++ # out6 ++ mtvsrdd 32+13, 11, 14 ++ mtvsrdd 32+14, 15, 12 ++ mtvsrdd 32+15, 9, 16 ++ mtvsrdd 32+16, 9, 10 ++ stxv 32+5, 80(3) ++ vmsumudm 19, 13, 14, 0 ++ vmsumudm 6, 15, 16, 19 ++ ++ # out7 ++ add 17, 9, 9 ++ mtvsrdd 32+13, 11, 15 ++ mtvsrdd 32+14, 16, 12 ++ mtvsrdd 32+15, 0, 17 ++ mtvsrdd 32+16, 0, 10 ++ vmsumudm 19, 13, 14, 0 ++ vmsumudm 7, 15, 16, 19 ++ ++ # out8 ++ mtvsrdd 32+13, 11, 16 ++ mtvsrdd 32+14, 17, 12 ++ mtvsrdd 32+15, 0, 10 ++ vmsumudm 19, 13, 14, 0 ++ vmsumudm 8, 15, 15, 19 ++ ++ # out9 ++ add 14, 10, 10 ++ mtvsrdd 32+13, 11, 17 ++ mtvsrdd 32+14, 14, 12 ++ vmsumudm 9, 13, 14, 0 ++ ++ # out10 ++ mtvsrdd 32+13, 11, 14 ++ mtvsrdd 32+14, 11, 12 ++ vmsumudm 10, 13, 14, 0 ++ ++ stxv 32+6, 96(3) ++ stxv 32+7, 112(3) ++ ++ # out11 ++ #add 14, 11, 11 ++ #mtvsrdd 32+13, 0, 14 ++ #mtvsrdd 32+14, 0, 12 ++ #vmsumudm 11, 13, 14, 0 ++ ++ mulld 6, 12, 11 ++ mulhdu 7, 12, 11 ++ addc 8, 6, 6 ++ adde 9, 7, 7 ++ ++ stxv 32+8, 128(3) ++ stxv 32+9, 144(3) ++ stxv 32+10, 160(3) ++ #stxv 32+11, 176(3) ++ ++ # 
out12 ++ mulld 14, 12, 12 ++ mulhdu 15, 12, 12 ++ ++ std 8, 176(3) ++ std 9, 184(3) ++ std 14, 192(3) ++ std 15, 200(3) ++ ++ blr ++.size _p384_felem_square_core,.-_p384_felem_square_core ++ ++# ++# widefelem (128 bits) * 8 ++# ++.macro F128_X_8 _off1 _off2 ++ ld 9,\\_off1(3) ++ ld 8,\\_off2(3) ++ srdi 10,9,61 ++ rldimi 10,8,3,0 ++ sldi 9,9,3 ++ std 9,\\_off1(3) ++ std 10,\\_off2(3) ++.endm ++ ++.globl p384_felem128_mul_by_8 ++.type p384_felem128_mul_by_8, \@function ++.align 4 ++p384_felem128_mul_by_8: ++ ++ F128_X_8 0, 8 ++ ++ F128_X_8 16, 24 ++ ++ F128_X_8 32, 40 ++ ++ F128_X_8 48, 56 ++ ++ F128_X_8 64, 72 ++ ++ F128_X_8 80, 88 ++ ++ F128_X_8 96, 104 ++ ++ F128_X_8 112, 120 ++ ++ F128_X_8 128, 136 ++ ++ F128_X_8 144, 152 ++ ++ F128_X_8 160, 168 ++ ++ F128_X_8 176, 184 ++ ++ F128_X_8 192, 200 ++ ++ blr ++.size p384_felem128_mul_by_8,.-p384_felem128_mul_by_8 ++ ++# ++# widefelem (128 bits) * 2 ++# ++.macro F128_X_2 _off1 _off2 ++ ld 9,\\_off1(3) ++ ld 8,\\_off2(3) ++ srdi 10,9,63 ++ rldimi 10,8,1,0 ++ sldi 9,9,1 ++ std 9,\\_off1(3) ++ std 10,\\_off2(3) ++.endm ++ ++.globl p384_felem128_mul_by_2 ++.type p384_felem128_mul_by_2, \@function ++.align 4 ++p384_felem128_mul_by_2: ++ ++ F128_X_2 0, 8 ++ ++ F128_X_2 16, 24 ++ ++ F128_X_2 32, 40 ++ ++ F128_X_2 48, 56 ++ ++ F128_X_2 64, 72 ++ ++ F128_X_2 80, 88 ++ ++ F128_X_2 96, 104 ++ ++ F128_X_2 112, 120 ++ ++ F128_X_2 128, 136 ++ ++ F128_X_2 144, 152 ++ ++ F128_X_2 160, 168 ++ ++ F128_X_2 176, 184 ++ ++ F128_X_2 192, 200 ++ ++ blr ++.size p384_felem128_mul_by_2,.-p384_felem128_mul_by_2 ++ ++.globl p384_felem_diff128 ++.type p384_felem_diff128, \@function ++.align 4 ++p384_felem_diff128: ++ ++ addis 5, 2, .LConst_two127\@toc\@ha ++ addi 5, 5, .LConst_two127\@toc\@l ++ ++ ld 10, 0(3) ++ ld 8, 8(3) ++ li 9, 0 ++ addc 10, 10, 9 ++ li 7, -1 ++ rldicr 7, 7, 0, 0 # two127 ++ adde 8, 8, 7 ++ ld 11, 0(4) ++ ld 12, 8(4) ++ subfc 11, 11, 10 ++ subfe 12, 12, 8 ++ std 11, 0(3) # out0 ++ std 12, 8(3) ++ ++ # two127m71 = (r10, r9) ++ 
ld 8, 16(3) ++ ld 7, 24(3) ++ ld 10, 24(5) # two127m71 ++ addc 8, 8, 9 ++ adde 7, 7, 10 ++ ld 11, 16(4) ++ ld 12, 24(4) ++ subfc 11, 11, 8 ++ subfe 12, 12, 7 ++ std 11, 16(3) # out1 ++ std 12, 24(3) ++ ++ ld 8, 32(3) ++ ld 7, 40(3) ++ addc 8, 8, 9 ++ adde 7, 7, 10 ++ ld 11, 32(4) ++ ld 12, 40(4) ++ subfc 11, 11, 8 ++ subfe 12, 12, 7 ++ std 11, 32(3) # out2 ++ std 12, 40(3) ++ ++ ld 8, 48(3) ++ ld 7, 56(3) ++ addc 8, 8, 9 ++ adde 7, 7, 10 ++ ld 11, 48(4) ++ ld 12, 56(4) ++ subfc 11, 11, 8 ++ subfe 12, 12, 7 ++ std 11, 48(3) # out3 ++ std 12, 56(3) ++ ++ ld 8, 64(3) ++ ld 7, 72(3) ++ addc 8, 8, 9 ++ adde 7, 7, 10 ++ ld 11, 64(4) ++ ld 12, 72(4) ++ subfc 11, 11, 8 ++ subfe 12, 12, 7 ++ std 11, 64(3) # out4 ++ std 12, 72(3) ++ ++ ld 8, 80(3) ++ ld 7, 88(3) ++ addc 8, 8, 9 ++ adde 7, 7, 10 ++ ld 11, 80(4) ++ ld 12, 88(4) ++ subfc 11, 11, 8 ++ subfe 12, 12, 7 ++ std 11, 80(3) # out5 ++ std 12, 88(3) ++ ++ ld 8, 96(3) ++ ld 7, 104(3) ++ ld 6, 40(5) # two127p111m79m71 ++ addc 8, 8, 9 ++ adde 7, 7, 6 ++ ld 11, 96(4) ++ ld 12, 104(4) ++ subfc 11, 11, 8 ++ subfe 12, 12, 7 ++ std 11, 96(3) # out6 ++ std 12, 104(3) ++ ++ ld 8, 112(3) ++ ld 7, 120(3) ++ ld 6, 56(5) # two127m119m71 ++ addc 8, 8, 9 ++ adde 7, 7, 6 ++ ld 11, 112(4) ++ ld 12, 120(4) ++ subfc 11, 11, 8 ++ subfe 12, 12, 7 ++ std 11, 112(3) # out7 ++ std 12, 120(3) ++ ++ ld 8, 128(3) ++ ld 7, 136(3) ++ ld 6, 72(5) # two127m95m71 ++ addc 8, 8, 9 ++ adde 7, 7, 6 ++ ld 11, 128(4) ++ ld 12, 136(4) ++ subfc 11, 11, 8 ++ subfe 12, 12, 7 ++ std 11, 128(3) # out8 ++ std 12, 136(3) ++ ++ ld 8, 144(3) ++ ld 7, 152(3) ++ addc 8, 8, 9 ++ adde 7, 7, 10 ++ ld 11, 144(4) ++ ld 12, 152(4) ++ subfc 11, 11, 8 ++ subfe 12, 12, 7 ++ std 11, 144(3) # out9 ++ std 12, 152(3) ++ ++ ld 8, 160(3) ++ ld 7, 168(3) ++ addc 8, 8, 9 ++ adde 7, 7, 10 ++ ld 11, 160(4) ++ ld 12, 168(4) ++ subfc 11, 11, 8 ++ subfe 12, 12, 7 ++ std 11, 160(3) # out10 ++ std 12, 168(3) ++ ++ ld 8, 176(3) ++ ld 7, 184(3) ++ addc 8, 8, 9 ++ adde 7, 7, 10 ++ ld 11, 176(4) ++ 
ld 12, 184(4) ++ subfc 11, 11, 8 ++ subfe 12, 12, 7 ++ std 11, 176(3) # out11 ++ std 12, 184(3) ++ ++ ld 8, 192(3) ++ ld 7, 200(3) ++ addc 8, 8, 9 ++ adde 7, 7, 10 ++ ld 11, 192(4) ++ ld 12, 200(4) ++ subfc 11, 11, 8 ++ subfe 12, 12, 7 ++ std 11, 192(3) # out12 ++ std 12, 200(3) ++ ++ blr ++.size p384_felem_diff128,.-p384_felem_diff128 ++ ++.data ++.align 4 ++.LConst_two127: ++#two127 ++.long 0x00000000, 0x00000000, 0x00000000, 0x80000000 ++#two127m71 ++.long 0x00000000, 0x00000000, 0xffffff80, 0x7fffffff ++#two127p111m79m71 ++.long 0x00000000, 0x00000000, 0xffff7f80, 0x80007fff ++#two127m119m71 ++.long 0x00000000, 0x00000000, 0xffffff80, 0x7f7fffff ++#two127m95m71 ++.long 0x00000000, 0x00000000, 0x7fffff80, 0x7fffffff ++ ++.text ++ ++.globl p384_felem_diff_128_64 ++.type p384_felem_diff_128_64, \@function ++.align 4 ++p384_felem_diff_128_64: ++ addis 5, 2, .LConst_128_two64\@toc\@ha ++ addi 5, 5, .LConst_128_two64\@toc\@l ++ ++ ld 9, 0(3) ++ ld 10, 8(3) ++ ld 8, 48(5) # two64p48m16 ++ li 7, 0 ++ addc 9, 9, 8 ++ li 6, 1 ++ adde 10, 10, 6 ++ ld 11, 0(4) ++ subfc 8, 11, 9 ++ subfe 12, 7, 10 ++ std 8, 0(3) # out0 ++ std 12, 8(3) ++ ++ ld 9, 16(3) ++ ld 10, 24(3) ++ ld 8, 0(5) # two64m56m8 ++ addc 9, 9, 8 ++ addze 10, 10 ++ ld 11, 8(4) ++ subfc 11, 11, 9 ++ subfe 12, 7, 10 ++ std 11, 16(3) # out1 ++ std 12, 24(3) ++ ++ ld 9, 32(3) ++ ld 10, 40(3) ++ ld 8, 16(5) # two64m32m8 ++ addc 9, 9, 8 ++ addze 10, 10 ++ ld 11, 16(4) ++ subfc 11, 11, 9 ++ subfe 12, 7, 10 ++ std 11, 32(3) # out2 ++ std 12, 40(3) ++ ++ ld 10, 48(3) ++ ld 8, 56(3) ++ #ld 9, 32(5) # two64m8 ++ li 9, -256 # two64m8 ++ addc 10, 10, 9 ++ addze 8, 8 ++ ld 11, 24(4) ++ subfc 11, 11, 10 ++ subfe 12, 7, 8 ++ std 11, 48(3) # out3 ++ std 12, 56(3) ++ ++ ld 10, 64(3) ++ ld 8, 72(3) ++ addc 10, 10, 9 ++ addze 8, 8 ++ ld 11, 32(4) ++ subfc 11, 11, 10 ++ subfe 12, 7, 8 ++ std 11, 64(3) # out4 ++ std 12, 72(3) ++ ++ ld 10, 80(3) ++ ld 8, 88(3) ++ addc 10, 10, 9 ++ addze 8, 8 ++ ld 11, 40(4) ++ subfc 11, 11, 10 ++ 
subfe 12, 7, 8 ++ std 11, 80(3) # out5 ++ std 12, 88(3) ++ ++ ld 10, 96(3) ++ ld 8, 104(3) ++ addc 10, 10, 9 ++ addze 9, 8 ++ ld 11, 48(4) ++ subfc 11, 11, 10 ++ subfe 12, 7, 9 ++ std 11, 96(3) # out6 ++ std 12, 104(3) ++ ++ blr ++.size p384_felem_diff_128_64,.-p384_felem_diff_128_64 ++ ++.data ++.align 4 ++.LConst_128_two64: ++#two64m56m8 ++.long 0xffffff00, 0xfeffffff, 0x00000000, 0x00000000 ++#two64m32m8 ++.long 0xffffff00, 0xfffffffe, 0x00000000, 0x00000000 ++#two64m8 ++.long 0xffffff00, 0xffffffff, 0x00000000, 0x00000000 ++#two64p48m16 ++.long 0xffff0000, 0x0000ffff, 0x00000001, 0x00000000 ++ ++.LConst_two60: ++#two60m52m4 ++.long 0xfffffff0, 0x0fefffff, 0x0, 0x0 ++#two60p44m12 ++.long 0xfffff000, 0x10000fff, 0x0, 0x0 ++#two60m28m4 ++.long 0xeffffff0, 0x0fffffff, 0x0, 0x0 ++#two60m4 ++.long 0xfffffff0, 0x0fffffff, 0x0, 0x0 ++ ++.text ++# ++# static void felem_diff64(felem out, const felem in) ++# ++.globl p384_felem_diff64 ++.type p384_felem_diff64, \@function ++.align 4 ++p384_felem_diff64: ++ addis 5, 2, .LConst_two60\@toc\@ha ++ addi 5, 5, .LConst_two60\@toc\@l ++ ++ ld 9, 0(3) ++ ld 8, 16(5) # two60p44m12 ++ li 7, 0 ++ add 9, 9, 8 ++ ld 11, 0(4) ++ subf 8, 11, 9 ++ std 8, 0(3) # out0 ++ ++ ld 9, 8(3) ++ ld 8, 0(5) # two60m52m4 ++ add 9, 9, 8 ++ ld 11, 8(4) ++ subf 11, 11, 9 ++ std 11, 8(3) # out1 ++ ++ ld 9, 16(3) ++ ld 8, 32(5) # two60m28m4 ++ add 9, 9, 8 ++ ld 11, 16(4) ++ subf 11, 11, 9 ++ std 11, 16(3) # out2 ++ ++ ld 10, 24(3) ++ ld 9, 48(5) # two60m4 ++ add 10, 10, 9 ++ ld 12, 24(4) ++ subf 12, 12, 10 ++ std 12, 24(3) # out3 ++ ++ ld 10, 32(3) ++ add 10, 10, 9 ++ ld 11, 32(4) ++ subf 11, 11, 10 ++ std 11, 32(3) # out4 ++ ++ ld 10, 40(3) ++ add 10, 10, 9 ++ ld 12, 40(4) ++ subf 12, 12, 10 ++ std 12, 40(3) # out5 + +- endproc("p384_felem_square"); +- } +-} ++ ld 10, 48(3) ++ add 10, 10, 9 ++ ld 11, 48(4) ++ subf 11, 11, 10 ++ std 11, 48(3) # out6 ++ ++ blr ++.size p384_felem_diff64,.-p384_felem_diff64 ++ ++.text ++# ++# Shift 128 bits right ++# 
++.macro SHR o_h o_l in_h in_l nbits ++ srdi \\o_l, \\in_l, \\nbits # shift lower right ++ rldimi \\o_l, \\in_h, 64-\\nbits, 0 # insert <64-nbits> from hi ++ srdi \\o_h, \\in_h, \\nbits # shift higher right ++.endm ++ ++# ++# static void felem_reduce(felem out, const widefelem in) ++# ++.global p384_felem_reduce ++.type p384_felem_reduce,\@function ++.align 4 ++p384_felem_reduce: ++ ++ stdu 1, -208(1) ++ mflr 0 ++ std 14, 56(1) ++ std 15, 64(1) ++ std 16, 72(1) ++ std 17, 80(1) ++ std 18, 88(1) ++ std 19, 96(1) ++ std 20, 104(1) ++ std 21, 112(1) ++ std 22, 120(1) ++ std 23, 128(1) ++ std 24, 136(1) ++ std 25, 144(1) ++ std 26, 152(1) ++ std 27, 160(1) ++ std 28, 168(1) ++ std 29, 176(1) ++ std 30, 184(1) ++ std 31, 192(1) ++ ++ bl _p384_felem_reduce_core ++ ++ mtlr 0 ++ ld 14, 56(1) ++ ld 15, 64(1) ++ ld 16, 72(1) ++ ld 17, 80(1) ++ ld 18, 88(1) ++ ld 19, 96(1) ++ ld 20, 104(1) ++ ld 21, 112(1) ++ ld 22, 120(1) ++ ld 23, 128(1) ++ ld 24, 136(1) ++ ld 25, 144(1) ++ ld 26, 152(1) ++ ld 27, 160(1) ++ ld 28, 168(1) ++ ld 29, 176(1) ++ ld 30, 184(1) ++ ld 31, 192(1) ++ addi 1, 1, 208 ++ blr ++.size p384_felem_reduce,.-p384_felem_reduce ++ ++# ++# Felem reduction core function - ++# r3 and r4 need to be pre-loaded.
++# ++.type _p384_felem_reduce_core,\@function ++.align 4 ++_p384_felem_reduce_core: ++ addis 12, 2, .LConst\@toc\@ha ++ addi 12, 12, .LConst\@toc\@l ++ ++ # load constant p ++ ld 11, 8(12) # hi - two124m68 ++ ++ # acc[6] = in[6] + two124m68; ++ ld 26, 96(4) # in[6].l ++ ld 27, 96+8(4) # in[6].h ++ add 27, 27, 11 ++ ++ # acc[5] = in[5] + two124m68; ++ ld 24, 80(4) # in[5].l ++ ld 25, 80+8(4) # in[5].h ++ add 25, 25, 11 ++ ++ # acc[4] = in[4] + two124m68; ++ ld 22, 64(4) # in[4].l ++ ld 23, 64+8(4) # in[4].h ++ add 23, 23, 11 ++ ++ # acc[3] = in[3] + two124m68; ++ ld 20, 48(4) # in[3].l ++ ld 21, 48+8(4) # in[3].h ++ add 21, 21, 11 ++ ++ ld 11, 48+8(12) # hi - two124m92m68 ++ ++ # acc[2] = in[2] + two124m92m68; ++ ld 18, 32(4) # in[2].l ++ ld 19, 32+8(4) # in[2].h ++ add 19, 19, 11 ++ ++ ld 11, 16+8(12) # high - two124m116m68 ++ ++ # acc[1] = in[1] + two124m116m68; ++ ld 16, 16(4) # in[1].l ++ ld 17, 16+8(4) # in[1].h ++ add 17, 17, 11 ++ ++ ld 11, 32+8(12) # high - two124p108m76 ++ ++ # acc[0] = in[0] + two124p108m76; ++ ld 14, 0(4) # in[0].l ++ ld 15, 0+8(4) # in[0].h ++ add 15, 15, 11 ++ ++ # compute mask ++ li 7, -1 ++ ++ # Eliminate in[12] ++ ++ # acc[8] += in[12] >> 32; ++ ld 5, 192(4) # in[12].l ++ ld 6, 192+8(4) # in[12].h ++ SHR 9, 10, 6, 5, 32 ++ ld 30, 128(4) # in[8].l ++ ld 31, 136(4) # in[8].h ++ addc 30, 30, 10 ++ adde 31, 31, 9 ++ ++ # acc[7] += (in[12] & 0xffffffff) << 24; ++ srdi 11, 7, 32 # 0xffffffff ++ and 11, 11, 5 ++ sldi 11, 11, 24 # << 24 ++ ld 28, 112(4) # in[7].l ++ ld 29, 120(4) # in[7].h ++ addc 28, 28, 11 ++ addze 29, 29 ++ ++ # acc[7] += in[12] >> 8; ++ SHR 9, 10, 6, 5, 8 ++ addc 28, 28, 10 ++ adde 29, 29, 9 ++ ++ # acc[6] += (in[12] & 0xff) << 48; ++ andi.
11, 5, 0xff ++ sldi 11, 11, 48 ++ addc 26, 26, 11 ++ addze 27, 27 ++ ++ # acc[6] -= in[12] >> 16; ++ SHR 9, 10, 6, 5, 16 ++ subfc 26, 10, 26 ++ subfe 27, 9, 27 ++ ++ # acc[5] -= (in[12] & 0xffff) << 40; ++ srdi 11, 7, 48 # 0xffff ++ and 11, 11, 5 ++ sldi 11, 11, 40 # << 40 ++ li 9, 0 ++ subfc 24, 11, 24 ++ subfe 25, 9, 25 ++ ++ # acc[6] += in[12] >> 48; ++ SHR 9, 10, 6, 5, 48 ++ addc 26, 26, 10 ++ adde 27, 27, 9 ++ ++ # acc[5] += (in[12] & 0xffffffffffff) << 8; ++ srdi 11, 7, 16 # 0xffffffffffff ++ and 11, 11, 5 ++ sldi 11, 11, 8 # << 8 ++ addc 24, 24, 11 ++ addze 25, 25 ++ ++ # Eliminate in[11] ++ ++ # acc[7] += in[11] >> 32; ++ ld 5, 176(4) # in[11].l ++ ld 6, 176+8(4) # in[11].h ++ SHR 9, 10, 6, 5, 32 ++ addc 28, 28, 10 ++ adde 29, 29, 9 ++ ++ # acc[6] += (in[11] & 0xffffffff) << 24; ++ srdi 11, 7, 32 # 0xffffffff ++ and 11, 11, 5 ++ sldi 11, 11, 24 # << 24 ++ addc 26, 26, 11 ++ addze 27, 27 ++ ++ # acc[6] += in[11] >> 8; ++ SHR 9, 10, 6, 5, 8 ++ addc 26, 26, 10 ++ adde 27, 27, 9 ++ ++ # acc[5] += (in[11] & 0xff) << 48; ++ andi. 
11, 5, 0xff ++ sldi 11, 11, 48 ++ addc 24, 24, 11 ++ addze 25, 25 ++ ++ # acc[5] -= in[11] >> 16; ++ SHR 9, 10, 6, 5, 16 ++ subfc 24, 10, 24 ++ subfe 25, 9, 25 ++ ++ # acc[4] -= (in[11] & 0xffff) << 40; ++ srdi 11, 7, 48 # 0xffff ++ and 11, 11, 5 ++ sldi 11, 11, 40 # << 40 ++ li 9, 0 ++ subfc 22, 11, 22 ++ subfe 23, 9, 23 ++ ++ # acc[5] += in[11] >> 48; ++ SHR 9, 10, 6, 5, 48 ++ addc 24, 24, 10 ++ adde 25, 25, 9 ++ ++ # acc[4] += (in[11] & 0xffffffffffff) << 8; ++ srdi 11, 7, 16 # 0xffffffffffff ++ and 11, 11, 5 ++ sldi 11, 11, 8 # << 8 ++ addc 22, 22, 11 ++ addze 23, 23 ++ ++ # Eliminate in[10] ++ ++ # acc[6] += in[10] >> 32; ++ ld 5, 160(4) # in[10].l ++ ld 6, 160+8(4) # in[10].h ++ SHR 9, 10, 6, 5, 32 ++ addc 26, 26, 10 ++ adde 27, 27, 9 ++ ++ # acc[5] += (in[10] & 0xffffffff) << 24; ++ srdi 11, 7, 32 # 0xffffffff ++ and 11, 11, 5 ++ sldi 11, 11, 24 # << 24 ++ addc 24, 24, 11 ++ addze 25, 25 ++ ++ # acc[5] += in[10] >> 8; ++ SHR 9, 10, 6, 5, 8 ++ addc 24, 24, 10 ++ adde 25, 25, 9 ++ ++ # acc[4] += (in[10] & 0xff) << 48; ++ andi. 
11, 5, 0xff ++ sldi 11, 11, 48 ++ addc 22, 22, 11 ++ addze 23, 23 ++ ++ # acc[4] -= in[10] >> 16; ++ SHR 9, 10, 6, 5, 16 ++ subfc 22, 10, 22 ++ subfe 23, 9, 23 ++ ++ # acc[3] -= (in[10] & 0xffff) << 40; ++ srdi 11, 7, 48 # 0xffff ++ and 11, 11, 5 ++ sldi 11, 11, 40 # << 40 ++ li 9, 0 ++ subfc 20, 11, 20 ++ subfe 21, 9, 21 ++ ++ # acc[4] += in[10] >> 48; ++ SHR 9, 10, 6, 5, 48 ++ addc 22, 22, 10 ++ adde 23, 23, 9 ++ ++ # acc[3] += (in[10] & 0xffffffffffff) << 8; ++ srdi 11, 7, 16 # 0xffffffffffff ++ and 11, 11, 5 ++ sldi 11, 11, 8 # << 8 ++ addc 20, 20, 11 ++ addze 21, 21 ++ ++ # Eliminate in[9] ++ ++ # acc[5] += in[9] >> 32; ++ ld 5, 144(4) # in[9].l ++ ld 6, 144+8(4) # in[9].h ++ SHR 9, 10, 6, 5, 32 ++ addc 24, 24, 10 ++ adde 25, 25, 9 ++ ++ # acc[4] += (in[9] & 0xffffffff) << 24; ++ srdi 11, 7, 32 # 0xffffffff ++ and 11, 11, 5 ++ sldi 11, 11, 24 # << 24 ++ addc 22, 22, 11 ++ addze 23, 23 ++ ++ # acc[4] += in[9] >> 8; ++ SHR 9, 10, 6, 5, 8 ++ addc 22, 22, 10 ++ adde 23, 23, 9 ++ ++ # acc[3] += (in[9] & 0xff) << 48; ++ andi. 
11, 5, 0xff ++ sldi 11, 11, 48 ++ addc 20, 20, 11 ++ addze 21, 21 ++ ++ # acc[3] -= in[9] >> 16; ++ SHR 9, 10, 6, 5, 16 ++ subfc 20, 10, 20 ++ subfe 21, 9, 21 ++ ++ # acc[2] -= (in[9] & 0xffff) << 40; ++ srdi 11, 7, 48 # 0xffff ++ and 11, 11, 5 ++ sldi 11, 11, 40 # << 40 ++ li 9, 0 ++ subfc 18, 11, 18 ++ subfe 19, 9, 19 ++ ++ # acc[3] += in[9] >> 48; ++ SHR 9, 10, 6, 5, 48 ++ addc 20, 20, 10 ++ adde 21, 21, 9 ++ ++ # acc[2] += (in[9] & 0xffffffffffff) << 8; ++ srdi 11, 7, 16 # 0xffffffffffff ++ and 11, 11, 5 ++ sldi 11, 11, 8 # << 8 ++ addc 18, 18, 11 ++ addze 19, 19 ++ ++ # Eliminate acc[8] ++ ++ # acc[4] += acc[8] >> 32; ++ mr 5, 30 # acc[8].l ++ mr 6, 31 # acc[8].h ++ SHR 9, 10, 6, 5, 32 ++ addc 22, 22, 10 ++ adde 23, 23, 9 ++ ++ # acc[3] += (acc[8] & 0xffffffff) << 24; ++ srdi 11, 7, 32 # 0xffffffff ++ and 11, 11, 5 ++ sldi 11, 11, 24 # << 24 ++ addc 20, 20, 11 ++ addze 21, 21 ++ ++ # acc[3] += acc[8] >> 8; ++ SHR 9, 10, 6, 5, 8 ++ addc 20, 20, 10 ++ adde 21, 21, 9 ++ ++ # acc[2] += (acc[8] & 0xff) << 48; ++ andi. 
11, 5, 0xff ++ sldi 11, 11, 48 ++ addc 18, 18, 11 ++ addze 19, 19 ++ ++ # acc[2] -= acc[8] >> 16; ++ SHR 9, 10, 6, 5, 16 ++ subfc 18, 10, 18 ++ subfe 19, 9, 19 ++ ++ # acc[1] -= (acc[8] & 0xffff) << 40; ++ srdi 11, 7, 48 # 0xffff ++ and 11, 11, 5 ++ sldi 11, 11, 40 # << 40 ++ li 9, 0 ++ subfc 16, 11, 16 ++ subfe 17, 9, 17 ++ ++ #acc[2] += acc[8] >> 48; ++ SHR 9, 10, 6, 5, 48 ++ addc 18, 18, 10 ++ adde 19, 19, 9 ++ ++ # acc[1] += (acc[8] & 0xffffffffffff) << 8; ++ srdi 11, 7, 16 # 0xffffffffffff ++ and 11, 11, 5 ++ sldi 11, 11, 8 # << 8 ++ addc 16, 16, 11 ++ addze 17, 17 ++ ++ # Eliminate acc[7] ++ ++ # acc[3] += acc[7] >> 32; ++ mr 5, 28 # acc[7].l ++ mr 6, 29 # acc[7].h ++ SHR 9, 10, 6, 5, 32 ++ addc 20, 20, 10 ++ adde 21, 21, 9 ++ ++ # acc[2] += (acc[7] & 0xffffffff) << 24; ++ srdi 11, 7, 32 # 0xffffffff ++ and 11, 11, 5 ++ sldi 11, 11, 24 # << 24 ++ addc 18, 18, 11 ++ addze 19, 19 ++ ++ # acc[2] += acc[7] >> 8; ++ SHR 9, 10, 6, 5, 8 ++ addc 18, 18, 10 ++ adde 19, 19, 9 ++ ++ # acc[1] += (acc[7] & 0xff) << 48; ++ andi. 
11, 5, 0xff ++ sldi 11, 11, 48 ++ addc 16, 16, 11 ++ addze 17, 17 ++ ++ # acc[1] -= acc[7] >> 16; ++ SHR 9, 10, 6, 5, 16 ++ subfc 16, 10, 16 ++ subfe 17, 9, 17 ++ ++ # acc[0] -= (acc[7] & 0xffff) << 40; ++ srdi 11, 7, 48 # 0xffff ++ and 11, 11, 5 ++ sldi 11, 11, 40 # << 40 ++ li 9, 0 ++ subfc 14, 11, 14 ++ subfe 15, 9, 15 ++ ++ # acc[1] += acc[7] >> 48; ++ SHR 9, 10, 6, 5, 48 ++ addc 16, 16, 10 ++ adde 17, 17, 9 ++ ++ # acc[0] += (acc[7] & 0xffffffffffff) << 8; ++ srdi 11, 7, 16 # 0xffffffffffff ++ and 11, 11, 5 ++ sldi 11, 11, 8 # << 8 ++ addc 14, 14, 11 ++ addze 15, 15 ++ ++ # ++ # Carry 4 -> 5 -> 6 ++ # ++ # acc[5] += acc[4] >> 56; ++ # acc[4] &= 0x00ffffffffffffff; ++ SHR 9, 10, 23, 22, 56 ++ addc 24, 24, 10 ++ adde 25, 25, 9 ++ srdi 11, 7, 8 # 0x00ffffffffffffff ++ and 22, 22, 11 ++ li 23, 0 ++ ++ # acc[6] += acc[5] >> 56; ++ # acc[5] &= 0x00ffffffffffffff; ++ SHR 9, 10, 25, 24, 56 ++ addc 26, 26, 10 ++ adde 27, 27, 9 ++ and 24, 24, 11 ++ li 25, 0 ++ ++ # [3]: Eliminate high bits of acc[6] */ ++ # temp = acc[6] >> 48; ++ # acc[6] &= 0x0000ffffffffffff; ++ SHR 31, 30, 27, 26, 48 # temp = acc[6] >> 48 ++ srdi 11, 7, 16 # 0x0000ffffffffffff ++ and 26, 26, 11 ++ li 27, 0 ++ ++ # temp < 2^80 ++ # acc[3] += temp >> 40; ++ SHR 9, 10, 31, 30, 40 ++ addc 20, 20, 10 ++ adde 21, 21, 9 ++ ++ # acc[2] += (temp & 0xffffffffff) << 16; ++ srdi 11, 7, 24 # 0xffffffffff ++ and 10, 30, 11 ++ sldi 10, 10, 16 ++ addc 18, 18, 10 ++ addze 19, 19 ++ ++ # acc[2] += temp >> 16; ++ SHR 9, 10, 31, 30, 16 ++ addc 18, 18, 10 ++ adde 19, 19, 9 ++ ++ # acc[1] += (temp & 0xffff) << 40; ++ srdi 11, 7, 48 # 0xffff ++ and 10, 30, 11 ++ sldi 10, 10, 40 ++ addc 16, 16, 10 ++ addze 17, 17 ++ ++ # acc[1] -= temp >> 24; ++ SHR 9, 10, 31, 30, 24 ++ subfc 16, 10, 16 ++ subfe 17, 9, 17 ++ ++ # acc[0] -= (temp & 0xffffff) << 32; ++ srdi 11, 7, 40 # 0xffffff ++ and 10, 30, 11 ++ sldi 10, 10, 32 ++ li 9, 0 ++ subfc 14, 10, 14 ++ subfe 15, 9, 15 ++ ++ # acc[0] += temp; ++ addc 14, 14, 30 ++ adde 15, 15, 31 
++ ++ # Carry 0 -> 1 -> 2 -> 3 -> 4 -> 5 -> 6 ++ # ++ # acc[1] += acc[0] >> 56; /* acc[1] < acc_old[1] + 2^72 */ ++ SHR 9, 10, 15, 14, 56 ++ addc 16, 16, 10 ++ adde 17, 17, 9 ++ ++ # acc[0] &= 0x00ffffffffffffff; ++ srdi 11, 7, 8 # 0x00ffffffffffffff ++ and 14, 14, 11 ++ li 15, 0 ++ ++ # acc[2] += acc[1] >> 56; /* acc[2] < acc_old[2] + 2^72 + 2^16 */ ++ SHR 9, 10, 17, 16, 56 ++ addc 18, 18, 10 ++ adde 19, 19, 9 ++ ++ # acc[1] &= 0x00ffffffffffffff; ++ and 16, 16, 11 ++ li 17, 0 ++ ++ # acc[3] += acc[2] >> 56; /* acc[3] < acc_old[3] + 2^72 + 2^16 */ ++ SHR 9, 10, 19, 18, 56 ++ addc 20, 20, 10 ++ adde 21, 21, 9 ++ ++ # acc[2] &= 0x00ffffffffffffff; ++ and 18, 18, 11 ++ li 19, 0 ++ ++ # acc[4] += acc[3] >> 56; ++ SHR 9, 10, 21, 20, 56 ++ addc 22, 22, 10 ++ adde 23, 23, 9 ++ ++ # acc[3] &= 0x00ffffffffffffff; ++ and 20, 20, 11 ++ li 21, 0 ++ ++ # acc[5] += acc[4] >> 56; ++ SHR 9, 10, 23, 22, 56 ++ addc 24, 24, 10 ++ adde 25, 25, 9 ++ ++ # acc[4] &= 0x00ffffffffffffff; ++ and 22, 22, 11 ++ ++ # acc[6] += acc[5] >> 56; ++ SHR 9, 10, 25, 24, 56 ++ addc 26, 26, 10 ++ adde 27, 27, 9 ++ ++ # acc[5] &= 0x00ffffffffffffff; ++ and 24, 24, 11 ++ ++ std 14, 0(3) ++ std 16, 8(3) ++ std 18, 16(3) ++ std 20, 24(3) ++ std 22, 32(3) ++ std 24, 40(3) ++ std 26, 48(3) ++ blr ++.size _p384_felem_reduce_core,.-_p384_felem_reduce_core ++ ++.data ++.align 4 ++.LConst: ++# two124m68: ++.long 0x0, 0x0, 0xfffffff0, 0xfffffff ++# two124m116m68: ++.long 0x0, 0x0, 0xfffffff0, 0xfefffff ++#two124p108m76: ++.long 0x0, 0x0, 0xfffff000, 0x10000fff ++#two124m92m68: ++.long 0x0, 0x0, 0xeffffff0, 0xfffffff ++ ++.text ++ ++# ++# void p384_felem_square_reduce(felem out, const felem in) ++# ++.global p384_felem_square_reduce ++.type p384_felem_square_reduce,\@function ++.align 4 ++p384_felem_square_reduce: ++ stdu 1, -512(1) ++ mflr 0 ++ std 14, 56(1) ++ std 15, 64(1) ++ std 16, 72(1) ++ std 17, 80(1) ++ std 18, 88(1) ++ std 19, 96(1) ++ std 20, 104(1) ++ std 21, 112(1) ++ std 22, 120(1) ++ std 23, 128(1) 
++ std 24, 136(1) ++ std 25, 144(1) ++ std 26, 152(1) ++ std 27, 160(1) ++ std 28, 168(1) ++ std 29, 176(1) ++ std 30, 184(1) ++ std 31, 192(1) ++ ++ std 3, 496(1) ++ addi 3, 1, 208 ++ bl _p384_felem_square_core ++ ++ mr 4, 3 ++ ld 3, 496(1) ++ bl _p384_felem_reduce_core ++ ++ ld 14, 56(1) ++ ld 15, 64(1) ++ ld 16, 72(1) ++ ld 17, 80(1) ++ ld 18, 88(1) ++ ld 19, 96(1) ++ ld 20, 104(1) ++ ld 21, 112(1) ++ ld 22, 120(1) ++ ld 23, 128(1) ++ ld 24, 136(1) ++ ld 25, 144(1) ++ ld 26, 152(1) ++ ld 27, 160(1) ++ ld 28, 168(1) ++ ld 29, 176(1) ++ ld 30, 184(1) ++ ld 31, 192(1) ++ addi 1, 1, 512 ++ mtlr 0 ++ blr ++.size p384_felem_square_reduce,.-p384_felem_square_reduce ++ ++# ++# void p384_felem_mul_reduce(felem out, const felem in1, const felem in2) ++# ++.global p384_felem_mul_reduce ++.type p384_felem_mul_reduce,\@function ++.align 5 ++p384_felem_mul_reduce: ++ stdu 1, -512(1) ++ mflr 0 ++ std 14, 56(1) ++ std 15, 64(1) ++ std 16, 72(1) ++ std 17, 80(1) ++ std 18, 88(1) ++ std 19, 96(1) ++ std 20, 104(1) ++ std 21, 112(1) ++ std 22, 120(1) ++ std 23, 128(1) ++ std 24, 136(1) ++ std 25, 144(1) ++ std 26, 152(1) ++ std 27, 160(1) ++ std 28, 168(1) ++ std 29, 176(1) ++ std 30, 184(1) ++ std 31, 192(1) ++ ++ std 3, 496(1) ++ addi 3, 1, 208 ++ bl _p384_felem_mul_core ++ ++ mr 4, 3 ++ ld 3, 496(1) ++ bl _p384_felem_reduce_core ++ ++ ld 14, 56(1) ++ ld 15, 64(1) ++ ld 16, 72(1) ++ ld 17, 80(1) ++ ld 18, 88(1) ++ ld 19, 96(1) ++ ld 20, 104(1) ++ ld 21, 112(1) ++ ld 22, 120(1) ++ ld 23, 128(1) ++ ld 24, 136(1) ++ ld 25, 144(1) ++ ld 26, 152(1) ++ ld 27, 160(1) ++ ld 28, 168(1) ++ ld 29, 176(1) ++ ld 30, 184(1) ++ ld 31, 192(1) ++ addi 1, 1, 512 ++ mtlr 0 ++ blr ++.size p384_felem_mul_reduce,.-p384_felem_mul_reduce ++___ + + $code =~ s/\`([^\`]*)\`/eval $1/gem; + print $code; +diff --git a/crypto/ec/ecp_nistp384.c b/crypto/ec/ecp_nistp384.c +index 3fd7a40020..e0b5786bc1 100644 +--- a/crypto/ec/ecp_nistp384.c ++++ b/crypto/ec/ecp_nistp384.c +@@ -252,6 +252,16 @@ static void 
felem_neg(felem out, const felem in) + out[6] = two60m4 - in[6]; + } + ++#if defined(ECP_NISTP384_ASM) ++void p384_felem_diff64(felem out, const felem in); ++void p384_felem_diff128(widefelem out, const widefelem in); ++void p384_felem_diff_128_64(widefelem out, const felem in); ++ ++# define felem_diff64 p384_felem_diff64 ++# define felem_diff128 p384_felem_diff128 ++# define felem_diff_128_64 p384_felem_diff_128_64 ++ ++#else + /*- + * felem_diff64 subtracts |in| from |out| + * On entry: +@@ -369,6 +379,7 @@ static void felem_diff128(widefelem out, const widefelem in) + for (i = 0; i < 2*NLIMBS-1; i++) + out[i] -= in[i]; + } ++#endif /* ECP_NISTP384_ASM */ + + static void felem_square_ref(widefelem out, const felem in) + { +@@ -503,7 +514,7 @@ static void felem_mul_ref(widefelem out, const felem in1, const felem in2) + * [3]: Y = 2^48 (acc[6] >> 48) + * (Where a | b | c | d = (2^56)^3 a + (2^56)^2 b + (2^56) c + d) + */ +-static void felem_reduce(felem out, const widefelem in) ++static void felem_reduce_ref(felem out, const widefelem in) + { + /* + * In order to prevent underflow, we add a multiple of p before subtracting. 
+@@ -682,8 +693,11 @@ static void (*felem_square_p)(widefelem out, const felem in) = + static void (*felem_mul_p)(widefelem out, const felem in1, const felem in2) = + felem_mul_wrapper; + ++static void (*felem_reduce_p)(felem out, const widefelem in) = felem_reduce_ref; ++ + void p384_felem_square(widefelem out, const felem in); + void p384_felem_mul(widefelem out, const felem in1, const felem in2); ++void p384_felem_reduce(felem out, const widefelem in); + + # if defined(_ARCH_PPC64) + # include "crypto/ppc_arch.h" +@@ -695,6 +709,7 @@ static void felem_select(void) + if ((OPENSSL_ppccap_P & PPC_MADD300) && (OPENSSL_ppccap_P & PPC_ALTIVEC)) { + felem_square_p = p384_felem_square; + felem_mul_p = p384_felem_mul; ++ felem_reduce_p = p384_felem_reduce; + + return; + } +@@ -703,6 +718,7 @@ static void felem_select(void) + /* Default */ + felem_square_p = felem_square_ref; + felem_mul_p = felem_mul_ref; ++ felem_reduce_p = p384_felem_reduce; + } + + static void felem_square_wrapper(widefelem out, const felem in) +@@ -719,10 +735,17 @@ static void felem_mul_wrapper(widefelem out, const felem in1, const felem in2) + + # define felem_square felem_square_p + # define felem_mul felem_mul_p ++# define felem_reduce felem_reduce_p ++ ++void p384_felem_square_reduce(felem out, const felem in); ++void p384_felem_mul_reduce(felem out, const felem in1, const felem in2); ++ ++# define felem_square_reduce p384_felem_square_reduce ++# define felem_mul_reduce p384_felem_mul_reduce + #else + # define felem_square felem_square_ref + # define felem_mul felem_mul_ref +-#endif ++# define felem_reduce felem_reduce_ref + + static ossl_inline void felem_square_reduce(felem out, const felem in) + { +@@ -739,6 +762,7 @@ static ossl_inline void felem_mul_reduce(felem out, const felem in1, const felem + felem_mul(tmp, in1, in2); + felem_reduce(out, tmp); + } ++#endif + + /*- + * felem_inv calculates |out| = |in|^{-1} diff --git a/meta/recipes-connectivity/openssl/openssl/CVE-2025-27587-2.patch 
b/meta/recipes-connectivity/openssl/openssl/CVE-2025-27587-2.patch new file mode 100644 index 0000000000..0659a9d6d9 --- /dev/null +++ b/meta/recipes-connectivity/openssl/openssl/CVE-2025-27587-2.patch @@ -0,0 +1,129 @@ +From 6b1646e472c9e8c08bb14066ba2a7c3eed45f84a Mon Sep 17 00:00:00 2001 +From: "A. Wilcox" +Date: Thu, 17 Apr 2025 08:51:53 -0500 +Subject: [PATCH] Fix P-384 curve on lower-than-P9 PPC64 targets + +The change adding an asm implementation of p384_felem_reduce incorrectly +uses the accelerated version on both targets that support the intrinsics +*and* targets that don't, instead of falling back to the generics on older +targets. This results in crashes when trying to use P-384 on < Power9. + +Signed-off-by: Anna Wilcox +Closes: #27350 +Fixes: 85cabd94 ("Fix Minerva timing side-channel signal for P-384 curve on PPC") + +Reviewed-by: Dmitry Belyavskiy +Reviewed-by: Tomas Mraz +(Merged from https://github.com/openssl/openssl/pull/27429) + +(cherry picked from commit 29864f2b0f1046177e8048a5b17440893d3f9425) + +CVE: CVE-2025-27587 +Upstream-Status: Backport [https://github.com/openssl/openssl/commit/6b1646e472c9e8c08bb14066ba2a7c3eed45f84a] +Signed-off-by: Peter Marko +--- + crypto/ec/ecp_nistp384.c | 54 ++++++++++++++++++++++++---------------- + 1 file changed, 33 insertions(+), 21 deletions(-) + +diff --git a/crypto/ec/ecp_nistp384.c b/crypto/ec/ecp_nistp384.c +index e0b5786bc1..439b4d03a3 100644 +--- a/crypto/ec/ecp_nistp384.c ++++ b/crypto/ec/ecp_nistp384.c +@@ -684,6 +684,22 @@ static void felem_reduce_ref(felem out, const widefelem in) + out[i] = acc[i]; + } + ++static ossl_inline void felem_square_reduce_ref(felem out, const felem in) ++{ ++ widefelem tmp; ++ ++ felem_square_ref(tmp, in); ++ felem_reduce_ref(out, tmp); ++} ++ ++static ossl_inline void felem_mul_reduce_ref(felem out, const felem in1, const felem in2) ++{ ++ widefelem tmp; ++ ++ felem_mul_ref(tmp, in1, in2); ++ felem_reduce_ref(out, tmp); ++} ++ + #if defined(ECP_NISTP384_ASM) + 
static void felem_square_wrapper(widefelem out, const felem in); + static void felem_mul_wrapper(widefelem out, const felem in1, const felem in2); +@@ -695,10 +711,18 @@ static void (*felem_mul_p)(widefelem out, const felem in1, const felem in2) = + + static void (*felem_reduce_p)(felem out, const widefelem in) = felem_reduce_ref; + ++static void (*felem_square_reduce_p)(felem out, const felem in) = ++ felem_square_reduce_ref; ++static void (*felem_mul_reduce_p)(felem out, const felem in1, const felem in2) = ++ felem_mul_reduce_ref; ++ + void p384_felem_square(widefelem out, const felem in); + void p384_felem_mul(widefelem out, const felem in1, const felem in2); + void p384_felem_reduce(felem out, const widefelem in); + ++void p384_felem_square_reduce(felem out, const felem in); ++void p384_felem_mul_reduce(felem out, const felem in1, const felem in2); ++ + # if defined(_ARCH_PPC64) + # include "crypto/ppc_arch.h" + # endif +@@ -710,6 +734,8 @@ static void felem_select(void) + felem_square_p = p384_felem_square; + felem_mul_p = p384_felem_mul; + felem_reduce_p = p384_felem_reduce; ++ felem_square_reduce_p = p384_felem_square_reduce; ++ felem_mul_reduce_p = p384_felem_mul_reduce; + + return; + } +@@ -718,7 +744,9 @@ static void felem_select(void) + /* Default */ + felem_square_p = felem_square_ref; + felem_mul_p = felem_mul_ref; +- felem_reduce_p = p384_felem_reduce; ++ felem_reduce_p = felem_reduce_ref; ++ felem_square_reduce_p = felem_square_reduce_ref; ++ felem_mul_reduce_p = felem_mul_reduce_ref; + } + + static void felem_square_wrapper(widefelem out, const felem in) +@@ -737,31 +765,15 @@ static void felem_mul_wrapper(widefelem out, const felem in1, const felem in2) + # define felem_mul felem_mul_p + # define felem_reduce felem_reduce_p + +-void p384_felem_square_reduce(felem out, const felem in); +-void p384_felem_mul_reduce(felem out, const felem in1, const felem in2); +- +-# define felem_square_reduce p384_felem_square_reduce +-# define felem_mul_reduce 
p384_felem_mul_reduce ++# define felem_square_reduce felem_square_reduce_p ++# define felem_mul_reduce felem_mul_reduce_p + #else + # define felem_square felem_square_ref + # define felem_mul felem_mul_ref + # define felem_reduce felem_reduce_ref + +-static ossl_inline void felem_square_reduce(felem out, const felem in) +-{ +- widefelem tmp; +- +- felem_square(tmp, in); +- felem_reduce(out, tmp); +-} +- +-static ossl_inline void felem_mul_reduce(felem out, const felem in1, const felem in2) +-{ +- widefelem tmp; +- +- felem_mul(tmp, in1, in2); +- felem_reduce(out, tmp); +-} ++# define felem_square_reduce felem_square_reduce_ref ++# define felem_mul_reduce felem_mul_reduce_ref + #endif + + /*- diff --git a/meta/recipes-connectivity/openssl/openssl_3.2.4.bb b/meta/recipes-connectivity/openssl/openssl_3.2.4.bb index d6bf32d989..fd98b32007 100644 --- a/meta/recipes-connectivity/openssl/openssl_3.2.4.bb +++ b/meta/recipes-connectivity/openssl/openssl_3.2.4.bb @@ -13,6 +13,8 @@ SRC_URI = "https://github.com/openssl/openssl/releases/download/openssl-${PV}/op file://0001-Configure-do-not-tweak-mips-cflags.patch \ file://0001-Added-handshake-history-reporting-when-test-fails.patch \ file://CVE-2024-41996.patch \ + file://CVE-2025-27587-1.patch \ + file://CVE-2025-27587-2.patch \ " SRC_URI:append:class-nativesdk = " \