Skip to content

Commit

Permalink
Add FIPS aarch POC build to CI. (#138)
Browse files Browse the repository at this point in the history
* Fix FIPS static build parse issue on aarch. (#135)

* Remove 'ecp_nistz256_add' function, and change offset calculation.

'ecp_nistz256_add' in p256-armv8-asm.pl duplicates the definition in p256-nistz.c. The 'ecp_nistz256_add' function is removed so that delocate.go no longer reports 'Duplicate symbol found'.

The offset calculation expression is changed to avoid 'peg' parser errors, because the current delocate.peg grammar is not comprehensive enough to parse all assembly expressions.

* Add new peg rules to address new assembly expression.

* Change assembly expression.

* Add delocate.peg convert README.

* Remove auto-generated comment.

* Add more delocate.peg.go generate commands.

* Fix command.

* Update build files in generated-src

* Fix FIPS static build transform issue on aarch. (#137)

* Remove 'ecp_nistz256_add' function, and change offset calculation.

'ecp_nistz256_add' in p256-armv8-asm.pl duplicates the definition in p256-nistz.c. The 'ecp_nistz256_add' function is removed so that delocate.go no longer reports 'Duplicate symbol found'.

The offset calculation expression is changed to avoid 'peg' parser errors, because the current delocate.peg grammar is not comprehensive enough to parse all assembly expressions.

* Add new peg rules to address new assembly expression.

* Change assembly expression.

* Add delocate.peg convert README.

* Remove auto-generated comment.

* Add more delocate.peg.go generate commands.

* Fix command.

* Add offset to local symbol in delocate.

* Update util/fipstools/delocate/delocate.go

Co-authored-by: Nevine Ebeid <66388554+nebeid@users.noreply.github.com>

* Update util/fipstools/delocate/delocate.go

Co-authored-by: Nevine Ebeid <66388554+nebeid@users.noreply.github.com>

* Update comments.

Co-authored-by: Nevine Ebeid <66388554+nebeid@users.noreply.github.com>

* Add FIPS build CI on aarch.

* Increase go test timeout.

* Increase more timeout.

* Remove timeout variable because ASAN is not enabled.

* Add comment to trigger CI.

Co-authored-by: Nevine Ebeid <66388554+nebeid@users.noreply.github.com>
  • Loading branch information
bryce-shang and nebeid authored Apr 28, 2021
1 parent 6642666 commit d60b60e
Show file tree
Hide file tree
Showing 9 changed files with 558 additions and 493 deletions.
56 changes: 16 additions & 40 deletions crypto/fipsmodule/ec/asm/p256-armv8-asm.pl
Original file line number Diff line number Diff line change
Expand Up @@ -166,30 +166,6 @@
ret
.size ecp_nistz256_sqr_mont,.-ecp_nistz256_sqr_mont
// void ecp_nistz256_add(BN_ULONG x0[4],const BN_ULONG x1[4],
// const BN_ULONG x2[4]);
.globl ecp_nistz256_add
.type ecp_nistz256_add,%function
.align 4
ecp_nistz256_add:
.inst 0xd503233f // paciasp
stp x29,x30,[sp,#-16]!
add x29,sp,#0
ldp $acc0,$acc1,[$ap]
ldp $t0,$t1,[$bp]
ldp $acc2,$acc3,[$ap,#16]
ldp $t2,$t3,[$bp,#16]
ldr $poly1,.Lpoly+8
ldr $poly3,.Lpoly+24
bl __ecp_nistz256_add
ldp x29,x30,[sp],#16
.inst 0xd50323bf // autiasp
ret
.size ecp_nistz256_add,.-ecp_nistz256_add
// void ecp_nistz256_div_by_2(BN_ULONG x0[4],const BN_ULONG x1[4]);
.globl ecp_nistz256_div_by_2
.type ecp_nistz256_div_by_2,%function
Expand Down Expand Up @@ -229,7 +205,7 @@
mov $t2,$acc2
mov $t3,$acc3
bl __ecp_nistz256_add // ret = a+a // 2*a
bl __ecp_nistz256_add_to // ret = a+a // 2*a
ldp x29,x30,[sp],#16
.inst 0xd50323bf // autiasp
Expand Down Expand Up @@ -258,14 +234,14 @@
mov $a2,$acc2
mov $a3,$acc3
bl __ecp_nistz256_add // ret = a+a // 2*a
bl __ecp_nistz256_add_to // ret = a+a // 2*a
mov $t0,$a0
mov $t1,$a1
mov $t2,$a2
mov $t3,$a3
bl __ecp_nistz256_add // ret += a // 2*a+a=3*a
bl __ecp_nistz256_add_to // ret += a // 2*a+a=3*a
ldp x29,x30,[sp],#16
.inst 0xd50323bf // autiasp
Expand Down Expand Up @@ -545,12 +521,12 @@
ret
.size __ecp_nistz256_sqr_mont,.-__ecp_nistz256_sqr_mont
// Note that __ecp_nistz256_add expects both input vectors pre-loaded to
// Note that __ecp_nistz256_add_to expects both input vectors pre-loaded to
// $a0-$a3 and $t0-$t3. This is done because it's used in multiple
// contexts, e.g. in multiplication by 2 and 3...
.type __ecp_nistz256_add,%function
.type __ecp_nistz256_add_to,%function
.align 4
__ecp_nistz256_add:
__ecp_nistz256_add_to:
adds $acc0,$acc0,$t0 // ret = a+b
adcs $acc1,$acc1,$t1
adcs $acc2,$acc2,$t2
Expand All @@ -571,7 +547,7 @@
stp $acc2,$acc3,[$rp,#16]
ret
.size __ecp_nistz256_add,.-__ecp_nistz256_add
.size __ecp_nistz256_add_to,.-__ecp_nistz256_add_to
.type __ecp_nistz256_sub_from,%function
.align 4
Expand Down Expand Up @@ -696,7 +672,7 @@
mov $t3,$acc3
ldp $a2,$a3,[$ap_real,#64+16]
add $rp,sp,#$S
bl __ecp_nistz256_add // p256_mul_by_2(S, in_y);
bl __ecp_nistz256_add_to // p256_mul_by_2(S, in_y);
add $rp,sp,#$Zsqr
bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Zsqr, in_z);
Expand All @@ -708,7 +684,7 @@
mov $a2,$acc2
mov $a3,$acc3
add $rp,sp,#$M
bl __ecp_nistz256_add // p256_add(M, Zsqr, in_x);
bl __ecp_nistz256_add_to // p256_add(M, Zsqr, in_x);
add $bp,$ap_real,#0
mov $acc0,$a0 // restore Zsqr
Expand Down Expand Up @@ -737,7 +713,7 @@
mov $t3,$acc3
ldp $a2,$a3,[sp,#$S+16]
add $rp,$rp_real,#64
bl __ecp_nistz256_add // p256_mul_by_2(res_z, tmp0);
bl __ecp_nistz256_add_to // p256_mul_by_2(res_z, tmp0);
add $rp,sp,#$tmp0
bl __ecp_nistz256_sqr_mont // p256_sqr_mont(tmp0, S);
Expand All @@ -761,15 +737,15 @@
mov $a2,$acc2
mov $a3,$acc3
add $rp,sp,#$M
bl __ecp_nistz256_add
bl __ecp_nistz256_add_to
mov $t0,$a0 // restore M
mov $t1,$a1
ldr $bi,[$ap_real] // forward load for p256_mul_mont
mov $t2,$a2
ldp $a0,$a1,[sp,#$S]
mov $t3,$a3
ldp $a2,$a3,[sp,#$S+16]
bl __ecp_nistz256_add // p256_mul_by_3(M, M);
bl __ecp_nistz256_add_to // p256_mul_by_3(M, M);
add $bp,$ap_real,#0
add $rp,sp,#$S
Expand All @@ -782,7 +758,7 @@
mov $t3,$acc3
ldp $a2,$a3,[sp,#$M+16]
add $rp,sp,#$tmp0
bl __ecp_nistz256_add // p256_mul_by_2(tmp0, S);
bl __ecp_nistz256_add_to // p256_mul_by_2(tmp0, S);
add $rp,$rp_real,#0
bl __ecp_nistz256_sqr_mont // p256_sqr_mont(res_x, M);
Expand Down Expand Up @@ -941,7 +917,7 @@
ldp x23,x24,[x29,#48]
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
add sp,sp,#32*(12-4) // difference in stack frames
add sp,sp,#256 // #256 is from #32*(12-4). difference in stack frames
b .Ldouble_shortcut
.align 4
Expand Down Expand Up @@ -987,7 +963,7 @@
mov $t2,$acc2
mov $t3,$acc3
add $rp,sp,#$Hsqr
bl __ecp_nistz256_add // p256_mul_by_2(Hsqr, U2);
bl __ecp_nistz256_add_to // p256_mul_by_2(Hsqr, U2);
add $bp,sp,#$Rsqr
add $rp,sp,#$res_x
Expand Down Expand Up @@ -1197,7 +1173,7 @@
mov $t2,$acc2
mov $t3,$acc3
add $rp,sp,#$Hsqr
bl __ecp_nistz256_add // p256_mul_by_2(Hsqr, U2);
bl __ecp_nistz256_add_to // p256_mul_by_2(Hsqr, U2);
add $bp,sp,#$Rsqr
add $rp,sp,#$res_x
Expand Down
53 changes: 14 additions & 39 deletions generated-src/ios-aarch64/crypto/fipsmodule/p256-armv8-asm.S
Original file line number Diff line number Diff line change
Expand Up @@ -133,31 +133,6 @@ _ecp_nistz256_sqr_mont:
ret


// void ecp_nistz256_add(BN_ULONG x0[4],const BN_ULONG x1[4],
// const BN_ULONG x2[4]);
.globl _ecp_nistz256_add
.private_extern _ecp_nistz256_add

.align 4
_ecp_nistz256_add:
.long 0xd503233f // paciasp
stp x29,x30,[sp,#-16]!
add x29,sp,#0

ldp x14,x15,[x1]
ldp x8,x9,[x2]
ldp x16,x17,[x1,#16]
ldp x10,x11,[x2,#16]
ldr x12,Lpoly+8
ldr x13,Lpoly+24

bl __ecp_nistz256_add

ldp x29,x30,[sp],#16
.long 0xd50323bf // autiasp
ret


// void ecp_nistz256_div_by_2(BN_ULONG x0[4],const BN_ULONG x1[4]);
.globl _ecp_nistz256_div_by_2
.private_extern _ecp_nistz256_div_by_2
Expand Down Expand Up @@ -199,7 +174,7 @@ _ecp_nistz256_mul_by_2:
mov x10,x16
mov x11,x17

bl __ecp_nistz256_add // ret = a+a // 2*a
bl __ecp_nistz256_add_to // ret = a+a // 2*a

ldp x29,x30,[sp],#16
.long 0xd50323bf // autiasp
Expand Down Expand Up @@ -229,14 +204,14 @@ _ecp_nistz256_mul_by_3:
mov x6,x16
mov x7,x17

bl __ecp_nistz256_add // ret = a+a // 2*a
bl __ecp_nistz256_add_to // ret = a+a // 2*a

mov x8,x4
mov x9,x5
mov x10,x6
mov x11,x7

bl __ecp_nistz256_add // ret += a // 2*a+a=3*a
bl __ecp_nistz256_add_to // ret += a // 2*a+a=3*a

ldp x29,x30,[sp],#16
.long 0xd50323bf // autiasp
Expand Down Expand Up @@ -550,12 +525,12 @@ __ecp_nistz256_sqr_mont:
ret


// Note that __ecp_nistz256_add expects both input vectors pre-loaded to
// Note that __ecp_nistz256_add_to expects both input vectors pre-loaded to
// x4-x7 and x8-x11. This is done because it's used in multiple
// contexts, e.g. in multiplication by 2 and 3...

.align 4
__ecp_nistz256_add:
__ecp_nistz256_add_to:
adds x14,x14,x8 // ret = a+b
adcs x15,x15,x9
adcs x16,x16,x10
Expand Down Expand Up @@ -687,7 +662,7 @@ Ldouble_shortcut:
mov x11,x17
ldp x6,x7,[x22,#64+16]
add x0,sp,#0
bl __ecp_nistz256_add // p256_mul_by_2(S, in_y);
bl __ecp_nistz256_add_to // p256_mul_by_2(S, in_y);

add x0,sp,#64
bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Zsqr, in_z);
Expand All @@ -699,7 +674,7 @@ Ldouble_shortcut:
mov x6,x16
mov x7,x17
add x0,sp,#32
bl __ecp_nistz256_add // p256_add(M, Zsqr, in_x);
bl __ecp_nistz256_add_to // p256_add(M, Zsqr, in_x);

add x2,x22,#0
mov x14,x4 // restore Zsqr
Expand Down Expand Up @@ -728,7 +703,7 @@ Ldouble_shortcut:
mov x11,x17
ldp x6,x7,[sp,#0+16]
add x0,x21,#64
bl __ecp_nistz256_add // p256_mul_by_2(res_z, tmp0);
bl __ecp_nistz256_add_to // p256_mul_by_2(res_z, tmp0);

add x0,sp,#96
bl __ecp_nistz256_sqr_mont // p256_sqr_mont(tmp0, S);
Expand All @@ -752,15 +727,15 @@ Ldouble_shortcut:
mov x6,x16
mov x7,x17
add x0,sp,#32
bl __ecp_nistz256_add
bl __ecp_nistz256_add_to
mov x8,x4 // restore M
mov x9,x5
ldr x3,[x22] // forward load for p256_mul_mont
mov x10,x6
ldp x4,x5,[sp,#0]
mov x11,x7
ldp x6,x7,[sp,#0+16]
bl __ecp_nistz256_add // p256_mul_by_3(M, M);
bl __ecp_nistz256_add_to // p256_mul_by_3(M, M);

add x2,x22,#0
add x0,sp,#0
Expand All @@ -773,7 +748,7 @@ Ldouble_shortcut:
mov x11,x17
ldp x6,x7,[sp,#32+16]
add x0,sp,#96
bl __ecp_nistz256_add // p256_mul_by_2(tmp0, S);
bl __ecp_nistz256_add_to // p256_mul_by_2(tmp0, S);

add x0,x21,#0
bl __ecp_nistz256_sqr_mont // p256_sqr_mont(res_x, M);
Expand Down Expand Up @@ -917,7 +892,7 @@ Ladd_double:
ldp x23,x24,[x29,#48]
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
add sp,sp,#32*(12-4) // difference in stack frames
add sp,sp,#256 // #256 is from #32*(12-4). difference in stack frames
b Ldouble_shortcut

.align 4
Expand Down Expand Up @@ -963,7 +938,7 @@ Ladd_proceed:
mov x10,x16
mov x11,x17
add x0,sp,#128
bl __ecp_nistz256_add // p256_mul_by_2(Hsqr, U2);
bl __ecp_nistz256_add_to // p256_mul_by_2(Hsqr, U2);

add x2,sp,#192
add x0,sp,#0
Expand Down Expand Up @@ -1171,7 +1146,7 @@ _ecp_nistz256_point_add_affine:
mov x10,x16
mov x11,x17
add x0,sp,#224
bl __ecp_nistz256_add // p256_mul_by_2(Hsqr, U2);
bl __ecp_nistz256_add_to // p256_mul_by_2(Hsqr, U2);

add x2,sp,#288
add x0,sp,#0
Expand Down
Loading

0 comments on commit d60b60e

Please sign in to comment.