Add comment explaining structure of UMAAL assembly

Signed-off-by: Hanno Becker <hanno.becker@arm.com>
This commit is contained in:
Hanno Becker 2022-04-17 06:57:34 +01:00
parent d46d96cc3f
commit 606cb1626f

View File

@@ -739,6 +739,16 @@
mbedtls_mpi_uint tmp_a1, tmp_b1; \
asm volatile (
/* - Make sure loop is 4-byte aligned to avoid stalls
* upon repeated non-word aligned instructions in
* some microarchitectures.
* - Don't use ldm with post-increment or back-to-back
* loads with post-increment and same address register
* to avoid stalls on some microarchitectures.
* - Bunch loads and stores to reduce latency on some
* microarchitectures. E.g., on Cortex-M4, the first
* in a series of load/store operations has a latency
* of 2 cycles, while subsequent loads/stores are single-cycle. */
#define MULADDC_X2_CORE \
".p2align 2 \n\t" \
"ldr.w %[a0], [%[in]], #+8 \n\t" \