add ascon-xofa-based randomness compressor

This commit is contained in:
Triss 2023-08-23 22:40:23 +02:00
parent 572883b269
commit 1117391aa6
23 changed files with 1665 additions and 2 deletions

View File

@ -18,12 +18,17 @@ pico_enable_stdio_uart(${PROJECT} 0)
pico_enable_stdio_usb(${PROJECT} 1)
# Source files built into the ${PROJECT} target. The two src/ascon-xofa/
# entries add the Ascon hash/XOF code that lives under that directory.
target_sources(${PROJECT} PUBLIC
${CMAKE_CURRENT_SOURCE_DIR}/src/ascon-xofa/hash.c
${CMAKE_CURRENT_SOURCE_DIR}/src/ascon-xofa/permutations.c
${CMAKE_CURRENT_SOURCE_DIR}/src/util.c
${CMAKE_CURRENT_SOURCE_DIR}/src/rorand.c
${CMAKE_CURRENT_SOURCE_DIR}/src/rourand.c
${CMAKE_CURRENT_SOURCE_DIR}/src/main.c
)
# Header search paths: the ascon-xofa directory is listed so its headers
# (api.h, ascon.h, ...) can be included by bare file name.
target_include_directories(${PROJECT} PUBLIC
${CMAKE_CURRENT_SOURCE_DIR}/src/ascon-xofa/
${CMAKE_CURRENT_SOURCE_DIR}/src/
)

116
src/ascon-xofa/LICENSE Normal file
View File

@ -0,0 +1,116 @@
CC0 1.0 Universal
Statement of Purpose
The laws of most jurisdictions throughout the world automatically confer
exclusive Copyright and Related Rights (defined below) upon the creator and
subsequent owner(s) (each and all, an "owner") of an original work of
authorship and/or a database (each, a "Work").
Certain owners wish to permanently relinquish those rights to a Work for the
purpose of contributing to a commons of creative, cultural and scientific
works ("Commons") that the public can reliably and without fear of later
claims of infringement build upon, modify, incorporate in other works, reuse
and redistribute as freely as possible in any form whatsoever and for any
purposes, including without limitation commercial purposes. These owners may
contribute to the Commons to promote the ideal of a free culture and the
further production of creative, cultural and scientific works, or to gain
reputation or greater distribution for their Work in part through the use and
efforts of others.
For these and/or other purposes and motivations, and without any expectation
of additional consideration or compensation, the person associating CC0 with a
Work (the "Affirmer"), to the extent that he or she is an owner of Copyright
and Related Rights in the Work, voluntarily elects to apply CC0 to the Work
and publicly distribute the Work under its terms, with knowledge of his or her
Copyright and Related Rights in the Work and the meaning and intended legal
effect of CC0 on those rights.
1. Copyright and Related Rights. A Work made available under CC0 may be
protected by copyright and related or neighboring rights ("Copyright and
Related Rights"). Copyright and Related Rights include, but are not limited
to, the following:
i. the right to reproduce, adapt, distribute, perform, display, communicate,
and translate a Work;
ii. moral rights retained by the original author(s) and/or performer(s);
iii. publicity and privacy rights pertaining to a person's image or likeness
depicted in a Work;
iv. rights protecting against unfair competition in regards to a Work,
subject to the limitations in paragraph 4(a), below;
v. rights protecting the extraction, dissemination, use and reuse of data in
a Work;
vi. database rights (such as those arising under Directive 96/9/EC of the
European Parliament and of the Council of 11 March 1996 on the legal
protection of databases, and under any national implementation thereof,
including any amended or successor version of such directive); and
vii. other similar, equivalent or corresponding rights throughout the world
based on applicable law or treaty, and any national implementations thereof.
2. Waiver. To the greatest extent permitted by, but not in contravention of,
applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and
unconditionally waives, abandons, and surrenders all of Affirmer's Copyright
and Related Rights and associated claims and causes of action, whether now
known or unknown (including existing as well as future claims and causes of
action), in the Work (i) in all territories worldwide, (ii) for the maximum
duration provided by applicable law or treaty (including future time
extensions), (iii) in any current or future medium and for any number of
copies, and (iv) for any purpose whatsoever, including without limitation
commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes
the Waiver for the benefit of each member of the public at large and to the
detriment of Affirmer's heirs and successors, fully intending that such Waiver
shall not be subject to revocation, rescission, cancellation, termination, or
any other legal or equitable action to disrupt the quiet enjoyment of the Work
by the public as contemplated by Affirmer's express Statement of Purpose.
3. Public License Fallback. Should any part of the Waiver for any reason be
judged legally invalid or ineffective under applicable law, then the Waiver
shall be preserved to the maximum extent permitted taking into account
Affirmer's express Statement of Purpose. In addition, to the extent the Waiver
is so judged Affirmer hereby grants to each affected person a royalty-free,
non transferable, non sublicensable, non exclusive, irrevocable and
unconditional license to exercise Affirmer's Copyright and Related Rights in
the Work (i) in all territories worldwide, (ii) for the maximum duration
provided by applicable law or treaty (including future time extensions), (iii)
in any current or future medium and for any number of copies, and (iv) for any
purpose whatsoever, including without limitation commercial, advertising or
promotional purposes (the "License"). The License shall be deemed effective as
of the date CC0 was applied by Affirmer to the Work. Should any part of the
License for any reason be judged legally invalid or ineffective under
applicable law, such partial invalidity or ineffectiveness shall not
invalidate the remainder of the License, and in such case Affirmer hereby
affirms that he or she will not (i) exercise any of his or her remaining
Copyright and Related Rights in the Work or (ii) assert any associated claims
and causes of action with respect to the Work, in either case contrary to
Affirmer's express Statement of Purpose.
4. Limitations and Disclaimers.
a. No trademark or patent rights held by Affirmer are waived, abandoned,
surrendered, licensed or otherwise affected by this document.
b. Affirmer offers the Work as-is and makes no representations or warranties
of any kind concerning the Work, express, implied, statutory or otherwise,
including without limitation warranties of title, merchantability, fitness
for a particular purpose, non infringement, or the absence of latent or
other defects, accuracy, or the present or absence of errors, whether or not
discoverable, all to the greatest extent permissible under applicable law.
c. Affirmer disclaims responsibility for clearing rights of other persons
that may apply to the Work or any use thereof, including without limitation
any person's Copyright and Related Rights in the Work. Further, Affirmer
disclaims responsibility for obtaining any necessary consents, permissions
or other rights required for any use of the Work.
d. Affirmer understands and acknowledges that Creative Commons is not a
party to this document and has no duty or obligation with respect to this
CC0 or use of the Work.
For more information, please see
<http://creativecommons.org/publicdomain/zero/1.0/>

519
src/ascon-xofa/README.md Normal file
View File

@ -0,0 +1,519 @@
# Reference, highly optimized, masked C and ASM implementations of Ascon
Ascon is a family of lightweight cryptographic algorithms and consists of:
- Authenticated encryption schemes with associated data (AEAD)
- Hash functions (HASH) and extendible output functions (XOF)
- Pseudo-random functions (PRF) and message authentication codes (MAC)
All implementations use the "ECRYPT Benchmarking of Cryptographic Systems (eBACS)" interface:
- https://bench.cr.yp.to/call-aead.html for AEAD (Ascon-128, Ascon-128a, Ascon-80pq)
- https://bench.cr.yp.to/call-hash.html for HASH and XOF (Ascon-Hash, Ascon-Hasha, Ascon-Xof, Ascon-Xofa)
- https://nacl.cr.yp.to/auth.html for PRF and MAC (Ascon-Mac, Ascon-Prf, Ascon-PrfShort)
For more information on Ascon visit: https://ascon.iaik.tugraz.at/
## TL;DR
If you do not know where to start, use the reference implementations (self-contained, portable, very fast):
- `crypto_aead/ascon128v12/ref`
- `crypto_aead/ascon128av12/ref`
- `crypto_hash/asconxofv12/ref`
- `crypto_hash/asconxofav12/ref`
## Algorithms
This repository contains implementations of the following 10 Ascon v1.2 algorithms:
- `crypto_aead/ascon128v12`: Ascon-128
- `crypto_aead/ascon128av12`: Ascon-128a
- `crypto_aead/ascon80pqv12`: Ascon-80pq
- `crypto_hash/asconhashv12`: Ascon-Hash
- `crypto_hash/asconhashav12`: Ascon-Hasha
- `crypto_hash/asconxofv12`: Ascon-Xof
- `crypto_hash/asconxofav12`: Ascon-Xofa
- `crypto_auth/asconmacv12`: Ascon-Mac
- `crypto_auth/asconprfv12`: Ascon-Prf
- `crypto_auth/asconprfsv12`: Ascon-PrfShort
We also provide two combined algorithm implementations supporting both AEAD and
hashing:
- `crypto_aead_hash/asconv12`: Ascon-128 combined with Ascon-Hash
- `crypto_aead_hash/asconav12`: Ascon-128a combined with Ascon-Hasha
The following algorithms demonstrate the performance improvement of Ascon on
32-bit platforms without bit interleaving overhead. Bit interleaving could be
performed externally on the host side or using a dedicated instruction (e.g.
using the ARM Custom Datapath Extension). Note that a similar performance
improvement could be achieved using funnel shift instructions (available on some
32-bit RISC-V extensions).
- `crypto_aead/ascon128bi32v12`: Ascon-128 (+17% on ARM1176JZF-S)
- `crypto_aead/ascon128abi32v12`: Ascon-128a (+23% on ARM1176JZF-S)
- `crypto_hash/asconhashbi32v12`: Ascon-Hash (+5% on ARM1176JZF-S)
- `crypto_hash/asconhashabi32v12`: Ascon-Hasha (+8% on ARM1176JZF-S)
- `crypto_aead_hash/asconbi32v12`: Ascon-128 combined with Ascon-Hash
- `crypto_aead_hash/asconabi32v12`: Ascon-128a combined with Ascon-Hasha
## Implementations
For most algorithms, we provide the following pure C implementations:
- `ref`: reference implementation
- `opt64`: 64-bit speed-optimized
- `opt32`: 32-bit speed-optimized
- `opt64_lowsize`: 64-bit size-optimized
- `opt32_lowsize`: 32-bit size-optimized
- `bi32`: 32-bit speed-optimized bit-interleaved
- `bi32_lowreg`: 32-bit speed-optimized bit-interleaved (low register usage)
- `bi32_lowsize`: 32-bit size-optimized bit-interleaved
- `esp32`: 32-bit ESP32 optimized
- `opt8`: 8-bit size- and speed-optimized
- `bi8`: 8-bit optimized bit-interleaved
the following C with inline or partial ASM implementations:
- `avx512`: 320-bit speed-optimized AVX512
- `neon`: 64-bit speed-optimized ARM NEON
- `armv6`: 32-bit speed-optimized ARMv6
- `armv6m`: 32-bit speed-optimized ARMv6-M
- `armv7m`: 32-bit speed-optimized ARMv7-M
- `armv6_lowsize`: 32-bit size-optimized ARMv6
- `armv6m_lowsize`: 32-bit size-optimized ARMv6-M
- `armv7m_lowsize`: 32-bit size-optimized ARMv7-M
- `armv7m_small`: 32-bit small speed-optimized ARMv7-M
- `bi32_armv6`: 32-bit speed-optimized bit-interleaved ARMv6
- `bi32_armv6m`: 32-bit speed-optimized bit-interleaved ARMv6-M
- `bi32_armv7m`: 32-bit speed-optimized bit-interleaved ARMv7-M
- `bi32_armv7m_small`: 32-bit small bit-interleaved ARMv7-M
- `avr`: 8-bit size- and speed-optimized AVR
- `avr_lowsize`: 8-bit size-optimized AVR
the following ASM implementations:
- `asm_esp32`: 32-bit optimized ESP32 using funnel-shift instructions
- `asm_rv32i`: 32-bit optimized RV32I using the base instruction set
- `asm_rv32b`: 32-bit optimized RV32B using bitmanip base (Zbb)
- `asm_fsr_rv32b`: 32-bit optimized funnel-shift RV32B using bitmanip base and bitmanip ternary (ZbbZbt)
- `asm_bi32_rv32b`: 32-bit optimized bit-interleaved RV32B using bitmanip base and bitmanip permutations (ZbbZbp)
and the following high-level masked (shared) C with inline ASM implementations:
- `protected_bi32_armv6`: 32-bit masked bit-interleaved ARMv6
- `protected_bi32_armv6_leveled`: 32-bit masked and leveled bit-interleaved ARMv6
The masked C implementations can be used as a starting point to generate
device specific C/ASM implementations. Note that the masked C implementations
require a minimum amount of ASM instructions. Otherwise, the compiler may
heavily optimize the code and even combine shares. Obviously, the output
generated is very sensitive to compiler and environment changes and any
generated output needs to be security evaluated. A preliminary evaluation of
these implementations has been performed on some
[ChipWhisperer](https://www.newae.com/chipwhisperer) devices. The setup and
preliminary results can be found at: https://github.com/ascon/simpleserial-ascon
# Performance results on different CPUs in cycles per byte
## Ascon-128a
| Message Length in Bytes | 1 | 8 | 16 | 32 | 64 | 1536 | long |
|:-------------------------|-----:|-----:|-----:|-----:|-----:|-----:|-----:|
| AMD EPYC 7742\* | | | | | 7.4 | 4.4 | 4.2 |
| AMD Ryzen 9 5950X\* | | | | | 8.1 | 5.3 | 5.2 |
| Apple M1 (ARMv8)\* | | | | | 9.4 | 6.3 | 6.3 |
| Cortex-A72 (ARMv8)\* | | | | | 10.9 | 7.2 | 7.0 |
| Intel Xeon E5-2609 v4\* | | | | | 11.3 | 7.4 | 7.2 |
| Intel Core i5-6300U | 365 | 47 | 31 | 19 | 13.5 | 8.0 | 7.8 |
| Intel Core i5-4200U | 519 | 67 | 44 | 27 | 18.8 | 11.0 | 10.6 |
| Cortex-A9 (ARMv7)\* | | | | | 42.8 | 24.6 | 24.0 |
| Cortex-A7 (NEON) | 2204 | 226 | 132 | 82 | 55.9 | 31.7 | 30.7 |
| Cortex-A7 (ARMv7)\* | | | | | 55.5 | 38.2 | 37.5 |
| ARM1176JZF-S (ARMv6) | 1908 | 235 | 156 | 99 | 70.4 | 43.0 | 42.9 |
## Ascon-128 and Ascon-80pq
| Message Length in Bytes | 1 | 8 | 16 | 32 | 64 | 1536 | long |
|:-------------------------|-----:|-----:|-----:|-----:|-----:|-----:|-----:|
| AMD EPYC 7742\* | | | | | 8.1 | 6.6 | 6.5 |
| AMD Ryzen 9 5950X\* | | | | | 11.0 | 8.2 | 8.1 |
| Apple M1 (ARMv8)\* | | | | | 12.5 | 9.5 | 9.3 |
| Cortex-A72 (ARMv8)\* | | | | | 13.8 | 10.7 | 10.5 |
| Intel Xeon E5-2609 v4\* | | | | | 14.9 | 10.8 | 10.6 |
| Intel Core i5-6300U | 367 | 58 | 35 | 23 | 17.6 | 11.9 | 11.4 |
| Intel Core i5-4200U | 521 | 81 | 49 | 32 | 23.9 | 16.2 | 15.8 |
| Cortex-A9 (ARMv7)\* | | | | | 51.7 | 34.1 | 33.3 |
| Cortex-A7 (NEON) | 2182 | 249 | 148 | 97 | 71.7 | 47.5 | 46.5 |
| Cortex-A7 (ARMv7)\* | | | | | 69.6 | 52.0 | 51.6 |
| ARM1176JZF-S (ARMv6) | 1921 | 277 | 167 | 112 | 83.7 | 57.2 | 56.8 |
## Ascon-Hasha and Ascon-Xofa
| Message Length in Bytes | 1 | 8 | 16 | 32 | 64 | 1536 | long |
|:-------------------------|-----:|-----:|-----:|-----:|------:|-----:|-----:|
| AMD EPYC 7742\* | | | | | | | |
| AMD Ryzen 7 1700\* | | | | | 22.0 | 12.1 | 11.7 |
| Apple M1 (ARMv8)\* | | | | | | | |
| Cortex-A72 (ARMv8)\* | | | | | 22.2 | 14.5 | 14.2 |
| Intel Xeon E5-2609 v4\* | | | | | 23.3 | 14.4 | 14.0 |
| Intel Core i5-6300U | 550 | 83 | 49 | 33 | 23.7 | 15.6 | 15.5 |
| Intel Core i5-4200U | 749 | 112 | 67 | 44 | 31.8 | 20.8 | 20.7 |
| Cortex-A9 (ARMv7)\* | | | | | 87.5 | 45.6 | 44.0 |
| Cortex-A7 (ARMv7)\* | | | | | 102.3 | 63.5 | 61.8 |
| ARM1176JZF-S (ARMv6) | 2390 | 356 | 211 | 138 | 100.7 | 65.7 | 65.3 |
## Ascon-Hash and Ascon-Xof
| Message Length in Bytes | 1 | 8 | 16 | 32 | 64 | 1536 | long |
|:-------------------------|-----:|-----:|-----:|-----:|------:|-----:|-----:|
| AMD EPYC 7742\* | | | | | 21.1 | 13.3 | 12.4 |
| AMD Ryzen 9 5950X\* | | | | | 24.1 | 16.1 | 15.8 |
| Apple M1 (ARMv8)\* | | | | | 29.2 | 19.6 | 18.5 |
| Cortex-A72 (ARMv8)\* | | | | | 30.5 | 20.5 | 20.0 |
| Intel Xeon E5-2609 v4\* | | | | | 31.9 | 21.4 | 21.2 |
| Intel Core i5-6300U | 747 | 114 | 69 | 46 | 34.2 | 23.2 | 23.1 |
| Intel Core i5-4200U | 998 | 153 | 92 | 61 | 45.5 | 30.9 | 30.7 |
| Cortex-A9 (ARMv7)\* | | | | | 95.8 | 55.5 | 53.9 |
| Cortex-A7 (ARMv7)\* | | | | | 138.1 | 89.9 | 88.8 |
| ARM1176JZF-S (ARMv6) | 3051 | 462 | 277 | 184 | 137.3 | 92.6 | 92.2 |
## Ascon-Mac and Ascon-Prf
| Message Length in Bytes | 1 | 8 | 16 | 32 | 64 | 1536 | long |
|:-------------------------|-----:|-----:|-----:|-----:|-----:|-----:|-----:|
| Intel Core i5-6300U | 369 | 46 | 24 | 18 | 11.7 | 6.4 | 6.3 |
| Intel Core i5-4200U | 506 | 63 | 32 | 24 | 16.2 | 8.8 | 8.7 |
| ARM1176JZF-S (ARMv6) | 1769 | 223 | 117 | 85 | 57.5 | 31.9 | 31.6 |
## Ascon-PrfShort
| Message Length in Bytes | 1 | 8 | 16 | 32 | 64 | 1536 | long |
|:-------------------------|-----:|-----:|-----:|-----:|-----:|-----:|-----:|
| Intel Core i5-6300U | 185 | 23 | 12 | - | - | - | - |
| Intel Core i5-4200U | 257 | 33 | 17 | - | - | - | - |
| ARM1176JZF-S (ARMv6) | 1057 | 132 | 69 | - | - | - | - |
\* Results taken from eBACS: http://bench.cr.yp.to/
# Build and test
Build and test all Ascon C targets using release flags (-O2 -fomit-frame-pointer -march=native -mtune=native):
```
mkdir build && cd build
cmake ..
cmake --build .
ctest
```
Build and test all Ascon C targets on Windows:
```
mkdir build && cd build
cmake ..
cmake --build . --config Release
ctest -C Release
```
Build and test all Ascon C targets using debug flags (with NIST defined flags and sanitizers):
```
mkdir build && cd build
cmake .. -DCMAKE_BUILD_TYPE=Debug
cmake --build .
ctest
```
Manually set the compiler and/or release flags (e.g. to disable -march=native -mtune=native).
```
mkdir build && cd build
cmake .. -DCMAKE_C_COMPILER=clang -DREL_FLAGS="-O2;-fomit-frame-pointer"
cmake --build .
ctest
```
Build and run only specific algorithms, implementations and tests:
```
mkdir build && cd build
cmake .. -DALG_LIST="ascon128;asconhash" -DIMPL_LIST="opt64;bi32" -DTEST_LIST="genkat"
cmake --build .
ctest
```
Note that cmake stores variables in a cache. Therefore, variables can be set
one-by-one, unset using e.g. `cmake . -UIMPL_LIST` and shown using `cmake . -L`:
```
mkdir build && cd build
cmake ..
cmake . -DALG_LIST="ascon128;asconhash"
cmake . -DIMPL_LIST="opt64;bi32"
cmake . -DTEST_LIST="genkat"
cmake . -L
cmake --build .
ctest
```
Cross compile and test with custom emulator using e.g. `qemu-arm`:
```
mkdir build && cd build
cmake .. -DCMAKE_C_COMPILER="arm-linux-gnueabi-gcc" \
-DREL_FLAGS="-O2;-fomit-frame-pointer;-march=armv7;-mtune=cortex-m4" \
-DEMULATOR="qemu-arm;-L;/usr/arm-linux-gnueabi" \
-DALG_LIST="ascon128;ascon128a" -DIMPL_LIST="armv7m;bi32_armv7m"
cmake --build .
ctest
```
or using Intel SDE (use full path to `sde` or add to path variable):
```
mkdir build && cd build
cmake .. -DCMAKE_C_COMPILER=gcc -DIMPL_LIST=avx512 -DEMULATOR="sde;--" \
-DREL_FLAGS="-O2;-fomit-frame-pointer;-march=icelake-client"
cmake --build .
ctest
```
# Build and benchmark:
Build the getcycles test:
```
mkdir build && cd build
cmake .. -DALG_LIST="ascon128;asconhash" -DIMPL_LIST="opt32;opt32_lowsize" -DTEST_LIST="getcycles"
cmake --build .
```
Get the CPU cycle performance:
```
./getcycles_crypto_aead_ascon128v12_opt32
./getcycles_crypto_aead_ascon128v12_opt32_lowsize
./getcycles_crypto_hash_asconhashv12_opt32
./getcycles_crypto_hash_asconhashv12_opt32_lowsize
```
Get the implementation size:
```
size -t libcrypto_aead_ascon128v12_opt32.a
size -t libcrypto_aead_ascon128v12_opt32_lowsize.a
size -t libcrypto_hash_asconhashv12_opt32.a
size -t libcrypto_hash_asconhashv12_opt32_lowsize.a
```
# Manually build and run a single Ascon target:
Build example for AEAD algorithms:
```
gcc -march=native -O3 -Icrypto_aead/ascon128v12/opt64 crypto_aead/ascon128v12/opt64/*.c -Itests tests/genkat_aead.c -o genkat
gcc -march=native -O3 -Icrypto_aead/ascon128v12/opt64 crypto_aead/ascon128v12/opt64/*.c -DCRYPTO_AEAD -Itests tests/getcycles.c -o getcycles
```
Build example for HASH algorithms:
```
gcc -march=native -O3 -Icrypto_hash/asconhashv12/opt64 crypto_hash/asconhashv12/opt64/*.c -Itests tests/genkat_hash.c -o genkat
gcc -march=native -O3 -Icrypto_hash/asconhashv12/opt64 crypto_hash/asconhashv12/opt64/*.c -DCRYPTO_HASH -Itests tests/getcycles.c -o getcycles
```
Generate KATs and get CPU cycles:
```
./genkat
./getcycles
```
## Manually build and run an RV32 target:
Setup:
```
sudo apt install gcc-riscv64-unknown-elf picolibc-riscv64-unknown-elf qemu-system-misc
```
Example to build, run and test an AEAD/HASH algorithm using `gcc`, `picolibc` and `qemu`:
```
riscv64-unknown-elf-gcc -O2 -march=rv32i -mabi=ilp32 --specs=picolibc.specs --oslib=semihost --crt0=hosted -Ttests/rv32.ld \
-Icrypto_aead/ascon128v12/asm_rv32i crypto_aead/ascon128v12/asm_rv32i/*.[cS] -Itests tests/genkat_aead.c -o genkat
qemu-system-riscv32 -semihosting-config enable=on -monitor none -serial none -nographic -machine virt,accel=tcg -cpu rv32 -bios none -kernel genkat
diff LWC_AEAD_KAT_128_128.txt crypto_aead/ascon128v12/LWC_AEAD_KAT_128_128.txt
```
```
riscv64-unknown-elf-gcc -O2 -march=rv32i -mabi=ilp32 --specs=picolibc.specs --oslib=semihost --crt0=hosted -Ttests/rv32.ld \
-Icrypto_hash/asconhashv12/opt32 crypto_hash/asconhashv12/opt32/*.[cS] -Itests tests/genkat_hash.c -o genkat
qemu-system-riscv32 -semihosting-config enable=on -monitor none -serial none -nographic -machine virt,accel=tcg -cpu rv32 -bios none -kernel genkat
diff LWC_HASH_KAT_256.txt crypto_hash/asconhashv12/LWC_HASH_KAT_256.txt
```
## Manually build and run an AVR target:
Example to build, run and test an AEAD algorithm using `avr-gcc`, `avr-libc` and `simavr`.
Setup:
```
sudo apt install gcc-avr avr-libc simavr
git clone https://github.com/JohannCahier/avr_uart.git
```
Single test vector using `demo` and performance measurement using `getcycles`:
```
avr-gcc -mmcu=atmega128 -std=c99 -Os -Icrypto_aead/ascon128v12/opt8 crypto_aead/ascon128v12/opt8/*.[cS] \
-DAVR_UART -Iavr_uart avr_uart/avr_uart.c -Wno-incompatible-pointer-types -Wno-cpp \
-DCRYPTO_AEAD -Itests tests/demo.c -o demo
simavr -m atmega128 ./demo
```
```
avr-gcc -mmcu=atmega128 -std=c99 -Os -Icrypto_aead/ascon128v12/opt8 crypto_aead/ascon128v12/opt8/*.[cS] \
-DAVR_UART -Iavr_uart avr_uart/avr_uart.c -Wno-incompatible-pointer-types -Wno-cpp \
-DCRYPTO_AEAD -Itests tests/getcycles.c -o getcycles
simavr -t -m atmega128 ./getcycles
```
Generate all test vectors for AEAD/HASH and write result to a file. Press Ctrl-C to quit `simavr` after about a minute:
```
avr-gcc -mmcu=atmega128 -std=c99 -Os -Icrypto_aead/ascon128v12/opt8 crypto_aead/ascon128v12/opt8/*.[cS] \
-DAVR_UART -Iavr_uart avr_uart/avr_uart.c -Wno-incompatible-pointer-types -Wno-cpp \
-Itests tests/genkat_aead.c -o genkat_aead
echo "Press Ctrl-C to quit simavr after about a minute"
simavr -t -m atmega128 ./genkat_aead 2> LWC_AEAD_KAT_128_128.txt
sed -i -e 's/\x1b\[[0-9;]*m//g' -e 's/\.\.$//' LWC_AEAD_KAT_128_128.txt
diff LWC_AEAD_KAT_128_128.txt crypto_aead/ascon128v12/LWC_AEAD_KAT_128_128.txt
```
```
avr-gcc -mmcu=atmega128 -std=c99 -Os -Icrypto_hash/asconhashv12/opt8 crypto_hash/asconhashv12/opt8/*.[cS] \
-DAVR_UART -Iavr_uart avr_uart/avr_uart.c -Wno-incompatible-pointer-types -Wno-cpp \
-Itests tests/genkat_hash.c -o genkat_hash
echo "Press Ctrl-C to quit simavr after about a minute"
simavr -t -m atmega128 ./genkat_hash 2> LWC_HASH_KAT_256.txt
sed -i -e 's/\x1b\[[0-9;]*m//g' -e 's/\.\.$//' LWC_HASH_KAT_256.txt
diff LWC_HASH_KAT_256.txt crypto_hash/asconhashv12/LWC_HASH_KAT_256.txt
```
# Benchmarking
## Hints to get more reliable getcycles results on Intel/AMD CPUs:
* Determine the processor base frequency (also called design frequency):
- e.g. using the Intel/AMD website
- or using `lscpu` listed under model name
* Disable turbo boost (this should lock the frequency to the next value
below the processor base frequency):
```
echo 1 | sudo tee /sys/devices/system/cpu/intel_pstate/no_turbo
```
* If the above does not work, manually set the frequency using e.g. `cpufreq-set`.
* Determine the actual frequency (under load):
- e.g. by watching the frequency using `lscpu` or `cpufreq-info`
* Determine the scaling factor between the actual and base frequency:
- factor = actual frequency / base frequency
* Run a getcycles program using the frequency factor and watch the results:
```
while true; do ./getcycles_crypto_aead_ascon128v12_opt64 $factor; done
```
* Run the `benchmark-getcycles.sh` script with the frequency factor and a
specific algorithm to benchmark all corresponding getcycles implementations:
```
scripts/benchmark-getcycles.sh $factor ascon128
```
## Hints to activate the performance monitor unit (PMU) on ARM CPUs:
* First try to install `linux-tools` and see if it works.
* On many ARM platforms, the PMU has to be enabled using a kernel module:
- Source code for Armv6 (32-bit):
<http://sandsoftwaresound.net/raspberry-pi/raspberry-pi-gen-1/performance-counter-kernel-module/>
- Source code for Armv7 (32-bit):
<https://github.com/thoughtpolice/enable_arm_pmu>
- Source code for Armv8/Aarch64 (64-bit):
<https://github.com/rdolbeau/enable_arm_pmu>
* Steps to compile the kernel module on the raspberry pi:
- Find out the kernel version using `uname -a`
- Download the kernel header files, e.g. `raspberrypi-kernel-header`
- Download the source code for the Armv6 kernel module
- Build, install and load the kernel module
## Benchmark Ascon v1.2 using supercop
Download supercop according to the website: http://bench.cr.yp.to/supercop.html
To test only Ascon, just run the following commands:
```
./do-part init
./do-part crypto_aead ascon128v12
./do-part crypto_aead ascon128av12
./do-part crypto_aead ascon80pqv12
./do-part crypto_hash asconhashv12
./do-part crypto_hash asconxofv12
```
Show the cycles/Byte for a 1536 Byte long message:
```
cat bench/*/data | grep '_cycles 1536 ' | awk '{printf "%.1f\t%s\t%s\n", $9/$8,
$6, $7}' | sort -nr
```
## Evaluate and optimize Ascon on constrained devices:
* The ascon-c code allows setting compile-time parameters `ASCON_INLINE_MODE`
(IM), `ASCON_INLINE_PERM` (IP), `ASCON_UNROLL_LOOPS` (UL), `ASCON_INLINE_BI`
(IB), via command line or in the `crypto_*/ascon*/*/config.h` files.
* Use the `benchmark-config.sh` script to evaluate all combinations of these
parameters for a given list of Ascon implementations. The script is called
with an output file, frequency factor, the algorithm, and the list of
implementations to test:
```
scripts/benchmark-config.sh results-config.md $factor ascon128 ref opt64 opt64_lowsize
```
* The `results-config.md` file then contains a markup table with size and cycles
for each implementation and parameter set to evaluate several time-area
trade-offs.
* The `benchmark-all.sh` and `benchmark-size.sh` scripts provide a time/size
and size-only table of all currently compiled implementations:
```
scripts/benchmark-all.sh results-all.md
scripts/benchmark-size.sh results-size.md
```

4
src/ascon-xofa/api.h Normal file
View File

@ -0,0 +1,4 @@
/* Version string for this code drop.
 * NOTE(review): presumably the upstream Ascon C package version - confirm. */
#define CRYPTO_VERSION "1.2.7"
/* NOTE(review): presumably the default output length in bytes expected by
 * the eBACS crypto_hash interface - confirm against the callers. */
#define CRYPTO_BYTES 32
/* Variant selector read by hash.c: 0 picks the XOF/XOFA initial values,
 * 32 picks the HASH/HASHA initial values. */
#define ASCON_HASH_BYTES 0 /* XOF */
/* Second variant selector read by hash.c: combined with
 * ASCON_HASH_BYTES == 0, the value 8 picks the ASCON_XOFA_* initial values
 * and 12 picks the ASCON_XOF_* initial values. */
#define ASCON_HASH_ROUNDS 8

View File

@ -0,0 +1,3 @@
aarch64
armeabi
arm

53
src/ascon-xofa/ascon.h Normal file
View File

@ -0,0 +1,53 @@
#ifndef ASCON_H_
#define ASCON_H_
#include <stdint.h>
#include "api.h"
#include "config.h"
/* Ascon state: 40 bytes of storage exposed through three overlapping views,
 * so the same data can be addressed as 64-bit words, 32-bit words or bytes. */
typedef union {
uint64_t x[5]; /* five 64-bit words */
uint32_t w[5][2]; /* same storage: two 32-bit words per 64-bit word */
uint8_t b[5][8]; /* same storage: eight bytes per 64-bit word */
} ascon_state_t;
#ifdef ASCON_AEAD_RATE
/* Number of 64-bit words needed to hold CRYPTO_KEYBYTES bytes, rounded up.
 * The replacement list is fully parenthesized so the macro behaves as a
 * single operand wherever it is expanded (e.g. `n / ASCON_KEYWORDS`). */
#define ASCON_KEYWORDS ((CRYPTO_KEYBYTES + 7) / 8)
/* Key storage: ASCON_KEYWORDS 64-bit words, with the same overlapping
 * 64-bit / 32-bit / byte views that ascon_state_t provides. */
typedef union {
uint64_t x[ASCON_KEYWORDS]; /* 64-bit words */
uint32_t w[ASCON_KEYWORDS][2]; /* same storage: two 32-bit words per word */
uint8_t b[ASCON_KEYWORDS][8]; /* same storage: eight bytes per word */
} ascon_key_t;
/* Out-of-line AEAD entry points, declared only when ASCON_INLINE_MODE is 0.
 * NOTE(review): the definitions are not part of this header; the one-line
 * descriptions below are inferred from names and signatures - confirm
 * against the AEAD sources. */
#if !ASCON_INLINE_MODE
/* presumably loads the raw key bytes `k` into `key` */
void ascon_loadkey(ascon_key_t* key, const uint8_t* k);
/* presumably initialises state `s` from `key` and the public nonce `npub` */
void ascon_initaead(ascon_state_t* s, const ascon_key_t* key,
const uint8_t* npub);
/* presumably processes `adlen` bytes of associated data from `ad` */
void ascon_adata(ascon_state_t* s, const uint8_t* ad, uint64_t adlen);
/* presumably encrypts `mlen` bytes from `m` into `c` */
void ascon_encrypt(ascon_state_t* s, uint8_t* c, const uint8_t* m,
uint64_t mlen);
/* presumably decrypts `clen` bytes from `c` into `m` */
void ascon_decrypt(ascon_state_t* s, uint8_t* m, const uint8_t* c,
uint64_t clen);
/* presumably performs the finalisation step using key `k` */
void ascon_final(ascon_state_t* s, const ascon_key_t* k);
#endif
#endif
/* Hash/XOF entry points. They are declared only when ASCON_HASH_BYTES is
 * defined and the mode functions are built out-of-line
 * (ASCON_INLINE_MODE == 0). */
#ifdef ASCON_HASH_BYTES
#if !ASCON_INLINE_MODE
/* Defined in hash.c, where the initial value is chosen from
 * ASCON_HASH_BYTES and ASCON_HASH_ROUNDS. */
void ascon_inithash(ascon_state_t* s);
/* NOTE(review): presumably absorbs `inlen` bytes from `in` into the state -
 * definition not visible here, confirm. */
void ascon_absorb(ascon_state_t* s, const uint8_t* in, uint64_t inlen);
/* NOTE(review): presumably writes `outlen` output bytes to `out` -
 * definition not visible here, confirm. */
void ascon_squeeze(ascon_state_t* s, uint8_t* out, uint64_t outlen);
#endif
#endif
#endif /* ASCON_H_ */

39
src/ascon-xofa/bendian.h Normal file
View File

@ -0,0 +1,39 @@
#ifndef ENDIAN_H_
#define ENDIAN_H_
/* Byte-order helpers U64BIG / U32BIG / U16BIG.
 * On big-endian targets they expand to their argument unchanged; on
 * little-endian targets they expand to an expression that reverses the byte
 * order of a 64-, 32- or 16-bit value. The same macro therefore converts in
 * both directions between host order and big-endian order.
 * The little-endian forms mention (x) several times, so the argument must
 * be free of side effects. */
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
/* macros for big endian machines */
/* define PRAGMA_ENDIAN to have the chosen variant reported at compile time */
#ifdef PRAGMA_ENDIAN
#pragma message("Using macros for big endian machines")
#endif
#define U64BIG(x) (x)
#define U32BIG(x) (x)
#define U16BIG(x) (x)
#elif defined(_MSC_VER) || \
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
/* macros for little endian machines */
/* _MSC_VER is treated as little endian without consulting __BYTE_ORDER__ */
#ifdef PRAGMA_ENDIAN
#pragma message("Using macros for little endian machines")
#endif
/* each term isolates one byte with a mask and moves it to the mirrored
 * position; OR-ing the terms yields the byte-reversed value */
#define U64BIG(x) \
(((0x00000000000000FFULL & (x)) << 56) | \
((0x000000000000FF00ULL & (x)) << 40) | \
((0x0000000000FF0000ULL & (x)) << 24) | \
((0x00000000FF000000ULL & (x)) << 8) | \
((0x000000FF00000000ULL & (x)) >> 8) | \
((0x0000FF0000000000ULL & (x)) >> 24) | \
((0x00FF000000000000ULL & (x)) >> 40) | \
((0xFF00000000000000ULL & (x)) >> 56))
#define U32BIG(x) \
(((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \
((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24))
#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8))
#else
/* neither branch matched: fail the build instead of guessing a byte order */
#error "Ascon byte order macros not defined in bendian.h"
#endif
#endif /* ENDIAN_H_ */

19
src/ascon-xofa/config.h Normal file
View File

@ -0,0 +1,19 @@
#ifndef CONFIG_H_
#define CONFIG_H_
/* Build-time switches. Each default applies only when the macro has not
 * already been defined (for example on the compiler command line). */

/* inline the ascon mode: with the default 0, ascon.h declares the mode
 * functions as ordinary prototypes and hash.c redefines forceinline to
 * nothing for them */
#ifndef ASCON_INLINE_MODE
#define ASCON_INLINE_MODE 0
#endif
/* inline all permutations (default 1).
 * NOTE(review): the consumer of this switch is not in this header - confirm
 * its exact effect in the permutation sources. */
#ifndef ASCON_INLINE_PERM
#define ASCON_INLINE_PERM 1
#endif
/* unroll permutation loops (default 1).
 * NOTE(review): the consumer of this switch is not in this header - confirm
 * its exact effect in the permutation sources. */
#ifndef ASCON_UNROLL_LOOPS
#define ASCON_UNROLL_LOOPS 1
#endif
#endif /* CONFIG_H_ */

View File

@ -0,0 +1,90 @@
#ifndef CONSTANTS_H_
#define CONSTANTS_H_
#include <stdint.h>
/* Key lengths per AEAD variant (the names indicate bytes). */
#define ASCON_128_KEYBYTES 16
#define ASCON_128A_KEYBYTES 16
#define ASCON_80PQ_KEYBYTES 20
/* Rate per mode.
 * NOTE(review): presumably in bytes, like the key lengths - confirm. */
#define ASCON_128_RATE 8
#define ASCON_128A_RATE 16
#define ASCON_HASH_RATE 8
#define ASCON_PRF_IN_RATE 32
#define ASCON_PRFA_IN_RATE 40
#define ASCON_PRF_OUT_RATE 16
/* Round counts for the "PA" and "PB" permutations of each mode.
 * NOTE(review): the roles of PA and PB are not shown in this header -
 * confirm against the mode implementations. */
#define ASCON_128_PA_ROUNDS 12
#define ASCON_128_PB_ROUNDS 6
#define ASCON_128A_PA_ROUNDS 12
#define ASCON_128A_PB_ROUNDS 8
#define ASCON_HASH_PA_ROUNDS 12
#define ASCON_HASH_PB_ROUNDS 12
#define ASCON_HASHA_PA_ROUNDS 12
#define ASCON_HASHA_PB_ROUNDS 8
#define ASCON_PRF_PA_ROUNDS 12
#define ASCON_PRF_PB_ROUNDS 12
#define ASCON_PRFA_PA_ROUNDS 12
#define ASCON_PRFA_PB_ROUNDS 8
/* Single-word initial values, one per algorithm variant. For the hash/XOF
 * variants, hash.c (under ASCON_PRINT_STATE) stores the value in state word
 * 0, zeroes the other four words and applies P(s, 12). */
#define ASCON_128_IV 0x80400c0600000000ull
#define ASCON_128A_IV 0x80800c0800000000ull
#define ASCON_80PQ_IV 0xa0400c0600000000ull
#define ASCON_HASH_IV 0x00400c0000000100ull
#define ASCON_HASHA_IV 0x00400c0400000100ull
#define ASCON_XOF_IV 0x00400c0000000000ull
#define ASCON_XOFA_IV 0x00400c0400000000ull
/* Five-word initial values for the hash/XOF variants. hash.c collects the
 * set matching ASCON_HASH_BYTES / ASCON_HASH_ROUNDS into a local iv[5].
 * NOTE(review): presumably these are the precomputed result of the
 * single-word initialisation described above - confirm. */
#define ASCON_HASH_IV0 0xee9398aadb67f03dull
#define ASCON_HASH_IV1 0x8bb21831c60f1002ull
#define ASCON_HASH_IV2 0xb48a92db98d5da62ull
#define ASCON_HASH_IV3 0x43189921b8f8e3e8ull
#define ASCON_HASH_IV4 0x348fa5c9d525e140ull
#define ASCON_HASHA_IV0 0x01470194fc6528a6ull
#define ASCON_HASHA_IV1 0x738ec38ac0adffa7ull
#define ASCON_HASHA_IV2 0x2ec8e3296c76384cull
#define ASCON_HASHA_IV3 0xd6f6a54d7f52377dull
#define ASCON_HASHA_IV4 0xa13c42a223be8d87ull
#define ASCON_XOF_IV0 0xb57e273b814cd416ull
#define ASCON_XOF_IV1 0x2b51042562ae2420ull
#define ASCON_XOF_IV2 0x66a3a7768ddf2218ull
#define ASCON_XOF_IV3 0x5aad0a7a8153650cull
#define ASCON_XOF_IV4 0x4f3e0e32539493b6ull
#define ASCON_XOFA_IV0 0x44906568b77b9832ull
#define ASCON_XOFA_IV1 0xcd8d6cae53455532ull
#define ASCON_XOFA_IV2 0xf7b5212756422129ull
#define ASCON_XOFA_IV3 0x246885e1de0d225bull
#define ASCON_XOFA_IV4 0xa8cb5ce33449973full
/* Single-word initial values for the MAC/PRF variants. */
#define ASCON_MAC_IV 0x80808c0000000080ull
#define ASCON_MACA_IV 0x80808c0400000080ull
#define ASCON_PRF_IV 0x80808c0000000000ull
#define ASCON_PRFA_IV 0x80808c0400000000ull
#define ASCON_PRFS_IV 0x80004c8000000000ull
/* Round constants RC0..RCb. Each value equals the previous one plus INC
 * (that is, minus 0x0f): 0xf0, 0xe1, ..., 0x4b. */
#define RC0 0xf0
#define RC1 0xe1
#define RC2 0xd2
#define RC3 0xc3
#define RC4 0xb4
#define RC5 0xa5
#define RC6 0x96
#define RC7 0x87
#define RC8 0x78
#define RC9 0x69
#define RCa 0x5a
#define RCb 0x4b
/* RC(i) is the identity: the loop counter itself is the round constant. */
#define RC(i) (i)
/* START(n) is the constant at which an n-round sequence begins:
 * START(12) == 0xf0 (RC0), START(8) == 0xb4 (RC4), START(6) == 0x96 (RC6). */
#define START(n) ((3 + (n)) << 4 | (12 - (n)))
/* step from one round constant to the next */
#define INC -0x0f
/* END == RCb + INC (0x4b - 0x0f): the first value past the last constant.
 * NOTE(review): presumably used as `for (i = START(n); i != END; i += INC)`
 * in the permutation code - confirm there. */
#define END 0x3c
#endif /* CONSTANTS_H_ */

View File

@ -0,0 +1,23 @@
#ifndef FORCEINLINE_H_
#define FORCEINLINE_H_
/* define forceinline macro
 *
 * forceinline expands to the strongest "always inline" spelling the
 * compiler offers:
 *   - MSVC:            __forceinline
 *   - GCC-compatible:  inline + always_inline attribute when compiling as
 *                      C99 or later, otherwise plain `static inline`
 *   - Clang without __GNUC__: always_inline if the attribute is available
 *   - anything else:   plain `inline`
 * Clang is detected through its predefined macro `__clang__` (lower case);
 * an upper-case `__CLANG__` is not predefined by the compiler, so testing
 * it would never select this branch.
 */
#ifdef _MSC_VER
#define forceinline __forceinline
#elif defined(__GNUC__)
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
#define forceinline inline __attribute__((__always_inline__))
#else
#define forceinline static inline
#endif
#elif defined(__clang__)
#if __has_attribute(__always_inline__)
#define forceinline inline __attribute__((__always_inline__))
#else
#define forceinline inline
#endif
#else
#define forceinline inline
#endif
#endif /* FORCEINLINE_H_ */

View File

@ -0,0 +1 @@
Branches reviewed 2020-11-13 by Martin Schläffer.

View File

@ -0,0 +1 @@
Addresses reviewed 2020-11-13 by Martin Schläffer.

89
src/ascon-xofa/hash.c Normal file
View File

@ -0,0 +1,89 @@
#include "api.h"
#include "ascon.h"
/*#include "crypto_hash.h"*/
#include "permutations.h"
#include "printstate.h"
#if !ASCON_INLINE_MODE
#undef forceinline
#define forceinline
#endif
#ifdef ASCON_HASH_BYTES
/*
 * Load the initial sponge state for the variant selected by ASCON_HASH_BYTES
 * and ASCON_HASH_ROUNDS.
 *
 * The state is filled from a table of five constant words. When
 * ASCON_PRINT_STATE is defined, the state is first set to {IV, 0, 0, 0, 0},
 * traced and permuted with 12 rounds, and only then overwritten from the
 * table.
 */
forceinline void ascon_inithash(ascon_state_t* s) {
  int i;
  /* constant initial state for the selected variant */
#if ASCON_HASH_BYTES == 32 && ASCON_HASH_ROUNDS == 12
  const uint64_t iv[5] = {ASCON_HASH_IV0, ASCON_HASH_IV1, ASCON_HASH_IV2,
                          ASCON_HASH_IV3, ASCON_HASH_IV4};
#elif ASCON_HASH_BYTES == 32 && ASCON_HASH_ROUNDS == 8
  const uint64_t iv[5] = {ASCON_HASHA_IV0, ASCON_HASHA_IV1, ASCON_HASHA_IV2,
                          ASCON_HASHA_IV3, ASCON_HASHA_IV4};
#elif ASCON_HASH_BYTES == 0 && ASCON_HASH_ROUNDS == 12
  const uint64_t iv[5] = {ASCON_XOF_IV0, ASCON_XOF_IV1, ASCON_XOF_IV2,
                          ASCON_XOF_IV3, ASCON_XOF_IV4};
#elif ASCON_HASH_BYTES == 0 && ASCON_HASH_ROUNDS == 8
  const uint64_t iv[5] = {ASCON_XOFA_IV0, ASCON_XOFA_IV1, ASCON_XOFA_IV2,
                          ASCON_XOFA_IV3, ASCON_XOFA_IV4};
#endif
#ifdef ASCON_PRINT_STATE
  /* trace the initialisation from the plain IV word */
#if ASCON_HASH_BYTES == 32 && ASCON_HASH_ROUNDS == 12
  s->x[0] = ASCON_HASH_IV;
#elif ASCON_HASH_BYTES == 32 && ASCON_HASH_ROUNDS == 8
  s->x[0] = ASCON_HASHA_IV;
#elif ASCON_HASH_BYTES == 0 && ASCON_HASH_ROUNDS == 12
  s->x[0] = ASCON_XOF_IV;
#elif ASCON_HASH_BYTES == 0 && ASCON_HASH_ROUNDS == 8
  s->x[0] = ASCON_XOFA_IV;
#endif
  s->x[1] = 0;
  s->x[2] = 0;
  s->x[3] = 0;
  s->x[4] = 0;
  printstate("initial value", s);
  P(s, 12);
#endif
  /* install the table as the working state */
  for (i = 0; i != 5; i++) {
    s->x[i] = iv[i];
  }
  printstate("initialization", s);
}
/*
 * Absorb `inlen` bytes from `in` into state word 0.
 *
 * Every full ASCON_HASH_RATE-sized block is XORed in and followed by an
 * ASCON_HASH_ROUNDS-round permutation. The remaining bytes (possibly none)
 * are XORed in together with the PAD() marker; no permutation is run after
 * that final block.
 */
forceinline void ascon_absorb(ascon_state_t* s, const uint8_t* in,
                              uint64_t inlen) {
  /* full blocks */
  for (; inlen >= ASCON_HASH_RATE; inlen -= ASCON_HASH_RATE) {
    s->x[0] ^= LOAD(in, 8);
    printstate("absorb plaintext", s);
    P(s, ASCON_HASH_ROUNDS);
    in += ASCON_HASH_RATE;
  }
  /* trailing partial block plus padding */
  s->x[0] ^= LOADBYTES(in, inlen) ^ PAD(inlen);
  printstate("pad plaintext", s);
}
/*
 * Squeeze `outlen` bytes of output from state word 0 into `out`.
 *
 * A 12-round permutation is always run first. While more than
 * ASCON_HASH_RATE bytes are still wanted, one block is written and the state
 * is permuted with ASCON_HASH_ROUNDS rounds. The last block (at most
 * ASCON_HASH_RATE bytes) is written without a trailing permutation.
 */
forceinline void ascon_squeeze(ascon_state_t* s, uint8_t* out,
                               uint64_t outlen) {
  P(s, 12);
  /* all blocks except the last */
  for (; outlen > ASCON_HASH_RATE; outlen -= ASCON_HASH_RATE) {
    STORE(out, s->x[0], 8);
    printstate("squeeze output", s);
    P(s, ASCON_HASH_ROUNDS);
    out += ASCON_HASH_RATE;
  }
  /* last (possibly partial) block */
  STOREBYTES(out, s->x[0], outlen);
  printstate("squeeze output", s);
}
/*int crypto_hash(unsigned char* out, const unsigned char* in,
unsigned long long inlen) {
ascon_state_t s;
ascon_inithash(&s);
ascon_absorb(&s, in, inlen);
ascon_squeeze(&s, out, CRYPTO_BYTES);
return 0;
}*/
#endif

View File

@ -0,0 +1,2 @@
Christoph Dobraunig
Martin Schläffer

View File

@ -0,0 +1,29 @@
#include "permutations.h"
/* Out-of-line 12-round permutation; built only when the permutation is not
 * inlined but its rounds are unrolled. */
#if !ASCON_INLINE_PERM && ASCON_UNROLL_LOOPS
void P12(ascon_state_t* s) { P12ROUNDS(s); }
#endif
/* Out-of-line 8-round permutation; built under the same configuration, and
 * only if a configured mode asks for it (AEAD rate 16, or 8 hash/PRF
 * rounds). */
#if ((defined(ASCON_AEAD_RATE) && ASCON_AEAD_RATE == 16) || \
(defined(ASCON_HASH_ROUNDS) && ASCON_HASH_ROUNDS == 8) || \
(defined(ASCON_PRF_ROUNDS) && ASCON_PRF_ROUNDS == 8)) && \
!ASCON_INLINE_PERM && ASCON_UNROLL_LOOPS
void P8(ascon_state_t* s) { P8ROUNDS(s); }
#endif
/* Out-of-line 6-round permutation; built only for AEAD rate 8. */
#if (defined(ASCON_AEAD_RATE) && ASCON_AEAD_RATE == 8) && \
!ASCON_INLINE_PERM && ASCON_UNROLL_LOOPS
void P6(ascon_state_t* s) { P6ROUNDS(s); }
#endif
/* Out-of-line looped permutation with a run-time round count; built when
 * neither inlining nor unrolling is enabled. */
#if !ASCON_INLINE_PERM && !ASCON_UNROLL_LOOPS
void P(ascon_state_t* s, int nr) { PROUNDS(s, nr); }
#endif

View File

@ -0,0 +1,78 @@
#ifndef PERMUTATIONS_H_
#define PERMUTATIONS_H_
#include <stdint.h>
#include "api.h"
#include "ascon.h"
#include "config.h"
#include "constants.h"
#include "printstate.h"
#include "round.h"
/* Apply twelve unrolled rounds to *s, using round constants RC0 through RCb
 * in that order. */
forceinline void P12ROUNDS(ascon_state_t* s) {
ROUND(s, RC0);
ROUND(s, RC1);
ROUND(s, RC2);
ROUND(s, RC3);
ROUND(s, RC4);
ROUND(s, RC5);
ROUND(s, RC6);
ROUND(s, RC7);
ROUND(s, RC8);
ROUND(s, RC9);
ROUND(s, RCa);
ROUND(s, RCb);
}
/* Apply eight unrolled rounds to *s: the same sequence as the last eight
 * rounds of P12ROUNDS (constants RC4 through RCb). */
forceinline void P8ROUNDS(ascon_state_t* s) {
ROUND(s, RC4);
ROUND(s, RC5);
ROUND(s, RC6);
ROUND(s, RC7);
ROUND(s, RC8);
ROUND(s, RC9);
ROUND(s, RCa);
ROUND(s, RCb);
}
/* Apply six unrolled rounds to *s: the same sequence as the last six rounds
 * of P12ROUNDS (constants RC6 through RCb). */
forceinline void P6ROUNDS(ascon_state_t* s) {
ROUND(s, RC6);
ROUND(s, RC7);
ROUND(s, RC8);
ROUND(s, RC9);
ROUND(s, RCa);
ROUND(s, RCb);
}
/*
 * P(s, nr): apply an nr-round permutation to *s. Which implementation is
 * used depends on ASCON_INLINE_PERM and ASCON_UNROLL_LOOPS. The unrolled
 * variants only act for nr == 12, 8 or 6 and do nothing for other values.
 */
#if ASCON_INLINE_PERM && ASCON_UNROLL_LOOPS
/* inline + unrolled: expand the round sequence at the call site */
forceinline void P(ascon_state_t* s, int nr) {
  switch (nr) {
    case 12:
      P12ROUNDS(s);
      break;
    case 8:
      P8ROUNDS(s);
      break;
    case 6:
      P6ROUNDS(s);
      break;
    default:
      break;
  }
}
#elif !ASCON_INLINE_PERM && ASCON_UNROLL_LOOPS
/* unrolled but out of line: dispatch to the functions in permutations.c */
void P12(ascon_state_t* s);
void P8(ascon_state_t* s);
void P6(ascon_state_t* s);
forceinline void P(ascon_state_t* s, int nr) {
  switch (nr) {
    case 12:
      P12(s);
      break;
    case 8:
      P8(s);
      break;
    case 6:
      P6(s);
      break;
    default:
      break;
  }
}
#elif ASCON_INLINE_PERM && !ASCON_UNROLL_LOOPS
/* inline loop over the rounds */
forceinline void P(ascon_state_t* s, int nr) {
  PROUNDS(s, nr);
}
#else /* !ASCON_INLINE_PERM && !ASCON_UNROLL_LOOPS */
/* out-of-line loop, defined in permutations.c */
void P(ascon_state_t* s, int nr);
#endif
#endif /* PERMUTATIONS_H_ */

View File

@ -0,0 +1,41 @@
#ifdef ASCON_PRINT_STATE
#include "printstate.h"
#include <inttypes.h>
#include <stdio.h>
#include <string.h>
#ifndef WORDTOU64
#define WORDTOU64
#endif
#ifndef U64BIG
#define U64BIG
#endif
/* Print `text`, an '=' sign and the word `x` as 16 zero-padded hex digits,
 * after converting it with U64BIG(WORDTOU64(...)). No newline is written. */
void printword(const char* text, const uint64_t x) {
  const uint64_t shown = U64BIG(WORDTOU64(x));
  printf("%s=%016" PRIx64, text, shown);
}
/*
 * Print one trace line for the state: "<text>:" padded with spaces as if the
 * label were at least 17 characters long, then the five 64-bit words
 * x0..x4, then a newline.
 * With ASCON_PRINT_BI defined, the two 32-bit halves of each word are
 * printed as well (w[i][1] first, then w[i][0]).
 *
 * The 32-bit halves are formatted with PRIx32 rather than a bare "%x", so
 * the format matches on targets where uint32_t is not `unsigned int`
 * (assumes s->w[i][j] is uint32_t, as in word_t -- confirm in ascon.h).
 */
void printstate(const char* text, const ascon_state_t* s) {
  static const char* const labels[5] = {" x0", " x1", " x2", " x3", " x4"};
  const size_t len = strlen(text);
  /* number of padding spaces; zero once the label is 17 chars or longer */
  const int pad = (len < 17) ? (int)(17 - len) : 0;
  int i;
  printf("%s:%*s", text, pad, "");
  for (i = 0; i < 5; ++i) printword(labels[i], s->x[i]);
#ifdef ASCON_PRINT_BI
  printf(" ");
  for (i = 0; i < 5; ++i) {
    printf(" x%d=%08" PRIx32 "_%08" PRIx32, i, s->w[i][1], s->w[i][0]);
  }
#endif
  printf("\n");
}
#endif

View File

@ -0,0 +1,24 @@
#ifndef PRINTSTATE_H_
#define PRINTSTATE_H_
/* With ASCON_PRINT_STATE defined, the tracing helpers are real functions
 * declared here. */
#ifdef ASCON_PRINT_STATE
#include "ascon.h"
#include "word.h"
void printword(const char* text, const uint64_t x);
void printstate(const char* text, const ascon_state_t* s);
#else
/* Otherwise both calls expand to an empty statement and their arguments are
 * never evaluated, so tracing calls can stay in the code unconditionally. */
#define printword(text, w) \
do { \
} while (0)
#define printstate(text, s) \
do { \
} while (0)
#endif
#endif /* PRINTSTATE_H_ */

350
src/ascon-xofa/round.h Normal file
View File

@ -0,0 +1,350 @@
#ifndef ROUND_H_
#define ROUND_H_
#include "ascon.h"
#include "constants.h"
#include "forceinline.h"
#include "printstate.h"
#include "word.h"
/*
 * Run a sequence of permutation rounds on *s as one inline-assembly loop.
 *
 * s: the state. Each s->w[i][0] is bound to a low register ("l") and each
 *    s->w[i][1] to a high register ("h"), all as read-write operands.
 * C: round constant of the first round. It lives in the [tmp1] operand: at
 *    the top of every iteration it is XORed into x2_l and pushed on the
 *    stack so the register can be reused as scratch; at the bottom it is
 *    popped, decreased by 15 and compared with 60. The loop ends once the
 *    constant equals 60. These numbers match INC and END in constants.h, and
 *    PROUNDS() passes START(nr) as C.
 *
 * Note the operand naming: the C local `tmp1` is bound to asm operand
 * [tmp2], while asm operand [tmp1] is the round constant C.
 *
 * NOTE(review): the statement uses push/pop and a compare/branch but lists
 * no clobbers -- confirm that this is acceptable for the target toolchain.
 */
forceinline void ROUND_LOOP(ascon_state_t* s, uint32_t C) {
uint32_t tmp0, tmp1;
__asm__ __volatile__(
/* presumably inert: '@' looks like the assembler's comment marker -- confirm */
"@.syntax_unified\n\t"
"rbegin_%=:;\n\t"
/* add the round constant, then park it on the stack */
"eor %[x2_l], %[x2_l], %[tmp1]\n\t"
"push {%[tmp1]}\n\t"
"eor %[x0_l], %[x0_l], %[x4_l]\n\t"
"eor %[x4_l], %[x4_l], %[x3_l]\n\t"
"eor %[x2_l], %[x2_l], %[x1_l]\n\t"
"mvn %[tmp0], %[x0_l]\n\t"
"orr %[tmp0], %[tmp0], %[x4_l]\n\t"
"movs %[tmp1], %[x2_l]\n\t"
"bic %[tmp1], %[tmp1], %[x1_l]\n\t"
"eor %[x0_l], %[x0_l], %[tmp1]\n\t"
"mvn %[tmp1], %[x4_l]\n\t"
"orr %[tmp1], %[tmp1], %[x3_l]\n\t"
"eor %[x2_l], %[x2_l], %[tmp1]\n\t"
"movs %[tmp1], %[x1_l]\n\t"
"bic %[tmp1], %[tmp1], %[x0_l]\n\t"
"eor %[x4_l], %[x4_l], %[tmp1]\n\t"
"movs %[tmp1], %[x3_l]\n\t"
"and %[tmp1], %[tmp1], %[x2_l]\n\t"
"eor %[tmp1], %[x1_l], %[tmp1]\n\t"
"eor %[tmp0], %[x3_l], %[tmp0]\n\t"
"eor %[tmp0], %[tmp0], %[x2_l]\n\t"
"eor %[tmp1], %[tmp1], %[x0_l]\n\t"
"eor %[x0_l], %[x0_l], %[x4_l]\n\t"
"movs %[x1_l], %[x0_h]\n\t"
"movs %[x3_l], %[x1_h]\n\t"
"movs %[x0_h], %[x2_l]\n\t"
"movs %[x1_h], %[x0_l]\n\t"
"movs %[x0_l], %[x2_h]\n\t"
"movs %[x2_l], %[x3_h]\n\t"
"movs %[tmp2], %[x4_h]\n\t"
"movs %[x2_h], %[tmp0]\n\t"
"movs %[x3_h], %[x4_l]\n\t"
"eor %[x1_l], %[x1_l], %[tmp2]\n\t"
"eor %[tmp2], %[tmp2], %[x2_l]\n\t"
"eor %[x0_l], %[x0_l], %[x3_l]\n\t"
"mvn %[tmp0], %[x1_l]\n\t"
"orr %[tmp0], %[tmp0], %[tmp2]\n\t"
"movs %[x4_l], %[x0_l]\n\t"
"bic %[x4_l], %[x4_l], %[x3_l]\n\t"
"eor %[x1_l], %[x1_l], %[x4_l]\n\t"
"mvn %[x4_l], %[tmp2]\n\t"
"orr %[x4_l], %[x4_l], %[x2_l]\n\t"
"eor %[x0_l], %[x0_l], %[x4_l]\n\t"
"movs %[x4_l], %[x3_l]\n\t"
"bic %[x4_l], %[x4_l], %[x1_l]\n\t"
"eor %[tmp2], %[tmp2], %[x4_l]\n\t"
"movs %[x4_l], %[x2_l]\n\t"
"and %[x4_l], %[x4_l], %[x0_l]\n\t"
"eor %[x3_l], %[x3_l], %[x4_l]\n\t"
"eor %[x2_l], %[x2_l], %[tmp0]\n\t"
"eor %[x2_l], %[x2_l], %[x0_l]\n\t"
"eor %[x3_l], %[x3_l], %[x1_l]\n\t"
"eor %[x1_l], %[x1_l], %[tmp2]\n\t"
"movs %[x4_h], %[x2_l]\n\t"
"movs %[x2_l], %[x0_h]\n\t"
"movs %[x0_h], %[x1_l]\n\t"
"lsr %[x4_l], %[x0_l], #6\n\t"
"lsl %[x1_l], %[x2_l], #26\n\t"
"lsr %[tmp0], %[x2_l], #6\n\t"
"eor %[x4_l], %[x4_l], %[x1_l]\n\t"
"lsl %[x1_l], %[x0_l], #26\n\t"
"eor %[tmp0], %[tmp0], %[x1_l]\n\t"
"lsr %[x1_l], %[x0_l], #1\n\t"
"eor %[x4_l], %[x4_l], %[x1_l]\n\t"
"lsl %[x1_l], %[x2_l], #31\n\t"
"eor %[x4_l], %[x4_l], %[x1_l]\n\t"
"lsr %[x1_l], %[x2_l], #1\n\t"
"eor %[tmp0], %[tmp0], %[x1_l]\n\t"
"lsl %[x1_l], %[x0_l], #31\n\t"
"eor %[tmp0], %[tmp0], %[x1_l]\n\t"
"eor %[x0_l], %[x0_l], %[x4_l]\n\t"
"eor %[x2_l], %[x2_l], %[tmp0]\n\t"
"lsl %[x4_l], %[x3_l], #3\n\t"
"lsr %[x1_l], %[tmp1], #29\n\t"
"lsl %[tmp0], %[tmp1], #3\n\t"
"eor %[x4_l], %[x4_l], %[x1_l]\n\t"
"lsr %[x1_l], %[x3_l], #29\n\t"
"eor %[tmp0], %[tmp0], %[x1_l]\n\t"
"lsl %[x1_l], %[x3_l], #25\n\t"
"eor %[x4_l], %[x4_l], %[x1_l]\n\t"
"lsr %[x1_l], %[tmp1], #7\n\t"
"eor %[x4_l], %[x4_l], %[x1_l]\n\t"
"lsl %[x1_l], %[tmp1], #25\n\t"
"eor %[tmp0], %[tmp0], %[x1_l]\n\t"
"lsr %[x1_l], %[x3_l], #7\n\t"
"eor %[tmp0], %[tmp0], %[x1_l]\n\t"
"eor %[x3_l], %[x3_l], %[x4_l]\n\t"
"eor %[tmp1], %[tmp1], %[tmp0]\n\t"
"movs %[x4_l], %[x3_h]\n\t"
"movs %[x3_h], %[tmp1]\n\t"
"lsl %[tmp1], %[tmp2], #23\n\t"
"lsr %[x1_l], %[x4_l], #9\n\t"
"lsl %[tmp0], %[x4_l], #23\n\t"
"eor %[tmp1], %[tmp1], %[x1_l]\n\t"
"lsr %[x1_l], %[tmp2], #9\n\t"
"eor %[tmp0], %[tmp0], %[x1_l]\n\t"
"lsr %[x1_l], %[tmp2], #7\n\t"
"eor %[tmp1], %[tmp1], %[x1_l]\n\t"
"lsl %[x1_l], %[x4_l], #25\n\t"
"eor %[tmp1], %[tmp1], %[x1_l]\n\t"
"lsr %[x1_l], %[x4_l], #7\n\t"
"eor %[tmp0], %[tmp0], %[x1_l]\n\t"
"lsl %[x1_l], %[tmp2], #25\n\t"
"eor %[tmp0], %[tmp0], %[x1_l]\n\t"
"eor %[tmp2], %[tmp2], %[tmp1]\n\t"
"eor %[x4_l], %[x4_l], %[tmp0]\n\t"
"movs %[x1_l], %[x3_h]\n\t"
"movs %[tmp1], %[x4_h]\n\t"
"movs %[x4_h], %[tmp2]\n\t"
"movs %[x3_h], %[x3_l]\n\t"
"movs %[x3_l], %[x2_h]\n\t"
"movs %[x2_h], %[x0_l]\n\t"
"lsr %[tmp2], %[tmp1], #17\n\t"
"lsl %[x0_l], %[x3_l], #15\n\t"
"lsr %[tmp0], %[x3_l], #17\n\t"
"eor %[tmp2], %[tmp2], %[x0_l]\n\t"
"lsl %[x0_l], %[tmp1], #15\n\t"
"eor %[tmp0], %[tmp0], %[x0_l]\n\t"
"lsr %[x0_l], %[tmp1], #10\n\t"
"eor %[tmp2], %[tmp2], %[x0_l]\n\t"
"lsl %[x0_l], %[x3_l], #22\n\t"
"eor %[tmp2], %[tmp2], %[x0_l]\n\t"
"lsr %[x0_l], %[x3_l], #10\n\t"
"eor %[tmp0], %[tmp0], %[x0_l]\n\t"
"lsl %[x0_l], %[tmp1], #22\n\t"
"eor %[tmp0], %[tmp0], %[x0_l]\n\t"
"eor %[tmp1], %[tmp1], %[tmp2]\n\t"
"eor %[x3_l], %[x3_l], %[tmp0]\n\t"
"movs %[tmp0], %[x0_h]\n\t"
"movs %[x0_l], %[x1_h]\n\t"
"movs %[x0_h], %[x4_l]\n\t"
"movs %[x1_h], %[x3_h]\n\t"
"movs %[x3_h], %[tmp1]\n\t"
"lsr %[x4_l], %[tmp0], #28\n\t"
"lsl %[tmp1], %[x0_l], #4\n\t"
"lsr %[tmp2], %[x0_l], #28\n\t"
"eor %[x4_l], %[x4_l], %[tmp1]\n\t"
"lsl %[tmp1], %[tmp0], #4\n\t"
"eor %[tmp2], %[tmp2], %[tmp1]\n\t"
"lsr %[tmp1], %[tmp0], #19\n\t"
"eor %[x4_l], %[x4_l], %[tmp1]\n\t"
"lsl %[tmp1], %[x0_l], #13\n\t"
"eor %[x4_l], %[x4_l], %[tmp1]\n\t"
"lsr %[tmp1], %[x0_l], #19\n\t"
"eor %[tmp2], %[tmp2], %[tmp1]\n\t"
"lsl %[tmp1], %[tmp0], #13\n\t"
"eor %[tmp2], %[tmp2], %[tmp1]\n\t"
/* recover the round constant saved at the top of the iteration */
"pop {%[tmp1]}\n\t"
"eor %[tmp0], %[tmp0], %[x4_l]\n\t"
"eor %[x0_l], %[x0_l], %[tmp2]\n\t"
"movs %[x4_l], %[x0_h]\n\t"
"movs %[x0_h], %[tmp0]\n\t"
/* step to the next round constant; leave the loop when it reaches 60 */
"sub %[tmp1], %[tmp1], #15\n\t"
"cmp %[tmp1], #60\n\t"
"beq rend_%=\n\t"
"b rbegin_%=\n\t"
"rend_%=:;\n\t"
:
[x0_l] "+l"(s->w[0][0]), [x0_h] "+h"(s->w[0][1]), [x1_l] "+l"(s->w[1][0]),
[x1_h] "+h"(s->w[1][1]), [x2_l] "+l"(s->w[2][0]), [x2_h] "+h"(s->w[2][1]),
[x3_l] "+l"(s->w[3][0]), [x3_h] "+h"(s->w[3][1]), [x4_l] "+l"(s->w[4][0]),
[x4_h] "+h"(s->w[4][1]), [tmp1] "+l"(C), [tmp0] "=l"(tmp0),
[tmp2] "=l"(tmp1)
:
:);
}
/*
 * Apply a single permutation round with round constant C to *s, as one
 * inline-assembly block, then emit a " round output" trace line via
 * printstate().
 *
 * s: the state. Each s->w[i][0] is bound to a low register ("l") and each
 *    s->w[i][1] to a high register ("h"), all as read-write operands.
 * C: round constant, passed as a register-or-immediate input ("ri"). It is
 *    moved into tmp0 and XORed into x2_l before anything else happens.
 *
 * After those first two instructions the body appears to match one
 * iteration of ROUND_LOOP instruction for instruction, minus the push/pop
 * and the loop control.
 *
 * NOTE(review): the scratch outputs tmp0..tmp2 are not early-clobber and C
 * is read only by the first instruction -- confirm this stays valid if the
 * instruction order is ever changed.
 */
forceinline void ROUND(ascon_state_t* s, uint32_t C) {
uint32_t tmp0, tmp1, tmp2;
__asm__ __volatile__(
/* presumably inert: '@' looks like the assembler's comment marker -- confirm */
"@.syntax_unified\n\t"
/* add the round constant */
"movs %[tmp0], %[C]\n\t"
"eor %[x2_l], %[x2_l], %[tmp0]\n\t"
"eor %[x0_l], %[x0_l], %[x4_l]\n\t"
"eor %[x4_l], %[x4_l], %[x3_l]\n\t"
"eor %[x2_l], %[x2_l], %[x1_l]\n\t"
"mvn %[tmp0], %[x0_l]\n\t"
"orr %[tmp0], %[tmp0], %[x4_l]\n\t"
"movs %[tmp1], %[x2_l]\n\t"
"bic %[tmp1], %[tmp1], %[x1_l]\n\t"
"eor %[x0_l], %[x0_l], %[tmp1]\n\t"
"mvn %[tmp1], %[x4_l]\n\t"
"orr %[tmp1], %[tmp1], %[x3_l]\n\t"
"eor %[x2_l], %[x2_l], %[tmp1]\n\t"
"movs %[tmp1], %[x1_l]\n\t"
"bic %[tmp1], %[tmp1], %[x0_l]\n\t"
"eor %[x4_l], %[x4_l], %[tmp1]\n\t"
"movs %[tmp1], %[x3_l]\n\t"
"and %[tmp1], %[tmp1], %[x2_l]\n\t"
"eor %[tmp1], %[x1_l], %[tmp1]\n\t"
"eor %[tmp0], %[x3_l], %[tmp0]\n\t"
"eor %[tmp0], %[tmp0], %[x2_l]\n\t"
"eor %[tmp1], %[tmp1], %[x0_l]\n\t"
"eor %[x0_l], %[x0_l], %[x4_l]\n\t"
"movs %[x1_l], %[x0_h]\n\t"
"movs %[x3_l], %[x1_h]\n\t"
"movs %[x0_h], %[x2_l]\n\t"
"movs %[x1_h], %[x0_l]\n\t"
"movs %[x0_l], %[x2_h]\n\t"
"movs %[x2_l], %[x3_h]\n\t"
"movs %[tmp2], %[x4_h]\n\t"
"movs %[x2_h], %[tmp0]\n\t"
"movs %[x3_h], %[x4_l]\n\t"
"eor %[x1_l], %[x1_l], %[tmp2]\n\t"
"eor %[tmp2], %[tmp2], %[x2_l]\n\t"
"eor %[x0_l], %[x0_l], %[x3_l]\n\t"
"mvn %[tmp0], %[x1_l]\n\t"
"orr %[tmp0], %[tmp0], %[tmp2]\n\t"
"movs %[x4_l], %[x0_l]\n\t"
"bic %[x4_l], %[x4_l], %[x3_l]\n\t"
"eor %[x1_l], %[x1_l], %[x4_l]\n\t"
"mvn %[x4_l], %[tmp2]\n\t"
"orr %[x4_l], %[x4_l], %[x2_l]\n\t"
"eor %[x0_l], %[x0_l], %[x4_l]\n\t"
"movs %[x4_l], %[x3_l]\n\t"
"bic %[x4_l], %[x4_l], %[x1_l]\n\t"
"eor %[tmp2], %[tmp2], %[x4_l]\n\t"
"movs %[x4_l], %[x2_l]\n\t"
"and %[x4_l], %[x4_l], %[x0_l]\n\t"
"eor %[x3_l], %[x3_l], %[x4_l]\n\t"
"eor %[x2_l], %[x2_l], %[tmp0]\n\t"
"eor %[x2_l], %[x2_l], %[x0_l]\n\t"
"eor %[x3_l], %[x3_l], %[x1_l]\n\t"
"eor %[x1_l], %[x1_l], %[tmp2]\n\t"
"movs %[x4_h], %[x2_l]\n\t"
"movs %[x2_l], %[x0_h]\n\t"
"movs %[x0_h], %[x1_l]\n\t"
"lsr %[x4_l], %[x0_l], #6\n\t"
"lsl %[x1_l], %[x2_l], #26\n\t"
"lsr %[tmp0], %[x2_l], #6\n\t"
"eor %[x4_l], %[x4_l], %[x1_l]\n\t"
"lsl %[x1_l], %[x0_l], #26\n\t"
"eor %[tmp0], %[tmp0], %[x1_l]\n\t"
"lsr %[x1_l], %[x0_l], #1\n\t"
"eor %[x4_l], %[x4_l], %[x1_l]\n\t"
"lsl %[x1_l], %[x2_l], #31\n\t"
"eor %[x4_l], %[x4_l], %[x1_l]\n\t"
"lsr %[x1_l], %[x2_l], #1\n\t"
"eor %[tmp0], %[tmp0], %[x1_l]\n\t"
"lsl %[x1_l], %[x0_l], #31\n\t"
"eor %[tmp0], %[tmp0], %[x1_l]\n\t"
"eor %[x0_l], %[x0_l], %[x4_l]\n\t"
"eor %[x2_l], %[x2_l], %[tmp0]\n\t"
"lsl %[x4_l], %[x3_l], #3\n\t"
"lsr %[x1_l], %[tmp1], #29\n\t"
"lsl %[tmp0], %[tmp1], #3\n\t"
"eor %[x4_l], %[x4_l], %[x1_l]\n\t"
"lsr %[x1_l], %[x3_l], #29\n\t"
"eor %[tmp0], %[tmp0], %[x1_l]\n\t"
"lsl %[x1_l], %[x3_l], #25\n\t"
"eor %[x4_l], %[x4_l], %[x1_l]\n\t"
"lsr %[x1_l], %[tmp1], #7\n\t"
"eor %[x4_l], %[x4_l], %[x1_l]\n\t"
"lsl %[x1_l], %[tmp1], #25\n\t"
"eor %[tmp0], %[tmp0], %[x1_l]\n\t"
"lsr %[x1_l], %[x3_l], #7\n\t"
"eor %[tmp0], %[tmp0], %[x1_l]\n\t"
"eor %[x3_l], %[x3_l], %[x4_l]\n\t"
"eor %[tmp1], %[tmp1], %[tmp0]\n\t"
"movs %[x4_l], %[x3_h]\n\t"
"movs %[x3_h], %[tmp1]\n\t"
"lsl %[tmp1], %[tmp2], #23\n\t"
"lsr %[x1_l], %[x4_l], #9\n\t"
"lsl %[tmp0], %[x4_l], #23\n\t"
"eor %[tmp1], %[tmp1], %[x1_l]\n\t"
"lsr %[x1_l], %[tmp2], #9\n\t"
"eor %[tmp0], %[tmp0], %[x1_l]\n\t"
"lsr %[x1_l], %[tmp2], #7\n\t"
"eor %[tmp1], %[tmp1], %[x1_l]\n\t"
"lsl %[x1_l], %[x4_l], #25\n\t"
"eor %[tmp1], %[tmp1], %[x1_l]\n\t"
"lsr %[x1_l], %[x4_l], #7\n\t"
"eor %[tmp0], %[tmp0], %[x1_l]\n\t"
"lsl %[x1_l], %[tmp2], #25\n\t"
"eor %[tmp0], %[tmp0], %[x1_l]\n\t"
"eor %[tmp2], %[tmp2], %[tmp1]\n\t"
"eor %[x4_l], %[x4_l], %[tmp0]\n\t"
"movs %[x1_l], %[x3_h]\n\t"
"movs %[tmp1], %[x4_h]\n\t"
"movs %[x4_h], %[tmp2]\n\t"
"movs %[x3_h], %[x3_l]\n\t"
"movs %[x3_l], %[x2_h]\n\t"
"movs %[x2_h], %[x0_l]\n\t"
"lsr %[tmp2], %[tmp1], #17\n\t"
"lsl %[x0_l], %[x3_l], #15\n\t"
"lsr %[tmp0], %[x3_l], #17\n\t"
"eor %[tmp2], %[tmp2], %[x0_l]\n\t"
"lsl %[x0_l], %[tmp1], #15\n\t"
"eor %[tmp0], %[tmp0], %[x0_l]\n\t"
"lsr %[x0_l], %[tmp1], #10\n\t"
"eor %[tmp2], %[tmp2], %[x0_l]\n\t"
"lsl %[x0_l], %[x3_l], #22\n\t"
"eor %[tmp2], %[tmp2], %[x0_l]\n\t"
"lsr %[x0_l], %[x3_l], #10\n\t"
"eor %[tmp0], %[tmp0], %[x0_l]\n\t"
"lsl %[x0_l], %[tmp1], #22\n\t"
"eor %[tmp0], %[tmp0], %[x0_l]\n\t"
"eor %[tmp1], %[tmp1], %[tmp2]\n\t"
"eor %[x3_l], %[x3_l], %[tmp0]\n\t"
"movs %[tmp0], %[x0_h]\n\t"
"movs %[x0_l], %[x1_h]\n\t"
"movs %[x0_h], %[x4_l]\n\t"
"movs %[x1_h], %[x3_h]\n\t"
"movs %[x3_h], %[tmp1]\n\t"
"lsr %[x4_l], %[tmp0], #28\n\t"
"lsl %[tmp1], %[x0_l], #4\n\t"
"lsr %[tmp2], %[x0_l], #28\n\t"
"eor %[x4_l], %[x4_l], %[tmp1]\n\t"
"lsl %[tmp1], %[tmp0], #4\n\t"
"eor %[tmp2], %[tmp2], %[tmp1]\n\t"
"lsr %[tmp1], %[tmp0], #19\n\t"
"eor %[x4_l], %[x4_l], %[tmp1]\n\t"
"lsl %[tmp1], %[x0_l], #13\n\t"
"eor %[x4_l], %[x4_l], %[tmp1]\n\t"
"lsr %[tmp1], %[x0_l], #19\n\t"
"eor %[tmp2], %[tmp2], %[tmp1]\n\t"
"lsl %[tmp1], %[tmp0], #13\n\t"
"eor %[tmp2], %[tmp2], %[tmp1]\n\t"
"eor %[tmp0], %[tmp0], %[x4_l]\n\t"
"eor %[x0_l], %[x0_l], %[tmp2]\n\t"
"movs %[x4_l], %[x0_h]\n\t"
"movs %[x0_h], %[tmp0]\n\t"
:
[x0_l] "+l"(s->w[0][0]), [x0_h] "+h"(s->w[0][1]), [x1_l] "+l"(s->w[1][0]),
[x1_h] "+h"(s->w[1][1]), [x2_l] "+l"(s->w[2][0]), [x2_h] "+h"(s->w[2][1]),
[x3_l] "+l"(s->w[3][0]), [x3_h] "+h"(s->w[3][1]), [x4_l] "+l"(s->w[4][0]),
[x4_h] "+h"(s->w[4][1]), [tmp0] "=l"(tmp0), [tmp1] "=l"(tmp1),
[tmp2] "=l"(tmp2)
: [C] "ri"(C)
:);
printstate(" round output", s);
}
/* Run an nr-round permutation through the looped implementation, starting
 * from the round constant START(nr). */
forceinline void PROUNDS(ascon_state_t* s, int nr) {
  const uint32_t first_rc = START(nr);
  ROUND_LOOP(s, first_rc);
}
#endif /* ROUND_H_ */

69
src/ascon-xofa/word.h Normal file
View File

@ -0,0 +1,69 @@
#ifndef WORD_H_
#define WORD_H_
#include <stdint.h>
#include <string.h>
#include "bendian.h"
#include "forceinline.h"
/* One 64-bit lane, accessible as a whole, as two 32-bit halves, or as eight
 * bytes. */
typedef union {
uint64_t x;
uint32_t w[2];
uint8_t b[8];
} word_t;
/* Conversions between a plain uint64_t and the internal word form. Both map
 * to U64BIG, which is defined outside this header (presumably bendian.h). */
#define U64TOWORD(x) U64BIG(x)
#define WORDTOU64(x) U64BIG(x)
/* Rotate the 64-bit value x right by n bits. The left-shift count is
 * (-n & 63), so n == 0 returns x unchanged. */
forceinline uint64_t ROR(uint64_t x, int n) {
  const uint64_t shifted_down = x >> n;
  const uint64_t wrapped = x << (-n & 63);
  return shifted_down | wrapped;
}
/* Build a word whose upper 32 bits are the low half of lo2hi and whose lower
 * 32 bits are the high half of hi2lo. */
forceinline uint64_t KEYROT(uint64_t lo2hi, uint64_t hi2lo) {
  const uint64_t upper = lo2hi << 32;
  const uint64_t lower = hi2lo >> 32;
  return upper | lower;
}
/* Branch-free test: returns 0 when both a and b are zero, and -1 (all bits
 * set) when at least one of them is non-zero. */
forceinline int NOTZERO(uint64_t a, uint64_t b) {
  uint64_t fold = a | b;
  /* fold every byte of the word into the lowest byte */
  fold |= fold >> 32;
  fold |= fold >> 16;
  fold |= fold >> 8;
  {
    const int low_byte = (int)(fold & 0xff);
    /* 1 when low_byte == 0 (low_byte - 1 is negative), otherwise 0 */
    const int was_zero = ((low_byte - 1) >> 8) & 1;
    return was_zero - 1;
  }
}
/* Padding marker for a block that already holds i bytes: the byte 0x80
 * shifted left by (56 - 8 * i) bits. */
forceinline uint64_t PAD(int i) {
  const int shift = 56 - 8 * i;
  return 0x80ull << shift;
}
/* Return len shifted left by 51 bits. */
forceinline uint64_t PRFS_MLEN(uint64_t len) {
  const uint64_t positioned = len << 51;
  return positioned;
}
/* Clear the n most significant bytes of w and return the result.
 * The shift count is 8 * n, so n == 0 leaves w unchanged; n == 8 would
 * shift by the full width and is undefined. */
forceinline uint64_t CLEAR(uint64_t w, int n) {
  const int drop_bits = 8 * n;
  return w & (~0ull >> drop_bits);
}
/* Return a mask with the n least significant bytes set.
 * Undefined for n == 0, where the shift count would be 64. */
forceinline uint64_t MASK(int n) {
  const int drop_bits = 64 - 8 * n;
  return ~0ull >> drop_bits;
}
/*
 * Read 8 bytes at `bytes`, keep the n least significant bytes of the loaded
 * value (see MASK) and convert it to the internal word form.
 *
 * bytes: source; 8 bytes are always read, whatever n is.
 * n:     number of bytes to keep, 1..8 (MASK is undefined for n == 0).
 *
 * The bytes are fetched with memcpy instead of a uint64_t pointer cast, so
 * the source needs no particular alignment and no aliasing rule is broken.
 */
forceinline uint64_t LOAD(const uint8_t* bytes, int n) {
  uint64_t x;
  memcpy(&x, bytes, sizeof x);
  x &= MASK(n);
  return U64TOWORD(x);
}
/*
 * Read-modify-write the 8 bytes at `bytes`: clear the n least significant
 * bytes of the stored value (see MASK), then OR in the converted word w.
 *
 * bytes: destination; 8 bytes are always read and written.
 * w:     word to store; it is ORed in unmasked, so for n < 8 the caller
 *        must make sure the bytes beyond n are already zero in w.
 * n:     number of bytes to replace, 1..8 (MASK is undefined for n == 0).
 *
 * The buffer is accessed with memcpy instead of a uint64_t pointer cast, so
 * the destination needs no particular alignment and no aliasing rule is
 * broken.
 */
forceinline void STORE(uint8_t* bytes, uint64_t w, int n) {
  uint64_t x;
  memcpy(&x, bytes, sizeof x);
  x &= ~MASK(n);
  x |= WORDTOU64(w);
  memcpy(bytes, &x, sizeof x);
}
/* Copy the first n bytes at `bytes` into a zeroed 64-bit value and convert
 * it to the internal word form. n == 0 yields the conversion of zero. */
forceinline uint64_t LOADBYTES(const uint8_t* bytes, int n) {
  uint64_t raw = 0;
  memcpy(&raw, bytes, (size_t)n);
  return U64TOWORD(raw);
}
/* Convert the word w back to a plain 64-bit value and copy its first n
 * bytes to `bytes`. n == 0 writes nothing. */
forceinline void STOREBYTES(uint8_t* bytes, uint64_t w, int n) {
  const uint64_t raw = WORDTOU64(w);
  memcpy(bytes, &raw, (size_t)n);
}
#endif /* WORD_H_ */

View File

@ -11,11 +11,13 @@
#include "rorand.h"
#include "rourand.h"
#include "util.h"
#define DO_TIME_BENCH 0
#define DATA_TOTAL 1024*1024
#define USE_URANDOM 1
#if DO_TIME_BENCH
static uint8_t time_bench[128*1024];
@ -50,13 +52,23 @@ int main() {
iprintf("rorand_init() returned %d\n", d);
panic("can't init rorand");
}
struct rourand_state* ur = rourand_init(rorand_get, 0);
if (!ur) {
panic("Can't init rourand");
}
#if USE_URANDOM
#define rand_get(dst, size) rourand_get(ur, dst, size)
#else
#define rand_get(dst, size) rorand_get(dst, (size)*CHAR_BIT)
#endif
#if DO_TIME_BENCH
memset(time_bench, 0, sizeof(time_bench));
iprintf("[---] throughput benchmark start\n");
absolute_time_t ta = get_absolute_time();
rorand_get(time_bench, count_of(time_bench)*CHAR_BIT);
rand_get(time_bench, count_of(time_bench));
absolute_time_t tb = get_absolute_time();
int64_t dt_us = absolute_time_diff_us(ta, tb);
@ -72,10 +84,11 @@ int main() {
memset(data, 0, sizeof(data));
const uintptr_t total = DATA_TOTAL;
for (uintptr_t off = 0; off < total; off += count_of(data)) {
rorand_get(data, count_of(data)*CHAR_BIT);
rand_get(data, count_of(data));
hexdump(NULL, off, data, sizeof(data));
}
#endif
rourand_free(ur);
iprintf("done\n");
while(1);

71
src/rourand.c Normal file
View File

@ -0,0 +1,71 @@
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include "ascon.h"
#include "rourand.h"
/* State of one generator instance, allocated by rourand_init(). */
struct rourand_state {
/* sponge state that raw input is absorbed into and output is squeezed from */
ascon_state_t ascon;
/* scratch block: receives raw input right before it is absorbed, and holds
 * the most recently squeezed output block */
uint8_t buf[16];
/* callback that supplies raw input; its length argument is given in bits */
rourand_get_fn rand;
/* number of not-yet-delivered bytes at the end of buf */
size_t bpos;
/* buffer refills per reseed: fresh raw input is absorbed whenever rcount
 * reaches this value */
size_t rate;
/* buffer refills since the last reseed */
size_t rcount;
};
/*
 * Allocate a generator and seed it from `rawrand`.
 *
 * rawrand: callback that fills a buffer with raw input; it is called with
 *          the buffer and its length in bits. Must not be NULL.
 * rate:    number of output-buffer refills between reseeds. 0 selects the
 *          default of 16; negative values are treated like 0 instead of
 *          wrapping to a huge unsigned count that would disable reseeding.
 *
 * Two blocks of sizeof(buf) bytes are requested from rawrand and absorbed
 * before the generator is returned.
 *
 * Returns the new state, or NULL if rawrand is NULL or allocation fails.
 * The caller releases it with rourand_free().
 */
struct rourand_state* rourand_init(rourand_get_fn rawrand, int rate) {
  enum { DEFAULT_RATE = 16, SEED_BLOCKS = 2 };
  struct rourand_state* r;
  int i;

  if (!rawrand) return NULL;

  r = calloc(1, sizeof *r);
  if (!r) return NULL;

  ascon_inithash(&r->ascon);
  r->rand = rawrand;
  r->bpos = 0;
  r->rate = (rate > 0) ? (size_t)rate : (size_t)DEFAULT_RATE;
  r->rcount = 0;

  /* initial seeding; bpos stays 0, so this raw input is never handed out */
  for (i = 0; i < SEED_BLOCKS; ++i) {
    rawrand(r->buf, sizeof(r->buf) * CHAR_BIT);
    ascon_absorb(&r->ascon, r->buf, sizeof(r->buf));
  }

  return r;
}
/* Wipe and release a generator created by rourand_init(). A NULL pointer is
 * accepted and ignored. */
void rourand_free(struct rourand_state* st) {
  if (!st) return;
  /* clear the sponge state and buffered output before releasing the memory */
  explicit_bzero(st, sizeof *st);
  free(st);
}
/*
 * Write nbytes of generator output to dst_.
 *
 * Output is served from st->buf. Whenever the buffer is exhausted it is
 * refilled by squeezing the sponge; every st->rate-th refill first absorbs a
 * fresh block of raw input from st->rand. nbytes == 0 does nothing.
 */
void rourand_get(struct rourand_state* st, void* dst_, size_t nbytes) {
  uint8_t* out = (uint8_t*)dst_;
  const size_t cap = sizeof(st->buf);
  size_t left;

  for (left = nbytes; left != 0;) {
    size_t chunk;

    if (st->bpos == 0) {
      /* buffer exhausted: reseed if it is due, then squeeze a new block */
      st->rcount += 1;
      if (st->rcount == st->rate) {
        st->rand(st->buf, cap * CHAR_BIT);
        ascon_absorb(&st->ascon, st->buf, cap);
        st->rcount = 0;
      }
      ascon_squeeze(&st->ascon, st->buf, cap);
      st->bpos = cap;
    }

    /* hand out as much of the buffered tail as is still wanted */
    chunk = (left < st->bpos) ? left : st->bpos;
    memcpy(out, st->buf + (cap - st->bpos), chunk);
    out += chunk;
    left -= chunk;
    st->bpos -= chunk;
  }
}

24
src/rourand.h Normal file
View File

@ -0,0 +1,24 @@
#ifndef ROURAND_H_
#define ROURAND_H_
#include <stddef.h>
#include <stdint.h> /* uint32_t, used by rourand_get32() */

/* Opaque generator state; created by rourand_init(). */
struct rourand_state;

/* Raw-input callback: fill dst with input data. The length argument is a
 * number of bits, not bytes. */
typedef void (*rourand_get_fn)(void* dst, size_t nbits);

/* Create a generator seeded from rawrand. rate is the number of internal
 * buffer refills between reseeds; 0 selects the default. Returns NULL on
 * failure. Release the result with rourand_free(). */
struct rourand_state* rourand_init(rourand_get_fn rawrand, int rate);

/* Wipe and release a generator; NULL is accepted. */
void rourand_free(struct rourand_state* st);

/* Write nbytes of generator output to dst. */
void rourand_get(struct rourand_state* st, void* dst, size_t nbytes);

/* Convenience wrapper: fetch sizeof(uint32_t) bytes of output and return
 * them as one value. */
static inline uint32_t rourand_get32(struct rourand_state* st) {
  uint32_t r = 0;
  rourand_get(st, &r, sizeof(r));
  return r;
}
#endif