add ascon-xofa-based randomness compressor
This commit is contained in:
parent
572883b269
commit
1117391aa6
|
@ -18,12 +18,17 @@ pico_enable_stdio_uart(${PROJECT} 0)
|
||||||
pico_enable_stdio_usb(${PROJECT} 1)
|
pico_enable_stdio_usb(${PROJECT} 1)
|
||||||
|
|
||||||
target_sources(${PROJECT} PUBLIC
|
target_sources(${PROJECT} PUBLIC
|
||||||
|
${CMAKE_CURRENT_SOURCE_DIR}/src/ascon-xofa/hash.c
|
||||||
|
${CMAKE_CURRENT_SOURCE_DIR}/src/ascon-xofa/permutations.c
|
||||||
|
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/src/util.c
|
${CMAKE_CURRENT_SOURCE_DIR}/src/util.c
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/src/rorand.c
|
${CMAKE_CURRENT_SOURCE_DIR}/src/rorand.c
|
||||||
|
${CMAKE_CURRENT_SOURCE_DIR}/src/rourand.c
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/src/main.c
|
${CMAKE_CURRENT_SOURCE_DIR}/src/main.c
|
||||||
)
|
)
|
||||||
|
|
||||||
target_include_directories(${PROJECT} PUBLIC
|
target_include_directories(${PROJECT} PUBLIC
|
||||||
|
${CMAKE_CURRENT_SOURCE_DIR}/src/ascon-xofa/
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/src/
|
${CMAKE_CURRENT_SOURCE_DIR}/src/
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,116 @@
|
||||||
|
CC0 1.0 Universal
|
||||||
|
|
||||||
|
Statement of Purpose
|
||||||
|
|
||||||
|
The laws of most jurisdictions throughout the world automatically confer
|
||||||
|
exclusive Copyright and Related Rights (defined below) upon the creator and
|
||||||
|
subsequent owner(s) (each and all, an "owner") of an original work of
|
||||||
|
authorship and/or a database (each, a "Work").
|
||||||
|
|
||||||
|
Certain owners wish to permanently relinquish those rights to a Work for the
|
||||||
|
purpose of contributing to a commons of creative, cultural and scientific
|
||||||
|
works ("Commons") that the public can reliably and without fear of later
|
||||||
|
claims of infringement build upon, modify, incorporate in other works, reuse
|
||||||
|
and redistribute as freely as possible in any form whatsoever and for any
|
||||||
|
purposes, including without limitation commercial purposes. These owners may
|
||||||
|
contribute to the Commons to promote the ideal of a free culture and the
|
||||||
|
further production of creative, cultural and scientific works, or to gain
|
||||||
|
reputation or greater distribution for their Work in part through the use and
|
||||||
|
efforts of others.
|
||||||
|
|
||||||
|
For these and/or other purposes and motivations, and without any expectation
|
||||||
|
of additional consideration or compensation, the person associating CC0 with a
|
||||||
|
Work (the "Affirmer"), to the extent that he or she is an owner of Copyright
|
||||||
|
and Related Rights in the Work, voluntarily elects to apply CC0 to the Work
|
||||||
|
and publicly distribute the Work under its terms, with knowledge of his or her
|
||||||
|
Copyright and Related Rights in the Work and the meaning and intended legal
|
||||||
|
effect of CC0 on those rights.
|
||||||
|
|
||||||
|
1. Copyright and Related Rights. A Work made available under CC0 may be
|
||||||
|
protected by copyright and related or neighboring rights ("Copyright and
|
||||||
|
Related Rights"). Copyright and Related Rights include, but are not limited
|
||||||
|
to, the following:
|
||||||
|
|
||||||
|
i. the right to reproduce, adapt, distribute, perform, display, communicate,
|
||||||
|
and translate a Work;
|
||||||
|
|
||||||
|
ii. moral rights retained by the original author(s) and/or performer(s);
|
||||||
|
|
||||||
|
iii. publicity and privacy rights pertaining to a person's image or likeness
|
||||||
|
depicted in a Work;
|
||||||
|
|
||||||
|
iv. rights protecting against unfair competition in regards to a Work,
|
||||||
|
subject to the limitations in paragraph 4(a), below;
|
||||||
|
|
||||||
|
v. rights protecting the extraction, dissemination, use and reuse of data in
|
||||||
|
a Work;
|
||||||
|
|
||||||
|
vi. database rights (such as those arising under Directive 96/9/EC of the
|
||||||
|
European Parliament and of the Council of 11 March 1996 on the legal
|
||||||
|
protection of databases, and under any national implementation thereof,
|
||||||
|
including any amended or successor version of such directive); and
|
||||||
|
|
||||||
|
vii. other similar, equivalent or corresponding rights throughout the world
|
||||||
|
based on applicable law or treaty, and any national implementations thereof.
|
||||||
|
|
||||||
|
2. Waiver. To the greatest extent permitted by, but not in contravention of,
|
||||||
|
applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and
|
||||||
|
unconditionally waives, abandons, and surrenders all of Affirmer's Copyright
|
||||||
|
and Related Rights and associated claims and causes of action, whether now
|
||||||
|
known or unknown (including existing as well as future claims and causes of
|
||||||
|
action), in the Work (i) in all territories worldwide, (ii) for the maximum
|
||||||
|
duration provided by applicable law or treaty (including future time
|
||||||
|
extensions), (iii) in any current or future medium and for any number of
|
||||||
|
copies, and (iv) for any purpose whatsoever, including without limitation
|
||||||
|
commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes
|
||||||
|
the Waiver for the benefit of each member of the public at large and to the
|
||||||
|
detriment of Affirmer's heirs and successors, fully intending that such Waiver
|
||||||
|
shall not be subject to revocation, rescission, cancellation, termination, or
|
||||||
|
any other legal or equitable action to disrupt the quiet enjoyment of the Work
|
||||||
|
by the public as contemplated by Affirmer's express Statement of Purpose.
|
||||||
|
|
||||||
|
3. Public License Fallback. Should any part of the Waiver for any reason be
|
||||||
|
judged legally invalid or ineffective under applicable law, then the Waiver
|
||||||
|
shall be preserved to the maximum extent permitted taking into account
|
||||||
|
Affirmer's express Statement of Purpose. In addition, to the extent the Waiver
|
||||||
|
is so judged Affirmer hereby grants to each affected person a royalty-free,
|
||||||
|
non transferable, non sublicensable, non exclusive, irrevocable and
|
||||||
|
unconditional license to exercise Affirmer's Copyright and Related Rights in
|
||||||
|
the Work (i) in all territories worldwide, (ii) for the maximum duration
|
||||||
|
provided by applicable law or treaty (including future time extensions), (iii)
|
||||||
|
in any current or future medium and for any number of copies, and (iv) for any
|
||||||
|
purpose whatsoever, including without limitation commercial, advertising or
|
||||||
|
promotional purposes (the "License"). The License shall be deemed effective as
|
||||||
|
of the date CC0 was applied by Affirmer to the Work. Should any part of the
|
||||||
|
License for any reason be judged legally invalid or ineffective under
|
||||||
|
applicable law, such partial invalidity or ineffectiveness shall not
|
||||||
|
invalidate the remainder of the License, and in such case Affirmer hereby
|
||||||
|
affirms that he or she will not (i) exercise any of his or her remaining
|
||||||
|
Copyright and Related Rights in the Work or (ii) assert any associated claims
|
||||||
|
and causes of action with respect to the Work, in either case contrary to
|
||||||
|
Affirmer's express Statement of Purpose.
|
||||||
|
|
||||||
|
4. Limitations and Disclaimers.
|
||||||
|
|
||||||
|
a. No trademark or patent rights held by Affirmer are waived, abandoned,
|
||||||
|
surrendered, licensed or otherwise affected by this document.
|
||||||
|
|
||||||
|
b. Affirmer offers the Work as-is and makes no representations or warranties
|
||||||
|
of any kind concerning the Work, express, implied, statutory or otherwise,
|
||||||
|
including without limitation warranties of title, merchantability, fitness
|
||||||
|
for a particular purpose, non infringement, or the absence of latent or
|
||||||
|
other defects, accuracy, or the present or absence of errors, whether or not
|
||||||
|
discoverable, all to the greatest extent permissible under applicable law.
|
||||||
|
|
||||||
|
c. Affirmer disclaims responsibility for clearing rights of other persons
|
||||||
|
that may apply to the Work or any use thereof, including without limitation
|
||||||
|
any person's Copyright and Related Rights in the Work. Further, Affirmer
|
||||||
|
disclaims responsibility for obtaining any necessary consents, permissions
|
||||||
|
or other rights required for any use of the Work.
|
||||||
|
|
||||||
|
d. Affirmer understands and acknowledges that Creative Commons is not a
|
||||||
|
party to this document and has no duty or obligation with respect to this
|
||||||
|
CC0 or use of the Work.
|
||||||
|
|
||||||
|
For more information, please see
|
||||||
|
<http://creativecommons.org/publicdomain/zero/1.0/>
|
|
@ -0,0 +1,519 @@
|
||||||
|
# Reference, highly optimized, masked C and ASM implementations of Ascon
|
||||||
|
|
||||||
|
Ascon is a family of lightweight cryptographic algorithms and consists of:
|
||||||
|
- Authenticated encryption schemes with associated data (AEAD)
|
||||||
|
- Hash functions (HASH) and extendible output functions (XOF)
|
||||||
|
- Pseudo-random functions (PRF) and message authentication codes (MAC)
|
||||||
|
|
||||||
|
All implementations use the "ECRYPT Benchmarking of Cryptographic Systems (eBACS)" interface:
|
||||||
|
|
||||||
|
- https://bench.cr.yp.to/call-aead.html for AEAD (Ascon-128, Ascon-128a, Ascon-80pq)
|
||||||
|
- https://bench.cr.yp.to/call-hash.html for HASH and XOF (Ascon-Hash, Ascon-Hasha, Ascon-Xof, Ascon-Xofa)
|
||||||
|
- https://nacl.cr.yp.to/auth.html for PRF and MAC (Ascon-Mac, Ascon-Prf, Ascon-PrfShort)
|
||||||
|
|
||||||
|
For more information on Ascon visit: https://ascon.iaik.tugraz.at/
|
||||||
|
|
||||||
|
|
||||||
|
## TL;DR
|
||||||
|
|
||||||
|
If you do not know where to start, use the reference implementations (self-contained, portable, very fast):
|
||||||
|
|
||||||
|
- `crypto_aead/ascon128v12/ref`
|
||||||
|
- `crypto_aead/ascon128av12/ref`
|
||||||
|
- `crypto_hash/asconxofv12/ref`
|
||||||
|
- `crypto_hash/asconxofav12/ref`
|
||||||
|
|
||||||
|
|
||||||
|
## Algorithms
|
||||||
|
|
||||||
|
This repository contains implementations of the following 10 Ascon v1.2 algorithms:
|
||||||
|
|
||||||
|
- `crypto_aead/ascon128v12`: Ascon-128
|
||||||
|
- `crypto_aead/ascon128av12`: Ascon-128a
|
||||||
|
- `crypto_aead/ascon80pqv12`: Ascon-80pq
|
||||||
|
- `crypto_hash/asconhashv12`: Ascon-Hash
|
||||||
|
- `crypto_hash/asconhashav12`: Ascon-Hasha
|
||||||
|
- `crypto_hash/asconxofv12`: Ascon-Xof
|
||||||
|
- `crypto_hash/asconxofav12`: Ascon-Xofa
|
||||||
|
- `crypto_auth/asconmacv12`: Ascon-Mac
|
||||||
|
- `crypto_auth/asconprfv12`: Ascon-Prf
|
||||||
|
- `crypto_auth/asconprfsv12`: Ascon-PrfShort
|
||||||
|
|
||||||
|
We also provide two combined algorithm implementations supporting both AEAD and
|
||||||
|
hashing:
|
||||||
|
|
||||||
|
- `crypto_aead_hash/asconv12`: Ascon-128 combined with Ascon-Hash
|
||||||
|
- `crypto_aead_hash/asconav12`: Ascon-128a combined with Ascon-Hasha
|
||||||
|
|
||||||
|
The following algorithms demonstrate the performance improvement of Ascon on
|
||||||
|
32-bit platforms without bit interleaving overhead. Bit interleaving could be
|
||||||
|
performed externally on the host side or using a dedicated instruction (e.g.
|
||||||
|
using the ARM Custom Datapath Extension). Note that a similar performance
|
||||||
|
improvement could be achieved using funnel shift instructions (available on some
|
||||||
|
32-bit RISC-V extensions).
|
||||||
|
|
||||||
|
- `crypto_aead/ascon128bi32v12`: Ascon-128 (+17% on ARM1176JZF-S)
|
||||||
|
- `crypto_aead/ascon128abi32v12`: Ascon-128a (+23% on ARM1176JZF-S)
|
||||||
|
- `crypto_hash/asconhashbi32v12`: Ascon-Hash (+5% on ARM1176JZF-S)
|
||||||
|
- `crypto_hash/asconhashabi32v12`: Ascon-Hasha (+8% on ARM1176JZF-S)
|
||||||
|
- `crypto_aead_hash/asconbi32v12`: Ascon-128 combined with Ascon-Hash
|
||||||
|
- `crypto_aead_hash/asconabi32v12`: Ascon-128a combined with Ascon-Hasha
|
||||||
|
|
||||||
|
|
||||||
|
## Implementations
|
||||||
|
|
||||||
|
For most algorithms, we provide the following pure C implementations:
|
||||||
|
|
||||||
|
- `ref`: reference implementation
|
||||||
|
- `opt64`: 64-bit speed-optimized
|
||||||
|
- `opt32`: 32-bit speed-optimized
|
||||||
|
- `opt64_lowsize`: 64-bit size-optimized
|
||||||
|
- `opt32_lowsize`: 32-bit size-optimized
|
||||||
|
- `bi32`: 32-bit speed-optimized bit-interleaved
|
||||||
|
- `bi32_lowreg`: 32-bit speed-optimized bit-interleaved (low register usage)
|
||||||
|
- `bi32_lowsize`: 32-bit size-optimized bit-interleaved
|
||||||
|
- `esp32`: 32-bit ESP32 optimized
|
||||||
|
- `opt8`: 8-bit size- and speed-optimized
|
||||||
|
- `bi8`: 8-bit optimized bit-interleaved
|
||||||
|
|
||||||
|
the following C with inline or partial ASM implementations:
|
||||||
|
|
||||||
|
- `avx512`: 320-bit speed-optimized AVX512
|
||||||
|
- `neon`: 64-bit speed-optimized ARM NEON
|
||||||
|
- `armv6`: 32-bit speed-optimized ARMv6
|
||||||
|
- `armv6m`: 32-bit speed-optimized ARMv6-M
|
||||||
|
- `armv7m`: 32-bit speed-optimized ARMv7-M
|
||||||
|
- `armv6_lowsize`: 32-bit size-optimized ARMv6
|
||||||
|
- `armv6m_lowsize`: 32-bit size-optimized ARMv6-M
|
||||||
|
- `armv7m_lowsize`: 32-bit size-optimized ARMv7-M
|
||||||
|
- `armv7m_small`: 32-bit small speed-optimized ARMv7-M
|
||||||
|
- `bi32_armv6`: 32-bit speed-optimized bit-interleaved ARMv6
|
||||||
|
- `bi32_armv6m`: 32-bit speed-optimized bit-interleaved ARMv6-M
|
||||||
|
- `bi32_armv7m`: 32-bit speed-optimized bit-interleaved ARMv7-M
|
||||||
|
- `bi32_armv7m_small`: 32-bit small bit-interleaved ARMv7-M
|
||||||
|
- `avr`: 8-bit size- and speed-optimized AVR
|
||||||
|
- `avr_lowsize`: 8-bit size-optimized AVR
|
||||||
|
|
||||||
|
the following ASM implementations:
|
||||||
|
|
||||||
|
- `asm_esp32`: 32-bit optimized ESP32 using funnel-shift instructions
|
||||||
|
- `asm_rv32i`: 32-bit optimized RV32I using the base instruction set
|
||||||
|
- `asm_rv32b`: 32-bit optimized RV32B using bitmanip base (Zbb)
|
||||||
|
- `asm_fsr_rv32b`: 32-bit optimized funnel-shift RV32B using bitmanip base and bitmanip ternary (ZbbZbt)
|
||||||
|
- `asm_bi32_rv32b`: 32-bit optimized bit-interleaved RV32B using bitmanip base and bitmanip permutations (ZbbZbp)
|
||||||
|
|
||||||
|
and the following high-level masked (shared) C with inline ASM implementations:
|
||||||
|
|
||||||
|
- `protected_bi32_armv6`: 32-bit masked bit-interleaved ARMv6
|
||||||
|
- `protected_bi32_armv6_leveled`: 32-bit masked and leveled bit-interleaved ARMv6
|
||||||
|
|
||||||
|
The masked C implementations can be used as a starting point to generate
|
||||||
|
device specific C/ASM implementations. Note that the masked C implementations
|
||||||
|
require a minimum amount of ASM instructions. Otherwise, the compiler may
|
||||||
|
heavily optimize the code and even combine shares. Obviously, the output
|
||||||
|
generated is very sensitive to compiler and environment changes and any
|
||||||
|
generated output needs to be security evaluated. A preliminary evaluation of
|
||||||
|
these implementations has been performed on some
|
||||||
|
[ChipWhisperer](https://www.newae.com/chipwhisperer) devices. The setup and
|
||||||
|
preliminary results can be found at: https://github.com/ascon/simpleserial-ascon
|
||||||
|
|
||||||
|
|
||||||
|
# Performance results on different CPUs in cycles per byte
|
||||||
|
|
||||||
|
## Ascon-128a
|
||||||
|
|
||||||
|
| Message Length in Bytes | 1 | 8 | 16 | 32 | 64 | 1536 | long |
|
||||||
|
|:-------------------------|-----:|-----:|-----:|-----:|-----:|-----:|-----:|
|
||||||
|
| AMD EPYC 7742\* | | | | | 7.4 | 4.4 | 4.2 |
|
||||||
|
| AMD Ryzen 9 5950X\* | | | | | 8.1 | 5.3 | 5.2 |
|
||||||
|
| Apple M1 (ARMv8)\* | | | | | 9.4 | 6.3 | 6.3 |
|
||||||
|
| Cortex-A72 (ARMv8)\* | | | | | 10.9 | 7.2 | 7.0 |
|
||||||
|
| Intel Xeon E5-2609 v4\* | | | | | 11.3 | 7.4 | 7.2 |
|
||||||
|
| Intel Core i5-6300U | 365 | 47 | 31 | 19 | 13.5 | 8.0 | 7.8 |
|
||||||
|
| Intel Core i5-4200U | 519 | 67 | 44 | 27 | 18.8 | 11.0 | 10.6 |
|
||||||
|
| Cortex-A9 (ARMv7)\* | | | | | 42.8 | 24.6 | 24.0 |
|
||||||
|
| Cortex-A7 (NEON) | 2204 | 226 | 132 | 82 | 55.9 | 31.7 | 30.7 |
|
||||||
|
| Cortex-A7 (ARMv7)\* | | | | | 55.5 | 38.2 | 37.5 |
|
||||||
|
| ARM1176JZF-S (ARMv6) | 1908 | 235 | 156 | 99 | 70.4 | 43.0 | 42.9 |
|
||||||
|
|
||||||
|
|
||||||
|
## Ascon-128 and Ascon-80pq
|
||||||
|
|
||||||
|
| Message Length in Bytes | 1 | 8 | 16 | 32 | 64 | 1536 | long |
|
||||||
|
|:-------------------------|-----:|-----:|-----:|-----:|-----:|-----:|-----:|
|
||||||
|
| AMD EPYC 7742\* | | | | | 8.1 | 6.6 | 6.5 |
|
||||||
|
| AMD Ryzen 9 5950X\* | | | | | 11.0 | 8.2 | 8.1 |
|
||||||
|
| Apple M1 (ARMv8)\* | | | | | 12.5 | 9.5 | 9.3 |
|
||||||
|
| Cortex-A72 (ARMv8)\* | | | | | 13.8 | 10.7 | 10.5 |
|
||||||
|
| Intel Xeon E5-2609 v4\* | | | | | 14.9 | 10.8 | 10.6 |
|
||||||
|
| Intel Core i5-6300U | 367 | 58 | 35 | 23 | 17.6 | 11.9 | 11.4 |
|
||||||
|
| Intel Core i5-4200U | 521 | 81 | 49 | 32 | 23.9 | 16.2 | 15.8 |
|
||||||
|
| Cortex-A9 (ARMv7)\* | | | | | 51.7 | 34.1 | 33.3 |
|
||||||
|
| Cortex-A7 (NEON) | 2182 | 249 | 148 | 97 | 71.7 | 47.5 | 46.5 |
|
||||||
|
| Cortex-A7 (ARMv7)\* | | | | | 69.6 | 52.0 | 51.6 |
|
||||||
|
| ARM1176JZF-S (ARMv6) | 1921 | 277 | 167 | 112 | 83.7 | 57.2 | 56.8 |
|
||||||
|
|
||||||
|
|
||||||
|
## Ascon-Hasha and Ascon-Xofa
|
||||||
|
|
||||||
|
| Message Length in Bytes | 1 | 8 | 16 | 32 | 64 | 1536 | long |
|
||||||
|
|:-------------------------|-----:|-----:|-----:|-----:|------:|-----:|-----:|
|
||||||
|
| AMD EPYC 7742\* | | | | | | | |
|
||||||
|
| AMD Ryzen 7 1700\* | | | | | 22.0 | 12.1 | 11.7 |
|
||||||
|
| Apple M1 (ARMv8)\* | | | | | | | |
|
||||||
|
| Cortex-A72 (ARMv8)\* | | | | | 22.2 | 14.5 | 14.2 |
|
||||||
|
| Intel Xeon E5-2609 v4\* | | | | | 23.3 | 14.4 | 14.0 |
|
||||||
|
| Intel Core i5-6300U | 550 | 83 | 49 | 33 | 23.7 | 15.6 | 15.5 |
|
||||||
|
| Intel Core i5-4200U | 749 | 112 | 67 | 44 | 31.8 | 20.8 | 20.7 |
|
||||||
|
| Cortex-A9 (ARMv7)\* | | | | | 87.5 | 45.6 | 44.0 |
|
||||||
|
| Cortex-A7 (ARMv7)\* | | | | | 102.3 | 63.5 | 61.8 |
|
||||||
|
| ARM1176JZF-S (ARMv6) | 2390 | 356 | 211 | 138 | 100.7 | 65.7 | 65.3 |
|
||||||
|
|
||||||
|
|
||||||
|
## Ascon-Hash and Ascon-Xof
|
||||||
|
|
||||||
|
| Message Length in Bytes | 1 | 8 | 16 | 32 | 64 | 1536 | long |
|
||||||
|
|:-------------------------|-----:|-----:|-----:|-----:|------:|-----:|-----:|
|
||||||
|
| AMD EPYC 7742\* | | | | | 21.1 | 13.3 | 12.4 |
|
||||||
|
| AMD Ryzen 9 5950X\* | | | | | 24.1 | 16.1 | 15.8 |
|
||||||
|
| Apple M1 (ARMv8)\* | | | | | 29.2 | 19.6 | 18.5 |
|
||||||
|
| Cortex-A72 (ARMv8)\* | | | | | 30.5 | 20.5 | 20.0 |
|
||||||
|
| Intel Xeon E5-2609 v4\* | | | | | 31.9 | 21.4 | 21.2 |
|
||||||
|
| Intel Core i5-6300U | 747 | 114 | 69 | 46 | 34.2 | 23.2 | 23.1 |
|
||||||
|
| Intel Core i5-4200U | 998 | 153 | 92 | 61 | 45.5 | 30.9 | 30.7 |
|
||||||
|
| Cortex-A9 (ARMv7)\* | | | | | 95.8 | 55.5 | 53.9 |
|
||||||
|
| Cortex-A7 (ARMv7)\* | | | | | 138.1 | 89.9 | 88.8 |
|
||||||
|
| ARM1176JZF-S (ARMv6) | 3051 | 462 | 277 | 184 | 137.3 | 92.6 | 92.2 |
|
||||||
|
|
||||||
|
|
||||||
|
## Ascon-Mac and Ascon-Prf
|
||||||
|
|
||||||
|
| Message Length in Bytes | 1 | 8 | 16 | 32 | 64 | 1536 | long |
|
||||||
|
|:-------------------------|-----:|-----:|-----:|-----:|-----:|-----:|-----:|
|
||||||
|
| Intel Core i5-6300U | 369 | 46 | 24 | 18 | 11.7 | 6.4 | 6.3 |
|
||||||
|
| Intel Core i5-4200U | 506 | 63 | 32 | 24 | 16.2 | 8.8 | 8.7 |
|
||||||
|
| ARM1176JZF-S (ARMv6) | 1769 | 223 | 117 | 85 | 57.5 | 31.9 | 31.6 |
|
||||||
|
|
||||||
|
|
||||||
|
## Ascon-PrfShort
|
||||||
|
|
||||||
|
| Message Length in Bytes | 1 | 8 | 16 | 32 | 64 | 1536 | long |
|
||||||
|
|:-------------------------|-----:|-----:|-----:|-----:|-----:|-----:|-----:|
|
||||||
|
| Intel Core i5-6300U | 185 | 23 | 12 | - | - | - | - |
|
||||||
|
| Intel Core i5-4200U | 257 | 33 | 17 | - | - | - | - |
|
||||||
|
| ARM1176JZF-S (ARMv6) | 1057 | 132 | 69 | - | - | - | - |
|
||||||
|
|
||||||
|
\* Results taken from eBACS: http://bench.cr.yp.to/
|
||||||
|
|
||||||
|
|
||||||
|
# Build and test
|
||||||
|
|
||||||
|
Build and test all Ascon C targets using release flags (-O2 -fomit-frame-pointer -march=native -mtune=native):
|
||||||
|
|
||||||
|
```
|
||||||
|
mkdir build && cd build
|
||||||
|
cmake ..
|
||||||
|
cmake --build .
|
||||||
|
ctest
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
Build and test all Ascon C targets on Windows:
|
||||||
|
|
||||||
|
```
|
||||||
|
mkdir build && cd build
|
||||||
|
cmake ..
|
||||||
|
cmake --build . --config Release
|
||||||
|
ctest -C Release
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
Build and test all Ascon C targets using debug flags (with NIST defined flags and sanitizers):
|
||||||
|
|
||||||
|
```
|
||||||
|
mkdir build && cd build
|
||||||
|
cmake .. -DCMAKE_BUILD_TYPE=Debug
|
||||||
|
cmake --build .
|
||||||
|
ctest
|
||||||
|
```
|
||||||
|
|
||||||
|
Manually set the compiler and/or release flags (e.g. to disable -march=native -mtune=native).
|
||||||
|
|
||||||
|
```
|
||||||
|
mkdir build && cd build
|
||||||
|
cmake .. -DCMAKE_C_COMPILER=clang -DREL_FLAGS="-O2;-fomit-frame-pointer"
|
||||||
|
cmake --build .
|
||||||
|
ctest
|
||||||
|
```
|
||||||
|
|
||||||
|
Build and run only specific algorithms, implementations and tests:
|
||||||
|
|
||||||
|
```
|
||||||
|
mkdir build && cd build
|
||||||
|
cmake .. -DALG_LIST="ascon128;asconhash" -DIMPL_LIST="opt64;bi32" -DTEST_LIST="genkat"
|
||||||
|
cmake --build .
|
||||||
|
ctest
|
||||||
|
```
|
||||||
|
|
||||||
|
Note that cmake stores variables in a cache. Therefore, variables can be set
|
||||||
|
one-by-one, unset using e.g. `cmake . -UIMPL_LIST` and shown using `cmake . -L`:
|
||||||
|
|
||||||
|
```
|
||||||
|
mkdir build && cd build
|
||||||
|
cmake ..
|
||||||
|
cmake . -DALG_LIST="ascon128;asconhash"
|
||||||
|
cmake . -DIMPL_LIST="opt64;bi32"
|
||||||
|
cmake . -DTEST_LIST="genkat"
|
||||||
|
cmake . -L
|
||||||
|
cmake --build .
|
||||||
|
ctest
|
||||||
|
```
|
||||||
|
|
||||||
|
Cross compile and test with custom emulator using e.g. `qemu-arm`:
|
||||||
|
|
||||||
|
```
|
||||||
|
mkdir build && cd build
|
||||||
|
cmake .. -DCMAKE_C_COMPILER="arm-linux-gnueabi-gcc" \
|
||||||
|
-DREL_FLAGS="-O2;-fomit-frame-pointer;-march=armv7;-mtune=cortex-m4" \
|
||||||
|
-DEMULATOR="qemu-arm;-L;/usr/arm-linux-gnueabi" \
|
||||||
|
-DALG_LIST="ascon128;ascon128a" -DIMPL_LIST="armv7m;bi32_armv7m"
|
||||||
|
cmake --build .
|
||||||
|
ctest
|
||||||
|
```
|
||||||
|
|
||||||
|
or using Intel SDE (use full path to `sde` or add to path variable):
|
||||||
|
|
||||||
|
```
|
||||||
|
mkdir build && cd build
|
||||||
|
cmake .. -DCMAKE_C_COMPILER=gcc -DIMPL_LIST=avx512 -DEMULATOR="sde;--" \
|
||||||
|
-DREL_FLAGS="-O2;-fomit-frame-pointer;-march=icelake-client"
|
||||||
|
cmake --build .
|
||||||
|
ctest
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
# Build and benchmark:
|
||||||
|
|
||||||
|
Build the getcycles test:
|
||||||
|
|
||||||
|
```
|
||||||
|
mkdir build && cd build
|
||||||
|
cmake .. -DALG_LIST="ascon128;asconhash" -DIMPL_LIST="opt32;opt32_lowsize" -DTEST_LIST="getcycles"
|
||||||
|
cmake --build .
|
||||||
|
```
|
||||||
|
|
||||||
|
Get the CPU cycle performance:
|
||||||
|
|
||||||
|
```
|
||||||
|
./getcycles_crypto_aead_ascon128v12_opt32
|
||||||
|
./getcycles_crypto_aead_ascon128v12_opt32_lowsize
|
||||||
|
./getcycles_crypto_hash_asconhashv12_opt32
|
||||||
|
./getcycles_crypto_hash_asconhashv12_opt32_lowsize
|
||||||
|
```
|
||||||
|
|
||||||
|
Get the implementation size:
|
||||||
|
|
||||||
|
```
|
||||||
|
size -t libcrypto_aead_ascon128v12_opt32.a
|
||||||
|
size -t libcrypto_aead_ascon128v12_opt32_lowsize.a
|
||||||
|
size -t libcrypto_hash_asconhashv12_opt32.a
|
||||||
|
size -t libcrypto_hash_asconhashv12_opt32_lowsize.a
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
# Manually build and run a single Ascon target:
|
||||||
|
|
||||||
|
Build example for AEAD algorithms:
|
||||||
|
|
||||||
|
```
|
||||||
|
gcc -march=native -O3 -Icrypto_aead/ascon128v12/opt64 crypto_aead/ascon128v12/opt64/*.c -Itests tests/genkat_aead.c -o genkat
|
||||||
|
gcc -march=native -O3 -Icrypto_aead/ascon128v12/opt64 crypto_aead/ascon128v12/opt64/*.c -DCRYPTO_AEAD -Itests tests/getcycles.c -o getcycles
|
||||||
|
```
|
||||||
|
|
||||||
|
Build example for HASH algorithms:
|
||||||
|
|
||||||
|
```
|
||||||
|
gcc -march=native -O3 -Icrypto_hash/asconhashv12/opt64 crypto_hash/asconhashv12/opt64/*.c -Itests tests/genkat_hash.c -o genkat
|
||||||
|
gcc -march=native -O3 -Icrypto_hash/asconhashv12/opt64 crypto_hash/asconhashv12/opt64/*.c -DCRYPTO_HASH -Itests tests/getcycles.c -o getcycles
|
||||||
|
```
|
||||||
|
|
||||||
|
Generate KATs and get CPU cycles:
|
||||||
|
|
||||||
|
```
|
||||||
|
./genkat
|
||||||
|
./getcycles
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
## Manually build and run an RV32 target:
|
||||||
|
|
||||||
|
|
||||||
|
Setup:
|
||||||
|
|
||||||
|
```
|
||||||
|
sudo apt install gcc-riscv64-unknown-elf picolibc-riscv64-unknown-elf qemu-system-misc
|
||||||
|
```
|
||||||
|
|
||||||
|
Example to build, run and test an AEAD/HASH algorithm using `gcc`, `picolibc` and `qemu`:
|
||||||
|
|
||||||
|
```
|
||||||
|
riscv64-unknown-elf-gcc -O2 -march=rv32i -mabi=ilp32 --specs=picolibc.specs --oslib=semihost --crt0=hosted -Ttests/rv32.ld \
|
||||||
|
-Icrypto_aead/ascon128v12/asm_rv32i crypto_aead/ascon128v12/asm_rv32i/*.[cS] -Itests tests/genkat_aead.c -o genkat
|
||||||
|
qemu-system-riscv32 -semihosting-config enable=on -monitor none -serial none -nographic -machine virt,accel=tcg -cpu rv32 -bios none -kernel genkat
|
||||||
|
diff LWC_AEAD_KAT_128_128.txt crypto_aead/ascon128v12/LWC_AEAD_KAT_128_128.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
```
|
||||||
|
riscv64-unknown-elf-gcc -O2 -march=rv32i -mabi=ilp32 --specs=picolibc.specs --oslib=semihost --crt0=hosted -Ttests/rv32.ld \
|
||||||
|
-Icrypto_hash/asconhashv12/opt32 crypto_hash/asconhashv12/opt32/*.[cS] -Itests tests/genkat_hash.c -o genkat
|
||||||
|
qemu-system-riscv32 -semihosting-config enable=on -monitor none -serial none -nographic -machine virt,accel=tcg -cpu rv32 -bios none -kernel genkat
|
||||||
|
diff LWC_HASH_KAT_256.txt crypto_hash/asconhashv12/LWC_HASH_KAT_256.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
## Manually build and run an AVR target:
|
||||||
|
|
||||||
|
Example to build, run and test an AEAD algorithm using `avr-gcc`, `avr-libc` and `simavr`.
|
||||||
|
|
||||||
|
Setup:
|
||||||
|
|
||||||
|
```
|
||||||
|
sudo apt install gcc-avr avr-libc simavr
|
||||||
|
git clone https://github.com/JohannCahier/avr_uart.git
|
||||||
|
```
|
||||||
|
|
||||||
|
Single test vector using `demo` and performance measurement using `getcycles`:
|
||||||
|
|
||||||
|
```
|
||||||
|
avr-gcc -mmcu=atmega128 -std=c99 -Os -Icrypto_aead/ascon128v12/opt8 crypto_aead/ascon128v12/opt8/*.[cS] \
|
||||||
|
-DAVR_UART -Iavr_uart avr_uart/avr_uart.c -Wno-incompatible-pointer-types -Wno-cpp \
|
||||||
|
-DCRYPTO_AEAD -Itests tests/demo.c -o demo
|
||||||
|
simavr -m atmega128 ./demo
|
||||||
|
```
|
||||||
|
```
|
||||||
|
avr-gcc -mmcu=atmega128 -std=c99 -Os -Icrypto_aead/ascon128v12/opt8 crypto_aead/ascon128v12/opt8/*.[cS] \
|
||||||
|
-DAVR_UART -Iavr_uart avr_uart/avr_uart.c -Wno-incompatible-pointer-types -Wno-cpp \
|
||||||
|
-DCRYPTO_AEAD -Itests tests/getcycles.c -o getcycles
|
||||||
|
simavr -t -m atmega128 ./getcycles
|
||||||
|
```
|
||||||
|
|
||||||
|
Generate all test vectors for AEAD/HASH and write result to a file. Press Ctrl-C to quit `simavr` after about a minute:
|
||||||
|
|
||||||
|
```
|
||||||
|
avr-gcc -mmcu=atmega128 -std=c99 -Os -Icrypto_aead/ascon128v12/opt8 crypto_aead/ascon128v12/opt8/*.[cS] \
|
||||||
|
-DAVR_UART -Iavr_uart avr_uart/avr_uart.c -Wno-incompatible-pointer-types -Wno-cpp \
|
||||||
|
-Itests tests/genkat_aead.c -o genkat_aead
|
||||||
|
echo "Press Ctrl-C to quit simavr after about a minute"
|
||||||
|
simavr -t -m atmega128 ./genkat_aead 2> LWC_AEAD_KAT_128_128.txt
|
||||||
|
sed -i -e 's/\x1b\[[0-9;]*m//g' -e 's/\.\.$//' LWC_AEAD_KAT_128_128.txt
|
||||||
|
diff LWC_AEAD_KAT_128_128.txt crypto_aead/ascon128v12/LWC_AEAD_KAT_128_128.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
```
|
||||||
|
avr-gcc -mmcu=atmega128 -std=c99 -Os -Icrypto_hash/asconhashv12/opt8 crypto_hash/asconhashv12/opt8/*.[cS] \
|
||||||
|
-DAVR_UART -Iavr_uart avr_uart/avr_uart.c -Wno-incompatible-pointer-types -Wno-cpp \
|
||||||
|
-Itests tests/genkat_hash.c -o genkat_hash
|
||||||
|
echo "Press Ctrl-C to quit simavr after about a minute"
|
||||||
|
simavr -t -m atmega128 ./genkat_hash 2> LWC_HASH_KAT_256.txt
|
||||||
|
sed -i -e 's/\x1b\[[0-9;]*m//g' -e 's/\.\.$//' LWC_HASH_KAT_256.txt
|
||||||
|
diff LWC_HASH_KAT_256.txt crypto_hash/asconhashv12/LWC_HASH_KAT_256.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
# Benchmarking
|
||||||
|
|
||||||
|
## Hints to get more reliable getcycles results on Intel/AMD CPUs:
|
||||||
|
|
||||||
|
* Determine the processor base frequency (also called design frequency):
|
||||||
|
- e.g. using the Intel/AMD website
|
||||||
|
- or using `lscpu` listed under model name
|
||||||
|
|
||||||
|
* Disable turbo boost (this should lock the frequency to the next value
|
||||||
|
below the processor base frequency):
|
||||||
|
```
|
||||||
|
echo 1 | sudo tee /sys/devices/system/cpu/intel_pstate/no_turbo
|
||||||
|
```
|
||||||
|
|
||||||
|
* If the above does not work, manually set the frequency using e.g. `cpufreq-set`.
|
||||||
|
|
||||||
|
* Determine the actual frequency (under load):
|
||||||
|
- e.g. by watching the frequency using `lscpu` or `cpufreq-info`
|
||||||
|
|
||||||
|
* Determine the scaling factor between the actual and base frequency:
|
||||||
|
- factor = actual frequency / base frequency
|
||||||
|
|
||||||
|
* Run a getcycles program using the frequency factor and watch the results:
|
||||||
|
```
|
||||||
|
while true; do ./getcycles_crypto_aead_ascon128v12_opt64 $factor; done
|
||||||
|
```
|
||||||
|
|
||||||
|
* Run the `benchmark-getcycles.sh` script with the frequency factor and a
|
||||||
|
specific algorithm to benchmark all corresponding getcycles implementations:
|
||||||
|
```
|
||||||
|
scripts/benchmark-getcycles.sh $factor ascon128
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
## Hints to activate the performance monitor unit (PMU) on ARM CPUs:
|
||||||
|
|
||||||
|
* First try to install `linux-tools` and see if it works.
|
||||||
|
|
||||||
|
* On many ARM platforms, the PMU has to be enabled using a kernel module:
|
||||||
|
- Source code for Armv6 (32-bit):
|
||||||
|
<http://sandsoftwaresound.net/raspberry-pi/raspberry-pi-gen-1/performance-counter-kernel-module/>
|
||||||
|
- Source code for Armv7 (32-bit):
|
||||||
|
<https://github.com/thoughtpolice/enable_arm_pmu>
|
||||||
|
- Source code for Armv8/Aarch64 (64-bit):
|
||||||
|
<https://github.com/rdolbeau/enable_arm_pmu>
|
||||||
|
|
||||||
|
* Steps to compile the kernel module on the raspberry pi:
|
||||||
|
- Find out the kernel version using `uname -a`
|
||||||
|
- Download the kernel header files, e.g. `raspberrypi-kernel-headers`
|
||||||
|
- Download the source code for the Armv6 kernel module
|
||||||
|
- Build, install and load the kernel module
|
||||||
|
|
||||||
|
|
||||||
|
## Benchmark Ascon v1.2 using supercop
|
||||||
|
|
||||||
|
Download supercop according to the website: http://bench.cr.yp.to/supercop.html
|
||||||
|
|
||||||
|
To test only Ascon, just run the following commands:
|
||||||
|
|
||||||
|
```
|
||||||
|
./do-part init
|
||||||
|
./do-part crypto_aead ascon128v12
|
||||||
|
./do-part crypto_aead ascon128av12
|
||||||
|
./do-part crypto_aead ascon80pqv12
|
||||||
|
./do-part crypto_hash asconhashv12
|
||||||
|
./do-part crypto_hash asconxofv12
|
||||||
|
```
|
||||||
|
|
||||||
|
Show the cycles/Byte for a 1536 Byte long message:
|
||||||
|
|
||||||
|
```
|
||||||
|
cat bench/*/data | grep '_cycles 1536 ' | awk '{printf "%.1f\t%s\t%s\n", $9/$8,
|
||||||
|
$6, $7}' | sort -nr
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
## Evaluate and optimize Ascon on constrained devices:
|
||||||
|
|
||||||
|
* The ascon-c code allows setting compile-time parameters `ASCON_INLINE_MODE`
|
||||||
|
(IM), `ASCON_INLINE_PERM` (IP), `ASCON_UNROLL_LOOPS` (UL), `ASCON_INLINE_BI`
|
||||||
|
(IB), via command line or in the `crypto_*/ascon*/*/config.h` files.
|
||||||
|
* Use the `benchmark-config.sh` script to evaluate all combinations of these
|
||||||
|
parameters for a given list of Ascon implementations. The script is called
|
||||||
|
with an output file, frequency factor, the algorithm, and the list of
|
||||||
|
implementations to test:
|
||||||
|
```
|
||||||
|
scripts/benchmark-config.sh results-config.md $factor ascon128 ref opt64 opt64_lowsize
|
||||||
|
```
|
||||||
|
* The `results-config.md` file then contains a markup table with size and cycles
|
||||||
|
for each implementation and parameter set to evaluate several time-area
|
||||||
|
trade-offs.
|
||||||
|
* The `benchmark-all.sh` and `benchmark-size.sh` scripts provide a time/size
|
||||||
|
and size-only table of all currently compiled implementations:
|
||||||
|
```
|
||||||
|
scripts/benchmark-all.sh results-all.md
|
||||||
|
scripts/benchmark-size.sh results-size.md
|
||||||
|
```
|
|
@ -0,0 +1,4 @@
|
||||||
|
#define CRYPTO_VERSION "1.2.7"
|
||||||
|
#define CRYPTO_BYTES 32
|
||||||
|
#define ASCON_HASH_BYTES 0 /* XOF */
|
||||||
|
#define ASCON_HASH_ROUNDS 8
|
|
@ -0,0 +1,3 @@
|
||||||
|
aarch64
|
||||||
|
armeabi
|
||||||
|
arm
|
|
@ -0,0 +1,53 @@
|
||||||
|
#ifndef ASCON_H_
|
||||||
|
#define ASCON_H_
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#include "api.h"
|
||||||
|
#include "config.h"
|
||||||
|
|
||||||
|
/*
 * Ascon state: five 64-bit words. The union exposes the same 40 bytes of
 * storage as 64-bit words (x), as pairs of 32-bit words (w) and as bytes (b).
 */
typedef union {
  uint64_t x[5];    /* the five 64-bit state words */
  uint32_t w[5][2]; /* each state word viewed as two 32-bit words */
  uint8_t b[5][8];  /* each state word viewed as eight bytes */
} ascon_state_t;
|
||||||
|
|
||||||
|
#ifdef ASCON_AEAD_RATE
|
||||||
|
|
||||||
|
/*
 * Number of 64-bit words needed to hold CRYPTO_KEYBYTES key bytes, rounded
 * up. The expansion is fully parenthesized so the macro can be used safely
 * inside larger expressions (e.g. `n / ASCON_KEYWORDS`).
 */
#define ASCON_KEYWORDS ((CRYPTO_KEYBYTES + 7) / 8)
|
||||||
|
|
||||||
|
typedef union {
|
||||||
|
uint64_t x[ASCON_KEYWORDS];
|
||||||
|
uint32_t w[ASCON_KEYWORDS][2];
|
||||||
|
uint8_t b[ASCON_KEYWORDS][8];
|
||||||
|
} ascon_key_t;
|
||||||
|
|
||||||
|
#if !ASCON_INLINE_MODE
|
||||||
|
|
||||||
|
void ascon_loadkey(ascon_key_t* key, const uint8_t* k);
|
||||||
|
void ascon_initaead(ascon_state_t* s, const ascon_key_t* key,
|
||||||
|
const uint8_t* npub);
|
||||||
|
void ascon_adata(ascon_state_t* s, const uint8_t* ad, uint64_t adlen);
|
||||||
|
void ascon_encrypt(ascon_state_t* s, uint8_t* c, const uint8_t* m,
|
||||||
|
uint64_t mlen);
|
||||||
|
void ascon_decrypt(ascon_state_t* s, uint8_t* m, const uint8_t* c,
|
||||||
|
uint64_t clen);
|
||||||
|
void ascon_final(ascon_state_t* s, const ascon_key_t* k);
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef ASCON_HASH_BYTES
|
||||||
|
|
||||||
|
#if !ASCON_INLINE_MODE
|
||||||
|
|
||||||
|
/* Load the initial state of the compile-time selected hash/XOF variant. */
void ascon_inithash(ascon_state_t* s);

/* Absorb inlen bytes from in into the state, including the final padding. */
void ascon_absorb(ascon_state_t* s, const uint8_t* in, uint64_t inlen);

/* Permute the state and write outlen bytes of output to out. */
void ascon_squeeze(ascon_state_t* s, uint8_t* out, uint64_t outlen);
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif /* ASCON_H_ */
|
|
@ -0,0 +1,39 @@
|
||||||
|
#ifndef ENDIAN_H_
|
||||||
|
#define ENDIAN_H_
|
||||||
|
|
||||||
|
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
|
||||||
|
|
||||||
|
/* macros for big endian machines */
|
||||||
|
#ifdef PRAGMA_ENDIAN
|
||||||
|
#pragma message("Using macros for big endian machines")
|
||||||
|
#endif
|
||||||
|
#define U64BIG(x) (x)
|
||||||
|
#define U32BIG(x) (x)
|
||||||
|
#define U16BIG(x) (x)
|
||||||
|
|
||||||
|
#elif defined(_MSC_VER) || \
|
||||||
|
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
|
||||||
|
|
||||||
|
/* macros for little endian machines */
|
||||||
|
#ifdef PRAGMA_ENDIAN
|
||||||
|
#pragma message("Using macros for little endian machines")
|
||||||
|
#endif
|
||||||
|
#define U64BIG(x) \
|
||||||
|
(((0x00000000000000FFULL & (x)) << 56) | \
|
||||||
|
((0x000000000000FF00ULL & (x)) << 40) | \
|
||||||
|
((0x0000000000FF0000ULL & (x)) << 24) | \
|
||||||
|
((0x00000000FF000000ULL & (x)) << 8) | \
|
||||||
|
((0x000000FF00000000ULL & (x)) >> 8) | \
|
||||||
|
((0x0000FF0000000000ULL & (x)) >> 24) | \
|
||||||
|
((0x00FF000000000000ULL & (x)) >> 40) | \
|
||||||
|
((0xFF00000000000000ULL & (x)) >> 56))
|
||||||
|
#define U32BIG(x) \
|
||||||
|
(((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \
|
||||||
|
((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24))
|
||||||
|
#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8))
|
||||||
|
|
||||||
|
#else
|
||||||
|
#error "Ascon byte order macros not defined in bendian.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif /* ENDIAN_H_ */
|
|
@ -0,0 +1,19 @@
|
||||||
|
#ifndef CONFIG_H_
|
||||||
|
#define CONFIG_H_
|
||||||
|
|
||||||
|
/* inline the ascon mode */
|
||||||
|
#ifndef ASCON_INLINE_MODE
|
||||||
|
#define ASCON_INLINE_MODE 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* inline all permutations */
|
||||||
|
#ifndef ASCON_INLINE_PERM
|
||||||
|
#define ASCON_INLINE_PERM 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* unroll permutation loops */
|
||||||
|
#ifndef ASCON_UNROLL_LOOPS
|
||||||
|
#define ASCON_UNROLL_LOOPS 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif /* CONFIG_H_ */
|
|
@ -0,0 +1,90 @@
|
||||||
|
#ifndef CONSTANTS_H_
|
||||||
|
#define CONSTANTS_H_
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#define ASCON_128_KEYBYTES 16
|
||||||
|
#define ASCON_128A_KEYBYTES 16
|
||||||
|
#define ASCON_80PQ_KEYBYTES 20
|
||||||
|
|
||||||
|
#define ASCON_128_RATE 8
|
||||||
|
#define ASCON_128A_RATE 16
|
||||||
|
#define ASCON_HASH_RATE 8
|
||||||
|
#define ASCON_PRF_IN_RATE 32
|
||||||
|
#define ASCON_PRFA_IN_RATE 40
|
||||||
|
#define ASCON_PRF_OUT_RATE 16
|
||||||
|
|
||||||
|
#define ASCON_128_PA_ROUNDS 12
|
||||||
|
#define ASCON_128_PB_ROUNDS 6
|
||||||
|
#define ASCON_128A_PA_ROUNDS 12
|
||||||
|
#define ASCON_128A_PB_ROUNDS 8
|
||||||
|
|
||||||
|
#define ASCON_HASH_PA_ROUNDS 12
|
||||||
|
#define ASCON_HASH_PB_ROUNDS 12
|
||||||
|
#define ASCON_HASHA_PA_ROUNDS 12
|
||||||
|
#define ASCON_HASHA_PB_ROUNDS 8
|
||||||
|
|
||||||
|
#define ASCON_PRF_PA_ROUNDS 12
|
||||||
|
#define ASCON_PRF_PB_ROUNDS 12
|
||||||
|
#define ASCON_PRFA_PA_ROUNDS 12
|
||||||
|
#define ASCON_PRFA_PB_ROUNDS 8
|
||||||
|
|
||||||
|
#define ASCON_128_IV 0x80400c0600000000ull
|
||||||
|
#define ASCON_128A_IV 0x80800c0800000000ull
|
||||||
|
#define ASCON_80PQ_IV 0xa0400c0600000000ull
|
||||||
|
|
||||||
|
#define ASCON_HASH_IV 0x00400c0000000100ull
|
||||||
|
#define ASCON_HASHA_IV 0x00400c0400000100ull
|
||||||
|
#define ASCON_XOF_IV 0x00400c0000000000ull
|
||||||
|
#define ASCON_XOFA_IV 0x00400c0400000000ull
|
||||||
|
|
||||||
|
#define ASCON_HASH_IV0 0xee9398aadb67f03dull
|
||||||
|
#define ASCON_HASH_IV1 0x8bb21831c60f1002ull
|
||||||
|
#define ASCON_HASH_IV2 0xb48a92db98d5da62ull
|
||||||
|
#define ASCON_HASH_IV3 0x43189921b8f8e3e8ull
|
||||||
|
#define ASCON_HASH_IV4 0x348fa5c9d525e140ull
|
||||||
|
|
||||||
|
#define ASCON_HASHA_IV0 0x01470194fc6528a6ull
|
||||||
|
#define ASCON_HASHA_IV1 0x738ec38ac0adffa7ull
|
||||||
|
#define ASCON_HASHA_IV2 0x2ec8e3296c76384cull
|
||||||
|
#define ASCON_HASHA_IV3 0xd6f6a54d7f52377dull
|
||||||
|
#define ASCON_HASHA_IV4 0xa13c42a223be8d87ull
|
||||||
|
|
||||||
|
#define ASCON_XOF_IV0 0xb57e273b814cd416ull
|
||||||
|
#define ASCON_XOF_IV1 0x2b51042562ae2420ull
|
||||||
|
#define ASCON_XOF_IV2 0x66a3a7768ddf2218ull
|
||||||
|
#define ASCON_XOF_IV3 0x5aad0a7a8153650cull
|
||||||
|
#define ASCON_XOF_IV4 0x4f3e0e32539493b6ull
|
||||||
|
|
||||||
|
#define ASCON_XOFA_IV0 0x44906568b77b9832ull
|
||||||
|
#define ASCON_XOFA_IV1 0xcd8d6cae53455532ull
|
||||||
|
#define ASCON_XOFA_IV2 0xf7b5212756422129ull
|
||||||
|
#define ASCON_XOFA_IV3 0x246885e1de0d225bull
|
||||||
|
#define ASCON_XOFA_IV4 0xa8cb5ce33449973full
|
||||||
|
|
||||||
|
#define ASCON_MAC_IV 0x80808c0000000080ull
|
||||||
|
#define ASCON_MACA_IV 0x80808c0400000080ull
|
||||||
|
#define ASCON_PRF_IV 0x80808c0000000000ull
|
||||||
|
#define ASCON_PRFA_IV 0x80808c0400000000ull
|
||||||
|
#define ASCON_PRFS_IV 0x80004c8000000000ull
|
||||||
|
|
||||||
|
#define RC0 0xf0
|
||||||
|
#define RC1 0xe1
|
||||||
|
#define RC2 0xd2
|
||||||
|
#define RC3 0xc3
|
||||||
|
#define RC4 0xb4
|
||||||
|
#define RC5 0xa5
|
||||||
|
#define RC6 0x96
|
||||||
|
#define RC7 0x87
|
||||||
|
#define RC8 0x78
|
||||||
|
#define RC9 0x69
|
||||||
|
#define RCa 0x5a
|
||||||
|
#define RCb 0x4b
|
||||||
|
|
||||||
|
#define RC(i) (i)
|
||||||
|
|
||||||
|
#define START(n) ((3 + (n)) << 4 | (12 - (n)))
|
||||||
|
#define INC -0x0f
|
||||||
|
#define END 0x3c
|
||||||
|
|
||||||
|
#endif /* CONSTANTS_H_ */
|
|
@ -0,0 +1,23 @@
|
||||||
|
#ifndef FORCEINLINE_H_
|
||||||
|
#define FORCEINLINE_H_
|
||||||
|
|
||||||
|
/* define forceinline macro */
|
||||||
|
/*
 * forceinline: function specifier that requests inlining as strongly as the
 * compiler allows.
 *
 *  - MSVC:            __forceinline
 *  - GCC-compatible:  inline + always_inline attribute in C99 or later,
 *                     plain `static inline` otherwise
 *  - Clang without __GNUC__: always_inline when the attribute is available
 *  - anything else:   plain `inline`
 *
 * Clang identifies itself with the lowercase macro __clang__; the test for
 * __CLANG__ used previously could never be true.
 */
#ifdef _MSC_VER
#define forceinline __forceinline
#elif defined(__GNUC__)
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
#define forceinline inline __attribute__((__always_inline__))
#else
#define forceinline static inline
#endif
#elif defined(__clang__)
#if defined(__has_attribute)
#if __has_attribute(__always_inline__)
#define forceinline inline __attribute__((__always_inline__))
#else
#define forceinline inline
#endif
#else
#define forceinline inline
#endif
#else
#define forceinline inline
#endif
|
||||||
|
|
||||||
|
#endif /* FORCEINLINE_H_ */
|
|
@ -0,0 +1 @@
|
||||||
|
Branches reviewed 2020-11-13 by Martin Schläffer.
|
|
@ -0,0 +1 @@
|
||||||
|
Addresses reviewed 2020-11-13 by Martin Schläffer.
|
|
@ -0,0 +1,89 @@
|
||||||
|
#include "api.h"
|
||||||
|
#include "ascon.h"
|
||||||
|
/*#include "crypto_hash.h"*/
|
||||||
|
#include "permutations.h"
|
||||||
|
#include "printstate.h"
|
||||||
|
|
||||||
|
#if !ASCON_INLINE_MODE
|
||||||
|
#undef forceinline
|
||||||
|
#define forceinline
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef ASCON_HASH_BYTES
|
||||||
|
|
||||||
|
/*
 * Load the initial sponge state for the hash/XOF variant selected at compile
 * time through ASCON_HASH_BYTES and ASCON_HASH_ROUNDS.
 *
 * The five state words are taken from the matching ASCON_*_IV0..IV4
 * constants. When ASCON_PRINT_STATE is defined, the state is first built from
 * the single IV word plus four zero words and run through P(s, 12) so both
 * steps show up in the trace; the constants are stored afterwards either way.
 */
forceinline void ascon_inithash(ascon_state_t* s) {
#ifdef ASCON_PRINT_STATE
  /* trace-only path: IV word followed by four zero words, then P12 */
#if ASCON_HASH_BYTES == 32 && ASCON_HASH_ROUNDS == 12
  s->x[0] = ASCON_HASH_IV;
#elif ASCON_HASH_BYTES == 32 && ASCON_HASH_ROUNDS == 8
  s->x[0] = ASCON_HASHA_IV;
#elif ASCON_HASH_BYTES == 0 && ASCON_HASH_ROUNDS == 12
  s->x[0] = ASCON_XOF_IV;
#elif ASCON_HASH_BYTES == 0 && ASCON_HASH_ROUNDS == 8
  s->x[0] = ASCON_XOFA_IV;
#endif
  s->x[1] = 0;
  s->x[2] = 0;
  s->x[3] = 0;
  s->x[4] = 0;
  printstate("initial value", s);
  P(s, 12);
#endif
  /* state words for the selected variant */
#if ASCON_HASH_BYTES == 32 && ASCON_HASH_ROUNDS == 12
  const uint64_t init_words[5] = {ASCON_HASH_IV0, ASCON_HASH_IV1,
                                  ASCON_HASH_IV2, ASCON_HASH_IV3,
                                  ASCON_HASH_IV4};
#elif ASCON_HASH_BYTES == 32 && ASCON_HASH_ROUNDS == 8
  const uint64_t init_words[5] = {ASCON_HASHA_IV0, ASCON_HASHA_IV1,
                                  ASCON_HASHA_IV2, ASCON_HASHA_IV3,
                                  ASCON_HASHA_IV4};
#elif ASCON_HASH_BYTES == 0 && ASCON_HASH_ROUNDS == 12
  const uint64_t init_words[5] = {ASCON_XOF_IV0, ASCON_XOF_IV1, ASCON_XOF_IV2,
                                  ASCON_XOF_IV3, ASCON_XOF_IV4};
#elif ASCON_HASH_BYTES == 0 && ASCON_HASH_ROUNDS == 8
  const uint64_t init_words[5] = {ASCON_XOFA_IV0, ASCON_XOFA_IV1,
                                  ASCON_XOFA_IV2, ASCON_XOFA_IV3,
                                  ASCON_XOFA_IV4};
#endif
  s->x[0] = init_words[0];
  s->x[1] = init_words[1];
  s->x[2] = init_words[2];
  s->x[3] = init_words[3];
  s->x[4] = init_words[4];
  printstate("initialization", s);
}
|
||||||
|
|
||||||
|
/*
 * Absorb inlen bytes from in into the state.
 *
 * Every complete ASCON_HASH_RATE-byte block is XORed into s->x[0] and
 * followed by P(s, ASCON_HASH_ROUNDS). The remaining 0..ASCON_HASH_RATE-1
 * bytes and the padding are XORed in last, without a trailing permutation.
 */
forceinline void ascon_absorb(ascon_state_t* s, const uint8_t* in,
                              uint64_t inlen) {
  /* complete blocks */
  for (; inlen >= ASCON_HASH_RATE;
       in += ASCON_HASH_RATE, inlen -= ASCON_HASH_RATE) {
    s->x[0] ^= LOAD(in, 8);
    printstate("absorb plaintext", s);
    P(s, ASCON_HASH_ROUNDS);
  }
  /* trailing partial block (may be empty) and padding */
  s->x[0] ^= LOADBYTES(in, inlen);
  s->x[0] ^= PAD(inlen);
  printstate("pad plaintext", s);
}
|
||||||
|
|
||||||
|
/*
 * Produce outlen bytes of output in out.
 *
 * The state is permuted with P(s, 12) first. While more than ASCON_HASH_RATE
 * bytes remain, s->x[0] is stored as one 8-byte block and the state is
 * permuted with P(s, ASCON_HASH_ROUNDS). The last outlen bytes
 * (at most ASCON_HASH_RATE) are written with STOREBYTES.
 */
forceinline void ascon_squeeze(ascon_state_t* s, uint8_t* out,
                               uint64_t outlen) {
  P(s, 12);
  /* all blocks except the last one */
  for (; outlen > ASCON_HASH_RATE;
       out += ASCON_HASH_RATE, outlen -= ASCON_HASH_RATE) {
    STORE(out, s->x[0], 8);
    printstate("squeeze output", s);
    P(s, ASCON_HASH_ROUNDS);
  }
  /* last (possibly partial) block */
  STOREBYTES(out, s->x[0], outlen);
  printstate("squeeze output", s);
}
|
||||||
|
|
||||||
|
/*int crypto_hash(unsigned char* out, const unsigned char* in,
|
||||||
|
unsigned long long inlen) {
|
||||||
|
ascon_state_t s;
|
||||||
|
ascon_inithash(&s);
|
||||||
|
ascon_absorb(&s, in, inlen);
|
||||||
|
ascon_squeeze(&s, out, CRYPTO_BYTES);
|
||||||
|
return 0;
|
||||||
|
}*/
|
||||||
|
|
||||||
|
#endif
|
|
@ -0,0 +1,2 @@
|
||||||
|
Christoph Dobraunig
|
||||||
|
Martin Schläffer
|
|
@ -0,0 +1,29 @@
|
||||||
|
#include "permutations.h"
|
||||||
|
|
||||||
|
#if !ASCON_INLINE_PERM && ASCON_UNROLL_LOOPS
|
||||||
|
|
||||||
|
/* Out-of-line wrapper around the unrolled 12-round permutation. */
void P12(ascon_state_t* s) {
  P12ROUNDS(s);
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if ((defined(ASCON_AEAD_RATE) && ASCON_AEAD_RATE == 16) || \
|
||||||
|
(defined(ASCON_HASH_ROUNDS) && ASCON_HASH_ROUNDS == 8) || \
|
||||||
|
(defined(ASCON_PRF_ROUNDS) && ASCON_PRF_ROUNDS == 8)) && \
|
||||||
|
!ASCON_INLINE_PERM && ASCON_UNROLL_LOOPS
|
||||||
|
|
||||||
|
/* Out-of-line wrapper around the unrolled 8-round permutation. */
void P8(ascon_state_t* s) {
  P8ROUNDS(s);
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if (defined(ASCON_AEAD_RATE) && ASCON_AEAD_RATE == 8) && \
|
||||||
|
!ASCON_INLINE_PERM && ASCON_UNROLL_LOOPS
|
||||||
|
|
||||||
|
/* Out-of-line wrapper around the unrolled 6-round permutation. */
void P6(ascon_state_t* s) {
  P6ROUNDS(s);
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if !ASCON_INLINE_PERM && !ASCON_UNROLL_LOOPS
|
||||||
|
|
||||||
|
/* Out-of-line permutation with nr rounds, implemented by the looped PROUNDS. */
void P(ascon_state_t* s, int nr) {
  PROUNDS(s, nr);
}
|
||||||
|
|
||||||
|
#endif
|
|
@ -0,0 +1,78 @@
|
||||||
|
#ifndef PERMUTATIONS_H_
|
||||||
|
#define PERMUTATIONS_H_
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#include "api.h"
|
||||||
|
#include "ascon.h"
|
||||||
|
#include "config.h"
|
||||||
|
#include "constants.h"
|
||||||
|
#include "printstate.h"
|
||||||
|
#include "round.h"
|
||||||
|
|
||||||
|
/* Unrolled 12-round permutation: one ROUND per constant RC0 through RCb. */
forceinline void P12ROUNDS(ascon_state_t* s) {
  /* rounds 1-4 */
  ROUND(s, RC0);
  ROUND(s, RC1);
  ROUND(s, RC2);
  ROUND(s, RC3);
  /* rounds 5-8 */
  ROUND(s, RC4);
  ROUND(s, RC5);
  ROUND(s, RC6);
  ROUND(s, RC7);
  /* rounds 9-12 */
  ROUND(s, RC8);
  ROUND(s, RC9);
  ROUND(s, RCa);
  ROUND(s, RCb);
}
|
||||||
|
|
||||||
|
/* Unrolled 8-round permutation: one ROUND per constant RC4 through RCb. */
forceinline void P8ROUNDS(ascon_state_t* s) {
  /* rounds 1-4 */
  ROUND(s, RC4);
  ROUND(s, RC5);
  ROUND(s, RC6);
  ROUND(s, RC7);
  /* rounds 5-8 */
  ROUND(s, RC8);
  ROUND(s, RC9);
  ROUND(s, RCa);
  ROUND(s, RCb);
}
|
||||||
|
|
||||||
|
/* Unrolled 6-round permutation: one ROUND per constant RC6 through RCb. */
forceinline void P6ROUNDS(ascon_state_t* s) {
  /* rounds 1-3 */
  ROUND(s, RC6);
  ROUND(s, RC7);
  ROUND(s, RC8);
  /* rounds 4-6 */
  ROUND(s, RC9);
  ROUND(s, RCa);
  ROUND(s, RCb);
}
|
||||||
|
|
||||||
|
#if ASCON_INLINE_PERM && ASCON_UNROLL_LOOPS
|
||||||
|
|
||||||
|
/*
 * Inlined, unrolled permutation dispatch: runs the 12-, 8- or 6-round
 * permutation for nr == 12, 8 or 6. Any other nr leaves the state unchanged.
 */
forceinline void P(ascon_state_t* s, int nr) {
  switch (nr) {
    case 12:
      P12ROUNDS(s);
      break;
    case 8:
      P8ROUNDS(s);
      break;
    case 6:
      P6ROUNDS(s);
      break;
    default:
      break;
  }
}
|
||||||
|
|
||||||
|
#elif !ASCON_INLINE_PERM && ASCON_UNROLL_LOOPS
|
||||||
|
|
||||||
|
void P12(ascon_state_t* s);
|
||||||
|
void P8(ascon_state_t* s);
|
||||||
|
void P6(ascon_state_t* s);
|
||||||
|
|
||||||
|
/*
 * Permutation dispatch to the out-of-line unrolled functions: calls P12, P8
 * or P6 for nr == 12, 8 or 6. Any other nr leaves the state unchanged.
 */
forceinline void P(ascon_state_t* s, int nr) {
  switch (nr) {
    case 12:
      P12(s);
      break;
    case 8:
      P8(s);
      break;
    case 6:
      P6(s);
      break;
    default:
      break;
  }
}
|
||||||
|
|
||||||
|
#elif ASCON_INLINE_PERM && !ASCON_UNROLL_LOOPS
|
||||||
|
|
||||||
|
/* Inlined permutation with nr rounds, implemented by the looped PROUNDS. */
forceinline void P(ascon_state_t* s, int nr) {
  PROUNDS(s, nr);
}
|
||||||
|
|
||||||
|
#else /* !ASCON_INLINE_PERM && !ASCON_UNROLL_LOOPS */
|
||||||
|
|
||||||
|
void P(ascon_state_t* s, int nr);
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif /* PERMUTATIONS_H_ */
|
|
@ -0,0 +1,41 @@
|
||||||
|
#ifdef ASCON_PRINT_STATE
|
||||||
|
|
||||||
|
#include "printstate.h"
|
||||||
|
|
||||||
|
#include <inttypes.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#ifndef WORDTOU64
|
||||||
|
#define WORDTOU64
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef U64BIG
|
||||||
|
#define U64BIG
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
 * Print "<text>=<x>" with x as 16 zero-padded lowercase hex digits, after
 * passing x through WORDTOU64 and U64BIG. No newline is written.
 */
void printword(const char* text, const uint64_t x) {
  const uint64_t shown = U64BIG(WORDTOU64(x));
  printf("%s=%016" PRIx64, text, shown);
}
|
||||||
|
|
||||||
|
void printstate(const char* text, const ascon_state_t* s) {
|
||||||
|
int i;
|
||||||
|
printf("%s:", text);
|
||||||
|
for (i = strlen(text); i < 17; ++i) printf(" ");
|
||||||
|
printword(" x0", s->x[0]);
|
||||||
|
printword(" x1", s->x[1]);
|
||||||
|
printword(" x2", s->x[2]);
|
||||||
|
printword(" x3", s->x[3]);
|
||||||
|
printword(" x4", s->x[4]);
|
||||||
|
#ifdef ASCON_PRINT_BI
|
||||||
|
printf(" ");
|
||||||
|
printf(" x0=%08x_%08x", s->w[0][1], s->w[0][0]);
|
||||||
|
printf(" x1=%08x_%08x", s->w[1][1], s->w[1][0]);
|
||||||
|
printf(" x2=%08x_%08x", s->w[2][1], s->w[2][0]);
|
||||||
|
printf(" x3=%08x_%08x", s->w[3][1], s->w[3][0]);
|
||||||
|
printf(" x4=%08x_%08x", s->w[4][1], s->w[4][0]);
|
||||||
|
#endif
|
||||||
|
printf("\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
|
@ -0,0 +1,24 @@
|
||||||
|
#ifndef PRINTSTATE_H_
|
||||||
|
#define PRINTSTATE_H_
|
||||||
|
|
||||||
|
#ifdef ASCON_PRINT_STATE
|
||||||
|
|
||||||
|
#include "ascon.h"
|
||||||
|
#include "word.h"
|
||||||
|
|
||||||
|
void printword(const char* text, const uint64_t x);
|
||||||
|
void printstate(const char* text, const ascon_state_t* s);
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
#define printword(text, w) \
|
||||||
|
do { \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
#define printstate(text, s) \
|
||||||
|
do { \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif /* PRINTSTATE_H_ */
|
|
@ -0,0 +1,350 @@
|
||||||
|
#ifndef ROUND_H_
|
||||||
|
#define ROUND_H_
|
||||||
|
|
||||||
|
#include "ascon.h"
|
||||||
|
#include "constants.h"
|
||||||
|
#include "forceinline.h"
|
||||||
|
#include "printstate.h"
|
||||||
|
#include "word.h"
|
||||||
|
|
||||||
|
/*
 * Run a sequence of rounds on the state in a single inline-assembly loop.
 *
 * s: state; its ten 32-bit words s->w[i][0] / s->w[i][1] are bound to
 *    low ("l") and high ("h") registers respectively.
 * C: round constant of the first round. At the top of every iteration the
 *    current constant is XORed into s->w[2][0] and pushed on the stack so
 *    its register can serve as scratch; it is popped at the end of the
 *    iteration, decremented by 15, and the loop stops once it equals 60.
 *
 * The instruction stream is unchanged. The only change is the "cc" clobber:
 * the code modifies the condition flags (cmp and the flag-setting moves and
 * shifts), which the compiler has to be told about.
 */
forceinline void ROUND_LOOP(ascon_state_t* s, uint32_t C) {
  uint32_t tmp0, tmp1;
  __asm__ __volatile__(
      "@.syntax_unified\n\t"
      "rbegin_%=:;\n\t"
      "eor %[x2_l], %[x2_l], %[tmp1]\n\t"
      "push {%[tmp1]}\n\t"
      "eor %[x0_l], %[x0_l], %[x4_l]\n\t"
      "eor %[x4_l], %[x4_l], %[x3_l]\n\t"
      "eor %[x2_l], %[x2_l], %[x1_l]\n\t"
      "mvn %[tmp0], %[x0_l]\n\t"
      "orr %[tmp0], %[tmp0], %[x4_l]\n\t"
      "movs %[tmp1], %[x2_l]\n\t"
      "bic %[tmp1], %[tmp1], %[x1_l]\n\t"
      "eor %[x0_l], %[x0_l], %[tmp1]\n\t"
      "mvn %[tmp1], %[x4_l]\n\t"
      "orr %[tmp1], %[tmp1], %[x3_l]\n\t"
      "eor %[x2_l], %[x2_l], %[tmp1]\n\t"
      "movs %[tmp1], %[x1_l]\n\t"
      "bic %[tmp1], %[tmp1], %[x0_l]\n\t"
      "eor %[x4_l], %[x4_l], %[tmp1]\n\t"
      "movs %[tmp1], %[x3_l]\n\t"
      "and %[tmp1], %[tmp1], %[x2_l]\n\t"
      "eor %[tmp1], %[x1_l], %[tmp1]\n\t"
      "eor %[tmp0], %[x3_l], %[tmp0]\n\t"
      "eor %[tmp0], %[tmp0], %[x2_l]\n\t"
      "eor %[tmp1], %[tmp1], %[x0_l]\n\t"
      "eor %[x0_l], %[x0_l], %[x4_l]\n\t"
      "movs %[x1_l], %[x0_h]\n\t"
      "movs %[x3_l], %[x1_h]\n\t"
      "movs %[x0_h], %[x2_l]\n\t"
      "movs %[x1_h], %[x0_l]\n\t"
      "movs %[x0_l], %[x2_h]\n\t"
      "movs %[x2_l], %[x3_h]\n\t"
      "movs %[tmp2], %[x4_h]\n\t"
      "movs %[x2_h], %[tmp0]\n\t"
      "movs %[x3_h], %[x4_l]\n\t"
      "eor %[x1_l], %[x1_l], %[tmp2]\n\t"
      "eor %[tmp2], %[tmp2], %[x2_l]\n\t"
      "eor %[x0_l], %[x0_l], %[x3_l]\n\t"
      "mvn %[tmp0], %[x1_l]\n\t"
      "orr %[tmp0], %[tmp0], %[tmp2]\n\t"
      "movs %[x4_l], %[x0_l]\n\t"
      "bic %[x4_l], %[x4_l], %[x3_l]\n\t"
      "eor %[x1_l], %[x1_l], %[x4_l]\n\t"
      "mvn %[x4_l], %[tmp2]\n\t"
      "orr %[x4_l], %[x4_l], %[x2_l]\n\t"
      "eor %[x0_l], %[x0_l], %[x4_l]\n\t"
      "movs %[x4_l], %[x3_l]\n\t"
      "bic %[x4_l], %[x4_l], %[x1_l]\n\t"
      "eor %[tmp2], %[tmp2], %[x4_l]\n\t"
      "movs %[x4_l], %[x2_l]\n\t"
      "and %[x4_l], %[x4_l], %[x0_l]\n\t"
      "eor %[x3_l], %[x3_l], %[x4_l]\n\t"
      "eor %[x2_l], %[x2_l], %[tmp0]\n\t"
      "eor %[x2_l], %[x2_l], %[x0_l]\n\t"
      "eor %[x3_l], %[x3_l], %[x1_l]\n\t"
      "eor %[x1_l], %[x1_l], %[tmp2]\n\t"
      "movs %[x4_h], %[x2_l]\n\t"
      "movs %[x2_l], %[x0_h]\n\t"
      "movs %[x0_h], %[x1_l]\n\t"
      "lsr %[x4_l], %[x0_l], #6\n\t"
      "lsl %[x1_l], %[x2_l], #26\n\t"
      "lsr %[tmp0], %[x2_l], #6\n\t"
      "eor %[x4_l], %[x4_l], %[x1_l]\n\t"
      "lsl %[x1_l], %[x0_l], #26\n\t"
      "eor %[tmp0], %[tmp0], %[x1_l]\n\t"
      "lsr %[x1_l], %[x0_l], #1\n\t"
      "eor %[x4_l], %[x4_l], %[x1_l]\n\t"
      "lsl %[x1_l], %[x2_l], #31\n\t"
      "eor %[x4_l], %[x4_l], %[x1_l]\n\t"
      "lsr %[x1_l], %[x2_l], #1\n\t"
      "eor %[tmp0], %[tmp0], %[x1_l]\n\t"
      "lsl %[x1_l], %[x0_l], #31\n\t"
      "eor %[tmp0], %[tmp0], %[x1_l]\n\t"
      "eor %[x0_l], %[x0_l], %[x4_l]\n\t"
      "eor %[x2_l], %[x2_l], %[tmp0]\n\t"
      "lsl %[x4_l], %[x3_l], #3\n\t"
      "lsr %[x1_l], %[tmp1], #29\n\t"
      "lsl %[tmp0], %[tmp1], #3\n\t"
      "eor %[x4_l], %[x4_l], %[x1_l]\n\t"
      "lsr %[x1_l], %[x3_l], #29\n\t"
      "eor %[tmp0], %[tmp0], %[x1_l]\n\t"
      "lsl %[x1_l], %[x3_l], #25\n\t"
      "eor %[x4_l], %[x4_l], %[x1_l]\n\t"
      "lsr %[x1_l], %[tmp1], #7\n\t"
      "eor %[x4_l], %[x4_l], %[x1_l]\n\t"
      "lsl %[x1_l], %[tmp1], #25\n\t"
      "eor %[tmp0], %[tmp0], %[x1_l]\n\t"
      "lsr %[x1_l], %[x3_l], #7\n\t"
      "eor %[tmp0], %[tmp0], %[x1_l]\n\t"
      "eor %[x3_l], %[x3_l], %[x4_l]\n\t"
      "eor %[tmp1], %[tmp1], %[tmp0]\n\t"
      "movs %[x4_l], %[x3_h]\n\t"
      "movs %[x3_h], %[tmp1]\n\t"
      "lsl %[tmp1], %[tmp2], #23\n\t"
      "lsr %[x1_l], %[x4_l], #9\n\t"
      "lsl %[tmp0], %[x4_l], #23\n\t"
      "eor %[tmp1], %[tmp1], %[x1_l]\n\t"
      "lsr %[x1_l], %[tmp2], #9\n\t"
      "eor %[tmp0], %[tmp0], %[x1_l]\n\t"
      "lsr %[x1_l], %[tmp2], #7\n\t"
      "eor %[tmp1], %[tmp1], %[x1_l]\n\t"
      "lsl %[x1_l], %[x4_l], #25\n\t"
      "eor %[tmp1], %[tmp1], %[x1_l]\n\t"
      "lsr %[x1_l], %[x4_l], #7\n\t"
      "eor %[tmp0], %[tmp0], %[x1_l]\n\t"
      "lsl %[x1_l], %[tmp2], #25\n\t"
      "eor %[tmp0], %[tmp0], %[x1_l]\n\t"
      "eor %[tmp2], %[tmp2], %[tmp1]\n\t"
      "eor %[x4_l], %[x4_l], %[tmp0]\n\t"
      "movs %[x1_l], %[x3_h]\n\t"
      "movs %[tmp1], %[x4_h]\n\t"
      "movs %[x4_h], %[tmp2]\n\t"
      "movs %[x3_h], %[x3_l]\n\t"
      "movs %[x3_l], %[x2_h]\n\t"
      "movs %[x2_h], %[x0_l]\n\t"
      "lsr %[tmp2], %[tmp1], #17\n\t"
      "lsl %[x0_l], %[x3_l], #15\n\t"
      "lsr %[tmp0], %[x3_l], #17\n\t"
      "eor %[tmp2], %[tmp2], %[x0_l]\n\t"
      "lsl %[x0_l], %[tmp1], #15\n\t"
      "eor %[tmp0], %[tmp0], %[x0_l]\n\t"
      "lsr %[x0_l], %[tmp1], #10\n\t"
      "eor %[tmp2], %[tmp2], %[x0_l]\n\t"
      "lsl %[x0_l], %[x3_l], #22\n\t"
      "eor %[tmp2], %[tmp2], %[x0_l]\n\t"
      "lsr %[x0_l], %[x3_l], #10\n\t"
      "eor %[tmp0], %[tmp0], %[x0_l]\n\t"
      "lsl %[x0_l], %[tmp1], #22\n\t"
      "eor %[tmp0], %[tmp0], %[x0_l]\n\t"
      "eor %[tmp1], %[tmp1], %[tmp2]\n\t"
      "eor %[x3_l], %[x3_l], %[tmp0]\n\t"
      "movs %[tmp0], %[x0_h]\n\t"
      "movs %[x0_l], %[x1_h]\n\t"
      "movs %[x0_h], %[x4_l]\n\t"
      "movs %[x1_h], %[x3_h]\n\t"
      "movs %[x3_h], %[tmp1]\n\t"
      "lsr %[x4_l], %[tmp0], #28\n\t"
      "lsl %[tmp1], %[x0_l], #4\n\t"
      "lsr %[tmp2], %[x0_l], #28\n\t"
      "eor %[x4_l], %[x4_l], %[tmp1]\n\t"
      "lsl %[tmp1], %[tmp0], #4\n\t"
      "eor %[tmp2], %[tmp2], %[tmp1]\n\t"
      "lsr %[tmp1], %[tmp0], #19\n\t"
      "eor %[x4_l], %[x4_l], %[tmp1]\n\t"
      "lsl %[tmp1], %[x0_l], #13\n\t"
      "eor %[x4_l], %[x4_l], %[tmp1]\n\t"
      "lsr %[tmp1], %[x0_l], #19\n\t"
      "eor %[tmp2], %[tmp2], %[tmp1]\n\t"
      "lsl %[tmp1], %[tmp0], #13\n\t"
      "eor %[tmp2], %[tmp2], %[tmp1]\n\t"
      "pop {%[tmp1]}\n\t"
      "eor %[tmp0], %[tmp0], %[x4_l]\n\t"
      "eor %[x0_l], %[x0_l], %[tmp2]\n\t"
      "movs %[x4_l], %[x0_h]\n\t"
      "movs %[x0_h], %[tmp0]\n\t"
      "sub %[tmp1], %[tmp1], #15\n\t"
      "cmp %[tmp1], #60\n\t"
      "beq rend_%=\n\t"
      "b rbegin_%=\n\t"
      "rend_%=:;\n\t"
      : [x0_l] "+l"(s->w[0][0]), [x0_h] "+h"(s->w[0][1]),
        [x1_l] "+l"(s->w[1][0]), [x1_h] "+h"(s->w[1][1]),
        [x2_l] "+l"(s->w[2][0]), [x2_h] "+h"(s->w[2][1]),
        [x3_l] "+l"(s->w[3][0]), [x3_h] "+h"(s->w[3][1]),
        [x4_l] "+l"(s->w[4][0]), [x4_h] "+h"(s->w[4][1]), [tmp1] "+l"(C),
        [tmp0] "=l"(tmp0), [tmp2] "=l"(tmp1)
      :
      : "cc");
}
|
||||||
|
|
||||||
|
/*
 * Apply a single round to the state using inline assembly.
 *
 * s: state; its ten 32-bit words s->w[i][0] / s->w[i][1] are bound to
 *    low ("l") and high ("h") registers respectively.
 * C: round constant; it is moved into a scratch register and XORed into
 *    s->w[2][0] before the rest of the round runs.
 *
 * The instruction stream is unchanged. The only change is the "cc" clobber:
 * the code modifies the condition flags (flag-setting moves and shifts),
 * which the compiler has to be told about.
 */
forceinline void ROUND(ascon_state_t* s, uint32_t C) {
  uint32_t tmp0, tmp1, tmp2;
  __asm__ __volatile__(
      "@.syntax_unified\n\t"
      "movs %[tmp0], %[C]\n\t"
      "eor %[x2_l], %[x2_l], %[tmp0]\n\t"
      "eor %[x0_l], %[x0_l], %[x4_l]\n\t"
      "eor %[x4_l], %[x4_l], %[x3_l]\n\t"
      "eor %[x2_l], %[x2_l], %[x1_l]\n\t"
      "mvn %[tmp0], %[x0_l]\n\t"
      "orr %[tmp0], %[tmp0], %[x4_l]\n\t"
      "movs %[tmp1], %[x2_l]\n\t"
      "bic %[tmp1], %[tmp1], %[x1_l]\n\t"
      "eor %[x0_l], %[x0_l], %[tmp1]\n\t"
      "mvn %[tmp1], %[x4_l]\n\t"
      "orr %[tmp1], %[tmp1], %[x3_l]\n\t"
      "eor %[x2_l], %[x2_l], %[tmp1]\n\t"
      "movs %[tmp1], %[x1_l]\n\t"
      "bic %[tmp1], %[tmp1], %[x0_l]\n\t"
      "eor %[x4_l], %[x4_l], %[tmp1]\n\t"
      "movs %[tmp1], %[x3_l]\n\t"
      "and %[tmp1], %[tmp1], %[x2_l]\n\t"
      "eor %[tmp1], %[x1_l], %[tmp1]\n\t"
      "eor %[tmp0], %[x3_l], %[tmp0]\n\t"
      "eor %[tmp0], %[tmp0], %[x2_l]\n\t"
      "eor %[tmp1], %[tmp1], %[x0_l]\n\t"
      "eor %[x0_l], %[x0_l], %[x4_l]\n\t"
      "movs %[x1_l], %[x0_h]\n\t"
      "movs %[x3_l], %[x1_h]\n\t"
      "movs %[x0_h], %[x2_l]\n\t"
      "movs %[x1_h], %[x0_l]\n\t"
      "movs %[x0_l], %[x2_h]\n\t"
      "movs %[x2_l], %[x3_h]\n\t"
      "movs %[tmp2], %[x4_h]\n\t"
      "movs %[x2_h], %[tmp0]\n\t"
      "movs %[x3_h], %[x4_l]\n\t"
      "eor %[x1_l], %[x1_l], %[tmp2]\n\t"
      "eor %[tmp2], %[tmp2], %[x2_l]\n\t"
      "eor %[x0_l], %[x0_l], %[x3_l]\n\t"
      "mvn %[tmp0], %[x1_l]\n\t"
      "orr %[tmp0], %[tmp0], %[tmp2]\n\t"
      "movs %[x4_l], %[x0_l]\n\t"
      "bic %[x4_l], %[x4_l], %[x3_l]\n\t"
      "eor %[x1_l], %[x1_l], %[x4_l]\n\t"
      "mvn %[x4_l], %[tmp2]\n\t"
      "orr %[x4_l], %[x4_l], %[x2_l]\n\t"
      "eor %[x0_l], %[x0_l], %[x4_l]\n\t"
      "movs %[x4_l], %[x3_l]\n\t"
      "bic %[x4_l], %[x4_l], %[x1_l]\n\t"
      "eor %[tmp2], %[tmp2], %[x4_l]\n\t"
      "movs %[x4_l], %[x2_l]\n\t"
      "and %[x4_l], %[x4_l], %[x0_l]\n\t"
      "eor %[x3_l], %[x3_l], %[x4_l]\n\t"
      "eor %[x2_l], %[x2_l], %[tmp0]\n\t"
      "eor %[x2_l], %[x2_l], %[x0_l]\n\t"
      "eor %[x3_l], %[x3_l], %[x1_l]\n\t"
      "eor %[x1_l], %[x1_l], %[tmp2]\n\t"
      "movs %[x4_h], %[x2_l]\n\t"
      "movs %[x2_l], %[x0_h]\n\t"
      "movs %[x0_h], %[x1_l]\n\t"
      "lsr %[x4_l], %[x0_l], #6\n\t"
      "lsl %[x1_l], %[x2_l], #26\n\t"
      "lsr %[tmp0], %[x2_l], #6\n\t"
      "eor %[x4_l], %[x4_l], %[x1_l]\n\t"
      "lsl %[x1_l], %[x0_l], #26\n\t"
      "eor %[tmp0], %[tmp0], %[x1_l]\n\t"
      "lsr %[x1_l], %[x0_l], #1\n\t"
      "eor %[x4_l], %[x4_l], %[x1_l]\n\t"
      "lsl %[x1_l], %[x2_l], #31\n\t"
      "eor %[x4_l], %[x4_l], %[x1_l]\n\t"
      "lsr %[x1_l], %[x2_l], #1\n\t"
      "eor %[tmp0], %[tmp0], %[x1_l]\n\t"
      "lsl %[x1_l], %[x0_l], #31\n\t"
      "eor %[tmp0], %[tmp0], %[x1_l]\n\t"
      "eor %[x0_l], %[x0_l], %[x4_l]\n\t"
      "eor %[x2_l], %[x2_l], %[tmp0]\n\t"
      "lsl %[x4_l], %[x3_l], #3\n\t"
      "lsr %[x1_l], %[tmp1], #29\n\t"
      "lsl %[tmp0], %[tmp1], #3\n\t"
      "eor %[x4_l], %[x4_l], %[x1_l]\n\t"
      "lsr %[x1_l], %[x3_l], #29\n\t"
      "eor %[tmp0], %[tmp0], %[x1_l]\n\t"
      "lsl %[x1_l], %[x3_l], #25\n\t"
      "eor %[x4_l], %[x4_l], %[x1_l]\n\t"
      "lsr %[x1_l], %[tmp1], #7\n\t"
      "eor %[x4_l], %[x4_l], %[x1_l]\n\t"
      "lsl %[x1_l], %[tmp1], #25\n\t"
      "eor %[tmp0], %[tmp0], %[x1_l]\n\t"
      "lsr %[x1_l], %[x3_l], #7\n\t"
      "eor %[tmp0], %[tmp0], %[x1_l]\n\t"
      "eor %[x3_l], %[x3_l], %[x4_l]\n\t"
      "eor %[tmp1], %[tmp1], %[tmp0]\n\t"
      "movs %[x4_l], %[x3_h]\n\t"
      "movs %[x3_h], %[tmp1]\n\t"
      "lsl %[tmp1], %[tmp2], #23\n\t"
      "lsr %[x1_l], %[x4_l], #9\n\t"
      "lsl %[tmp0], %[x4_l], #23\n\t"
      "eor %[tmp1], %[tmp1], %[x1_l]\n\t"
      "lsr %[x1_l], %[tmp2], #9\n\t"
      "eor %[tmp0], %[tmp0], %[x1_l]\n\t"
      "lsr %[x1_l], %[tmp2], #7\n\t"
      "eor %[tmp1], %[tmp1], %[x1_l]\n\t"
      "lsl %[x1_l], %[x4_l], #25\n\t"
      "eor %[tmp1], %[tmp1], %[x1_l]\n\t"
      "lsr %[x1_l], %[x4_l], #7\n\t"
      "eor %[tmp0], %[tmp0], %[x1_l]\n\t"
      "lsl %[x1_l], %[tmp2], #25\n\t"
      "eor %[tmp0], %[tmp0], %[x1_l]\n\t"
      "eor %[tmp2], %[tmp2], %[tmp1]\n\t"
      "eor %[x4_l], %[x4_l], %[tmp0]\n\t"
      "movs %[x1_l], %[x3_h]\n\t"
      "movs %[tmp1], %[x4_h]\n\t"
      "movs %[x4_h], %[tmp2]\n\t"
      "movs %[x3_h], %[x3_l]\n\t"
      "movs %[x3_l], %[x2_h]\n\t"
      "movs %[x2_h], %[x0_l]\n\t"
      "lsr %[tmp2], %[tmp1], #17\n\t"
      "lsl %[x0_l], %[x3_l], #15\n\t"
      "lsr %[tmp0], %[x3_l], #17\n\t"
      "eor %[tmp2], %[tmp2], %[x0_l]\n\t"
      "lsl %[x0_l], %[tmp1], #15\n\t"
      "eor %[tmp0], %[tmp0], %[x0_l]\n\t"
      "lsr %[x0_l], %[tmp1], #10\n\t"
      "eor %[tmp2], %[tmp2], %[x0_l]\n\t"
      "lsl %[x0_l], %[x3_l], #22\n\t"
      "eor %[tmp2], %[tmp2], %[x0_l]\n\t"
      "lsr %[x0_l], %[x3_l], #10\n\t"
      "eor %[tmp0], %[tmp0], %[x0_l]\n\t"
      "lsl %[x0_l], %[tmp1], #22\n\t"
      "eor %[tmp0], %[tmp0], %[x0_l]\n\t"
      "eor %[tmp1], %[tmp1], %[tmp2]\n\t"
      "eor %[x3_l], %[x3_l], %[tmp0]\n\t"
      "movs %[tmp0], %[x0_h]\n\t"
      "movs %[x0_l], %[x1_h]\n\t"
      "movs %[x0_h], %[x4_l]\n\t"
      "movs %[x1_h], %[x3_h]\n\t"
      "movs %[x3_h], %[tmp1]\n\t"
      "lsr %[x4_l], %[tmp0], #28\n\t"
      "lsl %[tmp1], %[x0_l], #4\n\t"
      "lsr %[tmp2], %[x0_l], #28\n\t"
      "eor %[x4_l], %[x4_l], %[tmp1]\n\t"
      "lsl %[tmp1], %[tmp0], #4\n\t"
      "eor %[tmp2], %[tmp2], %[tmp1]\n\t"
      "lsr %[tmp1], %[tmp0], #19\n\t"
      "eor %[x4_l], %[x4_l], %[tmp1]\n\t"
      "lsl %[tmp1], %[x0_l], #13\n\t"
      "eor %[x4_l], %[x4_l], %[tmp1]\n\t"
      "lsr %[tmp1], %[x0_l], #19\n\t"
      "eor %[tmp2], %[tmp2], %[tmp1]\n\t"
      "lsl %[tmp1], %[tmp0], #13\n\t"
      "eor %[tmp2], %[tmp2], %[tmp1]\n\t"
      "eor %[tmp0], %[tmp0], %[x4_l]\n\t"
      "eor %[x0_l], %[x0_l], %[tmp2]\n\t"
      "movs %[x4_l], %[x0_h]\n\t"
      "movs %[x0_h], %[tmp0]\n\t"
      : [x0_l] "+l"(s->w[0][0]), [x0_h] "+h"(s->w[0][1]),
        [x1_l] "+l"(s->w[1][0]), [x1_h] "+h"(s->w[1][1]),
        [x2_l] "+l"(s->w[2][0]), [x2_h] "+h"(s->w[2][1]),
        [x3_l] "+l"(s->w[3][0]), [x3_h] "+h"(s->w[3][1]),
        [x4_l] "+l"(s->w[4][0]), [x4_h] "+h"(s->w[4][1]),
        [tmp0] "=l"(tmp0), [tmp1] "=l"(tmp1), [tmp2] "=l"(tmp2)
      : [C] "ri"(C)
      : "cc");
  printstate(" round output", s);
}
|
||||||
|
|
||||||
|
/* Run nr rounds with ROUND_LOOP, starting from the constant START(nr). */
forceinline void PROUNDS(ascon_state_t* s, int nr) {
  ROUND_LOOP(s, START(nr));
}
|
||||||
|
|
||||||
|
#endif /* ROUND_H_ */
|
|
@ -0,0 +1,69 @@
|
||||||
|
#ifndef WORD_H_
|
||||||
|
#define WORD_H_
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#include "bendian.h"
|
||||||
|
#include "forceinline.h"
|
||||||
|
|
||||||
|
/* A single 64-bit word that can be accessed as a whole, as two 32-bit halves, or as eight individual bytes. */
typedef union {
|
||||||
|
uint64_t x; /* the full 64-bit value */
|
||||||
|
uint32_t w[2]; /* the same storage as two 32-bit words (which half is w[0] depends on host byte order) */
|
||||||
|
uint8_t b[8]; /* the same storage as raw bytes in memory order */
|
||||||
|
} word_t;
|
||||||
|
|
||||||
|
/* Convert a natively loaded uint64_t into the internal word representation by applying U64BIG (presumably provided by bendian.h -- confirm). */
#define U64TOWORD(x) U64BIG(x)
|
||||||
|
/* Convert an internal word back into a uint64_t ready to be stored to memory; uses the same U64BIG transform as U64TOWORD. */
#define WORDTOU64(x) U64BIG(x)
|
||||||
|
|
||||||
|
/*
 * Rotate the 64-bit value x right by n bits.
 * n is expected to be in the range 0..63; the left-shift count is reduced
 * modulo 64 so that n == 0 does not produce a shift by 64.
 */
forceinline uint64_t ROR(uint64_t x, int n) {
  const uint64_t shifted_down = x >> n;
  const uint64_t wrapped_around = x << (-n & 63);
  return shifted_down | wrapped_around;
}
|
||||||
|
|
||||||
|
/*
 * Build a 64-bit word whose upper half is the lower 32 bits of lo2hi and
 * whose lower half is the upper 32 bits of hi2lo.
 */
forceinline uint64_t KEYROT(uint64_t lo2hi, uint64_t hi2lo) {
  const uint64_t upper_half = lo2hi << 32;
  const uint64_t lower_half = hi2lo >> 32;
  return upper_half | lower_half;
}
|
||||||
|
|
||||||
|
/*
 * Branch-free test for "a or b has at least one bit set".
 * Returns -1 (all bits set) when a | b is non-zero and 0 when both are zero.
 */
forceinline int NOTZERO(uint64_t a, uint64_t b) {
  uint64_t folded = a | b;

  /* OR every byte of the word down into the lowest byte: 32, 16, 8. */
  for (int shift = 32; shift >= 8; shift >>= 1) {
    folded |= folded >> shift;
  }

  /* low_byte - 1 is negative only when low_byte == 0; bit 8 exposes that. */
  const int low_byte = (int)(folded & 0xff);
  const int is_zero = ((low_byte - 1) >> 8) & 1;
  return is_zero - 1;
}
|
||||||
|
|
||||||
|
/*
 * Return a word holding the byte 0x80 at byte index i, counted from the
 * most significant byte (i == 0) to the least significant one (i == 7).
 */
forceinline uint64_t PAD(int i) {
  const int shift = 56 - 8 * i;
  return 0x80ull << shift;
}
|
||||||
|
|
||||||
|
/* Shift the length value left by 51 bits to place it in its field of the word. */
forceinline uint64_t PRFS_MLEN(uint64_t len) {
  const unsigned field_shift = 51;
  return len << field_shift;
}
|
||||||
|
|
||||||
|
/*
 * Zero the n most significant bytes of w and return the result.
 *
 * n <= 0 leaves w unchanged and n >= 8 yields 0. Both ends are handled
 * explicitly because a shift by a negative count or by 64 or more bits is
 * undefined in C (the plain shift is only safe for n in 0..7).
 */
forceinline uint64_t CLEAR(uint64_t w, int n) {
  if (n <= 0) return w;
  if (n >= 8) return 0;
  return w & (~0ull >> (8 * n));
}
|
||||||
|
|
||||||
|
/*
 * Return a mask with the n least significant bytes set to 0xff.
 *
 * n <= 0 yields 0 and n >= 8 yields all ones. These cases are handled
 * explicitly because the plain shift would be by 64 bits (n == 0) or by a
 * negative count (n > 8), both of which are undefined in C.
 */
forceinline uint64_t MASK(int n) {
  if (n <= 0) return 0;
  if (n >= 8) return ~0ull;
  return ~0ull >> (64 - 8 * n);
}
|
||||||
|
|
||||||
|
/*
 * Load the first n bytes (0..8) at `bytes` into a word; bytes beyond n are
 * treated as zero.
 *
 * The bytes are copied with memcpy instead of dereferencing a casted
 * uint64_t pointer: the cast dropped const, broke the strict-aliasing rule,
 * required `bytes` to be suitably aligned (a fault on cores without
 * unaligned access support) and always read 8 bytes even when n < 8.
 * For n in 1..8 on a little-endian host the result is the same as the
 * masked 64-bit read it replaces.
 */
forceinline uint64_t LOAD(const uint8_t* bytes, int n) {
  uint64_t x = 0;
  memcpy(&x, bytes, (size_t)n);
  return U64TOWORD(x);
}
|
||||||
|
|
||||||
|
/*
 * Store the first n bytes (0..8) of word w, in memory order, to `bytes`.
 * Bytes at index n and above are left untouched.
 *
 * The bytes are copied with memcpy instead of read-modify-writing through a
 * casted uint64_t pointer: the cast broke the strict-aliasing rule, required
 * `bytes` to be suitably aligned, and touched all 8 bytes regardless of n
 * (OR-ing the remaining bytes of w into bytes[n..7]). For n == 8, and for
 * words whose unused bytes are already zero, the stored result is identical.
 */
forceinline void STORE(uint8_t* bytes, uint64_t w, int n) {
  const uint64_t x = WORDTOU64(w);
  memcpy(bytes, &x, (size_t)n);
}
|
||||||
|
|
||||||
|
/*
 * Copy n bytes from `bytes` into a zero-initialised 64-bit value and convert
 * it to the word representation. Safe for any alignment of `bytes`;
 * n must not exceed 8.
 */
forceinline uint64_t LOADBYTES(const uint8_t* bytes, int n) {
  uint64_t native = 0;

  memcpy(&native, bytes, n);

  return U64TOWORD(native);
}
|
||||||
|
|
||||||
|
/*
 * Convert word w to its in-memory form and copy its first n bytes to
 * `bytes`. Safe for any alignment of `bytes`; n must not exceed 8.
 */
forceinline void STOREBYTES(uint8_t* bytes, uint64_t w, int n) {
  const uint64_t native = WORDTOU64(w);

  memcpy(bytes, &native, n);
}
|
||||||
|
|
||||||
|
#endif /* WORD_H_ */
|
17
src/main.c
17
src/main.c
|
@ -11,11 +11,13 @@
|
||||||
|
|
||||||
|
|
||||||
#include "rorand.h"
|
#include "rorand.h"
|
||||||
|
#include "rourand.h"
|
||||||
#include "util.h"
|
#include "util.h"
|
||||||
|
|
||||||
|
|
||||||
#define DO_TIME_BENCH 0
|
#define DO_TIME_BENCH 0
|
||||||
#define DATA_TOTAL 1024*1024
|
#define DATA_TOTAL 1024*1024
|
||||||
|
#define USE_URANDOM 1
|
||||||
|
|
||||||
#if DO_TIME_BENCH
|
#if DO_TIME_BENCH
|
||||||
static uint8_t time_bench[128*1024];
|
static uint8_t time_bench[128*1024];
|
||||||
|
@ -50,13 +52,23 @@ int main() {
|
||||||
iprintf("rorand_init() returned %d\n", d);
|
iprintf("rorand_init() returned %d\n", d);
|
||||||
panic("can't init rorand");
|
panic("can't init rorand");
|
||||||
}
|
}
|
||||||
|
struct rourand_state* ur = rourand_init(rorand_get, 0);
|
||||||
|
if (!ur) {
|
||||||
|
panic("Can't init rourand");
|
||||||
|
}
|
||||||
|
|
||||||
|
#if USE_URANDOM
|
||||||
|
#define rand_get(dst, size) rourand_get(ur, dst, size)
|
||||||
|
#else
|
||||||
|
#define rand_get(dst, size) rorand_get(dst, (size)*CHAR_BIT)
|
||||||
|
#endif
|
||||||
|
|
||||||
#if DO_TIME_BENCH
|
#if DO_TIME_BENCH
|
||||||
memset(time_bench, 0, sizeof(time_bench));
|
memset(time_bench, 0, sizeof(time_bench));
|
||||||
iprintf("[---] throughput benchmark start\n");
|
iprintf("[---] throughput benchmark start\n");
|
||||||
|
|
||||||
absolute_time_t ta = get_absolute_time();
|
absolute_time_t ta = get_absolute_time();
|
||||||
rorand_get(time_bench, count_of(time_bench)*CHAR_BIT);
|
rand_get(time_bench, count_of(time_bench));
|
||||||
absolute_time_t tb = get_absolute_time();
|
absolute_time_t tb = get_absolute_time();
|
||||||
|
|
||||||
int64_t dt_us = absolute_time_diff_us(ta, tb);
|
int64_t dt_us = absolute_time_diff_us(ta, tb);
|
||||||
|
@ -72,10 +84,11 @@ int main() {
|
||||||
memset(data, 0, sizeof(data));
|
memset(data, 0, sizeof(data));
|
||||||
const uintptr_t total = DATA_TOTAL;
|
const uintptr_t total = DATA_TOTAL;
|
||||||
for (uintptr_t off = 0; off < total; off += count_of(data)) {
|
for (uintptr_t off = 0; off < total; off += count_of(data)) {
|
||||||
rorand_get(data, count_of(data)*CHAR_BIT);
|
rand_get(data, count_of(data));
|
||||||
hexdump(NULL, off, data, sizeof(data));
|
hexdump(NULL, off, data, sizeof(data));
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
rourand_free(ur);
|
||||||
|
|
||||||
iprintf("done\n");
|
iprintf("done\n");
|
||||||
while(1);
|
while(1);
|
||||||
|
|
|
@ -0,0 +1,71 @@
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <limits.h>
|
||||||
|
|
||||||
|
#include "ascon.h"
|
||||||
|
#include "rourand.h"
|
||||||
|
|
||||||
|
|
||||||
|
/* State of the randomness compressor: a hash/XOF state that is periodically fed raw randomness and squeezed for output. */
struct rourand_state {
|
||||||
|
ascon_state_t ascon; /* sponge state set up by ascon_inithash() */
|
||||||
|
uint8_t buf[16]; /* staging block: receives raw randomness before absorbing and squeezed output afterwards */
|
||||||
|
rourand_get_fn rand; /* raw randomness callback; its size argument is given in bits */
|
||||||
|
size_t bpos; /* number of squeezed bytes still unread at the tail of buf */
|
||||||
|
size_t rate; /* fresh randomness is absorbed once rcount reaches this many squeezed blocks */
|
||||||
|
size_t rcount; /* blocks squeezed since the last absorb */
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Allocate and seed a randomness compressor.
 *
 * rawrand: callback that fills a buffer with raw randomness; its size
 *          argument is a bit count. Must not be NULL.
 * rate:    number of 16-byte output blocks between absorbs of fresh raw
 *          randomness. Zero or a negative value selects the default of 16
 *          (a negative value would otherwise convert to a huge size_t and
 *          effectively disable reseeding).
 *
 * Returns the new state, which the caller releases with rourand_free(), or
 * NULL when rawrand is NULL or the allocation fails.
 */
struct rourand_state* rourand_init(rourand_get_fn rawrand, int rate) {
  enum { DEFAULT_RATE = 16, SEED_BLOCKS = 2 };

  if (!rawrand) return NULL;

  struct rourand_state* r = calloc(1, sizeof *r);
  if (!r) return NULL;

  ascon_inithash(&r->ascon);
  r->rand = rawrand;
  r->bpos = 0;
  r->rate = (rate > 0) ? (size_t)rate : DEFAULT_RATE;
  r->rcount = 0;

  /* Seed the state with two full blocks of raw randomness. */
  for (int i = 0; i < SEED_BLOCKS; ++i) {
    rawrand(r->buf, sizeof(r->buf) * CHAR_BIT);
    ascon_absorb(&r->ascon, r->buf, sizeof(r->buf));
  }

  return r;
}
|
||||||
|
void rourand_free(struct rourand_state* st) {
|
||||||
|
if (st) {
|
||||||
|
explicit_bzero(st, sizeof(*st));
|
||||||
|
free(st);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void rourand_get(struct rourand_state* st, void* dst_, size_t nbytes) {
|
||||||
|
uint8_t* dst = (uint8_t*)dst_;
|
||||||
|
|
||||||
|
while (nbytes > 0) {
|
||||||
|
if (st->bpos == 0) {
|
||||||
|
++st->rcount;
|
||||||
|
if (st->rcount == st->rate) {
|
||||||
|
st->rand(st->buf, sizeof(st->buf)*CHAR_BIT);
|
||||||
|
ascon_absorb(&st->ascon, st->buf, sizeof(st->buf));
|
||||||
|
st->rcount = 0;
|
||||||
|
}
|
||||||
|
ascon_squeeze(&st->ascon, st->buf, sizeof(st->buf));
|
||||||
|
st->bpos = sizeof(st->buf);
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t todo = nbytes;
|
||||||
|
if (todo > st->bpos) todo = st->bpos;
|
||||||
|
|
||||||
|
memcpy(dst, &st->buf[sizeof(st->buf) - st->bpos], todo);
|
||||||
|
nbytes -= todo;
|
||||||
|
st->bpos -= todo;
|
||||||
|
dst += todo;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,24 @@
|
||||||
|
|
||||||
|
#ifndef ROURAND_H_
#define ROURAND_H_

#include <stddef.h>
#include <stdint.h> /* uint32_t, used by rourand_get32() */

/* Opaque state of the randomness compressor; defined in rourand.c. */
struct rourand_state;

/*
 * Raw randomness source: fills dst with nbits bits of randomness.
 * Note that the size is a bit count, not a byte count.
 */
typedef void (*rourand_get_fn)(void* dst, size_t nbits);

/*
 * Create a compressor seeded from rawrand.
 * rate is the number of 16-byte output blocks between absorbs of fresh raw
 * randomness; 0 selects the default of 16.
 * Returns NULL on failure. Release the result with rourand_free().
 */
struct rourand_state* rourand_init(rourand_get_fn rawrand, int rate);

/* Wipe and release a state created by rourand_init(); NULL is ignored. */
void rourand_free(struct rourand_state* st);

/* Write nbytes bytes of compressed randomness to dst. */
void rourand_get(struct rourand_state* st, void* dst, size_t nbytes);

/* Convenience wrapper: return 32 bits of compressed randomness. */
static inline uint32_t rourand_get32(struct rourand_state* st) {
  uint32_t r = 0;
  rourand_get(st, &r, sizeof(r));
  return r;
}

#endif /* ROURAND_H_ */
|
||||||
|
|
Loading…
Reference in New Issue