From babb90832ceb28f4ba7ef68ce9be2aac912a96ae Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Fri, 2 Jul 2021 16:24:23 -0700 Subject: [PATCH] initial u256 Signed-off-by: Nikolaj Bjorner --- CMakeLists.txt | 2 +- src/CMakeLists.txt | 1 + src/math/bigfix/Hacl_Bignum.c | 2470 +++++++++++++++++ src/math/bigfix/Hacl_Bignum.h | 345 +++ src/math/bigfix/Hacl_Bignum256.c | 1534 ++++++++++ src/math/bigfix/Hacl_Bignum256.h | 375 +++ src/math/bigfix/Hacl_Bignum_Base.h | 73 + src/math/bigfix/Hacl_IntTypes_Intrinsics.h | 88 + src/math/bigfix/kremlib/FStar_UInt128.h | 79 + .../bigfix/kremlib/FStar_UInt128_Verified.h | 347 +++ .../bigfix/kremlib/FStar_UInt_8_16_32_64.h | 104 + src/math/bigfix/kremlib/LowStar_Endianness.h | 28 + src/math/bigfix/kremlib/fstar_uint128_gcc64.h | 165 ++ src/math/bigfix/kremlib/fstar_uint128_msvc.h | 510 ++++ .../kremlib/fstar_uint128_struct_endianness.h | 68 + src/math/bigfix/kremlin/internal/target.h | 60 + src/math/bigfix/kremlin/internal/types.h | 70 + src/math/bigfix/kremlin/lowstar_endianness.h | 230 ++ src/math/bigfix/lib_intrinsics.h | 67 + src/math/bigfix/u256.cpp | 8 + src/math/bigfix/u256.h | 11 + src/math/polysat/CMakeLists.txt | 1 + 22 files changed, 6635 insertions(+), 1 deletion(-) create mode 100644 src/math/bigfix/Hacl_Bignum.c create mode 100644 src/math/bigfix/Hacl_Bignum.h create mode 100644 src/math/bigfix/Hacl_Bignum256.c create mode 100644 src/math/bigfix/Hacl_Bignum256.h create mode 100644 src/math/bigfix/Hacl_Bignum_Base.h create mode 100644 src/math/bigfix/Hacl_IntTypes_Intrinsics.h create mode 100644 src/math/bigfix/kremlib/FStar_UInt128.h create mode 100644 src/math/bigfix/kremlib/FStar_UInt128_Verified.h create mode 100644 src/math/bigfix/kremlib/FStar_UInt_8_16_32_64.h create mode 100644 src/math/bigfix/kremlib/LowStar_Endianness.h create mode 100644 src/math/bigfix/kremlib/fstar_uint128_gcc64.h create mode 100644 src/math/bigfix/kremlib/fstar_uint128_msvc.h create mode 100644 src/math/bigfix/kremlib/fstar_uint128_struct_endianness.h create mode 100644 src/math/bigfix/kremlin/internal/target.h create mode 100644 src/math/bigfix/kremlin/internal/types.h create mode 100644 src/math/bigfix/kremlin/lowstar_endianness.h create mode 100644 src/math/bigfix/lib_intrinsics.h create mode 100644 src/math/bigfix/u256.cpp create mode 100644 src/math/bigfix/u256.h diff --git a/CMakeLists.txt b/CMakeLists.txt index fa2531670..ccf7ea2ec 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.4) set(CMAKE_USER_MAKE_RULES_OVERRIDE_CXX "${CMAKE_CURRENT_SOURCE_DIR}/cmake/cxx_compiler_flags_overrides.cmake") -project(Z3 VERSION 4.8.11.0 LANGUAGES CXX) +project(Z3 VERSION 4.8.11.0 LANGUAGES CXX C) ################################################################################ # Project version diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 166f960e0..88b149e0f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -40,6 +40,7 @@ add_subdirectory(math/dd) add_subdirectory(math/hilbert) add_subdirectory(math/simplex) add_subdirectory(math/interval) +add_subdirectory(math/bigfix) add_subdirectory(math/polysat) add_subdirectory(math/automata) add_subdirectory(math/realclosure) diff --git a/src/math/bigfix/Hacl_Bignum.c b/src/math/bigfix/Hacl_Bignum.c new file mode 100644 index 000000000..54a282839 --- /dev/null +++ b/src/math/bigfix/Hacl_Bignum.c @@ -0,0 +1,2470 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any 
person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + + +#include "math/bigfix/Hacl_Bignum.h" + +void Hacl_Bignum_Convert_bn_from_bytes_be_uint64(uint32_t len, uint8_t *b, uint64_t *res) +{ + uint32_t bnLen = (len - (uint32_t)1U) / (uint32_t)8U + (uint32_t)1U; + uint32_t tmpLen = (uint32_t)8U * bnLen; + KRML_CHECK_SIZE(sizeof (uint8_t), tmpLen); + uint8_t *tmp = alloca(tmpLen * sizeof (uint8_t)); + memset(tmp, 0U, tmpLen * sizeof (uint8_t)); + memcpy(tmp + tmpLen - len, b, len * sizeof (uint8_t)); + for (uint32_t i = (uint32_t)0U; i < bnLen; i++) + { + uint64_t *os = res; + uint64_t u = load64_be(tmp + (bnLen - i - (uint32_t)1U) * (uint32_t)8U); + uint64_t x = u; + os[i] = x; + } +} + +void Hacl_Bignum_Convert_bn_to_bytes_be_uint64(uint32_t len, uint64_t *b, uint8_t *res) +{ + uint32_t bnLen = (len - (uint32_t)1U) / (uint32_t)8U + (uint32_t)1U; + uint32_t tmpLen = (uint32_t)8U * bnLen; + KRML_CHECK_SIZE(sizeof (uint8_t), tmpLen); + uint8_t *tmp = alloca(tmpLen * sizeof (uint8_t)); + memset(tmp, 0U, tmpLen * sizeof (uint8_t)); + uint32_t numb = (uint32_t)8U; + for (uint32_t i = (uint32_t)0U; i < bnLen; i++) + { + store64_be(tmp + i * numb, b[bnLen - i - (uint32_t)1U]); + } + memcpy(res, tmp + tmpLen - len, len * sizeof (uint8_t)); +} + +uint32_t Hacl_Bignum_Lib_bn_get_top_index_u32(uint32_t len, uint32_t *b) +{ + uint32_t priv = (uint32_t)0U; + for (uint32_t i = (uint32_t)0U; i < len; i++) + { + uint32_t mask = FStar_UInt32_eq_mask(b[i], (uint32_t)0U); + priv = (mask & priv) | (~mask & i); + } + return priv; +} + +uint64_t Hacl_Bignum_Lib_bn_get_top_index_u64(uint32_t len, uint64_t *b) +{ + uint64_t priv = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < len; i++) + { + uint64_t mask = FStar_UInt64_eq_mask(b[i], (uint64_t)0U); + priv = (mask & priv) | (~mask & (uint64_t)i); + } + return priv; +} + +inline uint32_t +Hacl_Bignum_Addition_bn_sub_eq_len_u32(uint32_t aLen, uint32_t *a, uint32_t *b, uint32_t *res) +{ + uint32_t c = (uint32_t)0U; + for (uint32_t i = (uint32_t)0U; i < aLen / (uint32_t)4U * (uint32_t)4U / (uint32_t)4U; i++) + { + uint32_t t1 = a[(uint32_t)4U * i]; + uint32_t t20 = b[(uint32_t)4U * i]; + uint32_t *res_i0 = res + (uint32_t)4U * i; + c = Lib_IntTypes_Intrinsics_sub_borrow_u32(c, t1, t20, res_i0); + uint32_t t10 = a[(uint32_t)4U * i + (uint32_t)1U]; + uint32_t t21 = b[(uint32_t)4U * i + (uint32_t)1U]; + uint32_t *res_i1 = res + (uint32_t)4U * i + (uint32_t)1U; + c = Lib_IntTypes_Intrinsics_sub_borrow_u32(c, t10, t21, res_i1); + uint32_t t11 = a[(uint32_t)4U * i + (uint32_t)2U]; + uint32_t t22 = 
b[(uint32_t)4U * i + (uint32_t)2U]; + uint32_t *res_i2 = res + (uint32_t)4U * i + (uint32_t)2U; + c = Lib_IntTypes_Intrinsics_sub_borrow_u32(c, t11, t22, res_i2); + uint32_t t12 = a[(uint32_t)4U * i + (uint32_t)3U]; + uint32_t t2 = b[(uint32_t)4U * i + (uint32_t)3U]; + uint32_t *res_i = res + (uint32_t)4U * i + (uint32_t)3U; + c = Lib_IntTypes_Intrinsics_sub_borrow_u32(c, t12, t2, res_i); + } + for (uint32_t i = aLen / (uint32_t)4U * (uint32_t)4U; i < aLen; i++) + { + uint32_t t1 = a[i]; + uint32_t t2 = b[i]; + uint32_t *res_i = res + i; + c = Lib_IntTypes_Intrinsics_sub_borrow_u32(c, t1, t2, res_i); + } + return c; +} + +inline uint64_t +Hacl_Bignum_Addition_bn_sub_eq_len_u64(uint32_t aLen, uint64_t *a, uint64_t *b, uint64_t *res) +{ + uint64_t c = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < aLen / (uint32_t)4U * (uint32_t)4U / (uint32_t)4U; i++) + { + uint64_t t1 = a[(uint32_t)4U * i]; + uint64_t t20 = b[(uint32_t)4U * i]; + uint64_t *res_i0 = res + (uint32_t)4U * i; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t1, t20, res_i0); + uint64_t t10 = a[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t t21 = b[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t *res_i1 = res + (uint32_t)4U * i + (uint32_t)1U; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t10, t21, res_i1); + uint64_t t11 = a[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t t22 = b[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t *res_i2 = res + (uint32_t)4U * i + (uint32_t)2U; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t11, t22, res_i2); + uint64_t t12 = a[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t t2 = b[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t *res_i = res + (uint32_t)4U * i + (uint32_t)3U; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t12, t2, res_i); + } + for (uint32_t i = aLen / (uint32_t)4U * (uint32_t)4U; i < aLen; i++) + { + uint64_t t1 = a[i]; + uint64_t t2 = b[i]; + uint64_t *res_i = res + i; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t1, t2, res_i); + } + return c; +} + +inline uint32_t +Hacl_Bignum_Addition_bn_add_eq_len_u32(uint32_t aLen, uint32_t *a, uint32_t *b, uint32_t *res) +{ + uint32_t c = (uint32_t)0U; + for (uint32_t i = (uint32_t)0U; i < aLen / (uint32_t)4U * (uint32_t)4U / (uint32_t)4U; i++) + { + uint32_t t1 = a[(uint32_t)4U * i]; + uint32_t t20 = b[(uint32_t)4U * i]; + uint32_t *res_i0 = res + (uint32_t)4U * i; + c = Lib_IntTypes_Intrinsics_add_carry_u32(c, t1, t20, res_i0); + uint32_t t10 = a[(uint32_t)4U * i + (uint32_t)1U]; + uint32_t t21 = b[(uint32_t)4U * i + (uint32_t)1U]; + uint32_t *res_i1 = res + (uint32_t)4U * i + (uint32_t)1U; + c = Lib_IntTypes_Intrinsics_add_carry_u32(c, t10, t21, res_i1); + uint32_t t11 = a[(uint32_t)4U * i + (uint32_t)2U]; + uint32_t t22 = b[(uint32_t)4U * i + (uint32_t)2U]; + uint32_t *res_i2 = res + (uint32_t)4U * i + (uint32_t)2U; + c = Lib_IntTypes_Intrinsics_add_carry_u32(c, t11, t22, res_i2); + uint32_t t12 = a[(uint32_t)4U * i + (uint32_t)3U]; + uint32_t t2 = b[(uint32_t)4U * i + (uint32_t)3U]; + uint32_t *res_i = res + (uint32_t)4U * i + (uint32_t)3U; + c = Lib_IntTypes_Intrinsics_add_carry_u32(c, t12, t2, res_i); + } + for (uint32_t i = aLen / (uint32_t)4U * (uint32_t)4U; i < aLen; i++) + { + uint32_t t1 = a[i]; + uint32_t t2 = b[i]; + uint32_t *res_i = res + i; + c = Lib_IntTypes_Intrinsics_add_carry_u32(c, t1, t2, res_i); + } + return c; +} + +inline uint64_t +Hacl_Bignum_Addition_bn_add_eq_len_u64(uint32_t aLen, uint64_t *a, uint64_t *b, uint64_t *res) +{ + uint64_t c = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < aLen / (uint32_t)4U * 
(uint32_t)4U / (uint32_t)4U; i++) + { + uint64_t t1 = a[(uint32_t)4U * i]; + uint64_t t20 = b[(uint32_t)4U * i]; + uint64_t *res_i0 = res + (uint32_t)4U * i; + c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t1, t20, res_i0); + uint64_t t10 = a[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t t21 = b[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t *res_i1 = res + (uint32_t)4U * i + (uint32_t)1U; + c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t10, t21, res_i1); + uint64_t t11 = a[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t t22 = b[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t *res_i2 = res + (uint32_t)4U * i + (uint32_t)2U; + c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t11, t22, res_i2); + uint64_t t12 = a[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t t2 = b[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t *res_i = res + (uint32_t)4U * i + (uint32_t)3U; + c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t12, t2, res_i); + } + for (uint32_t i = aLen / (uint32_t)4U * (uint32_t)4U; i < aLen; i++) + { + uint64_t t1 = a[i]; + uint64_t t2 = b[i]; + uint64_t *res_i = res + i; + c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t1, t2, res_i); + } + return c; +} + +static inline void +bn_mul_u32(uint32_t aLen, uint32_t *a, uint32_t bLen, uint32_t *b, uint32_t *res) +{ + uint32_t resLen = aLen + bLen; + memset(res, 0U, resLen * sizeof (uint32_t)); + for (uint32_t i0 = (uint32_t)0U; i0 < bLen; i0++) + { + uint32_t bj = b[i0]; + uint32_t *res_j = res + i0; + uint32_t c = (uint32_t)0U; + for (uint32_t i = (uint32_t)0U; i < aLen / (uint32_t)4U * (uint32_t)4U / (uint32_t)4U; i++) + { + uint32_t a_i = a[(uint32_t)4U * i]; + uint32_t *res_i0 = res_j + (uint32_t)4U * i; + c = Hacl_Bignum_Base_mul_wide_add2_u32(a_i, bj, c, res_i0); + uint32_t a_i0 = a[(uint32_t)4U * i + (uint32_t)1U]; + uint32_t *res_i1 = res_j + (uint32_t)4U * i + (uint32_t)1U; + c = Hacl_Bignum_Base_mul_wide_add2_u32(a_i0, bj, c, res_i1); + uint32_t a_i1 = a[(uint32_t)4U * i + (uint32_t)2U]; + uint32_t *res_i2 = res_j + (uint32_t)4U * i + (uint32_t)2U; + c = Hacl_Bignum_Base_mul_wide_add2_u32(a_i1, bj, c, res_i2); + uint32_t a_i2 = a[(uint32_t)4U * i + (uint32_t)3U]; + uint32_t *res_i = res_j + (uint32_t)4U * i + (uint32_t)3U; + c = Hacl_Bignum_Base_mul_wide_add2_u32(a_i2, bj, c, res_i); + } + for (uint32_t i = aLen / (uint32_t)4U * (uint32_t)4U; i < aLen; i++) + { + uint32_t a_i = a[i]; + uint32_t *res_i = res_j + i; + c = Hacl_Bignum_Base_mul_wide_add2_u32(a_i, bj, c, res_i); + } + uint32_t r = c; + res[aLen + i0] = r; + } +} + +static inline void +bn_mul_u64(uint32_t aLen, uint64_t *a, uint32_t bLen, uint64_t *b, uint64_t *res) +{ + uint32_t resLen = aLen + bLen; + memset(res, 0U, resLen * sizeof (uint64_t)); + for (uint32_t i0 = (uint32_t)0U; i0 < bLen; i0++) + { + uint64_t bj = b[i0]; + uint64_t *res_j = res + i0; + uint64_t c = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < aLen / (uint32_t)4U * (uint32_t)4U / (uint32_t)4U; i++) + { + uint64_t a_i = a[(uint32_t)4U * i]; + uint64_t *res_i0 = res_j + (uint32_t)4U * i; + c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, bj, c, res_i0); + uint64_t a_i0 = a[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t *res_i1 = res_j + (uint32_t)4U * i + (uint32_t)1U; + c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i0, bj, c, res_i1); + uint64_t a_i1 = a[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t *res_i2 = res_j + (uint32_t)4U * i + (uint32_t)2U; + c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i1, bj, c, res_i2); + uint64_t a_i2 = a[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t *res_i = res_j + (uint32_t)4U * i + (uint32_t)3U; + c = 
Hacl_Bignum_Base_mul_wide_add2_u64(a_i2, bj, c, res_i); + } + for (uint32_t i = aLen / (uint32_t)4U * (uint32_t)4U; i < aLen; i++) + { + uint64_t a_i = a[i]; + uint64_t *res_i = res_j + i; + c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, bj, c, res_i); + } + uint64_t r = c; + res[aLen + i0] = r; + } +} + +static inline void bn_sqr_u32(uint32_t aLen, uint32_t *a, uint32_t *res) +{ + uint32_t resLen = aLen + aLen; + memset(res, 0U, resLen * sizeof (uint32_t)); + for (uint32_t i0 = (uint32_t)0U; i0 < aLen; i0++) + { + uint32_t *ab = a; + uint32_t a_j = a[i0]; + uint32_t *res_j = res + i0; + uint32_t c = (uint32_t)0U; + for (uint32_t i = (uint32_t)0U; i < i0 / (uint32_t)4U * (uint32_t)4U / (uint32_t)4U; i++) + { + uint32_t a_i = ab[(uint32_t)4U * i]; + uint32_t *res_i0 = res_j + (uint32_t)4U * i; + c = Hacl_Bignum_Base_mul_wide_add2_u32(a_i, a_j, c, res_i0); + uint32_t a_i0 = ab[(uint32_t)4U * i + (uint32_t)1U]; + uint32_t *res_i1 = res_j + (uint32_t)4U * i + (uint32_t)1U; + c = Hacl_Bignum_Base_mul_wide_add2_u32(a_i0, a_j, c, res_i1); + uint32_t a_i1 = ab[(uint32_t)4U * i + (uint32_t)2U]; + uint32_t *res_i2 = res_j + (uint32_t)4U * i + (uint32_t)2U; + c = Hacl_Bignum_Base_mul_wide_add2_u32(a_i1, a_j, c, res_i2); + uint32_t a_i2 = ab[(uint32_t)4U * i + (uint32_t)3U]; + uint32_t *res_i = res_j + (uint32_t)4U * i + (uint32_t)3U; + c = Hacl_Bignum_Base_mul_wide_add2_u32(a_i2, a_j, c, res_i); + } + for (uint32_t i = i0 / (uint32_t)4U * (uint32_t)4U; i < i0; i++) + { + uint32_t a_i = ab[i]; + uint32_t *res_i = res_j + i; + c = Hacl_Bignum_Base_mul_wide_add2_u32(a_i, a_j, c, res_i); + } + uint32_t r = c; + res[i0 + i0] = r; + } + uint32_t c0 = Hacl_Bignum_Addition_bn_add_eq_len_u32(resLen, res, res, res); + KRML_CHECK_SIZE(sizeof (uint32_t), resLen); + uint32_t *tmp = alloca(resLen * sizeof (uint32_t)); + memset(tmp, 0U, resLen * sizeof (uint32_t)); + for (uint32_t i = (uint32_t)0U; i < aLen; i++) + { + uint64_t res1 = (uint64_t)a[i] * (uint64_t)a[i]; + uint32_t hi = (uint32_t)(res1 >> (uint32_t)32U); + uint32_t lo = (uint32_t)res1; + tmp[(uint32_t)2U * i] = lo; + tmp[(uint32_t)2U * i + (uint32_t)1U] = hi; + } + uint32_t c1 = Hacl_Bignum_Addition_bn_add_eq_len_u32(resLen, res, tmp, res); +} + +static inline void bn_sqr_u64(uint32_t aLen, uint64_t *a, uint64_t *res) +{ + uint32_t resLen = aLen + aLen; + memset(res, 0U, resLen * sizeof (uint64_t)); + for (uint32_t i0 = (uint32_t)0U; i0 < aLen; i0++) + { + uint64_t *ab = a; + uint64_t a_j = a[i0]; + uint64_t *res_j = res + i0; + uint64_t c = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < i0 / (uint32_t)4U * (uint32_t)4U / (uint32_t)4U; i++) + { + uint64_t a_i = ab[(uint32_t)4U * i]; + uint64_t *res_i0 = res_j + (uint32_t)4U * i; + c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, a_j, c, res_i0); + uint64_t a_i0 = ab[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t *res_i1 = res_j + (uint32_t)4U * i + (uint32_t)1U; + c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i0, a_j, c, res_i1); + uint64_t a_i1 = ab[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t *res_i2 = res_j + (uint32_t)4U * i + (uint32_t)2U; + c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i1, a_j, c, res_i2); + uint64_t a_i2 = ab[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t *res_i = res_j + (uint32_t)4U * i + (uint32_t)3U; + c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i2, a_j, c, res_i); + } + for (uint32_t i = i0 / (uint32_t)4U * (uint32_t)4U; i < i0; i++) + { + uint64_t a_i = ab[i]; + uint64_t *res_i = res_j + i; + c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, a_j, c, res_i); + } + uint64_t r = c; + res[i0 + i0] = r; 
+ } + uint64_t c0 = Hacl_Bignum_Addition_bn_add_eq_len_u64(resLen, res, res, res); + KRML_CHECK_SIZE(sizeof (uint64_t), resLen); + uint64_t *tmp = alloca(resLen * sizeof (uint64_t)); + memset(tmp, 0U, resLen * sizeof (uint64_t)); + for (uint32_t i = (uint32_t)0U; i < aLen; i++) + { + FStar_UInt128_uint128 res1 = FStar_UInt128_mul_wide(a[i], a[i]); + uint64_t hi = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(res1, (uint32_t)64U)); + uint64_t lo = FStar_UInt128_uint128_to_uint64(res1); + tmp[(uint32_t)2U * i] = lo; + tmp[(uint32_t)2U * i + (uint32_t)1U] = hi; + } + uint64_t c1 = Hacl_Bignum_Addition_bn_add_eq_len_u64(resLen, res, tmp, res); +} + +void +Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint32( + uint32_t aLen, + uint32_t *a, + uint32_t *b, + uint32_t *tmp, + uint32_t *res +) +{ + if (aLen < (uint32_t)32U || aLen % (uint32_t)2U == (uint32_t)1U) + { + bn_mul_u32(aLen, a, aLen, b, res); + return; + } + uint32_t len2 = aLen / (uint32_t)2U; + uint32_t *a0 = a; + uint32_t *a1 = a + len2; + uint32_t *b0 = b; + uint32_t *b1 = b + len2; + uint32_t *t0 = tmp; + uint32_t *t1 = tmp + len2; + uint32_t *tmp_ = tmp + aLen; + uint32_t c0 = Hacl_Bignum_Addition_bn_sub_eq_len_u32(len2, a0, a1, tmp_); + uint32_t c10 = Hacl_Bignum_Addition_bn_sub_eq_len_u32(len2, a1, a0, t0); + for (uint32_t i = (uint32_t)0U; i < len2; i++) + { + uint32_t *os = t0; + uint32_t x = (((uint32_t)0U - c0) & t0[i]) | (~((uint32_t)0U - c0) & tmp_[i]); + os[i] = x; + } + uint32_t c00 = c0; + uint32_t c010 = Hacl_Bignum_Addition_bn_sub_eq_len_u32(len2, b0, b1, tmp_); + uint32_t c1 = Hacl_Bignum_Addition_bn_sub_eq_len_u32(len2, b1, b0, t1); + for (uint32_t i = (uint32_t)0U; i < len2; i++) + { + uint32_t *os = t1; + uint32_t x = (((uint32_t)0U - c010) & t1[i]) | (~((uint32_t)0U - c010) & tmp_[i]); + os[i] = x; + } + uint32_t c11 = c010; + uint32_t *t23 = tmp + aLen; + uint32_t *tmp1 = tmp + aLen + aLen; + Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint32(len2, t0, t1, tmp1, t23); + uint32_t *r01 = res; + uint32_t *r23 = res + aLen; + Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint32(len2, a0, b0, tmp1, r01); + Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint32(len2, a1, b1, tmp1, r23); + uint32_t *r011 = res; + uint32_t *r231 = res + aLen; + uint32_t *t01 = tmp; + uint32_t *t231 = tmp + aLen; + uint32_t *t45 = tmp + (uint32_t)2U * aLen; + uint32_t *t67 = tmp + (uint32_t)3U * aLen; + uint32_t c2 = Hacl_Bignum_Addition_bn_add_eq_len_u32(aLen, r011, r231, t01); + uint32_t c_sign = c00 ^ c11; + uint32_t c3 = Hacl_Bignum_Addition_bn_sub_eq_len_u32(aLen, t01, t231, t67); + uint32_t c31 = c2 - c3; + uint32_t c4 = Hacl_Bignum_Addition_bn_add_eq_len_u32(aLen, t01, t231, t45); + uint32_t c41 = c2 + c4; + uint32_t mask = (uint32_t)0U - c_sign; + for (uint32_t i = (uint32_t)0U; i < aLen; i++) + { + uint32_t *os = t45; + uint32_t x = (mask & t45[i]) | (~mask & t67[i]); + os[i] = x; + } + uint32_t c5 = (mask & c41) | (~mask & c31); + uint32_t aLen2 = aLen / (uint32_t)2U; + uint32_t *r0 = res + aLen2; + uint32_t r10 = Hacl_Bignum_Addition_bn_add_eq_len_u32(aLen, r0, t45, r0); + uint32_t c6 = r10; + uint32_t c60 = c6; + uint32_t c7 = c5 + c60; + uint32_t *r = res + aLen + aLen2; + uint32_t c01 = Lib_IntTypes_Intrinsics_add_carry_u32((uint32_t)0U, r[0U], c7, r); + uint32_t r1; + if ((uint32_t)1U < aLen + aLen - (aLen + aLen2)) + { + uint32_t rLen = aLen + aLen - (aLen + aLen2) - (uint32_t)1U; + uint32_t *a11 = r + (uint32_t)1U; + uint32_t *res1 = r + (uint32_t)1U; + uint32_t c = c01; + for (uint32_t i = (uint32_t)0U; i < rLen / (uint32_t)4U * 
(uint32_t)4U / (uint32_t)4U; i++) + { + uint32_t t11 = a11[(uint32_t)4U * i]; + uint32_t *res_i0 = res1 + (uint32_t)4U * i; + c = Lib_IntTypes_Intrinsics_add_carry_u32(c, t11, (uint32_t)0U, res_i0); + uint32_t t110 = a11[(uint32_t)4U * i + (uint32_t)1U]; + uint32_t *res_i1 = res1 + (uint32_t)4U * i + (uint32_t)1U; + c = Lib_IntTypes_Intrinsics_add_carry_u32(c, t110, (uint32_t)0U, res_i1); + uint32_t t111 = a11[(uint32_t)4U * i + (uint32_t)2U]; + uint32_t *res_i2 = res1 + (uint32_t)4U * i + (uint32_t)2U; + c = Lib_IntTypes_Intrinsics_add_carry_u32(c, t111, (uint32_t)0U, res_i2); + uint32_t t112 = a11[(uint32_t)4U * i + (uint32_t)3U]; + uint32_t *res_i = res1 + (uint32_t)4U * i + (uint32_t)3U; + c = Lib_IntTypes_Intrinsics_add_carry_u32(c, t112, (uint32_t)0U, res_i); + } + for (uint32_t i = rLen / (uint32_t)4U * (uint32_t)4U; i < rLen; i++) + { + uint32_t t11 = a11[i]; + uint32_t *res_i = res1 + i; + c = Lib_IntTypes_Intrinsics_add_carry_u32(c, t11, (uint32_t)0U, res_i); + } + uint32_t c110 = c; + r1 = c110; + } + else + { + r1 = c01; + } + uint32_t c8 = r1; + uint32_t c = c8; + uint32_t c9 = c; +} + +void +Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint64( + uint32_t aLen, + uint64_t *a, + uint64_t *b, + uint64_t *tmp, + uint64_t *res +) +{ + if (aLen < (uint32_t)32U || aLen % (uint32_t)2U == (uint32_t)1U) + { + bn_mul_u64(aLen, a, aLen, b, res); + return; + } + uint32_t len2 = aLen / (uint32_t)2U; + uint64_t *a0 = a; + uint64_t *a1 = a + len2; + uint64_t *b0 = b; + uint64_t *b1 = b + len2; + uint64_t *t0 = tmp; + uint64_t *t1 = tmp + len2; + uint64_t *tmp_ = tmp + aLen; + uint64_t c0 = Hacl_Bignum_Addition_bn_sub_eq_len_u64(len2, a0, a1, tmp_); + uint64_t c10 = Hacl_Bignum_Addition_bn_sub_eq_len_u64(len2, a1, a0, t0); + for (uint32_t i = (uint32_t)0U; i < len2; i++) + { + uint64_t *os = t0; + uint64_t x = (((uint64_t)0U - c0) & t0[i]) | (~((uint64_t)0U - c0) & tmp_[i]); + os[i] = x; + } + uint64_t c00 = c0; + uint64_t c010 = Hacl_Bignum_Addition_bn_sub_eq_len_u64(len2, b0, b1, tmp_); + uint64_t c1 = Hacl_Bignum_Addition_bn_sub_eq_len_u64(len2, b1, b0, t1); + for (uint32_t i = (uint32_t)0U; i < len2; i++) + { + uint64_t *os = t1; + uint64_t x = (((uint64_t)0U - c010) & t1[i]) | (~((uint64_t)0U - c010) & tmp_[i]); + os[i] = x; + } + uint64_t c11 = c010; + uint64_t *t23 = tmp + aLen; + uint64_t *tmp1 = tmp + aLen + aLen; + Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint64(len2, t0, t1, tmp1, t23); + uint64_t *r01 = res; + uint64_t *r23 = res + aLen; + Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint64(len2, a0, b0, tmp1, r01); + Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint64(len2, a1, b1, tmp1, r23); + uint64_t *r011 = res; + uint64_t *r231 = res + aLen; + uint64_t *t01 = tmp; + uint64_t *t231 = tmp + aLen; + uint64_t *t45 = tmp + (uint32_t)2U * aLen; + uint64_t *t67 = tmp + (uint32_t)3U * aLen; + uint64_t c2 = Hacl_Bignum_Addition_bn_add_eq_len_u64(aLen, r011, r231, t01); + uint64_t c_sign = c00 ^ c11; + uint64_t c3 = Hacl_Bignum_Addition_bn_sub_eq_len_u64(aLen, t01, t231, t67); + uint64_t c31 = c2 - c3; + uint64_t c4 = Hacl_Bignum_Addition_bn_add_eq_len_u64(aLen, t01, t231, t45); + uint64_t c41 = c2 + c4; + uint64_t mask = (uint64_t)0U - c_sign; + for (uint32_t i = (uint32_t)0U; i < aLen; i++) + { + uint64_t *os = t45; + uint64_t x = (mask & t45[i]) | (~mask & t67[i]); + os[i] = x; + } + uint64_t c5 = (mask & c41) | (~mask & c31); + uint32_t aLen2 = aLen / (uint32_t)2U; + uint64_t *r0 = res + aLen2; + uint64_t r10 = Hacl_Bignum_Addition_bn_add_eq_len_u64(aLen, r0, t45, r0); + uint64_t c6 = r10; + 
uint64_t c60 = c6; + uint64_t c7 = c5 + c60; + uint64_t *r = res + aLen + aLen2; + uint64_t c01 = Lib_IntTypes_Intrinsics_add_carry_u64((uint64_t)0U, r[0U], c7, r); + uint64_t r1; + if ((uint32_t)1U < aLen + aLen - (aLen + aLen2)) + { + uint32_t rLen = aLen + aLen - (aLen + aLen2) - (uint32_t)1U; + uint64_t *a11 = r + (uint32_t)1U; + uint64_t *res1 = r + (uint32_t)1U; + uint64_t c = c01; + for (uint32_t i = (uint32_t)0U; i < rLen / (uint32_t)4U * (uint32_t)4U / (uint32_t)4U; i++) + { + uint64_t t11 = a11[(uint32_t)4U * i]; + uint64_t *res_i0 = res1 + (uint32_t)4U * i; + c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t11, (uint64_t)0U, res_i0); + uint64_t t110 = a11[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t *res_i1 = res1 + (uint32_t)4U * i + (uint32_t)1U; + c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t110, (uint64_t)0U, res_i1); + uint64_t t111 = a11[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t *res_i2 = res1 + (uint32_t)4U * i + (uint32_t)2U; + c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t111, (uint64_t)0U, res_i2); + uint64_t t112 = a11[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t *res_i = res1 + (uint32_t)4U * i + (uint32_t)3U; + c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t112, (uint64_t)0U, res_i); + } + for (uint32_t i = rLen / (uint32_t)4U * (uint32_t)4U; i < rLen; i++) + { + uint64_t t11 = a11[i]; + uint64_t *res_i = res1 + i; + c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t11, (uint64_t)0U, res_i); + } + uint64_t c110 = c; + r1 = c110; + } + else + { + r1 = c01; + } + uint64_t c8 = r1; + uint64_t c = c8; + uint64_t c9 = c; +} + +void +Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint32( + uint32_t aLen, + uint32_t *a, + uint32_t *tmp, + uint32_t *res +) +{ + if (aLen < (uint32_t)32U || aLen % (uint32_t)2U == (uint32_t)1U) + { + bn_sqr_u32(aLen, a, res); + return; + } + uint32_t len2 = aLen / (uint32_t)2U; + uint32_t *a0 = a; + uint32_t *a1 = a + len2; + uint32_t *t0 = tmp; + uint32_t *tmp_ = tmp + aLen; + uint32_t c0 = Hacl_Bignum_Addition_bn_sub_eq_len_u32(len2, a0, a1, tmp_); + uint32_t c1 = Hacl_Bignum_Addition_bn_sub_eq_len_u32(len2, a1, a0, t0); + for (uint32_t i = (uint32_t)0U; i < len2; i++) + { + uint32_t *os = t0; + uint32_t x = (((uint32_t)0U - c0) & t0[i]) | (~((uint32_t)0U - c0) & tmp_[i]); + os[i] = x; + } + uint32_t c00 = c0; + uint32_t *t23 = tmp + aLen; + uint32_t *tmp1 = tmp + aLen + aLen; + Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint32(len2, t0, tmp1, t23); + uint32_t *r01 = res; + uint32_t *r23 = res + aLen; + Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint32(len2, a0, tmp1, r01); + Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint32(len2, a1, tmp1, r23); + uint32_t *r011 = res; + uint32_t *r231 = res + aLen; + uint32_t *t01 = tmp; + uint32_t *t231 = tmp + aLen; + uint32_t *t45 = tmp + (uint32_t)2U * aLen; + uint32_t c2 = Hacl_Bignum_Addition_bn_add_eq_len_u32(aLen, r011, r231, t01); + uint32_t c3 = Hacl_Bignum_Addition_bn_sub_eq_len_u32(aLen, t01, t231, t45); + uint32_t c5 = c2 - c3; + uint32_t aLen2 = aLen / (uint32_t)2U; + uint32_t *r0 = res + aLen2; + uint32_t r10 = Hacl_Bignum_Addition_bn_add_eq_len_u32(aLen, r0, t45, r0); + uint32_t c4 = r10; + uint32_t c6 = c4; + uint32_t c7 = c5 + c6; + uint32_t *r = res + aLen + aLen2; + uint32_t c01 = Lib_IntTypes_Intrinsics_add_carry_u32((uint32_t)0U, r[0U], c7, r); + uint32_t r1; + if ((uint32_t)1U < aLen + aLen - (aLen + aLen2)) + { + uint32_t rLen = aLen + aLen - (aLen + aLen2) - (uint32_t)1U; + uint32_t *a11 = r + (uint32_t)1U; + uint32_t *res1 = r + (uint32_t)1U; + uint32_t c = c01; + for (uint32_t i = (uint32_t)0U; i < rLen 
/ (uint32_t)4U * (uint32_t)4U / (uint32_t)4U; i++) + { + uint32_t t1 = a11[(uint32_t)4U * i]; + uint32_t *res_i0 = res1 + (uint32_t)4U * i; + c = Lib_IntTypes_Intrinsics_add_carry_u32(c, t1, (uint32_t)0U, res_i0); + uint32_t t10 = a11[(uint32_t)4U * i + (uint32_t)1U]; + uint32_t *res_i1 = res1 + (uint32_t)4U * i + (uint32_t)1U; + c = Lib_IntTypes_Intrinsics_add_carry_u32(c, t10, (uint32_t)0U, res_i1); + uint32_t t11 = a11[(uint32_t)4U * i + (uint32_t)2U]; + uint32_t *res_i2 = res1 + (uint32_t)4U * i + (uint32_t)2U; + c = Lib_IntTypes_Intrinsics_add_carry_u32(c, t11, (uint32_t)0U, res_i2); + uint32_t t12 = a11[(uint32_t)4U * i + (uint32_t)3U]; + uint32_t *res_i = res1 + (uint32_t)4U * i + (uint32_t)3U; + c = Lib_IntTypes_Intrinsics_add_carry_u32(c, t12, (uint32_t)0U, res_i); + } + for (uint32_t i = rLen / (uint32_t)4U * (uint32_t)4U; i < rLen; i++) + { + uint32_t t1 = a11[i]; + uint32_t *res_i = res1 + i; + c = Lib_IntTypes_Intrinsics_add_carry_u32(c, t1, (uint32_t)0U, res_i); + } + uint32_t c10 = c; + r1 = c10; + } + else + { + r1 = c01; + } + uint32_t c8 = r1; + uint32_t c = c8; + uint32_t c9 = c; +} + +void +Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint64( + uint32_t aLen, + uint64_t *a, + uint64_t *tmp, + uint64_t *res +) +{ + if (aLen < (uint32_t)32U || aLen % (uint32_t)2U == (uint32_t)1U) + { + bn_sqr_u64(aLen, a, res); + return; + } + uint32_t len2 = aLen / (uint32_t)2U; + uint64_t *a0 = a; + uint64_t *a1 = a + len2; + uint64_t *t0 = tmp; + uint64_t *tmp_ = tmp + aLen; + uint64_t c0 = Hacl_Bignum_Addition_bn_sub_eq_len_u64(len2, a0, a1, tmp_); + uint64_t c1 = Hacl_Bignum_Addition_bn_sub_eq_len_u64(len2, a1, a0, t0); + for (uint32_t i = (uint32_t)0U; i < len2; i++) + { + uint64_t *os = t0; + uint64_t x = (((uint64_t)0U - c0) & t0[i]) | (~((uint64_t)0U - c0) & tmp_[i]); + os[i] = x; + } + uint64_t c00 = c0; + uint64_t *t23 = tmp + aLen; + uint64_t *tmp1 = tmp + aLen + aLen; + Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint64(len2, t0, tmp1, t23); + uint64_t *r01 = res; + uint64_t *r23 = res + aLen; + Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint64(len2, a0, tmp1, r01); + Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint64(len2, a1, tmp1, r23); + uint64_t *r011 = res; + uint64_t *r231 = res + aLen; + uint64_t *t01 = tmp; + uint64_t *t231 = tmp + aLen; + uint64_t *t45 = tmp + (uint32_t)2U * aLen; + uint64_t c2 = Hacl_Bignum_Addition_bn_add_eq_len_u64(aLen, r011, r231, t01); + uint64_t c3 = Hacl_Bignum_Addition_bn_sub_eq_len_u64(aLen, t01, t231, t45); + uint64_t c5 = c2 - c3; + uint32_t aLen2 = aLen / (uint32_t)2U; + uint64_t *r0 = res + aLen2; + uint64_t r10 = Hacl_Bignum_Addition_bn_add_eq_len_u64(aLen, r0, t45, r0); + uint64_t c4 = r10; + uint64_t c6 = c4; + uint64_t c7 = c5 + c6; + uint64_t *r = res + aLen + aLen2; + uint64_t c01 = Lib_IntTypes_Intrinsics_add_carry_u64((uint64_t)0U, r[0U], c7, r); + uint64_t r1; + if ((uint32_t)1U < aLen + aLen - (aLen + aLen2)) + { + uint32_t rLen = aLen + aLen - (aLen + aLen2) - (uint32_t)1U; + uint64_t *a11 = r + (uint32_t)1U; + uint64_t *res1 = r + (uint32_t)1U; + uint64_t c = c01; + for (uint32_t i = (uint32_t)0U; i < rLen / (uint32_t)4U * (uint32_t)4U / (uint32_t)4U; i++) + { + uint64_t t1 = a11[(uint32_t)4U * i]; + uint64_t *res_i0 = res1 + (uint32_t)4U * i; + c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t1, (uint64_t)0U, res_i0); + uint64_t t10 = a11[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t *res_i1 = res1 + (uint32_t)4U * i + (uint32_t)1U; + c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t10, (uint64_t)0U, res_i1); + uint64_t t11 = a11[(uint32_t)4U * i + 
(uint32_t)2U]; + uint64_t *res_i2 = res1 + (uint32_t)4U * i + (uint32_t)2U; + c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t11, (uint64_t)0U, res_i2); + uint64_t t12 = a11[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t *res_i = res1 + (uint32_t)4U * i + (uint32_t)3U; + c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t12, (uint64_t)0U, res_i); + } + for (uint32_t i = rLen / (uint32_t)4U * (uint32_t)4U; i < rLen; i++) + { + uint64_t t1 = a11[i]; + uint64_t *res_i = res1 + i; + c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t1, (uint64_t)0U, res_i); + } + uint64_t c10 = c; + r1 = c10; + } + else + { + r1 = c01; + } + uint64_t c8 = r1; + uint64_t c = c8; + uint64_t c9 = c; +} + +inline void +Hacl_Bignum_bn_add_mod_n_u32( + uint32_t len1, + uint32_t *n, + uint32_t *a, + uint32_t *b, + uint32_t *res +) +{ + uint32_t c0 = (uint32_t)0U; + for (uint32_t i = (uint32_t)0U; i < len1 / (uint32_t)4U * (uint32_t)4U / (uint32_t)4U; i++) + { + uint32_t t1 = a[(uint32_t)4U * i]; + uint32_t t20 = b[(uint32_t)4U * i]; + uint32_t *res_i0 = res + (uint32_t)4U * i; + c0 = Lib_IntTypes_Intrinsics_add_carry_u32(c0, t1, t20, res_i0); + uint32_t t10 = a[(uint32_t)4U * i + (uint32_t)1U]; + uint32_t t21 = b[(uint32_t)4U * i + (uint32_t)1U]; + uint32_t *res_i1 = res + (uint32_t)4U * i + (uint32_t)1U; + c0 = Lib_IntTypes_Intrinsics_add_carry_u32(c0, t10, t21, res_i1); + uint32_t t11 = a[(uint32_t)4U * i + (uint32_t)2U]; + uint32_t t22 = b[(uint32_t)4U * i + (uint32_t)2U]; + uint32_t *res_i2 = res + (uint32_t)4U * i + (uint32_t)2U; + c0 = Lib_IntTypes_Intrinsics_add_carry_u32(c0, t11, t22, res_i2); + uint32_t t12 = a[(uint32_t)4U * i + (uint32_t)3U]; + uint32_t t2 = b[(uint32_t)4U * i + (uint32_t)3U]; + uint32_t *res_i = res + (uint32_t)4U * i + (uint32_t)3U; + c0 = Lib_IntTypes_Intrinsics_add_carry_u32(c0, t12, t2, res_i); + } + for (uint32_t i = len1 / (uint32_t)4U * (uint32_t)4U; i < len1; i++) + { + uint32_t t1 = a[i]; + uint32_t t2 = b[i]; + uint32_t *res_i = res + i; + c0 = Lib_IntTypes_Intrinsics_add_carry_u32(c0, t1, t2, res_i); + } + uint32_t c00 = c0; + KRML_CHECK_SIZE(sizeof (uint32_t), len1); + uint32_t *tmp = alloca(len1 * sizeof (uint32_t)); + memset(tmp, 0U, len1 * sizeof (uint32_t)); + uint32_t c = (uint32_t)0U; + for (uint32_t i = (uint32_t)0U; i < len1 / (uint32_t)4U * (uint32_t)4U / (uint32_t)4U; i++) + { + uint32_t t1 = res[(uint32_t)4U * i]; + uint32_t t20 = n[(uint32_t)4U * i]; + uint32_t *res_i0 = tmp + (uint32_t)4U * i; + c = Lib_IntTypes_Intrinsics_sub_borrow_u32(c, t1, t20, res_i0); + uint32_t t10 = res[(uint32_t)4U * i + (uint32_t)1U]; + uint32_t t21 = n[(uint32_t)4U * i + (uint32_t)1U]; + uint32_t *res_i1 = tmp + (uint32_t)4U * i + (uint32_t)1U; + c = Lib_IntTypes_Intrinsics_sub_borrow_u32(c, t10, t21, res_i1); + uint32_t t11 = res[(uint32_t)4U * i + (uint32_t)2U]; + uint32_t t22 = n[(uint32_t)4U * i + (uint32_t)2U]; + uint32_t *res_i2 = tmp + (uint32_t)4U * i + (uint32_t)2U; + c = Lib_IntTypes_Intrinsics_sub_borrow_u32(c, t11, t22, res_i2); + uint32_t t12 = res[(uint32_t)4U * i + (uint32_t)3U]; + uint32_t t2 = n[(uint32_t)4U * i + (uint32_t)3U]; + uint32_t *res_i = tmp + (uint32_t)4U * i + (uint32_t)3U; + c = Lib_IntTypes_Intrinsics_sub_borrow_u32(c, t12, t2, res_i); + } + for (uint32_t i = len1 / (uint32_t)4U * (uint32_t)4U; i < len1; i++) + { + uint32_t t1 = res[i]; + uint32_t t2 = n[i]; + uint32_t *res_i = tmp + i; + c = Lib_IntTypes_Intrinsics_sub_borrow_u32(c, t1, t2, res_i); + } + uint32_t c1 = c; + uint32_t c2 = c00 - c1; + for (uint32_t i = (uint32_t)0U; i < len1; i++) + { + uint32_t *os = 
res; + uint32_t x = (c2 & res[i]) | (~c2 & tmp[i]); + os[i] = x; + } +} + +inline void +Hacl_Bignum_bn_add_mod_n_u64( + uint32_t len1, + uint64_t *n, + uint64_t *a, + uint64_t *b, + uint64_t *res +) +{ + uint64_t c0 = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < len1 / (uint32_t)4U * (uint32_t)4U / (uint32_t)4U; i++) + { + uint64_t t1 = a[(uint32_t)4U * i]; + uint64_t t20 = b[(uint32_t)4U * i]; + uint64_t *res_i0 = res + (uint32_t)4U * i; + c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, t1, t20, res_i0); + uint64_t t10 = a[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t t21 = b[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t *res_i1 = res + (uint32_t)4U * i + (uint32_t)1U; + c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, t10, t21, res_i1); + uint64_t t11 = a[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t t22 = b[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t *res_i2 = res + (uint32_t)4U * i + (uint32_t)2U; + c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, t11, t22, res_i2); + uint64_t t12 = a[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t t2 = b[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t *res_i = res + (uint32_t)4U * i + (uint32_t)3U; + c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, t12, t2, res_i); + } + for (uint32_t i = len1 / (uint32_t)4U * (uint32_t)4U; i < len1; i++) + { + uint64_t t1 = a[i]; + uint64_t t2 = b[i]; + uint64_t *res_i = res + i; + c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, t1, t2, res_i); + } + uint64_t c00 = c0; + KRML_CHECK_SIZE(sizeof (uint64_t), len1); + uint64_t *tmp = alloca(len1 * sizeof (uint64_t)); + memset(tmp, 0U, len1 * sizeof (uint64_t)); + uint64_t c = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < len1 / (uint32_t)4U * (uint32_t)4U / (uint32_t)4U; i++) + { + uint64_t t1 = res[(uint32_t)4U * i]; + uint64_t t20 = n[(uint32_t)4U * i]; + uint64_t *res_i0 = tmp + (uint32_t)4U * i; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t1, t20, res_i0); + uint64_t t10 = res[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t t21 = n[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t *res_i1 = tmp + (uint32_t)4U * i + (uint32_t)1U; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t10, t21, res_i1); + uint64_t t11 = res[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t t22 = n[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t *res_i2 = tmp + (uint32_t)4U * i + (uint32_t)2U; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t11, t22, res_i2); + uint64_t t12 = res[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t t2 = n[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t *res_i = tmp + (uint32_t)4U * i + (uint32_t)3U; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t12, t2, res_i); + } + for (uint32_t i = len1 / (uint32_t)4U * (uint32_t)4U; i < len1; i++) + { + uint64_t t1 = res[i]; + uint64_t t2 = n[i]; + uint64_t *res_i = tmp + i; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t1, t2, res_i); + } + uint64_t c1 = c; + uint64_t c2 = c00 - c1; + for (uint32_t i = (uint32_t)0U; i < len1; i++) + { + uint64_t *os = res; + uint64_t x = (c2 & res[i]) | (~c2 & tmp[i]); + os[i] = x; + } +} + +inline uint32_t Hacl_Bignum_ModInvLimb_mod_inv_uint32(uint32_t n0) +{ + uint32_t alpha = (uint32_t)2147483648U; + uint32_t beta = n0; + uint32_t ub = (uint32_t)0U; + uint32_t vb = (uint32_t)0U; + ub = (uint32_t)1U; + vb = (uint32_t)0U; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)32U; i++) + { + uint32_t us = ub; + uint32_t vs = vb; + uint32_t u_is_odd = (uint32_t)0U - (us & (uint32_t)1U); + uint32_t beta_if_u_is_odd = beta & u_is_odd; + ub = ((us ^ beta_if_u_is_odd) >> (uint32_t)1U) + (us & beta_if_u_is_odd); + 
uint32_t alpha_if_u_is_odd = alpha & u_is_odd; + vb = (vs >> (uint32_t)1U) + alpha_if_u_is_odd; + } + return vb; +} + +inline uint64_t Hacl_Bignum_ModInvLimb_mod_inv_uint64(uint64_t n0) +{ + uint64_t alpha = (uint64_t)9223372036854775808U; + uint64_t beta = n0; + uint64_t ub = (uint64_t)0U; + uint64_t vb = (uint64_t)0U; + ub = (uint64_t)1U; + vb = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)64U; i++) + { + uint64_t us = ub; + uint64_t vs = vb; + uint64_t u_is_odd = (uint64_t)0U - (us & (uint64_t)1U); + uint64_t beta_if_u_is_odd = beta & u_is_odd; + ub = ((us ^ beta_if_u_is_odd) >> (uint32_t)1U) + (us & beta_if_u_is_odd); + uint64_t alpha_if_u_is_odd = alpha & u_is_odd; + vb = (vs >> (uint32_t)1U) + alpha_if_u_is_odd; + } + return vb; +} + +uint32_t Hacl_Bignum_Montgomery_bn_check_modulus_u32(uint32_t len, uint32_t *n) +{ + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t *one = alloca(len * sizeof (uint32_t)); + memset(one, 0U, len * sizeof (uint32_t)); + memset(one, 0U, len * sizeof (uint32_t)); + one[0U] = (uint32_t)1U; + uint32_t bit0 = n[0U] & (uint32_t)1U; + uint32_t m0 = (uint32_t)0U - bit0; + uint32_t acc = (uint32_t)0U; + for (uint32_t i = (uint32_t)0U; i < len; i++) + { + uint32_t beq = FStar_UInt32_eq_mask(one[i], n[i]); + uint32_t blt = ~FStar_UInt32_gte_mask(one[i], n[i]); + acc = (beq & acc) | (~beq & ((blt & (uint32_t)0xFFFFFFFFU) | (~blt & (uint32_t)0U))); + } + uint32_t m1 = acc; + return m0 & m1; +} + +void +Hacl_Bignum_Montgomery_bn_precomp_r2_mod_n_u32( + uint32_t len, + uint32_t nBits, + uint32_t *n, + uint32_t *res +) +{ + memset(res, 0U, len * sizeof (uint32_t)); + uint32_t i = nBits / (uint32_t)32U; + uint32_t j = nBits % (uint32_t)32U; + res[i] = res[i] | (uint32_t)1U << j; + for (uint32_t i0 = (uint32_t)0U; i0 < (uint32_t)64U * len - nBits; i0++) + { + Hacl_Bignum_bn_add_mod_n_u32(len, n, res, res, res); + } +} + +void +Hacl_Bignum_Montgomery_bn_mont_reduction_u32( + uint32_t len, + uint32_t *n, + uint32_t nInv, + uint32_t *c, + uint32_t *res +) +{ + uint32_t c0 = (uint32_t)0U; + for (uint32_t i0 = (uint32_t)0U; i0 < len; i0++) + { + uint32_t qj = nInv * c[i0]; + uint32_t *res_j0 = c + i0; + uint32_t c1 = (uint32_t)0U; + for (uint32_t i = (uint32_t)0U; i < len / (uint32_t)4U * (uint32_t)4U / (uint32_t)4U; i++) + { + uint32_t a_i = n[(uint32_t)4U * i]; + uint32_t *res_i0 = res_j0 + (uint32_t)4U * i; + c1 = Hacl_Bignum_Base_mul_wide_add2_u32(a_i, qj, c1, res_i0); + uint32_t a_i0 = n[(uint32_t)4U * i + (uint32_t)1U]; + uint32_t *res_i1 = res_j0 + (uint32_t)4U * i + (uint32_t)1U; + c1 = Hacl_Bignum_Base_mul_wide_add2_u32(a_i0, qj, c1, res_i1); + uint32_t a_i1 = n[(uint32_t)4U * i + (uint32_t)2U]; + uint32_t *res_i2 = res_j0 + (uint32_t)4U * i + (uint32_t)2U; + c1 = Hacl_Bignum_Base_mul_wide_add2_u32(a_i1, qj, c1, res_i2); + uint32_t a_i2 = n[(uint32_t)4U * i + (uint32_t)3U]; + uint32_t *res_i = res_j0 + (uint32_t)4U * i + (uint32_t)3U; + c1 = Hacl_Bignum_Base_mul_wide_add2_u32(a_i2, qj, c1, res_i); + } + for (uint32_t i = len / (uint32_t)4U * (uint32_t)4U; i < len; i++) + { + uint32_t a_i = n[i]; + uint32_t *res_i = res_j0 + i; + c1 = Hacl_Bignum_Base_mul_wide_add2_u32(a_i, qj, c1, res_i); + } + uint32_t r = c1; + uint32_t c10 = r; + uint32_t *resb = c + len + i0; + uint32_t res_j = c[len + i0]; + c0 = Lib_IntTypes_Intrinsics_add_carry_u32(c0, c10, res_j, resb); + } + memcpy(res, c + len, (len + len - len) * sizeof (uint32_t)); + uint32_t c00 = c0; + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t *tmp = alloca(len * sizeof (uint32_t)); + 
memset(tmp, 0U, len * sizeof (uint32_t)); + uint32_t c1 = (uint32_t)0U; + for (uint32_t i = (uint32_t)0U; i < len / (uint32_t)4U * (uint32_t)4U / (uint32_t)4U; i++) + { + uint32_t t1 = res[(uint32_t)4U * i]; + uint32_t t20 = n[(uint32_t)4U * i]; + uint32_t *res_i0 = tmp + (uint32_t)4U * i; + c1 = Lib_IntTypes_Intrinsics_sub_borrow_u32(c1, t1, t20, res_i0); + uint32_t t10 = res[(uint32_t)4U * i + (uint32_t)1U]; + uint32_t t21 = n[(uint32_t)4U * i + (uint32_t)1U]; + uint32_t *res_i1 = tmp + (uint32_t)4U * i + (uint32_t)1U; + c1 = Lib_IntTypes_Intrinsics_sub_borrow_u32(c1, t10, t21, res_i1); + uint32_t t11 = res[(uint32_t)4U * i + (uint32_t)2U]; + uint32_t t22 = n[(uint32_t)4U * i + (uint32_t)2U]; + uint32_t *res_i2 = tmp + (uint32_t)4U * i + (uint32_t)2U; + c1 = Lib_IntTypes_Intrinsics_sub_borrow_u32(c1, t11, t22, res_i2); + uint32_t t12 = res[(uint32_t)4U * i + (uint32_t)3U]; + uint32_t t2 = n[(uint32_t)4U * i + (uint32_t)3U]; + uint32_t *res_i = tmp + (uint32_t)4U * i + (uint32_t)3U; + c1 = Lib_IntTypes_Intrinsics_sub_borrow_u32(c1, t12, t2, res_i); + } + for (uint32_t i = len / (uint32_t)4U * (uint32_t)4U; i < len; i++) + { + uint32_t t1 = res[i]; + uint32_t t2 = n[i]; + uint32_t *res_i = tmp + i; + c1 = Lib_IntTypes_Intrinsics_sub_borrow_u32(c1, t1, t2, res_i); + } + uint32_t c10 = c1; + uint32_t c2 = c00 - c10; + for (uint32_t i = (uint32_t)0U; i < len; i++) + { + uint32_t *os = res; + uint32_t x = (c2 & res[i]) | (~c2 & tmp[i]); + os[i] = x; + } +} + +void +Hacl_Bignum_Montgomery_bn_to_mont_u32( + uint32_t len, + uint32_t *n, + uint32_t nInv, + uint32_t *r2, + uint32_t *a, + uint32_t *aM +) +{ + KRML_CHECK_SIZE(sizeof (uint32_t), len + len); + uint32_t *c = alloca((len + len) * sizeof (uint32_t)); + memset(c, 0U, (len + len) * sizeof (uint32_t)); + KRML_CHECK_SIZE(sizeof (uint32_t), (uint32_t)4U * len); + uint32_t *tmp = alloca((uint32_t)4U * len * sizeof (uint32_t)); + memset(tmp, 0U, (uint32_t)4U * len * sizeof (uint32_t)); + Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint32(len, a, r2, tmp, c); + Hacl_Bignum_Montgomery_bn_mont_reduction_u32(len, n, nInv, c, aM); +} + +void +Hacl_Bignum_Montgomery_bn_from_mont_u32( + uint32_t len, + uint32_t *n, + uint32_t nInv_u64, + uint32_t *aM, + uint32_t *a +) +{ + KRML_CHECK_SIZE(sizeof (uint32_t), len + len); + uint32_t *tmp = alloca((len + len) * sizeof (uint32_t)); + memset(tmp, 0U, (len + len) * sizeof (uint32_t)); + memcpy(tmp, aM, len * sizeof (uint32_t)); + Hacl_Bignum_Montgomery_bn_mont_reduction_u32(len, n, nInv_u64, tmp, a); +} + +void +Hacl_Bignum_Montgomery_bn_mont_mul_u32( + uint32_t len, + uint32_t *n, + uint32_t nInv_u64, + uint32_t *aM, + uint32_t *bM, + uint32_t *resM +) +{ + KRML_CHECK_SIZE(sizeof (uint32_t), len + len); + uint32_t *c = alloca((len + len) * sizeof (uint32_t)); + memset(c, 0U, (len + len) * sizeof (uint32_t)); + KRML_CHECK_SIZE(sizeof (uint32_t), (uint32_t)4U * len); + uint32_t *tmp = alloca((uint32_t)4U * len * sizeof (uint32_t)); + memset(tmp, 0U, (uint32_t)4U * len * sizeof (uint32_t)); + Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint32(len, aM, bM, tmp, c); + Hacl_Bignum_Montgomery_bn_mont_reduction_u32(len, n, nInv_u64, c, resM); +} + +void +Hacl_Bignum_Montgomery_bn_mont_sqr_u32( + uint32_t len, + uint32_t *n, + uint32_t nInv_u64, + uint32_t *aM, + uint32_t *resM +) +{ + KRML_CHECK_SIZE(sizeof (uint32_t), len + len); + uint32_t *c = alloca((len + len) * sizeof (uint32_t)); + memset(c, 0U, (len + len) * sizeof (uint32_t)); + KRML_CHECK_SIZE(sizeof (uint32_t), (uint32_t)4U * len); + uint32_t *tmp = 
alloca((uint32_t)4U * len * sizeof (uint32_t)); + memset(tmp, 0U, (uint32_t)4U * len * sizeof (uint32_t)); + Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint32(len, aM, tmp, c); + Hacl_Bignum_Montgomery_bn_mont_reduction_u32(len, n, nInv_u64, c, resM); +} + +uint64_t Hacl_Bignum_Montgomery_bn_check_modulus_u64(uint32_t len, uint64_t *n) +{ + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t *one = alloca(len * sizeof (uint64_t)); + memset(one, 0U, len * sizeof (uint64_t)); + memset(one, 0U, len * sizeof (uint64_t)); + one[0U] = (uint64_t)1U; + uint64_t bit0 = n[0U] & (uint64_t)1U; + uint64_t m0 = (uint64_t)0U - bit0; + uint64_t acc = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < len; i++) + { + uint64_t beq = FStar_UInt64_eq_mask(one[i], n[i]); + uint64_t blt = ~FStar_UInt64_gte_mask(one[i], n[i]); + acc = (beq & acc) | (~beq & ((blt & (uint64_t)0xFFFFFFFFFFFFFFFFU) | (~blt & (uint64_t)0U))); + } + uint64_t m1 = acc; + return m0 & m1; +} + +void +Hacl_Bignum_Montgomery_bn_precomp_r2_mod_n_u64( + uint32_t len, + uint32_t nBits, + uint64_t *n, + uint64_t *res +) +{ + memset(res, 0U, len * sizeof (uint64_t)); + uint32_t i = nBits / (uint32_t)64U; + uint32_t j = nBits % (uint32_t)64U; + res[i] = res[i] | (uint64_t)1U << j; + for (uint32_t i0 = (uint32_t)0U; i0 < (uint32_t)128U * len - nBits; i0++) + { + Hacl_Bignum_bn_add_mod_n_u64(len, n, res, res, res); + } +} + +void +Hacl_Bignum_Montgomery_bn_mont_reduction_u64( + uint32_t len, + uint64_t *n, + uint64_t nInv, + uint64_t *c, + uint64_t *res +) +{ + uint64_t c0 = (uint64_t)0U; + for (uint32_t i0 = (uint32_t)0U; i0 < len; i0++) + { + uint64_t qj = nInv * c[i0]; + uint64_t *res_j0 = c + i0; + uint64_t c1 = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < len / (uint32_t)4U * (uint32_t)4U / (uint32_t)4U; i++) + { + uint64_t a_i = n[(uint32_t)4U * i]; + uint64_t *res_i0 = res_j0 + (uint32_t)4U * i; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, qj, c1, res_i0); + uint64_t a_i0 = n[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t *res_i1 = res_j0 + (uint32_t)4U * i + (uint32_t)1U; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i0, qj, c1, res_i1); + uint64_t a_i1 = n[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t *res_i2 = res_j0 + (uint32_t)4U * i + (uint32_t)2U; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i1, qj, c1, res_i2); + uint64_t a_i2 = n[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t *res_i = res_j0 + (uint32_t)4U * i + (uint32_t)3U; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i2, qj, c1, res_i); + } + for (uint32_t i = len / (uint32_t)4U * (uint32_t)4U; i < len; i++) + { + uint64_t a_i = n[i]; + uint64_t *res_i = res_j0 + i; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, qj, c1, res_i); + } + uint64_t r = c1; + uint64_t c10 = r; + uint64_t *resb = c + len + i0; + uint64_t res_j = c[len + i0]; + c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, c10, res_j, resb); + } + memcpy(res, c + len, (len + len - len) * sizeof (uint64_t)); + uint64_t c00 = c0; + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t *tmp = alloca(len * sizeof (uint64_t)); + memset(tmp, 0U, len * sizeof (uint64_t)); + uint64_t c1 = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < len / (uint32_t)4U * (uint32_t)4U / (uint32_t)4U; i++) + { + uint64_t t1 = res[(uint32_t)4U * i]; + uint64_t t20 = n[(uint32_t)4U * i]; + uint64_t *res_i0 = tmp + (uint32_t)4U * i; + c1 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c1, t1, t20, res_i0); + uint64_t t10 = res[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t t21 = n[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t *res_i1 = tmp + (uint32_t)4U * i 
+ (uint32_t)1U; + c1 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c1, t10, t21, res_i1); + uint64_t t11 = res[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t t22 = n[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t *res_i2 = tmp + (uint32_t)4U * i + (uint32_t)2U; + c1 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c1, t11, t22, res_i2); + uint64_t t12 = res[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t t2 = n[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t *res_i = tmp + (uint32_t)4U * i + (uint32_t)3U; + c1 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c1, t12, t2, res_i); + } + for (uint32_t i = len / (uint32_t)4U * (uint32_t)4U; i < len; i++) + { + uint64_t t1 = res[i]; + uint64_t t2 = n[i]; + uint64_t *res_i = tmp + i; + c1 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c1, t1, t2, res_i); + } + uint64_t c10 = c1; + uint64_t c2 = c00 - c10; + for (uint32_t i = (uint32_t)0U; i < len; i++) + { + uint64_t *os = res; + uint64_t x = (c2 & res[i]) | (~c2 & tmp[i]); + os[i] = x; + } +} + +void +Hacl_Bignum_Montgomery_bn_to_mont_u64( + uint32_t len, + uint64_t *n, + uint64_t nInv, + uint64_t *r2, + uint64_t *a, + uint64_t *aM +) +{ + KRML_CHECK_SIZE(sizeof (uint64_t), len + len); + uint64_t *c = alloca((len + len) * sizeof (uint64_t)); + memset(c, 0U, (len + len) * sizeof (uint64_t)); + KRML_CHECK_SIZE(sizeof (uint64_t), (uint32_t)4U * len); + uint64_t *tmp = alloca((uint32_t)4U * len * sizeof (uint64_t)); + memset(tmp, 0U, (uint32_t)4U * len * sizeof (uint64_t)); + Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint64(len, a, r2, tmp, c); + Hacl_Bignum_Montgomery_bn_mont_reduction_u64(len, n, nInv, c, aM); +} + +void +Hacl_Bignum_Montgomery_bn_from_mont_u64( + uint32_t len, + uint64_t *n, + uint64_t nInv_u64, + uint64_t *aM, + uint64_t *a +) +{ + KRML_CHECK_SIZE(sizeof (uint64_t), len + len); + uint64_t *tmp = alloca((len + len) * sizeof (uint64_t)); + memset(tmp, 0U, (len + len) * sizeof (uint64_t)); + memcpy(tmp, aM, len * sizeof (uint64_t)); + Hacl_Bignum_Montgomery_bn_mont_reduction_u64(len, n, nInv_u64, tmp, a); +} + +void +Hacl_Bignum_Montgomery_bn_mont_mul_u64( + uint32_t len, + uint64_t *n, + uint64_t nInv_u64, + uint64_t *aM, + uint64_t *bM, + uint64_t *resM +) +{ + KRML_CHECK_SIZE(sizeof (uint64_t), len + len); + uint64_t *c = alloca((len + len) * sizeof (uint64_t)); + memset(c, 0U, (len + len) * sizeof (uint64_t)); + KRML_CHECK_SIZE(sizeof (uint64_t), (uint32_t)4U * len); + uint64_t *tmp = alloca((uint32_t)4U * len * sizeof (uint64_t)); + memset(tmp, 0U, (uint32_t)4U * len * sizeof (uint64_t)); + Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint64(len, aM, bM, tmp, c); + Hacl_Bignum_Montgomery_bn_mont_reduction_u64(len, n, nInv_u64, c, resM); +} + +void +Hacl_Bignum_Montgomery_bn_mont_sqr_u64( + uint32_t len, + uint64_t *n, + uint64_t nInv_u64, + uint64_t *aM, + uint64_t *resM +) +{ + KRML_CHECK_SIZE(sizeof (uint64_t), len + len); + uint64_t *c = alloca((len + len) * sizeof (uint64_t)); + memset(c, 0U, (len + len) * sizeof (uint64_t)); + KRML_CHECK_SIZE(sizeof (uint64_t), (uint32_t)4U * len); + uint64_t *tmp = alloca((uint32_t)4U * len * sizeof (uint64_t)); + memset(tmp, 0U, (uint32_t)4U * len * sizeof (uint64_t)); + Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint64(len, aM, tmp, c); + Hacl_Bignum_Montgomery_bn_mont_reduction_u64(len, n, nInv_u64, c, resM); +} + +static void +bn_almost_mont_reduction_u32( + uint32_t len, + uint32_t *n, + uint32_t nInv, + uint32_t *c, + uint32_t *res +) +{ + uint32_t c0 = (uint32_t)0U; + for (uint32_t i0 = (uint32_t)0U; i0 < len; i0++) + { + uint32_t qj = nInv * c[i0]; + uint32_t *res_j0 = c + i0; 
+ uint32_t c1 = (uint32_t)0U; + for (uint32_t i = (uint32_t)0U; i < len / (uint32_t)4U * (uint32_t)4U / (uint32_t)4U; i++) + { + uint32_t a_i = n[(uint32_t)4U * i]; + uint32_t *res_i0 = res_j0 + (uint32_t)4U * i; + c1 = Hacl_Bignum_Base_mul_wide_add2_u32(a_i, qj, c1, res_i0); + uint32_t a_i0 = n[(uint32_t)4U * i + (uint32_t)1U]; + uint32_t *res_i1 = res_j0 + (uint32_t)4U * i + (uint32_t)1U; + c1 = Hacl_Bignum_Base_mul_wide_add2_u32(a_i0, qj, c1, res_i1); + uint32_t a_i1 = n[(uint32_t)4U * i + (uint32_t)2U]; + uint32_t *res_i2 = res_j0 + (uint32_t)4U * i + (uint32_t)2U; + c1 = Hacl_Bignum_Base_mul_wide_add2_u32(a_i1, qj, c1, res_i2); + uint32_t a_i2 = n[(uint32_t)4U * i + (uint32_t)3U]; + uint32_t *res_i = res_j0 + (uint32_t)4U * i + (uint32_t)3U; + c1 = Hacl_Bignum_Base_mul_wide_add2_u32(a_i2, qj, c1, res_i); + } + for (uint32_t i = len / (uint32_t)4U * (uint32_t)4U; i < len; i++) + { + uint32_t a_i = n[i]; + uint32_t *res_i = res_j0 + i; + c1 = Hacl_Bignum_Base_mul_wide_add2_u32(a_i, qj, c1, res_i); + } + uint32_t r = c1; + uint32_t c10 = r; + uint32_t *resb = c + len + i0; + uint32_t res_j = c[len + i0]; + c0 = Lib_IntTypes_Intrinsics_add_carry_u32(c0, c10, res_j, resb); + } + memcpy(res, c + len, (len + len - len) * sizeof (uint32_t)); + uint32_t c00 = c0; + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t *tmp = alloca(len * sizeof (uint32_t)); + memset(tmp, 0U, len * sizeof (uint32_t)); + uint32_t c1 = Hacl_Bignum_Addition_bn_sub_eq_len_u32(len, res, n, tmp); + uint32_t m = (uint32_t)0U - c00; + for (uint32_t i = (uint32_t)0U; i < len; i++) + { + uint32_t *os = res; + uint32_t x = (m & tmp[i]) | (~m & res[i]); + os[i] = x; + } +} + +static void +bn_almost_mont_mul_u32( + uint32_t len, + uint32_t *n, + uint32_t nInv_u64, + uint32_t *aM, + uint32_t *bM, + uint32_t *resM +) +{ + KRML_CHECK_SIZE(sizeof (uint32_t), len + len); + uint32_t *c = alloca((len + len) * sizeof (uint32_t)); + memset(c, 0U, (len + len) * sizeof (uint32_t)); + KRML_CHECK_SIZE(sizeof (uint32_t), (uint32_t)4U * len); + uint32_t *tmp = alloca((uint32_t)4U * len * sizeof (uint32_t)); + memset(tmp, 0U, (uint32_t)4U * len * sizeof (uint32_t)); + Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint32(len, aM, bM, tmp, c); + bn_almost_mont_reduction_u32(len, n, nInv_u64, c, resM); +} + +static void +bn_almost_mont_sqr_u32( + uint32_t len, + uint32_t *n, + uint32_t nInv_u64, + uint32_t *aM, + uint32_t *resM +) +{ + KRML_CHECK_SIZE(sizeof (uint32_t), len + len); + uint32_t *c = alloca((len + len) * sizeof (uint32_t)); + memset(c, 0U, (len + len) * sizeof (uint32_t)); + KRML_CHECK_SIZE(sizeof (uint32_t), (uint32_t)4U * len); + uint32_t *tmp = alloca((uint32_t)4U * len * sizeof (uint32_t)); + memset(tmp, 0U, (uint32_t)4U * len * sizeof (uint32_t)); + Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint32(len, aM, tmp, c); + bn_almost_mont_reduction_u32(len, n, nInv_u64, c, resM); +} + +static void +bn_almost_mont_reduction_u64( + uint32_t len, + uint64_t *n, + uint64_t nInv, + uint64_t *c, + uint64_t *res +) +{ + uint64_t c0 = (uint64_t)0U; + for (uint32_t i0 = (uint32_t)0U; i0 < len; i0++) + { + uint64_t qj = nInv * c[i0]; + uint64_t *res_j0 = c + i0; + uint64_t c1 = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < len / (uint32_t)4U * (uint32_t)4U / (uint32_t)4U; i++) + { + uint64_t a_i = n[(uint32_t)4U * i]; + uint64_t *res_i0 = res_j0 + (uint32_t)4U * i; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, qj, c1, res_i0); + uint64_t a_i0 = n[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t *res_i1 = res_j0 + (uint32_t)4U * i + (uint32_t)1U; + 
c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i0, qj, c1, res_i1); + uint64_t a_i1 = n[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t *res_i2 = res_j0 + (uint32_t)4U * i + (uint32_t)2U; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i1, qj, c1, res_i2); + uint64_t a_i2 = n[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t *res_i = res_j0 + (uint32_t)4U * i + (uint32_t)3U; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i2, qj, c1, res_i); + } + for (uint32_t i = len / (uint32_t)4U * (uint32_t)4U; i < len; i++) + { + uint64_t a_i = n[i]; + uint64_t *res_i = res_j0 + i; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, qj, c1, res_i); + } + uint64_t r = c1; + uint64_t c10 = r; + uint64_t *resb = c + len + i0; + uint64_t res_j = c[len + i0]; + c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, c10, res_j, resb); + } + memcpy(res, c + len, (len + len - len) * sizeof (uint64_t)); + uint64_t c00 = c0; + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t *tmp = alloca(len * sizeof (uint64_t)); + memset(tmp, 0U, len * sizeof (uint64_t)); + uint64_t c1 = Hacl_Bignum_Addition_bn_sub_eq_len_u64(len, res, n, tmp); + uint64_t m = (uint64_t)0U - c00; + for (uint32_t i = (uint32_t)0U; i < len; i++) + { + uint64_t *os = res; + uint64_t x = (m & tmp[i]) | (~m & res[i]); + os[i] = x; + } +} + +static void +bn_almost_mont_mul_u64( + uint32_t len, + uint64_t *n, + uint64_t nInv_u64, + uint64_t *aM, + uint64_t *bM, + uint64_t *resM +) +{ + KRML_CHECK_SIZE(sizeof (uint64_t), len + len); + uint64_t *c = alloca((len + len) * sizeof (uint64_t)); + memset(c, 0U, (len + len) * sizeof (uint64_t)); + KRML_CHECK_SIZE(sizeof (uint64_t), (uint32_t)4U * len); + uint64_t *tmp = alloca((uint32_t)4U * len * sizeof (uint64_t)); + memset(tmp, 0U, (uint32_t)4U * len * sizeof (uint64_t)); + Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint64(len, aM, bM, tmp, c); + bn_almost_mont_reduction_u64(len, n, nInv_u64, c, resM); +} + +static void +bn_almost_mont_sqr_u64( + uint32_t len, + uint64_t *n, + uint64_t nInv_u64, + uint64_t *aM, + uint64_t *resM +) +{ + KRML_CHECK_SIZE(sizeof (uint64_t), len + len); + uint64_t *c = alloca((len + len) * sizeof (uint64_t)); + memset(c, 0U, (len + len) * sizeof (uint64_t)); + KRML_CHECK_SIZE(sizeof (uint64_t), (uint32_t)4U * len); + uint64_t *tmp = alloca((uint32_t)4U * len * sizeof (uint64_t)); + memset(tmp, 0U, (uint32_t)4U * len * sizeof (uint64_t)); + Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint64(len, aM, tmp, c); + bn_almost_mont_reduction_u64(len, n, nInv_u64, c, resM); +} + +uint32_t +Hacl_Bignum_Exponentiation_bn_check_mod_exp_u32( + uint32_t len, + uint32_t *n, + uint32_t *a, + uint32_t bBits, + uint32_t *b +) +{ + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t *one = alloca(len * sizeof (uint32_t)); + memset(one, 0U, len * sizeof (uint32_t)); + memset(one, 0U, len * sizeof (uint32_t)); + one[0U] = (uint32_t)1U; + uint32_t bit0 = n[0U] & (uint32_t)1U; + uint32_t m0 = (uint32_t)0U - bit0; + uint32_t acc0 = (uint32_t)0U; + for (uint32_t i = (uint32_t)0U; i < len; i++) + { + uint32_t beq = FStar_UInt32_eq_mask(one[i], n[i]); + uint32_t blt = ~FStar_UInt32_gte_mask(one[i], n[i]); + acc0 = (beq & acc0) | (~beq & ((blt & (uint32_t)0xFFFFFFFFU) | (~blt & (uint32_t)0U))); + } + uint32_t m10 = acc0; + uint32_t m00 = m0 & m10; + uint32_t bLen; + if (bBits == (uint32_t)0U) + { + bLen = (uint32_t)1U; + } + else + { + bLen = (bBits - (uint32_t)1U) / (uint32_t)32U + (uint32_t)1U; + } + uint32_t m1; + if (bBits < (uint32_t)32U * bLen) + { + KRML_CHECK_SIZE(sizeof (uint32_t), bLen); + uint32_t *b2 = alloca(bLen * sizeof 
(uint32_t)); + memset(b2, 0U, bLen * sizeof (uint32_t)); + uint32_t i0 = bBits / (uint32_t)32U; + uint32_t j = bBits % (uint32_t)32U; + b2[i0] = b2[i0] | (uint32_t)1U << j; + uint32_t acc = (uint32_t)0U; + for (uint32_t i = (uint32_t)0U; i < bLen; i++) + { + uint32_t beq = FStar_UInt32_eq_mask(b[i], b2[i]); + uint32_t blt = ~FStar_UInt32_gte_mask(b[i], b2[i]); + acc = (beq & acc) | (~beq & ((blt & (uint32_t)0xFFFFFFFFU) | (~blt & (uint32_t)0U))); + } + uint32_t res = acc; + m1 = res; + } + else + { + m1 = (uint32_t)0xFFFFFFFFU; + } + uint32_t acc = (uint32_t)0U; + for (uint32_t i = (uint32_t)0U; i < len; i++) + { + uint32_t beq = FStar_UInt32_eq_mask(a[i], n[i]); + uint32_t blt = ~FStar_UInt32_gte_mask(a[i], n[i]); + acc = (beq & acc) | (~beq & ((blt & (uint32_t)0xFFFFFFFFU) | (~blt & (uint32_t)0U))); + } + uint32_t m2 = acc; + uint32_t m = m1 & m2; + return m00 & m; +} + +void +Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_precomp_u32( + uint32_t len, + uint32_t *n, + uint32_t mu, + uint32_t *r2, + uint32_t *a, + uint32_t bBits, + uint32_t *b, + uint32_t *res +) +{ + if (bBits < (uint32_t)200U) + { + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t *aM = alloca(len * sizeof (uint32_t)); + memset(aM, 0U, len * sizeof (uint32_t)); + KRML_CHECK_SIZE(sizeof (uint32_t), len + len); + uint32_t *c = alloca((len + len) * sizeof (uint32_t)); + memset(c, 0U, (len + len) * sizeof (uint32_t)); + KRML_CHECK_SIZE(sizeof (uint32_t), (uint32_t)4U * len); + uint32_t *tmp0 = alloca((uint32_t)4U * len * sizeof (uint32_t)); + memset(tmp0, 0U, (uint32_t)4U * len * sizeof (uint32_t)); + Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint32(len, a, r2, tmp0, c); + Hacl_Bignum_Montgomery_bn_mont_reduction_u32(len, n, mu, c, aM); + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t *resM = alloca(len * sizeof (uint32_t)); + memset(resM, 0U, len * sizeof (uint32_t)); + KRML_CHECK_SIZE(sizeof (uint32_t), len + len); + uint32_t *tmp1 = alloca((len + len) * sizeof (uint32_t)); + memset(tmp1, 0U, (len + len) * sizeof (uint32_t)); + memcpy(tmp1, r2, len * sizeof (uint32_t)); + Hacl_Bignum_Montgomery_bn_mont_reduction_u32(len, n, mu, tmp1, resM); + for (uint32_t i = (uint32_t)0U; i < bBits; i++) + { + uint32_t i1 = i / (uint32_t)32U; + uint32_t j = i % (uint32_t)32U; + uint32_t tmp = b[i1]; + uint32_t bit = tmp >> j & (uint32_t)1U; + if (!(bit == (uint32_t)0U)) + { + bn_almost_mont_mul_u32(len, n, mu, resM, aM, resM); + } + bn_almost_mont_sqr_u32(len, n, mu, aM, aM); + } + KRML_CHECK_SIZE(sizeof (uint32_t), len + len); + uint32_t *tmp = alloca((len + len) * sizeof (uint32_t)); + memset(tmp, 0U, (len + len) * sizeof (uint32_t)); + memcpy(tmp, resM, len * sizeof (uint32_t)); + Hacl_Bignum_Montgomery_bn_mont_reduction_u32(len, n, mu, tmp, res); + return; + } + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t *aM = alloca(len * sizeof (uint32_t)); + memset(aM, 0U, len * sizeof (uint32_t)); + KRML_CHECK_SIZE(sizeof (uint32_t), len + len); + uint32_t *c = alloca((len + len) * sizeof (uint32_t)); + memset(c, 0U, (len + len) * sizeof (uint32_t)); + KRML_CHECK_SIZE(sizeof (uint32_t), (uint32_t)4U * len); + uint32_t *tmp0 = alloca((uint32_t)4U * len * sizeof (uint32_t)); + memset(tmp0, 0U, (uint32_t)4U * len * sizeof (uint32_t)); + Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint32(len, a, r2, tmp0, c); + Hacl_Bignum_Montgomery_bn_mont_reduction_u32(len, n, mu, c, aM); + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t *resM = alloca(len * sizeof (uint32_t)); + memset(resM, 0U, len * sizeof (uint32_t)); + uint32_t bLen; + if (bBits 
== (uint32_t)0U) + { + bLen = (uint32_t)1U; + } + else + { + bLen = (bBits - (uint32_t)1U) / (uint32_t)32U + (uint32_t)1U; + } + KRML_CHECK_SIZE(sizeof (uint32_t), len + len); + uint32_t *tmp = alloca((len + len) * sizeof (uint32_t)); + memset(tmp, 0U, (len + len) * sizeof (uint32_t)); + memcpy(tmp, r2, len * sizeof (uint32_t)); + Hacl_Bignum_Montgomery_bn_mont_reduction_u32(len, n, mu, tmp, resM); + uint32_t table_len = (uint32_t)16U; + KRML_CHECK_SIZE(sizeof (uint32_t), table_len * len); + uint32_t *table = alloca(table_len * len * sizeof (uint32_t)); + memset(table, 0U, table_len * len * sizeof (uint32_t)); + memcpy(table, resM, len * sizeof (uint32_t)); + uint32_t *t1 = table + len; + memcpy(t1, aM, len * sizeof (uint32_t)); + for (uint32_t i = (uint32_t)0U; i < table_len - (uint32_t)2U; i++) + { + uint32_t *t11 = table + (i + (uint32_t)1U) * len; + uint32_t *t2 = table + (i + (uint32_t)2U) * len; + bn_almost_mont_mul_u32(len, n, mu, t11, aM, t2); + } + for (uint32_t i = (uint32_t)0U; i < bBits / (uint32_t)4U; i++) + { + for (uint32_t i0 = (uint32_t)0U; i0 < (uint32_t)4U; i0++) + { + bn_almost_mont_sqr_u32(len, n, mu, resM, resM); + } + uint32_t mask_l = (uint32_t)16U - (uint32_t)1U; + uint32_t i1 = (bBits - (uint32_t)4U * i - (uint32_t)4U) / (uint32_t)32U; + uint32_t j = (bBits - (uint32_t)4U * i - (uint32_t)4U) % (uint32_t)32U; + uint32_t p1 = b[i1] >> j; + uint32_t ite; + if (i1 + (uint32_t)1U < bLen && (uint32_t)0U < j) + { + ite = p1 | b[i1 + (uint32_t)1U] << ((uint32_t)32U - j); + } + else + { + ite = p1; + } + uint32_t bits_l = ite & mask_l; + uint32_t bits_l32 = bits_l; + uint32_t *a_bits_l = table + bits_l32 * len; + bn_almost_mont_mul_u32(len, n, mu, resM, a_bits_l, resM); + } + if (!(bBits % (uint32_t)4U == (uint32_t)0U)) + { + uint32_t c0 = bBits % (uint32_t)4U; + for (uint32_t i = (uint32_t)0U; i < c0; i++) + { + bn_almost_mont_sqr_u32(len, n, mu, resM, resM); + } + uint32_t c1 = bBits % (uint32_t)4U; + uint32_t mask_l = ((uint32_t)1U << c1) - (uint32_t)1U; + uint32_t i = (uint32_t)0U; + uint32_t j = (uint32_t)0U; + uint32_t p1 = b[i] >> j; + uint32_t ite; + if (i + (uint32_t)1U < bLen && (uint32_t)0U < j) + { + ite = p1 | b[i + (uint32_t)1U] << ((uint32_t)32U - j); + } + else + { + ite = p1; + } + uint32_t bits_c = ite & mask_l; + uint32_t bits_c0 = bits_c; + uint32_t bits_c32 = bits_c0; + uint32_t *a_bits_c = table + bits_c32 * len; + bn_almost_mont_mul_u32(len, n, mu, resM, a_bits_c, resM); + } + KRML_CHECK_SIZE(sizeof (uint32_t), len + len); + uint32_t *tmp1 = alloca((len + len) * sizeof (uint32_t)); + memset(tmp1, 0U, (len + len) * sizeof (uint32_t)); + memcpy(tmp1, resM, len * sizeof (uint32_t)); + Hacl_Bignum_Montgomery_bn_mont_reduction_u32(len, n, mu, tmp1, res); +} + +void +Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_precomp_u32( + uint32_t len, + uint32_t *n, + uint32_t mu, + uint32_t *r2, + uint32_t *a, + uint32_t bBits, + uint32_t *b, + uint32_t *res +) +{ + if (bBits < (uint32_t)200U) + { + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t *aM = alloca(len * sizeof (uint32_t)); + memset(aM, 0U, len * sizeof (uint32_t)); + KRML_CHECK_SIZE(sizeof (uint32_t), len + len); + uint32_t *c = alloca((len + len) * sizeof (uint32_t)); + memset(c, 0U, (len + len) * sizeof (uint32_t)); + KRML_CHECK_SIZE(sizeof (uint32_t), (uint32_t)4U * len); + uint32_t *tmp0 = alloca((uint32_t)4U * len * sizeof (uint32_t)); + memset(tmp0, 0U, (uint32_t)4U * len * sizeof (uint32_t)); + Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint32(len, a, r2, tmp0, c); + 
Hacl_Bignum_Montgomery_bn_mont_reduction_u32(len, n, mu, c, aM); + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t *resM = alloca(len * sizeof (uint32_t)); + memset(resM, 0U, len * sizeof (uint32_t)); + KRML_CHECK_SIZE(sizeof (uint32_t), len + len); + uint32_t *tmp1 = alloca((len + len) * sizeof (uint32_t)); + memset(tmp1, 0U, (len + len) * sizeof (uint32_t)); + memcpy(tmp1, r2, len * sizeof (uint32_t)); + Hacl_Bignum_Montgomery_bn_mont_reduction_u32(len, n, mu, tmp1, resM); + uint32_t sw = (uint32_t)0U; + for (uint32_t i0 = (uint32_t)0U; i0 < bBits; i0++) + { + uint32_t i1 = (bBits - i0 - (uint32_t)1U) / (uint32_t)32U; + uint32_t j = (bBits - i0 - (uint32_t)1U) % (uint32_t)32U; + uint32_t tmp = b[i1]; + uint32_t bit = tmp >> j & (uint32_t)1U; + uint32_t sw1 = bit ^ sw; + for (uint32_t i = (uint32_t)0U; i < len; i++) + { + uint32_t dummy = ((uint32_t)0U - sw1) & (resM[i] ^ aM[i]); + resM[i] = resM[i] ^ dummy; + aM[i] = aM[i] ^ dummy; + } + bn_almost_mont_mul_u32(len, n, mu, aM, resM, aM); + bn_almost_mont_sqr_u32(len, n, mu, resM, resM); + sw = bit; + } + uint32_t sw0 = sw; + for (uint32_t i = (uint32_t)0U; i < len; i++) + { + uint32_t dummy = ((uint32_t)0U - sw0) & (resM[i] ^ aM[i]); + resM[i] = resM[i] ^ dummy; + aM[i] = aM[i] ^ dummy; + } + KRML_CHECK_SIZE(sizeof (uint32_t), len + len); + uint32_t *tmp = alloca((len + len) * sizeof (uint32_t)); + memset(tmp, 0U, (len + len) * sizeof (uint32_t)); + memcpy(tmp, resM, len * sizeof (uint32_t)); + Hacl_Bignum_Montgomery_bn_mont_reduction_u32(len, n, mu, tmp, res); + return; + } + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t *aM = alloca(len * sizeof (uint32_t)); + memset(aM, 0U, len * sizeof (uint32_t)); + KRML_CHECK_SIZE(sizeof (uint32_t), len + len); + uint32_t *c0 = alloca((len + len) * sizeof (uint32_t)); + memset(c0, 0U, (len + len) * sizeof (uint32_t)); + KRML_CHECK_SIZE(sizeof (uint32_t), (uint32_t)4U * len); + uint32_t *tmp0 = alloca((uint32_t)4U * len * sizeof (uint32_t)); + memset(tmp0, 0U, (uint32_t)4U * len * sizeof (uint32_t)); + Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint32(len, a, r2, tmp0, c0); + Hacl_Bignum_Montgomery_bn_mont_reduction_u32(len, n, mu, c0, aM); + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t *resM = alloca(len * sizeof (uint32_t)); + memset(resM, 0U, len * sizeof (uint32_t)); + uint32_t bLen; + if (bBits == (uint32_t)0U) + { + bLen = (uint32_t)1U; + } + else + { + bLen = (bBits - (uint32_t)1U) / (uint32_t)32U + (uint32_t)1U; + } + KRML_CHECK_SIZE(sizeof (uint32_t), len + len); + uint32_t *tmp = alloca((len + len) * sizeof (uint32_t)); + memset(tmp, 0U, (len + len) * sizeof (uint32_t)); + memcpy(tmp, r2, len * sizeof (uint32_t)); + Hacl_Bignum_Montgomery_bn_mont_reduction_u32(len, n, mu, tmp, resM); + uint32_t table_len = (uint32_t)16U; + KRML_CHECK_SIZE(sizeof (uint32_t), table_len * len); + uint32_t *table = alloca(table_len * len * sizeof (uint32_t)); + memset(table, 0U, table_len * len * sizeof (uint32_t)); + memcpy(table, resM, len * sizeof (uint32_t)); + uint32_t *t1 = table + len; + memcpy(t1, aM, len * sizeof (uint32_t)); + for (uint32_t i = (uint32_t)0U; i < table_len - (uint32_t)2U; i++) + { + uint32_t *t11 = table + (i + (uint32_t)1U) * len; + uint32_t *t2 = table + (i + (uint32_t)2U) * len; + bn_almost_mont_mul_u32(len, n, mu, t11, aM, t2); + } + for (uint32_t i0 = (uint32_t)0U; i0 < bBits / (uint32_t)4U; i0++) + { + for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i++) + { + bn_almost_mont_sqr_u32(len, n, mu, resM, resM); + } + uint32_t mask_l = (uint32_t)16U - (uint32_t)1U; + 
uint32_t i1 = (bBits - (uint32_t)4U * i0 - (uint32_t)4U) / (uint32_t)32U; + uint32_t j = (bBits - (uint32_t)4U * i0 - (uint32_t)4U) % (uint32_t)32U; + uint32_t p1 = b[i1] >> j; + uint32_t ite; + if (i1 + (uint32_t)1U < bLen && (uint32_t)0U < j) + { + ite = p1 | b[i1 + (uint32_t)1U] << ((uint32_t)32U - j); + } + else + { + ite = p1; + } + uint32_t bits_l = ite & mask_l; + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t *a_bits_l = alloca(len * sizeof (uint32_t)); + memset(a_bits_l, 0U, len * sizeof (uint32_t)); + memcpy(a_bits_l, table, len * sizeof (uint32_t)); + for (uint32_t i2 = (uint32_t)0U; i2 < table_len - (uint32_t)1U; i2++) + { + uint32_t c = FStar_UInt32_eq_mask(bits_l, i2 + (uint32_t)1U); + uint32_t *res_j = table + (i2 + (uint32_t)1U) * len; + for (uint32_t i = (uint32_t)0U; i < len; i++) + { + uint32_t *os = a_bits_l; + uint32_t x = (c & res_j[i]) | (~c & a_bits_l[i]); + os[i] = x; + } + } + bn_almost_mont_mul_u32(len, n, mu, resM, a_bits_l, resM); + } + if (!(bBits % (uint32_t)4U == (uint32_t)0U)) + { + uint32_t c = bBits % (uint32_t)4U; + for (uint32_t i = (uint32_t)0U; i < c; i++) + { + bn_almost_mont_sqr_u32(len, n, mu, resM, resM); + } + uint32_t c10 = bBits % (uint32_t)4U; + uint32_t mask_l = ((uint32_t)1U << c10) - (uint32_t)1U; + uint32_t i0 = (uint32_t)0U; + uint32_t j = (uint32_t)0U; + uint32_t p1 = b[i0] >> j; + uint32_t ite; + if (i0 + (uint32_t)1U < bLen && (uint32_t)0U < j) + { + ite = p1 | b[i0 + (uint32_t)1U] << ((uint32_t)32U - j); + } + else + { + ite = p1; + } + uint32_t bits_c = ite & mask_l; + uint32_t bits_c0 = bits_c; + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t *a_bits_c = alloca(len * sizeof (uint32_t)); + memset(a_bits_c, 0U, len * sizeof (uint32_t)); + memcpy(a_bits_c, table, len * sizeof (uint32_t)); + for (uint32_t i1 = (uint32_t)0U; i1 < table_len - (uint32_t)1U; i1++) + { + uint32_t c1 = FStar_UInt32_eq_mask(bits_c0, i1 + (uint32_t)1U); + uint32_t *res_j = table + (i1 + (uint32_t)1U) * len; + for (uint32_t i = (uint32_t)0U; i < len; i++) + { + uint32_t *os = a_bits_c; + uint32_t x = (c1 & res_j[i]) | (~c1 & a_bits_c[i]); + os[i] = x; + } + } + bn_almost_mont_mul_u32(len, n, mu, resM, a_bits_c, resM); + } + KRML_CHECK_SIZE(sizeof (uint32_t), len + len); + uint32_t *tmp1 = alloca((len + len) * sizeof (uint32_t)); + memset(tmp1, 0U, (len + len) * sizeof (uint32_t)); + memcpy(tmp1, resM, len * sizeof (uint32_t)); + Hacl_Bignum_Montgomery_bn_mont_reduction_u32(len, n, mu, tmp1, res); +} + +void +Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_u32( + uint32_t len, + uint32_t nBits, + uint32_t *n, + uint32_t *a, + uint32_t bBits, + uint32_t *b, + uint32_t *res +) +{ + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t *r2 = alloca(len * sizeof (uint32_t)); + memset(r2, 0U, len * sizeof (uint32_t)); + Hacl_Bignum_Montgomery_bn_precomp_r2_mod_n_u32(len, nBits, n, r2); + uint32_t mu = Hacl_Bignum_ModInvLimb_mod_inv_uint32(n[0U]); + Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_precomp_u32(len, n, mu, r2, a, bBits, b, res); +} + +void +Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_u32( + uint32_t len, + uint32_t nBits, + uint32_t *n, + uint32_t *a, + uint32_t bBits, + uint32_t *b, + uint32_t *res +) +{ + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t *r2 = alloca(len * sizeof (uint32_t)); + memset(r2, 0U, len * sizeof (uint32_t)); + Hacl_Bignum_Montgomery_bn_precomp_r2_mod_n_u32(len, nBits, n, r2); + uint32_t mu = Hacl_Bignum_ModInvLimb_mod_inv_uint32(n[0U]); + Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_precomp_u32(len, n, mu, r2, a, 
bBits, b, res); +} + +uint64_t +Hacl_Bignum_Exponentiation_bn_check_mod_exp_u64( + uint32_t len, + uint64_t *n, + uint64_t *a, + uint32_t bBits, + uint64_t *b +) +{ + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t *one = alloca(len * sizeof (uint64_t)); + memset(one, 0U, len * sizeof (uint64_t)); + memset(one, 0U, len * sizeof (uint64_t)); + one[0U] = (uint64_t)1U; + uint64_t bit0 = n[0U] & (uint64_t)1U; + uint64_t m0 = (uint64_t)0U - bit0; + uint64_t acc0 = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < len; i++) + { + uint64_t beq = FStar_UInt64_eq_mask(one[i], n[i]); + uint64_t blt = ~FStar_UInt64_gte_mask(one[i], n[i]); + acc0 = (beq & acc0) | (~beq & ((blt & (uint64_t)0xFFFFFFFFFFFFFFFFU) | (~blt & (uint64_t)0U))); + } + uint64_t m10 = acc0; + uint64_t m00 = m0 & m10; + uint32_t bLen; + if (bBits == (uint32_t)0U) + { + bLen = (uint32_t)1U; + } + else + { + bLen = (bBits - (uint32_t)1U) / (uint32_t)64U + (uint32_t)1U; + } + uint64_t m1; + if (bBits < (uint32_t)64U * bLen) + { + KRML_CHECK_SIZE(sizeof (uint64_t), bLen); + uint64_t *b2 = alloca(bLen * sizeof (uint64_t)); + memset(b2, 0U, bLen * sizeof (uint64_t)); + uint32_t i0 = bBits / (uint32_t)64U; + uint32_t j = bBits % (uint32_t)64U; + b2[i0] = b2[i0] | (uint64_t)1U << j; + uint64_t acc = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < bLen; i++) + { + uint64_t beq = FStar_UInt64_eq_mask(b[i], b2[i]); + uint64_t blt = ~FStar_UInt64_gte_mask(b[i], b2[i]); + acc = (beq & acc) | (~beq & ((blt & (uint64_t)0xFFFFFFFFFFFFFFFFU) | (~blt & (uint64_t)0U))); + } + uint64_t res = acc; + m1 = res; + } + else + { + m1 = (uint64_t)0xFFFFFFFFFFFFFFFFU; + } + uint64_t acc = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < len; i++) + { + uint64_t beq = FStar_UInt64_eq_mask(a[i], n[i]); + uint64_t blt = ~FStar_UInt64_gte_mask(a[i], n[i]); + acc = (beq & acc) | (~beq & ((blt & (uint64_t)0xFFFFFFFFFFFFFFFFU) | (~blt & (uint64_t)0U))); + } + uint64_t m2 = acc; + uint64_t m = m1 & m2; + return m00 & m; +} + +void +Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_precomp_u64( + uint32_t len, + uint64_t *n, + uint64_t mu, + uint64_t *r2, + uint64_t *a, + uint32_t bBits, + uint64_t *b, + uint64_t *res +) +{ + if (bBits < (uint32_t)200U) + { + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t *aM = alloca(len * sizeof (uint64_t)); + memset(aM, 0U, len * sizeof (uint64_t)); + KRML_CHECK_SIZE(sizeof (uint64_t), len + len); + uint64_t *c = alloca((len + len) * sizeof (uint64_t)); + memset(c, 0U, (len + len) * sizeof (uint64_t)); + KRML_CHECK_SIZE(sizeof (uint64_t), (uint32_t)4U * len); + uint64_t *tmp0 = alloca((uint32_t)4U * len * sizeof (uint64_t)); + memset(tmp0, 0U, (uint32_t)4U * len * sizeof (uint64_t)); + Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint64(len, a, r2, tmp0, c); + Hacl_Bignum_Montgomery_bn_mont_reduction_u64(len, n, mu, c, aM); + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t *resM = alloca(len * sizeof (uint64_t)); + memset(resM, 0U, len * sizeof (uint64_t)); + KRML_CHECK_SIZE(sizeof (uint64_t), len + len); + uint64_t *tmp1 = alloca((len + len) * sizeof (uint64_t)); + memset(tmp1, 0U, (len + len) * sizeof (uint64_t)); + memcpy(tmp1, r2, len * sizeof (uint64_t)); + Hacl_Bignum_Montgomery_bn_mont_reduction_u64(len, n, mu, tmp1, resM); + for (uint32_t i = (uint32_t)0U; i < bBits; i++) + { + uint32_t i1 = i / (uint32_t)64U; + uint32_t j = i % (uint32_t)64U; + uint64_t tmp = b[i1]; + uint64_t bit = tmp >> j & (uint64_t)1U; + if (!(bit == (uint64_t)0U)) + { + bn_almost_mont_mul_u64(len, n, mu, resM, aM, resM); + } + 
bn_almost_mont_sqr_u64(len, n, mu, aM, aM); + } + KRML_CHECK_SIZE(sizeof (uint64_t), len + len); + uint64_t *tmp = alloca((len + len) * sizeof (uint64_t)); + memset(tmp, 0U, (len + len) * sizeof (uint64_t)); + memcpy(tmp, resM, len * sizeof (uint64_t)); + Hacl_Bignum_Montgomery_bn_mont_reduction_u64(len, n, mu, tmp, res); + return; + } + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t *aM = alloca(len * sizeof (uint64_t)); + memset(aM, 0U, len * sizeof (uint64_t)); + KRML_CHECK_SIZE(sizeof (uint64_t), len + len); + uint64_t *c = alloca((len + len) * sizeof (uint64_t)); + memset(c, 0U, (len + len) * sizeof (uint64_t)); + KRML_CHECK_SIZE(sizeof (uint64_t), (uint32_t)4U * len); + uint64_t *tmp0 = alloca((uint32_t)4U * len * sizeof (uint64_t)); + memset(tmp0, 0U, (uint32_t)4U * len * sizeof (uint64_t)); + Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint64(len, a, r2, tmp0, c); + Hacl_Bignum_Montgomery_bn_mont_reduction_u64(len, n, mu, c, aM); + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t *resM = alloca(len * sizeof (uint64_t)); + memset(resM, 0U, len * sizeof (uint64_t)); + uint32_t bLen; + if (bBits == (uint32_t)0U) + { + bLen = (uint32_t)1U; + } + else + { + bLen = (bBits - (uint32_t)1U) / (uint32_t)64U + (uint32_t)1U; + } + KRML_CHECK_SIZE(sizeof (uint64_t), len + len); + uint64_t *tmp = alloca((len + len) * sizeof (uint64_t)); + memset(tmp, 0U, (len + len) * sizeof (uint64_t)); + memcpy(tmp, r2, len * sizeof (uint64_t)); + Hacl_Bignum_Montgomery_bn_mont_reduction_u64(len, n, mu, tmp, resM); + uint32_t table_len = (uint32_t)16U; + KRML_CHECK_SIZE(sizeof (uint64_t), table_len * len); + uint64_t *table = alloca(table_len * len * sizeof (uint64_t)); + memset(table, 0U, table_len * len * sizeof (uint64_t)); + memcpy(table, resM, len * sizeof (uint64_t)); + uint64_t *t1 = table + len; + memcpy(t1, aM, len * sizeof (uint64_t)); + for (uint32_t i = (uint32_t)0U; i < table_len - (uint32_t)2U; i++) + { + uint64_t *t11 = table + (i + (uint32_t)1U) * len; + uint64_t *t2 = table + (i + (uint32_t)2U) * len; + bn_almost_mont_mul_u64(len, n, mu, t11, aM, t2); + } + for (uint32_t i = (uint32_t)0U; i < bBits / (uint32_t)4U; i++) + { + for (uint32_t i0 = (uint32_t)0U; i0 < (uint32_t)4U; i0++) + { + bn_almost_mont_sqr_u64(len, n, mu, resM, resM); + } + uint64_t mask_l = (uint64_t)16U - (uint64_t)1U; + uint32_t i1 = (bBits - (uint32_t)4U * i - (uint32_t)4U) / (uint32_t)64U; + uint32_t j = (bBits - (uint32_t)4U * i - (uint32_t)4U) % (uint32_t)64U; + uint64_t p1 = b[i1] >> j; + uint64_t ite; + if (i1 + (uint32_t)1U < bLen && (uint32_t)0U < j) + { + ite = p1 | b[i1 + (uint32_t)1U] << ((uint32_t)64U - j); + } + else + { + ite = p1; + } + uint64_t bits_l = ite & mask_l; + uint32_t bits_l32 = (uint32_t)bits_l; + uint64_t *a_bits_l = table + bits_l32 * len; + bn_almost_mont_mul_u64(len, n, mu, resM, a_bits_l, resM); + } + if (!(bBits % (uint32_t)4U == (uint32_t)0U)) + { + uint32_t c0 = bBits % (uint32_t)4U; + for (uint32_t i = (uint32_t)0U; i < c0; i++) + { + bn_almost_mont_sqr_u64(len, n, mu, resM, resM); + } + uint32_t c1 = bBits % (uint32_t)4U; + uint64_t mask_l = ((uint64_t)1U << c1) - (uint64_t)1U; + uint32_t i = (uint32_t)0U; + uint32_t j = (uint32_t)0U; + uint64_t p1 = b[i] >> j; + uint64_t ite; + if (i + (uint32_t)1U < bLen && (uint32_t)0U < j) + { + ite = p1 | b[i + (uint32_t)1U] << ((uint32_t)64U - j); + } + else + { + ite = p1; + } + uint64_t bits_c = ite & mask_l; + uint64_t bits_c0 = bits_c; + uint32_t bits_c32 = (uint32_t)bits_c0; + uint64_t *a_bits_c = table + bits_c32 * len; + 
bn_almost_mont_mul_u64(len, n, mu, resM, a_bits_c, resM); + } + KRML_CHECK_SIZE(sizeof (uint64_t), len + len); + uint64_t *tmp1 = alloca((len + len) * sizeof (uint64_t)); + memset(tmp1, 0U, (len + len) * sizeof (uint64_t)); + memcpy(tmp1, resM, len * sizeof (uint64_t)); + Hacl_Bignum_Montgomery_bn_mont_reduction_u64(len, n, mu, tmp1, res); +} + +void +Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_precomp_u64( + uint32_t len, + uint64_t *n, + uint64_t mu, + uint64_t *r2, + uint64_t *a, + uint32_t bBits, + uint64_t *b, + uint64_t *res +) +{ + if (bBits < (uint32_t)200U) + { + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t *aM = alloca(len * sizeof (uint64_t)); + memset(aM, 0U, len * sizeof (uint64_t)); + KRML_CHECK_SIZE(sizeof (uint64_t), len + len); + uint64_t *c = alloca((len + len) * sizeof (uint64_t)); + memset(c, 0U, (len + len) * sizeof (uint64_t)); + KRML_CHECK_SIZE(sizeof (uint64_t), (uint32_t)4U * len); + uint64_t *tmp0 = alloca((uint32_t)4U * len * sizeof (uint64_t)); + memset(tmp0, 0U, (uint32_t)4U * len * sizeof (uint64_t)); + Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint64(len, a, r2, tmp0, c); + Hacl_Bignum_Montgomery_bn_mont_reduction_u64(len, n, mu, c, aM); + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t *resM = alloca(len * sizeof (uint64_t)); + memset(resM, 0U, len * sizeof (uint64_t)); + KRML_CHECK_SIZE(sizeof (uint64_t), len + len); + uint64_t *tmp1 = alloca((len + len) * sizeof (uint64_t)); + memset(tmp1, 0U, (len + len) * sizeof (uint64_t)); + memcpy(tmp1, r2, len * sizeof (uint64_t)); + Hacl_Bignum_Montgomery_bn_mont_reduction_u64(len, n, mu, tmp1, resM); + uint64_t sw = (uint64_t)0U; + for (uint32_t i0 = (uint32_t)0U; i0 < bBits; i0++) + { + uint32_t i1 = (bBits - i0 - (uint32_t)1U) / (uint32_t)64U; + uint32_t j = (bBits - i0 - (uint32_t)1U) % (uint32_t)64U; + uint64_t tmp = b[i1]; + uint64_t bit = tmp >> j & (uint64_t)1U; + uint64_t sw1 = bit ^ sw; + for (uint32_t i = (uint32_t)0U; i < len; i++) + { + uint64_t dummy = ((uint64_t)0U - sw1) & (resM[i] ^ aM[i]); + resM[i] = resM[i] ^ dummy; + aM[i] = aM[i] ^ dummy; + } + bn_almost_mont_mul_u64(len, n, mu, aM, resM, aM); + bn_almost_mont_sqr_u64(len, n, mu, resM, resM); + sw = bit; + } + uint64_t sw0 = sw; + for (uint32_t i = (uint32_t)0U; i < len; i++) + { + uint64_t dummy = ((uint64_t)0U - sw0) & (resM[i] ^ aM[i]); + resM[i] = resM[i] ^ dummy; + aM[i] = aM[i] ^ dummy; + } + KRML_CHECK_SIZE(sizeof (uint64_t), len + len); + uint64_t *tmp = alloca((len + len) * sizeof (uint64_t)); + memset(tmp, 0U, (len + len) * sizeof (uint64_t)); + memcpy(tmp, resM, len * sizeof (uint64_t)); + Hacl_Bignum_Montgomery_bn_mont_reduction_u64(len, n, mu, tmp, res); + return; + } + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t *aM = alloca(len * sizeof (uint64_t)); + memset(aM, 0U, len * sizeof (uint64_t)); + KRML_CHECK_SIZE(sizeof (uint64_t), len + len); + uint64_t *c0 = alloca((len + len) * sizeof (uint64_t)); + memset(c0, 0U, (len + len) * sizeof (uint64_t)); + KRML_CHECK_SIZE(sizeof (uint64_t), (uint32_t)4U * len); + uint64_t *tmp0 = alloca((uint32_t)4U * len * sizeof (uint64_t)); + memset(tmp0, 0U, (uint32_t)4U * len * sizeof (uint64_t)); + Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint64(len, a, r2, tmp0, c0); + Hacl_Bignum_Montgomery_bn_mont_reduction_u64(len, n, mu, c0, aM); + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t *resM = alloca(len * sizeof (uint64_t)); + memset(resM, 0U, len * sizeof (uint64_t)); + uint32_t bLen; + if (bBits == (uint32_t)0U) + { + bLen = (uint32_t)1U; + } + else + { + bLen = (bBits - 
(uint32_t)1U) / (uint32_t)64U + (uint32_t)1U; + } + KRML_CHECK_SIZE(sizeof (uint64_t), len + len); + uint64_t *tmp = alloca((len + len) * sizeof (uint64_t)); + memset(tmp, 0U, (len + len) * sizeof (uint64_t)); + memcpy(tmp, r2, len * sizeof (uint64_t)); + Hacl_Bignum_Montgomery_bn_mont_reduction_u64(len, n, mu, tmp, resM); + uint32_t table_len = (uint32_t)16U; + KRML_CHECK_SIZE(sizeof (uint64_t), table_len * len); + uint64_t *table = alloca(table_len * len * sizeof (uint64_t)); + memset(table, 0U, table_len * len * sizeof (uint64_t)); + memcpy(table, resM, len * sizeof (uint64_t)); + uint64_t *t1 = table + len; + memcpy(t1, aM, len * sizeof (uint64_t)); + for (uint32_t i = (uint32_t)0U; i < table_len - (uint32_t)2U; i++) + { + uint64_t *t11 = table + (i + (uint32_t)1U) * len; + uint64_t *t2 = table + (i + (uint32_t)2U) * len; + bn_almost_mont_mul_u64(len, n, mu, t11, aM, t2); + } + for (uint32_t i0 = (uint32_t)0U; i0 < bBits / (uint32_t)4U; i0++) + { + for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i++) + { + bn_almost_mont_sqr_u64(len, n, mu, resM, resM); + } + uint64_t mask_l = (uint64_t)16U - (uint64_t)1U; + uint32_t i1 = (bBits - (uint32_t)4U * i0 - (uint32_t)4U) / (uint32_t)64U; + uint32_t j = (bBits - (uint32_t)4U * i0 - (uint32_t)4U) % (uint32_t)64U; + uint64_t p1 = b[i1] >> j; + uint64_t ite; + if (i1 + (uint32_t)1U < bLen && (uint32_t)0U < j) + { + ite = p1 | b[i1 + (uint32_t)1U] << ((uint32_t)64U - j); + } + else + { + ite = p1; + } + uint64_t bits_l = ite & mask_l; + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t *a_bits_l = alloca(len * sizeof (uint64_t)); + memset(a_bits_l, 0U, len * sizeof (uint64_t)); + memcpy(a_bits_l, table, len * sizeof (uint64_t)); + for (uint32_t i2 = (uint32_t)0U; i2 < table_len - (uint32_t)1U; i2++) + { + uint64_t c = FStar_UInt64_eq_mask(bits_l, (uint64_t)(i2 + (uint32_t)1U)); + uint64_t *res_j = table + (i2 + (uint32_t)1U) * len; + for (uint32_t i = (uint32_t)0U; i < len; i++) + { + uint64_t *os = a_bits_l; + uint64_t x = (c & res_j[i]) | (~c & a_bits_l[i]); + os[i] = x; + } + } + bn_almost_mont_mul_u64(len, n, mu, resM, a_bits_l, resM); + } + if (!(bBits % (uint32_t)4U == (uint32_t)0U)) + { + uint32_t c = bBits % (uint32_t)4U; + for (uint32_t i = (uint32_t)0U; i < c; i++) + { + bn_almost_mont_sqr_u64(len, n, mu, resM, resM); + } + uint32_t c10 = bBits % (uint32_t)4U; + uint64_t mask_l = ((uint64_t)1U << c10) - (uint64_t)1U; + uint32_t i0 = (uint32_t)0U; + uint32_t j = (uint32_t)0U; + uint64_t p1 = b[i0] >> j; + uint64_t ite; + if (i0 + (uint32_t)1U < bLen && (uint32_t)0U < j) + { + ite = p1 | b[i0 + (uint32_t)1U] << ((uint32_t)64U - j); + } + else + { + ite = p1; + } + uint64_t bits_c = ite & mask_l; + uint64_t bits_c0 = bits_c; + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t *a_bits_c = alloca(len * sizeof (uint64_t)); + memset(a_bits_c, 0U, len * sizeof (uint64_t)); + memcpy(a_bits_c, table, len * sizeof (uint64_t)); + for (uint32_t i1 = (uint32_t)0U; i1 < table_len - (uint32_t)1U; i1++) + { + uint64_t c1 = FStar_UInt64_eq_mask(bits_c0, (uint64_t)(i1 + (uint32_t)1U)); + uint64_t *res_j = table + (i1 + (uint32_t)1U) * len; + for (uint32_t i = (uint32_t)0U; i < len; i++) + { + uint64_t *os = a_bits_c; + uint64_t x = (c1 & res_j[i]) | (~c1 & a_bits_c[i]); + os[i] = x; + } + } + bn_almost_mont_mul_u64(len, n, mu, resM, a_bits_c, resM); + } + KRML_CHECK_SIZE(sizeof (uint64_t), len + len); + uint64_t *tmp1 = alloca((len + len) * sizeof (uint64_t)); + memset(tmp1, 0U, (len + len) * sizeof (uint64_t)); + memcpy(tmp1, resM, len * sizeof 
(uint64_t)); + Hacl_Bignum_Montgomery_bn_mont_reduction_u64(len, n, mu, tmp1, res); +} + +void +Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_u64( + uint32_t len, + uint32_t nBits, + uint64_t *n, + uint64_t *a, + uint32_t bBits, + uint64_t *b, + uint64_t *res +) +{ + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t *r2 = alloca(len * sizeof (uint64_t)); + memset(r2, 0U, len * sizeof (uint64_t)); + Hacl_Bignum_Montgomery_bn_precomp_r2_mod_n_u64(len, nBits, n, r2); + uint64_t mu = Hacl_Bignum_ModInvLimb_mod_inv_uint64(n[0U]); + Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_precomp_u64(len, n, mu, r2, a, bBits, b, res); +} + +void +Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_u64( + uint32_t len, + uint32_t nBits, + uint64_t *n, + uint64_t *a, + uint32_t bBits, + uint64_t *b, + uint64_t *res +) +{ + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t *r2 = alloca(len * sizeof (uint64_t)); + memset(r2, 0U, len * sizeof (uint64_t)); + Hacl_Bignum_Montgomery_bn_precomp_r2_mod_n_u64(len, nBits, n, r2); + uint64_t mu = Hacl_Bignum_ModInvLimb_mod_inv_uint64(n[0U]); + Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_precomp_u64(len, n, mu, r2, a, bBits, b, res); +} + diff --git a/src/math/bigfix/Hacl_Bignum.h b/src/math/bigfix/Hacl_Bignum.h new file mode 100644 index 000000000..e9f204d1a --- /dev/null +++ b/src/math/bigfix/Hacl_Bignum.h @@ -0,0 +1,345 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + + +#ifndef __Hacl_Bignum_H +#define __Hacl_Bignum_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include "lib_intrinsics.h" +#include "kremlin/internal/types.h" +#include "kremlin/lowstar_endianness.h" +#include +#include "kremlin/internal/target.h" + +#include "Hacl_Bignum_Base.h" + +void Hacl_Bignum_Convert_bn_from_bytes_be_uint64(uint32_t len, uint8_t *b, uint64_t *res); + +void Hacl_Bignum_Convert_bn_to_bytes_be_uint64(uint32_t len, uint64_t *b, uint8_t *res); + +uint32_t Hacl_Bignum_Lib_bn_get_top_index_u32(uint32_t len, uint32_t *b); + +uint64_t Hacl_Bignum_Lib_bn_get_top_index_u64(uint32_t len, uint64_t *b); + +uint32_t +Hacl_Bignum_Addition_bn_sub_eq_len_u32(uint32_t aLen, uint32_t *a, uint32_t *b, uint32_t *res); + +uint64_t +Hacl_Bignum_Addition_bn_sub_eq_len_u64(uint32_t aLen, uint64_t *a, uint64_t *b, uint64_t *res); + +uint32_t +Hacl_Bignum_Addition_bn_add_eq_len_u32(uint32_t aLen, uint32_t *a, uint32_t *b, uint32_t *res); + +uint64_t +Hacl_Bignum_Addition_bn_add_eq_len_u64(uint32_t aLen, uint64_t *a, uint64_t *b, uint64_t *res); + +void +Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint32( + uint32_t aLen, + uint32_t *a, + uint32_t *b, + uint32_t *tmp, + uint32_t *res +); + +void +Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint64( + uint32_t aLen, + uint64_t *a, + uint64_t *b, + uint64_t *tmp, + uint64_t *res +); + +void +Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint32( + uint32_t aLen, + uint32_t *a, + uint32_t *tmp, + uint32_t *res +); + +void +Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint64( + uint32_t aLen, + uint64_t *a, + uint64_t *tmp, + uint64_t *res +); + +void +Hacl_Bignum_bn_add_mod_n_u32( + uint32_t len1, + uint32_t *n, + uint32_t *a, + uint32_t *b, + uint32_t *res +); + +void +Hacl_Bignum_bn_add_mod_n_u64( + uint32_t len1, + uint64_t *n, + uint64_t *a, + uint64_t *b, + uint64_t *res +); + +uint32_t Hacl_Bignum_ModInvLimb_mod_inv_uint32(uint32_t n0); + +uint64_t Hacl_Bignum_ModInvLimb_mod_inv_uint64(uint64_t n0); + +uint32_t Hacl_Bignum_Montgomery_bn_check_modulus_u32(uint32_t len, uint32_t *n); + +void +Hacl_Bignum_Montgomery_bn_precomp_r2_mod_n_u32( + uint32_t len, + uint32_t nBits, + uint32_t *n, + uint32_t *res +); + +void +Hacl_Bignum_Montgomery_bn_mont_reduction_u32( + uint32_t len, + uint32_t *n, + uint32_t nInv, + uint32_t *c, + uint32_t *res +); + +void +Hacl_Bignum_Montgomery_bn_to_mont_u32( + uint32_t len, + uint32_t *n, + uint32_t nInv, + uint32_t *r2, + uint32_t *a, + uint32_t *aM +); + +void +Hacl_Bignum_Montgomery_bn_from_mont_u32( + uint32_t len, + uint32_t *n, + uint32_t nInv_u64, + uint32_t *aM, + uint32_t *a +); + +void +Hacl_Bignum_Montgomery_bn_mont_mul_u32( + uint32_t len, + uint32_t *n, + uint32_t nInv_u64, + uint32_t *aM, + uint32_t *bM, + uint32_t *resM +); + +void +Hacl_Bignum_Montgomery_bn_mont_sqr_u32( + uint32_t len, + uint32_t *n, + uint32_t nInv_u64, + uint32_t *aM, + uint32_t *resM +); + +uint64_t Hacl_Bignum_Montgomery_bn_check_modulus_u64(uint32_t len, uint64_t *n); + +void +Hacl_Bignum_Montgomery_bn_precomp_r2_mod_n_u64( + uint32_t len, + uint32_t nBits, + uint64_t *n, + uint64_t *res +); + +void +Hacl_Bignum_Montgomery_bn_mont_reduction_u64( + uint32_t len, + uint64_t *n, + uint64_t nInv, + uint64_t *c, + uint64_t *res +); + +void +Hacl_Bignum_Montgomery_bn_to_mont_u64( + uint32_t len, + uint64_t *n, + uint64_t nInv, + uint64_t *r2, + uint64_t *a, + uint64_t *aM +); + +void +Hacl_Bignum_Montgomery_bn_from_mont_u64( + uint32_t len, + uint64_t *n, + uint64_t nInv_u64, + uint64_t *aM, + uint64_t *a +); + +void 
+Hacl_Bignum_Montgomery_bn_mont_mul_u64( + uint32_t len, + uint64_t *n, + uint64_t nInv_u64, + uint64_t *aM, + uint64_t *bM, + uint64_t *resM +); + +void +Hacl_Bignum_Montgomery_bn_mont_sqr_u64( + uint32_t len, + uint64_t *n, + uint64_t nInv_u64, + uint64_t *aM, + uint64_t *resM +); + +uint32_t +Hacl_Bignum_Exponentiation_bn_check_mod_exp_u32( + uint32_t len, + uint32_t *n, + uint32_t *a, + uint32_t bBits, + uint32_t *b +); + +void +Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_precomp_u32( + uint32_t len, + uint32_t *n, + uint32_t mu, + uint32_t *r2, + uint32_t *a, + uint32_t bBits, + uint32_t *b, + uint32_t *res +); + +void +Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_precomp_u32( + uint32_t len, + uint32_t *n, + uint32_t mu, + uint32_t *r2, + uint32_t *a, + uint32_t bBits, + uint32_t *b, + uint32_t *res +); + +void +Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_u32( + uint32_t len, + uint32_t nBits, + uint32_t *n, + uint32_t *a, + uint32_t bBits, + uint32_t *b, + uint32_t *res +); + +void +Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_u32( + uint32_t len, + uint32_t nBits, + uint32_t *n, + uint32_t *a, + uint32_t bBits, + uint32_t *b, + uint32_t *res +); + +uint64_t +Hacl_Bignum_Exponentiation_bn_check_mod_exp_u64( + uint32_t len, + uint64_t *n, + uint64_t *a, + uint32_t bBits, + uint64_t *b +); + +void +Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_precomp_u64( + uint32_t len, + uint64_t *n, + uint64_t mu, + uint64_t *r2, + uint64_t *a, + uint32_t bBits, + uint64_t *b, + uint64_t *res +); + +void +Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_precomp_u64( + uint32_t len, + uint64_t *n, + uint64_t mu, + uint64_t *r2, + uint64_t *a, + uint32_t bBits, + uint64_t *b, + uint64_t *res +); + +void +Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_u64( + uint32_t len, + uint32_t nBits, + uint64_t *n, + uint64_t *a, + uint32_t bBits, + uint64_t *b, + uint64_t *res +); + +void +Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_u64( + uint32_t len, + uint32_t nBits, + uint64_t *n, + uint64_t *a, + uint32_t bBits, + uint64_t *b, + uint64_t *res +); + +#if defined(__cplusplus) +} +#endif + +#define __Hacl_Bignum_H_DEFINED +#endif diff --git a/src/math/bigfix/Hacl_Bignum256.c b/src/math/bigfix/Hacl_Bignum256.c new file mode 100644 index 000000000..372baa30f --- /dev/null +++ b/src/math/bigfix/Hacl_Bignum256.c @@ -0,0 +1,1534 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + + +#include "math/bigfix/Hacl_Bignum256.h" + +/******************************************************************************* + +A verified 256-bit bignum library. + +This is a 64-bit optimized version, where bignums are represented as an array +of four unsigned 64-bit integers, i.e. uint64_t[4]. Furthermore, the +limbs are stored in little-endian format, i.e. the least significant limb is at +index 0. Each limb is stored in native format in memory. Example: + + uint64_t sixteen[4] = { 0x10, 0x00, 0x00, 0x00 } + +We strongly encourage users to go through the conversion functions, e.g. +bn_from_bytes_be, to i) not depend on internal representation choices and ii) +have the ability to switch easily to a 32-bit optimized version in the future. + +*******************************************************************************/ + +/************************/ +/* Arithmetic functions */ +/************************/ + + +/* +Write `a + b mod 2^256` in `res`. + + This function returns the carry. + + The arguments a, b and res are meant to be 256-bit bignums, i.e. uint64_t[4] +*/ +uint64_t Hacl_Bignum256_add(uint64_t *a, uint64_t *b, uint64_t *res) +{ + uint64_t c = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)1U; i++) + { + uint64_t t1 = a[(uint32_t)4U * i]; + uint64_t t20 = b[(uint32_t)4U * i]; + uint64_t *res_i0 = res + (uint32_t)4U * i; + c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t1, t20, res_i0); + uint64_t t10 = a[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t t21 = b[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t *res_i1 = res + (uint32_t)4U * i + (uint32_t)1U; + c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t10, t21, res_i1); + uint64_t t11 = a[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t t22 = b[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t *res_i2 = res + (uint32_t)4U * i + (uint32_t)2U; + c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t11, t22, res_i2); + uint64_t t12 = a[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t t2 = b[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t *res_i = res + (uint32_t)4U * i + (uint32_t)3U; + c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t12, t2, res_i); + } + for (uint32_t i = (uint32_t)4U; i < (uint32_t)4U; i++) + { + uint64_t t1 = a[i]; + uint64_t t2 = b[i]; + uint64_t *res_i = res + i; + c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t1, t2, res_i); + } + return c; +} + +/* +Write `a - b mod 2^256` in `res`. + + This function returns the carry. + + The arguments a, b and res are meant to be 256-bit bignums, i.e. 
uint64_t[4] +*/ +uint64_t Hacl_Bignum256_sub(uint64_t *a, uint64_t *b, uint64_t *res) +{ + uint64_t c = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)1U; i++) + { + uint64_t t1 = a[(uint32_t)4U * i]; + uint64_t t20 = b[(uint32_t)4U * i]; + uint64_t *res_i0 = res + (uint32_t)4U * i; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t1, t20, res_i0); + uint64_t t10 = a[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t t21 = b[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t *res_i1 = res + (uint32_t)4U * i + (uint32_t)1U; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t10, t21, res_i1); + uint64_t t11 = a[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t t22 = b[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t *res_i2 = res + (uint32_t)4U * i + (uint32_t)2U; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t11, t22, res_i2); + uint64_t t12 = a[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t t2 = b[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t *res_i = res + (uint32_t)4U * i + (uint32_t)3U; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t12, t2, res_i); + } + for (uint32_t i = (uint32_t)4U; i < (uint32_t)4U; i++) + { + uint64_t t1 = a[i]; + uint64_t t2 = b[i]; + uint64_t *res_i = res + i; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t1, t2, res_i); + } + return c; +} + +static inline void add_mod_n(uint64_t *n, uint64_t *a, uint64_t *b, uint64_t *res) +{ + uint64_t c0 = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)1U; i++) + { + uint64_t t1 = a[(uint32_t)4U * i]; + uint64_t t20 = b[(uint32_t)4U * i]; + uint64_t *res_i0 = res + (uint32_t)4U * i; + c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, t1, t20, res_i0); + uint64_t t10 = a[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t t21 = b[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t *res_i1 = res + (uint32_t)4U * i + (uint32_t)1U; + c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, t10, t21, res_i1); + uint64_t t11 = a[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t t22 = b[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t *res_i2 = res + (uint32_t)4U * i + (uint32_t)2U; + c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, t11, t22, res_i2); + uint64_t t12 = a[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t t2 = b[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t *res_i = res + (uint32_t)4U * i + (uint32_t)3U; + c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, t12, t2, res_i); + } + for (uint32_t i = (uint32_t)4U; i < (uint32_t)4U; i++) + { + uint64_t t1 = a[i]; + uint64_t t2 = b[i]; + uint64_t *res_i = res + i; + c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, t1, t2, res_i); + } + uint64_t c00 = c0; + uint64_t tmp[4U] = { 0U }; + uint64_t c = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)1U; i++) + { + uint64_t t1 = res[(uint32_t)4U * i]; + uint64_t t20 = n[(uint32_t)4U * i]; + uint64_t *res_i0 = tmp + (uint32_t)4U * i; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t1, t20, res_i0); + uint64_t t10 = res[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t t21 = n[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t *res_i1 = tmp + (uint32_t)4U * i + (uint32_t)1U; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t10, t21, res_i1); + uint64_t t11 = res[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t t22 = n[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t *res_i2 = tmp + (uint32_t)4U * i + (uint32_t)2U; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t11, t22, res_i2); + uint64_t t12 = res[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t t2 = n[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t *res_i = tmp + (uint32_t)4U * i + (uint32_t)3U; + c = 
Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t12, t2, res_i); + } + for (uint32_t i = (uint32_t)4U; i < (uint32_t)4U; i++) + { + uint64_t t1 = res[i]; + uint64_t t2 = n[i]; + uint64_t *res_i = tmp + i; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t1, t2, res_i); + } + uint64_t c1 = c; + uint64_t c2 = c00 - c1; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i++) + { + uint64_t *os = res; + uint64_t x = (c2 & res[i]) | (~c2 & tmp[i]); + os[i] = x; + } +} + +/* +Write `a * b` in `res`. + + The arguments a and b are meant to be 256-bit bignums, i.e. uint64_t[4]. + The outparam res is meant to be a 512-bit bignum, i.e. uint64_t[8]. +*/ +void Hacl_Bignum256_mul(uint64_t *a, uint64_t *b, uint64_t *res) +{ + uint32_t resLen = (uint32_t)8U; + memset(res, 0U, resLen * sizeof (uint64_t)); + for (uint32_t i0 = (uint32_t)0U; i0 < (uint32_t)4U; i0++) + { + uint64_t bj = b[i0]; + uint64_t *res_j = res + i0; + uint64_t c = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)1U; i++) + { + uint64_t a_i = a[(uint32_t)4U * i]; + uint64_t *res_i0 = res_j + (uint32_t)4U * i; + c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, bj, c, res_i0); + uint64_t a_i0 = a[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t *res_i1 = res_j + (uint32_t)4U * i + (uint32_t)1U; + c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i0, bj, c, res_i1); + uint64_t a_i1 = a[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t *res_i2 = res_j + (uint32_t)4U * i + (uint32_t)2U; + c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i1, bj, c, res_i2); + uint64_t a_i2 = a[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t *res_i = res_j + (uint32_t)4U * i + (uint32_t)3U; + c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i2, bj, c, res_i); + } + for (uint32_t i = (uint32_t)4U; i < (uint32_t)4U; i++) + { + uint64_t a_i = a[i]; + uint64_t *res_i = res_j + i; + c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, bj, c, res_i); + } + uint64_t r = c; + res[(uint32_t)4U + i0] = r; + } +} + +/* +Write `a * a` in `res`. + + The argument a is meant to be a 256-bit bignum, i.e. uint64_t[4]. + The outparam res is meant to be a 512-bit bignum, i.e. uint64_t[8]. 
+*/ +void Hacl_Bignum256_sqr(uint64_t *a, uint64_t *res) +{ + uint32_t resLen = (uint32_t)8U; + memset(res, 0U, resLen * sizeof (uint64_t)); + for (uint32_t i0 = (uint32_t)0U; i0 < (uint32_t)4U; i0++) + { + uint64_t *ab = a; + uint64_t a_j = a[i0]; + uint64_t *res_j = res + i0; + uint64_t c = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < i0 / (uint32_t)4U * (uint32_t)4U / (uint32_t)4U; i++) + { + uint64_t a_i = ab[(uint32_t)4U * i]; + uint64_t *res_i0 = res_j + (uint32_t)4U * i; + c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, a_j, c, res_i0); + uint64_t a_i0 = ab[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t *res_i1 = res_j + (uint32_t)4U * i + (uint32_t)1U; + c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i0, a_j, c, res_i1); + uint64_t a_i1 = ab[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t *res_i2 = res_j + (uint32_t)4U * i + (uint32_t)2U; + c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i1, a_j, c, res_i2); + uint64_t a_i2 = ab[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t *res_i = res_j + (uint32_t)4U * i + (uint32_t)3U; + c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i2, a_j, c, res_i); + } + for (uint32_t i = i0 / (uint32_t)4U * (uint32_t)4U; i < i0; i++) + { + uint64_t a_i = ab[i]; + uint64_t *res_i = res_j + i; + c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, a_j, c, res_i); + } + uint64_t r = c; + res[i0 + i0] = r; + } + uint64_t c0 = Hacl_Bignum_Addition_bn_add_eq_len_u64(resLen, res, res, res); + KRML_CHECK_SIZE(sizeof (uint64_t), resLen); + uint64_t *tmp = alloca(resLen * sizeof (uint64_t)); + memset(tmp, 0U, resLen * sizeof (uint64_t)); + for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i++) + { + FStar_UInt128_uint128 res1 = FStar_UInt128_mul_wide(a[i], a[i]); + uint64_t hi = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(res1, (uint32_t)64U)); + uint64_t lo = FStar_UInt128_uint128_to_uint64(res1); + tmp[(uint32_t)2U * i] = lo; + tmp[(uint32_t)2U * i + (uint32_t)1U] = hi; + } + uint64_t c1 = Hacl_Bignum_Addition_bn_add_eq_len_u64(resLen, res, tmp, res); +} + +static inline void precompr2(uint32_t nBits, uint64_t *n, uint64_t *res) +{ + memset(res, 0U, (uint32_t)4U * sizeof (uint64_t)); + uint32_t i = nBits / (uint32_t)64U; + uint32_t j = nBits % (uint32_t)64U; + res[i] = res[i] | (uint64_t)1U << j; + for (uint32_t i0 = (uint32_t)0U; i0 < (uint32_t)512U - nBits; i0++) + { + add_mod_n(n, res, res, res); + } +} + +static inline void reduction(uint64_t *n, uint64_t nInv, uint64_t *c, uint64_t *res) +{ + uint64_t c0 = (uint64_t)0U; + for (uint32_t i0 = (uint32_t)0U; i0 < (uint32_t)4U; i0++) + { + uint64_t qj = nInv * c[i0]; + uint64_t *res_j0 = c + i0; + uint64_t c1 = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)1U; i++) + { + uint64_t a_i = n[(uint32_t)4U * i]; + uint64_t *res_i0 = res_j0 + (uint32_t)4U * i; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, qj, c1, res_i0); + uint64_t a_i0 = n[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t *res_i1 = res_j0 + (uint32_t)4U * i + (uint32_t)1U; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i0, qj, c1, res_i1); + uint64_t a_i1 = n[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t *res_i2 = res_j0 + (uint32_t)4U * i + (uint32_t)2U; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i1, qj, c1, res_i2); + uint64_t a_i2 = n[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t *res_i = res_j0 + (uint32_t)4U * i + (uint32_t)3U; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i2, qj, c1, res_i); + } + for (uint32_t i = (uint32_t)4U; i < (uint32_t)4U; i++) + { + uint64_t a_i = n[i]; + uint64_t *res_i = res_j0 + i; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, 
qj, c1, res_i); + } + uint64_t r = c1; + uint64_t c10 = r; + uint64_t *resb = c + (uint32_t)4U + i0; + uint64_t res_j = c[(uint32_t)4U + i0]; + c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, c10, res_j, resb); + } + memcpy(res, c + (uint32_t)4U, (uint32_t)4U * sizeof (uint64_t)); + uint64_t c00 = c0; + uint64_t tmp[4U] = { 0U }; + uint64_t c1 = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)1U; i++) + { + uint64_t t1 = res[(uint32_t)4U * i]; + uint64_t t20 = n[(uint32_t)4U * i]; + uint64_t *res_i0 = tmp + (uint32_t)4U * i; + c1 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c1, t1, t20, res_i0); + uint64_t t10 = res[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t t21 = n[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t *res_i1 = tmp + (uint32_t)4U * i + (uint32_t)1U; + c1 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c1, t10, t21, res_i1); + uint64_t t11 = res[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t t22 = n[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t *res_i2 = tmp + (uint32_t)4U * i + (uint32_t)2U; + c1 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c1, t11, t22, res_i2); + uint64_t t12 = res[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t t2 = n[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t *res_i = tmp + (uint32_t)4U * i + (uint32_t)3U; + c1 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c1, t12, t2, res_i); + } + for (uint32_t i = (uint32_t)4U; i < (uint32_t)4U; i++) + { + uint64_t t1 = res[i]; + uint64_t t2 = n[i]; + uint64_t *res_i = tmp + i; + c1 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c1, t1, t2, res_i); + } + uint64_t c10 = c1; + uint64_t c2 = c00 - c10; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i++) + { + uint64_t *os = res; + uint64_t x = (c2 & res[i]) | (~c2 & tmp[i]); + os[i] = x; + } +} + +static inline void areduction(uint64_t *n, uint64_t nInv, uint64_t *c, uint64_t *res) +{ + uint64_t c0 = (uint64_t)0U; + for (uint32_t i0 = (uint32_t)0U; i0 < (uint32_t)4U; i0++) + { + uint64_t qj = nInv * c[i0]; + uint64_t *res_j0 = c + i0; + uint64_t c1 = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)1U; i++) + { + uint64_t a_i = n[(uint32_t)4U * i]; + uint64_t *res_i0 = res_j0 + (uint32_t)4U * i; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, qj, c1, res_i0); + uint64_t a_i0 = n[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t *res_i1 = res_j0 + (uint32_t)4U * i + (uint32_t)1U; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i0, qj, c1, res_i1); + uint64_t a_i1 = n[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t *res_i2 = res_j0 + (uint32_t)4U * i + (uint32_t)2U; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i1, qj, c1, res_i2); + uint64_t a_i2 = n[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t *res_i = res_j0 + (uint32_t)4U * i + (uint32_t)3U; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i2, qj, c1, res_i); + } + for (uint32_t i = (uint32_t)4U; i < (uint32_t)4U; i++) + { + uint64_t a_i = n[i]; + uint64_t *res_i = res_j0 + i; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, qj, c1, res_i); + } + uint64_t r = c1; + uint64_t c10 = r; + uint64_t *resb = c + (uint32_t)4U + i0; + uint64_t res_j = c[(uint32_t)4U + i0]; + c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, c10, res_j, resb); + } + memcpy(res, c + (uint32_t)4U, (uint32_t)4U * sizeof (uint64_t)); + uint64_t c00 = c0; + uint64_t tmp[4U] = { 0U }; + uint64_t c1 = Hacl_Bignum256_sub(res, n, tmp); + uint64_t m = (uint64_t)0U - c00; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i++) + { + uint64_t *os = res; + uint64_t x = (m & tmp[i]) | (~m & res[i]); + os[i] = x; + } +} + +static inline void +amont_mul(uint64_t *n, uint64_t nInv_u64, 
uint64_t *aM, uint64_t *bM, uint64_t *resM) +{ + uint64_t c[8U] = { 0U }; + uint32_t resLen = (uint32_t)8U; + memset(c, 0U, resLen * sizeof (uint64_t)); + for (uint32_t i0 = (uint32_t)0U; i0 < (uint32_t)4U; i0++) + { + uint64_t bj = bM[i0]; + uint64_t *res_j = c + i0; + uint64_t c1 = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)1U; i++) + { + uint64_t a_i = aM[(uint32_t)4U * i]; + uint64_t *res_i0 = res_j + (uint32_t)4U * i; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, bj, c1, res_i0); + uint64_t a_i0 = aM[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t *res_i1 = res_j + (uint32_t)4U * i + (uint32_t)1U; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i0, bj, c1, res_i1); + uint64_t a_i1 = aM[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t *res_i2 = res_j + (uint32_t)4U * i + (uint32_t)2U; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i1, bj, c1, res_i2); + uint64_t a_i2 = aM[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t *res_i = res_j + (uint32_t)4U * i + (uint32_t)3U; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i2, bj, c1, res_i); + } + for (uint32_t i = (uint32_t)4U; i < (uint32_t)4U; i++) + { + uint64_t a_i = aM[i]; + uint64_t *res_i = res_j + i; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, bj, c1, res_i); + } + uint64_t r = c1; + c[(uint32_t)4U + i0] = r; + } + areduction(n, nInv_u64, c, resM); +} + +static inline void amont_sqr(uint64_t *n, uint64_t nInv_u64, uint64_t *aM, uint64_t *resM) +{ + uint64_t c[8U] = { 0U }; + uint32_t resLen = (uint32_t)8U; + memset(c, 0U, resLen * sizeof (uint64_t)); + for (uint32_t i0 = (uint32_t)0U; i0 < (uint32_t)4U; i0++) + { + uint64_t *ab = aM; + uint64_t a_j = aM[i0]; + uint64_t *res_j = c + i0; + uint64_t c1 = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < i0 / (uint32_t)4U * (uint32_t)4U / (uint32_t)4U; i++) + { + uint64_t a_i = ab[(uint32_t)4U * i]; + uint64_t *res_i0 = res_j + (uint32_t)4U * i; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, a_j, c1, res_i0); + uint64_t a_i0 = ab[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t *res_i1 = res_j + (uint32_t)4U * i + (uint32_t)1U; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i0, a_j, c1, res_i1); + uint64_t a_i1 = ab[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t *res_i2 = res_j + (uint32_t)4U * i + (uint32_t)2U; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i1, a_j, c1, res_i2); + uint64_t a_i2 = ab[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t *res_i = res_j + (uint32_t)4U * i + (uint32_t)3U; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i2, a_j, c1, res_i); + } + for (uint32_t i = i0 / (uint32_t)4U * (uint32_t)4U; i < i0; i++) + { + uint64_t a_i = ab[i]; + uint64_t *res_i = res_j + i; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, a_j, c1, res_i); + } + uint64_t r = c1; + c[i0 + i0] = r; + } + uint64_t c0 = Hacl_Bignum_Addition_bn_add_eq_len_u64(resLen, c, c, c); + KRML_CHECK_SIZE(sizeof (uint64_t), resLen); + uint64_t *tmp = alloca(resLen * sizeof (uint64_t)); + memset(tmp, 0U, resLen * sizeof (uint64_t)); + for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i++) + { + FStar_UInt128_uint128 res = FStar_UInt128_mul_wide(aM[i], aM[i]); + uint64_t hi = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(res, (uint32_t)64U)); + uint64_t lo = FStar_UInt128_uint128_to_uint64(res); + tmp[(uint32_t)2U * i] = lo; + tmp[(uint32_t)2U * i + (uint32_t)1U] = hi; + } + uint64_t c1 = Hacl_Bignum_Addition_bn_add_eq_len_u64(resLen, c, tmp, c); + areduction(n, nInv_u64, c, resM); +} + +static inline void +bn_slow_precomp(uint64_t *n, uint64_t mu, uint64_t *r2, uint64_t *a, uint64_t *res) +{ + uint64_t 
a_mod[4U] = { 0U }; + uint64_t a1[8U] = { 0U }; + memcpy(a1, a, (uint32_t)8U * sizeof (uint64_t)); + uint64_t c0 = (uint64_t)0U; + for (uint32_t i0 = (uint32_t)0U; i0 < (uint32_t)4U; i0++) + { + uint64_t qj = mu * a1[i0]; + uint64_t *res_j0 = a1 + i0; + uint64_t c = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)1U; i++) + { + uint64_t a_i = n[(uint32_t)4U * i]; + uint64_t *res_i0 = res_j0 + (uint32_t)4U * i; + c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, qj, c, res_i0); + uint64_t a_i0 = n[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t *res_i1 = res_j0 + (uint32_t)4U * i + (uint32_t)1U; + c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i0, qj, c, res_i1); + uint64_t a_i1 = n[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t *res_i2 = res_j0 + (uint32_t)4U * i + (uint32_t)2U; + c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i1, qj, c, res_i2); + uint64_t a_i2 = n[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t *res_i = res_j0 + (uint32_t)4U * i + (uint32_t)3U; + c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i2, qj, c, res_i); + } + for (uint32_t i = (uint32_t)4U; i < (uint32_t)4U; i++) + { + uint64_t a_i = n[i]; + uint64_t *res_i = res_j0 + i; + c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, qj, c, res_i); + } + uint64_t r = c; + uint64_t c1 = r; + uint64_t *resb = a1 + (uint32_t)4U + i0; + uint64_t res_j = a1[(uint32_t)4U + i0]; + c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, c1, res_j, resb); + } + memcpy(a_mod, a1 + (uint32_t)4U, (uint32_t)4U * sizeof (uint64_t)); + uint64_t c00 = c0; + uint64_t tmp[4U] = { 0U }; + uint64_t c1 = Hacl_Bignum256_sub(a_mod, n, tmp); + uint64_t m = (uint64_t)0U - c00; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i++) + { + uint64_t *os = a_mod; + uint64_t x = (m & tmp[i]) | (~m & a_mod[i]); + os[i] = x; + } + uint64_t c[8U] = { 0U }; + Hacl_Bignum256_mul(a_mod, r2, c); + reduction(n, mu, c, res); +} + +/* +Write `a mod n` in `res`. + + The argument a is meant to be a 512-bit bignum, i.e. uint64_t[8]. + The argument n and the outparam res are meant to be 256-bit bignums, i.e. uint64_t[4]. + + The function returns false if any of the following preconditions are violated, + true otherwise. 
+ • 1 < n + • n % 2 = 1 +*/ +bool Hacl_Bignum256_mod(uint64_t *n, uint64_t *a, uint64_t *res) +{ + uint64_t one[4U] = { 0U }; + memset(one, 0U, (uint32_t)4U * sizeof (uint64_t)); + one[0U] = (uint64_t)1U; + uint64_t bit0 = n[0U] & (uint64_t)1U; + uint64_t m0 = (uint64_t)0U - bit0; + uint64_t acc = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i++) + { + uint64_t beq = FStar_UInt64_eq_mask(one[i], n[i]); + uint64_t blt = ~FStar_UInt64_gte_mask(one[i], n[i]); + acc = (beq & acc) | (~beq & ((blt & (uint64_t)0xFFFFFFFFFFFFFFFFU) | (~blt & (uint64_t)0U))); + } + uint64_t m1 = acc; + uint64_t is_valid_m = m0 & m1; + uint32_t + nBits = (uint32_t)64U * (uint32_t)Hacl_Bignum_Lib_bn_get_top_index_u64((uint32_t)4U, n); + if (is_valid_m == (uint64_t)0xFFFFFFFFFFFFFFFFU) + { + uint64_t r2[4U] = { 0U }; + precompr2(nBits, n, r2); + uint64_t mu = Hacl_Bignum_ModInvLimb_mod_inv_uint64(n[0U]); + bn_slow_precomp(n, mu, r2, a, res); + } + else + { + memset(res, 0U, (uint32_t)4U * sizeof (uint64_t)); + } + return is_valid_m == (uint64_t)0xFFFFFFFFFFFFFFFFU; +} + +static uint64_t exp_check(uint64_t *n, uint64_t *a, uint32_t bBits, uint64_t *b) +{ + uint64_t one[4U] = { 0U }; + memset(one, 0U, (uint32_t)4U * sizeof (uint64_t)); + one[0U] = (uint64_t)1U; + uint64_t bit0 = n[0U] & (uint64_t)1U; + uint64_t m0 = (uint64_t)0U - bit0; + uint64_t acc0 = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i++) + { + uint64_t beq = FStar_UInt64_eq_mask(one[i], n[i]); + uint64_t blt = ~FStar_UInt64_gte_mask(one[i], n[i]); + acc0 = (beq & acc0) | (~beq & ((blt & (uint64_t)0xFFFFFFFFFFFFFFFFU) | (~blt & (uint64_t)0U))); + } + uint64_t m10 = acc0; + uint64_t m00 = m0 & m10; + uint32_t bLen; + if (bBits == (uint32_t)0U) + { + bLen = (uint32_t)1U; + } + else + { + bLen = (bBits - (uint32_t)1U) / (uint32_t)64U + (uint32_t)1U; + } + uint64_t m1; + if (bBits < (uint32_t)64U * bLen) + { + KRML_CHECK_SIZE(sizeof (uint64_t), bLen); + uint64_t *b2 = alloca(bLen * sizeof (uint64_t)); + memset(b2, 0U, bLen * sizeof (uint64_t)); + uint32_t i0 = bBits / (uint32_t)64U; + uint32_t j = bBits % (uint32_t)64U; + b2[i0] = b2[i0] | (uint64_t)1U << j; + uint64_t acc = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < bLen; i++) + { + uint64_t beq = FStar_UInt64_eq_mask(b[i], b2[i]); + uint64_t blt = ~FStar_UInt64_gte_mask(b[i], b2[i]); + acc = (beq & acc) | (~beq & ((blt & (uint64_t)0xFFFFFFFFFFFFFFFFU) | (~blt & (uint64_t)0U))); + } + uint64_t res = acc; + m1 = res; + } + else + { + m1 = (uint64_t)0xFFFFFFFFFFFFFFFFU; + } + uint64_t acc = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i++) + { + uint64_t beq = FStar_UInt64_eq_mask(a[i], n[i]); + uint64_t blt = ~FStar_UInt64_gte_mask(a[i], n[i]); + acc = (beq & acc) | (~beq & ((blt & (uint64_t)0xFFFFFFFFFFFFFFFFU) | (~blt & (uint64_t)0U))); + } + uint64_t m2 = acc; + uint64_t m = m1 & m2; + return m00 & m; +} + +static inline void +exp_vartime_precomp( + uint64_t *n, + uint64_t mu, + uint64_t *r2, + uint64_t *a, + uint32_t bBits, + uint64_t *b, + uint64_t *res +) +{ + if (bBits < (uint32_t)200U) + { + uint64_t aM[4U] = { 0U }; + uint64_t c[8U] = { 0U }; + Hacl_Bignum256_mul(a, r2, c); + reduction(n, mu, c, aM); + uint64_t resM[4U] = { 0U }; + uint64_t tmp0[8U] = { 0U }; + memcpy(tmp0, r2, (uint32_t)4U * sizeof (uint64_t)); + reduction(n, mu, tmp0, resM); + for (uint32_t i = (uint32_t)0U; i < bBits; i++) + { + uint32_t i1 = i / (uint32_t)64U; + uint32_t j = i % (uint32_t)64U; + uint64_t tmp = b[i1]; + uint64_t bit = tmp >> j & (uint64_t)1U; 
+ if (!(bit == (uint64_t)0U)) + { + amont_mul(n, mu, resM, aM, resM); + } + amont_sqr(n, mu, aM, aM); + } + uint64_t tmp[8U] = { 0U }; + memcpy(tmp, resM, (uint32_t)4U * sizeof (uint64_t)); + reduction(n, mu, tmp, res); + return; + } + uint64_t aM[4U] = { 0U }; + uint64_t c[8U] = { 0U }; + Hacl_Bignum256_mul(a, r2, c); + reduction(n, mu, c, aM); + uint64_t resM[4U] = { 0U }; + uint32_t bLen; + if (bBits == (uint32_t)0U) + { + bLen = (uint32_t)1U; + } + else + { + bLen = (bBits - (uint32_t)1U) / (uint32_t)64U + (uint32_t)1U; + } + uint64_t tmp[8U] = { 0U }; + memcpy(tmp, r2, (uint32_t)4U * sizeof (uint64_t)); + reduction(n, mu, tmp, resM); + uint32_t table_len = (uint32_t)16U; + KRML_CHECK_SIZE(sizeof (uint64_t), table_len * (uint32_t)4U); + uint64_t *table = alloca(table_len * (uint32_t)4U * sizeof (uint64_t)); + memset(table, 0U, table_len * (uint32_t)4U * sizeof (uint64_t)); + memcpy(table, resM, (uint32_t)4U * sizeof (uint64_t)); + uint64_t *t1 = table + (uint32_t)4U; + memcpy(t1, aM, (uint32_t)4U * sizeof (uint64_t)); + for (uint32_t i = (uint32_t)0U; i < table_len - (uint32_t)2U; i++) + { + uint64_t *t11 = table + (i + (uint32_t)1U) * (uint32_t)4U; + uint64_t *t2 = table + (i + (uint32_t)2U) * (uint32_t)4U; + amont_mul(n, mu, t11, aM, t2); + } + for (uint32_t i = (uint32_t)0U; i < bBits / (uint32_t)4U; i++) + { + for (uint32_t i0 = (uint32_t)0U; i0 < (uint32_t)4U; i0++) + { + amont_sqr(n, mu, resM, resM); + } + uint64_t mask_l = (uint64_t)16U - (uint64_t)1U; + uint32_t i1 = (bBits - (uint32_t)4U * i - (uint32_t)4U) / (uint32_t)64U; + uint32_t j = (bBits - (uint32_t)4U * i - (uint32_t)4U) % (uint32_t)64U; + uint64_t p1 = b[i1] >> j; + uint64_t ite; + if (i1 + (uint32_t)1U < bLen && (uint32_t)0U < j) + { + ite = p1 | b[i1 + (uint32_t)1U] << ((uint32_t)64U - j); + } + else + { + ite = p1; + } + uint64_t bits_l = ite & mask_l; + uint32_t bits_l32 = (uint32_t)bits_l; + uint64_t *a_bits_l = table + bits_l32 * (uint32_t)4U; + amont_mul(n, mu, resM, a_bits_l, resM); + } + if (!(bBits % (uint32_t)4U == (uint32_t)0U)) + { + uint32_t c0 = bBits % (uint32_t)4U; + for (uint32_t i = (uint32_t)0U; i < c0; i++) + { + amont_sqr(n, mu, resM, resM); + } + uint32_t c1 = bBits % (uint32_t)4U; + uint64_t mask_l = ((uint64_t)1U << c1) - (uint64_t)1U; + uint32_t i = (uint32_t)0U; + uint32_t j = (uint32_t)0U; + uint64_t p1 = b[i] >> j; + uint64_t ite; + if (i + (uint32_t)1U < bLen && (uint32_t)0U < j) + { + ite = p1 | b[i + (uint32_t)1U] << ((uint32_t)64U - j); + } + else + { + ite = p1; + } + uint64_t bits_c = ite & mask_l; + uint64_t bits_c0 = bits_c; + uint32_t bits_c32 = (uint32_t)bits_c0; + uint64_t *a_bits_c = table + bits_c32 * (uint32_t)4U; + amont_mul(n, mu, resM, a_bits_c, resM); + } + uint64_t tmp0[8U] = { 0U }; + memcpy(tmp0, resM, (uint32_t)4U * sizeof (uint64_t)); + reduction(n, mu, tmp0, res); +} + +static inline void +exp_consttime_precomp( + uint64_t *n, + uint64_t mu, + uint64_t *r2, + uint64_t *a, + uint32_t bBits, + uint64_t *b, + uint64_t *res +) +{ + if (bBits < (uint32_t)200U) + { + uint64_t aM[4U] = { 0U }; + uint64_t c[8U] = { 0U }; + Hacl_Bignum256_mul(a, r2, c); + reduction(n, mu, c, aM); + uint64_t resM[4U] = { 0U }; + uint64_t tmp0[8U] = { 0U }; + memcpy(tmp0, r2, (uint32_t)4U * sizeof (uint64_t)); + reduction(n, mu, tmp0, resM); + uint64_t sw = (uint64_t)0U; + for (uint32_t i0 = (uint32_t)0U; i0 < bBits; i0++) + { + uint32_t i1 = (bBits - i0 - (uint32_t)1U) / (uint32_t)64U; + uint32_t j = (bBits - i0 - (uint32_t)1U) % (uint32_t)64U; + uint64_t tmp = b[i1]; + uint64_t bit = tmp >> 
j & (uint64_t)1U; + uint64_t sw1 = bit ^ sw; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i++) + { + uint64_t dummy = ((uint64_t)0U - sw1) & (resM[i] ^ aM[i]); + resM[i] = resM[i] ^ dummy; + aM[i] = aM[i] ^ dummy; + } + amont_mul(n, mu, aM, resM, aM); + amont_sqr(n, mu, resM, resM); + sw = bit; + } + uint64_t sw0 = sw; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i++) + { + uint64_t dummy = ((uint64_t)0U - sw0) & (resM[i] ^ aM[i]); + resM[i] = resM[i] ^ dummy; + aM[i] = aM[i] ^ dummy; + } + uint64_t tmp[8U] = { 0U }; + memcpy(tmp, resM, (uint32_t)4U * sizeof (uint64_t)); + reduction(n, mu, tmp, res); + return; + } + uint64_t aM[4U] = { 0U }; + uint64_t c0[8U] = { 0U }; + Hacl_Bignum256_mul(a, r2, c0); + reduction(n, mu, c0, aM); + uint64_t resM[4U] = { 0U }; + uint32_t bLen; + if (bBits == (uint32_t)0U) + { + bLen = (uint32_t)1U; + } + else + { + bLen = (bBits - (uint32_t)1U) / (uint32_t)64U + (uint32_t)1U; + } + uint64_t tmp[8U] = { 0U }; + memcpy(tmp, r2, (uint32_t)4U * sizeof (uint64_t)); + reduction(n, mu, tmp, resM); + uint32_t table_len = (uint32_t)16U; + KRML_CHECK_SIZE(sizeof (uint64_t), table_len * (uint32_t)4U); + uint64_t *table = alloca(table_len * (uint32_t)4U * sizeof (uint64_t)); + memset(table, 0U, table_len * (uint32_t)4U * sizeof (uint64_t)); + memcpy(table, resM, (uint32_t)4U * sizeof (uint64_t)); + uint64_t *t1 = table + (uint32_t)4U; + memcpy(t1, aM, (uint32_t)4U * sizeof (uint64_t)); + for (uint32_t i = (uint32_t)0U; i < table_len - (uint32_t)2U; i++) + { + uint64_t *t11 = table + (i + (uint32_t)1U) * (uint32_t)4U; + uint64_t *t2 = table + (i + (uint32_t)2U) * (uint32_t)4U; + amont_mul(n, mu, t11, aM, t2); + } + for (uint32_t i0 = (uint32_t)0U; i0 < bBits / (uint32_t)4U; i0++) + { + for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i++) + { + amont_sqr(n, mu, resM, resM); + } + uint64_t mask_l = (uint64_t)16U - (uint64_t)1U; + uint32_t i1 = (bBits - (uint32_t)4U * i0 - (uint32_t)4U) / (uint32_t)64U; + uint32_t j = (bBits - (uint32_t)4U * i0 - (uint32_t)4U) % (uint32_t)64U; + uint64_t p1 = b[i1] >> j; + uint64_t ite; + if (i1 + (uint32_t)1U < bLen && (uint32_t)0U < j) + { + ite = p1 | b[i1 + (uint32_t)1U] << ((uint32_t)64U - j); + } + else + { + ite = p1; + } + uint64_t bits_l = ite & mask_l; + uint64_t a_bits_l[4U] = { 0U }; + memcpy(a_bits_l, table, (uint32_t)4U * sizeof (uint64_t)); + for (uint32_t i2 = (uint32_t)0U; i2 < table_len - (uint32_t)1U; i2++) + { + uint64_t c = FStar_UInt64_eq_mask(bits_l, (uint64_t)(i2 + (uint32_t)1U)); + uint64_t *res_j = table + (i2 + (uint32_t)1U) * (uint32_t)4U; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i++) + { + uint64_t *os = a_bits_l; + uint64_t x = (c & res_j[i]) | (~c & a_bits_l[i]); + os[i] = x; + } + } + amont_mul(n, mu, resM, a_bits_l, resM); + } + if (!(bBits % (uint32_t)4U == (uint32_t)0U)) + { + uint32_t c = bBits % (uint32_t)4U; + for (uint32_t i = (uint32_t)0U; i < c; i++) + { + amont_sqr(n, mu, resM, resM); + } + uint32_t c10 = bBits % (uint32_t)4U; + uint64_t mask_l = ((uint64_t)1U << c10) - (uint64_t)1U; + uint32_t i0 = (uint32_t)0U; + uint32_t j = (uint32_t)0U; + uint64_t p1 = b[i0] >> j; + uint64_t ite; + if (i0 + (uint32_t)1U < bLen && (uint32_t)0U < j) + { + ite = p1 | b[i0 + (uint32_t)1U] << ((uint32_t)64U - j); + } + else + { + ite = p1; + } + uint64_t bits_c = ite & mask_l; + uint64_t bits_c0 = bits_c; + uint64_t a_bits_c[4U] = { 0U }; + memcpy(a_bits_c, table, (uint32_t)4U * sizeof (uint64_t)); + for (uint32_t i1 = (uint32_t)0U; i1 < table_len - (uint32_t)1U; i1++) + { + uint64_t c1 = 
FStar_UInt64_eq_mask(bits_c0, (uint64_t)(i1 + (uint32_t)1U)); + uint64_t *res_j = table + (i1 + (uint32_t)1U) * (uint32_t)4U; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i++) + { + uint64_t *os = a_bits_c; + uint64_t x = (c1 & res_j[i]) | (~c1 & a_bits_c[i]); + os[i] = x; + } + } + amont_mul(n, mu, resM, a_bits_c, resM); + } + uint64_t tmp0[8U] = { 0U }; + memcpy(tmp0, resM, (uint32_t)4U * sizeof (uint64_t)); + reduction(n, mu, tmp0, res); +} + +static inline void +exp_vartime( + uint32_t nBits, + uint64_t *n, + uint64_t *a, + uint32_t bBits, + uint64_t *b, + uint64_t *res +) +{ + uint64_t r2[4U] = { 0U }; + precompr2(nBits, n, r2); + uint64_t mu = Hacl_Bignum_ModInvLimb_mod_inv_uint64(n[0U]); + exp_vartime_precomp(n, mu, r2, a, bBits, b, res); +} + +static inline void +exp_consttime( + uint32_t nBits, + uint64_t *n, + uint64_t *a, + uint32_t bBits, + uint64_t *b, + uint64_t *res +) +{ + uint64_t r2[4U] = { 0U }; + precompr2(nBits, n, r2); + uint64_t mu = Hacl_Bignum_ModInvLimb_mod_inv_uint64(n[0U]); + exp_consttime_precomp(n, mu, r2, a, bBits, b, res); +} + +/* +Write `a ^ b mod n` in `res`. + + The arguments a, n and the outparam res are meant to be 256-bit bignums, i.e. uint64_t[4]. + + The argument b is a bignum of any size, and bBits is an upper bound on the + number of significant bits of b. A tighter bound results in faster execution + time. When in doubt, the number of bits for the bignum size is always a safe + default, e.g. if b is a 256-bit bignum, bBits should be 256. + + The function is *NOT* constant-time on the argument b. See the + mod_exp_consttime_* functions for constant-time variants. + + The function returns false if any of the following preconditions are violated, + true otherwise. + • n % 2 = 1 + • 1 < n + • b < pow2 bBits + • a < n +*/ +bool +Hacl_Bignum256_mod_exp_vartime( + uint64_t *n, + uint64_t *a, + uint32_t bBits, + uint64_t *b, + uint64_t *res +) +{ + uint64_t is_valid_m = exp_check(n, a, bBits, b); + uint32_t + nBits = (uint32_t)64U * (uint32_t)Hacl_Bignum_Lib_bn_get_top_index_u64((uint32_t)4U, n); + if (is_valid_m == (uint64_t)0xFFFFFFFFFFFFFFFFU) + { + exp_vartime(nBits, n, a, bBits, b, res); + } + else + { + memset(res, 0U, (uint32_t)4U * sizeof (uint64_t)); + } + return is_valid_m == (uint64_t)0xFFFFFFFFFFFFFFFFU; +} + +/* +Write `a ^ b mod n` in `res`. + + The arguments a, n and the outparam res are meant to be 256-bit bignums, i.e. uint64_t[4]. + + The argument b is a bignum of any size, and bBits is an upper bound on the + number of significant bits of b. A tighter bound results in faster execution + time. When in doubt, the number of bits for the bignum size is always a safe + default, e.g. if b is a 256-bit bignum, bBits should be 256. + + This function is constant-time over its argument b, at the cost of a slower + execution time than mod_exp_vartime. + + The function returns false if any of the following preconditions are violated, + true otherwise. 
+ • n % 2 = 1 + • 1 < n + • b < pow2 bBits + • a < n +*/ +bool +Hacl_Bignum256_mod_exp_consttime( + uint64_t *n, + uint64_t *a, + uint32_t bBits, + uint64_t *b, + uint64_t *res +) +{ + uint64_t is_valid_m = exp_check(n, a, bBits, b); + uint32_t + nBits = (uint32_t)64U * (uint32_t)Hacl_Bignum_Lib_bn_get_top_index_u64((uint32_t)4U, n); + if (is_valid_m == (uint64_t)0xFFFFFFFFFFFFFFFFU) + { + exp_consttime(nBits, n, a, bBits, b, res); + } + else + { + memset(res, 0U, (uint32_t)4U * sizeof (uint64_t)); + } + return is_valid_m == (uint64_t)0xFFFFFFFFFFFFFFFFU; +} + +/* +Write `a ^ (-1) mod n` in `res`. + + The arguments a, n and the outparam res are meant to be 256-bit bignums, i.e. uint64_t[4]. + + Before calling this function, the caller will need to ensure that the following + preconditions are observed. + • n is a prime + + The function returns false if any of the following preconditions are violated, true otherwise. + • n % 2 = 1 + • 1 < n + • 0 < a + • a < n +*/ +bool Hacl_Bignum256_mod_inv_prime_vartime(uint64_t *n, uint64_t *a, uint64_t *res) +{ + uint64_t one[4U] = { 0U }; + memset(one, 0U, (uint32_t)4U * sizeof (uint64_t)); + one[0U] = (uint64_t)1U; + uint64_t bit0 = n[0U] & (uint64_t)1U; + uint64_t m0 = (uint64_t)0U - bit0; + uint64_t acc0 = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i++) + { + uint64_t beq = FStar_UInt64_eq_mask(one[i], n[i]); + uint64_t blt = ~FStar_UInt64_gte_mask(one[i], n[i]); + acc0 = (beq & acc0) | (~beq & ((blt & (uint64_t)0xFFFFFFFFFFFFFFFFU) | (~blt & (uint64_t)0U))); + } + uint64_t m1 = acc0; + uint64_t m00 = m0 & m1; + uint64_t bn_zero[4U] = { 0U }; + uint64_t mask = (uint64_t)0xFFFFFFFFFFFFFFFFU; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i++) + { + uint64_t uu____0 = FStar_UInt64_eq_mask(a[i], bn_zero[i]); + mask = uu____0 & mask; + } + uint64_t mask1 = mask; + uint64_t res10 = mask1; + uint64_t m10 = res10; + uint64_t acc = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i++) + { + uint64_t beq = FStar_UInt64_eq_mask(a[i], n[i]); + uint64_t blt = ~FStar_UInt64_gte_mask(a[i], n[i]); + acc = (beq & acc) | (~beq & ((blt & (uint64_t)0xFFFFFFFFFFFFFFFFU) | (~blt & (uint64_t)0U))); + } + uint64_t m2 = acc; + uint64_t is_valid_m = (m00 & ~m10) & m2; + uint32_t + nBits = (uint32_t)64U * (uint32_t)Hacl_Bignum_Lib_bn_get_top_index_u64((uint32_t)4U, n); + if (is_valid_m == (uint64_t)0xFFFFFFFFFFFFFFFFU) + { + uint64_t n2[4U] = { 0U }; + uint64_t c0 = Lib_IntTypes_Intrinsics_sub_borrow_u64((uint64_t)0U, n[0U], (uint64_t)2U, n2); + uint64_t c1; + if ((uint32_t)1U < (uint32_t)4U) + { + uint32_t rLen = (uint32_t)3U; + uint64_t *a1 = n + (uint32_t)1U; + uint64_t *res1 = n2 + (uint32_t)1U; + uint64_t c = c0; + for (uint32_t i = (uint32_t)0U; i < rLen / (uint32_t)4U * (uint32_t)4U / (uint32_t)4U; i++) + { + uint64_t t1 = a1[(uint32_t)4U * i]; + uint64_t *res_i0 = res1 + (uint32_t)4U * i; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t1, (uint64_t)0U, res_i0); + uint64_t t10 = a1[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t *res_i1 = res1 + (uint32_t)4U * i + (uint32_t)1U; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t10, (uint64_t)0U, res_i1); + uint64_t t11 = a1[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t *res_i2 = res1 + (uint32_t)4U * i + (uint32_t)2U; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t11, (uint64_t)0U, res_i2); + uint64_t t12 = a1[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t *res_i = res1 + (uint32_t)4U * i + (uint32_t)3U; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t12, (uint64_t)0U, res_i); 
+ } + for (uint32_t i = rLen / (uint32_t)4U * (uint32_t)4U; i < rLen; i++) + { + uint64_t t1 = a1[i]; + uint64_t *res_i = res1 + i; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t1, (uint64_t)0U, res_i); + } + uint64_t c10 = c; + c1 = c10; + } + else + { + c1 = c0; + } + exp_vartime(nBits, n, a, (uint32_t)256U, n2, res); + } + else + { + memset(res, 0U, (uint32_t)4U * sizeof (uint64_t)); + } + return is_valid_m == (uint64_t)0xFFFFFFFFFFFFFFFFU; +} + + +/**********************************************/ +/* Arithmetic functions with precomputations. */ +/**********************************************/ + + +/* +Heap-allocate and initialize a montgomery context. + + The argument n is meant to be a 256-bit bignum, i.e. uint64_t[4]. + + Before calling this function, the caller will need to ensure that the following + preconditions are observed. + • n % 2 = 1 + • 1 < n + + The caller will need to call Hacl_Bignum256_mont_ctx_free on the return value + to avoid memory leaks. +*/ +Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 *Hacl_Bignum256_mont_ctx_init(uint64_t *n) +{ + uint64_t *r2 = KRML_HOST_CALLOC((uint32_t)4U, sizeof (uint64_t)); + uint64_t *n1 = KRML_HOST_CALLOC((uint32_t)4U, sizeof (uint64_t)); + uint64_t *r21 = r2; + uint64_t *n11 = n1; + memcpy(n11, n, (uint32_t)4U * sizeof (uint64_t)); + uint32_t + nBits = (uint32_t)64U * (uint32_t)Hacl_Bignum_Lib_bn_get_top_index_u64((uint32_t)4U, n); + precompr2(nBits, n, r21); + uint64_t mu = Hacl_Bignum_ModInvLimb_mod_inv_uint64(n[0U]); + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 + res = { .len = (uint32_t)4U, .n = n11, .mu = mu, .r2 = r21 }; + KRML_CHECK_SIZE(sizeof (Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64), (uint32_t)1U); + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 + *buf = KRML_HOST_MALLOC(sizeof (Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64)); + buf[0U] = res; + return buf; +} + +/* +Deallocate the memory previously allocated by Hacl_Bignum256_mont_ctx_init. + + The argument k is a montgomery context obtained through Hacl_Bignum256_mont_ctx_init. +*/ +void Hacl_Bignum256_mont_ctx_free(Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 *k) +{ + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; + uint64_t *n = k1.n; + uint64_t *r2 = k1.r2; + KRML_HOST_FREE(n); + KRML_HOST_FREE(r2); + KRML_HOST_FREE(k); +} + +/* +Write `a mod n` in `res`. + + The argument a is meant to be a 512-bit bignum, i.e. uint64_t[8]. + The outparam res is meant to be a 256-bit bignum, i.e. uint64_t[4]. + The argument k is a montgomery context obtained through Hacl_Bignum256_mont_ctx_init. +*/ +void +Hacl_Bignum256_mod_precomp( + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 *k, + uint64_t *a, + uint64_t *res +) +{ + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; + bn_slow_precomp(k1.n, k1.mu, k1.r2, a, res); +} + +/* +Write `a ^ b mod n` in `res`. + + The arguments a and the outparam res are meant to be 256-bit bignums, i.e. uint64_t[4]. + The argument k is a montgomery context obtained through Hacl_Bignum256_mont_ctx_init. + + The argument b is a bignum of any size, and bBits is an upper bound on the + number of significant bits of b. A tighter bound results in faster execution + time. When in doubt, the number of bits for the bignum size is always a safe + default, e.g. if b is a 256-bit bignum, bBits should be 256. + + The function is *NOT* constant-time on the argument b. See the + mod_exp_consttime_* functions for constant-time variants. + + Before calling this function, the caller will need to ensure that the following + preconditions are observed. 
+ • b < pow2 bBits + • a < n +*/ +void +Hacl_Bignum256_mod_exp_vartime_precomp( + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 *k, + uint64_t *a, + uint32_t bBits, + uint64_t *b, + uint64_t *res +) +{ + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; + exp_vartime_precomp(k1.n, k1.mu, k1.r2, a, bBits, b, res); +} + +/* +Write `a ^ b mod n` in `res`. + + The arguments a and the outparam res are meant to be 256-bit bignums, i.e. uint64_t[4]. + The argument k is a montgomery context obtained through Hacl_Bignum256_mont_ctx_init. + + The argument b is a bignum of any size, and bBits is an upper bound on the + number of significant bits of b. A tighter bound results in faster execution + time. When in doubt, the number of bits for the bignum size is always a safe + default, e.g. if b is a 256-bit bignum, bBits should be 256. + + This function is constant-time over its argument b, at the cost of a slower + execution time than mod_exp_vartime_*. + + Before calling this function, the caller will need to ensure that the following + preconditions are observed. + • b < pow2 bBits + • a < n +*/ +void +Hacl_Bignum256_mod_exp_consttime_precomp( + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 *k, + uint64_t *a, + uint32_t bBits, + uint64_t *b, + uint64_t *res +) +{ + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; + exp_consttime_precomp(k1.n, k1.mu, k1.r2, a, bBits, b, res); +} + +/* +Write `a ^ (-1) mod n` in `res`. + + The argument a and the outparam res are meant to be 256-bit bignums, i.e. uint64_t[4]. + The argument k is a montgomery context obtained through Hacl_Bignum256_mont_ctx_init. + + Before calling this function, the caller will need to ensure that the following + preconditions are observed. + • n is a prime + • 0 < a + • a < n +*/ +void +Hacl_Bignum256_mod_inv_prime_vartime_precomp( + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 *k, + uint64_t *a, + uint64_t *res +) +{ + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; + uint64_t n2[4U] = { 0U }; + uint64_t c0 = Lib_IntTypes_Intrinsics_sub_borrow_u64((uint64_t)0U, k1.n[0U], (uint64_t)2U, n2); + uint64_t c1; + if ((uint32_t)1U < (uint32_t)4U) + { + uint32_t rLen = (uint32_t)3U; + uint64_t *a1 = k1.n + (uint32_t)1U; + uint64_t *res1 = n2 + (uint32_t)1U; + uint64_t c = c0; + for (uint32_t i = (uint32_t)0U; i < rLen / (uint32_t)4U * (uint32_t)4U / (uint32_t)4U; i++) + { + uint64_t t1 = a1[(uint32_t)4U * i]; + uint64_t *res_i0 = res1 + (uint32_t)4U * i; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t1, (uint64_t)0U, res_i0); + uint64_t t10 = a1[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t *res_i1 = res1 + (uint32_t)4U * i + (uint32_t)1U; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t10, (uint64_t)0U, res_i1); + uint64_t t11 = a1[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t *res_i2 = res1 + (uint32_t)4U * i + (uint32_t)2U; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t11, (uint64_t)0U, res_i2); + uint64_t t12 = a1[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t *res_i = res1 + (uint32_t)4U * i + (uint32_t)3U; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t12, (uint64_t)0U, res_i); + } + for (uint32_t i = rLen / (uint32_t)4U * (uint32_t)4U; i < rLen; i++) + { + uint64_t t1 = a1[i]; + uint64_t *res_i = res1 + i; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t1, (uint64_t)0U, res_i); + } + uint64_t c10 = c; + c1 = c10; + } + else + { + c1 = c0; + } + exp_vartime_precomp(k1.n, k1.mu, k1.r2, a, (uint32_t)256U, n2, res); +} + + +/********************/ +/* Loads and stores */ +/********************/ + + +/* +Load a bid-endian bignum from 
memory. + + The argument b points to len bytes of valid memory. + The function returns a heap-allocated bignum of size sufficient to hold the + result of loading b, or NULL if either the allocation failed, or the amount of + required memory would exceed 4GB. + + If the return value is non-null, clients must eventually call free(3) on it to + avoid memory leaks. +*/ +uint64_t *Hacl_Bignum256_new_bn_from_bytes_be(uint32_t len, uint8_t *b) +{ + if + ( + len + == (uint32_t)0U + || !((len - (uint32_t)1U) / (uint32_t)8U + (uint32_t)1U <= (uint32_t)536870911U) + ) + { + return NULL; + } + KRML_CHECK_SIZE(sizeof (uint64_t), (len - (uint32_t)1U) / (uint32_t)8U + (uint32_t)1U); + uint64_t + *res = KRML_HOST_CALLOC((len - (uint32_t)1U) / (uint32_t)8U + (uint32_t)1U, sizeof (uint64_t)); + if (res == NULL) + { + return res; + } + uint64_t *res1 = res; + uint64_t *res2 = res1; + uint32_t bnLen = (len - (uint32_t)1U) / (uint32_t)8U + (uint32_t)1U; + uint32_t tmpLen = (uint32_t)8U * bnLen; + KRML_CHECK_SIZE(sizeof (uint8_t), tmpLen); + uint8_t *tmp = alloca(tmpLen * sizeof (uint8_t)); + memset(tmp, 0U, tmpLen * sizeof (uint8_t)); + memcpy(tmp + tmpLen - len, b, len * sizeof (uint8_t)); + for (uint32_t i = (uint32_t)0U; i < bnLen; i++) + { + uint64_t *os = res2; + uint64_t u = load64_be(tmp + (bnLen - i - (uint32_t)1U) * (uint32_t)8U); + uint64_t x = u; + os[i] = x; + } + return res2; +} + +/* +Load a little-endian bignum from memory. + + The argument b points to len bytes of valid memory. + The function returns a heap-allocated bignum of size sufficient to hold the + result of loading b, or NULL if either the allocation failed, or the amount of + required memory would exceed 4GB. + + If the return value is non-null, clients must eventually call free(3) on it to + avoid memory leaks. +*/ +uint64_t *Hacl_Bignum256_new_bn_from_bytes_le(uint32_t len, uint8_t *b) +{ + if + ( + len + == (uint32_t)0U + || !((len - (uint32_t)1U) / (uint32_t)8U + (uint32_t)1U <= (uint32_t)536870911U) + ) + { + return NULL; + } + KRML_CHECK_SIZE(sizeof (uint64_t), (len - (uint32_t)1U) / (uint32_t)8U + (uint32_t)1U); + uint64_t + *res = KRML_HOST_CALLOC((len - (uint32_t)1U) / (uint32_t)8U + (uint32_t)1U, sizeof (uint64_t)); + if (res == NULL) + { + return res; + } + uint64_t *res1 = res; + uint64_t *res2 = res1; + uint32_t bnLen = (len - (uint32_t)1U) / (uint32_t)8U + (uint32_t)1U; + uint32_t tmpLen = (uint32_t)8U * bnLen; + KRML_CHECK_SIZE(sizeof (uint8_t), tmpLen); + uint8_t *tmp = alloca(tmpLen * sizeof (uint8_t)); + memset(tmp, 0U, tmpLen * sizeof (uint8_t)); + memcpy(tmp, b, len * sizeof (uint8_t)); + for (uint32_t i = (uint32_t)0U; i < (len - (uint32_t)1U) / (uint32_t)8U + (uint32_t)1U; i++) + { + uint64_t *os = res2; + uint8_t *bj = tmp + i * (uint32_t)8U; + uint64_t u = load64_le(bj); + uint64_t r1 = u; + uint64_t x = r1; + os[i] = x; + } + return res2; +} + +/* +Serialize a bignum into big-endian memory. + + The argument b points to a 256-bit bignum. + The outparam res points to 32 bytes of valid memory. 
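As a minimal round-trip sketch of the load/store API above (the helper name is hypothetical, and the include path assumes this patch's src/math/bigfix layout):

#include <stdlib.h>
#include <stdint.h>
#include "math/bigfix/Hacl_Bignum256.h"

/* Hypothetical helper: load a 32-byte big-endian value into a heap-allocated
   256-bit bignum and serialize it back out. Returns 0 on success, -1 if the
   allocation inside Hacl_Bignum256_new_bn_from_bytes_be failed. */
static int roundtrip_be(uint8_t in[32U], uint8_t out[32U])
{
  uint64_t *bn = Hacl_Bignum256_new_bn_from_bytes_be((uint32_t)32U, in);
  if (bn == NULL)
    return -1;
  Hacl_Bignum256_bn_to_bytes_be(bn, out);  /* out now holds the same 32 bytes as in */
  free(bn);                                /* caller frees the heap-allocated bignum */
  return 0;
}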
+*/ +void Hacl_Bignum256_bn_to_bytes_be(uint64_t *b, uint8_t *res) +{ + uint32_t bnLen = ((uint32_t)32U - (uint32_t)1U) / (uint32_t)8U + (uint32_t)1U; + uint32_t tmpLen = (uint32_t)8U * bnLen; + KRML_CHECK_SIZE(sizeof (uint8_t), tmpLen); + uint8_t *tmp = alloca(tmpLen * sizeof (uint8_t)); + memset(tmp, 0U, tmpLen * sizeof (uint8_t)); + uint32_t numb = (uint32_t)8U; + for (uint32_t i = (uint32_t)0U; i < bnLen; i++) + { + store64_be(tmp + i * numb, b[bnLen - i - (uint32_t)1U]); + } + memcpy(res, tmp + tmpLen - (uint32_t)32U, (uint32_t)32U * sizeof (uint8_t)); +} + +/* +Serialize a bignum into little-endian memory. + + The argument b points to a 256-bit bignum. + The outparam res points to 32 bytes of valid memory. +*/ +void Hacl_Bignum256_bn_to_bytes_le(uint64_t *b, uint8_t *res) +{ + uint32_t bnLen = ((uint32_t)32U - (uint32_t)1U) / (uint32_t)8U + (uint32_t)1U; + uint32_t tmpLen = (uint32_t)8U * bnLen; + KRML_CHECK_SIZE(sizeof (uint8_t), tmpLen); + uint8_t *tmp = alloca(tmpLen * sizeof (uint8_t)); + memset(tmp, 0U, tmpLen * sizeof (uint8_t)); + for (uint32_t i = (uint32_t)0U; i < bnLen; i++) + { + store64_le(tmp + i * (uint32_t)8U, b[i]); + } + memcpy(res, tmp, (uint32_t)32U * sizeof (uint8_t)); +} + + +/***************/ +/* Comparisons */ +/***************/ + + +/* +Returns 2 ^ 64 - 1 if and only if the argument a is strictly less than the argument b, + otherwise returns 0. +*/ +uint64_t Hacl_Bignum256_lt_mask(uint64_t *a, uint64_t *b) +{ + uint64_t acc = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i++) + { + uint64_t beq = FStar_UInt64_eq_mask(a[i], b[i]); + uint64_t blt = ~FStar_UInt64_gte_mask(a[i], b[i]); + acc = (beq & acc) | (~beq & ((blt & (uint64_t)0xFFFFFFFFFFFFFFFFU) | (~blt & (uint64_t)0U))); + } + return acc; +} + diff --git a/src/math/bigfix/Hacl_Bignum256.h b/src/math/bigfix/Hacl_Bignum256.h new file mode 100644 index 000000000..db7c99b78 --- /dev/null +++ b/src/math/bigfix/Hacl_Bignum256.h @@ -0,0 +1,375 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + + +#ifndef __Hacl_Bignum256_H +#define __Hacl_Bignum256_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include "kremlin/internal/types.h" +#include "kremlin/lowstar_endianness.h" +#include +#include "kremlin/internal/target.h" + +#include "Hacl_Bignum.h" +#include "Hacl_Bignum_Base.h" + +/******************************************************************************* + +A verified 256-bit bignum library. + +This is a 64-bit optimized version, where bignums are represented as an array +of four unsigned 64-bit integers, i.e. uint64_t[4]. Furthermore, the +limbs are stored in little-endian format, i.e. the least significant limb is at +index 0. Each limb is stored in native format in memory. Example: + + uint64_t sixteen[4] = { 0x10; 0x00; 0x00; 0x00 } + +We strongly encourage users to go through the conversion functions, e.g. +bn_from_bytes_be, to i) not depend on internal representation choices and ii) +have the ability to switch easily to a 32-bit optimized version in the future. + +*******************************************************************************/ + +/************************/ +/* Arithmetic functions */ +/************************/ + + +/* +Write `a + b mod 2^256` in `res`. + + This functions returns the carry. + + The arguments a, b and res are meant to be 256-bit bignums, i.e. uint64_t[4] +*/ +uint64_t Hacl_Bignum256_add(uint64_t *a, uint64_t *b, uint64_t *res); + +/* +Write `a - b mod 2^256` in `res`. + + This functions returns the carry. + + The arguments a, b and res are meant to be 256-bit bignums, i.e. uint64_t[4] +*/ +uint64_t Hacl_Bignum256_sub(uint64_t *a, uint64_t *b, uint64_t *res); + +/* +Write `a * b` in `res`. + + The arguments a and b are meant to be 256-bit bignums, i.e. uint64_t[4]. + The outparam res is meant to be a 512-bit bignum, i.e. uint64_t[8]. +*/ +void Hacl_Bignum256_mul(uint64_t *a, uint64_t *b, uint64_t *res); + +/* +Write `a * a` in `res`. + + The argument a is meant to be a 256-bit bignum, i.e. uint64_t[4]. + The outparam res is meant to be a 512-bit bignum, i.e. uint64_t[8]. +*/ +void Hacl_Bignum256_sqr(uint64_t *a, uint64_t *res); + +/* +Write `a mod n` in `res`. + + The argument a is meant to be a 512-bit bignum, i.e. uint64_t[8]. + The argument n and the outparam res are meant to be 256-bit bignums, i.e. uint64_t[4]. + + The function returns false if any of the following preconditions are violated, + true otherwise. + • 1 < n + • n % 2 = 1 +*/ +bool Hacl_Bignum256_mod(uint64_t *n, uint64_t *a, uint64_t *res); + +/* +Write `a ^ b mod n` in `res`. + + The arguments a, n and the outparam res are meant to be 256-bit bignums, i.e. uint64_t[4]. + + The argument b is a bignum of any size, and bBits is an upper bound on the + number of significant bits of b. A tighter bound results in faster execution + time. When in doubt, the number of bits for the bignum size is always a safe + default, e.g. if b is a 256-bit bignum, bBits should be 256. + + The function is *NOT* constant-time on the argument b. See the + mod_exp_consttime_* functions for constant-time variants. + + The function returns false if any of the following preconditions are violated, + true otherwise. + • n % 2 = 1 + • 1 < n + • b < pow2 bBits + • a < n +*/ +bool +Hacl_Bignum256_mod_exp_vartime( + uint64_t *n, + uint64_t *a, + uint32_t bBits, + uint64_t *b, + uint64_t *res +); + +/* +Write `a ^ b mod n` in `res`. + + The arguments a, n and the outparam res are meant to be 256-bit bignums, i.e. uint64_t[4]. 
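A minimal usage sketch for the exponentiation entry points above (the wrapper name is hypothetical; bBits is set to 256, the safe default for a full 256-bit exponent; the include path assumes this patch's src/math/bigfix layout):

#include <stdbool.h>
#include <stdint.h>
#include "math/bigfix/Hacl_Bignum256.h"

/* Hypothetical wrapper: res = a ^ b mod n, with n, a, b, res all 256-bit bignums
   (uint64_t[4]). Returns false when the preconditions are violated (n even,
   n <= 1, a >= n, or b >= 2^bBits), in which case res is set to zero. */
static bool modexp256(uint64_t n[4U], uint64_t a[4U], uint64_t b[4U], uint64_t res[4U])
{
  return Hacl_Bignum256_mod_exp_vartime(n, a, (uint32_t)256U, b, res);
}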
+ + The argument b is a bignum of any size, and bBits is an upper bound on the + number of significant bits of b. A tighter bound results in faster execution + time. When in doubt, the number of bits for the bignum size is always a safe + default, e.g. if b is a 256-bit bignum, bBits should be 256. + + This function is constant-time over its argument b, at the cost of a slower + execution time than mod_exp_vartime. + + The function returns false if any of the following preconditions are violated, + true otherwise. + • n % 2 = 1 + • 1 < n + • b < pow2 bBits + • a < n +*/ +bool +Hacl_Bignum256_mod_exp_consttime( + uint64_t *n, + uint64_t *a, + uint32_t bBits, + uint64_t *b, + uint64_t *res +); + +/* +Write `a ^ (-1) mod n` in `res`. + + The arguments a, n and the outparam res are meant to be 256-bit bignums, i.e. uint64_t[4]. + + Before calling this function, the caller will need to ensure that the following + preconditions are observed. + • n is a prime + + The function returns false if any of the following preconditions are violated, true otherwise. + • n % 2 = 1 + • 1 < n + • 0 < a + • a < n +*/ +bool Hacl_Bignum256_mod_inv_prime_vartime(uint64_t *n, uint64_t *a, uint64_t *res); + +typedef struct Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64_s +{ + uint32_t len; + uint64_t *n; + uint64_t mu; + uint64_t *r2; +} +Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64; + + +/**********************************************/ +/* Arithmetic functions with precomputations. */ +/**********************************************/ + + +/* +Heap-allocate and initialize a montgomery context. + + The argument n is meant to be a 256-bit bignum, i.e. uint64_t[4]. + + Before calling this function, the caller will need to ensure that the following + preconditions are observed. + • n % 2 = 1 + • 1 < n + + The caller will need to call Hacl_Bignum256_mont_ctx_free on the return value + to avoid memory leaks. +*/ +Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 *Hacl_Bignum256_mont_ctx_init(uint64_t *n); + +/* +Deallocate the memory previously allocated by Hacl_Bignum256_mont_ctx_init. + + The argument k is a montgomery context obtained through Hacl_Bignum256_mont_ctx_init. +*/ +void Hacl_Bignum256_mont_ctx_free(Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 *k); + +/* +Write `a mod n` in `res`. + + The argument a is meant to be a 512-bit bignum, i.e. uint64_t[8]. + The outparam res is meant to be a 256-bit bignum, i.e. uint64_t[4]. + The argument k is a montgomery context obtained through Hacl_Bignum256_mont_ctx_init. +*/ +void +Hacl_Bignum256_mod_precomp( + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 *k, + uint64_t *a, + uint64_t *res +); + +/* +Write `a ^ b mod n` in `res`. + + The arguments a and the outparam res are meant to be 256-bit bignums, i.e. uint64_t[4]. + The argument k is a montgomery context obtained through Hacl_Bignum256_mont_ctx_init. + + The argument b is a bignum of any size, and bBits is an upper bound on the + number of significant bits of b. A tighter bound results in faster execution + time. When in doubt, the number of bits for the bignum size is always a safe + default, e.g. if b is a 256-bit bignum, bBits should be 256. + + The function is *NOT* constant-time on the argument b. See the + mod_exp_consttime_* functions for constant-time variants. + + Before calling this function, the caller will need to ensure that the following + preconditions are observed. 
+ • b < pow2 bBits + • a < n +*/ +void +Hacl_Bignum256_mod_exp_vartime_precomp( + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 *k, + uint64_t *a, + uint32_t bBits, + uint64_t *b, + uint64_t *res +); + +/* +Write `a ^ b mod n` in `res`. + + The arguments a and the outparam res are meant to be 256-bit bignums, i.e. uint64_t[4]. + The argument k is a montgomery context obtained through Hacl_Bignum256_mont_ctx_init. + + The argument b is a bignum of any size, and bBits is an upper bound on the + number of significant bits of b. A tighter bound results in faster execution + time. When in doubt, the number of bits for the bignum size is always a safe + default, e.g. if b is a 256-bit bignum, bBits should be 256. + + This function is constant-time over its argument b, at the cost of a slower + execution time than mod_exp_vartime_*. + + Before calling this function, the caller will need to ensure that the following + preconditions are observed. + • b < pow2 bBits + • a < n +*/ +void +Hacl_Bignum256_mod_exp_consttime_precomp( + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 *k, + uint64_t *a, + uint32_t bBits, + uint64_t *b, + uint64_t *res +); + +/* +Write `a ^ (-1) mod n` in `res`. + + The argument a and the outparam res are meant to be 256-bit bignums, i.e. uint64_t[4]. + The argument k is a montgomery context obtained through Hacl_Bignum256_mont_ctx_init. + + Before calling this function, the caller will need to ensure that the following + preconditions are observed. + • n is a prime + • 0 < a + • a < n +*/ +void +Hacl_Bignum256_mod_inv_prime_vartime_precomp( + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 *k, + uint64_t *a, + uint64_t *res +); + + +/********************/ +/* Loads and stores */ +/********************/ + + +/* +Load a bid-endian bignum from memory. + + The argument b points to len bytes of valid memory. + The function returns a heap-allocated bignum of size sufficient to hold the + result of loading b, or NULL if either the allocation failed, or the amount of + required memory would exceed 4GB. + + If the return value is non-null, clients must eventually call free(3) on it to + avoid memory leaks. +*/ +uint64_t *Hacl_Bignum256_new_bn_from_bytes_be(uint32_t len, uint8_t *b); + +/* +Load a little-endian bignum from memory. + + The argument b points to len bytes of valid memory. + The function returns a heap-allocated bignum of size sufficient to hold the + result of loading b, or NULL if either the allocation failed, or the amount of + required memory would exceed 4GB. + + If the return value is non-null, clients must eventually call free(3) on it to + avoid memory leaks. +*/ +uint64_t *Hacl_Bignum256_new_bn_from_bytes_le(uint32_t len, uint8_t *b); + +/* +Serialize a bignum into big-endian memory. + + The argument b points to a 256-bit bignum. + The outparam res points to 32 bytes of valid memory. +*/ +void Hacl_Bignum256_bn_to_bytes_be(uint64_t *b, uint8_t *res); + +/* +Serialize a bignum into little-endian memory. + + The argument b points to a 256-bit bignum. + The outparam res points to 32 bytes of valid memory. +*/ +void Hacl_Bignum256_bn_to_bytes_le(uint64_t *b, uint8_t *res); + + +/***************/ +/* Comparisons */ +/***************/ + + +/* +Returns 2 ^ 64 - 1 if and only if the argument a is strictly less than the argument b, + otherwise returns 0. 
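The all-ones/all-zeros mask convention above composes with the branchless selection idiom used throughout this file; a small sketch (helper name hypothetical, include path as in this patch):

#include <stdint.h>
#include "math/bigfix/Hacl_Bignum256.h"

/* Hypothetical helper: constant-time select, dst = (a < b) ? x : y, limb by limb. */
static void select_if_lt(uint64_t a[4U], uint64_t b[4U],
                         uint64_t x[4U], uint64_t y[4U], uint64_t dst[4U])
{
  uint64_t m = Hacl_Bignum256_lt_mask(a, b);  /* 0xFFFFFFFFFFFFFFFF iff a < b, else 0 */
  for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i++)
    dst[i] = (m & x[i]) | (~m & y[i]);
}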
+*/ +uint64_t Hacl_Bignum256_lt_mask(uint64_t *a, uint64_t *b); + +#if defined(__cplusplus) +} +#endif + +#define __Hacl_Bignum256_H_DEFINED +#endif diff --git a/src/math/bigfix/Hacl_Bignum_Base.h b/src/math/bigfix/Hacl_Bignum_Base.h new file mode 100644 index 000000000..f22c22392 --- /dev/null +++ b/src/math/bigfix/Hacl_Bignum_Base.h @@ -0,0 +1,73 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + + +#ifndef __Hacl_Bignum_Base_H +#define __Hacl_Bignum_Base_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include "kremlin/internal/types.h" +#include "kremlin/lowstar_endianness.h" +#include +#include "kremlin/internal/target.h" + +static inline uint64_t +Hacl_Bignum_Base_mul_wide_add_u64(uint64_t a, uint64_t b, uint64_t c_in, uint64_t *out) +{ + FStar_UInt128_uint128 + res = FStar_UInt128_add(FStar_UInt128_mul_wide(a, b), FStar_UInt128_uint64_to_uint128(c_in)); + out[0U] = FStar_UInt128_uint128_to_uint64(res); + return FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(res, (uint32_t)64U)); +} + +static inline uint32_t +Hacl_Bignum_Base_mul_wide_add2_u32(uint32_t a, uint32_t b, uint32_t c_in, uint32_t *out) +{ + uint32_t out0 = out[0U]; + uint64_t res = (uint64_t)a * (uint64_t)b + (uint64_t)c_in + (uint64_t)out0; + out[0U] = (uint32_t)res; + return (uint32_t)(res >> (uint32_t)32U); +} + +static inline uint64_t +Hacl_Bignum_Base_mul_wide_add2_u64(uint64_t a, uint64_t b, uint64_t c_in, uint64_t *out) +{ + uint64_t out0 = out[0U]; + FStar_UInt128_uint128 + res = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(a, b), + FStar_UInt128_uint64_to_uint128(c_in)), + FStar_UInt128_uint64_to_uint128(out0)); + out[0U] = FStar_UInt128_uint128_to_uint64(res); + return FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(res, (uint32_t)64U)); +} + +#if defined(__cplusplus) +} +#endif + +#define __Hacl_Bignum_Base_H_DEFINED +#endif diff --git a/src/math/bigfix/Hacl_IntTypes_Intrinsics.h b/src/math/bigfix/Hacl_IntTypes_Intrinsics.h new file mode 100644 index 000000000..5faf4eddf --- /dev/null +++ b/src/math/bigfix/Hacl_IntTypes_Intrinsics.h @@ -0,0 +1,88 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without 
limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + + +#ifndef __Hacl_IntTypes_Intrinsics_H +#define __Hacl_IntTypes_Intrinsics_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include "kremlin/internal/types.h" +#include "kremlin/lowstar_endianness.h" +#include +#include "kremlin/internal/target.h" + +static inline uint32_t +Hacl_IntTypes_Intrinsics_add_carry_u32(uint32_t cin, uint32_t x, uint32_t y, uint32_t *r) +{ + uint32_t res = x + cin + y; + uint32_t + c = (~FStar_UInt32_gte_mask(res, x) | (FStar_UInt32_eq_mask(res, x) & cin)) & (uint32_t)1U; + r[0U] = res; + return c; +} + +static inline uint64_t +Hacl_IntTypes_Intrinsics_add_carry_u64(uint64_t cin, uint64_t x, uint64_t y, uint64_t *r) +{ + uint64_t res = x + cin + y; + uint64_t + c = (~FStar_UInt64_gte_mask(res, x) | (FStar_UInt64_eq_mask(res, x) & cin)) & (uint64_t)1U; + r[0U] = res; + return c; +} + +static inline uint32_t +Hacl_IntTypes_Intrinsics_sub_borrow_u32(uint32_t cin, uint32_t x, uint32_t y, uint32_t *r) +{ + uint32_t res = x - y - cin; + uint32_t + c = + ((FStar_UInt32_gte_mask(res, x) & ~FStar_UInt32_eq_mask(res, x)) + | (FStar_UInt32_eq_mask(res, x) & cin)) + & (uint32_t)1U; + r[0U] = res; + return c; +} + +static inline uint64_t +Hacl_IntTypes_Intrinsics_sub_borrow_u64(uint64_t cin, uint64_t x, uint64_t y, uint64_t *r) +{ + uint64_t res = x - y - cin; + uint64_t + c = + ((FStar_UInt64_gte_mask(res, x) & ~FStar_UInt64_eq_mask(res, x)) + | (FStar_UInt64_eq_mask(res, x) & cin)) + & (uint64_t)1U; + r[0U] = res; + return c; +} + +#if defined(__cplusplus) +} +#endif + +#define __Hacl_IntTypes_Intrinsics_H_DEFINED +#endif diff --git a/src/math/bigfix/kremlib/FStar_UInt128.h b/src/math/bigfix/kremlib/FStar_UInt128.h new file mode 100644 index 000000000..9b5ece517 --- /dev/null +++ b/src/math/bigfix/kremlib/FStar_UInt128.h @@ -0,0 +1,79 @@ +/* + Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. 
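The portable carry/borrow intrinsics in Hacl_IntTypes_Intrinsics.h above chain naturally across limbs; a brief sketch (helper name hypothetical; the include path assumes this patch's layout with the kremlib mask helpers on the include path):

#include <stdint.h>
#include "math/bigfix/Hacl_IntTypes_Intrinsics.h"

/* Hypothetical helper: add two 2-limb (128-bit) numbers, propagating the carry
   through Hacl_IntTypes_Intrinsics_add_carry_u64. Returns the final carry out. */
static uint64_t add_2limb(uint64_t a[2U], uint64_t b[2U], uint64_t r[2U])
{
  uint64_t c = Hacl_IntTypes_Intrinsics_add_carry_u64((uint64_t)0U, a[0U], b[0U], r);
  c = Hacl_IntTypes_Intrinsics_add_carry_u64(c, a[1U], b[1U], r + 1);
  return c;
}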
+*/ + + +#ifndef __FStar_UInt128_H +#define __FStar_UInt128_H +#include +#include +#include "kremlin/lowstar_endianness.h" +#include "kremlin/internal/types.h" +#include "kremlin/internal/target.h" + + + + +static inline FStar_UInt128_uint128 +FStar_UInt128_add(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline FStar_UInt128_uint128 +FStar_UInt128_add_underspec(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline FStar_UInt128_uint128 +FStar_UInt128_add_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline FStar_UInt128_uint128 +FStar_UInt128_sub(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline FStar_UInt128_uint128 +FStar_UInt128_sub_underspec(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline FStar_UInt128_uint128 +FStar_UInt128_sub_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline FStar_UInt128_uint128 +FStar_UInt128_logand(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline FStar_UInt128_uint128 +FStar_UInt128_logxor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline FStar_UInt128_uint128 +FStar_UInt128_logor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline FStar_UInt128_uint128 FStar_UInt128_lognot(FStar_UInt128_uint128 a); + +static inline FStar_UInt128_uint128 +FStar_UInt128_shift_left(FStar_UInt128_uint128 a, uint32_t s); + +static inline FStar_UInt128_uint128 +FStar_UInt128_shift_right(FStar_UInt128_uint128 a, uint32_t s); + +static inline bool FStar_UInt128_eq(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline bool FStar_UInt128_gt(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline bool FStar_UInt128_lt(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline bool FStar_UInt128_gte(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline bool FStar_UInt128_lte(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline FStar_UInt128_uint128 +FStar_UInt128_eq_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline FStar_UInt128_uint128 +FStar_UInt128_gte_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline FStar_UInt128_uint128 FStar_UInt128_uint64_to_uint128(uint64_t a); + +static inline uint64_t FStar_UInt128_uint128_to_uint64(FStar_UInt128_uint128 a); + +static inline FStar_UInt128_uint128 FStar_UInt128_mul32(uint64_t x, uint32_t y); + +static inline FStar_UInt128_uint128 FStar_UInt128_mul_wide(uint64_t x, uint64_t y); + + +#define __FStar_UInt128_H_DEFINED +#endif diff --git a/src/math/bigfix/kremlib/FStar_UInt128_Verified.h b/src/math/bigfix/kremlib/FStar_UInt128_Verified.h new file mode 100644 index 000000000..45e3c1117 --- /dev/null +++ b/src/math/bigfix/kremlib/FStar_UInt128_Verified.h @@ -0,0 +1,347 @@ +/* + Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. 
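The uint128 shim declared above is what the bignum code uses for 64x64 -> 128-bit products (see amont_sqr earlier in Hacl_Bignum256.c); a short sketch of splitting a wide product into high and low limbs, assuming the kremlib headers resolve as in this patch's build:

#include <stdint.h>
#include "math/bigfix/Hacl_Bignum256.h"  /* assumed to pull in the FStar_UInt128 shims in this build */

/* Hypothetical helper: full 64x64 -> 128-bit product, split into hi/lo 64-bit limbs. */
static void mul_wide_64(uint64_t x, uint64_t y, uint64_t *hi, uint64_t *lo)
{
  FStar_UInt128_uint128 p = FStar_UInt128_mul_wide(x, y);
  *lo = FStar_UInt128_uint128_to_uint64(p);
  *hi = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(p, (uint32_t)64U));
}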
+*/ + + +#ifndef __FStar_UInt128_Verified_H +#define __FStar_UInt128_Verified_H +#include +#include +#include "kremlin/internal/types.h" +#include "kremlin/internal/target.h" + + +#include "FStar_UInt_8_16_32_64.h" + +static inline uint64_t FStar_UInt128_constant_time_carry(uint64_t a, uint64_t b) +{ + return (a ^ ((a ^ b) | ((a - b) ^ b))) >> (uint32_t)63U; +} + +static inline uint64_t FStar_UInt128_carry(uint64_t a, uint64_t b) +{ + return FStar_UInt128_constant_time_carry(a, b); +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_add(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + FStar_UInt128_uint128 lit; + lit.low = a.low + b.low; + lit.high = a.high + b.high + FStar_UInt128_carry(a.low + b.low, b.low); + return lit; +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_add_underspec(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + FStar_UInt128_uint128 lit; + lit.low = a.low + b.low; + lit.high = a.high + b.high + FStar_UInt128_carry(a.low + b.low, b.low); + return lit; +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_add_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + FStar_UInt128_uint128 lit; + lit.low = a.low + b.low; + lit.high = a.high + b.high + FStar_UInt128_carry(a.low + b.low, b.low); + return lit; +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_sub(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + FStar_UInt128_uint128 lit; + lit.low = a.low - b.low; + lit.high = a.high - b.high - FStar_UInt128_carry(a.low, a.low - b.low); + return lit; +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_sub_underspec(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + FStar_UInt128_uint128 lit; + lit.low = a.low - b.low; + lit.high = a.high - b.high - FStar_UInt128_carry(a.low, a.low - b.low); + return lit; +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_sub_mod_impl(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + FStar_UInt128_uint128 lit; + lit.low = a.low - b.low; + lit.high = a.high - b.high - FStar_UInt128_carry(a.low, a.low - b.low); + return lit; +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_sub_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + return FStar_UInt128_sub_mod_impl(a, b); +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_logand(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + FStar_UInt128_uint128 lit; + lit.low = a.low & b.low; + lit.high = a.high & b.high; + return lit; +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_logxor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + FStar_UInt128_uint128 lit; + lit.low = a.low ^ b.low; + lit.high = a.high ^ b.high; + return lit; +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_logor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + FStar_UInt128_uint128 lit; + lit.low = a.low | b.low; + lit.high = a.high | b.high; + return lit; +} + +static inline FStar_UInt128_uint128 FStar_UInt128_lognot(FStar_UInt128_uint128 a) +{ + FStar_UInt128_uint128 lit; + lit.low = ~a.low; + lit.high = ~a.high; + return lit; +} + +static uint32_t FStar_UInt128_u32_64 = (uint32_t)64U; + +static inline uint64_t FStar_UInt128_add_u64_shift_left(uint64_t hi, uint64_t lo, uint32_t s) +{ + return (hi << s) + (lo >> (FStar_UInt128_u32_64 - s)); +} + +static inline uint64_t +FStar_UInt128_add_u64_shift_left_respec(uint64_t hi, uint64_t lo, uint32_t s) +{ + return FStar_UInt128_add_u64_shift_left(hi, lo, s); +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_shift_left_small(FStar_UInt128_uint128 a, uint32_t s) +{ + 
if (s == (uint32_t)0U) + { + return a; + } + else + { + FStar_UInt128_uint128 lit; + lit.low = a.low << s; + lit.high = FStar_UInt128_add_u64_shift_left_respec(a.high, a.low, s); + return lit; + } +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_shift_left_large(FStar_UInt128_uint128 a, uint32_t s) +{ + FStar_UInt128_uint128 lit; + lit.low = (uint64_t)0U; + lit.high = a.low << (s - FStar_UInt128_u32_64); + return lit; +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_shift_left(FStar_UInt128_uint128 a, uint32_t s) +{ + if (s < FStar_UInt128_u32_64) + { + return FStar_UInt128_shift_left_small(a, s); + } + else + { + return FStar_UInt128_shift_left_large(a, s); + } +} + +static inline uint64_t FStar_UInt128_add_u64_shift_right(uint64_t hi, uint64_t lo, uint32_t s) +{ + return (lo >> s) + (hi << (FStar_UInt128_u32_64 - s)); +} + +static inline uint64_t +FStar_UInt128_add_u64_shift_right_respec(uint64_t hi, uint64_t lo, uint32_t s) +{ + return FStar_UInt128_add_u64_shift_right(hi, lo, s); +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_shift_right_small(FStar_UInt128_uint128 a, uint32_t s) +{ + if (s == (uint32_t)0U) + { + return a; + } + else + { + FStar_UInt128_uint128 lit; + lit.low = FStar_UInt128_add_u64_shift_right_respec(a.high, a.low, s); + lit.high = a.high >> s; + return lit; + } +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_shift_right_large(FStar_UInt128_uint128 a, uint32_t s) +{ + FStar_UInt128_uint128 lit; + lit.low = a.high >> (s - FStar_UInt128_u32_64); + lit.high = (uint64_t)0U; + return lit; +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_shift_right(FStar_UInt128_uint128 a, uint32_t s) +{ + if (s < FStar_UInt128_u32_64) + { + return FStar_UInt128_shift_right_small(a, s); + } + else + { + return FStar_UInt128_shift_right_large(a, s); + } +} + +static inline bool FStar_UInt128_eq(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + return a.low == b.low && a.high == b.high; +} + +static inline bool FStar_UInt128_gt(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + return a.high > b.high || (a.high == b.high && a.low > b.low); +} + +static inline bool FStar_UInt128_lt(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + return a.high < b.high || (a.high == b.high && a.low < b.low); +} + +static inline bool FStar_UInt128_gte(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + return a.high > b.high || (a.high == b.high && a.low >= b.low); +} + +static inline bool FStar_UInt128_lte(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + return a.high < b.high || (a.high == b.high && a.low <= b.low); +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_eq_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + FStar_UInt128_uint128 lit; + lit.low = FStar_UInt64_eq_mask(a.low, b.low) & FStar_UInt64_eq_mask(a.high, b.high); + lit.high = FStar_UInt64_eq_mask(a.low, b.low) & FStar_UInt64_eq_mask(a.high, b.high); + return lit; +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_gte_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + FStar_UInt128_uint128 lit; + lit.low = + (FStar_UInt64_gte_mask(a.high, b.high) & ~FStar_UInt64_eq_mask(a.high, b.high)) + | (FStar_UInt64_eq_mask(a.high, b.high) & FStar_UInt64_gte_mask(a.low, b.low)); + lit.high = + (FStar_UInt64_gte_mask(a.high, b.high) & ~FStar_UInt64_eq_mask(a.high, b.high)) + | (FStar_UInt64_eq_mask(a.high, b.high) & FStar_UInt64_gte_mask(a.low, b.low)); + return lit; +} + +static inline FStar_UInt128_uint128 FStar_UInt128_uint64_to_uint128(uint64_t a) +{ + 
FStar_UInt128_uint128 lit; + lit.low = a; + lit.high = (uint64_t)0U; + return lit; +} + +static inline uint64_t FStar_UInt128_uint128_to_uint64(FStar_UInt128_uint128 a) +{ + return a.low; +} + +static inline uint64_t FStar_UInt128_u64_mod_32(uint64_t a) +{ + return a & (uint64_t)0xffffffffU; +} + +static uint32_t FStar_UInt128_u32_32 = (uint32_t)32U; + +static inline uint64_t FStar_UInt128_u32_combine(uint64_t hi, uint64_t lo) +{ + return lo + (hi << FStar_UInt128_u32_32); +} + +static inline FStar_UInt128_uint128 FStar_UInt128_mul32(uint64_t x, uint32_t y) +{ + FStar_UInt128_uint128 lit; + lit.low = + FStar_UInt128_u32_combine((x >> FStar_UInt128_u32_32) + * (uint64_t)y + + (FStar_UInt128_u64_mod_32(x) * (uint64_t)y >> FStar_UInt128_u32_32), + FStar_UInt128_u64_mod_32(FStar_UInt128_u64_mod_32(x) * (uint64_t)y)); + lit.high = + ((x >> FStar_UInt128_u32_32) + * (uint64_t)y + + (FStar_UInt128_u64_mod_32(x) * (uint64_t)y >> FStar_UInt128_u32_32)) + >> FStar_UInt128_u32_32; + return lit; +} + +static inline uint64_t FStar_UInt128_u32_combine_(uint64_t hi, uint64_t lo) +{ + return lo + (hi << FStar_UInt128_u32_32); +} + +static inline FStar_UInt128_uint128 FStar_UInt128_mul_wide(uint64_t x, uint64_t y) +{ + FStar_UInt128_uint128 lit; + lit.low = + FStar_UInt128_u32_combine_(FStar_UInt128_u64_mod_32(x) + * (y >> FStar_UInt128_u32_32) + + + FStar_UInt128_u64_mod_32((x >> FStar_UInt128_u32_32) + * FStar_UInt128_u64_mod_32(y) + + (FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y) >> FStar_UInt128_u32_32)), + FStar_UInt128_u64_mod_32(FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y))); + lit.high = + (x >> FStar_UInt128_u32_32) + * (y >> FStar_UInt128_u32_32) + + + (((x >> FStar_UInt128_u32_32) + * FStar_UInt128_u64_mod_32(y) + + (FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y) >> FStar_UInt128_u32_32)) + >> FStar_UInt128_u32_32) + + + ((FStar_UInt128_u64_mod_32(x) + * (y >> FStar_UInt128_u32_32) + + + FStar_UInt128_u64_mod_32((x >> FStar_UInt128_u32_32) + * FStar_UInt128_u64_mod_32(y) + + (FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y) >> FStar_UInt128_u32_32))) + >> FStar_UInt128_u32_32); + return lit; +} + + +#define __FStar_UInt128_Verified_H_DEFINED +#endif diff --git a/src/math/bigfix/kremlib/FStar_UInt_8_16_32_64.h b/src/math/bigfix/kremlib/FStar_UInt_8_16_32_64.h new file mode 100644 index 000000000..a7d3cbae7 --- /dev/null +++ b/src/math/bigfix/kremlib/FStar_UInt_8_16_32_64.h @@ -0,0 +1,104 @@ +/* + Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. 
+*/ + + +#ifndef __FStar_UInt_8_16_32_64_H +#define __FStar_UInt_8_16_32_64_H +#include +#include +#include "kremlin/lowstar_endianness.h" +#include "kremlin/internal/types.h" +#include "kremlin/internal/target.h" + +static inline uint64_t FStar_UInt64_eq_mask(uint64_t a, uint64_t b) +{ + uint64_t x = a ^ b; + uint64_t minus_x = ~x + (uint64_t)1U; + uint64_t x_or_minus_x = x | minus_x; + uint64_t xnx = x_or_minus_x >> (uint32_t)63U; + return xnx - (uint64_t)1U; +} + +static inline uint64_t FStar_UInt64_gte_mask(uint64_t a, uint64_t b) +{ + uint64_t x = a; + uint64_t y = b; + uint64_t x_xor_y = x ^ y; + uint64_t x_sub_y = x - y; + uint64_t x_sub_y_xor_y = x_sub_y ^ y; + uint64_t q = x_xor_y | x_sub_y_xor_y; + uint64_t x_xor_q = x ^ q; + uint64_t x_xor_q_ = x_xor_q >> (uint32_t)63U; + return x_xor_q_ - (uint64_t)1U; +} + +static inline uint32_t FStar_UInt32_eq_mask(uint32_t a, uint32_t b) +{ + uint32_t x = a ^ b; + uint32_t minus_x = ~x + (uint32_t)1U; + uint32_t x_or_minus_x = x | minus_x; + uint32_t xnx = x_or_minus_x >> (uint32_t)31U; + return xnx - (uint32_t)1U; +} + +static inline uint32_t FStar_UInt32_gte_mask(uint32_t a, uint32_t b) +{ + uint32_t x = a; + uint32_t y = b; + uint32_t x_xor_y = x ^ y; + uint32_t x_sub_y = x - y; + uint32_t x_sub_y_xor_y = x_sub_y ^ y; + uint32_t q = x_xor_y | x_sub_y_xor_y; + uint32_t x_xor_q = x ^ q; + uint32_t x_xor_q_ = x_xor_q >> (uint32_t)31U; + return x_xor_q_ - (uint32_t)1U; +} + +static inline uint16_t FStar_UInt16_eq_mask(uint16_t a, uint16_t b) +{ + uint16_t x = a ^ b; + uint16_t minus_x = ~x + (uint16_t)1U; + uint16_t x_or_minus_x = x | minus_x; + uint16_t xnx = x_or_minus_x >> (uint32_t)15U; + return xnx - (uint16_t)1U; +} + +static inline uint16_t FStar_UInt16_gte_mask(uint16_t a, uint16_t b) +{ + uint16_t x = a; + uint16_t y = b; + uint16_t x_xor_y = x ^ y; + uint16_t x_sub_y = x - y; + uint16_t x_sub_y_xor_y = x_sub_y ^ y; + uint16_t q = x_xor_y | x_sub_y_xor_y; + uint16_t x_xor_q = x ^ q; + uint16_t x_xor_q_ = x_xor_q >> (uint32_t)15U; + return x_xor_q_ - (uint16_t)1U; +} + +static inline uint8_t FStar_UInt8_eq_mask(uint8_t a, uint8_t b) +{ + uint8_t x = a ^ b; + uint8_t minus_x = ~x + (uint8_t)1U; + uint8_t x_or_minus_x = x | minus_x; + uint8_t xnx = x_or_minus_x >> (uint32_t)7U; + return xnx - (uint8_t)1U; +} + +static inline uint8_t FStar_UInt8_gte_mask(uint8_t a, uint8_t b) +{ + uint8_t x = a; + uint8_t y = b; + uint8_t x_xor_y = x ^ y; + uint8_t x_sub_y = x - y; + uint8_t x_sub_y_xor_y = x_sub_y ^ y; + uint8_t q = x_xor_y | x_sub_y_xor_y; + uint8_t x_xor_q = x ^ q; + uint8_t x_xor_q_ = x_xor_q >> (uint32_t)7U; + return x_xor_q_ - (uint8_t)1U; +} + +#define __FStar_UInt_8_16_32_64_H_DEFINED +#endif diff --git a/src/math/bigfix/kremlib/LowStar_Endianness.h b/src/math/bigfix/kremlib/LowStar_Endianness.h new file mode 100644 index 000000000..5cd3be350 --- /dev/null +++ b/src/math/bigfix/kremlib/LowStar_Endianness.h @@ -0,0 +1,28 @@ +/* + Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. 
+*/ + + +#ifndef __LowStar_Endianness_H +#define __LowStar_Endianness_H +#include +#include +#include "kremlin/lowstar_endianness.h" +#include "kremlin/internal/types.h" +#include "kremlin/internal/target.h" + + +#include "FStar_UInt128.h" + +static inline void store128_le(uint8_t *x0, FStar_UInt128_uint128 x1); + +static inline FStar_UInt128_uint128 load128_le(uint8_t *x0); + +static inline void store128_be(uint8_t *x0, FStar_UInt128_uint128 x1); + +static inline FStar_UInt128_uint128 load128_be(uint8_t *x0); + + +#define __LowStar_Endianness_H_DEFINED +#endif diff --git a/src/math/bigfix/kremlib/fstar_uint128_gcc64.h b/src/math/bigfix/kremlib/fstar_uint128_gcc64.h new file mode 100644 index 000000000..aae6a7dc9 --- /dev/null +++ b/src/math/bigfix/kremlib/fstar_uint128_gcc64.h @@ -0,0 +1,165 @@ +/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. */ + +/******************************************************************************/ +/* Machine integers (128-bit arithmetic) */ +/******************************************************************************/ + +/* This header contains two things. + * + * First, an implementation of 128-bit arithmetic suitable for 64-bit GCC and + * Clang, i.e. all the operations from FStar.UInt128. + * + * Second, 128-bit operations from C.Endianness (or LowStar.Endianness), + * suitable for any compiler and platform (via a series of ifdefs). This second + * part is unfortunate, and should be fixed by moving {load,store}128_{be,le} to + * FStar.UInt128 to avoid a maze of preprocessor guards and hand-written code. + * */ + +/* This file is used for both the minimal and generic kremlib distributions. As + * such, it assumes that the machine integers have been bundled the exact same + * way in both cases. 
*/ + +#ifndef FSTAR_UINT128_GCC64 +#define FSTAR_UINT128_GCC64 + +#include "FStar_UInt128.h" +#include "FStar_UInt_8_16_32_64.h" +#include "LowStar_Endianness.h" + +/* GCC + using native unsigned __int128 support */ + +inline static uint128_t load128_le(uint8_t *b) { + uint128_t l = (uint128_t)load64_le(b); + uint128_t h = (uint128_t)load64_le(b + 8); + return (h << 64 | l); +} + +inline static void store128_le(uint8_t *b, uint128_t n) { + store64_le(b, (uint64_t)n); + store64_le(b + 8, (uint64_t)(n >> 64)); +} + +inline static uint128_t load128_be(uint8_t *b) { + uint128_t h = (uint128_t)load64_be(b); + uint128_t l = (uint128_t)load64_be(b + 8); + return (h << 64 | l); +} + +inline static void store128_be(uint8_t *b, uint128_t n) { + store64_be(b, (uint64_t)(n >> 64)); + store64_be(b + 8, (uint64_t)n); +} + +inline static uint128_t FStar_UInt128_add(uint128_t x, uint128_t y) { + return x + y; +} + +inline static uint128_t FStar_UInt128_mul(uint128_t x, uint128_t y) { + return x * y; +} + +inline static uint128_t FStar_UInt128_add_mod(uint128_t x, uint128_t y) { + return x + y; +} + +inline static uint128_t FStar_UInt128_sub(uint128_t x, uint128_t y) { + return x - y; +} + +inline static uint128_t FStar_UInt128_sub_mod(uint128_t x, uint128_t y) { + return x - y; +} + +inline static uint128_t FStar_UInt128_logand(uint128_t x, uint128_t y) { + return x & y; +} + +inline static uint128_t FStar_UInt128_logor(uint128_t x, uint128_t y) { + return x | y; +} + +inline static uint128_t FStar_UInt128_logxor(uint128_t x, uint128_t y) { + return x ^ y; +} + +inline static uint128_t FStar_UInt128_lognot(uint128_t x) { + return ~x; +} + +inline static uint128_t FStar_UInt128_shift_left(uint128_t x, uint32_t y) { + return x << y; +} + +inline static uint128_t FStar_UInt128_shift_right(uint128_t x, uint32_t y) { + return x >> y; +} + +inline static uint128_t FStar_UInt128_uint64_to_uint128(uint64_t x) { + return (uint128_t)x; +} + +inline static uint64_t FStar_UInt128_uint128_to_uint64(uint128_t x) { + return (uint64_t)x; +} + +inline static uint128_t FStar_UInt128_mul_wide(uint64_t x, uint64_t y) { + return ((uint128_t) x) * y; +} + +inline static uint128_t FStar_UInt128_eq_mask(uint128_t x, uint128_t y) { + uint64_t mask = + FStar_UInt64_eq_mask((uint64_t)(x >> 64), (uint64_t)(y >> 64)) & + FStar_UInt64_eq_mask(x, y); + return ((uint128_t)mask) << 64 | mask; +} + +inline static uint128_t FStar_UInt128_gte_mask(uint128_t x, uint128_t y) { + uint64_t mask = + (FStar_UInt64_gte_mask(x >> 64, y >> 64) & + ~(FStar_UInt64_eq_mask(x >> 64, y >> 64))) | + (FStar_UInt64_eq_mask(x >> 64, y >> 64) & FStar_UInt64_gte_mask(x, y)); + return ((uint128_t)mask) << 64 | mask; +} + +inline static uint64_t FStar_UInt128___proj__Mkuint128__item__low(uint128_t x) { + return (uint64_t) x; +} + +inline static uint64_t FStar_UInt128___proj__Mkuint128__item__high(uint128_t x) { + return (uint64_t) (x >> 64); +} + +inline static uint128_t FStar_UInt128_add_underspec(uint128_t x, uint128_t y) { + return x + y; +} + +inline static uint128_t FStar_UInt128_sub_underspec(uint128_t x, uint128_t y) { + return x - y; +} + +inline static bool FStar_UInt128_eq(uint128_t x, uint128_t y) { + return x == y; +} + +inline static bool FStar_UInt128_gt(uint128_t x, uint128_t y) { + return x > y; +} + +inline static bool FStar_UInt128_lt(uint128_t x, uint128_t y) { + return x < y; +} + +inline static bool FStar_UInt128_gte(uint128_t x, uint128_t y) { + return x >= y; +} + +inline static bool FStar_UInt128_lte(uint128_t x, uint128_t y) { + return x 
<= y;
+}
+
+inline static uint128_t FStar_UInt128_mul32(uint64_t x, uint32_t y) {
+  return (uint128_t) x * (uint128_t) y;
+}
+
+#endif
diff --git a/src/math/bigfix/kremlib/fstar_uint128_msvc.h b/src/math/bigfix/kremlib/fstar_uint128_msvc.h
new file mode 100644
index 000000000..c2a28abc6
--- /dev/null
+++ b/src/math/bigfix/kremlib/fstar_uint128_msvc.h
@@ -0,0 +1,510 @@
+/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved.
+   Licensed under the Apache 2.0 License. */
+
+/* This file was generated by KreMLin
+ * then hand-edited to use MSVC intrinsics KreMLin invocation:
+ * C:\users\barrybo\mitls2c\kremlin\_build\src\Kremlin.native -minimal -fnouint128 C:/users/barrybo/mitls2c/FStar/ulib/FStar.UInt128.fst -tmpdir ../secure_api/out/runtime_switch/uint128 -skip-compilation -add-include "kremlib0.h" -drop FStar.Int.Cast.Full -bundle FStar.UInt128=FStar.*,Prims
+ * F* version: 15104ff8
+ * KreMLin version: 318b7fa8
+ */
+
+#ifndef FSTAR_UINT128_MSVC
+#define FSTAR_UINT128_MSVC
+
+#include "kremlin/internal/types.h"
+#include "FStar_UInt128.h"
+#include "FStar_UInt_8_16_32_64.h"
+
+#ifndef _MSC_VER
+# error This file only works with the MSVC compiler
+#endif
+
+/* JP: need to rip out HAS_OPTIMIZED since the header guards in types.h are now
+ * done properly and only include this file when we know for sure we are on
+ * 64-bit MSVC. */
+
+#if defined(_M_X64) && !defined(KRML_VERIFIED_UINT128)
+#define HAS_OPTIMIZED 1
+#else
+#define HAS_OPTIMIZED 0
+#endif
+
+// Define .low and .high in terms of the __m128i fields, to reduce
+// the amount of churn in this file.
+#if HAS_OPTIMIZED
+#include <intrin.h>
+#include <immintrin.h>
+#define low m128i_u64[0]
+#define high m128i_u64[1]
+#endif
+
+inline static FStar_UInt128_uint128 load128_le(uint8_t *b) {
+#if HAS_OPTIMIZED
+  return _mm_loadu_si128((__m128i *)b);
+#else
+  FStar_UInt128_uint128 lit;
+  lit.low = load64_le(b);
+  lit.high = load64_le(b + 8);
+  return lit;
+#endif
+}
+
+inline static void store128_le(uint8_t *b, FStar_UInt128_uint128 n) {
+  store64_le(b, n.low);
+  store64_le(b + 8, n.high);
+}
+
+inline static FStar_UInt128_uint128 load128_be(uint8_t *b) {
+  uint64_t l = load64_be(b + 8);
+  uint64_t h = load64_be(b);
+#if HAS_OPTIMIZED
+  return _mm_set_epi64x(h, l);
+#else
+  FStar_UInt128_uint128 lit;
+  lit.low = l;
+  lit.high = h;
+  return lit;
+#endif
+}
+
+inline static void store128_be(uint8_t *b, uint128_t n) {
+  store64_be(b, n.high);
+  store64_be(b + 8, n.low);
+}
+
+inline static uint64_t FStar_UInt128_constant_time_carry(uint64_t a, uint64_t b) {
+  return (a ^ (a ^ b | a - b ^ b)) >> (uint32_t)63U;
+}
+
+inline static uint64_t FStar_UInt128_carry(uint64_t a, uint64_t b) {
+  return FStar_UInt128_constant_time_carry(a, b);
+}
+
+inline static FStar_UInt128_uint128
+FStar_UInt128_add(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) {
+#if HAS_OPTIMIZED
+  uint64_t l, h;
+
+  unsigned char carry =
+      _addcarry_u64(0, a.low, b.low, &l);     // low/CF = a.low+b.low+0
+  _addcarry_u64(carry, a.high, b.high, &h);   // high = a.high+b.high+CF
+  return _mm_set_epi64x(h, l);
+#else
+  FStar_UInt128_uint128 lit;
+  lit.low = a.low + b.low;
+  lit.high = a.high + b.high + FStar_UInt128_carry(a.low + b.low, b.low);
+  return lit;
+#endif
+}
+
+inline static FStar_UInt128_uint128
+FStar_UInt128_add_underspec(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) {
+#if HAS_OPTIMIZED
+  return FStar_UInt128_add(a, b);
+#else
+  FStar_UInt128_uint128 lit;
+  lit.low = a.low + b.low;
+  lit.high = a.high + b.high + FStar_UInt128_carry(a.low + b.low, b.low);
+  return lit;
+#endif
+}
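+
+/* On 64-bit MSVC (HAS_OPTIMIZED) the arithmetic in this file is mapped to
+ * compiler intrinsics: _addcarry_u64/_subborrow_u64 propagate the carry or
+ * borrow between the two 64-bit halves and _mm_set_epi64x repacks the result
+ * into the __m128i representation, while the #else branches keep the portable
+ * constant-time formulation of the verified implementation. */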
+ +inline static FStar_UInt128_uint128 +FStar_UInt128_add_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) { +#if HAS_OPTIMIZED + return FStar_UInt128_add(a, b); +#else + FStar_UInt128_uint128 lit; + lit.low = a.low + b.low; + lit.high = a.high + b.high + FStar_UInt128_carry(a.low + b.low, b.low); + return lit; +#endif +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_sub(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) { +#if HAS_OPTIMIZED + uint64_t l, h; + + unsigned char borrow = _subborrow_u64(0, a.low, b.low, &l); + _subborrow_u64(borrow, a.high, b.high, &h); + return _mm_set_epi64x(h, l); +#else + FStar_UInt128_uint128 lit; + lit.low = a.low - b.low; + lit.high = a.high - b.high - FStar_UInt128_carry(a.low, a.low - b.low); + return lit; +#endif +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_sub_underspec(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) { +#if HAS_OPTIMIZED + return FStar_UInt128_sub(a, b); +#else + FStar_UInt128_uint128 lit; + lit.low = a.low - b.low; + lit.high = a.high - b.high - FStar_UInt128_carry(a.low, a.low - b.low); + return lit; +#endif +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_sub_mod_impl(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) { + FStar_UInt128_uint128 lit; + lit.low = a.low - b.low; + lit.high = a.high - b.high - FStar_UInt128_carry(a.low, a.low - b.low); + return lit; +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_sub_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) { +#if HAS_OPTIMIZED + return FStar_UInt128_sub(a, b); +#else + return FStar_UInt128_sub_mod_impl(a, b); +#endif +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_logand(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) { +#if HAS_OPTIMIZED + return _mm_and_si128(a, b); +#else + FStar_UInt128_uint128 lit; + lit.low = a.low & b.low; + lit.high = a.high & b.high; + return lit; +#endif +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_logxor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) { +#if HAS_OPTIMIZED + return _mm_xor_si128(a, b); +#else + FStar_UInt128_uint128 lit; + lit.low = a.low ^ b.low; + lit.high = a.high ^ b.high; + return lit; +#endif +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_logor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) { +#if HAS_OPTIMIZED + return _mm_or_si128(a, b); +#else + FStar_UInt128_uint128 lit; + lit.low = a.low | b.low; + lit.high = a.high | b.high; + return lit; +#endif +} + +inline static FStar_UInt128_uint128 FStar_UInt128_lognot(FStar_UInt128_uint128 a) { +#if HAS_OPTIMIZED + return _mm_andnot_si128(a, a); +#else + FStar_UInt128_uint128 lit; + lit.low = ~a.low; + lit.high = ~a.high; + return lit; +#endif +} + +static const uint32_t FStar_UInt128_u32_64 = (uint32_t)64U; + +inline static uint64_t +FStar_UInt128_add_u64_shift_left(uint64_t hi, uint64_t lo, uint32_t s) { + return (hi << s) + (lo >> (FStar_UInt128_u32_64 - s)); +} + +inline static uint64_t +FStar_UInt128_add_u64_shift_left_respec(uint64_t hi, uint64_t lo, uint32_t s) { + return FStar_UInt128_add_u64_shift_left(hi, lo, s); +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_shift_left_small(FStar_UInt128_uint128 a, uint32_t s) { + if (s == (uint32_t)0U) + return a; + else { + FStar_UInt128_uint128 lit; + lit.low = a.low << s; + lit.high = FStar_UInt128_add_u64_shift_left_respec(a.high, a.low, s); + return lit; + } +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_shift_left_large(FStar_UInt128_uint128 a, uint32_t s) { + FStar_UInt128_uint128 lit; + lit.low = (uint64_t)0U; + lit.high = 
a.low << (s - FStar_UInt128_u32_64); + return lit; +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_shift_left(FStar_UInt128_uint128 a, uint32_t s) { +#if HAS_OPTIMIZED + if (s == 0) { + return a; + } else if (s < FStar_UInt128_u32_64) { + uint64_t l = a.low << s; + uint64_t h = __shiftleft128(a.low, a.high, (unsigned char)s); + return _mm_set_epi64x(h, l); + } else { + return _mm_set_epi64x(a.low << (s - FStar_UInt128_u32_64), 0); + } +#else + if (s < FStar_UInt128_u32_64) + return FStar_UInt128_shift_left_small(a, s); + else + return FStar_UInt128_shift_left_large(a, s); +#endif +} + +inline static uint64_t +FStar_UInt128_add_u64_shift_right(uint64_t hi, uint64_t lo, uint32_t s) { + return (lo >> s) + (hi << (FStar_UInt128_u32_64 - s)); +} + +inline static uint64_t +FStar_UInt128_add_u64_shift_right_respec(uint64_t hi, uint64_t lo, uint32_t s) { + return FStar_UInt128_add_u64_shift_right(hi, lo, s); +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_shift_right_small(FStar_UInt128_uint128 a, uint32_t s) { + if (s == (uint32_t)0U) + return a; + else { + FStar_UInt128_uint128 lit; + lit.low = FStar_UInt128_add_u64_shift_right_respec(a.high, a.low, s); + lit.high = a.high >> s; + return lit; + } +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_shift_right_large(FStar_UInt128_uint128 a, uint32_t s) { + FStar_UInt128_uint128 lit; + lit.low = a.high >> (s - FStar_UInt128_u32_64); + lit.high = (uint64_t)0U; + return lit; +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_shift_right(FStar_UInt128_uint128 a, uint32_t s) { +#if HAS_OPTIMIZED + if (s == 0) { + return a; + } else if (s < FStar_UInt128_u32_64) { + uint64_t l = __shiftright128(a.low, a.high, (unsigned char)s); + uint64_t h = a.high >> s; + return _mm_set_epi64x(h, l); + } else { + return _mm_set_epi64x(0, a.high >> (s - FStar_UInt128_u32_64)); + } +#else + if (s < FStar_UInt128_u32_64) + return FStar_UInt128_shift_right_small(a, s); + else + return FStar_UInt128_shift_right_large(a, s); +#endif +} + +inline static bool FStar_UInt128_eq(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) { + return a.low == b.low && a.high == b.high; +} + +inline static bool FStar_UInt128_gt(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) { + return a.high > b.high || a.high == b.high && a.low > b.low; +} + +inline static bool FStar_UInt128_lt(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) { + return a.high < b.high || a.high == b.high && a.low < b.low; +} + +inline static bool FStar_UInt128_gte(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) { + return a.high > b.high || a.high == b.high && a.low >= b.low; +} + +inline static bool FStar_UInt128_lte(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) { + return a.high < b.high || a.high == b.high && a.low <= b.low; +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_eq_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) { +#if HAS_OPTIMIZED + // PCMPW to produce 4 32-bit values, all either 0x0 or 0xffffffff + __m128i r32 = _mm_cmpeq_epi32(a, b); + // Shuffle 3,2,1,0 into 2,3,0,1 (swapping dwords inside each half) + __m128i s32 = _mm_shuffle_epi32(r32, _MM_SHUFFLE(2, 3, 0, 1)); + // Bitwise and to compute (3&2),(2&3),(1&0),(0&1) + __m128i ret64 = _mm_and_si128(r32, s32); + // Swap the two 64-bit values to form s64 + __m128i s64 = + _mm_shuffle_epi32(ret64, _MM_SHUFFLE(1, 0, 3, 2)); // 3,2,1,0 -> 1,0,3,2 + // And them together + return _mm_and_si128(ret64, s64); +#else + FStar_UInt128_uint128 lit; + lit.low = FStar_UInt64_eq_mask(a.low, b.low) & 
FStar_UInt64_eq_mask(a.high, b.high); + lit.high = FStar_UInt64_eq_mask(a.low, b.low) & FStar_UInt64_eq_mask(a.high, b.high); + return lit; +#endif +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_gte_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) { +#if HAS_OPTIMIZED && 0 + // ge - compare 3,2,1,0 for >= and generating 0 or 0xffffffff for each + // eq - compare 3,2,1,0 for == and generating 0 or 0xffffffff for each + // slot 0 = ge0 | (eq0 & ge1) | (eq0 & eq1 & ge2) | (eq0 & eq1 & eq2 & ge3) + // then splat slot 0 to 3,2,1,0 + __m128i gt = _mm_cmpgt_epi32(a, b); + __m128i eq = _mm_cmpeq_epi32(a, b); + __m128i ge = _mm_or_si128(gt, eq); + __m128i ge0 = ge; + __m128i eq0 = eq; + __m128i ge1 = _mm_srli_si128(ge, 4); // shift ge from 3,2,1,0 to 0x0,3,2,1 + __m128i t1 = _mm_and_si128(eq0, ge1); + __m128i ret = _mm_or_si128(ge, t1); // ge0 | (eq0 & ge1) is now in 0 + __m128i eq1 = _mm_srli_si128(eq, 4); // shift eq from 3,2,1,0 to 0x0,3,2,1 + __m128i ge2 = + _mm_srli_si128(ge1, 4); // shift original ge from 3,2,1,0 to 0x0,0x0,3,2 + __m128i t2 = + _mm_and_si128(eq0, _mm_and_si128(eq1, ge2)); // t2 = (eq0 & eq1 & ge2) + ret = _mm_or_si128(ret, t2); + __m128i eq2 = _mm_srli_si128(eq1, 4); // shift eq from 3,2,1,0 to 0x0,00,00,3 + __m128i ge3 = + _mm_srli_si128(ge2, 4); // shift original ge from 3,2,1,0 to 0x0,0x0,0x0,3 + __m128i t3 = _mm_and_si128( + eq0, _mm_and_si128( + eq1, _mm_and_si128(eq2, ge3))); // t3 = (eq0 & eq1 & eq2 & ge3) + ret = _mm_or_si128(ret, t3); + return _mm_shuffle_epi32( + ret, + _MM_SHUFFLE(0, 0, 0, 0)); // the result is in 0. Shuffle into all dwords. +#else + FStar_UInt128_uint128 lit; + lit.low = FStar_UInt64_gte_mask(a.high, b.high) & + ~FStar_UInt64_eq_mask(a.high, b.high) | + FStar_UInt64_eq_mask(a.high, b.high) & + FStar_UInt64_gte_mask(a.low, b.low); + lit.high = FStar_UInt64_gte_mask(a.high, b.high) & + ~FStar_UInt64_eq_mask(a.high, b.high) | + FStar_UInt64_eq_mask(a.high, b.high) & + FStar_UInt64_gte_mask(a.low, b.low); + return lit; +#endif +} + +inline static FStar_UInt128_uint128 FStar_UInt128_uint64_to_uint128(uint64_t a) { +#if HAS_OPTIMIZED + return _mm_set_epi64x(0, a); +#else + FStar_UInt128_uint128 lit; + lit.low = a; + lit.high = (uint64_t)0U; + return lit; +#endif +} + +inline static uint64_t FStar_UInt128_uint128_to_uint64(FStar_UInt128_uint128 a) { + return a.low; +} + +inline static uint64_t FStar_UInt128_u64_mod_32(uint64_t a) { + return a & (uint64_t)0xffffffffU; +} + +static uint32_t FStar_UInt128_u32_32 = (uint32_t)32U; + +inline static uint64_t FStar_UInt128_u32_combine(uint64_t hi, uint64_t lo) { + return lo + (hi << FStar_UInt128_u32_32); +} + +inline static FStar_UInt128_uint128 FStar_UInt128_mul32(uint64_t x, uint32_t y) { +#if HAS_OPTIMIZED + uint64_t l, h; + l = _umul128(x, (uint64_t)y, &h); + return _mm_set_epi64x(h, l); +#else + FStar_UInt128_uint128 lit; + lit.low = FStar_UInt128_u32_combine( + (x >> FStar_UInt128_u32_32) * (uint64_t)y + + (FStar_UInt128_u64_mod_32(x) * (uint64_t)y >> + FStar_UInt128_u32_32), + FStar_UInt128_u64_mod_32(FStar_UInt128_u64_mod_32(x) * (uint64_t)y)); + lit.high = (x >> FStar_UInt128_u32_32) * (uint64_t)y + + (FStar_UInt128_u64_mod_32(x) * (uint64_t)y >> + FStar_UInt128_u32_32) >> + FStar_UInt128_u32_32; + return lit; +#endif +} + +/* Note: static headers bring scope collision issues when they define types! + * Because now client (kremlin-generated) code will include this header and + * there might be type collisions if the client code uses quadruples of uint64s. 
+ * So, we cannot use the kremlin-generated name. */ +typedef struct K_quad_s { + uint64_t fst; + uint64_t snd; + uint64_t thd; + uint64_t f3; +} K_quad; + +inline static K_quad +FStar_UInt128_mul_wide_impl_t_(uint64_t x, uint64_t y) { + K_quad tmp; + tmp.fst = FStar_UInt128_u64_mod_32(x); + tmp.snd = FStar_UInt128_u64_mod_32( + FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y)); + tmp.thd = x >> FStar_UInt128_u32_32; + tmp.f3 = (x >> FStar_UInt128_u32_32) * FStar_UInt128_u64_mod_32(y) + + (FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y) >> + FStar_UInt128_u32_32); + return tmp; +} + +static uint64_t FStar_UInt128_u32_combine_(uint64_t hi, uint64_t lo) { + return lo + (hi << FStar_UInt128_u32_32); +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_mul_wide_impl(uint64_t x, uint64_t y) { + K_quad scrut = + FStar_UInt128_mul_wide_impl_t_(x, y); + uint64_t u1 = scrut.fst; + uint64_t w3 = scrut.snd; + uint64_t x_ = scrut.thd; + uint64_t t_ = scrut.f3; + FStar_UInt128_uint128 lit; + lit.low = FStar_UInt128_u32_combine_( + u1 * (y >> FStar_UInt128_u32_32) + FStar_UInt128_u64_mod_32(t_), w3); + lit.high = + x_ * (y >> FStar_UInt128_u32_32) + (t_ >> FStar_UInt128_u32_32) + + (u1 * (y >> FStar_UInt128_u32_32) + (FStar_UInt128_u64_mod_32(t_) >> + FStar_UInt128_u32_32)); + return lit; +} + +inline static +FStar_UInt128_uint128 FStar_UInt128_mul_wide(uint64_t x, uint64_t y) { +#if HAS_OPTIMIZED + uint64_t l, h; + l = _umul128(x, y, &h); + return _mm_set_epi64x(h, l); +#else + return FStar_UInt128_mul_wide_impl(x, y); +#endif +} + +#undef low +#undef high + +#endif diff --git a/src/math/bigfix/kremlib/fstar_uint128_struct_endianness.h b/src/math/bigfix/kremlib/fstar_uint128_struct_endianness.h new file mode 100644 index 000000000..e2b6d6285 --- /dev/null +++ b/src/math/bigfix/kremlib/fstar_uint128_struct_endianness.h @@ -0,0 +1,68 @@ +/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. */ + +#ifndef FSTAR_UINT128_STRUCT_ENDIANNESS_H +#define FSTAR_UINT128_STRUCT_ENDIANNESS_H + +/* Hand-written implementation of endianness-related uint128 functions + * for the extracted uint128 implementation */ + +/* Access 64-bit fields within the int128. */ +#define HIGH64_OF(x) ((x)->high) +#define LOW64_OF(x) ((x)->low) + +/* A series of definitions written using pointers. 
*/ + +inline static void load128_le_(uint8_t *b, uint128_t *r) { + LOW64_OF(r) = load64_le(b); + HIGH64_OF(r) = load64_le(b + 8); +} + +inline static void store128_le_(uint8_t *b, uint128_t *n) { + store64_le(b, LOW64_OF(n)); + store64_le(b + 8, HIGH64_OF(n)); +} + +inline static void load128_be_(uint8_t *b, uint128_t *r) { + HIGH64_OF(r) = load64_be(b); + LOW64_OF(r) = load64_be(b + 8); +} + +inline static void store128_be_(uint8_t *b, uint128_t *n) { + store64_be(b, HIGH64_OF(n)); + store64_be(b + 8, LOW64_OF(n)); +} + +#ifndef KRML_NOSTRUCT_PASSING + +inline static uint128_t load128_le(uint8_t *b) { + uint128_t r; + load128_le_(b, &r); + return r; +} + +inline static void store128_le(uint8_t *b, uint128_t n) { + store128_le_(b, &n); +} + +inline static uint128_t load128_be(uint8_t *b) { + uint128_t r; + load128_be_(b, &r); + return r; +} + +inline static void store128_be(uint8_t *b, uint128_t n) { + store128_be_(b, &n); +} + +#else /* !defined(KRML_STRUCT_PASSING) */ + +# define print128 print128_ +# define load128_le load128_le_ +# define store128_le store128_le_ +# define load128_be load128_be_ +# define store128_be store128_be_ + +#endif /* KRML_STRUCT_PASSING */ + +#endif diff --git a/src/math/bigfix/kremlin/internal/target.h b/src/math/bigfix/kremlin/internal/target.h new file mode 100644 index 000000000..b25254a23 --- /dev/null +++ b/src/math/bigfix/kremlin/internal/target.h @@ -0,0 +1,60 @@ +/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. */ + +#ifndef __KREMLIN_TARGET_H +#define __KREMLIN_TARGET_H + +#include +#include +#include + +/******************************************************************************/ +/* Macros that KreMLin will generate. */ +/******************************************************************************/ + +/* For "bare" targets that do not have a C stdlib, the user might want to use + * [-add-early-include '"mydefinitions.h"'] and override these. */ +#ifndef KRML_HOST_PRINTF +# define KRML_HOST_PRINTF printf +#endif + +#ifndef KRML_HOST_EXIT +# define KRML_HOST_EXIT exit +#endif + +#ifndef KRML_HOST_MALLOC +# define KRML_HOST_MALLOC malloc +#endif + +#ifndef KRML_HOST_CALLOC +# define KRML_HOST_CALLOC calloc +#endif + +#ifndef KRML_HOST_FREE +# define KRML_HOST_FREE free +#endif + +/* In FStar.Buffer.fst, the size of arrays is uint32_t, but it's a number of + * *elements*. Do an ugly, run-time check (some of which KreMLin can eliminate). + */ + +#ifdef __GNUC__ +# define _KRML_CHECK_SIZE_PRAGMA \ + _Pragma("GCC diagnostic ignored \"-Wtype-limits\"") +#else +# define _KRML_CHECK_SIZE_PRAGMA +#endif + +#define KRML_CHECK_SIZE(size_elt, sz) \ + do { \ + _KRML_CHECK_SIZE_PRAGMA \ + if (((size_t)(sz)) > ((size_t)(SIZE_MAX / (size_elt)))) { \ + KRML_HOST_PRINTF( \ + "Maximum allocatable size exceeded, aborting before overflow at " \ + "%s:%d\n", \ + __FILE__, __LINE__); \ + KRML_HOST_EXIT(253); \ + } \ + } while (0) + +#endif diff --git a/src/math/bigfix/kremlin/internal/types.h b/src/math/bigfix/kremlin/internal/types.h new file mode 100644 index 000000000..885d956c4 --- /dev/null +++ b/src/math/bigfix/kremlin/internal/types.h @@ -0,0 +1,70 @@ +/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. 
*/ + +#ifndef KRML_TYPES_H +#define KRML_TYPES_H + +#include +#include +#include +#include + +/* This file picks a suitable uint128 implementation depending on whether the + * target compiler supports it, and or whether KRML_VERIFIED_UINT128 is defined. */ + +#if (defined(_MSC_VER) && defined(_M_X64) && !defined(__clang__)) +#define IS_MSVC64 1 +#endif + +/* This code makes a number of assumptions and should be refined. In particular, + * it assumes that: any non-MSVC amd64 compiler supports int128. Maybe it would + * be easier to just test for defined(__SIZEOF_INT128__) only? */ +#if (defined(__x86_64__) || \ + defined(__x86_64) || \ + defined(__aarch64__) || \ + (defined(__powerpc64__) && defined(__LITTLE_ENDIAN__)) || \ + defined(__s390x__) || \ + (defined(_MSC_VER) && defined(_M_X64) && defined(__clang__)) || \ + (defined(__mips__) && defined(__LP64__)) || \ + (defined(__riscv) && __riscv_xlen == 64) || \ + defined(__SIZEOF_INT128__)) +#define HAS_INT128 1 +#endif + +/* The uint128 type is a special case since we offer several implementations of + * it, depending on the compiler and whether the user wants the verified + * implementation or not. */ +#if !defined(KRML_VERIFIED_UINT128) && defined(IS_MSVC64) +# include +typedef __m128i FStar_UInt128_uint128; +#elif !defined(KRML_VERIFIED_UINT128) && defined(HAS_INT128) +typedef unsigned __int128 FStar_UInt128_uint128; +#else +typedef struct FStar_UInt128_uint128_s { + uint64_t low; + uint64_t high; +} FStar_UInt128_uint128; +#endif + +/* The former is defined once, here (otherwise, conflicts for test-c89. The + * latter is for internal use. */ +typedef FStar_UInt128_uint128 FStar_UInt128_t, uint128_t; + +#include "math/bigfix/kremlin/lowstar_endianness.h" + +#endif + +/* Avoid a circular loop: if this header is included via FStar_UInt8_16_32_64, + * then don't bring the uint128 definitions into scope. */ +#ifndef __FStar_UInt_8_16_32_64_H + +#if !defined(KRML_VERIFIED_UINT128) && defined(IS_MSVC64) +#include "math/bigfix/kremlib/fstar_uint128_msvc.h" +#elif !defined(KRML_VERIFIED_UINT128) && defined(HAS_INT128) +#include "math/bigfix/kremlib/fstar_uint128_gcc64.h" +#else +#include "math/bigfix/kremlib/FStar_UInt128_Verified.h" +#include "math/bigfix/kremlib/fstar_uint128_struct_endianness.h" +#endif + +#endif diff --git a/src/math/bigfix/kremlin/lowstar_endianness.h b/src/math/bigfix/kremlin/lowstar_endianness.h new file mode 100644 index 000000000..3b120c7fb --- /dev/null +++ b/src/math/bigfix/kremlin/lowstar_endianness.h @@ -0,0 +1,230 @@ +/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. */ + +#ifndef __LOWSTAR_ENDIANNESS_H +#define __LOWSTAR_ENDIANNESS_H + +#include +#include + +/******************************************************************************/ +/* Implementing C.fst (part 2: endian-ness macros) */ +/******************************************************************************/ + +/* ... for Linux */ +#if defined(__linux__) || defined(__CYGWIN__) || defined (__USE_SYSTEM_ENDIAN_H__) +# include + +/* ... 
for OSX */ +#elif defined(__APPLE__) +# include +# define htole64(x) OSSwapHostToLittleInt64(x) +# define le64toh(x) OSSwapLittleToHostInt64(x) +# define htobe64(x) OSSwapHostToBigInt64(x) +# define be64toh(x) OSSwapBigToHostInt64(x) + +# define htole16(x) OSSwapHostToLittleInt16(x) +# define le16toh(x) OSSwapLittleToHostInt16(x) +# define htobe16(x) OSSwapHostToBigInt16(x) +# define be16toh(x) OSSwapBigToHostInt16(x) + +# define htole32(x) OSSwapHostToLittleInt32(x) +# define le32toh(x) OSSwapLittleToHostInt32(x) +# define htobe32(x) OSSwapHostToBigInt32(x) +# define be32toh(x) OSSwapBigToHostInt32(x) + +/* ... for Solaris */ +#elif defined(__sun__) +# include +# define htole64(x) LE_64(x) +# define le64toh(x) LE_64(x) +# define htobe64(x) BE_64(x) +# define be64toh(x) BE_64(x) + +# define htole16(x) LE_16(x) +# define le16toh(x) LE_16(x) +# define htobe16(x) BE_16(x) +# define be16toh(x) BE_16(x) + +# define htole32(x) LE_32(x) +# define le32toh(x) LE_32(x) +# define htobe32(x) BE_32(x) +# define be32toh(x) BE_32(x) + +/* ... for the BSDs */ +#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__) +# include +#elif defined(__OpenBSD__) +# include + +/* ... for Windows (MSVC)... not targeting XBOX 360! */ +#elif defined(_MSC_VER) + +# include +# define htobe16(x) _byteswap_ushort(x) +# define htole16(x) (x) +# define be16toh(x) _byteswap_ushort(x) +# define le16toh(x) (x) + +# define htobe32(x) _byteswap_ulong(x) +# define htole32(x) (x) +# define be32toh(x) _byteswap_ulong(x) +# define le32toh(x) (x) + +# define htobe64(x) _byteswap_uint64(x) +# define htole64(x) (x) +# define be64toh(x) _byteswap_uint64(x) +# define le64toh(x) (x) + +/* ... for Windows (GCC-like, e.g. mingw or clang) */ +#elif (defined(_WIN32) || defined(_WIN64)) && \ + (defined(__GNUC__) || defined(__clang__)) + +# define htobe16(x) __builtin_bswap16(x) +# define htole16(x) (x) +# define be16toh(x) __builtin_bswap16(x) +# define le16toh(x) (x) + +# define htobe32(x) __builtin_bswap32(x) +# define htole32(x) (x) +# define be32toh(x) __builtin_bswap32(x) +# define le32toh(x) (x) + +# define htobe64(x) __builtin_bswap64(x) +# define htole64(x) (x) +# define be64toh(x) __builtin_bswap64(x) +# define le64toh(x) (x) + +/* ... generic big-endian fallback code */ +#elif defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + +/* byte swapping code inspired by: + * https://github.com/rweather/arduinolibs/blob/master/libraries/Crypto/utility/EndianUtil.h + * */ + +# define htobe32(x) (x) +# define be32toh(x) (x) +# define htole32(x) \ + (__extension__({ \ + uint32_t _temp = (x); \ + ((_temp >> 24) & 0x000000FF) | ((_temp >> 8) & 0x0000FF00) | \ + ((_temp << 8) & 0x00FF0000) | ((_temp << 24) & 0xFF000000); \ + })) +# define le32toh(x) (htole32((x))) + +# define htobe64(x) (x) +# define be64toh(x) (x) +# define htole64(x) \ + (__extension__({ \ + uint64_t __temp = (x); \ + uint32_t __low = htobe32((uint32_t)__temp); \ + uint32_t __high = htobe32((uint32_t)(__temp >> 32)); \ + (((uint64_t)__low) << 32) | __high; \ + })) +# define le64toh(x) (htole64((x))) + +/* ... 
generic little-endian fallback code */ +#elif defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + +# define htole32(x) (x) +# define le32toh(x) (x) +# define htobe32(x) \ + (__extension__({ \ + uint32_t _temp = (x); \ + ((_temp >> 24) & 0x000000FF) | ((_temp >> 8) & 0x0000FF00) | \ + ((_temp << 8) & 0x00FF0000) | ((_temp << 24) & 0xFF000000); \ + })) +# define be32toh(x) (htobe32((x))) + +# define htole64(x) (x) +# define le64toh(x) (x) +# define htobe64(x) \ + (__extension__({ \ + uint64_t __temp = (x); \ + uint32_t __low = htobe32((uint32_t)__temp); \ + uint32_t __high = htobe32((uint32_t)(__temp >> 32)); \ + (((uint64_t)__low) << 32) | __high; \ + })) +# define be64toh(x) (htobe64((x))) + +/* ... couldn't determine endian-ness of the target platform */ +#else +# error "Please define __BYTE_ORDER__!" + +#endif /* defined(__linux__) || ... */ + +/* Loads and stores. These avoid undefined behavior due to unaligned memory + * accesses, via memcpy. */ + +inline static uint16_t load16(uint8_t *b) { + uint16_t x; + memcpy(&x, b, 2); + return x; +} + +inline static uint32_t load32(uint8_t *b) { + uint32_t x; + memcpy(&x, b, 4); + return x; +} + +inline static uint64_t load64(uint8_t *b) { + uint64_t x; + memcpy(&x, b, 8); + return x; +} + +inline static void store16(uint8_t *b, uint16_t i) { + memcpy(b, &i, 2); +} + +inline static void store32(uint8_t *b, uint32_t i) { + memcpy(b, &i, 4); +} + +inline static void store64(uint8_t *b, uint64_t i) { + memcpy(b, &i, 8); +} + +/* Legacy accessors so that this header can serve as an implementation of + * C.Endianness */ +#define load16_le(b) (le16toh(load16(b))) +#define store16_le(b, i) (store16(b, htole16(i))) +#define load16_be(b) (be16toh(load16(b))) +#define store16_be(b, i) (store16(b, htobe16(i))) + +#define load32_le(b) (le32toh(load32(b))) +#define store32_le(b, i) (store32(b, htole32(i))) +#define load32_be(b) (be32toh(load32(b))) +#define store32_be(b, i) (store32(b, htobe32(i))) + +#define load64_le(b) (le64toh(load64(b))) +#define store64_le(b, i) (store64(b, htole64(i))) +#define load64_be(b) (be64toh(load64(b))) +#define store64_be(b, i) (store64(b, htobe64(i))) + +/* Co-existence of LowStar.Endianness and FStar.Endianness generates name + * conflicts, because of course both insist on having no prefixes. Until a + * prefix is added, or until we truly retire FStar.Endianness, solve this issue + * in an elegant way. 
*/
+#define load16_le0 load16_le
+#define store16_le0 store16_le
+#define load16_be0 load16_be
+#define store16_be0 store16_be
+
+#define load32_le0 load32_le
+#define store32_le0 store32_le
+#define load32_be0 load32_be
+#define store32_be0 store32_be
+
+#define load64_le0 load64_le
+#define store64_le0 store64_le
+#define load64_be0 load64_be
+#define store64_be0 store64_be
+
+#define load128_le0 load128_le
+#define store128_le0 store128_le
+#define load128_be0 load128_be
+#define store128_be0 store128_be
+
+#endif
diff --git a/src/math/bigfix/lib_intrinsics.h b/src/math/bigfix/lib_intrinsics.h
new file mode 100644
index 000000000..cf269bb89
--- /dev/null
+++ b/src/math/bigfix/lib_intrinsics.h
@@ -0,0 +1,67 @@
+#pragma once
+
+#include <sys/types.h>
+
+#if __has_include("config.h")
+#include "config.h"
+#endif
+
+#if defined(COMPILE_INTRINSICS)
+#if defined(_MSC_VER)
+#include <immintrin.h>
+#else
+#include <x86intrin.h>
+#endif
+#endif
+
+#if !defined(COMPILE_INTRINSICS)
+
+#include "Hacl_IntTypes_Intrinsics.h"
+
+#define Lib_IntTypes_Intrinsics_add_carry_u32(x1, x2, x3, x4) \
+  (Hacl_IntTypes_Intrinsics_add_carry_u32(x1, x2, x3, x4))
+
+#define Lib_IntTypes_Intrinsics_add_carry_u64(x1, x2, x3, x4) \
+  (Hacl_IntTypes_Intrinsics_add_carry_u64(x1, x2, x3, x4))
+
+#define Lib_IntTypes_Intrinsics_sub_borrow_u32(x1, x2, x3, x4) \
+  (Hacl_IntTypes_Intrinsics_sub_borrow_u32(x1, x2, x3, x4))
+
+#define Lib_IntTypes_Intrinsics_sub_borrow_u64(x1, x2, x3, x4) \
+  (Hacl_IntTypes_Intrinsics_sub_borrow_u64(x1, x2, x3, x4))
+
+#else
+
+#define Lib_IntTypes_Intrinsics_add_carry_u32(x1, x2, x3, x4) \
+  (_addcarry_u32(x1, x2, x3, (unsigned int *) x4))
+
+#define Lib_IntTypes_Intrinsics_add_carry_u64(x1, x2, x3, x4) \
+  (_addcarry_u64(x1, x2, x3, (long long unsigned int *) x4))
+
+
+/*
+  GCC versions prior to 7.2 pass arguments to _subborrow_u{32,64}
+  in an incorrect order.
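+
+  To compensate, the fallback definitions below swap the second and third
+  arguments for those GCC versions, so the subtraction is still evaluated
+  as x2 - x3 - borrow.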
+
+  See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+*/
+#if defined(__GNUC__) && !defined (__clang__) && \
+  (__GNUC__ < 7 || (__GNUC__ == 7 && (__GNUC_MINOR__ < 2)))
+
+#define Lib_IntTypes_Intrinsics_sub_borrow_u32(x1, x2, x3, x4) \
+  (_subborrow_u32(x1, x3, x2, (unsigned int *) x4))
+
+#define Lib_IntTypes_Intrinsics_sub_borrow_u64(x1, x2, x3, x4) \
+  (_subborrow_u64(x1, x3, x2, (long long unsigned int *) x4))
+
+#else
+
+#define Lib_IntTypes_Intrinsics_sub_borrow_u32(x1, x2, x3, x4) \
+  (_subborrow_u32(x1, x2, x3, (unsigned int *) x4))
+
+#define Lib_IntTypes_Intrinsics_sub_borrow_u64(x1, x2, x3, x4) \
+  (_subborrow_u64(x1, x2, x3, (long long unsigned int *) x4))
+
+#endif // GCC < 7.2
+
+#endif // !COMPILE_INTRINSICS
diff --git a/src/math/bigfix/u256.cpp b/src/math/bigfix/u256.cpp
new file mode 100644
index 000000000..79ebb1844
--- /dev/null
+++ b/src/math/bigfix/u256.cpp
@@ -0,0 +1,8 @@
+#include "math/bigfix/u256.h"
+#include "math/bigfix/Hacl_Bignum256.h"
+
+u256 u256::operator*(u256 const& other) const {
+    uint64_t result[8];
+    Hacl_Bignum256_mul(const_cast<uint64_t*>(m_num), const_cast<uint64_t*>(other.m_num), result);
+    return u256(result);
+}
diff --git a/src/math/bigfix/u256.h b/src/math/bigfix/u256.h
new file mode 100644
index 000000000..91bbd9742
--- /dev/null
+++ b/src/math/bigfix/u256.h
@@ -0,0 +1,11 @@
+#pragma once
+
+#include "util/util.h"
+
+class u256 {
+    uint64_t m_num[4];
+public:
+    u256() { memset(this, 0, sizeof(*this)); }
+    u256(uint64_t const* v) { memcpy(m_num, v, sizeof(*this)); }
+    u256 operator*(u256 const& other) const;
+};
diff --git a/src/math/polysat/CMakeLists.txt b/src/math/polysat/CMakeLists.txt
index 2648fe70e..14f1a4ac6 100644
--- a/src/math/polysat/CMakeLists.txt
+++ b/src/math/polysat/CMakeLists.txt
@@ -18,4 +18,5 @@ z3_add_component(polysat
    dd
    simplex
    interval
+   bigfix
 )
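
A note on the new wrapper, for illustration only (not part of the patch): Hacl_Bignum256_mul takes two 4-limb operands and writes the full 512-bit product into an 8-limb buffer, limbs least-significant first, and the u256(uint64_t const*) constructor copies only the first four limbs, so u256::operator* computes the product modulo 2^256. A minimal smoke test along these lines, assuming nothing beyond the u256 interface added above, could look like:

    #include "math/bigfix/u256.h"

    static void u256_mul_smoke_test() {
        uint64_t one[4] = { 1, 0, 0, 0 };                       // the value 1, limbs LSB first
        uint64_t x[4]   = { 0x123456789abcdef0ULL, 42, 0, 0 };
        u256 a(x), b(one);
        u256 c = a * b;   // full 512-bit product is formed, low 256 bits are kept
        (void)c;          // u256 does not yet expose accessors or comparisons,
                          // so this only exercises the call path
    }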