From babb90832ceb28f4ba7ef68ce9be2aac912a96ae Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Fri, 2 Jul 2021 16:24:23 -0700 Subject: [PATCH] initial u256 Signed-off-by: Nikolaj Bjorner --- CMakeLists.txt | 2 +- src/CMakeLists.txt | 1 + src/math/bigfix/Hacl_Bignum.c | 2470 +++++++++++++++++ src/math/bigfix/Hacl_Bignum.h | 345 +++ src/math/bigfix/Hacl_Bignum256.c | 1534 ++++++++++ src/math/bigfix/Hacl_Bignum256.h | 375 +++ src/math/bigfix/Hacl_Bignum_Base.h | 73 + src/math/bigfix/Hacl_IntTypes_Intrinsics.h | 88 + src/math/bigfix/kremlib/FStar_UInt128.h | 79 + .../bigfix/kremlib/FStar_UInt128_Verified.h | 347 +++ .../bigfix/kremlib/FStar_UInt_8_16_32_64.h | 104 + src/math/bigfix/kremlib/LowStar_Endianness.h | 28 + src/math/bigfix/kremlib/fstar_uint128_gcc64.h | 165 ++ src/math/bigfix/kremlib/fstar_uint128_msvc.h | 510 ++++ .../kremlib/fstar_uint128_struct_endianness.h | 68 + src/math/bigfix/kremlin/internal/target.h | 60 + src/math/bigfix/kremlin/internal/types.h | 70 + src/math/bigfix/kremlin/lowstar_endianness.h | 230 ++ src/math/bigfix/lib_intrinsics.h | 67 + src/math/bigfix/u256.cpp | 8 + src/math/bigfix/u256.h | 11 + src/math/polysat/CMakeLists.txt | 1 + 22 files changed, 6635 insertions(+), 1 deletion(-) create mode 100644 src/math/bigfix/Hacl_Bignum.c create mode 100644 src/math/bigfix/Hacl_Bignum.h create mode 100644 src/math/bigfix/Hacl_Bignum256.c create mode 100644 src/math/bigfix/Hacl_Bignum256.h create mode 100644 src/math/bigfix/Hacl_Bignum_Base.h create mode 100644 src/math/bigfix/Hacl_IntTypes_Intrinsics.h create mode 100644 src/math/bigfix/kremlib/FStar_UInt128.h create mode 100644 src/math/bigfix/kremlib/FStar_UInt128_Verified.h create mode 100644 src/math/bigfix/kremlib/FStar_UInt_8_16_32_64.h create mode 100644 src/math/bigfix/kremlib/LowStar_Endianness.h create mode 100644 src/math/bigfix/kremlib/fstar_uint128_gcc64.h create mode 100644 src/math/bigfix/kremlib/fstar_uint128_msvc.h create mode 100644 src/math/bigfix/kremlib/fstar_uint128_struct_endianness.h create mode 100644 src/math/bigfix/kremlin/internal/target.h create mode 100644 src/math/bigfix/kremlin/internal/types.h create mode 100644 src/math/bigfix/kremlin/lowstar_endianness.h create mode 100644 src/math/bigfix/lib_intrinsics.h create mode 100644 src/math/bigfix/u256.cpp create mode 100644 src/math/bigfix/u256.h diff --git a/CMakeLists.txt b/CMakeLists.txt index fa2531670..ccf7ea2ec 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.4) set(CMAKE_USER_MAKE_RULES_OVERRIDE_CXX "${CMAKE_CURRENT_SOURCE_DIR}/cmake/cxx_compiler_flags_overrides.cmake") -project(Z3 VERSION 4.8.11.0 LANGUAGES CXX) +project(Z3 VERSION 4.8.11.0 LANGUAGES CXX C) ################################################################################ # Project version diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 166f960e0..88b149e0f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -40,6 +40,7 @@ add_subdirectory(math/dd) add_subdirectory(math/hilbert) add_subdirectory(math/simplex) add_subdirectory(math/interval) +add_subdirectory(math/bigfix) add_subdirectory(math/polysat) add_subdirectory(math/automata) add_subdirectory(math/realclosure) diff --git a/src/math/bigfix/Hacl_Bignum.c b/src/math/bigfix/Hacl_Bignum.c new file mode 100644 index 000000000..54a282839 --- /dev/null +++ b/src/math/bigfix/Hacl_Bignum.c @@ -0,0 +1,2470 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any 
person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + + +#include "math/bigfix/Hacl_Bignum.h" + +void Hacl_Bignum_Convert_bn_from_bytes_be_uint64(uint32_t len, uint8_t *b, uint64_t *res) +{ + uint32_t bnLen = (len - (uint32_t)1U) / (uint32_t)8U + (uint32_t)1U; + uint32_t tmpLen = (uint32_t)8U * bnLen; + KRML_CHECK_SIZE(sizeof (uint8_t), tmpLen); + uint8_t *tmp = alloca(tmpLen * sizeof (uint8_t)); + memset(tmp, 0U, tmpLen * sizeof (uint8_t)); + memcpy(tmp + tmpLen - len, b, len * sizeof (uint8_t)); + for (uint32_t i = (uint32_t)0U; i < bnLen; i++) + { + uint64_t *os = res; + uint64_t u = load64_be(tmp + (bnLen - i - (uint32_t)1U) * (uint32_t)8U); + uint64_t x = u; + os[i] = x; + } +} + +void Hacl_Bignum_Convert_bn_to_bytes_be_uint64(uint32_t len, uint64_t *b, uint8_t *res) +{ + uint32_t bnLen = (len - (uint32_t)1U) / (uint32_t)8U + (uint32_t)1U; + uint32_t tmpLen = (uint32_t)8U * bnLen; + KRML_CHECK_SIZE(sizeof (uint8_t), tmpLen); + uint8_t *tmp = alloca(tmpLen * sizeof (uint8_t)); + memset(tmp, 0U, tmpLen * sizeof (uint8_t)); + uint32_t numb = (uint32_t)8U; + for (uint32_t i = (uint32_t)0U; i < bnLen; i++) + { + store64_be(tmp + i * numb, b[bnLen - i - (uint32_t)1U]); + } + memcpy(res, tmp + tmpLen - len, len * sizeof (uint8_t)); +} + +uint32_t Hacl_Bignum_Lib_bn_get_top_index_u32(uint32_t len, uint32_t *b) +{ + uint32_t priv = (uint32_t)0U; + for (uint32_t i = (uint32_t)0U; i < len; i++) + { + uint32_t mask = FStar_UInt32_eq_mask(b[i], (uint32_t)0U); + priv = (mask & priv) | (~mask & i); + } + return priv; +} + +uint64_t Hacl_Bignum_Lib_bn_get_top_index_u64(uint32_t len, uint64_t *b) +{ + uint64_t priv = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < len; i++) + { + uint64_t mask = FStar_UInt64_eq_mask(b[i], (uint64_t)0U); + priv = (mask & priv) | (~mask & (uint64_t)i); + } + return priv; +} + +inline uint32_t +Hacl_Bignum_Addition_bn_sub_eq_len_u32(uint32_t aLen, uint32_t *a, uint32_t *b, uint32_t *res) +{ + uint32_t c = (uint32_t)0U; + for (uint32_t i = (uint32_t)0U; i < aLen / (uint32_t)4U * (uint32_t)4U / (uint32_t)4U; i++) + { + uint32_t t1 = a[(uint32_t)4U * i]; + uint32_t t20 = b[(uint32_t)4U * i]; + uint32_t *res_i0 = res + (uint32_t)4U * i; + c = Lib_IntTypes_Intrinsics_sub_borrow_u32(c, t1, t20, res_i0); + uint32_t t10 = a[(uint32_t)4U * i + (uint32_t)1U]; + uint32_t t21 = b[(uint32_t)4U * i + (uint32_t)1U]; + uint32_t *res_i1 = res + (uint32_t)4U * i + (uint32_t)1U; + c = Lib_IntTypes_Intrinsics_sub_borrow_u32(c, t10, t21, res_i1); + uint32_t t11 = a[(uint32_t)4U * i + (uint32_t)2U]; + uint32_t t22 = 
b[(uint32_t)4U * i + (uint32_t)2U]; + uint32_t *res_i2 = res + (uint32_t)4U * i + (uint32_t)2U; + c = Lib_IntTypes_Intrinsics_sub_borrow_u32(c, t11, t22, res_i2); + uint32_t t12 = a[(uint32_t)4U * i + (uint32_t)3U]; + uint32_t t2 = b[(uint32_t)4U * i + (uint32_t)3U]; + uint32_t *res_i = res + (uint32_t)4U * i + (uint32_t)3U; + c = Lib_IntTypes_Intrinsics_sub_borrow_u32(c, t12, t2, res_i); + } + for (uint32_t i = aLen / (uint32_t)4U * (uint32_t)4U; i < aLen; i++) + { + uint32_t t1 = a[i]; + uint32_t t2 = b[i]; + uint32_t *res_i = res + i; + c = Lib_IntTypes_Intrinsics_sub_borrow_u32(c, t1, t2, res_i); + } + return c; +} + +inline uint64_t +Hacl_Bignum_Addition_bn_sub_eq_len_u64(uint32_t aLen, uint64_t *a, uint64_t *b, uint64_t *res) +{ + uint64_t c = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < aLen / (uint32_t)4U * (uint32_t)4U / (uint32_t)4U; i++) + { + uint64_t t1 = a[(uint32_t)4U * i]; + uint64_t t20 = b[(uint32_t)4U * i]; + uint64_t *res_i0 = res + (uint32_t)4U * i; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t1, t20, res_i0); + uint64_t t10 = a[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t t21 = b[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t *res_i1 = res + (uint32_t)4U * i + (uint32_t)1U; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t10, t21, res_i1); + uint64_t t11 = a[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t t22 = b[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t *res_i2 = res + (uint32_t)4U * i + (uint32_t)2U; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t11, t22, res_i2); + uint64_t t12 = a[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t t2 = b[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t *res_i = res + (uint32_t)4U * i + (uint32_t)3U; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t12, t2, res_i); + } + for (uint32_t i = aLen / (uint32_t)4U * (uint32_t)4U; i < aLen; i++) + { + uint64_t t1 = a[i]; + uint64_t t2 = b[i]; + uint64_t *res_i = res + i; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t1, t2, res_i); + } + return c; +} + +inline uint32_t +Hacl_Bignum_Addition_bn_add_eq_len_u32(uint32_t aLen, uint32_t *a, uint32_t *b, uint32_t *res) +{ + uint32_t c = (uint32_t)0U; + for (uint32_t i = (uint32_t)0U; i < aLen / (uint32_t)4U * (uint32_t)4U / (uint32_t)4U; i++) + { + uint32_t t1 = a[(uint32_t)4U * i]; + uint32_t t20 = b[(uint32_t)4U * i]; + uint32_t *res_i0 = res + (uint32_t)4U * i; + c = Lib_IntTypes_Intrinsics_add_carry_u32(c, t1, t20, res_i0); + uint32_t t10 = a[(uint32_t)4U * i + (uint32_t)1U]; + uint32_t t21 = b[(uint32_t)4U * i + (uint32_t)1U]; + uint32_t *res_i1 = res + (uint32_t)4U * i + (uint32_t)1U; + c = Lib_IntTypes_Intrinsics_add_carry_u32(c, t10, t21, res_i1); + uint32_t t11 = a[(uint32_t)4U * i + (uint32_t)2U]; + uint32_t t22 = b[(uint32_t)4U * i + (uint32_t)2U]; + uint32_t *res_i2 = res + (uint32_t)4U * i + (uint32_t)2U; + c = Lib_IntTypes_Intrinsics_add_carry_u32(c, t11, t22, res_i2); + uint32_t t12 = a[(uint32_t)4U * i + (uint32_t)3U]; + uint32_t t2 = b[(uint32_t)4U * i + (uint32_t)3U]; + uint32_t *res_i = res + (uint32_t)4U * i + (uint32_t)3U; + c = Lib_IntTypes_Intrinsics_add_carry_u32(c, t12, t2, res_i); + } + for (uint32_t i = aLen / (uint32_t)4U * (uint32_t)4U; i < aLen; i++) + { + uint32_t t1 = a[i]; + uint32_t t2 = b[i]; + uint32_t *res_i = res + i; + c = Lib_IntTypes_Intrinsics_add_carry_u32(c, t1, t2, res_i); + } + return c; +} + +inline uint64_t +Hacl_Bignum_Addition_bn_add_eq_len_u64(uint32_t aLen, uint64_t *a, uint64_t *b, uint64_t *res) +{ + uint64_t c = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < aLen / (uint32_t)4U * 
(uint32_t)4U / (uint32_t)4U; i++) + { + uint64_t t1 = a[(uint32_t)4U * i]; + uint64_t t20 = b[(uint32_t)4U * i]; + uint64_t *res_i0 = res + (uint32_t)4U * i; + c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t1, t20, res_i0); + uint64_t t10 = a[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t t21 = b[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t *res_i1 = res + (uint32_t)4U * i + (uint32_t)1U; + c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t10, t21, res_i1); + uint64_t t11 = a[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t t22 = b[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t *res_i2 = res + (uint32_t)4U * i + (uint32_t)2U; + c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t11, t22, res_i2); + uint64_t t12 = a[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t t2 = b[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t *res_i = res + (uint32_t)4U * i + (uint32_t)3U; + c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t12, t2, res_i); + } + for (uint32_t i = aLen / (uint32_t)4U * (uint32_t)4U; i < aLen; i++) + { + uint64_t t1 = a[i]; + uint64_t t2 = b[i]; + uint64_t *res_i = res + i; + c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t1, t2, res_i); + } + return c; +} + +static inline void +bn_mul_u32(uint32_t aLen, uint32_t *a, uint32_t bLen, uint32_t *b, uint32_t *res) +{ + uint32_t resLen = aLen + bLen; + memset(res, 0U, resLen * sizeof (uint32_t)); + for (uint32_t i0 = (uint32_t)0U; i0 < bLen; i0++) + { + uint32_t bj = b[i0]; + uint32_t *res_j = res + i0; + uint32_t c = (uint32_t)0U; + for (uint32_t i = (uint32_t)0U; i < aLen / (uint32_t)4U * (uint32_t)4U / (uint32_t)4U; i++) + { + uint32_t a_i = a[(uint32_t)4U * i]; + uint32_t *res_i0 = res_j + (uint32_t)4U * i; + c = Hacl_Bignum_Base_mul_wide_add2_u32(a_i, bj, c, res_i0); + uint32_t a_i0 = a[(uint32_t)4U * i + (uint32_t)1U]; + uint32_t *res_i1 = res_j + (uint32_t)4U * i + (uint32_t)1U; + c = Hacl_Bignum_Base_mul_wide_add2_u32(a_i0, bj, c, res_i1); + uint32_t a_i1 = a[(uint32_t)4U * i + (uint32_t)2U]; + uint32_t *res_i2 = res_j + (uint32_t)4U * i + (uint32_t)2U; + c = Hacl_Bignum_Base_mul_wide_add2_u32(a_i1, bj, c, res_i2); + uint32_t a_i2 = a[(uint32_t)4U * i + (uint32_t)3U]; + uint32_t *res_i = res_j + (uint32_t)4U * i + (uint32_t)3U; + c = Hacl_Bignum_Base_mul_wide_add2_u32(a_i2, bj, c, res_i); + } + for (uint32_t i = aLen / (uint32_t)4U * (uint32_t)4U; i < aLen; i++) + { + uint32_t a_i = a[i]; + uint32_t *res_i = res_j + i; + c = Hacl_Bignum_Base_mul_wide_add2_u32(a_i, bj, c, res_i); + } + uint32_t r = c; + res[aLen + i0] = r; + } +} + +static inline void +bn_mul_u64(uint32_t aLen, uint64_t *a, uint32_t bLen, uint64_t *b, uint64_t *res) +{ + uint32_t resLen = aLen + bLen; + memset(res, 0U, resLen * sizeof (uint64_t)); + for (uint32_t i0 = (uint32_t)0U; i0 < bLen; i0++) + { + uint64_t bj = b[i0]; + uint64_t *res_j = res + i0; + uint64_t c = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < aLen / (uint32_t)4U * (uint32_t)4U / (uint32_t)4U; i++) + { + uint64_t a_i = a[(uint32_t)4U * i]; + uint64_t *res_i0 = res_j + (uint32_t)4U * i; + c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, bj, c, res_i0); + uint64_t a_i0 = a[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t *res_i1 = res_j + (uint32_t)4U * i + (uint32_t)1U; + c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i0, bj, c, res_i1); + uint64_t a_i1 = a[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t *res_i2 = res_j + (uint32_t)4U * i + (uint32_t)2U; + c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i1, bj, c, res_i2); + uint64_t a_i2 = a[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t *res_i = res_j + (uint32_t)4U * i + (uint32_t)3U; + c = 
Hacl_Bignum_Base_mul_wide_add2_u64(a_i2, bj, c, res_i); + } + for (uint32_t i = aLen / (uint32_t)4U * (uint32_t)4U; i < aLen; i++) + { + uint64_t a_i = a[i]; + uint64_t *res_i = res_j + i; + c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, bj, c, res_i); + } + uint64_t r = c; + res[aLen + i0] = r; + } +} + +static inline void bn_sqr_u32(uint32_t aLen, uint32_t *a, uint32_t *res) +{ + uint32_t resLen = aLen + aLen; + memset(res, 0U, resLen * sizeof (uint32_t)); + for (uint32_t i0 = (uint32_t)0U; i0 < aLen; i0++) + { + uint32_t *ab = a; + uint32_t a_j = a[i0]; + uint32_t *res_j = res + i0; + uint32_t c = (uint32_t)0U; + for (uint32_t i = (uint32_t)0U; i < i0 / (uint32_t)4U * (uint32_t)4U / (uint32_t)4U; i++) + { + uint32_t a_i = ab[(uint32_t)4U * i]; + uint32_t *res_i0 = res_j + (uint32_t)4U * i; + c = Hacl_Bignum_Base_mul_wide_add2_u32(a_i, a_j, c, res_i0); + uint32_t a_i0 = ab[(uint32_t)4U * i + (uint32_t)1U]; + uint32_t *res_i1 = res_j + (uint32_t)4U * i + (uint32_t)1U; + c = Hacl_Bignum_Base_mul_wide_add2_u32(a_i0, a_j, c, res_i1); + uint32_t a_i1 = ab[(uint32_t)4U * i + (uint32_t)2U]; + uint32_t *res_i2 = res_j + (uint32_t)4U * i + (uint32_t)2U; + c = Hacl_Bignum_Base_mul_wide_add2_u32(a_i1, a_j, c, res_i2); + uint32_t a_i2 = ab[(uint32_t)4U * i + (uint32_t)3U]; + uint32_t *res_i = res_j + (uint32_t)4U * i + (uint32_t)3U; + c = Hacl_Bignum_Base_mul_wide_add2_u32(a_i2, a_j, c, res_i); + } + for (uint32_t i = i0 / (uint32_t)4U * (uint32_t)4U; i < i0; i++) + { + uint32_t a_i = ab[i]; + uint32_t *res_i = res_j + i; + c = Hacl_Bignum_Base_mul_wide_add2_u32(a_i, a_j, c, res_i); + } + uint32_t r = c; + res[i0 + i0] = r; + } + uint32_t c0 = Hacl_Bignum_Addition_bn_add_eq_len_u32(resLen, res, res, res); + KRML_CHECK_SIZE(sizeof (uint32_t), resLen); + uint32_t *tmp = alloca(resLen * sizeof (uint32_t)); + memset(tmp, 0U, resLen * sizeof (uint32_t)); + for (uint32_t i = (uint32_t)0U; i < aLen; i++) + { + uint64_t res1 = (uint64_t)a[i] * (uint64_t)a[i]; + uint32_t hi = (uint32_t)(res1 >> (uint32_t)32U); + uint32_t lo = (uint32_t)res1; + tmp[(uint32_t)2U * i] = lo; + tmp[(uint32_t)2U * i + (uint32_t)1U] = hi; + } + uint32_t c1 = Hacl_Bignum_Addition_bn_add_eq_len_u32(resLen, res, tmp, res); +} + +static inline void bn_sqr_u64(uint32_t aLen, uint64_t *a, uint64_t *res) +{ + uint32_t resLen = aLen + aLen; + memset(res, 0U, resLen * sizeof (uint64_t)); + for (uint32_t i0 = (uint32_t)0U; i0 < aLen; i0++) + { + uint64_t *ab = a; + uint64_t a_j = a[i0]; + uint64_t *res_j = res + i0; + uint64_t c = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < i0 / (uint32_t)4U * (uint32_t)4U / (uint32_t)4U; i++) + { + uint64_t a_i = ab[(uint32_t)4U * i]; + uint64_t *res_i0 = res_j + (uint32_t)4U * i; + c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, a_j, c, res_i0); + uint64_t a_i0 = ab[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t *res_i1 = res_j + (uint32_t)4U * i + (uint32_t)1U; + c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i0, a_j, c, res_i1); + uint64_t a_i1 = ab[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t *res_i2 = res_j + (uint32_t)4U * i + (uint32_t)2U; + c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i1, a_j, c, res_i2); + uint64_t a_i2 = ab[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t *res_i = res_j + (uint32_t)4U * i + (uint32_t)3U; + c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i2, a_j, c, res_i); + } + for (uint32_t i = i0 / (uint32_t)4U * (uint32_t)4U; i < i0; i++) + { + uint64_t a_i = ab[i]; + uint64_t *res_i = res_j + i; + c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, a_j, c, res_i); + } + uint64_t r = c; + res[i0 + i0] = r; 
+ } + uint64_t c0 = Hacl_Bignum_Addition_bn_add_eq_len_u64(resLen, res, res, res); + KRML_CHECK_SIZE(sizeof (uint64_t), resLen); + uint64_t *tmp = alloca(resLen * sizeof (uint64_t)); + memset(tmp, 0U, resLen * sizeof (uint64_t)); + for (uint32_t i = (uint32_t)0U; i < aLen; i++) + { + FStar_UInt128_uint128 res1 = FStar_UInt128_mul_wide(a[i], a[i]); + uint64_t hi = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(res1, (uint32_t)64U)); + uint64_t lo = FStar_UInt128_uint128_to_uint64(res1); + tmp[(uint32_t)2U * i] = lo; + tmp[(uint32_t)2U * i + (uint32_t)1U] = hi; + } + uint64_t c1 = Hacl_Bignum_Addition_bn_add_eq_len_u64(resLen, res, tmp, res); +} + +void +Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint32( + uint32_t aLen, + uint32_t *a, + uint32_t *b, + uint32_t *tmp, + uint32_t *res +) +{ + if (aLen < (uint32_t)32U || aLen % (uint32_t)2U == (uint32_t)1U) + { + bn_mul_u32(aLen, a, aLen, b, res); + return; + } + uint32_t len2 = aLen / (uint32_t)2U; + uint32_t *a0 = a; + uint32_t *a1 = a + len2; + uint32_t *b0 = b; + uint32_t *b1 = b + len2; + uint32_t *t0 = tmp; + uint32_t *t1 = tmp + len2; + uint32_t *tmp_ = tmp + aLen; + uint32_t c0 = Hacl_Bignum_Addition_bn_sub_eq_len_u32(len2, a0, a1, tmp_); + uint32_t c10 = Hacl_Bignum_Addition_bn_sub_eq_len_u32(len2, a1, a0, t0); + for (uint32_t i = (uint32_t)0U; i < len2; i++) + { + uint32_t *os = t0; + uint32_t x = (((uint32_t)0U - c0) & t0[i]) | (~((uint32_t)0U - c0) & tmp_[i]); + os[i] = x; + } + uint32_t c00 = c0; + uint32_t c010 = Hacl_Bignum_Addition_bn_sub_eq_len_u32(len2, b0, b1, tmp_); + uint32_t c1 = Hacl_Bignum_Addition_bn_sub_eq_len_u32(len2, b1, b0, t1); + for (uint32_t i = (uint32_t)0U; i < len2; i++) + { + uint32_t *os = t1; + uint32_t x = (((uint32_t)0U - c010) & t1[i]) | (~((uint32_t)0U - c010) & tmp_[i]); + os[i] = x; + } + uint32_t c11 = c010; + uint32_t *t23 = tmp + aLen; + uint32_t *tmp1 = tmp + aLen + aLen; + Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint32(len2, t0, t1, tmp1, t23); + uint32_t *r01 = res; + uint32_t *r23 = res + aLen; + Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint32(len2, a0, b0, tmp1, r01); + Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint32(len2, a1, b1, tmp1, r23); + uint32_t *r011 = res; + uint32_t *r231 = res + aLen; + uint32_t *t01 = tmp; + uint32_t *t231 = tmp + aLen; + uint32_t *t45 = tmp + (uint32_t)2U * aLen; + uint32_t *t67 = tmp + (uint32_t)3U * aLen; + uint32_t c2 = Hacl_Bignum_Addition_bn_add_eq_len_u32(aLen, r011, r231, t01); + uint32_t c_sign = c00 ^ c11; + uint32_t c3 = Hacl_Bignum_Addition_bn_sub_eq_len_u32(aLen, t01, t231, t67); + uint32_t c31 = c2 - c3; + uint32_t c4 = Hacl_Bignum_Addition_bn_add_eq_len_u32(aLen, t01, t231, t45); + uint32_t c41 = c2 + c4; + uint32_t mask = (uint32_t)0U - c_sign; + for (uint32_t i = (uint32_t)0U; i < aLen; i++) + { + uint32_t *os = t45; + uint32_t x = (mask & t45[i]) | (~mask & t67[i]); + os[i] = x; + } + uint32_t c5 = (mask & c41) | (~mask & c31); + uint32_t aLen2 = aLen / (uint32_t)2U; + uint32_t *r0 = res + aLen2; + uint32_t r10 = Hacl_Bignum_Addition_bn_add_eq_len_u32(aLen, r0, t45, r0); + uint32_t c6 = r10; + uint32_t c60 = c6; + uint32_t c7 = c5 + c60; + uint32_t *r = res + aLen + aLen2; + uint32_t c01 = Lib_IntTypes_Intrinsics_add_carry_u32((uint32_t)0U, r[0U], c7, r); + uint32_t r1; + if ((uint32_t)1U < aLen + aLen - (aLen + aLen2)) + { + uint32_t rLen = aLen + aLen - (aLen + aLen2) - (uint32_t)1U; + uint32_t *a11 = r + (uint32_t)1U; + uint32_t *res1 = r + (uint32_t)1U; + uint32_t c = c01; + for (uint32_t i = (uint32_t)0U; i < rLen / (uint32_t)4U * 
(uint32_t)4U / (uint32_t)4U; i++) + { + uint32_t t11 = a11[(uint32_t)4U * i]; + uint32_t *res_i0 = res1 + (uint32_t)4U * i; + c = Lib_IntTypes_Intrinsics_add_carry_u32(c, t11, (uint32_t)0U, res_i0); + uint32_t t110 = a11[(uint32_t)4U * i + (uint32_t)1U]; + uint32_t *res_i1 = res1 + (uint32_t)4U * i + (uint32_t)1U; + c = Lib_IntTypes_Intrinsics_add_carry_u32(c, t110, (uint32_t)0U, res_i1); + uint32_t t111 = a11[(uint32_t)4U * i + (uint32_t)2U]; + uint32_t *res_i2 = res1 + (uint32_t)4U * i + (uint32_t)2U; + c = Lib_IntTypes_Intrinsics_add_carry_u32(c, t111, (uint32_t)0U, res_i2); + uint32_t t112 = a11[(uint32_t)4U * i + (uint32_t)3U]; + uint32_t *res_i = res1 + (uint32_t)4U * i + (uint32_t)3U; + c = Lib_IntTypes_Intrinsics_add_carry_u32(c, t112, (uint32_t)0U, res_i); + } + for (uint32_t i = rLen / (uint32_t)4U * (uint32_t)4U; i < rLen; i++) + { + uint32_t t11 = a11[i]; + uint32_t *res_i = res1 + i; + c = Lib_IntTypes_Intrinsics_add_carry_u32(c, t11, (uint32_t)0U, res_i); + } + uint32_t c110 = c; + r1 = c110; + } + else + { + r1 = c01; + } + uint32_t c8 = r1; + uint32_t c = c8; + uint32_t c9 = c; +} + +void +Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint64( + uint32_t aLen, + uint64_t *a, + uint64_t *b, + uint64_t *tmp, + uint64_t *res +) +{ + if (aLen < (uint32_t)32U || aLen % (uint32_t)2U == (uint32_t)1U) + { + bn_mul_u64(aLen, a, aLen, b, res); + return; + } + uint32_t len2 = aLen / (uint32_t)2U; + uint64_t *a0 = a; + uint64_t *a1 = a + len2; + uint64_t *b0 = b; + uint64_t *b1 = b + len2; + uint64_t *t0 = tmp; + uint64_t *t1 = tmp + len2; + uint64_t *tmp_ = tmp + aLen; + uint64_t c0 = Hacl_Bignum_Addition_bn_sub_eq_len_u64(len2, a0, a1, tmp_); + uint64_t c10 = Hacl_Bignum_Addition_bn_sub_eq_len_u64(len2, a1, a0, t0); + for (uint32_t i = (uint32_t)0U; i < len2; i++) + { + uint64_t *os = t0; + uint64_t x = (((uint64_t)0U - c0) & t0[i]) | (~((uint64_t)0U - c0) & tmp_[i]); + os[i] = x; + } + uint64_t c00 = c0; + uint64_t c010 = Hacl_Bignum_Addition_bn_sub_eq_len_u64(len2, b0, b1, tmp_); + uint64_t c1 = Hacl_Bignum_Addition_bn_sub_eq_len_u64(len2, b1, b0, t1); + for (uint32_t i = (uint32_t)0U; i < len2; i++) + { + uint64_t *os = t1; + uint64_t x = (((uint64_t)0U - c010) & t1[i]) | (~((uint64_t)0U - c010) & tmp_[i]); + os[i] = x; + } + uint64_t c11 = c010; + uint64_t *t23 = tmp + aLen; + uint64_t *tmp1 = tmp + aLen + aLen; + Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint64(len2, t0, t1, tmp1, t23); + uint64_t *r01 = res; + uint64_t *r23 = res + aLen; + Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint64(len2, a0, b0, tmp1, r01); + Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint64(len2, a1, b1, tmp1, r23); + uint64_t *r011 = res; + uint64_t *r231 = res + aLen; + uint64_t *t01 = tmp; + uint64_t *t231 = tmp + aLen; + uint64_t *t45 = tmp + (uint32_t)2U * aLen; + uint64_t *t67 = tmp + (uint32_t)3U * aLen; + uint64_t c2 = Hacl_Bignum_Addition_bn_add_eq_len_u64(aLen, r011, r231, t01); + uint64_t c_sign = c00 ^ c11; + uint64_t c3 = Hacl_Bignum_Addition_bn_sub_eq_len_u64(aLen, t01, t231, t67); + uint64_t c31 = c2 - c3; + uint64_t c4 = Hacl_Bignum_Addition_bn_add_eq_len_u64(aLen, t01, t231, t45); + uint64_t c41 = c2 + c4; + uint64_t mask = (uint64_t)0U - c_sign; + for (uint32_t i = (uint32_t)0U; i < aLen; i++) + { + uint64_t *os = t45; + uint64_t x = (mask & t45[i]) | (~mask & t67[i]); + os[i] = x; + } + uint64_t c5 = (mask & c41) | (~mask & c31); + uint32_t aLen2 = aLen / (uint32_t)2U; + uint64_t *r0 = res + aLen2; + uint64_t r10 = Hacl_Bignum_Addition_bn_add_eq_len_u64(aLen, r0, t45, r0); + uint64_t c6 = r10; + 
uint64_t c60 = c6; + uint64_t c7 = c5 + c60; + uint64_t *r = res + aLen + aLen2; + uint64_t c01 = Lib_IntTypes_Intrinsics_add_carry_u64((uint64_t)0U, r[0U], c7, r); + uint64_t r1; + if ((uint32_t)1U < aLen + aLen - (aLen + aLen2)) + { + uint32_t rLen = aLen + aLen - (aLen + aLen2) - (uint32_t)1U; + uint64_t *a11 = r + (uint32_t)1U; + uint64_t *res1 = r + (uint32_t)1U; + uint64_t c = c01; + for (uint32_t i = (uint32_t)0U; i < rLen / (uint32_t)4U * (uint32_t)4U / (uint32_t)4U; i++) + { + uint64_t t11 = a11[(uint32_t)4U * i]; + uint64_t *res_i0 = res1 + (uint32_t)4U * i; + c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t11, (uint64_t)0U, res_i0); + uint64_t t110 = a11[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t *res_i1 = res1 + (uint32_t)4U * i + (uint32_t)1U; + c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t110, (uint64_t)0U, res_i1); + uint64_t t111 = a11[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t *res_i2 = res1 + (uint32_t)4U * i + (uint32_t)2U; + c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t111, (uint64_t)0U, res_i2); + uint64_t t112 = a11[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t *res_i = res1 + (uint32_t)4U * i + (uint32_t)3U; + c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t112, (uint64_t)0U, res_i); + } + for (uint32_t i = rLen / (uint32_t)4U * (uint32_t)4U; i < rLen; i++) + { + uint64_t t11 = a11[i]; + uint64_t *res_i = res1 + i; + c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t11, (uint64_t)0U, res_i); + } + uint64_t c110 = c; + r1 = c110; + } + else + { + r1 = c01; + } + uint64_t c8 = r1; + uint64_t c = c8; + uint64_t c9 = c; +} + +void +Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint32( + uint32_t aLen, + uint32_t *a, + uint32_t *tmp, + uint32_t *res +) +{ + if (aLen < (uint32_t)32U || aLen % (uint32_t)2U == (uint32_t)1U) + { + bn_sqr_u32(aLen, a, res); + return; + } + uint32_t len2 = aLen / (uint32_t)2U; + uint32_t *a0 = a; + uint32_t *a1 = a + len2; + uint32_t *t0 = tmp; + uint32_t *tmp_ = tmp + aLen; + uint32_t c0 = Hacl_Bignum_Addition_bn_sub_eq_len_u32(len2, a0, a1, tmp_); + uint32_t c1 = Hacl_Bignum_Addition_bn_sub_eq_len_u32(len2, a1, a0, t0); + for (uint32_t i = (uint32_t)0U; i < len2; i++) + { + uint32_t *os = t0; + uint32_t x = (((uint32_t)0U - c0) & t0[i]) | (~((uint32_t)0U - c0) & tmp_[i]); + os[i] = x; + } + uint32_t c00 = c0; + uint32_t *t23 = tmp + aLen; + uint32_t *tmp1 = tmp + aLen + aLen; + Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint32(len2, t0, tmp1, t23); + uint32_t *r01 = res; + uint32_t *r23 = res + aLen; + Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint32(len2, a0, tmp1, r01); + Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint32(len2, a1, tmp1, r23); + uint32_t *r011 = res; + uint32_t *r231 = res + aLen; + uint32_t *t01 = tmp; + uint32_t *t231 = tmp + aLen; + uint32_t *t45 = tmp + (uint32_t)2U * aLen; + uint32_t c2 = Hacl_Bignum_Addition_bn_add_eq_len_u32(aLen, r011, r231, t01); + uint32_t c3 = Hacl_Bignum_Addition_bn_sub_eq_len_u32(aLen, t01, t231, t45); + uint32_t c5 = c2 - c3; + uint32_t aLen2 = aLen / (uint32_t)2U; + uint32_t *r0 = res + aLen2; + uint32_t r10 = Hacl_Bignum_Addition_bn_add_eq_len_u32(aLen, r0, t45, r0); + uint32_t c4 = r10; + uint32_t c6 = c4; + uint32_t c7 = c5 + c6; + uint32_t *r = res + aLen + aLen2; + uint32_t c01 = Lib_IntTypes_Intrinsics_add_carry_u32((uint32_t)0U, r[0U], c7, r); + uint32_t r1; + if ((uint32_t)1U < aLen + aLen - (aLen + aLen2)) + { + uint32_t rLen = aLen + aLen - (aLen + aLen2) - (uint32_t)1U; + uint32_t *a11 = r + (uint32_t)1U; + uint32_t *res1 = r + (uint32_t)1U; + uint32_t c = c01; + for (uint32_t i = (uint32_t)0U; i < rLen 
/ (uint32_t)4U * (uint32_t)4U / (uint32_t)4U; i++) + { + uint32_t t1 = a11[(uint32_t)4U * i]; + uint32_t *res_i0 = res1 + (uint32_t)4U * i; + c = Lib_IntTypes_Intrinsics_add_carry_u32(c, t1, (uint32_t)0U, res_i0); + uint32_t t10 = a11[(uint32_t)4U * i + (uint32_t)1U]; + uint32_t *res_i1 = res1 + (uint32_t)4U * i + (uint32_t)1U; + c = Lib_IntTypes_Intrinsics_add_carry_u32(c, t10, (uint32_t)0U, res_i1); + uint32_t t11 = a11[(uint32_t)4U * i + (uint32_t)2U]; + uint32_t *res_i2 = res1 + (uint32_t)4U * i + (uint32_t)2U; + c = Lib_IntTypes_Intrinsics_add_carry_u32(c, t11, (uint32_t)0U, res_i2); + uint32_t t12 = a11[(uint32_t)4U * i + (uint32_t)3U]; + uint32_t *res_i = res1 + (uint32_t)4U * i + (uint32_t)3U; + c = Lib_IntTypes_Intrinsics_add_carry_u32(c, t12, (uint32_t)0U, res_i); + } + for (uint32_t i = rLen / (uint32_t)4U * (uint32_t)4U; i < rLen; i++) + { + uint32_t t1 = a11[i]; + uint32_t *res_i = res1 + i; + c = Lib_IntTypes_Intrinsics_add_carry_u32(c, t1, (uint32_t)0U, res_i); + } + uint32_t c10 = c; + r1 = c10; + } + else + { + r1 = c01; + } + uint32_t c8 = r1; + uint32_t c = c8; + uint32_t c9 = c; +} + +void +Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint64( + uint32_t aLen, + uint64_t *a, + uint64_t *tmp, + uint64_t *res +) +{ + if (aLen < (uint32_t)32U || aLen % (uint32_t)2U == (uint32_t)1U) + { + bn_sqr_u64(aLen, a, res); + return; + } + uint32_t len2 = aLen / (uint32_t)2U; + uint64_t *a0 = a; + uint64_t *a1 = a + len2; + uint64_t *t0 = tmp; + uint64_t *tmp_ = tmp + aLen; + uint64_t c0 = Hacl_Bignum_Addition_bn_sub_eq_len_u64(len2, a0, a1, tmp_); + uint64_t c1 = Hacl_Bignum_Addition_bn_sub_eq_len_u64(len2, a1, a0, t0); + for (uint32_t i = (uint32_t)0U; i < len2; i++) + { + uint64_t *os = t0; + uint64_t x = (((uint64_t)0U - c0) & t0[i]) | (~((uint64_t)0U - c0) & tmp_[i]); + os[i] = x; + } + uint64_t c00 = c0; + uint64_t *t23 = tmp + aLen; + uint64_t *tmp1 = tmp + aLen + aLen; + Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint64(len2, t0, tmp1, t23); + uint64_t *r01 = res; + uint64_t *r23 = res + aLen; + Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint64(len2, a0, tmp1, r01); + Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint64(len2, a1, tmp1, r23); + uint64_t *r011 = res; + uint64_t *r231 = res + aLen; + uint64_t *t01 = tmp; + uint64_t *t231 = tmp + aLen; + uint64_t *t45 = tmp + (uint32_t)2U * aLen; + uint64_t c2 = Hacl_Bignum_Addition_bn_add_eq_len_u64(aLen, r011, r231, t01); + uint64_t c3 = Hacl_Bignum_Addition_bn_sub_eq_len_u64(aLen, t01, t231, t45); + uint64_t c5 = c2 - c3; + uint32_t aLen2 = aLen / (uint32_t)2U; + uint64_t *r0 = res + aLen2; + uint64_t r10 = Hacl_Bignum_Addition_bn_add_eq_len_u64(aLen, r0, t45, r0); + uint64_t c4 = r10; + uint64_t c6 = c4; + uint64_t c7 = c5 + c6; + uint64_t *r = res + aLen + aLen2; + uint64_t c01 = Lib_IntTypes_Intrinsics_add_carry_u64((uint64_t)0U, r[0U], c7, r); + uint64_t r1; + if ((uint32_t)1U < aLen + aLen - (aLen + aLen2)) + { + uint32_t rLen = aLen + aLen - (aLen + aLen2) - (uint32_t)1U; + uint64_t *a11 = r + (uint32_t)1U; + uint64_t *res1 = r + (uint32_t)1U; + uint64_t c = c01; + for (uint32_t i = (uint32_t)0U; i < rLen / (uint32_t)4U * (uint32_t)4U / (uint32_t)4U; i++) + { + uint64_t t1 = a11[(uint32_t)4U * i]; + uint64_t *res_i0 = res1 + (uint32_t)4U * i; + c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t1, (uint64_t)0U, res_i0); + uint64_t t10 = a11[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t *res_i1 = res1 + (uint32_t)4U * i + (uint32_t)1U; + c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t10, (uint64_t)0U, res_i1); + uint64_t t11 = a11[(uint32_t)4U * i + 
(uint32_t)2U]; + uint64_t *res_i2 = res1 + (uint32_t)4U * i + (uint32_t)2U; + c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t11, (uint64_t)0U, res_i2); + uint64_t t12 = a11[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t *res_i = res1 + (uint32_t)4U * i + (uint32_t)3U; + c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t12, (uint64_t)0U, res_i); + } + for (uint32_t i = rLen / (uint32_t)4U * (uint32_t)4U; i < rLen; i++) + { + uint64_t t1 = a11[i]; + uint64_t *res_i = res1 + i; + c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t1, (uint64_t)0U, res_i); + } + uint64_t c10 = c; + r1 = c10; + } + else + { + r1 = c01; + } + uint64_t c8 = r1; + uint64_t c = c8; + uint64_t c9 = c; +} + +inline void +Hacl_Bignum_bn_add_mod_n_u32( + uint32_t len1, + uint32_t *n, + uint32_t *a, + uint32_t *b, + uint32_t *res +) +{ + uint32_t c0 = (uint32_t)0U; + for (uint32_t i = (uint32_t)0U; i < len1 / (uint32_t)4U * (uint32_t)4U / (uint32_t)4U; i++) + { + uint32_t t1 = a[(uint32_t)4U * i]; + uint32_t t20 = b[(uint32_t)4U * i]; + uint32_t *res_i0 = res + (uint32_t)4U * i; + c0 = Lib_IntTypes_Intrinsics_add_carry_u32(c0, t1, t20, res_i0); + uint32_t t10 = a[(uint32_t)4U * i + (uint32_t)1U]; + uint32_t t21 = b[(uint32_t)4U * i + (uint32_t)1U]; + uint32_t *res_i1 = res + (uint32_t)4U * i + (uint32_t)1U; + c0 = Lib_IntTypes_Intrinsics_add_carry_u32(c0, t10, t21, res_i1); + uint32_t t11 = a[(uint32_t)4U * i + (uint32_t)2U]; + uint32_t t22 = b[(uint32_t)4U * i + (uint32_t)2U]; + uint32_t *res_i2 = res + (uint32_t)4U * i + (uint32_t)2U; + c0 = Lib_IntTypes_Intrinsics_add_carry_u32(c0, t11, t22, res_i2); + uint32_t t12 = a[(uint32_t)4U * i + (uint32_t)3U]; + uint32_t t2 = b[(uint32_t)4U * i + (uint32_t)3U]; + uint32_t *res_i = res + (uint32_t)4U * i + (uint32_t)3U; + c0 = Lib_IntTypes_Intrinsics_add_carry_u32(c0, t12, t2, res_i); + } + for (uint32_t i = len1 / (uint32_t)4U * (uint32_t)4U; i < len1; i++) + { + uint32_t t1 = a[i]; + uint32_t t2 = b[i]; + uint32_t *res_i = res + i; + c0 = Lib_IntTypes_Intrinsics_add_carry_u32(c0, t1, t2, res_i); + } + uint32_t c00 = c0; + KRML_CHECK_SIZE(sizeof (uint32_t), len1); + uint32_t *tmp = alloca(len1 * sizeof (uint32_t)); + memset(tmp, 0U, len1 * sizeof (uint32_t)); + uint32_t c = (uint32_t)0U; + for (uint32_t i = (uint32_t)0U; i < len1 / (uint32_t)4U * (uint32_t)4U / (uint32_t)4U; i++) + { + uint32_t t1 = res[(uint32_t)4U * i]; + uint32_t t20 = n[(uint32_t)4U * i]; + uint32_t *res_i0 = tmp + (uint32_t)4U * i; + c = Lib_IntTypes_Intrinsics_sub_borrow_u32(c, t1, t20, res_i0); + uint32_t t10 = res[(uint32_t)4U * i + (uint32_t)1U]; + uint32_t t21 = n[(uint32_t)4U * i + (uint32_t)1U]; + uint32_t *res_i1 = tmp + (uint32_t)4U * i + (uint32_t)1U; + c = Lib_IntTypes_Intrinsics_sub_borrow_u32(c, t10, t21, res_i1); + uint32_t t11 = res[(uint32_t)4U * i + (uint32_t)2U]; + uint32_t t22 = n[(uint32_t)4U * i + (uint32_t)2U]; + uint32_t *res_i2 = tmp + (uint32_t)4U * i + (uint32_t)2U; + c = Lib_IntTypes_Intrinsics_sub_borrow_u32(c, t11, t22, res_i2); + uint32_t t12 = res[(uint32_t)4U * i + (uint32_t)3U]; + uint32_t t2 = n[(uint32_t)4U * i + (uint32_t)3U]; + uint32_t *res_i = tmp + (uint32_t)4U * i + (uint32_t)3U; + c = Lib_IntTypes_Intrinsics_sub_borrow_u32(c, t12, t2, res_i); + } + for (uint32_t i = len1 / (uint32_t)4U * (uint32_t)4U; i < len1; i++) + { + uint32_t t1 = res[i]; + uint32_t t2 = n[i]; + uint32_t *res_i = tmp + i; + c = Lib_IntTypes_Intrinsics_sub_borrow_u32(c, t1, t2, res_i); + } + uint32_t c1 = c; + uint32_t c2 = c00 - c1; + for (uint32_t i = (uint32_t)0U; i < len1; i++) + { + uint32_t *os = 
res; + uint32_t x = (c2 & res[i]) | (~c2 & tmp[i]); + os[i] = x; + } +} + +inline void +Hacl_Bignum_bn_add_mod_n_u64( + uint32_t len1, + uint64_t *n, + uint64_t *a, + uint64_t *b, + uint64_t *res +) +{ + uint64_t c0 = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < len1 / (uint32_t)4U * (uint32_t)4U / (uint32_t)4U; i++) + { + uint64_t t1 = a[(uint32_t)4U * i]; + uint64_t t20 = b[(uint32_t)4U * i]; + uint64_t *res_i0 = res + (uint32_t)4U * i; + c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, t1, t20, res_i0); + uint64_t t10 = a[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t t21 = b[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t *res_i1 = res + (uint32_t)4U * i + (uint32_t)1U; + c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, t10, t21, res_i1); + uint64_t t11 = a[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t t22 = b[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t *res_i2 = res + (uint32_t)4U * i + (uint32_t)2U; + c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, t11, t22, res_i2); + uint64_t t12 = a[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t t2 = b[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t *res_i = res + (uint32_t)4U * i + (uint32_t)3U; + c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, t12, t2, res_i); + } + for (uint32_t i = len1 / (uint32_t)4U * (uint32_t)4U; i < len1; i++) + { + uint64_t t1 = a[i]; + uint64_t t2 = b[i]; + uint64_t *res_i = res + i; + c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, t1, t2, res_i); + } + uint64_t c00 = c0; + KRML_CHECK_SIZE(sizeof (uint64_t), len1); + uint64_t *tmp = alloca(len1 * sizeof (uint64_t)); + memset(tmp, 0U, len1 * sizeof (uint64_t)); + uint64_t c = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < len1 / (uint32_t)4U * (uint32_t)4U / (uint32_t)4U; i++) + { + uint64_t t1 = res[(uint32_t)4U * i]; + uint64_t t20 = n[(uint32_t)4U * i]; + uint64_t *res_i0 = tmp + (uint32_t)4U * i; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t1, t20, res_i0); + uint64_t t10 = res[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t t21 = n[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t *res_i1 = tmp + (uint32_t)4U * i + (uint32_t)1U; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t10, t21, res_i1); + uint64_t t11 = res[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t t22 = n[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t *res_i2 = tmp + (uint32_t)4U * i + (uint32_t)2U; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t11, t22, res_i2); + uint64_t t12 = res[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t t2 = n[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t *res_i = tmp + (uint32_t)4U * i + (uint32_t)3U; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t12, t2, res_i); + } + for (uint32_t i = len1 / (uint32_t)4U * (uint32_t)4U; i < len1; i++) + { + uint64_t t1 = res[i]; + uint64_t t2 = n[i]; + uint64_t *res_i = tmp + i; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t1, t2, res_i); + } + uint64_t c1 = c; + uint64_t c2 = c00 - c1; + for (uint32_t i = (uint32_t)0U; i < len1; i++) + { + uint64_t *os = res; + uint64_t x = (c2 & res[i]) | (~c2 & tmp[i]); + os[i] = x; + } +} + +inline uint32_t Hacl_Bignum_ModInvLimb_mod_inv_uint32(uint32_t n0) +{ + uint32_t alpha = (uint32_t)2147483648U; + uint32_t beta = n0; + uint32_t ub = (uint32_t)0U; + uint32_t vb = (uint32_t)0U; + ub = (uint32_t)1U; + vb = (uint32_t)0U; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)32U; i++) + { + uint32_t us = ub; + uint32_t vs = vb; + uint32_t u_is_odd = (uint32_t)0U - (us & (uint32_t)1U); + uint32_t beta_if_u_is_odd = beta & u_is_odd; + ub = ((us ^ beta_if_u_is_odd) >> (uint32_t)1U) + (us & beta_if_u_is_odd); + 
uint32_t alpha_if_u_is_odd = alpha & u_is_odd; + vb = (vs >> (uint32_t)1U) + alpha_if_u_is_odd; + } + return vb; +} + +inline uint64_t Hacl_Bignum_ModInvLimb_mod_inv_uint64(uint64_t n0) +{ + uint64_t alpha = (uint64_t)9223372036854775808U; + uint64_t beta = n0; + uint64_t ub = (uint64_t)0U; + uint64_t vb = (uint64_t)0U; + ub = (uint64_t)1U; + vb = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)64U; i++) + { + uint64_t us = ub; + uint64_t vs = vb; + uint64_t u_is_odd = (uint64_t)0U - (us & (uint64_t)1U); + uint64_t beta_if_u_is_odd = beta & u_is_odd; + ub = ((us ^ beta_if_u_is_odd) >> (uint32_t)1U) + (us & beta_if_u_is_odd); + uint64_t alpha_if_u_is_odd = alpha & u_is_odd; + vb = (vs >> (uint32_t)1U) + alpha_if_u_is_odd; + } + return vb; +} + +uint32_t Hacl_Bignum_Montgomery_bn_check_modulus_u32(uint32_t len, uint32_t *n) +{ + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t *one = alloca(len * sizeof (uint32_t)); + memset(one, 0U, len * sizeof (uint32_t)); + memset(one, 0U, len * sizeof (uint32_t)); + one[0U] = (uint32_t)1U; + uint32_t bit0 = n[0U] & (uint32_t)1U; + uint32_t m0 = (uint32_t)0U - bit0; + uint32_t acc = (uint32_t)0U; + for (uint32_t i = (uint32_t)0U; i < len; i++) + { + uint32_t beq = FStar_UInt32_eq_mask(one[i], n[i]); + uint32_t blt = ~FStar_UInt32_gte_mask(one[i], n[i]); + acc = (beq & acc) | (~beq & ((blt & (uint32_t)0xFFFFFFFFU) | (~blt & (uint32_t)0U))); + } + uint32_t m1 = acc; + return m0 & m1; +} + +void +Hacl_Bignum_Montgomery_bn_precomp_r2_mod_n_u32( + uint32_t len, + uint32_t nBits, + uint32_t *n, + uint32_t *res +) +{ + memset(res, 0U, len * sizeof (uint32_t)); + uint32_t i = nBits / (uint32_t)32U; + uint32_t j = nBits % (uint32_t)32U; + res[i] = res[i] | (uint32_t)1U << j; + for (uint32_t i0 = (uint32_t)0U; i0 < (uint32_t)64U * len - nBits; i0++) + { + Hacl_Bignum_bn_add_mod_n_u32(len, n, res, res, res); + } +} + +void +Hacl_Bignum_Montgomery_bn_mont_reduction_u32( + uint32_t len, + uint32_t *n, + uint32_t nInv, + uint32_t *c, + uint32_t *res +) +{ + uint32_t c0 = (uint32_t)0U; + for (uint32_t i0 = (uint32_t)0U; i0 < len; i0++) + { + uint32_t qj = nInv * c[i0]; + uint32_t *res_j0 = c + i0; + uint32_t c1 = (uint32_t)0U; + for (uint32_t i = (uint32_t)0U; i < len / (uint32_t)4U * (uint32_t)4U / (uint32_t)4U; i++) + { + uint32_t a_i = n[(uint32_t)4U * i]; + uint32_t *res_i0 = res_j0 + (uint32_t)4U * i; + c1 = Hacl_Bignum_Base_mul_wide_add2_u32(a_i, qj, c1, res_i0); + uint32_t a_i0 = n[(uint32_t)4U * i + (uint32_t)1U]; + uint32_t *res_i1 = res_j0 + (uint32_t)4U * i + (uint32_t)1U; + c1 = Hacl_Bignum_Base_mul_wide_add2_u32(a_i0, qj, c1, res_i1); + uint32_t a_i1 = n[(uint32_t)4U * i + (uint32_t)2U]; + uint32_t *res_i2 = res_j0 + (uint32_t)4U * i + (uint32_t)2U; + c1 = Hacl_Bignum_Base_mul_wide_add2_u32(a_i1, qj, c1, res_i2); + uint32_t a_i2 = n[(uint32_t)4U * i + (uint32_t)3U]; + uint32_t *res_i = res_j0 + (uint32_t)4U * i + (uint32_t)3U; + c1 = Hacl_Bignum_Base_mul_wide_add2_u32(a_i2, qj, c1, res_i); + } + for (uint32_t i = len / (uint32_t)4U * (uint32_t)4U; i < len; i++) + { + uint32_t a_i = n[i]; + uint32_t *res_i = res_j0 + i; + c1 = Hacl_Bignum_Base_mul_wide_add2_u32(a_i, qj, c1, res_i); + } + uint32_t r = c1; + uint32_t c10 = r; + uint32_t *resb = c + len + i0; + uint32_t res_j = c[len + i0]; + c0 = Lib_IntTypes_Intrinsics_add_carry_u32(c0, c10, res_j, resb); + } + memcpy(res, c + len, (len + len - len) * sizeof (uint32_t)); + uint32_t c00 = c0; + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t *tmp = alloca(len * sizeof (uint32_t)); + 
memset(tmp, 0U, len * sizeof (uint32_t)); + uint32_t c1 = (uint32_t)0U; + for (uint32_t i = (uint32_t)0U; i < len / (uint32_t)4U * (uint32_t)4U / (uint32_t)4U; i++) + { + uint32_t t1 = res[(uint32_t)4U * i]; + uint32_t t20 = n[(uint32_t)4U * i]; + uint32_t *res_i0 = tmp + (uint32_t)4U * i; + c1 = Lib_IntTypes_Intrinsics_sub_borrow_u32(c1, t1, t20, res_i0); + uint32_t t10 = res[(uint32_t)4U * i + (uint32_t)1U]; + uint32_t t21 = n[(uint32_t)4U * i + (uint32_t)1U]; + uint32_t *res_i1 = tmp + (uint32_t)4U * i + (uint32_t)1U; + c1 = Lib_IntTypes_Intrinsics_sub_borrow_u32(c1, t10, t21, res_i1); + uint32_t t11 = res[(uint32_t)4U * i + (uint32_t)2U]; + uint32_t t22 = n[(uint32_t)4U * i + (uint32_t)2U]; + uint32_t *res_i2 = tmp + (uint32_t)4U * i + (uint32_t)2U; + c1 = Lib_IntTypes_Intrinsics_sub_borrow_u32(c1, t11, t22, res_i2); + uint32_t t12 = res[(uint32_t)4U * i + (uint32_t)3U]; + uint32_t t2 = n[(uint32_t)4U * i + (uint32_t)3U]; + uint32_t *res_i = tmp + (uint32_t)4U * i + (uint32_t)3U; + c1 = Lib_IntTypes_Intrinsics_sub_borrow_u32(c1, t12, t2, res_i); + } + for (uint32_t i = len / (uint32_t)4U * (uint32_t)4U; i < len; i++) + { + uint32_t t1 = res[i]; + uint32_t t2 = n[i]; + uint32_t *res_i = tmp + i; + c1 = Lib_IntTypes_Intrinsics_sub_borrow_u32(c1, t1, t2, res_i); + } + uint32_t c10 = c1; + uint32_t c2 = c00 - c10; + for (uint32_t i = (uint32_t)0U; i < len; i++) + { + uint32_t *os = res; + uint32_t x = (c2 & res[i]) | (~c2 & tmp[i]); + os[i] = x; + } +} + +void +Hacl_Bignum_Montgomery_bn_to_mont_u32( + uint32_t len, + uint32_t *n, + uint32_t nInv, + uint32_t *r2, + uint32_t *a, + uint32_t *aM +) +{ + KRML_CHECK_SIZE(sizeof (uint32_t), len + len); + uint32_t *c = alloca((len + len) * sizeof (uint32_t)); + memset(c, 0U, (len + len) * sizeof (uint32_t)); + KRML_CHECK_SIZE(sizeof (uint32_t), (uint32_t)4U * len); + uint32_t *tmp = alloca((uint32_t)4U * len * sizeof (uint32_t)); + memset(tmp, 0U, (uint32_t)4U * len * sizeof (uint32_t)); + Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint32(len, a, r2, tmp, c); + Hacl_Bignum_Montgomery_bn_mont_reduction_u32(len, n, nInv, c, aM); +} + +void +Hacl_Bignum_Montgomery_bn_from_mont_u32( + uint32_t len, + uint32_t *n, + uint32_t nInv_u64, + uint32_t *aM, + uint32_t *a +) +{ + KRML_CHECK_SIZE(sizeof (uint32_t), len + len); + uint32_t *tmp = alloca((len + len) * sizeof (uint32_t)); + memset(tmp, 0U, (len + len) * sizeof (uint32_t)); + memcpy(tmp, aM, len * sizeof (uint32_t)); + Hacl_Bignum_Montgomery_bn_mont_reduction_u32(len, n, nInv_u64, tmp, a); +} + +void +Hacl_Bignum_Montgomery_bn_mont_mul_u32( + uint32_t len, + uint32_t *n, + uint32_t nInv_u64, + uint32_t *aM, + uint32_t *bM, + uint32_t *resM +) +{ + KRML_CHECK_SIZE(sizeof (uint32_t), len + len); + uint32_t *c = alloca((len + len) * sizeof (uint32_t)); + memset(c, 0U, (len + len) * sizeof (uint32_t)); + KRML_CHECK_SIZE(sizeof (uint32_t), (uint32_t)4U * len); + uint32_t *tmp = alloca((uint32_t)4U * len * sizeof (uint32_t)); + memset(tmp, 0U, (uint32_t)4U * len * sizeof (uint32_t)); + Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint32(len, aM, bM, tmp, c); + Hacl_Bignum_Montgomery_bn_mont_reduction_u32(len, n, nInv_u64, c, resM); +} + +void +Hacl_Bignum_Montgomery_bn_mont_sqr_u32( + uint32_t len, + uint32_t *n, + uint32_t nInv_u64, + uint32_t *aM, + uint32_t *resM +) +{ + KRML_CHECK_SIZE(sizeof (uint32_t), len + len); + uint32_t *c = alloca((len + len) * sizeof (uint32_t)); + memset(c, 0U, (len + len) * sizeof (uint32_t)); + KRML_CHECK_SIZE(sizeof (uint32_t), (uint32_t)4U * len); + uint32_t *tmp = 
alloca((uint32_t)4U * len * sizeof (uint32_t)); + memset(tmp, 0U, (uint32_t)4U * len * sizeof (uint32_t)); + Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint32(len, aM, tmp, c); + Hacl_Bignum_Montgomery_bn_mont_reduction_u32(len, n, nInv_u64, c, resM); +} + +uint64_t Hacl_Bignum_Montgomery_bn_check_modulus_u64(uint32_t len, uint64_t *n) +{ + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t *one = alloca(len * sizeof (uint64_t)); + memset(one, 0U, len * sizeof (uint64_t)); + memset(one, 0U, len * sizeof (uint64_t)); + one[0U] = (uint64_t)1U; + uint64_t bit0 = n[0U] & (uint64_t)1U; + uint64_t m0 = (uint64_t)0U - bit0; + uint64_t acc = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < len; i++) + { + uint64_t beq = FStar_UInt64_eq_mask(one[i], n[i]); + uint64_t blt = ~FStar_UInt64_gte_mask(one[i], n[i]); + acc = (beq & acc) | (~beq & ((blt & (uint64_t)0xFFFFFFFFFFFFFFFFU) | (~blt & (uint64_t)0U))); + } + uint64_t m1 = acc; + return m0 & m1; +} + +void +Hacl_Bignum_Montgomery_bn_precomp_r2_mod_n_u64( + uint32_t len, + uint32_t nBits, + uint64_t *n, + uint64_t *res +) +{ + memset(res, 0U, len * sizeof (uint64_t)); + uint32_t i = nBits / (uint32_t)64U; + uint32_t j = nBits % (uint32_t)64U; + res[i] = res[i] | (uint64_t)1U << j; + for (uint32_t i0 = (uint32_t)0U; i0 < (uint32_t)128U * len - nBits; i0++) + { + Hacl_Bignum_bn_add_mod_n_u64(len, n, res, res, res); + } +} + +void +Hacl_Bignum_Montgomery_bn_mont_reduction_u64( + uint32_t len, + uint64_t *n, + uint64_t nInv, + uint64_t *c, + uint64_t *res +) +{ + uint64_t c0 = (uint64_t)0U; + for (uint32_t i0 = (uint32_t)0U; i0 < len; i0++) + { + uint64_t qj = nInv * c[i0]; + uint64_t *res_j0 = c + i0; + uint64_t c1 = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < len / (uint32_t)4U * (uint32_t)4U / (uint32_t)4U; i++) + { + uint64_t a_i = n[(uint32_t)4U * i]; + uint64_t *res_i0 = res_j0 + (uint32_t)4U * i; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, qj, c1, res_i0); + uint64_t a_i0 = n[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t *res_i1 = res_j0 + (uint32_t)4U * i + (uint32_t)1U; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i0, qj, c1, res_i1); + uint64_t a_i1 = n[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t *res_i2 = res_j0 + (uint32_t)4U * i + (uint32_t)2U; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i1, qj, c1, res_i2); + uint64_t a_i2 = n[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t *res_i = res_j0 + (uint32_t)4U * i + (uint32_t)3U; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i2, qj, c1, res_i); + } + for (uint32_t i = len / (uint32_t)4U * (uint32_t)4U; i < len; i++) + { + uint64_t a_i = n[i]; + uint64_t *res_i = res_j0 + i; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, qj, c1, res_i); + } + uint64_t r = c1; + uint64_t c10 = r; + uint64_t *resb = c + len + i0; + uint64_t res_j = c[len + i0]; + c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, c10, res_j, resb); + } + memcpy(res, c + len, (len + len - len) * sizeof (uint64_t)); + uint64_t c00 = c0; + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t *tmp = alloca(len * sizeof (uint64_t)); + memset(tmp, 0U, len * sizeof (uint64_t)); + uint64_t c1 = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < len / (uint32_t)4U * (uint32_t)4U / (uint32_t)4U; i++) + { + uint64_t t1 = res[(uint32_t)4U * i]; + uint64_t t20 = n[(uint32_t)4U * i]; + uint64_t *res_i0 = tmp + (uint32_t)4U * i; + c1 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c1, t1, t20, res_i0); + uint64_t t10 = res[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t t21 = n[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t *res_i1 = tmp + (uint32_t)4U * i 
+ (uint32_t)1U; + c1 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c1, t10, t21, res_i1); + uint64_t t11 = res[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t t22 = n[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t *res_i2 = tmp + (uint32_t)4U * i + (uint32_t)2U; + c1 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c1, t11, t22, res_i2); + uint64_t t12 = res[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t t2 = n[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t *res_i = tmp + (uint32_t)4U * i + (uint32_t)3U; + c1 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c1, t12, t2, res_i); + } + for (uint32_t i = len / (uint32_t)4U * (uint32_t)4U; i < len; i++) + { + uint64_t t1 = res[i]; + uint64_t t2 = n[i]; + uint64_t *res_i = tmp + i; + c1 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c1, t1, t2, res_i); + } + uint64_t c10 = c1; + uint64_t c2 = c00 - c10; + for (uint32_t i = (uint32_t)0U; i < len; i++) + { + uint64_t *os = res; + uint64_t x = (c2 & res[i]) | (~c2 & tmp[i]); + os[i] = x; + } +} + +void +Hacl_Bignum_Montgomery_bn_to_mont_u64( + uint32_t len, + uint64_t *n, + uint64_t nInv, + uint64_t *r2, + uint64_t *a, + uint64_t *aM +) +{ + KRML_CHECK_SIZE(sizeof (uint64_t), len + len); + uint64_t *c = alloca((len + len) * sizeof (uint64_t)); + memset(c, 0U, (len + len) * sizeof (uint64_t)); + KRML_CHECK_SIZE(sizeof (uint64_t), (uint32_t)4U * len); + uint64_t *tmp = alloca((uint32_t)4U * len * sizeof (uint64_t)); + memset(tmp, 0U, (uint32_t)4U * len * sizeof (uint64_t)); + Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint64(len, a, r2, tmp, c); + Hacl_Bignum_Montgomery_bn_mont_reduction_u64(len, n, nInv, c, aM); +} + +void +Hacl_Bignum_Montgomery_bn_from_mont_u64( + uint32_t len, + uint64_t *n, + uint64_t nInv_u64, + uint64_t *aM, + uint64_t *a +) +{ + KRML_CHECK_SIZE(sizeof (uint64_t), len + len); + uint64_t *tmp = alloca((len + len) * sizeof (uint64_t)); + memset(tmp, 0U, (len + len) * sizeof (uint64_t)); + memcpy(tmp, aM, len * sizeof (uint64_t)); + Hacl_Bignum_Montgomery_bn_mont_reduction_u64(len, n, nInv_u64, tmp, a); +} + +void +Hacl_Bignum_Montgomery_bn_mont_mul_u64( + uint32_t len, + uint64_t *n, + uint64_t nInv_u64, + uint64_t *aM, + uint64_t *bM, + uint64_t *resM +) +{ + KRML_CHECK_SIZE(sizeof (uint64_t), len + len); + uint64_t *c = alloca((len + len) * sizeof (uint64_t)); + memset(c, 0U, (len + len) * sizeof (uint64_t)); + KRML_CHECK_SIZE(sizeof (uint64_t), (uint32_t)4U * len); + uint64_t *tmp = alloca((uint32_t)4U * len * sizeof (uint64_t)); + memset(tmp, 0U, (uint32_t)4U * len * sizeof (uint64_t)); + Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint64(len, aM, bM, tmp, c); + Hacl_Bignum_Montgomery_bn_mont_reduction_u64(len, n, nInv_u64, c, resM); +} + +void +Hacl_Bignum_Montgomery_bn_mont_sqr_u64( + uint32_t len, + uint64_t *n, + uint64_t nInv_u64, + uint64_t *aM, + uint64_t *resM +) +{ + KRML_CHECK_SIZE(sizeof (uint64_t), len + len); + uint64_t *c = alloca((len + len) * sizeof (uint64_t)); + memset(c, 0U, (len + len) * sizeof (uint64_t)); + KRML_CHECK_SIZE(sizeof (uint64_t), (uint32_t)4U * len); + uint64_t *tmp = alloca((uint32_t)4U * len * sizeof (uint64_t)); + memset(tmp, 0U, (uint32_t)4U * len * sizeof (uint64_t)); + Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint64(len, aM, tmp, c); + Hacl_Bignum_Montgomery_bn_mont_reduction_u64(len, n, nInv_u64, c, resM); +} + +static void +bn_almost_mont_reduction_u32( + uint32_t len, + uint32_t *n, + uint32_t nInv, + uint32_t *c, + uint32_t *res +) +{ + uint32_t c0 = (uint32_t)0U; + for (uint32_t i0 = (uint32_t)0U; i0 < len; i0++) + { + uint32_t qj = nInv * c[i0]; + uint32_t *res_j0 = c + i0; 
+ uint32_t c1 = (uint32_t)0U; + for (uint32_t i = (uint32_t)0U; i < len / (uint32_t)4U * (uint32_t)4U / (uint32_t)4U; i++) + { + uint32_t a_i = n[(uint32_t)4U * i]; + uint32_t *res_i0 = res_j0 + (uint32_t)4U * i; + c1 = Hacl_Bignum_Base_mul_wide_add2_u32(a_i, qj, c1, res_i0); + uint32_t a_i0 = n[(uint32_t)4U * i + (uint32_t)1U]; + uint32_t *res_i1 = res_j0 + (uint32_t)4U * i + (uint32_t)1U; + c1 = Hacl_Bignum_Base_mul_wide_add2_u32(a_i0, qj, c1, res_i1); + uint32_t a_i1 = n[(uint32_t)4U * i + (uint32_t)2U]; + uint32_t *res_i2 = res_j0 + (uint32_t)4U * i + (uint32_t)2U; + c1 = Hacl_Bignum_Base_mul_wide_add2_u32(a_i1, qj, c1, res_i2); + uint32_t a_i2 = n[(uint32_t)4U * i + (uint32_t)3U]; + uint32_t *res_i = res_j0 + (uint32_t)4U * i + (uint32_t)3U; + c1 = Hacl_Bignum_Base_mul_wide_add2_u32(a_i2, qj, c1, res_i); + } + for (uint32_t i = len / (uint32_t)4U * (uint32_t)4U; i < len; i++) + { + uint32_t a_i = n[i]; + uint32_t *res_i = res_j0 + i; + c1 = Hacl_Bignum_Base_mul_wide_add2_u32(a_i, qj, c1, res_i); + } + uint32_t r = c1; + uint32_t c10 = r; + uint32_t *resb = c + len + i0; + uint32_t res_j = c[len + i0]; + c0 = Lib_IntTypes_Intrinsics_add_carry_u32(c0, c10, res_j, resb); + } + memcpy(res, c + len, (len + len - len) * sizeof (uint32_t)); + uint32_t c00 = c0; + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t *tmp = alloca(len * sizeof (uint32_t)); + memset(tmp, 0U, len * sizeof (uint32_t)); + uint32_t c1 = Hacl_Bignum_Addition_bn_sub_eq_len_u32(len, res, n, tmp); + uint32_t m = (uint32_t)0U - c00; + for (uint32_t i = (uint32_t)0U; i < len; i++) + { + uint32_t *os = res; + uint32_t x = (m & tmp[i]) | (~m & res[i]); + os[i] = x; + } +} + +static void +bn_almost_mont_mul_u32( + uint32_t len, + uint32_t *n, + uint32_t nInv_u64, + uint32_t *aM, + uint32_t *bM, + uint32_t *resM +) +{ + KRML_CHECK_SIZE(sizeof (uint32_t), len + len); + uint32_t *c = alloca((len + len) * sizeof (uint32_t)); + memset(c, 0U, (len + len) * sizeof (uint32_t)); + KRML_CHECK_SIZE(sizeof (uint32_t), (uint32_t)4U * len); + uint32_t *tmp = alloca((uint32_t)4U * len * sizeof (uint32_t)); + memset(tmp, 0U, (uint32_t)4U * len * sizeof (uint32_t)); + Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint32(len, aM, bM, tmp, c); + bn_almost_mont_reduction_u32(len, n, nInv_u64, c, resM); +} + +static void +bn_almost_mont_sqr_u32( + uint32_t len, + uint32_t *n, + uint32_t nInv_u64, + uint32_t *aM, + uint32_t *resM +) +{ + KRML_CHECK_SIZE(sizeof (uint32_t), len + len); + uint32_t *c = alloca((len + len) * sizeof (uint32_t)); + memset(c, 0U, (len + len) * sizeof (uint32_t)); + KRML_CHECK_SIZE(sizeof (uint32_t), (uint32_t)4U * len); + uint32_t *tmp = alloca((uint32_t)4U * len * sizeof (uint32_t)); + memset(tmp, 0U, (uint32_t)4U * len * sizeof (uint32_t)); + Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint32(len, aM, tmp, c); + bn_almost_mont_reduction_u32(len, n, nInv_u64, c, resM); +} + +static void +bn_almost_mont_reduction_u64( + uint32_t len, + uint64_t *n, + uint64_t nInv, + uint64_t *c, + uint64_t *res +) +{ + uint64_t c0 = (uint64_t)0U; + for (uint32_t i0 = (uint32_t)0U; i0 < len; i0++) + { + uint64_t qj = nInv * c[i0]; + uint64_t *res_j0 = c + i0; + uint64_t c1 = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < len / (uint32_t)4U * (uint32_t)4U / (uint32_t)4U; i++) + { + uint64_t a_i = n[(uint32_t)4U * i]; + uint64_t *res_i0 = res_j0 + (uint32_t)4U * i; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, qj, c1, res_i0); + uint64_t a_i0 = n[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t *res_i1 = res_j0 + (uint32_t)4U * i + (uint32_t)1U; + 
c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i0, qj, c1, res_i1); + uint64_t a_i1 = n[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t *res_i2 = res_j0 + (uint32_t)4U * i + (uint32_t)2U; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i1, qj, c1, res_i2); + uint64_t a_i2 = n[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t *res_i = res_j0 + (uint32_t)4U * i + (uint32_t)3U; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i2, qj, c1, res_i); + } + for (uint32_t i = len / (uint32_t)4U * (uint32_t)4U; i < len; i++) + { + uint64_t a_i = n[i]; + uint64_t *res_i = res_j0 + i; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, qj, c1, res_i); + } + uint64_t r = c1; + uint64_t c10 = r; + uint64_t *resb = c + len + i0; + uint64_t res_j = c[len + i0]; + c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, c10, res_j, resb); + } + memcpy(res, c + len, (len + len - len) * sizeof (uint64_t)); + uint64_t c00 = c0; + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t *tmp = alloca(len * sizeof (uint64_t)); + memset(tmp, 0U, len * sizeof (uint64_t)); + uint64_t c1 = Hacl_Bignum_Addition_bn_sub_eq_len_u64(len, res, n, tmp); + uint64_t m = (uint64_t)0U - c00; + for (uint32_t i = (uint32_t)0U; i < len; i++) + { + uint64_t *os = res; + uint64_t x = (m & tmp[i]) | (~m & res[i]); + os[i] = x; + } +} + +static void +bn_almost_mont_mul_u64( + uint32_t len, + uint64_t *n, + uint64_t nInv_u64, + uint64_t *aM, + uint64_t *bM, + uint64_t *resM +) +{ + KRML_CHECK_SIZE(sizeof (uint64_t), len + len); + uint64_t *c = alloca((len + len) * sizeof (uint64_t)); + memset(c, 0U, (len + len) * sizeof (uint64_t)); + KRML_CHECK_SIZE(sizeof (uint64_t), (uint32_t)4U * len); + uint64_t *tmp = alloca((uint32_t)4U * len * sizeof (uint64_t)); + memset(tmp, 0U, (uint32_t)4U * len * sizeof (uint64_t)); + Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint64(len, aM, bM, tmp, c); + bn_almost_mont_reduction_u64(len, n, nInv_u64, c, resM); +} + +static void +bn_almost_mont_sqr_u64( + uint32_t len, + uint64_t *n, + uint64_t nInv_u64, + uint64_t *aM, + uint64_t *resM +) +{ + KRML_CHECK_SIZE(sizeof (uint64_t), len + len); + uint64_t *c = alloca((len + len) * sizeof (uint64_t)); + memset(c, 0U, (len + len) * sizeof (uint64_t)); + KRML_CHECK_SIZE(sizeof (uint64_t), (uint32_t)4U * len); + uint64_t *tmp = alloca((uint32_t)4U * len * sizeof (uint64_t)); + memset(tmp, 0U, (uint32_t)4U * len * sizeof (uint64_t)); + Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint64(len, aM, tmp, c); + bn_almost_mont_reduction_u64(len, n, nInv_u64, c, resM); +} + +uint32_t +Hacl_Bignum_Exponentiation_bn_check_mod_exp_u32( + uint32_t len, + uint32_t *n, + uint32_t *a, + uint32_t bBits, + uint32_t *b +) +{ + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t *one = alloca(len * sizeof (uint32_t)); + memset(one, 0U, len * sizeof (uint32_t)); + memset(one, 0U, len * sizeof (uint32_t)); + one[0U] = (uint32_t)1U; + uint32_t bit0 = n[0U] & (uint32_t)1U; + uint32_t m0 = (uint32_t)0U - bit0; + uint32_t acc0 = (uint32_t)0U; + for (uint32_t i = (uint32_t)0U; i < len; i++) + { + uint32_t beq = FStar_UInt32_eq_mask(one[i], n[i]); + uint32_t blt = ~FStar_UInt32_gte_mask(one[i], n[i]); + acc0 = (beq & acc0) | (~beq & ((blt & (uint32_t)0xFFFFFFFFU) | (~blt & (uint32_t)0U))); + } + uint32_t m10 = acc0; + uint32_t m00 = m0 & m10; + uint32_t bLen; + if (bBits == (uint32_t)0U) + { + bLen = (uint32_t)1U; + } + else + { + bLen = (bBits - (uint32_t)1U) / (uint32_t)32U + (uint32_t)1U; + } + uint32_t m1; + if (bBits < (uint32_t)32U * bLen) + { + KRML_CHECK_SIZE(sizeof (uint32_t), bLen); + uint32_t *b2 = alloca(bLen * sizeof 
(uint32_t)); + memset(b2, 0U, bLen * sizeof (uint32_t)); + uint32_t i0 = bBits / (uint32_t)32U; + uint32_t j = bBits % (uint32_t)32U; + b2[i0] = b2[i0] | (uint32_t)1U << j; + uint32_t acc = (uint32_t)0U; + for (uint32_t i = (uint32_t)0U; i < bLen; i++) + { + uint32_t beq = FStar_UInt32_eq_mask(b[i], b2[i]); + uint32_t blt = ~FStar_UInt32_gte_mask(b[i], b2[i]); + acc = (beq & acc) | (~beq & ((blt & (uint32_t)0xFFFFFFFFU) | (~blt & (uint32_t)0U))); + } + uint32_t res = acc; + m1 = res; + } + else + { + m1 = (uint32_t)0xFFFFFFFFU; + } + uint32_t acc = (uint32_t)0U; + for (uint32_t i = (uint32_t)0U; i < len; i++) + { + uint32_t beq = FStar_UInt32_eq_mask(a[i], n[i]); + uint32_t blt = ~FStar_UInt32_gte_mask(a[i], n[i]); + acc = (beq & acc) | (~beq & ((blt & (uint32_t)0xFFFFFFFFU) | (~blt & (uint32_t)0U))); + } + uint32_t m2 = acc; + uint32_t m = m1 & m2; + return m00 & m; +} + +void +Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_precomp_u32( + uint32_t len, + uint32_t *n, + uint32_t mu, + uint32_t *r2, + uint32_t *a, + uint32_t bBits, + uint32_t *b, + uint32_t *res +) +{ + if (bBits < (uint32_t)200U) + { + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t *aM = alloca(len * sizeof (uint32_t)); + memset(aM, 0U, len * sizeof (uint32_t)); + KRML_CHECK_SIZE(sizeof (uint32_t), len + len); + uint32_t *c = alloca((len + len) * sizeof (uint32_t)); + memset(c, 0U, (len + len) * sizeof (uint32_t)); + KRML_CHECK_SIZE(sizeof (uint32_t), (uint32_t)4U * len); + uint32_t *tmp0 = alloca((uint32_t)4U * len * sizeof (uint32_t)); + memset(tmp0, 0U, (uint32_t)4U * len * sizeof (uint32_t)); + Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint32(len, a, r2, tmp0, c); + Hacl_Bignum_Montgomery_bn_mont_reduction_u32(len, n, mu, c, aM); + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t *resM = alloca(len * sizeof (uint32_t)); + memset(resM, 0U, len * sizeof (uint32_t)); + KRML_CHECK_SIZE(sizeof (uint32_t), len + len); + uint32_t *tmp1 = alloca((len + len) * sizeof (uint32_t)); + memset(tmp1, 0U, (len + len) * sizeof (uint32_t)); + memcpy(tmp1, r2, len * sizeof (uint32_t)); + Hacl_Bignum_Montgomery_bn_mont_reduction_u32(len, n, mu, tmp1, resM); + for (uint32_t i = (uint32_t)0U; i < bBits; i++) + { + uint32_t i1 = i / (uint32_t)32U; + uint32_t j = i % (uint32_t)32U; + uint32_t tmp = b[i1]; + uint32_t bit = tmp >> j & (uint32_t)1U; + if (!(bit == (uint32_t)0U)) + { + bn_almost_mont_mul_u32(len, n, mu, resM, aM, resM); + } + bn_almost_mont_sqr_u32(len, n, mu, aM, aM); + } + KRML_CHECK_SIZE(sizeof (uint32_t), len + len); + uint32_t *tmp = alloca((len + len) * sizeof (uint32_t)); + memset(tmp, 0U, (len + len) * sizeof (uint32_t)); + memcpy(tmp, resM, len * sizeof (uint32_t)); + Hacl_Bignum_Montgomery_bn_mont_reduction_u32(len, n, mu, tmp, res); + return; + } + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t *aM = alloca(len * sizeof (uint32_t)); + memset(aM, 0U, len * sizeof (uint32_t)); + KRML_CHECK_SIZE(sizeof (uint32_t), len + len); + uint32_t *c = alloca((len + len) * sizeof (uint32_t)); + memset(c, 0U, (len + len) * sizeof (uint32_t)); + KRML_CHECK_SIZE(sizeof (uint32_t), (uint32_t)4U * len); + uint32_t *tmp0 = alloca((uint32_t)4U * len * sizeof (uint32_t)); + memset(tmp0, 0U, (uint32_t)4U * len * sizeof (uint32_t)); + Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint32(len, a, r2, tmp0, c); + Hacl_Bignum_Montgomery_bn_mont_reduction_u32(len, n, mu, c, aM); + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t *resM = alloca(len * sizeof (uint32_t)); + memset(resM, 0U, len * sizeof (uint32_t)); + uint32_t bLen; + if (bBits 
== (uint32_t)0U) + { + bLen = (uint32_t)1U; + } + else + { + bLen = (bBits - (uint32_t)1U) / (uint32_t)32U + (uint32_t)1U; + } + KRML_CHECK_SIZE(sizeof (uint32_t), len + len); + uint32_t *tmp = alloca((len + len) * sizeof (uint32_t)); + memset(tmp, 0U, (len + len) * sizeof (uint32_t)); + memcpy(tmp, r2, len * sizeof (uint32_t)); + Hacl_Bignum_Montgomery_bn_mont_reduction_u32(len, n, mu, tmp, resM); + uint32_t table_len = (uint32_t)16U; + KRML_CHECK_SIZE(sizeof (uint32_t), table_len * len); + uint32_t *table = alloca(table_len * len * sizeof (uint32_t)); + memset(table, 0U, table_len * len * sizeof (uint32_t)); + memcpy(table, resM, len * sizeof (uint32_t)); + uint32_t *t1 = table + len; + memcpy(t1, aM, len * sizeof (uint32_t)); + for (uint32_t i = (uint32_t)0U; i < table_len - (uint32_t)2U; i++) + { + uint32_t *t11 = table + (i + (uint32_t)1U) * len; + uint32_t *t2 = table + (i + (uint32_t)2U) * len; + bn_almost_mont_mul_u32(len, n, mu, t11, aM, t2); + } + for (uint32_t i = (uint32_t)0U; i < bBits / (uint32_t)4U; i++) + { + for (uint32_t i0 = (uint32_t)0U; i0 < (uint32_t)4U; i0++) + { + bn_almost_mont_sqr_u32(len, n, mu, resM, resM); + } + uint32_t mask_l = (uint32_t)16U - (uint32_t)1U; + uint32_t i1 = (bBits - (uint32_t)4U * i - (uint32_t)4U) / (uint32_t)32U; + uint32_t j = (bBits - (uint32_t)4U * i - (uint32_t)4U) % (uint32_t)32U; + uint32_t p1 = b[i1] >> j; + uint32_t ite; + if (i1 + (uint32_t)1U < bLen && (uint32_t)0U < j) + { + ite = p1 | b[i1 + (uint32_t)1U] << ((uint32_t)32U - j); + } + else + { + ite = p1; + } + uint32_t bits_l = ite & mask_l; + uint32_t bits_l32 = bits_l; + uint32_t *a_bits_l = table + bits_l32 * len; + bn_almost_mont_mul_u32(len, n, mu, resM, a_bits_l, resM); + } + if (!(bBits % (uint32_t)4U == (uint32_t)0U)) + { + uint32_t c0 = bBits % (uint32_t)4U; + for (uint32_t i = (uint32_t)0U; i < c0; i++) + { + bn_almost_mont_sqr_u32(len, n, mu, resM, resM); + } + uint32_t c1 = bBits % (uint32_t)4U; + uint32_t mask_l = ((uint32_t)1U << c1) - (uint32_t)1U; + uint32_t i = (uint32_t)0U; + uint32_t j = (uint32_t)0U; + uint32_t p1 = b[i] >> j; + uint32_t ite; + if (i + (uint32_t)1U < bLen && (uint32_t)0U < j) + { + ite = p1 | b[i + (uint32_t)1U] << ((uint32_t)32U - j); + } + else + { + ite = p1; + } + uint32_t bits_c = ite & mask_l; + uint32_t bits_c0 = bits_c; + uint32_t bits_c32 = bits_c0; + uint32_t *a_bits_c = table + bits_c32 * len; + bn_almost_mont_mul_u32(len, n, mu, resM, a_bits_c, resM); + } + KRML_CHECK_SIZE(sizeof (uint32_t), len + len); + uint32_t *tmp1 = alloca((len + len) * sizeof (uint32_t)); + memset(tmp1, 0U, (len + len) * sizeof (uint32_t)); + memcpy(tmp1, resM, len * sizeof (uint32_t)); + Hacl_Bignum_Montgomery_bn_mont_reduction_u32(len, n, mu, tmp1, res); +} + +void +Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_precomp_u32( + uint32_t len, + uint32_t *n, + uint32_t mu, + uint32_t *r2, + uint32_t *a, + uint32_t bBits, + uint32_t *b, + uint32_t *res +) +{ + if (bBits < (uint32_t)200U) + { + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t *aM = alloca(len * sizeof (uint32_t)); + memset(aM, 0U, len * sizeof (uint32_t)); + KRML_CHECK_SIZE(sizeof (uint32_t), len + len); + uint32_t *c = alloca((len + len) * sizeof (uint32_t)); + memset(c, 0U, (len + len) * sizeof (uint32_t)); + KRML_CHECK_SIZE(sizeof (uint32_t), (uint32_t)4U * len); + uint32_t *tmp0 = alloca((uint32_t)4U * len * sizeof (uint32_t)); + memset(tmp0, 0U, (uint32_t)4U * len * sizeof (uint32_t)); + Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint32(len, a, r2, tmp0, c); + 
Hacl_Bignum_Montgomery_bn_mont_reduction_u32(len, n, mu, c, aM); + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t *resM = alloca(len * sizeof (uint32_t)); + memset(resM, 0U, len * sizeof (uint32_t)); + KRML_CHECK_SIZE(sizeof (uint32_t), len + len); + uint32_t *tmp1 = alloca((len + len) * sizeof (uint32_t)); + memset(tmp1, 0U, (len + len) * sizeof (uint32_t)); + memcpy(tmp1, r2, len * sizeof (uint32_t)); + Hacl_Bignum_Montgomery_bn_mont_reduction_u32(len, n, mu, tmp1, resM); + uint32_t sw = (uint32_t)0U; + for (uint32_t i0 = (uint32_t)0U; i0 < bBits; i0++) + { + uint32_t i1 = (bBits - i0 - (uint32_t)1U) / (uint32_t)32U; + uint32_t j = (bBits - i0 - (uint32_t)1U) % (uint32_t)32U; + uint32_t tmp = b[i1]; + uint32_t bit = tmp >> j & (uint32_t)1U; + uint32_t sw1 = bit ^ sw; + for (uint32_t i = (uint32_t)0U; i < len; i++) + { + uint32_t dummy = ((uint32_t)0U - sw1) & (resM[i] ^ aM[i]); + resM[i] = resM[i] ^ dummy; + aM[i] = aM[i] ^ dummy; + } + bn_almost_mont_mul_u32(len, n, mu, aM, resM, aM); + bn_almost_mont_sqr_u32(len, n, mu, resM, resM); + sw = bit; + } + uint32_t sw0 = sw; + for (uint32_t i = (uint32_t)0U; i < len; i++) + { + uint32_t dummy = ((uint32_t)0U - sw0) & (resM[i] ^ aM[i]); + resM[i] = resM[i] ^ dummy; + aM[i] = aM[i] ^ dummy; + } + KRML_CHECK_SIZE(sizeof (uint32_t), len + len); + uint32_t *tmp = alloca((len + len) * sizeof (uint32_t)); + memset(tmp, 0U, (len + len) * sizeof (uint32_t)); + memcpy(tmp, resM, len * sizeof (uint32_t)); + Hacl_Bignum_Montgomery_bn_mont_reduction_u32(len, n, mu, tmp, res); + return; + } + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t *aM = alloca(len * sizeof (uint32_t)); + memset(aM, 0U, len * sizeof (uint32_t)); + KRML_CHECK_SIZE(sizeof (uint32_t), len + len); + uint32_t *c0 = alloca((len + len) * sizeof (uint32_t)); + memset(c0, 0U, (len + len) * sizeof (uint32_t)); + KRML_CHECK_SIZE(sizeof (uint32_t), (uint32_t)4U * len); + uint32_t *tmp0 = alloca((uint32_t)4U * len * sizeof (uint32_t)); + memset(tmp0, 0U, (uint32_t)4U * len * sizeof (uint32_t)); + Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint32(len, a, r2, tmp0, c0); + Hacl_Bignum_Montgomery_bn_mont_reduction_u32(len, n, mu, c0, aM); + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t *resM = alloca(len * sizeof (uint32_t)); + memset(resM, 0U, len * sizeof (uint32_t)); + uint32_t bLen; + if (bBits == (uint32_t)0U) + { + bLen = (uint32_t)1U; + } + else + { + bLen = (bBits - (uint32_t)1U) / (uint32_t)32U + (uint32_t)1U; + } + KRML_CHECK_SIZE(sizeof (uint32_t), len + len); + uint32_t *tmp = alloca((len + len) * sizeof (uint32_t)); + memset(tmp, 0U, (len + len) * sizeof (uint32_t)); + memcpy(tmp, r2, len * sizeof (uint32_t)); + Hacl_Bignum_Montgomery_bn_mont_reduction_u32(len, n, mu, tmp, resM); + uint32_t table_len = (uint32_t)16U; + KRML_CHECK_SIZE(sizeof (uint32_t), table_len * len); + uint32_t *table = alloca(table_len * len * sizeof (uint32_t)); + memset(table, 0U, table_len * len * sizeof (uint32_t)); + memcpy(table, resM, len * sizeof (uint32_t)); + uint32_t *t1 = table + len; + memcpy(t1, aM, len * sizeof (uint32_t)); + for (uint32_t i = (uint32_t)0U; i < table_len - (uint32_t)2U; i++) + { + uint32_t *t11 = table + (i + (uint32_t)1U) * len; + uint32_t *t2 = table + (i + (uint32_t)2U) * len; + bn_almost_mont_mul_u32(len, n, mu, t11, aM, t2); + } + for (uint32_t i0 = (uint32_t)0U; i0 < bBits / (uint32_t)4U; i0++) + { + for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i++) + { + bn_almost_mont_sqr_u32(len, n, mu, resM, resM); + } + uint32_t mask_l = (uint32_t)16U - (uint32_t)1U; + 
uint32_t i1 = (bBits - (uint32_t)4U * i0 - (uint32_t)4U) / (uint32_t)32U; + uint32_t j = (bBits - (uint32_t)4U * i0 - (uint32_t)4U) % (uint32_t)32U; + uint32_t p1 = b[i1] >> j; + uint32_t ite; + if (i1 + (uint32_t)1U < bLen && (uint32_t)0U < j) + { + ite = p1 | b[i1 + (uint32_t)1U] << ((uint32_t)32U - j); + } + else + { + ite = p1; + } + uint32_t bits_l = ite & mask_l; + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t *a_bits_l = alloca(len * sizeof (uint32_t)); + memset(a_bits_l, 0U, len * sizeof (uint32_t)); + memcpy(a_bits_l, table, len * sizeof (uint32_t)); + for (uint32_t i2 = (uint32_t)0U; i2 < table_len - (uint32_t)1U; i2++) + { + uint32_t c = FStar_UInt32_eq_mask(bits_l, i2 + (uint32_t)1U); + uint32_t *res_j = table + (i2 + (uint32_t)1U) * len; + for (uint32_t i = (uint32_t)0U; i < len; i++) + { + uint32_t *os = a_bits_l; + uint32_t x = (c & res_j[i]) | (~c & a_bits_l[i]); + os[i] = x; + } + } + bn_almost_mont_mul_u32(len, n, mu, resM, a_bits_l, resM); + } + if (!(bBits % (uint32_t)4U == (uint32_t)0U)) + { + uint32_t c = bBits % (uint32_t)4U; + for (uint32_t i = (uint32_t)0U; i < c; i++) + { + bn_almost_mont_sqr_u32(len, n, mu, resM, resM); + } + uint32_t c10 = bBits % (uint32_t)4U; + uint32_t mask_l = ((uint32_t)1U << c10) - (uint32_t)1U; + uint32_t i0 = (uint32_t)0U; + uint32_t j = (uint32_t)0U; + uint32_t p1 = b[i0] >> j; + uint32_t ite; + if (i0 + (uint32_t)1U < bLen && (uint32_t)0U < j) + { + ite = p1 | b[i0 + (uint32_t)1U] << ((uint32_t)32U - j); + } + else + { + ite = p1; + } + uint32_t bits_c = ite & mask_l; + uint32_t bits_c0 = bits_c; + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t *a_bits_c = alloca(len * sizeof (uint32_t)); + memset(a_bits_c, 0U, len * sizeof (uint32_t)); + memcpy(a_bits_c, table, len * sizeof (uint32_t)); + for (uint32_t i1 = (uint32_t)0U; i1 < table_len - (uint32_t)1U; i1++) + { + uint32_t c1 = FStar_UInt32_eq_mask(bits_c0, i1 + (uint32_t)1U); + uint32_t *res_j = table + (i1 + (uint32_t)1U) * len; + for (uint32_t i = (uint32_t)0U; i < len; i++) + { + uint32_t *os = a_bits_c; + uint32_t x = (c1 & res_j[i]) | (~c1 & a_bits_c[i]); + os[i] = x; + } + } + bn_almost_mont_mul_u32(len, n, mu, resM, a_bits_c, resM); + } + KRML_CHECK_SIZE(sizeof (uint32_t), len + len); + uint32_t *tmp1 = alloca((len + len) * sizeof (uint32_t)); + memset(tmp1, 0U, (len + len) * sizeof (uint32_t)); + memcpy(tmp1, resM, len * sizeof (uint32_t)); + Hacl_Bignum_Montgomery_bn_mont_reduction_u32(len, n, mu, tmp1, res); +} + +void +Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_u32( + uint32_t len, + uint32_t nBits, + uint32_t *n, + uint32_t *a, + uint32_t bBits, + uint32_t *b, + uint32_t *res +) +{ + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t *r2 = alloca(len * sizeof (uint32_t)); + memset(r2, 0U, len * sizeof (uint32_t)); + Hacl_Bignum_Montgomery_bn_precomp_r2_mod_n_u32(len, nBits, n, r2); + uint32_t mu = Hacl_Bignum_ModInvLimb_mod_inv_uint32(n[0U]); + Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_precomp_u32(len, n, mu, r2, a, bBits, b, res); +} + +void +Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_u32( + uint32_t len, + uint32_t nBits, + uint32_t *n, + uint32_t *a, + uint32_t bBits, + uint32_t *b, + uint32_t *res +) +{ + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t *r2 = alloca(len * sizeof (uint32_t)); + memset(r2, 0U, len * sizeof (uint32_t)); + Hacl_Bignum_Montgomery_bn_precomp_r2_mod_n_u32(len, nBits, n, r2); + uint32_t mu = Hacl_Bignum_ModInvLimb_mod_inv_uint32(n[0U]); + Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_precomp_u32(len, n, mu, r2, a, 
bBits, b, res); +} + +uint64_t +Hacl_Bignum_Exponentiation_bn_check_mod_exp_u64( + uint32_t len, + uint64_t *n, + uint64_t *a, + uint32_t bBits, + uint64_t *b +) +{ + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t *one = alloca(len * sizeof (uint64_t)); + memset(one, 0U, len * sizeof (uint64_t)); + memset(one, 0U, len * sizeof (uint64_t)); + one[0U] = (uint64_t)1U; + uint64_t bit0 = n[0U] & (uint64_t)1U; + uint64_t m0 = (uint64_t)0U - bit0; + uint64_t acc0 = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < len; i++) + { + uint64_t beq = FStar_UInt64_eq_mask(one[i], n[i]); + uint64_t blt = ~FStar_UInt64_gte_mask(one[i], n[i]); + acc0 = (beq & acc0) | (~beq & ((blt & (uint64_t)0xFFFFFFFFFFFFFFFFU) | (~blt & (uint64_t)0U))); + } + uint64_t m10 = acc0; + uint64_t m00 = m0 & m10; + uint32_t bLen; + if (bBits == (uint32_t)0U) + { + bLen = (uint32_t)1U; + } + else + { + bLen = (bBits - (uint32_t)1U) / (uint32_t)64U + (uint32_t)1U; + } + uint64_t m1; + if (bBits < (uint32_t)64U * bLen) + { + KRML_CHECK_SIZE(sizeof (uint64_t), bLen); + uint64_t *b2 = alloca(bLen * sizeof (uint64_t)); + memset(b2, 0U, bLen * sizeof (uint64_t)); + uint32_t i0 = bBits / (uint32_t)64U; + uint32_t j = bBits % (uint32_t)64U; + b2[i0] = b2[i0] | (uint64_t)1U << j; + uint64_t acc = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < bLen; i++) + { + uint64_t beq = FStar_UInt64_eq_mask(b[i], b2[i]); + uint64_t blt = ~FStar_UInt64_gte_mask(b[i], b2[i]); + acc = (beq & acc) | (~beq & ((blt & (uint64_t)0xFFFFFFFFFFFFFFFFU) | (~blt & (uint64_t)0U))); + } + uint64_t res = acc; + m1 = res; + } + else + { + m1 = (uint64_t)0xFFFFFFFFFFFFFFFFU; + } + uint64_t acc = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < len; i++) + { + uint64_t beq = FStar_UInt64_eq_mask(a[i], n[i]); + uint64_t blt = ~FStar_UInt64_gte_mask(a[i], n[i]); + acc = (beq & acc) | (~beq & ((blt & (uint64_t)0xFFFFFFFFFFFFFFFFU) | (~blt & (uint64_t)0U))); + } + uint64_t m2 = acc; + uint64_t m = m1 & m2; + return m00 & m; +} + +void +Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_precomp_u64( + uint32_t len, + uint64_t *n, + uint64_t mu, + uint64_t *r2, + uint64_t *a, + uint32_t bBits, + uint64_t *b, + uint64_t *res +) +{ + if (bBits < (uint32_t)200U) + { + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t *aM = alloca(len * sizeof (uint64_t)); + memset(aM, 0U, len * sizeof (uint64_t)); + KRML_CHECK_SIZE(sizeof (uint64_t), len + len); + uint64_t *c = alloca((len + len) * sizeof (uint64_t)); + memset(c, 0U, (len + len) * sizeof (uint64_t)); + KRML_CHECK_SIZE(sizeof (uint64_t), (uint32_t)4U * len); + uint64_t *tmp0 = alloca((uint32_t)4U * len * sizeof (uint64_t)); + memset(tmp0, 0U, (uint32_t)4U * len * sizeof (uint64_t)); + Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint64(len, a, r2, tmp0, c); + Hacl_Bignum_Montgomery_bn_mont_reduction_u64(len, n, mu, c, aM); + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t *resM = alloca(len * sizeof (uint64_t)); + memset(resM, 0U, len * sizeof (uint64_t)); + KRML_CHECK_SIZE(sizeof (uint64_t), len + len); + uint64_t *tmp1 = alloca((len + len) * sizeof (uint64_t)); + memset(tmp1, 0U, (len + len) * sizeof (uint64_t)); + memcpy(tmp1, r2, len * sizeof (uint64_t)); + Hacl_Bignum_Montgomery_bn_mont_reduction_u64(len, n, mu, tmp1, resM); + for (uint32_t i = (uint32_t)0U; i < bBits; i++) + { + uint32_t i1 = i / (uint32_t)64U; + uint32_t j = i % (uint32_t)64U; + uint64_t tmp = b[i1]; + uint64_t bit = tmp >> j & (uint64_t)1U; + if (!(bit == (uint64_t)0U)) + { + bn_almost_mont_mul_u64(len, n, mu, resM, aM, resM); + } + 
bn_almost_mont_sqr_u64(len, n, mu, aM, aM); + } + KRML_CHECK_SIZE(sizeof (uint64_t), len + len); + uint64_t *tmp = alloca((len + len) * sizeof (uint64_t)); + memset(tmp, 0U, (len + len) * sizeof (uint64_t)); + memcpy(tmp, resM, len * sizeof (uint64_t)); + Hacl_Bignum_Montgomery_bn_mont_reduction_u64(len, n, mu, tmp, res); + return; + } + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t *aM = alloca(len * sizeof (uint64_t)); + memset(aM, 0U, len * sizeof (uint64_t)); + KRML_CHECK_SIZE(sizeof (uint64_t), len + len); + uint64_t *c = alloca((len + len) * sizeof (uint64_t)); + memset(c, 0U, (len + len) * sizeof (uint64_t)); + KRML_CHECK_SIZE(sizeof (uint64_t), (uint32_t)4U * len); + uint64_t *tmp0 = alloca((uint32_t)4U * len * sizeof (uint64_t)); + memset(tmp0, 0U, (uint32_t)4U * len * sizeof (uint64_t)); + Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint64(len, a, r2, tmp0, c); + Hacl_Bignum_Montgomery_bn_mont_reduction_u64(len, n, mu, c, aM); + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t *resM = alloca(len * sizeof (uint64_t)); + memset(resM, 0U, len * sizeof (uint64_t)); + uint32_t bLen; + if (bBits == (uint32_t)0U) + { + bLen = (uint32_t)1U; + } + else + { + bLen = (bBits - (uint32_t)1U) / (uint32_t)64U + (uint32_t)1U; + } + KRML_CHECK_SIZE(sizeof (uint64_t), len + len); + uint64_t *tmp = alloca((len + len) * sizeof (uint64_t)); + memset(tmp, 0U, (len + len) * sizeof (uint64_t)); + memcpy(tmp, r2, len * sizeof (uint64_t)); + Hacl_Bignum_Montgomery_bn_mont_reduction_u64(len, n, mu, tmp, resM); + uint32_t table_len = (uint32_t)16U; + KRML_CHECK_SIZE(sizeof (uint64_t), table_len * len); + uint64_t *table = alloca(table_len * len * sizeof (uint64_t)); + memset(table, 0U, table_len * len * sizeof (uint64_t)); + memcpy(table, resM, len * sizeof (uint64_t)); + uint64_t *t1 = table + len; + memcpy(t1, aM, len * sizeof (uint64_t)); + for (uint32_t i = (uint32_t)0U; i < table_len - (uint32_t)2U; i++) + { + uint64_t *t11 = table + (i + (uint32_t)1U) * len; + uint64_t *t2 = table + (i + (uint32_t)2U) * len; + bn_almost_mont_mul_u64(len, n, mu, t11, aM, t2); + } + for (uint32_t i = (uint32_t)0U; i < bBits / (uint32_t)4U; i++) + { + for (uint32_t i0 = (uint32_t)0U; i0 < (uint32_t)4U; i0++) + { + bn_almost_mont_sqr_u64(len, n, mu, resM, resM); + } + uint64_t mask_l = (uint64_t)16U - (uint64_t)1U; + uint32_t i1 = (bBits - (uint32_t)4U * i - (uint32_t)4U) / (uint32_t)64U; + uint32_t j = (bBits - (uint32_t)4U * i - (uint32_t)4U) % (uint32_t)64U; + uint64_t p1 = b[i1] >> j; + uint64_t ite; + if (i1 + (uint32_t)1U < bLen && (uint32_t)0U < j) + { + ite = p1 | b[i1 + (uint32_t)1U] << ((uint32_t)64U - j); + } + else + { + ite = p1; + } + uint64_t bits_l = ite & mask_l; + uint32_t bits_l32 = (uint32_t)bits_l; + uint64_t *a_bits_l = table + bits_l32 * len; + bn_almost_mont_mul_u64(len, n, mu, resM, a_bits_l, resM); + } + if (!(bBits % (uint32_t)4U == (uint32_t)0U)) + { + uint32_t c0 = bBits % (uint32_t)4U; + for (uint32_t i = (uint32_t)0U; i < c0; i++) + { + bn_almost_mont_sqr_u64(len, n, mu, resM, resM); + } + uint32_t c1 = bBits % (uint32_t)4U; + uint64_t mask_l = ((uint64_t)1U << c1) - (uint64_t)1U; + uint32_t i = (uint32_t)0U; + uint32_t j = (uint32_t)0U; + uint64_t p1 = b[i] >> j; + uint64_t ite; + if (i + (uint32_t)1U < bLen && (uint32_t)0U < j) + { + ite = p1 | b[i + (uint32_t)1U] << ((uint32_t)64U - j); + } + else + { + ite = p1; + } + uint64_t bits_c = ite & mask_l; + uint64_t bits_c0 = bits_c; + uint32_t bits_c32 = (uint32_t)bits_c0; + uint64_t *a_bits_c = table + bits_c32 * len; + 
bn_almost_mont_mul_u64(len, n, mu, resM, a_bits_c, resM); + } + KRML_CHECK_SIZE(sizeof (uint64_t), len + len); + uint64_t *tmp1 = alloca((len + len) * sizeof (uint64_t)); + memset(tmp1, 0U, (len + len) * sizeof (uint64_t)); + memcpy(tmp1, resM, len * sizeof (uint64_t)); + Hacl_Bignum_Montgomery_bn_mont_reduction_u64(len, n, mu, tmp1, res); +} + +void +Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_precomp_u64( + uint32_t len, + uint64_t *n, + uint64_t mu, + uint64_t *r2, + uint64_t *a, + uint32_t bBits, + uint64_t *b, + uint64_t *res +) +{ + if (bBits < (uint32_t)200U) + { + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t *aM = alloca(len * sizeof (uint64_t)); + memset(aM, 0U, len * sizeof (uint64_t)); + KRML_CHECK_SIZE(sizeof (uint64_t), len + len); + uint64_t *c = alloca((len + len) * sizeof (uint64_t)); + memset(c, 0U, (len + len) * sizeof (uint64_t)); + KRML_CHECK_SIZE(sizeof (uint64_t), (uint32_t)4U * len); + uint64_t *tmp0 = alloca((uint32_t)4U * len * sizeof (uint64_t)); + memset(tmp0, 0U, (uint32_t)4U * len * sizeof (uint64_t)); + Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint64(len, a, r2, tmp0, c); + Hacl_Bignum_Montgomery_bn_mont_reduction_u64(len, n, mu, c, aM); + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t *resM = alloca(len * sizeof (uint64_t)); + memset(resM, 0U, len * sizeof (uint64_t)); + KRML_CHECK_SIZE(sizeof (uint64_t), len + len); + uint64_t *tmp1 = alloca((len + len) * sizeof (uint64_t)); + memset(tmp1, 0U, (len + len) * sizeof (uint64_t)); + memcpy(tmp1, r2, len * sizeof (uint64_t)); + Hacl_Bignum_Montgomery_bn_mont_reduction_u64(len, n, mu, tmp1, resM); + uint64_t sw = (uint64_t)0U; + for (uint32_t i0 = (uint32_t)0U; i0 < bBits; i0++) + { + uint32_t i1 = (bBits - i0 - (uint32_t)1U) / (uint32_t)64U; + uint32_t j = (bBits - i0 - (uint32_t)1U) % (uint32_t)64U; + uint64_t tmp = b[i1]; + uint64_t bit = tmp >> j & (uint64_t)1U; + uint64_t sw1 = bit ^ sw; + for (uint32_t i = (uint32_t)0U; i < len; i++) + { + uint64_t dummy = ((uint64_t)0U - sw1) & (resM[i] ^ aM[i]); + resM[i] = resM[i] ^ dummy; + aM[i] = aM[i] ^ dummy; + } + bn_almost_mont_mul_u64(len, n, mu, aM, resM, aM); + bn_almost_mont_sqr_u64(len, n, mu, resM, resM); + sw = bit; + } + uint64_t sw0 = sw; + for (uint32_t i = (uint32_t)0U; i < len; i++) + { + uint64_t dummy = ((uint64_t)0U - sw0) & (resM[i] ^ aM[i]); + resM[i] = resM[i] ^ dummy; + aM[i] = aM[i] ^ dummy; + } + KRML_CHECK_SIZE(sizeof (uint64_t), len + len); + uint64_t *tmp = alloca((len + len) * sizeof (uint64_t)); + memset(tmp, 0U, (len + len) * sizeof (uint64_t)); + memcpy(tmp, resM, len * sizeof (uint64_t)); + Hacl_Bignum_Montgomery_bn_mont_reduction_u64(len, n, mu, tmp, res); + return; + } + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t *aM = alloca(len * sizeof (uint64_t)); + memset(aM, 0U, len * sizeof (uint64_t)); + KRML_CHECK_SIZE(sizeof (uint64_t), len + len); + uint64_t *c0 = alloca((len + len) * sizeof (uint64_t)); + memset(c0, 0U, (len + len) * sizeof (uint64_t)); + KRML_CHECK_SIZE(sizeof (uint64_t), (uint32_t)4U * len); + uint64_t *tmp0 = alloca((uint32_t)4U * len * sizeof (uint64_t)); + memset(tmp0, 0U, (uint32_t)4U * len * sizeof (uint64_t)); + Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint64(len, a, r2, tmp0, c0); + Hacl_Bignum_Montgomery_bn_mont_reduction_u64(len, n, mu, c0, aM); + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t *resM = alloca(len * sizeof (uint64_t)); + memset(resM, 0U, len * sizeof (uint64_t)); + uint32_t bLen; + if (bBits == (uint32_t)0U) + { + bLen = (uint32_t)1U; + } + else + { + bLen = (bBits - 
(uint32_t)1U) / (uint32_t)64U + (uint32_t)1U; + } + KRML_CHECK_SIZE(sizeof (uint64_t), len + len); + uint64_t *tmp = alloca((len + len) * sizeof (uint64_t)); + memset(tmp, 0U, (len + len) * sizeof (uint64_t)); + memcpy(tmp, r2, len * sizeof (uint64_t)); + Hacl_Bignum_Montgomery_bn_mont_reduction_u64(len, n, mu, tmp, resM); + uint32_t table_len = (uint32_t)16U; + KRML_CHECK_SIZE(sizeof (uint64_t), table_len * len); + uint64_t *table = alloca(table_len * len * sizeof (uint64_t)); + memset(table, 0U, table_len * len * sizeof (uint64_t)); + memcpy(table, resM, len * sizeof (uint64_t)); + uint64_t *t1 = table + len; + memcpy(t1, aM, len * sizeof (uint64_t)); + for (uint32_t i = (uint32_t)0U; i < table_len - (uint32_t)2U; i++) + { + uint64_t *t11 = table + (i + (uint32_t)1U) * len; + uint64_t *t2 = table + (i + (uint32_t)2U) * len; + bn_almost_mont_mul_u64(len, n, mu, t11, aM, t2); + } + for (uint32_t i0 = (uint32_t)0U; i0 < bBits / (uint32_t)4U; i0++) + { + for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i++) + { + bn_almost_mont_sqr_u64(len, n, mu, resM, resM); + } + uint64_t mask_l = (uint64_t)16U - (uint64_t)1U; + uint32_t i1 = (bBits - (uint32_t)4U * i0 - (uint32_t)4U) / (uint32_t)64U; + uint32_t j = (bBits - (uint32_t)4U * i0 - (uint32_t)4U) % (uint32_t)64U; + uint64_t p1 = b[i1] >> j; + uint64_t ite; + if (i1 + (uint32_t)1U < bLen && (uint32_t)0U < j) + { + ite = p1 | b[i1 + (uint32_t)1U] << ((uint32_t)64U - j); + } + else + { + ite = p1; + } + uint64_t bits_l = ite & mask_l; + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t *a_bits_l = alloca(len * sizeof (uint64_t)); + memset(a_bits_l, 0U, len * sizeof (uint64_t)); + memcpy(a_bits_l, table, len * sizeof (uint64_t)); + for (uint32_t i2 = (uint32_t)0U; i2 < table_len - (uint32_t)1U; i2++) + { + uint64_t c = FStar_UInt64_eq_mask(bits_l, (uint64_t)(i2 + (uint32_t)1U)); + uint64_t *res_j = table + (i2 + (uint32_t)1U) * len; + for (uint32_t i = (uint32_t)0U; i < len; i++) + { + uint64_t *os = a_bits_l; + uint64_t x = (c & res_j[i]) | (~c & a_bits_l[i]); + os[i] = x; + } + } + bn_almost_mont_mul_u64(len, n, mu, resM, a_bits_l, resM); + } + if (!(bBits % (uint32_t)4U == (uint32_t)0U)) + { + uint32_t c = bBits % (uint32_t)4U; + for (uint32_t i = (uint32_t)0U; i < c; i++) + { + bn_almost_mont_sqr_u64(len, n, mu, resM, resM); + } + uint32_t c10 = bBits % (uint32_t)4U; + uint64_t mask_l = ((uint64_t)1U << c10) - (uint64_t)1U; + uint32_t i0 = (uint32_t)0U; + uint32_t j = (uint32_t)0U; + uint64_t p1 = b[i0] >> j; + uint64_t ite; + if (i0 + (uint32_t)1U < bLen && (uint32_t)0U < j) + { + ite = p1 | b[i0 + (uint32_t)1U] << ((uint32_t)64U - j); + } + else + { + ite = p1; + } + uint64_t bits_c = ite & mask_l; + uint64_t bits_c0 = bits_c; + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t *a_bits_c = alloca(len * sizeof (uint64_t)); + memset(a_bits_c, 0U, len * sizeof (uint64_t)); + memcpy(a_bits_c, table, len * sizeof (uint64_t)); + for (uint32_t i1 = (uint32_t)0U; i1 < table_len - (uint32_t)1U; i1++) + { + uint64_t c1 = FStar_UInt64_eq_mask(bits_c0, (uint64_t)(i1 + (uint32_t)1U)); + uint64_t *res_j = table + (i1 + (uint32_t)1U) * len; + for (uint32_t i = (uint32_t)0U; i < len; i++) + { + uint64_t *os = a_bits_c; + uint64_t x = (c1 & res_j[i]) | (~c1 & a_bits_c[i]); + os[i] = x; + } + } + bn_almost_mont_mul_u64(len, n, mu, resM, a_bits_c, resM); + } + KRML_CHECK_SIZE(sizeof (uint64_t), len + len); + uint64_t *tmp1 = alloca((len + len) * sizeof (uint64_t)); + memset(tmp1, 0U, (len + len) * sizeof (uint64_t)); + memcpy(tmp1, resM, len * sizeof 
(uint64_t)); + Hacl_Bignum_Montgomery_bn_mont_reduction_u64(len, n, mu, tmp1, res); +} + +void +Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_u64( + uint32_t len, + uint32_t nBits, + uint64_t *n, + uint64_t *a, + uint32_t bBits, + uint64_t *b, + uint64_t *res +) +{ + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t *r2 = alloca(len * sizeof (uint64_t)); + memset(r2, 0U, len * sizeof (uint64_t)); + Hacl_Bignum_Montgomery_bn_precomp_r2_mod_n_u64(len, nBits, n, r2); + uint64_t mu = Hacl_Bignum_ModInvLimb_mod_inv_uint64(n[0U]); + Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_precomp_u64(len, n, mu, r2, a, bBits, b, res); +} + +void +Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_u64( + uint32_t len, + uint32_t nBits, + uint64_t *n, + uint64_t *a, + uint32_t bBits, + uint64_t *b, + uint64_t *res +) +{ + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t *r2 = alloca(len * sizeof (uint64_t)); + memset(r2, 0U, len * sizeof (uint64_t)); + Hacl_Bignum_Montgomery_bn_precomp_r2_mod_n_u64(len, nBits, n, r2); + uint64_t mu = Hacl_Bignum_ModInvLimb_mod_inv_uint64(n[0U]); + Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_precomp_u64(len, n, mu, r2, a, bBits, b, res); +} + diff --git a/src/math/bigfix/Hacl_Bignum.h b/src/math/bigfix/Hacl_Bignum.h new file mode 100644 index 000000000..e9f204d1a --- /dev/null +++ b/src/math/bigfix/Hacl_Bignum.h @@ -0,0 +1,345 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + + +#ifndef __Hacl_Bignum_H +#define __Hacl_Bignum_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include "lib_intrinsics.h" +#include "kremlin/internal/types.h" +#include "kremlin/lowstar_endianness.h" +#include +#include "kremlin/internal/target.h" + +#include "Hacl_Bignum_Base.h" + +void Hacl_Bignum_Convert_bn_from_bytes_be_uint64(uint32_t len, uint8_t *b, uint64_t *res); + +void Hacl_Bignum_Convert_bn_to_bytes_be_uint64(uint32_t len, uint64_t *b, uint8_t *res); + +uint32_t Hacl_Bignum_Lib_bn_get_top_index_u32(uint32_t len, uint32_t *b); + +uint64_t Hacl_Bignum_Lib_bn_get_top_index_u64(uint32_t len, uint64_t *b); + +uint32_t +Hacl_Bignum_Addition_bn_sub_eq_len_u32(uint32_t aLen, uint32_t *a, uint32_t *b, uint32_t *res); + +uint64_t +Hacl_Bignum_Addition_bn_sub_eq_len_u64(uint32_t aLen, uint64_t *a, uint64_t *b, uint64_t *res); + +uint32_t +Hacl_Bignum_Addition_bn_add_eq_len_u32(uint32_t aLen, uint32_t *a, uint32_t *b, uint32_t *res); + +uint64_t +Hacl_Bignum_Addition_bn_add_eq_len_u64(uint32_t aLen, uint64_t *a, uint64_t *b, uint64_t *res); + +void +Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint32( + uint32_t aLen, + uint32_t *a, + uint32_t *b, + uint32_t *tmp, + uint32_t *res +); + +void +Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint64( + uint32_t aLen, + uint64_t *a, + uint64_t *b, + uint64_t *tmp, + uint64_t *res +); + +void +Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint32( + uint32_t aLen, + uint32_t *a, + uint32_t *tmp, + uint32_t *res +); + +void +Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint64( + uint32_t aLen, + uint64_t *a, + uint64_t *tmp, + uint64_t *res +); + +void +Hacl_Bignum_bn_add_mod_n_u32( + uint32_t len1, + uint32_t *n, + uint32_t *a, + uint32_t *b, + uint32_t *res +); + +void +Hacl_Bignum_bn_add_mod_n_u64( + uint32_t len1, + uint64_t *n, + uint64_t *a, + uint64_t *b, + uint64_t *res +); + +uint32_t Hacl_Bignum_ModInvLimb_mod_inv_uint32(uint32_t n0); + +uint64_t Hacl_Bignum_ModInvLimb_mod_inv_uint64(uint64_t n0); + +uint32_t Hacl_Bignum_Montgomery_bn_check_modulus_u32(uint32_t len, uint32_t *n); + +void +Hacl_Bignum_Montgomery_bn_precomp_r2_mod_n_u32( + uint32_t len, + uint32_t nBits, + uint32_t *n, + uint32_t *res +); + +void +Hacl_Bignum_Montgomery_bn_mont_reduction_u32( + uint32_t len, + uint32_t *n, + uint32_t nInv, + uint32_t *c, + uint32_t *res +); + +void +Hacl_Bignum_Montgomery_bn_to_mont_u32( + uint32_t len, + uint32_t *n, + uint32_t nInv, + uint32_t *r2, + uint32_t *a, + uint32_t *aM +); + +void +Hacl_Bignum_Montgomery_bn_from_mont_u32( + uint32_t len, + uint32_t *n, + uint32_t nInv_u64, + uint32_t *aM, + uint32_t *a +); + +void +Hacl_Bignum_Montgomery_bn_mont_mul_u32( + uint32_t len, + uint32_t *n, + uint32_t nInv_u64, + uint32_t *aM, + uint32_t *bM, + uint32_t *resM +); + +void +Hacl_Bignum_Montgomery_bn_mont_sqr_u32( + uint32_t len, + uint32_t *n, + uint32_t nInv_u64, + uint32_t *aM, + uint32_t *resM +); + +uint64_t Hacl_Bignum_Montgomery_bn_check_modulus_u64(uint32_t len, uint64_t *n); + +void +Hacl_Bignum_Montgomery_bn_precomp_r2_mod_n_u64( + uint32_t len, + uint32_t nBits, + uint64_t *n, + uint64_t *res +); + +void +Hacl_Bignum_Montgomery_bn_mont_reduction_u64( + uint32_t len, + uint64_t *n, + uint64_t nInv, + uint64_t *c, + uint64_t *res +); + +void +Hacl_Bignum_Montgomery_bn_to_mont_u64( + uint32_t len, + uint64_t *n, + uint64_t nInv, + uint64_t *r2, + uint64_t *a, + uint64_t *aM +); + +void +Hacl_Bignum_Montgomery_bn_from_mont_u64( + uint32_t len, + uint64_t *n, + uint64_t nInv_u64, + uint64_t *aM, + uint64_t *a +); + +void 
+Hacl_Bignum_Montgomery_bn_mont_mul_u64( + uint32_t len, + uint64_t *n, + uint64_t nInv_u64, + uint64_t *aM, + uint64_t *bM, + uint64_t *resM +); + +void +Hacl_Bignum_Montgomery_bn_mont_sqr_u64( + uint32_t len, + uint64_t *n, + uint64_t nInv_u64, + uint64_t *aM, + uint64_t *resM +); + +uint32_t +Hacl_Bignum_Exponentiation_bn_check_mod_exp_u32( + uint32_t len, + uint32_t *n, + uint32_t *a, + uint32_t bBits, + uint32_t *b +); + +void +Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_precomp_u32( + uint32_t len, + uint32_t *n, + uint32_t mu, + uint32_t *r2, + uint32_t *a, + uint32_t bBits, + uint32_t *b, + uint32_t *res +); + +void +Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_precomp_u32( + uint32_t len, + uint32_t *n, + uint32_t mu, + uint32_t *r2, + uint32_t *a, + uint32_t bBits, + uint32_t *b, + uint32_t *res +); + +void +Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_u32( + uint32_t len, + uint32_t nBits, + uint32_t *n, + uint32_t *a, + uint32_t bBits, + uint32_t *b, + uint32_t *res +); + +void +Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_u32( + uint32_t len, + uint32_t nBits, + uint32_t *n, + uint32_t *a, + uint32_t bBits, + uint32_t *b, + uint32_t *res +); + +uint64_t +Hacl_Bignum_Exponentiation_bn_check_mod_exp_u64( + uint32_t len, + uint64_t *n, + uint64_t *a, + uint32_t bBits, + uint64_t *b +); + +void +Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_precomp_u64( + uint32_t len, + uint64_t *n, + uint64_t mu, + uint64_t *r2, + uint64_t *a, + uint32_t bBits, + uint64_t *b, + uint64_t *res +); + +void +Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_precomp_u64( + uint32_t len, + uint64_t *n, + uint64_t mu, + uint64_t *r2, + uint64_t *a, + uint32_t bBits, + uint64_t *b, + uint64_t *res +); + +void +Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_u64( + uint32_t len, + uint32_t nBits, + uint64_t *n, + uint64_t *a, + uint32_t bBits, + uint64_t *b, + uint64_t *res +); + +void +Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_u64( + uint32_t len, + uint32_t nBits, + uint64_t *n, + uint64_t *a, + uint32_t bBits, + uint64_t *b, + uint64_t *res +); + +#if defined(__cplusplus) +} +#endif + +#define __Hacl_Bignum_H_DEFINED +#endif diff --git a/src/math/bigfix/Hacl_Bignum256.c b/src/math/bigfix/Hacl_Bignum256.c new file mode 100644 index 000000000..372baa30f --- /dev/null +++ b/src/math/bigfix/Hacl_Bignum256.c @@ -0,0 +1,1534 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + + +#include "math/bigfix/Hacl_Bignum256.h" + +/******************************************************************************* + +A verified 256-bit bignum library. + +This is a 64-bit optimized version, where bignums are represented as an array +of four unsigned 64-bit integers, i.e. uint64_t[4]. Furthermore, the +limbs are stored in little-endian format, i.e. the least significant limb is at +index 0. Each limb is stored in native format in memory. Example: + + uint64_t sixteen[4] = { 0x10, 0x00, 0x00, 0x00 } + +We strongly encourage users to go through the conversion functions, e.g. +bn_from_bytes_be, to i) not depend on internal representation choices and ii) +have the ability to switch easily to a 32-bit optimized version in the future. + +*******************************************************************************/ + +/************************/ +/* Arithmetic functions */ +/************************/ + + +/* +Write `a + b mod 2^256` in `res`. + + This function returns the carry. + + The arguments a, b and res are meant to be 256-bit bignums, i.e. uint64_t[4] +*/ +uint64_t Hacl_Bignum256_add(uint64_t *a, uint64_t *b, uint64_t *res) +{ + uint64_t c = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)1U; i++) + { + uint64_t t1 = a[(uint32_t)4U * i]; + uint64_t t20 = b[(uint32_t)4U * i]; + uint64_t *res_i0 = res + (uint32_t)4U * i; + c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t1, t20, res_i0); + uint64_t t10 = a[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t t21 = b[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t *res_i1 = res + (uint32_t)4U * i + (uint32_t)1U; + c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t10, t21, res_i1); + uint64_t t11 = a[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t t22 = b[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t *res_i2 = res + (uint32_t)4U * i + (uint32_t)2U; + c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t11, t22, res_i2); + uint64_t t12 = a[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t t2 = b[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t *res_i = res + (uint32_t)4U * i + (uint32_t)3U; + c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t12, t2, res_i); + } + for (uint32_t i = (uint32_t)4U; i < (uint32_t)4U; i++) + { + uint64_t t1 = a[i]; + uint64_t t2 = b[i]; + uint64_t *res_i = res + i; + c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t1, t2, res_i); + } + return c; +} + +/* +Write `a - b mod 2^256` in `res`. + + This function returns the carry. + + The arguments a, b and res are meant to be 256-bit bignums, i.e. 
uint64_t[4] +*/ +uint64_t Hacl_Bignum256_sub(uint64_t *a, uint64_t *b, uint64_t *res) +{ + uint64_t c = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)1U; i++) + { + uint64_t t1 = a[(uint32_t)4U * i]; + uint64_t t20 = b[(uint32_t)4U * i]; + uint64_t *res_i0 = res + (uint32_t)4U * i; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t1, t20, res_i0); + uint64_t t10 = a[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t t21 = b[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t *res_i1 = res + (uint32_t)4U * i + (uint32_t)1U; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t10, t21, res_i1); + uint64_t t11 = a[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t t22 = b[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t *res_i2 = res + (uint32_t)4U * i + (uint32_t)2U; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t11, t22, res_i2); + uint64_t t12 = a[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t t2 = b[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t *res_i = res + (uint32_t)4U * i + (uint32_t)3U; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t12, t2, res_i); + } + for (uint32_t i = (uint32_t)4U; i < (uint32_t)4U; i++) + { + uint64_t t1 = a[i]; + uint64_t t2 = b[i]; + uint64_t *res_i = res + i; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t1, t2, res_i); + } + return c; +} + +static inline void add_mod_n(uint64_t *n, uint64_t *a, uint64_t *b, uint64_t *res) +{ + uint64_t c0 = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)1U; i++) + { + uint64_t t1 = a[(uint32_t)4U * i]; + uint64_t t20 = b[(uint32_t)4U * i]; + uint64_t *res_i0 = res + (uint32_t)4U * i; + c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, t1, t20, res_i0); + uint64_t t10 = a[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t t21 = b[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t *res_i1 = res + (uint32_t)4U * i + (uint32_t)1U; + c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, t10, t21, res_i1); + uint64_t t11 = a[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t t22 = b[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t *res_i2 = res + (uint32_t)4U * i + (uint32_t)2U; + c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, t11, t22, res_i2); + uint64_t t12 = a[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t t2 = b[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t *res_i = res + (uint32_t)4U * i + (uint32_t)3U; + c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, t12, t2, res_i); + } + for (uint32_t i = (uint32_t)4U; i < (uint32_t)4U; i++) + { + uint64_t t1 = a[i]; + uint64_t t2 = b[i]; + uint64_t *res_i = res + i; + c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, t1, t2, res_i); + } + uint64_t c00 = c0; + uint64_t tmp[4U] = { 0U }; + uint64_t c = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)1U; i++) + { + uint64_t t1 = res[(uint32_t)4U * i]; + uint64_t t20 = n[(uint32_t)4U * i]; + uint64_t *res_i0 = tmp + (uint32_t)4U * i; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t1, t20, res_i0); + uint64_t t10 = res[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t t21 = n[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t *res_i1 = tmp + (uint32_t)4U * i + (uint32_t)1U; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t10, t21, res_i1); + uint64_t t11 = res[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t t22 = n[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t *res_i2 = tmp + (uint32_t)4U * i + (uint32_t)2U; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t11, t22, res_i2); + uint64_t t12 = res[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t t2 = n[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t *res_i = tmp + (uint32_t)4U * i + (uint32_t)3U; + c = 
Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t12, t2, res_i); + } + for (uint32_t i = (uint32_t)4U; i < (uint32_t)4U; i++) + { + uint64_t t1 = res[i]; + uint64_t t2 = n[i]; + uint64_t *res_i = tmp + i; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t1, t2, res_i); + } + uint64_t c1 = c; + uint64_t c2 = c00 - c1; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i++) + { + uint64_t *os = res; + uint64_t x = (c2 & res[i]) | (~c2 & tmp[i]); + os[i] = x; + } +} + +/* +Write `a * b` in `res`. + + The arguments a and b are meant to be 256-bit bignums, i.e. uint64_t[4]. + The outparam res is meant to be a 512-bit bignum, i.e. uint64_t[8]. +*/ +void Hacl_Bignum256_mul(uint64_t *a, uint64_t *b, uint64_t *res) +{ + uint32_t resLen = (uint32_t)8U; + memset(res, 0U, resLen * sizeof (uint64_t)); + for (uint32_t i0 = (uint32_t)0U; i0 < (uint32_t)4U; i0++) + { + uint64_t bj = b[i0]; + uint64_t *res_j = res + i0; + uint64_t c = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)1U; i++) + { + uint64_t a_i = a[(uint32_t)4U * i]; + uint64_t *res_i0 = res_j + (uint32_t)4U * i; + c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, bj, c, res_i0); + uint64_t a_i0 = a[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t *res_i1 = res_j + (uint32_t)4U * i + (uint32_t)1U; + c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i0, bj, c, res_i1); + uint64_t a_i1 = a[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t *res_i2 = res_j + (uint32_t)4U * i + (uint32_t)2U; + c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i1, bj, c, res_i2); + uint64_t a_i2 = a[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t *res_i = res_j + (uint32_t)4U * i + (uint32_t)3U; + c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i2, bj, c, res_i); + } + for (uint32_t i = (uint32_t)4U; i < (uint32_t)4U; i++) + { + uint64_t a_i = a[i]; + uint64_t *res_i = res_j + i; + c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, bj, c, res_i); + } + uint64_t r = c; + res[(uint32_t)4U + i0] = r; + } +} + +/* +Write `a * a` in `res`. + + The argument a is meant to be a 256-bit bignum, i.e. uint64_t[4]. + The outparam res is meant to be a 512-bit bignum, i.e. uint64_t[8]. 
+*/ +void Hacl_Bignum256_sqr(uint64_t *a, uint64_t *res) +{ + uint32_t resLen = (uint32_t)8U; + memset(res, 0U, resLen * sizeof (uint64_t)); + for (uint32_t i0 = (uint32_t)0U; i0 < (uint32_t)4U; i0++) + { + uint64_t *ab = a; + uint64_t a_j = a[i0]; + uint64_t *res_j = res + i0; + uint64_t c = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < i0 / (uint32_t)4U * (uint32_t)4U / (uint32_t)4U; i++) + { + uint64_t a_i = ab[(uint32_t)4U * i]; + uint64_t *res_i0 = res_j + (uint32_t)4U * i; + c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, a_j, c, res_i0); + uint64_t a_i0 = ab[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t *res_i1 = res_j + (uint32_t)4U * i + (uint32_t)1U; + c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i0, a_j, c, res_i1); + uint64_t a_i1 = ab[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t *res_i2 = res_j + (uint32_t)4U * i + (uint32_t)2U; + c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i1, a_j, c, res_i2); + uint64_t a_i2 = ab[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t *res_i = res_j + (uint32_t)4U * i + (uint32_t)3U; + c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i2, a_j, c, res_i); + } + for (uint32_t i = i0 / (uint32_t)4U * (uint32_t)4U; i < i0; i++) + { + uint64_t a_i = ab[i]; + uint64_t *res_i = res_j + i; + c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, a_j, c, res_i); + } + uint64_t r = c; + res[i0 + i0] = r; + } + uint64_t c0 = Hacl_Bignum_Addition_bn_add_eq_len_u64(resLen, res, res, res); + KRML_CHECK_SIZE(sizeof (uint64_t), resLen); + uint64_t *tmp = alloca(resLen * sizeof (uint64_t)); + memset(tmp, 0U, resLen * sizeof (uint64_t)); + for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i++) + { + FStar_UInt128_uint128 res1 = FStar_UInt128_mul_wide(a[i], a[i]); + uint64_t hi = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(res1, (uint32_t)64U)); + uint64_t lo = FStar_UInt128_uint128_to_uint64(res1); + tmp[(uint32_t)2U * i] = lo; + tmp[(uint32_t)2U * i + (uint32_t)1U] = hi; + } + uint64_t c1 = Hacl_Bignum_Addition_bn_add_eq_len_u64(resLen, res, tmp, res); +} + +static inline void precompr2(uint32_t nBits, uint64_t *n, uint64_t *res) +{ + memset(res, 0U, (uint32_t)4U * sizeof (uint64_t)); + uint32_t i = nBits / (uint32_t)64U; + uint32_t j = nBits % (uint32_t)64U; + res[i] = res[i] | (uint64_t)1U << j; + for (uint32_t i0 = (uint32_t)0U; i0 < (uint32_t)512U - nBits; i0++) + { + add_mod_n(n, res, res, res); + } +} + +static inline void reduction(uint64_t *n, uint64_t nInv, uint64_t *c, uint64_t *res) +{ + uint64_t c0 = (uint64_t)0U; + for (uint32_t i0 = (uint32_t)0U; i0 < (uint32_t)4U; i0++) + { + uint64_t qj = nInv * c[i0]; + uint64_t *res_j0 = c + i0; + uint64_t c1 = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)1U; i++) + { + uint64_t a_i = n[(uint32_t)4U * i]; + uint64_t *res_i0 = res_j0 + (uint32_t)4U * i; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, qj, c1, res_i0); + uint64_t a_i0 = n[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t *res_i1 = res_j0 + (uint32_t)4U * i + (uint32_t)1U; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i0, qj, c1, res_i1); + uint64_t a_i1 = n[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t *res_i2 = res_j0 + (uint32_t)4U * i + (uint32_t)2U; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i1, qj, c1, res_i2); + uint64_t a_i2 = n[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t *res_i = res_j0 + (uint32_t)4U * i + (uint32_t)3U; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i2, qj, c1, res_i); + } + for (uint32_t i = (uint32_t)4U; i < (uint32_t)4U; i++) + { + uint64_t a_i = n[i]; + uint64_t *res_i = res_j0 + i; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, 
qj, c1, res_i); + } + uint64_t r = c1; + uint64_t c10 = r; + uint64_t *resb = c + (uint32_t)4U + i0; + uint64_t res_j = c[(uint32_t)4U + i0]; + c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, c10, res_j, resb); + } + memcpy(res, c + (uint32_t)4U, (uint32_t)4U * sizeof (uint64_t)); + uint64_t c00 = c0; + uint64_t tmp[4U] = { 0U }; + uint64_t c1 = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)1U; i++) + { + uint64_t t1 = res[(uint32_t)4U * i]; + uint64_t t20 = n[(uint32_t)4U * i]; + uint64_t *res_i0 = tmp + (uint32_t)4U * i; + c1 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c1, t1, t20, res_i0); + uint64_t t10 = res[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t t21 = n[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t *res_i1 = tmp + (uint32_t)4U * i + (uint32_t)1U; + c1 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c1, t10, t21, res_i1); + uint64_t t11 = res[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t t22 = n[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t *res_i2 = tmp + (uint32_t)4U * i + (uint32_t)2U; + c1 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c1, t11, t22, res_i2); + uint64_t t12 = res[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t t2 = n[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t *res_i = tmp + (uint32_t)4U * i + (uint32_t)3U; + c1 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c1, t12, t2, res_i); + } + for (uint32_t i = (uint32_t)4U; i < (uint32_t)4U; i++) + { + uint64_t t1 = res[i]; + uint64_t t2 = n[i]; + uint64_t *res_i = tmp + i; + c1 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c1, t1, t2, res_i); + } + uint64_t c10 = c1; + uint64_t c2 = c00 - c10; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i++) + { + uint64_t *os = res; + uint64_t x = (c2 & res[i]) | (~c2 & tmp[i]); + os[i] = x; + } +} + +static inline void areduction(uint64_t *n, uint64_t nInv, uint64_t *c, uint64_t *res) +{ + uint64_t c0 = (uint64_t)0U; + for (uint32_t i0 = (uint32_t)0U; i0 < (uint32_t)4U; i0++) + { + uint64_t qj = nInv * c[i0]; + uint64_t *res_j0 = c + i0; + uint64_t c1 = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)1U; i++) + { + uint64_t a_i = n[(uint32_t)4U * i]; + uint64_t *res_i0 = res_j0 + (uint32_t)4U * i; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, qj, c1, res_i0); + uint64_t a_i0 = n[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t *res_i1 = res_j0 + (uint32_t)4U * i + (uint32_t)1U; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i0, qj, c1, res_i1); + uint64_t a_i1 = n[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t *res_i2 = res_j0 + (uint32_t)4U * i + (uint32_t)2U; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i1, qj, c1, res_i2); + uint64_t a_i2 = n[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t *res_i = res_j0 + (uint32_t)4U * i + (uint32_t)3U; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i2, qj, c1, res_i); + } + for (uint32_t i = (uint32_t)4U; i < (uint32_t)4U; i++) + { + uint64_t a_i = n[i]; + uint64_t *res_i = res_j0 + i; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, qj, c1, res_i); + } + uint64_t r = c1; + uint64_t c10 = r; + uint64_t *resb = c + (uint32_t)4U + i0; + uint64_t res_j = c[(uint32_t)4U + i0]; + c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, c10, res_j, resb); + } + memcpy(res, c + (uint32_t)4U, (uint32_t)4U * sizeof (uint64_t)); + uint64_t c00 = c0; + uint64_t tmp[4U] = { 0U }; + uint64_t c1 = Hacl_Bignum256_sub(res, n, tmp); + uint64_t m = (uint64_t)0U - c00; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i++) + { + uint64_t *os = res; + uint64_t x = (m & tmp[i]) | (~m & res[i]); + os[i] = x; + } +} + +static inline void +amont_mul(uint64_t *n, uint64_t nInv_u64, 
uint64_t *aM, uint64_t *bM, uint64_t *resM) +{ + uint64_t c[8U] = { 0U }; + uint32_t resLen = (uint32_t)8U; + memset(c, 0U, resLen * sizeof (uint64_t)); + for (uint32_t i0 = (uint32_t)0U; i0 < (uint32_t)4U; i0++) + { + uint64_t bj = bM[i0]; + uint64_t *res_j = c + i0; + uint64_t c1 = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)1U; i++) + { + uint64_t a_i = aM[(uint32_t)4U * i]; + uint64_t *res_i0 = res_j + (uint32_t)4U * i; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, bj, c1, res_i0); + uint64_t a_i0 = aM[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t *res_i1 = res_j + (uint32_t)4U * i + (uint32_t)1U; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i0, bj, c1, res_i1); + uint64_t a_i1 = aM[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t *res_i2 = res_j + (uint32_t)4U * i + (uint32_t)2U; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i1, bj, c1, res_i2); + uint64_t a_i2 = aM[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t *res_i = res_j + (uint32_t)4U * i + (uint32_t)3U; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i2, bj, c1, res_i); + } + for (uint32_t i = (uint32_t)4U; i < (uint32_t)4U; i++) + { + uint64_t a_i = aM[i]; + uint64_t *res_i = res_j + i; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, bj, c1, res_i); + } + uint64_t r = c1; + c[(uint32_t)4U + i0] = r; + } + areduction(n, nInv_u64, c, resM); +} + +static inline void amont_sqr(uint64_t *n, uint64_t nInv_u64, uint64_t *aM, uint64_t *resM) +{ + uint64_t c[8U] = { 0U }; + uint32_t resLen = (uint32_t)8U; + memset(c, 0U, resLen * sizeof (uint64_t)); + for (uint32_t i0 = (uint32_t)0U; i0 < (uint32_t)4U; i0++) + { + uint64_t *ab = aM; + uint64_t a_j = aM[i0]; + uint64_t *res_j = c + i0; + uint64_t c1 = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < i0 / (uint32_t)4U * (uint32_t)4U / (uint32_t)4U; i++) + { + uint64_t a_i = ab[(uint32_t)4U * i]; + uint64_t *res_i0 = res_j + (uint32_t)4U * i; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, a_j, c1, res_i0); + uint64_t a_i0 = ab[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t *res_i1 = res_j + (uint32_t)4U * i + (uint32_t)1U; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i0, a_j, c1, res_i1); + uint64_t a_i1 = ab[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t *res_i2 = res_j + (uint32_t)4U * i + (uint32_t)2U; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i1, a_j, c1, res_i2); + uint64_t a_i2 = ab[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t *res_i = res_j + (uint32_t)4U * i + (uint32_t)3U; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i2, a_j, c1, res_i); + } + for (uint32_t i = i0 / (uint32_t)4U * (uint32_t)4U; i < i0; i++) + { + uint64_t a_i = ab[i]; + uint64_t *res_i = res_j + i; + c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, a_j, c1, res_i); + } + uint64_t r = c1; + c[i0 + i0] = r; + } + uint64_t c0 = Hacl_Bignum_Addition_bn_add_eq_len_u64(resLen, c, c, c); + KRML_CHECK_SIZE(sizeof (uint64_t), resLen); + uint64_t *tmp = alloca(resLen * sizeof (uint64_t)); + memset(tmp, 0U, resLen * sizeof (uint64_t)); + for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i++) + { + FStar_UInt128_uint128 res = FStar_UInt128_mul_wide(aM[i], aM[i]); + uint64_t hi = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(res, (uint32_t)64U)); + uint64_t lo = FStar_UInt128_uint128_to_uint64(res); + tmp[(uint32_t)2U * i] = lo; + tmp[(uint32_t)2U * i + (uint32_t)1U] = hi; + } + uint64_t c1 = Hacl_Bignum_Addition_bn_add_eq_len_u64(resLen, c, tmp, c); + areduction(n, nInv_u64, c, resM); +} + +static inline void +bn_slow_precomp(uint64_t *n, uint64_t mu, uint64_t *r2, uint64_t *a, uint64_t *res) +{ + uint64_t 
a_mod[4U] = { 0U }; + uint64_t a1[8U] = { 0U }; + memcpy(a1, a, (uint32_t)8U * sizeof (uint64_t)); + uint64_t c0 = (uint64_t)0U; + for (uint32_t i0 = (uint32_t)0U; i0 < (uint32_t)4U; i0++) + { + uint64_t qj = mu * a1[i0]; + uint64_t *res_j0 = a1 + i0; + uint64_t c = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)1U; i++) + { + uint64_t a_i = n[(uint32_t)4U * i]; + uint64_t *res_i0 = res_j0 + (uint32_t)4U * i; + c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, qj, c, res_i0); + uint64_t a_i0 = n[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t *res_i1 = res_j0 + (uint32_t)4U * i + (uint32_t)1U; + c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i0, qj, c, res_i1); + uint64_t a_i1 = n[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t *res_i2 = res_j0 + (uint32_t)4U * i + (uint32_t)2U; + c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i1, qj, c, res_i2); + uint64_t a_i2 = n[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t *res_i = res_j0 + (uint32_t)4U * i + (uint32_t)3U; + c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i2, qj, c, res_i); + } + for (uint32_t i = (uint32_t)4U; i < (uint32_t)4U; i++) + { + uint64_t a_i = n[i]; + uint64_t *res_i = res_j0 + i; + c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, qj, c, res_i); + } + uint64_t r = c; + uint64_t c1 = r; + uint64_t *resb = a1 + (uint32_t)4U + i0; + uint64_t res_j = a1[(uint32_t)4U + i0]; + c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, c1, res_j, resb); + } + memcpy(a_mod, a1 + (uint32_t)4U, (uint32_t)4U * sizeof (uint64_t)); + uint64_t c00 = c0; + uint64_t tmp[4U] = { 0U }; + uint64_t c1 = Hacl_Bignum256_sub(a_mod, n, tmp); + uint64_t m = (uint64_t)0U - c00; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i++) + { + uint64_t *os = a_mod; + uint64_t x = (m & tmp[i]) | (~m & a_mod[i]); + os[i] = x; + } + uint64_t c[8U] = { 0U }; + Hacl_Bignum256_mul(a_mod, r2, c); + reduction(n, mu, c, res); +} + +/* +Write `a mod n` in `res`. + + The argument a is meant to be a 512-bit bignum, i.e. uint64_t[8]. + The argument n and the outparam res are meant to be 256-bit bignums, i.e. uint64_t[4]. + + The function returns false if any of the following preconditions are violated, + true otherwise. 
+ • 1 < n + • n % 2 = 1 +*/ +bool Hacl_Bignum256_mod(uint64_t *n, uint64_t *a, uint64_t *res) +{ + uint64_t one[4U] = { 0U }; + memset(one, 0U, (uint32_t)4U * sizeof (uint64_t)); + one[0U] = (uint64_t)1U; + uint64_t bit0 = n[0U] & (uint64_t)1U; + uint64_t m0 = (uint64_t)0U - bit0; + uint64_t acc = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i++) + { + uint64_t beq = FStar_UInt64_eq_mask(one[i], n[i]); + uint64_t blt = ~FStar_UInt64_gte_mask(one[i], n[i]); + acc = (beq & acc) | (~beq & ((blt & (uint64_t)0xFFFFFFFFFFFFFFFFU) | (~blt & (uint64_t)0U))); + } + uint64_t m1 = acc; + uint64_t is_valid_m = m0 & m1; + uint32_t + nBits = (uint32_t)64U * (uint32_t)Hacl_Bignum_Lib_bn_get_top_index_u64((uint32_t)4U, n); + if (is_valid_m == (uint64_t)0xFFFFFFFFFFFFFFFFU) + { + uint64_t r2[4U] = { 0U }; + precompr2(nBits, n, r2); + uint64_t mu = Hacl_Bignum_ModInvLimb_mod_inv_uint64(n[0U]); + bn_slow_precomp(n, mu, r2, a, res); + } + else + { + memset(res, 0U, (uint32_t)4U * sizeof (uint64_t)); + } + return is_valid_m == (uint64_t)0xFFFFFFFFFFFFFFFFU; +} + +static uint64_t exp_check(uint64_t *n, uint64_t *a, uint32_t bBits, uint64_t *b) +{ + uint64_t one[4U] = { 0U }; + memset(one, 0U, (uint32_t)4U * sizeof (uint64_t)); + one[0U] = (uint64_t)1U; + uint64_t bit0 = n[0U] & (uint64_t)1U; + uint64_t m0 = (uint64_t)0U - bit0; + uint64_t acc0 = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i++) + { + uint64_t beq = FStar_UInt64_eq_mask(one[i], n[i]); + uint64_t blt = ~FStar_UInt64_gte_mask(one[i], n[i]); + acc0 = (beq & acc0) | (~beq & ((blt & (uint64_t)0xFFFFFFFFFFFFFFFFU) | (~blt & (uint64_t)0U))); + } + uint64_t m10 = acc0; + uint64_t m00 = m0 & m10; + uint32_t bLen; + if (bBits == (uint32_t)0U) + { + bLen = (uint32_t)1U; + } + else + { + bLen = (bBits - (uint32_t)1U) / (uint32_t)64U + (uint32_t)1U; + } + uint64_t m1; + if (bBits < (uint32_t)64U * bLen) + { + KRML_CHECK_SIZE(sizeof (uint64_t), bLen); + uint64_t *b2 = alloca(bLen * sizeof (uint64_t)); + memset(b2, 0U, bLen * sizeof (uint64_t)); + uint32_t i0 = bBits / (uint32_t)64U; + uint32_t j = bBits % (uint32_t)64U; + b2[i0] = b2[i0] | (uint64_t)1U << j; + uint64_t acc = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < bLen; i++) + { + uint64_t beq = FStar_UInt64_eq_mask(b[i], b2[i]); + uint64_t blt = ~FStar_UInt64_gte_mask(b[i], b2[i]); + acc = (beq & acc) | (~beq & ((blt & (uint64_t)0xFFFFFFFFFFFFFFFFU) | (~blt & (uint64_t)0U))); + } + uint64_t res = acc; + m1 = res; + } + else + { + m1 = (uint64_t)0xFFFFFFFFFFFFFFFFU; + } + uint64_t acc = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i++) + { + uint64_t beq = FStar_UInt64_eq_mask(a[i], n[i]); + uint64_t blt = ~FStar_UInt64_gte_mask(a[i], n[i]); + acc = (beq & acc) | (~beq & ((blt & (uint64_t)0xFFFFFFFFFFFFFFFFU) | (~blt & (uint64_t)0U))); + } + uint64_t m2 = acc; + uint64_t m = m1 & m2; + return m00 & m; +} + +static inline void +exp_vartime_precomp( + uint64_t *n, + uint64_t mu, + uint64_t *r2, + uint64_t *a, + uint32_t bBits, + uint64_t *b, + uint64_t *res +) +{ + if (bBits < (uint32_t)200U) + { + uint64_t aM[4U] = { 0U }; + uint64_t c[8U] = { 0U }; + Hacl_Bignum256_mul(a, r2, c); + reduction(n, mu, c, aM); + uint64_t resM[4U] = { 0U }; + uint64_t tmp0[8U] = { 0U }; + memcpy(tmp0, r2, (uint32_t)4U * sizeof (uint64_t)); + reduction(n, mu, tmp0, resM); + for (uint32_t i = (uint32_t)0U; i < bBits; i++) + { + uint32_t i1 = i / (uint32_t)64U; + uint32_t j = i % (uint32_t)64U; + uint64_t tmp = b[i1]; + uint64_t bit = tmp >> j & (uint64_t)1U; 
+ if (!(bit == (uint64_t)0U)) + { + amont_mul(n, mu, resM, aM, resM); + } + amont_sqr(n, mu, aM, aM); + } + uint64_t tmp[8U] = { 0U }; + memcpy(tmp, resM, (uint32_t)4U * sizeof (uint64_t)); + reduction(n, mu, tmp, res); + return; + } + uint64_t aM[4U] = { 0U }; + uint64_t c[8U] = { 0U }; + Hacl_Bignum256_mul(a, r2, c); + reduction(n, mu, c, aM); + uint64_t resM[4U] = { 0U }; + uint32_t bLen; + if (bBits == (uint32_t)0U) + { + bLen = (uint32_t)1U; + } + else + { + bLen = (bBits - (uint32_t)1U) / (uint32_t)64U + (uint32_t)1U; + } + uint64_t tmp[8U] = { 0U }; + memcpy(tmp, r2, (uint32_t)4U * sizeof (uint64_t)); + reduction(n, mu, tmp, resM); + uint32_t table_len = (uint32_t)16U; + KRML_CHECK_SIZE(sizeof (uint64_t), table_len * (uint32_t)4U); + uint64_t *table = alloca(table_len * (uint32_t)4U * sizeof (uint64_t)); + memset(table, 0U, table_len * (uint32_t)4U * sizeof (uint64_t)); + memcpy(table, resM, (uint32_t)4U * sizeof (uint64_t)); + uint64_t *t1 = table + (uint32_t)4U; + memcpy(t1, aM, (uint32_t)4U * sizeof (uint64_t)); + for (uint32_t i = (uint32_t)0U; i < table_len - (uint32_t)2U; i++) + { + uint64_t *t11 = table + (i + (uint32_t)1U) * (uint32_t)4U; + uint64_t *t2 = table + (i + (uint32_t)2U) * (uint32_t)4U; + amont_mul(n, mu, t11, aM, t2); + } + for (uint32_t i = (uint32_t)0U; i < bBits / (uint32_t)4U; i++) + { + for (uint32_t i0 = (uint32_t)0U; i0 < (uint32_t)4U; i0++) + { + amont_sqr(n, mu, resM, resM); + } + uint64_t mask_l = (uint64_t)16U - (uint64_t)1U; + uint32_t i1 = (bBits - (uint32_t)4U * i - (uint32_t)4U) / (uint32_t)64U; + uint32_t j = (bBits - (uint32_t)4U * i - (uint32_t)4U) % (uint32_t)64U; + uint64_t p1 = b[i1] >> j; + uint64_t ite; + if (i1 + (uint32_t)1U < bLen && (uint32_t)0U < j) + { + ite = p1 | b[i1 + (uint32_t)1U] << ((uint32_t)64U - j); + } + else + { + ite = p1; + } + uint64_t bits_l = ite & mask_l; + uint32_t bits_l32 = (uint32_t)bits_l; + uint64_t *a_bits_l = table + bits_l32 * (uint32_t)4U; + amont_mul(n, mu, resM, a_bits_l, resM); + } + if (!(bBits % (uint32_t)4U == (uint32_t)0U)) + { + uint32_t c0 = bBits % (uint32_t)4U; + for (uint32_t i = (uint32_t)0U; i < c0; i++) + { + amont_sqr(n, mu, resM, resM); + } + uint32_t c1 = bBits % (uint32_t)4U; + uint64_t mask_l = ((uint64_t)1U << c1) - (uint64_t)1U; + uint32_t i = (uint32_t)0U; + uint32_t j = (uint32_t)0U; + uint64_t p1 = b[i] >> j; + uint64_t ite; + if (i + (uint32_t)1U < bLen && (uint32_t)0U < j) + { + ite = p1 | b[i + (uint32_t)1U] << ((uint32_t)64U - j); + } + else + { + ite = p1; + } + uint64_t bits_c = ite & mask_l; + uint64_t bits_c0 = bits_c; + uint32_t bits_c32 = (uint32_t)bits_c0; + uint64_t *a_bits_c = table + bits_c32 * (uint32_t)4U; + amont_mul(n, mu, resM, a_bits_c, resM); + } + uint64_t tmp0[8U] = { 0U }; + memcpy(tmp0, resM, (uint32_t)4U * sizeof (uint64_t)); + reduction(n, mu, tmp0, res); +} + +static inline void +exp_consttime_precomp( + uint64_t *n, + uint64_t mu, + uint64_t *r2, + uint64_t *a, + uint32_t bBits, + uint64_t *b, + uint64_t *res +) +{ + if (bBits < (uint32_t)200U) + { + uint64_t aM[4U] = { 0U }; + uint64_t c[8U] = { 0U }; + Hacl_Bignum256_mul(a, r2, c); + reduction(n, mu, c, aM); + uint64_t resM[4U] = { 0U }; + uint64_t tmp0[8U] = { 0U }; + memcpy(tmp0, r2, (uint32_t)4U * sizeof (uint64_t)); + reduction(n, mu, tmp0, resM); + uint64_t sw = (uint64_t)0U; + for (uint32_t i0 = (uint32_t)0U; i0 < bBits; i0++) + { + uint32_t i1 = (bBits - i0 - (uint32_t)1U) / (uint32_t)64U; + uint32_t j = (bBits - i0 - (uint32_t)1U) % (uint32_t)64U; + uint64_t tmp = b[i1]; + uint64_t bit = tmp >> 
j & (uint64_t)1U; + uint64_t sw1 = bit ^ sw; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i++) + { + uint64_t dummy = ((uint64_t)0U - sw1) & (resM[i] ^ aM[i]); + resM[i] = resM[i] ^ dummy; + aM[i] = aM[i] ^ dummy; + } + amont_mul(n, mu, aM, resM, aM); + amont_sqr(n, mu, resM, resM); + sw = bit; + } + uint64_t sw0 = sw; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i++) + { + uint64_t dummy = ((uint64_t)0U - sw0) & (resM[i] ^ aM[i]); + resM[i] = resM[i] ^ dummy; + aM[i] = aM[i] ^ dummy; + } + uint64_t tmp[8U] = { 0U }; + memcpy(tmp, resM, (uint32_t)4U * sizeof (uint64_t)); + reduction(n, mu, tmp, res); + return; + } + uint64_t aM[4U] = { 0U }; + uint64_t c0[8U] = { 0U }; + Hacl_Bignum256_mul(a, r2, c0); + reduction(n, mu, c0, aM); + uint64_t resM[4U] = { 0U }; + uint32_t bLen; + if (bBits == (uint32_t)0U) + { + bLen = (uint32_t)1U; + } + else + { + bLen = (bBits - (uint32_t)1U) / (uint32_t)64U + (uint32_t)1U; + } + uint64_t tmp[8U] = { 0U }; + memcpy(tmp, r2, (uint32_t)4U * sizeof (uint64_t)); + reduction(n, mu, tmp, resM); + uint32_t table_len = (uint32_t)16U; + KRML_CHECK_SIZE(sizeof (uint64_t), table_len * (uint32_t)4U); + uint64_t *table = alloca(table_len * (uint32_t)4U * sizeof (uint64_t)); + memset(table, 0U, table_len * (uint32_t)4U * sizeof (uint64_t)); + memcpy(table, resM, (uint32_t)4U * sizeof (uint64_t)); + uint64_t *t1 = table + (uint32_t)4U; + memcpy(t1, aM, (uint32_t)4U * sizeof (uint64_t)); + for (uint32_t i = (uint32_t)0U; i < table_len - (uint32_t)2U; i++) + { + uint64_t *t11 = table + (i + (uint32_t)1U) * (uint32_t)4U; + uint64_t *t2 = table + (i + (uint32_t)2U) * (uint32_t)4U; + amont_mul(n, mu, t11, aM, t2); + } + for (uint32_t i0 = (uint32_t)0U; i0 < bBits / (uint32_t)4U; i0++) + { + for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i++) + { + amont_sqr(n, mu, resM, resM); + } + uint64_t mask_l = (uint64_t)16U - (uint64_t)1U; + uint32_t i1 = (bBits - (uint32_t)4U * i0 - (uint32_t)4U) / (uint32_t)64U; + uint32_t j = (bBits - (uint32_t)4U * i0 - (uint32_t)4U) % (uint32_t)64U; + uint64_t p1 = b[i1] >> j; + uint64_t ite; + if (i1 + (uint32_t)1U < bLen && (uint32_t)0U < j) + { + ite = p1 | b[i1 + (uint32_t)1U] << ((uint32_t)64U - j); + } + else + { + ite = p1; + } + uint64_t bits_l = ite & mask_l; + uint64_t a_bits_l[4U] = { 0U }; + memcpy(a_bits_l, table, (uint32_t)4U * sizeof (uint64_t)); + for (uint32_t i2 = (uint32_t)0U; i2 < table_len - (uint32_t)1U; i2++) + { + uint64_t c = FStar_UInt64_eq_mask(bits_l, (uint64_t)(i2 + (uint32_t)1U)); + uint64_t *res_j = table + (i2 + (uint32_t)1U) * (uint32_t)4U; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i++) + { + uint64_t *os = a_bits_l; + uint64_t x = (c & res_j[i]) | (~c & a_bits_l[i]); + os[i] = x; + } + } + amont_mul(n, mu, resM, a_bits_l, resM); + } + if (!(bBits % (uint32_t)4U == (uint32_t)0U)) + { + uint32_t c = bBits % (uint32_t)4U; + for (uint32_t i = (uint32_t)0U; i < c; i++) + { + amont_sqr(n, mu, resM, resM); + } + uint32_t c10 = bBits % (uint32_t)4U; + uint64_t mask_l = ((uint64_t)1U << c10) - (uint64_t)1U; + uint32_t i0 = (uint32_t)0U; + uint32_t j = (uint32_t)0U; + uint64_t p1 = b[i0] >> j; + uint64_t ite; + if (i0 + (uint32_t)1U < bLen && (uint32_t)0U < j) + { + ite = p1 | b[i0 + (uint32_t)1U] << ((uint32_t)64U - j); + } + else + { + ite = p1; + } + uint64_t bits_c = ite & mask_l; + uint64_t bits_c0 = bits_c; + uint64_t a_bits_c[4U] = { 0U }; + memcpy(a_bits_c, table, (uint32_t)4U * sizeof (uint64_t)); + for (uint32_t i1 = (uint32_t)0U; i1 < table_len - (uint32_t)1U; i1++) + { + uint64_t c1 = 
FStar_UInt64_eq_mask(bits_c0, (uint64_t)(i1 + (uint32_t)1U)); + uint64_t *res_j = table + (i1 + (uint32_t)1U) * (uint32_t)4U; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i++) + { + uint64_t *os = a_bits_c; + uint64_t x = (c1 & res_j[i]) | (~c1 & a_bits_c[i]); + os[i] = x; + } + } + amont_mul(n, mu, resM, a_bits_c, resM); + } + uint64_t tmp0[8U] = { 0U }; + memcpy(tmp0, resM, (uint32_t)4U * sizeof (uint64_t)); + reduction(n, mu, tmp0, res); +} + +static inline void +exp_vartime( + uint32_t nBits, + uint64_t *n, + uint64_t *a, + uint32_t bBits, + uint64_t *b, + uint64_t *res +) +{ + uint64_t r2[4U] = { 0U }; + precompr2(nBits, n, r2); + uint64_t mu = Hacl_Bignum_ModInvLimb_mod_inv_uint64(n[0U]); + exp_vartime_precomp(n, mu, r2, a, bBits, b, res); +} + +static inline void +exp_consttime( + uint32_t nBits, + uint64_t *n, + uint64_t *a, + uint32_t bBits, + uint64_t *b, + uint64_t *res +) +{ + uint64_t r2[4U] = { 0U }; + precompr2(nBits, n, r2); + uint64_t mu = Hacl_Bignum_ModInvLimb_mod_inv_uint64(n[0U]); + exp_consttime_precomp(n, mu, r2, a, bBits, b, res); +} + +/* +Write `a ^ b mod n` in `res`. + + The arguments a, n and the outparam res are meant to be 256-bit bignums, i.e. uint64_t[4]. + + The argument b is a bignum of any size, and bBits is an upper bound on the + number of significant bits of b. A tighter bound results in faster execution + time. When in doubt, the number of bits for the bignum size is always a safe + default, e.g. if b is a 256-bit bignum, bBits should be 256. + + The function is *NOT* constant-time on the argument b. See the + mod_exp_consttime_* functions for constant-time variants. + + The function returns false if any of the following preconditions are violated, + true otherwise. + • n % 2 = 1 + • 1 < n + • b < pow2 bBits + • a < n +*/ +bool +Hacl_Bignum256_mod_exp_vartime( + uint64_t *n, + uint64_t *a, + uint32_t bBits, + uint64_t *b, + uint64_t *res +) +{ + uint64_t is_valid_m = exp_check(n, a, bBits, b); + uint32_t + nBits = (uint32_t)64U * (uint32_t)Hacl_Bignum_Lib_bn_get_top_index_u64((uint32_t)4U, n); + if (is_valid_m == (uint64_t)0xFFFFFFFFFFFFFFFFU) + { + exp_vartime(nBits, n, a, bBits, b, res); + } + else + { + memset(res, 0U, (uint32_t)4U * sizeof (uint64_t)); + } + return is_valid_m == (uint64_t)0xFFFFFFFFFFFFFFFFU; +} + +/* +Write `a ^ b mod n` in `res`. + + The arguments a, n and the outparam res are meant to be 256-bit bignums, i.e. uint64_t[4]. + + The argument b is a bignum of any size, and bBits is an upper bound on the + number of significant bits of b. A tighter bound results in faster execution + time. When in doubt, the number of bits for the bignum size is always a safe + default, e.g. if b is a 256-bit bignum, bBits should be 256. + + This function is constant-time over its argument b, at the cost of a slower + execution time than mod_exp_vartime. + + The function returns false if any of the following preconditions are violated, + true otherwise. 
+ • n % 2 = 1 + • 1 < n + • b < pow2 bBits + • a < n +*/ +bool +Hacl_Bignum256_mod_exp_consttime( + uint64_t *n, + uint64_t *a, + uint32_t bBits, + uint64_t *b, + uint64_t *res +) +{ + uint64_t is_valid_m = exp_check(n, a, bBits, b); + uint32_t + nBits = (uint32_t)64U * (uint32_t)Hacl_Bignum_Lib_bn_get_top_index_u64((uint32_t)4U, n); + if (is_valid_m == (uint64_t)0xFFFFFFFFFFFFFFFFU) + { + exp_consttime(nBits, n, a, bBits, b, res); + } + else + { + memset(res, 0U, (uint32_t)4U * sizeof (uint64_t)); + } + return is_valid_m == (uint64_t)0xFFFFFFFFFFFFFFFFU; +} + +/* +Write `a ^ (-1) mod n` in `res`. + + The arguments a, n and the outparam res are meant to be 256-bit bignums, i.e. uint64_t[4]. + + Before calling this function, the caller will need to ensure that the following + preconditions are observed. + • n is a prime + + The function returns false if any of the following preconditions are violated, true otherwise. + • n % 2 = 1 + • 1 < n + • 0 < a + • a < n +*/ +bool Hacl_Bignum256_mod_inv_prime_vartime(uint64_t *n, uint64_t *a, uint64_t *res) +{ + uint64_t one[4U] = { 0U }; + memset(one, 0U, (uint32_t)4U * sizeof (uint64_t)); + one[0U] = (uint64_t)1U; + uint64_t bit0 = n[0U] & (uint64_t)1U; + uint64_t m0 = (uint64_t)0U - bit0; + uint64_t acc0 = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i++) + { + uint64_t beq = FStar_UInt64_eq_mask(one[i], n[i]); + uint64_t blt = ~FStar_UInt64_gte_mask(one[i], n[i]); + acc0 = (beq & acc0) | (~beq & ((blt & (uint64_t)0xFFFFFFFFFFFFFFFFU) | (~blt & (uint64_t)0U))); + } + uint64_t m1 = acc0; + uint64_t m00 = m0 & m1; + uint64_t bn_zero[4U] = { 0U }; + uint64_t mask = (uint64_t)0xFFFFFFFFFFFFFFFFU; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i++) + { + uint64_t uu____0 = FStar_UInt64_eq_mask(a[i], bn_zero[i]); + mask = uu____0 & mask; + } + uint64_t mask1 = mask; + uint64_t res10 = mask1; + uint64_t m10 = res10; + uint64_t acc = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i++) + { + uint64_t beq = FStar_UInt64_eq_mask(a[i], n[i]); + uint64_t blt = ~FStar_UInt64_gte_mask(a[i], n[i]); + acc = (beq & acc) | (~beq & ((blt & (uint64_t)0xFFFFFFFFFFFFFFFFU) | (~blt & (uint64_t)0U))); + } + uint64_t m2 = acc; + uint64_t is_valid_m = (m00 & ~m10) & m2; + uint32_t + nBits = (uint32_t)64U * (uint32_t)Hacl_Bignum_Lib_bn_get_top_index_u64((uint32_t)4U, n); + if (is_valid_m == (uint64_t)0xFFFFFFFFFFFFFFFFU) + { + uint64_t n2[4U] = { 0U }; + uint64_t c0 = Lib_IntTypes_Intrinsics_sub_borrow_u64((uint64_t)0U, n[0U], (uint64_t)2U, n2); + uint64_t c1; + if ((uint32_t)1U < (uint32_t)4U) + { + uint32_t rLen = (uint32_t)3U; + uint64_t *a1 = n + (uint32_t)1U; + uint64_t *res1 = n2 + (uint32_t)1U; + uint64_t c = c0; + for (uint32_t i = (uint32_t)0U; i < rLen / (uint32_t)4U * (uint32_t)4U / (uint32_t)4U; i++) + { + uint64_t t1 = a1[(uint32_t)4U * i]; + uint64_t *res_i0 = res1 + (uint32_t)4U * i; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t1, (uint64_t)0U, res_i0); + uint64_t t10 = a1[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t *res_i1 = res1 + (uint32_t)4U * i + (uint32_t)1U; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t10, (uint64_t)0U, res_i1); + uint64_t t11 = a1[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t *res_i2 = res1 + (uint32_t)4U * i + (uint32_t)2U; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t11, (uint64_t)0U, res_i2); + uint64_t t12 = a1[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t *res_i = res1 + (uint32_t)4U * i + (uint32_t)3U; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t12, (uint64_t)0U, res_i); 
+ } + for (uint32_t i = rLen / (uint32_t)4U * (uint32_t)4U; i < rLen; i++) + { + uint64_t t1 = a1[i]; + uint64_t *res_i = res1 + i; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t1, (uint64_t)0U, res_i); + } + uint64_t c10 = c; + c1 = c10; + } + else + { + c1 = c0; + } + exp_vartime(nBits, n, a, (uint32_t)256U, n2, res); + } + else + { + memset(res, 0U, (uint32_t)4U * sizeof (uint64_t)); + } + return is_valid_m == (uint64_t)0xFFFFFFFFFFFFFFFFU; +} + + +/**********************************************/ +/* Arithmetic functions with precomputations. */ +/**********************************************/ + + +/* +Heap-allocate and initialize a montgomery context. + + The argument n is meant to be a 256-bit bignum, i.e. uint64_t[4]. + + Before calling this function, the caller will need to ensure that the following + preconditions are observed. + • n % 2 = 1 + • 1 < n + + The caller will need to call Hacl_Bignum256_mont_ctx_free on the return value + to avoid memory leaks. +*/ +Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 *Hacl_Bignum256_mont_ctx_init(uint64_t *n) +{ + uint64_t *r2 = KRML_HOST_CALLOC((uint32_t)4U, sizeof (uint64_t)); + uint64_t *n1 = KRML_HOST_CALLOC((uint32_t)4U, sizeof (uint64_t)); + uint64_t *r21 = r2; + uint64_t *n11 = n1; + memcpy(n11, n, (uint32_t)4U * sizeof (uint64_t)); + uint32_t + nBits = (uint32_t)64U * (uint32_t)Hacl_Bignum_Lib_bn_get_top_index_u64((uint32_t)4U, n); + precompr2(nBits, n, r21); + uint64_t mu = Hacl_Bignum_ModInvLimb_mod_inv_uint64(n[0U]); + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 + res = { .len = (uint32_t)4U, .n = n11, .mu = mu, .r2 = r21 }; + KRML_CHECK_SIZE(sizeof (Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64), (uint32_t)1U); + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 + *buf = KRML_HOST_MALLOC(sizeof (Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64)); + buf[0U] = res; + return buf; +} + +/* +Deallocate the memory previously allocated by Hacl_Bignum256_mont_ctx_init. + + The argument k is a montgomery context obtained through Hacl_Bignum256_mont_ctx_init. +*/ +void Hacl_Bignum256_mont_ctx_free(Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 *k) +{ + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; + uint64_t *n = k1.n; + uint64_t *r2 = k1.r2; + KRML_HOST_FREE(n); + KRML_HOST_FREE(r2); + KRML_HOST_FREE(k); +} + +/* +Write `a mod n` in `res`. + + The argument a is meant to be a 512-bit bignum, i.e. uint64_t[8]. + The outparam res is meant to be a 256-bit bignum, i.e. uint64_t[4]. + The argument k is a montgomery context obtained through Hacl_Bignum256_mont_ctx_init. +*/ +void +Hacl_Bignum256_mod_precomp( + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 *k, + uint64_t *a, + uint64_t *res +) +{ + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; + bn_slow_precomp(k1.n, k1.mu, k1.r2, a, res); +} + +/* +Write `a ^ b mod n` in `res`. + + The arguments a and the outparam res are meant to be 256-bit bignums, i.e. uint64_t[4]. + The argument k is a montgomery context obtained through Hacl_Bignum256_mont_ctx_init. + + The argument b is a bignum of any size, and bBits is an upper bound on the + number of significant bits of b. A tighter bound results in faster execution + time. When in doubt, the number of bits for the bignum size is always a safe + default, e.g. if b is a 256-bit bignum, bBits should be 256. + + The function is *NOT* constant-time on the argument b. See the + mod_exp_consttime_* functions for constant-time variants. + + Before calling this function, the caller will need to ensure that the following + preconditions are observed. 
+ • b < pow2 bBits + • a < n +*/ +void +Hacl_Bignum256_mod_exp_vartime_precomp( + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 *k, + uint64_t *a, + uint32_t bBits, + uint64_t *b, + uint64_t *res +) +{ + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; + exp_vartime_precomp(k1.n, k1.mu, k1.r2, a, bBits, b, res); +} + +/* +Write `a ^ b mod n` in `res`. + + The arguments a and the outparam res are meant to be 256-bit bignums, i.e. uint64_t[4]. + The argument k is a montgomery context obtained through Hacl_Bignum256_mont_ctx_init. + + The argument b is a bignum of any size, and bBits is an upper bound on the + number of significant bits of b. A tighter bound results in faster execution + time. When in doubt, the number of bits for the bignum size is always a safe + default, e.g. if b is a 256-bit bignum, bBits should be 256. + + This function is constant-time over its argument b, at the cost of a slower + execution time than mod_exp_vartime_*. + + Before calling this function, the caller will need to ensure that the following + preconditions are observed. + • b < pow2 bBits + • a < n +*/ +void +Hacl_Bignum256_mod_exp_consttime_precomp( + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 *k, + uint64_t *a, + uint32_t bBits, + uint64_t *b, + uint64_t *res +) +{ + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; + exp_consttime_precomp(k1.n, k1.mu, k1.r2, a, bBits, b, res); +} + +/* +Write `a ^ (-1) mod n` in `res`. + + The argument a and the outparam res are meant to be 256-bit bignums, i.e. uint64_t[4]. + The argument k is a montgomery context obtained through Hacl_Bignum256_mont_ctx_init. + + Before calling this function, the caller will need to ensure that the following + preconditions are observed. + • n is a prime + • 0 < a + • a < n +*/ +void +Hacl_Bignum256_mod_inv_prime_vartime_precomp( + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 *k, + uint64_t *a, + uint64_t *res +) +{ + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; + uint64_t n2[4U] = { 0U }; + uint64_t c0 = Lib_IntTypes_Intrinsics_sub_borrow_u64((uint64_t)0U, k1.n[0U], (uint64_t)2U, n2); + uint64_t c1; + if ((uint32_t)1U < (uint32_t)4U) + { + uint32_t rLen = (uint32_t)3U; + uint64_t *a1 = k1.n + (uint32_t)1U; + uint64_t *res1 = n2 + (uint32_t)1U; + uint64_t c = c0; + for (uint32_t i = (uint32_t)0U; i < rLen / (uint32_t)4U * (uint32_t)4U / (uint32_t)4U; i++) + { + uint64_t t1 = a1[(uint32_t)4U * i]; + uint64_t *res_i0 = res1 + (uint32_t)4U * i; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t1, (uint64_t)0U, res_i0); + uint64_t t10 = a1[(uint32_t)4U * i + (uint32_t)1U]; + uint64_t *res_i1 = res1 + (uint32_t)4U * i + (uint32_t)1U; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t10, (uint64_t)0U, res_i1); + uint64_t t11 = a1[(uint32_t)4U * i + (uint32_t)2U]; + uint64_t *res_i2 = res1 + (uint32_t)4U * i + (uint32_t)2U; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t11, (uint64_t)0U, res_i2); + uint64_t t12 = a1[(uint32_t)4U * i + (uint32_t)3U]; + uint64_t *res_i = res1 + (uint32_t)4U * i + (uint32_t)3U; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t12, (uint64_t)0U, res_i); + } + for (uint32_t i = rLen / (uint32_t)4U * (uint32_t)4U; i < rLen; i++) + { + uint64_t t1 = a1[i]; + uint64_t *res_i = res1 + i; + c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t1, (uint64_t)0U, res_i); + } + uint64_t c10 = c; + c1 = c10; + } + else + { + c1 = c0; + } + exp_vartime_precomp(k1.n, k1.mu, k1.r2, a, (uint32_t)256U, n2, res); +} + + +/********************/ +/* Loads and stores */ +/********************/ + + +/* +Load a bid-endian bignum from 
memory. + + The argument b points to len bytes of valid memory. + The function returns a heap-allocated bignum of size sufficient to hold the + result of loading b, or NULL if either the allocation failed, or the amount of + required memory would exceed 4GB. + + If the return value is non-null, clients must eventually call free(3) on it to + avoid memory leaks. +*/ +uint64_t *Hacl_Bignum256_new_bn_from_bytes_be(uint32_t len, uint8_t *b) +{ + if + ( + len + == (uint32_t)0U + || !((len - (uint32_t)1U) / (uint32_t)8U + (uint32_t)1U <= (uint32_t)536870911U) + ) + { + return NULL; + } + KRML_CHECK_SIZE(sizeof (uint64_t), (len - (uint32_t)1U) / (uint32_t)8U + (uint32_t)1U); + uint64_t + *res = KRML_HOST_CALLOC((len - (uint32_t)1U) / (uint32_t)8U + (uint32_t)1U, sizeof (uint64_t)); + if (res == NULL) + { + return res; + } + uint64_t *res1 = res; + uint64_t *res2 = res1; + uint32_t bnLen = (len - (uint32_t)1U) / (uint32_t)8U + (uint32_t)1U; + uint32_t tmpLen = (uint32_t)8U * bnLen; + KRML_CHECK_SIZE(sizeof (uint8_t), tmpLen); + uint8_t *tmp = alloca(tmpLen * sizeof (uint8_t)); + memset(tmp, 0U, tmpLen * sizeof (uint8_t)); + memcpy(tmp + tmpLen - len, b, len * sizeof (uint8_t)); + for (uint32_t i = (uint32_t)0U; i < bnLen; i++) + { + uint64_t *os = res2; + uint64_t u = load64_be(tmp + (bnLen - i - (uint32_t)1U) * (uint32_t)8U); + uint64_t x = u; + os[i] = x; + } + return res2; +} + +/* +Load a little-endian bignum from memory. + + The argument b points to len bytes of valid memory. + The function returns a heap-allocated bignum of size sufficient to hold the + result of loading b, or NULL if either the allocation failed, or the amount of + required memory would exceed 4GB. + + If the return value is non-null, clients must eventually call free(3) on it to + avoid memory leaks. +*/ +uint64_t *Hacl_Bignum256_new_bn_from_bytes_le(uint32_t len, uint8_t *b) +{ + if + ( + len + == (uint32_t)0U + || !((len - (uint32_t)1U) / (uint32_t)8U + (uint32_t)1U <= (uint32_t)536870911U) + ) + { + return NULL; + } + KRML_CHECK_SIZE(sizeof (uint64_t), (len - (uint32_t)1U) / (uint32_t)8U + (uint32_t)1U); + uint64_t + *res = KRML_HOST_CALLOC((len - (uint32_t)1U) / (uint32_t)8U + (uint32_t)1U, sizeof (uint64_t)); + if (res == NULL) + { + return res; + } + uint64_t *res1 = res; + uint64_t *res2 = res1; + uint32_t bnLen = (len - (uint32_t)1U) / (uint32_t)8U + (uint32_t)1U; + uint32_t tmpLen = (uint32_t)8U * bnLen; + KRML_CHECK_SIZE(sizeof (uint8_t), tmpLen); + uint8_t *tmp = alloca(tmpLen * sizeof (uint8_t)); + memset(tmp, 0U, tmpLen * sizeof (uint8_t)); + memcpy(tmp, b, len * sizeof (uint8_t)); + for (uint32_t i = (uint32_t)0U; i < (len - (uint32_t)1U) / (uint32_t)8U + (uint32_t)1U; i++) + { + uint64_t *os = res2; + uint8_t *bj = tmp + i * (uint32_t)8U; + uint64_t u = load64_le(bj); + uint64_t r1 = u; + uint64_t x = r1; + os[i] = x; + } + return res2; +} + +/* +Serialize a bignum into big-endian memory. + + The argument b points to a 256-bit bignum. + The outparam res points to 32 bytes of valid memory. 
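As a minimal round-trip sketch of the load/store API above (the helper name is hypothetical, and the include path assumes this patch's src/math/bigfix layout):

#include <stdlib.h>
#include <stdint.h>
#include "math/bigfix/Hacl_Bignum256.h"

/* Hypothetical helper: load a 32-byte big-endian value into a heap-allocated
   256-bit bignum and serialize it back out. Returns 0 on success, -1 if the
   allocation inside Hacl_Bignum256_new_bn_from_bytes_be failed. */
static int roundtrip_be(uint8_t in[32U], uint8_t out[32U])
{
  uint64_t *bn = Hacl_Bignum256_new_bn_from_bytes_be((uint32_t)32U, in);
  if (bn == NULL)
    return -1;
  Hacl_Bignum256_bn_to_bytes_be(bn, out);  /* out now holds the same 32 bytes as in */
  free(bn);                                /* caller frees the heap-allocated bignum */
  return 0;
}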
+*/ +void Hacl_Bignum256_bn_to_bytes_be(uint64_t *b, uint8_t *res) +{ + uint32_t bnLen = ((uint32_t)32U - (uint32_t)1U) / (uint32_t)8U + (uint32_t)1U; + uint32_t tmpLen = (uint32_t)8U * bnLen; + KRML_CHECK_SIZE(sizeof (uint8_t), tmpLen); + uint8_t *tmp = alloca(tmpLen * sizeof (uint8_t)); + memset(tmp, 0U, tmpLen * sizeof (uint8_t)); + uint32_t numb = (uint32_t)8U; + for (uint32_t i = (uint32_t)0U; i < bnLen; i++) + { + store64_be(tmp + i * numb, b[bnLen - i - (uint32_t)1U]); + } + memcpy(res, tmp + tmpLen - (uint32_t)32U, (uint32_t)32U * sizeof (uint8_t)); +} + +/* +Serialize a bignum into little-endian memory. + + The argument b points to a 256-bit bignum. + The outparam res points to 32 bytes of valid memory. +*/ +void Hacl_Bignum256_bn_to_bytes_le(uint64_t *b, uint8_t *res) +{ + uint32_t bnLen = ((uint32_t)32U - (uint32_t)1U) / (uint32_t)8U + (uint32_t)1U; + uint32_t tmpLen = (uint32_t)8U * bnLen; + KRML_CHECK_SIZE(sizeof (uint8_t), tmpLen); + uint8_t *tmp = alloca(tmpLen * sizeof (uint8_t)); + memset(tmp, 0U, tmpLen * sizeof (uint8_t)); + for (uint32_t i = (uint32_t)0U; i < bnLen; i++) + { + store64_le(tmp + i * (uint32_t)8U, b[i]); + } + memcpy(res, tmp, (uint32_t)32U * sizeof (uint8_t)); +} + + +/***************/ +/* Comparisons */ +/***************/ + + +/* +Returns 2 ^ 64 - 1 if and only if the argument a is strictly less than the argument b, + otherwise returns 0. +*/ +uint64_t Hacl_Bignum256_lt_mask(uint64_t *a, uint64_t *b) +{ + uint64_t acc = (uint64_t)0U; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i++) + { + uint64_t beq = FStar_UInt64_eq_mask(a[i], b[i]); + uint64_t blt = ~FStar_UInt64_gte_mask(a[i], b[i]); + acc = (beq & acc) | (~beq & ((blt & (uint64_t)0xFFFFFFFFFFFFFFFFU) | (~blt & (uint64_t)0U))); + } + return acc; +} + diff --git a/src/math/bigfix/Hacl_Bignum256.h b/src/math/bigfix/Hacl_Bignum256.h new file mode 100644 index 000000000..db7c99b78 --- /dev/null +++ b/src/math/bigfix/Hacl_Bignum256.h @@ -0,0 +1,375 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + + +#ifndef __Hacl_Bignum256_H +#define __Hacl_Bignum256_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include "kremlin/internal/types.h" +#include "kremlin/lowstar_endianness.h" +#include +#include "kremlin/internal/target.h" + +#include "Hacl_Bignum.h" +#include "Hacl_Bignum_Base.h" + +/******************************************************************************* + +A verified 256-bit bignum library. + +This is a 64-bit optimized version, where bignums are represented as an array +of four unsigned 64-bit integers, i.e. uint64_t[4]. Furthermore, the +limbs are stored in little-endian format, i.e. the least significant limb is at +index 0. Each limb is stored in native format in memory. Example: + + uint64_t sixteen[4] = { 0x10; 0x00; 0x00; 0x00 } + +We strongly encourage users to go through the conversion functions, e.g. +bn_from_bytes_be, to i) not depend on internal representation choices and ii) +have the ability to switch easily to a 32-bit optimized version in the future. + +*******************************************************************************/ + +/************************/ +/* Arithmetic functions */ +/************************/ + + +/* +Write `a + b mod 2^256` in `res`. + + This functions returns the carry. + + The arguments a, b and res are meant to be 256-bit bignums, i.e. uint64_t[4] +*/ +uint64_t Hacl_Bignum256_add(uint64_t *a, uint64_t *b, uint64_t *res); + +/* +Write `a - b mod 2^256` in `res`. + + This functions returns the carry. + + The arguments a, b and res are meant to be 256-bit bignums, i.e. uint64_t[4] +*/ +uint64_t Hacl_Bignum256_sub(uint64_t *a, uint64_t *b, uint64_t *res); + +/* +Write `a * b` in `res`. + + The arguments a and b are meant to be 256-bit bignums, i.e. uint64_t[4]. + The outparam res is meant to be a 512-bit bignum, i.e. uint64_t[8]. +*/ +void Hacl_Bignum256_mul(uint64_t *a, uint64_t *b, uint64_t *res); + +/* +Write `a * a` in `res`. + + The argument a is meant to be a 256-bit bignum, i.e. uint64_t[4]. + The outparam res is meant to be a 512-bit bignum, i.e. uint64_t[8]. +*/ +void Hacl_Bignum256_sqr(uint64_t *a, uint64_t *res); + +/* +Write `a mod n` in `res`. + + The argument a is meant to be a 512-bit bignum, i.e. uint64_t[8]. + The argument n and the outparam res are meant to be 256-bit bignums, i.e. uint64_t[4]. + + The function returns false if any of the following preconditions are violated, + true otherwise. + • 1 < n + • n % 2 = 1 +*/ +bool Hacl_Bignum256_mod(uint64_t *n, uint64_t *a, uint64_t *res); + +/* +Write `a ^ b mod n` in `res`. + + The arguments a, n and the outparam res are meant to be 256-bit bignums, i.e. uint64_t[4]. + + The argument b is a bignum of any size, and bBits is an upper bound on the + number of significant bits of b. A tighter bound results in faster execution + time. When in doubt, the number of bits for the bignum size is always a safe + default, e.g. if b is a 256-bit bignum, bBits should be 256. + + The function is *NOT* constant-time on the argument b. See the + mod_exp_consttime_* functions for constant-time variants. + + The function returns false if any of the following preconditions are violated, + true otherwise. + • n % 2 = 1 + • 1 < n + • b < pow2 bBits + • a < n +*/ +bool +Hacl_Bignum256_mod_exp_vartime( + uint64_t *n, + uint64_t *a, + uint32_t bBits, + uint64_t *b, + uint64_t *res +); + +/* +Write `a ^ b mod n` in `res`. + + The arguments a, n and the outparam res are meant to be 256-bit bignums, i.e. uint64_t[4]. 
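A minimal usage sketch for the exponentiation entry points above (the wrapper name is hypothetical; bBits is set to 256, the safe default for a full 256-bit exponent; the include path assumes this patch's src/math/bigfix layout):

#include <stdbool.h>
#include <stdint.h>
#include "math/bigfix/Hacl_Bignum256.h"

/* Hypothetical wrapper: res = a ^ b mod n, with n, a, b, res all 256-bit bignums
   (uint64_t[4]). Returns false when the preconditions are violated (n even,
   n <= 1, a >= n, or b >= 2^bBits), in which case res is set to zero. */
static bool modexp256(uint64_t n[4U], uint64_t a[4U], uint64_t b[4U], uint64_t res[4U])
{
  return Hacl_Bignum256_mod_exp_vartime(n, a, (uint32_t)256U, b, res);
}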
+ + The argument b is a bignum of any size, and bBits is an upper bound on the + number of significant bits of b. A tighter bound results in faster execution + time. When in doubt, the number of bits for the bignum size is always a safe + default, e.g. if b is a 256-bit bignum, bBits should be 256. + + This function is constant-time over its argument b, at the cost of a slower + execution time than mod_exp_vartime. + + The function returns false if any of the following preconditions are violated, + true otherwise. + • n % 2 = 1 + • 1 < n + • b < pow2 bBits + • a < n +*/ +bool +Hacl_Bignum256_mod_exp_consttime( + uint64_t *n, + uint64_t *a, + uint32_t bBits, + uint64_t *b, + uint64_t *res +); + +/* +Write `a ^ (-1) mod n` in `res`. + + The arguments a, n and the outparam res are meant to be 256-bit bignums, i.e. uint64_t[4]. + + Before calling this function, the caller will need to ensure that the following + preconditions are observed. + • n is a prime + + The function returns false if any of the following preconditions are violated, true otherwise. + • n % 2 = 1 + • 1 < n + • 0 < a + • a < n +*/ +bool Hacl_Bignum256_mod_inv_prime_vartime(uint64_t *n, uint64_t *a, uint64_t *res); + +typedef struct Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64_s +{ + uint32_t len; + uint64_t *n; + uint64_t mu; + uint64_t *r2; +} +Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64; + + +/**********************************************/ +/* Arithmetic functions with precomputations. */ +/**********************************************/ + + +/* +Heap-allocate and initialize a montgomery context. + + The argument n is meant to be a 256-bit bignum, i.e. uint64_t[4]. + + Before calling this function, the caller will need to ensure that the following + preconditions are observed. + • n % 2 = 1 + • 1 < n + + The caller will need to call Hacl_Bignum256_mont_ctx_free on the return value + to avoid memory leaks. +*/ +Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 *Hacl_Bignum256_mont_ctx_init(uint64_t *n); + +/* +Deallocate the memory previously allocated by Hacl_Bignum256_mont_ctx_init. + + The argument k is a montgomery context obtained through Hacl_Bignum256_mont_ctx_init. +*/ +void Hacl_Bignum256_mont_ctx_free(Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 *k); + +/* +Write `a mod n` in `res`. + + The argument a is meant to be a 512-bit bignum, i.e. uint64_t[8]. + The outparam res is meant to be a 256-bit bignum, i.e. uint64_t[4]. + The argument k is a montgomery context obtained through Hacl_Bignum256_mont_ctx_init. +*/ +void +Hacl_Bignum256_mod_precomp( + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 *k, + uint64_t *a, + uint64_t *res +); + +/* +Write `a ^ b mod n` in `res`. + + The arguments a and the outparam res are meant to be 256-bit bignums, i.e. uint64_t[4]. + The argument k is a montgomery context obtained through Hacl_Bignum256_mont_ctx_init. + + The argument b is a bignum of any size, and bBits is an upper bound on the + number of significant bits of b. A tighter bound results in faster execution + time. When in doubt, the number of bits for the bignum size is always a safe + default, e.g. if b is a 256-bit bignum, bBits should be 256. + + The function is *NOT* constant-time on the argument b. See the + mod_exp_consttime_* functions for constant-time variants. + + Before calling this function, the caller will need to ensure that the following + preconditions are observed. 
+ • b < pow2 bBits + • a < n +*/ +void +Hacl_Bignum256_mod_exp_vartime_precomp( + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 *k, + uint64_t *a, + uint32_t bBits, + uint64_t *b, + uint64_t *res +); + +/* +Write `a ^ b mod n` in `res`. + + The arguments a and the outparam res are meant to be 256-bit bignums, i.e. uint64_t[4]. + The argument k is a montgomery context obtained through Hacl_Bignum256_mont_ctx_init. + + The argument b is a bignum of any size, and bBits is an upper bound on the + number of significant bits of b. A tighter bound results in faster execution + time. When in doubt, the number of bits for the bignum size is always a safe + default, e.g. if b is a 256-bit bignum, bBits should be 256. + + This function is constant-time over its argument b, at the cost of a slower + execution time than mod_exp_vartime_*. + + Before calling this function, the caller will need to ensure that the following + preconditions are observed. + • b < pow2 bBits + • a < n +*/ +void +Hacl_Bignum256_mod_exp_consttime_precomp( + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 *k, + uint64_t *a, + uint32_t bBits, + uint64_t *b, + uint64_t *res +); + +/* +Write `a ^ (-1) mod n` in `res`. + + The argument a and the outparam res are meant to be 256-bit bignums, i.e. uint64_t[4]. + The argument k is a montgomery context obtained through Hacl_Bignum256_mont_ctx_init. + + Before calling this function, the caller will need to ensure that the following + preconditions are observed. + • n is a prime + • 0 < a + • a < n +*/ +void +Hacl_Bignum256_mod_inv_prime_vartime_precomp( + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 *k, + uint64_t *a, + uint64_t *res +); + + +/********************/ +/* Loads and stores */ +/********************/ + + +/* +Load a bid-endian bignum from memory. + + The argument b points to len bytes of valid memory. + The function returns a heap-allocated bignum of size sufficient to hold the + result of loading b, or NULL if either the allocation failed, or the amount of + required memory would exceed 4GB. + + If the return value is non-null, clients must eventually call free(3) on it to + avoid memory leaks. +*/ +uint64_t *Hacl_Bignum256_new_bn_from_bytes_be(uint32_t len, uint8_t *b); + +/* +Load a little-endian bignum from memory. + + The argument b points to len bytes of valid memory. + The function returns a heap-allocated bignum of size sufficient to hold the + result of loading b, or NULL if either the allocation failed, or the amount of + required memory would exceed 4GB. + + If the return value is non-null, clients must eventually call free(3) on it to + avoid memory leaks. +*/ +uint64_t *Hacl_Bignum256_new_bn_from_bytes_le(uint32_t len, uint8_t *b); + +/* +Serialize a bignum into big-endian memory. + + The argument b points to a 256-bit bignum. + The outparam res points to 32 bytes of valid memory. +*/ +void Hacl_Bignum256_bn_to_bytes_be(uint64_t *b, uint8_t *res); + +/* +Serialize a bignum into little-endian memory. + + The argument b points to a 256-bit bignum. + The outparam res points to 32 bytes of valid memory. +*/ +void Hacl_Bignum256_bn_to_bytes_le(uint64_t *b, uint8_t *res); + + +/***************/ +/* Comparisons */ +/***************/ + + +/* +Returns 2 ^ 64 - 1 if and only if the argument a is strictly less than the argument b, + otherwise returns 0. 
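The all-ones/all-zeros mask convention above composes with the branchless selection idiom used throughout this file; a small sketch (helper name hypothetical, include path as in this patch):

#include <stdint.h>
#include "math/bigfix/Hacl_Bignum256.h"

/* Hypothetical helper: constant-time select, dst = (a < b) ? x : y, limb by limb. */
static void select_if_lt(uint64_t a[4U], uint64_t b[4U],
                         uint64_t x[4U], uint64_t y[4U], uint64_t dst[4U])
{
  uint64_t m = Hacl_Bignum256_lt_mask(a, b);  /* 0xFFFFFFFFFFFFFFFF iff a < b, else 0 */
  for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i++)
    dst[i] = (m & x[i]) | (~m & y[i]);
}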
+*/ +uint64_t Hacl_Bignum256_lt_mask(uint64_t *a, uint64_t *b); + +#if defined(__cplusplus) +} +#endif + +#define __Hacl_Bignum256_H_DEFINED +#endif diff --git a/src/math/bigfix/Hacl_Bignum_Base.h b/src/math/bigfix/Hacl_Bignum_Base.h new file mode 100644 index 000000000..f22c22392 --- /dev/null +++ b/src/math/bigfix/Hacl_Bignum_Base.h @@ -0,0 +1,73 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + + +#ifndef __Hacl_Bignum_Base_H +#define __Hacl_Bignum_Base_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include "kremlin/internal/types.h" +#include "kremlin/lowstar_endianness.h" +#include +#include "kremlin/internal/target.h" + +static inline uint64_t +Hacl_Bignum_Base_mul_wide_add_u64(uint64_t a, uint64_t b, uint64_t c_in, uint64_t *out) +{ + FStar_UInt128_uint128 + res = FStar_UInt128_add(FStar_UInt128_mul_wide(a, b), FStar_UInt128_uint64_to_uint128(c_in)); + out[0U] = FStar_UInt128_uint128_to_uint64(res); + return FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(res, (uint32_t)64U)); +} + +static inline uint32_t +Hacl_Bignum_Base_mul_wide_add2_u32(uint32_t a, uint32_t b, uint32_t c_in, uint32_t *out) +{ + uint32_t out0 = out[0U]; + uint64_t res = (uint64_t)a * (uint64_t)b + (uint64_t)c_in + (uint64_t)out0; + out[0U] = (uint32_t)res; + return (uint32_t)(res >> (uint32_t)32U); +} + +static inline uint64_t +Hacl_Bignum_Base_mul_wide_add2_u64(uint64_t a, uint64_t b, uint64_t c_in, uint64_t *out) +{ + uint64_t out0 = out[0U]; + FStar_UInt128_uint128 + res = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(a, b), + FStar_UInt128_uint64_to_uint128(c_in)), + FStar_UInt128_uint64_to_uint128(out0)); + out[0U] = FStar_UInt128_uint128_to_uint64(res); + return FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(res, (uint32_t)64U)); +} + +#if defined(__cplusplus) +} +#endif + +#define __Hacl_Bignum_Base_H_DEFINED +#endif diff --git a/src/math/bigfix/Hacl_IntTypes_Intrinsics.h b/src/math/bigfix/Hacl_IntTypes_Intrinsics.h new file mode 100644 index 000000000..5faf4eddf --- /dev/null +++ b/src/math/bigfix/Hacl_IntTypes_Intrinsics.h @@ -0,0 +1,88 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without 
limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + + +#ifndef __Hacl_IntTypes_Intrinsics_H +#define __Hacl_IntTypes_Intrinsics_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include "kremlin/internal/types.h" +#include "kremlin/lowstar_endianness.h" +#include +#include "kremlin/internal/target.h" + +static inline uint32_t +Hacl_IntTypes_Intrinsics_add_carry_u32(uint32_t cin, uint32_t x, uint32_t y, uint32_t *r) +{ + uint32_t res = x + cin + y; + uint32_t + c = (~FStar_UInt32_gte_mask(res, x) | (FStar_UInt32_eq_mask(res, x) & cin)) & (uint32_t)1U; + r[0U] = res; + return c; +} + +static inline uint64_t +Hacl_IntTypes_Intrinsics_add_carry_u64(uint64_t cin, uint64_t x, uint64_t y, uint64_t *r) +{ + uint64_t res = x + cin + y; + uint64_t + c = (~FStar_UInt64_gte_mask(res, x) | (FStar_UInt64_eq_mask(res, x) & cin)) & (uint64_t)1U; + r[0U] = res; + return c; +} + +static inline uint32_t +Hacl_IntTypes_Intrinsics_sub_borrow_u32(uint32_t cin, uint32_t x, uint32_t y, uint32_t *r) +{ + uint32_t res = x - y - cin; + uint32_t + c = + ((FStar_UInt32_gte_mask(res, x) & ~FStar_UInt32_eq_mask(res, x)) + | (FStar_UInt32_eq_mask(res, x) & cin)) + & (uint32_t)1U; + r[0U] = res; + return c; +} + +static inline uint64_t +Hacl_IntTypes_Intrinsics_sub_borrow_u64(uint64_t cin, uint64_t x, uint64_t y, uint64_t *r) +{ + uint64_t res = x - y - cin; + uint64_t + c = + ((FStar_UInt64_gte_mask(res, x) & ~FStar_UInt64_eq_mask(res, x)) + | (FStar_UInt64_eq_mask(res, x) & cin)) + & (uint64_t)1U; + r[0U] = res; + return c; +} + +#if defined(__cplusplus) +} +#endif + +#define __Hacl_IntTypes_Intrinsics_H_DEFINED +#endif diff --git a/src/math/bigfix/kremlib/FStar_UInt128.h b/src/math/bigfix/kremlib/FStar_UInt128.h new file mode 100644 index 000000000..9b5ece517 --- /dev/null +++ b/src/math/bigfix/kremlib/FStar_UInt128.h @@ -0,0 +1,79 @@ +/* + Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. 
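The portable carry/borrow intrinsics in Hacl_IntTypes_Intrinsics.h above chain naturally across limbs; a brief sketch (helper name hypothetical; the include path assumes this patch's layout with the kremlib mask helpers on the include path):

#include <stdint.h>
#include "math/bigfix/Hacl_IntTypes_Intrinsics.h"

/* Hypothetical helper: add two 2-limb (128-bit) numbers, propagating the carry
   through Hacl_IntTypes_Intrinsics_add_carry_u64. Returns the final carry out. */
static uint64_t add_2limb(uint64_t a[2U], uint64_t b[2U], uint64_t r[2U])
{
  uint64_t c = Hacl_IntTypes_Intrinsics_add_carry_u64((uint64_t)0U, a[0U], b[0U], r);
  c = Hacl_IntTypes_Intrinsics_add_carry_u64(c, a[1U], b[1U], r + 1);
  return c;
}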
+*/ + + +#ifndef __FStar_UInt128_H +#define __FStar_UInt128_H +#include +#include +#include "kremlin/lowstar_endianness.h" +#include "kremlin/internal/types.h" +#include "kremlin/internal/target.h" + + + + +static inline FStar_UInt128_uint128 +FStar_UInt128_add(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline FStar_UInt128_uint128 +FStar_UInt128_add_underspec(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline FStar_UInt128_uint128 +FStar_UInt128_add_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline FStar_UInt128_uint128 +FStar_UInt128_sub(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline FStar_UInt128_uint128 +FStar_UInt128_sub_underspec(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline FStar_UInt128_uint128 +FStar_UInt128_sub_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline FStar_UInt128_uint128 +FStar_UInt128_logand(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline FStar_UInt128_uint128 +FStar_UInt128_logxor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline FStar_UInt128_uint128 +FStar_UInt128_logor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline FStar_UInt128_uint128 FStar_UInt128_lognot(FStar_UInt128_uint128 a); + +static inline FStar_UInt128_uint128 +FStar_UInt128_shift_left(FStar_UInt128_uint128 a, uint32_t s); + +static inline FStar_UInt128_uint128 +FStar_UInt128_shift_right(FStar_UInt128_uint128 a, uint32_t s); + +static inline bool FStar_UInt128_eq(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline bool FStar_UInt128_gt(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline bool FStar_UInt128_lt(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline bool FStar_UInt128_gte(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline bool FStar_UInt128_lte(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline FStar_UInt128_uint128 +FStar_UInt128_eq_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline FStar_UInt128_uint128 +FStar_UInt128_gte_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline FStar_UInt128_uint128 FStar_UInt128_uint64_to_uint128(uint64_t a); + +static inline uint64_t FStar_UInt128_uint128_to_uint64(FStar_UInt128_uint128 a); + +static inline FStar_UInt128_uint128 FStar_UInt128_mul32(uint64_t x, uint32_t y); + +static inline FStar_UInt128_uint128 FStar_UInt128_mul_wide(uint64_t x, uint64_t y); + + +#define __FStar_UInt128_H_DEFINED +#endif diff --git a/src/math/bigfix/kremlib/FStar_UInt128_Verified.h b/src/math/bigfix/kremlib/FStar_UInt128_Verified.h new file mode 100644 index 000000000..45e3c1117 --- /dev/null +++ b/src/math/bigfix/kremlib/FStar_UInt128_Verified.h @@ -0,0 +1,347 @@ +/* + Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. 
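The uint128 shim declared above is what the bignum code uses for 64x64 -> 128-bit products (see amont_sqr earlier in Hacl_Bignum256.c); a short sketch of splitting a wide product into high and low limbs, assuming the kremlib headers resolve as in this patch's build:

#include <stdint.h>
#include "math/bigfix/Hacl_Bignum256.h"  /* assumed to pull in the FStar_UInt128 shims in this build */

/* Hypothetical helper: full 64x64 -> 128-bit product, split into hi/lo 64-bit limbs. */
static void mul_wide_64(uint64_t x, uint64_t y, uint64_t *hi, uint64_t *lo)
{
  FStar_UInt128_uint128 p = FStar_UInt128_mul_wide(x, y);
  *lo = FStar_UInt128_uint128_to_uint64(p);
  *hi = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(p, (uint32_t)64U));
}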
+*/ + + +#ifndef __FStar_UInt128_Verified_H +#define __FStar_UInt128_Verified_H +#include +#include +#include "kremlin/internal/types.h" +#include "kremlin/internal/target.h" + + +#include "FStar_UInt_8_16_32_64.h" + +static inline uint64_t FStar_UInt128_constant_time_carry(uint64_t a, uint64_t b) +{ + return (a ^ ((a ^ b) | ((a - b) ^ b))) >> (uint32_t)63U; +} + +static inline uint64_t FStar_UInt128_carry(uint64_t a, uint64_t b) +{ + return FStar_UInt128_constant_time_carry(a, b); +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_add(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + FStar_UInt128_uint128 lit; + lit.low = a.low + b.low; + lit.high = a.high + b.high + FStar_UInt128_carry(a.low + b.low, b.low); + return lit; +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_add_underspec(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + FStar_UInt128_uint128 lit; + lit.low = a.low + b.low; + lit.high = a.high + b.high + FStar_UInt128_carry(a.low + b.low, b.low); + return lit; +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_add_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + FStar_UInt128_uint128 lit; + lit.low = a.low + b.low; + lit.high = a.high + b.high + FStar_UInt128_carry(a.low + b.low, b.low); + return lit; +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_sub(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + FStar_UInt128_uint128 lit; + lit.low = a.low - b.low; + lit.high = a.high - b.high - FStar_UInt128_carry(a.low, a.low - b.low); + return lit; +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_sub_underspec(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + FStar_UInt128_uint128 lit; + lit.low = a.low - b.low; + lit.high = a.high - b.high - FStar_UInt128_carry(a.low, a.low - b.low); + return lit; +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_sub_mod_impl(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + FStar_UInt128_uint128 lit; + lit.low = a.low - b.low; + lit.high = a.high - b.high - FStar_UInt128_carry(a.low, a.low - b.low); + return lit; +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_sub_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + return FStar_UInt128_sub_mod_impl(a, b); +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_logand(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + FStar_UInt128_uint128 lit; + lit.low = a.low & b.low; + lit.high = a.high & b.high; + return lit; +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_logxor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + FStar_UInt128_uint128 lit; + lit.low = a.low ^ b.low; + lit.high = a.high ^ b.high; + return lit; +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_logor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + FStar_UInt128_uint128 lit; + lit.low = a.low | b.low; + lit.high = a.high | b.high; + return lit; +} + +static inline FStar_UInt128_uint128 FStar_UInt128_lognot(FStar_UInt128_uint128 a) +{ + FStar_UInt128_uint128 lit; + lit.low = ~a.low; + lit.high = ~a.high; + return lit; +} + +static uint32_t FStar_UInt128_u32_64 = (uint32_t)64U; + +static inline uint64_t FStar_UInt128_add_u64_shift_left(uint64_t hi, uint64_t lo, uint32_t s) +{ + return (hi << s) + (lo >> (FStar_UInt128_u32_64 - s)); +} + +static inline uint64_t +FStar_UInt128_add_u64_shift_left_respec(uint64_t hi, uint64_t lo, uint32_t s) +{ + return FStar_UInt128_add_u64_shift_left(hi, lo, s); +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_shift_left_small(FStar_UInt128_uint128 a, uint32_t s) +{ + 
if (s == (uint32_t)0U) + { + return a; + } + else + { + FStar_UInt128_uint128 lit; + lit.low = a.low << s; + lit.high = FStar_UInt128_add_u64_shift_left_respec(a.high, a.low, s); + return lit; + } +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_shift_left_large(FStar_UInt128_uint128 a, uint32_t s) +{ + FStar_UInt128_uint128 lit; + lit.low = (uint64_t)0U; + lit.high = a.low << (s - FStar_UInt128_u32_64); + return lit; +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_shift_left(FStar_UInt128_uint128 a, uint32_t s) +{ + if (s < FStar_UInt128_u32_64) + { + return FStar_UInt128_shift_left_small(a, s); + } + else + { + return FStar_UInt128_shift_left_large(a, s); + } +} + +static inline uint64_t FStar_UInt128_add_u64_shift_right(uint64_t hi, uint64_t lo, uint32_t s) +{ + return (lo >> s) + (hi << (FStar_UInt128_u32_64 - s)); +} + +static inline uint64_t +FStar_UInt128_add_u64_shift_right_respec(uint64_t hi, uint64_t lo, uint32_t s) +{ + return FStar_UInt128_add_u64_shift_right(hi, lo, s); +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_shift_right_small(FStar_UInt128_uint128 a, uint32_t s) +{ + if (s == (uint32_t)0U) + { + return a; + } + else + { + FStar_UInt128_uint128 lit; + lit.low = FStar_UInt128_add_u64_shift_right_respec(a.high, a.low, s); + lit.high = a.high >> s; + return lit; + } +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_shift_right_large(FStar_UInt128_uint128 a, uint32_t s) +{ + FStar_UInt128_uint128 lit; + lit.low = a.high >> (s - FStar_UInt128_u32_64); + lit.high = (uint64_t)0U; + return lit; +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_shift_right(FStar_UInt128_uint128 a, uint32_t s) +{ + if (s < FStar_UInt128_u32_64) + { + return FStar_UInt128_shift_right_small(a, s); + } + else + { + return FStar_UInt128_shift_right_large(a, s); + } +} + +static inline bool FStar_UInt128_eq(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + return a.low == b.low && a.high == b.high; +} + +static inline bool FStar_UInt128_gt(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + return a.high > b.high || (a.high == b.high && a.low > b.low); +} + +static inline bool FStar_UInt128_lt(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + return a.high < b.high || (a.high == b.high && a.low < b.low); +} + +static inline bool FStar_UInt128_gte(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + return a.high > b.high || (a.high == b.high && a.low >= b.low); +} + +static inline bool FStar_UInt128_lte(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + return a.high < b.high || (a.high == b.high && a.low <= b.low); +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_eq_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + FStar_UInt128_uint128 lit; + lit.low = FStar_UInt64_eq_mask(a.low, b.low) & FStar_UInt64_eq_mask(a.high, b.high); + lit.high = FStar_UInt64_eq_mask(a.low, b.low) & FStar_UInt64_eq_mask(a.high, b.high); + return lit; +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_gte_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + FStar_UInt128_uint128 lit; + lit.low = + (FStar_UInt64_gte_mask(a.high, b.high) & ~FStar_UInt64_eq_mask(a.high, b.high)) + | (FStar_UInt64_eq_mask(a.high, b.high) & FStar_UInt64_gte_mask(a.low, b.low)); + lit.high = + (FStar_UInt64_gte_mask(a.high, b.high) & ~FStar_UInt64_eq_mask(a.high, b.high)) + | (FStar_UInt64_eq_mask(a.high, b.high) & FStar_UInt64_gte_mask(a.low, b.low)); + return lit; +} + +static inline FStar_UInt128_uint128 FStar_UInt128_uint64_to_uint128(uint64_t a) +{ + 
FStar_UInt128_uint128 lit; + lit.low = a; + lit.high = (uint64_t)0U; + return lit; +} + +static inline uint64_t FStar_UInt128_uint128_to_uint64(FStar_UInt128_uint128 a) +{ + return a.low; +} + +static inline uint64_t FStar_UInt128_u64_mod_32(uint64_t a) +{ + return a & (uint64_t)0xffffffffU; +} + +static uint32_t FStar_UInt128_u32_32 = (uint32_t)32U; + +static inline uint64_t FStar_UInt128_u32_combine(uint64_t hi, uint64_t lo) +{ + return lo + (hi << FStar_UInt128_u32_32); +} + +static inline FStar_UInt128_uint128 FStar_UInt128_mul32(uint64_t x, uint32_t y) +{ + FStar_UInt128_uint128 lit; + lit.low = + FStar_UInt128_u32_combine((x >> FStar_UInt128_u32_32) + * (uint64_t)y + + (FStar_UInt128_u64_mod_32(x) * (uint64_t)y >> FStar_UInt128_u32_32), + FStar_UInt128_u64_mod_32(FStar_UInt128_u64_mod_32(x) * (uint64_t)y)); + lit.high = + ((x >> FStar_UInt128_u32_32) + * (uint64_t)y + + (FStar_UInt128_u64_mod_32(x) * (uint64_t)y >> FStar_UInt128_u32_32)) + >> FStar_UInt128_u32_32; + return lit; +} + +static inline uint64_t FStar_UInt128_u32_combine_(uint64_t hi, uint64_t lo) +{ + return lo + (hi << FStar_UInt128_u32_32); +} + +static inline FStar_UInt128_uint128 FStar_UInt128_mul_wide(uint64_t x, uint64_t y) +{ + FStar_UInt128_uint128 lit; + lit.low = + FStar_UInt128_u32_combine_(FStar_UInt128_u64_mod_32(x) + * (y >> FStar_UInt128_u32_32) + + + FStar_UInt128_u64_mod_32((x >> FStar_UInt128_u32_32) + * FStar_UInt128_u64_mod_32(y) + + (FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y) >> FStar_UInt128_u32_32)), + FStar_UInt128_u64_mod_32(FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y))); + lit.high = + (x >> FStar_UInt128_u32_32) + * (y >> FStar_UInt128_u32_32) + + + (((x >> FStar_UInt128_u32_32) + * FStar_UInt128_u64_mod_32(y) + + (FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y) >> FStar_UInt128_u32_32)) + >> FStar_UInt128_u32_32) + + + ((FStar_UInt128_u64_mod_32(x) + * (y >> FStar_UInt128_u32_32) + + + FStar_UInt128_u64_mod_32((x >> FStar_UInt128_u32_32) + * FStar_UInt128_u64_mod_32(y) + + (FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y) >> FStar_UInt128_u32_32))) + >> FStar_UInt128_u32_32); + return lit; +} + + +#define __FStar_UInt128_Verified_H_DEFINED +#endif diff --git a/src/math/bigfix/kremlib/FStar_UInt_8_16_32_64.h b/src/math/bigfix/kremlib/FStar_UInt_8_16_32_64.h new file mode 100644 index 000000000..a7d3cbae7 --- /dev/null +++ b/src/math/bigfix/kremlib/FStar_UInt_8_16_32_64.h @@ -0,0 +1,104 @@ +/* + Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. 
+*/ + + +#ifndef __FStar_UInt_8_16_32_64_H +#define __FStar_UInt_8_16_32_64_H +#include +#include +#include "kremlin/lowstar_endianness.h" +#include "kremlin/internal/types.h" +#include "kremlin/internal/target.h" + +static inline uint64_t FStar_UInt64_eq_mask(uint64_t a, uint64_t b) +{ + uint64_t x = a ^ b; + uint64_t minus_x = ~x + (uint64_t)1U; + uint64_t x_or_minus_x = x | minus_x; + uint64_t xnx = x_or_minus_x >> (uint32_t)63U; + return xnx - (uint64_t)1U; +} + +static inline uint64_t FStar_UInt64_gte_mask(uint64_t a, uint64_t b) +{ + uint64_t x = a; + uint64_t y = b; + uint64_t x_xor_y = x ^ y; + uint64_t x_sub_y = x - y; + uint64_t x_sub_y_xor_y = x_sub_y ^ y; + uint64_t q = x_xor_y | x_sub_y_xor_y; + uint64_t x_xor_q = x ^ q; + uint64_t x_xor_q_ = x_xor_q >> (uint32_t)63U; + return x_xor_q_ - (uint64_t)1U; +} + +static inline uint32_t FStar_UInt32_eq_mask(uint32_t a, uint32_t b) +{ + uint32_t x = a ^ b; + uint32_t minus_x = ~x + (uint32_t)1U; + uint32_t x_or_minus_x = x | minus_x; + uint32_t xnx = x_or_minus_x >> (uint32_t)31U; + return xnx - (uint32_t)1U; +} + +static inline uint32_t FStar_UInt32_gte_mask(uint32_t a, uint32_t b) +{ + uint32_t x = a; + uint32_t y = b; + uint32_t x_xor_y = x ^ y; + uint32_t x_sub_y = x - y; + uint32_t x_sub_y_xor_y = x_sub_y ^ y; + uint32_t q = x_xor_y | x_sub_y_xor_y; + uint32_t x_xor_q = x ^ q; + uint32_t x_xor_q_ = x_xor_q >> (uint32_t)31U; + return x_xor_q_ - (uint32_t)1U; +} + +static inline uint16_t FStar_UInt16_eq_mask(uint16_t a, uint16_t b) +{ + uint16_t x = a ^ b; + uint16_t minus_x = ~x + (uint16_t)1U; + uint16_t x_or_minus_x = x | minus_x; + uint16_t xnx = x_or_minus_x >> (uint32_t)15U; + return xnx - (uint16_t)1U; +} + +static inline uint16_t FStar_UInt16_gte_mask(uint16_t a, uint16_t b) +{ + uint16_t x = a; + uint16_t y = b; + uint16_t x_xor_y = x ^ y; + uint16_t x_sub_y = x - y; + uint16_t x_sub_y_xor_y = x_sub_y ^ y; + uint16_t q = x_xor_y | x_sub_y_xor_y; + uint16_t x_xor_q = x ^ q; + uint16_t x_xor_q_ = x_xor_q >> (uint32_t)15U; + return x_xor_q_ - (uint16_t)1U; +} + +static inline uint8_t FStar_UInt8_eq_mask(uint8_t a, uint8_t b) +{ + uint8_t x = a ^ b; + uint8_t minus_x = ~x + (uint8_t)1U; + uint8_t x_or_minus_x = x | minus_x; + uint8_t xnx = x_or_minus_x >> (uint32_t)7U; + return xnx - (uint8_t)1U; +} + +static inline uint8_t FStar_UInt8_gte_mask(uint8_t a, uint8_t b) +{ + uint8_t x = a; + uint8_t y = b; + uint8_t x_xor_y = x ^ y; + uint8_t x_sub_y = x - y; + uint8_t x_sub_y_xor_y = x_sub_y ^ y; + uint8_t q = x_xor_y | x_sub_y_xor_y; + uint8_t x_xor_q = x ^ q; + uint8_t x_xor_q_ = x_xor_q >> (uint32_t)7U; + return x_xor_q_ - (uint8_t)1U; +} + +#define __FStar_UInt_8_16_32_64_H_DEFINED +#endif diff --git a/src/math/bigfix/kremlib/LowStar_Endianness.h b/src/math/bigfix/kremlib/LowStar_Endianness.h new file mode 100644 index 000000000..5cd3be350 --- /dev/null +++ b/src/math/bigfix/kremlib/LowStar_Endianness.h @@ -0,0 +1,28 @@ +/* + Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. 
+*/ + + +#ifndef __LowStar_Endianness_H +#define __LowStar_Endianness_H +#include +#include +#include "kremlin/lowstar_endianness.h" +#include "kremlin/internal/types.h" +#include "kremlin/internal/target.h" + + +#include "FStar_UInt128.h" + +static inline void store128_le(uint8_t *x0, FStar_UInt128_uint128 x1); + +static inline FStar_UInt128_uint128 load128_le(uint8_t *x0); + +static inline void store128_be(uint8_t *x0, FStar_UInt128_uint128 x1); + +static inline FStar_UInt128_uint128 load128_be(uint8_t *x0); + + +#define __LowStar_Endianness_H_DEFINED +#endif diff --git a/src/math/bigfix/kremlib/fstar_uint128_gcc64.h b/src/math/bigfix/kremlib/fstar_uint128_gcc64.h new file mode 100644 index 000000000..aae6a7dc9 --- /dev/null +++ b/src/math/bigfix/kremlib/fstar_uint128_gcc64.h @@ -0,0 +1,165 @@ +/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. */ + +/******************************************************************************/ +/* Machine integers (128-bit arithmetic) */ +/******************************************************************************/ + +/* This header contains two things. + * + * First, an implementation of 128-bit arithmetic suitable for 64-bit GCC and + * Clang, i.e. all the operations from FStar.UInt128. + * + * Second, 128-bit operations from C.Endianness (or LowStar.Endianness), + * suitable for any compiler and platform (via a series of ifdefs). This second + * part is unfortunate, and should be fixed by moving {load,store}128_{be,le} to + * FStar.UInt128 to avoid a maze of preprocessor guards and hand-written code. + * */ + +/* This file is used for both the minimal and generic kremlib distributions. As + * such, it assumes that the machine integers have been bundled the exact same + * way in both cases. 
*/ + +#ifndef FSTAR_UINT128_GCC64 +#define FSTAR_UINT128_GCC64 + +#include "FStar_UInt128.h" +#include "FStar_UInt_8_16_32_64.h" +#include "LowStar_Endianness.h" + +/* GCC + using native unsigned __int128 support */ + +inline static uint128_t load128_le(uint8_t *b) { + uint128_t l = (uint128_t)load64_le(b); + uint128_t h = (uint128_t)load64_le(b + 8); + return (h << 64 | l); +} + +inline static void store128_le(uint8_t *b, uint128_t n) { + store64_le(b, (uint64_t)n); + store64_le(b + 8, (uint64_t)(n >> 64)); +} + +inline static uint128_t load128_be(uint8_t *b) { + uint128_t h = (uint128_t)load64_be(b); + uint128_t l = (uint128_t)load64_be(b + 8); + return (h << 64 | l); +} + +inline static void store128_be(uint8_t *b, uint128_t n) { + store64_be(b, (uint64_t)(n >> 64)); + store64_be(b + 8, (uint64_t)n); +} + +inline static uint128_t FStar_UInt128_add(uint128_t x, uint128_t y) { + return x + y; +} + +inline static uint128_t FStar_UInt128_mul(uint128_t x, uint128_t y) { + return x * y; +} + +inline static uint128_t FStar_UInt128_add_mod(uint128_t x, uint128_t y) { + return x + y; +} + +inline static uint128_t FStar_UInt128_sub(uint128_t x, uint128_t y) { + return x - y; +} + +inline static uint128_t FStar_UInt128_sub_mod(uint128_t x, uint128_t y) { + return x - y; +} + +inline static uint128_t FStar_UInt128_logand(uint128_t x, uint128_t y) { + return x & y; +} + +inline static uint128_t FStar_UInt128_logor(uint128_t x, uint128_t y) { + return x | y; +} + +inline static uint128_t FStar_UInt128_logxor(uint128_t x, uint128_t y) { + return x ^ y; +} + +inline static uint128_t FStar_UInt128_lognot(uint128_t x) { + return ~x; +} + +inline static uint128_t FStar_UInt128_shift_left(uint128_t x, uint32_t y) { + return x << y; +} + +inline static uint128_t FStar_UInt128_shift_right(uint128_t x, uint32_t y) { + return x >> y; +} + +inline static uint128_t FStar_UInt128_uint64_to_uint128(uint64_t x) { + return (uint128_t)x; +} + +inline static uint64_t FStar_UInt128_uint128_to_uint64(uint128_t x) { + return (uint64_t)x; +} + +inline static uint128_t FStar_UInt128_mul_wide(uint64_t x, uint64_t y) { + return ((uint128_t) x) * y; +} + +inline static uint128_t FStar_UInt128_eq_mask(uint128_t x, uint128_t y) { + uint64_t mask = + FStar_UInt64_eq_mask((uint64_t)(x >> 64), (uint64_t)(y >> 64)) & + FStar_UInt64_eq_mask(x, y); + return ((uint128_t)mask) << 64 | mask; +} + +inline static uint128_t FStar_UInt128_gte_mask(uint128_t x, uint128_t y) { + uint64_t mask = + (FStar_UInt64_gte_mask(x >> 64, y >> 64) & + ~(FStar_UInt64_eq_mask(x >> 64, y >> 64))) | + (FStar_UInt64_eq_mask(x >> 64, y >> 64) & FStar_UInt64_gte_mask(x, y)); + return ((uint128_t)mask) << 64 | mask; +} + +inline static uint64_t FStar_UInt128___proj__Mkuint128__item__low(uint128_t x) { + return (uint64_t) x; +} + +inline static uint64_t FStar_UInt128___proj__Mkuint128__item__high(uint128_t x) { + return (uint64_t) (x >> 64); +} + +inline static uint128_t FStar_UInt128_add_underspec(uint128_t x, uint128_t y) { + return x + y; +} + +inline static uint128_t FStar_UInt128_sub_underspec(uint128_t x, uint128_t y) { + return x - y; +} + +inline static bool FStar_UInt128_eq(uint128_t x, uint128_t y) { + return x == y; +} + +inline static bool FStar_UInt128_gt(uint128_t x, uint128_t y) { + return x > y; +} + +inline static bool FStar_UInt128_lt(uint128_t x, uint128_t y) { + return x < y; +} + +inline static bool FStar_UInt128_gte(uint128_t x, uint128_t y) { + return x >= y; +} + +inline static bool FStar_UInt128_lte(uint128_t x, uint128_t y) { + return x 
<= y;
+}
+
+inline static uint128_t FStar_UInt128_mul32(uint64_t x, uint32_t y) {
+  return (uint128_t) x * (uint128_t) y;
+}
+
+#endif
diff --git a/src/math/bigfix/kremlib/fstar_uint128_msvc.h b/src/math/bigfix/kremlib/fstar_uint128_msvc.h
new file mode 100644
index 000000000..c2a28abc6
--- /dev/null
+++ b/src/math/bigfix/kremlib/fstar_uint128_msvc.h
@@ -0,0 +1,510 @@
+/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved.
+   Licensed under the Apache 2.0 License. */
+
+/* This file was generated by KreMLin
+ * then hand-edited to use MSVC intrinsics KreMLin invocation:
+ * C:\users\barrybo\mitls2c\kremlin\_build\src\Kremlin.native -minimal -fnouint128 C:/users/barrybo/mitls2c/FStar/ulib/FStar.UInt128.fst -tmpdir ../secure_api/out/runtime_switch/uint128 -skip-compilation -add-include "kremlib0.h" -drop FStar.Int.Cast.Full -bundle FStar.UInt128=FStar.*,Prims
+ * F* version: 15104ff8
+ * KreMLin version: 318b7fa8
+ */
+
+#ifndef FSTAR_UINT128_MSVC
+#define FSTAR_UINT128_MSVC
+
+#include "kremlin/internal/types.h"
+#include "FStar_UInt128.h"
+#include "FStar_UInt_8_16_32_64.h"
+
+#ifndef _MSC_VER
+# error This file only works with the MSVC compiler
+#endif
+
+/* JP: need to rip out HAS_OPTIMIZED since the header guards in types.h are now
+ * done properly and only include this file when we know for sure we are on
+ * 64-bit MSVC. */
+
+#if defined(_M_X64) && !defined(KRML_VERIFIED_UINT128)
+#define HAS_OPTIMIZED 1
+#else
+#define HAS_OPTIMIZED 0
+#endif
+
+// Define .low and .high in terms of the __m128i fields, to reduce
+// the amount of churn in this file.
+#if HAS_OPTIMIZED
+#include <intrin.h>
+#include <immintrin.h>
+#define low m128i_u64[0]
+#define high m128i_u64[1]
+#endif
+
+inline static FStar_UInt128_uint128 load128_le(uint8_t *b) {
+#if HAS_OPTIMIZED
+  return _mm_loadu_si128((__m128i *)b);
+#else
+  FStar_UInt128_uint128 lit;
+  lit.low = load64_le(b);
+  lit.high = load64_le(b + 8);
+  return lit;
+#endif
+}
+
+inline static void store128_le(uint8_t *b, FStar_UInt128_uint128 n) {
+  store64_le(b, n.low);
+  store64_le(b + 8, n.high);
+}
+
+inline static FStar_UInt128_uint128 load128_be(uint8_t *b) {
+  uint64_t l = load64_be(b + 8);
+  uint64_t h = load64_be(b);
+#if HAS_OPTIMIZED
+  return _mm_set_epi64x(h, l);
+#else
+  FStar_UInt128_uint128 lit;
+  lit.low = l;
+  lit.high = h;
+  return lit;
+#endif
+}
+
+inline static void store128_be(uint8_t *b, uint128_t n) {
+  store64_be(b, n.high);
+  store64_be(b + 8, n.low);
+}
+
+inline static uint64_t FStar_UInt128_constant_time_carry(uint64_t a, uint64_t b) {
+  return (a ^ (a ^ b | a - b ^ b)) >> (uint32_t)63U;
+}
+
+inline static uint64_t FStar_UInt128_carry(uint64_t a, uint64_t b) {
+  return FStar_UInt128_constant_time_carry(a, b);
+}
+
+inline static FStar_UInt128_uint128
+FStar_UInt128_add(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) {
+#if HAS_OPTIMIZED
+  uint64_t l, h;
+
+  unsigned char carry =
+      _addcarry_u64(0, a.low, b.low, &l);     // low/CF = a.low+b.low+0
+  _addcarry_u64(carry, a.high, b.high, &h);   // high = a.high+b.high+CF
+  return _mm_set_epi64x(h, l);
+#else
+  FStar_UInt128_uint128 lit;
+  lit.low = a.low + b.low;
+  lit.high = a.high + b.high + FStar_UInt128_carry(a.low + b.low, b.low);
+  return lit;
+#endif
+}
+
+inline static FStar_UInt128_uint128
+FStar_UInt128_add_underspec(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) {
+#if HAS_OPTIMIZED
+  return FStar_UInt128_add(a, b);
+#else
+  FStar_UInt128_uint128 lit;
+  lit.low = a.low + b.low;
+  lit.high = a.high + b.high + FStar_UInt128_carry(a.low + b.low, b.low);
+  return lit;
+#endif
+}
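+
+/* On 64-bit MSVC (HAS_OPTIMIZED) the arithmetic in this file is mapped to
+ * compiler intrinsics: _addcarry_u64/_subborrow_u64 propagate the carry or
+ * borrow between the two 64-bit halves and _mm_set_epi64x repacks the result
+ * into the __m128i representation, while the #else branches keep the portable
+ * constant-time formulation of the verified implementation. */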
+ +inline static FStar_UInt128_uint128 +FStar_UInt128_add_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) { +#if HAS_OPTIMIZED + return FStar_UInt128_add(a, b); +#else + FStar_UInt128_uint128 lit; + lit.low = a.low + b.low; + lit.high = a.high + b.high + FStar_UInt128_carry(a.low + b.low, b.low); + return lit; +#endif +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_sub(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) { +#if HAS_OPTIMIZED + uint64_t l, h; + + unsigned char borrow = _subborrow_u64(0, a.low, b.low, &l); + _subborrow_u64(borrow, a.high, b.high, &h); + return _mm_set_epi64x(h, l); +#else + FStar_UInt128_uint128 lit; + lit.low = a.low - b.low; + lit.high = a.high - b.high - FStar_UInt128_carry(a.low, a.low - b.low); + return lit; +#endif +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_sub_underspec(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) { +#if HAS_OPTIMIZED + return FStar_UInt128_sub(a, b); +#else + FStar_UInt128_uint128 lit; + lit.low = a.low - b.low; + lit.high = a.high - b.high - FStar_UInt128_carry(a.low, a.low - b.low); + return lit; +#endif +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_sub_mod_impl(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) { + FStar_UInt128_uint128 lit; + lit.low = a.low - b.low; + lit.high = a.high - b.high - FStar_UInt128_carry(a.low, a.low - b.low); + return lit; +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_sub_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) { +#if HAS_OPTIMIZED + return FStar_UInt128_sub(a, b); +#else + return FStar_UInt128_sub_mod_impl(a, b); +#endif +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_logand(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) { +#if HAS_OPTIMIZED + return _mm_and_si128(a, b); +#else + FStar_UInt128_uint128 lit; + lit.low = a.low & b.low; + lit.high = a.high & b.high; + return lit; +#endif +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_logxor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) { +#if HAS_OPTIMIZED + return _mm_xor_si128(a, b); +#else + FStar_UInt128_uint128 lit; + lit.low = a.low ^ b.low; + lit.high = a.high ^ b.high; + return lit; +#endif +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_logor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) { +#if HAS_OPTIMIZED + return _mm_or_si128(a, b); +#else + FStar_UInt128_uint128 lit; + lit.low = a.low | b.low; + lit.high = a.high | b.high; + return lit; +#endif +} + +inline static FStar_UInt128_uint128 FStar_UInt128_lognot(FStar_UInt128_uint128 a) { +#if HAS_OPTIMIZED + return _mm_andnot_si128(a, a); +#else + FStar_UInt128_uint128 lit; + lit.low = ~a.low; + lit.high = ~a.high; + return lit; +#endif +} + +static const uint32_t FStar_UInt128_u32_64 = (uint32_t)64U; + +inline static uint64_t +FStar_UInt128_add_u64_shift_left(uint64_t hi, uint64_t lo, uint32_t s) { + return (hi << s) + (lo >> (FStar_UInt128_u32_64 - s)); +} + +inline static uint64_t +FStar_UInt128_add_u64_shift_left_respec(uint64_t hi, uint64_t lo, uint32_t s) { + return FStar_UInt128_add_u64_shift_left(hi, lo, s); +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_shift_left_small(FStar_UInt128_uint128 a, uint32_t s) { + if (s == (uint32_t)0U) + return a; + else { + FStar_UInt128_uint128 lit; + lit.low = a.low << s; + lit.high = FStar_UInt128_add_u64_shift_left_respec(a.high, a.low, s); + return lit; + } +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_shift_left_large(FStar_UInt128_uint128 a, uint32_t s) { + FStar_UInt128_uint128 lit; + lit.low = (uint64_t)0U; + lit.high = 
a.low << (s - FStar_UInt128_u32_64); + return lit; +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_shift_left(FStar_UInt128_uint128 a, uint32_t s) { +#if HAS_OPTIMIZED + if (s == 0) { + return a; + } else if (s < FStar_UInt128_u32_64) { + uint64_t l = a.low << s; + uint64_t h = __shiftleft128(a.low, a.high, (unsigned char)s); + return _mm_set_epi64x(h, l); + } else { + return _mm_set_epi64x(a.low << (s - FStar_UInt128_u32_64), 0); + } +#else + if (s < FStar_UInt128_u32_64) + return FStar_UInt128_shift_left_small(a, s); + else + return FStar_UInt128_shift_left_large(a, s); +#endif +} + +inline static uint64_t +FStar_UInt128_add_u64_shift_right(uint64_t hi, uint64_t lo, uint32_t s) { + return (lo >> s) + (hi << (FStar_UInt128_u32_64 - s)); +} + +inline static uint64_t +FStar_UInt128_add_u64_shift_right_respec(uint64_t hi, uint64_t lo, uint32_t s) { + return FStar_UInt128_add_u64_shift_right(hi, lo, s); +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_shift_right_small(FStar_UInt128_uint128 a, uint32_t s) { + if (s == (uint32_t)0U) + return a; + else { + FStar_UInt128_uint128 lit; + lit.low = FStar_UInt128_add_u64_shift_right_respec(a.high, a.low, s); + lit.high = a.high >> s; + return lit; + } +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_shift_right_large(FStar_UInt128_uint128 a, uint32_t s) { + FStar_UInt128_uint128 lit; + lit.low = a.high >> (s - FStar_UInt128_u32_64); + lit.high = (uint64_t)0U; + return lit; +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_shift_right(FStar_UInt128_uint128 a, uint32_t s) { +#if HAS_OPTIMIZED + if (s == 0) { + return a; + } else if (s < FStar_UInt128_u32_64) { + uint64_t l = __shiftright128(a.low, a.high, (unsigned char)s); + uint64_t h = a.high >> s; + return _mm_set_epi64x(h, l); + } else { + return _mm_set_epi64x(0, a.high >> (s - FStar_UInt128_u32_64)); + } +#else + if (s < FStar_UInt128_u32_64) + return FStar_UInt128_shift_right_small(a, s); + else + return FStar_UInt128_shift_right_large(a, s); +#endif +} + +inline static bool FStar_UInt128_eq(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) { + return a.low == b.low && a.high == b.high; +} + +inline static bool FStar_UInt128_gt(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) { + return a.high > b.high || a.high == b.high && a.low > b.low; +} + +inline static bool FStar_UInt128_lt(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) { + return a.high < b.high || a.high == b.high && a.low < b.low; +} + +inline static bool FStar_UInt128_gte(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) { + return a.high > b.high || a.high == b.high && a.low >= b.low; +} + +inline static bool FStar_UInt128_lte(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) { + return a.high < b.high || a.high == b.high && a.low <= b.low; +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_eq_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) { +#if HAS_OPTIMIZED + // PCMPW to produce 4 32-bit values, all either 0x0 or 0xffffffff + __m128i r32 = _mm_cmpeq_epi32(a, b); + // Shuffle 3,2,1,0 into 2,3,0,1 (swapping dwords inside each half) + __m128i s32 = _mm_shuffle_epi32(r32, _MM_SHUFFLE(2, 3, 0, 1)); + // Bitwise and to compute (3&2),(2&3),(1&0),(0&1) + __m128i ret64 = _mm_and_si128(r32, s32); + // Swap the two 64-bit values to form s64 + __m128i s64 = + _mm_shuffle_epi32(ret64, _MM_SHUFFLE(1, 0, 3, 2)); // 3,2,1,0 -> 1,0,3,2 + // And them together + return _mm_and_si128(ret64, s64); +#else + FStar_UInt128_uint128 lit; + lit.low = FStar_UInt64_eq_mask(a.low, b.low) & 
FStar_UInt64_eq_mask(a.high, b.high); + lit.high = FStar_UInt64_eq_mask(a.low, b.low) & FStar_UInt64_eq_mask(a.high, b.high); + return lit; +#endif +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_gte_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) { +#if HAS_OPTIMIZED && 0 + // ge - compare 3,2,1,0 for >= and generating 0 or 0xffffffff for each + // eq - compare 3,2,1,0 for == and generating 0 or 0xffffffff for each + // slot 0 = ge0 | (eq0 & ge1) | (eq0 & eq1 & ge2) | (eq0 & eq1 & eq2 & ge3) + // then splat slot 0 to 3,2,1,0 + __m128i gt = _mm_cmpgt_epi32(a, b); + __m128i eq = _mm_cmpeq_epi32(a, b); + __m128i ge = _mm_or_si128(gt, eq); + __m128i ge0 = ge; + __m128i eq0 = eq; + __m128i ge1 = _mm_srli_si128(ge, 4); // shift ge from 3,2,1,0 to 0x0,3,2,1 + __m128i t1 = _mm_and_si128(eq0, ge1); + __m128i ret = _mm_or_si128(ge, t1); // ge0 | (eq0 & ge1) is now in 0 + __m128i eq1 = _mm_srli_si128(eq, 4); // shift eq from 3,2,1,0 to 0x0,3,2,1 + __m128i ge2 = + _mm_srli_si128(ge1, 4); // shift original ge from 3,2,1,0 to 0x0,0x0,3,2 + __m128i t2 = + _mm_and_si128(eq0, _mm_and_si128(eq1, ge2)); // t2 = (eq0 & eq1 & ge2) + ret = _mm_or_si128(ret, t2); + __m128i eq2 = _mm_srli_si128(eq1, 4); // shift eq from 3,2,1,0 to 0x0,00,00,3 + __m128i ge3 = + _mm_srli_si128(ge2, 4); // shift original ge from 3,2,1,0 to 0x0,0x0,0x0,3 + __m128i t3 = _mm_and_si128( + eq0, _mm_and_si128( + eq1, _mm_and_si128(eq2, ge3))); // t3 = (eq0 & eq1 & eq2 & ge3) + ret = _mm_or_si128(ret, t3); + return _mm_shuffle_epi32( + ret, + _MM_SHUFFLE(0, 0, 0, 0)); // the result is in 0. Shuffle into all dwords. +#else + FStar_UInt128_uint128 lit; + lit.low = FStar_UInt64_gte_mask(a.high, b.high) & + ~FStar_UInt64_eq_mask(a.high, b.high) | + FStar_UInt64_eq_mask(a.high, b.high) & + FStar_UInt64_gte_mask(a.low, b.low); + lit.high = FStar_UInt64_gte_mask(a.high, b.high) & + ~FStar_UInt64_eq_mask(a.high, b.high) | + FStar_UInt64_eq_mask(a.high, b.high) & + FStar_UInt64_gte_mask(a.low, b.low); + return lit; +#endif +} + +inline static FStar_UInt128_uint128 FStar_UInt128_uint64_to_uint128(uint64_t a) { +#if HAS_OPTIMIZED + return _mm_set_epi64x(0, a); +#else + FStar_UInt128_uint128 lit; + lit.low = a; + lit.high = (uint64_t)0U; + return lit; +#endif +} + +inline static uint64_t FStar_UInt128_uint128_to_uint64(FStar_UInt128_uint128 a) { + return a.low; +} + +inline static uint64_t FStar_UInt128_u64_mod_32(uint64_t a) { + return a & (uint64_t)0xffffffffU; +} + +static uint32_t FStar_UInt128_u32_32 = (uint32_t)32U; + +inline static uint64_t FStar_UInt128_u32_combine(uint64_t hi, uint64_t lo) { + return lo + (hi << FStar_UInt128_u32_32); +} + +inline static FStar_UInt128_uint128 FStar_UInt128_mul32(uint64_t x, uint32_t y) { +#if HAS_OPTIMIZED + uint64_t l, h; + l = _umul128(x, (uint64_t)y, &h); + return _mm_set_epi64x(h, l); +#else + FStar_UInt128_uint128 lit; + lit.low = FStar_UInt128_u32_combine( + (x >> FStar_UInt128_u32_32) * (uint64_t)y + + (FStar_UInt128_u64_mod_32(x) * (uint64_t)y >> + FStar_UInt128_u32_32), + FStar_UInt128_u64_mod_32(FStar_UInt128_u64_mod_32(x) * (uint64_t)y)); + lit.high = (x >> FStar_UInt128_u32_32) * (uint64_t)y + + (FStar_UInt128_u64_mod_32(x) * (uint64_t)y >> + FStar_UInt128_u32_32) >> + FStar_UInt128_u32_32; + return lit; +#endif +} + +/* Note: static headers bring scope collision issues when they define types! + * Because now client (kremlin-generated) code will include this header and + * there might be type collisions if the client code uses quadruples of uint64s. 
+ * So, we cannot use the kremlin-generated name. */ +typedef struct K_quad_s { + uint64_t fst; + uint64_t snd; + uint64_t thd; + uint64_t f3; +} K_quad; + +inline static K_quad +FStar_UInt128_mul_wide_impl_t_(uint64_t x, uint64_t y) { + K_quad tmp; + tmp.fst = FStar_UInt128_u64_mod_32(x); + tmp.snd = FStar_UInt128_u64_mod_32( + FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y)); + tmp.thd = x >> FStar_UInt128_u32_32; + tmp.f3 = (x >> FStar_UInt128_u32_32) * FStar_UInt128_u64_mod_32(y) + + (FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y) >> + FStar_UInt128_u32_32); + return tmp; +} + +static uint64_t FStar_UInt128_u32_combine_(uint64_t hi, uint64_t lo) { + return lo + (hi << FStar_UInt128_u32_32); +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_mul_wide_impl(uint64_t x, uint64_t y) { + K_quad scrut = + FStar_UInt128_mul_wide_impl_t_(x, y); + uint64_t u1 = scrut.fst; + uint64_t w3 = scrut.snd; + uint64_t x_ = scrut.thd; + uint64_t t_ = scrut.f3; + FStar_UInt128_uint128 lit; + lit.low = FStar_UInt128_u32_combine_( + u1 * (y >> FStar_UInt128_u32_32) + FStar_UInt128_u64_mod_32(t_), w3); + lit.high = + x_ * (y >> FStar_UInt128_u32_32) + (t_ >> FStar_UInt128_u32_32) + + (u1 * (y >> FStar_UInt128_u32_32) + (FStar_UInt128_u64_mod_32(t_) >> + FStar_UInt128_u32_32)); + return lit; +} + +inline static +FStar_UInt128_uint128 FStar_UInt128_mul_wide(uint64_t x, uint64_t y) { +#if HAS_OPTIMIZED + uint64_t l, h; + l = _umul128(x, y, &h); + return _mm_set_epi64x(h, l); +#else + return FStar_UInt128_mul_wide_impl(x, y); +#endif +} + +#undef low +#undef high + +#endif diff --git a/src/math/bigfix/kremlib/fstar_uint128_struct_endianness.h b/src/math/bigfix/kremlib/fstar_uint128_struct_endianness.h new file mode 100644 index 000000000..e2b6d6285 --- /dev/null +++ b/src/math/bigfix/kremlib/fstar_uint128_struct_endianness.h @@ -0,0 +1,68 @@ +/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. */ + +#ifndef FSTAR_UINT128_STRUCT_ENDIANNESS_H +#define FSTAR_UINT128_STRUCT_ENDIANNESS_H + +/* Hand-written implementation of endianness-related uint128 functions + * for the extracted uint128 implementation */ + +/* Access 64-bit fields within the int128. */ +#define HIGH64_OF(x) ((x)->high) +#define LOW64_OF(x) ((x)->low) + +/* A series of definitions written using pointers. 
*/ + +inline static void load128_le_(uint8_t *b, uint128_t *r) { + LOW64_OF(r) = load64_le(b); + HIGH64_OF(r) = load64_le(b + 8); +} + +inline static void store128_le_(uint8_t *b, uint128_t *n) { + store64_le(b, LOW64_OF(n)); + store64_le(b + 8, HIGH64_OF(n)); +} + +inline static void load128_be_(uint8_t *b, uint128_t *r) { + HIGH64_OF(r) = load64_be(b); + LOW64_OF(r) = load64_be(b + 8); +} + +inline static void store128_be_(uint8_t *b, uint128_t *n) { + store64_be(b, HIGH64_OF(n)); + store64_be(b + 8, LOW64_OF(n)); +} + +#ifndef KRML_NOSTRUCT_PASSING + +inline static uint128_t load128_le(uint8_t *b) { + uint128_t r; + load128_le_(b, &r); + return r; +} + +inline static void store128_le(uint8_t *b, uint128_t n) { + store128_le_(b, &n); +} + +inline static uint128_t load128_be(uint8_t *b) { + uint128_t r; + load128_be_(b, &r); + return r; +} + +inline static void store128_be(uint8_t *b, uint128_t n) { + store128_be_(b, &n); +} + +#else /* !defined(KRML_STRUCT_PASSING) */ + +# define print128 print128_ +# define load128_le load128_le_ +# define store128_le store128_le_ +# define load128_be load128_be_ +# define store128_be store128_be_ + +#endif /* KRML_STRUCT_PASSING */ + +#endif diff --git a/src/math/bigfix/kremlin/internal/target.h b/src/math/bigfix/kremlin/internal/target.h new file mode 100644 index 000000000..b25254a23 --- /dev/null +++ b/src/math/bigfix/kremlin/internal/target.h @@ -0,0 +1,60 @@ +/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. */ + +#ifndef __KREMLIN_TARGET_H +#define __KREMLIN_TARGET_H + +#include +#include +#include + +/******************************************************************************/ +/* Macros that KreMLin will generate. */ +/******************************************************************************/ + +/* For "bare" targets that do not have a C stdlib, the user might want to use + * [-add-early-include '"mydefinitions.h"'] and override these. */ +#ifndef KRML_HOST_PRINTF +# define KRML_HOST_PRINTF printf +#endif + +#ifndef KRML_HOST_EXIT +# define KRML_HOST_EXIT exit +#endif + +#ifndef KRML_HOST_MALLOC +# define KRML_HOST_MALLOC malloc +#endif + +#ifndef KRML_HOST_CALLOC +# define KRML_HOST_CALLOC calloc +#endif + +#ifndef KRML_HOST_FREE +# define KRML_HOST_FREE free +#endif + +/* In FStar.Buffer.fst, the size of arrays is uint32_t, but it's a number of + * *elements*. Do an ugly, run-time check (some of which KreMLin can eliminate). + */ + +#ifdef __GNUC__ +# define _KRML_CHECK_SIZE_PRAGMA \ + _Pragma("GCC diagnostic ignored \"-Wtype-limits\"") +#else +# define _KRML_CHECK_SIZE_PRAGMA +#endif + +#define KRML_CHECK_SIZE(size_elt, sz) \ + do { \ + _KRML_CHECK_SIZE_PRAGMA \ + if (((size_t)(sz)) > ((size_t)(SIZE_MAX / (size_elt)))) { \ + KRML_HOST_PRINTF( \ + "Maximum allocatable size exceeded, aborting before overflow at " \ + "%s:%d\n", \ + __FILE__, __LINE__); \ + KRML_HOST_EXIT(253); \ + } \ + } while (0) + +#endif diff --git a/src/math/bigfix/kremlin/internal/types.h b/src/math/bigfix/kremlin/internal/types.h new file mode 100644 index 000000000..885d956c4 --- /dev/null +++ b/src/math/bigfix/kremlin/internal/types.h @@ -0,0 +1,70 @@ +/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. 
*/ + +#ifndef KRML_TYPES_H +#define KRML_TYPES_H + +#include +#include +#include +#include + +/* This file picks a suitable uint128 implementation depending on whether the + * target compiler supports it, and or whether KRML_VERIFIED_UINT128 is defined. */ + +#if (defined(_MSC_VER) && defined(_M_X64) && !defined(__clang__)) +#define IS_MSVC64 1 +#endif + +/* This code makes a number of assumptions and should be refined. In particular, + * it assumes that: any non-MSVC amd64 compiler supports int128. Maybe it would + * be easier to just test for defined(__SIZEOF_INT128__) only? */ +#if (defined(__x86_64__) || \ + defined(__x86_64) || \ + defined(__aarch64__) || \ + (defined(__powerpc64__) && defined(__LITTLE_ENDIAN__)) || \ + defined(__s390x__) || \ + (defined(_MSC_VER) && defined(_M_X64) && defined(__clang__)) || \ + (defined(__mips__) && defined(__LP64__)) || \ + (defined(__riscv) && __riscv_xlen == 64) || \ + defined(__SIZEOF_INT128__)) +#define HAS_INT128 1 +#endif + +/* The uint128 type is a special case since we offer several implementations of + * it, depending on the compiler and whether the user wants the verified + * implementation or not. */ +#if !defined(KRML_VERIFIED_UINT128) && defined(IS_MSVC64) +# include +typedef __m128i FStar_UInt128_uint128; +#elif !defined(KRML_VERIFIED_UINT128) && defined(HAS_INT128) +typedef unsigned __int128 FStar_UInt128_uint128; +#else +typedef struct FStar_UInt128_uint128_s { + uint64_t low; + uint64_t high; +} FStar_UInt128_uint128; +#endif + +/* The former is defined once, here (otherwise, conflicts for test-c89. The + * latter is for internal use. */ +typedef FStar_UInt128_uint128 FStar_UInt128_t, uint128_t; + +#include "math/bigfix/kremlin/lowstar_endianness.h" + +#endif + +/* Avoid a circular loop: if this header is included via FStar_UInt8_16_32_64, + * then don't bring the uint128 definitions into scope. */ +#ifndef __FStar_UInt_8_16_32_64_H + +#if !defined(KRML_VERIFIED_UINT128) && defined(IS_MSVC64) +#include "math/bigfix/kremlib/fstar_uint128_msvc.h" +#elif !defined(KRML_VERIFIED_UINT128) && defined(HAS_INT128) +#include "math/bigfix/kremlib/fstar_uint128_gcc64.h" +#else +#include "math/bigfix/kremlib/FStar_UInt128_Verified.h" +#include "math/bigfix/kremlib/fstar_uint128_struct_endianness.h" +#endif + +#endif diff --git a/src/math/bigfix/kremlin/lowstar_endianness.h b/src/math/bigfix/kremlin/lowstar_endianness.h new file mode 100644 index 000000000..3b120c7fb --- /dev/null +++ b/src/math/bigfix/kremlin/lowstar_endianness.h @@ -0,0 +1,230 @@ +/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. */ + +#ifndef __LOWSTAR_ENDIANNESS_H +#define __LOWSTAR_ENDIANNESS_H + +#include +#include + +/******************************************************************************/ +/* Implementing C.fst (part 2: endian-ness macros) */ +/******************************************************************************/ + +/* ... for Linux */ +#if defined(__linux__) || defined(__CYGWIN__) || defined (__USE_SYSTEM_ENDIAN_H__) +# include + +/* ... 
for OSX */ +#elif defined(__APPLE__) +# include +# define htole64(x) OSSwapHostToLittleInt64(x) +# define le64toh(x) OSSwapLittleToHostInt64(x) +# define htobe64(x) OSSwapHostToBigInt64(x) +# define be64toh(x) OSSwapBigToHostInt64(x) + +# define htole16(x) OSSwapHostToLittleInt16(x) +# define le16toh(x) OSSwapLittleToHostInt16(x) +# define htobe16(x) OSSwapHostToBigInt16(x) +# define be16toh(x) OSSwapBigToHostInt16(x) + +# define htole32(x) OSSwapHostToLittleInt32(x) +# define le32toh(x) OSSwapLittleToHostInt32(x) +# define htobe32(x) OSSwapHostToBigInt32(x) +# define be32toh(x) OSSwapBigToHostInt32(x) + +/* ... for Solaris */ +#elif defined(__sun__) +# include +# define htole64(x) LE_64(x) +# define le64toh(x) LE_64(x) +# define htobe64(x) BE_64(x) +# define be64toh(x) BE_64(x) + +# define htole16(x) LE_16(x) +# define le16toh(x) LE_16(x) +# define htobe16(x) BE_16(x) +# define be16toh(x) BE_16(x) + +# define htole32(x) LE_32(x) +# define le32toh(x) LE_32(x) +# define htobe32(x) BE_32(x) +# define be32toh(x) BE_32(x) + +/* ... for the BSDs */ +#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__) +# include +#elif defined(__OpenBSD__) +# include + +/* ... for Windows (MSVC)... not targeting XBOX 360! */ +#elif defined(_MSC_VER) + +# include +# define htobe16(x) _byteswap_ushort(x) +# define htole16(x) (x) +# define be16toh(x) _byteswap_ushort(x) +# define le16toh(x) (x) + +# define htobe32(x) _byteswap_ulong(x) +# define htole32(x) (x) +# define be32toh(x) _byteswap_ulong(x) +# define le32toh(x) (x) + +# define htobe64(x) _byteswap_uint64(x) +# define htole64(x) (x) +# define be64toh(x) _byteswap_uint64(x) +# define le64toh(x) (x) + +/* ... for Windows (GCC-like, e.g. mingw or clang) */ +#elif (defined(_WIN32) || defined(_WIN64)) && \ + (defined(__GNUC__) || defined(__clang__)) + +# define htobe16(x) __builtin_bswap16(x) +# define htole16(x) (x) +# define be16toh(x) __builtin_bswap16(x) +# define le16toh(x) (x) + +# define htobe32(x) __builtin_bswap32(x) +# define htole32(x) (x) +# define be32toh(x) __builtin_bswap32(x) +# define le32toh(x) (x) + +# define htobe64(x) __builtin_bswap64(x) +# define htole64(x) (x) +# define be64toh(x) __builtin_bswap64(x) +# define le64toh(x) (x) + +/* ... generic big-endian fallback code */ +#elif defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + +/* byte swapping code inspired by: + * https://github.com/rweather/arduinolibs/blob/master/libraries/Crypto/utility/EndianUtil.h + * */ + +# define htobe32(x) (x) +# define be32toh(x) (x) +# define htole32(x) \ + (__extension__({ \ + uint32_t _temp = (x); \ + ((_temp >> 24) & 0x000000FF) | ((_temp >> 8) & 0x0000FF00) | \ + ((_temp << 8) & 0x00FF0000) | ((_temp << 24) & 0xFF000000); \ + })) +# define le32toh(x) (htole32((x))) + +# define htobe64(x) (x) +# define be64toh(x) (x) +# define htole64(x) \ + (__extension__({ \ + uint64_t __temp = (x); \ + uint32_t __low = htobe32((uint32_t)__temp); \ + uint32_t __high = htobe32((uint32_t)(__temp >> 32)); \ + (((uint64_t)__low) << 32) | __high; \ + })) +# define le64toh(x) (htole64((x))) + +/* ... 
generic little-endian fallback code */ +#elif defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + +# define htole32(x) (x) +# define le32toh(x) (x) +# define htobe32(x) \ + (__extension__({ \ + uint32_t _temp = (x); \ + ((_temp >> 24) & 0x000000FF) | ((_temp >> 8) & 0x0000FF00) | \ + ((_temp << 8) & 0x00FF0000) | ((_temp << 24) & 0xFF000000); \ + })) +# define be32toh(x) (htobe32((x))) + +# define htole64(x) (x) +# define le64toh(x) (x) +# define htobe64(x) \ + (__extension__({ \ + uint64_t __temp = (x); \ + uint32_t __low = htobe32((uint32_t)__temp); \ + uint32_t __high = htobe32((uint32_t)(__temp >> 32)); \ + (((uint64_t)__low) << 32) | __high; \ + })) +# define be64toh(x) (htobe64((x))) + +/* ... couldn't determine endian-ness of the target platform */ +#else +# error "Please define __BYTE_ORDER__!" + +#endif /* defined(__linux__) || ... */ + +/* Loads and stores. These avoid undefined behavior due to unaligned memory + * accesses, via memcpy. */ + +inline static uint16_t load16(uint8_t *b) { + uint16_t x; + memcpy(&x, b, 2); + return x; +} + +inline static uint32_t load32(uint8_t *b) { + uint32_t x; + memcpy(&x, b, 4); + return x; +} + +inline static uint64_t load64(uint8_t *b) { + uint64_t x; + memcpy(&x, b, 8); + return x; +} + +inline static void store16(uint8_t *b, uint16_t i) { + memcpy(b, &i, 2); +} + +inline static void store32(uint8_t *b, uint32_t i) { + memcpy(b, &i, 4); +} + +inline static void store64(uint8_t *b, uint64_t i) { + memcpy(b, &i, 8); +} + +/* Legacy accessors so that this header can serve as an implementation of + * C.Endianness */ +#define load16_le(b) (le16toh(load16(b))) +#define store16_le(b, i) (store16(b, htole16(i))) +#define load16_be(b) (be16toh(load16(b))) +#define store16_be(b, i) (store16(b, htobe16(i))) + +#define load32_le(b) (le32toh(load32(b))) +#define store32_le(b, i) (store32(b, htole32(i))) +#define load32_be(b) (be32toh(load32(b))) +#define store32_be(b, i) (store32(b, htobe32(i))) + +#define load64_le(b) (le64toh(load64(b))) +#define store64_le(b, i) (store64(b, htole64(i))) +#define load64_be(b) (be64toh(load64(b))) +#define store64_be(b, i) (store64(b, htobe64(i))) + +/* Co-existence of LowStar.Endianness and FStar.Endianness generates name + * conflicts, because of course both insist on having no prefixes. Until a + * prefix is added, or until we truly retire FStar.Endianness, solve this issue + * in an elegant way. 
*/
+#define load16_le0 load16_le
+#define store16_le0 store16_le
+#define load16_be0 load16_be
+#define store16_be0 store16_be
+
+#define load32_le0 load32_le
+#define store32_le0 store32_le
+#define load32_be0 load32_be
+#define store32_be0 store32_be
+
+#define load64_le0 load64_le
+#define store64_le0 store64_le
+#define load64_be0 load64_be
+#define store64_be0 store64_be
+
+#define load128_le0 load128_le
+#define store128_le0 store128_le
+#define load128_be0 load128_be
+#define store128_be0 store128_be
+
+#endif
diff --git a/src/math/bigfix/lib_intrinsics.h b/src/math/bigfix/lib_intrinsics.h
new file mode 100644
index 000000000..cf269bb89
--- /dev/null
+++ b/src/math/bigfix/lib_intrinsics.h
@@ -0,0 +1,67 @@
+#pragma once
+
+#include <sys/types.h>
+
+#if __has_include("config.h")
+#include "config.h"
+#endif
+
+#if defined(COMPILE_INTRINSICS)
+#if defined(_MSC_VER)
+#include <immintrin.h>
+#else
+#include <x86intrin.h>
+#endif
+#endif
+
+#if !defined(COMPILE_INTRINSICS)
+
+#include "Hacl_IntTypes_Intrinsics.h"
+
+#define Lib_IntTypes_Intrinsics_add_carry_u32(x1, x2, x3, x4) \
+  (Hacl_IntTypes_Intrinsics_add_carry_u32(x1, x2, x3, x4))
+
+#define Lib_IntTypes_Intrinsics_add_carry_u64(x1, x2, x3, x4) \
+  (Hacl_IntTypes_Intrinsics_add_carry_u64(x1, x2, x3, x4))
+
+#define Lib_IntTypes_Intrinsics_sub_borrow_u32(x1, x2, x3, x4) \
+  (Hacl_IntTypes_Intrinsics_sub_borrow_u32(x1, x2, x3, x4))
+
+#define Lib_IntTypes_Intrinsics_sub_borrow_u64(x1, x2, x3, x4) \
+  (Hacl_IntTypes_Intrinsics_sub_borrow_u64(x1, x2, x3, x4))
+
+#else
+
+#define Lib_IntTypes_Intrinsics_add_carry_u32(x1, x2, x3, x4) \
+  (_addcarry_u32(x1, x2, x3, (unsigned int *) x4))
+
+#define Lib_IntTypes_Intrinsics_add_carry_u64(x1, x2, x3, x4) \
+  (_addcarry_u64(x1, x2, x3, (long long unsigned int *) x4))
+
+
+/*
+  GCC versions prior to 7.2 pass arguments to _subborrow_u{32,64}
+  in an incorrect order.
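+
+  To compensate, the fallback definitions below swap the second and third
+  arguments for those GCC versions, so the subtraction is still evaluated
+  as x2 - x3 - borrow.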
+
+  See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+*/
+#if defined(__GNUC__) && !defined (__clang__) && \
+  (__GNUC__ < 7 || (__GNUC__ == 7 && (__GNUC_MINOR__ < 2)))
+
+#define Lib_IntTypes_Intrinsics_sub_borrow_u32(x1, x2, x3, x4) \
+  (_subborrow_u32(x1, x3, x2, (unsigned int *) x4))
+
+#define Lib_IntTypes_Intrinsics_sub_borrow_u64(x1, x2, x3, x4) \
+  (_subborrow_u64(x1, x3, x2, (long long unsigned int *) x4))
+
+#else
+
+#define Lib_IntTypes_Intrinsics_sub_borrow_u32(x1, x2, x3, x4) \
+  (_subborrow_u32(x1, x2, x3, (unsigned int *) x4))
+
+#define Lib_IntTypes_Intrinsics_sub_borrow_u64(x1, x2, x3, x4) \
+  (_subborrow_u64(x1, x2, x3, (long long unsigned int *) x4))
+
+#endif // GCC < 7.2
+
+#endif // !COMPILE_INTRINSICS
diff --git a/src/math/bigfix/u256.cpp b/src/math/bigfix/u256.cpp
new file mode 100644
index 000000000..79ebb1844
--- /dev/null
+++ b/src/math/bigfix/u256.cpp
@@ -0,0 +1,8 @@
+#include "math/bigfix/u256.h"
+#include "math/bigfix/Hacl_Bignum256.h"
+
+u256 u256::operator*(u256 const& other) const {
+    uint64_t result[8];
+    Hacl_Bignum256_mul(const_cast<uint64_t*>(m_num), const_cast<uint64_t*>(other.m_num), result);
+    return u256(result);
+}
diff --git a/src/math/bigfix/u256.h b/src/math/bigfix/u256.h
new file mode 100644
index 000000000..91bbd9742
--- /dev/null
+++ b/src/math/bigfix/u256.h
@@ -0,0 +1,11 @@
+#pragma once
+
+#include "util/util.h"
+
+class u256 {
+    uint64_t m_num[4];
+public:
+    u256() { memset(this, 0, sizeof(*this)); }
+    u256(uint64_t const* v) { memcpy(m_num, v, sizeof(*this)); }
+    u256 operator*(u256 const& other) const;
+};
diff --git a/src/math/polysat/CMakeLists.txt b/src/math/polysat/CMakeLists.txt
index 2648fe70e..14f1a4ac6 100644
--- a/src/math/polysat/CMakeLists.txt
+++ b/src/math/polysat/CMakeLists.txt
@@ -18,4 +18,5 @@ z3_add_component(polysat
    dd
    simplex
    interval
+   bigfix
 )
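
A note on the new wrapper, for illustration only (not part of the patch): Hacl_Bignum256_mul takes two 4-limb operands and writes the full 512-bit product into an 8-limb buffer, limbs least-significant first, and the u256(uint64_t const*) constructor copies only the first four limbs, so u256::operator* computes the product modulo 2^256. A minimal smoke test along these lines, assuming nothing beyond the u256 interface added above, could look like:

    #include "math/bigfix/u256.h"

    static void u256_mul_smoke_test() {
        uint64_t one[4] = { 1, 0, 0, 0 };                       // the value 1, limbs LSB first
        uint64_t x[4]   = { 0x123456789abcdef0ULL, 42, 0, 0 };
        u256 a(x), b(one);
        u256 c = a * b;   // full 512-bit product is formed, low 256 bits are kept
        (void)c;          // u256 does not yet expose accessors or comparisons,
                          // so this only exercises the call path
    }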