diff --git a/include/Hacl_Ed25519.h b/include/Hacl_Ed25519.h index b2654704..f0dc31e2 100644 --- a/include/Hacl_Ed25519.h +++ b/include/Hacl_Ed25519.h @@ -47,16 +47,16 @@ extern "C" { /** Compute the public key from the private key. - The outparam `public_key` points to 32 bytes of valid memory, i.e., uint8_t[32]. - The argument `private_key` points to 32 bytes of valid memory, i.e., uint8_t[32]. + @param[out] public_key Points to 32 bytes of valid memory, i.e., `uint8_t[32]`. Must not overlap the memory location of `private_key`. + @param[in] private_key Points to 32 bytes of valid memory containing the private key, i.e., `uint8_t[32]`. */ void Hacl_Ed25519_secret_to_public(uint8_t *public_key, uint8_t *private_key); /** Compute the expanded keys for an Ed25519 signature. - The outparam `expanded_keys` points to 96 bytes of valid memory, i.e., uint8_t[96]. - The argument `private_key` points to 32 bytes of valid memory, i.e., uint8_t[32]. + @param[out] expanded_keys Points to 96 bytes of valid memory, i.e., `uint8_t[96]`. Must not overlap the memory location of `private_key`. + @param[in] private_key Points to 32 bytes of valid memory containing the private key, i.e., `uint8_t[32]`. If one needs to sign several messages under the same private key, it is more efficient to call `expand_keys` only once and `sign_expanded` multiple times, for each message. @@ -66,11 +66,10 @@ void Hacl_Ed25519_expand_keys(uint8_t *expanded_keys, uint8_t *private_key); /** Create an Ed25519 signature with the (precomputed) expanded keys. - The outparam `signature` points to 64 bytes of valid memory, i.e., uint8_t[64]. - The argument `expanded_keys` points to 96 bytes of valid memory, i.e., uint8_t[96]. - The argument `msg` points to `msg_len` bytes of valid memory, i.e., uint8_t[msg_len]. - - The argument `expanded_keys` is obtained through `expand_keys`. + @param[out] signature Points to 64 bytes of valid memory, i.e., `uint8_t[64]`. Must not overlap the memory locations of `expanded_keys` nor `msg`. + @param[in] expanded_keys Points to 96 bytes of valid memory, i.e., `uint8_t[96]`, containing the expanded keys obtained by invoking `expand_keys`. + @param[in] msg_len Length of `msg`. + @param[in] msg Points to `msg_len` bytes of valid memory containing the message, i.e., `uint8_t[msg_len]`. If one needs to sign several messages under the same private key, it is more efficient to call `expand_keys` only once and `sign_expanded` multiple times, for each message. @@ -86,9 +85,10 @@ Hacl_Ed25519_sign_expanded( /** Create an Ed25519 signature. - The outparam `signature` points to 64 bytes of valid memory, i.e., uint8_t[64]. - The argument `private_key` points to 32 bytes of valid memory, i.e., uint8_t[32]. - The argument `msg` points to `msg_len` bytes of valid memory, i.e., uint8_t[msg_len]. + @param[out] signature Points to 64 bytes of valid memory, i.e., `uint8_t[64]`. Must not overlap the memory locations of `private_key` nor `msg`. + @param[in] private_key Points to 32 bytes of valid memory containing the private key, i.e., `uint8_t[32]`. + @param[in] msg_len Length of `msg`. + @param[in] msg Points to `msg_len` bytes of valid memory containing the message, i.e., `uint8_t[msg_len]`. The function first calls `expand_keys` and then invokes `sign_expanded`. @@ -101,11 +101,12 @@ Hacl_Ed25519_sign(uint8_t *signature, uint8_t *private_key, uint32_t msg_len, ui /** Verify an Ed25519 signature. - The function returns `true` if the signature is valid and `false` otherwise. 
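To see how these Ed25519 entry points compose, a minimal usage sketch (illustrative only, not part of the diff; helper names are hypothetical, and the 32-byte `private_key` is assumed to come from a CSPRNG, which this library deliberately does not provide):

    #include <stdbool.h>
    #include <stdint.h>
    #include "Hacl_Ed25519.h"

    /* One-shot signing and verification. */
    static bool sign_then_verify(uint8_t private_key[32], uint8_t *msg, uint32_t msg_len)
    {
      uint8_t public_key[32];
      uint8_t signature[64];
      Hacl_Ed25519_secret_to_public(public_key, private_key);
      /* The one-shot variant internally runs expand_keys + sign_expanded. */
      Hacl_Ed25519_sign(signature, private_key, msg_len, msg);
      return Hacl_Ed25519_verify(public_key, msg_len, msg, signature);
    }

    /* Amortized signing: expand the private key once, then sign n messages. */
    static void sign_many(uint8_t private_key[32], uint8_t *msgs[], uint32_t lens[],
                          uint8_t sigs[][64], uint32_t n)
    {
      uint8_t expanded_keys[96];
      Hacl_Ed25519_expand_keys(expanded_keys, private_key);
      for (uint32_t i = 0; i < n; i++)
        Hacl_Ed25519_sign_expanded(sigs[i], expanded_keys, lens[i], msgs[i]);
    }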
+  @param public_key Points to 32 bytes of valid memory containing the public key, i.e., `uint8_t[32]`.
+  @param msg_len Length of `msg`.
+  @param msg Points to `msg_len` bytes of valid memory containing the message, i.e., `uint8_t[msg_len]`.
+  @param signature Points to 64 bytes of valid memory containing the signature, i.e., `uint8_t[64]`.
-  The argument `public_key` points to 32 bytes of valid memory, i.e., uint8_t[32].
-  The argument `msg` points to `msg_len` bytes of valid memory, i.e., uint8_t[msg_len].
-  The argument `signature` points to 64 bytes of valid memory, i.e., uint8_t[64].
+  @return Returns `true` if the signature is valid and `false` otherwise.
*/
bool Hacl_Ed25519_verify(uint8_t *public_key, uint32_t msg_len, uint8_t *msg, uint8_t *signature);

diff --git a/include/Hacl_Hash_Blake2b.h b/include/Hacl_Hash_Blake2b.h
index 414574f9..3403fc83 100644
--- a/include/Hacl_Hash_Blake2b.h
+++ b/include/Hacl_Hash_Blake2b.h
@@ -38,11 +38,34 @@ extern "C" {
#include "Hacl_Streaming_Types.h"
#include "Hacl_Krmllib.h"

-typedef struct Hacl_Hash_Blake2b_block_state_t_s
+typedef struct Hacl_Hash_Blake2b_blake2_params_s
+{
+  uint8_t digest_length;
+  uint8_t key_length;
+  uint8_t fanout;
+  uint8_t depth;
+  uint32_t leaf_length;
+  uint64_t node_offset;
+  uint8_t node_depth;
+  uint8_t inner_length;
+  uint8_t *salt;
+  uint8_t *personal;
+}
+Hacl_Hash_Blake2b_blake2_params;
+
+typedef struct K____uint64_t___uint64_t__s
{
  uint64_t *fst;
  uint64_t *snd;
}
+K____uint64_t___uint64_t_;
+
+typedef struct Hacl_Hash_Blake2b_block_state_t_s
+{
+  uint8_t fst;
+  uint8_t snd;
+  K____uint64_t___uint64_t_ thd;
+}
Hacl_Hash_Blake2b_block_state_t;

typedef struct Hacl_Hash_Blake2b_state_t_s
@@ -54,23 +77,90 @@ typedef struct Hacl_Hash_Blake2b_state_t_s
Hacl_Hash_Blake2b_state_t;

/**
-  State allocation function when there is no key
+  General-purpose allocation function that gives control over all
+Blake2 parameters, including the key. Further resettings of the state SHALL be
+done with `reset_with_key_and_params`, and SHALL feature the exact same values
+for the `key_length` and `digest_length` fields as passed here. In other words,
+once you commit to a digest and key length, the only way to change these
+parameters is to allocate a new object.
+
+The caller must satisfy the following requirements.
+- The length of the key k MUST match the value of the field key_length in the
+  parameters.
+- The key_length must not exceed 32 for S, 64 for B.
+- The digest_length must not exceed 32 for S, 64 for B.
+
+*/
+Hacl_Hash_Blake2b_state_t
+*Hacl_Hash_Blake2b_malloc_with_params_and_key(Hacl_Hash_Blake2b_blake2_params *p, uint8_t *k);
+
+/**
+  Specialized allocation function that picks default values for all
+parameters, except for the key_length. Further resettings of the state SHALL be
+done with `reset_with_key`, and SHALL feature the exact same key length `kk` as
+passed here. In other words, once you commit to a key length, the only way to
+change this parameter is to allocate a new object.
+
+The caller must satisfy the following requirements.
+- The key_length must not exceed 32 for S, 64 for B.
+
+*/
+Hacl_Hash_Blake2b_state_t *Hacl_Hash_Blake2b_malloc_with_key(uint8_t *k, uint8_t kk);
+
+/**
+  Specialized allocation function that picks default values for all
+parameters, and has no key. Effectively, this is what you want if you intend to
+use Blake2 as a hash function. Further resettings of the state SHALL be done with `reset`.
*/ Hacl_Hash_Blake2b_state_t *Hacl_Hash_Blake2b_malloc(void); /** - Re-initialization function when there is no key + General-purpose re-initialization function with parameters and +key. You cannot change digest_length or key_length, meaning those values in +the parameters object must be the same as originally decided via one of the +malloc functions. All other values of the parameter can be changed. The behavior +is unspecified if you violate this precondition. +*/ +void +Hacl_Hash_Blake2b_reset_with_key_and_params( + Hacl_Hash_Blake2b_state_t *s, + Hacl_Hash_Blake2b_blake2_params *p, + uint8_t *k +); + +/** + Specialized-purpose re-initialization function with no parameters, +and a key. The key length must be the same as originally decided via your choice +of malloc function. All other parameters are reset to their default values. The +original call to malloc MUST have set digest_length to the default value. The +behavior is unspecified if you violate this precondition. */ -void Hacl_Hash_Blake2b_reset(Hacl_Hash_Blake2b_state_t *state); +void Hacl_Hash_Blake2b_reset_with_key(Hacl_Hash_Blake2b_state_t *s, uint8_t *k); /** - Update function when there is no key; 0 = success, 1 = max length exceeded + Specialized-purpose re-initialization function with no parameters +and no key. This is what you want if you intend to use Blake2 as a hash +function. The key length and digest length must have been set to their +respective default values via your choice of malloc function (always true if you +used `malloc`). All other parameters are reset to their default values. The +behavior is unspecified if you violate this precondition. +*/ +void Hacl_Hash_Blake2b_reset(Hacl_Hash_Blake2b_state_t *s); + +/** + Update function; 0 = success, 1 = max length exceeded */ Hacl_Streaming_Types_error_code Hacl_Hash_Blake2b_update(Hacl_Hash_Blake2b_state_t *state, uint8_t *chunk, uint32_t chunk_len); /** - Finish function when there is no key + Digest function. This function expects the `output` array to hold +at least `digest_length` bytes, where `digest_length` was determined by your +choice of `malloc` function. Concretely, if you used `malloc` or +`malloc_with_key`, then the expected length is 32 for S, or 64 for B (default +digest length). If you used `malloc_with_params_and_key`, then the expected +length is whatever you chose for the `digest_length` field of your +parameters. */ void Hacl_Hash_Blake2b_digest(Hacl_Hash_Blake2b_state_t *state, uint8_t *output); @@ -79,6 +169,11 @@ void Hacl_Hash_Blake2b_digest(Hacl_Hash_Blake2b_state_t *state, uint8_t *output) */ void Hacl_Hash_Blake2b_free(Hacl_Hash_Blake2b_state_t *state); +/** + Copying. This preserves all parameters. +*/ +Hacl_Hash_Blake2b_state_t *Hacl_Hash_Blake2b_copy(Hacl_Hash_Blake2b_state_t *state); + /** Write the BLAKE2b digest of message `input` using key `key` into `output`. @@ -99,6 +194,21 @@ Hacl_Hash_Blake2b_hash_with_key( uint32_t key_len ); +/** +Write the BLAKE2b digest of message `input` using key `key` and +parameters `params` into `output`. The `key` array must be of length +`params.key_length`. The `output` array must be of length +`params.digest_length`. 
+*/
+void
+Hacl_Hash_Blake2b_hash_with_key_and_paramas(
+  uint8_t *output,
+  uint8_t *input,
+  uint32_t input_len,
+  Hacl_Hash_Blake2b_blake2_params params,
+  uint8_t *key
+);
+
#if defined(__cplusplus)
}
#endif
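The new Blake2b surface is easiest to see end-to-end. A minimal sketch, not part of the header itself: helper names and sizes are hypothetical, `update` error codes are ignored for brevity, and the all-zero 16-byte salt/personal arrays stand in for BLAKE2b's default parameter block.

    #include <stdint.h>
    #include "Hacl_Hash_Blake2b.h"

    /* One-shot: a 32-byte keyed, personalized BLAKE2b digest. */
    static void digest_with_params(uint8_t *msg, uint32_t msg_len, uint8_t key[32], uint8_t out[32])
    {
      uint8_t salt[16] = { 0 };
      uint8_t personal[16] = { 0 };
      Hacl_Hash_Blake2b_blake2_params p = {
        .digest_length = 32,        /* `out` must hold exactly 32 bytes   */
        .key_length = 32,           /* must equal the actual key length   */
        .fanout = 1, .depth = 1,    /* sequential-mode defaults           */
        .leaf_length = 0, .node_offset = 0, .node_depth = 0, .inner_length = 0,
        .salt = salt, .personal = personal
      };
      Hacl_Hash_Blake2b_hash_with_key_and_paramas(out, msg, msg_len, p, key);
    }

    /* Streaming: one state, several keyed 64-byte digests. The key length
       committed at malloc time (32 here) may never change across resets. */
    static void digest_many(uint8_t key[32], uint8_t *msgs[], uint32_t lens[],
                            uint8_t outs[][64], uint32_t n)
    {
      Hacl_Hash_Blake2b_state_t *s = Hacl_Hash_Blake2b_malloc_with_key(key, 32);
      for (uint32_t i = 0; i < n; i++) {
        (void)Hacl_Hash_Blake2b_update(s, msgs[i], lens[i]);
        Hacl_Hash_Blake2b_digest(s, outs[i]);      /* default digest length: 64 */
        Hacl_Hash_Blake2b_reset_with_key(s, key);  /* same 32-byte key          */
      }
      Hacl_Hash_Blake2b_free(s);
    }

Note the asymmetry: with `malloc_with_key`, the digest length stays at the default 64 bytes; only `malloc_with_params_and_key` allows choosing a shorter output.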
diff --git a/include/Hacl_Hash_Blake2b_Simd256.h b/include/Hacl_Hash_Blake2b_Simd256.h
index adddce66..af309dc8 100644
--- a/include/Hacl_Hash_Blake2b_Simd256.h
+++ b/include/Hacl_Hash_Blake2b_Simd256.h
@@ -37,13 +37,22 @@ extern "C" {
#include "Hacl_Streaming_Types.h"
#include "Hacl_Krmllib.h"
+#include "Hacl_Hash_Blake2b.h"
#include "libintvector.h"

-typedef struct Hacl_Hash_Blake2b_Simd256_block_state_t_s
+typedef struct K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256__s
{
  Lib_IntVector_Intrinsics_vec256 *fst;
  Lib_IntVector_Intrinsics_vec256 *snd;
}
+K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_;
+
+typedef struct Hacl_Hash_Blake2b_Simd256_block_state_t_s
+{
+  uint8_t fst;
+  uint8_t snd;
+  K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ thd;
+}
Hacl_Hash_Blake2b_Simd256_block_state_t;

typedef struct Hacl_Hash_Blake2b_Simd256_state_t_s
@@ -54,15 +63,56 @@ typedef struct Hacl_Hash_Blake2b_Simd256_state_t_s
}
Hacl_Hash_Blake2b_Simd256_state_t;

+/**
+  State allocation function when there are parameters and a key. The
+length of the key k MUST match the value of the field key_length in the
+parameters. Furthermore, there is a static (not dynamically checked) requirement
+that key_length does not exceed max_key (32 for S, 64 for B).
+*/
+Hacl_Hash_Blake2b_Simd256_state_t
+*Hacl_Hash_Blake2b_Simd256_malloc_with_params_and_key(
+  Hacl_Hash_Blake2b_blake2_params *p,
+  uint8_t *k
+);
+
+/**
+  State allocation function when there is just a custom key. All
+other parameters are set to their respective default values, meaning the output
+length is the maximum allowed output (32 for S, 64 for B).
+*/
+Hacl_Hash_Blake2b_Simd256_state_t
+*Hacl_Hash_Blake2b_Simd256_malloc_with_key0(uint8_t *k, uint8_t kk);
+
/**
  State allocation function when there is no key
*/
Hacl_Hash_Blake2b_Simd256_state_t *Hacl_Hash_Blake2b_Simd256_malloc(void);

+/**
+  Re-initialization function. The reinitialization API is tricky --
+you MUST reuse the same original parameters for digest (output) length and key
+length.
+*/
+void
+Hacl_Hash_Blake2b_Simd256_reset_with_key_and_params(
+  Hacl_Hash_Blake2b_Simd256_state_t *s,
+  Hacl_Hash_Blake2b_blake2_params *p,
+  uint8_t *k
+);
+
+/**
+  Re-initialization function when there is a key. Note that the key
+size is not allowed to change, which is why this function does not take a key
+length -- the key has to be the same size that was originally passed to
+`malloc_with_key0`.
+*/
+void
+Hacl_Hash_Blake2b_Simd256_reset_with_key(Hacl_Hash_Blake2b_Simd256_state_t *s, uint8_t *k);
+
/**
  Re-initialization function when there is no key
*/
-void Hacl_Hash_Blake2b_Simd256_reset(Hacl_Hash_Blake2b_Simd256_state_t *state);
+void Hacl_Hash_Blake2b_Simd256_reset(Hacl_Hash_Blake2b_Simd256_state_t *s);

/**
  Update function when there is no key; 0 = success, 1 = max length exceeded
@@ -85,6 +135,12 @@ Hacl_Hash_Blake2b_Simd256_digest(Hacl_Hash_Blake2b_Simd256_state_t *state, uint8
*/
void Hacl_Hash_Blake2b_Simd256_free(Hacl_Hash_Blake2b_Simd256_state_t *state);

+/**
+  Copying. The key length (or absence thereof) must match between source and destination.
+*/
+Hacl_Hash_Blake2b_Simd256_state_t
+*Hacl_Hash_Blake2b_Simd256_copy(Hacl_Hash_Blake2b_Simd256_state_t *state);
+
/**
Write the BLAKE2b digest of message `input` using key `key` into `output`.
@@ -105,6 +161,15 @@ Hacl_Hash_Blake2b_Simd256_hash_with_key(
  uint32_t key_len
);

+void
+Hacl_Hash_Blake2b_Simd256_hash_with_key_and_paramas(
+  uint8_t *output,
+  uint8_t *input,
+  uint32_t input_len,
+  Hacl_Hash_Blake2b_blake2_params params,
+  uint8_t *key
+);
+
#if defined(__cplusplus)
}
#endif
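The `copy` function just declared makes it possible to fork a partially absorbed state, for instance to hash many messages that share a common prefix. A sketch against the Simd256 variant (AVX2 builds only; hypothetical names, error codes ignored):

    #include <stdint.h>
    #include "Hacl_Hash_Blake2b_Simd256.h"

    /* Hash `prefix || a` and `prefix || b`, absorbing `prefix` only once. */
    static void branch_digests(
      uint8_t *prefix, uint32_t prefix_len,
      uint8_t *a, uint32_t a_len, uint8_t out_a[64],
      uint8_t *b, uint32_t b_len, uint8_t out_b[64])
    {
      Hacl_Hash_Blake2b_Simd256_state_t *s = Hacl_Hash_Blake2b_Simd256_malloc();
      (void)Hacl_Hash_Blake2b_Simd256_update(s, prefix, prefix_len);
      /* The snapshot inherits the (absent) key and the 64-byte digest length. */
      Hacl_Hash_Blake2b_Simd256_state_t *t = Hacl_Hash_Blake2b_Simd256_copy(s);
      (void)Hacl_Hash_Blake2b_Simd256_update(s, a, a_len);
      Hacl_Hash_Blake2b_Simd256_digest(s, out_a);
      (void)Hacl_Hash_Blake2b_Simd256_update(t, b, b_len);
      Hacl_Hash_Blake2b_Simd256_digest(t, out_b);
      Hacl_Hash_Blake2b_Simd256_free(s);
      Hacl_Hash_Blake2b_Simd256_free(t);
    }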
diff --git a/include/Hacl_Hash_Blake2s.h b/include/Hacl_Hash_Blake2s.h
index 2c0d7c5b..ac783473 100644
--- a/include/Hacl_Hash_Blake2s.h
+++ b/include/Hacl_Hash_Blake2s.h
@@ -36,12 +36,21 @@ extern "C" {
#include "krml/internal/target.h"
#include "Hacl_Streaming_Types.h"
+#include "Hacl_Hash_Blake2b.h"

-typedef struct Hacl_Hash_Blake2s_block_state_t_s
+typedef struct K____uint32_t___uint32_t__s
{
  uint32_t *fst;
  uint32_t *snd;
}
+K____uint32_t___uint32_t_;
+
+typedef struct Hacl_Hash_Blake2s_block_state_t_s
+{
+  uint8_t fst;
+  uint8_t snd;
+  K____uint32_t___uint32_t_ thd;
+}
Hacl_Hash_Blake2s_block_state_t;

typedef struct Hacl_Hash_Blake2s_state_t_s
@@ -52,15 +61,51 @@ typedef struct Hacl_Hash_Blake2s_state_t_s
}
Hacl_Hash_Blake2s_state_t;

+/**
+  State allocation function when there are parameters and a key. The
+length of the key k MUST match the value of the field key_length in the
+parameters. Furthermore, there is a static (not dynamically checked) requirement
+that key_length does not exceed max_key (32 for S, 64 for B).
+*/
+Hacl_Hash_Blake2s_state_t
+*Hacl_Hash_Blake2s_malloc_with_params_and_key(Hacl_Hash_Blake2b_blake2_params *p, uint8_t *k);
+
+/**
+  State allocation function when there is just a custom key. All
+other parameters are set to their respective default values, meaning the output
+length is the maximum allowed output (32 for S, 64 for B).
+*/
+Hacl_Hash_Blake2s_state_t *Hacl_Hash_Blake2s_malloc_with_key(uint8_t *k, uint8_t kk);
+
/**
  State allocation function when there is no key
*/
Hacl_Hash_Blake2s_state_t *Hacl_Hash_Blake2s_malloc(void);

+/**
+  Re-initialization function. The reinitialization API is tricky --
+you MUST reuse the same original parameters for digest (output) length and key
+length.
+*/
+void
+Hacl_Hash_Blake2s_reset_with_key_and_params(
+  Hacl_Hash_Blake2s_state_t *s,
+  Hacl_Hash_Blake2b_blake2_params *p,
+  uint8_t *k
+);
+
+/**
+  Re-initialization function when there is a key. Note that the key
+size is not allowed to change, which is why this function does not take a key
+length -- the key has to be the same size that was originally passed to
+`malloc_with_key`.
+*/
+void Hacl_Hash_Blake2s_reset_with_key(Hacl_Hash_Blake2s_state_t *s, uint8_t *k);
+
/**
  Re-initialization function when there is no key
*/
-void Hacl_Hash_Blake2s_reset(Hacl_Hash_Blake2s_state_t *state);
+void Hacl_Hash_Blake2s_reset(Hacl_Hash_Blake2s_state_t *s);

/**
  Update function when there is no key; 0 = success, 1 = max length exceeded
@@ -78,11 +123,16 @@ void Hacl_Hash_Blake2s_digest(Hacl_Hash_Blake2s_state_t *state, uint8_t *output)
*/
void Hacl_Hash_Blake2s_free(Hacl_Hash_Blake2s_state_t *state);

+/**
+  Copying. The key length (or absence thereof) must match between source and destination.
+*/
+Hacl_Hash_Blake2s_state_t *Hacl_Hash_Blake2s_copy(Hacl_Hash_Blake2s_state_t *state);
+
/**
Write the BLAKE2s digest of message `input` using key `key` into `output`.

@param output Pointer to `output_len` bytes of memory where the digest is written to.
@param output_len Length of the to-be-generated digest with 1 <= `output_len` <= 32.
@param input Pointer to `input_len` bytes of memory where the input message is read from.
@param input_len Length of the input message.
@param key Pointer to `key_len` bytes of memory where the key is read from.
@@ -98,6 +148,15 @@ Hacl_Hash_Blake2s_hash_with_key(
  uint32_t key_len
);

+void
+Hacl_Hash_Blake2s_hash_with_key_and_paramas(
+  uint8_t *output,
+  uint8_t *input,
+  uint32_t input_len,
+  Hacl_Hash_Blake2b_blake2_params params,
+  uint8_t *key
+);
+
#if defined(__cplusplus)
}
#endif

diff --git a/include/Hacl_Hash_Blake2s_Simd128.h b/include/Hacl_Hash_Blake2s_Simd128.h
index 6484005e..d725ee86 100644
--- a/include/Hacl_Hash_Blake2s_Simd128.h
+++ b/include/Hacl_Hash_Blake2s_Simd128.h
@@ -36,13 +36,22 @@ extern "C" {
#include "krml/internal/target.h"
#include "Hacl_Streaming_Types.h"
+#include "Hacl_Hash_Blake2b.h"
#include "libintvector.h"

-typedef struct Hacl_Hash_Blake2s_Simd128_block_state_t_s
+typedef struct K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128__s
{
  Lib_IntVector_Intrinsics_vec128 *fst;
  Lib_IntVector_Intrinsics_vec128 *snd;
}
+K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_;
+
+typedef struct Hacl_Hash_Blake2s_Simd128_block_state_t_s
+{
+  uint8_t fst;
+  uint8_t snd;
+  K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ thd;
+}
Hacl_Hash_Blake2s_Simd128_block_state_t;

typedef struct Hacl_Hash_Blake2s_Simd128_state_t_s
@@ -53,15 +62,56 @@ typedef struct Hacl_Hash_Blake2s_Simd128_state_t_s
}
Hacl_Hash_Blake2s_Simd128_state_t;

+/**
+  State allocation function when there are parameters and a key. The
+length of the key k MUST match the value of the field key_length in the
+parameters. Furthermore, there is a static (not dynamically checked) requirement
+that key_length does not exceed max_key (32 for S, 64 for B).
+*/
+Hacl_Hash_Blake2s_Simd128_state_t
+*Hacl_Hash_Blake2s_Simd128_malloc_with_params_and_key(
+  Hacl_Hash_Blake2b_blake2_params *p,
+  uint8_t *k
+);
+
+/**
+  State allocation function when there is just a custom key. All
+other parameters are set to their respective default values, meaning the output
+length is the maximum allowed output (32 for S, 64 for B).
+*/
+Hacl_Hash_Blake2s_Simd128_state_t
+*Hacl_Hash_Blake2s_Simd128_malloc_with_key0(uint8_t *k, uint8_t kk);
+
/**
  State allocation function when there is no key
*/
Hacl_Hash_Blake2s_Simd128_state_t *Hacl_Hash_Blake2s_Simd128_malloc(void);

+/**
+  Re-initialization function. The reinitialization API is tricky --
+you MUST reuse the same original parameters for digest (output) length and key
+length.
+*/
+void
+Hacl_Hash_Blake2s_Simd128_reset_with_key_and_params(
+  Hacl_Hash_Blake2s_Simd128_state_t *s,
+  Hacl_Hash_Blake2b_blake2_params *p,
+  uint8_t *k
+);
+
+/**
+  Re-initialization function when there is a key. Note that the key
+size is not allowed to change, which is why this function does not take a key
+length -- the key has to be the same size that was originally passed to
+`malloc_with_key0`.
+*/
+void
+Hacl_Hash_Blake2s_Simd128_reset_with_key(Hacl_Hash_Blake2s_Simd128_state_t *s, uint8_t *k);
+
/**
  Re-initialization function when there is no key
*/
-void Hacl_Hash_Blake2s_Simd128_reset(Hacl_Hash_Blake2s_Simd128_state_t *state);
+void Hacl_Hash_Blake2s_Simd128_reset(Hacl_Hash_Blake2s_Simd128_state_t *s);

/**
  Update function when there is no key; 0 = success, 1 = max length exceeded
@@ -84,11 +134,17 @@ Hacl_Hash_Blake2s_Simd128_digest(Hacl_Hash_Blake2s_Simd128_state_t *state, uint8
*/
void Hacl_Hash_Blake2s_Simd128_free(Hacl_Hash_Blake2s_Simd128_state_t *state);

+/**
+  Copying. The key length (or absence thereof) must match between source and destination.
+*/
+Hacl_Hash_Blake2s_Simd128_state_t
+*Hacl_Hash_Blake2s_Simd128_copy(Hacl_Hash_Blake2s_Simd128_state_t *state);
+
/**
Write the BLAKE2s digest of message `input` using key `key` into `output`.

@param output Pointer to `output_len` bytes of memory where the digest is written to.
@param output_len Length of the to-be-generated digest with 1 <= `output_len` <= 32.
@param input Pointer to `input_len` bytes of memory where the input message is read from.
@param input_len Length of the input message.
@param key Pointer to `key_len` bytes of memory where the key is read from.
@@ -104,6 +160,15 @@ Hacl_Hash_Blake2s_Simd128_hash_with_key(
  uint32_t key_len
);

+void
+Hacl_Hash_Blake2s_Simd128_hash_with_key_and_paramas(
+  uint8_t *output,
+  uint8_t *input,
+  uint32_t input_len,
+  Hacl_Hash_Blake2b_blake2_params params,
+  uint8_t *key
+);
+
#if defined(__cplusplus)
}
#endif

diff --git a/include/Hacl_Hash_SHA3.h b/include/Hacl_Hash_SHA3.h
index e09f8745..8fb78fcd 100644
--- a/include/Hacl_Hash_SHA3.h
+++ b/include/Hacl_Hash_SHA3.h
@@ -77,49 +77,90 @@ uint32_t Hacl_Hash_SHA3_hash_len(Hacl_Hash_SHA3_state_t *s);

bool Hacl_Hash_SHA3_is_shake(Hacl_Hash_SHA3_state_t *s);

+void Hacl_Hash_SHA3_absorb_inner_32(uint32_t rateInBytes, uint8_t *b, uint64_t *s);
+
void
-Hacl_Hash_SHA3_shake128_hacl(
-  uint32_t inputByteLen,
-  uint8_t *input,
+Hacl_Hash_SHA3_shake128(
+  uint8_t *output,
  uint32_t outputByteLen,
-  uint8_t *output
+  uint8_t *input,
+  uint32_t inputByteLen
);

void
-Hacl_Hash_SHA3_shake256_hacl(
-  uint32_t inputByteLen,
-  uint8_t *input,
+Hacl_Hash_SHA3_shake256(
+  uint8_t *output,
  uint32_t outputByteLen,
-  uint8_t *output
+  uint8_t *input,
+  uint32_t inputByteLen
);

-void Hacl_Hash_SHA3_sha3_224(uint8_t *output, uint8_t *input, uint32_t input_len);
+void Hacl_Hash_SHA3_sha3_224(uint8_t *output, uint8_t *input, uint32_t inputByteLen);
+
+void Hacl_Hash_SHA3_sha3_256(uint8_t *output, uint8_t *input, uint32_t inputByteLen);
+
+void Hacl_Hash_SHA3_sha3_384(uint8_t *output, uint8_t *input, uint32_t inputByteLen);
+
+void Hacl_Hash_SHA3_sha3_512(uint8_t *output, uint8_t *input, uint32_t inputByteLen);

-void Hacl_Hash_SHA3_sha3_256(uint8_t *output, uint8_t *input, uint32_t input_len);
+/**
+Allocate a state buffer of 200 bytes
+*/
+uint64_t *Hacl_Hash_SHA3_state_malloc(void);

-void Hacl_Hash_SHA3_sha3_384(uint8_t *output, uint8_t *input, uint32_t input_len);
+/**
+Free the state buffer
+*/
+void Hacl_Hash_SHA3_state_free(uint64_t *s);

-void Hacl_Hash_SHA3_sha3_512(uint8_t *output, uint8_t *input, uint32_t input_len);
+/**
+Absorb a number of input blocks and write the output state
-void Hacl_Hash_SHA3_absorb_inner(uint32_t rateInBytes, uint8_t *block, uint64_t *s);
+
+  This function is intended to receive a hash state and input buffer.
+  It processes an input whose length is a multiple of 168 bytes (the SHAKE128
+  block size); any additional bytes of a final partial block are ignored.
+
+  The argument `state` (IN/OUT) points to the hash state, i.e., uint64_t[25]
+  The argument `input` (IN) points to `inputByteLen` bytes of valid memory,
+  i.e., uint8_t[inputByteLen]
+*/
void
-Hacl_Hash_SHA3_squeeze0(
-  uint64_t *s,
-  uint32_t rateInBytes,
-  uint32_t outputByteLen,
-  uint8_t *output
-);
+Hacl_Hash_SHA3_shake128_absorb_nblocks(uint64_t *state, uint8_t *input, uint32_t inputByteLen);
+
+/**
+Absorb a final partial block of input and write the output state
+
+  This function is intended to receive a hash state and input buffer.
+  It processes the sequence of bytes at the end of the input buffer that is
+  shorter than 168 bytes (the SHAKE128 block size); any full-block bytes at
+  the start of the input buffer are ignored.
+
+  The argument `state` (IN/OUT) points to the hash state, i.e., uint64_t[25]
+  The argument `input` (IN) points to `inputByteLen` bytes of valid memory,
+  i.e., uint8_t[inputByteLen]
+
+  Note: the full size of the input buffer must be passed as `inputByteLen`,
+  including the full-block bytes at the start of the input buffer that are ignored
+*/
+void
+Hacl_Hash_SHA3_shake128_absorb_final(uint64_t *state, uint8_t *input, uint32_t inputByteLen);
+
+/**
+Squeeze a hash state into the output buffer
+
+  This function is intended to receive a hash state and output buffer.
+  It produces an output whose length is a multiple of 168 bytes (the SHAKE128
+  block size); any additional bytes of a final partial block are ignored.
+
+  The argument `state` (IN) points to the hash state, i.e., uint64_t[25]
+  The argument `output` (OUT) points to `outputByteLen` bytes of valid memory,
+  i.e., uint8_t[outputByteLen]
+*/
void
-Hacl_Hash_SHA3_keccak(
-  uint32_t rate,
-  uint32_t capacity,
-  uint32_t inputByteLen,
-  uint8_t *input,
-  uint8_t delimitedSuffix,
-  uint32_t outputByteLen,
-  uint8_t *output
+Hacl_Hash_SHA3_shake128_squeeze_nblocks(
+  uint64_t *state,
+  uint8_t *output,
+  uint32_t outputByteLen
);

#if defined(__cplusplus)
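The new absorb/squeeze entry points compose as follows. A sketch (hypothetical wrapper; it assumes the buffer returned by `state_malloc` comes back zero-initialized, and uses a 200-byte input and a 336-byte output so the 168-byte block arithmetic is visible):

    #include <stdint.h>
    #include "Hacl_Hash_SHA3.h"

    /* Incrementally SHAKE128 a 200-byte input into 336 bytes of output. */
    static void shake128_streaming(uint8_t input[200], uint8_t output[336])
    {
      uint64_t *st = Hacl_Hash_SHA3_state_malloc();   /* 25 * 8 = 200-byte state */
      /* Absorbs the single full 168-byte block; the 32 trailing bytes are ignored here. */
      Hacl_Hash_SHA3_shake128_absorb_nblocks(st, input, 200);
      /* Absorbs the trailing 32 bytes and pads; the full blocks at the start are
         ignored, but the full length (200) must still be passed. */
      Hacl_Hash_SHA3_shake128_absorb_final(st, input, 200);
      /* Squeezes two full blocks: 336 = 2 * 168. */
      Hacl_Hash_SHA3_shake128_squeeze_nblocks(st, output, 336);
      Hacl_Hash_SHA3_state_free(st);
    }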
diff --git a/include/Hacl_Hash_SHA3_Simd256.h b/include/Hacl_Hash_SHA3_Simd256.h
new file mode 100644
index 00000000..617e8e34
--- /dev/null
+++ b/include/Hacl_Hash_SHA3_Simd256.h
@@ -0,0 +1,213 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation
+ * Copyright (c) 2022-2023 HACL* Contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+
+#ifndef __Hacl_Hash_SHA3_Simd256_H
+#define __Hacl_Hash_SHA3_Simd256_H
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#include <string.h>
+#include "krml/internal/types.h"
+#include "krml/lowstar_endianness.h"
+#include "krml/internal/target.h"
+
+#include "Hacl_SHA2_Types.h"
+#include "libintvector.h"
+
+void
+Hacl_Hash_SHA3_Simd256_absorb_inner_256(
+  uint32_t rateInBytes,
+  Hacl_Hash_SHA2_uint8_4p b,
+  Lib_IntVector_Intrinsics_vec256 *s
+);
+
+void
+Hacl_Hash_SHA3_Simd256_shake128(
+  uint8_t *output0,
+  uint8_t *output1,
+  uint8_t *output2,
+  uint8_t *output3,
+  uint32_t outputByteLen,
+  uint8_t *input0,
+  uint8_t *input1,
+  uint8_t *input2,
+  uint8_t *input3,
+  uint32_t inputByteLen
+);
+
+void
+Hacl_Hash_SHA3_Simd256_shake256(
+  uint8_t *output0,
+  uint8_t *output1,
+  uint8_t *output2,
+  uint8_t *output3,
+  uint32_t outputByteLen,
+  uint8_t *input0,
+  uint8_t *input1,
+  uint8_t *input2,
+  uint8_t *input3,
+  uint32_t inputByteLen
+);
+
+void
+Hacl_Hash_SHA3_Simd256_sha3_224(
+  uint8_t *output0,
+  uint8_t *output1,
+  uint8_t *output2,
+  uint8_t *output3,
+  uint8_t *input0,
+  uint8_t *input1,
+  uint8_t *input2,
+  uint8_t *input3,
+  uint32_t inputByteLen
+);
+
+void
+Hacl_Hash_SHA3_Simd256_sha3_256(
+  uint8_t *output0,
+  uint8_t *output1,
+  uint8_t *output2,
+  uint8_t *output3,
+  uint8_t *input0,
+  uint8_t *input1,
+  uint8_t *input2,
+  uint8_t *input3,
+  uint32_t inputByteLen
+);
+
+void
+Hacl_Hash_SHA3_Simd256_sha3_384(
+  uint8_t *output0,
+  uint8_t *output1,
+  uint8_t *output2,
+  uint8_t *output3,
+  uint8_t *input0,
+  uint8_t *input1,
+  uint8_t *input2,
+  uint8_t *input3,
+  uint32_t inputByteLen
+);
+
+void
+Hacl_Hash_SHA3_Simd256_sha3_512(
+  uint8_t *output0,
+  uint8_t *output1,
+  uint8_t *output2,
+  uint8_t *output3,
+  uint8_t *input0,
+  uint8_t *input1,
+  uint8_t *input2,
+  uint8_t *input3,
+  uint32_t inputByteLen
+);
+
+/**
+Allocate quadruple state buffer (200 bytes each)
+*/
+Lib_IntVector_Intrinsics_vec256 *Hacl_Hash_SHA3_Simd256_state_malloc(void);
+
+/**
+Free quadruple state buffer
+*/
+void Hacl_Hash_SHA3_Simd256_state_free(Lib_IntVector_Intrinsics_vec256 *s);
+
+/**
+Absorb a number of blocks from 4 input buffers and write the output states
+
+  This function is intended to receive a quadruple hash state and 4 input buffers.
+  It processes inputs whose length is a multiple of 168 bytes (the SHAKE128
+  block size); any additional bytes of a final partial block in each buffer are ignored.
+
+  The argument `state` (IN/OUT) points to the quadruple hash state,
+  i.e., Lib_IntVector_Intrinsics_vec256[25]
+  The arguments `input0/input1/input2/input3` (IN) point to `inputByteLen` bytes
+  of valid memory for each buffer, i.e., uint8_t[inputByteLen]
+*/
+void
+Hacl_Hash_SHA3_Simd256_shake128_absorb_nblocks(
+  Lib_IntVector_Intrinsics_vec256 *state,
+  uint8_t *input0,
+  uint8_t *input1,
+  uint8_t *input2,
+  uint8_t *input3,
+  uint32_t inputByteLen
+);
+
+/**
+Absorb the final partial blocks of 4 input buffers and write the output states
+
+  This function is intended to receive a quadruple hash state and 4 input buffers.
+  It processes the sequence of bytes at the end of each input buffer that is
+  shorter than 168 bytes (the SHAKE128 block size); any full-block bytes at the
+  start of the input buffers are ignored.
+
+  The argument `state` (IN/OUT) points to the quadruple hash state,
+  i.e., Lib_IntVector_Intrinsics_vec256[25]
+  The arguments `input0/input1/input2/input3` (IN) point to `inputByteLen` bytes
+  of valid memory for each buffer, i.e., uint8_t[inputByteLen]
+
+  Note: the full size of the input buffers must be passed as `inputByteLen`,
+  including the full-block bytes at the start of each input buffer that are ignored
+*/
+void
+Hacl_Hash_SHA3_Simd256_shake128_absorb_final(
+  Lib_IntVector_Intrinsics_vec256 *state,
+  uint8_t *input0,
+  uint8_t *input1,
+  uint8_t *input2,
+  uint8_t *input3,
+  uint32_t inputByteLen
+);
+
+/**
+Squeeze a quadruple hash state to 4 output buffers
+
+  This function is intended to receive a quadruple hash state and 4 output buffers.
+  It produces 4 outputs, each with a length that is a multiple of 168 bytes (the
+  SHAKE128 block size); any additional bytes of a final partial block in each
+  buffer are ignored.
+
+  The argument `state` (IN) points to the quadruple hash state,
+  i.e., Lib_IntVector_Intrinsics_vec256[25]
+  The arguments `output0/output1/output2/output3` (OUT) point to `outputByteLen` bytes
+  of valid memory for each buffer, i.e., uint8_t[outputByteLen]
+*/
+void
+Hacl_Hash_SHA3_Simd256_shake128_squeeze_nblocks(
+  Lib_IntVector_Intrinsics_vec256 *state,
+  uint8_t *output0,
+  uint8_t *output1,
+  uint8_t *output2,
+  uint8_t *output3,
+  uint32_t outputByteLen
+);
+
+#if defined(__cplusplus)
+}
+#endif
+
+#define __Hacl_Hash_SHA3_Simd256_H_DEFINED
+#endif
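For comparison with four sequential scalar calls (see the `Hacl_Keccak_shake128_4x` helper in Hacl_Frodo_KEM.h further down), the batched entry point processes four independent, equal-length inputs in one call on AVX2 builds. A hypothetical wrapper:

    #include <stdint.h>
    #include "Hacl_Hash_SHA3_Simd256.h"

    /* Derive four outputs from four equal-length inputs, 4-wide; the results
       match four scalar Hacl_Hash_SHA3_shake128 calls. */
    static void shake128_x4(
      uint8_t *in0, uint8_t *in1, uint8_t *in2, uint8_t *in3, uint32_t in_len,
      uint8_t *out0, uint8_t *out1, uint8_t *out2, uint8_t *out3, uint32_t out_len)
    {
      Hacl_Hash_SHA3_Simd256_shake128(
        out0, out1, out2, out3, out_len,
        in0, in1, in2, in3, in_len);
    }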
diff --git a/include/Hacl_SHA2_Types.h b/include/Hacl_SHA2_Types.h
new file mode 100644
index 00000000..da2a6886
--- /dev/null
+++ b/include/Hacl_SHA2_Types.h
@@ -0,0 +1,68 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation
+ * Copyright (c) 2022-2023 HACL* Contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+
+#ifndef __Hacl_SHA2_Types_H
+#define __Hacl_SHA2_Types_H
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#include <string.h>
+#include "krml/internal/types.h"
+#include "krml/lowstar_endianness.h"
+#include "krml/internal/target.h"
+
+typedef struct Hacl_Hash_SHA2_uint8_2p_s
+{
+  uint8_t *fst;
+  uint8_t *snd;
+}
+Hacl_Hash_SHA2_uint8_2p;
+
+typedef struct Hacl_Hash_SHA2_uint8_3p_s
+{
+  uint8_t *fst;
+  Hacl_Hash_SHA2_uint8_2p snd;
+}
+Hacl_Hash_SHA2_uint8_3p;
+
+typedef struct Hacl_Hash_SHA2_uint8_4p_s
+{
+  uint8_t *fst;
+  Hacl_Hash_SHA2_uint8_3p snd;
+}
+Hacl_Hash_SHA2_uint8_4p;
+
+typedef uint8_t *Hacl_Hash_SHA2_bufx1;
+
+typedef Hacl_Hash_SHA2_uint8_4p Hacl_Hash_SHA2_bufx4;
+
+#if defined(__cplusplus)
+}
+#endif
+
+#define __Hacl_SHA2_Types_H_DEFINED
+#endif

diff --git a/include/Hacl_SHA2_Vec128.h b/include/Hacl_SHA2_Vec128.h
index 5118cd36..c5df2075 100644
--- a/include/Hacl_SHA2_Vec128.h
+++ b/include/Hacl_SHA2_Vec128.h
@@ -35,6 +35,8 @@ extern "C" {
#include "krml/lowstar_endianness.h"
#include "krml/internal/target.h"

+#include "Hacl_SHA2_Types.h"
+
void
Hacl_SHA2_Vec128_sha224_4(
  uint8_t *dst0,

diff --git a/include/Hacl_SHA2_Vec256.h b/include/Hacl_SHA2_Vec256.h
index e41e9fd4..7e41314a 100644
--- a/include/Hacl_SHA2_Vec256.h
+++ b/include/Hacl_SHA2_Vec256.h
@@ -35,6 +35,7 @@ extern "C" {
#include "krml/lowstar_endianness.h"
#include "krml/internal/target.h"

+#include "Hacl_SHA2_Types.h"
#include "Hacl_Krmllib.h"

void

diff --git a/include/internal/Hacl_Frodo_KEM.h b/include/internal/Hacl_Frodo_KEM.h
index a4e2f62a..34b1816a 100644
--- a/include/internal/Hacl_Frodo_KEM.h
+++ b/include/internal/Hacl_Frodo_KEM.h
@@ -55,10 +55,10 @@ Hacl_Keccak_shake128_4x(
  uint8_t *output3
)
{
-  Hacl_Hash_SHA3_shake128_hacl(input_len, input0, output_len, output0);
-  Hacl_Hash_SHA3_shake128_hacl(input_len, input1, output_len, output1);
-  Hacl_Hash_SHA3_shake128_hacl(input_len, input2, output_len, output2);
-  Hacl_Hash_SHA3_shake128_hacl(input_len, input3, output_len, output3);
+  Hacl_Hash_SHA3_shake128(output0, output_len, input0, input_len);
+  Hacl_Hash_SHA3_shake128(output1, output_len, input1, input_len);
+  Hacl_Hash_SHA3_shake128(output2, output_len, input2, input_len);
+  Hacl_Hash_SHA3_shake128(output3, output_len, input3, input_len);
}

static inline void

diff --git a/include/internal/Hacl_Hash_Blake2b.h b/include/internal/Hacl_Hash_Blake2b.h
index 21689d60..6928d205 100644
--- a/include/internal/Hacl_Hash_Blake2b.h
+++ b/include/internal/Hacl_Hash_Blake2b.h
@@ -38,6 +38,13 @@ extern "C" {
#include "internal/Hacl_Impl_Blake2_Constants.h"
#include "../Hacl_Hash_Blake2b.h"

+typedef struct Hacl_Hash_Blake2b_index_s
+{
+  uint8_t key_length;
+  uint8_t digest_length;
+}
+Hacl_Hash_Blake2b_index;
+
void Hacl_Hash_Blake2b_init(uint64_t *hash, uint32_t kk, uint32_t nn);

void
@@ -62,6 +69,13 @@ Hacl_Hash_Blake2b_update_last(
void
Hacl_Hash_Blake2b_finish(uint32_t nn, uint8_t *output, uint64_t *hash);

+typedef struct K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t__s
+{
+  Hacl_Hash_Blake2b_blake2_params *fst;
+  uint8_t *snd;
+}
+K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_;
+
#if defined(__cplusplus)
}
#endif

diff --git a/include/internal/Hacl_Hash_Blake2b_Simd256.h b/include/internal/Hacl_Hash_Blake2b_Simd256.h
index 4cc07869..4dd986b2 100644
--- a/include/internal/Hacl_Hash_Blake2b_Simd256.h
+++ b/include/internal/Hacl_Hash_Blake2b_Simd256.h
@@ -36,6 +36,7 @@ extern "C" {
#include "krml/internal/target.h"

#include "internal/Hacl_Impl_Blake2_Constants.h"
+#include "internal/Hacl_Hash_Blake2b.h"
#include "../Hacl_Hash_Blake2b_Simd256.h" #include "libintvector.h" diff --git a/include/internal/Hacl_Hash_Blake2s.h b/include/internal/Hacl_Hash_Blake2s.h index f814aa95..eccd92de 100644 --- a/include/internal/Hacl_Hash_Blake2s.h +++ b/include/internal/Hacl_Hash_Blake2s.h @@ -36,6 +36,7 @@ extern "C" { #include "krml/internal/target.h" #include "internal/Hacl_Impl_Blake2_Constants.h" +#include "internal/Hacl_Hash_Blake2b.h" #include "../Hacl_Hash_Blake2s.h" void Hacl_Hash_Blake2s_init(uint32_t *hash, uint32_t kk, uint32_t nn); diff --git a/include/internal/Hacl_Hash_Blake2s_Simd128.h b/include/internal/Hacl_Hash_Blake2s_Simd128.h index 0589aec5..2c422949 100644 --- a/include/internal/Hacl_Hash_Blake2s_Simd128.h +++ b/include/internal/Hacl_Hash_Blake2s_Simd128.h @@ -36,6 +36,7 @@ extern "C" { #include "krml/internal/target.h" #include "internal/Hacl_Impl_Blake2_Constants.h" +#include "internal/Hacl_Hash_Blake2b.h" #include "../Hacl_Hash_Blake2s_Simd128.h" #include "libintvector.h" diff --git a/include/internal/Hacl_Hash_SHA3.h b/include/internal/Hacl_Hash_SHA3.h index 1c8129fb..a82af4bd 100644 --- a/include/internal/Hacl_Hash_SHA3.h +++ b/include/internal/Hacl_Hash_SHA3.h @@ -37,6 +37,12 @@ extern "C" { #include "../Hacl_Hash_SHA3.h" +extern const uint32_t Hacl_Hash_SHA3_keccak_rotc[24U]; + +extern const uint32_t Hacl_Hash_SHA3_keccak_piln[24U]; + +extern const uint64_t Hacl_Hash_SHA3_keccak_rndc[24U]; + void Hacl_Hash_SHA3_update_multi_sha3( Spec_Hash_Definitions_hash_alg a, @@ -53,10 +59,6 @@ Hacl_Hash_SHA3_update_last_sha3( uint32_t input_len ); -void Hacl_Hash_SHA3_state_permute(uint64_t *s); - -void Hacl_Hash_SHA3_loadState(uint32_t rateInBytes, uint8_t *input, uint64_t *s); - #if defined(__cplusplus) } #endif diff --git a/include/internal/Hacl_Impl_Blake2_Constants.h b/include/internal/Hacl_Impl_Blake2_Constants.h index aedc2486..fb3a045c 100644 --- a/include/internal/Hacl_Impl_Blake2_Constants.h +++ b/include/internal/Hacl_Impl_Blake2_Constants.h @@ -37,7 +37,7 @@ extern "C" { static const uint32_t -Hacl_Hash_Blake2s_sigmaTable[160U] = +Hacl_Hash_Blake2b_sigmaTable[160U] = { 0U, 1U, 2U, 3U, 4U, 5U, 6U, 7U, 8U, 9U, 10U, 11U, 12U, 13U, 14U, 15U, 14U, 10U, 4U, 8U, 9U, 15U, 13U, 6U, 1U, 12U, 0U, 2U, 11U, 7U, 5U, 3U, 11U, 8U, 12U, 0U, 5U, 2U, 15U, 13U, 10U, 14U, 3U, 6U, @@ -51,7 +51,7 @@ Hacl_Hash_Blake2s_sigmaTable[160U] = static const uint32_t -Hacl_Hash_Blake2s_ivTable_S[8U] = +Hacl_Hash_Blake2b_ivTable_S[8U] = { 0x6A09E667U, 0xBB67AE85U, 0x3C6EF372U, 0xA54FF53AU, 0x510E527FU, 0x9B05688CU, 0x1F83D9ABU, 0x5BE0CD19U @@ -59,7 +59,7 @@ Hacl_Hash_Blake2s_ivTable_S[8U] = static const uint64_t -Hacl_Hash_Blake2s_ivTable_B[8U] = +Hacl_Hash_Blake2b_ivTable_B[8U] = { 0x6A09E667F3BCC908ULL, 0xBB67AE8584CAA73BULL, 0x3C6EF372FE94F82BULL, 0xA54FF53A5F1D36F1ULL, 0x510E527FADE682D1ULL, 0x9B05688C2B3E6C1FULL, 0x1F83D9ABFB41BD6BULL, 0x5BE0CD19137E2179ULL diff --git a/include/internal/Hacl_SHA2_Types.h b/include/internal/Hacl_SHA2_Types.h index 5a1eb668..dcb276aa 100644 --- a/include/internal/Hacl_SHA2_Types.h +++ b/include/internal/Hacl_SHA2_Types.h @@ -35,26 +35,7 @@ extern "C" { #include "krml/lowstar_endianness.h" #include "krml/internal/target.h" -typedef struct Hacl_Hash_SHA2_uint8_2p_s -{ - uint8_t *fst; - uint8_t *snd; -} -Hacl_Hash_SHA2_uint8_2p; - -typedef struct Hacl_Hash_SHA2_uint8_3p_s -{ - uint8_t *fst; - Hacl_Hash_SHA2_uint8_2p snd; -} -Hacl_Hash_SHA2_uint8_3p; - -typedef struct Hacl_Hash_SHA2_uint8_4p_s -{ - uint8_t *fst; - Hacl_Hash_SHA2_uint8_3p snd; -} -Hacl_Hash_SHA2_uint8_4p; 
+#include "../Hacl_SHA2_Types.h" typedef struct Hacl_Hash_SHA2_uint8_5p_s { diff --git a/include/lib_memzero0.h b/include/lib_memzero0.h index 506dd50f..fea3e41c 100644 --- a/include/lib_memzero0.h +++ b/include/lib_memzero0.h @@ -2,4 +2,4 @@ void Lib_Memzero0_memzero0(void *dst, uint64_t len); -#define Lib_Memzero0_memzero(dst, len, t) Lib_Memzero0_memzero0(dst, len * sizeof(t)) +#define Lib_Memzero0_memzero(dst, len, t, _ret_t) Lib_Memzero0_memzero0(dst, len * sizeof(t)) diff --git a/include/msvc/Hacl_Ed25519.h b/include/msvc/Hacl_Ed25519.h index b2654704..f0dc31e2 100644 --- a/include/msvc/Hacl_Ed25519.h +++ b/include/msvc/Hacl_Ed25519.h @@ -47,16 +47,16 @@ extern "C" { /** Compute the public key from the private key. - The outparam `public_key` points to 32 bytes of valid memory, i.e., uint8_t[32]. - The argument `private_key` points to 32 bytes of valid memory, i.e., uint8_t[32]. + @param[out] public_key Points to 32 bytes of valid memory, i.e., `uint8_t[32]`. Must not overlap the memory location of `private_key`. + @param[in] private_key Points to 32 bytes of valid memory containing the private key, i.e., `uint8_t[32]`. */ void Hacl_Ed25519_secret_to_public(uint8_t *public_key, uint8_t *private_key); /** Compute the expanded keys for an Ed25519 signature. - The outparam `expanded_keys` points to 96 bytes of valid memory, i.e., uint8_t[96]. - The argument `private_key` points to 32 bytes of valid memory, i.e., uint8_t[32]. + @param[out] expanded_keys Points to 96 bytes of valid memory, i.e., `uint8_t[96]`. Must not overlap the memory location of `private_key`. + @param[in] private_key Points to 32 bytes of valid memory containing the private key, i.e., `uint8_t[32]`. If one needs to sign several messages under the same private key, it is more efficient to call `expand_keys` only once and `sign_expanded` multiple times, for each message. @@ -66,11 +66,10 @@ void Hacl_Ed25519_expand_keys(uint8_t *expanded_keys, uint8_t *private_key); /** Create an Ed25519 signature with the (precomputed) expanded keys. - The outparam `signature` points to 64 bytes of valid memory, i.e., uint8_t[64]. - The argument `expanded_keys` points to 96 bytes of valid memory, i.e., uint8_t[96]. - The argument `msg` points to `msg_len` bytes of valid memory, i.e., uint8_t[msg_len]. - - The argument `expanded_keys` is obtained through `expand_keys`. + @param[out] signature Points to 64 bytes of valid memory, i.e., `uint8_t[64]`. Must not overlap the memory locations of `expanded_keys` nor `msg`. + @param[in] expanded_keys Points to 96 bytes of valid memory, i.e., `uint8_t[96]`, containing the expanded keys obtained by invoking `expand_keys`. + @param[in] msg_len Length of `msg`. + @param[in] msg Points to `msg_len` bytes of valid memory containing the message, i.e., `uint8_t[msg_len]`. If one needs to sign several messages under the same private key, it is more efficient to call `expand_keys` only once and `sign_expanded` multiple times, for each message. @@ -86,9 +85,10 @@ Hacl_Ed25519_sign_expanded( /** Create an Ed25519 signature. - The outparam `signature` points to 64 bytes of valid memory, i.e., uint8_t[64]. - The argument `private_key` points to 32 bytes of valid memory, i.e., uint8_t[32]. - The argument `msg` points to `msg_len` bytes of valid memory, i.e., uint8_t[msg_len]. + @param[out] signature Points to 64 bytes of valid memory, i.e., `uint8_t[64]`. Must not overlap the memory locations of `private_key` nor `msg`. 
+  @param[in] private_key Points to 32 bytes of valid memory containing the private key, i.e., `uint8_t[32]`.
+  @param[in] msg_len Length of `msg`.
+  @param[in] msg Points to `msg_len` bytes of valid memory containing the message, i.e., `uint8_t[msg_len]`.

  The function first calls `expand_keys` and then invokes `sign_expanded`.
@@ -101,11 +101,12 @@ Hacl_Ed25519_sign(uint8_t *signature, uint8_t *private_key, uint32_t msg_len, ui
/**
Verify an Ed25519 signature.

-  The function returns `true` if the signature is valid and `false` otherwise.
+  @param public_key Points to 32 bytes of valid memory containing the public key, i.e., `uint8_t[32]`.
+  @param msg_len Length of `msg`.
+  @param msg Points to `msg_len` bytes of valid memory containing the message, i.e., `uint8_t[msg_len]`.
+  @param signature Points to 64 bytes of valid memory containing the signature, i.e., `uint8_t[64]`.
-  The argument `public_key` points to 32 bytes of valid memory, i.e., uint8_t[32].
-  The argument `msg` points to `msg_len` bytes of valid memory, i.e., uint8_t[msg_len].
-  The argument `signature` points to 64 bytes of valid memory, i.e., uint8_t[64].
+  @return Returns `true` if the signature is valid and `false` otherwise.
*/
bool Hacl_Ed25519_verify(uint8_t *public_key, uint32_t msg_len, uint8_t *msg, uint8_t *signature);

diff --git a/include/msvc/Hacl_Hash_Blake2b.h b/include/msvc/Hacl_Hash_Blake2b.h
index 414574f9..3403fc83 100644
--- a/include/msvc/Hacl_Hash_Blake2b.h
+++ b/include/msvc/Hacl_Hash_Blake2b.h
@@ -38,11 +38,34 @@ extern "C" {
#include "Hacl_Streaming_Types.h"
#include "Hacl_Krmllib.h"

-typedef struct Hacl_Hash_Blake2b_block_state_t_s
+typedef struct Hacl_Hash_Blake2b_blake2_params_s
+{
+  uint8_t digest_length;
+  uint8_t key_length;
+  uint8_t fanout;
+  uint8_t depth;
+  uint32_t leaf_length;
+  uint64_t node_offset;
+  uint8_t node_depth;
+  uint8_t inner_length;
+  uint8_t *salt;
+  uint8_t *personal;
+}
+Hacl_Hash_Blake2b_blake2_params;
+
+typedef struct K____uint64_t___uint64_t__s
{
  uint64_t *fst;
  uint64_t *snd;
}
+K____uint64_t___uint64_t_;
+
+typedef struct Hacl_Hash_Blake2b_block_state_t_s
+{
+  uint8_t fst;
+  uint8_t snd;
+  K____uint64_t___uint64_t_ thd;
+}
Hacl_Hash_Blake2b_block_state_t;

typedef struct Hacl_Hash_Blake2b_state_t_s
@@ -54,23 +77,90 @@ typedef struct Hacl_Hash_Blake2b_state_t_s
Hacl_Hash_Blake2b_state_t;

/**
-  State allocation function when there is no key
+  General-purpose allocation function that gives control over all
+Blake2 parameters, including the key. Further resettings of the state SHALL be
+done with `reset_with_key_and_params`, and SHALL feature the exact same values
+for the `key_length` and `digest_length` fields as passed here. In other words,
+once you commit to a digest and key length, the only way to change these
+parameters is to allocate a new object.
+
+The caller must satisfy the following requirements.
+- The length of the key k MUST match the value of the field key_length in the
+  parameters.
+- The key_length must not exceed 32 for S, 64 for B.
+- The digest_length must not exceed 32 for S, 64 for B.
+
+*/
+Hacl_Hash_Blake2b_state_t
+*Hacl_Hash_Blake2b_malloc_with_params_and_key(Hacl_Hash_Blake2b_blake2_params *p, uint8_t *k);
+
+/**
+  Specialized allocation function that picks default values for all
+parameters, except for the key_length. Further resettings of the state SHALL be
+done with `reset_with_key`, and SHALL feature the exact same key length `kk` as
+passed here.
In other words, once you commit to a key length, the only way to +change this parameter is to allocate a new object. + +The caller must satisfy the following requirements. +- The key_length must not exceed 32 for S, 64 for B. + +*/ +Hacl_Hash_Blake2b_state_t *Hacl_Hash_Blake2b_malloc_with_key(uint8_t *k, uint8_t kk); + +/** + Specialized allocation function that picks default values for all +parameters, and has no key. Effectively, this is what you want if you intend to +use Blake2 as a hash function. Further resettings of the state SHALL be done with `reset`. */ Hacl_Hash_Blake2b_state_t *Hacl_Hash_Blake2b_malloc(void); /** - Re-initialization function when there is no key + General-purpose re-initialization function with parameters and +key. You cannot change digest_length or key_length, meaning those values in +the parameters object must be the same as originally decided via one of the +malloc functions. All other values of the parameter can be changed. The behavior +is unspecified if you violate this precondition. +*/ +void +Hacl_Hash_Blake2b_reset_with_key_and_params( + Hacl_Hash_Blake2b_state_t *s, + Hacl_Hash_Blake2b_blake2_params *p, + uint8_t *k +); + +/** + Specialized-purpose re-initialization function with no parameters, +and a key. The key length must be the same as originally decided via your choice +of malloc function. All other parameters are reset to their default values. The +original call to malloc MUST have set digest_length to the default value. The +behavior is unspecified if you violate this precondition. */ -void Hacl_Hash_Blake2b_reset(Hacl_Hash_Blake2b_state_t *state); +void Hacl_Hash_Blake2b_reset_with_key(Hacl_Hash_Blake2b_state_t *s, uint8_t *k); /** - Update function when there is no key; 0 = success, 1 = max length exceeded + Specialized-purpose re-initialization function with no parameters +and no key. This is what you want if you intend to use Blake2 as a hash +function. The key length and digest length must have been set to their +respective default values via your choice of malloc function (always true if you +used `malloc`). All other parameters are reset to their default values. The +behavior is unspecified if you violate this precondition. +*/ +void Hacl_Hash_Blake2b_reset(Hacl_Hash_Blake2b_state_t *s); + +/** + Update function; 0 = success, 1 = max length exceeded */ Hacl_Streaming_Types_error_code Hacl_Hash_Blake2b_update(Hacl_Hash_Blake2b_state_t *state, uint8_t *chunk, uint32_t chunk_len); /** - Finish function when there is no key + Digest function. This function expects the `output` array to hold +at least `digest_length` bytes, where `digest_length` was determined by your +choice of `malloc` function. Concretely, if you used `malloc` or +`malloc_with_key`, then the expected length is 32 for S, or 64 for B (default +digest length). If you used `malloc_with_params_and_key`, then the expected +length is whatever you chose for the `digest_length` field of your +parameters. */ void Hacl_Hash_Blake2b_digest(Hacl_Hash_Blake2b_state_t *state, uint8_t *output); @@ -79,6 +169,11 @@ void Hacl_Hash_Blake2b_digest(Hacl_Hash_Blake2b_state_t *state, uint8_t *output) */ void Hacl_Hash_Blake2b_free(Hacl_Hash_Blake2b_state_t *state); +/** + Copying. This preserves all parameters. +*/ +Hacl_Hash_Blake2b_state_t *Hacl_Hash_Blake2b_copy(Hacl_Hash_Blake2b_state_t *state); + /** Write the BLAKE2b digest of message `input` using key `key` into `output`. 
@@ -99,6 +194,21 @@ Hacl_Hash_Blake2b_hash_with_key(
  uint32_t key_len
);

+/**
+Write the BLAKE2b digest of message `input` using key `key` and
+parameters `params` into `output`. The `key` array must be of length
+`params.key_length`. The `output` array must be of length
+`params.digest_length`.
+*/
+void
+Hacl_Hash_Blake2b_hash_with_key_and_paramas(
+  uint8_t *output,
+  uint8_t *input,
+  uint32_t input_len,
+  Hacl_Hash_Blake2b_blake2_params params,
+  uint8_t *key
+);
+
#if defined(__cplusplus)
}
#endif

diff --git a/include/msvc/Hacl_Hash_Blake2b_Simd256.h b/include/msvc/Hacl_Hash_Blake2b_Simd256.h
index adddce66..af309dc8 100644
--- a/include/msvc/Hacl_Hash_Blake2b_Simd256.h
+++ b/include/msvc/Hacl_Hash_Blake2b_Simd256.h
@@ -37,13 +37,22 @@ extern "C" {
#include "Hacl_Streaming_Types.h"
#include "Hacl_Krmllib.h"
+#include "Hacl_Hash_Blake2b.h"
#include "libintvector.h"

-typedef struct Hacl_Hash_Blake2b_Simd256_block_state_t_s
+typedef struct K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256__s
{
  Lib_IntVector_Intrinsics_vec256 *fst;
  Lib_IntVector_Intrinsics_vec256 *snd;
}
+K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_;
+
+typedef struct Hacl_Hash_Blake2b_Simd256_block_state_t_s
+{
+  uint8_t fst;
+  uint8_t snd;
+  K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ thd;
+}
Hacl_Hash_Blake2b_Simd256_block_state_t;

typedef struct Hacl_Hash_Blake2b_Simd256_state_t_s
@@ -54,15 +63,56 @@ typedef struct Hacl_Hash_Blake2b_Simd256_state_t_s
}
Hacl_Hash_Blake2b_Simd256_state_t;

+/**
+  State allocation function when there are parameters and a key. The
+length of the key k MUST match the value of the field key_length in the
+parameters. Furthermore, there is a static (not dynamically checked) requirement
+that key_length does not exceed max_key (32 for S, 64 for B).
+*/
+Hacl_Hash_Blake2b_Simd256_state_t
+*Hacl_Hash_Blake2b_Simd256_malloc_with_params_and_key(
+  Hacl_Hash_Blake2b_blake2_params *p,
+  uint8_t *k
+);
+
+/**
+  State allocation function when there is just a custom key. All
+other parameters are set to their respective default values, meaning the output
+length is the maximum allowed output (32 for S, 64 for B).
+*/
+Hacl_Hash_Blake2b_Simd256_state_t
+*Hacl_Hash_Blake2b_Simd256_malloc_with_key0(uint8_t *k, uint8_t kk);
+
/**
  State allocation function when there is no key
*/
Hacl_Hash_Blake2b_Simd256_state_t *Hacl_Hash_Blake2b_Simd256_malloc(void);

+/**
+  Re-initialization function. The reinitialization API is tricky --
+you MUST reuse the same original parameters for digest (output) length and key
+length.
+*/
+void
+Hacl_Hash_Blake2b_Simd256_reset_with_key_and_params(
+  Hacl_Hash_Blake2b_Simd256_state_t *s,
+  Hacl_Hash_Blake2b_blake2_params *p,
+  uint8_t *k
+);
+
+/**
+  Re-initialization function when there is a key. Note that the key
+size is not allowed to change, which is why this function does not take a key
+length -- the key has to be the same size that was originally passed to
+`malloc_with_key0`.
+*/
+void
+Hacl_Hash_Blake2b_Simd256_reset_with_key(Hacl_Hash_Blake2b_Simd256_state_t *s, uint8_t *k);
+
/**
  Re-initialization function when there is no key
*/
-void Hacl_Hash_Blake2b_Simd256_reset(Hacl_Hash_Blake2b_Simd256_state_t *state);
+void Hacl_Hash_Blake2b_Simd256_reset(Hacl_Hash_Blake2b_Simd256_state_t *s);

/**
  Update function when there is no key; 0 = success, 1 = max length exceeded
@@ -85,6 +135,12 @@ Hacl_Hash_Blake2b_Simd256_digest(Hacl_Hash_Blake2b_Simd256_state_t *state, uint8
*/
void Hacl_Hash_Blake2b_Simd256_free(Hacl_Hash_Blake2b_Simd256_state_t *state);

+/**
+  Copying. The key length (or absence thereof) must match between source and destination.
+*/
+Hacl_Hash_Blake2b_Simd256_state_t
+*Hacl_Hash_Blake2b_Simd256_copy(Hacl_Hash_Blake2b_Simd256_state_t *state);
+
/**
Write the BLAKE2b digest of message `input` using key `key` into `output`.
@@ -105,6 +161,15 @@ Hacl_Hash_Blake2b_Simd256_hash_with_key(
  uint32_t key_len
);

+void
+Hacl_Hash_Blake2b_Simd256_hash_with_key_and_paramas(
+  uint8_t *output,
+  uint8_t *input,
+  uint32_t input_len,
+  Hacl_Hash_Blake2b_blake2_params params,
+  uint8_t *key
+);
+
#if defined(__cplusplus)
}
#endif

diff --git a/include/msvc/Hacl_Hash_Blake2s.h b/include/msvc/Hacl_Hash_Blake2s.h
index 2c0d7c5b..ac783473 100644
--- a/include/msvc/Hacl_Hash_Blake2s.h
+++ b/include/msvc/Hacl_Hash_Blake2s.h
@@ -36,12 +36,21 @@ extern "C" {
#include "krml/internal/target.h"
#include "Hacl_Streaming_Types.h"
+#include "Hacl_Hash_Blake2b.h"

-typedef struct Hacl_Hash_Blake2s_block_state_t_s
+typedef struct K____uint32_t___uint32_t__s
{
  uint32_t *fst;
  uint32_t *snd;
}
+K____uint32_t___uint32_t_;
+
+typedef struct Hacl_Hash_Blake2s_block_state_t_s
+{
+  uint8_t fst;
+  uint8_t snd;
+  K____uint32_t___uint32_t_ thd;
+}
Hacl_Hash_Blake2s_block_state_t;

typedef struct Hacl_Hash_Blake2s_state_t_s
@@ -52,15 +61,51 @@ typedef struct Hacl_Hash_Blake2s_state_t_s
}
Hacl_Hash_Blake2s_state_t;

+/**
+  State allocation function when there are parameters and a key. The
+length of the key k MUST match the value of the field key_length in the
+parameters. Furthermore, there is a static (not dynamically checked) requirement
+that key_length does not exceed max_key (32 for S, 64 for B).
+*/
+Hacl_Hash_Blake2s_state_t
+*Hacl_Hash_Blake2s_malloc_with_params_and_key(Hacl_Hash_Blake2b_blake2_params *p, uint8_t *k);
+
+/**
+  State allocation function when there is just a custom key. All
+other parameters are set to their respective default values, meaning the output
+length is the maximum allowed output (32 for S, 64 for B).
+*/
+Hacl_Hash_Blake2s_state_t *Hacl_Hash_Blake2s_malloc_with_key(uint8_t *k, uint8_t kk);
+
/**
  State allocation function when there is no key
*/
Hacl_Hash_Blake2s_state_t *Hacl_Hash_Blake2s_malloc(void);

+/**
+  Re-initialization function. The reinitialization API is tricky --
+you MUST reuse the same original parameters for digest (output) length and key
+length.
+*/
+void
+Hacl_Hash_Blake2s_reset_with_key_and_params(
+  Hacl_Hash_Blake2s_state_t *s,
+  Hacl_Hash_Blake2b_blake2_params *p,
+  uint8_t *k
+);
+
+/**
+  Re-initialization function when there is a key. Note that the key
Note that the key +size is not allowed to change, which is why this function does not take a key +length -- the key has to be the same size as the key originally passed to +`malloc_with_key`. +*/ +void Hacl_Hash_Blake2s_reset_with_key(Hacl_Hash_Blake2s_state_t *s, uint8_t *k); + /** Re-initialization function when there is no key */ -void Hacl_Hash_Blake2s_reset(Hacl_Hash_Blake2s_state_t *state); +void Hacl_Hash_Blake2s_reset(Hacl_Hash_Blake2s_state_t *s); /** Update function when there is no key; 0 = success, 1 = max length exceeded @@ -78,11 +123,16 @@ void Hacl_Hash_Blake2s_digest(Hacl_Hash_Blake2s_state_t *state, uint8_t *output) */ void Hacl_Hash_Blake2s_free(Hacl_Hash_Blake2s_state_t *state); +/** + Copying. The key length (or absence thereof) must match between source and destination. +*/ +Hacl_Hash_Blake2s_state_t *Hacl_Hash_Blake2s_copy(Hacl_Hash_Blake2s_state_t *state); + /** Write the BLAKE2s digest of message `input` using key `key` into `output`. @param output Pointer to `output_len` bytes of memory where the digest is written to. -@param output_len Length of the to-be-generated digest with 1 <= `output_len` <= 32. +@param output_len Length of the to-be-generated digest with 1 <= `output_len` <= 64. @param input Pointer to `input_len` bytes of memory where the input message is read from. @param input_len Length of the input message. @param key Pointer to `key_len` bytes of memory where the key is read from. @@ -98,6 +148,15 @@ Hacl_Hash_Blake2s_hash_with_key( uint32_t key_len ); +void +Hacl_Hash_Blake2s_hash_with_key_and_paramas( + uint8_t *output, + uint8_t *input, + uint32_t input_len, + Hacl_Hash_Blake2b_blake2_params params, + uint8_t *key +); + #if defined(__cplusplus) } #endif diff --git a/include/msvc/Hacl_Hash_Blake2s_Simd128.h b/include/msvc/Hacl_Hash_Blake2s_Simd128.h index 6484005e..d725ee86 100644 --- a/include/msvc/Hacl_Hash_Blake2s_Simd128.h +++ b/include/msvc/Hacl_Hash_Blake2s_Simd128.h @@ -36,13 +36,22 @@ extern "C" { #include "krml/internal/target.h" #include "Hacl_Streaming_Types.h" +#include "Hacl_Hash_Blake2b.h" #include "libintvector.h" -typedef struct Hacl_Hash_Blake2s_Simd128_block_state_t_s +typedef struct K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128__s { Lib_IntVector_Intrinsics_vec128 *fst; Lib_IntVector_Intrinsics_vec128 *snd; } +K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_; + +typedef struct Hacl_Hash_Blake2s_Simd128_block_state_t_s +{ + uint8_t fst; + uint8_t snd; + K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ thd; +} Hacl_Hash_Blake2s_Simd128_block_state_t; typedef struct Hacl_Hash_Blake2s_Simd128_state_t_s @@ -53,15 +62,56 @@ typedef struct Hacl_Hash_Blake2s_Simd128_state_t_s } Hacl_Hash_Blake2s_Simd128_state_t; +/** + State allocation function when there are parameters and a key. The +length of the key k MUST match the value of the field key_length in the +parameters. Furthermore, there is a static (not dynamically checked) requirement +that key_length does not exceed max_key (32 for S, 64 for B). +*/ +Hacl_Hash_Blake2s_Simd128_state_t +*Hacl_Hash_Blake2s_Simd128_malloc_with_params_and_key( + Hacl_Hash_Blake2b_blake2_params *p, + uint8_t *k +); + +/** + State allocation function when there is just a custom key. All +other parameters are set to their respective default values, meaning the output +length is the maximum allowed output (32 for S). +*/ +Hacl_Hash_Blake2s_Simd128_state_t +*Hacl_Hash_Blake2s_Simd128_malloc_with_key0(uint8_t *k, uint8_t kk); +
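As a usage sketch of the key-length commitment for the scalar variant: the key length is fixed when the state is allocated, and `reset_with_key` keeps it. `Hacl_Hash_Blake2s_update`'s exact signature is assumed from the streaming convention documented above (0 = success; the return value is ignored here):

#include <stdint.h>
#include "Hacl_Hash_Blake2s.h"

/* Sketch: one state, one fixed 16-byte key, several messages. */
static void keyed_digests(uint8_t key[16],
                          uint8_t *m1, uint32_t l1,
                          uint8_t *m2, uint32_t l2,
                          uint8_t tag1[32], uint8_t tag2[32])
{
  /* Defaults apply: digest_length is 32, the BLAKE2s maximum. */
  Hacl_Hash_Blake2s_state_t *st = Hacl_Hash_Blake2s_malloc_with_key(key, 16U);
  Hacl_Hash_Blake2s_update(st, m1, l1);
  Hacl_Hash_Blake2s_digest(st, tag1);
  /* The new key must have the same length (16) committed at malloc time. */
  Hacl_Hash_Blake2s_reset_with_key(st, key);
  Hacl_Hash_Blake2s_update(st, m2, l2);
  Hacl_Hash_Blake2s_digest(st, tag2);
  Hacl_Hash_Blake2s_free(st);
}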
/** State allocation function when there is no key */ Hacl_Hash_Blake2s_Simd128_state_t *Hacl_Hash_Blake2s_Simd128_malloc(void); +/** + Re-initialization function. The reinitialization API is tricky -- +you MUST reuse the same original parameters for digest (output) length and key +length. +*/ +void +Hacl_Hash_Blake2s_Simd128_reset_with_key_and_params( + Hacl_Hash_Blake2s_Simd128_state_t *s, + Hacl_Hash_Blake2b_blake2_params *p, + uint8_t *k +); + +/** + Re-initialization function when there is a key. Note that the key +size is not allowed to change, which is why this function does not take a key +length -- the key has to be the same size as the key originally passed to +`malloc_with_key`. +*/ +void +Hacl_Hash_Blake2s_Simd128_reset_with_key(Hacl_Hash_Blake2s_Simd128_state_t *s, uint8_t *k); + /** Re-initialization function when there is no key */ -void Hacl_Hash_Blake2s_Simd128_reset(Hacl_Hash_Blake2s_Simd128_state_t *state); +void Hacl_Hash_Blake2s_Simd128_reset(Hacl_Hash_Blake2s_Simd128_state_t *s); /** Update function when there is no key; 0 = success, 1 = max length exceeded @@ -84,11 +134,17 @@ Hacl_Hash_Blake2s_Simd128_digest(Hacl_Hash_Blake2s_Simd128_state_t *state, uint8 */ void Hacl_Hash_Blake2s_Simd128_free(Hacl_Hash_Blake2s_Simd128_state_t *state); +/** + Copying. The key length (or absence thereof) must match between source and destination. +*/ +Hacl_Hash_Blake2s_Simd128_state_t +*Hacl_Hash_Blake2s_Simd128_copy(Hacl_Hash_Blake2s_Simd128_state_t *state); + /** Write the BLAKE2s digest of message `input` using key `key` into `output`. @param output Pointer to `output_len` bytes of memory where the digest is written to. -@param output_len Length of the to-be-generated digest with 1 <= `output_len` <= 32. +@param output_len Length of the to-be-generated digest with 1 <= `output_len` <= 64. @param input Pointer to `input_len` bytes of memory where the input message is read from. @param input_len Length of the input message. @param key Pointer to `key_len` bytes of memory where the key is read from.
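The `hash_with_key_and_paramas` one-shot variants (the `paramas` spelling is the identifier these headers actually export) carry the output and key lengths inside the params struct rather than as separate arguments. A minimal sketch for the scalar BLAKE2b entry point, assuming the 16-byte salt/personal buffers BLAKE2b requires and a dummy key pointer since `key_length` is 0:

#include <stdint.h>
#include "Hacl_Hash_Blake2b.h"

/* Sketch: 32-byte personalized BLAKE2b digest, no key. */
static void personalized_digest(uint8_t *msg, uint32_t msg_len, uint8_t out[32])
{
  uint8_t salt[16] = { 0 };
  uint8_t personal[16] = { 'm', 'y', '-', 'a', 'p', 'p' }; /* zero-padded */
  uint8_t no_key[1] = { 0 }; /* key_length is 0; a dummy keeps the call well-defined */
  Hacl_Hash_Blake2b_blake2_params params = {
    .digest_length = 32, .key_length = 0, .fanout = 1, .depth = 1,
    .leaf_length = 0, .node_offset = 0, .node_depth = 0, .inner_length = 0,
    .salt = salt, .personal = personal
  };
  /* `out` must hold params.digest_length bytes; params is passed by value. */
  Hacl_Hash_Blake2b_hash_with_key_and_paramas(out, msg, msg_len, params, no_key);
}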
@@ -104,6 +160,15 @@ Hacl_Hash_Blake2s_Simd128_hash_with_key( uint32_t key_len ); +void +Hacl_Hash_Blake2s_Simd128_hash_with_key_and_paramas( + uint8_t *output, + uint8_t *input, + uint32_t input_len, + Hacl_Hash_Blake2b_blake2_params params, + uint8_t *key +); + #if defined(__cplusplus) } #endif diff --git a/include/msvc/Hacl_Hash_SHA3.h b/include/msvc/Hacl_Hash_SHA3.h index e09f8745..8fb78fcd 100644 --- a/include/msvc/Hacl_Hash_SHA3.h +++ b/include/msvc/Hacl_Hash_SHA3.h @@ -77,49 +77,90 @@ uint32_t Hacl_Hash_SHA3_hash_len(Hacl_Hash_SHA3_state_t *s); bool Hacl_Hash_SHA3_is_shake(Hacl_Hash_SHA3_state_t *s); +void Hacl_Hash_SHA3_absorb_inner_32(uint32_t rateInBytes, uint8_t *b, uint64_t *s); + void -Hacl_Hash_SHA3_shake128_hacl( - uint32_t inputByteLen, - uint8_t *input, +Hacl_Hash_SHA3_shake128( + uint8_t *output, uint32_t outputByteLen, - uint8_t *output + uint8_t *input, + uint32_t inputByteLen ); void -Hacl_Hash_SHA3_shake256_hacl( - uint32_t inputByteLen, - uint8_t *input, +Hacl_Hash_SHA3_shake256( + uint8_t *output, uint32_t outputByteLen, - uint8_t *output + uint8_t *input, + uint32_t inputByteLen ); -void Hacl_Hash_SHA3_sha3_224(uint8_t *output, uint8_t *input, uint32_t input_len); +void Hacl_Hash_SHA3_sha3_224(uint8_t *output, uint8_t *input, uint32_t inputByteLen); + +void Hacl_Hash_SHA3_sha3_256(uint8_t *output, uint8_t *input, uint32_t inputByteLen); + +void Hacl_Hash_SHA3_sha3_384(uint8_t *output, uint8_t *input, uint32_t inputByteLen); + +void Hacl_Hash_SHA3_sha3_512(uint8_t *output, uint8_t *input, uint32_t inputByteLen); -void Hacl_Hash_SHA3_sha3_256(uint8_t *output, uint8_t *input, uint32_t input_len); +/** +Allocate a state buffer of 200 bytes +*/ +uint64_t *Hacl_Hash_SHA3_state_malloc(void); -void Hacl_Hash_SHA3_sha3_384(uint8_t *output, uint8_t *input, uint32_t input_len); +/** +Free the state buffer +*/ +void Hacl_Hash_SHA3_state_free(uint64_t *s); -void Hacl_Hash_SHA3_sha3_512(uint8_t *output, uint8_t *input, uint32_t input_len); +/** +Absorb a number of input blocks and write the output state -void Hacl_Hash_SHA3_absorb_inner(uint32_t rateInBytes, uint8_t *block, uint64_t *s); + This function is intended to receive a hash state and an input buffer. + It processes an input that is a multiple of 168 bytes (the SHAKE128 block size); + any additional bytes of a final partial block are ignored. + The argument `state` (IN/OUT) points to the hash state, i.e., uint64_t[25] + The argument `input` (IN) points to `inputByteLen` bytes of valid memory, + i.e., uint8_t[inputByteLen] +*/ void -Hacl_Hash_SHA3_squeeze0( - uint64_t *s, - uint32_t rateInBytes, - uint32_t outputByteLen, - uint8_t *output -); +Hacl_Hash_SHA3_shake128_absorb_nblocks(uint64_t *state, uint8_t *input, uint32_t inputByteLen); + +/** +Absorb a final partial block of input and write the output state + + This function is intended to receive a hash state and an input buffer. + It processes the sequence of bytes at the end of the input buffer that is shorter + than 168 bytes (the SHAKE128 block size); + any bytes of full blocks at the start of the input buffer are ignored.
+ + The argument `state` (IN/OUT) points to the hash state, i.e., uint64_t[25] + The argument `input` (IN) points to `inputByteLen` bytes of valid memory, + i.e., uint8_t[inputByteLen] + + Note: The full size of the input buffer must be passed as `inputByteLen`, including + the full-block bytes at the start of the input buffer that are ignored. +*/ +void +Hacl_Hash_SHA3_shake128_absorb_final(uint64_t *state, uint8_t *input, uint32_t inputByteLen); + +/** +Squeeze a hash state to an output buffer + This function is intended to receive a hash state and an output buffer. + It produces an output that is a multiple of 168 bytes (the SHAKE128 block size); + any additional bytes of a final partial block are ignored. + + The argument `state` (IN) points to the hash state, i.e., uint64_t[25] + The argument `output` (OUT) points to `outputByteLen` bytes of valid memory, + i.e., uint8_t[outputByteLen] +*/ void -Hacl_Hash_SHA3_keccak( - uint32_t rate, - uint32_t capacity, - uint32_t inputByteLen, - uint8_t *input, - uint8_t delimitedSuffix, - uint32_t outputByteLen, - uint8_t *output +Hacl_Hash_SHA3_shake128_squeeze_nblocks( + uint64_t *state, + uint8_t *output, + uint32_t outputByteLen ); #if defined(__cplusplus)
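Taken together, the three functions above form an incremental SHAKE128. A sketch, assuming `state_malloc` returns a zero-initialized state (a fresh Keccak absorb requires an all-zero state): both absorb calls receive the same buffer and the full length; `absorb_nblocks` consumes the whole 168-byte blocks, `absorb_final` absorbs the remainder and applies padding, and `squeeze_nblocks` emits whole blocks only, so the requested output length should be a multiple of 168:

#include <stdint.h>
#include "Hacl_Hash_SHA3.h"

/* Sketch: absorb an arbitrary-length input, squeeze two rate-sized blocks. */
static void shake128_example(uint8_t *input, uint32_t input_len, uint8_t out[336])
{
  uint64_t *st = Hacl_Hash_SHA3_state_malloc();
  Hacl_Hash_SHA3_shake128_absorb_nblocks(st, input, input_len); /* full blocks */
  Hacl_Hash_SHA3_shake128_absorb_final(st, input, input_len);   /* tail + padding */
  Hacl_Hash_SHA3_shake128_squeeze_nblocks(st, out, 336U);       /* 2 x 168 bytes */
  Hacl_Hash_SHA3_state_free(st);
}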
diff --git a/include/msvc/Hacl_Hash_SHA3_Simd256.h b/include/msvc/Hacl_Hash_SHA3_Simd256.h new file mode 100644 index 00000000..617e8e34 --- /dev/null +++ b/include/msvc/Hacl_Hash_SHA3_Simd256.h @@ -0,0 +1,213 @@ +/* MIT License + * + * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation + * Copyright (c) 2022-2023 HACL* Contributors + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + + +#ifndef __Hacl_Hash_SHA3_Simd256_H +#define __Hacl_Hash_SHA3_Simd256_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include <string.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "Hacl_SHA2_Types.h" +#include "libintvector.h" + +void +Hacl_Hash_SHA3_Simd256_absorb_inner_256( + uint32_t rateInBytes, + Hacl_Hash_SHA2_uint8_4p b, + Lib_IntVector_Intrinsics_vec256 *s +); + +void +Hacl_Hash_SHA3_Simd256_shake128( + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, + uint32_t outputByteLen, + uint8_t *input0, + uint8_t *input1, + uint8_t *input2, + uint8_t *input3, + uint32_t inputByteLen +); + +void +Hacl_Hash_SHA3_Simd256_shake256( + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, + uint32_t outputByteLen, + uint8_t *input0, + uint8_t *input1, + uint8_t *input2, + uint8_t *input3, + uint32_t inputByteLen +); + +void +Hacl_Hash_SHA3_Simd256_sha3_224( + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, + uint8_t *input0, + uint8_t *input1, + uint8_t *input2, + uint8_t *input3, + uint32_t inputByteLen +); + +void +Hacl_Hash_SHA3_Simd256_sha3_256( + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, + uint8_t *input0, + uint8_t *input1, + uint8_t *input2, + uint8_t *input3, + uint32_t inputByteLen +); + +void +Hacl_Hash_SHA3_Simd256_sha3_384( + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, + uint8_t *input0, + uint8_t *input1, + uint8_t *input2, + uint8_t *input3, + uint32_t inputByteLen +); + +void +Hacl_Hash_SHA3_Simd256_sha3_512( + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, + uint8_t *input0, + uint8_t *input1, + uint8_t *input2, + uint8_t *input3, + uint32_t inputByteLen +); +
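A sketch of the batched one-shot API declared above (an AVX2 build is assumed); the single `inputByteLen` argument is why all four messages must have equal length:

#include <stdint.h>
#include "Hacl_Hash_SHA3_Simd256.h"

/* Sketch: four SHA3-256 digests of equal-length messages in one SIMD pass. */
static void sha3_256_x4(uint8_t *m0, uint8_t *m1, uint8_t *m2, uint8_t *m3,
                        uint32_t len,
                        uint8_t d0[32], uint8_t d1[32],
                        uint8_t d2[32], uint8_t d3[32])
{
  Hacl_Hash_SHA3_Simd256_sha3_256(d0, d1, d2, d3, m0, m1, m2, m3, len);
}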
+/** +Allocate a quadruple state buffer (200 bytes for each) +*/ +Lib_IntVector_Intrinsics_vec256 *Hacl_Hash_SHA3_Simd256_state_malloc(void); + +/** +Free the quadruple state buffer +*/ +void Hacl_Hash_SHA3_Simd256_state_free(Lib_IntVector_Intrinsics_vec256 *s); + +/** +Absorb a number of blocks of 4 input buffers and write the output states + + This function is intended to receive a quadruple hash state and 4 input buffers. + It processes inputs that are a multiple of 168 bytes (the SHAKE128 block size); + any additional bytes of a final partial block for each buffer are ignored. + + The argument `state` (IN/OUT) points to the quadruple hash state, + i.e., Lib_IntVector_Intrinsics_vec256[25] + The arguments `input0/input1/input2/input3` (IN) point to `inputByteLen` bytes + of valid memory for each buffer, i.e., uint8_t[inputByteLen] +*/ +void +Hacl_Hash_SHA3_Simd256_shake128_absorb_nblocks( + Lib_IntVector_Intrinsics_vec256 *state, + uint8_t *input0, + uint8_t *input1, + uint8_t *input2, + uint8_t *input3, + uint32_t inputByteLen +); + +/** +Absorb the final partial blocks of 4 input buffers and write the output states + + This function is intended to receive a quadruple hash state and 4 input buffers. + It processes the sequence of bytes at the end of each input buffer that is shorter + than 168 bytes (the SHAKE128 block size); + any bytes of full blocks at the start of the input buffers are ignored. + + The argument `state` (IN/OUT) points to the quadruple hash state, + i.e., Lib_IntVector_Intrinsics_vec256[25] + The arguments `input0/input1/input2/input3` (IN) point to `inputByteLen` bytes + of valid memory for each buffer, i.e., uint8_t[inputByteLen] + + Note: The full size of the input buffers must be passed as `inputByteLen`, including + the full-block bytes at the start of each input buffer that are ignored. +*/ +void +Hacl_Hash_SHA3_Simd256_shake128_absorb_final( + Lib_IntVector_Intrinsics_vec256 *state, + uint8_t *input0, + uint8_t *input1, + uint8_t *input2, + uint8_t *input3, + uint32_t inputByteLen +); + +/** +Squeeze a quadruple hash state to 4 output buffers + + This function is intended to receive a quadruple hash state and 4 output buffers. + It produces 4 outputs, each a multiple of 168 bytes (the SHAKE128 block size); + any additional bytes of a final partial block for each buffer are ignored. + + The argument `state` (IN) points to the quadruple hash state, + i.e., Lib_IntVector_Intrinsics_vec256[25] + The arguments `output0/output1/output2/output3` (OUT) point to `outputByteLen` bytes + of valid memory for each buffer, i.e., uint8_t[outputByteLen] +*/ +void +Hacl_Hash_SHA3_Simd256_shake128_squeeze_nblocks( + Lib_IntVector_Intrinsics_vec256 *state, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, + uint32_t outputByteLen +); + +#if defined(__cplusplus) +} +#endif + +#define __Hacl_Hash_SHA3_Simd256_H_DEFINED +#endif
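The quadruple absorb/squeeze functions compose the same way as their scalar counterparts. A sketch, under the same zero-initialized-state assumption, with `output_len` a multiple of 168 so that the squeeze emits full blocks for each lane:

#include <stdint.h>
#include "Hacl_Hash_SHA3_Simd256.h"

/* Sketch: incremental 4-way SHAKE128; all four inputs share input_len. */
static void shake128_x4(uint8_t *i0, uint8_t *i1, uint8_t *i2, uint8_t *i3,
                        uint32_t input_len,
                        uint8_t *o0, uint8_t *o1, uint8_t *o2, uint8_t *o3,
                        uint32_t output_len /* multiple of 168 */)
{
  Lib_IntVector_Intrinsics_vec256 *st = Hacl_Hash_SHA3_Simd256_state_malloc();
  Hacl_Hash_SHA3_Simd256_shake128_absorb_nblocks(st, i0, i1, i2, i3, input_len);
  Hacl_Hash_SHA3_Simd256_shake128_absorb_final(st, i0, i1, i2, i3, input_len);
  Hacl_Hash_SHA3_Simd256_shake128_squeeze_nblocks(st, o0, o1, o2, o3, output_len);
  Hacl_Hash_SHA3_Simd256_state_free(st);
}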
diff --git a/include/msvc/Hacl_SHA2_Types.h b/include/msvc/Hacl_SHA2_Types.h new file mode 100644 index 00000000..da2a6886 --- /dev/null +++ b/include/msvc/Hacl_SHA2_Types.h @@ -0,0 +1,68 @@ +/* MIT License + * + * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation + * Copyright (c) 2022-2023 HACL* Contributors + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + + +#ifndef __Hacl_SHA2_Types_H +#define __Hacl_SHA2_Types_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include <string.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +typedef struct Hacl_Hash_SHA2_uint8_2p_s +{ + uint8_t *fst; + uint8_t *snd; +} +Hacl_Hash_SHA2_uint8_2p; + +typedef struct Hacl_Hash_SHA2_uint8_3p_s +{ + uint8_t *fst; + Hacl_Hash_SHA2_uint8_2p snd; +} +Hacl_Hash_SHA2_uint8_3p; + +typedef struct Hacl_Hash_SHA2_uint8_4p_s +{ + uint8_t *fst; + Hacl_Hash_SHA2_uint8_3p snd; +} +Hacl_Hash_SHA2_uint8_4p; + +typedef uint8_t *Hacl_Hash_SHA2_bufx1; + +typedef Hacl_Hash_SHA2_uint8_4p Hacl_Hash_SHA2_bufx4; + +#if defined(__cplusplus) +} +#endif + +#define __Hacl_SHA2_Types_H_DEFINED +#endif diff --git a/include/msvc/Hacl_SHA2_Vec128.h b/include/msvc/Hacl_SHA2_Vec128.h index 5118cd36..c5df2075 100644 --- a/include/msvc/Hacl_SHA2_Vec128.h +++ b/include/msvc/Hacl_SHA2_Vec128.h @@ -35,6 +35,8 @@ extern "C" { #include "krml/lowstar_endianness.h" #include "krml/internal/target.h" +#include "Hacl_SHA2_Types.h" + void Hacl_SHA2_Vec128_sha224_4( uint8_t *dst0, diff --git a/include/msvc/Hacl_SHA2_Vec256.h b/include/msvc/Hacl_SHA2_Vec256.h index e41e9fd4..7e41314a 100644 --- a/include/msvc/Hacl_SHA2_Vec256.h +++ b/include/msvc/Hacl_SHA2_Vec256.h @@ -35,6 +35,7 @@ extern "C" { #include "krml/lowstar_endianness.h" #include "krml/internal/target.h" +#include "Hacl_SHA2_Types.h" #include "Hacl_Krmllib.h" void diff --git a/include/msvc/internal/Hacl_Frodo_KEM.h b/include/msvc/internal/Hacl_Frodo_KEM.h index 6a1ece49..c03a1b03 100644 --- a/include/msvc/internal/Hacl_Frodo_KEM.h +++ b/include/msvc/internal/Hacl_Frodo_KEM.h @@ -55,10 +55,10 @@ Hacl_Keccak_shake128_4x( uint8_t *output3 ) { - Hacl_Hash_SHA3_shake128_hacl(input_len, input0, output_len, output0); - Hacl_Hash_SHA3_shake128_hacl(input_len, input1, output_len, output1); - Hacl_Hash_SHA3_shake128_hacl(input_len, input2, output_len, output2); - Hacl_Hash_SHA3_shake128_hacl(input_len, input3, output_len, output3); + Hacl_Hash_SHA3_shake128(output0, output_len, input0, input_len); + Hacl_Hash_SHA3_shake128(output1, output_len, input1, input_len); + Hacl_Hash_SHA3_shake128(output2, output_len, input2, input_len); + Hacl_Hash_SHA3_shake128(output3, output_len, input3, input_len); } static inline void diff --git a/include/msvc/internal/Hacl_Hash_Blake2b.h b/include/msvc/internal/Hacl_Hash_Blake2b.h index 21689d60..6928d205 100644 --- a/include/msvc/internal/Hacl_Hash_Blake2b.h +++ b/include/msvc/internal/Hacl_Hash_Blake2b.h @@ -38,6 +38,13 @@ extern "C" { #include "internal/Hacl_Impl_Blake2_Constants.h" #include "../Hacl_Hash_Blake2b.h" +typedef struct Hacl_Hash_Blake2b_index_s +{ + uint8_t key_length; + uint8_t digest_length; +} +Hacl_Hash_Blake2b_index; + void Hacl_Hash_Blake2b_init(uint64_t *hash, uint32_t kk, uint32_t nn); void @@ -62,6 +69,13 @@ Hacl_Hash_Blake2b_update_last( uint32_t input_len ); void Hacl_Hash_Blake2b_finish(uint32_t nn, uint8_t *output, uint64_t *hash); +typedef struct K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t__s +{ + Hacl_Hash_Blake2b_blake2_params *fst; + uint8_t *snd; +} +K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_; + #if defined(__cplusplus) } #endif diff --git a/include/msvc/internal/Hacl_Hash_Blake2b_Simd256.h b/include/msvc/internal/Hacl_Hash_Blake2b_Simd256.h index 4cc07869..4dd986b2 100644 --- a/include/msvc/internal/Hacl_Hash_Blake2b_Simd256.h +++ b/include/msvc/internal/Hacl_Hash_Blake2b_Simd256.h @@ -36,6 +36,7 @@ extern "C" { #include
"krml/internal/target.h" #include "internal/Hacl_Impl_Blake2_Constants.h" +#include "internal/Hacl_Hash_Blake2b.h" #include "../Hacl_Hash_Blake2b_Simd256.h" #include "libintvector.h" diff --git a/include/msvc/internal/Hacl_Hash_Blake2s.h b/include/msvc/internal/Hacl_Hash_Blake2s.h index f814aa95..eccd92de 100644 --- a/include/msvc/internal/Hacl_Hash_Blake2s.h +++ b/include/msvc/internal/Hacl_Hash_Blake2s.h @@ -36,6 +36,7 @@ extern "C" { #include "krml/internal/target.h" #include "internal/Hacl_Impl_Blake2_Constants.h" +#include "internal/Hacl_Hash_Blake2b.h" #include "../Hacl_Hash_Blake2s.h" void Hacl_Hash_Blake2s_init(uint32_t *hash, uint32_t kk, uint32_t nn); diff --git a/include/msvc/internal/Hacl_Hash_Blake2s_Simd128.h b/include/msvc/internal/Hacl_Hash_Blake2s_Simd128.h index 0589aec5..2c422949 100644 --- a/include/msvc/internal/Hacl_Hash_Blake2s_Simd128.h +++ b/include/msvc/internal/Hacl_Hash_Blake2s_Simd128.h @@ -36,6 +36,7 @@ extern "C" { #include "krml/internal/target.h" #include "internal/Hacl_Impl_Blake2_Constants.h" +#include "internal/Hacl_Hash_Blake2b.h" #include "../Hacl_Hash_Blake2s_Simd128.h" #include "libintvector.h" diff --git a/include/msvc/internal/Hacl_Hash_SHA3.h b/include/msvc/internal/Hacl_Hash_SHA3.h index 1c8129fb..a82af4bd 100644 --- a/include/msvc/internal/Hacl_Hash_SHA3.h +++ b/include/msvc/internal/Hacl_Hash_SHA3.h @@ -37,6 +37,12 @@ extern "C" { #include "../Hacl_Hash_SHA3.h" +extern const uint32_t Hacl_Hash_SHA3_keccak_rotc[24U]; + +extern const uint32_t Hacl_Hash_SHA3_keccak_piln[24U]; + +extern const uint64_t Hacl_Hash_SHA3_keccak_rndc[24U]; + void Hacl_Hash_SHA3_update_multi_sha3( Spec_Hash_Definitions_hash_alg a, @@ -53,10 +59,6 @@ Hacl_Hash_SHA3_update_last_sha3( uint32_t input_len ); -void Hacl_Hash_SHA3_state_permute(uint64_t *s); - -void Hacl_Hash_SHA3_loadState(uint32_t rateInBytes, uint8_t *input, uint64_t *s); - #if defined(__cplusplus) } #endif diff --git a/include/msvc/internal/Hacl_Impl_Blake2_Constants.h b/include/msvc/internal/Hacl_Impl_Blake2_Constants.h index aedc2486..fb3a045c 100644 --- a/include/msvc/internal/Hacl_Impl_Blake2_Constants.h +++ b/include/msvc/internal/Hacl_Impl_Blake2_Constants.h @@ -37,7 +37,7 @@ extern "C" { static const uint32_t -Hacl_Hash_Blake2s_sigmaTable[160U] = +Hacl_Hash_Blake2b_sigmaTable[160U] = { 0U, 1U, 2U, 3U, 4U, 5U, 6U, 7U, 8U, 9U, 10U, 11U, 12U, 13U, 14U, 15U, 14U, 10U, 4U, 8U, 9U, 15U, 13U, 6U, 1U, 12U, 0U, 2U, 11U, 7U, 5U, 3U, 11U, 8U, 12U, 0U, 5U, 2U, 15U, 13U, 10U, 14U, 3U, 6U, @@ -51,7 +51,7 @@ Hacl_Hash_Blake2s_sigmaTable[160U] = static const uint32_t -Hacl_Hash_Blake2s_ivTable_S[8U] = +Hacl_Hash_Blake2b_ivTable_S[8U] = { 0x6A09E667U, 0xBB67AE85U, 0x3C6EF372U, 0xA54FF53AU, 0x510E527FU, 0x9B05688CU, 0x1F83D9ABU, 0x5BE0CD19U @@ -59,7 +59,7 @@ Hacl_Hash_Blake2s_ivTable_S[8U] = static const uint64_t -Hacl_Hash_Blake2s_ivTable_B[8U] = +Hacl_Hash_Blake2b_ivTable_B[8U] = { 0x6A09E667F3BCC908ULL, 0xBB67AE8584CAA73BULL, 0x3C6EF372FE94F82BULL, 0xA54FF53A5F1D36F1ULL, 0x510E527FADE682D1ULL, 0x9B05688C2B3E6C1FULL, 0x1F83D9ABFB41BD6BULL, 0x5BE0CD19137E2179ULL diff --git a/include/msvc/internal/Hacl_SHA2_Types.h b/include/msvc/internal/Hacl_SHA2_Types.h index 5a1eb668..dcb276aa 100644 --- a/include/msvc/internal/Hacl_SHA2_Types.h +++ b/include/msvc/internal/Hacl_SHA2_Types.h @@ -35,26 +35,7 @@ extern "C" { #include "krml/lowstar_endianness.h" #include "krml/internal/target.h" -typedef struct Hacl_Hash_SHA2_uint8_2p_s -{ - uint8_t *fst; - uint8_t *snd; -} -Hacl_Hash_SHA2_uint8_2p; - -typedef struct 
Hacl_Hash_SHA2_uint8_3p_s -{ - uint8_t *fst; - Hacl_Hash_SHA2_uint8_2p snd; -} -Hacl_Hash_SHA2_uint8_3p; - -typedef struct Hacl_Hash_SHA2_uint8_4p_s -{ - uint8_t *fst; - Hacl_Hash_SHA2_uint8_3p snd; -} -Hacl_Hash_SHA2_uint8_4p; +#include "../Hacl_SHA2_Types.h" typedef struct Hacl_Hash_SHA2_uint8_5p_s { diff --git a/include/msvc/lib_memzero0.h b/include/msvc/lib_memzero0.h index 506dd50f..fea3e41c 100644 --- a/include/msvc/lib_memzero0.h +++ b/include/msvc/lib_memzero0.h @@ -2,4 +2,4 @@ void Lib_Memzero0_memzero0(void *dst, uint64_t len); -#define Lib_Memzero0_memzero(dst, len, t) Lib_Memzero0_memzero0(dst, len * sizeof(t)) +#define Lib_Memzero0_memzero(dst, len, t, _ret_t) Lib_Memzero0_memzero0(dst, len * sizeof(t)) diff --git a/karamel/krmllib/dist/minimal/fstar_uint128_msvc.h b/karamel/krmllib/dist/minimal/fstar_uint128_msvc.h index cd1448dd..6ff658f5 100644 --- a/karamel/krmllib/dist/minimal/fstar_uint128_msvc.h +++ b/karamel/krmllib/dist/minimal/fstar_uint128_msvc.h @@ -217,7 +217,7 @@ static const uint32_t FStar_UInt128_u32_64 = (uint32_t)64U; inline static uint64_t FStar_UInt128_add_u64_shift_left(uint64_t hi, uint64_t lo, uint32_t s) { - return (hi << s) + (lo >> FStar_UInt128_u32_64 - s); + return (hi << s) + (lo >> (FStar_UInt128_u32_64 - s)); } inline static uint64_t @@ -241,7 +241,7 @@ inline static FStar_UInt128_uint128 FStar_UInt128_shift_left_large(FStar_UInt128_uint128 a, uint32_t s) { FStar_UInt128_uint128 lit; lit.low = (uint64_t)0U; - lit.high = a.low << s - FStar_UInt128_u32_64; + lit.high = a.low << (s - FStar_UInt128_u32_64); return lit; } @@ -267,7 +267,7 @@ FStar_UInt128_shift_left(FStar_UInt128_uint128 a, uint32_t s) { inline static uint64_t FStar_UInt128_add_u64_shift_right(uint64_t hi, uint64_t lo, uint32_t s) { - return (lo >> s) + (hi << FStar_UInt128_u32_64 - s); + return (lo >> s) + (hi << (FStar_UInt128_u32_64 - s)); } inline static uint64_t @@ -290,7 +290,7 @@ FStar_UInt128_shift_right_small(FStar_UInt128_uint128 a, uint32_t s) { inline static FStar_UInt128_uint128 FStar_UInt128_shift_right_large(FStar_UInt128_uint128 a, uint32_t s) { FStar_UInt128_uint128 lit; - lit.low = a.high >> s - FStar_UInt128_u32_64; + lit.low = a.high >> (s - FStar_UInt128_u32_64); lit.high = (uint64_t)0U; return lit; } @@ -488,7 +488,7 @@ FStar_UInt128_mul_wide_impl(uint64_t x, uint64_t y) { u1 * (y >> FStar_UInt128_u32_32) + FStar_UInt128_u64_mod_32(t_), w3); lit.high = x_ * (y >> FStar_UInt128_u32_32) + (t_ >> FStar_UInt128_u32_32) + - (u1 * (y >> FStar_UInt128_u32_32) + FStar_UInt128_u64_mod_32(t_) >> + ((u1 * (y >> FStar_UInt128_u32_32) + FStar_UInt128_u64_mod_32(t_)) >> FStar_UInt128_u32_32); return lit; } diff --git a/ocaml/ctypes.depend b/ocaml/ctypes.depend index 065ce5eb..d94fad90 100644 --- a/ocaml/ctypes.depend +++ b/ocaml/ctypes.depend @@ -1,4 +1,4 @@ -CTYPES_DEPS=lib/Hacl_Streaming_Types_stubs.cmx lib/Hacl_Streaming_Types_bindings.cmx lib/Hacl_Spec_stubs.cmx lib/Hacl_Spec_bindings.cmx lib/Hacl_Hash_Blake2b_stubs.cmx lib/Hacl_Hash_Blake2b_bindings.cmx lib/Hacl_Hash_Blake2s_stubs.cmx lib/Hacl_Hash_Blake2s_bindings.cmx lib/Hacl_Hash_Blake2b_Simd256_stubs.cmx lib/Hacl_Hash_Blake2b_Simd256_bindings.cmx lib/Hacl_Hash_Blake2s_Simd128_stubs.cmx lib/Hacl_Hash_Blake2s_Simd128_bindings.cmx lib/Hacl_Hash_Base_stubs.cmx lib/Hacl_Hash_Base_bindings.cmx lib/Hacl_Hash_SHA1_stubs.cmx lib/Hacl_Hash_SHA1_bindings.cmx lib/Hacl_Hash_SHA2_stubs.cmx lib/Hacl_Hash_SHA2_bindings.cmx lib/Hacl_HMAC_stubs.cmx lib/Hacl_HMAC_bindings.cmx lib/Hacl_HMAC_Blake2s_128_stubs.cmx 
lib/Hacl_HMAC_Blake2s_128_bindings.cmx lib/Hacl_HMAC_Blake2b_256_stubs.cmx lib/Hacl_HMAC_Blake2b_256_bindings.cmx lib/Hacl_Hash_SHA3_stubs.cmx lib/Hacl_Hash_SHA3_bindings.cmx lib/Hacl_Hash_MD5_stubs.cmx lib/Hacl_Hash_MD5_bindings.cmx lib/Hacl_SHA2_Types_stubs.cmx lib/Hacl_SHA2_Types_bindings.cmx lib/EverCrypt_Error_stubs.cmx lib/EverCrypt_Error_bindings.cmx lib/EverCrypt_AutoConfig2_stubs.cmx lib/EverCrypt_AutoConfig2_bindings.cmx lib/EverCrypt_Hash_stubs.cmx lib/EverCrypt_Hash_bindings.cmx lib/Hacl_Chacha20_stubs.cmx lib/Hacl_Chacha20_bindings.cmx lib/Hacl_Salsa20_stubs.cmx lib/Hacl_Salsa20_bindings.cmx lib/Hacl_Bignum_Base_stubs.cmx lib/Hacl_Bignum_Base_bindings.cmx lib/Hacl_Bignum_stubs.cmx lib/Hacl_Bignum_bindings.cmx lib/Hacl_Curve25519_64_stubs.cmx lib/Hacl_Curve25519_64_bindings.cmx lib/Hacl_Bignum25519_51_stubs.cmx lib/Hacl_Bignum25519_51_bindings.cmx lib/Hacl_Curve25519_51_stubs.cmx lib/Hacl_Curve25519_51_bindings.cmx lib/Hacl_MAC_Poly1305_stubs.cmx lib/Hacl_MAC_Poly1305_bindings.cmx lib/Hacl_AEAD_Chacha20Poly1305_stubs.cmx lib/Hacl_AEAD_Chacha20Poly1305_bindings.cmx lib/Hacl_MAC_Poly1305_Simd128_stubs.cmx lib/Hacl_MAC_Poly1305_Simd128_bindings.cmx lib/Hacl_Chacha20_Vec128_stubs.cmx lib/Hacl_Chacha20_Vec128_bindings.cmx lib/Hacl_AEAD_Chacha20Poly1305_Simd128_stubs.cmx lib/Hacl_AEAD_Chacha20Poly1305_Simd128_bindings.cmx lib/Hacl_MAC_Poly1305_Simd256_stubs.cmx lib/Hacl_MAC_Poly1305_Simd256_bindings.cmx lib/Hacl_Chacha20_Vec256_stubs.cmx lib/Hacl_Chacha20_Vec256_bindings.cmx lib/Hacl_AEAD_Chacha20Poly1305_Simd256_stubs.cmx lib/Hacl_AEAD_Chacha20Poly1305_Simd256_bindings.cmx lib/Hacl_Ed25519_stubs.cmx lib/Hacl_Ed25519_bindings.cmx lib/Hacl_NaCl_stubs.cmx lib/Hacl_NaCl_bindings.cmx lib/Hacl_P256_stubs.cmx lib/Hacl_P256_bindings.cmx lib/Hacl_Bignum_K256_stubs.cmx lib/Hacl_Bignum_K256_bindings.cmx lib/Hacl_K256_ECDSA_stubs.cmx lib/Hacl_K256_ECDSA_bindings.cmx lib/Hacl_Frodo_KEM_stubs.cmx lib/Hacl_Frodo_KEM_bindings.cmx lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_stubs.cmx lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_bindings.cmx lib/Hacl_IntTypes_Intrinsics_stubs.cmx lib/Hacl_IntTypes_Intrinsics_bindings.cmx lib/Hacl_IntTypes_Intrinsics_128_stubs.cmx lib/Hacl_IntTypes_Intrinsics_128_bindings.cmx lib/Hacl_RSAPSS_stubs.cmx lib/Hacl_RSAPSS_bindings.cmx lib/Hacl_FFDHE_stubs.cmx lib/Hacl_FFDHE_bindings.cmx lib/Hacl_Frodo640_stubs.cmx lib/Hacl_Frodo640_bindings.cmx lib/Hacl_HKDF_stubs.cmx lib/Hacl_HKDF_bindings.cmx lib/Hacl_HPKE_Curve51_CP128_SHA512_stubs.cmx lib/Hacl_HPKE_Curve51_CP128_SHA512_bindings.cmx lib/EverCrypt_Cipher_stubs.cmx lib/EverCrypt_Cipher_bindings.cmx lib/Hacl_GenericField32_stubs.cmx lib/Hacl_GenericField32_bindings.cmx lib/Hacl_SHA2_Vec256_stubs.cmx lib/Hacl_SHA2_Vec256_bindings.cmx lib/Hacl_EC_K256_stubs.cmx lib/Hacl_EC_K256_bindings.cmx lib/Hacl_Bignum4096_stubs.cmx lib/Hacl_Bignum4096_bindings.cmx lib/Hacl_Chacha20_Vec32_stubs.cmx lib/Hacl_Chacha20_Vec32_bindings.cmx lib/EverCrypt_Ed25519_stubs.cmx lib/EverCrypt_Ed25519_bindings.cmx lib/Hacl_Bignum4096_32_stubs.cmx lib/Hacl_Bignum4096_32_bindings.cmx lib/EverCrypt_HMAC_stubs.cmx lib/EverCrypt_HMAC_bindings.cmx lib/Hacl_HMAC_DRBG_stubs.cmx lib/Hacl_HMAC_DRBG_bindings.cmx lib/EverCrypt_DRBG_stubs.cmx lib/EverCrypt_DRBG_bindings.cmx lib/Hacl_HPKE_Curve64_CP128_SHA512_stubs.cmx lib/Hacl_HPKE_Curve64_CP128_SHA512_bindings.cmx lib/Hacl_HPKE_P256_CP128_SHA256_stubs.cmx lib/Hacl_HPKE_P256_CP128_SHA256_bindings.cmx lib/EverCrypt_Curve25519_stubs.cmx lib/EverCrypt_Curve25519_bindings.cmx 
lib/Hacl_HPKE_Curve51_CP256_SHA512_stubs.cmx lib/Hacl_HPKE_Curve51_CP256_SHA512_bindings.cmx lib/Hacl_Frodo976_stubs.cmx lib/Hacl_Frodo976_bindings.cmx lib/Hacl_HKDF_Blake2s_128_stubs.cmx lib/Hacl_HKDF_Blake2s_128_bindings.cmx lib/Hacl_GenericField64_stubs.cmx lib/Hacl_GenericField64_bindings.cmx lib/Hacl_Frodo1344_stubs.cmx lib/Hacl_Frodo1344_bindings.cmx lib/Hacl_HPKE_Curve64_CP256_SHA512_stubs.cmx lib/Hacl_HPKE_Curve64_CP256_SHA512_bindings.cmx lib/Hacl_Bignum32_stubs.cmx lib/Hacl_Bignum32_bindings.cmx lib/Hacl_HPKE_Curve51_CP128_SHA256_stubs.cmx lib/Hacl_HPKE_Curve51_CP128_SHA256_bindings.cmx lib/Hacl_HPKE_Curve64_CP128_SHA256_stubs.cmx lib/Hacl_HPKE_Curve64_CP128_SHA256_bindings.cmx lib/Hacl_Bignum256_32_stubs.cmx lib/Hacl_Bignum256_32_bindings.cmx lib/Hacl_SHA2_Vec128_stubs.cmx lib/Hacl_SHA2_Vec128_bindings.cmx lib/Hacl_HPKE_Curve51_CP32_SHA256_stubs.cmx lib/Hacl_HPKE_Curve51_CP32_SHA256_bindings.cmx lib/Hacl_HPKE_Curve64_CP256_SHA256_stubs.cmx lib/Hacl_HPKE_Curve64_CP256_SHA256_bindings.cmx lib/EverCrypt_Poly1305_stubs.cmx lib/EverCrypt_Poly1305_bindings.cmx lib/Hacl_HPKE_Curve51_CP32_SHA512_stubs.cmx lib/Hacl_HPKE_Curve51_CP32_SHA512_bindings.cmx lib/Hacl_HPKE_P256_CP256_SHA256_stubs.cmx lib/Hacl_HPKE_P256_CP256_SHA256_bindings.cmx lib/Hacl_HPKE_P256_CP32_SHA256_stubs.cmx lib/Hacl_HPKE_P256_CP32_SHA256_bindings.cmx lib/Hacl_Bignum64_stubs.cmx lib/Hacl_Bignum64_bindings.cmx lib/Hacl_Frodo64_stubs.cmx lib/Hacl_Frodo64_bindings.cmx lib/Hacl_HKDF_Blake2b_256_stubs.cmx lib/Hacl_HKDF_Blake2b_256_bindings.cmx lib/Hacl_HPKE_Curve64_CP32_SHA256_stubs.cmx lib/Hacl_HPKE_Curve64_CP32_SHA256_bindings.cmx lib/Hacl_HPKE_Curve64_CP32_SHA512_stubs.cmx lib/Hacl_HPKE_Curve64_CP32_SHA512_bindings.cmx lib/EverCrypt_HKDF_stubs.cmx lib/EverCrypt_HKDF_bindings.cmx lib/Hacl_EC_Ed25519_stubs.cmx lib/Hacl_EC_Ed25519_bindings.cmx lib/Hacl_HPKE_Curve51_CP256_SHA256_stubs.cmx lib/Hacl_HPKE_Curve51_CP256_SHA256_bindings.cmx lib/EverCrypt_Chacha20Poly1305_stubs.cmx lib/EverCrypt_Chacha20Poly1305_bindings.cmx lib/EverCrypt_AEAD_stubs.cmx lib/EverCrypt_AEAD_bindings.cmx lib/Hacl_Bignum256_stubs.cmx lib/Hacl_Bignum256_bindings.cmx +CTYPES_DEPS=lib/Hacl_Streaming_Types_stubs.cmx lib/Hacl_Streaming_Types_bindings.cmx lib/Hacl_Spec_stubs.cmx lib/Hacl_Spec_bindings.cmx lib/Hacl_Hash_Blake2b_stubs.cmx lib/Hacl_Hash_Blake2b_bindings.cmx lib/Hacl_Hash_Blake2s_stubs.cmx lib/Hacl_Hash_Blake2s_bindings.cmx lib/Hacl_Hash_Blake2b_Simd256_stubs.cmx lib/Hacl_Hash_Blake2b_Simd256_bindings.cmx lib/Hacl_Hash_Blake2s_Simd128_stubs.cmx lib/Hacl_Hash_Blake2s_Simd128_bindings.cmx lib/Hacl_Hash_Base_stubs.cmx lib/Hacl_Hash_Base_bindings.cmx lib/Hacl_Hash_SHA1_stubs.cmx lib/Hacl_Hash_SHA1_bindings.cmx lib/Hacl_Hash_SHA2_stubs.cmx lib/Hacl_Hash_SHA2_bindings.cmx lib/Hacl_HMAC_stubs.cmx lib/Hacl_HMAC_bindings.cmx lib/Hacl_HMAC_Blake2s_128_stubs.cmx lib/Hacl_HMAC_Blake2s_128_bindings.cmx lib/Hacl_HMAC_Blake2b_256_stubs.cmx lib/Hacl_HMAC_Blake2b_256_bindings.cmx lib/Hacl_Hash_SHA3_stubs.cmx lib/Hacl_Hash_SHA3_bindings.cmx lib/Hacl_SHA2_Types_stubs.cmx lib/Hacl_SHA2_Types_bindings.cmx lib/Hacl_Hash_SHA3_Simd256_stubs.cmx lib/Hacl_Hash_SHA3_Simd256_bindings.cmx lib/Hacl_Hash_MD5_stubs.cmx lib/Hacl_Hash_MD5_bindings.cmx lib/EverCrypt_Error_stubs.cmx lib/EverCrypt_Error_bindings.cmx lib/EverCrypt_AutoConfig2_stubs.cmx lib/EverCrypt_AutoConfig2_bindings.cmx lib/EverCrypt_Hash_stubs.cmx lib/EverCrypt_Hash_bindings.cmx lib/Hacl_Chacha20_stubs.cmx lib/Hacl_Chacha20_bindings.cmx lib/Hacl_Salsa20_stubs.cmx lib/Hacl_Salsa20_bindings.cmx 
lib/Hacl_Bignum_Base_stubs.cmx lib/Hacl_Bignum_Base_bindings.cmx lib/Hacl_Bignum_stubs.cmx lib/Hacl_Bignum_bindings.cmx lib/Hacl_Curve25519_64_stubs.cmx lib/Hacl_Curve25519_64_bindings.cmx lib/Hacl_Bignum25519_51_stubs.cmx lib/Hacl_Bignum25519_51_bindings.cmx lib/Hacl_Curve25519_51_stubs.cmx lib/Hacl_Curve25519_51_bindings.cmx lib/Hacl_MAC_Poly1305_stubs.cmx lib/Hacl_MAC_Poly1305_bindings.cmx lib/Hacl_AEAD_Chacha20Poly1305_stubs.cmx lib/Hacl_AEAD_Chacha20Poly1305_bindings.cmx lib/Hacl_MAC_Poly1305_Simd128_stubs.cmx lib/Hacl_MAC_Poly1305_Simd128_bindings.cmx lib/Hacl_Chacha20_Vec128_stubs.cmx lib/Hacl_Chacha20_Vec128_bindings.cmx lib/Hacl_AEAD_Chacha20Poly1305_Simd128_stubs.cmx lib/Hacl_AEAD_Chacha20Poly1305_Simd128_bindings.cmx lib/Hacl_MAC_Poly1305_Simd256_stubs.cmx lib/Hacl_MAC_Poly1305_Simd256_bindings.cmx lib/Hacl_Chacha20_Vec256_stubs.cmx lib/Hacl_Chacha20_Vec256_bindings.cmx lib/Hacl_AEAD_Chacha20Poly1305_Simd256_stubs.cmx lib/Hacl_AEAD_Chacha20Poly1305_Simd256_bindings.cmx lib/Hacl_Ed25519_stubs.cmx lib/Hacl_Ed25519_bindings.cmx lib/Hacl_NaCl_stubs.cmx lib/Hacl_NaCl_bindings.cmx lib/Hacl_P256_stubs.cmx lib/Hacl_P256_bindings.cmx lib/Hacl_Bignum_K256_stubs.cmx lib/Hacl_Bignum_K256_bindings.cmx lib/Hacl_K256_ECDSA_stubs.cmx lib/Hacl_K256_ECDSA_bindings.cmx lib/Hacl_Frodo_KEM_stubs.cmx lib/Hacl_Frodo_KEM_bindings.cmx lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_stubs.cmx lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_bindings.cmx lib/Hacl_IntTypes_Intrinsics_stubs.cmx lib/Hacl_IntTypes_Intrinsics_bindings.cmx lib/Hacl_IntTypes_Intrinsics_128_stubs.cmx lib/Hacl_IntTypes_Intrinsics_128_bindings.cmx lib/Hacl_RSAPSS_stubs.cmx lib/Hacl_RSAPSS_bindings.cmx lib/Hacl_FFDHE_stubs.cmx lib/Hacl_FFDHE_bindings.cmx lib/Hacl_Frodo640_stubs.cmx lib/Hacl_Frodo640_bindings.cmx lib/Hacl_HKDF_stubs.cmx lib/Hacl_HKDF_bindings.cmx lib/Hacl_HPKE_Curve51_CP128_SHA512_stubs.cmx lib/Hacl_HPKE_Curve51_CP128_SHA512_bindings.cmx lib/EverCrypt_Cipher_stubs.cmx lib/EverCrypt_Cipher_bindings.cmx lib/Hacl_GenericField32_stubs.cmx lib/Hacl_GenericField32_bindings.cmx lib/Hacl_SHA2_Vec256_stubs.cmx lib/Hacl_SHA2_Vec256_bindings.cmx lib/Hacl_EC_K256_stubs.cmx lib/Hacl_EC_K256_bindings.cmx lib/Hacl_Bignum4096_stubs.cmx lib/Hacl_Bignum4096_bindings.cmx lib/Hacl_Chacha20_Vec32_stubs.cmx lib/Hacl_Chacha20_Vec32_bindings.cmx lib/EverCrypt_Ed25519_stubs.cmx lib/EverCrypt_Ed25519_bindings.cmx lib/Hacl_Bignum4096_32_stubs.cmx lib/Hacl_Bignum4096_32_bindings.cmx lib/EverCrypt_HMAC_stubs.cmx lib/EverCrypt_HMAC_bindings.cmx lib/Hacl_HMAC_DRBG_stubs.cmx lib/Hacl_HMAC_DRBG_bindings.cmx lib/EverCrypt_DRBG_stubs.cmx lib/EverCrypt_DRBG_bindings.cmx lib/Hacl_HPKE_Curve64_CP128_SHA512_stubs.cmx lib/Hacl_HPKE_Curve64_CP128_SHA512_bindings.cmx lib/Hacl_HPKE_P256_CP128_SHA256_stubs.cmx lib/Hacl_HPKE_P256_CP128_SHA256_bindings.cmx lib/EverCrypt_Curve25519_stubs.cmx lib/EverCrypt_Curve25519_bindings.cmx lib/Hacl_HPKE_Curve51_CP256_SHA512_stubs.cmx lib/Hacl_HPKE_Curve51_CP256_SHA512_bindings.cmx lib/Hacl_Frodo976_stubs.cmx lib/Hacl_Frodo976_bindings.cmx lib/Hacl_HKDF_Blake2s_128_stubs.cmx lib/Hacl_HKDF_Blake2s_128_bindings.cmx lib/Hacl_GenericField64_stubs.cmx lib/Hacl_GenericField64_bindings.cmx lib/Hacl_Frodo1344_stubs.cmx lib/Hacl_Frodo1344_bindings.cmx lib/Hacl_HPKE_Curve64_CP256_SHA512_stubs.cmx lib/Hacl_HPKE_Curve64_CP256_SHA512_bindings.cmx lib/Hacl_Bignum32_stubs.cmx lib/Hacl_Bignum32_bindings.cmx lib/Hacl_HPKE_Curve51_CP128_SHA256_stubs.cmx lib/Hacl_HPKE_Curve51_CP128_SHA256_bindings.cmx 
lib/Hacl_HPKE_Curve64_CP128_SHA256_stubs.cmx lib/Hacl_HPKE_Curve64_CP128_SHA256_bindings.cmx lib/Hacl_Bignum256_32_stubs.cmx lib/Hacl_Bignum256_32_bindings.cmx lib/Hacl_SHA2_Vec128_stubs.cmx lib/Hacl_SHA2_Vec128_bindings.cmx lib/Hacl_HPKE_Curve51_CP32_SHA256_stubs.cmx lib/Hacl_HPKE_Curve51_CP32_SHA256_bindings.cmx lib/Hacl_HPKE_Curve64_CP256_SHA256_stubs.cmx lib/Hacl_HPKE_Curve64_CP256_SHA256_bindings.cmx lib/EverCrypt_Poly1305_stubs.cmx lib/EverCrypt_Poly1305_bindings.cmx lib/Hacl_HPKE_Curve51_CP32_SHA512_stubs.cmx lib/Hacl_HPKE_Curve51_CP32_SHA512_bindings.cmx lib/Hacl_HPKE_P256_CP256_SHA256_stubs.cmx lib/Hacl_HPKE_P256_CP256_SHA256_bindings.cmx lib/Hacl_HPKE_P256_CP32_SHA256_stubs.cmx lib/Hacl_HPKE_P256_CP32_SHA256_bindings.cmx lib/Hacl_Bignum64_stubs.cmx lib/Hacl_Bignum64_bindings.cmx lib/Hacl_Frodo64_stubs.cmx lib/Hacl_Frodo64_bindings.cmx lib/Hacl_HKDF_Blake2b_256_stubs.cmx lib/Hacl_HKDF_Blake2b_256_bindings.cmx lib/Hacl_HPKE_Curve64_CP32_SHA256_stubs.cmx lib/Hacl_HPKE_Curve64_CP32_SHA256_bindings.cmx lib/Hacl_HPKE_Curve64_CP32_SHA512_stubs.cmx lib/Hacl_HPKE_Curve64_CP32_SHA512_bindings.cmx lib/EverCrypt_HKDF_stubs.cmx lib/EverCrypt_HKDF_bindings.cmx lib/Hacl_EC_Ed25519_stubs.cmx lib/Hacl_EC_Ed25519_bindings.cmx lib/Hacl_HPKE_Curve51_CP256_SHA256_stubs.cmx lib/Hacl_HPKE_Curve51_CP256_SHA256_bindings.cmx lib/EverCrypt_Chacha20Poly1305_stubs.cmx lib/EverCrypt_Chacha20Poly1305_bindings.cmx lib/EverCrypt_AEAD_stubs.cmx lib/EverCrypt_AEAD_bindings.cmx lib/Hacl_Bignum256_stubs.cmx lib/Hacl_Bignum256_bindings.cmx lib/Hacl_Streaming_Types_bindings.cmx: lib/Hacl_Streaming_Types_bindings.cmo: lib_gen/Hacl_Streaming_Types_gen.cmx: lib/Hacl_Streaming_Types_bindings.cmx @@ -11,18 +11,18 @@ lib/Hacl_Hash_Blake2b_bindings.cmx: lib/Hacl_Streaming_Types_bindings.cmx lib/Ha lib/Hacl_Hash_Blake2b_bindings.cmo: lib/Hacl_Streaming_Types_bindings.cmo lib/Hacl_Streaming_Types_stubs.cmo lib_gen/Hacl_Hash_Blake2b_gen.cmx: lib/Hacl_Hash_Blake2b_bindings.cmx lib_gen/Hacl_Hash_Blake2b_gen.exe: lib/Hacl_Streaming_Types_bindings.cmx lib/Hacl_Streaming_Types_stubs.cmx lib/Hacl_Streaming_Types_c_stubs.o lib/Hacl_Hash_Blake2b_bindings.cmx lib_gen/Hacl_Hash_Blake2b_gen.cmx -lib/Hacl_Hash_Blake2s_bindings.cmx: lib/Hacl_Streaming_Types_bindings.cmx lib/Hacl_Streaming_Types_stubs.cmx -lib/Hacl_Hash_Blake2s_bindings.cmo: lib/Hacl_Streaming_Types_bindings.cmo lib/Hacl_Streaming_Types_stubs.cmo +lib/Hacl_Hash_Blake2s_bindings.cmx: lib/Hacl_Streaming_Types_bindings.cmx lib/Hacl_Streaming_Types_stubs.cmx lib/Hacl_Hash_Blake2b_bindings.cmx lib/Hacl_Hash_Blake2b_stubs.cmx +lib/Hacl_Hash_Blake2s_bindings.cmo: lib/Hacl_Streaming_Types_bindings.cmo lib/Hacl_Streaming_Types_stubs.cmo lib/Hacl_Hash_Blake2b_bindings.cmo lib/Hacl_Hash_Blake2b_stubs.cmo lib_gen/Hacl_Hash_Blake2s_gen.cmx: lib/Hacl_Hash_Blake2s_bindings.cmx -lib_gen/Hacl_Hash_Blake2s_gen.exe: lib/Hacl_Streaming_Types_bindings.cmx lib/Hacl_Streaming_Types_stubs.cmx lib/Hacl_Streaming_Types_c_stubs.o lib/Hacl_Hash_Blake2s_bindings.cmx lib_gen/Hacl_Hash_Blake2s_gen.cmx -lib/Hacl_Hash_Blake2b_Simd256_bindings.cmx: -lib/Hacl_Hash_Blake2b_Simd256_bindings.cmo: +lib_gen/Hacl_Hash_Blake2s_gen.exe: lib/Hacl_Streaming_Types_bindings.cmx lib/Hacl_Streaming_Types_stubs.cmx lib/Hacl_Streaming_Types_c_stubs.o lib/Hacl_Hash_Blake2b_bindings.cmx lib/Hacl_Hash_Blake2b_stubs.cmx lib/Hacl_Hash_Blake2b_c_stubs.o lib/Hacl_Hash_Blake2s_bindings.cmx lib_gen/Hacl_Hash_Blake2s_gen.cmx +lib/Hacl_Hash_Blake2b_Simd256_bindings.cmx: lib/Hacl_Streaming_Types_bindings.cmx 
lib/Hacl_Streaming_Types_stubs.cmx lib/Hacl_Hash_Blake2b_bindings.cmx lib/Hacl_Hash_Blake2b_stubs.cmx +lib/Hacl_Hash_Blake2b_Simd256_bindings.cmo: lib/Hacl_Streaming_Types_bindings.cmo lib/Hacl_Streaming_Types_stubs.cmo lib/Hacl_Hash_Blake2b_bindings.cmo lib/Hacl_Hash_Blake2b_stubs.cmo lib_gen/Hacl_Hash_Blake2b_Simd256_gen.cmx: lib/Hacl_Hash_Blake2b_Simd256_bindings.cmx -lib_gen/Hacl_Hash_Blake2b_Simd256_gen.exe: lib/Hacl_Hash_Blake2b_Simd256_bindings.cmx lib_gen/Hacl_Hash_Blake2b_Simd256_gen.cmx -lib/Hacl_Hash_Blake2s_Simd128_bindings.cmx: -lib/Hacl_Hash_Blake2s_Simd128_bindings.cmo: +lib_gen/Hacl_Hash_Blake2b_Simd256_gen.exe: lib/Hacl_Streaming_Types_bindings.cmx lib/Hacl_Streaming_Types_stubs.cmx lib/Hacl_Streaming_Types_c_stubs.o lib/Hacl_Hash_Blake2b_bindings.cmx lib/Hacl_Hash_Blake2b_stubs.cmx lib/Hacl_Hash_Blake2b_c_stubs.o lib/Hacl_Hash_Blake2b_Simd256_bindings.cmx lib_gen/Hacl_Hash_Blake2b_Simd256_gen.cmx +lib/Hacl_Hash_Blake2s_Simd128_bindings.cmx: lib/Hacl_Streaming_Types_bindings.cmx lib/Hacl_Streaming_Types_stubs.cmx lib/Hacl_Hash_Blake2b_bindings.cmx lib/Hacl_Hash_Blake2b_stubs.cmx +lib/Hacl_Hash_Blake2s_Simd128_bindings.cmo: lib/Hacl_Streaming_Types_bindings.cmo lib/Hacl_Streaming_Types_stubs.cmo lib/Hacl_Hash_Blake2b_bindings.cmo lib/Hacl_Hash_Blake2b_stubs.cmo lib_gen/Hacl_Hash_Blake2s_Simd128_gen.cmx: lib/Hacl_Hash_Blake2s_Simd128_bindings.cmx -lib_gen/Hacl_Hash_Blake2s_Simd128_gen.exe: lib/Hacl_Hash_Blake2s_Simd128_bindings.cmx lib_gen/Hacl_Hash_Blake2s_Simd128_gen.cmx +lib_gen/Hacl_Hash_Blake2s_Simd128_gen.exe: lib/Hacl_Streaming_Types_bindings.cmx lib/Hacl_Streaming_Types_stubs.cmx lib/Hacl_Streaming_Types_c_stubs.o lib/Hacl_Hash_Blake2b_bindings.cmx lib/Hacl_Hash_Blake2b_stubs.cmx lib/Hacl_Hash_Blake2b_c_stubs.o lib/Hacl_Hash_Blake2s_Simd128_bindings.cmx lib_gen/Hacl_Hash_Blake2s_Simd128_gen.cmx lib/Hacl_Hash_Base_bindings.cmx: lib/Hacl_Streaming_Types_bindings.cmx lib/Hacl_Streaming_Types_stubs.cmx lib/Hacl_Hash_Base_bindings.cmo: lib/Hacl_Streaming_Types_bindings.cmo lib/Hacl_Streaming_Types_stubs.cmo lib_gen/Hacl_Hash_Base_gen.cmx: lib/Hacl_Hash_Base_bindings.cmx @@ -51,14 +51,18 @@ lib/Hacl_Hash_SHA3_bindings.cmx: lib/Hacl_Streaming_Types_bindings.cmx lib/Hacl_ lib/Hacl_Hash_SHA3_bindings.cmo: lib/Hacl_Streaming_Types_bindings.cmo lib/Hacl_Streaming_Types_stubs.cmo lib_gen/Hacl_Hash_SHA3_gen.cmx: lib/Hacl_Hash_SHA3_bindings.cmx lib_gen/Hacl_Hash_SHA3_gen.exe: lib/Hacl_Streaming_Types_bindings.cmx lib/Hacl_Streaming_Types_stubs.cmx lib/Hacl_Streaming_Types_c_stubs.o lib/Hacl_Hash_SHA3_bindings.cmx lib_gen/Hacl_Hash_SHA3_gen.cmx -lib/Hacl_Hash_MD5_bindings.cmx: lib/Hacl_Streaming_Types_bindings.cmx lib/Hacl_Streaming_Types_stubs.cmx -lib/Hacl_Hash_MD5_bindings.cmo: lib/Hacl_Streaming_Types_bindings.cmo lib/Hacl_Streaming_Types_stubs.cmo -lib_gen/Hacl_Hash_MD5_gen.cmx: lib/Hacl_Hash_MD5_bindings.cmx -lib_gen/Hacl_Hash_MD5_gen.exe: lib/Hacl_Streaming_Types_bindings.cmx lib/Hacl_Streaming_Types_stubs.cmx lib/Hacl_Streaming_Types_c_stubs.o lib/Hacl_Hash_MD5_bindings.cmx lib_gen/Hacl_Hash_MD5_gen.cmx lib/Hacl_SHA2_Types_bindings.cmx: lib/Hacl_SHA2_Types_bindings.cmo: lib_gen/Hacl_SHA2_Types_gen.cmx: lib/Hacl_SHA2_Types_bindings.cmx lib_gen/Hacl_SHA2_Types_gen.exe: lib/Hacl_SHA2_Types_bindings.cmx lib_gen/Hacl_SHA2_Types_gen.cmx +lib/Hacl_Hash_SHA3_Simd256_bindings.cmx: +lib/Hacl_Hash_SHA3_Simd256_bindings.cmo: +lib_gen/Hacl_Hash_SHA3_Simd256_gen.cmx: lib/Hacl_Hash_SHA3_Simd256_bindings.cmx +lib_gen/Hacl_Hash_SHA3_Simd256_gen.exe: lib/Hacl_Hash_SHA3_Simd256_bindings.cmx 
lib_gen/Hacl_Hash_SHA3_Simd256_gen.cmx +lib/Hacl_Hash_MD5_bindings.cmx: lib/Hacl_Streaming_Types_bindings.cmx lib/Hacl_Streaming_Types_stubs.cmx +lib/Hacl_Hash_MD5_bindings.cmo: lib/Hacl_Streaming_Types_bindings.cmo lib/Hacl_Streaming_Types_stubs.cmo +lib_gen/Hacl_Hash_MD5_gen.cmx: lib/Hacl_Hash_MD5_bindings.cmx +lib_gen/Hacl_Hash_MD5_gen.exe: lib/Hacl_Streaming_Types_bindings.cmx lib/Hacl_Streaming_Types_stubs.cmx lib/Hacl_Streaming_Types_c_stubs.o lib/Hacl_Hash_MD5_bindings.cmx lib_gen/Hacl_Hash_MD5_gen.cmx lib/EverCrypt_Error_bindings.cmx: lib/EverCrypt_Error_bindings.cmo: lib_gen/EverCrypt_Error_gen.cmx: lib/EverCrypt_Error_bindings.cmx diff --git a/ocaml/lib/Hacl_Hash_Blake2b_Simd256_bindings.ml b/ocaml/lib/Hacl_Hash_Blake2b_Simd256_bindings.ml index e7ef20d8..1c132a7a 100644 --- a/ocaml/lib/Hacl_Hash_Blake2b_Simd256_bindings.ml +++ b/ocaml/lib/Hacl_Hash_Blake2b_Simd256_bindings.ml @@ -2,6 +2,12 @@ open Ctypes module Bindings(F:Cstubs.FOREIGN) = struct open F + module Hacl_Streaming_Types_applied = + (Hacl_Streaming_Types_bindings.Bindings)(Hacl_Streaming_Types_stubs) + open Hacl_Streaming_Types_applied + module Hacl_Hash_Blake2b_applied = + (Hacl_Hash_Blake2b_bindings.Bindings)(Hacl_Hash_Blake2b_stubs) + open Hacl_Hash_Blake2b_applied let hacl_Hash_Blake2b_Simd256_hash_with_key = foreign "Hacl_Hash_Blake2b_Simd256_hash_with_key" (ocaml_bytes @-> @@ -9,4 +15,11 @@ module Bindings(F:Cstubs.FOREIGN) = (ocaml_bytes @-> (uint32_t @-> (ocaml_bytes @-> (uint32_t @-> (returning void))))))) + let hacl_Hash_Blake2b_Simd256_hash_with_key_and_paramas = + foreign "Hacl_Hash_Blake2b_Simd256_hash_with_key_and_paramas" + (ocaml_bytes @-> + (ocaml_bytes @-> + (uint32_t @-> + (hacl_Hash_Blake2b_blake2_params @-> + (ocaml_bytes @-> (returning void)))))) end \ No newline at end of file diff --git a/ocaml/lib/Hacl_Hash_Blake2b_bindings.ml b/ocaml/lib/Hacl_Hash_Blake2b_bindings.ml index 75c75e90..7ba4fcf6 100644 --- a/ocaml/lib/Hacl_Hash_Blake2b_bindings.ml +++ b/ocaml/lib/Hacl_Hash_Blake2b_bindings.ml @@ -5,21 +5,67 @@ module Bindings(F:Cstubs.FOREIGN) = module Hacl_Streaming_Types_applied = (Hacl_Streaming_Types_bindings.Bindings)(Hacl_Streaming_Types_stubs) open Hacl_Streaming_Types_applied + type hacl_Hash_Blake2b_blake2_params = + [ `hacl_Hash_Blake2b_blake2_params ] structure + let (hacl_Hash_Blake2b_blake2_params : + [ `hacl_Hash_Blake2b_blake2_params ] structure typ) = + structure "Hacl_Hash_Blake2b_blake2_params_s" + let hacl_Hash_Blake2b_blake2_params_digest_length = + field hacl_Hash_Blake2b_blake2_params "digest_length" uint8_t + let hacl_Hash_Blake2b_blake2_params_key_length = + field hacl_Hash_Blake2b_blake2_params "key_length" uint8_t + let hacl_Hash_Blake2b_blake2_params_fanout = + field hacl_Hash_Blake2b_blake2_params "fanout" uint8_t + let hacl_Hash_Blake2b_blake2_params_depth = + field hacl_Hash_Blake2b_blake2_params "depth" uint8_t + let hacl_Hash_Blake2b_blake2_params_leaf_length = + field hacl_Hash_Blake2b_blake2_params "leaf_length" uint32_t + let hacl_Hash_Blake2b_blake2_params_node_offset = + field hacl_Hash_Blake2b_blake2_params "node_offset" uint64_t + let hacl_Hash_Blake2b_blake2_params_node_depth = + field hacl_Hash_Blake2b_blake2_params "node_depth" uint8_t + let hacl_Hash_Blake2b_blake2_params_inner_length = + field hacl_Hash_Blake2b_blake2_params "inner_length" uint8_t + let hacl_Hash_Blake2b_blake2_params_salt = + field hacl_Hash_Blake2b_blake2_params "salt" (ptr uint8_t) + let hacl_Hash_Blake2b_blake2_params_personal = + field hacl_Hash_Blake2b_blake2_params "personal" 
(ptr uint8_t) + let _ = seal hacl_Hash_Blake2b_blake2_params + type hacl_Hash_Blake2b_index = [ `hacl_Hash_Blake2b_index ] structure + let (hacl_Hash_Blake2b_index : + [ `hacl_Hash_Blake2b_index ] structure typ) = + structure "Hacl_Hash_Blake2b_index_s" + let hacl_Hash_Blake2b_index_key_length = + field hacl_Hash_Blake2b_index "key_length" uint8_t + let hacl_Hash_Blake2b_index_digest_length = + field hacl_Hash_Blake2b_index "digest_length" uint8_t + let _ = seal hacl_Hash_Blake2b_index let hacl_Hash_Blake2b_init = foreign "Hacl_Hash_Blake2b_init" ((ptr uint64_t) @-> (uint32_t @-> (uint32_t @-> (returning void)))) let hacl_Hash_Blake2b_finish = foreign "Hacl_Hash_Blake2b_finish" (uint32_t @-> (ocaml_bytes @-> ((ptr uint64_t) @-> (returning void)))) + type k____uint64_t___uint64_t_ = [ `k____uint64_t___uint64_t_ ] structure + let (k____uint64_t___uint64_t_ : + [ `k____uint64_t___uint64_t_ ] structure typ) = + structure "K____uint64_t___uint64_t__s" + let k____uint64_t___uint64_t__fst = + field k____uint64_t___uint64_t_ "fst" (ptr uint64_t) + let k____uint64_t___uint64_t__snd = + field k____uint64_t___uint64_t_ "snd" (ptr uint64_t) + let _ = seal k____uint64_t___uint64_t_ type hacl_Hash_Blake2b_block_state_t = [ `hacl_Hash_Blake2b_block_state_t ] structure let (hacl_Hash_Blake2b_block_state_t : [ `hacl_Hash_Blake2b_block_state_t ] structure typ) = structure "Hacl_Hash_Blake2b_block_state_t_s" let hacl_Hash_Blake2b_block_state_t_fst = - field hacl_Hash_Blake2b_block_state_t "fst" (ptr uint64_t) + field hacl_Hash_Blake2b_block_state_t "fst" uint8_t let hacl_Hash_Blake2b_block_state_t_snd = - field hacl_Hash_Blake2b_block_state_t "snd" (ptr uint64_t) + field hacl_Hash_Blake2b_block_state_t "snd" uint8_t + let hacl_Hash_Blake2b_block_state_t_thd = + field hacl_Hash_Blake2b_block_state_t "thd" k____uint64_t___uint64_t_ let _ = seal hacl_Hash_Blake2b_block_state_t type hacl_Hash_Blake2b_state_t = [ `hacl_Hash_Blake2b_state_t ] structure let (hacl_Hash_Blake2b_state_t : @@ -33,9 +79,26 @@ module Bindings(F:Cstubs.FOREIGN) = let hacl_Hash_Blake2b_state_t_total_len = field hacl_Hash_Blake2b_state_t "total_len" uint64_t let _ = seal hacl_Hash_Blake2b_state_t + let hacl_Hash_Blake2b_malloc_with_params_and_key = + foreign "Hacl_Hash_Blake2b_malloc_with_params_and_key" + ((ptr hacl_Hash_Blake2b_blake2_params) @-> + (ocaml_bytes @-> (returning (ptr hacl_Hash_Blake2b_state_t)))) + let hacl_Hash_Blake2b_malloc_with_key = + foreign "Hacl_Hash_Blake2b_malloc_with_key" + (ocaml_bytes @-> + (uint8_t @-> (returning (ptr hacl_Hash_Blake2b_state_t)))) let hacl_Hash_Blake2b_malloc = foreign "Hacl_Hash_Blake2b_malloc" (void @-> (returning (ptr hacl_Hash_Blake2b_state_t))) + let hacl_Hash_Blake2b_reset_with_key_and_params = + foreign "Hacl_Hash_Blake2b_reset_with_key_and_params" + ((ptr hacl_Hash_Blake2b_state_t) @-> + ((ptr hacl_Hash_Blake2b_blake2_params) @-> + (ocaml_bytes @-> (returning void)))) + let hacl_Hash_Blake2b_reset_with_key = + foreign "Hacl_Hash_Blake2b_reset_with_key" + ((ptr hacl_Hash_Blake2b_state_t) @-> + (ocaml_bytes @-> (returning void))) let hacl_Hash_Blake2b_reset = foreign "Hacl_Hash_Blake2b_reset" ((ptr hacl_Hash_Blake2b_state_t) @-> (returning void)) @@ -51,6 +114,10 @@ module Bindings(F:Cstubs.FOREIGN) = let hacl_Hash_Blake2b_free = foreign "Hacl_Hash_Blake2b_free" ((ptr hacl_Hash_Blake2b_state_t) @-> (returning void)) + let hacl_Hash_Blake2b_copy = + foreign "Hacl_Hash_Blake2b_copy" + ((ptr hacl_Hash_Blake2b_state_t) @-> + (returning (ptr hacl_Hash_Blake2b_state_t))) let 
hacl_Hash_Blake2b_hash_with_key = foreign "Hacl_Hash_Blake2b_hash_with_key" (ocaml_bytes @-> @@ -58,4 +125,11 @@ module Bindings(F:Cstubs.FOREIGN) = (ocaml_bytes @-> (uint32_t @-> (ocaml_bytes @-> (uint32_t @-> (returning void))))))) + let hacl_Hash_Blake2b_hash_with_key_and_paramas = + foreign "Hacl_Hash_Blake2b_hash_with_key_and_paramas" + (ocaml_bytes @-> + (ocaml_bytes @-> + (uint32_t @-> + (hacl_Hash_Blake2b_blake2_params @-> + (ocaml_bytes @-> (returning void)))))) end \ No newline at end of file diff --git a/ocaml/lib/Hacl_Hash_Blake2s_Simd128_bindings.ml b/ocaml/lib/Hacl_Hash_Blake2s_Simd128_bindings.ml index db4ff123..6533ddbc 100644 --- a/ocaml/lib/Hacl_Hash_Blake2s_Simd128_bindings.ml +++ b/ocaml/lib/Hacl_Hash_Blake2s_Simd128_bindings.ml @@ -2,6 +2,12 @@ open Ctypes module Bindings(F:Cstubs.FOREIGN) = struct open F + module Hacl_Streaming_Types_applied = + (Hacl_Streaming_Types_bindings.Bindings)(Hacl_Streaming_Types_stubs) + open Hacl_Streaming_Types_applied + module Hacl_Hash_Blake2b_applied = + (Hacl_Hash_Blake2b_bindings.Bindings)(Hacl_Hash_Blake2b_stubs) + open Hacl_Hash_Blake2b_applied let hacl_Hash_Blake2s_Simd128_hash_with_key = foreign "Hacl_Hash_Blake2s_Simd128_hash_with_key" (ocaml_bytes @-> @@ -9,4 +15,11 @@ module Bindings(F:Cstubs.FOREIGN) = (ocaml_bytes @-> (uint32_t @-> (ocaml_bytes @-> (uint32_t @-> (returning void))))))) + let hacl_Hash_Blake2s_Simd128_hash_with_key_and_paramas = + foreign "Hacl_Hash_Blake2s_Simd128_hash_with_key_and_paramas" + (ocaml_bytes @-> + (ocaml_bytes @-> + (uint32_t @-> + (hacl_Hash_Blake2b_blake2_params @-> + (ocaml_bytes @-> (returning void)))))) end \ No newline at end of file diff --git a/ocaml/lib/Hacl_Hash_Blake2s_bindings.ml b/ocaml/lib/Hacl_Hash_Blake2s_bindings.ml index 7f057689..f6c93e89 100644 --- a/ocaml/lib/Hacl_Hash_Blake2s_bindings.ml +++ b/ocaml/lib/Hacl_Hash_Blake2s_bindings.ml @@ -5,6 +5,9 @@ module Bindings(F:Cstubs.FOREIGN) = module Hacl_Streaming_Types_applied = (Hacl_Streaming_Types_bindings.Bindings)(Hacl_Streaming_Types_stubs) open Hacl_Streaming_Types_applied + module Hacl_Hash_Blake2b_applied = + (Hacl_Hash_Blake2b_bindings.Bindings)(Hacl_Hash_Blake2b_stubs) + open Hacl_Hash_Blake2b_applied let hacl_Hash_Blake2s_init = foreign "Hacl_Hash_Blake2s_init" ((ptr uint32_t) @-> (uint32_t @-> (uint32_t @-> (returning void)))) @@ -25,15 +28,26 @@ module Bindings(F:Cstubs.FOREIGN) = let hacl_Hash_Blake2s_finish = foreign "Hacl_Hash_Blake2s_finish" (uint32_t @-> (ocaml_bytes @-> ((ptr uint32_t) @-> (returning void)))) + type k____uint32_t___uint32_t_ = [ `k____uint32_t___uint32_t_ ] structure + let (k____uint32_t___uint32_t_ : + [ `k____uint32_t___uint32_t_ ] structure typ) = + structure "K____uint32_t___uint32_t__s" + let k____uint32_t___uint32_t__fst = + field k____uint32_t___uint32_t_ "fst" (ptr uint32_t) + let k____uint32_t___uint32_t__snd = + field k____uint32_t___uint32_t_ "snd" (ptr uint32_t) + let _ = seal k____uint32_t___uint32_t_ type hacl_Hash_Blake2s_block_state_t = [ `hacl_Hash_Blake2s_block_state_t ] structure let (hacl_Hash_Blake2s_block_state_t : [ `hacl_Hash_Blake2s_block_state_t ] structure typ) = structure "Hacl_Hash_Blake2s_block_state_t_s" let hacl_Hash_Blake2s_block_state_t_fst = - field hacl_Hash_Blake2s_block_state_t "fst" (ptr uint32_t) + field hacl_Hash_Blake2s_block_state_t "fst" uint8_t let hacl_Hash_Blake2s_block_state_t_snd = - field hacl_Hash_Blake2s_block_state_t "snd" (ptr uint32_t) + field hacl_Hash_Blake2s_block_state_t "snd" uint8_t + let hacl_Hash_Blake2s_block_state_t_thd = + field 
hacl_Hash_Blake2s_block_state_t "thd" k____uint32_t___uint32_t_ let _ = seal hacl_Hash_Blake2s_block_state_t type hacl_Hash_Blake2s_state_t = [ `hacl_Hash_Blake2s_state_t ] structure let (hacl_Hash_Blake2s_state_t : @@ -47,9 +61,26 @@ module Bindings(F:Cstubs.FOREIGN) = let hacl_Hash_Blake2s_state_t_total_len = field hacl_Hash_Blake2s_state_t "total_len" uint64_t let _ = seal hacl_Hash_Blake2s_state_t + let hacl_Hash_Blake2s_malloc_with_params_and_key = + foreign "Hacl_Hash_Blake2s_malloc_with_params_and_key" + ((ptr hacl_Hash_Blake2b_blake2_params) @-> + (ocaml_bytes @-> (returning (ptr hacl_Hash_Blake2s_state_t)))) + let hacl_Hash_Blake2s_malloc_with_key = + foreign "Hacl_Hash_Blake2s_malloc_with_key" + (ocaml_bytes @-> + (uint8_t @-> (returning (ptr hacl_Hash_Blake2s_state_t)))) let hacl_Hash_Blake2s_malloc = foreign "Hacl_Hash_Blake2s_malloc" (void @-> (returning (ptr hacl_Hash_Blake2s_state_t))) + let hacl_Hash_Blake2s_reset_with_key_and_params = + foreign "Hacl_Hash_Blake2s_reset_with_key_and_params" + ((ptr hacl_Hash_Blake2s_state_t) @-> + ((ptr hacl_Hash_Blake2b_blake2_params) @-> + (ocaml_bytes @-> (returning void)))) + let hacl_Hash_Blake2s_reset_with_key = + foreign "Hacl_Hash_Blake2s_reset_with_key" + ((ptr hacl_Hash_Blake2s_state_t) @-> + (ocaml_bytes @-> (returning void))) let hacl_Hash_Blake2s_reset = foreign "Hacl_Hash_Blake2s_reset" ((ptr hacl_Hash_Blake2s_state_t) @-> (returning void)) @@ -65,6 +96,10 @@ module Bindings(F:Cstubs.FOREIGN) = let hacl_Hash_Blake2s_free = foreign "Hacl_Hash_Blake2s_free" ((ptr hacl_Hash_Blake2s_state_t) @-> (returning void)) + let hacl_Hash_Blake2s_copy = + foreign "Hacl_Hash_Blake2s_copy" + ((ptr hacl_Hash_Blake2s_state_t) @-> + (returning (ptr hacl_Hash_Blake2s_state_t))) let hacl_Hash_Blake2s_hash_with_key = foreign "Hacl_Hash_Blake2s_hash_with_key" (ocaml_bytes @-> @@ -72,4 +107,11 @@ module Bindings(F:Cstubs.FOREIGN) = (ocaml_bytes @-> (uint32_t @-> (ocaml_bytes @-> (uint32_t @-> (returning void))))))) + let hacl_Hash_Blake2s_hash_with_key_and_paramas = + foreign "Hacl_Hash_Blake2s_hash_with_key_and_paramas" + (ocaml_bytes @-> + (ocaml_bytes @-> + (uint32_t @-> + (hacl_Hash_Blake2b_blake2_params @-> + (ocaml_bytes @-> (returning void)))))) end \ No newline at end of file diff --git a/ocaml/lib/Hacl_Hash_SHA3_Simd256_bindings.ml b/ocaml/lib/Hacl_Hash_SHA3_Simd256_bindings.ml new file mode 100644 index 00000000..a04d1206 --- /dev/null +++ b/ocaml/lib/Hacl_Hash_SHA3_Simd256_bindings.ml @@ -0,0 +1,69 @@ +open Ctypes +module Bindings(F:Cstubs.FOREIGN) = + struct + open F + let hacl_Hash_SHA3_Simd256_shake128 = + foreign "Hacl_Hash_SHA3_Simd256_shake128" + (ocaml_bytes @-> + (ocaml_bytes @-> + (ocaml_bytes @-> + (ocaml_bytes @-> + (uint32_t @-> + (ocaml_bytes @-> + (ocaml_bytes @-> + (ocaml_bytes @-> + (ocaml_bytes @-> + (uint32_t @-> (returning void))))))))))) + let hacl_Hash_SHA3_Simd256_shake256 = + foreign "Hacl_Hash_SHA3_Simd256_shake256" + (ocaml_bytes @-> + (ocaml_bytes @-> + (ocaml_bytes @-> + (ocaml_bytes @-> + (uint32_t @-> + (ocaml_bytes @-> + (ocaml_bytes @-> + (ocaml_bytes @-> + (ocaml_bytes @-> + (uint32_t @-> (returning void))))))))))) + let hacl_Hash_SHA3_Simd256_sha3_224 = + foreign "Hacl_Hash_SHA3_Simd256_sha3_224" + (ocaml_bytes @-> + (ocaml_bytes @-> + (ocaml_bytes @-> + (ocaml_bytes @-> + (ocaml_bytes @-> + (ocaml_bytes @-> + (ocaml_bytes @-> + (ocaml_bytes @-> (uint32_t @-> (returning void)))))))))) + let hacl_Hash_SHA3_Simd256_sha3_256 = + foreign "Hacl_Hash_SHA3_Simd256_sha3_256" + (ocaml_bytes @-> + (ocaml_bytes @-> + 
(ocaml_bytes @-> + (ocaml_bytes @-> + (ocaml_bytes @-> + (ocaml_bytes @-> + (ocaml_bytes @-> + (ocaml_bytes @-> (uint32_t @-> (returning void)))))))))) + let hacl_Hash_SHA3_Simd256_sha3_384 = + foreign "Hacl_Hash_SHA3_Simd256_sha3_384" + (ocaml_bytes @-> + (ocaml_bytes @-> + (ocaml_bytes @-> + (ocaml_bytes @-> + (ocaml_bytes @-> + (ocaml_bytes @-> + (ocaml_bytes @-> + (ocaml_bytes @-> (uint32_t @-> (returning void)))))))))) + let hacl_Hash_SHA3_Simd256_sha3_512 = + foreign "Hacl_Hash_SHA3_Simd256_sha3_512" + (ocaml_bytes @-> + (ocaml_bytes @-> + (ocaml_bytes @-> + (ocaml_bytes @-> + (ocaml_bytes @-> + (ocaml_bytes @-> + (ocaml_bytes @-> + (ocaml_bytes @-> (uint32_t @-> (returning void)))))))))) + end \ No newline at end of file diff --git a/ocaml/lib/Hacl_Hash_SHA3_bindings.ml b/ocaml/lib/Hacl_Hash_SHA3_bindings.ml index 45718cbd..dd900903 100644 --- a/ocaml/lib/Hacl_Hash_SHA3_bindings.ml +++ b/ocaml/lib/Hacl_Hash_SHA3_bindings.ml @@ -75,14 +75,17 @@ module Bindings(F:Cstubs.FOREIGN) = let hacl_Hash_SHA3_is_shake = foreign "Hacl_Hash_SHA3_is_shake" ((ptr hacl_Hash_SHA3_state_t) @-> (returning bool)) - let hacl_Hash_SHA3_shake128_hacl = - foreign "Hacl_Hash_SHA3_shake128_hacl" - (uint32_t @-> - (ocaml_bytes @-> (uint32_t @-> (ocaml_bytes @-> (returning void))))) - let hacl_Hash_SHA3_shake256_hacl = - foreign "Hacl_Hash_SHA3_shake256_hacl" - (uint32_t @-> - (ocaml_bytes @-> (uint32_t @-> (ocaml_bytes @-> (returning void))))) + let hacl_Hash_SHA3_absorb_inner_32 = + foreign "Hacl_Hash_SHA3_absorb_inner_32" + (uint32_t @-> (ocaml_bytes @-> ((ptr uint64_t) @-> (returning void)))) + let hacl_Hash_SHA3_shake128 = + foreign "Hacl_Hash_SHA3_shake128" + (ocaml_bytes @-> + (uint32_t @-> (ocaml_bytes @-> (uint32_t @-> (returning void))))) + let hacl_Hash_SHA3_shake256 = + foreign "Hacl_Hash_SHA3_shake256" + (ocaml_bytes @-> + (uint32_t @-> (ocaml_bytes @-> (uint32_t @-> (returning void))))) let hacl_Hash_SHA3_sha3_224 = foreign "Hacl_Hash_SHA3_sha3_224" (ocaml_bytes @-> (ocaml_bytes @-> (uint32_t @-> (returning void)))) @@ -95,25 +98,19 @@ module Bindings(F:Cstubs.FOREIGN) = let hacl_Hash_SHA3_sha3_512 = foreign "Hacl_Hash_SHA3_sha3_512" (ocaml_bytes @-> (ocaml_bytes @-> (uint32_t @-> (returning void)))) - let hacl_Hash_SHA3_state_permute = - foreign "Hacl_Hash_SHA3_state_permute" + let hacl_Hash_SHA3_state_malloc = + foreign "Hacl_Hash_SHA3_state_malloc" + (void @-> (returning (ptr uint64_t))) + let hacl_Hash_SHA3_state_free = + foreign "Hacl_Hash_SHA3_state_free" ((ptr uint64_t) @-> (returning void)) - let hacl_Hash_SHA3_loadState = - foreign "Hacl_Hash_SHA3_loadState" - (uint32_t @-> (ocaml_bytes @-> ((ptr uint64_t) @-> (returning void)))) - let hacl_Hash_SHA3_absorb_inner = - foreign "Hacl_Hash_SHA3_absorb_inner" - (uint32_t @-> (ocaml_bytes @-> ((ptr uint64_t) @-> (returning void)))) - let hacl_Hash_SHA3_squeeze0 = - foreign "Hacl_Hash_SHA3_squeeze0" - ((ptr uint64_t) @-> - (uint32_t @-> (uint32_t @-> (ocaml_bytes @-> (returning void))))) - let hacl_Hash_SHA3_keccak = - foreign "Hacl_Hash_SHA3_keccak" - (uint32_t @-> - (uint32_t @-> - (uint32_t @-> - (ocaml_bytes @-> - (uint8_t @-> - (uint32_t @-> (ocaml_bytes @-> (returning void)))))))) + let hacl_Hash_SHA3_shake128_absorb_nblocks = + foreign "Hacl_Hash_SHA3_shake128_absorb_nblocks" + ((ptr uint64_t) @-> (ocaml_bytes @-> (uint32_t @-> (returning void)))) + let hacl_Hash_SHA3_shake128_absorb_final = + foreign "Hacl_Hash_SHA3_shake128_absorb_final" + ((ptr uint64_t) @-> (ocaml_bytes @-> (uint32_t @-> (returning void)))) + let 
hacl_Hash_SHA3_shake128_squeeze_nblocks = + foreign "Hacl_Hash_SHA3_shake128_squeeze_nblocks" + ((ptr uint64_t) @-> (ocaml_bytes @-> (uint32_t @-> (returning void)))) end \ No newline at end of file diff --git a/ocaml/lib/Hacl_SHA2_Types_bindings.ml b/ocaml/lib/Hacl_SHA2_Types_bindings.ml index 274f959d..56fcc212 100644 --- a/ocaml/lib/Hacl_SHA2_Types_bindings.ml +++ b/ocaml/lib/Hacl_SHA2_Types_bindings.ml @@ -83,4 +83,7 @@ module Bindings(F:Cstubs.FOREIGN) = let hacl_Hash_SHA2_uint8_2x8p_snd = field hacl_Hash_SHA2_uint8_2x8p "snd" hacl_Hash_SHA2_uint8_8p let _ = seal hacl_Hash_SHA2_uint8_2x8p + type hacl_Hash_SHA2_bufx4 = hacl_Hash_SHA2_uint8_4p + let hacl_Hash_SHA2_bufx4 = + typedef hacl_Hash_SHA2_uint8_4p "Hacl_Hash_SHA2_bufx4" end \ No newline at end of file diff --git a/ocaml/lib_gen/Hacl_Hash_SHA3_Simd256_gen.ml b/ocaml/lib_gen/Hacl_Hash_SHA3_Simd256_gen.ml new file mode 100644 index 00000000..db9025c2 --- /dev/null +++ b/ocaml/lib_gen/Hacl_Hash_SHA3_Simd256_gen.ml @@ -0,0 +1,10 @@ +let _ = + (((Format.set_formatter_out_channel + (open_out_bin "lib/Hacl_Hash_SHA3_Simd256_stubs.ml"); + Cstubs.write_ml Format.std_formatter ~prefix:"" + (module Hacl_Hash_SHA3_Simd256_bindings.Bindings)); + Format.set_formatter_out_channel + (open_out_bin "lib/Hacl_Hash_SHA3_Simd256_c_stubs.c")); + Format.printf "#include \"Hacl_Hash_SHA3_Simd256.h\"\n"); + Cstubs.write_c Format.std_formatter ~prefix:"" + (module Hacl_Hash_SHA3_Simd256_bindings.Bindings) \ No newline at end of file diff --git a/ocaml/lib_gen/Hacl_SHA2_Types_gen.ml b/ocaml/lib_gen/Hacl_SHA2_Types_gen.ml index affc9abf..bec1bcb3 100644 --- a/ocaml/lib_gen/Hacl_SHA2_Types_gen.ml +++ b/ocaml/lib_gen/Hacl_SHA2_Types_gen.ml @@ -5,6 +5,7 @@ let _ = (module Hacl_SHA2_Types_bindings.Bindings)); Format.set_formatter_out_channel (open_out_bin "lib/Hacl_SHA2_Types_c_stubs.c")); - Format.printf "#include \"internal/Hacl_SHA2_Types.h\"\n"); + Format.printf + "#include \"Hacl_SHA2_Types.h\"\n#include \"internal/Hacl_SHA2_Types.h\"\n"); Cstubs.write_c Format.std_formatter ~prefix:"" (module Hacl_SHA2_Types_bindings.Bindings) \ No newline at end of file diff --git a/src/EverCrypt_DRBG.c b/src/EverCrypt_DRBG.c index 301fe528..a831a5b5 100644 --- a/src/EverCrypt_DRBG.c +++ b/src/EverCrypt_DRBG.c @@ -1770,8 +1770,8 @@ static void uninstantiate_sha1(EverCrypt_DRBG_state_s *st) uint8_t *k = s.k; uint8_t *v = s.v; uint32_t *ctr = s.reseed_counter; - Lib_Memzero0_memzero(k, 20U, uint8_t); - Lib_Memzero0_memzero(v, 20U, uint8_t); + Lib_Memzero0_memzero(k, 20U, uint8_t, void *); + Lib_Memzero0_memzero(v, 20U, uint8_t, void *); ctr[0U] = 0U; KRML_HOST_FREE(k); KRML_HOST_FREE(v); @@ -1794,8 +1794,8 @@ static void uninstantiate_sha2_256(EverCrypt_DRBG_state_s *st) uint8_t *k = s.k; uint8_t *v = s.v; uint32_t *ctr = s.reseed_counter; - Lib_Memzero0_memzero(k, 32U, uint8_t); - Lib_Memzero0_memzero(v, 32U, uint8_t); + Lib_Memzero0_memzero(k, 32U, uint8_t, void *); + Lib_Memzero0_memzero(v, 32U, uint8_t, void *); ctr[0U] = 0U; KRML_HOST_FREE(k); KRML_HOST_FREE(v); @@ -1818,8 +1818,8 @@ static void uninstantiate_sha2_384(EverCrypt_DRBG_state_s *st) uint8_t *k = s.k; uint8_t *v = s.v; uint32_t *ctr = s.reseed_counter; - Lib_Memzero0_memzero(k, 48U, uint8_t); - Lib_Memzero0_memzero(v, 48U, uint8_t); + Lib_Memzero0_memzero(k, 48U, uint8_t, void *); + Lib_Memzero0_memzero(v, 48U, uint8_t, void *); ctr[0U] = 0U; KRML_HOST_FREE(k); KRML_HOST_FREE(v); @@ -1842,8 +1842,8 @@ static void uninstantiate_sha2_512(EverCrypt_DRBG_state_s *st) uint8_t *k = s.k; uint8_t *v = s.v; 
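Every `Lib_Memzero0_memzero` call in the hunks below (EverCrypt_DRBG.c and the Frodo files) gains a fourth argument. A minimal before/after sketch of the call-site change, under the assumption that the macro still lowers to a plain byte-count zeroization and that the added argument only threads an (unused) result type through the macro; the header name is taken from the HACL* dist and is an assumption here:

```c
#include <stdint.h>
#include "lib_memzero0.h" /* assumed location of the Lib_Memzero0_memzero macro */

static void wipe_key(uint8_t key[20])
{
  /* Old, three-argument form: zero 20 elements of type uint8_t. */
  /* Lib_Memzero0_memzero(key, 20U, uint8_t); */
  /* New form: the extra `void *` argument is for typing only; what gets
     zeroed is unchanged. */
  Lib_Memzero0_memzero(key, 20U, uint8_t, void *);
}
```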
uint32_t *ctr = s.reseed_counter; - Lib_Memzero0_memzero(k, 64U, uint8_t); - Lib_Memzero0_memzero(v, 64U, uint8_t); + Lib_Memzero0_memzero(k, 64U, uint8_t, void *); + Lib_Memzero0_memzero(v, 64U, uint8_t, void *); ctr[0U] = 0U; KRML_HOST_FREE(k); KRML_HOST_FREE(v); diff --git a/src/EverCrypt_Hash.c b/src/EverCrypt_Hash.c index 92b3c227..bfafa9be 100644 --- a/src/EverCrypt_Hash.c +++ b/src/EverCrypt_Hash.c @@ -709,25 +709,57 @@ static void finish(EverCrypt_Hash_state_s *s, uint8_t *dst) if (scrut.tag == SHA3_224_s) { uint64_t *p1 = scrut.case_SHA3_224_s; - Hacl_Hash_SHA3_squeeze0(p1, 144U, 28U, dst); + uint32_t remOut = 28U; + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, p1, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + memcpy(dst + 28U - remOut, hbuf, remOut * sizeof (uint8_t)); return; } if (scrut.tag == SHA3_256_s) { uint64_t *p1 = scrut.case_SHA3_256_s; - Hacl_Hash_SHA3_squeeze0(p1, 136U, 32U, dst); + uint32_t remOut = 32U; + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, p1, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + memcpy(dst + 32U - remOut, hbuf, remOut * sizeof (uint8_t)); return; } if (scrut.tag == SHA3_384_s) { uint64_t *p1 = scrut.case_SHA3_384_s; - Hacl_Hash_SHA3_squeeze0(p1, 104U, 48U, dst); + uint32_t remOut = 48U; + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, p1, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + memcpy(dst + 48U - remOut, hbuf, remOut * sizeof (uint8_t)); return; } if (scrut.tag == SHA3_512_s) { uint64_t *p1 = scrut.case_SHA3_512_s; - Hacl_Hash_SHA3_squeeze0(p1, 72U, 64U, dst); + uint32_t remOut = 64U; + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, p1, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + memcpy(dst + 64U - remOut, hbuf, remOut * sizeof (uint8_t)); return; } if (scrut.tag == Blake2S_s) diff --git a/src/Hacl_Ed25519.c b/src/Hacl_Ed25519.c index 05d96cd0..d1f8edf2 100644 --- a/src/Hacl_Ed25519.c +++ b/src/Hacl_Ed25519.c @@ -1712,8 +1712,8 @@ static inline void secret_expand(uint8_t *expanded, uint8_t *secret) /** Compute the public key from the private key. - The outparam `public_key` points to 32 bytes of valid memory, i.e., uint8_t[32]. - The argument `private_key` points to 32 bytes of valid memory, i.e., uint8_t[32]. + @param[out] public_key Points to 32 bytes of valid memory, i.e., `uint8_t[32]`. Must not overlap the memory location of `private_key`. + @param[in] private_key Points to 32 bytes of valid memory containing the private key, i.e., `uint8_t[32]`. */ void Hacl_Ed25519_secret_to_public(uint8_t *public_key, uint8_t *private_key) { @@ -1726,8 +1726,8 @@ void Hacl_Ed25519_secret_to_public(uint8_t *public_key, uint8_t *private_key) /** Compute the expanded keys for an Ed25519 signature. - The outparam `expanded_keys` points to 96 bytes of valid memory, i.e., uint8_t[96]. - The argument `private_key` points to 32 bytes of valid memory, i.e., uint8_t[32]. + @param[out] expanded_keys Points to 96 bytes of valid memory, i.e., `uint8_t[96]`. Must not overlap the memory location of `private_key`. + @param[in] private_key Points to 32 bytes of valid memory containing the private key, i.e., `uint8_t[32]`. 
If one needs to sign several messages under the same private key, it is more efficient to call `expand_keys` only once and `sign_expanded` multiple times, for each message. @@ -1744,11 +1744,10 @@ void Hacl_Ed25519_expand_keys(uint8_t *expanded_keys, uint8_t *private_key) /** Create an Ed25519 signature with the (precomputed) expanded keys. - The outparam `signature` points to 64 bytes of valid memory, i.e., uint8_t[64]. - The argument `expanded_keys` points to 96 bytes of valid memory, i.e., uint8_t[96]. - The argument `msg` points to `msg_len` bytes of valid memory, i.e., uint8_t[msg_len]. - - The argument `expanded_keys` is obtained through `expand_keys`. + @param[out] signature Points to 64 bytes of valid memory, i.e., `uint8_t[64]`. Must not overlap the memory locations of `expanded_keys` nor `msg`. + @param[in] expanded_keys Points to 96 bytes of valid memory, i.e., `uint8_t[96]`, containing the expanded keys obtained by invoking `expand_keys`. + @param[in] msg_len Length of `msg`. + @param[in] msg Points to `msg_len` bytes of valid memory containing the message, i.e., `uint8_t[msg_len]`. If one needs to sign several messages under the same private key, it is more efficient to call `expand_keys` only once and `sign_expanded` multiple times, for each message. @@ -1783,9 +1782,10 @@ Hacl_Ed25519_sign_expanded( /** Create an Ed25519 signature. - The outparam `signature` points to 64 bytes of valid memory, i.e., uint8_t[64]. - The argument `private_key` points to 32 bytes of valid memory, i.e., uint8_t[32]. - The argument `msg` points to `msg_len` bytes of valid memory, i.e., uint8_t[msg_len]. + @param[out] signature Points to 64 bytes of valid memory, i.e., `uint8_t[64]`. Must not overlap the memory locations of `private_key` nor `msg`. + @param[in] private_key Points to 32 bytes of valid memory containing the private key, i.e., `uint8_t[32]`. + @param[in] msg_len Length of `msg`. + @param[in] msg Points to `msg_len` bytes of valid memory containing the message, i.e., `uint8_t[msg_len]`. The function first calls `expand_keys` and then invokes `sign_expanded`. @@ -1803,11 +1803,12 @@ Hacl_Ed25519_sign(uint8_t *signature, uint8_t *private_key, uint32_t msg_len, ui /** Verify an Ed25519 signature. - The function returns `true` if the signature is valid and `false` otherwise. + @param public_key Points to 32 bytes of valid memory containing the public key, i.e., `uint8_t[32]`. + @param msg_len Length of `msg`. + @param msg Points to `msg_len` bytes of valid memory containing the message, i.e., `uint8_t[msg_len]`. + @param signature Points to 64 bytes of valid memory containing the signature, i.e., `uint8_t[64]`. - The argument `public_key` points to 32 bytes of valid memory, i.e., uint8_t[32]. - The argument `msg` points to `msg_len` bytes of valid memory, i.e., uint8_t[msg_len]. - The argument `signature` points to 64 bytes of valid memory, i.e., uint8_t[64]. + @return Returns `true` if the signature is valid and `false` otherwise. 
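The four Ed25519 entry points documented above compose into two calling patterns: one-shot `sign`, or `expand_keys` once followed by `sign_expanded` per message. A minimal sketch of the multi-message pattern, using only the signatures shown in this diff (buffer contents are placeholders):

```c
#include <stdbool.h>
#include <stdint.h>
#include "Hacl_Ed25519.h"

/* Sign two messages under one private key, then verify the first.
   `private_key` holds 32 uniformly random secret bytes. */
static bool sign_two(uint8_t private_key[32],
                     uint8_t *msg1, uint32_t msg1_len,
                     uint8_t *msg2, uint32_t msg2_len,
                     uint8_t sig1[64], uint8_t sig2[64])
{
  uint8_t expanded_keys[96];
  uint8_t public_key[32];
  Hacl_Ed25519_expand_keys(expanded_keys, private_key); /* hash the seed once */
  Hacl_Ed25519_secret_to_public(public_key, private_key);
  Hacl_Ed25519_sign_expanded(sig1, expanded_keys, msg1_len, msg1);
  Hacl_Ed25519_sign_expanded(sig2, expanded_keys, msg2_len, msg2);
  /* Real code should also wipe expanded_keys: it contains secret material. */
  return Hacl_Ed25519_verify(public_key, msg1_len, msg1, sig1);
}
```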
*/ bool Hacl_Ed25519_verify(uint8_t *public_key, uint32_t msg_len, uint8_t *msg, uint8_t *signature) diff --git a/src/Hacl_Frodo1344.c b/src/Hacl_Frodo1344.c index a565a85b..9fe78471 100644 --- a/src/Hacl_Frodo1344.c +++ b/src/Hacl_Frodo1344.c @@ -45,7 +45,7 @@ uint32_t Hacl_Frodo1344_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) uint8_t *seed_se = coins + 32U; uint8_t *z = coins + 64U; uint8_t *seed_a = pk; - Hacl_Hash_SHA3_shake256_hacl(16U, z, 16U, seed_a); + Hacl_Hash_SHA3_shake256(seed_a, 16U, z, 16U); uint8_t *b_bytes = pk + 16U; uint8_t *s_bytes = sk + 21552U; uint16_t s_matrix[10752U] = { 0U }; @@ -54,8 +54,8 @@ uint32_t Hacl_Frodo1344_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) uint8_t shake_input_seed_se[33U] = { 0U }; shake_input_seed_se[0U] = 0x5fU; memcpy(shake_input_seed_se + 1U, seed_se, 32U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake256_hacl(33U, shake_input_seed_se, 43008U, r); - Lib_Memzero0_memzero(shake_input_seed_se, 33U, uint8_t); + Hacl_Hash_SHA3_shake256(r, 43008U, shake_input_seed_se, 33U); + Lib_Memzero0_memzero(shake_input_seed_se, 33U, uint8_t, void *); Hacl_Impl_Frodo_Sample_frodo_sample_matrix1344(1344U, 8U, r, s_matrix); Hacl_Impl_Frodo_Sample_frodo_sample_matrix1344(1344U, 8U, r + 21504U, e_matrix); uint16_t b_matrix[10752U] = { 0U }; @@ -66,14 +66,14 @@ uint32_t Hacl_Frodo1344_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) Hacl_Impl_Matrix_matrix_add(1344U, 8U, b_matrix, e_matrix); Hacl_Impl_Frodo_Pack_frodo_pack(1344U, 8U, 16U, b_matrix, b_bytes); Hacl_Impl_Matrix_matrix_to_lbytes(1344U, 8U, s_matrix, s_bytes); - Lib_Memzero0_memzero(s_matrix, 10752U, uint16_t); - Lib_Memzero0_memzero(e_matrix, 10752U, uint16_t); + Lib_Memzero0_memzero(s_matrix, 10752U, uint16_t, void *); + Lib_Memzero0_memzero(e_matrix, 10752U, uint16_t, void *); uint32_t slen1 = 43056U; uint8_t *sk_p = sk; memcpy(sk_p, s, 32U * sizeof (uint8_t)); memcpy(sk_p + 32U, pk, 21520U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake256_hacl(21520U, pk, 32U, sk + slen1); - Lib_Memzero0_memzero(coins, 80U, uint8_t); + Hacl_Hash_SHA3_shake256(sk + slen1, 32U, pk, 21520U); + Lib_Memzero0_memzero(coins, 80U, uint8_t, void *); return 0U; } @@ -83,9 +83,9 @@ uint32_t Hacl_Frodo1344_crypto_kem_enc(uint8_t *ct, uint8_t *ss, uint8_t *pk) randombytes_(32U, coins); uint8_t seed_se_k[64U] = { 0U }; uint8_t pkh_mu[64U] = { 0U }; - Hacl_Hash_SHA3_shake256_hacl(21520U, pk, 32U, pkh_mu); + Hacl_Hash_SHA3_shake256(pkh_mu, 32U, pk, 21520U); memcpy(pkh_mu + 32U, coins, 32U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake256_hacl(64U, pkh_mu, 64U, seed_se_k); + Hacl_Hash_SHA3_shake256(seed_se_k, 64U, pkh_mu, 64U); uint8_t *seed_se = seed_se_k; uint8_t *k = seed_se_k + 32U; uint8_t *seed_a = pk; @@ -97,8 +97,8 @@ uint32_t Hacl_Frodo1344_crypto_kem_enc(uint8_t *ct, uint8_t *ss, uint8_t *pk) uint8_t shake_input_seed_se[33U] = { 0U }; shake_input_seed_se[0U] = 0x96U; memcpy(shake_input_seed_se + 1U, seed_se, 32U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake256_hacl(33U, shake_input_seed_se, 43136U, r); - Lib_Memzero0_memzero(shake_input_seed_se, 33U, uint8_t); + Hacl_Hash_SHA3_shake256(r, 43136U, shake_input_seed_se, 33U); + Lib_Memzero0_memzero(shake_input_seed_se, 33U, uint8_t, void *); Hacl_Impl_Frodo_Sample_frodo_sample_matrix1344(8U, 1344U, r, sp_matrix); Hacl_Impl_Frodo_Sample_frodo_sample_matrix1344(8U, 1344U, r + 21504U, ep_matrix); Hacl_Impl_Frodo_Sample_frodo_sample_matrix1344(8U, 8U, r + 43008U, epp_matrix); @@ -119,22 +119,22 @@ uint32_t Hacl_Frodo1344_crypto_kem_enc(uint8_t *ct, uint8_t *ss, uint8_t *pk) uint16_t 
mu_encode[64U] = { 0U }; Hacl_Impl_Frodo_Encode_frodo_key_encode(16U, 4U, 8U, coins, mu_encode); Hacl_Impl_Matrix_matrix_add(8U, 8U, v_matrix, mu_encode); - Lib_Memzero0_memzero(mu_encode, 64U, uint16_t); + Lib_Memzero0_memzero(mu_encode, 64U, uint16_t, void *); Hacl_Impl_Frodo_Pack_frodo_pack(8U, 8U, 16U, v_matrix, c2); - Lib_Memzero0_memzero(v_matrix, 64U, uint16_t); - Lib_Memzero0_memzero(sp_matrix, 10752U, uint16_t); - Lib_Memzero0_memzero(ep_matrix, 10752U, uint16_t); - Lib_Memzero0_memzero(epp_matrix, 64U, uint16_t); + Lib_Memzero0_memzero(v_matrix, 64U, uint16_t, void *); + Lib_Memzero0_memzero(sp_matrix, 10752U, uint16_t, void *); + Lib_Memzero0_memzero(ep_matrix, 10752U, uint16_t, void *); + Lib_Memzero0_memzero(epp_matrix, 64U, uint16_t, void *); uint32_t ss_init_len = 21664U; KRML_CHECK_SIZE(sizeof (uint8_t), ss_init_len); uint8_t shake_input_ss[ss_init_len]; memset(shake_input_ss, 0U, ss_init_len * sizeof (uint8_t)); memcpy(shake_input_ss, ct, 21632U * sizeof (uint8_t)); memcpy(shake_input_ss + 21632U, k, 32U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake256_hacl(ss_init_len, shake_input_ss, 32U, ss); - Lib_Memzero0_memzero(shake_input_ss, ss_init_len, uint8_t); - Lib_Memzero0_memzero(seed_se_k, 64U, uint8_t); - Lib_Memzero0_memzero(coins, 32U, uint8_t); + Hacl_Hash_SHA3_shake256(ss, 32U, shake_input_ss, ss_init_len); + Lib_Memzero0_memzero(shake_input_ss, ss_init_len, uint8_t, void *); + Lib_Memzero0_memzero(seed_se_k, 64U, uint8_t, void *); + Lib_Memzero0_memzero(coins, 32U, uint8_t, void *); return 0U; } @@ -154,8 +154,8 @@ uint32_t Hacl_Frodo1344_crypto_kem_dec(uint8_t *ss, uint8_t *ct, uint8_t *sk) Hacl_Impl_Matrix_matrix_mul_s(8U, 1344U, 8U, bp_matrix, s_matrix, m_matrix); Hacl_Impl_Matrix_matrix_sub(8U, 8U, c_matrix, m_matrix); Hacl_Impl_Frodo_Encode_frodo_key_decode(16U, 4U, 8U, m_matrix, mu_decode); - Lib_Memzero0_memzero(s_matrix, 10752U, uint16_t); - Lib_Memzero0_memzero(m_matrix, 64U, uint16_t); + Lib_Memzero0_memzero(s_matrix, 10752U, uint16_t, void *); + Lib_Memzero0_memzero(m_matrix, 64U, uint16_t, void *); uint8_t seed_se_k[64U] = { 0U }; uint32_t pkh_mu_decode_len = 64U; KRML_CHECK_SIZE(sizeof (uint8_t), pkh_mu_decode_len); @@ -164,7 +164,7 @@ uint32_t Hacl_Frodo1344_crypto_kem_dec(uint8_t *ss, uint8_t *ct, uint8_t *sk) uint8_t *pkh = sk + 43056U; memcpy(pkh_mu_decode, pkh, 32U * sizeof (uint8_t)); memcpy(pkh_mu_decode + 32U, mu_decode, 32U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake256_hacl(pkh_mu_decode_len, pkh_mu_decode, 64U, seed_se_k); + Hacl_Hash_SHA3_shake256(seed_se_k, 64U, pkh_mu_decode, pkh_mu_decode_len); uint8_t *seed_se = seed_se_k; uint8_t *kp = seed_se_k + 32U; uint8_t *s = sk; @@ -177,8 +177,8 @@ uint32_t Hacl_Frodo1344_crypto_kem_dec(uint8_t *ss, uint8_t *ct, uint8_t *sk) uint8_t shake_input_seed_se[33U] = { 0U }; shake_input_seed_se[0U] = 0x96U; memcpy(shake_input_seed_se + 1U, seed_se, 32U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake256_hacl(33U, shake_input_seed_se, 43136U, r); - Lib_Memzero0_memzero(shake_input_seed_se, 33U, uint8_t); + Hacl_Hash_SHA3_shake256(r, 43136U, shake_input_seed_se, 33U); + Lib_Memzero0_memzero(shake_input_seed_se, 33U, uint8_t, void *); Hacl_Impl_Frodo_Sample_frodo_sample_matrix1344(8U, 1344U, r, sp_matrix); Hacl_Impl_Frodo_Sample_frodo_sample_matrix1344(8U, 1344U, r + 21504U, ep_matrix); Hacl_Impl_Frodo_Sample_frodo_sample_matrix1344(8U, 8U, r + 43008U, epp_matrix); @@ -197,12 +197,12 @@ uint32_t Hacl_Frodo1344_crypto_kem_dec(uint8_t *ss, uint8_t *ct, uint8_t *sk) uint16_t mu_encode[64U] = { 0U }; 
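The Frodo hunks in this patch are mechanical rewrites for the new SHAKE entry points: `Hacl_Hash_SHA3_shake256_hacl(input_len, input, output_len, output)` becomes `Hacl_Hash_SHA3_shake256(output, output_len, input, input_len)`, i.e., the destination buffer moves first and each buffer is paired with its own length (likewise for `shake128`). A small equivalence sketch:

```c
#include <stdint.h>
#include "Hacl_Hash_SHA3.h"

static void derive(uint8_t okm[64], const uint8_t seed[32])
{
  /* Old API (removed above): Hacl_Hash_SHA3_shake256_hacl(32U, seed, 64U, okm); */
  /* New API: output first, then its length, then input and its length. */
  Hacl_Hash_SHA3_shake256(okm, 64U, (uint8_t *)seed, 32U);
}
```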
Hacl_Impl_Frodo_Encode_frodo_key_encode(16U, 4U, 8U, mu_decode, mu_encode); Hacl_Impl_Matrix_matrix_add(8U, 8U, cp_matrix, mu_encode); - Lib_Memzero0_memzero(mu_encode, 64U, uint16_t); + Lib_Memzero0_memzero(mu_encode, 64U, uint16_t, void *); Hacl_Impl_Matrix_mod_pow2(8U, 1344U, 16U, bpp_matrix); Hacl_Impl_Matrix_mod_pow2(8U, 8U, 16U, cp_matrix); - Lib_Memzero0_memzero(sp_matrix, 10752U, uint16_t); - Lib_Memzero0_memzero(ep_matrix, 10752U, uint16_t); - Lib_Memzero0_memzero(epp_matrix, 64U, uint16_t); + Lib_Memzero0_memzero(sp_matrix, 10752U, uint16_t, void *); + Lib_Memzero0_memzero(ep_matrix, 10752U, uint16_t, void *); + Lib_Memzero0_memzero(epp_matrix, 64U, uint16_t, void *); uint16_t b1 = Hacl_Impl_Matrix_matrix_eq(8U, 1344U, bp_matrix, bpp_matrix); uint16_t b2 = Hacl_Impl_Matrix_matrix_eq(8U, 8U, c_matrix, cp_matrix); uint16_t mask = (uint32_t)b1 & (uint32_t)b2; @@ -222,11 +222,11 @@ uint32_t Hacl_Frodo1344_crypto_kem_dec(uint8_t *ss, uint8_t *ct, uint8_t *sk) memset(ss_init, 0U, ss_init_len * sizeof (uint8_t)); memcpy(ss_init, ct, 21632U * sizeof (uint8_t)); memcpy(ss_init + 21632U, kp_s, 32U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake256_hacl(ss_init_len, ss_init, 32U, ss); - Lib_Memzero0_memzero(ss_init, ss_init_len, uint8_t); - Lib_Memzero0_memzero(kp_s, 32U, uint8_t); - Lib_Memzero0_memzero(seed_se_k, 64U, uint8_t); - Lib_Memzero0_memzero(mu_decode, 32U, uint8_t); + Hacl_Hash_SHA3_shake256(ss, 32U, ss_init, ss_init_len); + Lib_Memzero0_memzero(ss_init, ss_init_len, uint8_t, void *); + Lib_Memzero0_memzero(kp_s, 32U, uint8_t, void *); + Lib_Memzero0_memzero(seed_se_k, 64U, uint8_t, void *); + Lib_Memzero0_memzero(mu_decode, 32U, uint8_t, void *); return 0U; } diff --git a/src/Hacl_Frodo64.c b/src/Hacl_Frodo64.c index 91434038..19f1562d 100644 --- a/src/Hacl_Frodo64.c +++ b/src/Hacl_Frodo64.c @@ -50,7 +50,7 @@ uint32_t Hacl_Frodo64_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) uint8_t *seed_se = coins + 16U; uint8_t *z = coins + 32U; uint8_t *seed_a = pk; - Hacl_Hash_SHA3_shake128_hacl(16U, z, 16U, seed_a); + Hacl_Hash_SHA3_shake128(seed_a, 16U, z, 16U); uint8_t *b_bytes = pk + 16U; uint8_t *s_bytes = sk + 992U; uint16_t s_matrix[512U] = { 0U }; @@ -59,8 +59,8 @@ uint32_t Hacl_Frodo64_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) uint8_t shake_input_seed_se[17U] = { 0U }; shake_input_seed_se[0U] = 0x5fU; memcpy(shake_input_seed_se + 1U, seed_se, 16U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake128_hacl(17U, shake_input_seed_se, 2048U, r); - Lib_Memzero0_memzero(shake_input_seed_se, 17U, uint8_t); + Hacl_Hash_SHA3_shake128(r, 2048U, shake_input_seed_se, 17U); + Lib_Memzero0_memzero(shake_input_seed_se, 17U, uint8_t, void *); Hacl_Impl_Frodo_Sample_frodo_sample_matrix64(64U, 8U, r, s_matrix); Hacl_Impl_Frodo_Sample_frodo_sample_matrix64(64U, 8U, r + 1024U, e_matrix); uint16_t b_matrix[512U] = { 0U }; @@ -70,14 +70,14 @@ uint32_t Hacl_Frodo64_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) Hacl_Impl_Matrix_matrix_add(64U, 8U, b_matrix, e_matrix); Hacl_Impl_Frodo_Pack_frodo_pack(64U, 8U, 15U, b_matrix, b_bytes); Hacl_Impl_Matrix_matrix_to_lbytes(64U, 8U, s_matrix, s_bytes); - Lib_Memzero0_memzero(s_matrix, 512U, uint16_t); - Lib_Memzero0_memzero(e_matrix, 512U, uint16_t); + Lib_Memzero0_memzero(s_matrix, 512U, uint16_t, void *); + Lib_Memzero0_memzero(e_matrix, 512U, uint16_t, void *); uint32_t slen1 = 2016U; uint8_t *sk_p = sk; memcpy(sk_p, s, 16U * sizeof (uint8_t)); memcpy(sk_p + 16U, pk, 976U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake128_hacl(976U, pk, 16U, sk + slen1); - 
Lib_Memzero0_memzero(coins, 48U, uint8_t); + Hacl_Hash_SHA3_shake128(sk + slen1, 16U, pk, 976U); + Lib_Memzero0_memzero(coins, 48U, uint8_t, void *); return 0U; } @@ -87,9 +87,9 @@ uint32_t Hacl_Frodo64_crypto_kem_enc(uint8_t *ct, uint8_t *ss, uint8_t *pk) randombytes_(16U, coins); uint8_t seed_se_k[32U] = { 0U }; uint8_t pkh_mu[32U] = { 0U }; - Hacl_Hash_SHA3_shake128_hacl(976U, pk, 16U, pkh_mu); + Hacl_Hash_SHA3_shake128(pkh_mu, 16U, pk, 976U); memcpy(pkh_mu + 16U, coins, 16U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake128_hacl(32U, pkh_mu, 32U, seed_se_k); + Hacl_Hash_SHA3_shake128(seed_se_k, 32U, pkh_mu, 32U); uint8_t *seed_se = seed_se_k; uint8_t *k = seed_se_k + 16U; uint8_t *seed_a = pk; @@ -101,8 +101,8 @@ uint32_t Hacl_Frodo64_crypto_kem_enc(uint8_t *ct, uint8_t *ss, uint8_t *pk) uint8_t shake_input_seed_se[17U] = { 0U }; shake_input_seed_se[0U] = 0x96U; memcpy(shake_input_seed_se + 1U, seed_se, 16U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake128_hacl(17U, shake_input_seed_se, 2176U, r); - Lib_Memzero0_memzero(shake_input_seed_se, 17U, uint8_t); + Hacl_Hash_SHA3_shake128(r, 2176U, shake_input_seed_se, 17U); + Lib_Memzero0_memzero(shake_input_seed_se, 17U, uint8_t, void *); Hacl_Impl_Frodo_Sample_frodo_sample_matrix64(8U, 64U, r, sp_matrix); Hacl_Impl_Frodo_Sample_frodo_sample_matrix64(8U, 64U, r + 1024U, ep_matrix); Hacl_Impl_Frodo_Sample_frodo_sample_matrix64(8U, 8U, r + 2048U, epp_matrix); @@ -122,22 +122,22 @@ uint32_t Hacl_Frodo64_crypto_kem_enc(uint8_t *ct, uint8_t *ss, uint8_t *pk) uint16_t mu_encode[64U] = { 0U }; Hacl_Impl_Frodo_Encode_frodo_key_encode(15U, 2U, 8U, coins, mu_encode); Hacl_Impl_Matrix_matrix_add(8U, 8U, v_matrix, mu_encode); - Lib_Memzero0_memzero(mu_encode, 64U, uint16_t); + Lib_Memzero0_memzero(mu_encode, 64U, uint16_t, void *); Hacl_Impl_Frodo_Pack_frodo_pack(8U, 8U, 15U, v_matrix, c2); - Lib_Memzero0_memzero(v_matrix, 64U, uint16_t); - Lib_Memzero0_memzero(sp_matrix, 512U, uint16_t); - Lib_Memzero0_memzero(ep_matrix, 512U, uint16_t); - Lib_Memzero0_memzero(epp_matrix, 64U, uint16_t); + Lib_Memzero0_memzero(v_matrix, 64U, uint16_t, void *); + Lib_Memzero0_memzero(sp_matrix, 512U, uint16_t, void *); + Lib_Memzero0_memzero(ep_matrix, 512U, uint16_t, void *); + Lib_Memzero0_memzero(epp_matrix, 64U, uint16_t, void *); uint32_t ss_init_len = 1096U; KRML_CHECK_SIZE(sizeof (uint8_t), ss_init_len); uint8_t shake_input_ss[ss_init_len]; memset(shake_input_ss, 0U, ss_init_len * sizeof (uint8_t)); memcpy(shake_input_ss, ct, 1080U * sizeof (uint8_t)); memcpy(shake_input_ss + 1080U, k, 16U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake128_hacl(ss_init_len, shake_input_ss, 16U, ss); - Lib_Memzero0_memzero(shake_input_ss, ss_init_len, uint8_t); - Lib_Memzero0_memzero(seed_se_k, 32U, uint8_t); - Lib_Memzero0_memzero(coins, 16U, uint8_t); + Hacl_Hash_SHA3_shake128(ss, 16U, shake_input_ss, ss_init_len); + Lib_Memzero0_memzero(shake_input_ss, ss_init_len, uint8_t, void *); + Lib_Memzero0_memzero(seed_se_k, 32U, uint8_t, void *); + Lib_Memzero0_memzero(coins, 16U, uint8_t, void *); return 0U; } @@ -157,8 +157,8 @@ uint32_t Hacl_Frodo64_crypto_kem_dec(uint8_t *ss, uint8_t *ct, uint8_t *sk) Hacl_Impl_Matrix_matrix_mul_s(8U, 64U, 8U, bp_matrix, s_matrix, m_matrix); Hacl_Impl_Matrix_matrix_sub(8U, 8U, c_matrix, m_matrix); Hacl_Impl_Frodo_Encode_frodo_key_decode(15U, 2U, 8U, m_matrix, mu_decode); - Lib_Memzero0_memzero(s_matrix, 512U, uint16_t); - Lib_Memzero0_memzero(m_matrix, 64U, uint16_t); + Lib_Memzero0_memzero(s_matrix, 512U, uint16_t, void *); + Lib_Memzero0_memzero(m_matrix, 
64U, uint16_t, void *); uint8_t seed_se_k[32U] = { 0U }; uint32_t pkh_mu_decode_len = 32U; KRML_CHECK_SIZE(sizeof (uint8_t), pkh_mu_decode_len); @@ -167,7 +167,7 @@ uint32_t Hacl_Frodo64_crypto_kem_dec(uint8_t *ss, uint8_t *ct, uint8_t *sk) uint8_t *pkh = sk + 2016U; memcpy(pkh_mu_decode, pkh, 16U * sizeof (uint8_t)); memcpy(pkh_mu_decode + 16U, mu_decode, 16U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake128_hacl(pkh_mu_decode_len, pkh_mu_decode, 32U, seed_se_k); + Hacl_Hash_SHA3_shake128(seed_se_k, 32U, pkh_mu_decode, pkh_mu_decode_len); uint8_t *seed_se = seed_se_k; uint8_t *kp = seed_se_k + 16U; uint8_t *s = sk; @@ -180,8 +180,8 @@ uint32_t Hacl_Frodo64_crypto_kem_dec(uint8_t *ss, uint8_t *ct, uint8_t *sk) uint8_t shake_input_seed_se[17U] = { 0U }; shake_input_seed_se[0U] = 0x96U; memcpy(shake_input_seed_se + 1U, seed_se, 16U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake128_hacl(17U, shake_input_seed_se, 2176U, r); - Lib_Memzero0_memzero(shake_input_seed_se, 17U, uint8_t); + Hacl_Hash_SHA3_shake128(r, 2176U, shake_input_seed_se, 17U); + Lib_Memzero0_memzero(shake_input_seed_se, 17U, uint8_t, void *); Hacl_Impl_Frodo_Sample_frodo_sample_matrix64(8U, 64U, r, sp_matrix); Hacl_Impl_Frodo_Sample_frodo_sample_matrix64(8U, 64U, r + 1024U, ep_matrix); Hacl_Impl_Frodo_Sample_frodo_sample_matrix64(8U, 8U, r + 2048U, epp_matrix); @@ -199,12 +199,12 @@ uint32_t Hacl_Frodo64_crypto_kem_dec(uint8_t *ss, uint8_t *ct, uint8_t *sk) uint16_t mu_encode[64U] = { 0U }; Hacl_Impl_Frodo_Encode_frodo_key_encode(15U, 2U, 8U, mu_decode, mu_encode); Hacl_Impl_Matrix_matrix_add(8U, 8U, cp_matrix, mu_encode); - Lib_Memzero0_memzero(mu_encode, 64U, uint16_t); + Lib_Memzero0_memzero(mu_encode, 64U, uint16_t, void *); Hacl_Impl_Matrix_mod_pow2(8U, 64U, 15U, bpp_matrix); Hacl_Impl_Matrix_mod_pow2(8U, 8U, 15U, cp_matrix); - Lib_Memzero0_memzero(sp_matrix, 512U, uint16_t); - Lib_Memzero0_memzero(ep_matrix, 512U, uint16_t); - Lib_Memzero0_memzero(epp_matrix, 64U, uint16_t); + Lib_Memzero0_memzero(sp_matrix, 512U, uint16_t, void *); + Lib_Memzero0_memzero(ep_matrix, 512U, uint16_t, void *); + Lib_Memzero0_memzero(epp_matrix, 64U, uint16_t, void *); uint16_t b1 = Hacl_Impl_Matrix_matrix_eq(8U, 64U, bp_matrix, bpp_matrix); uint16_t b2 = Hacl_Impl_Matrix_matrix_eq(8U, 8U, c_matrix, cp_matrix); uint16_t mask = (uint32_t)b1 & (uint32_t)b2; @@ -225,11 +225,11 @@ uint32_t Hacl_Frodo64_crypto_kem_dec(uint8_t *ss, uint8_t *ct, uint8_t *sk) memset(ss_init, 0U, ss_init_len * sizeof (uint8_t)); memcpy(ss_init, ct, 1080U * sizeof (uint8_t)); memcpy(ss_init + 1080U, kp_s, 16U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake128_hacl(ss_init_len, ss_init, 16U, ss); - Lib_Memzero0_memzero(ss_init, ss_init_len, uint8_t); - Lib_Memzero0_memzero(kp_s, 16U, uint8_t); - Lib_Memzero0_memzero(seed_se_k, 32U, uint8_t); - Lib_Memzero0_memzero(mu_decode, 16U, uint8_t); + Hacl_Hash_SHA3_shake128(ss, 16U, ss_init, ss_init_len); + Lib_Memzero0_memzero(ss_init, ss_init_len, uint8_t, void *); + Lib_Memzero0_memzero(kp_s, 16U, uint8_t, void *); + Lib_Memzero0_memzero(seed_se_k, 32U, uint8_t, void *); + Lib_Memzero0_memzero(mu_decode, 16U, uint8_t, void *); return 0U; } diff --git a/src/Hacl_Frodo640.c b/src/Hacl_Frodo640.c index 8baaee46..8cf0253e 100644 --- a/src/Hacl_Frodo640.c +++ b/src/Hacl_Frodo640.c @@ -45,7 +45,7 @@ uint32_t Hacl_Frodo640_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) uint8_t *seed_se = coins + 16U; uint8_t *z = coins + 32U; uint8_t *seed_a = pk; - Hacl_Hash_SHA3_shake128_hacl(16U, z, 16U, seed_a); + Hacl_Hash_SHA3_shake128(seed_a, 16U, z, 
16U); uint8_t *b_bytes = pk + 16U; uint8_t *s_bytes = sk + 9632U; uint16_t s_matrix[5120U] = { 0U }; @@ -54,8 +54,8 @@ uint32_t Hacl_Frodo640_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) uint8_t shake_input_seed_se[17U] = { 0U }; shake_input_seed_se[0U] = 0x5fU; memcpy(shake_input_seed_se + 1U, seed_se, 16U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake128_hacl(17U, shake_input_seed_se, 20480U, r); - Lib_Memzero0_memzero(shake_input_seed_se, 17U, uint8_t); + Hacl_Hash_SHA3_shake128(r, 20480U, shake_input_seed_se, 17U); + Lib_Memzero0_memzero(shake_input_seed_se, 17U, uint8_t, void *); Hacl_Impl_Frodo_Sample_frodo_sample_matrix640(640U, 8U, r, s_matrix); Hacl_Impl_Frodo_Sample_frodo_sample_matrix640(640U, 8U, r + 10240U, e_matrix); uint16_t b_matrix[5120U] = { 0U }; @@ -66,14 +66,14 @@ uint32_t Hacl_Frodo640_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) Hacl_Impl_Matrix_matrix_add(640U, 8U, b_matrix, e_matrix); Hacl_Impl_Frodo_Pack_frodo_pack(640U, 8U, 15U, b_matrix, b_bytes); Hacl_Impl_Matrix_matrix_to_lbytes(640U, 8U, s_matrix, s_bytes); - Lib_Memzero0_memzero(s_matrix, 5120U, uint16_t); - Lib_Memzero0_memzero(e_matrix, 5120U, uint16_t); + Lib_Memzero0_memzero(s_matrix, 5120U, uint16_t, void *); + Lib_Memzero0_memzero(e_matrix, 5120U, uint16_t, void *); uint32_t slen1 = 19872U; uint8_t *sk_p = sk; memcpy(sk_p, s, 16U * sizeof (uint8_t)); memcpy(sk_p + 16U, pk, 9616U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake128_hacl(9616U, pk, 16U, sk + slen1); - Lib_Memzero0_memzero(coins, 48U, uint8_t); + Hacl_Hash_SHA3_shake128(sk + slen1, 16U, pk, 9616U); + Lib_Memzero0_memzero(coins, 48U, uint8_t, void *); return 0U; } @@ -83,9 +83,9 @@ uint32_t Hacl_Frodo640_crypto_kem_enc(uint8_t *ct, uint8_t *ss, uint8_t *pk) randombytes_(16U, coins); uint8_t seed_se_k[32U] = { 0U }; uint8_t pkh_mu[32U] = { 0U }; - Hacl_Hash_SHA3_shake128_hacl(9616U, pk, 16U, pkh_mu); + Hacl_Hash_SHA3_shake128(pkh_mu, 16U, pk, 9616U); memcpy(pkh_mu + 16U, coins, 16U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake128_hacl(32U, pkh_mu, 32U, seed_se_k); + Hacl_Hash_SHA3_shake128(seed_se_k, 32U, pkh_mu, 32U); uint8_t *seed_se = seed_se_k; uint8_t *k = seed_se_k + 16U; uint8_t *seed_a = pk; @@ -97,8 +97,8 @@ uint32_t Hacl_Frodo640_crypto_kem_enc(uint8_t *ct, uint8_t *ss, uint8_t *pk) uint8_t shake_input_seed_se[17U] = { 0U }; shake_input_seed_se[0U] = 0x96U; memcpy(shake_input_seed_se + 1U, seed_se, 16U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake128_hacl(17U, shake_input_seed_se, 20608U, r); - Lib_Memzero0_memzero(shake_input_seed_se, 17U, uint8_t); + Hacl_Hash_SHA3_shake128(r, 20608U, shake_input_seed_se, 17U); + Lib_Memzero0_memzero(shake_input_seed_se, 17U, uint8_t, void *); Hacl_Impl_Frodo_Sample_frodo_sample_matrix640(8U, 640U, r, sp_matrix); Hacl_Impl_Frodo_Sample_frodo_sample_matrix640(8U, 640U, r + 10240U, ep_matrix); Hacl_Impl_Frodo_Sample_frodo_sample_matrix640(8U, 8U, r + 20480U, epp_matrix); @@ -119,22 +119,22 @@ uint32_t Hacl_Frodo640_crypto_kem_enc(uint8_t *ct, uint8_t *ss, uint8_t *pk) uint16_t mu_encode[64U] = { 0U }; Hacl_Impl_Frodo_Encode_frodo_key_encode(15U, 2U, 8U, coins, mu_encode); Hacl_Impl_Matrix_matrix_add(8U, 8U, v_matrix, mu_encode); - Lib_Memzero0_memzero(mu_encode, 64U, uint16_t); + Lib_Memzero0_memzero(mu_encode, 64U, uint16_t, void *); Hacl_Impl_Frodo_Pack_frodo_pack(8U, 8U, 15U, v_matrix, c2); - Lib_Memzero0_memzero(v_matrix, 64U, uint16_t); - Lib_Memzero0_memzero(sp_matrix, 5120U, uint16_t); - Lib_Memzero0_memzero(ep_matrix, 5120U, uint16_t); - Lib_Memzero0_memzero(epp_matrix, 64U, uint16_t); + 
Lib_Memzero0_memzero(v_matrix, 64U, uint16_t, void *); + Lib_Memzero0_memzero(sp_matrix, 5120U, uint16_t, void *); + Lib_Memzero0_memzero(ep_matrix, 5120U, uint16_t, void *); + Lib_Memzero0_memzero(epp_matrix, 64U, uint16_t, void *); uint32_t ss_init_len = 9736U; KRML_CHECK_SIZE(sizeof (uint8_t), ss_init_len); uint8_t shake_input_ss[ss_init_len]; memset(shake_input_ss, 0U, ss_init_len * sizeof (uint8_t)); memcpy(shake_input_ss, ct, 9720U * sizeof (uint8_t)); memcpy(shake_input_ss + 9720U, k, 16U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake128_hacl(ss_init_len, shake_input_ss, 16U, ss); - Lib_Memzero0_memzero(shake_input_ss, ss_init_len, uint8_t); - Lib_Memzero0_memzero(seed_se_k, 32U, uint8_t); - Lib_Memzero0_memzero(coins, 16U, uint8_t); + Hacl_Hash_SHA3_shake128(ss, 16U, shake_input_ss, ss_init_len); + Lib_Memzero0_memzero(shake_input_ss, ss_init_len, uint8_t, void *); + Lib_Memzero0_memzero(seed_se_k, 32U, uint8_t, void *); + Lib_Memzero0_memzero(coins, 16U, uint8_t, void *); return 0U; } @@ -154,8 +154,8 @@ uint32_t Hacl_Frodo640_crypto_kem_dec(uint8_t *ss, uint8_t *ct, uint8_t *sk) Hacl_Impl_Matrix_matrix_mul_s(8U, 640U, 8U, bp_matrix, s_matrix, m_matrix); Hacl_Impl_Matrix_matrix_sub(8U, 8U, c_matrix, m_matrix); Hacl_Impl_Frodo_Encode_frodo_key_decode(15U, 2U, 8U, m_matrix, mu_decode); - Lib_Memzero0_memzero(s_matrix, 5120U, uint16_t); - Lib_Memzero0_memzero(m_matrix, 64U, uint16_t); + Lib_Memzero0_memzero(s_matrix, 5120U, uint16_t, void *); + Lib_Memzero0_memzero(m_matrix, 64U, uint16_t, void *); uint8_t seed_se_k[32U] = { 0U }; uint32_t pkh_mu_decode_len = 32U; KRML_CHECK_SIZE(sizeof (uint8_t), pkh_mu_decode_len); @@ -164,7 +164,7 @@ uint32_t Hacl_Frodo640_crypto_kem_dec(uint8_t *ss, uint8_t *ct, uint8_t *sk) uint8_t *pkh = sk + 19872U; memcpy(pkh_mu_decode, pkh, 16U * sizeof (uint8_t)); memcpy(pkh_mu_decode + 16U, mu_decode, 16U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake128_hacl(pkh_mu_decode_len, pkh_mu_decode, 32U, seed_se_k); + Hacl_Hash_SHA3_shake128(seed_se_k, 32U, pkh_mu_decode, pkh_mu_decode_len); uint8_t *seed_se = seed_se_k; uint8_t *kp = seed_se_k + 16U; uint8_t *s = sk; @@ -177,8 +177,8 @@ uint32_t Hacl_Frodo640_crypto_kem_dec(uint8_t *ss, uint8_t *ct, uint8_t *sk) uint8_t shake_input_seed_se[17U] = { 0U }; shake_input_seed_se[0U] = 0x96U; memcpy(shake_input_seed_se + 1U, seed_se, 16U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake128_hacl(17U, shake_input_seed_se, 20608U, r); - Lib_Memzero0_memzero(shake_input_seed_se, 17U, uint8_t); + Hacl_Hash_SHA3_shake128(r, 20608U, shake_input_seed_se, 17U); + Lib_Memzero0_memzero(shake_input_seed_se, 17U, uint8_t, void *); Hacl_Impl_Frodo_Sample_frodo_sample_matrix640(8U, 640U, r, sp_matrix); Hacl_Impl_Frodo_Sample_frodo_sample_matrix640(8U, 640U, r + 10240U, ep_matrix); Hacl_Impl_Frodo_Sample_frodo_sample_matrix640(8U, 8U, r + 20480U, epp_matrix); @@ -197,12 +197,12 @@ uint32_t Hacl_Frodo640_crypto_kem_dec(uint8_t *ss, uint8_t *ct, uint8_t *sk) uint16_t mu_encode[64U] = { 0U }; Hacl_Impl_Frodo_Encode_frodo_key_encode(15U, 2U, 8U, mu_decode, mu_encode); Hacl_Impl_Matrix_matrix_add(8U, 8U, cp_matrix, mu_encode); - Lib_Memzero0_memzero(mu_encode, 64U, uint16_t); + Lib_Memzero0_memzero(mu_encode, 64U, uint16_t, void *); Hacl_Impl_Matrix_mod_pow2(8U, 640U, 15U, bpp_matrix); Hacl_Impl_Matrix_mod_pow2(8U, 8U, 15U, cp_matrix); - Lib_Memzero0_memzero(sp_matrix, 5120U, uint16_t); - Lib_Memzero0_memzero(ep_matrix, 5120U, uint16_t); - Lib_Memzero0_memzero(epp_matrix, 64U, uint16_t); + Lib_Memzero0_memzero(sp_matrix, 5120U, uint16_t, void *); + 
Lib_Memzero0_memzero(ep_matrix, 5120U, uint16_t, void *); + Lib_Memzero0_memzero(epp_matrix, 64U, uint16_t, void *); uint16_t b1 = Hacl_Impl_Matrix_matrix_eq(8U, 640U, bp_matrix, bpp_matrix); uint16_t b2 = Hacl_Impl_Matrix_matrix_eq(8U, 8U, c_matrix, cp_matrix); uint16_t mask = (uint32_t)b1 & (uint32_t)b2; @@ -223,11 +223,11 @@ uint32_t Hacl_Frodo640_crypto_kem_dec(uint8_t *ss, uint8_t *ct, uint8_t *sk) memset(ss_init, 0U, ss_init_len * sizeof (uint8_t)); memcpy(ss_init, ct, 9720U * sizeof (uint8_t)); memcpy(ss_init + 9720U, kp_s, 16U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake128_hacl(ss_init_len, ss_init, 16U, ss); - Lib_Memzero0_memzero(ss_init, ss_init_len, uint8_t); - Lib_Memzero0_memzero(kp_s, 16U, uint8_t); - Lib_Memzero0_memzero(seed_se_k, 32U, uint8_t); - Lib_Memzero0_memzero(mu_decode, 16U, uint8_t); + Hacl_Hash_SHA3_shake128(ss, 16U, ss_init, ss_init_len); + Lib_Memzero0_memzero(ss_init, ss_init_len, uint8_t, void *); + Lib_Memzero0_memzero(kp_s, 16U, uint8_t, void *); + Lib_Memzero0_memzero(seed_se_k, 32U, uint8_t, void *); + Lib_Memzero0_memzero(mu_decode, 16U, uint8_t, void *); return 0U; } diff --git a/src/Hacl_Frodo976.c b/src/Hacl_Frodo976.c index 76d78a30..9360e3af 100644 --- a/src/Hacl_Frodo976.c +++ b/src/Hacl_Frodo976.c @@ -45,7 +45,7 @@ uint32_t Hacl_Frodo976_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) uint8_t *seed_se = coins + 24U; uint8_t *z = coins + 48U; uint8_t *seed_a = pk; - Hacl_Hash_SHA3_shake256_hacl(16U, z, 16U, seed_a); + Hacl_Hash_SHA3_shake256(seed_a, 16U, z, 16U); uint8_t *b_bytes = pk + 16U; uint8_t *s_bytes = sk + 15656U; uint16_t s_matrix[7808U] = { 0U }; @@ -54,8 +54,8 @@ uint32_t Hacl_Frodo976_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) uint8_t shake_input_seed_se[25U] = { 0U }; shake_input_seed_se[0U] = 0x5fU; memcpy(shake_input_seed_se + 1U, seed_se, 24U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake256_hacl(25U, shake_input_seed_se, 31232U, r); - Lib_Memzero0_memzero(shake_input_seed_se, 25U, uint8_t); + Hacl_Hash_SHA3_shake256(r, 31232U, shake_input_seed_se, 25U); + Lib_Memzero0_memzero(shake_input_seed_se, 25U, uint8_t, void *); Hacl_Impl_Frodo_Sample_frodo_sample_matrix976(976U, 8U, r, s_matrix); Hacl_Impl_Frodo_Sample_frodo_sample_matrix976(976U, 8U, r + 15616U, e_matrix); uint16_t b_matrix[7808U] = { 0U }; @@ -66,14 +66,14 @@ uint32_t Hacl_Frodo976_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) Hacl_Impl_Matrix_matrix_add(976U, 8U, b_matrix, e_matrix); Hacl_Impl_Frodo_Pack_frodo_pack(976U, 8U, 16U, b_matrix, b_bytes); Hacl_Impl_Matrix_matrix_to_lbytes(976U, 8U, s_matrix, s_bytes); - Lib_Memzero0_memzero(s_matrix, 7808U, uint16_t); - Lib_Memzero0_memzero(e_matrix, 7808U, uint16_t); + Lib_Memzero0_memzero(s_matrix, 7808U, uint16_t, void *); + Lib_Memzero0_memzero(e_matrix, 7808U, uint16_t, void *); uint32_t slen1 = 31272U; uint8_t *sk_p = sk; memcpy(sk_p, s, 24U * sizeof (uint8_t)); memcpy(sk_p + 24U, pk, 15632U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake256_hacl(15632U, pk, 24U, sk + slen1); - Lib_Memzero0_memzero(coins, 64U, uint8_t); + Hacl_Hash_SHA3_shake256(sk + slen1, 24U, pk, 15632U); + Lib_Memzero0_memzero(coins, 64U, uint8_t, void *); return 0U; } @@ -83,9 +83,9 @@ uint32_t Hacl_Frodo976_crypto_kem_enc(uint8_t *ct, uint8_t *ss, uint8_t *pk) randombytes_(24U, coins); uint8_t seed_se_k[48U] = { 0U }; uint8_t pkh_mu[48U] = { 0U }; - Hacl_Hash_SHA3_shake256_hacl(15632U, pk, 24U, pkh_mu); + Hacl_Hash_SHA3_shake256(pkh_mu, 24U, pk, 15632U); memcpy(pkh_mu + 24U, coins, 24U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake256_hacl(48U, pkh_mu, 48U, 
seed_se_k); + Hacl_Hash_SHA3_shake256(seed_se_k, 48U, pkh_mu, 48U); uint8_t *seed_se = seed_se_k; uint8_t *k = seed_se_k + 24U; uint8_t *seed_a = pk; @@ -97,8 +97,8 @@ uint32_t Hacl_Frodo976_crypto_kem_enc(uint8_t *ct, uint8_t *ss, uint8_t *pk) uint8_t shake_input_seed_se[25U] = { 0U }; shake_input_seed_se[0U] = 0x96U; memcpy(shake_input_seed_se + 1U, seed_se, 24U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake256_hacl(25U, shake_input_seed_se, 31360U, r); - Lib_Memzero0_memzero(shake_input_seed_se, 25U, uint8_t); + Hacl_Hash_SHA3_shake256(r, 31360U, shake_input_seed_se, 25U); + Lib_Memzero0_memzero(shake_input_seed_se, 25U, uint8_t, void *); Hacl_Impl_Frodo_Sample_frodo_sample_matrix976(8U, 976U, r, sp_matrix); Hacl_Impl_Frodo_Sample_frodo_sample_matrix976(8U, 976U, r + 15616U, ep_matrix); Hacl_Impl_Frodo_Sample_frodo_sample_matrix976(8U, 8U, r + 31232U, epp_matrix); @@ -119,22 +119,22 @@ uint32_t Hacl_Frodo976_crypto_kem_enc(uint8_t *ct, uint8_t *ss, uint8_t *pk) uint16_t mu_encode[64U] = { 0U }; Hacl_Impl_Frodo_Encode_frodo_key_encode(16U, 3U, 8U, coins, mu_encode); Hacl_Impl_Matrix_matrix_add(8U, 8U, v_matrix, mu_encode); - Lib_Memzero0_memzero(mu_encode, 64U, uint16_t); + Lib_Memzero0_memzero(mu_encode, 64U, uint16_t, void *); Hacl_Impl_Frodo_Pack_frodo_pack(8U, 8U, 16U, v_matrix, c2); - Lib_Memzero0_memzero(v_matrix, 64U, uint16_t); - Lib_Memzero0_memzero(sp_matrix, 7808U, uint16_t); - Lib_Memzero0_memzero(ep_matrix, 7808U, uint16_t); - Lib_Memzero0_memzero(epp_matrix, 64U, uint16_t); + Lib_Memzero0_memzero(v_matrix, 64U, uint16_t, void *); + Lib_Memzero0_memzero(sp_matrix, 7808U, uint16_t, void *); + Lib_Memzero0_memzero(ep_matrix, 7808U, uint16_t, void *); + Lib_Memzero0_memzero(epp_matrix, 64U, uint16_t, void *); uint32_t ss_init_len = 15768U; KRML_CHECK_SIZE(sizeof (uint8_t), ss_init_len); uint8_t shake_input_ss[ss_init_len]; memset(shake_input_ss, 0U, ss_init_len * sizeof (uint8_t)); memcpy(shake_input_ss, ct, 15744U * sizeof (uint8_t)); memcpy(shake_input_ss + 15744U, k, 24U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake256_hacl(ss_init_len, shake_input_ss, 24U, ss); - Lib_Memzero0_memzero(shake_input_ss, ss_init_len, uint8_t); - Lib_Memzero0_memzero(seed_se_k, 48U, uint8_t); - Lib_Memzero0_memzero(coins, 24U, uint8_t); + Hacl_Hash_SHA3_shake256(ss, 24U, shake_input_ss, ss_init_len); + Lib_Memzero0_memzero(shake_input_ss, ss_init_len, uint8_t, void *); + Lib_Memzero0_memzero(seed_se_k, 48U, uint8_t, void *); + Lib_Memzero0_memzero(coins, 24U, uint8_t, void *); return 0U; } @@ -154,8 +154,8 @@ uint32_t Hacl_Frodo976_crypto_kem_dec(uint8_t *ss, uint8_t *ct, uint8_t *sk) Hacl_Impl_Matrix_matrix_mul_s(8U, 976U, 8U, bp_matrix, s_matrix, m_matrix); Hacl_Impl_Matrix_matrix_sub(8U, 8U, c_matrix, m_matrix); Hacl_Impl_Frodo_Encode_frodo_key_decode(16U, 3U, 8U, m_matrix, mu_decode); - Lib_Memzero0_memzero(s_matrix, 7808U, uint16_t); - Lib_Memzero0_memzero(m_matrix, 64U, uint16_t); + Lib_Memzero0_memzero(s_matrix, 7808U, uint16_t, void *); + Lib_Memzero0_memzero(m_matrix, 64U, uint16_t, void *); uint8_t seed_se_k[48U] = { 0U }; uint32_t pkh_mu_decode_len = 48U; KRML_CHECK_SIZE(sizeof (uint8_t), pkh_mu_decode_len); @@ -164,7 +164,7 @@ uint32_t Hacl_Frodo976_crypto_kem_dec(uint8_t *ss, uint8_t *ct, uint8_t *sk) uint8_t *pkh = sk + 31272U; memcpy(pkh_mu_decode, pkh, 24U * sizeof (uint8_t)); memcpy(pkh_mu_decode + 24U, mu_decode, 24U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake256_hacl(pkh_mu_decode_len, pkh_mu_decode, 48U, seed_se_k); + Hacl_Hash_SHA3_shake256(seed_se_k, 48U, pkh_mu_decode, 
pkh_mu_decode_len); uint8_t *seed_se = seed_se_k; uint8_t *kp = seed_se_k + 24U; uint8_t *s = sk; @@ -177,8 +177,8 @@ uint32_t Hacl_Frodo976_crypto_kem_dec(uint8_t *ss, uint8_t *ct, uint8_t *sk) uint8_t shake_input_seed_se[25U] = { 0U }; shake_input_seed_se[0U] = 0x96U; memcpy(shake_input_seed_se + 1U, seed_se, 24U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake256_hacl(25U, shake_input_seed_se, 31360U, r); - Lib_Memzero0_memzero(shake_input_seed_se, 25U, uint8_t); + Hacl_Hash_SHA3_shake256(r, 31360U, shake_input_seed_se, 25U); + Lib_Memzero0_memzero(shake_input_seed_se, 25U, uint8_t, void *); Hacl_Impl_Frodo_Sample_frodo_sample_matrix976(8U, 976U, r, sp_matrix); Hacl_Impl_Frodo_Sample_frodo_sample_matrix976(8U, 976U, r + 15616U, ep_matrix); Hacl_Impl_Frodo_Sample_frodo_sample_matrix976(8U, 8U, r + 31232U, epp_matrix); @@ -197,12 +197,12 @@ uint32_t Hacl_Frodo976_crypto_kem_dec(uint8_t *ss, uint8_t *ct, uint8_t *sk) uint16_t mu_encode[64U] = { 0U }; Hacl_Impl_Frodo_Encode_frodo_key_encode(16U, 3U, 8U, mu_decode, mu_encode); Hacl_Impl_Matrix_matrix_add(8U, 8U, cp_matrix, mu_encode); - Lib_Memzero0_memzero(mu_encode, 64U, uint16_t); + Lib_Memzero0_memzero(mu_encode, 64U, uint16_t, void *); Hacl_Impl_Matrix_mod_pow2(8U, 976U, 16U, bpp_matrix); Hacl_Impl_Matrix_mod_pow2(8U, 8U, 16U, cp_matrix); - Lib_Memzero0_memzero(sp_matrix, 7808U, uint16_t); - Lib_Memzero0_memzero(ep_matrix, 7808U, uint16_t); - Lib_Memzero0_memzero(epp_matrix, 64U, uint16_t); + Lib_Memzero0_memzero(sp_matrix, 7808U, uint16_t, void *); + Lib_Memzero0_memzero(ep_matrix, 7808U, uint16_t, void *); + Lib_Memzero0_memzero(epp_matrix, 64U, uint16_t, void *); uint16_t b1 = Hacl_Impl_Matrix_matrix_eq(8U, 976U, bp_matrix, bpp_matrix); uint16_t b2 = Hacl_Impl_Matrix_matrix_eq(8U, 8U, c_matrix, cp_matrix); uint16_t mask = (uint32_t)b1 & (uint32_t)b2; @@ -222,11 +222,11 @@ uint32_t Hacl_Frodo976_crypto_kem_dec(uint8_t *ss, uint8_t *ct, uint8_t *sk) memset(ss_init, 0U, ss_init_len * sizeof (uint8_t)); memcpy(ss_init, ct, 15744U * sizeof (uint8_t)); memcpy(ss_init + 15744U, kp_s, 24U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake256_hacl(ss_init_len, ss_init, 24U, ss); - Lib_Memzero0_memzero(ss_init, ss_init_len, uint8_t); - Lib_Memzero0_memzero(kp_s, 24U, uint8_t); - Lib_Memzero0_memzero(seed_se_k, 48U, uint8_t); - Lib_Memzero0_memzero(mu_decode, 24U, uint8_t); + Hacl_Hash_SHA3_shake256(ss, 24U, ss_init, ss_init_len); + Lib_Memzero0_memzero(ss_init, ss_init_len, uint8_t, void *); + Lib_Memzero0_memzero(kp_s, 24U, uint8_t, void *); + Lib_Memzero0_memzero(seed_se_k, 48U, uint8_t, void *); + Lib_Memzero0_memzero(mu_decode, 24U, uint8_t, void *); return 0U; } diff --git a/src/Hacl_Hash_Blake2b.c b/src/Hacl_Hash_Blake2b.c index 2dceaf4b..d490a1a5 100644 --- a/src/Hacl_Hash_Blake2b.c +++ b/src/Hacl_Hash_Blake2b.c @@ -76,22 +76,22 @@ update_block(uint64_t *wv, uint64_t *hash, bool flag, FStar_UInt128_uint128 totl uint64_t *r1 = m_st + 4U; uint64_t *r20 = m_st + 8U; uint64_t *r30 = m_st + 12U; - uint32_t s0 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 0U]; - uint32_t s1 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 1U]; - uint32_t s2 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 2U]; - uint32_t s3 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 3U]; - uint32_t s4 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 4U]; - uint32_t s5 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 5U]; - uint32_t s6 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 6U]; - uint32_t s7 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 7U]; - uint32_t s8 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 8U]; - uint32_t 
s9 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 9U]; - uint32_t s10 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 10U]; - uint32_t s11 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 11U]; - uint32_t s12 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 12U]; - uint32_t s13 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 13U]; - uint32_t s14 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 14U]; - uint32_t s15 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 15U]; + uint32_t s0 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 0U]; + uint32_t s1 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 1U]; + uint32_t s2 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 2U]; + uint32_t s3 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 3U]; + uint32_t s4 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 4U]; + uint32_t s5 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 5U]; + uint32_t s6 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 6U]; + uint32_t s7 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 7U]; + uint32_t s8 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 8U]; + uint32_t s9 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 9U]; + uint32_t s10 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 10U]; + uint32_t s11 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 11U]; + uint32_t s12 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 12U]; + uint32_t s13 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 13U]; + uint32_t s14 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 14U]; + uint32_t s15 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 15U]; uint64_t uu____0 = m_w[s2]; uint64_t uu____1 = m_w[s4]; uint64_t uu____2 = m_w[s6]; @@ -474,18 +474,27 @@ update_block(uint64_t *wv, uint64_t *hash, bool flag, FStar_UInt128_uint128 totl void Hacl_Hash_Blake2b_init(uint64_t *hash, uint32_t kk, uint32_t nn) { + uint8_t salt[16U] = { 0U }; + uint8_t personal[16U] = { 0U }; + Hacl_Hash_Blake2b_blake2_params + p = + { + .digest_length = 64U, .key_length = 0U, .fanout = 1U, .depth = 1U, .leaf_length = 0U, + .node_offset = 0ULL, .node_depth = 0U, .inner_length = 0U, .salt = salt, .personal = personal + }; + uint64_t tmp[8U] = { 0U }; uint64_t *r0 = hash; uint64_t *r1 = hash + 4U; uint64_t *r2 = hash + 8U; uint64_t *r3 = hash + 12U; - uint64_t iv0 = Hacl_Hash_Blake2s_ivTable_B[0U]; - uint64_t iv1 = Hacl_Hash_Blake2s_ivTable_B[1U]; - uint64_t iv2 = Hacl_Hash_Blake2s_ivTable_B[2U]; - uint64_t iv3 = Hacl_Hash_Blake2s_ivTable_B[3U]; - uint64_t iv4 = Hacl_Hash_Blake2s_ivTable_B[4U]; - uint64_t iv5 = Hacl_Hash_Blake2s_ivTable_B[5U]; - uint64_t iv6 = Hacl_Hash_Blake2s_ivTable_B[6U]; - uint64_t iv7 = Hacl_Hash_Blake2s_ivTable_B[7U]; + uint64_t iv0 = Hacl_Hash_Blake2b_ivTable_B[0U]; + uint64_t iv1 = Hacl_Hash_Blake2b_ivTable_B[1U]; + uint64_t iv2 = Hacl_Hash_Blake2b_ivTable_B[2U]; + uint64_t iv3 = Hacl_Hash_Blake2b_ivTable_B[3U]; + uint64_t iv4 = Hacl_Hash_Blake2b_ivTable_B[4U]; + uint64_t iv5 = Hacl_Hash_Blake2b_ivTable_B[5U]; + uint64_t iv6 = Hacl_Hash_Blake2b_ivTable_B[6U]; + uint64_t iv7 = Hacl_Hash_Blake2b_ivTable_B[7U]; r2[0U] = iv0; r2[1U] = iv1; r2[2U] = iv2; @@ -494,16 +503,141 @@ void Hacl_Hash_Blake2b_init(uint64_t *hash, uint32_t kk, uint32_t nn) r3[1U] = iv5; r3[2U] = iv6; r3[3U] = iv7; - uint64_t kk_shift_8 = (uint64_t)kk << 8U; - uint64_t iv0_ = iv0 ^ (0x01010000ULL ^ (kk_shift_8 ^ (uint64_t)nn)); + uint8_t kk1 = (uint8_t)kk; + uint8_t nn1 = (uint8_t)nn; + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 4U; + uint8_t *bj = p.salt + i * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i] = x;); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 6U; + uint8_t *bj = p.personal + i * 
8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i] = x;); + tmp[0U] = + (uint64_t)nn1 + ^ + ((uint64_t)kk1 + << 8U + ^ ((uint64_t)p.fanout << 16U ^ ((uint64_t)p.depth << 24U ^ (uint64_t)p.leaf_length << 32U))); + tmp[1U] = p.node_offset; + tmp[2U] = (uint64_t)p.node_depth ^ (uint64_t)p.inner_length << 8U; + tmp[3U] = 0ULL; + uint64_t tmp0 = tmp[0U]; + uint64_t tmp1 = tmp[1U]; + uint64_t tmp2 = tmp[2U]; + uint64_t tmp3 = tmp[3U]; + uint64_t tmp4 = tmp[4U]; + uint64_t tmp5 = tmp[5U]; + uint64_t tmp6 = tmp[6U]; + uint64_t tmp7 = tmp[7U]; + uint64_t iv0_ = iv0 ^ tmp0; + uint64_t iv1_ = iv1 ^ tmp1; + uint64_t iv2_ = iv2 ^ tmp2; + uint64_t iv3_ = iv3 ^ tmp3; + uint64_t iv4_ = iv4 ^ tmp4; + uint64_t iv5_ = iv5 ^ tmp5; + uint64_t iv6_ = iv6 ^ tmp6; + uint64_t iv7_ = iv7 ^ tmp7; r0[0U] = iv0_; - r0[1U] = iv1; - r0[2U] = iv2; - r0[3U] = iv3; - r1[0U] = iv4; - r1[1U] = iv5; - r1[2U] = iv6; - r1[3U] = iv7; + r0[1U] = iv1_; + r0[2U] = iv2_; + r0[3U] = iv3_; + r1[0U] = iv4_; + r1[1U] = iv5_; + r1[2U] = iv6_; + r1[3U] = iv7_; +} + +static void init_with_params(uint64_t *hash, Hacl_Hash_Blake2b_blake2_params p) +{ + uint64_t tmp[8U] = { 0U }; + uint64_t *r0 = hash; + uint64_t *r1 = hash + 4U; + uint64_t *r2 = hash + 8U; + uint64_t *r3 = hash + 12U; + uint64_t iv0 = Hacl_Hash_Blake2b_ivTable_B[0U]; + uint64_t iv1 = Hacl_Hash_Blake2b_ivTable_B[1U]; + uint64_t iv2 = Hacl_Hash_Blake2b_ivTable_B[2U]; + uint64_t iv3 = Hacl_Hash_Blake2b_ivTable_B[3U]; + uint64_t iv4 = Hacl_Hash_Blake2b_ivTable_B[4U]; + uint64_t iv5 = Hacl_Hash_Blake2b_ivTable_B[5U]; + uint64_t iv6 = Hacl_Hash_Blake2b_ivTable_B[6U]; + uint64_t iv7 = Hacl_Hash_Blake2b_ivTable_B[7U]; + r2[0U] = iv0; + r2[1U] = iv1; + r2[2U] = iv2; + r2[3U] = iv3; + r3[0U] = iv4; + r3[1U] = iv5; + r3[2U] = iv6; + r3[3U] = iv7; + uint8_t kk = p.key_length; + uint8_t nn = p.digest_length; + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 4U; + uint8_t *bj = p.salt + i * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i] = x;); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 6U; + uint8_t *bj = p.personal + i * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i] = x;); + tmp[0U] = + (uint64_t)nn + ^ + ((uint64_t)kk + << 8U + ^ ((uint64_t)p.fanout << 16U ^ ((uint64_t)p.depth << 24U ^ (uint64_t)p.leaf_length << 32U))); + tmp[1U] = p.node_offset; + tmp[2U] = (uint64_t)p.node_depth ^ (uint64_t)p.inner_length << 8U; + tmp[3U] = 0ULL; + uint64_t tmp0 = tmp[0U]; + uint64_t tmp1 = tmp[1U]; + uint64_t tmp2 = tmp[2U]; + uint64_t tmp3 = tmp[3U]; + uint64_t tmp4 = tmp[4U]; + uint64_t tmp5 = tmp[5U]; + uint64_t tmp6 = tmp[6U]; + uint64_t tmp7 = tmp[7U]; + uint64_t iv0_ = iv0 ^ tmp0; + uint64_t iv1_ = iv1 ^ tmp1; + uint64_t iv2_ = iv2 ^ tmp2; + uint64_t iv3_ = iv3 ^ tmp3; + uint64_t iv4_ = iv4 ^ tmp4; + uint64_t iv5_ = iv5 ^ tmp5; + uint64_t iv6_ = iv6 ^ tmp6; + uint64_t iv7_ = iv7 ^ tmp7; + r0[0U] = iv0_; + r0[1U] = iv1_; + r0[2U] = iv2_; + r0[3U] = iv3_; + r1[0U] = iv4_; + r1[1U] = iv5_; + r1[2U] = iv6_; + r1[3U] = iv7_; } static void update_key(uint64_t *wv, uint64_t *hash, uint32_t kk, uint8_t *k, uint32_t ll) @@ -519,7 +653,7 @@ static void update_key(uint64_t *wv, uint64_t *hash, uint32_t kk, uint8_t *k, ui { update_block(wv, hash, false, lb, b); } - Lib_Memzero0_memzero(b, 128U, uint8_t); + Lib_Memzero0_memzero(b, 128U, uint8_t, void *); } void @@ -560,7 +694,7 @@ Hacl_Hash_Blake2b_update_last( FStar_UInt128_uint128 totlen = FStar_UInt128_add_mod(prev, 
FStar_UInt128_uint64_to_uint128((uint64_t)len)); update_block(wv, hash, true, totlen, b); - Lib_Memzero0_memzero(b, 128U, uint8_t); + Lib_Memzero0_memzero(b, 128U, uint8_t, void *); } static void @@ -624,43 +758,223 @@ void Hacl_Hash_Blake2b_finish(uint32_t nn, uint8_t *output, uint64_t *hash) KRML_MAYBE_FOR4(i, 0U, 4U, 1U, store64_le(second + i * 8U, row1[i]);); uint8_t *final = b; memcpy(output, final, nn * sizeof (uint8_t)); - Lib_Memzero0_memzero(b, 64U, uint8_t); + Lib_Memzero0_memzero(b, 64U, uint8_t, void *); } -/** - State allocation function when there is no key -*/ -Hacl_Hash_Blake2b_state_t *Hacl_Hash_Blake2b_malloc(void) +static Hacl_Hash_Blake2b_state_t +*malloc_raw( + Hacl_Hash_Blake2b_index kk, + K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_ key +) { uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(128U, sizeof (uint8_t)); uint64_t *wv = (uint64_t *)KRML_HOST_CALLOC(16U, sizeof (uint64_t)); uint64_t *b = (uint64_t *)KRML_HOST_CALLOC(16U, sizeof (uint64_t)); - Hacl_Hash_Blake2b_block_state_t block_state = { .fst = wv, .snd = b }; + Hacl_Hash_Blake2b_block_state_t + block_state = { .fst = kk.key_length, .snd = kk.digest_length, .thd = { .fst = wv, .snd = b } }; + uint8_t kk10 = kk.key_length; + uint32_t ite; + if (kk10 != 0U) + { + ite = 128U; + } + else + { + ite = 0U; + } Hacl_Hash_Blake2b_state_t - s = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)0U }; + s = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; Hacl_Hash_Blake2b_state_t *p = (Hacl_Hash_Blake2b_state_t *)KRML_HOST_MALLOC(sizeof (Hacl_Hash_Blake2b_state_t)); p[0U] = s; - Hacl_Hash_Blake2b_init(block_state.snd, 0U, 64U); + Hacl_Hash_Blake2b_blake2_params *p1 = key.fst; + uint8_t kk1 = p1->key_length; + uint8_t nn = p1->digest_length; + Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; + uint32_t kk2 = (uint32_t)i.key_length; + uint8_t *k_1 = key.snd; + if (!(kk2 == 0U)) + { + uint8_t *sub_b = buf + kk2; + memset(sub_b, 0U, (128U - kk2) * sizeof (uint8_t)); + memcpy(buf, k_1, kk2 * sizeof (uint8_t)); + } + Hacl_Hash_Blake2b_blake2_params pv = p1[0U]; + init_with_params(block_state.thd.snd, pv); return p; } /** - Re-initialization function when there is no key + General-purpose allocation function that gives control over all +Blake2 parameters, including the key. Further resettings of the state SHALL be +done with `reset_with_params_and_key`, and SHALL feature the exact same values +for the `key_length` and `digest_length` fields as passed here. In other words, +once you commit to a digest and key length, the only way to change these +parameters is to allocate a new object. + +The caller must satisfy the following requirements. +- The length of the key k MUST match the value of the field key_length in the + parameters. +- The key_length must not exceed 32 for S, 64 for B. +- The digest_length must not exceed 32 for S, 64 for B. + +*/ +Hacl_Hash_Blake2b_state_t +*Hacl_Hash_Blake2b_malloc_with_params_and_key(Hacl_Hash_Blake2b_blake2_params *p, uint8_t *k) +{ + Hacl_Hash_Blake2b_blake2_params pv = p[0U]; + Hacl_Hash_Blake2b_index + i1 = { .key_length = pv.key_length, .digest_length = pv.digest_length }; + return + malloc_raw(i1, + ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = p, .snd = k })); +} + +/** + Specialized allocation function that picks default values for all +parameters, except for the key_length. 
Further resettings of the state SHALL be +done with `reset_with_key`, and SHALL feature the exact same key length `kk` as +passed here. In other words, once you commit to a key length, the only way to +change this parameter is to allocate a new object. + +The caller must satisfy the following requirements. +- The key_length must not exceed 32 for S, 64 for B. + */ -void Hacl_Hash_Blake2b_reset(Hacl_Hash_Blake2b_state_t *state) +Hacl_Hash_Blake2b_state_t *Hacl_Hash_Blake2b_malloc_with_key(uint8_t *k, uint8_t kk) +{ + uint8_t nn = 64U; + Hacl_Hash_Blake2b_index i = { .key_length = kk, .digest_length = nn }; + uint8_t salt[16U] = { 0U }; + uint8_t personal[16U] = { 0U }; + Hacl_Hash_Blake2b_blake2_params + p = + { + .digest_length = i.digest_length, .key_length = i.key_length, .fanout = 1U, .depth = 1U, + .leaf_length = 0U, .node_offset = 0ULL, .node_depth = 0U, .inner_length = 0U, .salt = salt, + .personal = personal + }; + Hacl_Hash_Blake2b_blake2_params p0 = p; + Hacl_Hash_Blake2b_state_t *s = Hacl_Hash_Blake2b_malloc_with_params_and_key(&p0, k); + return s; +} + +/** + Specialized allocation function that picks default values for all +parameters, and has no key. Effectively, this is what you want if you intend to +use Blake2 as a hash function. Further resettings of the state SHALL be done with `reset`. +*/ +Hacl_Hash_Blake2b_state_t *Hacl_Hash_Blake2b_malloc(void) +{ + return Hacl_Hash_Blake2b_malloc_with_key(NULL, 0U); +} + +static Hacl_Hash_Blake2b_index index_of_state(Hacl_Hash_Blake2b_state_t *s) +{ + Hacl_Hash_Blake2b_block_state_t block_state = (*s).block_state; + uint8_t nn = block_state.snd; + uint8_t kk1 = block_state.fst; + return ((Hacl_Hash_Blake2b_index){ .key_length = kk1, .digest_length = nn }); +} + +static void +reset_raw( + Hacl_Hash_Blake2b_state_t *state, + K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_ key +) { Hacl_Hash_Blake2b_state_t scrut = *state; uint8_t *buf = scrut.buf; Hacl_Hash_Blake2b_block_state_t block_state = scrut.block_state; - Hacl_Hash_Blake2b_init(block_state.snd, 0U, 64U); + uint8_t nn0 = block_state.snd; + uint8_t kk10 = block_state.fst; + Hacl_Hash_Blake2b_index i = { .key_length = kk10, .digest_length = nn0 }; + KRML_MAYBE_UNUSED_VAR(i); + Hacl_Hash_Blake2b_blake2_params *p = key.fst; + uint8_t kk1 = p->key_length; + uint8_t nn = p->digest_length; + Hacl_Hash_Blake2b_index i1 = { .key_length = kk1, .digest_length = nn }; + uint32_t kk2 = (uint32_t)i1.key_length; + uint8_t *k_1 = key.snd; + if (!(kk2 == 0U)) + { + uint8_t *sub_b = buf + kk2; + memset(sub_b, 0U, (128U - kk2) * sizeof (uint8_t)); + memcpy(buf, k_1, kk2 * sizeof (uint8_t)); + } + Hacl_Hash_Blake2b_blake2_params pv = p[0U]; + init_with_params(block_state.thd.snd, pv); + uint8_t kk11 = i.key_length; + uint32_t ite; + if (kk11 != 0U) + { + ite = 128U; + } + else + { + ite = 0U; + } Hacl_Hash_Blake2b_state_t - tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)0U }; + tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; state[0U] = tmp; } /** - Update function when there is no key; 0 = success, 1 = max length exceeded + General-purpose re-initialization function with parameters and +key. You cannot change digest_length or key_length, meaning those values in +the parameters object must be the same as originally decided via one of the +malloc functions. All other values of the parameter can be changed. The behavior +is unspecified if you violate this precondition. 
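+
+A minimal keyed-hashing sketch under these rules (buffer names and the choice
+of a 32-byte key are illustrative assumptions, not part of the API):
+
+  uint8_t salt[16U] = { 0U };
+  uint8_t personal[16U] = { 0U };
+  uint8_t key[32U] = { 0U };
+  Hacl_Hash_Blake2b_blake2_params params =
+  {
+    .digest_length = 64U, .key_length = 32U, .fanout = 1U, .depth = 1U,
+    .leaf_length = 0U, .node_offset = 0ULL, .node_depth = 0U, .inner_length = 0U,
+    .salt = salt, .personal = personal
+  };
+  Hacl_Hash_Blake2b_state_t
+  *st = Hacl_Hash_Blake2b_malloc_with_params_and_key(&params, key);
+  Hacl_Hash_Blake2b_update(st, msg, msg_len);  (msg and msg_len are the caller's)
+  uint8_t out[64U];
+  Hacl_Hash_Blake2b_digest(st, out);
+  Hacl_Hash_Blake2b_reset_with_key_and_params(st, &params, key);
+  Hacl_Hash_Blake2b_free(st);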
+*/ +void +Hacl_Hash_Blake2b_reset_with_key_and_params( + Hacl_Hash_Blake2b_state_t *s, + Hacl_Hash_Blake2b_blake2_params *p, + uint8_t *k +) +{ + index_of_state(s); + reset_raw(s, ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = p, .snd = k })); +} + +/** + Specialized-purpose re-initialization function with no parameters, +and a key. The key length must be the same as originally decided via your choice +of malloc function. All other parameters are reset to their default values. The +original call to malloc MUST have set digest_length to the default value. The +behavior is unspecified if you violate this precondition. +*/ +void Hacl_Hash_Blake2b_reset_with_key(Hacl_Hash_Blake2b_state_t *s, uint8_t *k) +{ + Hacl_Hash_Blake2b_index idx = index_of_state(s); + uint8_t salt[16U] = { 0U }; + uint8_t personal[16U] = { 0U }; + Hacl_Hash_Blake2b_blake2_params + p = + { + .digest_length = idx.digest_length, .key_length = idx.key_length, .fanout = 1U, .depth = 1U, + .leaf_length = 0U, .node_offset = 0ULL, .node_depth = 0U, .inner_length = 0U, .salt = salt, + .personal = personal + }; + Hacl_Hash_Blake2b_blake2_params p0 = p; + reset_raw(s, ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = &p0, .snd = k })); +} + +/** + Specialized-purpose re-initialization function with no parameters +and no key. This is what you want if you intend to use Blake2 as a hash +function. The key length and digest length must have been set to their +respective default values via your choice of malloc function (always true if you +used `malloc`). All other parameters are reset to their default values. The +behavior is unspecified if you violate this precondition. +*/ +void Hacl_Hash_Blake2b_reset(Hacl_Hash_Blake2b_state_t *s) +{ + Hacl_Hash_Blake2b_reset_with_key(s, NULL); +} + +/** + Update function; 0 = success, 1 = max length exceeded */ Hacl_Streaming_Types_error_code Hacl_Hash_Blake2b_update(Hacl_Hash_Blake2b_state_t *state, uint8_t *chunk, uint32_t chunk_len) @@ -726,8 +1040,9 @@ Hacl_Hash_Blake2b_update(Hacl_Hash_Blake2b_state_t *state, uint8_t *chunk, uint3 if (!(sz1 == 0U)) { uint64_t prevlen = total_len1 - (uint64_t)sz1; - uint64_t *wv = block_state1.fst; - uint64_t *hash = block_state1.snd; + K____uint64_t___uint64_t_ acc = block_state1.thd; + uint64_t *wv = acc.fst; + uint64_t *hash = acc.snd; uint32_t nb = 1U; Hacl_Hash_Blake2b_update_multi(128U, wv, @@ -750,8 +1065,9 @@ Hacl_Hash_Blake2b_update(Hacl_Hash_Blake2b_state_t *state, uint8_t *chunk, uint3 uint32_t data2_len = chunk_len - data1_len; uint8_t *data1 = chunk; uint8_t *data2 = chunk + data1_len; - uint64_t *wv = block_state1.fst; - uint64_t *hash = block_state1.snd; + K____uint64_t___uint64_t_ acc = block_state1.thd; + uint64_t *wv = acc.fst; + uint64_t *hash = acc.snd; uint32_t nb = data1_len / 128U; Hacl_Hash_Blake2b_update_multi(data1_len, wv, @@ -817,8 +1133,9 @@ Hacl_Hash_Blake2b_update(Hacl_Hash_Blake2b_state_t *state, uint8_t *chunk, uint3 if (!(sz1 == 0U)) { uint64_t prevlen = total_len1 - (uint64_t)sz1; - uint64_t *wv = block_state1.fst; - uint64_t *hash = block_state1.snd; + K____uint64_t___uint64_t_ acc = block_state1.thd; + uint64_t *wv = acc.fst; + uint64_t *hash = acc.snd; uint32_t nb = 1U; Hacl_Hash_Blake2b_update_multi(128U, wv, @@ -842,8 +1159,9 @@ Hacl_Hash_Blake2b_update(Hacl_Hash_Blake2b_state_t *state, uint8_t *chunk, uint3 uint32_t data2_len = chunk_len - diff - data1_len; uint8_t *data1 = chunk2; uint8_t *data2 = chunk2 + data1_len; - uint64_t *wv = block_state1.fst; - uint64_t *hash = block_state1.snd; 
+ K____uint64_t___uint64_t_ acc = block_state1.thd; + uint64_t *wv = acc.fst; + uint64_t *hash = acc.snd; uint32_t nb = data1_len / 128U; Hacl_Hash_Blake2b_update_multi(data1_len, wv, @@ -867,10 +1185,20 @@ Hacl_Hash_Blake2b_update(Hacl_Hash_Blake2b_state_t *state, uint8_t *chunk, uint3 } /** - Finish function when there is no key + Digest function. This function expects the `output` array to hold +at least `digest_length` bytes, where `digest_length` was determined by your +choice of `malloc` function. Concretely, if you used `malloc` or +`malloc_with_key`, then the expected length is 32 for S, or 64 for B (default +digest length). If you used `malloc_with_params_and_key`, then the expected +length is whatever you chose for the `digest_length` field of your +parameters. */ void Hacl_Hash_Blake2b_digest(Hacl_Hash_Blake2b_state_t *state, uint8_t *output) { + Hacl_Hash_Blake2b_block_state_t block_state0 = (*state).block_state; + uint8_t nn = block_state0.snd; + uint8_t kk1 = block_state0.fst; + Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; Hacl_Hash_Blake2b_state_t scrut = *state; Hacl_Hash_Blake2b_block_state_t block_state = scrut.block_state; uint8_t *buf_ = scrut.buf; @@ -887,9 +1215,11 @@ void Hacl_Hash_Blake2b_digest(Hacl_Hash_Blake2b_state_t *state, uint8_t *output) uint8_t *buf_1 = buf_; uint64_t wv0[16U] = { 0U }; uint64_t b[16U] = { 0U }; - Hacl_Hash_Blake2b_block_state_t tmp_block_state = { .fst = wv0, .snd = b }; - uint64_t *src_b = block_state.snd; - uint64_t *dst_b = tmp_block_state.snd; + Hacl_Hash_Blake2b_block_state_t + tmp_block_state = + { .fst = i.key_length, .snd = i.digest_length, .thd = { .fst = wv0, .snd = b } }; + uint64_t *src_b = block_state.thd.snd; + uint64_t *dst_b = tmp_block_state.thd.snd; memcpy(dst_b, src_b, 16U * sizeof (uint64_t)); uint64_t prev_len = total_len - (uint64_t)r; uint32_t ite; @@ -903,8 +1233,9 @@ void Hacl_Hash_Blake2b_digest(Hacl_Hash_Blake2b_state_t *state, uint8_t *output) } uint8_t *buf_last = buf_1 + r - ite; uint8_t *buf_multi = buf_1; - uint64_t *wv1 = tmp_block_state.fst; - uint64_t *hash0 = tmp_block_state.snd; + K____uint64_t___uint64_t_ acc0 = tmp_block_state.thd; + uint64_t *wv1 = acc0.fst; + uint64_t *hash0 = acc0.snd; uint32_t nb = 0U; Hacl_Hash_Blake2b_update_multi(0U, wv1, @@ -913,15 +1244,17 @@ void Hacl_Hash_Blake2b_digest(Hacl_Hash_Blake2b_state_t *state, uint8_t *output) buf_multi, nb); uint64_t prev_len_last = total_len - (uint64_t)r; - uint64_t *wv = tmp_block_state.fst; - uint64_t *hash = tmp_block_state.snd; + K____uint64_t___uint64_t_ acc = tmp_block_state.thd; + uint64_t *wv = acc.fst; + uint64_t *hash = acc.snd; Hacl_Hash_Blake2b_update_last(r, wv, hash, FStar_UInt128_uint64_to_uint128(prev_len_last), r, buf_last); - Hacl_Hash_Blake2b_finish(64U, output, tmp_block_state.snd); + uint8_t nn0 = tmp_block_state.snd; + Hacl_Hash_Blake2b_finish((uint32_t)nn0, output, tmp_block_state.thd.snd); } /** @@ -932,14 +1265,43 @@ void Hacl_Hash_Blake2b_free(Hacl_Hash_Blake2b_state_t *state) Hacl_Hash_Blake2b_state_t scrut = *state; uint8_t *buf = scrut.buf; Hacl_Hash_Blake2b_block_state_t block_state = scrut.block_state; - uint64_t *wv = block_state.fst; - uint64_t *b = block_state.snd; + uint64_t *b = block_state.thd.snd; + uint64_t *wv = block_state.thd.fst; KRML_HOST_FREE(wv); KRML_HOST_FREE(b); KRML_HOST_FREE(buf); KRML_HOST_FREE(state); } +/** + Copying. This preserves all parameters. 
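+
+A sketch of one intended use: fork a state after hashing a shared prefix
+(prefix and suffix names are illustrative):
+
+  Hacl_Hash_Blake2b_update(st, prefix, prefix_len);
+  Hacl_Hash_Blake2b_state_t *st2 = Hacl_Hash_Blake2b_copy(st);
+  Hacl_Hash_Blake2b_update(st, suffix_a, a_len);
+  Hacl_Hash_Blake2b_update(st2, suffix_b, b_len);
+  Hacl_Hash_Blake2b_free(st2);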
+*/ +Hacl_Hash_Blake2b_state_t *Hacl_Hash_Blake2b_copy(Hacl_Hash_Blake2b_state_t *state) +{ + Hacl_Hash_Blake2b_state_t scrut = *state; + Hacl_Hash_Blake2b_block_state_t block_state0 = scrut.block_state; + uint8_t *buf0 = scrut.buf; + uint64_t total_len0 = scrut.total_len; + uint8_t nn = block_state0.snd; + uint8_t kk1 = block_state0.fst; + Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; + uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(128U, sizeof (uint8_t)); + memcpy(buf, buf0, 128U * sizeof (uint8_t)); + uint64_t *wv = (uint64_t *)KRML_HOST_CALLOC(16U, sizeof (uint64_t)); + uint64_t *b = (uint64_t *)KRML_HOST_CALLOC(16U, sizeof (uint64_t)); + Hacl_Hash_Blake2b_block_state_t + block_state = { .fst = i.key_length, .snd = i.digest_length, .thd = { .fst = wv, .snd = b } }; + uint64_t *src_b = block_state0.thd.snd; + uint64_t *dst_b = block_state.thd.snd; + memcpy(dst_b, src_b, 16U * sizeof (uint64_t)); + Hacl_Hash_Blake2b_state_t + s = { .block_state = block_state, .buf = buf, .total_len = total_len0 }; + Hacl_Hash_Blake2b_state_t + *p = (Hacl_Hash_Blake2b_state_t *)KRML_HOST_MALLOC(sizeof (Hacl_Hash_Blake2b_state_t)); + p[0U] = s; + return p; +} + /** Write the BLAKE2b digest of message `input` using key `key` into `output`. @@ -965,7 +1327,109 @@ Hacl_Hash_Blake2b_hash_with_key( Hacl_Hash_Blake2b_init(b, key_len, output_len); update(b1, b, key_len, key, input_len, input); Hacl_Hash_Blake2b_finish(output_len, output, b); - Lib_Memzero0_memzero(b1, 16U, uint64_t); - Lib_Memzero0_memzero(b, 16U, uint64_t); + Lib_Memzero0_memzero(b1, 16U, uint64_t, void *); + Lib_Memzero0_memzero(b, 16U, uint64_t, void *); +} + +/** +Write the BLAKE2b digest of message `input` using key `key` and +parameters `params` into `output`. The `key` array must be of length +`params.key_length`. The `output` array must be of length +`params.digest_length`. 
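+
+For instance (a sketch; `digest`, `input`, `input_len` and `key` are the
+caller's, and `params` is assumed to carry digest_length = 32 and a
+key_length that matches the length of `key`):
+
+  uint8_t digest[32U];
+  Hacl_Hash_Blake2b_hash_with_key_and_paramas(digest, input, input_len, params, key);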
+*/ +void +Hacl_Hash_Blake2b_hash_with_key_and_paramas( + uint8_t *output, + uint8_t *input, + uint32_t input_len, + Hacl_Hash_Blake2b_blake2_params params, + uint8_t *key +) +{ + uint64_t b[16U] = { 0U }; + uint64_t b1[16U] = { 0U }; + uint64_t tmp[8U] = { 0U }; + uint64_t *r0 = b; + uint64_t *r1 = b + 4U; + uint64_t *r2 = b + 8U; + uint64_t *r3 = b + 12U; + uint64_t iv0 = Hacl_Hash_Blake2b_ivTable_B[0U]; + uint64_t iv1 = Hacl_Hash_Blake2b_ivTable_B[1U]; + uint64_t iv2 = Hacl_Hash_Blake2b_ivTable_B[2U]; + uint64_t iv3 = Hacl_Hash_Blake2b_ivTable_B[3U]; + uint64_t iv4 = Hacl_Hash_Blake2b_ivTable_B[4U]; + uint64_t iv5 = Hacl_Hash_Blake2b_ivTable_B[5U]; + uint64_t iv6 = Hacl_Hash_Blake2b_ivTable_B[6U]; + uint64_t iv7 = Hacl_Hash_Blake2b_ivTable_B[7U]; + r2[0U] = iv0; + r2[1U] = iv1; + r2[2U] = iv2; + r2[3U] = iv3; + r3[0U] = iv4; + r3[1U] = iv5; + r3[2U] = iv6; + r3[3U] = iv7; + uint8_t kk = params.key_length; + uint8_t nn = params.digest_length; + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 4U; + uint8_t *bj = params.salt + i * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i] = x;); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 6U; + uint8_t *bj = params.personal + i * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i] = x;); + tmp[0U] = + (uint64_t)nn + ^ + ((uint64_t)kk + << 8U + ^ + ((uint64_t)params.fanout + << 16U + ^ ((uint64_t)params.depth << 24U ^ (uint64_t)params.leaf_length << 32U))); + tmp[1U] = params.node_offset; + tmp[2U] = (uint64_t)params.node_depth ^ (uint64_t)params.inner_length << 8U; + tmp[3U] = 0ULL; + uint64_t tmp0 = tmp[0U]; + uint64_t tmp1 = tmp[1U]; + uint64_t tmp2 = tmp[2U]; + uint64_t tmp3 = tmp[3U]; + uint64_t tmp4 = tmp[4U]; + uint64_t tmp5 = tmp[5U]; + uint64_t tmp6 = tmp[6U]; + uint64_t tmp7 = tmp[7U]; + uint64_t iv0_ = iv0 ^ tmp0; + uint64_t iv1_ = iv1 ^ tmp1; + uint64_t iv2_ = iv2 ^ tmp2; + uint64_t iv3_ = iv3 ^ tmp3; + uint64_t iv4_ = iv4 ^ tmp4; + uint64_t iv5_ = iv5 ^ tmp5; + uint64_t iv6_ = iv6 ^ tmp6; + uint64_t iv7_ = iv7 ^ tmp7; + r0[0U] = iv0_; + r0[1U] = iv1_; + r0[2U] = iv2_; + r0[3U] = iv3_; + r1[0U] = iv4_; + r1[1U] = iv5_; + r1[2U] = iv6_; + r1[3U] = iv7_; + update(b1, b, (uint32_t)params.key_length, key, input_len, input); + Hacl_Hash_Blake2b_finish((uint32_t)params.digest_length, output, b); + Lib_Memzero0_memzero(b1, 16U, uint64_t, void *); + Lib_Memzero0_memzero(b, 16U, uint64_t, void *); } diff --git a/src/Hacl_Hash_Blake2b_Simd256.c b/src/Hacl_Hash_Blake2b_Simd256.c index 1a5e8cf2..0afd93bc 100644 --- a/src/Hacl_Hash_Blake2b_Simd256.c +++ b/src/Hacl_Hash_Blake2b_Simd256.c @@ -26,6 +26,7 @@ #include "internal/Hacl_Hash_Blake2b_Simd256.h" #include "internal/Hacl_Impl_Blake2_Constants.h" +#include "internal/Hacl_Hash_Blake2b.h" #include "lib_memzero0.h" static inline void @@ -77,22 +78,22 @@ update_block( Lib_IntVector_Intrinsics_vec256 *r1 = m_st + 1U; Lib_IntVector_Intrinsics_vec256 *r20 = m_st + 2U; Lib_IntVector_Intrinsics_vec256 *r30 = m_st + 3U; - uint32_t s0 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 0U]; - uint32_t s1 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 1U]; - uint32_t s2 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 2U]; - uint32_t s3 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 3U]; - uint32_t s4 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 4U]; - uint32_t s5 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 5U]; - uint32_t s6 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 6U]; - uint32_t s7 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 7U]; - uint32_t s8 = 
Hacl_Hash_Blake2s_sigmaTable[start_idx + 8U]; - uint32_t s9 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 9U]; - uint32_t s10 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 10U]; - uint32_t s11 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 11U]; - uint32_t s12 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 12U]; - uint32_t s13 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 13U]; - uint32_t s14 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 14U]; - uint32_t s15 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 15U]; + uint32_t s0 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 0U]; + uint32_t s1 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 1U]; + uint32_t s2 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 2U]; + uint32_t s3 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 3U]; + uint32_t s4 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 4U]; + uint32_t s5 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 5U]; + uint32_t s6 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 6U]; + uint32_t s7 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 7U]; + uint32_t s8 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 8U]; + uint32_t s9 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 9U]; + uint32_t s10 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 10U]; + uint32_t s11 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 11U]; + uint32_t s12 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 12U]; + uint32_t s13 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 13U]; + uint32_t s14 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 14U]; + uint32_t s15 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 15U]; r0[0U] = Lib_IntVector_Intrinsics_vec256_load64s(m_w[s0], m_w[s2], m_w[s4], m_w[s6]); r1[0U] = Lib_IntVector_Intrinsics_vec256_load64s(m_w[s1], m_w[s3], m_w[s5], m_w[s7]); r20[0U] = Lib_IntVector_Intrinsics_vec256_load64s(m_w[s8], m_w[s10], m_w[s12], m_w[s14]); @@ -214,24 +215,147 @@ update_block( void Hacl_Hash_Blake2b_Simd256_init(Lib_IntVector_Intrinsics_vec256 *hash, uint32_t kk, uint32_t nn) { + uint8_t salt[16U] = { 0U }; + uint8_t personal[16U] = { 0U }; + Hacl_Hash_Blake2b_blake2_params + p = + { + .digest_length = 64U, .key_length = 0U, .fanout = 1U, .depth = 1U, .leaf_length = 0U, + .node_offset = 0ULL, .node_depth = 0U, .inner_length = 0U, .salt = salt, .personal = personal + }; + uint64_t tmp[8U] = { 0U }; + Lib_IntVector_Intrinsics_vec256 *r0 = hash; + Lib_IntVector_Intrinsics_vec256 *r1 = hash + 1U; + Lib_IntVector_Intrinsics_vec256 *r2 = hash + 2U; + Lib_IntVector_Intrinsics_vec256 *r3 = hash + 3U; + uint64_t iv0 = Hacl_Hash_Blake2b_ivTable_B[0U]; + uint64_t iv1 = Hacl_Hash_Blake2b_ivTable_B[1U]; + uint64_t iv2 = Hacl_Hash_Blake2b_ivTable_B[2U]; + uint64_t iv3 = Hacl_Hash_Blake2b_ivTable_B[3U]; + uint64_t iv4 = Hacl_Hash_Blake2b_ivTable_B[4U]; + uint64_t iv5 = Hacl_Hash_Blake2b_ivTable_B[5U]; + uint64_t iv6 = Hacl_Hash_Blake2b_ivTable_B[6U]; + uint64_t iv7 = Hacl_Hash_Blake2b_ivTable_B[7U]; + r2[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv0, iv1, iv2, iv3); + r3[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv4, iv5, iv6, iv7); + uint8_t kk1 = (uint8_t)kk; + uint8_t nn1 = (uint8_t)nn; + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 4U; + uint8_t *bj = p.salt + i * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i] = x;); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 6U; + uint8_t *bj = p.personal + i * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i] = x;); + tmp[0U] = + (uint64_t)nn1 + ^ + ((uint64_t)kk1 + << 8U + ^ ((uint64_t)p.fanout << 16U ^ ((uint64_t)p.depth << 24U ^ (uint64_t)p.leaf_length << 32U))); + tmp[1U] = 
p.node_offset; + tmp[2U] = (uint64_t)p.node_depth ^ (uint64_t)p.inner_length << 8U; + tmp[3U] = 0ULL; + uint64_t tmp0 = tmp[0U]; + uint64_t tmp1 = tmp[1U]; + uint64_t tmp2 = tmp[2U]; + uint64_t tmp3 = tmp[3U]; + uint64_t tmp4 = tmp[4U]; + uint64_t tmp5 = tmp[5U]; + uint64_t tmp6 = tmp[6U]; + uint64_t tmp7 = tmp[7U]; + uint64_t iv0_ = iv0 ^ tmp0; + uint64_t iv1_ = iv1 ^ tmp1; + uint64_t iv2_ = iv2 ^ tmp2; + uint64_t iv3_ = iv3 ^ tmp3; + uint64_t iv4_ = iv4 ^ tmp4; + uint64_t iv5_ = iv5 ^ tmp5; + uint64_t iv6_ = iv6 ^ tmp6; + uint64_t iv7_ = iv7 ^ tmp7; + r0[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv0_, iv1_, iv2_, iv3_); + r1[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv4_, iv5_, iv6_, iv7_); +} + +static void +init_with_params(Lib_IntVector_Intrinsics_vec256 *hash, Hacl_Hash_Blake2b_blake2_params p) +{ + uint64_t tmp[8U] = { 0U }; Lib_IntVector_Intrinsics_vec256 *r0 = hash; Lib_IntVector_Intrinsics_vec256 *r1 = hash + 1U; Lib_IntVector_Intrinsics_vec256 *r2 = hash + 2U; Lib_IntVector_Intrinsics_vec256 *r3 = hash + 3U; - uint64_t iv0 = Hacl_Hash_Blake2s_ivTable_B[0U]; - uint64_t iv1 = Hacl_Hash_Blake2s_ivTable_B[1U]; - uint64_t iv2 = Hacl_Hash_Blake2s_ivTable_B[2U]; - uint64_t iv3 = Hacl_Hash_Blake2s_ivTable_B[3U]; - uint64_t iv4 = Hacl_Hash_Blake2s_ivTable_B[4U]; - uint64_t iv5 = Hacl_Hash_Blake2s_ivTable_B[5U]; - uint64_t iv6 = Hacl_Hash_Blake2s_ivTable_B[6U]; - uint64_t iv7 = Hacl_Hash_Blake2s_ivTable_B[7U]; + uint64_t iv0 = Hacl_Hash_Blake2b_ivTable_B[0U]; + uint64_t iv1 = Hacl_Hash_Blake2b_ivTable_B[1U]; + uint64_t iv2 = Hacl_Hash_Blake2b_ivTable_B[2U]; + uint64_t iv3 = Hacl_Hash_Blake2b_ivTable_B[3U]; + uint64_t iv4 = Hacl_Hash_Blake2b_ivTable_B[4U]; + uint64_t iv5 = Hacl_Hash_Blake2b_ivTable_B[5U]; + uint64_t iv6 = Hacl_Hash_Blake2b_ivTable_B[6U]; + uint64_t iv7 = Hacl_Hash_Blake2b_ivTable_B[7U]; r2[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv0, iv1, iv2, iv3); r3[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv4, iv5, iv6, iv7); - uint64_t kk_shift_8 = (uint64_t)kk << 8U; - uint64_t iv0_ = iv0 ^ (0x01010000ULL ^ (kk_shift_8 ^ (uint64_t)nn)); - r0[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv0_, iv1, iv2, iv3); - r1[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv4, iv5, iv6, iv7); + uint8_t kk = p.key_length; + uint8_t nn = p.digest_length; + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 4U; + uint8_t *bj = p.salt + i * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i] = x;); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 6U; + uint8_t *bj = p.personal + i * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i] = x;); + tmp[0U] = + (uint64_t)nn + ^ + ((uint64_t)kk + << 8U + ^ ((uint64_t)p.fanout << 16U ^ ((uint64_t)p.depth << 24U ^ (uint64_t)p.leaf_length << 32U))); + tmp[1U] = p.node_offset; + tmp[2U] = (uint64_t)p.node_depth ^ (uint64_t)p.inner_length << 8U; + tmp[3U] = 0ULL; + uint64_t tmp0 = tmp[0U]; + uint64_t tmp1 = tmp[1U]; + uint64_t tmp2 = tmp[2U]; + uint64_t tmp3 = tmp[3U]; + uint64_t tmp4 = tmp[4U]; + uint64_t tmp5 = tmp[5U]; + uint64_t tmp6 = tmp[6U]; + uint64_t tmp7 = tmp[7U]; + uint64_t iv0_ = iv0 ^ tmp0; + uint64_t iv1_ = iv1 ^ tmp1; + uint64_t iv2_ = iv2 ^ tmp2; + uint64_t iv3_ = iv3 ^ tmp3; + uint64_t iv4_ = iv4 ^ tmp4; + uint64_t iv5_ = iv5 ^ tmp5; + uint64_t iv6_ = iv6 ^ tmp6; + uint64_t iv7_ = iv7 ^ tmp7; + r0[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv0_, iv1_, iv2_, iv3_); + r1[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv4_, iv5_, iv6_, iv7_); } static void 
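Throughout these `init_with_params` bodies, the first word of the BLAKE2
parameter block is assembled by XOR-ing fields that occupy disjoint byte
positions, so `^` behaves exactly like `|`. A sketch of the packing for
Blake2b (variable names are illustrative; the layout follows the BLAKE2
specification):

    uint64_t w0 = (uint64_t)digest_length         /* byte 0    */
                ^ ((uint64_t)key_length << 8U)    /* byte 1    */
                ^ ((uint64_t)fanout << 16U)       /* byte 2    */
                ^ ((uint64_t)depth << 24U)        /* byte 3    */
                ^ ((uint64_t)leaf_length << 32U); /* bytes 4-7 */

With the default unkeyed parameters (fanout = depth = 1), this collapses to
the `0x01010000ULL ^ (kk_shift_8 ^ (uint64_t)nn)` constant that the previous
code XOR-ed into `iv0`.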
@@ -254,7 +378,7 @@ update_key(
   {
     update_block(wv, hash, false, lb, b);
   }
-  Lib_Memzero0_memzero(b, 128U, uint8_t);
+  Lib_Memzero0_memzero(b, 128U, uint8_t, void *);
 }
 
 void
@@ -295,7 +419,7 @@ Hacl_Hash_Blake2b_Simd256_update_last(
   FStar_UInt128_uint128
   totlen = FStar_UInt128_add_mod(prev, FStar_UInt128_uint64_to_uint128((uint64_t)len));
   update_block(wv, hash, true, totlen, b);
-  Lib_Memzero0_memzero(b, 128U, uint8_t);
+  Lib_Memzero0_memzero(b, 128U, uint8_t, void *);
 }
 
 static inline void
@@ -371,7 +495,7 @@ Hacl_Hash_Blake2b_Simd256_finish(
   Lib_IntVector_Intrinsics_vec256_store64_le(second, row1[0U]);
   uint8_t *final = b;
   memcpy(output, final, nn * sizeof (uint8_t));
-  Lib_Memzero0_memzero(b, 64U, uint8_t);
+  Lib_Memzero0_memzero(b, 64U, uint8_t, void *);
 }
 
 void
@@ -468,10 +592,11 @@ Lib_IntVector_Intrinsics_vec256 *Hacl_Hash_Blake2b_Simd256_malloc_with_key(void)
   return buf;
 }
 
-/**
-  State allocation function when there is no key
-*/
-Hacl_Hash_Blake2b_Simd256_state_t *Hacl_Hash_Blake2b_Simd256_malloc(void)
+static Hacl_Hash_Blake2b_Simd256_state_t
+*malloc_raw(
+  Hacl_Hash_Blake2b_index kk,
+  K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_ key
+)
 {
   uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(128U, sizeof (uint8_t));
   Lib_IntVector_Intrinsics_vec256
@@ -484,33 +609,199 @@ Hacl_Hash_Blake2b_Simd256_state_t *Hacl_Hash_Blake2b_Simd256_malloc(void)
     (Lib_IntVector_Intrinsics_vec256 *)KRML_ALIGNED_MALLOC(32,
       sizeof (Lib_IntVector_Intrinsics_vec256) * 4U);
   memset(b, 0U, 4U * sizeof (Lib_IntVector_Intrinsics_vec256));
-  Hacl_Hash_Blake2b_Simd256_block_state_t block_state = { .fst = wv, .snd = b };
+  Hacl_Hash_Blake2b_Simd256_block_state_t
+  block_state = { .fst = kk.key_length, .snd = kk.digest_length, .thd = { .fst = wv, .snd = b } };
+  uint8_t kk10 = kk.key_length;
+  uint32_t ite;
+  if (kk10 != 0U)
+  {
+    ite = 128U;
+  }
+  else
+  {
+    ite = 0U;
+  }
   Hacl_Hash_Blake2b_Simd256_state_t
-  s = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)0U };
+  s = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite };
   Hacl_Hash_Blake2b_Simd256_state_t
   *p =
     (Hacl_Hash_Blake2b_Simd256_state_t *)KRML_HOST_MALLOC(sizeof (
       Hacl_Hash_Blake2b_Simd256_state_t
     ));
   p[0U] = s;
-  Hacl_Hash_Blake2b_Simd256_init(block_state.snd, 0U, 64U);
+  Hacl_Hash_Blake2b_blake2_params *p1 = key.fst;
+  uint8_t kk1 = p1->key_length;
+  uint8_t nn = p1->digest_length;
+  Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn };
+  uint32_t kk2 = (uint32_t)i.key_length;
+  uint8_t *k_1 = key.snd;
+  if (!(kk2 == 0U))
+  {
+    uint8_t *sub_b = buf + kk2;
+    memset(sub_b, 0U, (128U - kk2) * sizeof (uint8_t));
+    memcpy(buf, k_1, kk2 * sizeof (uint8_t));
+  }
+  Hacl_Hash_Blake2b_blake2_params pv = p1[0U];
+  init_with_params(block_state.thd.snd, pv);
   return p;
 }
 
 /**
-  Re-initialization function when there is no key
+  State allocation function when there are parameters and a key. The
+length of the key k MUST match the value of the field key_length in the
+parameters. Furthermore, there is a static (not dynamically checked) requirement
+that key_length does not exceed max_key (32 for S, 64 for B).
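+
+A usage sketch (illustrative; once allocated, the state is driven exactly
+like the scalar Blake2b streaming API):
+
+  Hacl_Hash_Blake2b_Simd256_state_t
+  *st = Hacl_Hash_Blake2b_Simd256_malloc_with_params_and_key(&params, key);
+  Hacl_Hash_Blake2b_Simd256_update(st, msg, msg_len);
+  Hacl_Hash_Blake2b_Simd256_digest(st, out);
+  Hacl_Hash_Blake2b_Simd256_free(st);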
+*/ +Hacl_Hash_Blake2b_Simd256_state_t +*Hacl_Hash_Blake2b_Simd256_malloc_with_params_and_key( + Hacl_Hash_Blake2b_blake2_params *p, + uint8_t *k +) +{ + Hacl_Hash_Blake2b_blake2_params pv = p[0U]; + Hacl_Hash_Blake2b_index + i1 = { .key_length = pv.key_length, .digest_length = pv.digest_length }; + return + malloc_raw(i1, + ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = p, .snd = k })); +} + +/** + State allocation function when there is just a custom key. All +other parameters are set to their respective default values, meaning the output +length is the maximum allowed output (256 for S, 64 for B). +*/ +Hacl_Hash_Blake2b_Simd256_state_t +*Hacl_Hash_Blake2b_Simd256_malloc_with_key0(uint8_t *k, uint8_t kk) +{ + uint8_t nn = 64U; + Hacl_Hash_Blake2b_index i = { .key_length = kk, .digest_length = nn }; + uint8_t *salt = (uint8_t *)KRML_HOST_CALLOC(16U, sizeof (uint8_t)); + uint8_t *personal = (uint8_t *)KRML_HOST_CALLOC(16U, sizeof (uint8_t)); + Hacl_Hash_Blake2b_blake2_params + p = + { + .digest_length = i.digest_length, .key_length = i.key_length, .fanout = 1U, .depth = 1U, + .leaf_length = 0U, .node_offset = 0ULL, .node_depth = 0U, .inner_length = 0U, .salt = salt, + .personal = personal + }; + Hacl_Hash_Blake2b_blake2_params + *p0 = + (Hacl_Hash_Blake2b_blake2_params *)KRML_HOST_MALLOC(sizeof (Hacl_Hash_Blake2b_blake2_params)); + p0[0U] = p; + Hacl_Hash_Blake2b_Simd256_state_t + *s = Hacl_Hash_Blake2b_Simd256_malloc_with_params_and_key(p0, k); + Hacl_Hash_Blake2b_blake2_params p1 = p0[0U]; + KRML_HOST_FREE(p1.salt); + KRML_HOST_FREE(p1.personal); + KRML_HOST_FREE(p0); + return s; +} + +/** + State allocation function when there is no key */ -void Hacl_Hash_Blake2b_Simd256_reset(Hacl_Hash_Blake2b_Simd256_state_t *state) +Hacl_Hash_Blake2b_Simd256_state_t *Hacl_Hash_Blake2b_Simd256_malloc(void) +{ + return Hacl_Hash_Blake2b_Simd256_malloc_with_key0(NULL, 0U); +} + +static Hacl_Hash_Blake2b_index index_of_state(Hacl_Hash_Blake2b_Simd256_state_t *s) +{ + Hacl_Hash_Blake2b_Simd256_block_state_t block_state = (*s).block_state; + uint8_t nn = block_state.snd; + uint8_t kk1 = block_state.fst; + return ((Hacl_Hash_Blake2b_index){ .key_length = kk1, .digest_length = nn }); +} + +static void +reset_raw( + Hacl_Hash_Blake2b_Simd256_state_t *state, + K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_ key +) { Hacl_Hash_Blake2b_Simd256_state_t scrut = *state; uint8_t *buf = scrut.buf; Hacl_Hash_Blake2b_Simd256_block_state_t block_state = scrut.block_state; - Hacl_Hash_Blake2b_Simd256_init(block_state.snd, 0U, 64U); + uint8_t nn0 = block_state.snd; + uint8_t kk10 = block_state.fst; + Hacl_Hash_Blake2b_index i = { .key_length = kk10, .digest_length = nn0 }; + KRML_MAYBE_UNUSED_VAR(i); + Hacl_Hash_Blake2b_blake2_params *p = key.fst; + uint8_t kk1 = p->key_length; + uint8_t nn = p->digest_length; + Hacl_Hash_Blake2b_index i1 = { .key_length = kk1, .digest_length = nn }; + uint32_t kk2 = (uint32_t)i1.key_length; + uint8_t *k_1 = key.snd; + if (!(kk2 == 0U)) + { + uint8_t *sub_b = buf + kk2; + memset(sub_b, 0U, (128U - kk2) * sizeof (uint8_t)); + memcpy(buf, k_1, kk2 * sizeof (uint8_t)); + } + Hacl_Hash_Blake2b_blake2_params pv = p[0U]; + init_with_params(block_state.thd.snd, pv); + uint8_t kk11 = i.key_length; + uint32_t ite; + if (kk11 != 0U) + { + ite = 128U; + } + else + { + ite = 0U; + } Hacl_Hash_Blake2b_Simd256_state_t - tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)0U }; + tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; 
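+  /* When the state is keyed (kk11 != 0U), buf already holds the zero-padded
+     key as a full 128-byte block, so total_len restarts at 128 rather than 0;
+     the key counts as the first block of input. */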
state[0U] = tmp; } +/** + Re-initialization function. The reinitialization API is tricky -- +you MUST reuse the same original parameters for digest (output) length and key +length. +*/ +void +Hacl_Hash_Blake2b_Simd256_reset_with_key_and_params( + Hacl_Hash_Blake2b_Simd256_state_t *s, + Hacl_Hash_Blake2b_blake2_params *p, + uint8_t *k +) +{ + index_of_state(s); + reset_raw(s, ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = p, .snd = k })); +} + +/** + Re-initialization function when there is a key. Note that the key +size is not allowed to change, which is why this function does not take a key +length -- the key has to be same key size that was originally passed to +`malloc_with_key` +*/ +void Hacl_Hash_Blake2b_Simd256_reset_with_key(Hacl_Hash_Blake2b_Simd256_state_t *s, uint8_t *k) +{ + Hacl_Hash_Blake2b_index idx = index_of_state(s); + uint8_t salt[16U] = { 0U }; + uint8_t personal[16U] = { 0U }; + Hacl_Hash_Blake2b_blake2_params + p = + { + .digest_length = idx.digest_length, .key_length = idx.key_length, .fanout = 1U, .depth = 1U, + .leaf_length = 0U, .node_offset = 0ULL, .node_depth = 0U, .inner_length = 0U, .salt = salt, + .personal = personal + }; + Hacl_Hash_Blake2b_blake2_params p0 = p; + reset_raw(s, ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = &p0, .snd = k })); +} + +/** + Re-initialization function when there is no key +*/ +void Hacl_Hash_Blake2b_Simd256_reset(Hacl_Hash_Blake2b_Simd256_state_t *s) +{ + Hacl_Hash_Blake2b_Simd256_reset_with_key(s, NULL); +} + /** Update function when there is no key; 0 = success, 1 = max length exceeded */ @@ -582,8 +873,10 @@ Hacl_Hash_Blake2b_Simd256_update( if (!(sz1 == 0U)) { uint64_t prevlen = total_len1 - (uint64_t)sz1; - Lib_IntVector_Intrinsics_vec256 *wv = block_state1.fst; - Lib_IntVector_Intrinsics_vec256 *hash = block_state1.snd; + K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ + acc = block_state1.thd; + Lib_IntVector_Intrinsics_vec256 *wv = acc.fst; + Lib_IntVector_Intrinsics_vec256 *hash = acc.snd; uint32_t nb = 1U; Hacl_Hash_Blake2b_Simd256_update_multi(128U, wv, @@ -606,8 +899,9 @@ Hacl_Hash_Blake2b_Simd256_update( uint32_t data2_len = chunk_len - data1_len; uint8_t *data1 = chunk; uint8_t *data2 = chunk + data1_len; - Lib_IntVector_Intrinsics_vec256 *wv = block_state1.fst; - Lib_IntVector_Intrinsics_vec256 *hash = block_state1.snd; + K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ acc = block_state1.thd; + Lib_IntVector_Intrinsics_vec256 *wv = acc.fst; + Lib_IntVector_Intrinsics_vec256 *hash = acc.snd; uint32_t nb = data1_len / 128U; Hacl_Hash_Blake2b_Simd256_update_multi(data1_len, wv, @@ -673,8 +967,10 @@ Hacl_Hash_Blake2b_Simd256_update( if (!(sz1 == 0U)) { uint64_t prevlen = total_len1 - (uint64_t)sz1; - Lib_IntVector_Intrinsics_vec256 *wv = block_state1.fst; - Lib_IntVector_Intrinsics_vec256 *hash = block_state1.snd; + K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ + acc = block_state1.thd; + Lib_IntVector_Intrinsics_vec256 *wv = acc.fst; + Lib_IntVector_Intrinsics_vec256 *hash = acc.snd; uint32_t nb = 1U; Hacl_Hash_Blake2b_Simd256_update_multi(128U, wv, @@ -698,8 +994,9 @@ Hacl_Hash_Blake2b_Simd256_update( uint32_t data2_len = chunk_len - diff - data1_len; uint8_t *data1 = chunk2; uint8_t *data2 = chunk2 + data1_len; - Lib_IntVector_Intrinsics_vec256 *wv = block_state1.fst; - Lib_IntVector_Intrinsics_vec256 *hash = block_state1.snd; + K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ acc = 
block_state1.thd; + Lib_IntVector_Intrinsics_vec256 *wv = acc.fst; + Lib_IntVector_Intrinsics_vec256 *hash = acc.snd; uint32_t nb = data1_len / 128U; Hacl_Hash_Blake2b_Simd256_update_multi(data1_len, wv, @@ -728,6 +1025,10 @@ Hacl_Hash_Blake2b_Simd256_update( void Hacl_Hash_Blake2b_Simd256_digest(Hacl_Hash_Blake2b_Simd256_state_t *state, uint8_t *output) { + Hacl_Hash_Blake2b_Simd256_block_state_t block_state0 = (*state).block_state; + uint8_t nn = block_state0.snd; + uint8_t kk1 = block_state0.fst; + Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; Hacl_Hash_Blake2b_Simd256_state_t scrut = *state; Hacl_Hash_Blake2b_Simd256_block_state_t block_state = scrut.block_state; uint8_t *buf_ = scrut.buf; @@ -744,9 +1045,11 @@ Hacl_Hash_Blake2b_Simd256_digest(Hacl_Hash_Blake2b_Simd256_state_t *state, uint8 uint8_t *buf_1 = buf_; KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 wv0[4U] KRML_POST_ALIGN(32) = { 0U }; KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 b[4U] KRML_POST_ALIGN(32) = { 0U }; - Hacl_Hash_Blake2b_Simd256_block_state_t tmp_block_state = { .fst = wv0, .snd = b }; - Lib_IntVector_Intrinsics_vec256 *src_b = block_state.snd; - Lib_IntVector_Intrinsics_vec256 *dst_b = tmp_block_state.snd; + Hacl_Hash_Blake2b_Simd256_block_state_t + tmp_block_state = + { .fst = i.key_length, .snd = i.digest_length, .thd = { .fst = wv0, .snd = b } }; + Lib_IntVector_Intrinsics_vec256 *src_b = block_state.thd.snd; + Lib_IntVector_Intrinsics_vec256 *dst_b = tmp_block_state.thd.snd; memcpy(dst_b, src_b, 4U * sizeof (Lib_IntVector_Intrinsics_vec256)); uint64_t prev_len = total_len - (uint64_t)r; uint32_t ite; @@ -760,8 +1063,10 @@ Hacl_Hash_Blake2b_Simd256_digest(Hacl_Hash_Blake2b_Simd256_state_t *state, uint8 } uint8_t *buf_last = buf_1 + r - ite; uint8_t *buf_multi = buf_1; - Lib_IntVector_Intrinsics_vec256 *wv1 = tmp_block_state.fst; - Lib_IntVector_Intrinsics_vec256 *hash0 = tmp_block_state.snd; + K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ + acc0 = tmp_block_state.thd; + Lib_IntVector_Intrinsics_vec256 *wv1 = acc0.fst; + Lib_IntVector_Intrinsics_vec256 *hash0 = acc0.snd; uint32_t nb = 0U; Hacl_Hash_Blake2b_Simd256_update_multi(0U, wv1, @@ -770,15 +1075,18 @@ Hacl_Hash_Blake2b_Simd256_digest(Hacl_Hash_Blake2b_Simd256_state_t *state, uint8 buf_multi, nb); uint64_t prev_len_last = total_len - (uint64_t)r; - Lib_IntVector_Intrinsics_vec256 *wv = tmp_block_state.fst; - Lib_IntVector_Intrinsics_vec256 *hash = tmp_block_state.snd; + K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ + acc = tmp_block_state.thd; + Lib_IntVector_Intrinsics_vec256 *wv = acc.fst; + Lib_IntVector_Intrinsics_vec256 *hash = acc.snd; Hacl_Hash_Blake2b_Simd256_update_last(r, wv, hash, FStar_UInt128_uint64_to_uint128(prev_len_last), r, buf_last); - Hacl_Hash_Blake2b_Simd256_finish(64U, output, tmp_block_state.snd); + uint8_t nn0 = tmp_block_state.snd; + Hacl_Hash_Blake2b_Simd256_finish((uint32_t)nn0, output, tmp_block_state.thd.snd); } /** @@ -789,14 +1097,55 @@ void Hacl_Hash_Blake2b_Simd256_free(Hacl_Hash_Blake2b_Simd256_state_t *state) Hacl_Hash_Blake2b_Simd256_state_t scrut = *state; uint8_t *buf = scrut.buf; Hacl_Hash_Blake2b_Simd256_block_state_t block_state = scrut.block_state; - Lib_IntVector_Intrinsics_vec256 *wv = block_state.fst; - Lib_IntVector_Intrinsics_vec256 *b = block_state.snd; + Lib_IntVector_Intrinsics_vec256 *b = block_state.thd.snd; + Lib_IntVector_Intrinsics_vec256 *wv = block_state.thd.fst; KRML_ALIGNED_FREE(wv); KRML_ALIGNED_FREE(b); 
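  /* wv and b were obtained from KRML_ALIGNED_MALLOC (32-byte alignment for
     vec256 loads and stores), so they are released with KRML_ALIGNED_FREE;
     buf and the state itself are ordinary host allocations freed below. */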
KRML_HOST_FREE(buf); KRML_HOST_FREE(state); } +/** + Copying. The key length (or absence thereof) must match between source and destination. +*/ +Hacl_Hash_Blake2b_Simd256_state_t +*Hacl_Hash_Blake2b_Simd256_copy(Hacl_Hash_Blake2b_Simd256_state_t *state) +{ + Hacl_Hash_Blake2b_Simd256_state_t scrut = *state; + Hacl_Hash_Blake2b_Simd256_block_state_t block_state0 = scrut.block_state; + uint8_t *buf0 = scrut.buf; + uint64_t total_len0 = scrut.total_len; + uint8_t nn = block_state0.snd; + uint8_t kk1 = block_state0.fst; + Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; + uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(128U, sizeof (uint8_t)); + memcpy(buf, buf0, 128U * sizeof (uint8_t)); + Lib_IntVector_Intrinsics_vec256 + *wv = + (Lib_IntVector_Intrinsics_vec256 *)KRML_ALIGNED_MALLOC(32, + sizeof (Lib_IntVector_Intrinsics_vec256) * 4U); + memset(wv, 0U, 4U * sizeof (Lib_IntVector_Intrinsics_vec256)); + Lib_IntVector_Intrinsics_vec256 + *b = + (Lib_IntVector_Intrinsics_vec256 *)KRML_ALIGNED_MALLOC(32, + sizeof (Lib_IntVector_Intrinsics_vec256) * 4U); + memset(b, 0U, 4U * sizeof (Lib_IntVector_Intrinsics_vec256)); + Hacl_Hash_Blake2b_Simd256_block_state_t + block_state = { .fst = i.key_length, .snd = i.digest_length, .thd = { .fst = wv, .snd = b } }; + Lib_IntVector_Intrinsics_vec256 *src_b = block_state0.thd.snd; + Lib_IntVector_Intrinsics_vec256 *dst_b = block_state.thd.snd; + memcpy(dst_b, src_b, 4U * sizeof (Lib_IntVector_Intrinsics_vec256)); + Hacl_Hash_Blake2b_Simd256_state_t + s = { .block_state = block_state, .buf = buf, .total_len = total_len0 }; + Hacl_Hash_Blake2b_Simd256_state_t + *p = + (Hacl_Hash_Blake2b_Simd256_state_t *)KRML_HOST_MALLOC(sizeof ( + Hacl_Hash_Blake2b_Simd256_state_t + )); + p[0U] = s; + return p; +} + /** Write the BLAKE2b digest of message `input` using key `key` into `output`. 
@@ -822,7 +1171,91 @@ Hacl_Hash_Blake2b_Simd256_hash_with_key( Hacl_Hash_Blake2b_Simd256_init(b, key_len, output_len); update(b1, b, key_len, key, input_len, input); Hacl_Hash_Blake2b_Simd256_finish(output_len, output, b); - Lib_Memzero0_memzero(b1, 4U, Lib_IntVector_Intrinsics_vec256); - Lib_Memzero0_memzero(b, 4U, Lib_IntVector_Intrinsics_vec256); + Lib_Memzero0_memzero(b1, 4U, Lib_IntVector_Intrinsics_vec256, void *); + Lib_Memzero0_memzero(b, 4U, Lib_IntVector_Intrinsics_vec256, void *); +} + +void +Hacl_Hash_Blake2b_Simd256_hash_with_key_and_paramas( + uint8_t *output, + uint8_t *input, + uint32_t input_len, + Hacl_Hash_Blake2b_blake2_params params, + uint8_t *key +) +{ + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 b[4U] KRML_POST_ALIGN(32) = { 0U }; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 b1[4U] KRML_POST_ALIGN(32) = { 0U }; + uint64_t tmp[8U] = { 0U }; + Lib_IntVector_Intrinsics_vec256 *r0 = b; + Lib_IntVector_Intrinsics_vec256 *r1 = b + 1U; + Lib_IntVector_Intrinsics_vec256 *r2 = b + 2U; + Lib_IntVector_Intrinsics_vec256 *r3 = b + 3U; + uint64_t iv0 = Hacl_Hash_Blake2b_ivTable_B[0U]; + uint64_t iv1 = Hacl_Hash_Blake2b_ivTable_B[1U]; + uint64_t iv2 = Hacl_Hash_Blake2b_ivTable_B[2U]; + uint64_t iv3 = Hacl_Hash_Blake2b_ivTable_B[3U]; + uint64_t iv4 = Hacl_Hash_Blake2b_ivTable_B[4U]; + uint64_t iv5 = Hacl_Hash_Blake2b_ivTable_B[5U]; + uint64_t iv6 = Hacl_Hash_Blake2b_ivTable_B[6U]; + uint64_t iv7 = Hacl_Hash_Blake2b_ivTable_B[7U]; + r2[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv0, iv1, iv2, iv3); + r3[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv4, iv5, iv6, iv7); + uint8_t kk = params.key_length; + uint8_t nn = params.digest_length; + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 4U; + uint8_t *bj = params.salt + i * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i] = x;); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 6U; + uint8_t *bj = params.personal + i * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i] = x;); + tmp[0U] = + (uint64_t)nn + ^ + ((uint64_t)kk + << 8U + ^ + ((uint64_t)params.fanout + << 16U + ^ ((uint64_t)params.depth << 24U ^ (uint64_t)params.leaf_length << 32U))); + tmp[1U] = params.node_offset; + tmp[2U] = (uint64_t)params.node_depth ^ (uint64_t)params.inner_length << 8U; + tmp[3U] = 0ULL; + uint64_t tmp0 = tmp[0U]; + uint64_t tmp1 = tmp[1U]; + uint64_t tmp2 = tmp[2U]; + uint64_t tmp3 = tmp[3U]; + uint64_t tmp4 = tmp[4U]; + uint64_t tmp5 = tmp[5U]; + uint64_t tmp6 = tmp[6U]; + uint64_t tmp7 = tmp[7U]; + uint64_t iv0_ = iv0 ^ tmp0; + uint64_t iv1_ = iv1 ^ tmp1; + uint64_t iv2_ = iv2 ^ tmp2; + uint64_t iv3_ = iv3 ^ tmp3; + uint64_t iv4_ = iv4 ^ tmp4; + uint64_t iv5_ = iv5 ^ tmp5; + uint64_t iv6_ = iv6 ^ tmp6; + uint64_t iv7_ = iv7 ^ tmp7; + r0[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv0_, iv1_, iv2_, iv3_); + r1[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv4_, iv5_, iv6_, iv7_); + update(b1, b, (uint32_t)params.key_length, key, input_len, input); + Hacl_Hash_Blake2b_Simd256_finish((uint32_t)params.digest_length, output, b); + Lib_Memzero0_memzero(b1, 4U, Lib_IntVector_Intrinsics_vec256, void *); + Lib_Memzero0_memzero(b, 4U, Lib_IntVector_Intrinsics_vec256, void *); } diff --git a/src/Hacl_Hash_Blake2s.c b/src/Hacl_Hash_Blake2s.c index 652c3f33..6e19d83d 100644 --- a/src/Hacl_Hash_Blake2s.c +++ b/src/Hacl_Hash_Blake2s.c @@ -26,6 +26,7 @@ #include "internal/Hacl_Hash_Blake2s.h" #include "internal/Hacl_Impl_Blake2_Constants.h" +#include 
"internal/Hacl_Hash_Blake2b.h" #include "lib_memzero0.h" static inline void @@ -76,22 +77,22 @@ update_block(uint32_t *wv, uint32_t *hash, bool flag, uint64_t totlen, uint8_t * uint32_t *r1 = m_st + 4U; uint32_t *r20 = m_st + 8U; uint32_t *r30 = m_st + 12U; - uint32_t s0 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 0U]; - uint32_t s1 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 1U]; - uint32_t s2 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 2U]; - uint32_t s3 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 3U]; - uint32_t s4 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 4U]; - uint32_t s5 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 5U]; - uint32_t s6 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 6U]; - uint32_t s7 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 7U]; - uint32_t s8 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 8U]; - uint32_t s9 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 9U]; - uint32_t s10 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 10U]; - uint32_t s11 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 11U]; - uint32_t s12 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 12U]; - uint32_t s13 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 13U]; - uint32_t s14 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 14U]; - uint32_t s15 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 15U]; + uint32_t s0 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 0U]; + uint32_t s1 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 1U]; + uint32_t s2 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 2U]; + uint32_t s3 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 3U]; + uint32_t s4 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 4U]; + uint32_t s5 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 5U]; + uint32_t s6 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 6U]; + uint32_t s7 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 7U]; + uint32_t s8 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 8U]; + uint32_t s9 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 9U]; + uint32_t s10 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 10U]; + uint32_t s11 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 11U]; + uint32_t s12 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 12U]; + uint32_t s13 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 13U]; + uint32_t s14 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 14U]; + uint32_t s15 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 15U]; uint32_t uu____0 = m_w[s2]; uint32_t uu____1 = m_w[s4]; uint32_t uu____2 = m_w[s6]; @@ -474,18 +475,104 @@ update_block(uint32_t *wv, uint32_t *hash, bool flag, uint64_t totlen, uint8_t * void Hacl_Hash_Blake2s_init(uint32_t *hash, uint32_t kk, uint32_t nn) { + uint8_t salt[8U] = { 0U }; + uint8_t personal[8U] = { 0U }; + Hacl_Hash_Blake2b_blake2_params + p = + { + .digest_length = 32U, .key_length = 0U, .fanout = 1U, .depth = 1U, .leaf_length = 0U, + .node_offset = 0ULL, .node_depth = 0U, .inner_length = 0U, .salt = salt, .personal = personal + }; + uint32_t tmp[8U] = { 0U }; + uint32_t *r0 = hash; + uint32_t *r1 = hash + 4U; + uint32_t *r2 = hash + 8U; + uint32_t *r3 = hash + 12U; + uint32_t iv0 = Hacl_Hash_Blake2b_ivTable_S[0U]; + uint32_t iv1 = Hacl_Hash_Blake2b_ivTable_S[1U]; + uint32_t iv2 = Hacl_Hash_Blake2b_ivTable_S[2U]; + uint32_t iv3 = Hacl_Hash_Blake2b_ivTable_S[3U]; + uint32_t iv4 = Hacl_Hash_Blake2b_ivTable_S[4U]; + uint32_t iv5 = Hacl_Hash_Blake2b_ivTable_S[5U]; + uint32_t iv6 = Hacl_Hash_Blake2b_ivTable_S[6U]; + uint32_t iv7 = Hacl_Hash_Blake2b_ivTable_S[7U]; + r2[0U] = iv0; + r2[1U] = iv1; + r2[2U] = iv2; + r2[3U] = iv3; + r3[0U] = iv4; + r3[1U] = iv5; + r3[2U] = iv6; + r3[3U] = iv7; + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 4U; + 
uint8_t *bj = p.salt + i * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 6U; + uint8_t *bj = p.personal + i * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); + tmp[0U] = + (uint32_t)(uint8_t)nn + ^ ((uint32_t)(uint8_t)kk << 8U ^ ((uint32_t)p.fanout << 16U ^ (uint32_t)p.depth << 24U)); + tmp[1U] = p.leaf_length; + tmp[2U] = (uint32_t)p.node_offset; + tmp[3U] = + (uint32_t)(p.node_offset >> 32U) + ^ ((uint32_t)p.node_depth << 16U ^ (uint32_t)p.inner_length << 24U); + uint32_t tmp0 = tmp[0U]; + uint32_t tmp1 = tmp[1U]; + uint32_t tmp2 = tmp[2U]; + uint32_t tmp3 = tmp[3U]; + uint32_t tmp4 = tmp[4U]; + uint32_t tmp5 = tmp[5U]; + uint32_t tmp6 = tmp[6U]; + uint32_t tmp7 = tmp[7U]; + uint32_t iv0_ = iv0 ^ tmp0; + uint32_t iv1_ = iv1 ^ tmp1; + uint32_t iv2_ = iv2 ^ tmp2; + uint32_t iv3_ = iv3 ^ tmp3; + uint32_t iv4_ = iv4 ^ tmp4; + uint32_t iv5_ = iv5 ^ tmp5; + uint32_t iv6_ = iv6 ^ tmp6; + uint32_t iv7_ = iv7 ^ tmp7; + r0[0U] = iv0_; + r0[1U] = iv1_; + r0[2U] = iv2_; + r0[3U] = iv3_; + r1[0U] = iv4_; + r1[1U] = iv5_; + r1[2U] = iv6_; + r1[3U] = iv7_; +} + +static void init_with_params(uint32_t *hash, Hacl_Hash_Blake2b_blake2_params p) +{ + uint32_t tmp[8U] = { 0U }; uint32_t *r0 = hash; uint32_t *r1 = hash + 4U; uint32_t *r2 = hash + 8U; uint32_t *r3 = hash + 12U; - uint32_t iv0 = Hacl_Hash_Blake2s_ivTable_S[0U]; - uint32_t iv1 = Hacl_Hash_Blake2s_ivTable_S[1U]; - uint32_t iv2 = Hacl_Hash_Blake2s_ivTable_S[2U]; - uint32_t iv3 = Hacl_Hash_Blake2s_ivTable_S[3U]; - uint32_t iv4 = Hacl_Hash_Blake2s_ivTable_S[4U]; - uint32_t iv5 = Hacl_Hash_Blake2s_ivTable_S[5U]; - uint32_t iv6 = Hacl_Hash_Blake2s_ivTable_S[6U]; - uint32_t iv7 = Hacl_Hash_Blake2s_ivTable_S[7U]; + uint32_t iv0 = Hacl_Hash_Blake2b_ivTable_S[0U]; + uint32_t iv1 = Hacl_Hash_Blake2b_ivTable_S[1U]; + uint32_t iv2 = Hacl_Hash_Blake2b_ivTable_S[2U]; + uint32_t iv3 = Hacl_Hash_Blake2b_ivTable_S[3U]; + uint32_t iv4 = Hacl_Hash_Blake2b_ivTable_S[4U]; + uint32_t iv5 = Hacl_Hash_Blake2b_ivTable_S[5U]; + uint32_t iv6 = Hacl_Hash_Blake2b_ivTable_S[6U]; + uint32_t iv7 = Hacl_Hash_Blake2b_ivTable_S[7U]; r2[0U] = iv0; r2[1U] = iv1; r2[2U] = iv2; @@ -494,16 +581,58 @@ void Hacl_Hash_Blake2s_init(uint32_t *hash, uint32_t kk, uint32_t nn) r3[1U] = iv5; r3[2U] = iv6; r3[3U] = iv7; - uint32_t kk_shift_8 = kk << 8U; - uint32_t iv0_ = iv0 ^ (0x01010000U ^ (kk_shift_8 ^ nn)); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 4U; + uint8_t *bj = p.salt + i * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 6U; + uint8_t *bj = p.personal + i * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); + tmp[0U] = + (uint32_t)p.digest_length + ^ ((uint32_t)p.key_length << 8U ^ ((uint32_t)p.fanout << 16U ^ (uint32_t)p.depth << 24U)); + tmp[1U] = p.leaf_length; + tmp[2U] = (uint32_t)p.node_offset; + tmp[3U] = + (uint32_t)(p.node_offset >> 32U) + ^ ((uint32_t)p.node_depth << 16U ^ (uint32_t)p.inner_length << 24U); + uint32_t tmp0 = tmp[0U]; + uint32_t tmp1 = tmp[1U]; + uint32_t tmp2 = tmp[2U]; + uint32_t tmp3 = tmp[3U]; + uint32_t tmp4 = tmp[4U]; + uint32_t tmp5 = tmp[5U]; + uint32_t tmp6 = tmp[6U]; + uint32_t tmp7 = tmp[7U]; + uint32_t iv0_ = iv0 ^ tmp0; + uint32_t iv1_ = iv1 ^ tmp1; + uint32_t iv2_ = iv2 ^ tmp2; + uint32_t iv3_ = iv3 ^ tmp3; + uint32_t iv4_ = iv4 ^ tmp4; + uint32_t iv5_ = iv5 ^ 
tmp5;
+  uint32_t iv6_ = iv6 ^ tmp6;
+  uint32_t iv7_ = iv7 ^ tmp7;
   r0[0U] = iv0_;
-  r0[1U] = iv1;
-  r0[2U] = iv2;
-  r0[3U] = iv3;
-  r1[0U] = iv4;
-  r1[1U] = iv5;
-  r1[2U] = iv6;
-  r1[3U] = iv7;
+  r0[1U] = iv1_;
+  r0[2U] = iv2_;
+  r0[3U] = iv3_;
+  r1[0U] = iv4_;
+  r1[1U] = iv5_;
+  r1[2U] = iv6_;
+  r1[3U] = iv7_;
 }
 
 static void update_key(uint32_t *wv, uint32_t *hash, uint32_t kk, uint8_t *k, uint32_t ll)
@@ -519,7 +648,7 @@ static void update_key(uint32_t *wv, uint32_t *hash, uint32_t kk, uint8_t *k, ui
   {
     update_block(wv, hash, false, lb, b);
   }
-  Lib_Memzero0_memzero(b, 64U, uint8_t);
+  Lib_Memzero0_memzero(b, 64U, uint8_t, void *);
 }
 
 void
@@ -556,7 +685,7 @@ Hacl_Hash_Blake2s_update_last(
   memcpy(b, last, rem * sizeof (uint8_t));
   uint64_t totlen = prev + (uint64_t)len;
   update_block(wv, hash, true, totlen, b);
-  Lib_Memzero0_memzero(b, 64U, uint8_t);
+  Lib_Memzero0_memzero(b, 64U, uint8_t, void *);
 }
 
 static void
@@ -614,41 +743,203 @@ void Hacl_Hash_Blake2s_finish(uint32_t nn, uint8_t *output, uint32_t *hash)
   KRML_MAYBE_FOR4(i, 0U, 4U, 1U, store32_le(second + i * 4U, row1[i]););
   uint8_t *final = b;
   memcpy(output, final, nn * sizeof (uint8_t));
-  Lib_Memzero0_memzero(b, 32U, uint8_t);
+  Lib_Memzero0_memzero(b, 32U, uint8_t, void *);
 }
 
-/**
-  State allocation function when there is no key
-*/
-Hacl_Hash_Blake2s_state_t *Hacl_Hash_Blake2s_malloc(void)
+static Hacl_Hash_Blake2s_state_t
+*malloc_raw(
+  Hacl_Hash_Blake2b_index kk,
+  K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_ key
+)
 {
   uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(64U, sizeof (uint8_t));
   uint32_t *wv = (uint32_t *)KRML_HOST_CALLOC(16U, sizeof (uint32_t));
   uint32_t *b = (uint32_t *)KRML_HOST_CALLOC(16U, sizeof (uint32_t));
-  Hacl_Hash_Blake2s_block_state_t block_state = { .fst = wv, .snd = b };
+  Hacl_Hash_Blake2s_block_state_t
+  block_state = { .fst = kk.key_length, .snd = kk.digest_length, .thd = { .fst = wv, .snd = b } };
+  uint8_t kk10 = kk.key_length;
+  uint32_t ite;
+  if (kk10 != 0U)
+  {
+    ite = 64U;
+  }
+  else
+  {
+    ite = 0U;
+  }
   Hacl_Hash_Blake2s_state_t
-  s = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)0U };
+  s = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite };
   Hacl_Hash_Blake2s_state_t
   *p = (Hacl_Hash_Blake2s_state_t *)KRML_HOST_MALLOC(sizeof (Hacl_Hash_Blake2s_state_t));
   p[0U] = s;
-  Hacl_Hash_Blake2s_init(block_state.snd, 0U, 32U);
+  Hacl_Hash_Blake2b_blake2_params *p1 = key.fst;
+  uint8_t kk1 = p1->key_length;
+  uint8_t nn = p1->digest_length;
+  Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn };
+  uint32_t kk2 = (uint32_t)i.key_length;
+  uint8_t *k_1 = key.snd;
+  if (!(kk2 == 0U))
+  {
+    uint8_t *sub_b = buf + kk2;
+    memset(sub_b, 0U, (64U - kk2) * sizeof (uint8_t));
+    memcpy(buf, k_1, kk2 * sizeof (uint8_t));
+  }
+  Hacl_Hash_Blake2b_blake2_params pv = p1[0U];
+  init_with_params(block_state.thd.snd, pv);
   return p;
 }
 
 /**
-  Re-initialization function when there is no key
+  State allocation function when there are parameters and a key. The
+length of the key k MUST match the value of the field key_length in the
+parameters. Furthermore, there is a static (not dynamically checked) requirement
+that key_length does not exceed max_key (32 for S, 64 for B).
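+
+A sketch for Blake2s (illustrative; note that salt and personal are 8 bytes
+each here, versus 16 for Blake2b):
+
+  uint8_t salt[8U] = { 0U };
+  uint8_t personal[8U] = { 0U };
+  Hacl_Hash_Blake2b_blake2_params params =
+  {
+    .digest_length = 32U, .key_length = 0U, .fanout = 1U, .depth = 1U,
+    .leaf_length = 0U, .node_offset = 0ULL, .node_depth = 0U,
+    .inner_length = 0U, .salt = salt, .personal = personal
+  };
+  Hacl_Hash_Blake2s_state_t
+  *st = Hacl_Hash_Blake2s_malloc_with_params_and_key(&params, NULL);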
+*/ +Hacl_Hash_Blake2s_state_t +*Hacl_Hash_Blake2s_malloc_with_params_and_key(Hacl_Hash_Blake2b_blake2_params *p, uint8_t *k) +{ + Hacl_Hash_Blake2b_blake2_params pv = p[0U]; + Hacl_Hash_Blake2b_index + i1 = { .key_length = pv.key_length, .digest_length = pv.digest_length }; + return + malloc_raw(i1, + ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = p, .snd = k })); +} + +/** + State allocation function when there is just a custom key. All +other parameters are set to their respective default values, meaning the output +length is the maximum allowed output (32 for S, 64 for B). */ -void Hacl_Hash_Blake2s_reset(Hacl_Hash_Blake2s_state_t *state) +Hacl_Hash_Blake2s_state_t *Hacl_Hash_Blake2s_malloc_with_key(uint8_t *k, uint8_t kk) +{ + uint8_t nn = 32U; + Hacl_Hash_Blake2b_index i = { .key_length = kk, .digest_length = nn }; + uint8_t *salt = (uint8_t *)KRML_HOST_CALLOC(8U, sizeof (uint8_t)); + uint8_t *personal = (uint8_t *)KRML_HOST_CALLOC(8U, sizeof (uint8_t)); + Hacl_Hash_Blake2b_blake2_params + p = + { + .digest_length = i.digest_length, .key_length = i.key_length, .fanout = 1U, .depth = 1U, + .leaf_length = 0U, .node_offset = 0ULL, .node_depth = 0U, .inner_length = 0U, .salt = salt, + .personal = personal + }; + Hacl_Hash_Blake2b_blake2_params + *p0 = + (Hacl_Hash_Blake2b_blake2_params *)KRML_HOST_MALLOC(sizeof (Hacl_Hash_Blake2b_blake2_params)); + p0[0U] = p; + Hacl_Hash_Blake2s_state_t *s = Hacl_Hash_Blake2s_malloc_with_params_and_key(p0, k); + Hacl_Hash_Blake2b_blake2_params p1 = p0[0U]; + KRML_HOST_FREE(p1.salt); + KRML_HOST_FREE(p1.personal); + KRML_HOST_FREE(p0); + return s; +} + +/** + State allocation function when there is no key +*/ +Hacl_Hash_Blake2s_state_t *Hacl_Hash_Blake2s_malloc(void) +{ + return Hacl_Hash_Blake2s_malloc_with_key(NULL, 0U); +} + +static Hacl_Hash_Blake2b_index index_of_state(Hacl_Hash_Blake2s_state_t *s) +{ + Hacl_Hash_Blake2s_block_state_t block_state = (*s).block_state; + uint8_t nn = block_state.snd; + uint8_t kk1 = block_state.fst; + return ((Hacl_Hash_Blake2b_index){ .key_length = kk1, .digest_length = nn }); +} + +static void +reset_raw( + Hacl_Hash_Blake2s_state_t *state, + K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_ key +) { Hacl_Hash_Blake2s_state_t scrut = *state; uint8_t *buf = scrut.buf; Hacl_Hash_Blake2s_block_state_t block_state = scrut.block_state; - Hacl_Hash_Blake2s_init(block_state.snd, 0U, 32U); + uint8_t nn0 = block_state.snd; + uint8_t kk10 = block_state.fst; + Hacl_Hash_Blake2b_index i = { .key_length = kk10, .digest_length = nn0 }; + KRML_MAYBE_UNUSED_VAR(i); + Hacl_Hash_Blake2b_blake2_params *p = key.fst; + uint8_t kk1 = p->key_length; + uint8_t nn = p->digest_length; + Hacl_Hash_Blake2b_index i1 = { .key_length = kk1, .digest_length = nn }; + uint32_t kk2 = (uint32_t)i1.key_length; + uint8_t *k_1 = key.snd; + if (!(kk2 == 0U)) + { + uint8_t *sub_b = buf + kk2; + memset(sub_b, 0U, (64U - kk2) * sizeof (uint8_t)); + memcpy(buf, k_1, kk2 * sizeof (uint8_t)); + } + Hacl_Hash_Blake2b_blake2_params pv = p[0U]; + init_with_params(block_state.thd.snd, pv); + uint8_t kk11 = i.key_length; + uint32_t ite; + if (kk11 != 0U) + { + ite = 64U; + } + else + { + ite = 0U; + } Hacl_Hash_Blake2s_state_t - tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)0U }; + tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; state[0U] = tmp; } +/** + Re-initialization function. 
The reinitialization API is tricky -- +you MUST reuse the same original parameters for digest (output) length and key +length. +*/ +void +Hacl_Hash_Blake2s_reset_with_key_and_params( + Hacl_Hash_Blake2s_state_t *s, + Hacl_Hash_Blake2b_blake2_params *p, + uint8_t *k +) +{ + index_of_state(s); + reset_raw(s, ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = p, .snd = k })); +} + +/** + Re-initialization function when there is a key. Note that the key +size is not allowed to change, which is why this function does not take a key +length -- the key has to be same key size that was originally passed to +`malloc_with_key` +*/ +void Hacl_Hash_Blake2s_reset_with_key(Hacl_Hash_Blake2s_state_t *s, uint8_t *k) +{ + Hacl_Hash_Blake2b_index idx = index_of_state(s); + uint8_t salt[8U] = { 0U }; + uint8_t personal[8U] = { 0U }; + Hacl_Hash_Blake2b_blake2_params + p = + { + .digest_length = idx.digest_length, .key_length = idx.key_length, .fanout = 1U, .depth = 1U, + .leaf_length = 0U, .node_offset = 0ULL, .node_depth = 0U, .inner_length = 0U, .salt = salt, + .personal = personal + }; + Hacl_Hash_Blake2b_blake2_params p0 = p; + reset_raw(s, ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = &p0, .snd = k })); +} + +/** + Re-initialization function when there is no key +*/ +void Hacl_Hash_Blake2s_reset(Hacl_Hash_Blake2s_state_t *s) +{ + Hacl_Hash_Blake2s_reset_with_key(s, NULL); +} + /** Update function when there is no key; 0 = success, 1 = max length exceeded */ @@ -716,8 +1007,9 @@ Hacl_Hash_Blake2s_update(Hacl_Hash_Blake2s_state_t *state, uint8_t *chunk, uint3 if (!(sz1 == 0U)) { uint64_t prevlen = total_len1 - (uint64_t)sz1; - uint32_t *wv = block_state1.fst; - uint32_t *hash = block_state1.snd; + K____uint32_t___uint32_t_ acc = block_state1.thd; + uint32_t *wv = acc.fst; + uint32_t *hash = acc.snd; uint32_t nb = 1U; Hacl_Hash_Blake2s_update_multi(64U, wv, hash, prevlen, buf, nb); } @@ -735,8 +1027,9 @@ Hacl_Hash_Blake2s_update(Hacl_Hash_Blake2s_state_t *state, uint8_t *chunk, uint3 uint32_t data2_len = chunk_len - data1_len; uint8_t *data1 = chunk; uint8_t *data2 = chunk + data1_len; - uint32_t *wv = block_state1.fst; - uint32_t *hash = block_state1.snd; + K____uint32_t___uint32_t_ acc = block_state1.thd; + uint32_t *wv = acc.fst; + uint32_t *hash = acc.snd; uint32_t nb = data1_len / 64U; Hacl_Hash_Blake2s_update_multi(data1_len, wv, hash, total_len1, data1, nb); uint8_t *dst = buf; @@ -797,8 +1090,9 @@ Hacl_Hash_Blake2s_update(Hacl_Hash_Blake2s_state_t *state, uint8_t *chunk, uint3 if (!(sz1 == 0U)) { uint64_t prevlen = total_len1 - (uint64_t)sz1; - uint32_t *wv = block_state1.fst; - uint32_t *hash = block_state1.snd; + K____uint32_t___uint32_t_ acc = block_state1.thd; + uint32_t *wv = acc.fst; + uint32_t *hash = acc.snd; uint32_t nb = 1U; Hacl_Hash_Blake2s_update_multi(64U, wv, hash, prevlen, buf, nb); } @@ -817,8 +1111,9 @@ Hacl_Hash_Blake2s_update(Hacl_Hash_Blake2s_state_t *state, uint8_t *chunk, uint3 uint32_t data2_len = chunk_len - diff - data1_len; uint8_t *data1 = chunk2; uint8_t *data2 = chunk2 + data1_len; - uint32_t *wv = block_state1.fst; - uint32_t *hash = block_state1.snd; + K____uint32_t___uint32_t_ acc = block_state1.thd; + uint32_t *wv = acc.fst; + uint32_t *hash = acc.snd; uint32_t nb = data1_len / 64U; Hacl_Hash_Blake2s_update_multi(data1_len, wv, hash, total_len1, data1, nb); uint8_t *dst = buf; @@ -841,6 +1136,10 @@ Hacl_Hash_Blake2s_update(Hacl_Hash_Blake2s_state_t *state, uint8_t *chunk, uint3 */ void Hacl_Hash_Blake2s_digest(Hacl_Hash_Blake2s_state_t 
*state, uint8_t *output) { + Hacl_Hash_Blake2s_block_state_t block_state0 = (*state).block_state; + uint8_t nn = block_state0.snd; + uint8_t kk1 = block_state0.fst; + Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; Hacl_Hash_Blake2s_state_t scrut = *state; Hacl_Hash_Blake2s_block_state_t block_state = scrut.block_state; uint8_t *buf_ = scrut.buf; @@ -857,9 +1156,11 @@ void Hacl_Hash_Blake2s_digest(Hacl_Hash_Blake2s_state_t *state, uint8_t *output) uint8_t *buf_1 = buf_; uint32_t wv0[16U] = { 0U }; uint32_t b[16U] = { 0U }; - Hacl_Hash_Blake2s_block_state_t tmp_block_state = { .fst = wv0, .snd = b }; - uint32_t *src_b = block_state.snd; - uint32_t *dst_b = tmp_block_state.snd; + Hacl_Hash_Blake2s_block_state_t + tmp_block_state = + { .fst = i.key_length, .snd = i.digest_length, .thd = { .fst = wv0, .snd = b } }; + uint32_t *src_b = block_state.thd.snd; + uint32_t *dst_b = tmp_block_state.thd.snd; memcpy(dst_b, src_b, 16U * sizeof (uint32_t)); uint64_t prev_len = total_len - (uint64_t)r; uint32_t ite; @@ -873,15 +1174,18 @@ void Hacl_Hash_Blake2s_digest(Hacl_Hash_Blake2s_state_t *state, uint8_t *output) } uint8_t *buf_last = buf_1 + r - ite; uint8_t *buf_multi = buf_1; - uint32_t *wv1 = tmp_block_state.fst; - uint32_t *hash0 = tmp_block_state.snd; + K____uint32_t___uint32_t_ acc0 = tmp_block_state.thd; + uint32_t *wv1 = acc0.fst; + uint32_t *hash0 = acc0.snd; uint32_t nb = 0U; Hacl_Hash_Blake2s_update_multi(0U, wv1, hash0, prev_len, buf_multi, nb); uint64_t prev_len_last = total_len - (uint64_t)r; - uint32_t *wv = tmp_block_state.fst; - uint32_t *hash = tmp_block_state.snd; + K____uint32_t___uint32_t_ acc = tmp_block_state.thd; + uint32_t *wv = acc.fst; + uint32_t *hash = acc.snd; Hacl_Hash_Blake2s_update_last(r, wv, hash, prev_len_last, r, buf_last); - Hacl_Hash_Blake2s_finish(32U, output, tmp_block_state.snd); + uint8_t nn0 = tmp_block_state.snd; + Hacl_Hash_Blake2s_finish((uint32_t)nn0, output, tmp_block_state.thd.snd); } /** @@ -892,19 +1196,48 @@ void Hacl_Hash_Blake2s_free(Hacl_Hash_Blake2s_state_t *state) Hacl_Hash_Blake2s_state_t scrut = *state; uint8_t *buf = scrut.buf; Hacl_Hash_Blake2s_block_state_t block_state = scrut.block_state; - uint32_t *wv = block_state.fst; - uint32_t *b = block_state.snd; + uint32_t *b = block_state.thd.snd; + uint32_t *wv = block_state.thd.fst; KRML_HOST_FREE(wv); KRML_HOST_FREE(b); KRML_HOST_FREE(buf); KRML_HOST_FREE(state); } +/** + Copying. The key length (or absence thereof) must match between source and destination. 
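+As a sketch (the names `st` and `fork` are illustrative, not part of this API):
+given a state `st` from `Hacl_Hash_Blake2s_malloc` that has already absorbed
+data via `Hacl_Hash_Blake2s_update`, `Hacl_Hash_Blake2s_state_t *fork =
+Hacl_Hash_Blake2s_copy(st);` yields an independent snapshot, so calling
+`Hacl_Hash_Blake2s_digest` on `fork` does not disturb `st`; each state must
+eventually be released with `Hacl_Hash_Blake2s_free`.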
+*/ +Hacl_Hash_Blake2s_state_t *Hacl_Hash_Blake2s_copy(Hacl_Hash_Blake2s_state_t *state) +{ + Hacl_Hash_Blake2s_state_t scrut = *state; + Hacl_Hash_Blake2s_block_state_t block_state0 = scrut.block_state; + uint8_t *buf0 = scrut.buf; + uint64_t total_len0 = scrut.total_len; + uint8_t nn = block_state0.snd; + uint8_t kk1 = block_state0.fst; + Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; + uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(64U, sizeof (uint8_t)); + memcpy(buf, buf0, 64U * sizeof (uint8_t)); + uint32_t *wv = (uint32_t *)KRML_HOST_CALLOC(16U, sizeof (uint32_t)); + uint32_t *b = (uint32_t *)KRML_HOST_CALLOC(16U, sizeof (uint32_t)); + Hacl_Hash_Blake2s_block_state_t + block_state = { .fst = i.key_length, .snd = i.digest_length, .thd = { .fst = wv, .snd = b } }; + uint32_t *src_b = block_state0.thd.snd; + uint32_t *dst_b = block_state.thd.snd; + memcpy(dst_b, src_b, 16U * sizeof (uint32_t)); + Hacl_Hash_Blake2s_state_t + s = { .block_state = block_state, .buf = buf, .total_len = total_len0 }; + Hacl_Hash_Blake2s_state_t + *p = (Hacl_Hash_Blake2s_state_t *)KRML_HOST_MALLOC(sizeof (Hacl_Hash_Blake2s_state_t)); + p[0U] = s; + return p; +} + /** Write the BLAKE2s digest of message `input` using key `key` into `output`. @param output Pointer to `output_len` bytes of memory where the digest is written to. @param output_len Length of the to-be-generated digest with 1 <= `output_len` <= 32. @param input Pointer to `input_len` bytes of memory where the input message is read from. @param input_len Length of the input message. @param key Pointer to `key_len` bytes of memory where the key is read from. @@ -925,7 +1258,100 @@ Hacl_Hash_Blake2s_hash_with_key( Hacl_Hash_Blake2s_init(b, key_len, output_len); update(b1, b, key_len, key, input_len, input); Hacl_Hash_Blake2s_finish(output_len, output, b); - Lib_Memzero0_memzero(b1, 16U, uint32_t); - Lib_Memzero0_memzero(b, 16U, uint32_t); + Lib_Memzero0_memzero(b1, 16U, uint32_t, void *); + Lib_Memzero0_memzero(b, 16U, uint32_t, void *); +} + +void +Hacl_Hash_Blake2s_hash_with_key_and_params( + uint8_t *output, + uint8_t *input, + uint32_t input_len, + Hacl_Hash_Blake2b_blake2_params params, + uint8_t *key +) +{ + uint32_t b[16U] = { 0U }; + uint32_t b1[16U] = { 0U }; + uint32_t tmp[8U] = { 0U }; + uint32_t *r0 = b; + uint32_t *r1 = b + 4U; + uint32_t *r2 = b + 8U; + uint32_t *r3 = b + 12U; + uint32_t iv0 = Hacl_Hash_Blake2b_ivTable_S[0U]; + uint32_t iv1 = Hacl_Hash_Blake2b_ivTable_S[1U]; + uint32_t iv2 = Hacl_Hash_Blake2b_ivTable_S[2U]; + uint32_t iv3 = Hacl_Hash_Blake2b_ivTable_S[3U]; + uint32_t iv4 = Hacl_Hash_Blake2b_ivTable_S[4U]; + uint32_t iv5 = Hacl_Hash_Blake2b_ivTable_S[5U]; + uint32_t iv6 = Hacl_Hash_Blake2b_ivTable_S[6U]; + uint32_t iv7 = Hacl_Hash_Blake2b_ivTable_S[7U]; + r2[0U] = iv0; + r2[1U] = iv1; + r2[2U] = iv2; + r2[3U] = iv3; + r3[0U] = iv4; + r3[1U] = iv5; + r3[2U] = iv6; + r3[3U] = iv7; + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 4U; + uint8_t *bj = params.salt + i * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 6U; + uint8_t *bj = params.personal + i * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); + tmp[0U] = + (uint32_t)params.digest_length + ^ + ((uint32_t)params.key_length + << 8U + ^ ((uint32_t)params.fanout << 16U ^ (uint32_t)params.depth << 24U)); + tmp[1U] =
params.leaf_length; + tmp[2U] = (uint32_t)params.node_offset; + tmp[3U] = + (uint32_t)(params.node_offset >> 32U) + ^ ((uint32_t)params.node_depth << 16U ^ (uint32_t)params.inner_length << 24U); + uint32_t tmp0 = tmp[0U]; + uint32_t tmp1 = tmp[1U]; + uint32_t tmp2 = tmp[2U]; + uint32_t tmp3 = tmp[3U]; + uint32_t tmp4 = tmp[4U]; + uint32_t tmp5 = tmp[5U]; + uint32_t tmp6 = tmp[6U]; + uint32_t tmp7 = tmp[7U]; + uint32_t iv0_ = iv0 ^ tmp0; + uint32_t iv1_ = iv1 ^ tmp1; + uint32_t iv2_ = iv2 ^ tmp2; + uint32_t iv3_ = iv3 ^ tmp3; + uint32_t iv4_ = iv4 ^ tmp4; + uint32_t iv5_ = iv5 ^ tmp5; + uint32_t iv6_ = iv6 ^ tmp6; + uint32_t iv7_ = iv7 ^ tmp7; + r0[0U] = iv0_; + r0[1U] = iv1_; + r0[2U] = iv2_; + r0[3U] = iv3_; + r1[0U] = iv4_; + r1[1U] = iv5_; + r1[2U] = iv6_; + r1[3U] = iv7_; + update(b1, b, (uint32_t)params.key_length, key, input_len, input); + Hacl_Hash_Blake2s_finish((uint32_t)params.digest_length, output, b); + Lib_Memzero0_memzero(b1, 16U, uint32_t, void *); + Lib_Memzero0_memzero(b, 16U, uint32_t, void *); } diff --git a/src/Hacl_Hash_Blake2s_Simd128.c b/src/Hacl_Hash_Blake2s_Simd128.c index 73f0cccb..c02da8fa 100644 --- a/src/Hacl_Hash_Blake2s_Simd128.c +++ b/src/Hacl_Hash_Blake2s_Simd128.c @@ -26,6 +26,7 @@ #include "internal/Hacl_Hash_Blake2s_Simd128.h" #include "internal/Hacl_Impl_Blake2_Constants.h" +#include "internal/Hacl_Hash_Blake2b.h" #include "lib_memzero0.h" static inline void @@ -77,22 +78,22 @@ update_block( Lib_IntVector_Intrinsics_vec128 *r1 = m_st + 1U; Lib_IntVector_Intrinsics_vec128 *r20 = m_st + 2U; Lib_IntVector_Intrinsics_vec128 *r30 = m_st + 3U; - uint32_t s0 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 0U]; - uint32_t s1 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 1U]; - uint32_t s2 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 2U]; - uint32_t s3 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 3U]; - uint32_t s4 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 4U]; - uint32_t s5 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 5U]; - uint32_t s6 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 6U]; - uint32_t s7 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 7U]; - uint32_t s8 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 8U]; - uint32_t s9 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 9U]; - uint32_t s10 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 10U]; - uint32_t s11 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 11U]; - uint32_t s12 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 12U]; - uint32_t s13 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 13U]; - uint32_t s14 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 14U]; - uint32_t s15 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 15U]; + uint32_t s0 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 0U]; + uint32_t s1 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 1U]; + uint32_t s2 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 2U]; + uint32_t s3 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 3U]; + uint32_t s4 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 4U]; + uint32_t s5 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 5U]; + uint32_t s6 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 6U]; + uint32_t s7 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 7U]; + uint32_t s8 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 8U]; + uint32_t s9 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 9U]; + uint32_t s10 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 10U]; + uint32_t s11 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 11U]; + uint32_t s12 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 12U]; + uint32_t s13 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 13U]; + uint32_t s14 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 14U]; + 
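+ /* The sigma permutation selects which of the 16 message words m_w feed
+    the four G-mixing vectors in this round. */ +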
uint32_t s15 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 15U]; r0[0U] = Lib_IntVector_Intrinsics_vec128_load32s(m_w[s0], m_w[s2], m_w[s4], m_w[s6]); r1[0U] = Lib_IntVector_Intrinsics_vec128_load32s(m_w[s1], m_w[s3], m_w[s5], m_w[s7]); r20[0U] = Lib_IntVector_Intrinsics_vec128_load32s(m_w[s8], m_w[s10], m_w[s12], m_w[s14]); @@ -214,24 +215,141 @@ update_block( void Hacl_Hash_Blake2s_Simd128_init(Lib_IntVector_Intrinsics_vec128 *hash, uint32_t kk, uint32_t nn) { + uint8_t salt[8U] = { 0U }; + uint8_t personal[8U] = { 0U }; + Hacl_Hash_Blake2b_blake2_params + p = + { + .digest_length = 32U, .key_length = 0U, .fanout = 1U, .depth = 1U, .leaf_length = 0U, + .node_offset = 0ULL, .node_depth = 0U, .inner_length = 0U, .salt = salt, .personal = personal + }; + uint32_t tmp[8U] = { 0U }; + Lib_IntVector_Intrinsics_vec128 *r0 = hash; + Lib_IntVector_Intrinsics_vec128 *r1 = hash + 1U; + Lib_IntVector_Intrinsics_vec128 *r2 = hash + 2U; + Lib_IntVector_Intrinsics_vec128 *r3 = hash + 3U; + uint32_t iv0 = Hacl_Hash_Blake2b_ivTable_S[0U]; + uint32_t iv1 = Hacl_Hash_Blake2b_ivTable_S[1U]; + uint32_t iv2 = Hacl_Hash_Blake2b_ivTable_S[2U]; + uint32_t iv3 = Hacl_Hash_Blake2b_ivTable_S[3U]; + uint32_t iv4 = Hacl_Hash_Blake2b_ivTable_S[4U]; + uint32_t iv5 = Hacl_Hash_Blake2b_ivTable_S[5U]; + uint32_t iv6 = Hacl_Hash_Blake2b_ivTable_S[6U]; + uint32_t iv7 = Hacl_Hash_Blake2b_ivTable_S[7U]; + r2[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv0, iv1, iv2, iv3); + r3[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv4, iv5, iv6, iv7); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 4U; + uint8_t *bj = p.salt + i * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 6U; + uint8_t *bj = p.personal + i * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); + tmp[0U] = + (uint32_t)(uint8_t)nn + ^ ((uint32_t)(uint8_t)kk << 8U ^ ((uint32_t)p.fanout << 16U ^ (uint32_t)p.depth << 24U)); + tmp[1U] = p.leaf_length; + tmp[2U] = (uint32_t)p.node_offset; + tmp[3U] = + (uint32_t)(p.node_offset >> 32U) + ^ ((uint32_t)p.node_depth << 16U ^ (uint32_t)p.inner_length << 24U); + uint32_t tmp0 = tmp[0U]; + uint32_t tmp1 = tmp[1U]; + uint32_t tmp2 = tmp[2U]; + uint32_t tmp3 = tmp[3U]; + uint32_t tmp4 = tmp[4U]; + uint32_t tmp5 = tmp[5U]; + uint32_t tmp6 = tmp[6U]; + uint32_t tmp7 = tmp[7U]; + uint32_t iv0_ = iv0 ^ tmp0; + uint32_t iv1_ = iv1 ^ tmp1; + uint32_t iv2_ = iv2 ^ tmp2; + uint32_t iv3_ = iv3 ^ tmp3; + uint32_t iv4_ = iv4 ^ tmp4; + uint32_t iv5_ = iv5 ^ tmp5; + uint32_t iv6_ = iv6 ^ tmp6; + uint32_t iv7_ = iv7 ^ tmp7; + r0[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv0_, iv1_, iv2_, iv3_); + r1[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv4_, iv5_, iv6_, iv7_); +} + +static void +init_with_params(Lib_IntVector_Intrinsics_vec128 *hash, Hacl_Hash_Blake2b_blake2_params p) +{ + uint32_t tmp[8U] = { 0U }; Lib_IntVector_Intrinsics_vec128 *r0 = hash; Lib_IntVector_Intrinsics_vec128 *r1 = hash + 1U; Lib_IntVector_Intrinsics_vec128 *r2 = hash + 2U; Lib_IntVector_Intrinsics_vec128 *r3 = hash + 3U; - uint32_t iv0 = Hacl_Hash_Blake2s_ivTable_S[0U]; - uint32_t iv1 = Hacl_Hash_Blake2s_ivTable_S[1U]; - uint32_t iv2 = Hacl_Hash_Blake2s_ivTable_S[2U]; - uint32_t iv3 = Hacl_Hash_Blake2s_ivTable_S[3U]; - uint32_t iv4 = Hacl_Hash_Blake2s_ivTable_S[4U]; - uint32_t iv5 = Hacl_Hash_Blake2s_ivTable_S[5U]; - uint32_t iv6 = Hacl_Hash_Blake2s_ivTable_S[6U]; - uint32_t iv7 = Hacl_Hash_Blake2s_ivTable_S[7U]; + 
uint32_t iv0 = Hacl_Hash_Blake2b_ivTable_S[0U]; + uint32_t iv1 = Hacl_Hash_Blake2b_ivTable_S[1U]; + uint32_t iv2 = Hacl_Hash_Blake2b_ivTable_S[2U]; + uint32_t iv3 = Hacl_Hash_Blake2b_ivTable_S[3U]; + uint32_t iv4 = Hacl_Hash_Blake2b_ivTable_S[4U]; + uint32_t iv5 = Hacl_Hash_Blake2b_ivTable_S[5U]; + uint32_t iv6 = Hacl_Hash_Blake2b_ivTable_S[6U]; + uint32_t iv7 = Hacl_Hash_Blake2b_ivTable_S[7U]; r2[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv0, iv1, iv2, iv3); r3[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv4, iv5, iv6, iv7); - uint32_t kk_shift_8 = kk << 8U; - uint32_t iv0_ = iv0 ^ (0x01010000U ^ (kk_shift_8 ^ nn)); - r0[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv0_, iv1, iv2, iv3); - r1[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv4, iv5, iv6, iv7); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 4U; + uint8_t *bj = p.salt + i * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 6U; + uint8_t *bj = p.personal + i * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); + tmp[0U] = + (uint32_t)p.digest_length + ^ ((uint32_t)p.key_length << 8U ^ ((uint32_t)p.fanout << 16U ^ (uint32_t)p.depth << 24U)); + tmp[1U] = p.leaf_length; + tmp[2U] = (uint32_t)p.node_offset; + tmp[3U] = + (uint32_t)(p.node_offset >> 32U) + ^ ((uint32_t)p.node_depth << 16U ^ (uint32_t)p.inner_length << 24U); + uint32_t tmp0 = tmp[0U]; + uint32_t tmp1 = tmp[1U]; + uint32_t tmp2 = tmp[2U]; + uint32_t tmp3 = tmp[3U]; + uint32_t tmp4 = tmp[4U]; + uint32_t tmp5 = tmp[5U]; + uint32_t tmp6 = tmp[6U]; + uint32_t tmp7 = tmp[7U]; + uint32_t iv0_ = iv0 ^ tmp0; + uint32_t iv1_ = iv1 ^ tmp1; + uint32_t iv2_ = iv2 ^ tmp2; + uint32_t iv3_ = iv3 ^ tmp3; + uint32_t iv4_ = iv4 ^ tmp4; + uint32_t iv5_ = iv5 ^ tmp5; + uint32_t iv6_ = iv6 ^ tmp6; + uint32_t iv7_ = iv7 ^ tmp7; + r0[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv0_, iv1_, iv2_, iv3_); + r1[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv4_, iv5_, iv6_, iv7_); } static void @@ -254,7 +372,7 @@ update_key( { update_block(wv, hash, false, lb, b); } - Lib_Memzero0_memzero(b, 64U, uint8_t); + Lib_Memzero0_memzero(b, 64U, uint8_t, void *); } void @@ -291,7 +409,7 @@ Hacl_Hash_Blake2s_Simd128_update_last( memcpy(b, last, rem * sizeof (uint8_t)); uint64_t totlen = prev + (uint64_t)len; update_block(wv, hash, true, totlen, b); - Lib_Memzero0_memzero(b, 64U, uint8_t); + Lib_Memzero0_memzero(b, 64U, uint8_t, void *); } static inline void @@ -367,7 +485,7 @@ Hacl_Hash_Blake2s_Simd128_finish( Lib_IntVector_Intrinsics_vec128_store32_le(second, row1[0U]); uint8_t *final = b; memcpy(output, final, nn * sizeof (uint8_t)); - Lib_Memzero0_memzero(b, 32U, uint8_t); + Lib_Memzero0_memzero(b, 32U, uint8_t, void *); } void @@ -464,10 +582,11 @@ Lib_IntVector_Intrinsics_vec128 *Hacl_Hash_Blake2s_Simd128_malloc_with_key(void) return buf; } -/** - State allocation function when there is no key -*/ -Hacl_Hash_Blake2s_Simd128_state_t *Hacl_Hash_Blake2s_Simd128_malloc(void) +static Hacl_Hash_Blake2s_Simd128_state_t +*malloc_raw( + Hacl_Hash_Blake2b_index kk, + K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_ key +) { uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(64U, sizeof (uint8_t)); Lib_IntVector_Intrinsics_vec128 @@ -480,33 +599,199 @@ Hacl_Hash_Blake2s_Simd128_state_t *Hacl_Hash_Blake2s_Simd128_malloc(void) (Lib_IntVector_Intrinsics_vec128 *)KRML_ALIGNED_MALLOC(16, sizeof (Lib_IntVector_Intrinsics_vec128) * 4U); memset(b, 0U, 4U * sizeof 
(Lib_IntVector_Intrinsics_vec128)); - Hacl_Hash_Blake2s_Simd128_block_state_t block_state = { .fst = wv, .snd = b }; + Hacl_Hash_Blake2s_Simd128_block_state_t + block_state = { .fst = kk.key_length, .snd = kk.digest_length, .thd = { .fst = wv, .snd = b } }; + uint8_t kk10 = kk.key_length; + uint32_t ite; + if (kk10 != 0U) + { + ite = 64U; + } + else + { + ite = 0U; + } Hacl_Hash_Blake2s_Simd128_state_t - s = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)0U }; + s = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; Hacl_Hash_Blake2s_Simd128_state_t *p = (Hacl_Hash_Blake2s_Simd128_state_t *)KRML_HOST_MALLOC(sizeof ( Hacl_Hash_Blake2s_Simd128_state_t )); p[0U] = s; - Hacl_Hash_Blake2s_Simd128_init(block_state.snd, 0U, 32U); + Hacl_Hash_Blake2b_blake2_params *p1 = key.fst; + uint8_t kk1 = p1->key_length; + uint8_t nn = p1->digest_length; + Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; + uint32_t kk2 = (uint32_t)i.key_length; + uint8_t *k_1 = key.snd; + if (!(kk2 == 0U)) + { + uint8_t *sub_b = buf + kk2; + memset(sub_b, 0U, (64U - kk2) * sizeof (uint8_t)); + memcpy(buf, k_1, kk2 * sizeof (uint8_t)); + } + Hacl_Hash_Blake2b_blake2_params pv = p1[0U]; + init_with_params(block_state.thd.snd, pv); return p; } /** - Re-initialization function when there is no key + State allocation function when there are parameters and a key. The +length of the key k MUST match the value of the field key_length in the +parameters. Furthermore, there is a static (not dynamically checked) requirement +that key_length does not exceed max_key (32 for S, 64 for B). +*/ +Hacl_Hash_Blake2s_Simd128_state_t +*Hacl_Hash_Blake2s_Simd128_malloc_with_params_and_key( + Hacl_Hash_Blake2b_blake2_params *p, + uint8_t *k +) +{ + Hacl_Hash_Blake2b_blake2_params pv = p[0U]; + Hacl_Hash_Blake2b_index + i1 = { .key_length = pv.key_length, .digest_length = pv.digest_length }; + return + malloc_raw(i1, + ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = p, .snd = k })); +} + +/** + State allocation function when there is just a custom key. All +other parameters are set to their respective default values, meaning the output +length is the maximum allowed output (32 for S, 64 for B).
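+For instance (a sketch; the 16-byte key `k` is illustrative),
+`Hacl_Hash_Blake2s_Simd128_malloc_with_key0(k, 16U)` commits the state to
+`key_length = 16` with the default `digest_length = 32`; every later
+`reset_with_key` on that state must again supply a 16-byte key.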
+*/ +Hacl_Hash_Blake2s_Simd128_state_t +*Hacl_Hash_Blake2s_Simd128_malloc_with_key0(uint8_t *k, uint8_t kk) +{ + uint8_t nn = 32U; + Hacl_Hash_Blake2b_index i = { .key_length = kk, .digest_length = nn }; + uint8_t *salt = (uint8_t *)KRML_HOST_CALLOC(8U, sizeof (uint8_t)); + uint8_t *personal = (uint8_t *)KRML_HOST_CALLOC(8U, sizeof (uint8_t)); + Hacl_Hash_Blake2b_blake2_params + p = + { + .digest_length = i.digest_length, .key_length = i.key_length, .fanout = 1U, .depth = 1U, + .leaf_length = 0U, .node_offset = 0ULL, .node_depth = 0U, .inner_length = 0U, .salt = salt, + .personal = personal + }; + Hacl_Hash_Blake2b_blake2_params + *p0 = + (Hacl_Hash_Blake2b_blake2_params *)KRML_HOST_MALLOC(sizeof (Hacl_Hash_Blake2b_blake2_params)); + p0[0U] = p; + Hacl_Hash_Blake2s_Simd128_state_t + *s = Hacl_Hash_Blake2s_Simd128_malloc_with_params_and_key(p0, k); + Hacl_Hash_Blake2b_blake2_params p1 = p0[0U]; + KRML_HOST_FREE(p1.salt); + KRML_HOST_FREE(p1.personal); + KRML_HOST_FREE(p0); + return s; +} + +/** + State allocation function when there is no key */ -void Hacl_Hash_Blake2s_Simd128_reset(Hacl_Hash_Blake2s_Simd128_state_t *state) +Hacl_Hash_Blake2s_Simd128_state_t *Hacl_Hash_Blake2s_Simd128_malloc(void) +{ + return Hacl_Hash_Blake2s_Simd128_malloc_with_key0(NULL, 0U); +} + +static Hacl_Hash_Blake2b_index index_of_state(Hacl_Hash_Blake2s_Simd128_state_t *s) +{ + Hacl_Hash_Blake2s_Simd128_block_state_t block_state = (*s).block_state; + uint8_t nn = block_state.snd; + uint8_t kk1 = block_state.fst; + return ((Hacl_Hash_Blake2b_index){ .key_length = kk1, .digest_length = nn }); +} + +static void +reset_raw( + Hacl_Hash_Blake2s_Simd128_state_t *state, + K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_ key +) { Hacl_Hash_Blake2s_Simd128_state_t scrut = *state; uint8_t *buf = scrut.buf; Hacl_Hash_Blake2s_Simd128_block_state_t block_state = scrut.block_state; - Hacl_Hash_Blake2s_Simd128_init(block_state.snd, 0U, 32U); + uint8_t nn0 = block_state.snd; + uint8_t kk10 = block_state.fst; + Hacl_Hash_Blake2b_index i = { .key_length = kk10, .digest_length = nn0 }; + KRML_MAYBE_UNUSED_VAR(i); + Hacl_Hash_Blake2b_blake2_params *p = key.fst; + uint8_t kk1 = p->key_length; + uint8_t nn = p->digest_length; + Hacl_Hash_Blake2b_index i1 = { .key_length = kk1, .digest_length = nn }; + uint32_t kk2 = (uint32_t)i1.key_length; + uint8_t *k_1 = key.snd; + if (!(kk2 == 0U)) + { + uint8_t *sub_b = buf + kk2; + memset(sub_b, 0U, (64U - kk2) * sizeof (uint8_t)); + memcpy(buf, k_1, kk2 * sizeof (uint8_t)); + } + Hacl_Hash_Blake2b_blake2_params pv = p[0U]; + init_with_params(block_state.thd.snd, pv); + uint8_t kk11 = i.key_length; + uint32_t ite; + if (kk11 != 0U) + { + ite = 64U; + } + else + { + ite = 0U; + } Hacl_Hash_Blake2s_Simd128_state_t - tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)0U }; + tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; state[0U] = tmp; } +/** + Re-initialization function. The reinitialization API is tricky -- +you MUST reuse the same original parameters for digest (output) length and key +length. +*/ +void +Hacl_Hash_Blake2s_Simd128_reset_with_key_and_params( + Hacl_Hash_Blake2s_Simd128_state_t *s, + Hacl_Hash_Blake2b_blake2_params *p, + uint8_t *k +) +{ + index_of_state(s); + reset_raw(s, ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = p, .snd = k })); +} + +/** + Re-initialization function when there is a key. 
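It restores the state to its freshly allocated condition, with default
parameters and the newly supplied key, so a new message can be absorbed.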
Note that the key +size is not allowed to change, which is why this function does not take a key +length -- the key has to have the same length as the one originally passed to +`malloc_with_key`. +*/ +void Hacl_Hash_Blake2s_Simd128_reset_with_key(Hacl_Hash_Blake2s_Simd128_state_t *s, uint8_t *k) +{ + Hacl_Hash_Blake2b_index idx = index_of_state(s); + uint8_t salt[8U] = { 0U }; + uint8_t personal[8U] = { 0U }; + Hacl_Hash_Blake2b_blake2_params + p = + { + .digest_length = idx.digest_length, .key_length = idx.key_length, .fanout = 1U, .depth = 1U, + .leaf_length = 0U, .node_offset = 0ULL, .node_depth = 0U, .inner_length = 0U, .salt = salt, + .personal = personal + }; + Hacl_Hash_Blake2b_blake2_params p0 = p; + reset_raw(s, ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = &p0, .snd = k })); +} + +/** + Re-initialization function when there is no key +*/ +void Hacl_Hash_Blake2s_Simd128_reset(Hacl_Hash_Blake2s_Simd128_state_t *s) +{ + Hacl_Hash_Blake2s_Simd128_reset_with_key(s, NULL); +} + /** Update function when there is no key; 0 = success, 1 = max length exceeded */ @@ -578,8 +863,10 @@ Hacl_Hash_Blake2s_Simd128_update( if (!(sz1 == 0U)) { uint64_t prevlen = total_len1 - (uint64_t)sz1; - Lib_IntVector_Intrinsics_vec128 *wv = block_state1.fst; - Lib_IntVector_Intrinsics_vec128 *hash = block_state1.snd; + K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ + acc = block_state1.thd; + Lib_IntVector_Intrinsics_vec128 *wv = acc.fst; + Lib_IntVector_Intrinsics_vec128 *hash = acc.snd; uint32_t nb = 1U; Hacl_Hash_Blake2s_Simd128_update_multi(64U, wv, hash, prevlen, buf, nb); } @@ -597,8 +884,9 @@ Hacl_Hash_Blake2s_Simd128_update( uint32_t data2_len = chunk_len - data1_len; uint8_t *data1 = chunk; uint8_t *data2 = chunk + data1_len; - Lib_IntVector_Intrinsics_vec128 *wv = block_state1.fst; - Lib_IntVector_Intrinsics_vec128 *hash = block_state1.snd; + K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ acc = block_state1.thd; + Lib_IntVector_Intrinsics_vec128 *wv = acc.fst; + Lib_IntVector_Intrinsics_vec128 *hash = acc.snd; uint32_t nb = data1_len / 64U; Hacl_Hash_Blake2s_Simd128_update_multi(data1_len, wv, hash, total_len1, data1, nb); uint8_t *dst = buf; @@ -659,8 +947,10 @@ Hacl_Hash_Blake2s_Simd128_update( if (!(sz1 == 0U)) { uint64_t prevlen = total_len1 - (uint64_t)sz1; - Lib_IntVector_Intrinsics_vec128 *wv = block_state1.fst; - Lib_IntVector_Intrinsics_vec128 *hash = block_state1.snd; + K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ + acc = block_state1.thd; + Lib_IntVector_Intrinsics_vec128 *wv = acc.fst; + Lib_IntVector_Intrinsics_vec128 *hash = acc.snd; uint32_t nb = 1U; Hacl_Hash_Blake2s_Simd128_update_multi(64U, wv, hash, prevlen, buf, nb); } @@ -679,8 +969,9 @@ Hacl_Hash_Blake2s_Simd128_update( uint32_t data2_len = chunk_len - diff - data1_len; uint8_t *data1 = chunk2; uint8_t *data2 = chunk2 + data1_len; - Lib_IntVector_Intrinsics_vec128 *wv = block_state1.fst; - Lib_IntVector_Intrinsics_vec128 *hash = block_state1.snd; + K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ acc = block_state1.thd; + Lib_IntVector_Intrinsics_vec128 *wv = acc.fst; + Lib_IntVector_Intrinsics_vec128 *hash = acc.snd; uint32_t nb = data1_len / 64U; Hacl_Hash_Blake2s_Simd128_update_multi(data1_len, wv, hash, total_len1, data1, nb); uint8_t *dst = buf; @@ -704,6 +995,10 @@ Hacl_Hash_Blake2s_Simd128_update( void Hacl_Hash_Blake2s_Simd128_digest(Hacl_Hash_Blake2s_Simd128_state_t *state, uint8_t *output) { +
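/* Fetch the (key_length, digest_length) pair stored in the block state: the
   digest below is computed on a stack-allocated copy of the state, so the
   caller's state keeps absorbing input afterwards, and exactly
   `digest_length` bytes are written to `output`. */ +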
Hacl_Hash_Blake2s_Simd128_block_state_t block_state0 = (*state).block_state; + uint8_t nn = block_state0.snd; + uint8_t kk1 = block_state0.fst; + Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; Hacl_Hash_Blake2s_Simd128_state_t scrut = *state; Hacl_Hash_Blake2s_Simd128_block_state_t block_state = scrut.block_state; uint8_t *buf_ = scrut.buf; @@ -720,9 +1015,11 @@ Hacl_Hash_Blake2s_Simd128_digest(Hacl_Hash_Blake2s_Simd128_state_t *state, uint8 uint8_t *buf_1 = buf_; KRML_PRE_ALIGN(16) Lib_IntVector_Intrinsics_vec128 wv0[4U] KRML_POST_ALIGN(16) = { 0U }; KRML_PRE_ALIGN(16) Lib_IntVector_Intrinsics_vec128 b[4U] KRML_POST_ALIGN(16) = { 0U }; - Hacl_Hash_Blake2s_Simd128_block_state_t tmp_block_state = { .fst = wv0, .snd = b }; - Lib_IntVector_Intrinsics_vec128 *src_b = block_state.snd; - Lib_IntVector_Intrinsics_vec128 *dst_b = tmp_block_state.snd; + Hacl_Hash_Blake2s_Simd128_block_state_t + tmp_block_state = + { .fst = i.key_length, .snd = i.digest_length, .thd = { .fst = wv0, .snd = b } }; + Lib_IntVector_Intrinsics_vec128 *src_b = block_state.thd.snd; + Lib_IntVector_Intrinsics_vec128 *dst_b = tmp_block_state.thd.snd; memcpy(dst_b, src_b, 4U * sizeof (Lib_IntVector_Intrinsics_vec128)); uint64_t prev_len = total_len - (uint64_t)r; uint32_t ite; @@ -736,15 +1033,20 @@ Hacl_Hash_Blake2s_Simd128_digest(Hacl_Hash_Blake2s_Simd128_state_t *state, uint8 } uint8_t *buf_last = buf_1 + r - ite; uint8_t *buf_multi = buf_1; - Lib_IntVector_Intrinsics_vec128 *wv1 = tmp_block_state.fst; - Lib_IntVector_Intrinsics_vec128 *hash0 = tmp_block_state.snd; + K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ + acc0 = tmp_block_state.thd; + Lib_IntVector_Intrinsics_vec128 *wv1 = acc0.fst; + Lib_IntVector_Intrinsics_vec128 *hash0 = acc0.snd; uint32_t nb = 0U; Hacl_Hash_Blake2s_Simd128_update_multi(0U, wv1, hash0, prev_len, buf_multi, nb); uint64_t prev_len_last = total_len - (uint64_t)r; - Lib_IntVector_Intrinsics_vec128 *wv = tmp_block_state.fst; - Lib_IntVector_Intrinsics_vec128 *hash = tmp_block_state.snd; + K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ + acc = tmp_block_state.thd; + Lib_IntVector_Intrinsics_vec128 *wv = acc.fst; + Lib_IntVector_Intrinsics_vec128 *hash = acc.snd; Hacl_Hash_Blake2s_Simd128_update_last(r, wv, hash, prev_len_last, r, buf_last); - Hacl_Hash_Blake2s_Simd128_finish(32U, output, tmp_block_state.snd); + uint8_t nn0 = tmp_block_state.snd; + Hacl_Hash_Blake2s_Simd128_finish((uint32_t)nn0, output, tmp_block_state.thd.snd); } /** @@ -755,19 +1057,60 @@ void Hacl_Hash_Blake2s_Simd128_free(Hacl_Hash_Blake2s_Simd128_state_t *state) Hacl_Hash_Blake2s_Simd128_state_t scrut = *state; uint8_t *buf = scrut.buf; Hacl_Hash_Blake2s_Simd128_block_state_t block_state = scrut.block_state; - Lib_IntVector_Intrinsics_vec128 *wv = block_state.fst; - Lib_IntVector_Intrinsics_vec128 *b = block_state.snd; + Lib_IntVector_Intrinsics_vec128 *b = block_state.thd.snd; + Lib_IntVector_Intrinsics_vec128 *wv = block_state.thd.fst; KRML_ALIGNED_FREE(wv); KRML_ALIGNED_FREE(b); KRML_HOST_FREE(buf); KRML_HOST_FREE(state); } +/** + Copying. The key length (or absence thereof) must match between source and destination. 
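+A minimal sketch (identifiers illustrative): `Hacl_Hash_Blake2s_Simd128_state_t
+*fork = Hacl_Hash_Blake2s_Simd128_copy(st);` duplicates the internal buffer,
+the vectorized hash rows, and the (key_length, digest_length) index, after
+which `st` and `fork` evolve independently; release each with
+`Hacl_Hash_Blake2s_Simd128_free`.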
+*/ +Hacl_Hash_Blake2s_Simd128_state_t +*Hacl_Hash_Blake2s_Simd128_copy(Hacl_Hash_Blake2s_Simd128_state_t *state) +{ + Hacl_Hash_Blake2s_Simd128_state_t scrut = *state; + Hacl_Hash_Blake2s_Simd128_block_state_t block_state0 = scrut.block_state; + uint8_t *buf0 = scrut.buf; + uint64_t total_len0 = scrut.total_len; + uint8_t nn = block_state0.snd; + uint8_t kk1 = block_state0.fst; + Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; + uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(64U, sizeof (uint8_t)); + memcpy(buf, buf0, 64U * sizeof (uint8_t)); + Lib_IntVector_Intrinsics_vec128 + *wv = + (Lib_IntVector_Intrinsics_vec128 *)KRML_ALIGNED_MALLOC(16, + sizeof (Lib_IntVector_Intrinsics_vec128) * 4U); + memset(wv, 0U, 4U * sizeof (Lib_IntVector_Intrinsics_vec128)); + Lib_IntVector_Intrinsics_vec128 + *b = + (Lib_IntVector_Intrinsics_vec128 *)KRML_ALIGNED_MALLOC(16, + sizeof (Lib_IntVector_Intrinsics_vec128) * 4U); + memset(b, 0U, 4U * sizeof (Lib_IntVector_Intrinsics_vec128)); + Hacl_Hash_Blake2s_Simd128_block_state_t + block_state = { .fst = i.key_length, .snd = i.digest_length, .thd = { .fst = wv, .snd = b } }; + Lib_IntVector_Intrinsics_vec128 *src_b = block_state0.thd.snd; + Lib_IntVector_Intrinsics_vec128 *dst_b = block_state.thd.snd; + memcpy(dst_b, src_b, 4U * sizeof (Lib_IntVector_Intrinsics_vec128)); + Hacl_Hash_Blake2s_Simd128_state_t + s = { .block_state = block_state, .buf = buf, .total_len = total_len0 }; + Hacl_Hash_Blake2s_Simd128_state_t + *p = + (Hacl_Hash_Blake2s_Simd128_state_t *)KRML_HOST_MALLOC(sizeof ( + Hacl_Hash_Blake2s_Simd128_state_t + )); + p[0U] = s; + return p; +} + /** Write the BLAKE2s digest of message `input` using key `key` into `output`. @param output Pointer to `output_len` bytes of memory where the digest is written to. @param output_len Length of the to-be-generated digest with 1 <= `output_len` <= 32. @param input Pointer to `input_len` bytes of memory where the input message is read from. @param input_len Length of the input message. @param key Pointer to `key_len` bytes of memory where the key is read from.
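/* A usage sketch for the one-shot function above; `example_blake2s_simd128_keyed`,
   `msg`, `msg_len`, and `key` are illustrative names, not part of the generated
   sources. With key = NULL and key_len = 0U the call computes the plain,
   unkeyed BLAKE2s digest. */
static void example_blake2s_simd128_keyed(uint8_t *msg, uint32_t msg_len, uint8_t *key)
{
  uint8_t tag[32U] = { 0U };
  /* 32-byte digest of `msg` under a 16-byte key. */
  Hacl_Hash_Blake2s_Simd128_hash_with_key(tag, 32U, msg, msg_len, key, 16U);
}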
@@ -788,7 +1131,88 @@ Hacl_Hash_Blake2s_Simd128_hash_with_key( Hacl_Hash_Blake2s_Simd128_init(b, key_len, output_len); update(b1, b, key_len, key, input_len, input); Hacl_Hash_Blake2s_Simd128_finish(output_len, output, b); - Lib_Memzero0_memzero(b1, 4U, Lib_IntVector_Intrinsics_vec128); - Lib_Memzero0_memzero(b, 4U, Lib_IntVector_Intrinsics_vec128); + Lib_Memzero0_memzero(b1, 4U, Lib_IntVector_Intrinsics_vec128, void *); + Lib_Memzero0_memzero(b, 4U, Lib_IntVector_Intrinsics_vec128, void *); +} + +void +Hacl_Hash_Blake2s_Simd128_hash_with_key_and_params( + uint8_t *output, + uint8_t *input, + uint32_t input_len, + Hacl_Hash_Blake2b_blake2_params params, + uint8_t *key +) +{ + KRML_PRE_ALIGN(16) Lib_IntVector_Intrinsics_vec128 b[4U] KRML_POST_ALIGN(16) = { 0U }; + KRML_PRE_ALIGN(16) Lib_IntVector_Intrinsics_vec128 b1[4U] KRML_POST_ALIGN(16) = { 0U }; + uint32_t tmp[8U] = { 0U }; + Lib_IntVector_Intrinsics_vec128 *r0 = b; + Lib_IntVector_Intrinsics_vec128 *r1 = b + 1U; + Lib_IntVector_Intrinsics_vec128 *r2 = b + 2U; + Lib_IntVector_Intrinsics_vec128 *r3 = b + 3U; + uint32_t iv0 = Hacl_Hash_Blake2b_ivTable_S[0U]; + uint32_t iv1 = Hacl_Hash_Blake2b_ivTable_S[1U]; + uint32_t iv2 = Hacl_Hash_Blake2b_ivTable_S[2U]; + uint32_t iv3 = Hacl_Hash_Blake2b_ivTable_S[3U]; + uint32_t iv4 = Hacl_Hash_Blake2b_ivTable_S[4U]; + uint32_t iv5 = Hacl_Hash_Blake2b_ivTable_S[5U]; + uint32_t iv6 = Hacl_Hash_Blake2b_ivTable_S[6U]; + uint32_t iv7 = Hacl_Hash_Blake2b_ivTable_S[7U]; + r2[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv0, iv1, iv2, iv3); + r3[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv4, iv5, iv6, iv7); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 4U; + uint8_t *bj = params.salt + i * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 6U; + uint8_t *bj = params.personal + i * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); + tmp[0U] = + (uint32_t)params.digest_length + ^ + ((uint32_t)params.key_length + << 8U + ^ ((uint32_t)params.fanout << 16U ^ (uint32_t)params.depth << 24U)); + tmp[1U] = params.leaf_length; + tmp[2U] = (uint32_t)params.node_offset; + tmp[3U] = + (uint32_t)(params.node_offset >> 32U) + ^ ((uint32_t)params.node_depth << 16U ^ (uint32_t)params.inner_length << 24U); + uint32_t tmp0 = tmp[0U]; + uint32_t tmp1 = tmp[1U]; + uint32_t tmp2 = tmp[2U]; + uint32_t tmp3 = tmp[3U]; + uint32_t tmp4 = tmp[4U]; + uint32_t tmp5 = tmp[5U]; + uint32_t tmp6 = tmp[6U]; + uint32_t tmp7 = tmp[7U]; + uint32_t iv0_ = iv0 ^ tmp0; + uint32_t iv1_ = iv1 ^ tmp1; + uint32_t iv2_ = iv2 ^ tmp2; + uint32_t iv3_ = iv3 ^ tmp3; + uint32_t iv4_ = iv4 ^ tmp4; + uint32_t iv5_ = iv5 ^ tmp5; + uint32_t iv6_ = iv6 ^ tmp6; + uint32_t iv7_ = iv7 ^ tmp7; + r0[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv0_, iv1_, iv2_, iv3_); + r1[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv4_, iv5_, iv6_, iv7_); + update(b1, b, (uint32_t)params.key_length, key, input_len, input); + Hacl_Hash_Blake2s_Simd128_finish((uint32_t)params.digest_length, output, b); + Lib_Memzero0_memzero(b1, 4U, Lib_IntVector_Intrinsics_vec128, void *); + Lib_Memzero0_memzero(b, 4U, Lib_IntVector_Intrinsics_vec128, void *); } diff --git a/src/Hacl_Hash_SHA3.c b/src/Hacl_Hash_SHA3.c index 4f502866..89bb0491 100644 --- a/src/Hacl_Hash_SHA3.c +++ b/src/Hacl_Hash_SHA3.c @@ -25,6 +25,151 @@ #include "internal/Hacl_Hash_SHA3.h" +const +uint32_t +Hacl_Hash_SHA3_keccak_rotc[24U] = + { + 1U, 3U, 6U, 10U, 15U, 21U,
28U, 36U, 45U, 55U, 2U, 14U, 27U, 41U, 56U, 8U, 25U, 43U, 62U, 18U, + 39U, 61U, 20U, 44U + }; + +const +uint32_t +Hacl_Hash_SHA3_keccak_piln[24U] = + { + 10U, 7U, 11U, 17U, 18U, 3U, 5U, 16U, 8U, 21U, 24U, 4U, 15U, 23U, 19U, 13U, 12U, 2U, 20U, 14U, + 22U, 9U, 6U, 1U + }; + +const +uint64_t +Hacl_Hash_SHA3_keccak_rndc[24U] = + { + 0x0000000000000001ULL, 0x0000000000008082ULL, 0x800000000000808aULL, 0x8000000080008000ULL, + 0x000000000000808bULL, 0x0000000080000001ULL, 0x8000000080008081ULL, 0x8000000000008009ULL, + 0x000000000000008aULL, 0x0000000000000088ULL, 0x0000000080008009ULL, 0x000000008000000aULL, + 0x000000008000808bULL, 0x800000000000008bULL, 0x8000000000008089ULL, 0x8000000000008003ULL, + 0x8000000000008002ULL, 0x8000000000000080ULL, 0x000000000000800aULL, 0x800000008000000aULL, + 0x8000000080008081ULL, 0x8000000000008080ULL, 0x0000000080000001ULL, 0x8000000080008008ULL + }; + +static void absorb_inner_32(uint8_t *b, uint64_t *s) +{ + uint64_t ws[32U] = { 0U }; + uint8_t *b1 = b; + uint64_t u = load64_le(b1); + ws[0U] = u; + uint64_t u0 = load64_le(b1 + 8U); + ws[1U] = u0; + uint64_t u1 = load64_le(b1 + 16U); + ws[2U] = u1; + uint64_t u2 = load64_le(b1 + 24U); + ws[3U] = u2; + uint64_t u3 = load64_le(b1 + 32U); + ws[4U] = u3; + uint64_t u4 = load64_le(b1 + 40U); + ws[5U] = u4; + uint64_t u5 = load64_le(b1 + 48U); + ws[6U] = u5; + uint64_t u6 = load64_le(b1 + 56U); + ws[7U] = u6; + uint64_t u7 = load64_le(b1 + 64U); + ws[8U] = u7; + uint64_t u8 = load64_le(b1 + 72U); + ws[9U] = u8; + uint64_t u9 = load64_le(b1 + 80U); + ws[10U] = u9; + uint64_t u10 = load64_le(b1 + 88U); + ws[11U] = u10; + uint64_t u11 = load64_le(b1 + 96U); + ws[12U] = u11; + uint64_t u12 = load64_le(b1 + 104U); + ws[13U] = u12; + uint64_t u13 = load64_le(b1 + 112U); + ws[14U] = u13; + uint64_t u14 = load64_le(b1 + 120U); + ws[15U] = u14; + uint64_t u15 = load64_le(b1 + 128U); + ws[16U] = u15; + uint64_t u16 = load64_le(b1 + 136U); + ws[17U] = u16; + uint64_t u17 = load64_le(b1 + 144U); + ws[18U] = u17; + uint64_t u18 = load64_le(b1 + 152U); + ws[19U] = u18; + uint64_t u19 = load64_le(b1 + 160U); + ws[20U] = u19; + uint64_t u20 = load64_le(b1 + 168U); + ws[21U] = u20; + uint64_t u21 = load64_le(b1 + 176U); + ws[22U] = u21; + uint64_t u22 = load64_le(b1 + 184U); + ws[23U] = u22; + uint64_t u23 = load64_le(b1 + 192U); + ws[24U] = u23; + uint64_t u24 = load64_le(b1 + 200U); + ws[25U] = u24; + uint64_t u25 = load64_le(b1 + 208U); + ws[26U] = u25; + uint64_t u26 = load64_le(b1 + 216U); + ws[27U] = u26; + uint64_t u27 = load64_le(b1 + 224U); + ws[28U] = u27; + uint64_t u28 = load64_le(b1 + 232U); + ws[29U] = u28; + uint64_t u29 = load64_le(b1 + 240U); + ws[30U] = u29; + uint64_t u30 = load64_le(b1 + 248U); + ws[31U] = u30; + for (uint32_t i = 0U; i < 25U; i++) + { + s[i] = s[i] ^ ws[i]; + } + for (uint32_t i0 = 0U; i0 < 24U; i0++) + { + uint64_t _C[5U] = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + _C[i] = s[i + 0U] ^ (s[i + 5U] ^ (s[i + 10U] ^ (s[i + 15U] ^ s[i + 20U])));); + KRML_MAYBE_FOR5(i1, + 0U, + 5U, + 1U, + uint64_t uu____0 = _C[(i1 + 1U) % 5U]; + uint64_t _D = _C[(i1 + 4U) % 5U] ^ (uu____0 << 1U | uu____0 >> 63U); + KRML_MAYBE_FOR5(i, 0U, 5U, 1U, s[i1 + 5U * i] = s[i1 + 5U * i] ^ _D;);); + uint64_t x = s[1U]; + uint64_t current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Hash_SHA3_keccak_piln[i]; + uint32_t r = Hacl_Hash_SHA3_keccak_rotc[i]; + uint64_t temp = s[_Y]; + uint64_t uu____1 = current; + s[_Y] = uu____1 << r | uu____1 >> (64U - r); + current = temp; + } + 
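/* chi: within each 5-lane row, every lane is XORed with the AND of the
   complement of the next lane and the lane after that; iota then folds the
   round constant into lane (0,0). */ +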
KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t v0 = s[0U + 5U * i] ^ (~s[1U + 5U * i] & s[2U + 5U * i]); + uint64_t v1 = s[1U + 5U * i] ^ (~s[2U + 5U * i] & s[3U + 5U * i]); + uint64_t v2 = s[2U + 5U * i] ^ (~s[3U + 5U * i] & s[4U + 5U * i]); + uint64_t v3 = s[3U + 5U * i] ^ (~s[4U + 5U * i] & s[0U + 5U * i]); + uint64_t v4 = s[4U + 5U * i] ^ (~s[0U + 5U * i] & s[1U + 5U * i]); + s[0U + 5U * i] = v0; + s[1U + 5U * i] = v1; + s[2U + 5U * i] = v2; + s[3U + 5U * i] = v3; + s[4U + 5U * i] = v4;); + uint64_t c = Hacl_Hash_SHA3_keccak_rndc[i0]; + s[0U] = s[0U] ^ c; + } +} + static uint32_t block_len(Spec_Hash_Definitions_hash_alg a) { switch (a) @@ -97,10 +242,17 @@ Hacl_Hash_SHA3_update_multi_sha3( uint32_t n_blocks ) { - for (uint32_t i = 0U; i < n_blocks; i++) + uint32_t l = block_len(a) * n_blocks; + for (uint32_t i = 0U; i < l / block_len(a); i++) { - uint8_t *block = blocks + i * block_len(a); - Hacl_Hash_SHA3_absorb_inner(block_len(a), block, s); + uint8_t b[256U] = { 0U }; + uint8_t *b_ = b; + uint8_t *b0 = blocks; + uint8_t *bl0 = b_; + uint8_t *uu____0 = b0 + i * block_len(a); + memcpy(bl0, uu____0, block_len(a) * sizeof (uint8_t)); + block_len(a); + absorb_inner_32(b_, s); } } @@ -124,37 +276,272 @@ Hacl_Hash_SHA3_update_last_sha3( uint32_t len = block_len(a); if (input_len == len) { - Hacl_Hash_SHA3_absorb_inner(len, input, s); - uint8_t lastBlock_[200U] = { 0U }; - uint8_t *lastBlock = lastBlock_; - memcpy(lastBlock, input + input_len, 0U * sizeof (uint8_t)); - lastBlock[0U] = suffix; - Hacl_Hash_SHA3_loadState(len, lastBlock, s); - if (!(((uint32_t)suffix & 0x80U) == 0U) && 0U == len - 1U) - { - Hacl_Hash_SHA3_state_permute(s); - } - uint8_t nextBlock_[200U] = { 0U }; - uint8_t *nextBlock = nextBlock_; - nextBlock[len - 1U] = 0x80U; - Hacl_Hash_SHA3_loadState(len, nextBlock, s); - Hacl_Hash_SHA3_state_permute(s); + uint8_t b1[256U] = { 0U }; + uint8_t *b_ = b1; + uint8_t *b00 = input; + uint8_t *bl00 = b_; + memcpy(bl00, b00 + 0U * len, len * sizeof (uint8_t)); + absorb_inner_32(b_, s); + uint8_t b2[256U] = { 0U }; + uint8_t *b_0 = b2; + uint32_t rem = 0U % len; + uint8_t *b01 = input + input_len; + uint8_t *bl0 = b_0; + memcpy(bl0, b01 + 0U - rem, rem * sizeof (uint8_t)); + uint8_t *b02 = b_0; + b02[0U % len] = suffix; + uint64_t ws[32U] = { 0U }; + uint8_t *b = b_0; + uint64_t u = load64_le(b); + ws[0U] = u; + uint64_t u0 = load64_le(b + 8U); + ws[1U] = u0; + uint64_t u1 = load64_le(b + 16U); + ws[2U] = u1; + uint64_t u2 = load64_le(b + 24U); + ws[3U] = u2; + uint64_t u3 = load64_le(b + 32U); + ws[4U] = u3; + uint64_t u4 = load64_le(b + 40U); + ws[5U] = u4; + uint64_t u5 = load64_le(b + 48U); + ws[6U] = u5; + uint64_t u6 = load64_le(b + 56U); + ws[7U] = u6; + uint64_t u7 = load64_le(b + 64U); + ws[8U] = u7; + uint64_t u8 = load64_le(b + 72U); + ws[9U] = u8; + uint64_t u9 = load64_le(b + 80U); + ws[10U] = u9; + uint64_t u10 = load64_le(b + 88U); + ws[11U] = u10; + uint64_t u11 = load64_le(b + 96U); + ws[12U] = u11; + uint64_t u12 = load64_le(b + 104U); + ws[13U] = u12; + uint64_t u13 = load64_le(b + 112U); + ws[14U] = u13; + uint64_t u14 = load64_le(b + 120U); + ws[15U] = u14; + uint64_t u15 = load64_le(b + 128U); + ws[16U] = u15; + uint64_t u16 = load64_le(b + 136U); + ws[17U] = u16; + uint64_t u17 = load64_le(b + 144U); + ws[18U] = u17; + uint64_t u18 = load64_le(b + 152U); + ws[19U] = u18; + uint64_t u19 = load64_le(b + 160U); + ws[20U] = u19; + uint64_t u20 = load64_le(b + 168U); + ws[21U] = u20; + uint64_t u21 = load64_le(b + 176U); + ws[22U] = u21; + uint64_t u22 = 
load64_le(b + 184U); + ws[23U] = u22; + uint64_t u23 = load64_le(b + 192U); + ws[24U] = u23; + uint64_t u24 = load64_le(b + 200U); + ws[25U] = u24; + uint64_t u25 = load64_le(b + 208U); + ws[26U] = u25; + uint64_t u26 = load64_le(b + 216U); + ws[27U] = u26; + uint64_t u27 = load64_le(b + 224U); + ws[28U] = u27; + uint64_t u28 = load64_le(b + 232U); + ws[29U] = u28; + uint64_t u29 = load64_le(b + 240U); + ws[30U] = u29; + uint64_t u30 = load64_le(b + 248U); + ws[31U] = u30; + for (uint32_t i = 0U; i < 25U; i++) + { + s[i] = s[i] ^ ws[i]; + } + if (!(((uint32_t)suffix & 0x80U) == 0U) && 0U % len == len - 1U) + { + for (uint32_t i0 = 0U; i0 < 24U; i0++) + { + uint64_t _C[5U] = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + _C[i] = s[i + 0U] ^ (s[i + 5U] ^ (s[i + 10U] ^ (s[i + 15U] ^ s[i + 20U])));); + KRML_MAYBE_FOR5(i1, + 0U, + 5U, + 1U, + uint64_t uu____0 = _C[(i1 + 1U) % 5U]; + uint64_t _D = _C[(i1 + 4U) % 5U] ^ (uu____0 << 1U | uu____0 >> 63U); + KRML_MAYBE_FOR5(i, 0U, 5U, 1U, s[i1 + 5U * i] = s[i1 + 5U * i] ^ _D;);); + uint64_t x = s[1U]; + uint64_t current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Hash_SHA3_keccak_piln[i]; + uint32_t r = Hacl_Hash_SHA3_keccak_rotc[i]; + uint64_t temp = s[_Y]; + uint64_t uu____1 = current; + s[_Y] = uu____1 << r | uu____1 >> (64U - r); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t v0 = s[0U + 5U * i] ^ (~s[1U + 5U * i] & s[2U + 5U * i]); + uint64_t v1 = s[1U + 5U * i] ^ (~s[2U + 5U * i] & s[3U + 5U * i]); + uint64_t v2 = s[2U + 5U * i] ^ (~s[3U + 5U * i] & s[4U + 5U * i]); + uint64_t v3 = s[3U + 5U * i] ^ (~s[4U + 5U * i] & s[0U + 5U * i]); + uint64_t v4 = s[4U + 5U * i] ^ (~s[0U + 5U * i] & s[1U + 5U * i]); + s[0U + 5U * i] = v0; + s[1U + 5U * i] = v1; + s[2U + 5U * i] = v2; + s[3U + 5U * i] = v3; + s[4U + 5U * i] = v4;); + uint64_t c = Hacl_Hash_SHA3_keccak_rndc[i0]; + s[0U] = s[0U] ^ c; + } + } + uint8_t b3[256U] = { 0U }; + uint8_t *b4 = b3; + uint8_t *b0 = b4; + b0[len - 1U] = 0x80U; + absorb_inner_32(b4, s); return; } - uint8_t lastBlock_[200U] = { 0U }; - uint8_t *lastBlock = lastBlock_; - memcpy(lastBlock, input, input_len * sizeof (uint8_t)); - lastBlock[input_len] = suffix; - Hacl_Hash_SHA3_loadState(len, lastBlock, s); - if (!(((uint32_t)suffix & 0x80U) == 0U) && input_len == len - 1U) + uint8_t b1[256U] = { 0U }; + uint8_t *b_ = b1; + uint32_t rem = input_len % len; + uint8_t *b00 = input; + uint8_t *bl0 = b_; + memcpy(bl0, b00 + input_len - rem, rem * sizeof (uint8_t)); + uint8_t *b01 = b_; + b01[input_len % len] = suffix; + uint64_t ws[32U] = { 0U }; + uint8_t *b = b_; + uint64_t u = load64_le(b); + ws[0U] = u; + uint64_t u0 = load64_le(b + 8U); + ws[1U] = u0; + uint64_t u1 = load64_le(b + 16U); + ws[2U] = u1; + uint64_t u2 = load64_le(b + 24U); + ws[3U] = u2; + uint64_t u3 = load64_le(b + 32U); + ws[4U] = u3; + uint64_t u4 = load64_le(b + 40U); + ws[5U] = u4; + uint64_t u5 = load64_le(b + 48U); + ws[6U] = u5; + uint64_t u6 = load64_le(b + 56U); + ws[7U] = u6; + uint64_t u7 = load64_le(b + 64U); + ws[8U] = u7; + uint64_t u8 = load64_le(b + 72U); + ws[9U] = u8; + uint64_t u9 = load64_le(b + 80U); + ws[10U] = u9; + uint64_t u10 = load64_le(b + 88U); + ws[11U] = u10; + uint64_t u11 = load64_le(b + 96U); + ws[12U] = u11; + uint64_t u12 = load64_le(b + 104U); + ws[13U] = u12; + uint64_t u13 = load64_le(b + 112U); + ws[14U] = u13; + uint64_t u14 = load64_le(b + 120U); + ws[15U] = u14; + uint64_t u15 = load64_le(b + 128U); + ws[16U] = u15; + uint64_t u16 = load64_le(b + 136U); + ws[17U] = u16; 
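+ /* The 256-byte staging buffer is read as 32 little-endian 64-bit lanes,
+    though only the first 25 are XORed into the 5x5 Keccak state below; the
+    oversized buffer simply keeps these fixed-offset loads in bounds. */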
+ uint64_t u17 = load64_le(b + 144U); + ws[18U] = u17; + uint64_t u18 = load64_le(b + 152U); + ws[19U] = u18; + uint64_t u19 = load64_le(b + 160U); + ws[20U] = u19; + uint64_t u20 = load64_le(b + 168U); + ws[21U] = u20; + uint64_t u21 = load64_le(b + 176U); + ws[22U] = u21; + uint64_t u22 = load64_le(b + 184U); + ws[23U] = u22; + uint64_t u23 = load64_le(b + 192U); + ws[24U] = u23; + uint64_t u24 = load64_le(b + 200U); + ws[25U] = u24; + uint64_t u25 = load64_le(b + 208U); + ws[26U] = u25; + uint64_t u26 = load64_le(b + 216U); + ws[27U] = u26; + uint64_t u27 = load64_le(b + 224U); + ws[28U] = u27; + uint64_t u28 = load64_le(b + 232U); + ws[29U] = u28; + uint64_t u29 = load64_le(b + 240U); + ws[30U] = u29; + uint64_t u30 = load64_le(b + 248U); + ws[31U] = u30; + for (uint32_t i = 0U; i < 25U; i++) { - Hacl_Hash_SHA3_state_permute(s); + s[i] = s[i] ^ ws[i]; + } + if (!(((uint32_t)suffix & 0x80U) == 0U) && input_len % len == len - 1U) + { + for (uint32_t i0 = 0U; i0 < 24U; i0++) + { + uint64_t _C[5U] = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + _C[i] = s[i + 0U] ^ (s[i + 5U] ^ (s[i + 10U] ^ (s[i + 15U] ^ s[i + 20U])));); + KRML_MAYBE_FOR5(i1, + 0U, + 5U, + 1U, + uint64_t uu____2 = _C[(i1 + 1U) % 5U]; + uint64_t _D = _C[(i1 + 4U) % 5U] ^ (uu____2 << 1U | uu____2 >> 63U); + KRML_MAYBE_FOR5(i, 0U, 5U, 1U, s[i1 + 5U * i] = s[i1 + 5U * i] ^ _D;);); + uint64_t x = s[1U]; + uint64_t current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Hash_SHA3_keccak_piln[i]; + uint32_t r = Hacl_Hash_SHA3_keccak_rotc[i]; + uint64_t temp = s[_Y]; + uint64_t uu____3 = current; + s[_Y] = uu____3 << r | uu____3 >> (64U - r); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t v0 = s[0U + 5U * i] ^ (~s[1U + 5U * i] & s[2U + 5U * i]); + uint64_t v1 = s[1U + 5U * i] ^ (~s[2U + 5U * i] & s[3U + 5U * i]); + uint64_t v2 = s[2U + 5U * i] ^ (~s[3U + 5U * i] & s[4U + 5U * i]); + uint64_t v3 = s[3U + 5U * i] ^ (~s[4U + 5U * i] & s[0U + 5U * i]); + uint64_t v4 = s[4U + 5U * i] ^ (~s[0U + 5U * i] & s[1U + 5U * i]); + s[0U + 5U * i] = v0; + s[1U + 5U * i] = v1; + s[2U + 5U * i] = v2; + s[3U + 5U * i] = v3; + s[4U + 5U * i] = v4;); + uint64_t c = Hacl_Hash_SHA3_keccak_rndc[i0]; + s[0U] = s[0U] ^ c; + } } - uint8_t nextBlock_[200U] = { 0U }; - uint8_t *nextBlock = nextBlock_; - nextBlock[len - 1U] = 0x80U; - Hacl_Hash_SHA3_loadState(len, nextBlock, s); - Hacl_Hash_SHA3_state_permute(s); + uint8_t b2[256U] = { 0U }; + uint8_t *b3 = b2; + uint8_t *b0 = b3; + b0[len - 1U] = 0x80U; + absorb_inner_32(b3, s); } typedef struct hash_buf2_s @@ -463,10 +850,139 @@ digest_( uint64_t *s = tmp_block_state.snd; if (a11 == Spec_Hash_Definitions_Shake128 || a11 == Spec_Hash_Definitions_Shake256) { - Hacl_Hash_SHA3_squeeze0(s, block_len(a11), l, output); + for (uint32_t i0 = 0U; i0 < l / block_len(a11); i0++) + { + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + uint8_t *b0 = output; + uint8_t *uu____0 = hbuf; + memcpy(b0 + i0 * block_len(a11), uu____0, block_len(a11) * sizeof (uint8_t)); + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + uint64_t _C[5U] = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + _C[i] = s[i + 0U] ^ (s[i + 5U] ^ (s[i + 10U] ^ (s[i + 15U] ^ s[i + 20U])));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + uint64_t uu____1 = _C[(i2 + 1U) % 5U]; + uint64_t _D = _C[(i2 + 4U) % 5U] ^ (uu____1 << 1U | uu____1 >> 63U); + KRML_MAYBE_FOR5(i, 0U, 5U, 1U, s[i2 + 5U 
* i] = s[i2 + 5U * i] ^ _D;);); + uint64_t x = s[1U]; + uint64_t current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Hash_SHA3_keccak_piln[i]; + uint32_t r1 = Hacl_Hash_SHA3_keccak_rotc[i]; + uint64_t temp = s[_Y]; + uint64_t uu____2 = current; + s[_Y] = uu____2 << r1 | uu____2 >> (64U - r1); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t v0 = s[0U + 5U * i] ^ (~s[1U + 5U * i] & s[2U + 5U * i]); + uint64_t v1 = s[1U + 5U * i] ^ (~s[2U + 5U * i] & s[3U + 5U * i]); + uint64_t v2 = s[2U + 5U * i] ^ (~s[3U + 5U * i] & s[4U + 5U * i]); + uint64_t v3 = s[3U + 5U * i] ^ (~s[4U + 5U * i] & s[0U + 5U * i]); + uint64_t v4 = s[4U + 5U * i] ^ (~s[0U + 5U * i] & s[1U + 5U * i]); + s[0U + 5U * i] = v0; + s[1U + 5U * i] = v1; + s[2U + 5U * i] = v2; + s[3U + 5U * i] = v3; + s[4U + 5U * i] = v4;); + uint64_t c = Hacl_Hash_SHA3_keccak_rndc[i1]; + s[0U] = s[0U] ^ c; + } + } + uint32_t remOut = l % block_len(a11); + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + memcpy(output + l - remOut, hbuf, remOut * sizeof (uint8_t)); return; } - Hacl_Hash_SHA3_squeeze0(s, block_len(a11), hash_len(a11), output); + for (uint32_t i0 = 0U; i0 < hash_len(a11) / block_len(a11); i0++) + { + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + uint8_t *b0 = output; + uint8_t *uu____3 = hbuf; + memcpy(b0 + i0 * block_len(a11), uu____3, block_len(a11) * sizeof (uint8_t)); + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + uint64_t _C[5U] = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + _C[i] = s[i + 0U] ^ (s[i + 5U] ^ (s[i + 10U] ^ (s[i + 15U] ^ s[i + 20U])));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + uint64_t uu____4 = _C[(i2 + 1U) % 5U]; + uint64_t _D = _C[(i2 + 4U) % 5U] ^ (uu____4 << 1U | uu____4 >> 63U); + KRML_MAYBE_FOR5(i, 0U, 5U, 1U, s[i2 + 5U * i] = s[i2 + 5U * i] ^ _D;);); + uint64_t x = s[1U]; + uint64_t current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Hash_SHA3_keccak_piln[i]; + uint32_t r1 = Hacl_Hash_SHA3_keccak_rotc[i]; + uint64_t temp = s[_Y]; + uint64_t uu____5 = current; + s[_Y] = uu____5 << r1 | uu____5 >> (64U - r1); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t v0 = s[0U + 5U * i] ^ (~s[1U + 5U * i] & s[2U + 5U * i]); + uint64_t v1 = s[1U + 5U * i] ^ (~s[2U + 5U * i] & s[3U + 5U * i]); + uint64_t v2 = s[2U + 5U * i] ^ (~s[3U + 5U * i] & s[4U + 5U * i]); + uint64_t v3 = s[3U + 5U * i] ^ (~s[4U + 5U * i] & s[0U + 5U * i]); + uint64_t v4 = s[4U + 5U * i] ^ (~s[0U + 5U * i] & s[1U + 5U * i]); + s[0U + 5U * i] = v0; + s[1U + 5U * i] = v1; + s[2U + 5U * i] = v2; + s[3U + 5U * i] = v3; + s[4U + 5U * i] = v4;); + uint64_t c = Hacl_Hash_SHA3_keccak_rndc[i1]; + s[0U] = s[0U] ^ c; + } + } + uint32_t remOut = hash_len(a11) % block_len(a11); + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + uint8_t *uu____6 = hbuf; + memcpy(output + hash_len(a11) - remOut, uu____6, remOut * sizeof (uint8_t)); } Hacl_Streaming_Types_error_code @@ -515,78 +1031,79 @@ bool Hacl_Hash_SHA3_is_shake(Hacl_Hash_SHA3_state_t *s) return uu____0 == Spec_Hash_Definitions_Shake128 || uu____0 == Spec_Hash_Definitions_Shake256; } -void 
-Hacl_Hash_SHA3_shake128_hacl( - uint32_t inputByteLen, - uint8_t *input, - uint32_t outputByteLen, - uint8_t *output -) -{ - Hacl_Hash_SHA3_keccak(1344U, 256U, inputByteLen, input, 0x1FU, outputByteLen, output); -} - -void -Hacl_Hash_SHA3_shake256_hacl( - uint32_t inputByteLen, - uint8_t *input, - uint32_t outputByteLen, - uint8_t *output -) -{ - Hacl_Hash_SHA3_keccak(1088U, 512U, inputByteLen, input, 0x1FU, outputByteLen, output); -} - -void Hacl_Hash_SHA3_sha3_224(uint8_t *output, uint8_t *input, uint32_t input_len) -{ - Hacl_Hash_SHA3_keccak(1152U, 448U, input_len, input, 0x06U, 28U, output); -} - -void Hacl_Hash_SHA3_sha3_256(uint8_t *output, uint8_t *input, uint32_t input_len) -{ - Hacl_Hash_SHA3_keccak(1088U, 512U, input_len, input, 0x06U, 32U, output); -} - -void Hacl_Hash_SHA3_sha3_384(uint8_t *output, uint8_t *input, uint32_t input_len) +void Hacl_Hash_SHA3_absorb_inner_32(uint32_t rateInBytes, uint8_t *b, uint64_t *s) { - Hacl_Hash_SHA3_keccak(832U, 768U, input_len, input, 0x06U, 48U, output); -} - -void Hacl_Hash_SHA3_sha3_512(uint8_t *output, uint8_t *input, uint32_t input_len) -{ - Hacl_Hash_SHA3_keccak(576U, 1024U, input_len, input, 0x06U, 64U, output); -} - -static const -uint32_t -keccak_rotc[24U] = - { - 1U, 3U, 6U, 10U, 15U, 21U, 28U, 36U, 45U, 55U, 2U, 14U, 27U, 41U, 56U, 8U, 25U, 43U, 62U, 18U, - 39U, 61U, 20U, 44U - }; - -static const -uint32_t -keccak_piln[24U] = - { - 10U, 7U, 11U, 17U, 18U, 3U, 5U, 16U, 8U, 21U, 24U, 4U, 15U, 23U, 19U, 13U, 12U, 2U, 20U, 14U, - 22U, 9U, 6U, 1U - }; - -static const -uint64_t -keccak_rndc[24U] = + KRML_MAYBE_UNUSED_VAR(rateInBytes); + uint64_t ws[32U] = { 0U }; + uint8_t *b1 = b; + uint64_t u = load64_le(b1); + ws[0U] = u; + uint64_t u0 = load64_le(b1 + 8U); + ws[1U] = u0; + uint64_t u1 = load64_le(b1 + 16U); + ws[2U] = u1; + uint64_t u2 = load64_le(b1 + 24U); + ws[3U] = u2; + uint64_t u3 = load64_le(b1 + 32U); + ws[4U] = u3; + uint64_t u4 = load64_le(b1 + 40U); + ws[5U] = u4; + uint64_t u5 = load64_le(b1 + 48U); + ws[6U] = u5; + uint64_t u6 = load64_le(b1 + 56U); + ws[7U] = u6; + uint64_t u7 = load64_le(b1 + 64U); + ws[8U] = u7; + uint64_t u8 = load64_le(b1 + 72U); + ws[9U] = u8; + uint64_t u9 = load64_le(b1 + 80U); + ws[10U] = u9; + uint64_t u10 = load64_le(b1 + 88U); + ws[11U] = u10; + uint64_t u11 = load64_le(b1 + 96U); + ws[12U] = u11; + uint64_t u12 = load64_le(b1 + 104U); + ws[13U] = u12; + uint64_t u13 = load64_le(b1 + 112U); + ws[14U] = u13; + uint64_t u14 = load64_le(b1 + 120U); + ws[15U] = u14; + uint64_t u15 = load64_le(b1 + 128U); + ws[16U] = u15; + uint64_t u16 = load64_le(b1 + 136U); + ws[17U] = u16; + uint64_t u17 = load64_le(b1 + 144U); + ws[18U] = u17; + uint64_t u18 = load64_le(b1 + 152U); + ws[19U] = u18; + uint64_t u19 = load64_le(b1 + 160U); + ws[20U] = u19; + uint64_t u20 = load64_le(b1 + 168U); + ws[21U] = u20; + uint64_t u21 = load64_le(b1 + 176U); + ws[22U] = u21; + uint64_t u22 = load64_le(b1 + 184U); + ws[23U] = u22; + uint64_t u23 = load64_le(b1 + 192U); + ws[24U] = u23; + uint64_t u24 = load64_le(b1 + 200U); + ws[25U] = u24; + uint64_t u25 = load64_le(b1 + 208U); + ws[26U] = u25; + uint64_t u26 = load64_le(b1 + 216U); + ws[27U] = u26; + uint64_t u27 = load64_le(b1 + 224U); + ws[28U] = u27; + uint64_t u28 = load64_le(b1 + 232U); + ws[29U] = u28; + uint64_t u29 = load64_le(b1 + 240U); + ws[30U] = u29; + uint64_t u30 = load64_le(b1 + 248U); + ws[31U] = u30; + for (uint32_t i = 0U; i < 25U; i++) { - 0x0000000000000001ULL, 0x0000000000008082ULL, 0x800000000000808aULL, 0x8000000080008000ULL, - 
0x000000000000808bULL, 0x0000000080000001ULL, 0x8000000080008081ULL, 0x8000000000008009ULL, - 0x000000000000008aULL, 0x0000000000000088ULL, 0x0000000080008009ULL, 0x000000008000000aULL, - 0x000000008000808bULL, 0x800000000000008bULL, 0x8000000000008089ULL, 0x8000000000008003ULL, - 0x8000000000008002ULL, 0x8000000000000080ULL, 0x000000000000800aULL, 0x800000008000000aULL, - 0x8000000080008081ULL, 0x8000000000008080ULL, 0x0000000080000001ULL, 0x8000000080008008ULL - }; - -void Hacl_Hash_SHA3_state_permute(uint64_t *s) -{ + s[i] = s[i] ^ ws[i]; + } for (uint32_t i0 = 0U; i0 < 24U; i0++) { uint64_t _C[5U] = { 0U }; @@ -606,8 +1123,8 @@ void Hacl_Hash_SHA3_state_permute(uint64_t *s) uint64_t current = x; for (uint32_t i = 0U; i < 24U; i++) { - uint32_t _Y = keccak_piln[i]; - uint32_t r = keccak_rotc[i]; + uint32_t _Y = Hacl_Hash_SHA3_keccak_piln[i]; + uint32_t r = Hacl_Hash_SHA3_keccak_rotc[i]; uint64_t temp = s[_Y]; uint64_t uu____1 = current; s[_Y] = uu____1 << r | uu____1 >> (64U - r); @@ -627,108 +1144,1227 @@ void Hacl_Hash_SHA3_state_permute(uint64_t *s) s[2U + 5U * i] = v2; s[3U + 5U * i] = v3; s[4U + 5U * i] = v4;); - uint64_t c = keccak_rndc[i0]; + uint64_t c = Hacl_Hash_SHA3_keccak_rndc[i0]; s[0U] = s[0U] ^ c; } } -void Hacl_Hash_SHA3_loadState(uint32_t rateInBytes, uint8_t *input, uint64_t *s) +void +Hacl_Hash_SHA3_shake128( + uint8_t *output, + uint32_t outputByteLen, + uint8_t *input, + uint32_t inputByteLen +) { - uint8_t block[200U] = { 0U }; - memcpy(block, input, rateInBytes * sizeof (uint8_t)); + uint8_t *ib = input; + uint8_t *rb = output; + uint64_t s[25U] = { 0U }; + uint32_t rateInBytes1 = 168U; + for (uint32_t i = 0U; i < inputByteLen / rateInBytes1; i++) + { + uint8_t b[256U] = { 0U }; + uint8_t *b_ = b; + uint8_t *b0 = ib; + uint8_t *bl0 = b_; + memcpy(bl0, b0 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t)); + Hacl_Hash_SHA3_absorb_inner_32(rateInBytes1, b_, s); + } + uint8_t b1[256U] = { 0U }; + uint8_t *b_ = b1; + uint32_t rem = inputByteLen % rateInBytes1; + uint8_t *b00 = ib; + uint8_t *bl0 = b_; + memcpy(bl0, b00 + inputByteLen - rem, rem * sizeof (uint8_t)); + uint8_t *b01 = b_; + b01[inputByteLen % rateInBytes1] = 0x1FU; + uint64_t ws0[32U] = { 0U }; + uint8_t *b = b_; + uint64_t u = load64_le(b); + ws0[0U] = u; + uint64_t u0 = load64_le(b + 8U); + ws0[1U] = u0; + uint64_t u1 = load64_le(b + 16U); + ws0[2U] = u1; + uint64_t u2 = load64_le(b + 24U); + ws0[3U] = u2; + uint64_t u3 = load64_le(b + 32U); + ws0[4U] = u3; + uint64_t u4 = load64_le(b + 40U); + ws0[5U] = u4; + uint64_t u5 = load64_le(b + 48U); + ws0[6U] = u5; + uint64_t u6 = load64_le(b + 56U); + ws0[7U] = u6; + uint64_t u7 = load64_le(b + 64U); + ws0[8U] = u7; + uint64_t u8 = load64_le(b + 72U); + ws0[9U] = u8; + uint64_t u9 = load64_le(b + 80U); + ws0[10U] = u9; + uint64_t u10 = load64_le(b + 88U); + ws0[11U] = u10; + uint64_t u11 = load64_le(b + 96U); + ws0[12U] = u11; + uint64_t u12 = load64_le(b + 104U); + ws0[13U] = u12; + uint64_t u13 = load64_le(b + 112U); + ws0[14U] = u13; + uint64_t u14 = load64_le(b + 120U); + ws0[15U] = u14; + uint64_t u15 = load64_le(b + 128U); + ws0[16U] = u15; + uint64_t u16 = load64_le(b + 136U); + ws0[17U] = u16; + uint64_t u17 = load64_le(b + 144U); + ws0[18U] = u17; + uint64_t u18 = load64_le(b + 152U); + ws0[19U] = u18; + uint64_t u19 = load64_le(b + 160U); + ws0[20U] = u19; + uint64_t u20 = load64_le(b + 168U); + ws0[21U] = u20; + uint64_t u21 = load64_le(b + 176U); + ws0[22U] = u21; + uint64_t u22 = load64_le(b + 184U); + ws0[23U] = u22; + uint64_t u23 = 
load64_le(b + 192U); + ws0[24U] = u23; + uint64_t u24 = load64_le(b + 200U); + ws0[25U] = u24; + uint64_t u25 = load64_le(b + 208U); + ws0[26U] = u25; + uint64_t u26 = load64_le(b + 216U); + ws0[27U] = u26; + uint64_t u27 = load64_le(b + 224U); + ws0[28U] = u27; + uint64_t u28 = load64_le(b + 232U); + ws0[29U] = u28; + uint64_t u29 = load64_le(b + 240U); + ws0[30U] = u29; + uint64_t u30 = load64_le(b + 248U); + ws0[31U] = u30; for (uint32_t i = 0U; i < 25U; i++) { - uint64_t u = load64_le(block + i * 8U); - uint64_t x = u; - s[i] = s[i] ^ x; + s[i] = s[i] ^ ws0[i]; } + uint8_t b2[256U] = { 0U }; + uint8_t *b3 = b2; + uint8_t *b0 = b3; + b0[rateInBytes1 - 1U] = 0x80U; + Hacl_Hash_SHA3_absorb_inner_32(rateInBytes1, b3, s); + for (uint32_t i0 = 0U; i0 < outputByteLen / rateInBytes1; i0++) + { + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + uint8_t *b02 = rb; + memcpy(b02 + i0 * rateInBytes1, hbuf, rateInBytes1 * sizeof (uint8_t)); + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + uint64_t _C[5U] = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + _C[i] = s[i + 0U] ^ (s[i + 5U] ^ (s[i + 10U] ^ (s[i + 15U] ^ s[i + 20U])));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + uint64_t uu____0 = _C[(i2 + 1U) % 5U]; + uint64_t _D = _C[(i2 + 4U) % 5U] ^ (uu____0 << 1U | uu____0 >> 63U); + KRML_MAYBE_FOR5(i, 0U, 5U, 1U, s[i2 + 5U * i] = s[i2 + 5U * i] ^ _D;);); + uint64_t x = s[1U]; + uint64_t current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Hash_SHA3_keccak_piln[i]; + uint32_t r = Hacl_Hash_SHA3_keccak_rotc[i]; + uint64_t temp = s[_Y]; + uint64_t uu____1 = current; + s[_Y] = uu____1 << r | uu____1 >> (64U - r); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t v0 = s[0U + 5U * i] ^ (~s[1U + 5U * i] & s[2U + 5U * i]); + uint64_t v1 = s[1U + 5U * i] ^ (~s[2U + 5U * i] & s[3U + 5U * i]); + uint64_t v2 = s[2U + 5U * i] ^ (~s[3U + 5U * i] & s[4U + 5U * i]); + uint64_t v3 = s[3U + 5U * i] ^ (~s[4U + 5U * i] & s[0U + 5U * i]); + uint64_t v4 = s[4U + 5U * i] ^ (~s[0U + 5U * i] & s[1U + 5U * i]); + s[0U + 5U * i] = v0; + s[1U + 5U * i] = v1; + s[2U + 5U * i] = v2; + s[3U + 5U * i] = v3; + s[4U + 5U * i] = v4;); + uint64_t c = Hacl_Hash_SHA3_keccak_rndc[i1]; + s[0U] = s[0U] ^ c; + } + } + uint32_t remOut = outputByteLen % rateInBytes1; + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + memcpy(rb + outputByteLen - remOut, hbuf, remOut * sizeof (uint8_t)); } -static void storeState(uint32_t rateInBytes, uint64_t *s, uint8_t *res) +void +Hacl_Hash_SHA3_shake256( + uint8_t *output, + uint32_t outputByteLen, + uint8_t *input, + uint32_t inputByteLen +) { - uint8_t block[200U] = { 0U }; + uint8_t *ib = input; + uint8_t *rb = output; + uint64_t s[25U] = { 0U }; + uint32_t rateInBytes1 = 136U; + for (uint32_t i = 0U; i < inputByteLen / rateInBytes1; i++) + { + uint8_t b[256U] = { 0U }; + uint8_t *b_ = b; + uint8_t *b0 = ib; + uint8_t *bl0 = b_; + memcpy(bl0, b0 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t)); + Hacl_Hash_SHA3_absorb_inner_32(rateInBytes1, b_, s); + } + uint8_t b1[256U] = { 0U }; + uint8_t *b_ = b1; + uint32_t rem = inputByteLen % rateInBytes1; + uint8_t *b00 = ib; + uint8_t *bl0 = b_; + memcpy(bl0, b00 + inputByteLen - rem, rem * sizeof (uint8_t)); + uint8_t *b01 = b_; + b01[inputByteLen % 
rateInBytes1] = 0x1FU; + uint64_t ws0[32U] = { 0U }; + uint8_t *b = b_; + uint64_t u = load64_le(b); + ws0[0U] = u; + uint64_t u0 = load64_le(b + 8U); + ws0[1U] = u0; + uint64_t u1 = load64_le(b + 16U); + ws0[2U] = u1; + uint64_t u2 = load64_le(b + 24U); + ws0[3U] = u2; + uint64_t u3 = load64_le(b + 32U); + ws0[4U] = u3; + uint64_t u4 = load64_le(b + 40U); + ws0[5U] = u4; + uint64_t u5 = load64_le(b + 48U); + ws0[6U] = u5; + uint64_t u6 = load64_le(b + 56U); + ws0[7U] = u6; + uint64_t u7 = load64_le(b + 64U); + ws0[8U] = u7; + uint64_t u8 = load64_le(b + 72U); + ws0[9U] = u8; + uint64_t u9 = load64_le(b + 80U); + ws0[10U] = u9; + uint64_t u10 = load64_le(b + 88U); + ws0[11U] = u10; + uint64_t u11 = load64_le(b + 96U); + ws0[12U] = u11; + uint64_t u12 = load64_le(b + 104U); + ws0[13U] = u12; + uint64_t u13 = load64_le(b + 112U); + ws0[14U] = u13; + uint64_t u14 = load64_le(b + 120U); + ws0[15U] = u14; + uint64_t u15 = load64_le(b + 128U); + ws0[16U] = u15; + uint64_t u16 = load64_le(b + 136U); + ws0[17U] = u16; + uint64_t u17 = load64_le(b + 144U); + ws0[18U] = u17; + uint64_t u18 = load64_le(b + 152U); + ws0[19U] = u18; + uint64_t u19 = load64_le(b + 160U); + ws0[20U] = u19; + uint64_t u20 = load64_le(b + 168U); + ws0[21U] = u20; + uint64_t u21 = load64_le(b + 176U); + ws0[22U] = u21; + uint64_t u22 = load64_le(b + 184U); + ws0[23U] = u22; + uint64_t u23 = load64_le(b + 192U); + ws0[24U] = u23; + uint64_t u24 = load64_le(b + 200U); + ws0[25U] = u24; + uint64_t u25 = load64_le(b + 208U); + ws0[26U] = u25; + uint64_t u26 = load64_le(b + 216U); + ws0[27U] = u26; + uint64_t u27 = load64_le(b + 224U); + ws0[28U] = u27; + uint64_t u28 = load64_le(b + 232U); + ws0[29U] = u28; + uint64_t u29 = load64_le(b + 240U); + ws0[30U] = u29; + uint64_t u30 = load64_le(b + 248U); + ws0[31U] = u30; for (uint32_t i = 0U; i < 25U; i++) { - uint64_t sj = s[i]; - store64_le(block + i * 8U, sj); + s[i] = s[i] ^ ws0[i]; + } + uint8_t b2[256U] = { 0U }; + uint8_t *b3 = b2; + uint8_t *b0 = b3; + b0[rateInBytes1 - 1U] = 0x80U; + Hacl_Hash_SHA3_absorb_inner_32(rateInBytes1, b3, s); + for (uint32_t i0 = 0U; i0 < outputByteLen / rateInBytes1; i0++) + { + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + uint8_t *b02 = rb; + memcpy(b02 + i0 * rateInBytes1, hbuf, rateInBytes1 * sizeof (uint8_t)); + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + uint64_t _C[5U] = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + _C[i] = s[i + 0U] ^ (s[i + 5U] ^ (s[i + 10U] ^ (s[i + 15U] ^ s[i + 20U])));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + uint64_t uu____0 = _C[(i2 + 1U) % 5U]; + uint64_t _D = _C[(i2 + 4U) % 5U] ^ (uu____0 << 1U | uu____0 >> 63U); + KRML_MAYBE_FOR5(i, 0U, 5U, 1U, s[i2 + 5U * i] = s[i2 + 5U * i] ^ _D;);); + uint64_t x = s[1U]; + uint64_t current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Hash_SHA3_keccak_piln[i]; + uint32_t r = Hacl_Hash_SHA3_keccak_rotc[i]; + uint64_t temp = s[_Y]; + uint64_t uu____1 = current; + s[_Y] = uu____1 << r | uu____1 >> (64U - r); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t v0 = s[0U + 5U * i] ^ (~s[1U + 5U * i] & s[2U + 5U * i]); + uint64_t v1 = s[1U + 5U * i] ^ (~s[2U + 5U * i] & s[3U + 5U * i]); + uint64_t v2 = s[2U + 5U * i] ^ (~s[3U + 5U * i] & s[4U + 5U * i]); + uint64_t v3 = s[3U + 5U * i] ^ (~s[4U + 5U * i] & s[0U + 5U * i]); + uint64_t v4 = s[4U + 5U * i] ^ (~s[0U + 5U * i] & s[1U + 5U * i]); + s[0U + 
5U * i] = v0; + s[1U + 5U * i] = v1; + s[2U + 5U * i] = v2; + s[3U + 5U * i] = v3; + s[4U + 5U * i] = v4;); + uint64_t c = Hacl_Hash_SHA3_keccak_rndc[i1]; + s[0U] = s[0U] ^ c; + } + } + uint32_t remOut = outputByteLen % rateInBytes1; + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); } - memcpy(res, block, rateInBytes * sizeof (uint8_t)); + memcpy(rb + outputByteLen - remOut, hbuf, remOut * sizeof (uint8_t)); } -void Hacl_Hash_SHA3_absorb_inner(uint32_t rateInBytes, uint8_t *block, uint64_t *s) +void Hacl_Hash_SHA3_sha3_224(uint8_t *output, uint8_t *input, uint32_t inputByteLen) { - Hacl_Hash_SHA3_loadState(rateInBytes, block, s); - Hacl_Hash_SHA3_state_permute(s); + uint8_t *ib = input; + uint8_t *rb = output; + uint64_t s[25U] = { 0U }; + uint32_t rateInBytes1 = 144U; + for (uint32_t i = 0U; i < inputByteLen / rateInBytes1; i++) + { + uint8_t b[256U] = { 0U }; + uint8_t *b_ = b; + uint8_t *b0 = ib; + uint8_t *bl0 = b_; + memcpy(bl0, b0 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t)); + Hacl_Hash_SHA3_absorb_inner_32(rateInBytes1, b_, s); + } + uint8_t b1[256U] = { 0U }; + uint8_t *b_ = b1; + uint32_t rem = inputByteLen % rateInBytes1; + uint8_t *b00 = ib; + uint8_t *bl0 = b_; + memcpy(bl0, b00 + inputByteLen - rem, rem * sizeof (uint8_t)); + uint8_t *b01 = b_; + b01[inputByteLen % rateInBytes1] = 0x06U; + uint64_t ws0[32U] = { 0U }; + uint8_t *b = b_; + uint64_t u = load64_le(b); + ws0[0U] = u; + uint64_t u0 = load64_le(b + 8U); + ws0[1U] = u0; + uint64_t u1 = load64_le(b + 16U); + ws0[2U] = u1; + uint64_t u2 = load64_le(b + 24U); + ws0[3U] = u2; + uint64_t u3 = load64_le(b + 32U); + ws0[4U] = u3; + uint64_t u4 = load64_le(b + 40U); + ws0[5U] = u4; + uint64_t u5 = load64_le(b + 48U); + ws0[6U] = u5; + uint64_t u6 = load64_le(b + 56U); + ws0[7U] = u6; + uint64_t u7 = load64_le(b + 64U); + ws0[8U] = u7; + uint64_t u8 = load64_le(b + 72U); + ws0[9U] = u8; + uint64_t u9 = load64_le(b + 80U); + ws0[10U] = u9; + uint64_t u10 = load64_le(b + 88U); + ws0[11U] = u10; + uint64_t u11 = load64_le(b + 96U); + ws0[12U] = u11; + uint64_t u12 = load64_le(b + 104U); + ws0[13U] = u12; + uint64_t u13 = load64_le(b + 112U); + ws0[14U] = u13; + uint64_t u14 = load64_le(b + 120U); + ws0[15U] = u14; + uint64_t u15 = load64_le(b + 128U); + ws0[16U] = u15; + uint64_t u16 = load64_le(b + 136U); + ws0[17U] = u16; + uint64_t u17 = load64_le(b + 144U); + ws0[18U] = u17; + uint64_t u18 = load64_le(b + 152U); + ws0[19U] = u18; + uint64_t u19 = load64_le(b + 160U); + ws0[20U] = u19; + uint64_t u20 = load64_le(b + 168U); + ws0[21U] = u20; + uint64_t u21 = load64_le(b + 176U); + ws0[22U] = u21; + uint64_t u22 = load64_le(b + 184U); + ws0[23U] = u22; + uint64_t u23 = load64_le(b + 192U); + ws0[24U] = u23; + uint64_t u24 = load64_le(b + 200U); + ws0[25U] = u24; + uint64_t u25 = load64_le(b + 208U); + ws0[26U] = u25; + uint64_t u26 = load64_le(b + 216U); + ws0[27U] = u26; + uint64_t u27 = load64_le(b + 224U); + ws0[28U] = u27; + uint64_t u28 = load64_le(b + 232U); + ws0[29U] = u28; + uint64_t u29 = load64_le(b + 240U); + ws0[30U] = u29; + uint64_t u30 = load64_le(b + 248U); + ws0[31U] = u30; + for (uint32_t i = 0U; i < 25U; i++) + { + s[i] = s[i] ^ ws0[i]; + } + uint8_t b2[256U] = { 0U }; + uint8_t *b3 = b2; + uint8_t *b0 = b3; + b0[rateInBytes1 - 1U] = 0x80U; + Hacl_Hash_SHA3_absorb_inner_32(rateInBytes1, b3, s); + for (uint32_t i0 = 0U; i0 < 28U / rateInBytes1; i0++) + { + uint8_t 
hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + uint8_t *b02 = rb; + memcpy(b02 + i0 * rateInBytes1, hbuf, rateInBytes1 * sizeof (uint8_t)); + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + uint64_t _C[5U] = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + _C[i] = s[i + 0U] ^ (s[i + 5U] ^ (s[i + 10U] ^ (s[i + 15U] ^ s[i + 20U])));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + uint64_t uu____0 = _C[(i2 + 1U) % 5U]; + uint64_t _D = _C[(i2 + 4U) % 5U] ^ (uu____0 << 1U | uu____0 >> 63U); + KRML_MAYBE_FOR5(i, 0U, 5U, 1U, s[i2 + 5U * i] = s[i2 + 5U * i] ^ _D;);); + uint64_t x = s[1U]; + uint64_t current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Hash_SHA3_keccak_piln[i]; + uint32_t r = Hacl_Hash_SHA3_keccak_rotc[i]; + uint64_t temp = s[_Y]; + uint64_t uu____1 = current; + s[_Y] = uu____1 << r | uu____1 >> (64U - r); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t v0 = s[0U + 5U * i] ^ (~s[1U + 5U * i] & s[2U + 5U * i]); + uint64_t v1 = s[1U + 5U * i] ^ (~s[2U + 5U * i] & s[3U + 5U * i]); + uint64_t v2 = s[2U + 5U * i] ^ (~s[3U + 5U * i] & s[4U + 5U * i]); + uint64_t v3 = s[3U + 5U * i] ^ (~s[4U + 5U * i] & s[0U + 5U * i]); + uint64_t v4 = s[4U + 5U * i] ^ (~s[0U + 5U * i] & s[1U + 5U * i]); + s[0U + 5U * i] = v0; + s[1U + 5U * i] = v1; + s[2U + 5U * i] = v2; + s[3U + 5U * i] = v3; + s[4U + 5U * i] = v4;); + uint64_t c = Hacl_Hash_SHA3_keccak_rndc[i1]; + s[0U] = s[0U] ^ c; + } + } + uint32_t remOut = 28U % rateInBytes1; + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + memcpy(rb + 28U - remOut, hbuf, remOut * sizeof (uint8_t)); } -static void -absorb( - uint64_t *s, - uint32_t rateInBytes, - uint32_t inputByteLen, - uint8_t *input, - uint8_t delimitedSuffix -) +void Hacl_Hash_SHA3_sha3_256(uint8_t *output, uint8_t *input, uint32_t inputByteLen) +{ + uint8_t *ib = input; + uint8_t *rb = output; + uint64_t s[25U] = { 0U }; + uint32_t rateInBytes1 = 136U; + for (uint32_t i = 0U; i < inputByteLen / rateInBytes1; i++) + { + uint8_t b[256U] = { 0U }; + uint8_t *b_ = b; + uint8_t *b0 = ib; + uint8_t *bl0 = b_; + memcpy(bl0, b0 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t)); + Hacl_Hash_SHA3_absorb_inner_32(rateInBytes1, b_, s); + } + uint8_t b1[256U] = { 0U }; + uint8_t *b_ = b1; + uint32_t rem = inputByteLen % rateInBytes1; + uint8_t *b00 = ib; + uint8_t *bl0 = b_; + memcpy(bl0, b00 + inputByteLen - rem, rem * sizeof (uint8_t)); + uint8_t *b01 = b_; + b01[inputByteLen % rateInBytes1] = 0x06U; + uint64_t ws0[32U] = { 0U }; + uint8_t *b = b_; + uint64_t u = load64_le(b); + ws0[0U] = u; + uint64_t u0 = load64_le(b + 8U); + ws0[1U] = u0; + uint64_t u1 = load64_le(b + 16U); + ws0[2U] = u1; + uint64_t u2 = load64_le(b + 24U); + ws0[3U] = u2; + uint64_t u3 = load64_le(b + 32U); + ws0[4U] = u3; + uint64_t u4 = load64_le(b + 40U); + ws0[5U] = u4; + uint64_t u5 = load64_le(b + 48U); + ws0[6U] = u5; + uint64_t u6 = load64_le(b + 56U); + ws0[7U] = u6; + uint64_t u7 = load64_le(b + 64U); + ws0[8U] = u7; + uint64_t u8 = load64_le(b + 72U); + ws0[9U] = u8; + uint64_t u9 = load64_le(b + 80U); + ws0[10U] = u9; + uint64_t u10 = load64_le(b + 88U); + ws0[11U] = u10; + uint64_t u11 = load64_le(b + 96U); + ws0[12U] = u11; + uint64_t u12 = load64_le(b + 104U); + ws0[13U] = u12; + uint64_t u13 = load64_le(b + 112U); 
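/* The unrolled load64_le sequence here reads the zero-padded 256-byte final block (message remainder plus the 0x06 domain-separation byte) into ws0 as little-endian 64-bit lanes; the XOR loop further below folds the first 25 lanes (200 bytes) into the state. */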
+ ws0[14U] = u13; + uint64_t u14 = load64_le(b + 120U); + ws0[15U] = u14; + uint64_t u15 = load64_le(b + 128U); + ws0[16U] = u15; + uint64_t u16 = load64_le(b + 136U); + ws0[17U] = u16; + uint64_t u17 = load64_le(b + 144U); + ws0[18U] = u17; + uint64_t u18 = load64_le(b + 152U); + ws0[19U] = u18; + uint64_t u19 = load64_le(b + 160U); + ws0[20U] = u19; + uint64_t u20 = load64_le(b + 168U); + ws0[21U] = u20; + uint64_t u21 = load64_le(b + 176U); + ws0[22U] = u21; + uint64_t u22 = load64_le(b + 184U); + ws0[23U] = u22; + uint64_t u23 = load64_le(b + 192U); + ws0[24U] = u23; + uint64_t u24 = load64_le(b + 200U); + ws0[25U] = u24; + uint64_t u25 = load64_le(b + 208U); + ws0[26U] = u25; + uint64_t u26 = load64_le(b + 216U); + ws0[27U] = u26; + uint64_t u27 = load64_le(b + 224U); + ws0[28U] = u27; + uint64_t u28 = load64_le(b + 232U); + ws0[29U] = u28; + uint64_t u29 = load64_le(b + 240U); + ws0[30U] = u29; + uint64_t u30 = load64_le(b + 248U); + ws0[31U] = u30; + for (uint32_t i = 0U; i < 25U; i++) + { + s[i] = s[i] ^ ws0[i]; + } + uint8_t b2[256U] = { 0U }; + uint8_t *b3 = b2; + uint8_t *b0 = b3; + b0[rateInBytes1 - 1U] = 0x80U; + Hacl_Hash_SHA3_absorb_inner_32(rateInBytes1, b3, s); + for (uint32_t i0 = 0U; i0 < 32U / rateInBytes1; i0++) + { + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + uint8_t *b02 = rb; + memcpy(b02 + i0 * rateInBytes1, hbuf, rateInBytes1 * sizeof (uint8_t)); + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + uint64_t _C[5U] = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + _C[i] = s[i + 0U] ^ (s[i + 5U] ^ (s[i + 10U] ^ (s[i + 15U] ^ s[i + 20U])));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + uint64_t uu____0 = _C[(i2 + 1U) % 5U]; + uint64_t _D = _C[(i2 + 4U) % 5U] ^ (uu____0 << 1U | uu____0 >> 63U); + KRML_MAYBE_FOR5(i, 0U, 5U, 1U, s[i2 + 5U * i] = s[i2 + 5U * i] ^ _D;);); + uint64_t x = s[1U]; + uint64_t current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Hash_SHA3_keccak_piln[i]; + uint32_t r = Hacl_Hash_SHA3_keccak_rotc[i]; + uint64_t temp = s[_Y]; + uint64_t uu____1 = current; + s[_Y] = uu____1 << r | uu____1 >> (64U - r); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t v0 = s[0U + 5U * i] ^ (~s[1U + 5U * i] & s[2U + 5U * i]); + uint64_t v1 = s[1U + 5U * i] ^ (~s[2U + 5U * i] & s[3U + 5U * i]); + uint64_t v2 = s[2U + 5U * i] ^ (~s[3U + 5U * i] & s[4U + 5U * i]); + uint64_t v3 = s[3U + 5U * i] ^ (~s[4U + 5U * i] & s[0U + 5U * i]); + uint64_t v4 = s[4U + 5U * i] ^ (~s[0U + 5U * i] & s[1U + 5U * i]); + s[0U + 5U * i] = v0; + s[1U + 5U * i] = v1; + s[2U + 5U * i] = v2; + s[3U + 5U * i] = v3; + s[4U + 5U * i] = v4;); + uint64_t c = Hacl_Hash_SHA3_keccak_rndc[i1]; + s[0U] = s[0U] ^ c; + } + } + uint32_t remOut = 32U % rateInBytes1; + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + memcpy(rb + 32U - remOut, hbuf, remOut * sizeof (uint8_t)); +} + +void Hacl_Hash_SHA3_sha3_384(uint8_t *output, uint8_t *input, uint32_t inputByteLen) +{ + uint8_t *ib = input; + uint8_t *rb = output; + uint64_t s[25U] = { 0U }; + uint32_t rateInBytes1 = 104U; + for (uint32_t i = 0U; i < inputByteLen / rateInBytes1; i++) + { + uint8_t b[256U] = { 0U }; + uint8_t *b_ = b; + uint8_t *b0 = ib; + uint8_t *bl0 = b_; + memcpy(bl0, b0 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t)); + 
Hacl_Hash_SHA3_absorb_inner_32(rateInBytes1, b_, s); + } + uint8_t b1[256U] = { 0U }; + uint8_t *b_ = b1; + uint32_t rem = inputByteLen % rateInBytes1; + uint8_t *b00 = ib; + uint8_t *bl0 = b_; + memcpy(bl0, b00 + inputByteLen - rem, rem * sizeof (uint8_t)); + uint8_t *b01 = b_; + b01[inputByteLen % rateInBytes1] = 0x06U; + uint64_t ws0[32U] = { 0U }; + uint8_t *b = b_; + uint64_t u = load64_le(b); + ws0[0U] = u; + uint64_t u0 = load64_le(b + 8U); + ws0[1U] = u0; + uint64_t u1 = load64_le(b + 16U); + ws0[2U] = u1; + uint64_t u2 = load64_le(b + 24U); + ws0[3U] = u2; + uint64_t u3 = load64_le(b + 32U); + ws0[4U] = u3; + uint64_t u4 = load64_le(b + 40U); + ws0[5U] = u4; + uint64_t u5 = load64_le(b + 48U); + ws0[6U] = u5; + uint64_t u6 = load64_le(b + 56U); + ws0[7U] = u6; + uint64_t u7 = load64_le(b + 64U); + ws0[8U] = u7; + uint64_t u8 = load64_le(b + 72U); + ws0[9U] = u8; + uint64_t u9 = load64_le(b + 80U); + ws0[10U] = u9; + uint64_t u10 = load64_le(b + 88U); + ws0[11U] = u10; + uint64_t u11 = load64_le(b + 96U); + ws0[12U] = u11; + uint64_t u12 = load64_le(b + 104U); + ws0[13U] = u12; + uint64_t u13 = load64_le(b + 112U); + ws0[14U] = u13; + uint64_t u14 = load64_le(b + 120U); + ws0[15U] = u14; + uint64_t u15 = load64_le(b + 128U); + ws0[16U] = u15; + uint64_t u16 = load64_le(b + 136U); + ws0[17U] = u16; + uint64_t u17 = load64_le(b + 144U); + ws0[18U] = u17; + uint64_t u18 = load64_le(b + 152U); + ws0[19U] = u18; + uint64_t u19 = load64_le(b + 160U); + ws0[20U] = u19; + uint64_t u20 = load64_le(b + 168U); + ws0[21U] = u20; + uint64_t u21 = load64_le(b + 176U); + ws0[22U] = u21; + uint64_t u22 = load64_le(b + 184U); + ws0[23U] = u22; + uint64_t u23 = load64_le(b + 192U); + ws0[24U] = u23; + uint64_t u24 = load64_le(b + 200U); + ws0[25U] = u24; + uint64_t u25 = load64_le(b + 208U); + ws0[26U] = u25; + uint64_t u26 = load64_le(b + 216U); + ws0[27U] = u26; + uint64_t u27 = load64_le(b + 224U); + ws0[28U] = u27; + uint64_t u28 = load64_le(b + 232U); + ws0[29U] = u28; + uint64_t u29 = load64_le(b + 240U); + ws0[30U] = u29; + uint64_t u30 = load64_le(b + 248U); + ws0[31U] = u30; + for (uint32_t i = 0U; i < 25U; i++) + { + s[i] = s[i] ^ ws0[i]; + } + uint8_t b2[256U] = { 0U }; + uint8_t *b3 = b2; + uint8_t *b0 = b3; + b0[rateInBytes1 - 1U] = 0x80U; + Hacl_Hash_SHA3_absorb_inner_32(rateInBytes1, b3, s); + for (uint32_t i0 = 0U; i0 < 48U / rateInBytes1; i0++) + { + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + uint8_t *b02 = rb; + memcpy(b02 + i0 * rateInBytes1, hbuf, rateInBytes1 * sizeof (uint8_t)); + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + uint64_t _C[5U] = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + _C[i] = s[i + 0U] ^ (s[i + 5U] ^ (s[i + 10U] ^ (s[i + 15U] ^ s[i + 20U])));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + uint64_t uu____0 = _C[(i2 + 1U) % 5U]; + uint64_t _D = _C[(i2 + 4U) % 5U] ^ (uu____0 << 1U | uu____0 >> 63U); + KRML_MAYBE_FOR5(i, 0U, 5U, 1U, s[i2 + 5U * i] = s[i2 + 5U * i] ^ _D;);); + uint64_t x = s[1U]; + uint64_t current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Hash_SHA3_keccak_piln[i]; + uint32_t r = Hacl_Hash_SHA3_keccak_rotc[i]; + uint64_t temp = s[_Y]; + uint64_t uu____1 = current; + s[_Y] = uu____1 << r | uu____1 >> (64U - r); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t v0 = s[0U + 5U * i] ^ (~s[1U + 5U * i] & s[2U + 5U * i]); + uint64_t v1 = s[1U + 5U * i] ^ (~s[2U + 5U 
* i] & s[3U + 5U * i]); + uint64_t v2 = s[2U + 5U * i] ^ (~s[3U + 5U * i] & s[4U + 5U * i]); + uint64_t v3 = s[3U + 5U * i] ^ (~s[4U + 5U * i] & s[0U + 5U * i]); + uint64_t v4 = s[4U + 5U * i] ^ (~s[0U + 5U * i] & s[1U + 5U * i]); + s[0U + 5U * i] = v0; + s[1U + 5U * i] = v1; + s[2U + 5U * i] = v2; + s[3U + 5U * i] = v3; + s[4U + 5U * i] = v4;); + uint64_t c = Hacl_Hash_SHA3_keccak_rndc[i1]; + s[0U] = s[0U] ^ c; + } + } + uint32_t remOut = 48U % rateInBytes1; + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + memcpy(rb + 48U - remOut, hbuf, remOut * sizeof (uint8_t)); +} + +void Hacl_Hash_SHA3_sha3_512(uint8_t *output, uint8_t *input, uint32_t inputByteLen) +{ + uint8_t *ib = input; + uint8_t *rb = output; + uint64_t s[25U] = { 0U }; + uint32_t rateInBytes1 = 72U; + for (uint32_t i = 0U; i < inputByteLen / rateInBytes1; i++) + { + uint8_t b[256U] = { 0U }; + uint8_t *b_ = b; + uint8_t *b0 = ib; + uint8_t *bl0 = b_; + memcpy(bl0, b0 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t)); + Hacl_Hash_SHA3_absorb_inner_32(rateInBytes1, b_, s); + } + uint8_t b1[256U] = { 0U }; + uint8_t *b_ = b1; + uint32_t rem = inputByteLen % rateInBytes1; + uint8_t *b00 = ib; + uint8_t *bl0 = b_; + memcpy(bl0, b00 + inputByteLen - rem, rem * sizeof (uint8_t)); + uint8_t *b01 = b_; + b01[inputByteLen % rateInBytes1] = 0x06U; + uint64_t ws0[32U] = { 0U }; + uint8_t *b = b_; + uint64_t u = load64_le(b); + ws0[0U] = u; + uint64_t u0 = load64_le(b + 8U); + ws0[1U] = u0; + uint64_t u1 = load64_le(b + 16U); + ws0[2U] = u1; + uint64_t u2 = load64_le(b + 24U); + ws0[3U] = u2; + uint64_t u3 = load64_le(b + 32U); + ws0[4U] = u3; + uint64_t u4 = load64_le(b + 40U); + ws0[5U] = u4; + uint64_t u5 = load64_le(b + 48U); + ws0[6U] = u5; + uint64_t u6 = load64_le(b + 56U); + ws0[7U] = u6; + uint64_t u7 = load64_le(b + 64U); + ws0[8U] = u7; + uint64_t u8 = load64_le(b + 72U); + ws0[9U] = u8; + uint64_t u9 = load64_le(b + 80U); + ws0[10U] = u9; + uint64_t u10 = load64_le(b + 88U); + ws0[11U] = u10; + uint64_t u11 = load64_le(b + 96U); + ws0[12U] = u11; + uint64_t u12 = load64_le(b + 104U); + ws0[13U] = u12; + uint64_t u13 = load64_le(b + 112U); + ws0[14U] = u13; + uint64_t u14 = load64_le(b + 120U); + ws0[15U] = u14; + uint64_t u15 = load64_le(b + 128U); + ws0[16U] = u15; + uint64_t u16 = load64_le(b + 136U); + ws0[17U] = u16; + uint64_t u17 = load64_le(b + 144U); + ws0[18U] = u17; + uint64_t u18 = load64_le(b + 152U); + ws0[19U] = u18; + uint64_t u19 = load64_le(b + 160U); + ws0[20U] = u19; + uint64_t u20 = load64_le(b + 168U); + ws0[21U] = u20; + uint64_t u21 = load64_le(b + 176U); + ws0[22U] = u21; + uint64_t u22 = load64_le(b + 184U); + ws0[23U] = u22; + uint64_t u23 = load64_le(b + 192U); + ws0[24U] = u23; + uint64_t u24 = load64_le(b + 200U); + ws0[25U] = u24; + uint64_t u25 = load64_le(b + 208U); + ws0[26U] = u25; + uint64_t u26 = load64_le(b + 216U); + ws0[27U] = u26; + uint64_t u27 = load64_le(b + 224U); + ws0[28U] = u27; + uint64_t u28 = load64_le(b + 232U); + ws0[29U] = u28; + uint64_t u29 = load64_le(b + 240U); + ws0[30U] = u29; + uint64_t u30 = load64_le(b + 248U); + ws0[31U] = u30; + for (uint32_t i = 0U; i < 25U; i++) + { + s[i] = s[i] ^ ws0[i]; + } + uint8_t b2[256U] = { 0U }; + uint8_t *b3 = b2; + uint8_t *b0 = b3; + b0[rateInBytes1 - 1U] = 0x80U; + Hacl_Hash_SHA3_absorb_inner_32(rateInBytes1, b3, s); + for (uint32_t i0 = 0U; i0 < 64U / rateInBytes1; i0++) + { + uint8_t 
hbuf[256U] = { 0U };
+ uint64_t ws[32U] = { 0U };
+ memcpy(ws, s, 25U * sizeof (uint64_t));
+ for (uint32_t i = 0U; i < 32U; i++)
+ {
+ store64_le(hbuf + i * 8U, ws[i]);
+ }
+ uint8_t *b02 = rb;
+ memcpy(b02 + i0 * rateInBytes1, hbuf, rateInBytes1 * sizeof (uint8_t));
+ for (uint32_t i1 = 0U; i1 < 24U; i1++)
+ {
+ uint64_t _C[5U] = { 0U };
+ KRML_MAYBE_FOR5(i,
+ 0U,
+ 5U,
+ 1U,
+ _C[i] = s[i + 0U] ^ (s[i + 5U] ^ (s[i + 10U] ^ (s[i + 15U] ^ s[i + 20U]))););
+ KRML_MAYBE_FOR5(i2,
+ 0U,
+ 5U,
+ 1U,
+ uint64_t uu____0 = _C[(i2 + 1U) % 5U];
+ uint64_t _D = _C[(i2 + 4U) % 5U] ^ (uu____0 << 1U | uu____0 >> 63U);
+ KRML_MAYBE_FOR5(i, 0U, 5U, 1U, s[i2 + 5U * i] = s[i2 + 5U * i] ^ _D;););
+ uint64_t x = s[1U];
+ uint64_t current = x;
+ for (uint32_t i = 0U; i < 24U; i++)
+ {
+ uint32_t _Y = Hacl_Hash_SHA3_keccak_piln[i];
+ uint32_t r = Hacl_Hash_SHA3_keccak_rotc[i];
+ uint64_t temp = s[_Y];
+ uint64_t uu____1 = current;
+ s[_Y] = uu____1 << r | uu____1 >> (64U - r);
+ current = temp;
+ }
+ KRML_MAYBE_FOR5(i,
+ 0U,
+ 5U,
+ 1U,
+ uint64_t v0 = s[0U + 5U * i] ^ (~s[1U + 5U * i] & s[2U + 5U * i]);
+ uint64_t v1 = s[1U + 5U * i] ^ (~s[2U + 5U * i] & s[3U + 5U * i]);
+ uint64_t v2 = s[2U + 5U * i] ^ (~s[3U + 5U * i] & s[4U + 5U * i]);
+ uint64_t v3 = s[3U + 5U * i] ^ (~s[4U + 5U * i] & s[0U + 5U * i]);
+ uint64_t v4 = s[4U + 5U * i] ^ (~s[0U + 5U * i] & s[1U + 5U * i]);
+ s[0U + 5U * i] = v0;
+ s[1U + 5U * i] = v1;
+ s[2U + 5U * i] = v2;
+ s[3U + 5U * i] = v3;
+ s[4U + 5U * i] = v4;);
+ uint64_t c = Hacl_Hash_SHA3_keccak_rndc[i1];
+ s[0U] = s[0U] ^ c;
+ }
+ }
+ uint32_t remOut = 64U % rateInBytes1;
+ uint8_t hbuf[256U] = { 0U };
+ uint64_t ws[32U] = { 0U };
+ memcpy(ws, s, 25U * sizeof (uint64_t));
+ for (uint32_t i = 0U; i < 32U; i++)
+ {
+ store64_le(hbuf + i * 8U, ws[i]);
+ }
+ memcpy(rb + 64U - remOut, hbuf, remOut * sizeof (uint8_t));
+}
+
+/**
+Allocate a 200-byte state buffer, i.e., uint64_t[25]
+*/
+uint64_t *Hacl_Hash_SHA3_state_malloc(void)
+{
+ uint64_t *buf = (uint64_t *)KRML_HOST_CALLOC(25U, sizeof (uint64_t));
+ return buf;
+}
+
+/**
+Free a state buffer allocated with `Hacl_Hash_SHA3_state_malloc`
+*/
+void Hacl_Hash_SHA3_state_free(uint64_t *s)
{
- uint32_t n_blocks = inputByteLen / rateInBytes;
- uint32_t rem = inputByteLen % rateInBytes;
- for (uint32_t i = 0U; i < n_blocks; i++)
- {
- uint8_t *block = input + i * rateInBytes;
- Hacl_Hash_SHA3_absorb_inner(rateInBytes, block, s);
- }
- uint8_t *last = input + n_blocks * rateInBytes;
- uint8_t lastBlock_[200U] = { 0U };
- uint8_t *lastBlock = lastBlock_;
- memcpy(lastBlock, last, rem * sizeof (uint8_t));
- lastBlock[rem] = delimitedSuffix;
- Hacl_Hash_SHA3_loadState(rateInBytes, lastBlock, s);
- if (!(((uint32_t)delimitedSuffix & 0x80U) == 0U) && rem == rateInBytes - 1U)
- {
- Hacl_Hash_SHA3_state_permute(s);
- }
- uint8_t nextBlock_[200U] = { 0U };
- uint8_t *nextBlock = nextBlock_;
- nextBlock[rateInBytes - 1U] = 0x80U;
- Hacl_Hash_SHA3_loadState(rateInBytes, nextBlock, s);
- Hacl_Hash_SHA3_state_permute(s);
+ KRML_HOST_FREE(s);
}
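A minimal caller sketch for the one-shot API above; it assumes the public header `Hacl_Hash_SHA3.h` declares these functions (the header name is an assumption, the signatures are as defined in this file):

#include <stdio.h>
#include <stdint.h>
#include "Hacl_Hash_SHA3.h" /* assumed public header for the one-shot API */

int main(void)
{
  uint8_t msg[3U] = { 0x61U, 0x62U, 0x63U }; /* "abc" */
  uint8_t digest[32U] = { 0U };
  uint8_t xof[42U] = { 0U };
  /* One-shot SHA3-256: input, its length, and a fixed 32-byte output. */
  Hacl_Hash_SHA3_sha3_256(digest, msg, 3U);
  /* One-shot SHAKE128: caller-chosen output length (42 bytes here). */
  Hacl_Hash_SHA3_shake128(xof, 42U, msg, 3U);
  for (uint32_t i = 0U; i < 32U; i++)
    printf("%02x", digest[i]);
  printf("\n");
  return 0;
}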
+/**
+Absorb a number of full input blocks and update the hash state
+
+ This function is intended to receive a hash state and input buffer.
+ It processes an input that is a multiple of 168 bytes (the SHAKE128 block size);
+ any additional bytes of a final partial block are ignored.
+
+ The argument `state` (IN/OUT) points to the hash state, i.e., uint64_t[25]
+ The argument `input` (IN) points to `inputByteLen` bytes of valid memory,
+ i.e., uint8_t[inputByteLen]
+*/
void
-Hacl_Hash_SHA3_squeeze0(
- uint64_t *s,
- uint32_t rateInBytes,
- uint32_t outputByteLen,
- uint8_t *output
-)
+Hacl_Hash_SHA3_shake128_absorb_nblocks(uint64_t *state, uint8_t *input, uint32_t inputByteLen)
{
- uint32_t outBlocks = outputByteLen / rateInBytes;
- uint32_t remOut = outputByteLen % rateInBytes;
- uint8_t *last = output + outputByteLen - remOut;
- uint8_t *blocks = output;
- for (uint32_t i = 0U; i < outBlocks; i++)
+ for (uint32_t i = 0U; i < inputByteLen / 168U; i++)
{
- storeState(rateInBytes, s, blocks + i * rateInBytes);
- Hacl_Hash_SHA3_state_permute(s);
+ uint8_t b[256U] = { 0U };
+ uint8_t *b_ = b;
+ uint8_t *b0 = input;
+ uint8_t *bl0 = b_;
+ memcpy(bl0, b0 + i * 168U, 168U * sizeof (uint8_t));
+ Hacl_Hash_SHA3_absorb_inner_32(168U, b_, state);
}
- storeState(remOut, s, last);
}
+/**
+Absorb a final partial block of input and update the hash state
+
+ This function is intended to receive a hash state and input buffer.
+ It processes the sequence of bytes at the end of the input buffer that is
+ shorter than 168 bytes (the SHAKE128 block size);
+ any bytes of full blocks at the start of the input buffer are ignored.
+
+ The argument `state` (IN/OUT) points to the hash state, i.e., uint64_t[25]
+ The argument `input` (IN) points to `inputByteLen` bytes of valid memory,
+ i.e., uint8_t[inputByteLen]
+
+ Note: the full size of the input buffer must be passed as `inputByteLen`,
+ including the number of full-block bytes at the start of the buffer that are ignored
+*/
void
-Hacl_Hash_SHA3_keccak(
- uint32_t rate,
- uint32_t capacity,
- uint32_t inputByteLen,
- uint8_t *input,
- uint8_t delimitedSuffix,
- uint32_t outputByteLen,
- uint8_t *output
+Hacl_Hash_SHA3_shake128_absorb_final(uint64_t *state, uint8_t *input, uint32_t inputByteLen)
+{
+ uint8_t b1[256U] = { 0U };
+ uint8_t *b_ = b1;
+ uint32_t rem = inputByteLen % 168U;
+ uint8_t *b00 = input;
+ uint8_t *bl0 = b_;
+ memcpy(bl0, b00 + inputByteLen - rem, rem * sizeof (uint8_t));
+ uint8_t *b01 = b_;
+ b01[inputByteLen % 168U] = 0x1FU;
+ uint64_t ws[32U] = { 0U };
+ uint8_t *b = b_;
+ uint64_t u = load64_le(b);
+ ws[0U] = u;
+ uint64_t u0 = load64_le(b + 8U);
+ ws[1U] = u0;
+ uint64_t u1 = load64_le(b + 16U);
+ ws[2U] = u1;
+ uint64_t u2 = load64_le(b + 24U);
+ ws[3U] = u2;
+ uint64_t u3 = load64_le(b + 32U);
+ ws[4U] = u3;
+ uint64_t u4 = load64_le(b + 40U);
+ ws[5U] = u4;
+ uint64_t u5 = load64_le(b + 48U);
+ ws[6U] = u5;
+ uint64_t u6 = load64_le(b + 56U);
+ ws[7U] = u6;
+ uint64_t u7 = load64_le(b + 64U);
+ ws[8U] = u7;
+ uint64_t u8 = load64_le(b + 72U);
+ ws[9U] = u8;
+ uint64_t u9 = load64_le(b + 80U);
+ ws[10U] = u9;
+ uint64_t u10 = load64_le(b + 88U);
+ ws[11U] = u10;
+ uint64_t u11 = load64_le(b + 96U);
+ ws[12U] = u11;
+ uint64_t u12 = load64_le(b + 104U);
+ ws[13U] = u12;
+ uint64_t u13 = load64_le(b + 112U);
+ ws[14U] = u13;
+ uint64_t u14 = load64_le(b + 120U);
+ ws[15U] = u14;
+ uint64_t u15 = load64_le(b + 128U);
+ ws[16U] = u15;
+ uint64_t u16 = load64_le(b + 136U);
+ ws[17U] = u16;
+ uint64_t u17 = load64_le(b + 144U);
+ ws[18U] = u17;
+ uint64_t u18 = load64_le(b + 152U);
+ ws[19U] = u18;
+ uint64_t u19 = load64_le(b + 160U);
+ ws[20U] = u19;
+ uint64_t u20 = load64_le(b + 168U);
+ ws[21U] = u20;
+ uint64_t u21 = load64_le(b + 176U);
+ ws[22U] = u21;
+ uint64_t u22 = load64_le(b + 184U);
+ ws[23U] = u22;
+ uint64_t u23 = load64_le(b + 192U);
+ ws[24U] = u23;
+ uint64_t u24 = load64_le(b + 200U);
+ ws[25U] = u24;
+ uint64_t u25 = load64_le(b + 208U);
+ ws[26U] = u25;
+ uint64_t u26 = load64_le(b + 216U);
+ ws[27U] = u26;
+ uint64_t u27 = load64_le(b + 224U);
+ ws[28U] = u27;
+ uint64_t u28 = load64_le(b + 232U);
+ ws[29U] = u28;
+ uint64_t u29 = load64_le(b + 240U);
+ ws[30U] = u29;
+ uint64_t u30 = load64_le(b + 248U);
+ ws[31U] = u30;
+ for (uint32_t i = 0U; i < 25U; i++)
+ {
+ state[i] = state[i] ^ ws[i];
+ }
+ uint8_t b2[256U] = { 0U };
+ uint8_t *b3 = b2;
+ uint8_t *b0 = b3;
+ b0[167U] = 0x80U;
+ Hacl_Hash_SHA3_absorb_inner_32(168U, b3, state);
+}
+
+/**
+Squeeze a hash state into an output buffer
+
+ This function is intended to receive a hash state and output buffer.
+ It produces an output that is a multiple of 168 bytes (the SHAKE128 block size);
+ any additional bytes of a final partial block are not written.
+
+ The argument `state` (IN/OUT) points to the hash state, i.e., uint64_t[25]
+ The argument `output` (OUT) points to `outputByteLen` bytes of valid memory,
+ i.e., uint8_t[outputByteLen]
+*/
+void
+Hacl_Hash_SHA3_shake128_squeeze_nblocks(
+ uint64_t *state,
+ uint8_t *output,
+ uint32_t outputByteLen
)
{
- KRML_MAYBE_UNUSED_VAR(capacity);
- uint32_t rateInBytes = rate / 8U;
- uint64_t s[25U] = { 0U };
- absorb(s, rateInBytes, inputByteLen, input, delimitedSuffix);
- Hacl_Hash_SHA3_squeeze0(s, rateInBytes, outputByteLen, output);
+ for (uint32_t i0 = 0U; i0 < outputByteLen / 168U; i0++)
+ {
+ uint8_t hbuf[256U] = { 0U };
+ uint64_t ws[32U] = { 0U };
+ memcpy(ws, state, 25U * sizeof (uint64_t));
+ for (uint32_t i = 0U; i < 32U; i++)
+ {
+ store64_le(hbuf + i * 8U, ws[i]);
+ }
+ uint8_t *b0 = output;
+ memcpy(b0 + i0 * 168U, hbuf, 168U * sizeof (uint8_t));
+ for (uint32_t i1 = 0U; i1 < 24U; i1++)
+ {
+ uint64_t _C[5U] = { 0U };
+ KRML_MAYBE_FOR5(i,
+ 0U,
+ 5U,
+ 1U,
+ _C[i] =
+ state[i
+ + 0U]
+ ^ (state[i + 5U] ^ (state[i + 10U] ^ (state[i + 15U] ^ state[i + 20U]))););
+ KRML_MAYBE_FOR5(i2,
+ 0U,
+ 5U,
+ 1U,
+ uint64_t uu____0 = _C[(i2 + 1U) % 5U];
+ uint64_t _D = _C[(i2 + 4U) % 5U] ^ (uu____0 << 1U | uu____0 >> 63U);
+ KRML_MAYBE_FOR5(i, 0U, 5U, 1U, state[i2 + 5U * i] = state[i2 + 5U * i] ^ _D;););
+ uint64_t x = state[1U];
+ uint64_t current = x;
+ for (uint32_t i = 0U; i < 24U; i++)
+ {
+ uint32_t _Y = Hacl_Hash_SHA3_keccak_piln[i];
+ uint32_t r = Hacl_Hash_SHA3_keccak_rotc[i];
+ uint64_t temp = state[_Y];
+ uint64_t uu____1 = current;
+ state[_Y] = uu____1 << r | uu____1 >> (64U - r);
+ current = temp;
+ }
+ KRML_MAYBE_FOR5(i,
+ 0U,
+ 5U,
+ 1U,
+ uint64_t v0 = state[0U + 5U * i] ^ (~state[1U + 5U * i] & state[2U + 5U * i]);
+ uint64_t v1 = state[1U + 5U * i] ^ (~state[2U + 5U * i] & state[3U + 5U * i]);
+ uint64_t v2 = state[2U + 5U * i] ^ (~state[3U + 5U * i] & state[4U + 5U * i]);
+ uint64_t v3 = state[3U + 5U * i] ^ (~state[4U + 5U * i] & state[0U + 5U * i]);
+ uint64_t v4 = state[4U + 5U * i] ^ (~state[0U + 5U * i] & state[1U + 5U * i]);
+ state[0U + 5U * i] = v0;
+ state[1U + 5U * i] = v1;
+ state[2U + 5U * i] = v2;
+ state[3U + 5U * i] = v3;
+ state[4U + 5U * i] = v4;);
+ uint64_t c = Hacl_Hash_SHA3_keccak_rndc[i1];
+ state[0U] = state[0U] ^ c;
+ }
+ }
}
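The three streaming functions above are meant to be used together: absorb all full 168-byte blocks, absorb the final partial block with its padding, then squeeze full blocks. A minimal sketch, assuming the same public header as before; `shake128_streamed` is a hypothetical wrapper name:

#include <stdint.h>
#include "Hacl_Hash_SHA3.h" /* assumed public header for the streaming API */

/* Hypothetical wrapper: SHAKE128 of `in` into `out`, where outLen is a
   multiple of 168 so that squeeze_nblocks writes every output byte. */
void shake128_streamed(uint8_t *out, uint32_t outLen, uint8_t *in, uint32_t inLen)
{
  /* 25 x uint64_t = 200-byte Keccak state, zero-initialized via calloc. */
  uint64_t *st = Hacl_Hash_SHA3_state_malloc();
  /* Absorb inLen / 168 full blocks; the trailing partial block is ignored. */
  Hacl_Hash_SHA3_shake128_absorb_nblocks(st, in, inLen);
  /* Absorb the trailing inLen % 168 bytes and apply the 0x1F/0x80 padding;
     the full buffer length is passed again and the full blocks are skipped. */
  Hacl_Hash_SHA3_shake128_absorb_final(st, in, inLen);
  /* Squeeze outLen / 168 full blocks; a trailing partial block would not
     be written, hence the multiple-of-168 requirement above. */
  Hacl_Hash_SHA3_shake128_squeeze_nblocks(st, out, outLen);
  Hacl_Hash_SHA3_state_free(st);
}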
diff --git a/src/Hacl_Hash_SHA3_Simd256.c b/src/Hacl_Hash_SHA3_Simd256.c
new file mode 100644
index 00000000..131c34e6
--- /dev/null
+++ b/src/Hacl_Hash_SHA3_Simd256.c
@@ -0,0 +1,6733 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation
+ * Copyright (c) 2022-2023 HACL* Contributors
+ *
+ * Permission is hereby granted, free of charge, to any person
obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + + +#include "Hacl_Hash_SHA3_Simd256.h" + +#include "internal/Hacl_Hash_SHA3.h" + +void +Hacl_Hash_SHA3_Simd256_absorb_inner_256( + uint32_t rateInBytes, + Hacl_Hash_SHA2_uint8_4p b, + Lib_IntVector_Intrinsics_vec256 *s +) +{ + KRML_MAYBE_UNUSED_VAR(rateInBytes); + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b3 = b.snd.snd.snd; + uint8_t *b2 = b.snd.snd.fst; + uint8_t *b1 = b.snd.fst; + uint8_t *b0 = b.fst; + ws[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0); + ws[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1); + ws[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2); + ws[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3); + ws[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 32U); + ws[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 32U); + ws[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 32U); + ws[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 32U); + ws[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 64U); + ws[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 64U); + ws[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 64U); + ws[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 64U); + ws[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 96U); + ws[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 96U); + ws[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 96U); + ws[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 96U); + ws[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 128U); + ws[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 128U); + ws[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 128U); + ws[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 128U); + ws[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 160U); + ws[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 160U); + ws[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 160U); + ws[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 160U); + ws[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 192U); + ws[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 192U); + ws[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 192U); + ws[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 192U); + ws[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 224U); + ws[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 224U); + ws[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 224U); + ws[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 
224U); + Lib_IntVector_Intrinsics_vec256 v00 = ws[0U]; + Lib_IntVector_Intrinsics_vec256 v10 = ws[1U]; + Lib_IntVector_Intrinsics_vec256 v20 = ws[2U]; + Lib_IntVector_Intrinsics_vec256 v30 = ws[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v1_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v2_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v3_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v0__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v1__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v2__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 + v3__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 ws0 = v0__; + Lib_IntVector_Intrinsics_vec256 ws1 = v2__; + Lib_IntVector_Intrinsics_vec256 ws2 = v1__; + Lib_IntVector_Intrinsics_vec256 ws3 = v3__; + Lib_IntVector_Intrinsics_vec256 v01 = ws[4U]; + Lib_IntVector_Intrinsics_vec256 v11 = ws[5U]; + Lib_IntVector_Intrinsics_vec256 v21 = ws[6U]; + Lib_IntVector_Intrinsics_vec256 v31 = ws[7U]; + Lib_IntVector_Intrinsics_vec256 + v0_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v1_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v2_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v3_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v0__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v1__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v2__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 + v3__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 ws4 = v0__0; + Lib_IntVector_Intrinsics_vec256 ws5 = v2__0; + Lib_IntVector_Intrinsics_vec256 ws6 = v1__0; + Lib_IntVector_Intrinsics_vec256 ws7 = v3__0; + Lib_IntVector_Intrinsics_vec256 v02 = ws[8U]; + Lib_IntVector_Intrinsics_vec256 v12 = ws[9U]; + Lib_IntVector_Intrinsics_vec256 v22 = ws[10U]; + Lib_IntVector_Intrinsics_vec256 v32 = ws[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v1_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v2_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v3_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v0__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v1__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v2__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 + v3__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 ws8 = v0__1; + Lib_IntVector_Intrinsics_vec256 
ws9 = v2__1; + Lib_IntVector_Intrinsics_vec256 ws10 = v1__1; + Lib_IntVector_Intrinsics_vec256 ws11 = v3__1; + Lib_IntVector_Intrinsics_vec256 v03 = ws[12U]; + Lib_IntVector_Intrinsics_vec256 v13 = ws[13U]; + Lib_IntVector_Intrinsics_vec256 v23 = ws[14U]; + Lib_IntVector_Intrinsics_vec256 v33 = ws[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v1_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v2_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v3_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v0__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v1__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v2__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 + v3__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 ws12 = v0__2; + Lib_IntVector_Intrinsics_vec256 ws13 = v2__2; + Lib_IntVector_Intrinsics_vec256 ws14 = v1__2; + Lib_IntVector_Intrinsics_vec256 ws15 = v3__2; + Lib_IntVector_Intrinsics_vec256 v04 = ws[16U]; + Lib_IntVector_Intrinsics_vec256 v14 = ws[17U]; + Lib_IntVector_Intrinsics_vec256 v24 = ws[18U]; + Lib_IntVector_Intrinsics_vec256 v34 = ws[19U]; + Lib_IntVector_Intrinsics_vec256 + v0_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v1_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v2_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v3_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v0__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v1__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v2__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 + v3__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 ws16 = v0__3; + Lib_IntVector_Intrinsics_vec256 ws17 = v2__3; + Lib_IntVector_Intrinsics_vec256 ws18 = v1__3; + Lib_IntVector_Intrinsics_vec256 ws19 = v3__3; + Lib_IntVector_Intrinsics_vec256 v05 = ws[20U]; + Lib_IntVector_Intrinsics_vec256 v15 = ws[21U]; + Lib_IntVector_Intrinsics_vec256 v25 = ws[22U]; + Lib_IntVector_Intrinsics_vec256 v35 = ws[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v1_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v2_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v3_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v0__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v1__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v2__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 + v3__4 = 
Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 ws20 = v0__4; + Lib_IntVector_Intrinsics_vec256 ws21 = v2__4; + Lib_IntVector_Intrinsics_vec256 ws22 = v1__4; + Lib_IntVector_Intrinsics_vec256 ws23 = v3__4; + Lib_IntVector_Intrinsics_vec256 v06 = ws[24U]; + Lib_IntVector_Intrinsics_vec256 v16 = ws[25U]; + Lib_IntVector_Intrinsics_vec256 v26 = ws[26U]; + Lib_IntVector_Intrinsics_vec256 v36 = ws[27U]; + Lib_IntVector_Intrinsics_vec256 + v0_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v1_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v2_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v3_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v0__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v1__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v2__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 + v3__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 ws24 = v0__5; + Lib_IntVector_Intrinsics_vec256 ws25 = v2__5; + Lib_IntVector_Intrinsics_vec256 ws26 = v1__5; + Lib_IntVector_Intrinsics_vec256 ws27 = v3__5; + Lib_IntVector_Intrinsics_vec256 v0 = ws[28U]; + Lib_IntVector_Intrinsics_vec256 v1 = ws[29U]; + Lib_IntVector_Intrinsics_vec256 v2 = ws[30U]; + Lib_IntVector_Intrinsics_vec256 v3 = ws[31U]; + Lib_IntVector_Intrinsics_vec256 + v0_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v1_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v2_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v3_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v0__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v1__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v2__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 + v3__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 ws28 = v0__6; + Lib_IntVector_Intrinsics_vec256 ws29 = v2__6; + Lib_IntVector_Intrinsics_vec256 ws30 = v1__6; + Lib_IntVector_Intrinsics_vec256 ws31 = v3__6; + ws[0U] = ws0; + ws[1U] = ws1; + ws[2U] = ws2; + ws[3U] = ws3; + ws[4U] = ws4; + ws[5U] = ws5; + ws[6U] = ws6; + ws[7U] = ws7; + ws[8U] = ws8; + ws[9U] = ws9; + ws[10U] = ws10; + ws[11U] = ws11; + ws[12U] = ws12; + ws[13U] = ws13; + ws[14U] = ws14; + ws[15U] = ws15; + ws[16U] = ws16; + ws[17U] = ws17; + ws[18U] = ws18; + ws[19U] = ws19; + ws[20U] = ws20; + ws[21U] = ws21; + ws[22U] = ws22; + ws[23U] = ws23; + ws[24U] = ws24; + ws[25U] = ws25; + ws[26U] = ws26; + ws[27U] = ws27; + ws[28U] = ws28; + ws[29U] = ws29; + ws[30U] = ws30; + ws[31U] = ws31; + for (uint32_t i = 0U; i < 25U; i++) + { + s[i] = Lib_IntVector_Intrinsics_vec256_xor(s[i], ws[i]); + } + for (uint32_t i0 = 0U; i0 < 24U; i0++) + { + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + 
Lib_IntVector_Intrinsics_vec256 uu____0 = s[i + 0U]; + Lib_IntVector_Intrinsics_vec256 uu____1 = s[i + 5U]; + Lib_IntVector_Intrinsics_vec256 uu____2 = s[i + 10U]; + _C[i] = + Lib_IntVector_Intrinsics_vec256_xor(uu____0, + Lib_IntVector_Intrinsics_vec256_xor(uu____1, + Lib_IntVector_Intrinsics_vec256_xor(uu____2, + Lib_IntVector_Intrinsics_vec256_xor(s[i + 15U], s[i + 20U]))));); + KRML_MAYBE_FOR5(i1, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____3 = _C[(i1 + 4U) % 5U]; + Lib_IntVector_Intrinsics_vec256 uu____4 = _C[(i1 + 1U) % 5U]; + Lib_IntVector_Intrinsics_vec256 + _D = + Lib_IntVector_Intrinsics_vec256_xor(uu____3, + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____4, + 1U), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____4, 63U))); + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + s[i1 + 5U * i] = Lib_IntVector_Intrinsics_vec256_xor(s[i1 + 5U * i], _D););); + Lib_IntVector_Intrinsics_vec256 x = s[1U]; + Lib_IntVector_Intrinsics_vec256 current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Hash_SHA3_keccak_piln[i]; + uint32_t r = Hacl_Hash_SHA3_keccak_rotc[i]; + Lib_IntVector_Intrinsics_vec256 temp = s[_Y]; + Lib_IntVector_Intrinsics_vec256 uu____5 = current; + s[_Y] = + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____5, r), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____5, 64U - r)); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____6 = s[0U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____7 = Lib_IntVector_Intrinsics_vec256_lognot(s[1U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v07 = + Lib_IntVector_Intrinsics_vec256_xor(uu____6, + Lib_IntVector_Intrinsics_vec256_and(uu____7, s[2U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____8 = s[1U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____9 = Lib_IntVector_Intrinsics_vec256_lognot(s[2U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v17 = + Lib_IntVector_Intrinsics_vec256_xor(uu____8, + Lib_IntVector_Intrinsics_vec256_and(uu____9, s[3U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____10 = s[2U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____11 = Lib_IntVector_Intrinsics_vec256_lognot(s[3U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v27 = + Lib_IntVector_Intrinsics_vec256_xor(uu____10, + Lib_IntVector_Intrinsics_vec256_and(uu____11, s[4U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____12 = s[3U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____13 = Lib_IntVector_Intrinsics_vec256_lognot(s[4U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v37 = + Lib_IntVector_Intrinsics_vec256_xor(uu____12, + Lib_IntVector_Intrinsics_vec256_and(uu____13, s[0U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____14 = s[4U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____15 = Lib_IntVector_Intrinsics_vec256_lognot(s[0U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v4 = + Lib_IntVector_Intrinsics_vec256_xor(uu____14, + Lib_IntVector_Intrinsics_vec256_and(uu____15, s[1U + 5U * i])); + s[0U + 5U * i] = v07; + s[1U + 5U * i] = v17; + s[2U + 5U * i] = v27; + s[3U + 5U * i] = v37; + s[4U + 5U * i] = v4;); + uint64_t c = Hacl_Hash_SHA3_keccak_rndc[i0]; + Lib_IntVector_Intrinsics_vec256 uu____16 = s[0U]; + s[0U] = + Lib_IntVector_Intrinsics_vec256_xor(uu____16, + Lib_IntVector_Intrinsics_vec256_load64(c)); + } +} + +void +Hacl_Hash_SHA3_Simd256_shake128( + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, + uint32_t outputByteLen, + 
uint8_t *input0, + uint8_t *input1, + uint8_t *input2, + uint8_t *input3, + uint32_t inputByteLen +) +{ + Hacl_Hash_SHA2_uint8_4p + ib = { .fst = input0, .snd = { .fst = input1, .snd = { .fst = input2, .snd = input3 } } }; + Hacl_Hash_SHA2_uint8_4p + rb = { .fst = output0, .snd = { .fst = output1, .snd = { .fst = output2, .snd = output3 } } }; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 s[25U] KRML_POST_ALIGN(32) = { 0U }; + uint32_t rateInBytes1 = 168U; + for (uint32_t i = 0U; i < inputByteLen / rateInBytes1; i++) + { + uint8_t b00[256U] = { 0U }; + uint8_t b10[256U] = { 0U }; + uint8_t b20[256U] = { 0U }; + uint8_t b30[256U] = { 0U }; + Hacl_Hash_SHA2_uint8_4p + b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; + uint8_t *b3 = ib.snd.snd.snd; + uint8_t *b2 = ib.snd.snd.fst; + uint8_t *b1 = ib.snd.fst; + uint8_t *b0 = ib.fst; + uint8_t *bl3 = b_.snd.snd.snd; + uint8_t *bl2 = b_.snd.snd.fst; + uint8_t *bl1 = b_.snd.fst; + uint8_t *bl0 = b_.fst; + memcpy(bl0, b0 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t)); + memcpy(bl1, b1 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t)); + memcpy(bl2, b2 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t)); + memcpy(bl3, b3 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t)); + Hacl_Hash_SHA3_Simd256_absorb_inner_256(rateInBytes1, b_, s); + } + uint8_t b00[256U] = { 0U }; + uint8_t b10[256U] = { 0U }; + uint8_t b20[256U] = { 0U }; + uint8_t b30[256U] = { 0U }; + Hacl_Hash_SHA2_uint8_4p + b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; + uint32_t rem = inputByteLen % rateInBytes1; + uint8_t *b31 = ib.snd.snd.snd; + uint8_t *b21 = ib.snd.snd.fst; + uint8_t *b11 = ib.snd.fst; + uint8_t *b01 = ib.fst; + uint8_t *bl3 = b_.snd.snd.snd; + uint8_t *bl2 = b_.snd.snd.fst; + uint8_t *bl1 = b_.snd.fst; + uint8_t *bl0 = b_.fst; + memcpy(bl0, b01 + inputByteLen - rem, rem * sizeof (uint8_t)); + memcpy(bl1, b11 + inputByteLen - rem, rem * sizeof (uint8_t)); + memcpy(bl2, b21 + inputByteLen - rem, rem * sizeof (uint8_t)); + memcpy(bl3, b31 + inputByteLen - rem, rem * sizeof (uint8_t)); + uint8_t *b32 = b_.snd.snd.snd; + uint8_t *b22 = b_.snd.snd.fst; + uint8_t *b12 = b_.snd.fst; + uint8_t *b02 = b_.fst; + b02[inputByteLen % rateInBytes1] = 0x1FU; + b12[inputByteLen % rateInBytes1] = 0x1FU; + b22[inputByteLen % rateInBytes1] = 0x1FU; + b32[inputByteLen % rateInBytes1] = 0x1FU; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b33 = b_.snd.snd.snd; + uint8_t *b23 = b_.snd.snd.fst; + uint8_t *b13 = b_.snd.fst; + uint8_t *b03 = b_.fst; + ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03); + ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13); + ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23); + ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33); + ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 32U); + ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 32U); + ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 32U); + ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 32U); + ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 64U); + ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 64U); + ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 64U); + ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 64U); + ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 96U); + ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 96U); + 
ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 96U); + ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 96U); + ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 128U); + ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 128U); + ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 128U); + ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 128U); + ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 160U); + ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 160U); + ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 160U); + ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 160U); + ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 192U); + ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 192U); + ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 192U); + ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 192U); + ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 224U); + ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 224U); + ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 224U); + ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 224U); + Lib_IntVector_Intrinsics_vec256 v00 = ws32[0U]; + Lib_IntVector_Intrinsics_vec256 v10 = ws32[1U]; + Lib_IntVector_Intrinsics_vec256 v20 = ws32[2U]; + Lib_IntVector_Intrinsics_vec256 v30 = ws32[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v1_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v2_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v3_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v0__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v1__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v2__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 + v3__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 ws00 = v0__; + Lib_IntVector_Intrinsics_vec256 ws110 = v2__; + Lib_IntVector_Intrinsics_vec256 ws210 = v1__; + Lib_IntVector_Intrinsics_vec256 ws33 = v3__; + Lib_IntVector_Intrinsics_vec256 v01 = ws32[4U]; + Lib_IntVector_Intrinsics_vec256 v11 = ws32[5U]; + Lib_IntVector_Intrinsics_vec256 v21 = ws32[6U]; + Lib_IntVector_Intrinsics_vec256 v31 = ws32[7U]; + Lib_IntVector_Intrinsics_vec256 + v0_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v1_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v2_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v3_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v0__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v1__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v2__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 + v3__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 ws40 = v0__0; + 
Lib_IntVector_Intrinsics_vec256 ws50 = v2__0; + Lib_IntVector_Intrinsics_vec256 ws60 = v1__0; + Lib_IntVector_Intrinsics_vec256 ws70 = v3__0; + Lib_IntVector_Intrinsics_vec256 v02 = ws32[8U]; + Lib_IntVector_Intrinsics_vec256 v12 = ws32[9U]; + Lib_IntVector_Intrinsics_vec256 v22 = ws32[10U]; + Lib_IntVector_Intrinsics_vec256 v32 = ws32[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v1_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v2_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v3_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v0__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v1__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v2__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 + v3__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 ws80 = v0__1; + Lib_IntVector_Intrinsics_vec256 ws90 = v2__1; + Lib_IntVector_Intrinsics_vec256 ws100 = v1__1; + Lib_IntVector_Intrinsics_vec256 ws111 = v3__1; + Lib_IntVector_Intrinsics_vec256 v03 = ws32[12U]; + Lib_IntVector_Intrinsics_vec256 v13 = ws32[13U]; + Lib_IntVector_Intrinsics_vec256 v23 = ws32[14U]; + Lib_IntVector_Intrinsics_vec256 v33 = ws32[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v1_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v2_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v3_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v0__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v1__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v2__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 + v3__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 ws120 = v0__2; + Lib_IntVector_Intrinsics_vec256 ws130 = v2__2; + Lib_IntVector_Intrinsics_vec256 ws140 = v1__2; + Lib_IntVector_Intrinsics_vec256 ws150 = v3__2; + Lib_IntVector_Intrinsics_vec256 v04 = ws32[16U]; + Lib_IntVector_Intrinsics_vec256 v14 = ws32[17U]; + Lib_IntVector_Intrinsics_vec256 v24 = ws32[18U]; + Lib_IntVector_Intrinsics_vec256 v34 = ws32[19U]; + Lib_IntVector_Intrinsics_vec256 + v0_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v1_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v2_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v3_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v0__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v1__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v2__3 = 
Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 + v3__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 ws160 = v0__3; + Lib_IntVector_Intrinsics_vec256 ws170 = v2__3; + Lib_IntVector_Intrinsics_vec256 ws180 = v1__3; + Lib_IntVector_Intrinsics_vec256 ws190 = v3__3; + Lib_IntVector_Intrinsics_vec256 v05 = ws32[20U]; + Lib_IntVector_Intrinsics_vec256 v15 = ws32[21U]; + Lib_IntVector_Intrinsics_vec256 v25 = ws32[22U]; + Lib_IntVector_Intrinsics_vec256 v35 = ws32[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v1_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v2_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v3_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v0__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v1__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v2__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 + v3__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 ws200 = v0__4; + Lib_IntVector_Intrinsics_vec256 ws211 = v2__4; + Lib_IntVector_Intrinsics_vec256 ws220 = v1__4; + Lib_IntVector_Intrinsics_vec256 ws230 = v3__4; + Lib_IntVector_Intrinsics_vec256 v06 = ws32[24U]; + Lib_IntVector_Intrinsics_vec256 v16 = ws32[25U]; + Lib_IntVector_Intrinsics_vec256 v26 = ws32[26U]; + Lib_IntVector_Intrinsics_vec256 v36 = ws32[27U]; + Lib_IntVector_Intrinsics_vec256 + v0_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v1_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v2_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v3_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v0__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v1__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v2__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 + v3__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 ws240 = v0__5; + Lib_IntVector_Intrinsics_vec256 ws250 = v2__5; + Lib_IntVector_Intrinsics_vec256 ws260 = v1__5; + Lib_IntVector_Intrinsics_vec256 ws270 = v3__5; + Lib_IntVector_Intrinsics_vec256 v07 = ws32[28U]; + Lib_IntVector_Intrinsics_vec256 v17 = ws32[29U]; + Lib_IntVector_Intrinsics_vec256 v27 = ws32[30U]; + Lib_IntVector_Intrinsics_vec256 v37 = ws32[31U]; + Lib_IntVector_Intrinsics_vec256 + v0_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v07, v17); + Lib_IntVector_Intrinsics_vec256 + v1_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v07, v17); + Lib_IntVector_Intrinsics_vec256 + v2_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v27, v37); + Lib_IntVector_Intrinsics_vec256 + v3_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v27, v37); + Lib_IntVector_Intrinsics_vec256 + v0__6 = 
Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v1__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v2__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 + v3__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 ws280 = v0__6; + Lib_IntVector_Intrinsics_vec256 ws290 = v2__6; + Lib_IntVector_Intrinsics_vec256 ws300 = v1__6; + Lib_IntVector_Intrinsics_vec256 ws310 = v3__6; + ws32[0U] = ws00; + ws32[1U] = ws110; + ws32[2U] = ws210; + ws32[3U] = ws33; + ws32[4U] = ws40; + ws32[5U] = ws50; + ws32[6U] = ws60; + ws32[7U] = ws70; + ws32[8U] = ws80; + ws32[9U] = ws90; + ws32[10U] = ws100; + ws32[11U] = ws111; + ws32[12U] = ws120; + ws32[13U] = ws130; + ws32[14U] = ws140; + ws32[15U] = ws150; + ws32[16U] = ws160; + ws32[17U] = ws170; + ws32[18U] = ws180; + ws32[19U] = ws190; + ws32[20U] = ws200; + ws32[21U] = ws211; + ws32[22U] = ws220; + ws32[23U] = ws230; + ws32[24U] = ws240; + ws32[25U] = ws250; + ws32[26U] = ws260; + ws32[27U] = ws270; + ws32[28U] = ws280; + ws32[29U] = ws290; + ws32[30U] = ws300; + ws32[31U] = ws310; + for (uint32_t i = 0U; i < 25U; i++) + { + s[i] = Lib_IntVector_Intrinsics_vec256_xor(s[i], ws32[i]); + } + uint8_t b04[256U] = { 0U }; + uint8_t b14[256U] = { 0U }; + uint8_t b24[256U] = { 0U }; + uint8_t b34[256U] = { 0U }; + Hacl_Hash_SHA2_uint8_4p + b = { .fst = b04, .snd = { .fst = b14, .snd = { .fst = b24, .snd = b34 } } }; + uint8_t *b3 = b.snd.snd.snd; + uint8_t *b25 = b.snd.snd.fst; + uint8_t *b15 = b.snd.fst; + uint8_t *b05 = b.fst; + b05[rateInBytes1 - 1U] = 0x80U; + b15[rateInBytes1 - 1U] = 0x80U; + b25[rateInBytes1 - 1U] = 0x80U; + b3[rateInBytes1 - 1U] = 0x80U; + Hacl_Hash_SHA3_Simd256_absorb_inner_256(rateInBytes1, b, s); + for (uint32_t i0 = 0U; i0 < outputByteLen / rateInBytes1; i0++) + { + uint8_t hbuf[1024U] = { 0U }; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws[32U] KRML_POST_ALIGN(32) = { 0U }; + memcpy(ws, s, 25U * sizeof (Lib_IntVector_Intrinsics_vec256)); + Lib_IntVector_Intrinsics_vec256 v08 = ws[0U]; + Lib_IntVector_Intrinsics_vec256 v18 = ws[1U]; + Lib_IntVector_Intrinsics_vec256 v28 = ws[2U]; + Lib_IntVector_Intrinsics_vec256 v38 = ws[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v08, v18); + Lib_IntVector_Intrinsics_vec256 + v1_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v08, v18); + Lib_IntVector_Intrinsics_vec256 + v2_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v28, v38); + Lib_IntVector_Intrinsics_vec256 + v3_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v28, v38); + Lib_IntVector_Intrinsics_vec256 + v0__7 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_7, v2_7); + Lib_IntVector_Intrinsics_vec256 + v1__7 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_7, v2_7); + Lib_IntVector_Intrinsics_vec256 + v2__7 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_7, v3_7); + Lib_IntVector_Intrinsics_vec256 + v3__7 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_7, v3_7); + Lib_IntVector_Intrinsics_vec256 ws0 = v0__7; + Lib_IntVector_Intrinsics_vec256 ws1 = v2__7; + Lib_IntVector_Intrinsics_vec256 ws2 = v1__7; + Lib_IntVector_Intrinsics_vec256 ws3 = v3__7; + Lib_IntVector_Intrinsics_vec256 v09 = ws[4U]; + Lib_IntVector_Intrinsics_vec256 v19 = ws[5U]; + Lib_IntVector_Intrinsics_vec256 v29 = ws[6U]; + Lib_IntVector_Intrinsics_vec256 v39 = 
ws[7U]; + Lib_IntVector_Intrinsics_vec256 + v0_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v09, v19); + Lib_IntVector_Intrinsics_vec256 + v1_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v09, v19); + Lib_IntVector_Intrinsics_vec256 + v2_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v29, v39); + Lib_IntVector_Intrinsics_vec256 + v3_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v29, v39); + Lib_IntVector_Intrinsics_vec256 + v0__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_8, v2_8); + Lib_IntVector_Intrinsics_vec256 + v1__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_8, v2_8); + Lib_IntVector_Intrinsics_vec256 + v2__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_8, v3_8); + Lib_IntVector_Intrinsics_vec256 + v3__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_8, v3_8); + Lib_IntVector_Intrinsics_vec256 ws4 = v0__8; + Lib_IntVector_Intrinsics_vec256 ws5 = v2__8; + Lib_IntVector_Intrinsics_vec256 ws6 = v1__8; + Lib_IntVector_Intrinsics_vec256 ws7 = v3__8; + Lib_IntVector_Intrinsics_vec256 v010 = ws[8U]; + Lib_IntVector_Intrinsics_vec256 v110 = ws[9U]; + Lib_IntVector_Intrinsics_vec256 v210 = ws[10U]; + Lib_IntVector_Intrinsics_vec256 v310 = ws[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v010, v110); + Lib_IntVector_Intrinsics_vec256 + v1_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v010, v110); + Lib_IntVector_Intrinsics_vec256 + v2_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v210, v310); + Lib_IntVector_Intrinsics_vec256 + v3_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v210, v310); + Lib_IntVector_Intrinsics_vec256 + v0__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_9, v2_9); + Lib_IntVector_Intrinsics_vec256 + v1__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_9, v2_9); + Lib_IntVector_Intrinsics_vec256 + v2__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_9, v3_9); + Lib_IntVector_Intrinsics_vec256 + v3__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_9, v3_9); + Lib_IntVector_Intrinsics_vec256 ws8 = v0__9; + Lib_IntVector_Intrinsics_vec256 ws9 = v2__9; + Lib_IntVector_Intrinsics_vec256 ws10 = v1__9; + Lib_IntVector_Intrinsics_vec256 ws11 = v3__9; + Lib_IntVector_Intrinsics_vec256 v011 = ws[12U]; + Lib_IntVector_Intrinsics_vec256 v111 = ws[13U]; + Lib_IntVector_Intrinsics_vec256 v211 = ws[14U]; + Lib_IntVector_Intrinsics_vec256 v311 = ws[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_10 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v011, v111); + Lib_IntVector_Intrinsics_vec256 + v1_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v011, v111); + Lib_IntVector_Intrinsics_vec256 + v2_10 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v211, v311); + Lib_IntVector_Intrinsics_vec256 + v3_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v211, v311); + Lib_IntVector_Intrinsics_vec256 + v0__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_10, v2_10); + Lib_IntVector_Intrinsics_vec256 + v1__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_10, v2_10); + Lib_IntVector_Intrinsics_vec256 + v2__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_10, v3_10); + Lib_IntVector_Intrinsics_vec256 + v3__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_10, v3_10); + Lib_IntVector_Intrinsics_vec256 ws12 = v0__10; + Lib_IntVector_Intrinsics_vec256 ws13 = v2__10; + Lib_IntVector_Intrinsics_vec256 ws14 = v1__10; + Lib_IntVector_Intrinsics_vec256 ws15 = v3__10; + 
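/* The transpose continues over the remaining state words; all 32 vectors are then stored little-endian into hbuf and split across the four output lanes. */ +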
Lib_IntVector_Intrinsics_vec256 v012 = ws[16U]; + Lib_IntVector_Intrinsics_vec256 v112 = ws[17U]; + Lib_IntVector_Intrinsics_vec256 v212 = ws[18U]; + Lib_IntVector_Intrinsics_vec256 v312 = ws[19U]; + Lib_IntVector_Intrinsics_vec256 + v0_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v012, v112); + Lib_IntVector_Intrinsics_vec256 + v1_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v012, v112); + Lib_IntVector_Intrinsics_vec256 + v2_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v212, v312); + Lib_IntVector_Intrinsics_vec256 + v3_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v212, v312); + Lib_IntVector_Intrinsics_vec256 + v0__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_11, v2_11); + Lib_IntVector_Intrinsics_vec256 + v1__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_11, v2_11); + Lib_IntVector_Intrinsics_vec256 + v2__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_11, v3_11); + Lib_IntVector_Intrinsics_vec256 + v3__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_11, v3_11); + Lib_IntVector_Intrinsics_vec256 ws16 = v0__11; + Lib_IntVector_Intrinsics_vec256 ws17 = v2__11; + Lib_IntVector_Intrinsics_vec256 ws18 = v1__11; + Lib_IntVector_Intrinsics_vec256 ws19 = v3__11; + Lib_IntVector_Intrinsics_vec256 v013 = ws[20U]; + Lib_IntVector_Intrinsics_vec256 v113 = ws[21U]; + Lib_IntVector_Intrinsics_vec256 v213 = ws[22U]; + Lib_IntVector_Intrinsics_vec256 v313 = ws[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v013, v113); + Lib_IntVector_Intrinsics_vec256 + v1_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v013, v113); + Lib_IntVector_Intrinsics_vec256 + v2_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v213, v313); + Lib_IntVector_Intrinsics_vec256 + v3_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v213, v313); + Lib_IntVector_Intrinsics_vec256 + v0__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_12, v2_12); + Lib_IntVector_Intrinsics_vec256 + v1__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_12, v2_12); + Lib_IntVector_Intrinsics_vec256 + v2__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_12, v3_12); + Lib_IntVector_Intrinsics_vec256 + v3__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_12, v3_12); + Lib_IntVector_Intrinsics_vec256 ws20 = v0__12; + Lib_IntVector_Intrinsics_vec256 ws21 = v2__12; + Lib_IntVector_Intrinsics_vec256 ws22 = v1__12; + Lib_IntVector_Intrinsics_vec256 ws23 = v3__12; + Lib_IntVector_Intrinsics_vec256 v014 = ws[24U]; + Lib_IntVector_Intrinsics_vec256 v114 = ws[25U]; + Lib_IntVector_Intrinsics_vec256 v214 = ws[26U]; + Lib_IntVector_Intrinsics_vec256 v314 = ws[27U]; + Lib_IntVector_Intrinsics_vec256 + v0_13 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v014, v114); + Lib_IntVector_Intrinsics_vec256 + v1_13 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v014, v114); + Lib_IntVector_Intrinsics_vec256 + v2_13 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v214, v314); + Lib_IntVector_Intrinsics_vec256 + v3_13 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v214, v314); + Lib_IntVector_Intrinsics_vec256 + v0__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_13, v2_13); + Lib_IntVector_Intrinsics_vec256 + v1__13 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_13, v2_13); + Lib_IntVector_Intrinsics_vec256 + v2__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_13, v3_13); + Lib_IntVector_Intrinsics_vec256 + v3__13 = 
Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_13, v3_13); + Lib_IntVector_Intrinsics_vec256 ws24 = v0__13; + Lib_IntVector_Intrinsics_vec256 ws25 = v2__13; + Lib_IntVector_Intrinsics_vec256 ws26 = v1__13; + Lib_IntVector_Intrinsics_vec256 ws27 = v3__13; + Lib_IntVector_Intrinsics_vec256 v0 = ws[28U]; + Lib_IntVector_Intrinsics_vec256 v1 = ws[29U]; + Lib_IntVector_Intrinsics_vec256 v2 = ws[30U]; + Lib_IntVector_Intrinsics_vec256 v3 = ws[31U]; + Lib_IntVector_Intrinsics_vec256 + v0_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v1_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v2_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v3_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v0__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_14, v2_14); + Lib_IntVector_Intrinsics_vec256 + v1__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_14, v2_14); + Lib_IntVector_Intrinsics_vec256 + v2__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_14, v3_14); + Lib_IntVector_Intrinsics_vec256 + v3__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_14, v3_14); + Lib_IntVector_Intrinsics_vec256 ws28 = v0__14; + Lib_IntVector_Intrinsics_vec256 ws29 = v2__14; + Lib_IntVector_Intrinsics_vec256 ws30 = v1__14; + Lib_IntVector_Intrinsics_vec256 ws31 = v3__14; + ws[0U] = ws0; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; + ws[31U] = ws31; + for (uint32_t i = 0U; i < 32U; i++) + { + Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); + } + uint8_t *b35 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + i0 * rateInBytes1, hbuf, rateInBytes1 * sizeof (uint8_t)); + memcpy(b1 + i0 * rateInBytes1, hbuf + 256U, rateInBytes1 * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes1, hbuf + 512U, rateInBytes1 * sizeof (uint8_t)); + memcpy(b35 + i0 * rateInBytes1, hbuf + 768U, rateInBytes1 * sizeof (uint8_t)); + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____0 = s[i + 0U]; + Lib_IntVector_Intrinsics_vec256 uu____1 = s[i + 5U]; + Lib_IntVector_Intrinsics_vec256 uu____2 = s[i + 10U]; + _C[i] = + Lib_IntVector_Intrinsics_vec256_xor(uu____0, + Lib_IntVector_Intrinsics_vec256_xor(uu____1, + Lib_IntVector_Intrinsics_vec256_xor(uu____2, + Lib_IntVector_Intrinsics_vec256_xor(s[i + 15U], s[i + 20U]))));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____3 = _C[(i2 + 4U) % 5U]; + Lib_IntVector_Intrinsics_vec256 uu____4 = _C[(i2 + 1U) % 5U]; + Lib_IntVector_Intrinsics_vec256 + _D = + Lib_IntVector_Intrinsics_vec256_xor(uu____3, + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____4, + 1U), + 
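/* (x << 1) | (x >> 63): 64-bit rotate-left by one computing the theta D value. */ +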
Lib_IntVector_Intrinsics_vec256_shift_right64(uu____4, 63U))); + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + s[i2 + 5U * i] = Lib_IntVector_Intrinsics_vec256_xor(s[i2 + 5U * i], _D););); + Lib_IntVector_Intrinsics_vec256 x = s[1U]; + Lib_IntVector_Intrinsics_vec256 current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Hash_SHA3_keccak_piln[i]; + uint32_t r = Hacl_Hash_SHA3_keccak_rotc[i]; + Lib_IntVector_Intrinsics_vec256 temp = s[_Y]; + Lib_IntVector_Intrinsics_vec256 uu____5 = current; + s[_Y] = + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____5, + r), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____5, 64U - r)); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____6 = s[0U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____7 = Lib_IntVector_Intrinsics_vec256_lognot(s[1U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v015 = + Lib_IntVector_Intrinsics_vec256_xor(uu____6, + Lib_IntVector_Intrinsics_vec256_and(uu____7, s[2U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____8 = s[1U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____9 = Lib_IntVector_Intrinsics_vec256_lognot(s[2U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v115 = + Lib_IntVector_Intrinsics_vec256_xor(uu____8, + Lib_IntVector_Intrinsics_vec256_and(uu____9, s[3U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____10 = s[2U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____11 = Lib_IntVector_Intrinsics_vec256_lognot(s[3U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v215 = + Lib_IntVector_Intrinsics_vec256_xor(uu____10, + Lib_IntVector_Intrinsics_vec256_and(uu____11, s[4U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____12 = s[3U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____13 = Lib_IntVector_Intrinsics_vec256_lognot(s[4U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v315 = + Lib_IntVector_Intrinsics_vec256_xor(uu____12, + Lib_IntVector_Intrinsics_vec256_and(uu____13, s[0U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____14 = s[4U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____15 = Lib_IntVector_Intrinsics_vec256_lognot(s[0U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v4 = + Lib_IntVector_Intrinsics_vec256_xor(uu____14, + Lib_IntVector_Intrinsics_vec256_and(uu____15, s[1U + 5U * i])); + s[0U + 5U * i] = v015; + s[1U + 5U * i] = v115; + s[2U + 5U * i] = v215; + s[3U + 5U * i] = v315; + s[4U + 5U * i] = v4;); + uint64_t c = Hacl_Hash_SHA3_keccak_rndc[i1]; + Lib_IntVector_Intrinsics_vec256 uu____16 = s[0U]; + s[0U] = + Lib_IntVector_Intrinsics_vec256_xor(uu____16, + Lib_IntVector_Intrinsics_vec256_load64(c)); + } + } + uint32_t remOut = outputByteLen % rateInBytes1; + uint8_t hbuf[1024U] = { 0U }; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws[32U] KRML_POST_ALIGN(32) = { 0U }; + memcpy(ws, s, 25U * sizeof (Lib_IntVector_Intrinsics_vec256)); + Lib_IntVector_Intrinsics_vec256 v08 = ws[0U]; + Lib_IntVector_Intrinsics_vec256 v18 = ws[1U]; + Lib_IntVector_Intrinsics_vec256 v28 = ws[2U]; + Lib_IntVector_Intrinsics_vec256 v38 = ws[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v08, v18); + Lib_IntVector_Intrinsics_vec256 + v1_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v08, v18); + Lib_IntVector_Intrinsics_vec256 + v2_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v28, v38); + Lib_IntVector_Intrinsics_vec256 + v3_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v28, v38); + Lib_IntVector_Intrinsics_vec256 
+ v0__7 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_7, v2_7); + Lib_IntVector_Intrinsics_vec256 + v1__7 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_7, v2_7); + Lib_IntVector_Intrinsics_vec256 + v2__7 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_7, v3_7); + Lib_IntVector_Intrinsics_vec256 + v3__7 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_7, v3_7); + Lib_IntVector_Intrinsics_vec256 ws0 = v0__7; + Lib_IntVector_Intrinsics_vec256 ws1 = v2__7; + Lib_IntVector_Intrinsics_vec256 ws2 = v1__7; + Lib_IntVector_Intrinsics_vec256 ws3 = v3__7; + Lib_IntVector_Intrinsics_vec256 v09 = ws[4U]; + Lib_IntVector_Intrinsics_vec256 v19 = ws[5U]; + Lib_IntVector_Intrinsics_vec256 v29 = ws[6U]; + Lib_IntVector_Intrinsics_vec256 v39 = ws[7U]; + Lib_IntVector_Intrinsics_vec256 + v0_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v09, v19); + Lib_IntVector_Intrinsics_vec256 + v1_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v09, v19); + Lib_IntVector_Intrinsics_vec256 + v2_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v29, v39); + Lib_IntVector_Intrinsics_vec256 + v3_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v29, v39); + Lib_IntVector_Intrinsics_vec256 + v0__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_8, v2_8); + Lib_IntVector_Intrinsics_vec256 + v1__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_8, v2_8); + Lib_IntVector_Intrinsics_vec256 + v2__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_8, v3_8); + Lib_IntVector_Intrinsics_vec256 + v3__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_8, v3_8); + Lib_IntVector_Intrinsics_vec256 ws4 = v0__8; + Lib_IntVector_Intrinsics_vec256 ws5 = v2__8; + Lib_IntVector_Intrinsics_vec256 ws6 = v1__8; + Lib_IntVector_Intrinsics_vec256 ws7 = v3__8; + Lib_IntVector_Intrinsics_vec256 v010 = ws[8U]; + Lib_IntVector_Intrinsics_vec256 v110 = ws[9U]; + Lib_IntVector_Intrinsics_vec256 v210 = ws[10U]; + Lib_IntVector_Intrinsics_vec256 v310 = ws[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v010, v110); + Lib_IntVector_Intrinsics_vec256 + v1_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v010, v110); + Lib_IntVector_Intrinsics_vec256 + v2_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v210, v310); + Lib_IntVector_Intrinsics_vec256 + v3_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v210, v310); + Lib_IntVector_Intrinsics_vec256 + v0__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_9, v2_9); + Lib_IntVector_Intrinsics_vec256 + v1__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_9, v2_9); + Lib_IntVector_Intrinsics_vec256 + v2__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_9, v3_9); + Lib_IntVector_Intrinsics_vec256 + v3__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_9, v3_9); + Lib_IntVector_Intrinsics_vec256 ws8 = v0__9; + Lib_IntVector_Intrinsics_vec256 ws9 = v2__9; + Lib_IntVector_Intrinsics_vec256 ws10 = v1__9; + Lib_IntVector_Intrinsics_vec256 ws11 = v3__9; + Lib_IntVector_Intrinsics_vec256 v011 = ws[12U]; + Lib_IntVector_Intrinsics_vec256 v111 = ws[13U]; + Lib_IntVector_Intrinsics_vec256 v211 = ws[14U]; + Lib_IntVector_Intrinsics_vec256 v311 = ws[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_10 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v011, v111); + Lib_IntVector_Intrinsics_vec256 + v1_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v011, v111); + Lib_IntVector_Intrinsics_vec256 + v2_10 = 
Lib_IntVector_Intrinsics_vec256_interleave_low64(v211, v311); + Lib_IntVector_Intrinsics_vec256 + v3_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v211, v311); + Lib_IntVector_Intrinsics_vec256 + v0__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_10, v2_10); + Lib_IntVector_Intrinsics_vec256 + v1__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_10, v2_10); + Lib_IntVector_Intrinsics_vec256 + v2__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_10, v3_10); + Lib_IntVector_Intrinsics_vec256 + v3__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_10, v3_10); + Lib_IntVector_Intrinsics_vec256 ws12 = v0__10; + Lib_IntVector_Intrinsics_vec256 ws13 = v2__10; + Lib_IntVector_Intrinsics_vec256 ws14 = v1__10; + Lib_IntVector_Intrinsics_vec256 ws15 = v3__10; + Lib_IntVector_Intrinsics_vec256 v012 = ws[16U]; + Lib_IntVector_Intrinsics_vec256 v112 = ws[17U]; + Lib_IntVector_Intrinsics_vec256 v212 = ws[18U]; + Lib_IntVector_Intrinsics_vec256 v312 = ws[19U]; + Lib_IntVector_Intrinsics_vec256 + v0_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v012, v112); + Lib_IntVector_Intrinsics_vec256 + v1_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v012, v112); + Lib_IntVector_Intrinsics_vec256 + v2_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v212, v312); + Lib_IntVector_Intrinsics_vec256 + v3_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v212, v312); + Lib_IntVector_Intrinsics_vec256 + v0__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_11, v2_11); + Lib_IntVector_Intrinsics_vec256 + v1__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_11, v2_11); + Lib_IntVector_Intrinsics_vec256 + v2__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_11, v3_11); + Lib_IntVector_Intrinsics_vec256 + v3__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_11, v3_11); + Lib_IntVector_Intrinsics_vec256 ws16 = v0__11; + Lib_IntVector_Intrinsics_vec256 ws17 = v2__11; + Lib_IntVector_Intrinsics_vec256 ws18 = v1__11; + Lib_IntVector_Intrinsics_vec256 ws19 = v3__11; + Lib_IntVector_Intrinsics_vec256 v013 = ws[20U]; + Lib_IntVector_Intrinsics_vec256 v113 = ws[21U]; + Lib_IntVector_Intrinsics_vec256 v213 = ws[22U]; + Lib_IntVector_Intrinsics_vec256 v313 = ws[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v013, v113); + Lib_IntVector_Intrinsics_vec256 + v1_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v013, v113); + Lib_IntVector_Intrinsics_vec256 + v2_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v213, v313); + Lib_IntVector_Intrinsics_vec256 + v3_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v213, v313); + Lib_IntVector_Intrinsics_vec256 + v0__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_12, v2_12); + Lib_IntVector_Intrinsics_vec256 + v1__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_12, v2_12); + Lib_IntVector_Intrinsics_vec256 + v2__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_12, v3_12); + Lib_IntVector_Intrinsics_vec256 + v3__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_12, v3_12); + Lib_IntVector_Intrinsics_vec256 ws20 = v0__12; + Lib_IntVector_Intrinsics_vec256 ws21 = v2__12; + Lib_IntVector_Intrinsics_vec256 ws22 = v1__12; + Lib_IntVector_Intrinsics_vec256 ws23 = v3__12; + Lib_IntVector_Intrinsics_vec256 v014 = ws[24U]; + Lib_IntVector_Intrinsics_vec256 v114 = ws[25U]; + Lib_IntVector_Intrinsics_vec256 v214 = ws[26U]; + Lib_IntVector_Intrinsics_vec256 v314 = ws[27U]; + 
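/* Final squeeze: transpose the state once more and copy the trailing remOut bytes of each lane to the outputs. */ +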
Lib_IntVector_Intrinsics_vec256 + v0_13 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v014, v114); + Lib_IntVector_Intrinsics_vec256 + v1_13 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v014, v114); + Lib_IntVector_Intrinsics_vec256 + v2_13 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v214, v314); + Lib_IntVector_Intrinsics_vec256 + v3_13 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v214, v314); + Lib_IntVector_Intrinsics_vec256 + v0__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_13, v2_13); + Lib_IntVector_Intrinsics_vec256 + v1__13 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_13, v2_13); + Lib_IntVector_Intrinsics_vec256 + v2__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_13, v3_13); + Lib_IntVector_Intrinsics_vec256 + v3__13 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_13, v3_13); + Lib_IntVector_Intrinsics_vec256 ws24 = v0__13; + Lib_IntVector_Intrinsics_vec256 ws25 = v2__13; + Lib_IntVector_Intrinsics_vec256 ws26 = v1__13; + Lib_IntVector_Intrinsics_vec256 ws27 = v3__13; + Lib_IntVector_Intrinsics_vec256 v0 = ws[28U]; + Lib_IntVector_Intrinsics_vec256 v1 = ws[29U]; + Lib_IntVector_Intrinsics_vec256 v2 = ws[30U]; + Lib_IntVector_Intrinsics_vec256 v3 = ws[31U]; + Lib_IntVector_Intrinsics_vec256 + v0_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v1_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v2_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v3_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v0__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_14, v2_14); + Lib_IntVector_Intrinsics_vec256 + v1__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_14, v2_14); + Lib_IntVector_Intrinsics_vec256 + v2__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_14, v3_14); + Lib_IntVector_Intrinsics_vec256 + v3__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_14, v3_14); + Lib_IntVector_Intrinsics_vec256 ws28 = v0__14; + Lib_IntVector_Intrinsics_vec256 ws29 = v2__14; + Lib_IntVector_Intrinsics_vec256 ws30 = v1__14; + Lib_IntVector_Intrinsics_vec256 ws31 = v3__14; + ws[0U] = ws0; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; + ws[31U] = ws31; + for (uint32_t i = 0U; i < 32U; i++) + { + Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); + } + uint8_t *b35 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + outputByteLen - remOut, hbuf, remOut * sizeof (uint8_t)); + memcpy(b1 + outputByteLen - remOut, hbuf + 256U, remOut * sizeof (uint8_t)); + memcpy(b2 + outputByteLen - remOut, hbuf + 512U, remOut * sizeof (uint8_t)); + memcpy(b35 + outputByteLen - remOut, hbuf + 768U, remOut * sizeof (uint8_t)); +} + +void +Hacl_Hash_SHA3_Simd256_shake256( + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, + uint32_t outputByteLen, + 
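/* number of bytes to squeeze into each of the four output buffers */ +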
uint8_t *input0, + uint8_t *input1, + uint8_t *input2, + uint8_t *input3, + uint32_t inputByteLen +) +{ + Hacl_Hash_SHA2_uint8_4p + ib = { .fst = input0, .snd = { .fst = input1, .snd = { .fst = input2, .snd = input3 } } }; + Hacl_Hash_SHA2_uint8_4p + rb = { .fst = output0, .snd = { .fst = output1, .snd = { .fst = output2, .snd = output3 } } }; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 s[25U] KRML_POST_ALIGN(32) = { 0U }; + uint32_t rateInBytes1 = 136U; + for (uint32_t i = 0U; i < inputByteLen / rateInBytes1; i++) + { + uint8_t b00[256U] = { 0U }; + uint8_t b10[256U] = { 0U }; + uint8_t b20[256U] = { 0U }; + uint8_t b30[256U] = { 0U }; + Hacl_Hash_SHA2_uint8_4p + b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; + uint8_t *b3 = ib.snd.snd.snd; + uint8_t *b2 = ib.snd.snd.fst; + uint8_t *b1 = ib.snd.fst; + uint8_t *b0 = ib.fst; + uint8_t *bl3 = b_.snd.snd.snd; + uint8_t *bl2 = b_.snd.snd.fst; + uint8_t *bl1 = b_.snd.fst; + uint8_t *bl0 = b_.fst; + memcpy(bl0, b0 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t)); + memcpy(bl1, b1 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t)); + memcpy(bl2, b2 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t)); + memcpy(bl3, b3 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t)); + Hacl_Hash_SHA3_Simd256_absorb_inner_256(rateInBytes1, b_, s); + } + uint8_t b00[256U] = { 0U }; + uint8_t b10[256U] = { 0U }; + uint8_t b20[256U] = { 0U }; + uint8_t b30[256U] = { 0U }; + Hacl_Hash_SHA2_uint8_4p + b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; + uint32_t rem = inputByteLen % rateInBytes1; + uint8_t *b31 = ib.snd.snd.snd; + uint8_t *b21 = ib.snd.snd.fst; + uint8_t *b11 = ib.snd.fst; + uint8_t *b01 = ib.fst; + uint8_t *bl3 = b_.snd.snd.snd; + uint8_t *bl2 = b_.snd.snd.fst; + uint8_t *bl1 = b_.snd.fst; + uint8_t *bl0 = b_.fst; + memcpy(bl0, b01 + inputByteLen - rem, rem * sizeof (uint8_t)); + memcpy(bl1, b11 + inputByteLen - rem, rem * sizeof (uint8_t)); + memcpy(bl2, b21 + inputByteLen - rem, rem * sizeof (uint8_t)); + memcpy(bl3, b31 + inputByteLen - rem, rem * sizeof (uint8_t)); + uint8_t *b32 = b_.snd.snd.snd; + uint8_t *b22 = b_.snd.snd.fst; + uint8_t *b12 = b_.snd.fst; + uint8_t *b02 = b_.fst; + b02[inputByteLen % rateInBytes1] = 0x1FU; + b12[inputByteLen % rateInBytes1] = 0x1FU; + b22[inputByteLen % rateInBytes1] = 0x1FU; + b32[inputByteLen % rateInBytes1] = 0x1FU; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b33 = b_.snd.snd.snd; + uint8_t *b23 = b_.snd.snd.fst; + uint8_t *b13 = b_.snd.fst; + uint8_t *b03 = b_.fst; + ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03); + ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13); + ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23); + ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33); + ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 32U); + ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 32U); + ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 32U); + ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 32U); + ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 64U); + ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 64U); + ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 64U); + ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 64U); + ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 96U); + ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 96U); + 
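/* Identical absorb/pad structure to shake128 above, but at the SHAKE256 rate of 136 bytes per block. */ +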
ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 96U); + ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 96U); + ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 128U); + ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 128U); + ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 128U); + ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 128U); + ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 160U); + ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 160U); + ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 160U); + ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 160U); + ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 192U); + ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 192U); + ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 192U); + ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 192U); + ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 224U); + ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 224U); + ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 224U); + ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 224U); + Lib_IntVector_Intrinsics_vec256 v00 = ws32[0U]; + Lib_IntVector_Intrinsics_vec256 v10 = ws32[1U]; + Lib_IntVector_Intrinsics_vec256 v20 = ws32[2U]; + Lib_IntVector_Intrinsics_vec256 v30 = ws32[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v1_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v2_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v3_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v0__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v1__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v2__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 + v3__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 ws00 = v0__; + Lib_IntVector_Intrinsics_vec256 ws110 = v2__; + Lib_IntVector_Intrinsics_vec256 ws210 = v1__; + Lib_IntVector_Intrinsics_vec256 ws33 = v3__; + Lib_IntVector_Intrinsics_vec256 v01 = ws32[4U]; + Lib_IntVector_Intrinsics_vec256 v11 = ws32[5U]; + Lib_IntVector_Intrinsics_vec256 v21 = ws32[6U]; + Lib_IntVector_Intrinsics_vec256 v31 = ws32[7U]; + Lib_IntVector_Intrinsics_vec256 + v0_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v1_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v2_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v3_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v0__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v1__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v2__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 + v3__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 ws40 = v0__0; + 
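/* 4-way transpose of the suffix-padded final block, as in shake128 above. */ +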
Lib_IntVector_Intrinsics_vec256 ws50 = v2__0; + Lib_IntVector_Intrinsics_vec256 ws60 = v1__0; + Lib_IntVector_Intrinsics_vec256 ws70 = v3__0; + Lib_IntVector_Intrinsics_vec256 v02 = ws32[8U]; + Lib_IntVector_Intrinsics_vec256 v12 = ws32[9U]; + Lib_IntVector_Intrinsics_vec256 v22 = ws32[10U]; + Lib_IntVector_Intrinsics_vec256 v32 = ws32[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v1_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v2_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v3_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v0__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v1__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v2__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 + v3__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 ws80 = v0__1; + Lib_IntVector_Intrinsics_vec256 ws90 = v2__1; + Lib_IntVector_Intrinsics_vec256 ws100 = v1__1; + Lib_IntVector_Intrinsics_vec256 ws111 = v3__1; + Lib_IntVector_Intrinsics_vec256 v03 = ws32[12U]; + Lib_IntVector_Intrinsics_vec256 v13 = ws32[13U]; + Lib_IntVector_Intrinsics_vec256 v23 = ws32[14U]; + Lib_IntVector_Intrinsics_vec256 v33 = ws32[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v1_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v2_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v3_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v0__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v1__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v2__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 + v3__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 ws120 = v0__2; + Lib_IntVector_Intrinsics_vec256 ws130 = v2__2; + Lib_IntVector_Intrinsics_vec256 ws140 = v1__2; + Lib_IntVector_Intrinsics_vec256 ws150 = v3__2; + Lib_IntVector_Intrinsics_vec256 v04 = ws32[16U]; + Lib_IntVector_Intrinsics_vec256 v14 = ws32[17U]; + Lib_IntVector_Intrinsics_vec256 v24 = ws32[18U]; + Lib_IntVector_Intrinsics_vec256 v34 = ws32[19U]; + Lib_IntVector_Intrinsics_vec256 + v0_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v1_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v2_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v3_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v0__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v1__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v2__3 = 
Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 + v3__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 ws160 = v0__3; + Lib_IntVector_Intrinsics_vec256 ws170 = v2__3; + Lib_IntVector_Intrinsics_vec256 ws180 = v1__3; + Lib_IntVector_Intrinsics_vec256 ws190 = v3__3; + Lib_IntVector_Intrinsics_vec256 v05 = ws32[20U]; + Lib_IntVector_Intrinsics_vec256 v15 = ws32[21U]; + Lib_IntVector_Intrinsics_vec256 v25 = ws32[22U]; + Lib_IntVector_Intrinsics_vec256 v35 = ws32[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v1_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v2_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v3_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v0__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v1__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v2__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 + v3__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 ws200 = v0__4; + Lib_IntVector_Intrinsics_vec256 ws211 = v2__4; + Lib_IntVector_Intrinsics_vec256 ws220 = v1__4; + Lib_IntVector_Intrinsics_vec256 ws230 = v3__4; + Lib_IntVector_Intrinsics_vec256 v06 = ws32[24U]; + Lib_IntVector_Intrinsics_vec256 v16 = ws32[25U]; + Lib_IntVector_Intrinsics_vec256 v26 = ws32[26U]; + Lib_IntVector_Intrinsics_vec256 v36 = ws32[27U]; + Lib_IntVector_Intrinsics_vec256 + v0_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v1_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v2_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v3_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v0__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v1__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v2__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 + v3__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 ws240 = v0__5; + Lib_IntVector_Intrinsics_vec256 ws250 = v2__5; + Lib_IntVector_Intrinsics_vec256 ws260 = v1__5; + Lib_IntVector_Intrinsics_vec256 ws270 = v3__5; + Lib_IntVector_Intrinsics_vec256 v07 = ws32[28U]; + Lib_IntVector_Intrinsics_vec256 v17 = ws32[29U]; + Lib_IntVector_Intrinsics_vec256 v27 = ws32[30U]; + Lib_IntVector_Intrinsics_vec256 v37 = ws32[31U]; + Lib_IntVector_Intrinsics_vec256 + v0_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v07, v17); + Lib_IntVector_Intrinsics_vec256 + v1_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v07, v17); + Lib_IntVector_Intrinsics_vec256 + v2_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v27, v37); + Lib_IntVector_Intrinsics_vec256 + v3_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v27, v37); + Lib_IntVector_Intrinsics_vec256 + v0__6 = 
Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v1__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v2__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 + v3__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 ws280 = v0__6; + Lib_IntVector_Intrinsics_vec256 ws290 = v2__6; + Lib_IntVector_Intrinsics_vec256 ws300 = v1__6; + Lib_IntVector_Intrinsics_vec256 ws310 = v3__6; + ws32[0U] = ws00; + ws32[1U] = ws110; + ws32[2U] = ws210; + ws32[3U] = ws33; + ws32[4U] = ws40; + ws32[5U] = ws50; + ws32[6U] = ws60; + ws32[7U] = ws70; + ws32[8U] = ws80; + ws32[9U] = ws90; + ws32[10U] = ws100; + ws32[11U] = ws111; + ws32[12U] = ws120; + ws32[13U] = ws130; + ws32[14U] = ws140; + ws32[15U] = ws150; + ws32[16U] = ws160; + ws32[17U] = ws170; + ws32[18U] = ws180; + ws32[19U] = ws190; + ws32[20U] = ws200; + ws32[21U] = ws211; + ws32[22U] = ws220; + ws32[23U] = ws230; + ws32[24U] = ws240; + ws32[25U] = ws250; + ws32[26U] = ws260; + ws32[27U] = ws270; + ws32[28U] = ws280; + ws32[29U] = ws290; + ws32[30U] = ws300; + ws32[31U] = ws310; + for (uint32_t i = 0U; i < 25U; i++) + { + s[i] = Lib_IntVector_Intrinsics_vec256_xor(s[i], ws32[i]); + } + uint8_t b04[256U] = { 0U }; + uint8_t b14[256U] = { 0U }; + uint8_t b24[256U] = { 0U }; + uint8_t b34[256U] = { 0U }; + Hacl_Hash_SHA2_uint8_4p + b = { .fst = b04, .snd = { .fst = b14, .snd = { .fst = b24, .snd = b34 } } }; + uint8_t *b3 = b.snd.snd.snd; + uint8_t *b25 = b.snd.snd.fst; + uint8_t *b15 = b.snd.fst; + uint8_t *b05 = b.fst; + b05[rateInBytes1 - 1U] = 0x80U; + b15[rateInBytes1 - 1U] = 0x80U; + b25[rateInBytes1 - 1U] = 0x80U; + b3[rateInBytes1 - 1U] = 0x80U; + Hacl_Hash_SHA3_Simd256_absorb_inner_256(rateInBytes1, b, s); + for (uint32_t i0 = 0U; i0 < outputByteLen / rateInBytes1; i0++) + { + uint8_t hbuf[1024U] = { 0U }; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws[32U] KRML_POST_ALIGN(32) = { 0U }; + memcpy(ws, s, 25U * sizeof (Lib_IntVector_Intrinsics_vec256)); + Lib_IntVector_Intrinsics_vec256 v08 = ws[0U]; + Lib_IntVector_Intrinsics_vec256 v18 = ws[1U]; + Lib_IntVector_Intrinsics_vec256 v28 = ws[2U]; + Lib_IntVector_Intrinsics_vec256 v38 = ws[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v08, v18); + Lib_IntVector_Intrinsics_vec256 + v1_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v08, v18); + Lib_IntVector_Intrinsics_vec256 + v2_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v28, v38); + Lib_IntVector_Intrinsics_vec256 + v3_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v28, v38); + Lib_IntVector_Intrinsics_vec256 + v0__7 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_7, v2_7); + Lib_IntVector_Intrinsics_vec256 + v1__7 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_7, v2_7); + Lib_IntVector_Intrinsics_vec256 + v2__7 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_7, v3_7); + Lib_IntVector_Intrinsics_vec256 + v3__7 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_7, v3_7); + Lib_IntVector_Intrinsics_vec256 ws0 = v0__7; + Lib_IntVector_Intrinsics_vec256 ws1 = v2__7; + Lib_IntVector_Intrinsics_vec256 ws2 = v1__7; + Lib_IntVector_Intrinsics_vec256 ws3 = v3__7; + Lib_IntVector_Intrinsics_vec256 v09 = ws[4U]; + Lib_IntVector_Intrinsics_vec256 v19 = ws[5U]; + Lib_IntVector_Intrinsics_vec256 v29 = ws[6U]; + Lib_IntVector_Intrinsics_vec256 v39 = 
ws[7U]; + Lib_IntVector_Intrinsics_vec256 + v0_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v09, v19); + Lib_IntVector_Intrinsics_vec256 + v1_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v09, v19); + Lib_IntVector_Intrinsics_vec256 + v2_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v29, v39); + Lib_IntVector_Intrinsics_vec256 + v3_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v29, v39); + Lib_IntVector_Intrinsics_vec256 + v0__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_8, v2_8); + Lib_IntVector_Intrinsics_vec256 + v1__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_8, v2_8); + Lib_IntVector_Intrinsics_vec256 + v2__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_8, v3_8); + Lib_IntVector_Intrinsics_vec256 + v3__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_8, v3_8); + Lib_IntVector_Intrinsics_vec256 ws4 = v0__8; + Lib_IntVector_Intrinsics_vec256 ws5 = v2__8; + Lib_IntVector_Intrinsics_vec256 ws6 = v1__8; + Lib_IntVector_Intrinsics_vec256 ws7 = v3__8; + Lib_IntVector_Intrinsics_vec256 v010 = ws[8U]; + Lib_IntVector_Intrinsics_vec256 v110 = ws[9U]; + Lib_IntVector_Intrinsics_vec256 v210 = ws[10U]; + Lib_IntVector_Intrinsics_vec256 v310 = ws[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v010, v110); + Lib_IntVector_Intrinsics_vec256 + v1_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v010, v110); + Lib_IntVector_Intrinsics_vec256 + v2_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v210, v310); + Lib_IntVector_Intrinsics_vec256 + v3_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v210, v310); + Lib_IntVector_Intrinsics_vec256 + v0__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_9, v2_9); + Lib_IntVector_Intrinsics_vec256 + v1__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_9, v2_9); + Lib_IntVector_Intrinsics_vec256 + v2__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_9, v3_9); + Lib_IntVector_Intrinsics_vec256 + v3__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_9, v3_9); + Lib_IntVector_Intrinsics_vec256 ws8 = v0__9; + Lib_IntVector_Intrinsics_vec256 ws9 = v2__9; + Lib_IntVector_Intrinsics_vec256 ws10 = v1__9; + Lib_IntVector_Intrinsics_vec256 ws11 = v3__9; + Lib_IntVector_Intrinsics_vec256 v011 = ws[12U]; + Lib_IntVector_Intrinsics_vec256 v111 = ws[13U]; + Lib_IntVector_Intrinsics_vec256 v211 = ws[14U]; + Lib_IntVector_Intrinsics_vec256 v311 = ws[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_10 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v011, v111); + Lib_IntVector_Intrinsics_vec256 + v1_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v011, v111); + Lib_IntVector_Intrinsics_vec256 + v2_10 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v211, v311); + Lib_IntVector_Intrinsics_vec256 + v3_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v211, v311); + Lib_IntVector_Intrinsics_vec256 + v0__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_10, v2_10); + Lib_IntVector_Intrinsics_vec256 + v1__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_10, v2_10); + Lib_IntVector_Intrinsics_vec256 + v2__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_10, v3_10); + Lib_IntVector_Intrinsics_vec256 + v3__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_10, v3_10); + Lib_IntVector_Intrinsics_vec256 ws12 = v0__10; + Lib_IntVector_Intrinsics_vec256 ws13 = v2__10; + Lib_IntVector_Intrinsics_vec256 ws14 = v1__10; + Lib_IntVector_Intrinsics_vec256 ws15 = v3__10; + 
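/* Remaining transposes before the little-endian stores into hbuf and the per-lane output copies. */ +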
Lib_IntVector_Intrinsics_vec256 v012 = ws[16U]; + Lib_IntVector_Intrinsics_vec256 v112 = ws[17U]; + Lib_IntVector_Intrinsics_vec256 v212 = ws[18U]; + Lib_IntVector_Intrinsics_vec256 v312 = ws[19U]; + Lib_IntVector_Intrinsics_vec256 + v0_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v012, v112); + Lib_IntVector_Intrinsics_vec256 + v1_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v012, v112); + Lib_IntVector_Intrinsics_vec256 + v2_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v212, v312); + Lib_IntVector_Intrinsics_vec256 + v3_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v212, v312); + Lib_IntVector_Intrinsics_vec256 + v0__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_11, v2_11); + Lib_IntVector_Intrinsics_vec256 + v1__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_11, v2_11); + Lib_IntVector_Intrinsics_vec256 + v2__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_11, v3_11); + Lib_IntVector_Intrinsics_vec256 + v3__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_11, v3_11); + Lib_IntVector_Intrinsics_vec256 ws16 = v0__11; + Lib_IntVector_Intrinsics_vec256 ws17 = v2__11; + Lib_IntVector_Intrinsics_vec256 ws18 = v1__11; + Lib_IntVector_Intrinsics_vec256 ws19 = v3__11; + Lib_IntVector_Intrinsics_vec256 v013 = ws[20U]; + Lib_IntVector_Intrinsics_vec256 v113 = ws[21U]; + Lib_IntVector_Intrinsics_vec256 v213 = ws[22U]; + Lib_IntVector_Intrinsics_vec256 v313 = ws[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v013, v113); + Lib_IntVector_Intrinsics_vec256 + v1_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v013, v113); + Lib_IntVector_Intrinsics_vec256 + v2_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v213, v313); + Lib_IntVector_Intrinsics_vec256 + v3_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v213, v313); + Lib_IntVector_Intrinsics_vec256 + v0__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_12, v2_12); + Lib_IntVector_Intrinsics_vec256 + v1__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_12, v2_12); + Lib_IntVector_Intrinsics_vec256 + v2__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_12, v3_12); + Lib_IntVector_Intrinsics_vec256 + v3__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_12, v3_12); + Lib_IntVector_Intrinsics_vec256 ws20 = v0__12; + Lib_IntVector_Intrinsics_vec256 ws21 = v2__12; + Lib_IntVector_Intrinsics_vec256 ws22 = v1__12; + Lib_IntVector_Intrinsics_vec256 ws23 = v3__12; + Lib_IntVector_Intrinsics_vec256 v014 = ws[24U]; + Lib_IntVector_Intrinsics_vec256 v114 = ws[25U]; + Lib_IntVector_Intrinsics_vec256 v214 = ws[26U]; + Lib_IntVector_Intrinsics_vec256 v314 = ws[27U]; + Lib_IntVector_Intrinsics_vec256 + v0_13 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v014, v114); + Lib_IntVector_Intrinsics_vec256 + v1_13 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v014, v114); + Lib_IntVector_Intrinsics_vec256 + v2_13 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v214, v314); + Lib_IntVector_Intrinsics_vec256 + v3_13 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v214, v314); + Lib_IntVector_Intrinsics_vec256 + v0__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_13, v2_13); + Lib_IntVector_Intrinsics_vec256 + v1__13 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_13, v2_13); + Lib_IntVector_Intrinsics_vec256 + v2__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_13, v3_13); + Lib_IntVector_Intrinsics_vec256 + v3__13 = 
Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_13, v3_13); + Lib_IntVector_Intrinsics_vec256 ws24 = v0__13; + Lib_IntVector_Intrinsics_vec256 ws25 = v2__13; + Lib_IntVector_Intrinsics_vec256 ws26 = v1__13; + Lib_IntVector_Intrinsics_vec256 ws27 = v3__13; + Lib_IntVector_Intrinsics_vec256 v0 = ws[28U]; + Lib_IntVector_Intrinsics_vec256 v1 = ws[29U]; + Lib_IntVector_Intrinsics_vec256 v2 = ws[30U]; + Lib_IntVector_Intrinsics_vec256 v3 = ws[31U]; + Lib_IntVector_Intrinsics_vec256 + v0_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v1_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v2_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v3_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v0__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_14, v2_14); + Lib_IntVector_Intrinsics_vec256 + v1__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_14, v2_14); + Lib_IntVector_Intrinsics_vec256 + v2__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_14, v3_14); + Lib_IntVector_Intrinsics_vec256 + v3__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_14, v3_14); + Lib_IntVector_Intrinsics_vec256 ws28 = v0__14; + Lib_IntVector_Intrinsics_vec256 ws29 = v2__14; + Lib_IntVector_Intrinsics_vec256 ws30 = v1__14; + Lib_IntVector_Intrinsics_vec256 ws31 = v3__14; + ws[0U] = ws0; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; + ws[31U] = ws31; + for (uint32_t i = 0U; i < 32U; i++) + { + Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); + } + uint8_t *b35 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + i0 * rateInBytes1, hbuf, rateInBytes1 * sizeof (uint8_t)); + memcpy(b1 + i0 * rateInBytes1, hbuf + 256U, rateInBytes1 * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes1, hbuf + 512U, rateInBytes1 * sizeof (uint8_t)); + memcpy(b35 + i0 * rateInBytes1, hbuf + 768U, rateInBytes1 * sizeof (uint8_t)); + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____0 = s[i + 0U]; + Lib_IntVector_Intrinsics_vec256 uu____1 = s[i + 5U]; + Lib_IntVector_Intrinsics_vec256 uu____2 = s[i + 10U]; + _C[i] = + Lib_IntVector_Intrinsics_vec256_xor(uu____0, + Lib_IntVector_Intrinsics_vec256_xor(uu____1, + Lib_IntVector_Intrinsics_vec256_xor(uu____2, + Lib_IntVector_Intrinsics_vec256_xor(s[i + 15U], s[i + 20U]))));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____3 = _C[(i2 + 4U) % 5U]; + Lib_IntVector_Intrinsics_vec256 uu____4 = _C[(i2 + 1U) % 5U]; + Lib_IntVector_Intrinsics_vec256 + _D = + Lib_IntVector_Intrinsics_vec256_xor(uu____3, + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____4, + 1U), + 
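/* Theta: _D = C[(i+4) % 5] ^ rotl64(C[(i+1) % 5], 1); the rotate is built from the two shifts. */ +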
Lib_IntVector_Intrinsics_vec256_shift_right64(uu____4, 63U))); + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + s[i2 + 5U * i] = Lib_IntVector_Intrinsics_vec256_xor(s[i2 + 5U * i], _D););); + Lib_IntVector_Intrinsics_vec256 x = s[1U]; + Lib_IntVector_Intrinsics_vec256 current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Hash_SHA3_keccak_piln[i]; + uint32_t r = Hacl_Hash_SHA3_keccak_rotc[i]; + Lib_IntVector_Intrinsics_vec256 temp = s[_Y]; + Lib_IntVector_Intrinsics_vec256 uu____5 = current; + s[_Y] = + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____5, + r), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____5, 64U - r)); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____6 = s[0U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____7 = Lib_IntVector_Intrinsics_vec256_lognot(s[1U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v015 = + Lib_IntVector_Intrinsics_vec256_xor(uu____6, + Lib_IntVector_Intrinsics_vec256_and(uu____7, s[2U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____8 = s[1U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____9 = Lib_IntVector_Intrinsics_vec256_lognot(s[2U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v115 = + Lib_IntVector_Intrinsics_vec256_xor(uu____8, + Lib_IntVector_Intrinsics_vec256_and(uu____9, s[3U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____10 = s[2U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____11 = Lib_IntVector_Intrinsics_vec256_lognot(s[3U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v215 = + Lib_IntVector_Intrinsics_vec256_xor(uu____10, + Lib_IntVector_Intrinsics_vec256_and(uu____11, s[4U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____12 = s[3U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____13 = Lib_IntVector_Intrinsics_vec256_lognot(s[4U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v315 = + Lib_IntVector_Intrinsics_vec256_xor(uu____12, + Lib_IntVector_Intrinsics_vec256_and(uu____13, s[0U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____14 = s[4U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____15 = Lib_IntVector_Intrinsics_vec256_lognot(s[0U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v4 = + Lib_IntVector_Intrinsics_vec256_xor(uu____14, + Lib_IntVector_Intrinsics_vec256_and(uu____15, s[1U + 5U * i])); + s[0U + 5U * i] = v015; + s[1U + 5U * i] = v115; + s[2U + 5U * i] = v215; + s[3U + 5U * i] = v315; + s[4U + 5U * i] = v4;); + uint64_t c = Hacl_Hash_SHA3_keccak_rndc[i1]; + Lib_IntVector_Intrinsics_vec256 uu____16 = s[0U]; + s[0U] = + Lib_IntVector_Intrinsics_vec256_xor(uu____16, + Lib_IntVector_Intrinsics_vec256_load64(c)); + } + } + uint32_t remOut = outputByteLen % rateInBytes1; + uint8_t hbuf[1024U] = { 0U }; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws[32U] KRML_POST_ALIGN(32) = { 0U }; + memcpy(ws, s, 25U * sizeof (Lib_IntVector_Intrinsics_vec256)); + Lib_IntVector_Intrinsics_vec256 v08 = ws[0U]; + Lib_IntVector_Intrinsics_vec256 v18 = ws[1U]; + Lib_IntVector_Intrinsics_vec256 v28 = ws[2U]; + Lib_IntVector_Intrinsics_vec256 v38 = ws[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v08, v18); + Lib_IntVector_Intrinsics_vec256 + v1_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v08, v18); + Lib_IntVector_Intrinsics_vec256 + v2_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v28, v38); + Lib_IntVector_Intrinsics_vec256 + v3_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v28, v38); + Lib_IntVector_Intrinsics_vec256 
+ v0__7 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_7, v2_7); + Lib_IntVector_Intrinsics_vec256 + v1__7 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_7, v2_7); + Lib_IntVector_Intrinsics_vec256 + v2__7 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_7, v3_7); + Lib_IntVector_Intrinsics_vec256 + v3__7 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_7, v3_7); + Lib_IntVector_Intrinsics_vec256 ws0 = v0__7; + Lib_IntVector_Intrinsics_vec256 ws1 = v2__7; + Lib_IntVector_Intrinsics_vec256 ws2 = v1__7; + Lib_IntVector_Intrinsics_vec256 ws3 = v3__7; + Lib_IntVector_Intrinsics_vec256 v09 = ws[4U]; + Lib_IntVector_Intrinsics_vec256 v19 = ws[5U]; + Lib_IntVector_Intrinsics_vec256 v29 = ws[6U]; + Lib_IntVector_Intrinsics_vec256 v39 = ws[7U]; + Lib_IntVector_Intrinsics_vec256 + v0_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v09, v19); + Lib_IntVector_Intrinsics_vec256 + v1_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v09, v19); + Lib_IntVector_Intrinsics_vec256 + v2_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v29, v39); + Lib_IntVector_Intrinsics_vec256 + v3_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v29, v39); + Lib_IntVector_Intrinsics_vec256 + v0__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_8, v2_8); + Lib_IntVector_Intrinsics_vec256 + v1__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_8, v2_8); + Lib_IntVector_Intrinsics_vec256 + v2__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_8, v3_8); + Lib_IntVector_Intrinsics_vec256 + v3__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_8, v3_8); + Lib_IntVector_Intrinsics_vec256 ws4 = v0__8; + Lib_IntVector_Intrinsics_vec256 ws5 = v2__8; + Lib_IntVector_Intrinsics_vec256 ws6 = v1__8; + Lib_IntVector_Intrinsics_vec256 ws7 = v3__8; + Lib_IntVector_Intrinsics_vec256 v010 = ws[8U]; + Lib_IntVector_Intrinsics_vec256 v110 = ws[9U]; + Lib_IntVector_Intrinsics_vec256 v210 = ws[10U]; + Lib_IntVector_Intrinsics_vec256 v310 = ws[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v010, v110); + Lib_IntVector_Intrinsics_vec256 + v1_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v010, v110); + Lib_IntVector_Intrinsics_vec256 + v2_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v210, v310); + Lib_IntVector_Intrinsics_vec256 + v3_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v210, v310); + Lib_IntVector_Intrinsics_vec256 + v0__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_9, v2_9); + Lib_IntVector_Intrinsics_vec256 + v1__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_9, v2_9); + Lib_IntVector_Intrinsics_vec256 + v2__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_9, v3_9); + Lib_IntVector_Intrinsics_vec256 + v3__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_9, v3_9); + Lib_IntVector_Intrinsics_vec256 ws8 = v0__9; + Lib_IntVector_Intrinsics_vec256 ws9 = v2__9; + Lib_IntVector_Intrinsics_vec256 ws10 = v1__9; + Lib_IntVector_Intrinsics_vec256 ws11 = v3__9; + Lib_IntVector_Intrinsics_vec256 v011 = ws[12U]; + Lib_IntVector_Intrinsics_vec256 v111 = ws[13U]; + Lib_IntVector_Intrinsics_vec256 v211 = ws[14U]; + Lib_IntVector_Intrinsics_vec256 v311 = ws[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_10 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v011, v111); + Lib_IntVector_Intrinsics_vec256 + v1_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v011, v111); + Lib_IntVector_Intrinsics_vec256 + v2_10 = 
Lib_IntVector_Intrinsics_vec256_interleave_low64(v211, v311); + Lib_IntVector_Intrinsics_vec256 + v3_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v211, v311); + Lib_IntVector_Intrinsics_vec256 + v0__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_10, v2_10); + Lib_IntVector_Intrinsics_vec256 + v1__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_10, v2_10); + Lib_IntVector_Intrinsics_vec256 + v2__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_10, v3_10); + Lib_IntVector_Intrinsics_vec256 + v3__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_10, v3_10); + Lib_IntVector_Intrinsics_vec256 ws12 = v0__10; + Lib_IntVector_Intrinsics_vec256 ws13 = v2__10; + Lib_IntVector_Intrinsics_vec256 ws14 = v1__10; + Lib_IntVector_Intrinsics_vec256 ws15 = v3__10; + Lib_IntVector_Intrinsics_vec256 v012 = ws[16U]; + Lib_IntVector_Intrinsics_vec256 v112 = ws[17U]; + Lib_IntVector_Intrinsics_vec256 v212 = ws[18U]; + Lib_IntVector_Intrinsics_vec256 v312 = ws[19U]; + Lib_IntVector_Intrinsics_vec256 + v0_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v012, v112); + Lib_IntVector_Intrinsics_vec256 + v1_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v012, v112); + Lib_IntVector_Intrinsics_vec256 + v2_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v212, v312); + Lib_IntVector_Intrinsics_vec256 + v3_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v212, v312); + Lib_IntVector_Intrinsics_vec256 + v0__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_11, v2_11); + Lib_IntVector_Intrinsics_vec256 + v1__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_11, v2_11); + Lib_IntVector_Intrinsics_vec256 + v2__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_11, v3_11); + Lib_IntVector_Intrinsics_vec256 + v3__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_11, v3_11); + Lib_IntVector_Intrinsics_vec256 ws16 = v0__11; + Lib_IntVector_Intrinsics_vec256 ws17 = v2__11; + Lib_IntVector_Intrinsics_vec256 ws18 = v1__11; + Lib_IntVector_Intrinsics_vec256 ws19 = v3__11; + Lib_IntVector_Intrinsics_vec256 v013 = ws[20U]; + Lib_IntVector_Intrinsics_vec256 v113 = ws[21U]; + Lib_IntVector_Intrinsics_vec256 v213 = ws[22U]; + Lib_IntVector_Intrinsics_vec256 v313 = ws[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v013, v113); + Lib_IntVector_Intrinsics_vec256 + v1_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v013, v113); + Lib_IntVector_Intrinsics_vec256 + v2_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v213, v313); + Lib_IntVector_Intrinsics_vec256 + v3_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v213, v313); + Lib_IntVector_Intrinsics_vec256 + v0__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_12, v2_12); + Lib_IntVector_Intrinsics_vec256 + v1__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_12, v2_12); + Lib_IntVector_Intrinsics_vec256 + v2__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_12, v3_12); + Lib_IntVector_Intrinsics_vec256 + v3__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_12, v3_12); + Lib_IntVector_Intrinsics_vec256 ws20 = v0__12; + Lib_IntVector_Intrinsics_vec256 ws21 = v2__12; + Lib_IntVector_Intrinsics_vec256 ws22 = v1__12; + Lib_IntVector_Intrinsics_vec256 ws23 = v3__12; + Lib_IntVector_Intrinsics_vec256 v014 = ws[24U]; + Lib_IntVector_Intrinsics_vec256 v114 = ws[25U]; + Lib_IntVector_Intrinsics_vec256 v214 = ws[26U]; + Lib_IntVector_Intrinsics_vec256 v314 = ws[27U]; + 
Lib_IntVector_Intrinsics_vec256 + v0_13 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v014, v114); + Lib_IntVector_Intrinsics_vec256 + v1_13 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v014, v114); + Lib_IntVector_Intrinsics_vec256 + v2_13 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v214, v314); + Lib_IntVector_Intrinsics_vec256 + v3_13 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v214, v314); + Lib_IntVector_Intrinsics_vec256 + v0__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_13, v2_13); + Lib_IntVector_Intrinsics_vec256 + v1__13 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_13, v2_13); + Lib_IntVector_Intrinsics_vec256 + v2__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_13, v3_13); + Lib_IntVector_Intrinsics_vec256 + v3__13 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_13, v3_13); + Lib_IntVector_Intrinsics_vec256 ws24 = v0__13; + Lib_IntVector_Intrinsics_vec256 ws25 = v2__13; + Lib_IntVector_Intrinsics_vec256 ws26 = v1__13; + Lib_IntVector_Intrinsics_vec256 ws27 = v3__13; + Lib_IntVector_Intrinsics_vec256 v0 = ws[28U]; + Lib_IntVector_Intrinsics_vec256 v1 = ws[29U]; + Lib_IntVector_Intrinsics_vec256 v2 = ws[30U]; + Lib_IntVector_Intrinsics_vec256 v3 = ws[31U]; + Lib_IntVector_Intrinsics_vec256 + v0_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v1_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v2_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v3_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v0__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_14, v2_14); + Lib_IntVector_Intrinsics_vec256 + v1__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_14, v2_14); + Lib_IntVector_Intrinsics_vec256 + v2__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_14, v3_14); + Lib_IntVector_Intrinsics_vec256 + v3__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_14, v3_14); + Lib_IntVector_Intrinsics_vec256 ws28 = v0__14; + Lib_IntVector_Intrinsics_vec256 ws29 = v2__14; + Lib_IntVector_Intrinsics_vec256 ws30 = v1__14; + Lib_IntVector_Intrinsics_vec256 ws31 = v3__14; + ws[0U] = ws0; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; + ws[31U] = ws31; + for (uint32_t i = 0U; i < 32U; i++) + { + Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); + } + uint8_t *b35 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + outputByteLen - remOut, hbuf, remOut * sizeof (uint8_t)); + memcpy(b1 + outputByteLen - remOut, hbuf + 256U, remOut * sizeof (uint8_t)); + memcpy(b2 + outputByteLen - remOut, hbuf + 512U, remOut * sizeof (uint8_t)); + memcpy(b35 + outputByteLen - remOut, hbuf + 768U, remOut * sizeof (uint8_t)); +} + +void +Hacl_Hash_SHA3_Simd256_sha3_224( + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, + uint8_t *input0, + uint8_t 
*input1, + uint8_t *input2, + uint8_t *input3, + uint32_t inputByteLen +) +{ + Hacl_Hash_SHA2_uint8_4p + ib = { .fst = input0, .snd = { .fst = input1, .snd = { .fst = input2, .snd = input3 } } }; + Hacl_Hash_SHA2_uint8_4p + rb = { .fst = output0, .snd = { .fst = output1, .snd = { .fst = output2, .snd = output3 } } }; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 s[25U] KRML_POST_ALIGN(32) = { 0U }; + uint32_t rateInBytes1 = 144U; + for (uint32_t i = 0U; i < inputByteLen / rateInBytes1; i++) + { + uint8_t b00[256U] = { 0U }; + uint8_t b10[256U] = { 0U }; + uint8_t b20[256U] = { 0U }; + uint8_t b30[256U] = { 0U }; + Hacl_Hash_SHA2_uint8_4p + b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; + uint8_t *b3 = ib.snd.snd.snd; + uint8_t *b2 = ib.snd.snd.fst; + uint8_t *b1 = ib.snd.fst; + uint8_t *b0 = ib.fst; + uint8_t *bl3 = b_.snd.snd.snd; + uint8_t *bl2 = b_.snd.snd.fst; + uint8_t *bl1 = b_.snd.fst; + uint8_t *bl0 = b_.fst; + memcpy(bl0, b0 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t)); + memcpy(bl1, b1 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t)); + memcpy(bl2, b2 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t)); + memcpy(bl3, b3 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t)); + Hacl_Hash_SHA3_Simd256_absorb_inner_256(rateInBytes1, b_, s); + } + uint8_t b00[256U] = { 0U }; + uint8_t b10[256U] = { 0U }; + uint8_t b20[256U] = { 0U }; + uint8_t b30[256U] = { 0U }; + Hacl_Hash_SHA2_uint8_4p + b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; + uint32_t rem = inputByteLen % rateInBytes1; + uint8_t *b31 = ib.snd.snd.snd; + uint8_t *b21 = ib.snd.snd.fst; + uint8_t *b11 = ib.snd.fst; + uint8_t *b01 = ib.fst; + uint8_t *bl3 = b_.snd.snd.snd; + uint8_t *bl2 = b_.snd.snd.fst; + uint8_t *bl1 = b_.snd.fst; + uint8_t *bl0 = b_.fst; + memcpy(bl0, b01 + inputByteLen - rem, rem * sizeof (uint8_t)); + memcpy(bl1, b11 + inputByteLen - rem, rem * sizeof (uint8_t)); + memcpy(bl2, b21 + inputByteLen - rem, rem * sizeof (uint8_t)); + memcpy(bl3, b31 + inputByteLen - rem, rem * sizeof (uint8_t)); + uint8_t *b32 = b_.snd.snd.snd; + uint8_t *b22 = b_.snd.snd.fst; + uint8_t *b12 = b_.snd.fst; + uint8_t *b02 = b_.fst; + b02[inputByteLen % rateInBytes1] = 0x06U; + b12[inputByteLen % rateInBytes1] = 0x06U; + b22[inputByteLen % rateInBytes1] = 0x06U; + b32[inputByteLen % rateInBytes1] = 0x06U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b33 = b_.snd.snd.snd; + uint8_t *b23 = b_.snd.snd.fst; + uint8_t *b13 = b_.snd.fst; + uint8_t *b03 = b_.fst; + ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03); + ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13); + ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23); + ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33); + ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 32U); + ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 32U); + ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 32U); + ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 32U); + ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 64U); + ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 64U); + ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 64U); + ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 64U); + ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 96U); + ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 96U); + ws32[14U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 96U); + ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 96U); + ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 128U); + ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 128U); + ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 128U); + ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 128U); + ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 160U); + ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 160U); + ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 160U); + ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 160U); + ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 192U); + ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 192U); + ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 192U); + ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 192U); + ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 224U); + ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 224U); + ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 224U); + ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 224U); + Lib_IntVector_Intrinsics_vec256 v00 = ws32[0U]; + Lib_IntVector_Intrinsics_vec256 v10 = ws32[1U]; + Lib_IntVector_Intrinsics_vec256 v20 = ws32[2U]; + Lib_IntVector_Intrinsics_vec256 v30 = ws32[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v1_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v2_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v3_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v0__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v1__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v2__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 + v3__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 ws00 = v0__; + Lib_IntVector_Intrinsics_vec256 ws110 = v2__; + Lib_IntVector_Intrinsics_vec256 ws210 = v1__; + Lib_IntVector_Intrinsics_vec256 ws33 = v3__; + Lib_IntVector_Intrinsics_vec256 v01 = ws32[4U]; + Lib_IntVector_Intrinsics_vec256 v11 = ws32[5U]; + Lib_IntVector_Intrinsics_vec256 v21 = ws32[6U]; + Lib_IntVector_Intrinsics_vec256 v31 = ws32[7U]; + Lib_IntVector_Intrinsics_vec256 + v0_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v1_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v2_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v3_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v0__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v1__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v2__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 + v3__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 ws40 = v0__0; + 
Lib_IntVector_Intrinsics_vec256 ws50 = v2__0; + Lib_IntVector_Intrinsics_vec256 ws60 = v1__0; + Lib_IntVector_Intrinsics_vec256 ws70 = v3__0; + Lib_IntVector_Intrinsics_vec256 v02 = ws32[8U]; + Lib_IntVector_Intrinsics_vec256 v12 = ws32[9U]; + Lib_IntVector_Intrinsics_vec256 v22 = ws32[10U]; + Lib_IntVector_Intrinsics_vec256 v32 = ws32[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v1_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v2_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v3_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v0__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v1__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v2__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 + v3__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 ws80 = v0__1; + Lib_IntVector_Intrinsics_vec256 ws90 = v2__1; + Lib_IntVector_Intrinsics_vec256 ws100 = v1__1; + Lib_IntVector_Intrinsics_vec256 ws111 = v3__1; + Lib_IntVector_Intrinsics_vec256 v03 = ws32[12U]; + Lib_IntVector_Intrinsics_vec256 v13 = ws32[13U]; + Lib_IntVector_Intrinsics_vec256 v23 = ws32[14U]; + Lib_IntVector_Intrinsics_vec256 v33 = ws32[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v1_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v2_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v3_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v0__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v1__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v2__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 + v3__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 ws120 = v0__2; + Lib_IntVector_Intrinsics_vec256 ws130 = v2__2; + Lib_IntVector_Intrinsics_vec256 ws140 = v1__2; + Lib_IntVector_Intrinsics_vec256 ws150 = v3__2; + Lib_IntVector_Intrinsics_vec256 v04 = ws32[16U]; + Lib_IntVector_Intrinsics_vec256 v14 = ws32[17U]; + Lib_IntVector_Intrinsics_vec256 v24 = ws32[18U]; + Lib_IntVector_Intrinsics_vec256 v34 = ws32[19U]; + Lib_IntVector_Intrinsics_vec256 + v0_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v1_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v2_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v3_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v0__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v1__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v2__3 = 
Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 + v3__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 ws160 = v0__3; + Lib_IntVector_Intrinsics_vec256 ws170 = v2__3; + Lib_IntVector_Intrinsics_vec256 ws180 = v1__3; + Lib_IntVector_Intrinsics_vec256 ws190 = v3__3; + Lib_IntVector_Intrinsics_vec256 v05 = ws32[20U]; + Lib_IntVector_Intrinsics_vec256 v15 = ws32[21U]; + Lib_IntVector_Intrinsics_vec256 v25 = ws32[22U]; + Lib_IntVector_Intrinsics_vec256 v35 = ws32[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v1_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v2_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v3_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v0__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v1__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v2__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 + v3__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 ws200 = v0__4; + Lib_IntVector_Intrinsics_vec256 ws211 = v2__4; + Lib_IntVector_Intrinsics_vec256 ws220 = v1__4; + Lib_IntVector_Intrinsics_vec256 ws230 = v3__4; + Lib_IntVector_Intrinsics_vec256 v06 = ws32[24U]; + Lib_IntVector_Intrinsics_vec256 v16 = ws32[25U]; + Lib_IntVector_Intrinsics_vec256 v26 = ws32[26U]; + Lib_IntVector_Intrinsics_vec256 v36 = ws32[27U]; + Lib_IntVector_Intrinsics_vec256 + v0_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v1_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v2_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v3_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v0__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v1__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v2__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 + v3__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 ws240 = v0__5; + Lib_IntVector_Intrinsics_vec256 ws250 = v2__5; + Lib_IntVector_Intrinsics_vec256 ws260 = v1__5; + Lib_IntVector_Intrinsics_vec256 ws270 = v3__5; + Lib_IntVector_Intrinsics_vec256 v07 = ws32[28U]; + Lib_IntVector_Intrinsics_vec256 v17 = ws32[29U]; + Lib_IntVector_Intrinsics_vec256 v27 = ws32[30U]; + Lib_IntVector_Intrinsics_vec256 v37 = ws32[31U]; + Lib_IntVector_Intrinsics_vec256 + v0_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v07, v17); + Lib_IntVector_Intrinsics_vec256 + v1_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v07, v17); + Lib_IntVector_Intrinsics_vec256 + v2_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v27, v37); + Lib_IntVector_Intrinsics_vec256 + v3_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v27, v37); + Lib_IntVector_Intrinsics_vec256 + v0__6 = 
Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v1__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v2__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 + v3__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 ws280 = v0__6; + Lib_IntVector_Intrinsics_vec256 ws290 = v2__6; + Lib_IntVector_Intrinsics_vec256 ws300 = v1__6; + Lib_IntVector_Intrinsics_vec256 ws310 = v3__6; + ws32[0U] = ws00; + ws32[1U] = ws110; + ws32[2U] = ws210; + ws32[3U] = ws33; + ws32[4U] = ws40; + ws32[5U] = ws50; + ws32[6U] = ws60; + ws32[7U] = ws70; + ws32[8U] = ws80; + ws32[9U] = ws90; + ws32[10U] = ws100; + ws32[11U] = ws111; + ws32[12U] = ws120; + ws32[13U] = ws130; + ws32[14U] = ws140; + ws32[15U] = ws150; + ws32[16U] = ws160; + ws32[17U] = ws170; + ws32[18U] = ws180; + ws32[19U] = ws190; + ws32[20U] = ws200; + ws32[21U] = ws211; + ws32[22U] = ws220; + ws32[23U] = ws230; + ws32[24U] = ws240; + ws32[25U] = ws250; + ws32[26U] = ws260; + ws32[27U] = ws270; + ws32[28U] = ws280; + ws32[29U] = ws290; + ws32[30U] = ws300; + ws32[31U] = ws310; + for (uint32_t i = 0U; i < 25U; i++) + { + s[i] = Lib_IntVector_Intrinsics_vec256_xor(s[i], ws32[i]); + } + uint8_t b04[256U] = { 0U }; + uint8_t b14[256U] = { 0U }; + uint8_t b24[256U] = { 0U }; + uint8_t b34[256U] = { 0U }; + Hacl_Hash_SHA2_uint8_4p + b = { .fst = b04, .snd = { .fst = b14, .snd = { .fst = b24, .snd = b34 } } }; + uint8_t *b3 = b.snd.snd.snd; + uint8_t *b25 = b.snd.snd.fst; + uint8_t *b15 = b.snd.fst; + uint8_t *b05 = b.fst; + b05[rateInBytes1 - 1U] = 0x80U; + b15[rateInBytes1 - 1U] = 0x80U; + b25[rateInBytes1 - 1U] = 0x80U; + b3[rateInBytes1 - 1U] = 0x80U; + Hacl_Hash_SHA3_Simd256_absorb_inner_256(rateInBytes1, b, s); + for (uint32_t i0 = 0U; i0 < 28U / rateInBytes1; i0++) + { + uint8_t hbuf[1024U] = { 0U }; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws[32U] KRML_POST_ALIGN(32) = { 0U }; + memcpy(ws, s, 25U * sizeof (Lib_IntVector_Intrinsics_vec256)); + Lib_IntVector_Intrinsics_vec256 v08 = ws[0U]; + Lib_IntVector_Intrinsics_vec256 v18 = ws[1U]; + Lib_IntVector_Intrinsics_vec256 v28 = ws[2U]; + Lib_IntVector_Intrinsics_vec256 v38 = ws[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v08, v18); + Lib_IntVector_Intrinsics_vec256 + v1_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v08, v18); + Lib_IntVector_Intrinsics_vec256 + v2_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v28, v38); + Lib_IntVector_Intrinsics_vec256 + v3_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v28, v38); + Lib_IntVector_Intrinsics_vec256 + v0__7 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_7, v2_7); + Lib_IntVector_Intrinsics_vec256 + v1__7 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_7, v2_7); + Lib_IntVector_Intrinsics_vec256 + v2__7 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_7, v3_7); + Lib_IntVector_Intrinsics_vec256 + v3__7 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_7, v3_7); + Lib_IntVector_Intrinsics_vec256 ws0 = v0__7; + Lib_IntVector_Intrinsics_vec256 ws1 = v2__7; + Lib_IntVector_Intrinsics_vec256 ws2 = v1__7; + Lib_IntVector_Intrinsics_vec256 ws3 = v3__7; + Lib_IntVector_Intrinsics_vec256 v09 = ws[4U]; + Lib_IntVector_Intrinsics_vec256 v19 = ws[5U]; + Lib_IntVector_Intrinsics_vec256 v29 = ws[6U]; + Lib_IntVector_Intrinsics_vec256 v39 = ws[7U]; + 
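/* Interleave 64-bit, then 128-bit halves to regroup lanes 4..7 by output buffer. */ +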
Lib_IntVector_Intrinsics_vec256 + v0_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v09, v19); + Lib_IntVector_Intrinsics_vec256 + v1_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v09, v19); + Lib_IntVector_Intrinsics_vec256 + v2_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v29, v39); + Lib_IntVector_Intrinsics_vec256 + v3_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v29, v39); + Lib_IntVector_Intrinsics_vec256 + v0__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_8, v2_8); + Lib_IntVector_Intrinsics_vec256 + v1__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_8, v2_8); + Lib_IntVector_Intrinsics_vec256 + v2__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_8, v3_8); + Lib_IntVector_Intrinsics_vec256 + v3__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_8, v3_8); + Lib_IntVector_Intrinsics_vec256 ws4 = v0__8; + Lib_IntVector_Intrinsics_vec256 ws5 = v2__8; + Lib_IntVector_Intrinsics_vec256 ws6 = v1__8; + Lib_IntVector_Intrinsics_vec256 ws7 = v3__8; + Lib_IntVector_Intrinsics_vec256 v010 = ws[8U]; + Lib_IntVector_Intrinsics_vec256 v110 = ws[9U]; + Lib_IntVector_Intrinsics_vec256 v210 = ws[10U]; + Lib_IntVector_Intrinsics_vec256 v310 = ws[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v010, v110); + Lib_IntVector_Intrinsics_vec256 + v1_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v010, v110); + Lib_IntVector_Intrinsics_vec256 + v2_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v210, v310); + Lib_IntVector_Intrinsics_vec256 + v3_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v210, v310); + Lib_IntVector_Intrinsics_vec256 + v0__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_9, v2_9); + Lib_IntVector_Intrinsics_vec256 + v1__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_9, v2_9); + Lib_IntVector_Intrinsics_vec256 + v2__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_9, v3_9); + Lib_IntVector_Intrinsics_vec256 + v3__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_9, v3_9); + Lib_IntVector_Intrinsics_vec256 ws8 = v0__9; + Lib_IntVector_Intrinsics_vec256 ws9 = v2__9; + Lib_IntVector_Intrinsics_vec256 ws10 = v1__9; + Lib_IntVector_Intrinsics_vec256 ws11 = v3__9; + Lib_IntVector_Intrinsics_vec256 v011 = ws[12U]; + Lib_IntVector_Intrinsics_vec256 v111 = ws[13U]; + Lib_IntVector_Intrinsics_vec256 v211 = ws[14U]; + Lib_IntVector_Intrinsics_vec256 v311 = ws[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_10 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v011, v111); + Lib_IntVector_Intrinsics_vec256 + v1_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v011, v111); + Lib_IntVector_Intrinsics_vec256 + v2_10 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v211, v311); + Lib_IntVector_Intrinsics_vec256 + v3_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v211, v311); + Lib_IntVector_Intrinsics_vec256 + v0__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_10, v2_10); + Lib_IntVector_Intrinsics_vec256 + v1__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_10, v2_10); + Lib_IntVector_Intrinsics_vec256 + v2__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_10, v3_10); + Lib_IntVector_Intrinsics_vec256 + v3__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_10, v3_10); + Lib_IntVector_Intrinsics_vec256 ws12 = v0__10; + Lib_IntVector_Intrinsics_vec256 ws13 = v2__10; + Lib_IntVector_Intrinsics_vec256 ws14 = v1__10; + Lib_IntVector_Intrinsics_vec256 ws15 = v3__10; + 
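/* Same interleave pattern for the remaining lane groups (ws[16..31]). */ +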
Lib_IntVector_Intrinsics_vec256 v012 = ws[16U]; + Lib_IntVector_Intrinsics_vec256 v112 = ws[17U]; + Lib_IntVector_Intrinsics_vec256 v212 = ws[18U]; + Lib_IntVector_Intrinsics_vec256 v312 = ws[19U]; + Lib_IntVector_Intrinsics_vec256 + v0_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v012, v112); + Lib_IntVector_Intrinsics_vec256 + v1_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v012, v112); + Lib_IntVector_Intrinsics_vec256 + v2_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v212, v312); + Lib_IntVector_Intrinsics_vec256 + v3_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v212, v312); + Lib_IntVector_Intrinsics_vec256 + v0__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_11, v2_11); + Lib_IntVector_Intrinsics_vec256 + v1__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_11, v2_11); + Lib_IntVector_Intrinsics_vec256 + v2__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_11, v3_11); + Lib_IntVector_Intrinsics_vec256 + v3__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_11, v3_11); + Lib_IntVector_Intrinsics_vec256 ws16 = v0__11; + Lib_IntVector_Intrinsics_vec256 ws17 = v2__11; + Lib_IntVector_Intrinsics_vec256 ws18 = v1__11; + Lib_IntVector_Intrinsics_vec256 ws19 = v3__11; + Lib_IntVector_Intrinsics_vec256 v013 = ws[20U]; + Lib_IntVector_Intrinsics_vec256 v113 = ws[21U]; + Lib_IntVector_Intrinsics_vec256 v213 = ws[22U]; + Lib_IntVector_Intrinsics_vec256 v313 = ws[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v013, v113); + Lib_IntVector_Intrinsics_vec256 + v1_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v013, v113); + Lib_IntVector_Intrinsics_vec256 + v2_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v213, v313); + Lib_IntVector_Intrinsics_vec256 + v3_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v213, v313); + Lib_IntVector_Intrinsics_vec256 + v0__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_12, v2_12); + Lib_IntVector_Intrinsics_vec256 + v1__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_12, v2_12); + Lib_IntVector_Intrinsics_vec256 + v2__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_12, v3_12); + Lib_IntVector_Intrinsics_vec256 + v3__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_12, v3_12); + Lib_IntVector_Intrinsics_vec256 ws20 = v0__12; + Lib_IntVector_Intrinsics_vec256 ws21 = v2__12; + Lib_IntVector_Intrinsics_vec256 ws22 = v1__12; + Lib_IntVector_Intrinsics_vec256 ws23 = v3__12; + Lib_IntVector_Intrinsics_vec256 v014 = ws[24U]; + Lib_IntVector_Intrinsics_vec256 v114 = ws[25U]; + Lib_IntVector_Intrinsics_vec256 v214 = ws[26U]; + Lib_IntVector_Intrinsics_vec256 v314 = ws[27U]; + Lib_IntVector_Intrinsics_vec256 + v0_13 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v014, v114); + Lib_IntVector_Intrinsics_vec256 + v1_13 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v014, v114); + Lib_IntVector_Intrinsics_vec256 + v2_13 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v214, v314); + Lib_IntVector_Intrinsics_vec256 + v3_13 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v214, v314); + Lib_IntVector_Intrinsics_vec256 + v0__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_13, v2_13); + Lib_IntVector_Intrinsics_vec256 + v1__13 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_13, v2_13); + Lib_IntVector_Intrinsics_vec256 + v2__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_13, v3_13); + Lib_IntVector_Intrinsics_vec256 + v3__13 = 
Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_13, v3_13); + Lib_IntVector_Intrinsics_vec256 ws24 = v0__13; + Lib_IntVector_Intrinsics_vec256 ws25 = v2__13; + Lib_IntVector_Intrinsics_vec256 ws26 = v1__13; + Lib_IntVector_Intrinsics_vec256 ws27 = v3__13; + Lib_IntVector_Intrinsics_vec256 v0 = ws[28U]; + Lib_IntVector_Intrinsics_vec256 v1 = ws[29U]; + Lib_IntVector_Intrinsics_vec256 v2 = ws[30U]; + Lib_IntVector_Intrinsics_vec256 v3 = ws[31U]; + Lib_IntVector_Intrinsics_vec256 + v0_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v1_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v2_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v3_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v0__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_14, v2_14); + Lib_IntVector_Intrinsics_vec256 + v1__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_14, v2_14); + Lib_IntVector_Intrinsics_vec256 + v2__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_14, v3_14); + Lib_IntVector_Intrinsics_vec256 + v3__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_14, v3_14); + Lib_IntVector_Intrinsics_vec256 ws28 = v0__14; + Lib_IntVector_Intrinsics_vec256 ws29 = v2__14; + Lib_IntVector_Intrinsics_vec256 ws30 = v1__14; + Lib_IntVector_Intrinsics_vec256 ws31 = v3__14; + ws[0U] = ws0; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; + ws[31U] = ws31; + for (uint32_t i = 0U; i < 32U; i++) + { + Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); + } + uint8_t *b35 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + i0 * rateInBytes1, hbuf, rateInBytes1 * sizeof (uint8_t)); + memcpy(b1 + i0 * rateInBytes1, hbuf + 256U, rateInBytes1 * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes1, hbuf + 512U, rateInBytes1 * sizeof (uint8_t)); + memcpy(b35 + i0 * rateInBytes1, hbuf + 768U, rateInBytes1 * sizeof (uint8_t)); + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____0 = s[i + 0U]; + Lib_IntVector_Intrinsics_vec256 uu____1 = s[i + 5U]; + Lib_IntVector_Intrinsics_vec256 uu____2 = s[i + 10U]; + _C[i] = + Lib_IntVector_Intrinsics_vec256_xor(uu____0, + Lib_IntVector_Intrinsics_vec256_xor(uu____1, + Lib_IntVector_Intrinsics_vec256_xor(uu____2, + Lib_IntVector_Intrinsics_vec256_xor(s[i + 15U], s[i + 20U]))));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____3 = _C[(i2 + 4U) % 5U]; + Lib_IntVector_Intrinsics_vec256 uu____4 = _C[(i2 + 1U) % 5U]; + Lib_IntVector_Intrinsics_vec256 + _D = + Lib_IntVector_Intrinsics_vec256_xor(uu____3, + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____4, + 1U), + 
Lib_IntVector_Intrinsics_vec256_shift_right64(uu____4, 63U))); + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + s[i2 + 5U * i] = Lib_IntVector_Intrinsics_vec256_xor(s[i2 + 5U * i], _D););); + Lib_IntVector_Intrinsics_vec256 x = s[1U]; + Lib_IntVector_Intrinsics_vec256 current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Hash_SHA3_keccak_piln[i]; + uint32_t r = Hacl_Hash_SHA3_keccak_rotc[i]; + Lib_IntVector_Intrinsics_vec256 temp = s[_Y]; + Lib_IntVector_Intrinsics_vec256 uu____5 = current; + s[_Y] = + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____5, + r), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____5, 64U - r)); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____6 = s[0U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____7 = Lib_IntVector_Intrinsics_vec256_lognot(s[1U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v015 = + Lib_IntVector_Intrinsics_vec256_xor(uu____6, + Lib_IntVector_Intrinsics_vec256_and(uu____7, s[2U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____8 = s[1U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____9 = Lib_IntVector_Intrinsics_vec256_lognot(s[2U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v115 = + Lib_IntVector_Intrinsics_vec256_xor(uu____8, + Lib_IntVector_Intrinsics_vec256_and(uu____9, s[3U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____10 = s[2U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____11 = Lib_IntVector_Intrinsics_vec256_lognot(s[3U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v215 = + Lib_IntVector_Intrinsics_vec256_xor(uu____10, + Lib_IntVector_Intrinsics_vec256_and(uu____11, s[4U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____12 = s[3U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____13 = Lib_IntVector_Intrinsics_vec256_lognot(s[4U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v315 = + Lib_IntVector_Intrinsics_vec256_xor(uu____12, + Lib_IntVector_Intrinsics_vec256_and(uu____13, s[0U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____14 = s[4U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____15 = Lib_IntVector_Intrinsics_vec256_lognot(s[0U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v4 = + Lib_IntVector_Intrinsics_vec256_xor(uu____14, + Lib_IntVector_Intrinsics_vec256_and(uu____15, s[1U + 5U * i])); + s[0U + 5U * i] = v015; + s[1U + 5U * i] = v115; + s[2U + 5U * i] = v215; + s[3U + 5U * i] = v315; + s[4U + 5U * i] = v4;); + uint64_t c = Hacl_Hash_SHA3_keccak_rndc[i1]; + Lib_IntVector_Intrinsics_vec256 uu____16 = s[0U]; + s[0U] = + Lib_IntVector_Intrinsics_vec256_xor(uu____16, + Lib_IntVector_Intrinsics_vec256_load64(c)); + } + } + uint32_t remOut = 28U % rateInBytes1; + uint8_t hbuf[1024U] = { 0U }; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws[32U] KRML_POST_ALIGN(32) = { 0U }; + memcpy(ws, s, 25U * sizeof (Lib_IntVector_Intrinsics_vec256)); + Lib_IntVector_Intrinsics_vec256 v08 = ws[0U]; + Lib_IntVector_Intrinsics_vec256 v18 = ws[1U]; + Lib_IntVector_Intrinsics_vec256 v28 = ws[2U]; + Lib_IntVector_Intrinsics_vec256 v38 = ws[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v08, v18); + Lib_IntVector_Intrinsics_vec256 + v1_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v08, v18); + Lib_IntVector_Intrinsics_vec256 + v2_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v28, v38); + Lib_IntVector_Intrinsics_vec256 + v3_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v28, v38); + Lib_IntVector_Intrinsics_vec256 + v0__7 = 
Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_7, v2_7); + Lib_IntVector_Intrinsics_vec256 + v1__7 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_7, v2_7); + Lib_IntVector_Intrinsics_vec256 + v2__7 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_7, v3_7); + Lib_IntVector_Intrinsics_vec256 + v3__7 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_7, v3_7); + Lib_IntVector_Intrinsics_vec256 ws0 = v0__7; + Lib_IntVector_Intrinsics_vec256 ws1 = v2__7; + Lib_IntVector_Intrinsics_vec256 ws2 = v1__7; + Lib_IntVector_Intrinsics_vec256 ws3 = v3__7; + Lib_IntVector_Intrinsics_vec256 v09 = ws[4U]; + Lib_IntVector_Intrinsics_vec256 v19 = ws[5U]; + Lib_IntVector_Intrinsics_vec256 v29 = ws[6U]; + Lib_IntVector_Intrinsics_vec256 v39 = ws[7U]; + Lib_IntVector_Intrinsics_vec256 + v0_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v09, v19); + Lib_IntVector_Intrinsics_vec256 + v1_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v09, v19); + Lib_IntVector_Intrinsics_vec256 + v2_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v29, v39); + Lib_IntVector_Intrinsics_vec256 + v3_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v29, v39); + Lib_IntVector_Intrinsics_vec256 + v0__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_8, v2_8); + Lib_IntVector_Intrinsics_vec256 + v1__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_8, v2_8); + Lib_IntVector_Intrinsics_vec256 + v2__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_8, v3_8); + Lib_IntVector_Intrinsics_vec256 + v3__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_8, v3_8); + Lib_IntVector_Intrinsics_vec256 ws4 = v0__8; + Lib_IntVector_Intrinsics_vec256 ws5 = v2__8; + Lib_IntVector_Intrinsics_vec256 ws6 = v1__8; + Lib_IntVector_Intrinsics_vec256 ws7 = v3__8; + Lib_IntVector_Intrinsics_vec256 v010 = ws[8U]; + Lib_IntVector_Intrinsics_vec256 v110 = ws[9U]; + Lib_IntVector_Intrinsics_vec256 v210 = ws[10U]; + Lib_IntVector_Intrinsics_vec256 v310 = ws[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v010, v110); + Lib_IntVector_Intrinsics_vec256 + v1_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v010, v110); + Lib_IntVector_Intrinsics_vec256 + v2_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v210, v310); + Lib_IntVector_Intrinsics_vec256 + v3_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v210, v310); + Lib_IntVector_Intrinsics_vec256 + v0__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_9, v2_9); + Lib_IntVector_Intrinsics_vec256 + v1__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_9, v2_9); + Lib_IntVector_Intrinsics_vec256 + v2__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_9, v3_9); + Lib_IntVector_Intrinsics_vec256 + v3__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_9, v3_9); + Lib_IntVector_Intrinsics_vec256 ws8 = v0__9; + Lib_IntVector_Intrinsics_vec256 ws9 = v2__9; + Lib_IntVector_Intrinsics_vec256 ws10 = v1__9; + Lib_IntVector_Intrinsics_vec256 ws11 = v3__9; + Lib_IntVector_Intrinsics_vec256 v011 = ws[12U]; + Lib_IntVector_Intrinsics_vec256 v111 = ws[13U]; + Lib_IntVector_Intrinsics_vec256 v211 = ws[14U]; + Lib_IntVector_Intrinsics_vec256 v311 = ws[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_10 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v011, v111); + Lib_IntVector_Intrinsics_vec256 + v1_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v011, v111); + Lib_IntVector_Intrinsics_vec256 + v2_10 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v211, 
v311); + Lib_IntVector_Intrinsics_vec256 + v3_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v211, v311); + Lib_IntVector_Intrinsics_vec256 + v0__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_10, v2_10); + Lib_IntVector_Intrinsics_vec256 + v1__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_10, v2_10); + Lib_IntVector_Intrinsics_vec256 + v2__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_10, v3_10); + Lib_IntVector_Intrinsics_vec256 + v3__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_10, v3_10); + Lib_IntVector_Intrinsics_vec256 ws12 = v0__10; + Lib_IntVector_Intrinsics_vec256 ws13 = v2__10; + Lib_IntVector_Intrinsics_vec256 ws14 = v1__10; + Lib_IntVector_Intrinsics_vec256 ws15 = v3__10; + Lib_IntVector_Intrinsics_vec256 v012 = ws[16U]; + Lib_IntVector_Intrinsics_vec256 v112 = ws[17U]; + Lib_IntVector_Intrinsics_vec256 v212 = ws[18U]; + Lib_IntVector_Intrinsics_vec256 v312 = ws[19U]; + Lib_IntVector_Intrinsics_vec256 + v0_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v012, v112); + Lib_IntVector_Intrinsics_vec256 + v1_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v012, v112); + Lib_IntVector_Intrinsics_vec256 + v2_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v212, v312); + Lib_IntVector_Intrinsics_vec256 + v3_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v212, v312); + Lib_IntVector_Intrinsics_vec256 + v0__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_11, v2_11); + Lib_IntVector_Intrinsics_vec256 + v1__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_11, v2_11); + Lib_IntVector_Intrinsics_vec256 + v2__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_11, v3_11); + Lib_IntVector_Intrinsics_vec256 + v3__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_11, v3_11); + Lib_IntVector_Intrinsics_vec256 ws16 = v0__11; + Lib_IntVector_Intrinsics_vec256 ws17 = v2__11; + Lib_IntVector_Intrinsics_vec256 ws18 = v1__11; + Lib_IntVector_Intrinsics_vec256 ws19 = v3__11; + Lib_IntVector_Intrinsics_vec256 v013 = ws[20U]; + Lib_IntVector_Intrinsics_vec256 v113 = ws[21U]; + Lib_IntVector_Intrinsics_vec256 v213 = ws[22U]; + Lib_IntVector_Intrinsics_vec256 v313 = ws[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v013, v113); + Lib_IntVector_Intrinsics_vec256 + v1_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v013, v113); + Lib_IntVector_Intrinsics_vec256 + v2_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v213, v313); + Lib_IntVector_Intrinsics_vec256 + v3_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v213, v313); + Lib_IntVector_Intrinsics_vec256 + v0__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_12, v2_12); + Lib_IntVector_Intrinsics_vec256 + v1__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_12, v2_12); + Lib_IntVector_Intrinsics_vec256 + v2__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_12, v3_12); + Lib_IntVector_Intrinsics_vec256 + v3__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_12, v3_12); + Lib_IntVector_Intrinsics_vec256 ws20 = v0__12; + Lib_IntVector_Intrinsics_vec256 ws21 = v2__12; + Lib_IntVector_Intrinsics_vec256 ws22 = v1__12; + Lib_IntVector_Intrinsics_vec256 ws23 = v3__12; + Lib_IntVector_Intrinsics_vec256 v014 = ws[24U]; + Lib_IntVector_Intrinsics_vec256 v114 = ws[25U]; + Lib_IntVector_Intrinsics_vec256 v214 = ws[26U]; + Lib_IntVector_Intrinsics_vec256 v314 = ws[27U]; + Lib_IntVector_Intrinsics_vec256 + v0_13 = 
Lib_IntVector_Intrinsics_vec256_interleave_low64(v014, v114); + Lib_IntVector_Intrinsics_vec256 + v1_13 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v014, v114); + Lib_IntVector_Intrinsics_vec256 + v2_13 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v214, v314); + Lib_IntVector_Intrinsics_vec256 + v3_13 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v214, v314); + Lib_IntVector_Intrinsics_vec256 + v0__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_13, v2_13); + Lib_IntVector_Intrinsics_vec256 + v1__13 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_13, v2_13); + Lib_IntVector_Intrinsics_vec256 + v2__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_13, v3_13); + Lib_IntVector_Intrinsics_vec256 + v3__13 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_13, v3_13); + Lib_IntVector_Intrinsics_vec256 ws24 = v0__13; + Lib_IntVector_Intrinsics_vec256 ws25 = v2__13; + Lib_IntVector_Intrinsics_vec256 ws26 = v1__13; + Lib_IntVector_Intrinsics_vec256 ws27 = v3__13; + Lib_IntVector_Intrinsics_vec256 v0 = ws[28U]; + Lib_IntVector_Intrinsics_vec256 v1 = ws[29U]; + Lib_IntVector_Intrinsics_vec256 v2 = ws[30U]; + Lib_IntVector_Intrinsics_vec256 v3 = ws[31U]; + Lib_IntVector_Intrinsics_vec256 + v0_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v1_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v2_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v3_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v0__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_14, v2_14); + Lib_IntVector_Intrinsics_vec256 + v1__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_14, v2_14); + Lib_IntVector_Intrinsics_vec256 + v2__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_14, v3_14); + Lib_IntVector_Intrinsics_vec256 + v3__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_14, v3_14); + Lib_IntVector_Intrinsics_vec256 ws28 = v0__14; + Lib_IntVector_Intrinsics_vec256 ws29 = v2__14; + Lib_IntVector_Intrinsics_vec256 ws30 = v1__14; + Lib_IntVector_Intrinsics_vec256 ws31 = v3__14; + ws[0U] = ws0; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; + ws[31U] = ws31; + for (uint32_t i = 0U; i < 32U; i++) + { + Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); + } + uint8_t *b35 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + 28U - remOut, hbuf, remOut * sizeof (uint8_t)); + memcpy(b1 + 28U - remOut, hbuf + 256U, remOut * sizeof (uint8_t)); + memcpy(b2 + 28U - remOut, hbuf + 512U, remOut * sizeof (uint8_t)); + memcpy(b35 + 28U - remOut, hbuf + 768U, remOut * sizeof (uint8_t)); +} + +void +Hacl_Hash_SHA3_Simd256_sha3_256( + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, + uint8_t *input0, + uint8_t *input1, + uint8_t *input2, + uint8_t *input3, + uint32_t inputByteLen +) +{ + 
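/* 4-way SHA3-256: absorb the four inputs at a 136-byte rate with domain suffix 0x06, then squeeze 32 bytes into each output. */ +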
+  Hacl_Hash_SHA2_uint8_4p ib = { .fst = input0, .snd = { .fst = input1, .snd = { .fst = input2, .snd = input3 } } };
+  Hacl_Hash_SHA2_uint8_4p rb = { .fst = output0, .snd = { .fst = output1, .snd = { .fst = output2, .snd = output3 } } };
+  KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 s[25U] KRML_POST_ALIGN(32) = { 0U };
+  uint32_t rateInBytes1 = 136U;
+  for (uint32_t i = 0U; i < inputByteLen / rateInBytes1; i++)
+  {
+    uint8_t b00[256U] = { 0U };
+    uint8_t b10[256U] = { 0U };
+    uint8_t b20[256U] = { 0U };
+    uint8_t b30[256U] = { 0U };
+    Hacl_Hash_SHA2_uint8_4p b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } };
+    uint8_t *b3 = ib.snd.snd.snd;
+    uint8_t *b2 = ib.snd.snd.fst;
+    uint8_t *b1 = ib.snd.fst;
+    uint8_t *b0 = ib.fst;
+    uint8_t *bl3 = b_.snd.snd.snd;
+    uint8_t *bl2 = b_.snd.snd.fst;
+    uint8_t *bl1 = b_.snd.fst;
+    uint8_t *bl0 = b_.fst;
+    memcpy(bl0, b0 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t));
+    memcpy(bl1, b1 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t));
+    memcpy(bl2, b2 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t));
+    memcpy(bl3, b3 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t));
+    Hacl_Hash_SHA3_Simd256_absorb_inner_256(rateInBytes1, b_, s);
+  }
+  uint8_t b00[256U] = { 0U };
+  uint8_t b10[256U] = { 0U };
+  uint8_t b20[256U] = { 0U };
+  uint8_t b30[256U] = { 0U };
+  Hacl_Hash_SHA2_uint8_4p b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } };
+  uint32_t rem = inputByteLen % rateInBytes1;
+  uint8_t *b31 = ib.snd.snd.snd;
+  uint8_t *b21 = ib.snd.snd.fst;
+  uint8_t *b11 = ib.snd.fst;
+  uint8_t *b01 = ib.fst;
+  uint8_t *bl3 = b_.snd.snd.snd;
+  uint8_t *bl2 = b_.snd.snd.fst;
+  uint8_t *bl1 = b_.snd.fst;
+  uint8_t *bl0 = b_.fst;
+  memcpy(bl0, b01 + inputByteLen - rem, rem * sizeof (uint8_t));
+  memcpy(bl1, b11 + inputByteLen - rem, rem * sizeof (uint8_t));
+  memcpy(bl2, b21 + inputByteLen - rem, rem * sizeof (uint8_t));
+  memcpy(bl3, b31 + inputByteLen - rem, rem * sizeof (uint8_t));
+  uint8_t *b32 = b_.snd.snd.snd;
+  uint8_t *b22 = b_.snd.snd.fst;
+  uint8_t *b12 = b_.snd.fst;
+  uint8_t *b02 = b_.fst;
+  b02[inputByteLen % rateInBytes1] = 0x06U;
+  b12[inputByteLen % rateInBytes1] = 0x06U;
+  b22[inputByteLen % rateInBytes1] = 0x06U;
+  b32[inputByteLen % rateInBytes1] = 0x06U;
+  KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U };
+  uint8_t *b33 = b_.snd.snd.snd;
+  uint8_t *b23 = b_.snd.snd.fst;
+  uint8_t *b13 = b_.snd.fst;
+  uint8_t *b03 = b_.fst;
+  ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03);
+  ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13);
+  ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23);
+  ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33);
+  ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 32U);
+  ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 32U);
+  ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 32U);
+  ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 32U);
+  ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 64U);
+  ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 64U);
+  ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 64U);
+  ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 64U);
+  ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 96U);
+  ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 96U);
+  ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 96U);
+  ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 96U);
+  ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 128U);
+  ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 128U);
+  ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 128U);
+  ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 128U);
+  ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 160U);
+  ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 160U);
+  ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 160U);
+  ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 160U);
+  ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 192U);
+  ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 192U);
+  ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 192U);
+  ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 192U);
+  ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 224U);
+  ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 224U);
+  ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 224U);
+  ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 224U);
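+  /* Editorial annotation: the interleave network that follows transposes
+     the 32 vectors loaded above (four 256-bit rows per input) into the
+     lane-per-vector layout used by the 4-way Keccak state, so that each
+     state vector holds the same lane of all four instances. */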
+  Lib_IntVector_Intrinsics_vec256 v00 = ws32[0U], v10 = ws32[1U], v20 = ws32[2U], v30 = ws32[3U];
+  Lib_IntVector_Intrinsics_vec256 v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v00, v10);
+  Lib_IntVector_Intrinsics_vec256 v1_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v00, v10);
+  Lib_IntVector_Intrinsics_vec256 v2_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v20, v30);
+  Lib_IntVector_Intrinsics_vec256 v3_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v20, v30);
+  Lib_IntVector_Intrinsics_vec256 v0__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_, v2_);
+  Lib_IntVector_Intrinsics_vec256 v1__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_, v2_);
+  Lib_IntVector_Intrinsics_vec256 v2__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_, v3_);
+  Lib_IntVector_Intrinsics_vec256 v3__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_, v3_);
+  Lib_IntVector_Intrinsics_vec256 ws00 = v0__, ws110 = v2__, ws210 = v1__, ws33 = v3__;
+  Lib_IntVector_Intrinsics_vec256 v01 = ws32[4U], v11 = ws32[5U], v21 = ws32[6U], v31 = ws32[7U];
+  Lib_IntVector_Intrinsics_vec256 v0_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v01, v11);
+  Lib_IntVector_Intrinsics_vec256 v1_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v01, v11);
+  Lib_IntVector_Intrinsics_vec256 v2_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v21, v31);
+  Lib_IntVector_Intrinsics_vec256 v3_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v21, v31);
+  Lib_IntVector_Intrinsics_vec256 v0__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_0, v2_0);
+  Lib_IntVector_Intrinsics_vec256 v1__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_0, v2_0);
+  Lib_IntVector_Intrinsics_vec256 v2__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_0, v3_0);
+  Lib_IntVector_Intrinsics_vec256 v3__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_0, v3_0);
+  Lib_IntVector_Intrinsics_vec256 ws40 = v0__0, ws50 = v2__0, ws60 = v1__0, ws70 = v3__0;
+  Lib_IntVector_Intrinsics_vec256 v02 = ws32[8U], v12 = ws32[9U], v22 = ws32[10U], v32 = ws32[11U];
+  Lib_IntVector_Intrinsics_vec256 v0_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v02, v12);
+  Lib_IntVector_Intrinsics_vec256 v1_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v02, v12);
+  Lib_IntVector_Intrinsics_vec256 v2_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v22, v32);
+  Lib_IntVector_Intrinsics_vec256 v3_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v22, v32);
+  Lib_IntVector_Intrinsics_vec256 v0__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_1, v2_1);
+  Lib_IntVector_Intrinsics_vec256 v1__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_1, v2_1);
+  Lib_IntVector_Intrinsics_vec256 v2__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_1, v3_1);
+  Lib_IntVector_Intrinsics_vec256 v3__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_1, v3_1);
+  Lib_IntVector_Intrinsics_vec256 ws80 = v0__1, ws90 = v2__1, ws100 = v1__1, ws111 = v3__1;
+  Lib_IntVector_Intrinsics_vec256 v03 = ws32[12U], v13 = ws32[13U], v23 = ws32[14U], v33 = ws32[15U];
+  Lib_IntVector_Intrinsics_vec256 v0_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v03, v13);
+  Lib_IntVector_Intrinsics_vec256 v1_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v03, v13);
+  Lib_IntVector_Intrinsics_vec256 v2_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v23, v33);
+  Lib_IntVector_Intrinsics_vec256 v3_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v23, v33);
+  Lib_IntVector_Intrinsics_vec256 v0__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_2, v2_2);
+  Lib_IntVector_Intrinsics_vec256 v1__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_2, v2_2);
+  Lib_IntVector_Intrinsics_vec256 v2__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_2, v3_2);
+  Lib_IntVector_Intrinsics_vec256 v3__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_2, v3_2);
+  Lib_IntVector_Intrinsics_vec256 ws120 = v0__2, ws130 = v2__2, ws140 = v1__2, ws150 = v3__2;
+  Lib_IntVector_Intrinsics_vec256 v04 = ws32[16U], v14 = ws32[17U], v24 = ws32[18U], v34 = ws32[19U];
+  Lib_IntVector_Intrinsics_vec256 v0_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v04, v14);
+  Lib_IntVector_Intrinsics_vec256 v1_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v04, v14);
+  Lib_IntVector_Intrinsics_vec256 v2_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v24, v34);
+  Lib_IntVector_Intrinsics_vec256 v3_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v24, v34);
+  Lib_IntVector_Intrinsics_vec256 v0__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_3, v2_3);
+  Lib_IntVector_Intrinsics_vec256 v1__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_3, v2_3);
+  Lib_IntVector_Intrinsics_vec256 v2__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_3, v3_3);
+  Lib_IntVector_Intrinsics_vec256 v3__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_3, v3_3);
+  Lib_IntVector_Intrinsics_vec256 ws160 = v0__3, ws170 = v2__3, ws180 = v1__3, ws190 = v3__3;
+  Lib_IntVector_Intrinsics_vec256 v05 = ws32[20U], v15 = ws32[21U], v25 = ws32[22U], v35 = ws32[23U];
+  Lib_IntVector_Intrinsics_vec256 v0_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v05, v15);
+  Lib_IntVector_Intrinsics_vec256 v1_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v05, v15);
+  Lib_IntVector_Intrinsics_vec256 v2_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v25, v35);
+  Lib_IntVector_Intrinsics_vec256 v3_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v25, v35);
+  Lib_IntVector_Intrinsics_vec256 v0__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_4, v2_4);
+  Lib_IntVector_Intrinsics_vec256 v1__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_4, v2_4);
+  Lib_IntVector_Intrinsics_vec256 v2__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_4, v3_4);
+  Lib_IntVector_Intrinsics_vec256 v3__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_4, v3_4);
+  Lib_IntVector_Intrinsics_vec256 ws200 = v0__4, ws211 = v2__4, ws220 = v1__4, ws230 = v3__4;
+  Lib_IntVector_Intrinsics_vec256 v06 = ws32[24U], v16 = ws32[25U], v26 = ws32[26U], v36 = ws32[27U];
+  Lib_IntVector_Intrinsics_vec256 v0_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v06, v16);
+  Lib_IntVector_Intrinsics_vec256 v1_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v06, v16);
+  Lib_IntVector_Intrinsics_vec256 v2_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v26, v36);
+  Lib_IntVector_Intrinsics_vec256 v3_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v26, v36);
+  Lib_IntVector_Intrinsics_vec256 v0__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_5, v2_5);
+  Lib_IntVector_Intrinsics_vec256 v1__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_5, v2_5);
+  Lib_IntVector_Intrinsics_vec256 v2__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_5, v3_5);
+  Lib_IntVector_Intrinsics_vec256 v3__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_5, v3_5);
+  Lib_IntVector_Intrinsics_vec256 ws240 = v0__5, ws250 = v2__5, ws260 = v1__5, ws270 = v3__5;
+  Lib_IntVector_Intrinsics_vec256 v07 = ws32[28U], v17 = ws32[29U], v27 = ws32[30U], v37 = ws32[31U];
+  Lib_IntVector_Intrinsics_vec256 v0_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v07, v17);
+  Lib_IntVector_Intrinsics_vec256 v1_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v07, v17);
+  Lib_IntVector_Intrinsics_vec256 v2_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v27, v37);
+  Lib_IntVector_Intrinsics_vec256 v3_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v27, v37);
+  Lib_IntVector_Intrinsics_vec256 v0__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_6, v2_6);
+  Lib_IntVector_Intrinsics_vec256 v1__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_6, v2_6);
+  Lib_IntVector_Intrinsics_vec256 v2__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_6, v3_6);
+  Lib_IntVector_Intrinsics_vec256 v3__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_6, v3_6);
+  Lib_IntVector_Intrinsics_vec256 ws280 = v0__6, ws290 = v2__6, ws300 = v1__6, ws310 = v3__6;
+  ws32[0U] = ws00; ws32[1U] = ws110; ws32[2U] = ws210; ws32[3U] = ws33;
+  ws32[4U] = ws40; ws32[5U] = ws50; ws32[6U] = ws60; ws32[7U] = ws70;
+  ws32[8U] = ws80; ws32[9U] = ws90; ws32[10U] = ws100; ws32[11U] = ws111;
+  ws32[12U] = ws120; ws32[13U] = ws130; ws32[14U] = ws140; ws32[15U] = ws150;
+  ws32[16U] = ws160; ws32[17U] = ws170; ws32[18U] = ws180; ws32[19U] = ws190;
+  ws32[20U] = ws200; ws32[21U] = ws211; ws32[22U] = ws220; ws32[23U] = ws230;
+  ws32[24U] = ws240; ws32[25U] = ws250; ws32[26U] = ws260; ws32[27U] = ws270;
+  ws32[28U] = ws280; ws32[29U] = ws290; ws32[30U] = ws300; ws32[31U] = ws310;
+  for (uint32_t i = 0U; i < 25U; i++)
+  {
+    s[i] = Lib_IntVector_Intrinsics_vec256_xor(s[i], ws32[i]);
+  }
+  uint8_t b04[256U] = { 0U };
+  uint8_t b14[256U] = { 0U };
+  uint8_t b24[256U] = { 0U };
+  uint8_t b34[256U] = { 0U };
+  Hacl_Hash_SHA2_uint8_4p b = { .fst = b04, .snd = { .fst = b14, .snd = { .fst = b24, .snd = b34 } } };
+  uint8_t *b3 = b.snd.snd.snd;
+  uint8_t *b25 = b.snd.snd.fst;
+  uint8_t *b15 = b.snd.fst;
+  uint8_t *b05 = b.fst;
+  b05[rateInBytes1 - 1U] = 0x80U;
+  b15[rateInBytes1 - 1U] = 0x80U;
+  b25[rateInBytes1 - 1U] = 0x80U;
+  b3[rateInBytes1 - 1U] = 0x80U;
+  Hacl_Hash_SHA3_Simd256_absorb_inner_256(rateInBytes1, b, s);
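+  /* Editorial annotation: squeeze phase. For SHA3-256 the 32-byte digest
+     fits inside a single 136-byte rate block, so the block loop below
+     executes zero times and the remainder copy after it emits the whole
+     digest (remOut = 32). */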
+  for (uint32_t i0 = 0U; i0 < 32U / rateInBytes1; i0++)
+  {
+    uint8_t hbuf[1024U] = { 0U };
+    KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws[32U] KRML_POST_ALIGN(32) = { 0U };
+    memcpy(ws, s, 25U * sizeof (Lib_IntVector_Intrinsics_vec256));
+    Lib_IntVector_Intrinsics_vec256 v08 = ws[0U], v18 = ws[1U], v28 = ws[2U], v38 = ws[3U];
+    Lib_IntVector_Intrinsics_vec256 v0_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v08, v18);
+    Lib_IntVector_Intrinsics_vec256 v1_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v08, v18);
+    Lib_IntVector_Intrinsics_vec256 v2_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v28, v38);
+    Lib_IntVector_Intrinsics_vec256 v3_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v28, v38);
+    Lib_IntVector_Intrinsics_vec256 v0__7 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_7, v2_7);
+    Lib_IntVector_Intrinsics_vec256 v1__7 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_7, v2_7);
+    Lib_IntVector_Intrinsics_vec256 v2__7 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_7, v3_7);
+    Lib_IntVector_Intrinsics_vec256 v3__7 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_7, v3_7);
+    Lib_IntVector_Intrinsics_vec256 ws0 = v0__7, ws1 = v2__7, ws2 = v1__7, ws3 = v3__7;
+    Lib_IntVector_Intrinsics_vec256 v09 = ws[4U], v19 = ws[5U], v29 = ws[6U], v39 = ws[7U];
+    Lib_IntVector_Intrinsics_vec256 v0_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v09, v19);
+    Lib_IntVector_Intrinsics_vec256 v1_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v09, v19);
+    Lib_IntVector_Intrinsics_vec256 v2_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v29, v39);
+    Lib_IntVector_Intrinsics_vec256 v3_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v29, v39);
+    Lib_IntVector_Intrinsics_vec256 v0__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_8, v2_8);
+    Lib_IntVector_Intrinsics_vec256 v1__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_8, v2_8);
+    Lib_IntVector_Intrinsics_vec256 v2__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_8, v3_8);
+    Lib_IntVector_Intrinsics_vec256 v3__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_8, v3_8);
+    Lib_IntVector_Intrinsics_vec256 ws4 = v0__8, ws5 = v2__8, ws6 = v1__8, ws7 = v3__8;
+    Lib_IntVector_Intrinsics_vec256 v010 = ws[8U], v110 = ws[9U], v210 = ws[10U], v310 = ws[11U];
+    Lib_IntVector_Intrinsics_vec256 v0_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v010, v110);
+    Lib_IntVector_Intrinsics_vec256 v1_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v010, v110);
+    Lib_IntVector_Intrinsics_vec256 v2_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v210, v310);
+    Lib_IntVector_Intrinsics_vec256 v3_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v210, v310);
+    Lib_IntVector_Intrinsics_vec256 v0__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_9, v2_9);
+    Lib_IntVector_Intrinsics_vec256 v1__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_9, v2_9);
+    Lib_IntVector_Intrinsics_vec256 v2__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_9, v3_9);
+    Lib_IntVector_Intrinsics_vec256 v3__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_9, v3_9);
+    Lib_IntVector_Intrinsics_vec256 ws8 = v0__9, ws9 = v2__9, ws10 = v1__9, ws11 = v3__9;
+    Lib_IntVector_Intrinsics_vec256 v011 = ws[12U], v111 = ws[13U], v211 = ws[14U], v311 = ws[15U];
+    Lib_IntVector_Intrinsics_vec256 v0_10 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v011, v111);
+    Lib_IntVector_Intrinsics_vec256 v1_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v011, v111);
+    Lib_IntVector_Intrinsics_vec256 v2_10 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v211, v311);
+    Lib_IntVector_Intrinsics_vec256 v3_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v211, v311);
+    Lib_IntVector_Intrinsics_vec256 v0__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_10, v2_10);
+    Lib_IntVector_Intrinsics_vec256 v1__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_10, v2_10);
+    Lib_IntVector_Intrinsics_vec256 v2__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_10, v3_10);
+    Lib_IntVector_Intrinsics_vec256 v3__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_10, v3_10);
+    Lib_IntVector_Intrinsics_vec256 ws12 = v0__10, ws13 = v2__10, ws14 = v1__10, ws15 = v3__10;
+    Lib_IntVector_Intrinsics_vec256 v012 = ws[16U], v112 = ws[17U], v212 = ws[18U], v312 = ws[19U];
+    Lib_IntVector_Intrinsics_vec256 v0_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v012, v112);
+    Lib_IntVector_Intrinsics_vec256 v1_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v012, v112);
+    Lib_IntVector_Intrinsics_vec256 v2_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v212, v312);
+    Lib_IntVector_Intrinsics_vec256 v3_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v212, v312);
+    Lib_IntVector_Intrinsics_vec256 v0__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_11, v2_11);
+    Lib_IntVector_Intrinsics_vec256 v1__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_11, v2_11);
+    Lib_IntVector_Intrinsics_vec256 v2__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_11, v3_11);
+    Lib_IntVector_Intrinsics_vec256 v3__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_11, v3_11);
+    Lib_IntVector_Intrinsics_vec256 ws16 = v0__11, ws17 = v2__11, ws18 = v1__11, ws19 = v3__11;
+    Lib_IntVector_Intrinsics_vec256 v013 = ws[20U], v113 = ws[21U], v213 = ws[22U], v313 = ws[23U];
+    Lib_IntVector_Intrinsics_vec256 v0_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v013, v113);
+    Lib_IntVector_Intrinsics_vec256 v1_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v013, v113);
+    Lib_IntVector_Intrinsics_vec256 v2_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v213, v313);
+    Lib_IntVector_Intrinsics_vec256 v3_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v213, v313);
+    Lib_IntVector_Intrinsics_vec256 v0__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_12, v2_12);
+    Lib_IntVector_Intrinsics_vec256 v1__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_12, v2_12);
+    Lib_IntVector_Intrinsics_vec256 v2__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_12, v3_12);
+    Lib_IntVector_Intrinsics_vec256 v3__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_12, v3_12);
+    Lib_IntVector_Intrinsics_vec256 ws20 = v0__12, ws21 = v2__12, ws22 = v1__12, ws23 = v3__12;
+    Lib_IntVector_Intrinsics_vec256 v014 = ws[24U], v114 = ws[25U], v214 = ws[26U], v314 = ws[27U];
+    Lib_IntVector_Intrinsics_vec256 v0_13 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v014, v114);
+    Lib_IntVector_Intrinsics_vec256 v1_13 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v014, v114);
+    Lib_IntVector_Intrinsics_vec256 v2_13 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v214, v314);
+    Lib_IntVector_Intrinsics_vec256 v3_13 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v214, v314);
+    Lib_IntVector_Intrinsics_vec256 v0__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_13, v2_13);
+    Lib_IntVector_Intrinsics_vec256 v1__13 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_13, v2_13);
+    Lib_IntVector_Intrinsics_vec256 v2__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_13, v3_13);
+    Lib_IntVector_Intrinsics_vec256 v3__13 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_13, v3_13);
+    Lib_IntVector_Intrinsics_vec256 ws24 = v0__13, ws25 = v2__13, ws26 = v1__13, ws27 = v3__13;
+    Lib_IntVector_Intrinsics_vec256 v0 = ws[28U], v1 = ws[29U], v2 = ws[30U], v3 = ws[31U];
+    Lib_IntVector_Intrinsics_vec256 v0_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0, v1);
+    Lib_IntVector_Intrinsics_vec256 v1_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0, v1);
+    Lib_IntVector_Intrinsics_vec256 v2_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v2, v3);
+    Lib_IntVector_Intrinsics_vec256 v3_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v2, v3);
+    Lib_IntVector_Intrinsics_vec256 v0__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_14, v2_14);
+    Lib_IntVector_Intrinsics_vec256 v1__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_14, v2_14);
+    Lib_IntVector_Intrinsics_vec256 v2__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_14, v3_14);
+    Lib_IntVector_Intrinsics_vec256 v3__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_14, v3_14);
+    Lib_IntVector_Intrinsics_vec256 ws28 = v0__14, ws29 = v2__14, ws30 = v1__14, ws31 = v3__14;
+    ws[0U] = ws0; ws[1U] = ws4; ws[2U] = ws8; ws[3U] = ws12;
+    ws[4U] = ws16; ws[5U] = ws20; ws[6U] = ws24; ws[7U] = ws28;
+    ws[8U] = ws1; ws[9U] = ws5; ws[10U] = ws9; ws[11U] = ws13;
+    ws[12U] = ws17; ws[13U] = ws21; ws[14U] = ws25; ws[15U] = ws29;
+    ws[16U] = ws2; ws[17U] = ws6; ws[18U] = ws10; ws[19U] = ws14;
+    ws[20U] = ws18; ws[21U] = ws22; ws[22U] = ws26; ws[23U] = ws30;
+    ws[24U] = ws3; ws[25U] = ws7; ws[26U] = ws11; ws[27U] = ws15;
+    ws[28U] = ws19; ws[29U] = ws23; ws[30U] = ws27; ws[31U] = ws31;
+    for (uint32_t i = 0U; i < 32U; i++)
+    {
+      Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]);
+    }
+    uint8_t *b35 = rb.snd.snd.snd;
+    uint8_t *b2 = rb.snd.snd.fst;
+    uint8_t *b1 = rb.snd.fst;
+    uint8_t *b0 = rb.fst;
+    memcpy(b0 + i0 * rateInBytes1, hbuf, rateInBytes1 * sizeof (uint8_t));
+    memcpy(b1 + i0 * rateInBytes1, hbuf + 256U, rateInBytes1 * sizeof (uint8_t));
+    memcpy(b2 + i0 * rateInBytes1, hbuf + 512U, rateInBytes1 * sizeof (uint8_t));
+    memcpy(b35 + i0 * rateInBytes1, hbuf + 768U, rateInBytes1 * sizeof (uint8_t));
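+    /* Editorial annotation: one Keccak-f[1600] permutation applied to all
+       four lanes at once; each of the 24 rounds below performs theta,
+       rho/pi, chi and iota on the vectorized state. */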
+    for (uint32_t i1 = 0U; i1 < 24U; i1++)
+    {
+      KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U };
+      KRML_MAYBE_FOR5(i,
+        0U,
+        5U,
+        1U,
+        Lib_IntVector_Intrinsics_vec256 uu____0 = s[i + 0U];
+        Lib_IntVector_Intrinsics_vec256 uu____1 = s[i + 5U];
+        Lib_IntVector_Intrinsics_vec256 uu____2 = s[i + 10U];
+        _C[i] =
+          Lib_IntVector_Intrinsics_vec256_xor(uu____0,
+            Lib_IntVector_Intrinsics_vec256_xor(uu____1,
+              Lib_IntVector_Intrinsics_vec256_xor(uu____2,
+                Lib_IntVector_Intrinsics_vec256_xor(s[i + 15U], s[i + 20U])))););
+      KRML_MAYBE_FOR5(i2,
+        0U,
+        5U,
+        1U,
+        Lib_IntVector_Intrinsics_vec256 uu____3 = _C[(i2 + 4U) % 5U];
+        Lib_IntVector_Intrinsics_vec256 uu____4 = _C[(i2 + 1U) % 5U];
+        Lib_IntVector_Intrinsics_vec256 _D =
+          Lib_IntVector_Intrinsics_vec256_xor(uu____3,
+            Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____4, 1U),
+              Lib_IntVector_Intrinsics_vec256_shift_right64(uu____4, 63U)));
+        KRML_MAYBE_FOR5(i,
+          0U,
+          5U,
+          1U,
+          s[i2 + 5U * i] = Lib_IntVector_Intrinsics_vec256_xor(s[i2 + 5U * i], _D);););
+      Lib_IntVector_Intrinsics_vec256 x = s[1U];
+      Lib_IntVector_Intrinsics_vec256 current = x;
+      for (uint32_t i = 0U; i < 24U; i++)
+      {
+        uint32_t _Y = Hacl_Hash_SHA3_keccak_piln[i];
+        uint32_t r = Hacl_Hash_SHA3_keccak_rotc[i];
+        Lib_IntVector_Intrinsics_vec256 temp = s[_Y];
+        Lib_IntVector_Intrinsics_vec256 uu____5 = current;
+        s[_Y] =
+          Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____5, r),
+            Lib_IntVector_Intrinsics_vec256_shift_right64(uu____5, 64U - r));
+        current = temp;
+      }
+      KRML_MAYBE_FOR5(i,
+        0U,
+        5U,
+        1U,
+        Lib_IntVector_Intrinsics_vec256 uu____6 = s[0U + 5U * i];
+        Lib_IntVector_Intrinsics_vec256 uu____7 = Lib_IntVector_Intrinsics_vec256_lognot(s[1U + 5U * i]);
+        Lib_IntVector_Intrinsics_vec256 v015 =
+          Lib_IntVector_Intrinsics_vec256_xor(uu____6,
+            Lib_IntVector_Intrinsics_vec256_and(uu____7, s[2U + 5U * i]));
+        Lib_IntVector_Intrinsics_vec256 uu____8 = s[1U + 5U * i];
+        Lib_IntVector_Intrinsics_vec256 uu____9 = Lib_IntVector_Intrinsics_vec256_lognot(s[2U + 5U * i]);
+        Lib_IntVector_Intrinsics_vec256 v115 =
+          Lib_IntVector_Intrinsics_vec256_xor(uu____8,
+            Lib_IntVector_Intrinsics_vec256_and(uu____9, s[3U + 5U * i]));
+        Lib_IntVector_Intrinsics_vec256 uu____10 = s[2U + 5U * i];
+        Lib_IntVector_Intrinsics_vec256 uu____11 = Lib_IntVector_Intrinsics_vec256_lognot(s[3U + 5U * i]);
+        Lib_IntVector_Intrinsics_vec256 v215 =
+          Lib_IntVector_Intrinsics_vec256_xor(uu____10,
+            Lib_IntVector_Intrinsics_vec256_and(uu____11, s[4U + 5U * i]));
+        Lib_IntVector_Intrinsics_vec256 uu____12 = s[3U + 5U * i];
+        Lib_IntVector_Intrinsics_vec256 uu____13 = Lib_IntVector_Intrinsics_vec256_lognot(s[4U + 5U * i]);
+        Lib_IntVector_Intrinsics_vec256 v315 =
+          Lib_IntVector_Intrinsics_vec256_xor(uu____12,
+            Lib_IntVector_Intrinsics_vec256_and(uu____13, s[0U + 5U * i]));
+        Lib_IntVector_Intrinsics_vec256 uu____14 = s[4U + 5U * i];
+        Lib_IntVector_Intrinsics_vec256 uu____15 = Lib_IntVector_Intrinsics_vec256_lognot(s[0U + 5U * i]);
+        Lib_IntVector_Intrinsics_vec256 v4 =
+          Lib_IntVector_Intrinsics_vec256_xor(uu____14,
+            Lib_IntVector_Intrinsics_vec256_and(uu____15, s[1U + 5U * i]));
+        s[0U + 5U * i] = v015;
+        s[1U + 5U * i] = v115;
+        s[2U + 5U * i] = v215;
+        s[3U + 5U * i] = v315;
+        s[4U + 5U * i] = v4;);
+      uint64_t c = Hacl_Hash_SHA3_keccak_rndc[i1];
+      Lib_IntVector_Intrinsics_vec256 uu____16 = s[0U];
+      s[0U] = Lib_IntVector_Intrinsics_vec256_xor(uu____16, Lib_IntVector_Intrinsics_vec256_load64(c));
+    }
+  }
+  uint32_t remOut = 32U % rateInBytes1;
+  uint8_t hbuf[1024U] = { 0U };
+  KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws[32U] KRML_POST_ALIGN(32) = { 0U };
+  memcpy(ws, s, 25U * sizeof (Lib_IntVector_Intrinsics_vec256));
+  Lib_IntVector_Intrinsics_vec256 v08 = ws[0U], v18 = ws[1U], v28 = ws[2U], v38 = ws[3U];
+  Lib_IntVector_Intrinsics_vec256 v0_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v08, v18);
+  Lib_IntVector_Intrinsics_vec256 v1_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v08, v18);
+  Lib_IntVector_Intrinsics_vec256 v2_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v28, v38);
+  Lib_IntVector_Intrinsics_vec256 v3_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v28, v38);
+  Lib_IntVector_Intrinsics_vec256 v0__7 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_7, v2_7);
+  Lib_IntVector_Intrinsics_vec256 v1__7 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_7, v2_7);
+  Lib_IntVector_Intrinsics_vec256 v2__7 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_7, v3_7);
+  Lib_IntVector_Intrinsics_vec256 v3__7 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_7, v3_7);
+  Lib_IntVector_Intrinsics_vec256 ws0 = v0__7, ws1 = v2__7, ws2 = v1__7, ws3 = v3__7;
+  Lib_IntVector_Intrinsics_vec256 v09 = ws[4U], v19 = ws[5U], v29 = ws[6U], v39 = ws[7U];
+  Lib_IntVector_Intrinsics_vec256 v0_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v09, v19);
+  Lib_IntVector_Intrinsics_vec256 v1_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v09, v19);
+  Lib_IntVector_Intrinsics_vec256 v2_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v29, v39);
+  Lib_IntVector_Intrinsics_vec256 v3_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v29, v39);
+  Lib_IntVector_Intrinsics_vec256 v0__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_8, v2_8);
+  Lib_IntVector_Intrinsics_vec256 v1__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_8, v2_8);
+  Lib_IntVector_Intrinsics_vec256 v2__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_8, v3_8);
+  Lib_IntVector_Intrinsics_vec256 v3__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_8, v3_8);
+  Lib_IntVector_Intrinsics_vec256 ws4 = v0__8, ws5 = v2__8, ws6 = v1__8, ws7 = v3__8;
+  Lib_IntVector_Intrinsics_vec256 v010 = ws[8U], v110 = ws[9U], v210 = ws[10U], v310 = ws[11U];
+  Lib_IntVector_Intrinsics_vec256 v0_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v010, v110);
+  Lib_IntVector_Intrinsics_vec256 v1_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v010, v110);
+  Lib_IntVector_Intrinsics_vec256 v2_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v210, v310);
+  Lib_IntVector_Intrinsics_vec256 v3_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v210, v310);
+  Lib_IntVector_Intrinsics_vec256 v0__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_9, v2_9);
+  Lib_IntVector_Intrinsics_vec256 v1__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_9, v2_9);
+  Lib_IntVector_Intrinsics_vec256 v2__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_9, v3_9);
+  Lib_IntVector_Intrinsics_vec256 v3__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_9, v3_9);
+  Lib_IntVector_Intrinsics_vec256 ws8 = v0__9, ws9 = v2__9, ws10 = v1__9, ws11 = v3__9;
+  Lib_IntVector_Intrinsics_vec256 v011 = ws[12U], v111 = ws[13U], v211 = ws[14U], v311 = ws[15U];
+  Lib_IntVector_Intrinsics_vec256 v0_10 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v011, v111);
+  Lib_IntVector_Intrinsics_vec256 v1_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v011, v111);
+  Lib_IntVector_Intrinsics_vec256 v2_10 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v211, v311);
+  Lib_IntVector_Intrinsics_vec256 v3_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v211, v311);
+  Lib_IntVector_Intrinsics_vec256 v0__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_10, v2_10);
+  Lib_IntVector_Intrinsics_vec256 v1__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_10, v2_10);
+  Lib_IntVector_Intrinsics_vec256 v2__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_10, v3_10);
+  Lib_IntVector_Intrinsics_vec256 v3__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_10, v3_10);
+  Lib_IntVector_Intrinsics_vec256 ws12 = v0__10, ws13 = v2__10, ws14 = v1__10, ws15 = v3__10;
+  Lib_IntVector_Intrinsics_vec256 v012 = ws[16U], v112 = ws[17U], v212 = ws[18U], v312 = ws[19U];
+  Lib_IntVector_Intrinsics_vec256 v0_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v012, v112);
+  Lib_IntVector_Intrinsics_vec256 v1_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v012, v112);
+  Lib_IntVector_Intrinsics_vec256 v2_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v212, v312);
+  Lib_IntVector_Intrinsics_vec256 v3_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v212, v312);
+  Lib_IntVector_Intrinsics_vec256 v0__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_11, v2_11);
+  Lib_IntVector_Intrinsics_vec256 v1__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_11, v2_11);
+  Lib_IntVector_Intrinsics_vec256 v2__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_11, v3_11);
+  Lib_IntVector_Intrinsics_vec256 v3__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_11, v3_11);
+  Lib_IntVector_Intrinsics_vec256 ws16 = v0__11, ws17 = v2__11, ws18 = v1__11, ws19 = v3__11;
+  Lib_IntVector_Intrinsics_vec256 v013 = ws[20U], v113 = ws[21U], v213 = ws[22U], v313 = ws[23U];
+  Lib_IntVector_Intrinsics_vec256 v0_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v013, v113);
+  Lib_IntVector_Intrinsics_vec256 v1_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v013, v113);
+  Lib_IntVector_Intrinsics_vec256 v2_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v213, v313);
+  Lib_IntVector_Intrinsics_vec256 v3_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v213, v313);
+  Lib_IntVector_Intrinsics_vec256 v0__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_12, v2_12);
+  Lib_IntVector_Intrinsics_vec256 v1__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_12, v2_12);
+  Lib_IntVector_Intrinsics_vec256 v2__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_12, v3_12);
+  Lib_IntVector_Intrinsics_vec256 v3__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_12, v3_12);
+  Lib_IntVector_Intrinsics_vec256 ws20 = v0__12, ws21 = v2__12, ws22 = v1__12, ws23 = v3__12;
+  Lib_IntVector_Intrinsics_vec256 v014 = ws[24U], v114 = ws[25U], v214 = ws[26U], v314 = ws[27U];
+  Lib_IntVector_Intrinsics_vec256 v0_13 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v014, v114);
+  Lib_IntVector_Intrinsics_vec256 v1_13 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v014, v114);
+  Lib_IntVector_Intrinsics_vec256 v2_13 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v214, v314);
+  Lib_IntVector_Intrinsics_vec256 v3_13 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v214, v314);
+  Lib_IntVector_Intrinsics_vec256 v0__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_13, v2_13);
+  Lib_IntVector_Intrinsics_vec256 v1__13 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_13, v2_13);
+  Lib_IntVector_Intrinsics_vec256 v2__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_13, v3_13);
+  Lib_IntVector_Intrinsics_vec256 v3__13 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_13, v3_13);
+  Lib_IntVector_Intrinsics_vec256 ws24 = v0__13, ws25 = v2__13, ws26 = v1__13, ws27 = v3__13;
+  Lib_IntVector_Intrinsics_vec256 v0 = ws[28U], v1 = ws[29U], v2 = ws[30U], v3 = ws[31U];
+  Lib_IntVector_Intrinsics_vec256 v0_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0, v1);
+  Lib_IntVector_Intrinsics_vec256 v1_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0, v1);
+  Lib_IntVector_Intrinsics_vec256 v2_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v2, v3);
+  Lib_IntVector_Intrinsics_vec256 v3_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v2, v3);
+  Lib_IntVector_Intrinsics_vec256 v0__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_14, v2_14);
+  Lib_IntVector_Intrinsics_vec256 v1__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_14, v2_14);
+  Lib_IntVector_Intrinsics_vec256 v2__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_14, v3_14);
+  Lib_IntVector_Intrinsics_vec256 v3__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_14, v3_14);
+  Lib_IntVector_Intrinsics_vec256 ws28 = v0__14, ws29 = v2__14, ws30 = v1__14, ws31 = v3__14;
+  ws[0U] = ws0; ws[1U] = ws4; ws[2U] = ws8; ws[3U] = ws12;
+  ws[4U] = ws16; ws[5U] = ws20; ws[6U] = ws24; ws[7U] = ws28;
+  ws[8U] = ws1; ws[9U] = ws5; ws[10U] = ws9; ws[11U] = ws13;
+  ws[12U] = ws17; ws[13U] = ws21; ws[14U] = ws25; ws[15U] = ws29;
+  ws[16U] = ws2; ws[17U] = ws6; ws[18U] = ws10; ws[19U] = ws14;
+  ws[20U] = ws18; ws[21U] = ws22; ws[22U] = ws26; ws[23U] = ws30;
+  ws[24U] = ws3; ws[25U] = ws7; ws[26U] = ws11; ws[27U] = ws15;
+  ws[28U] = ws19; ws[29U] = ws23; ws[30U] = ws27; ws[31U] = ws31;
+  for (uint32_t i = 0U; i < 32U; i++)
+  {
+    Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]);
+  }
+  uint8_t *b35 = rb.snd.snd.snd;
+  uint8_t *b2 = rb.snd.snd.fst;
+  uint8_t *b1 = rb.snd.fst;
+  uint8_t *b0 = rb.fst;
+  memcpy(b0 + 32U - remOut, hbuf, remOut * sizeof (uint8_t));
+  memcpy(b1 + 32U - remOut, hbuf + 256U, remOut * sizeof (uint8_t));
+  memcpy(b2 + 32U - remOut, hbuf + 512U, remOut * sizeof (uint8_t));
+  memcpy(b35 + 32U - remOut, hbuf + 768U, remOut * sizeof (uint8_t));
+}
+
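+/* Editorial annotation, not part of the generated source: the function
+   below follows the same 4-way pattern as sha3_256 above, instantiated
+   with the SHA3-384 parameters: a rate of 104 bytes and a 48-byte digest
+   per output. */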
+void
+Hacl_Hash_SHA3_Simd256_sha3_384(
+  uint8_t *output0,
+  uint8_t *output1,
+  uint8_t *output2,
+  uint8_t *output3,
+  uint8_t *input0,
+  uint8_t *input1,
+  uint8_t *input2,
+  uint8_t *input3,
+  uint32_t inputByteLen
+)
+{
+  Hacl_Hash_SHA2_uint8_4p ib = { .fst = input0, .snd = { .fst = input1, .snd = { .fst = input2, .snd = input3 } } };
+  Hacl_Hash_SHA2_uint8_4p rb = { .fst = output0, .snd = { .fst = output1, .snd = { .fst = output2, .snd = output3 } } };
+  KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 s[25U] KRML_POST_ALIGN(32) = { 0U };
+  uint32_t rateInBytes1 = 104U;
+  for (uint32_t i = 0U; i < inputByteLen / rateInBytes1; i++)
+  {
+    uint8_t b00[256U] = { 0U };
+    uint8_t b10[256U] = { 0U };
+    uint8_t b20[256U] = { 0U };
+    uint8_t b30[256U] = { 0U };
+    Hacl_Hash_SHA2_uint8_4p b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } };
+    uint8_t *b3 = ib.snd.snd.snd;
+    uint8_t *b2 = ib.snd.snd.fst;
+    uint8_t *b1 = ib.snd.fst;
+    uint8_t *b0 = ib.fst;
+    uint8_t *bl3 = b_.snd.snd.snd;
+    uint8_t *bl2 = b_.snd.snd.fst;
+    uint8_t *bl1 = b_.snd.fst;
+    uint8_t *bl0 = b_.fst;
+    memcpy(bl0, b0 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t));
+    memcpy(bl1, b1 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t));
+    memcpy(bl2, b2 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t));
+    memcpy(bl3, b3 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t));
+    Hacl_Hash_SHA3_Simd256_absorb_inner_256(rateInBytes1, b_, s);
+  }
+  uint8_t b00[256U] = { 0U };
+  uint8_t b10[256U] = { 0U };
+  uint8_t b20[256U] = { 0U };
+  uint8_t b30[256U] = { 0U };
+  Hacl_Hash_SHA2_uint8_4p b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } };
+  uint32_t rem = inputByteLen % rateInBytes1;
+  uint8_t *b31 = ib.snd.snd.snd;
+  uint8_t *b21 = ib.snd.snd.fst;
+  uint8_t *b11 = ib.snd.fst;
+  uint8_t *b01 = ib.fst;
+  uint8_t *bl3 = b_.snd.snd.snd;
+  uint8_t *bl2 = b_.snd.snd.fst;
+  uint8_t *bl1 = b_.snd.fst;
+  uint8_t *bl0 = b_.fst;
+  memcpy(bl0, b01 + inputByteLen - rem, rem * sizeof (uint8_t));
+  memcpy(bl1, b11 + inputByteLen - rem, rem * sizeof (uint8_t));
+  memcpy(bl2, b21 + inputByteLen - rem, rem * sizeof (uint8_t));
+  memcpy(bl3, b31 + inputByteLen - rem, rem * sizeof (uint8_t));
+  uint8_t *b32 = b_.snd.snd.snd;
+  uint8_t *b22 = b_.snd.snd.fst;
+  uint8_t *b12 = b_.snd.fst;
+  uint8_t *b02 = b_.fst;
+  b02[inputByteLen % rateInBytes1] = 0x06U;
+  b12[inputByteLen % rateInBytes1] = 0x06U;
+  b22[inputByteLen % rateInBytes1] = 0x06U;
+  b32[inputByteLen % rateInBytes1] = 0x06U;
+  KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U };
+  uint8_t *b33 = b_.snd.snd.snd;
+  uint8_t *b23 = b_.snd.snd.fst;
+  uint8_t *b13 = b_.snd.fst;
+  uint8_t *b03 = b_.fst;
+  ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03);
+  ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13);
+  ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23);
+  ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33);
+  ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 32U);
+  ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 32U);
+  ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 32U);
+  ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 32U);
+  ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 64U);
+  ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 64U);
+  ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 64U);
+  ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 64U);
+  ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 96U);
+  ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 96U);
+  ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 96U);
+  ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 96U);
+  ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 128U);
+  ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 128U);
+  ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 128U);
+  ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 128U);
+  ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 160U);
+  ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 160U);
+  ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 160U);
+  ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 160U);
+  ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 192U);
+  ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 192U);
+  ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 192U);
+  ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 192U);
+  ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 224U);
+  ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 224U);
+  ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 224U);
+  ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 224U);
+  Lib_IntVector_Intrinsics_vec256 v00 = ws32[0U], v10 = ws32[1U], v20 = ws32[2U], v30 = ws32[3U];
+  Lib_IntVector_Intrinsics_vec256 v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v00, v10);
+  Lib_IntVector_Intrinsics_vec256 v1_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v00, v10);
+  Lib_IntVector_Intrinsics_vec256 v2_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v20, v30);
+  Lib_IntVector_Intrinsics_vec256 v3_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v20, v30);
+  Lib_IntVector_Intrinsics_vec256 v0__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_, v2_);
+  Lib_IntVector_Intrinsics_vec256 v1__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_, v2_);
+  Lib_IntVector_Intrinsics_vec256 v2__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_, v3_);
+  Lib_IntVector_Intrinsics_vec256 v3__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_, v3_);
+  Lib_IntVector_Intrinsics_vec256 ws00 = v0__, ws110 = v2__, ws210 = v1__, ws33 = v3__;
+  Lib_IntVector_Intrinsics_vec256 v01 = ws32[4U], v11 = ws32[5U], v21 = ws32[6U], v31 = ws32[7U];
+  Lib_IntVector_Intrinsics_vec256 v0_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v01, v11);
+  Lib_IntVector_Intrinsics_vec256 v1_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v01, v11);
+  Lib_IntVector_Intrinsics_vec256 v2_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v21, v31);
+  Lib_IntVector_Intrinsics_vec256 v3_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v21, v31);
+  Lib_IntVector_Intrinsics_vec256 v0__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_0, v2_0);
+  Lib_IntVector_Intrinsics_vec256 v1__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_0, v2_0);
+  Lib_IntVector_Intrinsics_vec256 v2__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_0, v3_0);
+  Lib_IntVector_Intrinsics_vec256 v3__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_0, v3_0);
+  Lib_IntVector_Intrinsics_vec256 ws40 = v0__0, ws50 = v2__0, ws60 = v1__0, ws70 = v3__0;
+  Lib_IntVector_Intrinsics_vec256 v02 = ws32[8U], v12 = ws32[9U], v22 = ws32[10U], v32 = ws32[11U];
+  Lib_IntVector_Intrinsics_vec256 v0_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v02, v12);
+  Lib_IntVector_Intrinsics_vec256 v1_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v02, v12);
+  Lib_IntVector_Intrinsics_vec256 v2_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v22, v32);
+  Lib_IntVector_Intrinsics_vec256 v3_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v22, v32);
+  Lib_IntVector_Intrinsics_vec256 v0__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_1, v2_1);
+  Lib_IntVector_Intrinsics_vec256 v1__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_1, v2_1);
+  Lib_IntVector_Intrinsics_vec256 v2__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_1, v3_1);
+  Lib_IntVector_Intrinsics_vec256 v3__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_1, v3_1);
+  Lib_IntVector_Intrinsics_vec256 ws80 = v0__1, ws90 = v2__1, ws100 = v1__1, ws111 = v3__1;
+  Lib_IntVector_Intrinsics_vec256 v03 = ws32[12U], v13 = ws32[13U], v23 = ws32[14U], v33 = ws32[15U];
+  Lib_IntVector_Intrinsics_vec256 v0_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v03, v13);
+  Lib_IntVector_Intrinsics_vec256 v1_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v03, v13);
+  Lib_IntVector_Intrinsics_vec256 v2_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v23, v33);
+  Lib_IntVector_Intrinsics_vec256 v3_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v23, v33);
+  Lib_IntVector_Intrinsics_vec256 v0__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_2, v2_2);
+  Lib_IntVector_Intrinsics_vec256 v1__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_2, v2_2);
+  Lib_IntVector_Intrinsics_vec256 v2__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_2, v3_2);
+  Lib_IntVector_Intrinsics_vec256 v3__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_2, v3_2);
+  Lib_IntVector_Intrinsics_vec256 ws120 = v0__2, ws130 = v2__2, ws140 = v1__2, ws150 = v3__2;
+  Lib_IntVector_Intrinsics_vec256 v04 = ws32[16U], v14 = ws32[17U], v24 = ws32[18U], v34 = ws32[19U];
+  Lib_IntVector_Intrinsics_vec256 v0_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v04, v14);
+  Lib_IntVector_Intrinsics_vec256 v1_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v04, v14);
+  Lib_IntVector_Intrinsics_vec256 v2_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v24, v34);
+  Lib_IntVector_Intrinsics_vec256 v3_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v24, v34);
+  Lib_IntVector_Intrinsics_vec256 v0__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_3, v2_3);
+  Lib_IntVector_Intrinsics_vec256 v1__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_3, v2_3);
+  Lib_IntVector_Intrinsics_vec256 v2__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_3, v3_3);
+  Lib_IntVector_Intrinsics_vec256 v3__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_3, v3_3);
+  Lib_IntVector_Intrinsics_vec256 ws160 = v0__3, ws170 = v2__3, ws180 = v1__3, ws190 = v3__3;
+  Lib_IntVector_Intrinsics_vec256 v05 = ws32[20U], v15 = ws32[21U], v25 = ws32[22U], v35 = ws32[23U];
+  Lib_IntVector_Intrinsics_vec256 v0_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v05, v15);
+  Lib_IntVector_Intrinsics_vec256 v1_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v05, v15);
+  Lib_IntVector_Intrinsics_vec256 v2_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v25, v35);
+  Lib_IntVector_Intrinsics_vec256 v3_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v25, v35);
+  Lib_IntVector_Intrinsics_vec256 v0__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_4, v2_4);
+  Lib_IntVector_Intrinsics_vec256 v1__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_4, v2_4);
+  Lib_IntVector_Intrinsics_vec256 v2__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_4, v3_4);
+  Lib_IntVector_Intrinsics_vec256 v3__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_4, v3_4);
+  Lib_IntVector_Intrinsics_vec256 ws200 = v0__4, ws211 = v2__4, ws220 = v1__4, ws230 = v3__4;
+  Lib_IntVector_Intrinsics_vec256 v06 = ws32[24U], v16 = ws32[25U], v26 = ws32[26U], v36 = ws32[27U];
+  Lib_IntVector_Intrinsics_vec256 v0_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v06, v16);
+  Lib_IntVector_Intrinsics_vec256 v1_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v06, v16);
+  Lib_IntVector_Intrinsics_vec256 v2_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v26, v36);
+  Lib_IntVector_Intrinsics_vec256 v3_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v26, v36);
+  Lib_IntVector_Intrinsics_vec256 v0__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_5, v2_5);
+  Lib_IntVector_Intrinsics_vec256 v1__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_5, v2_5);
+  Lib_IntVector_Intrinsics_vec256 v2__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_5, v3_5);
+  Lib_IntVector_Intrinsics_vec256 v3__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_5, v3_5);
+  Lib_IntVector_Intrinsics_vec256 ws240 = v0__5, ws250 = v2__5, ws260 = v1__5, ws270 = v3__5;
+  Lib_IntVector_Intrinsics_vec256 v07 = ws32[28U], v17 = ws32[29U], v27 = ws32[30U], v37 = ws32[31U];
+  Lib_IntVector_Intrinsics_vec256 v0_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v07, v17);
+  Lib_IntVector_Intrinsics_vec256 v1_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v07, v17);
+  Lib_IntVector_Intrinsics_vec256 v2_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v27, v37);
+  Lib_IntVector_Intrinsics_vec256 v3_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v27, v37);
+  Lib_IntVector_Intrinsics_vec256 v0__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_6, v2_6);
+  Lib_IntVector_Intrinsics_vec256 v1__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_6, v2_6);
+  Lib_IntVector_Intrinsics_vec256 v2__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_6, v3_6);
+  Lib_IntVector_Intrinsics_vec256 v3__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_6, v3_6);
+  Lib_IntVector_Intrinsics_vec256 ws280 = v0__6, ws290 = v2__6, ws300 = v1__6, ws310 = v3__6;
+  ws32[0U] = ws00; ws32[1U] = ws110; ws32[2U] = ws210; ws32[3U] = ws33;
+  ws32[4U] = ws40; ws32[5U] = ws50; ws32[6U] = ws60; ws32[7U] = ws70;
+  ws32[8U] = ws80; ws32[9U] = ws90; ws32[10U] = ws100; ws32[11U] = ws111;
+  ws32[12U] = ws120; ws32[13U] = ws130; ws32[14U] = ws140; ws32[15U] = ws150;
+  ws32[16U] = ws160; ws32[17U] = ws170; ws32[18U] = ws180; ws32[19U] = ws190;
+  ws32[20U] = ws200; ws32[21U] = ws211; ws32[22U] = ws220; ws32[23U] = ws230;
+  ws32[24U] = ws240; ws32[25U] = ws250; ws32[26U] = ws260; ws32[27U] = ws270;
+  ws32[28U] = ws280; ws32[29U] = ws290; ws32[30U] = ws300; ws32[31U] = ws310;
+  for (uint32_t i = 0U; i < 25U; i++)
+  {
+    s[i] = Lib_IntVector_Intrinsics_vec256_xor(s[i], ws32[i]);
+  }
+  uint8_t b04[256U] = { 0U };
+  uint8_t b14[256U] = { 0U };
+  uint8_t b24[256U] = { 0U };
+  uint8_t b34[256U] = { 0U };
+  Hacl_Hash_SHA2_uint8_4p b = { .fst = b04, .snd = { .fst = b14, .snd = { .fst = b24, .snd = b34 } } };
+  uint8_t *b3 = b.snd.snd.snd;
+  uint8_t *b25 = b.snd.snd.fst;
+  uint8_t *b15 = b.snd.fst;
+  uint8_t *b05 = b.fst;
+  b05[rateInBytes1 - 1U] = 0x80U;
+  b15[rateInBytes1 - 1U] = 0x80U;
+  b25[rateInBytes1 - 1U] = 0x80U;
+  b3[rateInBytes1 - 1U] = 0x80U;
+  Hacl_Hash_SHA3_Simd256_absorb_inner_256(rateInBytes1, b, s);
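+  /* Editorial annotation: as with sha3_256, the 48-byte digest fits in a
+     single 104-byte rate block, so the squeeze loop below runs zero times
+     and the remainder copy emits the digest (remOut = 48). */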
Lib_IntVector_Intrinsics_vec256 + v2_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v29, v39); + Lib_IntVector_Intrinsics_vec256 + v3_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v29, v39); + Lib_IntVector_Intrinsics_vec256 + v0__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_8, v2_8); + Lib_IntVector_Intrinsics_vec256 + v1__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_8, v2_8); + Lib_IntVector_Intrinsics_vec256 + v2__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_8, v3_8); + Lib_IntVector_Intrinsics_vec256 + v3__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_8, v3_8); + Lib_IntVector_Intrinsics_vec256 ws4 = v0__8; + Lib_IntVector_Intrinsics_vec256 ws5 = v2__8; + Lib_IntVector_Intrinsics_vec256 ws6 = v1__8; + Lib_IntVector_Intrinsics_vec256 ws7 = v3__8; + Lib_IntVector_Intrinsics_vec256 v010 = ws[8U]; + Lib_IntVector_Intrinsics_vec256 v110 = ws[9U]; + Lib_IntVector_Intrinsics_vec256 v210 = ws[10U]; + Lib_IntVector_Intrinsics_vec256 v310 = ws[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v010, v110); + Lib_IntVector_Intrinsics_vec256 + v1_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v010, v110); + Lib_IntVector_Intrinsics_vec256 + v2_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v210, v310); + Lib_IntVector_Intrinsics_vec256 + v3_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v210, v310); + Lib_IntVector_Intrinsics_vec256 + v0__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_9, v2_9); + Lib_IntVector_Intrinsics_vec256 + v1__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_9, v2_9); + Lib_IntVector_Intrinsics_vec256 + v2__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_9, v3_9); + Lib_IntVector_Intrinsics_vec256 + v3__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_9, v3_9); + Lib_IntVector_Intrinsics_vec256 ws8 = v0__9; + Lib_IntVector_Intrinsics_vec256 ws9 = v2__9; + Lib_IntVector_Intrinsics_vec256 ws10 = v1__9; + Lib_IntVector_Intrinsics_vec256 ws11 = v3__9; + Lib_IntVector_Intrinsics_vec256 v011 = ws[12U]; + Lib_IntVector_Intrinsics_vec256 v111 = ws[13U]; + Lib_IntVector_Intrinsics_vec256 v211 = ws[14U]; + Lib_IntVector_Intrinsics_vec256 v311 = ws[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_10 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v011, v111); + Lib_IntVector_Intrinsics_vec256 + v1_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v011, v111); + Lib_IntVector_Intrinsics_vec256 + v2_10 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v211, v311); + Lib_IntVector_Intrinsics_vec256 + v3_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v211, v311); + Lib_IntVector_Intrinsics_vec256 + v0__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_10, v2_10); + Lib_IntVector_Intrinsics_vec256 + v1__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_10, v2_10); + Lib_IntVector_Intrinsics_vec256 + v2__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_10, v3_10); + Lib_IntVector_Intrinsics_vec256 + v3__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_10, v3_10); + Lib_IntVector_Intrinsics_vec256 ws12 = v0__10; + Lib_IntVector_Intrinsics_vec256 ws13 = v2__10; + Lib_IntVector_Intrinsics_vec256 ws14 = v1__10; + Lib_IntVector_Intrinsics_vec256 ws15 = v3__10; + Lib_IntVector_Intrinsics_vec256 v012 = ws[16U]; + Lib_IntVector_Intrinsics_vec256 v112 = ws[17U]; + Lib_IntVector_Intrinsics_vec256 v212 = ws[18U]; + Lib_IntVector_Intrinsics_vec256 v312 = ws[19U]; + 
Lib_IntVector_Intrinsics_vec256 + v0_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v012, v112); + Lib_IntVector_Intrinsics_vec256 + v1_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v012, v112); + Lib_IntVector_Intrinsics_vec256 + v2_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v212, v312); + Lib_IntVector_Intrinsics_vec256 + v3_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v212, v312); + Lib_IntVector_Intrinsics_vec256 + v0__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_11, v2_11); + Lib_IntVector_Intrinsics_vec256 + v1__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_11, v2_11); + Lib_IntVector_Intrinsics_vec256 + v2__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_11, v3_11); + Lib_IntVector_Intrinsics_vec256 + v3__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_11, v3_11); + Lib_IntVector_Intrinsics_vec256 ws16 = v0__11; + Lib_IntVector_Intrinsics_vec256 ws17 = v2__11; + Lib_IntVector_Intrinsics_vec256 ws18 = v1__11; + Lib_IntVector_Intrinsics_vec256 ws19 = v3__11; + Lib_IntVector_Intrinsics_vec256 v013 = ws[20U]; + Lib_IntVector_Intrinsics_vec256 v113 = ws[21U]; + Lib_IntVector_Intrinsics_vec256 v213 = ws[22U]; + Lib_IntVector_Intrinsics_vec256 v313 = ws[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v013, v113); + Lib_IntVector_Intrinsics_vec256 + v1_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v013, v113); + Lib_IntVector_Intrinsics_vec256 + v2_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v213, v313); + Lib_IntVector_Intrinsics_vec256 + v3_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v213, v313); + Lib_IntVector_Intrinsics_vec256 + v0__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_12, v2_12); + Lib_IntVector_Intrinsics_vec256 + v1__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_12, v2_12); + Lib_IntVector_Intrinsics_vec256 + v2__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_12, v3_12); + Lib_IntVector_Intrinsics_vec256 + v3__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_12, v3_12); + Lib_IntVector_Intrinsics_vec256 ws20 = v0__12; + Lib_IntVector_Intrinsics_vec256 ws21 = v2__12; + Lib_IntVector_Intrinsics_vec256 ws22 = v1__12; + Lib_IntVector_Intrinsics_vec256 ws23 = v3__12; + Lib_IntVector_Intrinsics_vec256 v014 = ws[24U]; + Lib_IntVector_Intrinsics_vec256 v114 = ws[25U]; + Lib_IntVector_Intrinsics_vec256 v214 = ws[26U]; + Lib_IntVector_Intrinsics_vec256 v314 = ws[27U]; + Lib_IntVector_Intrinsics_vec256 + v0_13 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v014, v114); + Lib_IntVector_Intrinsics_vec256 + v1_13 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v014, v114); + Lib_IntVector_Intrinsics_vec256 + v2_13 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v214, v314); + Lib_IntVector_Intrinsics_vec256 + v3_13 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v214, v314); + Lib_IntVector_Intrinsics_vec256 + v0__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_13, v2_13); + Lib_IntVector_Intrinsics_vec256 + v1__13 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_13, v2_13); + Lib_IntVector_Intrinsics_vec256 + v2__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_13, v3_13); + Lib_IntVector_Intrinsics_vec256 + v3__13 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_13, v3_13); + Lib_IntVector_Intrinsics_vec256 ws24 = v0__13; + Lib_IntVector_Intrinsics_vec256 ws25 = v2__13; + Lib_IntVector_Intrinsics_vec256 ws26 = v1__13; + 
Lib_IntVector_Intrinsics_vec256 ws27 = v3__13; + Lib_IntVector_Intrinsics_vec256 v0 = ws[28U]; + Lib_IntVector_Intrinsics_vec256 v1 = ws[29U]; + Lib_IntVector_Intrinsics_vec256 v2 = ws[30U]; + Lib_IntVector_Intrinsics_vec256 v3 = ws[31U]; + Lib_IntVector_Intrinsics_vec256 + v0_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v1_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v2_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v3_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v0__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_14, v2_14); + Lib_IntVector_Intrinsics_vec256 + v1__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_14, v2_14); + Lib_IntVector_Intrinsics_vec256 + v2__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_14, v3_14); + Lib_IntVector_Intrinsics_vec256 + v3__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_14, v3_14); + Lib_IntVector_Intrinsics_vec256 ws28 = v0__14; + Lib_IntVector_Intrinsics_vec256 ws29 = v2__14; + Lib_IntVector_Intrinsics_vec256 ws30 = v1__14; + Lib_IntVector_Intrinsics_vec256 ws31 = v3__14; + ws[0U] = ws0; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; + ws[31U] = ws31; + for (uint32_t i = 0U; i < 32U; i++) + { + Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); + } + uint8_t *b35 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + i0 * rateInBytes1, hbuf, rateInBytes1 * sizeof (uint8_t)); + memcpy(b1 + i0 * rateInBytes1, hbuf + 256U, rateInBytes1 * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes1, hbuf + 512U, rateInBytes1 * sizeof (uint8_t)); + memcpy(b35 + i0 * rateInBytes1, hbuf + 768U, rateInBytes1 * sizeof (uint8_t)); + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____0 = s[i + 0U]; + Lib_IntVector_Intrinsics_vec256 uu____1 = s[i + 5U]; + Lib_IntVector_Intrinsics_vec256 uu____2 = s[i + 10U]; + _C[i] = + Lib_IntVector_Intrinsics_vec256_xor(uu____0, + Lib_IntVector_Intrinsics_vec256_xor(uu____1, + Lib_IntVector_Intrinsics_vec256_xor(uu____2, + Lib_IntVector_Intrinsics_vec256_xor(s[i + 15U], s[i + 20U]))));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____3 = _C[(i2 + 4U) % 5U]; + Lib_IntVector_Intrinsics_vec256 uu____4 = _C[(i2 + 1U) % 5U]; + Lib_IntVector_Intrinsics_vec256 + _D = + Lib_IntVector_Intrinsics_vec256_xor(uu____3, + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____4, + 1U), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____4, 63U))); + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + s[i2 + 5U * i] = Lib_IntVector_Intrinsics_vec256_xor(s[i2 + 5U * i], _D););); + Lib_IntVector_Intrinsics_vec256 x = s[1U]; + 
Lib_IntVector_Intrinsics_vec256 current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Hash_SHA3_keccak_piln[i]; + uint32_t r = Hacl_Hash_SHA3_keccak_rotc[i]; + Lib_IntVector_Intrinsics_vec256 temp = s[_Y]; + Lib_IntVector_Intrinsics_vec256 uu____5 = current; + s[_Y] = + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____5, + r), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____5, 64U - r)); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____6 = s[0U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____7 = Lib_IntVector_Intrinsics_vec256_lognot(s[1U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v015 = + Lib_IntVector_Intrinsics_vec256_xor(uu____6, + Lib_IntVector_Intrinsics_vec256_and(uu____7, s[2U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____8 = s[1U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____9 = Lib_IntVector_Intrinsics_vec256_lognot(s[2U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v115 = + Lib_IntVector_Intrinsics_vec256_xor(uu____8, + Lib_IntVector_Intrinsics_vec256_and(uu____9, s[3U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____10 = s[2U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____11 = Lib_IntVector_Intrinsics_vec256_lognot(s[3U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v215 = + Lib_IntVector_Intrinsics_vec256_xor(uu____10, + Lib_IntVector_Intrinsics_vec256_and(uu____11, s[4U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____12 = s[3U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____13 = Lib_IntVector_Intrinsics_vec256_lognot(s[4U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v315 = + Lib_IntVector_Intrinsics_vec256_xor(uu____12, + Lib_IntVector_Intrinsics_vec256_and(uu____13, s[0U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____14 = s[4U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____15 = Lib_IntVector_Intrinsics_vec256_lognot(s[0U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v4 = + Lib_IntVector_Intrinsics_vec256_xor(uu____14, + Lib_IntVector_Intrinsics_vec256_and(uu____15, s[1U + 5U * i])); + s[0U + 5U * i] = v015; + s[1U + 5U * i] = v115; + s[2U + 5U * i] = v215; + s[3U + 5U * i] = v315; + s[4U + 5U * i] = v4;); + uint64_t c = Hacl_Hash_SHA3_keccak_rndc[i1]; + Lib_IntVector_Intrinsics_vec256 uu____16 = s[0U]; + s[0U] = + Lib_IntVector_Intrinsics_vec256_xor(uu____16, + Lib_IntVector_Intrinsics_vec256_load64(c)); + } + } + uint32_t remOut = 48U % rateInBytes1; + uint8_t hbuf[1024U] = { 0U }; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws[32U] KRML_POST_ALIGN(32) = { 0U }; + memcpy(ws, s, 25U * sizeof (Lib_IntVector_Intrinsics_vec256)); + Lib_IntVector_Intrinsics_vec256 v08 = ws[0U]; + Lib_IntVector_Intrinsics_vec256 v18 = ws[1U]; + Lib_IntVector_Intrinsics_vec256 v28 = ws[2U]; + Lib_IntVector_Intrinsics_vec256 v38 = ws[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v08, v18); + Lib_IntVector_Intrinsics_vec256 + v1_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v08, v18); + Lib_IntVector_Intrinsics_vec256 + v2_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v28, v38); + Lib_IntVector_Intrinsics_vec256 + v3_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v28, v38); + Lib_IntVector_Intrinsics_vec256 + v0__7 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_7, v2_7); + Lib_IntVector_Intrinsics_vec256 + v1__7 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_7, v2_7); + Lib_IntVector_Intrinsics_vec256 + v2__7 = 
Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_7, v3_7); + Lib_IntVector_Intrinsics_vec256 + v3__7 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_7, v3_7); + Lib_IntVector_Intrinsics_vec256 ws0 = v0__7; + Lib_IntVector_Intrinsics_vec256 ws1 = v2__7; + Lib_IntVector_Intrinsics_vec256 ws2 = v1__7; + Lib_IntVector_Intrinsics_vec256 ws3 = v3__7; + Lib_IntVector_Intrinsics_vec256 v09 = ws[4U]; + Lib_IntVector_Intrinsics_vec256 v19 = ws[5U]; + Lib_IntVector_Intrinsics_vec256 v29 = ws[6U]; + Lib_IntVector_Intrinsics_vec256 v39 = ws[7U]; + Lib_IntVector_Intrinsics_vec256 + v0_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v09, v19); + Lib_IntVector_Intrinsics_vec256 + v1_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v09, v19); + Lib_IntVector_Intrinsics_vec256 + v2_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v29, v39); + Lib_IntVector_Intrinsics_vec256 + v3_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v29, v39); + Lib_IntVector_Intrinsics_vec256 + v0__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_8, v2_8); + Lib_IntVector_Intrinsics_vec256 + v1__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_8, v2_8); + Lib_IntVector_Intrinsics_vec256 + v2__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_8, v3_8); + Lib_IntVector_Intrinsics_vec256 + v3__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_8, v3_8); + Lib_IntVector_Intrinsics_vec256 ws4 = v0__8; + Lib_IntVector_Intrinsics_vec256 ws5 = v2__8; + Lib_IntVector_Intrinsics_vec256 ws6 = v1__8; + Lib_IntVector_Intrinsics_vec256 ws7 = v3__8; + Lib_IntVector_Intrinsics_vec256 v010 = ws[8U]; + Lib_IntVector_Intrinsics_vec256 v110 = ws[9U]; + Lib_IntVector_Intrinsics_vec256 v210 = ws[10U]; + Lib_IntVector_Intrinsics_vec256 v310 = ws[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v010, v110); + Lib_IntVector_Intrinsics_vec256 + v1_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v010, v110); + Lib_IntVector_Intrinsics_vec256 + v2_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v210, v310); + Lib_IntVector_Intrinsics_vec256 + v3_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v210, v310); + Lib_IntVector_Intrinsics_vec256 + v0__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_9, v2_9); + Lib_IntVector_Intrinsics_vec256 + v1__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_9, v2_9); + Lib_IntVector_Intrinsics_vec256 + v2__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_9, v3_9); + Lib_IntVector_Intrinsics_vec256 + v3__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_9, v3_9); + Lib_IntVector_Intrinsics_vec256 ws8 = v0__9; + Lib_IntVector_Intrinsics_vec256 ws9 = v2__9; + Lib_IntVector_Intrinsics_vec256 ws10 = v1__9; + Lib_IntVector_Intrinsics_vec256 ws11 = v3__9; + Lib_IntVector_Intrinsics_vec256 v011 = ws[12U]; + Lib_IntVector_Intrinsics_vec256 v111 = ws[13U]; + Lib_IntVector_Intrinsics_vec256 v211 = ws[14U]; + Lib_IntVector_Intrinsics_vec256 v311 = ws[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_10 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v011, v111); + Lib_IntVector_Intrinsics_vec256 + v1_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v011, v111); + Lib_IntVector_Intrinsics_vec256 + v2_10 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v211, v311); + Lib_IntVector_Intrinsics_vec256 + v3_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v211, v311); + Lib_IntVector_Intrinsics_vec256 + v0__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_10, 
v2_10); + Lib_IntVector_Intrinsics_vec256 + v1__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_10, v2_10); + Lib_IntVector_Intrinsics_vec256 + v2__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_10, v3_10); + Lib_IntVector_Intrinsics_vec256 + v3__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_10, v3_10); + Lib_IntVector_Intrinsics_vec256 ws12 = v0__10; + Lib_IntVector_Intrinsics_vec256 ws13 = v2__10; + Lib_IntVector_Intrinsics_vec256 ws14 = v1__10; + Lib_IntVector_Intrinsics_vec256 ws15 = v3__10; + Lib_IntVector_Intrinsics_vec256 v012 = ws[16U]; + Lib_IntVector_Intrinsics_vec256 v112 = ws[17U]; + Lib_IntVector_Intrinsics_vec256 v212 = ws[18U]; + Lib_IntVector_Intrinsics_vec256 v312 = ws[19U]; + Lib_IntVector_Intrinsics_vec256 + v0_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v012, v112); + Lib_IntVector_Intrinsics_vec256 + v1_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v012, v112); + Lib_IntVector_Intrinsics_vec256 + v2_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v212, v312); + Lib_IntVector_Intrinsics_vec256 + v3_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v212, v312); + Lib_IntVector_Intrinsics_vec256 + v0__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_11, v2_11); + Lib_IntVector_Intrinsics_vec256 + v1__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_11, v2_11); + Lib_IntVector_Intrinsics_vec256 + v2__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_11, v3_11); + Lib_IntVector_Intrinsics_vec256 + v3__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_11, v3_11); + Lib_IntVector_Intrinsics_vec256 ws16 = v0__11; + Lib_IntVector_Intrinsics_vec256 ws17 = v2__11; + Lib_IntVector_Intrinsics_vec256 ws18 = v1__11; + Lib_IntVector_Intrinsics_vec256 ws19 = v3__11; + Lib_IntVector_Intrinsics_vec256 v013 = ws[20U]; + Lib_IntVector_Intrinsics_vec256 v113 = ws[21U]; + Lib_IntVector_Intrinsics_vec256 v213 = ws[22U]; + Lib_IntVector_Intrinsics_vec256 v313 = ws[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v013, v113); + Lib_IntVector_Intrinsics_vec256 + v1_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v013, v113); + Lib_IntVector_Intrinsics_vec256 + v2_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v213, v313); + Lib_IntVector_Intrinsics_vec256 + v3_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v213, v313); + Lib_IntVector_Intrinsics_vec256 + v0__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_12, v2_12); + Lib_IntVector_Intrinsics_vec256 + v1__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_12, v2_12); + Lib_IntVector_Intrinsics_vec256 + v2__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_12, v3_12); + Lib_IntVector_Intrinsics_vec256 + v3__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_12, v3_12); + Lib_IntVector_Intrinsics_vec256 ws20 = v0__12; + Lib_IntVector_Intrinsics_vec256 ws21 = v2__12; + Lib_IntVector_Intrinsics_vec256 ws22 = v1__12; + Lib_IntVector_Intrinsics_vec256 ws23 = v3__12; + Lib_IntVector_Intrinsics_vec256 v014 = ws[24U]; + Lib_IntVector_Intrinsics_vec256 v114 = ws[25U]; + Lib_IntVector_Intrinsics_vec256 v214 = ws[26U]; + Lib_IntVector_Intrinsics_vec256 v314 = ws[27U]; + Lib_IntVector_Intrinsics_vec256 + v0_13 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v014, v114); + Lib_IntVector_Intrinsics_vec256 + v1_13 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v014, v114); + Lib_IntVector_Intrinsics_vec256 + v2_13 = 
Lib_IntVector_Intrinsics_vec256_interleave_low64(v214, v314);
+  Lib_IntVector_Intrinsics_vec256
+  v3_13 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v214, v314);
+  Lib_IntVector_Intrinsics_vec256
+  v0__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_13, v2_13);
+  Lib_IntVector_Intrinsics_vec256
+  v1__13 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_13, v2_13);
+  Lib_IntVector_Intrinsics_vec256
+  v2__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_13, v3_13);
+  Lib_IntVector_Intrinsics_vec256
+  v3__13 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_13, v3_13);
+  Lib_IntVector_Intrinsics_vec256 ws24 = v0__13;
+  Lib_IntVector_Intrinsics_vec256 ws25 = v2__13;
+  Lib_IntVector_Intrinsics_vec256 ws26 = v1__13;
+  Lib_IntVector_Intrinsics_vec256 ws27 = v3__13;
+  Lib_IntVector_Intrinsics_vec256 v0 = ws[28U];
+  Lib_IntVector_Intrinsics_vec256 v1 = ws[29U];
+  Lib_IntVector_Intrinsics_vec256 v2 = ws[30U];
+  Lib_IntVector_Intrinsics_vec256 v3 = ws[31U];
+  Lib_IntVector_Intrinsics_vec256
+  v0_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0, v1);
+  Lib_IntVector_Intrinsics_vec256
+  v1_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0, v1);
+  Lib_IntVector_Intrinsics_vec256
+  v2_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v2, v3);
+  Lib_IntVector_Intrinsics_vec256
+  v3_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v2, v3);
+  Lib_IntVector_Intrinsics_vec256
+  v0__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_14, v2_14);
+  Lib_IntVector_Intrinsics_vec256
+  v1__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_14, v2_14);
+  Lib_IntVector_Intrinsics_vec256
+  v2__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_14, v3_14);
+  Lib_IntVector_Intrinsics_vec256
+  v3__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_14, v3_14);
+  Lib_IntVector_Intrinsics_vec256 ws28 = v0__14;
+  Lib_IntVector_Intrinsics_vec256 ws29 = v2__14;
+  Lib_IntVector_Intrinsics_vec256 ws30 = v1__14;
+  Lib_IntVector_Intrinsics_vec256 ws31 = v3__14;
+  ws[0U] = ws0;
+  ws[1U] = ws4;
+  ws[2U] = ws8;
+  ws[3U] = ws12;
+  ws[4U] = ws16;
+  ws[5U] = ws20;
+  ws[6U] = ws24;
+  ws[7U] = ws28;
+  ws[8U] = ws1;
+  ws[9U] = ws5;
+  ws[10U] = ws9;
+  ws[11U] = ws13;
+  ws[12U] = ws17;
+  ws[13U] = ws21;
+  ws[14U] = ws25;
+  ws[15U] = ws29;
+  ws[16U] = ws2;
+  ws[17U] = ws6;
+  ws[18U] = ws10;
+  ws[19U] = ws14;
+  ws[20U] = ws18;
+  ws[21U] = ws22;
+  ws[22U] = ws26;
+  ws[23U] = ws30;
+  ws[24U] = ws3;
+  ws[25U] = ws7;
+  ws[26U] = ws11;
+  ws[27U] = ws15;
+  ws[28U] = ws19;
+  ws[29U] = ws23;
+  ws[30U] = ws27;
+  ws[31U] = ws31;
+  for (uint32_t i = 0U; i < 32U; i++)
+  {
+    Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]);
+  }
+  uint8_t *b35 = rb.snd.snd.snd;
+  uint8_t *b2 = rb.snd.snd.fst;
+  uint8_t *b1 = rb.snd.fst;
+  uint8_t *b0 = rb.fst;
+  memcpy(b0 + 48U - remOut, hbuf, remOut * sizeof (uint8_t));
+  memcpy(b1 + 48U - remOut, hbuf + 256U, remOut * sizeof (uint8_t));
+  memcpy(b2 + 48U - remOut, hbuf + 512U, remOut * sizeof (uint8_t));
+  memcpy(b35 + 48U - remOut, hbuf + 768U, remOut * sizeof (uint8_t));
+}
+
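+/*
+  A minimal usage sketch for the 4-way batched entry point that closes
+  above (assumed here to be Hacl_Hash_SHA3_Simd256_sha3_384, the 48-byte
+  digest variant, with the same parameter order as sha3_512 below): four
+  equal-length inputs are hashed in one call, one per 256-bit vector lane.
+  The buffer names d0..d3 and m0..m3 are hypothetical, and all four inputs
+  must share the same inputByteLen.
+
+    uint8_t d0[48U], d1[48U], d2[48U], d3[48U];
+    uint8_t m0[3U] = { 97U, 98U, 99U };  (the bytes of "abc")
+    uint8_t m1[3U] = { 97U, 98U, 99U };
+    uint8_t m2[3U] = { 97U, 98U, 99U };
+    uint8_t m3[3U] = { 97U, 98U, 99U };
+    Hacl_Hash_SHA3_Simd256_sha3_384(d0, d1, d2, d3, m0, m1, m2, m3, 3U);
+*/
+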
+void
+Hacl_Hash_SHA3_Simd256_sha3_512(
+  uint8_t *output0,
+  uint8_t *output1,
+  uint8_t *output2,
+  uint8_t *output3,
+  uint8_t *input0,
+  uint8_t *input1,
+  uint8_t *input2,
+  uint8_t *input3,
+  uint32_t inputByteLen
+)
+{
+  Hacl_Hash_SHA2_uint8_4p
+  ib = { .fst = input0, .snd = { .fst = input1, .snd = { .fst = input2, .snd = input3 } } };
+  Hacl_Hash_SHA2_uint8_4p
+  rb = { .fst = output0, .snd = { .fst = output1, .snd = { .fst = output2, .snd = output3 } } };
+  KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 s[25U] KRML_POST_ALIGN(32) = { 0U };
+  uint32_t rateInBytes1 = 72U;
+  for (uint32_t i = 0U; i < inputByteLen / rateInBytes1; i++)
+  {
+    uint8_t b00[256U] = { 0U };
+    uint8_t b10[256U] = { 0U };
+    uint8_t b20[256U] = { 0U };
+    uint8_t b30[256U] = { 0U };
+    Hacl_Hash_SHA2_uint8_4p
+    b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } };
+    uint8_t *b3 = ib.snd.snd.snd;
+    uint8_t *b2 = ib.snd.snd.fst;
+    uint8_t *b1 = ib.snd.fst;
+    uint8_t *b0 = ib.fst;
+    uint8_t *bl3 = b_.snd.snd.snd;
+    uint8_t *bl2 = b_.snd.snd.fst;
+    uint8_t *bl1 = b_.snd.fst;
+    uint8_t *bl0 = b_.fst;
+    memcpy(bl0, b0 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t));
+    memcpy(bl1, b1 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t));
+    memcpy(bl2, b2 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t));
+    memcpy(bl3, b3 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t));
+    Hacl_Hash_SHA3_Simd256_absorb_inner_256(rateInBytes1, b_, s);
+  }
+  uint8_t b00[256U] = { 0U };
+  uint8_t b10[256U] = { 0U };
+  uint8_t b20[256U] = { 0U };
+  uint8_t b30[256U] = { 0U };
+  Hacl_Hash_SHA2_uint8_4p
+  b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } };
+  uint32_t rem = inputByteLen % rateInBytes1;
+  uint8_t *b31 = ib.snd.snd.snd;
+  uint8_t *b21 = ib.snd.snd.fst;
+  uint8_t *b11 = ib.snd.fst;
+  uint8_t *b01 = ib.fst;
+  uint8_t *bl3 = b_.snd.snd.snd;
+  uint8_t *bl2 = b_.snd.snd.fst;
+  uint8_t *bl1 = b_.snd.fst;
+  uint8_t *bl0 = b_.fst;
+  memcpy(bl0, b01 + inputByteLen - rem, rem * sizeof (uint8_t));
+  memcpy(bl1, b11 + inputByteLen - rem, rem * sizeof (uint8_t));
+  memcpy(bl2, b21 + inputByteLen - rem, rem * sizeof (uint8_t));
+  memcpy(bl3, b31 + inputByteLen - rem, rem * sizeof (uint8_t));
+  uint8_t *b32 = b_.snd.snd.snd;
+  uint8_t *b22 = b_.snd.snd.fst;
+  uint8_t *b12 = b_.snd.fst;
+  uint8_t *b02 = b_.fst;
+  b02[inputByteLen % rateInBytes1] = 0x06U;
+  b12[inputByteLen % rateInBytes1] = 0x06U;
+  b22[inputByteLen % rateInBytes1] = 0x06U;
+  b32[inputByteLen % rateInBytes1] = 0x06U;
+  KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U };
+  uint8_t *b33 = b_.snd.snd.snd;
+  uint8_t *b23 = b_.snd.snd.fst;
+  uint8_t *b13 = b_.snd.fst;
+  uint8_t *b03 = b_.fst;
+  ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03);
+  ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13);
+  ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23);
+  ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33);
+  ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 32U);
+  ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 32U);
+  ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 32U);
+  ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 32U);
+  ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 64U);
+  ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 64U);
+  ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 64U);
+  ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 64U);
+  ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 96U);
+  ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 96U);
+  ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 96U);
+  ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 96U);
+  ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 128U);
+  ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 128U);
+  ws32[18U] =
Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 128U); + ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 128U); + ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 160U); + ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 160U); + ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 160U); + ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 160U); + ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 192U); + ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 192U); + ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 192U); + ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 192U); + ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 224U); + ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 224U); + ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 224U); + ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 224U); + Lib_IntVector_Intrinsics_vec256 v00 = ws32[0U]; + Lib_IntVector_Intrinsics_vec256 v10 = ws32[1U]; + Lib_IntVector_Intrinsics_vec256 v20 = ws32[2U]; + Lib_IntVector_Intrinsics_vec256 v30 = ws32[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v1_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v2_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v3_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v0__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v1__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v2__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 + v3__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 ws00 = v0__; + Lib_IntVector_Intrinsics_vec256 ws110 = v2__; + Lib_IntVector_Intrinsics_vec256 ws210 = v1__; + Lib_IntVector_Intrinsics_vec256 ws33 = v3__; + Lib_IntVector_Intrinsics_vec256 v01 = ws32[4U]; + Lib_IntVector_Intrinsics_vec256 v11 = ws32[5U]; + Lib_IntVector_Intrinsics_vec256 v21 = ws32[6U]; + Lib_IntVector_Intrinsics_vec256 v31 = ws32[7U]; + Lib_IntVector_Intrinsics_vec256 + v0_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v1_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v2_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v3_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v0__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v1__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v2__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 + v3__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 ws40 = v0__0; + Lib_IntVector_Intrinsics_vec256 ws50 = v2__0; + Lib_IntVector_Intrinsics_vec256 ws60 = v1__0; + Lib_IntVector_Intrinsics_vec256 ws70 = v3__0; + Lib_IntVector_Intrinsics_vec256 v02 = ws32[8U]; + Lib_IntVector_Intrinsics_vec256 v12 = ws32[9U]; + Lib_IntVector_Intrinsics_vec256 v22 = 
ws32[10U]; + Lib_IntVector_Intrinsics_vec256 v32 = ws32[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v1_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v2_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v3_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v0__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v1__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v2__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 + v3__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 ws80 = v0__1; + Lib_IntVector_Intrinsics_vec256 ws90 = v2__1; + Lib_IntVector_Intrinsics_vec256 ws100 = v1__1; + Lib_IntVector_Intrinsics_vec256 ws111 = v3__1; + Lib_IntVector_Intrinsics_vec256 v03 = ws32[12U]; + Lib_IntVector_Intrinsics_vec256 v13 = ws32[13U]; + Lib_IntVector_Intrinsics_vec256 v23 = ws32[14U]; + Lib_IntVector_Intrinsics_vec256 v33 = ws32[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v1_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v2_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v3_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v0__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v1__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v2__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 + v3__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 ws120 = v0__2; + Lib_IntVector_Intrinsics_vec256 ws130 = v2__2; + Lib_IntVector_Intrinsics_vec256 ws140 = v1__2; + Lib_IntVector_Intrinsics_vec256 ws150 = v3__2; + Lib_IntVector_Intrinsics_vec256 v04 = ws32[16U]; + Lib_IntVector_Intrinsics_vec256 v14 = ws32[17U]; + Lib_IntVector_Intrinsics_vec256 v24 = ws32[18U]; + Lib_IntVector_Intrinsics_vec256 v34 = ws32[19U]; + Lib_IntVector_Intrinsics_vec256 + v0_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v1_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v2_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v3_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v0__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v1__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v2__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 + v3__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 ws160 = v0__3; + Lib_IntVector_Intrinsics_vec256 ws170 = v2__3; + Lib_IntVector_Intrinsics_vec256 ws180 = v1__3; + 
Lib_IntVector_Intrinsics_vec256 ws190 = v3__3; + Lib_IntVector_Intrinsics_vec256 v05 = ws32[20U]; + Lib_IntVector_Intrinsics_vec256 v15 = ws32[21U]; + Lib_IntVector_Intrinsics_vec256 v25 = ws32[22U]; + Lib_IntVector_Intrinsics_vec256 v35 = ws32[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v1_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v2_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v3_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v0__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v1__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v2__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 + v3__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 ws200 = v0__4; + Lib_IntVector_Intrinsics_vec256 ws211 = v2__4; + Lib_IntVector_Intrinsics_vec256 ws220 = v1__4; + Lib_IntVector_Intrinsics_vec256 ws230 = v3__4; + Lib_IntVector_Intrinsics_vec256 v06 = ws32[24U]; + Lib_IntVector_Intrinsics_vec256 v16 = ws32[25U]; + Lib_IntVector_Intrinsics_vec256 v26 = ws32[26U]; + Lib_IntVector_Intrinsics_vec256 v36 = ws32[27U]; + Lib_IntVector_Intrinsics_vec256 + v0_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v1_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v2_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v3_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v0__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v1__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v2__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 + v3__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 ws240 = v0__5; + Lib_IntVector_Intrinsics_vec256 ws250 = v2__5; + Lib_IntVector_Intrinsics_vec256 ws260 = v1__5; + Lib_IntVector_Intrinsics_vec256 ws270 = v3__5; + Lib_IntVector_Intrinsics_vec256 v07 = ws32[28U]; + Lib_IntVector_Intrinsics_vec256 v17 = ws32[29U]; + Lib_IntVector_Intrinsics_vec256 v27 = ws32[30U]; + Lib_IntVector_Intrinsics_vec256 v37 = ws32[31U]; + Lib_IntVector_Intrinsics_vec256 + v0_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v07, v17); + Lib_IntVector_Intrinsics_vec256 + v1_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v07, v17); + Lib_IntVector_Intrinsics_vec256 + v2_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v27, v37); + Lib_IntVector_Intrinsics_vec256 + v3_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v27, v37); + Lib_IntVector_Intrinsics_vec256 + v0__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v1__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v2__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 + v3__6 = 
Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 ws280 = v0__6; + Lib_IntVector_Intrinsics_vec256 ws290 = v2__6; + Lib_IntVector_Intrinsics_vec256 ws300 = v1__6; + Lib_IntVector_Intrinsics_vec256 ws310 = v3__6; + ws32[0U] = ws00; + ws32[1U] = ws110; + ws32[2U] = ws210; + ws32[3U] = ws33; + ws32[4U] = ws40; + ws32[5U] = ws50; + ws32[6U] = ws60; + ws32[7U] = ws70; + ws32[8U] = ws80; + ws32[9U] = ws90; + ws32[10U] = ws100; + ws32[11U] = ws111; + ws32[12U] = ws120; + ws32[13U] = ws130; + ws32[14U] = ws140; + ws32[15U] = ws150; + ws32[16U] = ws160; + ws32[17U] = ws170; + ws32[18U] = ws180; + ws32[19U] = ws190; + ws32[20U] = ws200; + ws32[21U] = ws211; + ws32[22U] = ws220; + ws32[23U] = ws230; + ws32[24U] = ws240; + ws32[25U] = ws250; + ws32[26U] = ws260; + ws32[27U] = ws270; + ws32[28U] = ws280; + ws32[29U] = ws290; + ws32[30U] = ws300; + ws32[31U] = ws310; + for (uint32_t i = 0U; i < 25U; i++) + { + s[i] = Lib_IntVector_Intrinsics_vec256_xor(s[i], ws32[i]); + } + uint8_t b04[256U] = { 0U }; + uint8_t b14[256U] = { 0U }; + uint8_t b24[256U] = { 0U }; + uint8_t b34[256U] = { 0U }; + Hacl_Hash_SHA2_uint8_4p + b = { .fst = b04, .snd = { .fst = b14, .snd = { .fst = b24, .snd = b34 } } }; + uint8_t *b3 = b.snd.snd.snd; + uint8_t *b25 = b.snd.snd.fst; + uint8_t *b15 = b.snd.fst; + uint8_t *b05 = b.fst; + b05[rateInBytes1 - 1U] = 0x80U; + b15[rateInBytes1 - 1U] = 0x80U; + b25[rateInBytes1 - 1U] = 0x80U; + b3[rateInBytes1 - 1U] = 0x80U; + Hacl_Hash_SHA3_Simd256_absorb_inner_256(rateInBytes1, b, s); + for (uint32_t i0 = 0U; i0 < 64U / rateInBytes1; i0++) + { + uint8_t hbuf[1024U] = { 0U }; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws[32U] KRML_POST_ALIGN(32) = { 0U }; + memcpy(ws, s, 25U * sizeof (Lib_IntVector_Intrinsics_vec256)); + Lib_IntVector_Intrinsics_vec256 v08 = ws[0U]; + Lib_IntVector_Intrinsics_vec256 v18 = ws[1U]; + Lib_IntVector_Intrinsics_vec256 v28 = ws[2U]; + Lib_IntVector_Intrinsics_vec256 v38 = ws[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v08, v18); + Lib_IntVector_Intrinsics_vec256 + v1_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v08, v18); + Lib_IntVector_Intrinsics_vec256 + v2_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v28, v38); + Lib_IntVector_Intrinsics_vec256 + v3_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v28, v38); + Lib_IntVector_Intrinsics_vec256 + v0__7 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_7, v2_7); + Lib_IntVector_Intrinsics_vec256 + v1__7 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_7, v2_7); + Lib_IntVector_Intrinsics_vec256 + v2__7 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_7, v3_7); + Lib_IntVector_Intrinsics_vec256 + v3__7 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_7, v3_7); + Lib_IntVector_Intrinsics_vec256 ws0 = v0__7; + Lib_IntVector_Intrinsics_vec256 ws1 = v2__7; + Lib_IntVector_Intrinsics_vec256 ws2 = v1__7; + Lib_IntVector_Intrinsics_vec256 ws3 = v3__7; + Lib_IntVector_Intrinsics_vec256 v09 = ws[4U]; + Lib_IntVector_Intrinsics_vec256 v19 = ws[5U]; + Lib_IntVector_Intrinsics_vec256 v29 = ws[6U]; + Lib_IntVector_Intrinsics_vec256 v39 = ws[7U]; + Lib_IntVector_Intrinsics_vec256 + v0_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v09, v19); + Lib_IntVector_Intrinsics_vec256 + v1_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v09, v19); + Lib_IntVector_Intrinsics_vec256 + v2_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v29, v39); + 
Lib_IntVector_Intrinsics_vec256 + v3_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v29, v39); + Lib_IntVector_Intrinsics_vec256 + v0__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_8, v2_8); + Lib_IntVector_Intrinsics_vec256 + v1__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_8, v2_8); + Lib_IntVector_Intrinsics_vec256 + v2__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_8, v3_8); + Lib_IntVector_Intrinsics_vec256 + v3__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_8, v3_8); + Lib_IntVector_Intrinsics_vec256 ws4 = v0__8; + Lib_IntVector_Intrinsics_vec256 ws5 = v2__8; + Lib_IntVector_Intrinsics_vec256 ws6 = v1__8; + Lib_IntVector_Intrinsics_vec256 ws7 = v3__8; + Lib_IntVector_Intrinsics_vec256 v010 = ws[8U]; + Lib_IntVector_Intrinsics_vec256 v110 = ws[9U]; + Lib_IntVector_Intrinsics_vec256 v210 = ws[10U]; + Lib_IntVector_Intrinsics_vec256 v310 = ws[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v010, v110); + Lib_IntVector_Intrinsics_vec256 + v1_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v010, v110); + Lib_IntVector_Intrinsics_vec256 + v2_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v210, v310); + Lib_IntVector_Intrinsics_vec256 + v3_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v210, v310); + Lib_IntVector_Intrinsics_vec256 + v0__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_9, v2_9); + Lib_IntVector_Intrinsics_vec256 + v1__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_9, v2_9); + Lib_IntVector_Intrinsics_vec256 + v2__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_9, v3_9); + Lib_IntVector_Intrinsics_vec256 + v3__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_9, v3_9); + Lib_IntVector_Intrinsics_vec256 ws8 = v0__9; + Lib_IntVector_Intrinsics_vec256 ws9 = v2__9; + Lib_IntVector_Intrinsics_vec256 ws10 = v1__9; + Lib_IntVector_Intrinsics_vec256 ws11 = v3__9; + Lib_IntVector_Intrinsics_vec256 v011 = ws[12U]; + Lib_IntVector_Intrinsics_vec256 v111 = ws[13U]; + Lib_IntVector_Intrinsics_vec256 v211 = ws[14U]; + Lib_IntVector_Intrinsics_vec256 v311 = ws[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_10 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v011, v111); + Lib_IntVector_Intrinsics_vec256 + v1_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v011, v111); + Lib_IntVector_Intrinsics_vec256 + v2_10 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v211, v311); + Lib_IntVector_Intrinsics_vec256 + v3_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v211, v311); + Lib_IntVector_Intrinsics_vec256 + v0__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_10, v2_10); + Lib_IntVector_Intrinsics_vec256 + v1__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_10, v2_10); + Lib_IntVector_Intrinsics_vec256 + v2__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_10, v3_10); + Lib_IntVector_Intrinsics_vec256 + v3__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_10, v3_10); + Lib_IntVector_Intrinsics_vec256 ws12 = v0__10; + Lib_IntVector_Intrinsics_vec256 ws13 = v2__10; + Lib_IntVector_Intrinsics_vec256 ws14 = v1__10; + Lib_IntVector_Intrinsics_vec256 ws15 = v3__10; + Lib_IntVector_Intrinsics_vec256 v012 = ws[16U]; + Lib_IntVector_Intrinsics_vec256 v112 = ws[17U]; + Lib_IntVector_Intrinsics_vec256 v212 = ws[18U]; + Lib_IntVector_Intrinsics_vec256 v312 = ws[19U]; + Lib_IntVector_Intrinsics_vec256 + v0_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v012, v112); + 
Lib_IntVector_Intrinsics_vec256 + v1_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v012, v112); + Lib_IntVector_Intrinsics_vec256 + v2_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v212, v312); + Lib_IntVector_Intrinsics_vec256 + v3_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v212, v312); + Lib_IntVector_Intrinsics_vec256 + v0__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_11, v2_11); + Lib_IntVector_Intrinsics_vec256 + v1__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_11, v2_11); + Lib_IntVector_Intrinsics_vec256 + v2__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_11, v3_11); + Lib_IntVector_Intrinsics_vec256 + v3__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_11, v3_11); + Lib_IntVector_Intrinsics_vec256 ws16 = v0__11; + Lib_IntVector_Intrinsics_vec256 ws17 = v2__11; + Lib_IntVector_Intrinsics_vec256 ws18 = v1__11; + Lib_IntVector_Intrinsics_vec256 ws19 = v3__11; + Lib_IntVector_Intrinsics_vec256 v013 = ws[20U]; + Lib_IntVector_Intrinsics_vec256 v113 = ws[21U]; + Lib_IntVector_Intrinsics_vec256 v213 = ws[22U]; + Lib_IntVector_Intrinsics_vec256 v313 = ws[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v013, v113); + Lib_IntVector_Intrinsics_vec256 + v1_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v013, v113); + Lib_IntVector_Intrinsics_vec256 + v2_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v213, v313); + Lib_IntVector_Intrinsics_vec256 + v3_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v213, v313); + Lib_IntVector_Intrinsics_vec256 + v0__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_12, v2_12); + Lib_IntVector_Intrinsics_vec256 + v1__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_12, v2_12); + Lib_IntVector_Intrinsics_vec256 + v2__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_12, v3_12); + Lib_IntVector_Intrinsics_vec256 + v3__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_12, v3_12); + Lib_IntVector_Intrinsics_vec256 ws20 = v0__12; + Lib_IntVector_Intrinsics_vec256 ws21 = v2__12; + Lib_IntVector_Intrinsics_vec256 ws22 = v1__12; + Lib_IntVector_Intrinsics_vec256 ws23 = v3__12; + Lib_IntVector_Intrinsics_vec256 v014 = ws[24U]; + Lib_IntVector_Intrinsics_vec256 v114 = ws[25U]; + Lib_IntVector_Intrinsics_vec256 v214 = ws[26U]; + Lib_IntVector_Intrinsics_vec256 v314 = ws[27U]; + Lib_IntVector_Intrinsics_vec256 + v0_13 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v014, v114); + Lib_IntVector_Intrinsics_vec256 + v1_13 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v014, v114); + Lib_IntVector_Intrinsics_vec256 + v2_13 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v214, v314); + Lib_IntVector_Intrinsics_vec256 + v3_13 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v214, v314); + Lib_IntVector_Intrinsics_vec256 + v0__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_13, v2_13); + Lib_IntVector_Intrinsics_vec256 + v1__13 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_13, v2_13); + Lib_IntVector_Intrinsics_vec256 + v2__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_13, v3_13); + Lib_IntVector_Intrinsics_vec256 + v3__13 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_13, v3_13); + Lib_IntVector_Intrinsics_vec256 ws24 = v0__13; + Lib_IntVector_Intrinsics_vec256 ws25 = v2__13; + Lib_IntVector_Intrinsics_vec256 ws26 = v1__13; + Lib_IntVector_Intrinsics_vec256 ws27 = v3__13; + Lib_IntVector_Intrinsics_vec256 v0 = ws[28U]; + 
Lib_IntVector_Intrinsics_vec256 v1 = ws[29U]; + Lib_IntVector_Intrinsics_vec256 v2 = ws[30U]; + Lib_IntVector_Intrinsics_vec256 v3 = ws[31U]; + Lib_IntVector_Intrinsics_vec256 + v0_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v1_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v2_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v3_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v0__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_14, v2_14); + Lib_IntVector_Intrinsics_vec256 + v1__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_14, v2_14); + Lib_IntVector_Intrinsics_vec256 + v2__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_14, v3_14); + Lib_IntVector_Intrinsics_vec256 + v3__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_14, v3_14); + Lib_IntVector_Intrinsics_vec256 ws28 = v0__14; + Lib_IntVector_Intrinsics_vec256 ws29 = v2__14; + Lib_IntVector_Intrinsics_vec256 ws30 = v1__14; + Lib_IntVector_Intrinsics_vec256 ws31 = v3__14; + ws[0U] = ws0; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; + ws[31U] = ws31; + for (uint32_t i = 0U; i < 32U; i++) + { + Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); + } + uint8_t *b35 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + i0 * rateInBytes1, hbuf, rateInBytes1 * sizeof (uint8_t)); + memcpy(b1 + i0 * rateInBytes1, hbuf + 256U, rateInBytes1 * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes1, hbuf + 512U, rateInBytes1 * sizeof (uint8_t)); + memcpy(b35 + i0 * rateInBytes1, hbuf + 768U, rateInBytes1 * sizeof (uint8_t)); + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____0 = s[i + 0U]; + Lib_IntVector_Intrinsics_vec256 uu____1 = s[i + 5U]; + Lib_IntVector_Intrinsics_vec256 uu____2 = s[i + 10U]; + _C[i] = + Lib_IntVector_Intrinsics_vec256_xor(uu____0, + Lib_IntVector_Intrinsics_vec256_xor(uu____1, + Lib_IntVector_Intrinsics_vec256_xor(uu____2, + Lib_IntVector_Intrinsics_vec256_xor(s[i + 15U], s[i + 20U]))));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____3 = _C[(i2 + 4U) % 5U]; + Lib_IntVector_Intrinsics_vec256 uu____4 = _C[(i2 + 1U) % 5U]; + Lib_IntVector_Intrinsics_vec256 + _D = + Lib_IntVector_Intrinsics_vec256_xor(uu____3, + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____4, + 1U), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____4, 63U))); + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + s[i2 + 5U * i] = Lib_IntVector_Intrinsics_vec256_xor(s[i2 + 5U * i], _D););); + Lib_IntVector_Intrinsics_vec256 x = s[1U]; + Lib_IntVector_Intrinsics_vec256 current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = 
Hacl_Hash_SHA3_keccak_piln[i]; + uint32_t r = Hacl_Hash_SHA3_keccak_rotc[i]; + Lib_IntVector_Intrinsics_vec256 temp = s[_Y]; + Lib_IntVector_Intrinsics_vec256 uu____5 = current; + s[_Y] = + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____5, + r), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____5, 64U - r)); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____6 = s[0U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____7 = Lib_IntVector_Intrinsics_vec256_lognot(s[1U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v015 = + Lib_IntVector_Intrinsics_vec256_xor(uu____6, + Lib_IntVector_Intrinsics_vec256_and(uu____7, s[2U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____8 = s[1U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____9 = Lib_IntVector_Intrinsics_vec256_lognot(s[2U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v115 = + Lib_IntVector_Intrinsics_vec256_xor(uu____8, + Lib_IntVector_Intrinsics_vec256_and(uu____9, s[3U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____10 = s[2U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____11 = Lib_IntVector_Intrinsics_vec256_lognot(s[3U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v215 = + Lib_IntVector_Intrinsics_vec256_xor(uu____10, + Lib_IntVector_Intrinsics_vec256_and(uu____11, s[4U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____12 = s[3U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____13 = Lib_IntVector_Intrinsics_vec256_lognot(s[4U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v315 = + Lib_IntVector_Intrinsics_vec256_xor(uu____12, + Lib_IntVector_Intrinsics_vec256_and(uu____13, s[0U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____14 = s[4U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____15 = Lib_IntVector_Intrinsics_vec256_lognot(s[0U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v4 = + Lib_IntVector_Intrinsics_vec256_xor(uu____14, + Lib_IntVector_Intrinsics_vec256_and(uu____15, s[1U + 5U * i])); + s[0U + 5U * i] = v015; + s[1U + 5U * i] = v115; + s[2U + 5U * i] = v215; + s[3U + 5U * i] = v315; + s[4U + 5U * i] = v4;); + uint64_t c = Hacl_Hash_SHA3_keccak_rndc[i1]; + Lib_IntVector_Intrinsics_vec256 uu____16 = s[0U]; + s[0U] = + Lib_IntVector_Intrinsics_vec256_xor(uu____16, + Lib_IntVector_Intrinsics_vec256_load64(c)); + } + } + uint32_t remOut = 64U % rateInBytes1; + uint8_t hbuf[1024U] = { 0U }; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws[32U] KRML_POST_ALIGN(32) = { 0U }; + memcpy(ws, s, 25U * sizeof (Lib_IntVector_Intrinsics_vec256)); + Lib_IntVector_Intrinsics_vec256 v08 = ws[0U]; + Lib_IntVector_Intrinsics_vec256 v18 = ws[1U]; + Lib_IntVector_Intrinsics_vec256 v28 = ws[2U]; + Lib_IntVector_Intrinsics_vec256 v38 = ws[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v08, v18); + Lib_IntVector_Intrinsics_vec256 + v1_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v08, v18); + Lib_IntVector_Intrinsics_vec256 + v2_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v28, v38); + Lib_IntVector_Intrinsics_vec256 + v3_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v28, v38); + Lib_IntVector_Intrinsics_vec256 + v0__7 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_7, v2_7); + Lib_IntVector_Intrinsics_vec256 + v1__7 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_7, v2_7); + Lib_IntVector_Intrinsics_vec256 + v2__7 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_7, v3_7); + Lib_IntVector_Intrinsics_vec256 + v3__7 = 
Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_7, v3_7); + Lib_IntVector_Intrinsics_vec256 ws0 = v0__7; + Lib_IntVector_Intrinsics_vec256 ws1 = v2__7; + Lib_IntVector_Intrinsics_vec256 ws2 = v1__7; + Lib_IntVector_Intrinsics_vec256 ws3 = v3__7; + Lib_IntVector_Intrinsics_vec256 v09 = ws[4U]; + Lib_IntVector_Intrinsics_vec256 v19 = ws[5U]; + Lib_IntVector_Intrinsics_vec256 v29 = ws[6U]; + Lib_IntVector_Intrinsics_vec256 v39 = ws[7U]; + Lib_IntVector_Intrinsics_vec256 + v0_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v09, v19); + Lib_IntVector_Intrinsics_vec256 + v1_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v09, v19); + Lib_IntVector_Intrinsics_vec256 + v2_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v29, v39); + Lib_IntVector_Intrinsics_vec256 + v3_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v29, v39); + Lib_IntVector_Intrinsics_vec256 + v0__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_8, v2_8); + Lib_IntVector_Intrinsics_vec256 + v1__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_8, v2_8); + Lib_IntVector_Intrinsics_vec256 + v2__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_8, v3_8); + Lib_IntVector_Intrinsics_vec256 + v3__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_8, v3_8); + Lib_IntVector_Intrinsics_vec256 ws4 = v0__8; + Lib_IntVector_Intrinsics_vec256 ws5 = v2__8; + Lib_IntVector_Intrinsics_vec256 ws6 = v1__8; + Lib_IntVector_Intrinsics_vec256 ws7 = v3__8; + Lib_IntVector_Intrinsics_vec256 v010 = ws[8U]; + Lib_IntVector_Intrinsics_vec256 v110 = ws[9U]; + Lib_IntVector_Intrinsics_vec256 v210 = ws[10U]; + Lib_IntVector_Intrinsics_vec256 v310 = ws[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v010, v110); + Lib_IntVector_Intrinsics_vec256 + v1_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v010, v110); + Lib_IntVector_Intrinsics_vec256 + v2_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v210, v310); + Lib_IntVector_Intrinsics_vec256 + v3_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v210, v310); + Lib_IntVector_Intrinsics_vec256 + v0__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_9, v2_9); + Lib_IntVector_Intrinsics_vec256 + v1__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_9, v2_9); + Lib_IntVector_Intrinsics_vec256 + v2__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_9, v3_9); + Lib_IntVector_Intrinsics_vec256 + v3__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_9, v3_9); + Lib_IntVector_Intrinsics_vec256 ws8 = v0__9; + Lib_IntVector_Intrinsics_vec256 ws9 = v2__9; + Lib_IntVector_Intrinsics_vec256 ws10 = v1__9; + Lib_IntVector_Intrinsics_vec256 ws11 = v3__9; + Lib_IntVector_Intrinsics_vec256 v011 = ws[12U]; + Lib_IntVector_Intrinsics_vec256 v111 = ws[13U]; + Lib_IntVector_Intrinsics_vec256 v211 = ws[14U]; + Lib_IntVector_Intrinsics_vec256 v311 = ws[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_10 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v011, v111); + Lib_IntVector_Intrinsics_vec256 + v1_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v011, v111); + Lib_IntVector_Intrinsics_vec256 + v2_10 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v211, v311); + Lib_IntVector_Intrinsics_vec256 + v3_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v211, v311); + Lib_IntVector_Intrinsics_vec256 + v0__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_10, v2_10); + Lib_IntVector_Intrinsics_vec256 + v1__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_10, 
v2_10); + Lib_IntVector_Intrinsics_vec256 + v2__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_10, v3_10); + Lib_IntVector_Intrinsics_vec256 + v3__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_10, v3_10); + Lib_IntVector_Intrinsics_vec256 ws12 = v0__10; + Lib_IntVector_Intrinsics_vec256 ws13 = v2__10; + Lib_IntVector_Intrinsics_vec256 ws14 = v1__10; + Lib_IntVector_Intrinsics_vec256 ws15 = v3__10; + Lib_IntVector_Intrinsics_vec256 v012 = ws[16U]; + Lib_IntVector_Intrinsics_vec256 v112 = ws[17U]; + Lib_IntVector_Intrinsics_vec256 v212 = ws[18U]; + Lib_IntVector_Intrinsics_vec256 v312 = ws[19U]; + Lib_IntVector_Intrinsics_vec256 + v0_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v012, v112); + Lib_IntVector_Intrinsics_vec256 + v1_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v012, v112); + Lib_IntVector_Intrinsics_vec256 + v2_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v212, v312); + Lib_IntVector_Intrinsics_vec256 + v3_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v212, v312); + Lib_IntVector_Intrinsics_vec256 + v0__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_11, v2_11); + Lib_IntVector_Intrinsics_vec256 + v1__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_11, v2_11); + Lib_IntVector_Intrinsics_vec256 + v2__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_11, v3_11); + Lib_IntVector_Intrinsics_vec256 + v3__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_11, v3_11); + Lib_IntVector_Intrinsics_vec256 ws16 = v0__11; + Lib_IntVector_Intrinsics_vec256 ws17 = v2__11; + Lib_IntVector_Intrinsics_vec256 ws18 = v1__11; + Lib_IntVector_Intrinsics_vec256 ws19 = v3__11; + Lib_IntVector_Intrinsics_vec256 v013 = ws[20U]; + Lib_IntVector_Intrinsics_vec256 v113 = ws[21U]; + Lib_IntVector_Intrinsics_vec256 v213 = ws[22U]; + Lib_IntVector_Intrinsics_vec256 v313 = ws[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v013, v113); + Lib_IntVector_Intrinsics_vec256 + v1_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v013, v113); + Lib_IntVector_Intrinsics_vec256 + v2_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v213, v313); + Lib_IntVector_Intrinsics_vec256 + v3_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v213, v313); + Lib_IntVector_Intrinsics_vec256 + v0__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_12, v2_12); + Lib_IntVector_Intrinsics_vec256 + v1__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_12, v2_12); + Lib_IntVector_Intrinsics_vec256 + v2__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_12, v3_12); + Lib_IntVector_Intrinsics_vec256 + v3__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_12, v3_12); + Lib_IntVector_Intrinsics_vec256 ws20 = v0__12; + Lib_IntVector_Intrinsics_vec256 ws21 = v2__12; + Lib_IntVector_Intrinsics_vec256 ws22 = v1__12; + Lib_IntVector_Intrinsics_vec256 ws23 = v3__12; + Lib_IntVector_Intrinsics_vec256 v014 = ws[24U]; + Lib_IntVector_Intrinsics_vec256 v114 = ws[25U]; + Lib_IntVector_Intrinsics_vec256 v214 = ws[26U]; + Lib_IntVector_Intrinsics_vec256 v314 = ws[27U]; + Lib_IntVector_Intrinsics_vec256 + v0_13 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v014, v114); + Lib_IntVector_Intrinsics_vec256 + v1_13 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v014, v114); + Lib_IntVector_Intrinsics_vec256 + v2_13 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v214, v314); + Lib_IntVector_Intrinsics_vec256 + v3_13 = 
Lib_IntVector_Intrinsics_vec256_interleave_high64(v214, v314); + Lib_IntVector_Intrinsics_vec256 + v0__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_13, v2_13); + Lib_IntVector_Intrinsics_vec256 + v1__13 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_13, v2_13); + Lib_IntVector_Intrinsics_vec256 + v2__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_13, v3_13); + Lib_IntVector_Intrinsics_vec256 + v3__13 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_13, v3_13); + Lib_IntVector_Intrinsics_vec256 ws24 = v0__13; + Lib_IntVector_Intrinsics_vec256 ws25 = v2__13; + Lib_IntVector_Intrinsics_vec256 ws26 = v1__13; + Lib_IntVector_Intrinsics_vec256 ws27 = v3__13; + Lib_IntVector_Intrinsics_vec256 v0 = ws[28U]; + Lib_IntVector_Intrinsics_vec256 v1 = ws[29U]; + Lib_IntVector_Intrinsics_vec256 v2 = ws[30U]; + Lib_IntVector_Intrinsics_vec256 v3 = ws[31U]; + Lib_IntVector_Intrinsics_vec256 + v0_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v1_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v2_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v3_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v0__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_14, v2_14); + Lib_IntVector_Intrinsics_vec256 + v1__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_14, v2_14); + Lib_IntVector_Intrinsics_vec256 + v2__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_14, v3_14); + Lib_IntVector_Intrinsics_vec256 + v3__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_14, v3_14); + Lib_IntVector_Intrinsics_vec256 ws28 = v0__14; + Lib_IntVector_Intrinsics_vec256 ws29 = v2__14; + Lib_IntVector_Intrinsics_vec256 ws30 = v1__14; + Lib_IntVector_Intrinsics_vec256 ws31 = v3__14; + ws[0U] = ws0; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; + ws[31U] = ws31; + for (uint32_t i = 0U; i < 32U; i++) + { + Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); + } + uint8_t *b35 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + 64U - remOut, hbuf, remOut * sizeof (uint8_t)); + memcpy(b1 + 64U - remOut, hbuf + 256U, remOut * sizeof (uint8_t)); + memcpy(b2 + 64U - remOut, hbuf + 512U, remOut * sizeof (uint8_t)); + memcpy(b35 + 64U - remOut, hbuf + 768U, remOut * sizeof (uint8_t)); +} + +/** +Allocate a quadruple state buffer (200 bytes for each state) +*/ +Lib_IntVector_Intrinsics_vec256 *Hacl_Hash_SHA3_Simd256_state_malloc(void) +{ + Lib_IntVector_Intrinsics_vec256 + *buf = + (Lib_IntVector_Intrinsics_vec256 *)KRML_ALIGNED_MALLOC(32, + sizeof (Lib_IntVector_Intrinsics_vec256) * 25U); + memset(buf, 0U, 25U * sizeof (Lib_IntVector_Intrinsics_vec256)); + return buf; +} + +/** +Free a quadruple state buffer +*/ +void Hacl_Hash_SHA3_Simd256_state_free(Lib_IntVector_Intrinsics_vec256 *s) +{ + KRML_ALIGNED_FREE(s); +} + +/** +Absorb a number
of full blocks from each of the 4 input buffers and write the output states + + This function is intended to receive a quadruple hash state and 4 input buffers. + It processes inputs whose length is a multiple of 168 bytes (the SHAKE128 block size); + any additional bytes of a final partial block in each buffer are ignored. + + The argument `state` (IN/OUT) points to quadruple hash state, + i.e., Lib_IntVector_Intrinsics_vec256[25] + The arguments `input0/input1/input2/input3` (IN) point to `inputByteLen` bytes + of valid memory for each buffer, i.e., uint8_t[inputByteLen] +*/ +void +Hacl_Hash_SHA3_Simd256_shake128_absorb_nblocks( + Lib_IntVector_Intrinsics_vec256 *state, + uint8_t *input0, + uint8_t *input1, + uint8_t *input2, + uint8_t *input3, + uint32_t inputByteLen +) +{ + for (uint32_t i = 0U; i < inputByteLen / 168U; i++) + { + uint8_t b00[256U] = { 0U }; + uint8_t b10[256U] = { 0U }; + uint8_t b20[256U] = { 0U }; + uint8_t b30[256U] = { 0U }; + Hacl_Hash_SHA2_uint8_4p + b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; + uint8_t *b0 = input0; + uint8_t *b1 = input1; + uint8_t *b2 = input2; + uint8_t *b3 = input3; + uint8_t *bl3 = b_.snd.snd.snd; + uint8_t *bl2 = b_.snd.snd.fst; + uint8_t *bl1 = b_.snd.fst; + uint8_t *bl0 = b_.fst; + memcpy(bl0, b0 + i * 168U, 168U * sizeof (uint8_t)); + memcpy(bl1, b1 + i * 168U, 168U * sizeof (uint8_t)); + memcpy(bl2, b2 + i * 168U, 168U * sizeof (uint8_t)); + memcpy(bl3, b3 + i * 168U, 168U * sizeof (uint8_t)); + Hacl_Hash_SHA3_Simd256_absorb_inner_256(168U, b_, state); + } +} + +/** +Absorb the final partial block of each of the 4 input buffers and write the output states + + This function is intended to receive a quadruple hash state and 4 input buffers. + It processes the trailing sequence of bytes in each input buffer that is shorter + than 168 bytes (the SHAKE128 block size); + any full-block bytes at the start of each input buffer are ignored.
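+ + As an illustrative sketch (not part of this diff; `in0..in3`, `out0..out3`, `len`, and `outLen` are hypothetical caller-supplied buffers and lengths, with `outLen` a multiple of 168), the intended use of `shake128_absorb_nblocks`, this function, and `shake128_squeeze_nblocks` together is: + + Lib_IntVector_Intrinsics_vec256 *st = Hacl_Hash_SHA3_Simd256_state_malloc(); + Hacl_Hash_SHA3_Simd256_shake128_absorb_nblocks(st, in0, in1, in2, in3, len); + Hacl_Hash_SHA3_Simd256_shake128_absorb_final(st, in0, in1, in2, in3, len); + Hacl_Hash_SHA3_Simd256_shake128_squeeze_nblocks(st, out0, out1, out2, out3, outLen); + Hacl_Hash_SHA3_Simd256_state_free(st);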
+ + The argument `state` (IN/OUT) points to quadruple hash state, + i.e., Lib_IntVector_Intrinsics_vec256[25] + The arguments `input0/input1/input2/input3` (IN) point to `inputByteLen` bytes + of valid memory for each buffer, i.e., uint8_t[inputByteLen] + + Note: The full size of each input buffer must be passed as `inputByteLen`, + including the full-block bytes at the start of each buffer that this function ignores +*/ +void +Hacl_Hash_SHA3_Simd256_shake128_absorb_final( + Lib_IntVector_Intrinsics_vec256 *state, + uint8_t *input0, + uint8_t *input1, + uint8_t *input2, + uint8_t *input3, + uint32_t inputByteLen +) +{ + uint8_t b00[256U] = { 0U }; + uint8_t b10[256U] = { 0U }; + uint8_t b20[256U] = { 0U }; + uint8_t b30[256U] = { 0U }; + Hacl_Hash_SHA2_uint8_4p + b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; + uint32_t rem = inputByteLen % 168U; + uint8_t *b01 = input0; + uint8_t *b11 = input1; + uint8_t *b21 = input2; + uint8_t *b31 = input3; + uint8_t *bl3 = b_.snd.snd.snd; + uint8_t *bl2 = b_.snd.snd.fst; + uint8_t *bl1 = b_.snd.fst; + uint8_t *bl0 = b_.fst; + memcpy(bl0, b01 + inputByteLen - rem, rem * sizeof (uint8_t)); + memcpy(bl1, b11 + inputByteLen - rem, rem * sizeof (uint8_t)); + memcpy(bl2, b21 + inputByteLen - rem, rem * sizeof (uint8_t)); + memcpy(bl3, b31 + inputByteLen - rem, rem * sizeof (uint8_t)); + uint8_t *b32 = b_.snd.snd.snd; + uint8_t *b22 = b_.snd.snd.fst; + uint8_t *b12 = b_.snd.fst; + uint8_t *b02 = b_.fst; + b02[inputByteLen % 168U] = 0x1FU; + b12[inputByteLen % 168U] = 0x1FU; + b22[inputByteLen % 168U] = 0x1FU; + b32[inputByteLen % 168U] = 0x1FU; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b33 = b_.snd.snd.snd; + uint8_t *b23 = b_.snd.snd.fst; + uint8_t *b13 = b_.snd.fst; + uint8_t *b03 = b_.fst; + ws[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03); + ws[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13); + ws[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23); + ws[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33); + ws[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 32U); + ws[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 32U); + ws[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 32U); + ws[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 32U); + ws[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 64U); + ws[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 64U); + ws[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 64U); + ws[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 64U); + ws[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 96U); + ws[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 96U); + ws[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 96U); + ws[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 96U); + ws[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 128U); + ws[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 128U); + ws[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 128U); + ws[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 128U); + ws[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 160U); + ws[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 160U); + ws[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 160U); + ws[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 160U); + ws[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 192U); + ws[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 192U); + ws[26U] =
Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 192U); + ws[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 192U); + ws[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 224U); + ws[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 224U); + ws[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 224U); + ws[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 224U); + Lib_IntVector_Intrinsics_vec256 v00 = ws[0U]; + Lib_IntVector_Intrinsics_vec256 v10 = ws[1U]; + Lib_IntVector_Intrinsics_vec256 v20 = ws[2U]; + Lib_IntVector_Intrinsics_vec256 v30 = ws[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v1_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v2_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v3_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v0__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v1__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v2__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 + v3__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 ws0 = v0__; + Lib_IntVector_Intrinsics_vec256 ws1 = v2__; + Lib_IntVector_Intrinsics_vec256 ws2 = v1__; + Lib_IntVector_Intrinsics_vec256 ws3 = v3__; + Lib_IntVector_Intrinsics_vec256 v01 = ws[4U]; + Lib_IntVector_Intrinsics_vec256 v11 = ws[5U]; + Lib_IntVector_Intrinsics_vec256 v21 = ws[6U]; + Lib_IntVector_Intrinsics_vec256 v31 = ws[7U]; + Lib_IntVector_Intrinsics_vec256 + v0_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v1_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v2_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v3_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v0__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v1__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v2__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 + v3__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 ws4 = v0__0; + Lib_IntVector_Intrinsics_vec256 ws5 = v2__0; + Lib_IntVector_Intrinsics_vec256 ws6 = v1__0; + Lib_IntVector_Intrinsics_vec256 ws7 = v3__0; + Lib_IntVector_Intrinsics_vec256 v02 = ws[8U]; + Lib_IntVector_Intrinsics_vec256 v12 = ws[9U]; + Lib_IntVector_Intrinsics_vec256 v22 = ws[10U]; + Lib_IntVector_Intrinsics_vec256 v32 = ws[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v1_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v2_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v3_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v0__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_1, v2_1); + 
Lib_IntVector_Intrinsics_vec256 + v1__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v2__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 + v3__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 ws8 = v0__1; + Lib_IntVector_Intrinsics_vec256 ws9 = v2__1; + Lib_IntVector_Intrinsics_vec256 ws10 = v1__1; + Lib_IntVector_Intrinsics_vec256 ws11 = v3__1; + Lib_IntVector_Intrinsics_vec256 v03 = ws[12U]; + Lib_IntVector_Intrinsics_vec256 v13 = ws[13U]; + Lib_IntVector_Intrinsics_vec256 v23 = ws[14U]; + Lib_IntVector_Intrinsics_vec256 v33 = ws[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v1_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v2_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v3_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v0__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v1__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v2__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 + v3__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 ws12 = v0__2; + Lib_IntVector_Intrinsics_vec256 ws13 = v2__2; + Lib_IntVector_Intrinsics_vec256 ws14 = v1__2; + Lib_IntVector_Intrinsics_vec256 ws15 = v3__2; + Lib_IntVector_Intrinsics_vec256 v04 = ws[16U]; + Lib_IntVector_Intrinsics_vec256 v14 = ws[17U]; + Lib_IntVector_Intrinsics_vec256 v24 = ws[18U]; + Lib_IntVector_Intrinsics_vec256 v34 = ws[19U]; + Lib_IntVector_Intrinsics_vec256 + v0_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v1_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v2_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v3_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v0__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v1__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v2__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 + v3__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 ws16 = v0__3; + Lib_IntVector_Intrinsics_vec256 ws17 = v2__3; + Lib_IntVector_Intrinsics_vec256 ws18 = v1__3; + Lib_IntVector_Intrinsics_vec256 ws19 = v3__3; + Lib_IntVector_Intrinsics_vec256 v05 = ws[20U]; + Lib_IntVector_Intrinsics_vec256 v15 = ws[21U]; + Lib_IntVector_Intrinsics_vec256 v25 = ws[22U]; + Lib_IntVector_Intrinsics_vec256 v35 = ws[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v1_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v2_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v3_4 = 
Lib_IntVector_Intrinsics_vec256_interleave_high64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v0__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v1__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v2__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 + v3__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 ws20 = v0__4; + Lib_IntVector_Intrinsics_vec256 ws21 = v2__4; + Lib_IntVector_Intrinsics_vec256 ws22 = v1__4; + Lib_IntVector_Intrinsics_vec256 ws23 = v3__4; + Lib_IntVector_Intrinsics_vec256 v06 = ws[24U]; + Lib_IntVector_Intrinsics_vec256 v16 = ws[25U]; + Lib_IntVector_Intrinsics_vec256 v26 = ws[26U]; + Lib_IntVector_Intrinsics_vec256 v36 = ws[27U]; + Lib_IntVector_Intrinsics_vec256 + v0_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v1_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v2_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v3_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v0__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v1__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v2__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 + v3__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 ws24 = v0__5; + Lib_IntVector_Intrinsics_vec256 ws25 = v2__5; + Lib_IntVector_Intrinsics_vec256 ws26 = v1__5; + Lib_IntVector_Intrinsics_vec256 ws27 = v3__5; + Lib_IntVector_Intrinsics_vec256 v0 = ws[28U]; + Lib_IntVector_Intrinsics_vec256 v1 = ws[29U]; + Lib_IntVector_Intrinsics_vec256 v2 = ws[30U]; + Lib_IntVector_Intrinsics_vec256 v3 = ws[31U]; + Lib_IntVector_Intrinsics_vec256 + v0_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v1_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v2_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v3_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v0__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v1__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v2__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 + v3__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 ws28 = v0__6; + Lib_IntVector_Intrinsics_vec256 ws29 = v2__6; + Lib_IntVector_Intrinsics_vec256 ws30 = v1__6; + Lib_IntVector_Intrinsics_vec256 ws31 = v3__6; + ws[0U] = ws0; + ws[1U] = ws1; + ws[2U] = ws2; + ws[3U] = ws3; + ws[4U] = ws4; + ws[5U] = ws5; + ws[6U] = ws6; + ws[7U] = ws7; + ws[8U] = ws8; + ws[9U] = ws9; + ws[10U] = ws10; + ws[11U] = ws11; + ws[12U] = ws12; + ws[13U] = ws13; + ws[14U] = ws14; + ws[15U] = ws15; + ws[16U] = ws16; + ws[17U] = ws17; + ws[18U] = ws18; + ws[19U] = ws19; + ws[20U] = ws20; + ws[21U] = ws21; + ws[22U] = ws22; + ws[23U] = ws23; + 
ws[24U] = ws24; + ws[25U] = ws25; + ws[26U] = ws26; + ws[27U] = ws27; + ws[28U] = ws28; + ws[29U] = ws29; + ws[30U] = ws30; + ws[31U] = ws31; + for (uint32_t i = 0U; i < 25U; i++) + { + state[i] = Lib_IntVector_Intrinsics_vec256_xor(state[i], ws[i]); + } + uint8_t b04[256U] = { 0U }; + uint8_t b14[256U] = { 0U }; + uint8_t b24[256U] = { 0U }; + uint8_t b34[256U] = { 0U }; + Hacl_Hash_SHA2_uint8_4p + b = { .fst = b04, .snd = { .fst = b14, .snd = { .fst = b24, .snd = b34 } } }; + uint8_t *b3 = b.snd.snd.snd; + uint8_t *b2 = b.snd.snd.fst; + uint8_t *b1 = b.snd.fst; + uint8_t *b0 = b.fst; + b0[167U] = 0x80U; + b1[167U] = 0x80U; + b2[167U] = 0x80U; + b3[167U] = 0x80U; + Hacl_Hash_SHA3_Simd256_absorb_inner_256(168U, b, state); +} + +/** +Squeeze a quadruple hash state to 4 output buffers + + This function is intended to receive a quadruple hash state and 4 output buffers. + It produces 4 outputs, each a multiple of 168 bytes (the SHAKE128 block size) in length; + any remaining bytes of a final partial block in each buffer are not written. + + The argument `state` (IN/OUT) points to quadruple hash state, + i.e., Lib_IntVector_Intrinsics_vec256[25] + The arguments `output0/output1/output2/output3` (OUT) point to `outputByteLen` bytes + of valid memory for each buffer, i.e., uint8_t[outputByteLen] +*/ +void +Hacl_Hash_SHA3_Simd256_shake128_squeeze_nblocks( + Lib_IntVector_Intrinsics_vec256 *state, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, + uint32_t outputByteLen +) +{ + for (uint32_t i0 = 0U; i0 < outputByteLen / 168U; i0++) + { + uint8_t hbuf[1024U] = { 0U }; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws[32U] KRML_POST_ALIGN(32) = { 0U }; + memcpy(ws, state, 25U * sizeof (Lib_IntVector_Intrinsics_vec256)); + Lib_IntVector_Intrinsics_vec256 v00 = ws[0U]; + Lib_IntVector_Intrinsics_vec256 v10 = ws[1U]; + Lib_IntVector_Intrinsics_vec256 v20 = ws[2U]; + Lib_IntVector_Intrinsics_vec256 v30 = ws[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v1_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v2_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v3_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v0__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v1__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v2__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 + v3__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 ws0 = v0__; + Lib_IntVector_Intrinsics_vec256 ws1 = v2__; + Lib_IntVector_Intrinsics_vec256 ws2 = v1__; + Lib_IntVector_Intrinsics_vec256 ws3 = v3__; + Lib_IntVector_Intrinsics_vec256 v01 = ws[4U]; + Lib_IntVector_Intrinsics_vec256 v11 = ws[5U]; + Lib_IntVector_Intrinsics_vec256 v21 = ws[6U]; + Lib_IntVector_Intrinsics_vec256 v31 = ws[7U]; + Lib_IntVector_Intrinsics_vec256 + v0_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v1_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v2_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v3_0 =
Lib_IntVector_Intrinsics_vec256_interleave_high64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v0__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v1__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v2__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 + v3__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 ws4 = v0__0; + Lib_IntVector_Intrinsics_vec256 ws5 = v2__0; + Lib_IntVector_Intrinsics_vec256 ws6 = v1__0; + Lib_IntVector_Intrinsics_vec256 ws7 = v3__0; + Lib_IntVector_Intrinsics_vec256 v02 = ws[8U]; + Lib_IntVector_Intrinsics_vec256 v12 = ws[9U]; + Lib_IntVector_Intrinsics_vec256 v22 = ws[10U]; + Lib_IntVector_Intrinsics_vec256 v32 = ws[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v1_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v2_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v3_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v0__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v1__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v2__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 + v3__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 ws8 = v0__1; + Lib_IntVector_Intrinsics_vec256 ws9 = v2__1; + Lib_IntVector_Intrinsics_vec256 ws10 = v1__1; + Lib_IntVector_Intrinsics_vec256 ws11 = v3__1; + Lib_IntVector_Intrinsics_vec256 v03 = ws[12U]; + Lib_IntVector_Intrinsics_vec256 v13 = ws[13U]; + Lib_IntVector_Intrinsics_vec256 v23 = ws[14U]; + Lib_IntVector_Intrinsics_vec256 v33 = ws[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v1_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v2_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v3_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v0__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v1__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v2__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 + v3__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 ws12 = v0__2; + Lib_IntVector_Intrinsics_vec256 ws13 = v2__2; + Lib_IntVector_Intrinsics_vec256 ws14 = v1__2; + Lib_IntVector_Intrinsics_vec256 ws15 = v3__2; + Lib_IntVector_Intrinsics_vec256 v04 = ws[16U]; + Lib_IntVector_Intrinsics_vec256 v14 = ws[17U]; + Lib_IntVector_Intrinsics_vec256 v24 = ws[18U]; + Lib_IntVector_Intrinsics_vec256 v34 = ws[19U]; + Lib_IntVector_Intrinsics_vec256 + v0_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v1_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v04, v14); + 
Lib_IntVector_Intrinsics_vec256 + v2_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v3_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v0__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v1__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v2__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 + v3__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 ws16 = v0__3; + Lib_IntVector_Intrinsics_vec256 ws17 = v2__3; + Lib_IntVector_Intrinsics_vec256 ws18 = v1__3; + Lib_IntVector_Intrinsics_vec256 ws19 = v3__3; + Lib_IntVector_Intrinsics_vec256 v05 = ws[20U]; + Lib_IntVector_Intrinsics_vec256 v15 = ws[21U]; + Lib_IntVector_Intrinsics_vec256 v25 = ws[22U]; + Lib_IntVector_Intrinsics_vec256 v35 = ws[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v1_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v2_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v3_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v0__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v1__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v2__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 + v3__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 ws20 = v0__4; + Lib_IntVector_Intrinsics_vec256 ws21 = v2__4; + Lib_IntVector_Intrinsics_vec256 ws22 = v1__4; + Lib_IntVector_Intrinsics_vec256 ws23 = v3__4; + Lib_IntVector_Intrinsics_vec256 v06 = ws[24U]; + Lib_IntVector_Intrinsics_vec256 v16 = ws[25U]; + Lib_IntVector_Intrinsics_vec256 v26 = ws[26U]; + Lib_IntVector_Intrinsics_vec256 v36 = ws[27U]; + Lib_IntVector_Intrinsics_vec256 + v0_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v1_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v2_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v3_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v0__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v1__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v2__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 + v3__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 ws24 = v0__5; + Lib_IntVector_Intrinsics_vec256 ws25 = v2__5; + Lib_IntVector_Intrinsics_vec256 ws26 = v1__5; + Lib_IntVector_Intrinsics_vec256 ws27 = v3__5; + Lib_IntVector_Intrinsics_vec256 v0 = ws[28U]; + Lib_IntVector_Intrinsics_vec256 v1 = ws[29U]; + Lib_IntVector_Intrinsics_vec256 v2 = ws[30U]; + Lib_IntVector_Intrinsics_vec256 v3 = ws[31U]; + Lib_IntVector_Intrinsics_vec256 + v0_6 = 
Lib_IntVector_Intrinsics_vec256_interleave_low64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v1_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v2_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v3_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v0__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v1__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v2__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 + v3__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 ws28 = v0__6; + Lib_IntVector_Intrinsics_vec256 ws29 = v2__6; + Lib_IntVector_Intrinsics_vec256 ws30 = v1__6; + Lib_IntVector_Intrinsics_vec256 ws31 = v3__6; + ws[0U] = ws0; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; + ws[31U] = ws31; + for (uint32_t i = 0U; i < 32U; i++) + { + Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); + } + uint8_t *b0 = output0; + uint8_t *b1 = output1; + uint8_t *b2 = output2; + uint8_t *b3 = output3; + memcpy(b0 + i0 * 168U, hbuf, 168U * sizeof (uint8_t)); + memcpy(b1 + i0 * 168U, hbuf + 256U, 168U * sizeof (uint8_t)); + memcpy(b2 + i0 * 168U, hbuf + 512U, 168U * sizeof (uint8_t)); + memcpy(b3 + i0 * 168U, hbuf + 768U, 168U * sizeof (uint8_t)); + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____0 = state[i + 0U]; + Lib_IntVector_Intrinsics_vec256 uu____1 = state[i + 5U]; + Lib_IntVector_Intrinsics_vec256 uu____2 = state[i + 10U]; + _C[i] = + Lib_IntVector_Intrinsics_vec256_xor(uu____0, + Lib_IntVector_Intrinsics_vec256_xor(uu____1, + Lib_IntVector_Intrinsics_vec256_xor(uu____2, + Lib_IntVector_Intrinsics_vec256_xor(state[i + 15U], state[i + 20U]))));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____3 = _C[(i2 + 4U) % 5U]; + Lib_IntVector_Intrinsics_vec256 uu____4 = _C[(i2 + 1U) % 5U]; + Lib_IntVector_Intrinsics_vec256 + _D = + Lib_IntVector_Intrinsics_vec256_xor(uu____3, + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____4, + 1U), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____4, 63U))); + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + state[i2 + 5U * i] = Lib_IntVector_Intrinsics_vec256_xor(state[i2 + 5U * i], _D););); + Lib_IntVector_Intrinsics_vec256 x = state[1U]; + Lib_IntVector_Intrinsics_vec256 current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Hash_SHA3_keccak_piln[i]; + uint32_t r = Hacl_Hash_SHA3_keccak_rotc[i]; + Lib_IntVector_Intrinsics_vec256 temp = state[_Y]; + Lib_IntVector_Intrinsics_vec256 uu____5 = current; + state[_Y] = + 
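/* rho-pi step: store rotl64(current, r) = (x << r) | (x >> (64U - r)) into lane keccak_piln[i] */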
Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____5, + r), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____5, 64U - r)); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____6 = state[0U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____7 = Lib_IntVector_Intrinsics_vec256_lognot(state[1U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v07 = + Lib_IntVector_Intrinsics_vec256_xor(uu____6, + Lib_IntVector_Intrinsics_vec256_and(uu____7, state[2U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____8 = state[1U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____9 = Lib_IntVector_Intrinsics_vec256_lognot(state[2U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v17 = + Lib_IntVector_Intrinsics_vec256_xor(uu____8, + Lib_IntVector_Intrinsics_vec256_and(uu____9, state[3U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____10 = state[2U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____11 = Lib_IntVector_Intrinsics_vec256_lognot(state[3U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v27 = + Lib_IntVector_Intrinsics_vec256_xor(uu____10, + Lib_IntVector_Intrinsics_vec256_and(uu____11, state[4U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____12 = state[3U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____13 = Lib_IntVector_Intrinsics_vec256_lognot(state[4U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v37 = + Lib_IntVector_Intrinsics_vec256_xor(uu____12, + Lib_IntVector_Intrinsics_vec256_and(uu____13, state[0U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____14 = state[4U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____15 = Lib_IntVector_Intrinsics_vec256_lognot(state[0U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v4 = + Lib_IntVector_Intrinsics_vec256_xor(uu____14, + Lib_IntVector_Intrinsics_vec256_and(uu____15, state[1U + 5U * i])); + state[0U + 5U * i] = v07; + state[1U + 5U * i] = v17; + state[2U + 5U * i] = v27; + state[3U + 5U * i] = v37; + state[4U + 5U * i] = v4;); + uint64_t c = Hacl_Hash_SHA3_keccak_rndc[i1]; + Lib_IntVector_Intrinsics_vec256 uu____16 = state[0U]; + state[0U] = + Lib_IntVector_Intrinsics_vec256_xor(uu____16, + Lib_IntVector_Intrinsics_vec256_load64(c)); + } + } +} + diff --git a/src/Hacl_K256_ECDSA.c b/src/Hacl_K256_ECDSA.c index bbd2c615..0b72b166 100644 --- a/src/Hacl_K256_ECDSA.c +++ b/src/Hacl_K256_ECDSA.c @@ -571,10 +571,6 @@ static inline bool is_qelem_le_q_halved_vartime(uint64_t *f) { return true; } - if (a2 > 0xffffffffffffffffULL) - { - return false; - } if (a1 < 0x5d576e7357a4501dULL) { return true; diff --git a/src/Lib_RandomBuffer_System.c b/src/Lib_RandomBuffer_System.c index 0d7924b4..de6ef337 100644 --- a/src/Lib_RandomBuffer_System.c +++ b/src/Lib_RandomBuffer_System.c @@ -31,6 +31,7 @@ bool read_random_bytes(uint32_t len, uint8_t *buf) { #include #include #include +#include #include bool read_random_bytes(uint32_t len, uint8_t *buf) { diff --git a/src/msvc/EverCrypt_DRBG.c b/src/msvc/EverCrypt_DRBG.c index 1395f59f..c76a69cd 100644 --- a/src/msvc/EverCrypt_DRBG.c +++ b/src/msvc/EverCrypt_DRBG.c @@ -1798,8 +1798,8 @@ static void uninstantiate_sha1(EverCrypt_DRBG_state_s *st) uint8_t *k = s.k; uint8_t *v = s.v; uint32_t *ctr = s.reseed_counter; - Lib_Memzero0_memzero(k, 20U, uint8_t); - Lib_Memzero0_memzero(v, 20U, uint8_t); + Lib_Memzero0_memzero(k, 20U, uint8_t, void *); + Lib_Memzero0_memzero(v, 20U, uint8_t, void *); ctr[0U] = 0U; KRML_HOST_FREE(k); KRML_HOST_FREE(v); @@ -1822,8 +1822,8 @@ static void 
uninstantiate_sha2_256(EverCrypt_DRBG_state_s *st) uint8_t *k = s.k; uint8_t *v = s.v; uint32_t *ctr = s.reseed_counter; - Lib_Memzero0_memzero(k, 32U, uint8_t); - Lib_Memzero0_memzero(v, 32U, uint8_t); + Lib_Memzero0_memzero(k, 32U, uint8_t, void *); + Lib_Memzero0_memzero(v, 32U, uint8_t, void *); ctr[0U] = 0U; KRML_HOST_FREE(k); KRML_HOST_FREE(v); @@ -1846,8 +1846,8 @@ static void uninstantiate_sha2_384(EverCrypt_DRBG_state_s *st) uint8_t *k = s.k; uint8_t *v = s.v; uint32_t *ctr = s.reseed_counter; - Lib_Memzero0_memzero(k, 48U, uint8_t); - Lib_Memzero0_memzero(v, 48U, uint8_t); + Lib_Memzero0_memzero(k, 48U, uint8_t, void *); + Lib_Memzero0_memzero(v, 48U, uint8_t, void *); ctr[0U] = 0U; KRML_HOST_FREE(k); KRML_HOST_FREE(v); @@ -1870,8 +1870,8 @@ static void uninstantiate_sha2_512(EverCrypt_DRBG_state_s *st) uint8_t *k = s.k; uint8_t *v = s.v; uint32_t *ctr = s.reseed_counter; - Lib_Memzero0_memzero(k, 64U, uint8_t); - Lib_Memzero0_memzero(v, 64U, uint8_t); + Lib_Memzero0_memzero(k, 64U, uint8_t, void *); + Lib_Memzero0_memzero(v, 64U, uint8_t, void *); ctr[0U] = 0U; KRML_HOST_FREE(k); KRML_HOST_FREE(v); diff --git a/src/msvc/EverCrypt_Hash.c b/src/msvc/EverCrypt_Hash.c index 92b3c227..bfafa9be 100644 --- a/src/msvc/EverCrypt_Hash.c +++ b/src/msvc/EverCrypt_Hash.c @@ -709,25 +709,57 @@ static void finish(EverCrypt_Hash_state_s *s, uint8_t *dst) if (scrut.tag == SHA3_224_s) { uint64_t *p1 = scrut.case_SHA3_224_s; - Hacl_Hash_SHA3_squeeze0(p1, 144U, 28U, dst); + uint32_t remOut = 28U; + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, p1, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + memcpy(dst + 28U - remOut, hbuf, remOut * sizeof (uint8_t)); return; } if (scrut.tag == SHA3_256_s) { uint64_t *p1 = scrut.case_SHA3_256_s; - Hacl_Hash_SHA3_squeeze0(p1, 136U, 32U, dst); + uint32_t remOut = 32U; + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, p1, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + memcpy(dst + 32U - remOut, hbuf, remOut * sizeof (uint8_t)); return; } if (scrut.tag == SHA3_384_s) { uint64_t *p1 = scrut.case_SHA3_384_s; - Hacl_Hash_SHA3_squeeze0(p1, 104U, 48U, dst); + uint32_t remOut = 48U; + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, p1, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + memcpy(dst + 48U - remOut, hbuf, remOut * sizeof (uint8_t)); return; } if (scrut.tag == SHA3_512_s) { uint64_t *p1 = scrut.case_SHA3_512_s; - Hacl_Hash_SHA3_squeeze0(p1, 72U, 64U, dst); + uint32_t remOut = 64U; + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, p1, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + memcpy(dst + 64U - remOut, hbuf, remOut * sizeof (uint8_t)); return; } if (scrut.tag == Blake2S_s) diff --git a/src/msvc/Hacl_Ed25519.c b/src/msvc/Hacl_Ed25519.c index 05d96cd0..d1f8edf2 100644 --- a/src/msvc/Hacl_Ed25519.c +++ b/src/msvc/Hacl_Ed25519.c @@ -1712,8 +1712,8 @@ static inline void secret_expand(uint8_t *expanded, uint8_t *secret) /** Compute the public key from the private key. - The outparam `public_key` points to 32 bytes of valid memory, i.e., uint8_t[32]. - The argument `private_key` points to 32 bytes of valid memory, i.e., uint8_t[32]. + @param[out] public_key Points to 32 bytes of valid memory, i.e., `uint8_t[32]`. 
Must not overlap the memory location of `private_key`. + @param[in] private_key Points to 32 bytes of valid memory containing the private key, i.e., `uint8_t[32]`. */ void Hacl_Ed25519_secret_to_public(uint8_t *public_key, uint8_t *private_key) { @@ -1726,8 +1726,8 @@ void Hacl_Ed25519_secret_to_public(uint8_t *public_key, uint8_t *private_key) /** Compute the expanded keys for an Ed25519 signature. - The outparam `expanded_keys` points to 96 bytes of valid memory, i.e., uint8_t[96]. - The argument `private_key` points to 32 bytes of valid memory, i.e., uint8_t[32]. + @param[out] expanded_keys Points to 96 bytes of valid memory, i.e., `uint8_t[96]`. Must not overlap the memory location of `private_key`. + @param[in] private_key Points to 32 bytes of valid memory containing the private key, i.e., `uint8_t[32]`. If one needs to sign several messages under the same private key, it is more efficient to call `expand_keys` only once and `sign_expanded` multiple times, for each message. @@ -1744,11 +1744,10 @@ void Hacl_Ed25519_expand_keys(uint8_t *expanded_keys, uint8_t *private_key) /** Create an Ed25519 signature with the (precomputed) expanded keys. - The outparam `signature` points to 64 bytes of valid memory, i.e., uint8_t[64]. - The argument `expanded_keys` points to 96 bytes of valid memory, i.e., uint8_t[96]. - The argument `msg` points to `msg_len` bytes of valid memory, i.e., uint8_t[msg_len]. - - The argument `expanded_keys` is obtained through `expand_keys`. + @param[out] signature Points to 64 bytes of valid memory, i.e., `uint8_t[64]`. Must not overlap the memory locations of `expanded_keys` nor `msg`. + @param[in] expanded_keys Points to 96 bytes of valid memory, i.e., `uint8_t[96]`, containing the expanded keys obtained by invoking `expand_keys`. + @param[in] msg_len Length of `msg`. + @param[in] msg Points to `msg_len` bytes of valid memory containing the message, i.e., `uint8_t[msg_len]`. If one needs to sign several messages under the same private key, it is more efficient to call `expand_keys` only once and `sign_expanded` multiple times, for each message. @@ -1783,9 +1782,10 @@ Hacl_Ed25519_sign_expanded( /** Create an Ed25519 signature. - The outparam `signature` points to 64 bytes of valid memory, i.e., uint8_t[64]. - The argument `private_key` points to 32 bytes of valid memory, i.e., uint8_t[32]. - The argument `msg` points to `msg_len` bytes of valid memory, i.e., uint8_t[msg_len]. + @param[out] signature Points to 64 bytes of valid memory, i.e., `uint8_t[64]`. Must not overlap the memory locations of `private_key` nor `msg`. + @param[in] private_key Points to 32 bytes of valid memory containing the private key, i.e., `uint8_t[32]`. + @param[in] msg_len Length of `msg`. + @param[in] msg Points to `msg_len` bytes of valid memory containing the message, i.e., `uint8_t[msg_len]`. The function first calls `expand_keys` and then invokes `sign_expanded`. @@ -1803,11 +1803,12 @@ Hacl_Ed25519_sign(uint8_t *signature, uint8_t *private_key, uint32_t msg_len, ui /** Verify an Ed25519 signature. - The function returns `true` if the signature is valid and `false` otherwise. + @param public_key Points to 32 bytes of valid memory containing the public key, i.e., `uint8_t[32]`. + @param msg_len Length of `msg`. + @param msg Points to `msg_len` bytes of valid memory containing the message, i.e., `uint8_t[msg_len]`. + @param signature Points to 64 bytes of valid memory containing the signature, i.e., `uint8_t[64]`. 
- The argument `public_key` points to 32 bytes of valid memory, i.e., uint8_t[32]. - The argument `msg` points to `msg_len` bytes of valid memory, i.e., uint8_t[msg_len]. - The argument `signature` points to 64 bytes of valid memory, i.e., uint8_t[64]. + @return Returns `true` if the signature is valid and `false` otherwise. */ bool Hacl_Ed25519_verify(uint8_t *public_key, uint32_t msg_len, uint8_t *msg, uint8_t *signature) diff --git a/src/msvc/Hacl_Frodo1344.c b/src/msvc/Hacl_Frodo1344.c index 61262a4c..ea380d8c 100644 --- a/src/msvc/Hacl_Frodo1344.c +++ b/src/msvc/Hacl_Frodo1344.c @@ -45,7 +45,7 @@ uint32_t Hacl_Frodo1344_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) uint8_t *seed_se = coins + 32U; uint8_t *z = coins + 64U; uint8_t *seed_a = pk; - Hacl_Hash_SHA3_shake256_hacl(16U, z, 16U, seed_a); + Hacl_Hash_SHA3_shake256(seed_a, 16U, z, 16U); uint8_t *b_bytes = pk + 16U; uint8_t *s_bytes = sk + 21552U; uint16_t s_matrix[10752U] = { 0U }; @@ -54,8 +54,8 @@ uint32_t Hacl_Frodo1344_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) uint8_t shake_input_seed_se[33U] = { 0U }; shake_input_seed_se[0U] = 0x5fU; memcpy(shake_input_seed_se + 1U, seed_se, 32U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake256_hacl(33U, shake_input_seed_se, 43008U, r); - Lib_Memzero0_memzero(shake_input_seed_se, 33U, uint8_t); + Hacl_Hash_SHA3_shake256(r, 43008U, shake_input_seed_se, 33U); + Lib_Memzero0_memzero(shake_input_seed_se, 33U, uint8_t, void *); Hacl_Impl_Frodo_Sample_frodo_sample_matrix1344(1344U, 8U, r, s_matrix); Hacl_Impl_Frodo_Sample_frodo_sample_matrix1344(1344U, 8U, r + 21504U, e_matrix); uint16_t b_matrix[10752U] = { 0U }; @@ -66,14 +66,14 @@ uint32_t Hacl_Frodo1344_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) Hacl_Impl_Matrix_matrix_add(1344U, 8U, b_matrix, e_matrix); Hacl_Impl_Frodo_Pack_frodo_pack(1344U, 8U, 16U, b_matrix, b_bytes); Hacl_Impl_Matrix_matrix_to_lbytes(1344U, 8U, s_matrix, s_bytes); - Lib_Memzero0_memzero(s_matrix, 10752U, uint16_t); - Lib_Memzero0_memzero(e_matrix, 10752U, uint16_t); + Lib_Memzero0_memzero(s_matrix, 10752U, uint16_t, void *); + Lib_Memzero0_memzero(e_matrix, 10752U, uint16_t, void *); uint32_t slen1 = 43056U; uint8_t *sk_p = sk; memcpy(sk_p, s, 32U * sizeof (uint8_t)); memcpy(sk_p + 32U, pk, 21520U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake256_hacl(21520U, pk, 32U, sk + slen1); - Lib_Memzero0_memzero(coins, 80U, uint8_t); + Hacl_Hash_SHA3_shake256(sk + slen1, 32U, pk, 21520U); + Lib_Memzero0_memzero(coins, 80U, uint8_t, void *); return 0U; } @@ -83,9 +83,9 @@ uint32_t Hacl_Frodo1344_crypto_kem_enc(uint8_t *ct, uint8_t *ss, uint8_t *pk) randombytes_(32U, coins); uint8_t seed_se_k[64U] = { 0U }; uint8_t pkh_mu[64U] = { 0U }; - Hacl_Hash_SHA3_shake256_hacl(21520U, pk, 32U, pkh_mu); + Hacl_Hash_SHA3_shake256(pkh_mu, 32U, pk, 21520U); memcpy(pkh_mu + 32U, coins, 32U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake256_hacl(64U, pkh_mu, 64U, seed_se_k); + Hacl_Hash_SHA3_shake256(seed_se_k, 64U, pkh_mu, 64U); uint8_t *seed_se = seed_se_k; uint8_t *k = seed_se_k + 32U; uint8_t *seed_a = pk; @@ -97,8 +97,8 @@ uint32_t Hacl_Frodo1344_crypto_kem_enc(uint8_t *ct, uint8_t *ss, uint8_t *pk) uint8_t shake_input_seed_se[33U] = { 0U }; shake_input_seed_se[0U] = 0x96U; memcpy(shake_input_seed_se + 1U, seed_se, 32U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake256_hacl(33U, shake_input_seed_se, 43136U, r); - Lib_Memzero0_memzero(shake_input_seed_se, 33U, uint8_t); + Hacl_Hash_SHA3_shake256(r, 43136U, shake_input_seed_se, 33U); + Lib_Memzero0_memzero(shake_input_seed_se, 33U, uint8_t, void *); 
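+ /* The call above uses the new argument order Hacl_Hash_SHA3_shake256(output, outputByteLen, input, inputByteLen), whereas the removed Hacl_Hash_SHA3_shake256_hacl took (inputByteLen, input, outputByteLen, output); the computed output is the same. Likewise, Lib_Memzero0_memzero now takes a trailing type argument (`void *` here) to match the updated macro arity. */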
Hacl_Impl_Frodo_Sample_frodo_sample_matrix1344(8U, 1344U, r, sp_matrix); Hacl_Impl_Frodo_Sample_frodo_sample_matrix1344(8U, 1344U, r + 21504U, ep_matrix); Hacl_Impl_Frodo_Sample_frodo_sample_matrix1344(8U, 8U, r + 43008U, epp_matrix); @@ -119,22 +119,22 @@ uint32_t Hacl_Frodo1344_crypto_kem_enc(uint8_t *ct, uint8_t *ss, uint8_t *pk) uint16_t mu_encode[64U] = { 0U }; Hacl_Impl_Frodo_Encode_frodo_key_encode(16U, 4U, 8U, coins, mu_encode); Hacl_Impl_Matrix_matrix_add(8U, 8U, v_matrix, mu_encode); - Lib_Memzero0_memzero(mu_encode, 64U, uint16_t); + Lib_Memzero0_memzero(mu_encode, 64U, uint16_t, void *); Hacl_Impl_Frodo_Pack_frodo_pack(8U, 8U, 16U, v_matrix, c2); - Lib_Memzero0_memzero(v_matrix, 64U, uint16_t); - Lib_Memzero0_memzero(sp_matrix, 10752U, uint16_t); - Lib_Memzero0_memzero(ep_matrix, 10752U, uint16_t); - Lib_Memzero0_memzero(epp_matrix, 64U, uint16_t); + Lib_Memzero0_memzero(v_matrix, 64U, uint16_t, void *); + Lib_Memzero0_memzero(sp_matrix, 10752U, uint16_t, void *); + Lib_Memzero0_memzero(ep_matrix, 10752U, uint16_t, void *); + Lib_Memzero0_memzero(epp_matrix, 64U, uint16_t, void *); uint32_t ss_init_len = 21664U; KRML_CHECK_SIZE(sizeof (uint8_t), ss_init_len); uint8_t *shake_input_ss = (uint8_t *)alloca(ss_init_len * sizeof (uint8_t)); memset(shake_input_ss, 0U, ss_init_len * sizeof (uint8_t)); memcpy(shake_input_ss, ct, 21632U * sizeof (uint8_t)); memcpy(shake_input_ss + 21632U, k, 32U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake256_hacl(ss_init_len, shake_input_ss, 32U, ss); - Lib_Memzero0_memzero(shake_input_ss, ss_init_len, uint8_t); - Lib_Memzero0_memzero(seed_se_k, 64U, uint8_t); - Lib_Memzero0_memzero(coins, 32U, uint8_t); + Hacl_Hash_SHA3_shake256(ss, 32U, shake_input_ss, ss_init_len); + Lib_Memzero0_memzero(shake_input_ss, ss_init_len, uint8_t, void *); + Lib_Memzero0_memzero(seed_se_k, 64U, uint8_t, void *); + Lib_Memzero0_memzero(coins, 32U, uint8_t, void *); return 0U; } @@ -154,8 +154,8 @@ uint32_t Hacl_Frodo1344_crypto_kem_dec(uint8_t *ss, uint8_t *ct, uint8_t *sk) Hacl_Impl_Matrix_matrix_mul_s(8U, 1344U, 8U, bp_matrix, s_matrix, m_matrix); Hacl_Impl_Matrix_matrix_sub(8U, 8U, c_matrix, m_matrix); Hacl_Impl_Frodo_Encode_frodo_key_decode(16U, 4U, 8U, m_matrix, mu_decode); - Lib_Memzero0_memzero(s_matrix, 10752U, uint16_t); - Lib_Memzero0_memzero(m_matrix, 64U, uint16_t); + Lib_Memzero0_memzero(s_matrix, 10752U, uint16_t, void *); + Lib_Memzero0_memzero(m_matrix, 64U, uint16_t, void *); uint8_t seed_se_k[64U] = { 0U }; uint32_t pkh_mu_decode_len = 64U; KRML_CHECK_SIZE(sizeof (uint8_t), pkh_mu_decode_len); @@ -164,7 +164,7 @@ uint32_t Hacl_Frodo1344_crypto_kem_dec(uint8_t *ss, uint8_t *ct, uint8_t *sk) uint8_t *pkh = sk + 43056U; memcpy(pkh_mu_decode, pkh, 32U * sizeof (uint8_t)); memcpy(pkh_mu_decode + 32U, mu_decode, 32U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake256_hacl(pkh_mu_decode_len, pkh_mu_decode, 64U, seed_se_k); + Hacl_Hash_SHA3_shake256(seed_se_k, 64U, pkh_mu_decode, pkh_mu_decode_len); uint8_t *seed_se = seed_se_k; uint8_t *kp = seed_se_k + 32U; uint8_t *s = sk; @@ -177,8 +177,8 @@ uint32_t Hacl_Frodo1344_crypto_kem_dec(uint8_t *ss, uint8_t *ct, uint8_t *sk) uint8_t shake_input_seed_se[33U] = { 0U }; shake_input_seed_se[0U] = 0x96U; memcpy(shake_input_seed_se + 1U, seed_se, 32U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake256_hacl(33U, shake_input_seed_se, 43136U, r); - Lib_Memzero0_memzero(shake_input_seed_se, 33U, uint8_t); + Hacl_Hash_SHA3_shake256(r, 43136U, shake_input_seed_se, 33U); + Lib_Memzero0_memzero(shake_input_seed_se, 33U, uint8_t, void *); 
Hacl_Impl_Frodo_Sample_frodo_sample_matrix1344(8U, 1344U, r, sp_matrix); Hacl_Impl_Frodo_Sample_frodo_sample_matrix1344(8U, 1344U, r + 21504U, ep_matrix); Hacl_Impl_Frodo_Sample_frodo_sample_matrix1344(8U, 8U, r + 43008U, epp_matrix); @@ -197,12 +197,12 @@ uint32_t Hacl_Frodo1344_crypto_kem_dec(uint8_t *ss, uint8_t *ct, uint8_t *sk) uint16_t mu_encode[64U] = { 0U }; Hacl_Impl_Frodo_Encode_frodo_key_encode(16U, 4U, 8U, mu_decode, mu_encode); Hacl_Impl_Matrix_matrix_add(8U, 8U, cp_matrix, mu_encode); - Lib_Memzero0_memzero(mu_encode, 64U, uint16_t); + Lib_Memzero0_memzero(mu_encode, 64U, uint16_t, void *); Hacl_Impl_Matrix_mod_pow2(8U, 1344U, 16U, bpp_matrix); Hacl_Impl_Matrix_mod_pow2(8U, 8U, 16U, cp_matrix); - Lib_Memzero0_memzero(sp_matrix, 10752U, uint16_t); - Lib_Memzero0_memzero(ep_matrix, 10752U, uint16_t); - Lib_Memzero0_memzero(epp_matrix, 64U, uint16_t); + Lib_Memzero0_memzero(sp_matrix, 10752U, uint16_t, void *); + Lib_Memzero0_memzero(ep_matrix, 10752U, uint16_t, void *); + Lib_Memzero0_memzero(epp_matrix, 64U, uint16_t, void *); uint16_t b1 = Hacl_Impl_Matrix_matrix_eq(8U, 1344U, bp_matrix, bpp_matrix); uint16_t b2 = Hacl_Impl_Matrix_matrix_eq(8U, 8U, c_matrix, cp_matrix); uint16_t mask = (uint32_t)b1 & (uint32_t)b2; @@ -222,11 +222,11 @@ uint32_t Hacl_Frodo1344_crypto_kem_dec(uint8_t *ss, uint8_t *ct, uint8_t *sk) memset(ss_init, 0U, ss_init_len * sizeof (uint8_t)); memcpy(ss_init, ct, 21632U * sizeof (uint8_t)); memcpy(ss_init + 21632U, kp_s, 32U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake256_hacl(ss_init_len, ss_init, 32U, ss); - Lib_Memzero0_memzero(ss_init, ss_init_len, uint8_t); - Lib_Memzero0_memzero(kp_s, 32U, uint8_t); - Lib_Memzero0_memzero(seed_se_k, 64U, uint8_t); - Lib_Memzero0_memzero(mu_decode, 32U, uint8_t); + Hacl_Hash_SHA3_shake256(ss, 32U, ss_init, ss_init_len); + Lib_Memzero0_memzero(ss_init, ss_init_len, uint8_t, void *); + Lib_Memzero0_memzero(kp_s, 32U, uint8_t, void *); + Lib_Memzero0_memzero(seed_se_k, 64U, uint8_t, void *); + Lib_Memzero0_memzero(mu_decode, 32U, uint8_t, void *); return 0U; } diff --git a/src/msvc/Hacl_Frodo64.c b/src/msvc/Hacl_Frodo64.c index 392d87f9..f4a025ce 100644 --- a/src/msvc/Hacl_Frodo64.c +++ b/src/msvc/Hacl_Frodo64.c @@ -50,7 +50,7 @@ uint32_t Hacl_Frodo64_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) uint8_t *seed_se = coins + 16U; uint8_t *z = coins + 32U; uint8_t *seed_a = pk; - Hacl_Hash_SHA3_shake128_hacl(16U, z, 16U, seed_a); + Hacl_Hash_SHA3_shake128(seed_a, 16U, z, 16U); uint8_t *b_bytes = pk + 16U; uint8_t *s_bytes = sk + 992U; uint16_t s_matrix[512U] = { 0U }; @@ -59,8 +59,8 @@ uint32_t Hacl_Frodo64_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) uint8_t shake_input_seed_se[17U] = { 0U }; shake_input_seed_se[0U] = 0x5fU; memcpy(shake_input_seed_se + 1U, seed_se, 16U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake128_hacl(17U, shake_input_seed_se, 2048U, r); - Lib_Memzero0_memzero(shake_input_seed_se, 17U, uint8_t); + Hacl_Hash_SHA3_shake128(r, 2048U, shake_input_seed_se, 17U); + Lib_Memzero0_memzero(shake_input_seed_se, 17U, uint8_t, void *); Hacl_Impl_Frodo_Sample_frodo_sample_matrix64(64U, 8U, r, s_matrix); Hacl_Impl_Frodo_Sample_frodo_sample_matrix64(64U, 8U, r + 1024U, e_matrix); uint16_t b_matrix[512U] = { 0U }; @@ -70,14 +70,14 @@ uint32_t Hacl_Frodo64_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) Hacl_Impl_Matrix_matrix_add(64U, 8U, b_matrix, e_matrix); Hacl_Impl_Frodo_Pack_frodo_pack(64U, 8U, 15U, b_matrix, b_bytes); Hacl_Impl_Matrix_matrix_to_lbytes(64U, 8U, s_matrix, s_bytes); - Lib_Memzero0_memzero(s_matrix, 512U, 
uint16_t); - Lib_Memzero0_memzero(e_matrix, 512U, uint16_t); + Lib_Memzero0_memzero(s_matrix, 512U, uint16_t, void *); + Lib_Memzero0_memzero(e_matrix, 512U, uint16_t, void *); uint32_t slen1 = 2016U; uint8_t *sk_p = sk; memcpy(sk_p, s, 16U * sizeof (uint8_t)); memcpy(sk_p + 16U, pk, 976U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake128_hacl(976U, pk, 16U, sk + slen1); - Lib_Memzero0_memzero(coins, 48U, uint8_t); + Hacl_Hash_SHA3_shake128(sk + slen1, 16U, pk, 976U); + Lib_Memzero0_memzero(coins, 48U, uint8_t, void *); return 0U; } @@ -87,9 +87,9 @@ uint32_t Hacl_Frodo64_crypto_kem_enc(uint8_t *ct, uint8_t *ss, uint8_t *pk) randombytes_(16U, coins); uint8_t seed_se_k[32U] = { 0U }; uint8_t pkh_mu[32U] = { 0U }; - Hacl_Hash_SHA3_shake128_hacl(976U, pk, 16U, pkh_mu); + Hacl_Hash_SHA3_shake128(pkh_mu, 16U, pk, 976U); memcpy(pkh_mu + 16U, coins, 16U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake128_hacl(32U, pkh_mu, 32U, seed_se_k); + Hacl_Hash_SHA3_shake128(seed_se_k, 32U, pkh_mu, 32U); uint8_t *seed_se = seed_se_k; uint8_t *k = seed_se_k + 16U; uint8_t *seed_a = pk; @@ -101,8 +101,8 @@ uint32_t Hacl_Frodo64_crypto_kem_enc(uint8_t *ct, uint8_t *ss, uint8_t *pk) uint8_t shake_input_seed_se[17U] = { 0U }; shake_input_seed_se[0U] = 0x96U; memcpy(shake_input_seed_se + 1U, seed_se, 16U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake128_hacl(17U, shake_input_seed_se, 2176U, r); - Lib_Memzero0_memzero(shake_input_seed_se, 17U, uint8_t); + Hacl_Hash_SHA3_shake128(r, 2176U, shake_input_seed_se, 17U); + Lib_Memzero0_memzero(shake_input_seed_se, 17U, uint8_t, void *); Hacl_Impl_Frodo_Sample_frodo_sample_matrix64(8U, 64U, r, sp_matrix); Hacl_Impl_Frodo_Sample_frodo_sample_matrix64(8U, 64U, r + 1024U, ep_matrix); Hacl_Impl_Frodo_Sample_frodo_sample_matrix64(8U, 8U, r + 2048U, epp_matrix); @@ -122,22 +122,22 @@ uint32_t Hacl_Frodo64_crypto_kem_enc(uint8_t *ct, uint8_t *ss, uint8_t *pk) uint16_t mu_encode[64U] = { 0U }; Hacl_Impl_Frodo_Encode_frodo_key_encode(15U, 2U, 8U, coins, mu_encode); Hacl_Impl_Matrix_matrix_add(8U, 8U, v_matrix, mu_encode); - Lib_Memzero0_memzero(mu_encode, 64U, uint16_t); + Lib_Memzero0_memzero(mu_encode, 64U, uint16_t, void *); Hacl_Impl_Frodo_Pack_frodo_pack(8U, 8U, 15U, v_matrix, c2); - Lib_Memzero0_memzero(v_matrix, 64U, uint16_t); - Lib_Memzero0_memzero(sp_matrix, 512U, uint16_t); - Lib_Memzero0_memzero(ep_matrix, 512U, uint16_t); - Lib_Memzero0_memzero(epp_matrix, 64U, uint16_t); + Lib_Memzero0_memzero(v_matrix, 64U, uint16_t, void *); + Lib_Memzero0_memzero(sp_matrix, 512U, uint16_t, void *); + Lib_Memzero0_memzero(ep_matrix, 512U, uint16_t, void *); + Lib_Memzero0_memzero(epp_matrix, 64U, uint16_t, void *); uint32_t ss_init_len = 1096U; KRML_CHECK_SIZE(sizeof (uint8_t), ss_init_len); uint8_t *shake_input_ss = (uint8_t *)alloca(ss_init_len * sizeof (uint8_t)); memset(shake_input_ss, 0U, ss_init_len * sizeof (uint8_t)); memcpy(shake_input_ss, ct, 1080U * sizeof (uint8_t)); memcpy(shake_input_ss + 1080U, k, 16U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake128_hacl(ss_init_len, shake_input_ss, 16U, ss); - Lib_Memzero0_memzero(shake_input_ss, ss_init_len, uint8_t); - Lib_Memzero0_memzero(seed_se_k, 32U, uint8_t); - Lib_Memzero0_memzero(coins, 16U, uint8_t); + Hacl_Hash_SHA3_shake128(ss, 16U, shake_input_ss, ss_init_len); + Lib_Memzero0_memzero(shake_input_ss, ss_init_len, uint8_t, void *); + Lib_Memzero0_memzero(seed_se_k, 32U, uint8_t, void *); + Lib_Memzero0_memzero(coins, 16U, uint8_t, void *); return 0U; } @@ -157,8 +157,8 @@ uint32_t Hacl_Frodo64_crypto_kem_dec(uint8_t *ss, uint8_t *ct, 
uint8_t *sk) Hacl_Impl_Matrix_matrix_mul_s(8U, 64U, 8U, bp_matrix, s_matrix, m_matrix); Hacl_Impl_Matrix_matrix_sub(8U, 8U, c_matrix, m_matrix); Hacl_Impl_Frodo_Encode_frodo_key_decode(15U, 2U, 8U, m_matrix, mu_decode); - Lib_Memzero0_memzero(s_matrix, 512U, uint16_t); - Lib_Memzero0_memzero(m_matrix, 64U, uint16_t); + Lib_Memzero0_memzero(s_matrix, 512U, uint16_t, void *); + Lib_Memzero0_memzero(m_matrix, 64U, uint16_t, void *); uint8_t seed_se_k[32U] = { 0U }; uint32_t pkh_mu_decode_len = 32U; KRML_CHECK_SIZE(sizeof (uint8_t), pkh_mu_decode_len); @@ -167,7 +167,7 @@ uint32_t Hacl_Frodo64_crypto_kem_dec(uint8_t *ss, uint8_t *ct, uint8_t *sk) uint8_t *pkh = sk + 2016U; memcpy(pkh_mu_decode, pkh, 16U * sizeof (uint8_t)); memcpy(pkh_mu_decode + 16U, mu_decode, 16U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake128_hacl(pkh_mu_decode_len, pkh_mu_decode, 32U, seed_se_k); + Hacl_Hash_SHA3_shake128(seed_se_k, 32U, pkh_mu_decode, pkh_mu_decode_len); uint8_t *seed_se = seed_se_k; uint8_t *kp = seed_se_k + 16U; uint8_t *s = sk; @@ -180,8 +180,8 @@ uint32_t Hacl_Frodo64_crypto_kem_dec(uint8_t *ss, uint8_t *ct, uint8_t *sk) uint8_t shake_input_seed_se[17U] = { 0U }; shake_input_seed_se[0U] = 0x96U; memcpy(shake_input_seed_se + 1U, seed_se, 16U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake128_hacl(17U, shake_input_seed_se, 2176U, r); - Lib_Memzero0_memzero(shake_input_seed_se, 17U, uint8_t); + Hacl_Hash_SHA3_shake128(r, 2176U, shake_input_seed_se, 17U); + Lib_Memzero0_memzero(shake_input_seed_se, 17U, uint8_t, void *); Hacl_Impl_Frodo_Sample_frodo_sample_matrix64(8U, 64U, r, sp_matrix); Hacl_Impl_Frodo_Sample_frodo_sample_matrix64(8U, 64U, r + 1024U, ep_matrix); Hacl_Impl_Frodo_Sample_frodo_sample_matrix64(8U, 8U, r + 2048U, epp_matrix); @@ -199,12 +199,12 @@ uint32_t Hacl_Frodo64_crypto_kem_dec(uint8_t *ss, uint8_t *ct, uint8_t *sk) uint16_t mu_encode[64U] = { 0U }; Hacl_Impl_Frodo_Encode_frodo_key_encode(15U, 2U, 8U, mu_decode, mu_encode); Hacl_Impl_Matrix_matrix_add(8U, 8U, cp_matrix, mu_encode); - Lib_Memzero0_memzero(mu_encode, 64U, uint16_t); + Lib_Memzero0_memzero(mu_encode, 64U, uint16_t, void *); Hacl_Impl_Matrix_mod_pow2(8U, 64U, 15U, bpp_matrix); Hacl_Impl_Matrix_mod_pow2(8U, 8U, 15U, cp_matrix); - Lib_Memzero0_memzero(sp_matrix, 512U, uint16_t); - Lib_Memzero0_memzero(ep_matrix, 512U, uint16_t); - Lib_Memzero0_memzero(epp_matrix, 64U, uint16_t); + Lib_Memzero0_memzero(sp_matrix, 512U, uint16_t, void *); + Lib_Memzero0_memzero(ep_matrix, 512U, uint16_t, void *); + Lib_Memzero0_memzero(epp_matrix, 64U, uint16_t, void *); uint16_t b1 = Hacl_Impl_Matrix_matrix_eq(8U, 64U, bp_matrix, bpp_matrix); uint16_t b2 = Hacl_Impl_Matrix_matrix_eq(8U, 8U, c_matrix, cp_matrix); uint16_t mask = (uint32_t)b1 & (uint32_t)b2; @@ -225,11 +225,11 @@ uint32_t Hacl_Frodo64_crypto_kem_dec(uint8_t *ss, uint8_t *ct, uint8_t *sk) memset(ss_init, 0U, ss_init_len * sizeof (uint8_t)); memcpy(ss_init, ct, 1080U * sizeof (uint8_t)); memcpy(ss_init + 1080U, kp_s, 16U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake128_hacl(ss_init_len, ss_init, 16U, ss); - Lib_Memzero0_memzero(ss_init, ss_init_len, uint8_t); - Lib_Memzero0_memzero(kp_s, 16U, uint8_t); - Lib_Memzero0_memzero(seed_se_k, 32U, uint8_t); - Lib_Memzero0_memzero(mu_decode, 16U, uint8_t); + Hacl_Hash_SHA3_shake128(ss, 16U, ss_init, ss_init_len); + Lib_Memzero0_memzero(ss_init, ss_init_len, uint8_t, void *); + Lib_Memzero0_memzero(kp_s, 16U, uint8_t, void *); + Lib_Memzero0_memzero(seed_se_k, 32U, uint8_t, void *); + Lib_Memzero0_memzero(mu_decode, 16U, uint8_t, void *); 
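/* Editorial note: every Lib_Memzero0_memzero call in this patch gains a
   fourth macro argument. The first three arguments (pointer, element count,
   element type) are unchanged; the trailing argument appears to be the
   (unused) return type, presumably added so the macro's arity matches the
   fully-applied polymorphic signature it is extracted against. The migration
   is purely mechanical:

     Lib_Memzero0_memzero(buf, len, uint16_t);          // old
     Lib_Memzero0_memzero(buf, len, uint16_t, void *);  // new
*/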
return 0U; } diff --git a/src/msvc/Hacl_Frodo640.c b/src/msvc/Hacl_Frodo640.c index 5de5871f..e3d10300 100644 --- a/src/msvc/Hacl_Frodo640.c +++ b/src/msvc/Hacl_Frodo640.c @@ -45,7 +45,7 @@ uint32_t Hacl_Frodo640_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) uint8_t *seed_se = coins + 16U; uint8_t *z = coins + 32U; uint8_t *seed_a = pk; - Hacl_Hash_SHA3_shake128_hacl(16U, z, 16U, seed_a); + Hacl_Hash_SHA3_shake128(seed_a, 16U, z, 16U); uint8_t *b_bytes = pk + 16U; uint8_t *s_bytes = sk + 9632U; uint16_t s_matrix[5120U] = { 0U }; @@ -54,8 +54,8 @@ uint32_t Hacl_Frodo640_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) uint8_t shake_input_seed_se[17U] = { 0U }; shake_input_seed_se[0U] = 0x5fU; memcpy(shake_input_seed_se + 1U, seed_se, 16U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake128_hacl(17U, shake_input_seed_se, 20480U, r); - Lib_Memzero0_memzero(shake_input_seed_se, 17U, uint8_t); + Hacl_Hash_SHA3_shake128(r, 20480U, shake_input_seed_se, 17U); + Lib_Memzero0_memzero(shake_input_seed_se, 17U, uint8_t, void *); Hacl_Impl_Frodo_Sample_frodo_sample_matrix640(640U, 8U, r, s_matrix); Hacl_Impl_Frodo_Sample_frodo_sample_matrix640(640U, 8U, r + 10240U, e_matrix); uint16_t b_matrix[5120U] = { 0U }; @@ -66,14 +66,14 @@ uint32_t Hacl_Frodo640_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) Hacl_Impl_Matrix_matrix_add(640U, 8U, b_matrix, e_matrix); Hacl_Impl_Frodo_Pack_frodo_pack(640U, 8U, 15U, b_matrix, b_bytes); Hacl_Impl_Matrix_matrix_to_lbytes(640U, 8U, s_matrix, s_bytes); - Lib_Memzero0_memzero(s_matrix, 5120U, uint16_t); - Lib_Memzero0_memzero(e_matrix, 5120U, uint16_t); + Lib_Memzero0_memzero(s_matrix, 5120U, uint16_t, void *); + Lib_Memzero0_memzero(e_matrix, 5120U, uint16_t, void *); uint32_t slen1 = 19872U; uint8_t *sk_p = sk; memcpy(sk_p, s, 16U * sizeof (uint8_t)); memcpy(sk_p + 16U, pk, 9616U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake128_hacl(9616U, pk, 16U, sk + slen1); - Lib_Memzero0_memzero(coins, 48U, uint8_t); + Hacl_Hash_SHA3_shake128(sk + slen1, 16U, pk, 9616U); + Lib_Memzero0_memzero(coins, 48U, uint8_t, void *); return 0U; } @@ -83,9 +83,9 @@ uint32_t Hacl_Frodo640_crypto_kem_enc(uint8_t *ct, uint8_t *ss, uint8_t *pk) randombytes_(16U, coins); uint8_t seed_se_k[32U] = { 0U }; uint8_t pkh_mu[32U] = { 0U }; - Hacl_Hash_SHA3_shake128_hacl(9616U, pk, 16U, pkh_mu); + Hacl_Hash_SHA3_shake128(pkh_mu, 16U, pk, 9616U); memcpy(pkh_mu + 16U, coins, 16U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake128_hacl(32U, pkh_mu, 32U, seed_se_k); + Hacl_Hash_SHA3_shake128(seed_se_k, 32U, pkh_mu, 32U); uint8_t *seed_se = seed_se_k; uint8_t *k = seed_se_k + 16U; uint8_t *seed_a = pk; @@ -97,8 +97,8 @@ uint32_t Hacl_Frodo640_crypto_kem_enc(uint8_t *ct, uint8_t *ss, uint8_t *pk) uint8_t shake_input_seed_se[17U] = { 0U }; shake_input_seed_se[0U] = 0x96U; memcpy(shake_input_seed_se + 1U, seed_se, 16U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake128_hacl(17U, shake_input_seed_se, 20608U, r); - Lib_Memzero0_memzero(shake_input_seed_se, 17U, uint8_t); + Hacl_Hash_SHA3_shake128(r, 20608U, shake_input_seed_se, 17U); + Lib_Memzero0_memzero(shake_input_seed_se, 17U, uint8_t, void *); Hacl_Impl_Frodo_Sample_frodo_sample_matrix640(8U, 640U, r, sp_matrix); Hacl_Impl_Frodo_Sample_frodo_sample_matrix640(8U, 640U, r + 10240U, ep_matrix); Hacl_Impl_Frodo_Sample_frodo_sample_matrix640(8U, 8U, r + 20480U, epp_matrix); @@ -119,22 +119,22 @@ uint32_t Hacl_Frodo640_crypto_kem_enc(uint8_t *ct, uint8_t *ss, uint8_t *pk) uint16_t mu_encode[64U] = { 0U }; Hacl_Impl_Frodo_Encode_frodo_key_encode(15U, 2U, 8U, coins, mu_encode); 
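/* Editorial note: besides the rename from Hacl_Hash_SHA3_shake128_hacl /
   shake256_hacl to Hacl_Hash_SHA3_shake128 / shake256, the argument order
   flips from input-first to output-first, as every call site in this patch
   shows:

     Hacl_Hash_SHA3_shake128_hacl(input_len, input, output_len, output);  // old
     Hacl_Hash_SHA3_shake128(output, output_len, input, input_len);       // new
*/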
Hacl_Impl_Matrix_matrix_add(8U, 8U, v_matrix, mu_encode); - Lib_Memzero0_memzero(mu_encode, 64U, uint16_t); + Lib_Memzero0_memzero(mu_encode, 64U, uint16_t, void *); Hacl_Impl_Frodo_Pack_frodo_pack(8U, 8U, 15U, v_matrix, c2); - Lib_Memzero0_memzero(v_matrix, 64U, uint16_t); - Lib_Memzero0_memzero(sp_matrix, 5120U, uint16_t); - Lib_Memzero0_memzero(ep_matrix, 5120U, uint16_t); - Lib_Memzero0_memzero(epp_matrix, 64U, uint16_t); + Lib_Memzero0_memzero(v_matrix, 64U, uint16_t, void *); + Lib_Memzero0_memzero(sp_matrix, 5120U, uint16_t, void *); + Lib_Memzero0_memzero(ep_matrix, 5120U, uint16_t, void *); + Lib_Memzero0_memzero(epp_matrix, 64U, uint16_t, void *); uint32_t ss_init_len = 9736U; KRML_CHECK_SIZE(sizeof (uint8_t), ss_init_len); uint8_t *shake_input_ss = (uint8_t *)alloca(ss_init_len * sizeof (uint8_t)); memset(shake_input_ss, 0U, ss_init_len * sizeof (uint8_t)); memcpy(shake_input_ss, ct, 9720U * sizeof (uint8_t)); memcpy(shake_input_ss + 9720U, k, 16U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake128_hacl(ss_init_len, shake_input_ss, 16U, ss); - Lib_Memzero0_memzero(shake_input_ss, ss_init_len, uint8_t); - Lib_Memzero0_memzero(seed_se_k, 32U, uint8_t); - Lib_Memzero0_memzero(coins, 16U, uint8_t); + Hacl_Hash_SHA3_shake128(ss, 16U, shake_input_ss, ss_init_len); + Lib_Memzero0_memzero(shake_input_ss, ss_init_len, uint8_t, void *); + Lib_Memzero0_memzero(seed_se_k, 32U, uint8_t, void *); + Lib_Memzero0_memzero(coins, 16U, uint8_t, void *); return 0U; } @@ -154,8 +154,8 @@ uint32_t Hacl_Frodo640_crypto_kem_dec(uint8_t *ss, uint8_t *ct, uint8_t *sk) Hacl_Impl_Matrix_matrix_mul_s(8U, 640U, 8U, bp_matrix, s_matrix, m_matrix); Hacl_Impl_Matrix_matrix_sub(8U, 8U, c_matrix, m_matrix); Hacl_Impl_Frodo_Encode_frodo_key_decode(15U, 2U, 8U, m_matrix, mu_decode); - Lib_Memzero0_memzero(s_matrix, 5120U, uint16_t); - Lib_Memzero0_memzero(m_matrix, 64U, uint16_t); + Lib_Memzero0_memzero(s_matrix, 5120U, uint16_t, void *); + Lib_Memzero0_memzero(m_matrix, 64U, uint16_t, void *); uint8_t seed_se_k[32U] = { 0U }; uint32_t pkh_mu_decode_len = 32U; KRML_CHECK_SIZE(sizeof (uint8_t), pkh_mu_decode_len); @@ -164,7 +164,7 @@ uint32_t Hacl_Frodo640_crypto_kem_dec(uint8_t *ss, uint8_t *ct, uint8_t *sk) uint8_t *pkh = sk + 19872U; memcpy(pkh_mu_decode, pkh, 16U * sizeof (uint8_t)); memcpy(pkh_mu_decode + 16U, mu_decode, 16U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake128_hacl(pkh_mu_decode_len, pkh_mu_decode, 32U, seed_se_k); + Hacl_Hash_SHA3_shake128(seed_se_k, 32U, pkh_mu_decode, pkh_mu_decode_len); uint8_t *seed_se = seed_se_k; uint8_t *kp = seed_se_k + 16U; uint8_t *s = sk; @@ -177,8 +177,8 @@ uint32_t Hacl_Frodo640_crypto_kem_dec(uint8_t *ss, uint8_t *ct, uint8_t *sk) uint8_t shake_input_seed_se[17U] = { 0U }; shake_input_seed_se[0U] = 0x96U; memcpy(shake_input_seed_se + 1U, seed_se, 16U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake128_hacl(17U, shake_input_seed_se, 20608U, r); - Lib_Memzero0_memzero(shake_input_seed_se, 17U, uint8_t); + Hacl_Hash_SHA3_shake128(r, 20608U, shake_input_seed_se, 17U); + Lib_Memzero0_memzero(shake_input_seed_se, 17U, uint8_t, void *); Hacl_Impl_Frodo_Sample_frodo_sample_matrix640(8U, 640U, r, sp_matrix); Hacl_Impl_Frodo_Sample_frodo_sample_matrix640(8U, 640U, r + 10240U, ep_matrix); Hacl_Impl_Frodo_Sample_frodo_sample_matrix640(8U, 8U, r + 20480U, epp_matrix); @@ -197,12 +197,12 @@ uint32_t Hacl_Frodo640_crypto_kem_dec(uint8_t *ss, uint8_t *ct, uint8_t *sk) uint16_t mu_encode[64U] = { 0U }; Hacl_Impl_Frodo_Encode_frodo_key_encode(15U, 2U, 8U, mu_decode, mu_encode); 
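/* Editorial note: the decapsulation code below re-encrypts mu_decode and
   compares the result (bpp_matrix, cp_matrix) against the matrices unpacked
   from the ciphertext. Each Hacl_Impl_Matrix_matrix_eq call yields an
   all-ones or all-zeroes uint16_t, so mask = b1 & b2 is all-ones only if
   both halves match; the mask is then used to choose, in constant time,
   between the re-derived secret kp and the fallback value s stored in the
   secret key (the standard FO-transform trick). Conceptually, per byte:

     out[i] = (uint8_t)((kp[i] & mask8) | (s[i] & (uint8_t)~mask8));
*/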
Hacl_Impl_Matrix_matrix_add(8U, 8U, cp_matrix, mu_encode); - Lib_Memzero0_memzero(mu_encode, 64U, uint16_t); + Lib_Memzero0_memzero(mu_encode, 64U, uint16_t, void *); Hacl_Impl_Matrix_mod_pow2(8U, 640U, 15U, bpp_matrix); Hacl_Impl_Matrix_mod_pow2(8U, 8U, 15U, cp_matrix); - Lib_Memzero0_memzero(sp_matrix, 5120U, uint16_t); - Lib_Memzero0_memzero(ep_matrix, 5120U, uint16_t); - Lib_Memzero0_memzero(epp_matrix, 64U, uint16_t); + Lib_Memzero0_memzero(sp_matrix, 5120U, uint16_t, void *); + Lib_Memzero0_memzero(ep_matrix, 5120U, uint16_t, void *); + Lib_Memzero0_memzero(epp_matrix, 64U, uint16_t, void *); uint16_t b1 = Hacl_Impl_Matrix_matrix_eq(8U, 640U, bp_matrix, bpp_matrix); uint16_t b2 = Hacl_Impl_Matrix_matrix_eq(8U, 8U, c_matrix, cp_matrix); uint16_t mask = (uint32_t)b1 & (uint32_t)b2; @@ -223,11 +223,11 @@ uint32_t Hacl_Frodo640_crypto_kem_dec(uint8_t *ss, uint8_t *ct, uint8_t *sk) memset(ss_init, 0U, ss_init_len * sizeof (uint8_t)); memcpy(ss_init, ct, 9720U * sizeof (uint8_t)); memcpy(ss_init + 9720U, kp_s, 16U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake128_hacl(ss_init_len, ss_init, 16U, ss); - Lib_Memzero0_memzero(ss_init, ss_init_len, uint8_t); - Lib_Memzero0_memzero(kp_s, 16U, uint8_t); - Lib_Memzero0_memzero(seed_se_k, 32U, uint8_t); - Lib_Memzero0_memzero(mu_decode, 16U, uint8_t); + Hacl_Hash_SHA3_shake128(ss, 16U, ss_init, ss_init_len); + Lib_Memzero0_memzero(ss_init, ss_init_len, uint8_t, void *); + Lib_Memzero0_memzero(kp_s, 16U, uint8_t, void *); + Lib_Memzero0_memzero(seed_se_k, 32U, uint8_t, void *); + Lib_Memzero0_memzero(mu_decode, 16U, uint8_t, void *); return 0U; } diff --git a/src/msvc/Hacl_Frodo976.c b/src/msvc/Hacl_Frodo976.c index 61454ceb..982192c1 100644 --- a/src/msvc/Hacl_Frodo976.c +++ b/src/msvc/Hacl_Frodo976.c @@ -45,7 +45,7 @@ uint32_t Hacl_Frodo976_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) uint8_t *seed_se = coins + 24U; uint8_t *z = coins + 48U; uint8_t *seed_a = pk; - Hacl_Hash_SHA3_shake256_hacl(16U, z, 16U, seed_a); + Hacl_Hash_SHA3_shake256(seed_a, 16U, z, 16U); uint8_t *b_bytes = pk + 16U; uint8_t *s_bytes = sk + 15656U; uint16_t s_matrix[7808U] = { 0U }; @@ -54,8 +54,8 @@ uint32_t Hacl_Frodo976_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) uint8_t shake_input_seed_se[25U] = { 0U }; shake_input_seed_se[0U] = 0x5fU; memcpy(shake_input_seed_se + 1U, seed_se, 24U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake256_hacl(25U, shake_input_seed_se, 31232U, r); - Lib_Memzero0_memzero(shake_input_seed_se, 25U, uint8_t); + Hacl_Hash_SHA3_shake256(r, 31232U, shake_input_seed_se, 25U); + Lib_Memzero0_memzero(shake_input_seed_se, 25U, uint8_t, void *); Hacl_Impl_Frodo_Sample_frodo_sample_matrix976(976U, 8U, r, s_matrix); Hacl_Impl_Frodo_Sample_frodo_sample_matrix976(976U, 8U, r + 15616U, e_matrix); uint16_t b_matrix[7808U] = { 0U }; @@ -66,14 +66,14 @@ uint32_t Hacl_Frodo976_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) Hacl_Impl_Matrix_matrix_add(976U, 8U, b_matrix, e_matrix); Hacl_Impl_Frodo_Pack_frodo_pack(976U, 8U, 16U, b_matrix, b_bytes); Hacl_Impl_Matrix_matrix_to_lbytes(976U, 8U, s_matrix, s_bytes); - Lib_Memzero0_memzero(s_matrix, 7808U, uint16_t); - Lib_Memzero0_memzero(e_matrix, 7808U, uint16_t); + Lib_Memzero0_memzero(s_matrix, 7808U, uint16_t, void *); + Lib_Memzero0_memzero(e_matrix, 7808U, uint16_t, void *); uint32_t slen1 = 31272U; uint8_t *sk_p = sk; memcpy(sk_p, s, 24U * sizeof (uint8_t)); memcpy(sk_p + 24U, pk, 15632U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake256_hacl(15632U, pk, 24U, sk + slen1); - Lib_Memzero0_memzero(coins, 64U, uint8_t); + 
Hacl_Hash_SHA3_shake256(sk + slen1, 24U, pk, 15632U); + Lib_Memzero0_memzero(coins, 64U, uint8_t, void *); return 0U; } @@ -83,9 +83,9 @@ uint32_t Hacl_Frodo976_crypto_kem_enc(uint8_t *ct, uint8_t *ss, uint8_t *pk) randombytes_(24U, coins); uint8_t seed_se_k[48U] = { 0U }; uint8_t pkh_mu[48U] = { 0U }; - Hacl_Hash_SHA3_shake256_hacl(15632U, pk, 24U, pkh_mu); + Hacl_Hash_SHA3_shake256(pkh_mu, 24U, pk, 15632U); memcpy(pkh_mu + 24U, coins, 24U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake256_hacl(48U, pkh_mu, 48U, seed_se_k); + Hacl_Hash_SHA3_shake256(seed_se_k, 48U, pkh_mu, 48U); uint8_t *seed_se = seed_se_k; uint8_t *k = seed_se_k + 24U; uint8_t *seed_a = pk; @@ -97,8 +97,8 @@ uint32_t Hacl_Frodo976_crypto_kem_enc(uint8_t *ct, uint8_t *ss, uint8_t *pk) uint8_t shake_input_seed_se[25U] = { 0U }; shake_input_seed_se[0U] = 0x96U; memcpy(shake_input_seed_se + 1U, seed_se, 24U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake256_hacl(25U, shake_input_seed_se, 31360U, r); - Lib_Memzero0_memzero(shake_input_seed_se, 25U, uint8_t); + Hacl_Hash_SHA3_shake256(r, 31360U, shake_input_seed_se, 25U); + Lib_Memzero0_memzero(shake_input_seed_se, 25U, uint8_t, void *); Hacl_Impl_Frodo_Sample_frodo_sample_matrix976(8U, 976U, r, sp_matrix); Hacl_Impl_Frodo_Sample_frodo_sample_matrix976(8U, 976U, r + 15616U, ep_matrix); Hacl_Impl_Frodo_Sample_frodo_sample_matrix976(8U, 8U, r + 31232U, epp_matrix); @@ -119,22 +119,22 @@ uint32_t Hacl_Frodo976_crypto_kem_enc(uint8_t *ct, uint8_t *ss, uint8_t *pk) uint16_t mu_encode[64U] = { 0U }; Hacl_Impl_Frodo_Encode_frodo_key_encode(16U, 3U, 8U, coins, mu_encode); Hacl_Impl_Matrix_matrix_add(8U, 8U, v_matrix, mu_encode); - Lib_Memzero0_memzero(mu_encode, 64U, uint16_t); + Lib_Memzero0_memzero(mu_encode, 64U, uint16_t, void *); Hacl_Impl_Frodo_Pack_frodo_pack(8U, 8U, 16U, v_matrix, c2); - Lib_Memzero0_memzero(v_matrix, 64U, uint16_t); - Lib_Memzero0_memzero(sp_matrix, 7808U, uint16_t); - Lib_Memzero0_memzero(ep_matrix, 7808U, uint16_t); - Lib_Memzero0_memzero(epp_matrix, 64U, uint16_t); + Lib_Memzero0_memzero(v_matrix, 64U, uint16_t, void *); + Lib_Memzero0_memzero(sp_matrix, 7808U, uint16_t, void *); + Lib_Memzero0_memzero(ep_matrix, 7808U, uint16_t, void *); + Lib_Memzero0_memzero(epp_matrix, 64U, uint16_t, void *); uint32_t ss_init_len = 15768U; KRML_CHECK_SIZE(sizeof (uint8_t), ss_init_len); uint8_t *shake_input_ss = (uint8_t *)alloca(ss_init_len * sizeof (uint8_t)); memset(shake_input_ss, 0U, ss_init_len * sizeof (uint8_t)); memcpy(shake_input_ss, ct, 15744U * sizeof (uint8_t)); memcpy(shake_input_ss + 15744U, k, 24U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake256_hacl(ss_init_len, shake_input_ss, 24U, ss); - Lib_Memzero0_memzero(shake_input_ss, ss_init_len, uint8_t); - Lib_Memzero0_memzero(seed_se_k, 48U, uint8_t); - Lib_Memzero0_memzero(coins, 24U, uint8_t); + Hacl_Hash_SHA3_shake256(ss, 24U, shake_input_ss, ss_init_len); + Lib_Memzero0_memzero(shake_input_ss, ss_init_len, uint8_t, void *); + Lib_Memzero0_memzero(seed_se_k, 48U, uint8_t, void *); + Lib_Memzero0_memzero(coins, 24U, uint8_t, void *); return 0U; } @@ -154,8 +154,8 @@ uint32_t Hacl_Frodo976_crypto_kem_dec(uint8_t *ss, uint8_t *ct, uint8_t *sk) Hacl_Impl_Matrix_matrix_mul_s(8U, 976U, 8U, bp_matrix, s_matrix, m_matrix); Hacl_Impl_Matrix_matrix_sub(8U, 8U, c_matrix, m_matrix); Hacl_Impl_Frodo_Encode_frodo_key_decode(16U, 3U, 8U, m_matrix, mu_decode); - Lib_Memzero0_memzero(s_matrix, 7808U, uint16_t); - Lib_Memzero0_memzero(m_matrix, 64U, uint16_t); + Lib_Memzero0_memzero(s_matrix, 7808U, uint16_t, void *); + 
Lib_Memzero0_memzero(m_matrix, 64U, uint16_t, void *); uint8_t seed_se_k[48U] = { 0U }; uint32_t pkh_mu_decode_len = 48U; KRML_CHECK_SIZE(sizeof (uint8_t), pkh_mu_decode_len); @@ -164,7 +164,7 @@ uint32_t Hacl_Frodo976_crypto_kem_dec(uint8_t *ss, uint8_t *ct, uint8_t *sk) uint8_t *pkh = sk + 31272U; memcpy(pkh_mu_decode, pkh, 24U * sizeof (uint8_t)); memcpy(pkh_mu_decode + 24U, mu_decode, 24U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake256_hacl(pkh_mu_decode_len, pkh_mu_decode, 48U, seed_se_k); + Hacl_Hash_SHA3_shake256(seed_se_k, 48U, pkh_mu_decode, pkh_mu_decode_len); uint8_t *seed_se = seed_se_k; uint8_t *kp = seed_se_k + 24U; uint8_t *s = sk; @@ -177,8 +177,8 @@ uint32_t Hacl_Frodo976_crypto_kem_dec(uint8_t *ss, uint8_t *ct, uint8_t *sk) uint8_t shake_input_seed_se[25U] = { 0U }; shake_input_seed_se[0U] = 0x96U; memcpy(shake_input_seed_se + 1U, seed_se, 24U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake256_hacl(25U, shake_input_seed_se, 31360U, r); - Lib_Memzero0_memzero(shake_input_seed_se, 25U, uint8_t); + Hacl_Hash_SHA3_shake256(r, 31360U, shake_input_seed_se, 25U); + Lib_Memzero0_memzero(shake_input_seed_se, 25U, uint8_t, void *); Hacl_Impl_Frodo_Sample_frodo_sample_matrix976(8U, 976U, r, sp_matrix); Hacl_Impl_Frodo_Sample_frodo_sample_matrix976(8U, 976U, r + 15616U, ep_matrix); Hacl_Impl_Frodo_Sample_frodo_sample_matrix976(8U, 8U, r + 31232U, epp_matrix); @@ -197,12 +197,12 @@ uint32_t Hacl_Frodo976_crypto_kem_dec(uint8_t *ss, uint8_t *ct, uint8_t *sk) uint16_t mu_encode[64U] = { 0U }; Hacl_Impl_Frodo_Encode_frodo_key_encode(16U, 3U, 8U, mu_decode, mu_encode); Hacl_Impl_Matrix_matrix_add(8U, 8U, cp_matrix, mu_encode); - Lib_Memzero0_memzero(mu_encode, 64U, uint16_t); + Lib_Memzero0_memzero(mu_encode, 64U, uint16_t, void *); Hacl_Impl_Matrix_mod_pow2(8U, 976U, 16U, bpp_matrix); Hacl_Impl_Matrix_mod_pow2(8U, 8U, 16U, cp_matrix); - Lib_Memzero0_memzero(sp_matrix, 7808U, uint16_t); - Lib_Memzero0_memzero(ep_matrix, 7808U, uint16_t); - Lib_Memzero0_memzero(epp_matrix, 64U, uint16_t); + Lib_Memzero0_memzero(sp_matrix, 7808U, uint16_t, void *); + Lib_Memzero0_memzero(ep_matrix, 7808U, uint16_t, void *); + Lib_Memzero0_memzero(epp_matrix, 64U, uint16_t, void *); uint16_t b1 = Hacl_Impl_Matrix_matrix_eq(8U, 976U, bp_matrix, bpp_matrix); uint16_t b2 = Hacl_Impl_Matrix_matrix_eq(8U, 8U, c_matrix, cp_matrix); uint16_t mask = (uint32_t)b1 & (uint32_t)b2; @@ -222,11 +222,11 @@ uint32_t Hacl_Frodo976_crypto_kem_dec(uint8_t *ss, uint8_t *ct, uint8_t *sk) memset(ss_init, 0U, ss_init_len * sizeof (uint8_t)); memcpy(ss_init, ct, 15744U * sizeof (uint8_t)); memcpy(ss_init + 15744U, kp_s, 24U * sizeof (uint8_t)); - Hacl_Hash_SHA3_shake256_hacl(ss_init_len, ss_init, 24U, ss); - Lib_Memzero0_memzero(ss_init, ss_init_len, uint8_t); - Lib_Memzero0_memzero(kp_s, 24U, uint8_t); - Lib_Memzero0_memzero(seed_se_k, 48U, uint8_t); - Lib_Memzero0_memzero(mu_decode, 24U, uint8_t); + Hacl_Hash_SHA3_shake256(ss, 24U, ss_init, ss_init_len); + Lib_Memzero0_memzero(ss_init, ss_init_len, uint8_t, void *); + Lib_Memzero0_memzero(kp_s, 24U, uint8_t, void *); + Lib_Memzero0_memzero(seed_se_k, 48U, uint8_t, void *); + Lib_Memzero0_memzero(mu_decode, 24U, uint8_t, void *); return 0U; } diff --git a/src/msvc/Hacl_Hash_Blake2b.c b/src/msvc/Hacl_Hash_Blake2b.c index 2dceaf4b..d490a1a5 100644 --- a/src/msvc/Hacl_Hash_Blake2b.c +++ b/src/msvc/Hacl_Hash_Blake2b.c @@ -76,22 +76,22 @@ update_block(uint64_t *wv, uint64_t *hash, bool flag, FStar_UInt128_uint128 totl uint64_t *r1 = m_st + 4U; uint64_t *r20 = m_st + 8U; uint64_t *r30 = m_st 
+ 12U; - uint32_t s0 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 0U]; - uint32_t s1 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 1U]; - uint32_t s2 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 2U]; - uint32_t s3 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 3U]; - uint32_t s4 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 4U]; - uint32_t s5 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 5U]; - uint32_t s6 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 6U]; - uint32_t s7 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 7U]; - uint32_t s8 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 8U]; - uint32_t s9 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 9U]; - uint32_t s10 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 10U]; - uint32_t s11 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 11U]; - uint32_t s12 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 12U]; - uint32_t s13 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 13U]; - uint32_t s14 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 14U]; - uint32_t s15 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 15U]; + uint32_t s0 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 0U]; + uint32_t s1 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 1U]; + uint32_t s2 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 2U]; + uint32_t s3 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 3U]; + uint32_t s4 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 4U]; + uint32_t s5 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 5U]; + uint32_t s6 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 6U]; + uint32_t s7 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 7U]; + uint32_t s8 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 8U]; + uint32_t s9 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 9U]; + uint32_t s10 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 10U]; + uint32_t s11 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 11U]; + uint32_t s12 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 12U]; + uint32_t s13 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 13U]; + uint32_t s14 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 14U]; + uint32_t s15 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 15U]; uint64_t uu____0 = m_w[s2]; uint64_t uu____1 = m_w[s4]; uint64_t uu____2 = m_w[s6]; @@ -474,18 +474,27 @@ update_block(uint64_t *wv, uint64_t *hash, bool flag, FStar_UInt128_uint128 totl void Hacl_Hash_Blake2b_init(uint64_t *hash, uint32_t kk, uint32_t nn) { + uint8_t salt[16U] = { 0U }; + uint8_t personal[16U] = { 0U }; + Hacl_Hash_Blake2b_blake2_params + p = + { + .digest_length = 64U, .key_length = 0U, .fanout = 1U, .depth = 1U, .leaf_length = 0U, + .node_offset = 0ULL, .node_depth = 0U, .inner_length = 0U, .salt = salt, .personal = personal + }; + uint64_t tmp[8U] = { 0U }; uint64_t *r0 = hash; uint64_t *r1 = hash + 4U; uint64_t *r2 = hash + 8U; uint64_t *r3 = hash + 12U; - uint64_t iv0 = Hacl_Hash_Blake2s_ivTable_B[0U]; - uint64_t iv1 = Hacl_Hash_Blake2s_ivTable_B[1U]; - uint64_t iv2 = Hacl_Hash_Blake2s_ivTable_B[2U]; - uint64_t iv3 = Hacl_Hash_Blake2s_ivTable_B[3U]; - uint64_t iv4 = Hacl_Hash_Blake2s_ivTable_B[4U]; - uint64_t iv5 = Hacl_Hash_Blake2s_ivTable_B[5U]; - uint64_t iv6 = Hacl_Hash_Blake2s_ivTable_B[6U]; - uint64_t iv7 = Hacl_Hash_Blake2s_ivTable_B[7U]; + uint64_t iv0 = Hacl_Hash_Blake2b_ivTable_B[0U]; + uint64_t iv1 = Hacl_Hash_Blake2b_ivTable_B[1U]; + uint64_t iv2 = Hacl_Hash_Blake2b_ivTable_B[2U]; + uint64_t iv3 = Hacl_Hash_Blake2b_ivTable_B[3U]; + uint64_t iv4 = Hacl_Hash_Blake2b_ivTable_B[4U]; + uint64_t iv5 = Hacl_Hash_Blake2b_ivTable_B[5U]; + uint64_t iv6 = Hacl_Hash_Blake2b_ivTable_B[6U]; + uint64_t iv7 = Hacl_Hash_Blake2b_ivTable_B[7U]; r2[0U] = iv0; r2[1U] = iv1; r2[2U] = iv2; @@ -494,16 
+503,141 @@ void Hacl_Hash_Blake2b_init(uint64_t *hash, uint32_t kk, uint32_t nn) r3[1U] = iv5; r3[2U] = iv6; r3[3U] = iv7; - uint64_t kk_shift_8 = (uint64_t)kk << 8U; - uint64_t iv0_ = iv0 ^ (0x01010000ULL ^ (kk_shift_8 ^ (uint64_t)nn)); + uint8_t kk1 = (uint8_t)kk; + uint8_t nn1 = (uint8_t)nn; + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 4U; + uint8_t *bj = p.salt + i * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i] = x;); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 6U; + uint8_t *bj = p.personal + i * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i] = x;); + tmp[0U] = + (uint64_t)nn1 + ^ + ((uint64_t)kk1 + << 8U + ^ ((uint64_t)p.fanout << 16U ^ ((uint64_t)p.depth << 24U ^ (uint64_t)p.leaf_length << 32U))); + tmp[1U] = p.node_offset; + tmp[2U] = (uint64_t)p.node_depth ^ (uint64_t)p.inner_length << 8U; + tmp[3U] = 0ULL; + uint64_t tmp0 = tmp[0U]; + uint64_t tmp1 = tmp[1U]; + uint64_t tmp2 = tmp[2U]; + uint64_t tmp3 = tmp[3U]; + uint64_t tmp4 = tmp[4U]; + uint64_t tmp5 = tmp[5U]; + uint64_t tmp6 = tmp[6U]; + uint64_t tmp7 = tmp[7U]; + uint64_t iv0_ = iv0 ^ tmp0; + uint64_t iv1_ = iv1 ^ tmp1; + uint64_t iv2_ = iv2 ^ tmp2; + uint64_t iv3_ = iv3 ^ tmp3; + uint64_t iv4_ = iv4 ^ tmp4; + uint64_t iv5_ = iv5 ^ tmp5; + uint64_t iv6_ = iv6 ^ tmp6; + uint64_t iv7_ = iv7 ^ tmp7; r0[0U] = iv0_; - r0[1U] = iv1; - r0[2U] = iv2; - r0[3U] = iv3; - r1[0U] = iv4; - r1[1U] = iv5; - r1[2U] = iv6; - r1[3U] = iv7; + r0[1U] = iv1_; + r0[2U] = iv2_; + r0[3U] = iv3_; + r1[0U] = iv4_; + r1[1U] = iv5_; + r1[2U] = iv6_; + r1[3U] = iv7_; +} + +static void init_with_params(uint64_t *hash, Hacl_Hash_Blake2b_blake2_params p) +{ + uint64_t tmp[8U] = { 0U }; + uint64_t *r0 = hash; + uint64_t *r1 = hash + 4U; + uint64_t *r2 = hash + 8U; + uint64_t *r3 = hash + 12U; + uint64_t iv0 = Hacl_Hash_Blake2b_ivTable_B[0U]; + uint64_t iv1 = Hacl_Hash_Blake2b_ivTable_B[1U]; + uint64_t iv2 = Hacl_Hash_Blake2b_ivTable_B[2U]; + uint64_t iv3 = Hacl_Hash_Blake2b_ivTable_B[3U]; + uint64_t iv4 = Hacl_Hash_Blake2b_ivTable_B[4U]; + uint64_t iv5 = Hacl_Hash_Blake2b_ivTable_B[5U]; + uint64_t iv6 = Hacl_Hash_Blake2b_ivTable_B[6U]; + uint64_t iv7 = Hacl_Hash_Blake2b_ivTable_B[7U]; + r2[0U] = iv0; + r2[1U] = iv1; + r2[2U] = iv2; + r2[3U] = iv3; + r3[0U] = iv4; + r3[1U] = iv5; + r3[2U] = iv6; + r3[3U] = iv7; + uint8_t kk = p.key_length; + uint8_t nn = p.digest_length; + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 4U; + uint8_t *bj = p.salt + i * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i] = x;); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 6U; + uint8_t *bj = p.personal + i * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i] = x;); + tmp[0U] = + (uint64_t)nn + ^ + ((uint64_t)kk + << 8U + ^ ((uint64_t)p.fanout << 16U ^ ((uint64_t)p.depth << 24U ^ (uint64_t)p.leaf_length << 32U))); + tmp[1U] = p.node_offset; + tmp[2U] = (uint64_t)p.node_depth ^ (uint64_t)p.inner_length << 8U; + tmp[3U] = 0ULL; + uint64_t tmp0 = tmp[0U]; + uint64_t tmp1 = tmp[1U]; + uint64_t tmp2 = tmp[2U]; + uint64_t tmp3 = tmp[3U]; + uint64_t tmp4 = tmp[4U]; + uint64_t tmp5 = tmp[5U]; + uint64_t tmp6 = tmp[6U]; + uint64_t tmp7 = tmp[7U]; + uint64_t iv0_ = iv0 ^ tmp0; + uint64_t iv1_ = iv1 ^ tmp1; + uint64_t iv2_ = iv2 ^ tmp2; + uint64_t iv3_ = iv3 ^ tmp3; + uint64_t iv4_ = iv4 ^ tmp4; + uint64_t iv5_ = iv5 ^ tmp5; + uint64_t iv6_ = iv6 ^ tmp6; + uint64_t iv7_ = iv7 ^ tmp7; + r0[0U] = iv0_; + 
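/* Editorial note: tmp[0..7] is the 64-byte BLAKE2b parameter block, loaded
   as eight little-endian 64-bit words and XORed into the IV, per the BLAKE2
   specification. With the defaults used by this function (fanout = depth = 1,
   salt, personal, leaf_length and the node fields all zero), tmp[0] reduces
   to (uint64_t)nn ^ ((uint64_t)kk << 8U) ^ 0x01010000ULL, which is exactly
   the one constant the old specialized code XORed into iv0, and tmp[1..7]
   are zero, matching the old behaviour of passing iv1..iv7 through
   unchanged. */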
r0[1U] = iv1_; + r0[2U] = iv2_; + r0[3U] = iv3_; + r1[0U] = iv4_; + r1[1U] = iv5_; + r1[2U] = iv6_; + r1[3U] = iv7_; } static void update_key(uint64_t *wv, uint64_t *hash, uint32_t kk, uint8_t *k, uint32_t ll) @@ -519,7 +653,7 @@ static void update_key(uint64_t *wv, uint64_t *hash, uint32_t kk, uint8_t *k, ui { update_block(wv, hash, false, lb, b); } - Lib_Memzero0_memzero(b, 128U, uint8_t); + Lib_Memzero0_memzero(b, 128U, uint8_t, void *); } void @@ -560,7 +694,7 @@ Hacl_Hash_Blake2b_update_last( FStar_UInt128_uint128 totlen = FStar_UInt128_add_mod(prev, FStar_UInt128_uint64_to_uint128((uint64_t)len)); update_block(wv, hash, true, totlen, b); - Lib_Memzero0_memzero(b, 128U, uint8_t); + Lib_Memzero0_memzero(b, 128U, uint8_t, void *); } static void @@ -624,43 +758,223 @@ void Hacl_Hash_Blake2b_finish(uint32_t nn, uint8_t *output, uint64_t *hash) KRML_MAYBE_FOR4(i, 0U, 4U, 1U, store64_le(second + i * 8U, row1[i]);); uint8_t *final = b; memcpy(output, final, nn * sizeof (uint8_t)); - Lib_Memzero0_memzero(b, 64U, uint8_t); + Lib_Memzero0_memzero(b, 64U, uint8_t, void *); } -/** - State allocation function when there is no key -*/ -Hacl_Hash_Blake2b_state_t *Hacl_Hash_Blake2b_malloc(void) +static Hacl_Hash_Blake2b_state_t +*malloc_raw( + Hacl_Hash_Blake2b_index kk, + K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_ key +) { uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(128U, sizeof (uint8_t)); uint64_t *wv = (uint64_t *)KRML_HOST_CALLOC(16U, sizeof (uint64_t)); uint64_t *b = (uint64_t *)KRML_HOST_CALLOC(16U, sizeof (uint64_t)); - Hacl_Hash_Blake2b_block_state_t block_state = { .fst = wv, .snd = b }; + Hacl_Hash_Blake2b_block_state_t + block_state = { .fst = kk.key_length, .snd = kk.digest_length, .thd = { .fst = wv, .snd = b } }; + uint8_t kk10 = kk.key_length; + uint32_t ite; + if (kk10 != 0U) + { + ite = 128U; + } + else + { + ite = 0U; + } Hacl_Hash_Blake2b_state_t - s = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)0U }; + s = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; Hacl_Hash_Blake2b_state_t *p = (Hacl_Hash_Blake2b_state_t *)KRML_HOST_MALLOC(sizeof (Hacl_Hash_Blake2b_state_t)); p[0U] = s; - Hacl_Hash_Blake2b_init(block_state.snd, 0U, 64U); + Hacl_Hash_Blake2b_blake2_params *p1 = key.fst; + uint8_t kk1 = p1->key_length; + uint8_t nn = p1->digest_length; + Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; + uint32_t kk2 = (uint32_t)i.key_length; + uint8_t *k_1 = key.snd; + if (!(kk2 == 0U)) + { + uint8_t *sub_b = buf + kk2; + memset(sub_b, 0U, (128U - kk2) * sizeof (uint8_t)); + memcpy(buf, k_1, kk2 * sizeof (uint8_t)); + } + Hacl_Hash_Blake2b_blake2_params pv = p1[0U]; + init_with_params(block_state.thd.snd, pv); return p; } /** - Re-initialization function when there is no key + General-purpose allocation function that gives control over all +Blake2 parameters, including the key. Further resettings of the state SHALL be +done with `reset_with_params_and_key`, and SHALL feature the exact same values +for the `key_length` and `digest_length` fields as passed here. In other words, +once you commit to a digest and key length, the only way to change these +parameters is to allocate a new object. + +The caller must satisfy the following requirements. +- The length of the key k MUST match the value of the field key_length in the + parameters. +- The key_length must not exceed 32 for S, 64 for B. +- The digest_length must not exceed 32 for S, 64 for B. 
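As an illustrative sketch (not a normative part of this comment; all names
below are caller-chosen), allocating a keyed state with a 32-byte digest
could look like:

  uint8_t my_salt[16U] = { 0U };
  uint8_t my_personal[16U] = { 0U };
  uint8_t my_key[32U] = { 0U };
  Hacl_Hash_Blake2b_blake2_params params =
  {
    .digest_length = 32U, .key_length = 32U, .fanout = 1U, .depth = 1U,
    .leaf_length = 0U, .node_offset = 0ULL, .node_depth = 0U, .inner_length = 0U,
    .salt = my_salt, .personal = my_personal
  };
  Hacl_Hash_Blake2b_state_t
  *st = Hacl_Hash_Blake2b_malloc_with_params_and_key(&params, my_key);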
+ +*/ +Hacl_Hash_Blake2b_state_t +*Hacl_Hash_Blake2b_malloc_with_params_and_key(Hacl_Hash_Blake2b_blake2_params *p, uint8_t *k) +{ + Hacl_Hash_Blake2b_blake2_params pv = p[0U]; + Hacl_Hash_Blake2b_index + i1 = { .key_length = pv.key_length, .digest_length = pv.digest_length }; + return + malloc_raw(i1, + ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = p, .snd = k })); +} + +/** + Specialized allocation function that picks default values for all +parameters, except for the key_length. Further resettings of the state SHALL be +done with `reset_with_key`, and SHALL feature the exact same key length `kk` as +passed here. In other words, once you commit to a key length, the only way to +change this parameter is to allocate a new object. + +The caller must satisfy the following requirements. +- The key_length must not exceed 32 for S, 64 for B. + */ -void Hacl_Hash_Blake2b_reset(Hacl_Hash_Blake2b_state_t *state) +Hacl_Hash_Blake2b_state_t *Hacl_Hash_Blake2b_malloc_with_key(uint8_t *k, uint8_t kk) +{ + uint8_t nn = 64U; + Hacl_Hash_Blake2b_index i = { .key_length = kk, .digest_length = nn }; + uint8_t salt[16U] = { 0U }; + uint8_t personal[16U] = { 0U }; + Hacl_Hash_Blake2b_blake2_params + p = + { + .digest_length = i.digest_length, .key_length = i.key_length, .fanout = 1U, .depth = 1U, + .leaf_length = 0U, .node_offset = 0ULL, .node_depth = 0U, .inner_length = 0U, .salt = salt, + .personal = personal + }; + Hacl_Hash_Blake2b_blake2_params p0 = p; + Hacl_Hash_Blake2b_state_t *s = Hacl_Hash_Blake2b_malloc_with_params_and_key(&p0, k); + return s; +} + +/** + Specialized allocation function that picks default values for all +parameters, and has no key. Effectively, this is what you want if you intend to +use Blake2 as a hash function. Further resettings of the state SHALL be done with `reset`. 
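A minimal streaming sketch under these defaults (data and data_len are
hypothetical, and the error code returned by update is ignored for brevity):

  Hacl_Hash_Blake2b_state_t *st = Hacl_Hash_Blake2b_malloc();
  Hacl_Hash_Blake2b_update(st, data, data_len);
  uint8_t out[64U] = { 0U };
  Hacl_Hash_Blake2b_digest(st, out);
  Hacl_Hash_Blake2b_free(st);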
+*/ +Hacl_Hash_Blake2b_state_t *Hacl_Hash_Blake2b_malloc(void) +{ + return Hacl_Hash_Blake2b_malloc_with_key(NULL, 0U); +} + +static Hacl_Hash_Blake2b_index index_of_state(Hacl_Hash_Blake2b_state_t *s) +{ + Hacl_Hash_Blake2b_block_state_t block_state = (*s).block_state; + uint8_t nn = block_state.snd; + uint8_t kk1 = block_state.fst; + return ((Hacl_Hash_Blake2b_index){ .key_length = kk1, .digest_length = nn }); +} + +static void +reset_raw( + Hacl_Hash_Blake2b_state_t *state, + K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_ key +) { Hacl_Hash_Blake2b_state_t scrut = *state; uint8_t *buf = scrut.buf; Hacl_Hash_Blake2b_block_state_t block_state = scrut.block_state; - Hacl_Hash_Blake2b_init(block_state.snd, 0U, 64U); + uint8_t nn0 = block_state.snd; + uint8_t kk10 = block_state.fst; + Hacl_Hash_Blake2b_index i = { .key_length = kk10, .digest_length = nn0 }; + KRML_MAYBE_UNUSED_VAR(i); + Hacl_Hash_Blake2b_blake2_params *p = key.fst; + uint8_t kk1 = p->key_length; + uint8_t nn = p->digest_length; + Hacl_Hash_Blake2b_index i1 = { .key_length = kk1, .digest_length = nn }; + uint32_t kk2 = (uint32_t)i1.key_length; + uint8_t *k_1 = key.snd; + if (!(kk2 == 0U)) + { + uint8_t *sub_b = buf + kk2; + memset(sub_b, 0U, (128U - kk2) * sizeof (uint8_t)); + memcpy(buf, k_1, kk2 * sizeof (uint8_t)); + } + Hacl_Hash_Blake2b_blake2_params pv = p[0U]; + init_with_params(block_state.thd.snd, pv); + uint8_t kk11 = i.key_length; + uint32_t ite; + if (kk11 != 0U) + { + ite = 128U; + } + else + { + ite = 0U; + } Hacl_Hash_Blake2b_state_t - tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)0U }; + tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; state[0U] = tmp; } /** - Update function when there is no key; 0 = success, 1 = max length exceeded + General-purpose re-initialization function with parameters and +key. You cannot change digest_length or key_length, meaning those values in +the parameters object must be the same as originally decided via one of the +malloc functions. All other values of the parameter can be changed. The behavior +is unspecified if you violate this precondition. +*/ +void +Hacl_Hash_Blake2b_reset_with_key_and_params( + Hacl_Hash_Blake2b_state_t *s, + Hacl_Hash_Blake2b_blake2_params *p, + uint8_t *k +) +{ + index_of_state(s); + reset_raw(s, ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = p, .snd = k })); +} + +/** + Specialized-purpose re-initialization function with no parameters, +and a key. The key length must be the same as originally decided via your choice +of malloc function. All other parameters are reset to their default values. The +original call to malloc MUST have set digest_length to the default value. The +behavior is unspecified if you violate this precondition. +*/ +void Hacl_Hash_Blake2b_reset_with_key(Hacl_Hash_Blake2b_state_t *s, uint8_t *k) +{ + Hacl_Hash_Blake2b_index idx = index_of_state(s); + uint8_t salt[16U] = { 0U }; + uint8_t personal[16U] = { 0U }; + Hacl_Hash_Blake2b_blake2_params + p = + { + .digest_length = idx.digest_length, .key_length = idx.key_length, .fanout = 1U, .depth = 1U, + .leaf_length = 0U, .node_offset = 0ULL, .node_depth = 0U, .inner_length = 0U, .salt = salt, + .personal = personal + }; + Hacl_Hash_Blake2b_blake2_params p0 = p; + reset_raw(s, ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = &p0, .snd = k })); +} + +/** + Specialized-purpose re-initialization function with no parameters +and no key. 
This is what you want if you intend to use Blake2 as a hash +function. The key length and digest length must have been set to their +respective default values via your choice of malloc function (always true if you +used `malloc`). All other parameters are reset to their default values. The +behavior is unspecified if you violate this precondition. +*/ +void Hacl_Hash_Blake2b_reset(Hacl_Hash_Blake2b_state_t *s) +{ + Hacl_Hash_Blake2b_reset_with_key(s, NULL); +} + +/** + Update function; 0 = success, 1 = max length exceeded */ Hacl_Streaming_Types_error_code Hacl_Hash_Blake2b_update(Hacl_Hash_Blake2b_state_t *state, uint8_t *chunk, uint32_t chunk_len) @@ -726,8 +1040,9 @@ Hacl_Hash_Blake2b_update(Hacl_Hash_Blake2b_state_t *state, uint8_t *chunk, uint3 if (!(sz1 == 0U)) { uint64_t prevlen = total_len1 - (uint64_t)sz1; - uint64_t *wv = block_state1.fst; - uint64_t *hash = block_state1.snd; + K____uint64_t___uint64_t_ acc = block_state1.thd; + uint64_t *wv = acc.fst; + uint64_t *hash = acc.snd; uint32_t nb = 1U; Hacl_Hash_Blake2b_update_multi(128U, wv, @@ -750,8 +1065,9 @@ Hacl_Hash_Blake2b_update(Hacl_Hash_Blake2b_state_t *state, uint8_t *chunk, uint3 uint32_t data2_len = chunk_len - data1_len; uint8_t *data1 = chunk; uint8_t *data2 = chunk + data1_len; - uint64_t *wv = block_state1.fst; - uint64_t *hash = block_state1.snd; + K____uint64_t___uint64_t_ acc = block_state1.thd; + uint64_t *wv = acc.fst; + uint64_t *hash = acc.snd; uint32_t nb = data1_len / 128U; Hacl_Hash_Blake2b_update_multi(data1_len, wv, @@ -817,8 +1133,9 @@ Hacl_Hash_Blake2b_update(Hacl_Hash_Blake2b_state_t *state, uint8_t *chunk, uint3 if (!(sz1 == 0U)) { uint64_t prevlen = total_len1 - (uint64_t)sz1; - uint64_t *wv = block_state1.fst; - uint64_t *hash = block_state1.snd; + K____uint64_t___uint64_t_ acc = block_state1.thd; + uint64_t *wv = acc.fst; + uint64_t *hash = acc.snd; uint32_t nb = 1U; Hacl_Hash_Blake2b_update_multi(128U, wv, @@ -842,8 +1159,9 @@ Hacl_Hash_Blake2b_update(Hacl_Hash_Blake2b_state_t *state, uint8_t *chunk, uint3 uint32_t data2_len = chunk_len - diff - data1_len; uint8_t *data1 = chunk2; uint8_t *data2 = chunk2 + data1_len; - uint64_t *wv = block_state1.fst; - uint64_t *hash = block_state1.snd; + K____uint64_t___uint64_t_ acc = block_state1.thd; + uint64_t *wv = acc.fst; + uint64_t *hash = acc.snd; uint32_t nb = data1_len / 128U; Hacl_Hash_Blake2b_update_multi(data1_len, wv, @@ -867,10 +1185,20 @@ Hacl_Hash_Blake2b_update(Hacl_Hash_Blake2b_state_t *state, uint8_t *chunk, uint3 } /** - Finish function when there is no key + Digest function. This function expects the `output` array to hold +at least `digest_length` bytes, where `digest_length` was determined by your +choice of `malloc` function. Concretely, if you used `malloc` or +`malloc_with_key`, then the expected length is 32 for S, or 64 for B (default +digest length). If you used `malloc_with_params_and_key`, then the expected +length is whatever you chose for the `digest_length` field of your +parameters. 
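For instance, if the state was allocated with params.digest_length = 32, a
matching call is (sketch; st is a hypothetical state):

  uint8_t out[32U] = { 0U };
  Hacl_Hash_Blake2b_digest(st, out);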
*/ void Hacl_Hash_Blake2b_digest(Hacl_Hash_Blake2b_state_t *state, uint8_t *output) { + Hacl_Hash_Blake2b_block_state_t block_state0 = (*state).block_state; + uint8_t nn = block_state0.snd; + uint8_t kk1 = block_state0.fst; + Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; Hacl_Hash_Blake2b_state_t scrut = *state; Hacl_Hash_Blake2b_block_state_t block_state = scrut.block_state; uint8_t *buf_ = scrut.buf; @@ -887,9 +1215,11 @@ void Hacl_Hash_Blake2b_digest(Hacl_Hash_Blake2b_state_t *state, uint8_t *output) uint8_t *buf_1 = buf_; uint64_t wv0[16U] = { 0U }; uint64_t b[16U] = { 0U }; - Hacl_Hash_Blake2b_block_state_t tmp_block_state = { .fst = wv0, .snd = b }; - uint64_t *src_b = block_state.snd; - uint64_t *dst_b = tmp_block_state.snd; + Hacl_Hash_Blake2b_block_state_t + tmp_block_state = + { .fst = i.key_length, .snd = i.digest_length, .thd = { .fst = wv0, .snd = b } }; + uint64_t *src_b = block_state.thd.snd; + uint64_t *dst_b = tmp_block_state.thd.snd; memcpy(dst_b, src_b, 16U * sizeof (uint64_t)); uint64_t prev_len = total_len - (uint64_t)r; uint32_t ite; @@ -903,8 +1233,9 @@ void Hacl_Hash_Blake2b_digest(Hacl_Hash_Blake2b_state_t *state, uint8_t *output) } uint8_t *buf_last = buf_1 + r - ite; uint8_t *buf_multi = buf_1; - uint64_t *wv1 = tmp_block_state.fst; - uint64_t *hash0 = tmp_block_state.snd; + K____uint64_t___uint64_t_ acc0 = tmp_block_state.thd; + uint64_t *wv1 = acc0.fst; + uint64_t *hash0 = acc0.snd; uint32_t nb = 0U; Hacl_Hash_Blake2b_update_multi(0U, wv1, @@ -913,15 +1244,17 @@ void Hacl_Hash_Blake2b_digest(Hacl_Hash_Blake2b_state_t *state, uint8_t *output) buf_multi, nb); uint64_t prev_len_last = total_len - (uint64_t)r; - uint64_t *wv = tmp_block_state.fst; - uint64_t *hash = tmp_block_state.snd; + K____uint64_t___uint64_t_ acc = tmp_block_state.thd; + uint64_t *wv = acc.fst; + uint64_t *hash = acc.snd; Hacl_Hash_Blake2b_update_last(r, wv, hash, FStar_UInt128_uint64_to_uint128(prev_len_last), r, buf_last); - Hacl_Hash_Blake2b_finish(64U, output, tmp_block_state.snd); + uint8_t nn0 = tmp_block_state.snd; + Hacl_Hash_Blake2b_finish((uint32_t)nn0, output, tmp_block_state.thd.snd); } /** @@ -932,14 +1265,43 @@ void Hacl_Hash_Blake2b_free(Hacl_Hash_Blake2b_state_t *state) Hacl_Hash_Blake2b_state_t scrut = *state; uint8_t *buf = scrut.buf; Hacl_Hash_Blake2b_block_state_t block_state = scrut.block_state; - uint64_t *wv = block_state.fst; - uint64_t *b = block_state.snd; + uint64_t *b = block_state.thd.snd; + uint64_t *wv = block_state.thd.fst; KRML_HOST_FREE(wv); KRML_HOST_FREE(b); KRML_HOST_FREE(buf); KRML_HOST_FREE(state); } +/** + Copying. This preserves all parameters. 
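A typical use is forking a partially-fed state so that a common prefix is
hashed only once (sketch; the prefix and suffix buffers are hypothetical):

  Hacl_Hash_Blake2b_update(st, prefix, prefix_len);
  Hacl_Hash_Blake2b_state_t *st2 = Hacl_Hash_Blake2b_copy(st);
  Hacl_Hash_Blake2b_update(st, suffix_a, a_len);
  Hacl_Hash_Blake2b_update(st2, suffix_b, b_len);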
+*/ +Hacl_Hash_Blake2b_state_t *Hacl_Hash_Blake2b_copy(Hacl_Hash_Blake2b_state_t *state) +{ + Hacl_Hash_Blake2b_state_t scrut = *state; + Hacl_Hash_Blake2b_block_state_t block_state0 = scrut.block_state; + uint8_t *buf0 = scrut.buf; + uint64_t total_len0 = scrut.total_len; + uint8_t nn = block_state0.snd; + uint8_t kk1 = block_state0.fst; + Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; + uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(128U, sizeof (uint8_t)); + memcpy(buf, buf0, 128U * sizeof (uint8_t)); + uint64_t *wv = (uint64_t *)KRML_HOST_CALLOC(16U, sizeof (uint64_t)); + uint64_t *b = (uint64_t *)KRML_HOST_CALLOC(16U, sizeof (uint64_t)); + Hacl_Hash_Blake2b_block_state_t + block_state = { .fst = i.key_length, .snd = i.digest_length, .thd = { .fst = wv, .snd = b } }; + uint64_t *src_b = block_state0.thd.snd; + uint64_t *dst_b = block_state.thd.snd; + memcpy(dst_b, src_b, 16U * sizeof (uint64_t)); + Hacl_Hash_Blake2b_state_t + s = { .block_state = block_state, .buf = buf, .total_len = total_len0 }; + Hacl_Hash_Blake2b_state_t + *p = (Hacl_Hash_Blake2b_state_t *)KRML_HOST_MALLOC(sizeof (Hacl_Hash_Blake2b_state_t)); + p[0U] = s; + return p; +} + /** Write the BLAKE2b digest of message `input` using key `key` into `output`. @@ -965,7 +1327,109 @@ Hacl_Hash_Blake2b_hash_with_key( Hacl_Hash_Blake2b_init(b, key_len, output_len); update(b1, b, key_len, key, input_len, input); Hacl_Hash_Blake2b_finish(output_len, output, b); - Lib_Memzero0_memzero(b1, 16U, uint64_t); - Lib_Memzero0_memzero(b, 16U, uint64_t); + Lib_Memzero0_memzero(b1, 16U, uint64_t, void *); + Lib_Memzero0_memzero(b, 16U, uint64_t, void *); +} + +/** +Write the BLAKE2b digest of message `input` using key `key` and +parameters `params` into `output`. The `key` array must be of length +`params.key_length`. The `output` array must be of length +`params.digest_length`. 
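A one-shot sketch with a custom salt and no key (msg and msg_len are
hypothetical; the function name below is spelled exactly as in the generated
code):

  uint8_t salt[16U] = { 0U };
  uint8_t personal[16U] = { 0U };
  Hacl_Hash_Blake2b_blake2_params params =
  {
    .digest_length = 64U, .key_length = 0U, .fanout = 1U, .depth = 1U,
    .leaf_length = 0U, .node_offset = 0ULL, .node_depth = 0U, .inner_length = 0U,
    .salt = salt, .personal = personal
  };
  uint8_t tag[64U] = { 0U };
  Hacl_Hash_Blake2b_hash_with_key_and_paramas(tag, msg, msg_len, params, NULL);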
+*/ +void +Hacl_Hash_Blake2b_hash_with_key_and_paramas( + uint8_t *output, + uint8_t *input, + uint32_t input_len, + Hacl_Hash_Blake2b_blake2_params params, + uint8_t *key +) +{ + uint64_t b[16U] = { 0U }; + uint64_t b1[16U] = { 0U }; + uint64_t tmp[8U] = { 0U }; + uint64_t *r0 = b; + uint64_t *r1 = b + 4U; + uint64_t *r2 = b + 8U; + uint64_t *r3 = b + 12U; + uint64_t iv0 = Hacl_Hash_Blake2b_ivTable_B[0U]; + uint64_t iv1 = Hacl_Hash_Blake2b_ivTable_B[1U]; + uint64_t iv2 = Hacl_Hash_Blake2b_ivTable_B[2U]; + uint64_t iv3 = Hacl_Hash_Blake2b_ivTable_B[3U]; + uint64_t iv4 = Hacl_Hash_Blake2b_ivTable_B[4U]; + uint64_t iv5 = Hacl_Hash_Blake2b_ivTable_B[5U]; + uint64_t iv6 = Hacl_Hash_Blake2b_ivTable_B[6U]; + uint64_t iv7 = Hacl_Hash_Blake2b_ivTable_B[7U]; + r2[0U] = iv0; + r2[1U] = iv1; + r2[2U] = iv2; + r2[3U] = iv3; + r3[0U] = iv4; + r3[1U] = iv5; + r3[2U] = iv6; + r3[3U] = iv7; + uint8_t kk = params.key_length; + uint8_t nn = params.digest_length; + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 4U; + uint8_t *bj = params.salt + i * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i] = x;); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 6U; + uint8_t *bj = params.personal + i * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i] = x;); + tmp[0U] = + (uint64_t)nn + ^ + ((uint64_t)kk + << 8U + ^ + ((uint64_t)params.fanout + << 16U + ^ ((uint64_t)params.depth << 24U ^ (uint64_t)params.leaf_length << 32U))); + tmp[1U] = params.node_offset; + tmp[2U] = (uint64_t)params.node_depth ^ (uint64_t)params.inner_length << 8U; + tmp[3U] = 0ULL; + uint64_t tmp0 = tmp[0U]; + uint64_t tmp1 = tmp[1U]; + uint64_t tmp2 = tmp[2U]; + uint64_t tmp3 = tmp[3U]; + uint64_t tmp4 = tmp[4U]; + uint64_t tmp5 = tmp[5U]; + uint64_t tmp6 = tmp[6U]; + uint64_t tmp7 = tmp[7U]; + uint64_t iv0_ = iv0 ^ tmp0; + uint64_t iv1_ = iv1 ^ tmp1; + uint64_t iv2_ = iv2 ^ tmp2; + uint64_t iv3_ = iv3 ^ tmp3; + uint64_t iv4_ = iv4 ^ tmp4; + uint64_t iv5_ = iv5 ^ tmp5; + uint64_t iv6_ = iv6 ^ tmp6; + uint64_t iv7_ = iv7 ^ tmp7; + r0[0U] = iv0_; + r0[1U] = iv1_; + r0[2U] = iv2_; + r0[3U] = iv3_; + r1[0U] = iv4_; + r1[1U] = iv5_; + r1[2U] = iv6_; + r1[3U] = iv7_; + update(b1, b, (uint32_t)params.key_length, key, input_len, input); + Hacl_Hash_Blake2b_finish((uint32_t)params.digest_length, output, b); + Lib_Memzero0_memzero(b1, 16U, uint64_t, void *); + Lib_Memzero0_memzero(b, 16U, uint64_t, void *); } diff --git a/src/msvc/Hacl_Hash_Blake2b_Simd256.c b/src/msvc/Hacl_Hash_Blake2b_Simd256.c index 1a5e8cf2..0afd93bc 100644 --- a/src/msvc/Hacl_Hash_Blake2b_Simd256.c +++ b/src/msvc/Hacl_Hash_Blake2b_Simd256.c @@ -26,6 +26,7 @@ #include "internal/Hacl_Hash_Blake2b_Simd256.h" #include "internal/Hacl_Impl_Blake2_Constants.h" +#include "internal/Hacl_Hash_Blake2b.h" #include "lib_memzero0.h" static inline void @@ -77,22 +78,22 @@ update_block( Lib_IntVector_Intrinsics_vec256 *r1 = m_st + 1U; Lib_IntVector_Intrinsics_vec256 *r20 = m_st + 2U; Lib_IntVector_Intrinsics_vec256 *r30 = m_st + 3U; - uint32_t s0 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 0U]; - uint32_t s1 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 1U]; - uint32_t s2 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 2U]; - uint32_t s3 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 3U]; - uint32_t s4 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 4U]; - uint32_t s5 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 5U]; - uint32_t s6 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 6U]; - uint32_t s7 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 
7U]; - uint32_t s8 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 8U]; - uint32_t s9 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 9U]; - uint32_t s10 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 10U]; - uint32_t s11 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 11U]; - uint32_t s12 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 12U]; - uint32_t s13 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 13U]; - uint32_t s14 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 14U]; - uint32_t s15 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 15U]; + uint32_t s0 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 0U]; + uint32_t s1 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 1U]; + uint32_t s2 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 2U]; + uint32_t s3 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 3U]; + uint32_t s4 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 4U]; + uint32_t s5 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 5U]; + uint32_t s6 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 6U]; + uint32_t s7 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 7U]; + uint32_t s8 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 8U]; + uint32_t s9 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 9U]; + uint32_t s10 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 10U]; + uint32_t s11 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 11U]; + uint32_t s12 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 12U]; + uint32_t s13 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 13U]; + uint32_t s14 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 14U]; + uint32_t s15 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 15U]; r0[0U] = Lib_IntVector_Intrinsics_vec256_load64s(m_w[s0], m_w[s2], m_w[s4], m_w[s6]); r1[0U] = Lib_IntVector_Intrinsics_vec256_load64s(m_w[s1], m_w[s3], m_w[s5], m_w[s7]); r20[0U] = Lib_IntVector_Intrinsics_vec256_load64s(m_w[s8], m_w[s10], m_w[s12], m_w[s14]); @@ -214,24 +215,147 @@ update_block( void Hacl_Hash_Blake2b_Simd256_init(Lib_IntVector_Intrinsics_vec256 *hash, uint32_t kk, uint32_t nn) { + uint8_t salt[16U] = { 0U }; + uint8_t personal[16U] = { 0U }; + Hacl_Hash_Blake2b_blake2_params + p = + { + .digest_length = 64U, .key_length = 0U, .fanout = 1U, .depth = 1U, .leaf_length = 0U, + .node_offset = 0ULL, .node_depth = 0U, .inner_length = 0U, .salt = salt, .personal = personal + }; + uint64_t tmp[8U] = { 0U }; + Lib_IntVector_Intrinsics_vec256 *r0 = hash; + Lib_IntVector_Intrinsics_vec256 *r1 = hash + 1U; + Lib_IntVector_Intrinsics_vec256 *r2 = hash + 2U; + Lib_IntVector_Intrinsics_vec256 *r3 = hash + 3U; + uint64_t iv0 = Hacl_Hash_Blake2b_ivTable_B[0U]; + uint64_t iv1 = Hacl_Hash_Blake2b_ivTable_B[1U]; + uint64_t iv2 = Hacl_Hash_Blake2b_ivTable_B[2U]; + uint64_t iv3 = Hacl_Hash_Blake2b_ivTable_B[3U]; + uint64_t iv4 = Hacl_Hash_Blake2b_ivTable_B[4U]; + uint64_t iv5 = Hacl_Hash_Blake2b_ivTable_B[5U]; + uint64_t iv6 = Hacl_Hash_Blake2b_ivTable_B[6U]; + uint64_t iv7 = Hacl_Hash_Blake2b_ivTable_B[7U]; + r2[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv0, iv1, iv2, iv3); + r3[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv4, iv5, iv6, iv7); + uint8_t kk1 = (uint8_t)kk; + uint8_t nn1 = (uint8_t)nn; + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 4U; + uint8_t *bj = p.salt + i * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i] = x;); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 6U; + uint8_t *bj = p.personal + i * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i] = x;); + tmp[0U] = + (uint64_t)nn1 + ^ + ((uint64_t)kk1 + << 8U + ^ ((uint64_t)p.fanout << 16U ^ ((uint64_t)p.depth << 24U ^ (uint64_t)p.leaf_length << 
32U))); + tmp[1U] = p.node_offset; + tmp[2U] = (uint64_t)p.node_depth ^ (uint64_t)p.inner_length << 8U; + tmp[3U] = 0ULL; + uint64_t tmp0 = tmp[0U]; + uint64_t tmp1 = tmp[1U]; + uint64_t tmp2 = tmp[2U]; + uint64_t tmp3 = tmp[3U]; + uint64_t tmp4 = tmp[4U]; + uint64_t tmp5 = tmp[5U]; + uint64_t tmp6 = tmp[6U]; + uint64_t tmp7 = tmp[7U]; + uint64_t iv0_ = iv0 ^ tmp0; + uint64_t iv1_ = iv1 ^ tmp1; + uint64_t iv2_ = iv2 ^ tmp2; + uint64_t iv3_ = iv3 ^ tmp3; + uint64_t iv4_ = iv4 ^ tmp4; + uint64_t iv5_ = iv5 ^ tmp5; + uint64_t iv6_ = iv6 ^ tmp6; + uint64_t iv7_ = iv7 ^ tmp7; + r0[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv0_, iv1_, iv2_, iv3_); + r1[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv4_, iv5_, iv6_, iv7_); +} + +static void +init_with_params(Lib_IntVector_Intrinsics_vec256 *hash, Hacl_Hash_Blake2b_blake2_params p) +{ + uint64_t tmp[8U] = { 0U }; Lib_IntVector_Intrinsics_vec256 *r0 = hash; Lib_IntVector_Intrinsics_vec256 *r1 = hash + 1U; Lib_IntVector_Intrinsics_vec256 *r2 = hash + 2U; Lib_IntVector_Intrinsics_vec256 *r3 = hash + 3U; - uint64_t iv0 = Hacl_Hash_Blake2s_ivTable_B[0U]; - uint64_t iv1 = Hacl_Hash_Blake2s_ivTable_B[1U]; - uint64_t iv2 = Hacl_Hash_Blake2s_ivTable_B[2U]; - uint64_t iv3 = Hacl_Hash_Blake2s_ivTable_B[3U]; - uint64_t iv4 = Hacl_Hash_Blake2s_ivTable_B[4U]; - uint64_t iv5 = Hacl_Hash_Blake2s_ivTable_B[5U]; - uint64_t iv6 = Hacl_Hash_Blake2s_ivTable_B[6U]; - uint64_t iv7 = Hacl_Hash_Blake2s_ivTable_B[7U]; + uint64_t iv0 = Hacl_Hash_Blake2b_ivTable_B[0U]; + uint64_t iv1 = Hacl_Hash_Blake2b_ivTable_B[1U]; + uint64_t iv2 = Hacl_Hash_Blake2b_ivTable_B[2U]; + uint64_t iv3 = Hacl_Hash_Blake2b_ivTable_B[3U]; + uint64_t iv4 = Hacl_Hash_Blake2b_ivTable_B[4U]; + uint64_t iv5 = Hacl_Hash_Blake2b_ivTable_B[5U]; + uint64_t iv6 = Hacl_Hash_Blake2b_ivTable_B[6U]; + uint64_t iv7 = Hacl_Hash_Blake2b_ivTable_B[7U]; r2[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv0, iv1, iv2, iv3); r3[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv4, iv5, iv6, iv7); - uint64_t kk_shift_8 = (uint64_t)kk << 8U; - uint64_t iv0_ = iv0 ^ (0x01010000ULL ^ (kk_shift_8 ^ (uint64_t)nn)); - r0[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv0_, iv1, iv2, iv3); - r1[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv4, iv5, iv6, iv7); + uint8_t kk = p.key_length; + uint8_t nn = p.digest_length; + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 4U; + uint8_t *bj = p.salt + i * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i] = x;); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 6U; + uint8_t *bj = p.personal + i * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i] = x;); + tmp[0U] = + (uint64_t)nn + ^ + ((uint64_t)kk + << 8U + ^ ((uint64_t)p.fanout << 16U ^ ((uint64_t)p.depth << 24U ^ (uint64_t)p.leaf_length << 32U))); + tmp[1U] = p.node_offset; + tmp[2U] = (uint64_t)p.node_depth ^ (uint64_t)p.inner_length << 8U; + tmp[3U] = 0ULL; + uint64_t tmp0 = tmp[0U]; + uint64_t tmp1 = tmp[1U]; + uint64_t tmp2 = tmp[2U]; + uint64_t tmp3 = tmp[3U]; + uint64_t tmp4 = tmp[4U]; + uint64_t tmp5 = tmp[5U]; + uint64_t tmp6 = tmp[6U]; + uint64_t tmp7 = tmp[7U]; + uint64_t iv0_ = iv0 ^ tmp0; + uint64_t iv1_ = iv1 ^ tmp1; + uint64_t iv2_ = iv2 ^ tmp2; + uint64_t iv3_ = iv3 ^ tmp3; + uint64_t iv4_ = iv4 ^ tmp4; + uint64_t iv5_ = iv5 ^ tmp5; + uint64_t iv6_ = iv6 ^ tmp6; + uint64_t iv7_ = iv7 ^ tmp7; + r0[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv0_, iv1_, iv2_, iv3_); + r1[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv4_, iv5_, iv6_, 
iv7_); } static void @@ -254,7 +378,7 @@ update_key( { update_block(wv, hash, false, lb, b); } - Lib_Memzero0_memzero(b, 128U, uint8_t); + Lib_Memzero0_memzero(b, 128U, uint8_t, void *); } void @@ -295,7 +419,7 @@ Hacl_Hash_Blake2b_Simd256_update_last( FStar_UInt128_uint128 totlen = FStar_UInt128_add_mod(prev, FStar_UInt128_uint64_to_uint128((uint64_t)len)); update_block(wv, hash, true, totlen, b); - Lib_Memzero0_memzero(b, 128U, uint8_t); + Lib_Memzero0_memzero(b, 128U, uint8_t, void *); } static inline void @@ -371,7 +495,7 @@ Hacl_Hash_Blake2b_Simd256_finish( Lib_IntVector_Intrinsics_vec256_store64_le(second, row1[0U]); uint8_t *final = b; memcpy(output, final, nn * sizeof (uint8_t)); - Lib_Memzero0_memzero(b, 64U, uint8_t); + Lib_Memzero0_memzero(b, 64U, uint8_t, void *); } void @@ -468,10 +592,11 @@ Lib_IntVector_Intrinsics_vec256 *Hacl_Hash_Blake2b_Simd256_malloc_with_key(void) return buf; } -/** - State allocation function when there is no key -*/ -Hacl_Hash_Blake2b_Simd256_state_t *Hacl_Hash_Blake2b_Simd256_malloc(void) +static Hacl_Hash_Blake2b_Simd256_state_t +*malloc_raw( + Hacl_Hash_Blake2b_index kk, + K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_ key +) { uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(128U, sizeof (uint8_t)); Lib_IntVector_Intrinsics_vec256 @@ -484,33 +609,199 @@ Hacl_Hash_Blake2b_Simd256_state_t *Hacl_Hash_Blake2b_Simd256_malloc(void) (Lib_IntVector_Intrinsics_vec256 *)KRML_ALIGNED_MALLOC(32, sizeof (Lib_IntVector_Intrinsics_vec256) * 4U); memset(b, 0U, 4U * sizeof (Lib_IntVector_Intrinsics_vec256)); - Hacl_Hash_Blake2b_Simd256_block_state_t block_state = { .fst = wv, .snd = b }; + Hacl_Hash_Blake2b_Simd256_block_state_t + block_state = { .fst = kk.key_length, .snd = kk.digest_length, .thd = { .fst = wv, .snd = b } }; + uint8_t kk10 = kk.key_length; + uint32_t ite; + if (kk10 != 0U) + { + ite = 128U; + } + else + { + ite = 0U; + } Hacl_Hash_Blake2b_Simd256_state_t - s = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)0U }; + s = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; Hacl_Hash_Blake2b_Simd256_state_t *p = (Hacl_Hash_Blake2b_Simd256_state_t *)KRML_HOST_MALLOC(sizeof ( Hacl_Hash_Blake2b_Simd256_state_t )); p[0U] = s; - Hacl_Hash_Blake2b_Simd256_init(block_state.snd, 0U, 64U); + Hacl_Hash_Blake2b_blake2_params *p1 = key.fst; + uint8_t kk1 = p1->key_length; + uint8_t nn = p1->digest_length; + Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; + uint32_t kk2 = (uint32_t)i.key_length; + uint8_t *k_1 = key.snd; + if (!(kk2 == 0U)) + { + uint8_t *sub_b = buf + kk2; + memset(sub_b, 0U, (128U - kk2) * sizeof (uint8_t)); + memcpy(buf, k_1, kk2 * sizeof (uint8_t)); + } + Hacl_Hash_Blake2b_blake2_params pv = p1[0U]; + init_with_params(block_state.thd.snd, pv); return p; } /** - Re-initialization function when there is no key + State allocation function when there are parameters and a key. The +length of the key k MUST match the value of the field key_length in the +parameters. Furthermore, there is a static (not dynamically checked) requirement +that key_length does not exceed max_key (32 for S, 64 for B).
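As an illustrative sketch (not normative; the 16-byte salt/personal buffers, the
32-byte key, and all identifiers below are placeholders of this example), a
conforming call would look like:

  uint8_t salt[16U] = { 0U };
  uint8_t personal[16U] = { 0U };
  uint8_t key[32U] = { 0U };  // hypothetical caller-supplied key
  Hacl_Hash_Blake2b_blake2_params params =
  {
    .digest_length = 64U, .key_length = 32U, .fanout = 1U, .depth = 1U,
    .leaf_length = 0U, .node_offset = 0ULL, .node_depth = 0U, .inner_length = 0U,
    .salt = salt, .personal = personal
  };
  // key_length in the parameters (32) matches the actual key length, as required
  Hacl_Hash_Blake2b_Simd256_state_t
  *st = Hacl_Hash_Blake2b_Simd256_malloc_with_params_and_key(&params, key);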
+*/ +Hacl_Hash_Blake2b_Simd256_state_t +*Hacl_Hash_Blake2b_Simd256_malloc_with_params_and_key( + Hacl_Hash_Blake2b_blake2_params *p, + uint8_t *k +) +{ + Hacl_Hash_Blake2b_blake2_params pv = p[0U]; + Hacl_Hash_Blake2b_index + i1 = { .key_length = pv.key_length, .digest_length = pv.digest_length }; + return + malloc_raw(i1, + ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = p, .snd = k })); +} + +/** + State allocation function when there is just a custom key. All +other parameters are set to their respective default values, meaning the output +length is the maximum allowed output (32 for S, 64 for B). +*/ +Hacl_Hash_Blake2b_Simd256_state_t +*Hacl_Hash_Blake2b_Simd256_malloc_with_key0(uint8_t *k, uint8_t kk) +{ + uint8_t nn = 64U; + Hacl_Hash_Blake2b_index i = { .key_length = kk, .digest_length = nn }; + uint8_t *salt = (uint8_t *)KRML_HOST_CALLOC(16U, sizeof (uint8_t)); + uint8_t *personal = (uint8_t *)KRML_HOST_CALLOC(16U, sizeof (uint8_t)); + Hacl_Hash_Blake2b_blake2_params + p = + { + .digest_length = i.digest_length, .key_length = i.key_length, .fanout = 1U, .depth = 1U, + .leaf_length = 0U, .node_offset = 0ULL, .node_depth = 0U, .inner_length = 0U, .salt = salt, + .personal = personal + }; + Hacl_Hash_Blake2b_blake2_params + *p0 = + (Hacl_Hash_Blake2b_blake2_params *)KRML_HOST_MALLOC(sizeof (Hacl_Hash_Blake2b_blake2_params)); + p0[0U] = p; + Hacl_Hash_Blake2b_Simd256_state_t + *s = Hacl_Hash_Blake2b_Simd256_malloc_with_params_and_key(p0, k); + Hacl_Hash_Blake2b_blake2_params p1 = p0[0U]; + KRML_HOST_FREE(p1.salt); + KRML_HOST_FREE(p1.personal); + KRML_HOST_FREE(p0); + return s; +} + +/** + State allocation function when there is no key */ -void Hacl_Hash_Blake2b_Simd256_reset(Hacl_Hash_Blake2b_Simd256_state_t *state) +Hacl_Hash_Blake2b_Simd256_state_t *Hacl_Hash_Blake2b_Simd256_malloc(void) +{ + return Hacl_Hash_Blake2b_Simd256_malloc_with_key0(NULL, 0U); +} + +static Hacl_Hash_Blake2b_index index_of_state(Hacl_Hash_Blake2b_Simd256_state_t *s) +{ + Hacl_Hash_Blake2b_Simd256_block_state_t block_state = (*s).block_state; + uint8_t nn = block_state.snd; + uint8_t kk1 = block_state.fst; + return ((Hacl_Hash_Blake2b_index){ .key_length = kk1, .digest_length = nn }); +} + +static void +reset_raw( + Hacl_Hash_Blake2b_Simd256_state_t *state, + K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_ key +) { Hacl_Hash_Blake2b_Simd256_state_t scrut = *state; uint8_t *buf = scrut.buf; Hacl_Hash_Blake2b_Simd256_block_state_t block_state = scrut.block_state; - Hacl_Hash_Blake2b_Simd256_init(block_state.snd, 0U, 64U); + uint8_t nn0 = block_state.snd; + uint8_t kk10 = block_state.fst; + Hacl_Hash_Blake2b_index i = { .key_length = kk10, .digest_length = nn0 }; + KRML_MAYBE_UNUSED_VAR(i); + Hacl_Hash_Blake2b_blake2_params *p = key.fst; + uint8_t kk1 = p->key_length; + uint8_t nn = p->digest_length; + Hacl_Hash_Blake2b_index i1 = { .key_length = kk1, .digest_length = nn }; + uint32_t kk2 = (uint32_t)i1.key_length; + uint8_t *k_1 = key.snd; + if (!(kk2 == 0U)) + { + uint8_t *sub_b = buf + kk2; + memset(sub_b, 0U, (128U - kk2) * sizeof (uint8_t)); + memcpy(buf, k_1, kk2 * sizeof (uint8_t)); + } + Hacl_Hash_Blake2b_blake2_params pv = p[0U]; + init_with_params(block_state.thd.snd, pv); + uint8_t kk11 = i.key_length; + uint32_t ite; + if (kk11 != 0U) + { + ite = 128U; + } + else + { + ite = 0U; + } Hacl_Hash_Blake2b_Simd256_state_t - tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)0U }; + tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite };
state[0U] = tmp; } +/** + Re-initialization function. The reinitialization API is tricky -- +you MUST reuse the same original parameters for digest (output) length and key +length. +*/ +void +Hacl_Hash_Blake2b_Simd256_reset_with_key_and_params( + Hacl_Hash_Blake2b_Simd256_state_t *s, + Hacl_Hash_Blake2b_blake2_params *p, + uint8_t *k +) +{ + index_of_state(s); + reset_raw(s, ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = p, .snd = k })); +} + +/** + Re-initialization function when there is a key. Note that the key +size is not allowed to change, which is why this function does not take a key +length -- the key has to be the same key size that was originally passed to +`malloc_with_key`. +*/ +void Hacl_Hash_Blake2b_Simd256_reset_with_key(Hacl_Hash_Blake2b_Simd256_state_t *s, uint8_t *k) +{ + Hacl_Hash_Blake2b_index idx = index_of_state(s); + uint8_t salt[16U] = { 0U }; + uint8_t personal[16U] = { 0U }; + Hacl_Hash_Blake2b_blake2_params + p = + { + .digest_length = idx.digest_length, .key_length = idx.key_length, .fanout = 1U, .depth = 1U, + .leaf_length = 0U, .node_offset = 0ULL, .node_depth = 0U, .inner_length = 0U, .salt = salt, + .personal = personal + }; + Hacl_Hash_Blake2b_blake2_params p0 = p; + reset_raw(s, ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = &p0, .snd = k })); +} + +/** + Re-initialization function when there is no key +*/ +void Hacl_Hash_Blake2b_Simd256_reset(Hacl_Hash_Blake2b_Simd256_state_t *s) +{ + Hacl_Hash_Blake2b_Simd256_reset_with_key(s, NULL); +} + /** Update function when there is no key; 0 = success, 1 = max length exceeded */ @@ -582,8 +873,10 @@ Hacl_Hash_Blake2b_Simd256_update( if (!(sz1 == 0U)) { uint64_t prevlen = total_len1 - (uint64_t)sz1; - Lib_IntVector_Intrinsics_vec256 *wv = block_state1.fst; - Lib_IntVector_Intrinsics_vec256 *hash = block_state1.snd; + K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ + acc = block_state1.thd; + Lib_IntVector_Intrinsics_vec256 *wv = acc.fst; + Lib_IntVector_Intrinsics_vec256 *hash = acc.snd; uint32_t nb = 1U; Hacl_Hash_Blake2b_Simd256_update_multi(128U, wv, @@ -606,8 +899,9 @@ Hacl_Hash_Blake2b_Simd256_update( uint32_t data2_len = chunk_len - data1_len; uint8_t *data1 = chunk; uint8_t *data2 = chunk + data1_len; - Lib_IntVector_Intrinsics_vec256 *wv = block_state1.fst; - Lib_IntVector_Intrinsics_vec256 *hash = block_state1.snd; + K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ acc = block_state1.thd; + Lib_IntVector_Intrinsics_vec256 *wv = acc.fst; + Lib_IntVector_Intrinsics_vec256 *hash = acc.snd; uint32_t nb = data1_len / 128U; Hacl_Hash_Blake2b_Simd256_update_multi(data1_len, wv, @@ -673,8 +967,10 @@ Hacl_Hash_Blake2b_Simd256_update( if (!(sz1 == 0U)) { uint64_t prevlen = total_len1 - (uint64_t)sz1; - Lib_IntVector_Intrinsics_vec256 *wv = block_state1.fst; - Lib_IntVector_Intrinsics_vec256 *hash = block_state1.snd; + K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ + acc = block_state1.thd; + Lib_IntVector_Intrinsics_vec256 *wv = acc.fst; + Lib_IntVector_Intrinsics_vec256 *hash = acc.snd; uint32_t nb = 1U; Hacl_Hash_Blake2b_Simd256_update_multi(128U, wv, @@ -698,8 +994,9 @@ Hacl_Hash_Blake2b_Simd256_update( uint32_t data2_len = chunk_len - diff - data1_len; uint8_t *data1 = chunk2; uint8_t *data2 = chunk2 + data1_len; - Lib_IntVector_Intrinsics_vec256 *wv = block_state1.fst; - Lib_IntVector_Intrinsics_vec256 *hash = block_state1.snd; + K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ acc =
block_state1.thd; + Lib_IntVector_Intrinsics_vec256 *wv = acc.fst; + Lib_IntVector_Intrinsics_vec256 *hash = acc.snd; uint32_t nb = data1_len / 128U; Hacl_Hash_Blake2b_Simd256_update_multi(data1_len, wv, @@ -728,6 +1025,10 @@ Hacl_Hash_Blake2b_Simd256_update( void Hacl_Hash_Blake2b_Simd256_digest(Hacl_Hash_Blake2b_Simd256_state_t *state, uint8_t *output) { + Hacl_Hash_Blake2b_Simd256_block_state_t block_state0 = (*state).block_state; + uint8_t nn = block_state0.snd; + uint8_t kk1 = block_state0.fst; + Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; Hacl_Hash_Blake2b_Simd256_state_t scrut = *state; Hacl_Hash_Blake2b_Simd256_block_state_t block_state = scrut.block_state; uint8_t *buf_ = scrut.buf; @@ -744,9 +1045,11 @@ Hacl_Hash_Blake2b_Simd256_digest(Hacl_Hash_Blake2b_Simd256_state_t *state, uint8 uint8_t *buf_1 = buf_; KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 wv0[4U] KRML_POST_ALIGN(32) = { 0U }; KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 b[4U] KRML_POST_ALIGN(32) = { 0U }; - Hacl_Hash_Blake2b_Simd256_block_state_t tmp_block_state = { .fst = wv0, .snd = b }; - Lib_IntVector_Intrinsics_vec256 *src_b = block_state.snd; - Lib_IntVector_Intrinsics_vec256 *dst_b = tmp_block_state.snd; + Hacl_Hash_Blake2b_Simd256_block_state_t + tmp_block_state = + { .fst = i.key_length, .snd = i.digest_length, .thd = { .fst = wv0, .snd = b } }; + Lib_IntVector_Intrinsics_vec256 *src_b = block_state.thd.snd; + Lib_IntVector_Intrinsics_vec256 *dst_b = tmp_block_state.thd.snd; memcpy(dst_b, src_b, 4U * sizeof (Lib_IntVector_Intrinsics_vec256)); uint64_t prev_len = total_len - (uint64_t)r; uint32_t ite; @@ -760,8 +1063,10 @@ Hacl_Hash_Blake2b_Simd256_digest(Hacl_Hash_Blake2b_Simd256_state_t *state, uint8 } uint8_t *buf_last = buf_1 + r - ite; uint8_t *buf_multi = buf_1; - Lib_IntVector_Intrinsics_vec256 *wv1 = tmp_block_state.fst; - Lib_IntVector_Intrinsics_vec256 *hash0 = tmp_block_state.snd; + K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ + acc0 = tmp_block_state.thd; + Lib_IntVector_Intrinsics_vec256 *wv1 = acc0.fst; + Lib_IntVector_Intrinsics_vec256 *hash0 = acc0.snd; uint32_t nb = 0U; Hacl_Hash_Blake2b_Simd256_update_multi(0U, wv1, @@ -770,15 +1075,18 @@ Hacl_Hash_Blake2b_Simd256_digest(Hacl_Hash_Blake2b_Simd256_state_t *state, uint8 buf_multi, nb); uint64_t prev_len_last = total_len - (uint64_t)r; - Lib_IntVector_Intrinsics_vec256 *wv = tmp_block_state.fst; - Lib_IntVector_Intrinsics_vec256 *hash = tmp_block_state.snd; + K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ + acc = tmp_block_state.thd; + Lib_IntVector_Intrinsics_vec256 *wv = acc.fst; + Lib_IntVector_Intrinsics_vec256 *hash = acc.snd; Hacl_Hash_Blake2b_Simd256_update_last(r, wv, hash, FStar_UInt128_uint64_to_uint128(prev_len_last), r, buf_last); - Hacl_Hash_Blake2b_Simd256_finish(64U, output, tmp_block_state.snd); + uint8_t nn0 = tmp_block_state.snd; + Hacl_Hash_Blake2b_Simd256_finish((uint32_t)nn0, output, tmp_block_state.thd.snd); } /** @@ -789,14 +1097,55 @@ void Hacl_Hash_Blake2b_Simd256_free(Hacl_Hash_Blake2b_Simd256_state_t *state) Hacl_Hash_Blake2b_Simd256_state_t scrut = *state; uint8_t *buf = scrut.buf; Hacl_Hash_Blake2b_Simd256_block_state_t block_state = scrut.block_state; - Lib_IntVector_Intrinsics_vec256 *wv = block_state.fst; - Lib_IntVector_Intrinsics_vec256 *b = block_state.snd; + Lib_IntVector_Intrinsics_vec256 *b = block_state.thd.snd; + Lib_IntVector_Intrinsics_vec256 *wv = block_state.thd.fst; KRML_ALIGNED_FREE(wv); KRML_ALIGNED_FREE(b); 
KRML_HOST_FREE(buf); KRML_HOST_FREE(state); } +/** + Copying. The key length (or absence thereof) must match between source and destination. +*/ +Hacl_Hash_Blake2b_Simd256_state_t +*Hacl_Hash_Blake2b_Simd256_copy(Hacl_Hash_Blake2b_Simd256_state_t *state) +{ + Hacl_Hash_Blake2b_Simd256_state_t scrut = *state; + Hacl_Hash_Blake2b_Simd256_block_state_t block_state0 = scrut.block_state; + uint8_t *buf0 = scrut.buf; + uint64_t total_len0 = scrut.total_len; + uint8_t nn = block_state0.snd; + uint8_t kk1 = block_state0.fst; + Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; + uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(128U, sizeof (uint8_t)); + memcpy(buf, buf0, 128U * sizeof (uint8_t)); + Lib_IntVector_Intrinsics_vec256 + *wv = + (Lib_IntVector_Intrinsics_vec256 *)KRML_ALIGNED_MALLOC(32, + sizeof (Lib_IntVector_Intrinsics_vec256) * 4U); + memset(wv, 0U, 4U * sizeof (Lib_IntVector_Intrinsics_vec256)); + Lib_IntVector_Intrinsics_vec256 + *b = + (Lib_IntVector_Intrinsics_vec256 *)KRML_ALIGNED_MALLOC(32, + sizeof (Lib_IntVector_Intrinsics_vec256) * 4U); + memset(b, 0U, 4U * sizeof (Lib_IntVector_Intrinsics_vec256)); + Hacl_Hash_Blake2b_Simd256_block_state_t + block_state = { .fst = i.key_length, .snd = i.digest_length, .thd = { .fst = wv, .snd = b } }; + Lib_IntVector_Intrinsics_vec256 *src_b = block_state0.thd.snd; + Lib_IntVector_Intrinsics_vec256 *dst_b = block_state.thd.snd; + memcpy(dst_b, src_b, 4U * sizeof (Lib_IntVector_Intrinsics_vec256)); + Hacl_Hash_Blake2b_Simd256_state_t + s = { .block_state = block_state, .buf = buf, .total_len = total_len0 }; + Hacl_Hash_Blake2b_Simd256_state_t + *p = + (Hacl_Hash_Blake2b_Simd256_state_t *)KRML_HOST_MALLOC(sizeof ( + Hacl_Hash_Blake2b_Simd256_state_t + )); + p[0U] = s; + return p; +} + /** Write the BLAKE2b digest of message `input` using key `key` into `output`. 
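A usage sketch of the new streaming lifecycle (illustrative only; `params`, `key`, `msg`, and `msg_len` are placeholders, and the key/digest lengths are fixed at allocation time, per the rules above):

  Hacl_Hash_Blake2b_Simd256_state_t
  *st = Hacl_Hash_Blake2b_Simd256_malloc_with_params_and_key(&params, key);
  uint8_t digest[64U] = { 0U };
  Hacl_Hash_Blake2b_Simd256_update(st, msg, msg_len);  // 0 = success
  Hacl_Hash_Blake2b_Simd256_digest(st, digest);        // emits digest_length bytes
  // the state can be reused, but only with the same key/digest lengths:
  Hacl_Hash_Blake2b_Simd256_reset_with_key_and_params(st, &params, key);
  Hacl_Hash_Blake2b_Simd256_free(st);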
@@ -822,7 +1171,91 @@ Hacl_Hash_Blake2b_Simd256_hash_with_key( Hacl_Hash_Blake2b_Simd256_init(b, key_len, output_len); update(b1, b, key_len, key, input_len, input); Hacl_Hash_Blake2b_Simd256_finish(output_len, output, b); - Lib_Memzero0_memzero(b1, 4U, Lib_IntVector_Intrinsics_vec256); - Lib_Memzero0_memzero(b, 4U, Lib_IntVector_Intrinsics_vec256); + Lib_Memzero0_memzero(b1, 4U, Lib_IntVector_Intrinsics_vec256, void *); + Lib_Memzero0_memzero(b, 4U, Lib_IntVector_Intrinsics_vec256, void *); +} + +void +Hacl_Hash_Blake2b_Simd256_hash_with_key_and_paramas( + uint8_t *output, + uint8_t *input, + uint32_t input_len, + Hacl_Hash_Blake2b_blake2_params params, + uint8_t *key +) +{ + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 b[4U] KRML_POST_ALIGN(32) = { 0U }; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 b1[4U] KRML_POST_ALIGN(32) = { 0U }; + uint64_t tmp[8U] = { 0U }; + Lib_IntVector_Intrinsics_vec256 *r0 = b; + Lib_IntVector_Intrinsics_vec256 *r1 = b + 1U; + Lib_IntVector_Intrinsics_vec256 *r2 = b + 2U; + Lib_IntVector_Intrinsics_vec256 *r3 = b + 3U; + uint64_t iv0 = Hacl_Hash_Blake2b_ivTable_B[0U]; + uint64_t iv1 = Hacl_Hash_Blake2b_ivTable_B[1U]; + uint64_t iv2 = Hacl_Hash_Blake2b_ivTable_B[2U]; + uint64_t iv3 = Hacl_Hash_Blake2b_ivTable_B[3U]; + uint64_t iv4 = Hacl_Hash_Blake2b_ivTable_B[4U]; + uint64_t iv5 = Hacl_Hash_Blake2b_ivTable_B[5U]; + uint64_t iv6 = Hacl_Hash_Blake2b_ivTable_B[6U]; + uint64_t iv7 = Hacl_Hash_Blake2b_ivTable_B[7U]; + r2[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv0, iv1, iv2, iv3); + r3[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv4, iv5, iv6, iv7); + uint8_t kk = params.key_length; + uint8_t nn = params.digest_length; + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 4U; + uint8_t *bj = params.salt + i * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i] = x;); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 6U; + uint8_t *bj = params.personal + i * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i] = x;); + tmp[0U] = + (uint64_t)nn + ^ + ((uint64_t)kk + << 8U + ^ + ((uint64_t)params.fanout + << 16U + ^ ((uint64_t)params.depth << 24U ^ (uint64_t)params.leaf_length << 32U))); + tmp[1U] = params.node_offset; + tmp[2U] = (uint64_t)params.node_depth ^ (uint64_t)params.inner_length << 8U; + tmp[3U] = 0ULL; + uint64_t tmp0 = tmp[0U]; + uint64_t tmp1 = tmp[1U]; + uint64_t tmp2 = tmp[2U]; + uint64_t tmp3 = tmp[3U]; + uint64_t tmp4 = tmp[4U]; + uint64_t tmp5 = tmp[5U]; + uint64_t tmp6 = tmp[6U]; + uint64_t tmp7 = tmp[7U]; + uint64_t iv0_ = iv0 ^ tmp0; + uint64_t iv1_ = iv1 ^ tmp1; + uint64_t iv2_ = iv2 ^ tmp2; + uint64_t iv3_ = iv3 ^ tmp3; + uint64_t iv4_ = iv4 ^ tmp4; + uint64_t iv5_ = iv5 ^ tmp5; + uint64_t iv6_ = iv6 ^ tmp6; + uint64_t iv7_ = iv7 ^ tmp7; + r0[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv0_, iv1_, iv2_, iv3_); + r1[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv4_, iv5_, iv6_, iv7_); + update(b1, b, (uint32_t)params.key_length, key, input_len, input); + Hacl_Hash_Blake2b_Simd256_finish((uint32_t)params.digest_length, output, b); + Lib_Memzero0_memzero(b1, 4U, Lib_IntVector_Intrinsics_vec256, void *); + Lib_Memzero0_memzero(b, 4U, Lib_IntVector_Intrinsics_vec256, void *); } diff --git a/src/msvc/Hacl_Hash_Blake2s.c b/src/msvc/Hacl_Hash_Blake2s.c index 652c3f33..6e19d83d 100644 --- a/src/msvc/Hacl_Hash_Blake2s.c +++ b/src/msvc/Hacl_Hash_Blake2s.c @@ -26,6 +26,7 @@ #include "internal/Hacl_Hash_Blake2s.h" #include "internal/Hacl_Impl_Blake2_Constants.h" 
+#include "internal/Hacl_Hash_Blake2b.h" #include "lib_memzero0.h" static inline void @@ -76,22 +77,22 @@ update_block(uint32_t *wv, uint32_t *hash, bool flag, uint64_t totlen, uint8_t * uint32_t *r1 = m_st + 4U; uint32_t *r20 = m_st + 8U; uint32_t *r30 = m_st + 12U; - uint32_t s0 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 0U]; - uint32_t s1 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 1U]; - uint32_t s2 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 2U]; - uint32_t s3 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 3U]; - uint32_t s4 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 4U]; - uint32_t s5 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 5U]; - uint32_t s6 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 6U]; - uint32_t s7 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 7U]; - uint32_t s8 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 8U]; - uint32_t s9 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 9U]; - uint32_t s10 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 10U]; - uint32_t s11 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 11U]; - uint32_t s12 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 12U]; - uint32_t s13 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 13U]; - uint32_t s14 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 14U]; - uint32_t s15 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 15U]; + uint32_t s0 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 0U]; + uint32_t s1 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 1U]; + uint32_t s2 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 2U]; + uint32_t s3 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 3U]; + uint32_t s4 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 4U]; + uint32_t s5 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 5U]; + uint32_t s6 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 6U]; + uint32_t s7 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 7U]; + uint32_t s8 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 8U]; + uint32_t s9 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 9U]; + uint32_t s10 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 10U]; + uint32_t s11 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 11U]; + uint32_t s12 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 12U]; + uint32_t s13 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 13U]; + uint32_t s14 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 14U]; + uint32_t s15 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 15U]; uint32_t uu____0 = m_w[s2]; uint32_t uu____1 = m_w[s4]; uint32_t uu____2 = m_w[s6]; @@ -474,18 +475,104 @@ update_block(uint32_t *wv, uint32_t *hash, bool flag, uint64_t totlen, uint8_t * void Hacl_Hash_Blake2s_init(uint32_t *hash, uint32_t kk, uint32_t nn) { + uint8_t salt[8U] = { 0U }; + uint8_t personal[8U] = { 0U }; + Hacl_Hash_Blake2b_blake2_params + p = + { + .digest_length = 32U, .key_length = 0U, .fanout = 1U, .depth = 1U, .leaf_length = 0U, + .node_offset = 0ULL, .node_depth = 0U, .inner_length = 0U, .salt = salt, .personal = personal + }; + uint32_t tmp[8U] = { 0U }; + uint32_t *r0 = hash; + uint32_t *r1 = hash + 4U; + uint32_t *r2 = hash + 8U; + uint32_t *r3 = hash + 12U; + uint32_t iv0 = Hacl_Hash_Blake2b_ivTable_S[0U]; + uint32_t iv1 = Hacl_Hash_Blake2b_ivTable_S[1U]; + uint32_t iv2 = Hacl_Hash_Blake2b_ivTable_S[2U]; + uint32_t iv3 = Hacl_Hash_Blake2b_ivTable_S[3U]; + uint32_t iv4 = Hacl_Hash_Blake2b_ivTable_S[4U]; + uint32_t iv5 = Hacl_Hash_Blake2b_ivTable_S[5U]; + uint32_t iv6 = Hacl_Hash_Blake2b_ivTable_S[6U]; + uint32_t iv7 = Hacl_Hash_Blake2b_ivTable_S[7U]; + r2[0U] = iv0; + r2[1U] = iv1; + r2[2U] = iv2; + r2[3U] = iv3; + r3[0U] = iv4; + r3[1U] = iv5; + r3[2U] = iv6; + r3[3U] = iv7; + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint32_t *os = 
tmp + 4U; + uint8_t *bj = p.salt + i * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 6U; + uint8_t *bj = p.personal + i * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); + tmp[0U] = + (uint32_t)(uint8_t)nn + ^ ((uint32_t)(uint8_t)kk << 8U ^ ((uint32_t)p.fanout << 16U ^ (uint32_t)p.depth << 24U)); + tmp[1U] = p.leaf_length; + tmp[2U] = (uint32_t)p.node_offset; + tmp[3U] = + (uint32_t)(p.node_offset >> 32U) + ^ ((uint32_t)p.node_depth << 16U ^ (uint32_t)p.inner_length << 24U); + uint32_t tmp0 = tmp[0U]; + uint32_t tmp1 = tmp[1U]; + uint32_t tmp2 = tmp[2U]; + uint32_t tmp3 = tmp[3U]; + uint32_t tmp4 = tmp[4U]; + uint32_t tmp5 = tmp[5U]; + uint32_t tmp6 = tmp[6U]; + uint32_t tmp7 = tmp[7U]; + uint32_t iv0_ = iv0 ^ tmp0; + uint32_t iv1_ = iv1 ^ tmp1; + uint32_t iv2_ = iv2 ^ tmp2; + uint32_t iv3_ = iv3 ^ tmp3; + uint32_t iv4_ = iv4 ^ tmp4; + uint32_t iv5_ = iv5 ^ tmp5; + uint32_t iv6_ = iv6 ^ tmp6; + uint32_t iv7_ = iv7 ^ tmp7; + r0[0U] = iv0_; + r0[1U] = iv1_; + r0[2U] = iv2_; + r0[3U] = iv3_; + r1[0U] = iv4_; + r1[1U] = iv5_; + r1[2U] = iv6_; + r1[3U] = iv7_; +} + +static void init_with_params(uint32_t *hash, Hacl_Hash_Blake2b_blake2_params p) +{ + uint32_t tmp[8U] = { 0U }; uint32_t *r0 = hash; uint32_t *r1 = hash + 4U; uint32_t *r2 = hash + 8U; uint32_t *r3 = hash + 12U; - uint32_t iv0 = Hacl_Hash_Blake2s_ivTable_S[0U]; - uint32_t iv1 = Hacl_Hash_Blake2s_ivTable_S[1U]; - uint32_t iv2 = Hacl_Hash_Blake2s_ivTable_S[2U]; - uint32_t iv3 = Hacl_Hash_Blake2s_ivTable_S[3U]; - uint32_t iv4 = Hacl_Hash_Blake2s_ivTable_S[4U]; - uint32_t iv5 = Hacl_Hash_Blake2s_ivTable_S[5U]; - uint32_t iv6 = Hacl_Hash_Blake2s_ivTable_S[6U]; - uint32_t iv7 = Hacl_Hash_Blake2s_ivTable_S[7U]; + uint32_t iv0 = Hacl_Hash_Blake2b_ivTable_S[0U]; + uint32_t iv1 = Hacl_Hash_Blake2b_ivTable_S[1U]; + uint32_t iv2 = Hacl_Hash_Blake2b_ivTable_S[2U]; + uint32_t iv3 = Hacl_Hash_Blake2b_ivTable_S[3U]; + uint32_t iv4 = Hacl_Hash_Blake2b_ivTable_S[4U]; + uint32_t iv5 = Hacl_Hash_Blake2b_ivTable_S[5U]; + uint32_t iv6 = Hacl_Hash_Blake2b_ivTable_S[6U]; + uint32_t iv7 = Hacl_Hash_Blake2b_ivTable_S[7U]; r2[0U] = iv0; r2[1U] = iv1; r2[2U] = iv2; @@ -494,16 +581,58 @@ void Hacl_Hash_Blake2s_init(uint32_t *hash, uint32_t kk, uint32_t nn) r3[1U] = iv5; r3[2U] = iv6; r3[3U] = iv7; - uint32_t kk_shift_8 = kk << 8U; - uint32_t iv0_ = iv0 ^ (0x01010000U ^ (kk_shift_8 ^ nn)); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 4U; + uint8_t *bj = p.salt + i * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 6U; + uint8_t *bj = p.personal + i * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); + tmp[0U] = + (uint32_t)p.digest_length + ^ ((uint32_t)p.key_length << 8U ^ ((uint32_t)p.fanout << 16U ^ (uint32_t)p.depth << 24U)); + tmp[1U] = p.leaf_length; + tmp[2U] = (uint32_t)p.node_offset; + tmp[3U] = + (uint32_t)(p.node_offset >> 32U) + ^ ((uint32_t)p.node_depth << 16U ^ (uint32_t)p.inner_length << 24U); + uint32_t tmp0 = tmp[0U]; + uint32_t tmp1 = tmp[1U]; + uint32_t tmp2 = tmp[2U]; + uint32_t tmp3 = tmp[3U]; + uint32_t tmp4 = tmp[4U]; + uint32_t tmp5 = tmp[5U]; + uint32_t tmp6 = tmp[6U]; + uint32_t tmp7 = tmp[7U]; + uint32_t iv0_ = iv0 ^ tmp0; + uint32_t iv1_ = iv1 ^ tmp1; + uint32_t iv2_ = iv2 ^ tmp2; + uint32_t iv3_ = iv3 ^ tmp3; + uint32_t iv4_ = iv4 ^ tmp4; + uint32_t 
iv5_ = iv5 ^ tmp5; + uint32_t iv6_ = iv6 ^ tmp6; + uint32_t iv7_ = iv7 ^ tmp7; r0[0U] = iv0_; - r0[1U] = iv1; - r0[2U] = iv2; - r0[3U] = iv3; - r1[0U] = iv4; - r1[1U] = iv5; - r1[2U] = iv6; - r1[3U] = iv7; + r0[1U] = iv1_; + r0[2U] = iv2_; + r0[3U] = iv3_; + r1[0U] = iv4_; + r1[1U] = iv5_; + r1[2U] = iv6_; + r1[3U] = iv7_; } static void update_key(uint32_t *wv, uint32_t *hash, uint32_t kk, uint8_t *k, uint32_t ll) @@ -519,7 +648,7 @@ static void update_key(uint32_t *wv, uint32_t *hash, uint32_t kk, uint8_t *k, ui { update_block(wv, hash, false, lb, b); } - Lib_Memzero0_memzero(b, 64U, uint8_t); + Lib_Memzero0_memzero(b, 64U, uint8_t, void *); } void @@ -556,7 +685,7 @@ Hacl_Hash_Blake2s_update_last( memcpy(b, last, rem * sizeof (uint8_t)); uint64_t totlen = prev + (uint64_t)len; update_block(wv, hash, true, totlen, b); - Lib_Memzero0_memzero(b, 64U, uint8_t); + Lib_Memzero0_memzero(b, 64U, uint8_t, void *); } static void @@ -614,41 +743,203 @@ void Hacl_Hash_Blake2s_finish(uint32_t nn, uint8_t *output, uint32_t *hash) KRML_MAYBE_FOR4(i, 0U, 4U, 1U, store32_le(second + i * 4U, row1[i]);); uint8_t *final = b; memcpy(output, final, nn * sizeof (uint8_t)); - Lib_Memzero0_memzero(b, 32U, uint8_t); + Lib_Memzero0_memzero(b, 32U, uint8_t, void *); } -/** - State allocation function when there is no key -*/ -Hacl_Hash_Blake2s_state_t *Hacl_Hash_Blake2s_malloc(void) +static Hacl_Hash_Blake2s_state_t +*malloc_raw( + Hacl_Hash_Blake2b_index kk, + K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_ key +) { uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(64U, sizeof (uint8_t)); uint32_t *wv = (uint32_t *)KRML_HOST_CALLOC(16U, sizeof (uint32_t)); uint32_t *b = (uint32_t *)KRML_HOST_CALLOC(16U, sizeof (uint32_t)); - Hacl_Hash_Blake2s_block_state_t block_state = { .fst = wv, .snd = b }; + Hacl_Hash_Blake2s_block_state_t + block_state = { .fst = kk.key_length, .snd = kk.digest_length, .thd = { .fst = wv, .snd = b } }; + uint8_t kk10 = kk.key_length; + uint32_t ite; + if (kk10 != 0U) + { + ite = 64U; + } + else + { + ite = 0U; + } Hacl_Hash_Blake2s_state_t - s = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)0U }; + s = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; Hacl_Hash_Blake2s_state_t *p = (Hacl_Hash_Blake2s_state_t *)KRML_HOST_MALLOC(sizeof (Hacl_Hash_Blake2s_state_t)); p[0U] = s; - Hacl_Hash_Blake2s_init(block_state.snd, 0U, 32U); + Hacl_Hash_Blake2b_blake2_params *p1 = key.fst; + uint8_t kk1 = p1->key_length; + uint8_t nn = p1->digest_length; + Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; + uint32_t kk2 = (uint32_t)i.key_length; + uint8_t *k_1 = key.snd; + if (!(kk2 == 0U)) + { + uint8_t *sub_b = buf + kk2; + memset(sub_b, 0U, (64U - kk2) * sizeof (uint8_t)); + memcpy(buf, k_1, kk2 * sizeof (uint8_t)); + } + Hacl_Hash_Blake2b_blake2_params pv = p1[0U]; + init_with_params(block_state.thd.snd, pv); return p; } /** - Re-initialization function when there is no key + State allocation function when there are parameters and a key. The +length of the key k MUST match the value of the field key_length in the +parameters. Furthermore, there is a static (not dynamically checked) requirement +that key_length does not exceed max_key (32 for S, 64 for B).
+*/ +Hacl_Hash_Blake2s_state_t +*Hacl_Hash_Blake2s_malloc_with_params_and_key(Hacl_Hash_Blake2b_blake2_params *p, uint8_t *k) +{ + Hacl_Hash_Blake2b_blake2_params pv = p[0U]; + Hacl_Hash_Blake2b_index + i1 = { .key_length = pv.key_length, .digest_length = pv.digest_length }; + return + malloc_raw(i1, + ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = p, .snd = k })); +} + +/** + State allocation function when there is just a custom key. All +other parameters are set to their respective default values, meaning the output +length is the maximum allowed output (32 for S, 64 for B). */ -void Hacl_Hash_Blake2s_reset(Hacl_Hash_Blake2s_state_t *state) +Hacl_Hash_Blake2s_state_t *Hacl_Hash_Blake2s_malloc_with_key(uint8_t *k, uint8_t kk) +{ + uint8_t nn = 32U; + Hacl_Hash_Blake2b_index i = { .key_length = kk, .digest_length = nn }; + uint8_t *salt = (uint8_t *)KRML_HOST_CALLOC(8U, sizeof (uint8_t)); + uint8_t *personal = (uint8_t *)KRML_HOST_CALLOC(8U, sizeof (uint8_t)); + Hacl_Hash_Blake2b_blake2_params + p = + { + .digest_length = i.digest_length, .key_length = i.key_length, .fanout = 1U, .depth = 1U, + .leaf_length = 0U, .node_offset = 0ULL, .node_depth = 0U, .inner_length = 0U, .salt = salt, + .personal = personal + }; + Hacl_Hash_Blake2b_blake2_params + *p0 = + (Hacl_Hash_Blake2b_blake2_params *)KRML_HOST_MALLOC(sizeof (Hacl_Hash_Blake2b_blake2_params)); + p0[0U] = p; + Hacl_Hash_Blake2s_state_t *s = Hacl_Hash_Blake2s_malloc_with_params_and_key(p0, k); + Hacl_Hash_Blake2b_blake2_params p1 = p0[0U]; + KRML_HOST_FREE(p1.salt); + KRML_HOST_FREE(p1.personal); + KRML_HOST_FREE(p0); + return s; +} + +/** + State allocation function when there is no key +*/ +Hacl_Hash_Blake2s_state_t *Hacl_Hash_Blake2s_malloc(void) +{ + return Hacl_Hash_Blake2s_malloc_with_key(NULL, 0U); +} + +static Hacl_Hash_Blake2b_index index_of_state(Hacl_Hash_Blake2s_state_t *s) +{ + Hacl_Hash_Blake2s_block_state_t block_state = (*s).block_state; + uint8_t nn = block_state.snd; + uint8_t kk1 = block_state.fst; + return ((Hacl_Hash_Blake2b_index){ .key_length = kk1, .digest_length = nn }); +} + +static void +reset_raw( + Hacl_Hash_Blake2s_state_t *state, + K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_ key +) { Hacl_Hash_Blake2s_state_t scrut = *state; uint8_t *buf = scrut.buf; Hacl_Hash_Blake2s_block_state_t block_state = scrut.block_state; - Hacl_Hash_Blake2s_init(block_state.snd, 0U, 32U); + uint8_t nn0 = block_state.snd; + uint8_t kk10 = block_state.fst; + Hacl_Hash_Blake2b_index i = { .key_length = kk10, .digest_length = nn0 }; + KRML_MAYBE_UNUSED_VAR(i); + Hacl_Hash_Blake2b_blake2_params *p = key.fst; + uint8_t kk1 = p->key_length; + uint8_t nn = p->digest_length; + Hacl_Hash_Blake2b_index i1 = { .key_length = kk1, .digest_length = nn }; + uint32_t kk2 = (uint32_t)i1.key_length; + uint8_t *k_1 = key.snd; + if (!(kk2 == 0U)) + { + uint8_t *sub_b = buf + kk2; + memset(sub_b, 0U, (64U - kk2) * sizeof (uint8_t)); + memcpy(buf, k_1, kk2 * sizeof (uint8_t)); + } + Hacl_Hash_Blake2b_blake2_params pv = p[0U]; + init_with_params(block_state.thd.snd, pv); + uint8_t kk11 = i.key_length; + uint32_t ite; + if (kk11 != 0U) + { + ite = 64U; + } + else + { + ite = 0U; + } Hacl_Hash_Blake2s_state_t - tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)0U }; + tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; state[0U] = tmp; } +/** + Re-initialization function. 
The reinitialization API is tricky -- +you MUST reuse the same original parameters for digest (output) length and key +length. +*/ +void +Hacl_Hash_Blake2s_reset_with_key_and_params( + Hacl_Hash_Blake2s_state_t *s, + Hacl_Hash_Blake2b_blake2_params *p, + uint8_t *k +) +{ + index_of_state(s); + reset_raw(s, ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = p, .snd = k })); +} + +/** + Re-initialization function when there is a key. Note that the key +size is not allowed to change, which is why this function does not take a key +length -- the key has to be the same key size that was originally passed to +`malloc_with_key`. +*/ +void Hacl_Hash_Blake2s_reset_with_key(Hacl_Hash_Blake2s_state_t *s, uint8_t *k) +{ + Hacl_Hash_Blake2b_index idx = index_of_state(s); + uint8_t salt[8U] = { 0U }; + uint8_t personal[8U] = { 0U }; + Hacl_Hash_Blake2b_blake2_params + p = + { + .digest_length = idx.digest_length, .key_length = idx.key_length, .fanout = 1U, .depth = 1U, + .leaf_length = 0U, .node_offset = 0ULL, .node_depth = 0U, .inner_length = 0U, .salt = salt, + .personal = personal + }; + Hacl_Hash_Blake2b_blake2_params p0 = p; + reset_raw(s, ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = &p0, .snd = k })); +} + +/** + Re-initialization function when there is no key +*/ +void Hacl_Hash_Blake2s_reset(Hacl_Hash_Blake2s_state_t *s) +{ + Hacl_Hash_Blake2s_reset_with_key(s, NULL); +} + /** Update function when there is no key; 0 = success, 1 = max length exceeded */ @@ -716,8 +1007,9 @@ Hacl_Hash_Blake2s_update(Hacl_Hash_Blake2s_state_t *state, uint8_t *chunk, uint3 if (!(sz1 == 0U)) { uint64_t prevlen = total_len1 - (uint64_t)sz1; - uint32_t *wv = block_state1.fst; - uint32_t *hash = block_state1.snd; + K____uint32_t___uint32_t_ acc = block_state1.thd; + uint32_t *wv = acc.fst; + uint32_t *hash = acc.snd; uint32_t nb = 1U; Hacl_Hash_Blake2s_update_multi(64U, wv, hash, prevlen, buf, nb); } @@ -735,8 +1027,9 @@ Hacl_Hash_Blake2s_update(Hacl_Hash_Blake2s_state_t *state, uint8_t *chunk, uint3 uint32_t data2_len = chunk_len - data1_len; uint8_t *data1 = chunk; uint8_t *data2 = chunk + data1_len; - uint32_t *wv = block_state1.fst; - uint32_t *hash = block_state1.snd; + K____uint32_t___uint32_t_ acc = block_state1.thd; + uint32_t *wv = acc.fst; + uint32_t *hash = acc.snd; uint32_t nb = data1_len / 64U; Hacl_Hash_Blake2s_update_multi(data1_len, wv, hash, total_len1, data1, nb); uint8_t *dst = buf; @@ -797,8 +1090,9 @@ Hacl_Hash_Blake2s_update(Hacl_Hash_Blake2s_state_t *state, uint8_t *chunk, uint3 if (!(sz1 == 0U)) { uint64_t prevlen = total_len1 - (uint64_t)sz1; - uint32_t *wv = block_state1.fst; - uint32_t *hash = block_state1.snd; + K____uint32_t___uint32_t_ acc = block_state1.thd; + uint32_t *wv = acc.fst; + uint32_t *hash = acc.snd; uint32_t nb = 1U; Hacl_Hash_Blake2s_update_multi(64U, wv, hash, prevlen, buf, nb); } @@ -817,8 +1111,9 @@ Hacl_Hash_Blake2s_update(Hacl_Hash_Blake2s_state_t *state, uint8_t *chunk, uint3 uint32_t data2_len = chunk_len - diff - data1_len; uint8_t *data1 = chunk2; uint8_t *data2 = chunk2 + data1_len; - uint32_t *wv = block_state1.fst; - uint32_t *hash = block_state1.snd; + K____uint32_t___uint32_t_ acc = block_state1.thd; + uint32_t *wv = acc.fst; + uint32_t *hash = acc.snd; uint32_t nb = data1_len / 64U; Hacl_Hash_Blake2s_update_multi(data1_len, wv, hash, total_len1, data1, nb); uint8_t *dst = buf; @@ -841,6 +1136,10 @@ Hacl_Hash_Blake2s_update(Hacl_Hash_Blake2s_state_t *state, uint8_t *chunk, uint3 */ void Hacl_Hash_Blake2s_digest(Hacl_Hash_Blake2s_state_t
*state, uint8_t *output) { + Hacl_Hash_Blake2s_block_state_t block_state0 = (*state).block_state; + uint8_t nn = block_state0.snd; + uint8_t kk1 = block_state0.fst; + Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; Hacl_Hash_Blake2s_state_t scrut = *state; Hacl_Hash_Blake2s_block_state_t block_state = scrut.block_state; uint8_t *buf_ = scrut.buf; @@ -857,9 +1156,11 @@ void Hacl_Hash_Blake2s_digest(Hacl_Hash_Blake2s_state_t *state, uint8_t *output) uint8_t *buf_1 = buf_; uint32_t wv0[16U] = { 0U }; uint32_t b[16U] = { 0U }; - Hacl_Hash_Blake2s_block_state_t tmp_block_state = { .fst = wv0, .snd = b }; - uint32_t *src_b = block_state.snd; - uint32_t *dst_b = tmp_block_state.snd; + Hacl_Hash_Blake2s_block_state_t + tmp_block_state = + { .fst = i.key_length, .snd = i.digest_length, .thd = { .fst = wv0, .snd = b } }; + uint32_t *src_b = block_state.thd.snd; + uint32_t *dst_b = tmp_block_state.thd.snd; memcpy(dst_b, src_b, 16U * sizeof (uint32_t)); uint64_t prev_len = total_len - (uint64_t)r; uint32_t ite; @@ -873,15 +1174,18 @@ void Hacl_Hash_Blake2s_digest(Hacl_Hash_Blake2s_state_t *state, uint8_t *output) } uint8_t *buf_last = buf_1 + r - ite; uint8_t *buf_multi = buf_1; - uint32_t *wv1 = tmp_block_state.fst; - uint32_t *hash0 = tmp_block_state.snd; + K____uint32_t___uint32_t_ acc0 = tmp_block_state.thd; + uint32_t *wv1 = acc0.fst; + uint32_t *hash0 = acc0.snd; uint32_t nb = 0U; Hacl_Hash_Blake2s_update_multi(0U, wv1, hash0, prev_len, buf_multi, nb); uint64_t prev_len_last = total_len - (uint64_t)r; - uint32_t *wv = tmp_block_state.fst; - uint32_t *hash = tmp_block_state.snd; + K____uint32_t___uint32_t_ acc = tmp_block_state.thd; + uint32_t *wv = acc.fst; + uint32_t *hash = acc.snd; Hacl_Hash_Blake2s_update_last(r, wv, hash, prev_len_last, r, buf_last); - Hacl_Hash_Blake2s_finish(32U, output, tmp_block_state.snd); + uint8_t nn0 = tmp_block_state.snd; + Hacl_Hash_Blake2s_finish((uint32_t)nn0, output, tmp_block_state.thd.snd); } /** @@ -892,19 +1196,48 @@ void Hacl_Hash_Blake2s_free(Hacl_Hash_Blake2s_state_t *state) Hacl_Hash_Blake2s_state_t scrut = *state; uint8_t *buf = scrut.buf; Hacl_Hash_Blake2s_block_state_t block_state = scrut.block_state; - uint32_t *wv = block_state.fst; - uint32_t *b = block_state.snd; + uint32_t *b = block_state.thd.snd; + uint32_t *wv = block_state.thd.fst; KRML_HOST_FREE(wv); KRML_HOST_FREE(b); KRML_HOST_FREE(buf); KRML_HOST_FREE(state); } +/** + Copying. The key length (or absence thereof) must match between source and destination. 
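Since the destination is allocated by this function, the new state inherits the source's key and digest lengths, as well as any input buffered so far. An illustrative sketch (`st`, `suffix`, and `suffix_len` are placeholders):

  Hacl_Hash_Blake2s_state_t *fork = Hacl_Hash_Blake2s_copy(st);
  // `st` and `fork` now evolve independently from a common prefix
  Hacl_Hash_Blake2s_update(fork, suffix, suffix_len);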
+*/ +Hacl_Hash_Blake2s_state_t *Hacl_Hash_Blake2s_copy(Hacl_Hash_Blake2s_state_t *state) +{ + Hacl_Hash_Blake2s_state_t scrut = *state; + Hacl_Hash_Blake2s_block_state_t block_state0 = scrut.block_state; + uint8_t *buf0 = scrut.buf; + uint64_t total_len0 = scrut.total_len; + uint8_t nn = block_state0.snd; + uint8_t kk1 = block_state0.fst; + Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; + uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(64U, sizeof (uint8_t)); + memcpy(buf, buf0, 64U * sizeof (uint8_t)); + uint32_t *wv = (uint32_t *)KRML_HOST_CALLOC(16U, sizeof (uint32_t)); + uint32_t *b = (uint32_t *)KRML_HOST_CALLOC(16U, sizeof (uint32_t)); + Hacl_Hash_Blake2s_block_state_t + block_state = { .fst = i.key_length, .snd = i.digest_length, .thd = { .fst = wv, .snd = b } }; + uint32_t *src_b = block_state0.thd.snd; + uint32_t *dst_b = block_state.thd.snd; + memcpy(dst_b, src_b, 16U * sizeof (uint32_t)); + Hacl_Hash_Blake2s_state_t + s = { .block_state = block_state, .buf = buf, .total_len = total_len0 }; + Hacl_Hash_Blake2s_state_t + *p = (Hacl_Hash_Blake2s_state_t *)KRML_HOST_MALLOC(sizeof (Hacl_Hash_Blake2s_state_t)); + p[0U] = s; + return p; +} + /** Write the BLAKE2s digest of message `input` using key `key` into `output`. @param output Pointer to `output_len` bytes of memory where the digest is written to. @param output_len Length of the to-be-generated digest with 1 <= `output_len` <= 32. @param input Pointer to `input_len` bytes of memory where the input message is read from. @param input_len Length of the input message. @param key Pointer to `key_len` bytes of memory where the key is read from. @@ -925,7 +1258,100 @@ Hacl_Hash_Blake2s_hash_with_key( Hacl_Hash_Blake2s_init(b, key_len, output_len); update(b1, b, key_len, key, input_len, input); Hacl_Hash_Blake2s_finish(output_len, output, b); - Lib_Memzero0_memzero(b1, 16U, uint32_t); - Lib_Memzero0_memzero(b, 16U, uint32_t); + Lib_Memzero0_memzero(b1, 16U, uint32_t, void *); + Lib_Memzero0_memzero(b, 16U, uint32_t, void *); +} + +void +Hacl_Hash_Blake2s_hash_with_key_and_paramas( + uint8_t *output, + uint8_t *input, + uint32_t input_len, + Hacl_Hash_Blake2b_blake2_params params, + uint8_t *key +) +{ + uint32_t b[16U] = { 0U }; + uint32_t b1[16U] = { 0U }; + uint32_t tmp[8U] = { 0U }; + uint32_t *r0 = b; + uint32_t *r1 = b + 4U; + uint32_t *r2 = b + 8U; + uint32_t *r3 = b + 12U; + uint32_t iv0 = Hacl_Hash_Blake2b_ivTable_S[0U]; + uint32_t iv1 = Hacl_Hash_Blake2b_ivTable_S[1U]; + uint32_t iv2 = Hacl_Hash_Blake2b_ivTable_S[2U]; + uint32_t iv3 = Hacl_Hash_Blake2b_ivTable_S[3U]; + uint32_t iv4 = Hacl_Hash_Blake2b_ivTable_S[4U]; + uint32_t iv5 = Hacl_Hash_Blake2b_ivTable_S[5U]; + uint32_t iv6 = Hacl_Hash_Blake2b_ivTable_S[6U]; + uint32_t iv7 = Hacl_Hash_Blake2b_ivTable_S[7U]; + r2[0U] = iv0; + r2[1U] = iv1; + r2[2U] = iv2; + r2[3U] = iv3; + r3[0U] = iv4; + r3[1U] = iv5; + r3[2U] = iv6; + r3[3U] = iv7; + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 4U; + uint8_t *bj = params.salt + i * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 6U; + uint8_t *bj = params.personal + i * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); + tmp[0U] = + (uint32_t)params.digest_length + ^ + ((uint32_t)params.key_length + << 8U + ^ ((uint32_t)params.fanout << 16U ^ (uint32_t)params.depth << 24U)); + tmp[1U] =
params.leaf_length; + tmp[2U] = (uint32_t)params.node_offset; + tmp[3U] = + (uint32_t)(params.node_offset >> 32U) + ^ ((uint32_t)params.node_depth << 16U ^ (uint32_t)params.inner_length << 24U); + uint32_t tmp0 = tmp[0U]; + uint32_t tmp1 = tmp[1U]; + uint32_t tmp2 = tmp[2U]; + uint32_t tmp3 = tmp[3U]; + uint32_t tmp4 = tmp[4U]; + uint32_t tmp5 = tmp[5U]; + uint32_t tmp6 = tmp[6U]; + uint32_t tmp7 = tmp[7U]; + uint32_t iv0_ = iv0 ^ tmp0; + uint32_t iv1_ = iv1 ^ tmp1; + uint32_t iv2_ = iv2 ^ tmp2; + uint32_t iv3_ = iv3 ^ tmp3; + uint32_t iv4_ = iv4 ^ tmp4; + uint32_t iv5_ = iv5 ^ tmp5; + uint32_t iv6_ = iv6 ^ tmp6; + uint32_t iv7_ = iv7 ^ tmp7; + r0[0U] = iv0_; + r0[1U] = iv1_; + r0[2U] = iv2_; + r0[3U] = iv3_; + r1[0U] = iv4_; + r1[1U] = iv5_; + r1[2U] = iv6_; + r1[3U] = iv7_; + update(b1, b, (uint32_t)params.key_length, key, input_len, input); + Hacl_Hash_Blake2s_finish((uint32_t)params.digest_length, output, b); + Lib_Memzero0_memzero(b1, 16U, uint32_t, void *); + Lib_Memzero0_memzero(b, 16U, uint32_t, void *); } diff --git a/src/msvc/Hacl_Hash_Blake2s_Simd128.c b/src/msvc/Hacl_Hash_Blake2s_Simd128.c index 73f0cccb..c02da8fa 100644 --- a/src/msvc/Hacl_Hash_Blake2s_Simd128.c +++ b/src/msvc/Hacl_Hash_Blake2s_Simd128.c @@ -26,6 +26,7 @@ #include "internal/Hacl_Hash_Blake2s_Simd128.h" #include "internal/Hacl_Impl_Blake2_Constants.h" +#include "internal/Hacl_Hash_Blake2b.h" #include "lib_memzero0.h" static inline void @@ -77,22 +78,22 @@ update_block( Lib_IntVector_Intrinsics_vec128 *r1 = m_st + 1U; Lib_IntVector_Intrinsics_vec128 *r20 = m_st + 2U; Lib_IntVector_Intrinsics_vec128 *r30 = m_st + 3U; - uint32_t s0 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 0U]; - uint32_t s1 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 1U]; - uint32_t s2 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 2U]; - uint32_t s3 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 3U]; - uint32_t s4 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 4U]; - uint32_t s5 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 5U]; - uint32_t s6 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 6U]; - uint32_t s7 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 7U]; - uint32_t s8 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 8U]; - uint32_t s9 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 9U]; - uint32_t s10 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 10U]; - uint32_t s11 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 11U]; - uint32_t s12 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 12U]; - uint32_t s13 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 13U]; - uint32_t s14 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 14U]; - uint32_t s15 = Hacl_Hash_Blake2s_sigmaTable[start_idx + 15U]; + uint32_t s0 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 0U]; + uint32_t s1 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 1U]; + uint32_t s2 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 2U]; + uint32_t s3 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 3U]; + uint32_t s4 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 4U]; + uint32_t s5 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 5U]; + uint32_t s6 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 6U]; + uint32_t s7 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 7U]; + uint32_t s8 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 8U]; + uint32_t s9 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 9U]; + uint32_t s10 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 10U]; + uint32_t s11 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 11U]; + uint32_t s12 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 12U]; + uint32_t s13 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 13U]; + uint32_t s14 = 
Hacl_Hash_Blake2b_sigmaTable[start_idx + 14U]; + uint32_t s15 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 15U]; r0[0U] = Lib_IntVector_Intrinsics_vec128_load32s(m_w[s0], m_w[s2], m_w[s4], m_w[s6]); r1[0U] = Lib_IntVector_Intrinsics_vec128_load32s(m_w[s1], m_w[s3], m_w[s5], m_w[s7]); r20[0U] = Lib_IntVector_Intrinsics_vec128_load32s(m_w[s8], m_w[s10], m_w[s12], m_w[s14]); @@ -214,24 +215,141 @@ update_block( void Hacl_Hash_Blake2s_Simd128_init(Lib_IntVector_Intrinsics_vec128 *hash, uint32_t kk, uint32_t nn) { + uint8_t salt[8U] = { 0U }; + uint8_t personal[8U] = { 0U }; + Hacl_Hash_Blake2b_blake2_params + p = + { + .digest_length = 32U, .key_length = 0U, .fanout = 1U, .depth = 1U, .leaf_length = 0U, + .node_offset = 0ULL, .node_depth = 0U, .inner_length = 0U, .salt = salt, .personal = personal + }; + uint32_t tmp[8U] = { 0U }; + Lib_IntVector_Intrinsics_vec128 *r0 = hash; + Lib_IntVector_Intrinsics_vec128 *r1 = hash + 1U; + Lib_IntVector_Intrinsics_vec128 *r2 = hash + 2U; + Lib_IntVector_Intrinsics_vec128 *r3 = hash + 3U; + uint32_t iv0 = Hacl_Hash_Blake2b_ivTable_S[0U]; + uint32_t iv1 = Hacl_Hash_Blake2b_ivTable_S[1U]; + uint32_t iv2 = Hacl_Hash_Blake2b_ivTable_S[2U]; + uint32_t iv3 = Hacl_Hash_Blake2b_ivTable_S[3U]; + uint32_t iv4 = Hacl_Hash_Blake2b_ivTable_S[4U]; + uint32_t iv5 = Hacl_Hash_Blake2b_ivTable_S[5U]; + uint32_t iv6 = Hacl_Hash_Blake2b_ivTable_S[6U]; + uint32_t iv7 = Hacl_Hash_Blake2b_ivTable_S[7U]; + r2[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv0, iv1, iv2, iv3); + r3[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv4, iv5, iv6, iv7); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 4U; + uint8_t *bj = p.salt + i * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 6U; + uint8_t *bj = p.personal + i * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); + tmp[0U] = + (uint32_t)(uint8_t)nn + ^ ((uint32_t)(uint8_t)kk << 8U ^ ((uint32_t)p.fanout << 16U ^ (uint32_t)p.depth << 24U)); + tmp[1U] = p.leaf_length; + tmp[2U] = (uint32_t)p.node_offset; + tmp[3U] = + (uint32_t)(p.node_offset >> 32U) + ^ ((uint32_t)p.node_depth << 16U ^ (uint32_t)p.inner_length << 24U); + uint32_t tmp0 = tmp[0U]; + uint32_t tmp1 = tmp[1U]; + uint32_t tmp2 = tmp[2U]; + uint32_t tmp3 = tmp[3U]; + uint32_t tmp4 = tmp[4U]; + uint32_t tmp5 = tmp[5U]; + uint32_t tmp6 = tmp[6U]; + uint32_t tmp7 = tmp[7U]; + uint32_t iv0_ = iv0 ^ tmp0; + uint32_t iv1_ = iv1 ^ tmp1; + uint32_t iv2_ = iv2 ^ tmp2; + uint32_t iv3_ = iv3 ^ tmp3; + uint32_t iv4_ = iv4 ^ tmp4; + uint32_t iv5_ = iv5 ^ tmp5; + uint32_t iv6_ = iv6 ^ tmp6; + uint32_t iv7_ = iv7 ^ tmp7; + r0[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv0_, iv1_, iv2_, iv3_); + r1[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv4_, iv5_, iv6_, iv7_); +} + +static void +init_with_params(Lib_IntVector_Intrinsics_vec128 *hash, Hacl_Hash_Blake2b_blake2_params p) +{ + uint32_t tmp[8U] = { 0U }; Lib_IntVector_Intrinsics_vec128 *r0 = hash; Lib_IntVector_Intrinsics_vec128 *r1 = hash + 1U; Lib_IntVector_Intrinsics_vec128 *r2 = hash + 2U; Lib_IntVector_Intrinsics_vec128 *r3 = hash + 3U; - uint32_t iv0 = Hacl_Hash_Blake2s_ivTable_S[0U]; - uint32_t iv1 = Hacl_Hash_Blake2s_ivTable_S[1U]; - uint32_t iv2 = Hacl_Hash_Blake2s_ivTable_S[2U]; - uint32_t iv3 = Hacl_Hash_Blake2s_ivTable_S[3U]; - uint32_t iv4 = Hacl_Hash_Blake2s_ivTable_S[4U]; - uint32_t iv5 = Hacl_Hash_Blake2s_ivTable_S[5U]; - uint32_t iv6 = Hacl_Hash_Blake2s_ivTable_S[6U]; - 
uint32_t iv7 = Hacl_Hash_Blake2s_ivTable_S[7U]; + uint32_t iv0 = Hacl_Hash_Blake2b_ivTable_S[0U]; + uint32_t iv1 = Hacl_Hash_Blake2b_ivTable_S[1U]; + uint32_t iv2 = Hacl_Hash_Blake2b_ivTable_S[2U]; + uint32_t iv3 = Hacl_Hash_Blake2b_ivTable_S[3U]; + uint32_t iv4 = Hacl_Hash_Blake2b_ivTable_S[4U]; + uint32_t iv5 = Hacl_Hash_Blake2b_ivTable_S[5U]; + uint32_t iv6 = Hacl_Hash_Blake2b_ivTable_S[6U]; + uint32_t iv7 = Hacl_Hash_Blake2b_ivTable_S[7U]; r2[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv0, iv1, iv2, iv3); r3[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv4, iv5, iv6, iv7); - uint32_t kk_shift_8 = kk << 8U; - uint32_t iv0_ = iv0 ^ (0x01010000U ^ (kk_shift_8 ^ nn)); - r0[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv0_, iv1, iv2, iv3); - r1[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv4, iv5, iv6, iv7); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 4U; + uint8_t *bj = p.salt + i * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 6U; + uint8_t *bj = p.personal + i * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); + tmp[0U] = + (uint32_t)p.digest_length + ^ ((uint32_t)p.key_length << 8U ^ ((uint32_t)p.fanout << 16U ^ (uint32_t)p.depth << 24U)); + tmp[1U] = p.leaf_length; + tmp[2U] = (uint32_t)p.node_offset; + tmp[3U] = + (uint32_t)(p.node_offset >> 32U) + ^ ((uint32_t)p.node_depth << 16U ^ (uint32_t)p.inner_length << 24U); + uint32_t tmp0 = tmp[0U]; + uint32_t tmp1 = tmp[1U]; + uint32_t tmp2 = tmp[2U]; + uint32_t tmp3 = tmp[3U]; + uint32_t tmp4 = tmp[4U]; + uint32_t tmp5 = tmp[5U]; + uint32_t tmp6 = tmp[6U]; + uint32_t tmp7 = tmp[7U]; + uint32_t iv0_ = iv0 ^ tmp0; + uint32_t iv1_ = iv1 ^ tmp1; + uint32_t iv2_ = iv2 ^ tmp2; + uint32_t iv3_ = iv3 ^ tmp3; + uint32_t iv4_ = iv4 ^ tmp4; + uint32_t iv5_ = iv5 ^ tmp5; + uint32_t iv6_ = iv6 ^ tmp6; + uint32_t iv7_ = iv7 ^ tmp7; + r0[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv0_, iv1_, iv2_, iv3_); + r1[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv4_, iv5_, iv6_, iv7_); } static void @@ -254,7 +372,7 @@ update_key( { update_block(wv, hash, false, lb, b); } - Lib_Memzero0_memzero(b, 64U, uint8_t); + Lib_Memzero0_memzero(b, 64U, uint8_t, void *); } void @@ -291,7 +409,7 @@ Hacl_Hash_Blake2s_Simd128_update_last( memcpy(b, last, rem * sizeof (uint8_t)); uint64_t totlen = prev + (uint64_t)len; update_block(wv, hash, true, totlen, b); - Lib_Memzero0_memzero(b, 64U, uint8_t); + Lib_Memzero0_memzero(b, 64U, uint8_t, void *); } static inline void @@ -367,7 +485,7 @@ Hacl_Hash_Blake2s_Simd128_finish( Lib_IntVector_Intrinsics_vec128_store32_le(second, row1[0U]); uint8_t *final = b; memcpy(output, final, nn * sizeof (uint8_t)); - Lib_Memzero0_memzero(b, 32U, uint8_t); + Lib_Memzero0_memzero(b, 32U, uint8_t, void *); } void @@ -464,10 +582,11 @@ Lib_IntVector_Intrinsics_vec128 *Hacl_Hash_Blake2s_Simd128_malloc_with_key(void) return buf; } -/** - State allocation function when there is no key -*/ -Hacl_Hash_Blake2s_Simd128_state_t *Hacl_Hash_Blake2s_Simd128_malloc(void) +static Hacl_Hash_Blake2s_Simd128_state_t +*malloc_raw( + Hacl_Hash_Blake2b_index kk, + K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_ key +) { uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(64U, sizeof (uint8_t)); Lib_IntVector_Intrinsics_vec128 @@ -480,33 +599,199 @@ Hacl_Hash_Blake2s_Simd128_state_t *Hacl_Hash_Blake2s_Simd128_malloc(void) (Lib_IntVector_Intrinsics_vec128 *)KRML_ALIGNED_MALLOC(16, sizeof 
(Lib_IntVector_Intrinsics_vec128) * 4U); memset(b, 0U, 4U * sizeof (Lib_IntVector_Intrinsics_vec128)); - Hacl_Hash_Blake2s_Simd128_block_state_t block_state = { .fst = wv, .snd = b }; + Hacl_Hash_Blake2s_Simd128_block_state_t + block_state = { .fst = kk.key_length, .snd = kk.digest_length, .thd = { .fst = wv, .snd = b } }; + uint8_t kk10 = kk.key_length; + uint32_t ite; + if (kk10 != 0U) + { + ite = 64U; + } + else + { + ite = 0U; + } Hacl_Hash_Blake2s_Simd128_state_t - s = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)0U }; + s = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; Hacl_Hash_Blake2s_Simd128_state_t *p = (Hacl_Hash_Blake2s_Simd128_state_t *)KRML_HOST_MALLOC(sizeof ( Hacl_Hash_Blake2s_Simd128_state_t )); p[0U] = s; - Hacl_Hash_Blake2s_Simd128_init(block_state.snd, 0U, 32U); + Hacl_Hash_Blake2b_blake2_params *p1 = key.fst; + uint8_t kk1 = p1->key_length; + uint8_t nn = p1->digest_length; + Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; + uint32_t kk2 = (uint32_t)i.key_length; + uint8_t *k_1 = key.snd; + if (!(kk2 == 0U)) + { + uint8_t *sub_b = buf + kk2; + memset(sub_b, 0U, (64U - kk2) * sizeof (uint8_t)); + memcpy(buf, k_1, kk2 * sizeof (uint8_t)); + } + Hacl_Hash_Blake2b_blake2_params pv = p1[0U]; + init_with_params(block_state.thd.snd, pv); return p; } /** - Re-initialization function when there is no key + State allocation function when there are parameters and a key. The +length of the key k MUST match the value of the field key_length in the +parameters. Furthermore, there is a static (not dynamically checked) requirement +that key_length does not exceed max_key (32 for S, 64 for B). +*/ +Hacl_Hash_Blake2s_Simd128_state_t +*Hacl_Hash_Blake2s_Simd128_malloc_with_params_and_key( + Hacl_Hash_Blake2b_blake2_params *p, + uint8_t *k +) +{ + Hacl_Hash_Blake2b_blake2_params pv = p[0U]; + Hacl_Hash_Blake2b_index + i1 = { .key_length = pv.key_length, .digest_length = pv.digest_length }; + return + malloc_raw(i1, + ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = p, .snd = k })); +} + +/** + State allocation function when there is just a custom key. All +other parameters are set to their respective default values, meaning the output +length is the maximum allowed output (32 for S, 64 for B).
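For instance (an illustrative sketch; the 32-byte `key` is a placeholder whose length matches the `kk` argument, as required):

  uint8_t key[32U] = { 0U };
  Hacl_Hash_Blake2s_Simd128_state_t
  *st = Hacl_Hash_Blake2s_Simd128_malloc_with_key0(key, 32U);
  // defaults apply: digest_length = 32, empty salt and personalization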
+*/ +Hacl_Hash_Blake2s_Simd128_state_t +*Hacl_Hash_Blake2s_Simd128_malloc_with_key0(uint8_t *k, uint8_t kk) +{ + uint8_t nn = 32U; + Hacl_Hash_Blake2b_index i = { .key_length = kk, .digest_length = nn }; + uint8_t *salt = (uint8_t *)KRML_HOST_CALLOC(8U, sizeof (uint8_t)); + uint8_t *personal = (uint8_t *)KRML_HOST_CALLOC(8U, sizeof (uint8_t)); + Hacl_Hash_Blake2b_blake2_params + p = + { + .digest_length = i.digest_length, .key_length = i.key_length, .fanout = 1U, .depth = 1U, + .leaf_length = 0U, .node_offset = 0ULL, .node_depth = 0U, .inner_length = 0U, .salt = salt, + .personal = personal + }; + Hacl_Hash_Blake2b_blake2_params + *p0 = + (Hacl_Hash_Blake2b_blake2_params *)KRML_HOST_MALLOC(sizeof (Hacl_Hash_Blake2b_blake2_params)); + p0[0U] = p; + Hacl_Hash_Blake2s_Simd128_state_t + *s = Hacl_Hash_Blake2s_Simd128_malloc_with_params_and_key(p0, k); + Hacl_Hash_Blake2b_blake2_params p1 = p0[0U]; + KRML_HOST_FREE(p1.salt); + KRML_HOST_FREE(p1.personal); + KRML_HOST_FREE(p0); + return s; +} + +/** + State allocation function when there is no key */ -void Hacl_Hash_Blake2s_Simd128_reset(Hacl_Hash_Blake2s_Simd128_state_t *state) +Hacl_Hash_Blake2s_Simd128_state_t *Hacl_Hash_Blake2s_Simd128_malloc(void) +{ + return Hacl_Hash_Blake2s_Simd128_malloc_with_key0(NULL, 0U); +} + +static Hacl_Hash_Blake2b_index index_of_state(Hacl_Hash_Blake2s_Simd128_state_t *s) +{ + Hacl_Hash_Blake2s_Simd128_block_state_t block_state = (*s).block_state; + uint8_t nn = block_state.snd; + uint8_t kk1 = block_state.fst; + return ((Hacl_Hash_Blake2b_index){ .key_length = kk1, .digest_length = nn }); +} + +static void +reset_raw( + Hacl_Hash_Blake2s_Simd128_state_t *state, + K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_ key +) { Hacl_Hash_Blake2s_Simd128_state_t scrut = *state; uint8_t *buf = scrut.buf; Hacl_Hash_Blake2s_Simd128_block_state_t block_state = scrut.block_state; - Hacl_Hash_Blake2s_Simd128_init(block_state.snd, 0U, 32U); + uint8_t nn0 = block_state.snd; + uint8_t kk10 = block_state.fst; + Hacl_Hash_Blake2b_index i = { .key_length = kk10, .digest_length = nn0 }; + KRML_MAYBE_UNUSED_VAR(i); + Hacl_Hash_Blake2b_blake2_params *p = key.fst; + uint8_t kk1 = p->key_length; + uint8_t nn = p->digest_length; + Hacl_Hash_Blake2b_index i1 = { .key_length = kk1, .digest_length = nn }; + uint32_t kk2 = (uint32_t)i1.key_length; + uint8_t *k_1 = key.snd; + if (!(kk2 == 0U)) + { + uint8_t *sub_b = buf + kk2; + memset(sub_b, 0U, (64U - kk2) * sizeof (uint8_t)); + memcpy(buf, k_1, kk2 * sizeof (uint8_t)); + } + Hacl_Hash_Blake2b_blake2_params pv = p[0U]; + init_with_params(block_state.thd.snd, pv); + uint8_t kk11 = i.key_length; + uint32_t ite; + if (kk11 != 0U) + { + ite = 64U; + } + else + { + ite = 0U; + } Hacl_Hash_Blake2s_Simd128_state_t - tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)0U }; + tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; state[0U] = tmp; } +/** + Re-initialization function. The reinitialization API is tricky -- +you MUST reuse the same original parameters for digest (output) length and key +length. +*/ +void +Hacl_Hash_Blake2s_Simd128_reset_with_key_and_params( + Hacl_Hash_Blake2s_Simd128_state_t *s, + Hacl_Hash_Blake2b_blake2_params *p, + uint8_t *k +) +{ + index_of_state(s); + reset_raw(s, ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = p, .snd = k })); +} + +/** + Re-initialization function when there is a key. 
Note that the key
+size is not allowed to change, which is why this function does not take a key
+length -- the key has to be the same size as the one originally passed to
+`malloc_with_key`.
+*/
+void Hacl_Hash_Blake2s_Simd128_reset_with_key(Hacl_Hash_Blake2s_Simd128_state_t *s, uint8_t *k)
+{
+  Hacl_Hash_Blake2b_index idx = index_of_state(s);
+  uint8_t salt[8U] = { 0U };
+  uint8_t personal[8U] = { 0U };
+  Hacl_Hash_Blake2b_blake2_params
+  p =
+    {
+      .digest_length = idx.digest_length, .key_length = idx.key_length, .fanout = 1U, .depth = 1U,
+      .leaf_length = 0U, .node_offset = 0ULL, .node_depth = 0U, .inner_length = 0U, .salt = salt,
+      .personal = personal
+    };
+  Hacl_Hash_Blake2b_blake2_params p0 = p;
+  reset_raw(s, ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = &p0, .snd = k }));
+}
+
+/**
+ Re-initialization function when there is no key
+*/
+void Hacl_Hash_Blake2s_Simd128_reset(Hacl_Hash_Blake2s_Simd128_state_t *s)
+{
+  Hacl_Hash_Blake2s_Simd128_reset_with_key(s, NULL);
+}
+
 /**
  Update function when there is no key; 0 = success, 1 = max length exceeded
 */
@@ -578,8 +863,10 @@ Hacl_Hash_Blake2s_Simd128_update(
     if (!(sz1 == 0U))
     {
       uint64_t prevlen = total_len1 - (uint64_t)sz1;
-      Lib_IntVector_Intrinsics_vec128 *wv = block_state1.fst;
-      Lib_IntVector_Intrinsics_vec128 *hash = block_state1.snd;
+      K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_
+      acc = block_state1.thd;
+      Lib_IntVector_Intrinsics_vec128 *wv = acc.fst;
+      Lib_IntVector_Intrinsics_vec128 *hash = acc.snd;
       uint32_t nb = 1U;
       Hacl_Hash_Blake2s_Simd128_update_multi(64U, wv, hash, prevlen, buf, nb);
     }
@@ -597,8 +884,9 @@
     uint32_t data2_len = chunk_len - data1_len;
     uint8_t *data1 = chunk;
     uint8_t *data2 = chunk + data1_len;
-    Lib_IntVector_Intrinsics_vec128 *wv = block_state1.fst;
-    Lib_IntVector_Intrinsics_vec128 *hash = block_state1.snd;
+    K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ acc = block_state1.thd;
+    Lib_IntVector_Intrinsics_vec128 *wv = acc.fst;
+    Lib_IntVector_Intrinsics_vec128 *hash = acc.snd;
     uint32_t nb = data1_len / 64U;
     Hacl_Hash_Blake2s_Simd128_update_multi(data1_len, wv, hash, total_len1, data1, nb);
     uint8_t *dst = buf;
@@ -659,8 +947,10 @@ Hacl_Hash_Blake2s_Simd128_update(
     if (!(sz1 == 0U))
     {
       uint64_t prevlen = total_len1 - (uint64_t)sz1;
-      Lib_IntVector_Intrinsics_vec128 *wv = block_state1.fst;
-      Lib_IntVector_Intrinsics_vec128 *hash = block_state1.snd;
+      K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_
+      acc = block_state1.thd;
+      Lib_IntVector_Intrinsics_vec128 *wv = acc.fst;
+      Lib_IntVector_Intrinsics_vec128 *hash = acc.snd;
       uint32_t nb = 1U;
       Hacl_Hash_Blake2s_Simd128_update_multi(64U, wv, hash, prevlen, buf, nb);
     }
@@ -679,8 +969,9 @@
     uint32_t data2_len = chunk_len - diff - data1_len;
     uint8_t *data1 = chunk2;
     uint8_t *data2 = chunk2 + data1_len;
-    Lib_IntVector_Intrinsics_vec128 *wv = block_state1.fst;
-    Lib_IntVector_Intrinsics_vec128 *hash = block_state1.snd;
+    K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ acc = block_state1.thd;
+    Lib_IntVector_Intrinsics_vec128 *wv = acc.fst;
+    Lib_IntVector_Intrinsics_vec128 *hash = acc.snd;
     uint32_t nb = data1_len / 64U;
     Hacl_Hash_Blake2s_Simd128_update_multi(data1_len, wv, hash, total_len1, data1, nb);
     uint8_t *dst = buf;
@@ -704,6 +995,10 @@ Hacl_Hash_Blake2s_Simd128_update(
 void Hacl_Hash_Blake2s_Simd128_digest(Hacl_Hash_Blake2s_Simd128_state_t *state, uint8_t *output)
 {
+
Hacl_Hash_Blake2s_Simd128_block_state_t block_state0 = (*state).block_state; + uint8_t nn = block_state0.snd; + uint8_t kk1 = block_state0.fst; + Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; Hacl_Hash_Blake2s_Simd128_state_t scrut = *state; Hacl_Hash_Blake2s_Simd128_block_state_t block_state = scrut.block_state; uint8_t *buf_ = scrut.buf; @@ -720,9 +1015,11 @@ Hacl_Hash_Blake2s_Simd128_digest(Hacl_Hash_Blake2s_Simd128_state_t *state, uint8 uint8_t *buf_1 = buf_; KRML_PRE_ALIGN(16) Lib_IntVector_Intrinsics_vec128 wv0[4U] KRML_POST_ALIGN(16) = { 0U }; KRML_PRE_ALIGN(16) Lib_IntVector_Intrinsics_vec128 b[4U] KRML_POST_ALIGN(16) = { 0U }; - Hacl_Hash_Blake2s_Simd128_block_state_t tmp_block_state = { .fst = wv0, .snd = b }; - Lib_IntVector_Intrinsics_vec128 *src_b = block_state.snd; - Lib_IntVector_Intrinsics_vec128 *dst_b = tmp_block_state.snd; + Hacl_Hash_Blake2s_Simd128_block_state_t + tmp_block_state = + { .fst = i.key_length, .snd = i.digest_length, .thd = { .fst = wv0, .snd = b } }; + Lib_IntVector_Intrinsics_vec128 *src_b = block_state.thd.snd; + Lib_IntVector_Intrinsics_vec128 *dst_b = tmp_block_state.thd.snd; memcpy(dst_b, src_b, 4U * sizeof (Lib_IntVector_Intrinsics_vec128)); uint64_t prev_len = total_len - (uint64_t)r; uint32_t ite; @@ -736,15 +1033,20 @@ Hacl_Hash_Blake2s_Simd128_digest(Hacl_Hash_Blake2s_Simd128_state_t *state, uint8 } uint8_t *buf_last = buf_1 + r - ite; uint8_t *buf_multi = buf_1; - Lib_IntVector_Intrinsics_vec128 *wv1 = tmp_block_state.fst; - Lib_IntVector_Intrinsics_vec128 *hash0 = tmp_block_state.snd; + K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ + acc0 = tmp_block_state.thd; + Lib_IntVector_Intrinsics_vec128 *wv1 = acc0.fst; + Lib_IntVector_Intrinsics_vec128 *hash0 = acc0.snd; uint32_t nb = 0U; Hacl_Hash_Blake2s_Simd128_update_multi(0U, wv1, hash0, prev_len, buf_multi, nb); uint64_t prev_len_last = total_len - (uint64_t)r; - Lib_IntVector_Intrinsics_vec128 *wv = tmp_block_state.fst; - Lib_IntVector_Intrinsics_vec128 *hash = tmp_block_state.snd; + K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ + acc = tmp_block_state.thd; + Lib_IntVector_Intrinsics_vec128 *wv = acc.fst; + Lib_IntVector_Intrinsics_vec128 *hash = acc.snd; Hacl_Hash_Blake2s_Simd128_update_last(r, wv, hash, prev_len_last, r, buf_last); - Hacl_Hash_Blake2s_Simd128_finish(32U, output, tmp_block_state.snd); + uint8_t nn0 = tmp_block_state.snd; + Hacl_Hash_Blake2s_Simd128_finish((uint32_t)nn0, output, tmp_block_state.thd.snd); } /** @@ -755,19 +1057,60 @@ void Hacl_Hash_Blake2s_Simd128_free(Hacl_Hash_Blake2s_Simd128_state_t *state) Hacl_Hash_Blake2s_Simd128_state_t scrut = *state; uint8_t *buf = scrut.buf; Hacl_Hash_Blake2s_Simd128_block_state_t block_state = scrut.block_state; - Lib_IntVector_Intrinsics_vec128 *wv = block_state.fst; - Lib_IntVector_Intrinsics_vec128 *b = block_state.snd; + Lib_IntVector_Intrinsics_vec128 *b = block_state.thd.snd; + Lib_IntVector_Intrinsics_vec128 *wv = block_state.thd.fst; KRML_ALIGNED_FREE(wv); KRML_ALIGNED_FREE(b); KRML_HOST_FREE(buf); KRML_HOST_FREE(state); } +/** + Copying. The key length (or absence thereof) must match between source and destination. 
+*/
+Hacl_Hash_Blake2s_Simd128_state_t
+*Hacl_Hash_Blake2s_Simd128_copy(Hacl_Hash_Blake2s_Simd128_state_t *state)
+{
+  Hacl_Hash_Blake2s_Simd128_state_t scrut = *state;
+  Hacl_Hash_Blake2s_Simd128_block_state_t block_state0 = scrut.block_state;
+  uint8_t *buf0 = scrut.buf;
+  uint64_t total_len0 = scrut.total_len;
+  uint8_t nn = block_state0.snd;
+  uint8_t kk1 = block_state0.fst;
+  Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn };
+  uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(64U, sizeof (uint8_t));
+  memcpy(buf, buf0, 64U * sizeof (uint8_t));
+  Lib_IntVector_Intrinsics_vec128
+  *wv =
+    (Lib_IntVector_Intrinsics_vec128 *)KRML_ALIGNED_MALLOC(16,
+      sizeof (Lib_IntVector_Intrinsics_vec128) * 4U);
+  memset(wv, 0U, 4U * sizeof (Lib_IntVector_Intrinsics_vec128));
+  Lib_IntVector_Intrinsics_vec128
+  *b =
+    (Lib_IntVector_Intrinsics_vec128 *)KRML_ALIGNED_MALLOC(16,
+      sizeof (Lib_IntVector_Intrinsics_vec128) * 4U);
+  memset(b, 0U, 4U * sizeof (Lib_IntVector_Intrinsics_vec128));
+  Hacl_Hash_Blake2s_Simd128_block_state_t
+  block_state = { .fst = i.key_length, .snd = i.digest_length, .thd = { .fst = wv, .snd = b } };
+  Lib_IntVector_Intrinsics_vec128 *src_b = block_state0.thd.snd;
+  Lib_IntVector_Intrinsics_vec128 *dst_b = block_state.thd.snd;
+  memcpy(dst_b, src_b, 4U * sizeof (Lib_IntVector_Intrinsics_vec128));
+  Hacl_Hash_Blake2s_Simd128_state_t
+  s = { .block_state = block_state, .buf = buf, .total_len = total_len0 };
+  Hacl_Hash_Blake2s_Simd128_state_t
+  *p =
+    (Hacl_Hash_Blake2s_Simd128_state_t *)KRML_HOST_MALLOC(sizeof (
+        Hacl_Hash_Blake2s_Simd128_state_t
+      ));
+  p[0U] = s;
+  return p;
+}
+
 /**
 Write the BLAKE2s digest of message `input` using key `key` into `output`.
 
 @param output Pointer to `output_len` bytes of memory where the digest is written to.
 @param output_len Length of the to-be-generated digest with 1 <= `output_len` <= 32.
 @param input Pointer to `input_len` bytes of memory where the input message is read from.
 @param input_len Length of the input message.
 @param key Pointer to `key_len` bytes of memory where the key is read from.
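[Editorial note] Taken together, the Blake2s-SIMD128 hunks above change the streaming state so that `block_state` now carries `key_length` and `digest_length` (fields `fst`/`snd`) alongside the vector pair: allocation commits the state to one key/digest size, and every reset must repeat it. A minimal caller sketch, not part of the patch; it uses only functions whose definitions appear in the hunks above, and the wrapper name `example_keyed_blake2s` is hypothetical.

    #include <stdint.h>
    #include "Hacl_Hash_Blake2s_Simd128.h"

    /* Hypothetical caller: keyed BLAKE2s with explicit parameters. */
    static void example_keyed_blake2s(uint8_t *msg, uint32_t msg_len, uint8_t tag[32])
    {
      uint8_t key[32] = { 0U };      /* exactly key_length bytes */
      uint8_t salt[8] = { 0U };      /* Blake2s salt and personal are 8 bytes each */
      uint8_t personal[8] = { 0U };
      Hacl_Hash_Blake2b_blake2_params params =
        {
          .digest_length = 32U, .key_length = 32U, .fanout = 1U, .depth = 1U,
          .leaf_length = 0U, .node_offset = 0ULL, .node_depth = 0U, .inner_length = 0U,
          .salt = salt, .personal = personal
        };
      Hacl_Hash_Blake2s_Simd128_state_t
      *st = Hacl_Hash_Blake2s_Simd128_malloc_with_params_and_key(&params, key);
      Hacl_Hash_Blake2s_Simd128_update(st, msg, msg_len);   /* 0 = success */
      Hacl_Hash_Blake2s_Simd128_digest(st, tag);
      /* Resets MUST keep the original key_length and digest_length. */
      Hacl_Hash_Blake2s_Simd128_reset_with_key_and_params(st, &params, key);
      Hacl_Hash_Blake2s_Simd128_free(st);
    }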
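Similarly, the new `copy` function duplicates a live streaming state (the clone inherits the source's key and digest lengths and can be finalized independently), while `hash_with_key` remains the one-shot path. A sketch under the same caveats: the argument order of `hash_with_key` is inferred from its doc comment above, passing NULL with `key_len` 0 is assumed to be accepted for the unkeyed case, and `example_fork` is a hypothetical name.

    /* Hypothetical caller: snapshot a running hash to read an intermediate digest. */
    static void example_fork(uint8_t *data, uint32_t len, uint8_t out[32])
    {
      Hacl_Hash_Blake2s_Simd128_state_t *st = Hacl_Hash_Blake2s_Simd128_malloc();
      Hacl_Hash_Blake2s_Simd128_update(st, data, len);
      Hacl_Hash_Blake2s_Simd128_state_t *snap = Hacl_Hash_Blake2s_Simd128_copy(st);
      Hacl_Hash_Blake2s_Simd128_digest(snap, out);  /* st can keep absorbing, unaffected */
      Hacl_Hash_Blake2s_Simd128_free(snap);
      Hacl_Hash_Blake2s_Simd128_free(st);
      /* One-shot, unkeyed, 32-byte digest (the Blake2s maximum): */
      Hacl_Hash_Blake2s_Simd128_hash_with_key(out, 32U, data, len, NULL, 0U);
    }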
@@ -788,7 +1131,88 @@ Hacl_Hash_Blake2s_Simd128_hash_with_key( Hacl_Hash_Blake2s_Simd128_init(b, key_len, output_len); update(b1, b, key_len, key, input_len, input); Hacl_Hash_Blake2s_Simd128_finish(output_len, output, b); - Lib_Memzero0_memzero(b1, 4U, Lib_IntVector_Intrinsics_vec128); - Lib_Memzero0_memzero(b, 4U, Lib_IntVector_Intrinsics_vec128); + Lib_Memzero0_memzero(b1, 4U, Lib_IntVector_Intrinsics_vec128, void *); + Lib_Memzero0_memzero(b, 4U, Lib_IntVector_Intrinsics_vec128, void *); +} + +void +Hacl_Hash_Blake2s_Simd128_hash_with_key_and_paramas( + uint8_t *output, + uint8_t *input, + uint32_t input_len, + Hacl_Hash_Blake2b_blake2_params params, + uint8_t *key +) +{ + KRML_PRE_ALIGN(16) Lib_IntVector_Intrinsics_vec128 b[4U] KRML_POST_ALIGN(16) = { 0U }; + KRML_PRE_ALIGN(16) Lib_IntVector_Intrinsics_vec128 b1[4U] KRML_POST_ALIGN(16) = { 0U }; + uint32_t tmp[8U] = { 0U }; + Lib_IntVector_Intrinsics_vec128 *r0 = b; + Lib_IntVector_Intrinsics_vec128 *r1 = b + 1U; + Lib_IntVector_Intrinsics_vec128 *r2 = b + 2U; + Lib_IntVector_Intrinsics_vec128 *r3 = b + 3U; + uint32_t iv0 = Hacl_Hash_Blake2b_ivTable_S[0U]; + uint32_t iv1 = Hacl_Hash_Blake2b_ivTable_S[1U]; + uint32_t iv2 = Hacl_Hash_Blake2b_ivTable_S[2U]; + uint32_t iv3 = Hacl_Hash_Blake2b_ivTable_S[3U]; + uint32_t iv4 = Hacl_Hash_Blake2b_ivTable_S[4U]; + uint32_t iv5 = Hacl_Hash_Blake2b_ivTable_S[5U]; + uint32_t iv6 = Hacl_Hash_Blake2b_ivTable_S[6U]; + uint32_t iv7 = Hacl_Hash_Blake2b_ivTable_S[7U]; + r2[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv0, iv1, iv2, iv3); + r3[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv4, iv5, iv6, iv7); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 4U; + uint8_t *bj = params.salt + i * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 6U; + uint8_t *bj = params.personal + i * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); + tmp[0U] = + (uint32_t)params.digest_length + ^ + ((uint32_t)params.key_length + << 8U + ^ ((uint32_t)params.fanout << 16U ^ (uint32_t)params.depth << 24U)); + tmp[1U] = params.leaf_length; + tmp[2U] = (uint32_t)params.node_offset; + tmp[3U] = + (uint32_t)(params.node_offset >> 32U) + ^ ((uint32_t)params.node_depth << 16U ^ (uint32_t)params.inner_length << 24U); + uint32_t tmp0 = tmp[0U]; + uint32_t tmp1 = tmp[1U]; + uint32_t tmp2 = tmp[2U]; + uint32_t tmp3 = tmp[3U]; + uint32_t tmp4 = tmp[4U]; + uint32_t tmp5 = tmp[5U]; + uint32_t tmp6 = tmp[6U]; + uint32_t tmp7 = tmp[7U]; + uint32_t iv0_ = iv0 ^ tmp0; + uint32_t iv1_ = iv1 ^ tmp1; + uint32_t iv2_ = iv2 ^ tmp2; + uint32_t iv3_ = iv3 ^ tmp3; + uint32_t iv4_ = iv4 ^ tmp4; + uint32_t iv5_ = iv5 ^ tmp5; + uint32_t iv6_ = iv6 ^ tmp6; + uint32_t iv7_ = iv7 ^ tmp7; + r0[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv0_, iv1_, iv2_, iv3_); + r1[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv4_, iv5_, iv6_, iv7_); + update(b1, b, (uint32_t)params.key_length, key, input_len, input); + Hacl_Hash_Blake2s_Simd128_finish((uint32_t)params.digest_length, output, b); + Lib_Memzero0_memzero(b1, 4U, Lib_IntVector_Intrinsics_vec128, void *); + Lib_Memzero0_memzero(b, 4U, Lib_IntVector_Intrinsics_vec128, void *); } diff --git a/src/msvc/Hacl_Hash_SHA3.c b/src/msvc/Hacl_Hash_SHA3.c index 4f502866..89bb0491 100644 --- a/src/msvc/Hacl_Hash_SHA3.c +++ b/src/msvc/Hacl_Hash_SHA3.c @@ -25,6 +25,151 @@ #include "internal/Hacl_Hash_SHA3.h" +const +uint32_t +Hacl_Hash_SHA3_keccak_rotc[24U] = + { + 1U, 3U, 
6U, 10U, 15U, 21U, 28U, 36U, 45U, 55U, 2U, 14U, 27U, 41U, 56U, 8U, 25U, 43U, 62U, 18U, + 39U, 61U, 20U, 44U + }; + +const +uint32_t +Hacl_Hash_SHA3_keccak_piln[24U] = + { + 10U, 7U, 11U, 17U, 18U, 3U, 5U, 16U, 8U, 21U, 24U, 4U, 15U, 23U, 19U, 13U, 12U, 2U, 20U, 14U, + 22U, 9U, 6U, 1U + }; + +const +uint64_t +Hacl_Hash_SHA3_keccak_rndc[24U] = + { + 0x0000000000000001ULL, 0x0000000000008082ULL, 0x800000000000808aULL, 0x8000000080008000ULL, + 0x000000000000808bULL, 0x0000000080000001ULL, 0x8000000080008081ULL, 0x8000000000008009ULL, + 0x000000000000008aULL, 0x0000000000000088ULL, 0x0000000080008009ULL, 0x000000008000000aULL, + 0x000000008000808bULL, 0x800000000000008bULL, 0x8000000000008089ULL, 0x8000000000008003ULL, + 0x8000000000008002ULL, 0x8000000000000080ULL, 0x000000000000800aULL, 0x800000008000000aULL, + 0x8000000080008081ULL, 0x8000000000008080ULL, 0x0000000080000001ULL, 0x8000000080008008ULL + }; + +static void absorb_inner_32(uint8_t *b, uint64_t *s) +{ + uint64_t ws[32U] = { 0U }; + uint8_t *b1 = b; + uint64_t u = load64_le(b1); + ws[0U] = u; + uint64_t u0 = load64_le(b1 + 8U); + ws[1U] = u0; + uint64_t u1 = load64_le(b1 + 16U); + ws[2U] = u1; + uint64_t u2 = load64_le(b1 + 24U); + ws[3U] = u2; + uint64_t u3 = load64_le(b1 + 32U); + ws[4U] = u3; + uint64_t u4 = load64_le(b1 + 40U); + ws[5U] = u4; + uint64_t u5 = load64_le(b1 + 48U); + ws[6U] = u5; + uint64_t u6 = load64_le(b1 + 56U); + ws[7U] = u6; + uint64_t u7 = load64_le(b1 + 64U); + ws[8U] = u7; + uint64_t u8 = load64_le(b1 + 72U); + ws[9U] = u8; + uint64_t u9 = load64_le(b1 + 80U); + ws[10U] = u9; + uint64_t u10 = load64_le(b1 + 88U); + ws[11U] = u10; + uint64_t u11 = load64_le(b1 + 96U); + ws[12U] = u11; + uint64_t u12 = load64_le(b1 + 104U); + ws[13U] = u12; + uint64_t u13 = load64_le(b1 + 112U); + ws[14U] = u13; + uint64_t u14 = load64_le(b1 + 120U); + ws[15U] = u14; + uint64_t u15 = load64_le(b1 + 128U); + ws[16U] = u15; + uint64_t u16 = load64_le(b1 + 136U); + ws[17U] = u16; + uint64_t u17 = load64_le(b1 + 144U); + ws[18U] = u17; + uint64_t u18 = load64_le(b1 + 152U); + ws[19U] = u18; + uint64_t u19 = load64_le(b1 + 160U); + ws[20U] = u19; + uint64_t u20 = load64_le(b1 + 168U); + ws[21U] = u20; + uint64_t u21 = load64_le(b1 + 176U); + ws[22U] = u21; + uint64_t u22 = load64_le(b1 + 184U); + ws[23U] = u22; + uint64_t u23 = load64_le(b1 + 192U); + ws[24U] = u23; + uint64_t u24 = load64_le(b1 + 200U); + ws[25U] = u24; + uint64_t u25 = load64_le(b1 + 208U); + ws[26U] = u25; + uint64_t u26 = load64_le(b1 + 216U); + ws[27U] = u26; + uint64_t u27 = load64_le(b1 + 224U); + ws[28U] = u27; + uint64_t u28 = load64_le(b1 + 232U); + ws[29U] = u28; + uint64_t u29 = load64_le(b1 + 240U); + ws[30U] = u29; + uint64_t u30 = load64_le(b1 + 248U); + ws[31U] = u30; + for (uint32_t i = 0U; i < 25U; i++) + { + s[i] = s[i] ^ ws[i]; + } + for (uint32_t i0 = 0U; i0 < 24U; i0++) + { + uint64_t _C[5U] = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + _C[i] = s[i + 0U] ^ (s[i + 5U] ^ (s[i + 10U] ^ (s[i + 15U] ^ s[i + 20U])));); + KRML_MAYBE_FOR5(i1, + 0U, + 5U, + 1U, + uint64_t uu____0 = _C[(i1 + 1U) % 5U]; + uint64_t _D = _C[(i1 + 4U) % 5U] ^ (uu____0 << 1U | uu____0 >> 63U); + KRML_MAYBE_FOR5(i, 0U, 5U, 1U, s[i1 + 5U * i] = s[i1 + 5U * i] ^ _D;);); + uint64_t x = s[1U]; + uint64_t current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Hash_SHA3_keccak_piln[i]; + uint32_t r = Hacl_Hash_SHA3_keccak_rotc[i]; + uint64_t temp = s[_Y]; + uint64_t uu____1 = current; + s[_Y] = uu____1 << r | uu____1 >> (64U - r); + current = temp; + } 
+ KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t v0 = s[0U + 5U * i] ^ (~s[1U + 5U * i] & s[2U + 5U * i]); + uint64_t v1 = s[1U + 5U * i] ^ (~s[2U + 5U * i] & s[3U + 5U * i]); + uint64_t v2 = s[2U + 5U * i] ^ (~s[3U + 5U * i] & s[4U + 5U * i]); + uint64_t v3 = s[3U + 5U * i] ^ (~s[4U + 5U * i] & s[0U + 5U * i]); + uint64_t v4 = s[4U + 5U * i] ^ (~s[0U + 5U * i] & s[1U + 5U * i]); + s[0U + 5U * i] = v0; + s[1U + 5U * i] = v1; + s[2U + 5U * i] = v2; + s[3U + 5U * i] = v3; + s[4U + 5U * i] = v4;); + uint64_t c = Hacl_Hash_SHA3_keccak_rndc[i0]; + s[0U] = s[0U] ^ c; + } +} + static uint32_t block_len(Spec_Hash_Definitions_hash_alg a) { switch (a) @@ -97,10 +242,17 @@ Hacl_Hash_SHA3_update_multi_sha3( uint32_t n_blocks ) { - for (uint32_t i = 0U; i < n_blocks; i++) + uint32_t l = block_len(a) * n_blocks; + for (uint32_t i = 0U; i < l / block_len(a); i++) { - uint8_t *block = blocks + i * block_len(a); - Hacl_Hash_SHA3_absorb_inner(block_len(a), block, s); + uint8_t b[256U] = { 0U }; + uint8_t *b_ = b; + uint8_t *b0 = blocks; + uint8_t *bl0 = b_; + uint8_t *uu____0 = b0 + i * block_len(a); + memcpy(bl0, uu____0, block_len(a) * sizeof (uint8_t)); + block_len(a); + absorb_inner_32(b_, s); } } @@ -124,37 +276,272 @@ Hacl_Hash_SHA3_update_last_sha3( uint32_t len = block_len(a); if (input_len == len) { - Hacl_Hash_SHA3_absorb_inner(len, input, s); - uint8_t lastBlock_[200U] = { 0U }; - uint8_t *lastBlock = lastBlock_; - memcpy(lastBlock, input + input_len, 0U * sizeof (uint8_t)); - lastBlock[0U] = suffix; - Hacl_Hash_SHA3_loadState(len, lastBlock, s); - if (!(((uint32_t)suffix & 0x80U) == 0U) && 0U == len - 1U) - { - Hacl_Hash_SHA3_state_permute(s); - } - uint8_t nextBlock_[200U] = { 0U }; - uint8_t *nextBlock = nextBlock_; - nextBlock[len - 1U] = 0x80U; - Hacl_Hash_SHA3_loadState(len, nextBlock, s); - Hacl_Hash_SHA3_state_permute(s); + uint8_t b1[256U] = { 0U }; + uint8_t *b_ = b1; + uint8_t *b00 = input; + uint8_t *bl00 = b_; + memcpy(bl00, b00 + 0U * len, len * sizeof (uint8_t)); + absorb_inner_32(b_, s); + uint8_t b2[256U] = { 0U }; + uint8_t *b_0 = b2; + uint32_t rem = 0U % len; + uint8_t *b01 = input + input_len; + uint8_t *bl0 = b_0; + memcpy(bl0, b01 + 0U - rem, rem * sizeof (uint8_t)); + uint8_t *b02 = b_0; + b02[0U % len] = suffix; + uint64_t ws[32U] = { 0U }; + uint8_t *b = b_0; + uint64_t u = load64_le(b); + ws[0U] = u; + uint64_t u0 = load64_le(b + 8U); + ws[1U] = u0; + uint64_t u1 = load64_le(b + 16U); + ws[2U] = u1; + uint64_t u2 = load64_le(b + 24U); + ws[3U] = u2; + uint64_t u3 = load64_le(b + 32U); + ws[4U] = u3; + uint64_t u4 = load64_le(b + 40U); + ws[5U] = u4; + uint64_t u5 = load64_le(b + 48U); + ws[6U] = u5; + uint64_t u6 = load64_le(b + 56U); + ws[7U] = u6; + uint64_t u7 = load64_le(b + 64U); + ws[8U] = u7; + uint64_t u8 = load64_le(b + 72U); + ws[9U] = u8; + uint64_t u9 = load64_le(b + 80U); + ws[10U] = u9; + uint64_t u10 = load64_le(b + 88U); + ws[11U] = u10; + uint64_t u11 = load64_le(b + 96U); + ws[12U] = u11; + uint64_t u12 = load64_le(b + 104U); + ws[13U] = u12; + uint64_t u13 = load64_le(b + 112U); + ws[14U] = u13; + uint64_t u14 = load64_le(b + 120U); + ws[15U] = u14; + uint64_t u15 = load64_le(b + 128U); + ws[16U] = u15; + uint64_t u16 = load64_le(b + 136U); + ws[17U] = u16; + uint64_t u17 = load64_le(b + 144U); + ws[18U] = u17; + uint64_t u18 = load64_le(b + 152U); + ws[19U] = u18; + uint64_t u19 = load64_le(b + 160U); + ws[20U] = u19; + uint64_t u20 = load64_le(b + 168U); + ws[21U] = u20; + uint64_t u21 = load64_le(b + 176U); + ws[22U] = u21; + uint64_t u22 = 
load64_le(b + 184U); + ws[23U] = u22; + uint64_t u23 = load64_le(b + 192U); + ws[24U] = u23; + uint64_t u24 = load64_le(b + 200U); + ws[25U] = u24; + uint64_t u25 = load64_le(b + 208U); + ws[26U] = u25; + uint64_t u26 = load64_le(b + 216U); + ws[27U] = u26; + uint64_t u27 = load64_le(b + 224U); + ws[28U] = u27; + uint64_t u28 = load64_le(b + 232U); + ws[29U] = u28; + uint64_t u29 = load64_le(b + 240U); + ws[30U] = u29; + uint64_t u30 = load64_le(b + 248U); + ws[31U] = u30; + for (uint32_t i = 0U; i < 25U; i++) + { + s[i] = s[i] ^ ws[i]; + } + if (!(((uint32_t)suffix & 0x80U) == 0U) && 0U % len == len - 1U) + { + for (uint32_t i0 = 0U; i0 < 24U; i0++) + { + uint64_t _C[5U] = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + _C[i] = s[i + 0U] ^ (s[i + 5U] ^ (s[i + 10U] ^ (s[i + 15U] ^ s[i + 20U])));); + KRML_MAYBE_FOR5(i1, + 0U, + 5U, + 1U, + uint64_t uu____0 = _C[(i1 + 1U) % 5U]; + uint64_t _D = _C[(i1 + 4U) % 5U] ^ (uu____0 << 1U | uu____0 >> 63U); + KRML_MAYBE_FOR5(i, 0U, 5U, 1U, s[i1 + 5U * i] = s[i1 + 5U * i] ^ _D;);); + uint64_t x = s[1U]; + uint64_t current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Hash_SHA3_keccak_piln[i]; + uint32_t r = Hacl_Hash_SHA3_keccak_rotc[i]; + uint64_t temp = s[_Y]; + uint64_t uu____1 = current; + s[_Y] = uu____1 << r | uu____1 >> (64U - r); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t v0 = s[0U + 5U * i] ^ (~s[1U + 5U * i] & s[2U + 5U * i]); + uint64_t v1 = s[1U + 5U * i] ^ (~s[2U + 5U * i] & s[3U + 5U * i]); + uint64_t v2 = s[2U + 5U * i] ^ (~s[3U + 5U * i] & s[4U + 5U * i]); + uint64_t v3 = s[3U + 5U * i] ^ (~s[4U + 5U * i] & s[0U + 5U * i]); + uint64_t v4 = s[4U + 5U * i] ^ (~s[0U + 5U * i] & s[1U + 5U * i]); + s[0U + 5U * i] = v0; + s[1U + 5U * i] = v1; + s[2U + 5U * i] = v2; + s[3U + 5U * i] = v3; + s[4U + 5U * i] = v4;); + uint64_t c = Hacl_Hash_SHA3_keccak_rndc[i0]; + s[0U] = s[0U] ^ c; + } + } + uint8_t b3[256U] = { 0U }; + uint8_t *b4 = b3; + uint8_t *b0 = b4; + b0[len - 1U] = 0x80U; + absorb_inner_32(b4, s); return; } - uint8_t lastBlock_[200U] = { 0U }; - uint8_t *lastBlock = lastBlock_; - memcpy(lastBlock, input, input_len * sizeof (uint8_t)); - lastBlock[input_len] = suffix; - Hacl_Hash_SHA3_loadState(len, lastBlock, s); - if (!(((uint32_t)suffix & 0x80U) == 0U) && input_len == len - 1U) + uint8_t b1[256U] = { 0U }; + uint8_t *b_ = b1; + uint32_t rem = input_len % len; + uint8_t *b00 = input; + uint8_t *bl0 = b_; + memcpy(bl0, b00 + input_len - rem, rem * sizeof (uint8_t)); + uint8_t *b01 = b_; + b01[input_len % len] = suffix; + uint64_t ws[32U] = { 0U }; + uint8_t *b = b_; + uint64_t u = load64_le(b); + ws[0U] = u; + uint64_t u0 = load64_le(b + 8U); + ws[1U] = u0; + uint64_t u1 = load64_le(b + 16U); + ws[2U] = u1; + uint64_t u2 = load64_le(b + 24U); + ws[3U] = u2; + uint64_t u3 = load64_le(b + 32U); + ws[4U] = u3; + uint64_t u4 = load64_le(b + 40U); + ws[5U] = u4; + uint64_t u5 = load64_le(b + 48U); + ws[6U] = u5; + uint64_t u6 = load64_le(b + 56U); + ws[7U] = u6; + uint64_t u7 = load64_le(b + 64U); + ws[8U] = u7; + uint64_t u8 = load64_le(b + 72U); + ws[9U] = u8; + uint64_t u9 = load64_le(b + 80U); + ws[10U] = u9; + uint64_t u10 = load64_le(b + 88U); + ws[11U] = u10; + uint64_t u11 = load64_le(b + 96U); + ws[12U] = u11; + uint64_t u12 = load64_le(b + 104U); + ws[13U] = u12; + uint64_t u13 = load64_le(b + 112U); + ws[14U] = u13; + uint64_t u14 = load64_le(b + 120U); + ws[15U] = u14; + uint64_t u15 = load64_le(b + 128U); + ws[16U] = u15; + uint64_t u16 = load64_le(b + 136U); + ws[17U] = u16; 
+ uint64_t u17 = load64_le(b + 144U); + ws[18U] = u17; + uint64_t u18 = load64_le(b + 152U); + ws[19U] = u18; + uint64_t u19 = load64_le(b + 160U); + ws[20U] = u19; + uint64_t u20 = load64_le(b + 168U); + ws[21U] = u20; + uint64_t u21 = load64_le(b + 176U); + ws[22U] = u21; + uint64_t u22 = load64_le(b + 184U); + ws[23U] = u22; + uint64_t u23 = load64_le(b + 192U); + ws[24U] = u23; + uint64_t u24 = load64_le(b + 200U); + ws[25U] = u24; + uint64_t u25 = load64_le(b + 208U); + ws[26U] = u25; + uint64_t u26 = load64_le(b + 216U); + ws[27U] = u26; + uint64_t u27 = load64_le(b + 224U); + ws[28U] = u27; + uint64_t u28 = load64_le(b + 232U); + ws[29U] = u28; + uint64_t u29 = load64_le(b + 240U); + ws[30U] = u29; + uint64_t u30 = load64_le(b + 248U); + ws[31U] = u30; + for (uint32_t i = 0U; i < 25U; i++) { - Hacl_Hash_SHA3_state_permute(s); + s[i] = s[i] ^ ws[i]; + } + if (!(((uint32_t)suffix & 0x80U) == 0U) && input_len % len == len - 1U) + { + for (uint32_t i0 = 0U; i0 < 24U; i0++) + { + uint64_t _C[5U] = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + _C[i] = s[i + 0U] ^ (s[i + 5U] ^ (s[i + 10U] ^ (s[i + 15U] ^ s[i + 20U])));); + KRML_MAYBE_FOR5(i1, + 0U, + 5U, + 1U, + uint64_t uu____2 = _C[(i1 + 1U) % 5U]; + uint64_t _D = _C[(i1 + 4U) % 5U] ^ (uu____2 << 1U | uu____2 >> 63U); + KRML_MAYBE_FOR5(i, 0U, 5U, 1U, s[i1 + 5U * i] = s[i1 + 5U * i] ^ _D;);); + uint64_t x = s[1U]; + uint64_t current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Hash_SHA3_keccak_piln[i]; + uint32_t r = Hacl_Hash_SHA3_keccak_rotc[i]; + uint64_t temp = s[_Y]; + uint64_t uu____3 = current; + s[_Y] = uu____3 << r | uu____3 >> (64U - r); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t v0 = s[0U + 5U * i] ^ (~s[1U + 5U * i] & s[2U + 5U * i]); + uint64_t v1 = s[1U + 5U * i] ^ (~s[2U + 5U * i] & s[3U + 5U * i]); + uint64_t v2 = s[2U + 5U * i] ^ (~s[3U + 5U * i] & s[4U + 5U * i]); + uint64_t v3 = s[3U + 5U * i] ^ (~s[4U + 5U * i] & s[0U + 5U * i]); + uint64_t v4 = s[4U + 5U * i] ^ (~s[0U + 5U * i] & s[1U + 5U * i]); + s[0U + 5U * i] = v0; + s[1U + 5U * i] = v1; + s[2U + 5U * i] = v2; + s[3U + 5U * i] = v3; + s[4U + 5U * i] = v4;); + uint64_t c = Hacl_Hash_SHA3_keccak_rndc[i0]; + s[0U] = s[0U] ^ c; + } } - uint8_t nextBlock_[200U] = { 0U }; - uint8_t *nextBlock = nextBlock_; - nextBlock[len - 1U] = 0x80U; - Hacl_Hash_SHA3_loadState(len, nextBlock, s); - Hacl_Hash_SHA3_state_permute(s); + uint8_t b2[256U] = { 0U }; + uint8_t *b3 = b2; + uint8_t *b0 = b3; + b0[len - 1U] = 0x80U; + absorb_inner_32(b3, s); } typedef struct hash_buf2_s @@ -463,10 +850,139 @@ digest_( uint64_t *s = tmp_block_state.snd; if (a11 == Spec_Hash_Definitions_Shake128 || a11 == Spec_Hash_Definitions_Shake256) { - Hacl_Hash_SHA3_squeeze0(s, block_len(a11), l, output); + for (uint32_t i0 = 0U; i0 < l / block_len(a11); i0++) + { + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + uint8_t *b0 = output; + uint8_t *uu____0 = hbuf; + memcpy(b0 + i0 * block_len(a11), uu____0, block_len(a11) * sizeof (uint8_t)); + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + uint64_t _C[5U] = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + _C[i] = s[i + 0U] ^ (s[i + 5U] ^ (s[i + 10U] ^ (s[i + 15U] ^ s[i + 20U])));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + uint64_t uu____1 = _C[(i2 + 1U) % 5U]; + uint64_t _D = _C[(i2 + 4U) % 5U] ^ (uu____1 << 1U | uu____1 >> 63U); + KRML_MAYBE_FOR5(i, 0U, 5U, 1U, s[i2 + 5U 
* i] = s[i2 + 5U * i] ^ _D;);); + uint64_t x = s[1U]; + uint64_t current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Hash_SHA3_keccak_piln[i]; + uint32_t r1 = Hacl_Hash_SHA3_keccak_rotc[i]; + uint64_t temp = s[_Y]; + uint64_t uu____2 = current; + s[_Y] = uu____2 << r1 | uu____2 >> (64U - r1); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t v0 = s[0U + 5U * i] ^ (~s[1U + 5U * i] & s[2U + 5U * i]); + uint64_t v1 = s[1U + 5U * i] ^ (~s[2U + 5U * i] & s[3U + 5U * i]); + uint64_t v2 = s[2U + 5U * i] ^ (~s[3U + 5U * i] & s[4U + 5U * i]); + uint64_t v3 = s[3U + 5U * i] ^ (~s[4U + 5U * i] & s[0U + 5U * i]); + uint64_t v4 = s[4U + 5U * i] ^ (~s[0U + 5U * i] & s[1U + 5U * i]); + s[0U + 5U * i] = v0; + s[1U + 5U * i] = v1; + s[2U + 5U * i] = v2; + s[3U + 5U * i] = v3; + s[4U + 5U * i] = v4;); + uint64_t c = Hacl_Hash_SHA3_keccak_rndc[i1]; + s[0U] = s[0U] ^ c; + } + } + uint32_t remOut = l % block_len(a11); + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + memcpy(output + l - remOut, hbuf, remOut * sizeof (uint8_t)); return; } - Hacl_Hash_SHA3_squeeze0(s, block_len(a11), hash_len(a11), output); + for (uint32_t i0 = 0U; i0 < hash_len(a11) / block_len(a11); i0++) + { + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + uint8_t *b0 = output; + uint8_t *uu____3 = hbuf; + memcpy(b0 + i0 * block_len(a11), uu____3, block_len(a11) * sizeof (uint8_t)); + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + uint64_t _C[5U] = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + _C[i] = s[i + 0U] ^ (s[i + 5U] ^ (s[i + 10U] ^ (s[i + 15U] ^ s[i + 20U])));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + uint64_t uu____4 = _C[(i2 + 1U) % 5U]; + uint64_t _D = _C[(i2 + 4U) % 5U] ^ (uu____4 << 1U | uu____4 >> 63U); + KRML_MAYBE_FOR5(i, 0U, 5U, 1U, s[i2 + 5U * i] = s[i2 + 5U * i] ^ _D;);); + uint64_t x = s[1U]; + uint64_t current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Hash_SHA3_keccak_piln[i]; + uint32_t r1 = Hacl_Hash_SHA3_keccak_rotc[i]; + uint64_t temp = s[_Y]; + uint64_t uu____5 = current; + s[_Y] = uu____5 << r1 | uu____5 >> (64U - r1); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t v0 = s[0U + 5U * i] ^ (~s[1U + 5U * i] & s[2U + 5U * i]); + uint64_t v1 = s[1U + 5U * i] ^ (~s[2U + 5U * i] & s[3U + 5U * i]); + uint64_t v2 = s[2U + 5U * i] ^ (~s[3U + 5U * i] & s[4U + 5U * i]); + uint64_t v3 = s[3U + 5U * i] ^ (~s[4U + 5U * i] & s[0U + 5U * i]); + uint64_t v4 = s[4U + 5U * i] ^ (~s[0U + 5U * i] & s[1U + 5U * i]); + s[0U + 5U * i] = v0; + s[1U + 5U * i] = v1; + s[2U + 5U * i] = v2; + s[3U + 5U * i] = v3; + s[4U + 5U * i] = v4;); + uint64_t c = Hacl_Hash_SHA3_keccak_rndc[i1]; + s[0U] = s[0U] ^ c; + } + } + uint32_t remOut = hash_len(a11) % block_len(a11); + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + uint8_t *uu____6 = hbuf; + memcpy(output + hash_len(a11) - remOut, uu____6, remOut * sizeof (uint8_t)); } Hacl_Streaming_Types_error_code @@ -515,78 +1031,79 @@ bool Hacl_Hash_SHA3_is_shake(Hacl_Hash_SHA3_state_t *s) return uu____0 == Spec_Hash_Definitions_Shake128 || uu____0 == Spec_Hash_Definitions_Shake256; } -void 
-Hacl_Hash_SHA3_shake128_hacl( - uint32_t inputByteLen, - uint8_t *input, - uint32_t outputByteLen, - uint8_t *output -) -{ - Hacl_Hash_SHA3_keccak(1344U, 256U, inputByteLen, input, 0x1FU, outputByteLen, output); -} - -void -Hacl_Hash_SHA3_shake256_hacl( - uint32_t inputByteLen, - uint8_t *input, - uint32_t outputByteLen, - uint8_t *output -) -{ - Hacl_Hash_SHA3_keccak(1088U, 512U, inputByteLen, input, 0x1FU, outputByteLen, output); -} - -void Hacl_Hash_SHA3_sha3_224(uint8_t *output, uint8_t *input, uint32_t input_len) -{ - Hacl_Hash_SHA3_keccak(1152U, 448U, input_len, input, 0x06U, 28U, output); -} - -void Hacl_Hash_SHA3_sha3_256(uint8_t *output, uint8_t *input, uint32_t input_len) -{ - Hacl_Hash_SHA3_keccak(1088U, 512U, input_len, input, 0x06U, 32U, output); -} - -void Hacl_Hash_SHA3_sha3_384(uint8_t *output, uint8_t *input, uint32_t input_len) +void Hacl_Hash_SHA3_absorb_inner_32(uint32_t rateInBytes, uint8_t *b, uint64_t *s) { - Hacl_Hash_SHA3_keccak(832U, 768U, input_len, input, 0x06U, 48U, output); -} - -void Hacl_Hash_SHA3_sha3_512(uint8_t *output, uint8_t *input, uint32_t input_len) -{ - Hacl_Hash_SHA3_keccak(576U, 1024U, input_len, input, 0x06U, 64U, output); -} - -static const -uint32_t -keccak_rotc[24U] = - { - 1U, 3U, 6U, 10U, 15U, 21U, 28U, 36U, 45U, 55U, 2U, 14U, 27U, 41U, 56U, 8U, 25U, 43U, 62U, 18U, - 39U, 61U, 20U, 44U - }; - -static const -uint32_t -keccak_piln[24U] = - { - 10U, 7U, 11U, 17U, 18U, 3U, 5U, 16U, 8U, 21U, 24U, 4U, 15U, 23U, 19U, 13U, 12U, 2U, 20U, 14U, - 22U, 9U, 6U, 1U - }; - -static const -uint64_t -keccak_rndc[24U] = + KRML_MAYBE_UNUSED_VAR(rateInBytes); + uint64_t ws[32U] = { 0U }; + uint8_t *b1 = b; + uint64_t u = load64_le(b1); + ws[0U] = u; + uint64_t u0 = load64_le(b1 + 8U); + ws[1U] = u0; + uint64_t u1 = load64_le(b1 + 16U); + ws[2U] = u1; + uint64_t u2 = load64_le(b1 + 24U); + ws[3U] = u2; + uint64_t u3 = load64_le(b1 + 32U); + ws[4U] = u3; + uint64_t u4 = load64_le(b1 + 40U); + ws[5U] = u4; + uint64_t u5 = load64_le(b1 + 48U); + ws[6U] = u5; + uint64_t u6 = load64_le(b1 + 56U); + ws[7U] = u6; + uint64_t u7 = load64_le(b1 + 64U); + ws[8U] = u7; + uint64_t u8 = load64_le(b1 + 72U); + ws[9U] = u8; + uint64_t u9 = load64_le(b1 + 80U); + ws[10U] = u9; + uint64_t u10 = load64_le(b1 + 88U); + ws[11U] = u10; + uint64_t u11 = load64_le(b1 + 96U); + ws[12U] = u11; + uint64_t u12 = load64_le(b1 + 104U); + ws[13U] = u12; + uint64_t u13 = load64_le(b1 + 112U); + ws[14U] = u13; + uint64_t u14 = load64_le(b1 + 120U); + ws[15U] = u14; + uint64_t u15 = load64_le(b1 + 128U); + ws[16U] = u15; + uint64_t u16 = load64_le(b1 + 136U); + ws[17U] = u16; + uint64_t u17 = load64_le(b1 + 144U); + ws[18U] = u17; + uint64_t u18 = load64_le(b1 + 152U); + ws[19U] = u18; + uint64_t u19 = load64_le(b1 + 160U); + ws[20U] = u19; + uint64_t u20 = load64_le(b1 + 168U); + ws[21U] = u20; + uint64_t u21 = load64_le(b1 + 176U); + ws[22U] = u21; + uint64_t u22 = load64_le(b1 + 184U); + ws[23U] = u22; + uint64_t u23 = load64_le(b1 + 192U); + ws[24U] = u23; + uint64_t u24 = load64_le(b1 + 200U); + ws[25U] = u24; + uint64_t u25 = load64_le(b1 + 208U); + ws[26U] = u25; + uint64_t u26 = load64_le(b1 + 216U); + ws[27U] = u26; + uint64_t u27 = load64_le(b1 + 224U); + ws[28U] = u27; + uint64_t u28 = load64_le(b1 + 232U); + ws[29U] = u28; + uint64_t u29 = load64_le(b1 + 240U); + ws[30U] = u29; + uint64_t u30 = load64_le(b1 + 248U); + ws[31U] = u30; + for (uint32_t i = 0U; i < 25U; i++) { - 0x0000000000000001ULL, 0x0000000000008082ULL, 0x800000000000808aULL, 0x8000000080008000ULL, - 
0x000000000000808bULL, 0x0000000080000001ULL, 0x8000000080008081ULL, 0x8000000000008009ULL, - 0x000000000000008aULL, 0x0000000000000088ULL, 0x0000000080008009ULL, 0x000000008000000aULL, - 0x000000008000808bULL, 0x800000000000008bULL, 0x8000000000008089ULL, 0x8000000000008003ULL, - 0x8000000000008002ULL, 0x8000000000000080ULL, 0x000000000000800aULL, 0x800000008000000aULL, - 0x8000000080008081ULL, 0x8000000000008080ULL, 0x0000000080000001ULL, 0x8000000080008008ULL - }; - -void Hacl_Hash_SHA3_state_permute(uint64_t *s) -{ + s[i] = s[i] ^ ws[i]; + } for (uint32_t i0 = 0U; i0 < 24U; i0++) { uint64_t _C[5U] = { 0U }; @@ -606,8 +1123,8 @@ void Hacl_Hash_SHA3_state_permute(uint64_t *s) uint64_t current = x; for (uint32_t i = 0U; i < 24U; i++) { - uint32_t _Y = keccak_piln[i]; - uint32_t r = keccak_rotc[i]; + uint32_t _Y = Hacl_Hash_SHA3_keccak_piln[i]; + uint32_t r = Hacl_Hash_SHA3_keccak_rotc[i]; uint64_t temp = s[_Y]; uint64_t uu____1 = current; s[_Y] = uu____1 << r | uu____1 >> (64U - r); @@ -627,108 +1144,1227 @@ void Hacl_Hash_SHA3_state_permute(uint64_t *s) s[2U + 5U * i] = v2; s[3U + 5U * i] = v3; s[4U + 5U * i] = v4;); - uint64_t c = keccak_rndc[i0]; + uint64_t c = Hacl_Hash_SHA3_keccak_rndc[i0]; s[0U] = s[0U] ^ c; } } -void Hacl_Hash_SHA3_loadState(uint32_t rateInBytes, uint8_t *input, uint64_t *s) +void +Hacl_Hash_SHA3_shake128( + uint8_t *output, + uint32_t outputByteLen, + uint8_t *input, + uint32_t inputByteLen +) { - uint8_t block[200U] = { 0U }; - memcpy(block, input, rateInBytes * sizeof (uint8_t)); + uint8_t *ib = input; + uint8_t *rb = output; + uint64_t s[25U] = { 0U }; + uint32_t rateInBytes1 = 168U; + for (uint32_t i = 0U; i < inputByteLen / rateInBytes1; i++) + { + uint8_t b[256U] = { 0U }; + uint8_t *b_ = b; + uint8_t *b0 = ib; + uint8_t *bl0 = b_; + memcpy(bl0, b0 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t)); + Hacl_Hash_SHA3_absorb_inner_32(rateInBytes1, b_, s); + } + uint8_t b1[256U] = { 0U }; + uint8_t *b_ = b1; + uint32_t rem = inputByteLen % rateInBytes1; + uint8_t *b00 = ib; + uint8_t *bl0 = b_; + memcpy(bl0, b00 + inputByteLen - rem, rem * sizeof (uint8_t)); + uint8_t *b01 = b_; + b01[inputByteLen % rateInBytes1] = 0x1FU; + uint64_t ws0[32U] = { 0U }; + uint8_t *b = b_; + uint64_t u = load64_le(b); + ws0[0U] = u; + uint64_t u0 = load64_le(b + 8U); + ws0[1U] = u0; + uint64_t u1 = load64_le(b + 16U); + ws0[2U] = u1; + uint64_t u2 = load64_le(b + 24U); + ws0[3U] = u2; + uint64_t u3 = load64_le(b + 32U); + ws0[4U] = u3; + uint64_t u4 = load64_le(b + 40U); + ws0[5U] = u4; + uint64_t u5 = load64_le(b + 48U); + ws0[6U] = u5; + uint64_t u6 = load64_le(b + 56U); + ws0[7U] = u6; + uint64_t u7 = load64_le(b + 64U); + ws0[8U] = u7; + uint64_t u8 = load64_le(b + 72U); + ws0[9U] = u8; + uint64_t u9 = load64_le(b + 80U); + ws0[10U] = u9; + uint64_t u10 = load64_le(b + 88U); + ws0[11U] = u10; + uint64_t u11 = load64_le(b + 96U); + ws0[12U] = u11; + uint64_t u12 = load64_le(b + 104U); + ws0[13U] = u12; + uint64_t u13 = load64_le(b + 112U); + ws0[14U] = u13; + uint64_t u14 = load64_le(b + 120U); + ws0[15U] = u14; + uint64_t u15 = load64_le(b + 128U); + ws0[16U] = u15; + uint64_t u16 = load64_le(b + 136U); + ws0[17U] = u16; + uint64_t u17 = load64_le(b + 144U); + ws0[18U] = u17; + uint64_t u18 = load64_le(b + 152U); + ws0[19U] = u18; + uint64_t u19 = load64_le(b + 160U); + ws0[20U] = u19; + uint64_t u20 = load64_le(b + 168U); + ws0[21U] = u20; + uint64_t u21 = load64_le(b + 176U); + ws0[22U] = u21; + uint64_t u22 = load64_le(b + 184U); + ws0[23U] = u22; + uint64_t u23 = 
load64_le(b + 192U); + ws0[24U] = u23; + uint64_t u24 = load64_le(b + 200U); + ws0[25U] = u24; + uint64_t u25 = load64_le(b + 208U); + ws0[26U] = u25; + uint64_t u26 = load64_le(b + 216U); + ws0[27U] = u26; + uint64_t u27 = load64_le(b + 224U); + ws0[28U] = u27; + uint64_t u28 = load64_le(b + 232U); + ws0[29U] = u28; + uint64_t u29 = load64_le(b + 240U); + ws0[30U] = u29; + uint64_t u30 = load64_le(b + 248U); + ws0[31U] = u30; for (uint32_t i = 0U; i < 25U; i++) { - uint64_t u = load64_le(block + i * 8U); - uint64_t x = u; - s[i] = s[i] ^ x; + s[i] = s[i] ^ ws0[i]; } + uint8_t b2[256U] = { 0U }; + uint8_t *b3 = b2; + uint8_t *b0 = b3; + b0[rateInBytes1 - 1U] = 0x80U; + Hacl_Hash_SHA3_absorb_inner_32(rateInBytes1, b3, s); + for (uint32_t i0 = 0U; i0 < outputByteLen / rateInBytes1; i0++) + { + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + uint8_t *b02 = rb; + memcpy(b02 + i0 * rateInBytes1, hbuf, rateInBytes1 * sizeof (uint8_t)); + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + uint64_t _C[5U] = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + _C[i] = s[i + 0U] ^ (s[i + 5U] ^ (s[i + 10U] ^ (s[i + 15U] ^ s[i + 20U])));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + uint64_t uu____0 = _C[(i2 + 1U) % 5U]; + uint64_t _D = _C[(i2 + 4U) % 5U] ^ (uu____0 << 1U | uu____0 >> 63U); + KRML_MAYBE_FOR5(i, 0U, 5U, 1U, s[i2 + 5U * i] = s[i2 + 5U * i] ^ _D;);); + uint64_t x = s[1U]; + uint64_t current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Hash_SHA3_keccak_piln[i]; + uint32_t r = Hacl_Hash_SHA3_keccak_rotc[i]; + uint64_t temp = s[_Y]; + uint64_t uu____1 = current; + s[_Y] = uu____1 << r | uu____1 >> (64U - r); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t v0 = s[0U + 5U * i] ^ (~s[1U + 5U * i] & s[2U + 5U * i]); + uint64_t v1 = s[1U + 5U * i] ^ (~s[2U + 5U * i] & s[3U + 5U * i]); + uint64_t v2 = s[2U + 5U * i] ^ (~s[3U + 5U * i] & s[4U + 5U * i]); + uint64_t v3 = s[3U + 5U * i] ^ (~s[4U + 5U * i] & s[0U + 5U * i]); + uint64_t v4 = s[4U + 5U * i] ^ (~s[0U + 5U * i] & s[1U + 5U * i]); + s[0U + 5U * i] = v0; + s[1U + 5U * i] = v1; + s[2U + 5U * i] = v2; + s[3U + 5U * i] = v3; + s[4U + 5U * i] = v4;); + uint64_t c = Hacl_Hash_SHA3_keccak_rndc[i1]; + s[0U] = s[0U] ^ c; + } + } + uint32_t remOut = outputByteLen % rateInBytes1; + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + memcpy(rb + outputByteLen - remOut, hbuf, remOut * sizeof (uint8_t)); } -static void storeState(uint32_t rateInBytes, uint64_t *s, uint8_t *res) +void +Hacl_Hash_SHA3_shake256( + uint8_t *output, + uint32_t outputByteLen, + uint8_t *input, + uint32_t inputByteLen +) { - uint8_t block[200U] = { 0U }; + uint8_t *ib = input; + uint8_t *rb = output; + uint64_t s[25U] = { 0U }; + uint32_t rateInBytes1 = 136U; + for (uint32_t i = 0U; i < inputByteLen / rateInBytes1; i++) + { + uint8_t b[256U] = { 0U }; + uint8_t *b_ = b; + uint8_t *b0 = ib; + uint8_t *bl0 = b_; + memcpy(bl0, b0 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t)); + Hacl_Hash_SHA3_absorb_inner_32(rateInBytes1, b_, s); + } + uint8_t b1[256U] = { 0U }; + uint8_t *b_ = b1; + uint32_t rem = inputByteLen % rateInBytes1; + uint8_t *b00 = ib; + uint8_t *bl0 = b_; + memcpy(bl0, b00 + inputByteLen - rem, rem * sizeof (uint8_t)); + uint8_t *b01 = b_; + b01[inputByteLen % 
rateInBytes1] = 0x1FU; + uint64_t ws0[32U] = { 0U }; + uint8_t *b = b_; + uint64_t u = load64_le(b); + ws0[0U] = u; + uint64_t u0 = load64_le(b + 8U); + ws0[1U] = u0; + uint64_t u1 = load64_le(b + 16U); + ws0[2U] = u1; + uint64_t u2 = load64_le(b + 24U); + ws0[3U] = u2; + uint64_t u3 = load64_le(b + 32U); + ws0[4U] = u3; + uint64_t u4 = load64_le(b + 40U); + ws0[5U] = u4; + uint64_t u5 = load64_le(b + 48U); + ws0[6U] = u5; + uint64_t u6 = load64_le(b + 56U); + ws0[7U] = u6; + uint64_t u7 = load64_le(b + 64U); + ws0[8U] = u7; + uint64_t u8 = load64_le(b + 72U); + ws0[9U] = u8; + uint64_t u9 = load64_le(b + 80U); + ws0[10U] = u9; + uint64_t u10 = load64_le(b + 88U); + ws0[11U] = u10; + uint64_t u11 = load64_le(b + 96U); + ws0[12U] = u11; + uint64_t u12 = load64_le(b + 104U); + ws0[13U] = u12; + uint64_t u13 = load64_le(b + 112U); + ws0[14U] = u13; + uint64_t u14 = load64_le(b + 120U); + ws0[15U] = u14; + uint64_t u15 = load64_le(b + 128U); + ws0[16U] = u15; + uint64_t u16 = load64_le(b + 136U); + ws0[17U] = u16; + uint64_t u17 = load64_le(b + 144U); + ws0[18U] = u17; + uint64_t u18 = load64_le(b + 152U); + ws0[19U] = u18; + uint64_t u19 = load64_le(b + 160U); + ws0[20U] = u19; + uint64_t u20 = load64_le(b + 168U); + ws0[21U] = u20; + uint64_t u21 = load64_le(b + 176U); + ws0[22U] = u21; + uint64_t u22 = load64_le(b + 184U); + ws0[23U] = u22; + uint64_t u23 = load64_le(b + 192U); + ws0[24U] = u23; + uint64_t u24 = load64_le(b + 200U); + ws0[25U] = u24; + uint64_t u25 = load64_le(b + 208U); + ws0[26U] = u25; + uint64_t u26 = load64_le(b + 216U); + ws0[27U] = u26; + uint64_t u27 = load64_le(b + 224U); + ws0[28U] = u27; + uint64_t u28 = load64_le(b + 232U); + ws0[29U] = u28; + uint64_t u29 = load64_le(b + 240U); + ws0[30U] = u29; + uint64_t u30 = load64_le(b + 248U); + ws0[31U] = u30; for (uint32_t i = 0U; i < 25U; i++) { - uint64_t sj = s[i]; - store64_le(block + i * 8U, sj); + s[i] = s[i] ^ ws0[i]; + } + uint8_t b2[256U] = { 0U }; + uint8_t *b3 = b2; + uint8_t *b0 = b3; + b0[rateInBytes1 - 1U] = 0x80U; + Hacl_Hash_SHA3_absorb_inner_32(rateInBytes1, b3, s); + for (uint32_t i0 = 0U; i0 < outputByteLen / rateInBytes1; i0++) + { + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + uint8_t *b02 = rb; + memcpy(b02 + i0 * rateInBytes1, hbuf, rateInBytes1 * sizeof (uint8_t)); + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + uint64_t _C[5U] = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + _C[i] = s[i + 0U] ^ (s[i + 5U] ^ (s[i + 10U] ^ (s[i + 15U] ^ s[i + 20U])));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + uint64_t uu____0 = _C[(i2 + 1U) % 5U]; + uint64_t _D = _C[(i2 + 4U) % 5U] ^ (uu____0 << 1U | uu____0 >> 63U); + KRML_MAYBE_FOR5(i, 0U, 5U, 1U, s[i2 + 5U * i] = s[i2 + 5U * i] ^ _D;);); + uint64_t x = s[1U]; + uint64_t current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Hash_SHA3_keccak_piln[i]; + uint32_t r = Hacl_Hash_SHA3_keccak_rotc[i]; + uint64_t temp = s[_Y]; + uint64_t uu____1 = current; + s[_Y] = uu____1 << r | uu____1 >> (64U - r); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t v0 = s[0U + 5U * i] ^ (~s[1U + 5U * i] & s[2U + 5U * i]); + uint64_t v1 = s[1U + 5U * i] ^ (~s[2U + 5U * i] & s[3U + 5U * i]); + uint64_t v2 = s[2U + 5U * i] ^ (~s[3U + 5U * i] & s[4U + 5U * i]); + uint64_t v3 = s[3U + 5U * i] ^ (~s[4U + 5U * i] & s[0U + 5U * i]); + uint64_t v4 = s[4U + 5U * i] ^ (~s[0U + 5U * i] & s[1U + 5U * i]); + s[0U + 
5U * i] = v0; + s[1U + 5U * i] = v1; + s[2U + 5U * i] = v2; + s[3U + 5U * i] = v3; + s[4U + 5U * i] = v4;); + uint64_t c = Hacl_Hash_SHA3_keccak_rndc[i1]; + s[0U] = s[0U] ^ c; + } + } + uint32_t remOut = outputByteLen % rateInBytes1; + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); } - memcpy(res, block, rateInBytes * sizeof (uint8_t)); + memcpy(rb + outputByteLen - remOut, hbuf, remOut * sizeof (uint8_t)); } -void Hacl_Hash_SHA3_absorb_inner(uint32_t rateInBytes, uint8_t *block, uint64_t *s) +void Hacl_Hash_SHA3_sha3_224(uint8_t *output, uint8_t *input, uint32_t inputByteLen) { - Hacl_Hash_SHA3_loadState(rateInBytes, block, s); - Hacl_Hash_SHA3_state_permute(s); + uint8_t *ib = input; + uint8_t *rb = output; + uint64_t s[25U] = { 0U }; + uint32_t rateInBytes1 = 144U; + for (uint32_t i = 0U; i < inputByteLen / rateInBytes1; i++) + { + uint8_t b[256U] = { 0U }; + uint8_t *b_ = b; + uint8_t *b0 = ib; + uint8_t *bl0 = b_; + memcpy(bl0, b0 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t)); + Hacl_Hash_SHA3_absorb_inner_32(rateInBytes1, b_, s); + } + uint8_t b1[256U] = { 0U }; + uint8_t *b_ = b1; + uint32_t rem = inputByteLen % rateInBytes1; + uint8_t *b00 = ib; + uint8_t *bl0 = b_; + memcpy(bl0, b00 + inputByteLen - rem, rem * sizeof (uint8_t)); + uint8_t *b01 = b_; + b01[inputByteLen % rateInBytes1] = 0x06U; + uint64_t ws0[32U] = { 0U }; + uint8_t *b = b_; + uint64_t u = load64_le(b); + ws0[0U] = u; + uint64_t u0 = load64_le(b + 8U); + ws0[1U] = u0; + uint64_t u1 = load64_le(b + 16U); + ws0[2U] = u1; + uint64_t u2 = load64_le(b + 24U); + ws0[3U] = u2; + uint64_t u3 = load64_le(b + 32U); + ws0[4U] = u3; + uint64_t u4 = load64_le(b + 40U); + ws0[5U] = u4; + uint64_t u5 = load64_le(b + 48U); + ws0[6U] = u5; + uint64_t u6 = load64_le(b + 56U); + ws0[7U] = u6; + uint64_t u7 = load64_le(b + 64U); + ws0[8U] = u7; + uint64_t u8 = load64_le(b + 72U); + ws0[9U] = u8; + uint64_t u9 = load64_le(b + 80U); + ws0[10U] = u9; + uint64_t u10 = load64_le(b + 88U); + ws0[11U] = u10; + uint64_t u11 = load64_le(b + 96U); + ws0[12U] = u11; + uint64_t u12 = load64_le(b + 104U); + ws0[13U] = u12; + uint64_t u13 = load64_le(b + 112U); + ws0[14U] = u13; + uint64_t u14 = load64_le(b + 120U); + ws0[15U] = u14; + uint64_t u15 = load64_le(b + 128U); + ws0[16U] = u15; + uint64_t u16 = load64_le(b + 136U); + ws0[17U] = u16; + uint64_t u17 = load64_le(b + 144U); + ws0[18U] = u17; + uint64_t u18 = load64_le(b + 152U); + ws0[19U] = u18; + uint64_t u19 = load64_le(b + 160U); + ws0[20U] = u19; + uint64_t u20 = load64_le(b + 168U); + ws0[21U] = u20; + uint64_t u21 = load64_le(b + 176U); + ws0[22U] = u21; + uint64_t u22 = load64_le(b + 184U); + ws0[23U] = u22; + uint64_t u23 = load64_le(b + 192U); + ws0[24U] = u23; + uint64_t u24 = load64_le(b + 200U); + ws0[25U] = u24; + uint64_t u25 = load64_le(b + 208U); + ws0[26U] = u25; + uint64_t u26 = load64_le(b + 216U); + ws0[27U] = u26; + uint64_t u27 = load64_le(b + 224U); + ws0[28U] = u27; + uint64_t u28 = load64_le(b + 232U); + ws0[29U] = u28; + uint64_t u29 = load64_le(b + 240U); + ws0[30U] = u29; + uint64_t u30 = load64_le(b + 248U); + ws0[31U] = u30; + for (uint32_t i = 0U; i < 25U; i++) + { + s[i] = s[i] ^ ws0[i]; + } + uint8_t b2[256U] = { 0U }; + uint8_t *b3 = b2; + uint8_t *b0 = b3; + b0[rateInBytes1 - 1U] = 0x80U; + Hacl_Hash_SHA3_absorb_inner_32(rateInBytes1, b3, s); + for (uint32_t i0 = 0U; i0 < 28U / rateInBytes1; i0++) + { + uint8_t 
hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + uint8_t *b02 = rb; + memcpy(b02 + i0 * rateInBytes1, hbuf, rateInBytes1 * sizeof (uint8_t)); + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + uint64_t _C[5U] = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + _C[i] = s[i + 0U] ^ (s[i + 5U] ^ (s[i + 10U] ^ (s[i + 15U] ^ s[i + 20U])));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + uint64_t uu____0 = _C[(i2 + 1U) % 5U]; + uint64_t _D = _C[(i2 + 4U) % 5U] ^ (uu____0 << 1U | uu____0 >> 63U); + KRML_MAYBE_FOR5(i, 0U, 5U, 1U, s[i2 + 5U * i] = s[i2 + 5U * i] ^ _D;);); + uint64_t x = s[1U]; + uint64_t current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Hash_SHA3_keccak_piln[i]; + uint32_t r = Hacl_Hash_SHA3_keccak_rotc[i]; + uint64_t temp = s[_Y]; + uint64_t uu____1 = current; + s[_Y] = uu____1 << r | uu____1 >> (64U - r); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t v0 = s[0U + 5U * i] ^ (~s[1U + 5U * i] & s[2U + 5U * i]); + uint64_t v1 = s[1U + 5U * i] ^ (~s[2U + 5U * i] & s[3U + 5U * i]); + uint64_t v2 = s[2U + 5U * i] ^ (~s[3U + 5U * i] & s[4U + 5U * i]); + uint64_t v3 = s[3U + 5U * i] ^ (~s[4U + 5U * i] & s[0U + 5U * i]); + uint64_t v4 = s[4U + 5U * i] ^ (~s[0U + 5U * i] & s[1U + 5U * i]); + s[0U + 5U * i] = v0; + s[1U + 5U * i] = v1; + s[2U + 5U * i] = v2; + s[3U + 5U * i] = v3; + s[4U + 5U * i] = v4;); + uint64_t c = Hacl_Hash_SHA3_keccak_rndc[i1]; + s[0U] = s[0U] ^ c; + } + } + uint32_t remOut = 28U % rateInBytes1; + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + memcpy(rb + 28U - remOut, hbuf, remOut * sizeof (uint8_t)); } -static void -absorb( - uint64_t *s, - uint32_t rateInBytes, - uint32_t inputByteLen, - uint8_t *input, - uint8_t delimitedSuffix -) +void Hacl_Hash_SHA3_sha3_256(uint8_t *output, uint8_t *input, uint32_t inputByteLen) +{ + uint8_t *ib = input; + uint8_t *rb = output; + uint64_t s[25U] = { 0U }; + uint32_t rateInBytes1 = 136U; + for (uint32_t i = 0U; i < inputByteLen / rateInBytes1; i++) + { + uint8_t b[256U] = { 0U }; + uint8_t *b_ = b; + uint8_t *b0 = ib; + uint8_t *bl0 = b_; + memcpy(bl0, b0 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t)); + Hacl_Hash_SHA3_absorb_inner_32(rateInBytes1, b_, s); + } + uint8_t b1[256U] = { 0U }; + uint8_t *b_ = b1; + uint32_t rem = inputByteLen % rateInBytes1; + uint8_t *b00 = ib; + uint8_t *bl0 = b_; + memcpy(bl0, b00 + inputByteLen - rem, rem * sizeof (uint8_t)); + uint8_t *b01 = b_; + b01[inputByteLen % rateInBytes1] = 0x06U; + uint64_t ws0[32U] = { 0U }; + uint8_t *b = b_; + uint64_t u = load64_le(b); + ws0[0U] = u; + uint64_t u0 = load64_le(b + 8U); + ws0[1U] = u0; + uint64_t u1 = load64_le(b + 16U); + ws0[2U] = u1; + uint64_t u2 = load64_le(b + 24U); + ws0[3U] = u2; + uint64_t u3 = load64_le(b + 32U); + ws0[4U] = u3; + uint64_t u4 = load64_le(b + 40U); + ws0[5U] = u4; + uint64_t u5 = load64_le(b + 48U); + ws0[6U] = u5; + uint64_t u6 = load64_le(b + 56U); + ws0[7U] = u6; + uint64_t u7 = load64_le(b + 64U); + ws0[8U] = u7; + uint64_t u8 = load64_le(b + 72U); + ws0[9U] = u8; + uint64_t u9 = load64_le(b + 80U); + ws0[10U] = u9; + uint64_t u10 = load64_le(b + 88U); + ws0[11U] = u10; + uint64_t u11 = load64_le(b + 96U); + ws0[12U] = u11; + uint64_t u12 = load64_le(b + 104U); + ws0[13U] = u12; + uint64_t u13 = load64_le(b + 112U); 
+ ws0[14U] = u13; + uint64_t u14 = load64_le(b + 120U); + ws0[15U] = u14; + uint64_t u15 = load64_le(b + 128U); + ws0[16U] = u15; + uint64_t u16 = load64_le(b + 136U); + ws0[17U] = u16; + uint64_t u17 = load64_le(b + 144U); + ws0[18U] = u17; + uint64_t u18 = load64_le(b + 152U); + ws0[19U] = u18; + uint64_t u19 = load64_le(b + 160U); + ws0[20U] = u19; + uint64_t u20 = load64_le(b + 168U); + ws0[21U] = u20; + uint64_t u21 = load64_le(b + 176U); + ws0[22U] = u21; + uint64_t u22 = load64_le(b + 184U); + ws0[23U] = u22; + uint64_t u23 = load64_le(b + 192U); + ws0[24U] = u23; + uint64_t u24 = load64_le(b + 200U); + ws0[25U] = u24; + uint64_t u25 = load64_le(b + 208U); + ws0[26U] = u25; + uint64_t u26 = load64_le(b + 216U); + ws0[27U] = u26; + uint64_t u27 = load64_le(b + 224U); + ws0[28U] = u27; + uint64_t u28 = load64_le(b + 232U); + ws0[29U] = u28; + uint64_t u29 = load64_le(b + 240U); + ws0[30U] = u29; + uint64_t u30 = load64_le(b + 248U); + ws0[31U] = u30; + for (uint32_t i = 0U; i < 25U; i++) + { + s[i] = s[i] ^ ws0[i]; + } + uint8_t b2[256U] = { 0U }; + uint8_t *b3 = b2; + uint8_t *b0 = b3; + b0[rateInBytes1 - 1U] = 0x80U; + Hacl_Hash_SHA3_absorb_inner_32(rateInBytes1, b3, s); + for (uint32_t i0 = 0U; i0 < 32U / rateInBytes1; i0++) + { + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + uint8_t *b02 = rb; + memcpy(b02 + i0 * rateInBytes1, hbuf, rateInBytes1 * sizeof (uint8_t)); + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + uint64_t _C[5U] = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + _C[i] = s[i + 0U] ^ (s[i + 5U] ^ (s[i + 10U] ^ (s[i + 15U] ^ s[i + 20U])));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + uint64_t uu____0 = _C[(i2 + 1U) % 5U]; + uint64_t _D = _C[(i2 + 4U) % 5U] ^ (uu____0 << 1U | uu____0 >> 63U); + KRML_MAYBE_FOR5(i, 0U, 5U, 1U, s[i2 + 5U * i] = s[i2 + 5U * i] ^ _D;);); + uint64_t x = s[1U]; + uint64_t current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Hash_SHA3_keccak_piln[i]; + uint32_t r = Hacl_Hash_SHA3_keccak_rotc[i]; + uint64_t temp = s[_Y]; + uint64_t uu____1 = current; + s[_Y] = uu____1 << r | uu____1 >> (64U - r); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t v0 = s[0U + 5U * i] ^ (~s[1U + 5U * i] & s[2U + 5U * i]); + uint64_t v1 = s[1U + 5U * i] ^ (~s[2U + 5U * i] & s[3U + 5U * i]); + uint64_t v2 = s[2U + 5U * i] ^ (~s[3U + 5U * i] & s[4U + 5U * i]); + uint64_t v3 = s[3U + 5U * i] ^ (~s[4U + 5U * i] & s[0U + 5U * i]); + uint64_t v4 = s[4U + 5U * i] ^ (~s[0U + 5U * i] & s[1U + 5U * i]); + s[0U + 5U * i] = v0; + s[1U + 5U * i] = v1; + s[2U + 5U * i] = v2; + s[3U + 5U * i] = v3; + s[4U + 5U * i] = v4;); + uint64_t c = Hacl_Hash_SHA3_keccak_rndc[i1]; + s[0U] = s[0U] ^ c; + } + } + uint32_t remOut = 32U % rateInBytes1; + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + memcpy(rb + 32U - remOut, hbuf, remOut * sizeof (uint8_t)); +} + +void Hacl_Hash_SHA3_sha3_384(uint8_t *output, uint8_t *input, uint32_t inputByteLen) +{ + uint8_t *ib = input; + uint8_t *rb = output; + uint64_t s[25U] = { 0U }; + uint32_t rateInBytes1 = 104U; + for (uint32_t i = 0U; i < inputByteLen / rateInBytes1; i++) + { + uint8_t b[256U] = { 0U }; + uint8_t *b_ = b; + uint8_t *b0 = ib; + uint8_t *bl0 = b_; + memcpy(bl0, b0 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t)); + 
Hacl_Hash_SHA3_absorb_inner_32(rateInBytes1, b_, s); + } + uint8_t b1[256U] = { 0U }; + uint8_t *b_ = b1; + uint32_t rem = inputByteLen % rateInBytes1; + uint8_t *b00 = ib; + uint8_t *bl0 = b_; + memcpy(bl0, b00 + inputByteLen - rem, rem * sizeof (uint8_t)); + uint8_t *b01 = b_; + b01[inputByteLen % rateInBytes1] = 0x06U; + uint64_t ws0[32U] = { 0U }; + uint8_t *b = b_; + uint64_t u = load64_le(b); + ws0[0U] = u; + uint64_t u0 = load64_le(b + 8U); + ws0[1U] = u0; + uint64_t u1 = load64_le(b + 16U); + ws0[2U] = u1; + uint64_t u2 = load64_le(b + 24U); + ws0[3U] = u2; + uint64_t u3 = load64_le(b + 32U); + ws0[4U] = u3; + uint64_t u4 = load64_le(b + 40U); + ws0[5U] = u4; + uint64_t u5 = load64_le(b + 48U); + ws0[6U] = u5; + uint64_t u6 = load64_le(b + 56U); + ws0[7U] = u6; + uint64_t u7 = load64_le(b + 64U); + ws0[8U] = u7; + uint64_t u8 = load64_le(b + 72U); + ws0[9U] = u8; + uint64_t u9 = load64_le(b + 80U); + ws0[10U] = u9; + uint64_t u10 = load64_le(b + 88U); + ws0[11U] = u10; + uint64_t u11 = load64_le(b + 96U); + ws0[12U] = u11; + uint64_t u12 = load64_le(b + 104U); + ws0[13U] = u12; + uint64_t u13 = load64_le(b + 112U); + ws0[14U] = u13; + uint64_t u14 = load64_le(b + 120U); + ws0[15U] = u14; + uint64_t u15 = load64_le(b + 128U); + ws0[16U] = u15; + uint64_t u16 = load64_le(b + 136U); + ws0[17U] = u16; + uint64_t u17 = load64_le(b + 144U); + ws0[18U] = u17; + uint64_t u18 = load64_le(b + 152U); + ws0[19U] = u18; + uint64_t u19 = load64_le(b + 160U); + ws0[20U] = u19; + uint64_t u20 = load64_le(b + 168U); + ws0[21U] = u20; + uint64_t u21 = load64_le(b + 176U); + ws0[22U] = u21; + uint64_t u22 = load64_le(b + 184U); + ws0[23U] = u22; + uint64_t u23 = load64_le(b + 192U); + ws0[24U] = u23; + uint64_t u24 = load64_le(b + 200U); + ws0[25U] = u24; + uint64_t u25 = load64_le(b + 208U); + ws0[26U] = u25; + uint64_t u26 = load64_le(b + 216U); + ws0[27U] = u26; + uint64_t u27 = load64_le(b + 224U); + ws0[28U] = u27; + uint64_t u28 = load64_le(b + 232U); + ws0[29U] = u28; + uint64_t u29 = load64_le(b + 240U); + ws0[30U] = u29; + uint64_t u30 = load64_le(b + 248U); + ws0[31U] = u30; + for (uint32_t i = 0U; i < 25U; i++) + { + s[i] = s[i] ^ ws0[i]; + } + uint8_t b2[256U] = { 0U }; + uint8_t *b3 = b2; + uint8_t *b0 = b3; + b0[rateInBytes1 - 1U] = 0x80U; + Hacl_Hash_SHA3_absorb_inner_32(rateInBytes1, b3, s); + for (uint32_t i0 = 0U; i0 < 48U / rateInBytes1; i0++) + { + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + uint8_t *b02 = rb; + memcpy(b02 + i0 * rateInBytes1, hbuf, rateInBytes1 * sizeof (uint8_t)); + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + uint64_t _C[5U] = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + _C[i] = s[i + 0U] ^ (s[i + 5U] ^ (s[i + 10U] ^ (s[i + 15U] ^ s[i + 20U])));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + uint64_t uu____0 = _C[(i2 + 1U) % 5U]; + uint64_t _D = _C[(i2 + 4U) % 5U] ^ (uu____0 << 1U | uu____0 >> 63U); + KRML_MAYBE_FOR5(i, 0U, 5U, 1U, s[i2 + 5U * i] = s[i2 + 5U * i] ^ _D;);); + uint64_t x = s[1U]; + uint64_t current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Hash_SHA3_keccak_piln[i]; + uint32_t r = Hacl_Hash_SHA3_keccak_rotc[i]; + uint64_t temp = s[_Y]; + uint64_t uu____1 = current; + s[_Y] = uu____1 << r | uu____1 >> (64U - r); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t v0 = s[0U + 5U * i] ^ (~s[1U + 5U * i] & s[2U + 5U * i]); + uint64_t v1 = s[1U + 5U * i] ^ (~s[2U + 5U 
* i] & s[3U + 5U * i]); + uint64_t v2 = s[2U + 5U * i] ^ (~s[3U + 5U * i] & s[4U + 5U * i]); + uint64_t v3 = s[3U + 5U * i] ^ (~s[4U + 5U * i] & s[0U + 5U * i]); + uint64_t v4 = s[4U + 5U * i] ^ (~s[0U + 5U * i] & s[1U + 5U * i]); + s[0U + 5U * i] = v0; + s[1U + 5U * i] = v1; + s[2U + 5U * i] = v2; + s[3U + 5U * i] = v3; + s[4U + 5U * i] = v4;); + uint64_t c = Hacl_Hash_SHA3_keccak_rndc[i1]; + s[0U] = s[0U] ^ c; + } + } + uint32_t remOut = 48U % rateInBytes1; + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + memcpy(rb + 48U - remOut, hbuf, remOut * sizeof (uint8_t)); +} + +void Hacl_Hash_SHA3_sha3_512(uint8_t *output, uint8_t *input, uint32_t inputByteLen) +{ + uint8_t *ib = input; + uint8_t *rb = output; + uint64_t s[25U] = { 0U }; + uint32_t rateInBytes1 = 72U; + for (uint32_t i = 0U; i < inputByteLen / rateInBytes1; i++) + { + uint8_t b[256U] = { 0U }; + uint8_t *b_ = b; + uint8_t *b0 = ib; + uint8_t *bl0 = b_; + memcpy(bl0, b0 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t)); + Hacl_Hash_SHA3_absorb_inner_32(rateInBytes1, b_, s); + } + uint8_t b1[256U] = { 0U }; + uint8_t *b_ = b1; + uint32_t rem = inputByteLen % rateInBytes1; + uint8_t *b00 = ib; + uint8_t *bl0 = b_; + memcpy(bl0, b00 + inputByteLen - rem, rem * sizeof (uint8_t)); + uint8_t *b01 = b_; + b01[inputByteLen % rateInBytes1] = 0x06U; + uint64_t ws0[32U] = { 0U }; + uint8_t *b = b_; + uint64_t u = load64_le(b); + ws0[0U] = u; + uint64_t u0 = load64_le(b + 8U); + ws0[1U] = u0; + uint64_t u1 = load64_le(b + 16U); + ws0[2U] = u1; + uint64_t u2 = load64_le(b + 24U); + ws0[3U] = u2; + uint64_t u3 = load64_le(b + 32U); + ws0[4U] = u3; + uint64_t u4 = load64_le(b + 40U); + ws0[5U] = u4; + uint64_t u5 = load64_le(b + 48U); + ws0[6U] = u5; + uint64_t u6 = load64_le(b + 56U); + ws0[7U] = u6; + uint64_t u7 = load64_le(b + 64U); + ws0[8U] = u7; + uint64_t u8 = load64_le(b + 72U); + ws0[9U] = u8; + uint64_t u9 = load64_le(b + 80U); + ws0[10U] = u9; + uint64_t u10 = load64_le(b + 88U); + ws0[11U] = u10; + uint64_t u11 = load64_le(b + 96U); + ws0[12U] = u11; + uint64_t u12 = load64_le(b + 104U); + ws0[13U] = u12; + uint64_t u13 = load64_le(b + 112U); + ws0[14U] = u13; + uint64_t u14 = load64_le(b + 120U); + ws0[15U] = u14; + uint64_t u15 = load64_le(b + 128U); + ws0[16U] = u15; + uint64_t u16 = load64_le(b + 136U); + ws0[17U] = u16; + uint64_t u17 = load64_le(b + 144U); + ws0[18U] = u17; + uint64_t u18 = load64_le(b + 152U); + ws0[19U] = u18; + uint64_t u19 = load64_le(b + 160U); + ws0[20U] = u19; + uint64_t u20 = load64_le(b + 168U); + ws0[21U] = u20; + uint64_t u21 = load64_le(b + 176U); + ws0[22U] = u21; + uint64_t u22 = load64_le(b + 184U); + ws0[23U] = u22; + uint64_t u23 = load64_le(b + 192U); + ws0[24U] = u23; + uint64_t u24 = load64_le(b + 200U); + ws0[25U] = u24; + uint64_t u25 = load64_le(b + 208U); + ws0[26U] = u25; + uint64_t u26 = load64_le(b + 216U); + ws0[27U] = u26; + uint64_t u27 = load64_le(b + 224U); + ws0[28U] = u27; + uint64_t u28 = load64_le(b + 232U); + ws0[29U] = u28; + uint64_t u29 = load64_le(b + 240U); + ws0[30U] = u29; + uint64_t u30 = load64_le(b + 248U); + ws0[31U] = u30; + for (uint32_t i = 0U; i < 25U; i++) + { + s[i] = s[i] ^ ws0[i]; + } + uint8_t b2[256U] = { 0U }; + uint8_t *b3 = b2; + uint8_t *b0 = b3; + b0[rateInBytes1 - 1U] = 0x80U; + Hacl_Hash_SHA3_absorb_inner_32(rateInBytes1, b3, s); + for (uint32_t i0 = 0U; i0 < 64U / rateInBytes1; i0++) + { + uint8_t 
hbuf[256U] = { 0U };
+    uint64_t ws[32U] = { 0U };
+    memcpy(ws, s, 25U * sizeof (uint64_t));
+    for (uint32_t i = 0U; i < 32U; i++)
+    {
+      store64_le(hbuf + i * 8U, ws[i]);
+    }
+    uint8_t *b02 = rb;
+    memcpy(b02 + i0 * rateInBytes1, hbuf, rateInBytes1 * sizeof (uint8_t));
+    for (uint32_t i1 = 0U; i1 < 24U; i1++)
+    {
+      uint64_t _C[5U] = { 0U };
+      KRML_MAYBE_FOR5(i,
+        0U,
+        5U,
+        1U,
+        _C[i] = s[i + 0U] ^ (s[i + 5U] ^ (s[i + 10U] ^ (s[i + 15U] ^ s[i + 20U]))););
+      KRML_MAYBE_FOR5(i2,
+        0U,
+        5U,
+        1U,
+        uint64_t uu____0 = _C[(i2 + 1U) % 5U];
+        uint64_t _D = _C[(i2 + 4U) % 5U] ^ (uu____0 << 1U | uu____0 >> 63U);
+        KRML_MAYBE_FOR5(i, 0U, 5U, 1U, s[i2 + 5U * i] = s[i2 + 5U * i] ^ _D;););
+      uint64_t x = s[1U];
+      uint64_t current = x;
+      for (uint32_t i = 0U; i < 24U; i++)
+      {
+        uint32_t _Y = Hacl_Hash_SHA3_keccak_piln[i];
+        uint32_t r = Hacl_Hash_SHA3_keccak_rotc[i];
+        uint64_t temp = s[_Y];
+        uint64_t uu____1 = current;
+        s[_Y] = uu____1 << r | uu____1 >> (64U - r);
+        current = temp;
+      }
+      KRML_MAYBE_FOR5(i,
+        0U,
+        5U,
+        1U,
+        uint64_t v0 = s[0U + 5U * i] ^ (~s[1U + 5U * i] & s[2U + 5U * i]);
+        uint64_t v1 = s[1U + 5U * i] ^ (~s[2U + 5U * i] & s[3U + 5U * i]);
+        uint64_t v2 = s[2U + 5U * i] ^ (~s[3U + 5U * i] & s[4U + 5U * i]);
+        uint64_t v3 = s[3U + 5U * i] ^ (~s[4U + 5U * i] & s[0U + 5U * i]);
+        uint64_t v4 = s[4U + 5U * i] ^ (~s[0U + 5U * i] & s[1U + 5U * i]);
+        s[0U + 5U * i] = v0;
+        s[1U + 5U * i] = v1;
+        s[2U + 5U * i] = v2;
+        s[3U + 5U * i] = v3;
+        s[4U + 5U * i] = v4;);
+      uint64_t c = Hacl_Hash_SHA3_keccak_rndc[i1];
+      s[0U] = s[0U] ^ c;
+    }
+  }
+  uint32_t remOut = 64U % rateInBytes1;
+  uint8_t hbuf[256U] = { 0U };
+  uint64_t ws[32U] = { 0U };
+  memcpy(ws, s, 25U * sizeof (uint64_t));
+  for (uint32_t i = 0U; i < 32U; i++)
+  {
+    store64_le(hbuf + i * 8U, ws[i]);
+  }
+  memcpy(rb + 64U - remOut, hbuf, remOut * sizeof (uint8_t));
+}
+
+/**
+Allocate a state buffer of 200 bytes, i.e., uint64_t[25]
+*/
+uint64_t *Hacl_Hash_SHA3_state_malloc(void)
+{
+  uint64_t *buf = (uint64_t *)KRML_HOST_CALLOC(25U, sizeof (uint64_t));
+  return buf;
+}
+
+/**
+Free a state buffer
+*/
+void Hacl_Hash_SHA3_state_free(uint64_t *s)
 {
-  uint32_t n_blocks = inputByteLen / rateInBytes;
-  uint32_t rem = inputByteLen % rateInBytes;
-  for (uint32_t i = 0U; i < n_blocks; i++)
-  {
-    uint8_t *block = input + i * rateInBytes;
-    Hacl_Hash_SHA3_absorb_inner(rateInBytes, block, s);
-  }
-  uint8_t *last = input + n_blocks * rateInBytes;
-  uint8_t lastBlock_[200U] = { 0U };
-  uint8_t *lastBlock = lastBlock_;
-  memcpy(lastBlock, last, rem * sizeof (uint8_t));
-  lastBlock[rem] = delimitedSuffix;
-  Hacl_Hash_SHA3_loadState(rateInBytes, lastBlock, s);
-  if (!(((uint32_t)delimitedSuffix & 0x80U) == 0U) && rem == rateInBytes - 1U)
-  {
-    Hacl_Hash_SHA3_state_permute(s);
-  }
-  uint8_t nextBlock_[200U] = { 0U };
-  uint8_t *nextBlock = nextBlock_;
-  nextBlock[rateInBytes - 1U] = 0x80U;
-  Hacl_Hash_SHA3_loadState(rateInBytes, nextBlock, s);
-  Hacl_Hash_SHA3_state_permute(s);
+  KRML_HOST_FREE(s);
 }

+/**
+Absorb a number of full input blocks and write the output state
+
+  This function is intended to receive a hash state and input buffer.
+  It processes an input whose length is a multiple of 168 bytes (the
+  SHAKE128 block size); any additional bytes of a final partial block
+  are ignored.
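+
+  As an illustration, the intended multi-block flow combines this function
+  with the companion functions documented below; the buffer names in this
+  sketch are hypothetical, and `outputByteLen` is assumed to be a multiple
+  of 168:
+
+    uint64_t *st = Hacl_Hash_SHA3_state_malloc();
+    Hacl_Hash_SHA3_shake128_absorb_nblocks(st, input, inputByteLen);
+    Hacl_Hash_SHA3_shake128_absorb_final(st, input, inputByteLen);
+    Hacl_Hash_SHA3_shake128_squeeze_nblocks(st, output, outputByteLen);
+    Hacl_Hash_SHA3_state_free(st);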
+
+  The argument `state` (IN/OUT) points to hash state, i.e., uint64_t[25]
+  The argument `input` (IN) points to `inputByteLen` bytes of valid memory,
+  i.e., uint8_t[inputByteLen]
+*/
 void
-Hacl_Hash_SHA3_squeeze0(
-  uint64_t *s,
-  uint32_t rateInBytes,
-  uint32_t outputByteLen,
-  uint8_t *output
-)
+Hacl_Hash_SHA3_shake128_absorb_nblocks(uint64_t *state, uint8_t *input, uint32_t inputByteLen)
 {
-  uint32_t outBlocks = outputByteLen / rateInBytes;
-  uint32_t remOut = outputByteLen % rateInBytes;
-  uint8_t *last = output + outputByteLen - remOut;
-  uint8_t *blocks = output;
-  for (uint32_t i = 0U; i < outBlocks; i++)
+  for (uint32_t i = 0U; i < inputByteLen / 168U; i++)
   {
-    storeState(rateInBytes, s, blocks + i * rateInBytes);
-    Hacl_Hash_SHA3_state_permute(s);
+    uint8_t b[256U] = { 0U };
+    uint8_t *b_ = b;
+    uint8_t *b0 = input;
+    uint8_t *bl0 = b_;
+    memcpy(bl0, b0 + i * 168U, 168U * sizeof (uint8_t));
+    Hacl_Hash_SHA3_absorb_inner_32(168U, b_, state);
   }
-  storeState(remOut, s, last);
 }

+/**
+Absorb a final partial block of input and write the output state
+
+  This function is intended to receive a hash state and input buffer.
+  It processes the sequence of bytes at the end of the input buffer that
+  is shorter than 168 bytes (the SHAKE128 block size); any bytes of full
+  blocks at the start of the input buffer are ignored.
+
+  The argument `state` (IN/OUT) points to hash state, i.e., uint64_t[25]
+  The argument `input` (IN) points to `inputByteLen` bytes of valid memory,
+  i.e., uint8_t[inputByteLen]
+
+  Note: The full size of the input buffer must be passed in `inputByteLen`,
+  including the number of full-block bytes at the start of the buffer that
+  are ignored.
+*/
 void
-Hacl_Hash_SHA3_keccak(
-  uint32_t rate,
-  uint32_t capacity,
-  uint32_t inputByteLen,
-  uint8_t *input,
-  uint8_t delimitedSuffix,
-  uint32_t outputByteLen,
-  uint8_t *output
+Hacl_Hash_SHA3_shake128_absorb_final(uint64_t *state, uint8_t *input, uint32_t inputByteLen)
+{
+  uint8_t b1[256U] = { 0U };
+  uint8_t *b_ = b1;
+  uint32_t rem = inputByteLen % 168U;
+  uint8_t *b00 = input;
+  uint8_t *bl0 = b_;
+  memcpy(bl0, b00 + inputByteLen - rem, rem * sizeof (uint8_t));
+  uint8_t *b01 = b_;
+  b01[inputByteLen % 168U] = 0x1FU;
+  uint64_t ws[32U] = { 0U };
+  uint8_t *b = b_;
+  uint64_t u = load64_le(b);
+  ws[0U] = u;
+  uint64_t u0 = load64_le(b + 8U);
+  ws[1U] = u0;
+  uint64_t u1 = load64_le(b + 16U);
+  ws[2U] = u1;
+  uint64_t u2 = load64_le(b + 24U);
+  ws[3U] = u2;
+  uint64_t u3 = load64_le(b + 32U);
+  ws[4U] = u3;
+  uint64_t u4 = load64_le(b + 40U);
+  ws[5U] = u4;
+  uint64_t u5 = load64_le(b + 48U);
+  ws[6U] = u5;
+  uint64_t u6 = load64_le(b + 56U);
+  ws[7U] = u6;
+  uint64_t u7 = load64_le(b + 64U);
+  ws[8U] = u7;
+  uint64_t u8 = load64_le(b + 72U);
+  ws[9U] = u8;
+  uint64_t u9 = load64_le(b + 80U);
+  ws[10U] = u9;
+  uint64_t u10 = load64_le(b + 88U);
+  ws[11U] = u10;
+  uint64_t u11 = load64_le(b + 96U);
+  ws[12U] = u11;
+  uint64_t u12 = load64_le(b + 104U);
+  ws[13U] = u12;
+  uint64_t u13 = load64_le(b + 112U);
+  ws[14U] = u13;
+  uint64_t u14 = load64_le(b + 120U);
+  ws[15U] = u14;
+  uint64_t u15 = load64_le(b + 128U);
+  ws[16U] = u15;
+  uint64_t u16 = load64_le(b + 136U);
+  ws[17U] = u16;
+  uint64_t u17 = load64_le(b + 144U);
+  ws[18U] = u17;
+  uint64_t u18 = load64_le(b + 152U);
+  ws[19U] = u18;
+  uint64_t u19 = load64_le(b + 160U);
+  ws[20U] = u19;
+  uint64_t u20 = load64_le(b + 168U);
+  ws[21U] = u20;
+  uint64_t u21 = load64_le(b + 176U);
+  ws[22U] = u21;
+  uint64_t u22 = load64_le(b + 184U);
+  ws[23U] = u22;
+  uint64_t u23 = load64_le(b + 192U);
+  ws[24U] = u23;
+  uint64_t u24 = load64_le(b + 200U);
+  ws[25U] = u24;
+  uint64_t u25 = load64_le(b + 208U);
+  ws[26U] = u25;
+  uint64_t u26 = load64_le(b + 216U);
+  ws[27U] = u26;
+  uint64_t u27 = load64_le(b + 224U);
+  ws[28U] = u27;
+  uint64_t u28 = load64_le(b + 232U);
+  ws[29U] = u28;
+  uint64_t u29 = load64_le(b + 240U);
+  ws[30U] = u29;
+  uint64_t u30 = load64_le(b + 248U);
+  ws[31U] = u30;
+  for (uint32_t i = 0U; i < 25U; i++)
+  {
+    state[i] = state[i] ^ ws[i];
+  }
+  uint8_t b2[256U] = { 0U };
+  uint8_t *b3 = b2;
+  uint8_t *b0 = b3;
+  b0[167U] = 0x80U;
+  Hacl_Hash_SHA3_absorb_inner_32(168U, b3, state);
+}
+
+/**
+Squeeze a hash state into an output buffer
+
+  This function is intended to receive a hash state and output buffer.
+  It produces output whose length is a multiple of 168 bytes (the
+  SHAKE128 block size); any remaining bytes of a final partial block
+  are not written.
+
+  The argument `state` (IN/OUT) points to the hash state (the state is
+  permuted after each squeezed block), i.e., uint64_t[25]
+  The argument `output` (OUT) points to `outputByteLen` bytes of valid memory,
+  i.e., uint8_t[outputByteLen]
+*/
+void
+Hacl_Hash_SHA3_shake128_squeeze_nblocks(
+  uint64_t *state,
+  uint8_t *output,
+  uint32_t outputByteLen
 )
 {
-  KRML_MAYBE_UNUSED_VAR(capacity);
-  uint32_t rateInBytes = rate / 8U;
-  uint64_t s[25U] = { 0U };
-  absorb(s, rateInBytes, inputByteLen, input, delimitedSuffix);
-  Hacl_Hash_SHA3_squeeze0(s, rateInBytes, outputByteLen, output);
+  for (uint32_t i0 = 0U; i0 < outputByteLen / 168U; i0++)
+  {
+    uint8_t hbuf[256U] = { 0U };
+    uint64_t ws[32U] = { 0U };
+    memcpy(ws, state, 25U * sizeof (uint64_t));
+    for (uint32_t i = 0U; i < 32U; i++)
+    {
+      store64_le(hbuf + i * 8U, ws[i]);
+    }
+    uint8_t *b0 = output;
+    memcpy(b0 + i0 * 168U, hbuf, 168U * sizeof (uint8_t));
+    for (uint32_t i1 = 0U; i1 < 24U; i1++)
+    {
+      uint64_t _C[5U] = { 0U };
+      KRML_MAYBE_FOR5(i,
+        0U,
+        5U,
+        1U,
+        _C[i] =
+          state[i
+          + 0U]
+          ^ (state[i + 5U] ^ (state[i + 10U] ^ (state[i + 15U] ^ state[i + 20U]))););
+      KRML_MAYBE_FOR5(i2,
+        0U,
+        5U,
+        1U,
+        uint64_t uu____0 = _C[(i2 + 1U) % 5U];
+        uint64_t _D = _C[(i2 + 4U) % 5U] ^ (uu____0 << 1U | uu____0 >> 63U);
+        KRML_MAYBE_FOR5(i, 0U, 5U, 1U, state[i2 + 5U * i] = state[i2 + 5U * i] ^ _D;););
+      uint64_t x = state[1U];
+      uint64_t current = x;
+      for (uint32_t i = 0U; i < 24U; i++)
+      {
+        uint32_t _Y = Hacl_Hash_SHA3_keccak_piln[i];
+        uint32_t r = Hacl_Hash_SHA3_keccak_rotc[i];
+        uint64_t temp = state[_Y];
+        uint64_t uu____1 = current;
+        state[_Y] = uu____1 << r | uu____1 >> (64U - r);
+        current = temp;
+      }
+      KRML_MAYBE_FOR5(i,
+        0U,
+        5U,
+        1U,
+        uint64_t v0 = state[0U + 5U * i] ^ (~state[1U + 5U * i] & state[2U + 5U * i]);
+        uint64_t v1 = state[1U + 5U * i] ^ (~state[2U + 5U * i] & state[3U + 5U * i]);
+        uint64_t v2 = state[2U + 5U * i] ^ (~state[3U + 5U * i] & state[4U + 5U * i]);
+        uint64_t v3 = state[3U + 5U * i] ^ (~state[4U + 5U * i] & state[0U + 5U * i]);
+        uint64_t v4 = state[4U + 5U * i] ^ (~state[0U + 5U * i] & state[1U + 5U * i]);
+        state[0U + 5U * i] = v0;
+        state[1U + 5U * i] = v1;
+        state[2U + 5U * i] = v2;
+        state[3U + 5U * i] = v3;
+        state[4U + 5U * i] = v4;);
+      uint64_t c = Hacl_Hash_SHA3_keccak_rndc[i1];
+      state[0U] = state[0U] ^ c;
+    }
+  }
 }
diff --git a/src/msvc/Hacl_Hash_SHA3_Simd256.c b/src/msvc/Hacl_Hash_SHA3_Simd256.c
new file mode 100644
index 00000000..131c34e6
--- /dev/null
+++ b/src/msvc/Hacl_Hash_SHA3_Simd256.c
@@ -0,0 +1,6733 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation
+ * Copyright (c) 2022-2023 HACL* Contributors
+ *
+ * Permission is hereby granted, free of charge, to any
person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + + +#include "Hacl_Hash_SHA3_Simd256.h" + +#include "internal/Hacl_Hash_SHA3.h" + +void +Hacl_Hash_SHA3_Simd256_absorb_inner_256( + uint32_t rateInBytes, + Hacl_Hash_SHA2_uint8_4p b, + Lib_IntVector_Intrinsics_vec256 *s +) +{ + KRML_MAYBE_UNUSED_VAR(rateInBytes); + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b3 = b.snd.snd.snd; + uint8_t *b2 = b.snd.snd.fst; + uint8_t *b1 = b.snd.fst; + uint8_t *b0 = b.fst; + ws[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0); + ws[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1); + ws[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2); + ws[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3); + ws[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 32U); + ws[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 32U); + ws[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 32U); + ws[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 32U); + ws[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 64U); + ws[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 64U); + ws[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 64U); + ws[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 64U); + ws[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 96U); + ws[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 96U); + ws[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 96U); + ws[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 96U); + ws[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 128U); + ws[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 128U); + ws[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 128U); + ws[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 128U); + ws[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 160U); + ws[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 160U); + ws[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 160U); + ws[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 160U); + ws[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 192U); + ws[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 192U); + ws[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 192U); + ws[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 192U); + ws[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 224U); + ws[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 224U); + ws[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 224U); + ws[31U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 224U); + Lib_IntVector_Intrinsics_vec256 v00 = ws[0U]; + Lib_IntVector_Intrinsics_vec256 v10 = ws[1U]; + Lib_IntVector_Intrinsics_vec256 v20 = ws[2U]; + Lib_IntVector_Intrinsics_vec256 v30 = ws[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v1_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v2_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v3_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v0__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v1__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v2__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 + v3__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 ws0 = v0__; + Lib_IntVector_Intrinsics_vec256 ws1 = v2__; + Lib_IntVector_Intrinsics_vec256 ws2 = v1__; + Lib_IntVector_Intrinsics_vec256 ws3 = v3__; + Lib_IntVector_Intrinsics_vec256 v01 = ws[4U]; + Lib_IntVector_Intrinsics_vec256 v11 = ws[5U]; + Lib_IntVector_Intrinsics_vec256 v21 = ws[6U]; + Lib_IntVector_Intrinsics_vec256 v31 = ws[7U]; + Lib_IntVector_Intrinsics_vec256 + v0_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v1_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v2_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v3_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v0__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v1__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v2__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 + v3__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 ws4 = v0__0; + Lib_IntVector_Intrinsics_vec256 ws5 = v2__0; + Lib_IntVector_Intrinsics_vec256 ws6 = v1__0; + Lib_IntVector_Intrinsics_vec256 ws7 = v3__0; + Lib_IntVector_Intrinsics_vec256 v02 = ws[8U]; + Lib_IntVector_Intrinsics_vec256 v12 = ws[9U]; + Lib_IntVector_Intrinsics_vec256 v22 = ws[10U]; + Lib_IntVector_Intrinsics_vec256 v32 = ws[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v1_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v2_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v3_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v0__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v1__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v2__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 + v3__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 
ws8 = v0__1; + Lib_IntVector_Intrinsics_vec256 ws9 = v2__1; + Lib_IntVector_Intrinsics_vec256 ws10 = v1__1; + Lib_IntVector_Intrinsics_vec256 ws11 = v3__1; + Lib_IntVector_Intrinsics_vec256 v03 = ws[12U]; + Lib_IntVector_Intrinsics_vec256 v13 = ws[13U]; + Lib_IntVector_Intrinsics_vec256 v23 = ws[14U]; + Lib_IntVector_Intrinsics_vec256 v33 = ws[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v1_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v2_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v3_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v0__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v1__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v2__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 + v3__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 ws12 = v0__2; + Lib_IntVector_Intrinsics_vec256 ws13 = v2__2; + Lib_IntVector_Intrinsics_vec256 ws14 = v1__2; + Lib_IntVector_Intrinsics_vec256 ws15 = v3__2; + Lib_IntVector_Intrinsics_vec256 v04 = ws[16U]; + Lib_IntVector_Intrinsics_vec256 v14 = ws[17U]; + Lib_IntVector_Intrinsics_vec256 v24 = ws[18U]; + Lib_IntVector_Intrinsics_vec256 v34 = ws[19U]; + Lib_IntVector_Intrinsics_vec256 + v0_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v1_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v2_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v3_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v0__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v1__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v2__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 + v3__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 ws16 = v0__3; + Lib_IntVector_Intrinsics_vec256 ws17 = v2__3; + Lib_IntVector_Intrinsics_vec256 ws18 = v1__3; + Lib_IntVector_Intrinsics_vec256 ws19 = v3__3; + Lib_IntVector_Intrinsics_vec256 v05 = ws[20U]; + Lib_IntVector_Intrinsics_vec256 v15 = ws[21U]; + Lib_IntVector_Intrinsics_vec256 v25 = ws[22U]; + Lib_IntVector_Intrinsics_vec256 v35 = ws[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v1_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v2_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v3_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v0__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v1__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v2__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_4, v3_4); + 
Lib_IntVector_Intrinsics_vec256 + v3__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 ws20 = v0__4; + Lib_IntVector_Intrinsics_vec256 ws21 = v2__4; + Lib_IntVector_Intrinsics_vec256 ws22 = v1__4; + Lib_IntVector_Intrinsics_vec256 ws23 = v3__4; + Lib_IntVector_Intrinsics_vec256 v06 = ws[24U]; + Lib_IntVector_Intrinsics_vec256 v16 = ws[25U]; + Lib_IntVector_Intrinsics_vec256 v26 = ws[26U]; + Lib_IntVector_Intrinsics_vec256 v36 = ws[27U]; + Lib_IntVector_Intrinsics_vec256 + v0_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v1_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v2_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v3_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v0__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v1__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v2__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 + v3__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 ws24 = v0__5; + Lib_IntVector_Intrinsics_vec256 ws25 = v2__5; + Lib_IntVector_Intrinsics_vec256 ws26 = v1__5; + Lib_IntVector_Intrinsics_vec256 ws27 = v3__5; + Lib_IntVector_Intrinsics_vec256 v0 = ws[28U]; + Lib_IntVector_Intrinsics_vec256 v1 = ws[29U]; + Lib_IntVector_Intrinsics_vec256 v2 = ws[30U]; + Lib_IntVector_Intrinsics_vec256 v3 = ws[31U]; + Lib_IntVector_Intrinsics_vec256 + v0_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v1_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v2_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v3_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v0__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v1__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v2__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 + v3__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 ws28 = v0__6; + Lib_IntVector_Intrinsics_vec256 ws29 = v2__6; + Lib_IntVector_Intrinsics_vec256 ws30 = v1__6; + Lib_IntVector_Intrinsics_vec256 ws31 = v3__6; + ws[0U] = ws0; + ws[1U] = ws1; + ws[2U] = ws2; + ws[3U] = ws3; + ws[4U] = ws4; + ws[5U] = ws5; + ws[6U] = ws6; + ws[7U] = ws7; + ws[8U] = ws8; + ws[9U] = ws9; + ws[10U] = ws10; + ws[11U] = ws11; + ws[12U] = ws12; + ws[13U] = ws13; + ws[14U] = ws14; + ws[15U] = ws15; + ws[16U] = ws16; + ws[17U] = ws17; + ws[18U] = ws18; + ws[19U] = ws19; + ws[20U] = ws20; + ws[21U] = ws21; + ws[22U] = ws22; + ws[23U] = ws23; + ws[24U] = ws24; + ws[25U] = ws25; + ws[26U] = ws26; + ws[27U] = ws27; + ws[28U] = ws28; + ws[29U] = ws29; + ws[30U] = ws30; + ws[31U] = ws31; + for (uint32_t i = 0U; i < 25U; i++) + { + s[i] = Lib_IntVector_Intrinsics_vec256_xor(s[i], ws[i]); + } + for (uint32_t i0 = 0U; i0 < 24U; i0++) + { + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; + 
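+    /* One round of Keccak-f[1600], computed on all four states in parallel:
+       theta (column parities in _C, folded back via _D), rho and pi (lane
+       rotations and the pi permutation), chi (non-linear row mixing), and
+       iota (round constant). */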
KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____0 = s[i + 0U]; + Lib_IntVector_Intrinsics_vec256 uu____1 = s[i + 5U]; + Lib_IntVector_Intrinsics_vec256 uu____2 = s[i + 10U]; + _C[i] = + Lib_IntVector_Intrinsics_vec256_xor(uu____0, + Lib_IntVector_Intrinsics_vec256_xor(uu____1, + Lib_IntVector_Intrinsics_vec256_xor(uu____2, + Lib_IntVector_Intrinsics_vec256_xor(s[i + 15U], s[i + 20U]))));); + KRML_MAYBE_FOR5(i1, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____3 = _C[(i1 + 4U) % 5U]; + Lib_IntVector_Intrinsics_vec256 uu____4 = _C[(i1 + 1U) % 5U]; + Lib_IntVector_Intrinsics_vec256 + _D = + Lib_IntVector_Intrinsics_vec256_xor(uu____3, + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____4, + 1U), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____4, 63U))); + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + s[i1 + 5U * i] = Lib_IntVector_Intrinsics_vec256_xor(s[i1 + 5U * i], _D););); + Lib_IntVector_Intrinsics_vec256 x = s[1U]; + Lib_IntVector_Intrinsics_vec256 current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Hash_SHA3_keccak_piln[i]; + uint32_t r = Hacl_Hash_SHA3_keccak_rotc[i]; + Lib_IntVector_Intrinsics_vec256 temp = s[_Y]; + Lib_IntVector_Intrinsics_vec256 uu____5 = current; + s[_Y] = + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____5, r), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____5, 64U - r)); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____6 = s[0U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____7 = Lib_IntVector_Intrinsics_vec256_lognot(s[1U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v07 = + Lib_IntVector_Intrinsics_vec256_xor(uu____6, + Lib_IntVector_Intrinsics_vec256_and(uu____7, s[2U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____8 = s[1U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____9 = Lib_IntVector_Intrinsics_vec256_lognot(s[2U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v17 = + Lib_IntVector_Intrinsics_vec256_xor(uu____8, + Lib_IntVector_Intrinsics_vec256_and(uu____9, s[3U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____10 = s[2U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____11 = Lib_IntVector_Intrinsics_vec256_lognot(s[3U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v27 = + Lib_IntVector_Intrinsics_vec256_xor(uu____10, + Lib_IntVector_Intrinsics_vec256_and(uu____11, s[4U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____12 = s[3U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____13 = Lib_IntVector_Intrinsics_vec256_lognot(s[4U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v37 = + Lib_IntVector_Intrinsics_vec256_xor(uu____12, + Lib_IntVector_Intrinsics_vec256_and(uu____13, s[0U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____14 = s[4U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____15 = Lib_IntVector_Intrinsics_vec256_lognot(s[0U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v4 = + Lib_IntVector_Intrinsics_vec256_xor(uu____14, + Lib_IntVector_Intrinsics_vec256_and(uu____15, s[1U + 5U * i])); + s[0U + 5U * i] = v07; + s[1U + 5U * i] = v17; + s[2U + 5U * i] = v27; + s[3U + 5U * i] = v37; + s[4U + 5U * i] = v4;); + uint64_t c = Hacl_Hash_SHA3_keccak_rndc[i0]; + Lib_IntVector_Intrinsics_vec256 uu____16 = s[0U]; + s[0U] = + Lib_IntVector_Intrinsics_vec256_xor(uu____16, + Lib_IntVector_Intrinsics_vec256_load64(c)); + } +} + +void +Hacl_Hash_SHA3_Simd256_shake128( + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + 
uint8_t *output3, + uint32_t outputByteLen, + uint8_t *input0, + uint8_t *input1, + uint8_t *input2, + uint8_t *input3, + uint32_t inputByteLen +) +{ + Hacl_Hash_SHA2_uint8_4p + ib = { .fst = input0, .snd = { .fst = input1, .snd = { .fst = input2, .snd = input3 } } }; + Hacl_Hash_SHA2_uint8_4p + rb = { .fst = output0, .snd = { .fst = output1, .snd = { .fst = output2, .snd = output3 } } }; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 s[25U] KRML_POST_ALIGN(32) = { 0U }; + uint32_t rateInBytes1 = 168U; + for (uint32_t i = 0U; i < inputByteLen / rateInBytes1; i++) + { + uint8_t b00[256U] = { 0U }; + uint8_t b10[256U] = { 0U }; + uint8_t b20[256U] = { 0U }; + uint8_t b30[256U] = { 0U }; + Hacl_Hash_SHA2_uint8_4p + b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; + uint8_t *b3 = ib.snd.snd.snd; + uint8_t *b2 = ib.snd.snd.fst; + uint8_t *b1 = ib.snd.fst; + uint8_t *b0 = ib.fst; + uint8_t *bl3 = b_.snd.snd.snd; + uint8_t *bl2 = b_.snd.snd.fst; + uint8_t *bl1 = b_.snd.fst; + uint8_t *bl0 = b_.fst; + memcpy(bl0, b0 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t)); + memcpy(bl1, b1 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t)); + memcpy(bl2, b2 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t)); + memcpy(bl3, b3 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t)); + Hacl_Hash_SHA3_Simd256_absorb_inner_256(rateInBytes1, b_, s); + } + uint8_t b00[256U] = { 0U }; + uint8_t b10[256U] = { 0U }; + uint8_t b20[256U] = { 0U }; + uint8_t b30[256U] = { 0U }; + Hacl_Hash_SHA2_uint8_4p + b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; + uint32_t rem = inputByteLen % rateInBytes1; + uint8_t *b31 = ib.snd.snd.snd; + uint8_t *b21 = ib.snd.snd.fst; + uint8_t *b11 = ib.snd.fst; + uint8_t *b01 = ib.fst; + uint8_t *bl3 = b_.snd.snd.snd; + uint8_t *bl2 = b_.snd.snd.fst; + uint8_t *bl1 = b_.snd.fst; + uint8_t *bl0 = b_.fst; + memcpy(bl0, b01 + inputByteLen - rem, rem * sizeof (uint8_t)); + memcpy(bl1, b11 + inputByteLen - rem, rem * sizeof (uint8_t)); + memcpy(bl2, b21 + inputByteLen - rem, rem * sizeof (uint8_t)); + memcpy(bl3, b31 + inputByteLen - rem, rem * sizeof (uint8_t)); + uint8_t *b32 = b_.snd.snd.snd; + uint8_t *b22 = b_.snd.snd.fst; + uint8_t *b12 = b_.snd.fst; + uint8_t *b02 = b_.fst; + b02[inputByteLen % rateInBytes1] = 0x1FU; + b12[inputByteLen % rateInBytes1] = 0x1FU; + b22[inputByteLen % rateInBytes1] = 0x1FU; + b32[inputByteLen % rateInBytes1] = 0x1FU; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b33 = b_.snd.snd.snd; + uint8_t *b23 = b_.snd.snd.fst; + uint8_t *b13 = b_.snd.fst; + uint8_t *b03 = b_.fst; + ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03); + ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13); + ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23); + ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33); + ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 32U); + ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 32U); + ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 32U); + ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 32U); + ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 64U); + ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 64U); + ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 64U); + ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 64U); + ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 96U); + ws32[13U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 96U); + ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 96U); + ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 96U); + ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 128U); + ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 128U); + ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 128U); + ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 128U); + ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 160U); + ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 160U); + ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 160U); + ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 160U); + ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 192U); + ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 192U); + ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 192U); + ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 192U); + ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 224U); + ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 224U); + ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 224U); + ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 224U); + Lib_IntVector_Intrinsics_vec256 v00 = ws32[0U]; + Lib_IntVector_Intrinsics_vec256 v10 = ws32[1U]; + Lib_IntVector_Intrinsics_vec256 v20 = ws32[2U]; + Lib_IntVector_Intrinsics_vec256 v30 = ws32[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v1_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v2_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v3_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v0__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v1__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v2__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 + v3__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 ws00 = v0__; + Lib_IntVector_Intrinsics_vec256 ws110 = v2__; + Lib_IntVector_Intrinsics_vec256 ws210 = v1__; + Lib_IntVector_Intrinsics_vec256 ws33 = v3__; + Lib_IntVector_Intrinsics_vec256 v01 = ws32[4U]; + Lib_IntVector_Intrinsics_vec256 v11 = ws32[5U]; + Lib_IntVector_Intrinsics_vec256 v21 = ws32[6U]; + Lib_IntVector_Intrinsics_vec256 v31 = ws32[7U]; + Lib_IntVector_Intrinsics_vec256 + v0_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v1_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v2_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v3_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v0__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v1__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v2__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 + v3__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_0, 
v3_0); + Lib_IntVector_Intrinsics_vec256 ws40 = v0__0; + Lib_IntVector_Intrinsics_vec256 ws50 = v2__0; + Lib_IntVector_Intrinsics_vec256 ws60 = v1__0; + Lib_IntVector_Intrinsics_vec256 ws70 = v3__0; + Lib_IntVector_Intrinsics_vec256 v02 = ws32[8U]; + Lib_IntVector_Intrinsics_vec256 v12 = ws32[9U]; + Lib_IntVector_Intrinsics_vec256 v22 = ws32[10U]; + Lib_IntVector_Intrinsics_vec256 v32 = ws32[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v1_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v2_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v3_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v0__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v1__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v2__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 + v3__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 ws80 = v0__1; + Lib_IntVector_Intrinsics_vec256 ws90 = v2__1; + Lib_IntVector_Intrinsics_vec256 ws100 = v1__1; + Lib_IntVector_Intrinsics_vec256 ws111 = v3__1; + Lib_IntVector_Intrinsics_vec256 v03 = ws32[12U]; + Lib_IntVector_Intrinsics_vec256 v13 = ws32[13U]; + Lib_IntVector_Intrinsics_vec256 v23 = ws32[14U]; + Lib_IntVector_Intrinsics_vec256 v33 = ws32[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v1_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v2_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v3_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v0__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v1__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v2__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 + v3__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 ws120 = v0__2; + Lib_IntVector_Intrinsics_vec256 ws130 = v2__2; + Lib_IntVector_Intrinsics_vec256 ws140 = v1__2; + Lib_IntVector_Intrinsics_vec256 ws150 = v3__2; + Lib_IntVector_Intrinsics_vec256 v04 = ws32[16U]; + Lib_IntVector_Intrinsics_vec256 v14 = ws32[17U]; + Lib_IntVector_Intrinsics_vec256 v24 = ws32[18U]; + Lib_IntVector_Intrinsics_vec256 v34 = ws32[19U]; + Lib_IntVector_Intrinsics_vec256 + v0_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v1_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v2_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v3_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v0__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v1__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + 
v2__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 + v3__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 ws160 = v0__3; + Lib_IntVector_Intrinsics_vec256 ws170 = v2__3; + Lib_IntVector_Intrinsics_vec256 ws180 = v1__3; + Lib_IntVector_Intrinsics_vec256 ws190 = v3__3; + Lib_IntVector_Intrinsics_vec256 v05 = ws32[20U]; + Lib_IntVector_Intrinsics_vec256 v15 = ws32[21U]; + Lib_IntVector_Intrinsics_vec256 v25 = ws32[22U]; + Lib_IntVector_Intrinsics_vec256 v35 = ws32[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v1_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v2_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v3_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v0__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v1__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v2__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 + v3__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 ws200 = v0__4; + Lib_IntVector_Intrinsics_vec256 ws211 = v2__4; + Lib_IntVector_Intrinsics_vec256 ws220 = v1__4; + Lib_IntVector_Intrinsics_vec256 ws230 = v3__4; + Lib_IntVector_Intrinsics_vec256 v06 = ws32[24U]; + Lib_IntVector_Intrinsics_vec256 v16 = ws32[25U]; + Lib_IntVector_Intrinsics_vec256 v26 = ws32[26U]; + Lib_IntVector_Intrinsics_vec256 v36 = ws32[27U]; + Lib_IntVector_Intrinsics_vec256 + v0_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v1_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v2_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v3_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v0__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v1__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v2__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 + v3__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 ws240 = v0__5; + Lib_IntVector_Intrinsics_vec256 ws250 = v2__5; + Lib_IntVector_Intrinsics_vec256 ws260 = v1__5; + Lib_IntVector_Intrinsics_vec256 ws270 = v3__5; + Lib_IntVector_Intrinsics_vec256 v07 = ws32[28U]; + Lib_IntVector_Intrinsics_vec256 v17 = ws32[29U]; + Lib_IntVector_Intrinsics_vec256 v27 = ws32[30U]; + Lib_IntVector_Intrinsics_vec256 v37 = ws32[31U]; + Lib_IntVector_Intrinsics_vec256 + v0_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v07, v17); + Lib_IntVector_Intrinsics_vec256 + v1_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v07, v17); + Lib_IntVector_Intrinsics_vec256 + v2_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v27, v37); + Lib_IntVector_Intrinsics_vec256 + v3_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v27, v37); + Lib_IntVector_Intrinsics_vec256 + v0__6 = 
Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v1__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v2__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 + v3__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 ws280 = v0__6; + Lib_IntVector_Intrinsics_vec256 ws290 = v2__6; + Lib_IntVector_Intrinsics_vec256 ws300 = v1__6; + Lib_IntVector_Intrinsics_vec256 ws310 = v3__6; + ws32[0U] = ws00; + ws32[1U] = ws110; + ws32[2U] = ws210; + ws32[3U] = ws33; + ws32[4U] = ws40; + ws32[5U] = ws50; + ws32[6U] = ws60; + ws32[7U] = ws70; + ws32[8U] = ws80; + ws32[9U] = ws90; + ws32[10U] = ws100; + ws32[11U] = ws111; + ws32[12U] = ws120; + ws32[13U] = ws130; + ws32[14U] = ws140; + ws32[15U] = ws150; + ws32[16U] = ws160; + ws32[17U] = ws170; + ws32[18U] = ws180; + ws32[19U] = ws190; + ws32[20U] = ws200; + ws32[21U] = ws211; + ws32[22U] = ws220; + ws32[23U] = ws230; + ws32[24U] = ws240; + ws32[25U] = ws250; + ws32[26U] = ws260; + ws32[27U] = ws270; + ws32[28U] = ws280; + ws32[29U] = ws290; + ws32[30U] = ws300; + ws32[31U] = ws310; + for (uint32_t i = 0U; i < 25U; i++) + { + s[i] = Lib_IntVector_Intrinsics_vec256_xor(s[i], ws32[i]); + } + uint8_t b04[256U] = { 0U }; + uint8_t b14[256U] = { 0U }; + uint8_t b24[256U] = { 0U }; + uint8_t b34[256U] = { 0U }; + Hacl_Hash_SHA2_uint8_4p + b = { .fst = b04, .snd = { .fst = b14, .snd = { .fst = b24, .snd = b34 } } }; + uint8_t *b3 = b.snd.snd.snd; + uint8_t *b25 = b.snd.snd.fst; + uint8_t *b15 = b.snd.fst; + uint8_t *b05 = b.fst; + b05[rateInBytes1 - 1U] = 0x80U; + b15[rateInBytes1 - 1U] = 0x80U; + b25[rateInBytes1 - 1U] = 0x80U; + b3[rateInBytes1 - 1U] = 0x80U; + Hacl_Hash_SHA3_Simd256_absorb_inner_256(rateInBytes1, b, s); + for (uint32_t i0 = 0U; i0 < outputByteLen / rateInBytes1; i0++) + { + uint8_t hbuf[1024U] = { 0U }; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws[32U] KRML_POST_ALIGN(32) = { 0U }; + memcpy(ws, s, 25U * sizeof (Lib_IntVector_Intrinsics_vec256)); + Lib_IntVector_Intrinsics_vec256 v08 = ws[0U]; + Lib_IntVector_Intrinsics_vec256 v18 = ws[1U]; + Lib_IntVector_Intrinsics_vec256 v28 = ws[2U]; + Lib_IntVector_Intrinsics_vec256 v38 = ws[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v08, v18); + Lib_IntVector_Intrinsics_vec256 + v1_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v08, v18); + Lib_IntVector_Intrinsics_vec256 + v2_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v28, v38); + Lib_IntVector_Intrinsics_vec256 + v3_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v28, v38); + Lib_IntVector_Intrinsics_vec256 + v0__7 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_7, v2_7); + Lib_IntVector_Intrinsics_vec256 + v1__7 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_7, v2_7); + Lib_IntVector_Intrinsics_vec256 + v2__7 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_7, v3_7); + Lib_IntVector_Intrinsics_vec256 + v3__7 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_7, v3_7); + Lib_IntVector_Intrinsics_vec256 ws0 = v0__7; + Lib_IntVector_Intrinsics_vec256 ws1 = v2__7; + Lib_IntVector_Intrinsics_vec256 ws2 = v1__7; + Lib_IntVector_Intrinsics_vec256 ws3 = v3__7; + Lib_IntVector_Intrinsics_vec256 v09 = ws[4U]; + Lib_IntVector_Intrinsics_vec256 v19 = ws[5U]; + Lib_IntVector_Intrinsics_vec256 v29 = ws[6U]; + Lib_IntVector_Intrinsics_vec256 v39 = 
ws[7U]; + Lib_IntVector_Intrinsics_vec256 + v0_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v09, v19); + Lib_IntVector_Intrinsics_vec256 + v1_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v09, v19); + Lib_IntVector_Intrinsics_vec256 + v2_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v29, v39); + Lib_IntVector_Intrinsics_vec256 + v3_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v29, v39); + Lib_IntVector_Intrinsics_vec256 + v0__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_8, v2_8); + Lib_IntVector_Intrinsics_vec256 + v1__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_8, v2_8); + Lib_IntVector_Intrinsics_vec256 + v2__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_8, v3_8); + Lib_IntVector_Intrinsics_vec256 + v3__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_8, v3_8); + Lib_IntVector_Intrinsics_vec256 ws4 = v0__8; + Lib_IntVector_Intrinsics_vec256 ws5 = v2__8; + Lib_IntVector_Intrinsics_vec256 ws6 = v1__8; + Lib_IntVector_Intrinsics_vec256 ws7 = v3__8; + Lib_IntVector_Intrinsics_vec256 v010 = ws[8U]; + Lib_IntVector_Intrinsics_vec256 v110 = ws[9U]; + Lib_IntVector_Intrinsics_vec256 v210 = ws[10U]; + Lib_IntVector_Intrinsics_vec256 v310 = ws[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v010, v110); + Lib_IntVector_Intrinsics_vec256 + v1_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v010, v110); + Lib_IntVector_Intrinsics_vec256 + v2_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v210, v310); + Lib_IntVector_Intrinsics_vec256 + v3_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v210, v310); + Lib_IntVector_Intrinsics_vec256 + v0__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_9, v2_9); + Lib_IntVector_Intrinsics_vec256 + v1__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_9, v2_9); + Lib_IntVector_Intrinsics_vec256 + v2__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_9, v3_9); + Lib_IntVector_Intrinsics_vec256 + v3__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_9, v3_9); + Lib_IntVector_Intrinsics_vec256 ws8 = v0__9; + Lib_IntVector_Intrinsics_vec256 ws9 = v2__9; + Lib_IntVector_Intrinsics_vec256 ws10 = v1__9; + Lib_IntVector_Intrinsics_vec256 ws11 = v3__9; + Lib_IntVector_Intrinsics_vec256 v011 = ws[12U]; + Lib_IntVector_Intrinsics_vec256 v111 = ws[13U]; + Lib_IntVector_Intrinsics_vec256 v211 = ws[14U]; + Lib_IntVector_Intrinsics_vec256 v311 = ws[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_10 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v011, v111); + Lib_IntVector_Intrinsics_vec256 + v1_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v011, v111); + Lib_IntVector_Intrinsics_vec256 + v2_10 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v211, v311); + Lib_IntVector_Intrinsics_vec256 + v3_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v211, v311); + Lib_IntVector_Intrinsics_vec256 + v0__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_10, v2_10); + Lib_IntVector_Intrinsics_vec256 + v1__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_10, v2_10); + Lib_IntVector_Intrinsics_vec256 + v2__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_10, v3_10); + Lib_IntVector_Intrinsics_vec256 + v3__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_10, v3_10); + Lib_IntVector_Intrinsics_vec256 ws12 = v0__10; + Lib_IntVector_Intrinsics_vec256 ws13 = v2__10; + Lib_IntVector_Intrinsics_vec256 ws14 = v1__10; + Lib_IntVector_Intrinsics_vec256 ws15 = v3__10; + 
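+    /* The interleave sequence continues with ws[16U]..ws[19U]; together these
+       4x4 lane transposes rearrange the 4-way SIMD state so that each
+       instance's output block ends up contiguous in hbuf (instance k at
+       offset k * 256U) for the per-output memcpy below. */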
Lib_IntVector_Intrinsics_vec256 v012 = ws[16U]; + Lib_IntVector_Intrinsics_vec256 v112 = ws[17U]; + Lib_IntVector_Intrinsics_vec256 v212 = ws[18U]; + Lib_IntVector_Intrinsics_vec256 v312 = ws[19U]; + Lib_IntVector_Intrinsics_vec256 + v0_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v012, v112); + Lib_IntVector_Intrinsics_vec256 + v1_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v012, v112); + Lib_IntVector_Intrinsics_vec256 + v2_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v212, v312); + Lib_IntVector_Intrinsics_vec256 + v3_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v212, v312); + Lib_IntVector_Intrinsics_vec256 + v0__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_11, v2_11); + Lib_IntVector_Intrinsics_vec256 + v1__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_11, v2_11); + Lib_IntVector_Intrinsics_vec256 + v2__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_11, v3_11); + Lib_IntVector_Intrinsics_vec256 + v3__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_11, v3_11); + Lib_IntVector_Intrinsics_vec256 ws16 = v0__11; + Lib_IntVector_Intrinsics_vec256 ws17 = v2__11; + Lib_IntVector_Intrinsics_vec256 ws18 = v1__11; + Lib_IntVector_Intrinsics_vec256 ws19 = v3__11; + Lib_IntVector_Intrinsics_vec256 v013 = ws[20U]; + Lib_IntVector_Intrinsics_vec256 v113 = ws[21U]; + Lib_IntVector_Intrinsics_vec256 v213 = ws[22U]; + Lib_IntVector_Intrinsics_vec256 v313 = ws[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v013, v113); + Lib_IntVector_Intrinsics_vec256 + v1_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v013, v113); + Lib_IntVector_Intrinsics_vec256 + v2_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v213, v313); + Lib_IntVector_Intrinsics_vec256 + v3_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v213, v313); + Lib_IntVector_Intrinsics_vec256 + v0__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_12, v2_12); + Lib_IntVector_Intrinsics_vec256 + v1__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_12, v2_12); + Lib_IntVector_Intrinsics_vec256 + v2__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_12, v3_12); + Lib_IntVector_Intrinsics_vec256 + v3__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_12, v3_12); + Lib_IntVector_Intrinsics_vec256 ws20 = v0__12; + Lib_IntVector_Intrinsics_vec256 ws21 = v2__12; + Lib_IntVector_Intrinsics_vec256 ws22 = v1__12; + Lib_IntVector_Intrinsics_vec256 ws23 = v3__12; + Lib_IntVector_Intrinsics_vec256 v014 = ws[24U]; + Lib_IntVector_Intrinsics_vec256 v114 = ws[25U]; + Lib_IntVector_Intrinsics_vec256 v214 = ws[26U]; + Lib_IntVector_Intrinsics_vec256 v314 = ws[27U]; + Lib_IntVector_Intrinsics_vec256 + v0_13 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v014, v114); + Lib_IntVector_Intrinsics_vec256 + v1_13 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v014, v114); + Lib_IntVector_Intrinsics_vec256 + v2_13 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v214, v314); + Lib_IntVector_Intrinsics_vec256 + v3_13 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v214, v314); + Lib_IntVector_Intrinsics_vec256 + v0__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_13, v2_13); + Lib_IntVector_Intrinsics_vec256 + v1__13 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_13, v2_13); + Lib_IntVector_Intrinsics_vec256 + v2__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_13, v3_13); + Lib_IntVector_Intrinsics_vec256 + v3__13 = 
Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_13, v3_13); + Lib_IntVector_Intrinsics_vec256 ws24 = v0__13; + Lib_IntVector_Intrinsics_vec256 ws25 = v2__13; + Lib_IntVector_Intrinsics_vec256 ws26 = v1__13; + Lib_IntVector_Intrinsics_vec256 ws27 = v3__13; + Lib_IntVector_Intrinsics_vec256 v0 = ws[28U]; + Lib_IntVector_Intrinsics_vec256 v1 = ws[29U]; + Lib_IntVector_Intrinsics_vec256 v2 = ws[30U]; + Lib_IntVector_Intrinsics_vec256 v3 = ws[31U]; + Lib_IntVector_Intrinsics_vec256 + v0_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v1_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v2_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v3_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v0__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_14, v2_14); + Lib_IntVector_Intrinsics_vec256 + v1__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_14, v2_14); + Lib_IntVector_Intrinsics_vec256 + v2__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_14, v3_14); + Lib_IntVector_Intrinsics_vec256 + v3__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_14, v3_14); + Lib_IntVector_Intrinsics_vec256 ws28 = v0__14; + Lib_IntVector_Intrinsics_vec256 ws29 = v2__14; + Lib_IntVector_Intrinsics_vec256 ws30 = v1__14; + Lib_IntVector_Intrinsics_vec256 ws31 = v3__14; + ws[0U] = ws0; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; + ws[31U] = ws31; + for (uint32_t i = 0U; i < 32U; i++) + { + Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); + } + uint8_t *b35 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + i0 * rateInBytes1, hbuf, rateInBytes1 * sizeof (uint8_t)); + memcpy(b1 + i0 * rateInBytes1, hbuf + 256U, rateInBytes1 * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes1, hbuf + 512U, rateInBytes1 * sizeof (uint8_t)); + memcpy(b35 + i0 * rateInBytes1, hbuf + 768U, rateInBytes1 * sizeof (uint8_t)); + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____0 = s[i + 0U]; + Lib_IntVector_Intrinsics_vec256 uu____1 = s[i + 5U]; + Lib_IntVector_Intrinsics_vec256 uu____2 = s[i + 10U]; + _C[i] = + Lib_IntVector_Intrinsics_vec256_xor(uu____0, + Lib_IntVector_Intrinsics_vec256_xor(uu____1, + Lib_IntVector_Intrinsics_vec256_xor(uu____2, + Lib_IntVector_Intrinsics_vec256_xor(s[i + 15U], s[i + 20U]))));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____3 = _C[(i2 + 4U) % 5U]; + Lib_IntVector_Intrinsics_vec256 uu____4 = _C[(i2 + 1U) % 5U]; + Lib_IntVector_Intrinsics_vec256 + _D = + Lib_IntVector_Intrinsics_vec256_xor(uu____3, + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____4, + 1U), + 
Lib_IntVector_Intrinsics_vec256_shift_right64(uu____4, 63U))); + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + s[i2 + 5U * i] = Lib_IntVector_Intrinsics_vec256_xor(s[i2 + 5U * i], _D););); + Lib_IntVector_Intrinsics_vec256 x = s[1U]; + Lib_IntVector_Intrinsics_vec256 current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Hash_SHA3_keccak_piln[i]; + uint32_t r = Hacl_Hash_SHA3_keccak_rotc[i]; + Lib_IntVector_Intrinsics_vec256 temp = s[_Y]; + Lib_IntVector_Intrinsics_vec256 uu____5 = current; + s[_Y] = + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____5, + r), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____5, 64U - r)); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____6 = s[0U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____7 = Lib_IntVector_Intrinsics_vec256_lognot(s[1U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v015 = + Lib_IntVector_Intrinsics_vec256_xor(uu____6, + Lib_IntVector_Intrinsics_vec256_and(uu____7, s[2U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____8 = s[1U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____9 = Lib_IntVector_Intrinsics_vec256_lognot(s[2U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v115 = + Lib_IntVector_Intrinsics_vec256_xor(uu____8, + Lib_IntVector_Intrinsics_vec256_and(uu____9, s[3U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____10 = s[2U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____11 = Lib_IntVector_Intrinsics_vec256_lognot(s[3U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v215 = + Lib_IntVector_Intrinsics_vec256_xor(uu____10, + Lib_IntVector_Intrinsics_vec256_and(uu____11, s[4U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____12 = s[3U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____13 = Lib_IntVector_Intrinsics_vec256_lognot(s[4U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v315 = + Lib_IntVector_Intrinsics_vec256_xor(uu____12, + Lib_IntVector_Intrinsics_vec256_and(uu____13, s[0U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____14 = s[4U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____15 = Lib_IntVector_Intrinsics_vec256_lognot(s[0U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v4 = + Lib_IntVector_Intrinsics_vec256_xor(uu____14, + Lib_IntVector_Intrinsics_vec256_and(uu____15, s[1U + 5U * i])); + s[0U + 5U * i] = v015; + s[1U + 5U * i] = v115; + s[2U + 5U * i] = v215; + s[3U + 5U * i] = v315; + s[4U + 5U * i] = v4;); + uint64_t c = Hacl_Hash_SHA3_keccak_rndc[i1]; + Lib_IntVector_Intrinsics_vec256 uu____16 = s[0U]; + s[0U] = + Lib_IntVector_Intrinsics_vec256_xor(uu____16, + Lib_IntVector_Intrinsics_vec256_load64(c)); + } + } + uint32_t remOut = outputByteLen % rateInBytes1; + uint8_t hbuf[1024U] = { 0U }; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws[32U] KRML_POST_ALIGN(32) = { 0U }; + memcpy(ws, s, 25U * sizeof (Lib_IntVector_Intrinsics_vec256)); + Lib_IntVector_Intrinsics_vec256 v08 = ws[0U]; + Lib_IntVector_Intrinsics_vec256 v18 = ws[1U]; + Lib_IntVector_Intrinsics_vec256 v28 = ws[2U]; + Lib_IntVector_Intrinsics_vec256 v38 = ws[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v08, v18); + Lib_IntVector_Intrinsics_vec256 + v1_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v08, v18); + Lib_IntVector_Intrinsics_vec256 + v2_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v28, v38); + Lib_IntVector_Intrinsics_vec256 + v3_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v28, v38); + Lib_IntVector_Intrinsics_vec256 
+ v0__7 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_7, v2_7); + Lib_IntVector_Intrinsics_vec256 + v1__7 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_7, v2_7); + Lib_IntVector_Intrinsics_vec256 + v2__7 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_7, v3_7); + Lib_IntVector_Intrinsics_vec256 + v3__7 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_7, v3_7); + Lib_IntVector_Intrinsics_vec256 ws0 = v0__7; + Lib_IntVector_Intrinsics_vec256 ws1 = v2__7; + Lib_IntVector_Intrinsics_vec256 ws2 = v1__7; + Lib_IntVector_Intrinsics_vec256 ws3 = v3__7; + Lib_IntVector_Intrinsics_vec256 v09 = ws[4U]; + Lib_IntVector_Intrinsics_vec256 v19 = ws[5U]; + Lib_IntVector_Intrinsics_vec256 v29 = ws[6U]; + Lib_IntVector_Intrinsics_vec256 v39 = ws[7U]; + Lib_IntVector_Intrinsics_vec256 + v0_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v09, v19); + Lib_IntVector_Intrinsics_vec256 + v1_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v09, v19); + Lib_IntVector_Intrinsics_vec256 + v2_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v29, v39); + Lib_IntVector_Intrinsics_vec256 + v3_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v29, v39); + Lib_IntVector_Intrinsics_vec256 + v0__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_8, v2_8); + Lib_IntVector_Intrinsics_vec256 + v1__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_8, v2_8); + Lib_IntVector_Intrinsics_vec256 + v2__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_8, v3_8); + Lib_IntVector_Intrinsics_vec256 + v3__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_8, v3_8); + Lib_IntVector_Intrinsics_vec256 ws4 = v0__8; + Lib_IntVector_Intrinsics_vec256 ws5 = v2__8; + Lib_IntVector_Intrinsics_vec256 ws6 = v1__8; + Lib_IntVector_Intrinsics_vec256 ws7 = v3__8; + Lib_IntVector_Intrinsics_vec256 v010 = ws[8U]; + Lib_IntVector_Intrinsics_vec256 v110 = ws[9U]; + Lib_IntVector_Intrinsics_vec256 v210 = ws[10U]; + Lib_IntVector_Intrinsics_vec256 v310 = ws[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v010, v110); + Lib_IntVector_Intrinsics_vec256 + v1_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v010, v110); + Lib_IntVector_Intrinsics_vec256 + v2_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v210, v310); + Lib_IntVector_Intrinsics_vec256 + v3_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v210, v310); + Lib_IntVector_Intrinsics_vec256 + v0__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_9, v2_9); + Lib_IntVector_Intrinsics_vec256 + v1__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_9, v2_9); + Lib_IntVector_Intrinsics_vec256 + v2__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_9, v3_9); + Lib_IntVector_Intrinsics_vec256 + v3__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_9, v3_9); + Lib_IntVector_Intrinsics_vec256 ws8 = v0__9; + Lib_IntVector_Intrinsics_vec256 ws9 = v2__9; + Lib_IntVector_Intrinsics_vec256 ws10 = v1__9; + Lib_IntVector_Intrinsics_vec256 ws11 = v3__9; + Lib_IntVector_Intrinsics_vec256 v011 = ws[12U]; + Lib_IntVector_Intrinsics_vec256 v111 = ws[13U]; + Lib_IntVector_Intrinsics_vec256 v211 = ws[14U]; + Lib_IntVector_Intrinsics_vec256 v311 = ws[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_10 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v011, v111); + Lib_IntVector_Intrinsics_vec256 + v1_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v011, v111); + Lib_IntVector_Intrinsics_vec256 + v2_10 = 
Lib_IntVector_Intrinsics_vec256_interleave_low64(v211, v311); + Lib_IntVector_Intrinsics_vec256 + v3_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v211, v311); + Lib_IntVector_Intrinsics_vec256 + v0__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_10, v2_10); + Lib_IntVector_Intrinsics_vec256 + v1__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_10, v2_10); + Lib_IntVector_Intrinsics_vec256 + v2__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_10, v3_10); + Lib_IntVector_Intrinsics_vec256 + v3__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_10, v3_10); + Lib_IntVector_Intrinsics_vec256 ws12 = v0__10; + Lib_IntVector_Intrinsics_vec256 ws13 = v2__10; + Lib_IntVector_Intrinsics_vec256 ws14 = v1__10; + Lib_IntVector_Intrinsics_vec256 ws15 = v3__10; + Lib_IntVector_Intrinsics_vec256 v012 = ws[16U]; + Lib_IntVector_Intrinsics_vec256 v112 = ws[17U]; + Lib_IntVector_Intrinsics_vec256 v212 = ws[18U]; + Lib_IntVector_Intrinsics_vec256 v312 = ws[19U]; + Lib_IntVector_Intrinsics_vec256 + v0_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v012, v112); + Lib_IntVector_Intrinsics_vec256 + v1_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v012, v112); + Lib_IntVector_Intrinsics_vec256 + v2_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v212, v312); + Lib_IntVector_Intrinsics_vec256 + v3_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v212, v312); + Lib_IntVector_Intrinsics_vec256 + v0__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_11, v2_11); + Lib_IntVector_Intrinsics_vec256 + v1__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_11, v2_11); + Lib_IntVector_Intrinsics_vec256 + v2__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_11, v3_11); + Lib_IntVector_Intrinsics_vec256 + v3__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_11, v3_11); + Lib_IntVector_Intrinsics_vec256 ws16 = v0__11; + Lib_IntVector_Intrinsics_vec256 ws17 = v2__11; + Lib_IntVector_Intrinsics_vec256 ws18 = v1__11; + Lib_IntVector_Intrinsics_vec256 ws19 = v3__11; + Lib_IntVector_Intrinsics_vec256 v013 = ws[20U]; + Lib_IntVector_Intrinsics_vec256 v113 = ws[21U]; + Lib_IntVector_Intrinsics_vec256 v213 = ws[22U]; + Lib_IntVector_Intrinsics_vec256 v313 = ws[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v013, v113); + Lib_IntVector_Intrinsics_vec256 + v1_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v013, v113); + Lib_IntVector_Intrinsics_vec256 + v2_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v213, v313); + Lib_IntVector_Intrinsics_vec256 + v3_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v213, v313); + Lib_IntVector_Intrinsics_vec256 + v0__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_12, v2_12); + Lib_IntVector_Intrinsics_vec256 + v1__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_12, v2_12); + Lib_IntVector_Intrinsics_vec256 + v2__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_12, v3_12); + Lib_IntVector_Intrinsics_vec256 + v3__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_12, v3_12); + Lib_IntVector_Intrinsics_vec256 ws20 = v0__12; + Lib_IntVector_Intrinsics_vec256 ws21 = v2__12; + Lib_IntVector_Intrinsics_vec256 ws22 = v1__12; + Lib_IntVector_Intrinsics_vec256 ws23 = v3__12; + Lib_IntVector_Intrinsics_vec256 v014 = ws[24U]; + Lib_IntVector_Intrinsics_vec256 v114 = ws[25U]; + Lib_IntVector_Intrinsics_vec256 v214 = ws[26U]; + Lib_IntVector_Intrinsics_vec256 v314 = ws[27U]; + 
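  /* Transpose note (a reading aid; all names below are from the generated code):
     the state vectors are word-sliced -- lane k of s[i] holds word i of parallel
     state k -- so before any bytes can be written out they must be regrouped per
     lane.  Each group of four state vectors is transposed as a 4x4 block of
     64-bit words: the interleave_low64/high64 pair swaps words within each
     128-bit half, and interleave_low128/high128 then exchanges the halves.
     After the write-back further below, ws is lane-major (lane 0's words first,
     then lanes 1, 2 and 3), ready for the store64_le loop and the per-lane
     memcpy of the trailing remOut bytes. */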
Lib_IntVector_Intrinsics_vec256 + v0_13 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v014, v114); + Lib_IntVector_Intrinsics_vec256 + v1_13 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v014, v114); + Lib_IntVector_Intrinsics_vec256 + v2_13 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v214, v314); + Lib_IntVector_Intrinsics_vec256 + v3_13 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v214, v314); + Lib_IntVector_Intrinsics_vec256 + v0__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_13, v2_13); + Lib_IntVector_Intrinsics_vec256 + v1__13 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_13, v2_13); + Lib_IntVector_Intrinsics_vec256 + v2__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_13, v3_13); + Lib_IntVector_Intrinsics_vec256 + v3__13 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_13, v3_13); + Lib_IntVector_Intrinsics_vec256 ws24 = v0__13; + Lib_IntVector_Intrinsics_vec256 ws25 = v2__13; + Lib_IntVector_Intrinsics_vec256 ws26 = v1__13; + Lib_IntVector_Intrinsics_vec256 ws27 = v3__13; + Lib_IntVector_Intrinsics_vec256 v0 = ws[28U]; + Lib_IntVector_Intrinsics_vec256 v1 = ws[29U]; + Lib_IntVector_Intrinsics_vec256 v2 = ws[30U]; + Lib_IntVector_Intrinsics_vec256 v3 = ws[31U]; + Lib_IntVector_Intrinsics_vec256 + v0_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v1_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v2_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v3_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v0__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_14, v2_14); + Lib_IntVector_Intrinsics_vec256 + v1__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_14, v2_14); + Lib_IntVector_Intrinsics_vec256 + v2__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_14, v3_14); + Lib_IntVector_Intrinsics_vec256 + v3__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_14, v3_14); + Lib_IntVector_Intrinsics_vec256 ws28 = v0__14; + Lib_IntVector_Intrinsics_vec256 ws29 = v2__14; + Lib_IntVector_Intrinsics_vec256 ws30 = v1__14; + Lib_IntVector_Intrinsics_vec256 ws31 = v3__14; + ws[0U] = ws0; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; + ws[31U] = ws31; + for (uint32_t i = 0U; i < 32U; i++) + { + Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); + } + uint8_t *b35 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + outputByteLen - remOut, hbuf, remOut * sizeof (uint8_t)); + memcpy(b1 + outputByteLen - remOut, hbuf + 256U, remOut * sizeof (uint8_t)); + memcpy(b2 + outputByteLen - remOut, hbuf + 512U, remOut * sizeof (uint8_t)); + memcpy(b35 + outputByteLen - remOut, hbuf + 768U, remOut * sizeof (uint8_t)); +} + +void +Hacl_Hash_SHA3_Simd256_shake256( + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, + uint32_t outputByteLen, + 
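  /* The four outputs above each receive `outputByteLen` bytes of SHAKE256
     output; the four inputs below each supply `inputByteLen` bytes.  The
     lanes are four fully independent digests evaluated in parallel with
     256-bit vectors. */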
uint8_t *input0, + uint8_t *input1, + uint8_t *input2, + uint8_t *input3, + uint32_t inputByteLen +) +{ + Hacl_Hash_SHA2_uint8_4p + ib = { .fst = input0, .snd = { .fst = input1, .snd = { .fst = input2, .snd = input3 } } }; + Hacl_Hash_SHA2_uint8_4p + rb = { .fst = output0, .snd = { .fst = output1, .snd = { .fst = output2, .snd = output3 } } }; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 s[25U] KRML_POST_ALIGN(32) = { 0U }; + uint32_t rateInBytes1 = 136U; + for (uint32_t i = 0U; i < inputByteLen / rateInBytes1; i++) + { + uint8_t b00[256U] = { 0U }; + uint8_t b10[256U] = { 0U }; + uint8_t b20[256U] = { 0U }; + uint8_t b30[256U] = { 0U }; + Hacl_Hash_SHA2_uint8_4p + b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; + uint8_t *b3 = ib.snd.snd.snd; + uint8_t *b2 = ib.snd.snd.fst; + uint8_t *b1 = ib.snd.fst; + uint8_t *b0 = ib.fst; + uint8_t *bl3 = b_.snd.snd.snd; + uint8_t *bl2 = b_.snd.snd.fst; + uint8_t *bl1 = b_.snd.fst; + uint8_t *bl0 = b_.fst; + memcpy(bl0, b0 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t)); + memcpy(bl1, b1 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t)); + memcpy(bl2, b2 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t)); + memcpy(bl3, b3 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t)); + Hacl_Hash_SHA3_Simd256_absorb_inner_256(rateInBytes1, b_, s); + } + uint8_t b00[256U] = { 0U }; + uint8_t b10[256U] = { 0U }; + uint8_t b20[256U] = { 0U }; + uint8_t b30[256U] = { 0U }; + Hacl_Hash_SHA2_uint8_4p + b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; + uint32_t rem = inputByteLen % rateInBytes1; + uint8_t *b31 = ib.snd.snd.snd; + uint8_t *b21 = ib.snd.snd.fst; + uint8_t *b11 = ib.snd.fst; + uint8_t *b01 = ib.fst; + uint8_t *bl3 = b_.snd.snd.snd; + uint8_t *bl2 = b_.snd.snd.fst; + uint8_t *bl1 = b_.snd.fst; + uint8_t *bl0 = b_.fst; + memcpy(bl0, b01 + inputByteLen - rem, rem * sizeof (uint8_t)); + memcpy(bl1, b11 + inputByteLen - rem, rem * sizeof (uint8_t)); + memcpy(bl2, b21 + inputByteLen - rem, rem * sizeof (uint8_t)); + memcpy(bl3, b31 + inputByteLen - rem, rem * sizeof (uint8_t)); + uint8_t *b32 = b_.snd.snd.snd; + uint8_t *b22 = b_.snd.snd.fst; + uint8_t *b12 = b_.snd.fst; + uint8_t *b02 = b_.fst; + b02[inputByteLen % rateInBytes1] = 0x1FU; + b12[inputByteLen % rateInBytes1] = 0x1FU; + b22[inputByteLen % rateInBytes1] = 0x1FU; + b32[inputByteLen % rateInBytes1] = 0x1FU; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b33 = b_.snd.snd.snd; + uint8_t *b23 = b_.snd.snd.fst; + uint8_t *b13 = b_.snd.fst; + uint8_t *b03 = b_.fst; + ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03); + ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13); + ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23); + ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33); + ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 32U); + ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 32U); + ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 32U); + ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 32U); + ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 64U); + ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 64U); + ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 64U); + ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 64U); + ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 96U); + ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 96U); + 
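  /* At this point b_ holds the last partial block of each lane with the
     SHAKE256 domain-separation/padding byte 0x1F at offset
     inputByteLen % rateInBytes1.  Each load64_le reads 32 bytes into one
     vector, cycling through the four lanes, so ws32[4*j + k] carries bytes
     32*j .. 32*j+31 of lane k; the remaining loads below follow the same
     round-robin pattern.  Note the block is only XORed into s here -- the
     closing 0x80 block absorbed via absorb_inner_256 further down supplies
     the final padding bit and runs the permutation once for the whole
     padded block. */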
ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 96U); + ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 96U); + ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 128U); + ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 128U); + ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 128U); + ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 128U); + ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 160U); + ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 160U); + ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 160U); + ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 160U); + ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 192U); + ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 192U); + ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 192U); + ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 192U); + ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 224U); + ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 224U); + ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 224U); + ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 224U); + Lib_IntVector_Intrinsics_vec256 v00 = ws32[0U]; + Lib_IntVector_Intrinsics_vec256 v10 = ws32[1U]; + Lib_IntVector_Intrinsics_vec256 v20 = ws32[2U]; + Lib_IntVector_Intrinsics_vec256 v30 = ws32[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v1_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v2_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v3_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v0__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v1__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v2__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 + v3__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 ws00 = v0__; + Lib_IntVector_Intrinsics_vec256 ws110 = v2__; + Lib_IntVector_Intrinsics_vec256 ws210 = v1__; + Lib_IntVector_Intrinsics_vec256 ws33 = v3__; + Lib_IntVector_Intrinsics_vec256 v01 = ws32[4U]; + Lib_IntVector_Intrinsics_vec256 v11 = ws32[5U]; + Lib_IntVector_Intrinsics_vec256 v21 = ws32[6U]; + Lib_IntVector_Intrinsics_vec256 v31 = ws32[7U]; + Lib_IntVector_Intrinsics_vec256 + v0_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v1_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v2_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v3_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v0__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v1__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v2__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 + v3__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 ws40 = v0__0; + 
Lib_IntVector_Intrinsics_vec256 ws50 = v2__0; + Lib_IntVector_Intrinsics_vec256 ws60 = v1__0; + Lib_IntVector_Intrinsics_vec256 ws70 = v3__0; + Lib_IntVector_Intrinsics_vec256 v02 = ws32[8U]; + Lib_IntVector_Intrinsics_vec256 v12 = ws32[9U]; + Lib_IntVector_Intrinsics_vec256 v22 = ws32[10U]; + Lib_IntVector_Intrinsics_vec256 v32 = ws32[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v1_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v2_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v3_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v0__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v1__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v2__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 + v3__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 ws80 = v0__1; + Lib_IntVector_Intrinsics_vec256 ws90 = v2__1; + Lib_IntVector_Intrinsics_vec256 ws100 = v1__1; + Lib_IntVector_Intrinsics_vec256 ws111 = v3__1; + Lib_IntVector_Intrinsics_vec256 v03 = ws32[12U]; + Lib_IntVector_Intrinsics_vec256 v13 = ws32[13U]; + Lib_IntVector_Intrinsics_vec256 v23 = ws32[14U]; + Lib_IntVector_Intrinsics_vec256 v33 = ws32[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v1_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v2_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v3_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v0__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v1__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v2__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 + v3__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 ws120 = v0__2; + Lib_IntVector_Intrinsics_vec256 ws130 = v2__2; + Lib_IntVector_Intrinsics_vec256 ws140 = v1__2; + Lib_IntVector_Intrinsics_vec256 ws150 = v3__2; + Lib_IntVector_Intrinsics_vec256 v04 = ws32[16U]; + Lib_IntVector_Intrinsics_vec256 v14 = ws32[17U]; + Lib_IntVector_Intrinsics_vec256 v24 = ws32[18U]; + Lib_IntVector_Intrinsics_vec256 v34 = ws32[19U]; + Lib_IntVector_Intrinsics_vec256 + v0_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v1_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v2_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v3_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v0__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v1__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v2__3 = 
Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 + v3__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 ws160 = v0__3; + Lib_IntVector_Intrinsics_vec256 ws170 = v2__3; + Lib_IntVector_Intrinsics_vec256 ws180 = v1__3; + Lib_IntVector_Intrinsics_vec256 ws190 = v3__3; + Lib_IntVector_Intrinsics_vec256 v05 = ws32[20U]; + Lib_IntVector_Intrinsics_vec256 v15 = ws32[21U]; + Lib_IntVector_Intrinsics_vec256 v25 = ws32[22U]; + Lib_IntVector_Intrinsics_vec256 v35 = ws32[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v1_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v2_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v3_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v0__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v1__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v2__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 + v3__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 ws200 = v0__4; + Lib_IntVector_Intrinsics_vec256 ws211 = v2__4; + Lib_IntVector_Intrinsics_vec256 ws220 = v1__4; + Lib_IntVector_Intrinsics_vec256 ws230 = v3__4; + Lib_IntVector_Intrinsics_vec256 v06 = ws32[24U]; + Lib_IntVector_Intrinsics_vec256 v16 = ws32[25U]; + Lib_IntVector_Intrinsics_vec256 v26 = ws32[26U]; + Lib_IntVector_Intrinsics_vec256 v36 = ws32[27U]; + Lib_IntVector_Intrinsics_vec256 + v0_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v1_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v2_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v3_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v0__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v1__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v2__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 + v3__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 ws240 = v0__5; + Lib_IntVector_Intrinsics_vec256 ws250 = v2__5; + Lib_IntVector_Intrinsics_vec256 ws260 = v1__5; + Lib_IntVector_Intrinsics_vec256 ws270 = v3__5; + Lib_IntVector_Intrinsics_vec256 v07 = ws32[28U]; + Lib_IntVector_Intrinsics_vec256 v17 = ws32[29U]; + Lib_IntVector_Intrinsics_vec256 v27 = ws32[30U]; + Lib_IntVector_Intrinsics_vec256 v37 = ws32[31U]; + Lib_IntVector_Intrinsics_vec256 + v0_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v07, v17); + Lib_IntVector_Intrinsics_vec256 + v1_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v07, v17); + Lib_IntVector_Intrinsics_vec256 + v2_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v27, v37); + Lib_IntVector_Intrinsics_vec256 + v3_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v27, v37); + Lib_IntVector_Intrinsics_vec256 + v0__6 = 
Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v1__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v2__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 + v3__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 ws280 = v0__6; + Lib_IntVector_Intrinsics_vec256 ws290 = v2__6; + Lib_IntVector_Intrinsics_vec256 ws300 = v1__6; + Lib_IntVector_Intrinsics_vec256 ws310 = v3__6; + ws32[0U] = ws00; + ws32[1U] = ws110; + ws32[2U] = ws210; + ws32[3U] = ws33; + ws32[4U] = ws40; + ws32[5U] = ws50; + ws32[6U] = ws60; + ws32[7U] = ws70; + ws32[8U] = ws80; + ws32[9U] = ws90; + ws32[10U] = ws100; + ws32[11U] = ws111; + ws32[12U] = ws120; + ws32[13U] = ws130; + ws32[14U] = ws140; + ws32[15U] = ws150; + ws32[16U] = ws160; + ws32[17U] = ws170; + ws32[18U] = ws180; + ws32[19U] = ws190; + ws32[20U] = ws200; + ws32[21U] = ws211; + ws32[22U] = ws220; + ws32[23U] = ws230; + ws32[24U] = ws240; + ws32[25U] = ws250; + ws32[26U] = ws260; + ws32[27U] = ws270; + ws32[28U] = ws280; + ws32[29U] = ws290; + ws32[30U] = ws300; + ws32[31U] = ws310; + for (uint32_t i = 0U; i < 25U; i++) + { + s[i] = Lib_IntVector_Intrinsics_vec256_xor(s[i], ws32[i]); + } + uint8_t b04[256U] = { 0U }; + uint8_t b14[256U] = { 0U }; + uint8_t b24[256U] = { 0U }; + uint8_t b34[256U] = { 0U }; + Hacl_Hash_SHA2_uint8_4p + b = { .fst = b04, .snd = { .fst = b14, .snd = { .fst = b24, .snd = b34 } } }; + uint8_t *b3 = b.snd.snd.snd; + uint8_t *b25 = b.snd.snd.fst; + uint8_t *b15 = b.snd.fst; + uint8_t *b05 = b.fst; + b05[rateInBytes1 - 1U] = 0x80U; + b15[rateInBytes1 - 1U] = 0x80U; + b25[rateInBytes1 - 1U] = 0x80U; + b3[rateInBytes1 - 1U] = 0x80U; + Hacl_Hash_SHA3_Simd256_absorb_inner_256(rateInBytes1, b, s); + for (uint32_t i0 = 0U; i0 < outputByteLen / rateInBytes1; i0++) + { + uint8_t hbuf[1024U] = { 0U }; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws[32U] KRML_POST_ALIGN(32) = { 0U }; + memcpy(ws, s, 25U * sizeof (Lib_IntVector_Intrinsics_vec256)); + Lib_IntVector_Intrinsics_vec256 v08 = ws[0U]; + Lib_IntVector_Intrinsics_vec256 v18 = ws[1U]; + Lib_IntVector_Intrinsics_vec256 v28 = ws[2U]; + Lib_IntVector_Intrinsics_vec256 v38 = ws[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v08, v18); + Lib_IntVector_Intrinsics_vec256 + v1_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v08, v18); + Lib_IntVector_Intrinsics_vec256 + v2_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v28, v38); + Lib_IntVector_Intrinsics_vec256 + v3_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v28, v38); + Lib_IntVector_Intrinsics_vec256 + v0__7 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_7, v2_7); + Lib_IntVector_Intrinsics_vec256 + v1__7 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_7, v2_7); + Lib_IntVector_Intrinsics_vec256 + v2__7 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_7, v3_7); + Lib_IntVector_Intrinsics_vec256 + v3__7 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_7, v3_7); + Lib_IntVector_Intrinsics_vec256 ws0 = v0__7; + Lib_IntVector_Intrinsics_vec256 ws1 = v2__7; + Lib_IntVector_Intrinsics_vec256 ws2 = v1__7; + Lib_IntVector_Intrinsics_vec256 ws3 = v3__7; + Lib_IntVector_Intrinsics_vec256 v09 = ws[4U]; + Lib_IntVector_Intrinsics_vec256 v19 = ws[5U]; + Lib_IntVector_Intrinsics_vec256 v29 = ws[6U]; + Lib_IntVector_Intrinsics_vec256 v39 = 
ws[7U]; + Lib_IntVector_Intrinsics_vec256 + v0_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v09, v19); + Lib_IntVector_Intrinsics_vec256 + v1_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v09, v19); + Lib_IntVector_Intrinsics_vec256 + v2_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v29, v39); + Lib_IntVector_Intrinsics_vec256 + v3_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v29, v39); + Lib_IntVector_Intrinsics_vec256 + v0__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_8, v2_8); + Lib_IntVector_Intrinsics_vec256 + v1__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_8, v2_8); + Lib_IntVector_Intrinsics_vec256 + v2__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_8, v3_8); + Lib_IntVector_Intrinsics_vec256 + v3__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_8, v3_8); + Lib_IntVector_Intrinsics_vec256 ws4 = v0__8; + Lib_IntVector_Intrinsics_vec256 ws5 = v2__8; + Lib_IntVector_Intrinsics_vec256 ws6 = v1__8; + Lib_IntVector_Intrinsics_vec256 ws7 = v3__8; + Lib_IntVector_Intrinsics_vec256 v010 = ws[8U]; + Lib_IntVector_Intrinsics_vec256 v110 = ws[9U]; + Lib_IntVector_Intrinsics_vec256 v210 = ws[10U]; + Lib_IntVector_Intrinsics_vec256 v310 = ws[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v010, v110); + Lib_IntVector_Intrinsics_vec256 + v1_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v010, v110); + Lib_IntVector_Intrinsics_vec256 + v2_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v210, v310); + Lib_IntVector_Intrinsics_vec256 + v3_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v210, v310); + Lib_IntVector_Intrinsics_vec256 + v0__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_9, v2_9); + Lib_IntVector_Intrinsics_vec256 + v1__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_9, v2_9); + Lib_IntVector_Intrinsics_vec256 + v2__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_9, v3_9); + Lib_IntVector_Intrinsics_vec256 + v3__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_9, v3_9); + Lib_IntVector_Intrinsics_vec256 ws8 = v0__9; + Lib_IntVector_Intrinsics_vec256 ws9 = v2__9; + Lib_IntVector_Intrinsics_vec256 ws10 = v1__9; + Lib_IntVector_Intrinsics_vec256 ws11 = v3__9; + Lib_IntVector_Intrinsics_vec256 v011 = ws[12U]; + Lib_IntVector_Intrinsics_vec256 v111 = ws[13U]; + Lib_IntVector_Intrinsics_vec256 v211 = ws[14U]; + Lib_IntVector_Intrinsics_vec256 v311 = ws[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_10 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v011, v111); + Lib_IntVector_Intrinsics_vec256 + v1_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v011, v111); + Lib_IntVector_Intrinsics_vec256 + v2_10 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v211, v311); + Lib_IntVector_Intrinsics_vec256 + v3_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v211, v311); + Lib_IntVector_Intrinsics_vec256 + v0__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_10, v2_10); + Lib_IntVector_Intrinsics_vec256 + v1__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_10, v2_10); + Lib_IntVector_Intrinsics_vec256 + v2__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_10, v3_10); + Lib_IntVector_Intrinsics_vec256 + v3__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_10, v3_10); + Lib_IntVector_Intrinsics_vec256 ws12 = v0__10; + Lib_IntVector_Intrinsics_vec256 ws13 = v2__10; + Lib_IntVector_Intrinsics_vec256 ws14 = v1__10; + Lib_IntVector_Intrinsics_vec256 ws15 = v3__10; + 
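      /* Same 4x4 word transpose as in the absorb path, applied to a copy of
         the state: once all eight groups are done and written back, ws is
         lane-major and is stored to hbuf (256 bytes per lane), from which
         rateInBytes1 = 136 bytes per lane are appended to each output.  The
         24-round loop further below is the Keccak-f[1600] permutation run on
         all four states at once: theta (_C/_D), rho+pi (the keccak_piln and
         keccak_rotc tables), chi (the lognot/and/xor block) and iota (the
         round constant XORed into s[0]). */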
Lib_IntVector_Intrinsics_vec256 v012 = ws[16U]; + Lib_IntVector_Intrinsics_vec256 v112 = ws[17U]; + Lib_IntVector_Intrinsics_vec256 v212 = ws[18U]; + Lib_IntVector_Intrinsics_vec256 v312 = ws[19U]; + Lib_IntVector_Intrinsics_vec256 + v0_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v012, v112); + Lib_IntVector_Intrinsics_vec256 + v1_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v012, v112); + Lib_IntVector_Intrinsics_vec256 + v2_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v212, v312); + Lib_IntVector_Intrinsics_vec256 + v3_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v212, v312); + Lib_IntVector_Intrinsics_vec256 + v0__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_11, v2_11); + Lib_IntVector_Intrinsics_vec256 + v1__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_11, v2_11); + Lib_IntVector_Intrinsics_vec256 + v2__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_11, v3_11); + Lib_IntVector_Intrinsics_vec256 + v3__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_11, v3_11); + Lib_IntVector_Intrinsics_vec256 ws16 = v0__11; + Lib_IntVector_Intrinsics_vec256 ws17 = v2__11; + Lib_IntVector_Intrinsics_vec256 ws18 = v1__11; + Lib_IntVector_Intrinsics_vec256 ws19 = v3__11; + Lib_IntVector_Intrinsics_vec256 v013 = ws[20U]; + Lib_IntVector_Intrinsics_vec256 v113 = ws[21U]; + Lib_IntVector_Intrinsics_vec256 v213 = ws[22U]; + Lib_IntVector_Intrinsics_vec256 v313 = ws[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v013, v113); + Lib_IntVector_Intrinsics_vec256 + v1_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v013, v113); + Lib_IntVector_Intrinsics_vec256 + v2_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v213, v313); + Lib_IntVector_Intrinsics_vec256 + v3_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v213, v313); + Lib_IntVector_Intrinsics_vec256 + v0__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_12, v2_12); + Lib_IntVector_Intrinsics_vec256 + v1__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_12, v2_12); + Lib_IntVector_Intrinsics_vec256 + v2__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_12, v3_12); + Lib_IntVector_Intrinsics_vec256 + v3__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_12, v3_12); + Lib_IntVector_Intrinsics_vec256 ws20 = v0__12; + Lib_IntVector_Intrinsics_vec256 ws21 = v2__12; + Lib_IntVector_Intrinsics_vec256 ws22 = v1__12; + Lib_IntVector_Intrinsics_vec256 ws23 = v3__12; + Lib_IntVector_Intrinsics_vec256 v014 = ws[24U]; + Lib_IntVector_Intrinsics_vec256 v114 = ws[25U]; + Lib_IntVector_Intrinsics_vec256 v214 = ws[26U]; + Lib_IntVector_Intrinsics_vec256 v314 = ws[27U]; + Lib_IntVector_Intrinsics_vec256 + v0_13 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v014, v114); + Lib_IntVector_Intrinsics_vec256 + v1_13 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v014, v114); + Lib_IntVector_Intrinsics_vec256 + v2_13 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v214, v314); + Lib_IntVector_Intrinsics_vec256 + v3_13 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v214, v314); + Lib_IntVector_Intrinsics_vec256 + v0__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_13, v2_13); + Lib_IntVector_Intrinsics_vec256 + v1__13 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_13, v2_13); + Lib_IntVector_Intrinsics_vec256 + v2__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_13, v3_13); + Lib_IntVector_Intrinsics_vec256 + v3__13 = 
Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_13, v3_13); + Lib_IntVector_Intrinsics_vec256 ws24 = v0__13; + Lib_IntVector_Intrinsics_vec256 ws25 = v2__13; + Lib_IntVector_Intrinsics_vec256 ws26 = v1__13; + Lib_IntVector_Intrinsics_vec256 ws27 = v3__13; + Lib_IntVector_Intrinsics_vec256 v0 = ws[28U]; + Lib_IntVector_Intrinsics_vec256 v1 = ws[29U]; + Lib_IntVector_Intrinsics_vec256 v2 = ws[30U]; + Lib_IntVector_Intrinsics_vec256 v3 = ws[31U]; + Lib_IntVector_Intrinsics_vec256 + v0_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v1_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v2_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v3_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v0__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_14, v2_14); + Lib_IntVector_Intrinsics_vec256 + v1__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_14, v2_14); + Lib_IntVector_Intrinsics_vec256 + v2__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_14, v3_14); + Lib_IntVector_Intrinsics_vec256 + v3__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_14, v3_14); + Lib_IntVector_Intrinsics_vec256 ws28 = v0__14; + Lib_IntVector_Intrinsics_vec256 ws29 = v2__14; + Lib_IntVector_Intrinsics_vec256 ws30 = v1__14; + Lib_IntVector_Intrinsics_vec256 ws31 = v3__14; + ws[0U] = ws0; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; + ws[31U] = ws31; + for (uint32_t i = 0U; i < 32U; i++) + { + Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); + } + uint8_t *b35 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + i0 * rateInBytes1, hbuf, rateInBytes1 * sizeof (uint8_t)); + memcpy(b1 + i0 * rateInBytes1, hbuf + 256U, rateInBytes1 * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes1, hbuf + 512U, rateInBytes1 * sizeof (uint8_t)); + memcpy(b35 + i0 * rateInBytes1, hbuf + 768U, rateInBytes1 * sizeof (uint8_t)); + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____0 = s[i + 0U]; + Lib_IntVector_Intrinsics_vec256 uu____1 = s[i + 5U]; + Lib_IntVector_Intrinsics_vec256 uu____2 = s[i + 10U]; + _C[i] = + Lib_IntVector_Intrinsics_vec256_xor(uu____0, + Lib_IntVector_Intrinsics_vec256_xor(uu____1, + Lib_IntVector_Intrinsics_vec256_xor(uu____2, + Lib_IntVector_Intrinsics_vec256_xor(s[i + 15U], s[i + 20U]))));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____3 = _C[(i2 + 4U) % 5U]; + Lib_IntVector_Intrinsics_vec256 uu____4 = _C[(i2 + 1U) % 5U]; + Lib_IntVector_Intrinsics_vec256 + _D = + Lib_IntVector_Intrinsics_vec256_xor(uu____3, + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____4, + 1U), + 
Lib_IntVector_Intrinsics_vec256_shift_right64(uu____4, 63U))); + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + s[i2 + 5U * i] = Lib_IntVector_Intrinsics_vec256_xor(s[i2 + 5U * i], _D););); + Lib_IntVector_Intrinsics_vec256 x = s[1U]; + Lib_IntVector_Intrinsics_vec256 current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Hash_SHA3_keccak_piln[i]; + uint32_t r = Hacl_Hash_SHA3_keccak_rotc[i]; + Lib_IntVector_Intrinsics_vec256 temp = s[_Y]; + Lib_IntVector_Intrinsics_vec256 uu____5 = current; + s[_Y] = + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____5, + r), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____5, 64U - r)); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____6 = s[0U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____7 = Lib_IntVector_Intrinsics_vec256_lognot(s[1U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v015 = + Lib_IntVector_Intrinsics_vec256_xor(uu____6, + Lib_IntVector_Intrinsics_vec256_and(uu____7, s[2U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____8 = s[1U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____9 = Lib_IntVector_Intrinsics_vec256_lognot(s[2U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v115 = + Lib_IntVector_Intrinsics_vec256_xor(uu____8, + Lib_IntVector_Intrinsics_vec256_and(uu____9, s[3U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____10 = s[2U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____11 = Lib_IntVector_Intrinsics_vec256_lognot(s[3U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v215 = + Lib_IntVector_Intrinsics_vec256_xor(uu____10, + Lib_IntVector_Intrinsics_vec256_and(uu____11, s[4U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____12 = s[3U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____13 = Lib_IntVector_Intrinsics_vec256_lognot(s[4U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v315 = + Lib_IntVector_Intrinsics_vec256_xor(uu____12, + Lib_IntVector_Intrinsics_vec256_and(uu____13, s[0U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____14 = s[4U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____15 = Lib_IntVector_Intrinsics_vec256_lognot(s[0U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v4 = + Lib_IntVector_Intrinsics_vec256_xor(uu____14, + Lib_IntVector_Intrinsics_vec256_and(uu____15, s[1U + 5U * i])); + s[0U + 5U * i] = v015; + s[1U + 5U * i] = v115; + s[2U + 5U * i] = v215; + s[3U + 5U * i] = v315; + s[4U + 5U * i] = v4;); + uint64_t c = Hacl_Hash_SHA3_keccak_rndc[i1]; + Lib_IntVector_Intrinsics_vec256 uu____16 = s[0U]; + s[0U] = + Lib_IntVector_Intrinsics_vec256_xor(uu____16, + Lib_IntVector_Intrinsics_vec256_load64(c)); + } + } + uint32_t remOut = outputByteLen % rateInBytes1; + uint8_t hbuf[1024U] = { 0U }; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws[32U] KRML_POST_ALIGN(32) = { 0U }; + memcpy(ws, s, 25U * sizeof (Lib_IntVector_Intrinsics_vec256)); + Lib_IntVector_Intrinsics_vec256 v08 = ws[0U]; + Lib_IntVector_Intrinsics_vec256 v18 = ws[1U]; + Lib_IntVector_Intrinsics_vec256 v28 = ws[2U]; + Lib_IntVector_Intrinsics_vec256 v38 = ws[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v08, v18); + Lib_IntVector_Intrinsics_vec256 + v1_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v08, v18); + Lib_IntVector_Intrinsics_vec256 + v2_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v28, v38); + Lib_IntVector_Intrinsics_vec256 + v3_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v28, v38); + Lib_IntVector_Intrinsics_vec256 
+ v0__7 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_7, v2_7); + Lib_IntVector_Intrinsics_vec256 + v1__7 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_7, v2_7); + Lib_IntVector_Intrinsics_vec256 + v2__7 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_7, v3_7); + Lib_IntVector_Intrinsics_vec256 + v3__7 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_7, v3_7); + Lib_IntVector_Intrinsics_vec256 ws0 = v0__7; + Lib_IntVector_Intrinsics_vec256 ws1 = v2__7; + Lib_IntVector_Intrinsics_vec256 ws2 = v1__7; + Lib_IntVector_Intrinsics_vec256 ws3 = v3__7; + Lib_IntVector_Intrinsics_vec256 v09 = ws[4U]; + Lib_IntVector_Intrinsics_vec256 v19 = ws[5U]; + Lib_IntVector_Intrinsics_vec256 v29 = ws[6U]; + Lib_IntVector_Intrinsics_vec256 v39 = ws[7U]; + Lib_IntVector_Intrinsics_vec256 + v0_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v09, v19); + Lib_IntVector_Intrinsics_vec256 + v1_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v09, v19); + Lib_IntVector_Intrinsics_vec256 + v2_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v29, v39); + Lib_IntVector_Intrinsics_vec256 + v3_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v29, v39); + Lib_IntVector_Intrinsics_vec256 + v0__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_8, v2_8); + Lib_IntVector_Intrinsics_vec256 + v1__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_8, v2_8); + Lib_IntVector_Intrinsics_vec256 + v2__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_8, v3_8); + Lib_IntVector_Intrinsics_vec256 + v3__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_8, v3_8); + Lib_IntVector_Intrinsics_vec256 ws4 = v0__8; + Lib_IntVector_Intrinsics_vec256 ws5 = v2__8; + Lib_IntVector_Intrinsics_vec256 ws6 = v1__8; + Lib_IntVector_Intrinsics_vec256 ws7 = v3__8; + Lib_IntVector_Intrinsics_vec256 v010 = ws[8U]; + Lib_IntVector_Intrinsics_vec256 v110 = ws[9U]; + Lib_IntVector_Intrinsics_vec256 v210 = ws[10U]; + Lib_IntVector_Intrinsics_vec256 v310 = ws[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v010, v110); + Lib_IntVector_Intrinsics_vec256 + v1_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v010, v110); + Lib_IntVector_Intrinsics_vec256 + v2_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v210, v310); + Lib_IntVector_Intrinsics_vec256 + v3_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v210, v310); + Lib_IntVector_Intrinsics_vec256 + v0__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_9, v2_9); + Lib_IntVector_Intrinsics_vec256 + v1__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_9, v2_9); + Lib_IntVector_Intrinsics_vec256 + v2__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_9, v3_9); + Lib_IntVector_Intrinsics_vec256 + v3__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_9, v3_9); + Lib_IntVector_Intrinsics_vec256 ws8 = v0__9; + Lib_IntVector_Intrinsics_vec256 ws9 = v2__9; + Lib_IntVector_Intrinsics_vec256 ws10 = v1__9; + Lib_IntVector_Intrinsics_vec256 ws11 = v3__9; + Lib_IntVector_Intrinsics_vec256 v011 = ws[12U]; + Lib_IntVector_Intrinsics_vec256 v111 = ws[13U]; + Lib_IntVector_Intrinsics_vec256 v211 = ws[14U]; + Lib_IntVector_Intrinsics_vec256 v311 = ws[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_10 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v011, v111); + Lib_IntVector_Intrinsics_vec256 + v1_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v011, v111); + Lib_IntVector_Intrinsics_vec256 + v2_10 = 
Lib_IntVector_Intrinsics_vec256_interleave_low64(v211, v311); + Lib_IntVector_Intrinsics_vec256 + v3_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v211, v311); + Lib_IntVector_Intrinsics_vec256 + v0__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_10, v2_10); + Lib_IntVector_Intrinsics_vec256 + v1__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_10, v2_10); + Lib_IntVector_Intrinsics_vec256 + v2__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_10, v3_10); + Lib_IntVector_Intrinsics_vec256 + v3__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_10, v3_10); + Lib_IntVector_Intrinsics_vec256 ws12 = v0__10; + Lib_IntVector_Intrinsics_vec256 ws13 = v2__10; + Lib_IntVector_Intrinsics_vec256 ws14 = v1__10; + Lib_IntVector_Intrinsics_vec256 ws15 = v3__10; + Lib_IntVector_Intrinsics_vec256 v012 = ws[16U]; + Lib_IntVector_Intrinsics_vec256 v112 = ws[17U]; + Lib_IntVector_Intrinsics_vec256 v212 = ws[18U]; + Lib_IntVector_Intrinsics_vec256 v312 = ws[19U]; + Lib_IntVector_Intrinsics_vec256 + v0_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v012, v112); + Lib_IntVector_Intrinsics_vec256 + v1_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v012, v112); + Lib_IntVector_Intrinsics_vec256 + v2_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v212, v312); + Lib_IntVector_Intrinsics_vec256 + v3_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v212, v312); + Lib_IntVector_Intrinsics_vec256 + v0__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_11, v2_11); + Lib_IntVector_Intrinsics_vec256 + v1__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_11, v2_11); + Lib_IntVector_Intrinsics_vec256 + v2__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_11, v3_11); + Lib_IntVector_Intrinsics_vec256 + v3__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_11, v3_11); + Lib_IntVector_Intrinsics_vec256 ws16 = v0__11; + Lib_IntVector_Intrinsics_vec256 ws17 = v2__11; + Lib_IntVector_Intrinsics_vec256 ws18 = v1__11; + Lib_IntVector_Intrinsics_vec256 ws19 = v3__11; + Lib_IntVector_Intrinsics_vec256 v013 = ws[20U]; + Lib_IntVector_Intrinsics_vec256 v113 = ws[21U]; + Lib_IntVector_Intrinsics_vec256 v213 = ws[22U]; + Lib_IntVector_Intrinsics_vec256 v313 = ws[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v013, v113); + Lib_IntVector_Intrinsics_vec256 + v1_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v013, v113); + Lib_IntVector_Intrinsics_vec256 + v2_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v213, v313); + Lib_IntVector_Intrinsics_vec256 + v3_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v213, v313); + Lib_IntVector_Intrinsics_vec256 + v0__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_12, v2_12); + Lib_IntVector_Intrinsics_vec256 + v1__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_12, v2_12); + Lib_IntVector_Intrinsics_vec256 + v2__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_12, v3_12); + Lib_IntVector_Intrinsics_vec256 + v3__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_12, v3_12); + Lib_IntVector_Intrinsics_vec256 ws20 = v0__12; + Lib_IntVector_Intrinsics_vec256 ws21 = v2__12; + Lib_IntVector_Intrinsics_vec256 ws22 = v1__12; + Lib_IntVector_Intrinsics_vec256 ws23 = v3__12; + Lib_IntVector_Intrinsics_vec256 v014 = ws[24U]; + Lib_IntVector_Intrinsics_vec256 v114 = ws[25U]; + Lib_IntVector_Intrinsics_vec256 v214 = ws[26U]; + Lib_IntVector_Intrinsics_vec256 v314 = ws[27U]; + 
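  /* Final partial squeeze: remOut = outputByteLen % rateInBytes1 bytes remain.
     The state copy is transposed exactly as in the loop above, and the last
     remOut bytes of each output are filled from hbuf at offset
     outputByteLen - remOut. */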
+  Lib_IntVector_Intrinsics_vec256 v0_13 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v014, v114);
+  Lib_IntVector_Intrinsics_vec256 v1_13 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v014, v114);
+  Lib_IntVector_Intrinsics_vec256 v2_13 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v214, v314);
+  Lib_IntVector_Intrinsics_vec256 v3_13 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v214, v314);
+  Lib_IntVector_Intrinsics_vec256 v0__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_13, v2_13);
+  Lib_IntVector_Intrinsics_vec256 v1__13 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_13, v2_13);
+  Lib_IntVector_Intrinsics_vec256 v2__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_13, v3_13);
+  Lib_IntVector_Intrinsics_vec256 v3__13 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_13, v3_13);
+  Lib_IntVector_Intrinsics_vec256 ws24 = v0__13;
+  Lib_IntVector_Intrinsics_vec256 ws25 = v2__13;
+  Lib_IntVector_Intrinsics_vec256 ws26 = v1__13;
+  Lib_IntVector_Intrinsics_vec256 ws27 = v3__13;
+  Lib_IntVector_Intrinsics_vec256 v0 = ws[28U];
+  Lib_IntVector_Intrinsics_vec256 v1 = ws[29U];
+  Lib_IntVector_Intrinsics_vec256 v2 = ws[30U];
+  Lib_IntVector_Intrinsics_vec256 v3 = ws[31U];
+  Lib_IntVector_Intrinsics_vec256 v0_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0, v1);
+  Lib_IntVector_Intrinsics_vec256 v1_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0, v1);
+  Lib_IntVector_Intrinsics_vec256 v2_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v2, v3);
+  Lib_IntVector_Intrinsics_vec256 v3_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v2, v3);
+  Lib_IntVector_Intrinsics_vec256 v0__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_14, v2_14);
+  Lib_IntVector_Intrinsics_vec256 v1__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_14, v2_14);
+  Lib_IntVector_Intrinsics_vec256 v2__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_14, v3_14);
+  Lib_IntVector_Intrinsics_vec256 v3__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_14, v3_14);
+  Lib_IntVector_Intrinsics_vec256 ws28 = v0__14;
+  Lib_IntVector_Intrinsics_vec256 ws29 = v2__14;
+  Lib_IntVector_Intrinsics_vec256 ws30 = v1__14;
+  Lib_IntVector_Intrinsics_vec256 ws31 = v3__14;
+  ws[0U] = ws0;
+  ws[1U] = ws4;
+  ws[2U] = ws8;
+  ws[3U] = ws12;
+  ws[4U] = ws16;
+  ws[5U] = ws20;
+  ws[6U] = ws24;
+  ws[7U] = ws28;
+  ws[8U] = ws1;
+  ws[9U] = ws5;
+  ws[10U] = ws9;
+  ws[11U] = ws13;
+  ws[12U] = ws17;
+  ws[13U] = ws21;
+  ws[14U] = ws25;
+  ws[15U] = ws29;
+  ws[16U] = ws2;
+  ws[17U] = ws6;
+  ws[18U] = ws10;
+  ws[19U] = ws14;
+  ws[20U] = ws18;
+  ws[21U] = ws22;
+  ws[22U] = ws26;
+  ws[23U] = ws30;
+  ws[24U] = ws3;
+  ws[25U] = ws7;
+  ws[26U] = ws11;
+  ws[27U] = ws15;
+  ws[28U] = ws19;
+  ws[29U] = ws23;
+  ws[30U] = ws27;
+  ws[31U] = ws31;
+  for (uint32_t i = 0U; i < 32U; i++)
+  {
+    Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]);
+  }
+  uint8_t *b35 = rb.snd.snd.snd;
+  uint8_t *b2 = rb.snd.snd.fst;
+  uint8_t *b1 = rb.snd.fst;
+  uint8_t *b0 = rb.fst;
+  memcpy(b0 + outputByteLen - remOut, hbuf, remOut * sizeof (uint8_t));
+  memcpy(b1 + outputByteLen - remOut, hbuf + 256U, remOut * sizeof (uint8_t));
+  memcpy(b2 + outputByteLen - remOut, hbuf + 512U, remOut * sizeof (uint8_t));
+  memcpy(b35 + outputByteLen - remOut, hbuf + 768U, remOut * sizeof (uint8_t));
+}
+
+void
+Hacl_Hash_SHA3_Simd256_sha3_224(
+  uint8_t *output0,
+  uint8_t *output1,
+  uint8_t *output2,
+  uint8_t *output3,
+  uint8_t *input0,
+  uint8_t *input1,
+  uint8_t *input2,
+  uint8_t *input3,
+  uint32_t inputByteLen
+)
+{
+  Hacl_Hash_SHA2_uint8_4p ib = { .fst = input0, .snd = { .fst = input1, .snd = { .fst = input2, .snd = input3 } } };
+  Hacl_Hash_SHA2_uint8_4p rb = { .fst = output0, .snd = { .fst = output1, .snd = { .fst = output2, .snd = output3 } } };
+  KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 s[25U] KRML_POST_ALIGN(32) = { 0U };
+  uint32_t rateInBytes1 = 144U;
+  for (uint32_t i = 0U; i < inputByteLen / rateInBytes1; i++)
+  {
+    uint8_t b00[256U] = { 0U };
+    uint8_t b10[256U] = { 0U };
+    uint8_t b20[256U] = { 0U };
+    uint8_t b30[256U] = { 0U };
+    Hacl_Hash_SHA2_uint8_4p b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } };
+    uint8_t *b3 = ib.snd.snd.snd;
+    uint8_t *b2 = ib.snd.snd.fst;
+    uint8_t *b1 = ib.snd.fst;
+    uint8_t *b0 = ib.fst;
+    uint8_t *bl3 = b_.snd.snd.snd;
+    uint8_t *bl2 = b_.snd.snd.fst;
+    uint8_t *bl1 = b_.snd.fst;
+    uint8_t *bl0 = b_.fst;
+    memcpy(bl0, b0 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t));
+    memcpy(bl1, b1 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t));
+    memcpy(bl2, b2 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t));
+    memcpy(bl3, b3 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t));
+    Hacl_Hash_SHA3_Simd256_absorb_inner_256(rateInBytes1, b_, s);
+  }
+  uint8_t b00[256U] = { 0U };
+  uint8_t b10[256U] = { 0U };
+  uint8_t b20[256U] = { 0U };
+  uint8_t b30[256U] = { 0U };
+  Hacl_Hash_SHA2_uint8_4p b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } };
+  uint32_t rem = inputByteLen % rateInBytes1;
+  uint8_t *b31 = ib.snd.snd.snd;
+  uint8_t *b21 = ib.snd.snd.fst;
+  uint8_t *b11 = ib.snd.fst;
+  uint8_t *b01 = ib.fst;
+  uint8_t *bl3 = b_.snd.snd.snd;
+  uint8_t *bl2 = b_.snd.snd.fst;
+  uint8_t *bl1 = b_.snd.fst;
+  uint8_t *bl0 = b_.fst;
+  memcpy(bl0, b01 + inputByteLen - rem, rem * sizeof (uint8_t));
+  memcpy(bl1, b11 + inputByteLen - rem, rem * sizeof (uint8_t));
+  memcpy(bl2, b21 + inputByteLen - rem, rem * sizeof (uint8_t));
+  memcpy(bl3, b31 + inputByteLen - rem, rem * sizeof (uint8_t));
+  uint8_t *b32 = b_.snd.snd.snd;
+  uint8_t *b22 = b_.snd.snd.fst;
+  uint8_t *b12 = b_.snd.fst;
+  uint8_t *b02 = b_.fst;
+  b02[inputByteLen % rateInBytes1] = 0x06U;
+  b12[inputByteLen % rateInBytes1] = 0x06U;
+  b22[inputByteLen % rateInBytes1] = 0x06U;
+  b32[inputByteLen % rateInBytes1] = 0x06U;
+  KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U };
+  uint8_t *b33 = b_.snd.snd.snd;
+  uint8_t *b23 = b_.snd.snd.fst;
+  uint8_t *b13 = b_.snd.fst;
+  uint8_t *b03 = b_.fst;
+  ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03);
+  ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13);
+  ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23);
+  ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33);
+  ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 32U);
+  ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 32U);
+  ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 32U);
+  ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 32U);
+  ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 64U);
+  ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 64U);
+  ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 64U);
+  ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 64U);
+  ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 96U);
+  ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 96U);
+  ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 96U);
+  ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 96U);
+  ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 128U);
+  ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 128U);
+  ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 128U);
+  ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 128U);
+  ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 160U);
+  ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 160U);
+  ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 160U);
+  ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 160U);
+  ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 192U);
+  ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 192U);
+  ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 192U);
+  ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 192U);
+  ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 224U);
+  ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 224U);
+  ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 224U);
+  ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 224U);
+  Lib_IntVector_Intrinsics_vec256 v00 = ws32[0U];
+  Lib_IntVector_Intrinsics_vec256 v10 = ws32[1U];
+  Lib_IntVector_Intrinsics_vec256 v20 = ws32[2U];
+  Lib_IntVector_Intrinsics_vec256 v30 = ws32[3U];
+  Lib_IntVector_Intrinsics_vec256 v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v00, v10);
+  Lib_IntVector_Intrinsics_vec256 v1_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v00, v10);
+  Lib_IntVector_Intrinsics_vec256 v2_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v20, v30);
+  Lib_IntVector_Intrinsics_vec256 v3_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v20, v30);
+  Lib_IntVector_Intrinsics_vec256 v0__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_, v2_);
+  Lib_IntVector_Intrinsics_vec256 v1__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_, v2_);
+  Lib_IntVector_Intrinsics_vec256 v2__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_, v3_);
+  Lib_IntVector_Intrinsics_vec256 v3__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_, v3_);
+  Lib_IntVector_Intrinsics_vec256 ws00 = v0__;
+  Lib_IntVector_Intrinsics_vec256 ws110 = v2__;
+  Lib_IntVector_Intrinsics_vec256 ws210 = v1__;
+  Lib_IntVector_Intrinsics_vec256 ws33 = v3__;
+  Lib_IntVector_Intrinsics_vec256 v01 = ws32[4U];
+  Lib_IntVector_Intrinsics_vec256 v11 = ws32[5U];
+  Lib_IntVector_Intrinsics_vec256 v21 = ws32[6U];
+  Lib_IntVector_Intrinsics_vec256 v31 = ws32[7U];
+  Lib_IntVector_Intrinsics_vec256 v0_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v01, v11);
+  Lib_IntVector_Intrinsics_vec256 v1_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v01, v11);
+  Lib_IntVector_Intrinsics_vec256 v2_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v21, v31);
+  Lib_IntVector_Intrinsics_vec256 v3_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v21, v31);
+  Lib_IntVector_Intrinsics_vec256 v0__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_0, v2_0);
+  Lib_IntVector_Intrinsics_vec256 v1__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_0, v2_0);
+  Lib_IntVector_Intrinsics_vec256 v2__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_0, v3_0);
+  Lib_IntVector_Intrinsics_vec256 v3__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_0, v3_0);
+  Lib_IntVector_Intrinsics_vec256 ws40 = v0__0;
+  Lib_IntVector_Intrinsics_vec256 ws50 = v2__0;
+  Lib_IntVector_Intrinsics_vec256 ws60 = v1__0;
+  Lib_IntVector_Intrinsics_vec256 ws70 = v3__0;
+  Lib_IntVector_Intrinsics_vec256 v02 = ws32[8U];
+  Lib_IntVector_Intrinsics_vec256 v12 = ws32[9U];
+  Lib_IntVector_Intrinsics_vec256 v22 = ws32[10U];
+  Lib_IntVector_Intrinsics_vec256 v32 = ws32[11U];
+  Lib_IntVector_Intrinsics_vec256 v0_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v02, v12);
+  Lib_IntVector_Intrinsics_vec256 v1_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v02, v12);
+  Lib_IntVector_Intrinsics_vec256 v2_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v22, v32);
+  Lib_IntVector_Intrinsics_vec256 v3_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v22, v32);
+  Lib_IntVector_Intrinsics_vec256 v0__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_1, v2_1);
+  Lib_IntVector_Intrinsics_vec256 v1__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_1, v2_1);
+  Lib_IntVector_Intrinsics_vec256 v2__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_1, v3_1);
+  Lib_IntVector_Intrinsics_vec256 v3__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_1, v3_1);
+  Lib_IntVector_Intrinsics_vec256 ws80 = v0__1;
+  Lib_IntVector_Intrinsics_vec256 ws90 = v2__1;
+  Lib_IntVector_Intrinsics_vec256 ws100 = v1__1;
+  Lib_IntVector_Intrinsics_vec256 ws111 = v3__1;
+  Lib_IntVector_Intrinsics_vec256 v03 = ws32[12U];
+  Lib_IntVector_Intrinsics_vec256 v13 = ws32[13U];
+  Lib_IntVector_Intrinsics_vec256 v23 = ws32[14U];
+  Lib_IntVector_Intrinsics_vec256 v33 = ws32[15U];
+  Lib_IntVector_Intrinsics_vec256 v0_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v03, v13);
+  Lib_IntVector_Intrinsics_vec256 v1_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v03, v13);
+  Lib_IntVector_Intrinsics_vec256 v2_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v23, v33);
+  Lib_IntVector_Intrinsics_vec256 v3_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v23, v33);
+  Lib_IntVector_Intrinsics_vec256 v0__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_2, v2_2);
+  Lib_IntVector_Intrinsics_vec256 v1__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_2, v2_2);
+  Lib_IntVector_Intrinsics_vec256 v2__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_2, v3_2);
+  Lib_IntVector_Intrinsics_vec256 v3__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_2, v3_2);
+  Lib_IntVector_Intrinsics_vec256 ws120 = v0__2;
+  Lib_IntVector_Intrinsics_vec256 ws130 = v2__2;
+  Lib_IntVector_Intrinsics_vec256 ws140 = v1__2;
+  Lib_IntVector_Intrinsics_vec256 ws150 = v3__2;
+  Lib_IntVector_Intrinsics_vec256 v04 = ws32[16U];
+  Lib_IntVector_Intrinsics_vec256 v14 = ws32[17U];
+  Lib_IntVector_Intrinsics_vec256 v24 = ws32[18U];
+  Lib_IntVector_Intrinsics_vec256 v34 = ws32[19U];
+  Lib_IntVector_Intrinsics_vec256 v0_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v04, v14);
+  Lib_IntVector_Intrinsics_vec256 v1_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v04, v14);
+  Lib_IntVector_Intrinsics_vec256 v2_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v24, v34);
+  Lib_IntVector_Intrinsics_vec256 v3_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v24, v34);
+  Lib_IntVector_Intrinsics_vec256 v0__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_3, v2_3);
+  Lib_IntVector_Intrinsics_vec256 v1__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_3, v2_3);
+  Lib_IntVector_Intrinsics_vec256 v2__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_3, v3_3);
+  Lib_IntVector_Intrinsics_vec256 v3__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_3, v3_3);
+  Lib_IntVector_Intrinsics_vec256 ws160 = v0__3;
+  Lib_IntVector_Intrinsics_vec256 ws170 = v2__3;
+  Lib_IntVector_Intrinsics_vec256 ws180 = v1__3;
+  Lib_IntVector_Intrinsics_vec256 ws190 = v3__3;
+  Lib_IntVector_Intrinsics_vec256 v05 = ws32[20U];
+  Lib_IntVector_Intrinsics_vec256 v15 = ws32[21U];
+  Lib_IntVector_Intrinsics_vec256 v25 = ws32[22U];
+  Lib_IntVector_Intrinsics_vec256 v35 = ws32[23U];
+  Lib_IntVector_Intrinsics_vec256 v0_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v05, v15);
+  Lib_IntVector_Intrinsics_vec256 v1_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v05, v15);
+  Lib_IntVector_Intrinsics_vec256 v2_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v25, v35);
+  Lib_IntVector_Intrinsics_vec256 v3_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v25, v35);
+  Lib_IntVector_Intrinsics_vec256 v0__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_4, v2_4);
+  Lib_IntVector_Intrinsics_vec256 v1__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_4, v2_4);
+  Lib_IntVector_Intrinsics_vec256 v2__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_4, v3_4);
+  Lib_IntVector_Intrinsics_vec256 v3__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_4, v3_4);
+  Lib_IntVector_Intrinsics_vec256 ws200 = v0__4;
+  Lib_IntVector_Intrinsics_vec256 ws211 = v2__4;
+  Lib_IntVector_Intrinsics_vec256 ws220 = v1__4;
+  Lib_IntVector_Intrinsics_vec256 ws230 = v3__4;
+  Lib_IntVector_Intrinsics_vec256 v06 = ws32[24U];
+  Lib_IntVector_Intrinsics_vec256 v16 = ws32[25U];
+  Lib_IntVector_Intrinsics_vec256 v26 = ws32[26U];
+  Lib_IntVector_Intrinsics_vec256 v36 = ws32[27U];
+  Lib_IntVector_Intrinsics_vec256 v0_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v06, v16);
+  Lib_IntVector_Intrinsics_vec256 v1_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v06, v16);
+  Lib_IntVector_Intrinsics_vec256 v2_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v26, v36);
+  Lib_IntVector_Intrinsics_vec256 v3_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v26, v36);
+  Lib_IntVector_Intrinsics_vec256 v0__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_5, v2_5);
+  Lib_IntVector_Intrinsics_vec256 v1__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_5, v2_5);
+  Lib_IntVector_Intrinsics_vec256 v2__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_5, v3_5);
+  Lib_IntVector_Intrinsics_vec256 v3__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_5, v3_5);
+  Lib_IntVector_Intrinsics_vec256 ws240 = v0__5;
+  Lib_IntVector_Intrinsics_vec256 ws250 = v2__5;
+  Lib_IntVector_Intrinsics_vec256 ws260 = v1__5;
+  Lib_IntVector_Intrinsics_vec256 ws270 = v3__5;
+  Lib_IntVector_Intrinsics_vec256 v07 = ws32[28U];
+  Lib_IntVector_Intrinsics_vec256 v17 = ws32[29U];
+  Lib_IntVector_Intrinsics_vec256 v27 = ws32[30U];
+  Lib_IntVector_Intrinsics_vec256 v37 = ws32[31U];
+  Lib_IntVector_Intrinsics_vec256 v0_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v07, v17);
+  Lib_IntVector_Intrinsics_vec256 v1_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v07, v17);
+  Lib_IntVector_Intrinsics_vec256 v2_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v27, v37);
+  Lib_IntVector_Intrinsics_vec256 v3_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v27, v37);
+  Lib_IntVector_Intrinsics_vec256 v0__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_6, v2_6);
+  Lib_IntVector_Intrinsics_vec256 v1__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_6, v2_6);
+  Lib_IntVector_Intrinsics_vec256 v2__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_6, v3_6);
+  Lib_IntVector_Intrinsics_vec256 v3__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_6, v3_6);
+  Lib_IntVector_Intrinsics_vec256 ws280 = v0__6;
+  Lib_IntVector_Intrinsics_vec256 ws290 = v2__6;
+  Lib_IntVector_Intrinsics_vec256 ws300 = v1__6;
+  Lib_IntVector_Intrinsics_vec256 ws310 = v3__6;
+  ws32[0U] = ws00;
+  ws32[1U] = ws110;
+  ws32[2U] = ws210;
+  ws32[3U] = ws33;
+  ws32[4U] = ws40;
+  ws32[5U] = ws50;
+  ws32[6U] = ws60;
+  ws32[7U] = ws70;
+  ws32[8U] = ws80;
+  ws32[9U] = ws90;
+  ws32[10U] = ws100;
+  ws32[11U] = ws111;
+  ws32[12U] = ws120;
+  ws32[13U] = ws130;
+  ws32[14U] = ws140;
+  ws32[15U] = ws150;
+  ws32[16U] = ws160;
+  ws32[17U] = ws170;
+  ws32[18U] = ws180;
+  ws32[19U] = ws190;
+  ws32[20U] = ws200;
+  ws32[21U] = ws211;
+  ws32[22U] = ws220;
+  ws32[23U] = ws230;
+  ws32[24U] = ws240;
+  ws32[25U] = ws250;
+  ws32[26U] = ws260;
+  ws32[27U] = ws270;
+  ws32[28U] = ws280;
+  ws32[29U] = ws290;
+  ws32[30U] = ws300;
+  ws32[31U] = ws310;
+  for (uint32_t i = 0U; i < 25U; i++)
+  {
+    s[i] = Lib_IntVector_Intrinsics_vec256_xor(s[i], ws32[i]);
+  }
+  uint8_t b04[256U] = { 0U };
+  uint8_t b14[256U] = { 0U };
+  uint8_t b24[256U] = { 0U };
+  uint8_t b34[256U] = { 0U };
+  Hacl_Hash_SHA2_uint8_4p b = { .fst = b04, .snd = { .fst = b14, .snd = { .fst = b24, .snd = b34 } } };
+  uint8_t *b3 = b.snd.snd.snd;
+  uint8_t *b25 = b.snd.snd.fst;
+  uint8_t *b15 = b.snd.fst;
+  uint8_t *b05 = b.fst;
+  b05[rateInBytes1 - 1U] = 0x80U;
+  b15[rateInBytes1 - 1U] = 0x80U;
+  b25[rateInBytes1 - 1U] = 0x80U;
+  b3[rateInBytes1 - 1U] = 0x80U;
+  Hacl_Hash_SHA3_Simd256_absorb_inner_256(rateInBytes1, b, s);
+  for (uint32_t i0 = 0U; i0 < 28U / rateInBytes1; i0++)
+  {
+    uint8_t hbuf[1024U] = { 0U };
+    KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws[32U] KRML_POST_ALIGN(32) = { 0U };
+    memcpy(ws, s, 25U * sizeof (Lib_IntVector_Intrinsics_vec256));
+    Lib_IntVector_Intrinsics_vec256 v08 = ws[0U];
+    Lib_IntVector_Intrinsics_vec256 v18 = ws[1U];
+    Lib_IntVector_Intrinsics_vec256 v28 = ws[2U];
+    Lib_IntVector_Intrinsics_vec256 v38 = ws[3U];
+    Lib_IntVector_Intrinsics_vec256 v0_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v08, v18);
+    Lib_IntVector_Intrinsics_vec256 v1_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v08, v18);
+    Lib_IntVector_Intrinsics_vec256 v2_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v28, v38);
+    Lib_IntVector_Intrinsics_vec256 v3_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v28, v38);
+    Lib_IntVector_Intrinsics_vec256 v0__7 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_7, v2_7);
+    Lib_IntVector_Intrinsics_vec256 v1__7 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_7, v2_7);
+    Lib_IntVector_Intrinsics_vec256 v2__7 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_7, v3_7);
+    Lib_IntVector_Intrinsics_vec256 v3__7 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_7, v3_7);
+    Lib_IntVector_Intrinsics_vec256 ws0 = v0__7;
+    Lib_IntVector_Intrinsics_vec256 ws1 = v2__7;
+    Lib_IntVector_Intrinsics_vec256 ws2 = v1__7;
+    Lib_IntVector_Intrinsics_vec256 ws3 = v3__7;
+    Lib_IntVector_Intrinsics_vec256 v09 = ws[4U];
+    Lib_IntVector_Intrinsics_vec256 v19 = ws[5U];
+    Lib_IntVector_Intrinsics_vec256 v29 = ws[6U];
+    Lib_IntVector_Intrinsics_vec256 v39 = ws[7U];
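+    /* Each ws[i] currently holds lane i of the four parallel Keccak states.
+       The 64-bit and 128-bit interleaves below transpose lanes 4-7 so that
+       each resulting vector carries four consecutive lanes of one instance. */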
+    Lib_IntVector_Intrinsics_vec256 v0_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v09, v19);
+    Lib_IntVector_Intrinsics_vec256 v1_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v09, v19);
+    Lib_IntVector_Intrinsics_vec256 v2_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v29, v39);
+    Lib_IntVector_Intrinsics_vec256 v3_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v29, v39);
+    Lib_IntVector_Intrinsics_vec256 v0__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_8, v2_8);
+    Lib_IntVector_Intrinsics_vec256 v1__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_8, v2_8);
+    Lib_IntVector_Intrinsics_vec256 v2__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_8, v3_8);
+    Lib_IntVector_Intrinsics_vec256 v3__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_8, v3_8);
+    Lib_IntVector_Intrinsics_vec256 ws4 = v0__8;
+    Lib_IntVector_Intrinsics_vec256 ws5 = v2__8;
+    Lib_IntVector_Intrinsics_vec256 ws6 = v1__8;
+    Lib_IntVector_Intrinsics_vec256 ws7 = v3__8;
+    Lib_IntVector_Intrinsics_vec256 v010 = ws[8U];
+    Lib_IntVector_Intrinsics_vec256 v110 = ws[9U];
+    Lib_IntVector_Intrinsics_vec256 v210 = ws[10U];
+    Lib_IntVector_Intrinsics_vec256 v310 = ws[11U];
+    Lib_IntVector_Intrinsics_vec256 v0_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v010, v110);
+    Lib_IntVector_Intrinsics_vec256 v1_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v010, v110);
+    Lib_IntVector_Intrinsics_vec256 v2_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v210, v310);
+    Lib_IntVector_Intrinsics_vec256 v3_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v210, v310);
+    Lib_IntVector_Intrinsics_vec256 v0__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_9, v2_9);
+    Lib_IntVector_Intrinsics_vec256 v1__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_9, v2_9);
+    Lib_IntVector_Intrinsics_vec256 v2__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_9, v3_9);
+    Lib_IntVector_Intrinsics_vec256 v3__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_9, v3_9);
+    Lib_IntVector_Intrinsics_vec256 ws8 = v0__9;
+    Lib_IntVector_Intrinsics_vec256 ws9 = v2__9;
+    Lib_IntVector_Intrinsics_vec256 ws10 = v1__9;
+    Lib_IntVector_Intrinsics_vec256 ws11 = v3__9;
+    Lib_IntVector_Intrinsics_vec256 v011 = ws[12U];
+    Lib_IntVector_Intrinsics_vec256 v111 = ws[13U];
+    Lib_IntVector_Intrinsics_vec256 v211 = ws[14U];
+    Lib_IntVector_Intrinsics_vec256 v311 = ws[15U];
+    Lib_IntVector_Intrinsics_vec256 v0_10 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v011, v111);
+    Lib_IntVector_Intrinsics_vec256 v1_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v011, v111);
+    Lib_IntVector_Intrinsics_vec256 v2_10 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v211, v311);
+    Lib_IntVector_Intrinsics_vec256 v3_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v211, v311);
+    Lib_IntVector_Intrinsics_vec256 v0__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_10, v2_10);
+    Lib_IntVector_Intrinsics_vec256 v1__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_10, v2_10);
+    Lib_IntVector_Intrinsics_vec256 v2__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_10, v3_10);
+    Lib_IntVector_Intrinsics_vec256 v3__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_10, v3_10);
+    Lib_IntVector_Intrinsics_vec256 ws12 = v0__10;
+    Lib_IntVector_Intrinsics_vec256 ws13 = v2__10;
+    Lib_IntVector_Intrinsics_vec256 ws14 = v1__10;
+    Lib_IntVector_Intrinsics_vec256 ws15 = v3__10;
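+    /* Lanes 8-15 are regrouped above; lanes 16-31 follow the same pattern
+       before ws is stored to hbuf as four 256-byte per-instance blocks. */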
+    Lib_IntVector_Intrinsics_vec256 v012 = ws[16U];
+    Lib_IntVector_Intrinsics_vec256 v112 = ws[17U];
+    Lib_IntVector_Intrinsics_vec256 v212 = ws[18U];
+    Lib_IntVector_Intrinsics_vec256 v312 = ws[19U];
+    Lib_IntVector_Intrinsics_vec256 v0_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v012, v112);
+    Lib_IntVector_Intrinsics_vec256 v1_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v012, v112);
+    Lib_IntVector_Intrinsics_vec256 v2_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v212, v312);
+    Lib_IntVector_Intrinsics_vec256 v3_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v212, v312);
+    Lib_IntVector_Intrinsics_vec256 v0__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_11, v2_11);
+    Lib_IntVector_Intrinsics_vec256 v1__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_11, v2_11);
+    Lib_IntVector_Intrinsics_vec256 v2__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_11, v3_11);
+    Lib_IntVector_Intrinsics_vec256 v3__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_11, v3_11);
+    Lib_IntVector_Intrinsics_vec256 ws16 = v0__11;
+    Lib_IntVector_Intrinsics_vec256 ws17 = v2__11;
+    Lib_IntVector_Intrinsics_vec256 ws18 = v1__11;
+    Lib_IntVector_Intrinsics_vec256 ws19 = v3__11;
+    Lib_IntVector_Intrinsics_vec256 v013 = ws[20U];
+    Lib_IntVector_Intrinsics_vec256 v113 = ws[21U];
+    Lib_IntVector_Intrinsics_vec256 v213 = ws[22U];
+    Lib_IntVector_Intrinsics_vec256 v313 = ws[23U];
+    Lib_IntVector_Intrinsics_vec256 v0_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v013, v113);
+    Lib_IntVector_Intrinsics_vec256 v1_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v013, v113);
+    Lib_IntVector_Intrinsics_vec256 v2_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v213, v313);
+    Lib_IntVector_Intrinsics_vec256 v3_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v213, v313);
+    Lib_IntVector_Intrinsics_vec256 v0__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_12, v2_12);
+    Lib_IntVector_Intrinsics_vec256 v1__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_12, v2_12);
+    Lib_IntVector_Intrinsics_vec256 v2__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_12, v3_12);
+    Lib_IntVector_Intrinsics_vec256 v3__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_12, v3_12);
+    Lib_IntVector_Intrinsics_vec256 ws20 = v0__12;
+    Lib_IntVector_Intrinsics_vec256 ws21 = v2__12;
+    Lib_IntVector_Intrinsics_vec256 ws22 = v1__12;
+    Lib_IntVector_Intrinsics_vec256 ws23 = v3__12;
+    Lib_IntVector_Intrinsics_vec256 v014 = ws[24U];
+    Lib_IntVector_Intrinsics_vec256 v114 = ws[25U];
+    Lib_IntVector_Intrinsics_vec256 v214 = ws[26U];
+    Lib_IntVector_Intrinsics_vec256 v314 = ws[27U];
+    Lib_IntVector_Intrinsics_vec256 v0_13 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v014, v114);
+    Lib_IntVector_Intrinsics_vec256 v1_13 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v014, v114);
+    Lib_IntVector_Intrinsics_vec256 v2_13 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v214, v314);
+    Lib_IntVector_Intrinsics_vec256 v3_13 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v214, v314);
+    Lib_IntVector_Intrinsics_vec256 v0__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_13, v2_13);
+    Lib_IntVector_Intrinsics_vec256 v1__13 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_13, v2_13);
+    Lib_IntVector_Intrinsics_vec256 v2__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_13, v3_13);
+    Lib_IntVector_Intrinsics_vec256 v3__13 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_13, v3_13);
+    Lib_IntVector_Intrinsics_vec256 ws24 = v0__13;
+    Lib_IntVector_Intrinsics_vec256 ws25 = v2__13;
+    Lib_IntVector_Intrinsics_vec256 ws26 = v1__13;
+    Lib_IntVector_Intrinsics_vec256 ws27 = v3__13;
+    Lib_IntVector_Intrinsics_vec256 v0 = ws[28U];
+    Lib_IntVector_Intrinsics_vec256 v1 = ws[29U];
+    Lib_IntVector_Intrinsics_vec256 v2 = ws[30U];
+    Lib_IntVector_Intrinsics_vec256 v3 = ws[31U];
+    Lib_IntVector_Intrinsics_vec256 v0_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0, v1);
+    Lib_IntVector_Intrinsics_vec256 v1_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0, v1);
+    Lib_IntVector_Intrinsics_vec256 v2_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v2, v3);
+    Lib_IntVector_Intrinsics_vec256 v3_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v2, v3);
+    Lib_IntVector_Intrinsics_vec256 v0__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_14, v2_14);
+    Lib_IntVector_Intrinsics_vec256 v1__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_14, v2_14);
+    Lib_IntVector_Intrinsics_vec256 v2__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_14, v3_14);
+    Lib_IntVector_Intrinsics_vec256 v3__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_14, v3_14);
+    Lib_IntVector_Intrinsics_vec256 ws28 = v0__14;
+    Lib_IntVector_Intrinsics_vec256 ws29 = v2__14;
+    Lib_IntVector_Intrinsics_vec256 ws30 = v1__14;
+    Lib_IntVector_Intrinsics_vec256 ws31 = v3__14;
+    ws[0U] = ws0;
+    ws[1U] = ws4;
+    ws[2U] = ws8;
+    ws[3U] = ws12;
+    ws[4U] = ws16;
+    ws[5U] = ws20;
+    ws[6U] = ws24;
+    ws[7U] = ws28;
+    ws[8U] = ws1;
+    ws[9U] = ws5;
+    ws[10U] = ws9;
+    ws[11U] = ws13;
+    ws[12U] = ws17;
+    ws[13U] = ws21;
+    ws[14U] = ws25;
+    ws[15U] = ws29;
+    ws[16U] = ws2;
+    ws[17U] = ws6;
+    ws[18U] = ws10;
+    ws[19U] = ws14;
+    ws[20U] = ws18;
+    ws[21U] = ws22;
+    ws[22U] = ws26;
+    ws[23U] = ws30;
+    ws[24U] = ws3;
+    ws[25U] = ws7;
+    ws[26U] = ws11;
+    ws[27U] = ws15;
+    ws[28U] = ws19;
+    ws[29U] = ws23;
+    ws[30U] = ws27;
+    ws[31U] = ws31;
+    for (uint32_t i = 0U; i < 32U; i++)
+    {
+      Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]);
+    }
+    uint8_t *b35 = rb.snd.snd.snd;
+    uint8_t *b2 = rb.snd.snd.fst;
+    uint8_t *b1 = rb.snd.fst;
+    uint8_t *b0 = rb.fst;
+    memcpy(b0 + i0 * rateInBytes1, hbuf, rateInBytes1 * sizeof (uint8_t));
+    memcpy(b1 + i0 * rateInBytes1, hbuf + 256U, rateInBytes1 * sizeof (uint8_t));
+    memcpy(b2 + i0 * rateInBytes1, hbuf + 512U, rateInBytes1 * sizeof (uint8_t));
+    memcpy(b35 + i0 * rateInBytes1, hbuf + 768U, rateInBytes1 * sizeof (uint8_t));
+    for (uint32_t i1 = 0U; i1 < 24U; i1++)
+    {
+      KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U };
+      KRML_MAYBE_FOR5(i,
+        0U,
+        5U,
+        1U,
+        Lib_IntVector_Intrinsics_vec256 uu____0 = s[i + 0U];
+        Lib_IntVector_Intrinsics_vec256 uu____1 = s[i + 5U];
+        Lib_IntVector_Intrinsics_vec256 uu____2 = s[i + 10U];
+        _C[i] = Lib_IntVector_Intrinsics_vec256_xor(uu____0, Lib_IntVector_Intrinsics_vec256_xor(uu____1, Lib_IntVector_Intrinsics_vec256_xor(uu____2, Lib_IntVector_Intrinsics_vec256_xor(s[i + 15U], s[i + 20U])))););
+      KRML_MAYBE_FOR5(i2,
+        0U,
+        5U,
+        1U,
+        Lib_IntVector_Intrinsics_vec256 uu____3 = _C[(i2 + 4U) % 5U];
+        Lib_IntVector_Intrinsics_vec256 uu____4 = _C[(i2 + 1U) % 5U];
+        Lib_IntVector_Intrinsics_vec256 _D = Lib_IntVector_Intrinsics_vec256_xor(uu____3, Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____4, 1U), Lib_IntVector_Intrinsics_vec256_shift_right64(uu____4, 63U)));
+        KRML_MAYBE_FOR5(i,
+          0U,
+          5U,
+          1U,
+          s[i2 + 5U * i] = Lib_IntVector_Intrinsics_vec256_xor(s[i2 + 5U * i], _D);););
+      Lib_IntVector_Intrinsics_vec256 x = s[1U];
+      Lib_IntVector_Intrinsics_vec256 current = x;
+      for (uint32_t i = 0U; i < 24U; i++)
+      {
+        uint32_t _Y = Hacl_Hash_SHA3_keccak_piln[i];
+        uint32_t r = Hacl_Hash_SHA3_keccak_rotc[i];
+        Lib_IntVector_Intrinsics_vec256 temp = s[_Y];
+        Lib_IntVector_Intrinsics_vec256 uu____5 = current;
+        s[_Y] = Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____5, r), Lib_IntVector_Intrinsics_vec256_shift_right64(uu____5, 64U - r));
+        current = temp;
+      }
+      KRML_MAYBE_FOR5(i,
+        0U,
+        5U,
+        1U,
+        Lib_IntVector_Intrinsics_vec256 uu____6 = s[0U + 5U * i];
+        Lib_IntVector_Intrinsics_vec256 uu____7 = Lib_IntVector_Intrinsics_vec256_lognot(s[1U + 5U * i]);
+        Lib_IntVector_Intrinsics_vec256 v015 = Lib_IntVector_Intrinsics_vec256_xor(uu____6, Lib_IntVector_Intrinsics_vec256_and(uu____7, s[2U + 5U * i]));
+        Lib_IntVector_Intrinsics_vec256 uu____8 = s[1U + 5U * i];
+        Lib_IntVector_Intrinsics_vec256 uu____9 = Lib_IntVector_Intrinsics_vec256_lognot(s[2U + 5U * i]);
+        Lib_IntVector_Intrinsics_vec256 v115 = Lib_IntVector_Intrinsics_vec256_xor(uu____8, Lib_IntVector_Intrinsics_vec256_and(uu____9, s[3U + 5U * i]));
+        Lib_IntVector_Intrinsics_vec256 uu____10 = s[2U + 5U * i];
+        Lib_IntVector_Intrinsics_vec256 uu____11 = Lib_IntVector_Intrinsics_vec256_lognot(s[3U + 5U * i]);
+        Lib_IntVector_Intrinsics_vec256 v215 = Lib_IntVector_Intrinsics_vec256_xor(uu____10, Lib_IntVector_Intrinsics_vec256_and(uu____11, s[4U + 5U * i]));
+        Lib_IntVector_Intrinsics_vec256 uu____12 = s[3U + 5U * i];
+        Lib_IntVector_Intrinsics_vec256 uu____13 = Lib_IntVector_Intrinsics_vec256_lognot(s[4U + 5U * i]);
+        Lib_IntVector_Intrinsics_vec256 v315 = Lib_IntVector_Intrinsics_vec256_xor(uu____12, Lib_IntVector_Intrinsics_vec256_and(uu____13, s[0U + 5U * i]));
+        Lib_IntVector_Intrinsics_vec256 uu____14 = s[4U + 5U * i];
+        Lib_IntVector_Intrinsics_vec256 uu____15 = Lib_IntVector_Intrinsics_vec256_lognot(s[0U + 5U * i]);
+        Lib_IntVector_Intrinsics_vec256 v4 = Lib_IntVector_Intrinsics_vec256_xor(uu____14, Lib_IntVector_Intrinsics_vec256_and(uu____15, s[1U + 5U * i]));
+        s[0U + 5U * i] = v015;
+        s[1U + 5U * i] = v115;
+        s[2U + 5U * i] = v215;
+        s[3U + 5U * i] = v315;
+        s[4U + 5U * i] = v4;);
+      uint64_t c = Hacl_Hash_SHA3_keccak_rndc[i1];
+      Lib_IntVector_Intrinsics_vec256 uu____16 = s[0U];
+      s[0U] = Lib_IntVector_Intrinsics_vec256_xor(uu____16, Lib_IntVector_Intrinsics_vec256_load64(c));
+    }
+  }
+  uint32_t remOut = 28U % rateInBytes1;
+  uint8_t hbuf[1024U] = { 0U };
+  KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws[32U] KRML_POST_ALIGN(32) = { 0U };
+  memcpy(ws, s, 25U * sizeof (Lib_IntVector_Intrinsics_vec256));
+  Lib_IntVector_Intrinsics_vec256 v08 = ws[0U];
+  Lib_IntVector_Intrinsics_vec256 v18 = ws[1U];
+  Lib_IntVector_Intrinsics_vec256 v28 = ws[2U];
+  Lib_IntVector_Intrinsics_vec256 v38 = ws[3U];
+  Lib_IntVector_Intrinsics_vec256 v0_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v08, v18);
+  Lib_IntVector_Intrinsics_vec256 v1_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v08, v18);
+  Lib_IntVector_Intrinsics_vec256 v2_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v28, v38);
+  Lib_IntVector_Intrinsics_vec256 v3_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v28, v38);
+  Lib_IntVector_Intrinsics_vec256 v0__7 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_7, v2_7);
+  Lib_IntVector_Intrinsics_vec256 v1__7 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_7, v2_7);
+  Lib_IntVector_Intrinsics_vec256 v2__7 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_7, v3_7);
+  Lib_IntVector_Intrinsics_vec256 v3__7 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_7, v3_7);
+  Lib_IntVector_Intrinsics_vec256 ws0 = v0__7;
+  Lib_IntVector_Intrinsics_vec256 ws1 = v2__7;
+  Lib_IntVector_Intrinsics_vec256 ws2 = v1__7;
+  Lib_IntVector_Intrinsics_vec256 ws3 = v3__7;
+  Lib_IntVector_Intrinsics_vec256 v09 = ws[4U];
+  Lib_IntVector_Intrinsics_vec256 v19 = ws[5U];
+  Lib_IntVector_Intrinsics_vec256 v29 = ws[6U];
+  Lib_IntVector_Intrinsics_vec256 v39 = ws[7U];
+  Lib_IntVector_Intrinsics_vec256 v0_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v09, v19);
+  Lib_IntVector_Intrinsics_vec256 v1_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v09, v19);
+  Lib_IntVector_Intrinsics_vec256 v2_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v29, v39);
+  Lib_IntVector_Intrinsics_vec256 v3_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v29, v39);
+  Lib_IntVector_Intrinsics_vec256 v0__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_8, v2_8);
+  Lib_IntVector_Intrinsics_vec256 v1__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_8, v2_8);
+  Lib_IntVector_Intrinsics_vec256 v2__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_8, v3_8);
+  Lib_IntVector_Intrinsics_vec256 v3__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_8, v3_8);
+  Lib_IntVector_Intrinsics_vec256 ws4 = v0__8;
+  Lib_IntVector_Intrinsics_vec256 ws5 = v2__8;
+  Lib_IntVector_Intrinsics_vec256 ws6 = v1__8;
+  Lib_IntVector_Intrinsics_vec256 ws7 = v3__8;
+  Lib_IntVector_Intrinsics_vec256 v010 = ws[8U];
+  Lib_IntVector_Intrinsics_vec256 v110 = ws[9U];
+  Lib_IntVector_Intrinsics_vec256 v210 = ws[10U];
+  Lib_IntVector_Intrinsics_vec256 v310 = ws[11U];
+  Lib_IntVector_Intrinsics_vec256 v0_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v010, v110);
+  Lib_IntVector_Intrinsics_vec256 v1_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v010, v110);
+  Lib_IntVector_Intrinsics_vec256 v2_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v210, v310);
+  Lib_IntVector_Intrinsics_vec256 v3_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v210, v310);
+  Lib_IntVector_Intrinsics_vec256 v0__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_9, v2_9);
+  Lib_IntVector_Intrinsics_vec256 v1__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_9, v2_9);
+  Lib_IntVector_Intrinsics_vec256 v2__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_9, v3_9);
+  Lib_IntVector_Intrinsics_vec256 v3__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_9, v3_9);
+  Lib_IntVector_Intrinsics_vec256 ws8 = v0__9;
+  Lib_IntVector_Intrinsics_vec256 ws9 = v2__9;
+  Lib_IntVector_Intrinsics_vec256 ws10 = v1__9;
+  Lib_IntVector_Intrinsics_vec256 ws11 = v3__9;
+  Lib_IntVector_Intrinsics_vec256 v011 = ws[12U];
+  Lib_IntVector_Intrinsics_vec256 v111 = ws[13U];
+  Lib_IntVector_Intrinsics_vec256 v211 = ws[14U];
+  Lib_IntVector_Intrinsics_vec256 v311 = ws[15U];
+  Lib_IntVector_Intrinsics_vec256 v0_10 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v011, v111);
+  Lib_IntVector_Intrinsics_vec256 v1_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v011, v111);
+  Lib_IntVector_Intrinsics_vec256 v2_10 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v211, v311);
+  Lib_IntVector_Intrinsics_vec256 v3_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v211, v311);
+  Lib_IntVector_Intrinsics_vec256 v0__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_10, v2_10);
+  Lib_IntVector_Intrinsics_vec256 v1__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_10, v2_10);
+  Lib_IntVector_Intrinsics_vec256 v2__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_10, v3_10);
+  Lib_IntVector_Intrinsics_vec256 v3__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_10, v3_10);
+  Lib_IntVector_Intrinsics_vec256 ws12 = v0__10;
+  Lib_IntVector_Intrinsics_vec256 ws13 = v2__10;
+  Lib_IntVector_Intrinsics_vec256 ws14 = v1__10;
+  Lib_IntVector_Intrinsics_vec256 ws15 = v3__10;
+  Lib_IntVector_Intrinsics_vec256 v012 = ws[16U];
+  Lib_IntVector_Intrinsics_vec256 v112 = ws[17U];
+  Lib_IntVector_Intrinsics_vec256 v212 = ws[18U];
+  Lib_IntVector_Intrinsics_vec256 v312 = ws[19U];
+  Lib_IntVector_Intrinsics_vec256 v0_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v012, v112);
+  Lib_IntVector_Intrinsics_vec256 v1_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v012, v112);
+  Lib_IntVector_Intrinsics_vec256 v2_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v212, v312);
+  Lib_IntVector_Intrinsics_vec256 v3_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v212, v312);
+  Lib_IntVector_Intrinsics_vec256 v0__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_11, v2_11);
+  Lib_IntVector_Intrinsics_vec256 v1__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_11, v2_11);
+  Lib_IntVector_Intrinsics_vec256 v2__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_11, v3_11);
+  Lib_IntVector_Intrinsics_vec256 v3__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_11, v3_11);
+  Lib_IntVector_Intrinsics_vec256 ws16 = v0__11;
+  Lib_IntVector_Intrinsics_vec256 ws17 = v2__11;
+  Lib_IntVector_Intrinsics_vec256 ws18 = v1__11;
+  Lib_IntVector_Intrinsics_vec256 ws19 = v3__11;
+  Lib_IntVector_Intrinsics_vec256 v013 = ws[20U];
+  Lib_IntVector_Intrinsics_vec256 v113 = ws[21U];
+  Lib_IntVector_Intrinsics_vec256 v213 = ws[22U];
+  Lib_IntVector_Intrinsics_vec256 v313 = ws[23U];
+  Lib_IntVector_Intrinsics_vec256 v0_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v013, v113);
+  Lib_IntVector_Intrinsics_vec256 v1_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v013, v113);
+  Lib_IntVector_Intrinsics_vec256 v2_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v213, v313);
+  Lib_IntVector_Intrinsics_vec256 v3_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v213, v313);
+  Lib_IntVector_Intrinsics_vec256 v0__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_12, v2_12);
+  Lib_IntVector_Intrinsics_vec256 v1__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_12, v2_12);
+  Lib_IntVector_Intrinsics_vec256 v2__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_12, v3_12);
+  Lib_IntVector_Intrinsics_vec256 v3__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_12, v3_12);
+  Lib_IntVector_Intrinsics_vec256 ws20 = v0__12;
+  Lib_IntVector_Intrinsics_vec256 ws21 = v2__12;
+  Lib_IntVector_Intrinsics_vec256 ws22 = v1__12;
+  Lib_IntVector_Intrinsics_vec256 ws23 = v3__12;
+  Lib_IntVector_Intrinsics_vec256 v014 = ws[24U];
+  Lib_IntVector_Intrinsics_vec256 v114 = ws[25U];
+  Lib_IntVector_Intrinsics_vec256 v214 = ws[26U];
+  Lib_IntVector_Intrinsics_vec256 v314 = ws[27U];
+  Lib_IntVector_Intrinsics_vec256 v0_13 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v014, v114);
+  Lib_IntVector_Intrinsics_vec256 v1_13 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v014, v114);
+  Lib_IntVector_Intrinsics_vec256 v2_13 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v214, v314);
+  Lib_IntVector_Intrinsics_vec256 v3_13 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v214, v314);
+  Lib_IntVector_Intrinsics_vec256 v0__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_13, v2_13);
+  Lib_IntVector_Intrinsics_vec256 v1__13 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_13, v2_13);
+  Lib_IntVector_Intrinsics_vec256 v2__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_13, v3_13);
+  Lib_IntVector_Intrinsics_vec256 v3__13 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_13, v3_13);
+  Lib_IntVector_Intrinsics_vec256 ws24 = v0__13;
+  Lib_IntVector_Intrinsics_vec256 ws25 = v2__13;
+  Lib_IntVector_Intrinsics_vec256 ws26 = v1__13;
+  Lib_IntVector_Intrinsics_vec256 ws27 = v3__13;
+  Lib_IntVector_Intrinsics_vec256 v0 = ws[28U];
+  Lib_IntVector_Intrinsics_vec256 v1 = ws[29U];
+  Lib_IntVector_Intrinsics_vec256 v2 = ws[30U];
+  Lib_IntVector_Intrinsics_vec256 v3 = ws[31U];
+  Lib_IntVector_Intrinsics_vec256 v0_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0, v1);
+  Lib_IntVector_Intrinsics_vec256 v1_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0, v1);
+  Lib_IntVector_Intrinsics_vec256 v2_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v2, v3);
+  Lib_IntVector_Intrinsics_vec256 v3_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v2, v3);
+  Lib_IntVector_Intrinsics_vec256 v0__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_14, v2_14);
+  Lib_IntVector_Intrinsics_vec256 v1__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_14, v2_14);
+  Lib_IntVector_Intrinsics_vec256 v2__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_14, v3_14);
+  Lib_IntVector_Intrinsics_vec256 v3__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_14, v3_14);
+  Lib_IntVector_Intrinsics_vec256 ws28 = v0__14;
+  Lib_IntVector_Intrinsics_vec256 ws29 = v2__14;
+  Lib_IntVector_Intrinsics_vec256 ws30 = v1__14;
+  Lib_IntVector_Intrinsics_vec256 ws31 = v3__14;
+  ws[0U] = ws0;
+  ws[1U] = ws4;
+  ws[2U] = ws8;
+  ws[3U] = ws12;
+  ws[4U] = ws16;
+  ws[5U] = ws20;
+  ws[6U] = ws24;
+  ws[7U] = ws28;
+  ws[8U] = ws1;
+  ws[9U] = ws5;
+  ws[10U] = ws9;
+  ws[11U] = ws13;
+  ws[12U] = ws17;
+  ws[13U] = ws21;
+  ws[14U] = ws25;
+  ws[15U] = ws29;
+  ws[16U] = ws2;
+  ws[17U] = ws6;
+  ws[18U] = ws10;
+  ws[19U] = ws14;
+  ws[20U] = ws18;
+  ws[21U] = ws22;
+  ws[22U] = ws26;
+  ws[23U] = ws30;
+  ws[24U] = ws3;
+  ws[25U] = ws7;
+  ws[26U] = ws11;
+  ws[27U] = ws15;
+  ws[28U] = ws19;
+  ws[29U] = ws23;
+  ws[30U] = ws27;
+  ws[31U] = ws31;
+  for (uint32_t i = 0U; i < 32U; i++)
+  {
+    Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]);
+  }
+  uint8_t *b35 = rb.snd.snd.snd;
+  uint8_t *b2 = rb.snd.snd.fst;
+  uint8_t *b1 = rb.snd.fst;
+  uint8_t *b0 = rb.fst;
+  memcpy(b0 + 28U - remOut, hbuf, remOut * sizeof (uint8_t));
+  memcpy(b1 + 28U - remOut, hbuf + 256U, remOut * sizeof (uint8_t));
+  memcpy(b2 + 28U - remOut, hbuf + 512U, remOut * sizeof (uint8_t));
+  memcpy(b35 + 28U - remOut, hbuf + 768U, remOut * sizeof (uint8_t));
+}
+
+void
+Hacl_Hash_SHA3_Simd256_sha3_256(
+  uint8_t *output0,
+  uint8_t *output1,
+  uint8_t *output2,
+  uint8_t *output3,
+  uint8_t *input0,
+  uint8_t *input1,
+  uint8_t *input2,
+  uint8_t *input3,
+  uint32_t inputByteLen
+)
+{
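+  /* 4-way SHA3-256: the same sponge structure as sha3_224 above, with a
+     rate of 136 bytes (512-bit capacity), the 0x06 domain-separation byte,
+     and 32-byte digests. */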
Hacl_Hash_SHA2_uint8_4p + ib = { .fst = input0, .snd = { .fst = input1, .snd = { .fst = input2, .snd = input3 } } }; + Hacl_Hash_SHA2_uint8_4p + rb = { .fst = output0, .snd = { .fst = output1, .snd = { .fst = output2, .snd = output3 } } }; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 s[25U] KRML_POST_ALIGN(32) = { 0U }; + uint32_t rateInBytes1 = 136U; + for (uint32_t i = 0U; i < inputByteLen / rateInBytes1; i++) + { + uint8_t b00[256U] = { 0U }; + uint8_t b10[256U] = { 0U }; + uint8_t b20[256U] = { 0U }; + uint8_t b30[256U] = { 0U }; + Hacl_Hash_SHA2_uint8_4p + b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; + uint8_t *b3 = ib.snd.snd.snd; + uint8_t *b2 = ib.snd.snd.fst; + uint8_t *b1 = ib.snd.fst; + uint8_t *b0 = ib.fst; + uint8_t *bl3 = b_.snd.snd.snd; + uint8_t *bl2 = b_.snd.snd.fst; + uint8_t *bl1 = b_.snd.fst; + uint8_t *bl0 = b_.fst; + memcpy(bl0, b0 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t)); + memcpy(bl1, b1 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t)); + memcpy(bl2, b2 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t)); + memcpy(bl3, b3 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t)); + Hacl_Hash_SHA3_Simd256_absorb_inner_256(rateInBytes1, b_, s); + } + uint8_t b00[256U] = { 0U }; + uint8_t b10[256U] = { 0U }; + uint8_t b20[256U] = { 0U }; + uint8_t b30[256U] = { 0U }; + Hacl_Hash_SHA2_uint8_4p + b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; + uint32_t rem = inputByteLen % rateInBytes1; + uint8_t *b31 = ib.snd.snd.snd; + uint8_t *b21 = ib.snd.snd.fst; + uint8_t *b11 = ib.snd.fst; + uint8_t *b01 = ib.fst; + uint8_t *bl3 = b_.snd.snd.snd; + uint8_t *bl2 = b_.snd.snd.fst; + uint8_t *bl1 = b_.snd.fst; + uint8_t *bl0 = b_.fst; + memcpy(bl0, b01 + inputByteLen - rem, rem * sizeof (uint8_t)); + memcpy(bl1, b11 + inputByteLen - rem, rem * sizeof (uint8_t)); + memcpy(bl2, b21 + inputByteLen - rem, rem * sizeof (uint8_t)); + memcpy(bl3, b31 + inputByteLen - rem, rem * sizeof (uint8_t)); + uint8_t *b32 = b_.snd.snd.snd; + uint8_t *b22 = b_.snd.snd.fst; + uint8_t *b12 = b_.snd.fst; + uint8_t *b02 = b_.fst; + b02[inputByteLen % rateInBytes1] = 0x06U; + b12[inputByteLen % rateInBytes1] = 0x06U; + b22[inputByteLen % rateInBytes1] = 0x06U; + b32[inputByteLen % rateInBytes1] = 0x06U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b33 = b_.snd.snd.snd; + uint8_t *b23 = b_.snd.snd.fst; + uint8_t *b13 = b_.snd.fst; + uint8_t *b03 = b_.fst; + ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03); + ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13); + ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23); + ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33); + ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 32U); + ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 32U); + ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 32U); + ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 32U); + ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 64U); + ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 64U); + ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 64U); + ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 64U); + ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 96U); + ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 96U); + ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 96U); + ws32[15U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 96U); + ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 128U); + ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 128U); + ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 128U); + ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 128U); + ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 160U); + ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 160U); + ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 160U); + ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 160U); + ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 192U); + ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 192U); + ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 192U); + ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 192U); + ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 224U); + ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 224U); + ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 224U); + ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 224U); + Lib_IntVector_Intrinsics_vec256 v00 = ws32[0U]; + Lib_IntVector_Intrinsics_vec256 v10 = ws32[1U]; + Lib_IntVector_Intrinsics_vec256 v20 = ws32[2U]; + Lib_IntVector_Intrinsics_vec256 v30 = ws32[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v1_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v2_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v3_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v0__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v1__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v2__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 + v3__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 ws00 = v0__; + Lib_IntVector_Intrinsics_vec256 ws110 = v2__; + Lib_IntVector_Intrinsics_vec256 ws210 = v1__; + Lib_IntVector_Intrinsics_vec256 ws33 = v3__; + Lib_IntVector_Intrinsics_vec256 v01 = ws32[4U]; + Lib_IntVector_Intrinsics_vec256 v11 = ws32[5U]; + Lib_IntVector_Intrinsics_vec256 v21 = ws32[6U]; + Lib_IntVector_Intrinsics_vec256 v31 = ws32[7U]; + Lib_IntVector_Intrinsics_vec256 + v0_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v1_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v2_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v3_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v0__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v1__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v2__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 + v3__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 ws40 = v0__0; + Lib_IntVector_Intrinsics_vec256 ws50 = v2__0; + Lib_IntVector_Intrinsics_vec256 ws60 
= v1__0; + Lib_IntVector_Intrinsics_vec256 ws70 = v3__0; + Lib_IntVector_Intrinsics_vec256 v02 = ws32[8U]; + Lib_IntVector_Intrinsics_vec256 v12 = ws32[9U]; + Lib_IntVector_Intrinsics_vec256 v22 = ws32[10U]; + Lib_IntVector_Intrinsics_vec256 v32 = ws32[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v1_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v2_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v3_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v0__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v1__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v2__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 + v3__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 ws80 = v0__1; + Lib_IntVector_Intrinsics_vec256 ws90 = v2__1; + Lib_IntVector_Intrinsics_vec256 ws100 = v1__1; + Lib_IntVector_Intrinsics_vec256 ws111 = v3__1; + Lib_IntVector_Intrinsics_vec256 v03 = ws32[12U]; + Lib_IntVector_Intrinsics_vec256 v13 = ws32[13U]; + Lib_IntVector_Intrinsics_vec256 v23 = ws32[14U]; + Lib_IntVector_Intrinsics_vec256 v33 = ws32[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v1_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v2_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v3_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v0__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v1__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v2__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 + v3__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 ws120 = v0__2; + Lib_IntVector_Intrinsics_vec256 ws130 = v2__2; + Lib_IntVector_Intrinsics_vec256 ws140 = v1__2; + Lib_IntVector_Intrinsics_vec256 ws150 = v3__2; + Lib_IntVector_Intrinsics_vec256 v04 = ws32[16U]; + Lib_IntVector_Intrinsics_vec256 v14 = ws32[17U]; + Lib_IntVector_Intrinsics_vec256 v24 = ws32[18U]; + Lib_IntVector_Intrinsics_vec256 v34 = ws32[19U]; + Lib_IntVector_Intrinsics_vec256 + v0_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v1_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v2_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v3_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v0__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v1__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v2__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 + v3__3 = 
Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 ws160 = v0__3; + Lib_IntVector_Intrinsics_vec256 ws170 = v2__3; + Lib_IntVector_Intrinsics_vec256 ws180 = v1__3; + Lib_IntVector_Intrinsics_vec256 ws190 = v3__3; + Lib_IntVector_Intrinsics_vec256 v05 = ws32[20U]; + Lib_IntVector_Intrinsics_vec256 v15 = ws32[21U]; + Lib_IntVector_Intrinsics_vec256 v25 = ws32[22U]; + Lib_IntVector_Intrinsics_vec256 v35 = ws32[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v1_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v2_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v3_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v0__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v1__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v2__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 + v3__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 ws200 = v0__4; + Lib_IntVector_Intrinsics_vec256 ws211 = v2__4; + Lib_IntVector_Intrinsics_vec256 ws220 = v1__4; + Lib_IntVector_Intrinsics_vec256 ws230 = v3__4; + Lib_IntVector_Intrinsics_vec256 v06 = ws32[24U]; + Lib_IntVector_Intrinsics_vec256 v16 = ws32[25U]; + Lib_IntVector_Intrinsics_vec256 v26 = ws32[26U]; + Lib_IntVector_Intrinsics_vec256 v36 = ws32[27U]; + Lib_IntVector_Intrinsics_vec256 + v0_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v1_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v2_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v3_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v0__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v1__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v2__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 + v3__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 ws240 = v0__5; + Lib_IntVector_Intrinsics_vec256 ws250 = v2__5; + Lib_IntVector_Intrinsics_vec256 ws260 = v1__5; + Lib_IntVector_Intrinsics_vec256 ws270 = v3__5; + Lib_IntVector_Intrinsics_vec256 v07 = ws32[28U]; + Lib_IntVector_Intrinsics_vec256 v17 = ws32[29U]; + Lib_IntVector_Intrinsics_vec256 v27 = ws32[30U]; + Lib_IntVector_Intrinsics_vec256 v37 = ws32[31U]; + Lib_IntVector_Intrinsics_vec256 + v0_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v07, v17); + Lib_IntVector_Intrinsics_vec256 + v1_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v07, v17); + Lib_IntVector_Intrinsics_vec256 + v2_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v27, v37); + Lib_IntVector_Intrinsics_vec256 + v3_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v27, v37); + Lib_IntVector_Intrinsics_vec256 + v0__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v1__6 = 
Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v2__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 + v3__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 ws280 = v0__6; + Lib_IntVector_Intrinsics_vec256 ws290 = v2__6; + Lib_IntVector_Intrinsics_vec256 ws300 = v1__6; + Lib_IntVector_Intrinsics_vec256 ws310 = v3__6; + ws32[0U] = ws00; + ws32[1U] = ws110; + ws32[2U] = ws210; + ws32[3U] = ws33; + ws32[4U] = ws40; + ws32[5U] = ws50; + ws32[6U] = ws60; + ws32[7U] = ws70; + ws32[8U] = ws80; + ws32[9U] = ws90; + ws32[10U] = ws100; + ws32[11U] = ws111; + ws32[12U] = ws120; + ws32[13U] = ws130; + ws32[14U] = ws140; + ws32[15U] = ws150; + ws32[16U] = ws160; + ws32[17U] = ws170; + ws32[18U] = ws180; + ws32[19U] = ws190; + ws32[20U] = ws200; + ws32[21U] = ws211; + ws32[22U] = ws220; + ws32[23U] = ws230; + ws32[24U] = ws240; + ws32[25U] = ws250; + ws32[26U] = ws260; + ws32[27U] = ws270; + ws32[28U] = ws280; + ws32[29U] = ws290; + ws32[30U] = ws300; + ws32[31U] = ws310; + for (uint32_t i = 0U; i < 25U; i++) + { + s[i] = Lib_IntVector_Intrinsics_vec256_xor(s[i], ws32[i]); + } + uint8_t b04[256U] = { 0U }; + uint8_t b14[256U] = { 0U }; + uint8_t b24[256U] = { 0U }; + uint8_t b34[256U] = { 0U }; + Hacl_Hash_SHA2_uint8_4p + b = { .fst = b04, .snd = { .fst = b14, .snd = { .fst = b24, .snd = b34 } } }; + uint8_t *b3 = b.snd.snd.snd; + uint8_t *b25 = b.snd.snd.fst; + uint8_t *b15 = b.snd.fst; + uint8_t *b05 = b.fst; + b05[rateInBytes1 - 1U] = 0x80U; + b15[rateInBytes1 - 1U] = 0x80U; + b25[rateInBytes1 - 1U] = 0x80U; + b3[rateInBytes1 - 1U] = 0x80U; + Hacl_Hash_SHA3_Simd256_absorb_inner_256(rateInBytes1, b, s); + for (uint32_t i0 = 0U; i0 < 32U / rateInBytes1; i0++) + { + uint8_t hbuf[1024U] = { 0U }; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws[32U] KRML_POST_ALIGN(32) = { 0U }; + memcpy(ws, s, 25U * sizeof (Lib_IntVector_Intrinsics_vec256)); + Lib_IntVector_Intrinsics_vec256 v08 = ws[0U]; + Lib_IntVector_Intrinsics_vec256 v18 = ws[1U]; + Lib_IntVector_Intrinsics_vec256 v28 = ws[2U]; + Lib_IntVector_Intrinsics_vec256 v38 = ws[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v08, v18); + Lib_IntVector_Intrinsics_vec256 + v1_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v08, v18); + Lib_IntVector_Intrinsics_vec256 + v2_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v28, v38); + Lib_IntVector_Intrinsics_vec256 + v3_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v28, v38); + Lib_IntVector_Intrinsics_vec256 + v0__7 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_7, v2_7); + Lib_IntVector_Intrinsics_vec256 + v1__7 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_7, v2_7); + Lib_IntVector_Intrinsics_vec256 + v2__7 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_7, v3_7); + Lib_IntVector_Intrinsics_vec256 + v3__7 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_7, v3_7); + Lib_IntVector_Intrinsics_vec256 ws0 = v0__7; + Lib_IntVector_Intrinsics_vec256 ws1 = v2__7; + Lib_IntVector_Intrinsics_vec256 ws2 = v1__7; + Lib_IntVector_Intrinsics_vec256 ws3 = v3__7; + Lib_IntVector_Intrinsics_vec256 v09 = ws[4U]; + Lib_IntVector_Intrinsics_vec256 v19 = ws[5U]; + Lib_IntVector_Intrinsics_vec256 v29 = ws[6U]; + Lib_IntVector_Intrinsics_vec256 v39 = ws[7U]; + Lib_IntVector_Intrinsics_vec256 + v0_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v09, v19); + 
Lib_IntVector_Intrinsics_vec256 + v1_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v09, v19); + Lib_IntVector_Intrinsics_vec256 + v2_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v29, v39); + Lib_IntVector_Intrinsics_vec256 + v3_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v29, v39); + Lib_IntVector_Intrinsics_vec256 + v0__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_8, v2_8); + Lib_IntVector_Intrinsics_vec256 + v1__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_8, v2_8); + Lib_IntVector_Intrinsics_vec256 + v2__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_8, v3_8); + Lib_IntVector_Intrinsics_vec256 + v3__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_8, v3_8); + Lib_IntVector_Intrinsics_vec256 ws4 = v0__8; + Lib_IntVector_Intrinsics_vec256 ws5 = v2__8; + Lib_IntVector_Intrinsics_vec256 ws6 = v1__8; + Lib_IntVector_Intrinsics_vec256 ws7 = v3__8; + Lib_IntVector_Intrinsics_vec256 v010 = ws[8U]; + Lib_IntVector_Intrinsics_vec256 v110 = ws[9U]; + Lib_IntVector_Intrinsics_vec256 v210 = ws[10U]; + Lib_IntVector_Intrinsics_vec256 v310 = ws[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v010, v110); + Lib_IntVector_Intrinsics_vec256 + v1_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v010, v110); + Lib_IntVector_Intrinsics_vec256 + v2_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v210, v310); + Lib_IntVector_Intrinsics_vec256 + v3_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v210, v310); + Lib_IntVector_Intrinsics_vec256 + v0__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_9, v2_9); + Lib_IntVector_Intrinsics_vec256 + v1__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_9, v2_9); + Lib_IntVector_Intrinsics_vec256 + v2__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_9, v3_9); + Lib_IntVector_Intrinsics_vec256 + v3__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_9, v3_9); + Lib_IntVector_Intrinsics_vec256 ws8 = v0__9; + Lib_IntVector_Intrinsics_vec256 ws9 = v2__9; + Lib_IntVector_Intrinsics_vec256 ws10 = v1__9; + Lib_IntVector_Intrinsics_vec256 ws11 = v3__9; + Lib_IntVector_Intrinsics_vec256 v011 = ws[12U]; + Lib_IntVector_Intrinsics_vec256 v111 = ws[13U]; + Lib_IntVector_Intrinsics_vec256 v211 = ws[14U]; + Lib_IntVector_Intrinsics_vec256 v311 = ws[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_10 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v011, v111); + Lib_IntVector_Intrinsics_vec256 + v1_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v011, v111); + Lib_IntVector_Intrinsics_vec256 + v2_10 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v211, v311); + Lib_IntVector_Intrinsics_vec256 + v3_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v211, v311); + Lib_IntVector_Intrinsics_vec256 + v0__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_10, v2_10); + Lib_IntVector_Intrinsics_vec256 + v1__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_10, v2_10); + Lib_IntVector_Intrinsics_vec256 + v2__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_10, v3_10); + Lib_IntVector_Intrinsics_vec256 + v3__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_10, v3_10); + Lib_IntVector_Intrinsics_vec256 ws12 = v0__10; + Lib_IntVector_Intrinsics_vec256 ws13 = v2__10; + Lib_IntVector_Intrinsics_vec256 ws14 = v1__10; + Lib_IntVector_Intrinsics_vec256 ws15 = v3__10; + Lib_IntVector_Intrinsics_vec256 v012 = ws[16U]; + Lib_IntVector_Intrinsics_vec256 v112 = ws[17U]; + 
Lib_IntVector_Intrinsics_vec256 v212 = ws[18U]; + Lib_IntVector_Intrinsics_vec256 v312 = ws[19U]; + Lib_IntVector_Intrinsics_vec256 + v0_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v012, v112); + Lib_IntVector_Intrinsics_vec256 + v1_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v012, v112); + Lib_IntVector_Intrinsics_vec256 + v2_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v212, v312); + Lib_IntVector_Intrinsics_vec256 + v3_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v212, v312); + Lib_IntVector_Intrinsics_vec256 + v0__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_11, v2_11); + Lib_IntVector_Intrinsics_vec256 + v1__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_11, v2_11); + Lib_IntVector_Intrinsics_vec256 + v2__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_11, v3_11); + Lib_IntVector_Intrinsics_vec256 + v3__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_11, v3_11); + Lib_IntVector_Intrinsics_vec256 ws16 = v0__11; + Lib_IntVector_Intrinsics_vec256 ws17 = v2__11; + Lib_IntVector_Intrinsics_vec256 ws18 = v1__11; + Lib_IntVector_Intrinsics_vec256 ws19 = v3__11; + Lib_IntVector_Intrinsics_vec256 v013 = ws[20U]; + Lib_IntVector_Intrinsics_vec256 v113 = ws[21U]; + Lib_IntVector_Intrinsics_vec256 v213 = ws[22U]; + Lib_IntVector_Intrinsics_vec256 v313 = ws[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v013, v113); + Lib_IntVector_Intrinsics_vec256 + v1_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v013, v113); + Lib_IntVector_Intrinsics_vec256 + v2_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v213, v313); + Lib_IntVector_Intrinsics_vec256 + v3_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v213, v313); + Lib_IntVector_Intrinsics_vec256 + v0__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_12, v2_12); + Lib_IntVector_Intrinsics_vec256 + v1__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_12, v2_12); + Lib_IntVector_Intrinsics_vec256 + v2__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_12, v3_12); + Lib_IntVector_Intrinsics_vec256 + v3__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_12, v3_12); + Lib_IntVector_Intrinsics_vec256 ws20 = v0__12; + Lib_IntVector_Intrinsics_vec256 ws21 = v2__12; + Lib_IntVector_Intrinsics_vec256 ws22 = v1__12; + Lib_IntVector_Intrinsics_vec256 ws23 = v3__12; + Lib_IntVector_Intrinsics_vec256 v014 = ws[24U]; + Lib_IntVector_Intrinsics_vec256 v114 = ws[25U]; + Lib_IntVector_Intrinsics_vec256 v214 = ws[26U]; + Lib_IntVector_Intrinsics_vec256 v314 = ws[27U]; + Lib_IntVector_Intrinsics_vec256 + v0_13 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v014, v114); + Lib_IntVector_Intrinsics_vec256 + v1_13 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v014, v114); + Lib_IntVector_Intrinsics_vec256 + v2_13 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v214, v314); + Lib_IntVector_Intrinsics_vec256 + v3_13 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v214, v314); + Lib_IntVector_Intrinsics_vec256 + v0__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_13, v2_13); + Lib_IntVector_Intrinsics_vec256 + v1__13 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_13, v2_13); + Lib_IntVector_Intrinsics_vec256 + v2__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_13, v3_13); + Lib_IntVector_Intrinsics_vec256 + v3__13 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_13, v3_13); + Lib_IntVector_Intrinsics_vec256 ws24 = v0__13; + 
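+  /* [Editor's note, not generated code] After the transpose, the write-back
+     below permutes the vectors in stride-4 order (ws0, ws4, ..., ws28, then
+     ws1, ws5, ...), so ws[0..7] hold instance 0's state bytes, ws[8..15]
+     instance 1's, and so on; once stored to hbuf, the four rate-sized
+     output blocks sit at offsets 0, 256, 512 and 768 and are memcpy'd to
+     the four output buffers. */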
Lib_IntVector_Intrinsics_vec256 ws25 = v2__13; + Lib_IntVector_Intrinsics_vec256 ws26 = v1__13; + Lib_IntVector_Intrinsics_vec256 ws27 = v3__13; + Lib_IntVector_Intrinsics_vec256 v0 = ws[28U]; + Lib_IntVector_Intrinsics_vec256 v1 = ws[29U]; + Lib_IntVector_Intrinsics_vec256 v2 = ws[30U]; + Lib_IntVector_Intrinsics_vec256 v3 = ws[31U]; + Lib_IntVector_Intrinsics_vec256 + v0_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v1_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v2_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v3_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v0__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_14, v2_14); + Lib_IntVector_Intrinsics_vec256 + v1__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_14, v2_14); + Lib_IntVector_Intrinsics_vec256 + v2__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_14, v3_14); + Lib_IntVector_Intrinsics_vec256 + v3__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_14, v3_14); + Lib_IntVector_Intrinsics_vec256 ws28 = v0__14; + Lib_IntVector_Intrinsics_vec256 ws29 = v2__14; + Lib_IntVector_Intrinsics_vec256 ws30 = v1__14; + Lib_IntVector_Intrinsics_vec256 ws31 = v3__14; + ws[0U] = ws0; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; + ws[31U] = ws31; + for (uint32_t i = 0U; i < 32U; i++) + { + Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); + } + uint8_t *b35 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + i0 * rateInBytes1, hbuf, rateInBytes1 * sizeof (uint8_t)); + memcpy(b1 + i0 * rateInBytes1, hbuf + 256U, rateInBytes1 * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes1, hbuf + 512U, rateInBytes1 * sizeof (uint8_t)); + memcpy(b35 + i0 * rateInBytes1, hbuf + 768U, rateInBytes1 * sizeof (uint8_t)); + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____0 = s[i + 0U]; + Lib_IntVector_Intrinsics_vec256 uu____1 = s[i + 5U]; + Lib_IntVector_Intrinsics_vec256 uu____2 = s[i + 10U]; + _C[i] = + Lib_IntVector_Intrinsics_vec256_xor(uu____0, + Lib_IntVector_Intrinsics_vec256_xor(uu____1, + Lib_IntVector_Intrinsics_vec256_xor(uu____2, + Lib_IntVector_Intrinsics_vec256_xor(s[i + 15U], s[i + 20U]))));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____3 = _C[(i2 + 4U) % 5U]; + Lib_IntVector_Intrinsics_vec256 uu____4 = _C[(i2 + 1U) % 5U]; + Lib_IntVector_Intrinsics_vec256 + _D = + Lib_IntVector_Intrinsics_vec256_xor(uu____3, + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____4, + 1U), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____4, 63U))); + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + s[i2 + 5U * i] = 
Lib_IntVector_Intrinsics_vec256_xor(s[i2 + 5U * i], _D););); + Lib_IntVector_Intrinsics_vec256 x = s[1U]; + Lib_IntVector_Intrinsics_vec256 current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Hash_SHA3_keccak_piln[i]; + uint32_t r = Hacl_Hash_SHA3_keccak_rotc[i]; + Lib_IntVector_Intrinsics_vec256 temp = s[_Y]; + Lib_IntVector_Intrinsics_vec256 uu____5 = current; + s[_Y] = + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____5, + r), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____5, 64U - r)); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____6 = s[0U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____7 = Lib_IntVector_Intrinsics_vec256_lognot(s[1U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v015 = + Lib_IntVector_Intrinsics_vec256_xor(uu____6, + Lib_IntVector_Intrinsics_vec256_and(uu____7, s[2U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____8 = s[1U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____9 = Lib_IntVector_Intrinsics_vec256_lognot(s[2U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v115 = + Lib_IntVector_Intrinsics_vec256_xor(uu____8, + Lib_IntVector_Intrinsics_vec256_and(uu____9, s[3U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____10 = s[2U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____11 = Lib_IntVector_Intrinsics_vec256_lognot(s[3U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v215 = + Lib_IntVector_Intrinsics_vec256_xor(uu____10, + Lib_IntVector_Intrinsics_vec256_and(uu____11, s[4U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____12 = s[3U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____13 = Lib_IntVector_Intrinsics_vec256_lognot(s[4U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v315 = + Lib_IntVector_Intrinsics_vec256_xor(uu____12, + Lib_IntVector_Intrinsics_vec256_and(uu____13, s[0U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____14 = s[4U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____15 = Lib_IntVector_Intrinsics_vec256_lognot(s[0U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v4 = + Lib_IntVector_Intrinsics_vec256_xor(uu____14, + Lib_IntVector_Intrinsics_vec256_and(uu____15, s[1U + 5U * i])); + s[0U + 5U * i] = v015; + s[1U + 5U * i] = v115; + s[2U + 5U * i] = v215; + s[3U + 5U * i] = v315; + s[4U + 5U * i] = v4;); + uint64_t c = Hacl_Hash_SHA3_keccak_rndc[i1]; + Lib_IntVector_Intrinsics_vec256 uu____16 = s[0U]; + s[0U] = + Lib_IntVector_Intrinsics_vec256_xor(uu____16, + Lib_IntVector_Intrinsics_vec256_load64(c)); + } + } + uint32_t remOut = 32U % rateInBytes1; + uint8_t hbuf[1024U] = { 0U }; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws[32U] KRML_POST_ALIGN(32) = { 0U }; + memcpy(ws, s, 25U * sizeof (Lib_IntVector_Intrinsics_vec256)); + Lib_IntVector_Intrinsics_vec256 v08 = ws[0U]; + Lib_IntVector_Intrinsics_vec256 v18 = ws[1U]; + Lib_IntVector_Intrinsics_vec256 v28 = ws[2U]; + Lib_IntVector_Intrinsics_vec256 v38 = ws[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v08, v18); + Lib_IntVector_Intrinsics_vec256 + v1_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v08, v18); + Lib_IntVector_Intrinsics_vec256 + v2_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v28, v38); + Lib_IntVector_Intrinsics_vec256 + v3_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v28, v38); + Lib_IntVector_Intrinsics_vec256 + v0__7 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_7, v2_7); + Lib_IntVector_Intrinsics_vec256 + v1__7 = 
Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_7, v2_7); + Lib_IntVector_Intrinsics_vec256 + v2__7 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_7, v3_7); + Lib_IntVector_Intrinsics_vec256 + v3__7 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_7, v3_7); + Lib_IntVector_Intrinsics_vec256 ws0 = v0__7; + Lib_IntVector_Intrinsics_vec256 ws1 = v2__7; + Lib_IntVector_Intrinsics_vec256 ws2 = v1__7; + Lib_IntVector_Intrinsics_vec256 ws3 = v3__7; + Lib_IntVector_Intrinsics_vec256 v09 = ws[4U]; + Lib_IntVector_Intrinsics_vec256 v19 = ws[5U]; + Lib_IntVector_Intrinsics_vec256 v29 = ws[6U]; + Lib_IntVector_Intrinsics_vec256 v39 = ws[7U]; + Lib_IntVector_Intrinsics_vec256 + v0_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v09, v19); + Lib_IntVector_Intrinsics_vec256 + v1_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v09, v19); + Lib_IntVector_Intrinsics_vec256 + v2_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v29, v39); + Lib_IntVector_Intrinsics_vec256 + v3_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v29, v39); + Lib_IntVector_Intrinsics_vec256 + v0__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_8, v2_8); + Lib_IntVector_Intrinsics_vec256 + v1__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_8, v2_8); + Lib_IntVector_Intrinsics_vec256 + v2__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_8, v3_8); + Lib_IntVector_Intrinsics_vec256 + v3__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_8, v3_8); + Lib_IntVector_Intrinsics_vec256 ws4 = v0__8; + Lib_IntVector_Intrinsics_vec256 ws5 = v2__8; + Lib_IntVector_Intrinsics_vec256 ws6 = v1__8; + Lib_IntVector_Intrinsics_vec256 ws7 = v3__8; + Lib_IntVector_Intrinsics_vec256 v010 = ws[8U]; + Lib_IntVector_Intrinsics_vec256 v110 = ws[9U]; + Lib_IntVector_Intrinsics_vec256 v210 = ws[10U]; + Lib_IntVector_Intrinsics_vec256 v310 = ws[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v010, v110); + Lib_IntVector_Intrinsics_vec256 + v1_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v010, v110); + Lib_IntVector_Intrinsics_vec256 + v2_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v210, v310); + Lib_IntVector_Intrinsics_vec256 + v3_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v210, v310); + Lib_IntVector_Intrinsics_vec256 + v0__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_9, v2_9); + Lib_IntVector_Intrinsics_vec256 + v1__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_9, v2_9); + Lib_IntVector_Intrinsics_vec256 + v2__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_9, v3_9); + Lib_IntVector_Intrinsics_vec256 + v3__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_9, v3_9); + Lib_IntVector_Intrinsics_vec256 ws8 = v0__9; + Lib_IntVector_Intrinsics_vec256 ws9 = v2__9; + Lib_IntVector_Intrinsics_vec256 ws10 = v1__9; + Lib_IntVector_Intrinsics_vec256 ws11 = v3__9; + Lib_IntVector_Intrinsics_vec256 v011 = ws[12U]; + Lib_IntVector_Intrinsics_vec256 v111 = ws[13U]; + Lib_IntVector_Intrinsics_vec256 v211 = ws[14U]; + Lib_IntVector_Intrinsics_vec256 v311 = ws[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_10 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v011, v111); + Lib_IntVector_Intrinsics_vec256 + v1_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v011, v111); + Lib_IntVector_Intrinsics_vec256 + v2_10 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v211, v311); + Lib_IntVector_Intrinsics_vec256 + v3_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v211, 
v311); + Lib_IntVector_Intrinsics_vec256 + v0__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_10, v2_10); + Lib_IntVector_Intrinsics_vec256 + v1__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_10, v2_10); + Lib_IntVector_Intrinsics_vec256 + v2__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_10, v3_10); + Lib_IntVector_Intrinsics_vec256 + v3__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_10, v3_10); + Lib_IntVector_Intrinsics_vec256 ws12 = v0__10; + Lib_IntVector_Intrinsics_vec256 ws13 = v2__10; + Lib_IntVector_Intrinsics_vec256 ws14 = v1__10; + Lib_IntVector_Intrinsics_vec256 ws15 = v3__10; + Lib_IntVector_Intrinsics_vec256 v012 = ws[16U]; + Lib_IntVector_Intrinsics_vec256 v112 = ws[17U]; + Lib_IntVector_Intrinsics_vec256 v212 = ws[18U]; + Lib_IntVector_Intrinsics_vec256 v312 = ws[19U]; + Lib_IntVector_Intrinsics_vec256 + v0_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v012, v112); + Lib_IntVector_Intrinsics_vec256 + v1_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v012, v112); + Lib_IntVector_Intrinsics_vec256 + v2_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v212, v312); + Lib_IntVector_Intrinsics_vec256 + v3_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v212, v312); + Lib_IntVector_Intrinsics_vec256 + v0__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_11, v2_11); + Lib_IntVector_Intrinsics_vec256 + v1__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_11, v2_11); + Lib_IntVector_Intrinsics_vec256 + v2__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_11, v3_11); + Lib_IntVector_Intrinsics_vec256 + v3__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_11, v3_11); + Lib_IntVector_Intrinsics_vec256 ws16 = v0__11; + Lib_IntVector_Intrinsics_vec256 ws17 = v2__11; + Lib_IntVector_Intrinsics_vec256 ws18 = v1__11; + Lib_IntVector_Intrinsics_vec256 ws19 = v3__11; + Lib_IntVector_Intrinsics_vec256 v013 = ws[20U]; + Lib_IntVector_Intrinsics_vec256 v113 = ws[21U]; + Lib_IntVector_Intrinsics_vec256 v213 = ws[22U]; + Lib_IntVector_Intrinsics_vec256 v313 = ws[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v013, v113); + Lib_IntVector_Intrinsics_vec256 + v1_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v013, v113); + Lib_IntVector_Intrinsics_vec256 + v2_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v213, v313); + Lib_IntVector_Intrinsics_vec256 + v3_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v213, v313); + Lib_IntVector_Intrinsics_vec256 + v0__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_12, v2_12); + Lib_IntVector_Intrinsics_vec256 + v1__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_12, v2_12); + Lib_IntVector_Intrinsics_vec256 + v2__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_12, v3_12); + Lib_IntVector_Intrinsics_vec256 + v3__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_12, v3_12); + Lib_IntVector_Intrinsics_vec256 ws20 = v0__12; + Lib_IntVector_Intrinsics_vec256 ws21 = v2__12; + Lib_IntVector_Intrinsics_vec256 ws22 = v1__12; + Lib_IntVector_Intrinsics_vec256 ws23 = v3__12; + Lib_IntVector_Intrinsics_vec256 v014 = ws[24U]; + Lib_IntVector_Intrinsics_vec256 v114 = ws[25U]; + Lib_IntVector_Intrinsics_vec256 v214 = ws[26U]; + Lib_IntVector_Intrinsics_vec256 v314 = ws[27U]; + Lib_IntVector_Intrinsics_vec256 + v0_13 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v014, v114); + Lib_IntVector_Intrinsics_vec256 + v1_13 = 
Lib_IntVector_Intrinsics_vec256_interleave_high64(v014, v114); + Lib_IntVector_Intrinsics_vec256 + v2_13 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v214, v314); + Lib_IntVector_Intrinsics_vec256 + v3_13 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v214, v314); + Lib_IntVector_Intrinsics_vec256 + v0__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_13, v2_13); + Lib_IntVector_Intrinsics_vec256 + v1__13 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_13, v2_13); + Lib_IntVector_Intrinsics_vec256 + v2__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_13, v3_13); + Lib_IntVector_Intrinsics_vec256 + v3__13 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_13, v3_13); + Lib_IntVector_Intrinsics_vec256 ws24 = v0__13; + Lib_IntVector_Intrinsics_vec256 ws25 = v2__13; + Lib_IntVector_Intrinsics_vec256 ws26 = v1__13; + Lib_IntVector_Intrinsics_vec256 ws27 = v3__13; + Lib_IntVector_Intrinsics_vec256 v0 = ws[28U]; + Lib_IntVector_Intrinsics_vec256 v1 = ws[29U]; + Lib_IntVector_Intrinsics_vec256 v2 = ws[30U]; + Lib_IntVector_Intrinsics_vec256 v3 = ws[31U]; + Lib_IntVector_Intrinsics_vec256 + v0_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v1_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v2_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v3_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v0__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_14, v2_14); + Lib_IntVector_Intrinsics_vec256 + v1__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_14, v2_14); + Lib_IntVector_Intrinsics_vec256 + v2__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_14, v3_14); + Lib_IntVector_Intrinsics_vec256 + v3__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_14, v3_14); + Lib_IntVector_Intrinsics_vec256 ws28 = v0__14; + Lib_IntVector_Intrinsics_vec256 ws29 = v2__14; + Lib_IntVector_Intrinsics_vec256 ws30 = v1__14; + Lib_IntVector_Intrinsics_vec256 ws31 = v3__14; + ws[0U] = ws0; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; + ws[31U] = ws31; + for (uint32_t i = 0U; i < 32U; i++) + { + Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); + } + uint8_t *b35 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + 32U - remOut, hbuf, remOut * sizeof (uint8_t)); + memcpy(b1 + 32U - remOut, hbuf + 256U, remOut * sizeof (uint8_t)); + memcpy(b2 + 32U - remOut, hbuf + 512U, remOut * sizeof (uint8_t)); + memcpy(b35 + 32U - remOut, hbuf + 768U, remOut * sizeof (uint8_t)); +} + +void +Hacl_Hash_SHA3_Simd256_sha3_384( + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, + uint8_t *input0, + uint8_t *input1, + uint8_t *input2, + uint8_t *input3, + uint32_t inputByteLen +) +{ + Hacl_Hash_SHA2_uint8_4p + ib = { .fst = input0, .snd = { .fst = input1, .snd = { .fst = input2, .snd = input3 
} } }; + Hacl_Hash_SHA2_uint8_4p + rb = { .fst = output0, .snd = { .fst = output1, .snd = { .fst = output2, .snd = output3 } } }; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 s[25U] KRML_POST_ALIGN(32) = { 0U }; + uint32_t rateInBytes1 = 104U; + for (uint32_t i = 0U; i < inputByteLen / rateInBytes1; i++) + { + uint8_t b00[256U] = { 0U }; + uint8_t b10[256U] = { 0U }; + uint8_t b20[256U] = { 0U }; + uint8_t b30[256U] = { 0U }; + Hacl_Hash_SHA2_uint8_4p + b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; + uint8_t *b3 = ib.snd.snd.snd; + uint8_t *b2 = ib.snd.snd.fst; + uint8_t *b1 = ib.snd.fst; + uint8_t *b0 = ib.fst; + uint8_t *bl3 = b_.snd.snd.snd; + uint8_t *bl2 = b_.snd.snd.fst; + uint8_t *bl1 = b_.snd.fst; + uint8_t *bl0 = b_.fst; + memcpy(bl0, b0 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t)); + memcpy(bl1, b1 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t)); + memcpy(bl2, b2 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t)); + memcpy(bl3, b3 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t)); + Hacl_Hash_SHA3_Simd256_absorb_inner_256(rateInBytes1, b_, s); + } + uint8_t b00[256U] = { 0U }; + uint8_t b10[256U] = { 0U }; + uint8_t b20[256U] = { 0U }; + uint8_t b30[256U] = { 0U }; + Hacl_Hash_SHA2_uint8_4p + b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; + uint32_t rem = inputByteLen % rateInBytes1; + uint8_t *b31 = ib.snd.snd.snd; + uint8_t *b21 = ib.snd.snd.fst; + uint8_t *b11 = ib.snd.fst; + uint8_t *b01 = ib.fst; + uint8_t *bl3 = b_.snd.snd.snd; + uint8_t *bl2 = b_.snd.snd.fst; + uint8_t *bl1 = b_.snd.fst; + uint8_t *bl0 = b_.fst; + memcpy(bl0, b01 + inputByteLen - rem, rem * sizeof (uint8_t)); + memcpy(bl1, b11 + inputByteLen - rem, rem * sizeof (uint8_t)); + memcpy(bl2, b21 + inputByteLen - rem, rem * sizeof (uint8_t)); + memcpy(bl3, b31 + inputByteLen - rem, rem * sizeof (uint8_t)); + uint8_t *b32 = b_.snd.snd.snd; + uint8_t *b22 = b_.snd.snd.fst; + uint8_t *b12 = b_.snd.fst; + uint8_t *b02 = b_.fst; + b02[inputByteLen % rateInBytes1] = 0x06U; + b12[inputByteLen % rateInBytes1] = 0x06U; + b22[inputByteLen % rateInBytes1] = 0x06U; + b32[inputByteLen % rateInBytes1] = 0x06U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b33 = b_.snd.snd.snd; + uint8_t *b23 = b_.snd.snd.fst; + uint8_t *b13 = b_.snd.fst; + uint8_t *b03 = b_.fst; + ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03); + ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13); + ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23); + ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33); + ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 32U); + ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 32U); + ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 32U); + ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 32U); + ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 64U); + ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 64U); + ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 64U); + ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 64U); + ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 96U); + ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 96U); + ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 96U); + ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 96U); + ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 128U); + ws32[17U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 128U); + ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 128U); + ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 128U); + ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 160U); + ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 160U); + ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 160U); + ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 160U); + ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 192U); + ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 192U); + ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 192U); + ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 192U); + ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 224U); + ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 224U); + ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 224U); + ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 224U); + Lib_IntVector_Intrinsics_vec256 v00 = ws32[0U]; + Lib_IntVector_Intrinsics_vec256 v10 = ws32[1U]; + Lib_IntVector_Intrinsics_vec256 v20 = ws32[2U]; + Lib_IntVector_Intrinsics_vec256 v30 = ws32[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v1_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v2_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v3_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v0__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v1__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v2__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 + v3__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 ws00 = v0__; + Lib_IntVector_Intrinsics_vec256 ws110 = v2__; + Lib_IntVector_Intrinsics_vec256 ws210 = v1__; + Lib_IntVector_Intrinsics_vec256 ws33 = v3__; + Lib_IntVector_Intrinsics_vec256 v01 = ws32[4U]; + Lib_IntVector_Intrinsics_vec256 v11 = ws32[5U]; + Lib_IntVector_Intrinsics_vec256 v21 = ws32[6U]; + Lib_IntVector_Intrinsics_vec256 v31 = ws32[7U]; + Lib_IntVector_Intrinsics_vec256 + v0_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v1_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v2_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v3_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v0__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v1__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v2__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 + v3__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 ws40 = v0__0; + Lib_IntVector_Intrinsics_vec256 ws50 = v2__0; + Lib_IntVector_Intrinsics_vec256 ws60 = v1__0; + Lib_IntVector_Intrinsics_vec256 ws70 = v3__0; + Lib_IntVector_Intrinsics_vec256 v02 = ws32[8U]; + 
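+  /* [Editor's note, not generated code] Padding of the last partial block
+     (pad10*1 with the SHA-3 domain byte): 0x06 was written just after the
+     residual message bytes above, and a second staging block carrying 0x80
+     in byte rateInBytes1 - 1 is XORed in later by absorb_inner_256, which
+     also runs the permutation. The load64_le calls around this point pull
+     the four padded 256-byte staging buffers into vector registers before
+     the transpose and the XOR into the shared state s. */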
Lib_IntVector_Intrinsics_vec256 v12 = ws32[9U]; + Lib_IntVector_Intrinsics_vec256 v22 = ws32[10U]; + Lib_IntVector_Intrinsics_vec256 v32 = ws32[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v1_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v2_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v3_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v0__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v1__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v2__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 + v3__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 ws80 = v0__1; + Lib_IntVector_Intrinsics_vec256 ws90 = v2__1; + Lib_IntVector_Intrinsics_vec256 ws100 = v1__1; + Lib_IntVector_Intrinsics_vec256 ws111 = v3__1; + Lib_IntVector_Intrinsics_vec256 v03 = ws32[12U]; + Lib_IntVector_Intrinsics_vec256 v13 = ws32[13U]; + Lib_IntVector_Intrinsics_vec256 v23 = ws32[14U]; + Lib_IntVector_Intrinsics_vec256 v33 = ws32[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v1_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v2_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v3_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v0__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v1__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v2__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 + v3__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 ws120 = v0__2; + Lib_IntVector_Intrinsics_vec256 ws130 = v2__2; + Lib_IntVector_Intrinsics_vec256 ws140 = v1__2; + Lib_IntVector_Intrinsics_vec256 ws150 = v3__2; + Lib_IntVector_Intrinsics_vec256 v04 = ws32[16U]; + Lib_IntVector_Intrinsics_vec256 v14 = ws32[17U]; + Lib_IntVector_Intrinsics_vec256 v24 = ws32[18U]; + Lib_IntVector_Intrinsics_vec256 v34 = ws32[19U]; + Lib_IntVector_Intrinsics_vec256 + v0_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v1_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v2_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v3_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v0__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v1__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v2__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 + v3__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 ws160 = v0__3; + 
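+  /* [Editor's note] A minimal usage sketch for the enclosing 4-way API; it
+     is not part of the generated code and the buffer names are
+     hypothetical. All four inputs must share the same length
+     `inputByteLen`, each output receives a 48-byte SHA3-384 digest, and a
+     target with 256-bit vector support (e.g. AVX2) is assumed:
+
+       uint8_t m0[64], m1[64], m2[64], m3[64];  // four equal-length inputs
+       uint8_t d0[48], d1[48], d2[48], d3[48];  // four SHA3-384 digests
+       Hacl_Hash_SHA3_Simd256_sha3_384(d0, d1, d2, d3,
+                                       m0, m1, m2, m3, 64U);
+  */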
Lib_IntVector_Intrinsics_vec256 ws170 = v2__3; + Lib_IntVector_Intrinsics_vec256 ws180 = v1__3; + Lib_IntVector_Intrinsics_vec256 ws190 = v3__3; + Lib_IntVector_Intrinsics_vec256 v05 = ws32[20U]; + Lib_IntVector_Intrinsics_vec256 v15 = ws32[21U]; + Lib_IntVector_Intrinsics_vec256 v25 = ws32[22U]; + Lib_IntVector_Intrinsics_vec256 v35 = ws32[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v1_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v2_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v3_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v0__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v1__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v2__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 + v3__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 ws200 = v0__4; + Lib_IntVector_Intrinsics_vec256 ws211 = v2__4; + Lib_IntVector_Intrinsics_vec256 ws220 = v1__4; + Lib_IntVector_Intrinsics_vec256 ws230 = v3__4; + Lib_IntVector_Intrinsics_vec256 v06 = ws32[24U]; + Lib_IntVector_Intrinsics_vec256 v16 = ws32[25U]; + Lib_IntVector_Intrinsics_vec256 v26 = ws32[26U]; + Lib_IntVector_Intrinsics_vec256 v36 = ws32[27U]; + Lib_IntVector_Intrinsics_vec256 + v0_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v1_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v2_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v3_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v0__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v1__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v2__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 + v3__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 ws240 = v0__5; + Lib_IntVector_Intrinsics_vec256 ws250 = v2__5; + Lib_IntVector_Intrinsics_vec256 ws260 = v1__5; + Lib_IntVector_Intrinsics_vec256 ws270 = v3__5; + Lib_IntVector_Intrinsics_vec256 v07 = ws32[28U]; + Lib_IntVector_Intrinsics_vec256 v17 = ws32[29U]; + Lib_IntVector_Intrinsics_vec256 v27 = ws32[30U]; + Lib_IntVector_Intrinsics_vec256 v37 = ws32[31U]; + Lib_IntVector_Intrinsics_vec256 + v0_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v07, v17); + Lib_IntVector_Intrinsics_vec256 + v1_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v07, v17); + Lib_IntVector_Intrinsics_vec256 + v2_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v27, v37); + Lib_IntVector_Intrinsics_vec256 + v3_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v27, v37); + Lib_IntVector_Intrinsics_vec256 + v0__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v1__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v2__6 = 
Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 + v3__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 ws280 = v0__6; + Lib_IntVector_Intrinsics_vec256 ws290 = v2__6; + Lib_IntVector_Intrinsics_vec256 ws300 = v1__6; + Lib_IntVector_Intrinsics_vec256 ws310 = v3__6; + ws32[0U] = ws00; + ws32[1U] = ws110; + ws32[2U] = ws210; + ws32[3U] = ws33; + ws32[4U] = ws40; + ws32[5U] = ws50; + ws32[6U] = ws60; + ws32[7U] = ws70; + ws32[8U] = ws80; + ws32[9U] = ws90; + ws32[10U] = ws100; + ws32[11U] = ws111; + ws32[12U] = ws120; + ws32[13U] = ws130; + ws32[14U] = ws140; + ws32[15U] = ws150; + ws32[16U] = ws160; + ws32[17U] = ws170; + ws32[18U] = ws180; + ws32[19U] = ws190; + ws32[20U] = ws200; + ws32[21U] = ws211; + ws32[22U] = ws220; + ws32[23U] = ws230; + ws32[24U] = ws240; + ws32[25U] = ws250; + ws32[26U] = ws260; + ws32[27U] = ws270; + ws32[28U] = ws280; + ws32[29U] = ws290; + ws32[30U] = ws300; + ws32[31U] = ws310; + for (uint32_t i = 0U; i < 25U; i++) + { + s[i] = Lib_IntVector_Intrinsics_vec256_xor(s[i], ws32[i]); + } + uint8_t b04[256U] = { 0U }; + uint8_t b14[256U] = { 0U }; + uint8_t b24[256U] = { 0U }; + uint8_t b34[256U] = { 0U }; + Hacl_Hash_SHA2_uint8_4p + b = { .fst = b04, .snd = { .fst = b14, .snd = { .fst = b24, .snd = b34 } } }; + uint8_t *b3 = b.snd.snd.snd; + uint8_t *b25 = b.snd.snd.fst; + uint8_t *b15 = b.snd.fst; + uint8_t *b05 = b.fst; + b05[rateInBytes1 - 1U] = 0x80U; + b15[rateInBytes1 - 1U] = 0x80U; + b25[rateInBytes1 - 1U] = 0x80U; + b3[rateInBytes1 - 1U] = 0x80U; + Hacl_Hash_SHA3_Simd256_absorb_inner_256(rateInBytes1, b, s); + for (uint32_t i0 = 0U; i0 < 48U / rateInBytes1; i0++) + { + uint8_t hbuf[1024U] = { 0U }; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws[32U] KRML_POST_ALIGN(32) = { 0U }; + memcpy(ws, s, 25U * sizeof (Lib_IntVector_Intrinsics_vec256)); + Lib_IntVector_Intrinsics_vec256 v08 = ws[0U]; + Lib_IntVector_Intrinsics_vec256 v18 = ws[1U]; + Lib_IntVector_Intrinsics_vec256 v28 = ws[2U]; + Lib_IntVector_Intrinsics_vec256 v38 = ws[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v08, v18); + Lib_IntVector_Intrinsics_vec256 + v1_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v08, v18); + Lib_IntVector_Intrinsics_vec256 + v2_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v28, v38); + Lib_IntVector_Intrinsics_vec256 + v3_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v28, v38); + Lib_IntVector_Intrinsics_vec256 + v0__7 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_7, v2_7); + Lib_IntVector_Intrinsics_vec256 + v1__7 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_7, v2_7); + Lib_IntVector_Intrinsics_vec256 + v2__7 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_7, v3_7); + Lib_IntVector_Intrinsics_vec256 + v3__7 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_7, v3_7); + Lib_IntVector_Intrinsics_vec256 ws0 = v0__7; + Lib_IntVector_Intrinsics_vec256 ws1 = v2__7; + Lib_IntVector_Intrinsics_vec256 ws2 = v1__7; + Lib_IntVector_Intrinsics_vec256 ws3 = v3__7; + Lib_IntVector_Intrinsics_vec256 v09 = ws[4U]; + Lib_IntVector_Intrinsics_vec256 v19 = ws[5U]; + Lib_IntVector_Intrinsics_vec256 v29 = ws[6U]; + Lib_IntVector_Intrinsics_vec256 v39 = ws[7U]; + Lib_IntVector_Intrinsics_vec256 + v0_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v09, v19); + Lib_IntVector_Intrinsics_vec256 + v1_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v09, v19); + 
Lib_IntVector_Intrinsics_vec256 + v2_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v29, v39); + Lib_IntVector_Intrinsics_vec256 + v3_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v29, v39); + Lib_IntVector_Intrinsics_vec256 + v0__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_8, v2_8); + Lib_IntVector_Intrinsics_vec256 + v1__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_8, v2_8); + Lib_IntVector_Intrinsics_vec256 + v2__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_8, v3_8); + Lib_IntVector_Intrinsics_vec256 + v3__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_8, v3_8); + Lib_IntVector_Intrinsics_vec256 ws4 = v0__8; + Lib_IntVector_Intrinsics_vec256 ws5 = v2__8; + Lib_IntVector_Intrinsics_vec256 ws6 = v1__8; + Lib_IntVector_Intrinsics_vec256 ws7 = v3__8; + Lib_IntVector_Intrinsics_vec256 v010 = ws[8U]; + Lib_IntVector_Intrinsics_vec256 v110 = ws[9U]; + Lib_IntVector_Intrinsics_vec256 v210 = ws[10U]; + Lib_IntVector_Intrinsics_vec256 v310 = ws[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v010, v110); + Lib_IntVector_Intrinsics_vec256 + v1_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v010, v110); + Lib_IntVector_Intrinsics_vec256 + v2_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v210, v310); + Lib_IntVector_Intrinsics_vec256 + v3_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v210, v310); + Lib_IntVector_Intrinsics_vec256 + v0__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_9, v2_9); + Lib_IntVector_Intrinsics_vec256 + v1__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_9, v2_9); + Lib_IntVector_Intrinsics_vec256 + v2__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_9, v3_9); + Lib_IntVector_Intrinsics_vec256 + v3__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_9, v3_9); + Lib_IntVector_Intrinsics_vec256 ws8 = v0__9; + Lib_IntVector_Intrinsics_vec256 ws9 = v2__9; + Lib_IntVector_Intrinsics_vec256 ws10 = v1__9; + Lib_IntVector_Intrinsics_vec256 ws11 = v3__9; + Lib_IntVector_Intrinsics_vec256 v011 = ws[12U]; + Lib_IntVector_Intrinsics_vec256 v111 = ws[13U]; + Lib_IntVector_Intrinsics_vec256 v211 = ws[14U]; + Lib_IntVector_Intrinsics_vec256 v311 = ws[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_10 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v011, v111); + Lib_IntVector_Intrinsics_vec256 + v1_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v011, v111); + Lib_IntVector_Intrinsics_vec256 + v2_10 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v211, v311); + Lib_IntVector_Intrinsics_vec256 + v3_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v211, v311); + Lib_IntVector_Intrinsics_vec256 + v0__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_10, v2_10); + Lib_IntVector_Intrinsics_vec256 + v1__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_10, v2_10); + Lib_IntVector_Intrinsics_vec256 + v2__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_10, v3_10); + Lib_IntVector_Intrinsics_vec256 + v3__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_10, v3_10); + Lib_IntVector_Intrinsics_vec256 ws12 = v0__10; + Lib_IntVector_Intrinsics_vec256 ws13 = v2__10; + Lib_IntVector_Intrinsics_vec256 ws14 = v1__10; + Lib_IntVector_Intrinsics_vec256 ws15 = v3__10; + Lib_IntVector_Intrinsics_vec256 v012 = ws[16U]; + Lib_IntVector_Intrinsics_vec256 v112 = ws[17U]; + Lib_IntVector_Intrinsics_vec256 v212 = ws[18U]; + Lib_IntVector_Intrinsics_vec256 v312 = ws[19U]; + 
Lib_IntVector_Intrinsics_vec256 + v0_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v012, v112); + Lib_IntVector_Intrinsics_vec256 + v1_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v012, v112); + Lib_IntVector_Intrinsics_vec256 + v2_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v212, v312); + Lib_IntVector_Intrinsics_vec256 + v3_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v212, v312); + Lib_IntVector_Intrinsics_vec256 + v0__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_11, v2_11); + Lib_IntVector_Intrinsics_vec256 + v1__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_11, v2_11); + Lib_IntVector_Intrinsics_vec256 + v2__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_11, v3_11); + Lib_IntVector_Intrinsics_vec256 + v3__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_11, v3_11); + Lib_IntVector_Intrinsics_vec256 ws16 = v0__11; + Lib_IntVector_Intrinsics_vec256 ws17 = v2__11; + Lib_IntVector_Intrinsics_vec256 ws18 = v1__11; + Lib_IntVector_Intrinsics_vec256 ws19 = v3__11; + Lib_IntVector_Intrinsics_vec256 v013 = ws[20U]; + Lib_IntVector_Intrinsics_vec256 v113 = ws[21U]; + Lib_IntVector_Intrinsics_vec256 v213 = ws[22U]; + Lib_IntVector_Intrinsics_vec256 v313 = ws[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v013, v113); + Lib_IntVector_Intrinsics_vec256 + v1_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v013, v113); + Lib_IntVector_Intrinsics_vec256 + v2_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v213, v313); + Lib_IntVector_Intrinsics_vec256 + v3_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v213, v313); + Lib_IntVector_Intrinsics_vec256 + v0__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_12, v2_12); + Lib_IntVector_Intrinsics_vec256 + v1__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_12, v2_12); + Lib_IntVector_Intrinsics_vec256 + v2__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_12, v3_12); + Lib_IntVector_Intrinsics_vec256 + v3__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_12, v3_12); + Lib_IntVector_Intrinsics_vec256 ws20 = v0__12; + Lib_IntVector_Intrinsics_vec256 ws21 = v2__12; + Lib_IntVector_Intrinsics_vec256 ws22 = v1__12; + Lib_IntVector_Intrinsics_vec256 ws23 = v3__12; + Lib_IntVector_Intrinsics_vec256 v014 = ws[24U]; + Lib_IntVector_Intrinsics_vec256 v114 = ws[25U]; + Lib_IntVector_Intrinsics_vec256 v214 = ws[26U]; + Lib_IntVector_Intrinsics_vec256 v314 = ws[27U]; + Lib_IntVector_Intrinsics_vec256 + v0_13 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v014, v114); + Lib_IntVector_Intrinsics_vec256 + v1_13 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v014, v114); + Lib_IntVector_Intrinsics_vec256 + v2_13 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v214, v314); + Lib_IntVector_Intrinsics_vec256 + v3_13 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v214, v314); + Lib_IntVector_Intrinsics_vec256 + v0__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_13, v2_13); + Lib_IntVector_Intrinsics_vec256 + v1__13 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_13, v2_13); + Lib_IntVector_Intrinsics_vec256 + v2__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_13, v3_13); + Lib_IntVector_Intrinsics_vec256 + v3__13 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_13, v3_13); + Lib_IntVector_Intrinsics_vec256 ws24 = v0__13; + Lib_IntVector_Intrinsics_vec256 ws25 = v2__13; + Lib_IntVector_Intrinsics_vec256 ws26 = v1__13; + 
Lib_IntVector_Intrinsics_vec256 ws27 = v3__13; + Lib_IntVector_Intrinsics_vec256 v0 = ws[28U]; + Lib_IntVector_Intrinsics_vec256 v1 = ws[29U]; + Lib_IntVector_Intrinsics_vec256 v2 = ws[30U]; + Lib_IntVector_Intrinsics_vec256 v3 = ws[31U]; + Lib_IntVector_Intrinsics_vec256 + v0_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v1_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v2_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v3_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v0__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_14, v2_14); + Lib_IntVector_Intrinsics_vec256 + v1__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_14, v2_14); + Lib_IntVector_Intrinsics_vec256 + v2__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_14, v3_14); + Lib_IntVector_Intrinsics_vec256 + v3__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_14, v3_14); + Lib_IntVector_Intrinsics_vec256 ws28 = v0__14; + Lib_IntVector_Intrinsics_vec256 ws29 = v2__14; + Lib_IntVector_Intrinsics_vec256 ws30 = v1__14; + Lib_IntVector_Intrinsics_vec256 ws31 = v3__14; + ws[0U] = ws0; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; + ws[31U] = ws31; + for (uint32_t i = 0U; i < 32U; i++) + { + Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); + } + uint8_t *b35 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + i0 * rateInBytes1, hbuf, rateInBytes1 * sizeof (uint8_t)); + memcpy(b1 + i0 * rateInBytes1, hbuf + 256U, rateInBytes1 * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes1, hbuf + 512U, rateInBytes1 * sizeof (uint8_t)); + memcpy(b35 + i0 * rateInBytes1, hbuf + 768U, rateInBytes1 * sizeof (uint8_t)); + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____0 = s[i + 0U]; + Lib_IntVector_Intrinsics_vec256 uu____1 = s[i + 5U]; + Lib_IntVector_Intrinsics_vec256 uu____2 = s[i + 10U]; + _C[i] = + Lib_IntVector_Intrinsics_vec256_xor(uu____0, + Lib_IntVector_Intrinsics_vec256_xor(uu____1, + Lib_IntVector_Intrinsics_vec256_xor(uu____2, + Lib_IntVector_Intrinsics_vec256_xor(s[i + 15U], s[i + 20U]))));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____3 = _C[(i2 + 4U) % 5U]; + Lib_IntVector_Intrinsics_vec256 uu____4 = _C[(i2 + 1U) % 5U]; + Lib_IntVector_Intrinsics_vec256 + _D = + Lib_IntVector_Intrinsics_vec256_xor(uu____3, + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____4, + 1U), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____4, 63U))); + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + s[i2 + 5U * i] = Lib_IntVector_Intrinsics_vec256_xor(s[i2 + 5U * i], _D););); + Lib_IntVector_Intrinsics_vec256 x = s[1U]; + 
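+      /* [Editor's note, not generated code] rho + pi: the loop below
+         rotates each lane by Hacl_Hash_SHA3_keccak_rotc[i] and relocates
+         it to position Hacl_Hash_SHA3_keccak_piln[i]; chi (the
+         lognot/and/xor sequence) and iota (XOR of the broadcast round
+         constant keccak_rndc[i1]) follow, advancing all four states in
+         lockstep. */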
Lib_IntVector_Intrinsics_vec256 current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Hash_SHA3_keccak_piln[i]; + uint32_t r = Hacl_Hash_SHA3_keccak_rotc[i]; + Lib_IntVector_Intrinsics_vec256 temp = s[_Y]; + Lib_IntVector_Intrinsics_vec256 uu____5 = current; + s[_Y] = + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____5, + r), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____5, 64U - r)); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____6 = s[0U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____7 = Lib_IntVector_Intrinsics_vec256_lognot(s[1U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v015 = + Lib_IntVector_Intrinsics_vec256_xor(uu____6, + Lib_IntVector_Intrinsics_vec256_and(uu____7, s[2U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____8 = s[1U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____9 = Lib_IntVector_Intrinsics_vec256_lognot(s[2U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v115 = + Lib_IntVector_Intrinsics_vec256_xor(uu____8, + Lib_IntVector_Intrinsics_vec256_and(uu____9, s[3U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____10 = s[2U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____11 = Lib_IntVector_Intrinsics_vec256_lognot(s[3U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v215 = + Lib_IntVector_Intrinsics_vec256_xor(uu____10, + Lib_IntVector_Intrinsics_vec256_and(uu____11, s[4U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____12 = s[3U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____13 = Lib_IntVector_Intrinsics_vec256_lognot(s[4U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v315 = + Lib_IntVector_Intrinsics_vec256_xor(uu____12, + Lib_IntVector_Intrinsics_vec256_and(uu____13, s[0U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____14 = s[4U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____15 = Lib_IntVector_Intrinsics_vec256_lognot(s[0U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v4 = + Lib_IntVector_Intrinsics_vec256_xor(uu____14, + Lib_IntVector_Intrinsics_vec256_and(uu____15, s[1U + 5U * i])); + s[0U + 5U * i] = v015; + s[1U + 5U * i] = v115; + s[2U + 5U * i] = v215; + s[3U + 5U * i] = v315; + s[4U + 5U * i] = v4;); + uint64_t c = Hacl_Hash_SHA3_keccak_rndc[i1]; + Lib_IntVector_Intrinsics_vec256 uu____16 = s[0U]; + s[0U] = + Lib_IntVector_Intrinsics_vec256_xor(uu____16, + Lib_IntVector_Intrinsics_vec256_load64(c)); + } + } + uint32_t remOut = 48U % rateInBytes1; + uint8_t hbuf[1024U] = { 0U }; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws[32U] KRML_POST_ALIGN(32) = { 0U }; + memcpy(ws, s, 25U * sizeof (Lib_IntVector_Intrinsics_vec256)); + Lib_IntVector_Intrinsics_vec256 v08 = ws[0U]; + Lib_IntVector_Intrinsics_vec256 v18 = ws[1U]; + Lib_IntVector_Intrinsics_vec256 v28 = ws[2U]; + Lib_IntVector_Intrinsics_vec256 v38 = ws[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v08, v18); + Lib_IntVector_Intrinsics_vec256 + v1_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v08, v18); + Lib_IntVector_Intrinsics_vec256 + v2_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v28, v38); + Lib_IntVector_Intrinsics_vec256 + v3_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v28, v38); + Lib_IntVector_Intrinsics_vec256 + v0__7 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_7, v2_7); + Lib_IntVector_Intrinsics_vec256 + v1__7 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_7, v2_7); + Lib_IntVector_Intrinsics_vec256 + v2__7 = 
Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_7, v3_7); + Lib_IntVector_Intrinsics_vec256 + v3__7 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_7, v3_7); + Lib_IntVector_Intrinsics_vec256 ws0 = v0__7; + Lib_IntVector_Intrinsics_vec256 ws1 = v2__7; + Lib_IntVector_Intrinsics_vec256 ws2 = v1__7; + Lib_IntVector_Intrinsics_vec256 ws3 = v3__7; + Lib_IntVector_Intrinsics_vec256 v09 = ws[4U]; + Lib_IntVector_Intrinsics_vec256 v19 = ws[5U]; + Lib_IntVector_Intrinsics_vec256 v29 = ws[6U]; + Lib_IntVector_Intrinsics_vec256 v39 = ws[7U]; + Lib_IntVector_Intrinsics_vec256 + v0_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v09, v19); + Lib_IntVector_Intrinsics_vec256 + v1_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v09, v19); + Lib_IntVector_Intrinsics_vec256 + v2_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v29, v39); + Lib_IntVector_Intrinsics_vec256 + v3_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v29, v39); + Lib_IntVector_Intrinsics_vec256 + v0__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_8, v2_8); + Lib_IntVector_Intrinsics_vec256 + v1__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_8, v2_8); + Lib_IntVector_Intrinsics_vec256 + v2__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_8, v3_8); + Lib_IntVector_Intrinsics_vec256 + v3__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_8, v3_8); + Lib_IntVector_Intrinsics_vec256 ws4 = v0__8; + Lib_IntVector_Intrinsics_vec256 ws5 = v2__8; + Lib_IntVector_Intrinsics_vec256 ws6 = v1__8; + Lib_IntVector_Intrinsics_vec256 ws7 = v3__8; + Lib_IntVector_Intrinsics_vec256 v010 = ws[8U]; + Lib_IntVector_Intrinsics_vec256 v110 = ws[9U]; + Lib_IntVector_Intrinsics_vec256 v210 = ws[10U]; + Lib_IntVector_Intrinsics_vec256 v310 = ws[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v010, v110); + Lib_IntVector_Intrinsics_vec256 + v1_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v010, v110); + Lib_IntVector_Intrinsics_vec256 + v2_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v210, v310); + Lib_IntVector_Intrinsics_vec256 + v3_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v210, v310); + Lib_IntVector_Intrinsics_vec256 + v0__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_9, v2_9); + Lib_IntVector_Intrinsics_vec256 + v1__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_9, v2_9); + Lib_IntVector_Intrinsics_vec256 + v2__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_9, v3_9); + Lib_IntVector_Intrinsics_vec256 + v3__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_9, v3_9); + Lib_IntVector_Intrinsics_vec256 ws8 = v0__9; + Lib_IntVector_Intrinsics_vec256 ws9 = v2__9; + Lib_IntVector_Intrinsics_vec256 ws10 = v1__9; + Lib_IntVector_Intrinsics_vec256 ws11 = v3__9; + Lib_IntVector_Intrinsics_vec256 v011 = ws[12U]; + Lib_IntVector_Intrinsics_vec256 v111 = ws[13U]; + Lib_IntVector_Intrinsics_vec256 v211 = ws[14U]; + Lib_IntVector_Intrinsics_vec256 v311 = ws[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_10 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v011, v111); + Lib_IntVector_Intrinsics_vec256 + v1_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v011, v111); + Lib_IntVector_Intrinsics_vec256 + v2_10 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v211, v311); + Lib_IntVector_Intrinsics_vec256 + v3_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v211, v311); + Lib_IntVector_Intrinsics_vec256 + v0__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_10, 
v2_10); + Lib_IntVector_Intrinsics_vec256 + v1__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_10, v2_10); + Lib_IntVector_Intrinsics_vec256 + v2__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_10, v3_10); + Lib_IntVector_Intrinsics_vec256 + v3__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_10, v3_10); + Lib_IntVector_Intrinsics_vec256 ws12 = v0__10; + Lib_IntVector_Intrinsics_vec256 ws13 = v2__10; + Lib_IntVector_Intrinsics_vec256 ws14 = v1__10; + Lib_IntVector_Intrinsics_vec256 ws15 = v3__10; + Lib_IntVector_Intrinsics_vec256 v012 = ws[16U]; + Lib_IntVector_Intrinsics_vec256 v112 = ws[17U]; + Lib_IntVector_Intrinsics_vec256 v212 = ws[18U]; + Lib_IntVector_Intrinsics_vec256 v312 = ws[19U]; + Lib_IntVector_Intrinsics_vec256 + v0_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v012, v112); + Lib_IntVector_Intrinsics_vec256 + v1_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v012, v112); + Lib_IntVector_Intrinsics_vec256 + v2_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v212, v312); + Lib_IntVector_Intrinsics_vec256 + v3_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v212, v312); + Lib_IntVector_Intrinsics_vec256 + v0__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_11, v2_11); + Lib_IntVector_Intrinsics_vec256 + v1__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_11, v2_11); + Lib_IntVector_Intrinsics_vec256 + v2__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_11, v3_11); + Lib_IntVector_Intrinsics_vec256 + v3__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_11, v3_11); + Lib_IntVector_Intrinsics_vec256 ws16 = v0__11; + Lib_IntVector_Intrinsics_vec256 ws17 = v2__11; + Lib_IntVector_Intrinsics_vec256 ws18 = v1__11; + Lib_IntVector_Intrinsics_vec256 ws19 = v3__11; + Lib_IntVector_Intrinsics_vec256 v013 = ws[20U]; + Lib_IntVector_Intrinsics_vec256 v113 = ws[21U]; + Lib_IntVector_Intrinsics_vec256 v213 = ws[22U]; + Lib_IntVector_Intrinsics_vec256 v313 = ws[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v013, v113); + Lib_IntVector_Intrinsics_vec256 + v1_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v013, v113); + Lib_IntVector_Intrinsics_vec256 + v2_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v213, v313); + Lib_IntVector_Intrinsics_vec256 + v3_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v213, v313); + Lib_IntVector_Intrinsics_vec256 + v0__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_12, v2_12); + Lib_IntVector_Intrinsics_vec256 + v1__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_12, v2_12); + Lib_IntVector_Intrinsics_vec256 + v2__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_12, v3_12); + Lib_IntVector_Intrinsics_vec256 + v3__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_12, v3_12); + Lib_IntVector_Intrinsics_vec256 ws20 = v0__12; + Lib_IntVector_Intrinsics_vec256 ws21 = v2__12; + Lib_IntVector_Intrinsics_vec256 ws22 = v1__12; + Lib_IntVector_Intrinsics_vec256 ws23 = v3__12; + Lib_IntVector_Intrinsics_vec256 v014 = ws[24U]; + Lib_IntVector_Intrinsics_vec256 v114 = ws[25U]; + Lib_IntVector_Intrinsics_vec256 v214 = ws[26U]; + Lib_IntVector_Intrinsics_vec256 v314 = ws[27U]; + Lib_IntVector_Intrinsics_vec256 + v0_13 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v014, v114); + Lib_IntVector_Intrinsics_vec256 + v1_13 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v014, v114); + Lib_IntVector_Intrinsics_vec256 + v2_13 = 
Lib_IntVector_Intrinsics_vec256_interleave_low64(v214, v314); + Lib_IntVector_Intrinsics_vec256 + v3_13 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v214, v314); + Lib_IntVector_Intrinsics_vec256 + v0__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_13, v2_13); + Lib_IntVector_Intrinsics_vec256 + v1__13 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_13, v2_13); + Lib_IntVector_Intrinsics_vec256 + v2__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_13, v3_13); + Lib_IntVector_Intrinsics_vec256 + v3__13 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_13, v3_13); + Lib_IntVector_Intrinsics_vec256 ws24 = v0__13; + Lib_IntVector_Intrinsics_vec256 ws25 = v2__13; + Lib_IntVector_Intrinsics_vec256 ws26 = v1__13; + Lib_IntVector_Intrinsics_vec256 ws27 = v3__13; + Lib_IntVector_Intrinsics_vec256 v0 = ws[28U]; + Lib_IntVector_Intrinsics_vec256 v1 = ws[29U]; + Lib_IntVector_Intrinsics_vec256 v2 = ws[30U]; + Lib_IntVector_Intrinsics_vec256 v3 = ws[31U]; + Lib_IntVector_Intrinsics_vec256 + v0_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v1_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v2_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v3_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v0__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_14, v2_14); + Lib_IntVector_Intrinsics_vec256 + v1__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_14, v2_14); + Lib_IntVector_Intrinsics_vec256 + v2__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_14, v3_14); + Lib_IntVector_Intrinsics_vec256 + v3__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_14, v3_14); + Lib_IntVector_Intrinsics_vec256 ws28 = v0__14; + Lib_IntVector_Intrinsics_vec256 ws29 = v2__14; + Lib_IntVector_Intrinsics_vec256 ws30 = v1__14; + Lib_IntVector_Intrinsics_vec256 ws31 = v3__14; + ws[0U] = ws0; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; + ws[31U] = ws31; + for (uint32_t i = 0U; i < 32U; i++) + { + Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); + } + uint8_t *b35 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + 48U - remOut, hbuf, remOut * sizeof (uint8_t)); + memcpy(b1 + 48U - remOut, hbuf + 256U, remOut * sizeof (uint8_t)); + memcpy(b2 + 48U - remOut, hbuf + 512U, remOut * sizeof (uint8_t)); + memcpy(b35 + 48U - remOut, hbuf + 768U, remOut * sizeof (uint8_t)); +} + +void +Hacl_Hash_SHA3_Simd256_sha3_512( + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, + uint8_t *input0, + uint8_t *input1, + uint8_t *input2, + uint8_t *input3, + uint32_t inputByteLen +) +{ + Hacl_Hash_SHA2_uint8_4p + ib = { .fst = input0, .snd = { .fst = input1, .snd = { .fst = input2, .snd = input3 } } }; + Hacl_Hash_SHA2_uint8_4p + rb = { .fst = output0, .snd = { .fst = output1, .snd = { .fst = 
output2, .snd = output3 } } }; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 s[25U] KRML_POST_ALIGN(32) = { 0U }; + uint32_t rateInBytes1 = 72U; + for (uint32_t i = 0U; i < inputByteLen / rateInBytes1; i++) + { + uint8_t b00[256U] = { 0U }; + uint8_t b10[256U] = { 0U }; + uint8_t b20[256U] = { 0U }; + uint8_t b30[256U] = { 0U }; + Hacl_Hash_SHA2_uint8_4p + b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; + uint8_t *b3 = ib.snd.snd.snd; + uint8_t *b2 = ib.snd.snd.fst; + uint8_t *b1 = ib.snd.fst; + uint8_t *b0 = ib.fst; + uint8_t *bl3 = b_.snd.snd.snd; + uint8_t *bl2 = b_.snd.snd.fst; + uint8_t *bl1 = b_.snd.fst; + uint8_t *bl0 = b_.fst; + memcpy(bl0, b0 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t)); + memcpy(bl1, b1 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t)); + memcpy(bl2, b2 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t)); + memcpy(bl3, b3 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t)); + Hacl_Hash_SHA3_Simd256_absorb_inner_256(rateInBytes1, b_, s); + } + uint8_t b00[256U] = { 0U }; + uint8_t b10[256U] = { 0U }; + uint8_t b20[256U] = { 0U }; + uint8_t b30[256U] = { 0U }; + Hacl_Hash_SHA2_uint8_4p + b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; + uint32_t rem = inputByteLen % rateInBytes1; + uint8_t *b31 = ib.snd.snd.snd; + uint8_t *b21 = ib.snd.snd.fst; + uint8_t *b11 = ib.snd.fst; + uint8_t *b01 = ib.fst; + uint8_t *bl3 = b_.snd.snd.snd; + uint8_t *bl2 = b_.snd.snd.fst; + uint8_t *bl1 = b_.snd.fst; + uint8_t *bl0 = b_.fst; + memcpy(bl0, b01 + inputByteLen - rem, rem * sizeof (uint8_t)); + memcpy(bl1, b11 + inputByteLen - rem, rem * sizeof (uint8_t)); + memcpy(bl2, b21 + inputByteLen - rem, rem * sizeof (uint8_t)); + memcpy(bl3, b31 + inputByteLen - rem, rem * sizeof (uint8_t)); + uint8_t *b32 = b_.snd.snd.snd; + uint8_t *b22 = b_.snd.snd.fst; + uint8_t *b12 = b_.snd.fst; + uint8_t *b02 = b_.fst; + b02[inputByteLen % rateInBytes1] = 0x06U; + b12[inputByteLen % rateInBytes1] = 0x06U; + b22[inputByteLen % rateInBytes1] = 0x06U; + b32[inputByteLen % rateInBytes1] = 0x06U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b33 = b_.snd.snd.snd; + uint8_t *b23 = b_.snd.snd.fst; + uint8_t *b13 = b_.snd.fst; + uint8_t *b03 = b_.fst; + ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03); + ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13); + ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23); + ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33); + ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 32U); + ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 32U); + ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 32U); + ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 32U); + ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 64U); + ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 64U); + ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 64U); + ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 64U); + ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 96U); + ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 96U); + ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 96U); + ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 96U); + ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 128U); + ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 128U); + ws32[18U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 128U); + ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 128U); + ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 160U); + ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 160U); + ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 160U); + ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 160U); + ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 192U); + ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 192U); + ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 192U); + ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 192U); + ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 224U); + ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 224U); + ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 224U); + ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 224U); + Lib_IntVector_Intrinsics_vec256 v00 = ws32[0U]; + Lib_IntVector_Intrinsics_vec256 v10 = ws32[1U]; + Lib_IntVector_Intrinsics_vec256 v20 = ws32[2U]; + Lib_IntVector_Intrinsics_vec256 v30 = ws32[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v1_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v2_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v3_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v0__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v1__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v2__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 + v3__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 ws00 = v0__; + Lib_IntVector_Intrinsics_vec256 ws110 = v2__; + Lib_IntVector_Intrinsics_vec256 ws210 = v1__; + Lib_IntVector_Intrinsics_vec256 ws33 = v3__; + Lib_IntVector_Intrinsics_vec256 v01 = ws32[4U]; + Lib_IntVector_Intrinsics_vec256 v11 = ws32[5U]; + Lib_IntVector_Intrinsics_vec256 v21 = ws32[6U]; + Lib_IntVector_Intrinsics_vec256 v31 = ws32[7U]; + Lib_IntVector_Intrinsics_vec256 + v0_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v1_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v2_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v3_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v0__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v1__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v2__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 + v3__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 ws40 = v0__0; + Lib_IntVector_Intrinsics_vec256 ws50 = v2__0; + Lib_IntVector_Intrinsics_vec256 ws60 = v1__0; + Lib_IntVector_Intrinsics_vec256 ws70 = v3__0; + Lib_IntVector_Intrinsics_vec256 v02 = ws32[8U]; + Lib_IntVector_Intrinsics_vec256 v12 = ws32[9U]; + Lib_IntVector_Intrinsics_vec256 v22 = 
ws32[10U]; + Lib_IntVector_Intrinsics_vec256 v32 = ws32[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v1_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v2_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v3_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v0__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v1__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v2__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 + v3__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 ws80 = v0__1; + Lib_IntVector_Intrinsics_vec256 ws90 = v2__1; + Lib_IntVector_Intrinsics_vec256 ws100 = v1__1; + Lib_IntVector_Intrinsics_vec256 ws111 = v3__1; + Lib_IntVector_Intrinsics_vec256 v03 = ws32[12U]; + Lib_IntVector_Intrinsics_vec256 v13 = ws32[13U]; + Lib_IntVector_Intrinsics_vec256 v23 = ws32[14U]; + Lib_IntVector_Intrinsics_vec256 v33 = ws32[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v1_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v2_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v3_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v0__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v1__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v2__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 + v3__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 ws120 = v0__2; + Lib_IntVector_Intrinsics_vec256 ws130 = v2__2; + Lib_IntVector_Intrinsics_vec256 ws140 = v1__2; + Lib_IntVector_Intrinsics_vec256 ws150 = v3__2; + Lib_IntVector_Intrinsics_vec256 v04 = ws32[16U]; + Lib_IntVector_Intrinsics_vec256 v14 = ws32[17U]; + Lib_IntVector_Intrinsics_vec256 v24 = ws32[18U]; + Lib_IntVector_Intrinsics_vec256 v34 = ws32[19U]; + Lib_IntVector_Intrinsics_vec256 + v0_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v1_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v2_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v3_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v0__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v1__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v2__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 + v3__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 ws160 = v0__3; + Lib_IntVector_Intrinsics_vec256 ws170 = v2__3; + Lib_IntVector_Intrinsics_vec256 ws180 = v1__3; + 
Lib_IntVector_Intrinsics_vec256 ws190 = v3__3; + Lib_IntVector_Intrinsics_vec256 v05 = ws32[20U]; + Lib_IntVector_Intrinsics_vec256 v15 = ws32[21U]; + Lib_IntVector_Intrinsics_vec256 v25 = ws32[22U]; + Lib_IntVector_Intrinsics_vec256 v35 = ws32[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v1_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v2_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v3_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v0__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v1__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v2__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 + v3__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 ws200 = v0__4; + Lib_IntVector_Intrinsics_vec256 ws211 = v2__4; + Lib_IntVector_Intrinsics_vec256 ws220 = v1__4; + Lib_IntVector_Intrinsics_vec256 ws230 = v3__4; + Lib_IntVector_Intrinsics_vec256 v06 = ws32[24U]; + Lib_IntVector_Intrinsics_vec256 v16 = ws32[25U]; + Lib_IntVector_Intrinsics_vec256 v26 = ws32[26U]; + Lib_IntVector_Intrinsics_vec256 v36 = ws32[27U]; + Lib_IntVector_Intrinsics_vec256 + v0_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v1_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v2_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v3_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v0__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v1__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v2__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 + v3__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 ws240 = v0__5; + Lib_IntVector_Intrinsics_vec256 ws250 = v2__5; + Lib_IntVector_Intrinsics_vec256 ws260 = v1__5; + Lib_IntVector_Intrinsics_vec256 ws270 = v3__5; + Lib_IntVector_Intrinsics_vec256 v07 = ws32[28U]; + Lib_IntVector_Intrinsics_vec256 v17 = ws32[29U]; + Lib_IntVector_Intrinsics_vec256 v27 = ws32[30U]; + Lib_IntVector_Intrinsics_vec256 v37 = ws32[31U]; + Lib_IntVector_Intrinsics_vec256 + v0_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v07, v17); + Lib_IntVector_Intrinsics_vec256 + v1_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v07, v17); + Lib_IntVector_Intrinsics_vec256 + v2_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v27, v37); + Lib_IntVector_Intrinsics_vec256 + v3_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v27, v37); + Lib_IntVector_Intrinsics_vec256 + v0__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v1__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v2__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 + v3__6 = 
Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 ws280 = v0__6; + Lib_IntVector_Intrinsics_vec256 ws290 = v2__6; + Lib_IntVector_Intrinsics_vec256 ws300 = v1__6; + Lib_IntVector_Intrinsics_vec256 ws310 = v3__6; + ws32[0U] = ws00; + ws32[1U] = ws110; + ws32[2U] = ws210; + ws32[3U] = ws33; + ws32[4U] = ws40; + ws32[5U] = ws50; + ws32[6U] = ws60; + ws32[7U] = ws70; + ws32[8U] = ws80; + ws32[9U] = ws90; + ws32[10U] = ws100; + ws32[11U] = ws111; + ws32[12U] = ws120; + ws32[13U] = ws130; + ws32[14U] = ws140; + ws32[15U] = ws150; + ws32[16U] = ws160; + ws32[17U] = ws170; + ws32[18U] = ws180; + ws32[19U] = ws190; + ws32[20U] = ws200; + ws32[21U] = ws211; + ws32[22U] = ws220; + ws32[23U] = ws230; + ws32[24U] = ws240; + ws32[25U] = ws250; + ws32[26U] = ws260; + ws32[27U] = ws270; + ws32[28U] = ws280; + ws32[29U] = ws290; + ws32[30U] = ws300; + ws32[31U] = ws310; + for (uint32_t i = 0U; i < 25U; i++) + { + s[i] = Lib_IntVector_Intrinsics_vec256_xor(s[i], ws32[i]); + } + uint8_t b04[256U] = { 0U }; + uint8_t b14[256U] = { 0U }; + uint8_t b24[256U] = { 0U }; + uint8_t b34[256U] = { 0U }; + Hacl_Hash_SHA2_uint8_4p + b = { .fst = b04, .snd = { .fst = b14, .snd = { .fst = b24, .snd = b34 } } }; + uint8_t *b3 = b.snd.snd.snd; + uint8_t *b25 = b.snd.snd.fst; + uint8_t *b15 = b.snd.fst; + uint8_t *b05 = b.fst; + b05[rateInBytes1 - 1U] = 0x80U; + b15[rateInBytes1 - 1U] = 0x80U; + b25[rateInBytes1 - 1U] = 0x80U; + b3[rateInBytes1 - 1U] = 0x80U; + Hacl_Hash_SHA3_Simd256_absorb_inner_256(rateInBytes1, b, s); + for (uint32_t i0 = 0U; i0 < 64U / rateInBytes1; i0++) + { + uint8_t hbuf[1024U] = { 0U }; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws[32U] KRML_POST_ALIGN(32) = { 0U }; + memcpy(ws, s, 25U * sizeof (Lib_IntVector_Intrinsics_vec256)); + Lib_IntVector_Intrinsics_vec256 v08 = ws[0U]; + Lib_IntVector_Intrinsics_vec256 v18 = ws[1U]; + Lib_IntVector_Intrinsics_vec256 v28 = ws[2U]; + Lib_IntVector_Intrinsics_vec256 v38 = ws[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v08, v18); + Lib_IntVector_Intrinsics_vec256 + v1_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v08, v18); + Lib_IntVector_Intrinsics_vec256 + v2_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v28, v38); + Lib_IntVector_Intrinsics_vec256 + v3_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v28, v38); + Lib_IntVector_Intrinsics_vec256 + v0__7 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_7, v2_7); + Lib_IntVector_Intrinsics_vec256 + v1__7 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_7, v2_7); + Lib_IntVector_Intrinsics_vec256 + v2__7 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_7, v3_7); + Lib_IntVector_Intrinsics_vec256 + v3__7 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_7, v3_7); + Lib_IntVector_Intrinsics_vec256 ws0 = v0__7; + Lib_IntVector_Intrinsics_vec256 ws1 = v2__7; + Lib_IntVector_Intrinsics_vec256 ws2 = v1__7; + Lib_IntVector_Intrinsics_vec256 ws3 = v3__7; + Lib_IntVector_Intrinsics_vec256 v09 = ws[4U]; + Lib_IntVector_Intrinsics_vec256 v19 = ws[5U]; + Lib_IntVector_Intrinsics_vec256 v29 = ws[6U]; + Lib_IntVector_Intrinsics_vec256 v39 = ws[7U]; + Lib_IntVector_Intrinsics_vec256 + v0_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v09, v19); + Lib_IntVector_Intrinsics_vec256 + v1_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v09, v19); + Lib_IntVector_Intrinsics_vec256 + v2_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v29, v39); + 
Lib_IntVector_Intrinsics_vec256 + v3_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v29, v39); + Lib_IntVector_Intrinsics_vec256 + v0__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_8, v2_8); + Lib_IntVector_Intrinsics_vec256 + v1__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_8, v2_8); + Lib_IntVector_Intrinsics_vec256 + v2__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_8, v3_8); + Lib_IntVector_Intrinsics_vec256 + v3__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_8, v3_8); + Lib_IntVector_Intrinsics_vec256 ws4 = v0__8; + Lib_IntVector_Intrinsics_vec256 ws5 = v2__8; + Lib_IntVector_Intrinsics_vec256 ws6 = v1__8; + Lib_IntVector_Intrinsics_vec256 ws7 = v3__8; + Lib_IntVector_Intrinsics_vec256 v010 = ws[8U]; + Lib_IntVector_Intrinsics_vec256 v110 = ws[9U]; + Lib_IntVector_Intrinsics_vec256 v210 = ws[10U]; + Lib_IntVector_Intrinsics_vec256 v310 = ws[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v010, v110); + Lib_IntVector_Intrinsics_vec256 + v1_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v010, v110); + Lib_IntVector_Intrinsics_vec256 + v2_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v210, v310); + Lib_IntVector_Intrinsics_vec256 + v3_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v210, v310); + Lib_IntVector_Intrinsics_vec256 + v0__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_9, v2_9); + Lib_IntVector_Intrinsics_vec256 + v1__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_9, v2_9); + Lib_IntVector_Intrinsics_vec256 + v2__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_9, v3_9); + Lib_IntVector_Intrinsics_vec256 + v3__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_9, v3_9); + Lib_IntVector_Intrinsics_vec256 ws8 = v0__9; + Lib_IntVector_Intrinsics_vec256 ws9 = v2__9; + Lib_IntVector_Intrinsics_vec256 ws10 = v1__9; + Lib_IntVector_Intrinsics_vec256 ws11 = v3__9; + Lib_IntVector_Intrinsics_vec256 v011 = ws[12U]; + Lib_IntVector_Intrinsics_vec256 v111 = ws[13U]; + Lib_IntVector_Intrinsics_vec256 v211 = ws[14U]; + Lib_IntVector_Intrinsics_vec256 v311 = ws[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_10 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v011, v111); + Lib_IntVector_Intrinsics_vec256 + v1_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v011, v111); + Lib_IntVector_Intrinsics_vec256 + v2_10 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v211, v311); + Lib_IntVector_Intrinsics_vec256 + v3_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v211, v311); + Lib_IntVector_Intrinsics_vec256 + v0__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_10, v2_10); + Lib_IntVector_Intrinsics_vec256 + v1__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_10, v2_10); + Lib_IntVector_Intrinsics_vec256 + v2__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_10, v3_10); + Lib_IntVector_Intrinsics_vec256 + v3__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_10, v3_10); + Lib_IntVector_Intrinsics_vec256 ws12 = v0__10; + Lib_IntVector_Intrinsics_vec256 ws13 = v2__10; + Lib_IntVector_Intrinsics_vec256 ws14 = v1__10; + Lib_IntVector_Intrinsics_vec256 ws15 = v3__10; + Lib_IntVector_Intrinsics_vec256 v012 = ws[16U]; + Lib_IntVector_Intrinsics_vec256 v112 = ws[17U]; + Lib_IntVector_Intrinsics_vec256 v212 = ws[18U]; + Lib_IntVector_Intrinsics_vec256 v312 = ws[19U]; + Lib_IntVector_Intrinsics_vec256 + v0_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v012, v112); + 
Lib_IntVector_Intrinsics_vec256 + v1_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v012, v112); + Lib_IntVector_Intrinsics_vec256 + v2_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v212, v312); + Lib_IntVector_Intrinsics_vec256 + v3_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v212, v312); + Lib_IntVector_Intrinsics_vec256 + v0__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_11, v2_11); + Lib_IntVector_Intrinsics_vec256 + v1__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_11, v2_11); + Lib_IntVector_Intrinsics_vec256 + v2__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_11, v3_11); + Lib_IntVector_Intrinsics_vec256 + v3__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_11, v3_11); + Lib_IntVector_Intrinsics_vec256 ws16 = v0__11; + Lib_IntVector_Intrinsics_vec256 ws17 = v2__11; + Lib_IntVector_Intrinsics_vec256 ws18 = v1__11; + Lib_IntVector_Intrinsics_vec256 ws19 = v3__11; + Lib_IntVector_Intrinsics_vec256 v013 = ws[20U]; + Lib_IntVector_Intrinsics_vec256 v113 = ws[21U]; + Lib_IntVector_Intrinsics_vec256 v213 = ws[22U]; + Lib_IntVector_Intrinsics_vec256 v313 = ws[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v013, v113); + Lib_IntVector_Intrinsics_vec256 + v1_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v013, v113); + Lib_IntVector_Intrinsics_vec256 + v2_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v213, v313); + Lib_IntVector_Intrinsics_vec256 + v3_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v213, v313); + Lib_IntVector_Intrinsics_vec256 + v0__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_12, v2_12); + Lib_IntVector_Intrinsics_vec256 + v1__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_12, v2_12); + Lib_IntVector_Intrinsics_vec256 + v2__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_12, v3_12); + Lib_IntVector_Intrinsics_vec256 + v3__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_12, v3_12); + Lib_IntVector_Intrinsics_vec256 ws20 = v0__12; + Lib_IntVector_Intrinsics_vec256 ws21 = v2__12; + Lib_IntVector_Intrinsics_vec256 ws22 = v1__12; + Lib_IntVector_Intrinsics_vec256 ws23 = v3__12; + Lib_IntVector_Intrinsics_vec256 v014 = ws[24U]; + Lib_IntVector_Intrinsics_vec256 v114 = ws[25U]; + Lib_IntVector_Intrinsics_vec256 v214 = ws[26U]; + Lib_IntVector_Intrinsics_vec256 v314 = ws[27U]; + Lib_IntVector_Intrinsics_vec256 + v0_13 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v014, v114); + Lib_IntVector_Intrinsics_vec256 + v1_13 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v014, v114); + Lib_IntVector_Intrinsics_vec256 + v2_13 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v214, v314); + Lib_IntVector_Intrinsics_vec256 + v3_13 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v214, v314); + Lib_IntVector_Intrinsics_vec256 + v0__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_13, v2_13); + Lib_IntVector_Intrinsics_vec256 + v1__13 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_13, v2_13); + Lib_IntVector_Intrinsics_vec256 + v2__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_13, v3_13); + Lib_IntVector_Intrinsics_vec256 + v3__13 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_13, v3_13); + Lib_IntVector_Intrinsics_vec256 ws24 = v0__13; + Lib_IntVector_Intrinsics_vec256 ws25 = v2__13; + Lib_IntVector_Intrinsics_vec256 ws26 = v1__13; + Lib_IntVector_Intrinsics_vec256 ws27 = v3__13; + Lib_IntVector_Intrinsics_vec256 v0 = ws[28U]; + 
Lib_IntVector_Intrinsics_vec256 v1 = ws[29U]; + Lib_IntVector_Intrinsics_vec256 v2 = ws[30U]; + Lib_IntVector_Intrinsics_vec256 v3 = ws[31U]; + Lib_IntVector_Intrinsics_vec256 + v0_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v1_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v2_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v3_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v0__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_14, v2_14); + Lib_IntVector_Intrinsics_vec256 + v1__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_14, v2_14); + Lib_IntVector_Intrinsics_vec256 + v2__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_14, v3_14); + Lib_IntVector_Intrinsics_vec256 + v3__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_14, v3_14); + Lib_IntVector_Intrinsics_vec256 ws28 = v0__14; + Lib_IntVector_Intrinsics_vec256 ws29 = v2__14; + Lib_IntVector_Intrinsics_vec256 ws30 = v1__14; + Lib_IntVector_Intrinsics_vec256 ws31 = v3__14; + ws[0U] = ws0; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; + ws[31U] = ws31; + for (uint32_t i = 0U; i < 32U; i++) + { + Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); + } + uint8_t *b35 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + i0 * rateInBytes1, hbuf, rateInBytes1 * sizeof (uint8_t)); + memcpy(b1 + i0 * rateInBytes1, hbuf + 256U, rateInBytes1 * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes1, hbuf + 512U, rateInBytes1 * sizeof (uint8_t)); + memcpy(b35 + i0 * rateInBytes1, hbuf + 768U, rateInBytes1 * sizeof (uint8_t)); + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____0 = s[i + 0U]; + Lib_IntVector_Intrinsics_vec256 uu____1 = s[i + 5U]; + Lib_IntVector_Intrinsics_vec256 uu____2 = s[i + 10U]; + _C[i] = + Lib_IntVector_Intrinsics_vec256_xor(uu____0, + Lib_IntVector_Intrinsics_vec256_xor(uu____1, + Lib_IntVector_Intrinsics_vec256_xor(uu____2, + Lib_IntVector_Intrinsics_vec256_xor(s[i + 15U], s[i + 20U]))));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____3 = _C[(i2 + 4U) % 5U]; + Lib_IntVector_Intrinsics_vec256 uu____4 = _C[(i2 + 1U) % 5U]; + Lib_IntVector_Intrinsics_vec256 + _D = + Lib_IntVector_Intrinsics_vec256_xor(uu____3, + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____4, + 1U), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____4, 63U))); + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + s[i2 + 5U * i] = Lib_IntVector_Intrinsics_vec256_xor(s[i2 + 5U * i], _D););); + Lib_IntVector_Intrinsics_vec256 x = s[1U]; + Lib_IntVector_Intrinsics_vec256 current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = 
Hacl_Hash_SHA3_keccak_piln[i]; + uint32_t r = Hacl_Hash_SHA3_keccak_rotc[i]; + Lib_IntVector_Intrinsics_vec256 temp = s[_Y]; + Lib_IntVector_Intrinsics_vec256 uu____5 = current; + s[_Y] = + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____5, + r), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____5, 64U - r)); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____6 = s[0U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____7 = Lib_IntVector_Intrinsics_vec256_lognot(s[1U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v015 = + Lib_IntVector_Intrinsics_vec256_xor(uu____6, + Lib_IntVector_Intrinsics_vec256_and(uu____7, s[2U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____8 = s[1U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____9 = Lib_IntVector_Intrinsics_vec256_lognot(s[2U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v115 = + Lib_IntVector_Intrinsics_vec256_xor(uu____8, + Lib_IntVector_Intrinsics_vec256_and(uu____9, s[3U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____10 = s[2U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____11 = Lib_IntVector_Intrinsics_vec256_lognot(s[3U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v215 = + Lib_IntVector_Intrinsics_vec256_xor(uu____10, + Lib_IntVector_Intrinsics_vec256_and(uu____11, s[4U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____12 = s[3U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____13 = Lib_IntVector_Intrinsics_vec256_lognot(s[4U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v315 = + Lib_IntVector_Intrinsics_vec256_xor(uu____12, + Lib_IntVector_Intrinsics_vec256_and(uu____13, s[0U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____14 = s[4U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____15 = Lib_IntVector_Intrinsics_vec256_lognot(s[0U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v4 = + Lib_IntVector_Intrinsics_vec256_xor(uu____14, + Lib_IntVector_Intrinsics_vec256_and(uu____15, s[1U + 5U * i])); + s[0U + 5U * i] = v015; + s[1U + 5U * i] = v115; + s[2U + 5U * i] = v215; + s[3U + 5U * i] = v315; + s[4U + 5U * i] = v4;); + uint64_t c = Hacl_Hash_SHA3_keccak_rndc[i1]; + Lib_IntVector_Intrinsics_vec256 uu____16 = s[0U]; + s[0U] = + Lib_IntVector_Intrinsics_vec256_xor(uu____16, + Lib_IntVector_Intrinsics_vec256_load64(c)); + } + } + uint32_t remOut = 64U % rateInBytes1; + uint8_t hbuf[1024U] = { 0U }; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws[32U] KRML_POST_ALIGN(32) = { 0U }; + memcpy(ws, s, 25U * sizeof (Lib_IntVector_Intrinsics_vec256)); + Lib_IntVector_Intrinsics_vec256 v08 = ws[0U]; + Lib_IntVector_Intrinsics_vec256 v18 = ws[1U]; + Lib_IntVector_Intrinsics_vec256 v28 = ws[2U]; + Lib_IntVector_Intrinsics_vec256 v38 = ws[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v08, v18); + Lib_IntVector_Intrinsics_vec256 + v1_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v08, v18); + Lib_IntVector_Intrinsics_vec256 + v2_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v28, v38); + Lib_IntVector_Intrinsics_vec256 + v3_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v28, v38); + Lib_IntVector_Intrinsics_vec256 + v0__7 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_7, v2_7); + Lib_IntVector_Intrinsics_vec256 + v1__7 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_7, v2_7); + Lib_IntVector_Intrinsics_vec256 + v2__7 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_7, v3_7); + Lib_IntVector_Intrinsics_vec256 + v3__7 = 
Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_7, v3_7); + Lib_IntVector_Intrinsics_vec256 ws0 = v0__7; + Lib_IntVector_Intrinsics_vec256 ws1 = v2__7; + Lib_IntVector_Intrinsics_vec256 ws2 = v1__7; + Lib_IntVector_Intrinsics_vec256 ws3 = v3__7; + Lib_IntVector_Intrinsics_vec256 v09 = ws[4U]; + Lib_IntVector_Intrinsics_vec256 v19 = ws[5U]; + Lib_IntVector_Intrinsics_vec256 v29 = ws[6U]; + Lib_IntVector_Intrinsics_vec256 v39 = ws[7U]; + Lib_IntVector_Intrinsics_vec256 + v0_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v09, v19); + Lib_IntVector_Intrinsics_vec256 + v1_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v09, v19); + Lib_IntVector_Intrinsics_vec256 + v2_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v29, v39); + Lib_IntVector_Intrinsics_vec256 + v3_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v29, v39); + Lib_IntVector_Intrinsics_vec256 + v0__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_8, v2_8); + Lib_IntVector_Intrinsics_vec256 + v1__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_8, v2_8); + Lib_IntVector_Intrinsics_vec256 + v2__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_8, v3_8); + Lib_IntVector_Intrinsics_vec256 + v3__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_8, v3_8); + Lib_IntVector_Intrinsics_vec256 ws4 = v0__8; + Lib_IntVector_Intrinsics_vec256 ws5 = v2__8; + Lib_IntVector_Intrinsics_vec256 ws6 = v1__8; + Lib_IntVector_Intrinsics_vec256 ws7 = v3__8; + Lib_IntVector_Intrinsics_vec256 v010 = ws[8U]; + Lib_IntVector_Intrinsics_vec256 v110 = ws[9U]; + Lib_IntVector_Intrinsics_vec256 v210 = ws[10U]; + Lib_IntVector_Intrinsics_vec256 v310 = ws[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v010, v110); + Lib_IntVector_Intrinsics_vec256 + v1_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v010, v110); + Lib_IntVector_Intrinsics_vec256 + v2_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v210, v310); + Lib_IntVector_Intrinsics_vec256 + v3_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v210, v310); + Lib_IntVector_Intrinsics_vec256 + v0__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_9, v2_9); + Lib_IntVector_Intrinsics_vec256 + v1__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_9, v2_9); + Lib_IntVector_Intrinsics_vec256 + v2__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_9, v3_9); + Lib_IntVector_Intrinsics_vec256 + v3__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_9, v3_9); + Lib_IntVector_Intrinsics_vec256 ws8 = v0__9; + Lib_IntVector_Intrinsics_vec256 ws9 = v2__9; + Lib_IntVector_Intrinsics_vec256 ws10 = v1__9; + Lib_IntVector_Intrinsics_vec256 ws11 = v3__9; + Lib_IntVector_Intrinsics_vec256 v011 = ws[12U]; + Lib_IntVector_Intrinsics_vec256 v111 = ws[13U]; + Lib_IntVector_Intrinsics_vec256 v211 = ws[14U]; + Lib_IntVector_Intrinsics_vec256 v311 = ws[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_10 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v011, v111); + Lib_IntVector_Intrinsics_vec256 + v1_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v011, v111); + Lib_IntVector_Intrinsics_vec256 + v2_10 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v211, v311); + Lib_IntVector_Intrinsics_vec256 + v3_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v211, v311); + Lib_IntVector_Intrinsics_vec256 + v0__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_10, v2_10); + Lib_IntVector_Intrinsics_vec256 + v1__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_10, 
v2_10); + Lib_IntVector_Intrinsics_vec256 + v2__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_10, v3_10); + Lib_IntVector_Intrinsics_vec256 + v3__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_10, v3_10); + Lib_IntVector_Intrinsics_vec256 ws12 = v0__10; + Lib_IntVector_Intrinsics_vec256 ws13 = v2__10; + Lib_IntVector_Intrinsics_vec256 ws14 = v1__10; + Lib_IntVector_Intrinsics_vec256 ws15 = v3__10; + Lib_IntVector_Intrinsics_vec256 v012 = ws[16U]; + Lib_IntVector_Intrinsics_vec256 v112 = ws[17U]; + Lib_IntVector_Intrinsics_vec256 v212 = ws[18U]; + Lib_IntVector_Intrinsics_vec256 v312 = ws[19U]; + Lib_IntVector_Intrinsics_vec256 + v0_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v012, v112); + Lib_IntVector_Intrinsics_vec256 + v1_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v012, v112); + Lib_IntVector_Intrinsics_vec256 + v2_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v212, v312); + Lib_IntVector_Intrinsics_vec256 + v3_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v212, v312); + Lib_IntVector_Intrinsics_vec256 + v0__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_11, v2_11); + Lib_IntVector_Intrinsics_vec256 + v1__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_11, v2_11); + Lib_IntVector_Intrinsics_vec256 + v2__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_11, v3_11); + Lib_IntVector_Intrinsics_vec256 + v3__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_11, v3_11); + Lib_IntVector_Intrinsics_vec256 ws16 = v0__11; + Lib_IntVector_Intrinsics_vec256 ws17 = v2__11; + Lib_IntVector_Intrinsics_vec256 ws18 = v1__11; + Lib_IntVector_Intrinsics_vec256 ws19 = v3__11; + Lib_IntVector_Intrinsics_vec256 v013 = ws[20U]; + Lib_IntVector_Intrinsics_vec256 v113 = ws[21U]; + Lib_IntVector_Intrinsics_vec256 v213 = ws[22U]; + Lib_IntVector_Intrinsics_vec256 v313 = ws[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v013, v113); + Lib_IntVector_Intrinsics_vec256 + v1_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v013, v113); + Lib_IntVector_Intrinsics_vec256 + v2_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v213, v313); + Lib_IntVector_Intrinsics_vec256 + v3_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v213, v313); + Lib_IntVector_Intrinsics_vec256 + v0__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_12, v2_12); + Lib_IntVector_Intrinsics_vec256 + v1__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_12, v2_12); + Lib_IntVector_Intrinsics_vec256 + v2__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_12, v3_12); + Lib_IntVector_Intrinsics_vec256 + v3__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_12, v3_12); + Lib_IntVector_Intrinsics_vec256 ws20 = v0__12; + Lib_IntVector_Intrinsics_vec256 ws21 = v2__12; + Lib_IntVector_Intrinsics_vec256 ws22 = v1__12; + Lib_IntVector_Intrinsics_vec256 ws23 = v3__12; + Lib_IntVector_Intrinsics_vec256 v014 = ws[24U]; + Lib_IntVector_Intrinsics_vec256 v114 = ws[25U]; + Lib_IntVector_Intrinsics_vec256 v214 = ws[26U]; + Lib_IntVector_Intrinsics_vec256 v314 = ws[27U]; + Lib_IntVector_Intrinsics_vec256 + v0_13 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v014, v114); + Lib_IntVector_Intrinsics_vec256 + v1_13 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v014, v114); + Lib_IntVector_Intrinsics_vec256 + v2_13 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v214, v314); + Lib_IntVector_Intrinsics_vec256 + v3_13 = 
Lib_IntVector_Intrinsics_vec256_interleave_high64(v214, v314); + Lib_IntVector_Intrinsics_vec256 + v0__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_13, v2_13); + Lib_IntVector_Intrinsics_vec256 + v1__13 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_13, v2_13); + Lib_IntVector_Intrinsics_vec256 + v2__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_13, v3_13); + Lib_IntVector_Intrinsics_vec256 + v3__13 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_13, v3_13); + Lib_IntVector_Intrinsics_vec256 ws24 = v0__13; + Lib_IntVector_Intrinsics_vec256 ws25 = v2__13; + Lib_IntVector_Intrinsics_vec256 ws26 = v1__13; + Lib_IntVector_Intrinsics_vec256 ws27 = v3__13; + Lib_IntVector_Intrinsics_vec256 v0 = ws[28U]; + Lib_IntVector_Intrinsics_vec256 v1 = ws[29U]; + Lib_IntVector_Intrinsics_vec256 v2 = ws[30U]; + Lib_IntVector_Intrinsics_vec256 v3 = ws[31U]; + Lib_IntVector_Intrinsics_vec256 + v0_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v1_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v2_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v3_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v0__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_14, v2_14); + Lib_IntVector_Intrinsics_vec256 + v1__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_14, v2_14); + Lib_IntVector_Intrinsics_vec256 + v2__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_14, v3_14); + Lib_IntVector_Intrinsics_vec256 + v3__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_14, v3_14); + Lib_IntVector_Intrinsics_vec256 ws28 = v0__14; + Lib_IntVector_Intrinsics_vec256 ws29 = v2__14; + Lib_IntVector_Intrinsics_vec256 ws30 = v1__14; + Lib_IntVector_Intrinsics_vec256 ws31 = v3__14; + ws[0U] = ws0; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; + ws[31U] = ws31; + for (uint32_t i = 0U; i < 32U; i++) + { + Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); + } + uint8_t *b35 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + 64U - remOut, hbuf, remOut * sizeof (uint8_t)); + memcpy(b1 + 64U - remOut, hbuf + 256U, remOut * sizeof (uint8_t)); + memcpy(b2 + 64U - remOut, hbuf + 512U, remOut * sizeof (uint8_t)); + memcpy(b35 + 64U - remOut, hbuf + 768U, remOut * sizeof (uint8_t)); +} + +/** +Allocate a quadruple state buffer (200 bytes each) +*/ +Lib_IntVector_Intrinsics_vec256 *Hacl_Hash_SHA3_Simd256_state_malloc(void) +{ + Lib_IntVector_Intrinsics_vec256 + *buf = + (Lib_IntVector_Intrinsics_vec256 *)KRML_ALIGNED_MALLOC(32, + sizeof (Lib_IntVector_Intrinsics_vec256) * 25U); + memset(buf, 0U, 25U * sizeof (Lib_IntVector_Intrinsics_vec256)); + return buf; +} + +/** +Free a quadruple state buffer +*/ +void Hacl_Hash_SHA3_Simd256_state_free(Lib_IntVector_Intrinsics_vec256 *s) +{ + KRML_ALIGNED_FREE(s); +} + +/** +Absorb a number of full blocks of the 4 input buffers and write the output states + + This function is intended to receive a quadruple hash state and 4 input buffers. + It processes an input that is a multiple of 168 bytes (the SHAKE128 block size); + any additional bytes of a final partial block in each buffer are ignored. + + The argument `state` (IN/OUT) points to the quadruple hash state, + i.e., Lib_IntVector_Intrinsics_vec256[25] + The arguments `input0/input1/input2/input3` (IN) point to `inputByteLen` bytes + of valid memory for each buffer, i.e., uint8_t[inputByteLen] +*/ +void +Hacl_Hash_SHA3_Simd256_shake128_absorb_nblocks( + Lib_IntVector_Intrinsics_vec256 *state, + uint8_t *input0, + uint8_t *input1, + uint8_t *input2, + uint8_t *input3, + uint32_t inputByteLen +) +{ + for (uint32_t i = 0U; i < inputByteLen / 168U; i++) + { + uint8_t b00[256U] = { 0U }; + uint8_t b10[256U] = { 0U }; + uint8_t b20[256U] = { 0U }; + uint8_t b30[256U] = { 0U }; + Hacl_Hash_SHA2_uint8_4p + b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; + uint8_t *b0 = input0; + uint8_t *b1 = input1; + uint8_t *b2 = input2; + uint8_t *b3 = input3; + uint8_t *bl3 = b_.snd.snd.snd; + uint8_t *bl2 = b_.snd.snd.fst; + uint8_t *bl1 = b_.snd.fst; + uint8_t *bl0 = b_.fst; + memcpy(bl0, b0 + i * 168U, 168U * sizeof (uint8_t)); + memcpy(bl1, b1 + i * 168U, 168U * sizeof (uint8_t)); + memcpy(bl2, b2 + i * 168U, 168U * sizeof (uint8_t)); + memcpy(bl3, b3 + i * 168U, 168U * sizeof (uint8_t)); + Hacl_Hash_SHA3_Simd256_absorb_inner_256(168U, b_, state); + } +} + +/** +Absorb the final partial blocks of the 4 input buffers and write the output states + + This function is intended to receive a quadruple hash state and 4 input buffers. + It processes the sequence of bytes at the end of each input buffer that is shorter + than 168 bytes (the SHAKE128 block size); + any bytes of full blocks at the start of the input buffers are ignored.
+ + The argument `state` (IN/OUT) points to the quadruple hash state, + i.e., Lib_IntVector_Intrinsics_vec256[25] + The arguments `input0/input1/input2/input3` (IN) point to `inputByteLen` bytes + of valid memory for each buffer, i.e., uint8_t[inputByteLen] + + Note: the full size of the input buffers must be passed as `inputByteLen`, including + the number of full-block bytes at the start of each input buffer that are ignored +*/ +void +Hacl_Hash_SHA3_Simd256_shake128_absorb_final( + Lib_IntVector_Intrinsics_vec256 *state, + uint8_t *input0, + uint8_t *input1, + uint8_t *input2, + uint8_t *input3, + uint32_t inputByteLen +) +{ + uint8_t b00[256U] = { 0U }; + uint8_t b10[256U] = { 0U }; + uint8_t b20[256U] = { 0U }; + uint8_t b30[256U] = { 0U }; + Hacl_Hash_SHA2_uint8_4p + b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; + uint32_t rem = inputByteLen % 168U; + uint8_t *b01 = input0; + uint8_t *b11 = input1; + uint8_t *b21 = input2; + uint8_t *b31 = input3; + uint8_t *bl3 = b_.snd.snd.snd; + uint8_t *bl2 = b_.snd.snd.fst; + uint8_t *bl1 = b_.snd.fst; + uint8_t *bl0 = b_.fst; + memcpy(bl0, b01 + inputByteLen - rem, rem * sizeof (uint8_t)); + memcpy(bl1, b11 + inputByteLen - rem, rem * sizeof (uint8_t)); + memcpy(bl2, b21 + inputByteLen - rem, rem * sizeof (uint8_t)); + memcpy(bl3, b31 + inputByteLen - rem, rem * sizeof (uint8_t)); + uint8_t *b32 = b_.snd.snd.snd; + uint8_t *b22 = b_.snd.snd.fst; + uint8_t *b12 = b_.snd.fst; + uint8_t *b02 = b_.fst; + b02[inputByteLen % 168U] = 0x1FU; + b12[inputByteLen % 168U] = 0x1FU; + b22[inputByteLen % 168U] = 0x1FU; + b32[inputByteLen % 168U] = 0x1FU; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b33 = b_.snd.snd.snd; + uint8_t *b23 = b_.snd.snd.fst; + uint8_t *b13 = b_.snd.fst; + uint8_t *b03 = b_.fst; + ws[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03); + ws[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13); + ws[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23); + ws[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33); + ws[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 32U); + ws[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 32U); + ws[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 32U); + ws[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 32U); + ws[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 64U); + ws[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 64U); + ws[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 64U); + ws[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 64U); + ws[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 96U); + ws[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 96U); + ws[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 96U); + ws[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 96U); + ws[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 128U); + ws[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 128U); + ws[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 128U); + ws[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 128U); + ws[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 160U); + ws[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 160U); + ws[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 160U); + ws[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 160U); + ws[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 192U); + ws[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 192U); + ws[26U] =
Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 192U); + ws[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 192U); + ws[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 224U); + ws[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 224U); + ws[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 224U); + ws[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 224U); + Lib_IntVector_Intrinsics_vec256 v00 = ws[0U]; + Lib_IntVector_Intrinsics_vec256 v10 = ws[1U]; + Lib_IntVector_Intrinsics_vec256 v20 = ws[2U]; + Lib_IntVector_Intrinsics_vec256 v30 = ws[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v1_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v2_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v3_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v0__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v1__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v2__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 + v3__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 ws0 = v0__; + Lib_IntVector_Intrinsics_vec256 ws1 = v2__; + Lib_IntVector_Intrinsics_vec256 ws2 = v1__; + Lib_IntVector_Intrinsics_vec256 ws3 = v3__; + Lib_IntVector_Intrinsics_vec256 v01 = ws[4U]; + Lib_IntVector_Intrinsics_vec256 v11 = ws[5U]; + Lib_IntVector_Intrinsics_vec256 v21 = ws[6U]; + Lib_IntVector_Intrinsics_vec256 v31 = ws[7U]; + Lib_IntVector_Intrinsics_vec256 + v0_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v1_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v2_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v3_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v0__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v1__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v2__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 + v3__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 ws4 = v0__0; + Lib_IntVector_Intrinsics_vec256 ws5 = v2__0; + Lib_IntVector_Intrinsics_vec256 ws6 = v1__0; + Lib_IntVector_Intrinsics_vec256 ws7 = v3__0; + Lib_IntVector_Intrinsics_vec256 v02 = ws[8U]; + Lib_IntVector_Intrinsics_vec256 v12 = ws[9U]; + Lib_IntVector_Intrinsics_vec256 v22 = ws[10U]; + Lib_IntVector_Intrinsics_vec256 v32 = ws[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v1_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v2_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v3_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v0__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_1, v2_1); + 
Lib_IntVector_Intrinsics_vec256 + v1__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v2__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 + v3__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 ws8 = v0__1; + Lib_IntVector_Intrinsics_vec256 ws9 = v2__1; + Lib_IntVector_Intrinsics_vec256 ws10 = v1__1; + Lib_IntVector_Intrinsics_vec256 ws11 = v3__1; + Lib_IntVector_Intrinsics_vec256 v03 = ws[12U]; + Lib_IntVector_Intrinsics_vec256 v13 = ws[13U]; + Lib_IntVector_Intrinsics_vec256 v23 = ws[14U]; + Lib_IntVector_Intrinsics_vec256 v33 = ws[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v1_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v2_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v3_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v0__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v1__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v2__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 + v3__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 ws12 = v0__2; + Lib_IntVector_Intrinsics_vec256 ws13 = v2__2; + Lib_IntVector_Intrinsics_vec256 ws14 = v1__2; + Lib_IntVector_Intrinsics_vec256 ws15 = v3__2; + Lib_IntVector_Intrinsics_vec256 v04 = ws[16U]; + Lib_IntVector_Intrinsics_vec256 v14 = ws[17U]; + Lib_IntVector_Intrinsics_vec256 v24 = ws[18U]; + Lib_IntVector_Intrinsics_vec256 v34 = ws[19U]; + Lib_IntVector_Intrinsics_vec256 + v0_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v1_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v2_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v3_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v0__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v1__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v2__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 + v3__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 ws16 = v0__3; + Lib_IntVector_Intrinsics_vec256 ws17 = v2__3; + Lib_IntVector_Intrinsics_vec256 ws18 = v1__3; + Lib_IntVector_Intrinsics_vec256 ws19 = v3__3; + Lib_IntVector_Intrinsics_vec256 v05 = ws[20U]; + Lib_IntVector_Intrinsics_vec256 v15 = ws[21U]; + Lib_IntVector_Intrinsics_vec256 v25 = ws[22U]; + Lib_IntVector_Intrinsics_vec256 v35 = ws[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v1_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v2_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v3_4 = 
Lib_IntVector_Intrinsics_vec256_interleave_high64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v0__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v1__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v2__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 + v3__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 ws20 = v0__4; + Lib_IntVector_Intrinsics_vec256 ws21 = v2__4; + Lib_IntVector_Intrinsics_vec256 ws22 = v1__4; + Lib_IntVector_Intrinsics_vec256 ws23 = v3__4; + Lib_IntVector_Intrinsics_vec256 v06 = ws[24U]; + Lib_IntVector_Intrinsics_vec256 v16 = ws[25U]; + Lib_IntVector_Intrinsics_vec256 v26 = ws[26U]; + Lib_IntVector_Intrinsics_vec256 v36 = ws[27U]; + Lib_IntVector_Intrinsics_vec256 + v0_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v1_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v2_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v3_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v0__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v1__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v2__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 + v3__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 ws24 = v0__5; + Lib_IntVector_Intrinsics_vec256 ws25 = v2__5; + Lib_IntVector_Intrinsics_vec256 ws26 = v1__5; + Lib_IntVector_Intrinsics_vec256 ws27 = v3__5; + Lib_IntVector_Intrinsics_vec256 v0 = ws[28U]; + Lib_IntVector_Intrinsics_vec256 v1 = ws[29U]; + Lib_IntVector_Intrinsics_vec256 v2 = ws[30U]; + Lib_IntVector_Intrinsics_vec256 v3 = ws[31U]; + Lib_IntVector_Intrinsics_vec256 + v0_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v1_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v2_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v3_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v0__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v1__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v2__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 + v3__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 ws28 = v0__6; + Lib_IntVector_Intrinsics_vec256 ws29 = v2__6; + Lib_IntVector_Intrinsics_vec256 ws30 = v1__6; + Lib_IntVector_Intrinsics_vec256 ws31 = v3__6; + ws[0U] = ws0; + ws[1U] = ws1; + ws[2U] = ws2; + ws[3U] = ws3; + ws[4U] = ws4; + ws[5U] = ws5; + ws[6U] = ws6; + ws[7U] = ws7; + ws[8U] = ws8; + ws[9U] = ws9; + ws[10U] = ws10; + ws[11U] = ws11; + ws[12U] = ws12; + ws[13U] = ws13; + ws[14U] = ws14; + ws[15U] = ws15; + ws[16U] = ws16; + ws[17U] = ws17; + ws[18U] = ws18; + ws[19U] = ws19; + ws[20U] = ws20; + ws[21U] = ws21; + ws[22U] = ws22; + ws[23U] = ws23; + 
ws[24U] = ws24; + ws[25U] = ws25; + ws[26U] = ws26; + ws[27U] = ws27; + ws[28U] = ws28; + ws[29U] = ws29; + ws[30U] = ws30; + ws[31U] = ws31; + for (uint32_t i = 0U; i < 25U; i++) + { + state[i] = Lib_IntVector_Intrinsics_vec256_xor(state[i], ws[i]); + } + uint8_t b04[256U] = { 0U }; + uint8_t b14[256U] = { 0U }; + uint8_t b24[256U] = { 0U }; + uint8_t b34[256U] = { 0U }; + Hacl_Hash_SHA2_uint8_4p + b = { .fst = b04, .snd = { .fst = b14, .snd = { .fst = b24, .snd = b34 } } }; + uint8_t *b3 = b.snd.snd.snd; + uint8_t *b2 = b.snd.snd.fst; + uint8_t *b1 = b.snd.fst; + uint8_t *b0 = b.fst; + b0[167U] = 0x80U; + b1[167U] = 0x80U; + b2[167U] = 0x80U; + b3[167U] = 0x80U; + Hacl_Hash_SHA3_Simd256_absorb_inner_256(168U, b, state); +} + +/** +Squeeze a quadruple hash state into 4 output buffers + + This function is intended to receive a quadruple hash state and 4 output buffers. + It produces 4 outputs, each a multiple of 168 bytes (the SHAKE128 block size); + any trailing partial block, i.e., the final `outputByteLen % 168` bytes requested + for each buffer, is not written. + + The argument `state` (IN) points to a quadruple hash state, + i.e., Lib_IntVector_Intrinsics_vec256[25] + The arguments `output0/output1/output2/output3` (OUT) point to `outputByteLen` bytes + of valid memory each, i.e., uint8_t[outputByteLen] +*/ +void +Hacl_Hash_SHA3_Simd256_shake128_squeeze_nblocks( + Lib_IntVector_Intrinsics_vec256 *state, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, + uint32_t outputByteLen +) +{ + for (uint32_t i0 = 0U; i0 < outputByteLen / 168U; i0++) + { + uint8_t hbuf[1024U] = { 0U }; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws[32U] KRML_POST_ALIGN(32) = { 0U }; + memcpy(ws, state, 25U * sizeof (Lib_IntVector_Intrinsics_vec256)); + Lib_IntVector_Intrinsics_vec256 v00 = ws[0U]; + Lib_IntVector_Intrinsics_vec256 v10 = ws[1U]; + Lib_IntVector_Intrinsics_vec256 v20 = ws[2U]; + Lib_IntVector_Intrinsics_vec256 v30 = ws[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v1_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v2_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v3_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v0__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v1__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v2__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 + v3__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 ws0 = v0__; + Lib_IntVector_Intrinsics_vec256 ws1 = v2__; + Lib_IntVector_Intrinsics_vec256 ws2 = v1__; + Lib_IntVector_Intrinsics_vec256 ws3 = v3__; + Lib_IntVector_Intrinsics_vec256 v01 = ws[4U]; + Lib_IntVector_Intrinsics_vec256 v11 = ws[5U]; + Lib_IntVector_Intrinsics_vec256 v21 = ws[6U]; + Lib_IntVector_Intrinsics_vec256 v31 = ws[7U]; + Lib_IntVector_Intrinsics_vec256 + v0_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v1_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v2_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v3_0 = 
Lib_IntVector_Intrinsics_vec256_interleave_high64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v0__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v1__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v2__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 + v3__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 ws4 = v0__0; + Lib_IntVector_Intrinsics_vec256 ws5 = v2__0; + Lib_IntVector_Intrinsics_vec256 ws6 = v1__0; + Lib_IntVector_Intrinsics_vec256 ws7 = v3__0; + Lib_IntVector_Intrinsics_vec256 v02 = ws[8U]; + Lib_IntVector_Intrinsics_vec256 v12 = ws[9U]; + Lib_IntVector_Intrinsics_vec256 v22 = ws[10U]; + Lib_IntVector_Intrinsics_vec256 v32 = ws[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v1_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v2_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v3_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v0__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v1__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v2__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 + v3__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 ws8 = v0__1; + Lib_IntVector_Intrinsics_vec256 ws9 = v2__1; + Lib_IntVector_Intrinsics_vec256 ws10 = v1__1; + Lib_IntVector_Intrinsics_vec256 ws11 = v3__1; + Lib_IntVector_Intrinsics_vec256 v03 = ws[12U]; + Lib_IntVector_Intrinsics_vec256 v13 = ws[13U]; + Lib_IntVector_Intrinsics_vec256 v23 = ws[14U]; + Lib_IntVector_Intrinsics_vec256 v33 = ws[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v1_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v2_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v3_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v0__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v1__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v2__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 + v3__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 ws12 = v0__2; + Lib_IntVector_Intrinsics_vec256 ws13 = v2__2; + Lib_IntVector_Intrinsics_vec256 ws14 = v1__2; + Lib_IntVector_Intrinsics_vec256 ws15 = v3__2; + Lib_IntVector_Intrinsics_vec256 v04 = ws[16U]; + Lib_IntVector_Intrinsics_vec256 v14 = ws[17U]; + Lib_IntVector_Intrinsics_vec256 v24 = ws[18U]; + Lib_IntVector_Intrinsics_vec256 v34 = ws[19U]; + Lib_IntVector_Intrinsics_vec256 + v0_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v1_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v04, v14); + 
Lib_IntVector_Intrinsics_vec256 + v2_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v3_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v0__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v1__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v2__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 + v3__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 ws16 = v0__3; + Lib_IntVector_Intrinsics_vec256 ws17 = v2__3; + Lib_IntVector_Intrinsics_vec256 ws18 = v1__3; + Lib_IntVector_Intrinsics_vec256 ws19 = v3__3; + Lib_IntVector_Intrinsics_vec256 v05 = ws[20U]; + Lib_IntVector_Intrinsics_vec256 v15 = ws[21U]; + Lib_IntVector_Intrinsics_vec256 v25 = ws[22U]; + Lib_IntVector_Intrinsics_vec256 v35 = ws[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v1_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v2_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v3_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v0__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v1__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v2__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 + v3__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 ws20 = v0__4; + Lib_IntVector_Intrinsics_vec256 ws21 = v2__4; + Lib_IntVector_Intrinsics_vec256 ws22 = v1__4; + Lib_IntVector_Intrinsics_vec256 ws23 = v3__4; + Lib_IntVector_Intrinsics_vec256 v06 = ws[24U]; + Lib_IntVector_Intrinsics_vec256 v16 = ws[25U]; + Lib_IntVector_Intrinsics_vec256 v26 = ws[26U]; + Lib_IntVector_Intrinsics_vec256 v36 = ws[27U]; + Lib_IntVector_Intrinsics_vec256 + v0_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v1_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v2_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v3_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v0__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v1__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v2__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 + v3__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 ws24 = v0__5; + Lib_IntVector_Intrinsics_vec256 ws25 = v2__5; + Lib_IntVector_Intrinsics_vec256 ws26 = v1__5; + Lib_IntVector_Intrinsics_vec256 ws27 = v3__5; + Lib_IntVector_Intrinsics_vec256 v0 = ws[28U]; + Lib_IntVector_Intrinsics_vec256 v1 = ws[29U]; + Lib_IntVector_Intrinsics_vec256 v2 = ws[30U]; + Lib_IntVector_Intrinsics_vec256 v3 = ws[31U]; + Lib_IntVector_Intrinsics_vec256 + v0_6 = 
Lib_IntVector_Intrinsics_vec256_interleave_low64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v1_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v2_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v3_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v0__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v1__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v2__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 + v3__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 ws28 = v0__6; + Lib_IntVector_Intrinsics_vec256 ws29 = v2__6; + Lib_IntVector_Intrinsics_vec256 ws30 = v1__6; + Lib_IntVector_Intrinsics_vec256 ws31 = v3__6; + ws[0U] = ws0; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; + ws[31U] = ws31; + for (uint32_t i = 0U; i < 32U; i++) + { + Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); + } + uint8_t *b0 = output0; + uint8_t *b1 = output1; + uint8_t *b2 = output2; + uint8_t *b3 = output3; + memcpy(b0 + i0 * 168U, hbuf, 168U * sizeof (uint8_t)); + memcpy(b1 + i0 * 168U, hbuf + 256U, 168U * sizeof (uint8_t)); + memcpy(b2 + i0 * 168U, hbuf + 512U, 168U * sizeof (uint8_t)); + memcpy(b3 + i0 * 168U, hbuf + 768U, 168U * sizeof (uint8_t)); + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____0 = state[i + 0U]; + Lib_IntVector_Intrinsics_vec256 uu____1 = state[i + 5U]; + Lib_IntVector_Intrinsics_vec256 uu____2 = state[i + 10U]; + _C[i] = + Lib_IntVector_Intrinsics_vec256_xor(uu____0, + Lib_IntVector_Intrinsics_vec256_xor(uu____1, + Lib_IntVector_Intrinsics_vec256_xor(uu____2, + Lib_IntVector_Intrinsics_vec256_xor(state[i + 15U], state[i + 20U]))));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____3 = _C[(i2 + 4U) % 5U]; + Lib_IntVector_Intrinsics_vec256 uu____4 = _C[(i2 + 1U) % 5U]; + Lib_IntVector_Intrinsics_vec256 + _D = + Lib_IntVector_Intrinsics_vec256_xor(uu____3, + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____4, + 1U), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____4, 63U))); + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + state[i2 + 5U * i] = Lib_IntVector_Intrinsics_vec256_xor(state[i2 + 5U * i], _D););); + Lib_IntVector_Intrinsics_vec256 x = state[1U]; + Lib_IntVector_Intrinsics_vec256 current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Hash_SHA3_keccak_piln[i]; + uint32_t r = Hacl_Hash_SHA3_keccak_rotc[i]; + Lib_IntVector_Intrinsics_vec256 temp = state[_Y]; + Lib_IntVector_Intrinsics_vec256 uu____5 = current; + state[_Y] = + 
Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____5, + r), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____5, 64U - r)); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____6 = state[0U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____7 = Lib_IntVector_Intrinsics_vec256_lognot(state[1U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v07 = + Lib_IntVector_Intrinsics_vec256_xor(uu____6, + Lib_IntVector_Intrinsics_vec256_and(uu____7, state[2U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____8 = state[1U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____9 = Lib_IntVector_Intrinsics_vec256_lognot(state[2U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v17 = + Lib_IntVector_Intrinsics_vec256_xor(uu____8, + Lib_IntVector_Intrinsics_vec256_and(uu____9, state[3U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____10 = state[2U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____11 = Lib_IntVector_Intrinsics_vec256_lognot(state[3U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v27 = + Lib_IntVector_Intrinsics_vec256_xor(uu____10, + Lib_IntVector_Intrinsics_vec256_and(uu____11, state[4U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____12 = state[3U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____13 = Lib_IntVector_Intrinsics_vec256_lognot(state[4U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v37 = + Lib_IntVector_Intrinsics_vec256_xor(uu____12, + Lib_IntVector_Intrinsics_vec256_and(uu____13, state[0U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____14 = state[4U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____15 = Lib_IntVector_Intrinsics_vec256_lognot(state[0U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v4 = + Lib_IntVector_Intrinsics_vec256_xor(uu____14, + Lib_IntVector_Intrinsics_vec256_and(uu____15, state[1U + 5U * i])); + state[0U + 5U * i] = v07; + state[1U + 5U * i] = v17; + state[2U + 5U * i] = v27; + state[3U + 5U * i] = v37; + state[4U + 5U * i] = v4;); + uint64_t c = Hacl_Hash_SHA3_keccak_rndc[i1]; + Lib_IntVector_Intrinsics_vec256 uu____16 = state[0U]; + state[0U] = + Lib_IntVector_Intrinsics_vec256_xor(uu____16, + Lib_IntVector_Intrinsics_vec256_load64(c)); + } + } +} + diff --git a/src/msvc/Hacl_K256_ECDSA.c b/src/msvc/Hacl_K256_ECDSA.c index f9bf31ed..0aaab085 100644 --- a/src/msvc/Hacl_K256_ECDSA.c +++ b/src/msvc/Hacl_K256_ECDSA.c @@ -571,10 +571,6 @@ static inline bool is_qelem_le_q_halved_vartime(uint64_t *f) { return true; } - if (a2 > 0xffffffffffffffffULL) - { - return false; - } if (a1 < 0x5d576e7357a4501dULL) { return true; diff --git a/src/msvc/Lib_RandomBuffer_System.c b/src/msvc/Lib_RandomBuffer_System.c index 0d7924b4..de6ef337 100644 --- a/src/msvc/Lib_RandomBuffer_System.c +++ b/src/msvc/Lib_RandomBuffer_System.c @@ -31,6 +31,7 @@ bool read_random_bytes(uint32_t len, uint8_t *buf) { #include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> +#include <limits.h> #include <unistd.h> bool read_random_bytes(uint32_t len, uint8_t *buf) { diff --git a/src/wasm/EverCrypt_Hash.wasm b/src/wasm/EverCrypt_Hash.wasm index 101c3f68..1447feb3 100644 Binary files a/src/wasm/EverCrypt_Hash.wasm and b/src/wasm/EverCrypt_Hash.wasm differ diff --git a/src/wasm/Hacl_AEAD_Chacha20Poly1305.wasm b/src/wasm/Hacl_AEAD_Chacha20Poly1305.wasm index 560e70a8..5e46c2e7 100644 Binary files a/src/wasm/Hacl_AEAD_Chacha20Poly1305.wasm and b/src/wasm/Hacl_AEAD_Chacha20Poly1305.wasm differ diff --git a/src/wasm/Hacl_AEAD_Chacha20Poly1305_Simd128.wasm b/src/wasm/Hacl_AEAD_Chacha20Poly1305_Simd128.wasm index a6475221..e1bedee6 100644 
Binary files a/src/wasm/Hacl_AEAD_Chacha20Poly1305_Simd128.wasm and b/src/wasm/Hacl_AEAD_Chacha20Poly1305_Simd128.wasm differ diff --git a/src/wasm/Hacl_AEAD_Chacha20Poly1305_Simd256.wasm b/src/wasm/Hacl_AEAD_Chacha20Poly1305_Simd256.wasm index 804858db..1f8a5cdc 100644 Binary files a/src/wasm/Hacl_AEAD_Chacha20Poly1305_Simd256.wasm and b/src/wasm/Hacl_AEAD_Chacha20Poly1305_Simd256.wasm differ diff --git a/src/wasm/Hacl_Bignum25519_51.wasm b/src/wasm/Hacl_Bignum25519_51.wasm index 9f05386a..78a0b296 100644 Binary files a/src/wasm/Hacl_Bignum25519_51.wasm and b/src/wasm/Hacl_Bignum25519_51.wasm differ diff --git a/src/wasm/Hacl_Bignum256_32.wasm b/src/wasm/Hacl_Bignum256_32.wasm index 05db6caa..5fcc70ae 100644 Binary files a/src/wasm/Hacl_Bignum256_32.wasm and b/src/wasm/Hacl_Bignum256_32.wasm differ diff --git a/src/wasm/Hacl_Curve25519_51.wasm b/src/wasm/Hacl_Curve25519_51.wasm index 12a0dd5c..1adb0c95 100644 Binary files a/src/wasm/Hacl_Curve25519_51.wasm and b/src/wasm/Hacl_Curve25519_51.wasm differ diff --git a/src/wasm/Hacl_Ed25519_PrecompTable.wasm b/src/wasm/Hacl_Ed25519_PrecompTable.wasm index 46a7380d..c94538f0 100644 Binary files a/src/wasm/Hacl_Ed25519_PrecompTable.wasm and b/src/wasm/Hacl_Ed25519_PrecompTable.wasm differ diff --git a/src/wasm/Hacl_HMAC.wasm b/src/wasm/Hacl_HMAC.wasm index c2e51b85..8752dda8 100644 Binary files a/src/wasm/Hacl_HMAC.wasm and b/src/wasm/Hacl_HMAC.wasm differ diff --git a/src/wasm/Hacl_HMAC_Blake2b_256.wasm b/src/wasm/Hacl_HMAC_Blake2b_256.wasm index bbc821ef..9ee78af8 100644 Binary files a/src/wasm/Hacl_HMAC_Blake2b_256.wasm and b/src/wasm/Hacl_HMAC_Blake2b_256.wasm differ diff --git a/src/wasm/Hacl_HMAC_Blake2s_128.wasm b/src/wasm/Hacl_HMAC_Blake2s_128.wasm index dde8629e..22fce826 100644 Binary files a/src/wasm/Hacl_HMAC_Blake2s_128.wasm and b/src/wasm/Hacl_HMAC_Blake2s_128.wasm differ diff --git a/src/wasm/Hacl_HMAC_DRBG.wasm b/src/wasm/Hacl_HMAC_DRBG.wasm index c1cb2fd3..f536237d 100644 Binary files a/src/wasm/Hacl_HMAC_DRBG.wasm and b/src/wasm/Hacl_HMAC_DRBG.wasm differ diff --git a/src/wasm/Hacl_Hash_Blake2b.wasm b/src/wasm/Hacl_Hash_Blake2b.wasm index 8882f5e8..29138d3d 100644 Binary files a/src/wasm/Hacl_Hash_Blake2b.wasm and b/src/wasm/Hacl_Hash_Blake2b.wasm differ diff --git a/src/wasm/Hacl_Hash_Blake2b_Simd256.wasm b/src/wasm/Hacl_Hash_Blake2b_Simd256.wasm index 27244515..1e2c80b7 100644 Binary files a/src/wasm/Hacl_Hash_Blake2b_Simd256.wasm and b/src/wasm/Hacl_Hash_Blake2b_Simd256.wasm differ diff --git a/src/wasm/Hacl_Hash_Blake2s.wasm b/src/wasm/Hacl_Hash_Blake2s.wasm index 0dcaff92..8e69e8f7 100644 Binary files a/src/wasm/Hacl_Hash_Blake2s.wasm and b/src/wasm/Hacl_Hash_Blake2s.wasm differ diff --git a/src/wasm/Hacl_Hash_Blake2s_Simd128.wasm b/src/wasm/Hacl_Hash_Blake2s_Simd128.wasm index 6d5cdb78..b1a26f75 100644 Binary files a/src/wasm/Hacl_Hash_Blake2s_Simd128.wasm and b/src/wasm/Hacl_Hash_Blake2s_Simd128.wasm differ diff --git a/src/wasm/Hacl_Hash_SHA3.wasm b/src/wasm/Hacl_Hash_SHA3.wasm index befbda8d..8104d0a6 100644 Binary files a/src/wasm/Hacl_Hash_SHA3.wasm and b/src/wasm/Hacl_Hash_SHA3.wasm differ diff --git a/src/wasm/Hacl_Hash_SHA3_Simd256.wasm b/src/wasm/Hacl_Hash_SHA3_Simd256.wasm new file mode 100644 index 00000000..8455cf4b Binary files /dev/null and b/src/wasm/Hacl_Hash_SHA3_Simd256.wasm differ diff --git a/src/wasm/Hacl_Impl_Blake2_Constants.wasm b/src/wasm/Hacl_Impl_Blake2_Constants.wasm index 8bf18578..1346057c 100644 Binary files a/src/wasm/Hacl_Impl_Blake2_Constants.wasm and 
b/src/wasm/Hacl_Impl_Blake2_Constants.wasm differ diff --git a/src/wasm/Hacl_K256_ECDSA.wasm b/src/wasm/Hacl_K256_ECDSA.wasm index c0e66ff8..5022a27e 100644 Binary files a/src/wasm/Hacl_K256_ECDSA.wasm and b/src/wasm/Hacl_K256_ECDSA.wasm differ diff --git a/src/wasm/Hacl_MAC_Poly1305.wasm b/src/wasm/Hacl_MAC_Poly1305.wasm index e72930c8..c4e38920 100644 Binary files a/src/wasm/Hacl_MAC_Poly1305.wasm and b/src/wasm/Hacl_MAC_Poly1305.wasm differ diff --git a/src/wasm/Hacl_Poly1305_128_Hacl_Poly1305_256_Hacl_Impl_Poly1305.wasm b/src/wasm/Hacl_Poly1305_128_Hacl_Poly1305_256_Hacl_Impl_Poly1305.wasm index bdfde537..23ece197 100644 Binary files a/src/wasm/Hacl_Poly1305_128_Hacl_Poly1305_256_Hacl_Impl_Poly1305.wasm and b/src/wasm/Hacl_Poly1305_128_Hacl_Poly1305_256_Hacl_Impl_Poly1305.wasm differ diff --git a/src/wasm/Hacl_SHA2_Vec128.wasm b/src/wasm/Hacl_SHA2_Vec128.wasm index cef1c90d..549c8a5f 100644 Binary files a/src/wasm/Hacl_SHA2_Vec128.wasm and b/src/wasm/Hacl_SHA2_Vec128.wasm differ diff --git a/src/wasm/INFO.txt b/src/wasm/INFO.txt index d2f1192f..e7adb2e4 100644 --- a/src/wasm/INFO.txt +++ b/src/wasm/INFO.txt @@ -1,4 +1,4 @@ This code was generated with the following toolchain. -F* version: 6e23042e74555544267731295b7d382c86edc574 -Karamel version: a7be2a7c43eca637ceb57fe8f3ffd16fc6627ebd +F* version: 96f90842af8c0137bdee87ccb7bd3ea92485efb6 +Karamel version: 1282f04f16a4e193f329708b22e0a4577d5dd092 Vale version: 0.3.19 diff --git a/src/wasm/layouts.json b/src/wasm/layouts.json index 81273a66..c7e414d8 100644 --- a/src/wasm/layouts.json +++ b/src/wasm/layouts.json @@ -1 +1 @@ -{"Spec_Hash_Definitions_hash_alg":["LEnum"],"Prims_string":["LBuiltin",["I32"],["A32"]],"Prims_int":["LBuiltin",["I32"],["A32"]],"K___uint32_t_uint32_t":["LFlat",{"size":8,"fields":[["fst",[0,["Int",["A32"]]]],["snd",[4,["Int",["A32"]]]]]}],"__bool_bool_bool_bool":["LFlat",{"size":4,"fields":[["fst",[0,["Int",["A8"]]]],["snd",[1,["Int",["A8"]]]],["thd",[2,["Int",["A8"]]]],["f3",[3,["Int",["A8"]]]]]}],"__bool_bool":["LFlat",{"size":2,"fields":[["fst",[0,["Int",["A8"]]]],["snd",[1,["Int",["A8"]]]]]}],"Hacl_Streaming_Types_error_code":["LEnum"],"Hacl_MAC_Poly1305_state_t":["LFlat",{"size":20,"fields":[["block_state",[0,["Pointer",["Int",["A64"]]]]],["buf",[4,["Pointer",["Int",["A8"]]]]],["total_len",[8,["Int",["A64"]]]],["p_key",[16,["Pointer",["Int",["A8"]]]]]]}],"Hacl_Streaming_MD_state_64":["LFlat",{"size":16,"fields":[["block_state",[0,["Pointer",["Int",["A64"]]]]],["buf",[4,["Pointer",["Int",["A8"]]]]],["total_len",[8,["Int",["A64"]]]]]}],"Hacl_Streaming_MD_state_32":["LFlat",{"size":16,"fields":[["block_state",[0,["Pointer",["Int",["A32"]]]]],["buf",[4,["Pointer",["Int",["A8"]]]]],["total_len",[8,["Int",["A64"]]]]]}],"Hacl_Hash_SHA3_state_t":["LFlat",{"size":24,"fields":[["block_state",[0,["Layout","Hacl_Hash_SHA3_hash_buf"]]],["buf",[8,["Pointer",["Int",["A8"]]]]],["total_len",[16,["Int",["A64"]]]]]}],"hash_buf2":["LFlat",{"size":16,"fields":[["fst",[0,["Layout","Hacl_Hash_SHA3_hash_buf"]]],["snd",[8,["Layout","Hacl_Hash_SHA3_hash_buf"]]]]}],"Hacl_Hash_SHA3_hash_buf":["LFlat",{"size":8,"fields":[["fst",[0,["Int",["A32"]]]],["snd",[4,["Pointer",["Int",["A64"]]]]]]}],"Hacl_Hash_Blake2s_state_t":["LFlat",{"size":24,"fields":[["block_state",[0,["Layout","Hacl_Hash_Blake2s_block_state_t"]]],["buf",[8,["Pointer",["Int",["A8"]]]]],["total_len",[16,["Int",["A64"]]]]]}],"Hacl_Hash_Blake2s_block_state_t":["LFlat",{"size":8,"fields":[["fst",[0,["Pointer",["Int",["A32"]]]]],["snd",[4,["Pointer",["Int",["A32"]]]]]]}],"Hacl_H
ash_Blake2s_Simd128_state_t":["LFlat",{"size":24,"fields":[["block_state",[0,["Layout","Hacl_Hash_Blake2s_Simd128_block_state_t"]]],["buf",[8,["Pointer",["Int",["A8"]]]]],["total_len",[16,["Int",["A64"]]]]]}],"Hacl_Hash_Blake2s_Simd128_block_state_t":["LFlat",{"size":8,"fields":[["fst",[0,["Pointer",["Unknown"]]]],["snd",[4,["Pointer",["Unknown"]]]]]}],"Hacl_Hash_Blake2b_state_t":["LFlat",{"size":24,"fields":[["block_state",[0,["Layout","Hacl_Hash_Blake2b_block_state_t"]]],["buf",[8,["Pointer",["Int",["A8"]]]]],["total_len",[16,["Int",["A64"]]]]]}],"Hacl_Hash_Blake2b_block_state_t":["LFlat",{"size":8,"fields":[["fst",[0,["Pointer",["Int",["A64"]]]]],["snd",[4,["Pointer",["Int",["A64"]]]]]]}],"Hacl_Hash_Blake2b_Simd256_state_t":["LFlat",{"size":24,"fields":[["block_state",[0,["Layout","Hacl_Hash_Blake2b_Simd256_block_state_t"]]],["buf",[8,["Pointer",["Int",["A8"]]]]],["total_len",[16,["Int",["A64"]]]]]}],"Hacl_Hash_Blake2b_Simd256_block_state_t":["LFlat",{"size":8,"fields":[["fst",[0,["Pointer",["Unknown"]]]],["snd",[4,["Pointer",["Unknown"]]]]]}],"Hacl_Hash_SHA2_uint8_8p":["LFlat",{"size":56,"fields":[["fst",[0,["Pointer",["Int",["A8"]]]]],["snd",[8,["Layout","Hacl_Hash_SHA2_uint8_7p"]]]]}],"Hacl_Hash_SHA2_uint8_7p":["LFlat",{"size":48,"fields":[["fst",[0,["Pointer",["Int",["A8"]]]]],["snd",[8,["Layout","Hacl_Hash_SHA2_uint8_6p"]]]]}],"Hacl_Hash_SHA2_uint8_6p":["LFlat",{"size":40,"fields":[["fst",[0,["Pointer",["Int",["A8"]]]]],["snd",[8,["Layout","Hacl_Hash_SHA2_uint8_5p"]]]]}],"Hacl_Hash_SHA2_uint8_5p":["LFlat",{"size":32,"fields":[["fst",[0,["Pointer",["Int",["A8"]]]]],["snd",[8,["Layout","Hacl_Hash_SHA2_uint8_4p"]]]]}],"Hacl_Hash_SHA2_uint8_4p":["LFlat",{"size":24,"fields":[["fst",[0,["Pointer",["Int",["A8"]]]]],["snd",[8,["Layout","Hacl_Hash_SHA2_uint8_3p"]]]]}],"Hacl_Hash_SHA2_uint8_3p":["LFlat",{"size":16,"fields":[["fst",[0,["Pointer",["Int",["A8"]]]]],["snd",[8,["Layout","Hacl_Hash_SHA2_uint8_2p"]]]]}],"Hacl_Hash_SHA2_uint8_2x8p":["LFlat",{"size":112,"fields":[["fst",[0,["Layout","Hacl_Hash_SHA2_uint8_8p"]]],["snd",[56,["Layout","Hacl_Hash_SHA2_uint8_8p"]]]]}],"Hacl_Hash_SHA2_uint8_2x4p":["LFlat",{"size":48,"fields":[["fst",[0,["Layout","Hacl_Hash_SHA2_uint8_4p"]]],["snd",[24,["Layout","Hacl_Hash_SHA2_uint8_4p"]]]]}],"Hacl_Hash_SHA2_uint8_2p":["LFlat",{"size":8,"fields":[["fst",[0,["Pointer",["Int",["A8"]]]]],["snd",[4,["Pointer",["Int",["A8"]]]]]]}],"Hacl_Impl_HPKE_context_s":["LFlat",{"size":16,"fields":[["ctx_key",[0,["Pointer",["Int",["A8"]]]]],["ctx_nonce",[4,["Pointer",["Int",["A8"]]]]],["ctx_seq",[8,["Pointer",["Int",["A64"]]]]],["ctx_exporter",[12,["Pointer",["Int",["A8"]]]]]]}],"Hacl_HMAC_DRBG_state":["LFlat",{"size":12,"fields":[["k",[0,["Pointer",["Int",["A8"]]]]],["v",[4,["Pointer",["Int",["A8"]]]]],["reseed_counter",[8,["Pointer",["Int",["A32"]]]]]]}],"Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64":["LFlat",{"size":20,"fields":[["len",[0,["Int",["A32"]]]],["n",[4,["Pointer",["Int",["A64"]]]]],["mu",[8,["Int",["A64"]]]],["r2",[16,["Pointer",["Int",["A64"]]]]]]}],"Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32":["LFlat",{"size":16,"fields":[["len",[0,["Int",["A32"]]]],["n",[4,["Pointer",["Int",["A32"]]]]],["mu",[8,["Int",["A32"]]]],["r2",[12,["Pointer",["Int",["A32"]]]]]]}],"FStar_UInt128_uint128":["LFlat",{"size":16,"fields":[["low",[0,["Int",["A64"]]]],["high",[8,["Int",["A64"]]]]]}],"EverCrypt_Hash_Incremental_state_t":["LFlat",{"size":16,"fields":[["block_state",[0,["Pointer",["Layout","EverCrypt_Hash_state_s"]]]],["buf",[4,["Pointer",["Int",["A8"]]]]],["total_len",[8,["Int
",["A64"]]]]]}],"state_s_tags":["LEnum"],"EverCrypt_Hash_state_s":["LFlat",{"size":12,"fields":[["tag",[0,["Int",["A32"]]]],["val",[8,["Union",[["Pointer",["Int",["A32"]]],["Pointer",["Int",["A32"]]],["Pointer",["Int",["A32"]]],["Pointer",["Int",["A32"]]],["Pointer",["Int",["A64"]]],["Pointer",["Int",["A64"]]],["Pointer",["Int",["A64"]]],["Pointer",["Int",["A64"]]],["Pointer",["Int",["A64"]]],["Pointer",["Int",["A64"]]],["Pointer",["Int",["A32"]]],["Pointer",["Unknown"]],["Pointer",["Int",["A64"]]],["Pointer",["Unknown"]]]]]]]}],"EverCrypt_Error_error_code":["LEnum"],"C_String_t_":["LBuiltin",["I32"],["A32"]],"C_String_t":["LBuiltin",["I32"],["A32"]],"C_Compat_String_t_":["LBuiltin",["I32"],["A32"]],"C_Compat_String_t":["LBuiltin",["I32"],["A32"]],"exit_code":["LBuiltin",["I32"],["A32"]],"clock_t":["LBuiltin",["I32"],["A32"]]} \ No newline at end of file +{"Spec_Hash_Definitions_hash_alg":["LEnum"],"Prims_string":["LBuiltin",["I32"],["A32"]],"Prims_int":["LBuiltin",["I32"],["A32"]],"K___uint32_t_uint32_t":["LFlat",{"size":8,"fields":[["fst",[0,["Int",["A32"]]]],["snd",[4,["Int",["A32"]]]]]}],"__bool_bool_bool_bool":["LFlat",{"size":4,"fields":[["fst",[0,["Int",["A8"]]]],["snd",[1,["Int",["A8"]]]],["thd",[2,["Int",["A8"]]]],["f3",[3,["Int",["A8"]]]]]}],"__bool_bool":["LFlat",{"size":2,"fields":[["fst",[0,["Int",["A8"]]]],["snd",[1,["Int",["A8"]]]]]}],"K____uint64_t___uint64_t_":["LFlat",{"size":8,"fields":[["fst",[0,["Pointer",["Int",["A64"]]]]],["snd",[4,["Pointer",["Int",["A64"]]]]]]}],"K____uint32_t___uint32_t_":["LFlat",{"size":8,"fields":[["fst",[0,["Pointer",["Int",["A32"]]]]],["snd",[4,["Pointer",["Int",["A32"]]]]]]}],"K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_":["LFlat",{"size":8,"fields":[["fst",[0,["Pointer",["Unknown"]]]],["snd",[4,["Pointer",["Unknown"]]]]]}],"K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_":["LFlat",{"size":8,"fields":[["fst",[0,["Pointer",["Unknown"]]]],["snd",[4,["Pointer",["Unknown"]]]]]}],"K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_":["LFlat",{"size":8,"fields":[["fst",[0,["Pointer",["Layout","Hacl_Hash_Blake2b_blake2_params"]]]],["snd",[4,["Pointer",["Int",["A8"]]]]]]}],"Hacl_Streaming_Types_error_code":["LEnum"],"Hacl_MAC_Poly1305_state_t":["LFlat",{"size":20,"fields":[["block_state",[0,["Pointer",["Int",["A64"]]]]],["buf",[4,["Pointer",["Int",["A8"]]]]],["total_len",[8,["Int",["A64"]]]],["p_key",[16,["Pointer",["Int",["A8"]]]]]]}],"Hacl_Streaming_MD_state_64":["LFlat",{"size":16,"fields":[["block_state",[0,["Pointer",["Int",["A64"]]]]],["buf",[4,["Pointer",["Int",["A8"]]]]],["total_len",[8,["Int",["A64"]]]]]}],"Hacl_Streaming_MD_state_32":["LFlat",{"size":16,"fields":[["block_state",[0,["Pointer",["Int",["A32"]]]]],["buf",[4,["Pointer",["Int",["A8"]]]]],["total_len",[8,["Int",["A64"]]]]]}],"Hacl_Hash_SHA3_state_t":["LFlat",{"size":24,"fields":[["block_state",[0,["Layout","Hacl_Hash_SHA3_hash_buf"]]],["buf",[8,["Pointer",["Int",["A8"]]]]],["total_len",[16,["Int",["A64"]]]]]}],"hash_buf2":["LFlat",{"size":16,"fields":[["fst",[0,["Layout","Hacl_Hash_SHA3_hash_buf"]]],["snd",[8,["Layout","Hacl_Hash_SHA3_hash_buf"]]]]}],"Hacl_Hash_SHA3_hash_buf":["LFlat",{"size":8,"fields":[["fst",[0,["Int",["A32"]]]],["snd",[4,["Pointer",["Int",["A64"]]]]]]}],"Hacl_Hash_Blake2s_state_t":["LFlat",{"size":32,"fields":[["block_state",[0,["Layout","Hacl_Hash_Blake2s_block_state_t"]]],["buf",[16,["Pointer",["Int",["A8"]]]]],["total_len",[24,["Int",["A64"]]]]]}],"Hacl_Hash_Blake2s_block_state_t":["LFlat",{"size":16,"field
s":[["fst",[0,["Int",["A8"]]]],["snd",[1,["Int",["A8"]]]],["thd",[8,["Layout","K____uint32_t___uint32_t_"]]]]}],"Hacl_Hash_Blake2s_Simd128_state_t":["LFlat",{"size":32,"fields":[["block_state",[0,["Layout","Hacl_Hash_Blake2s_Simd128_block_state_t"]]],["buf",[16,["Pointer",["Int",["A8"]]]]],["total_len",[24,["Int",["A64"]]]]]}],"Hacl_Hash_Blake2s_Simd128_block_state_t":["LFlat",{"size":16,"fields":[["fst",[0,["Int",["A8"]]]],["snd",[1,["Int",["A8"]]]],["thd",[8,["Layout","K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_"]]]]}],"Hacl_Hash_Blake2b_state_t":["LFlat",{"size":32,"fields":[["block_state",[0,["Layout","Hacl_Hash_Blake2b_block_state_t"]]],["buf",[16,["Pointer",["Int",["A8"]]]]],["total_len",[24,["Int",["A64"]]]]]}],"Hacl_Hash_Blake2b_block_state_t":["LFlat",{"size":16,"fields":[["fst",[0,["Int",["A8"]]]],["snd",[1,["Int",["A8"]]]],["thd",[8,["Layout","K____uint64_t___uint64_t_"]]]]}],"Hacl_Hash_Blake2b_Simd256_state_t":["LFlat",{"size":32,"fields":[["block_state",[0,["Layout","Hacl_Hash_Blake2b_Simd256_block_state_t"]]],["buf",[16,["Pointer",["Int",["A8"]]]]],["total_len",[24,["Int",["A64"]]]]]}],"Hacl_Hash_Blake2b_Simd256_block_state_t":["LFlat",{"size":16,"fields":[["fst",[0,["Int",["A8"]]]],["snd",[1,["Int",["A8"]]]],["thd",[8,["Layout","K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_"]]]]}],"Hacl_Hash_Blake2b_index":["LFlat",{"size":2,"fields":[["key_length",[0,["Int",["A8"]]]],["digest_length",[1,["Int",["A8"]]]]]}],"Hacl_Hash_SHA2_uint8_8p":["LFlat",{"size":56,"fields":[["fst",[0,["Pointer",["Int",["A8"]]]]],["snd",[8,["Layout","Hacl_Hash_SHA2_uint8_7p"]]]]}],"Hacl_Hash_SHA2_uint8_7p":["LFlat",{"size":48,"fields":[["fst",[0,["Pointer",["Int",["A8"]]]]],["snd",[8,["Layout","Hacl_Hash_SHA2_uint8_6p"]]]]}],"Hacl_Hash_SHA2_uint8_6p":["LFlat",{"size":40,"fields":[["fst",[0,["Pointer",["Int",["A8"]]]]],["snd",[8,["Layout","Hacl_Hash_SHA2_uint8_5p"]]]]}],"Hacl_Hash_SHA2_uint8_5p":["LFlat",{"size":32,"fields":[["fst",[0,["Pointer",["Int",["A8"]]]]],["snd",[8,["Layout","Hacl_Hash_SHA2_uint8_4p"]]]]}],"Hacl_Hash_SHA2_uint8_4p":["LFlat",{"size":24,"fields":[["fst",[0,["Pointer",["Int",["A8"]]]]],["snd",[8,["Layout","Hacl_Hash_SHA2_uint8_3p"]]]]}],"Hacl_Hash_SHA2_uint8_3p":["LFlat",{"size":16,"fields":[["fst",[0,["Pointer",["Int",["A8"]]]]],["snd",[8,["Layout","Hacl_Hash_SHA2_uint8_2p"]]]]}],"Hacl_Hash_SHA2_uint8_2x8p":["LFlat",{"size":112,"fields":[["fst",[0,["Layout","Hacl_Hash_SHA2_uint8_8p"]]],["snd",[56,["Layout","Hacl_Hash_SHA2_uint8_8p"]]]]}],"Hacl_Hash_SHA2_uint8_2x4p":["LFlat",{"size":48,"fields":[["fst",[0,["Layout","Hacl_Hash_SHA2_uint8_4p"]]],["snd",[24,["Layout","Hacl_Hash_SHA2_uint8_4p"]]]]}],"Hacl_Hash_SHA2_uint8_2p":["LFlat",{"size":8,"fields":[["fst",[0,["Pointer",["Int",["A8"]]]]],["snd",[4,["Pointer",["Int",["A8"]]]]]]}],"Hacl_Impl_HPKE_context_s":["LFlat",{"size":16,"fields":[["ctx_key",[0,["Pointer",["Int",["A8"]]]]],["ctx_nonce",[4,["Pointer",["Int",["A8"]]]]],["ctx_seq",[8,["Pointer",["Int",["A64"]]]]],["ctx_exporter",[12,["Pointer",["Int",["A8"]]]]]]}],"Hacl_Hash_Blake2b_blake2_params":["LFlat",{"size":28,"fields":[["digest_length",[0,["Int",["A8"]]]],["key_length",[1,["Int",["A8"]]]],["fanout",[2,["Int",["A8"]]]],["depth",[3,["Int",["A8"]]]],["leaf_length",[4,["Int",["A32"]]]],["node_offset",[8,["Int",["A64"]]]],["node_depth",[16,["Int",["A8"]]]],["inner_length",[17,["Int",["A8"]]]],["salt",[20,["Pointer",["Int",["A8"]]]]],["personal",[24,["Pointer",["Int",["A8"]]]]]]}],"Hacl_HMAC_DRBG_state":["LFlat",{"size":12,"fiel
ds":[["k",[0,["Pointer",["Int",["A8"]]]]],["v",[4,["Pointer",["Int",["A8"]]]]],["reseed_counter",[8,["Pointer",["Int",["A32"]]]]]]}],"Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64":["LFlat",{"size":20,"fields":[["len",[0,["Int",["A32"]]]],["n",[4,["Pointer",["Int",["A64"]]]]],["mu",[8,["Int",["A64"]]]],["r2",[16,["Pointer",["Int",["A64"]]]]]]}],"Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32":["LFlat",{"size":16,"fields":[["len",[0,["Int",["A32"]]]],["n",[4,["Pointer",["Int",["A32"]]]]],["mu",[8,["Int",["A32"]]]],["r2",[12,["Pointer",["Int",["A32"]]]]]]}],"FStar_UInt128_uint128":["LFlat",{"size":16,"fields":[["low",[0,["Int",["A64"]]]],["high",[8,["Int",["A64"]]]]]}],"EverCrypt_Hash_Incremental_state_t":["LFlat",{"size":16,"fields":[["block_state",[0,["Pointer",["Layout","EverCrypt_Hash_state_s"]]]],["buf",[4,["Pointer",["Int",["A8"]]]]],["total_len",[8,["Int",["A64"]]]]]}],"state_s_tags":["LEnum"],"EverCrypt_Hash_state_s":["LFlat",{"size":12,"fields":[["tag",[0,["Int",["A32"]]]],["val",[8,["Union",[["Pointer",["Int",["A32"]]],["Pointer",["Int",["A32"]]],["Pointer",["Int",["A32"]]],["Pointer",["Int",["A32"]]],["Pointer",["Int",["A64"]]],["Pointer",["Int",["A64"]]],["Pointer",["Int",["A64"]]],["Pointer",["Int",["A64"]]],["Pointer",["Int",["A64"]]],["Pointer",["Int",["A64"]]],["Pointer",["Int",["A32"]]],["Pointer",["Unknown"]],["Pointer",["Int",["A64"]]],["Pointer",["Unknown"]]]]]]]}],"EverCrypt_Error_error_code":["LEnum"],"C_String_t_":["LBuiltin",["I32"],["A32"]],"C_String_t":["LBuiltin",["I32"],["A32"]],"C_Compat_String_t_":["LBuiltin",["I32"],["A32"]],"C_Compat_String_t":["LBuiltin",["I32"],["A32"]],"exit_code":["LBuiltin",["I32"],["A32"]],"clock_t":["LBuiltin",["I32"],["A32"]]} \ No newline at end of file diff --git a/src/wasm/main.html b/src/wasm/main.html index a4605811..8e3823bf 100644 --- a/src/wasm/main.html +++ b/src/wasm/main.html @@ -8,7 +8,7 @@ diff --git a/src/wasm/shell.js b/src/wasm/shell.js index cc877fd1..28a02c3e 100644 --- a/src/wasm/shell.js +++ b/src/wasm/shell.js @@ -1,7 +1,7 @@ // To be loaded by main.js var my_js_files = ["./test.js"]; -var my_modules = ["WasmSupport", "FStar", "LowStar_Endianness", "Hacl_Impl_Blake2_Constants", "Hacl_Lib", "Hacl_Hash_Blake2b", "Hacl_Hash_Blake2s", "Hacl_Hash_Blake2b_Simd256", "Hacl_Hash_Blake2s_Simd128", "Hacl_Hash_Base", "Hacl_Hash_SHA1", "Hacl_Hash_SHA2", "Hacl_HMAC", "Hacl_HMAC_Blake2s_128", "Hacl_HMAC_Blake2b_256", "Hacl_Hash_SHA3", "Hacl_Hash_MD5", "EverCrypt_TargetConfig", "EverCrypt", "Vale", "EverCrypt_Hash", "Hacl_Chacha20", "Hacl_Chacha20_Vec128_Hacl_Chacha20_Vec256", "Hacl_Salsa20", "Hacl_IntTypes_Intrinsics", "Hacl_Bignum_Base", "Hacl_Bignum", "Hacl_Bignum25519_51", "Hacl_Curve25519_51", "Hacl_MAC_Poly1305", "Hacl_AEAD_Chacha20Poly1305", "Hacl_Poly1305_128_Hacl_Poly1305_256_Hacl_Impl_Poly1305", "Hacl_AEAD_Chacha20Poly1305_Simd128", "Hacl_AEAD_Chacha20Poly1305_Simd256", "Hacl_Ed25519_PrecompTable", "Hacl_Ed25519", "Hacl_NaCl", "Hacl_P256_PrecompTable", "Hacl_P256", "Hacl_Bignum_K256", "Hacl_K256_PrecompTable", "Hacl_K256_ECDSA", "Hacl_HKDF", "Hacl_HPKE_Curve51_CP32_SHA256", "Hacl_HPKE_Curve51_CP32_SHA512", "Hacl_GenericField32", "Hacl_SHA2_Vec256", "Hacl_EC_K256", "Hacl_Bignum4096", "Hacl_Chacha20_Vec32", "Hacl_Bignum4096_32", "Hacl_HKDF_Blake2s_128", "Hacl_GenericField64", "Hacl_Bignum32", "Hacl_Bignum256_32", "Hacl_SHA2_Vec128", "Hacl_HMAC_DRBG", "Hacl_Bignum64", "Hacl_HKDF_Blake2b_256", "Hacl_EC_Ed25519", "Hacl_Bignum256"]; +var my_modules = ["WasmSupport", "FStar", "LowStar_Endianness", "Hacl_Impl_Blake2_Constants", 
"Hacl_Lib", "Hacl_Hash_Blake2b", "Hacl_Hash_Blake2s", "Hacl_Hash_Blake2b_Simd256", "Hacl_Hash_Blake2s_Simd128", "Hacl_Hash_Base", "Hacl_Hash_SHA1", "Hacl_Hash_SHA2", "Hacl_HMAC", "Hacl_HMAC_Blake2s_128", "Hacl_HMAC_Blake2b_256", "Hacl_Hash_SHA3", "Hacl_Hash_SHA3_Simd256", "Hacl_Hash_MD5", "EverCrypt_TargetConfig", "EverCrypt", "Vale", "EverCrypt_Hash", "Hacl_Chacha20", "Hacl_Chacha20_Vec128_Hacl_Chacha20_Vec256", "Hacl_Salsa20", "Hacl_IntTypes_Intrinsics", "Hacl_Bignum_Base", "Hacl_Bignum", "Hacl_Bignum25519_51", "Hacl_Curve25519_51", "Hacl_MAC_Poly1305", "Hacl_AEAD_Chacha20Poly1305", "Hacl_Poly1305_128_Hacl_Poly1305_256_Hacl_Impl_Poly1305", "Hacl_AEAD_Chacha20Poly1305_Simd128", "Hacl_AEAD_Chacha20Poly1305_Simd256", "Hacl_Ed25519_PrecompTable", "Hacl_Ed25519", "Hacl_NaCl", "Hacl_P256_PrecompTable", "Hacl_P256", "Hacl_Bignum_K256", "Hacl_K256_PrecompTable", "Hacl_K256_ECDSA", "Hacl_HKDF", "Hacl_HPKE_Curve51_CP32_SHA256", "Hacl_HPKE_Curve51_CP32_SHA512", "Hacl_GenericField32", "Hacl_SHA2_Vec256", "Hacl_EC_K256", "Hacl_Bignum4096", "Hacl_Chacha20_Vec32", "Hacl_Bignum4096_32", "Hacl_HKDF_Blake2s_128", "Hacl_GenericField64", "Hacl_Bignum32", "Hacl_Bignum256_32", "Hacl_SHA2_Vec128", "Hacl_HMAC_DRBG", "Hacl_Bignum64", "Hacl_HKDF_Blake2b_256", "Hacl_EC_Ed25519", "Hacl_Bignum256"]; var my_debug = false; if (typeof module !== "undefined")