cctv vectors

integritychain · Jan 6, 2024 · 6f72bef · 6f72bef
1 parent 4f3e43e
commit 6f72bef
Show file tree

Hide file tree

Showing 35 changed files with 830 additions and 32 deletions.
diff --git a/Cargo.toml b/Cargo.toml
@@ -33,6 +33,7 @@ regex = "1.10.2"
 hex = "0.4.3"
 rand_chacha = "0.3.1"
 criterion = "0.5.1"
+flate2 = "1.0.28"
 
 [[bench]]
 name = "benchmark"

diff --git a/benches/benchmark.rs b/benches/benchmark.rs
@@ -1,7 +1,7 @@
-use criterion::{criterion_group, criterion_main, Criterion};
+use criterion::{Criterion, criterion_group, criterion_main};
 
-use fips203::traits::{Decaps, Encaps, KeyGen};
 use fips203::{ml_kem_1024, ml_kem_512, ml_kem_768};
+use fips203::traits::{Decaps, Encaps, KeyGen};
 
 pub fn criterion_benchmark(c: &mut Criterion) {
  let (ek_512, dk_512) = ml_kem_512::KG::try_keygen_vt().unwrap();
@@ -29,19 +29,19 @@ criterion_main!(benches);
 
 /*
 
-cargo bench # As of 1-1-24
+$ cargo bench  # As of 1-6-24
 Intel® Core™ i7-7700K CPU @ 4.20GHz × 8
 
-ml_kem_512 KeyGen time: [63.821 µs 63.830 µs 63.839 µs]
-ml_kem_768 KeyGen time: [100.88 µs 100.89 µs 100.90 µs]
-ml_kem_1024 KeyGen time: [146.53 µs 146.61 µs 146.70 µs]
+ml_kem_512 KeyGen time: [38.781 µs 39.282 µs 39.905 µs]
+ml_kem_768 KeyGen time: [64.254 µs 64.558 µs 65.107 µs]
+ml_kem_1024 KeyGen time: [100.13 µs 100.80 µs 101.55 µs]
 
-ml_kem_512 Encaps time: [76.934 µs 76.948 µs 76.961 µs]
-ml_kem_768 Encaps time: [117.93 µs 118.01 µs 118.08 µs]
-ml_kem_1024 Encaps time: [168.68 µs 168.76 µs 168.85 µs]
+ml_kem_512 Encaps time: [43.175 µs 43.851 µs 44.658 µs]
+ml_kem_768 Encaps time: [68.038 µs 68.808 µs 69.817 µs]
+ml_kem_1024 Encaps time: [102.59 µs 102.95 µs 103.34 µs]
 
-ml_kem_512 Decaps time: [76.749 µs 76.887 µs 77.071 µs]
-ml_kem_768 Decaps time: [117.05 µs 117.34 µs 117.84 µs]
-ml_kem_1024 Decaps time: [167.51 µs 167.53 µs 167.57 µs]
+ml_kem_512 Decaps time: [54.167 µs 54.810 µs 55.564 µs]
+ml_kem_768 Decaps time: [84.112 µs 85.940 µs 87.994 µs]
+ml_kem_1024 Decaps time: [121.48 µs 122.99 µs 125.10 µs]
 
  */
diff --git a/src/byte_fns.rs b/src/byte_fns.rs
@@ -1,6 +1,6 @@
 use crate::helpers::ensure;
-use crate::types::Z256;
 use crate::Q;
+use crate::types::Z256;
 
 // /// Algorithm 2 `BitsToBytes(b)` on page 17.
 // /// Converts a bit string (of length a multiple of eight) into an array of bytes.
@@ -134,8 +134,8 @@ pub(crate) fn byte_encode(
 ///
 /// Input: byte array B ∈ B^{32d} <br>
 /// Output: integer array `F ∈ Z^256_m`, where `m = 2^d if d < 12` and `m = q if d = 12`
-pub(crate) fn byte_decode(d: u32,
- bytes_b: &[u8], integers_f: &mut [Z256; 256],
+pub(crate) fn byte_decode(
+ d: u32, bytes_b: &[u8], integers_f: &mut [Z256; 256],
 ) -> Result<(), &'static str> {
  let bitlen = d;
  let mut temp = 0u64;
@@ -152,6 +152,8 @@ pub(crate) fn byte_decode(d: u32,
  int_index += 1;
  }
  }
+ let max = if d < 12 { 2u16.pow(d) } else { Q as u16 };
+ ensure!(integers_f.iter().all(|e| e.get_u16() < max), "Alg5: integers out of range");
  Ok(())
 }
 // #[allow(dead_code)]
@@ -202,7 +204,7 @@ mod tests {
  use crate::byte_fns::{byte_decode, byte_encode};
  use crate::types::Z256;
 
- // #[test]
+// #[test]
  // fn test_bytes_and_bits() {
  // let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(123);
  //

diff --git a/src/k_pke.rs b/src/k_pke.rs
@@ -253,7 +253,7 @@ pub(crate) fn k_pke_decrypt<const K: usize, const DU: usize, const DV: usize>(
  // 3: u ← Decompress_{du}(ByteDecode_{du}(c_1)) ▷ ByteDecode_{du} invoked k times
  let mut u = [[Z256(0); 256]; K];
  for i in 0..K {
- byte_decode(DU as u32,&c1[32 * DU * i..32 * DU * (i + 1)], &mut u[i])?;
+ byte_decode(DU as u32, &c1[32 * DU * i..32 * DU * (i + 1)], &mut u[i])?;
  decompress(DU as u32, &mut u[i]);
  }
 

diff --git a/src/lib.rs b/src/lib.rs
@@ -12,8 +12,8 @@ use zeroize::{Zeroize, ZeroizeOnDrop};
 
 // Functionality map per FIPS 203 draft
 //
-// Algorithm 2 BitsToBytes(b) on page 17 --> byte_fns.rs
-// Algorithm 3 BytesToBits(B) on page 18 --> byte_fns.rs
+// Algorithm 2 BitsToBytes(b) on page 17 --> optimized out (byte_fns.rs)
+// Algorithm 3 BytesToBits(B) on page 18 --> optimized out (byte_fns.rs)
 // Algorithm 4 ByteEncode_d(F) on page 19 --> byte_fns.rs
 // Algorithm 5 ByteDecode_d(B) on page 19 --> byte_fns.rs
 // Algorithm 6 SampleNTT(B) on page 20 --> sampling.rs
@@ -29,7 +29,7 @@ use zeroize::{Zeroize, ZeroizeOnDrop};
 // Algorithm 16 ML-KEM.Encaps(ek) on page 30 --> ml_kem.rs
 // Algorithm 17 ML-KEM.Decaps(c,dk) on page 32 --> ml_kem.rs
 // PRF and XOF on page 16 --> helpers.rs
-// Three has functions: G, H, J on page 17  --> helpers.rs
+// Three hash functions: G, H, J on page 17 --> helpers.rs
 // Compress and Decompress on page 18 --> helpers.rs
 //
 // The three parameter sets are modules in this file with injected macro code
@@ -84,8 +84,10 @@ macro_rules! functionality {
  const ETA2_64: usize = ETA2 * 64; // So this is handled manually...what a pain
  const J_LEN: usize = 32 + 32 * (DU * K + DV);
 
+ use crate::byte_fns::byte_decode;
  use crate::ml_kem::{ml_kem_decaps, ml_kem_encaps, ml_kem_key_gen};
  use crate::traits::{Decaps, Encaps, KeyGen, SerDes};
+ use crate::types::Z256;
  use crate::SharedSecretKey;
 
  use rand_core::CryptoRngCore;
@@ -159,7 +161,10 @@ macro_rules! functionality {
  type ByteArray = [u8; EK_LEN];
 
  fn try_from_bytes(ek: Self::ByteArray) -> Result<Self, &'static str> {
- // TODO: validation here
+ let mut ek_hat = [Z256(0); 256];
+ for i in 0..K {
+ byte_decode(12, &ek[384 * i..384 * (i + 1)], &mut ek_hat)?;
+ }
  Ok(EncapsKey(ek))
  }
 

diff --git a/tests/cctv_vectors/ML-KEM/README.md b/tests/cctv_vectors/ML-KEM/README.md
@@ -0,0 +1,175 @@
+# ML-KEM Intermediate values
+
+https://c2sp.org/CCTV/ML-KEM
+
+This directory collects resources for testing (and developing) ML-KEM
+implementations, as specified in FIPS 203.
+
+In particular, it provides:
+
+* Intermediate values for testing and debugging each intermediate step and
+ partial algorithm.
+
+* Negative test vectors for invalid encapsulation keys.
+
+* "Unlucky" vectors that require an unusually large number of XOF reads.
+
+* Vectors that fail if `strcmp()` is used in ML-KEM.Decaps.
+
+* Accumulated vectors (derived from the reference pq-crystals implementation)
+ for testing randomly reachable edge cases without checking in large amounts
+ of data, including an extended run of one million tests.
+
+* References to other test vectors.
+
+All test vectors are made available under the terms of the
+[CC0 1.0](https://creativecommons.org/publicdomain/zero/1.0).
+
+Implementers might also be interested in ["Enough Polynomials and Linear Algebra
+to Implement Kyber"](https://words.filippo.io/kyber-math/).
+
+### Changes from the FIPS 203 draft
+
+Like the [official intermediate values][NIST vectors] from October 2023, all the
+vectors in this directory implement the following two changes:
+
+1. The order of the input i and j to the XOF at step 6 in Algorithm 12
+ (K-PKE.KeyGen) is switched.
+2. The order of the input i and j to the XOF at step 6 in Algorithm 13
+ (K-PKE.Encrypt) is switched.
+
+This reverts [an unintentional change][pqc-forum discussion] that will probably
+be reverted in the final document and makes K-PKE consistent with Kyber round 3.
+
+[NIST vectors]: https://csrc.nist.gov/Projects/post-quantum-cryptography/post-quantum-cryptography-standardization/example-files
+
+[pqc-forum discussion]: https://groups.google.com/a/list.nist.gov/g/pqc-forum/c/s-C-zIAeKfE/m/eZJmXYsSAQAJ
+
+## Intermediate values
+
+The files in the `intermediate/` folder provide vectors for developing,
+debugging, and testing ML-KEM step-by-step.
+
+Each file lists every intermediate value of the ML-KEM.KeyGen, K-PKE.KeyGen,
+ML-KEM.Encaps, K-PKE.Encrypt, ML-KEM.Decaps, and K-PKE.Decrypt algorithms, all
+executed on the same set of keys and messages.
+
+Byte strings are encoded in hex. Polynomials, NTT representatives, vectors, and
+matrixes are encoded with ByteEncode12 and then in hex. Some polynomials are
+also presented as an array of decimal coefficients to aid in the implementation
+of ByteEncode, NTT, and Compress.
+
+Where values appear multiple times across algorithms, they are not repeated in
+the test files. uᵈ and vᵈ are the u and v values from K-PKE.Decrypt, after they
+went through a Compress/Decompress cycle. (Props to the spec for maintaining a
+consistent lexical scope across algorithms! The one exception is that r is
+reused for the 32-byte K-PKE.Encrypt input and for the vector of polynomials
+sampled from it. The two are easily distinguished.)
+
+## Bad encapsulation keys
+
+Section 6.2 of FIPS 203 ipd (ML-KEM Encapsulation) requires input validation on
+the encapsulation key, checking that all encoded polynomial coefficients are
+reduced modulo the field prime (the "*Modulus check*").
+
+The files in the `modulus/` folder provide invalid ML-KEM.Encaps inputs,
+hex-encoded, one per line. Every value in the range q to 2¹²-1 and every
+position in the key is tested individually.
+
+The vectors share most of the coefficients so that they compress from 1–3 MiB
+down to 12–28 KiB.
+
+## Unlucky NTT sampling vector
+
+The SampleNTT algorithm reads a variable number of bytes from an Extendable
+Output Function to perform rejection sampling. The files in the `unlucky/`
+folder provide test vectors that cause many more rejections than usual.
+
+In particular, these vectors require reading more than 575 bytes from the
+SHAKE-128 XOF in SampleNTT, which would ordinarily happen [with probability
+2⁻³⁸](https://www.wolframalpha.com/input?i=binomcdf%28384%2C+3329%2F4096%2C+255%29).
+
+Note that these vectors can be run through a regular deterministic ML-KEM
+testing API (i.e. one that injects the `d`, `z`, `m` random values) since they
+were bruteforced at the level of the `d` value.
+
+If for some reason an implementation needs to draw a fixed number of bytes from
+the XOF, at least 704 bytes are necessary for [a negligible probability (~
+2⁻¹²⁸)](https://www.wolframalpha.com/input?i=binomcdf%28469%2C+3329%2F4096%2C+255%29)
+of failure.
+
+## `strcmp` vectors
+
+In ML-KEM.Decaps the ciphertext is compared with the output of K-PKE.Encrypt for
+implicit rejection. If an implementation were to use `strcmp()` for that
+comparison it would fail to reject some ciphertexts if a zero byte terminates
+the comparison early.
+
+The files in the `strcmp/` folder provide test vectors that exercise this edge
+case. The chance of it occurring randomly is 2⁻¹⁶, and it is not covered by the
+pq-crystals vectors.
+
+## Accumulated pq-crystals vectors
+
+The `ref/test/test_vectors.c` program in the *standard* branch of
+github.com/pq-crystals/kyber produces 10 000 randomly generated tests.
+Thanks to the limited range of fundamental integer types (at most 0–4096), this
+is sufficient to hit a lot of edge cases that don't need to be deliberately
+targeted with specific test vectors.
+
+The output of the three `test_vectors.c` programs amounts to 300MB. Instead of
+checking in such a large amount of data, or running a binary as part of testing,
+implementations can generate the test inputs from the deterministic RNG, and
+check that the test outputs hash to the expected value.
+
+The input format, output format, and output hash are provided below.
+
+The deterministic RNG is a single SHAKE-128 instance with an empty input.
+(The RNG stream starts with `7f9c2ba4e88f827d616045507605853e`.)
+
+For each test, the following values are drawn from the RNG in order:
+
+* `d` for K-PKE.KeyGen
+* `z` for ML-KEM.KeyGen
+* `m` for ML-KEM.Encaps
+* `ct` as an invalid ciphertext input to ML-KEM.Decaps
+
+Then, the following values are written to a running SHAKE-128 instance in order:
+
+* `ek` from ML-KEM.KeyGen
+* `dk` from ML-KEM.KeyGen
+* `ct` from ML-KEM.Encaps
+* `k` from ML-KEM.Encaps (which should be checked to match the output of
+ ML-KEM.Decaps when provided with the correct `ct`)
+* `k` from ML-KEM.Decaps when provided with the random `ct`
+
+The resulting hashes for 10 000 consecutive tests are:
+
+* ML-KEM-512: `845913ea5a308b803c764a9ed8e9d814ca1fd9c82ba43c7b1e64b79c7a6ec8e4`
+* ML-KEM-768: `f7db260e1137a742e05fe0db9525012812b004d29040a5b606aad3d134b548d3`
+* ML-KEM-1024: `47ac888fe61544efc0518f46094b4f8a600965fc89822acb06dc7169d24f3543`
+
+The resulting hashes for 1 000 000 consecutive tests are:
+
+* ML-KEM-512: `578eeaa1156848cbf7a15bafef963b4ccabe3308ddfb7dbdd20ad965f634e81d`
+* ML-KEM-768: `70090cc5842aad0ec43d5042c783fae9bc320c047b5dafcb6e134821db02384d`
+* ML-KEM-1024: `7ccc6d803739d3db3c5ce39c7130f459db32a199c6605e3be210e5a89d4c4b95`
+
+## Other Known Answer Tests
+
+The following vectors also target FIPS 203 ipd with the Â fix described above.
+
+* [NIST's Intermediate Values](https://csrc.nist.gov/Projects/post-quantum-cryptography/post-quantum-cryptography-standardization/example-files)
+ * Random values (such as d, z, and m) are equal. This is not spec compliant.
+
+* [pq-crystals](https://github.com/pq-crystals/kyber), *standard* branch
+ * `ref/test/test_vectors.c` generates 10 000 vectors randomly.
+ * Accumulated vectors are available above.
+
+* [post-quantum-cryptography/KAT](https://github.com/post-quantum-cryptography/KAT/tree/main/MLKEM)
+ * Each file contains 100 randomly generated vectors.
+
+The s2n-tls project includes
+[vectors](https://github.com/aws/s2n-tls/tree/a6517c5fe97b1aa1898f2233498613dd53735bd8/tests/unit/kats)
+for Kyber round 3 as well as some of the hybrid KEMs, including those used in
+the TLS draft.