ML-KEM benchmarks: add per-operation microbenchmarks. The existing benchmarks for the complete TLS server and client sides are retained. Also, fix a tiny bug in the ParseEncap benchmark that made its timing dependent on the public key, adding more noise to the output than necessary. Bug: 503700354 Change-Id: I9e2da2d0c25b71c0cb325713f63e04d56a6a6964 Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/93088 Reviewed-by: Xiangfei Ding <xfding@google.com> Presubmit-BoringSSL-Verified: boringssl-scoped@luci-project-accounts.iam.gserviceaccount.com <boringssl-scoped@luci-project-accounts.iam.gserviceaccount.com>
diff --git a/bench/mlkem.cc b/bench/mlkem.cc index ef1c6dd..8ea49f1 100644 --- a/bench/mlkem.cc +++ b/bench/mlkem.cc
@@ -25,6 +25,8 @@ #include "./internal.h" namespace { + +// generate_key + decap (same as TLS server side) void BM_SpeedMLKEM768KeyGenDecap(benchmark::State &state) { uint8_t ciphertext[MLKEM768_CIPHERTEXT_BYTES]; // This ciphertext is nonsense, but decap is constant-time so, for the @@ -44,14 +46,19 @@ } } +// parse + encap (same as TLS client side) void BM_SpeedMLKEM768ParseEncacp(benchmark::State &state) { - uint8_t ciphertext[MLKEM768_CIPHERTEXT_BYTES]; - MLKEM768_private_key priv; - uint8_t encoded_public_key[MLKEM768_PUBLIC_KEY_BYTES]; - MLKEM768_generate_key(encoded_public_key, nullptr, &priv); - MLKEM768_public_key pub; - for (auto _ : state) { + state.PauseTiming(); + uint8_t encoded_public_key[MLKEM768_PUBLIC_KEY_BYTES]; + { + MLKEM768_private_key priv; + MLKEM768_generate_key(encoded_public_key, nullptr, &priv); + } + benchmark::DoNotOptimize(encoded_public_key); + state.ResumeTiming(); + + MLKEM768_public_key pub; CBS encoded_public_key_cbs; CBS_init(&encoded_public_key_cbs, encoded_public_key, sizeof(encoded_public_key)); @@ -60,13 +67,14 @@ return; } uint8_t shared_secret[MLKEM_SHARED_SECRET_BYTES]; + uint8_t ciphertext[MLKEM768_CIPHERTEXT_BYTES]; MLKEM768_encap(ciphertext, shared_secret, &pub); benchmark::DoNotOptimize(ciphertext); benchmark::DoNotOptimize(shared_secret); } } - +// generate_key + decap (same as TLS server side) void BM_SpeedMLKEM1024KeyGenDecap(benchmark::State &state) { uint8_t ciphertext[MLKEM1024_CIPHERTEXT_BYTES]; // This ciphertext is nonsense, but decap is constant-time so, for the @@ -87,23 +95,193 @@ } } +// parse + encap (same as TLS client side) void BM_SpeedMLKEM1024ParseEncacp(benchmark::State &state) { - uint8_t ciphertext[MLKEM1024_CIPHERTEXT_BYTES]; - auto priv = std::make_unique<MLKEM1024_private_key>(); - uint8_t encoded_public_key[MLKEM1024_PUBLIC_KEY_BYTES]; - MLKEM1024_generate_key(encoded_public_key, nullptr, &*priv); - auto pub = std::make_unique<MLKEM1024_public_key>(); - for (auto _ : state) { + 
state.PauseTiming(); + uint8_t encoded_public_key[MLKEM1024_PUBLIC_KEY_BYTES]; + { + // On heap to avoid stack frame size limit. + auto priv = std::make_unique<MLKEM1024_private_key>(); + MLKEM1024_generate_key(encoded_public_key, nullptr, &*priv); + } + benchmark::DoNotOptimize(encoded_public_key); + state.ResumeTiming(); + + MLKEM1024_public_key pub; CBS encoded_public_key_cbs; CBS_init(&encoded_public_key_cbs, encoded_public_key, sizeof(encoded_public_key)); - if (!MLKEM1024_parse_public_key(&*pub, &encoded_public_key_cbs)) { + if (!MLKEM1024_parse_public_key(&pub, &encoded_public_key_cbs)) { state.SkipWithError("Failure in MLKEM1024_parse_public_key."); return; } uint8_t shared_secret[MLKEM_SHARED_SECRET_BYTES]; - MLKEM1024_encap(ciphertext, shared_secret, &*pub); + uint8_t ciphertext[MLKEM1024_CIPHERTEXT_BYTES]; + MLKEM1024_encap(ciphertext, shared_secret, &pub); + benchmark::DoNotOptimize(ciphertext); + benchmark::DoNotOptimize(shared_secret); + } +} + +// Microbenchmarks follow + +void BM_SpeedMLKEM768KeyGenOnly(benchmark::State &state) { + for (auto _ : state) { + MLKEM768_private_key priv; + uint8_t encoded_public_key[MLKEM768_PUBLIC_KEY_BYTES]; + MLKEM768_generate_key(encoded_public_key, nullptr, &priv); + benchmark::DoNotOptimize(encoded_public_key); + benchmark::DoNotOptimize(priv); + } +} + +void BM_SpeedMLKEM768DecapOnly(benchmark::State &state) { + uint8_t ciphertext[MLKEM768_CIPHERTEXT_BYTES]; + // This ciphertext is nonsense, but decap is constant-time so, for the + // purposes of timing, it's fine. 
+ memset(ciphertext, 42, sizeof(ciphertext)); + + for (auto _ : state) { + state.PauseTiming(); + MLKEM768_private_key priv; + { + uint8_t encoded_public_key[MLKEM768_PUBLIC_KEY_BYTES]; + MLKEM768_generate_key(encoded_public_key, nullptr, &priv); + } + benchmark::DoNotOptimize(priv); + state.ResumeTiming(); + + uint8_t shared_secret[MLKEM_SHARED_SECRET_BYTES]; + if (!MLKEM768_decap(shared_secret, ciphertext, sizeof(ciphertext), &priv)) { + state.SkipWithError("MLKEM768_decap failed"); + return; + } + benchmark::DoNotOptimize(shared_secret); + } +} + +void BM_SpeedMLKEM768ParseOnly(benchmark::State &state) { + for (auto _ : state) { + state.PauseTiming(); + uint8_t encoded_public_key[MLKEM768_PUBLIC_KEY_BYTES]; + { + MLKEM768_private_key priv; + MLKEM768_generate_key(encoded_public_key, nullptr, &priv); + } + benchmark::DoNotOptimize(encoded_public_key); + state.ResumeTiming(); + + MLKEM768_public_key pub; + CBS encoded_public_key_cbs; + CBS_init(&encoded_public_key_cbs, encoded_public_key, + sizeof(encoded_public_key)); + if (!MLKEM768_parse_public_key(&pub, &encoded_public_key_cbs)) { + state.SkipWithError("Failure in MLKEM768_parse_public_key."); + return; + } + benchmark::DoNotOptimize(pub); + } +} + +void BM_SpeedMLKEM768EncapOnly(benchmark::State &state) { + for (auto _ : state) { + state.PauseTiming(); + MLKEM768_public_key pub; + { + MLKEM768_private_key priv; + uint8_t encoded_public_key[MLKEM768_PUBLIC_KEY_BYTES]; + MLKEM768_generate_key(encoded_public_key, nullptr, &priv); + MLKEM768_public_from_private(&pub, &priv); + } + benchmark::DoNotOptimize(pub); + state.ResumeTiming(); + + uint8_t shared_secret[MLKEM_SHARED_SECRET_BYTES]; + uint8_t ciphertext[MLKEM768_CIPHERTEXT_BYTES]; + MLKEM768_encap(ciphertext, shared_secret, &pub); + benchmark::DoNotOptimize(ciphertext); + benchmark::DoNotOptimize(shared_secret); + } +} + +void BM_SpeedMLKEM1024KeyGenOnly(benchmark::State &state) { + for (auto _ : state) { + MLKEM1024_private_key priv; + uint8_t 
encoded_public_key[MLKEM1024_PUBLIC_KEY_BYTES]; + MLKEM1024_generate_key(encoded_public_key, nullptr, &priv); + benchmark::DoNotOptimize(encoded_public_key); + benchmark::DoNotOptimize(priv); + } +} + +void BM_SpeedMLKEM1024DecapOnly(benchmark::State &state) { + uint8_t ciphertext[MLKEM1024_CIPHERTEXT_BYTES]; + // This ciphertext is nonsense, but decap is constant-time so, for the + // purposes of timing, it's fine. + memset(ciphertext, 42, sizeof(ciphertext)); + + for (auto _ : state) { + state.PauseTiming(); + MLKEM1024_private_key priv; + { + uint8_t encoded_public_key[MLKEM1024_PUBLIC_KEY_BYTES]; + MLKEM1024_generate_key(encoded_public_key, nullptr, &priv); + } + benchmark::DoNotOptimize(priv); + state.ResumeTiming(); + + uint8_t shared_secret[MLKEM_SHARED_SECRET_BYTES]; + if (!MLKEM1024_decap(shared_secret, ciphertext, sizeof(ciphertext), + &priv)) { + state.SkipWithError("MLKEM1024_decap failed"); + return; + } + benchmark::DoNotOptimize(shared_secret); + } +} + +void BM_SpeedMLKEM1024ParseOnly(benchmark::State &state) { + for (auto _ : state) { + state.PauseTiming(); + uint8_t encoded_public_key[MLKEM1024_PUBLIC_KEY_BYTES]; + { + // On heap to avoid stack frame size limit. + auto priv = std::make_unique<MLKEM1024_private_key>(); + MLKEM1024_generate_key(encoded_public_key, nullptr, &*priv); + } + benchmark::DoNotOptimize(encoded_public_key); + state.ResumeTiming(); + + MLKEM1024_public_key pub; + CBS encoded_public_key_cbs; + CBS_init(&encoded_public_key_cbs, encoded_public_key, + sizeof(encoded_public_key)); + if (!MLKEM1024_parse_public_key(&pub, &encoded_public_key_cbs)) { + state.SkipWithError("Failure in MLKEM1024_parse_public_key."); + return; + } + benchmark::DoNotOptimize(pub); + } +} + +void BM_SpeedMLKEM1024EncapOnly(benchmark::State &state) { + for (auto _ : state) { + state.PauseTiming(); + MLKEM1024_public_key pub; + { + // On heap to avoid stack frame size limit. 
+ auto priv = std::make_unique<MLKEM1024_private_key>(); + uint8_t encoded_public_key[MLKEM1024_PUBLIC_KEY_BYTES]; + MLKEM1024_generate_key(encoded_public_key, nullptr, &*priv); + MLKEM1024_public_from_private(&pub, &*priv); + } + benchmark::DoNotOptimize(pub); + state.ResumeTiming(); + + uint8_t shared_secret[MLKEM_SHARED_SECRET_BYTES]; + uint8_t ciphertext[MLKEM1024_CIPHERTEXT_BYTES]; + MLKEM1024_encap(ciphertext, shared_secret, &pub); benchmark::DoNotOptimize(ciphertext); benchmark::DoNotOptimize(shared_secret); } @@ -114,6 +292,15 @@ BENCHMARK(BM_SpeedMLKEM768ParseEncacp)->Apply(bssl::bench::SetThreads); BENCHMARK(BM_SpeedMLKEM1024KeyGenDecap)->Apply(bssl::bench::SetThreads); BENCHMARK(BM_SpeedMLKEM1024ParseEncacp)->Apply(bssl::bench::SetThreads); + + BENCHMARK(BM_SpeedMLKEM768KeyGenOnly)->Apply(bssl::bench::SetThreads); + BENCHMARK(BM_SpeedMLKEM768DecapOnly)->Apply(bssl::bench::SetThreads); + BENCHMARK(BM_SpeedMLKEM768ParseOnly)->Apply(bssl::bench::SetThreads); + BENCHMARK(BM_SpeedMLKEM768EncapOnly)->Apply(bssl::bench::SetThreads); + BENCHMARK(BM_SpeedMLKEM1024KeyGenOnly)->Apply(bssl::bench::SetThreads); + BENCHMARK(BM_SpeedMLKEM1024DecapOnly)->Apply(bssl::bench::SetThreads); + BENCHMARK(BM_SpeedMLKEM1024ParseOnly)->Apply(bssl::bench::SetThreads); + BENCHMARK(BM_SpeedMLKEM1024EncapOnly)->Apply(bssl::bench::SetThreads); } } // namespace