From 6841be32b95b52e25be19b5910ab70c1a365dffc Mon Sep 17 00:00:00 2001 From: Fabian Boemer Date: Wed, 26 May 2021 14:19:23 -0700 Subject: [PATCH 1/4] Faster CKKS multiply --- native/src/seal/evaluator.cpp | 86 +++++++++++++++++++++++++++++++---- 1 file changed, 76 insertions(+), 10 deletions(-) diff --git a/native/src/seal/evaluator.cpp b/native/src/seal/evaluator.cpp index 9dac91e95..a9d89f0e5 100644 --- a/native/src/seal/evaluator.cpp +++ b/native/src/seal/evaluator.cpp @@ -284,7 +284,7 @@ namespace seal #endif } - void Evaluator::bfv_multiply(Ciphertext &encrypted1, const Ciphertext &encrypted2, MemoryPoolHandle pool) const + void Evaluator::bfv_multiply(Ciphertext &encrypted1, const Ciphertext &encrypted2, MemoryPoolHandle pool) const { if (encrypted1.is_ntt_form() || encrypted2.is_ntt_form()) { @@ -506,6 +506,70 @@ namespace seal // Prepare destination encrypted1.resize(context_, context_data.parms_id(), dest_size); + if (dest_size == 3) + { + // Must divide coeff_count, i.e. be a power of two + size_t tile_size = min(coeff_count, size_t(256)); + size_t num_tiles = coeff_count / tile_size; +#ifdef SEAL_DEBUG + if (coeff_count % tile_size != 0) + { + throw invalid_argument("tile_size does not divide coeff_count"); + } +#endif + + // Set up iterators for input ciphertexts + PolyIter encrypted1_iter = iter(encrypted1); + ConstPolyIter encrypted2_iter = iter(encrypted2); + + // Semantic misuse of RNSIter; each is really pointing to the data for each RNS factor in sequence + ConstRNSIter encrypted2_0_iter(*encrypted2_iter[0], tile_size); + ConstRNSIter encrypted2_1_iter(*encrypted2_iter[1], tile_size); + RNSIter encrypted1_0_iter(*encrypted1_iter[0], tile_size); + RNSIter encrypted1_1_iter(*encrypted1_iter[1], tile_size); + RNSIter encrypted1_2_iter(*encrypted1_iter[2], tile_size); + + // Temporary buffer to store intermediate results + SEAL_ALLOCATE_GET_COEFF_ITER(temp, tile_size, pool); + + // Computes the output tile_size coefficients at a time + // Given input 
pairs of polynomials x = (x[0], x[1], x[2]), y = (y[0], y[1]), computes + // x = (x[0] * y[0], x[0] * y[1] + x[1] * y[0], x[1] * y[1]) + // with appropriate modular reduction + SEAL_ITERATE(coeff_modulus, coeff_modulus_size, [&](auto I) { + SEAL_ITERATE(iter(size_t(0)), num_tiles, [&](auto J) { + // Compute third output polynomial, overwriting input + // x[2] = x[1] * y[1] + dyadic_product_coeffmod( + encrypted1_1_iter[0], encrypted2_1_iter[0], tile_size, I, encrypted1_2_iter[0]); + + // Compute second output polynomial, overwriting input + // temp = x[1] * y[0] + dyadic_product_coeffmod(encrypted1_1_iter[0], encrypted2_0_iter[0], tile_size, I, temp); + // x[1] = x[0] * y[1] + dyadic_product_coeffmod( + encrypted1_0_iter[0], encrypted2_1_iter[0], tile_size, I, encrypted1_1_iter[0]); + // x[1] += temp + add_poly_coeffmod(encrypted1_1_iter[0], temp, tile_size, I, encrypted1_1_iter[0]); + + // Compute first output polynomial, overwriting input + // x[0] = x[0] * y[0] + dyadic_product_coeffmod( + encrypted1_0_iter[0], encrypted2_0_iter[0], tile_size, I, encrypted1_0_iter[0]); + + // Manually increment iterators + ++encrypted1_0_iter; + ++encrypted1_1_iter; + ++encrypted1_2_iter; + ++encrypted2_0_iter; + ++encrypted2_1_iter; + }); + }); + + encrypted1.scale() = new_scale; + return; + } + // Set up iterators for input ciphertexts auto encrypted1_iter = iter(encrypted1); auto encrypted2_iter = iter(encrypted2); @@ -921,7 +985,8 @@ namespace seal } } - void Evaluator::mod_switch_drop_to_next(const Ciphertext &encrypted, Ciphertext &destination, MemoryPoolHandle pool) const + void Evaluator::mod_switch_drop_to_next( + const Ciphertext &encrypted, Ciphertext &destination, MemoryPoolHandle pool) const { // Assuming at this point encrypted is already validated. 
auto context_data_ptr = context_.get_context_data(encrypted.parms_id()); @@ -1020,7 +1085,8 @@ namespace seal plain.parms_id() = next_context_data.parms_id(); } - void Evaluator::mod_switch_to_next(const Ciphertext &encrypted, Ciphertext &destination, MemoryPoolHandle pool) const + void Evaluator::mod_switch_to_next( + const Ciphertext &encrypted, Ciphertext &destination, MemoryPoolHandle pool) const { // Verify parameters. if (!is_metadata_valid_for(encrypted, context_) || !is_buffer_valid(encrypted)) @@ -1627,7 +1693,7 @@ namespace seal encrypted.scale() = new_scale; } - void Evaluator::multiply_plain_ntt(Ciphertext &encrypted_ntt, const Plaintext &plain_ntt) const + void Evaluator::multiply_plain_ntt(Ciphertext &encrypted_ntt, const Plaintext &plain_ntt) const { // Verify parameters. if (!plain_ntt.is_ntt_form()) @@ -1668,7 +1734,7 @@ namespace seal encrypted_ntt.scale() = new_scale; } - void Evaluator::transform_to_ntt_inplace(Plaintext &plain, parms_id_type parms_id, MemoryPoolHandle pool) const + void Evaluator::transform_to_ntt_inplace(Plaintext &plain, parms_id_type parms_id, MemoryPoolHandle pool) const { // Verify parameters. if (!is_valid_for(plain, context_)) @@ -1761,7 +1827,7 @@ namespace seal plain.parms_id() = parms_id; } - void Evaluator::transform_to_ntt_inplace(Ciphertext &encrypted) const + void Evaluator::transform_to_ntt_inplace(Ciphertext &encrypted) const { // Verify parameters. if (!is_metadata_valid_for(encrypted, context_) || !is_buffer_valid(encrypted)) @@ -1809,7 +1875,7 @@ namespace seal #endif } - void Evaluator::transform_from_ntt_inplace(Ciphertext &encrypted_ntt) const + void Evaluator::transform_from_ntt_inplace(Ciphertext &encrypted_ntt) const { // Verify parameters. 
if (!is_metadata_valid_for(encrypted_ntt, context_) || !is_buffer_valid(encrypted_ntt)) @@ -1857,7 +1923,7 @@ namespace seal } void Evaluator::apply_galois_inplace( - Ciphertext &encrypted, uint32_t galois_elt, const GaloisKeys &galois_keys, MemoryPoolHandle pool) const + Ciphertext &encrypted, uint32_t galois_elt, const GaloisKeys &galois_keys, MemoryPoolHandle pool) const { // Verify parameters. if (!is_metadata_valid_for(encrypted, context_) || !is_buffer_valid(encrypted)) @@ -1961,7 +2027,7 @@ namespace seal } void Evaluator::rotate_internal( - Ciphertext &encrypted, int steps, const GaloisKeys &galois_keys, MemoryPoolHandle pool) const + Ciphertext &encrypted, int steps, const GaloisKeys &galois_keys, MemoryPoolHandle pool) const { auto context_data_ptr = context_.get_context_data(encrypted.parms_id()); if (!context_data_ptr) @@ -2019,7 +2085,7 @@ namespace seal void Evaluator::switch_key_inplace( Ciphertext &encrypted, ConstRNSIter target_iter, const KSwitchKeys &kswitch_keys, size_t kswitch_keys_index, - MemoryPoolHandle pool) const + MemoryPoolHandle pool) const { auto parms_id = encrypted.parms_id(); auto &context_data = *context_.get_context_data(parms_id); From 97a568d38f140f00101ccfb8c9d4c00f27393b79 Mon Sep 17 00:00:00 2001 From: Fabian Boemer Date: Fri, 28 May 2021 07:05:14 -0700 Subject: [PATCH 2/4] Clarify comment --- native/src/seal/evaluator.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/native/src/seal/evaluator.cpp b/native/src/seal/evaluator.cpp index a9d89f0e5..46cb39174 100644 --- a/native/src/seal/evaluator.cpp +++ b/native/src/seal/evaluator.cpp @@ -533,7 +533,7 @@ namespace seal SEAL_ALLOCATE_GET_COEFF_ITER(temp, tile_size, pool); // Computes the output tile_size coefficients at a time - // Given input pairs of polynomials x = (x[0], x[1], x[2]), y = (y[0], y[1]), computes + // Given input tuples of polynomials x = (x[0], x[1], x[2]), y = (y[0], y[1]), computes // x = (x[0] * y[0], x[0] * y[1] + x[1] * y[0], x[1] * 
y[1]) // with appropriate modular reduction SEAL_ITERATE(coeff_modulus, coeff_modulus_size, [&](auto I) { From b0987a399c4d5e09c8d38761bc74c9ec5b405f4e Mon Sep 17 00:00:00 2001 From: Fabian Boemer Date: Fri, 28 May 2021 07:24:43 -0700 Subject: [PATCH 3/4] Add more clarifying comments --- native/src/seal/evaluator.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/native/src/seal/evaluator.cpp b/native/src/seal/evaluator.cpp index 46cb39174..2acf53d7b 100644 --- a/native/src/seal/evaluator.cpp +++ b/native/src/seal/evaluator.cpp @@ -508,7 +508,11 @@ namespace seal if (dest_size == 3) { - // Must divide coeff_count, i.e. be a power of two + // We want to keep six polynomials in the L1 cache: x[0], x[1], x[2], y[0], y[1], temp. + // For a 32KiB cache, which can store 32768 / 8 = 4096 coefficients, i.e. 4096 / 6 = 682.67 per polynomial, + // we should keep the tile size at 682 or below. The tile size must divide coeff_count, i.e. be a power of + // two. Some testing shows similar performance with tile size 256 and 512, and worse performance on smaller + // tiles. We pick the smaller of the two to prevent L1 cache misses on processors with < 32 KiB L1 cache. size_t tile_size = min(coeff_count, size_t(256)); size_t num_tiles = coeff_count / tile_size; #ifdef SEAL_DEBUG From 1ee4165f02e20ea8f070b2bf37d7afcc09fd6c7a Mon Sep 17 00:00:00 2001 From: Fabian Boemer Date: Fri, 28 May 2021 19:46:26 -0700 Subject: [PATCH 4/4] Fix typo in README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 028782118..f855c0b2b 100644 --- a/README.md +++ b/README.md @@ -125,7 +125,7 @@ The optional dependencies and their tested versions (other versions may work as #### Intel HEXL -Intel HEXL is a library providing efficient implementations of cryptographic primitives common in homomorphic encryption. The acceleration is particularly evident on Intel processors with the Intel AVX512-IMA52 instruction set.
+Intel HEXL is a library providing efficient implementations of cryptographic primitives common in homomorphic encryption. The acceleration is particularly evident on Intel processors with the Intel AVX512-IFMA52 instruction set. #### Microsoft GSL