diff --git a/aes/ctr.go b/aes/ctr.go new file mode 100644 index 0000000..89e2305 --- /dev/null +++ b/aes/ctr.go @@ -0,0 +1,148 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package aes + +import ( + //"crypto/internal/fips140" + "github.com/xtls/reality/alias" + "github.com/xtls/reality/subtle" + "github.com/xtls/reality/byteorder" + "math/bits" +) + +type CTR struct { + b Block + ivlo, ivhi uint64 // start counter as 64-bit limbs + offset uint64 // for XORKeyStream only +} + +func NewCTR(b *Block, iv []byte) *CTR { + // Allocate the CTR here, in an easily inlineable function, so + // the allocation can be done in the caller's stack frame + // instead of the heap. See issue 70499. + c := newCTR(b, iv) + return &c +} +func newCTR(b *Block, iv []byte) CTR { + if len(iv) != BlockSize { + panic("bad IV length") + } + + return CTR{ + b: *b, + ivlo: byteorder.BEUint64(iv[8:16]), + ivhi: byteorder.BEUint64(iv[0:8]), + offset: 0, + } +} + +func (c *CTR) XORKeyStream(dst, src []byte) { + c.XORKeyStreamAt(dst, src, c.offset) + + var carry uint64 + c.offset, carry = bits.Add64(c.offset, uint64(len(src)), 0) + if carry != 0 { + panic("crypto/aes: counter overflow") + } +} + +// RoundToBlock is used by CTR_DRBG, which discards the rightmost unused bits at +// each request. It rounds the offset up to the next block boundary. +func RoundToBlock(c *CTR) { + if remainder := c.offset % BlockSize; remainder != 0 { + var carry uint64 + c.offset, carry = bits.Add64(c.offset, BlockSize-remainder, 0) + if carry != 0 { + panic("crypto/aes: counter overflow") + } + } +} + +// XORKeyStreamAt behaves like XORKeyStream but keeps no state, and instead +// seeks into the keystream by the given bytes offset from the start (ignoring +// any XORKetStream calls). This allows for random access into the keystream, up +// to 16 EiB from the start. +func (c *CTR) XORKeyStreamAt(dst, src []byte, offset uint64) { + if len(dst) < len(src) { + panic("crypto/aes: len(dst) < len(src)") + } + dst = dst[:len(src)] + if alias.InexactOverlap(dst, src) { + panic("crypto/aes: invalid buffer overlap") + } + //fips140.RecordApproved() + + ivlo, ivhi := add128(c.ivlo, c.ivhi, offset/BlockSize) + + if blockOffset := offset % BlockSize; blockOffset != 0 { + // We have a partial block at the beginning. + var in, out [BlockSize]byte + copy(in[blockOffset:], src) + ctrBlocks1(&c.b, &out, &in, ivlo, ivhi) + n := copy(dst, out[blockOffset:]) + src = src[n:] + dst = dst[n:] + ivlo, ivhi = add128(ivlo, ivhi, 1) + } + + for len(src) >= 8*BlockSize { + ctrBlocks8(&c.b, (*[8 * BlockSize]byte)(dst), (*[8 * BlockSize]byte)(src), ivlo, ivhi) + src = src[8*BlockSize:] + dst = dst[8*BlockSize:] + ivlo, ivhi = add128(ivlo, ivhi, 8) + } + + // The tail can have at most 7 = 4 + 2 + 1 blocks. + if len(src) >= 4*BlockSize { + ctrBlocks4(&c.b, (*[4 * BlockSize]byte)(dst), (*[4 * BlockSize]byte)(src), ivlo, ivhi) + src = src[4*BlockSize:] + dst = dst[4*BlockSize:] + ivlo, ivhi = add128(ivlo, ivhi, 4) + } + if len(src) >= 2*BlockSize { + ctrBlocks2(&c.b, (*[2 * BlockSize]byte)(dst), (*[2 * BlockSize]byte)(src), ivlo, ivhi) + src = src[2*BlockSize:] + dst = dst[2*BlockSize:] + ivlo, ivhi = add128(ivlo, ivhi, 2) + } + if len(src) >= 1*BlockSize { + ctrBlocks1(&c.b, (*[1 * BlockSize]byte)(dst), (*[1 * BlockSize]byte)(src), ivlo, ivhi) + src = src[1*BlockSize:] + dst = dst[1*BlockSize:] + ivlo, ivhi = add128(ivlo, ivhi, 1) + } + + if len(src) != 0 { + // We have a partial block at the end. + var in, out [BlockSize]byte + copy(in[:], src) + ctrBlocks1(&c.b, &out, &in, ivlo, ivhi) + copy(dst, out[:]) + } +} + +// Each ctrBlocksN function XORs src with N blocks of counter keystream, and +// stores it in dst. src is loaded in full before storing dst, so they can +// overlap even inexactly. The starting counter value is passed in as a pair of +// little-endian 64-bit integers. + +func ctrBlocks(b *Block, dst, src []byte, ivlo, ivhi uint64) { + buf := make([]byte, len(src), 8*BlockSize) + for i := 0; i < len(buf); i += BlockSize { + byteorder.BEPutUint64(buf[i:], ivhi) + byteorder.BEPutUint64(buf[i+8:], ivlo) + ivlo, ivhi = add128(ivlo, ivhi, 1) + encryptBlock(b, buf[i:], buf[i:]) + } + // XOR into buf first, in case src and dst overlap (see above). + subtle.XORBytes(buf, src, buf) + copy(dst, buf) +} + +func add128(lo, hi uint64, x uint64) (uint64, uint64) { + lo, c := bits.Add64(lo, x, 0) + hi, _ = bits.Add64(hi, 0, c) + return lo, hi +} \ No newline at end of file diff --git a/aes/ctr_noasm.go b/aes/ctr_noasm.go new file mode 100644 index 0000000..e93fc80 --- /dev/null +++ b/aes/ctr_noasm.go @@ -0,0 +1,21 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package aes + +func ctrBlocks1(b *Block, dst, src *[BlockSize]byte, ivlo, ivhi uint64) { + ctrBlocks(b, dst[:], src[:], ivlo, ivhi) +} + +func ctrBlocks2(b *Block, dst, src *[2 * BlockSize]byte, ivlo, ivhi uint64) { + ctrBlocks(b, dst[:], src[:], ivlo, ivhi) +} + +func ctrBlocks4(b *Block, dst, src *[4 * BlockSize]byte, ivlo, ivhi uint64) { + ctrBlocks(b, dst[:], src[:], ivlo, ivhi) +} + +func ctrBlocks8(b *Block, dst, src *[8 * BlockSize]byte, ivlo, ivhi uint64) { + ctrBlocks(b, dst[:], src[:], ivlo, ivhi) +} \ No newline at end of file diff --git a/drbg/ctrdrbg.go b/drbg/ctrdrbg.go new file mode 100644 index 0000000..9d4257e --- /dev/null +++ b/drbg/ctrdrbg.go @@ -0,0 +1,143 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package drbg + +import ( + //"crypto/internal/fips140" + "github.com/xtls/reality/aes" + "github.com/xtls/reality/subtle" + "github.com/xtls/reality/byteorder" + "math/bits" +) + +// Counter is an SP 800-90A Rev. 1 CTR_DRBG instantiated with AES-256. +// +// Per Table 3, it has a security strength of 256 bits, a seed size of 384 bits, +// a counter length of 128 bits, a reseed interval of 2^48 requests, and a +// maximum request size of 2^19 bits (2^16 bytes, 64 KiB). +// +// We support a narrow range of parameters that fit the needs of our RNG: +// AES-256, no derivation function, no personalization string, no prediction +// resistance, and 384-bit additional input. +// +// WARNING: this type provides tightly scoped support for the DRBG +// functionality we need for FIPS 140-3 _only_. This type _should not_ be used +// outside of the FIPS 140-3 module for any other use. +// +// In particular, as documented, Counter does not support the derivation +// function, or personalization strings which are necessary for safely using +// this DRBG for generic purposes without leaking sensitive values. +type Counter struct { + // c is instantiated with K as the key and V as the counter. + c aes.CTR + + reseedCounter uint64 +} + +const ( + keySize = 256 / 8 + SeedSize = keySize + aes.BlockSize + reseedInterval = 1 << 48 + maxRequestSize = (1 << 19) / 8 +) + +func NewCounter(entropy *[SeedSize]byte) *Counter { + // CTR_DRBG_Instantiate_algorithm, per Section 10.2.1.3.1. + //fips140.RecordApproved() + + K := make([]byte, keySize) + V := make([]byte, aes.BlockSize) + + // V starts at 0, but is incremented in CTR_DRBG_Update before each use, + // unlike AES-CTR where it is incremented after each use. + V[len(V)-1] = 1 + + cipher, err := aes.New(K) + if err != nil { + panic(err) + } + + c := &Counter{} + c.c = *aes.NewCTR(cipher, V) + c.update(entropy) + c.reseedCounter = 1 + return c +} + +func (c *Counter) update(seed *[SeedSize]byte) { + // CTR_DRBG_Update, per Section 10.2.1.2. + + temp := make([]byte, SeedSize) + c.c.XORKeyStream(temp, seed[:]) + K := temp[:keySize] + V := temp[keySize:] + + // Again, we pre-increment V, like in NewCounter. + increment((*[aes.BlockSize]byte)(V)) + + cipher, err := aes.New(K) + if err != nil { + panic(err) + } + c.c = *aes.NewCTR(cipher, V) +} + +func increment(v *[aes.BlockSize]byte) { + hi := byteorder.BEUint64(v[:8]) + lo := byteorder.BEUint64(v[8:]) + lo, c := bits.Add64(lo, 1, 0) + hi, _ = bits.Add64(hi, 0, c) + byteorder.BEPutUint64(v[:8], hi) + byteorder.BEPutUint64(v[8:], lo) +} + +func (c *Counter) Reseed(entropy, additionalInput *[SeedSize]byte) { + // CTR_DRBG_Reseed_algorithm, per Section 10.2.1.4.1. + //fips140.RecordApproved() + + var seed [SeedSize]byte + subtle.XORBytes(seed[:], entropy[:], additionalInput[:]) + c.update(&seed) + c.reseedCounter = 1 +} + +// Generate produces at most maxRequestSize bytes of random data in out. +func (c *Counter) Generate(out []byte, additionalInput *[SeedSize]byte) (reseedRequired bool) { + // CTR_DRBG_Generate_algorithm, per Section 10.2.1.5.1. + //fips140.RecordApproved() + + if len(out) > maxRequestSize { + panic("crypto/drbg: internal error: request size exceeds maximum") + } + + // Step 1. + if c.reseedCounter > reseedInterval { + return true + } + + // Step 2. + if additionalInput != nil { + c.update(additionalInput) + } else { + // If the additional input is null, the first CTR_DRBG_Update is + // skipped, but the additional input is replaced with an all-zero string + // for the second CTR_DRBG_Update. + additionalInput = new([SeedSize]byte) + } + + // Steps 3-5. + clear(out) + c.c.XORKeyStream(out, out) + aes.RoundToBlock(&c.c) + + // Step 6. + c.update(additionalInput) + + // Step 7. + c.reseedCounter++ + + // Step 8. + return false +} \ No newline at end of file diff --git a/drbg/rand.go b/drbg/rand.go new file mode 100644 index 0000000..7fe0b57 --- /dev/null +++ b/drbg/rand.go @@ -0,0 +1,102 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package drbg provides cryptographically secure random bytes +// usable by FIPS code. In FIPS mode it uses an SP 800-90A Rev. 1 +// Deterministic Random Bit Generator (DRBG). Otherwise, +// it uses the operating system's random number generator. +package drbg + +import ( + "github.com/xtls/reality/entropy" + // "crypto/internal/fips140" + "github.com/xtls/reality/randutil" + // "github.com/xtls/reality/sysrand" + + "crypto/rand" + "io" + "sync" +) + +var drbgs = sync.Pool{ + New: func() any { + var c *Counter + entropy.Depleted(func(seed *[48]byte) { + c = NewCounter(seed) + }) + return c + }, +} + +// Read fills b with cryptographically secure random bytes. In FIPS mode, it +// uses an SP 800-90A Rev. 1 Deterministic Random Bit Generator (DRBG). +// Otherwise, it uses the operating system's random number generator. +func Read(b []byte) { + // if !fips140.Enabled { + // rand.Read(b) + // return + // } + + // At every read, 128 random bits from the operating system are mixed as + // additional input, to make the output as strong as non-FIPS randomness. + // This is not credited as entropy for FIPS purposes, as allowed by Section + // 8.7.2: "Note that a DRBG does not rely on additional input to provide + // entropy, even though entropy could be provided in the additional input". + additionalInput := new([SeedSize]byte) + rand.Read(additionalInput[:16]) + + drbg := drbgs.Get().(*Counter) + defer drbgs.Put(drbg) + + for len(b) > 0 { + size := min(len(b), maxRequestSize) + if reseedRequired := drbg.Generate(b[:size], additionalInput); reseedRequired { + // See SP 800-90A Rev. 1, Section 9.3.1, Steps 6-8, as explained in + // Section 9.3.2: if Generate reports a reseed is required, the + // additional input is passed to Reseed along with the entropy and + // then nulled before the next Generate call. + entropy.Depleted(func(seed *[48]byte) { + drbg.Reseed(seed, additionalInput) + }) + additionalInput = nil + continue + } + b = b[size:] + } +} + +// DefaultReader is a sentinel type, embedded in the default +// [crypto/rand.Reader], used to recognize it when passed to +// APIs that accept a rand io.Reader. +type DefaultReader interface{ defaultReader() } + +// ReadWithReader uses Reader to fill b with cryptographically secure random +// bytes. It is intended for use in APIs that expose a rand io.Reader. +// +// If Reader is not the default Reader from crypto/rand, +// [randutil.MaybeReadByte] and [fips140.RecordNonApproved] are called. +func ReadWithReader(r io.Reader, b []byte) error { + if _, ok := r.(DefaultReader); ok { + Read(b) + return nil + } + + //fips140.RecordNonApproved() + randutil.MaybeReadByte(r) + _, err := io.ReadFull(r, b) + return err +} + +// ReadWithReaderDeterministic is like ReadWithReader, but it doesn't call +// [randutil.MaybeReadByte] on non-default Readers. +func ReadWithReaderDeterministic(r io.Reader, b []byte) error { + if _, ok := r.(DefaultReader); ok { + Read(b) + return nil + } + + //fips140.RecordNonApproved() + _, err := io.ReadFull(r, b) + return err +} \ No newline at end of file diff --git a/entropy/entropy.go b/entropy/entropy.go new file mode 100644 index 0000000..0bea141 --- /dev/null +++ b/entropy/entropy.go @@ -0,0 +1,29 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package entropy provides the passive entropy source for the FIPS 140-3 +// module. It is only used in FIPS mode by [crypto/internal/fips140/drbg.Read]. +// +// This complies with IG 9.3.A, Additional Comment 12, which until January 1, +// 2026 allows new modules to meet an [earlier version] of Resolution 2(b): +// "A software module that contains an approved DRBG that receives a LOAD +// command (or its logical equivalent) with entropy obtained from [...] inside +// the physical perimeter of the operational environment of the module [...]." +// +// Distributions that have their own SP 800-90B entropy source should replace +// this package with their own implementation. +// +// [earlier version]: https://csrc.nist.gov/CSRC/media/Projects/cryptographic-module-validation-program/documents/IG%209.3.A%20Resolution%202b%5BMarch%2026%202024%5D.pdf +package entropy + +// "github.com/xtls/reality/sysrand" +import "crypto/rand" + +// Depleted notifies the entropy source that the entropy in the module is +// "depleted" and provides the callback for the LOAD command. +func Depleted(LOAD func(*[48]byte)) { + var entropy [48]byte + rand.Read(entropy[:]) + LOAD(&entropy) +} \ No newline at end of file diff --git a/handshake_client.go b/handshake_client.go index f3a0738..9cd987a 100644 --- a/handshake_client.go +++ b/handshake_client.go @@ -23,7 +23,7 @@ import ( "time" "github.com/xtls/reality/hpke" - "github.com/xtls/reality/mlkem768" + "github.com/xtls/reality/mlkem" "github.com/xtls/reality/tls13" ) @@ -160,11 +160,11 @@ func (c *Conn) makeClientHello() (*clientHelloMsg, *keySharePrivateKeys, *echCon if err != nil { return nil, nil, nil, err } - seed := make([]byte, mlkem768.SeedSize) + seed := make([]byte, mlkem.SeedSize) if _, err := io.ReadFull(config.rand(), seed); err != nil { return nil, nil, nil, err } - keyShareKeys.kyber, err = mlkem768.NewDecapsulationKey768(seed) + keyShareKeys.kyber, err = mlkem.NewDecapsulationKey768(seed) if err != nil { return nil, nil, nil, err } diff --git a/handshake_client_tls13.go b/handshake_client_tls13.go index 62aee55..f23bc8f 100644 --- a/handshake_client_tls13.go +++ b/handshake_client_tls13.go @@ -16,7 +16,7 @@ import ( "slices" "time" - "github.com/xtls/reality/mlkem768" + "github.com/xtls/reality/mlkem" "github.com/xtls/reality/tls13" "golang.org/x/crypto/hkdf" ) @@ -481,7 +481,7 @@ func (hs *clientHandshakeStateTLS13) establishHandshakeKeys() error { ecdhePeerData := hs.serverHello.serverShare.data if hs.serverHello.serverShare.group == x25519Kyber768Draft00 { - if len(ecdhePeerData) != x25519PublicKeySize+mlkem768.CiphertextSize768 { + if len(ecdhePeerData) != x25519PublicKeySize+mlkem.CiphertextSize768 { c.sendAlert(alertIllegalParameter) return errors.New("tls: invalid server key share") } diff --git a/handshake_server_tls13.go b/handshake_server_tls13.go index abdb8ae..1dce1c7 100644 --- a/handshake_server_tls13.go +++ b/handshake_server_tls13.go @@ -22,7 +22,7 @@ import ( "slices" "time" - "github.com/xtls/reality/mlkem768" + "github.com/xtls/reality/mlkem" "github.com/xtls/reality/tls13" ) @@ -276,7 +276,7 @@ func (hs *serverHandshakeStateTLS13) processClientHello() error { ecdhData := clientKeyShare.data if selectedGroup == x25519Kyber768Draft00 { ecdhGroup = X25519 - if len(ecdhData) != x25519PublicKeySize+mlkem768.EncapsulationKeySize768 { + if len(ecdhData) != x25519PublicKeySize+mlkem.EncapsulationKeySize768 { c.sendAlert(alertIllegalParameter) return errors.New("tls: invalid Kyber client key share") } diff --git a/key_schedule.go b/key_schedule.go index 8d96223..7113751 100644 --- a/key_schedule.go +++ b/key_schedule.go @@ -11,9 +11,8 @@ import ( "hash" "io" - "golang.org/x/crypto/sha3" - - "github.com/xtls/reality/mlkem768" + "github.com/xtls/reality/mlkem" + "github.com/xtls/reality/sha3" "github.com/xtls/reality/tls13" ) @@ -55,11 +54,11 @@ func (c *cipherSuiteTLS13) exportKeyingMaterial(s *tls13.MasterSecret, transcrip type keySharePrivateKeys struct { curveID CurveID ecdhe *ecdh.PrivateKey - kyber *mlkem768.DecapsulationKey768 + kyber *mlkem.DecapsulationKey768 } // kyberDecapsulate implements decapsulation according to Kyber Round 3. -func kyberDecapsulate(dk *mlkem768.DecapsulationKey768, c []byte) ([]byte, error) { +func kyberDecapsulate(dk *mlkem.DecapsulationKey768, c []byte) ([]byte, error) { K, err := dk.Decapsulate(c) if err != nil { return nil, err @@ -69,7 +68,7 @@ func kyberDecapsulate(dk *mlkem768.DecapsulationKey768, c []byte) ([]byte, error // kyberEncapsulate implements encapsulation according to Kyber Round 3. func kyberEncapsulate(ek []byte) (c, ss []byte, err error) { - k, err := mlkem768.NewEncapsulationKey768(ek) + k, err := mlkem.NewEncapsulationKey768(ek) if err != nil { return nil, nil, err } @@ -78,13 +77,14 @@ func kyberEncapsulate(ek []byte) (c, ss []byte, err error) { } func kyberSharedSecret(c, K []byte) []byte { - // Package mlkem768 implements ML-KEM, which compared to Kyber removed a + // Package mlkem implements ML-KEM, which compared to Kyber removed a // final hashing step. Compute SHAKE-256(K || SHA3-256(c), 32) to match Kyber. // See https://words.filippo.io/mlkem768/#bonus-track-using-a-ml-kem-implementation-as-kyber-v3. h := sha3.NewShake256() h.Write(K) - ch := sha3.Sum256(c) - h.Write(ch[:]) + ch := sha3.New256() + ch.Write(c) + h.Write(ch.Sum(nil)) out := make([]byte, 32) h.Read(out) return out diff --git a/mlkem/field.go b/mlkem/field.go new file mode 100644 index 0000000..4f94ea9 --- /dev/null +++ b/mlkem/field.go @@ -0,0 +1,550 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package mlkem + +import ( + "github.com/xtls/reality/sha3" + "github.com/xtls/reality/byteorder" + "errors" +) + +// fieldElement is an integer modulo q, an element of ℤ_q. It is always reduced. +type fieldElement uint16 + +// fieldCheckReduced checks that a value a is < q. +func fieldCheckReduced(a uint16) (fieldElement, error) { + if a >= q { + return 0, errors.New("unreduced field element") + } + return fieldElement(a), nil +} + +// fieldReduceOnce reduces a value a < 2q. +func fieldReduceOnce(a uint16) fieldElement { + x := a - q + // If x underflowed, then x >= 2¹⁶ - q > 2¹⁵, so the top bit is set. + x += (x >> 15) * q + return fieldElement(x) +} + +func fieldAdd(a, b fieldElement) fieldElement { + x := uint16(a + b) + return fieldReduceOnce(x) +} + +func fieldSub(a, b fieldElement) fieldElement { + x := uint16(a - b + q) + return fieldReduceOnce(x) +} + +const ( + barrettMultiplier = 5039 // 2¹² * 2¹² / q + barrettShift = 24 // log₂(2¹² * 2¹²) +) + +// fieldReduce reduces a value a < 2q² using Barrett reduction, to avoid +// potentially variable-time division. +func fieldReduce(a uint32) fieldElement { + quotient := uint32((uint64(a) * barrettMultiplier) >> barrettShift) + return fieldReduceOnce(uint16(a - quotient*q)) +} + +func fieldMul(a, b fieldElement) fieldElement { + x := uint32(a) * uint32(b) + return fieldReduce(x) +} + +// fieldMulSub returns a * (b - c). This operation is fused to save a +// fieldReduceOnce after the subtraction. +func fieldMulSub(a, b, c fieldElement) fieldElement { + x := uint32(a) * uint32(b-c+q) + return fieldReduce(x) +} + +// fieldAddMul returns a * b + c * d. This operation is fused to save a +// fieldReduceOnce and a fieldReduce. +func fieldAddMul(a, b, c, d fieldElement) fieldElement { + x := uint32(a) * uint32(b) + x += uint32(c) * uint32(d) + return fieldReduce(x) +} + +// compress maps a field element uniformly to the range 0 to 2ᵈ-1, according to +// FIPS 203, Definition 4.7. +func compress(x fieldElement, d uint8) uint16 { + // We want to compute (x * 2ᵈ) / q, rounded to nearest integer, with 1/2 + // rounding up (see FIPS 203, Section 2.3). + + // Barrett reduction produces a quotient and a remainder in the range [0, 2q), + // such that dividend = quotient * q + remainder. + dividend := uint32(x) << d // x * 2ᵈ + quotient := uint32(uint64(dividend) * barrettMultiplier >> barrettShift) + remainder := dividend - quotient*q + + // Since the remainder is in the range [0, 2q), not [0, q), we need to + // portion it into three spans for rounding. + // + // [ 0, q/2 ) -> round to 0 + // [ q/2, q + q/2 ) -> round to 1 + // [ q + q/2, 2q ) -> round to 2 + // + // We can convert that to the following logic: add 1 if remainder > q/2, + // then add 1 again if remainder > q + q/2. + // + // Note that if remainder > x, then ⌊x⌋ - remainder underflows, and the top + // bit of the difference will be set. + quotient += (q/2 - remainder) >> 31 & 1 + quotient += (q + q/2 - remainder) >> 31 & 1 + + // quotient might have overflowed at this point, so reduce it by masking. + var mask uint32 = (1 << d) - 1 + return uint16(quotient & mask) +} + +// decompress maps a number x between 0 and 2ᵈ-1 uniformly to the full range of +// field elements, according to FIPS 203, Definition 4.8. +func decompress(y uint16, d uint8) fieldElement { + // We want to compute (y * q) / 2ᵈ, rounded to nearest integer, with 1/2 + // rounding up (see FIPS 203, Section 2.3). + + dividend := uint32(y) * q + quotient := dividend >> d // (y * q) / 2ᵈ + + // The d'th least-significant bit of the dividend (the most significant bit + // of the remainder) is 1 for the top half of the values that divide to the + // same quotient, which are the ones that round up. + quotient += dividend >> (d - 1) & 1 + + // quotient is at most (2¹¹-1) * q / 2¹¹ + 1 = 3328, so it didn't overflow. + return fieldElement(quotient) +} + +// ringElement is a polynomial, an element of R_q, represented as an array +// according to FIPS 203, Section 2.4.4. +type ringElement [n]fieldElement + +// polyAdd adds two ringElements or nttElements. +func polyAdd[T ~[n]fieldElement](a, b T) (s T) { + for i := range s { + s[i] = fieldAdd(a[i], b[i]) + } + return s +} + +// polySub subtracts two ringElements or nttElements. +func polySub[T ~[n]fieldElement](a, b T) (s T) { + for i := range s { + s[i] = fieldSub(a[i], b[i]) + } + return s +} + +// polyByteEncode appends the 384-byte encoding of f to b. +// +// It implements ByteEncode₁₂, according to FIPS 203, Algorithm 5. +func polyByteEncode[T ~[n]fieldElement](b []byte, f T) []byte { + out, B := sliceForAppend(b, encodingSize12) + for i := 0; i < n; i += 2 { + x := uint32(f[i]) | uint32(f[i+1])<<12 + B[0] = uint8(x) + B[1] = uint8(x >> 8) + B[2] = uint8(x >> 16) + B = B[3:] + } + return out +} + +// polyByteDecode decodes the 384-byte encoding of a polynomial, checking that +// all the coefficients are properly reduced. This fulfills the "Modulus check" +// step of ML-KEM Encapsulation. +// +// It implements ByteDecode₁₂, according to FIPS 203, Algorithm 6. +func polyByteDecode[T ~[n]fieldElement](b []byte) (T, error) { + if len(b) != encodingSize12 { + return T{}, errors.New("mlkem: invalid encoding length") + } + var f T + for i := 0; i < n; i += 2 { + d := uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 + const mask12 = 0b1111_1111_1111 + var err error + if f[i], err = fieldCheckReduced(uint16(d & mask12)); err != nil { + return T{}, errors.New("mlkem: invalid polynomial encoding") + } + if f[i+1], err = fieldCheckReduced(uint16(d >> 12)); err != nil { + return T{}, errors.New("mlkem: invalid polynomial encoding") + } + b = b[3:] + } + return f, nil +} + +// sliceForAppend takes a slice and a requested number of bytes. It returns a +// slice with the contents of the given slice followed by that many bytes and a +// second slice that aliases into it and contains only the extra bytes. If the +// original slice has sufficient capacity then no allocation is performed. +func sliceForAppend(in []byte, n int) (head, tail []byte) { + if total := len(in) + n; cap(in) >= total { + head = in[:total] + } else { + head = make([]byte, total) + copy(head, in) + } + tail = head[len(in):] + return +} + +// ringCompressAndEncode1 appends a 32-byte encoding of a ring element to s, +// compressing one coefficients per bit. +// +// It implements Compress₁, according to FIPS 203, Definition 4.7, +// followed by ByteEncode₁, according to FIPS 203, Algorithm 5. +func ringCompressAndEncode1(s []byte, f ringElement) []byte { + s, b := sliceForAppend(s, encodingSize1) + for i := range b { + b[i] = 0 + } + for i := range f { + b[i/8] |= uint8(compress(f[i], 1) << (i % 8)) + } + return s +} + +// ringDecodeAndDecompress1 decodes a 32-byte slice to a ring element where each +// bit is mapped to 0 or ⌈q/2⌋. +// +// It implements ByteDecode₁, according to FIPS 203, Algorithm 6, +// followed by Decompress₁, according to FIPS 203, Definition 4.8. +func ringDecodeAndDecompress1(b *[encodingSize1]byte) ringElement { + var f ringElement + for i := range f { + b_i := b[i/8] >> (i % 8) & 1 + const halfQ = (q + 1) / 2 // ⌈q/2⌋, rounded up per FIPS 203, Section 2.3 + f[i] = fieldElement(b_i) * halfQ // 0 decompresses to 0, and 1 to ⌈q/2⌋ + } + return f +} + +// ringCompressAndEncode4 appends a 128-byte encoding of a ring element to s, +// compressing two coefficients per byte. +// +// It implements Compress₄, according to FIPS 203, Definition 4.7, +// followed by ByteEncode₄, according to FIPS 203, Algorithm 5. +func ringCompressAndEncode4(s []byte, f ringElement) []byte { + s, b := sliceForAppend(s, encodingSize4) + for i := 0; i < n; i += 2 { + b[i/2] = uint8(compress(f[i], 4) | compress(f[i+1], 4)<<4) + } + return s +} + +// ringDecodeAndDecompress4 decodes a 128-byte encoding of a ring element where +// each four bits are mapped to an equidistant distribution. +// +// It implements ByteDecode₄, according to FIPS 203, Algorithm 6, +// followed by Decompress₄, according to FIPS 203, Definition 4.8. +func ringDecodeAndDecompress4(b *[encodingSize4]byte) ringElement { + var f ringElement + for i := 0; i < n; i += 2 { + f[i] = fieldElement(decompress(uint16(b[i/2]&0b1111), 4)) + f[i+1] = fieldElement(decompress(uint16(b[i/2]>>4), 4)) + } + return f +} + +// ringCompressAndEncode10 appends a 320-byte encoding of a ring element to s, +// compressing four coefficients per five bytes. +// +// It implements Compress₁₀, according to FIPS 203, Definition 4.7, +// followed by ByteEncode₁₀, according to FIPS 203, Algorithm 5. +func ringCompressAndEncode10(s []byte, f ringElement) []byte { + s, b := sliceForAppend(s, encodingSize10) + for i := 0; i < n; i += 4 { + var x uint64 + x |= uint64(compress(f[i], 10)) + x |= uint64(compress(f[i+1], 10)) << 10 + x |= uint64(compress(f[i+2], 10)) << 20 + x |= uint64(compress(f[i+3], 10)) << 30 + b[0] = uint8(x) + b[1] = uint8(x >> 8) + b[2] = uint8(x >> 16) + b[3] = uint8(x >> 24) + b[4] = uint8(x >> 32) + b = b[5:] + } + return s +} + +// ringDecodeAndDecompress10 decodes a 320-byte encoding of a ring element where +// each ten bits are mapped to an equidistant distribution. +// +// It implements ByteDecode₁₀, according to FIPS 203, Algorithm 6, +// followed by Decompress₁₀, according to FIPS 203, Definition 4.8. +func ringDecodeAndDecompress10(bb *[encodingSize10]byte) ringElement { + b := bb[:] + var f ringElement + for i := 0; i < n; i += 4 { + x := uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | uint64(b[4])<<32 + b = b[5:] + f[i] = fieldElement(decompress(uint16(x>>0&0b11_1111_1111), 10)) + f[i+1] = fieldElement(decompress(uint16(x>>10&0b11_1111_1111), 10)) + f[i+2] = fieldElement(decompress(uint16(x>>20&0b11_1111_1111), 10)) + f[i+3] = fieldElement(decompress(uint16(x>>30&0b11_1111_1111), 10)) + } + return f +} + +// ringCompressAndEncode appends an encoding of a ring element to s, +// compressing each coefficient to d bits. +// +// It implements Compress, according to FIPS 203, Definition 4.7, +// followed by ByteEncode, according to FIPS 203, Algorithm 5. +func ringCompressAndEncode(s []byte, f ringElement, d uint8) []byte { + var b byte + var bIdx uint8 + for i := 0; i < n; i++ { + c := compress(f[i], d) + var cIdx uint8 + for cIdx < d { + b |= byte(c>>cIdx) << bIdx + bits := min(8-bIdx, d-cIdx) + bIdx += bits + cIdx += bits + if bIdx == 8 { + s = append(s, b) + b = 0 + bIdx = 0 + } + } + } + if bIdx != 0 { + panic("mlkem: internal error: bitsFilled != 0") + } + return s +} + +// ringDecodeAndDecompress decodes an encoding of a ring element where +// each d bits are mapped to an equidistant distribution. +// +// It implements ByteDecode, according to FIPS 203, Algorithm 6, +// followed by Decompress, according to FIPS 203, Definition 4.8. +func ringDecodeAndDecompress(b []byte, d uint8) ringElement { + var f ringElement + var bIdx uint8 + for i := 0; i < n; i++ { + var c uint16 + var cIdx uint8 + for cIdx < d { + c |= uint16(b[0]>>bIdx) << cIdx + c &= (1 << d) - 1 + bits := min(8-bIdx, d-cIdx) + bIdx += bits + cIdx += bits + if bIdx == 8 { + b = b[1:] + bIdx = 0 + } + } + f[i] = fieldElement(decompress(c, d)) + } + if len(b) != 0 { + panic("mlkem: internal error: leftover bytes") + } + return f +} + +// ringCompressAndEncode5 appends a 160-byte encoding of a ring element to s, +// compressing eight coefficients per five bytes. +// +// It implements Compress₅, according to FIPS 203, Definition 4.7, +// followed by ByteEncode₅, according to FIPS 203, Algorithm 5. +func ringCompressAndEncode5(s []byte, f ringElement) []byte { + return ringCompressAndEncode(s, f, 5) +} + +// ringDecodeAndDecompress5 decodes a 160-byte encoding of a ring element where +// each five bits are mapped to an equidistant distribution. +// +// It implements ByteDecode₅, according to FIPS 203, Algorithm 6, +// followed by Decompress₅, according to FIPS 203, Definition 4.8. +func ringDecodeAndDecompress5(bb *[encodingSize5]byte) ringElement { + return ringDecodeAndDecompress(bb[:], 5) +} + +// ringCompressAndEncode11 appends a 352-byte encoding of a ring element to s, +// compressing eight coefficients per eleven bytes. +// +// It implements Compress₁₁, according to FIPS 203, Definition 4.7, +// followed by ByteEncode₁₁, according to FIPS 203, Algorithm 5. +func ringCompressAndEncode11(s []byte, f ringElement) []byte { + return ringCompressAndEncode(s, f, 11) +} + +// ringDecodeAndDecompress11 decodes a 352-byte encoding of a ring element where +// each eleven bits are mapped to an equidistant distribution. +// +// It implements ByteDecode₁₁, according to FIPS 203, Algorithm 6, +// followed by Decompress₁₁, according to FIPS 203, Definition 4.8. +func ringDecodeAndDecompress11(bb *[encodingSize11]byte) ringElement { + return ringDecodeAndDecompress(bb[:], 11) +} + +// samplePolyCBD draws a ringElement from the special Dη distribution given a +// stream of random bytes generated by the PRF function, according to FIPS 203, +// Algorithm 8 and Definition 4.3. +func samplePolyCBD(s []byte, b byte) ringElement { + prf := sha3.NewShake256() + prf.Write(s) + prf.Write([]byte{b}) + B := make([]byte, 64*2) // η = 2 + prf.Read(B) + + // SamplePolyCBD simply draws four (2η) bits for each coefficient, and adds + // the first two and subtracts the last two. + + var f ringElement + for i := 0; i < n; i += 2 { + b := B[i/2] + b_7, b_6, b_5, b_4 := b>>7, b>>6&1, b>>5&1, b>>4&1 + b_3, b_2, b_1, b_0 := b>>3&1, b>>2&1, b>>1&1, b&1 + f[i] = fieldSub(fieldElement(b_0+b_1), fieldElement(b_2+b_3)) + f[i+1] = fieldSub(fieldElement(b_4+b_5), fieldElement(b_6+b_7)) + } + return f +} + +// nttElement is an NTT representation, an element of T_q, represented as an +// array according to FIPS 203, Section 2.4.4. +type nttElement [n]fieldElement + +// gammas are the values ζ^2BitRev7(i)+1 mod q for each index i, according to +// FIPS 203, Appendix A (with negative values reduced to positive). +var gammas = [128]fieldElement{17, 3312, 2761, 568, 583, 2746, 2649, 680, 1637, 1692, 723, 2606, 2288, 1041, 1100, 2229, 1409, 1920, 2662, 667, 3281, 48, 233, 3096, 756, 2573, 2156, 1173, 3015, 314, 3050, 279, 1703, 1626, 1651, 1678, 2789, 540, 1789, 1540, 1847, 1482, 952, 2377, 1461, 1868, 2687, 642, 939, 2390, 2308, 1021, 2437, 892, 2388, 941, 733, 2596, 2337, 992, 268, 3061, 641, 2688, 1584, 1745, 2298, 1031, 2037, 1292, 3220, 109, 375, 2954, 2549, 780, 2090, 1239, 1645, 1684, 1063, 2266, 319, 3010, 2773, 556, 757, 2572, 2099, 1230, 561, 2768, 2466, 863, 2594, 735, 2804, 525, 1092, 2237, 403, 2926, 1026, 2303, 1143, 2186, 2150, 1179, 2775, 554, 886, 2443, 1722, 1607, 1212, 2117, 1874, 1455, 1029, 2300, 2110, 1219, 2935, 394, 885, 2444, 2154, 1175} + +// nttMul multiplies two nttElements. +// +// It implements MultiplyNTTs, according to FIPS 203, Algorithm 11. +func nttMul(f, g nttElement) nttElement { + var h nttElement + // We use i += 2 for bounds check elimination. See https://go.dev/issue/66826. + for i := 0; i < 256; i += 2 { + a0, a1 := f[i], f[i+1] + b0, b1 := g[i], g[i+1] + h[i] = fieldAddMul(a0, b0, fieldMul(a1, b1), gammas[i/2]) + h[i+1] = fieldAddMul(a0, b1, a1, b0) + } + return h +} + +// zetas are the values ζ^BitRev7(k) mod q for each index k, according to FIPS +// 203, Appendix A. +var zetas = [128]fieldElement{1, 1729, 2580, 3289, 2642, 630, 1897, 848, 1062, 1919, 193, 797, 2786, 3260, 569, 1746, 296, 2447, 1339, 1476, 3046, 56, 2240, 1333, 1426, 2094, 535, 2882, 2393, 2879, 1974, 821, 289, 331, 3253, 1756, 1197, 2304, 2277, 2055, 650, 1977, 2513, 632, 2865, 33, 1320, 1915, 2319, 1435, 807, 452, 1438, 2868, 1534, 2402, 2647, 2617, 1481, 648, 2474, 3110, 1227, 910, 17, 2761, 583, 2649, 1637, 723, 2288, 1100, 1409, 2662, 3281, 233, 756, 2156, 3015, 3050, 1703, 1651, 2789, 1789, 1847, 952, 1461, 2687, 939, 2308, 2437, 2388, 733, 2337, 268, 641, 1584, 2298, 2037, 3220, 375, 2549, 2090, 1645, 1063, 319, 2773, 757, 2099, 561, 2466, 2594, 2804, 1092, 403, 1026, 1143, 2150, 2775, 886, 1722, 1212, 1874, 1029, 2110, 2935, 885, 2154} + +// ntt maps a ringElement to its nttElement representation. +// +// It implements NTT, according to FIPS 203, Algorithm 9. +func ntt(f ringElement) nttElement { + k := 1 + for len := 128; len >= 2; len /= 2 { + for start := 0; start < 256; start += 2 * len { + zeta := zetas[k] + k++ + // Bounds check elimination hint. + f, flen := f[start:start+len], f[start+len:start+len+len] + for j := 0; j < len; j++ { + t := fieldMul(zeta, flen[j]) + flen[j] = fieldSub(f[j], t) + f[j] = fieldAdd(f[j], t) + } + } + } + return nttElement(f) +} + +// inverseNTT maps a nttElement back to the ringElement it represents. +// +// It implements NTT⁻¹, according to FIPS 203, Algorithm 10. +func inverseNTT(f nttElement) ringElement { + k := 127 + for len := 2; len <= 128; len *= 2 { + for start := 0; start < 256; start += 2 * len { + zeta := zetas[k] + k-- + // Bounds check elimination hint. + f, flen := f[start:start+len], f[start+len:start+len+len] + for j := 0; j < len; j++ { + t := f[j] + f[j] = fieldAdd(t, flen[j]) + flen[j] = fieldMulSub(zeta, flen[j], t) + } + } + } + for i := range f { + f[i] = fieldMul(f[i], 3303) // 3303 = 128⁻¹ mod q + } + return ringElement(f) +} + +// sampleNTT draws a uniformly random nttElement from a stream of uniformly +// random bytes generated by the XOF function, according to FIPS 203, +// Algorithm 7. +func sampleNTT(rho []byte, ii, jj byte) nttElement { + B := sha3.NewShake128() + B.Write(rho) + B.Write([]byte{ii, jj}) + + // SampleNTT essentially draws 12 bits at a time from r, interprets them in + // little-endian, and rejects values higher than q, until it drew 256 + // values. (The rejection rate is approximately 19%.) + // + // To do this from a bytes stream, it draws three bytes at a time, and + // splits them into two uint16 appropriately masked. + // + // r₀ r₁ r₂ + // |- - - - - - - -|- - - - - - - -|- - - - - - - -| + // + // Uint16(r₀ || r₁) + // |- - - - - - - - - - - - - - - -| + // |- - - - - - - - - - - -| + // d₁ + // + // Uint16(r₁ || r₂) + // |- - - - - - - - - - - - - - - -| + // |- - - - - - - - - - - -| + // d₂ + // + // Note that in little-endian, the rightmost bits are the most significant + // bits (dropped with a mask) and the leftmost bits are the least + // significant bits (dropped with a right shift). + + var a nttElement + var j int // index into a + var buf [24]byte // buffered reads from B + off := len(buf) // index into buf, starts in a "buffer fully consumed" state + for { + if off >= len(buf) { + B.Read(buf[:]) + off = 0 + } + d1 := byteorder.LEUint16(buf[off:]) & 0b1111_1111_1111 + d2 := byteorder.LEUint16(buf[off+1:]) >> 4 + off += 3 + if d1 < q { + a[j] = fieldElement(d1) + j++ + } + if j >= len(a) { + break + } + if d2 < q { + a[j] = fieldElement(d2) + j++ + } + if j >= len(a) { + break + } + } + return a +} \ No newline at end of file diff --git a/mlkem/mlkem768.go b/mlkem/mlkem768.go new file mode 100644 index 0000000..2b4fc12 --- /dev/null +++ b/mlkem/mlkem768.go @@ -0,0 +1,517 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package mlkem implements the quantum-resistant key encapsulation method +// ML-KEM (formerly known as Kyber), as specified in [NIST FIPS 203]. +// +// [NIST FIPS 203]: https://doi.org/10.6028/NIST.FIPS.203 +package mlkem + +// This package targets security, correctness, simplicity, readability, and +// reviewability as its primary goals. All critical operations are performed in +// constant time. +// +// Variable and function names, as well as code layout, are selected to +// facilitate reviewing the implementation against the NIST FIPS 203 document. +// +// Reviewers unfamiliar with polynomials or linear algebra might find the +// background at https://words.filippo.io/kyber-math/ useful. +// +// This file implements the recommended parameter set ML-KEM-768. The ML-KEM-1024 +// parameter set implementation is auto-generated from this file. +// +//go:generate go run generate1024.go -input mlkem768.go -output mlkem1024.go + +import ( + "bytes" + //"github.com/xtls/reality/fips140" + "github.com/xtls/reality/drbg" + "github.com/xtls/reality/sha3" + "github.com/xtls/reality/subtle" + "errors" +) + +const ( + // ML-KEM global constants. + n = 256 + q = 3329 + + // encodingSizeX is the byte size of a ringElement or nttElement encoded + // by ByteEncode_X (FIPS 203, Algorithm 5). + encodingSize12 = n * 12 / 8 + encodingSize11 = n * 11 / 8 + encodingSize10 = n * 10 / 8 + encodingSize5 = n * 5 / 8 + encodingSize4 = n * 4 / 8 + encodingSize1 = n * 1 / 8 + + messageSize = encodingSize1 + + SharedKeySize = 32 + SeedSize = 32 + 32 +) + +// ML-KEM-768 parameters. +const ( + k = 3 + + CiphertextSize768 = k*encodingSize10 + encodingSize4 + EncapsulationKeySize768 = k*encodingSize12 + 32 + decapsulationKeySize768 = k*encodingSize12 + EncapsulationKeySize768 + 32 + 32 +) + +// ML-KEM-1024 parameters. +const ( + k1024 = 4 + + CiphertextSize1024 = k1024*encodingSize11 + encodingSize5 + EncapsulationKeySize1024 = k1024*encodingSize12 + 32 + decapsulationKeySize1024 = k1024*encodingSize12 + EncapsulationKeySize1024 + 32 + 32 +) + +// A DecapsulationKey768 is the secret key used to decapsulate a shared key from a +// ciphertext. It includes various precomputed values. +type DecapsulationKey768 struct { + d [32]byte // decapsulation key seed + z [32]byte // implicit rejection sampling seed + + ρ [32]byte // sampleNTT seed for A, stored for the encapsulation key + h [32]byte // H(ek), stored for ML-KEM.Decaps_internal + + encryptionKey + decryptionKey +} + +// Bytes returns the decapsulation key as a 64-byte seed in the "d || z" form. +// +// The decapsulation key must be kept secret. +func (dk *DecapsulationKey768) Bytes() []byte { + var b [SeedSize]byte + copy(b[:], dk.d[:]) + copy(b[32:], dk.z[:]) + return b[:] +} + +// TestingOnlyExpandedBytes768 returns the decapsulation key as a byte slice +// using the full expanded NIST encoding. +// +// This should only be used for ACVP testing. For all other purposes prefer +// the Bytes method that returns the (much smaller) seed. +func TestingOnlyExpandedBytes768(dk *DecapsulationKey768) []byte { + b := make([]byte, 0, decapsulationKeySize768) + + // ByteEncode₁₂(s) + for i := range dk.s { + b = polyByteEncode(b, dk.s[i]) + } + + // ByteEncode₁₂(t) || ρ + for i := range dk.t { + b = polyByteEncode(b, dk.t[i]) + } + b = append(b, dk.ρ[:]...) + + // H(ek) || z + b = append(b, dk.h[:]...) + b = append(b, dk.z[:]...) + + return b +} + +// EncapsulationKey returns the public encapsulation key necessary to produce +// ciphertexts. +func (dk *DecapsulationKey768) EncapsulationKey() *EncapsulationKey768 { + return &EncapsulationKey768{ + ρ: dk.ρ, + h: dk.h, + encryptionKey: dk.encryptionKey, + } +} + +// An EncapsulationKey768 is the public key used to produce ciphertexts to be +// decapsulated by the corresponding [DecapsulationKey768]. +type EncapsulationKey768 struct { + ρ [32]byte // sampleNTT seed for A + h [32]byte // H(ek) + encryptionKey +} + +// Bytes returns the encapsulation key as a byte slice. +func (ek *EncapsulationKey768) Bytes() []byte { + // The actual logic is in a separate function to outline this allocation. + b := make([]byte, 0, EncapsulationKeySize768) + return ek.bytes(b) +} + +func (ek *EncapsulationKey768) bytes(b []byte) []byte { + for i := range ek.t { + b = polyByteEncode(b, ek.t[i]) + } + b = append(b, ek.ρ[:]...) + return b +} + +// encryptionKey is the parsed and expanded form of a PKE encryption key. +type encryptionKey struct { + t [k]nttElement // ByteDecode₁₂(ek[:384k]) + a [k * k]nttElement // A[i*k+j] = sampleNTT(ρ, j, i) +} + +// decryptionKey is the parsed and expanded form of a PKE decryption key. +type decryptionKey struct { + s [k]nttElement // ByteDecode₁₂(dk[:decryptionKeySize]) +} + +// GenerateKey768 generates a new decapsulation key, drawing random bytes from +// a DRBG. The decapsulation key must be kept secret. +func GenerateKey768() (*DecapsulationKey768, error) { + // The actual logic is in a separate function to outline this allocation. + dk := &DecapsulationKey768{} + return generateKey(dk) +} + +func generateKey(dk *DecapsulationKey768) (*DecapsulationKey768, error) { + var d [32]byte + drbg.Read(d[:]) + var z [32]byte + drbg.Read(z[:]) + kemKeyGen(dk, &d, &z) + // if err := fips140.PCT("ML-KEM PCT", func() error { return kemPCT(dk) }); err != nil { + // // This clearly can't happen, but FIPS 140-3 requires us to check. + // panic(err) + // } + //fips140.RecordApproved() + return dk, nil +} + +// GenerateKeyInternal768 is a derandomized version of GenerateKey768, +// exclusively for use in tests. +func GenerateKeyInternal768(d, z *[32]byte) *DecapsulationKey768 { + dk := &DecapsulationKey768{} + kemKeyGen(dk, d, z) + return dk +} + +// NewDecapsulationKey768 parses a decapsulation key from a 64-byte +// seed in the "d || z" form. The seed must be uniformly random. +func NewDecapsulationKey768(seed []byte) (*DecapsulationKey768, error) { + // The actual logic is in a separate function to outline this allocation. + dk := &DecapsulationKey768{} + return newKeyFromSeed(dk, seed) +} + +func newKeyFromSeed(dk *DecapsulationKey768, seed []byte) (*DecapsulationKey768, error) { + if len(seed) != SeedSize { + return nil, errors.New("mlkem: invalid seed length") + } + d := (*[32]byte)(seed[:32]) + z := (*[32]byte)(seed[32:]) + kemKeyGen(dk, d, z) + // if err := fips140.PCT("ML-KEM PCT", func() error { return kemPCT(dk) }); err != nil { + // // This clearly can't happen, but FIPS 140-3 requires us to check. + // panic(err) + // } + //fips140.RecordApproved() + return dk, nil +} + +// TestingOnlyNewDecapsulationKey768 parses a decapsulation key from its expanded NIST format. +// +// Bytes() must not be called on the returned key, as it will not produce the +// original seed. +// +// This function should only be used for ACVP testing. Prefer NewDecapsulationKey768 for all +// other purposes. +func TestingOnlyNewDecapsulationKey768(b []byte) (*DecapsulationKey768, error) { + if len(b) != decapsulationKeySize768 { + return nil, errors.New("mlkem: invalid NIST decapsulation key length") + } + + dk := &DecapsulationKey768{} + for i := range dk.s { + var err error + dk.s[i], err = polyByteDecode[nttElement](b[:encodingSize12]) + if err != nil { + return nil, errors.New("mlkem: invalid secret key encoding") + } + b = b[encodingSize12:] + } + + ek, err := NewEncapsulationKey768(b[:EncapsulationKeySize768]) + if err != nil { + return nil, err + } + dk.ρ = ek.ρ + dk.h = ek.h + dk.encryptionKey = ek.encryptionKey + b = b[EncapsulationKeySize768:] + + if !bytes.Equal(dk.h[:], b[:32]) { + return nil, errors.New("mlkem: inconsistent H(ek) in encoded bytes") + } + b = b[32:] + + copy(dk.z[:], b) + + // Generate a random d value for use in Bytes(). This is a safety mechanism + // that avoids returning a broken key vs a random key if this function is + // called in contravention of the TestingOnlyNewDecapsulationKey768 function + // comment advising against it. + drbg.Read(dk.d[:]) + + return dk, nil +} + +// kemKeyGen generates a decapsulation key. +// +// It implements ML-KEM.KeyGen_internal according to FIPS 203, Algorithm 16, and +// K-PKE.KeyGen according to FIPS 203, Algorithm 13. The two are merged to save +// copies and allocations. +func kemKeyGen(dk *DecapsulationKey768, d, z *[32]byte) { + dk.d = *d + dk.z = *z + + g := sha3.New512() + g.Write(d[:]) + g.Write([]byte{k}) // Module dimension as a domain separator. + G := g.Sum(make([]byte, 0, 64)) + ρ, σ := G[:32], G[32:] + dk.ρ = [32]byte(ρ) + + A := &dk.a + for i := byte(0); i < k; i++ { + for j := byte(0); j < k; j++ { + A[i*k+j] = sampleNTT(ρ, j, i) + } + } + + var N byte + s := &dk.s + for i := range s { + s[i] = ntt(samplePolyCBD(σ, N)) + N++ + } + e := make([]nttElement, k) + for i := range e { + e[i] = ntt(samplePolyCBD(σ, N)) + N++ + } + + t := &dk.t + for i := range t { // t = A ◦ s + e + t[i] = e[i] + for j := range s { + t[i] = polyAdd(t[i], nttMul(A[i*k+j], s[j])) + } + } + + H := sha3.New256() + ek := dk.EncapsulationKey().Bytes() + H.Write(ek) + H.Sum(dk.h[:0]) +} + +// kemPCT performs a Pairwise Consistency Test per FIPS 140-3 IG 10.3.A +// Additional Comment 1: "For key pairs generated for use with approved KEMs in +// FIPS 203, the PCT shall consist of applying the encapsulation key ek to +// encapsulate a shared secret K leading to ciphertext c, and then applying +// decapsulation key dk to retrieve the same shared secret K. The PCT passes if +// the two shared secret K values are equal. The PCT shall be performed either +// when keys are generated/imported, prior to the first exportation, or prior to +// the first operational use (if not exported before the first use)." +func kemPCT(dk *DecapsulationKey768) error { + ek := dk.EncapsulationKey() + K, c := ek.Encapsulate() + K1, err := dk.Decapsulate(c) + if err != nil { + return err + } + if subtle.ConstantTimeCompare(K, K1) != 1 { + return errors.New("mlkem: PCT failed") + } + return nil +} + +// Encapsulate generates a shared key and an associated ciphertext from an +// encapsulation key, drawing random bytes from a DRBG. +// +// The shared key must be kept secret. +func (ek *EncapsulationKey768) Encapsulate() (sharedKey, ciphertext []byte) { + // The actual logic is in a separate function to outline this allocation. + var cc [CiphertextSize768]byte + return ek.encapsulate(&cc) +} + +func (ek *EncapsulationKey768) encapsulate(cc *[CiphertextSize768]byte) (sharedKey, ciphertext []byte) { + var m [messageSize]byte + drbg.Read(m[:]) + // Note that the modulus check (step 2 of the encapsulation key check from + // FIPS 203, Section 7.2) is performed by polyByteDecode in parseEK. + //fips140.RecordApproved() + return kemEncaps(cc, ek, &m) +} + +// EncapsulateInternal is a derandomized version of Encapsulate, exclusively for +// use in tests. +func (ek *EncapsulationKey768) EncapsulateInternal(m *[32]byte) (sharedKey, ciphertext []byte) { + cc := &[CiphertextSize768]byte{} + return kemEncaps(cc, ek, m) +} + +// kemEncaps generates a shared key and an associated ciphertext. +// +// It implements ML-KEM.Encaps_internal according to FIPS 203, Algorithm 17. +func kemEncaps(cc *[CiphertextSize768]byte, ek *EncapsulationKey768, m *[messageSize]byte) (K, c []byte) { + g := sha3.New512() + g.Write(m[:]) + g.Write(ek.h[:]) + G := g.Sum(nil) + K, r := G[:SharedKeySize], G[SharedKeySize:] + c = pkeEncrypt(cc, &ek.encryptionKey, m, r) + return K, c +} + +// NewEncapsulationKey768 parses an encapsulation key from its encoded form. +// If the encapsulation key is not valid, NewEncapsulationKey768 returns an error. +func NewEncapsulationKey768(encapsulationKey []byte) (*EncapsulationKey768, error) { + // The actual logic is in a separate function to outline this allocation. + ek := &EncapsulationKey768{} + return parseEK(ek, encapsulationKey) +} + +// parseEK parses an encryption key from its encoded form. +// +// It implements the initial stages of K-PKE.Encrypt according to FIPS 203, +// Algorithm 14. +func parseEK(ek *EncapsulationKey768, ekPKE []byte) (*EncapsulationKey768, error) { + if len(ekPKE) != EncapsulationKeySize768 { + return nil, errors.New("mlkem: invalid encapsulation key length") + } + + h := sha3.New256() + h.Write(ekPKE) + h.Sum(ek.h[:0]) + + for i := range ek.t { + var err error + ek.t[i], err = polyByteDecode[nttElement](ekPKE[:encodingSize12]) + if err != nil { + return nil, err + } + ekPKE = ekPKE[encodingSize12:] + } + copy(ek.ρ[:], ekPKE) + + for i := byte(0); i < k; i++ { + for j := byte(0); j < k; j++ { + ek.a[i*k+j] = sampleNTT(ek.ρ[:], j, i) + } + } + + return ek, nil +} + +// pkeEncrypt encrypt a plaintext message. +// +// It implements K-PKE.Encrypt according to FIPS 203, Algorithm 14, although the +// computation of t and AT is done in parseEK. +func pkeEncrypt(cc *[CiphertextSize768]byte, ex *encryptionKey, m *[messageSize]byte, rnd []byte) []byte { + var N byte + r, e1 := make([]nttElement, k), make([]ringElement, k) + for i := range r { + r[i] = ntt(samplePolyCBD(rnd, N)) + N++ + } + for i := range e1 { + e1[i] = samplePolyCBD(rnd, N) + N++ + } + e2 := samplePolyCBD(rnd, N) + + u := make([]ringElement, k) // NTT⁻¹(AT ◦ r) + e1 + for i := range u { + u[i] = e1[i] + for j := range r { + // Note that i and j are inverted, as we need the transposed of A. + u[i] = polyAdd(u[i], inverseNTT(nttMul(ex.a[j*k+i], r[j]))) + } + } + + μ := ringDecodeAndDecompress1(m) + + var vNTT nttElement // t⊺ ◦ r + for i := range ex.t { + vNTT = polyAdd(vNTT, nttMul(ex.t[i], r[i])) + } + v := polyAdd(polyAdd(inverseNTT(vNTT), e2), μ) + + c := cc[:0] + for _, f := range u { + c = ringCompressAndEncode10(c, f) + } + c = ringCompressAndEncode4(c, v) + + return c +} + +// Decapsulate generates a shared key from a ciphertext and a decapsulation key. +// If the ciphertext is not valid, Decapsulate returns an error. +// +// The shared key must be kept secret. +func (dk *DecapsulationKey768) Decapsulate(ciphertext []byte) (sharedKey []byte, err error) { + if len(ciphertext) != CiphertextSize768 { + return nil, errors.New("mlkem: invalid ciphertext length") + } + c := (*[CiphertextSize768]byte)(ciphertext) + // Note that the hash check (step 3 of the decapsulation input check from + // FIPS 203, Section 7.3) is foregone as a DecapsulationKey is always + // validly generated by ML-KEM.KeyGen_internal. + return kemDecaps(dk, c), nil +} + +// kemDecaps produces a shared key from a ciphertext. +// +// It implements ML-KEM.Decaps_internal according to FIPS 203, Algorithm 18. +func kemDecaps(dk *DecapsulationKey768, c *[CiphertextSize768]byte) (K []byte) { + //fips140.RecordApproved() + m := pkeDecrypt(&dk.decryptionKey, c) + g := sha3.New512() + g.Write(m[:]) + g.Write(dk.h[:]) + G := g.Sum(make([]byte, 0, 64)) + Kprime, r := G[:SharedKeySize], G[SharedKeySize:] + J := sha3.NewShake256() + J.Write(dk.z[:]) + J.Write(c[:]) + Kout := make([]byte, SharedKeySize) + J.Read(Kout) + var cc [CiphertextSize768]byte + c1 := pkeEncrypt(&cc, &dk.encryptionKey, (*[32]byte)(m), r) + + subtle.ConstantTimeCopy(subtle.ConstantTimeCompare(c[:], c1), Kout, Kprime) + return Kout +} + +// pkeDecrypt decrypts a ciphertext. +// +// It implements K-PKE.Decrypt according to FIPS 203, Algorithm 15, +// although s is retained from kemKeyGen. +func pkeDecrypt(dx *decryptionKey, c *[CiphertextSize768]byte) []byte { + u := make([]ringElement, k) + for i := range u { + b := (*[encodingSize10]byte)(c[encodingSize10*i : encodingSize10*(i+1)]) + u[i] = ringDecodeAndDecompress10(b) + } + + b := (*[encodingSize4]byte)(c[encodingSize10*k:]) + v := ringDecodeAndDecompress4(b) + + var mask nttElement // s⊺ ◦ NTT(u) + for i := range dx.s { + mask = polyAdd(mask, nttMul(dx.s[i], ntt(u[i]))) + } + w := polySub(v, inverseNTT(mask)) + + return ringCompressAndEncode1(nil, w) +} \ No newline at end of file diff --git a/mlkem768/mlkem768.go b/mlkem768/mlkem768.go deleted file mode 100644 index 24bedea..0000000 --- a/mlkem768/mlkem768.go +++ /dev/null @@ -1,886 +0,0 @@ -// Copyright 2023 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package mlkem768 implements the quantum-resistant key encapsulation method -// ML-KEM (formerly known as Kyber). -// -// Only the recommended ML-KEM-768 parameter set is provided. -// -// The version currently implemented is the one specified by [NIST FIPS 203 ipd], -// with the unintentional transposition of the matrix A reverted to match the -// behavior of [Kyber version 3.0]. Future versions of this package might -// introduce backwards incompatible changes to implement changes to FIPS 203. -// -// [Kyber version 3.0]: https://pq-crystals.org/kyber/data/kyber-specification-round3-20210804.pdf -// [NIST FIPS 203 ipd]: https://doi.org/10.6028/NIST.FIPS.203.ipd -package mlkem768 - -// This package targets security, correctness, simplicity, readability, and -// reviewability as its primary goals. All critical operations are performed in -// constant time. -// -// Variable and function names, as well as code layout, are selected to -// facilitate reviewing the implementation against the NIST FIPS 203 ipd -// document. -// -// Reviewers unfamiliar with polynomials or linear algebra might find the -// background at https://words.filippo.io/kyber-math/ useful. - -import ( - "crypto/rand" - "crypto/subtle" - "encoding/binary" - "errors" - - "golang.org/x/crypto/sha3" -) - -const ( - // ML-KEM global constants. - n = 256 - q = 3329 - - log2q = 12 - - // ML-KEM-768 parameters. The code makes assumptions based on these values, - // they can't be changed blindly. - k = 3 - η = 2 - du = 10 - dv = 4 - - // encodingSizeX is the byte size of a ringElement or nttElement encoded - // by ByteEncode_X (FIPS 203 (DRAFT), Algorithm 4). - encodingSize12 = n * log2q / 8 - encodingSize10 = n * du / 8 - encodingSize4 = n * dv / 8 - encodingSize1 = n * 1 / 8 - - messageSize = encodingSize1 - decryptionKeySize = k * encodingSize12 - encryptionKeySize = k*encodingSize12 + 32 - - CiphertextSize = k*encodingSize10 + encodingSize4 - EncapsulationKeySize = encryptionKeySize - DecapsulationKeySize = decryptionKeySize + encryptionKeySize + 32 + 32 - SharedKeySize = 32 - SeedSize = 32 + 32 -) - -// A DecapsulationKey is the secret key used to decapsulate a shared key from a -// ciphertext. It includes various precomputed values. -type DecapsulationKey struct { - dk [DecapsulationKeySize]byte - encryptionKey - decryptionKey -} - -// Bytes returns the extended encoding of the decapsulation key, according to -// FIPS 203 (DRAFT). -func (dk *DecapsulationKey) Bytes() []byte { - var b [DecapsulationKeySize]byte - copy(b[:], dk.dk[:]) - return b[:] -} - -// EncapsulationKey returns the public encapsulation key necessary to produce -// ciphertexts. -func (dk *DecapsulationKey) EncapsulationKey() []byte { - var b [EncapsulationKeySize]byte - copy(b[:], dk.dk[decryptionKeySize:]) - return b[:] -} - -// encryptionKey is the parsed and expanded form of a PKE encryption key. -type encryptionKey struct { - t [k]nttElement // ByteDecode₁₂(ek[:384k]) - A [k * k]nttElement // A[i*k+j] = sampleNTT(ρ, j, i) -} - -// decryptionKey is the parsed and expanded form of a PKE decryption key. -type decryptionKey struct { - s [k]nttElement // ByteDecode₁₂(dk[:decryptionKeySize]) -} - -// GenerateKey generates a new decapsulation key, drawing random bytes from -// crypto/rand. The decapsulation key must be kept secret. -func GenerateKey() (*DecapsulationKey, error) { - // The actual logic is in a separate function to outline this allocation. - dk := &DecapsulationKey{} - return generateKey(dk) -} - -func generateKey(dk *DecapsulationKey) (*DecapsulationKey, error) { - var d [32]byte - if _, err := rand.Read(d[:]); err != nil { - return nil, errors.New("mlkem768: crypto/rand Read failed: " + err.Error()) - } - var z [32]byte - if _, err := rand.Read(z[:]); err != nil { - return nil, errors.New("mlkem768: crypto/rand Read failed: " + err.Error()) - } - return kemKeyGen(dk, &d, &z), nil -} - -// NewKeyFromSeed deterministically generates a decapsulation key from a 64-byte -// seed in the "d || z" form. The seed must be uniformly random. -func NewKeyFromSeed(seed []byte) (*DecapsulationKey, error) { - // The actual logic is in a separate function to outline this allocation. - dk := &DecapsulationKey{} - return newKeyFromSeed(dk, seed) -} - -func newKeyFromSeed(dk *DecapsulationKey, seed []byte) (*DecapsulationKey, error) { - if len(seed) != SeedSize { - return nil, errors.New("mlkem768: invalid seed length") - } - d := (*[32]byte)(seed[:32]) - z := (*[32]byte)(seed[32:]) - return kemKeyGen(dk, d, z), nil -} - -// NewKeyFromExtendedEncoding parses a decapsulation key from its FIPS 203 -// (DRAFT) extended encoding. -func NewKeyFromExtendedEncoding(decapsulationKey []byte) (*DecapsulationKey, error) { - // The actual logic is in a separate function to outline this allocation. - dk := &DecapsulationKey{} - return newKeyFromExtendedEncoding(dk, decapsulationKey) -} - -func newKeyFromExtendedEncoding(dk *DecapsulationKey, dkBytes []byte) (*DecapsulationKey, error) { - if len(dkBytes) != DecapsulationKeySize { - return nil, errors.New("mlkem768: invalid decapsulation key length") - } - - // Note that we don't check that H(ek) matches ekPKE, as that's not - // specified in FIPS 203 (DRAFT). This is one reason to prefer the seed - // private key format. - dk.dk = [DecapsulationKeySize]byte(dkBytes) - - dkPKE := dkBytes[:decryptionKeySize] - if err := parseDK(&dk.decryptionKey, dkPKE); err != nil { - return nil, err - } - - ekPKE := dkBytes[decryptionKeySize : decryptionKeySize+encryptionKeySize] - if err := parseEK(&dk.encryptionKey, ekPKE); err != nil { - return nil, err - } - - return dk, nil -} - -// kemKeyGen generates a decapsulation key. -// -// It implements ML-KEM.KeyGen according to FIPS 203 (DRAFT), Algorithm 15, and -// K-PKE.KeyGen according to FIPS 203 (DRAFT), Algorithm 12. The two are merged -// to save copies and allocations. -func kemKeyGen(dk *DecapsulationKey, d, z *[32]byte) *DecapsulationKey { - if dk == nil { - dk = &DecapsulationKey{} - } - - G := sha3.Sum512(d[:]) - ρ, σ := G[:32], G[32:] - - A := &dk.A - for i := byte(0); i < k; i++ { - for j := byte(0); j < k; j++ { - // Note that this is consistent with Kyber round 3, rather than with - // the initial draft of FIPS 203, because NIST signaled that the - // change was involuntary and will be reverted. - A[i*k+j] = sampleNTT(ρ, j, i) - } - } - - var N byte - s := &dk.s - for i := range s { - s[i] = ntt(samplePolyCBD(σ, N)) - N++ - } - e := make([]nttElement, k) - for i := range e { - e[i] = ntt(samplePolyCBD(σ, N)) - N++ - } - - t := &dk.t - for i := range t { // t = A ◦ s + e - t[i] = e[i] - for j := range s { - t[i] = polyAdd(t[i], nttMul(A[i*k+j], s[j])) - } - } - - // dkPKE ← ByteEncode₁₂(s) - // ekPKE ← ByteEncode₁₂(t) || ρ - // ek ← ekPKE - // dk ← dkPKE || ek || H(ek) || z - dkB := dk.dk[:0] - - for i := range s { - dkB = polyByteEncode(dkB, s[i]) - } - - for i := range t { - dkB = polyByteEncode(dkB, t[i]) - } - dkB = append(dkB, ρ...) - - H := sha3.New256() - H.Write(dkB[decryptionKeySize:]) - dkB = H.Sum(dkB) - - dkB = append(dkB, z[:]...) - - if len(dkB) != len(dk.dk) { - panic("mlkem768: internal error: invalid decapsulation key size") - } - - return dk -} - -// Encapsulate generates a shared key and an associated ciphertext from an -// encapsulation key, drawing random bytes from crypto/rand. -// If the encapsulation key is not valid, Encapsulate returns an error. -// -// The shared key must be kept secret. -func Encapsulate(encapsulationKey []byte) (ciphertext, sharedKey []byte, err error) { - // The actual logic is in a separate function to outline this allocation. - var cc [CiphertextSize]byte - return encapsulate(&cc, encapsulationKey) -} - -func encapsulate(cc *[CiphertextSize]byte, encapsulationKey []byte) (ciphertext, sharedKey []byte, err error) { - if len(encapsulationKey) != EncapsulationKeySize { - return nil, nil, errors.New("mlkem768: invalid encapsulation key length") - } - var m [messageSize]byte - if _, err := rand.Read(m[:]); err != nil { - return nil, nil, errors.New("mlkem768: crypto/rand Read failed: " + err.Error()) - } - return kemEncaps(cc, encapsulationKey, &m) -} - -// kemEncaps generates a shared key and an associated ciphertext. -// -// It implements ML-KEM.Encaps according to FIPS 203 (DRAFT), Algorithm 16. -func kemEncaps(cc *[CiphertextSize]byte, ek []byte, m *[messageSize]byte) (c, K []byte, err error) { - if cc == nil { - cc = &[CiphertextSize]byte{} - } - - H := sha3.Sum256(ek[:]) - g := sha3.New512() - g.Write(m[:]) - g.Write(H[:]) - G := g.Sum(nil) - K, r := G[:SharedKeySize], G[SharedKeySize:] - var ex encryptionKey - if err := parseEK(&ex, ek[:]); err != nil { - return nil, nil, err - } - c = pkeEncrypt(cc, &ex, m, r) - return c, K, nil -} - -// parseEK parses an encryption key from its encoded form. -// -// It implements the initial stages of K-PKE.Encrypt according to FIPS 203 -// (DRAFT), Algorithm 13. -func parseEK(ex *encryptionKey, ekPKE []byte) error { - if len(ekPKE) != encryptionKeySize { - return errors.New("mlkem768: invalid encryption key length") - } - - for i := range ex.t { - var err error - ex.t[i], err = polyByteDecode[nttElement](ekPKE[:encodingSize12]) - if err != nil { - return err - } - ekPKE = ekPKE[encodingSize12:] - } - ρ := ekPKE - - for i := byte(0); i < k; i++ { - for j := byte(0); j < k; j++ { - // See the note in pkeKeyGen about the order of the indices being - // consistent with Kyber round 3. - ex.A[i*k+j] = sampleNTT(ρ, j, i) - } - } - - return nil -} - -// pkeEncrypt encrypt a plaintext message. -// -// It implements K-PKE.Encrypt according to FIPS 203 (DRAFT), Algorithm 13, -// although the computation of t and AT is done in parseEK. -func pkeEncrypt(cc *[CiphertextSize]byte, ex *encryptionKey, m *[messageSize]byte, rnd []byte) []byte { - var N byte - r, e1 := make([]nttElement, k), make([]ringElement, k) - for i := range r { - r[i] = ntt(samplePolyCBD(rnd, N)) - N++ - } - for i := range e1 { - e1[i] = samplePolyCBD(rnd, N) - N++ - } - e2 := samplePolyCBD(rnd, N) - - u := make([]ringElement, k) // NTT⁻¹(AT ◦ r) + e1 - for i := range u { - u[i] = e1[i] - for j := range r { - // Note that i and j are inverted, as we need the transposed of A. - u[i] = polyAdd(u[i], inverseNTT(nttMul(ex.A[j*k+i], r[j]))) - } - } - - μ := ringDecodeAndDecompress1(m) - - var vNTT nttElement // t⊺ ◦ r - for i := range ex.t { - vNTT = polyAdd(vNTT, nttMul(ex.t[i], r[i])) - } - v := polyAdd(polyAdd(inverseNTT(vNTT), e2), μ) - - c := cc[:0] - for _, f := range u { - c = ringCompressAndEncode10(c, f) - } - c = ringCompressAndEncode4(c, v) - - return c -} - -// Decapsulate generates a shared key from a ciphertext and a decapsulation key. -// If the ciphertext is not valid, Decapsulate returns an error. -// -// The shared key must be kept secret. -func Decapsulate(dk *DecapsulationKey, ciphertext []byte) (sharedKey []byte, err error) { - if len(ciphertext) != CiphertextSize { - return nil, errors.New("mlkem768: invalid ciphertext length") - } - c := (*[CiphertextSize]byte)(ciphertext) - return kemDecaps(dk, c), nil -} - -// kemDecaps produces a shared key from a ciphertext. -// -// It implements ML-KEM.Decaps according to FIPS 203 (DRAFT), Algorithm 17. -func kemDecaps(dk *DecapsulationKey, c *[CiphertextSize]byte) (K []byte) { - h := dk.dk[decryptionKeySize+encryptionKeySize : decryptionKeySize+encryptionKeySize+32] - z := dk.dk[decryptionKeySize+encryptionKeySize+32:] - - m := pkeDecrypt(&dk.decryptionKey, c) - g := sha3.New512() - g.Write(m[:]) - g.Write(h) - G := g.Sum(nil) - Kprime, r := G[:SharedKeySize], G[SharedKeySize:] - J := sha3.NewShake256() - J.Write(z) - J.Write(c[:]) - Kout := make([]byte, SharedKeySize) - J.Read(Kout) - var cc [CiphertextSize]byte - c1 := pkeEncrypt(&cc, &dk.encryptionKey, (*[32]byte)(m), r) - - subtle.ConstantTimeCopy(subtle.ConstantTimeCompare(c[:], c1), Kout, Kprime) - return Kout -} - -// parseDK parses a decryption key from its encoded form. -// -// It implements the computation of s from K-PKE.Decrypt according to FIPS 203 -// (DRAFT), Algorithm 14. -func parseDK(dx *decryptionKey, dkPKE []byte) error { - if len(dkPKE) != decryptionKeySize { - return errors.New("mlkem768: invalid decryption key length") - } - - for i := range dx.s { - f, err := polyByteDecode[nttElement](dkPKE[:encodingSize12]) - if err != nil { - return err - } - dx.s[i] = f - dkPKE = dkPKE[encodingSize12:] - } - - return nil -} - -// pkeDecrypt decrypts a ciphertext. -// -// It implements K-PKE.Decrypt according to FIPS 203 (DRAFT), Algorithm 14, -// although the computation of s is done in parseDK. -func pkeDecrypt(dx *decryptionKey, c *[CiphertextSize]byte) []byte { - u := make([]ringElement, k) - for i := range u { - b := (*[encodingSize10]byte)(c[encodingSize10*i : encodingSize10*(i+1)]) - u[i] = ringDecodeAndDecompress10(b) - } - - b := (*[encodingSize4]byte)(c[encodingSize10*k:]) - v := ringDecodeAndDecompress4(b) - - var mask nttElement // s⊺ ◦ NTT(u) - for i := range dx.s { - mask = polyAdd(mask, nttMul(dx.s[i], ntt(u[i]))) - } - w := polySub(v, inverseNTT(mask)) - - return ringCompressAndEncode1(nil, w) -} - -// fieldElement is an integer modulo q, an element of ℤ_q. It is always reduced. -type fieldElement uint16 - -// fieldCheckReduced checks that a value a is < q. -func fieldCheckReduced(a uint16) (fieldElement, error) { - if a >= q { - return 0, errors.New("unreduced field element") - } - return fieldElement(a), nil -} - -// fieldReduceOnce reduces a value a < 2q. -func fieldReduceOnce(a uint16) fieldElement { - x := a - q - // If x underflowed, then x >= 2¹⁶ - q > 2¹⁵, so the top bit is set. - x += (x >> 15) * q - return fieldElement(x) -} - -func fieldAdd(a, b fieldElement) fieldElement { - x := uint16(a + b) - return fieldReduceOnce(x) -} - -func fieldSub(a, b fieldElement) fieldElement { - x := uint16(a - b + q) - return fieldReduceOnce(x) -} - -const ( - barrettMultiplier = 5039 // 2¹² * 2¹² / q - barrettShift = 24 // log₂(2¹² * 2¹²) -) - -// fieldReduce reduces a value a < 2q² using Barrett reduction, to avoid -// potentially variable-time division. -func fieldReduce(a uint32) fieldElement { - quotient := uint32((uint64(a) * barrettMultiplier) >> barrettShift) - return fieldReduceOnce(uint16(a - quotient*q)) -} - -func fieldMul(a, b fieldElement) fieldElement { - x := uint32(a) * uint32(b) - return fieldReduce(x) -} - -// fieldMulSub returns a * (b - c). This operation is fused to save a -// fieldReduceOnce after the subtraction. -func fieldMulSub(a, b, c fieldElement) fieldElement { - x := uint32(a) * uint32(b-c+q) - return fieldReduce(x) -} - -// fieldAddMul returns a * b + c * d. This operation is fused to save a -// fieldReduceOnce and a fieldReduce. -func fieldAddMul(a, b, c, d fieldElement) fieldElement { - x := uint32(a) * uint32(b) - x += uint32(c) * uint32(d) - return fieldReduce(x) -} - -// compress maps a field element uniformly to the range 0 to 2ᵈ-1, according to -// FIPS 203 (DRAFT), Definition 4.5. -func compress(x fieldElement, d uint8) uint16 { - // We want to compute (x * 2ᵈ) / q, rounded to nearest integer, with 1/2 - // rounding up (see FIPS 203 (DRAFT), Section 2.3). - - // Barrett reduction produces a quotient and a remainder in the range [0, 2q), - // such that dividend = quotient * q + remainder. - dividend := uint32(x) << d // x * 2ᵈ - quotient := uint32(uint64(dividend) * barrettMultiplier >> barrettShift) - remainder := dividend - quotient*q - - // Since the remainder is in the range [0, 2q), not [0, q), we need to - // portion it into three spans for rounding. - // - // [ 0, q/2 ) -> round to 0 - // [ q/2, q + q/2 ) -> round to 1 - // [ q + q/2, 2q ) -> round to 2 - // - // We can convert that to the following logic: add 1 if remainder > q/2, - // then add 1 again if remainder > q + q/2. - // - // Note that if remainder > x, then ⌊x⌋ - remainder underflows, and the top - // bit of the difference will be set. - quotient += (q/2 - remainder) >> 31 & 1 - quotient += (q + q/2 - remainder) >> 31 & 1 - - // quotient might have overflowed at this point, so reduce it by masking. - var mask uint32 = (1 << d) - 1 - return uint16(quotient & mask) -} - -// decompress maps a number x between 0 and 2ᵈ-1 uniformly to the full range of -// field elements, according to FIPS 203 (DRAFT), Definition 4.6. -func decompress(y uint16, d uint8) fieldElement { - // We want to compute (y * q) / 2ᵈ, rounded to nearest integer, with 1/2 - // rounding up (see FIPS 203 (DRAFT), Section 2.3). - - dividend := uint32(y) * q - quotient := dividend >> d // (y * q) / 2ᵈ - - // The d'th least-significant bit of the dividend (the most significant bit - // of the remainder) is 1 for the top half of the values that divide to the - // same quotient, which are the ones that round up. - quotient += dividend >> (d - 1) & 1 - - // quotient is at most (2¹¹-1) * q / 2¹¹ + 1 = 3328, so it didn't overflow. - return fieldElement(quotient) -} - -// ringElement is a polynomial, an element of R_q, represented as an array -// according to FIPS 203 (DRAFT), Section 2.4. -type ringElement [n]fieldElement - -// polyAdd adds two ringElements or nttElements. -func polyAdd[T ~[n]fieldElement](a, b T) (s T) { - for i := range s { - s[i] = fieldAdd(a[i], b[i]) - } - return s -} - -// polySub subtracts two ringElements or nttElements. -func polySub[T ~[n]fieldElement](a, b T) (s T) { - for i := range s { - s[i] = fieldSub(a[i], b[i]) - } - return s -} - -// polyByteEncode appends the 384-byte encoding of f to b. -// -// It implements ByteEncode₁₂, according to FIPS 203 (DRAFT), Algorithm 4. -func polyByteEncode[T ~[n]fieldElement](b []byte, f T) []byte { - out, B := sliceForAppend(b, encodingSize12) - for i := 0; i < n; i += 2 { - x := uint32(f[i]) | uint32(f[i+1])<<12 - B[0] = uint8(x) - B[1] = uint8(x >> 8) - B[2] = uint8(x >> 16) - B = B[3:] - } - return out -} - -// polyByteDecode decodes the 384-byte encoding of a polynomial, checking that -// all the coefficients are properly reduced. This achieves the "Modulus check" -// step of ML-KEM Encapsulation Input Validation. -// -// polyByteDecode is also used in ML-KEM Decapsulation, where the input -// validation is not required, but implicitly allowed by the specification. -// -// It implements ByteDecode₁₂, according to FIPS 203 (DRAFT), Algorithm 5. -func polyByteDecode[T ~[n]fieldElement](b []byte) (T, error) { - if len(b) != encodingSize12 { - return T{}, errors.New("mlkem768: invalid encoding length") - } - var f T - for i := 0; i < n; i += 2 { - d := uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 - const mask12 = 0b1111_1111_1111 - var err error - if f[i], err = fieldCheckReduced(uint16(d & mask12)); err != nil { - return T{}, errors.New("mlkem768: invalid polynomial encoding") - } - if f[i+1], err = fieldCheckReduced(uint16(d >> 12)); err != nil { - return T{}, errors.New("mlkem768: invalid polynomial encoding") - } - b = b[3:] - } - return f, nil -} - -// sliceForAppend takes a slice and a requested number of bytes. It returns a -// slice with the contents of the given slice followed by that many bytes and a -// second slice that aliases into it and contains only the extra bytes. If the -// original slice has sufficient capacity then no allocation is performed. -func sliceForAppend(in []byte, n int) (head, tail []byte) { - if total := len(in) + n; cap(in) >= total { - head = in[:total] - } else { - head = make([]byte, total) - copy(head, in) - } - tail = head[len(in):] - return -} - -// ringCompressAndEncode1 appends a 32-byte encoding of a ring element to s, -// compressing one coefficients per bit. -// -// It implements Compress₁, according to FIPS 203 (DRAFT), Definition 4.5, -// followed by ByteEncode₁, according to FIPS 203 (DRAFT), Algorithm 4. -func ringCompressAndEncode1(s []byte, f ringElement) []byte { - s, b := sliceForAppend(s, encodingSize1) - for i := range b { - b[i] = 0 - } - for i := range f { - b[i/8] |= uint8(compress(f[i], 1) << (i % 8)) - } - return s -} - -// ringDecodeAndDecompress1 decodes a 32-byte slice to a ring element where each -// bit is mapped to 0 or ⌈q/2⌋. -// -// It implements ByteDecode₁, according to FIPS 203 (DRAFT), Algorithm 5, -// followed by Decompress₁, according to FIPS 203 (DRAFT), Definition 4.6. -func ringDecodeAndDecompress1(b *[encodingSize1]byte) ringElement { - var f ringElement - for i := range f { - b_i := b[i/8] >> (i % 8) & 1 - const halfQ = (q + 1) / 2 // ⌈q/2⌋, rounded up per FIPS 203 (DRAFT), Section 2.3 - f[i] = fieldElement(b_i) * halfQ // 0 decompresses to 0, and 1 to ⌈q/2⌋ - } - return f -} - -// ringCompressAndEncode4 appends a 128-byte encoding of a ring element to s, -// compressing two coefficients per byte. -// -// It implements Compress₄, according to FIPS 203 (DRAFT), Definition 4.5, -// followed by ByteEncode₄, according to FIPS 203 (DRAFT), Algorithm 4. -func ringCompressAndEncode4(s []byte, f ringElement) []byte { - s, b := sliceForAppend(s, encodingSize4) - for i := 0; i < n; i += 2 { - b[i/2] = uint8(compress(f[i], 4) | compress(f[i+1], 4)<<4) - } - return s -} - -// ringDecodeAndDecompress4 decodes a 128-byte encoding of a ring element where -// each four bits are mapped to an equidistant distribution. -// -// It implements ByteDecode₄, according to FIPS 203 (DRAFT), Algorithm 5, -// followed by Decompress₄, according to FIPS 203 (DRAFT), Definition 4.6. -func ringDecodeAndDecompress4(b *[encodingSize4]byte) ringElement { - var f ringElement - for i := 0; i < n; i += 2 { - f[i] = fieldElement(decompress(uint16(b[i/2]&0b1111), 4)) - f[i+1] = fieldElement(decompress(uint16(b[i/2]>>4), 4)) - } - return f -} - -// ringCompressAndEncode10 appends a 320-byte encoding of a ring element to s, -// compressing four coefficients per five bytes. -// -// It implements Compress₁₀, according to FIPS 203 (DRAFT), Definition 4.5, -// followed by ByteEncode₁₀, according to FIPS 203 (DRAFT), Algorithm 4. -func ringCompressAndEncode10(s []byte, f ringElement) []byte { - s, b := sliceForAppend(s, encodingSize10) - for i := 0; i < n; i += 4 { - var x uint64 - x |= uint64(compress(f[i+0], 10)) - x |= uint64(compress(f[i+1], 10)) << 10 - x |= uint64(compress(f[i+2], 10)) << 20 - x |= uint64(compress(f[i+3], 10)) << 30 - b[0] = uint8(x) - b[1] = uint8(x >> 8) - b[2] = uint8(x >> 16) - b[3] = uint8(x >> 24) - b[4] = uint8(x >> 32) - b = b[5:] - } - return s -} - -// ringDecodeAndDecompress10 decodes a 320-byte encoding of a ring element where -// each ten bits are mapped to an equidistant distribution. -// -// It implements ByteDecode₁₀, according to FIPS 203 (DRAFT), Algorithm 5, -// followed by Decompress₁₀, according to FIPS 203 (DRAFT), Definition 4.6. -func ringDecodeAndDecompress10(bb *[encodingSize10]byte) ringElement { - b := bb[:] - var f ringElement - for i := 0; i < n; i += 4 { - x := uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | uint64(b[4])<<32 - b = b[5:] - f[i] = fieldElement(decompress(uint16(x>>0&0b11_1111_1111), 10)) - f[i+1] = fieldElement(decompress(uint16(x>>10&0b11_1111_1111), 10)) - f[i+2] = fieldElement(decompress(uint16(x>>20&0b11_1111_1111), 10)) - f[i+3] = fieldElement(decompress(uint16(x>>30&0b11_1111_1111), 10)) - } - return f -} - -// samplePolyCBD draws a ringElement from the special Dη distribution given a -// stream of random bytes generated by the PRF function, according to FIPS 203 -// (DRAFT), Algorithm 7 and Definition 4.1. -func samplePolyCBD(s []byte, b byte) ringElement { - prf := sha3.NewShake256() - prf.Write(s) - prf.Write([]byte{b}) - B := make([]byte, 128) - prf.Read(B) - - // SamplePolyCBD simply draws four (2η) bits for each coefficient, and adds - // the first two and subtracts the last two. - - var f ringElement - for i := 0; i < n; i += 2 { - b := B[i/2] - b_7, b_6, b_5, b_4 := b>>7, b>>6&1, b>>5&1, b>>4&1 - b_3, b_2, b_1, b_0 := b>>3&1, b>>2&1, b>>1&1, b&1 - f[i] = fieldSub(fieldElement(b_0+b_1), fieldElement(b_2+b_3)) - f[i+1] = fieldSub(fieldElement(b_4+b_5), fieldElement(b_6+b_7)) - } - return f -} - -// nttElement is an NTT representation, an element of T_q, represented as an -// array according to FIPS 203 (DRAFT), Section 2.4. -type nttElement [n]fieldElement - -// gammas are the values ζ^2BitRev7(i)+1 mod q for each index i. -var gammas = [128]fieldElement{17, 3312, 2761, 568, 583, 2746, 2649, 680, 1637, 1692, 723, 2606, 2288, 1041, 1100, 2229, 1409, 1920, 2662, 667, 3281, 48, 233, 3096, 756, 2573, 2156, 1173, 3015, 314, 3050, 279, 1703, 1626, 1651, 1678, 2789, 540, 1789, 1540, 1847, 1482, 952, 2377, 1461, 1868, 2687, 642, 939, 2390, 2308, 1021, 2437, 892, 2388, 941, 733, 2596, 2337, 992, 268, 3061, 641, 2688, 1584, 1745, 2298, 1031, 2037, 1292, 3220, 109, 375, 2954, 2549, 780, 2090, 1239, 1645, 1684, 1063, 2266, 319, 3010, 2773, 556, 757, 2572, 2099, 1230, 561, 2768, 2466, 863, 2594, 735, 2804, 525, 1092, 2237, 403, 2926, 1026, 2303, 1143, 2186, 2150, 1179, 2775, 554, 886, 2443, 1722, 1607, 1212, 2117, 1874, 1455, 1029, 2300, 2110, 1219, 2935, 394, 885, 2444, 2154, 1175} - -// nttMul multiplies two nttElements. -// -// It implements MultiplyNTTs, according to FIPS 203 (DRAFT), Algorithm 10. -func nttMul(f, g nttElement) nttElement { - var h nttElement - // We use i += 2 for bounds check elimination. See https://go.dev/issue/66826. - for i := 0; i < 256; i += 2 { - a0, a1 := f[i], f[i+1] - b0, b1 := g[i], g[i+1] - h[i] = fieldAddMul(a0, b0, fieldMul(a1, b1), gammas[i/2]) - h[i+1] = fieldAddMul(a0, b1, a1, b0) - } - return h -} - -// zetas are the values ζ^BitRev7(k) mod q for each index k. -var zetas = [128]fieldElement{1, 1729, 2580, 3289, 2642, 630, 1897, 848, 1062, 1919, 193, 797, 2786, 3260, 569, 1746, 296, 2447, 1339, 1476, 3046, 56, 2240, 1333, 1426, 2094, 535, 2882, 2393, 2879, 1974, 821, 289, 331, 3253, 1756, 1197, 2304, 2277, 2055, 650, 1977, 2513, 632, 2865, 33, 1320, 1915, 2319, 1435, 807, 452, 1438, 2868, 1534, 2402, 2647, 2617, 1481, 648, 2474, 3110, 1227, 910, 17, 2761, 583, 2649, 1637, 723, 2288, 1100, 1409, 2662, 3281, 233, 756, 2156, 3015, 3050, 1703, 1651, 2789, 1789, 1847, 952, 1461, 2687, 939, 2308, 2437, 2388, 733, 2337, 268, 641, 1584, 2298, 2037, 3220, 375, 2549, 2090, 1645, 1063, 319, 2773, 757, 2099, 561, 2466, 2594, 2804, 1092, 403, 1026, 1143, 2150, 2775, 886, 1722, 1212, 1874, 1029, 2110, 2935, 885, 2154} - -// ntt maps a ringElement to its nttElement representation. -// -// It implements NTT, according to FIPS 203 (DRAFT), Algorithm 8. -func ntt(f ringElement) nttElement { - k := 1 - for len := 128; len >= 2; len /= 2 { - for start := 0; start < 256; start += 2 * len { - zeta := zetas[k] - k++ - // Bounds check elimination hint. - f, flen := f[start:start+len], f[start+len:start+len+len] - for j := 0; j < len; j++ { - t := fieldMul(zeta, flen[j]) - flen[j] = fieldSub(f[j], t) - f[j] = fieldAdd(f[j], t) - } - } - } - return nttElement(f) -} - -// inverseNTT maps a nttElement back to the ringElement it represents. -// -// It implements NTT⁻¹, according to FIPS 203 (DRAFT), Algorithm 9. -func inverseNTT(f nttElement) ringElement { - k := 127 - for len := 2; len <= 128; len *= 2 { - for start := 0; start < 256; start += 2 * len { - zeta := zetas[k] - k-- - // Bounds check elimination hint. - f, flen := f[start:start+len], f[start+len:start+len+len] - for j := 0; j < len; j++ { - t := f[j] - f[j] = fieldAdd(t, flen[j]) - flen[j] = fieldMulSub(zeta, flen[j], t) - } - } - } - for i := range f { - f[i] = fieldMul(f[i], 3303) // 3303 = 128⁻¹ mod q - } - return ringElement(f) -} - -// sampleNTT draws a uniformly random nttElement from a stream of uniformly -// random bytes generated by the XOF function, according to FIPS 203 (DRAFT), -// Algorithm 6 and Definition 4.2. -func sampleNTT(rho []byte, ii, jj byte) nttElement { - B := sha3.NewShake128() - B.Write(rho) - B.Write([]byte{ii, jj}) - - // SampleNTT essentially draws 12 bits at a time from r, interprets them in - // little-endian, and rejects values higher than q, until it drew 256 - // values. (The rejection rate is approximately 19%.) - // - // To do this from a bytes stream, it draws three bytes at a time, and - // splits them into two uint16 appropriately masked. - // - // r₀ r₁ r₂ - // |- - - - - - - -|- - - - - - - -|- - - - - - - -| - // - // Uint16(r₀ || r₁) - // |- - - - - - - - - - - - - - - -| - // |- - - - - - - - - - - -| - // d₁ - // - // Uint16(r₁ || r₂) - // |- - - - - - - - - - - - - - - -| - // |- - - - - - - - - - - -| - // d₂ - // - // Note that in little-endian, the rightmost bits are the most significant - // bits (dropped with a mask) and the leftmost bits are the least - // significant bits (dropped with a right shift). - - var a nttElement - var j int // index into a - var buf [24]byte // buffered reads from B - off := len(buf) // index into buf, starts in a "buffer fully consumed" state - for { - if off >= len(buf) { - B.Read(buf[:]) - off = 0 - } - d1 := binary.LittleEndian.Uint16(buf[off:]) & 0b1111_1111_1111 - d2 := binary.LittleEndian.Uint16(buf[off+1:]) >> 4 - off += 3 - if d1 < q { - a[j] = fieldElement(d1) - j++ - } - if j >= len(a) { - break - } - if d2 < q { - a[j] = fieldElement(d2) - j++ - } - if j >= len(a) { - break - } - } - return a -} diff --git a/randutil/randutil.go b/randutil/randutil.go new file mode 100644 index 0000000..70a9e96 --- /dev/null +++ b/randutil/randutil.go @@ -0,0 +1,26 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package randutil contains internal randomness utilities for various +// crypto packages. +package randutil + +import ( + "io" + "math/rand/v2" +) + +// MaybeReadByte reads a single byte from r with 50% probability. This is used +// to ensure that callers do not depend on non-guaranteed behaviour, e.g. +// assuming that rsa.GenerateKey is deterministic w.r.t. a given random stream. +// +// This does not affect tests that pass a stream of fixed bytes as the random +// source (e.g. a zeroReader). +func MaybeReadByte(r io.Reader) { + if rand.Uint64()&1 == 1 { + return + } + var buf [1]byte + r.Read(buf[:]) +} \ No newline at end of file diff --git a/sha3/shake.go b/sha3/shake.go new file mode 100644 index 0000000..d4329ea --- /dev/null +++ b/sha3/shake.go @@ -0,0 +1,151 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package sha3 + +import ( + "bytes" + //"crypto/internal/fips140" + "github.com/xtls/reality/byteorder" + "errors" + "math/bits" +) + +type SHAKE struct { + d Digest // SHA-3 state context and Read/Write operations + + // initBlock is the cSHAKE specific initialization set of bytes. It is initialized + // by newCShake function and stores concatenation of N followed by S, encoded + // by the method specified in 3.3 of [1]. + // It is stored here in order for Reset() to be able to put context into + // initial state. + initBlock []byte +} + +func bytepad(data []byte, rate int) []byte { + out := make([]byte, 0, 9+len(data)+rate-1) + out = append(out, leftEncode(uint64(rate))...) + out = append(out, data...) + if padlen := rate - len(out)%rate; padlen < rate { + out = append(out, make([]byte, padlen)...) + } + return out +} + +func leftEncode(x uint64) []byte { + // Let n be the smallest positive integer for which 2^(8n) > x. + n := (bits.Len64(x) + 7) / 8 + if n == 0 { + n = 1 + } + // Return n || x with n as a byte and x an n bytes in big-endian order. + b := make([]byte, 9) + byteorder.BEPutUint64(b[1:], x) + b = b[9-n-1:] + b[0] = byte(n) + return b +} + +func newCShake(N, S []byte, rate, outputLen int, dsbyte byte) *SHAKE { + c := &SHAKE{d: Digest{rate: rate, outputLen: outputLen, dsbyte: dsbyte}} + c.initBlock = make([]byte, 0, 9+len(N)+9+len(S)) // leftEncode returns max 9 bytes + c.initBlock = append(c.initBlock, leftEncode(uint64(len(N))*8)...) + c.initBlock = append(c.initBlock, N...) + c.initBlock = append(c.initBlock, leftEncode(uint64(len(S))*8)...) + c.initBlock = append(c.initBlock, S...) + c.Write(bytepad(c.initBlock, c.d.rate)) + return c +} + +func (s *SHAKE) BlockSize() int { return s.d.BlockSize() } +func (s *SHAKE) Size() int { return s.d.Size() } + +// Sum appends a portion of output to b and returns the resulting slice. The +// output length is selected to provide full-strength generic security: 32 bytes +// for SHAKE128 and 64 bytes for SHAKE256. It does not change the underlying +// state. It panics if any output has already been read. +func (s *SHAKE) Sum(in []byte) []byte { return s.d.Sum(in) } + +// Write absorbs more data into the hash's state. +// It panics if any output has already been read. +func (s *SHAKE) Write(p []byte) (n int, err error) { return s.d.Write(p) } + +func (s *SHAKE) Read(out []byte) (n int, err error) { + //fips140.RecordApproved() + // Note that read is not exposed on Digest since SHA-3 does not offer + // variable output length. It is only used internally by Sum. + return s.d.read(out) +} + +// Reset resets the hash to initial state. +func (s *SHAKE) Reset() { + s.d.Reset() + if len(s.initBlock) != 0 { + s.Write(bytepad(s.initBlock, s.d.rate)) + } +} + +// Clone returns a copy of the SHAKE context in its current state. +func (s *SHAKE) Clone() *SHAKE { + ret := *s + return &ret +} + +func (s *SHAKE) MarshalBinary() ([]byte, error) { + return s.AppendBinary(make([]byte, 0, marshaledSize+len(s.initBlock))) +} + +func (s *SHAKE) AppendBinary(b []byte) ([]byte, error) { + b, err := s.d.AppendBinary(b) + if err != nil { + return nil, err + } + b = append(b, s.initBlock...) + return b, nil +} + +func (s *SHAKE) UnmarshalBinary(b []byte) error { + if len(b) < marshaledSize { + return errors.New("sha3: invalid hash state") + } + if err := s.d.UnmarshalBinary(b[:marshaledSize]); err != nil { + return err + } + s.initBlock = bytes.Clone(b[marshaledSize:]) + return nil +} + +// NewShake128 creates a new SHAKE128 XOF. +func NewShake128() *SHAKE { + return &SHAKE{d: Digest{rate: rateK256, outputLen: 32, dsbyte: dsbyteShake}} +} + +// NewShake256 creates a new SHAKE256 XOF. +func NewShake256() *SHAKE { + return &SHAKE{d: Digest{rate: rateK512, outputLen: 64, dsbyte: dsbyteShake}} +} + +// NewCShake128 creates a new cSHAKE128 XOF. +// +// N is used to define functions based on cSHAKE, it can be empty when plain +// cSHAKE is desired. S is a customization byte string used for domain +// separation. When N and S are both empty, this is equivalent to NewShake128. +func NewCShake128(N, S []byte) *SHAKE { + if len(N) == 0 && len(S) == 0 { + return NewShake128() + } + return newCShake(N, S, rateK256, 32, dsbyteCShake) +} + +// NewCShake256 creates a new cSHAKE256 XOF. +// +// N is used to define functions based on cSHAKE, it can be empty when plain +// cSHAKE is desired. S is a customization byte string used for domain +// separation. When N and S are both empty, this is equivalent to NewShake256. +func NewCShake256(N, S []byte) *SHAKE { + if len(N) == 0 && len(S) == 0 { + return NewShake256() + } + return newCShake(N, S, rateK512, 64, dsbyteCShake) +} \ No newline at end of file