EndGame v3

This commit is contained in:
Aksh 2024-10-23 20:50:14 +05:30
commit 9e36ba54ee
646 changed files with 271674 additions and 0 deletions

View file

@ -0,0 +1,27 @@
Copyright (c) 2009 The Go Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View file

@ -0,0 +1,22 @@
Additional IP Rights Grant (Patents)
"This implementation" means the copyrightable works distributed by
Google as part of the Go project.
Google hereby grants to You a perpetual, worldwide, non-exclusive,
no-charge, royalty-free, irrevocable (except as stated in this section)
patent license to make, have made, use, offer to sell, sell, import,
transfer and otherwise run, modify and propagate the contents of this
implementation of Go, where such license applies only to those patent
claims, both currently owned or controlled by Google and acquired in
the future, licensable by Google that are necessarily infringed by this
implementation of Go. This grant does not include claims that would be
infringed only as a consequence of further modification of this
implementation. If you or your agent or exclusive licensee institute or
order or agree to the institution of patent litigation against any
entity (including a cross-claim or counterclaim in a lawsuit) alleging
that this implementation of Go or any code incorporated within this
implementation of Go constitutes direct or contributory patent
infringement, or inducement of patent infringement, then any patent
rights granted to you under this License for this implementation of Go
shall terminate as of the date such litigation is filed.

View file

@ -0,0 +1,59 @@
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package curve25519 provides an implementation of the X25519 function, which
// performs scalar multiplication on the elliptic curve known as Curve25519.
// See RFC 7748.
//
// Starting in Go 1.20, this package is a wrapper for the X25519 implementation
// in the crypto/ecdh package.
package curve25519 // import "golang.org/x/crypto/curve25519"
// ScalarMult sets dst to the product scalar * point.
//
// Deprecated: when provided a low-order point, ScalarMult will set dst to all
// zeroes, irrespective of the scalar. Instead, use the X25519 function, which
// will return an error.
func ScalarMult(dst, scalar, point *[32]byte) {
scalarMult(dst, scalar, point)
}
// ScalarBaseMult sets dst to the product scalar * base where base is the
// standard generator.
//
// It is recommended to use the X25519 function with Basepoint instead, as
// copying into fixed size arrays can lead to unexpected bugs.
func ScalarBaseMult(dst, scalar *[32]byte) {
scalarBaseMult(dst, scalar)
}
const (
// ScalarSize is the size of the scalar input to X25519.
ScalarSize = 32
// PointSize is the size of the point input to X25519.
PointSize = 32
)
// Basepoint is the canonical Curve25519 generator.
var Basepoint []byte
var basePoint = [32]byte{9}
func init() { Basepoint = basePoint[:] }
// X25519 returns the result of the scalar multiplication (scalar * point),
// according to RFC 7748, Section 5. scalar, point and the return value are
// slices of 32 bytes.
//
// scalar can be generated at random, for example with crypto/rand. point should
// be either Basepoint or the output of another X25519 call.
//
// If point is Basepoint (but not if it's a different slice with the same
// contents) a precomputed implementation might be used for performance.
func X25519(scalar, point []byte) ([]byte, error) {
// Outline the body of function, to let the allocation be inlined in the
// caller, and possibly avoid escaping to the heap.
var dst [32]byte
return x25519(&dst, scalar, point)
}

View file

@ -0,0 +1,105 @@
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !go1.20
package curve25519
import (
"crypto/subtle"
"errors"
"strconv"
"golang.org/x/crypto/curve25519/internal/field"
)
func scalarMult(dst, scalar, point *[32]byte) {
var e [32]byte
copy(e[:], scalar[:])
e[0] &= 248
e[31] &= 127
e[31] |= 64
var x1, x2, z2, x3, z3, tmp0, tmp1 field.Element
x1.SetBytes(point[:])
x2.One()
x3.Set(&x1)
z3.One()
swap := 0
for pos := 254; pos >= 0; pos-- {
b := e[pos/8] >> uint(pos&7)
b &= 1
swap ^= int(b)
x2.Swap(&x3, swap)
z2.Swap(&z3, swap)
swap = int(b)
tmp0.Subtract(&x3, &z3)
tmp1.Subtract(&x2, &z2)
x2.Add(&x2, &z2)
z2.Add(&x3, &z3)
z3.Multiply(&tmp0, &x2)
z2.Multiply(&z2, &tmp1)
tmp0.Square(&tmp1)
tmp1.Square(&x2)
x3.Add(&z3, &z2)
z2.Subtract(&z3, &z2)
x2.Multiply(&tmp1, &tmp0)
tmp1.Subtract(&tmp1, &tmp0)
z2.Square(&z2)
z3.Mult32(&tmp1, 121666)
x3.Square(&x3)
tmp0.Add(&tmp0, &z3)
z3.Multiply(&x1, &z2)
z2.Multiply(&tmp1, &tmp0)
}
x2.Swap(&x3, swap)
z2.Swap(&z3, swap)
z2.Invert(&z2)
x2.Multiply(&x2, &z2)
copy(dst[:], x2.Bytes())
}
func scalarBaseMult(dst, scalar *[32]byte) {
checkBasepoint()
scalarMult(dst, scalar, &basePoint)
}
func x25519(dst *[32]byte, scalar, point []byte) ([]byte, error) {
var in [32]byte
if l := len(scalar); l != 32 {
return nil, errors.New("bad scalar length: " + strconv.Itoa(l) + ", expected 32")
}
if l := len(point); l != 32 {
return nil, errors.New("bad point length: " + strconv.Itoa(l) + ", expected 32")
}
copy(in[:], scalar)
if &point[0] == &Basepoint[0] {
scalarBaseMult(dst, &in)
} else {
var base, zero [32]byte
copy(base[:], point)
scalarMult(dst, &in, &base)
if subtle.ConstantTimeCompare(dst[:], zero[:]) == 1 {
return nil, errors.New("bad input point: low order point")
}
}
return dst[:], nil
}
func checkBasepoint() {
if subtle.ConstantTimeCompare(Basepoint, []byte{
0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
}) != 1 {
panic("curve25519: global Basepoint value was modified")
}
}

View file

@ -0,0 +1,46 @@
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build go1.20
package curve25519
import "crypto/ecdh"
func x25519(dst *[32]byte, scalar, point []byte) ([]byte, error) {
curve := ecdh.X25519()
pub, err := curve.NewPublicKey(point)
if err != nil {
return nil, err
}
priv, err := curve.NewPrivateKey(scalar)
if err != nil {
return nil, err
}
out, err := priv.ECDH(pub)
if err != nil {
return nil, err
}
copy(dst[:], out)
return dst[:], nil
}
func scalarMult(dst, scalar, point *[32]byte) {
if _, err := x25519(dst, scalar[:], point[:]); err != nil {
// The only error condition for x25519 when the inputs are 32 bytes long
// is if the output would have been the all-zero value.
for i := range dst {
dst[i] = 0
}
}
}
func scalarBaseMult(dst, scalar *[32]byte) {
curve := ecdh.X25519()
priv, err := curve.NewPrivateKey(scalar[:])
if err != nil {
panic("curve25519: internal error: scalarBaseMult was not 32 bytes")
}
copy(dst[:], priv.PublicKey().Bytes())
}

View file

@ -0,0 +1,7 @@
This package is kept in sync with crypto/ed25519/internal/edwards25519/field in
the standard library.
If there are any changes in the standard library that need to be synced to this
package, run sync.sh. It will not overwrite any local changes made since the
previous sync, so it's ok to land changes in this package first, and then sync
to the standard library later.

View file

@ -0,0 +1,416 @@
// Copyright (c) 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package field implements fast arithmetic modulo 2^255-19.
package field
import (
"crypto/subtle"
"encoding/binary"
"math/bits"
)
// Element represents an element of the field GF(2^255-19). Note that this
// is not a cryptographically secure group, and should only be used to interact
// with edwards25519.Point coordinates.
//
// This type works similarly to math/big.Int, and all arguments and receivers
// are allowed to alias.
//
// The zero value is a valid zero element.
type Element struct {
// An element t represents the integer
// t.l0 + t.l1*2^51 + t.l2*2^102 + t.l3*2^153 + t.l4*2^204
//
// Between operations, all limbs are expected to be lower than 2^52.
l0 uint64
l1 uint64
l2 uint64
l3 uint64
l4 uint64
}
const maskLow51Bits uint64 = (1 << 51) - 1
var feZero = &Element{0, 0, 0, 0, 0}
// Zero sets v = 0, and returns v.
func (v *Element) Zero() *Element {
*v = *feZero
return v
}
var feOne = &Element{1, 0, 0, 0, 0}
// One sets v = 1, and returns v.
func (v *Element) One() *Element {
*v = *feOne
return v
}
// reduce reduces v modulo 2^255 - 19 and returns it.
func (v *Element) reduce() *Element {
v.carryPropagate()
// After the light reduction we now have a field element representation
// v < 2^255 + 2^13 * 19, but need v < 2^255 - 19.
// If v >= 2^255 - 19, then v + 19 >= 2^255, which would overflow 2^255 - 1,
// generating a carry. That is, c will be 0 if v < 2^255 - 19, and 1 otherwise.
c := (v.l0 + 19) >> 51
c = (v.l1 + c) >> 51
c = (v.l2 + c) >> 51
c = (v.l3 + c) >> 51
c = (v.l4 + c) >> 51
// If v < 2^255 - 19 and c = 0, this will be a no-op. Otherwise, it's
// effectively applying the reduction identity to the carry.
v.l0 += 19 * c
v.l1 += v.l0 >> 51
v.l0 = v.l0 & maskLow51Bits
v.l2 += v.l1 >> 51
v.l1 = v.l1 & maskLow51Bits
v.l3 += v.l2 >> 51
v.l2 = v.l2 & maskLow51Bits
v.l4 += v.l3 >> 51
v.l3 = v.l3 & maskLow51Bits
// no additional carry
v.l4 = v.l4 & maskLow51Bits
return v
}
// Add sets v = a + b, and returns v.
func (v *Element) Add(a, b *Element) *Element {
v.l0 = a.l0 + b.l0
v.l1 = a.l1 + b.l1
v.l2 = a.l2 + b.l2
v.l3 = a.l3 + b.l3
v.l4 = a.l4 + b.l4
// Using the generic implementation here is actually faster than the
// assembly. Probably because the body of this function is so simple that
// the compiler can figure out better optimizations by inlining the carry
// propagation. TODO
return v.carryPropagateGeneric()
}
// Subtract sets v = a - b, and returns v.
func (v *Element) Subtract(a, b *Element) *Element {
// We first add 2 * p, to guarantee the subtraction won't underflow, and
// then subtract b (which can be up to 2^255 + 2^13 * 19).
v.l0 = (a.l0 + 0xFFFFFFFFFFFDA) - b.l0
v.l1 = (a.l1 + 0xFFFFFFFFFFFFE) - b.l1
v.l2 = (a.l2 + 0xFFFFFFFFFFFFE) - b.l2
v.l3 = (a.l3 + 0xFFFFFFFFFFFFE) - b.l3
v.l4 = (a.l4 + 0xFFFFFFFFFFFFE) - b.l4
return v.carryPropagate()
}
// Negate sets v = -a, and returns v.
func (v *Element) Negate(a *Element) *Element {
return v.Subtract(feZero, a)
}
// Invert sets v = 1/z mod p, and returns v.
//
// If z == 0, Invert returns v = 0.
func (v *Element) Invert(z *Element) *Element {
// Inversion is implemented as exponentiation with exponent p 2. It uses the
// same sequence of 255 squarings and 11 multiplications as [Curve25519].
var z2, z9, z11, z2_5_0, z2_10_0, z2_20_0, z2_50_0, z2_100_0, t Element
z2.Square(z) // 2
t.Square(&z2) // 4
t.Square(&t) // 8
z9.Multiply(&t, z) // 9
z11.Multiply(&z9, &z2) // 11
t.Square(&z11) // 22
z2_5_0.Multiply(&t, &z9) // 31 = 2^5 - 2^0
t.Square(&z2_5_0) // 2^6 - 2^1
for i := 0; i < 4; i++ {
t.Square(&t) // 2^10 - 2^5
}
z2_10_0.Multiply(&t, &z2_5_0) // 2^10 - 2^0
t.Square(&z2_10_0) // 2^11 - 2^1
for i := 0; i < 9; i++ {
t.Square(&t) // 2^20 - 2^10
}
z2_20_0.Multiply(&t, &z2_10_0) // 2^20 - 2^0
t.Square(&z2_20_0) // 2^21 - 2^1
for i := 0; i < 19; i++ {
t.Square(&t) // 2^40 - 2^20
}
t.Multiply(&t, &z2_20_0) // 2^40 - 2^0
t.Square(&t) // 2^41 - 2^1
for i := 0; i < 9; i++ {
t.Square(&t) // 2^50 - 2^10
}
z2_50_0.Multiply(&t, &z2_10_0) // 2^50 - 2^0
t.Square(&z2_50_0) // 2^51 - 2^1
for i := 0; i < 49; i++ {
t.Square(&t) // 2^100 - 2^50
}
z2_100_0.Multiply(&t, &z2_50_0) // 2^100 - 2^0
t.Square(&z2_100_0) // 2^101 - 2^1
for i := 0; i < 99; i++ {
t.Square(&t) // 2^200 - 2^100
}
t.Multiply(&t, &z2_100_0) // 2^200 - 2^0
t.Square(&t) // 2^201 - 2^1
for i := 0; i < 49; i++ {
t.Square(&t) // 2^250 - 2^50
}
t.Multiply(&t, &z2_50_0) // 2^250 - 2^0
t.Square(&t) // 2^251 - 2^1
t.Square(&t) // 2^252 - 2^2
t.Square(&t) // 2^253 - 2^3
t.Square(&t) // 2^254 - 2^4
t.Square(&t) // 2^255 - 2^5
return v.Multiply(&t, &z11) // 2^255 - 21
}
// Set sets v = a, and returns v.
func (v *Element) Set(a *Element) *Element {
*v = *a
return v
}
// SetBytes sets v to x, which must be a 32-byte little-endian encoding.
//
// Consistent with RFC 7748, the most significant bit (the high bit of the
// last byte) is ignored, and non-canonical values (2^255-19 through 2^255-1)
// are accepted. Note that this is laxer than specified by RFC 8032.
func (v *Element) SetBytes(x []byte) *Element {
if len(x) != 32 {
panic("edwards25519: invalid field element input size")
}
// Bits 0:51 (bytes 0:8, bits 0:64, shift 0, mask 51).
v.l0 = binary.LittleEndian.Uint64(x[0:8])
v.l0 &= maskLow51Bits
// Bits 51:102 (bytes 6:14, bits 48:112, shift 3, mask 51).
v.l1 = binary.LittleEndian.Uint64(x[6:14]) >> 3
v.l1 &= maskLow51Bits
// Bits 102:153 (bytes 12:20, bits 96:160, shift 6, mask 51).
v.l2 = binary.LittleEndian.Uint64(x[12:20]) >> 6
v.l2 &= maskLow51Bits
// Bits 153:204 (bytes 19:27, bits 152:216, shift 1, mask 51).
v.l3 = binary.LittleEndian.Uint64(x[19:27]) >> 1
v.l3 &= maskLow51Bits
// Bits 204:251 (bytes 24:32, bits 192:256, shift 12, mask 51).
// Note: not bytes 25:33, shift 4, to avoid overread.
v.l4 = binary.LittleEndian.Uint64(x[24:32]) >> 12
v.l4 &= maskLow51Bits
return v
}
// Bytes returns the canonical 32-byte little-endian encoding of v.
func (v *Element) Bytes() []byte {
// This function is outlined to make the allocations inline in the caller
// rather than happen on the heap.
var out [32]byte
return v.bytes(&out)
}
func (v *Element) bytes(out *[32]byte) []byte {
t := *v
t.reduce()
var buf [8]byte
for i, l := range [5]uint64{t.l0, t.l1, t.l2, t.l3, t.l4} {
bitsOffset := i * 51
binary.LittleEndian.PutUint64(buf[:], l<<uint(bitsOffset%8))
for i, bb := range buf {
off := bitsOffset/8 + i
if off >= len(out) {
break
}
out[off] |= bb
}
}
return out[:]
}
// Equal returns 1 if v and u are equal, and 0 otherwise.
func (v *Element) Equal(u *Element) int {
sa, sv := u.Bytes(), v.Bytes()
return subtle.ConstantTimeCompare(sa, sv)
}
// mask64Bits returns 0xffffffff if cond is 1, and 0 otherwise.
func mask64Bits(cond int) uint64 { return ^(uint64(cond) - 1) }
// Select sets v to a if cond == 1, and to b if cond == 0.
func (v *Element) Select(a, b *Element, cond int) *Element {
m := mask64Bits(cond)
v.l0 = (m & a.l0) | (^m & b.l0)
v.l1 = (m & a.l1) | (^m & b.l1)
v.l2 = (m & a.l2) | (^m & b.l2)
v.l3 = (m & a.l3) | (^m & b.l3)
v.l4 = (m & a.l4) | (^m & b.l4)
return v
}
// Swap swaps v and u if cond == 1 or leaves them unchanged if cond == 0, and returns v.
func (v *Element) Swap(u *Element, cond int) {
m := mask64Bits(cond)
t := m & (v.l0 ^ u.l0)
v.l0 ^= t
u.l0 ^= t
t = m & (v.l1 ^ u.l1)
v.l1 ^= t
u.l1 ^= t
t = m & (v.l2 ^ u.l2)
v.l2 ^= t
u.l2 ^= t
t = m & (v.l3 ^ u.l3)
v.l3 ^= t
u.l3 ^= t
t = m & (v.l4 ^ u.l4)
v.l4 ^= t
u.l4 ^= t
}
// IsNegative returns 1 if v is negative, and 0 otherwise.
func (v *Element) IsNegative() int {
return int(v.Bytes()[0] & 1)
}
// Absolute sets v to |u|, and returns v.
func (v *Element) Absolute(u *Element) *Element {
return v.Select(new(Element).Negate(u), u, u.IsNegative())
}
// Multiply sets v = x * y, and returns v.
func (v *Element) Multiply(x, y *Element) *Element {
feMul(v, x, y)
return v
}
// Square sets v = x * x, and returns v.
func (v *Element) Square(x *Element) *Element {
feSquare(v, x)
return v
}
// Mult32 sets v = x * y, and returns v.
func (v *Element) Mult32(x *Element, y uint32) *Element {
x0lo, x0hi := mul51(x.l0, y)
x1lo, x1hi := mul51(x.l1, y)
x2lo, x2hi := mul51(x.l2, y)
x3lo, x3hi := mul51(x.l3, y)
x4lo, x4hi := mul51(x.l4, y)
v.l0 = x0lo + 19*x4hi // carried over per the reduction identity
v.l1 = x1lo + x0hi
v.l2 = x2lo + x1hi
v.l3 = x3lo + x2hi
v.l4 = x4lo + x3hi
// The hi portions are going to be only 32 bits, plus any previous excess,
// so we can skip the carry propagation.
return v
}
// mul51 returns lo + hi * 2⁵¹ = a * b.
func mul51(a uint64, b uint32) (lo uint64, hi uint64) {
mh, ml := bits.Mul64(a, uint64(b))
lo = ml & maskLow51Bits
hi = (mh << 13) | (ml >> 51)
return
}
// Pow22523 set v = x^((p-5)/8), and returns v. (p-5)/8 is 2^252-3.
func (v *Element) Pow22523(x *Element) *Element {
var t0, t1, t2 Element
t0.Square(x) // x^2
t1.Square(&t0) // x^4
t1.Square(&t1) // x^8
t1.Multiply(x, &t1) // x^9
t0.Multiply(&t0, &t1) // x^11
t0.Square(&t0) // x^22
t0.Multiply(&t1, &t0) // x^31
t1.Square(&t0) // x^62
for i := 1; i < 5; i++ { // x^992
t1.Square(&t1)
}
t0.Multiply(&t1, &t0) // x^1023 -> 1023 = 2^10 - 1
t1.Square(&t0) // 2^11 - 2
for i := 1; i < 10; i++ { // 2^20 - 2^10
t1.Square(&t1)
}
t1.Multiply(&t1, &t0) // 2^20 - 1
t2.Square(&t1) // 2^21 - 2
for i := 1; i < 20; i++ { // 2^40 - 2^20
t2.Square(&t2)
}
t1.Multiply(&t2, &t1) // 2^40 - 1
t1.Square(&t1) // 2^41 - 2
for i := 1; i < 10; i++ { // 2^50 - 2^10
t1.Square(&t1)
}
t0.Multiply(&t1, &t0) // 2^50 - 1
t1.Square(&t0) // 2^51 - 2
for i := 1; i < 50; i++ { // 2^100 - 2^50
t1.Square(&t1)
}
t1.Multiply(&t1, &t0) // 2^100 - 1
t2.Square(&t1) // 2^101 - 2
for i := 1; i < 100; i++ { // 2^200 - 2^100
t2.Square(&t2)
}
t1.Multiply(&t2, &t1) // 2^200 - 1
t1.Square(&t1) // 2^201 - 2
for i := 1; i < 50; i++ { // 2^250 - 2^50
t1.Square(&t1)
}
t0.Multiply(&t1, &t0) // 2^250 - 1
t0.Square(&t0) // 2^251 - 2
t0.Square(&t0) // 2^252 - 4
return v.Multiply(&t0, x) // 2^252 - 3 -> x^(2^252-3)
}
// sqrtM1 is 2^((p-1)/4), which squared is equal to -1 by Euler's Criterion.
var sqrtM1 = &Element{1718705420411056, 234908883556509,
2233514472574048, 2117202627021982, 765476049583133}
// SqrtRatio sets r to the non-negative square root of the ratio of u and v.
//
// If u/v is square, SqrtRatio returns r and 1. If u/v is not square, SqrtRatio
// sets r according to Section 4.3 of draft-irtf-cfrg-ristretto255-decaf448-00,
// and returns r and 0.
func (r *Element) SqrtRatio(u, v *Element) (rr *Element, wasSquare int) {
var a, b Element
// r = (u * v3) * (u * v7)^((p-5)/8)
v2 := a.Square(v)
uv3 := b.Multiply(u, b.Multiply(v2, v))
uv7 := a.Multiply(uv3, a.Square(v2))
r.Multiply(uv3, r.Pow22523(uv7))
check := a.Multiply(v, a.Square(r)) // check = v * r^2
uNeg := b.Negate(u)
correctSignSqrt := check.Equal(u)
flippedSignSqrt := check.Equal(uNeg)
flippedSignSqrtI := check.Equal(uNeg.Multiply(uNeg, sqrtM1))
rPrime := b.Multiply(r, sqrtM1) // r_prime = SQRT_M1 * r
// r = CT_SELECT(r_prime IF flipped_sign_sqrt | flipped_sign_sqrt_i ELSE r)
r.Select(rPrime, r, flippedSignSqrt|flippedSignSqrtI)
r.Absolute(r) // Choose the nonnegative square root.
return r, correctSignSqrt | flippedSignSqrt
}

View file

@ -0,0 +1,15 @@
// Code generated by command: go run fe_amd64_asm.go -out ../fe_amd64.s -stubs ../fe_amd64.go -pkg field. DO NOT EDIT.
//go:build amd64 && gc && !purego
package field
// feMul sets out = a * b. It works like feMulGeneric.
//
//go:noescape
func feMul(out *Element, a *Element, b *Element)
// feSquare sets out = a * a. It works like feSquareGeneric.
//
//go:noescape
func feSquare(out *Element, a *Element)

View file

@ -0,0 +1,378 @@
// Code generated by command: go run fe_amd64_asm.go -out ../fe_amd64.s -stubs ../fe_amd64.go -pkg field. DO NOT EDIT.
//go:build amd64 && gc && !purego
#include "textflag.h"
// func feMul(out *Element, a *Element, b *Element)
TEXT ·feMul(SB), NOSPLIT, $0-24
MOVQ a+8(FP), CX
MOVQ b+16(FP), BX
// r0 = a0×b0
MOVQ (CX), AX
MULQ (BX)
MOVQ AX, DI
MOVQ DX, SI
// r0 += 19×a1×b4
MOVQ 8(CX), AX
IMUL3Q $0x13, AX, AX
MULQ 32(BX)
ADDQ AX, DI
ADCQ DX, SI
// r0 += 19×a2×b3
MOVQ 16(CX), AX
IMUL3Q $0x13, AX, AX
MULQ 24(BX)
ADDQ AX, DI
ADCQ DX, SI
// r0 += 19×a3×b2
MOVQ 24(CX), AX
IMUL3Q $0x13, AX, AX
MULQ 16(BX)
ADDQ AX, DI
ADCQ DX, SI
// r0 += 19×a4×b1
MOVQ 32(CX), AX
IMUL3Q $0x13, AX, AX
MULQ 8(BX)
ADDQ AX, DI
ADCQ DX, SI
// r1 = a0×b1
MOVQ (CX), AX
MULQ 8(BX)
MOVQ AX, R9
MOVQ DX, R8
// r1 += a1×b0
MOVQ 8(CX), AX
MULQ (BX)
ADDQ AX, R9
ADCQ DX, R8
// r1 += 19×a2×b4
MOVQ 16(CX), AX
IMUL3Q $0x13, AX, AX
MULQ 32(BX)
ADDQ AX, R9
ADCQ DX, R8
// r1 += 19×a3×b3
MOVQ 24(CX), AX
IMUL3Q $0x13, AX, AX
MULQ 24(BX)
ADDQ AX, R9
ADCQ DX, R8
// r1 += 19×a4×b2
MOVQ 32(CX), AX
IMUL3Q $0x13, AX, AX
MULQ 16(BX)
ADDQ AX, R9
ADCQ DX, R8
// r2 = a0×b2
MOVQ (CX), AX
MULQ 16(BX)
MOVQ AX, R11
MOVQ DX, R10
// r2 += a1×b1
MOVQ 8(CX), AX
MULQ 8(BX)
ADDQ AX, R11
ADCQ DX, R10
// r2 += a2×b0
MOVQ 16(CX), AX
MULQ (BX)
ADDQ AX, R11
ADCQ DX, R10
// r2 += 19×a3×b4
MOVQ 24(CX), AX
IMUL3Q $0x13, AX, AX
MULQ 32(BX)
ADDQ AX, R11
ADCQ DX, R10
// r2 += 19×a4×b3
MOVQ 32(CX), AX
IMUL3Q $0x13, AX, AX
MULQ 24(BX)
ADDQ AX, R11
ADCQ DX, R10
// r3 = a0×b3
MOVQ (CX), AX
MULQ 24(BX)
MOVQ AX, R13
MOVQ DX, R12
// r3 += a1×b2
MOVQ 8(CX), AX
MULQ 16(BX)
ADDQ AX, R13
ADCQ DX, R12
// r3 += a2×b1
MOVQ 16(CX), AX
MULQ 8(BX)
ADDQ AX, R13
ADCQ DX, R12
// r3 += a3×b0
MOVQ 24(CX), AX
MULQ (BX)
ADDQ AX, R13
ADCQ DX, R12
// r3 += 19×a4×b4
MOVQ 32(CX), AX
IMUL3Q $0x13, AX, AX
MULQ 32(BX)
ADDQ AX, R13
ADCQ DX, R12
// r4 = a0×b4
MOVQ (CX), AX
MULQ 32(BX)
MOVQ AX, R15
MOVQ DX, R14
// r4 += a1×b3
MOVQ 8(CX), AX
MULQ 24(BX)
ADDQ AX, R15
ADCQ DX, R14
// r4 += a2×b2
MOVQ 16(CX), AX
MULQ 16(BX)
ADDQ AX, R15
ADCQ DX, R14
// r4 += a3×b1
MOVQ 24(CX), AX
MULQ 8(BX)
ADDQ AX, R15
ADCQ DX, R14
// r4 += a4×b0
MOVQ 32(CX), AX
MULQ (BX)
ADDQ AX, R15
ADCQ DX, R14
// First reduction chain
MOVQ $0x0007ffffffffffff, AX
SHLQ $0x0d, DI, SI
SHLQ $0x0d, R9, R8
SHLQ $0x0d, R11, R10
SHLQ $0x0d, R13, R12
SHLQ $0x0d, R15, R14
ANDQ AX, DI
IMUL3Q $0x13, R14, R14
ADDQ R14, DI
ANDQ AX, R9
ADDQ SI, R9
ANDQ AX, R11
ADDQ R8, R11
ANDQ AX, R13
ADDQ R10, R13
ANDQ AX, R15
ADDQ R12, R15
// Second reduction chain (carryPropagate)
MOVQ DI, SI
SHRQ $0x33, SI
MOVQ R9, R8
SHRQ $0x33, R8
MOVQ R11, R10
SHRQ $0x33, R10
MOVQ R13, R12
SHRQ $0x33, R12
MOVQ R15, R14
SHRQ $0x33, R14
ANDQ AX, DI
IMUL3Q $0x13, R14, R14
ADDQ R14, DI
ANDQ AX, R9
ADDQ SI, R9
ANDQ AX, R11
ADDQ R8, R11
ANDQ AX, R13
ADDQ R10, R13
ANDQ AX, R15
ADDQ R12, R15
// Store output
MOVQ out+0(FP), AX
MOVQ DI, (AX)
MOVQ R9, 8(AX)
MOVQ R11, 16(AX)
MOVQ R13, 24(AX)
MOVQ R15, 32(AX)
RET
// func feSquare(out *Element, a *Element)
TEXT ·feSquare(SB), NOSPLIT, $0-16
MOVQ a+8(FP), CX
// r0 = l0×l0
MOVQ (CX), AX
MULQ (CX)
MOVQ AX, SI
MOVQ DX, BX
// r0 += 38×l1×l4
MOVQ 8(CX), AX
IMUL3Q $0x26, AX, AX
MULQ 32(CX)
ADDQ AX, SI
ADCQ DX, BX
// r0 += 38×l2×l3
MOVQ 16(CX), AX
IMUL3Q $0x26, AX, AX
MULQ 24(CX)
ADDQ AX, SI
ADCQ DX, BX
// r1 = 2×l0×l1
MOVQ (CX), AX
SHLQ $0x01, AX
MULQ 8(CX)
MOVQ AX, R8
MOVQ DX, DI
// r1 += 38×l2×l4
MOVQ 16(CX), AX
IMUL3Q $0x26, AX, AX
MULQ 32(CX)
ADDQ AX, R8
ADCQ DX, DI
// r1 += 19×l3×l3
MOVQ 24(CX), AX
IMUL3Q $0x13, AX, AX
MULQ 24(CX)
ADDQ AX, R8
ADCQ DX, DI
// r2 = 2×l0×l2
MOVQ (CX), AX
SHLQ $0x01, AX
MULQ 16(CX)
MOVQ AX, R10
MOVQ DX, R9
// r2 += l1×l1
MOVQ 8(CX), AX
MULQ 8(CX)
ADDQ AX, R10
ADCQ DX, R9
// r2 += 38×l3×l4
MOVQ 24(CX), AX
IMUL3Q $0x26, AX, AX
MULQ 32(CX)
ADDQ AX, R10
ADCQ DX, R9
// r3 = 2×l0×l3
MOVQ (CX), AX
SHLQ $0x01, AX
MULQ 24(CX)
MOVQ AX, R12
MOVQ DX, R11
// r3 += 2×l1×l2
MOVQ 8(CX), AX
IMUL3Q $0x02, AX, AX
MULQ 16(CX)
ADDQ AX, R12
ADCQ DX, R11
// r3 += 19×l4×l4
MOVQ 32(CX), AX
IMUL3Q $0x13, AX, AX
MULQ 32(CX)
ADDQ AX, R12
ADCQ DX, R11
// r4 = 2×l0×l4
MOVQ (CX), AX
SHLQ $0x01, AX
MULQ 32(CX)
MOVQ AX, R14
MOVQ DX, R13
// r4 += 2×l1×l3
MOVQ 8(CX), AX
IMUL3Q $0x02, AX, AX
MULQ 24(CX)
ADDQ AX, R14
ADCQ DX, R13
// r4 += l2×l2
MOVQ 16(CX), AX
MULQ 16(CX)
ADDQ AX, R14
ADCQ DX, R13
// First reduction chain
MOVQ $0x0007ffffffffffff, AX
SHLQ $0x0d, SI, BX
SHLQ $0x0d, R8, DI
SHLQ $0x0d, R10, R9
SHLQ $0x0d, R12, R11
SHLQ $0x0d, R14, R13
ANDQ AX, SI
IMUL3Q $0x13, R13, R13
ADDQ R13, SI
ANDQ AX, R8
ADDQ BX, R8
ANDQ AX, R10
ADDQ DI, R10
ANDQ AX, R12
ADDQ R9, R12
ANDQ AX, R14
ADDQ R11, R14
// Second reduction chain (carryPropagate)
MOVQ SI, BX
SHRQ $0x33, BX
MOVQ R8, DI
SHRQ $0x33, DI
MOVQ R10, R9
SHRQ $0x33, R9
MOVQ R12, R11
SHRQ $0x33, R11
MOVQ R14, R13
SHRQ $0x33, R13
ANDQ AX, SI
IMUL3Q $0x13, R13, R13
ADDQ R13, SI
ANDQ AX, R8
ADDQ BX, R8
ANDQ AX, R10
ADDQ DI, R10
ANDQ AX, R12
ADDQ R9, R12
ANDQ AX, R14
ADDQ R11, R14
// Store output
MOVQ out+0(FP), AX
MOVQ SI, (AX)
MOVQ R8, 8(AX)
MOVQ R10, 16(AX)
MOVQ R12, 24(AX)
MOVQ R14, 32(AX)
RET

View file

@ -0,0 +1,11 @@
// Copyright (c) 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !amd64 || !gc || purego
package field
func feMul(v, x, y *Element) { feMulGeneric(v, x, y) }
func feSquare(v, x *Element) { feSquareGeneric(v, x) }

View file

@ -0,0 +1,15 @@
// Copyright (c) 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build arm64 && gc && !purego
package field
//go:noescape
func carryPropagate(v *Element)
func (v *Element) carryPropagate() *Element {
carryPropagate(v)
return v
}

View file

@ -0,0 +1,42 @@
// Copyright (c) 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build arm64 && gc && !purego
#include "textflag.h"
// carryPropagate works exactly like carryPropagateGeneric and uses the
// same AND, ADD, and LSR+MADD instructions emitted by the compiler, but
// avoids loading R0-R4 twice and uses LDP and STP.
//
// See https://golang.org/issues/43145 for the main compiler issue.
//
// func carryPropagate(v *Element)
TEXT ·carryPropagate(SB),NOFRAME|NOSPLIT,$0-8
MOVD v+0(FP), R20
LDP 0(R20), (R0, R1)
LDP 16(R20), (R2, R3)
MOVD 32(R20), R4
AND $0x7ffffffffffff, R0, R10
AND $0x7ffffffffffff, R1, R11
AND $0x7ffffffffffff, R2, R12
AND $0x7ffffffffffff, R3, R13
AND $0x7ffffffffffff, R4, R14
ADD R0>>51, R11, R11
ADD R1>>51, R12, R12
ADD R2>>51, R13, R13
ADD R3>>51, R14, R14
// R4>>51 * 19 + R10 -> R10
LSR $51, R4, R21
MOVD $19, R22
MADD R22, R10, R21, R10
STP (R10, R11), 0(R20)
STP (R12, R13), 16(R20)
MOVD R14, 32(R20)
RET

View file

@ -0,0 +1,11 @@
// Copyright (c) 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !arm64 || !gc || purego
package field
func (v *Element) carryPropagate() *Element {
return v.carryPropagateGeneric()
}

View file

@ -0,0 +1,264 @@
// Copyright (c) 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package field
import "math/bits"
// uint128 holds a 128-bit number as two 64-bit limbs, for use with the
// bits.Mul64 and bits.Add64 intrinsics.
type uint128 struct {
lo, hi uint64
}
// mul64 returns a * b.
func mul64(a, b uint64) uint128 {
hi, lo := bits.Mul64(a, b)
return uint128{lo, hi}
}
// addMul64 returns v + a * b.
func addMul64(v uint128, a, b uint64) uint128 {
hi, lo := bits.Mul64(a, b)
lo, c := bits.Add64(lo, v.lo, 0)
hi, _ = bits.Add64(hi, v.hi, c)
return uint128{lo, hi}
}
// shiftRightBy51 returns a >> 51. a is assumed to be at most 115 bits.
func shiftRightBy51(a uint128) uint64 {
return (a.hi << (64 - 51)) | (a.lo >> 51)
}
func feMulGeneric(v, a, b *Element) {
a0 := a.l0
a1 := a.l1
a2 := a.l2
a3 := a.l3
a4 := a.l4
b0 := b.l0
b1 := b.l1
b2 := b.l2
b3 := b.l3
b4 := b.l4
// Limb multiplication works like pen-and-paper columnar multiplication, but
// with 51-bit limbs instead of digits.
//
// a4 a3 a2 a1 a0 x
// b4 b3 b2 b1 b0 =
// ------------------------
// a4b0 a3b0 a2b0 a1b0 a0b0 +
// a4b1 a3b1 a2b1 a1b1 a0b1 +
// a4b2 a3b2 a2b2 a1b2 a0b2 +
// a4b3 a3b3 a2b3 a1b3 a0b3 +
// a4b4 a3b4 a2b4 a1b4 a0b4 =
// ----------------------------------------------
// r8 r7 r6 r5 r4 r3 r2 r1 r0
//
// We can then use the reduction identity (a * 2²⁵⁵ + b = a * 19 + b) to
// reduce the limbs that would overflow 255 bits. r5 * 2²⁵⁵ becomes 19 * r5,
// r6 * 2³⁰⁶ becomes 19 * r6 * 2⁵¹, etc.
//
// Reduction can be carried out simultaneously to multiplication. For
// example, we do not compute r5: whenever the result of a multiplication
// belongs to r5, like a1b4, we multiply it by 19 and add the result to r0.
//
// a4b0 a3b0 a2b0 a1b0 a0b0 +
// a3b1 a2b1 a1b1 a0b1 19×a4b1 +
// a2b2 a1b2 a0b2 19×a4b2 19×a3b2 +
// a1b3 a0b3 19×a4b3 19×a3b3 19×a2b3 +
// a0b4 19×a4b4 19×a3b4 19×a2b4 19×a1b4 =
// --------------------------------------
// r4 r3 r2 r1 r0
//
// Finally we add up the columns into wide, overlapping limbs.
a1_19 := a1 * 19
a2_19 := a2 * 19
a3_19 := a3 * 19
a4_19 := a4 * 19
// r0 = a0×b0 + 19×(a1×b4 + a2×b3 + a3×b2 + a4×b1)
r0 := mul64(a0, b0)
r0 = addMul64(r0, a1_19, b4)
r0 = addMul64(r0, a2_19, b3)
r0 = addMul64(r0, a3_19, b2)
r0 = addMul64(r0, a4_19, b1)
// r1 = a0×b1 + a1×b0 + 19×(a2×b4 + a3×b3 + a4×b2)
r1 := mul64(a0, b1)
r1 = addMul64(r1, a1, b0)
r1 = addMul64(r1, a2_19, b4)
r1 = addMul64(r1, a3_19, b3)
r1 = addMul64(r1, a4_19, b2)
// r2 = a0×b2 + a1×b1 + a2×b0 + 19×(a3×b4 + a4×b3)
r2 := mul64(a0, b2)
r2 = addMul64(r2, a1, b1)
r2 = addMul64(r2, a2, b0)
r2 = addMul64(r2, a3_19, b4)
r2 = addMul64(r2, a4_19, b3)
// r3 = a0×b3 + a1×b2 + a2×b1 + a3×b0 + 19×a4×b4
r3 := mul64(a0, b3)
r3 = addMul64(r3, a1, b2)
r3 = addMul64(r3, a2, b1)
r3 = addMul64(r3, a3, b0)
r3 = addMul64(r3, a4_19, b4)
// r4 = a0×b4 + a1×b3 + a2×b2 + a3×b1 + a4×b0
r4 := mul64(a0, b4)
r4 = addMul64(r4, a1, b3)
r4 = addMul64(r4, a2, b2)
r4 = addMul64(r4, a3, b1)
r4 = addMul64(r4, a4, b0)
// After the multiplication, we need to reduce (carry) the five coefficients
// to obtain a result with limbs that are at most slightly larger than 2⁵¹,
// to respect the Element invariant.
//
// Overall, the reduction works the same as carryPropagate, except with
// wider inputs: we take the carry for each coefficient by shifting it right
// by 51, and add it to the limb above it. The top carry is multiplied by 19
// according to the reduction identity and added to the lowest limb.
//
// The largest coefficient (r0) will be at most 111 bits, which guarantees
// that all carries are at most 111 - 51 = 60 bits, which fits in a uint64.
//
// r0 = a0×b0 + 19×(a1×b4 + a2×b3 + a3×b2 + a4×b1)
// r0 < 2⁵²×2⁵² + 19×(2⁵²×2⁵² + 2⁵²×2⁵² + 2⁵²×2⁵² + 2⁵²×2⁵²)
// r0 < (1 + 19 × 4) × 2⁵² × 2⁵²
// r0 < 2⁷ × 2⁵² × 2⁵²
// r0 < 2¹¹¹
//
// Moreover, the top coefficient (r4) is at most 107 bits, so c4 is at most
// 56 bits, and c4 * 19 is at most 61 bits, which again fits in a uint64 and
// allows us to easily apply the reduction identity.
//
// r4 = a0×b4 + a1×b3 + a2×b2 + a3×b1 + a4×b0
// r4 < 5 × 2⁵² × 2⁵²
// r4 < 2¹⁰⁷
//
c0 := shiftRightBy51(r0)
c1 := shiftRightBy51(r1)
c2 := shiftRightBy51(r2)
c3 := shiftRightBy51(r3)
c4 := shiftRightBy51(r4)
rr0 := r0.lo&maskLow51Bits + c4*19
rr1 := r1.lo&maskLow51Bits + c0
rr2 := r2.lo&maskLow51Bits + c1
rr3 := r3.lo&maskLow51Bits + c2
rr4 := r4.lo&maskLow51Bits + c3
// Now all coefficients fit into 64-bit registers but are still too large to
// be passed around as a Element. We therefore do one last carry chain,
// where the carries will be small enough to fit in the wiggle room above 2⁵¹.
*v = Element{rr0, rr1, rr2, rr3, rr4}
v.carryPropagate()
}
func feSquareGeneric(v, a *Element) {
l0 := a.l0
l1 := a.l1
l2 := a.l2
l3 := a.l3
l4 := a.l4
// Squaring works precisely like multiplication above, but thanks to its
// symmetry we get to group a few terms together.
//
// l4 l3 l2 l1 l0 x
// l4 l3 l2 l1 l0 =
// ------------------------
// l4l0 l3l0 l2l0 l1l0 l0l0 +
// l4l1 l3l1 l2l1 l1l1 l0l1 +
// l4l2 l3l2 l2l2 l1l2 l0l2 +
// l4l3 l3l3 l2l3 l1l3 l0l3 +
// l4l4 l3l4 l2l4 l1l4 l0l4 =
// ----------------------------------------------
// r8 r7 r6 r5 r4 r3 r2 r1 r0
//
// l4l0 l3l0 l2l0 l1l0 l0l0 +
// l3l1 l2l1 l1l1 l0l1 19×l4l1 +
// l2l2 l1l2 l0l2 19×l4l2 19×l3l2 +
// l1l3 l0l3 19×l4l3 19×l3l3 19×l2l3 +
// l0l4 19×l4l4 19×l3l4 19×l2l4 19×l1l4 =
// --------------------------------------
// r4 r3 r2 r1 r0
//
// With precomputed 2×, 19×, and 2×19× terms, we can compute each limb with
// only three Mul64 and four Add64, instead of five and eight.
l0_2 := l0 * 2
l1_2 := l1 * 2
l1_38 := l1 * 38
l2_38 := l2 * 38
l3_38 := l3 * 38
l3_19 := l3 * 19
l4_19 := l4 * 19
// r0 = l0×l0 + 19×(l1×l4 + l2×l3 + l3×l2 + l4×l1) = l0×l0 + 19×2×(l1×l4 + l2×l3)
r0 := mul64(l0, l0)
r0 = addMul64(r0, l1_38, l4)
r0 = addMul64(r0, l2_38, l3)
// r1 = l0×l1 + l1×l0 + 19×(l2×l4 + l3×l3 + l4×l2) = 2×l0×l1 + 19×2×l2×l4 + 19×l3×l3
r1 := mul64(l0_2, l1)
r1 = addMul64(r1, l2_38, l4)
r1 = addMul64(r1, l3_19, l3)
// r2 = l0×l2 + l1×l1 + l2×l0 + 19×(l3×l4 + l4×l3) = 2×l0×l2 + l1×l1 + 19×2×l3×l4
r2 := mul64(l0_2, l2)
r2 = addMul64(r2, l1, l1)
r2 = addMul64(r2, l3_38, l4)
// r3 = l0×l3 + l1×l2 + l2×l1 + l3×l0 + 19×l4×l4 = 2×l0×l3 + 2×l1×l2 + 19×l4×l4
r3 := mul64(l0_2, l3)
r3 = addMul64(r3, l1_2, l2)
r3 = addMul64(r3, l4_19, l4)
// r4 = l0×l4 + l1×l3 + l2×l2 + l3×l1 + l4×l0 = 2×l0×l4 + 2×l1×l3 + l2×l2
r4 := mul64(l0_2, l4)
r4 = addMul64(r4, l1_2, l3)
r4 = addMul64(r4, l2, l2)
c0 := shiftRightBy51(r0)
c1 := shiftRightBy51(r1)
c2 := shiftRightBy51(r2)
c3 := shiftRightBy51(r3)
c4 := shiftRightBy51(r4)
rr0 := r0.lo&maskLow51Bits + c4*19
rr1 := r1.lo&maskLow51Bits + c0
rr2 := r2.lo&maskLow51Bits + c1
rr3 := r3.lo&maskLow51Bits + c2
rr4 := r4.lo&maskLow51Bits + c3
*v = Element{rr0, rr1, rr2, rr3, rr4}
v.carryPropagate()
}
// carryPropagateGeneric brings the limbs below 52 bits by applying the reduction
// identity (a * 2²⁵⁵ + b = a * 19 + b) to the l4 carry. TODO inline
func (v *Element) carryPropagateGeneric() *Element {
c0 := v.l0 >> 51
c1 := v.l1 >> 51
c2 := v.l2 >> 51
c3 := v.l3 >> 51
c4 := v.l4 >> 51
v.l0 = v.l0&maskLow51Bits + c4*19
v.l1 = v.l1&maskLow51Bits + c0
v.l2 = v.l2&maskLow51Bits + c1
v.l3 = v.l3&maskLow51Bits + c2
v.l4 = v.l4&maskLow51Bits + c3
return v
}

View file

@ -0,0 +1 @@
b0c49ae9f59d233526f8934262c5bbbe14d4358d

View file

@ -0,0 +1,19 @@
#! /bin/bash
set -euo pipefail
cd "$(git rev-parse --show-toplevel)"
STD_PATH=src/crypto/ed25519/internal/edwards25519/field
LOCAL_PATH=curve25519/internal/field
LAST_SYNC_REF=$(cat $LOCAL_PATH/sync.checkpoint)
git fetch https://go.googlesource.com/go master
if git diff --quiet $LAST_SYNC_REF:$STD_PATH FETCH_HEAD:$STD_PATH; then
echo "No changes."
else
NEW_REF=$(git rev-parse FETCH_HEAD | tee $LOCAL_PATH/sync.checkpoint)
echo "Applying changes from $LAST_SYNC_REF to $NEW_REF..."
git diff $LAST_SYNC_REF:$STD_PATH FETCH_HEAD:$STD_PATH | \
git apply -3 --directory=$LOCAL_PATH
fi

View file

@ -0,0 +1,62 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package sha3 implements the SHA-3 fixed-output-length hash functions and
// the SHAKE variable-output-length hash functions defined by FIPS-202.
//
// Both types of hash function use the "sponge" construction and the Keccak
// permutation. For a detailed specification see http://keccak.noekeon.org/
//
// # Guidance
//
// If you aren't sure what function you need, use SHAKE256 with at least 64
// bytes of output. The SHAKE instances are faster than the SHA3 instances;
// the latter have to allocate memory to conform to the hash.Hash interface.
//
// If you need a secret-key MAC (message authentication code), prepend the
// secret key to the input, hash with SHAKE256 and read at least 32 bytes of
// output.
//
// # Security strengths
//
// The SHA3-x (x equals 224, 256, 384, or 512) functions have a security
// strength against preimage attacks of x bits. Since they only produce "x"
// bits of output, their collision-resistance is only "x/2" bits.
//
// The SHAKE-256 and -128 functions have a generic security strength of 256 and
// 128 bits against all attacks, provided that at least 2x bits of their output
// is used. Requesting more than 64 or 32 bytes of output, respectively, does
// not increase the collision-resistance of the SHAKE functions.
//
// # The sponge construction
//
// A sponge builds a pseudo-random function from a public pseudo-random
// permutation, by applying the permutation to a state of "rate + capacity"
// bytes, but hiding "capacity" of the bytes.
//
// A sponge starts out with a zero state. To hash an input using a sponge, up
// to "rate" bytes of the input are XORed into the sponge's state. The sponge
// is then "full" and the permutation is applied to "empty" it. This process is
// repeated until all the input has been "absorbed". The input is then padded.
// The digest is "squeezed" from the sponge in the same way, except that output
// is copied out instead of input being XORed in.
//
// A sponge is parameterized by its generic security strength, which is equal
// to half its capacity; capacity + rate is equal to the permutation's width.
// Since the KeccakF-1600 permutation is 1600 bits (200 bytes) wide, this means
// that the security strength of a sponge instance is equal to (1600 - bitrate) / 2.
//
// # Recommendations
//
// The SHAKE functions are recommended for most new uses. They can produce
// output of arbitrary length. SHAKE256, with an output length of at least
// 64 bytes, provides 256-bit security against all attacks. The Keccak team
// recommends it for most applications upgrading from SHA2-512. (NIST chose a
// much stronger, but much slower, sponge instance for SHA3-512.)
//
// The SHA-3 functions are "drop-in" replacements for the SHA-2 functions.
// They produce output of the same length, with the same security strengths
// against all attacks. This means, in particular, that SHA3-256 only has
// 128-bit collision resistance, because its output length is 32 bytes.
package sha3 // import "golang.org/x/crypto/sha3"

View file

@ -0,0 +1,97 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package sha3
// This file provides functions for creating instances of the SHA-3
// and SHAKE hash functions, as well as utility functions for hashing
// bytes.
import (
"hash"
)
// New224 creates a new SHA3-224 hash.
// Its generic security strength is 224 bits against preimage attacks,
// and 112 bits against collision attacks.
func New224() hash.Hash {
if h := new224Asm(); h != nil {
return h
}
return &state{rate: 144, outputLen: 28, dsbyte: 0x06}
}
// New256 creates a new SHA3-256 hash.
// Its generic security strength is 256 bits against preimage attacks,
// and 128 bits against collision attacks.
func New256() hash.Hash {
if h := new256Asm(); h != nil {
return h
}
return &state{rate: 136, outputLen: 32, dsbyte: 0x06}
}
// New384 creates a new SHA3-384 hash.
// Its generic security strength is 384 bits against preimage attacks,
// and 192 bits against collision attacks.
func New384() hash.Hash {
if h := new384Asm(); h != nil {
return h
}
return &state{rate: 104, outputLen: 48, dsbyte: 0x06}
}
// New512 creates a new SHA3-512 hash.
// Its generic security strength is 512 bits against preimage attacks,
// and 256 bits against collision attacks.
func New512() hash.Hash {
if h := new512Asm(); h != nil {
return h
}
return &state{rate: 72, outputLen: 64, dsbyte: 0x06}
}
// NewLegacyKeccak256 creates a new Keccak-256 hash.
//
// Only use this function if you require compatibility with an existing cryptosystem
// that uses non-standard padding. All other users should use New256 instead.
func NewLegacyKeccak256() hash.Hash { return &state{rate: 136, outputLen: 32, dsbyte: 0x01} }
// NewLegacyKeccak512 creates a new Keccak-512 hash.
//
// Only use this function if you require compatibility with an existing cryptosystem
// that uses non-standard padding. All other users should use New512 instead.
func NewLegacyKeccak512() hash.Hash { return &state{rate: 72, outputLen: 64, dsbyte: 0x01} }
// Sum224 returns the SHA3-224 digest of the data.
func Sum224(data []byte) (digest [28]byte) {
h := New224()
h.Write(data)
h.Sum(digest[:0])
return
}
// Sum256 returns the SHA3-256 digest of the data.
func Sum256(data []byte) (digest [32]byte) {
h := New256()
h.Write(data)
h.Sum(digest[:0])
return
}
// Sum384 returns the SHA3-384 digest of the data.
func Sum384(data []byte) (digest [48]byte) {
h := New384()
h.Write(data)
h.Sum(digest[:0])
return
}
// Sum512 returns the SHA3-512 digest of the data.
func Sum512(data []byte) (digest [64]byte) {
h := New512()
h.Write(data)
h.Sum(digest[:0])
return
}

View file

@ -0,0 +1,27 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !gc || purego || !s390x
package sha3
import (
"hash"
)
// new224Asm returns an assembly implementation of SHA3-224 if available,
// otherwise it returns nil.
func new224Asm() hash.Hash { return nil }
// new256Asm returns an assembly implementation of SHA3-256 if available,
// otherwise it returns nil.
func new256Asm() hash.Hash { return nil }
// new384Asm returns an assembly implementation of SHA3-384 if available,
// otherwise it returns nil.
func new384Asm() hash.Hash { return nil }
// new512Asm returns an assembly implementation of SHA3-512 if available,
// otherwise it returns nil.
func new512Asm() hash.Hash { return nil }

View file

@ -0,0 +1,414 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !amd64 || purego || !gc
package sha3
import "math/bits"
// rc stores the round constants for use in the ι step.
var rc = [24]uint64{
0x0000000000000001,
0x0000000000008082,
0x800000000000808A,
0x8000000080008000,
0x000000000000808B,
0x0000000080000001,
0x8000000080008081,
0x8000000000008009,
0x000000000000008A,
0x0000000000000088,
0x0000000080008009,
0x000000008000000A,
0x000000008000808B,
0x800000000000008B,
0x8000000000008089,
0x8000000000008003,
0x8000000000008002,
0x8000000000000080,
0x000000000000800A,
0x800000008000000A,
0x8000000080008081,
0x8000000000008080,
0x0000000080000001,
0x8000000080008008,
}
// keccakF1600 applies the Keccak permutation to a 1600b-wide
// state represented as a slice of 25 uint64s.
func keccakF1600(a *[25]uint64) {
// Implementation translated from Keccak-inplace.c
// in the keccak reference code.
var t, bc0, bc1, bc2, bc3, bc4, d0, d1, d2, d3, d4 uint64
for i := 0; i < 24; i += 4 {
// Combines the 5 steps in each round into 2 steps.
// Unrolls 4 rounds per loop and spreads some steps across rounds.
// Round 1
bc0 = a[0] ^ a[5] ^ a[10] ^ a[15] ^ a[20]
bc1 = a[1] ^ a[6] ^ a[11] ^ a[16] ^ a[21]
bc2 = a[2] ^ a[7] ^ a[12] ^ a[17] ^ a[22]
bc3 = a[3] ^ a[8] ^ a[13] ^ a[18] ^ a[23]
bc4 = a[4] ^ a[9] ^ a[14] ^ a[19] ^ a[24]
d0 = bc4 ^ (bc1<<1 | bc1>>63)
d1 = bc0 ^ (bc2<<1 | bc2>>63)
d2 = bc1 ^ (bc3<<1 | bc3>>63)
d3 = bc2 ^ (bc4<<1 | bc4>>63)
d4 = bc3 ^ (bc0<<1 | bc0>>63)
bc0 = a[0] ^ d0
t = a[6] ^ d1
bc1 = bits.RotateLeft64(t, 44)
t = a[12] ^ d2
bc2 = bits.RotateLeft64(t, 43)
t = a[18] ^ d3
bc3 = bits.RotateLeft64(t, 21)
t = a[24] ^ d4
bc4 = bits.RotateLeft64(t, 14)
a[0] = bc0 ^ (bc2 &^ bc1) ^ rc[i]
a[6] = bc1 ^ (bc3 &^ bc2)
a[12] = bc2 ^ (bc4 &^ bc3)
a[18] = bc3 ^ (bc0 &^ bc4)
a[24] = bc4 ^ (bc1 &^ bc0)
t = a[10] ^ d0
bc2 = bits.RotateLeft64(t, 3)
t = a[16] ^ d1
bc3 = bits.RotateLeft64(t, 45)
t = a[22] ^ d2
bc4 = bits.RotateLeft64(t, 61)
t = a[3] ^ d3
bc0 = bits.RotateLeft64(t, 28)
t = a[9] ^ d4
bc1 = bits.RotateLeft64(t, 20)
a[10] = bc0 ^ (bc2 &^ bc1)
a[16] = bc1 ^ (bc3 &^ bc2)
a[22] = bc2 ^ (bc4 &^ bc3)
a[3] = bc3 ^ (bc0 &^ bc4)
a[9] = bc4 ^ (bc1 &^ bc0)
t = a[20] ^ d0
bc4 = bits.RotateLeft64(t, 18)
t = a[1] ^ d1
bc0 = bits.RotateLeft64(t, 1)
t = a[7] ^ d2
bc1 = bits.RotateLeft64(t, 6)
t = a[13] ^ d3
bc2 = bits.RotateLeft64(t, 25)
t = a[19] ^ d4
bc3 = bits.RotateLeft64(t, 8)
a[20] = bc0 ^ (bc2 &^ bc1)
a[1] = bc1 ^ (bc3 &^ bc2)
a[7] = bc2 ^ (bc4 &^ bc3)
a[13] = bc3 ^ (bc0 &^ bc4)
a[19] = bc4 ^ (bc1 &^ bc0)
t = a[5] ^ d0
bc1 = bits.RotateLeft64(t, 36)
t = a[11] ^ d1
bc2 = bits.RotateLeft64(t, 10)
t = a[17] ^ d2
bc3 = bits.RotateLeft64(t, 15)
t = a[23] ^ d3
bc4 = bits.RotateLeft64(t, 56)
t = a[4] ^ d4
bc0 = bits.RotateLeft64(t, 27)
a[5] = bc0 ^ (bc2 &^ bc1)
a[11] = bc1 ^ (bc3 &^ bc2)
a[17] = bc2 ^ (bc4 &^ bc3)
a[23] = bc3 ^ (bc0 &^ bc4)
a[4] = bc4 ^ (bc1 &^ bc0)
t = a[15] ^ d0
bc3 = bits.RotateLeft64(t, 41)
t = a[21] ^ d1
bc4 = bits.RotateLeft64(t, 2)
t = a[2] ^ d2
bc0 = bits.RotateLeft64(t, 62)
t = a[8] ^ d3
bc1 = bits.RotateLeft64(t, 55)
t = a[14] ^ d4
bc2 = bits.RotateLeft64(t, 39)
a[15] = bc0 ^ (bc2 &^ bc1)
a[21] = bc1 ^ (bc3 &^ bc2)
a[2] = bc2 ^ (bc4 &^ bc3)
a[8] = bc3 ^ (bc0 &^ bc4)
a[14] = bc4 ^ (bc1 &^ bc0)
// Round 2
bc0 = a[0] ^ a[5] ^ a[10] ^ a[15] ^ a[20]
bc1 = a[1] ^ a[6] ^ a[11] ^ a[16] ^ a[21]
bc2 = a[2] ^ a[7] ^ a[12] ^ a[17] ^ a[22]
bc3 = a[3] ^ a[8] ^ a[13] ^ a[18] ^ a[23]
bc4 = a[4] ^ a[9] ^ a[14] ^ a[19] ^ a[24]
d0 = bc4 ^ (bc1<<1 | bc1>>63)
d1 = bc0 ^ (bc2<<1 | bc2>>63)
d2 = bc1 ^ (bc3<<1 | bc3>>63)
d3 = bc2 ^ (bc4<<1 | bc4>>63)
d4 = bc3 ^ (bc0<<1 | bc0>>63)
bc0 = a[0] ^ d0
t = a[16] ^ d1
bc1 = bits.RotateLeft64(t, 44)
t = a[7] ^ d2
bc2 = bits.RotateLeft64(t, 43)
t = a[23] ^ d3
bc3 = bits.RotateLeft64(t, 21)
t = a[14] ^ d4
bc4 = bits.RotateLeft64(t, 14)
a[0] = bc0 ^ (bc2 &^ bc1) ^ rc[i+1]
a[16] = bc1 ^ (bc3 &^ bc2)
a[7] = bc2 ^ (bc4 &^ bc3)
a[23] = bc3 ^ (bc0 &^ bc4)
a[14] = bc4 ^ (bc1 &^ bc0)
t = a[20] ^ d0
bc2 = bits.RotateLeft64(t, 3)
t = a[11] ^ d1
bc3 = bits.RotateLeft64(t, 45)
t = a[2] ^ d2
bc4 = bits.RotateLeft64(t, 61)
t = a[18] ^ d3
bc0 = bits.RotateLeft64(t, 28)
t = a[9] ^ d4
bc1 = bits.RotateLeft64(t, 20)
a[20] = bc0 ^ (bc2 &^ bc1)
a[11] = bc1 ^ (bc3 &^ bc2)
a[2] = bc2 ^ (bc4 &^ bc3)
a[18] = bc3 ^ (bc0 &^ bc4)
a[9] = bc4 ^ (bc1 &^ bc0)
t = a[15] ^ d0
bc4 = bits.RotateLeft64(t, 18)
t = a[6] ^ d1
bc0 = bits.RotateLeft64(t, 1)
t = a[22] ^ d2
bc1 = bits.RotateLeft64(t, 6)
t = a[13] ^ d3
bc2 = bits.RotateLeft64(t, 25)
t = a[4] ^ d4
bc3 = bits.RotateLeft64(t, 8)
a[15] = bc0 ^ (bc2 &^ bc1)
a[6] = bc1 ^ (bc3 &^ bc2)
a[22] = bc2 ^ (bc4 &^ bc3)
a[13] = bc3 ^ (bc0 &^ bc4)
a[4] = bc4 ^ (bc1 &^ bc0)
t = a[10] ^ d0
bc1 = bits.RotateLeft64(t, 36)
t = a[1] ^ d1
bc2 = bits.RotateLeft64(t, 10)
t = a[17] ^ d2
bc3 = bits.RotateLeft64(t, 15)
t = a[8] ^ d3
bc4 = bits.RotateLeft64(t, 56)
t = a[24] ^ d4
bc0 = bits.RotateLeft64(t, 27)
a[10] = bc0 ^ (bc2 &^ bc1)
a[1] = bc1 ^ (bc3 &^ bc2)
a[17] = bc2 ^ (bc4 &^ bc3)
a[8] = bc3 ^ (bc0 &^ bc4)
a[24] = bc4 ^ (bc1 &^ bc0)
t = a[5] ^ d0
bc3 = bits.RotateLeft64(t, 41)
t = a[21] ^ d1
bc4 = bits.RotateLeft64(t, 2)
t = a[12] ^ d2
bc0 = bits.RotateLeft64(t, 62)
t = a[3] ^ d3
bc1 = bits.RotateLeft64(t, 55)
t = a[19] ^ d4
bc2 = bits.RotateLeft64(t, 39)
a[5] = bc0 ^ (bc2 &^ bc1)
a[21] = bc1 ^ (bc3 &^ bc2)
a[12] = bc2 ^ (bc4 &^ bc3)
a[3] = bc3 ^ (bc0 &^ bc4)
a[19] = bc4 ^ (bc1 &^ bc0)
// Round 3
bc0 = a[0] ^ a[5] ^ a[10] ^ a[15] ^ a[20]
bc1 = a[1] ^ a[6] ^ a[11] ^ a[16] ^ a[21]
bc2 = a[2] ^ a[7] ^ a[12] ^ a[17] ^ a[22]
bc3 = a[3] ^ a[8] ^ a[13] ^ a[18] ^ a[23]
bc4 = a[4] ^ a[9] ^ a[14] ^ a[19] ^ a[24]
d0 = bc4 ^ (bc1<<1 | bc1>>63)
d1 = bc0 ^ (bc2<<1 | bc2>>63)
d2 = bc1 ^ (bc3<<1 | bc3>>63)
d3 = bc2 ^ (bc4<<1 | bc4>>63)
d4 = bc3 ^ (bc0<<1 | bc0>>63)
bc0 = a[0] ^ d0
t = a[11] ^ d1
bc1 = bits.RotateLeft64(t, 44)
t = a[22] ^ d2
bc2 = bits.RotateLeft64(t, 43)
t = a[8] ^ d3
bc3 = bits.RotateLeft64(t, 21)
t = a[19] ^ d4
bc4 = bits.RotateLeft64(t, 14)
a[0] = bc0 ^ (bc2 &^ bc1) ^ rc[i+2]
a[11] = bc1 ^ (bc3 &^ bc2)
a[22] = bc2 ^ (bc4 &^ bc3)
a[8] = bc3 ^ (bc0 &^ bc4)
a[19] = bc4 ^ (bc1 &^ bc0)
t = a[15] ^ d0
bc2 = bits.RotateLeft64(t, 3)
t = a[1] ^ d1
bc3 = bits.RotateLeft64(t, 45)
t = a[12] ^ d2
bc4 = bits.RotateLeft64(t, 61)
t = a[23] ^ d3
bc0 = bits.RotateLeft64(t, 28)
t = a[9] ^ d4
bc1 = bits.RotateLeft64(t, 20)
a[15] = bc0 ^ (bc2 &^ bc1)
a[1] = bc1 ^ (bc3 &^ bc2)
a[12] = bc2 ^ (bc4 &^ bc3)
a[23] = bc3 ^ (bc0 &^ bc4)
a[9] = bc4 ^ (bc1 &^ bc0)
t = a[5] ^ d0
bc4 = bits.RotateLeft64(t, 18)
t = a[16] ^ d1
bc0 = bits.RotateLeft64(t, 1)
t = a[2] ^ d2
bc1 = bits.RotateLeft64(t, 6)
t = a[13] ^ d3
bc2 = bits.RotateLeft64(t, 25)
t = a[24] ^ d4
bc3 = bits.RotateLeft64(t, 8)
a[5] = bc0 ^ (bc2 &^ bc1)
a[16] = bc1 ^ (bc3 &^ bc2)
a[2] = bc2 ^ (bc4 &^ bc3)
a[13] = bc3 ^ (bc0 &^ bc4)
a[24] = bc4 ^ (bc1 &^ bc0)
t = a[20] ^ d0
bc1 = bits.RotateLeft64(t, 36)
t = a[6] ^ d1
bc2 = bits.RotateLeft64(t, 10)
t = a[17] ^ d2
bc3 = bits.RotateLeft64(t, 15)
t = a[3] ^ d3
bc4 = bits.RotateLeft64(t, 56)
t = a[14] ^ d4
bc0 = bits.RotateLeft64(t, 27)
a[20] = bc0 ^ (bc2 &^ bc1)
a[6] = bc1 ^ (bc3 &^ bc2)
a[17] = bc2 ^ (bc4 &^ bc3)
a[3] = bc3 ^ (bc0 &^ bc4)
a[14] = bc4 ^ (bc1 &^ bc0)
t = a[10] ^ d0
bc3 = bits.RotateLeft64(t, 41)
t = a[21] ^ d1
bc4 = bits.RotateLeft64(t, 2)
t = a[7] ^ d2
bc0 = bits.RotateLeft64(t, 62)
t = a[18] ^ d3
bc1 = bits.RotateLeft64(t, 55)
t = a[4] ^ d4
bc2 = bits.RotateLeft64(t, 39)
a[10] = bc0 ^ (bc2 &^ bc1)
a[21] = bc1 ^ (bc3 &^ bc2)
a[7] = bc2 ^ (bc4 &^ bc3)
a[18] = bc3 ^ (bc0 &^ bc4)
a[4] = bc4 ^ (bc1 &^ bc0)
// Round 4
bc0 = a[0] ^ a[5] ^ a[10] ^ a[15] ^ a[20]
bc1 = a[1] ^ a[6] ^ a[11] ^ a[16] ^ a[21]
bc2 = a[2] ^ a[7] ^ a[12] ^ a[17] ^ a[22]
bc3 = a[3] ^ a[8] ^ a[13] ^ a[18] ^ a[23]
bc4 = a[4] ^ a[9] ^ a[14] ^ a[19] ^ a[24]
d0 = bc4 ^ (bc1<<1 | bc1>>63)
d1 = bc0 ^ (bc2<<1 | bc2>>63)
d2 = bc1 ^ (bc3<<1 | bc3>>63)
d3 = bc2 ^ (bc4<<1 | bc4>>63)
d4 = bc3 ^ (bc0<<1 | bc0>>63)
bc0 = a[0] ^ d0
t = a[1] ^ d1
bc1 = bits.RotateLeft64(t, 44)
t = a[2] ^ d2
bc2 = bits.RotateLeft64(t, 43)
t = a[3] ^ d3
bc3 = bits.RotateLeft64(t, 21)
t = a[4] ^ d4
bc4 = bits.RotateLeft64(t, 14)
a[0] = bc0 ^ (bc2 &^ bc1) ^ rc[i+3]
a[1] = bc1 ^ (bc3 &^ bc2)
a[2] = bc2 ^ (bc4 &^ bc3)
a[3] = bc3 ^ (bc0 &^ bc4)
a[4] = bc4 ^ (bc1 &^ bc0)
t = a[5] ^ d0
bc2 = bits.RotateLeft64(t, 3)
t = a[6] ^ d1
bc3 = bits.RotateLeft64(t, 45)
t = a[7] ^ d2
bc4 = bits.RotateLeft64(t, 61)
t = a[8] ^ d3
bc0 = bits.RotateLeft64(t, 28)
t = a[9] ^ d4
bc1 = bits.RotateLeft64(t, 20)
a[5] = bc0 ^ (bc2 &^ bc1)
a[6] = bc1 ^ (bc3 &^ bc2)
a[7] = bc2 ^ (bc4 &^ bc3)
a[8] = bc3 ^ (bc0 &^ bc4)
a[9] = bc4 ^ (bc1 &^ bc0)
t = a[10] ^ d0
bc4 = bits.RotateLeft64(t, 18)
t = a[11] ^ d1
bc0 = bits.RotateLeft64(t, 1)
t = a[12] ^ d2
bc1 = bits.RotateLeft64(t, 6)
t = a[13] ^ d3
bc2 = bits.RotateLeft64(t, 25)
t = a[14] ^ d4
bc3 = bits.RotateLeft64(t, 8)
a[10] = bc0 ^ (bc2 &^ bc1)
a[11] = bc1 ^ (bc3 &^ bc2)
a[12] = bc2 ^ (bc4 &^ bc3)
a[13] = bc3 ^ (bc0 &^ bc4)
a[14] = bc4 ^ (bc1 &^ bc0)
t = a[15] ^ d0
bc1 = bits.RotateLeft64(t, 36)
t = a[16] ^ d1
bc2 = bits.RotateLeft64(t, 10)
t = a[17] ^ d2
bc3 = bits.RotateLeft64(t, 15)
t = a[18] ^ d3
bc4 = bits.RotateLeft64(t, 56)
t = a[19] ^ d4
bc0 = bits.RotateLeft64(t, 27)
a[15] = bc0 ^ (bc2 &^ bc1)
a[16] = bc1 ^ (bc3 &^ bc2)
a[17] = bc2 ^ (bc4 &^ bc3)
a[18] = bc3 ^ (bc0 &^ bc4)
a[19] = bc4 ^ (bc1 &^ bc0)
t = a[20] ^ d0
bc3 = bits.RotateLeft64(t, 41)
t = a[21] ^ d1
bc4 = bits.RotateLeft64(t, 2)
t = a[22] ^ d2
bc0 = bits.RotateLeft64(t, 62)
t = a[23] ^ d3
bc1 = bits.RotateLeft64(t, 55)
t = a[24] ^ d4
bc2 = bits.RotateLeft64(t, 39)
a[20] = bc0 ^ (bc2 &^ bc1)
a[21] = bc1 ^ (bc3 &^ bc2)
a[22] = bc2 ^ (bc4 &^ bc3)
a[23] = bc3 ^ (bc0 &^ bc4)
a[24] = bc4 ^ (bc1 &^ bc0)
}
}

View file

@ -0,0 +1,13 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build amd64 && !purego && gc
package sha3
// This function is implemented in keccakf_amd64.s.
//go:noescape
func keccakF1600(a *[25]uint64)

View file

@ -0,0 +1,390 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build amd64 && !purego && gc
// This code was translated into a form compatible with 6a from the public
// domain sources at https://github.com/gvanas/KeccakCodePackage
// Offsets in state
#define _ba (0*8)
#define _be (1*8)
#define _bi (2*8)
#define _bo (3*8)
#define _bu (4*8)
#define _ga (5*8)
#define _ge (6*8)
#define _gi (7*8)
#define _go (8*8)
#define _gu (9*8)
#define _ka (10*8)
#define _ke (11*8)
#define _ki (12*8)
#define _ko (13*8)
#define _ku (14*8)
#define _ma (15*8)
#define _me (16*8)
#define _mi (17*8)
#define _mo (18*8)
#define _mu (19*8)
#define _sa (20*8)
#define _se (21*8)
#define _si (22*8)
#define _so (23*8)
#define _su (24*8)
// Temporary registers
#define rT1 AX
// Round vars
#define rpState DI
#define rpStack SP
#define rDa BX
#define rDe CX
#define rDi DX
#define rDo R8
#define rDu R9
#define rBa R10
#define rBe R11
#define rBi R12
#define rBo R13
#define rBu R14
#define rCa SI
#define rCe BP
#define rCi rBi
#define rCo rBo
#define rCu R15
#define MOVQ_RBI_RCE MOVQ rBi, rCe
#define XORQ_RT1_RCA XORQ rT1, rCa
#define XORQ_RT1_RCE XORQ rT1, rCe
#define XORQ_RBA_RCU XORQ rBa, rCu
#define XORQ_RBE_RCU XORQ rBe, rCu
#define XORQ_RDU_RCU XORQ rDu, rCu
#define XORQ_RDA_RCA XORQ rDa, rCa
#define XORQ_RDE_RCE XORQ rDe, rCe
#define mKeccakRound(iState, oState, rc, B_RBI_RCE, G_RT1_RCA, G_RT1_RCE, G_RBA_RCU, K_RT1_RCA, K_RT1_RCE, K_RBA_RCU, M_RT1_RCA, M_RT1_RCE, M_RBE_RCU, S_RDU_RCU, S_RDA_RCA, S_RDE_RCE) \
/* Prepare round */ \
MOVQ rCe, rDa; \
ROLQ $1, rDa; \
\
MOVQ _bi(iState), rCi; \
XORQ _gi(iState), rDi; \
XORQ rCu, rDa; \
XORQ _ki(iState), rCi; \
XORQ _mi(iState), rDi; \
XORQ rDi, rCi; \
\
MOVQ rCi, rDe; \
ROLQ $1, rDe; \
\
MOVQ _bo(iState), rCo; \
XORQ _go(iState), rDo; \
XORQ rCa, rDe; \
XORQ _ko(iState), rCo; \
XORQ _mo(iState), rDo; \
XORQ rDo, rCo; \
\
MOVQ rCo, rDi; \
ROLQ $1, rDi; \
\
MOVQ rCu, rDo; \
XORQ rCe, rDi; \
ROLQ $1, rDo; \
\
MOVQ rCa, rDu; \
XORQ rCi, rDo; \
ROLQ $1, rDu; \
\
/* Result b */ \
MOVQ _ba(iState), rBa; \
MOVQ _ge(iState), rBe; \
XORQ rCo, rDu; \
MOVQ _ki(iState), rBi; \
MOVQ _mo(iState), rBo; \
MOVQ _su(iState), rBu; \
XORQ rDe, rBe; \
ROLQ $44, rBe; \
XORQ rDi, rBi; \
XORQ rDa, rBa; \
ROLQ $43, rBi; \
\
MOVQ rBe, rCa; \
MOVQ rc, rT1; \
ORQ rBi, rCa; \
XORQ rBa, rT1; \
XORQ rT1, rCa; \
MOVQ rCa, _ba(oState); \
\
XORQ rDu, rBu; \
ROLQ $14, rBu; \
MOVQ rBa, rCu; \
ANDQ rBe, rCu; \
XORQ rBu, rCu; \
MOVQ rCu, _bu(oState); \
\
XORQ rDo, rBo; \
ROLQ $21, rBo; \
MOVQ rBo, rT1; \
ANDQ rBu, rT1; \
XORQ rBi, rT1; \
MOVQ rT1, _bi(oState); \
\
NOTQ rBi; \
ORQ rBa, rBu; \
ORQ rBo, rBi; \
XORQ rBo, rBu; \
XORQ rBe, rBi; \
MOVQ rBu, _bo(oState); \
MOVQ rBi, _be(oState); \
B_RBI_RCE; \
\
/* Result g */ \
MOVQ _gu(iState), rBe; \
XORQ rDu, rBe; \
MOVQ _ka(iState), rBi; \
ROLQ $20, rBe; \
XORQ rDa, rBi; \
ROLQ $3, rBi; \
MOVQ _bo(iState), rBa; \
MOVQ rBe, rT1; \
ORQ rBi, rT1; \
XORQ rDo, rBa; \
MOVQ _me(iState), rBo; \
MOVQ _si(iState), rBu; \
ROLQ $28, rBa; \
XORQ rBa, rT1; \
MOVQ rT1, _ga(oState); \
G_RT1_RCA; \
\
XORQ rDe, rBo; \
ROLQ $45, rBo; \
MOVQ rBi, rT1; \
ANDQ rBo, rT1; \
XORQ rBe, rT1; \
MOVQ rT1, _ge(oState); \
G_RT1_RCE; \
\
XORQ rDi, rBu; \
ROLQ $61, rBu; \
MOVQ rBu, rT1; \
ORQ rBa, rT1; \
XORQ rBo, rT1; \
MOVQ rT1, _go(oState); \
\
ANDQ rBe, rBa; \
XORQ rBu, rBa; \
MOVQ rBa, _gu(oState); \
NOTQ rBu; \
G_RBA_RCU; \
\
ORQ rBu, rBo; \
XORQ rBi, rBo; \
MOVQ rBo, _gi(oState); \
\
/* Result k */ \
MOVQ _be(iState), rBa; \
MOVQ _gi(iState), rBe; \
MOVQ _ko(iState), rBi; \
MOVQ _mu(iState), rBo; \
MOVQ _sa(iState), rBu; \
XORQ rDi, rBe; \
ROLQ $6, rBe; \
XORQ rDo, rBi; \
ROLQ $25, rBi; \
MOVQ rBe, rT1; \
ORQ rBi, rT1; \
XORQ rDe, rBa; \
ROLQ $1, rBa; \
XORQ rBa, rT1; \
MOVQ rT1, _ka(oState); \
K_RT1_RCA; \
\
XORQ rDu, rBo; \
ROLQ $8, rBo; \
MOVQ rBi, rT1; \
ANDQ rBo, rT1; \
XORQ rBe, rT1; \
MOVQ rT1, _ke(oState); \
K_RT1_RCE; \
\
XORQ rDa, rBu; \
ROLQ $18, rBu; \
NOTQ rBo; \
MOVQ rBo, rT1; \
ANDQ rBu, rT1; \
XORQ rBi, rT1; \
MOVQ rT1, _ki(oState); \
\
MOVQ rBu, rT1; \
ORQ rBa, rT1; \
XORQ rBo, rT1; \
MOVQ rT1, _ko(oState); \
\
ANDQ rBe, rBa; \
XORQ rBu, rBa; \
MOVQ rBa, _ku(oState); \
K_RBA_RCU; \
\
/* Result m */ \
MOVQ _ga(iState), rBe; \
XORQ rDa, rBe; \
MOVQ _ke(iState), rBi; \
ROLQ $36, rBe; \
XORQ rDe, rBi; \
MOVQ _bu(iState), rBa; \
ROLQ $10, rBi; \
MOVQ rBe, rT1; \
MOVQ _mi(iState), rBo; \
ANDQ rBi, rT1; \
XORQ rDu, rBa; \
MOVQ _so(iState), rBu; \
ROLQ $27, rBa; \
XORQ rBa, rT1; \
MOVQ rT1, _ma(oState); \
M_RT1_RCA; \
\
XORQ rDi, rBo; \
ROLQ $15, rBo; \
MOVQ rBi, rT1; \
ORQ rBo, rT1; \
XORQ rBe, rT1; \
MOVQ rT1, _me(oState); \
M_RT1_RCE; \
\
XORQ rDo, rBu; \
ROLQ $56, rBu; \
NOTQ rBo; \
MOVQ rBo, rT1; \
ORQ rBu, rT1; \
XORQ rBi, rT1; \
MOVQ rT1, _mi(oState); \
\
ORQ rBa, rBe; \
XORQ rBu, rBe; \
MOVQ rBe, _mu(oState); \
\
ANDQ rBa, rBu; \
XORQ rBo, rBu; \
MOVQ rBu, _mo(oState); \
M_RBE_RCU; \
\
/* Result s */ \
MOVQ _bi(iState), rBa; \
MOVQ _go(iState), rBe; \
MOVQ _ku(iState), rBi; \
XORQ rDi, rBa; \
MOVQ _ma(iState), rBo; \
ROLQ $62, rBa; \
XORQ rDo, rBe; \
MOVQ _se(iState), rBu; \
ROLQ $55, rBe; \
\
XORQ rDu, rBi; \
MOVQ rBa, rDu; \
XORQ rDe, rBu; \
ROLQ $2, rBu; \
ANDQ rBe, rDu; \
XORQ rBu, rDu; \
MOVQ rDu, _su(oState); \
\
ROLQ $39, rBi; \
S_RDU_RCU; \
NOTQ rBe; \
XORQ rDa, rBo; \
MOVQ rBe, rDa; \
ANDQ rBi, rDa; \
XORQ rBa, rDa; \
MOVQ rDa, _sa(oState); \
S_RDA_RCA; \
\
ROLQ $41, rBo; \
MOVQ rBi, rDe; \
ORQ rBo, rDe; \
XORQ rBe, rDe; \
MOVQ rDe, _se(oState); \
S_RDE_RCE; \
\
MOVQ rBo, rDi; \
MOVQ rBu, rDo; \
ANDQ rBu, rDi; \
ORQ rBa, rDo; \
XORQ rBi, rDi; \
XORQ rBo, rDo; \
MOVQ rDi, _si(oState); \
MOVQ rDo, _so(oState) \
// func keccakF1600(a *[25]uint64)
TEXT ·keccakF1600(SB), 0, $200-8
MOVQ a+0(FP), rpState
// Convert the user state into an internal state
NOTQ _be(rpState)
NOTQ _bi(rpState)
NOTQ _go(rpState)
NOTQ _ki(rpState)
NOTQ _mi(rpState)
NOTQ _sa(rpState)
// Execute the KeccakF permutation
MOVQ _ba(rpState), rCa
MOVQ _be(rpState), rCe
MOVQ _bu(rpState), rCu
XORQ _ga(rpState), rCa
XORQ _ge(rpState), rCe
XORQ _gu(rpState), rCu
XORQ _ka(rpState), rCa
XORQ _ke(rpState), rCe
XORQ _ku(rpState), rCu
XORQ _ma(rpState), rCa
XORQ _me(rpState), rCe
XORQ _mu(rpState), rCu
XORQ _sa(rpState), rCa
XORQ _se(rpState), rCe
MOVQ _si(rpState), rDi
MOVQ _so(rpState), rDo
XORQ _su(rpState), rCu
mKeccakRound(rpState, rpStack, $0x0000000000000001, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
mKeccakRound(rpStack, rpState, $0x0000000000008082, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
mKeccakRound(rpState, rpStack, $0x800000000000808a, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
mKeccakRound(rpStack, rpState, $0x8000000080008000, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
mKeccakRound(rpState, rpStack, $0x000000000000808b, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
mKeccakRound(rpStack, rpState, $0x0000000080000001, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
mKeccakRound(rpState, rpStack, $0x8000000080008081, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
mKeccakRound(rpStack, rpState, $0x8000000000008009, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
mKeccakRound(rpState, rpStack, $0x000000000000008a, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
mKeccakRound(rpStack, rpState, $0x0000000000000088, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
mKeccakRound(rpState, rpStack, $0x0000000080008009, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
mKeccakRound(rpStack, rpState, $0x000000008000000a, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
mKeccakRound(rpState, rpStack, $0x000000008000808b, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
mKeccakRound(rpStack, rpState, $0x800000000000008b, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
mKeccakRound(rpState, rpStack, $0x8000000000008089, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
mKeccakRound(rpStack, rpState, $0x8000000000008003, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
mKeccakRound(rpState, rpStack, $0x8000000000008002, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
mKeccakRound(rpStack, rpState, $0x8000000000000080, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
mKeccakRound(rpState, rpStack, $0x000000000000800a, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
mKeccakRound(rpStack, rpState, $0x800000008000000a, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
mKeccakRound(rpState, rpStack, $0x8000000080008081, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
mKeccakRound(rpStack, rpState, $0x8000000000008080, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
mKeccakRound(rpState, rpStack, $0x0000000080000001, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
mKeccakRound(rpStack, rpState, $0x8000000080008008, NOP, NOP, NOP, NOP, NOP, NOP, NOP, NOP, NOP, NOP, NOP, NOP, NOP)
// Revert the internal state to the user state
NOTQ _be(rpState)
NOTQ _bi(rpState)
NOTQ _go(rpState)
NOTQ _ki(rpState)
NOTQ _mi(rpState)
NOTQ _sa(rpState)
RET

View file

@ -0,0 +1,18 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build go1.4
package sha3
import (
"crypto"
)
func init() {
crypto.RegisterHash(crypto.SHA3_224, New224)
crypto.RegisterHash(crypto.SHA3_256, New256)
crypto.RegisterHash(crypto.SHA3_384, New384)
crypto.RegisterHash(crypto.SHA3_512, New512)
}

View file

@ -0,0 +1,197 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package sha3
// spongeDirection indicates the direction bytes are flowing through the sponge.
type spongeDirection int
const (
// spongeAbsorbing indicates that the sponge is absorbing input.
spongeAbsorbing spongeDirection = iota
// spongeSqueezing indicates that the sponge is being squeezed.
spongeSqueezing
)
const (
// maxRate is the maximum size of the internal buffer. SHAKE-256
// currently needs the largest buffer.
maxRate = 168
)
type state struct {
// Generic sponge components.
a [25]uint64 // main state of the hash
buf []byte // points into storage
rate int // the number of bytes of state to use
// dsbyte contains the "domain separation" bits and the first bit of
// the padding. Sections 6.1 and 6.2 of [1] separate the outputs of the
// SHA-3 and SHAKE functions by appending bitstrings to the message.
// Using a little-endian bit-ordering convention, these are "01" for SHA-3
// and "1111" for SHAKE, or 00000010b and 00001111b, respectively. Then the
// padding rule from section 5.1 is applied to pad the message to a multiple
// of the rate, which involves adding a "1" bit, zero or more "0" bits, and
// a final "1" bit. We merge the first "1" bit from the padding into dsbyte,
// giving 00000110b (0x06) and 00011111b (0x1f).
// [1] http://csrc.nist.gov/publications/drafts/fips-202/fips_202_draft.pdf
// "Draft FIPS 202: SHA-3 Standard: Permutation-Based Hash and
// Extendable-Output Functions (May 2014)"
dsbyte byte
storage storageBuf
// Specific to SHA-3 and SHAKE.
outputLen int // the default output size in bytes
state spongeDirection // whether the sponge is absorbing or squeezing
}
// BlockSize returns the rate of sponge underlying this hash function.
func (d *state) BlockSize() int { return d.rate }
// Size returns the output size of the hash function in bytes.
func (d *state) Size() int { return d.outputLen }
// Reset clears the internal state by zeroing the sponge state and
// the byte buffer, and setting Sponge.state to absorbing.
func (d *state) Reset() {
// Zero the permutation's state.
for i := range d.a {
d.a[i] = 0
}
d.state = spongeAbsorbing
d.buf = d.storage.asBytes()[:0]
}
func (d *state) clone() *state {
ret := *d
if ret.state == spongeAbsorbing {
ret.buf = ret.storage.asBytes()[:len(ret.buf)]
} else {
ret.buf = ret.storage.asBytes()[d.rate-cap(d.buf) : d.rate]
}
return &ret
}
// permute applies the KeccakF-1600 permutation. It handles
// any input-output buffering.
func (d *state) permute() {
switch d.state {
case spongeAbsorbing:
// If we're absorbing, we need to xor the input into the state
// before applying the permutation.
xorIn(d, d.buf)
d.buf = d.storage.asBytes()[:0]
keccakF1600(&d.a)
case spongeSqueezing:
// If we're squeezing, we need to apply the permutation before
// copying more output.
keccakF1600(&d.a)
d.buf = d.storage.asBytes()[:d.rate]
copyOut(d, d.buf)
}
}
// pads appends the domain separation bits in dsbyte, applies
// the multi-bitrate 10..1 padding rule, and permutes the state.
func (d *state) padAndPermute(dsbyte byte) {
if d.buf == nil {
d.buf = d.storage.asBytes()[:0]
}
// Pad with this instance's domain-separator bits. We know that there's
// at least one byte of space in d.buf because, if it were full,
// permute would have been called to empty it. dsbyte also contains the
// first one bit for the padding. See the comment in the state struct.
d.buf = append(d.buf, dsbyte)
zerosStart := len(d.buf)
d.buf = d.storage.asBytes()[:d.rate]
for i := zerosStart; i < d.rate; i++ {
d.buf[i] = 0
}
// This adds the final one bit for the padding. Because of the way that
// bits are numbered from the LSB upwards, the final bit is the MSB of
// the last byte.
d.buf[d.rate-1] ^= 0x80
// Apply the permutation
d.permute()
d.state = spongeSqueezing
d.buf = d.storage.asBytes()[:d.rate]
copyOut(d, d.buf)
}
// Write absorbs more data into the hash's state. It panics if any
// output has already been read.
func (d *state) Write(p []byte) (written int, err error) {
if d.state != spongeAbsorbing {
panic("sha3: Write after Read")
}
if d.buf == nil {
d.buf = d.storage.asBytes()[:0]
}
written = len(p)
for len(p) > 0 {
if len(d.buf) == 0 && len(p) >= d.rate {
// The fast path; absorb a full "rate" bytes of input and apply the permutation.
xorIn(d, p[:d.rate])
p = p[d.rate:]
keccakF1600(&d.a)
} else {
// The slow path; buffer the input until we can fill the sponge, and then xor it in.
todo := d.rate - len(d.buf)
if todo > len(p) {
todo = len(p)
}
d.buf = append(d.buf, p[:todo]...)
p = p[todo:]
// If the sponge is full, apply the permutation.
if len(d.buf) == d.rate {
d.permute()
}
}
}
return
}
// Read squeezes an arbitrary number of bytes from the sponge.
func (d *state) Read(out []byte) (n int, err error) {
// If we're still absorbing, pad and apply the permutation.
if d.state == spongeAbsorbing {
d.padAndPermute(d.dsbyte)
}
n = len(out)
// Now, do the squeezing.
for len(out) > 0 {
n := copy(out, d.buf)
d.buf = d.buf[n:]
out = out[n:]
// Apply the permutation if we've squeezed the sponge dry.
if len(d.buf) == 0 {
d.permute()
}
}
return
}
// Sum applies padding to the hash state and then squeezes out the desired
// number of output bytes. It panics if any output has already been read.
func (d *state) Sum(in []byte) []byte {
if d.state != spongeAbsorbing {
panic("sha3: Sum after Read")
}
// Make a copy of the original hash so that caller can keep writing
// and summing.
dup := d.clone()
hash := make([]byte, dup.outputLen, 64) // explicit cap to allow stack allocation
dup.Read(hash)
return append(in, hash...)
}

View file

@ -0,0 +1,288 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build gc && !purego
package sha3
// This file contains code for using the 'compute intermediate
// message digest' (KIMD) and 'compute last message digest' (KLMD)
// instructions to compute SHA-3 and SHAKE hashes on IBM Z.
import (
"hash"
"golang.org/x/sys/cpu"
)
// codes represent 7-bit KIMD/KLMD function codes as defined in
// the Principles of Operation.
type code uint64
const (
// function codes for KIMD/KLMD
sha3_224 code = 32
sha3_256 = 33
sha3_384 = 34
sha3_512 = 35
shake_128 = 36
shake_256 = 37
nopad = 0x100
)
// kimd is a wrapper for the 'compute intermediate message digest' instruction.
// src must be a multiple of the rate for the given function code.
//
//go:noescape
func kimd(function code, chain *[200]byte, src []byte)
// klmd is a wrapper for the 'compute last message digest' instruction.
// src padding is handled by the instruction.
//
//go:noescape
func klmd(function code, chain *[200]byte, dst, src []byte)
type asmState struct {
a [200]byte // 1600 bit state
buf []byte // care must be taken to ensure cap(buf) is a multiple of rate
rate int // equivalent to block size
storage [3072]byte // underlying storage for buf
outputLen int // output length for full security
function code // KIMD/KLMD function code
state spongeDirection // whether the sponge is absorbing or squeezing
}
func newAsmState(function code) *asmState {
var s asmState
s.function = function
switch function {
case sha3_224:
s.rate = 144
s.outputLen = 28
case sha3_256:
s.rate = 136
s.outputLen = 32
case sha3_384:
s.rate = 104
s.outputLen = 48
case sha3_512:
s.rate = 72
s.outputLen = 64
case shake_128:
s.rate = 168
s.outputLen = 32
case shake_256:
s.rate = 136
s.outputLen = 64
default:
panic("sha3: unrecognized function code")
}
// limit s.buf size to a multiple of s.rate
s.resetBuf()
return &s
}
func (s *asmState) clone() *asmState {
c := *s
c.buf = c.storage[:len(s.buf):cap(s.buf)]
return &c
}
// copyIntoBuf copies b into buf. It will panic if there is not enough space to
// store all of b.
func (s *asmState) copyIntoBuf(b []byte) {
bufLen := len(s.buf)
s.buf = s.buf[:len(s.buf)+len(b)]
copy(s.buf[bufLen:], b)
}
// resetBuf points buf at storage, sets the length to 0 and sets cap to be a
// multiple of the rate.
func (s *asmState) resetBuf() {
max := (cap(s.storage) / s.rate) * s.rate
s.buf = s.storage[:0:max]
}
// Write (via the embedded io.Writer interface) adds more data to the running hash.
// It never returns an error.
func (s *asmState) Write(b []byte) (int, error) {
if s.state != spongeAbsorbing {
panic("sha3: Write after Read")
}
length := len(b)
for len(b) > 0 {
if len(s.buf) == 0 && len(b) >= cap(s.buf) {
// Hash the data directly and push any remaining bytes
// into the buffer.
remainder := len(b) % s.rate
kimd(s.function, &s.a, b[:len(b)-remainder])
if remainder != 0 {
s.copyIntoBuf(b[len(b)-remainder:])
}
return length, nil
}
if len(s.buf) == cap(s.buf) {
// flush the buffer
kimd(s.function, &s.a, s.buf)
s.buf = s.buf[:0]
}
// copy as much as we can into the buffer
n := len(b)
if len(b) > cap(s.buf)-len(s.buf) {
n = cap(s.buf) - len(s.buf)
}
s.copyIntoBuf(b[:n])
b = b[n:]
}
return length, nil
}
// Read squeezes an arbitrary number of bytes from the sponge.
func (s *asmState) Read(out []byte) (n int, err error) {
n = len(out)
// need to pad if we were absorbing
if s.state == spongeAbsorbing {
s.state = spongeSqueezing
// write hash directly into out if possible
if len(out)%s.rate == 0 {
klmd(s.function, &s.a, out, s.buf) // len(out) may be 0
s.buf = s.buf[:0]
return
}
// write hash into buffer
max := cap(s.buf)
if max > len(out) {
max = (len(out)/s.rate)*s.rate + s.rate
}
klmd(s.function, &s.a, s.buf[:max], s.buf)
s.buf = s.buf[:max]
}
for len(out) > 0 {
// flush the buffer
if len(s.buf) != 0 {
c := copy(out, s.buf)
out = out[c:]
s.buf = s.buf[c:]
continue
}
// write hash directly into out if possible
if len(out)%s.rate == 0 {
klmd(s.function|nopad, &s.a, out, nil)
return
}
// write hash into buffer
s.resetBuf()
if cap(s.buf) > len(out) {
s.buf = s.buf[:(len(out)/s.rate)*s.rate+s.rate]
}
klmd(s.function|nopad, &s.a, s.buf, nil)
}
return
}
// Sum appends the current hash to b and returns the resulting slice.
// It does not change the underlying hash state.
func (s *asmState) Sum(b []byte) []byte {
if s.state != spongeAbsorbing {
panic("sha3: Sum after Read")
}
// Copy the state to preserve the original.
a := s.a
// Hash the buffer. Note that we don't clear it because we
// aren't updating the state.
klmd(s.function, &a, nil, s.buf)
return append(b, a[:s.outputLen]...)
}
// Reset resets the Hash to its initial state.
func (s *asmState) Reset() {
for i := range s.a {
s.a[i] = 0
}
s.resetBuf()
s.state = spongeAbsorbing
}
// Size returns the number of bytes Sum will return.
func (s *asmState) Size() int {
return s.outputLen
}
// BlockSize returns the hash's underlying block size.
// The Write method must be able to accept any amount
// of data, but it may operate more efficiently if all writes
// are a multiple of the block size.
func (s *asmState) BlockSize() int {
return s.rate
}
// Clone returns a copy of the ShakeHash in its current state.
func (s *asmState) Clone() ShakeHash {
return s.clone()
}
// new224Asm returns an assembly implementation of SHA3-224 if available,
// otherwise it returns nil.
func new224Asm() hash.Hash {
if cpu.S390X.HasSHA3 {
return newAsmState(sha3_224)
}
return nil
}
// new256Asm returns an assembly implementation of SHA3-256 if available,
// otherwise it returns nil.
func new256Asm() hash.Hash {
if cpu.S390X.HasSHA3 {
return newAsmState(sha3_256)
}
return nil
}
// new384Asm returns an assembly implementation of SHA3-384 if available,
// otherwise it returns nil.
func new384Asm() hash.Hash {
if cpu.S390X.HasSHA3 {
return newAsmState(sha3_384)
}
return nil
}
// new512Asm returns an assembly implementation of SHA3-512 if available,
// otherwise it returns nil.
func new512Asm() hash.Hash {
if cpu.S390X.HasSHA3 {
return newAsmState(sha3_512)
}
return nil
}
// newShake128Asm returns an assembly implementation of SHAKE-128 if available,
// otherwise it returns nil.
func newShake128Asm() ShakeHash {
if cpu.S390X.HasSHA3 {
return newAsmState(shake_128)
}
return nil
}
// newShake256Asm returns an assembly implementation of SHAKE-256 if available,
// otherwise it returns nil.
func newShake256Asm() ShakeHash {
if cpu.S390X.HasSHA3 {
return newAsmState(shake_256)
}
return nil
}

View file

@ -0,0 +1,33 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build gc && !purego
#include "textflag.h"
// func kimd(function code, chain *[200]byte, src []byte)
TEXT ·kimd(SB), NOFRAME|NOSPLIT, $0-40
MOVD function+0(FP), R0
MOVD chain+8(FP), R1
LMG src+16(FP), R2, R3 // R2=base, R3=len
continue:
WORD $0xB93E0002 // KIMD --, R2
BVS continue // continue if interrupted
MOVD $0, R0 // reset R0 for pre-go1.8 compilers
RET
// func klmd(function code, chain *[200]byte, dst, src []byte)
TEXT ·klmd(SB), NOFRAME|NOSPLIT, $0-64
// TODO: SHAKE support
MOVD function+0(FP), R0
MOVD chain+8(FP), R1
LMG dst+16(FP), R2, R3 // R2=base, R3=len
LMG src+40(FP), R4, R5 // R4=base, R5=len
continue:
WORD $0xB93F0024 // KLMD R2, R4
BVS continue // continue if interrupted
MOVD $0, R0 // reset R0 for pre-go1.8 compilers
RET

View file

@ -0,0 +1,172 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package sha3
// This file defines the ShakeHash interface, and provides
// functions for creating SHAKE and cSHAKE instances, as well as utility
// functions for hashing bytes to arbitrary-length output.
//
//
// SHAKE implementation is based on FIPS PUB 202 [1]
// cSHAKE implementations is based on NIST SP 800-185 [2]
//
// [1] https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.202.pdf
// [2] https://doi.org/10.6028/NIST.SP.800-185
import (
"encoding/binary"
"hash"
"io"
)
// ShakeHash defines the interface to hash functions that support
// arbitrary-length output. When used as a plain [hash.Hash], it
// produces minimum-length outputs that provide full-strength generic
// security.
type ShakeHash interface {
hash.Hash
// Read reads more output from the hash; reading affects the hash's
// state. (ShakeHash.Read is thus very different from Hash.Sum)
// It never returns an error, but subsequent calls to Write or Sum
// will panic.
io.Reader
// Clone returns a copy of the ShakeHash in its current state.
Clone() ShakeHash
}
// cSHAKE specific context
type cshakeState struct {
*state // SHA-3 state context and Read/Write operations
// initBlock is the cSHAKE specific initialization set of bytes. It is initialized
// by newCShake function and stores concatenation of N followed by S, encoded
// by the method specified in 3.3 of [1].
// It is stored here in order for Reset() to be able to put context into
// initial state.
initBlock []byte
}
// Consts for configuring initial SHA-3 state
const (
dsbyteShake = 0x1f
dsbyteCShake = 0x04
rate128 = 168
rate256 = 136
)
func bytepad(input []byte, w int) []byte {
// leftEncode always returns max 9 bytes
buf := make([]byte, 0, 9+len(input)+w)
buf = append(buf, leftEncode(uint64(w))...)
buf = append(buf, input...)
padlen := w - (len(buf) % w)
return append(buf, make([]byte, padlen)...)
}
func leftEncode(value uint64) []byte {
var b [9]byte
binary.BigEndian.PutUint64(b[1:], value)
// Trim all but last leading zero bytes
i := byte(1)
for i < 8 && b[i] == 0 {
i++
}
// Prepend number of encoded bytes
b[i-1] = 9 - i
return b[i-1:]
}
func newCShake(N, S []byte, rate, outputLen int, dsbyte byte) ShakeHash {
c := cshakeState{state: &state{rate: rate, outputLen: outputLen, dsbyte: dsbyte}}
// leftEncode returns max 9 bytes
c.initBlock = make([]byte, 0, 9*2+len(N)+len(S))
c.initBlock = append(c.initBlock, leftEncode(uint64(len(N)*8))...)
c.initBlock = append(c.initBlock, N...)
c.initBlock = append(c.initBlock, leftEncode(uint64(len(S)*8))...)
c.initBlock = append(c.initBlock, S...)
c.Write(bytepad(c.initBlock, c.rate))
return &c
}
// Reset resets the hash to initial state.
func (c *cshakeState) Reset() {
c.state.Reset()
c.Write(bytepad(c.initBlock, c.rate))
}
// Clone returns copy of a cSHAKE context within its current state.
func (c *cshakeState) Clone() ShakeHash {
b := make([]byte, len(c.initBlock))
copy(b, c.initBlock)
return &cshakeState{state: c.clone(), initBlock: b}
}
// Clone returns copy of SHAKE context within its current state.
func (c *state) Clone() ShakeHash {
return c.clone()
}
// NewShake128 creates a new SHAKE128 variable-output-length ShakeHash.
// Its generic security strength is 128 bits against all attacks if at
// least 32 bytes of its output are used.
func NewShake128() ShakeHash {
if h := newShake128Asm(); h != nil {
return h
}
return &state{rate: rate128, outputLen: 32, dsbyte: dsbyteShake}
}
// NewShake256 creates a new SHAKE256 variable-output-length ShakeHash.
// Its generic security strength is 256 bits against all attacks if
// at least 64 bytes of its output are used.
func NewShake256() ShakeHash {
if h := newShake256Asm(); h != nil {
return h
}
return &state{rate: rate256, outputLen: 64, dsbyte: dsbyteShake}
}
// NewCShake128 creates a new instance of cSHAKE128 variable-output-length ShakeHash,
// a customizable variant of SHAKE128.
// N is used to define functions based on cSHAKE, it can be empty when plain cSHAKE is
// desired. S is a customization byte string used for domain separation - two cSHAKE
// computations on same input with different S yield unrelated outputs.
// When N and S are both empty, this is equivalent to NewShake128.
func NewCShake128(N, S []byte) ShakeHash {
if len(N) == 0 && len(S) == 0 {
return NewShake128()
}
return newCShake(N, S, rate128, 32, dsbyteCShake)
}
// NewCShake256 creates a new instance of cSHAKE256 variable-output-length ShakeHash,
// a customizable variant of SHAKE256.
// N is used to define functions based on cSHAKE, it can be empty when plain cSHAKE is
// desired. S is a customization byte string used for domain separation - two cSHAKE
// computations on same input with different S yield unrelated outputs.
// When N and S are both empty, this is equivalent to NewShake256.
func NewCShake256(N, S []byte) ShakeHash {
if len(N) == 0 && len(S) == 0 {
return NewShake256()
}
return newCShake(N, S, rate256, 64, dsbyteCShake)
}
// ShakeSum128 writes an arbitrary-length digest of data into hash.
func ShakeSum128(hash, data []byte) {
h := NewShake128()
h.Write(data)
h.Read(hash)
}
// ShakeSum256 writes an arbitrary-length digest of data into hash.
func ShakeSum256(hash, data []byte) {
h := NewShake256()
h.Write(data)
h.Read(hash)
}

View file

@ -0,0 +1,19 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !gc || purego || !s390x
package sha3
// newShake128Asm returns an assembly implementation of SHAKE-128 if available,
// otherwise it returns nil.
func newShake128Asm() ShakeHash {
return nil
}
// newShake256Asm returns an assembly implementation of SHAKE-256 if available,
// otherwise it returns nil.
func newShake256Asm() ShakeHash {
return nil
}

View file

@ -0,0 +1,23 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build (!amd64 && !386 && !ppc64le) || purego
package sha3
// A storageBuf is an aligned array of maxRate bytes.
type storageBuf [maxRate]byte
func (b *storageBuf) asBytes() *[maxRate]byte {
return (*[maxRate]byte)(b)
}
var (
xorIn = xorInGeneric
copyOut = copyOutGeneric
xorInUnaligned = xorInGeneric
copyOutUnaligned = copyOutGeneric
)
const xorImplementationUnaligned = "generic"

View file

@ -0,0 +1,28 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package sha3
import "encoding/binary"
// xorInGeneric xors the bytes in buf into the state; it
// makes no non-portable assumptions about memory layout
// or alignment.
func xorInGeneric(d *state, buf []byte) {
n := len(buf) / 8
for i := 0; i < n; i++ {
a := binary.LittleEndian.Uint64(buf)
d.a[i] ^= a
buf = buf[8:]
}
}
// copyOutGeneric copies uint64s to a byte buffer.
func copyOutGeneric(d *state, b []byte) {
for i := 0; len(b) >= 8; i++ {
binary.LittleEndian.PutUint64(b, d.a[i])
b = b[8:]
}
}

View file

@ -0,0 +1,66 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build (amd64 || 386 || ppc64le) && !purego
package sha3
import "unsafe"
// A storageBuf is an aligned array of maxRate bytes.
type storageBuf [maxRate / 8]uint64
func (b *storageBuf) asBytes() *[maxRate]byte {
return (*[maxRate]byte)(unsafe.Pointer(b))
}
// xorInUnaligned uses unaligned reads and writes to update d.a to contain d.a
// XOR buf.
func xorInUnaligned(d *state, buf []byte) {
n := len(buf)
bw := (*[maxRate / 8]uint64)(unsafe.Pointer(&buf[0]))[: n/8 : n/8]
if n >= 72 {
d.a[0] ^= bw[0]
d.a[1] ^= bw[1]
d.a[2] ^= bw[2]
d.a[3] ^= bw[3]
d.a[4] ^= bw[4]
d.a[5] ^= bw[5]
d.a[6] ^= bw[6]
d.a[7] ^= bw[7]
d.a[8] ^= bw[8]
}
if n >= 104 {
d.a[9] ^= bw[9]
d.a[10] ^= bw[10]
d.a[11] ^= bw[11]
d.a[12] ^= bw[12]
}
if n >= 136 {
d.a[13] ^= bw[13]
d.a[14] ^= bw[14]
d.a[15] ^= bw[15]
d.a[16] ^= bw[16]
}
if n >= 144 {
d.a[17] ^= bw[17]
}
if n >= 168 {
d.a[18] ^= bw[18]
d.a[19] ^= bw[19]
d.a[20] ^= bw[20]
}
}
func copyOutUnaligned(d *state, buf []byte) {
ab := (*[maxRate]uint8)(unsafe.Pointer(&d.a[0]))
copy(buf, ab[:])
}
var (
xorIn = xorInUnaligned
copyOut = copyOutUnaligned
)
const xorImplementationUnaligned = "unaligned"