diff --git a/vendor/github.com/awnumar/memguard/.cirrus.yml b/vendor/github.com/awnumar/memguard/.cirrus.yml new file mode 100644 index 0000000..970e10b --- /dev/null +++ b/vendor/github.com/awnumar/memguard/.cirrus.yml @@ -0,0 +1,91 @@ +linux_task: + container: + image: ubuntu:latest + env: + GOPROXY: https://proxy.golang.org + GOPATH: /tmp/go + PATH: ${GOPATH}/bin:${PATH} + CIRRUS_WORKING_DIR: /tmp/go/src/github.com/${CIRRUS_REPO_FULL_NAME} + install_script: + - apt-get update && apt-get install --reinstall software-properties-common -y + - add-apt-repository ppa:longsleep/golang-backports + - apt-get update && apt-get install git golang-go -y + clone_script: | + if [[ -z "$CIRRUS_PR" ]]; then + git clone --recursive --branch=$CIRRUS_BRANCH https://x-access-token:${CIRRUS_REPO_CLONE_TOKEN}@github.com/${CIRRUS_REPO_FULL_NAME}.git ${CIRRUS_WORKING_DIR} + git reset --hard $CIRRUS_CHANGE_IN_REPO + else + git clone --recursive https://x-access-token:${CIRRUS_REPO_CLONE_TOKEN}@github.com/${CIRRUS_REPO_FULL_NAME}.git ${CIRRUS_WORKING_DIR} + git fetch origin pull/$CIRRUS_PR/head:pull/$CIRRUS_PR + git reset --hard $CIRRUS_CHANGE_IN_REPO + fi + build_script: + - go version + - go build -race -v ./... + test_script: + - go test -race -v ./... + +osx_task: + osx_instance: + image: mojave-base + env: + GOPROXY: https://proxy.golang.org + GOPATH: /tmp/go + PATH: ${GOPATH}/bin:${PATH} + CIRRUS_WORKING_DIR: /tmp/go/src/github.com/${CIRRUS_REPO_FULL_NAME} + install_script: + - brew install git go + clone_script: | + if [[ -z "$CIRRUS_PR" ]]; then + git clone --recursive --branch=$CIRRUS_BRANCH https://x-access-token:${CIRRUS_REPO_CLONE_TOKEN}@github.com/${CIRRUS_REPO_FULL_NAME}.git ${CIRRUS_WORKING_DIR} + git reset --hard $CIRRUS_CHANGE_IN_REPO + else + git clone --recursive https://x-access-token:${CIRRUS_REPO_CLONE_TOKEN}@github.com/${CIRRUS_REPO_FULL_NAME}.git ${CIRRUS_WORKING_DIR} + git fetch origin pull/$CIRRUS_PR/head:pull/$CIRRUS_PR + git reset --hard $CIRRUS_CHANGE_IN_REPO + fi + build_script: + - go version + - go build -race -v ./... + test_script: + - go test -race -v ./... + +windows_task: + windows_container: + image: golang:windowsservercore-1803 + os_version: 1803 + env: + GOPROXY: https://proxy.golang.org + GOPATH: C:\go + PATH: ${GOPATH}\bin:${PATH} + CIRRUS_WORKING_DIR: C:\go\src\github.com\${CIRRUS_REPO_FULL_NAME} + build_script: + - go version + - go build -race -v ./... + test_script: + - go test -race -v ./... + +freebsd_task: + freebsd_instance: + image: freebsd-12-0-release-amd64 + env: + GOPROXY: https://proxy.golang.org + GOPATH: /tmp/go + PATH: ${GOPATH}/bin:${PATH} + CIRRUS_WORKING_DIR: /tmp/go/src/github.com/${CIRRUS_REPO_FULL_NAME} + install_script: + - pkg install -y git go + clone_script: | + if ( "$CIRRUS_PR" == "" || ! $?CIRRUS_PR ) then + git clone --recursive --branch=$CIRRUS_BRANCH https://x-access-token:${CIRRUS_REPO_CLONE_TOKEN}@github.com/${CIRRUS_REPO_FULL_NAME}.git ${CIRRUS_WORKING_DIR} + git reset --hard $CIRRUS_CHANGE_IN_REPO + else + git clone --recursive https://x-access-token:${CIRRUS_REPO_CLONE_TOKEN}@github.com/${CIRRUS_REPO_FULL_NAME}.git ${CIRRUS_WORKING_DIR} + git fetch origin pull/$CIRRUS_PR/head:pull/$CIRRUS_PR + git reset --hard $CIRRUS_CHANGE_IN_REPO + fi + build_script: + - go version + - go build -race -v ./... + test_script: + - go test -race -v ./... diff --git a/vendor/github.com/awnumar/memguard/AUTHORS b/vendor/github.com/awnumar/memguard/AUTHORS new file mode 100644 index 0000000..5f5f3b6 --- /dev/null +++ b/vendor/github.com/awnumar/memguard/AUTHORS @@ -0,0 +1,8 @@ +# Entries should be added alphabetically in the form: +# Name or Organization +# The email address is not required for organizations. + +Awn Umar +Carlo Alberto Ferraris +dotcppfile +Joseph Richey diff --git a/vendor/github.com/awnumar/memguard/LICENSE b/vendor/github.com/awnumar/memguard/LICENSE new file mode 100644 index 0000000..8dada3e --- /dev/null +++ b/vendor/github.com/awnumar/memguard/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/vendor/github.com/awnumar/memguard/README.md b/vendor/github.com/awnumar/memguard/README.md new file mode 100644 index 0000000..0c6c67d --- /dev/null +++ b/vendor/github.com/awnumar/memguard/README.md @@ -0,0 +1,58 @@ +

+ +

MemGuard

+

Secure software enclave for storage of sensitive information in memory.

+

+ + + +

+

+ +--- + +This package attempts to reduce the likelihood of sensitive data being exposed. It supports all major operating systems and is written in pure Go. + +## Features + +* Sensitive data is encrypted and authenticated in memory using xSalsa20 and Poly1305 respectively. The scheme also defends against cold-boot attacks. +* Memory allocation bypasses the language runtime by using system calls to query the kernel for resources directly. This avoids interference from the garbage-collector. +* Buffers that store plaintext data are fortified with guard pages and canary values to detect spurious accesses and overflows. +* Effort is taken to prevent sensitive data from touching the disk. This includes locking memory to prevent swapping and handling core dumps. +* Kernel-level immutability is implemented so that attempted modification of protected regions results in an access violation. +* Multiple endpoints provide session purging and safe termination capabilities as well as signal handling to prevent remnant data being left behind. +* Side-channel attacks are mitigated against by making sure that the copying and comparison of data is done in constant-time. +* Accidental memory leaks are mitigated against by harnessing the garbage-collector to automatically destroy containers that have become unreachable. + +Some features were inspired by [libsodium](https://github.com/jedisct1/libsodium), so credits to them. + +Full documentation and a complete overview of the API can be found [here](https://godoc.org/github.com/awnumar/memguard). Interesting and useful code samples can be found within the [examples](examples) subpackage. + +## Installation + +``` +$ go get github.com/awnumar/memguard +``` + +We **strongly** encourage you to pin a specific version for a clean and reliable build. This can be accomplished using [modules](https://github.com/golang/go/wiki/Modules). + +## Contributing + +* Using the package and identifying points of friction. +* Reading the source code and looking for improvements. +* Adding interesting and useful program samples to [`./examples`](examples). +* Developing Proof-of-Concept attacks and mitigations. +* Improving compatibility with more kernels and architectures. +* Implementing kernel-specific and cpu-specific protections. +* Writing useful security and crypto libraries that utilise memguard. +* Submitting performance improvements or benchmarking code. + +Issues are for reporting bugs and for discussion on proposals. Pull requests should be made against master. + +## Future goals + +* Ability to stream data to and from encrypted enclave objects. +* Catch segmentation faults to wipe memory before crashing. +* Evaluate and improve the strategies in place, particularly for [Coffer](core/coffer.go) objects. +* Formalise a threat model and evaluate our performance in regards to it. +* Use lessons learned to apply patches upstream to the Go language and runtime. diff --git a/vendor/github.com/awnumar/memguard/buffer.go b/vendor/github.com/awnumar/memguard/buffer.go new file mode 100644 index 0000000..77bb7bf --- /dev/null +++ b/vendor/github.com/awnumar/memguard/buffer.go @@ -0,0 +1,685 @@ +package memguard + +import ( + "bytes" + "io" + "os" + "runtime" + "unsafe" + + "github.com/awnumar/memguard/core" +) + +/* +LockedBuffer is a structure that holds raw sensitive data. + +The number of LockedBuffers that you are able to create is limited by how much memory your system's kernel allows each process to mlock/VirtualLock. Therefore you should call Destroy on LockedBuffers that you no longer need or defer a Destroy call after creating a new LockedBuffer. +*/ +type LockedBuffer struct { + *core.Buffer + *drop +} + +/* +Value monitored by a finalizer so that we can clean up LockedBuffers that have gone out of scope. +*/ +type drop [16]byte + +// Constructs a LockedBuffer object from a core.Buffer while also setting up the finalizer for it. +func newBuffer(buf *core.Buffer) *LockedBuffer { + b := &LockedBuffer{buf, new(drop)} + runtime.SetFinalizer(b.drop, func(_ *drop) { + go buf.Destroy() + }) + return b +} + +/* +NewBuffer creates a mutable data container of the specified size. + +The size must be strictly positive or the function will panic. +*/ +func NewBuffer(size int) *LockedBuffer { + // Construct a Buffer of the specified size. + buf, err := core.NewBuffer(size) + if err != nil { + core.Panic(err) + } + + // Construct and return the wrapped container object. + return newBuffer(buf) +} + +/* +NewBufferFromBytes constructs an immutable buffer from a byte slice. + +The length of the buffer must be non-zero or the function will panic. The source buffer is wiped after the value has been copied over to the created container. +*/ +func NewBufferFromBytes(src []byte) *LockedBuffer { + // Construct a buffer of the correct size. + b := NewBuffer(len(src)) + + // Move the data over. + b.Move(src) + + // Make the buffer immutable. + b.Freeze() + + // Return the created Buffer object. + return b +} + +/* +NewBufferFromReader reads a given number of bytes from a Reader into a LockedBuffer. The returned object will be immutable. + +If an error is encountered before size bytes are read, a smaller LockedBuffer object will be returned and the number of bytes read can be inferred using the Size method. If no bytes are read, a destroyed LockedBuffer with size zero is returned. + +The provided size must be strictly positive or the function will panic. +*/ +func NewBufferFromReader(r io.Reader, size int) *LockedBuffer { + // Construct a buffer of the provided size. + b := NewBuffer(size) + + // Attempt to fill it with data from the Reader. + if n, err := io.ReadFull(r, b.Bytes()); err != nil { + if n == 0 { + // nothing was read + b.Destroy() + return &LockedBuffer{new(core.Buffer), new(drop)} + } + + // partial read + d := NewBuffer(n) + d.Copy(b.Bytes()[:n]) + d.Freeze() + b.Destroy() + return d + } + + // success + b.Freeze() + return b +} + +/* +NewBufferFromReaderUntil constructs an immutable buffer containing data sourced from a Reader object. It will continue reading until either an EOF is encountered or the provided delimiter value is encountered. The delimiter will not be included in the returned data. + +The number of bytes read can be inferred using the Size method. If no data was read, or if the first byte was the delimiter, a destroyed LockedBuffer with size zero is returned. +*/ +func NewBufferFromReaderUntil(r io.Reader, delim byte) *LockedBuffer { + // Construct a buffer with a data page that fills an entire memory page. + b := NewBuffer(os.Getpagesize()) + + // Loop over the buffer a byte at a time. + for i := 0; ; i++ { + // If we have filled this buffer... + if i == b.Size() { + // Construct a new buffer that is a page size larger. + c := NewBuffer(b.Size() + os.Getpagesize()) + + // Copy the data over. + c.Copy(b.Bytes()) + + // Destroy the old one and reassign its variable. + b.Destroy() + b = c + } + + // Attempt to read a single byte. + n, err := r.Read(b.Bytes()[i : i+1]) + if n != 1 { // if we did not read a byte + if err == nil { // and there was no error + i-- // try again + continue + } + if i == 0 { // no data read + b.Destroy() + return &LockedBuffer{new(core.Buffer), new(drop)} + } + // if there was an error, we're done early + d := NewBuffer(i) + d.Copy(b.Bytes()[:i]) + d.Freeze() + b.Destroy() + return d + } + // we managed to read a byte, check if it was the delimiter + if b.Bytes()[i] == delim { + if i == 0 { + // if first byte was delimiter, there's no data to return + b.Destroy() + return &LockedBuffer{new(core.Buffer), new(drop)} + } + d := NewBuffer(i) + d.Copy(b.Bytes()[:i]) + d.Freeze() + b.Destroy() + return d + } + } +} + +/* +NewBufferFromEntireReader reads from a Reader until EOF or until an error occurs, placing the data into an immutable buffer. + +The number of bytes read can be inferred using the Size method. If no data was read, a destroyed LockedBuffer with size zero is returned. +*/ +func NewBufferFromEntireReader(r io.Reader) *LockedBuffer { + // Create a buffer with a data region of one page size. + b := NewBuffer(os.Getpagesize()) + + for read := 0; ; { + // Attempt to read some data from the reader. + n, err := r.Read(b.Bytes()[read:]) + + // Nothing read but no error, try again. + if n == 0 && err == nil { + continue + } + + // Increment the read count by the number of bytes that we just read. + read += n + + if err != nil { + // We're done, return the data. + if read == 0 { + // No data read. + b.Destroy() + return &LockedBuffer{new(core.Buffer), new(drop)} + } + d := NewBuffer(read) + d.Copy(b.Bytes()[:read]) + d.Freeze() + b.Destroy() + return d + } + + // If we've filled this buffer, grow it by another page size. + if len(b.Bytes()[read:]) == 0 { + d := NewBuffer(b.Size() + os.Getpagesize()) + d.Copy(b.Bytes()) + b.Destroy() + b = d + } + } +} + +/* +NewBufferRandom constructs an immutable buffer filled with cryptographically-secure random bytes. + +The size must be strictly positive or the function will panic. +*/ +func NewBufferRandom(size int) *LockedBuffer { + // Construct a buffer of the specified size. + b := NewBuffer(size) + + // Fill the buffer with random bytes. + b.Scramble() + + // Make the buffer immutable. + b.Freeze() + + // Return the created Buffer object. + return b +} + +// Freeze makes a LockedBuffer's memory immutable. The call can be reversed with Melt. +func (b *LockedBuffer) Freeze() { + b.Buffer.Freeze() +} + +// Melt makes a LockedBuffer's memory mutable. The call can be reversed with Freeze. +func (b *LockedBuffer) Melt() { + b.Buffer.Melt() +} + +/* +Seal takes a LockedBuffer object and returns its contents encrypted inside a sealed Enclave object. The LockedBuffer is subsequently destroyed and its contents wiped. + +If Seal is called on a destroyed buffer, a nil enclave is returned. +*/ +func (b *LockedBuffer) Seal() *Enclave { + e, err := core.Seal(b.Buffer) + if err != nil { + if err == core.ErrBufferExpired { + return nil + } + core.Panic(err) + } + return &Enclave{e} +} + +/* +Copy performs a time-constant copy into a LockedBuffer. Move is preferred if the source is not also a LockedBuffer or if the source is no longer needed. +*/ +func (b *LockedBuffer) Copy(src []byte) { + b.CopyAt(0, src) +} + +/* +CopyAt performs a time-constant copy into a LockedBuffer at an offset. Move is preferred if the source is not also a LockedBuffer or if the source is no longer needed. +*/ +func (b *LockedBuffer) CopyAt(offset int, src []byte) { + if !b.IsAlive() { + return + } + + b.Lock() + defer b.Unlock() + + core.Copy(b.Bytes()[offset:], src) +} + +/* +Move performs a time-constant move into a LockedBuffer. The source is wiped after the bytes are copied. +*/ +func (b *LockedBuffer) Move(src []byte) { + b.MoveAt(0, src) +} + +/* +MoveAt performs a time-constant move into a LockedBuffer at an offset. The source is wiped after the bytes are copied. +*/ +func (b *LockedBuffer) MoveAt(offset int, src []byte) { + if !b.IsAlive() { + return + } + + b.Lock() + defer b.Unlock() + + core.Move(b.Bytes()[offset:], src) +} + +/* +Scramble attempts to overwrite the data with cryptographically-secure random bytes. +*/ +func (b *LockedBuffer) Scramble() { + if !b.IsAlive() { + return + } + + b.Lock() + defer b.Unlock() + + core.Scramble(b.Bytes()) +} + +/* +Wipe attempts to overwrite the data with zeros. +*/ +func (b *LockedBuffer) Wipe() { + if !b.IsAlive() { + return + } + + b.Lock() + defer b.Unlock() + + core.Wipe(b.Bytes()) +} + +/* +Size gives you the length of a given LockedBuffer's data segment. A destroyed LockedBuffer will have a size of zero. +*/ +func (b *LockedBuffer) Size() int { + return len(b.Bytes()) +} + +/* +Destroy wipes and frees the underlying memory of a LockedBuffer. The LockedBuffer will not be accessible or usable after this calls is made. +*/ +func (b *LockedBuffer) Destroy() { + b.Buffer.Destroy() +} + +/* +IsAlive returns a boolean value indicating if a LockedBuffer is alive, i.e. that it has not been destroyed. +*/ +func (b *LockedBuffer) IsAlive() bool { + return core.GetBufferState(b.Buffer).IsAlive +} + +/* +IsMutable returns a boolean value indicating if a LockedBuffer is mutable. +*/ +func (b *LockedBuffer) IsMutable() bool { + return core.GetBufferState(b.Buffer).IsMutable +} + +/* +EqualTo performs a time-constant comparison on the contents of a LockedBuffer with a given buffer. A destroyed LockedBuffer will always return false. +*/ +func (b *LockedBuffer) EqualTo(buf []byte) bool { + b.RLock() + defer b.RUnlock() + + return core.Equal(b.Bytes(), buf) +} + +/* + Functions for representing the memory region as various data types. +*/ + +/* +Bytes returns a byte slice referencing the protected region of memory. +*/ +func (b *LockedBuffer) Bytes() []byte { + return b.Buffer.Data() +} + +/* +Reader returns a Reader object referencing the protected region of memory. +*/ +func (b *LockedBuffer) Reader() *bytes.Reader { + return bytes.NewReader(b.Bytes()) +} + +/* +String returns a string representation of the protected region of memory. +*/ +func (b *LockedBuffer) String() string { + slice := b.Bytes() + return *(*string)(unsafe.Pointer(&slice)) +} + +/* +Uint16 returns a slice pointing to the protected region of memory with the data represented as a sequence of unsigned 16 bit integers. Its length will be half that of the byte slice, excluding any remaining part that doesn't form a complete uint16 value. + +If called on a destroyed LockedBuffer, a nil slice will be returned. +*/ +func (b *LockedBuffer) Uint16() []uint16 { + b.RLock() + defer b.RUnlock() + + // Check if still alive. + if !core.GetBufferState(b.Buffer).IsAlive { + return nil + } + + // Compute size of new slice representation. + size := b.Size() / 2 + if size < 1 { + return nil + } + + // Construct the new slice representation. + var sl = struct { + addr uintptr + len int + cap int + }{uintptr(unsafe.Pointer(&b.Bytes()[0])), size, size} + + // Cast the representation to the correct type and return it. + return *(*[]uint16)(unsafe.Pointer(&sl)) +} + +/* +Uint32 returns a slice pointing to the protected region of memory with the data represented as a sequence of unsigned 32 bit integers. Its length will be one quarter that of the byte slice, excluding any remaining part that doesn't form a complete uint32 value. + +If called on a destroyed LockedBuffer, a nil slice will be returned. +*/ +func (b *LockedBuffer) Uint32() []uint32 { + b.RLock() + defer b.RUnlock() + + // Check if still alive. + if !core.GetBufferState(b.Buffer).IsAlive { + return nil + } + + // Compute size of new slice representation. + size := b.Size() / 4 + if size < 1 { + return nil + } + + // Construct the new slice representation. + var sl = struct { + addr uintptr + len int + cap int + }{uintptr(unsafe.Pointer(&b.Bytes()[0])), size, size} + + // Cast the representation to the correct type and return it. + return *(*[]uint32)(unsafe.Pointer(&sl)) +} + +/* +Uint64 returns a slice pointing to the protected region of memory with the data represented as a sequence of unsigned 64 bit integers. Its length will be one eighth that of the byte slice, excluding any remaining part that doesn't form a complete uint64 value. + +If called on a destroyed LockedBuffer, a nil slice will be returned. +*/ +func (b *LockedBuffer) Uint64() []uint64 { + b.RLock() + defer b.RUnlock() + + // Check if still alive. + if !core.GetBufferState(b.Buffer).IsAlive { + return nil + } + + // Compute size of new slice representation. + size := b.Size() / 8 + if size < 1 { + return nil + } + + // Construct the new slice representation. + var sl = struct { + addr uintptr + len int + cap int + }{uintptr(unsafe.Pointer(&b.Bytes()[0])), size, size} + + // Cast the representation to the correct type and return it. + return *(*[]uint64)(unsafe.Pointer(&sl)) +} + +/* +Int8 returns a slice pointing to the protected region of memory with the data represented as a sequence of signed 8 bit integers. If called on a destroyed LockedBuffer, a nil slice will be returned. +*/ +func (b *LockedBuffer) Int8() []int8 { + b.RLock() + defer b.RUnlock() + + // Check if still alive. + if !core.GetBufferState(b.Buffer).IsAlive { + return nil + } + + // Construct the new slice representation. + var sl = struct { + addr uintptr + len int + cap int + }{uintptr(unsafe.Pointer(&b.Bytes()[0])), b.Size(), b.Size()} + + // Cast the representation to the correct type and return it. + return *(*[]int8)(unsafe.Pointer(&sl)) +} + +/* +Int16 returns a slice pointing to the protected region of memory with the data represented as a sequence of signed 16 bit integers. Its length will be half that of the byte slice, excluding any remaining part that doesn't form a complete int16 value. + +If called on a destroyed LockedBuffer, a nil slice will be returned. +*/ +func (b *LockedBuffer) Int16() []int16 { + b.RLock() + defer b.RUnlock() + + // Check if still alive. + if !core.GetBufferState(b.Buffer).IsAlive { + return nil + } + + // Compute size of new slice representation. + size := b.Size() / 2 + if size < 1 { + return nil + } + + // Construct the new slice representation. + var sl = struct { + addr uintptr + len int + cap int + }{uintptr(unsafe.Pointer(&b.Bytes()[0])), size, size} + + // Cast the representation to the correct type and return it. + return *(*[]int16)(unsafe.Pointer(&sl)) +} + +/* +Int32 returns a slice pointing to the protected region of memory with the data represented as a sequence of signed 32 bit integers. Its length will be one quarter that of the byte slice, excluding any remaining part that doesn't form a complete int32 value. + +If called on a destroyed LockedBuffer, a nil slice will be returned. +*/ +func (b *LockedBuffer) Int32() []int32 { + b.RLock() + defer b.RUnlock() + + // Check if still alive. + if !core.GetBufferState(b.Buffer).IsAlive { + return nil + } + + // Compute size of new slice representation. + size := b.Size() / 4 + if size < 1 { + return nil + } + + // Construct the new slice representation. + var sl = struct { + addr uintptr + len int + cap int + }{uintptr(unsafe.Pointer(&b.Bytes()[0])), size, size} + + // Cast the representation to the correct type and return it. + return *(*[]int32)(unsafe.Pointer(&sl)) +} + +/* +Int64 returns a slice pointing to the protected region of memory with the data represented as a sequence of signed 64 bit integers. Its length will be one eighth that of the byte slice, excluding any remaining part that doesn't form a complete int64 value. + +If called on a destroyed LockedBuffer, a nil slice will be returned. +*/ +func (b *LockedBuffer) Int64() []int64 { + b.RLock() + defer b.RUnlock() + + // Check if still alive. + if !core.GetBufferState(b.Buffer).IsAlive { + return nil + } + + // Compute size of new slice representation. + size := b.Size() / 8 + if size < 1 { + return nil + } + + // Construct the new slice representation. + var sl = struct { + addr uintptr + len int + cap int + }{uintptr(unsafe.Pointer(&b.Bytes()[0])), size, size} + + // Cast the representation to the correct type and return it. + return *(*[]int64)(unsafe.Pointer(&sl)) +} + +/* +ByteArray8 returns a pointer to some 8 byte array. Care must be taken not to dereference the pointer and instead pass it around as-is. + +The length of the buffer must be at least 8 bytes in size and the LockedBuffer should not be destroyed. In either of these cases a nil value is returned. +*/ +func (b *LockedBuffer) ByteArray8() *[8]byte { + b.RLock() + defer b.RUnlock() + + // Check if still alive. + if !core.GetBufferState(b.Buffer).IsAlive { + return nil + } + + // Check if the length is large enough. + if len(b.Bytes()) < 8 { + return nil + } + + // Cast the representation to the correct type. + return (*[8]byte)(unsafe.Pointer(&b.Bytes()[0])) +} + +/* +ByteArray16 returns a pointer to some 16 byte array. Care must be taken not to dereference the pointer and instead pass it around as-is. + +The length of the buffer must be at least 16 bytes in size and the LockedBuffer should not be destroyed. In either of these cases a nil value is returned. +*/ +func (b *LockedBuffer) ByteArray16() *[16]byte { + b.RLock() + defer b.RUnlock() + + // Check if still alive. + if !core.GetBufferState(b.Buffer).IsAlive { + return nil + } + + // Check if the length is large enough. + if len(b.Bytes()) < 16 { + return nil + } + + // Cast the representation to the correct type. + return (*[16]byte)(unsafe.Pointer(&b.Bytes()[0])) +} + +/* +ByteArray32 returns a pointer to some 32 byte array. Care must be taken not to dereference the pointer and instead pass it around as-is. + +The length of the buffer must be at least 32 bytes in size and the LockedBuffer should not be destroyed. In either of these cases a nil value is returned. +*/ +func (b *LockedBuffer) ByteArray32() *[32]byte { + b.RLock() + defer b.RUnlock() + + // Check if still alive. + if !core.GetBufferState(b.Buffer).IsAlive { + return nil + } + + // Check if the length is large enough. + if len(b.Bytes()) < 32 { + return nil + } + + // Cast the representation to the correct type. + return (*[32]byte)(unsafe.Pointer(&b.Bytes()[0])) +} + +/* +ByteArray64 returns a pointer to some 64 byte array. Care must be taken not to dereference the pointer and instead pass it around as-is. + +The length of the buffer must be at least 64 bytes in size and the LockedBuffer should not be destroyed. In either of these cases a nil value is returned. +*/ +func (b *LockedBuffer) ByteArray64() *[64]byte { + b.RLock() + defer b.RUnlock() + + // Check if still alive. + if !core.GetBufferState(b.Buffer).IsAlive { + return nil + } + + // Check if the length is large enough. + if len(b.Bytes()) < 64 { + return nil + } + + // Cast the representation to the correct type. + return (*[64]byte)(unsafe.Pointer(&b.Bytes()[0])) +} diff --git a/vendor/github.com/awnumar/memguard/core/auxiliary.go b/vendor/github.com/awnumar/memguard/core/auxiliary.go new file mode 100644 index 0000000..a805b29 --- /dev/null +++ b/vendor/github.com/awnumar/memguard/core/auxiliary.go @@ -0,0 +1,26 @@ +package core + +import ( + "os" + "unsafe" +) + +var ( + // Ascertain and store the system memory page size. + pageSize = os.Getpagesize() +) + +// Round a length to a multiple of the system page size. +func roundToPageSize(length int) int { + return (length + (pageSize - 1)) & (^(pageSize - 1)) +} + +// Convert a pointer and length to a byte slice that describes that memory. +func getBytes(ptr *byte, len int) []byte { + var sl = struct { + addr uintptr + len int + cap int + }{uintptr(unsafe.Pointer(ptr)), len, len} + return *(*[]byte)(unsafe.Pointer(&sl)) +} diff --git a/vendor/github.com/awnumar/memguard/core/buffer.go b/vendor/github.com/awnumar/memguard/core/buffer.go new file mode 100644 index 0000000..58d9158 --- /dev/null +++ b/vendor/github.com/awnumar/memguard/core/buffer.go @@ -0,0 +1,269 @@ +package core + +import ( + "errors" + "sync" + + "github.com/awnumar/memguard/memcall" + "gitlab.com/NebulousLabs/fastrand" +) + +var ( + buffers = new(bufferList) +) + +// ErrNullBuffer is returned when attempting to construct a buffer of size less than one. +var ErrNullBuffer = errors.New(" buffer size must be greater than zero") + +// ErrBufferExpired is returned when attempting to perform an operation on or with a buffer that has been destroyed. +var ErrBufferExpired = errors.New(" buffer has been purged from memory and can no longer be used") + +/* +Buffer is a structure that holds raw sensitive data. + +The number of Buffers that can exist at one time is limited by how much memory your system's kernel allows each process to mlock/VirtualLock. Therefore you should call DestroyBuffer on Buffers that you no longer need, ideally defering a Destroy call after creating a new one. +*/ +type Buffer struct { + sync.RWMutex // Local mutex lock + + alive bool // Signals that destruction has not come + mutable bool // Mutability state of underlying memory + + data []byte // Portion of memory holding the data + memory []byte // Entire allocated memory region + + preguard []byte // Guard page addressed before the data + inner []byte // Inner region between the guard pages + postguard []byte // Guard page addressed after the data + + canary []byte // Value written behind data to detect spillage +} + +/* +NewBuffer is a raw constructor for the Buffer object. +*/ +func NewBuffer(size int) (*Buffer, error) { + var err error + + // Return an error if length < 1. + if size < 1 { + return nil, ErrNullBuffer + } + + // Declare and allocate + b := new(Buffer) + + // Allocate the total needed memory + innerLen := roundToPageSize(size) + b.memory, err = memcall.Alloc((2 * pageSize) + innerLen) + if err != nil { + Panic(err) + } + + // Construct slice reference for data buffer. + b.data = getBytes(&b.memory[pageSize+innerLen-size], size) + + // Construct slice references for page sectors. + b.preguard = getBytes(&b.memory[0], pageSize) + b.inner = getBytes(&b.memory[pageSize], innerLen) + b.postguard = getBytes(&b.memory[pageSize+innerLen], pageSize) + + // Construct slice reference for canary portion of inner page. + b.canary = getBytes(&b.memory[pageSize], len(b.inner)-len(b.data)) + + // Lock the pages that will hold sensitive data. + if err := memcall.Lock(b.inner); err != nil { + Panic(err) + } + + // Initialise the canary value and reference regions. + fastrand.Read(b.canary) + Copy(b.preguard, b.canary) + Copy(b.postguard, b.canary) + + // Make the guard pages inaccessible. + if err := memcall.Protect(b.preguard, memcall.NoAccess); err != nil { + Panic(err) + } + if err := memcall.Protect(b.postguard, memcall.NoAccess); err != nil { + Panic(err) + } + + // Set remaining properties + b.alive = true + b.mutable = true + + // Append the container to list of active buffers. + buffers.add(b) + + // Return the created Buffer to the caller. + return b, nil +} + +// Data returns a byte slice representing the memory region containing the data. +func (b *Buffer) Data() []byte { + return b.data +} + +// Freeze makes the underlying memory of a given buffer immutable. This will do nothing if the Buffer has been destroyed. +func (b *Buffer) Freeze() { + // Attain lock. + b.RLock() + defer b.RUnlock() + + // Check if destroyed. + if !b.alive { + return + } + + // Only do anything if currently mutable. + if b.mutable { + // Make the memory immutable. + if err := memcall.Protect(b.inner, memcall.ReadOnly); err != nil { + Panic(err) + } + b.mutable = false + } +} + +// Melt makes the underlying memory of a given buffer mutable. This will do nothing if the Buffer has been destroyed. +func (b *Buffer) Melt() { + // Attain lock. + b.RLock() + defer b.RUnlock() + + // Check if destroyed. + if !b.alive { + return + } + + // Only do anything if currently immutable. + if !b.mutable { + // Make the memory mutable. + if err := memcall.Protect(b.inner, memcall.ReadWrite); err != nil { + Panic(err) + } + b.mutable = true + } +} + +/* +Destroy performs some security checks, securely wipes the contents of, and then releases a Buffer's memory back to the OS. If a security check fails, the process will attempt to wipe all it can before safely panicking. + +If the Buffer has already been destroyed, subsequent calls are idempotent. +*/ +func (b *Buffer) Destroy() { + // Attain a mutex lock on this Buffer. + b.Lock() + defer b.Unlock() + + // Return if it's already destroyed. + if !b.alive { + return + } + + // Make all of the memory readable and writable. + if err := memcall.Protect(b.memory, memcall.ReadWrite); err != nil { + Panic(err) + } + + // Verify the canary + if !Equal(b.preguard, b.postguard) || !Equal(b.preguard[:len(b.canary)], b.canary) { + Panic(" canary verification failed; buffer overflow detected") + } + + // Wipe the memory. + Wipe(b.memory) + + // Remove this one from global slice. + buffers.remove(b) + + // Unlock pages locked into memory. + if err := memcall.Unlock(b.inner); err != nil { + Panic(err) + } + + // Free all related memory. + if err := memcall.Free(b.memory); err != nil { + Panic(err) + } + + // Reset the fields. + b.alive = false + b.mutable = false + b.data = nil + b.memory = nil + b.preguard = nil + b.inner = nil + b.postguard = nil + b.canary = nil +} + +// BufferState encodes a buffer's various states. +type BufferState struct { + IsAlive bool // true = not destroyed + IsMutable bool // true = memory is writable +} + +/* +GetBufferState returns a BufferState struct that encodes state information about a given Buffer object. +*/ +func GetBufferState(b *Buffer) BufferState { + b.RLock() + defer b.RUnlock() + return BufferState{IsAlive: b.alive, IsMutable: b.mutable} +} + +// BufferList stores a list of buffers in a thread-safe manner. +type bufferList struct { + sync.RWMutex + list []*Buffer +} + +// Add appends a given Buffer to the list. +func (l *bufferList) add(b ...*Buffer) { + l.Lock() + defer l.Unlock() + + l.list = append(l.list, b...) +} + +// Remove removes a given Buffer from the list. +func (l *bufferList) remove(b *Buffer) { + l.Lock() + defer l.Unlock() + + for i, v := range l.list { + if v == b { + l.list = append(l.list[:i], l.list[i+1:]...) + break + } + } +} + +// Exists checks if a given buffer is in the list. +func (l *bufferList) exists(b *Buffer) bool { + l.RLock() + defer l.RUnlock() + + for _, v := range l.list { + if b == v { + return true + } + } + + return false +} + +// Flush clears the list and returns its previous contents. +func (l *bufferList) flush() []*Buffer { + l.Lock() + defer l.Unlock() + + list := make([]*Buffer, len(l.list)) + copy(list, l.list) + + l.list = nil + + return list +} diff --git a/vendor/github.com/awnumar/memguard/core/coffer.go b/vendor/github.com/awnumar/memguard/core/coffer.go new file mode 100644 index 0000000..15b1593 --- /dev/null +++ b/vendor/github.com/awnumar/memguard/core/coffer.go @@ -0,0 +1,164 @@ +package core + +import ( + "errors" + "sync" + "time" + + "gitlab.com/NebulousLabs/fastrand" +) + +var ( + // Static allocation for fast random bytes reading. + buf32, _ = NewBuffer(32) +) + +// Interval of time between each verify & re-key cycle. +const Interval uint = 8 // milliseconds + +// ErrCofferExpired is returned when a function attempts to perform an operation using a secure key container that has been wiped and destroyed. +var ErrCofferExpired = errors.New(" attempted usage of destroyed key object") + +/* +Coffer is a specialized container for securing highly-sensitive, 32 byte values. +*/ +type Coffer struct { + sync.RWMutex + + left *Buffer // Left partition. + right *Buffer // Right partition. +} + +// NewCoffer is a raw constructor for the *Coffer object. +func NewCoffer() *Coffer { + // Create a new Coffer object. + s := new(Coffer) + + // Allocate the partitions. + s.left, _ = NewBuffer(32) + s.right, _ = NewBuffer(32) + + // Initialise with a random 32 byte value. + s.Initialise() + + go func(s *Coffer) { + for { + // Sleep for the specified interval. + time.Sleep(time.Duration(Interval) * time.Millisecond) + + // Re-key the contents, exiting the routine if object destroyed. + if err := s.Rekey(); err != nil { + break + } + } + }(s) + + return s +} + +/* +Initialise is used to reset the value stored inside a Coffer to a new random 32 byte value, overwriting the old. +*/ +func (s *Coffer) Initialise() error { + // Check if it has been destroyed. + if s.Destroyed() { + return ErrCofferExpired + } + + // Attain the mutex. + s.Lock() + defer s.Unlock() + + // Overwrite the old value with fresh random bytes. + Scramble(s.left.Data()) + Scramble(s.right.Data()) + + // left = left XOR hash(right) + hr := Hash(s.right.Data()) + for i := range hr { + s.left.Data()[i] ^= hr[i] + } + + return nil +} + +/* +View returns a snapshot of the contents of a Coffer inside a Buffer. As usual the Buffer should be destroyed as soon as possible after use by calling the Destroy method. +*/ +func (s *Coffer) View() (*Buffer, error) { + // Attain a read-only lock. + s.RLock() + defer s.RUnlock() + + // Check if it's destroyed. + if s.Destroyed() { + return nil, ErrCofferExpired + } + + // Create a new Buffer for the data. + b, _ := NewBuffer(32) + + // data = hash(right) XOR left + h := Hash(s.right.Data()) + for i := range b.Data() { + b.Data()[i] = h[i] ^ s.left.Data()[i] + } + + // Return the view. + return b, nil +} + +/* +Rekey is used to re-key a Coffer. Ideally this should be done at short, regular intervals. +*/ +func (s *Coffer) Rekey() error { + // Check if it has been destroyed. + if s.Destroyed() { + return ErrCofferExpired + } + + // Attain the mutex. + s.Lock() + defer s.Unlock() + + // Attain 32 bytes of fresh cryptographic buf32. + fastrand.Read(buf32.Data()) + + // Hash the current right partition for later. + hashRightCurrent := Hash(s.right.Data()) + + // new_right = current_right XOR buf32 + for i := range s.right.Data() { + s.right.Data()[i] ^= buf32.Data()[i] + } + + // new_left = current_left XOR hash(current_right) XOR hash(new_right) + hashRightNew := Hash(s.right.Data()) + for i := range s.left.Data() { + s.left.Data()[i] ^= hashRightCurrent[i] ^ hashRightNew[i] + } + Wipe(hashRightNew) + + return nil +} + +/* +Destroy wipes and cleans up all memory related to a Coffer object. Once this method has been called, the Coffer can no longer be used and a new one should be created instead. +*/ +func (s *Coffer) Destroy() { + // Attain the mutex. + s.Lock() + defer s.Unlock() + + // Destroy the partitions. + s.left.Destroy() + s.right.Destroy() +} + +// Destroyed returns a boolean value indicating if a Coffer has been destroyed. +func (s *Coffer) Destroyed() bool { + s.RLock() + defer s.RUnlock() + + return (!GetBufferState(s.left).IsAlive) && (!GetBufferState(s.right).IsAlive) +} diff --git a/vendor/github.com/awnumar/memguard/core/crypto.go b/vendor/github.com/awnumar/memguard/core/crypto.go new file mode 100644 index 0000000..c53c6aa --- /dev/null +++ b/vendor/github.com/awnumar/memguard/core/crypto.go @@ -0,0 +1,119 @@ +package core + +import ( + "crypto/rand" + "crypto/subtle" + "errors" + "unsafe" + + "golang.org/x/crypto/blake2b" + "golang.org/x/crypto/nacl/secretbox" +) + +// Overhead is the size by which the ciphertext exceeds the plaintext. +const Overhead int = secretbox.Overhead + 24 // auth + nonce + +// ErrInvalidKeyLength is returned when attempting to encrypt or decrypt with a key that is not exactly 32 bytes in size. +var ErrInvalidKeyLength = errors.New(" key must be exactly 32 bytes") + +// ErrBufferTooSmall is returned when the decryption function, Open, is given an output buffer that is too small to hold the plaintext. In practice the plaintext will be Overhead bytes smaller than the ciphertext returned by the encryption function, Seal. +var ErrBufferTooSmall = errors.New(" the given buffer is too small to hold the plaintext") + +// ErrDecryptionFailed is returned when the attempted decryption fails. This can occur if the given key is incorrect or if the ciphertext is invalid. +var ErrDecryptionFailed = errors.New(" decryption failed") + +// Encrypt takes a plaintext message and a 32 byte key and returns an authenticated ciphertext. +func Encrypt(plaintext, key []byte) ([]byte, error) { + // Check the length of the key is correct. + if len(key) != 32 { + return nil, ErrInvalidKeyLength + } + + // Get a reference to the key's underlying array without making a copy. + k := (*[32]byte)(unsafe.Pointer(&key[0])) + + // Allocate space for and generate a nonce value. + var nonce [24]byte + Scramble(nonce[:]) + + // Encrypt m and return the result. + return secretbox.Seal(nonce[:], plaintext, &nonce, k), nil +} + +/* +Decrypt decrypts a given ciphertext with a given 32 byte key and writes the result to the start of a given buffer. + +The buffer must be large enough to contain the decrypted data. This is in practice Overhead bytes less than the length of the ciphertext returned by the Seal function above. This value is the size of the nonce plus the size of the Poly1305 authenticator. + +The size of the decrypted data is returned. +*/ +func Decrypt(ciphertext, key []byte, output []byte) (int, error) { + // Check the length of the key is correct. + if len(key) != 32 { + return 0, ErrInvalidKeyLength + } + + // Check the capacity of the given output buffer. + if cap(output) < (len(ciphertext) - Overhead) { + return 0, ErrBufferTooSmall + } + + // Get a reference to the key's underlying array without making a copy. + k := (*[32]byte)(unsafe.Pointer(&key[0])) + + // Retrieve and store the nonce value. + var nonce [24]byte + Copy(nonce[:], ciphertext[:24]) + + // Decrypt and return the result. + m, ok := secretbox.Open(nil, ciphertext[24:], &nonce, k) + if ok { // Decryption successful. + Move(output[:cap(output)], m) // Move plaintext to given output buffer. + return len(m), nil // Return length of decrypted plaintext. + } + + // Decryption unsuccessful. Either the key was wrong or the authentication failed. + return 0, ErrDecryptionFailed +} + +// Hash implements a cryptographic hash function using Blake2b. +func Hash(b []byte) []byte { + h := blake2b.Sum256(b) + return h[:] +} + +// Scramble fills a given buffer with cryptographically-secure random bytes. +func Scramble(buf []byte) { + if _, err := rand.Read(buf); err != nil { + Panic(err) + } +} + +// Wipe takes a buffer and wipes it with zeroes. +func Wipe(buf []byte) { + for i := range buf { + buf[i] = 0 + } +} + +// Copy is identical to Go's builtin copy function except the copying is done in constant time. This is to mitigate against side-channel attacks. +func Copy(dst, src []byte) { + if len(dst) > len(src) { + subtle.ConstantTimeCopy(1, dst[:len(src)], src) + } else if len(dst) < len(src) { + subtle.ConstantTimeCopy(1, dst, src[:len(dst)]) + } else { + subtle.ConstantTimeCopy(1, dst, src) + } +} + +// Move is identical to Copy except it wipes the source buffer after the copy operation is executed. +func Move(dst, src []byte) { + Copy(dst, src) + Wipe(src) +} + +// Equal does a constant-time comparison of two byte slices. This is to mitigate against side-channel attacks. +func Equal(x, y []byte) bool { + return subtle.ConstantTimeCompare(x, y) == 1 +} diff --git a/vendor/github.com/awnumar/memguard/core/enclave.go b/vendor/github.com/awnumar/memguard/core/enclave.go new file mode 100644 index 0000000..9ba47a0 --- /dev/null +++ b/vendor/github.com/awnumar/memguard/core/enclave.go @@ -0,0 +1,115 @@ +package core + +import "errors" + +var ( + // Declare a key for use in encrypting data this session. + key *Coffer +) + +func init() { + // Initialize the key declared above with a random value. + key = NewCoffer() +} + +// ErrNullEnclave is returned when attempting to construct an enclave of size less than one. +var ErrNullEnclave = errors.New(" enclave size must be greater than zero") + +/* +Enclave is a sealed and encrypted container for sensitive data. +*/ +type Enclave struct { + ciphertext []byte +} + +/* +NewEnclave is a raw constructor for the Enclave object. The given buffer is wiped after the enclave is created. +*/ +func NewEnclave(buf []byte) (*Enclave, error) { + // Return an error if length < 1. + if len(buf) < 1 { + return nil, ErrNullEnclave + } + + // Create a new Enclave. + e := new(Enclave) + + // Get a view of the key. + k, err := key.View() + if err != nil { + return nil, err + } + + // Encrypt the plaintext. + e.ciphertext, err = Encrypt(buf, k.Data()) + if err != nil { + Panic(err) // key is not 32 bytes long + } + + // Destroy our copy of the key. + k.Destroy() + + // Wipe the given buffer. + Wipe(buf) + + return e, nil +} + +/* +Seal consumes a given Buffer object and returns its data secured and encrypted inside an Enclave. The given Buffer is destroyed after the Enclave is created. +*/ +func Seal(b *Buffer) (*Enclave, error) { + b.RLock() + + // Check if the Buffer has been destroyed. + if !b.alive { + return nil, ErrBufferExpired + } + + // Make the buffer mutable so that we can wipe it. + b.Melt() + + // Construct the Enclave from the Buffer's data. + e, err := NewEnclave(b.Data()) + if err != nil { + return nil, err + } + + // Destroy the Buffer object. + b.RUnlock() + b.Destroy() + + // Return the newly created Enclave. + return e, nil +} + +/* +Open decrypts an Enclave and puts the contents into a Buffer object. The given Enclave is left untouched and may be reused. + +The Buffer object should be destroyed after the contents are no longer needed. +*/ +func Open(e *Enclave) (*Buffer, error) { + // Allocate a secure Buffer to hold the decrypted data. + b, err := NewBuffer(len(e.ciphertext) - Overhead) + if err != nil { + Panic(err) // ciphertext has invalid length + } + + // Grab a view of the key. + k, err := key.View() + if err != nil { + return nil, err + } + + // Decrypt the enclave into the buffer we created. + _, err = Decrypt(e.ciphertext, k.Data(), b.Data()) + if err != nil { + return nil, err + } + + // Destroy our copy of the key. + k.Destroy() + + // Return the contents of the Enclave inside a Buffer. + return b, nil +} diff --git a/vendor/github.com/awnumar/memguard/core/exit.go b/vendor/github.com/awnumar/memguard/core/exit.go new file mode 100644 index 0000000..8838a4c --- /dev/null +++ b/vendor/github.com/awnumar/memguard/core/exit.go @@ -0,0 +1,74 @@ +package core + +import ( + "os" + + "github.com/awnumar/memguard/memcall" +) + +/* +Purge wipes all sensitive data and keys before reinitialising the session with a fresh encryption key and secure values. Subsequent library operations will use these fresh values and the old data is assumed to be practically unrecoverable. + +The creation of new Enclave objects should wait for this function to return since subsequent Enclave objects will use the newly created key. + +This function should be called before the program terminates, or else the provided Exit or Panic functions should be used to terminate. +*/ +func Purge() { + // Generate a new encryption key, wiping the old. + err := key.Initialise() + if err != nil { + key.Destroy() + key = NewCoffer() + } + + // Get a snapshot of existing Buffers. + snapshot := buffers.flush() + buffers.add(key.left, key.right, buf32) + + // Destroy them, performing the usual sanity checks. + for _, b := range snapshot { + // Don't destroy the key partitions. + if b != key.left && b != key.right && b != buf32 { + b.Destroy() + } + } +} + +/* +Exit terminates the process with a specified exit code but securely wipes and cleans up sensitive data before doing so. +*/ +func Exit(c int) { + // Wipe the encryption key used to encrypt data inside Enclaves. + key.Destroy() + + // Get a snapshot of existing Buffers. + snapshot := buffers.flush() + + // Destroy them, performing the usual sanity checks. + for _, b := range snapshot { + b.Destroy() + } + + // Exit with the specified exit code. + os.Exit(c) +} + +/* +Panic is identical to the builtin panic except it wipes all it can before calling panic. +*/ +func Panic(v interface{}) { + // Wipe both halves of the Enclave encryption key. + Wipe(key.left.Data()) + Wipe(key.right.Data()) + + // Wipe all of the currently active LockedBuffers. + for _, b := range buffers.list { + if !b.mutable { + memcall.Protect(b.inner, memcall.ReadWrite) + } + Wipe(b.Data()) + } + + // Panic. + panic(v) +} diff --git a/vendor/github.com/awnumar/memguard/docs.go b/vendor/github.com/awnumar/memguard/docs.go new file mode 100644 index 0000000..16e6fc6 --- /dev/null +++ b/vendor/github.com/awnumar/memguard/docs.go @@ -0,0 +1,90 @@ +/* +Package memguard implements a secure software enclave for the storage of sensitive information in memory. + + package main + + import ( + "fmt" + "os" + + "github.com/awnumar/memguard" + ) + + func main() { + // Safely terminate in case of an interrupt signal + memguard.CatchInterrupt() + + // Purge the session when we return + defer memguard.Purge() + + // Generate a key sealed inside an encrypted container + key := memguard.NewEnclaveRandom(32) + + // Passing the key off to another function + key = invert(key) + + // Decrypt the result returned from invert + keyBuf, err := key.Open() + if err != nil { + fmt.Fprintln(os.Stderr, err) + return + } + defer keyBuf.Destroy() + + // Um output it + fmt.Println(keyBuf.Bytes()) + } + + func invert(key *memguard.Enclave) *memguard.Enclave { + // Decrypt the key into a local copy + b, err := key.Open() + if err != nil { + memguard.SafePanic(err) + } + defer b.Destroy() // Destroy the copy when we return + + // Open returns the data in an immutable buffer, so make it mutable + b.Melt() + + // Set every element to its complement + for i := range b.Bytes() { + b.Bytes()[i] = ^b.Bytes()[i] + } + + // Return the new data in encrypted form + return b.Seal() // <- sealing also destroys b + } + +There are two main container objects exposed in this API. Enclave objects encrypt data and store the ciphertext whereas LockedBuffers are more like guarded memory allocations. There is a limit on the maximum number of LockedBuffer objects that can exist at any one time, imposed by the system's mlock limits. There is no limit on Enclaves. + +The general workflow is to store sensitive information in Enclaves when it is not immediately needed and decrypt it when and where it is. After use, the LockedBuffer should be destroyed. + +If you need access to the data inside a LockedBuffer in a type not covered by any methods provided by this API, you can type-cast the allocation's memory to whatever type you want. + + key := memguard.NewBuffer(32) + keyArrayPtr := (*[32]byte)(unsafe.Pointer(&key.Bytes()[0])) // do not dereference + +This is of course an unsafe operation and so care must be taken to ensure that the cast is valid and does not result in memory unsafety. Further examples of code and interesting use-cases can be found in the examples subpackage. + +Several functions exist to make the mass purging of data very easy. It is recommended to make use of them when appropriate. + + // Start an interrupt handler that will clean up memory before exiting + memguard.CatchInterrupt() + + // Purge the session when returning from the main function of your program + defer memguard.Purge() + + // Use the safe variants of exit functions provided in the stdlib + memguard.SafeExit(1) + memguard.SafePanic(err) + + // Destroy LockedBuffers as soon as possible after using them + b, err := enclave.Open() + if err != nil { + memguard.SafePanic(err) + } + defer b.Destroy() + +Core dumps are disabled by default. If you absolutely require them, you can enable them by using unix.Setrlimit to set RLIMIT_CORE to an appropriate value. +*/ +package memguard diff --git a/vendor/github.com/awnumar/memguard/enclave.go b/vendor/github.com/awnumar/memguard/enclave.go new file mode 100644 index 0000000..6bb20bc --- /dev/null +++ b/vendor/github.com/awnumar/memguard/enclave.go @@ -0,0 +1,49 @@ +package memguard + +import ( + "github.com/awnumar/memguard/core" +) + +/* +Enclave is a sealed and encrypted container for sensitive data. +*/ +type Enclave struct { + *core.Enclave +} + +/* +NewEnclave seals up some given data into an encrypted enclave object. The buffer is wiped after the data is copied. The length of the buffer must be strictly positive or else the function will panic. + +Alternatively, a LockedBuffer may be converted into an Enclave object using the Seal method provided. This will also have the effect of destroying the LockedBuffer. +*/ +func NewEnclave(src []byte) *Enclave { + e, err := core.NewEnclave(src) + if err != nil { + core.Panic(err) + } + return &Enclave{e} +} + +/* +NewEnclaveRandom generates and seals arbitrary amounts of cryptographically-secure random bytes into an encrypted enclave object. +*/ +func NewEnclaveRandom(size int) *Enclave { + // todo: stream data into enclave + b := NewBufferRandom(size) + return b.Seal() +} + +/* +Open decrypts an Enclave object and places its contents into an immutable LockedBuffer. An error will be returned if decryption failed. +*/ +func (e *Enclave) Open() (*LockedBuffer, error) { + b, err := core.Open(e.Enclave) + if err != nil { + if err != core.ErrDecryptionFailed { + core.Panic(err) + } + return nil, err + } + b.Freeze() + return newBuffer(b), nil +} diff --git a/vendor/github.com/awnumar/memguard/go.mod b/vendor/github.com/awnumar/memguard/go.mod new file mode 100644 index 0000000..ca0725a --- /dev/null +++ b/vendor/github.com/awnumar/memguard/go.mod @@ -0,0 +1,7 @@ +module github.com/awnumar/memguard + +require ( + gitlab.com/NebulousLabs/fastrand v0.0.0-20181126182046-603482d69e40 + golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4 + golang.org/x/sys v0.0.0-20190712062909-fae7ac547cb7 +) diff --git a/vendor/github.com/awnumar/memguard/go.sum b/vendor/github.com/awnumar/memguard/go.sum new file mode 100644 index 0000000..1713171 --- /dev/null +++ b/vendor/github.com/awnumar/memguard/go.sum @@ -0,0 +1,11 @@ +gitlab.com/NebulousLabs/fastrand v0.0.0-20181126182046-603482d69e40 h1:dizWJqTWjwyD8KGcMOwgrkqu1JIkofYgKkmDeNE7oAs= +gitlab.com/NebulousLabs/fastrand v0.0.0-20181126182046-603482d69e40/go.mod h1:rOnSnoRyxMI3fe/7KIbVcsHRGxe30OONv8dEgo+vCfA= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4 h1:HuIa8hRrWRSrqYzx1qI49NNxhdi2PrY7gxVSq1JjLDc= +golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190712062909-fae7ac547cb7 h1:LepdCS8Gf/MVejFIt8lsiexZATdoGVyp5bcyS+rYoUI= +golang.org/x/sys v0.0.0-20190712062909-fae7ac547cb7/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= diff --git a/vendor/github.com/awnumar/memguard/logo.svg b/vendor/github.com/awnumar/memguard/logo.svg new file mode 100644 index 0000000..346bba3 --- /dev/null +++ b/vendor/github.com/awnumar/memguard/logo.svg @@ -0,0 +1,2 @@ + \ No newline at end of file diff --git a/vendor/github.com/awnumar/memguard/memcall/memcall_freebsd.go b/vendor/github.com/awnumar/memguard/memcall/memcall_freebsd.go new file mode 100644 index 0000000..2f7fdc0 --- /dev/null +++ b/vendor/github.com/awnumar/memguard/memcall/memcall_freebsd.go @@ -0,0 +1,99 @@ +// +build freebsd + +package memcall + +import ( + "fmt" + + "golang.org/x/sys/unix" +) + +// Lock is a wrapper for unix.Mlock(), with extra precautions. +func Lock(b []byte) error { + // Advise the kernel not to dump. Ignore failure. + unix.Madvise(b, unix.MADV_NOCORE) + + // Call mlock. + if err := unix.Mlock(b); err != nil { + return fmt.Errorf(" could not acquire lock on %p, limit reached? [Err: %s]", &b[0], err) + } + + return nil +} + +// Unlock is a wrapper for unix.Munlock(). +func Unlock(b []byte) error { + if err := unix.Munlock(b); err != nil { + return fmt.Errorf(" could not free lock on %p [Err: %s]", &b[0], err) + } + + return nil +} + +// Alloc allocates a byte slice of length n and returns it. +func Alloc(n int) ([]byte, error) { + // Allocate the memory. + b, err := unix.Mmap(-1, 0, n, unix.PROT_READ|unix.PROT_WRITE, unix.MAP_PRIVATE|unix.MAP_ANONYMOUS|unix.MAP_NOCORE) + if err != nil { + return nil, fmt.Errorf(" could not allocate [Err: %s]", err) + } + + // Wipe it just in case there is some remnant data. + for i := range b { + b[i] = 0 + } + + // Return the allocated memory. + return b, nil +} + +// Free deallocates the byte slice specified. +func Free(b []byte) error { + // Make the memory region readable and writable. + if err := Protect(b, ReadWrite); err != nil { + return err + } + + // Wipe the memory region in case of remnant data. + for i := range b { + b[i] = 0 + } + + // Free the memory back to the kernel. + if err := unix.Munmap(b); err != nil { + return fmt.Errorf(" could not deallocate %p [Err: %s]", &b[0], err) + } + + return nil +} + +// Protect modifies the protection state for a specified byte slice. +func Protect(b []byte, mpf MemoryProtectionFlag) error { + var prot int + if mpf.flag == ReadWrite.flag { + prot = unix.PROT_READ | unix.PROT_WRITE + } else if mpf.flag == ReadOnly.flag { + prot = unix.PROT_READ + } else if mpf.flag == NoAccess.flag { + prot = unix.PROT_NONE + } else { + return ErrInvalidFlag + } + + // Change the protection value of the byte slice. + if err := unix.Mprotect(b, prot); err != nil { + return fmt.Errorf(" could not set %d on %p [Err: %s]", prot, &b[0], err) + } + + return nil +} + +// DisableCoreDumps disables core dumps on Unix systems. +func DisableCoreDumps() error { + // Disable core dumps. + if err := unix.Setrlimit(unix.RLIMIT_CORE, &unix.Rlimit{Cur: 0, Max: 0}); err != nil { + return fmt.Errorf(" could not set rlimit [Err: %s]", err) + } + + return nil +} diff --git a/vendor/github.com/awnumar/memguard/memcall/memcall_openbsd.go b/vendor/github.com/awnumar/memguard/memcall/memcall_openbsd.go new file mode 100644 index 0000000..ac6b7cd --- /dev/null +++ b/vendor/github.com/awnumar/memguard/memcall/memcall_openbsd.go @@ -0,0 +1,96 @@ +// +build openbsd + +package memcall + +import ( + "fmt" + + "golang.org/x/sys/unix" +) + +// Lock is a wrapper for mlock(2). +func Lock(b []byte) error { + // Call mlock. + if err := unix.Mlock(b); err != nil { + return fmt.Errorf(" could not acquire lock on %p, limit reached? [Err: %s]", &b[0], err) + } + + return nil +} + +// Unlock is a wrapper for munlock(2). +func Unlock(b []byte) error { + if err := unix.Munlock(b); err != nil { + return fmt.Errorf(" could not free lock on %p [Err: %s]", &b[0], err) + } + + return nil +} + +// Alloc allocates a byte slice of length n and returns it. +func Alloc(n int) ([]byte, error) { + // Allocate the memory. + b, err := unix.Mmap(-1, 0, n, unix.PROT_READ|unix.PROT_WRITE, unix.MAP_PRIVATE|unix.MAP_ANON|unix.MAP_CONCEAL) + if err != nil { + return nil, fmt.Errorf(" could not allocate [Err: %s]", err) + } + + // Wipe it just in case there is some remnant data. + for i := range b { + b[i] = 0 + } + + // Return the allocated memory. + return b, nil +} + +// Free deallocates the byte slice specified. +func Free(b []byte) error { + // Make the memory region readable and writable. + if err := Protect(b, ReadWrite); err != nil { + return err + } + + // Wipe the memory region in case of remnant data. + for i := range b { + b[i] = 0 + } + + // Free the memory back to the kernel. + if err := unix.Munmap(b); err != nil { + return fmt.Errorf(" could not deallocate %p [Err: %s]", &b[0], err) + } + + return nil +} + +// Protect modifies the protection state for a specified byte slice. +func Protect(b []byte, mpf MemoryProtectionFlag) error { + var prot int + if mpf.flag == ReadWrite.flag { + prot = unix.PROT_READ | unix.PROT_WRITE + } else if mpf.flag == ReadOnly.flag { + prot = unix.PROT_READ + } else if mpf.flag == NoAccess.flag { + prot = unix.PROT_NONE + } else { + return ErrInvalidFlag + } + + // Change the protection value of the byte slice. + if err := unix.Mprotect(b, prot); err != nil { + return fmt.Errorf(" could not set %d on %p [Err: %s]", prot, &b[0], err) + } + + return nil +} + +// DisableCoreDumps disables core dumps on Unix systems. +func DisableCoreDumps() error { + // Disable core dumps. + if err := unix.Setrlimit(unix.RLIMIT_CORE, &unix.Rlimit{Cur: 0, Max: 0}); err != nil { + return fmt.Errorf(" could not set rlimit [Err: %s]", err) + } + + return nil +} diff --git a/vendor/github.com/awnumar/memguard/memcall/memcall_osx.go b/vendor/github.com/awnumar/memguard/memcall/memcall_osx.go new file mode 100644 index 0000000..9221691 --- /dev/null +++ b/vendor/github.com/awnumar/memguard/memcall/memcall_osx.go @@ -0,0 +1,95 @@ +// +build darwin + +package memcall + +import ( + "fmt" + + "golang.org/x/sys/unix" +) + +// Lock is a wrapper for mlock(2). +func Lock(b []byte) error { + if err := unix.Mlock(b); err != nil { + return fmt.Errorf(" could not acquire lock on %p, limit reached? [Err: %s]", &b[0], err) + } + + return nil +} + +// Unlock is a wrapper for munlock(2). +func Unlock(b []byte) error { + if err := unix.Munlock(b); err != nil { + return fmt.Errorf(" could not free lock on %p [Err: %s]", &b[0], err) + } + + return nil +} + +// Alloc allocates a byte slice of length n and returns it. +func Alloc(n int) ([]byte, error) { + // Allocate the memory. + b, err := unix.Mmap(-1, 0, n, unix.PROT_READ|unix.PROT_WRITE, unix.MAP_PRIVATE|unix.MAP_ANON) + if err != nil { + return nil, fmt.Errorf(" could not allocate [Err: %s]", err) + } + + // Wipe it just in case there is some remnant data. + for i := range b { + b[i] = 0 + } + + // Return the allocated memory. + return b, nil +} + +// Free deallocates the byte slice specified. +func Free(b []byte) error { + // Make the memory region readable and writable. + if err := Protect(b, ReadWrite); err != nil { + return err + } + + // Wipe the memory region in case of remnant data. + for i := range b { + b[i] = 0 + } + + // Free the memory back to the kernel. + if err := unix.Munmap(b); err != nil { + return fmt.Errorf(" could not deallocate %p [Err: %s]", &b[0], err) + } + + return nil +} + +// Protect modifies the protection state for a specified byte slice. +func Protect(b []byte, mpf MemoryProtectionFlag) error { + var prot int + if mpf.flag == ReadWrite.flag { + prot = unix.PROT_READ | unix.PROT_WRITE + } else if mpf.flag == ReadOnly.flag { + prot = unix.PROT_READ + } else if mpf.flag == NoAccess.flag { + prot = unix.PROT_NONE + } else { + return ErrInvalidFlag + } + + // Change the protection value of the byte slice. + if err := unix.Mprotect(b, prot); err != nil { + return fmt.Errorf(" could not set %d on %p [Err: %s]", prot, &b[0], err) + } + + return nil +} + +// DisableCoreDumps disables core dumps on Unix systems. +func DisableCoreDumps() error { + // Disable core dumps. + if err := unix.Setrlimit(unix.RLIMIT_CORE, &unix.Rlimit{Cur: 0, Max: 0}); err != nil { + return fmt.Errorf(" could not set rlimit [Err: %s]", err) + } + + return nil +} diff --git a/vendor/github.com/awnumar/memguard/memcall/memcall_symbols.go b/vendor/github.com/awnumar/memguard/memcall/memcall_symbols.go new file mode 100644 index 0000000..e64639a --- /dev/null +++ b/vendor/github.com/awnumar/memguard/memcall/memcall_symbols.go @@ -0,0 +1,27 @@ +package memcall + +import "errors" + +// Structure for typed specification of memory protection constants. + +// MemoryProtectionFlag specifies some particular memory protection flag. +type MemoryProtectionFlag struct { + // NOACCESS := 1 (00000001) + // READ := 2 (00000010) + // WRITE := 4 (00000100) // unused + // READWRITE := 6 (00000110) + + flag byte +} + +// NoAccess specifies that the memory should be marked unreadable and immutable. +var NoAccess = MemoryProtectionFlag{1} + +// ReadOnly specifies that the memory should be marked read-only (immutable). +var ReadOnly = MemoryProtectionFlag{2} + +// ReadWrite specifies that the memory should be made readable and writable. +var ReadWrite = MemoryProtectionFlag{6} + +// ErrInvalidFlag indicates that a given memory protection flag is undefined. +var ErrInvalidFlag = errors.New(" memory protection flag is undefined") diff --git a/vendor/github.com/awnumar/memguard/memcall/memcall_unix.go b/vendor/github.com/awnumar/memguard/memcall/memcall_unix.go new file mode 100644 index 0000000..d3262c9 --- /dev/null +++ b/vendor/github.com/awnumar/memguard/memcall/memcall_unix.go @@ -0,0 +1,99 @@ +// +build !windows,!darwin,!openbsd,!freebsd + +package memcall + +import ( + "fmt" + + "golang.org/x/sys/unix" +) + +// Lock is a wrapper for mlock(2), with extra precautions. +func Lock(b []byte) error { + // Advise the kernel not to dump. Ignore failure. + unix.Madvise(b, unix.MADV_DONTDUMP) + + // Call mlock. + if err := unix.Mlock(b); err != nil { + return fmt.Errorf(" could not acquire lock on %p, limit reached? [Err: %s]", &b[0], err) + } + + return nil +} + +// Unlock is a wrapper for munlock(2). +func Unlock(b []byte) error { + if err := unix.Munlock(b); err != nil { + return fmt.Errorf(" could not free lock on %p [Err: %s]", &b[0], err) + } + + return nil +} + +// Alloc allocates a byte slice of length n and returns it. +func Alloc(n int) ([]byte, error) { + // Allocate the memory. + b, err := unix.Mmap(-1, 0, n, unix.PROT_READ|unix.PROT_WRITE, unix.MAP_PRIVATE|unix.MAP_ANONYMOUS) + if err != nil { + return nil, fmt.Errorf(" could not allocate [Err: %s]", err) + } + + // Wipe it just in case there is some remnant data. + for i := range b { + b[i] = 0 + } + + // Return the allocated memory. + return b, nil +} + +// Free deallocates the byte slice specified. +func Free(b []byte) error { + // Make the memory region readable and writable. + if err := Protect(b, ReadWrite); err != nil { + return err + } + + // Wipe the memory region in case of remnant data. + for i := range b { + b[i] = 0 + } + + // Free the memory back to the kernel. + if err := unix.Munmap(b); err != nil { + return fmt.Errorf(" could not deallocate %p [Err: %s]", &b[0], err) + } + + return nil +} + +// Protect modifies the protection state for a specified byte slice. +func Protect(b []byte, mpf MemoryProtectionFlag) error { + var prot int + if mpf.flag == ReadWrite.flag { + prot = unix.PROT_READ | unix.PROT_WRITE + } else if mpf.flag == ReadOnly.flag { + prot = unix.PROT_READ + } else if mpf.flag == NoAccess.flag { + prot = unix.PROT_NONE + } else { + return ErrInvalidFlag + } + + // Change the protection value of the byte slice. + if err := unix.Mprotect(b, prot); err != nil { + return fmt.Errorf(" could not set %d on %p [Err: %s]", prot, &b[0], err) + } + + return nil +} + +// DisableCoreDumps disables core dumps on Unix systems. +func DisableCoreDumps() error { + // Disable core dumps. + if err := unix.Setrlimit(unix.RLIMIT_CORE, &unix.Rlimit{Cur: 0, Max: 0}); err != nil { + return fmt.Errorf(" could not set rlimit [Err: %s]", err) + } + + return nil +} diff --git a/vendor/github.com/awnumar/memguard/memcall/memcall_windows.go b/vendor/github.com/awnumar/memguard/memcall/memcall_windows.go new file mode 100644 index 0000000..004cd81 --- /dev/null +++ b/vendor/github.com/awnumar/memguard/memcall/memcall_windows.go @@ -0,0 +1,115 @@ +// +build windows + +package memcall + +import ( + "fmt" + "unsafe" + + "golang.org/x/sys/windows" +) + +// Placeholder variable for when we need a valid pointer to zero bytes. +var _zero uintptr + +// Lock is a wrapper for windows.VirtualLock() +func Lock(b []byte) error { + if err := windows.VirtualLock(_getPtr(b), uintptr(len(b))); err != nil { + return fmt.Errorf(" could not acquire lock on %p, limit reached? [Err: %s]", &b[0], err) + } + + return nil +} + +// Unlock is a wrapper for windows.VirtualUnlock() +func Unlock(b []byte) error { + if err := windows.VirtualUnlock(_getPtr(b), uintptr(len(b))); err != nil { + return fmt.Errorf(" could not free lock on %p [Err: %s]", &b[0], err) + } + + return nil +} + +// Alloc allocates a byte slice of length n and returns it. +func Alloc(n int) ([]byte, error) { + // Allocate the memory. + ptr, err := windows.VirtualAlloc(_zero, uintptr(n), 0x1000|0x2000, 0x4) + if err != nil { + return nil, fmt.Errorf(" could not allocate [Err: %s]", err) + } + + // Convert this pointer to a slice. + b := _getBytes(ptr, n, n) + + // Wipe it just in case there is some remnant data. + for i := range b { + b[i] = 0 + } + + // Return the allocated memory. + return b, nil +} + +// Free deallocates the byte slice specified. +func Free(b []byte) error { + // Make the memory region readable and writable. + if err := Protect(b, ReadWrite); err != nil { + return err + } + + // Wipe the memory region in case of remnant data. + for i := range b { + b[i] = 0 + } + + // Free the memory back to the kernel. + if err := windows.VirtualFree(_getPtr(b), uintptr(0), 0x8000); err != nil { + return fmt.Errorf(" could not deallocate %p [Err: %s]", &b[0], err) + } + + return nil +} + +// Protect modifies the memory protection flags for a specified byte slice. +func Protect(b []byte, mpf MemoryProtectionFlag) error { + var prot int + if mpf.flag == ReadWrite.flag { + prot = 0x4 // PAGE_READWRITE + } else if mpf.flag == ReadOnly.flag { + prot = 0x2 // PAGE_READ + } else if mpf.flag == NoAccess.flag { + prot = 0x1 // PAGE_NOACCESS + } else { + return ErrInvalidFlag + } + + var oldProtect uint32 + if err := windows.VirtualProtect(_getPtr(b), uintptr(len(b)), uint32(prot), &oldProtect); err != nil { + return fmt.Errorf(" could not set %d on %p [Err: %s]", prot, &b[0], err) + } + + return nil +} + +// DisableCoreDumps is included for compatibility reasons. On windows it is a no-op function. +func DisableCoreDumps() error { return nil } + +// Auxiliary functions. +func _getPtr(b []byte) uintptr { + var _p0 unsafe.Pointer + if len(b) > 0 { + _p0 = unsafe.Pointer(&b[0]) + } else { + _p0 = unsafe.Pointer(&_zero) + } + return uintptr(_p0) +} + +func _getBytes(ptr uintptr, len int, cap int) []byte { + var sl = struct { + addr uintptr + len int + cap int + }{ptr, len, cap} + return *(*[]byte)(unsafe.Pointer(&sl)) +} diff --git a/vendor/github.com/awnumar/memguard/memguard.go b/vendor/github.com/awnumar/memguard/memguard.go new file mode 100644 index 0000000..1dab74b --- /dev/null +++ b/vendor/github.com/awnumar/memguard/memguard.go @@ -0,0 +1,46 @@ +package memguard + +import ( + "github.com/awnumar/memguard/core" + "github.com/awnumar/memguard/memcall" +) + +func init() { + // Disable core dumps on unixesque systems. Does nothing on windows :( + memcall.DisableCoreDumps() +} + +/* +ScrambleBytes overwrites an arbitrary buffer with cryptographically-secure random bytes. +*/ +func ScrambleBytes(buf []byte) { + core.Scramble(buf) +} + +/* +WipeBytes overwrites an arbitrary buffer with zeroes. +*/ +func WipeBytes(buf []byte) { + core.Wipe(buf) +} + +/* +Purge resets the session key to a fresh value and destroys all existing LockedBuffers. Existing Enclave objects will no longer be decryptable. +*/ +func Purge() { + core.Purge() +} + +/* +SafePanic wipes all it can before calling panic(v). +*/ +func SafePanic(v interface{}) { + core.Panic(v) +} + +/* +SafeExit destroys everything sensitive before exiting with a specified status code. +*/ +func SafeExit(c int) { + core.Exit(c) +} diff --git a/vendor/github.com/awnumar/memguard/signals.go b/vendor/github.com/awnumar/memguard/signals.go new file mode 100644 index 0000000..78dff56 --- /dev/null +++ b/vendor/github.com/awnumar/memguard/signals.go @@ -0,0 +1,63 @@ +package memguard + +import ( + "os" + "os/signal" + "sync" + + "github.com/awnumar/memguard/core" +) + +var ( + // Ensure we only start a single signal handling instance + create sync.Once + + // Channel for updating the signal handler + sigfunc = make(chan func(os.Signal), 1) + + // Channel that caught signals are sent to by the runtime + listener = make(chan os.Signal, 4) +) + +/* +CatchSignal assigns a given function to be run in the event of a signal being received by the process. If no signals are provided all signals will be caught. + + 1. Signal is caught by the process + 2. Interrupt handler is executed + 3. Secure session state is wiped + 4. Process terminates with exit code 1 + +This function can be called multiple times with the effect that only the last call will have any effect. +*/ +func CatchSignal(f func(os.Signal), signals ...os.Signal) { + create.Do(func() { + // Start a goroutine to listen on the channels. + go func() { + var handler func(os.Signal) + for { + select { + case signal := <-listener: + handler(signal) + core.Exit(1) + case handler = <-sigfunc: + } + } + }() + }) + + // Update the handler function. + sigfunc <- f + + // Notify the channel if we receive a signal. + signal.Reset() + signal.Notify(listener, signals...) +} + +/* +CatchInterrupt is a wrapper around CatchSignal that makes it easy to safely handle receiving interrupt signals. If an interrupt is received, the process will wipe sensitive data in memory before terminating. + +A subsequent call to CatchSignal will override this call. +*/ +func CatchInterrupt() { + CatchSignal(func(_ os.Signal) {}, os.Interrupt) +} diff --git a/vendor/gitlab.com/NebulousLabs/fastrand/LICENSE b/vendor/gitlab.com/NebulousLabs/fastrand/LICENSE new file mode 100644 index 0000000..c2667a1 --- /dev/null +++ b/vendor/gitlab.com/NebulousLabs/fastrand/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2017 Nebulous Inc. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/vendor/gitlab.com/NebulousLabs/fastrand/README.md b/vendor/gitlab.com/NebulousLabs/fastrand/README.md new file mode 100644 index 0000000..dc38ac8 --- /dev/null +++ b/vendor/gitlab.com/NebulousLabs/fastrand/README.md @@ -0,0 +1,55 @@ +fastrand +-------- + +[![GoDoc](https://godoc.org/gitlab.com/NebulousLabs/fastrand?status.svg)](https://godoc.org/gitlab.com/NebulousLabs/fastrand) +[![Go Report Card](http://goreportcard.com/badge/gitlab.com/NebulousLabs/fastrand)](https://goreportcard.com/report/gitlab.com/NebulousLabs/fastrand) + +``` +go get gitlab.com/NebulousLabs/fastrand +``` + +`fastrand` implements a cryptographically secure pseudorandom number generator. +The generator is seeded using the system's default entropy source, and +thereafter produces random values via repeated hashing. As a result, `fastrand` +can generate randomness much faster than `crypto/rand`, and generation cannot +fail beyond a potential panic during `init()`. + +`fastrand` also scales better than `crypto/rand` and `math/rand` when called in +parallel. In fact, `fastrand` can even outperform `math/rand` when using enough threads. + + +## Benchmarks ## + +``` +// 32 byte reads +BenchmarkRead32 10000000 175 ns/op 181.86 MB/s +BenchmarkReadCrypto32 500000 2733 ns/op 11.71 MB/s + +// 512 kb reads +BenchmarkRead512kb 1000 1336217 ns/op 383.17 MB/s +BenchmarkReadCrypto512kb 50 33423693 ns/op 15.32 MB/s + +// 32 byte reads using 4 threads +BenchmarkRead4Threads32 3000000 392 ns/op 326.46 MB/s +BenchmarkReadCrypto4Threads32 200000 7579 ns/op 16.89 MB/s + +// 512 kb reads using 4 threads +BenchmarkRead4Threads512kb 1000 1899048 ns/op 1078.43 MB/s +BenchmarkReadCrypto4Threads512kb 20 97423380 ns/op 21.02 MB/s +``` + +## Security ## + +`fastrand` uses an algorithm similar to Fortuna, which is the basis for the +`/dev/random` device in FreeBSD. However, although the techniques used by +`fastrand` are known to be secure, the specific implementation has not been +reviewed by a security professional. Use with caution. + +The general strategy is to use `crypto/rand` at init to get 32 bytes of strong +entropy. From there, the entropy is concatenated to a counter and hashed +repeatedly, providing 64 bytes of random output each time the counter is +incremented. The counter is 16 bytes, which provides strong guarantees that a +cycle will not be seen throughout the lifetime of the program. + +The `sync/atomic` package is used to ensure that multiple threads calling +`fastrand` concurrently are always guaranteed to end up with unique counters. diff --git a/vendor/gitlab.com/NebulousLabs/fastrand/fastrand.go b/vendor/gitlab.com/NebulousLabs/fastrand/fastrand.go new file mode 100644 index 0000000..dc45e86 --- /dev/null +++ b/vendor/gitlab.com/NebulousLabs/fastrand/fastrand.go @@ -0,0 +1,186 @@ +// Package fastrand implements a cryptographically secure pseudorandom number +// generator. The generator is seeded using the system's default entropy source, +// and thereafter produces random values via repeated hashing. As a result, +// fastrand can generate randomness much faster than crypto/rand, and generation +// cannot fail beyond a potential panic at init. +// +// The method used in this package is similar to the Fortuna algorithm, which is +// used in used in FreeBSD for /dev/urandom. This package uses techniques that +// are known to be secure, however the exact implementation has not been heavily +// reviewed by cryptographers. +package fastrand + +import ( + "crypto/rand" + "encoding/binary" + "math" + "math/big" + "strconv" + "sync/atomic" + "unsafe" + + "golang.org/x/crypto/blake2b" +) + +// A randReader produces random values via repeated hashing. The entropy field +// is the concatenation of an initial seed and a 128-bit counter. Each time +// the entropy is hashed, the counter is incremented. +type randReader struct { + counter uint64 // First 64 bits of the counter. + counterExtra uint64 // Second 64 bits of the counter. + entropy [32]byte +} + +// Reader is a global, shared instance of a cryptographically strong pseudo- +// random generator. It uses blake2b as its hashing function. Reader is safe +// for concurrent use by multiple goroutines. +var Reader *randReader + +// init provides the initial entropy for the reader that will seed all numbers +// coming out of fastrand. +func init() { + r := &randReader{} + n, err := rand.Read(r.entropy[:]) + if err != nil || n != len(r.entropy) { + panic("not enough entropy to fill fastrand reader at startup") + } + Reader = r +} + +// Read fills b with random data. It always returns len(b), nil. +func (r *randReader) Read(b []byte) (int, error) { + if len(b) == 0 { + return 0, nil + } + // Grab a unique counter from the reader, while atomically updating the + // counter so that concurrent callers also end up with unique values. + counter := atomic.AddUint64(&r.counter, 1) + counterExtra := atomic.LoadUint64(&r.counterExtra) + + // Increment counterExtra when counter is close to overflowing. We cannot + // wait until counter == math.MaxUint64 to increment counterExtra, because + // another goroutine could call Read, overflowing counter to 0 before the + // first goroutine increments counterExtra. The second goroutine would then + // be reusing the counter pair (0, 0). Instead, we increment at 1<<63 so + // that there is little risk of an overflow. + // + // There is still a potential overlap near 1<<63, though, because another + // goroutine could see counter == 1<<63+1 before the first goroutine + // increments counterExtra. The counter pair (1<<63+1, 1) would then be + // reused. To prevent this, we also increment at math.MaxUint64. This means + // that in order for an overlap to occur, 1<<63 goroutine would need to + // increment counter before the first goroutine increments counterExtra. + // + // This strategy means that many counters will be omitted, and that the + // total space cycle time is potentially as low as 2^126. This is fine + // however, as the security model merely mandates that no counter is ever + // used twice. + if counter == 1<<63 || counter == math.MaxUint64 { + atomic.AddUint64(&r.counterExtra, 1) + } + + // Copy the counter and entropy into a separate slice, so that the result + // may be used in isolation of the other threads. The counter ensures that + // the result is unique to this thread. + seed := make([]byte, 64) + binary.LittleEndian.PutUint64(seed[0:8], counter) + binary.LittleEndian.PutUint64(seed[8:16], counterExtra) + // Leave 16 bytes for the inner counter. + copy(seed[32:], r.entropy[:]) + + // Set up an inner counter, that can be incremented to produce unique + // entropy within this thread. + n := 0 + innerCounter := uint64(0) + innerCounterExtra := uint64(0) + for n < len(b) { + // Copy in the inner counter values. + binary.LittleEndian.PutUint64(seed[16:24], innerCounter) + binary.LittleEndian.PutUint64(seed[24:32], innerCounterExtra) + + // Hash the seed to produce the next set of entropy. + result := blake2b.Sum512(seed) + n += copy(b[n:], result[:]) + + // Increment the inner counter. Because we are the only thread accessing + // the counter, we can wait until the first 64 bits have reached their + // maximum value before incrementing the next 64 bits. + innerCounter++ + if innerCounter == math.MaxUint64 { + innerCounterExtra++ + } + } + return n, nil +} + +// Read is a helper function that calls Reader.Read on b. It always fills b +// completely. +func Read(b []byte) { Reader.Read(b) } + +// Bytes is a helper function that returns n bytes of random data. +func Bytes(n int) []byte { + b := make([]byte, n) + Read(b) + return b +} + +// Uint64n returns a uniform random uint64 in [0,n). It panics if n == 0. +func Uint64n(n uint64) uint64 { + if n == 0 { + panic("fastrand: argument to Uint64n is 0") + } + // To eliminate modulo bias, keep selecting at random until we fall within + // a range that is evenly divisible by n. + // NOTE: since n is at most math.MaxUint64, max is minimized when: + // n = math.MaxUint64/2 + 1 -> max = math.MaxUint64 - math.MaxUint64/2 + // This gives an expected 2 tries before choosing a value < max. + max := math.MaxUint64 - math.MaxUint64%n + var r uint64 + b := (*[8]byte)(unsafe.Pointer(&r))[:] + Read(b) + for r >= max { + Read(b) + } + return r % n +} + +// Intn returns a uniform random int in [0,n). It panics if n <= 0. +func Intn(n int) int { + if n <= 0 { + panic("fastrand: argument to Intn is <= 0: " + strconv.Itoa(n)) + } + // NOTE: since n is at most math.MaxUint64/2, max is minimized when: + // n = math.MaxUint64/4 + 1 -> max = math.MaxUint64 - math.MaxUint64/4 + // This gives an expected 1.333 tries before choosing a value < max. + return int(Uint64n(uint64(n))) +} + +// BigIntn returns a uniform random *big.Int in [0,n). It panics if n <= 0. +func BigIntn(n *big.Int) *big.Int { + i, _ := rand.Int(Reader, n) + return i +} + +// Perm returns a random permutation of the integers [0,n). +func Perm(n int) []int { + m := make([]int, n) + for i := 1; i < n; i++ { + j := Intn(i + 1) + m[i] = m[j] + m[j] = i + } + return m +} + +// Shuffle randomizes the order of elements. n is the number of elements. It +// panics if n < 0. swap swaps the elements with indexes i and j. +func Shuffle(n int, swap func(i, j int)) { + if n < 0 { + panic("fastrand: argument to Shuffle is < 0") + } + // Fisher-Yates + for i := n - 1; i > 0; i-- { + j := Intn(i + 1) + swap(i, j) + } +} diff --git a/vendor/golang.org/x/crypto/AUTHORS b/vendor/golang.org/x/crypto/AUTHORS new file mode 100644 index 0000000..2b00ddb --- /dev/null +++ b/vendor/golang.org/x/crypto/AUTHORS @@ -0,0 +1,3 @@ +# This source code refers to The Go Authors for copyright purposes. +# The master list of authors is in the main Go distribution, +# visible at https://tip.golang.org/AUTHORS. diff --git a/vendor/golang.org/x/crypto/CONTRIBUTORS b/vendor/golang.org/x/crypto/CONTRIBUTORS new file mode 100644 index 0000000..1fbd3e9 --- /dev/null +++ b/vendor/golang.org/x/crypto/CONTRIBUTORS @@ -0,0 +1,3 @@ +# This source code was written by the Go contributors. +# The master list of contributors is in the main Go distribution, +# visible at https://tip.golang.org/CONTRIBUTORS. diff --git a/vendor/golang.org/x/crypto/LICENSE b/vendor/golang.org/x/crypto/LICENSE new file mode 100644 index 0000000..6a66aea --- /dev/null +++ b/vendor/golang.org/x/crypto/LICENSE @@ -0,0 +1,27 @@ +Copyright (c) 2009 The Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/golang.org/x/crypto/PATENTS b/vendor/golang.org/x/crypto/PATENTS new file mode 100644 index 0000000..7330990 --- /dev/null +++ b/vendor/golang.org/x/crypto/PATENTS @@ -0,0 +1,22 @@ +Additional IP Rights Grant (Patents) + +"This implementation" means the copyrightable works distributed by +Google as part of the Go project. + +Google hereby grants to You a perpetual, worldwide, non-exclusive, +no-charge, royalty-free, irrevocable (except as stated in this section) +patent license to make, have made, use, offer to sell, sell, import, +transfer and otherwise run, modify and propagate the contents of this +implementation of Go, where such license applies only to those patent +claims, both currently owned or controlled by Google and acquired in +the future, licensable by Google that are necessarily infringed by this +implementation of Go. This grant does not include claims that would be +infringed only as a consequence of further modification of this +implementation. If you or your agent or exclusive licensee institute or +order or agree to the institution of patent litigation against any +entity (including a cross-claim or counterclaim in a lawsuit) alleging +that this implementation of Go or any code incorporated within this +implementation of Go constitutes direct or contributory patent +infringement, or inducement of patent infringement, then any patent +rights granted to you under this License for this implementation of Go +shall terminate as of the date such litigation is filed. diff --git a/vendor/golang.org/x/crypto/blake2b/blake2b.go b/vendor/golang.org/x/crypto/blake2b/blake2b.go new file mode 100644 index 0000000..c160e1a --- /dev/null +++ b/vendor/golang.org/x/crypto/blake2b/blake2b.go @@ -0,0 +1,289 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package blake2b implements the BLAKE2b hash algorithm defined by RFC 7693 +// and the extendable output function (XOF) BLAKE2Xb. +// +// For a detailed specification of BLAKE2b see https://blake2.net/blake2.pdf +// and for BLAKE2Xb see https://blake2.net/blake2x.pdf +// +// If you aren't sure which function you need, use BLAKE2b (Sum512 or New512). +// If you need a secret-key MAC (message authentication code), use the New512 +// function with a non-nil key. +// +// BLAKE2X is a construction to compute hash values larger than 64 bytes. It +// can produce hash values between 0 and 4 GiB. +package blake2b + +import ( + "encoding/binary" + "errors" + "hash" +) + +const ( + // The blocksize of BLAKE2b in bytes. + BlockSize = 128 + // The hash size of BLAKE2b-512 in bytes. + Size = 64 + // The hash size of BLAKE2b-384 in bytes. + Size384 = 48 + // The hash size of BLAKE2b-256 in bytes. + Size256 = 32 +) + +var ( + useAVX2 bool + useAVX bool + useSSE4 bool +) + +var ( + errKeySize = errors.New("blake2b: invalid key size") + errHashSize = errors.New("blake2b: invalid hash size") +) + +var iv = [8]uint64{ + 0x6a09e667f3bcc908, 0xbb67ae8584caa73b, 0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1, + 0x510e527fade682d1, 0x9b05688c2b3e6c1f, 0x1f83d9abfb41bd6b, 0x5be0cd19137e2179, +} + +// Sum512 returns the BLAKE2b-512 checksum of the data. +func Sum512(data []byte) [Size]byte { + var sum [Size]byte + checkSum(&sum, Size, data) + return sum +} + +// Sum384 returns the BLAKE2b-384 checksum of the data. +func Sum384(data []byte) [Size384]byte { + var sum [Size]byte + var sum384 [Size384]byte + checkSum(&sum, Size384, data) + copy(sum384[:], sum[:Size384]) + return sum384 +} + +// Sum256 returns the BLAKE2b-256 checksum of the data. +func Sum256(data []byte) [Size256]byte { + var sum [Size]byte + var sum256 [Size256]byte + checkSum(&sum, Size256, data) + copy(sum256[:], sum[:Size256]) + return sum256 +} + +// New512 returns a new hash.Hash computing the BLAKE2b-512 checksum. A non-nil +// key turns the hash into a MAC. The key must be between zero and 64 bytes long. +func New512(key []byte) (hash.Hash, error) { return newDigest(Size, key) } + +// New384 returns a new hash.Hash computing the BLAKE2b-384 checksum. A non-nil +// key turns the hash into a MAC. The key must be between zero and 64 bytes long. +func New384(key []byte) (hash.Hash, error) { return newDigest(Size384, key) } + +// New256 returns a new hash.Hash computing the BLAKE2b-256 checksum. A non-nil +// key turns the hash into a MAC. The key must be between zero and 64 bytes long. +func New256(key []byte) (hash.Hash, error) { return newDigest(Size256, key) } + +// New returns a new hash.Hash computing the BLAKE2b checksum with a custom length. +// A non-nil key turns the hash into a MAC. The key must be between zero and 64 bytes long. +// The hash size can be a value between 1 and 64 but it is highly recommended to use +// values equal or greater than: +// - 32 if BLAKE2b is used as a hash function (The key is zero bytes long). +// - 16 if BLAKE2b is used as a MAC function (The key is at least 16 bytes long). +// When the key is nil, the returned hash.Hash implements BinaryMarshaler +// and BinaryUnmarshaler for state (de)serialization as documented by hash.Hash. +func New(size int, key []byte) (hash.Hash, error) { return newDigest(size, key) } + +func newDigest(hashSize int, key []byte) (*digest, error) { + if hashSize < 1 || hashSize > Size { + return nil, errHashSize + } + if len(key) > Size { + return nil, errKeySize + } + d := &digest{ + size: hashSize, + keyLen: len(key), + } + copy(d.key[:], key) + d.Reset() + return d, nil +} + +func checkSum(sum *[Size]byte, hashSize int, data []byte) { + h := iv + h[0] ^= uint64(hashSize) | (1 << 16) | (1 << 24) + var c [2]uint64 + + if length := len(data); length > BlockSize { + n := length &^ (BlockSize - 1) + if length == n { + n -= BlockSize + } + hashBlocks(&h, &c, 0, data[:n]) + data = data[n:] + } + + var block [BlockSize]byte + offset := copy(block[:], data) + remaining := uint64(BlockSize - offset) + if c[0] < remaining { + c[1]-- + } + c[0] -= remaining + + hashBlocks(&h, &c, 0xFFFFFFFFFFFFFFFF, block[:]) + + for i, v := range h[:(hashSize+7)/8] { + binary.LittleEndian.PutUint64(sum[8*i:], v) + } +} + +type digest struct { + h [8]uint64 + c [2]uint64 + size int + block [BlockSize]byte + offset int + + key [BlockSize]byte + keyLen int +} + +const ( + magic = "b2b" + marshaledSize = len(magic) + 8*8 + 2*8 + 1 + BlockSize + 1 +) + +func (d *digest) MarshalBinary() ([]byte, error) { + if d.keyLen != 0 { + return nil, errors.New("crypto/blake2b: cannot marshal MACs") + } + b := make([]byte, 0, marshaledSize) + b = append(b, magic...) + for i := 0; i < 8; i++ { + b = appendUint64(b, d.h[i]) + } + b = appendUint64(b, d.c[0]) + b = appendUint64(b, d.c[1]) + // Maximum value for size is 64 + b = append(b, byte(d.size)) + b = append(b, d.block[:]...) + b = append(b, byte(d.offset)) + return b, nil +} + +func (d *digest) UnmarshalBinary(b []byte) error { + if len(b) < len(magic) || string(b[:len(magic)]) != magic { + return errors.New("crypto/blake2b: invalid hash state identifier") + } + if len(b) != marshaledSize { + return errors.New("crypto/blake2b: invalid hash state size") + } + b = b[len(magic):] + for i := 0; i < 8; i++ { + b, d.h[i] = consumeUint64(b) + } + b, d.c[0] = consumeUint64(b) + b, d.c[1] = consumeUint64(b) + d.size = int(b[0]) + b = b[1:] + copy(d.block[:], b[:BlockSize]) + b = b[BlockSize:] + d.offset = int(b[0]) + return nil +} + +func (d *digest) BlockSize() int { return BlockSize } + +func (d *digest) Size() int { return d.size } + +func (d *digest) Reset() { + d.h = iv + d.h[0] ^= uint64(d.size) | (uint64(d.keyLen) << 8) | (1 << 16) | (1 << 24) + d.offset, d.c[0], d.c[1] = 0, 0, 0 + if d.keyLen > 0 { + d.block = d.key + d.offset = BlockSize + } +} + +func (d *digest) Write(p []byte) (n int, err error) { + n = len(p) + + if d.offset > 0 { + remaining := BlockSize - d.offset + if n <= remaining { + d.offset += copy(d.block[d.offset:], p) + return + } + copy(d.block[d.offset:], p[:remaining]) + hashBlocks(&d.h, &d.c, 0, d.block[:]) + d.offset = 0 + p = p[remaining:] + } + + if length := len(p); length > BlockSize { + nn := length &^ (BlockSize - 1) + if length == nn { + nn -= BlockSize + } + hashBlocks(&d.h, &d.c, 0, p[:nn]) + p = p[nn:] + } + + if len(p) > 0 { + d.offset += copy(d.block[:], p) + } + + return +} + +func (d *digest) Sum(sum []byte) []byte { + var hash [Size]byte + d.finalize(&hash) + return append(sum, hash[:d.size]...) +} + +func (d *digest) finalize(hash *[Size]byte) { + var block [BlockSize]byte + copy(block[:], d.block[:d.offset]) + remaining := uint64(BlockSize - d.offset) + + c := d.c + if c[0] < remaining { + c[1]-- + } + c[0] -= remaining + + h := d.h + hashBlocks(&h, &c, 0xFFFFFFFFFFFFFFFF, block[:]) + + for i, v := range h { + binary.LittleEndian.PutUint64(hash[8*i:], v) + } +} + +func appendUint64(b []byte, x uint64) []byte { + var a [8]byte + binary.BigEndian.PutUint64(a[:], x) + return append(b, a[:]...) +} + +func appendUint32(b []byte, x uint32) []byte { + var a [4]byte + binary.BigEndian.PutUint32(a[:], x) + return append(b, a[:]...) +} + +func consumeUint64(b []byte) ([]byte, uint64) { + x := binary.BigEndian.Uint64(b) + return b[8:], x +} + +func consumeUint32(b []byte) ([]byte, uint32) { + x := binary.BigEndian.Uint32(b) + return b[4:], x +} diff --git a/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.go b/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.go new file mode 100644 index 0000000..4d31dd0 --- /dev/null +++ b/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.go @@ -0,0 +1,37 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build go1.7,amd64,!gccgo,!appengine + +package blake2b + +import "golang.org/x/sys/cpu" + +func init() { + useAVX2 = cpu.X86.HasAVX2 + useAVX = cpu.X86.HasAVX + useSSE4 = cpu.X86.HasSSE41 +} + +//go:noescape +func hashBlocksAVX2(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) + +//go:noescape +func hashBlocksAVX(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) + +//go:noescape +func hashBlocksSSE4(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) + +func hashBlocks(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) { + switch { + case useAVX2: + hashBlocksAVX2(h, c, flag, blocks) + case useAVX: + hashBlocksAVX(h, c, flag, blocks) + case useSSE4: + hashBlocksSSE4(h, c, flag, blocks) + default: + hashBlocksGeneric(h, c, flag, blocks) + } +} diff --git a/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.s b/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.s new file mode 100644 index 0000000..5593b1b --- /dev/null +++ b/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.s @@ -0,0 +1,750 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build go1.7,amd64,!gccgo,!appengine + +#include "textflag.h" + +DATA ·AVX2_iv0<>+0x00(SB)/8, $0x6a09e667f3bcc908 +DATA ·AVX2_iv0<>+0x08(SB)/8, $0xbb67ae8584caa73b +DATA ·AVX2_iv0<>+0x10(SB)/8, $0x3c6ef372fe94f82b +DATA ·AVX2_iv0<>+0x18(SB)/8, $0xa54ff53a5f1d36f1 +GLOBL ·AVX2_iv0<>(SB), (NOPTR+RODATA), $32 + +DATA ·AVX2_iv1<>+0x00(SB)/8, $0x510e527fade682d1 +DATA ·AVX2_iv1<>+0x08(SB)/8, $0x9b05688c2b3e6c1f +DATA ·AVX2_iv1<>+0x10(SB)/8, $0x1f83d9abfb41bd6b +DATA ·AVX2_iv1<>+0x18(SB)/8, $0x5be0cd19137e2179 +GLOBL ·AVX2_iv1<>(SB), (NOPTR+RODATA), $32 + +DATA ·AVX2_c40<>+0x00(SB)/8, $0x0201000706050403 +DATA ·AVX2_c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b +DATA ·AVX2_c40<>+0x10(SB)/8, $0x0201000706050403 +DATA ·AVX2_c40<>+0x18(SB)/8, $0x0a09080f0e0d0c0b +GLOBL ·AVX2_c40<>(SB), (NOPTR+RODATA), $32 + +DATA ·AVX2_c48<>+0x00(SB)/8, $0x0100070605040302 +DATA ·AVX2_c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a +DATA ·AVX2_c48<>+0x10(SB)/8, $0x0100070605040302 +DATA ·AVX2_c48<>+0x18(SB)/8, $0x09080f0e0d0c0b0a +GLOBL ·AVX2_c48<>(SB), (NOPTR+RODATA), $32 + +DATA ·AVX_iv0<>+0x00(SB)/8, $0x6a09e667f3bcc908 +DATA ·AVX_iv0<>+0x08(SB)/8, $0xbb67ae8584caa73b +GLOBL ·AVX_iv0<>(SB), (NOPTR+RODATA), $16 + +DATA ·AVX_iv1<>+0x00(SB)/8, $0x3c6ef372fe94f82b +DATA ·AVX_iv1<>+0x08(SB)/8, $0xa54ff53a5f1d36f1 +GLOBL ·AVX_iv1<>(SB), (NOPTR+RODATA), $16 + +DATA ·AVX_iv2<>+0x00(SB)/8, $0x510e527fade682d1 +DATA ·AVX_iv2<>+0x08(SB)/8, $0x9b05688c2b3e6c1f +GLOBL ·AVX_iv2<>(SB), (NOPTR+RODATA), $16 + +DATA ·AVX_iv3<>+0x00(SB)/8, $0x1f83d9abfb41bd6b +DATA ·AVX_iv3<>+0x08(SB)/8, $0x5be0cd19137e2179 +GLOBL ·AVX_iv3<>(SB), (NOPTR+RODATA), $16 + +DATA ·AVX_c40<>+0x00(SB)/8, $0x0201000706050403 +DATA ·AVX_c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b +GLOBL ·AVX_c40<>(SB), (NOPTR+RODATA), $16 + +DATA ·AVX_c48<>+0x00(SB)/8, $0x0100070605040302 +DATA ·AVX_c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a +GLOBL ·AVX_c48<>(SB), (NOPTR+RODATA), $16 + +#define VPERMQ_0x39_Y1_Y1 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xc9; BYTE $0x39 +#define VPERMQ_0x93_Y1_Y1 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xc9; BYTE $0x93 +#define VPERMQ_0x4E_Y2_Y2 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xd2; BYTE $0x4e +#define VPERMQ_0x93_Y3_Y3 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xdb; BYTE $0x93 +#define VPERMQ_0x39_Y3_Y3 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xdb; BYTE $0x39 + +#define ROUND_AVX2(m0, m1, m2, m3, t, c40, c48) \ + VPADDQ m0, Y0, Y0; \ + VPADDQ Y1, Y0, Y0; \ + VPXOR Y0, Y3, Y3; \ + VPSHUFD $-79, Y3, Y3; \ + VPADDQ Y3, Y2, Y2; \ + VPXOR Y2, Y1, Y1; \ + VPSHUFB c40, Y1, Y1; \ + VPADDQ m1, Y0, Y0; \ + VPADDQ Y1, Y0, Y0; \ + VPXOR Y0, Y3, Y3; \ + VPSHUFB c48, Y3, Y3; \ + VPADDQ Y3, Y2, Y2; \ + VPXOR Y2, Y1, Y1; \ + VPADDQ Y1, Y1, t; \ + VPSRLQ $63, Y1, Y1; \ + VPXOR t, Y1, Y1; \ + VPERMQ_0x39_Y1_Y1; \ + VPERMQ_0x4E_Y2_Y2; \ + VPERMQ_0x93_Y3_Y3; \ + VPADDQ m2, Y0, Y0; \ + VPADDQ Y1, Y0, Y0; \ + VPXOR Y0, Y3, Y3; \ + VPSHUFD $-79, Y3, Y3; \ + VPADDQ Y3, Y2, Y2; \ + VPXOR Y2, Y1, Y1; \ + VPSHUFB c40, Y1, Y1; \ + VPADDQ m3, Y0, Y0; \ + VPADDQ Y1, Y0, Y0; \ + VPXOR Y0, Y3, Y3; \ + VPSHUFB c48, Y3, Y3; \ + VPADDQ Y3, Y2, Y2; \ + VPXOR Y2, Y1, Y1; \ + VPADDQ Y1, Y1, t; \ + VPSRLQ $63, Y1, Y1; \ + VPXOR t, Y1, Y1; \ + VPERMQ_0x39_Y3_Y3; \ + VPERMQ_0x4E_Y2_Y2; \ + VPERMQ_0x93_Y1_Y1 + +#define VMOVQ_SI_X11_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x1E +#define VMOVQ_SI_X12_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x26 +#define VMOVQ_SI_X13_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x2E +#define VMOVQ_SI_X14_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x36 +#define VMOVQ_SI_X15_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x3E + +#define VMOVQ_SI_X11(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x5E; BYTE $n +#define VMOVQ_SI_X12(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x66; BYTE $n +#define VMOVQ_SI_X13(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x6E; BYTE $n +#define VMOVQ_SI_X14(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x76; BYTE $n +#define VMOVQ_SI_X15(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x7E; BYTE $n + +#define VPINSRQ_1_SI_X11_0 BYTE $0xC4; BYTE $0x63; BYTE $0xA1; BYTE $0x22; BYTE $0x1E; BYTE $0x01 +#define VPINSRQ_1_SI_X12_0 BYTE $0xC4; BYTE $0x63; BYTE $0x99; BYTE $0x22; BYTE $0x26; BYTE $0x01 +#define VPINSRQ_1_SI_X13_0 BYTE $0xC4; BYTE $0x63; BYTE $0x91; BYTE $0x22; BYTE $0x2E; BYTE $0x01 +#define VPINSRQ_1_SI_X14_0 BYTE $0xC4; BYTE $0x63; BYTE $0x89; BYTE $0x22; BYTE $0x36; BYTE $0x01 +#define VPINSRQ_1_SI_X15_0 BYTE $0xC4; BYTE $0x63; BYTE $0x81; BYTE $0x22; BYTE $0x3E; BYTE $0x01 + +#define VPINSRQ_1_SI_X11(n) BYTE $0xC4; BYTE $0x63; BYTE $0xA1; BYTE $0x22; BYTE $0x5E; BYTE $n; BYTE $0x01 +#define VPINSRQ_1_SI_X12(n) BYTE $0xC4; BYTE $0x63; BYTE $0x99; BYTE $0x22; BYTE $0x66; BYTE $n; BYTE $0x01 +#define VPINSRQ_1_SI_X13(n) BYTE $0xC4; BYTE $0x63; BYTE $0x91; BYTE $0x22; BYTE $0x6E; BYTE $n; BYTE $0x01 +#define VPINSRQ_1_SI_X14(n) BYTE $0xC4; BYTE $0x63; BYTE $0x89; BYTE $0x22; BYTE $0x76; BYTE $n; BYTE $0x01 +#define VPINSRQ_1_SI_X15(n) BYTE $0xC4; BYTE $0x63; BYTE $0x81; BYTE $0x22; BYTE $0x7E; BYTE $n; BYTE $0x01 + +#define VMOVQ_R8_X15 BYTE $0xC4; BYTE $0x41; BYTE $0xF9; BYTE $0x6E; BYTE $0xF8 +#define VPINSRQ_1_R9_X15 BYTE $0xC4; BYTE $0x43; BYTE $0x81; BYTE $0x22; BYTE $0xF9; BYTE $0x01 + +// load msg: Y12 = (i0, i1, i2, i3) +// i0, i1, i2, i3 must not be 0 +#define LOAD_MSG_AVX2_Y12(i0, i1, i2, i3) \ + VMOVQ_SI_X12(i0*8); \ + VMOVQ_SI_X11(i2*8); \ + VPINSRQ_1_SI_X12(i1*8); \ + VPINSRQ_1_SI_X11(i3*8); \ + VINSERTI128 $1, X11, Y12, Y12 + +// load msg: Y13 = (i0, i1, i2, i3) +// i0, i1, i2, i3 must not be 0 +#define LOAD_MSG_AVX2_Y13(i0, i1, i2, i3) \ + VMOVQ_SI_X13(i0*8); \ + VMOVQ_SI_X11(i2*8); \ + VPINSRQ_1_SI_X13(i1*8); \ + VPINSRQ_1_SI_X11(i3*8); \ + VINSERTI128 $1, X11, Y13, Y13 + +// load msg: Y14 = (i0, i1, i2, i3) +// i0, i1, i2, i3 must not be 0 +#define LOAD_MSG_AVX2_Y14(i0, i1, i2, i3) \ + VMOVQ_SI_X14(i0*8); \ + VMOVQ_SI_X11(i2*8); \ + VPINSRQ_1_SI_X14(i1*8); \ + VPINSRQ_1_SI_X11(i3*8); \ + VINSERTI128 $1, X11, Y14, Y14 + +// load msg: Y15 = (i0, i1, i2, i3) +// i0, i1, i2, i3 must not be 0 +#define LOAD_MSG_AVX2_Y15(i0, i1, i2, i3) \ + VMOVQ_SI_X15(i0*8); \ + VMOVQ_SI_X11(i2*8); \ + VPINSRQ_1_SI_X15(i1*8); \ + VPINSRQ_1_SI_X11(i3*8); \ + VINSERTI128 $1, X11, Y15, Y15 + +#define LOAD_MSG_AVX2_0_2_4_6_1_3_5_7_8_10_12_14_9_11_13_15() \ + VMOVQ_SI_X12_0; \ + VMOVQ_SI_X11(4*8); \ + VPINSRQ_1_SI_X12(2*8); \ + VPINSRQ_1_SI_X11(6*8); \ + VINSERTI128 $1, X11, Y12, Y12; \ + LOAD_MSG_AVX2_Y13(1, 3, 5, 7); \ + LOAD_MSG_AVX2_Y14(8, 10, 12, 14); \ + LOAD_MSG_AVX2_Y15(9, 11, 13, 15) + +#define LOAD_MSG_AVX2_14_4_9_13_10_8_15_6_1_0_11_5_12_2_7_3() \ + LOAD_MSG_AVX2_Y12(14, 4, 9, 13); \ + LOAD_MSG_AVX2_Y13(10, 8, 15, 6); \ + VMOVQ_SI_X11(11*8); \ + VPSHUFD $0x4E, 0*8(SI), X14; \ + VPINSRQ_1_SI_X11(5*8); \ + VINSERTI128 $1, X11, Y14, Y14; \ + LOAD_MSG_AVX2_Y15(12, 2, 7, 3) + +#define LOAD_MSG_AVX2_11_12_5_15_8_0_2_13_10_3_7_9_14_6_1_4() \ + VMOVQ_SI_X11(5*8); \ + VMOVDQU 11*8(SI), X12; \ + VPINSRQ_1_SI_X11(15*8); \ + VINSERTI128 $1, X11, Y12, Y12; \ + VMOVQ_SI_X13(8*8); \ + VMOVQ_SI_X11(2*8); \ + VPINSRQ_1_SI_X13_0; \ + VPINSRQ_1_SI_X11(13*8); \ + VINSERTI128 $1, X11, Y13, Y13; \ + LOAD_MSG_AVX2_Y14(10, 3, 7, 9); \ + LOAD_MSG_AVX2_Y15(14, 6, 1, 4) + +#define LOAD_MSG_AVX2_7_3_13_11_9_1_12_14_2_5_4_15_6_10_0_8() \ + LOAD_MSG_AVX2_Y12(7, 3, 13, 11); \ + LOAD_MSG_AVX2_Y13(9, 1, 12, 14); \ + LOAD_MSG_AVX2_Y14(2, 5, 4, 15); \ + VMOVQ_SI_X15(6*8); \ + VMOVQ_SI_X11_0; \ + VPINSRQ_1_SI_X15(10*8); \ + VPINSRQ_1_SI_X11(8*8); \ + VINSERTI128 $1, X11, Y15, Y15 + +#define LOAD_MSG_AVX2_9_5_2_10_0_7_4_15_14_11_6_3_1_12_8_13() \ + LOAD_MSG_AVX2_Y12(9, 5, 2, 10); \ + VMOVQ_SI_X13_0; \ + VMOVQ_SI_X11(4*8); \ + VPINSRQ_1_SI_X13(7*8); \ + VPINSRQ_1_SI_X11(15*8); \ + VINSERTI128 $1, X11, Y13, Y13; \ + LOAD_MSG_AVX2_Y14(14, 11, 6, 3); \ + LOAD_MSG_AVX2_Y15(1, 12, 8, 13) + +#define LOAD_MSG_AVX2_2_6_0_8_12_10_11_3_4_7_15_1_13_5_14_9() \ + VMOVQ_SI_X12(2*8); \ + VMOVQ_SI_X11_0; \ + VPINSRQ_1_SI_X12(6*8); \ + VPINSRQ_1_SI_X11(8*8); \ + VINSERTI128 $1, X11, Y12, Y12; \ + LOAD_MSG_AVX2_Y13(12, 10, 11, 3); \ + LOAD_MSG_AVX2_Y14(4, 7, 15, 1); \ + LOAD_MSG_AVX2_Y15(13, 5, 14, 9) + +#define LOAD_MSG_AVX2_12_1_14_4_5_15_13_10_0_6_9_8_7_3_2_11() \ + LOAD_MSG_AVX2_Y12(12, 1, 14, 4); \ + LOAD_MSG_AVX2_Y13(5, 15, 13, 10); \ + VMOVQ_SI_X14_0; \ + VPSHUFD $0x4E, 8*8(SI), X11; \ + VPINSRQ_1_SI_X14(6*8); \ + VINSERTI128 $1, X11, Y14, Y14; \ + LOAD_MSG_AVX2_Y15(7, 3, 2, 11) + +#define LOAD_MSG_AVX2_13_7_12_3_11_14_1_9_5_15_8_2_0_4_6_10() \ + LOAD_MSG_AVX2_Y12(13, 7, 12, 3); \ + LOAD_MSG_AVX2_Y13(11, 14, 1, 9); \ + LOAD_MSG_AVX2_Y14(5, 15, 8, 2); \ + VMOVQ_SI_X15_0; \ + VMOVQ_SI_X11(6*8); \ + VPINSRQ_1_SI_X15(4*8); \ + VPINSRQ_1_SI_X11(10*8); \ + VINSERTI128 $1, X11, Y15, Y15 + +#define LOAD_MSG_AVX2_6_14_11_0_15_9_3_8_12_13_1_10_2_7_4_5() \ + VMOVQ_SI_X12(6*8); \ + VMOVQ_SI_X11(11*8); \ + VPINSRQ_1_SI_X12(14*8); \ + VPINSRQ_1_SI_X11_0; \ + VINSERTI128 $1, X11, Y12, Y12; \ + LOAD_MSG_AVX2_Y13(15, 9, 3, 8); \ + VMOVQ_SI_X11(1*8); \ + VMOVDQU 12*8(SI), X14; \ + VPINSRQ_1_SI_X11(10*8); \ + VINSERTI128 $1, X11, Y14, Y14; \ + VMOVQ_SI_X15(2*8); \ + VMOVDQU 4*8(SI), X11; \ + VPINSRQ_1_SI_X15(7*8); \ + VINSERTI128 $1, X11, Y15, Y15 + +#define LOAD_MSG_AVX2_10_8_7_1_2_4_6_5_15_9_3_13_11_14_12_0() \ + LOAD_MSG_AVX2_Y12(10, 8, 7, 1); \ + VMOVQ_SI_X13(2*8); \ + VPSHUFD $0x4E, 5*8(SI), X11; \ + VPINSRQ_1_SI_X13(4*8); \ + VINSERTI128 $1, X11, Y13, Y13; \ + LOAD_MSG_AVX2_Y14(15, 9, 3, 13); \ + VMOVQ_SI_X15(11*8); \ + VMOVQ_SI_X11(12*8); \ + VPINSRQ_1_SI_X15(14*8); \ + VPINSRQ_1_SI_X11_0; \ + VINSERTI128 $1, X11, Y15, Y15 + +// func hashBlocksAVX2(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) +TEXT ·hashBlocksAVX2(SB), 4, $320-48 // frame size = 288 + 32 byte alignment + MOVQ h+0(FP), AX + MOVQ c+8(FP), BX + MOVQ flag+16(FP), CX + MOVQ blocks_base+24(FP), SI + MOVQ blocks_len+32(FP), DI + + MOVQ SP, DX + MOVQ SP, R9 + ADDQ $31, R9 + ANDQ $~31, R9 + MOVQ R9, SP + + MOVQ CX, 16(SP) + XORQ CX, CX + MOVQ CX, 24(SP) + + VMOVDQU ·AVX2_c40<>(SB), Y4 + VMOVDQU ·AVX2_c48<>(SB), Y5 + + VMOVDQU 0(AX), Y8 + VMOVDQU 32(AX), Y9 + VMOVDQU ·AVX2_iv0<>(SB), Y6 + VMOVDQU ·AVX2_iv1<>(SB), Y7 + + MOVQ 0(BX), R8 + MOVQ 8(BX), R9 + MOVQ R9, 8(SP) + +loop: + ADDQ $128, R8 + MOVQ R8, 0(SP) + CMPQ R8, $128 + JGE noinc + INCQ R9 + MOVQ R9, 8(SP) + +noinc: + VMOVDQA Y8, Y0 + VMOVDQA Y9, Y1 + VMOVDQA Y6, Y2 + VPXOR 0(SP), Y7, Y3 + + LOAD_MSG_AVX2_0_2_4_6_1_3_5_7_8_10_12_14_9_11_13_15() + VMOVDQA Y12, 32(SP) + VMOVDQA Y13, 64(SP) + VMOVDQA Y14, 96(SP) + VMOVDQA Y15, 128(SP) + ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) + LOAD_MSG_AVX2_14_4_9_13_10_8_15_6_1_0_11_5_12_2_7_3() + VMOVDQA Y12, 160(SP) + VMOVDQA Y13, 192(SP) + VMOVDQA Y14, 224(SP) + VMOVDQA Y15, 256(SP) + + ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) + LOAD_MSG_AVX2_11_12_5_15_8_0_2_13_10_3_7_9_14_6_1_4() + ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) + LOAD_MSG_AVX2_7_3_13_11_9_1_12_14_2_5_4_15_6_10_0_8() + ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) + LOAD_MSG_AVX2_9_5_2_10_0_7_4_15_14_11_6_3_1_12_8_13() + ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) + LOAD_MSG_AVX2_2_6_0_8_12_10_11_3_4_7_15_1_13_5_14_9() + ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) + LOAD_MSG_AVX2_12_1_14_4_5_15_13_10_0_6_9_8_7_3_2_11() + ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) + LOAD_MSG_AVX2_13_7_12_3_11_14_1_9_5_15_8_2_0_4_6_10() + ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) + LOAD_MSG_AVX2_6_14_11_0_15_9_3_8_12_13_1_10_2_7_4_5() + ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) + LOAD_MSG_AVX2_10_8_7_1_2_4_6_5_15_9_3_13_11_14_12_0() + ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) + + ROUND_AVX2(32(SP), 64(SP), 96(SP), 128(SP), Y10, Y4, Y5) + ROUND_AVX2(160(SP), 192(SP), 224(SP), 256(SP), Y10, Y4, Y5) + + VPXOR Y0, Y8, Y8 + VPXOR Y1, Y9, Y9 + VPXOR Y2, Y8, Y8 + VPXOR Y3, Y9, Y9 + + LEAQ 128(SI), SI + SUBQ $128, DI + JNE loop + + MOVQ R8, 0(BX) + MOVQ R9, 8(BX) + + VMOVDQU Y8, 0(AX) + VMOVDQU Y9, 32(AX) + VZEROUPPER + + MOVQ DX, SP + RET + +#define VPUNPCKLQDQ_X2_X2_X15 BYTE $0xC5; BYTE $0x69; BYTE $0x6C; BYTE $0xFA +#define VPUNPCKLQDQ_X3_X3_X15 BYTE $0xC5; BYTE $0x61; BYTE $0x6C; BYTE $0xFB +#define VPUNPCKLQDQ_X7_X7_X15 BYTE $0xC5; BYTE $0x41; BYTE $0x6C; BYTE $0xFF +#define VPUNPCKLQDQ_X13_X13_X15 BYTE $0xC4; BYTE $0x41; BYTE $0x11; BYTE $0x6C; BYTE $0xFD +#define VPUNPCKLQDQ_X14_X14_X15 BYTE $0xC4; BYTE $0x41; BYTE $0x09; BYTE $0x6C; BYTE $0xFE + +#define VPUNPCKHQDQ_X15_X2_X2 BYTE $0xC4; BYTE $0xC1; BYTE $0x69; BYTE $0x6D; BYTE $0xD7 +#define VPUNPCKHQDQ_X15_X3_X3 BYTE $0xC4; BYTE $0xC1; BYTE $0x61; BYTE $0x6D; BYTE $0xDF +#define VPUNPCKHQDQ_X15_X6_X6 BYTE $0xC4; BYTE $0xC1; BYTE $0x49; BYTE $0x6D; BYTE $0xF7 +#define VPUNPCKHQDQ_X15_X7_X7 BYTE $0xC4; BYTE $0xC1; BYTE $0x41; BYTE $0x6D; BYTE $0xFF +#define VPUNPCKHQDQ_X15_X3_X2 BYTE $0xC4; BYTE $0xC1; BYTE $0x61; BYTE $0x6D; BYTE $0xD7 +#define VPUNPCKHQDQ_X15_X7_X6 BYTE $0xC4; BYTE $0xC1; BYTE $0x41; BYTE $0x6D; BYTE $0xF7 +#define VPUNPCKHQDQ_X15_X13_X3 BYTE $0xC4; BYTE $0xC1; BYTE $0x11; BYTE $0x6D; BYTE $0xDF +#define VPUNPCKHQDQ_X15_X13_X7 BYTE $0xC4; BYTE $0xC1; BYTE $0x11; BYTE $0x6D; BYTE $0xFF + +#define SHUFFLE_AVX() \ + VMOVDQA X6, X13; \ + VMOVDQA X2, X14; \ + VMOVDQA X4, X6; \ + VPUNPCKLQDQ_X13_X13_X15; \ + VMOVDQA X5, X4; \ + VMOVDQA X6, X5; \ + VPUNPCKHQDQ_X15_X7_X6; \ + VPUNPCKLQDQ_X7_X7_X15; \ + VPUNPCKHQDQ_X15_X13_X7; \ + VPUNPCKLQDQ_X3_X3_X15; \ + VPUNPCKHQDQ_X15_X2_X2; \ + VPUNPCKLQDQ_X14_X14_X15; \ + VPUNPCKHQDQ_X15_X3_X3; \ + +#define SHUFFLE_AVX_INV() \ + VMOVDQA X2, X13; \ + VMOVDQA X4, X14; \ + VPUNPCKLQDQ_X2_X2_X15; \ + VMOVDQA X5, X4; \ + VPUNPCKHQDQ_X15_X3_X2; \ + VMOVDQA X14, X5; \ + VPUNPCKLQDQ_X3_X3_X15; \ + VMOVDQA X6, X14; \ + VPUNPCKHQDQ_X15_X13_X3; \ + VPUNPCKLQDQ_X7_X7_X15; \ + VPUNPCKHQDQ_X15_X6_X6; \ + VPUNPCKLQDQ_X14_X14_X15; \ + VPUNPCKHQDQ_X15_X7_X7; \ + +#define HALF_ROUND_AVX(v0, v1, v2, v3, v4, v5, v6, v7, m0, m1, m2, m3, t0, c40, c48) \ + VPADDQ m0, v0, v0; \ + VPADDQ v2, v0, v0; \ + VPADDQ m1, v1, v1; \ + VPADDQ v3, v1, v1; \ + VPXOR v0, v6, v6; \ + VPXOR v1, v7, v7; \ + VPSHUFD $-79, v6, v6; \ + VPSHUFD $-79, v7, v7; \ + VPADDQ v6, v4, v4; \ + VPADDQ v7, v5, v5; \ + VPXOR v4, v2, v2; \ + VPXOR v5, v3, v3; \ + VPSHUFB c40, v2, v2; \ + VPSHUFB c40, v3, v3; \ + VPADDQ m2, v0, v0; \ + VPADDQ v2, v0, v0; \ + VPADDQ m3, v1, v1; \ + VPADDQ v3, v1, v1; \ + VPXOR v0, v6, v6; \ + VPXOR v1, v7, v7; \ + VPSHUFB c48, v6, v6; \ + VPSHUFB c48, v7, v7; \ + VPADDQ v6, v4, v4; \ + VPADDQ v7, v5, v5; \ + VPXOR v4, v2, v2; \ + VPXOR v5, v3, v3; \ + VPADDQ v2, v2, t0; \ + VPSRLQ $63, v2, v2; \ + VPXOR t0, v2, v2; \ + VPADDQ v3, v3, t0; \ + VPSRLQ $63, v3, v3; \ + VPXOR t0, v3, v3 + +// load msg: X12 = (i0, i1), X13 = (i2, i3), X14 = (i4, i5), X15 = (i6, i7) +// i0, i1, i2, i3, i4, i5, i6, i7 must not be 0 +#define LOAD_MSG_AVX(i0, i1, i2, i3, i4, i5, i6, i7) \ + VMOVQ_SI_X12(i0*8); \ + VMOVQ_SI_X13(i2*8); \ + VMOVQ_SI_X14(i4*8); \ + VMOVQ_SI_X15(i6*8); \ + VPINSRQ_1_SI_X12(i1*8); \ + VPINSRQ_1_SI_X13(i3*8); \ + VPINSRQ_1_SI_X14(i5*8); \ + VPINSRQ_1_SI_X15(i7*8) + +// load msg: X12 = (0, 2), X13 = (4, 6), X14 = (1, 3), X15 = (5, 7) +#define LOAD_MSG_AVX_0_2_4_6_1_3_5_7() \ + VMOVQ_SI_X12_0; \ + VMOVQ_SI_X13(4*8); \ + VMOVQ_SI_X14(1*8); \ + VMOVQ_SI_X15(5*8); \ + VPINSRQ_1_SI_X12(2*8); \ + VPINSRQ_1_SI_X13(6*8); \ + VPINSRQ_1_SI_X14(3*8); \ + VPINSRQ_1_SI_X15(7*8) + +// load msg: X12 = (1, 0), X13 = (11, 5), X14 = (12, 2), X15 = (7, 3) +#define LOAD_MSG_AVX_1_0_11_5_12_2_7_3() \ + VPSHUFD $0x4E, 0*8(SI), X12; \ + VMOVQ_SI_X13(11*8); \ + VMOVQ_SI_X14(12*8); \ + VMOVQ_SI_X15(7*8); \ + VPINSRQ_1_SI_X13(5*8); \ + VPINSRQ_1_SI_X14(2*8); \ + VPINSRQ_1_SI_X15(3*8) + +// load msg: X12 = (11, 12), X13 = (5, 15), X14 = (8, 0), X15 = (2, 13) +#define LOAD_MSG_AVX_11_12_5_15_8_0_2_13() \ + VMOVDQU 11*8(SI), X12; \ + VMOVQ_SI_X13(5*8); \ + VMOVQ_SI_X14(8*8); \ + VMOVQ_SI_X15(2*8); \ + VPINSRQ_1_SI_X13(15*8); \ + VPINSRQ_1_SI_X14_0; \ + VPINSRQ_1_SI_X15(13*8) + +// load msg: X12 = (2, 5), X13 = (4, 15), X14 = (6, 10), X15 = (0, 8) +#define LOAD_MSG_AVX_2_5_4_15_6_10_0_8() \ + VMOVQ_SI_X12(2*8); \ + VMOVQ_SI_X13(4*8); \ + VMOVQ_SI_X14(6*8); \ + VMOVQ_SI_X15_0; \ + VPINSRQ_1_SI_X12(5*8); \ + VPINSRQ_1_SI_X13(15*8); \ + VPINSRQ_1_SI_X14(10*8); \ + VPINSRQ_1_SI_X15(8*8) + +// load msg: X12 = (9, 5), X13 = (2, 10), X14 = (0, 7), X15 = (4, 15) +#define LOAD_MSG_AVX_9_5_2_10_0_7_4_15() \ + VMOVQ_SI_X12(9*8); \ + VMOVQ_SI_X13(2*8); \ + VMOVQ_SI_X14_0; \ + VMOVQ_SI_X15(4*8); \ + VPINSRQ_1_SI_X12(5*8); \ + VPINSRQ_1_SI_X13(10*8); \ + VPINSRQ_1_SI_X14(7*8); \ + VPINSRQ_1_SI_X15(15*8) + +// load msg: X12 = (2, 6), X13 = (0, 8), X14 = (12, 10), X15 = (11, 3) +#define LOAD_MSG_AVX_2_6_0_8_12_10_11_3() \ + VMOVQ_SI_X12(2*8); \ + VMOVQ_SI_X13_0; \ + VMOVQ_SI_X14(12*8); \ + VMOVQ_SI_X15(11*8); \ + VPINSRQ_1_SI_X12(6*8); \ + VPINSRQ_1_SI_X13(8*8); \ + VPINSRQ_1_SI_X14(10*8); \ + VPINSRQ_1_SI_X15(3*8) + +// load msg: X12 = (0, 6), X13 = (9, 8), X14 = (7, 3), X15 = (2, 11) +#define LOAD_MSG_AVX_0_6_9_8_7_3_2_11() \ + MOVQ 0*8(SI), X12; \ + VPSHUFD $0x4E, 8*8(SI), X13; \ + MOVQ 7*8(SI), X14; \ + MOVQ 2*8(SI), X15; \ + VPINSRQ_1_SI_X12(6*8); \ + VPINSRQ_1_SI_X14(3*8); \ + VPINSRQ_1_SI_X15(11*8) + +// load msg: X12 = (6, 14), X13 = (11, 0), X14 = (15, 9), X15 = (3, 8) +#define LOAD_MSG_AVX_6_14_11_0_15_9_3_8() \ + MOVQ 6*8(SI), X12; \ + MOVQ 11*8(SI), X13; \ + MOVQ 15*8(SI), X14; \ + MOVQ 3*8(SI), X15; \ + VPINSRQ_1_SI_X12(14*8); \ + VPINSRQ_1_SI_X13_0; \ + VPINSRQ_1_SI_X14(9*8); \ + VPINSRQ_1_SI_X15(8*8) + +// load msg: X12 = (5, 15), X13 = (8, 2), X14 = (0, 4), X15 = (6, 10) +#define LOAD_MSG_AVX_5_15_8_2_0_4_6_10() \ + MOVQ 5*8(SI), X12; \ + MOVQ 8*8(SI), X13; \ + MOVQ 0*8(SI), X14; \ + MOVQ 6*8(SI), X15; \ + VPINSRQ_1_SI_X12(15*8); \ + VPINSRQ_1_SI_X13(2*8); \ + VPINSRQ_1_SI_X14(4*8); \ + VPINSRQ_1_SI_X15(10*8) + +// load msg: X12 = (12, 13), X13 = (1, 10), X14 = (2, 7), X15 = (4, 5) +#define LOAD_MSG_AVX_12_13_1_10_2_7_4_5() \ + VMOVDQU 12*8(SI), X12; \ + MOVQ 1*8(SI), X13; \ + MOVQ 2*8(SI), X14; \ + VPINSRQ_1_SI_X13(10*8); \ + VPINSRQ_1_SI_X14(7*8); \ + VMOVDQU 4*8(SI), X15 + +// load msg: X12 = (15, 9), X13 = (3, 13), X14 = (11, 14), X15 = (12, 0) +#define LOAD_MSG_AVX_15_9_3_13_11_14_12_0() \ + MOVQ 15*8(SI), X12; \ + MOVQ 3*8(SI), X13; \ + MOVQ 11*8(SI), X14; \ + MOVQ 12*8(SI), X15; \ + VPINSRQ_1_SI_X12(9*8); \ + VPINSRQ_1_SI_X13(13*8); \ + VPINSRQ_1_SI_X14(14*8); \ + VPINSRQ_1_SI_X15_0 + +// func hashBlocksAVX(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) +TEXT ·hashBlocksAVX(SB), 4, $288-48 // frame size = 272 + 16 byte alignment + MOVQ h+0(FP), AX + MOVQ c+8(FP), BX + MOVQ flag+16(FP), CX + MOVQ blocks_base+24(FP), SI + MOVQ blocks_len+32(FP), DI + + MOVQ SP, BP + MOVQ SP, R9 + ADDQ $15, R9 + ANDQ $~15, R9 + MOVQ R9, SP + + VMOVDQU ·AVX_c40<>(SB), X0 + VMOVDQU ·AVX_c48<>(SB), X1 + VMOVDQA X0, X8 + VMOVDQA X1, X9 + + VMOVDQU ·AVX_iv3<>(SB), X0 + VMOVDQA X0, 0(SP) + XORQ CX, 0(SP) // 0(SP) = ·AVX_iv3 ^ (CX || 0) + + VMOVDQU 0(AX), X10 + VMOVDQU 16(AX), X11 + VMOVDQU 32(AX), X2 + VMOVDQU 48(AX), X3 + + MOVQ 0(BX), R8 + MOVQ 8(BX), R9 + +loop: + ADDQ $128, R8 + CMPQ R8, $128 + JGE noinc + INCQ R9 + +noinc: + VMOVQ_R8_X15 + VPINSRQ_1_R9_X15 + + VMOVDQA X10, X0 + VMOVDQA X11, X1 + VMOVDQU ·AVX_iv0<>(SB), X4 + VMOVDQU ·AVX_iv1<>(SB), X5 + VMOVDQU ·AVX_iv2<>(SB), X6 + + VPXOR X15, X6, X6 + VMOVDQA 0(SP), X7 + + LOAD_MSG_AVX_0_2_4_6_1_3_5_7() + VMOVDQA X12, 16(SP) + VMOVDQA X13, 32(SP) + VMOVDQA X14, 48(SP) + VMOVDQA X15, 64(SP) + HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) + SHUFFLE_AVX() + LOAD_MSG_AVX(8, 10, 12, 14, 9, 11, 13, 15) + VMOVDQA X12, 80(SP) + VMOVDQA X13, 96(SP) + VMOVDQA X14, 112(SP) + VMOVDQA X15, 128(SP) + HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) + SHUFFLE_AVX_INV() + + LOAD_MSG_AVX(14, 4, 9, 13, 10, 8, 15, 6) + VMOVDQA X12, 144(SP) + VMOVDQA X13, 160(SP) + VMOVDQA X14, 176(SP) + VMOVDQA X15, 192(SP) + HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) + SHUFFLE_AVX() + LOAD_MSG_AVX_1_0_11_5_12_2_7_3() + VMOVDQA X12, 208(SP) + VMOVDQA X13, 224(SP) + VMOVDQA X14, 240(SP) + VMOVDQA X15, 256(SP) + HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) + SHUFFLE_AVX_INV() + + LOAD_MSG_AVX_11_12_5_15_8_0_2_13() + HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) + SHUFFLE_AVX() + LOAD_MSG_AVX(10, 3, 7, 9, 14, 6, 1, 4) + HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) + SHUFFLE_AVX_INV() + + LOAD_MSG_AVX(7, 3, 13, 11, 9, 1, 12, 14) + HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) + SHUFFLE_AVX() + LOAD_MSG_AVX_2_5_4_15_6_10_0_8() + HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) + SHUFFLE_AVX_INV() + + LOAD_MSG_AVX_9_5_2_10_0_7_4_15() + HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) + SHUFFLE_AVX() + LOAD_MSG_AVX(14, 11, 6, 3, 1, 12, 8, 13) + HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) + SHUFFLE_AVX_INV() + + LOAD_MSG_AVX_2_6_0_8_12_10_11_3() + HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) + SHUFFLE_AVX() + LOAD_MSG_AVX(4, 7, 15, 1, 13, 5, 14, 9) + HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) + SHUFFLE_AVX_INV() + + LOAD_MSG_AVX(12, 1, 14, 4, 5, 15, 13, 10) + HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) + SHUFFLE_AVX() + LOAD_MSG_AVX_0_6_9_8_7_3_2_11() + HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) + SHUFFLE_AVX_INV() + + LOAD_MSG_AVX(13, 7, 12, 3, 11, 14, 1, 9) + HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) + SHUFFLE_AVX() + LOAD_MSG_AVX_5_15_8_2_0_4_6_10() + HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) + SHUFFLE_AVX_INV() + + LOAD_MSG_AVX_6_14_11_0_15_9_3_8() + HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) + SHUFFLE_AVX() + LOAD_MSG_AVX_12_13_1_10_2_7_4_5() + HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) + SHUFFLE_AVX_INV() + + LOAD_MSG_AVX(10, 8, 7, 1, 2, 4, 6, 5) + HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) + SHUFFLE_AVX() + LOAD_MSG_AVX_15_9_3_13_11_14_12_0() + HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) + SHUFFLE_AVX_INV() + + HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 16(SP), 32(SP), 48(SP), 64(SP), X15, X8, X9) + SHUFFLE_AVX() + HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 80(SP), 96(SP), 112(SP), 128(SP), X15, X8, X9) + SHUFFLE_AVX_INV() + + HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 144(SP), 160(SP), 176(SP), 192(SP), X15, X8, X9) + SHUFFLE_AVX() + HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 208(SP), 224(SP), 240(SP), 256(SP), X15, X8, X9) + SHUFFLE_AVX_INV() + + VMOVDQU 32(AX), X14 + VMOVDQU 48(AX), X15 + VPXOR X0, X10, X10 + VPXOR X1, X11, X11 + VPXOR X2, X14, X14 + VPXOR X3, X15, X15 + VPXOR X4, X10, X10 + VPXOR X5, X11, X11 + VPXOR X6, X14, X2 + VPXOR X7, X15, X3 + VMOVDQU X2, 32(AX) + VMOVDQU X3, 48(AX) + + LEAQ 128(SI), SI + SUBQ $128, DI + JNE loop + + VMOVDQU X10, 0(AX) + VMOVDQU X11, 16(AX) + + MOVQ R8, 0(BX) + MOVQ R9, 8(BX) + VZEROUPPER + + MOVQ BP, SP + RET diff --git a/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.go b/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.go new file mode 100644 index 0000000..30e2fcd --- /dev/null +++ b/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.go @@ -0,0 +1,24 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !go1.7,amd64,!gccgo,!appengine + +package blake2b + +import "golang.org/x/sys/cpu" + +func init() { + useSSE4 = cpu.X86.HasSSE41 +} + +//go:noescape +func hashBlocksSSE4(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) + +func hashBlocks(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) { + if useSSE4 { + hashBlocksSSE4(h, c, flag, blocks) + } else { + hashBlocksGeneric(h, c, flag, blocks) + } +} diff --git a/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.s b/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.s new file mode 100644 index 0000000..578e947 --- /dev/null +++ b/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.s @@ -0,0 +1,281 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build amd64,!gccgo,!appengine + +#include "textflag.h" + +DATA ·iv0<>+0x00(SB)/8, $0x6a09e667f3bcc908 +DATA ·iv0<>+0x08(SB)/8, $0xbb67ae8584caa73b +GLOBL ·iv0<>(SB), (NOPTR+RODATA), $16 + +DATA ·iv1<>+0x00(SB)/8, $0x3c6ef372fe94f82b +DATA ·iv1<>+0x08(SB)/8, $0xa54ff53a5f1d36f1 +GLOBL ·iv1<>(SB), (NOPTR+RODATA), $16 + +DATA ·iv2<>+0x00(SB)/8, $0x510e527fade682d1 +DATA ·iv2<>+0x08(SB)/8, $0x9b05688c2b3e6c1f +GLOBL ·iv2<>(SB), (NOPTR+RODATA), $16 + +DATA ·iv3<>+0x00(SB)/8, $0x1f83d9abfb41bd6b +DATA ·iv3<>+0x08(SB)/8, $0x5be0cd19137e2179 +GLOBL ·iv3<>(SB), (NOPTR+RODATA), $16 + +DATA ·c40<>+0x00(SB)/8, $0x0201000706050403 +DATA ·c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b +GLOBL ·c40<>(SB), (NOPTR+RODATA), $16 + +DATA ·c48<>+0x00(SB)/8, $0x0100070605040302 +DATA ·c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a +GLOBL ·c48<>(SB), (NOPTR+RODATA), $16 + +#define SHUFFLE(v2, v3, v4, v5, v6, v7, t1, t2) \ + MOVO v4, t1; \ + MOVO v5, v4; \ + MOVO t1, v5; \ + MOVO v6, t1; \ + PUNPCKLQDQ v6, t2; \ + PUNPCKHQDQ v7, v6; \ + PUNPCKHQDQ t2, v6; \ + PUNPCKLQDQ v7, t2; \ + MOVO t1, v7; \ + MOVO v2, t1; \ + PUNPCKHQDQ t2, v7; \ + PUNPCKLQDQ v3, t2; \ + PUNPCKHQDQ t2, v2; \ + PUNPCKLQDQ t1, t2; \ + PUNPCKHQDQ t2, v3 + +#define SHUFFLE_INV(v2, v3, v4, v5, v6, v7, t1, t2) \ + MOVO v4, t1; \ + MOVO v5, v4; \ + MOVO t1, v5; \ + MOVO v2, t1; \ + PUNPCKLQDQ v2, t2; \ + PUNPCKHQDQ v3, v2; \ + PUNPCKHQDQ t2, v2; \ + PUNPCKLQDQ v3, t2; \ + MOVO t1, v3; \ + MOVO v6, t1; \ + PUNPCKHQDQ t2, v3; \ + PUNPCKLQDQ v7, t2; \ + PUNPCKHQDQ t2, v6; \ + PUNPCKLQDQ t1, t2; \ + PUNPCKHQDQ t2, v7 + +#define HALF_ROUND(v0, v1, v2, v3, v4, v5, v6, v7, m0, m1, m2, m3, t0, c40, c48) \ + PADDQ m0, v0; \ + PADDQ m1, v1; \ + PADDQ v2, v0; \ + PADDQ v3, v1; \ + PXOR v0, v6; \ + PXOR v1, v7; \ + PSHUFD $0xB1, v6, v6; \ + PSHUFD $0xB1, v7, v7; \ + PADDQ v6, v4; \ + PADDQ v7, v5; \ + PXOR v4, v2; \ + PXOR v5, v3; \ + PSHUFB c40, v2; \ + PSHUFB c40, v3; \ + PADDQ m2, v0; \ + PADDQ m3, v1; \ + PADDQ v2, v0; \ + PADDQ v3, v1; \ + PXOR v0, v6; \ + PXOR v1, v7; \ + PSHUFB c48, v6; \ + PSHUFB c48, v7; \ + PADDQ v6, v4; \ + PADDQ v7, v5; \ + PXOR v4, v2; \ + PXOR v5, v3; \ + MOVOU v2, t0; \ + PADDQ v2, t0; \ + PSRLQ $63, v2; \ + PXOR t0, v2; \ + MOVOU v3, t0; \ + PADDQ v3, t0; \ + PSRLQ $63, v3; \ + PXOR t0, v3 + +#define LOAD_MSG(m0, m1, m2, m3, src, i0, i1, i2, i3, i4, i5, i6, i7) \ + MOVQ i0*8(src), m0; \ + PINSRQ $1, i1*8(src), m0; \ + MOVQ i2*8(src), m1; \ + PINSRQ $1, i3*8(src), m1; \ + MOVQ i4*8(src), m2; \ + PINSRQ $1, i5*8(src), m2; \ + MOVQ i6*8(src), m3; \ + PINSRQ $1, i7*8(src), m3 + +// func hashBlocksSSE4(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) +TEXT ·hashBlocksSSE4(SB), 4, $288-48 // frame size = 272 + 16 byte alignment + MOVQ h+0(FP), AX + MOVQ c+8(FP), BX + MOVQ flag+16(FP), CX + MOVQ blocks_base+24(FP), SI + MOVQ blocks_len+32(FP), DI + + MOVQ SP, BP + MOVQ SP, R9 + ADDQ $15, R9 + ANDQ $~15, R9 + MOVQ R9, SP + + MOVOU ·iv3<>(SB), X0 + MOVO X0, 0(SP) + XORQ CX, 0(SP) // 0(SP) = ·iv3 ^ (CX || 0) + + MOVOU ·c40<>(SB), X13 + MOVOU ·c48<>(SB), X14 + + MOVOU 0(AX), X12 + MOVOU 16(AX), X15 + + MOVQ 0(BX), R8 + MOVQ 8(BX), R9 + +loop: + ADDQ $128, R8 + CMPQ R8, $128 + JGE noinc + INCQ R9 + +noinc: + MOVQ R8, X8 + PINSRQ $1, R9, X8 + + MOVO X12, X0 + MOVO X15, X1 + MOVOU 32(AX), X2 + MOVOU 48(AX), X3 + MOVOU ·iv0<>(SB), X4 + MOVOU ·iv1<>(SB), X5 + MOVOU ·iv2<>(SB), X6 + + PXOR X8, X6 + MOVO 0(SP), X7 + + LOAD_MSG(X8, X9, X10, X11, SI, 0, 2, 4, 6, 1, 3, 5, 7) + MOVO X8, 16(SP) + MOVO X9, 32(SP) + MOVO X10, 48(SP) + MOVO X11, 64(SP) + HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) + SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) + LOAD_MSG(X8, X9, X10, X11, SI, 8, 10, 12, 14, 9, 11, 13, 15) + MOVO X8, 80(SP) + MOVO X9, 96(SP) + MOVO X10, 112(SP) + MOVO X11, 128(SP) + HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) + SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) + + LOAD_MSG(X8, X9, X10, X11, SI, 14, 4, 9, 13, 10, 8, 15, 6) + MOVO X8, 144(SP) + MOVO X9, 160(SP) + MOVO X10, 176(SP) + MOVO X11, 192(SP) + HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) + SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) + LOAD_MSG(X8, X9, X10, X11, SI, 1, 0, 11, 5, 12, 2, 7, 3) + MOVO X8, 208(SP) + MOVO X9, 224(SP) + MOVO X10, 240(SP) + MOVO X11, 256(SP) + HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) + SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) + + LOAD_MSG(X8, X9, X10, X11, SI, 11, 12, 5, 15, 8, 0, 2, 13) + HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) + SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) + LOAD_MSG(X8, X9, X10, X11, SI, 10, 3, 7, 9, 14, 6, 1, 4) + HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) + SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) + + LOAD_MSG(X8, X9, X10, X11, SI, 7, 3, 13, 11, 9, 1, 12, 14) + HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) + SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) + LOAD_MSG(X8, X9, X10, X11, SI, 2, 5, 4, 15, 6, 10, 0, 8) + HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) + SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) + + LOAD_MSG(X8, X9, X10, X11, SI, 9, 5, 2, 10, 0, 7, 4, 15) + HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) + SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) + LOAD_MSG(X8, X9, X10, X11, SI, 14, 11, 6, 3, 1, 12, 8, 13) + HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) + SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) + + LOAD_MSG(X8, X9, X10, X11, SI, 2, 6, 0, 8, 12, 10, 11, 3) + HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) + SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) + LOAD_MSG(X8, X9, X10, X11, SI, 4, 7, 15, 1, 13, 5, 14, 9) + HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) + SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) + + LOAD_MSG(X8, X9, X10, X11, SI, 12, 1, 14, 4, 5, 15, 13, 10) + HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) + SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) + LOAD_MSG(X8, X9, X10, X11, SI, 0, 6, 9, 8, 7, 3, 2, 11) + HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) + SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) + + LOAD_MSG(X8, X9, X10, X11, SI, 13, 7, 12, 3, 11, 14, 1, 9) + HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) + SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) + LOAD_MSG(X8, X9, X10, X11, SI, 5, 15, 8, 2, 0, 4, 6, 10) + HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) + SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) + + LOAD_MSG(X8, X9, X10, X11, SI, 6, 14, 11, 0, 15, 9, 3, 8) + HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) + SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) + LOAD_MSG(X8, X9, X10, X11, SI, 12, 13, 1, 10, 2, 7, 4, 5) + HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) + SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) + + LOAD_MSG(X8, X9, X10, X11, SI, 10, 8, 7, 1, 2, 4, 6, 5) + HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) + SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) + LOAD_MSG(X8, X9, X10, X11, SI, 15, 9, 3, 13, 11, 14, 12, 0) + HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) + SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) + + HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 16(SP), 32(SP), 48(SP), 64(SP), X11, X13, X14) + SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) + HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 80(SP), 96(SP), 112(SP), 128(SP), X11, X13, X14) + SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) + + HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 144(SP), 160(SP), 176(SP), 192(SP), X11, X13, X14) + SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) + HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 208(SP), 224(SP), 240(SP), 256(SP), X11, X13, X14) + SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) + + MOVOU 32(AX), X10 + MOVOU 48(AX), X11 + PXOR X0, X12 + PXOR X1, X15 + PXOR X2, X10 + PXOR X3, X11 + PXOR X4, X12 + PXOR X5, X15 + PXOR X6, X10 + PXOR X7, X11 + MOVOU X10, 32(AX) + MOVOU X11, 48(AX) + + LEAQ 128(SI), SI + SUBQ $128, DI + JNE loop + + MOVOU X12, 0(AX) + MOVOU X15, 16(AX) + + MOVQ R8, 0(BX) + MOVQ R9, 8(BX) + + MOVQ BP, SP + RET diff --git a/vendor/golang.org/x/crypto/blake2b/blake2b_generic.go b/vendor/golang.org/x/crypto/blake2b/blake2b_generic.go new file mode 100644 index 0000000..3168a8a --- /dev/null +++ b/vendor/golang.org/x/crypto/blake2b/blake2b_generic.go @@ -0,0 +1,182 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package blake2b + +import ( + "encoding/binary" + "math/bits" +) + +// the precomputed values for BLAKE2b +// there are 12 16-byte arrays - one for each round +// the entries are calculated from the sigma constants. +var precomputed = [12][16]byte{ + {0, 2, 4, 6, 1, 3, 5, 7, 8, 10, 12, 14, 9, 11, 13, 15}, + {14, 4, 9, 13, 10, 8, 15, 6, 1, 0, 11, 5, 12, 2, 7, 3}, + {11, 12, 5, 15, 8, 0, 2, 13, 10, 3, 7, 9, 14, 6, 1, 4}, + {7, 3, 13, 11, 9, 1, 12, 14, 2, 5, 4, 15, 6, 10, 0, 8}, + {9, 5, 2, 10, 0, 7, 4, 15, 14, 11, 6, 3, 1, 12, 8, 13}, + {2, 6, 0, 8, 12, 10, 11, 3, 4, 7, 15, 1, 13, 5, 14, 9}, + {12, 1, 14, 4, 5, 15, 13, 10, 0, 6, 9, 8, 7, 3, 2, 11}, + {13, 7, 12, 3, 11, 14, 1, 9, 5, 15, 8, 2, 0, 4, 6, 10}, + {6, 14, 11, 0, 15, 9, 3, 8, 12, 13, 1, 10, 2, 7, 4, 5}, + {10, 8, 7, 1, 2, 4, 6, 5, 15, 9, 3, 13, 11, 14, 12, 0}, + {0, 2, 4, 6, 1, 3, 5, 7, 8, 10, 12, 14, 9, 11, 13, 15}, // equal to the first + {14, 4, 9, 13, 10, 8, 15, 6, 1, 0, 11, 5, 12, 2, 7, 3}, // equal to the second +} + +func hashBlocksGeneric(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) { + var m [16]uint64 + c0, c1 := c[0], c[1] + + for i := 0; i < len(blocks); { + c0 += BlockSize + if c0 < BlockSize { + c1++ + } + + v0, v1, v2, v3, v4, v5, v6, v7 := h[0], h[1], h[2], h[3], h[4], h[5], h[6], h[7] + v8, v9, v10, v11, v12, v13, v14, v15 := iv[0], iv[1], iv[2], iv[3], iv[4], iv[5], iv[6], iv[7] + v12 ^= c0 + v13 ^= c1 + v14 ^= flag + + for j := range m { + m[j] = binary.LittleEndian.Uint64(blocks[i:]) + i += 8 + } + + for j := range precomputed { + s := &(precomputed[j]) + + v0 += m[s[0]] + v0 += v4 + v12 ^= v0 + v12 = bits.RotateLeft64(v12, -32) + v8 += v12 + v4 ^= v8 + v4 = bits.RotateLeft64(v4, -24) + v1 += m[s[1]] + v1 += v5 + v13 ^= v1 + v13 = bits.RotateLeft64(v13, -32) + v9 += v13 + v5 ^= v9 + v5 = bits.RotateLeft64(v5, -24) + v2 += m[s[2]] + v2 += v6 + v14 ^= v2 + v14 = bits.RotateLeft64(v14, -32) + v10 += v14 + v6 ^= v10 + v6 = bits.RotateLeft64(v6, -24) + v3 += m[s[3]] + v3 += v7 + v15 ^= v3 + v15 = bits.RotateLeft64(v15, -32) + v11 += v15 + v7 ^= v11 + v7 = bits.RotateLeft64(v7, -24) + + v0 += m[s[4]] + v0 += v4 + v12 ^= v0 + v12 = bits.RotateLeft64(v12, -16) + v8 += v12 + v4 ^= v8 + v4 = bits.RotateLeft64(v4, -63) + v1 += m[s[5]] + v1 += v5 + v13 ^= v1 + v13 = bits.RotateLeft64(v13, -16) + v9 += v13 + v5 ^= v9 + v5 = bits.RotateLeft64(v5, -63) + v2 += m[s[6]] + v2 += v6 + v14 ^= v2 + v14 = bits.RotateLeft64(v14, -16) + v10 += v14 + v6 ^= v10 + v6 = bits.RotateLeft64(v6, -63) + v3 += m[s[7]] + v3 += v7 + v15 ^= v3 + v15 = bits.RotateLeft64(v15, -16) + v11 += v15 + v7 ^= v11 + v7 = bits.RotateLeft64(v7, -63) + + v0 += m[s[8]] + v0 += v5 + v15 ^= v0 + v15 = bits.RotateLeft64(v15, -32) + v10 += v15 + v5 ^= v10 + v5 = bits.RotateLeft64(v5, -24) + v1 += m[s[9]] + v1 += v6 + v12 ^= v1 + v12 = bits.RotateLeft64(v12, -32) + v11 += v12 + v6 ^= v11 + v6 = bits.RotateLeft64(v6, -24) + v2 += m[s[10]] + v2 += v7 + v13 ^= v2 + v13 = bits.RotateLeft64(v13, -32) + v8 += v13 + v7 ^= v8 + v7 = bits.RotateLeft64(v7, -24) + v3 += m[s[11]] + v3 += v4 + v14 ^= v3 + v14 = bits.RotateLeft64(v14, -32) + v9 += v14 + v4 ^= v9 + v4 = bits.RotateLeft64(v4, -24) + + v0 += m[s[12]] + v0 += v5 + v15 ^= v0 + v15 = bits.RotateLeft64(v15, -16) + v10 += v15 + v5 ^= v10 + v5 = bits.RotateLeft64(v5, -63) + v1 += m[s[13]] + v1 += v6 + v12 ^= v1 + v12 = bits.RotateLeft64(v12, -16) + v11 += v12 + v6 ^= v11 + v6 = bits.RotateLeft64(v6, -63) + v2 += m[s[14]] + v2 += v7 + v13 ^= v2 + v13 = bits.RotateLeft64(v13, -16) + v8 += v13 + v7 ^= v8 + v7 = bits.RotateLeft64(v7, -63) + v3 += m[s[15]] + v3 += v4 + v14 ^= v3 + v14 = bits.RotateLeft64(v14, -16) + v9 += v14 + v4 ^= v9 + v4 = bits.RotateLeft64(v4, -63) + + } + + h[0] ^= v0 ^ v8 + h[1] ^= v1 ^ v9 + h[2] ^= v2 ^ v10 + h[3] ^= v3 ^ v11 + h[4] ^= v4 ^ v12 + h[5] ^= v5 ^ v13 + h[6] ^= v6 ^ v14 + h[7] ^= v7 ^ v15 + } + c[0], c[1] = c0, c1 +} diff --git a/vendor/golang.org/x/crypto/blake2b/blake2b_ref.go b/vendor/golang.org/x/crypto/blake2b/blake2b_ref.go new file mode 100644 index 0000000..da156a1 --- /dev/null +++ b/vendor/golang.org/x/crypto/blake2b/blake2b_ref.go @@ -0,0 +1,11 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !amd64 appengine gccgo + +package blake2b + +func hashBlocks(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) { + hashBlocksGeneric(h, c, flag, blocks) +} diff --git a/vendor/golang.org/x/crypto/blake2b/blake2x.go b/vendor/golang.org/x/crypto/blake2b/blake2x.go new file mode 100644 index 0000000..52c414d --- /dev/null +++ b/vendor/golang.org/x/crypto/blake2b/blake2x.go @@ -0,0 +1,177 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package blake2b + +import ( + "encoding/binary" + "errors" + "io" +) + +// XOF defines the interface to hash functions that +// support arbitrary-length output. +type XOF interface { + // Write absorbs more data into the hash's state. It panics if called + // after Read. + io.Writer + + // Read reads more output from the hash. It returns io.EOF if the limit + // has been reached. + io.Reader + + // Clone returns a copy of the XOF in its current state. + Clone() XOF + + // Reset resets the XOF to its initial state. + Reset() +} + +// OutputLengthUnknown can be used as the size argument to NewXOF to indicate +// the length of the output is not known in advance. +const OutputLengthUnknown = 0 + +// magicUnknownOutputLength is a magic value for the output size that indicates +// an unknown number of output bytes. +const magicUnknownOutputLength = (1 << 32) - 1 + +// maxOutputLength is the absolute maximum number of bytes to produce when the +// number of output bytes is unknown. +const maxOutputLength = (1 << 32) * 64 + +// NewXOF creates a new variable-output-length hash. The hash either produce a +// known number of bytes (1 <= size < 2**32-1), or an unknown number of bytes +// (size == OutputLengthUnknown). In the latter case, an absolute limit of +// 256GiB applies. +// +// A non-nil key turns the hash into a MAC. The key must between +// zero and 32 bytes long. +func NewXOF(size uint32, key []byte) (XOF, error) { + if len(key) > Size { + return nil, errKeySize + } + if size == magicUnknownOutputLength { + // 2^32-1 indicates an unknown number of bytes and thus isn't a + // valid length. + return nil, errors.New("blake2b: XOF length too large") + } + if size == OutputLengthUnknown { + size = magicUnknownOutputLength + } + x := &xof{ + d: digest{ + size: Size, + keyLen: len(key), + }, + length: size, + } + copy(x.d.key[:], key) + x.Reset() + return x, nil +} + +type xof struct { + d digest + length uint32 + remaining uint64 + cfg, root, block [Size]byte + offset int + nodeOffset uint32 + readMode bool +} + +func (x *xof) Write(p []byte) (n int, err error) { + if x.readMode { + panic("blake2b: write to XOF after read") + } + return x.d.Write(p) +} + +func (x *xof) Clone() XOF { + clone := *x + return &clone +} + +func (x *xof) Reset() { + x.cfg[0] = byte(Size) + binary.LittleEndian.PutUint32(x.cfg[4:], uint32(Size)) // leaf length + binary.LittleEndian.PutUint32(x.cfg[12:], x.length) // XOF length + x.cfg[17] = byte(Size) // inner hash size + + x.d.Reset() + x.d.h[1] ^= uint64(x.length) << 32 + + x.remaining = uint64(x.length) + if x.remaining == magicUnknownOutputLength { + x.remaining = maxOutputLength + } + x.offset, x.nodeOffset = 0, 0 + x.readMode = false +} + +func (x *xof) Read(p []byte) (n int, err error) { + if !x.readMode { + x.d.finalize(&x.root) + x.readMode = true + } + + if x.remaining == 0 { + return 0, io.EOF + } + + n = len(p) + if uint64(n) > x.remaining { + n = int(x.remaining) + p = p[:n] + } + + if x.offset > 0 { + blockRemaining := Size - x.offset + if n < blockRemaining { + x.offset += copy(p, x.block[x.offset:]) + x.remaining -= uint64(n) + return + } + copy(p, x.block[x.offset:]) + p = p[blockRemaining:] + x.offset = 0 + x.remaining -= uint64(blockRemaining) + } + + for len(p) >= Size { + binary.LittleEndian.PutUint32(x.cfg[8:], x.nodeOffset) + x.nodeOffset++ + + x.d.initConfig(&x.cfg) + x.d.Write(x.root[:]) + x.d.finalize(&x.block) + + copy(p, x.block[:]) + p = p[Size:] + x.remaining -= uint64(Size) + } + + if todo := len(p); todo > 0 { + if x.remaining < uint64(Size) { + x.cfg[0] = byte(x.remaining) + } + binary.LittleEndian.PutUint32(x.cfg[8:], x.nodeOffset) + x.nodeOffset++ + + x.d.initConfig(&x.cfg) + x.d.Write(x.root[:]) + x.d.finalize(&x.block) + + x.offset = copy(p, x.block[:todo]) + x.remaining -= uint64(todo) + } + return +} + +func (d *digest) initConfig(cfg *[Size]byte) { + d.offset, d.c[0], d.c[1] = 0, 0, 0 + for i := range d.h { + d.h[i] = iv[i] ^ binary.LittleEndian.Uint64(cfg[i*8:]) + } +} diff --git a/vendor/golang.org/x/crypto/blake2b/register.go b/vendor/golang.org/x/crypto/blake2b/register.go new file mode 100644 index 0000000..efd689a --- /dev/null +++ b/vendor/golang.org/x/crypto/blake2b/register.go @@ -0,0 +1,32 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build go1.9 + +package blake2b + +import ( + "crypto" + "hash" +) + +func init() { + newHash256 := func() hash.Hash { + h, _ := New256(nil) + return h + } + newHash384 := func() hash.Hash { + h, _ := New384(nil) + return h + } + + newHash512 := func() hash.Hash { + h, _ := New512(nil) + return h + } + + crypto.RegisterHash(crypto.BLAKE2b_256, newHash256) + crypto.RegisterHash(crypto.BLAKE2b_384, newHash384) + crypto.RegisterHash(crypto.BLAKE2b_512, newHash512) +} diff --git a/vendor/golang.org/x/crypto/internal/subtle/aliasing.go b/vendor/golang.org/x/crypto/internal/subtle/aliasing.go new file mode 100644 index 0000000..f38797b --- /dev/null +++ b/vendor/golang.org/x/crypto/internal/subtle/aliasing.go @@ -0,0 +1,32 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !appengine + +// Package subtle implements functions that are often useful in cryptographic +// code but require careful thought to use correctly. +package subtle // import "golang.org/x/crypto/internal/subtle" + +import "unsafe" + +// AnyOverlap reports whether x and y share memory at any (not necessarily +// corresponding) index. The memory beyond the slice length is ignored. +func AnyOverlap(x, y []byte) bool { + return len(x) > 0 && len(y) > 0 && + uintptr(unsafe.Pointer(&x[0])) <= uintptr(unsafe.Pointer(&y[len(y)-1])) && + uintptr(unsafe.Pointer(&y[0])) <= uintptr(unsafe.Pointer(&x[len(x)-1])) +} + +// InexactOverlap reports whether x and y share memory at any non-corresponding +// index. The memory beyond the slice length is ignored. Note that x and y can +// have different lengths and still not have any inexact overlap. +// +// InexactOverlap can be used to implement the requirements of the crypto/cipher +// AEAD, Block, BlockMode and Stream interfaces. +func InexactOverlap(x, y []byte) bool { + if len(x) == 0 || len(y) == 0 || &x[0] == &y[0] { + return false + } + return AnyOverlap(x, y) +} diff --git a/vendor/golang.org/x/crypto/internal/subtle/aliasing_appengine.go b/vendor/golang.org/x/crypto/internal/subtle/aliasing_appengine.go new file mode 100644 index 0000000..0cc4a8a --- /dev/null +++ b/vendor/golang.org/x/crypto/internal/subtle/aliasing_appengine.go @@ -0,0 +1,35 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build appengine + +// Package subtle implements functions that are often useful in cryptographic +// code but require careful thought to use correctly. +package subtle // import "golang.org/x/crypto/internal/subtle" + +// This is the Google App Engine standard variant based on reflect +// because the unsafe package and cgo are disallowed. + +import "reflect" + +// AnyOverlap reports whether x and y share memory at any (not necessarily +// corresponding) index. The memory beyond the slice length is ignored. +func AnyOverlap(x, y []byte) bool { + return len(x) > 0 && len(y) > 0 && + reflect.ValueOf(&x[0]).Pointer() <= reflect.ValueOf(&y[len(y)-1]).Pointer() && + reflect.ValueOf(&y[0]).Pointer() <= reflect.ValueOf(&x[len(x)-1]).Pointer() +} + +// InexactOverlap reports whether x and y share memory at any non-corresponding +// index. The memory beyond the slice length is ignored. Note that x and y can +// have different lengths and still not have any inexact overlap. +// +// InexactOverlap can be used to implement the requirements of the crypto/cipher +// AEAD, Block, BlockMode and Stream interfaces. +func InexactOverlap(x, y []byte) bool { + if len(x) == 0 || len(y) == 0 || &x[0] == &y[0] { + return false + } + return AnyOverlap(x, y) +} diff --git a/vendor/golang.org/x/crypto/nacl/secretbox/secretbox.go b/vendor/golang.org/x/crypto/nacl/secretbox/secretbox.go new file mode 100644 index 0000000..a98d1bd --- /dev/null +++ b/vendor/golang.org/x/crypto/nacl/secretbox/secretbox.go @@ -0,0 +1,173 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* +Package secretbox encrypts and authenticates small messages. + +Secretbox uses XSalsa20 and Poly1305 to encrypt and authenticate messages with +secret-key cryptography. The length of messages is not hidden. + +It is the caller's responsibility to ensure the uniqueness of nonces—for +example, by using nonce 1 for the first message, nonce 2 for the second +message, etc. Nonces are long enough that randomly generated nonces have +negligible risk of collision. + +Messages should be small because: + +1. The whole message needs to be held in memory to be processed. + +2. Using large messages pressures implementations on small machines to decrypt +and process plaintext before authenticating it. This is very dangerous, and +this API does not allow it, but a protocol that uses excessive message sizes +might present some implementations with no other choice. + +3. Fixed overheads will be sufficiently amortised by messages as small as 8KB. + +4. Performance may be improved by working with messages that fit into data caches. + +Thus large amounts of data should be chunked so that each message is small. +(Each message still needs a unique nonce.) If in doubt, 16KB is a reasonable +chunk size. + +This package is interoperable with NaCl: https://nacl.cr.yp.to/secretbox.html. +*/ +package secretbox // import "golang.org/x/crypto/nacl/secretbox" + +import ( + "golang.org/x/crypto/internal/subtle" + "golang.org/x/crypto/poly1305" + "golang.org/x/crypto/salsa20/salsa" +) + +// Overhead is the number of bytes of overhead when boxing a message. +const Overhead = poly1305.TagSize + +// setup produces a sub-key and Salsa20 counter given a nonce and key. +func setup(subKey *[32]byte, counter *[16]byte, nonce *[24]byte, key *[32]byte) { + // We use XSalsa20 for encryption so first we need to generate a + // key and nonce with HSalsa20. + var hNonce [16]byte + copy(hNonce[:], nonce[:]) + salsa.HSalsa20(subKey, &hNonce, key, &salsa.Sigma) + + // The final 8 bytes of the original nonce form the new nonce. + copy(counter[:], nonce[16:]) +} + +// sliceForAppend takes a slice and a requested number of bytes. It returns a +// slice with the contents of the given slice followed by that many bytes and a +// second slice that aliases into it and contains only the extra bytes. If the +// original slice has sufficient capacity then no allocation is performed. +func sliceForAppend(in []byte, n int) (head, tail []byte) { + if total := len(in) + n; cap(in) >= total { + head = in[:total] + } else { + head = make([]byte, total) + copy(head, in) + } + tail = head[len(in):] + return +} + +// Seal appends an encrypted and authenticated copy of message to out, which +// must not overlap message. The key and nonce pair must be unique for each +// distinct message and the output will be Overhead bytes longer than message. +func Seal(out, message []byte, nonce *[24]byte, key *[32]byte) []byte { + var subKey [32]byte + var counter [16]byte + setup(&subKey, &counter, nonce, key) + + // The Poly1305 key is generated by encrypting 32 bytes of zeros. Since + // Salsa20 works with 64-byte blocks, we also generate 32 bytes of + // keystream as a side effect. + var firstBlock [64]byte + salsa.XORKeyStream(firstBlock[:], firstBlock[:], &counter, &subKey) + + var poly1305Key [32]byte + copy(poly1305Key[:], firstBlock[:]) + + ret, out := sliceForAppend(out, len(message)+poly1305.TagSize) + if subtle.AnyOverlap(out, message) { + panic("nacl: invalid buffer overlap") + } + + // We XOR up to 32 bytes of message with the keystream generated from + // the first block. + firstMessageBlock := message + if len(firstMessageBlock) > 32 { + firstMessageBlock = firstMessageBlock[:32] + } + + tagOut := out + out = out[poly1305.TagSize:] + for i, x := range firstMessageBlock { + out[i] = firstBlock[32+i] ^ x + } + message = message[len(firstMessageBlock):] + ciphertext := out + out = out[len(firstMessageBlock):] + + // Now encrypt the rest. + counter[8] = 1 + salsa.XORKeyStream(out, message, &counter, &subKey) + + var tag [poly1305.TagSize]byte + poly1305.Sum(&tag, ciphertext, &poly1305Key) + copy(tagOut, tag[:]) + + return ret +} + +// Open authenticates and decrypts a box produced by Seal and appends the +// message to out, which must not overlap box. The output will be Overhead +// bytes smaller than box. +func Open(out, box []byte, nonce *[24]byte, key *[32]byte) ([]byte, bool) { + if len(box) < Overhead { + return nil, false + } + + var subKey [32]byte + var counter [16]byte + setup(&subKey, &counter, nonce, key) + + // The Poly1305 key is generated by encrypting 32 bytes of zeros. Since + // Salsa20 works with 64-byte blocks, we also generate 32 bytes of + // keystream as a side effect. + var firstBlock [64]byte + salsa.XORKeyStream(firstBlock[:], firstBlock[:], &counter, &subKey) + + var poly1305Key [32]byte + copy(poly1305Key[:], firstBlock[:]) + var tag [poly1305.TagSize]byte + copy(tag[:], box) + + if !poly1305.Verify(&tag, box[poly1305.TagSize:], &poly1305Key) { + return nil, false + } + + ret, out := sliceForAppend(out, len(box)-Overhead) + if subtle.AnyOverlap(out, box) { + panic("nacl: invalid buffer overlap") + } + + // We XOR up to 32 bytes of box with the keystream generated from + // the first block. + box = box[Overhead:] + firstMessageBlock := box + if len(firstMessageBlock) > 32 { + firstMessageBlock = firstMessageBlock[:32] + } + for i, x := range firstMessageBlock { + out[i] = firstBlock[32+i] ^ x + } + + box = box[len(firstMessageBlock):] + out = out[len(firstMessageBlock):] + + // Now decrypt the rest. + counter[8] = 1 + salsa.XORKeyStream(out, box, &counter, &subKey) + + return ret, true +} diff --git a/vendor/golang.org/x/crypto/poly1305/mac_noasm.go b/vendor/golang.org/x/crypto/poly1305/mac_noasm.go new file mode 100644 index 0000000..a8dd589 --- /dev/null +++ b/vendor/golang.org/x/crypto/poly1305/mac_noasm.go @@ -0,0 +1,11 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !amd64,!ppc64le gccgo appengine + +package poly1305 + +type mac struct{ macGeneric } + +func newMAC(key *[32]byte) mac { return mac{newMACGeneric(key)} } diff --git a/vendor/golang.org/x/crypto/poly1305/poly1305.go b/vendor/golang.org/x/crypto/poly1305/poly1305.go new file mode 100644 index 0000000..d076a56 --- /dev/null +++ b/vendor/golang.org/x/crypto/poly1305/poly1305.go @@ -0,0 +1,83 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package poly1305 implements Poly1305 one-time message authentication code as +// specified in https://cr.yp.to/mac/poly1305-20050329.pdf. +// +// Poly1305 is a fast, one-time authentication function. It is infeasible for an +// attacker to generate an authenticator for a message without the key. However, a +// key must only be used for a single message. Authenticating two different +// messages with the same key allows an attacker to forge authenticators for other +// messages with the same key. +// +// Poly1305 was originally coupled with AES in order to make Poly1305-AES. AES was +// used with a fixed key in order to generate one-time keys from an nonce. +// However, in this package AES isn't used and the one-time key is specified +// directly. +package poly1305 // import "golang.org/x/crypto/poly1305" + +import "crypto/subtle" + +// TagSize is the size, in bytes, of a poly1305 authenticator. +const TagSize = 16 + +// Verify returns true if mac is a valid authenticator for m with the given +// key. +func Verify(mac *[16]byte, m []byte, key *[32]byte) bool { + var tmp [16]byte + Sum(&tmp, m, key) + return subtle.ConstantTimeCompare(tmp[:], mac[:]) == 1 +} + +// New returns a new MAC computing an authentication +// tag of all data written to it with the given key. +// This allows writing the message progressively instead +// of passing it as a single slice. Common users should use +// the Sum function instead. +// +// The key must be unique for each message, as authenticating +// two different messages with the same key allows an attacker +// to forge messages at will. +func New(key *[32]byte) *MAC { + return &MAC{ + mac: newMAC(key), + finalized: false, + } +} + +// MAC is an io.Writer computing an authentication tag +// of the data written to it. +// +// MAC cannot be used like common hash.Hash implementations, +// because using a poly1305 key twice breaks its security. +// Therefore writing data to a running MAC after calling +// Sum causes it to panic. +type MAC struct { + mac // platform-dependent implementation + + finalized bool +} + +// Size returns the number of bytes Sum will return. +func (h *MAC) Size() int { return TagSize } + +// Write adds more data to the running message authentication code. +// It never returns an error. +// +// It must not be called after the first call of Sum. +func (h *MAC) Write(p []byte) (n int, err error) { + if h.finalized { + panic("poly1305: write to MAC after Sum") + } + return h.mac.Write(p) +} + +// Sum computes the authenticator of all data written to the +// message authentication code. +func (h *MAC) Sum(b []byte) []byte { + var mac [TagSize]byte + h.mac.Sum(&mac) + h.finalized = true + return append(b, mac[:]...) +} diff --git a/vendor/golang.org/x/crypto/poly1305/sum_amd64.go b/vendor/golang.org/x/crypto/poly1305/sum_amd64.go new file mode 100644 index 0000000..2dbf42a --- /dev/null +++ b/vendor/golang.org/x/crypto/poly1305/sum_amd64.go @@ -0,0 +1,68 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build amd64,!gccgo,!appengine + +package poly1305 + +//go:noescape +func initialize(state *[7]uint64, key *[32]byte) + +//go:noescape +func update(state *[7]uint64, msg []byte) + +//go:noescape +func finalize(tag *[TagSize]byte, state *[7]uint64) + +// Sum generates an authenticator for m using a one-time key and puts the +// 16-byte result into out. Authenticating two different messages with the same +// key allows an attacker to forge messages at will. +func Sum(out *[16]byte, m []byte, key *[32]byte) { + h := newMAC(key) + h.Write(m) + h.Sum(out) +} + +func newMAC(key *[32]byte) (h mac) { + initialize(&h.state, key) + return +} + +type mac struct { + state [7]uint64 // := uint64{ h0, h1, h2, r0, r1, pad0, pad1 } + + buffer [TagSize]byte + offset int +} + +func (h *mac) Write(p []byte) (n int, err error) { + n = len(p) + if h.offset > 0 { + remaining := TagSize - h.offset + if n < remaining { + h.offset += copy(h.buffer[h.offset:], p) + return n, nil + } + copy(h.buffer[h.offset:], p[:remaining]) + p = p[remaining:] + h.offset = 0 + update(&h.state, h.buffer[:]) + } + if nn := len(p) - (len(p) % TagSize); nn > 0 { + update(&h.state, p[:nn]) + p = p[nn:] + } + if len(p) > 0 { + h.offset += copy(h.buffer[h.offset:], p) + } + return n, nil +} + +func (h *mac) Sum(out *[16]byte) { + state := h.state + if h.offset > 0 { + update(&state, h.buffer[:h.offset]) + } + finalize(out, &state) +} diff --git a/vendor/golang.org/x/crypto/poly1305/sum_amd64.s b/vendor/golang.org/x/crypto/poly1305/sum_amd64.s new file mode 100644 index 0000000..7d600f1 --- /dev/null +++ b/vendor/golang.org/x/crypto/poly1305/sum_amd64.s @@ -0,0 +1,148 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build amd64,!gccgo,!appengine + +#include "textflag.h" + +#define POLY1305_ADD(msg, h0, h1, h2) \ + ADDQ 0(msg), h0; \ + ADCQ 8(msg), h1; \ + ADCQ $1, h2; \ + LEAQ 16(msg), msg + +#define POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3) \ + MOVQ r0, AX; \ + MULQ h0; \ + MOVQ AX, t0; \ + MOVQ DX, t1; \ + MOVQ r0, AX; \ + MULQ h1; \ + ADDQ AX, t1; \ + ADCQ $0, DX; \ + MOVQ r0, t2; \ + IMULQ h2, t2; \ + ADDQ DX, t2; \ + \ + MOVQ r1, AX; \ + MULQ h0; \ + ADDQ AX, t1; \ + ADCQ $0, DX; \ + MOVQ DX, h0; \ + MOVQ r1, t3; \ + IMULQ h2, t3; \ + MOVQ r1, AX; \ + MULQ h1; \ + ADDQ AX, t2; \ + ADCQ DX, t3; \ + ADDQ h0, t2; \ + ADCQ $0, t3; \ + \ + MOVQ t0, h0; \ + MOVQ t1, h1; \ + MOVQ t2, h2; \ + ANDQ $3, h2; \ + MOVQ t2, t0; \ + ANDQ $0xFFFFFFFFFFFFFFFC, t0; \ + ADDQ t0, h0; \ + ADCQ t3, h1; \ + ADCQ $0, h2; \ + SHRQ $2, t3, t2; \ + SHRQ $2, t3; \ + ADDQ t2, h0; \ + ADCQ t3, h1; \ + ADCQ $0, h2 + +DATA ·poly1305Mask<>+0x00(SB)/8, $0x0FFFFFFC0FFFFFFF +DATA ·poly1305Mask<>+0x08(SB)/8, $0x0FFFFFFC0FFFFFFC +GLOBL ·poly1305Mask<>(SB), RODATA, $16 + +// func update(state *[7]uint64, msg []byte) +TEXT ·update(SB), $0-32 + MOVQ state+0(FP), DI + MOVQ msg_base+8(FP), SI + MOVQ msg_len+16(FP), R15 + + MOVQ 0(DI), R8 // h0 + MOVQ 8(DI), R9 // h1 + MOVQ 16(DI), R10 // h2 + MOVQ 24(DI), R11 // r0 + MOVQ 32(DI), R12 // r1 + + CMPQ R15, $16 + JB bytes_between_0_and_15 + +loop: + POLY1305_ADD(SI, R8, R9, R10) + +multiply: + POLY1305_MUL(R8, R9, R10, R11, R12, BX, CX, R13, R14) + SUBQ $16, R15 + CMPQ R15, $16 + JAE loop + +bytes_between_0_and_15: + TESTQ R15, R15 + JZ done + MOVQ $1, BX + XORQ CX, CX + XORQ R13, R13 + ADDQ R15, SI + +flush_buffer: + SHLQ $8, BX, CX + SHLQ $8, BX + MOVB -1(SI), R13 + XORQ R13, BX + DECQ SI + DECQ R15 + JNZ flush_buffer + + ADDQ BX, R8 + ADCQ CX, R9 + ADCQ $0, R10 + MOVQ $16, R15 + JMP multiply + +done: + MOVQ R8, 0(DI) + MOVQ R9, 8(DI) + MOVQ R10, 16(DI) + RET + +// func initialize(state *[7]uint64, key *[32]byte) +TEXT ·initialize(SB), $0-16 + MOVQ state+0(FP), DI + MOVQ key+8(FP), SI + + // state[0...7] is initialized with zero + MOVOU 0(SI), X0 + MOVOU 16(SI), X1 + MOVOU ·poly1305Mask<>(SB), X2 + PAND X2, X0 + MOVOU X0, 24(DI) + MOVOU X1, 40(DI) + RET + +// func finalize(tag *[TagSize]byte, state *[7]uint64) +TEXT ·finalize(SB), $0-16 + MOVQ tag+0(FP), DI + MOVQ state+8(FP), SI + + MOVQ 0(SI), AX + MOVQ 8(SI), BX + MOVQ 16(SI), CX + MOVQ AX, R8 + MOVQ BX, R9 + SUBQ $0xFFFFFFFFFFFFFFFB, AX + SBBQ $0xFFFFFFFFFFFFFFFF, BX + SBBQ $3, CX + CMOVQCS R8, AX + CMOVQCS R9, BX + ADDQ 40(SI), AX + ADCQ 48(SI), BX + + MOVQ AX, 0(DI) + MOVQ BX, 8(DI) + RET diff --git a/vendor/golang.org/x/crypto/poly1305/sum_arm.go b/vendor/golang.org/x/crypto/poly1305/sum_arm.go new file mode 100644 index 0000000..5dc321c --- /dev/null +++ b/vendor/golang.org/x/crypto/poly1305/sum_arm.go @@ -0,0 +1,22 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build arm,!gccgo,!appengine,!nacl + +package poly1305 + +// This function is implemented in sum_arm.s +//go:noescape +func poly1305_auth_armv6(out *[16]byte, m *byte, mlen uint32, key *[32]byte) + +// Sum generates an authenticator for m using a one-time key and puts the +// 16-byte result into out. Authenticating two different messages with the same +// key allows an attacker to forge messages at will. +func Sum(out *[16]byte, m []byte, key *[32]byte) { + var mPtr *byte + if len(m) > 0 { + mPtr = &m[0] + } + poly1305_auth_armv6(out, mPtr, uint32(len(m)), key) +} diff --git a/vendor/golang.org/x/crypto/poly1305/sum_arm.s b/vendor/golang.org/x/crypto/poly1305/sum_arm.s new file mode 100644 index 0000000..f70b4ac --- /dev/null +++ b/vendor/golang.org/x/crypto/poly1305/sum_arm.s @@ -0,0 +1,427 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build arm,!gccgo,!appengine,!nacl + +#include "textflag.h" + +// This code was translated into a form compatible with 5a from the public +// domain source by Andrew Moon: github.com/floodyberry/poly1305-opt/blob/master/app/extensions/poly1305. + +DATA ·poly1305_init_constants_armv6<>+0x00(SB)/4, $0x3ffffff +DATA ·poly1305_init_constants_armv6<>+0x04(SB)/4, $0x3ffff03 +DATA ·poly1305_init_constants_armv6<>+0x08(SB)/4, $0x3ffc0ff +DATA ·poly1305_init_constants_armv6<>+0x0c(SB)/4, $0x3f03fff +DATA ·poly1305_init_constants_armv6<>+0x10(SB)/4, $0x00fffff +GLOBL ·poly1305_init_constants_armv6<>(SB), 8, $20 + +// Warning: the linker may use R11 to synthesize certain instructions. Please +// take care and verify that no synthetic instructions use it. + +TEXT poly1305_init_ext_armv6<>(SB), NOSPLIT, $0 + // Needs 16 bytes of stack and 64 bytes of space pointed to by R0. (It + // might look like it's only 60 bytes of space but the final four bytes + // will be written by another function.) We need to skip over four + // bytes of stack because that's saving the value of 'g'. + ADD $4, R13, R8 + MOVM.IB [R4-R7], (R8) + MOVM.IA.W (R1), [R2-R5] + MOVW $·poly1305_init_constants_armv6<>(SB), R7 + MOVW R2, R8 + MOVW R2>>26, R9 + MOVW R3>>20, g + MOVW R4>>14, R11 + MOVW R5>>8, R12 + ORR R3<<6, R9, R9 + ORR R4<<12, g, g + ORR R5<<18, R11, R11 + MOVM.IA (R7), [R2-R6] + AND R8, R2, R2 + AND R9, R3, R3 + AND g, R4, R4 + AND R11, R5, R5 + AND R12, R6, R6 + MOVM.IA.W [R2-R6], (R0) + EOR R2, R2, R2 + EOR R3, R3, R3 + EOR R4, R4, R4 + EOR R5, R5, R5 + EOR R6, R6, R6 + MOVM.IA.W [R2-R6], (R0) + MOVM.IA.W (R1), [R2-R5] + MOVM.IA [R2-R6], (R0) + ADD $20, R13, R0 + MOVM.DA (R0), [R4-R7] + RET + +#define MOVW_UNALIGNED(Rsrc, Rdst, Rtmp, offset) \ + MOVBU (offset+0)(Rsrc), Rtmp; \ + MOVBU Rtmp, (offset+0)(Rdst); \ + MOVBU (offset+1)(Rsrc), Rtmp; \ + MOVBU Rtmp, (offset+1)(Rdst); \ + MOVBU (offset+2)(Rsrc), Rtmp; \ + MOVBU Rtmp, (offset+2)(Rdst); \ + MOVBU (offset+3)(Rsrc), Rtmp; \ + MOVBU Rtmp, (offset+3)(Rdst) + +TEXT poly1305_blocks_armv6<>(SB), NOSPLIT, $0 + // Needs 24 bytes of stack for saved registers and then 88 bytes of + // scratch space after that. We assume that 24 bytes at (R13) have + // already been used: four bytes for the link register saved in the + // prelude of poly1305_auth_armv6, four bytes for saving the value of g + // in that function and 16 bytes of scratch space used around + // poly1305_finish_ext_armv6_skip1. + ADD $24, R13, R12 + MOVM.IB [R4-R8, R14], (R12) + MOVW R0, 88(R13) + MOVW R1, 92(R13) + MOVW R2, 96(R13) + MOVW R1, R14 + MOVW R2, R12 + MOVW 56(R0), R8 + WORD $0xe1180008 // TST R8, R8 not working see issue 5921 + EOR R6, R6, R6 + MOVW.EQ $(1<<24), R6 + MOVW R6, 84(R13) + ADD $116, R13, g + MOVM.IA (R0), [R0-R9] + MOVM.IA [R0-R4], (g) + CMP $16, R12 + BLO poly1305_blocks_armv6_done + +poly1305_blocks_armv6_mainloop: + WORD $0xe31e0003 // TST R14, #3 not working see issue 5921 + BEQ poly1305_blocks_armv6_mainloop_aligned + ADD $100, R13, g + MOVW_UNALIGNED(R14, g, R0, 0) + MOVW_UNALIGNED(R14, g, R0, 4) + MOVW_UNALIGNED(R14, g, R0, 8) + MOVW_UNALIGNED(R14, g, R0, 12) + MOVM.IA (g), [R0-R3] + ADD $16, R14 + B poly1305_blocks_armv6_mainloop_loaded + +poly1305_blocks_armv6_mainloop_aligned: + MOVM.IA.W (R14), [R0-R3] + +poly1305_blocks_armv6_mainloop_loaded: + MOVW R0>>26, g + MOVW R1>>20, R11 + MOVW R2>>14, R12 + MOVW R14, 92(R13) + MOVW R3>>8, R4 + ORR R1<<6, g, g + ORR R2<<12, R11, R11 + ORR R3<<18, R12, R12 + BIC $0xfc000000, R0, R0 + BIC $0xfc000000, g, g + MOVW 84(R13), R3 + BIC $0xfc000000, R11, R11 + BIC $0xfc000000, R12, R12 + ADD R0, R5, R5 + ADD g, R6, R6 + ORR R3, R4, R4 + ADD R11, R7, R7 + ADD $116, R13, R14 + ADD R12, R8, R8 + ADD R4, R9, R9 + MOVM.IA (R14), [R0-R4] + MULLU R4, R5, (R11, g) + MULLU R3, R5, (R14, R12) + MULALU R3, R6, (R11, g) + MULALU R2, R6, (R14, R12) + MULALU R2, R7, (R11, g) + MULALU R1, R7, (R14, R12) + ADD R4<<2, R4, R4 + ADD R3<<2, R3, R3 + MULALU R1, R8, (R11, g) + MULALU R0, R8, (R14, R12) + MULALU R0, R9, (R11, g) + MULALU R4, R9, (R14, R12) + MOVW g, 76(R13) + MOVW R11, 80(R13) + MOVW R12, 68(R13) + MOVW R14, 72(R13) + MULLU R2, R5, (R11, g) + MULLU R1, R5, (R14, R12) + MULALU R1, R6, (R11, g) + MULALU R0, R6, (R14, R12) + MULALU R0, R7, (R11, g) + MULALU R4, R7, (R14, R12) + ADD R2<<2, R2, R2 + ADD R1<<2, R1, R1 + MULALU R4, R8, (R11, g) + MULALU R3, R8, (R14, R12) + MULALU R3, R9, (R11, g) + MULALU R2, R9, (R14, R12) + MOVW g, 60(R13) + MOVW R11, 64(R13) + MOVW R12, 52(R13) + MOVW R14, 56(R13) + MULLU R0, R5, (R11, g) + MULALU R4, R6, (R11, g) + MULALU R3, R7, (R11, g) + MULALU R2, R8, (R11, g) + MULALU R1, R9, (R11, g) + ADD $52, R13, R0 + MOVM.IA (R0), [R0-R7] + MOVW g>>26, R12 + MOVW R4>>26, R14 + ORR R11<<6, R12, R12 + ORR R5<<6, R14, R14 + BIC $0xfc000000, g, g + BIC $0xfc000000, R4, R4 + ADD.S R12, R0, R0 + ADC $0, R1, R1 + ADD.S R14, R6, R6 + ADC $0, R7, R7 + MOVW R0>>26, R12 + MOVW R6>>26, R14 + ORR R1<<6, R12, R12 + ORR R7<<6, R14, R14 + BIC $0xfc000000, R0, R0 + BIC $0xfc000000, R6, R6 + ADD R14<<2, R14, R14 + ADD.S R12, R2, R2 + ADC $0, R3, R3 + ADD R14, g, g + MOVW R2>>26, R12 + MOVW g>>26, R14 + ORR R3<<6, R12, R12 + BIC $0xfc000000, g, R5 + BIC $0xfc000000, R2, R7 + ADD R12, R4, R4 + ADD R14, R0, R0 + MOVW R4>>26, R12 + BIC $0xfc000000, R4, R8 + ADD R12, R6, R9 + MOVW 96(R13), R12 + MOVW 92(R13), R14 + MOVW R0, R6 + CMP $32, R12 + SUB $16, R12, R12 + MOVW R12, 96(R13) + BHS poly1305_blocks_armv6_mainloop + +poly1305_blocks_armv6_done: + MOVW 88(R13), R12 + MOVW R5, 20(R12) + MOVW R6, 24(R12) + MOVW R7, 28(R12) + MOVW R8, 32(R12) + MOVW R9, 36(R12) + ADD $48, R13, R0 + MOVM.DA (R0), [R4-R8, R14] + RET + +#define MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp) \ + MOVBU.P 1(Rsrc), Rtmp; \ + MOVBU.P Rtmp, 1(Rdst); \ + MOVBU.P 1(Rsrc), Rtmp; \ + MOVBU.P Rtmp, 1(Rdst) + +#define MOVWP_UNALIGNED(Rsrc, Rdst, Rtmp) \ + MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp); \ + MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp) + +// func poly1305_auth_armv6(out *[16]byte, m *byte, mlen uint32, key *[32]key) +TEXT ·poly1305_auth_armv6(SB), $196-16 + // The value 196, just above, is the sum of 64 (the size of the context + // structure) and 132 (the amount of stack needed). + // + // At this point, the stack pointer (R13) has been moved down. It + // points to the saved link register and there's 196 bytes of free + // space above it. + // + // The stack for this function looks like: + // + // +--------------------- + // | + // | 64 bytes of context structure + // | + // +--------------------- + // | + // | 112 bytes for poly1305_blocks_armv6 + // | + // +--------------------- + // | 16 bytes of final block, constructed at + // | poly1305_finish_ext_armv6_skip8 + // +--------------------- + // | four bytes of saved 'g' + // +--------------------- + // | lr, saved by prelude <- R13 points here + // +--------------------- + MOVW g, 4(R13) + + MOVW out+0(FP), R4 + MOVW m+4(FP), R5 + MOVW mlen+8(FP), R6 + MOVW key+12(FP), R7 + + ADD $136, R13, R0 // 136 = 4 + 4 + 16 + 112 + MOVW R7, R1 + + // poly1305_init_ext_armv6 will write to the stack from R13+4, but + // that's ok because none of the other values have been written yet. + BL poly1305_init_ext_armv6<>(SB) + BIC.S $15, R6, R2 + BEQ poly1305_auth_armv6_noblocks + ADD $136, R13, R0 + MOVW R5, R1 + ADD R2, R5, R5 + SUB R2, R6, R6 + BL poly1305_blocks_armv6<>(SB) + +poly1305_auth_armv6_noblocks: + ADD $136, R13, R0 + MOVW R5, R1 + MOVW R6, R2 + MOVW R4, R3 + + MOVW R0, R5 + MOVW R1, R6 + MOVW R2, R7 + MOVW R3, R8 + AND.S R2, R2, R2 + BEQ poly1305_finish_ext_armv6_noremaining + EOR R0, R0 + ADD $8, R13, R9 // 8 = offset to 16 byte scratch space + MOVW R0, (R9) + MOVW R0, 4(R9) + MOVW R0, 8(R9) + MOVW R0, 12(R9) + WORD $0xe3110003 // TST R1, #3 not working see issue 5921 + BEQ poly1305_finish_ext_armv6_aligned + WORD $0xe3120008 // TST R2, #8 not working see issue 5921 + BEQ poly1305_finish_ext_armv6_skip8 + MOVWP_UNALIGNED(R1, R9, g) + MOVWP_UNALIGNED(R1, R9, g) + +poly1305_finish_ext_armv6_skip8: + WORD $0xe3120004 // TST $4, R2 not working see issue 5921 + BEQ poly1305_finish_ext_armv6_skip4 + MOVWP_UNALIGNED(R1, R9, g) + +poly1305_finish_ext_armv6_skip4: + WORD $0xe3120002 // TST $2, R2 not working see issue 5921 + BEQ poly1305_finish_ext_armv6_skip2 + MOVHUP_UNALIGNED(R1, R9, g) + B poly1305_finish_ext_armv6_skip2 + +poly1305_finish_ext_armv6_aligned: + WORD $0xe3120008 // TST R2, #8 not working see issue 5921 + BEQ poly1305_finish_ext_armv6_skip8_aligned + MOVM.IA.W (R1), [g-R11] + MOVM.IA.W [g-R11], (R9) + +poly1305_finish_ext_armv6_skip8_aligned: + WORD $0xe3120004 // TST $4, R2 not working see issue 5921 + BEQ poly1305_finish_ext_armv6_skip4_aligned + MOVW.P 4(R1), g + MOVW.P g, 4(R9) + +poly1305_finish_ext_armv6_skip4_aligned: + WORD $0xe3120002 // TST $2, R2 not working see issue 5921 + BEQ poly1305_finish_ext_armv6_skip2 + MOVHU.P 2(R1), g + MOVH.P g, 2(R9) + +poly1305_finish_ext_armv6_skip2: + WORD $0xe3120001 // TST $1, R2 not working see issue 5921 + BEQ poly1305_finish_ext_armv6_skip1 + MOVBU.P 1(R1), g + MOVBU.P g, 1(R9) + +poly1305_finish_ext_armv6_skip1: + MOVW $1, R11 + MOVBU R11, 0(R9) + MOVW R11, 56(R5) + MOVW R5, R0 + ADD $8, R13, R1 + MOVW $16, R2 + BL poly1305_blocks_armv6<>(SB) + +poly1305_finish_ext_armv6_noremaining: + MOVW 20(R5), R0 + MOVW 24(R5), R1 + MOVW 28(R5), R2 + MOVW 32(R5), R3 + MOVW 36(R5), R4 + MOVW R4>>26, R12 + BIC $0xfc000000, R4, R4 + ADD R12<<2, R12, R12 + ADD R12, R0, R0 + MOVW R0>>26, R12 + BIC $0xfc000000, R0, R0 + ADD R12, R1, R1 + MOVW R1>>26, R12 + BIC $0xfc000000, R1, R1 + ADD R12, R2, R2 + MOVW R2>>26, R12 + BIC $0xfc000000, R2, R2 + ADD R12, R3, R3 + MOVW R3>>26, R12 + BIC $0xfc000000, R3, R3 + ADD R12, R4, R4 + ADD $5, R0, R6 + MOVW R6>>26, R12 + BIC $0xfc000000, R6, R6 + ADD R12, R1, R7 + MOVW R7>>26, R12 + BIC $0xfc000000, R7, R7 + ADD R12, R2, g + MOVW g>>26, R12 + BIC $0xfc000000, g, g + ADD R12, R3, R11 + MOVW $-(1<<26), R12 + ADD R11>>26, R12, R12 + BIC $0xfc000000, R11, R11 + ADD R12, R4, R9 + MOVW R9>>31, R12 + SUB $1, R12 + AND R12, R6, R6 + AND R12, R7, R7 + AND R12, g, g + AND R12, R11, R11 + AND R12, R9, R9 + MVN R12, R12 + AND R12, R0, R0 + AND R12, R1, R1 + AND R12, R2, R2 + AND R12, R3, R3 + AND R12, R4, R4 + ORR R6, R0, R0 + ORR R7, R1, R1 + ORR g, R2, R2 + ORR R11, R3, R3 + ORR R9, R4, R4 + ORR R1<<26, R0, R0 + MOVW R1>>6, R1 + ORR R2<<20, R1, R1 + MOVW R2>>12, R2 + ORR R3<<14, R2, R2 + MOVW R3>>18, R3 + ORR R4<<8, R3, R3 + MOVW 40(R5), R6 + MOVW 44(R5), R7 + MOVW 48(R5), g + MOVW 52(R5), R11 + ADD.S R6, R0, R0 + ADC.S R7, R1, R1 + ADC.S g, R2, R2 + ADC.S R11, R3, R3 + MOVM.IA [R0-R3], (R8) + MOVW R5, R12 + EOR R0, R0, R0 + EOR R1, R1, R1 + EOR R2, R2, R2 + EOR R3, R3, R3 + EOR R4, R4, R4 + EOR R5, R5, R5 + EOR R6, R6, R6 + EOR R7, R7, R7 + MOVM.IA.W [R0-R7], (R12) + MOVM.IA [R0-R7], (R12) + MOVW 4(R13), g + RET diff --git a/vendor/golang.org/x/crypto/poly1305/sum_generic.go b/vendor/golang.org/x/crypto/poly1305/sum_generic.go new file mode 100644 index 0000000..bab76ef --- /dev/null +++ b/vendor/golang.org/x/crypto/poly1305/sum_generic.go @@ -0,0 +1,172 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package poly1305 + +import "encoding/binary" + +const ( + msgBlock = uint32(1 << 24) + finalBlock = uint32(0) +) + +// sumGeneric generates an authenticator for msg using a one-time key and +// puts the 16-byte result into out. This is the generic implementation of +// Sum and should be called if no assembly implementation is available. +func sumGeneric(out *[TagSize]byte, msg []byte, key *[32]byte) { + h := newMACGeneric(key) + h.Write(msg) + h.Sum(out) +} + +func newMACGeneric(key *[32]byte) (h macGeneric) { + h.r[0] = binary.LittleEndian.Uint32(key[0:]) & 0x3ffffff + h.r[1] = (binary.LittleEndian.Uint32(key[3:]) >> 2) & 0x3ffff03 + h.r[2] = (binary.LittleEndian.Uint32(key[6:]) >> 4) & 0x3ffc0ff + h.r[3] = (binary.LittleEndian.Uint32(key[9:]) >> 6) & 0x3f03fff + h.r[4] = (binary.LittleEndian.Uint32(key[12:]) >> 8) & 0x00fffff + + h.s[0] = binary.LittleEndian.Uint32(key[16:]) + h.s[1] = binary.LittleEndian.Uint32(key[20:]) + h.s[2] = binary.LittleEndian.Uint32(key[24:]) + h.s[3] = binary.LittleEndian.Uint32(key[28:]) + return +} + +type macGeneric struct { + h, r [5]uint32 + s [4]uint32 + + buffer [TagSize]byte + offset int +} + +func (h *macGeneric) Write(p []byte) (n int, err error) { + n = len(p) + if h.offset > 0 { + remaining := TagSize - h.offset + if n < remaining { + h.offset += copy(h.buffer[h.offset:], p) + return n, nil + } + copy(h.buffer[h.offset:], p[:remaining]) + p = p[remaining:] + h.offset = 0 + updateGeneric(h.buffer[:], msgBlock, &(h.h), &(h.r)) + } + if nn := len(p) - (len(p) % TagSize); nn > 0 { + updateGeneric(p, msgBlock, &(h.h), &(h.r)) + p = p[nn:] + } + if len(p) > 0 { + h.offset += copy(h.buffer[h.offset:], p) + } + return n, nil +} + +func (h *macGeneric) Sum(out *[16]byte) { + H, R := h.h, h.r + if h.offset > 0 { + var buffer [TagSize]byte + copy(buffer[:], h.buffer[:h.offset]) + buffer[h.offset] = 1 // invariant: h.offset < TagSize + updateGeneric(buffer[:], finalBlock, &H, &R) + } + finalizeGeneric(out, &H, &(h.s)) +} + +func updateGeneric(msg []byte, flag uint32, h, r *[5]uint32) { + h0, h1, h2, h3, h4 := h[0], h[1], h[2], h[3], h[4] + r0, r1, r2, r3, r4 := uint64(r[0]), uint64(r[1]), uint64(r[2]), uint64(r[3]), uint64(r[4]) + R1, R2, R3, R4 := r1*5, r2*5, r3*5, r4*5 + + for len(msg) >= TagSize { + // h += msg + h0 += binary.LittleEndian.Uint32(msg[0:]) & 0x3ffffff + h1 += (binary.LittleEndian.Uint32(msg[3:]) >> 2) & 0x3ffffff + h2 += (binary.LittleEndian.Uint32(msg[6:]) >> 4) & 0x3ffffff + h3 += (binary.LittleEndian.Uint32(msg[9:]) >> 6) & 0x3ffffff + h4 += (binary.LittleEndian.Uint32(msg[12:]) >> 8) | flag + + // h *= r + d0 := (uint64(h0) * r0) + (uint64(h1) * R4) + (uint64(h2) * R3) + (uint64(h3) * R2) + (uint64(h4) * R1) + d1 := (d0 >> 26) + (uint64(h0) * r1) + (uint64(h1) * r0) + (uint64(h2) * R4) + (uint64(h3) * R3) + (uint64(h4) * R2) + d2 := (d1 >> 26) + (uint64(h0) * r2) + (uint64(h1) * r1) + (uint64(h2) * r0) + (uint64(h3) * R4) + (uint64(h4) * R3) + d3 := (d2 >> 26) + (uint64(h0) * r3) + (uint64(h1) * r2) + (uint64(h2) * r1) + (uint64(h3) * r0) + (uint64(h4) * R4) + d4 := (d3 >> 26) + (uint64(h0) * r4) + (uint64(h1) * r3) + (uint64(h2) * r2) + (uint64(h3) * r1) + (uint64(h4) * r0) + + // h %= p + h0 = uint32(d0) & 0x3ffffff + h1 = uint32(d1) & 0x3ffffff + h2 = uint32(d2) & 0x3ffffff + h3 = uint32(d3) & 0x3ffffff + h4 = uint32(d4) & 0x3ffffff + + h0 += uint32(d4>>26) * 5 + h1 += h0 >> 26 + h0 = h0 & 0x3ffffff + + msg = msg[TagSize:] + } + + h[0], h[1], h[2], h[3], h[4] = h0, h1, h2, h3, h4 +} + +func finalizeGeneric(out *[TagSize]byte, h *[5]uint32, s *[4]uint32) { + h0, h1, h2, h3, h4 := h[0], h[1], h[2], h[3], h[4] + + // h %= p reduction + h2 += h1 >> 26 + h1 &= 0x3ffffff + h3 += h2 >> 26 + h2 &= 0x3ffffff + h4 += h3 >> 26 + h3 &= 0x3ffffff + h0 += 5 * (h4 >> 26) + h4 &= 0x3ffffff + h1 += h0 >> 26 + h0 &= 0x3ffffff + + // h - p + t0 := h0 + 5 + t1 := h1 + (t0 >> 26) + t2 := h2 + (t1 >> 26) + t3 := h3 + (t2 >> 26) + t4 := h4 + (t3 >> 26) - (1 << 26) + t0 &= 0x3ffffff + t1 &= 0x3ffffff + t2 &= 0x3ffffff + t3 &= 0x3ffffff + + // select h if h < p else h - p + t_mask := (t4 >> 31) - 1 + h_mask := ^t_mask + h0 = (h0 & h_mask) | (t0 & t_mask) + h1 = (h1 & h_mask) | (t1 & t_mask) + h2 = (h2 & h_mask) | (t2 & t_mask) + h3 = (h3 & h_mask) | (t3 & t_mask) + h4 = (h4 & h_mask) | (t4 & t_mask) + + // h %= 2^128 + h0 |= h1 << 26 + h1 = ((h1 >> 6) | (h2 << 20)) + h2 = ((h2 >> 12) | (h3 << 14)) + h3 = ((h3 >> 18) | (h4 << 8)) + + // s: the s part of the key + // tag = (h + s) % (2^128) + t := uint64(h0) + uint64(s[0]) + h0 = uint32(t) + t = uint64(h1) + uint64(s[1]) + (t >> 32) + h1 = uint32(t) + t = uint64(h2) + uint64(s[2]) + (t >> 32) + h2 = uint32(t) + t = uint64(h3) + uint64(s[3]) + (t >> 32) + h3 = uint32(t) + + binary.LittleEndian.PutUint32(out[0:], h0) + binary.LittleEndian.PutUint32(out[4:], h1) + binary.LittleEndian.PutUint32(out[8:], h2) + binary.LittleEndian.PutUint32(out[12:], h3) +} diff --git a/vendor/golang.org/x/crypto/poly1305/sum_noasm.go b/vendor/golang.org/x/crypto/poly1305/sum_noasm.go new file mode 100644 index 0000000..8a9c207 --- /dev/null +++ b/vendor/golang.org/x/crypto/poly1305/sum_noasm.go @@ -0,0 +1,16 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build s390x,!go1.11 !arm,!amd64,!s390x,!ppc64le gccgo appengine nacl + +package poly1305 + +// Sum generates an authenticator for msg using a one-time key and puts the +// 16-byte result into out. Authenticating two different messages with the same +// key allows an attacker to forge messages at will. +func Sum(out *[TagSize]byte, msg []byte, key *[32]byte) { + h := newMAC(key) + h.Write(msg) + h.Sum(out) +} diff --git a/vendor/golang.org/x/crypto/poly1305/sum_ppc64le.go b/vendor/golang.org/x/crypto/poly1305/sum_ppc64le.go new file mode 100644 index 0000000..2402b63 --- /dev/null +++ b/vendor/golang.org/x/crypto/poly1305/sum_ppc64le.go @@ -0,0 +1,68 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build ppc64le,!gccgo,!appengine + +package poly1305 + +//go:noescape +func initialize(state *[7]uint64, key *[32]byte) + +//go:noescape +func update(state *[7]uint64, msg []byte) + +//go:noescape +func finalize(tag *[TagSize]byte, state *[7]uint64) + +// Sum generates an authenticator for m using a one-time key and puts the +// 16-byte result into out. Authenticating two different messages with the same +// key allows an attacker to forge messages at will. +func Sum(out *[16]byte, m []byte, key *[32]byte) { + h := newMAC(key) + h.Write(m) + h.Sum(out) +} + +func newMAC(key *[32]byte) (h mac) { + initialize(&h.state, key) + return +} + +type mac struct { + state [7]uint64 // := uint64{ h0, h1, h2, r0, r1, pad0, pad1 } + + buffer [TagSize]byte + offset int +} + +func (h *mac) Write(p []byte) (n int, err error) { + n = len(p) + if h.offset > 0 { + remaining := TagSize - h.offset + if n < remaining { + h.offset += copy(h.buffer[h.offset:], p) + return n, nil + } + copy(h.buffer[h.offset:], p[:remaining]) + p = p[remaining:] + h.offset = 0 + update(&h.state, h.buffer[:]) + } + if nn := len(p) - (len(p) % TagSize); nn > 0 { + update(&h.state, p[:nn]) + p = p[nn:] + } + if len(p) > 0 { + h.offset += copy(h.buffer[h.offset:], p) + } + return n, nil +} + +func (h *mac) Sum(out *[16]byte) { + state := h.state + if h.offset > 0 { + update(&state, h.buffer[:h.offset]) + } + finalize(out, &state) +} diff --git a/vendor/golang.org/x/crypto/poly1305/sum_ppc64le.s b/vendor/golang.org/x/crypto/poly1305/sum_ppc64le.s new file mode 100644 index 0000000..55c7167 --- /dev/null +++ b/vendor/golang.org/x/crypto/poly1305/sum_ppc64le.s @@ -0,0 +1,247 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build ppc64le,!gccgo,!appengine + +#include "textflag.h" + +// This was ported from the amd64 implementation. + +#define POLY1305_ADD(msg, h0, h1, h2, t0, t1, t2) \ + MOVD (msg), t0; \ + MOVD 8(msg), t1; \ + MOVD $1, t2; \ + ADDC t0, h0, h0; \ + ADDE t1, h1, h1; \ + ADDE t2, h2; \ + ADD $16, msg + +#define POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3, t4, t5) \ + MULLD r0, h0, t0; \ + MULLD r0, h1, t4; \ + MULHDU r0, h0, t1; \ + MULHDU r0, h1, t5; \ + ADDC t4, t1, t1; \ + MULLD r0, h2, t2; \ + ADDZE t5; \ + MULHDU r1, h0, t4; \ + MULLD r1, h0, h0; \ + ADD t5, t2, t2; \ + ADDC h0, t1, t1; \ + MULLD h2, r1, t3; \ + ADDZE t4, h0; \ + MULHDU r1, h1, t5; \ + MULLD r1, h1, t4; \ + ADDC t4, t2, t2; \ + ADDE t5, t3, t3; \ + ADDC h0, t2, t2; \ + MOVD $-4, t4; \ + MOVD t0, h0; \ + MOVD t1, h1; \ + ADDZE t3; \ + ANDCC $3, t2, h2; \ + AND t2, t4, t0; \ + ADDC t0, h0, h0; \ + ADDE t3, h1, h1; \ + SLD $62, t3, t4; \ + SRD $2, t2; \ + ADDZE h2; \ + OR t4, t2, t2; \ + SRD $2, t3; \ + ADDC t2, h0, h0; \ + ADDE t3, h1, h1; \ + ADDZE h2 + +DATA ·poly1305Mask<>+0x00(SB)/8, $0x0FFFFFFC0FFFFFFF +DATA ·poly1305Mask<>+0x08(SB)/8, $0x0FFFFFFC0FFFFFFC +GLOBL ·poly1305Mask<>(SB), RODATA, $16 + +// func update(state *[7]uint64, msg []byte) + +TEXT ·update(SB), $0-32 + MOVD state+0(FP), R3 + MOVD msg_base+8(FP), R4 + MOVD msg_len+16(FP), R5 + + MOVD 0(R3), R8 // h0 + MOVD 8(R3), R9 // h1 + MOVD 16(R3), R10 // h2 + MOVD 24(R3), R11 // r0 + MOVD 32(R3), R12 // r1 + + CMP R5, $16 + BLT bytes_between_0_and_15 + +loop: + POLY1305_ADD(R4, R8, R9, R10, R20, R21, R22) + +multiply: + POLY1305_MUL(R8, R9, R10, R11, R12, R16, R17, R18, R14, R20, R21) + ADD $-16, R5 + CMP R5, $16 + BGE loop + +bytes_between_0_and_15: + CMP $0, R5 + BEQ done + MOVD $0, R16 // h0 + MOVD $0, R17 // h1 + +flush_buffer: + CMP R5, $8 + BLE just1 + + MOVD $8, R21 + SUB R21, R5, R21 + + // Greater than 8 -- load the rightmost remaining bytes in msg + // and put into R17 (h1) + MOVD (R4)(R21), R17 + MOVD $16, R22 + + // Find the offset to those bytes + SUB R5, R22, R22 + SLD $3, R22 + + // Shift to get only the bytes in msg + SRD R22, R17, R17 + + // Put 1 at high end + MOVD $1, R23 + SLD $3, R21 + SLD R21, R23, R23 + OR R23, R17, R17 + + // Remainder is 8 + MOVD $8, R5 + +just1: + CMP R5, $8 + BLT less8 + + // Exactly 8 + MOVD (R4), R16 + + CMP $0, R17 + + // Check if we've already set R17; if not + // set 1 to indicate end of msg. + BNE carry + MOVD $1, R17 + BR carry + +less8: + MOVD $0, R16 // h0 + MOVD $0, R22 // shift count + CMP R5, $4 + BLT less4 + MOVWZ (R4), R16 + ADD $4, R4 + ADD $-4, R5 + MOVD $32, R22 + +less4: + CMP R5, $2 + BLT less2 + MOVHZ (R4), R21 + SLD R22, R21, R21 + OR R16, R21, R16 + ADD $16, R22 + ADD $-2, R5 + ADD $2, R4 + +less2: + CMP $0, R5 + BEQ insert1 + MOVBZ (R4), R21 + SLD R22, R21, R21 + OR R16, R21, R16 + ADD $8, R22 + +insert1: + // Insert 1 at end of msg + MOVD $1, R21 + SLD R22, R21, R21 + OR R16, R21, R16 + +carry: + // Add new values to h0, h1, h2 + ADDC R16, R8 + ADDE R17, R9 + ADDE $0, R10 + MOVD $16, R5 + ADD R5, R4 + BR multiply + +done: + // Save h0, h1, h2 in state + MOVD R8, 0(R3) + MOVD R9, 8(R3) + MOVD R10, 16(R3) + RET + +// func initialize(state *[7]uint64, key *[32]byte) +TEXT ·initialize(SB), $0-16 + MOVD state+0(FP), R3 + MOVD key+8(FP), R4 + + // state[0...7] is initialized with zero + // Load key + MOVD 0(R4), R5 + MOVD 8(R4), R6 + MOVD 16(R4), R7 + MOVD 24(R4), R8 + + // Address of key mask + MOVD $·poly1305Mask<>(SB), R9 + + // Save original key in state + MOVD R7, 40(R3) + MOVD R8, 48(R3) + + // Get mask + MOVD (R9), R7 + MOVD 8(R9), R8 + + // And with key + AND R5, R7, R5 + AND R6, R8, R6 + + // Save masked key in state + MOVD R5, 24(R3) + MOVD R6, 32(R3) + RET + +// func finalize(tag *[TagSize]byte, state *[7]uint64) +TEXT ·finalize(SB), $0-16 + MOVD tag+0(FP), R3 + MOVD state+8(FP), R4 + + // Get h0, h1, h2 from state + MOVD 0(R4), R5 + MOVD 8(R4), R6 + MOVD 16(R4), R7 + + // Save h0, h1 + MOVD R5, R8 + MOVD R6, R9 + MOVD $3, R20 + MOVD $-1, R21 + SUBC $-5, R5 + SUBE R21, R6 + SUBE R20, R7 + MOVD $0, R21 + SUBZE R21 + + // Check for carry + CMP $0, R21 + ISEL $2, R5, R8, R5 + ISEL $2, R6, R9, R6 + MOVD 40(R4), R8 + MOVD 48(R4), R9 + ADDC R8, R5 + ADDE R9, R6 + MOVD R5, 0(R3) + MOVD R6, 8(R3) + RET diff --git a/vendor/golang.org/x/crypto/poly1305/sum_s390x.go b/vendor/golang.org/x/crypto/poly1305/sum_s390x.go new file mode 100644 index 0000000..ec99e07 --- /dev/null +++ b/vendor/golang.org/x/crypto/poly1305/sum_s390x.go @@ -0,0 +1,42 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build s390x,go1.11,!gccgo,!appengine + +package poly1305 + +import ( + "golang.org/x/sys/cpu" +) + +// poly1305vx is an assembly implementation of Poly1305 that uses vector +// instructions. It must only be called if the vector facility (vx) is +// available. +//go:noescape +func poly1305vx(out *[16]byte, m *byte, mlen uint64, key *[32]byte) + +// poly1305vmsl is an assembly implementation of Poly1305 that uses vector +// instructions, including VMSL. It must only be called if the vector facility (vx) is +// available and if VMSL is supported. +//go:noescape +func poly1305vmsl(out *[16]byte, m *byte, mlen uint64, key *[32]byte) + +// Sum generates an authenticator for m using a one-time key and puts the +// 16-byte result into out. Authenticating two different messages with the same +// key allows an attacker to forge messages at will. +func Sum(out *[16]byte, m []byte, key *[32]byte) { + if cpu.S390X.HasVX { + var mPtr *byte + if len(m) > 0 { + mPtr = &m[0] + } + if cpu.S390X.HasVXE && len(m) > 256 { + poly1305vmsl(out, mPtr, uint64(len(m)), key) + } else { + poly1305vx(out, mPtr, uint64(len(m)), key) + } + } else { + sumGeneric(out, m, key) + } +} diff --git a/vendor/golang.org/x/crypto/poly1305/sum_s390x.s b/vendor/golang.org/x/crypto/poly1305/sum_s390x.s new file mode 100644 index 0000000..ca5a309 --- /dev/null +++ b/vendor/golang.org/x/crypto/poly1305/sum_s390x.s @@ -0,0 +1,378 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build s390x,go1.11,!gccgo,!appengine + +#include "textflag.h" + +// Implementation of Poly1305 using the vector facility (vx). + +// constants +#define MOD26 V0 +#define EX0 V1 +#define EX1 V2 +#define EX2 V3 + +// temporaries +#define T_0 V4 +#define T_1 V5 +#define T_2 V6 +#define T_3 V7 +#define T_4 V8 + +// key (r) +#define R_0 V9 +#define R_1 V10 +#define R_2 V11 +#define R_3 V12 +#define R_4 V13 +#define R5_1 V14 +#define R5_2 V15 +#define R5_3 V16 +#define R5_4 V17 +#define RSAVE_0 R5 +#define RSAVE_1 R6 +#define RSAVE_2 R7 +#define RSAVE_3 R8 +#define RSAVE_4 R9 +#define R5SAVE_1 V28 +#define R5SAVE_2 V29 +#define R5SAVE_3 V30 +#define R5SAVE_4 V31 + +// message block +#define F_0 V18 +#define F_1 V19 +#define F_2 V20 +#define F_3 V21 +#define F_4 V22 + +// accumulator +#define H_0 V23 +#define H_1 V24 +#define H_2 V25 +#define H_3 V26 +#define H_4 V27 + +GLOBL ·keyMask<>(SB), RODATA, $16 +DATA ·keyMask<>+0(SB)/8, $0xffffff0ffcffff0f +DATA ·keyMask<>+8(SB)/8, $0xfcffff0ffcffff0f + +GLOBL ·bswapMask<>(SB), RODATA, $16 +DATA ·bswapMask<>+0(SB)/8, $0x0f0e0d0c0b0a0908 +DATA ·bswapMask<>+8(SB)/8, $0x0706050403020100 + +GLOBL ·constants<>(SB), RODATA, $64 +// MOD26 +DATA ·constants<>+0(SB)/8, $0x3ffffff +DATA ·constants<>+8(SB)/8, $0x3ffffff +// EX0 +DATA ·constants<>+16(SB)/8, $0x0006050403020100 +DATA ·constants<>+24(SB)/8, $0x1016151413121110 +// EX1 +DATA ·constants<>+32(SB)/8, $0x060c0b0a09080706 +DATA ·constants<>+40(SB)/8, $0x161c1b1a19181716 +// EX2 +DATA ·constants<>+48(SB)/8, $0x0d0d0d0d0d0f0e0d +DATA ·constants<>+56(SB)/8, $0x1d1d1d1d1d1f1e1d + +// h = (f*g) % (2**130-5) [partial reduction] +#define MULTIPLY(f0, f1, f2, f3, f4, g0, g1, g2, g3, g4, g51, g52, g53, g54, h0, h1, h2, h3, h4) \ + VMLOF f0, g0, h0 \ + VMLOF f0, g1, h1 \ + VMLOF f0, g2, h2 \ + VMLOF f0, g3, h3 \ + VMLOF f0, g4, h4 \ + VMLOF f1, g54, T_0 \ + VMLOF f1, g0, T_1 \ + VMLOF f1, g1, T_2 \ + VMLOF f1, g2, T_3 \ + VMLOF f1, g3, T_4 \ + VMALOF f2, g53, h0, h0 \ + VMALOF f2, g54, h1, h1 \ + VMALOF f2, g0, h2, h2 \ + VMALOF f2, g1, h3, h3 \ + VMALOF f2, g2, h4, h4 \ + VMALOF f3, g52, T_0, T_0 \ + VMALOF f3, g53, T_1, T_1 \ + VMALOF f3, g54, T_2, T_2 \ + VMALOF f3, g0, T_3, T_3 \ + VMALOF f3, g1, T_4, T_4 \ + VMALOF f4, g51, h0, h0 \ + VMALOF f4, g52, h1, h1 \ + VMALOF f4, g53, h2, h2 \ + VMALOF f4, g54, h3, h3 \ + VMALOF f4, g0, h4, h4 \ + VAG T_0, h0, h0 \ + VAG T_1, h1, h1 \ + VAG T_2, h2, h2 \ + VAG T_3, h3, h3 \ + VAG T_4, h4, h4 + +// carry h0->h1 h3->h4, h1->h2 h4->h0, h0->h1 h2->h3, h3->h4 +#define REDUCE(h0, h1, h2, h3, h4) \ + VESRLG $26, h0, T_0 \ + VESRLG $26, h3, T_1 \ + VN MOD26, h0, h0 \ + VN MOD26, h3, h3 \ + VAG T_0, h1, h1 \ + VAG T_1, h4, h4 \ + VESRLG $26, h1, T_2 \ + VESRLG $26, h4, T_3 \ + VN MOD26, h1, h1 \ + VN MOD26, h4, h4 \ + VESLG $2, T_3, T_4 \ + VAG T_3, T_4, T_4 \ + VAG T_2, h2, h2 \ + VAG T_4, h0, h0 \ + VESRLG $26, h2, T_0 \ + VESRLG $26, h0, T_1 \ + VN MOD26, h2, h2 \ + VN MOD26, h0, h0 \ + VAG T_0, h3, h3 \ + VAG T_1, h1, h1 \ + VESRLG $26, h3, T_2 \ + VN MOD26, h3, h3 \ + VAG T_2, h4, h4 + +// expand in0 into d[0] and in1 into d[1] +#define EXPAND(in0, in1, d0, d1, d2, d3, d4) \ + VGBM $0x0707, d1 \ // d1=tmp + VPERM in0, in1, EX2, d4 \ + VPERM in0, in1, EX0, d0 \ + VPERM in0, in1, EX1, d2 \ + VN d1, d4, d4 \ + VESRLG $26, d0, d1 \ + VESRLG $30, d2, d3 \ + VESRLG $4, d2, d2 \ + VN MOD26, d0, d0 \ + VN MOD26, d1, d1 \ + VN MOD26, d2, d2 \ + VN MOD26, d3, d3 + +// pack h4:h0 into h1:h0 (no carry) +#define PACK(h0, h1, h2, h3, h4) \ + VESLG $26, h1, h1 \ + VESLG $26, h3, h3 \ + VO h0, h1, h0 \ + VO h2, h3, h2 \ + VESLG $4, h2, h2 \ + VLEIB $7, $48, h1 \ + VSLB h1, h2, h2 \ + VO h0, h2, h0 \ + VLEIB $7, $104, h1 \ + VSLB h1, h4, h3 \ + VO h3, h0, h0 \ + VLEIB $7, $24, h1 \ + VSRLB h1, h4, h1 + +// if h > 2**130-5 then h -= 2**130-5 +#define MOD(h0, h1, t0, t1, t2) \ + VZERO t0 \ + VLEIG $1, $5, t0 \ + VACCQ h0, t0, t1 \ + VAQ h0, t0, t0 \ + VONE t2 \ + VLEIG $1, $-4, t2 \ + VAQ t2, t1, t1 \ + VACCQ h1, t1, t1 \ + VONE t2 \ + VAQ t2, t1, t1 \ + VN h0, t1, t2 \ + VNC t0, t1, t1 \ + VO t1, t2, h0 + +// func poly1305vx(out *[16]byte, m *byte, mlen uint64, key *[32]key) +TEXT ·poly1305vx(SB), $0-32 + // This code processes up to 2 blocks (32 bytes) per iteration + // using the algorithm described in: + // NEON crypto, Daniel J. Bernstein & Peter Schwabe + // https://cryptojedi.org/papers/neoncrypto-20120320.pdf + LMG out+0(FP), R1, R4 // R1=out, R2=m, R3=mlen, R4=key + + // load MOD26, EX0, EX1 and EX2 + MOVD $·constants<>(SB), R5 + VLM (R5), MOD26, EX2 + + // setup r + VL (R4), T_0 + MOVD $·keyMask<>(SB), R6 + VL (R6), T_1 + VN T_0, T_1, T_0 + EXPAND(T_0, T_0, R_0, R_1, R_2, R_3, R_4) + + // setup r*5 + VLEIG $0, $5, T_0 + VLEIG $1, $5, T_0 + + // store r (for final block) + VMLOF T_0, R_1, R5SAVE_1 + VMLOF T_0, R_2, R5SAVE_2 + VMLOF T_0, R_3, R5SAVE_3 + VMLOF T_0, R_4, R5SAVE_4 + VLGVG $0, R_0, RSAVE_0 + VLGVG $0, R_1, RSAVE_1 + VLGVG $0, R_2, RSAVE_2 + VLGVG $0, R_3, RSAVE_3 + VLGVG $0, R_4, RSAVE_4 + + // skip r**2 calculation + CMPBLE R3, $16, skip + + // calculate r**2 + MULTIPLY(R_0, R_1, R_2, R_3, R_4, R_0, R_1, R_2, R_3, R_4, R5SAVE_1, R5SAVE_2, R5SAVE_3, R5SAVE_4, H_0, H_1, H_2, H_3, H_4) + REDUCE(H_0, H_1, H_2, H_3, H_4) + VLEIG $0, $5, T_0 + VLEIG $1, $5, T_0 + VMLOF T_0, H_1, R5_1 + VMLOF T_0, H_2, R5_2 + VMLOF T_0, H_3, R5_3 + VMLOF T_0, H_4, R5_4 + VLR H_0, R_0 + VLR H_1, R_1 + VLR H_2, R_2 + VLR H_3, R_3 + VLR H_4, R_4 + + // initialize h + VZERO H_0 + VZERO H_1 + VZERO H_2 + VZERO H_3 + VZERO H_4 + +loop: + CMPBLE R3, $32, b2 + VLM (R2), T_0, T_1 + SUB $32, R3 + MOVD $32(R2), R2 + EXPAND(T_0, T_1, F_0, F_1, F_2, F_3, F_4) + VLEIB $4, $1, F_4 + VLEIB $12, $1, F_4 + +multiply: + VAG H_0, F_0, F_0 + VAG H_1, F_1, F_1 + VAG H_2, F_2, F_2 + VAG H_3, F_3, F_3 + VAG H_4, F_4, F_4 + MULTIPLY(F_0, F_1, F_2, F_3, F_4, R_0, R_1, R_2, R_3, R_4, R5_1, R5_2, R5_3, R5_4, H_0, H_1, H_2, H_3, H_4) + REDUCE(H_0, H_1, H_2, H_3, H_4) + CMPBNE R3, $0, loop + +finish: + // sum vectors + VZERO T_0 + VSUMQG H_0, T_0, H_0 + VSUMQG H_1, T_0, H_1 + VSUMQG H_2, T_0, H_2 + VSUMQG H_3, T_0, H_3 + VSUMQG H_4, T_0, H_4 + + // h may be >= 2*(2**130-5) so we need to reduce it again + REDUCE(H_0, H_1, H_2, H_3, H_4) + + // carry h1->h4 + VESRLG $26, H_1, T_1 + VN MOD26, H_1, H_1 + VAQ T_1, H_2, H_2 + VESRLG $26, H_2, T_2 + VN MOD26, H_2, H_2 + VAQ T_2, H_3, H_3 + VESRLG $26, H_3, T_3 + VN MOD26, H_3, H_3 + VAQ T_3, H_4, H_4 + + // h is now < 2*(2**130-5) + // pack h into h1 (hi) and h0 (lo) + PACK(H_0, H_1, H_2, H_3, H_4) + + // if h > 2**130-5 then h -= 2**130-5 + MOD(H_0, H_1, T_0, T_1, T_2) + + // h += s + MOVD $·bswapMask<>(SB), R5 + VL (R5), T_1 + VL 16(R4), T_0 + VPERM T_0, T_0, T_1, T_0 // reverse bytes (to big) + VAQ T_0, H_0, H_0 + VPERM H_0, H_0, T_1, H_0 // reverse bytes (to little) + VST H_0, (R1) + + RET + +b2: + CMPBLE R3, $16, b1 + + // 2 blocks remaining + SUB $17, R3 + VL (R2), T_0 + VLL R3, 16(R2), T_1 + ADD $1, R3 + MOVBZ $1, R0 + CMPBEQ R3, $16, 2(PC) + VLVGB R3, R0, T_1 + EXPAND(T_0, T_1, F_0, F_1, F_2, F_3, F_4) + CMPBNE R3, $16, 2(PC) + VLEIB $12, $1, F_4 + VLEIB $4, $1, F_4 + + // setup [r²,r] + VLVGG $1, RSAVE_0, R_0 + VLVGG $1, RSAVE_1, R_1 + VLVGG $1, RSAVE_2, R_2 + VLVGG $1, RSAVE_3, R_3 + VLVGG $1, RSAVE_4, R_4 + VPDI $0, R5_1, R5SAVE_1, R5_1 + VPDI $0, R5_2, R5SAVE_2, R5_2 + VPDI $0, R5_3, R5SAVE_3, R5_3 + VPDI $0, R5_4, R5SAVE_4, R5_4 + + MOVD $0, R3 + BR multiply + +skip: + VZERO H_0 + VZERO H_1 + VZERO H_2 + VZERO H_3 + VZERO H_4 + + CMPBEQ R3, $0, finish + +b1: + // 1 block remaining + SUB $1, R3 + VLL R3, (R2), T_0 + ADD $1, R3 + MOVBZ $1, R0 + CMPBEQ R3, $16, 2(PC) + VLVGB R3, R0, T_0 + VZERO T_1 + EXPAND(T_0, T_1, F_0, F_1, F_2, F_3, F_4) + CMPBNE R3, $16, 2(PC) + VLEIB $4, $1, F_4 + VLEIG $1, $1, R_0 + VZERO R_1 + VZERO R_2 + VZERO R_3 + VZERO R_4 + VZERO R5_1 + VZERO R5_2 + VZERO R5_3 + VZERO R5_4 + + // setup [r, 1] + VLVGG $0, RSAVE_0, R_0 + VLVGG $0, RSAVE_1, R_1 + VLVGG $0, RSAVE_2, R_2 + VLVGG $0, RSAVE_3, R_3 + VLVGG $0, RSAVE_4, R_4 + VPDI $0, R5SAVE_1, R5_1, R5_1 + VPDI $0, R5SAVE_2, R5_2, R5_2 + VPDI $0, R5SAVE_3, R5_3, R5_3 + VPDI $0, R5SAVE_4, R5_4, R5_4 + + MOVD $0, R3 + BR multiply diff --git a/vendor/golang.org/x/crypto/poly1305/sum_vmsl_s390x.s b/vendor/golang.org/x/crypto/poly1305/sum_vmsl_s390x.s new file mode 100644 index 0000000..e60bbc1 --- /dev/null +++ b/vendor/golang.org/x/crypto/poly1305/sum_vmsl_s390x.s @@ -0,0 +1,909 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build s390x,go1.11,!gccgo,!appengine + +#include "textflag.h" + +// Implementation of Poly1305 using the vector facility (vx) and the VMSL instruction. + +// constants +#define EX0 V1 +#define EX1 V2 +#define EX2 V3 + +// temporaries +#define T_0 V4 +#define T_1 V5 +#define T_2 V6 +#define T_3 V7 +#define T_4 V8 +#define T_5 V9 +#define T_6 V10 +#define T_7 V11 +#define T_8 V12 +#define T_9 V13 +#define T_10 V14 + +// r**2 & r**4 +#define R_0 V15 +#define R_1 V16 +#define R_2 V17 +#define R5_1 V18 +#define R5_2 V19 +// key (r) +#define RSAVE_0 R7 +#define RSAVE_1 R8 +#define RSAVE_2 R9 +#define R5SAVE_1 R10 +#define R5SAVE_2 R11 + +// message block +#define M0 V20 +#define M1 V21 +#define M2 V22 +#define M3 V23 +#define M4 V24 +#define M5 V25 + +// accumulator +#define H0_0 V26 +#define H1_0 V27 +#define H2_0 V28 +#define H0_1 V29 +#define H1_1 V30 +#define H2_1 V31 + +GLOBL ·keyMask<>(SB), RODATA, $16 +DATA ·keyMask<>+0(SB)/8, $0xffffff0ffcffff0f +DATA ·keyMask<>+8(SB)/8, $0xfcffff0ffcffff0f + +GLOBL ·bswapMask<>(SB), RODATA, $16 +DATA ·bswapMask<>+0(SB)/8, $0x0f0e0d0c0b0a0908 +DATA ·bswapMask<>+8(SB)/8, $0x0706050403020100 + +GLOBL ·constants<>(SB), RODATA, $48 +// EX0 +DATA ·constants<>+0(SB)/8, $0x18191a1b1c1d1e1f +DATA ·constants<>+8(SB)/8, $0x0000050403020100 +// EX1 +DATA ·constants<>+16(SB)/8, $0x18191a1b1c1d1e1f +DATA ·constants<>+24(SB)/8, $0x00000a0908070605 +// EX2 +DATA ·constants<>+32(SB)/8, $0x18191a1b1c1d1e1f +DATA ·constants<>+40(SB)/8, $0x0000000f0e0d0c0b + +GLOBL ·c<>(SB), RODATA, $48 +// EX0 +DATA ·c<>+0(SB)/8, $0x0000050403020100 +DATA ·c<>+8(SB)/8, $0x0000151413121110 +// EX1 +DATA ·c<>+16(SB)/8, $0x00000a0908070605 +DATA ·c<>+24(SB)/8, $0x00001a1918171615 +// EX2 +DATA ·c<>+32(SB)/8, $0x0000000f0e0d0c0b +DATA ·c<>+40(SB)/8, $0x0000001f1e1d1c1b + +GLOBL ·reduce<>(SB), RODATA, $32 +// 44 bit +DATA ·reduce<>+0(SB)/8, $0x0 +DATA ·reduce<>+8(SB)/8, $0xfffffffffff +// 42 bit +DATA ·reduce<>+16(SB)/8, $0x0 +DATA ·reduce<>+24(SB)/8, $0x3ffffffffff + +// h = (f*g) % (2**130-5) [partial reduction] +// uses T_0...T_9 temporary registers +// input: m02_0, m02_1, m02_2, m13_0, m13_1, m13_2, r_0, r_1, r_2, r5_1, r5_2, m4_0, m4_1, m4_2, m5_0, m5_1, m5_2 +// temp: t0, t1, t2, t3, t4, t5, t6, t7, t8, t9 +// output: m02_0, m02_1, m02_2, m13_0, m13_1, m13_2 +#define MULTIPLY(m02_0, m02_1, m02_2, m13_0, m13_1, m13_2, r_0, r_1, r_2, r5_1, r5_2, m4_0, m4_1, m4_2, m5_0, m5_1, m5_2, t0, t1, t2, t3, t4, t5, t6, t7, t8, t9) \ + \ // Eliminate the dependency for the last 2 VMSLs + VMSLG m02_0, r_2, m4_2, m4_2 \ + VMSLG m13_0, r_2, m5_2, m5_2 \ // 8 VMSLs pipelined + VMSLG m02_0, r_0, m4_0, m4_0 \ + VMSLG m02_1, r5_2, V0, T_0 \ + VMSLG m02_0, r_1, m4_1, m4_1 \ + VMSLG m02_1, r_0, V0, T_1 \ + VMSLG m02_1, r_1, V0, T_2 \ + VMSLG m02_2, r5_1, V0, T_3 \ + VMSLG m02_2, r5_2, V0, T_4 \ + VMSLG m13_0, r_0, m5_0, m5_0 \ + VMSLG m13_1, r5_2, V0, T_5 \ + VMSLG m13_0, r_1, m5_1, m5_1 \ + VMSLG m13_1, r_0, V0, T_6 \ + VMSLG m13_1, r_1, V0, T_7 \ + VMSLG m13_2, r5_1, V0, T_8 \ + VMSLG m13_2, r5_2, V0, T_9 \ + VMSLG m02_2, r_0, m4_2, m4_2 \ + VMSLG m13_2, r_0, m5_2, m5_2 \ + VAQ m4_0, T_0, m02_0 \ + VAQ m4_1, T_1, m02_1 \ + VAQ m5_0, T_5, m13_0 \ + VAQ m5_1, T_6, m13_1 \ + VAQ m02_0, T_3, m02_0 \ + VAQ m02_1, T_4, m02_1 \ + VAQ m13_0, T_8, m13_0 \ + VAQ m13_1, T_9, m13_1 \ + VAQ m4_2, T_2, m02_2 \ + VAQ m5_2, T_7, m13_2 \ + +// SQUARE uses three limbs of r and r_2*5 to output square of r +// uses T_1, T_5 and T_7 temporary registers +// input: r_0, r_1, r_2, r5_2 +// temp: TEMP0, TEMP1, TEMP2 +// output: p0, p1, p2 +#define SQUARE(r_0, r_1, r_2, r5_2, p0, p1, p2, TEMP0, TEMP1, TEMP2) \ + VMSLG r_0, r_0, p0, p0 \ + VMSLG r_1, r5_2, V0, TEMP0 \ + VMSLG r_2, r5_2, p1, p1 \ + VMSLG r_0, r_1, V0, TEMP1 \ + VMSLG r_1, r_1, p2, p2 \ + VMSLG r_0, r_2, V0, TEMP2 \ + VAQ TEMP0, p0, p0 \ + VAQ TEMP1, p1, p1 \ + VAQ TEMP2, p2, p2 \ + VAQ TEMP0, p0, p0 \ + VAQ TEMP1, p1, p1 \ + VAQ TEMP2, p2, p2 \ + +// carry h0->h1->h2->h0 || h3->h4->h5->h3 +// uses T_2, T_4, T_5, T_7, T_8, T_9 +// t6, t7, t8, t9, t10, t11 +// input: h0, h1, h2, h3, h4, h5 +// temp: t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11 +// output: h0, h1, h2, h3, h4, h5 +#define REDUCE(h0, h1, h2, h3, h4, h5, t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11) \ + VLM (R12), t6, t7 \ // 44 and 42 bit clear mask + VLEIB $7, $0x28, t10 \ // 5 byte shift mask + VREPIB $4, t8 \ // 4 bit shift mask + VREPIB $2, t11 \ // 2 bit shift mask + VSRLB t10, h0, t0 \ // h0 byte shift + VSRLB t10, h1, t1 \ // h1 byte shift + VSRLB t10, h2, t2 \ // h2 byte shift + VSRLB t10, h3, t3 \ // h3 byte shift + VSRLB t10, h4, t4 \ // h4 byte shift + VSRLB t10, h5, t5 \ // h5 byte shift + VSRL t8, t0, t0 \ // h0 bit shift + VSRL t8, t1, t1 \ // h2 bit shift + VSRL t11, t2, t2 \ // h2 bit shift + VSRL t8, t3, t3 \ // h3 bit shift + VSRL t8, t4, t4 \ // h4 bit shift + VESLG $2, t2, t9 \ // h2 carry x5 + VSRL t11, t5, t5 \ // h5 bit shift + VN t6, h0, h0 \ // h0 clear carry + VAQ t2, t9, t2 \ // h2 carry x5 + VESLG $2, t5, t9 \ // h5 carry x5 + VN t6, h1, h1 \ // h1 clear carry + VN t7, h2, h2 \ // h2 clear carry + VAQ t5, t9, t5 \ // h5 carry x5 + VN t6, h3, h3 \ // h3 clear carry + VN t6, h4, h4 \ // h4 clear carry + VN t7, h5, h5 \ // h5 clear carry + VAQ t0, h1, h1 \ // h0->h1 + VAQ t3, h4, h4 \ // h3->h4 + VAQ t1, h2, h2 \ // h1->h2 + VAQ t4, h5, h5 \ // h4->h5 + VAQ t2, h0, h0 \ // h2->h0 + VAQ t5, h3, h3 \ // h5->h3 + VREPG $1, t6, t6 \ // 44 and 42 bit masks across both halves + VREPG $1, t7, t7 \ + VSLDB $8, h0, h0, h0 \ // set up [h0/1/2, h3/4/5] + VSLDB $8, h1, h1, h1 \ + VSLDB $8, h2, h2, h2 \ + VO h0, h3, h3 \ + VO h1, h4, h4 \ + VO h2, h5, h5 \ + VESRLG $44, h3, t0 \ // 44 bit shift right + VESRLG $44, h4, t1 \ + VESRLG $42, h5, t2 \ + VN t6, h3, h3 \ // clear carry bits + VN t6, h4, h4 \ + VN t7, h5, h5 \ + VESLG $2, t2, t9 \ // multiply carry by 5 + VAQ t9, t2, t2 \ + VAQ t0, h4, h4 \ + VAQ t1, h5, h5 \ + VAQ t2, h3, h3 \ + +// carry h0->h1->h2->h0 +// input: h0, h1, h2 +// temp: t0, t1, t2, t3, t4, t5, t6, t7, t8 +// output: h0, h1, h2 +#define REDUCE2(h0, h1, h2, t0, t1, t2, t3, t4, t5, t6, t7, t8) \ + VLEIB $7, $0x28, t3 \ // 5 byte shift mask + VREPIB $4, t4 \ // 4 bit shift mask + VREPIB $2, t7 \ // 2 bit shift mask + VGBM $0x003F, t5 \ // mask to clear carry bits + VSRLB t3, h0, t0 \ + VSRLB t3, h1, t1 \ + VSRLB t3, h2, t2 \ + VESRLG $4, t5, t5 \ // 44 bit clear mask + VSRL t4, t0, t0 \ + VSRL t4, t1, t1 \ + VSRL t7, t2, t2 \ + VESRLG $2, t5, t6 \ // 42 bit clear mask + VESLG $2, t2, t8 \ + VAQ t8, t2, t2 \ + VN t5, h0, h0 \ + VN t5, h1, h1 \ + VN t6, h2, h2 \ + VAQ t0, h1, h1 \ + VAQ t1, h2, h2 \ + VAQ t2, h0, h0 \ + VSRLB t3, h0, t0 \ + VSRLB t3, h1, t1 \ + VSRLB t3, h2, t2 \ + VSRL t4, t0, t0 \ + VSRL t4, t1, t1 \ + VSRL t7, t2, t2 \ + VN t5, h0, h0 \ + VN t5, h1, h1 \ + VESLG $2, t2, t8 \ + VN t6, h2, h2 \ + VAQ t0, h1, h1 \ + VAQ t8, t2, t2 \ + VAQ t1, h2, h2 \ + VAQ t2, h0, h0 \ + +// expands two message blocks into the lower halfs of the d registers +// moves the contents of the d registers into upper halfs +// input: in1, in2, d0, d1, d2, d3, d4, d5 +// temp: TEMP0, TEMP1, TEMP2, TEMP3 +// output: d0, d1, d2, d3, d4, d5 +#define EXPACC(in1, in2, d0, d1, d2, d3, d4, d5, TEMP0, TEMP1, TEMP2, TEMP3) \ + VGBM $0xff3f, TEMP0 \ + VGBM $0xff1f, TEMP1 \ + VESLG $4, d1, TEMP2 \ + VESLG $4, d4, TEMP3 \ + VESRLG $4, TEMP0, TEMP0 \ + VPERM in1, d0, EX0, d0 \ + VPERM in2, d3, EX0, d3 \ + VPERM in1, d2, EX2, d2 \ + VPERM in2, d5, EX2, d5 \ + VPERM in1, TEMP2, EX1, d1 \ + VPERM in2, TEMP3, EX1, d4 \ + VN TEMP0, d0, d0 \ + VN TEMP0, d3, d3 \ + VESRLG $4, d1, d1 \ + VESRLG $4, d4, d4 \ + VN TEMP1, d2, d2 \ + VN TEMP1, d5, d5 \ + VN TEMP0, d1, d1 \ + VN TEMP0, d4, d4 \ + +// expands one message block into the lower halfs of the d registers +// moves the contents of the d registers into upper halfs +// input: in, d0, d1, d2 +// temp: TEMP0, TEMP1, TEMP2 +// output: d0, d1, d2 +#define EXPACC2(in, d0, d1, d2, TEMP0, TEMP1, TEMP2) \ + VGBM $0xff3f, TEMP0 \ + VESLG $4, d1, TEMP2 \ + VGBM $0xff1f, TEMP1 \ + VPERM in, d0, EX0, d0 \ + VESRLG $4, TEMP0, TEMP0 \ + VPERM in, d2, EX2, d2 \ + VPERM in, TEMP2, EX1, d1 \ + VN TEMP0, d0, d0 \ + VN TEMP1, d2, d2 \ + VESRLG $4, d1, d1 \ + VN TEMP0, d1, d1 \ + +// pack h2:h0 into h1:h0 (no carry) +// input: h0, h1, h2 +// output: h0, h1, h2 +#define PACK(h0, h1, h2) \ + VMRLG h1, h2, h2 \ // copy h1 to upper half h2 + VESLG $44, h1, h1 \ // shift limb 1 44 bits, leaving 20 + VO h0, h1, h0 \ // combine h0 with 20 bits from limb 1 + VESRLG $20, h2, h1 \ // put top 24 bits of limb 1 into h1 + VLEIG $1, $0, h1 \ // clear h2 stuff from lower half of h1 + VO h0, h1, h0 \ // h0 now has 88 bits (limb 0 and 1) + VLEIG $0, $0, h2 \ // clear upper half of h2 + VESRLG $40, h2, h1 \ // h1 now has upper two bits of result + VLEIB $7, $88, h1 \ // for byte shift (11 bytes) + VSLB h1, h2, h2 \ // shift h2 11 bytes to the left + VO h0, h2, h0 \ // combine h0 with 20 bits from limb 1 + VLEIG $0, $0, h1 \ // clear upper half of h1 + +// if h > 2**130-5 then h -= 2**130-5 +// input: h0, h1 +// temp: t0, t1, t2 +// output: h0 +#define MOD(h0, h1, t0, t1, t2) \ + VZERO t0 \ + VLEIG $1, $5, t0 \ + VACCQ h0, t0, t1 \ + VAQ h0, t0, t0 \ + VONE t2 \ + VLEIG $1, $-4, t2 \ + VAQ t2, t1, t1 \ + VACCQ h1, t1, t1 \ + VONE t2 \ + VAQ t2, t1, t1 \ + VN h0, t1, t2 \ + VNC t0, t1, t1 \ + VO t1, t2, h0 \ + +// func poly1305vmsl(out *[16]byte, m *byte, mlen uint64, key *[32]key) +TEXT ·poly1305vmsl(SB), $0-32 + // This code processes 6 + up to 4 blocks (32 bytes) per iteration + // using the algorithm described in: + // NEON crypto, Daniel J. Bernstein & Peter Schwabe + // https://cryptojedi.org/papers/neoncrypto-20120320.pdf + // And as moddified for VMSL as described in + // Accelerating Poly1305 Cryptographic Message Authentication on the z14 + // O'Farrell et al, CASCON 2017, p48-55 + // https://ibm.ent.box.com/s/jf9gedj0e9d2vjctfyh186shaztavnht + + LMG out+0(FP), R1, R4 // R1=out, R2=m, R3=mlen, R4=key + VZERO V0 // c + + // load EX0, EX1 and EX2 + MOVD $·constants<>(SB), R5 + VLM (R5), EX0, EX2 // c + + // setup r + VL (R4), T_0 + MOVD $·keyMask<>(SB), R6 + VL (R6), T_1 + VN T_0, T_1, T_0 + VZERO T_2 // limbs for r + VZERO T_3 + VZERO T_4 + EXPACC2(T_0, T_2, T_3, T_4, T_1, T_5, T_7) + + // T_2, T_3, T_4: [0, r] + + // setup r*20 + VLEIG $0, $0, T_0 + VLEIG $1, $20, T_0 // T_0: [0, 20] + VZERO T_5 + VZERO T_6 + VMSLG T_0, T_3, T_5, T_5 + VMSLG T_0, T_4, T_6, T_6 + + // store r for final block in GR + VLGVG $1, T_2, RSAVE_0 // c + VLGVG $1, T_3, RSAVE_1 // c + VLGVG $1, T_4, RSAVE_2 // c + VLGVG $1, T_5, R5SAVE_1 // c + VLGVG $1, T_6, R5SAVE_2 // c + + // initialize h + VZERO H0_0 + VZERO H1_0 + VZERO H2_0 + VZERO H0_1 + VZERO H1_1 + VZERO H2_1 + + // initialize pointer for reduce constants + MOVD $·reduce<>(SB), R12 + + // calculate r**2 and 20*(r**2) + VZERO R_0 + VZERO R_1 + VZERO R_2 + SQUARE(T_2, T_3, T_4, T_6, R_0, R_1, R_2, T_1, T_5, T_7) + REDUCE2(R_0, R_1, R_2, M0, M1, M2, M3, M4, R5_1, R5_2, M5, T_1) + VZERO R5_1 + VZERO R5_2 + VMSLG T_0, R_1, R5_1, R5_1 + VMSLG T_0, R_2, R5_2, R5_2 + + // skip r**4 calculation if 3 blocks or less + CMPBLE R3, $48, b4 + + // calculate r**4 and 20*(r**4) + VZERO T_8 + VZERO T_9 + VZERO T_10 + SQUARE(R_0, R_1, R_2, R5_2, T_8, T_9, T_10, T_1, T_5, T_7) + REDUCE2(T_8, T_9, T_10, M0, M1, M2, M3, M4, T_2, T_3, M5, T_1) + VZERO T_2 + VZERO T_3 + VMSLG T_0, T_9, T_2, T_2 + VMSLG T_0, T_10, T_3, T_3 + + // put r**2 to the right and r**4 to the left of R_0, R_1, R_2 + VSLDB $8, T_8, T_8, T_8 + VSLDB $8, T_9, T_9, T_9 + VSLDB $8, T_10, T_10, T_10 + VSLDB $8, T_2, T_2, T_2 + VSLDB $8, T_3, T_3, T_3 + + VO T_8, R_0, R_0 + VO T_9, R_1, R_1 + VO T_10, R_2, R_2 + VO T_2, R5_1, R5_1 + VO T_3, R5_2, R5_2 + + CMPBLE R3, $80, load // less than or equal to 5 blocks in message + + // 6(or 5+1) blocks + SUB $81, R3 + VLM (R2), M0, M4 + VLL R3, 80(R2), M5 + ADD $1, R3 + MOVBZ $1, R0 + CMPBGE R3, $16, 2(PC) + VLVGB R3, R0, M5 + MOVD $96(R2), R2 + EXPACC(M0, M1, H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, T_0, T_1, T_2, T_3) + EXPACC(M2, M3, H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, T_0, T_1, T_2, T_3) + VLEIB $2, $1, H2_0 + VLEIB $2, $1, H2_1 + VLEIB $10, $1, H2_0 + VLEIB $10, $1, H2_1 + + VZERO M0 + VZERO M1 + VZERO M2 + VZERO M3 + VZERO T_4 + VZERO T_10 + EXPACC(M4, M5, M0, M1, M2, M3, T_4, T_10, T_0, T_1, T_2, T_3) + VLR T_4, M4 + VLEIB $10, $1, M2 + CMPBLT R3, $16, 2(PC) + VLEIB $10, $1, T_10 + MULTIPLY(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, R_0, R_1, R_2, R5_1, R5_2, M0, M1, M2, M3, M4, T_10, T_0, T_1, T_2, T_3, T_4, T_5, T_6, T_7, T_8, T_9) + REDUCE(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, T_10, M0, M1, M2, M3, M4, T_4, T_5, T_2, T_7, T_8, T_9) + VMRHG V0, H0_1, H0_0 + VMRHG V0, H1_1, H1_0 + VMRHG V0, H2_1, H2_0 + VMRLG V0, H0_1, H0_1 + VMRLG V0, H1_1, H1_1 + VMRLG V0, H2_1, H2_1 + + SUB $16, R3 + CMPBLE R3, $0, square + +load: + // load EX0, EX1 and EX2 + MOVD $·c<>(SB), R5 + VLM (R5), EX0, EX2 + +loop: + CMPBLE R3, $64, add // b4 // last 4 or less blocks left + + // next 4 full blocks + VLM (R2), M2, M5 + SUB $64, R3 + MOVD $64(R2), R2 + REDUCE(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, T_10, M0, M1, T_0, T_1, T_3, T_4, T_5, T_2, T_7, T_8, T_9) + + // expacc in-lined to create [m2, m3] limbs + VGBM $0x3f3f, T_0 // 44 bit clear mask + VGBM $0x1f1f, T_1 // 40 bit clear mask + VPERM M2, M3, EX0, T_3 + VESRLG $4, T_0, T_0 // 44 bit clear mask ready + VPERM M2, M3, EX1, T_4 + VPERM M2, M3, EX2, T_5 + VN T_0, T_3, T_3 + VESRLG $4, T_4, T_4 + VN T_1, T_5, T_5 + VN T_0, T_4, T_4 + VMRHG H0_1, T_3, H0_0 + VMRHG H1_1, T_4, H1_0 + VMRHG H2_1, T_5, H2_0 + VMRLG H0_1, T_3, H0_1 + VMRLG H1_1, T_4, H1_1 + VMRLG H2_1, T_5, H2_1 + VLEIB $10, $1, H2_0 + VLEIB $10, $1, H2_1 + VPERM M4, M5, EX0, T_3 + VPERM M4, M5, EX1, T_4 + VPERM M4, M5, EX2, T_5 + VN T_0, T_3, T_3 + VESRLG $4, T_4, T_4 + VN T_1, T_5, T_5 + VN T_0, T_4, T_4 + VMRHG V0, T_3, M0 + VMRHG V0, T_4, M1 + VMRHG V0, T_5, M2 + VMRLG V0, T_3, M3 + VMRLG V0, T_4, M4 + VMRLG V0, T_5, M5 + VLEIB $10, $1, M2 + VLEIB $10, $1, M5 + + MULTIPLY(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, R_0, R_1, R_2, R5_1, R5_2, M0, M1, M2, M3, M4, M5, T_0, T_1, T_2, T_3, T_4, T_5, T_6, T_7, T_8, T_9) + CMPBNE R3, $0, loop + REDUCE(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, T_10, M0, M1, M3, M4, M5, T_4, T_5, T_2, T_7, T_8, T_9) + VMRHG V0, H0_1, H0_0 + VMRHG V0, H1_1, H1_0 + VMRHG V0, H2_1, H2_0 + VMRLG V0, H0_1, H0_1 + VMRLG V0, H1_1, H1_1 + VMRLG V0, H2_1, H2_1 + + // load EX0, EX1, EX2 + MOVD $·constants<>(SB), R5 + VLM (R5), EX0, EX2 + + // sum vectors + VAQ H0_0, H0_1, H0_0 + VAQ H1_0, H1_1, H1_0 + VAQ H2_0, H2_1, H2_0 + + // h may be >= 2*(2**130-5) so we need to reduce it again + // M0...M4 are used as temps here + REDUCE2(H0_0, H1_0, H2_0, M0, M1, M2, M3, M4, T_9, T_10, H0_1, M5) + +next: // carry h1->h2 + VLEIB $7, $0x28, T_1 + VREPIB $4, T_2 + VGBM $0x003F, T_3 + VESRLG $4, T_3 + + // byte shift + VSRLB T_1, H1_0, T_4 + + // bit shift + VSRL T_2, T_4, T_4 + + // clear h1 carry bits + VN T_3, H1_0, H1_0 + + // add carry + VAQ T_4, H2_0, H2_0 + + // h is now < 2*(2**130-5) + // pack h into h1 (hi) and h0 (lo) + PACK(H0_0, H1_0, H2_0) + + // if h > 2**130-5 then h -= 2**130-5 + MOD(H0_0, H1_0, T_0, T_1, T_2) + + // h += s + MOVD $·bswapMask<>(SB), R5 + VL (R5), T_1 + VL 16(R4), T_0 + VPERM T_0, T_0, T_1, T_0 // reverse bytes (to big) + VAQ T_0, H0_0, H0_0 + VPERM H0_0, H0_0, T_1, H0_0 // reverse bytes (to little) + VST H0_0, (R1) + RET + +add: + // load EX0, EX1, EX2 + MOVD $·constants<>(SB), R5 + VLM (R5), EX0, EX2 + + REDUCE(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, T_10, M0, M1, M3, M4, M5, T_4, T_5, T_2, T_7, T_8, T_9) + VMRHG V0, H0_1, H0_0 + VMRHG V0, H1_1, H1_0 + VMRHG V0, H2_1, H2_0 + VMRLG V0, H0_1, H0_1 + VMRLG V0, H1_1, H1_1 + VMRLG V0, H2_1, H2_1 + CMPBLE R3, $64, b4 + +b4: + CMPBLE R3, $48, b3 // 3 blocks or less + + // 4(3+1) blocks remaining + SUB $49, R3 + VLM (R2), M0, M2 + VLL R3, 48(R2), M3 + ADD $1, R3 + MOVBZ $1, R0 + CMPBEQ R3, $16, 2(PC) + VLVGB R3, R0, M3 + MOVD $64(R2), R2 + EXPACC(M0, M1, H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, T_0, T_1, T_2, T_3) + VLEIB $10, $1, H2_0 + VLEIB $10, $1, H2_1 + VZERO M0 + VZERO M1 + VZERO M4 + VZERO M5 + VZERO T_4 + VZERO T_10 + EXPACC(M2, M3, M0, M1, M4, M5, T_4, T_10, T_0, T_1, T_2, T_3) + VLR T_4, M2 + VLEIB $10, $1, M4 + CMPBNE R3, $16, 2(PC) + VLEIB $10, $1, T_10 + MULTIPLY(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, R_0, R_1, R_2, R5_1, R5_2, M0, M1, M4, M5, M2, T_10, T_0, T_1, T_2, T_3, T_4, T_5, T_6, T_7, T_8, T_9) + REDUCE(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, T_10, M0, M1, M3, M4, M5, T_4, T_5, T_2, T_7, T_8, T_9) + VMRHG V0, H0_1, H0_0 + VMRHG V0, H1_1, H1_0 + VMRHG V0, H2_1, H2_0 + VMRLG V0, H0_1, H0_1 + VMRLG V0, H1_1, H1_1 + VMRLG V0, H2_1, H2_1 + SUB $16, R3 + CMPBLE R3, $0, square // this condition must always hold true! + +b3: + CMPBLE R3, $32, b2 + + // 3 blocks remaining + + // setup [r²,r] + VSLDB $8, R_0, R_0, R_0 + VSLDB $8, R_1, R_1, R_1 + VSLDB $8, R_2, R_2, R_2 + VSLDB $8, R5_1, R5_1, R5_1 + VSLDB $8, R5_2, R5_2, R5_2 + + VLVGG $1, RSAVE_0, R_0 + VLVGG $1, RSAVE_1, R_1 + VLVGG $1, RSAVE_2, R_2 + VLVGG $1, R5SAVE_1, R5_1 + VLVGG $1, R5SAVE_2, R5_2 + + // setup [h0, h1] + VSLDB $8, H0_0, H0_0, H0_0 + VSLDB $8, H1_0, H1_0, H1_0 + VSLDB $8, H2_0, H2_0, H2_0 + VO H0_1, H0_0, H0_0 + VO H1_1, H1_0, H1_0 + VO H2_1, H2_0, H2_0 + VZERO H0_1 + VZERO H1_1 + VZERO H2_1 + + VZERO M0 + VZERO M1 + VZERO M2 + VZERO M3 + VZERO M4 + VZERO M5 + + // H*[r**2, r] + MULTIPLY(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, R_0, R_1, R_2, R5_1, R5_2, M0, M1, M2, M3, M4, M5, T_0, T_1, T_2, T_3, T_4, T_5, T_6, T_7, T_8, T_9) + REDUCE2(H0_0, H1_0, H2_0, M0, M1, M2, M3, M4, H0_1, H1_1, T_10, M5) + + SUB $33, R3 + VLM (R2), M0, M1 + VLL R3, 32(R2), M2 + ADD $1, R3 + MOVBZ $1, R0 + CMPBEQ R3, $16, 2(PC) + VLVGB R3, R0, M2 + + // H += m0 + VZERO T_1 + VZERO T_2 + VZERO T_3 + EXPACC2(M0, T_1, T_2, T_3, T_4, T_5, T_6) + VLEIB $10, $1, T_3 + VAG H0_0, T_1, H0_0 + VAG H1_0, T_2, H1_0 + VAG H2_0, T_3, H2_0 + + VZERO M0 + VZERO M3 + VZERO M4 + VZERO M5 + VZERO T_10 + + // (H+m0)*r + MULTIPLY(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, R_0, R_1, R_2, R5_1, R5_2, M0, M3, M4, M5, V0, T_10, T_0, T_1, T_2, T_3, T_4, T_5, T_6, T_7, T_8, T_9) + REDUCE2(H0_0, H1_0, H2_0, M0, M3, M4, M5, T_10, H0_1, H1_1, H2_1, T_9) + + // H += m1 + VZERO V0 + VZERO T_1 + VZERO T_2 + VZERO T_3 + EXPACC2(M1, T_1, T_2, T_3, T_4, T_5, T_6) + VLEIB $10, $1, T_3 + VAQ H0_0, T_1, H0_0 + VAQ H1_0, T_2, H1_0 + VAQ H2_0, T_3, H2_0 + REDUCE2(H0_0, H1_0, H2_0, M0, M3, M4, M5, T_9, H0_1, H1_1, H2_1, T_10) + + // [H, m2] * [r**2, r] + EXPACC2(M2, H0_0, H1_0, H2_0, T_1, T_2, T_3) + CMPBNE R3, $16, 2(PC) + VLEIB $10, $1, H2_0 + VZERO M0 + VZERO M1 + VZERO M2 + VZERO M3 + VZERO M4 + VZERO M5 + MULTIPLY(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, R_0, R_1, R_2, R5_1, R5_2, M0, M1, M2, M3, M4, M5, T_0, T_1, T_2, T_3, T_4, T_5, T_6, T_7, T_8, T_9) + REDUCE2(H0_0, H1_0, H2_0, M0, M1, M2, M3, M4, H0_1, H1_1, M5, T_10) + SUB $16, R3 + CMPBLE R3, $0, next // this condition must always hold true! + +b2: + CMPBLE R3, $16, b1 + + // 2 blocks remaining + + // setup [r²,r] + VSLDB $8, R_0, R_0, R_0 + VSLDB $8, R_1, R_1, R_1 + VSLDB $8, R_2, R_2, R_2 + VSLDB $8, R5_1, R5_1, R5_1 + VSLDB $8, R5_2, R5_2, R5_2 + + VLVGG $1, RSAVE_0, R_0 + VLVGG $1, RSAVE_1, R_1 + VLVGG $1, RSAVE_2, R_2 + VLVGG $1, R5SAVE_1, R5_1 + VLVGG $1, R5SAVE_2, R5_2 + + // setup [h0, h1] + VSLDB $8, H0_0, H0_0, H0_0 + VSLDB $8, H1_0, H1_0, H1_0 + VSLDB $8, H2_0, H2_0, H2_0 + VO H0_1, H0_0, H0_0 + VO H1_1, H1_0, H1_0 + VO H2_1, H2_0, H2_0 + VZERO H0_1 + VZERO H1_1 + VZERO H2_1 + + VZERO M0 + VZERO M1 + VZERO M2 + VZERO M3 + VZERO M4 + VZERO M5 + + // H*[r**2, r] + MULTIPLY(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, R_0, R_1, R_2, R5_1, R5_2, M0, M1, M2, M3, M4, M5, T_0, T_1, T_2, T_3, T_4, T_5, T_6, T_7, T_8, T_9) + REDUCE(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, T_10, M0, M1, M2, M3, M4, T_4, T_5, T_2, T_7, T_8, T_9) + VMRHG V0, H0_1, H0_0 + VMRHG V0, H1_1, H1_0 + VMRHG V0, H2_1, H2_0 + VMRLG V0, H0_1, H0_1 + VMRLG V0, H1_1, H1_1 + VMRLG V0, H2_1, H2_1 + + // move h to the left and 0s at the right + VSLDB $8, H0_0, H0_0, H0_0 + VSLDB $8, H1_0, H1_0, H1_0 + VSLDB $8, H2_0, H2_0, H2_0 + + // get message blocks and append 1 to start + SUB $17, R3 + VL (R2), M0 + VLL R3, 16(R2), M1 + ADD $1, R3 + MOVBZ $1, R0 + CMPBEQ R3, $16, 2(PC) + VLVGB R3, R0, M1 + VZERO T_6 + VZERO T_7 + VZERO T_8 + EXPACC2(M0, T_6, T_7, T_8, T_1, T_2, T_3) + EXPACC2(M1, T_6, T_7, T_8, T_1, T_2, T_3) + VLEIB $2, $1, T_8 + CMPBNE R3, $16, 2(PC) + VLEIB $10, $1, T_8 + + // add [m0, m1] to h + VAG H0_0, T_6, H0_0 + VAG H1_0, T_7, H1_0 + VAG H2_0, T_8, H2_0 + + VZERO M2 + VZERO M3 + VZERO M4 + VZERO M5 + VZERO T_10 + VZERO M0 + + // at this point R_0 .. R5_2 look like [r**2, r] + MULTIPLY(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, R_0, R_1, R_2, R5_1, R5_2, M2, M3, M4, M5, T_10, M0, T_0, T_1, T_2, T_3, T_4, T_5, T_6, T_7, T_8, T_9) + REDUCE2(H0_0, H1_0, H2_0, M2, M3, M4, M5, T_9, H0_1, H1_1, H2_1, T_10) + SUB $16, R3, R3 + CMPBLE R3, $0, next + +b1: + CMPBLE R3, $0, next + + // 1 block remaining + + // setup [r²,r] + VSLDB $8, R_0, R_0, R_0 + VSLDB $8, R_1, R_1, R_1 + VSLDB $8, R_2, R_2, R_2 + VSLDB $8, R5_1, R5_1, R5_1 + VSLDB $8, R5_2, R5_2, R5_2 + + VLVGG $1, RSAVE_0, R_0 + VLVGG $1, RSAVE_1, R_1 + VLVGG $1, RSAVE_2, R_2 + VLVGG $1, R5SAVE_1, R5_1 + VLVGG $1, R5SAVE_2, R5_2 + + // setup [h0, h1] + VSLDB $8, H0_0, H0_0, H0_0 + VSLDB $8, H1_0, H1_0, H1_0 + VSLDB $8, H2_0, H2_0, H2_0 + VO H0_1, H0_0, H0_0 + VO H1_1, H1_0, H1_0 + VO H2_1, H2_0, H2_0 + VZERO H0_1 + VZERO H1_1 + VZERO H2_1 + + VZERO M0 + VZERO M1 + VZERO M2 + VZERO M3 + VZERO M4 + VZERO M5 + + // H*[r**2, r] + MULTIPLY(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, R_0, R_1, R_2, R5_1, R5_2, M0, M1, M2, M3, M4, M5, T_0, T_1, T_2, T_3, T_4, T_5, T_6, T_7, T_8, T_9) + REDUCE2(H0_0, H1_0, H2_0, M0, M1, M2, M3, M4, T_9, T_10, H0_1, M5) + + // set up [0, m0] limbs + SUB $1, R3 + VLL R3, (R2), M0 + ADD $1, R3 + MOVBZ $1, R0 + CMPBEQ R3, $16, 2(PC) + VLVGB R3, R0, M0 + VZERO T_1 + VZERO T_2 + VZERO T_3 + EXPACC2(M0, T_1, T_2, T_3, T_4, T_5, T_6)// limbs: [0, m] + CMPBNE R3, $16, 2(PC) + VLEIB $10, $1, T_3 + + // h+m0 + VAQ H0_0, T_1, H0_0 + VAQ H1_0, T_2, H1_0 + VAQ H2_0, T_3, H2_0 + + VZERO M0 + VZERO M1 + VZERO M2 + VZERO M3 + VZERO M4 + VZERO M5 + MULTIPLY(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, R_0, R_1, R_2, R5_1, R5_2, M0, M1, M2, M3, M4, M5, T_0, T_1, T_2, T_3, T_4, T_5, T_6, T_7, T_8, T_9) + REDUCE2(H0_0, H1_0, H2_0, M0, M1, M2, M3, M4, T_9, T_10, H0_1, M5) + + BR next + +square: + // setup [r²,r] + VSLDB $8, R_0, R_0, R_0 + VSLDB $8, R_1, R_1, R_1 + VSLDB $8, R_2, R_2, R_2 + VSLDB $8, R5_1, R5_1, R5_1 + VSLDB $8, R5_2, R5_2, R5_2 + + VLVGG $1, RSAVE_0, R_0 + VLVGG $1, RSAVE_1, R_1 + VLVGG $1, RSAVE_2, R_2 + VLVGG $1, R5SAVE_1, R5_1 + VLVGG $1, R5SAVE_2, R5_2 + + // setup [h0, h1] + VSLDB $8, H0_0, H0_0, H0_0 + VSLDB $8, H1_0, H1_0, H1_0 + VSLDB $8, H2_0, H2_0, H2_0 + VO H0_1, H0_0, H0_0 + VO H1_1, H1_0, H1_0 + VO H2_1, H2_0, H2_0 + VZERO H0_1 + VZERO H1_1 + VZERO H2_1 + + VZERO M0 + VZERO M1 + VZERO M2 + VZERO M3 + VZERO M4 + VZERO M5 + + // (h0*r**2) + (h1*r) + MULTIPLY(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, R_0, R_1, R_2, R5_1, R5_2, M0, M1, M2, M3, M4, M5, T_0, T_1, T_2, T_3, T_4, T_5, T_6, T_7, T_8, T_9) + REDUCE2(H0_0, H1_0, H2_0, M0, M1, M2, M3, M4, T_9, T_10, H0_1, M5) + BR next diff --git a/vendor/golang.org/x/crypto/salsa20/salsa/hsalsa20.go b/vendor/golang.org/x/crypto/salsa20/salsa/hsalsa20.go new file mode 100644 index 0000000..4c96147 --- /dev/null +++ b/vendor/golang.org/x/crypto/salsa20/salsa/hsalsa20.go @@ -0,0 +1,144 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package salsa provides low-level access to functions in the Salsa family. +package salsa // import "golang.org/x/crypto/salsa20/salsa" + +// Sigma is the Salsa20 constant for 256-bit keys. +var Sigma = [16]byte{'e', 'x', 'p', 'a', 'n', 'd', ' ', '3', '2', '-', 'b', 'y', 't', 'e', ' ', 'k'} + +// HSalsa20 applies the HSalsa20 core function to a 16-byte input in, 32-byte +// key k, and 16-byte constant c, and puts the result into the 32-byte array +// out. +func HSalsa20(out *[32]byte, in *[16]byte, k *[32]byte, c *[16]byte) { + x0 := uint32(c[0]) | uint32(c[1])<<8 | uint32(c[2])<<16 | uint32(c[3])<<24 + x1 := uint32(k[0]) | uint32(k[1])<<8 | uint32(k[2])<<16 | uint32(k[3])<<24 + x2 := uint32(k[4]) | uint32(k[5])<<8 | uint32(k[6])<<16 | uint32(k[7])<<24 + x3 := uint32(k[8]) | uint32(k[9])<<8 | uint32(k[10])<<16 | uint32(k[11])<<24 + x4 := uint32(k[12]) | uint32(k[13])<<8 | uint32(k[14])<<16 | uint32(k[15])<<24 + x5 := uint32(c[4]) | uint32(c[5])<<8 | uint32(c[6])<<16 | uint32(c[7])<<24 + x6 := uint32(in[0]) | uint32(in[1])<<8 | uint32(in[2])<<16 | uint32(in[3])<<24 + x7 := uint32(in[4]) | uint32(in[5])<<8 | uint32(in[6])<<16 | uint32(in[7])<<24 + x8 := uint32(in[8]) | uint32(in[9])<<8 | uint32(in[10])<<16 | uint32(in[11])<<24 + x9 := uint32(in[12]) | uint32(in[13])<<8 | uint32(in[14])<<16 | uint32(in[15])<<24 + x10 := uint32(c[8]) | uint32(c[9])<<8 | uint32(c[10])<<16 | uint32(c[11])<<24 + x11 := uint32(k[16]) | uint32(k[17])<<8 | uint32(k[18])<<16 | uint32(k[19])<<24 + x12 := uint32(k[20]) | uint32(k[21])<<8 | uint32(k[22])<<16 | uint32(k[23])<<24 + x13 := uint32(k[24]) | uint32(k[25])<<8 | uint32(k[26])<<16 | uint32(k[27])<<24 + x14 := uint32(k[28]) | uint32(k[29])<<8 | uint32(k[30])<<16 | uint32(k[31])<<24 + x15 := uint32(c[12]) | uint32(c[13])<<8 | uint32(c[14])<<16 | uint32(c[15])<<24 + + for i := 0; i < 20; i += 2 { + u := x0 + x12 + x4 ^= u<<7 | u>>(32-7) + u = x4 + x0 + x8 ^= u<<9 | u>>(32-9) + u = x8 + x4 + x12 ^= u<<13 | u>>(32-13) + u = x12 + x8 + x0 ^= u<<18 | u>>(32-18) + + u = x5 + x1 + x9 ^= u<<7 | u>>(32-7) + u = x9 + x5 + x13 ^= u<<9 | u>>(32-9) + u = x13 + x9 + x1 ^= u<<13 | u>>(32-13) + u = x1 + x13 + x5 ^= u<<18 | u>>(32-18) + + u = x10 + x6 + x14 ^= u<<7 | u>>(32-7) + u = x14 + x10 + x2 ^= u<<9 | u>>(32-9) + u = x2 + x14 + x6 ^= u<<13 | u>>(32-13) + u = x6 + x2 + x10 ^= u<<18 | u>>(32-18) + + u = x15 + x11 + x3 ^= u<<7 | u>>(32-7) + u = x3 + x15 + x7 ^= u<<9 | u>>(32-9) + u = x7 + x3 + x11 ^= u<<13 | u>>(32-13) + u = x11 + x7 + x15 ^= u<<18 | u>>(32-18) + + u = x0 + x3 + x1 ^= u<<7 | u>>(32-7) + u = x1 + x0 + x2 ^= u<<9 | u>>(32-9) + u = x2 + x1 + x3 ^= u<<13 | u>>(32-13) + u = x3 + x2 + x0 ^= u<<18 | u>>(32-18) + + u = x5 + x4 + x6 ^= u<<7 | u>>(32-7) + u = x6 + x5 + x7 ^= u<<9 | u>>(32-9) + u = x7 + x6 + x4 ^= u<<13 | u>>(32-13) + u = x4 + x7 + x5 ^= u<<18 | u>>(32-18) + + u = x10 + x9 + x11 ^= u<<7 | u>>(32-7) + u = x11 + x10 + x8 ^= u<<9 | u>>(32-9) + u = x8 + x11 + x9 ^= u<<13 | u>>(32-13) + u = x9 + x8 + x10 ^= u<<18 | u>>(32-18) + + u = x15 + x14 + x12 ^= u<<7 | u>>(32-7) + u = x12 + x15 + x13 ^= u<<9 | u>>(32-9) + u = x13 + x12 + x14 ^= u<<13 | u>>(32-13) + u = x14 + x13 + x15 ^= u<<18 | u>>(32-18) + } + out[0] = byte(x0) + out[1] = byte(x0 >> 8) + out[2] = byte(x0 >> 16) + out[3] = byte(x0 >> 24) + + out[4] = byte(x5) + out[5] = byte(x5 >> 8) + out[6] = byte(x5 >> 16) + out[7] = byte(x5 >> 24) + + out[8] = byte(x10) + out[9] = byte(x10 >> 8) + out[10] = byte(x10 >> 16) + out[11] = byte(x10 >> 24) + + out[12] = byte(x15) + out[13] = byte(x15 >> 8) + out[14] = byte(x15 >> 16) + out[15] = byte(x15 >> 24) + + out[16] = byte(x6) + out[17] = byte(x6 >> 8) + out[18] = byte(x6 >> 16) + out[19] = byte(x6 >> 24) + + out[20] = byte(x7) + out[21] = byte(x7 >> 8) + out[22] = byte(x7 >> 16) + out[23] = byte(x7 >> 24) + + out[24] = byte(x8) + out[25] = byte(x8 >> 8) + out[26] = byte(x8 >> 16) + out[27] = byte(x8 >> 24) + + out[28] = byte(x9) + out[29] = byte(x9 >> 8) + out[30] = byte(x9 >> 16) + out[31] = byte(x9 >> 24) +} diff --git a/vendor/golang.org/x/crypto/salsa20/salsa/salsa208.go b/vendor/golang.org/x/crypto/salsa20/salsa/salsa208.go new file mode 100644 index 0000000..9bfc092 --- /dev/null +++ b/vendor/golang.org/x/crypto/salsa20/salsa/salsa208.go @@ -0,0 +1,199 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package salsa + +// Core208 applies the Salsa20/8 core function to the 64-byte array in and puts +// the result into the 64-byte array out. The input and output may be the same array. +func Core208(out *[64]byte, in *[64]byte) { + j0 := uint32(in[0]) | uint32(in[1])<<8 | uint32(in[2])<<16 | uint32(in[3])<<24 + j1 := uint32(in[4]) | uint32(in[5])<<8 | uint32(in[6])<<16 | uint32(in[7])<<24 + j2 := uint32(in[8]) | uint32(in[9])<<8 | uint32(in[10])<<16 | uint32(in[11])<<24 + j3 := uint32(in[12]) | uint32(in[13])<<8 | uint32(in[14])<<16 | uint32(in[15])<<24 + j4 := uint32(in[16]) | uint32(in[17])<<8 | uint32(in[18])<<16 | uint32(in[19])<<24 + j5 := uint32(in[20]) | uint32(in[21])<<8 | uint32(in[22])<<16 | uint32(in[23])<<24 + j6 := uint32(in[24]) | uint32(in[25])<<8 | uint32(in[26])<<16 | uint32(in[27])<<24 + j7 := uint32(in[28]) | uint32(in[29])<<8 | uint32(in[30])<<16 | uint32(in[31])<<24 + j8 := uint32(in[32]) | uint32(in[33])<<8 | uint32(in[34])<<16 | uint32(in[35])<<24 + j9 := uint32(in[36]) | uint32(in[37])<<8 | uint32(in[38])<<16 | uint32(in[39])<<24 + j10 := uint32(in[40]) | uint32(in[41])<<8 | uint32(in[42])<<16 | uint32(in[43])<<24 + j11 := uint32(in[44]) | uint32(in[45])<<8 | uint32(in[46])<<16 | uint32(in[47])<<24 + j12 := uint32(in[48]) | uint32(in[49])<<8 | uint32(in[50])<<16 | uint32(in[51])<<24 + j13 := uint32(in[52]) | uint32(in[53])<<8 | uint32(in[54])<<16 | uint32(in[55])<<24 + j14 := uint32(in[56]) | uint32(in[57])<<8 | uint32(in[58])<<16 | uint32(in[59])<<24 + j15 := uint32(in[60]) | uint32(in[61])<<8 | uint32(in[62])<<16 | uint32(in[63])<<24 + + x0, x1, x2, x3, x4, x5, x6, x7, x8 := j0, j1, j2, j3, j4, j5, j6, j7, j8 + x9, x10, x11, x12, x13, x14, x15 := j9, j10, j11, j12, j13, j14, j15 + + for i := 0; i < 8; i += 2 { + u := x0 + x12 + x4 ^= u<<7 | u>>(32-7) + u = x4 + x0 + x8 ^= u<<9 | u>>(32-9) + u = x8 + x4 + x12 ^= u<<13 | u>>(32-13) + u = x12 + x8 + x0 ^= u<<18 | u>>(32-18) + + u = x5 + x1 + x9 ^= u<<7 | u>>(32-7) + u = x9 + x5 + x13 ^= u<<9 | u>>(32-9) + u = x13 + x9 + x1 ^= u<<13 | u>>(32-13) + u = x1 + x13 + x5 ^= u<<18 | u>>(32-18) + + u = x10 + x6 + x14 ^= u<<7 | u>>(32-7) + u = x14 + x10 + x2 ^= u<<9 | u>>(32-9) + u = x2 + x14 + x6 ^= u<<13 | u>>(32-13) + u = x6 + x2 + x10 ^= u<<18 | u>>(32-18) + + u = x15 + x11 + x3 ^= u<<7 | u>>(32-7) + u = x3 + x15 + x7 ^= u<<9 | u>>(32-9) + u = x7 + x3 + x11 ^= u<<13 | u>>(32-13) + u = x11 + x7 + x15 ^= u<<18 | u>>(32-18) + + u = x0 + x3 + x1 ^= u<<7 | u>>(32-7) + u = x1 + x0 + x2 ^= u<<9 | u>>(32-9) + u = x2 + x1 + x3 ^= u<<13 | u>>(32-13) + u = x3 + x2 + x0 ^= u<<18 | u>>(32-18) + + u = x5 + x4 + x6 ^= u<<7 | u>>(32-7) + u = x6 + x5 + x7 ^= u<<9 | u>>(32-9) + u = x7 + x6 + x4 ^= u<<13 | u>>(32-13) + u = x4 + x7 + x5 ^= u<<18 | u>>(32-18) + + u = x10 + x9 + x11 ^= u<<7 | u>>(32-7) + u = x11 + x10 + x8 ^= u<<9 | u>>(32-9) + u = x8 + x11 + x9 ^= u<<13 | u>>(32-13) + u = x9 + x8 + x10 ^= u<<18 | u>>(32-18) + + u = x15 + x14 + x12 ^= u<<7 | u>>(32-7) + u = x12 + x15 + x13 ^= u<<9 | u>>(32-9) + u = x13 + x12 + x14 ^= u<<13 | u>>(32-13) + u = x14 + x13 + x15 ^= u<<18 | u>>(32-18) + } + x0 += j0 + x1 += j1 + x2 += j2 + x3 += j3 + x4 += j4 + x5 += j5 + x6 += j6 + x7 += j7 + x8 += j8 + x9 += j9 + x10 += j10 + x11 += j11 + x12 += j12 + x13 += j13 + x14 += j14 + x15 += j15 + + out[0] = byte(x0) + out[1] = byte(x0 >> 8) + out[2] = byte(x0 >> 16) + out[3] = byte(x0 >> 24) + + out[4] = byte(x1) + out[5] = byte(x1 >> 8) + out[6] = byte(x1 >> 16) + out[7] = byte(x1 >> 24) + + out[8] = byte(x2) + out[9] = byte(x2 >> 8) + out[10] = byte(x2 >> 16) + out[11] = byte(x2 >> 24) + + out[12] = byte(x3) + out[13] = byte(x3 >> 8) + out[14] = byte(x3 >> 16) + out[15] = byte(x3 >> 24) + + out[16] = byte(x4) + out[17] = byte(x4 >> 8) + out[18] = byte(x4 >> 16) + out[19] = byte(x4 >> 24) + + out[20] = byte(x5) + out[21] = byte(x5 >> 8) + out[22] = byte(x5 >> 16) + out[23] = byte(x5 >> 24) + + out[24] = byte(x6) + out[25] = byte(x6 >> 8) + out[26] = byte(x6 >> 16) + out[27] = byte(x6 >> 24) + + out[28] = byte(x7) + out[29] = byte(x7 >> 8) + out[30] = byte(x7 >> 16) + out[31] = byte(x7 >> 24) + + out[32] = byte(x8) + out[33] = byte(x8 >> 8) + out[34] = byte(x8 >> 16) + out[35] = byte(x8 >> 24) + + out[36] = byte(x9) + out[37] = byte(x9 >> 8) + out[38] = byte(x9 >> 16) + out[39] = byte(x9 >> 24) + + out[40] = byte(x10) + out[41] = byte(x10 >> 8) + out[42] = byte(x10 >> 16) + out[43] = byte(x10 >> 24) + + out[44] = byte(x11) + out[45] = byte(x11 >> 8) + out[46] = byte(x11 >> 16) + out[47] = byte(x11 >> 24) + + out[48] = byte(x12) + out[49] = byte(x12 >> 8) + out[50] = byte(x12 >> 16) + out[51] = byte(x12 >> 24) + + out[52] = byte(x13) + out[53] = byte(x13 >> 8) + out[54] = byte(x13 >> 16) + out[55] = byte(x13 >> 24) + + out[56] = byte(x14) + out[57] = byte(x14 >> 8) + out[58] = byte(x14 >> 16) + out[59] = byte(x14 >> 24) + + out[60] = byte(x15) + out[61] = byte(x15 >> 8) + out[62] = byte(x15 >> 16) + out[63] = byte(x15 >> 24) +} diff --git a/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_amd64.go b/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_amd64.go new file mode 100644 index 0000000..656e8df --- /dev/null +++ b/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_amd64.go @@ -0,0 +1,23 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build amd64,!appengine,!gccgo + +package salsa + +//go:noescape + +// salsa2020XORKeyStream is implemented in salsa20_amd64.s. +func salsa2020XORKeyStream(out, in *byte, n uint64, nonce, key *byte) + +// XORKeyStream crypts bytes from in to out using the given key and counters. +// In and out must overlap entirely or not at all. Counter +// contains the raw salsa20 counter bytes (both nonce and block counter). +func XORKeyStream(out, in []byte, counter *[16]byte, key *[32]byte) { + if len(in) == 0 { + return + } + _ = out[len(in)-1] + salsa2020XORKeyStream(&out[0], &in[0], uint64(len(in)), &counter[0], &key[0]) +} diff --git a/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_amd64.s b/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_amd64.s new file mode 100644 index 0000000..18085d2 --- /dev/null +++ b/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_amd64.s @@ -0,0 +1,883 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build amd64,!appengine,!gccgo + +// This code was translated into a form compatible with 6a from the public +// domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html + +// func salsa2020XORKeyStream(out, in *byte, n uint64, nonce, key *byte) +// This needs up to 64 bytes at 360(SP); hence the non-obvious frame size. +TEXT ·salsa2020XORKeyStream(SB),0,$456-40 // frame = 424 + 32 byte alignment + MOVQ out+0(FP),DI + MOVQ in+8(FP),SI + MOVQ n+16(FP),DX + MOVQ nonce+24(FP),CX + MOVQ key+32(FP),R8 + + MOVQ SP,R12 + MOVQ SP,R9 + ADDQ $31, R9 + ANDQ $~31, R9 + MOVQ R9, SP + + MOVQ DX,R9 + MOVQ CX,DX + MOVQ R8,R10 + CMPQ R9,$0 + JBE DONE + START: + MOVL 20(R10),CX + MOVL 0(R10),R8 + MOVL 0(DX),AX + MOVL 16(R10),R11 + MOVL CX,0(SP) + MOVL R8, 4 (SP) + MOVL AX, 8 (SP) + MOVL R11, 12 (SP) + MOVL 8(DX),CX + MOVL 24(R10),R8 + MOVL 4(R10),AX + MOVL 4(DX),R11 + MOVL CX,16(SP) + MOVL R8, 20 (SP) + MOVL AX, 24 (SP) + MOVL R11, 28 (SP) + MOVL 12(DX),CX + MOVL 12(R10),DX + MOVL 28(R10),R8 + MOVL 8(R10),AX + MOVL DX,32(SP) + MOVL CX, 36 (SP) + MOVL R8, 40 (SP) + MOVL AX, 44 (SP) + MOVQ $1634760805,DX + MOVQ $857760878,CX + MOVQ $2036477234,R8 + MOVQ $1797285236,AX + MOVL DX,48(SP) + MOVL CX, 52 (SP) + MOVL R8, 56 (SP) + MOVL AX, 60 (SP) + CMPQ R9,$256 + JB BYTESBETWEEN1AND255 + MOVOA 48(SP),X0 + PSHUFL $0X55,X0,X1 + PSHUFL $0XAA,X0,X2 + PSHUFL $0XFF,X0,X3 + PSHUFL $0X00,X0,X0 + MOVOA X1,64(SP) + MOVOA X2,80(SP) + MOVOA X3,96(SP) + MOVOA X0,112(SP) + MOVOA 0(SP),X0 + PSHUFL $0XAA,X0,X1 + PSHUFL $0XFF,X0,X2 + PSHUFL $0X00,X0,X3 + PSHUFL $0X55,X0,X0 + MOVOA X1,128(SP) + MOVOA X2,144(SP) + MOVOA X3,160(SP) + MOVOA X0,176(SP) + MOVOA 16(SP),X0 + PSHUFL $0XFF,X0,X1 + PSHUFL $0X55,X0,X2 + PSHUFL $0XAA,X0,X0 + MOVOA X1,192(SP) + MOVOA X2,208(SP) + MOVOA X0,224(SP) + MOVOA 32(SP),X0 + PSHUFL $0X00,X0,X1 + PSHUFL $0XAA,X0,X2 + PSHUFL $0XFF,X0,X0 + MOVOA X1,240(SP) + MOVOA X2,256(SP) + MOVOA X0,272(SP) + BYTESATLEAST256: + MOVL 16(SP),DX + MOVL 36 (SP),CX + MOVL DX,288(SP) + MOVL CX,304(SP) + SHLQ $32,CX + ADDQ CX,DX + ADDQ $1,DX + MOVQ DX,CX + SHRQ $32,CX + MOVL DX, 292 (SP) + MOVL CX, 308 (SP) + ADDQ $1,DX + MOVQ DX,CX + SHRQ $32,CX + MOVL DX, 296 (SP) + MOVL CX, 312 (SP) + ADDQ $1,DX + MOVQ DX,CX + SHRQ $32,CX + MOVL DX, 300 (SP) + MOVL CX, 316 (SP) + ADDQ $1,DX + MOVQ DX,CX + SHRQ $32,CX + MOVL DX,16(SP) + MOVL CX, 36 (SP) + MOVQ R9,352(SP) + MOVQ $20,DX + MOVOA 64(SP),X0 + MOVOA 80(SP),X1 + MOVOA 96(SP),X2 + MOVOA 256(SP),X3 + MOVOA 272(SP),X4 + MOVOA 128(SP),X5 + MOVOA 144(SP),X6 + MOVOA 176(SP),X7 + MOVOA 192(SP),X8 + MOVOA 208(SP),X9 + MOVOA 224(SP),X10 + MOVOA 304(SP),X11 + MOVOA 112(SP),X12 + MOVOA 160(SP),X13 + MOVOA 240(SP),X14 + MOVOA 288(SP),X15 + MAINLOOP1: + MOVOA X1,320(SP) + MOVOA X2,336(SP) + MOVOA X13,X1 + PADDL X12,X1 + MOVOA X1,X2 + PSLLL $7,X1 + PXOR X1,X14 + PSRLL $25,X2 + PXOR X2,X14 + MOVOA X7,X1 + PADDL X0,X1 + MOVOA X1,X2 + PSLLL $7,X1 + PXOR X1,X11 + PSRLL $25,X2 + PXOR X2,X11 + MOVOA X12,X1 + PADDL X14,X1 + MOVOA X1,X2 + PSLLL $9,X1 + PXOR X1,X15 + PSRLL $23,X2 + PXOR X2,X15 + MOVOA X0,X1 + PADDL X11,X1 + MOVOA X1,X2 + PSLLL $9,X1 + PXOR X1,X9 + PSRLL $23,X2 + PXOR X2,X9 + MOVOA X14,X1 + PADDL X15,X1 + MOVOA X1,X2 + PSLLL $13,X1 + PXOR X1,X13 + PSRLL $19,X2 + PXOR X2,X13 + MOVOA X11,X1 + PADDL X9,X1 + MOVOA X1,X2 + PSLLL $13,X1 + PXOR X1,X7 + PSRLL $19,X2 + PXOR X2,X7 + MOVOA X15,X1 + PADDL X13,X1 + MOVOA X1,X2 + PSLLL $18,X1 + PXOR X1,X12 + PSRLL $14,X2 + PXOR X2,X12 + MOVOA 320(SP),X1 + MOVOA X12,320(SP) + MOVOA X9,X2 + PADDL X7,X2 + MOVOA X2,X12 + PSLLL $18,X2 + PXOR X2,X0 + PSRLL $14,X12 + PXOR X12,X0 + MOVOA X5,X2 + PADDL X1,X2 + MOVOA X2,X12 + PSLLL $7,X2 + PXOR X2,X3 + PSRLL $25,X12 + PXOR X12,X3 + MOVOA 336(SP),X2 + MOVOA X0,336(SP) + MOVOA X6,X0 + PADDL X2,X0 + MOVOA X0,X12 + PSLLL $7,X0 + PXOR X0,X4 + PSRLL $25,X12 + PXOR X12,X4 + MOVOA X1,X0 + PADDL X3,X0 + MOVOA X0,X12 + PSLLL $9,X0 + PXOR X0,X10 + PSRLL $23,X12 + PXOR X12,X10 + MOVOA X2,X0 + PADDL X4,X0 + MOVOA X0,X12 + PSLLL $9,X0 + PXOR X0,X8 + PSRLL $23,X12 + PXOR X12,X8 + MOVOA X3,X0 + PADDL X10,X0 + MOVOA X0,X12 + PSLLL $13,X0 + PXOR X0,X5 + PSRLL $19,X12 + PXOR X12,X5 + MOVOA X4,X0 + PADDL X8,X0 + MOVOA X0,X12 + PSLLL $13,X0 + PXOR X0,X6 + PSRLL $19,X12 + PXOR X12,X6 + MOVOA X10,X0 + PADDL X5,X0 + MOVOA X0,X12 + PSLLL $18,X0 + PXOR X0,X1 + PSRLL $14,X12 + PXOR X12,X1 + MOVOA 320(SP),X0 + MOVOA X1,320(SP) + MOVOA X4,X1 + PADDL X0,X1 + MOVOA X1,X12 + PSLLL $7,X1 + PXOR X1,X7 + PSRLL $25,X12 + PXOR X12,X7 + MOVOA X8,X1 + PADDL X6,X1 + MOVOA X1,X12 + PSLLL $18,X1 + PXOR X1,X2 + PSRLL $14,X12 + PXOR X12,X2 + MOVOA 336(SP),X12 + MOVOA X2,336(SP) + MOVOA X14,X1 + PADDL X12,X1 + MOVOA X1,X2 + PSLLL $7,X1 + PXOR X1,X5 + PSRLL $25,X2 + PXOR X2,X5 + MOVOA X0,X1 + PADDL X7,X1 + MOVOA X1,X2 + PSLLL $9,X1 + PXOR X1,X10 + PSRLL $23,X2 + PXOR X2,X10 + MOVOA X12,X1 + PADDL X5,X1 + MOVOA X1,X2 + PSLLL $9,X1 + PXOR X1,X8 + PSRLL $23,X2 + PXOR X2,X8 + MOVOA X7,X1 + PADDL X10,X1 + MOVOA X1,X2 + PSLLL $13,X1 + PXOR X1,X4 + PSRLL $19,X2 + PXOR X2,X4 + MOVOA X5,X1 + PADDL X8,X1 + MOVOA X1,X2 + PSLLL $13,X1 + PXOR X1,X14 + PSRLL $19,X2 + PXOR X2,X14 + MOVOA X10,X1 + PADDL X4,X1 + MOVOA X1,X2 + PSLLL $18,X1 + PXOR X1,X0 + PSRLL $14,X2 + PXOR X2,X0 + MOVOA 320(SP),X1 + MOVOA X0,320(SP) + MOVOA X8,X0 + PADDL X14,X0 + MOVOA X0,X2 + PSLLL $18,X0 + PXOR X0,X12 + PSRLL $14,X2 + PXOR X2,X12 + MOVOA X11,X0 + PADDL X1,X0 + MOVOA X0,X2 + PSLLL $7,X0 + PXOR X0,X6 + PSRLL $25,X2 + PXOR X2,X6 + MOVOA 336(SP),X2 + MOVOA X12,336(SP) + MOVOA X3,X0 + PADDL X2,X0 + MOVOA X0,X12 + PSLLL $7,X0 + PXOR X0,X13 + PSRLL $25,X12 + PXOR X12,X13 + MOVOA X1,X0 + PADDL X6,X0 + MOVOA X0,X12 + PSLLL $9,X0 + PXOR X0,X15 + PSRLL $23,X12 + PXOR X12,X15 + MOVOA X2,X0 + PADDL X13,X0 + MOVOA X0,X12 + PSLLL $9,X0 + PXOR X0,X9 + PSRLL $23,X12 + PXOR X12,X9 + MOVOA X6,X0 + PADDL X15,X0 + MOVOA X0,X12 + PSLLL $13,X0 + PXOR X0,X11 + PSRLL $19,X12 + PXOR X12,X11 + MOVOA X13,X0 + PADDL X9,X0 + MOVOA X0,X12 + PSLLL $13,X0 + PXOR X0,X3 + PSRLL $19,X12 + PXOR X12,X3 + MOVOA X15,X0 + PADDL X11,X0 + MOVOA X0,X12 + PSLLL $18,X0 + PXOR X0,X1 + PSRLL $14,X12 + PXOR X12,X1 + MOVOA X9,X0 + PADDL X3,X0 + MOVOA X0,X12 + PSLLL $18,X0 + PXOR X0,X2 + PSRLL $14,X12 + PXOR X12,X2 + MOVOA 320(SP),X12 + MOVOA 336(SP),X0 + SUBQ $2,DX + JA MAINLOOP1 + PADDL 112(SP),X12 + PADDL 176(SP),X7 + PADDL 224(SP),X10 + PADDL 272(SP),X4 + MOVD X12,DX + MOVD X7,CX + MOVD X10,R8 + MOVD X4,R9 + PSHUFL $0X39,X12,X12 + PSHUFL $0X39,X7,X7 + PSHUFL $0X39,X10,X10 + PSHUFL $0X39,X4,X4 + XORL 0(SI),DX + XORL 4(SI),CX + XORL 8(SI),R8 + XORL 12(SI),R9 + MOVL DX,0(DI) + MOVL CX,4(DI) + MOVL R8,8(DI) + MOVL R9,12(DI) + MOVD X12,DX + MOVD X7,CX + MOVD X10,R8 + MOVD X4,R9 + PSHUFL $0X39,X12,X12 + PSHUFL $0X39,X7,X7 + PSHUFL $0X39,X10,X10 + PSHUFL $0X39,X4,X4 + XORL 64(SI),DX + XORL 68(SI),CX + XORL 72(SI),R8 + XORL 76(SI),R9 + MOVL DX,64(DI) + MOVL CX,68(DI) + MOVL R8,72(DI) + MOVL R9,76(DI) + MOVD X12,DX + MOVD X7,CX + MOVD X10,R8 + MOVD X4,R9 + PSHUFL $0X39,X12,X12 + PSHUFL $0X39,X7,X7 + PSHUFL $0X39,X10,X10 + PSHUFL $0X39,X4,X4 + XORL 128(SI),DX + XORL 132(SI),CX + XORL 136(SI),R8 + XORL 140(SI),R9 + MOVL DX,128(DI) + MOVL CX,132(DI) + MOVL R8,136(DI) + MOVL R9,140(DI) + MOVD X12,DX + MOVD X7,CX + MOVD X10,R8 + MOVD X4,R9 + XORL 192(SI),DX + XORL 196(SI),CX + XORL 200(SI),R8 + XORL 204(SI),R9 + MOVL DX,192(DI) + MOVL CX,196(DI) + MOVL R8,200(DI) + MOVL R9,204(DI) + PADDL 240(SP),X14 + PADDL 64(SP),X0 + PADDL 128(SP),X5 + PADDL 192(SP),X8 + MOVD X14,DX + MOVD X0,CX + MOVD X5,R8 + MOVD X8,R9 + PSHUFL $0X39,X14,X14 + PSHUFL $0X39,X0,X0 + PSHUFL $0X39,X5,X5 + PSHUFL $0X39,X8,X8 + XORL 16(SI),DX + XORL 20(SI),CX + XORL 24(SI),R8 + XORL 28(SI),R9 + MOVL DX,16(DI) + MOVL CX,20(DI) + MOVL R8,24(DI) + MOVL R9,28(DI) + MOVD X14,DX + MOVD X0,CX + MOVD X5,R8 + MOVD X8,R9 + PSHUFL $0X39,X14,X14 + PSHUFL $0X39,X0,X0 + PSHUFL $0X39,X5,X5 + PSHUFL $0X39,X8,X8 + XORL 80(SI),DX + XORL 84(SI),CX + XORL 88(SI),R8 + XORL 92(SI),R9 + MOVL DX,80(DI) + MOVL CX,84(DI) + MOVL R8,88(DI) + MOVL R9,92(DI) + MOVD X14,DX + MOVD X0,CX + MOVD X5,R8 + MOVD X8,R9 + PSHUFL $0X39,X14,X14 + PSHUFL $0X39,X0,X0 + PSHUFL $0X39,X5,X5 + PSHUFL $0X39,X8,X8 + XORL 144(SI),DX + XORL 148(SI),CX + XORL 152(SI),R8 + XORL 156(SI),R9 + MOVL DX,144(DI) + MOVL CX,148(DI) + MOVL R8,152(DI) + MOVL R9,156(DI) + MOVD X14,DX + MOVD X0,CX + MOVD X5,R8 + MOVD X8,R9 + XORL 208(SI),DX + XORL 212(SI),CX + XORL 216(SI),R8 + XORL 220(SI),R9 + MOVL DX,208(DI) + MOVL CX,212(DI) + MOVL R8,216(DI) + MOVL R9,220(DI) + PADDL 288(SP),X15 + PADDL 304(SP),X11 + PADDL 80(SP),X1 + PADDL 144(SP),X6 + MOVD X15,DX + MOVD X11,CX + MOVD X1,R8 + MOVD X6,R9 + PSHUFL $0X39,X15,X15 + PSHUFL $0X39,X11,X11 + PSHUFL $0X39,X1,X1 + PSHUFL $0X39,X6,X6 + XORL 32(SI),DX + XORL 36(SI),CX + XORL 40(SI),R8 + XORL 44(SI),R9 + MOVL DX,32(DI) + MOVL CX,36(DI) + MOVL R8,40(DI) + MOVL R9,44(DI) + MOVD X15,DX + MOVD X11,CX + MOVD X1,R8 + MOVD X6,R9 + PSHUFL $0X39,X15,X15 + PSHUFL $0X39,X11,X11 + PSHUFL $0X39,X1,X1 + PSHUFL $0X39,X6,X6 + XORL 96(SI),DX + XORL 100(SI),CX + XORL 104(SI),R8 + XORL 108(SI),R9 + MOVL DX,96(DI) + MOVL CX,100(DI) + MOVL R8,104(DI) + MOVL R9,108(DI) + MOVD X15,DX + MOVD X11,CX + MOVD X1,R8 + MOVD X6,R9 + PSHUFL $0X39,X15,X15 + PSHUFL $0X39,X11,X11 + PSHUFL $0X39,X1,X1 + PSHUFL $0X39,X6,X6 + XORL 160(SI),DX + XORL 164(SI),CX + XORL 168(SI),R8 + XORL 172(SI),R9 + MOVL DX,160(DI) + MOVL CX,164(DI) + MOVL R8,168(DI) + MOVL R9,172(DI) + MOVD X15,DX + MOVD X11,CX + MOVD X1,R8 + MOVD X6,R9 + XORL 224(SI),DX + XORL 228(SI),CX + XORL 232(SI),R8 + XORL 236(SI),R9 + MOVL DX,224(DI) + MOVL CX,228(DI) + MOVL R8,232(DI) + MOVL R9,236(DI) + PADDL 160(SP),X13 + PADDL 208(SP),X9 + PADDL 256(SP),X3 + PADDL 96(SP),X2 + MOVD X13,DX + MOVD X9,CX + MOVD X3,R8 + MOVD X2,R9 + PSHUFL $0X39,X13,X13 + PSHUFL $0X39,X9,X9 + PSHUFL $0X39,X3,X3 + PSHUFL $0X39,X2,X2 + XORL 48(SI),DX + XORL 52(SI),CX + XORL 56(SI),R8 + XORL 60(SI),R9 + MOVL DX,48(DI) + MOVL CX,52(DI) + MOVL R8,56(DI) + MOVL R9,60(DI) + MOVD X13,DX + MOVD X9,CX + MOVD X3,R8 + MOVD X2,R9 + PSHUFL $0X39,X13,X13 + PSHUFL $0X39,X9,X9 + PSHUFL $0X39,X3,X3 + PSHUFL $0X39,X2,X2 + XORL 112(SI),DX + XORL 116(SI),CX + XORL 120(SI),R8 + XORL 124(SI),R9 + MOVL DX,112(DI) + MOVL CX,116(DI) + MOVL R8,120(DI) + MOVL R9,124(DI) + MOVD X13,DX + MOVD X9,CX + MOVD X3,R8 + MOVD X2,R9 + PSHUFL $0X39,X13,X13 + PSHUFL $0X39,X9,X9 + PSHUFL $0X39,X3,X3 + PSHUFL $0X39,X2,X2 + XORL 176(SI),DX + XORL 180(SI),CX + XORL 184(SI),R8 + XORL 188(SI),R9 + MOVL DX,176(DI) + MOVL CX,180(DI) + MOVL R8,184(DI) + MOVL R9,188(DI) + MOVD X13,DX + MOVD X9,CX + MOVD X3,R8 + MOVD X2,R9 + XORL 240(SI),DX + XORL 244(SI),CX + XORL 248(SI),R8 + XORL 252(SI),R9 + MOVL DX,240(DI) + MOVL CX,244(DI) + MOVL R8,248(DI) + MOVL R9,252(DI) + MOVQ 352(SP),R9 + SUBQ $256,R9 + ADDQ $256,SI + ADDQ $256,DI + CMPQ R9,$256 + JAE BYTESATLEAST256 + CMPQ R9,$0 + JBE DONE + BYTESBETWEEN1AND255: + CMPQ R9,$64 + JAE NOCOPY + MOVQ DI,DX + LEAQ 360(SP),DI + MOVQ R9,CX + REP; MOVSB + LEAQ 360(SP),DI + LEAQ 360(SP),SI + NOCOPY: + MOVQ R9,352(SP) + MOVOA 48(SP),X0 + MOVOA 0(SP),X1 + MOVOA 16(SP),X2 + MOVOA 32(SP),X3 + MOVOA X1,X4 + MOVQ $20,CX + MAINLOOP2: + PADDL X0,X4 + MOVOA X0,X5 + MOVOA X4,X6 + PSLLL $7,X4 + PSRLL $25,X6 + PXOR X4,X3 + PXOR X6,X3 + PADDL X3,X5 + MOVOA X3,X4 + MOVOA X5,X6 + PSLLL $9,X5 + PSRLL $23,X6 + PXOR X5,X2 + PSHUFL $0X93,X3,X3 + PXOR X6,X2 + PADDL X2,X4 + MOVOA X2,X5 + MOVOA X4,X6 + PSLLL $13,X4 + PSRLL $19,X6 + PXOR X4,X1 + PSHUFL $0X4E,X2,X2 + PXOR X6,X1 + PADDL X1,X5 + MOVOA X3,X4 + MOVOA X5,X6 + PSLLL $18,X5 + PSRLL $14,X6 + PXOR X5,X0 + PSHUFL $0X39,X1,X1 + PXOR X6,X0 + PADDL X0,X4 + MOVOA X0,X5 + MOVOA X4,X6 + PSLLL $7,X4 + PSRLL $25,X6 + PXOR X4,X1 + PXOR X6,X1 + PADDL X1,X5 + MOVOA X1,X4 + MOVOA X5,X6 + PSLLL $9,X5 + PSRLL $23,X6 + PXOR X5,X2 + PSHUFL $0X93,X1,X1 + PXOR X6,X2 + PADDL X2,X4 + MOVOA X2,X5 + MOVOA X4,X6 + PSLLL $13,X4 + PSRLL $19,X6 + PXOR X4,X3 + PSHUFL $0X4E,X2,X2 + PXOR X6,X3 + PADDL X3,X5 + MOVOA X1,X4 + MOVOA X5,X6 + PSLLL $18,X5 + PSRLL $14,X6 + PXOR X5,X0 + PSHUFL $0X39,X3,X3 + PXOR X6,X0 + PADDL X0,X4 + MOVOA X0,X5 + MOVOA X4,X6 + PSLLL $7,X4 + PSRLL $25,X6 + PXOR X4,X3 + PXOR X6,X3 + PADDL X3,X5 + MOVOA X3,X4 + MOVOA X5,X6 + PSLLL $9,X5 + PSRLL $23,X6 + PXOR X5,X2 + PSHUFL $0X93,X3,X3 + PXOR X6,X2 + PADDL X2,X4 + MOVOA X2,X5 + MOVOA X4,X6 + PSLLL $13,X4 + PSRLL $19,X6 + PXOR X4,X1 + PSHUFL $0X4E,X2,X2 + PXOR X6,X1 + PADDL X1,X5 + MOVOA X3,X4 + MOVOA X5,X6 + PSLLL $18,X5 + PSRLL $14,X6 + PXOR X5,X0 + PSHUFL $0X39,X1,X1 + PXOR X6,X0 + PADDL X0,X4 + MOVOA X0,X5 + MOVOA X4,X6 + PSLLL $7,X4 + PSRLL $25,X6 + PXOR X4,X1 + PXOR X6,X1 + PADDL X1,X5 + MOVOA X1,X4 + MOVOA X5,X6 + PSLLL $9,X5 + PSRLL $23,X6 + PXOR X5,X2 + PSHUFL $0X93,X1,X1 + PXOR X6,X2 + PADDL X2,X4 + MOVOA X2,X5 + MOVOA X4,X6 + PSLLL $13,X4 + PSRLL $19,X6 + PXOR X4,X3 + PSHUFL $0X4E,X2,X2 + PXOR X6,X3 + SUBQ $4,CX + PADDL X3,X5 + MOVOA X1,X4 + MOVOA X5,X6 + PSLLL $18,X5 + PXOR X7,X7 + PSRLL $14,X6 + PXOR X5,X0 + PSHUFL $0X39,X3,X3 + PXOR X6,X0 + JA MAINLOOP2 + PADDL 48(SP),X0 + PADDL 0(SP),X1 + PADDL 16(SP),X2 + PADDL 32(SP),X3 + MOVD X0,CX + MOVD X1,R8 + MOVD X2,R9 + MOVD X3,AX + PSHUFL $0X39,X0,X0 + PSHUFL $0X39,X1,X1 + PSHUFL $0X39,X2,X2 + PSHUFL $0X39,X3,X3 + XORL 0(SI),CX + XORL 48(SI),R8 + XORL 32(SI),R9 + XORL 16(SI),AX + MOVL CX,0(DI) + MOVL R8,48(DI) + MOVL R9,32(DI) + MOVL AX,16(DI) + MOVD X0,CX + MOVD X1,R8 + MOVD X2,R9 + MOVD X3,AX + PSHUFL $0X39,X0,X0 + PSHUFL $0X39,X1,X1 + PSHUFL $0X39,X2,X2 + PSHUFL $0X39,X3,X3 + XORL 20(SI),CX + XORL 4(SI),R8 + XORL 52(SI),R9 + XORL 36(SI),AX + MOVL CX,20(DI) + MOVL R8,4(DI) + MOVL R9,52(DI) + MOVL AX,36(DI) + MOVD X0,CX + MOVD X1,R8 + MOVD X2,R9 + MOVD X3,AX + PSHUFL $0X39,X0,X0 + PSHUFL $0X39,X1,X1 + PSHUFL $0X39,X2,X2 + PSHUFL $0X39,X3,X3 + XORL 40(SI),CX + XORL 24(SI),R8 + XORL 8(SI),R9 + XORL 56(SI),AX + MOVL CX,40(DI) + MOVL R8,24(DI) + MOVL R9,8(DI) + MOVL AX,56(DI) + MOVD X0,CX + MOVD X1,R8 + MOVD X2,R9 + MOVD X3,AX + XORL 60(SI),CX + XORL 44(SI),R8 + XORL 28(SI),R9 + XORL 12(SI),AX + MOVL CX,60(DI) + MOVL R8,44(DI) + MOVL R9,28(DI) + MOVL AX,12(DI) + MOVQ 352(SP),R9 + MOVL 16(SP),CX + MOVL 36 (SP),R8 + ADDQ $1,CX + SHLQ $32,R8 + ADDQ R8,CX + MOVQ CX,R8 + SHRQ $32,R8 + MOVL CX,16(SP) + MOVL R8, 36 (SP) + CMPQ R9,$64 + JA BYTESATLEAST65 + JAE BYTESATLEAST64 + MOVQ DI,SI + MOVQ DX,DI + MOVQ R9,CX + REP; MOVSB + BYTESATLEAST64: + DONE: + MOVQ R12,SP + RET + BYTESATLEAST65: + SUBQ $64,R9 + ADDQ $64,DI + ADDQ $64,SI + JMP BYTESBETWEEN1AND255 diff --git a/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_noasm.go b/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_noasm.go new file mode 100644 index 0000000..8a46bd2 --- /dev/null +++ b/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_noasm.go @@ -0,0 +1,14 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !amd64 appengine gccgo + +package salsa + +// XORKeyStream crypts bytes from in to out using the given key and counters. +// In and out must overlap entirely or not at all. Counter +// contains the raw salsa20 counter bytes (both nonce and block counter). +func XORKeyStream(out, in []byte, counter *[16]byte, key *[32]byte) { + genericXORKeyStream(out, in, counter, key) +} diff --git a/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_ref.go b/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_ref.go new file mode 100644 index 0000000..68169c6 --- /dev/null +++ b/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_ref.go @@ -0,0 +1,231 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package salsa + +const rounds = 20 + +// core applies the Salsa20 core function to 16-byte input in, 32-byte key k, +// and 16-byte constant c, and puts the result into 64-byte array out. +func core(out *[64]byte, in *[16]byte, k *[32]byte, c *[16]byte) { + j0 := uint32(c[0]) | uint32(c[1])<<8 | uint32(c[2])<<16 | uint32(c[3])<<24 + j1 := uint32(k[0]) | uint32(k[1])<<8 | uint32(k[2])<<16 | uint32(k[3])<<24 + j2 := uint32(k[4]) | uint32(k[5])<<8 | uint32(k[6])<<16 | uint32(k[7])<<24 + j3 := uint32(k[8]) | uint32(k[9])<<8 | uint32(k[10])<<16 | uint32(k[11])<<24 + j4 := uint32(k[12]) | uint32(k[13])<<8 | uint32(k[14])<<16 | uint32(k[15])<<24 + j5 := uint32(c[4]) | uint32(c[5])<<8 | uint32(c[6])<<16 | uint32(c[7])<<24 + j6 := uint32(in[0]) | uint32(in[1])<<8 | uint32(in[2])<<16 | uint32(in[3])<<24 + j7 := uint32(in[4]) | uint32(in[5])<<8 | uint32(in[6])<<16 | uint32(in[7])<<24 + j8 := uint32(in[8]) | uint32(in[9])<<8 | uint32(in[10])<<16 | uint32(in[11])<<24 + j9 := uint32(in[12]) | uint32(in[13])<<8 | uint32(in[14])<<16 | uint32(in[15])<<24 + j10 := uint32(c[8]) | uint32(c[9])<<8 | uint32(c[10])<<16 | uint32(c[11])<<24 + j11 := uint32(k[16]) | uint32(k[17])<<8 | uint32(k[18])<<16 | uint32(k[19])<<24 + j12 := uint32(k[20]) | uint32(k[21])<<8 | uint32(k[22])<<16 | uint32(k[23])<<24 + j13 := uint32(k[24]) | uint32(k[25])<<8 | uint32(k[26])<<16 | uint32(k[27])<<24 + j14 := uint32(k[28]) | uint32(k[29])<<8 | uint32(k[30])<<16 | uint32(k[31])<<24 + j15 := uint32(c[12]) | uint32(c[13])<<8 | uint32(c[14])<<16 | uint32(c[15])<<24 + + x0, x1, x2, x3, x4, x5, x6, x7, x8 := j0, j1, j2, j3, j4, j5, j6, j7, j8 + x9, x10, x11, x12, x13, x14, x15 := j9, j10, j11, j12, j13, j14, j15 + + for i := 0; i < rounds; i += 2 { + u := x0 + x12 + x4 ^= u<<7 | u>>(32-7) + u = x4 + x0 + x8 ^= u<<9 | u>>(32-9) + u = x8 + x4 + x12 ^= u<<13 | u>>(32-13) + u = x12 + x8 + x0 ^= u<<18 | u>>(32-18) + + u = x5 + x1 + x9 ^= u<<7 | u>>(32-7) + u = x9 + x5 + x13 ^= u<<9 | u>>(32-9) + u = x13 + x9 + x1 ^= u<<13 | u>>(32-13) + u = x1 + x13 + x5 ^= u<<18 | u>>(32-18) + + u = x10 + x6 + x14 ^= u<<7 | u>>(32-7) + u = x14 + x10 + x2 ^= u<<9 | u>>(32-9) + u = x2 + x14 + x6 ^= u<<13 | u>>(32-13) + u = x6 + x2 + x10 ^= u<<18 | u>>(32-18) + + u = x15 + x11 + x3 ^= u<<7 | u>>(32-7) + u = x3 + x15 + x7 ^= u<<9 | u>>(32-9) + u = x7 + x3 + x11 ^= u<<13 | u>>(32-13) + u = x11 + x7 + x15 ^= u<<18 | u>>(32-18) + + u = x0 + x3 + x1 ^= u<<7 | u>>(32-7) + u = x1 + x0 + x2 ^= u<<9 | u>>(32-9) + u = x2 + x1 + x3 ^= u<<13 | u>>(32-13) + u = x3 + x2 + x0 ^= u<<18 | u>>(32-18) + + u = x5 + x4 + x6 ^= u<<7 | u>>(32-7) + u = x6 + x5 + x7 ^= u<<9 | u>>(32-9) + u = x7 + x6 + x4 ^= u<<13 | u>>(32-13) + u = x4 + x7 + x5 ^= u<<18 | u>>(32-18) + + u = x10 + x9 + x11 ^= u<<7 | u>>(32-7) + u = x11 + x10 + x8 ^= u<<9 | u>>(32-9) + u = x8 + x11 + x9 ^= u<<13 | u>>(32-13) + u = x9 + x8 + x10 ^= u<<18 | u>>(32-18) + + u = x15 + x14 + x12 ^= u<<7 | u>>(32-7) + u = x12 + x15 + x13 ^= u<<9 | u>>(32-9) + u = x13 + x12 + x14 ^= u<<13 | u>>(32-13) + u = x14 + x13 + x15 ^= u<<18 | u>>(32-18) + } + x0 += j0 + x1 += j1 + x2 += j2 + x3 += j3 + x4 += j4 + x5 += j5 + x6 += j6 + x7 += j7 + x8 += j8 + x9 += j9 + x10 += j10 + x11 += j11 + x12 += j12 + x13 += j13 + x14 += j14 + x15 += j15 + + out[0] = byte(x0) + out[1] = byte(x0 >> 8) + out[2] = byte(x0 >> 16) + out[3] = byte(x0 >> 24) + + out[4] = byte(x1) + out[5] = byte(x1 >> 8) + out[6] = byte(x1 >> 16) + out[7] = byte(x1 >> 24) + + out[8] = byte(x2) + out[9] = byte(x2 >> 8) + out[10] = byte(x2 >> 16) + out[11] = byte(x2 >> 24) + + out[12] = byte(x3) + out[13] = byte(x3 >> 8) + out[14] = byte(x3 >> 16) + out[15] = byte(x3 >> 24) + + out[16] = byte(x4) + out[17] = byte(x4 >> 8) + out[18] = byte(x4 >> 16) + out[19] = byte(x4 >> 24) + + out[20] = byte(x5) + out[21] = byte(x5 >> 8) + out[22] = byte(x5 >> 16) + out[23] = byte(x5 >> 24) + + out[24] = byte(x6) + out[25] = byte(x6 >> 8) + out[26] = byte(x6 >> 16) + out[27] = byte(x6 >> 24) + + out[28] = byte(x7) + out[29] = byte(x7 >> 8) + out[30] = byte(x7 >> 16) + out[31] = byte(x7 >> 24) + + out[32] = byte(x8) + out[33] = byte(x8 >> 8) + out[34] = byte(x8 >> 16) + out[35] = byte(x8 >> 24) + + out[36] = byte(x9) + out[37] = byte(x9 >> 8) + out[38] = byte(x9 >> 16) + out[39] = byte(x9 >> 24) + + out[40] = byte(x10) + out[41] = byte(x10 >> 8) + out[42] = byte(x10 >> 16) + out[43] = byte(x10 >> 24) + + out[44] = byte(x11) + out[45] = byte(x11 >> 8) + out[46] = byte(x11 >> 16) + out[47] = byte(x11 >> 24) + + out[48] = byte(x12) + out[49] = byte(x12 >> 8) + out[50] = byte(x12 >> 16) + out[51] = byte(x12 >> 24) + + out[52] = byte(x13) + out[53] = byte(x13 >> 8) + out[54] = byte(x13 >> 16) + out[55] = byte(x13 >> 24) + + out[56] = byte(x14) + out[57] = byte(x14 >> 8) + out[58] = byte(x14 >> 16) + out[59] = byte(x14 >> 24) + + out[60] = byte(x15) + out[61] = byte(x15 >> 8) + out[62] = byte(x15 >> 16) + out[63] = byte(x15 >> 24) +} + +// genericXORKeyStream is the generic implementation of XORKeyStream to be used +// when no assembly implementation is available. +func genericXORKeyStream(out, in []byte, counter *[16]byte, key *[32]byte) { + var block [64]byte + var counterCopy [16]byte + copy(counterCopy[:], counter[:]) + + for len(in) >= 64 { + core(&block, &counterCopy, key, &Sigma) + for i, x := range block { + out[i] = in[i] ^ x + } + u := uint32(1) + for i := 8; i < 16; i++ { + u += uint32(counterCopy[i]) + counterCopy[i] = byte(u) + u >>= 8 + } + in = in[64:] + out = out[64:] + } + + if len(in) > 0 { + core(&block, &counterCopy, key, &Sigma) + for i, v := range in { + out[i] = v ^ block[i] + } + } +} diff --git a/vendor/golang.org/x/sys/AUTHORS b/vendor/golang.org/x/sys/AUTHORS new file mode 100644 index 0000000..15167cd --- /dev/null +++ b/vendor/golang.org/x/sys/AUTHORS @@ -0,0 +1,3 @@ +# This source code refers to The Go Authors for copyright purposes. +# The master list of authors is in the main Go distribution, +# visible at http://tip.golang.org/AUTHORS. diff --git a/vendor/golang.org/x/sys/CONTRIBUTORS b/vendor/golang.org/x/sys/CONTRIBUTORS new file mode 100644 index 0000000..1c4577e --- /dev/null +++ b/vendor/golang.org/x/sys/CONTRIBUTORS @@ -0,0 +1,3 @@ +# This source code was written by the Go contributors. +# The master list of contributors is in the main Go distribution, +# visible at http://tip.golang.org/CONTRIBUTORS. diff --git a/vendor/golang.org/x/sys/LICENSE b/vendor/golang.org/x/sys/LICENSE new file mode 100644 index 0000000..6a66aea --- /dev/null +++ b/vendor/golang.org/x/sys/LICENSE @@ -0,0 +1,27 @@ +Copyright (c) 2009 The Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/golang.org/x/sys/PATENTS b/vendor/golang.org/x/sys/PATENTS new file mode 100644 index 0000000..7330990 --- /dev/null +++ b/vendor/golang.org/x/sys/PATENTS @@ -0,0 +1,22 @@ +Additional IP Rights Grant (Patents) + +"This implementation" means the copyrightable works distributed by +Google as part of the Go project. + +Google hereby grants to You a perpetual, worldwide, non-exclusive, +no-charge, royalty-free, irrevocable (except as stated in this section) +patent license to make, have made, use, offer to sell, sell, import, +transfer and otherwise run, modify and propagate the contents of this +implementation of Go, where such license applies only to those patent +claims, both currently owned or controlled by Google and acquired in +the future, licensable by Google that are necessarily infringed by this +implementation of Go. This grant does not include claims that would be +infringed only as a consequence of further modification of this +implementation. If you or your agent or exclusive licensee institute or +order or agree to the institution of patent litigation against any +entity (including a cross-claim or counterclaim in a lawsuit) alleging +that this implementation of Go or any code incorporated within this +implementation of Go constitutes direct or contributory patent +infringement, or inducement of patent infringement, then any patent +rights granted to you under this License for this implementation of Go +shall terminate as of the date such litigation is filed. diff --git a/vendor/golang.org/x/sys/cpu/asm_aix_ppc64.s b/vendor/golang.org/x/sys/cpu/asm_aix_ppc64.s new file mode 100644 index 0000000..06f84b8 --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/asm_aix_ppc64.s @@ -0,0 +1,17 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !gccgo + +#include "textflag.h" + +// +// System calls for ppc64, AIX are implemented in runtime/syscall_aix.go +// + +TEXT ·syscall6(SB),NOSPLIT,$0-88 + JMP syscall·syscall6(SB) + +TEXT ·rawSyscall6(SB),NOSPLIT,$0-88 + JMP syscall·rawSyscall6(SB) diff --git a/vendor/golang.org/x/sys/cpu/byteorder.go b/vendor/golang.org/x/sys/cpu/byteorder.go new file mode 100644 index 0000000..da6b9e4 --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/byteorder.go @@ -0,0 +1,30 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cpu + +import ( + "encoding/binary" + "runtime" +) + +// hostByteOrder returns binary.LittleEndian on little-endian machines and +// binary.BigEndian on big-endian machines. +func hostByteOrder() binary.ByteOrder { + switch runtime.GOARCH { + case "386", "amd64", "amd64p32", + "arm", "arm64", + "mipsle", "mips64le", "mips64p32le", + "ppc64le", + "riscv", "riscv64": + return binary.LittleEndian + case "armbe", "arm64be", + "mips", "mips64", "mips64p32", + "ppc", "ppc64", + "s390", "s390x", + "sparc", "sparc64": + return binary.BigEndian + } + panic("unknown architecture") +} diff --git a/vendor/golang.org/x/sys/cpu/cpu.go b/vendor/golang.org/x/sys/cpu/cpu.go new file mode 100644 index 0000000..679e78c --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/cpu.go @@ -0,0 +1,126 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package cpu implements processor feature detection for +// various CPU architectures. +package cpu + +// Initialized reports whether the CPU features were initialized. +// +// For some GOOS/GOARCH combinations initialization of the CPU features depends +// on reading an operating specific file, e.g. /proc/self/auxv on linux/arm +// Initialized will report false if reading the file fails. +var Initialized bool + +// CacheLinePad is used to pad structs to avoid false sharing. +type CacheLinePad struct{ _ [cacheLineSize]byte } + +// X86 contains the supported CPU features of the +// current X86/AMD64 platform. If the current platform +// is not X86/AMD64 then all feature flags are false. +// +// X86 is padded to avoid false sharing. Further the HasAVX +// and HasAVX2 are only set if the OS supports XMM and YMM +// registers in addition to the CPUID feature bit being set. +var X86 struct { + _ CacheLinePad + HasAES bool // AES hardware implementation (AES NI) + HasADX bool // Multi-precision add-carry instruction extensions + HasAVX bool // Advanced vector extension + HasAVX2 bool // Advanced vector extension 2 + HasBMI1 bool // Bit manipulation instruction set 1 + HasBMI2 bool // Bit manipulation instruction set 2 + HasERMS bool // Enhanced REP for MOVSB and STOSB + HasFMA bool // Fused-multiply-add instructions + HasOSXSAVE bool // OS supports XSAVE/XRESTOR for saving/restoring XMM registers. + HasPCLMULQDQ bool // PCLMULQDQ instruction - most often used for AES-GCM + HasPOPCNT bool // Hamming weight instruction POPCNT. + HasRDRAND bool // RDRAND instruction (on-chip random number generator) + HasRDSEED bool // RDSEED instruction (on-chip random number generator) + HasSSE2 bool // Streaming SIMD extension 2 (always available on amd64) + HasSSE3 bool // Streaming SIMD extension 3 + HasSSSE3 bool // Supplemental streaming SIMD extension 3 + HasSSE41 bool // Streaming SIMD extension 4 and 4.1 + HasSSE42 bool // Streaming SIMD extension 4 and 4.2 + _ CacheLinePad +} + +// ARM64 contains the supported CPU features of the +// current ARMv8(aarch64) platform. If the current platform +// is not arm64 then all feature flags are false. +var ARM64 struct { + _ CacheLinePad + HasFP bool // Floating-point instruction set (always available) + HasASIMD bool // Advanced SIMD (always available) + HasEVTSTRM bool // Event stream support + HasAES bool // AES hardware implementation + HasPMULL bool // Polynomial multiplication instruction set + HasSHA1 bool // SHA1 hardware implementation + HasSHA2 bool // SHA2 hardware implementation + HasCRC32 bool // CRC32 hardware implementation + HasATOMICS bool // Atomic memory operation instruction set + HasFPHP bool // Half precision floating-point instruction set + HasASIMDHP bool // Advanced SIMD half precision instruction set + HasCPUID bool // CPUID identification scheme registers + HasASIMDRDM bool // Rounding double multiply add/subtract instruction set + HasJSCVT bool // Javascript conversion from floating-point to integer + HasFCMA bool // Floating-point multiplication and addition of complex numbers + HasLRCPC bool // Release Consistent processor consistent support + HasDCPOP bool // Persistent memory support + HasSHA3 bool // SHA3 hardware implementation + HasSM3 bool // SM3 hardware implementation + HasSM4 bool // SM4 hardware implementation + HasASIMDDP bool // Advanced SIMD double precision instruction set + HasSHA512 bool // SHA512 hardware implementation + HasSVE bool // Scalable Vector Extensions + HasASIMDFHM bool // Advanced SIMD multiplication FP16 to FP32 + _ CacheLinePad +} + +// PPC64 contains the supported CPU features of the current ppc64/ppc64le platforms. +// If the current platform is not ppc64/ppc64le then all feature flags are false. +// +// For ppc64/ppc64le, it is safe to check only for ISA level starting on ISA v3.00, +// since there are no optional categories. There are some exceptions that also +// require kernel support to work (DARN, SCV), so there are feature bits for +// those as well. The minimum processor requirement is POWER8 (ISA 2.07). +// The struct is padded to avoid false sharing. +var PPC64 struct { + _ CacheLinePad + HasDARN bool // Hardware random number generator (requires kernel enablement) + HasSCV bool // Syscall vectored (requires kernel enablement) + IsPOWER8 bool // ISA v2.07 (POWER8) + IsPOWER9 bool // ISA v3.00 (POWER9) + _ CacheLinePad +} + +// S390X contains the supported CPU features of the current IBM Z +// (s390x) platform. If the current platform is not IBM Z then all +// feature flags are false. +// +// S390X is padded to avoid false sharing. Further HasVX is only set +// if the OS supports vector registers in addition to the STFLE +// feature bit being set. +var S390X struct { + _ CacheLinePad + HasZARCH bool // z/Architecture mode is active [mandatory] + HasSTFLE bool // store facility list extended + HasLDISP bool // long (20-bit) displacements + HasEIMM bool // 32-bit immediates + HasDFP bool // decimal floating point + HasETF3EH bool // ETF-3 enhanced + HasMSA bool // message security assist (CPACF) + HasAES bool // KM-AES{128,192,256} functions + HasAESCBC bool // KMC-AES{128,192,256} functions + HasAESCTR bool // KMCTR-AES{128,192,256} functions + HasAESGCM bool // KMA-GCM-AES{128,192,256} functions + HasGHASH bool // KIMD-GHASH function + HasSHA1 bool // K{I,L}MD-SHA-1 functions + HasSHA256 bool // K{I,L}MD-SHA-256 functions + HasSHA512 bool // K{I,L}MD-SHA-512 functions + HasSHA3 bool // K{I,L}MD-SHA3-{224,256,384,512} and K{I,L}MD-SHAKE-{128,256} functions + HasVX bool // vector facility + HasVXE bool // vector-enhancements facility 1 + _ CacheLinePad +} diff --git a/vendor/golang.org/x/sys/cpu/cpu_aix_ppc64.go b/vendor/golang.org/x/sys/cpu/cpu_aix_ppc64.go new file mode 100644 index 0000000..be60272 --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/cpu_aix_ppc64.go @@ -0,0 +1,34 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build aix,ppc64 + +package cpu + +const cacheLineSize = 128 + +const ( + // getsystemcfg constants + _SC_IMPL = 2 + _IMPL_POWER8 = 0x10000 + _IMPL_POWER9 = 0x20000 +) + +func init() { + impl := getsystemcfg(_SC_IMPL) + if impl&_IMPL_POWER8 != 0 { + PPC64.IsPOWER8 = true + } + if impl&_IMPL_POWER9 != 0 { + PPC64.IsPOWER9 = true + } + + Initialized = true +} + +func getsystemcfg(label int) (n uint64) { + r0, _ := callgetsystemcfg(label) + n = uint64(r0) + return +} diff --git a/vendor/golang.org/x/sys/cpu/cpu_arm.go b/vendor/golang.org/x/sys/cpu/cpu_arm.go new file mode 100644 index 0000000..7f2348b --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/cpu_arm.go @@ -0,0 +1,9 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cpu + +const cacheLineSize = 32 + +func doinit() {} diff --git a/vendor/golang.org/x/sys/cpu/cpu_gc_s390x.go b/vendor/golang.org/x/sys/cpu/cpu_gc_s390x.go new file mode 100644 index 0000000..568bcd0 --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/cpu_gc_s390x.go @@ -0,0 +1,21 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !gccgo + +package cpu + +// haveAsmFunctions reports whether the other functions in this file can +// be safely called. +func haveAsmFunctions() bool { return true } + +// The following feature detection functions are defined in cpu_s390x.s. +// They are likely to be expensive to call so the results should be cached. +func stfle() facilityList +func kmQuery() queryResult +func kmcQuery() queryResult +func kmctrQuery() queryResult +func kmaQuery() queryResult +func kimdQuery() queryResult +func klmdQuery() queryResult diff --git a/vendor/golang.org/x/sys/cpu/cpu_gc_x86.go b/vendor/golang.org/x/sys/cpu/cpu_gc_x86.go new file mode 100644 index 0000000..f7cb469 --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/cpu_gc_x86.go @@ -0,0 +1,16 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build 386 amd64 amd64p32 +// +build !gccgo + +package cpu + +// cpuid is implemented in cpu_x86.s for gc compiler +// and in cpu_gccgo.c for gccgo. +func cpuid(eaxArg, ecxArg uint32) (eax, ebx, ecx, edx uint32) + +// xgetbv with ecx = 0 is implemented in cpu_x86.s for gc compiler +// and in cpu_gccgo.c for gccgo. +func xgetbv() (eax, edx uint32) diff --git a/vendor/golang.org/x/sys/cpu/cpu_gccgo.c b/vendor/golang.org/x/sys/cpu/cpu_gccgo.c new file mode 100644 index 0000000..e363c7d --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/cpu_gccgo.c @@ -0,0 +1,43 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build 386 amd64 amd64p32 +// +build gccgo + +#include +#include + +// Need to wrap __get_cpuid_count because it's declared as static. +int +gccgoGetCpuidCount(uint32_t leaf, uint32_t subleaf, + uint32_t *eax, uint32_t *ebx, + uint32_t *ecx, uint32_t *edx) +{ + return __get_cpuid_count(leaf, subleaf, eax, ebx, ecx, edx); +} + +// xgetbv reads the contents of an XCR (Extended Control Register) +// specified in the ECX register into registers EDX:EAX. +// Currently, the only supported value for XCR is 0. +// +// TODO: Replace with a better alternative: +// +// #include +// +// #pragma GCC target("xsave") +// +// void gccgoXgetbv(uint32_t *eax, uint32_t *edx) { +// unsigned long long x = _xgetbv(0); +// *eax = x & 0xffffffff; +// *edx = (x >> 32) & 0xffffffff; +// } +// +// Note that _xgetbv is defined starting with GCC 8. +void +gccgoXgetbv(uint32_t *eax, uint32_t *edx) +{ + __asm(" xorl %%ecx, %%ecx\n" + " xgetbv" + : "=a"(*eax), "=d"(*edx)); +} diff --git a/vendor/golang.org/x/sys/cpu/cpu_gccgo.go b/vendor/golang.org/x/sys/cpu/cpu_gccgo.go new file mode 100644 index 0000000..ba49b91 --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/cpu_gccgo.go @@ -0,0 +1,26 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build 386 amd64 amd64p32 +// +build gccgo + +package cpu + +//extern gccgoGetCpuidCount +func gccgoGetCpuidCount(eaxArg, ecxArg uint32, eax, ebx, ecx, edx *uint32) + +func cpuid(eaxArg, ecxArg uint32) (eax, ebx, ecx, edx uint32) { + var a, b, c, d uint32 + gccgoGetCpuidCount(eaxArg, ecxArg, &a, &b, &c, &d) + return a, b, c, d +} + +//extern gccgoXgetbv +func gccgoXgetbv(eax, edx *uint32) + +func xgetbv() (eax, edx uint32) { + var a, d uint32 + gccgoXgetbv(&a, &d) + return a, d +} diff --git a/vendor/golang.org/x/sys/cpu/cpu_gccgo_s390x.go b/vendor/golang.org/x/sys/cpu/cpu_gccgo_s390x.go new file mode 100644 index 0000000..aa986f7 --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/cpu_gccgo_s390x.go @@ -0,0 +1,22 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build gccgo + +package cpu + +// haveAsmFunctions reports whether the other functions in this file can +// be safely called. +func haveAsmFunctions() bool { return false } + +// TODO(mundaym): the following feature detection functions are currently +// stubs. See https://golang.org/cl/162887 for how to fix this. +// They are likely to be expensive to call so the results should be cached. +func stfle() facilityList { panic("not implemented for gccgo") } +func kmQuery() queryResult { panic("not implemented for gccgo") } +func kmcQuery() queryResult { panic("not implemented for gccgo") } +func kmctrQuery() queryResult { panic("not implemented for gccgo") } +func kmaQuery() queryResult { panic("not implemented for gccgo") } +func kimdQuery() queryResult { panic("not implemented for gccgo") } +func klmdQuery() queryResult { panic("not implemented for gccgo") } diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux.go b/vendor/golang.org/x/sys/cpu/cpu_linux.go new file mode 100644 index 0000000..76b5f50 --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/cpu_linux.go @@ -0,0 +1,59 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//+build !amd64,!amd64p32,!386 + +package cpu + +import ( + "io/ioutil" +) + +const ( + _AT_HWCAP = 16 + _AT_HWCAP2 = 26 + + procAuxv = "/proc/self/auxv" + + uintSize = int(32 << (^uint(0) >> 63)) +) + +// For those platforms don't have a 'cpuid' equivalent we use HWCAP/HWCAP2 +// These are initialized in cpu_$GOARCH.go +// and should not be changed after they are initialized. +var hwCap uint +var hwCap2 uint + +func init() { + buf, err := ioutil.ReadFile(procAuxv) + if err != nil { + // e.g. on android /proc/self/auxv is not accessible, so silently + // ignore the error and leave Initialized = false + return + } + + bo := hostByteOrder() + for len(buf) >= 2*(uintSize/8) { + var tag, val uint + switch uintSize { + case 32: + tag = uint(bo.Uint32(buf[0:])) + val = uint(bo.Uint32(buf[4:])) + buf = buf[8:] + case 64: + tag = uint(bo.Uint64(buf[0:])) + val = uint(bo.Uint64(buf[8:])) + buf = buf[16:] + } + switch tag { + case _AT_HWCAP: + hwCap = val + case _AT_HWCAP2: + hwCap2 = val + } + } + doinit() + + Initialized = true +} diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux_arm64.go b/vendor/golang.org/x/sys/cpu/cpu_linux_arm64.go new file mode 100644 index 0000000..fa7fb1b --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/cpu_linux_arm64.go @@ -0,0 +1,67 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cpu + +const cacheLineSize = 64 + +// HWCAP/HWCAP2 bits. These are exposed by Linux. +const ( + hwcap_FP = 1 << 0 + hwcap_ASIMD = 1 << 1 + hwcap_EVTSTRM = 1 << 2 + hwcap_AES = 1 << 3 + hwcap_PMULL = 1 << 4 + hwcap_SHA1 = 1 << 5 + hwcap_SHA2 = 1 << 6 + hwcap_CRC32 = 1 << 7 + hwcap_ATOMICS = 1 << 8 + hwcap_FPHP = 1 << 9 + hwcap_ASIMDHP = 1 << 10 + hwcap_CPUID = 1 << 11 + hwcap_ASIMDRDM = 1 << 12 + hwcap_JSCVT = 1 << 13 + hwcap_FCMA = 1 << 14 + hwcap_LRCPC = 1 << 15 + hwcap_DCPOP = 1 << 16 + hwcap_SHA3 = 1 << 17 + hwcap_SM3 = 1 << 18 + hwcap_SM4 = 1 << 19 + hwcap_ASIMDDP = 1 << 20 + hwcap_SHA512 = 1 << 21 + hwcap_SVE = 1 << 22 + hwcap_ASIMDFHM = 1 << 23 +) + +func doinit() { + // HWCAP feature bits + ARM64.HasFP = isSet(hwCap, hwcap_FP) + ARM64.HasASIMD = isSet(hwCap, hwcap_ASIMD) + ARM64.HasEVTSTRM = isSet(hwCap, hwcap_EVTSTRM) + ARM64.HasAES = isSet(hwCap, hwcap_AES) + ARM64.HasPMULL = isSet(hwCap, hwcap_PMULL) + ARM64.HasSHA1 = isSet(hwCap, hwcap_SHA1) + ARM64.HasSHA2 = isSet(hwCap, hwcap_SHA2) + ARM64.HasCRC32 = isSet(hwCap, hwcap_CRC32) + ARM64.HasATOMICS = isSet(hwCap, hwcap_ATOMICS) + ARM64.HasFPHP = isSet(hwCap, hwcap_FPHP) + ARM64.HasASIMDHP = isSet(hwCap, hwcap_ASIMDHP) + ARM64.HasCPUID = isSet(hwCap, hwcap_CPUID) + ARM64.HasASIMDRDM = isSet(hwCap, hwcap_ASIMDRDM) + ARM64.HasJSCVT = isSet(hwCap, hwcap_JSCVT) + ARM64.HasFCMA = isSet(hwCap, hwcap_FCMA) + ARM64.HasLRCPC = isSet(hwCap, hwcap_LRCPC) + ARM64.HasDCPOP = isSet(hwCap, hwcap_DCPOP) + ARM64.HasSHA3 = isSet(hwCap, hwcap_SHA3) + ARM64.HasSM3 = isSet(hwCap, hwcap_SM3) + ARM64.HasSM4 = isSet(hwCap, hwcap_SM4) + ARM64.HasASIMDDP = isSet(hwCap, hwcap_ASIMDDP) + ARM64.HasSHA512 = isSet(hwCap, hwcap_SHA512) + ARM64.HasSVE = isSet(hwCap, hwcap_SVE) + ARM64.HasASIMDFHM = isSet(hwCap, hwcap_ASIMDFHM) +} + +func isSet(hwc uint, value uint) bool { + return hwc&value != 0 +} diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux_ppc64x.go b/vendor/golang.org/x/sys/cpu/cpu_linux_ppc64x.go new file mode 100644 index 0000000..6c8d975 --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/cpu_linux_ppc64x.go @@ -0,0 +1,33 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build linux +// +build ppc64 ppc64le + +package cpu + +const cacheLineSize = 128 + +// HWCAP/HWCAP2 bits. These are exposed by the kernel. +const ( + // ISA Level + _PPC_FEATURE2_ARCH_2_07 = 0x80000000 + _PPC_FEATURE2_ARCH_3_00 = 0x00800000 + + // CPU features + _PPC_FEATURE2_DARN = 0x00200000 + _PPC_FEATURE2_SCV = 0x00100000 +) + +func doinit() { + // HWCAP2 feature bits + PPC64.IsPOWER8 = isSet(hwCap2, _PPC_FEATURE2_ARCH_2_07) + PPC64.IsPOWER9 = isSet(hwCap2, _PPC_FEATURE2_ARCH_3_00) + PPC64.HasDARN = isSet(hwCap2, _PPC_FEATURE2_DARN) + PPC64.HasSCV = isSet(hwCap2, _PPC_FEATURE2_SCV) +} + +func isSet(hwc uint, value uint) bool { + return hwc&value != 0 +} diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux_s390x.go b/vendor/golang.org/x/sys/cpu/cpu_linux_s390x.go new file mode 100644 index 0000000..d579eae --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/cpu_linux_s390x.go @@ -0,0 +1,161 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cpu + +const cacheLineSize = 256 + +const ( + // bit mask values from /usr/include/bits/hwcap.h + hwcap_ZARCH = 2 + hwcap_STFLE = 4 + hwcap_MSA = 8 + hwcap_LDISP = 16 + hwcap_EIMM = 32 + hwcap_DFP = 64 + hwcap_ETF3EH = 256 + hwcap_VX = 2048 + hwcap_VXE = 8192 +) + +// bitIsSet reports whether the bit at index is set. The bit index +// is in big endian order, so bit index 0 is the leftmost bit. +func bitIsSet(bits []uint64, index uint) bool { + return bits[index/64]&((1<<63)>>(index%64)) != 0 +} + +// function is the code for the named cryptographic function. +type function uint8 + +const ( + // KM{,A,C,CTR} function codes + aes128 function = 18 // AES-128 + aes192 function = 19 // AES-192 + aes256 function = 20 // AES-256 + + // K{I,L}MD function codes + sha1 function = 1 // SHA-1 + sha256 function = 2 // SHA-256 + sha512 function = 3 // SHA-512 + sha3_224 function = 32 // SHA3-224 + sha3_256 function = 33 // SHA3-256 + sha3_384 function = 34 // SHA3-384 + sha3_512 function = 35 // SHA3-512 + shake128 function = 36 // SHAKE-128 + shake256 function = 37 // SHAKE-256 + + // KLMD function codes + ghash function = 65 // GHASH +) + +// queryResult contains the result of a Query function +// call. Bits are numbered in big endian order so the +// leftmost bit (the MSB) is at index 0. +type queryResult struct { + bits [2]uint64 +} + +// Has reports whether the given functions are present. +func (q *queryResult) Has(fns ...function) bool { + if len(fns) == 0 { + panic("no function codes provided") + } + for _, f := range fns { + if !bitIsSet(q.bits[:], uint(f)) { + return false + } + } + return true +} + +// facility is a bit index for the named facility. +type facility uint8 + +const ( + // cryptography facilities + msa4 facility = 77 // message-security-assist extension 4 + msa8 facility = 146 // message-security-assist extension 8 +) + +// facilityList contains the result of an STFLE call. +// Bits are numbered in big endian order so the +// leftmost bit (the MSB) is at index 0. +type facilityList struct { + bits [4]uint64 +} + +// Has reports whether the given facilities are present. +func (s *facilityList) Has(fs ...facility) bool { + if len(fs) == 0 { + panic("no facility bits provided") + } + for _, f := range fs { + if !bitIsSet(s.bits[:], uint(f)) { + return false + } + } + return true +} + +func doinit() { + // test HWCAP bit vector + has := func(featureMask uint) bool { + return hwCap&featureMask == featureMask + } + + // mandatory + S390X.HasZARCH = has(hwcap_ZARCH) + + // optional + S390X.HasSTFLE = has(hwcap_STFLE) + S390X.HasLDISP = has(hwcap_LDISP) + S390X.HasEIMM = has(hwcap_EIMM) + S390X.HasETF3EH = has(hwcap_ETF3EH) + S390X.HasDFP = has(hwcap_DFP) + S390X.HasMSA = has(hwcap_MSA) + S390X.HasVX = has(hwcap_VX) + if S390X.HasVX { + S390X.HasVXE = has(hwcap_VXE) + } + + // We need implementations of stfle, km and so on + // to detect cryptographic features. + if !haveAsmFunctions() { + return + } + + // optional cryptographic functions + if S390X.HasMSA { + aes := []function{aes128, aes192, aes256} + + // cipher message + km, kmc := kmQuery(), kmcQuery() + S390X.HasAES = km.Has(aes...) + S390X.HasAESCBC = kmc.Has(aes...) + if S390X.HasSTFLE { + facilities := stfle() + if facilities.Has(msa4) { + kmctr := kmctrQuery() + S390X.HasAESCTR = kmctr.Has(aes...) + } + if facilities.Has(msa8) { + kma := kmaQuery() + S390X.HasAESGCM = kma.Has(aes...) + } + } + + // compute message digest + kimd := kimdQuery() // intermediate (no padding) + klmd := klmdQuery() // last (padding) + S390X.HasSHA1 = kimd.Has(sha1) && klmd.Has(sha1) + S390X.HasSHA256 = kimd.Has(sha256) && klmd.Has(sha256) + S390X.HasSHA512 = kimd.Has(sha512) && klmd.Has(sha512) + S390X.HasGHASH = kimd.Has(ghash) // KLMD-GHASH does not exist + sha3 := []function{ + sha3_224, sha3_256, sha3_384, sha3_512, + shake128, shake256, + } + S390X.HasSHA3 = kimd.Has(sha3...) && klmd.Has(sha3...) + } +} diff --git a/vendor/golang.org/x/sys/cpu/cpu_mips64x.go b/vendor/golang.org/x/sys/cpu/cpu_mips64x.go new file mode 100644 index 0000000..f55e0c8 --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/cpu_mips64x.go @@ -0,0 +1,11 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build mips64 mips64le + +package cpu + +const cacheLineSize = 32 + +func doinit() {} diff --git a/vendor/golang.org/x/sys/cpu/cpu_mipsx.go b/vendor/golang.org/x/sys/cpu/cpu_mipsx.go new file mode 100644 index 0000000..cda87b1 --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/cpu_mipsx.go @@ -0,0 +1,11 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build mips mipsle + +package cpu + +const cacheLineSize = 32 + +func doinit() {} diff --git a/vendor/golang.org/x/sys/cpu/cpu_other_arm64.go b/vendor/golang.org/x/sys/cpu/cpu_other_arm64.go new file mode 100644 index 0000000..dd1e76d --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/cpu_other_arm64.go @@ -0,0 +1,11 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !linux,arm64 + +package cpu + +const cacheLineSize = 64 + +func doinit() {} diff --git a/vendor/golang.org/x/sys/cpu/cpu_s390x.s b/vendor/golang.org/x/sys/cpu/cpu_s390x.s new file mode 100644 index 0000000..e5037d9 --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/cpu_s390x.s @@ -0,0 +1,57 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !gccgo + +#include "textflag.h" + +// func stfle() facilityList +TEXT ·stfle(SB), NOSPLIT|NOFRAME, $0-32 + MOVD $ret+0(FP), R1 + MOVD $3, R0 // last doubleword index to store + XC $32, (R1), (R1) // clear 4 doublewords (32 bytes) + WORD $0xb2b01000 // store facility list extended (STFLE) + RET + +// func kmQuery() queryResult +TEXT ·kmQuery(SB), NOSPLIT|NOFRAME, $0-16 + MOVD $0, R0 // set function code to 0 (KM-Query) + MOVD $ret+0(FP), R1 // address of 16-byte return value + WORD $0xB92E0024 // cipher message (KM) + RET + +// func kmcQuery() queryResult +TEXT ·kmcQuery(SB), NOSPLIT|NOFRAME, $0-16 + MOVD $0, R0 // set function code to 0 (KMC-Query) + MOVD $ret+0(FP), R1 // address of 16-byte return value + WORD $0xB92F0024 // cipher message with chaining (KMC) + RET + +// func kmctrQuery() queryResult +TEXT ·kmctrQuery(SB), NOSPLIT|NOFRAME, $0-16 + MOVD $0, R0 // set function code to 0 (KMCTR-Query) + MOVD $ret+0(FP), R1 // address of 16-byte return value + WORD $0xB92D4024 // cipher message with counter (KMCTR) + RET + +// func kmaQuery() queryResult +TEXT ·kmaQuery(SB), NOSPLIT|NOFRAME, $0-16 + MOVD $0, R0 // set function code to 0 (KMA-Query) + MOVD $ret+0(FP), R1 // address of 16-byte return value + WORD $0xb9296024 // cipher message with authentication (KMA) + RET + +// func kimdQuery() queryResult +TEXT ·kimdQuery(SB), NOSPLIT|NOFRAME, $0-16 + MOVD $0, R0 // set function code to 0 (KIMD-Query) + MOVD $ret+0(FP), R1 // address of 16-byte return value + WORD $0xB93E0024 // compute intermediate message digest (KIMD) + RET + +// func klmdQuery() queryResult +TEXT ·klmdQuery(SB), NOSPLIT|NOFRAME, $0-16 + MOVD $0, R0 // set function code to 0 (KLMD-Query) + MOVD $ret+0(FP), R1 // address of 16-byte return value + WORD $0xB93F0024 // compute last message digest (KLMD) + RET diff --git a/vendor/golang.org/x/sys/cpu/cpu_wasm.go b/vendor/golang.org/x/sys/cpu/cpu_wasm.go new file mode 100644 index 0000000..bd9bbda --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/cpu_wasm.go @@ -0,0 +1,15 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build wasm + +package cpu + +// We're compiling the cpu package for an unknown (software-abstracted) CPU. +// Make CacheLinePad an empty struct and hope that the usual struct alignment +// rules are good enough. + +const cacheLineSize = 0 + +func doinit() {} diff --git a/vendor/golang.org/x/sys/cpu/cpu_x86.go b/vendor/golang.org/x/sys/cpu/cpu_x86.go new file mode 100644 index 0000000..d70d317 --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/cpu_x86.go @@ -0,0 +1,59 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build 386 amd64 amd64p32 + +package cpu + +const cacheLineSize = 64 + +func init() { + Initialized = true + + maxID, _, _, _ := cpuid(0, 0) + + if maxID < 1 { + return + } + + _, _, ecx1, edx1 := cpuid(1, 0) + X86.HasSSE2 = isSet(26, edx1) + + X86.HasSSE3 = isSet(0, ecx1) + X86.HasPCLMULQDQ = isSet(1, ecx1) + X86.HasSSSE3 = isSet(9, ecx1) + X86.HasFMA = isSet(12, ecx1) + X86.HasSSE41 = isSet(19, ecx1) + X86.HasSSE42 = isSet(20, ecx1) + X86.HasPOPCNT = isSet(23, ecx1) + X86.HasAES = isSet(25, ecx1) + X86.HasOSXSAVE = isSet(27, ecx1) + X86.HasRDRAND = isSet(30, ecx1) + + osSupportsAVX := false + // For XGETBV, OSXSAVE bit is required and sufficient. + if X86.HasOSXSAVE { + eax, _ := xgetbv() + // Check if XMM and YMM registers have OS support. + osSupportsAVX = isSet(1, eax) && isSet(2, eax) + } + + X86.HasAVX = isSet(28, ecx1) && osSupportsAVX + + if maxID < 7 { + return + } + + _, ebx7, _, _ := cpuid(7, 0) + X86.HasBMI1 = isSet(3, ebx7) + X86.HasAVX2 = isSet(5, ebx7) && osSupportsAVX + X86.HasBMI2 = isSet(8, ebx7) + X86.HasERMS = isSet(9, ebx7) + X86.HasRDSEED = isSet(18, ebx7) + X86.HasADX = isSet(19, ebx7) +} + +func isSet(bitpos uint, value uint32) bool { + return value&(1<