Skip to content

Commit

Permalink
vector: use asm opcode mnemonics
Browse files (browse the repository at this point in the history)
There's no change in the binary output, just less mystery in the asm.

These mnemonics were introduced in Go 1.10:
https://golang.org/doc/go1.10#asm and https://golang.org/cl/75490

Current stable release (as of 2018-11-10) is Go 1.11, and
https://golang.org/doc/devel/release.html#policy says that Go 1.9 and
below are therefore no longer supported.

Change-Id: I1f9a63521bc8d5e8f8d395605f62bf7fb6a63bc5
Reviewed-on: https://go-review.googlesource.com/c/148997
Reviewed-by: Dmitri Shuralyov <dmitshur@golang.org>
  • Loading branch information
nigeltao committed Nov 15, 2018
1 parent 249dc85 commit 46e4eb7
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 98 deletions.
106 changes: 26 additions & 80 deletions vector/acc_amd64.s
Original file line number Diff line number Diff line change
Expand Up @@ -139,17 +139,9 @@ fxAccOpOverLoop4:
// y = abs(x)
// y >>= 2 // Shift by 2*ϕ - 16.
// y = min(y, fxAlmost65536)
//
// pabsd %xmm1,%xmm2
// psrld $0x2,%xmm2
// pminud %xmm5,%xmm2
//
// Hopefully we'll get these opcode mnemonics into the assembler for Go
// 1.8. https://golang.org/issue/16007 isn't exactly the same thing, but
// it's similar.
BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x1e; BYTE $0xd1
BYTE $0x66; BYTE $0x0f; BYTE $0x72; BYTE $0xd2; BYTE $0x02
BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x3b; BYTE $0xd5
PABSD X1, X2
PSRLL $2, X2
PMINUD X5, X2

// z = convertToInt32(y)
// No-op.
Expand Down Expand Up @@ -182,13 +174,10 @@ fxAccOpOverLoop4:
PSRLQ $32, X11

// Multiply by magic, shift by magic.
//
// pmuludq %xmm10,%xmm0
// pmuludq %xmm10,%xmm11
BYTE $0x66; BYTE $0x41; BYTE $0x0f; BYTE $0xf4; BYTE $0xc2
BYTE $0x66; BYTE $0x45; BYTE $0x0f; BYTE $0xf4; BYTE $0xda
PSRLQ $47, X0
PSRLQ $47, X11
PMULULQ X10, X0
PMULULQ X10, X11
PSRLQ $47, X0
PSRLQ $47, X11

// Merge the two registers back to one, X11, and add maskA.
PSLLQ $32, X11
Expand Down Expand Up @@ -223,17 +212,9 @@ fxAccOpOverLoop1:
// y = abs(x)
// y >>= 2 // Shift by 2*ϕ - 16.
// y = min(y, fxAlmost65536)
//
// pabsd %xmm1,%xmm2
// psrld $0x2,%xmm2
// pminud %xmm5,%xmm2
//
// Hopefully we'll get these opcode mnemonics into the assembler for Go
// 1.8. https://golang.org/issue/16007 isn't exactly the same thing, but
// it's similar.
BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x1e; BYTE $0xd1
BYTE $0x66; BYTE $0x0f; BYTE $0x72; BYTE $0xd2; BYTE $0x02
BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x3b; BYTE $0xd5
PABSD X1, X2
PSRLL $2, X2
PMINUD X5, X2

// z = convertToInt32(y)
// No-op.
Expand Down Expand Up @@ -346,17 +327,9 @@ fxAccOpSrcLoop4:
// y = abs(x)
// y >>= 2 // Shift by 2*ϕ - 16.
// y = min(y, fxAlmost65536)
//
// pabsd %xmm1,%xmm2
// psrld $0x2,%xmm2
// pminud %xmm5,%xmm2
//
// Hopefully we'll get these opcode mnemonics into the assembler for Go
// 1.8. https://golang.org/issue/16007 isn't exactly the same thing, but
// it's similar.
BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x1e; BYTE $0xd1
BYTE $0x66; BYTE $0x0f; BYTE $0x72; BYTE $0xd2; BYTE $0x02
BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x3b; BYTE $0xd5
PABSD X1, X2
PSRLL $2, X2
PMINUD X5, X2

// z = convertToInt32(y)
// No-op.
Expand Down Expand Up @@ -390,17 +363,9 @@ fxAccOpSrcLoop1:
// y = abs(x)
// y >>= 2 // Shift by 2*ϕ - 16.
// y = min(y, fxAlmost65536)
//
// pabsd %xmm1,%xmm2
// psrld $0x2,%xmm2
// pminud %xmm5,%xmm2
//
// Hopefully we'll get these opcode mnemonics into the assembler for Go
// 1.8. https://golang.org/issue/16007 isn't exactly the same thing, but
// it's similar.
BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x1e; BYTE $0xd1
BYTE $0x66; BYTE $0x0f; BYTE $0x72; BYTE $0xd2; BYTE $0x02
BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x3b; BYTE $0xd5
PABSD X1, X2
PSRLL $2, X2
PMINUD X5, X2

// z = convertToInt32(y)
// No-op.
Expand Down Expand Up @@ -492,17 +457,9 @@ fxAccMaskLoop4:
// y = abs(x)
// y >>= 2 // Shift by 2*ϕ - 16.
// y = min(y, fxAlmost65536)
//
// pabsd %xmm1,%xmm2
// psrld $0x2,%xmm2
// pminud %xmm5,%xmm2
//
// Hopefully we'll get these opcode mnemonics into the assembler for Go
// 1.8. https://golang.org/issue/16007 isn't exactly the same thing, but
// it's similar.
BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x1e; BYTE $0xd1
BYTE $0x66; BYTE $0x0f; BYTE $0x72; BYTE $0xd2; BYTE $0x02
BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x3b; BYTE $0xd5
PABSD X1, X2
PSRLL $2, X2
PMINUD X5, X2

// z = convertToInt32(y)
// No-op.
Expand Down Expand Up @@ -534,17 +491,9 @@ fxAccMaskLoop1:
// y = abs(x)
// y >>= 2 // Shift by 2*ϕ - 16.
// y = min(y, fxAlmost65536)
//
// pabsd %xmm1,%xmm2
// psrld $0x2,%xmm2
// pminud %xmm5,%xmm2
//
// Hopefully we'll get these opcode mnemonics into the assembler for Go
// 1.8. https://golang.org/issue/16007 isn't exactly the same thing, but
// it's similar.
BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x1e; BYTE $0xd1
BYTE $0x66; BYTE $0x0f; BYTE $0x72; BYTE $0xd2; BYTE $0x02
BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x3b; BYTE $0xd5
PABSD X1, X2
PSRLL $2, X2
PMINUD X5, X2

// z = convertToInt32(y)
// No-op.
Expand Down Expand Up @@ -696,13 +645,10 @@ flAccOpOverLoop4:
PSRLQ $32, X11

// Multiply by magic, shift by magic.
//
// pmuludq %xmm10,%xmm0
// pmuludq %xmm10,%xmm11
BYTE $0x66; BYTE $0x41; BYTE $0x0f; BYTE $0xf4; BYTE $0xc2
BYTE $0x66; BYTE $0x45; BYTE $0x0f; BYTE $0xf4; BYTE $0xda
PSRLQ $47, X0
PSRLQ $47, X11
PMULULQ X10, X0
PMULULQ X10, X11
PSRLQ $47, X0
PSRLQ $47, X11

// Merge the two registers back to one, X11, and add maskA.
PSLLQ $32, X11
Expand Down
25 changes: 7 additions & 18 deletions vector/gen.go
Original file line number Diff line number Diff line change
Expand Up @@ -296,17 +296,9 @@ const (
// y = abs(x)
// y >>= 2 // Shift by 2*ϕ - 16.
// y = min(y, fxAlmost65536)
//
// pabsd %xmm1,%xmm2
// psrld $0x2,%xmm2
// pminud %xmm5,%xmm2
//
// Hopefully we'll get these opcode mnemonics into the assembler for Go
// 1.8. https://golang.org/issue/16007 isn't exactly the same thing, but
// it's similar.
BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x1e; BYTE $0xd1
BYTE $0x66; BYTE $0x0f; BYTE $0x72; BYTE $0xd2; BYTE $0x02
BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x3b; BYTE $0xd5
PABSD X1, X2
PSRLL $2, X2
PMINUD X5, X2
`
flClampAndScale = `
// y = x & flSignMask
Expand Down Expand Up @@ -356,13 +348,10 @@ const (
MOVOU X0, X11
PSRLQ $32, X11
// Multiply by magic, shift by magic.
//
// pmuludq %xmm10,%xmm0
// pmuludq %xmm10,%xmm11
BYTE $0x66; BYTE $0x41; BYTE $0x0f; BYTE $0xf4; BYTE $0xc2
BYTE $0x66; BYTE $0x45; BYTE $0x0f; BYTE $0xf4; BYTE $0xda
PSRLQ $47, X0
PSRLQ $47, X11
PMULULQ X10, X0
PMULULQ X10, X11
PSRLQ $47, X0
PSRLQ $47, X11
// Merge the two registers back to one, X11, and add maskA.
PSLLQ $32, X11
XORPS X0, X11
Expand Down

0 comments on commit 46e4eb7

Please sign in to comment.