Skip to content

Commit

Permalink
cmd/compile: use TZCNT instruction for GOAMD64>=v3
Browse files Browse the repository at this point in the history
on my Intel CoffeeLake CPU:
name               old time/op  new time/op  delta
TrailingZeros-8    0.68ns ± 1%  0.64ns ± 1%  -6.26%  (p=0.000 n=10+10)
TrailingZeros8-8   0.70ns ± 1%  0.70ns ± 1%    ~     (p=0.697 n=10+10)
TrailingZeros16-8  0.70ns ± 1%  0.70ns ± 1%  +0.57%  (p=0.043 n=10+10)
TrailingZeros32-8  0.66ns ± 1%  0.64ns ± 1%  -3.35%  (p=0.000 n=10+10)
TrailingZeros64-8  0.68ns ± 1%  0.64ns ± 1%  -5.84%  (p=0.000 n=9+10)

Updates #45453

Change-Id: I228ff2d51df24b1306136f061432f8a12bb1d6fd
Reviewed-on: https://go-review.googlesource.com/c/go/+/353249
Trust: Michael Knyszek <[email protected]>
Run-TryBot: Michael Knyszek <[email protected]>
TryBot-Result: Go Bot <[email protected]>
Reviewed-by: Keith Randall <[email protected]>
  • Loading branch information
wdvxdr1123 authored and randall77 committed Oct 5, 2021
1 parent f1f626d commit 060cd73
Show file tree
Hide file tree
Showing 6 changed files with 202 additions and 21 deletions.
3 changes: 2 additions & 1 deletion src/cmd/compile/internal/amd64/ssa.go
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,8 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {

case ssa.OpAMD64BLSIQ, ssa.OpAMD64BLSIL,
ssa.OpAMD64BLSMSKQ, ssa.OpAMD64BLSMSKL,
ssa.OpAMD64BLSRQ, ssa.OpAMD64BLSRL:
ssa.OpAMD64BLSRQ, ssa.OpAMD64BLSRL,
ssa.OpAMD64TZCNTQ, ssa.OpAMD64TZCNTL:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
p.From.Reg = v.Args[0].Reg()
Expand Down
18 changes: 12 additions & 6 deletions src/cmd/compile/internal/ssa/gen/AMD64.rules
Original file line number Diff line number Diff line change
Expand Up @@ -78,15 +78,21 @@
(OffPtr [off] ptr) => (ADDQ (MOVQconst [off]) ptr)

// Lowering other arithmetic
(Ctz64 <t> x) => (CMOVQEQ (Select0 <t> (BSFQ x)) (MOVQconst <t> [64]) (Select1 <types.TypeFlags> (BSFQ x)))
(Ctz32 x) => (Select0 (BSFQ (BTSQconst <typ.UInt64> [32] x)))
(Ctz64 x) && buildcfg.GOAMD64 >= 3 => (TZCNTQ x)
(Ctz32 x) && buildcfg.GOAMD64 >= 3 => (TZCNTL x)
(Ctz64 <t> x) && buildcfg.GOAMD64 < 3 => (CMOVQEQ (Select0 <t> (BSFQ x)) (MOVQconst <t> [64]) (Select1 <types.TypeFlags> (BSFQ x)))
(Ctz32 x) && buildcfg.GOAMD64 < 3 => (Select0 (BSFQ (BTSQconst <typ.UInt64> [32] x)))
(Ctz16 x) => (BSFL (BTSLconst <typ.UInt32> [16] x))
(Ctz8 x) => (BSFL (BTSLconst <typ.UInt32> [ 8] x))

(Ctz64NonZero x) => (Select0 (BSFQ x))
(Ctz32NonZero ...) => (BSFL ...)
(Ctz16NonZero ...) => (BSFL ...)
(Ctz8NonZero ...) => (BSFL ...)
(Ctz64NonZero x) && buildcfg.GOAMD64 >= 3 => (TZCNTQ x)
(Ctz32NonZero x) && buildcfg.GOAMD64 >= 3 => (TZCNTL x)
(Ctz16NonZero x) && buildcfg.GOAMD64 >= 3 => (TZCNTL x)
(Ctz8NonZero x) && buildcfg.GOAMD64 >= 3 => (TZCNTL x)
(Ctz64NonZero x) && buildcfg.GOAMD64 < 3 => (Select0 (BSFQ x))
(Ctz32NonZero x) && buildcfg.GOAMD64 < 3 => (BSFL x)
(Ctz16NonZero x) && buildcfg.GOAMD64 < 3 => (BSFL x)
(Ctz8NonZero x) && buildcfg.GOAMD64 < 3 => (BSFL x)

// BitLen64 of a 64 bit value x requires checking whether x == 0, since BSRQ is undefined when x == 0.
// However, for zero-extended values, we can cheat a bit, and calculate
Expand Down
4 changes: 4 additions & 0 deletions src/cmd/compile/internal/ssa/gen/AMD64Ops.go
Original file line number Diff line number Diff line change
Expand Up @@ -918,6 +918,10 @@ func init() {
{name: "BLSMSKL", argLength: 1, reg: gp11, asm: "BLSMSKL", clobberFlags: true}, // arg0 ^ (arg0 - 1)
{name: "BLSRQ", argLength: 1, reg: gp11, asm: "BLSRQ", clobberFlags: true}, // arg0 & (arg0 - 1)
{name: "BLSRL", argLength: 1, reg: gp11, asm: "BLSRL", clobberFlags: true}, // arg0 & (arg0 - 1)
// count the number of trailing zero bits, prefer TZCNTQ over BSFQ, as TZCNTQ(0)==64
// and BSFQ(0) is undefined. Same for TZCNTL(0)==32
{name: "TZCNTQ", argLength: 1, reg: gp11, asm: "TZCNTQ", clobberFlags: true},
{name: "TZCNTL", argLength: 1, reg: gp11, asm: "TZCNTL", clobberFlags: true},
}

var AMD64blocks = []blockData{
Expand Down
30 changes: 30 additions & 0 deletions src/cmd/compile/internal/ssa/opGen.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

144 changes: 138 additions & 6 deletions src/cmd/compile/internal/ssa/rewriteAMD64.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 16 additions & 8 deletions test/codegen/mathbits.go
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,8 @@ func RotateLeftVariable32(n uint32, m int) uint32 {
// ------------------------ //

func TrailingZeros(n uint) int {
// amd64:"BSFQ","MOVL\t\\$64","CMOVQEQ"
// amd64/v1,amd64/v2:"BSFQ","MOVL\t\\$64","CMOVQEQ"
// amd64/v3:"TZCNTQ"
// arm:"CLZ"
// arm64:"RBIT","CLZ"
// s390x:"FLOGR"
Expand All @@ -285,7 +286,8 @@ func TrailingZeros(n uint) int {
}

func TrailingZeros64(n uint64) int {
// amd64:"BSFQ","MOVL\t\\$64","CMOVQEQ"
// amd64/v1,amd64/v2:"BSFQ","MOVL\t\\$64","CMOVQEQ"
// amd64/v3:"TZCNTQ"
// arm64:"RBIT","CLZ"
// s390x:"FLOGR"
// ppc64/power8:"ANDN","POPCNTD"
Expand All @@ -303,7 +305,8 @@ func TrailingZeros64Subtract(n uint64) int {
}

func TrailingZeros32(n uint32) int {
// amd64:"BTSQ\\t\\$32","BSFQ"
// amd64/v1,amd64/v2:"BTSQ\\t\\$32","BSFQ"
// amd64/v3:"TZCNTL"
// arm:"CLZ"
// arm64:"RBITW","CLZW"
// s390x:"FLOGR","MOVWZ"
Expand Down Expand Up @@ -343,7 +346,8 @@ func TrailingZeros8(n uint8) int {
func IterateBits(n uint) int {
i := 0
for n != 0 {
// amd64:"BSFQ",-"CMOVEQ"
// amd64/v1,amd64/v2:"BSFQ",-"CMOVEQ"
// amd64/v3:"TZCNTQ"
i += bits.TrailingZeros(n)
n &= n - 1
}
Expand All @@ -353,7 +357,8 @@ func IterateBits(n uint) int {
func IterateBits64(n uint64) int {
i := 0
for n != 0 {
// amd64:"BSFQ",-"CMOVEQ"
// amd64/v1,amd64/v2:"BSFQ",-"CMOVEQ"
// amd64/v3:"TZCNTQ"
i += bits.TrailingZeros64(n)
n &= n - 1
}
Expand All @@ -363,7 +368,8 @@ func IterateBits64(n uint64) int {
func IterateBits32(n uint32) int {
i := 0
for n != 0 {
// amd64:"BSFL",-"BTSQ"
// amd64/v1,amd64/v2:"BSFL",-"BTSQ"
// amd64/v3:"TZCNTL"
i += bits.TrailingZeros32(n)
n &= n - 1
}
Expand All @@ -373,7 +379,8 @@ func IterateBits32(n uint32) int {
func IterateBits16(n uint16) int {
i := 0
for n != 0 {
// amd64:"BSFL",-"BTSL"
// amd64/v1,amd64/v2:"BSFL",-"BTSL"
// amd64/v3:"TZCNTL"
// arm64:"RBITW","CLZW",-"ORR"
i += bits.TrailingZeros16(n)
n &= n - 1
Expand All @@ -384,7 +391,8 @@ func IterateBits16(n uint16) int {
func IterateBits8(n uint8) int {
i := 0
for n != 0 {
// amd64:"BSFL",-"BTSL"
// amd64/v1,amd64/v2:"BSFL",-"BTSL"
// amd64/v3:"TZCNTL"
// arm64:"RBITW","CLZW",-"ORR"
i += bits.TrailingZeros8(n)
n &= n - 1
Expand Down

0 comments on commit 060cd73

Please sign in to comment.