Skip to content

Commit

Permalink
math, math/bits: add intrinsics for wasm
Browse files Browse the repository at this point in the history
This commit adds compiler intrinsics for the packages math and
math/bits on the wasm architecture for better performance.

benchmark                        old ns/op     new ns/op     delta
BenchmarkCeil                    8.31          3.21          -61.37%
BenchmarkCopysign                5.24          3.88          -25.95%
BenchmarkAbs                     5.42          3.34          -38.38%
BenchmarkFloor                   8.29          3.18          -61.64%
BenchmarkRoundToEven             9.76          3.26          -66.60%
BenchmarkSqrtLatency             8.13          4.88          -39.98%
BenchmarkSqrtPrime               5246          3535          -32.62%
BenchmarkTrunc                   8.29          3.15          -62.00%
BenchmarkLeadingZeros            13.0          4.23          -67.46%
BenchmarkLeadingZeros8           4.65          4.42          -4.95%
BenchmarkLeadingZeros16          7.60          4.38          -42.37%
BenchmarkLeadingZeros32          10.7          4.48          -58.13%
BenchmarkLeadingZeros64          12.9          4.31          -66.59%
BenchmarkTrailingZeros           6.52          4.04          -38.04%
BenchmarkTrailingZeros8          4.57          4.14          -9.41%
BenchmarkTrailingZeros16         6.69          4.16          -37.82%
BenchmarkTrailingZeros32         6.97          4.23          -39.31%
BenchmarkTrailingZeros64         6.59          4.00          -39.30%
BenchmarkOnesCount               7.93          3.30          -58.39%
BenchmarkOnesCount8              3.56          3.19          -10.39%
BenchmarkOnesCount16             4.85          3.19          -34.23%
BenchmarkOnesCount32             7.27          3.19          -56.12%
BenchmarkOnesCount64             8.08          3.28          -59.41%
BenchmarkRotateLeft              4.88          3.80          -22.13%
BenchmarkRotateLeft64            5.03          3.63          -27.83%

Change-Id: Ic1e0c2984878be8defb6eb7eb6ee63765c793222
Reviewed-on: https://go-review.googlesource.com/c/go/+/165177
Run-TryBot: Brad Fitzpatrick <[email protected]>
TryBot-Result: Gobot Gobot <[email protected]>
Reviewed-by: Cherry Zhang <[email protected]>
  • Loading branch information
neelance authored and bradfitz committed Mar 14, 2019
1 parent aa193c0 commit 5ee1b84
Show file tree
Hide file tree
Showing 9 changed files with 566 additions and 23 deletions.
42 changes: 21 additions & 21 deletions src/cmd/compile/internal/gc/ssa.go
Original file line number Diff line number Diff line change
Expand Up @@ -3190,22 +3190,22 @@ func init() {
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpSqrt, types.Types[TFLOAT64], args[0])
},
sys.I386, sys.AMD64, sys.ARM, sys.ARM64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.S390X)
sys.I386, sys.AMD64, sys.ARM, sys.ARM64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.S390X, sys.Wasm)
addF("math", "Trunc",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpTrunc, types.Types[TFLOAT64], args[0])
},
sys.ARM64, sys.PPC64, sys.S390X)
sys.ARM64, sys.PPC64, sys.S390X, sys.Wasm)
addF("math", "Ceil",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpCeil, types.Types[TFLOAT64], args[0])
},
sys.ARM64, sys.PPC64, sys.S390X)
sys.ARM64, sys.PPC64, sys.S390X, sys.Wasm)
addF("math", "Floor",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpFloor, types.Types[TFLOAT64], args[0])
},
sys.ARM64, sys.PPC64, sys.S390X)
sys.ARM64, sys.PPC64, sys.S390X, sys.Wasm)
addF("math", "Round",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpRound, types.Types[TFLOAT64], args[0])
Expand All @@ -3215,17 +3215,17 @@ func init() {
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpRoundToEven, types.Types[TFLOAT64], args[0])
},
sys.ARM64, sys.S390X)
sys.ARM64, sys.S390X, sys.Wasm)
addF("math", "Abs",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpAbs, types.Types[TFLOAT64], args[0])
},
sys.ARM64, sys.PPC64)
sys.ARM64, sys.PPC64, sys.Wasm)
addF("math", "Copysign",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return s.newValue2(ssa.OpCopysign, types.Types[TFLOAT64], args[0], args[1])
},
sys.PPC64)
sys.PPC64, sys.Wasm)

makeRoundAMD64 := func(op ssa.Op) func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
Expand Down Expand Up @@ -3275,12 +3275,12 @@ func init() {
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpCtz64, types.Types[TINT], args[0])
},
sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64)
sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64, sys.Wasm)
addF("math/bits", "TrailingZeros32",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpCtz32, types.Types[TINT], args[0])
},
sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64)
sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64, sys.Wasm)
addF("math/bits", "TrailingZeros16",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
x := s.newValue1(ssa.OpZeroExt16to32, types.Types[TUINT32], args[0])
Expand All @@ -3293,7 +3293,7 @@ func init() {
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpCtz16, types.Types[TINT], args[0])
},
sys.AMD64, sys.ARM64)
sys.AMD64, sys.ARM64, sys.Wasm)
addF("math/bits", "TrailingZeros16",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
x := s.newValue1(ssa.OpZeroExt16to64, types.Types[TUINT64], args[0])
Expand All @@ -3314,7 +3314,7 @@ func init() {
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpCtz8, types.Types[TINT], args[0])
},
sys.AMD64, sys.ARM64)
sys.AMD64, sys.ARM64, sys.Wasm)
addF("math/bits", "TrailingZeros8",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
x := s.newValue1(ssa.OpZeroExt8to64, types.Types[TUINT64], args[0])
Expand All @@ -3331,7 +3331,7 @@ func init() {
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpBitLen64, types.Types[TINT], args[0])
},
sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64)
sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64, sys.Wasm)
addF("math/bits", "Len32",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpBitLen32, types.Types[TINT], args[0])
Expand All @@ -3345,7 +3345,7 @@ func init() {
x := s.newValue1(ssa.OpZeroExt32to64, types.Types[TUINT64], args[0])
return s.newValue1(ssa.OpBitLen64, types.Types[TINT], x)
},
sys.ARM, sys.S390X, sys.MIPS, sys.PPC64)
sys.ARM, sys.S390X, sys.MIPS, sys.PPC64, sys.Wasm)
addF("math/bits", "Len16",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
if s.config.PtrSize == 4 {
Expand All @@ -3355,7 +3355,7 @@ func init() {
x := s.newValue1(ssa.OpZeroExt16to64, types.Types[TUINT64], args[0])
return s.newValue1(ssa.OpBitLen64, types.Types[TINT], x)
},
sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64)
sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64, sys.Wasm)
addF("math/bits", "Len16",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpBitLen16, types.Types[TINT], args[0])
Expand All @@ -3370,7 +3370,7 @@ func init() {
x := s.newValue1(ssa.OpZeroExt8to64, types.Types[TUINT64], args[0])
return s.newValue1(ssa.OpBitLen64, types.Types[TINT], x)
},
sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64)
sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64, sys.Wasm)
addF("math/bits", "Len8",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpBitLen8, types.Types[TINT], args[0])
Expand All @@ -3383,7 +3383,7 @@ func init() {
}
return s.newValue1(ssa.OpBitLen64, types.Types[TINT], args[0])
},
sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64)
sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64, sys.Wasm)
// LeadingZeros is handled because it trivially calls Len.
addF("math/bits", "Reverse64",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
Expand Down Expand Up @@ -3432,7 +3432,7 @@ func init() {
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return s.newValue2(ssa.OpRotateLeft64, types.Types[TUINT64], args[0], args[1])
},
sys.AMD64, sys.ARM64, sys.S390X, sys.PPC64)
sys.AMD64, sys.ARM64, sys.S390X, sys.PPC64, sys.Wasm)
alias("math/bits", "RotateLeft", "math/bits", "RotateLeft64", p8...)

makeOnesCountAMD64 := func(op64 ssa.Op, op32 ssa.Op) func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
Expand Down Expand Up @@ -3476,28 +3476,28 @@ func init() {
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpPopCount64, types.Types[TINT], args[0])
},
sys.PPC64, sys.ARM64, sys.S390X)
sys.PPC64, sys.ARM64, sys.S390X, sys.Wasm)
addF("math/bits", "OnesCount32",
makeOnesCountAMD64(ssa.OpPopCount32, ssa.OpPopCount32),
sys.AMD64)
addF("math/bits", "OnesCount32",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpPopCount32, types.Types[TINT], args[0])
},
sys.PPC64, sys.ARM64, sys.S390X)
sys.PPC64, sys.ARM64, sys.S390X, sys.Wasm)
addF("math/bits", "OnesCount16",
makeOnesCountAMD64(ssa.OpPopCount16, ssa.OpPopCount16),
sys.AMD64)
addF("math/bits", "OnesCount16",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpPopCount16, types.Types[TINT], args[0])
},
sys.ARM64, sys.S390X, sys.PPC64)
sys.ARM64, sys.S390X, sys.PPC64, sys.Wasm)
addF("math/bits", "OnesCount8",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpPopCount8, types.Types[TINT], args[0])
},
sys.S390X, sys.PPC64)
sys.S390X, sys.PPC64, sys.Wasm)
addF("math/bits", "OnesCount",
makeOnesCountAMD64(ssa.OpPopCount64, ssa.OpPopCount32),
sys.AMD64)
Expand Down
25 changes: 25 additions & 0 deletions src/cmd/compile/internal/ssa/gen/Wasm.rules
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,31 @@
// Write barrier.
(WB {fn} destptr srcptr mem) -> (LoweredWB {fn} destptr srcptr mem)

// --- Intrinsics ---
(Sqrt x) -> (F64Sqrt x)
(Trunc x) -> (F64Trunc x)
(Ceil x) -> (F64Ceil x)
(Floor x) -> (F64Floor x)
(RoundToEven x) -> (F64Nearest x)
(Abs x) -> (F64Abs x)
(Copysign x y) -> (F64Copysign x y)

(Ctz64 x) -> (I64Ctz x)
(Ctz32 x) -> (I64Ctz (I64Or x (I64Const [0x100000000])))
(Ctz16 x) -> (I64Ctz (I64Or x (I64Const [0x10000])))
(Ctz8 x) -> (I64Ctz (I64Or x (I64Const [0x100])))

(Ctz(64|32|16|8)NonZero x) -> (I64Ctz x)

(BitLen64 x) -> (I64Sub (I64Const [64]) (I64Clz x))

(RotateLeft64 x y) -> (I64Rotl x y)

(PopCount64 x) -> (I64Popcnt x)
(PopCount32 x) -> (I64Popcnt (ZeroExt32to64 x))
(PopCount16 x) -> (I64Popcnt (ZeroExt16to64 x))
(PopCount8 x) -> (I64Popcnt (ZeroExt8to64 x))

// --- Optimizations ---
(I64Add (I64Const [x]) (I64Const [y])) -> (I64Const [x + y])
(I64Mul (I64Const [x]) (I64Const [y])) -> (I64Const [x * y])
Expand Down
13 changes: 13 additions & 0 deletions src/cmd/compile/internal/ssa/gen/WasmOps.go
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,19 @@ func init() {
{name: "I64TruncF64U", asm: "I64TruncF64U", argLength: 1, reg: regInfo{inputs: []regMask{fp}, outputs: []regMask{gp}}, typ: "Int64"}, // truncates the float arg0 to an unsigned integer
{name: "F64ConvertI64S", asm: "F64ConvertI64S", argLength: 1, reg: regInfo{inputs: []regMask{gp}, outputs: []regMask{fp}}, typ: "Float64"}, // converts the signed integer arg0 to a float
{name: "F64ConvertI64U", asm: "F64ConvertI64U", argLength: 1, reg: regInfo{inputs: []regMask{gp}, outputs: []regMask{fp}}, typ: "Float64"}, // converts the unsigned integer arg0 to a float

{name: "F64Sqrt", asm: "F64Sqrt", argLength: 1, reg: fp11, typ: "Float64"}, // sqrt(arg0)
{name: "F64Trunc", asm: "F64Trunc", argLength: 1, reg: fp11, typ: "Float64"}, // trunc(arg0)
{name: "F64Ceil", asm: "F64Ceil", argLength: 1, reg: fp11, typ: "Float64"}, // ceil(arg0)
{name: "F64Floor", asm: "F64Floor", argLength: 1, reg: fp11, typ: "Float64"}, // floor(arg0)
{name: "F64Nearest", asm: "F64Nearest", argLength: 1, reg: fp11, typ: "Float64"}, // round(arg0)
{name: "F64Abs", asm: "F64Abs", argLength: 1, reg: fp11, typ: "Float64"}, // abs(arg0)
{name: "F64Copysign", asm: "F64Copysign", argLength: 2, reg: fp21, typ: "Float64"}, // copysign(arg0, arg1)

{name: "I64Ctz", asm: "I64Ctz", argLength: 1, reg: gp11, typ: "Int64"}, // ctz(arg0)
{name: "I64Clz", asm: "I64Clz", argLength: 1, reg: gp11, typ: "Int64"}, // clz(arg0)
{name: "I64Rotl", asm: "I64Rotl", argLength: 2, reg: gp21, typ: "Int64"}, // rotl(arg0, arg1)
{name: "I64Popcnt", asm: "I64Popcnt", argLength: 1, reg: gp11, typ: "Int64"}, // popcnt(arg0)
}

archs = append(archs, arch{
Expand Down
156 changes: 156 additions & 0 deletions src/cmd/compile/internal/ssa/opGen.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 5ee1b84

Please sign in to comment.