cmd/compile: intrinsify Add64 on riscv64
According to the RISC-V instruction set manual v2.2, Sec 2.4, we can
implement the overflow check for unsigned addition cheaply using
SLTU instructions.
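The trick: SLTU (set-if-less-than, unsigned) yields exactly the carry bit, because an unsigned add x+y wrapped around if and only if the truncated sum is less than x. A minimal Go sketch of that check (the function name is illustrative, not part of the CL):

    // carryOut reports the carry out of the 64-bit unsigned add x+y.
    // The comparison is what maps to an SLTU instruction on riscv64.
    func carryOut(x, y uint64) uint64 {
        s := x + y
        if s < x { // sum wrapped around, so there was a carry
            return 1
        }
        return 0
    }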

After this CL, the performance differences in the crypto/elliptic
benchmarks on linux/riscv64 are:

name                 old time/op    new time/op    delta
ScalarBaseMult/P256    1.93ms ± 1%    1.64ms ± 1%  -14.96%  (p=0.008 n=5+5)
ScalarBaseMult/P224    1.80ms ± 2%    1.53ms ± 1%  -14.89%  (p=0.008 n=5+5)
ScalarBaseMult/P384    6.15ms ± 2%    5.12ms ± 2%  -16.73%  (p=0.008 n=5+5)
ScalarBaseMult/P521    25.9ms ± 1%    22.3ms ± 2%  -13.78%  (p=0.008 n=5+5)
ScalarMult/P256        5.59ms ± 1%    4.49ms ± 2%  -19.79%  (p=0.008 n=5+5)
ScalarMult/P224        5.42ms ± 1%    4.33ms ± 1%  -20.01%  (p=0.008 n=5+5)
ScalarMult/P384        19.9ms ± 2%    16.3ms ± 1%  -18.15%  (p=0.008 n=5+5)
ScalarMult/P521        97.3ms ± 1%   100.7ms ± 0%   +3.48%  (p=0.008 n=5+5)

Change-Id: Ic4c82ced4b072a4a6575343fa9f29dd09b0cabc4
Reviewed-on: https://go-review.googlesource.com/c/go/+/420094
Reviewed-by: David Chase <[email protected]>
Reviewed-by: Cherry Mui <[email protected]>
Run-TryBot: Wayne Zuo <[email protected]>
Reviewed-by: Joel Sing <[email protected]>
TryBot-Result: Gopher Robot <[email protected]>
wdvxdr1123 authored and 4a6f656c committed Aug 27, 2022
1 parent a2d2e6e commit 969f48a
Showing 4 changed files with 93 additions and 2 deletions.
7 changes: 7 additions & 0 deletions src/cmd/compile/internal/ssa/gen/RISCV64.rules
@@ -52,6 +52,10 @@
(Hmul32 x y) => (SRAI [32] (MUL (SignExt32to64 x) (SignExt32to64 y)))
(Hmul32u x y) => (SRLI [32] (MUL (ZeroExt32to64 x) (ZeroExt32to64 y)))

(Select0 (Add64carry x y c)) => (ADD (ADD <typ.UInt64> x y) c)
(Select1 (Add64carry x y c)) =>
(OR (SLTU <typ.UInt64> s:(ADD <typ.UInt64> x y) x) (SLTU <typ.UInt64> (ADD <typ.UInt64> s c) s))

// (x + y) / 2 => (x / 2) + (y / 2) + (x & y & 1)
(Avg64u <t> x y) => (ADD (ADD <t> (SRLI <t> [1] x) (SRLI <t> [1] y)) (ANDI <t> [1] (AND <t> x y)))

@@ -743,6 +747,9 @@
(SLTI [x] (MOVDconst [y])) => (MOVDconst [b2i(int64(y) < int64(x))])
(SLTIU [x] (MOVDconst [y])) => (MOVDconst [b2i(uint64(y) < uint64(x))])

(SLT x x) => (MOVDconst [0])
(SLTU x x) => (MOVDconst [0])

// deadcode for LoweredMuluhilo
(Select0 m:(LoweredMuluhilo x y)) && m.Uses == 1 => (MULHU x y)
(Select1 m:(LoweredMuluhilo x y)) && m.Uses == 1 => (MUL x y)
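Taken together, the two Add64carry rules above lower math/bits.Add64 to two adds, two unsigned compares, and an or. A Go sketch of the equivalent computation (illustrative only; the rules operate on SSA values, not Go source):

    // add64carry mirrors the lowering of Select0/Select1 (Add64carry x y c).
    func add64carry(x, y, c uint64) (sum, carry uint64) {
        s := x + y  // ADD
        sum = s + c // ADD
        var c1, c2 uint64
        if s < x { // SLTU: x+y wrapped around
            c1 = 1
        }
        if sum < s { // SLTU: adding the carry-in wrapped around
            c2 = 1
        }
        carry = c1 | c2 // OR
        return
    }

The (SLTU x x) fold pays off in the common case of a constant-zero carry-in: (ADD s c) then simplifies to s, the second compare becomes (SLTU s s), and it folds away to 0, leaving a single SLTU. The signed (SLT x x) fold is the analogous simplification for signed compares.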
76 changes: 76 additions & 0 deletions src/cmd/compile/internal/ssa/rewriteRISCV64.go

This generated file is not rendered by default.

4 changes: 2 additions & 2 deletions src/cmd/compile/internal/ssagen/ssa.go
@@ -4726,8 +4726,8 @@ func InitTables() {
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
return s.newValue3(ssa.OpAdd64carry, types.NewTuple(types.Types[types.TUINT64], types.Types[types.TUINT64]), args[0], args[1], args[2])
},
-sys.AMD64, sys.ARM64, sys.PPC64, sys.S390X)
-alias("math/bits", "Add", "math/bits", "Add64", sys.ArchAMD64, sys.ArchARM64, sys.ArchPPC64, sys.ArchPPC64LE, sys.ArchS390X)
+sys.AMD64, sys.ARM64, sys.PPC64, sys.S390X, sys.RISCV64)
+alias("math/bits", "Add", "math/bits", "Add64", sys.ArchAMD64, sys.ArchARM64, sys.ArchPPC64, sys.ArchPPC64LE, sys.ArchS390X, sys.ArchRISCV64)
addF("math/bits", "Sub64",
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
return s.newValue3(ssa.OpSub64borrow, types.NewTuple(types.Types[types.TUINT64], types.Types[types.TUINT64]), args[0], args[1], args[2])
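With the intrinsic registered for RISCV64 (and bits.Add aliased to bits.Add64 on the listed 64-bit platforms), carry chains written with math/bits compile to the ADD/SLTU sequence instead of a function call. A small usage sketch in the style of the multi-limb additions that crypto/elliptic's field arithmetic relies on (names and limb count are illustrative):

    package main

    import (
        "fmt"
        "math/bits"
    )

    // add256 adds two 256-bit values held as four little-endian 64-bit
    // limbs, threading the carry through bits.Add64 at each step.
    func add256(x, y [4]uint64) (z [4]uint64, carry uint64) {
        var c uint64
        z[0], c = bits.Add64(x[0], y[0], 0)
        z[1], c = bits.Add64(x[1], y[1], c)
        z[2], c = bits.Add64(x[2], y[2], c)
        z[3], c = bits.Add64(x[3], y[3], c)
        return z, c
    }

    func main() {
        x := [4]uint64{^uint64(0), ^uint64(0), 0, 0} // 2^128 - 1
        y := [4]uint64{1, 0, 0, 0}                   // 1
        z, c := add256(x, y)
        fmt.Println(z, c) // [0 0 1 0] 0
    }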
8 changes: 8 additions & 0 deletions test/codegen/mathbits.go
@@ -442,6 +442,7 @@ func Add(x, y, ci uint) (r, co uint) {
// ppc64: "ADDC", "ADDE", "ADDZE"
// ppc64le: "ADDC", "ADDE", "ADDZE"
// s390x:"ADDE","ADDC\t[$]-1,"
// riscv64: "ADD","SLTU"
return bits.Add(x, y, ci)
}

@@ -451,6 +452,7 @@ func AddC(x, ci uint) (r, co uint) {
// ppc64: "ADDC", "ADDE", "ADDZE"
// ppc64le: "ADDC", "ADDE", "ADDZE"
// s390x:"ADDE","ADDC\t[$]-1,"
// riscv64: "ADD","SLTU"
return bits.Add(x, 7, ci)
}

@@ -460,6 +462,7 @@ func AddZ(x, y uint) (r, co uint) {
// ppc64: "ADDC", -"ADDE", "ADDZE"
// ppc64le: "ADDC", -"ADDE", "ADDZE"
// s390x:"ADDC",-"ADDC\t[$]-1,"
// riscv64: "ADD","SLTU"
return bits.Add(x, y, 0)
}

@@ -469,6 +472,7 @@ func AddR(x, y, ci uint) uint {
// ppc64: "ADDC", "ADDE", -"ADDZE"
// ppc64le: "ADDC", "ADDE", -"ADDZE"
// s390x:"ADDE","ADDC\t[$]-1,"
// riscv64: "ADD",-"SLTU"
r, _ := bits.Add(x, y, ci)
return r
}
@@ -489,6 +493,7 @@ func Add64(x, y, ci uint64) (r, co uint64) {
// ppc64: "ADDC", "ADDE", "ADDZE"
// ppc64le: "ADDC", "ADDE", "ADDZE"
// s390x:"ADDE","ADDC\t[$]-1,"
// riscv64: "ADD","SLTU"
return bits.Add64(x, y, ci)
}

@@ -498,6 +503,7 @@ func Add64C(x, ci uint64) (r, co uint64) {
// ppc64: "ADDC", "ADDE", "ADDZE"
// ppc64le: "ADDC", "ADDE", "ADDZE"
// s390x:"ADDE","ADDC\t[$]-1,"
// riscv64: "ADD","SLTU"
return bits.Add64(x, 7, ci)
}

@@ -507,6 +513,7 @@ func Add64Z(x, y uint64) (r, co uint64) {
// ppc64: "ADDC", -"ADDE", "ADDZE"
// ppc64le: "ADDC", -"ADDE", "ADDZE"
// s390x:"ADDC",-"ADDC\t[$]-1,"
// riscv64: "ADD","SLTU"
return bits.Add64(x, y, 0)
}

@@ -516,6 +523,7 @@ func Add64R(x, y, ci uint64) uint64 {
// ppc64: "ADDC", "ADDE", -"ADDZE"
// ppc64le: "ADDC", "ADDE", -"ADDZE"
// s390x:"ADDE","ADDC\t[$]-1,"
// riscv64: "ADD",-"SLTU"
r, _ := bits.Add64(x, y, ci)
return r
}
